Related
I am getting different values for variance accounted for by factors in factor analysis whether I check them with fit$loadings or with fit$Vaccounted. I am using the psych package with the fa() function. Why would that be the case if they're supposed to be exactly the same thing (I guess they're not or that they are calculated differently)?
The total difference is not huge, but still not trivial (about 0.7 for cumulative). I have a reprex below.
(I'm sorry for the large dataset, I was not able to replicate the issue with different datasets or a subset, so it might have to do with something funky with the data.)
data <- structure(list(X1 = c(5, 5, 5, 7, 2, 2, 2, 2, 7, 5, 4, 9, 8,
8, 6, 9, 9, 2, 2, 2, 2, 3, 2, 2, 9, 7, 8, 4, 3, 4, 6, 6, 3, 4,
4, 4, 8, 7, 6, 7, 5, 6, 6, 4, 8, 8, 8, 3, 9, 9, 6, 4, 8, 7, 8,
7, 8, 8, 8, 8), X2 = c(6, 4, 4, 6, 2, 2, 2, 2, 6, 5, 4, 8, 7,
9, 6, 9, 4, 2, 2, 2, 6, 4, 6, 7, 9, 6, 8, 4, 3, 3, 5, 5, 2, 3,
4, 7, 7, 5, 5, 6, 7, 7, 7, 3, 8, 5, 3, 2, 9, 9, 4, 4, 4, 6, 4,
4, 8, 8, 8, 8), X3 = c(7, 5, 4, 7, 2, 2, 2, 2, 7, 5, 3, 7, 8,
9, 7, 9, 2, 2, 2, 2, 4, 2, 5, 4, 9, 6, 8, 4, 3, 2, 4, 5, 3, 2,
2, 7, 7, 6, 6, 5, 7, 7, 7, 4, 8, 7, 3, 2, 9, 9, 4, 3, 4, 4, 5,
5, 8, 7, 7, 7), X5 = c(7, 6, 4, 6, 2, 2, 2, 2, 6, 4, 3, 7, 7,
9, 6, 9, 2, 2, 2, 2, 2, 2, 4, 4, 9, 8, 6, 5, 2, 2, 4, 3, 2, 2,
4, 7, 7, 6, 5, 6, 7, 7, 7, 3, 4, 5, 3, 2, 9, 9, 4, 2, 4, 4, 4,
5, 8, 4, 6, 5), X6 = c(8, 4, 3, 8, 3, 2, 2, 2, 6, 5, 3, 7, 9,
9, 7, 9, 2, 2, 2, 2, 6, 4, 6, 5, 8, 7, 6, 3, 2, 2, 2, 2, 4, 5,
8, 8, 8, 2, 3, 4, 8, 8, 5, 3, 2, 2, 2, 2, 9, 9, 4, 4, 4, 4, 4,
4, 5, 3, 4, 5), X7 = c(6, 6, 4, 4, 2, 2, 2, 2, 7, 4, 3, 7, 6,
7, 4, 6, 2, 2, 2, 2, 2, 2, 4, 2, 7, 4, 8, 2, 2, 2, 4, 3, 3, 3,
2, 5, 8, 4, 6, 7, 6, 6, 4, 2, 4, 8, 7, 2, 8, 8, 3, 3, 5, 5, 6,
6, 5, 8, 8, 8), X8 = c(6, 6, 4, 4, 2, 2, 2, 2, 7, 4, 3, 7, 5,
7, 6, 6, 2, 2, 2, 2, 2, 2, 2, 2, 6, 3, 7, 3, 2, 2, 4, 2, 2, 2,
2, 4, 7, 4, 4, 6, 6, 6, 5, 2, 2, 7, 3, 2, 8, 7, 3, 3, 4, 5, 5,
5, 4, 6, 8, 8), X10 = c(9, 9, 9, 8, 9, 9, 9, 9, 4, 6, 8, 3, 6,
5, 6, 4, 9, 9, 9, 9, 8, 7, 8, 8, 2, 8, 3, 9, 9, 9, 9, 7, 7, 8,
7, 7, 4, 3, 7, 6, 9, 6, 9, 9, 9, 9, 9, 9, 4, 4, 8, 9, 9, 6, 8,
8, 9, 9, 9, 9), X11 = c(5, 6, 4, 7, 2, 3, 2, 3, 7, 6, 2, 3, 8,
7, 6, 7, 2, 2, 2, 2, 3, 2, 2, 3, 9, 4, 8, 2, 2, 2, 6, 5, 3, 2,
2, 2, 5, 7, 4, 6, 8, 5, 8, 2, 7, 7, 2, 2, 8, 8, 4, 4, 5, 4, 5,
4, 5, 3, 5, 3), X12 = c(8, 6, 4, 6, 2, 2, 2, 2, 2, 5, 2, 2, 3,
3, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 4, 4, 2, 2, 3, 6, 2, 3,
3, 3, 4, 4, 8, 7, 5, 8, 6, 4, 5, 8, 2, 2, 2, 4, 4, 3, 5, 5, 4,
4, 7, 4, 6, 6), X13 = c(9, 8, 8, 8, 2, 2, 2, 2, 3, 5, 3, 2, 7,
5, 8, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 3, 3, 2, 2, 5, 6, 7, 7,
8, 6, 3, 4, 8, 6, 4, 6, 6, 6, 9, 9, 9, 4, 3, 5, 6, 8, 8, 8, 8,
9, 7, 8, 9, 9), X14 = c(7, 5, 6, 8, 2, 2, 2, 2, 7, 5, 3, 9, 8,
8, 6, 9, 2, 2, 2, 2, 5, 2, 3, 3, 9, 6, 8, 2, 5, 4, 6, 4, 4, 5,
5, 6, 6, 8, 3, 5, 9, 7, 6, 8, 9, 9, 4, 3, 9, 9, 4, 4, 6, 7, 6,
7, 8, 8, 8, 9), X15 = c(7, 6, 4, 6, 2, 2, 2, 2, 6, 5, 3, 8, 9,
7, 6, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3, 4, 4, 5, 3,
4, 7, 2, 3, 5, 2, 6, 5, 6, 3, 4, 7, 5, 3, 8, 8, 3, 4, 5, 5, 6,
6, 8, 7, 6, 7), X16 = c(7, 6, 4, 6, 2, 3, 2, 2, 7, 5, 3, 8, 9,
9, 7, 9, 2, 2, 2, 2, 2, 2, 7, 5, 9, 7, 8, 2, 2, 2, 4, 4, 5, 4,
4, 6, 9, 8, 6, 6, 6, 5, 6, 3, 8, 7, 3, 3, 8, 8, 4, 4, 4, 5, 5,
5, 8, 7, 5, 7), X17 = c(9, 4, 3, 7, 3, 3, 2, 2, 2, 2, 2, 2, 9,
8, 7, 4, 2, 2, 2, 2, 2, 2, 2, 2, 9, 5, 8, 3, 2, 2, 7, 6, 4, 2,
3, 3, 4, 7, 6, 6, 8, 7, 7, 3, 2, 2, 3, 3, 2, 7, 5, 4, 4, 4, 4,
4, 4, 4, 4, 3), X18 = c(8, 5, 7, 7, 2, 2, 2, 2, 2, 5, 3, 7, 9,
8, 9, 9, 2, 2, 2, 2, 4, 4, 5, 3, 9, 8, 9, 3, 3, 2, 5, 4, 3, 4,
6, 5, 6, 8, 8, 8, 4, 5, 3, 2, 9, 8, 7, 3, 6, 8, 4, 2, 2, 4, 4,
3, 6, 4, 3, 6), X19 = c(4, 5, 7, 8, 2, 2, 2, 2, 7, 4, 3, 8, 9,
8, 7, 9, 2, 2, 2, 2, 2, 2, 4, 2, 9, 6, 8, 2, 2, 2, 5, 4, 3, 2,
2, 2, 8, 9, 3, 7, 6, 6, 2, 2, 8, 5, 2, 3, 7, 9, 3, 3, 5, 3, 4,
2, 7, 5, 4, 5), X20 = c(8, 7, 7, 7, 5, 6, 6, 6, 4, 3, 4, 4, 8,
5, 6, 7, 6, 6, 6, 6, 4, 2, 4, 4, 9, 4, 7, 6, 5, 5, 5, 5, 6, 6,
6, 6, 8, 5, 6, 5, 5, 3, 2, 2, 8, 9, 9, 9, 9, 9, 6, 7, 8, 8, 8,
9, 9, 8, 9, 8), X21 = c(9, 8, 7, 7, 4, 4, 5, 5, 9, 3, 8, 9, 9,
9, 9, 9, 4, 4, 4, 4, 8, 7, 7, 4, 9, 8, 9, 9, 4, 5, 5, 5, 5, 6,
5, 6, 9, 7, 7, 7, 6, 6, 6, 6, 9, 9, 9, 9, 9, 9, 6, 8, 8, 8, 8,
9, 9, 8, 9, 9), X23 = c(4, 4, 3, 6, 3, 2, 2, 2, 2, 2, 2, 2, 6,
7, 4, 7, 3, 3, 3, 3, 3, 2, 2, 2, 7, 5, 7, 4, 2, 2, 2, 2, 4, 6,
8, 7, 4, 2, 5, 4, 2, 2, 2, 2, 2, 2, 2, 2, 8, 9, 5, 5, 4, 6, 5,
5, 5, 3, 5, 8), X24 = c(4, 3, 6, 3, 2, 2, 2, 4, 2, 2, 2, 2, 8,
8, 7, 7, 2, 2, 2, 2, 7, 8, 5, 5, 3, 2, 3, 2, 2, 2, 2, 2, 2, 2,
2, 2, 7, 5, 6, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 8, 2, 2, 2,
2, 2, 2, 2, 2), X25 = c(6, 6, 6, 7, 3, 5, 3, 3, 7, 5, 3, 5, 8,
8, 9, 9, 2, 2, 2, 2, 6, 7, 6, 5, 7, 2, 3, 2, 2, 2, 2, 2, 2, 3,
3, 4, 5, 4, 6, 6, 7, 9, 7, 4, 2, 2, 2, 2, 5, 6, 2, 9, 2, 5, 4,
3, 4, 3, 3, 6), X26 = c(8, 7, 5, 7, 3, 5, 3, 4, 4, 5, 3, 6, 7,
6, 7, 4, 2, 2, 2, 2, 2, 6, 5, 4, 2, 9, 9, 3, 2, 2, 2, 2, 4, 6,
7, 4, 5, 6, 8, 6, 6, 6, 7, 3, 3, 7, 5, 4, 4, 5, 3, 5, 4, 5, 5,
4, 4, 4, 5, 6), X28 = c(6, 4, 5, 6, 2, 2, 2, 2, 7, 4, 2, 5, 8,
6, 7, 5, 3, 3, 3, 3, 2, 2, 2, 2, 7, 4, 6, 2, 2, 2, 2, 2, 3, 3,
2, 4, 5, 7, 7, 6, 5, 3, 6, 5, 2, 8, 2, 2, 5, 5, 7, 7, 4, 4, 4,
5, 4, 3, 4, 7), X29 = c(5, 8, 6, 6, 9, 9, 9, 9, 5, 6, 9, 5, 3,
4, 4, 6, 8, 8, 8, 8, 9, 8, 9, 8, 5, 8, 8, 8, 8, 8, 6, 7, 6, 7,
7, 5, 4, 3, 4, 4, 6, 4, 6, 5, 8, 5, 8, 8, 7, 7, 4, 5, 7, 7, 6,
7, 8, 8, 9, 8), X30 = c(3, 3, 4, 5, 2, 2, 2, 2, 5, 4, 2, 5, 8,
7, 7, 6, 2, 2, 2, 2, 2, 2, 2, 2, 6, 5, 6, 3, 3, 2, 2, 2, 2, 2,
4, 3, 7, 8, 7, 6, 2, 2, 2, 2, 2, 9, 3, 2, 4, 3, 6, 5, 3, 2, 4,
3, 2, 2, 2, 4), X32 = c(2, 3, 3, 3, 2, 4, 2, 3, 3, 2, 2, 6, 8,
7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 8, 5, 8, 2, 2, 2, 2, 2, 3, 2,
2, 3, 2, 6, 4, 6, 9, 9, 9, 5, 2, 9, 2, 2, 5, 4, 6, 7, 2, 2, 2,
2, 5, 6, 5, 6), X34 = c(2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 4,
3, 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2,
6, 6, 2, 2, 3, 2, 6, 8, 7, 2, 2, 2, 3, 2, 6, 4, 3, 3, 3, 4, 3,
3, 4, 3, 4, 2)), class = "data.frame", row.names = c(NA, 60L))
Now that we defined the dataset, let's jump in with the code.
library(psych)
fit <- fa(data, nfactors = 4)
#> Loading required namespace: GPArotation
print(fit$loadings)
#>
#> [Loadings truncated for brevity]
#>
#> MR1 MR2 MR3 MR4
#> SS loadings 9.464 3.571 2.171 1.682
#> Proportion Var 0.338 0.128 0.078 0.060
#> Cumulative Var 0.338 0.466 0.543 0.603
print(fit$Vaccounted, digits = 3)
#> MR1 MR2 MR3 MR4
#> SS loadings 10.392 4.328 2.324 1.8283
#> Proportion Var 0.371 0.155 0.083 0.0653
#> Cumulative Var 0.371 0.526 0.609 0.6740
Created on 2022-02-10 by the reprex package (v2.0.1)
We can see the values differ. Any ideas why?
https://www.researchgate.net/post/How_can_of_Variance_of_factors_in_exploratory_factor_analysis_be_calculated_when_factors_are_correlated
I am not familiar with factor analysis, but as shown here, it seems that SS loading cannot be calculated as a sum of squares because of inter-factor correlations when oblique rotation is used. Perhaps, fit$Vaccounted takes this problem into account but fit$loadings is simply the sum of squares. I think this difference appears.
Note that the default rotation in the fa package is oblimin which is obliqu rotation, so I think this difference will appear.
I would like to obtain the initial communalities for an exploratory factor analysis in R
(that is, the R squared of each item when predicted by the other items included in the analysis).
Is there a way to do this with either jmv::efa or psych::fa ?
I only see the uniqueness, which informs me of the communalities AFTER factor extraction (1-uniqueness)...
Thank you for your consideration : )
As you note, the initial communalities in a factor analysis are the squared multiple correlations (SMC) of each variable by the remaining variables. Using the built-in attitude dataset as an example they are easily calculated without additional packages via:
1 - 1 / diag(solve(cor(attitude)))
rating complaints privileges learning raises critical advance
0.7326020 0.7700868 0.3831176 0.6194561 0.6770498 0.1881465 0.5186447
The psych package includes the smc() function for convenience:
psych::smc(attitude)
rating complaints privileges learning raises critical advance
0.7326020 0.7700868 0.3831176 0.6194561 0.6770498 0.1881465 0.5186447
Dataset
Here is the dput for the data I am using, hereafter called hwk:
hwk <- structure(list(V1 = structure(c(4, 4, 2, 2, 2, 2, 2, 2, 4, 4,
2, 3, 2, 3, 4, 2, 2, 2, 3, 3, 2, 3, 1, 3, 3, 3, 3, 4, 1, 2, 4,
1, 2, 3, 2, 3, 1, 1, 2, 2, 4, 3, 2, 1, 2, 3, 3, 4, 3, 3, 2, 3,
1, 4, 3, 2, 3, 4, 1, 3, 3, 3, 2, 2, 1, 2, 3, 4, 4, 2, 4, 3, 2,
3, 3, 3, 3, 2, 4, 3, 3, 3, 2, 2, 3, 4, 2, 4, 4, 2, 2, 3, 3), format.spss = "F8.0"),
V2 = structure(c(4, 4, 3, 4, 3, 4, 3, 2, 4, 1, 3, 3, 3, 4,
3, 3, 2, 3, 4, 3, 1, 4, 2, 3, 4, 2, 4, 3, 3, 2, 3, 2, 3,
3, 4, 3, 3, 3, 3, 3, 3, 2, 4, 2, 2, 2, 4, 3, 4, 4, 2, 4,
2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 4, 3, 3, 4, 4, 4, 4, 4,
3, 4, 3, 3, 3, 4, 2, 4, 3, 4, 3, 3, 2, 3, 3, 4, 3, 4, 3,
4, 4, 3), format.spss = "F8.0"), V3 = structure(c(4, 4, 4,
4, 4, 4, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), format.spss = "F8.0"),
V4 = structure(c(4, 4, 3, 4, 3, 4, 2, 1, 3, 2, 3, 1, 4, 4,
2, 3, 2, 2, 2, 4, 1, 2, 2, 2, 3, 2, 3, 2, 2, 1, 3, 1, 1,
2, 4, 1, 1, 2, 3, 2, 2, 1, 1, 1, 3, 2, 4, 3, 3, 3, 3, 3,
3, 4, 3, 1, 4, 3, 4, 3, 2, 3, 2, 1, 4, 1, 4, 1, 2, 4, 4,
4, 3, 3, 3, 2, 2, 1, 4, 3, 2, 3, 2, 1, 3, 4, 1, 2, 4, 3,
4, 2, 2), format.spss = "F8.0"), V5 = structure(c(3, 3, 3,
4, 3, 4, 3, 1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 3, 2,
2, 2, 2, 4, 2, 3, 2, 3, 4, 1, 4, 2, 3, 3, 2, 2, 3, 2, 2,
3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 2, 3, 3, 2, 2, 3, 3, 2, 3,
2, 2, 3, 3, 3, 2, 3, 3, 3, 4, 3, 2, 3, 3, 3, 3, 3, 3, 4,
3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 4, 3, 3), format.spss = "F8.0"),
V6 = structure(c(4, 4, 3, 4, 3, 4, 4, 1, 3, 3, 3, 3, 2, 3,
4, 2, 4, 3, 3, 3, 3, 4, 4, 3, 3, 3, 4, 4, 4, 3, 4, 4, 3,
3, 3, 4, 2, 2, 3, 3, 3, 4, 2, 4, 3, 4, 4, 4, 3, 4, 2, 4,
3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 3, 1, 4, 4, 4, 4, 4, 4,
4, 3, 4, 4, 4, 4, 2, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3,
4, 4, 4), format.spss = "F8.0"), V7 = structure(c(4, 4, 2,
4, 2, 4, 4, 3, 3, 3, 2, 2, 4, 4, 3, 3, 1, 4, 3, 3, 1, 2,
4, 3, 4, 2, 4, 4, 3, 3, 2, 2, 3, 2, 4, 3, 3, 3, 3, 3, 3,
1, 4, 3, 2, 2, 4, 3, 4, 4, 2, 4, 2, 3, 4, 3, 3, 3, 4, 3,
4, 4, 3, 4, 4, 3, 4, 4, 4, 4, 3, 4, 4, 4, 3, 3, 4, 3, 4,
3, 3, 3, 3, 2, 2, 4, 4, 4, 4, 2, 4, 4, 3), format.spss = "F8.0"),
V8 = structure(c(4, 4, 2, 1, 2, 1, 1, 1, 3, 3, 2, 3, 2, 3,
4, 2, 2, 2, 3, 3, 2, 3, 1, 3, 3, 3, 3, 4, 1, 2, 4, 1, 2,
3, 2, 3, 1, 1, 2, 2, 3, 1, 1, 1, 2, 3, 3, 4, 3, 3, 2, 3,
1, 3, 4, 2, 3, 4, 1, 3, 3, 3, 2, 2, 1, 2, 3, 4, 4, 2, 4,
3, 4, 4, 4, 4, 3, 2, 4, 3, 3, 3, 2, 2, 3, 4, 2, 4, 4, 2,
1, 3, 4), format.spss = "F8.0"), V9 = structure(c(4, 4, 4,
4, 4, 4, 4, 4, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 2, 3, 4, 4,
4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 3, 2, 4, 3, 4,
4, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 4, 3, 4, 3, 2, 4,
3, 3, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 4, 3, 4, 3, 4, 4, 4,
4, 3, 4, 4, 4, 4, 4, 3, 2, 4, 4, 4, 4, 4), format.spss = "F8.0"),
V10 = structure(c(4, 4, 2, 4, 2, 4, 3, 2, 3, 3, 3, 2, 4,
4, 2, 2, 1, 3, 4, 4, 1, 4, 2, 3, 3, 2, 4, 3, 2, 3, 3, 1,
3, 2, 4, 3, 2, 3, 3, 3, 3, 1, 2, 4, 2, 3, 4, 4, 3, 3, 2,
4, 2, 4, 3, 3, 4, 3, 4, 3, 4, 4, 4, 1, 4, 3, 3, 4, 3, 4,
4, 3, 3, 3, 3, 3, 4, 1, 4, 3, 3, 3, 3, 2, 3, 4, 4, 2, 4,
2, 4, 4, 3), format.spss = "F8.0"), V11 = structure(c(3,
3, 1, 4, 1, 4, 1, 1, 1, 1, 2, 1, 1, 1, 3, 2, 2, 2, 2, 1,
2, 3, 1, 2, 3, 3, 2, 1, 2, 2, 2, 3, 2, 2, 3, 2, 1, 2, 2,
1, 1, 4, 3, 1, 3, 2, 3, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 3,
2, 2, 2, 2, 2, 2, 1, 1, 1, 3, 3, 4, 2, 1, 2, 2, 3, 3, 3,
3, 4, 3, 2, 3, 3, 2, 2, 2, 2, 1, 3, 1, 4, 1, 3), format.spss = "F8.0"),
V12 = structure(c(4, 4, 3, 2, 3, 2, 3, 1, 3, 3, 3, 3, 2,
3, 3, 2, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3, 3, 3, 4, 3, 4, 4,
3, 3, 3, 4, 2, 2, 3, 3, 3, 4, 2, 4, 3, 4, 4, 4, 3, 4, 2,
4, 3, 3, 3, 3, 4, 3, 3, 2, 2, 1, 1, 3, 1, 4, 4, 4, 4, 4,
4, 4, 3, 3, 2, 2, 2, 2, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
3, 2, 3, 4), format.spss = "F8.0")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -93L))
EFA
I did some research after my initial answer and it appears that there is a package for this called EFA Tools. There is a function called EFA that allows you to specify that you want the initial communalities. First, run the library and the EFA itself below:
# Load EFA Tools library:
library(EFAtools)
# Run EFA:
hwkfa <- EFA(hwk,
n_factors = 3,
start_method = "psych",
method = "PAF",
rotation = "promax",
init_comm = "smc", # selected initial communalities
type = "SPSS")
Obtaining initial communalities:
Then from there you can simply select the initial communalities by using the following code:
hwkfa$h2_init
Which gives you the following vector of output:
V1 V2 V3 V4 V5 V6 V7
0.8034001 0.5583605 0.5487691 0.3255253 0.5685402 0.4643686 0.5227481
V8 V9 V10 V11 V12
0.8050573 0.3474202 0.5564858 0.3496354 0.3783390
I ran the same thing in SPSS and got matching values:
I want to calculate the shortest path between two proteins using two dataframes. For example, I want to calculate the shortest path of first from the first list and the first from the seconds list, the first from the first list and the second from the second list, etc.
structure(list(LAS1L = c("FKBP4", "RBM6", "UPF1", "SLC25A5",
"DHX33", "ELAC2", "CCDC124", "RPS20", "CSDE1", "AKAP8L", "UTP18",
"PTBP1", "DCN", "MATR3", "SAMD4A", "AQR", "STRAP", "SEC63", "BCLAF1",
"TFB1M", "GRN", "ZCCHC8", "NSUN2", "SKIV2L2", "STAU2", "CTNNA1",
"YTHDC2", "POLR2B", "TPR", "MAP4", "NOP16", "FAM120A", "R3HDM1",
"PTCD2", "RRP12", "MRTO4", "THRAP3", "NOP58", "USP36", "MLL3",
"PUM2", "MRPL43", "ZFR", "RC3H2", "ZC3H11A", "PARP12", "ALDH18A1",
"CSDA", "CCAR1")), class = "data.frame", row.names = c(NA, -49L
))
structure(list(GNL3L = c("FMR1", "FRAXA", "UBA1", "CSTF2", "MECP2",
"PHF6", "RBM10", "GSPT2", "SLC25A5", "EIF1AX", "NKRF", "RPS4X",
"RBMX2", "HTATSF1", "LAS1L", "MBNL3", "HUWE1", "RPL10", "RPL15",
"RBMX", "NONO", "RPGR", "UPF3B", "RBM3", "HNRNPH2", "UTP14A",
"DKC1", "MEX3C", "DDX3X", "FLNA", "FAM120C")), class = "data.frame", row.names = c(NA,
-31L))
So far, I just come out with this.
sp<-shortest_path[protein1[,1],protein2[,1]]
dput for shortest_path:
structure(c(0, 4, 6, 4, 4, 4, 4, 3, 3, 3, 5, 3, 5, 3, 3, 3, 4,
3, 3, 3, 4, 0, 5, 4, 4, 4, 4, 3, 3, 3, 5, 3, 5, 3, 3, 3, 4, 3,
3, 3, 6, 5, 0, 6, 4, 6, 5, 5, 5, 5, 7, 5, 6, 5, 5, 3, 6, 5, 5,
5, 4, 4, 6, 0, 3, 3, 3, 3, 3, 3, 4, 3, 5, 3, 3, 3, 4, 3, 3, 3,
4, 4, 4, 3, 0, 4, 3, 3, 3, 3, 4, 3, 5, 3, 3, 3, 4, 3, 3, 3, 4,
4, 6, 3, 4, 0, 3, 3, 3, 3, 5, 3, 5, 3, 3, 3, 4, 3, 3, 3, 4, 4,
5, 3, 3, 3, 0, 3, 3, 2, 3, 3, 5, 3, 3, 3, 4, 3, 3, 3, 3, 3, 5,
3, 3, 3, 3, 0, 2, 2, 4, 2, 4, 2, 2, 2, 3, 2, 2, 2, 3, 3, 5, 3,
3, 3, 3, 2, 0, 2, 4, 2, 4, 2, 2, 2, 3, 2, 2, 2, 3, 3, 5, 3, 3,
3, 2, 2, 2, 0, 2, 2, 4, 2, 2, 2, 3, 2, 2, 2, 5, 5, 7, 4, 4, 5,
3, 4, 4, 2, 0, 4, 6, 4, 4, 4, 5, 4, 4, 4, 3, 3, 5, 3, 3, 3, 3,
2, 2, 2, 4, 0, 4, 2, 2, 2, 3, 2, 2, 2, 5, 5, 6, 5, 5, 5, 5, 4,
4, 4, 6, 4, 0, 4, 4, 4, 5, 4, 4, 4, 3, 3, 5, 3, 3, 3, 3, 2, 2,
2, 4, 2, 4, 0, 2, 2, 3, 2, 2, 2, 3, 3, 5, 3, 3, 3, 3, 2, 2, 2,
4, 2, 4, 2, 0, 2, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 4,
2, 4, 2, 2, 0, 3, 2, 2, 2, 4, 4, 6, 4, 4, 4, 4, 3, 3, 3, 5, 3,
5, 3, 3, 3, 0, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 2, 2, 2, 4, 2, 4,
2, 2, 2, 3, 0, 1, 2, 3, 3, 5, 3, 3, 3, 3, 2, 2, 2, 4, 2, 4, 2,
2, 2, 3, 1, 0, 2, 3, 3, 5, 3, 3, 3, 3, 2, 2, 2, 4, 2, 4, 2, 2,
2, 3, 2, 2, 0), .Dim = c(20L, 20L), .Dimnames = list(c("1810055G02Rik",
"2810046L04Rik", "4922501C03Rik", "4930572J05Rik", "9830001H06Rik",
"A1CF", "A2M", "AAGAB", "AATF", "ABCA1", "ABCA13", "ABCA2", "ABCA4",
"ABCB1", "ABCB7", "ABCC2", "ABCC8", "ABCD1", "ABCD3", "ABCD4"
), c("1810055G02Rik", "2810046L04Rik", "4922501C03Rik", "4930572J05Rik",
"9830001H06Rik", "A1CF", "A2M", "AAGAB", "AATF", "ABCA1", "ABCA13",
"ABCA2", "ABCA4", "ABCB1", "ABCB7", "ABCC2", "ABCC8", "ABCD1",
"ABCD3", "ABCD4")))
Thanks in advance!
Maybe you can try the code below
outer(protein1$LAS1L, protein2$GNL3L, FUN = function(x, y) shortest_path[x, y])
I'm currently trying to develop a similar result as this link. I have a significant number of columns and several different labels for the x-axis.
col1 <- c(2, 4, 1, 2, 5, 1, 2, 0, 1, 4, 4, 3, 5, 2, 4, 3, 3, 6, 5, 3, 6, 4, 3, 4, 4, 3, 4,
2, 4, 3, 3, 5, 3, 5, 5, 0, 0, 3, 3, 6, 5, 4, 4, 1, 3, 3, 2, 0, 5, 3, 6, 6, 2, 3,
3, 1, 5, 3, 4, 6)
col2 <- c(2, 4, 4, 0, 4, 4, 4, 4, 1, 4, 4, 3, 5, 0, 4, 5, 3, 6, 5, 3, 6, 4, 4, 2, 4, 4, 4,
1, 1, 2, 2, 3, 3, 5, 0, 3, 4, 2, 4, 5, 5, 4, 4, 2, 3, 5, 2, 6, 5, 2, 4, 6, 3, 3,
3, 1, 4, 3, 5, 4)
col3 <- c(2, 5, 4, 1, 4, 2, 3, 0, 1, 3, 4, 2, 5, 1, 4, 3, 4, 6, 3, 4, 6, 4, 1, 3, 5, 4, 3,
2, 1, 3, 2, 2, 2, 4, 0, 1, 4, 4, 3, 5, 3, 2, 5, 2, 3, 3, 4, 2, 4, 2, 4, 5, 1, 3,
3, 3, 4, 3, 5, 4)
col4 <- c(2, 5, 2, 1, 4, 1, 3, 4, 1, 3, 5, 2, 4, 3, 5, 3, 4, 6, 3, 4, 6, 4, 3, 2, 5, 5, 4,
2, 3, 2, 2, 3, 3, 4, 0, 1, 4, 3, 3, 5, 4, 4, 4, 3, 3, 5, 4, 3, 5, 3, 6, 6, 4, 2,
3, 3, 4, 4, 4, 6)
data2 <- data.frame(col1,col2,col3,col4)
data2[,1:4] <- lapply(data2[,1:4], as.factor)
colnames(data2)<- c("A","B","C", "D")
> x.axis.list
[[1]]
expression(beta[paste(1, ",", 1L)])
[[2]]
expression(beta[paste(1, ",", 2L)])
[[3]]
expression(beta[paste(1, ",", 3L)])
[[4]]
expression(beta[paste(1, ",", 4L)])
myplots <- vector('list', ncol(data2))
for (i in seq_along(data2)) {
message(i)
myplots[[i]] <- local({
i <- i
p1 <- ggplot(data2, aes(x = data2[[i]])) +
geom_histogram(fill = "lightgreen") +
xlab(x.axis.list[[i]])
print(p1)
})
}
In the past, I've been able to do something similar to this where I can just put x.axis.list[[i]] in my loop and change the symbols. However, I continue to get the term expression on the axis. So the symbol for Beta is correct as well as the subscript but the word "expression" remains. I'm not sure exactly what I'm doing wrong, for a moment, I was able to produce a plot without "expression" but it has since stayed in the ggplot.
I want to be able to produce this plot, or one with the title on the y-axis without the word "expression".
My image currently looks . I'm not worried about this example data and the result of the plot, I'm wondering how to get rid of "expression" so only the math symbol shows.
Thanks in advance.
You can do:
for (i in seq_along(data2)) {
df <- data2[i]
names(df)[1] <- "x"
myplots[[i]] <- local({
p1 <- ggplot(df, aes(x = x)) +
geom_bar(fill = "lightgreen", stat = "count") +
xlab(x.axis.list[[i]])
})
}
And we can show all the plots together:
library(patchwork)
(myplots[[1]] + myplots[[2]]) / (myplots[[3]] + myplots[[4]])
Note I created the expression list like this:
x.axis.list <- lapply(1:4, function(i){
parse(text = paste0("beta[paste(1, \",\", ", i, ")]"))
})
I have a 5 x 5 scatterplot matrix that I created using ggplot. I made histograms for X and Y axis, but I needed an additional histogram for the diagonals of the matrix as well.
Edited for data
data <- structure(c(5, 5, 5, 3, 4, 4, 2, 4, 4, 4, 5, 4, 5, 4, 5, 1, 4,
3, 5, 4, 5, 2, 3, 3, 3, 4, 2, 5, 2, 4, 3, 3, 3, 3, 5, 4, 3, 4,
4, 4, 3, 3, 5, 3, 1, 3, 4, 5, 5, 3, 2, 4, 5, 4, 4, 5, 3, 5, 1,
3, 4, 5, 3, 2, 4, 3, 4, 1, 4, 3, 5, 2, 3, 3, 4, 5, 5, 5, 4, 3,
1, 1, 4, 2, 5, 4, 4, 1, 5, 3, 4, 2, 4, 3, 4, 4, 5, 4, 5, 1, 4,
5, 5, 5, 3, 4, 4, 2, 4, 4, 4, 5, 4, 5, 4, 5, 1, 4, 3, 5, 4, 5,
2, 3, 3, 3, 4, 2, 5, 2, 4, 3, 3, 3, 3, 5, 4, 3, 4, 4, 4, 3, 3,
5, 3, 1, 3, 4, 5, 5, 3, 2, 4, 5, 4, 4, 5, 3, 5, 1, 3, 3, 5, 2,
1, 1, 4, 5, 4, 5, 1, 1, 5, 4, 5, 3, 1, 3, 5, 5, 5, 5, 2, 1, 1,
1, 2, 3, 5, 1, 2, 5, 3, 5, 4, 5, 2, 2, 5, 2, 3, 5), .Dim = c(101L,
2L))
Here is the code
library(ggplot2)
library(gridExtra)
data <- as.data.frame(data)
x <- data$V2
y <- data$V1
xhist <- qplot(x, geom="histogram", binwidth = 0.5)
yhist <- qplot(y, geom="histogram", binwidth = 0.5) + coord_flip()
none <- ggplot()+geom_point(aes(1,1), colour="white") +
theme(axis.ticks=element_blank(), panel.background=element_blank(),
axis.text.x=element_blank(), axis.text.y=element_blank(),
axis.title.x=element_blank(), axis.title.y=element_blank())
g1 <- ggplot(data, aes(x,y)) +
geom_point(size = 1, position = position_jitter(w=0.3, h=0.3))
grid.arrange(yhist, g1, none, xhist, ncol=2, nrow=2, widths=c(1, 4), heights=c(4,1))
Is there a way to directly plot z-axis histogram from this data alone? What I want is to remove the panel of 'none', and instead place a histogram for data points across the diagonal.