I am getting different values for variance accounted for by factors in factor analysis whether I check them with fit$loadings or with fit$Vaccounted. I am using the psych package with the fa() function. Why would that be the case if they're supposed to be exactly the same thing (I guess they're not or that they are calculated differently)?
The total difference is not huge, but still not trivial (about 0.07 for the cumulative proportion of variance: 0.603 vs. 0.674 in the output below). I have a reprex below.
(I'm sorry for the large dataset, I was not able to replicate the issue with different datasets or a subset, so it might have to do with something funky with the data.)
# Example data as a dput()-style literal: a data.frame with 60 rows and
# 28 numeric columns named X1 ... X34 (some indices in that range are absent).
data <- structure(list(X1 = c(5, 5, 5, 7, 2, 2, 2, 2, 7, 5, 4, 9, 8,
8, 6, 9, 9, 2, 2, 2, 2, 3, 2, 2, 9, 7, 8, 4, 3, 4, 6, 6, 3, 4,
4, 4, 8, 7, 6, 7, 5, 6, 6, 4, 8, 8, 8, 3, 9, 9, 6, 4, 8, 7, 8,
7, 8, 8, 8, 8), X2 = c(6, 4, 4, 6, 2, 2, 2, 2, 6, 5, 4, 8, 7,
9, 6, 9, 4, 2, 2, 2, 6, 4, 6, 7, 9, 6, 8, 4, 3, 3, 5, 5, 2, 3,
4, 7, 7, 5, 5, 6, 7, 7, 7, 3, 8, 5, 3, 2, 9, 9, 4, 4, 4, 6, 4,
4, 8, 8, 8, 8), X3 = c(7, 5, 4, 7, 2, 2, 2, 2, 7, 5, 3, 7, 8,
9, 7, 9, 2, 2, 2, 2, 4, 2, 5, 4, 9, 6, 8, 4, 3, 2, 4, 5, 3, 2,
2, 7, 7, 6, 6, 5, 7, 7, 7, 4, 8, 7, 3, 2, 9, 9, 4, 3, 4, 4, 5,
5, 8, 7, 7, 7), X5 = c(7, 6, 4, 6, 2, 2, 2, 2, 6, 4, 3, 7, 7,
9, 6, 9, 2, 2, 2, 2, 2, 2, 4, 4, 9, 8, 6, 5, 2, 2, 4, 3, 2, 2,
4, 7, 7, 6, 5, 6, 7, 7, 7, 3, 4, 5, 3, 2, 9, 9, 4, 2, 4, 4, 4,
5, 8, 4, 6, 5), X6 = c(8, 4, 3, 8, 3, 2, 2, 2, 6, 5, 3, 7, 9,
9, 7, 9, 2, 2, 2, 2, 6, 4, 6, 5, 8, 7, 6, 3, 2, 2, 2, 2, 4, 5,
8, 8, 8, 2, 3, 4, 8, 8, 5, 3, 2, 2, 2, 2, 9, 9, 4, 4, 4, 4, 4,
4, 5, 3, 4, 5), X7 = c(6, 6, 4, 4, 2, 2, 2, 2, 7, 4, 3, 7, 6,
7, 4, 6, 2, 2, 2, 2, 2, 2, 4, 2, 7, 4, 8, 2, 2, 2, 4, 3, 3, 3,
2, 5, 8, 4, 6, 7, 6, 6, 4, 2, 4, 8, 7, 2, 8, 8, 3, 3, 5, 5, 6,
6, 5, 8, 8, 8), X8 = c(6, 6, 4, 4, 2, 2, 2, 2, 7, 4, 3, 7, 5,
7, 6, 6, 2, 2, 2, 2, 2, 2, 2, 2, 6, 3, 7, 3, 2, 2, 4, 2, 2, 2,
2, 4, 7, 4, 4, 6, 6, 6, 5, 2, 2, 7, 3, 2, 8, 7, 3, 3, 4, 5, 5,
5, 4, 6, 8, 8), X10 = c(9, 9, 9, 8, 9, 9, 9, 9, 4, 6, 8, 3, 6,
5, 6, 4, 9, 9, 9, 9, 8, 7, 8, 8, 2, 8, 3, 9, 9, 9, 9, 7, 7, 8,
7, 7, 4, 3, 7, 6, 9, 6, 9, 9, 9, 9, 9, 9, 4, 4, 8, 9, 9, 6, 8,
8, 9, 9, 9, 9), X11 = c(5, 6, 4, 7, 2, 3, 2, 3, 7, 6, 2, 3, 8,
7, 6, 7, 2, 2, 2, 2, 3, 2, 2, 3, 9, 4, 8, 2, 2, 2, 6, 5, 3, 2,
2, 2, 5, 7, 4, 6, 8, 5, 8, 2, 7, 7, 2, 2, 8, 8, 4, 4, 5, 4, 5,
4, 5, 3, 5, 3), X12 = c(8, 6, 4, 6, 2, 2, 2, 2, 2, 5, 2, 2, 3,
3, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 4, 4, 2, 2, 3, 6, 2, 3,
3, 3, 4, 4, 8, 7, 5, 8, 6, 4, 5, 8, 2, 2, 2, 4, 4, 3, 5, 5, 4,
4, 7, 4, 6, 6), X13 = c(9, 8, 8, 8, 2, 2, 2, 2, 3, 5, 3, 2, 7,
5, 8, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 3, 3, 2, 2, 5, 6, 7, 7,
8, 6, 3, 4, 8, 6, 4, 6, 6, 6, 9, 9, 9, 4, 3, 5, 6, 8, 8, 8, 8,
9, 7, 8, 9, 9), X14 = c(7, 5, 6, 8, 2, 2, 2, 2, 7, 5, 3, 9, 8,
8, 6, 9, 2, 2, 2, 2, 5, 2, 3, 3, 9, 6, 8, 2, 5, 4, 6, 4, 4, 5,
5, 6, 6, 8, 3, 5, 9, 7, 6, 8, 9, 9, 4, 3, 9, 9, 4, 4, 6, 7, 6,
7, 8, 8, 8, 9), X15 = c(7, 6, 4, 6, 2, 2, 2, 2, 6, 5, 3, 8, 9,
7, 6, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3, 4, 4, 5, 3,
4, 7, 2, 3, 5, 2, 6, 5, 6, 3, 4, 7, 5, 3, 8, 8, 3, 4, 5, 5, 6,
6, 8, 7, 6, 7), X16 = c(7, 6, 4, 6, 2, 3, 2, 2, 7, 5, 3, 8, 9,
9, 7, 9, 2, 2, 2, 2, 2, 2, 7, 5, 9, 7, 8, 2, 2, 2, 4, 4, 5, 4,
4, 6, 9, 8, 6, 6, 6, 5, 6, 3, 8, 7, 3, 3, 8, 8, 4, 4, 4, 5, 5,
5, 8, 7, 5, 7), X17 = c(9, 4, 3, 7, 3, 3, 2, 2, 2, 2, 2, 2, 9,
8, 7, 4, 2, 2, 2, 2, 2, 2, 2, 2, 9, 5, 8, 3, 2, 2, 7, 6, 4, 2,
3, 3, 4, 7, 6, 6, 8, 7, 7, 3, 2, 2, 3, 3, 2, 7, 5, 4, 4, 4, 4,
4, 4, 4, 4, 3), X18 = c(8, 5, 7, 7, 2, 2, 2, 2, 2, 5, 3, 7, 9,
8, 9, 9, 2, 2, 2, 2, 4, 4, 5, 3, 9, 8, 9, 3, 3, 2, 5, 4, 3, 4,
6, 5, 6, 8, 8, 8, 4, 5, 3, 2, 9, 8, 7, 3, 6, 8, 4, 2, 2, 4, 4,
3, 6, 4, 3, 6), X19 = c(4, 5, 7, 8, 2, 2, 2, 2, 7, 4, 3, 8, 9,
8, 7, 9, 2, 2, 2, 2, 2, 2, 4, 2, 9, 6, 8, 2, 2, 2, 5, 4, 3, 2,
2, 2, 8, 9, 3, 7, 6, 6, 2, 2, 8, 5, 2, 3, 7, 9, 3, 3, 5, 3, 4,
2, 7, 5, 4, 5), X20 = c(8, 7, 7, 7, 5, 6, 6, 6, 4, 3, 4, 4, 8,
5, 6, 7, 6, 6, 6, 6, 4, 2, 4, 4, 9, 4, 7, 6, 5, 5, 5, 5, 6, 6,
6, 6, 8, 5, 6, 5, 5, 3, 2, 2, 8, 9, 9, 9, 9, 9, 6, 7, 8, 8, 8,
9, 9, 8, 9, 8), X21 = c(9, 8, 7, 7, 4, 4, 5, 5, 9, 3, 8, 9, 9,
9, 9, 9, 4, 4, 4, 4, 8, 7, 7, 4, 9, 8, 9, 9, 4, 5, 5, 5, 5, 6,
5, 6, 9, 7, 7, 7, 6, 6, 6, 6, 9, 9, 9, 9, 9, 9, 6, 8, 8, 8, 8,
9, 9, 8, 9, 9), X23 = c(4, 4, 3, 6, 3, 2, 2, 2, 2, 2, 2, 2, 6,
7, 4, 7, 3, 3, 3, 3, 3, 2, 2, 2, 7, 5, 7, 4, 2, 2, 2, 2, 4, 6,
8, 7, 4, 2, 5, 4, 2, 2, 2, 2, 2, 2, 2, 2, 8, 9, 5, 5, 4, 6, 5,
5, 5, 3, 5, 8), X24 = c(4, 3, 6, 3, 2, 2, 2, 4, 2, 2, 2, 2, 8,
8, 7, 7, 2, 2, 2, 2, 7, 8, 5, 5, 3, 2, 3, 2, 2, 2, 2, 2, 2, 2,
2, 2, 7, 5, 6, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 8, 2, 2, 2,
2, 2, 2, 2, 2), X25 = c(6, 6, 6, 7, 3, 5, 3, 3, 7, 5, 3, 5, 8,
8, 9, 9, 2, 2, 2, 2, 6, 7, 6, 5, 7, 2, 3, 2, 2, 2, 2, 2, 2, 3,
3, 4, 5, 4, 6, 6, 7, 9, 7, 4, 2, 2, 2, 2, 5, 6, 2, 9, 2, 5, 4,
3, 4, 3, 3, 6), X26 = c(8, 7, 5, 7, 3, 5, 3, 4, 4, 5, 3, 6, 7,
6, 7, 4, 2, 2, 2, 2, 2, 6, 5, 4, 2, 9, 9, 3, 2, 2, 2, 2, 4, 6,
7, 4, 5, 6, 8, 6, 6, 6, 7, 3, 3, 7, 5, 4, 4, 5, 3, 5, 4, 5, 5,
4, 4, 4, 5, 6), X28 = c(6, 4, 5, 6, 2, 2, 2, 2, 7, 4, 2, 5, 8,
6, 7, 5, 3, 3, 3, 3, 2, 2, 2, 2, 7, 4, 6, 2, 2, 2, 2, 2, 3, 3,
2, 4, 5, 7, 7, 6, 5, 3, 6, 5, 2, 8, 2, 2, 5, 5, 7, 7, 4, 4, 4,
5, 4, 3, 4, 7), X29 = c(5, 8, 6, 6, 9, 9, 9, 9, 5, 6, 9, 5, 3,
4, 4, 6, 8, 8, 8, 8, 9, 8, 9, 8, 5, 8, 8, 8, 8, 8, 6, 7, 6, 7,
7, 5, 4, 3, 4, 4, 6, 4, 6, 5, 8, 5, 8, 8, 7, 7, 4, 5, 7, 7, 6,
7, 8, 8, 9, 8), X30 = c(3, 3, 4, 5, 2, 2, 2, 2, 5, 4, 2, 5, 8,
7, 7, 6, 2, 2, 2, 2, 2, 2, 2, 2, 6, 5, 6, 3, 3, 2, 2, 2, 2, 2,
4, 3, 7, 8, 7, 6, 2, 2, 2, 2, 2, 9, 3, 2, 4, 3, 6, 5, 3, 2, 4,
3, 2, 2, 2, 4), X32 = c(2, 3, 3, 3, 2, 4, 2, 3, 3, 2, 2, 6, 8,
7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 8, 5, 8, 2, 2, 2, 2, 2, 3, 2,
2, 3, 2, 6, 4, 6, 9, 9, 9, 5, 2, 9, 2, 2, 5, 4, 6, 7, 2, 2, 2,
2, 5, 6, 5, 6), X34 = c(2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 4,
3, 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2,
6, 6, 2, 2, 3, 2, 6, 8, 7, 2, 2, 2, 3, 2, 6, 4, 3, 3, 3, 4, 3,
3, 4, 3, 4, 2)), class = "data.frame", row.names = c(NA, 60L))
Now that we defined the dataset, let's jump in with the code.
# Reprex: fit a factor model and print the two variance summaries being
# compared. Lines starting with `#>` are recorded output, not code.
library(psych)
# Four factors requested; all other fa() arguments are left at their defaults.
fit <- fa(data, nfactors = 4)
#> Loading required namespace: GPArotation
# First summary: the variance table shown when printing the loadings object.
print(fit$loadings)
#>
#> [Loadings truncated for brevity]
#>
#> MR1 MR2 MR3 MR4
#> SS loadings 9.464 3.571 2.171 1.682
#> Proportion Var 0.338 0.128 0.078 0.060
#> Cumulative Var 0.338 0.466 0.543 0.603
# Second summary: the Vaccounted element, whose values differ from the
# table printed above.
print(fit$Vaccounted, digits = 3)
#> MR1 MR2 MR3 MR4
#> SS loadings 10.392 4.328 2.324 1.8283
#> Proportion Var 0.371 0.155 0.083 0.0653
#> Cumulative Var 0.371 0.526 0.609 0.6740
Created on 2022-02-10 by the reprex package (v2.0.1)
We can see the values differ. Any ideas why?
https://www.researchgate.net/post/How_can_of_Variance_of_factors_in_exploratory_factor_analysis_be_calculated_when_factors_are_correlated
I am not familiar with factor analysis, but as shown here, it seems that SS loadings cannot be calculated as a simple sum of squares because of inter-factor correlations when an oblique rotation is used. Perhaps fit$Vaccounted takes this into account, whereas fit$loadings simply reports the sum of squared loadings. I think that is why the difference appears.
Note that the default rotation in the psych package's fa() function is oblimin, which is an oblique rotation, so I think this difference will appear.
The stem-and-leaf plot that I need to convert is given below:
24|9
23|
22|1
21|7
20|2, 2, 5, 5, 6, 9, 9, 9
19|0, 0, 0, 0, 0, 1, 1, 2, 4, 4, 5, 8
18|0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 9, 9, 9
17|1, 1, 1, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 9
16|0, 0, 1, 1, 1, 1, 2, 4, 5, 5, 6, 6, 8, 8, 8, 8
15|0, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8, 9
14|0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 9, 9
13|0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 4, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 8, 9, 9, 9
12|1, 1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9
11|0, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 9, 9
10|0, 2, 3, 3, 3, 4, 4, 5, 7, 7, 8
9|0, 0, 9
8|6
Here's maybe one way. If your data looks like this
# Stem-and-leaf display held as one multi-line string: one row per line,
# with the stem left of the pipe and comma-separated leaves to its right
# (the "23|" row has no leaves).
stem <- "24|9
23|
22|1
21|7
20|2, 2, 5, 5, 6, 9, 9, 9
19|0, 0, 0, 0, 0, 1, 1, 2, 4, 4, 5, 8
18|0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 9, 9, 9
17|1, 1, 1, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 9
16|0, 0, 1, 1, 1, 1, 2, 4, 5, 5, 6, 6, 8, 8, 8, 8
15|0, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8, 9
14|0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 9, 9
13|0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 4, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 8, 9, 9, 9
12|1, 1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9
11|0, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 9, 9
10|0, 2, 3, 3, 3, 4, 4, 5, 7, 7, 8
9|0, 0, 9
8|6"
Then we can split up the rows and for each row we split by the pipe. Then we split the right side by commas and join each of those values to the value to the left of the pipe.
# One string per display row.
rows <- strsplit(stem, "\n")[[1]]
# For every row, glue the stem (left of the pipe) onto each of its leaves
# (right of the pipe). The result is a character vector such as "249", "221".
values <- unlist(
  lapply(
    strsplit(rows, "\\|"),
    function(parts) {
      leaves <- strsplit(parts[2], ", ")[[1]]
      # A row with nothing after the pipe yields only NA; it contributes
      # no values.
      if (all(is.na(leaves))) {
        return(NULL)
      }
      paste0(parts[1], leaves)
    }
  )
)
This will return character values, but you could convert to numeric with
# Coerce the character results to numeric; the converted vector is not
# assigned here, so assign it if it is needed later.
as.numeric(values)
Here is a different approach, using @MrFlick's stem and rows objects:
# Convert the stem-and-leaf text in `stem` to numbers arithmetically:
# value = stem * 10 + leaf.
rows <- strsplit(stem, "\n")[[1]]
rows.lst <- strsplit(rows, "\\|")
# Stem of each row (text left of the pipe), scaled to tens.
tens <- as.numeric(vapply(rows.lst, `[`, character(1), 1)) * 10
# Leaves of each row (text right of the pipe) as numeric vectors; a row with
# no leaves becomes a single NA. lapply() always returns a list, whereas
# sapply() collapses to a matrix when every row has the same number of
# leaves, which would pair stems with the wrong leaves in the next step.
# as.numeric() tolerates the space that follows each comma.
ones <- lapply(
  strsplit(vapply(rows.lst, `[`, character(1), 2), ","),
  as.numeric
)
# Add each row's stem to its own leaves; Map() never simplifies its result.
vals <- unlist(Map(`+`, tens, ones))
# Drop the NA placeholders left by rows that had no leaves.
vals <- vals[!is.na(vals)]