Difference between fit$loadings and fit$Vaccounted for variance accounted for in factor analysis? - r

I am getting different values for variance accounted for by factors in factor analysis whether I check them with fit$loadings or with fit$Vaccounted. I am using the psych package with the fa() function. Why would that be the case if they're supposed to be exactly the same thing (I guess they're not or that they are calculated differently)?
The total difference is not huge, but still not trivial (about 0.07 for the cumulative variance: 0.603 vs. 0.674). I have a reprex below.
(I'm sorry for the large dataset, I was not able to replicate the issue with different datasets or a subset, so it might have to do with something funky with the data.)
data <- structure(list(X1 = c(5, 5, 5, 7, 2, 2, 2, 2, 7, 5, 4, 9, 8,
8, 6, 9, 9, 2, 2, 2, 2, 3, 2, 2, 9, 7, 8, 4, 3, 4, 6, 6, 3, 4,
4, 4, 8, 7, 6, 7, 5, 6, 6, 4, 8, 8, 8, 3, 9, 9, 6, 4, 8, 7, 8,
7, 8, 8, 8, 8), X2 = c(6, 4, 4, 6, 2, 2, 2, 2, 6, 5, 4, 8, 7,
9, 6, 9, 4, 2, 2, 2, 6, 4, 6, 7, 9, 6, 8, 4, 3, 3, 5, 5, 2, 3,
4, 7, 7, 5, 5, 6, 7, 7, 7, 3, 8, 5, 3, 2, 9, 9, 4, 4, 4, 6, 4,
4, 8, 8, 8, 8), X3 = c(7, 5, 4, 7, 2, 2, 2, 2, 7, 5, 3, 7, 8,
9, 7, 9, 2, 2, 2, 2, 4, 2, 5, 4, 9, 6, 8, 4, 3, 2, 4, 5, 3, 2,
2, 7, 7, 6, 6, 5, 7, 7, 7, 4, 8, 7, 3, 2, 9, 9, 4, 3, 4, 4, 5,
5, 8, 7, 7, 7), X5 = c(7, 6, 4, 6, 2, 2, 2, 2, 6, 4, 3, 7, 7,
9, 6, 9, 2, 2, 2, 2, 2, 2, 4, 4, 9, 8, 6, 5, 2, 2, 4, 3, 2, 2,
4, 7, 7, 6, 5, 6, 7, 7, 7, 3, 4, 5, 3, 2, 9, 9, 4, 2, 4, 4, 4,
5, 8, 4, 6, 5), X6 = c(8, 4, 3, 8, 3, 2, 2, 2, 6, 5, 3, 7, 9,
9, 7, 9, 2, 2, 2, 2, 6, 4, 6, 5, 8, 7, 6, 3, 2, 2, 2, 2, 4, 5,
8, 8, 8, 2, 3, 4, 8, 8, 5, 3, 2, 2, 2, 2, 9, 9, 4, 4, 4, 4, 4,
4, 5, 3, 4, 5), X7 = c(6, 6, 4, 4, 2, 2, 2, 2, 7, 4, 3, 7, 6,
7, 4, 6, 2, 2, 2, 2, 2, 2, 4, 2, 7, 4, 8, 2, 2, 2, 4, 3, 3, 3,
2, 5, 8, 4, 6, 7, 6, 6, 4, 2, 4, 8, 7, 2, 8, 8, 3, 3, 5, 5, 6,
6, 5, 8, 8, 8), X8 = c(6, 6, 4, 4, 2, 2, 2, 2, 7, 4, 3, 7, 5,
7, 6, 6, 2, 2, 2, 2, 2, 2, 2, 2, 6, 3, 7, 3, 2, 2, 4, 2, 2, 2,
2, 4, 7, 4, 4, 6, 6, 6, 5, 2, 2, 7, 3, 2, 8, 7, 3, 3, 4, 5, 5,
5, 4, 6, 8, 8), X10 = c(9, 9, 9, 8, 9, 9, 9, 9, 4, 6, 8, 3, 6,
5, 6, 4, 9, 9, 9, 9, 8, 7, 8, 8, 2, 8, 3, 9, 9, 9, 9, 7, 7, 8,
7, 7, 4, 3, 7, 6, 9, 6, 9, 9, 9, 9, 9, 9, 4, 4, 8, 9, 9, 6, 8,
8, 9, 9, 9, 9), X11 = c(5, 6, 4, 7, 2, 3, 2, 3, 7, 6, 2, 3, 8,
7, 6, 7, 2, 2, 2, 2, 3, 2, 2, 3, 9, 4, 8, 2, 2, 2, 6, 5, 3, 2,
2, 2, 5, 7, 4, 6, 8, 5, 8, 2, 7, 7, 2, 2, 8, 8, 4, 4, 5, 4, 5,
4, 5, 3, 5, 3), X12 = c(8, 6, 4, 6, 2, 2, 2, 2, 2, 5, 2, 2, 3,
3, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 4, 4, 2, 2, 3, 6, 2, 3,
3, 3, 4, 4, 8, 7, 5, 8, 6, 4, 5, 8, 2, 2, 2, 4, 4, 3, 5, 5, 4,
4, 7, 4, 6, 6), X13 = c(9, 8, 8, 8, 2, 2, 2, 2, 3, 5, 3, 2, 7,
5, 8, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 3, 3, 2, 2, 5, 6, 7, 7,
8, 6, 3, 4, 8, 6, 4, 6, 6, 6, 9, 9, 9, 4, 3, 5, 6, 8, 8, 8, 8,
9, 7, 8, 9, 9), X14 = c(7, 5, 6, 8, 2, 2, 2, 2, 7, 5, 3, 9, 8,
8, 6, 9, 2, 2, 2, 2, 5, 2, 3, 3, 9, 6, 8, 2, 5, 4, 6, 4, 4, 5,
5, 6, 6, 8, 3, 5, 9, 7, 6, 8, 9, 9, 4, 3, 9, 9, 4, 4, 6, 7, 6,
7, 8, 8, 8, 9), X15 = c(7, 6, 4, 6, 2, 2, 2, 2, 6, 5, 3, 8, 9,
7, 6, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3, 4, 4, 5, 3,
4, 7, 2, 3, 5, 2, 6, 5, 6, 3, 4, 7, 5, 3, 8, 8, 3, 4, 5, 5, 6,
6, 8, 7, 6, 7), X16 = c(7, 6, 4, 6, 2, 3, 2, 2, 7, 5, 3, 8, 9,
9, 7, 9, 2, 2, 2, 2, 2, 2, 7, 5, 9, 7, 8, 2, 2, 2, 4, 4, 5, 4,
4, 6, 9, 8, 6, 6, 6, 5, 6, 3, 8, 7, 3, 3, 8, 8, 4, 4, 4, 5, 5,
5, 8, 7, 5, 7), X17 = c(9, 4, 3, 7, 3, 3, 2, 2, 2, 2, 2, 2, 9,
8, 7, 4, 2, 2, 2, 2, 2, 2, 2, 2, 9, 5, 8, 3, 2, 2, 7, 6, 4, 2,
3, 3, 4, 7, 6, 6, 8, 7, 7, 3, 2, 2, 3, 3, 2, 7, 5, 4, 4, 4, 4,
4, 4, 4, 4, 3), X18 = c(8, 5, 7, 7, 2, 2, 2, 2, 2, 5, 3, 7, 9,
8, 9, 9, 2, 2, 2, 2, 4, 4, 5, 3, 9, 8, 9, 3, 3, 2, 5, 4, 3, 4,
6, 5, 6, 8, 8, 8, 4, 5, 3, 2, 9, 8, 7, 3, 6, 8, 4, 2, 2, 4, 4,
3, 6, 4, 3, 6), X19 = c(4, 5, 7, 8, 2, 2, 2, 2, 7, 4, 3, 8, 9,
8, 7, 9, 2, 2, 2, 2, 2, 2, 4, 2, 9, 6, 8, 2, 2, 2, 5, 4, 3, 2,
2, 2, 8, 9, 3, 7, 6, 6, 2, 2, 8, 5, 2, 3, 7, 9, 3, 3, 5, 3, 4,
2, 7, 5, 4, 5), X20 = c(8, 7, 7, 7, 5, 6, 6, 6, 4, 3, 4, 4, 8,
5, 6, 7, 6, 6, 6, 6, 4, 2, 4, 4, 9, 4, 7, 6, 5, 5, 5, 5, 6, 6,
6, 6, 8, 5, 6, 5, 5, 3, 2, 2, 8, 9, 9, 9, 9, 9, 6, 7, 8, 8, 8,
9, 9, 8, 9, 8), X21 = c(9, 8, 7, 7, 4, 4, 5, 5, 9, 3, 8, 9, 9,
9, 9, 9, 4, 4, 4, 4, 8, 7, 7, 4, 9, 8, 9, 9, 4, 5, 5, 5, 5, 6,
5, 6, 9, 7, 7, 7, 6, 6, 6, 6, 9, 9, 9, 9, 9, 9, 6, 8, 8, 8, 8,
9, 9, 8, 9, 9), X23 = c(4, 4, 3, 6, 3, 2, 2, 2, 2, 2, 2, 2, 6,
7, 4, 7, 3, 3, 3, 3, 3, 2, 2, 2, 7, 5, 7, 4, 2, 2, 2, 2, 4, 6,
8, 7, 4, 2, 5, 4, 2, 2, 2, 2, 2, 2, 2, 2, 8, 9, 5, 5, 4, 6, 5,
5, 5, 3, 5, 8), X24 = c(4, 3, 6, 3, 2, 2, 2, 4, 2, 2, 2, 2, 8,
8, 7, 7, 2, 2, 2, 2, 7, 8, 5, 5, 3, 2, 3, 2, 2, 2, 2, 2, 2, 2,
2, 2, 7, 5, 6, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 8, 2, 2, 2,
2, 2, 2, 2, 2), X25 = c(6, 6, 6, 7, 3, 5, 3, 3, 7, 5, 3, 5, 8,
8, 9, 9, 2, 2, 2, 2, 6, 7, 6, 5, 7, 2, 3, 2, 2, 2, 2, 2, 2, 3,
3, 4, 5, 4, 6, 6, 7, 9, 7, 4, 2, 2, 2, 2, 5, 6, 2, 9, 2, 5, 4,
3, 4, 3, 3, 6), X26 = c(8, 7, 5, 7, 3, 5, 3, 4, 4, 5, 3, 6, 7,
6, 7, 4, 2, 2, 2, 2, 2, 6, 5, 4, 2, 9, 9, 3, 2, 2, 2, 2, 4, 6,
7, 4, 5, 6, 8, 6, 6, 6, 7, 3, 3, 7, 5, 4, 4, 5, 3, 5, 4, 5, 5,
4, 4, 4, 5, 6), X28 = c(6, 4, 5, 6, 2, 2, 2, 2, 7, 4, 2, 5, 8,
6, 7, 5, 3, 3, 3, 3, 2, 2, 2, 2, 7, 4, 6, 2, 2, 2, 2, 2, 3, 3,
2, 4, 5, 7, 7, 6, 5, 3, 6, 5, 2, 8, 2, 2, 5, 5, 7, 7, 4, 4, 4,
5, 4, 3, 4, 7), X29 = c(5, 8, 6, 6, 9, 9, 9, 9, 5, 6, 9, 5, 3,
4, 4, 6, 8, 8, 8, 8, 9, 8, 9, 8, 5, 8, 8, 8, 8, 8, 6, 7, 6, 7,
7, 5, 4, 3, 4, 4, 6, 4, 6, 5, 8, 5, 8, 8, 7, 7, 4, 5, 7, 7, 6,
7, 8, 8, 9, 8), X30 = c(3, 3, 4, 5, 2, 2, 2, 2, 5, 4, 2, 5, 8,
7, 7, 6, 2, 2, 2, 2, 2, 2, 2, 2, 6, 5, 6, 3, 3, 2, 2, 2, 2, 2,
4, 3, 7, 8, 7, 6, 2, 2, 2, 2, 2, 9, 3, 2, 4, 3, 6, 5, 3, 2, 4,
3, 2, 2, 2, 4), X32 = c(2, 3, 3, 3, 2, 4, 2, 3, 3, 2, 2, 6, 8,
7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 8, 5, 8, 2, 2, 2, 2, 2, 3, 2,
2, 3, 2, 6, 4, 6, 9, 9, 9, 5, 2, 9, 2, 2, 5, 4, 6, 7, 2, 2, 2,
2, 5, 6, 5, 6), X34 = c(2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 4,
3, 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2,
6, 6, 2, 2, 3, 2, 6, 8, 7, 2, 2, 2, 3, 2, 6, 4, 3, 3, 3, 4, 3,
3, 4, 3, 4, 2)), class = "data.frame", row.names = c(NA, 60L))
Now that we defined the dataset, let's jump in with the code.
library(psych)
fit <- fa(data, nfactors = 4)
#> Loading required namespace: GPArotation
print(fit$loadings)
#>
#> [Loadings truncated for brevity]
#>
#> MR1 MR2 MR3 MR4
#> SS loadings 9.464 3.571 2.171 1.682
#> Proportion Var 0.338 0.128 0.078 0.060
#> Cumulative Var 0.338 0.466 0.543 0.603
print(fit$Vaccounted, digits = 3)
#> MR1 MR2 MR3 MR4
#> SS loadings 10.392 4.328 2.324 1.8283
#> Proportion Var 0.371 0.155 0.083 0.0653
#> Cumulative Var 0.371 0.526 0.609 0.6740
Created on 2022-02-10 by the reprex package (v2.0.1)
We can see the values differ. Any ideas why?

https://www.researchgate.net/post/How_can_of_Variance_of_factors_in_exploratory_factor_analysis_be_calculated_when_factors_are_correlated
I am not familiar with factor analysis, but as shown in the link above, it seems that the SS loadings cannot be calculated as a plain sum of squared loadings when an oblique rotation is used, because the factors are then correlated with each other. Perhaps fit$Vaccounted takes these inter-factor correlations into account, whereas the summary printed with fit$loadings is simply the sum of squares. I think that is where the difference comes from.
Note that the default rotation of fa() in the psych package is oblimin, which is an oblique rotation, so I think this difference will appear.

Related

r restructure/ stack wide 'boxy' data into one long table

I have a table that is downloaded from Excel. The structure looks like this:
# Wide "boxy" table as exported from Excel: one `hour` column followed by a
# repeating group of (day, value_1..value_4) columns, one group per date.
# NOTE(review): the column names repeat on purpose to mirror the Excel layout;
# tribble() may reject duplicated names -- confirm before running.
excel_table <- tribble(
  ~hour, ~day, ~value_1, ~value_2, ~value_3, ~value_4, ~day, ~value_1, ~value_2, ~value_3, ~value_4, ~day, ~value_1, ~value_2, ~value_3, ~value_4,
  "10am", "11-03-2021", 2, 3, 4, 5, "11-10-2021", 2, 3, 4, 5, "11-17-2021", 2, 3, 4, 5,
  "11am", "11-03-2021", 2, 3, 4, 5, "11-10-2021", 2, 3, 4, 5, "11-17-2021", 2, 3, 4, 5,
  "12pm", "11-03-2021", 2, 3, 4, 5, "11-10-2021", 2, 3, 4, 5, "11-17-2021", 2, 3, 4, 5,
  "1pm", "11-03-2021", 2, 3, 4, 5, "11-10-2021", 2, 3, 4, 5, "11-17-2021", 2, 3, 4, 5,
  "2pm", "11-03-2021", 2, 3, 4, 5, "11-10-2021", 2, 3, 4, 5, "11-17-2021", 2, 3, 4, 5,
  "3pm", "11-03-2021", 2, 3, 4, 5, "11-10-2021", 2, 3, 4, 5, "11-17-2021", 2, 3, 4, 5,
  "4pm", "11-03-2021", 2, 3, 4, 5, "11-10-2021", 2, 3, 4, 5, "11-17-2021", 2, 3, 4, 5,
  "5pm", "11-03-2021", 2, 3, 4, 5, "11-10-2021", 2, 3, 4, 5, "11-17-2021", 2, 3, 4, 5,
  "6pm", "11-03-2021", 2, 3, 4, 5, "11-10-2021", 2, 3, 4, 5, "11-17-2021", 2, 3, 4, 5,
  "7pm", "11-03-2021", 2, 3, 4, 5, "11-10-2021", 2, 3, 4, 5, "11-17-2021", 2, 3, 4, 5,
  "8pm", "11-03-2021", 2, 3, 4, 5, "11-10-2021", 2, 3, 4, 5, "11-17-2021", 2, 3, 4, 5
)
This is the output that I am looking for:
excel_table <- tribble(
~hour, ~day, ~value_1, ~value_2, ~value_3, ~value_4,
"10am", "11-03-2021", 2, 3, 4, 5,
"11am", "11-03-2021", 2, 3, 4, 5,
"12pm", "11-03-2021", 2, 3, 4, 5,
"1pm", "11-03-2021", 2, 3, 4, 5,
"2pm", "11-03-2021", 2, 3, 4, 5,
"3pm", "11-03-2021", 2, 3, 4, 5,
"4pm", "11-03-2021", 2, 3, 4, 5,
"5pm", "11-03-2021", 2, 3, 4, 5,
"6pm", "11-03-2021", 2, 3, 4, 5,
"7pm", "11-03-2021", 2, 3, 4, 5,
"8pm", "11-03-2021", 2, 3, 4, 5,
"10am", "11-10-2021", 2, 3, 4, 5,
"11am", "11-10-2021", 2, 3, 4, 5,
"12pm", "11-10-2021", 2, 3, 4, 5,
"1pm", "11-10-2021", 2, 3, 4, 5,
"2pm", "11-10-2021", 2, 3, 4, 5,
"3pm", "11-10-2021", 2, 3, 4, 5,
"4pm", "11-10-2021", 2, 3, 4, 5,
"5pm", "11-10-2021", 2, 3, 4, 5,
"6pm", "11-10-2021", 2, 3, 4, 5,
"7pm", "11-10-2021", 2, 3, 4, 5,
"8pm", "11-10-2021", 2, 3, 4, 5,
"10am", "11-17-2021", 2, 3, 4, 5,
"11am", "11-17-2021", 2, 3, 4, 5,
"12pm", "11-17-2021", 2, 3, 4, 5,
"1pm", "11-17-2021", 2, 3, 4, 5,
"2pm", "11-17-2021", 2, 3, 4, 5,
"3pm", "11-17-2021", 2, 3, 4, 5,
"4pm", "11-17-2021", 2, 3, 4, 5,
"5pm", "11-17-2021", 2, 3, 4, 5,
"6pm", "11-17-2021", 2, 3, 4, 5,
"7pm", "11-17-2021", 2, 3, 4, 5,
"8pm", "11-17-2021", 2, 3, 4, 5,
)
My first attempts were using tidyr::gather or tidyr::pivot_longer but that didn't get a good result and I won't reproduce that attempt here because it wasn't the right approach. Then it occurred to me that I could just cut off the columns into new dataframes and then use rbind() or dplyr::bind_rows() to stack the rows on top of each other where the columns match. So I started down that road but it's not such a good road to go down because I was timing myself and it would take way too long. The table I'm working with has more than the three dates; it has many years worth of data.
Is there a solution where I can restructure this data? I'm looking to preserve the first six columns and then stack the next five columns on the bottom of the rows, and then the next five on the bottom of that (and also I'm hoping to repeat the first column that says 'hour' all the way down)

How to properly set rendering Rmarkdown to pdf?

I use Rmarkdown to generate reports and if my line is too long it is usually cut after rendering.
Is there a way to fix it?
I attach a screenshot in order to better explain my issue.
You can use the chunk option tidy=TRUE to automatically insert line breaks in the code.
---
output: pdf_document
---
```{r, tidy = TRUE}
c(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0)
```
The linebreaks are inserted by formatR::tidy_source(). See https://yihui.org/knitr/options/#code-decoration for more details.
chunk_content <- "c(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0)"
formatR::tidy_source(text = chunk_content, width.cutoff = 30)
#> c(1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
#> 1, 2, 3, 4, 5, 6, 7, 8, 9,
#> 0, 1, 2, 3, 4, 5, 6, 7, 8,
#> 9, 0, 1, 2, 3, 4, 5, 6, 7,
#> 8, 9, 0)

How can I add edges into an existing plot?

I am wanting to plot graph clusters that I define by myself. I am using the simplified undirected enron data.
# as.undirected() and simplify() are igraph functions; attach igraph explicitly
# rather than relying on it already being loaded in the session.
library(igraph)
library(igraphdata)
# Load the `enron` graph object shipped with igraphdata.
data("enron")
# Work on an undirected, simplified copy and drop the original object.
g <- as.undirected(enron)
g <- simplify(g)
rm("enron")
member <- c(1, 8, 9, 9, 10, 10, 8, 7, 4, 1, 2, 6, 3, 1, 2, 8, 7, 2, 1, 5,
1, 7, 6, 4, 8, 4, 8, 10, 3, 6, 1, 4, 7, 4, 3, 7, 9, 10, 3, 8, 1,
9, 8, 2, 7, 2, 9, 5, 1, 2, 6, 10, 3, 3, 2, 1, 9, 10, 3, 5, 6, 5,
5, 3, 7, 6, 9, 10, 8, 10, 8, 8, 10, 10, 10, 8, 7, 7, 9, 1, 9, 2, 9,
7, 2, 7, 7, 3, 2, 5, 2, 1, 6, 5, 10, 4, 3, 2, 4, 6, 4, 9, 5, 4,
1, 10, 2, 3, 4, 3, 6, 3, 6, 4, 6, 8, 2, 4, 5, 1, 5, 1, 4, 10, 4, 7,
5, 9, 10, 1, 2, 1, 5, 7, 5, 3, 5, 8, 7, 9, 5, 8, 1, 5, 3, 3, 3, 10,
1, 7, 8, 4, 1, 10, 9, 6, 9, 9, 4, 2, 6, 4, 6, 3, 5, 6, 9, 7, 6, 6,
4, 8, 6, 8, 8, 2, 5, 4, 3, 2, 9, 10, 2, 7)
I have tried many ways but none looks good. The best I can make is
# Edge list of `g` as a data frame; its `from`/`to` columns index into `member`.
edges_data_frame <- get.data.frame(g, what = "edges")
# Weight 500 for an edge whose two endpoints share a cluster, 1 otherwise.
# Computed on whole vectors, so it also behaves for a graph with no edges
# (a `1:length(E(g))` loop would iterate over c(1, 0) in that case).
same_cluster <- member[edges_data_frame$from] == member[edges_data_frame$to]
w.mem <- ifelse(same_cluster, 500, 1)
mem <- make_clusters(g, member)
E(g)$weight <- w.mem
colors <- rainbow(max(membership(mem)))
# NOTE(review): `layout` is computed here but is not passed to plot() below,
# and set.seed() is only called afterwards -- confirm whether that is intended.
layout <- layout.fruchterman.reingold(g, weights = w.mem)
set.seed(1234)
plot(
  g,
  vertex.color = colors[mem$membership],
  mark.groups = communities(mem),
  vertex.label = NA,
  edge.width = 1,
  edge.color = "lightgray",
  vertex.size = 5
)
my first trial
I found that the "deleting edges plot" looks much cleaner
# Copy of `g` without the edges selected by crossing(mem, g) (presumably the
# edges that run between different clusters -- confirm against igraph docs).
coGrph <- delete_edges(g, E(g)[crossing(mem, g)])
# Fixed palette; a vertex's cluster number in `member` picks its colour.
col_vector <- c(
  "#e6194b", "#3cb44b", "#ffe119", "#4363d8", "#f58231", "#911eb4",
  "#46f0f0", "#f032e6", "#bcf60c", "#fabebe", "#008080", "#e6beff",
  "#9a6324", "#fffac8", "#800000", "#aaffc3", "#808000", "#ffd8b1",
  "#000075", "#808080", "#ffffff", "#000000"
)
# One colour per vertex via plain vector indexing: same result as the
# element-wise sapply(), but type-stable and safe for an empty vertex set.
temp <- col_vector[member[seq_along(V(g))]]
V(coGrph)$color <- temp
plot(coGrph, vertex.label = NA, vertex.size = 5)
my second trial
However, this plot has some missing edges and does not reflect the true connection of the plot. I want to use this plot and add the deleted edges back to the plot without changing the positions I have right now. Is it possible?
Thank you very much I really appreciate your help.
Yes. Use your coGrph to create a layout, but then plot the original graph.
Continuing your "second trial"
# Seed the RNG before the layout call so the coordinates are reproducible.
set.seed(1234)
# Compute vertex coordinates on the pruned graph `coGrph` ...
LOcG <- layout_nicely(coGrph)
# ... then draw the full graph `g` (all edges) at those same coordinates,
# coloured with the per-vertex colours from the second trial.
V(g)$color <- temp
plot(
  g,
  layout = LOcG,
  vertex.label = NA,
  vertex.size = 5
)

How to convert a stem and leaf plot into a data set in R?

The stem and leaf plot that I need to convert is given below-
24|9
23|
22|1
21|7
20|2, 2, 5, 5, 6, 9, 9, 9
19|0, 0, 0, 0, 0, 1, 1, 2, 4, 4, 5, 8
18|0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 9, 9, 9
17|1, 1, 1, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 9
16|0, 0, 1, 1, 1, 1, 2, 4, 5, 5, 6, 6, 8, 8, 8, 8
15|0, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8, 9
14|0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 9, 9
13|0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 4, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 8, 9, 9, 9
12|1, 1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9
11|0, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 9, 9
10|0, 2, 3, 3, 3, 4, 4, 5, 7, 7, 8
9|0, 0, 9
8|6
Here's maybe one way. If your data looks like this
stem <- "24|9
23|
22|1
21|7
20|2, 2, 5, 5, 6, 9, 9, 9
19|0, 0, 0, 0, 0, 1, 1, 2, 4, 4, 5, 8
18|0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 9, 9, 9
17|1, 1, 1, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 9
16|0, 0, 1, 1, 1, 1, 2, 4, 5, 5, 6, 6, 8, 8, 8, 8
15|0, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8, 9
14|0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 9, 9
13|0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 4, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 8, 9, 9, 9
12|1, 1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9
11|0, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 9, 9
10|0, 2, 3, 3, 3, 4, 4, 5, 7, 7, 8
9|0, 0, 9
8|6"
Then we can split up the rows and for each row we split by the pipe. Then we split the right side by commas and join each of those values to the value to the left of the pipe.
# One element per line of the stem-and-leaf text.
rows <- strsplit(stem, "\n")[[1]]
# For each line, split "stem|leaves" at the pipe, split the leaves on ", ",
# and paste the stem in front of every leaf. Lines with no leaves contribute
# nothing (NULL is dropped by unlist()).
values <- unlist(lapply(strsplit(rows, "\\|"), function(parts) {
  leaves <- strsplit(parts[2], ", ")[[1]]
  if (all(is.na(leaves))) {
    return(NULL)
  }
  paste0(parts[1], leaves)
}))
This will return character values, but you could convert to numeric with
as.numeric(values)
Here is a different approach, using @MrFlick's stem and rows objects:
# One element per line of the stem-and-leaf text, each split at the pipe into
# c(stem, leaves); a line with no leaves yields only the stem.
rows <- strsplit(stem, "\n")[[1]]
rows.lst <- strsplit(rows, "\\|")
# Stem part scaled to tens, e.g. "24" -> 240.
tens <- as.numeric(vapply(rows.lst, `[`, character(1), 1)) * 10
# Leaves per row as a list of numeric vectors (NA for a row without leaves).
# lapply() always returns a list; sapply() would collapse to a matrix when
# every row has the same number of leaves and break the pairing with `tens`.
ones <- lapply(
  strsplit(vapply(rows.lst, `[`, character(1), 2), ","),
  as.numeric
)
# Add each row's tens to its own leaves, flatten, and drop the NA left by
# rows that had no leaves.
vals <- unlist(Map(`+`, tens, ones))
vals <- vals[!is.na(vals)]

Error reading dataset in R

I have a problem reading a dataset.
My code :
require(igraph)
g <- graph(c(0, 1, 1, 2, 2, 0, 1, 3, 3, 4,
4, 5, 5, 3, 4, 6, 6, 7, 7, 8,
8, 6, 9, 10, 10, 11, 11, 9))
Error :
Error in graph(c(0, 1, 1, 2, 2, 0, 1, 3, 3, 4, 4, 5, 5, 3, 4, 6, 6, 7, :
At structure_generators.c:84 : Invalid (negative) vertex id, Invalid vertex id
The problem seems to be the vertex with id 0. Adding 1 to every id, so that the smallest id becomes 1, avoids the error:
# Edge list from the question, written as consecutive from/to pairs whose
# vertex ids start at 0.
yourgraph <- c(
  0, 1,   1, 2,   2, 0,
  1, 3,   3, 4,   4, 5,   5, 3,
  4, 6,   6, 7,   7, 8,   8, 6,
  9, 10,  10, 11, 11, 9
)
# Shift every id up by one so that no vertex id is 0, which graph() rejected.
g <- graph(yourgraph + 1)

Resources