I have a dataframe with different company IDs appearing from once to over 30 times in different rows. I want to add a new column "di_Flex" and fill it with specific values depending on how often the same company ID appears in a column:
If it appears twice in the column, add the value 6 to the new column "di_Flex",
if it appears 3x, add "8",
if it appears 4x add "10",
if it appears 5x add "12.8",
if it appears 6x add "14.67",
if it appears 7 or more times add "16".
Here is the dataframe:
c(0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14,
15, 16, 17, 17, 18, 18, 19, 20, 21, 22, 23, 23, 23, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27,
28, 29, 30, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, 36,
36, 37, 38, 38, 38, 38, 38, 38, 39, 40, 41, 41, 41, 42, 42, 42,
43, 43, 43, 44, 45, 45, 46, 46, 46, 47, 48, 49, 50, 50, 51, 53,
54, 54, 54, 54, 55, 57, 57, 57, 59, 59, 59, 59, 60, 60, 60, 60,
61, 61, 62, 62, 62, 63, 63, 64, 64, 64, 64, 65, 65, 66, 66, 66,
66, 66, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA)
Thank you for your help!
Assuming your data is called df with a column value:
library(tidyverse)
left_join(df, df %>%
group_by(value) %>%
tally()) %>%
mutate(di_Flex = case_when(n == 2 ~ 6,
n == 3 ~ 8,
n == 4 ~ 10,
n == 5 ~ 12.8,
n == 6 ~ 14.67,
n >= 7 ~ 16)) %>%
select(-n)
This gives us:
1 0 12.8
2 0 12.8
3 0 12.8
4 0 12.8
5 0 12.8
6 1 NA
7 2 NA
8 3 NA
9 4 NA
10 5 8.0
11 5 8.0
12 5 8.0
13 6 16.0
14 6 16.0
15 6 16.0
16 6 16.0
17 6 16.0
18 6 16.0
19 6 16.0
20 6 16.0
Data:
df <- data.frame(value = c(0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14,
15, 16, 17, 17, 18, 18, 19, 20, 21, 22, 23, 23, 23, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27,
28, 29, 30, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, 36,
36, 37, 38, 38, 38, 38, 38, 38, 39, 40, 41, 41, 41, 42, 42, 42,
43, 43, 43, 44, 45, 45, 46, 46, 46, 47, 48, 49, 50, 50, 51, 53,
54, 54, 54, 54, 55, 57, 57, 57, 59, 59, 59, 59, 60, 60, 60, 60,
61, 61, 62, 62, 62, 63, 63, 64, 64, 64, 64, 65, 65, 66, 66, 66,
66, 66, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA))
I usually face problems with dataframes that have many columns.
For example, trying to calculate the Mann-Kendall test:
library(tidyverse)
library(Kendall)
df <- tibble::tribble(
~Season, ~Luxembourg, ~Germany, ~France, ~Russia, ~USA, ~Spain,
"Summer 2000", 29, 88, NA, NA, 31, 10,
"Summer 2001", 134, 36, 23, NA, 37, 4,
"Summer 2002", 22, 9, 10, NA, 7, 3,
"Summer 2003", 40, 11, 19, NA, 16, 6,
"Summer 2004", 74, 19, 26, NA, 27, 9,
"Summer 2005", 16, 8, 14, NA, 8, 4,
"Summer 2006", 191, 22, 32, NA, 32, 16,
"Summer 2007", 15, 7, 13, NA, 14, 4,
"Summer 2008", 43, 48, 22, NA, 27, 14,
"Summer 2009", 16, 35, 14, NA, 7, 9,
"Summer 2010", 29, 8, 25, 18, 11, 7,
"Summer 2011", 10, 4, 6, 12, 3, 3,
"Summer 2012", 55, 33, 11, 17, 46, 23,
"Summer 2013", 62, 57, 23, 70, 45, 21,
"Summer 2014", 65, 7, 6, 21, 12, 7,
"Summer 2015", 74, 11, 43, 110, 21, 11,
"Summer 2016", 85, 35, 79, 89, 30, 20,
"Summer 2017", NA, 11, 12, 14, 10, 6,
"Summer 2018", NA, 22, 11, 30, 18, 12,
"Summer 2019", NA, 20, 11, 25, NA, 14
)
Mk_Luxembourg <- MannKendall(df$Luxembourg)
Mk_Germany <- MannKendall(df$Germany)
Mk_France <- MannKendall(df$France)
Mk_Russia <- MannKendall(df$Russia)
Mk_USA <- MannKendall(df$USA)
Mk_Spain <- MannKendall(df$Spain)
Mk_results <- as.matrix(bind_rows(unlist(Mk_Luxembourg),
unlist(Mk_Germany),
unlist(Mk_France),
unlist(Mk_Russia),
unlist(Mk_USA),
unlist(Mk_Spain)))
row.names(Mk_results) <- c("Mk_Luxembourg", "Mk_Germany", "Mk_France", "Mk_Russia", "Mk_USA", "Mk_Spain" )
Mk_results
#> country tau sl S D varS
#> 1 France -0.113 0.527 -19 168. 810.
#> 2 Germany -0.0697 0.696 -13 186. 942.
#> 3 Luxembourg 0.171 0.364 23 134. 586.
#> 4 Russia 0.244 0.371 11 45 125
#> 5 Spain 0.280 0.0965 52 186. 941.
#> 6 USA -0.0529 0.779 -9 170. 815
This becomes very tedious when instead of 6 I have 70 variables.
Is there a simpler way to call all the variables at once?
You can use something like following
data.frame(
stringsAsFactors = FALSE,
Season = c("Summer 2000","Summer 2001",
"Summer 2002","Summer 2003","Summer 2004","Summer 2005",
"Summer 2006","Summer 2007","Summer 2008",
"Summer 2009","Summer 2010","Summer 2011","Summer 2012",
"Summer 2013","Summer 2014","Summer 2015","Summer 2016",
"Summer 2017","Summer 2018","Summer 2019"),
Luxembourg = c(29,134,22,40,74,16,
191,15,43,16,29,10,55,62,65,74,85,
NA,NA,NA),
Germany = c(88,36,9,11,19,8,22,
7,48,35,8,4,33,57,7,11,35,11,22,
20),
France = c(NA,23,10,19,26,14,
32,13,22,14,25,6,11,23,6,43,79,12,
11,11),
Russia = c(NA,NA,NA,NA,NA,NA,NA,NA,
NA,NA,18,12,17,70,21,110,89,14,30,
25),
USA = c(31,37,7,16,27,8,32,
14,27,7,11,3,46,45,12,21,30,10,
18,NA),
Spain = c(10,4,3,6,9,4,16,
4,14,9,7,3,23,21,7,11,20,6,12,14)
) -> df
library(Kendall)
#Apply the MannKendall function across the columns
try <- lapply(df[-1], MannKendall)
#Convert list to data frame
do.call(rbind.data.frame, try)
# tau sl S D varS
# Luxembourg 0.17101437 0.36358595 23 134.4916 586.3333
# Germany -0.06971737 0.69586229 -13 186.4672 942.3333
# France -0.11311328 0.52717423 -19 167.9732 810.3333
# Russia 0.24444444 0.37109327 11 45.0000 125.0000
# USA -0.05294209 0.77930272 -9 169.9971 815.0000
# Spain 0.27963457 0.09646106 52 185.9570 941.3333
This seems to be a case where the traditional (cbind, apply...) syntax looks better than the tidyverse's:
library(Kendall)
# tidyverse
df %>%
pivot_longer(-Season, names_to = "country") %>%
group_by(country) %>%
summarise(x = list(enframe(unlist(MannKendall(value))))) %>%
unnest(x) %>%
pivot_wider(names_from = name, values_from = value)
# traditional
do.call(rbind.data.frame, lapply(df[-1], MannKendall))
I can compute the algebraic connectivity of the complete graph on 20 vertices in fraction of a second using
import networkx
D = {}
for i in range(20):
D[i] = [j for j in range(20)]
G = networkx.Graph(D)
networkx.algebraic_connectivity(G)
However, in a process I generate a graph (on 20 nodes) that I ask networkx to compute its algebraic connectivity, and it keeps running for ever with no errors. Here is the graph:
import networkx
D = {0: [32, 33, 19, 5, 21, 37, 6, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 5: [32, 0, 33, 19, 37, 21, 6, 22, 38, 39, 41, 26, 42, 11, 43, 44, 28, 15, 31], 6: [0, 32, 33, 19, 5, 37, 21, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 11: [32, 0, 33, 19, 21, 37, 5, 6, 22, 38, 39, 41, 26, 42, 43, 28, 44, 15, 31], 15: [0, 32, 33, 19, 5, 21, 37, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 31], 19: [0, 32, 33, 5, 21, 37, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 21: [32, 0, 33, 19, 37, 5, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 22: [32, 33, 19, 5, 21, 37, 6, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 26: [0, 32, 33, 19, 5, 21, 37, 6, 22, 38, 39, 41, 42, 11, 43, 28, 44, 15, 31], 28: [32, 0, 33, 19, 21, 37, 5, 6, 22, 38, 39, 41, 26, 42, 11, 43, 44, 15, 31], 31: [32, 0, 33, 19, 5, 21, 37, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15], 32: [0, 33, 19, 5, 21, 37, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 31, 15], 33: [0, 32, 19, 5, 21, 37, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 37: [32, 0, 33, 19, 5, 21, 6, 22, 38, 39, 41, 26, 42, 11, 43, 28, 44, 31, 15], 38: [32, 0, 33, 19, 21, 37, 5, 6, 22, 39, 41, 26, 42, 11, 43, 28, 44, 15, 31], 39: [0, 32, 33, 19, 5, 21, 37, 6, 22, 38, 41, 26, 42, 11, 43, 28, 44, 15, 31], 41: [32, 0, 33, 19, 21, 37, 5, 38, 6, 22, 39, 26, 42, 11, 43, 28, 44, 15, 31], 42: [32, 0, 33, 19, 21, 37, 5, 6, 22, 38, 39, 41, 26, 11, 43, 28, 44, 15, 31], 43: [32, 0, 33, 19, 21, 37, 5, 6, 22, 38, 39, 41, 26, 42, 11, 28, 44, 15, 31], 44: [32, 0, 33, 19, 5, 21, 37, 38, 6, 22, 39, 41, 42, 26, 11, 43, 28, 15, 31]}
G = networkx.Graph(D)
networkx.algebraic_connectivity(G)
Any reasons why it is so, and how to fix it?
There seems to be a bug in the tracemin method that is the default with networkx.algebraic_connectivity(). Try using
networkx.algebraic_connectivity(G, method='lanczos')
I got a nested list, named mylist which has length 4.
Each element of this list is an experiment: exp1.1, exp1.2, exp2.1 and exp2.2.
Each experiment contains observations of length (in days) of four plant growth stages: EM-V6 V6-R0 R0-R4 and R4-R9.
Each growth stage is organized as a data frame with year and mean.
Here is the complete data:
mylist=structure(list(exp1.1 = structure(list(`EM-V6` = structure(list(
year = 2011:2100, mean = c(34, 34, 32, 28, 25, 32, 32, 28,
27, 30, 32, 31, 33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27,
30, 29, 31, 34, 30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29,
32, 31, 25, 28, 28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27,
32, 27, 28, 28, 30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26,
28, 31, 30, 27, 26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26,
26, 24, 26, 28, 25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100,
mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30,
32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33,
31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31,
30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29,
31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29,
30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30,
29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100,
mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33,
32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34,
33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32,
31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29,
32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31,
30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30,
31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100,
mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28,
29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30,
28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25,
26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25,
28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25,
25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26,
26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4",
"R4-R9")), exp1.2 = structure(list(`EM-V6` = structure(list(year = 2011:2100,
mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31,
33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34,
30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28,
28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28,
30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27,
26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28,
25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100,
mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30,
32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33,
31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31,
30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29,
31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29,
30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30,
29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100,
mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33,
32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34,
33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32,
31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29,
32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31,
30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30,
31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100,
mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28,
29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30,
28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25,
26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25,
28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25,
25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26,
26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4",
"R4-R9")), exp2.1 = structure(list(`EM-V6` = structure(list(year = 2011:2100,
mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31,
33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34,
30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28,
28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28,
30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27,
26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28,
25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100,
mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30,
32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33,
31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31,
30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29,
31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29,
30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30,
29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100,
mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33,
32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34,
33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32,
31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29,
32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31,
30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30,
31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100,
mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28,
29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30,
28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25,
26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25,
28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25,
25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26,
26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4",
"R4-R9")), exp2.2 = structure(list(`EM-V6` = structure(list(year = 2011:2100,
mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31,
33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34,
30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28,
28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28,
30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27,
26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28,
25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100,
mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30,
32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33,
31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31,
30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29,
31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29,
30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30,
29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100,
mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33,
32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34,
33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32,
31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29,
32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31,
30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30,
31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100,
mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28,
29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30,
28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25,
26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25,
28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25,
25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26,
26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA,
-90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4",
"R4-R9"))), .Names = c("exp1.1", "exp1.2", "exp2.1", "exp2.2"
))
What I need to do is to "unlist" this nested list to a data frame that will look like this:
YEAR EXP EM-V6 V6-R0 R0-R4 R4-R9
2011 exp1.1 34 30 31 27
2011 exp1.2 34 30 31 27
2011 exp2.1 34 30 31 27
2011 exp1.1 34 30 31 27
Which means:
- first year, first experiment, and growth stages.
- first year, second experiment and growth stages.
- first year, third experiment and growth stages
- first year, fourth experiment and growth stages
- second year, first experiment and growth stages
and so on.
How to perform that data transformation?
An alternative using rbindlist from the data.table-package twice:
library(data.table)
# bind the dataframes in the 'listed lists' together and include the year with the 'id'-parameter
# the resulting 'data.table's are returned as a list
step1 <- lapply(mylist, rbindlist, id = 'stages')
# bind the resulting list together and include the experiment id
step2 <- rbindlist(step1, id = 'experiment')
# reshape to wide format
dcast(step2, year + experiment ~ stages, value.var = 'mean')
Or in one go:
dcast(rbindlist(lapply(mylist, rbindlist, id = 'stages'), id = 'experiment'),
year + experiment ~ stages, value.var = 'mean')
which gives:
year experiment EM-V6 R0-R4 R4-R9 V6-R0
1: 2011 exp1.1 34 31 27 30
2: 2011 exp1.2 34 31 27 30
3: 2011 exp2.1 34 31 27 30
4: 2011 exp2.2 34 31 27 30
5: 2012 exp1.1 34 32 29 33
---
356: 2099 exp2.2 30 30 25 29
357: 2100 exp1.1 26 30 24 29
358: 2100 exp1.2 26 30 24 29
359: 2100 exp2.1 26 30 24 29
360: 2100 exp2.2 26 30 24 29
Alternate tidyverse:
library(tidyverse)
map_df(mylist, ~bind_rows(., .id="id"), .id="EXP") %>%
spread(id, mean)
We can use tidyverse with more compact and readable code
library(dplyr)
library(tidyr)
library(purrr)
res1 <- mylist %>%
#bind the inner datasets and create an id column
map(bind_rows, .id = "id") %>%
#bind the outer datasets and create an EXP column
bind_rows(.id = "EXP") %>%
#reshape to wide format
spread(id, mean)
head(res1, 4)
# EXP year EM-V6 R0-R4 R4-R9 V6-R0
#1 exp1.1 2011 34 31 27 30
#2 exp1.1 2012 34 32 29 33
#3 exp1.1 2013 32 32 28 33
#4 exp1.1 2014 28 33 28 32
Or we can approach this by looping through the mylist with lapply, then create a new column 'name' usign Map by cbinding the names of the inner list elements, then rbind the list elements with do.call(rbind, now do a second Map to create a new column based on the names of 'mylist', rbind the list elements and then reshape from base R to convert it to 'wide'
res <- do.call(rbind, Map(cbind, lapply(mylist, function(x)
do.call(rbind, Map(cbind, x, name = names(x)))), EXP= names(mylist)))
res2 <- reshape(res, idvar = c("year", "EXP"),
timevar = "name", direction = "wide")
row.names(res2) <- NULL
head(res2, 4)
# year EXP mean.EM-V6 mean.V6-R0 mean.R0-R4 mean.R4-R9
#1 2011 exp1.1 34 30 31 27
#2 2012 exp1.1 34 33 32 29
#3 2013 exp1.1 32 33 32 28
#4 2014 exp1.1 28 32 33 28
NOTE: No external packages used (100% base R)
or use dcast from reshape2 to transform to 'wide' format
library(reshape2)
res2 <- dcast(res, year + EXP~name, value.var = "mean")
head(res2, 4)
# year EXP EM-V6 V6-R0 R0-R4 R4-R9
#1 2011 exp1.1 34 30 31 27
#2 2011 exp1.2 34 30 31 27
#3 2011 exp2.1 34 30 31 27
#4 2011 exp2.2 34 30 31 27