R - Convert List of Lists into single dataframe - r

So, I have created a list (and a single column matrix) that contains 256 nested lists. What I would like to do is convert each of the 256 lists into a single dataframe of 16 columns and then write.table it. Although each list contains the same number of columns (16), the number of rows for each list varies. I have tried to use unlist, unsuccessfully, because of the varying row counts. I can subset each list individually, so I know there's an easier way to do the whole list.
I'm pretty new to R, so I apologize for asking what may be a naive novice question. I searched through a lot of topics the last couple days and didn't see anything that seemed to match my problem. for loop seems like it might be unnecessary and I wasn't sure if lapply was the correct route, either.
UPDATE: dput of first list:
list(structure(list(structure(c(2L, 11L, 15L, 8L, 7L, 3L, 6L, 10L,
1L, 1L, 18L, 13L, 14L, 19L, 16L, 17L, 4L, 5L, 9L, 12L), .Label = c("",
"Aaron Rodgers", "Andrew Quarless", "Derrick Coleman", "Doug Baldwin",
"DuJuan Harris", "Eddie Lacy", "James Starks", "Jermaine Kearse",
"John Kuhn", "Jordy Nelson", "Luke Willson", "Marshawn Lynch", "Percy
Harvin", "Randall Cobb", "Ricardo Lockette", "Robert Turbin",
"Russell Wilson", "Zach Miller"), class = "factor"), Tm =
structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 4L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("GNB", "Passing", "SEA", "Tm"),
class = "factor"), Cmp = structure(c(3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 4L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "19",
"23", "Cmp", "Rushing"), class = "factor"), Att = structure(c(3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 4L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("", "28", "33", "Att", "Receiving"
), class = "factor"), Yds = structure(c(2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, NA, 4L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("", "189", "191", "Yds"), class = "factor"),
TD = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 4L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "1",
"2", "TD"), class = "factor"), Int = structure(c(3L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, NA, 4L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("", "0", "1", "Int"), class = "factor"),
Lng = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 4L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "23",
"33", "Lng"), class = "factor"), Att = structure(c(1L, 1L,
1L, 7L, 3L, 1L, 2L, 2L, NA, 8L, 7L, 4L, 5L, 1L, 1L, 6L, 1L,
1L, 1L, 1L), .Label = c("", "1", "12", "20", "4", "6", "7",
"Att"), class = "factor"), Yds = structure(c(1L, 1L, 1L,
7L, 6L, 1L, 9L, 3L, NA, 10L, 5L, 2L, 8L, 1L, 1L, 4L, 1L,
1L, 1L, 1L), .Label = c("", "110", "2", "27", "29", "34",
"37", "41", "7", "Yds"), class = "factor"), TD = structure(c(1L,
1L, 1L, 2L, 2L, 1L, 2L, 3L, NA, 5L, 2L, 4L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L), .Label = c("", "0", "1", "2", "TD"), class = "factor"),
Lng = structure(c(1L, 1L, 1L, 2L, 4L, 1L, 8L, 6L, NA, 9L,
3L, 7L, 5L, 1L, 1L, 8L, 1L, 1L, 1L, 1L), .Label = c("", "12",
"13", "15", "16", "2", "21", "7", "Lng"), class = "factor"),
Rec = structure(c(1L, 7L, 5L, 3L, 4L, 4L, 1L, 1L, NA, 8L,
1L, 2L, 6L, 4L, 3L, 1L, 2L, 4L, 2L, 2L), .Label = c("", "1",
"2", "3", "6", "7", "9", "Rec"), class = "factor"), Yds = structure(c(1L,
12L, 9L, 3L, 3L, 6L, 1L, 1L, NA, 13L, 1L, 4L, 10L, 8L, 7L,
1L, 5L, 4L, 11L, 2L), .Label = c("", "1", "11", "14", "15",
"26", "38", "42", "58", "59", "8", "83", "Yds"), class = "factor"),
TD = structure(c(1L, 2L, 3L, 2L, 2L, 2L, 1L, 1L, NA, 4L,
1L, 2L, 2L, 2L, 3L, 1L, 3L, 2L, 2L, 2L), .Label = c("", "0",
"1", "TD"), class = "factor"), Lng = structure(c(1L, 7L,
9L, 3L, 4L, 8L, 1L, 1L, NA, 14L, 1L, 5L, 11L, 10L, 11L, 1L,
6L, 12L, 13L, 2L), .Label = c("", "1", "11", "12", "14",
"15", "16", "18", "23", "24", "33", "6", "8", "Lng"), class = "factor")), .Names = c("", "Tm", "Cmp", "Att", "Yds", "TD", "Int",
"Lng", "Att", "Yds", "TD", "Lng", "Rec", "Yds", "TD", "Lng"),
row.names = c(NA, -20L ), class = "data.frame"))
So, each observation in my list is like this above and I want to convert all of the lists into their 16 column(Now that I think about it, it's 17 columns, one is just unnamed) dataframe layout and stack all the rows together in one place that I can then write.table

Let's call your list l where l[[1]] is what you have dput above.
Two easy ways from base R and from data.table
# Base R: pass every element of l as an argument to a single rbind() call
do.call("rbind", l)
# data.table: bind the elements of l row-wise with rbindlist()
data.table::rbindlist(l)
This assumes that the columns match in each list element. Your example doesn't confirm this, although you state it.

Related

How to report REPEATED MEASURES ANOVA output in tables on a word text file?

I would like to export tables for the following result for a repeated measure anova:
Here is the function in which the ANOVA test is implemented:
# Fit a repeated-measures ANOVA on one signal's data with ezANOVA():
# `value` is the dependent variable, `ID` identifies the subject and
# `COND` is the within-subject factor. The ezANOVA result is then
# converted with as_tibble().
fAddANOVA <- function(data) {
  anova_fit <- ezANOVA(data, dv = .(value), wid = .(ID), within = .(COND))
  as_tibble(anova_fit)
}
And here are the commands used to compute the ANOVA statistics:
# For each signal, run fAddANOVA() on its nested `data` tibble, drop the
# raw data column and expand the ANOVA results into columns.
signals_grouped <- group_by(df_join, signals)
signals_fitted <- mutate(signals_grouped, ANOVA = map(data, fAddANOVA))
aov_stats <- signals_fitted %>%
  dplyr::select(-data) %>%
  unnest(ANOVA)
> aov_stats
# A tibble: 12 x 4
# Groups: signals [12]
signals ANOVA$Effect $DFn $DFd $F $p $`p<.05` $ges `Mauchly's Test~ $W $p $`p<.05` `Sphericity Cor~ $GGe $`p[GG]` $`p[GG]<.05` $HFe $`p[HF]` $`p[HF]<.05`
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
1 P3FCz COND 2 48 0.0440 9.57e-1 "" 3.38e-4 COND 0.938 0.480 "" COND 0.942 9.50e-1 "" 1.02 9.57e-1 ""
2 P3Cz COND 2 48 0.594 5.56e-1 "" 6.30e-3 COND 0.846 0.147 "" COND 0.867 5.33e-1 "" 0.928 5.44e-1 ""
3 P3Pz COND 2 48 5.18 9.22e-3 "*" 4.28e-2 COND 0.989 0.885 "" COND 0.990 9.46e-3 "*" 1.08 9.22e-3 "*"
4 LPPearlyFCz COND 2 48 3.59 3.52e-2 "*" 2.40e-2 COND 0.997 0.965 "" COND 0.997 3.54e-2 "*" 1.09 3.52e-2 "*"
5 LPPearlyCz COND 2 48 7.09 2.00e-3 "*" 6.87e-2 COND 0.949 0.549 "" COND 0.952 2.40e-3 "*" 1.03 2.00e-3 "*"
6 LPPearlyPz COND 2 48 13.9 1.70e-5 "*" 1.14e-1 COND 0.948 0.544 "" COND 0.951 2.53e-5 "*" 1.03 1.70e-5 "*"
7 LPP1FCz COND 2 48 4.56 1.54e-2 "*" 2.92e-2 COND 0.849 0.151 "" COND 0.868 2.02e-2 "*" 0.930 1.78e-2 "*"
8 LPP1Cz COND 2 48 7.05 2.07e-3 "*" 6.37e-2 COND 0.823 0.107 "" COND 0.850 3.65e-3 "*" 0.908 2.93e-3 "*"
9 LPP1Pz COND 2 48 13.3 2.52e-5 "*" 9.94e-2 COND 0.774 0.0522 "" COND 0.815 1.07e-4 "*" 0.867 7.14e-5 "*"
10 LPP2FCz COND 2 48 0.286 7.53e-1 "" 2.84e-3 COND 0.734 0.0285 "*" COND 0.790 7.01e-1 "" 0.836 7.14e-1 ""
11 LPP2Cz COND 2 48 1.05 3.59e-1 "" 1.22e-2 COND 0.945 0.520 "" COND 0.948 3.56e-1 "" 1.03 3.59e-1 ""
12 LPP2Pz COND 2 48 2.64 8.15e-2 "" 3.15e-2 COND 0.904 0.314 "" COND 0.913 8.71e-2 "" 0.984 8.25e-2 ""
>
I would appreciate some suggestions for reporting these results using either of the following two layouts:
SOLUTION 1:
three separate tables in a Word document containing:
ANOVA measures, ranging from the first to the eighth column;
Mauchly's Test statistics, from the ninth to the twelfth column of the tibble, also including the signals column that these statistics refer to;
Sphericity test, from the thirteenth column to the last one, always including the signals column;
SOLUTION 2:
A single table, arranged so as
to get rid of the redundant columns (the repeated COND ones)
and, above each chunk of result columns (ANOVA (3-8), Mauchly's test (10-12) and Sphericity test (14-19)), to add a spanning header line with the name of the statistic that range of columns refers to.
Thank you very much in advance
In case it helps, I leave the dataset below
> dput(head(df_join))
structure(list(signals = c("P3FCz", "P3Cz", "P3Pz", "LPPearlyFCz",
"LPPearlyCz", "LPPearlyPz"), data = list(structure(list(ID = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L,
6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L,
11L, 12L, 12L, 12L, 13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L,
16L, 16L, 16L, 17L, 17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L,
20L, 20L, 21L, 21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L,
24L, 25L, 25L, 25L), .Label = c("01", "04", "06", "07", "08",
"09", "10", "11", "12", "13", "15", "16", "17", "18", "19", "21",
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"),
GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"),
COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
), class = "factor"), value = c(-11.6312151716924, -11.1438413285935,
-3.99591470944713, -0.314155675382471, 0.238885648959708,
5.03749946898385, -0.213621915029167, -2.96032491743069,
-1.97168681693488, -2.83109425298642, 1.09291198163802, -6.692991645215,
4.23849942428043, 2.9898889629932, 3.5510699900835, 9.57481668808606,
5.4167795618285, 1.7067607715475, -6.13036076093477, -2.82955734597919,
-2.50672211111696, 0.528517585832501, 8.16418133488309, 1.88777321897925,
-7.73588468896919, -9.83058052401056, -6.97442700196932,
1.27327945355082, 2.11962397764132, 0.524299677616254, -1.83310726842883,
0.658810483381172, -0.261373488428192, 4.37524298634374,
0.625555654900511, 3.19617639836154, 0.0405517582137798,
-3.29357103412113, -0.381435057304614, -5.73445509910268,
-6.1129152355645, -2.45744234877604, 2.95352732001065, 0.527721249096473,
1.91803490989119, -3.46703346467546, -2.40438419043702, -5.35374408162217,
-7.27028665849262, -7.1532211375959, -5.39955520296854, 2.65765002364624,
0.372495441513391, 6.24433066412776, 1.85698518142405, -0.564454675803529,
-0.068523080368053, -7.04782633579147, -4.52263283590558,
-6.62134671432544, 4.56661945182626, 3.05859761335498, 2.02997952225347,
-6.10523962206958, -0.521871236969702, -3.97851995684846,
-2.61258020387919, -4.13974828699279, -3.9210032516844, -4.63162466544638,
-4.36762718685405, -6.71005969834916, -4.22719611676328,
-0.229916506217565, -5.69725200870146)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -75L)), structure(list(
ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L,
9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L,
13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L,
21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L,
25L, 25L), .Label = c("01", "04", "06", "07", "08", "09",
"10", "11", "12", "13", "15", "16", "17", "18", "19", "21",
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"),
GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"),
COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
), class = "factor"), value = c(-5.16524399006139, -5.53112490175437,
0.621502123415388, 2.23100741241039, 3.96990710862955, 7.75899775608441,
-1.30019374375434, -3.59899040898949, -1.92340529575071,
2.19344184533265, 5.87900720863083, -5.92378937757888, 2.44958531767688,
3.10043497883256, 1.65779442628225, 13.7118233181713, 6.86178446511352,
5.31481098188172, -4.13240668697805, 0.162182285588285, 0.142083484505352,
5.42592103255673, 14.5496375672716, 4.52018125654081, -2.40677805475299,
-5.3832670295207, -1.55736964635117, 3.48359241788107, 4.23167123533126,
2.00051785325202, 1.48755216347718, 2.37269462739372, 1.30346907198835,
3.89476490634811, 1.87516303240986, 4.36353100770575, 1.9413417416824,
-2.22114447555529, -0.015852062711641, -2.76146409940467,
-3.51627712447581, 1.01799377568815, 1.74783962328435, 1.1303870721987,
2.16398550183836, -3.31557794753334, -1.83920975041768, -6.06703163736936,
-8.1566939611461, -9.23030396302541, -4.35545141573936, 0.906302081219897,
0.45401759063429, 3.80236232314171, 4.0336657306528, 2.0185967445137,
0.835589319243251, -4.6805488231028, -1.20746167339041, -5.50475999427345,
4.96594373869991, 4.1349308440931, 3.00187233307059, -5.61465293602653,
0.544596077279702, -5.20450410570445, -0.0325220589039272,
-2.28038421035601, -2.01375702882255, -1.6547144697087, -0.619979893871085,
-4.48258340054462, -1.42281778522059, 2.62315679073783, -4.13736508533355
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-75L)), structure(list(ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L,
8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L,
13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 21L,
21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 25L, 25L
), .Label = c("01", "04", "06", "07", "08", "09", "10", "11",
"12", "13", "15", "16", "17", "18", "19", "21", "22", "23", "25",
"27", "28", "30", "44", "46", "49"), class = "factor"), GR = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"),
SES = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), COND = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR",
"NEG-NOC", "NEU-NOC"), class = "factor"), value = c(11.8802266972569,
12.1053426662461, 12.955441582096, 15.0981004360619, 15.4046229884164,
16.671036999147, 3.13771453335467, -0.0892565159000666, 2.15365554736525,
13.6778924406572, 14.3862738306396, 6.86762877785576, 7.47946451329025,
8.93405130318593, 8.45962311067909, 23.4166601996042, 15.1868092142896,
9.97183712753913, 6.267521071803, 10.142198458411, 10.6320358418368,
12.9998037913548, 20.7052065690674, 11.8852179570666, 15.7899796085713,
7.50729833890206, 14.3076172484818, 9.93797956768228, 10.7693238464384,
5.04681800218272, 5.16656503460515, 7.87875085817396, 2.29899409536951,
10.0135486953849, 5.48278706243332, 7.81908431468528, 8.64382513728869,
3.35777109534179, 3.47474629234488, 4.35678644331281, 3.47085321062162,
6.56231512354717, 4.93825547529124, 7.33985613752315, 6.81966900599588,
6.54487921689425, 7.25872117706077, 1.10301223694429, -0.856423579793706,
-0.887835692028378, -0.931653372049331, 5.6617683754256,
2.29939831067085, 5.1554825066748, 6.59026080217083, 3.0741733363644,
1.80359068950898, 1.63892755704177, 3.857933716935, 0.769316188513939,
10.7031907391191, 9.53278894637555, 8.01071628743378, 6.04891324234645,
11.1964453850602, 3.46633322373091, 14.4393884282958, 11.2339563353478,
7.74933708914689, 7.1182095475238, 7.39260082121406, 0.627435381320771,
9.15473202689768, 13.6559037433263, 7.14786907480758)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -75L)), structure(list(
ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L,
9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L,
13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L,
21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L,
25L, 25L), .Label = c("01", "04", "06", "07", "08", "09",
"10", "11", "12", "13", "15", "16", "17", "18", "19", "21",
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"),
GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"),
COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
), class = "factor"), value = c(-11.7785042972793, -9.14927207125904,
-7.58190508537766, -4.01515836011381, -6.60165385653499,
-2.02861964460179, 4.46729570509601, 2.54036572774646, 2.22923889930115,
-0.883620011106743, -2.63569087592267, -2.0629672230873,
1.14544537612393, 2.08056674659401, 0.0422658298956365, 13.2986259796748,
5.06669915366333, 3.93467692474742, 0.0229069420708053, 4.31923128857779,
0.237726051904304, 1.89972383690448, 3.2371880079134, 0.318100791495115,
-8.08292381883298, -5.73174008540523, -15.7998485301436,
1.75469999857951, 0.677370118816266, -1.8397955509895, 2.55445787016256,
-0.380810453692585, 0.62462329496673, 2.61316333850434, 2.68202480583985,
1.76690658846479, 0.148635887703097, -0.958853757041888,
-3.17305964093897, -7.82526758429289, -6.58557573679886,
-4.39207076049089, 2.36752476749952, 0.594715760553033, -0.29794568443312,
-4.5365387390683, 0.196832250811775, -2.70852853745588, 0.498995124872827,
0.165171574219401, 0.269498974991661, 0.901948386281446,
-2.45955661653299, 1.63525170542944, 0.155897732673534, 1.8491735212703,
-0.856727109535223, -1.16182571974245, 1.07658425742917,
-2.21433585407388, 4.3385479368043, 4.40588599635354, 0.127710423625772,
-6.26956613362656, -1.17658595005389, -7.25886366924741,
-0.888293709383838, -2.14177059335841, -2.42141595261389,
-2.958120275175, -5.1274001953303, -5.32347488769128, -4.41290818553442,
-1.21404719262173, -4.23649270310915)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -75L)), structure(list(
ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L,
9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L,
13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L,
21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L,
25L, 25L), .Label = c("01", "04", "06", "07", "08", "09",
"10", "11", "12", "13", "15", "16", "17", "18", "19", "21",
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"),
GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"),
COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
), class = "factor"), value = c(-5.96429031525769, -5.10918437158799,
-2.81732229625975, -1.43557366487622, -3.14872157912645,
0.160393685024631, 3.52155765271648, 2.10437989449921, 2.70693992810407,
5.49897156207812, 5.81171180245335, -1.37301251388987, -0.434363848460157,
2.87987510596148, -1.27152670283348, 17.2093269365993, 7.79412746755931,
8.11964589961276, 4.95253363860044, 9.50695673265293, 4.15235381401148,
6.1294488368639, 8.01447499455337, 0.783414018677801, -1.24197194087055,
-0.487178595894761, -9.79031812534203, 4.22150266269492,
4.20139847550095, 0.208005397351335, 4.19096721581768, 0.815283302847055,
1.48137456347872, 2.0809543999959, 4.35199943309111, 2.84860039832237,
3.05879540677983, 2.11976068962167, -0.269002712326028, -2.77155065610474,
-2.59002218694999, 0.17928456999128, 2.24515223348079, 1.88805943988563,
-0.0920286086411814, -2.00968595029144, 2.59427260100332,
-1.27622011197768, 0.588399071755827, -1.43982473126936,
1.96978732491278, -0.338674980283045, -1.86484698930706,
-0.0154791822607025, 2.55036185373462, 4.42520405730058,
-0.599156247027551, 1.60091251589958, 4.7367320574401, -0.192490723623988,
4.8452288234686, 5.71745745981867, 1.02554478706585, -4.5951256708181,
1.1704842909792, -7.42770276334892, 3.15655538248828, -0.639830772856786,
-0.345116641695513, -0.0391030568720636, -2.61585906518491,
-2.71685194532693, -1.7348388034111, 1.00287124847525, -2.4844653851482
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-75L)), structure(list(ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L,
8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L,
13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 21L,
21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 25L, 25L
), .Label = c("01", "04", "06", "07", "08", "09", "10", "11",
"12", "13", "15", "16", "17", "18", "19", "21", "22", "23", "25",
"27", "28", "30", "44", "46", "49"), class = "factor"), GR = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"),
SES = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), COND = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR",
"NEG-NOC", "NEU-NOC"), class = "factor"), value = c(8.23981597718437,
9.51261484648731, 9.42367409925817, 5.06332653216481, 5.02619159395405,
9.07903916629231, 7.56089165217984, 5.49719893790597, 4.91476855238182,
13.0320953572069, 10.8414516494484, 5.86927622259489, 3.25309970442897,
4.6847880297099, 2.71096740085175, 25.567439566524, 16.3241813617706,
13.0990192799703, 11.9200281736866, 14.6901305277101, 9.67397418905514,
10.2974302220899, 12.0768070828642, 5.9401530589224, 12.4817579327688,
12.419526465857, 1.00612108990875, 9.63063375751153, 10.5631237176538,
3.08031473770521, 3.35694102903017, 4.28046277054405, -0.133592200169464,
6.9103658689166, 7.64737651416791, 6.75669517393108, 8.5369185279747,
7.08645126073423, 4.47409706618326, 4.39617687043259, 3.27924738047746,
6.06169418872804, 5.34939694712468, 5.58288092654703, 4.85729686493463,
7.38032829587839, 11.7259526759912, 4.95764559864061, 6.24066579989613,
3.49843659402445, 4.07498375647916, 3.55732294589389, 1.33918111568512,
0.956782967443242, 2.32002496709926, 3.15289777246607, -0.832211906889126,
6.39254974438057, 7.0533787627062, 2.97245026797807, 6.23573445580928,
7.6052386193207, 2.98791225155534, 3.10850022259445, 8.12060882554471,
-0.00459651443883508, 13.5899217198075, 9.93070913311253,
8.10285456644801, 5.04464304009428, 2.02262615478956, 1.0510618938653,
5.62233873107127, 10.1193593084848, 5.87476640145049)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -75L)))), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
signals = c("LPPearlyCz", "LPPearlyFCz", "LPPearlyPz", "P3Cz",
"P3FCz", "P3Pz"), .rows = structure(list(5L, 4L, 6L, 2L,
1L, 3L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -6L), .drop = TRUE))
>
As for Solution 1:
### Solution 1
library(officer)
library(flextable)

# Each statistics block in aov_stats is a packed data-frame column, so
# cbind() is used to flatten it next to the signals vector; the first
# column then carries the deparsed name `tab_N[, 1]` and is renamed.

# Table 1: ANOVA measures
tab_1 <- aov_stats %>% select(signals, ANOVA) %>% as.data.frame()
tab_1 <- flextable(cbind(tab_1[, 1], tab_1[, 2]) %>% rename(signals = `tab_1[, 1]`))
tab_1 <- set_caption(tab_1, "1. ANOVA")

# Table 2: Mauchly's test
tab_2 <- aov_stats %>% select(signals, `Mauchly's Test for Sphericity`) %>% as.data.frame()
tab_2 <- flextable(cbind(tab_2[, 1], tab_2[, 2]) %>% rename(signals = `tab_2[, 1]`))
tab_2 <- set_caption(tab_2, "2. Mauchly's Test for Sphericity")

# Table 3: sphericity corrections
tab_3 <- aov_stats %>% select(signals, `Sphericity Corrections`) %>% as.data.frame()
tab_3 <- flextable(cbind(tab_3[, 1], tab_3[, 2]) %>% rename(signals = `tab_3[, 1]`))
# Caption tab_3 itself: captioning tab_2 here would overwrite tab_3 with
# the Mauchly table and the corrections would never reach the document.
tab_3 <- set_caption(tab_3, "3. Sphericity Corrections")

# Build the Word document: the three tables separated by empty paragraphs.
# The result of every body_add_*() call is assigned back so the script
# does not depend on the document being modified in place.
word_export <- read_docx()
word_export <- body_add_flextable(word_export, tab_1, align = "left", split = FALSE)
word_export <- body_add_par(word_export, value = "")
word_export <- body_add_flextable(word_export, tab_2, align = "left", split = FALSE)
word_export <- body_add_par(word_export, value = "")
word_export <- body_add_flextable(word_export, tab_3, align = "left", split = FALSE)
print(word_export, 'ANOVA.docx')
Edit:
Solution 2:
### Solution 2
library(flextable)

# Flatten signals plus the three packed statistics blocks into one wide
# data frame (19 columns).
tab <- aov_stats %>% as.data.frame()
wide <- cbind(tab[, 1], tab[, 2], tab[, 3], tab[, 4])

# Header labels: drop positions 9 and 13 (the repeated effect columns of
# the Mauchly and corrections blocks) and give the first column a proper
# name instead of the deparsed `tab[, 1]`.
cols <- colnames(wide)[-c(9, 13)]
cols[cols == "tab[, 1]"] <- "signals"

# Body: temporary names 1..19 keep the repeated statistic names (p, p<.05)
# from clashing; then drop the same two columns by position.
body_cols <- wide %>%
  setNames(1:19) %>%
  select(-c(9, 13))
tab <- flextable(body_cols)

# Replace the default header with the real column names, then add a
# spanning row naming each statistics block above it.
tab <- delete_part(tab, part = "header")
tab <- add_header_row(tab, values = cols, colwidths = rep(1, 17))
tab <- add_header_row(
  tab,
  values = c("", "ANOVA", "Mauchly's Test for Sphericity.", "Sphericity Corrections."),
  colwidths = c(2, 6, 3, 6)
)
tab <- theme_box(tab)

Calculate the most consumed food items by Ill Patients

I have a dataset as below, which has many columns. There are some columns whose headings are :
baked_hamburgur,spinach,mashed_potato,cabbages,jello,rolls,brown,milk,coffee,water,cakes,vanilla,chocolate,fruitsalad
There are other columns as well, but I am only interested as of now in the above columns.
the value in each row of these columns is either: yes, or no.
A screenshot of this data is as under, as I am not able to attach/share this file in the question itself.
The dput(head()) output is as under:
> dput(head(illness_data))
structure(list(Age = structure(c(18L, 26L, 22L, 25L, 29L, 13L
), .Label = c("10", "106", "11", "12", "14", "15", "16", "17",
"18", "19", "2", "20", "22", "23", "24", "25", "26", "27", "28",
"30", "31", "32", "33", "34", "36", "38", "39", "4", "42", "43",
"44", "45", "46", "48", "5", "7", "8", "9", "seven"), class = "factor"),
sex = structure(c(3L, 2L, 3L, 3L, 2L, 3L), .Label = c("-1",
"Female", "Male"), class = "factor"), timesupper = c(2000L,
1830L, 1830L, 1930L, 1930L, 1930L), ill = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "yes", class = "factor"), onsetdate = structure(c(4L,
4L, 4L, 1L, 1L, 4L), .Label = c("18-Apr", "18-Jun", "18/4",
"19-Apr"), class = "factor"), onsettime = c(30L, 30L, 30L,
2230L, 2230L, 200L), baked_hamburgur = structure(c(2L, 2L,
2L, 2L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
spinach = structure(c(2L, 2L, 2L, 2L, 2L, 1L), .Label = c("no",
"yes"), class = "factor"), mashed_potato = structure(c(2L,
2L, 1L, 1L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
cabbages = structure(c(1L, 2L, 1L, 2L, 1L, 1L), .Label = c("no",
"yes"), class = "factor"), jello = structure(c(1L, 1L, 1L,
2L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
rolls = structure(c(2L, 1L, 1L, 1L, 2L, 1L), .Label = c("no",
"yes"), class = "factor"), brown = structure(c(1L, 1L, 1L,
1L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
milk = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no",
"yes"), class = "factor"), coffee = structure(c(2L, 2L, 2L,
1L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
water = structure(c(1L, 1L, 1L, 2L, 2L, 1L), .Label = c("no",
"yes"), class = "factor"), cakes = structure(c(1L, 1L, 2L,
1L, 1L, 1L), .Label = c("no", "yes"), class = "factor"),
vanilla = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("no",
"yes"), class = "factor"), chocolate = structure(c(1L, 2L,
2L, 1L, 1L, 2L), .Label = c("no", "yes"), class = "factor"),
fruitsalad = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no",
"yes", "yes</pre></body></html>Ztext/plain\b\v\035(F]l~Ó_Ý\026R\002\001"
), class = "factor")), .Names = c("Age", "sex", "timesupper",
"ill", "onsetdate", "onsettime", "baked_hamburgur", "spinach",
"mashed_potato", "cabbages", "jello", "rolls", "brown", "milk",
"coffee", "water", "cakes", "vanilla", "chocolate", "fruitsalad"
), row.names = c(NA, 6L), class = "data.frame")
A complete dput command output is as under:
> dput(illness_data)
structure(list(Age = structure(c(18L, 26L, 22L, 25L, 29L, 13L,
36L, 8L, 11L, 7L, 24L, 10L, 8L, 35L, 34L, 6L, 22L, 39L, 12L,
9L, 36L, 17L, 9L, 20L, 37L, 27L, 32L, 30L, 21L, 24L, 3L, 18L,
33L, 16L, 5L, 31L, 28L, 14L, 19L, 38L, 2L, 4L, 23L, 1L, 18L,
15L), .Label = c("10", "106", "11", "12", "14", "15", "16", "17",
"18", "19", "2", "20", "22", "23", "24", "25", "26", "27", "28",
"30", "31", "32", "33", "34", "36", "38", "39", "4", "42", "43",
"44", "45", "46", "48", "5", "7", "8", "9", "seven"), class = "factor"),
sex = structure(c(3L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L,
3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 1L, 3L, 3L, 3L,
2L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 2L, 3L, 2L, 3L), .Label = c("-1", "Female", "Male"
), class = "factor"), timesupper = c(2000L, 1830L, 1830L,
1930L, 1930L, 1930L, 2200L, 1900L, 1930L, NA, NA, NA, NA,
2200L, NA, NA, NA, 2200L, NA, NA, 2200L, 2200L, NA, NA, 2200L,
NA, NA, NA, NA, NA, 1900L, NA, 1100L, NA, NA, NA, 2200L,
1930L, 1930L, 2200L, NA, NA, 1930L, 1930L, NA, NA), ill = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "yes", class = "factor"), onsetdate = structure(c(4L,
4L, 4L, 1L, 1L, 4L, 4L, 2L, 4L, 4L, 4L, 1L, 1L, 4L, 1L, 3L,
1L, 4L, 1L, 1L, 4L, 4L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 4L, 4L,
1L, 1L, 1L, 1L, 1L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 1L, 4L, 1L
), .Label = c("18-Apr", "18-Jun", "18/4", "19-Apr"), class = "factor"),
onsettime = c(30L, 30L, 30L, 2230L, 2230L, 200L, 100L, 2300L,
200L, 1030L, 30L, 2215L, 2200L, 100L, 2300L, 2145L, 2145L,
100L, 2300L, 2100L, 100L, 100L, 2115L, 2330L, 100L, 2130L,
230L, 200L, 2130L, 30L, 100L, 2230L, 1500L, 2400L, 2300L,
2230L, 100L, 230L, 2330L, 100L, 30L, 30L, 100L, 2400L, 215L,
2300L), baked_hamburgur = structure(c(2L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L), .Label = c("no",
"yes"), class = "factor"), spinach = structure(c(2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L), .Label = c("no",
"yes"), class = "factor"), mashed_potato = structure(c(2L,
2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), cabbages = structure(c(1L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L
), .Label = c("no", "yes"), class = "factor"), jello = structure(c(1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), rolls = structure(c(2L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), brown = structure(c(1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), milk = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("no", "yes"), class = "factor"), coffee = structure(c(2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), water = structure(c(1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L
), .Label = c("no", "yes"), class = "factor"), cakes = structure(c(1L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), vanilla = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), chocolate = structure(c(1L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, NA, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L
), .Label = c("no", "yes"), class = "factor"), fruitsalad = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L
), .Label = c("no", "yes", "yes</pre></body></html>Ztext/plain\b\v\035(F]l~Ó_Ý\026R\002\001"
), class = "factor")), .Names = c("Age", "sex", "timesupper",
"ill", "onsetdate", "onsettime", "baked_hamburgur", "spinach",
"mashed_potato", "cabbages", "jello", "rolls", "brown", "milk",
"coffee", "water", "cakes", "vanilla", "chocolate", "fruitsalad"
), class = "data.frame", row.names = c(NA, -46L))
R has correctly read these columns as Factor type variable(s).
Now, each of these columns correspond to what every ill patient in a hospital has consumed.
I'd like to know the most consumed food item by ill patients, using R.
Kindly advise a good way to do so. Thanks!
Note, I have not tried any other options, than the ones mentioned in this URL below. However, I could not make it work.
Count Factor Columns Using R
Since all the rows have ill = 'yes' we can count in each column the number of values with yes in it. A base R approach could be :
# Count the "yes" answers in each food column (columns 7 to 20) and show the
# five most consumed items. na.rm = TRUE is required: chocolate contains one
# NA, and without it colSums() returns NA for that column, which sort() then
# silently drops from the ranking.
head(
  sort(colSums(illness_data[7:20] == "yes", na.rm = TRUE), decreasing = TRUE),
  5
)
# vanilla baked_hamburgur cakes spinach chocolate
# 43 29 27 26 25
I have selected columns 7 to 20 because those are the only columns where food item is present. Also I have selected only top 5 values, you can select any value by changing the number 5 in head command.
I am not completely sure what you are looking for, but this will calculate how often foods are consumed (using the tidyverse package):
library(tidyverse)
# Step 1: recode every column except the demographic/timing ones so that
#         "yes" becomes 1 and anything else becomes 0.
# Step 2: add up those flags per column (ill is excluded here as well),
#         ignoring missing answers.
illness_data_summed <- illness_data %>%
  mutate_at(
    vars(-Age, -sex, -timesupper,-onsetdate,-onsettime),
    function(answer) ifelse(answer == "yes", 1,0)
  ) %>%
  summarise_at(
    vars(-Age, -sex, -timesupper,-onsetdate,-onsettime, -ill),
    function(flags) sum(flags, na.rm = TRUE)
  )
# Keep the column(s) whose total equals the overall maximum
illness_data_summed[
  which(illness_data_summed == max(illness_data_summed))
]
So first I convert the yes into 1 and no into 0, which makes the sum a representation of the number of times the specific food was eaten. I do it for all columns except those you are not interested in (indicated by the - in vars), but you can also reverse that if desirable (e.g., when the number of vars you want to convert is lower than the number you do not want to convert).
The last part will result in:
vanilla
1 43

Warning message In `[<-.factor`(`*tmp*`, iseq, value = foo) : invalid factor level, NA generated when trying to add vector to row subset

I'm writing a function that attempts to add values in a single row of a data.frame in several columns at once:
require(stringr)
# Write the non-NA entries of `pointsVector` into row `keyRowNum` of a copy of
# `df`, one value per column whose name matches the regular expression
# `searchStringForPointColNames`.
#
# Arguments:
#   df                            data frame to read from (it is not modified)
#   keyRowNum                     index of the row that receives the values
#   searchStringForPointColNames  regex selecting the target columns by name
#   pointsVector                  values to write; NA entries are dropped first
#
# Prints diagnostics about the cleaned vector and, when the number of cleaned
# values equals the number of matching columns, the updated row. The function's
# value is that printed row, or NULL when the two counts differ.
addPointsToKeyRow = function(df, keyRowNum, searchStringForPointColNames, pointsVector){
  # grepl() gives exactly one logical per column name, so the resulting
  # indices stay valid even if the pattern contains capture groups.
  pointColNums = which(grepl(searchStringForPointColNames, colnames(df)))
  pointsVectorCleaned = pointsVector[!is.na(pointsVector)]
  print(is.vector(pointsVectorCleaned)) #Returns TRUE
  print(is.data.frame(pointsVectorCleaned)) #Returns FALSE
  print(pointsVectorCleaned)
  if(length(pointsVectorCleaned) == length(pointColNums)){
    # stringsAsFactors only affects character input: columns that already are
    # factors stay factors after this call.
    newDf = data.frame(df, stringsAsFactors = FALSE)
    # Assigning a value that is not an existing level into a factor yields NA
    # plus an "invalid factor level" warning, so the target columns are turned
    # into plain character vectors before the assignment.
    factorCols = pointColNums[vapply(newDf[pointColNums], is.factor, logical(1))]
    if(length(factorCols) > 0){
      newDf[factorCols] = lapply(newDf[factorCols], as.character)
    }
    newDf[keyRowNum, pointColNums] = as.character(pointsVectorCleaned)
    print(newDf[keyRowNum,])
  }
}
When I apply the function to my data (addPointsToKeyRow(finalDf, which(finalDf[,1]=="key"), "points_q", pointVals)), I get the following warnings:
In [<-.factor(*tmp*, iseq, value = "2") :
invalid factor level, NA generated
I've looked for the error on SO and other sites, and the recommendation always seems to be to make sure your data.frame has stringsAsFactors = FALSE.
I think my issue might be that when I subset the data.frame (newDf[keyRowNum, pointColNums]), it no longer keeps stringsAsFactors = FALSE.
Regardless of whether that's the issue or not, I'd very much welcome some help solving this weird issue. Many thanks in advance!
For the sake of an example, let's say df is:
df = structure(list(first = structure(c(7L, 9L, 5L, 4L, 10L, 2L, 3L,
6L, 1L, 8L), .Label = c("autumn", "spring", "summer", "winter",
"july", "betty", "november", "echo", "victor", "tango"), class = "factor"),
last = structure(c(6L, 2L, 4L, 5L, 1L, 8L, 3L, 9L, 10L, 7L
), .Label = c("brummett1", "do", "drorbaugh", "galeno", "gerber",
"key", "lyons", "pecsok", "perezfranco", "swatt"), class = "factor"),
question1 = structure(c(1L, 1L, 1L, 4L, 6L, 2L, 5L, 3L, 5L,
5L), .Label = c("0", "0.25", "1:02:01", "1:2 50%", "2-Jan",
"50%"), class = "factor"), points_q1 = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "", class = "factor"),
question2 = structure(c(8L, 10L, 6L, 5L, 2L, 3L, 7L, 1L,
4L, 9L), .Label = c(" a | b; A| Aa | Ab; b| ab | bb; the possibility that the offspring will be heterozygous is about 25%. The same goes for the homozygous recessive it is a 1:1:1:1",
"1/4 heterozygous for \xf1a\xee and 0 recessive for \xf1b\xee",
"16-Mar", "2-Jan", "3:1 25%", "4-Jan", "Male=aabb Female=AAbb Heterozygous is going to be 1/2. Homozygous is going to be 1/4.",
"possible offspring genotypes (each with probability of 0.25): AABb AaBb AAbb Aabb. Question is asking about probability of Aabb_ which is 0.25.",
"The square shows Ab Ab_ Bb Bb so 50% or 1/2. ", "Xa Yb (father) crossed with XA Xb (mother) = 1/2 "
), class = "factor"), points_q2 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "", class = "factor"),
question3 = structure(c(4L, 5L, 3L, 5L, 5L, 5L, 7L, 2L, 6L,
1L), .Label = c("Codominance", "coheritance", "incomplete dominance",
"Incomplete dominance", "Incomplete dominance ", "Incomplete dominance. ",
"Independent Assortment"), class = "factor"), points_q3 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "", class = "factor"),
question4 = structure(c(3L, 4L, 2L, 3L, 6L, 3L, 7L, 1L, 5L,
4L), .Label = c("", "co-dominance", "Codominance", "Codominance ",
"Codominance. ", "Codominant ", "Independent Assortment? (Wrong)"
), class = "factor"), points_q4 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "", class = "factor"),
question5 = structure(c(2L, 10L, 6L, 4L, 5L, 3L, 8L, 1L,
7L, 9L), .Label = c(" X | Y; X| XX | XY; x| Xx | xY; the percentage will be 25 % or 1/4 the same applies to the son ",
"0 for daughter_ because male can only give non-colorblind X chromosome (because he's not colorblind an only has one X chromosome). 0.25 for both son and colorblind.",
"0.25", "25% for son and 25% for daughter", "25% for the son and 25% for the daughter ",
"4-Jan", "50%", "Father=XY Mother=X2Y Therefore_ by using the punnet square_ I was able to show/understand that the probability of them having a son AND him being colorblind is 1/4.",
"To have a son or daughter is 50/50. To have a colorblind daughter is .25 whereas to have a colorblind son is .75 because it is carried on the X chromosome and the son is much more likely to inherit this because he has less x to work with",
"XcY (father) XC Xc (mother) Daughter is 1/4 son 1/4"), class = "factor"),
points_q5 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = "", class = "factor"), question6 = structure(c(3L,
6L, 7L, 8L, 5L, 2L, 10L, 9L, 4L, 1L), .Label = c("Chromatids ",
"Chromosomes (diploids)", "homologous chromosome pairs",
"Homologous chromosome pairs are being separated. ", "Homologous chromosomes ",
"Homologous pairs ", "homologous pairs of chromosomes", "Homologus Chromosomes ",
"sister chromatids ", "Sister Chromatids?"), class = "factor"),
points_q6 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = "", class = "factor"), question7 = structure(c(6L,
8L, 5L, 7L, 8L, 2L, 3L, 1L, 9L, 4L), .Label = c("", "Chromatids (haploids)",
"Daughter Chromosomes?", "One cell to 2", "sister chromatids",
"Sister chromatids", "Sister Chromatids", "Sister chromatids ",
"Sister chromatids within daughter cells are separating. "
), class = "factor"), points_q7 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "", class = "factor"),
question8 = structure(c(1L, 4L, 1L, 2L, 4L, 2L, 3L, 6L, 5L,
3L), .Label = c("sister chromatids", "Sister chromatids",
"Sister Chromatids", "Sister chromatids ", "Sister chromatids are held together by the centromeres. In prophase chromosomes become visible. During metaphase chromosomes attach to spindles. During Anaphase the chromosomes are split apart and in telophase the cells start to create cleavage. ",
"sisters chromatides"), class = "factor"), points_q8 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "", class = "factor"),
question9 = structure(c(2L, 4L, 1L, 3L, 4L, 3L, 3L, 2L, 5L,
3L), .Label = c("prohase ", "prophase", "Prophase", "Prophase ",
"They condense during prophase before the rest of the phases. "
), class = "factor"), points_q9 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "", class = "factor"),
question10 = structure(c(1L, 3L, 1L, 2L, 3L, 2L, 2L, 1L,
4L, 2L), .Label = c("anaphase", "Anaphase", "Anaphase ",
"During anaphase. "), class = "factor"), points_q10 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "", class = "factor"),
question11 = structure(c(3L, 4L, 3L, 4L, 4L, 4L, 4L, 3L,
1L, 2L), .Label = c("During prophase. ", "Telephase ", "telophase",
"Telophase"), class = "factor"), points_q11 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "", class = "factor"),
question12 = structure(c(1L, 3L, 1L, 2L, 3L, 2L, 3L, 1L,
4L, 2L), .Label = c("metaphase", "Metaphase", "Metaphase ",
"Metaphase. "), class = "factor"), points_q12 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "", class = "factor"),
question13 = structure(c(1L, 4L, 1L, 4L, 2L, 4L, 2L, 5L,
3L, 6L), .Label = c("centromere", "Centromere", "Centromere. ",
"Centromeres", "centromeres ", "Cleavage"), class = "factor"),
points_q13 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "", class = "factor")), .Names = c("first",
"last", "question1", "points_q1", "question2", "points_q2", "question3",
"points_q3", "question4", "points_q4", "question5", "points_q5",
"question6", "points_q6", "question7", "points_q7", "question8",
"points_q8", "question9", "points_q9", "question10", "points_q10",
"question11", "points_q11", "question12", "points_q12", "question13",
"points_q13"), row.names = c(NA, -10L), class = "data.frame")
which(finalDf[,1]=="key") is 1.
pointVals is c(NA, "2", "2", "2", "2", "2", "2", "2", "1", "1", "1", "1",
"1", "1")
For clarification, I'd want the final table to look something like:
First Last question1 points_q1 question2 points_q2 etc.
key key 0 2 "possible_offspring_genotypes..." 1 etc.
I have reduced your function based on my understanding. Let me know if it gives what you want, or if I have misunderstood something.
# Fill the cells of row `keyRowNum` that lie in columns whose names match
# `searchString` with the entries of `pointsVector`.
# The assignment only happens when the number of matching columns equals
# length(pointsVector); otherwise `df` is handed back untouched.
addPointsToKeyRow = function(df, keyRowNum, searchString, pointsVector) {
  # Logical mask over the column names: TRUE where searchString matches
  isPointCol <- grepl(searchString, colnames(df))
  # Counts disagree: nothing to write, return the input as is
  if (sum(isPointCol) != length(pointsVector)) {
    return(df)
  }
  # One value per matching column, all in the requested row
  df[keyRowNum, isPointCol] <- pointsVector
  df
}
#Convert every column from factor to character so that new values can be
#assigned without hitting "invalid factor level"
df[] <- lapply(df, as.character)
#Define the values to be written into the points_q columns
pointVals <- c("2", "2", "2", "2", "2", "2", "2", "1", "1", "1", "1","1", "1")
#Call the function (the argument must be pointVals, the vector defined above)
df <- addPointsToKeyRow(df, 1, "points_q", pointVals)
#Check the dataframe
df

Separating ggplot using rectangles in the background

Here is an image of my plot so far. At the end of the post I provide the code to reproduce it.
For the time being I use horizontal lines to separate the four groups of lines (defined by the variable de in the data frame). But I would like to use colored rectangles in the background of each group. See the following image to get an idea.
I tried geom_rect and geom_tile with no success. Could anybody help me?
mdfr<-structure(list(name = structure(c(13L, 13L, 13L, 14L, 14L, 14L,
1L, 1L, 1L, 10L, 10L, 10L, 7L, 7L, 7L, 2L, 2L, 2L, 15L, 15L,
15L, 8L, 8L, 8L, 11L, 11L, 11L, 16L, 16L, 16L, 4L, 4L, 4L, 12L,
12L, 12L, 9L, 9L, 9L, 17L, 17L, 17L, 5L, 5L, 5L, 6L, 6L, 6L,
3L, 3L, 3L, 13L, 13L, 13L, 14L, 14L, 14L, 1L, 1L, 1L, 10L, 10L,
10L, 7L, 7L, 7L, 2L, 2L, 2L, 15L, 15L, 15L, 8L, 8L, 8L, 11L,
11L, 11L, 16L, 16L, 16L, 4L, 4L, 4L, 12L, 12L, 12L, 9L, 9L, 9L,
17L, 17L, 17L, 5L, 5L, 5L, 6L, 6L, 6L, 3L, 3L, 3L, 13L, 13L,
14L, 14L, 1L, 1L, 10L, 10L, 7L, 7L, 2L, 2L, 15L, 15L, 8L, 8L,
11L, 11L, 16L, 16L, 4L, 4L, 12L, 12L, 9L, 9L, 17L, 17L, 5L, 5L,
6L, 6L, 3L, 3L), .Label = c("10012/06", "541/13", "700-1/15",
"700/13", "737/13", "751/15", "512/12", "579/13", "715/14", "458/07",
"635/13", "705/13, \n705-1/15", "10004/07", "10005/07", "563/09",
"698/16", "717/14"), class = "factor"), Contr.finish = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Initial", "Current",
"Forecast", "Cost"), class = "factor"), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("start_date", "end_date"
), class = "factor"), value = c("2007-05-30", "2009-03-30", "2016-06-29",
"2007-09-05", "2010-03-05", "2017-09-30", "2006-09-26", "2008-09-26",
"2015-08-31", "2007-11-20", "2011-11-20", "2014-03-20", "2012-01-31",
"2014-07-31", "2016-03-20", "2013-06-21", "2016-06-21", "2016-06-21",
"2009-04-15", "2011-04-15", "2017-12-31", "2013-06-21", "2016-06-21",
"2016-06-21", "2013-12-18", "2016-08-18", "2017-08-18", "2016-04-14",
"2018-02-14", "2018-02-14", "2013-06-03", "2014-10-03", "2016-05-10",
"2013-08-07", "2015-02-07", "2016-06-30", "2014-09-11", "2016-09-11",
"2016-09-11", "2014-09-26", "2016-09-26", "2016-09-26", "2013-03-20",
"2016-03-20", "2016-03-20", "2015-10-09", "2016-08-09", "2016-08-09",
"2015-11-10", "2016-05-10", "2016-05-10", "2009-03-30", "2016-06-29",
"2016-06-29", "2010-03-05", "2017-09-30", "2017-09-30", "2008-09-26",
"2015-08-31", "2016-08-31", "2011-11-20", "2014-03-20", "2015-12-31",
"2014-07-31", "2016-03-20", "2016-12-20", "2016-06-21", "2016-06-21",
"2016-12-30", "2011-04-15", "2017-12-31", "2017-12-31", "2016-06-21",
"2016-06-21", "2018-03-31", "2016-08-18", "2017-08-18", "2018-02-28",
"2018-02-14", "2018-02-14", "2018-02-14", "2014-10-03", "2016-05-10",
"2016-05-10", "2015-02-07", "2016-06-30", "2016-06-30", "2016-09-11",
"2016-09-11", "2017-07-28", "2016-09-26", "2016-09-26", "2016-09-26",
"2016-03-20", "2016-03-20", "2018-10-19", "2016-08-09", "2016-08-09",
"2016-08-09", "2016-05-10", "2016-05-10", "2016-05-10", "2007-05-30",
"2013-09-24", "2007-09-05", "2010-10-21", "2006-09-26", "2016-08-02",
"2007-11-20", "2015-10-19", "2012-01-31", "2015-11-23", "2013-06-21",
"2015-06-09", "2009-04-15", "2014-05-06", "2013-06-21", "2015-03-28",
"2013-12-18", "2015-05-24", "2016-04-14", "2016-04-14", "2013-06-03",
"2016-01-07", "2013-08-07", "2015-12-08", "2014-09-11", "2015-07-24",
"2014-09-26", "2015-06-18", "2013-03-20", "2017-02-22", "2015-10-09",
"2015-10-09", "2015-11-10", "2016-01-06"), bar = c(5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2.5, 2.5, 2.5, 2.5,
2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5,
2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5,
2.5, 2.5, 2.5, 2.5), de = structure(c(4L, 4L, 4L, 4L, 4L, 4L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 4L, 4L, 4L, 2L,
2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L,
2L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L,
4L, 4L, 4L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 4L,
4L, 4L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 3L, 3L,
3L, 2L, 2L, 2L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 1L, 1L, 3L, 3L, 2L, 2L, 1L, 1L, 4L, 4L, 2L, 2L,
3L, 3L, 4L, 4L, 1L, 1L, 3L, 3L, 2L, 2L, 4L, 4L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("de1", "de2", "de3", "de4"), class = "factor")), row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46",
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57",
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68",
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90",
"91", "92", "93", "94", "95", "96", "97", "98", "99", "100",
"101", "102", "110", "410", "710", "103", "131", "161", "191",
"221", "251", "281", "311", "341", "371", "401", "431", "461",
"491", "521", "551", "581", "611", "641", "671", "701", "731",
"761", "791", "821", "851", "881", "911", "941", "971", "1001"
), .Names = c("name", "Contr.finish", "variable", "value", "bar",
"de"), class = "data.frame")
dfr<-structure(list(name = structure(c(2L, 4L, 3L, 1L), .Label = c("10004/07",
"10012/06", "458/07", "512/12"), class = "factor"), text = c("Region 1",
"Region 2", "Region 3", "Region 4"), name0 = c(0, 6.5, 9.5, 12.5
)), .Names = c("name", "text", "name0"), row.names = c(NA, -4L
), class = "data.frame")
library(ggplot2)
library(scales)
library(ggthemes)
# Timeline of contract dates: x is `value` parsed as a date and converted to
# POSIXct, y is the contract `name`, and line colour follows Contr.finish.
ggplot(mdfr, aes(as.POSIXct(as.Date(value, "%Y-%m-%d")), name, colour = Contr.finish)) +
# First line layer: line size is mapped to the `bar` column
geom_line(aes(size=bar)) +
# Draw the colour legend keys at size 5; suppress the size and fill legends
guides(colour = guide_legend(override.aes = list(size=5)), size="none", fill="none") +
# Second line layer with a constant size of 2.0
geom_line(size=2.0) +
# Blank out both axis titles
xlab("") + ylab("") +
theme_stata() +
# Horizontal separators taken from dfr, one at each name0 value
geom_hline(data=dfr, aes(yintercept = name0), color = "#4d4d4d", size=0.8) + #
# Fill palette; no layer in this call maps a fill aesthetic
scale_fill_brewer(palette="Dark2") +
# One break per year on the date axis; labels are passed through abbreviate
scale_x_datetime(breaks = date_breaks("1 year"),labels = abbreviate) +
# Fixed colour for each Contr.finish level
scale_colour_manual(values=c("Initial" = "#67bf5c", "Current" = "#1f77b4",
"Forecast" = "#ff9e4a", "Cost" = "#c10534")) +
# Legend placed at the bottom; y-axis text drawn at angle 0
theme(legend.position = "bottom",
axis.text.y=element_text(angle=0)
)
You can use geom_rect() and set xmin= and xmax= to the minimal and maximal values of your dates, or to some other values outside the limits. For ymin= and ymax=, use the name values converted to numeric (they have to be factors in your data frame) and then subtract and add 0.5 (each discrete value occupies a band of width 1 around it). Add expand=c(0,0) to scale_x_datetime() to remove the white areas.
# Layer to append to the plot: one rectangle per data row, spanning the full
# date range on x and the band from -0.5 to +0.5 around the numeric code of
# `name` on y, filled by `de`, drawn with alpha 0.05 and linetype 0.
+ geom_rect(aes(xmin=min(as.POSIXct(as.Date(value, "%Y-%m-%d"))),
xmax=max(as.POSIXct(as.Date(value, "%Y-%m-%d"))),
ymin=as.numeric(name)-0.5,ymax=as.numeric(name)+0.5,
fill=de),alpha=0.05,linetype=0)

Conditionally subsetting a list of dataframes in R

I have a list of dataframes called myList (see sample below) and all I want is to subset that list of dataframes by the condition that only rows with a "pointNum" > 100 are included in the new list. Should be easy but I just can't get it to work. So the output should look like this for the first item on the list:
[[1]]
study Identi locDate locNumb meanLat meanLon pointNum
5 study 1 SDU101 2011-07-13 49 32.8837771221667 -117.24038866075 120
9 study 1 SDU101 2011-07-13 60 32.8838778530086 -117.240522195673 349
11 study 1 SDU101 2011-07-13 321 32.8027296698536 -117.210527201581 683
I've been trying to get this to work, and other similar subsetting options. It currently runs but doesn't do anything:
# NOTE(review): "pointNum" is quoted, so the condition compares the literal string "pointNum" with 2 instead of reading the pointNum column.
newList = lapply(myList, function(x) { subset(x, "pointNum" > 2)} )
I know that similar questions have been posted, but I couldn't get any of those solutions to work for my particular problem. Any help would be greatly appreciated.
myList <- list(structure(list(study = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Study 1", class = "factor"),
Identi = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "SDU101", class = "factor"),
locDate = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "2011-07-13", class = "factor"),
locNumb = structure(c(12L, 15L, 1L, 2L, 8L, 9L, 10L, 11L,
13L, 14L, 3L, 4L, 5L, 6L, 7L), .Label = c("10", "11", "321",
"323", "324", "326", "329", "49", "56", "57", "59", "6",
"60", "61", "7"), class = "factor"), meanLat = structure(c(11L,
10L, 4L, 9L, 6L, 8L, 3L, 5L, 7L, 12L, 1L, 15L, 13L, 14L,
2L), .Label = c("32.8027296698536", "32.802755201875", "32.883244695",
"32.8835599674286", "32.8837003266667", "32.8837771221667",
"32.8838778530086", "32.88411147", "32.88419565", "32.8841969254545",
"32.884720435", "32.8853723146154", "32.8853777533333", "32.8854051",
"32.9164754136842"), class = "factor"), meanLon = structure(c(13L,
10L, 12L, 15L, 9L, 8L, 7L, 4L, 11L, 6L, 2L, 3L, 14L, 5L,
1L), .Label = c("-117.210382870833", "-117.210527201581",
"-117.236141991053", "-117.239834913333", "-117.23989078",
"-117.240133633077", "-117.240140015", "-117.24022087", "-117.24038866075",
"-117.240416713636", "-117.240522195673", "-117.240532619714",
"-117.24062533", "-117.24063566", "-117.24070002"), class = "factor"),
pointNum = structure(c(6L, 2L, 9L, 1L, 3L, 1L, 6L, 7L, 8L,
4L, 11L, 5L, 7L, 1L, 10L), .Label = c("1", "11", "120", "13",
"19", "2", "3", "349", "35", "48", "683"), class = "factor")), .Names = c("study",
"Identi", "locDate", "locNumb", "meanLat", "meanLon", "pointNum"
), row.names = c(NA, -15L), class = "data.frame"), structure(list(
study = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "Study 1", class = "factor"),
Identi = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "SDU111", class = "factor"),
locDate = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "2011-07-12", class = "factor"),
locNumb = structure(c(14L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L), .Label = c("354", "418", "419",
"420", "421", "422", "425", "426", "427", "428", "429", "430",
"432", "67"), class = "factor"), meanLat = structure(c(2L,
3L, 9L, 1L, 5L, 8L, 13L, 11L, 6L, 7L, 10L, 12L, 14L, 4L), .Label = c("32.8651107616667",
"32.86543857", "32.867004565", "32.868283279", "32.868857725",
"32.869014345", "32.8692111971429", "32.8693627126536", "32.8694241808955",
"32.8694814566667", "32.86955278", "32.8696187847619", "32.8696329253571",
"32.8698972233333"), class = "factor"), meanLon = structure(c(13L,
12L, 8L, 14L, 11L, 2L, 7L, 5L, 4L, 1L, 3L, 9L, 6L, 10L), .Label = c("-117.235456126857",
"-117.235585179972", "-117.235959423333", "-117.25006813",
"-117.25014399", "-117.250450876667", "-117.250467514464",
"-117.25050148", "-117.250773722857", "-117.2512085715",
"-117.25133879", "-117.25283091", "-117.254194355", "-117.254406255417"
), class = "factor"), pointNum = structure(c(2L, 2L, 11L,
5L, 2L, 8L, 9L, 1L, 2L, 7L, 6L, 4L, 10L, 3L), .Label = c("1",
"2", "20", "21", "24", "3", "35", "358", "56", "6", "67"), class = "factor")), .Names = c("study",
"Identi", "locDate", "locNumb", "meanLat", "meanLon", "pointNum"
), row.names = c(NA, -14L), class = "data.frame"))
You have two issues - extra quotes and your pointNum from your dput is a factor, so do this:
# Keep only the rows whose pointNum exceeds 100, as the question asks.
# pointNum is stored as a factor, so it goes through as.character() first to
# recover the printed numbers rather than the internal level codes.
lapply(myList, function(x) {
  subset(x, as.integer(as.character(pointNum)) > 100)
})

Resources