I am trying to convert my data frame from a long to a wide format. Currently there is an InputCode column, which has the values InputA and InputB, and these need to become their own columns using values from 'DataValue'. I've been trying spread and dcast:
data_wide <- spread(oldData_long, InputCode, DataValue)
or
# Cast to wide format: one column per InputCode level, filled from DataValue.
# (A `+` was missing between InputName and DataYear, which made the formula
# unparseable.)
data_wide2 <- dcast(
  oldData_long,
  Indicator + IndicatorID + InputName + DataYear + Country + Division + InputUnit ~ InputCode,
  value.var = "DataValue"
)
but the number of rows in my data frame remains the same (84) instead of becoming 42, despite the creation of the InputA and InputB columns. Whenever there is a value for InputA there's an NA in the column for InputB, and vice versa.
Furthermore, ideally there would be an InputUnit column for each InputCode, e.g. 'InputAUnit', as this value will also be unique to each input when spreading the data and might be causing my problem above. The same goes for InputName, but I have no idea how to pull that information across neatly as well.
Any help would be greatly appreciated!!
dput:
structure(list(ID = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Indicator = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "Waste Generated", class = "factor"), IndicatorID = c(11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L), InputCode = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("InputA", "InputB"), class = "factor"), InputName = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("Waste Generated - Waste incinerated",
"Waste Generated - Waste sent to landfill"), class = "factor"),
DataValue = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 5L, 1L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
4L, 6L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 9L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 10L,
3L), .Label = c("0", "155", "19", "2,898.00", "20,462.34",
"22.317", "4.368", "40", "6,695.65", "8.998"), class = "factor"),
UnitCode = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "t", class = "factor"), DataYear = c(2009L, 2009L,
2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,
2009L, 2009L, 2009L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2009L, 2009L, 2009L, 2009L, 2009L,
2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2011L, 2011L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2011L), Country = structure(c(4L, 1L, 2L, 3L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L, 2L, 3L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L, 2L, 3L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L, 2L, 3L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L, 2L,
3L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L,
2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L), .Label = c("Afghanistan",
"Albania", "Algeria", "All", "American Samoa", "Andorra",
"Angola", "Antigua and Barbuda", "Argentina", "Armenia",
"Aruba", "Australia", "Austria", "Azerbaijan"), class = "factor"),
ISO = structure(c(5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L, 7L, 8L,
1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L, 7L,
8L, 1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L,
7L, 8L, 1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L,
7L, 8L, 1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L,
7L, 8L, 1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L,
7L, 8L, 1L, 11L, 12L, 13L), .Label = c("ABW", "AFG", "AGO",
"ALB", "ALL", "AND", "ARG", "ARM", "ASM", "ATG", "AUS", "AUT",
"AZE", "DZA"), class = "factor"), Division = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Test", class = "factor"),
FurtherDetails1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "Test1", class = "factor"), FurtherDetails2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Test2", class = "factor")), class = "data.frame", row.names = c(NA,
-84L))
This would be the ideal output :
structure(list(ID = c(NA, NA, NA, NA, NA, NA), Indicator = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "Waste Generated", class = "factor"),
IndicatorID = c(11L, 11L, 11L, 11L, 11L, 11L), DataYear = c(2009L,
2009L, 2009L, 2009L, 2009L, 2009L), Country = structure(c(4L,
1L, 2L, 3L, 5L, 6L), .Label = c("Afghanistan", "Albania",
"Algeria", "All", "American Samoa", "Andorra", "Angola",
"Antigua and Barbuda", "Argentina", "Armenia", "Aruba", "Australia",
"Austria", "Azerbaijan"), class = "factor"), ISO = structure(c(5L,
2L, 4L, 14L, 9L, 6L), .Label = c("ABW", "AFG", "AGO", "ALB",
"ALL", "AND", "ARG", "ARM", "ASM", "ATG", "AUS", "AUT", "AZE",
"DZA"), class = "factor"), Division = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = "Test", class = "factor"), FurtherDetails1 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "Test1", class = "factor"),
FurtherDetails2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "Test2", class = "factor"),
InputA = c(0L, 0L, 0L, 0L, 0L, 0L), InputAUnit = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("", "t"), class = "factor"),
InputAName = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"Waste Generated - Waste sent to landfill"), class = "factor"),
InputB = c(0L, 0L, 0L, 0L, 0L, 0L), InputBUnit = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("", "t"), class = "factor"),
InputBName = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"Waste Generated - Waste incinerated"), class = "factor")), row.names = c(NA,
6L), class = "data.frame")
Thanks!!
A possible tidyr solution.
library(tidyr)
# Widen all per-input columns in one call: each of DataValue, UnitCode and
# InputName yields one output column per InputCode level (for example
# DataValue_InputA), while the remaining columns identify the output rows.
out <- pivot_wider(
  oldData_long,
  names_from = InputCode,
  values_from = c(DataValue, UnitCode, InputName)
)
out
# A tibble: 42 x 15
ID Indicator IndicatorID DataYear Country ISO Division FurtherDetails1 FurtherDetails2 DataValue_InputA DataValue_InputB
<lgl> <fct> <int> <int> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
1 NA Waste Ge… 11 2009 All ALL Test Test1 Test2 0 0
2 NA Waste Ge… 11 2009 Afghan… AFG Test Test1 Test2 0 0
3 NA Waste Ge… 11 2009 Albania ALB Test Test1 Test2 0 0
4 NA Waste Ge… 11 2009 Algeria DZA Test Test1 Test2 0 0
5 NA Waste Ge… 11 2009 Americ… ASM Test Test1 Test2 0 0
6 NA Waste Ge… 11 2009 Andorra AND Test Test1 Test2 0 0
7 NA Waste Ge… 11 2009 Angola AGO Test Test1 Test2 0 0
8 NA Waste Ge… 11 2009 Antigu… ATG Test Test1 Test2 0 0
9 NA Waste Ge… 11 2009 Argent… ARG Test Test1 Test2 0 0
10 NA Waste Ge… 11 2009 Armenia ARM Test Test1 Test2 0 0
# … with 32 more rows, and 4 more variables: UnitCode_InputA <fct>, UnitCode_InputB <fct>, InputName_InputA <fct>, InputName_InputB <fct>
str(out)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 42 obs. of 15 variables:
$ ID : logi NA NA NA NA NA NA ...
$ Indicator : Factor w/ 1 level "Waste Generated": 1 1 1 1 1 1 1 1 1 1 ...
$ IndicatorID : int 11 11 11 11 11 11 11 11 11 11 ...
$ DataYear : int 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 ...
$ Country : Factor w/ 14 levels "Afghanistan",..: 4 1 2 3 5 6 7 8 9 10 ...
$ ISO : Factor w/ 14 levels "ABW","AFG","AGO",..: 5 2 4 14 9 6 3 10 7 8 ...
$ Division : Factor w/ 1 level "Test": 1 1 1 1 1 1 1 1 1 1 ...
$ FurtherDetails1 : Factor w/ 1 level "Test1": 1 1 1 1 1 1 1 1 1 1 ...
$ FurtherDetails2 : Factor w/ 1 level "Test2": 1 1 1 1 1 1 1 1 1 1 ...
$ DataValue_InputA: Factor w/ 10 levels "0","155","19",..: 1 1 1 1 1 1 1 1 1 1 ...
$ DataValue_InputB: Factor w/ 10 levels "0","155","19",..: 1 1 1 1 1 1 1 1 1 1 ...
$ UnitCode_InputA : Factor w/ 1 level "t": 1 1 1 1 1 1 1 1 1 1 ...
$ UnitCode_InputB : Factor w/ 1 level "t": 1 1 1 1 1 1 1 1 1 1 ...
$ InputName_InputA: Factor w/ 2 levels "Waste Generated - Waste incinerated",..: 2 2 2 2 2 2 2 2 2 2 ...
$ InputName_InputB: Factor w/ 2 levels "Waste Generated - Waste incinerated",..: 1 1 1 1 1 1 1 1 1 1 ...
Related
I would like to export tables of the following results from a repeated-measures ANOVA:
Here is the function in which the ANOVA test is implemented:
# Run the ANOVA for one data set and return the result as a tibble.
#
# `data` must provide the columns referenced below: `value` (dependent
# variable), `ID` (case identifier) and `COND` (within factor). Whatever
# `ezANOVA()` returns is passed on to `as_tibble()`.
fAddANOVA <- function(data) {
  anova_result <- ezANOVA(data, dv = .(value), wid = .(ID), within = .(COND))
  as_tibble(anova_result)
}
And here are the commands to explore the ANOVA statistics:
# Apply fAddANOVA() to each element of the `data` list-column (one data frame
# per signal), then drop the raw data and unnest the ANOVA results.
aov_stats <- df_join %>%
  group_by(signals) %>%
  mutate(ANOVA = map(data, fAddANOVA)) %>%
  dplyr::select(-data) %>%
  unnest(ANOVA)
> aov_stats
# A tibble: 12 x 4
# Groups: signals [12]
signals ANOVA$Effect $DFn $DFd $F $p $`p<.05` $ges `Mauchly's Test~ $W $p $`p<.05` `Sphericity Cor~ $GGe $`p[GG]` $`p[GG]<.05` $HFe $`p[HF]` $`p[HF]<.05`
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
1 P3FCz COND 2 48 0.0440 9.57e-1 "" 3.38e-4 COND 0.938 0.480 "" COND 0.942 9.50e-1 "" 1.02 9.57e-1 ""
2 P3Cz COND 2 48 0.594 5.56e-1 "" 6.30e-3 COND 0.846 0.147 "" COND 0.867 5.33e-1 "" 0.928 5.44e-1 ""
3 P3Pz COND 2 48 5.18 9.22e-3 "*" 4.28e-2 COND 0.989 0.885 "" COND 0.990 9.46e-3 "*" 1.08 9.22e-3 "*"
4 LPPearlyFCz COND 2 48 3.59 3.52e-2 "*" 2.40e-2 COND 0.997 0.965 "" COND 0.997 3.54e-2 "*" 1.09 3.52e-2 "*"
5 LPPearlyCz COND 2 48 7.09 2.00e-3 "*" 6.87e-2 COND 0.949 0.549 "" COND 0.952 2.40e-3 "*" 1.03 2.00e-3 "*"
6 LPPearlyPz COND 2 48 13.9 1.70e-5 "*" 1.14e-1 COND 0.948 0.544 "" COND 0.951 2.53e-5 "*" 1.03 1.70e-5 "*"
7 LPP1FCz COND 2 48 4.56 1.54e-2 "*" 2.92e-2 COND 0.849 0.151 "" COND 0.868 2.02e-2 "*" 0.930 1.78e-2 "*"
8 LPP1Cz COND 2 48 7.05 2.07e-3 "*" 6.37e-2 COND 0.823 0.107 "" COND 0.850 3.65e-3 "*" 0.908 2.93e-3 "*"
9 LPP1Pz COND 2 48 13.3 2.52e-5 "*" 9.94e-2 COND 0.774 0.0522 "" COND 0.815 1.07e-4 "*" 0.867 7.14e-5 "*"
10 LPP2FCz COND 2 48 0.286 7.53e-1 "" 2.84e-3 COND 0.734 0.0285 "*" COND 0.790 7.01e-1 "" 0.836 7.14e-1 ""
11 LPP2Cz COND 2 48 1.05 3.59e-1 "" 1.22e-2 COND 0.945 0.520 "" COND 0.948 3.56e-1 "" 1.03 3.59e-1 ""
12 LPP2Pz COND 2 48 2.64 8.15e-2 "" 3.15e-2 COND 0.904 0.314 "" COND 0.913 8.71e-2 "" 0.984 8.25e-2 ""
>
I would kindly ask for some suggestions on reporting the results using one of these two presentation methods:
SOLUTION 1:
three separate tables in a Word document containing:
the ANOVA measures, ranging from the first to the eighth column;
the Mauchly's Test statistics, from the ninth to the twelfth column of the tibble, with the column containing the signals these statistics refer to reported as well;
the Sphericity test, from the thirteenth to the last column, again including the signals column;
SOLUTION 2:
A single table, laid out so as
to get rid of the redundant columns (the ones containing only COND)
and, above each chunk of result columns (ANOVA (3-8), Mauchly's test (10-12) and Sphericity test (14-19)), a spanning header line with the name of the statistics that the range refers to.
Thank you very much in advance
In case it is useful, I have included the dataset below:
> dput(head(df_join))
structure(list(signals = c("P3FCz", "P3Cz", "P3Pz", "LPPearlyFCz",
"LPPearlyCz", "LPPearlyPz"), data = list(structure(list(ID = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L,
6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L,
11L, 12L, 12L, 12L, 13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L,
16L, 16L, 16L, 17L, 17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L,
20L, 20L, 21L, 21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L,
24L, 25L, 25L, 25L), .Label = c("01", "04", "06", "07", "08",
"09", "10", "11", "12", "13", "15", "16", "17", "18", "19", "21",
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"),
GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"),
COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
), class = "factor"), value = c(-11.6312151716924, -11.1438413285935,
-3.99591470944713, -0.314155675382471, 0.238885648959708,
5.03749946898385, -0.213621915029167, -2.96032491743069,
-1.97168681693488, -2.83109425298642, 1.09291198163802, -6.692991645215,
4.23849942428043, 2.9898889629932, 3.5510699900835, 9.57481668808606,
5.4167795618285, 1.7067607715475, -6.13036076093477, -2.82955734597919,
-2.50672211111696, 0.528517585832501, 8.16418133488309, 1.88777321897925,
-7.73588468896919, -9.83058052401056, -6.97442700196932,
1.27327945355082, 2.11962397764132, 0.524299677616254, -1.83310726842883,
0.658810483381172, -0.261373488428192, 4.37524298634374,
0.625555654900511, 3.19617639836154, 0.0405517582137798,
-3.29357103412113, -0.381435057304614, -5.73445509910268,
-6.1129152355645, -2.45744234877604, 2.95352732001065, 0.527721249096473,
1.91803490989119, -3.46703346467546, -2.40438419043702, -5.35374408162217,
-7.27028665849262, -7.1532211375959, -5.39955520296854, 2.65765002364624,
0.372495441513391, 6.24433066412776, 1.85698518142405, -0.564454675803529,
-0.068523080368053, -7.04782633579147, -4.52263283590558,
-6.62134671432544, 4.56661945182626, 3.05859761335498, 2.02997952225347,
-6.10523962206958, -0.521871236969702, -3.97851995684846,
-2.61258020387919, -4.13974828699279, -3.9210032516844, -4.63162466544638,
-4.36762718685405, -6.71005969834916, -4.22719611676328,
-0.229916506217565, -5.69725200870146)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -75L)), structure(list(
ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L,
9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L,
13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L,
21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L,
25L, 25L), .Label = c("01", "04", "06", "07", "08", "09",
"10", "11", "12", "13", "15", "16", "17", "18", "19", "21",
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"),
GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"),
COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
), class = "factor"), value = c(-5.16524399006139, -5.53112490175437,
0.621502123415388, 2.23100741241039, 3.96990710862955, 7.75899775608441,
-1.30019374375434, -3.59899040898949, -1.92340529575071,
2.19344184533265, 5.87900720863083, -5.92378937757888, 2.44958531767688,
3.10043497883256, 1.65779442628225, 13.7118233181713, 6.86178446511352,
5.31481098188172, -4.13240668697805, 0.162182285588285, 0.142083484505352,
5.42592103255673, 14.5496375672716, 4.52018125654081, -2.40677805475299,
-5.3832670295207, -1.55736964635117, 3.48359241788107, 4.23167123533126,
2.00051785325202, 1.48755216347718, 2.37269462739372, 1.30346907198835,
3.89476490634811, 1.87516303240986, 4.36353100770575, 1.9413417416824,
-2.22114447555529, -0.015852062711641, -2.76146409940467,
-3.51627712447581, 1.01799377568815, 1.74783962328435, 1.1303870721987,
2.16398550183836, -3.31557794753334, -1.83920975041768, -6.06703163736936,
-8.1566939611461, -9.23030396302541, -4.35545141573936, 0.906302081219897,
0.45401759063429, 3.80236232314171, 4.0336657306528, 2.0185967445137,
0.835589319243251, -4.6805488231028, -1.20746167339041, -5.50475999427345,
4.96594373869991, 4.1349308440931, 3.00187233307059, -5.61465293602653,
0.544596077279702, -5.20450410570445, -0.0325220589039272,
-2.28038421035601, -2.01375702882255, -1.6547144697087, -0.619979893871085,
-4.48258340054462, -1.42281778522059, 2.62315679073783, -4.13736508533355
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-75L)), structure(list(ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L,
8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L,
13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 21L,
21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 25L, 25L
), .Label = c("01", "04", "06", "07", "08", "09", "10", "11",
"12", "13", "15", "16", "17", "18", "19", "21", "22", "23", "25",
"27", "28", "30", "44", "46", "49"), class = "factor"), GR = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"),
SES = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), COND = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR",
"NEG-NOC", "NEU-NOC"), class = "factor"), value = c(11.8802266972569,
12.1053426662461, 12.955441582096, 15.0981004360619, 15.4046229884164,
16.671036999147, 3.13771453335467, -0.0892565159000666, 2.15365554736525,
13.6778924406572, 14.3862738306396, 6.86762877785576, 7.47946451329025,
8.93405130318593, 8.45962311067909, 23.4166601996042, 15.1868092142896,
9.97183712753913, 6.267521071803, 10.142198458411, 10.6320358418368,
12.9998037913548, 20.7052065690674, 11.8852179570666, 15.7899796085713,
7.50729833890206, 14.3076172484818, 9.93797956768228, 10.7693238464384,
5.04681800218272, 5.16656503460515, 7.87875085817396, 2.29899409536951,
10.0135486953849, 5.48278706243332, 7.81908431468528, 8.64382513728869,
3.35777109534179, 3.47474629234488, 4.35678644331281, 3.47085321062162,
6.56231512354717, 4.93825547529124, 7.33985613752315, 6.81966900599588,
6.54487921689425, 7.25872117706077, 1.10301223694429, -0.856423579793706,
-0.887835692028378, -0.931653372049331, 5.6617683754256,
2.29939831067085, 5.1554825066748, 6.59026080217083, 3.0741733363644,
1.80359068950898, 1.63892755704177, 3.857933716935, 0.769316188513939,
10.7031907391191, 9.53278894637555, 8.01071628743378, 6.04891324234645,
11.1964453850602, 3.46633322373091, 14.4393884282958, 11.2339563353478,
7.74933708914689, 7.1182095475238, 7.39260082121406, 0.627435381320771,
9.15473202689768, 13.6559037433263, 7.14786907480758)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -75L)), structure(list(
ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L,
9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L,
13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L,
21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L,
25L, 25L), .Label = c("01", "04", "06", "07", "08", "09",
"10", "11", "12", "13", "15", "16", "17", "18", "19", "21",
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"),
GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"),
COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
), class = "factor"), value = c(-11.7785042972793, -9.14927207125904,
-7.58190508537766, -4.01515836011381, -6.60165385653499,
-2.02861964460179, 4.46729570509601, 2.54036572774646, 2.22923889930115,
-0.883620011106743, -2.63569087592267, -2.0629672230873,
1.14544537612393, 2.08056674659401, 0.0422658298956365, 13.2986259796748,
5.06669915366333, 3.93467692474742, 0.0229069420708053, 4.31923128857779,
0.237726051904304, 1.89972383690448, 3.2371880079134, 0.318100791495115,
-8.08292381883298, -5.73174008540523, -15.7998485301436,
1.75469999857951, 0.677370118816266, -1.8397955509895, 2.55445787016256,
-0.380810453692585, 0.62462329496673, 2.61316333850434, 2.68202480583985,
1.76690658846479, 0.148635887703097, -0.958853757041888,
-3.17305964093897, -7.82526758429289, -6.58557573679886,
-4.39207076049089, 2.36752476749952, 0.594715760553033, -0.29794568443312,
-4.5365387390683, 0.196832250811775, -2.70852853745588, 0.498995124872827,
0.165171574219401, 0.269498974991661, 0.901948386281446,
-2.45955661653299, 1.63525170542944, 0.155897732673534, 1.8491735212703,
-0.856727109535223, -1.16182571974245, 1.07658425742917,
-2.21433585407388, 4.3385479368043, 4.40588599635354, 0.127710423625772,
-6.26956613362656, -1.17658595005389, -7.25886366924741,
-0.888293709383838, -2.14177059335841, -2.42141595261389,
-2.958120275175, -5.1274001953303, -5.32347488769128, -4.41290818553442,
-1.21404719262173, -4.23649270310915)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -75L)), structure(list(
ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L,
9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L,
13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L,
21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L,
25L, 25L), .Label = c("01", "04", "06", "07", "08", "09",
"10", "11", "12", "13", "15", "16", "17", "18", "19", "21",
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"),
GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"),
COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
), class = "factor"), value = c(-5.96429031525769, -5.10918437158799,
-2.81732229625975, -1.43557366487622, -3.14872157912645,
0.160393685024631, 3.52155765271648, 2.10437989449921, 2.70693992810407,
5.49897156207812, 5.81171180245335, -1.37301251388987, -0.434363848460157,
2.87987510596148, -1.27152670283348, 17.2093269365993, 7.79412746755931,
8.11964589961276, 4.95253363860044, 9.50695673265293, 4.15235381401148,
6.1294488368639, 8.01447499455337, 0.783414018677801, -1.24197194087055,
-0.487178595894761, -9.79031812534203, 4.22150266269492,
4.20139847550095, 0.208005397351335, 4.19096721581768, 0.815283302847055,
1.48137456347872, 2.0809543999959, 4.35199943309111, 2.84860039832237,
3.05879540677983, 2.11976068962167, -0.269002712326028, -2.77155065610474,
-2.59002218694999, 0.17928456999128, 2.24515223348079, 1.88805943988563,
-0.0920286086411814, -2.00968595029144, 2.59427260100332,
-1.27622011197768, 0.588399071755827, -1.43982473126936,
1.96978732491278, -0.338674980283045, -1.86484698930706,
-0.0154791822607025, 2.55036185373462, 4.42520405730058,
-0.599156247027551, 1.60091251589958, 4.7367320574401, -0.192490723623988,
4.8452288234686, 5.71745745981867, 1.02554478706585, -4.5951256708181,
1.1704842909792, -7.42770276334892, 3.15655538248828, -0.639830772856786,
-0.345116641695513, -0.0391030568720636, -2.61585906518491,
-2.71685194532693, -1.7348388034111, 1.00287124847525, -2.4844653851482
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-75L)), structure(list(ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L,
8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L,
13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 21L,
21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 25L, 25L
), .Label = c("01", "04", "06", "07", "08", "09", "10", "11",
"12", "13", "15", "16", "17", "18", "19", "21", "22", "23", "25",
"27", "28", "30", "44", "46", "49"), class = "factor"), GR = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"),
SES = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), COND = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR",
"NEG-NOC", "NEU-NOC"), class = "factor"), value = c(8.23981597718437,
9.51261484648731, 9.42367409925817, 5.06332653216481, 5.02619159395405,
9.07903916629231, 7.56089165217984, 5.49719893790597, 4.91476855238182,
13.0320953572069, 10.8414516494484, 5.86927622259489, 3.25309970442897,
4.6847880297099, 2.71096740085175, 25.567439566524, 16.3241813617706,
13.0990192799703, 11.9200281736866, 14.6901305277101, 9.67397418905514,
10.2974302220899, 12.0768070828642, 5.9401530589224, 12.4817579327688,
12.419526465857, 1.00612108990875, 9.63063375751153, 10.5631237176538,
3.08031473770521, 3.35694102903017, 4.28046277054405, -0.133592200169464,
6.9103658689166, 7.64737651416791, 6.75669517393108, 8.5369185279747,
7.08645126073423, 4.47409706618326, 4.39617687043259, 3.27924738047746,
6.06169418872804, 5.34939694712468, 5.58288092654703, 4.85729686493463,
7.38032829587839, 11.7259526759912, 4.95764559864061, 6.24066579989613,
3.49843659402445, 4.07498375647916, 3.55732294589389, 1.33918111568512,
0.956782967443242, 2.32002496709926, 3.15289777246607, -0.832211906889126,
6.39254974438057, 7.0533787627062, 2.97245026797807, 6.23573445580928,
7.6052386193207, 2.98791225155534, 3.10850022259445, 8.12060882554471,
-0.00459651443883508, 13.5899217198075, 9.93070913311253,
8.10285456644801, 5.04464304009428, 2.02262615478956, 1.0510618938653,
5.62233873107127, 10.1193593084848, 5.87476640145049)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -75L)))), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
signals = c("LPPearlyCz", "LPPearlyFCz", "LPPearlyPz", "P3Cz",
"P3FCz", "P3Pz"), .rows = structure(list(5L, 4L, 6L, 2L,
1L, 3L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -6L), .drop = TRUE))
>
As for Solution 1:
### Solution 1
# Export each result set (ANOVA, Mauchly's test, sphericity corrections) as its
# own captioned flextable, one after another, in a single Word document.
library(officer)
library(flextable)

# Each table keeps `signals` plus one result column and flattens the pair with
# cbind(). cbind() labels the first column with the expression that produced it
# (e.g. `tab_1[, 1]`), which is why it is renamed back to `signals`.
tab_1 <- aov_stats %>% select(signals, ANOVA) %>% as.data.frame()
tab_1 <- flextable(cbind(tab_1[, 1], tab_1[, 2]) %>% rename(signals = `tab_1[, 1]`))
tab_1 <- set_caption(tab_1, "1. ANOVA")

tab_2 <- aov_stats %>% select(signals, `Mauchly's Test for Sphericity`) %>% as.data.frame()
tab_2 <- flextable(cbind(tab_2[, 1], tab_2[, 2]) %>% rename(signals = `tab_2[, 1]`))
tab_2 <- set_caption(tab_2, "2. Mauchly's Test for Sphericity")

tab_3 <- aov_stats %>% select(signals, `Sphericity Corrections`) %>% as.data.frame()
tab_3 <- flextable(cbind(tab_3[, 1], tab_3[, 2]) %>% rename(signals = `tab_3[, 1]`))
# Caption tab_3 itself: captioning tab_2 here would overwrite tab_3 with a copy
# of the Mauchly table and the sphericity corrections would never be exported.
tab_3 <- set_caption(tab_3, "3. Sphericity Corrections")

# Build the document; every body_add_*() call returns the updated document, so
# the result is assigned back. An empty paragraph separates consecutive tables.
word_export <- read_docx()
word_export <- body_add_flextable(word_export, tab_1, align = "left", split = FALSE)
word_export <- body_add_par(word_export, value = "")
word_export <- body_add_flextable(word_export, tab_2, align = "left", split = FALSE)
word_export <- body_add_par(word_export, value = "")
word_export <- body_add_flextable(word_export, tab_3, align = "left", split = FALSE)
print(word_export, target = "ANOVA.docx")
Edit:
Solution 2:
### Solution 2
# One combined table: all result sets side by side under a two-level header
# (group label on top, individual column names underneath).
library(flextable)
tab <- as.data.frame(aov_stats)
# Flatten once; the same frame supplies both the header labels and the body.
flat <- cbind(tab[, 1], tab[, 2], tab[, 3], tab[, 4])
# Header labels: positions 9 and 13 are left out, and the label cbind() gave to
# the first column is replaced by "signals".
cols <- colnames(flat)[-c(9, 13)]
cols[cols == "tab[, 1]"] <- "signals"
# Body: the 19 columns get temporary names 1..19 so that columns 9 and 13 can
# be dropped by position; the original header is then replaced by the two
# custom header rows (17 single-width labels, then 4 spanning group labels).
tab <- flat %>%
  setNames(1:19) %>%
  select(-c(9, 13)) %>%
  flextable() %>%
  delete_part(part = "header") %>%
  add_header_row(values = cols, colwidths = rep(1, 17)) %>%
  add_header_row(
    values = c("", "ANOVA", "Mauchly's Test for Sphericity.", "Sphericity Corrections."),
    colwidths = c(2, 6, 3, 6)
  ) %>%
  theme_box()
I need to perform an analysis with glmer on many different subgroups of a large dataset and only extract the estimate and z-value of each model. This works perfectly fine if I only use a small subset of my data (or some dummy data, as attached below), but when I try to include the whole data set, it takes forever. Currently I am using this bit of code:
# For every (region, year, species) group, fit a binomial GLMM of presence on
# transect with a random intercept per road, and keep two numbers from the
# model's coefficient table.
# Note: glmer() is called twice per group with the same formula, once for each
# output column.
# NOTE(review): [2] and [6] index the coefficient matrix linearly. Assuming the
# usual 2 x 4 layout (rows: intercept, transect; columns: Estimate, Std. Error,
# z value, Pr(>|z|)), [6] is the transect z value, not a p-value, despite the
# column name `p_val` -- confirm.
slope_range <- df %>%
group_by(region, year, species) %>%
summarise(slope = coef(summary(glmer(presence ~ transect + (1 | road), family = "binomial")))[2],
p_val = coef(summary(glmer(presence ~ transect + (1 | road), family = "binomial")))[6])
As I said, this works fine, but it is very slow on a large data set. I'm aware that I could also just write multiple loops, but I assume this would take even longer. Does anyone have a suggestion for how to make it faster? Thanks!
Dummy data:
> dput(df)
structure(list(region = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("ARG", "CHE"), class = "factor"),
transect = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), presence = c(1L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L,
1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L), year = c(2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L), species = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("a", "b"), class = "factor"),
road = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("FG", "MK", "PL", "XY"), class = "factor")), class = "data.frame", row.names = c(NA,
-160L))
You are calling coef(summary(glmer(...))) twice for each group, so you can cut the execution time roughly in half by fitting the model and extracting the coefficients once for each group. The following code will extract all the coefficients and their Z and p-values, not just the two values you specified, which I think is preferable if you might end up needing them later. Of course it can be easily modified to discard the other coefficients and keep only the two you specified.
code
library(tidyverse)
library(lme4)
# Fit the model once per (region, year, species) group and return its whole
# coefficient table, labelled by term, as that group's rows.
df %>%
  group_by(region, year, species) %>%
  group_modify(function(group_rows, group_key) {
    fit <- glmer(presence ~ transect + (1 | road), family = "binomial", data = group_rows)
    data.frame(variable = c("Intercept", "transect"), coef(summary(fit)))
  })
output
# A tibble: 16 x 8
# Groups: region, year, species [8]
region year species variable Estimate Std..Error z.value Pr...z..
<fct> <int> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
1 ARG 2007 a Intercept 6.11 2.81 2.17 0.0300
2 ARG 2007 a transect -0.743 0.361 -2.06 0.0398
3 ARG 2007 b Intercept 1.91 1.22 1.57 0.116
4 ARG 2007 b transect -0.396 0.208 -1.90 0.0570
5 ARG 2017 a Intercept 3.95 1.73 2.28 0.0223
6 ARG 2017 a transect -0.654 0.275 -2.38 0.0174
7 ARG 2017 b Intercept 2.44 1.33 1.83 0.0668
8 ARG 2017 b transect -0.396 0.208 -1.90 0.0570
9 CHE 2007 a Intercept 3.95 1.73 2.28 0.0223
10 CHE 2007 a transect -0.654 0.275 -2.38 0.0174
11 CHE 2007 b Intercept 2.44 1.33 1.83 0.0668
12 CHE 2007 b transect -0.396 0.208 -1.90 0.0570
13 CHE 2017 a Intercept 6.11 2.81 2.17 0.0300
14 CHE 2017 a transect -0.743 0.361 -2.06 0.0398
15 CHE 2017 b Intercept 1.91 1.22 1.57 0.116
16 CHE 2017 b transect -0.396 0.208 -1.90 0.0570
You could use a parallel approach as suggested earlier, e.g. with parallel::mclapply (on my 6-core machine using more than 4 cores gave only marginal improvements, though).
You could speed up glmer using nAGQ=0, at the cost of precision (see https://stats.stackexchange.com/questions/132841/default-lme4-optimizer-requires-lots-of-iterations-for-high-dimensional-data).
Example code with benchmarks:
invisible(lapply(c("lme4", "data.table", "tidyverse", "parallel", "microbenchmark"),
require, character.only = TRUE))
#> Loading required package: lme4
#> Loading required package: Matrix
#> Loading required package: data.table
#> Loading required package: tidyverse
#> Loading required package: parallel
#> Loading required package: microbenchmark
df <- structure(list(region = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("ARG", "CHE"), class = "factor"),
transect = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), presence = c(1L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L,
1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L), year = c(2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L), species = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("a", "b"), class = "factor"),
road = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("FG", "MK", "PL", "XY"), class = "factor")), class = "data.frame", row.names = c(NA,
-160L))
## Your function for comparison
# Benchmark baseline: the pipeline from the question, wrapped in a function so
# it can be timed. Takes no arguments and reads the global `df`.
# glmer() is fitted twice per (region, year, species) group here, once for each
# summarised column.
tidy_fun <- function(){
df %>%
group_by(region, year, species) %>%
summarise(slope = coef(summary(glmer(presence ~ transect + (1 | road), family = "binomial")))[2],
p_val = coef(summary(glmer(presence ~ transect + (1 | road), family = "binomial")))[6])
}
# Fit the binomial GLMM for one group and return the two `transect` statistics.
#
# Arguments:
#   presence, transect, road  equal-length vectors for a single group: the
#                             response, the fixed-effect predictor and the
#                             random-intercept grouping factor.
#   nAGQ                      forwarded unchanged to glmer().
#
# Returns a one-row data.table with
#   slope  the `transect` estimate,
#   p_val  the `transect` z value. The column name is kept for compatibility;
#          it is the same cell the previous linear index [6] selected in the
#          2 x 4 coefficient table, i.e. a z statistic, not a p-value.
#
# Cells are selected by row/column name rather than by linear position, so a
# coefficient table of a different shape cannot silently yield unrelated cells.
# If `transect` is missing from the table (e.g. glmer() dropped it from a
# rank-deficient design), both values are NA.
gf2 <- function(presence, transect, road, nAGQ = 1L) {
  fit <- glmer(presence ~ transect + (1 | road), family = "binomial", nAGQ = nAGQ)
  res <- coef(summary(fit))
  if (!"transect" %in% rownames(res)) {
    return(data.table(slope = NA_real_, p_val = NA_real_))
  }
  data.table(
    slope = res["transect", "Estimate"],
    p_val = res["transect", "z value"]
  )
}
# Fit one gf2() model per (region, year, species) group of the global `df`,
# distributing the groups over workers with mclapply().
#
# Arguments:
#   mc.cores  passed to mclapply().
#   nAGQ      passed on to gf2().
#
# Returns the group keys bound column-wise to the stacked per-group gf2()
# results.
parLM <- function(mc.cores=4L, nAGQ=1L){
# Copy of `df` as a data.table keyed by the three grouping columns.
# NOTE(review): the row-range lookup below assumes every group occupies one
# contiguous block of rows in DT (i.e. that keying orders the rows) -- confirm.
DT <- data.table(df, key = c("region","year","species"))
# One row per group; `irange` is a list column holding range(.I), the smallest
# and largest row index of that group in DT.
iDT <- DT[,by=.(region, year, species),.(irange=.(range(.I)))]
# For group x: expand its (min, max) pair into seq(min, max), subset DT to
# those rows and run gf2() on that group's columns.
result <- mclapply(seq(nrow(iDT)),
function(x) DT[do.call(seq, as.list(iDT[x, irange][[1]])),
.(gf2(presence, transect, road, nAGQ=nAGQ))], mc.cores=mc.cores)
# iDT's columns are region, year, species, irange, so dropping column 4 removes
# the `irange` helper column from the combined result.
return(cbind(iDT, rbindlist(result))[,-4])
}
# Time the baseline against three parLM() configurations, 10 runs each.
microbenchmark(
  # Question's pipeline (its messages are suppressed)
  original = suppressMessages(tidy_fun()),
  # Four workers
  multicore = parLM(mc.cores = 4L, nAGQ = 1L),
  # nAGQ = 0 on one worker, then on four
  singlecore.nAGQ0 = parLM(mc.cores = 1L, nAGQ = 0L),
  multicore.nAGQ0 = parLM(mc.cores = 4L, nAGQ = 0L),
  times = 10L
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> original 898.2732 925.0621 963.7452 940.9577 973.0648 1157.0030 10
#> multicore 319.1234 334.4151 347.8024 344.1370 362.6539 373.8189 10
#> singlecore.nAGQ0 237.4782 245.4084 262.6290 268.1308 274.8516 280.7944 10
#> multicore.nAGQ0 132.3356 132.9963 137.2777 135.8659 141.5145 144.2564 10
#> cld
#> d
#> c
#> b
#> a
My dataset is this:
sparsedf <- structure(list(colname1 = structure(c(8L, 3L, 4L, 7L, 2L, 6L,
5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("", "price106", "price142", "price185",
"price655", "price67", "price753", "price99"), class = "factor"),
colname2 = structure(c(2L, 3L, 8L, 15L, 5L, 4L, 12L, 9L,
10L, 7L, 11L, 6L, 13L, 14L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("", "price100", "price143",
"price16", "price271", "price29", "price3", "price36", "price391",
"price433", "price505", "price56", "price578", "price655",
"price753"), class = "factor"), colname3 = structure(c(2L,
8L, 4L, 5L, 6L, 7L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 19L, 20L, 3L, 18L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"price101", "price106", "price186", "price228", "price272",
"price314", "price33", "price354", "price392", "price434",
"price469", "price506", "price541", "price579", "price615",
"price652", "price67", "price686", "price720"), class = "factor"),
colname4 = structure(c(2L, 3L, 8L, 5L, 9L, 6L, 18L, 7L, 13L,
10L, 19L, 12L, 14L, 16L, 11L, 15L, 4L, 17L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("", "price102", "price144",
"price20", "price229", "price315", "price393", "price4",
"price46", "price470", "price52", "price542", "price55",
"price580", "price6", "price616", "price655", "price7", "price753"
), class = "factor"), colname6 = structure(c(1L, 2L, 3L,
4L, 6L, 7L, 8L, 9L, 10L, 11L, 5L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L), .Label = c("price104",
"price146", "price188", "price231", "price25", "price274",
"price317", "price356", "price395", "price436", "price472",
"price544", "price582", "price618", "price654", "price687",
"price722", "price752", "price779", "price809", "price835",
"price857", "price881", "price904", "price926", "price947",
"price966"), class = "factor"), colname7 = structure(c(2L,
4L, 5L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"price105", "price106", "price147", "price189"), class = "factor"),
colname9 = structure(c(2L, 3L, 4L, 5L, 6L, 7L, 11L, 8L, 9L,
10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("", "price107", "price149", "price191",
"price233", "price276", "price319", "price397", "price438",
"price474", "price57"), class = "factor"), colname11 = structure(c(2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 12L, 13L, 14L, 15L, 16L,
19L, 17L, 10L, 18L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"price109", "price12", "price193", "price235", "price278",
"price321", "price359", "price399", "price40", "price440",
"price475", "price511", "price547", "price585", "price621",
"price689", "price754", "price78"), class = "factor"), colname12 = structure(c(2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 10L, 24L, 25L, 26L, 27L,
1L), .Label = c("", "price110", "price150", "price194", "price236",
"price279", "price322", "price360", "price400", "price42",
"price441", "price476", "price512", "price548", "price586",
"price622", "price656", "price690", "price725", "price755",
"price782", "price812", "price838", "price884", "price907",
"price929", "price950"), class = "factor"), colname13 = structure(c(3L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 4L, 17L, 12L, 13L, 14L, 15L,
16L, 18L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"price106", "price11", "price12", "price13", "price15", "price237",
"price280", "price323", "price361", "price401", "price513",
"price549", "price587", "price623", "price657", "price67",
"price753"), class = "factor")), .Names = c("colname1", "colname2",
"colname3", "colname4", "colname6", "colname7", "colname9", "colname11",
"colname12", "colname13"), class = "data.frame", row.names = c(NA,
-27L))
I would like to use the following list (frequency_term_df), which contains the name of each term in the first column and the frequency of that term in the second column.
I would like to apply this list to the above data frame and remove the terms whose frequency is less than or equal to 2.
Is it possible to do this?
frequency_term_df <- structure(list(name = c("price99", "price100", "price101", "price102",
"price104", "price105", "price107", "price109", "price110", "price11",
"price142", "price143", "price33", "price144", "price146", "price147",
"price149", "price12", "price150", "price13", "price185", "price36",
"price186", "price4", "price188", "price189", "price191", "price193",
"price194", "price15", "price753", "price228", "price229", "price231",
"price106", "price233", "price235", "price236", "price237", "price271",
"price272", "price46", "price274", "", "price276", "price278",
"price279", "price280", "price67", "price16", "price314", "price315",
"price317", "price319", "price321", "price322", "price323", "price655",
"price56", "price354", "price7", "price356", "price57", "price359",
"price360", "price361", "price391", "price392", "price393", "price395",
"price397", "price399", "price400", "price401", "price433", "price434",
"price55", "price436", "price438", "price440", "price441", "price3",
"price469", "price470", "price472", "price474", "price475", "price476",
"price505", "price506", "price25", "price511", "price512", "price513",
"price29", "price541", "price542", "price544", "price547", "price548",
"price549", "price578", "price579", "price580", "price582", "price585",
"price586", "price587", "price615", "price616", "price618", "price621",
"price622", "price623", "price652", "price52", "price654", "price78",
"price656", "price657", "price686", "price6", "price687", "price689",
"price690", "price720", "price20", "price722", "price40", "price725",
"price752", "price754", "price755", "price779", "price782", "price809",
"price812", "price835", "price838", "price857", "price42", "price881",
"price884", "price904", "price907", "price926", "price929", "price947",
"price950", "price966"), Number = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 110L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
)), row.names = c(NA, -150L), class = c("data.table", "data.frame"
), .Names = c("name", "Number"), .internal.selfref = <pointer: 0x00000000003b0788>)
I'm assuming that by "remove" you mean turn the values into blank strings.
First, identify the names which have a frequency <= 2.
# Names of the terms whose recorded frequency (`Number`) is at most 2
freq2 <- with(frequency_term_df, name[Number <= 2])
Then use ifelse together with sapply to go through each column and replace the names you don't want with blanks.
# Blank out every low-frequency term, column by column.
# lapply() is used instead of sapply() so the result is always one vector per
# column: sapply() simplifies its result to a matrix, and for a one-row data
# frame to a plain vector, which as.data.frame() would turn into a single
# column. Assigning into `sparsedf2[]` keeps the original column names and the
# data-frame shape for any number of rows.
sparsedf2 <- as.data.frame(sparsedf)
sparsedf2[] <- lapply(
  sparsedf2,
  function(x) ifelse(x %in% freq2, "", as.character(x))
)
I've got a list with over 50.000 lines of Tweets. Now I've already exported the hashtags from that list but now I'm stuck with several thousand lines of hashtags which look like this
hashtag1;hashtag2;hashtag3;hashtag4
Since I want to do a co-hashtag-analysis I'm looking for a way to connect these multiple hashtags with each other without having to manually transform these lines into undirected edges. Example:
hashtag1;hashtag2
hashtag1;hashtag3
hashtag1;hashtag4
hashtag2;hashtag3
hashtag2;hashtag4
hashtag3;hashtag4
So, do you have an idea on how to accomplish this task (e.g. via R)? I'm an R-noob and even less "well versed" with other languages but I'm eager to learn.
structure(list(V1 = structure(c(1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 8L, 8L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 13L,
13L, 13L, 13L, 14L, 14L), .Label = c("profitkapital", "resupply",
"robotik", "rudidutschke", "russland", "sanktionen", "sanktionieren",
"schiller", "siegertyp", "snowden", "sockeleinkommen", "solidarity",
"sozialismus", "sozialphilosoph"), class = "factor"), V2 = structure(c(4L,
3L, 2L, 7L, 7L, 7L, 7L, 17L, 6L, 8L, 9L, 10L, 10L, 11L, 12L,
13L, 18L, 18L, 1L, 15L, 15L, 14L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 16L, 16L), .Label = c("alltag",
"arbeit", "bbq", "bge", "blockupy", "deutschland", "digitalisierung",
"griechenland", "grundeinkommen", "hartziv", "kenfm", "kirche",
"kopf", "kraft", "marx", "negt", "piraten", "sanktion"), class = "factor"),
V3 = structure(c(1L, 3L, 2L, 4L, 4L, 4L, 4L, 4L, 5L, 4L,
4L, 4L, 13L, 10L, 13L, 4L, 14L, 14L, 7L, 6L, 6L, 15L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L, 1L, 12L, 12L, 11L, 11L,
11L, 11L, 9L, 9L), .Label = c("", "abitur", "bbqrub", "bge",
"brd", "brecht", "deutschen", "fsa", "grundeinkommen", "hartziv",
"linkezukunft", "ows", "vatikan", "widerspruch", "würde"
), class = "factor"), V4 = structure(c(1L, 3L, 6L, 1L, 1L,
1L, 1L, 1L, 8L, 1L, 2L, 1L, 9L, 5L, 9L, 10L, 4L, 4L, 7L,
3L, 3L, 11L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
12L, 12L, 1L, 1L, 1L, 1L, 3L, 3L), .Label = c("", "bank",
"bge", "eilantrag", "haarp", "job", "jobcentern", "merkel",
"pastor", "probleme", "super", "unibrennt"), class = "factor"),
V5 = structure(c(1L, 3L, 5L, 1L, 1L, 1L, 1L, 1L, 7L, 1L,
10L, 1L, 2L, 9L, 2L, 4L, 8L, 8L, 6L, 1L, 1L, 6L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("", "bge", "bgenation", "fliegen", "geld",
"hartziv", "hitler", "sg", "ttip", "vorbild"), class = "factor"),
V6 = structure(c(1L, 5L, 2L, 1L, 1L, 1L, 1L, 1L, 6L, 1L,
1L, 1L, 8L, 4L, 8L, 7L, 4L, 4L, 4L, 1L, 1L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "altersarmut", "antifa", "bge", "deeznuts",
"holocaust", "klatsch", "sex"), class = "factor"), V7 = structure(c(1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 3L, 1L, 1L,
4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "bge",
"cia", "hartz", "spanishrevolution", "wahre"), class = "factor"),
V8 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "cityoflondon", "grund", "peace"), class = "factor"),
V9 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "bge", "occupy", "rothschild"), class = "factor"),
V10 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "ard", "gezi"), class = "factor"), V11 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "refugeeswelcome",
"zdf"), class = "factor"), V12 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "nolegida",
"wdr"), class = "factor"), V13 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "nopegida",
"swr"), class = "factor"), V14 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "nocastor",
"zukunft"), class = "factor")), .Names = c("V1", "V2", "V3",
"V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12", "V13",
"V14"), class = "data.frame", row.names = c(NA, -41L))
You can try the combn function from the combinat package, which generates all pairwise combinations:
library(combinat)
# Every unordered pair (m = 2) of the four tags; each column is one pair
combn(x = c("hashtag1", "hashtag2", "hashtag3", "hashtag4"), m = 2)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "hashtag1" "hashtag1" "hashtag1" "hashtag2" "hashtag2" "hashtag3"
[2,] "hashtag2" "hashtag3" "hashtag4" "hashtag3" "hashtag4" "hashtag4"
structure(list(Team = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = "Union", class = "factor"), Date = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 4L, 3L, 3L, 4L, 3L, 3L, 5L, 3L, 3L, 6L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 6L, 3L, 3L, 3L, 3L, 3L, 3L, 6L, 6L, 6L,
6L, 3L, 7L, 8L, 9L, 10L, 10L), .Label = c("2012-01-06", "2012-02-06",
"2012-03-06", "2012-04-06", "2012-05-06", "2012-07-06", "2012-09-06",
"2012-10-06", "2012-11-06", "2012-12-06"), class = "factor"),
STime = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "07:03", class = "factor"), ETime = structure(c(6L,
7L, 8L, 5L, 5L, 1L, 2L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 11L,
10L, 9L, 8L, 10L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 5L), .Label = c("01:13",
"03:13", "06:13", "09:13", "10:13", "11:13", "12:13", "13:13",
"15:13", "16:13", "18:13"), class = "factor")), .Names = c("Team",
"Date", "STime", "ETime"), class = "data.frame", row.names = c(NA,
-40L))
I am doing this:
# Scatter of ETime against Date with small points, one facet per Team.
# NOTE(review): group="Team" is a quoted string, i.e. a constant rather than
# the Team column -- confirm this is intended.
ggplot(df, aes(Date, ETime, group="Team")) + geom_point(size=0.3) + facet_wrap(~ Team)
I would like the y-axis to run from 00:00 to 23:29 in 2-hour increments. I tried scale_y_continuous, which is not working. Any suggestions?
I suggest converting your date and time columns to POSIXt-formatted data. Changing the axis breaks and labels then becomes easier. Currently, your dates and times are stored as factors.
library(ggplot2)
library(gridExtra)

# Change relevant columns from 'factor' to 'POSIXct'.
# strptime() on its own returns POSIXlt, a list-based class that is a poor fit
# for a data-frame column; ggplot2's datetime scales work with POSIXct, so the
# parsed values are converted explicitly.
# A time parsed without a date is placed on the current day, so the ETime
# values and the limits/breaks built below all fall on the same calendar day.
df$ETime <- as.POSIXct(strptime(as.character(df$ETime), "%H:%M"))
df$Date <- as.POSIXct(strptime(as.character(df$Date), "%Y-%m-%d"))

# Plot 1: default axes.
plot_1 <- ggplot(df, aes(x = Date, y = ETime)) +
  geom_point() +
  labs(title = "Plot 1")

# Manually set datetime limits and breaks: 00:00 to 23:29, one break every two
# hours, labelled as hours:minutes.
day_start <- as.POSIXct(strptime("00:00", "%H:%M"))
day_end <- as.POSIXct(strptime("23:29", "%H:%M"))
y_limits <- c(day_start, day_end)
y_breaks <- seq(from = day_start, to = day_end, by = "2 hours")
y_labels <- format(y_breaks, "%H:%M")

# Plot 2: same data with the fixed y-axis.
plot_2 <- ggplot(df, aes(x = Date, y = ETime)) +
  geom_point() +
  scale_y_datetime(limits = y_limits, breaks = y_breaks, labels = y_labels) +
  labs(title = "Plot 2")

# Write both plots side by side to a PNG file.
png("plots.png", width = 8, height = 4, units = "in", res = 120)
grid.arrange(plot_1, plot_2, nrow = 1)
dev.off()