Can't fit penalized logistic regression model using lrm function - r

I am using the rms library and the lrm function to do a penalized logistic regression.
Here is a sample of my data:
> dput(cs_data_train[1:50,])
structure(list(DataCRMSanoflore.Year_Sales = structure(c(1L,
2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
2L), .Label = c("2015", "2016", "2017"), class = "factor"), DataCRMSanoflore.HOURS_INSCR = c(14L,
18L, 17L, 16L, 11L, 22L, 23L, 17L, 9L, 21L, 18L, 19L, 12L, 11L,
17L, 16L, 21L, 20L, 14L, 19L, 22L, 17L, 22L, 13L, 19L, 13L, 21L,
16L, 23L, 19L, 11L, 21L, 11L, 22L, 20L, 13L, 11L, 17L, 15L, 12L,
15L, 21L, 17L, 14L, 10L, 17L, 10L, 12L, 18L, 13L), DataCRMSanoflore.Month_Sales = structure(c(9L,
2L, 5L, 9L, 4L, 7L, 3L, 9L, 7L, 12L, 3L, 3L, 12L, 3L, 3L, 6L,
3L, 4L, 5L, 8L, 8L, 1L, 4L, 10L, 9L, 5L, 4L, 9L, 2L, 12L, 9L,
4L, 4L, 3L, 6L, 8L, 6L, 4L, 12L, 5L, 6L, 9L, 7L, 9L, 1L, 9L,
7L, 11L, 11L, 4L), .Label = c("01", "02", "03", "04", "05", "06",
"07", "08", "09", "10", "11", "12"), class = "factor"), DataCRMSanoflore.Date_Sales = structure(c(3L,
10L, 22L, 23L, 26L, 13L, 12L, 2L, 25L, 11L, 10L, 9L, 4L, 10L,
18L, 9L, 9L, 1L, 14L, 24L, 4L, 2L, 2L, 22L, 17L, 4L, 14L, 22L,
2L, 5L, 29L, 13L, 2L, 10L, 25L, 5L, 10L, 1L, 6L, 20L, 7L, 9L,
1L, 3L, 17L, 22L, 3L, 9L, 20L, 13L), .Label = c("01", "02", "03",
"04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14",
"15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
"26", "27", "28", "29", "30", "31"), class = "factor"), DataCRMSanoflore.HOURS_INSCR.1 = c(14L,
18L, 17L, 16L, 11L, 22L, 23L, 17L, 9L, 21L, 18L, 19L, 12L, 11L,
17L, 16L, 21L, 20L, 14L, 19L, 22L, 17L, 22L, 13L, 19L, 13L, 21L,
16L, 23L, 19L, 11L, 21L, 11L, 22L, 20L, 13L, 11L, 17L, 15L, 12L,
15L, 21L, 17L, 14L, 10L, 17L, 10L, 12L, 18L, 13L), DataCRMSanoflore.Year_Creation_Sales = structure(c(1L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
2L), .Label = c("2015", "2016", "2017"), class = "factor"), DataCRMSanoflore.Month_Creation_Sales = structure(c(9L,
2L, 10L, 10L, 9L, 7L, 12L, 9L, 7L, 12L, 3L, 4L, 2L, 6L, 3L, 6L,
10L, 4L, 5L, 8L, 3L, 1L, 4L, 11L, 9L, 5L, 4L, 9L, 2L, 12L, 10L,
4L, 4L, 3L, 10L, 8L, 6L, 4L, 12L, 8L, 6L, 2L, 10L, 5L, 1L, 9L,
8L, 11L, 11L, 4L), .Label = c("01", "02", "03", "04", "05", "06",
"07", "08", "09", "10", "11", "12"), class = "factor"), DataCRMSanoflore.Day_Creation_Sales = structure(c(11L,
15L, 2L, 31L, 26L, 23L, 5L, 2L, 25L, 16L, 10L, 13L, 7L, 3L, 18L,
9L, 8L, 27L, 18L, 24L, 6L, 2L, 4L, 16L, 17L, 12L, 15L, 22L, 10L,
5L, 1L, 14L, 2L, 10L, 5L, 5L, 10L, 25L, 6L, 5L, 28L, 8L, 10L,
18L, 17L, 22L, 31L, 9L, 21L, 22L), .Label = c("01", "02", "03",
"04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14",
"15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
"26", "27", "28", "29", "30", "31"), class = "factor"), DataCRMSanoflore.Year_Validation_Sales = structure(c(1L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
2L), .Label = c("2015", "2016", "2017"), class = "factor"), DataCRMSanoflore.Month_Validation_Sales = structure(c(9L,
2L, 10L, 11L, 10L, 7L, 12L, 9L, 7L, 12L, 3L, 4L, 2L, 6L, 3L,
6L, 10L, 4L, 5L, 8L, 3L, 1L, 4L, 11L, 9L, 5L, 4L, 9L, 2L, 12L,
10L, 4L, 4L, 3L, 10L, 8L, 6L, 4L, 12L, 8L, 6L, 2L, 10L, 5L, 1L,
9L, 9L, 11L, 11L, 4L), .Label = c("01", "02", "03", "04", "05",
"06", "07", "08", "09", "10", "11", "12"), class = "factor"),
DataCRMSanoflore.Day_Validation_Sales = structure(c(14L,
16L, 3L, 3L, 1L, 27L, 6L, 5L, 27L, 21L, 19L, 27L, 8L, 5L,
21L, 10L, 9L, 30L, 26L, 27L, 7L, 4L, 15L, 17L, 18L, 13L,
20L, 29L, 11L, 7L, 2L, 16L, 3L, 20L, 6L, 6L, 13L, 29L, 8L,
6L, 30L, 9L, 12L, 20L, 18L, 29L, 1L, 10L, 23L, 25L), .Label = c("01",
"02", "03", "04", "05", "06", "07", "08", "09", "10", "11",
"12", "13", "14", "15", "16", "17", "18", "19", "20", "21",
"22", "23", "24", "25", "26", "27", "28", "29", "30", "31"
), class = "factor"), DataCRMSanoflore.AGE_CUSTUMER = c(37L,
23L, 34L, 32L, 45L, 52L, 44L, 55L, 37L, 29L, 33L, 29L, 30L,
37L, 56L, 48L, 44L, 42L, 45L, 33L, 37L, 53L, 55L, 60L, 57L,
33L, 51L, 32L, 35L, 54L, 41L, 47L, 59L, 33L, 45L, 35L, 36L,
28L, 42L, 24L, 32L, 39L, 33L, 36L, 49L, 56L, 45L, 39L, 54L,
55L), DataCRMSanoflore.MEAN_PURCHASE = c(71.75, 50.7142857142857,
18.6666666666667, 0, 0, 54.7, 0.666666666666667, 38, 6.5,
0, 83.3333333333333, 44.3333333333333, 25.7777777777778,
24.1818181818182, 23.3846153846154, 35.5294117647059, 21.6363636363636,
1.125, 6, 8.66666666666667, 18.4, 16.9285714285714, 0, 0,
36.5, 21.5, 18.5714285714286, 28.125, 101.333333333333, 0,
2, 0, 20.9166666666667, 69.1428571428571, 16.6666666666667,
1.5, 87.1666666666667, 48.25, 13.3333333333333, 20.5833333333333,
12, 0, 23, 15.1428571428571, 0, 30.4375, 30.3076923076923,
24.625, 23.4285714285714, 20.0833333333333), DataCRMSanoflore.NUMBER_GIFTS = c(1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 4L, 3L,
4L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 3L, 2L, 1L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 1L, 4L, 1L, 1L, 1L,
2L, 5L, 2L, 2L), SENSIBILITE = c(4L, 4L, 1L, 3L, 1L, 1L,
2L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 3L, 3L, 4L, 1L, 1L, 1L, 4L,
1L, 1L, 4L, 1L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 4L, 1L, 1L,
1L, 4L, 1L, 3L, 2L, 1L, 3L, 4L, 1L, 1L, 4L, 3L, 1L, 4L),
IMPERFECTIONS = c(4L, 3L, 1L, 2L, 1L, 1L, 4L, 1L, 1L, 1L,
3L, 1L, 2L, 1L, 3L, 2L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L,
3L, 3L, 3L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 3L, 1L, 2L,
3L, 1L, 2L, 2L, 1L, 1L, 3L, 3L, 1L, 3L), BRILLANCE = c(2L,
2L, 1L, 4L, 1L, 1L, 4L, 1L, 1L, 1L, 4L, 1L, 4L, 1L, 4L, 4L,
4L, 1L, 1L, 1L, 4L, 1L, 1L, 3L, 1L, 4L, 4L, 4L, 4L, 1L, 1L,
1L, 1L, 4L, 1L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 1L, 1L,
4L, 4L, 1L, 4L), GRAIN_PEAU = c(4L, 4L, 1L, 4L, 1L, 1L, 2L,
1L, 1L, 1L, 4L, 1L, 2L, 1L, 2L, 4L, 4L, 1L, 1L, 1L, 3L, 1L,
1L, 2L, 1L, 2L, 4L, 4L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 4L, 4L, 1L, 2L, 4L, 1L, 1L, 4L, 3L, 1L, 4L), RIDES_VISAGE = c(2L,
2L, 1L, 4L, 1L, 1L, 4L, 1L, 1L, 1L, 4L, 1L, 2L, 1L, 4L, 2L,
4L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 2L, 4L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 4L, 1L, 2L, 4L, 1L, 2L, 4L, 1L, 1L,
4L, 4L, 1L, 4L), ALLERGIES = c(2L, 2L, 1L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 3L, 2L, 1L, 2L), MAINS = c(4L,
4L, 1L, 4L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 3L,
3L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 3L, 4L, 4L, 3L, 1L, 1L,
1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 4L, 3L, 1L, 3L, 4L, 1L, 1L,
3L, 3L, 1L, 4L), PEAU_CORPS = c(3L, 3L, 1L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 3L, 1L, 1L, 1L, 2L, 1L,
1L, 3L, 1L, 3L, 3L, 2L, 3L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L,
3L, 1L, 3L, 2L, 1L, 2L, 4L, 1L, 1L, 3L, 3L, 1L, 3L), INTERET_ALIM_NATURELLE = c(4L,
4L, 1L, 2L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 4L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 4L, 1L, 4L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 4L, 4L, 1L, 4L, 2L, 1L, 1L,
4L, 2L, 1L, 2L), INTERET_ORIGINE_GEO = c(4L, 2L, 1L, 2L,
1L, 1L, 5L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 5L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 1L, 2L, 5L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 5L, 5L, 1L, 4L, 2L, 1L, 1L, 2L, 2L, 1L,
2L), INTERET_VACANCES = c(4L, 2L, 1L, 3L, 1L, 1L, 2L, 1L,
1L, 1L, 3L, 1L, 2L, 1L, 3L, 4L, 3L, 1L, 1L, 1L, 2L, 1L, 1L,
3L, 1L, 4L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 4L, 3L, 1L, 1L, 2L, 2L, 1L, 2L), INTERET_ENVIRONNEMENT = c(5L,
5L, 1L, 5L, 1L, 1L, 5L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 3L,
3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 1L,
1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 3L, 5L, 1L, 5L, 3L, 1L, 1L,
3L, 5L, 1L, 3L), INTERET_COMPOSITION = c(2L, 2L, 1L, 4L,
1L, 1L, 4L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 4L, 1L, 4L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 4L, 1L, 2L, 4L, 1L, 4L, 2L, 1L, 1L, 2L, 2L, 1L,
2L), DataCRMSanoflore.Nb_achats = c(4, 7, 3, 3, 4, 10, 3,
4, 14, 4, 6, 6, 9, 22, 26, 17, 22, 8, 3, 9, 10, 14, 3, 7,
12, 6, 14, 16, 3, 3, 3, 3, 12, 7, 3, 6, 6, 12, 18, 12, 15,
6, 21, 7, 6, 16, 13, 16, 14, 12), OUTCOME = structure(c(1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor")), .Names = c("DataCRMSanoflore.Year_Sales",
"DataCRMSanoflore.HOURS_INSCR", "DataCRMSanoflore.Month_Sales",
"DataCRMSanoflore.Date_Sales", "DataCRMSanoflore.HOURS_INSCR.1",
"DataCRMSanoflore.Year_Creation_Sales", "DataCRMSanoflore.Month_Creation_Sales",
"DataCRMSanoflore.Day_Creation_Sales", "DataCRMSanoflore.Year_Validation_Sales",
"DataCRMSanoflore.Month_Validation_Sales", "DataCRMSanoflore.Day_Validation_Sales",
"DataCRMSanoflore.AGE_CUSTUMER", "DataCRMSanoflore.MEAN_PURCHASE",
"DataCRMSanoflore.NUMBER_GIFTS", "SENSIBILITE", "IMPERFECTIONS",
"BRILLANCE", "GRAIN_PEAU", "RIDES_VISAGE", "ALLERGIES", "MAINS",
"PEAU_CORPS", "INTERET_ALIM_NATURELLE", "INTERET_ORIGINE_GEO",
"INTERET_VACANCES", "INTERET_ENVIRONNEMENT", "INTERET_COMPOSITION",
"DataCRMSanoflore.Nb_achats", "OUTCOME"), row.names = c(22L,
33L, 40L, 48L, 54L, 59L, 74L, 78L, 87L, 89L, 104L, 115L, 121L,
141L, 159L, 161L, 163L, 165L, 196L, 202L, 211L, 222L, 272L, 300L,
318L, 325L, 327L, 349L, 374L, 380L, 392L, 393L, 394L, 398L, 427L,
440L, 449L, 456L, 470L, 477L, 479L, 490L, 505L, 508L, 514L, 520L,
528L, 531L, 534L, 543L), class = "data.frame")
Then when I want to fit the model using this code:
fit = lrm(OUTCOME ~ .-1,data = cs_data_train,x=T, y=T)
It gives an error:
singular information matrix in lrm.fit (rank= 148 ). Offending
variable(s): DataCRMSanoflore.HOURS_INSCR.1 Error in lrm(OUTCOME ~ .
- 1, data = cs_data_train, x = T, y = T) : Unable to fit model using “lrm.fit”
I searched but I could not resolve this issue. Thank you for your help!
EDIT:
As suggested in the comment below, I need to remove one variable from each pair of highly correlated variables, so I wrote this code:
> highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=(0.7),verbose = FALSE)
> print(highlyCorrelated)
[1] 21 20 26 15 18 17 22 16 25 19 23 24 6 9 7 10 28 2
> important_var=colnames(DATA_BASE[,-highlyCorrelated])
> important_var
[1] "DataCRMSanoflore.Year_Sales" "DataCRMSanoflore.Date_Sales" "DataCRMSanoflore.HOURS_INSCR.1"
[4] "DataCRMSanoflore.Day_Creation_Sales" "DataCRMSanoflore.MEAN_PURCHASE" "OUTCOME"
> DATA_BASE<-DATA_BASE[,-highlyCorrelated]
> str(DATA_BASE)
'data.frame': 5775 obs. of 6 variables:
$ DataCRMSanoflore.Year_Sales : num 2 1 2 1 2 1 1 1 1 2 ...
$ DataCRMSanoflore.Date_Sales : num 13 3 10 22 23 26 13 1 12 2 ...
$ DataCRMSanoflore.HOURS_INSCR.1 : num 17 14 18 17 16 11 22 14 23 17 ...
$ DataCRMSanoflore.Day_Creation_Sales: num 13 11 15 2 31 26 23 1 5 2 ...
$ DataCRMSanoflore.MEAN_PURCHASE : num 0 71.8 50.7 18.7 0 ...
$ OUTCOME : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 2 2 1 1 ...
But I still get the same error:
Error in lrm(OUTCOME ~ . - 1, data = train, x = T, y = T) : Unable
to fit model using “lrm.fit”
This is really weird!
How can I resolve this, please?

Related

R: Can I make rename accept a function as a new varname

I have a data set and want to create a time series of means by education and by race. However, I am struggling to make rename() accept a command rather than just a string as a new variable name.
My code:
#libraries
install.packages(c("tidyverse", "spatstat"))
lapply(c("tidyverse", "spatstat"), require, character.only = TRUE)
#calculate weighted medians by race and education
wmedians <- lapply(data[,c("race", "education")],function(vars){
data %>%
group_by((vars), year) %>%
summarize(w_median = weighted.median(wealth, weight))%>%
rename(colnames(vars) = "(vars)")
})
This gives me the following error:
Error: unexpected '=' in:
" summarize(w_median = weighted.median(wealth, weight))%>%
rename(colnames(vars) ="
Desired Output:
At the moment I get a list with two tibbles where the column name of the first column is "(vars)". Instead I would like the column name to be "race" in the first tibble and "education" in the second one.
I experimented a bit, and my guess is that rename() generally does not accept a function call (such as colnames()) as a new variable name. Do you have any idea how to get around this?
Sample of my data:
structure(list(year = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L), .Label = c("1989", "1992", "1995", "1998", "2001",
"2004", "2007", "2010", "2013", "2016"), class = "factor"), weight = c(9084.9691295,
1571.9511258, 191.75635451, 204.62890325, 204.62890325, 20.462890325,
10.412082059, 144.25723032, 66.170395167, 17169.253056, 5240.2917738,
768.75688855, 152.72765752, 166.52285228, 4165.0038712, 12.696579164,
1.3158341152, 3574.4768327, 18.640983311, 687.040202, 6738.6966881,
0.9648849583, 2.9597366608, 20.019564258, 895.51359665, 1.3152534108,
3244.207427, 2.0000789024, 750.95122778, 1.6580375994, 4007.7581965,
4.7569235917, 180.73948443, 237.26008744, 2.8105880617, 2.8105880617,
1.7964957199, 4883.711226, 17.268444467, 2.9783310762, 354.15138196,
162.00933944, 1.6450475811, 1.3755398392, 4174.6347012, 44.17020127,
4987.2079388, 1.3755398392, 18.01293584, 3.3426730968, 1.5455142055,
904.20169275, 12.578831203, 10051.580218, 162.70814346, 2.1257090517,
2.1257090517, 1027.7013368, 8166.4587927, 1.7239086827, 2.1374243666,
0.901741906, 2.9900010571, 33.443685091, 12913.631224, 5973.1098594,
9527.6211412, 2.2331957715, 376.47055359, 2.9488054663, 2.9488054663,
2.8570458091, 8.0625166988, 4.7867036342, 923.0539464, 2.7381019933,
5800.0572063, 4.3379657179, 0.8195417131, 6108.0937784, 23.232719795,
121.97520298, 1684.0365357, 4.7867036342, 4.8109344834, 479.31438165,
4.8109344834, 3.7209489469, 5257.7592767, 3220.9438379, 3.1738803883,
3.8910375552, 3.7209489469, 30.231850875, 3277.3748665, 32.237421329,
2.7966498146, 3208.415157, 34.375315295, 30.200771547, 31.811971048,
20.701306688, 2.7966498146, 31.968899323, 33.280487562, 2548.6399138,
32.94638396, 2.7966498146, 2.6033164134, 30.815250688, 23.702590485,
31.465956118, 29.46116036, 760.36545895, 8.3114397117, 23.702590485,
4.6504256805, 3204.2440292, 6.8002099257, 35.813850525), race = structure(c(1L,
1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L,
1L, 4L, 1L, 1L, 4L, 4L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L,
1L, 4L, 4L, 4L, 1L, 4L, 4L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 4L, 4L,
4L, 4L, 1L, 4L, 1L, 1L, 4L, 1L, 1L, 1L, 4L, 4L, 4L, 1L, 1L, 1L,
1L, 1L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 1L, 4L, 1L, 4L, 4L, 1L, 1L,
1L, 1L, 4L, 4L, 1L, 4L, 4L, 1L, 1L, 1L, 1L, 4L, 4L, 1L, 4L, 4L,
1L, 1L, 4L, 1L, 4L, 4L, 4L, 1L, 1L, 1L, 4L, 4L, 1L, 1L, 1L, 4L,
1L, 4L, 4L, 4L, 1L, 4L, 1L), .Label = c("black", "Hispanic",
"other", "white"), class = "factor"), education = structure(c(2L,
1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("college degree", "no college",
"some college"), class = "factor"), wealth = c(370932.98, 10170000.94,
12598660.39, 114293258.81, 211275067.86, 290691670.17, 85726008.16,
230065771.49, 246480115.73, 349587.55, 378694.96, 3164512.87,
8495442.34, 6020105.92, 505133.05, 367073411.82, 2928346179.67,
533643.21, 554996993.66, 5648836.48, 392098.63, 692454429.71,
872798466.66, 352917443.83, 798505.65, 1250534235.01, 645694.94,
570963643.71, 10983328.56, 360732249.01, 717840.77, 365513260.49,
7204246.71, 8901952.1, 473348324.51, 686068914.54, 566191645.55,
451622.81, 376484717.9, 702785331.9, 6677625.87, 5826581.94,
538339875.62, 81126854.16, 1073395.38, 595512233.49, 769008.48,
363748981.28, 203466108.48, 801021687.19, 744505545.07, 5219227.59,
132716087.05, 813859.01, 4429075.76, 376023173.93, 418753292.21,
591392.47, 986060.58, 740193054.24, 305729499.91, 815008777.34,
292998224.45, 25586473.36, 1482007.57, 832890.03, 1431390.64,
465587944.43, 31578347.08, 771207206.27, 774386788.46, 283388639.66,
491131539.34, 1106105605.16, 15792325.59, 448260665.36, 1598512.35,
256789830.23, 1327614423.44, 1034918.02, 44314918.71, 47310703.56,
1174945.19, 1050196871.69, 791040687.75, 40226229.67, 1193945180.43,
882118783.02, 865693.49, 598203.92, 94409003.49, 321989895.39,
611845894.05, 414941965.04, 1697121.78, 192234467.65, 1200517207.56,
1522890.02, 42378401.16, 269560657.85, 49377539.54, 450366559.75,
1362371354.42, 254034284.01, 65726116.22, 840350.29, 87111742.24,
1116803883.14, 1703800000, 248761000, 104962000, 111565400, 1224715000,
1405000, 1196478000, 981045000, 535052000, 2077700, 444554200,
41907500)), row.names = c(NA, -120L), groups = structure(list(
year = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L), .Label = c("1989", "1992", "1995", "1998", "2001",
"2004", "2007", "2010", "2013", "2016"), class = "factor"),
education = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L), .Label = c("college degree", "no college", "some college"
), class = "factor"), race = structure(c(1L, 4L, 1L, 4L,
1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L,
4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L,
1L, 4L, 1L, 4L, 1L, 4L), .Label = c("black", "Hispanic",
"other", "white"), class = "factor"), .rows = structure(list(
c(2L, 3L, 12L), c(5L, 6L, 9L), c(1L, 10L, 11L), c(4L,
7L, 8L), c(13L, 14L, 20L), c(17L, 22L, 23L), c(15L, 18L,
21L), c(16L, 19L, 24L), c(29L, 33L, 34L), c(26L, 35L,
36L), c(25L, 27L, 31L), c(28L, 30L, 32L), c(41L, 42L,
44L), c(37L, 40L, 43L), c(38L, 45L, 47L), c(39L, 46L,
48L), c(52L, 55L, 57L), c(50L, 51L, 60L), c(54L, 58L,
59L), c(49L, 53L, 56L), c(63L, 64L, 69L), c(62L, 70L,
71L), 65:67, c(61L, 68L, 72L), c(75L, 81L, 82L), c(74L,
79L, 84L), c(77L, 80L, 83L), c(73L, 76L, 78L), c(86L,
91L, 92L), c(85L, 87L, 88L), c(89L, 90L, 95L), c(93L,
94L, 96L), c(101L, 105L, 107L), c(97L, 103L, 108L), c(98L,
99L, 106L), c(100L, 102L, 104L), 110:112, c(109L, 113L,
115L), c(114L, 118L, 120L), c(116L, 117L, 119L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Thanks a lot in advance!
Pass the column names to lapply and use .data to refer to them in group_by; that way you don't need rename at all, because the column names are preserved.
library(dplyr)
library(spatstat)
lapply(c("race", "education"),function(vars){
data %>%
group_by(.data[[vars]], year) %>%
summarize(w_median = weighted.median(wealth, weight))
}) -> result
result

Weird results in ANOVA: the three-way interaction has a pvalue of exactly 1

I am testing a three-way interaction, predicting 'judgment' from 'factor_1' (between-subject, two levels), 'factor_2' (between-subject, two levels) and 'factor_3' (within-subject, two levels). I have 120 participants (30 in each combination of factor_1 and factor_2).
model <- aov(
judgment ~ factor_1*factor_2*factor_3 +
Error(participant/factor_3),
data = MyData)
summary(model)
I got a strange result for the three-way interaction: the Sum Sq, Mean Sq, and F value are all exactly 0, and the p-value is 1.
How is this possible?
Here are my data:
MyData = structure(list(participant = structure(c(1L, 1L, 2L, 2L, 3L,
3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L,
11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L,
17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L, 23L,
24L, 24L, 25L, 25L, 26L, 26L, 27L, 27L, 28L, 28L, 29L, 29L, 30L,
30L, 31L, 31L, 32L, 32L, 33L, 33L, 34L, 34L, 35L, 35L, 36L, 36L,
37L, 37L, 38L, 38L, 39L, 39L, 40L, 40L, 41L, 41L, 42L, 42L, 43L,
43L, 44L, 44L, 45L, 45L, 46L, 46L, 47L, 47L, 48L, 48L, 49L, 49L,
50L, 50L, 51L, 51L, 52L, 52L, 53L, 53L, 54L, 54L, 55L, 55L, 56L,
56L, 57L, 57L, 58L, 58L, 59L, 59L, 60L, 60L, 61L, 61L, 62L, 62L,
63L, 63L, 64L, 64L, 65L, 65L, 66L, 66L, 67L, 67L, 68L, 68L, 69L,
69L, 70L, 70L, 71L, 71L, 72L, 72L, 73L, 73L, 74L, 74L, 75L, 75L,
76L, 76L, 77L, 77L, 78L, 78L, 79L, 79L, 80L, 80L, 81L, 81L, 82L,
82L, 83L, 83L, 84L, 84L, 85L, 85L, 86L, 86L, 87L, 87L, 88L, 88L,
89L, 89L, 90L, 90L, 91L, 91L, 92L, 92L, 93L, 93L, 94L, 94L, 95L,
95L, 96L, 96L, 97L, 97L, 98L, 98L, 99L, 99L, 100L, 100L, 101L,
101L, 102L, 102L, 103L, 103L, 104L, 104L, 105L, 105L, 106L, 106L,
107L, 107L, 108L, 108L, 109L, 109L, 110L, 110L, 111L, 111L, 112L,
112L, 113L, 113L, 114L, 114L, 115L, 115L, 116L, 116L, 117L, 117L,
118L, 118L, 119L, 119L, 120L, 120L), .Label = c("101", "102",
"103", "104", "105", "106", "107", "108", "109", "110", "111",
"112", "113", "114", "115", "116", "117", "118", "119", "120",
"121", "122", "123", "124", "125", "126", "127", "128", "129",
"130", "131", "132", "133", "134", "135", "136", "137", "138",
"139", "140", "141", "142", "143", "144", "145", "146", "147",
"148", "149", "150", "151", "152", "153", "154", "155", "156",
"157", "158", "159", "160", "161", "162", "163", "164", "165",
"166", "167", "168", "169", "170", "171", "172", "173", "174",
"175", "176", "177", "179", "180", "181", "182", "183", "184",
"185", "186", "187", "188", "189", "190", "191", "192", "193",
"194", "195", "196", "197", "198", "199", "200", "201", "202",
"203", "204", "205", "206", "207", "208", "209", "210", "211",
"212", "213", "214", "215", "216", "217", "218", "219", "220",
"221"), class = "factor"), factor_1 = structure(c(2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("L",
"P"), class = "factor"), factor_2 = structure(c(1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L), .Label = c("1",
"2"), class = "factor"), factor_3 = structure(c(1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("HighLoss",
"LowLoss"), class = "factor"), judgment = c(10L, 5L, 10L, 10L,
5L, 5L, 5L, 1L, 7L, 5L, 8L, 7L, 5L, 5L, 10L, 10L, 3L, 6L, 4L,
6L, 10L, 10L, 10L, 6L, 10L, 10L, 1L, 1L, 8L, 8L, 6L, 6L, 8L,
10L, 8L, 1L, 5L, 5L, 4L, 4L, 3L, 3L, 5L, 2L, 10L, 10L, 8L, 8L,
7L, 5L, 7L, 10L, 10L, 10L, 4L, 4L, 5L, 5L, 5L, 5L, 10L, 10L,
6L, 6L, 3L, 2L, 6L, 6L, 7L, 5L, 10L, 9L, 8L, 8L, 6L, 5L, 6L,
6L, 8L, 10L, 6L, 6L, 7L, 7L, 5L, 5L, 10L, 6L, 10L, 10L, 10L,
6L, 10L, 10L, 10L, 7L, 8L, 8L, 10L, 10L, 9L, 10L, 10L, 10L, 6L,
8L, 10L, 10L, 6L, 6L, 6L, 3L, 6L, 8L, 5L, 7L, 10L, 10L, 7L, 5L,
3L, 3L, 6L, 3L, 10L, 10L, 10L, 10L, 10L, 7L, 8L, 10L, 8L, 5L,
9L, 6L, 6L, 6L, 8L, 8L, 10L, 10L, 10L, 10L, 5L, 5L, 6L, 3L, 9L,
9L, 2L, 1L, 6L, 6L, 10L, 10L, 8L, 8L, 4L, 8L, 5L, 9L, 10L, 10L,
10L, 10L, 8L, 8L, 5L, 5L, 8L, 8L, 4L, 3L, 6L, 6L, 1L, 1L, 10L,
10L, 10L, 10L, 7L, 9L, 8L, 8L, 7L, 7L, 5L, 5L, 6L, 6L, 5L, 5L,
8L, 8L, 1L, 1L, 2L, 3L, 8L, 6L, 8L, 8L, 8L, 6L, 7L, 9L, 10L,
10L, 4L, 4L, 10L, 10L, 10L, 10L, 10L, 10L, 5L, 5L, 1L, 1L, 10L,
10L, 4L, 1L, 10L, 10L, 6L, 6L, 7L, 7L, 7L, 9L, 5L, 5L, 10L, 10L,
7L, 2L)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-240L), .Names = c("participant", "factor_1", "factor_2", "factor_3",
"judgment"))

partial eta squared error

I have a table of means av.rt with 3 factors: Subject (N=28), Reward (rewarded or non-rewarded), Congruency (congruent or incongruent), and numeric values - reaction times. My table has 108 observations for all Subject/Reward/Congruency combinations.
> dput(av.rt)
structure(list(Reward = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L), .Label = c("R", "U"), class = "factor"),
Congruency = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("con",
"inc"), class = "factor"), Subject = structure(c(1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L,
12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L,
15L, 15L, 15L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L,
18L, 18L, 18L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 21L,
21L, 21L, 21L, 22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 24L,
24L, 24L, 24L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 27L,
27L, 27L, 27L), .Label = c("9", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
"26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36"), class = "factor"), TTime.Shifted = c(565.618181818182,
605.669565217391, 554.683333333333, 655.486956521739, 577.247619047619,
631.409523809524, 591.419047619048, 757.270588235294, 520.191304347826,
607.617391304348, 495.290909090909, 648.842105263158, 500.9,
585.945454545455, 521.105882352941, 768.8, 553.009090909091,
622.078260869565, 564.636363636364, 696.094736842105, 484.6,
553.739130434783, 636.657142857143, 635.817391304348, 618.017391304348,
713.145454545455, 600.533333333333, 857.61, 437.582608695652,
526.572727272727, 445.390476190476, 531.345454545455, 565.104347826087,
613.746666666667, 609.714285714286, 614.87, 566.716666666667,
687.99, 588.694736842105, 655.555555555556, 462.53, 726.34,
611.8625, 803.2, 485.234782608696, 609.284210526316, 504.2,
647.46, 471.226086956522, 554.741666666667, 506.62, 631.009523809524,
617.991304347826, 623.252173913044, 655.895652173913, 659.85,
605.318181818182, 580.947826086957, 619.675, 657.857142857143,
523.22, 626.976470588235, 565.233333333333, 711.336842105263,
455.716666666667, 533.818181818182, 464.5, 663.505263157895,
549.172727272727, 608.85, 579.2, 602.952941176471, 646.383333333333,
626, 732.555555555556, 663.91, 469.48, 578.095238095238,
568.694736842105, 620.922222222222, 571.37, 603.386666666667,
650.109090909091, 632.233333333333, 584.363636363636, 643.811111111111,
594.961904761905, 723.82, 515.845454545455, 526.75652173913,
594.647619047619, 545.878260869565, 541.38, 550.15652173913,
538.778947368421, 618.175, 539.219047619048, 563.842105263158,
539.12380952381, 684.018181818182, 430.217391304348, 590.85,
439.547826086957, 605.634782608696, 591.933333333333, 673.742857142857,
633.441666666667, 762.022222222222)), .Names = c("Reward",
"Congruency", "Subject", "TTime.Shifted"), row.names = c(NA,
-108L), class = "data.frame")
I run a repeated-measures ANOVA and a function to calculate eta squared:
aov_CSRA <- with(av.rt, aov(TTime.Shifted ~ Reward*Congruency + Error(Subject / (Reward*Congruency))))
summary(aov_CSRA)
library(lsr)
etaSquared( aov_CSRA )
However, I get an error:
Error in etaSquared(aov_CSRA) : "x" must be a linear model object
How can I fix it?
After a quick Google search I found out that there is another package with a function to compute eta squared: package DescTools, function EtaSq().
library(DescTools)

# Same repeated-measures model as in the question, but the data frame is
# supplied through aov()'s `data` argument instead of wrapping the call in
# with().
aov_CSRA <- aov(
  formula = TTime.Shifted ~ Reward * Congruency +
    Error(Subject / (Reward * Congruency)),
  data = av.rt
)
summary(aov_CSRA)

# Effect sizes only.
EtaSq(aov_CSRA, type = 1, anova = FALSE)
# eta.sq eta.sq.part eta.sq.gen
#Reward 0.281762511 0.6430538 0.31363362
#Congruency 0.094438684 0.7177132 0.13281446
#Reward:Congruency 0.007180224 0.1280018 0.01151048

# Effect sizes together with the ANOVA table columns.
EtaSq(aov_CSRA, type = 1, anova = TRUE)
# eta.sq eta.sq.part eta.sq.gen SS df MS
#Reward 0.281762511 0.6430538 0.31363362 193165.658 1 193165.658
#Congruency 0.094438684 0.7177132 0.13281446 64743.569 1 64743.569
#Reward:Congruency 0.007180224 0.1280018 0.01151048 4922.489 1 4922.489
# SSE dfE F p
#Reward 107222.34 26 46.840119 2.895631e-07
#Congruency 25464.56 26 66.104912 1.304265e-08
#Reward:Congruency 33533.93 26 3.816574 6.158667e-02
And it worked at the first try.
Note that I've changed the call to aov a little bit: instead of wrapping it in with(), I pass the data frame through aov's data argument. I find that this makes the code more readable.

How do I reduce this data frame by groups?

I have the following
t <- structure(list(name = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Alice", "Bob",
"Jane Doe", "John Doe"), class = "factor"), school = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Alice School",
"Bob School", "Someother School", "Someschool College"), class = "factor"),
group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("A", "B"), class = "factor"),
question = structure(c(2L, 4L, 6L, 8L, 1L, 3L, 5L, 7L, 2L,
4L, 6L, 8L, 1L, 3L, 5L, 7L, 2L, 4L, 6L, 8L, 1L, 3L, 5L, 7L,
2L, 4L, 6L, 8L, 1L, 3L, 5L, 7L), .Label = c("q1", "q2", "q3",
"q4", "q5", "q6", "q7", "q8"), class = "factor"), mark = c(0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L,
1L), subject = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("C", "M"), class = "factor")), .Names = c("name",
"school", "group", "question", "mark", "subject"), row.names = c(7L,
15L, 23L, 31L, 3L, 11L, 19L, 27L, 8L, 16L, 24L, 32L, 4L, 12L,
20L, 28L, 6L, 14L, 22L, 30L, 2L, 10L, 18L, 26L, 5L, 13L, 21L,
29L, 1L, 9L, 17L, 25L), class = "data.frame")
and I need to produce a data frame in which each student has one combined mark for each subject. The combination is simply a sum of the marks on each question. So, for example, Jane Doe will have 3 on subject C and 2 on subject M. I've been banging my head for long enough with Reduce and other approaches. I could possibly solve this in a very procedural way, but if I could do that with a one-liner (or close approximation), I'd be happier. I'm sure it can be done...
You said it in your question: you want to group by student and subject (group_by) and compute the sum of the marks.
library(tidyverse)

# Total mark per student and subject. The question's data frame is named `t`
# (the original snippet piped from an undefined object `asdf`).
t %>%
  group_by(name, subject) %>%
  summarise(score = sum(mark)) %>%
  # summarise() only peels off the last grouping level; drop the remaining
  # grouping by `name` so later verbs operate on the whole table.
  ungroup()
Here is a data.table solution:
library(data.table)

# setDT() turns `t` into a data.table in place; the sum of `mark` is then
# computed per (name, subject) group. The unnamed result column is `V1`.
setDT(t)[, sum(mark), by = .(name, subject)]
And just for completeness, base R:
# Sum `mark` within every combination of `name` and `subject`.
aggregate(mark ~ name + subject, data = t, FUN = sum)
This says "aggregate the response variable mark by the grouping variables name and subject, using sum as the aggregation function".

format color and legend in ggplot geom_tile of p-values

I am currently trying to make a 'heat map' using ggplot2 to display a series of p-values, but can't figure out how to tailor the actual color assignments and legend.
# Reshape to long format.
sampledata.m <- melt(sampledata)

# Rebuild var2 and var1 as ordered factors whose levels follow the order of
# first appearance in the data, so the plot axes keep that order.
sampledata.m$var2 <- factor(
  as.character(sampledata.m$var2),
  levels = unique(as.character(sampledata.m$var2)),
  ordered = TRUE
)
sampledata.m$var1 <- factor(
  as.character(sampledata.m$var1),
  levels = unique(as.character(sampledata.m$var1)),
  ordered = TRUE
)
This was done so that I could maintain the order of my variables.
# Heat map of p-values: one tile per (var2, var1) pair, filled by `value`.
# The original snippet had an unmatched extra ")" after `limits = c(0, 1)`,
# which made it unparseable; it is removed here.
# NOTE(review): `colours` lists 3 colours while `values` lists 5 positions.
# ggplot2 expects one position per colour, so this mismatch is the likely
# reason the gradient and legend do not follow the intended rescale.
p <- ggplot(sampledata.m, aes(var2, var1)) +
  geom_tile(aes(fill = value), colour = "transparent") +
  scale_fill_gradientn(
    colours = c("light green", "dark green", "black"),
    values = rescale(c(0, 0.0003, 0.05, 0.5, 1)),
    limits = c(0, 1)
  )

# `base_size` is not defined in this snippet; it must exist beforehand.
p + theme_bw(base_size = base_size) + labs(x = "", y = "") +
  scale_x_discrete(expand = c(0, 0)) +
  theme(
    legend.position = "bottom",
    axis.ticks = element_blank(),
    axis.text.x = element_text(
      size = base_size * 0.8, angle = 310,
      hjust = 0, colour = "black"
    )
  )
This creates a nice looking plot, however my legend and my color gradient don't represent the rescale that I assigned. Forgive my ignorance if this is a simple fix, but I've only been coding R for about 2 weeks now. Ideally, I would love my plot and legend to mimic the color scheme and legend labeling similar to this paper: http://www.ncbi.nlm.nih.gov/pubmed/22496159
structure(list(var1 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L
), .Label = c("A", "B", "C",
"D", "E"), class = "factor"), var2 = structure(c(1L,
5L, 23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L,
3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L, 23L, 18L, 9L,
8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L, 3L, 19L, 16L,
4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L, 23L, 18L, 9L, 8L, 14L, 12L,
20L, 6L, 21L, 11L, 2L, 22L, 10L, 3L, 19L, 16L, 4L, 7L, 15L, 17L,
13L, 24L, 1L, 5L, 23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L,
2L, 22L, 10L, 3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L,
23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L,
3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L), .Label = c("1", "2",
"3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21",
"22", "23", "24"), class = "factor"), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "pvalue", class = "factor"),
value = c(0.810172671, 0.596026338, 0.076550169, 0.908670635,
0.300418653, 0.051553286, 0.124196482, 0.601568833, 0.058431468,
0.341726981, 0.876674726, 0.002698295, 0.812059425, 0.068199656,
0.758383287, 0.60362134, 0.89265723, 0.246111936, 0.156348035,
0.909574522, 0.020202377, 0.388843992, 0.769441835, 0.102272916,
0.38895717, 0.882296525, 0.792438683, 0.000491393, 0.004233434,
0.202424095, 0.426941568, 0.08520186, 0.763036306, 0.602828564,
0.037278697, 0.121642743, 0.669123606, 0.974328438, 0.834329923,
0.050413697, 0.078476666, 0.387647156, 0.000540422, 0.379576632,
0.361428444, 0.502439758, 0.001326035, 0.027652693, 0.188885638,
0.579244445, 0.471985778, 0.677458228, 0.119307242, 0.364857868,
0.238260538, 0.53472206, 0.204344281, 0.291888993, 0.295809688,
0.00029, 0.005476157, 0.960975822, 0.00029, 0.055915429,
0.618284682, 0.040605253, 0.521649682, 0.421086546, 0.164333061,
0.755528982, 0.306854182, 0.012832628, 0.270393143, 0.946675764,
0.59227376, 0.112658388, 0.429091426, 0.01662083, 0.017342483,
0.065817234, 0.012140224, 0.359828816, 0.031969725, 0.00029,
0.14555102, 0.18865081, 0.00029, 0.064107531, 0.505257768,
0.070224536, 0.017082975, 0.375864198, 0.00029, 0.104103689,
0.898979883, 0.004879605, 0.003597954, 0.036722932, 0.849058218,
0.00029, 0.003739938, 0.00029, 0.00029, 0.00029, 0.008179017,
0.193870353, 0.460181712, 0.389475522, 0.00029, 0.8785017,
0.070414642, 0.584977921, 0.990764677, 0.767253318, 0.002234906,
0.051331823, 0.00446149, 0.234477639, 0.275139791)), .Names = c("var1", "var2", "variable", "value"), row.names = c(NA, -119L), class = "data.frame")
I'm not going to get into all of the theme settings you've got - as I understand it the key of your problem is the scale of the fill gradient. You can set this in scale_fill_gradient() with a log transformation:
# Same tile plot, but the fill scale is log-transformed so that small
# p-values are spread over more of the colour range.
p <- ggplot(data = sampledata.m, mapping = aes(x = var2, y = var1)) +
  geom_tile(mapping = aes(fill = value), colour = "transparent") +
  scale_fill_gradient(
    low = "light green",
    high = "black",
    trans = "log",
    breaks = c(0, 0.001, 0.05, 0.5)
  )
# Small example: two numeric columns spanning several orders of magnitude.
dt <- data.frame(
  N = letters[5:11],
  a = c(0.01, 0.05, 0.1, 0.5, 1, 5, 10),
  b = c(10, 20, 50, 100, 200, 1000, 2000)
)

# Long format: one row per (N, Cls) pair with its value in `Val`.
dt.mlt <- melt(dt, variable.name = "Cls", value.name = "Val")

# Tiles coloured on a log-transformed diverging scale, with the raw value
# printed on each tile.
ggplot(data = dt.mlt, mapping = aes(x = N, y = Cls, fill = Val)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "green",
    high = "red",
    mid = "black",
    trans = "log",
    breaks = c(0, 0.01, 0.1, 1, 10, 100, 1000)
  ) +
  geom_text(data = dt.mlt, mapping = aes(x = N, y = Cls, label = Val))
But if I add the midpoint=10 to the scale_fill_gradient2, the picture will become:

Resources