Related
Why cant I run a Breusch-Pagan Test bptest() on a linear mixed effect model lmer() in order to test for heteroscedasticity? The bptest function works fine on models built with lm and glmer but not lmer. Is there a different function I should be using?
error message
Error: $ operator not defined for this S4 class
data <- structure(list(Mn_new = c(3.90508190744665, 3.41518826685297,
3.98107659173858, 4.06706444435455, 2.40431879320057, 3.8090250549363,
3.72177711209025, 2.93248691964847, 4.10035133820019, 4.20508065155943,
3.64103189844949, 4.24257964492719, 4.20182664641102, 3.41263061412322,
4.04144915900294, 4.28185091235415, 3.09415352803393, 3.67021392570071,
3.56418529613595, 3.21715355220772, 3.21429992539095, 3.54553486317315,
4.03025205893711, 2.97382166830262, 3.80757707518732, 3.78523559035143,
3.41487105608904, 2.75799799020337, 3.06834870580776, 3.30533869585591,
2.8380338262522, 2.65147541433061, 3.53356800468757, 2.51733199167976,
3.16115687664055, 3.64858366279116, 3.48272937241829, 2.91621249433787,
3.26028181088023, 3.49589461456199, 2.82832109354896, 3.40328200399306,
3.28568362736306, 2.87324453863543, 3.10651957200347, 2.81769064140214,
2.57165695575711, 2.97592292304521, 3.18174081921005, 3.54312301316704,
2.70447719350618, 3.48454089015539, 3.39666701335652, 3.03088932872189,
3.1057376517166, 2.91083893666025, 3.18752169045788, 3.04054322208808,
3.04284811683015, 3.53376439846743, 3.57155887085371, 2.67921235204479,
3.24539585432457, 3.32270430796322, 3.75933211625452, 3.30303225771367,
2.94140225772847, 3.22916966186489, 3.45512223500913, 2.89996056576201,
3.19536565883228, 2.49108662931588, 2.55337036896523, 2.98316003461686,
3.58241577241437, 3.40385600372579, 3.66136967423154, 3.71807222845311,
3.73004186004765, 4.10988004656572, 3.90759927253415, 2.86608298949975,
3.61450793458081, 3.85162032119424, 4.44992983828838, 3.19109366840847,
3.09329595776341, 3.69955310870145, 4.47202033690943, 3.61326633240611,
3.64532602062922, 3.33230174866167, 2.74653680127074, 3.61473897523957
), SEX = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("F", "M"), class = "factor"), S_M = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("AFTER",
"BEFORE"), class = "factor"), ID = structure(c(43L, 40L, 25L,
17L, 1L, 20L, 4L, 13L, 45L, 32L, 28L, 5L, 14L, 21L, 44L, 9L,
16L, 42L, 18L, 35L, 22L, 10L, 8L, 36L, 37L, 15L, 19L, 43L, 40L,
25L, 17L, 1L, 20L, 4L, 13L, 45L, 32L, 28L, 5L, 14L, 21L, 44L,
9L, 16L, 42L, 18L, 35L, 22L, 10L, 8L, 36L, 37L, 15L, 19L, 47L,
46L, 34L, 38L, 29L, 41L, 33L, 26L, 23L, 27L, 24L, 11L, 7L, 3L,
6L, 12L, 30L, 39L, 2L, 31L, 47L, 46L, 34L, 38L, 29L, 41L, 33L,
26L, 23L, 27L, 24L, 11L, 7L, 3L, 6L, 12L, 30L, 39L, 2L, 31L), .Label = c("BLA1",
"BLA10", "BLA14", "BLA16", "BLA17", "BLA2", "BLA20", "BLA202",
"BLA203", "BLA205", "BLA21", "BLA211", "BLA213", "BLA214", "BLA215",
"BLA216", "BLA217", "BLA219", "BLA221", "BLA224", "BLA228", "BLA23",
"BLA238", "BLA24", "BLA248", "BLA25", "BLA27", "BLA270", "BLA283",
"BLA294", "BLA296", "BLA300", "BLA307", "BLA31", "BLA33", "BLA36",
"BLA38", "BLA42", "BLA47", "BLA48", "BLA5", "BLA53", "BLA60",
"BLA61", "BLA74", "BLA79", "BLA80"), class = "factor")), class = "data.frame", row.names = c(NA,
-94L))
code for lmer
#Mg
Mg_model <- lmer(Mg_new ~ SEX * S_M + (1|ID), data=data)
summary(Mg_model)
library(lmtest)
bptest(Mg_model)
error
Error: $ operator not defined for this S4 class
The Breusch-Pagan test "fits a linear regression model to the residuals of a linear regression model ... By default the same explanatory variables are taken as in the main regression model".
The version in base R "works" for lm and glm models, but I wouldn't trust it for glm models — as far as I know the test doesn't apply, it's just that the generic functions it uses also work for glm objects. (Contrary to your question, it throws an error for glmer fits - maybe you meant to say glm?)
I don't know offhand if the B-P test has been extended to cover the LMM case. If you had continuous predictors it would be tricky, but as you only have factors you can use a Levene's test as in this answer:
library(lme4)
library(broom.mixed)
library(ggplot2)
Mn_model <- lmer(Mn_new ~ SEX * S_M + (1|ID), data=data)
aa <- augment(Mn_model, .data = data)
ggplot(aa, aes(x = interaction(S_M,SEX), y = .resid)) + geom_boxplot()
car::leveneTest(.resid ~ S_M*SEX, data = aa)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.271 0.08566 .
## 90
I have the following data set:
structure(list(Age = c(83L, 26L, 26L, 20L, 20L, 77L, 32L, 21L,
15L, 75L, 27L, 81L, 81L, 15L, 24L, 16L, 35L, 27L, 30L, 31L, 24L,
24L, 31L, 79L, 30L, 19L, 20L, 42L, 62L, 83L, 79L, 18L, 26L, 66L,
23L, 83L, 77L, 80L, 57L, 42L, 32L, 76L, 85L, 29L, 65L, 79L, 9L,
34L, 20L, 16L, 34L, 22L, 19L, 23L, 25L, 14L, 53L, 28L, 79L, 22L,
22L, 21L, 82L, 81L, 16L, 19L, 77L, 15L, 18L, 15L, 78L, 24L, 16L,
14L, 29L, 18L, 50L, 17L, 43L, 8L, 14L, 85L, 31L, 20L, 30L, 23L,
78L, 29L, 6L, 61L, 14L, 22L, 10L, 83L, 15L, 13L, 15L, 15L, 29L,
8L, 9L, 15L, 8L, 9L, 15L, 9L, 34L, 8L, 9L, 9L, 16L, 8L, 25L,
21L, 23L, 13L, 56L, 10L, 7L, 27L, 8L, 8L, 8L, 8L, 80L, 80L, 6L,
15L, 42L, 25L, 23L, 21L, 8L, 11L, 43L, 69L, 34L, 34L, 14L, 12L,
10L, 22L, 78L, 16L, 76L, 12L, 10L, 16L, 6L, 13L, 66L, 11L, 26L,
12L, 16L, 13L, 24L, 76L, 10L, 65L, 20L, 13L, 25L, 14L, 12L, 15L,
43L, 51L, 27L, 15L, 24L, 34L, 63L, 17L, 15L, 9L, 12L, 17L, 82L,
75L, 24L, 44L, 69L, 11L, 10L, 12L, 10L, 10L, 70L, 54L, 45L, 42L,
84L, 54L, 23L, 23L, 14L, 81L, 17L, 42L, 44L, 16L, 15L, 43L, 45L,
50L, 53L, 23L, 53L, 49L, 13L, 69L, 14L, 65L, 14L, 13L, 22L, 67L,
59L, 52L, 54L, 44L, 78L, 62L, 69L, 10L, 63L, 57L, 22L, 12L, 62L,
9L, 82L, 53L, 54L, 66L, 49L, 63L, 51L, 9L, 45L, 49L, 77L, 49L,
61L, 62L, 57L, 67L, 16L, 65L, 75L, 45L, 16L, 55L, 17L, 64L, 67L,
56L, 52L, 63L, 10L, 62L, 14L, 66L, 68L, 15L, 13L, 43L, 47L, 55L,
69L, 21L, 67L, 34L, 52L, 15L, 31L, 64L, 55L, 13L, 48L, 71L, 64L,
13L, 25L, 34L, 50L, 61L, 70L, 33L, 57L, 51L, 46L, 57L, 69L, 46L,
8L, 11L, 46L, 71L, 33L, 38L, 56L, 17L, 29L, 28L, 6L, 8L), Sex = structure(c(1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 2L), .Label = c("Male", "Female"), class = "factor"),
mean_AD_scaled = c(3.15891332561581, -0.0551328105526693,
0.582747640515478, 1.94179165777054, 1.7064645993306, 2.37250948563045,
1.015775832203, 1.36189033704266, -1.05640048650493, 0.184814975542474,
-0.143366705302007, 1.81560178585347, 2.06325078470728, -0.473088628698217,
0.414641167726219, 0.199887349084444, -0.60620959209809,
-0.17879228399189, -1.03483709078065, -1.43497010225613,
-0.958595084469815, 1.0203965598582, -1.44731404613503, -1.17191867788498,
-2.02547709312595, -1.22395687266857, -1.09952727795348,
-1.0830246791849, 1.21072653232248, 1.69997357714829, 1.53648783201423,
0.208688735094353, 0.0862394522314924, 1.08662698958276,
-0.731299290763917, 2.29307697689102, -0.660008064083659,
-1.21425334459264, 1.10191939777498, -2.0957781638801, -1.14947514355972,
0.248845058764562, 2.6526135953958, 0.197907037232212, -0.222469162066061,
1.92880961340592, 1.23328008397287, -1.17288683034607, -0.308282675662673,
-1.02603570477074, -1.32647101621898, -1.58316343919798,
-0.0440210607151585, -0.388375288352846, -0.935491446193807,
-0.63789458173376, 0.454577456746182, -1.77391147749773,
0.709267564407921, 0.125735671950958, -0.821073428064989,
-0.126534054558056, 0.519597695894384, 0.188005477971066,
0.212319306823438, -1.45807374053215, 1.5856655763446, -1.25641198358011,
-0.910847565366061, -1.1191763722206, 0.25300371365424, -0.750772357310844,
0.37932560636146, -0.871791414947088, -1.92771569802088,
-1.1752191976387, 0.210449012296334, -0.347778895382139,
-0.132254955464496, 0.953616043508016, -0.0862677135627232,
0.838977990728951, -1.8993092246739, -0.0254281327692267,
0.298022803094927, -1.21559555595915, 0.0134079829994995,
-0.763094297724715, 0.334768589686298, -1.12568939786794,
-2.11786964276497, -0.0434709740895377, 0.388237009696492,
1.30050066962355, -0.260645173884043, -0.60620959209809,
1.05945271027717, -0.275717547426008, -0.0238878902174922,
0.496604074943496, 0.534009965485611, -0.692903244295693,
-0.566933407028871, 0.125625654625835, -0.518305749324122,
1.79381835547894, -0.790708646330802, -0.227860010997131,
0.347420582075538, 0.784189362817269, -0.660118081408782,
1.29962053102256, -0.561652575422924, -0.710395998990384,
-1.29315777017148, -0.457356151205503, -1.01756437073621,
0.146528946399368, -1.07136284272178, -1.42968927065019,
0.798601632408495, -0.799730066990963, -0.431348055546223,
0.569545561500617, 2.32168148142323, 0.472070211440872, 1.65145593676866,
-0.814142336582189, -0.544489872703603, -0.315433801795725,
0.382626126115175, -0.623812364117908, 0.216279930527897,
-0.606099574772967, -0.367207954999011, 0.719829227619811,
-0.749122097433987, 0.934693063586709, -0.79026857703031,
-0.371872689584264, 0.0769979969210905, -0.793899148759394,
1.50414273842782, 0.730280873506577, -0.290569886317732,
0.303743704001367, 0.390877425499463, -1.00359217044547,
-0.534918365417827, 0.325967203676389, 0.129036191704673,
0.34434009697207, -0.141386393449775, -0.363401355549725,
-0.395416397160769, -0.0235578382421178, -1.13583299524436,
1.16781977552417, -1.31890182425046, 0.139377820266317, 0.0160483988024708,
0.481311666751279, -1.05475022662807, 0.839858129329941,
0.652498624644007, -0.350199276534864, -0.262075399110649,
0.178543988010412, -1.13198238886502, -0.05117218684821,
-1.29678834190056, 0.429603523943066, 1.05098137624263, -0.956504755292464,
0.502765045150433, -0.81678275238516, -1.50263075720731,
-0.826684311646306, 2.40100397283753, 2.06633126981075, -0.470558230220369,
0.484942238480364, 0.822035322659877, 0.143888530596397,
0.384056351341786, -0.63580425255641, 0.358422314587926,
-0.372422776209885, 0.0607154328027556, -0.113221958218067,
1.02710761669075, -0.349649189909243, 2.27195365046724, -0.507634068787109,
-0.326105482332738, -1.0396778530861, 1.06484355920824, 1.32151397872221,
-0.185173288849074, -0.651888785489516, -0.171311105883464,
-0.104200537557911, -0.693673365571561, -1.26609350819101,
0.411230630647381, -0.929770545287362, -0.481009876107135,
0.386146680519137, 0.0482834750637615, -0.198265350538812,
0.790020281048832, 0.926001694901924, -1.08918564939184,
0.50298507980068, -0.0694350628187722, 1.04966116834114,
0.00878725534429612, 1.48742010500899, 0.750194009353997,
0.423772605711498, -0.596418050162068, -0.652636903300361,
-0.308942779613417, 0.314437388003408, 0.679562886624478,
-1.24312189070515, -0.432712270377761, 0.00427654501421597,
-0.197935298563442, 0.228821905592019, 1.06957430418856,
-1.61612462980509, 1.9499329398297, -0.263285589687014, 0.156430505660519,
-0.322254875953402, -0.451085163673446, -0.35526007349056,
0.10780284795577, 0.408700232169533, -0.957604928543701,
-1.05662052115517, 1.00345389178912, -0.238751726184391,
0.300003114947154, -0.397946795638617, -0.0802167606809086,
0.943714484246865, 1.10973062785877, 1.76279346979401, 1.62087112038423,
0.25533608094687, 0.226841593739787, 0.869672824438507, -1.44960240649761,
-0.450315042397579, -0.199629565370345, 0.29813282042005,
0.760425620590513, 1.87391096816911, -0.454275666102039,
-0.0559029318285365, -0.343048150401812, -1.01371376435687,
0.68880434193488, -0.29222014619459, 1.16132875334186, -1.95715633422403,
-0.534368278792206, -0.560112332871189, 1.84508642898666,
-1.19150176175703, -0.772203732244971, -0.3443683583033,
-1.45684154649076, -0.633823940704178, -1.77454957798344,
0.279539892474118, -0.875532004001301, 1.26001429397797,
-0.536590628759707, 2.1869102581465, 0.211109116247078, 0.130246382281038,
-0.355810160116181, -0.898085555651692, -0.429741802599415,
1.13360438741065, 1.61338994227581, 0.588688576072169, 0.454137387445685,
0.747113524250528, 0.460848444278238, -0.38177424884541,
-0.169990897981981, -0.747361820232001, -0.760123829946369,
0.208028631143609, -1.28748087619509, 2.33950428809329, -0.973029357526068,
-1.06091119683501, 0.917530360867389, -0.35041931118511,
-1.90613029883158, -1.15057531681095, 0.65348878057012, 0.43147381847017
)), row.names = c(NA, -308L), class = c("tbl_df", "tbl",
"data.frame"))
I am using this gam model:
m1 <- gam(mean_AD_scaled ~ s(Age, bs = 'ad', k = -1) + Sex + ti(Age, by = Sex, bs ='fs'),
data = DF,
method = 'REML',
family = gaussian)
Output:
Family: gaussian
Link function: identity
Formula:
mean_AD_scaled ~ s(Age, bs = "ad", k = -1) + Sex + ti(Age,
by = Sex, bs = "fs")
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.04691 0.06976 0.672 0.502
SexFemale -0.12950 0.09428 -1.374 0.171
Approximate significance of smooth terms:
edf Ref.df F p-value
s(Age) 2.980 3.959 8.72 2.24e-06 ***
ti(Age):SexMale 2.391 2.873 23.47 < 2e-16 ***
ti(Age):SexFemale 1.000 1.000 43.40 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Rank: 48/49
R-sq.(adj) = 0.34 Deviance explained = 35.6%
-REML = 375.4 Scale est. = 0.63867 n = 308
But when I use gtsummary, I get a repeated value for each gender 'interaction':
tbl_regression(m1, tidy_fun = tidy_gam)
I see the following in a publication, which I am trying to replicate with gender and age:
I am not sure how to fix this. My goal is to print a table for a manuscript so any other gam-related information that can be added like edf and R^2.
I think you've found a bug in the handling of these types of interactions. While we work on a fix to the bug, this code should get you what you need. Thanks
library(gtsummary)
#> #BlackLivesMatter
library(mgcv)
packageVersion("gtsummary")
#> [1] ‘1.5.2’
m1 <- gam(marker ~ s(age, bs = 'ad', k = -1) + grade + ti(age, by = grade, bs ='fs'),
data = gtsummary::trial,
method = 'REML',
family = gaussian)
tbl_regression(m1, tidy_fun = gtsummary::tidy_gam) %>%
modify_table_body(
~ .x %>%
dplyr::select(-n_obs) %>%
dplyr::distinct()
) %>%
as_kable() # convert to kable to display on SO
Characteristic
Beta
95% CI
p-value
Grade
I
—
—
II
-0.39
-0.70, -0.08
0.014
III
-0.13
-0.43, 0.18
0.4
s(age)
>0.9
ti(age):gradeI
0.6
ti(age):gradeII
>0.9
ti(age):gradeIII
0.6
Created on 2022-02-21 by the reprex package (v2.0.1)
Suppose I have this dataset:
set.seed (1234);
data.frame(cbind(a=rep(c("si","no"),30),b=rnorm(60)),
c=rep(c("d","e","f"),20)) %>% head()
Then I want to add many columns (in this example I only added two), to identify distinct cases between each group (in this case, column "a").
set.seed(1234);
data.frame(cbind(a=rep(c("si","no"),30),b=rnorm(60)),c=rep(c("d","e","f"),20)) %>%
group_by(a) %>% dplyr::mutate_at(vars(c(b,c)), .funs= list(dups_hash_ing= ~n_distinct(.)))
This code leaves the following dataset:
If I set the dataset with dput, the outcome is
structure(list(a = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L), .Label = c("no", "si"), class = "factor"), b = structure(c(22L,
1L, 51L, 34L, 50L, 57L, 53L, 10L, 47L, 3L, 11L, 23L, 15L, 38L,
58L, 39L, 41L, 17L, 28L, 21L, 37L, 45L, 29L, 46L, 32L, 48L, 56L,
52L, 26L, 19L, 35L, 8L, 55L, 20L, 9L, 36L, 2L, 12L, 6L, 42L,
49L, 43L, 59L, 54L, 31L, 13L, 60L, 44L, 14L, 30L, 7L, 5L, 16L,
27L, 33L, 18L, 24L, 4L, 25L, 40L), .Label = c("-0.0997905884418961",
"-0.151736536534977", "-0.198416273822079", "-0.254874652654534",
"-0.274704218225806", "-0.304721068966714", "-0.324393300483657",
"-0.400235237343163", "-0.415751788401515", "-0.50873701541522",
"-0.538070788884863", "-0.60615111526422", "-0.659770093821306",
"-0.684320344136007", "-0.789646852263761", "-0.933503340589868",
"-0.965903210133575", "-1.07754212275943", "-1.11444896479736",
"-1.60708093984972", "-2.07823754188738", "-2.7322195229558",
"-2.85575865501923", "-3.23315213292314", "0.0295178303214797",
"0.0326639575014441", "0.116845344986082", "0.162654708118265",
"0.185513915583057", "0.186492083080971", "0.287709728313787",
"0.311681028661359", "0.319160238648117", "0.413868915451097",
"0.418057822385083", "0.42200837321742", "0.485226820569252",
"0.487814635163685", "0.500694614280786", "0.594273774110513",
"0.62021020366732", "0.629536099884472", "0.660212631820405",
"0.677415500438328", "0.696768778564913", "0.700733515544461",
"0.704180178465512", "0.760462361967838", "0.895171980275539",
"0.912322161610113", "0.976031734922396", "1.1123628412626",
"1.16910851401363", "1.17349757263239", "1.49349310261748", "1.84246362620766",
"1.98373220068438", "2.16803253951933", "2.27348352044748", "2.91914013071762"
), class = "factor"), c = structure(c(1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L), .Label = c("d", "e", "f"), class = "factor"),
a_dups_hash_ing = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), b_dups_hash_ing = c(30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L), c_dups_hash_ing = c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -60L), groups = structure(list(
a = structure(1:2, .Label = c("no", "si"), class = "factor"),
.rows = list(c(2L, 4L, 6L, 8L, 10L, 12L, 14L, 16L, 18L, 20L,
22L, 24L, 26L, 28L, 30L, 32L, 34L, 36L, 38L, 40L, 42L, 44L,
46L, 48L, 50L, 52L, 54L, 56L, 58L, 60L), c(1L, 3L, 5L, 7L,
9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 27L, 29L, 31L,
33L, 35L, 37L, 39L, 41L, 43L, 45L, 47L, 49L, 51L, 53L, 55L,
57L, 59L))), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
What I need to do, is replace, column by column, if the number of distinct cases is more than one per group, with the value of the original column. I have to do this for more than 50 columns. An example of this will be provided for only one column with mutate:
dplyr::mutate(b_dups_hash_ing= ifelse(>1,b,0))
I need to repeat the code provided above for many variables. This is very similar to a mutate_at (words in brackets is what I would do). The following example does not work, but is something I would do in an ideal world, just for your better understanding of my problem.
dplyr::mutate_at(vars(contains('_dups_hash_ing')), .funs = list(~ifelse(.>1,vars([original]),0)))
Is this what you're looking for?
df %>% dplyr::mutate_at(vars(contains('_dups_hash_ing')), ~ ifelse(. > 1, ., 0)) %>% head
#> # A tibble: 6 x 6
#> # Groups: a [2]
#> a b c a_dups_hash_ing b_dups_hash_ing c_dups_hash_ing
#> <fct> <fct> <fct> <dbl> <int> <int>
#> 1 si -2.7322195229558 d 0 30 3
#> 2 no -0.09979058844189… e 0 30 3
#> 3 si 0.976031734922396 f 0 30 3
#> 4 no 0.413868915451097 d 0 30 3
#> 5 si 0.912322161610113 e 0 30 3
#> 6 no 1.98373220068438 f 0 30 3
I am generating multiple experimental designs of different sizes and shapes. This is done using a function dependent on the agricolae package (I’ve included it below). To generate practical data sheets for field operations I need to order the data frame by Row, then for odd Rows sort the Range ascending and for even Rows sort it descending.
Using sort, order, rep and seq I have been able to find a simple solution to this. Any suggestions are greatly appreciated!
So the data frame will go from something like this:
df1 <- structure(list(Block = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), Range = c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), Row = c(1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L,
9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L
), Plot = c(101L, 201L, 301L, 401L, 102L, 202L, 302L, 402L, 103L,
203L, 303L, 403L, 104L, 204L, 304L, 404L, 105L, 205L, 305L, 405L,
106L, 206L, 306L, 406L, 107L, 207L, 307L, 407L, 108L, 208L, 308L,
408L, 109L, 209L, 309L, 409L, 110L, 210L, 310L, 410L, 111L, 211L,
311L, 411L, 112L, 212L, 312L, 412L), Entry.Num = c(14L, 26L,
18L, 4L, 52L, 17L, 41L, 47L, 40L, 30L, 21L, 12L, 9L, 2L, 8L,
36L, 25L, 43L, 15L, 6L, 33L, 48L, 54L, 37L, 9L, 18L, 8L, 41L,
48L, 28L, 7L, 47L, 54L, 38L, 46L, 23L, 19L, 1L, 3L, 27L, 36L,
14L, 12L, 33L, 16L, 24L, 31L, 2L)), .Names = c("Block", "Range",
"Row", "Plot", "Entry.Num"), class = "data.frame", row.names = c(NA,
-48L))
To something like this:
df2 <- structure(list(Block = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), Range = c(1L, 2L, 3L, 4L, 4L, 3L,
2L, 1L, 1L, 2L, 3L, 4L, 4L, 3L, 2L, 1L, 1L, 2L, 3L, 4L, 4L, 3L,
2L, 1L, 1L, 2L, 3L, 4L, 4L, 3L, 2L, 1L, 1L, 2L, 3L, 4L, 4L, 3L,
2L, 1L, 1L, 2L, 3L, 4L, 4L, 3L, 2L, 1L), Row = c(1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L,
9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L
), Plot = c(101L, 201L, 301L, 401L, 402L, 302L, 202L, 102L, 103L,
203L, 303L, 403L, 404L, 304L, 204L, 104L, 105L, 205L, 305L, 405L,
406L, 306L, 206L, 106L, 107L, 207L, 307L, 407L, 408L, 308L, 208L,
108L, 109L, 209L, 309L, 409L, 410L, 310L, 210L, 110L, 111L, 211L,
311L, 411L, 412L, 312L, 212L, 112L), Entry.Num = c(14L, 26L,
18L, 4L, 47L, 41L, 17L, 52L, 40L, 30L, 21L, 12L, 36L, 8L, 2L,
9L, 25L, 43L, 15L, 6L, 37L, 54L, 48L, 33L, 9L, 18L, 8L, 41L,
47L, 7L, 28L, 48L, 54L, 38L, 46L, 23L, 27L, 3L, 1L, 19L, 36L,
14L, 12L, 33L, 2L, 31L, 24L, 16L)), .Names = c("Block", "Range",
"Row", "Plot", "Entry.Num"), class = "data.frame", row.names = c(NA,
-48L))
In case you're interested, this is the trial design function. There is undoubtedly a more elegant way to do this but I am not particularly good at R:
Trial.Design <- function(Total.Entries, Rows.per.Block, Ranges.per.Block, Trial.Name){
library(agricolae)
library(reshape2)
#########################################################################################
# Generate a trial design #
#########################################################################################
total.trt <- Total.Entries
if(total.trt%%2) # If the variety number is uneven it will return the following error message
stop("WARNING: Variety number is uneven! Subsequent script will not work correctly!")
blocks <- 4 # This is fixed, we are unlikely to use a different block number in any trial.
trt<-c(1:total.trt) # You could in theory have the variety names here.
# This function from agricolae generates a statistically sound trial design.
outdesign <-design.rcbd(trt, blocks, serie=0,continue=TRUE,986,"Wichmann-Hill") # seed for ranomization = 986
# This uses an agricolae function to print the "field book" of the trial.
book <-outdesign$book # field book
#########################################################################################
# Generate blocking in two directions #
#########################################################################################
# The following generates an appropriately blocked map. The idea is block in two directions.
# We use this design so that the blocking structure captures field trends both down and across the field.
Block.Rows <- Rows.per.Block
Block.Ranges <- Ranges.per.Block
ifelse(total.trt==Block.Rows*Block.Ranges, "Entry number is okay",
stop("WARNING: Block is uneven and/or does not equal entry number! Subsequent script will not work correctly!"))
Block <- matrix(rep(1, times=total.trt))
Range <- matrix(rep(1:Block.Rows, times=Block.Ranges))
Row <- matrix(rep(1:Block.Ranges, each=Block.Rows))
Block.1 <- cbind(Block, Range)
Block.1 <- cbind(Block.1, Row)
Block <- matrix(rep(3, times=total.trt))
Range <- matrix(rep((Block.Rows+1):(Block.Rows*2), times=Block.Ranges))
Row <- matrix(rep(1:Block.Ranges, each=Block.Rows))
Block.3 <- cbind(Block, Range)
Block.3 <- cbind(Block.3, Row)
Block <- matrix(rep(2, times=total.trt))
Range <- matrix(rep(1:Block.Rows, times=Block.Ranges))
Row <- matrix(rep((Block.Ranges+1):(Block.Ranges*2), each=Block.Rows))
Block.2 <- cbind(Block, Range)
Block.2 <- cbind(Block.2, Row)
Block <- matrix(rep(4, times=total.trt))
Range <- matrix(rep((Block.Rows+1):(Block.Rows*2), times=Block.Ranges))
Row <- matrix(rep((Block.Ranges+1):(Block.Ranges*2), each=Block.Rows))
Block.4 <- cbind(Block, Range)
Block.4 <- cbind(Block.4, Row)
# The following adds the coordinates generated above to our field book.
Field.book <- rbind(Block.1, Block.2)
Field.book <- rbind(Field.book, Block.3)
Field.book <- rbind(Field.book, Block.4)
Plots <- as.matrix(rep(1:(total.trt*4)))
Field.book <- cbind(Plots, Field.book)
# Generate temporary Range names.
colnames(Field.book) <- c("plots", "block", "range", "row")
Field.book <- as.data.frame(Field.book)
Field.book$range <- as.numeric(Field.book$range)
Field.book$row <- as.numeric(Field.book$row)
# This joins the experimental design generated by agricolae to the plot layout generated above.
Field.book <- join(Field.book, book, by= c("plots","block"))
# Generate better Range names.
colnames(Field.book) <- c("Plot.Num", "Block", "Range", "Row", "Entry.Num")
# Create Plot coordinates.
Field.book$Plot <- (Field.book$Range * 100) + Field.book$Row
# Reorders the Ranges to something more intuitive.
# I drop the 'plot number' Range generated by agricolae because I don't think it is useful or necessary in our case.
Field.book <- Field.book[c("Block", "Range", "Row", "Plot", "Entry.Num")]
# Sort the plots by Range and Row.
Field.book <- Field.book[order(Field.book$Range, Field.book$Row),]
Field.book <<- Field.book
# Convert the Ranges to factors to allow for conversion to a 'wide' format.
Field.book$Block <- as.factor(Field.book$Block)
Field.book$Range <- as.factor(Field.book$Range)
Field.book$Row <- as.factor(Field.book$Row)
Field.book$Plot <- as.factor(Field.book$Plot)
#########################################################################################
# Generate plot maps #
#########################################################################################
# This function rotates the design if it's deemed necessary.
# rotate <- function(x) t(apply(x, 2, rev))
Field.design.num <- dcast(Field.book, Row ~ Range, value.var = "Entry.Num")
Field.design.num$Row <- as.numeric(Field.design.num$Row)
Field.design.num <- Field.design.num[order(-Field.design.num$Row),]
Field.book$Plot <- as.factor(Field.book$Plot)
colnames(Field.design.num)[2:ncol(Field.design.num)] <- paste("Row", colnames(Field.design.num[,c(2:ncol(Field.design.num))]), sep = "-")
Field.design.num$Row <- sub("^", "Range-", Field.design.num$Row)
#rotate(Field.design.num)
Field.design.num <<- Field.design.num
Field.design.plot <- dcast(Field.book, Row ~ Range, value.var = "Plot")
Field.design.plot$Row <- as.numeric(Field.design.plot$Row)
Field.design.plot <- Field.design.plot[order(-Field.design.plot$Row),]
Field.book$Plot <- as.factor(Field.book$Plot)
colnames(Field.design.plot)[2:ncol(Field.design.plot)] <- paste("Row", colnames(Field.design.plot[,c(2:ncol(Field.design.plot))]), sep = "-")
Field.design.plot$Row <- sub("^", "Range-", Field.design.plot$Row)
#rotate(Field.design.plot)
Field.design.plot <<- Field.design.plot
Field.design.Block <- dcast(Field.book, Row ~ Range, value.var = "Block")
Field.design.Block$Row <- as.numeric(Field.design.Block$Row)
Field.design.Block <- Field.design.Block[order(-Field.design.Block$Row),]
Field.book$Block <- as.factor(Field.book$Block)
colnames(Field.design.Block)[2:ncol(Field.design.Block)] <- paste("Row", colnames(Field.design.Block[,c(2:ncol(Field.design.Block))]), sep = "-")
Field.design.Block$Row <- sub("^", "Range-", Field.design.Block$Row)
#rotate(Field.design.Block)
Field.design.Block <<- Field.design.Block
#########################################################################################
# Write the files #
#########################################################################################
write.csv(Field.book, paste("Field Book",Trial.Name,".csv"), row.names=FALSE)
write.csv(Field.design.num, paste("Field map Entry",Trial.Name,".csv"), row.names=FALSE)
write.csv(Field.design.plot, paste("Field map Plots",Trial.Name,".csv"), row.names=FALSE)
write.csv(Field.design.Block, paste("Field map Blocks",Trial.Name,".csv"), row.names=FALSE)
#########################################################################################
}
# The parameters are:
# The total number of entires/varieties in a replicate (NOTE: The number of entries must be an even number).
# The number of rows in an individual block/replicate.
# The number of ranges in an individual block/replicate.
# (NOTE: The number of rows and ranges must multiply to give the number of entries.)
# The trial name is what will be written to your working directory.
Total.Entries = 54
Rows.per.Block = 9
Ranges.per.Block = 6
Trial.Name = "Example"
Trial.Design (Total.Entries, Rows.per.Block, Ranges.per.Block, Trial.Name)
The magic of order awaits you:
df1[order(df1$Row, c(-1,1)[df1$Row %% 2 + 1] * df1$Range ),]
Essentially what this does is order by Row, then by Range, multiplied by -1 if it is even. x %% 2 can be used to check for odd/even status.
all.equal(
df1[order(df1$Row, c(-1,1)[df1$Row %% 2 + 1] * df1$Range ),],
df2,
check.attributes=FALSE
)
#[1] TRUE
I have aggregated retail weekly data with seasonal periods of 52.2 (a 53rd week every five years). I want to use this aggregated data to calculate a seasonal index that can be applied to each item within the category to derive its de-seasonalised demand.
Using stl, I would calculate the seasonal index as "seasonal" / "trend" + 1 (normalised to 52). I switched to tbats because my seasonality was not an integer and I have multiple seasonal periods (52.2 and 261)
I am using tbats with seasonal.periods = 52.2 and extract the components using tbats.components. The components are "observed", "level" and "season". Google has not revealed much in terms of what these components are and how to consume them. I also extracted the residuals
I noticed that "observed" is the log of my data. I also notice that season is changing over time (which is exactly what I want)
My questions are:
1.Is "season" a natural log too?
2.How can I extract the future "season" values? I can run a forecast on the data so I am assuming that there must be a projected "season"
3. What would be the best approach to calculating an "index" considering that it will be divided into the granular data. I am currently using: exp("season") / centered moving average(exp("season"))
My Data:
weeklyu <-structure(list(V1 = c(8L, 5L, 7L, 3L, 1L, 2L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 8L, 2L,
4L, 8L, 6L, 7L, 8L, 9L, 15L, 15L, 13L, 9L, 16L, 19L, 16L, 16L,
10L, 31L, 45L, 90L, 185L, 34L, 8L, 19L, 11L, 19L, 21L, 8L, 5L,
7L, 6L, 3L, 10L, 2L, 2L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 16L, 22L, 18L, 23L, 11L, 5L, 8L, 21L, 18L, 11L, 26L,
28L, 9L, 3L, 6L, 3L, 6L, 1L, 5L, 3L, 3L, 2L, 1L, 4L, 1L, 1L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L,
19L, 11L, 17L, 23L, 50L, 52L, 23L, 18L, 22L, 44L, 37L, 22L, 30L,
32L, 47L, 34L, 30L, 26L, 25L, 44L, 87L, 65L, 30L, 17L, 12L, 2L,
16L, 14L, 17L, 6L, 7L, 3L, 6L, 7L, 8L, 11L, 12L, 4L, 1L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L)), .Names = "V1", class = "data.frame", row.names = c(NA,
-188L))
My Code:
wklytbat <- tbats(msts(weeklyu, seasonal.periods = 52.2, ts.frequency=52.2), use.parallel=FALSE)
extract season:
seasu <-data.table(exp(as.numeric(tbats.components(wklytbat)[,'season'])))