My dataset looks like this:
> head(GLM_df)
hour Feeding Foraging Standing ID Area Feeding_Foraging
1 0 0.119 0.789 0.0339 41361 Seronera 0.908
2 1 0.0920 0.819 0.0339 41361 Seronera 0.911
3 2 0.0847 0.824 0.0678 41361 Seronera 0.909
4 3 0.233 0.632 0.132 41361 Seronera 0.866
5 4 0.254 0.597 0.124 41361 Seronera 0.852
6 5 0.245 0.664 0.0832 41361 Seronera 0.909
I'm trying to fit a glmer() model as shown below to test an interaction; the resulting error follows the call:
> m <- glmer(cbind(Feeding_Foraging,Standing) ~ poly(hour,2)*Area+(1|ID) , data=GLM_df , family=binomial)
Error in length(value <- as.numeric(value)) == 1L :
(maxstephalfit) PIRLS step-halvings failed to reduce deviance in pwrssUpdate
In addition: Warning message:
In eval(family$initialize, rho) : non-integer counts in a binomial glm!
I apologize if this is not the right forum, but does anybody know what causes this error? I have used this dataset to fit other glmer() models without any such issue, so I hope somebody can help.
I can provide a dput() sample of the data below:
> dput(GLM_df)
structure(list(hour = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L), Feeding = c(0.118579234700529,
0.0919594065024507, 0.0846994533575204, 0.233092895639896, 0.254098360072561,
0.244523639258233, 0.238513660654777, 0.245289616923379, 0.211748633393801,
0.253514225911475, 0.275555554923133, 0.222477230819087, 0.232641165221989,
0.238368461591879, 0.30265937999754, 0.433661201190504, 0.178745053292422,
0.12125395428024, 0.10605844594333, 0.163238946470857, 0.174611180767811,
0.22483854891269, 0.177868852050793, 0.183918813004901, 0.241998438164344,
0.161698956409812, 0.158105646267371, 0.36138433432542, 0.468670308578279,
0.333151183206247, 0.32072859671381, 0.301413227120555, 0.295571885509692,
0.313952640445209, 0.343315117609149, 0.309435336266141, 0.345573769698683,
0.307176684176607, 0.322987248803344, 0.303788706042306, 0.266520946564997,
0.179710144515087, 0.151781420416677, 0.272293057460473, 0.384777516681307,
0.358157688483229, 0.370418942683556, 0.295571885509692, 0.194038747691774,
0.0980730512560762, 0.104719324151116, 0.287394007254483, 0.360255008280653,
0.356867030146353, 0.303788706042306, 0.297908422154037, 0.295883423728938,
0.309435336266141, 0.335409835295781, 0.294754097684171, 0.329763205071946,
0.311693988355675, 0.252969034027794, 0.320554854245385, 0.269908924699298,
0.114670029160951, 0.145400728263743, 0.208925318281884, 0.252065573191981,
0.343637782193368, 0.234552332374672, 0.25071038193826, 0.139938227286338,
0.127049180036281, 0.0779234970889187, 0.271038250744065, 0.37923497180722,
0.365027321566604, 0.313661201465914, 0.342076501947147, 0.292896174191167,
0.283060108639971, 0.271038250744065, 0.238251365573412, 0.196721311023918,
0.191256830162143, 0.16601092858074, 0.0626775954845651, 0.134426229199678,
0.105704917790185, 0.11195058182907, 0.140192198660723, 0.14806719253611,
0.21262483463543, 0.226733921295516, 0.21891551021636, 0.120612021581109,
0.140939890386914, 0.0931693986932724, 0.2142076497816, 0.228415300022216,
0.194244079699913, 0.181821493207477, 0.186922931547631, 0.153588342088304,
0.15187488188245, 0.135519125372033, 0.171657558804575, 0.144302772386887,
0.113322027250751, 0.0931693986932724, 0.0657666343717217, 0.126775955993192,
0.0912147959234835, 0.0966201171633936, 0.143219075677262, 0.127049180036281,
0.145683059774935, 0.171657558804575, 0.140731399424803, 0.238570126957016,
0.109339294334254, 0.14013909555517, 0.190856101565613, 0.175240248325904,
0.217486338298665, 0.251366119641673, 0.295081966535877, 0.278688523950551,
0.268852458399355, 0.349726775153633, 0.328961747878886, 0.351912567498343,
0.284153004812326, 0.220218578729553, 0.179437360446302, 0.283460837236502,
0.156693988711413, 0.114187411193102, 0.207187893597627, 0.198761383878981,
0.22134790477432, 0.199890709923748, 0.218466176246294), Foraging = c(0.78939890529209,
0.81876138245603, 0.824408012679865, 0.632422585069486, 0.59741347768171,
0.66404371432296, 0.599672129771244, 0.632422585069486, 0.629034606935185,
0.575956282831139, 0.525136610816626, 0.588378869323575, 0.577085608875906,
0.574826956786372, 0.482222221115483, 0.336377829048438, 0.677595626860163,
0.811985426187429, 0.797304187605459, 0.744225863501412, 0.727285972829908,
0.702440799845036, 0.721639342606074, 0.744225863501412, 0.593480307663729,
0.692276865442133, 0.705828777979336, 0.29136611954987, 0.178520386307389,
0.320647930567756, 0.343470886718772, 0.422913132626516, 0.393706424572198,
0.350480496651808, 0.350091073877751, 0.339966081752254, 0.289107467460336,
0.294403617187519, 0.226644054501503, 0.185602280400827, 0.465282330443979,
0.671948996636328, 0.677595626860163, 0.525136610816626, 0.359125682235886,
0.398652093802729, 0.407725644438271, 0.496903459697453, 0.519489980592792,
0.647103823651456, 0.618870672532282, 0.247583017506598, 0.159987856341983,
0.170810564270999, 0.290898812221001, 0.315807961804469, 0.2952380945605,
0.274543055710583, 0.21405861848537, 0.274947456283643, 0.241067674940635,
0.254098360072561, 0.192437158028286, 0.1589743586095, 0.334732239668921,
0.591766847457876, 0.587638966052866, 0.500018841889913, 0.436807180886641,
0.401884302827407, 0.44922080447396, 0.438017173077463, 0.748633878063245,
0.820765025438681, 0.896174861331183, 0.336612021085371, 0.116546447819948,
0.204633879311769, 0.282720933965792, 0.313952640445209, 0.293235348865346,
0.217959926640019, 0.244687309699503, 0.267759562227, 0.256357012162095,
0.20666666619235, 0.110109289364776, 0.0532396563961557, 0.284590163281268,
0.810928959887485, 0.790163932612739, 0.619999998577049, 0.523384208333367,
0.47682655223493, 0.493009231956877, 0.637874503906291, 0.632422585069486,
0.726775954616143, 0.817486336921616, 0.340983605774792, 0.142779078516963,
0.193598750531475, 0.256357012162095, 0.254682494233647, 0.206783493024567,
0.19198542761038, 0.221428570920375, 0.213793102957603, 0.203278688058049,
0.194157208465701, 0.112932604476694, 0.0948633877604228, 0.380582877086458,
0.787978140268028, 0.810928959887485, 0.719125681409657, 0.625136610587118,
0.562404370293935, 0.366120217738959, 0.535519124454, 0.655009105964824,
0.782513659406253, 0.757377047442085, 0.18996877395901, 0.158105646267371,
0.182574377237322, 0.24367381196702, 0.248087431124608, 0.269869982421893,
0.283586317908142, 0.23846153791425, 0.29272131080359, 0.220218578729553,
0.13834244048395, 0.101639344029024, 0.0846994533575204, 0.23846153791425,
0.745355189546179, 0.686338796239004, 0.605318759995079, 0.500936767000192,
0.414375787195254, 0.393442622047837, 0.509364988467295), Standing = c(0.0338797813430082,
0.0338797813430082, 0.0677595626860163, 0.131754705222809, 0.124225864924363,
0.0831594632964746, 0.162622950446439, 0.101639344029024, 0.112932604476694,
0.0931693986932724, 0.0975737702678635, 0.101639344029024, 0.12046144477514,
0.128743169103431, 0.137059115433078, 0.14761904728025, 0.0677595626860163,
0.0338797813430082, 0.0338797813430082, 0.0639951425367932, 0.0423497266787602,
0.0677595626860163, 0.107285974252859, 0.054207650148813, 0.0790528231336857,
0.0609836064174147, 0.0451730417906775, 0.195749847759603, 0.229629629102611,
0.225865208953388, 0.198259461192418, 0.160928961379289, 0.183201780595526,
0.203278688058049, 0.149321999252517, 0.198605614769358, 0.212958625584623,
0.281462798849606, 0.306128024277895, 0.398379497860889, 0.111677797760286,
0.0677595626860163, 0.0547288775540901, 0.0931693986932724, 0.145830363172079,
0.153350589236774, 0.105403764178248, 0.149071037909236, 0.152459016043537,
0.135519125372033, 0.119882303213721, 0.254098360072561, 0.296740153831865,
0.255227686117328, 0.178182553729895, 0.206102003169966, 0.186338797386545,
0.175045536938875, 0.264028640811029, 0.235903662684649, 0.235855400887864,
0.189259468191977, 0.333151183206247, 0.403169397981797, 0.203278688058049,
0.0884638735067435, 0.116461748366591, 0.127819175066803, 0.183918813004901,
0.155538996165628, 0.179710144515087, 0.15951730382333, 0.190573770054421,
0.167140254625507, 0.11067395238716, 0.392349725875482, 0.526775955075159,
0.469945354112694, 0.421857922529069, 0.365901638504488, 0.43278688425262,
0.506010927800412, 0.515846993351608, 0.493989069904506, 0.555191255556392,
0.608743168001792, 0.768306009165636, 0.947540981431873, 0.590163933071755,
0.169398906715041, 0.163752276491206, 0.297658078942143, 0.42228727459678,
0.412398717726961, 0.432306009936784, 0.283743168747693, 0.300400727908006,
0.183201780595526, 0.132573057429162, 0.444808742148526, 0.6426229493448,
0.637158468483024, 0.575956282831139, 0.58688524455469, 0.657923495757771,
0.690710380928424, 0.664480872791902, 0.633879779965959, 0.690710380928424,
0.731147539305563, 0.828415298645167, 0.933333331191257, 0.504918031628057,
0.161580495635885, 0.141411261257773, 0.231511839177222, 0.389617485444594,
0.325245900892878, 0.467759561767984, 0.370341058128744, 0.244523639258233,
0.255094824229708, 0.184927139830586, 0.643715845517155, 0.774863386199767,
0.676502730687808, 0.544262293832841, 0.456830600044432, 0.468852457940339,
0.48415300435331, 0.450273223010302, 0.43497267659733, 0.449180326837947,
0.608743168001792, 0.724590162271432, 0.816393440749261, 0.525683058902804,
0.196825396373666, 0.2766848809679, 0.298142075818472, 0.393247462017059,
0.468475597191251, 0.426885244921903, 0.380496005852245), ID = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("41361",
"41365", "41366", "41366bis", "41367", "41368"), class = "factor"),
Area = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Loliondo",
"Seronera"), class = "factor"), Feeding_Foraging = c(0.907978139992619,
0.910720788958481, 0.909107466037385, 0.865515480709382,
0.851511837754272, 0.908567353581193, 0.838185790426022,
0.877712201992865, 0.840783240328986, 0.829470508742613,
0.800692165739759, 0.810856100142662, 0.809726774097895,
0.81319541837825, 0.784881601113022, 0.770039030238942, 0.856340680152585,
0.933239380467668, 0.903362633548788, 0.90746480997227, 0.901897153597719,
0.927279348757726, 0.899508194656866, 0.928144676506314,
0.835478745828073, 0.853975821851945, 0.863934424246708,
0.65275045387529, 0.647190694885669, 0.653799113774003, 0.664199483432583,
0.724326359747071, 0.68927831008189, 0.664433137097017, 0.6934061914869,
0.649401418018395, 0.634681237159019, 0.601580301364126,
0.549631303304847, 0.489390986443134, 0.731803277008976,
0.851659141151415, 0.82937704727684, 0.797429668277099, 0.743903198917193,
0.756809782285958, 0.778144587121826, 0.792475345207145,
0.713528728284566, 0.745176874907532, 0.723589996683398,
0.534977024761081, 0.520242864622636, 0.527677594417352,
0.594687518263307, 0.613716383958506, 0.591121518289437,
0.583978391976724, 0.54946845378115, 0.569701553967814, 0.570830880012581,
0.565792348428236, 0.44540619205608, 0.479529212854885, 0.604641164368219,
0.706436876618826, 0.733039694316609, 0.708944160171797,
0.688872754078621, 0.745522085020775, 0.683773136848632,
0.688727555015723, 0.888572105349583, 0.947814205474962,
0.974098358420102, 0.607650271829437, 0.495781419627168,
0.569661200878373, 0.596382135431706, 0.656029142392356,
0.586131523056514, 0.501020035279991, 0.515725560443569,
0.506010927800412, 0.453078323186013, 0.397923496354493,
0.276120217945516, 0.115917251880721, 0.419016392480946,
0.916633877677671, 0.902114514441809, 0.760192197237773,
0.671451400869477, 0.68945138687036, 0.719743153252393, 0.856790014122652,
0.753034606650595, 0.867715845003057, 0.910655735614888,
0.555191255556392, 0.371194378539179, 0.387842830231389,
0.438178505369572, 0.441605425781279, 0.360371835112871,
0.34386030949283, 0.356947696292407, 0.385450661762178, 0.347581460444935,
0.307479235716452, 0.206102003169966, 0.160630022132145,
0.50735883307965, 0.879192936191512, 0.907549077050879, 0.862344757086919,
0.752185790623399, 0.70808743006887, 0.537777776543534, 0.676250523878803,
0.89357923292184, 0.891852953740506, 0.897516142997256, 0.380824875524623,
0.333345894593276, 0.400060715535987, 0.495039931608694,
0.543169397660485, 0.548558506372443, 0.552438776307497,
0.588188313067882, 0.621683058682476, 0.572131146227896,
0.422495445296276, 0.321857922758577, 0.264136813803823,
0.521922375150751, 0.902049178257592, 0.800526207432105,
0.812506653592706, 0.699698150879173, 0.635723691969573,
0.593333331971585, 0.727831164713589)), row.names = c(NA,
-144L), vars = "hour", indices = list(c(0L, 24L, 48L, 72L, 96L,
120L), c(1L, 25L, 49L, 73L, 97L, 121L), c(2L, 26L, 50L, 74L,
98L, 122L), c(3L, 27L, 51L, 75L, 99L, 123L), c(4L, 28L, 52L,
76L, 100L, 124L), c(5L, 29L, 53L, 77L, 101L, 125L), c(6L, 30L,
54L, 78L, 102L, 126L), c(7L, 31L, 55L, 79L, 103L, 127L), c(8L,
32L, 56L, 80L, 104L, 128L), c(9L, 33L, 57L, 81L, 105L, 129L),
c(10L, 34L, 58L, 82L, 106L, 130L), c(11L, 35L, 59L, 83L,
107L, 131L), c(12L, 36L, 60L, 84L, 108L, 132L), c(13L, 37L,
61L, 85L, 109L, 133L), c(14L, 38L, 62L, 86L, 110L, 134L),
c(15L, 39L, 63L, 87L, 111L, 135L), c(16L, 40L, 64L, 88L,
112L, 136L), c(17L, 41L, 65L, 89L, 113L, 137L), c(18L, 42L,
66L, 90L, 114L, 138L), c(19L, 43L, 67L, 91L, 115L, 139L),
c(20L, 44L, 68L, 92L, 116L, 140L), c(21L, 45L, 69L, 93L,
117L, 141L), c(22L, 46L, 70L, 94L, 118L, 142L), c(23L, 47L,
71L, 95L, 119L, 143L)), group_sizes = c(6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L), biggest_group_size = 6L, labels = structure(list(
hour = 0:23), row.names = c(NA, -24L), class = "data.frame", vars = "hour"), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Any input is appreciated!
Related
I cannot figure out how to use a loop to plot one histogram for each unique combination of levels from TWO factors.
Here is my data: https://www.dropbox.com/sh/exsjhu23fnpwf4r/AABvitLBN1nRMpXcyYMVIOIDa?dl=0
# the two grouping columns may need to be factors
df[c("freq", "time")] <- lapply(df[c("freq", "time")], as.factor)
I learned how to use a loop to plot one histogram for each level of ONE factor:
# space for plots: open a device and divide it into a 3 x 7 grid of panels,
# numbered 1..21 and filled row by row, so each histogram gets its own panel.
# NOTE(review): windows() is only available on Windows builds of R.
windows(width = 19, height = 10)
par(
  las = 1, cex.lab = 0.75, cex.axis = 0.6, bty = "n", mgp = c(1, 0.6, 0),
  oma = c(2, 4, 2, 0) + 0.1, mar = c(4, 0, 3, 3) + 0.1
)
# byrow = TRUE (spelled out: T is an ordinary variable that can be reassigned)
a <- layout(matrix(1:21, nrow = 3, ncol = 7, byrow = TRUE))
layout.show(a)
# loop: draw one histogram of thr for each distinct value of freq
# The distinct values are computed once, and seq_along() yields an empty
# sequence (no iterations) when there are none, unlike 1:length(...).
freq_values <- unique(df$freq)
for (i in seq_along(freq_values)) {
  value <- subset(df, freq == freq_values[i])
  hist(value$thr, main = paste0("freq: ", freq_values[i]))
}
I tried variations of this loop for TWO factors but that unfortunately does not work:
# Attempt to draw one histogram per freq/time combination (reported as not working).
# NOTE(review): unique(df[c("freq", "time")]) is a data frame, and length() of a
# data frame is its number of columns (2 here), so i only takes the values 1 and 2.
for (i in 1:length(unique(df[c("freq", "time")]))) {
# NOTE(review): the same index i is used for both factors, so only the i-th
# freq value paired with the i-th time value is selected, not every combination.
value <- subset(df, freq == unique (df$freq)[i] & time == unique(df$time)[i])
# The title shows only the freq value; time is not part of the label.
hist(value$thr, main=paste0("freq: ", unique(df$freq)[i]))
}
I would also like to learn how to label each histogram based on the levels of TWO factors (not just one)...
It's more convenient to use by here.
For the titles we prefer characters to factors.
# Store both grouping columns as character so they paste cleanly into titles.
df1$freq <- as.character(df1$freq)
df1$time <- as.character(df1$time)
Then open windows,
# Open the plotting device and split it into a 3 x 7 grid of panels
# (panel numbers 1..21 fill the matrix column by column).
windows(width = 19, height = 10)
par(
  las = 1,
  cex.lab = 0.75,
  cex.axis = 0.6,
  bty = "n",
  mgp = c(1, 0.6, 0),
  oma = c(2, 4, 2, 0) + 0.1,
  mar = c(4, 0, 3, 3) + 0.1
)
panel_grid <- matrix(1:21, nrow = 3, ncol = 7)
a <- layout(panel_grid)
layout.show(a)
and plot.
# One histogram of thr per freq/time subset; the title joins the values of
# columns 1 and 3 (freq and time) taken from the subset's first row.
by(df1, df1[c("freq", "time")], function(x) {
  first_row_label <- paste(x[1, c(1, 3)], collapse = ",")
  hist(x$thr, main = paste("freq:", first_row_label))
})
Result
Edit
To get the specific order we probably have to do some more stuff.
# Prepare an explicitly ordered set of subsets: convert the grouping columns,
# open a row-wise 3 x 7 layout, split the data, and build a numeric key matrix
# that the subsets can be sorted by.
# Character columns are used so the values paste cleanly into titles.
df1[c("freq", "time")] <- lapply(df1[c("freq", "time")], as.character)
windows(width=19, height=10)
par(las=1, cex.lab=0.75, cex.axis=0.6, bty="n", mgp=c(1, 0.6, 0),
oma=c(2, 4, 2, 0) + 0.1, mar=c(4, 0, 3, 3) + 0.1)
# Panels are numbered along rows, so consecutive plots fill a row first.
a <- layout(matrix(1:21, 3, 7, byrow=TRUE)) # with byrow
layout.show(a)
# One data frame per freq/time combination.
l <- split(df1, df1[c("freq", "time")])
# Columns 1 and 3 of df1 are freq and time; after t() each subset is one row of m.
m <- t(sapply(l, function(x) x[1, c(1, 3)])) # matrix of first rows of each subset
# Strip the "m" from the time values so they can be converted to numbers.
m[, 2] <- sub("m", "", m[, 2]) # use the values...
# Convert every cell, giving a numeric matrix: column 1 = freq, column 2 = time.
m <- apply(m, 1:2, as.numeric) # ... make numeric
Now we obtain the histograms within a lapply over the list ordered by m.
# Draw the subsets sorted by the second column of m (time), then by the
# first (freq); each title joins the first row's values of columns 1 and 3.
plot_order <- order(m[, 2], m[, 1])
lapply(l[plot_order], function(x) {
  first_row_label <- paste(x[1, c(1, 3)], collapse = ",")
  hist(x$thr, main = paste("freq:", first_row_label))
})
New Result
Data
df1 <- structure(list(freq = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("4",
"8", "12.5", "16", "20", "25", "31.5"), class = "factor"), thr = c(60L,
25L, 20L, 15L, 15L, 30L, 35L, 60L, 25L, 10L, 15L, 15L, 30L, 35L,
55L, 30L, 15L, 15L, 10L, 25L, 40L, 50L, 25L, 15L, 10L, 15L, 20L,
40L, 50L, 30L, 10L, 15L, 15L, 20L, 25L, 50L, 25L, 10L, 10L, 10L,
20L, 25L, 45L, 20L, 10L, 10L, 10L, 20L, 25L, 45L, 15L, 10L, 10L,
10L, 20L, 30L, 60L, 30L, 10L, 10L, 10L, 15L, 30L, 50L, 25L, 10L,
10L, 10L, 20L, 30L, 45L, 25L, 15L, 10L, 15L, 30L, 35L, 50L, 25L,
15L, 10L, 15L, 25L, 35L, 60L, 25L, 10L, 10L, 15L, 20L, 30L, 60L,
25L, 5L, 5L, 10L, 20L, 30L, 45L, 20L, 5L, 10L, 10L, 20L, 30L,
45L, 20L, 10L, 10L, 10L, 20L, 30L, 60L, 30L, 15L, 10L, 15L, 25L,
30L, 55L, 25L, 10L, 10L, 10L, 20L, 30L, 55L, 35L, 10L, 10L, 10L,
20L, 30L, 60L, 35L, 15L, 10L, 10L, 15L, 25L, 50L, 30L, 10L, 10L,
10L, 20L, 25L, 55L, 25L, 10L, 10L, 15L, 25L, 25L, 65L, 30L, 10L,
10L, 15L, 20L, 30L, 60L, 30L, 15L, 15L, 15L, 15L, 30L, 55L, 35L,
15L, 15L, 15L, 25L, 35L, 55L, 35L, 15L, 15L, 15L, 25L, 35L, 60L,
35L, 15L, 15L, 15L, 25L, 35L, 60L, 30L, 10L, 10L, 15L, 25L, 35L,
55L, 30L, 15L, 10L, 10L, 25L, 30L, 50L, 25L, 10L, 10L, 10L, 20L,
30L, 55L, 30L, 10L, 10L, 15L, 20L, 30L, 55L, 30L, 10L, 15L, 20L,
25L, 35L, 55L, 25L, 15L, 15L, 15L, 25L, 40L, 50L, 20L, 10L, 10L,
20L, 30L, 40L, 45L, 25L, 10L, 10L, 10L, 20L, 30L, 50L, 25L, 10L,
10L, 10L, 20L, 25L, 55L, 20L, 10L, 10L, 15L, 25L, 35L, 50L, 20L,
10L, 10L, 15L, 25L, 30L, 45L, 20L, 15L, 10L, 10L, 20L, 30L, 50L,
20L, 15L, 15L, 15L, 20L, 30L, 60L, 35L, 15L, 10L, 15L, 25L, 30L,
60L, 35L, 15L, 15L, 15L, 30L, 35L, 55L, 25L, 10L, 15L, 15L, 25L,
35L, 50L, 30L, 10L, 15L, 15L, 25L, 35L, 55L, 25L, 20L, 15L, 15L,
25L, 30L, 55L, 25L, 15L, 15L, 15L, 30L, 35L), time = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L), .Label = c("3m", "6m", "9m"), class = "factor")), row.names = c(NA,
-322L), class = "data.frame")
How can you use rbind in a for loop that runs through a list of dataframes? I tried to follow Looping through list of data frames in R but receive the following:
Error in apply(dataFramesList, 2, function(x) { :
dim(X) must have a positive length
I have two dataframes, dfTraining and dfAccuracy (code to reproduce dataframes is below), and need to add a row for any of the crop types missing from either of two columns, CROP or CROP_LABEL. I believe my problem is in my last line of code.
My code block is:
# Attempt to append a placeholder row to each data frame for every crop number
# that is absent from its CROP or CROP_LABEL column (reported as failing).
dataFramesList <- list(dfTraining, dfAccuracy)
# NOTE(review): apply() needs an object with dim(); a plain list has none,
# which matches the reported "dim(X) must have a positive length" error.
# lapply() is the function that iterates over list elements.
apply(dataFramesList, 2, function(x){
# Crop numbers to check: 1..23 without the third element (3), plus 34.
cropNumbers <- seq(1,23, by = 1)
cropNumbers <- cropNumbers[-c(3)]
cropNumbers <- append(cropNumbers, 34)
# NOTE(review): the function argument is named x, but the body refers to
# dataFrameList (no "s"), which is not defined anywhere in the visible code.
listofCROPandCROP_LABELColumns <- list(dataFrameList$CROP, dataFrameList$CROP_LABEL)
# NOTE(review): missingCROP is initialised here but never used; the loop
# assigns missingCrop (different capitalisation).
missingCROP <- NULL
for (i in listofCROPandCROP_LABELColumns){
for (j in cropNumbers){
if (!j %in% i){
# If crop number is missing from CROP_LABEL, add missingCROP observation (row)
# Make row for missing crop type
missingCrop <- list(FREQUENCY = 0, AA = 1, CROP = j, CROP_LABEL = j, ACRES = 0)
dataFrameList <- rbind(dataFrameList, missingCrop)
}
}
}
# NOTE(review): the function ends with the for loop, whose value is NULL, so
# the extended data frame is never returned to the caller.
})
My dfAccuracy dataframe:
structure(list(FREQUENCY = c(4L, 2L, 1L, 1L, 1L, 1L, 65L, 1L,
1L, 4L, 1L, 5L, 5L, 2L, 4L, 1L, 1L, 1L, 1L, 4L, 9L, 2L, 1L, 1L,
1L, 2L, 4L, 1L, 2L, 18L, 1L, 10L, 3L, 1L, 7L, 1L, 1L, 1L, 3L,
1L, 7L, 1L), AA = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
CROP = c(1L, 4L, 12L, 13L, 14L, 18L, 1L, 1L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 13L,
13L, 13L, 13L, 14L, 14L, 14L, 18L, 18L, 18L, 18L, 18L, 19L,
19L, 21L, 21L, 21L, 21L), CROP_LABEL = c(1L, 4L, 14L, 13L,
12L, 18L, 1L, 4L, 5L, 6L, 18L, 1L, 4L, 6L, 14L, 18L, 12L,
14L, 18L, 1L, 6L, 14L, 18L, 18L, 4L, 6L, 13L, 21L, 12L, 14L,
18L, 1L, 6L, 14L, 18L, 21L, 1L, 19L, 6L, 13L, 21L, 34L),
ACRES = c(331.737184484, 193.772138572, 26.48543619, 73.2696289437,
112.470306056, 66.6556450342, 3905.71121736, 24.9581079934,
39.9287379709, 259.662359273, 85.2786247851, 306.051491303,
368.342995232, 154.82030835, 265.754349805, 70.3722566979,
35.4066607701, 139.336463432, 58.4307705147, 251.070357093,
471.031628349, 150.965736858, 28.2780117926, 35.3426930108,
34.5730542194, 67.7383953308, 144.442123948, 33.2746560126,
69.4072817311, 1219.65459596, 92.4840910734, 582.983473317,
191.957841327, 35.708775262, 319.638682538, 60.6889287642,
82.6244195055, 36.2898952104, 267.422844756, 72.8352758659,
489.746546145, 65.5392893502)), row.names = c(25L, 26L, 27L,
29L, 30L, 31L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L,
70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L,
83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L
), class = "data.frame")
and my dfTraining dataframe is:
structure(list(FREQUENCY = c(7L, 1L, 1L, 4L, 2L, 6L, 1L, 107L,
1L, 21L, 1L, 1L, 1L, 2L, 1L, 19L, 3L, 1L, 1L, 12L, 1L, 2L, 32L,
2L, 2L, 29L, 2L, 18L, 1L), AA = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), CROP = c(1L, 1L, 4L, 4L, 12L, 13L, 21L,
1L, 1L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 12L, 13L, 14L, 14L,
14L, 18L, 18L, 18L, 19L, 21L, 34L), CROP_LABEL = c(1L, 4L, 1L,
4L, 12L, 13L, 21L, 1L, 6L, 4L, 6L, 1L, 5L, 14L, 18L, 6L, 14L,
1L, 12L, 13L, 1L, 6L, 14L, 6L, 14L, 18L, 19L, 21L, 34L), ACRES = c(624.940370218,
26.9188766351, 37.8773839813, 291.79294767, 140.949264214, 391.571023675,
44.5217011939, 6806.02216989, 72.7500299887, 1676.12121152, 14.8739557721,
67.0700291739, 59.7438207953, 82.6713019474, 75.62666152, 1370.78710769,
145.215281276, 41.7380537313, 66.5236760194, 679.91208779, 70.9661875374,
38.8514254734, 1749.63365551, 109.917242057, 79.7758083723, 1660.85759895,
96.8771921798, 1428.71888481, 69.473161379)), row.names = c(18L,
19L, 20L, 21L, 22L, 23L, 24L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L), class = "data.frame")
I have a dataframe df
df<-structure(list(subject = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L,
36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L,
49L, 50L, 51L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L), sex = c(1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L), age = c(29L, 54L, 67L,
36L, 48L, 37L, 25L, 46L, 37L, 33L, 25L, 26L, 28L, 59L, 46L, 50L,
55L, 56L, 37L, 30L, 38L, 30L, 50L, 39L, 29L, 46L, 48L, 46L, 55L,
32L, 66L, 35L, 48L, 54L, 38L, 31L, 42L, 36L, 27L, 63L, 45L, 31L,
26L, 38L, 43L, 52L, 36L, 43L, 65L, 46L, 42L, 29L, 54L, 67L, 36L,
48L, 37L, 25L, 46L, 37L, 33L, 25L, 26L, 28L, 59L, 46L, 50L, 55L,
56L, 37L, 30L, 38L, 30L, 50L, 39L, 29L, 46L, 48L, 46L, 55L, 32L,
66L, 35L, 48L, 54L, 38L, 31L, 42L, 36L, 27L, 63L, 45L, 31L, 26L,
38L, 43L, 52L, 36L, 43L, 65L, 46L, 42L), edu = c(4L, 3L, 3L,
3L, 4L, 2L, 3L, 3L, 1L, 3L, 4L, 4L, 5L, 1L, 1L, 2L, 2L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 2L, 1L, 2L, 2L, 4L, 2L, 4L, 4L, 3L, 3L,
4L, 5L, 3L, 3L, 4L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 6L, 1L, 3L,
4L, 3L, 3L, 3L, 4L, 2L, 3L, 3L, 1L, 3L, 4L, 4L, 5L, 1L, 1L, 2L,
2L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 1L, 2L, 2L, 4L, 2L, 4L,
4L, 3L, 3L, 4L, 5L, 3L, 3L, 4L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 1L,
6L, 1L, 3L), biz_exp = c(5L, 15L, 3L, 4L, 10L, 6L, 0L, 5L, 8L,
5L, 0L, 8L, 3L, 23L, 5L, 7L, 5L, 11L, 4L, 4L, 11L, 3L, 15L, 4L,
4L, 6L, 6L, 5L, 13L, 2L, 13L, 6L, 8L, 27L, 7L, 3L, 11L, 5L, 1L,
4L, 8L, 8L, 4L, 15L, 18L, 30L, 9L, 14L, 18L, 21L, 16L, 5L, 15L,
3L, 4L, 10L, 6L, 0L, 5L, 8L, 5L, 0L, 8L, 3L, 23L, 5L, 7L, 5L,
11L, 4L, 4L, 11L, 3L, 15L, 4L, 4L, 6L, 6L, 5L, 13L, 2L, 13L,
6L, 8L, 27L, 7L, 3L, 11L, 5L, 1L, 4L, 8L, 8L, 4L, 15L, 18L, 30L,
9L, 14L, 18L, 21L, 16L), turnov = c(36L, NA, 12L, 9L, 48L, 9L,
8L, 24L, 4L, 250L, NA, 600L, 6L, 6L, 10L, 10L, 5L, 4L, 250L,
200L, 50L, 150L, 48L, NA, 9L, 6L, 2L, NA, NA, 3L, 7L, 23L, 75L,
7L, 5L, NA, 20L, 450L, 5L, 32L, 21L, 12L, 6L, 4L, 24L, 7L, 10L,
12L, 12L, 14L, 18L, 36L, NA, 12L, 9L, 48L, 9L, 8L, 24L, 4L, 250L,
NA, 600L, 6L, 6L, 10L, 10L, 5L, 4L, 250L, 200L, 50L, 150L, 48L,
NA, 9L, 6L, 2L, NA, NA, 3L, 7L, 23L, 75L, 7L, 5L, NA, 20L, 450L,
5L, 32L, 21L, 12L, 6L, 4L, 24L, 7L, 10L, 12L, 12L, 14L, 18L),
loc_pr = c(1L, 1L, 1L, 6L, 1L, 6L, 4L, 1L, 8L, 5L, 1L, 3L,
1L, 1L, 1L, 1L, 5L, 8L, 2L, 1L, 1L, 1L, 1L, 2L, 8L, 2L, 4L,
4L, 2L, 2L, 2L, 1L, 4L, 5L, 4L, 4L, 4L, 4L, NA, 4L, 5L, 5L,
5L, 8L, 1L, 2L, 4L, 3L, 3L, 4L, 3L, 1L, 1L, 1L, 6L, 1L, 6L,
4L, 1L, 8L, 5L, 1L, 3L, 1L, 1L, 1L, 1L, 5L, 8L, 2L, 1L, 1L,
1L, 1L, 2L, 8L, 2L, 4L, 4L, 2L, 2L, 2L, 1L, 4L, 5L, 4L, 4L,
4L, 4L, NA, 4L, 5L, 5L, 5L, 8L, 1L, 2L, 4L, 3L, 3L, 4L, 3L
), type = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 4L, 2L, 1L, 1L, 2L, 4L, 1L, 2L, 1L,
1L, 4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 4L, 2L, 1L,
1L, 2L, 4L, 1L, 2L, 1L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L
), age_rec = c(2L, 4L, 4L, 100L, 4L, 100L, 100L, 4L, 100L,
2L, 1L, 2L, 2L, 4L, 4L, 4L, 4L, 100L, 3L, 2L, 3L, 2L, 4L,
3L, 100L, 27L, 100L, 100L, 4L, 2L, 100L, 2L, 4L, 30L, 3L,
2L, 59L, 8L, 100L, 27L, 3L, 59L, 2L, 59L, 3L, 59L, 3L, 3L,
4L, 64L, 3L, 2L, 4L, 4L, 100L, 4L, 100L, 100L, 4L, 100L,
2L, 1L, 2L, 2L, 4L, 4L, 4L, 4L, 100L, 3L, 2L, 3L, 2L, 4L,
3L, 100L, 27L, 100L, 100L, 4L, 2L, 100L, 2L, 4L, 30L, 3L,
2L, 59L, 8L, 100L, 27L, 3L, 59L, 2L, 59L, 3L, 59L, 3L, 3L,
4L, 64L, 3L), biz_exp_rec = c(2L, 4L, 2L, 3L, 3L, 3L, 1L,
2L, 3L, 2L, 1L, 3L, 2L, 4L, 2L, 3L, 2L, 4L, 2L, 2L, 4L, 2L,
4L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 4L, 3L, 3L, 4L, 3L, 2L, 3L,
3L, 2L, 4L, 3L, 2L, 2L, 3L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 2L,
4L, 2L, 3L, 3L, 3L, 1L, 2L, 3L, 2L, 1L, 3L, 2L, 4L, 2L, 3L,
2L, 4L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 4L,
3L, 3L, 4L, 3L, 2L, 3L, 3L, 2L, 4L, 3L, 2L, 2L, 3L, 4L, 4L,
3L, 4L, 4L, 4L, 4L), turnov_rec = structure(c(3L, NA, 3L,
2L, 3L, 3L, 1L, 3L, 3L, 4L, NA, 4L, 2L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 3L, 4L, 3L, 5L, 2L, 3L, 3L, 2L, NA, 2L, 4L, 3L, 4L,
4L, 2L, NA, 4L, 2L, 1L, 2L, 3L, 3L, 2L, 4L, 3L, 4L, 2L, 3L,
3L, 4L, 3L, 3L, NA, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 4L, NA, 4L,
2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 3L, 4L, 3L, NA, 2L, 3L, 3L,
2L, NA, 2L, 4L, 3L, 4L, 4L, 2L, NA, 4L, 2L, 1L, 2L, 3L, 3L,
2L, 4L, 3L, 4L, 2L, 3L, 3L, 4L, 3L), .Label = c("1", "2",
"3", "4", "MA"), class = "factor"), bundle = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), investment = c(86L,
100L, 100L, 75L, 100L, 59L, 68L, 86L, 80L, 100L, 86L, 100L,
100L, 100L, 100L, 100L, 100L, 93L, 64L, 100L, 24L, 18L, 89L,
75L, 80L, 29L, 54L, 65L, 100L, 27L, 59L, 30L, 59L, 43L, 59L,
59L, 5L, 26L, 100L, 75L, 59L, 5L, 59L, 74L, 59L, 79L, 75L,
75L, 86L, 66L, 86L, 55L, 100L, 68L, 1L, 75L, 1L, 1L, 79L,
1L, 54L, 48L, 33L, 55L, 90L, 85L, 39L, 70L, 1L, 45L, 54L,
33L, 3L, 44L, 75L, 1L, 1L, 1L, 1L, 96L, 26L, 1L, 23L, 66L,
1L, 89L, 83L, 52L, 61L, 1L, 88L, 45L, 72L, 60L, 1L, 60L,
2L, 86L, 10L, 63L, 1L, 88L)), .Names = c("subject", "sex",
"age", "edu", "biz_exp", "turnov", "loc_pr", "type", "age_rec",
"biz_exp_rec", "turnov_rec", "bundle", "investment"), class = "data.frame", row.names = c(NA,
-102L))
In this data frame, investment is my dependent variable and the other variables are my independent variables. My subjects are crossed with type of bundle. First of all, I would like to know whether bundling (bundle = 1 means that people bundle and bundle = 0 means that people do not bundle) has an effect on the investment.
I have fitted this linear mixed-effects model, but I am not sure whether it is correct, as my p-values are equal to zero.
library(nlme)
# Linear mixed-effects fit of investment on bundle, with a random intercept
# for each subject and for each bundle level nested within subject.
# NOTE(review): if every subject contributes a single row per bundle level,
# the nested term has one group per observation and cannot be separated from
# the residual variance -- confirm against the data before trusting p-values.
model <- lme(
  fixed = investment ~ bundle,
  random = ~ 1 | subject / bundle,
  data = df
)
I have also tried to make an anova with repeated measures as such:
# Repeated-measures ANOVA of investment on bundle, with error strata for
# subject and for bundle within subject.
aov(formula = investment ~ bundle + Error(subject / bundle), data = df)
It runs, but I am not sure whether the model formula is right.
Could anyone help me with that?
To find genotype frequency across SNPs I need to find the proportion of a certain genotype (XX, YX, or YY) in the total number of samples (XX, YX, and YY). I think I would need to start my dplyr statement with
dat %>% group_by(Assay) %>%
but I don't know how to finish it. The data, dat, are shown below, with the dput() output at the bottom.
Source: local data frame [143 x 3]
Groups: Assay
Assay Final n
1 One_apoe-83 Invalid 2
2 One_apoe-83 No Call 9
3 One_apoe-83 NTC 2
4 One_apoe-83 XX 4
5 One_apoe-83 YX 41
6 One_apoe-83 YY 134
7 One_CD9-269 Invalid 2
8 One_CD9-269 No Call 5
9 One_CD9-269 NTC 2
10 One_CD9-269 XX 99
.. ... ... ...
I could use a for loop across SNPs to get what I'm looking for with boolean patterning for each genotype but that would be very verbose.
for(i in seq(levels(dat$Assay))) {
storage_df[i,1] <- dat[dat$Assay == levels(dat$Assay)[i],]$XX / (dat[dat$Assay == levels(dat$Assay)[i],]$XX + dat[dat$Assay == levels(dat$Assay)[i],]$YX + dat[dat$Assay == levels(dat$Assay)[i],]$XY) ...
You get the point. How would I do this in dplyr? The whole object is below.
dat <- structure(list(Assay = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L,
12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L,
14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L,
18L, 19L, 19L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 20L, 20L,
21L, 21L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 22L, 22L, 23L,
23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L, 24L), .Label = c("One_apoe-83",
"One_CD9-269", "One_Cytb_26", "One_E2", "One_ghsR-66", "One_IL8r-362",
"One_KPNA-422", "One_lpp1-44", "One_MHC2_190", "One_MHC2_251",
"One_Prl2", "One_redd1-414", "One_STC-410", "One_STR07", "One_sys1-230",
"One_U1004-183", "One_U1105", "One_U1201-492", "One_U1203-175",
"One_U1209-111", "One_U1212-106", "One_U401-224", "One_vamp5-255",
"One_ZNF-61"), class = "factor"), Final = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("Invalid",
"No Call", "NTC", "XX", "YX", "YY"), class = "factor"), n = c(2L,
9L, 2L, 4L, 41L, 134L, 2L, 5L, 2L, 99L, 75L, 9L, 2L, 7L, 2L,
110L, 71L, 2L, 8L, 2L, 110L, 59L, 11L, 2L, 6L, 2L, 67L, 86L,
29L, 2L, 3L, 2L, 152L, 28L, 5L, 2L, 4L, 2L, 78L, 81L, 25L, 2L,
4L, 2L, 115L, 62L, 7L, 2L, 17L, 2L, 80L, 62L, 29L, 2L, 13L, 2L,
59L, 68L, 48L, 2L, 7L, 2L, 48L, 86L, 47L, 2L, 7L, 2L, 42L, 87L,
52L, 2L, 3L, 2L, 47L, 81L, 57L, 2L, 9L, 2L, 40L, 85L, 54L, 2L,
8L, 2L, 52L, 86L, 42L, 2L, 7L, 2L, 9L, 39L, 133L, 2L, 8L, 2L,
101L, 71L, 8L, 2L, 13L, 2L, 20L, 82L, 73L, 2L, 11L, 2L, 27L,
75L, 75L, 2L, 6L, 2L, 3L, 40L, 139L, 2L, 13L, 2L, 59L, 82L, 34L,
2L, 19L, 2L, 20L, 84L, 65L, 2L, 11L, 2L, 119L, 47L, 11L, 2L,
8L, 2L, 51L, 100L, 29L)), class = "data.frame", .Names = c("Assay",
"Final", "n"), row.names = c(NA, -143L))
I hope I am not misunderstanding. Are you looking for the following?
Assume the data structure is:
# Small example data frame: ten rows with an assay name (Assay), a call
# category (Final) and a count (n).
df <- structure(
  list(
    Assay = structure(
      c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
      .Label = c("One_apoe-83", "One_CD9-269"),
      class = "factor"
    ),
    Final = structure(
      c(1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L),
      .Label = c("Invalid", "No Call", "NTC", "XX", "YX", "YY"),
      class = "factor"
    ),
    n = c(2L, 9L, 2L, 4L, 41L, 134L, 2L, 5L, 2L, 99L)
  ),
  .Names = c("Assay", "Final", "n"),
  class = "data.frame",
  row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")
)
Code
# Within each assay, express every count as a percentage of that assay's
# total count.
df %>%
  group_by(Assay) %>%
  mutate(n_percent = n / sum(n) * 100)
# Assay Final n n_percent
# 1 One_apoe-83 Invalid 2 1.041667
# 2 One_apoe-83 No Call 9 4.687500
# 3 One_apoe-83 NTC 2 1.041667
# 4 One_apoe-83 XX 4 2.083333
# 5 One_apoe-83 YX 41 21.354167
# 6 One_apoe-83 YY 134 69.791667
# 7 One_CD9-269 Invalid 2 1.851852
# 8 One_CD9-269 No Call 5 4.629630
# 9 One_CD9-269 NTC 2 1.851852
# 10 One_CD9-269 XX 99 91.666667
Option 2
Here is the code based on the comment. A line is added to filter out the elements you don't want.
# Drop the rows whose call is "Invalid", "No Call" or "NTC" first, so the
# percentages are taken over the remaining rows of each assay only.
df %>%
  filter(!(Final %in% c("Invalid", "No Call", "NTC"))) %>%
  group_by(Assay) %>%
  mutate(n_percent = n / sum(n) * 100)
# Source: local data frame [4 x 4]
# Groups: Assay
#
# Assay Final n n_percent
# 1 One_apoe-83 XX 4 2.234637
# 2 One_apoe-83 YX 41 22.905028
# 3 One_apoe-83 YY 134 74.860335
# 4 One_CD9-269 XX 99 100.000000
I would like to create a stacked bar graph that contains two levels of x-axis labels. For each stacked bar there is the primary label (dat$HUC_12_NAM), then I would like to group these stacked bars by dat$HUC_10_NAM and label this group as well. I could likely use annotate to manually define and place the labels, but that would be very time consuming, clunky, and could easily result in mis-labeling.
Here is the data....
dat <- structure(list(HUC_12_NAM = structure(c(3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Apostle Islands",
"Raspberry River-Frontal Lake Superior", "Sand River", "Saxine Creek-Frontal Lake Superior"
), class = "factor"), HUC_10_NAM = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Chequamegon Bay-Frontal Lake Superior",
"Sand River-Frontal Lake Superior"), class = "factor"), variable = structure(c(9L,
8L, 4L, 1L, 6L, 11L, 14L, 13L, 10L, 7L, NA, 5L, 15L, 3L, 2L,
12L, 8L, 6L, 3L, 2L, 4L, 1L, 15L, 5L, 11L, 14L, 10L, 9L, 13L,
7L, 12L, NA, 12L, 4L, 10L, 8L, 3L, NA, 2L, 6L, 1L, 13L, 7L, 11L,
9L, 14L, 5L, 15L, 9L, 1L, 8L, 12L, 10L, 4L, 3L, 11L, NA, 7L,
15L, 13L, 14L, 6L, 5L, 2L), .Label = c("Agriculture", "Barren land",
"Developed - High intensity", "Developed - Medium intensity",
"Developed - Low intensity", "Developed - Open space", "Evergreen forest",
"Deciduous forest", "Mixed forest", "Herbaceous", "Pasture",
"Shrub", "Woody wetland", "Herbaceous wetland", "Water"), class = "factor"),
perc_veg = c(11.8839579283911, 57.2626205743974, 0.00544969027593598,
0.514995731075951, 2.59586913477084, 2.53864738687351, 0.108085523806064,
5.3007320750604, 0.731166778688078, 6.04007338916238, 0,
0.0953695798288797, 0.11807662264528, 0, 0.00363312685062399,
12.8013224581736, 58.9563880536275, 4.47423752571726, 0.0158260043860641,
0.101738599624698, 0.0633040175442563, 0.180868621555018,
1.07390744048292, 0.300694083335217, 2.65876873685876, 0.00226085776943772,
0.065564875313694, 15.484614862879, 2.68363817232258, 7.99665393050123,
5.94153421808234, 0, 2.79708137828397, 0.0260443580892536,
0.0078546476777114, 30.3801236073503, 0.028524773145373,
0, 0.470038653134625, 1.99838773021352, 0.0355526158043779,
4.43084809524794, 23.6515843651171, 0.169081626325472, 32.6501167862089,
0.595713015978007, 0.174455858947064, 2.5845924884764, 23.2366527830367,
0.25141991669822, 52.6482393032942, 3.73494888299886, 0.136312003029156,
0.00605831124574025, 0, 1.85535781900795, 0, 11.0851950018932,
0.427110942824688, 2.85800833017796, 0, 3.54714123438092,
0.146914047709201, 0.0666414237031428)), .Names = c("HUC_12_NAM",
"HUC_10_NAM", "variable", "perc_veg"), row.names = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L,
91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L,
103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L), class = "data.frame")
And here is the current stacked bar plot...
library(ggplot2)
# Stacked bars: one bar per HUC_12_NAM, bar segments filled by `variable`,
# heights taken directly from perc_veg.
p <- ggplot() +
  geom_bar(
    data = dat,
    aes(x = HUC_12_NAM, y = perc_veg, fill = variable),
    stat = "identity"
  ) +
  coord_flip() # flipping the axes helps fit the x labels
p
And the resulting plot...
The next label, or grouping, would be from dat$HUC_10_NAM and in this example would add two additional labels, 'Sand River-Frontal Lake Superior' and 'Chequamegon Bay-Frontal Lake Superior'.
Maybe this would just be too cluttered...especially with the long names. But, I would like to see if there is a way to add these second level labels quickly and easily.
Thanks
-cherrytree
If you're willing to facet instead of adding a second row of labels, then you can do this:
# Same stacked bars, split into one panel per HUC_10_NAM so the facet strip
# acts as the second-level label.
ggplot(dat, aes(HUC_12_NAM, perc_veg, fill = variable)) +
  geom_bar(stat = "identity") +
  facet_grid(. ~ HUC_10_NAM, scales = "free")
Incidentally, you can reformat the longer labels with a line-break, for example:
# Replace every hyphen in the first two columns with a line break; gsub()
# returns character vectors, so these columns stop being factors.
dat[, 1:2] <- lapply(dat[, 1:2], function(col) gsub("-", "\n", col))