Related
Data:
structure(list(ID = c(19903L, 28185L, 28207L, 28429L, 28522L,
29092L, 29127L, 29219L, 29304L, 30981L, 31166L, 31411L, 32010L,
33231L, 33640L, 33714L, 34093L, 34193L, 34385L, 35054L, 35337L,
35377L, 35608L, 35881L, 35940L, 37112L, 37122L, 37125L, 37170L,
37198L, 37266L, 37378L, 37589L, 37725L, 37877L, 38519L, 38522L,
38605L, 38623L, 38806L, 39040L, 39083L, 39159L, 39218L, 39593L,
39636L, 39657L, 39686L, 39700L, 39819L, 39820L, 39951L, 40151L,
40152L, 40181L, 40226L, 40248L, 40286L, 40382L, 40556L, 40623L,
40628L, 40798L, 40800L, 40815L, 40915L, 43282L, 43299L, 43450L,
43466L, 43509L, 43677L, 43740L, 43762L, 43998L, 44068L, 44130L,
44131L, 44307L, 44408L, 50679L, 50848L, 51064L, 51455L, 51690L,
51726L, 51727L, 51796L, 52126L, 52183L, 52461L, 52500L, 52502L,
52577L, 52614L, 53202L, 53320L, 53390L, 53456L, 53473L, 53474L,
53475L, 53577L, 53626L, 53851L, 53873L, 54153L, 54206L, 54532L,
54581L, 54913L, 55122L, 55267L, 55332L, 55462L, 55542L, 55612L,
55728L, 55867L, 55903L, 55920L, 55991L, 56022L, 56098L, 56307L,
56420L, 56679L, 56703L, 56746L, 56919L, 57005L, 57035L, 57405L,
57445L, 57480L, 57725L, 57808L, 57809L, 57863L, 58004L, 58060L,
58130L, 58145L, 58215L, 58229L, 58503L, 58515L, 58667L, 58999L,
59326L, 59327L, 59344L, 59361L, 59428L, 59756L, 59865L, 60099L,
60100L, 60169L, 60252L, 60280L, 60306L, 60384L, 60429L, 60472L,
60493L, 60503L, 60575L, 60603L, 60662L, 60664L, 60806L, 60846L,
60925L, 61274L, 61415L, 61727L, 61749L, 61882L, 61883L, 62081L,
62144L, 62210L, 62285L, 62411L, 62809L, 62917L, 62934L, 62937L,
62983L, 62989L, 63327L, 63329L, 63383L, 63458L, 63470L, 63589L,
64081L, 64328L, 64418L, 64507L, 64596L, 65178L, 65250L, 65302L,
65478L, 65480L, 65487L, 65565L, 65572L, 65574L, 65617L, 65802L,
65865L, 65934L, 65935L, 65974L, 65975L, 65978L, 65991L, 65995L,
66013L, 66154L, 66232L, 66237L, 66245L, 66314L, 66389L, 66396L,
66460L, 66572L, 66589L, 66735L, 67174L, 73230L, 73525L, 73539L,
73677L, 73705L, 73942L, 73953L, 74034L, 74113L, 74114L, 74425L,
74427L, 74439L, 74607L, 74618L, 74641L, 74657L, 74794L, 74800L,
74836L, 74942L, 74952L, 74962L, 74969L, 74975L, 74977L, 74985L,
74989L, 75220L, 75229L, 75377L, 75407L, 75432L, 75653L, 75732L,
75735L, 75737L, 75757L, 75895L, 75898L, 76381L, 76559L, 76574L,
76594L, 76595L, 76746L, 76751L, 76755L, 76759L, 76775L, 77088L,
77091L, 77099L, 77109L, 77134L, 77182L, 77188L, 77203L, 77204L,
77252L, 77304L, 77453L, 77528L, 77556L, 77585L, 77668L, 77733L,
77758L, 78262L, 79724L, 79730L, 79747L, 79850L, 79977L, 80052L,
80819L, 80901L, 80932L, 81064L, 81065L, 81071L, 81098L, 81112L,
81142L, 81175L, 81727L, 81938L, 82554L, 83744L, 83949L), Age = c(83L,
26L, 26L, 20L, 84L, 20L, 23L, 77L, 32L, 14L, 21L, 9L, 76L, 18L,
21L, 15L, 75L, 27L, 34L, 81L, 81L, 15L, 24L, 24L, 16L, 35L, 27L,
7L, 30L, 31L, 24L, 24L, 79L, 30L, 19L, 78L, 25L, 20L, 42L, 62L,
83L, 79L, 18L, 26L, 66L, 23L, 83L, 21L, 77L, 24L, 57L, 42L, 32L,
76L, 85L, 29L, 77L, 65L, 79L, 9L, 34L, 20L, 11L, 16L, 9L, 21L,
16L, 34L, 22L, 19L, 23L, 25L, 14L, 53L, 28L, 79L, 22L, 22L, 21L,
82L, 81L, 16L, 19L, 77L, 15L, 18L, 15L, 78L, 24L, 16L, 14L, 29L,
18L, 50L, 17L, 43L, 8L, 14L, 85L, 31L, 20L, 30L, 23L, 78L, 29L,
6L, 61L, 14L, 22L, 10L, 83L, 15L, 13L, 15L, 15L, 29L, 8L, 9L,
15L, 8L, 9L, 15L, 9L, 34L, 8L, 9L, 9L, 16L, 8L, 25L, 21L, 23L,
13L, 56L, 10L, 7L, 27L, 8L, 8L, 8L, 8L, 80L, 80L, 6L, 15L, 42L,
25L, 23L, 21L, 8L, 11L, 43L, 69L, 34L, 34L, 14L, 12L, 10L, 22L,
78L, 16L, 76L, 12L, 10L, 16L, 6L, 13L, 66L, 11L, 26L, 12L, 16L,
13L, 24L, 76L, 10L, 20L, 13L, 25L, 14L, 12L, 15L, 43L, 51L, 27L,
15L, 24L, 34L, 63L, 17L, 15L, 9L, 12L, 17L, 82L, 75L, 24L, 44L,
69L, 11L, 10L, 12L, 10L, 10L, 70L, 54L, 45L, 42L, 84L, 54L, 23L,
23L, 14L, 81L, 17L, 42L, 44L, 16L, 15L, 43L, 45L, 50L, 53L, 23L,
53L, 49L, 13L, 69L, 14L, 65L, 14L, 13L, 22L, 67L, 59L, 52L, 54L,
44L, 78L, 62L, 69L, 10L, 63L, 57L, 22L, 12L, 62L, 9L, 82L, 53L,
54L, 66L, 49L, 63L, 51L, 9L, 45L, 49L, 77L, 49L, 61L, 62L, 57L,
67L, 16L, 65L, 75L, 45L, 16L, 55L, 17L, 64L, 67L, 56L, 52L, 63L,
10L, 62L, 14L, 66L, 68L, 15L, 13L, 43L, 47L, 55L, 69L, 21L, 67L,
34L, 52L, 15L, 31L, 64L, 55L, 13L, 48L, 71L, 64L, 13L, 25L, 34L,
50L, 61L, 70L, 33L, 57L, 51L, 46L, 57L, 69L, 46L, 8L, 11L, 46L,
71L, 33L, 38L, 56L, 17L, 29L, 28L, 6L, 8L), Sex = structure(c(1L,
1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 2L), .Label = c("Male", "Female"), class = "factor"),
mean_FA_scaled = c(-1.52160414281774, -1.30073487609629,
-1.39164271432334, -1.83373601712535, -2.19478262184568,
-0.47769168350816, -1.66624867866514, -0.36061779499817,
-1.10976759821506, -2.01706489349897, -1.21708170925372,
-0.68001882107227, -0.770347444019124, -1.21756680205088,
-1.04908755742334, -0.654272701867476, 0.791455877697352,
0.0263414533200063, -1.48353521852673, -1.48465744813212,
0.885781086077571, 0.937258844105155, -1.76609091258925,
-1.40930154017838, -1.42620014597815, -0.395529996012095,
-1.79188771313106, -1.6968602062236, -1.6213377738768, -1.26578647412735,
-1.3364652186935, -1.52114801078458, 0.587760344033774, -1.4860765255686,
-1.41824317606643, -1.08076339305916, -1.84290933912549,
-1.42950167307528, -0.186882171702826, 0.94192876730175,
-1.96157606965602, -0.668579319288362, -1.2972378638421,
-2.10201405453099, 0.593407693015703, -1.87521507137852,
-0.399874110613579, -2.16173114991939, -1.71213049306692,
-2.03230549555918, 0.864393561856266, 1.66450706953957, -1.76062456838238,
-1.42625806750617, -0.635317881823001, -1.05738481631217,
-0.905876579394418, 0.0731565283419971, -1.15139145628828,
-0.742407546940581, -1.69348627721645, 0.153573329806466,
-1.09929828202549, -0.982123030841461, 0.725678742439884,
-0.850887328730634, -0.99078229928042, 0.215368360012574,
-0.402661584149531, 0.0241114744912448, -0.71105027970887,
0.366463906043185, 0.957024565541906, 0.669292134912623,
1.05465854121026, 1.82844671440856, -0.181835758574102, 0.736386984932541,
-1.09078381740658, 0.0590019549321627, -1.02109697900777,
0.321350275906775, -0.0449237467173357, 0.0239956314352051,
0.117669222625202, -0.725516181331811, 0.387590783388401,
0.829691326381412, 1.37355999410519, -0.459526044282955,
-0.460235583001197, -0.311304854080326, 0.578796987572713,
0.997164184459617, 0.18257029477137, 0.291839257380694, -0.863007408468775,
-1.87780705975741, 2.29568520056216, 0.00319456268509986,
0.881190804982003, 0.930713711438919, -0.525093214001351,
2.54459572703618, 0.166620153992923, 1.20602921449896, -0.289055747129726,
1.46280982859267, -0.391909900510859, 2.11139337878521, 1.59105533181948,
-0.209203680563451, -0.763585105622814, -0.373635658420616,
0.6654186327263, -1.62880965099135, -0.961003393687248, 0.201720599972912,
-0.335957704443747, 0.757593504378786, -0.162251041912412,
0.141221563956246, 0.0760670851249914, -2.24164331007099,
0.424957409152164, -0.0769326311392693, -0.0363368801884033,
0.30505984615121, -0.551628514025415, 0.33740901955026, -0.31017538428394,
0.966704700912213, -1.19032920349958, 0.711567610176064,
0.67279638735782, -0.599819225337876, 0.0996845881750585,
0.656310472445189, -0.0716472917074639, -0.483100106187007,
-0.511691620455773, 2.1239406297925, 1.29844301245453, 0.101559797644699,
-1.35720112572458, 0.307058138867893, -0.0785544339238233,
0.27531714151305, -0.660383423073563, -0.957274695320974,
-1.47069111968835, -0.526229923988739, -0.645664114765535,
-0.887580616731169, 0.119110020634694, -0.368379279752821,
-1.37513507883771, 0.756384392481372, 0.0675019391690662,
1.18129672203451, 0.788168830982229, 0.780204620879509, 0.283447876008828,
0.146224535938955, -0.389296191558966, 0.807326376374772,
0.590410253940679, -0.41226207741881, -1.02024263646948,
0.0042805913354707, -0.217414057160255, 0.302561980255357,
-0.0445038156391923, -0.782909175408415, 0.298159944125853,
0.0170233274998232, -0.0487465675666421, -0.456839933421037,
0.310127979852941, -0.787615299560023, -0.21877521306872,
-0.395986128045251, -0.266386709100983, 0.372589107631277,
-0.47845190356342, 0.546216128061583, -0.483150787524024,
-0.638590448156119, 2.21420409102033, 0.550980173741211,
0.781797462900053, 0.0321553266949922, 0.224223113608598,
0.45913835087484, 0.924827436153908, 2.19646562306427, -0.622017650951458,
0.554498906568413, -0.0470089217260485, -0.401307668432068,
-0.588777934059104, 0.462266113387909, 0.263008816808847,
-0.162403085923465, -0.062640494100388, 0.660965915259779,
0.113397509933743, 0.191685695243484, 1.14629763872856, 0.407899519150338,
0.473039517599588, 0.589070818605222, 1.07992680780889, 0.0233440142449823,
0.303792812725778, 0.560066613449315, -0.401387310533095,
-0.286101749200717, -0.673299923821975, 1.66157479218356,
1.44751130500445, 0.402802424684597, 1.46472123901732, -0.397311082998703,
-0.641768892006205, 0.839031172774602, -0.603272796446055,
1.48020076738061, -0.550643848049078, 0.299513859843316,
0.739782634512702, 0.517841819522891, 0.240976915588321,
0.407841597622318, 1.04632508136641, 0.140700270204069, 0.320249766874399,
-0.0720093012575883, 0.191207842637321, 1.89043722977174,
1.44823532410469, -0.403472485541808, 1.81747058484881, 0.510261339543303,
0.874862878045841, -0.274271277102676, 1.60814942277632,
-0.625188854610541, 0.262176194843562, 0.546426093600656,
-0.0371912227266948, -0.0447861830882888, 1.43379838324576,
-0.0424331210124857, 1.86971580312266, -0.228122299652913,
0.731789463645971, 0.0910470403091081, 0.618791802670374,
0.267229848163289, 0.199251694841068, 0.246957313356364,
1.87125072361518, -1.40312565725327, -0.190900477709198,
0.257180463051856, 1.48421907338698, 0.0556569866890196,
-0.667601893503029, 0.247688572647614, 0.188977863808559,
0.91364858124609, 1.5448556730327, 0.930329981315788, 0.312119032378622,
1.15772266013046, -0.0360834735033167, 1.78212397237474,
-0.861407326257228, 0.476608931763807, 1.38366006055364,
0.803771442592559, 0.145174708243597, -1.13023561817905,
0.570130478942752, 0.862605234678655, -0.328963679935357,
0.654840713671687, 0.852222800781108, 0.304538552399032,
0.652132882236762, -0.639712677761503, 0.046078213992748,
-0.171257839519489, 0.349420496423362, 0.184018332971865,
0.149583984564103, 1.29365724620189, 0.621419992004272, -0.866656464734021,
1.09066401106555, 0.810541021179871, 1.62963106948065, 1.03406743799922,
-0.118969180099629, -0.372665472826285, 1.40028353909531,
0.381002209576151, 0.508378889882659, 0.667424165633985,
0.4092534348678, 0.813183690895774, 1.08099111588625, 0.708867018932142,
0.0693192271106869, 1.26885235182742, -0.117571823236151,
0.174801569825717, 0.584835306868775, -0.84211945742664,
1.05460061968224, 1.61507104537468, -1.62830066556388, 0.0799550676933195
), RAVLT_DELAY = c(NA, 12L, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 5L, NA, NA, NA, NA, NA, NA, NA,
7L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5L, 12L,
NA, NA, NA, NA, 14L, NA, NA, NA, NA, NA, 6L, 7L, NA, NA,
NA, NA, 7L, 1L, 1L, 11L, 4L, 12L, 7L, 9L, 9L, 8L, 14L, 12L,
7L, 12L, 7L, 6L, 13L, 10L, 13L, NA, 11L, 14L, 8L, 0L, 11L,
15L, 13L, 6L, 9L, 9L, 12L, 5L, 14L, 15L, 12L, 4L, 15L, 8L,
15L, 14L, 5L, 12L, 8L, 9L, 9L, 13L, 6L, 4L, 10L, NA, 4L,
13L, 9L, 14L, 8L, 15L, 14L, 9L, 15L, 14L, 11L, 11L, 15L,
12L, 9L, 13L, 14L, 7L, 13L, 9L, 12L, 10L, 6L, 9L, 10L, 11L,
15L, 11L, 11L, NA, 9L, 12L, 10L, 9L, 11L, 2L, 12L, NA, 6L,
12L, 12L, 10L, 11L, 4L, 13L, 4L, 5L, 6L, 12L, 15L, 11L, 11L,
14L, 2L, 11L, 5L, 10L, 12L, 10L, NA, 12L, 8L, 12L, 12L, 8L,
7L, 14L, 14L, 7L, 8L, NA, 9L, 6L, 15L, 7L, 14L, 8L, 14L,
11L, 13L, 6L, 12L, 11L, 14L, 15L, 10L, 6L, 13L, 7L, 4L, 12L,
14L, 7L, 13L, 3L, 13L, 7L, 10L, 6L, 8L, 3L, 15L, 11L, 15L,
11L, 11L, 8L, 4L, 7L, 10L, 5L, 7L, 8L, 9L, 14L, 12L, 14L,
12L, NA, NA, 11L, 10L, 13L, 7L, 12L, 12L, 14L, 8L, 13L, 2L,
11L, 8L, 7L, 4L, 7L, 9L, 4L, 12L, 14L, 15L, 12L, 13L, 9L,
7L, 11L, 10L, 14L, 6L, 5L, 5L, 10L, 8L, 5L, 12L, 2L, 11L,
8L, NA, 9L, 7L, 8L, 12L, 10L, 7L, 13L, 15L, 9L, 6L, 4L, 10L,
8L, 13L, 10L, 9L, 7L, 7L, 15L, 8L, 12L, 9L, 10L, 12L, 6L,
13L, 8L, 11L, 9L, 1L, 13L, 12L, NA, 8L, 2L, 11L, 9L, 7L,
6L, 10L, 13L, 15L, 6L, 5L, 7L, 5L, 5L, 11L, 11L, 13L, 9L,
4L, 10L, 2L, NA, 12L, 10L, 15L, NA, 6L)), row.names = c(NA,
-324L), class = c("tbl_df", "tbl", "data.frame"))
I am using the following model in mgcv::gam:
# Gaussian GAM of scaled mean FA fitted by REML: smooths of Age and
# RAVLT_DELAY, a parametric Sex term, and by-Sex tensor terms for both
# covariates.
m1 <- gam(
  formula = mean_FA_scaled ~
    s(Age, bs = "ad", k = -1) +
    Sex +
    te(Age, by = Sex, bs = "fs") +
    te(RAVLT_DELAY, by = Sex, bs = "fs") +
    s(RAVLT_DELAY),
  family = gaussian,
  data = DF,
  method = "REML"
)
I would like to reproduce the gam plot:
But in ggplot. However, when I use predict_gam, my plot is very jagged. This doesn't happen when I plot the smooth term effect of Age.
# Plot
# Predictions from the fitted model, one row per covariate combination.
m1_p <- predict_gam(m1)
# Every layer must be joined with `+`; without it the chain ends at
# geom_line() and the smooth/theme layers are silently dropped.
m1_p %>%
  ggplot(aes(x = RAVLT_DELAY, y = fit)) +
  geom_line(aes(color = Sex)) +
  geom_smooth_ci(Sex, size = 1, alpha = 1) +
  theme_classic(base_size = 24)
Your fit object has predictions for each age and each sex along the length of RAVLT_DELAY. With your existing code, each Sex series tries to plot all the values from these various lines as one series, hence the jaggies.
If we tell ggplot to treat each Age,Sex combination as a different series (aka group), we get:
# One line per Age-by-Sex combination, coloured by Sex.
ggplot(m1_p, aes(x = RAVLT_DELAY, y = fit)) +
  geom_line(aes(group = interaction(Age, Sex), colour = Sex))
There are a lot of age groups here, which we could see separately with:
# Round Age to one decimal, then draw one panel per rounded Age value.
ggplot(
  mutate(m1_p, Age = round(Age, digits = 1)),
  aes(x = RAVLT_DELAY, y = fit)
) +
  geom_line(aes(colour = Sex)) +
  facet_wrap(vars(Age), ncol = 10)
While wrong, I liked the aesthetic qualities that arose when I grouped by Age only:
I had the same problem and I finally managed to fix it, or at least I think so. I am a beginner, not an expert, so apologies for my non-technical language.
This happens because your model contains more variables than the ones you are plotting, and those extra variables add variation to the predictions.
So, what you have to do is create a new data frame in which all the variables you are not plotting are held fixed: for numeric variables you can use the mean, for factors choose one level, etc.
Then get predictions from the model with the function predict.gam (not "_"), which lets you supply new data; that will be the data frame with your variables fixed.
The result of predict.gam then has to be turned into a data frame to plot it, so you bind it (the result) to your new data, and then you can use ggplot2 and geom_smooth_ci with no problem.
EXAMPLE:
# Fit the model: the formula goes inside gam(), and `circ` is the data frame
# whose columns are used below.
model <- gam(x ~ s(v4, by = v3) + s(v2, by = v1), data = circ)
# I want to plot the first smooth, first create the data:
new <- expand.grid(
  v1 = levels(circ$v1)[1],
  v2 = mean(circ$v2),
  v3 = levels(circ$v3),
  v4 = seq(0, 23, 0.1)
)
# see that I maintain the levels and the numbers of v3 and v4, and I fix the
# other ones (v1 at its first level, v2 at its mean).
# With se.fit = TRUE the result is a list with elements `fit` and `se.fit`.
pred <- predict.gam(model, newdata = new, se.fit = TRUE)
# Bind the predictions to the grid so each row carries its fit and se.fit.
mew <- cbind(new, as.data.frame(pred))
mew %>%
  ggplot(aes(v4, fit)) +
  geom_smooth_ci(v3, ci_z = 1.96, ci_alpha = 0.05)
If you afterwards need to plot the second smooth, you should create another data frame that fixes the variables not involved in the second smooth...
Tell me if it worked for you :)
I have the following dataset:
structure(list(Age_group = structure(c(4L, 2L, 2L, 2L, 4L, 2L,
2L, 4L, 3L, 1L, 2L, 1L, 1L, 4L, 1L, 2L, 1L, 4L, 3L, 4L, 4L, 1L,
2L, 2L, 1L, 2L, 1L, 3L, 3L, 2L, 2L, 3L, 4L, 3L, 2L, 4L, 2L, 2L,
3L, 4L, 4L, 4L, 1L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 2L, 3L, 3L, 3L,
4L, 4L, 2L, 4L, 4L, 4L, 1L, 3L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 2L,
2L, 2L, 1L, 3L, 2L, 4L, 2L, 2L, 2L, 4L, 4L, 1L, 2L, 4L, 1L, 1L,
1L, 4L, 2L, 1L, 1L, 2L, 1L, 3L, 1L, 3L, 1L, 1L, 4L, 3L, 2L, 3L,
2L, 4L, 2L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 3L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 4L, 4L, 1L, 1L, 3L, 2L, 2L, 2L, 1L,
1L, 3L, 4L, 3L, 3L, 1L, 1L, 1L, 2L, 4L, 1L, 4L, 1L, 1L, 1L, 1L,
1L, 4L, 1L, 2L, 1L, 1L, 1L, 2L, 4L, 1L, 4L, 2L, 1L, 2L, 1L, 1L,
1L, 3L, 3L, 2L, 1L, 2L, 3L, 4L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 2L,
3L, 4L, 1L, 1L, 1L, 1L, 1L, 4L, 3L, 3L, 3L, 4L, 3L, 2L, 2L, 1L,
4L, 1L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 4L, 1L,
4L, 1L, 1L, 2L, 4L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 4L, 3L, 2L,
1L, 4L, 1L, 4L, 3L, 3L, 4L, 3L, 4L, 3L, 1L, 3L, 3L, 4L, 3L, 4L,
4L, 3L, 4L, 1L, 4L, 4L, 3L, 1L, 3L, 1L, 4L, 4L, 3L, 3L, 4L, 1L,
4L, 1L, 4L, 4L, 1L, 1L, 3L, 3L, 3L, 4L, 2L, 4L, 3L, 3L, 1L, 3L,
4L, 3L, 3L, 1L, 3L, 4L, 4L, 1L, 2L, 3L, 3L, 4L, 4L, 3L, 3L, 3L,
3L, 3L, 4L, 3L, 1L, 1L, 3L, 4L, 3L, 3L, 3L, 1L, 2L, 2L, 1L), .Label = c("Adolescent",
"Young", "Middle", "Older"), class = "factor"), Value = c(0.344845,
0.290967, 0.246231, 0.262066, 0.214854, 0.369023, 0.244076, 0.280915,
0.30564, 0.296507, 0.323117, 0.276703, 0.225361, 0.415376, 0.26803,
0.297092, 0.39287, 0.373648, 0.231434, 0.215282, 0.402466, 0.324974,
0.234958, 0.255247, 0.247927, 0.200748, 0.194252, 0.171439, 0.276834,
0.201723, 0.309028, 0.203337, 0.433123, 0.242758, 0.30205, 0.370564,
0.267963, 0.283591, 0.336721, 0.596052, 0.244396, 0.387599, 0.347128,
0.227341, 0.432896, 0.282985, 0.284935, 0.231549, 0.524341, 0.33092,
0.236906, 0.54037, 0.378644, 0.206526, 0.0978536, 0.252193, 0.332135,
0.315254, 0.280426, 0.217306, 0.23161, 0.240526, 0.446109, 0.41504,
0.28913, 0.269704, 0.317209, 0.433796, 0.392358, 0.299284, 0.338003,
0.311145, 0.426087, 0.339064, 0.506221, 0.519461, 0.464624, 0.422413,
0.311408, 0.384049, 0.30677, 0.316512, 0.274162, 0.426846, 0.437163,
0.350454, 0.406764, 0.502995, 0.330374, 0.299821, 0.43784, 0.329894,
0.48, 0.351307, 0.355426, 0.377012, 0.349693, 0.302153, 0.152044,
0.423236, 0.569011, 0.451337, 0.402483, 0.54266, 0.368916, 0.300246,
0.328711, 0.44537, 0.338924, 0.378004, 0.484292, 0.373512, 0.655633,
0.320122, 0.376306, 0.701183, 0.42354, 0.354544, 0.366982, 0.485444,
0.2711, 0.39679, 0.499632, 0.380856, 0.364726, 0.460057, 0.254963,
0.368593, 0.210968, 0.338162, 0.338745, 0.498087, 0.366381, 0.452842,
0.225168, 0.456962, 0.414057, 0.313421, 0.434526, 0.217877, 0.338147,
0.300099, 0.516165, 0.375086, 0.460186, 0.373398, 0.309855, 0.296928,
0.301164, 0.334937, 0.320049, 0.389919, 0.282245, 0.241675, 0.332736,
0.593453, 0.201379, 0.416399, 0.371206, 0.4048, 0.414817, 0.4947,
0.593219, 0.376317, 0.318016, 0.395748, 0.352561, 0.350144, 0.543684,
0.444405, 0.336287, 0.0667227, 0.325322, 0.379068, 0.391071,
0.37585, 0.476663, 0.464114, 0.461864, 0.415283, 0.458221, 0.400008,
0.38393, 0.285078, 0.237714, 0.361987, 0.426509, 0.317339, 0.294408,
0.619243, 0.34253, 0.329934, 0.355375, 0.46283, 0.407967, 0.242693,
0.51851, 0.317998, 0.323249, 0.448899, 0.360369, 0.459298, 0.484034,
0.27694, 0.487715, 0.434585, 0.605315, 0.494404, 0.256854, 0.351891,
0.231474, 0.413763, 0.410932, 0.365665, 0.511102, 0.365337, 0.527372,
0.400869, 0.24765, 0.369774, 0.350247, 0.530748, 0.461709, 0.428728,
0.303493, 0.573203, 0.498893, 0.280537, 0.387132, 0.594904, 0.425032,
0.370547, 0.535847, 0.397682, 0.372345, 0.305478, 0.193977, 0.362042,
0.453853, 0.383845, 0.359185, 0.349271, 0.248476, 0.404103, 0.333776,
0.433578, 0.317914, 0.36847, 0.394821, 0.254976, 0.436492, 0.596257,
0.331286, 0.299685, 0.063502, 0.469766, 0.403892, 0.447094, 0.471031,
0.458835, 0.248689, 0.479741, 0.277219, 0.294354, 0.450719, 0.32319,
0.481539, 0.489301, 0.301525, 0.310258, 0.415681, 0.42438, 0.320633,
0.441025, 0.0533728, 0.252189, 0.317907, 0.401426, 0.282361,
0.501992, 0.417136, 0.273503, 0.448618, 0.459488, 0.286582, 0.336108,
0.289597, 0.42585, 0.367346, 0.525273, 0.456723, 0.411294, 0.299206,
0.31401, 0.350646, 0.389548, 0.34972, 0.357895, 0.45329, 0.452023,
0.408471, 0.428022, 0.572826, 0.340292, 0.0470799, 0.326013,
0.38702, 0.375492, 0.555507, 0.403654, 0.620388, 0.259259, 0.386142,
0.389715, 0.305789, 0.39022, 0.385585, 0.0526119, 0.379378, 0.411465,
0.376643, 0.0645194, 0.519351, 0.459602, 0.520458), CO2 = c(29L,
28L, 25L, 25L, 28L, NA, 28L, 29L, 32L, NA, 28L, NA, NA, 27L,
28L, 29L, 31L, 31L, NA, 24L, 27L, NA, 27L, 26L, NA, 29L, 24L,
25L, 26L, 29L, NA, 28L, 26L, NA, 22L, 26L, 25L, 22L, NA, 27L,
NA, 26L, 25L, 29L, 26L, NA, NA, 23L, 27L, 26L, 28L, NA, 24L,
22L, 22L, 27L, 23L, 26L, 27L, 28L, 24L, NA, 22L, 21L, NA, 27L,
24L, 24L, NA, 28L, 25L, 26L, 25L, 26L, 27L, 26L, 24L, 25L, 27L,
25L, NA, 25L, NA, 28L, NA, 30L, 27L, NA, 23L, 25L, NA, 27L, NA,
28L, 25L, 26L, NA, NA, 25L, 26L, 29L, 26L, 26L, 29L, 26L, NA,
NA, 24L, NA, NA, NA, NA, NA, 26L, NA, 22L, NA, 25L, 27L, 25L,
29L, 26L, NA, 26L, 21L, NA, 25L, 26L, 25L, 28L, 29L, 26L, NA,
27L, 23L, NA, NA, NA, 23L, NA, 25L, 28L, 28L, NA, 29L, 30L, NA,
27L, 25L, 26L, 24L, NA, NA, NA, 27L, 25L, 25L, 24L, NA, NA, NA,
NA, 25L, NA, 25L, NA, NA, 27L, NA, 26L, 21L, 25L, 26L, 25L, NA,
NA, 27L, 19L, 26L, NA, NA, NA, 24L, 26L, 23L, NA, NA, 29L, 31L,
33L, NA, NA, NA, NA, NA, 27L, 22L, 31L, 25L, 26L, NA, NA, 21L,
23L, 23L, 27L, NA, 26L, 23L, 34L, 28L, 29L, 31L, 24L, 23L, NA,
NA, 25L, 27L, 27L, 25L, 24L, NA, 24L, 26L, 22L, 26L, NA, 26L,
24L, 24L, NA, 24L, 26L, 22L, 29L, 24L, 25L, 24L, 26L, 28L, NA,
NA, 28L, 26L, 22L, NA, 27L, 21L, 27L, NA, 26L, NA, 27L, 24L,
24L, 24L, 25L, NA, 24L, 23L, 21L, 28L, 29L, 25L, 26L, 23L, NA,
26L, 22L, 29L, 23L, 28L, 23L, 26L, 27L, NA, 24L, 27L, 25L, NA,
29L, NA, NA, NA, NA, NA, 25L, 24L, 25L, 21L, NA, 23L, 23L, 21L,
26L, 28L, NA, 22L, 28L, 24L, NA, NA, 24L, 27L, 23L, 27L, 25L,
28L, 26L, 23L, 28L, NA, 26L, NA, NA, 20L, 27L, 23L, NA, NA, 23L,
NA, 21L, 21L)), row.names = c(NA, -325L), class = c("tbl_df",
"tbl", "data.frame"))
I used the following code to calculate correlations between X and Y and facet by age groups:
library(tidyverse)
library(ggpubr)
# Scatter plot of Value against CO2 with a regression line, its confidence
# interval and the Pearson correlation coefficient.
p <- ggscatter(
  data = DF,
  x = "CO2",
  y = "Value",
  fill = "Age_group",
  add = "reg.line",
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "pearson"
)
# Split the plot into one panel per age group, each with free axis scales.
facet(p, facet.by = "Age_group", scales = "free")
Which gives me the partly-desired output:
However, I would like to correct for having 4 separate correlations calculated. Is this possible as a quick option within my code? Like stat_compare_means has p.adjust.method options that I can implement?
Or do I need to calculate this separately and paste into the charts?
This is a bit hacky, but you can manually adjust the p-value label with stats::p.adjust:
library(readr)
# Per-group linear fits; each panel is labelled with the Pearson r and the
# p-value parsed from the printed label and adjusted with n = 4 comparisons.
ggplot(
  data = DF,
  mapping = aes(x = CO2, y = Value, fill = Age_group, group = Age_group)
) +
  geom_point() +
  stat_smooth(method = "lm", colour = "black") +
  stat_cor(
    mapping = aes(
      label = paste0(
        ..r.label..,
        "~`,`~`p=`~",
        p.adjust(readr::parse_number(..p.label..), n = 4)
      )
    ),
    method = "pearson",
    label.y = 0.6
  ) +
  facet_wrap(vars(Age_group), scales = "free") +
  theme_bw() +
  theme(legend.position = "top", panel.grid = element_blank())
Note that you will have to manually change the n= argument.
I want to see the percentage of people, by race, who reported each educational status at every age from 20 to 35. As the next step I built a Shiny app. However, I got this error. ![enter image description here][1]
Please help me link this code to the Shiny app.
My question is: how can I, by moving the sliderInput across the ages 20 to 35, see for each age how many people have a high school diploma, a college degree, or a bachelor's degree, broken down by race?
Below you can see the coding of age, education and race.
# rename education: collapse the numeric codes of R1205700 into labelled
# levels; the negative codes -5, -4 and -3 become NA.
nlsy97$educstat1997 <- Recode(
  nlsy97$R1205700,
  recodes = "0='None';1:2='Hischool';3='college';4='bachelor';5:7='mastermore' ;-5=NA;-3=NA;-4=NA",
  as.factor = TRUE
)
# rename ages: keep ages 12-19 as they are and turn the -5 code into NA.
# Each recode rule must be separated by ';' (the original ran 16 and 17
# together as "16=16=17=17").
nlsy97$age1 <- Recode(
  nlsy97$R1194100,
  recodes = "12=12;13=13;14=14;15=15;16=16;17=17;18=18;19=19;-5=NA",
  as.factor = FALSE
)
# recode race: codes 1, 2 and 4 get labels, everything else becomes NA.
nlsy97$race <- Recode(
  nlsy97$R1482600,
  recodes = "1='black';2='hispanic' ;4='white';else=NA",
  as.factor = TRUE
)
table(nlsy97$race)
In the next step, I have made the transitions.
# Columns to keep: identifiers, gender, race, age1..age17 and the yearly
# education-status columns (1997-2011, 2013, 2015).
myvars1 <- c(
  "R0000100", "R0536300", "R0536402", "R1489700", "R1489800",
  "gender", "race",
  paste0("age", 1:17),
  paste0("educstat", c(1997:2011, 2013, 2015))
)
which(myvars1 %in% names(nlsy97))
sub <- nlsy97[, myvars1]
# Keep only rows where none of age1..age17 is missing.
sub <- sub[complete.cases(sub[, paste0("age", 1:17)]), ]
head(sub, n = 5)
# Reshape to long format with 16 rows per id: each row pairs the age and
# education status at one time point with those at the next one.
x.vertical <- reshape(
  sub,
  idvar = "R0000100",
  varying = list(
    age1 = paste0("age", 1:16),
    age2 = paste0("age", 2:17),
    educstat1 = paste0("educstat", c(1997:2011, 2013)),
    educstat2 = paste0("educstat", c(1998:2011, 2013, 2015))
  ),
  times = 1:16,
  direction = "long",
  v.names = c("agestart", "ageend", "educstat1", "educstat2")
)
# Sort by id, then by time within id.
x.vertical <- x.vertical[order(x.vertical$R0000100, x.vertical$time), ]
The last step is making shiny dashboard.
# STEP 1: copy an example Shiny app into app.R (or ui.R and server.R)
library(shiny)
library(tidyverse)
library(gapminder)
# User Interface: an animated age slider followed by the plot output.
ui <- basicPage(
  # STEP 3: Create an input widget here (e.g. sliderInput)
  sliderInput(
    inputId = "age",
    label = "Select Age:",
    min = 20,
    max = 35,
    value = 25,
    step = 1,
    animate = TRUE, # STEP 4: add animate = TRUE here
    sep = "" # no thousands separator in the displayed numbers
  ), # arguments of basicPage() are separated by commas
  tabPanel("Plot", plotOutput(outputId = "myplot"))
)
# Keep only the rows where both race and educstat2 are present.
x.vertical2 <- x.vertical[
  !is.na(x.vertical$race) & !is.na(x.vertical$educstat2),
]
# Counts of each educstat2-by-race combination as a data frame.
sums <- as.data.frame(xtabs(~ educstat2 + race, data = x.vertical2))
# Server: draws, for the age chosen on the slider, the share of each
# education level within each race.
server <- function(input, output) {
  output$myplot <- renderPlot({
    # STEP 2: copy your plot code here
    x.vertical2 %>%
      # The slider's inputId is "age", so its value is input$age;
      # input$agestart does not exist and would be NULL.
      filter(agestart == input$age) %>%
      # Order the education levels from lowest to highest for the x axis.
      mutate(educstat2 = fct_relevel(
        educstat2,
        "None", "Hischool", "college", "bachelor", "mastermore"
      )) %>%
      filter(!is.na(educstat2)) %>%
      group_by(race, educstat2) %>%
      summarise(n = n()) %>%
      # summarise() drops the last grouping level, so sum(n) is per race.
      mutate(freq = n / sum(n)) %>%
      ggplot(aes(x = factor(educstat2), y = freq, fill = race)) +
      geom_bar(stat = "identity", position = "dodge") +
      theme_bw()
  })
}
shinyApp(ui, server)
dput(head(nlsy97, 10))
structure(list(R0000100 = 1:10, R0536300 = c(2L, 1L, 2L, 2L,
1L, 2L, 1L, 2L, 1L, 1L), R0536401 = c(9L, 7L, 9L, 2L, 10L, 1L,
4L, 6L, 10L, 3L), R0536402 = c(1981L, 1982L, 1983L, 1981L, 1982L,
1982L, 1983L, 1981L, 1982L, 1984L), R1194100 = c(15L, 14L, 13L,
15L, 15L, 15L, 14L, 16L, 15L, 14L), R1205700 = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), R1235800 = c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), R1482600 = c(4L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L), R2553500 = c(17L, 16L, 15L, 17L, 16L, 16L, 15L,
17L, 16L, 14L), R2564101 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), R3876300 = c(18L, 17L, 16L, 18L, 17L, 17L, 16L, 18L,
17L, 15L), R3885701 = c(2L, 0L, 0L, 2L, 0L, 0L, 0L, 2L, 0L, 0L
), R5453700 = c(19L, 18L, 17L, 19L, 18L, 19L, 17L, 19L, 18L,
16L), R5464901 = c(2L, 2L, 0L, 2L, 2L, 2L, 0L, 2L, 2L, 0L), R7216000 = c(20L,
19L, 18L, 20L, 19L, 20L, 18L, 20L, 19L, 17L), R7228601 = c(2L,
2L, 2L, 2L, 2L, 2L, 0L, 2L, 2L, 0L), S1531400 = c(21L, 20L, 19L,
21L, 20L, 20L, 19L, 21L, 20L, 18L), S1542501 = c(2L, 2L, 2L,
2L, 2L, 2L, 0L, 2L, 2L, 2L), S2001000 = c(22L, 21L, 20L, 22L,
21L, 22L, -5L, 22L, 21L, 19L), S2012301 = c(4L, 2L, 3L, 2L, 2L,
2L, -5L, 4L, 2L, 2L), S3801100 = c(23L, 22L, 21L, 23L, 22L, 23L,
-5L, 23L, 22L, 20L), S3813801 = c(4L, 2L, 3L, 2L, 2L, 2L, -5L,
4L, 2L, 2L), S5401000 = c(24L, 23L, -5L, 24L, 23L, 24L, 22L,
24L, 23L, 21L), S5413400 = c(4L, 2L, -5L, 2L, 2L, 2L, 0L, 4L,
4L, 2L), S7501200 = c(25L, -5L, -5L, 25L, 24L, 25L, 23L, 25L,
24L, 22L), S7514300 = c(4L, -5L, -5L, 2L, 2L, 2L, 0L, 4L, 4L,
4L), T0008500 = c(26L, -5L, -5L, 26L, 25L, 25L, -5L, 26L, 25L,
23L), T0014700 = c(4L, -5L, -5L, 2L, 2L, 2L, -5L, 4L, 4L, 4L),
T2011100 = c(27L, 26L, -5L, 27L, 26L, 26L, -5L, -5L, 26L,
24L), T2016800 = c(4L, 2L, -5L, 2L, 2L, 2L, -5L, -5L, 4L,
4L), T3601500 = c(28L, 27L, 26L, 28L, 26L, 27L, 26L, 28L,
27L, 25L), T3607100 = c(4L, 2L, 3L, 2L, 2L, 2L, 1L, 5L, 4L,
4L), T5201400 = c(29L, 28L, -5L, 29L, 28L, 28L, 27L, -5L,
28L, -5L), T5207400 = c(4L, 2L, -5L, 2L, 2L, 2L, 1L, -5L,
4L, -5L), T5207500 = c(4L, 2L, -5L, 2L, 2L, 2L, 1L, -5L,
4L, -5L), T6651300 = c(29L, 29L, 28L, 30L, 29L, 29L, 28L,
30L, 29L, -5L), T6657200 = c(4L, 2L, 3L, 2L, 2L, 2L, 1L,
5L, 5L, -5L), T6657300 = c(4L, 2L, 3L, 2L, 2L, 2L, 1L, 5L,
5L, -5L), T8123600 = c(32L, 31L, 30L, 32L, 31L, 31L, -5L,
-5L, 31L, -5L), T8129600 = c(4L, 2L, 3L, 2L, 2L, 2L, -5L,
-5L, 5L, -5L), T8129700 = c(4L, 2L, 3L, 2L, 2L, 2L, -5L,
-5L, 5L, -5L), U0001800 = c(34L, 33L, -5L, 34L, 33L, 34L,
32L, 34L, 33L, -5L), U0009400 = c(4L, 2L, -5L, 2L, 2L, 2L,
1L, 5L, 5L, -5L), U1838500 = c(-5L, 35L, 34L, 36L, 35L, 35L,
-5L, -5L, 35L, 34L), weight = c(607550L, 0L, 0L, 261156L,
450091L, 367309L, 0L, 0L, 618091L, 0L)), row.names = c(NA,
10L), class = "data.frame")
How can you use rbind in a for loop that runs through a list of dataframes? I tried to follow Looping through list of data frames in R but receive the following:
Error in apply(dataFramesList, 2, function(x) { :
dim(X) must have a positive length
I have two dataframes, dfTraining and dfAccuracy (code to reproduce dataframes is below), and need to add a row for any of the crop types missing from either of two columns, CROP or CROP_LABEL. I believe my problem is in my last line of code.
My code block is:
# Crop codes every data frame should contain: 1-23 without 3, plus 34.
cropNumbers <- c(setdiff(1:23, 3L), 34L)

# Return `x` with one placeholder row (FREQUENCY 0, ACRES 0) appended for
# every crop code that is missing from CROP or from CROP_LABEL.
#
# x:     a data frame with columns FREQUENCY, AA, CROP, CROP_LABEL, ACRES.
# crops: integer vector of crop codes that must be present.
addMissingCrops <- function(x, crops = cropNumbers) {
  missingCrop <- crops[!(crops %in% x$CROP & crops %in% x$CROP_LABEL)]
  if (length(missingCrop) == 0L) {
    return(x)
  }
  # Build all missing rows at once instead of calling rbind() per crop.
  # Integer literals keep the existing integer columns from becoming double.
  # NOTE(review): AA = 1 is kept from the original snippet, but dfAccuracy
  # uses AA = 2 - confirm which value placeholder rows should carry.
  fillerRows <- data.frame(
    FREQUENCY = 0L,
    AA = 1L,
    CROP = missingCrop,
    CROP_LABEL = missingCrop,
    ACRES = 0
  )
  rbind(x, fillerRows)
}

# A list has no dim(), so apply() fails with "dim(X) must have a positive
# length"; lapply() visits each data frame and returns the completed list.
dataFramesList <- lapply(list(dfTraining, dfAccuracy), addMissingCrops)
My dfAccuracy dataframe:
structure(list(FREQUENCY = c(4L, 2L, 1L, 1L, 1L, 1L, 65L, 1L,
1L, 4L, 1L, 5L, 5L, 2L, 4L, 1L, 1L, 1L, 1L, 4L, 9L, 2L, 1L, 1L,
1L, 2L, 4L, 1L, 2L, 18L, 1L, 10L, 3L, 1L, 7L, 1L, 1L, 1L, 3L,
1L, 7L, 1L), AA = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
CROP = c(1L, 4L, 12L, 13L, 14L, 18L, 1L, 1L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 13L,
13L, 13L, 13L, 14L, 14L, 14L, 18L, 18L, 18L, 18L, 18L, 19L,
19L, 21L, 21L, 21L, 21L), CROP_LABEL = c(1L, 4L, 14L, 13L,
12L, 18L, 1L, 4L, 5L, 6L, 18L, 1L, 4L, 6L, 14L, 18L, 12L,
14L, 18L, 1L, 6L, 14L, 18L, 18L, 4L, 6L, 13L, 21L, 12L, 14L,
18L, 1L, 6L, 14L, 18L, 21L, 1L, 19L, 6L, 13L, 21L, 34L),
ACRES = c(331.737184484, 193.772138572, 26.48543619, 73.2696289437,
112.470306056, 66.6556450342, 3905.71121736, 24.9581079934,
39.9287379709, 259.662359273, 85.2786247851, 306.051491303,
368.342995232, 154.82030835, 265.754349805, 70.3722566979,
35.4066607701, 139.336463432, 58.4307705147, 251.070357093,
471.031628349, 150.965736858, 28.2780117926, 35.3426930108,
34.5730542194, 67.7383953308, 144.442123948, 33.2746560126,
69.4072817311, 1219.65459596, 92.4840910734, 582.983473317,
191.957841327, 35.708775262, 319.638682538, 60.6889287642,
82.6244195055, 36.2898952104, 267.422844756, 72.8352758659,
489.746546145, 65.5392893502)), row.names = c(25L, 26L, 27L,
29L, 30L, 31L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L,
70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L,
83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L
), class = "data.frame")
and my dfTraining dataframe is:
structure(list(FREQUENCY = c(7L, 1L, 1L, 4L, 2L, 6L, 1L, 107L,
1L, 21L, 1L, 1L, 1L, 2L, 1L, 19L, 3L, 1L, 1L, 12L, 1L, 2L, 32L,
2L, 2L, 29L, 2L, 18L, 1L), AA = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), CROP = c(1L, 1L, 4L, 4L, 12L, 13L, 21L,
1L, 1L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 12L, 13L, 14L, 14L,
14L, 18L, 18L, 18L, 19L, 21L, 34L), CROP_LABEL = c(1L, 4L, 1L,
4L, 12L, 13L, 21L, 1L, 6L, 4L, 6L, 1L, 5L, 14L, 18L, 6L, 14L,
1L, 12L, 13L, 1L, 6L, 14L, 6L, 14L, 18L, 19L, 21L, 34L), ACRES = c(624.940370218,
26.9188766351, 37.8773839813, 291.79294767, 140.949264214, 391.571023675,
44.5217011939, 6806.02216989, 72.7500299887, 1676.12121152, 14.8739557721,
67.0700291739, 59.7438207953, 82.6713019474, 75.62666152, 1370.78710769,
145.215281276, 41.7380537313, 66.5236760194, 679.91208779, 70.9661875374,
38.8514254734, 1749.63365551, 109.917242057, 79.7758083723, 1660.85759895,
96.8771921798, 1428.71888481, 69.473161379)), row.names = c(18L,
19L, 20L, 21L, 22L, 23L, 24L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L), class = "data.frame")
I am new to R and trying to figure out a way to plot means for individual samples as well as group means with ggplot.
I am following this article on R-bloggers (last paragraph):
https://www.r-bloggers.com/plotting-individual-observations-and-group-means-with-ggplot2/
This is my code:
# Mean measurement for every treatment/value combination.
# The summarised table remains grouped by treatment.
gd <- summarise(
  group_by(meanplot1, treatment, value),
  measurement = mean(measurement)
)

# One faint line per sample, with the summary table gd drawn thick on top.
ggplot(
  data = meanplot1,
  mapping = aes(x = value, y = measurement, colour = treatment)
) +
  geom_line(mapping = aes(group = sample), alpha = 0.3) +
  geom_line(data = gd, size = 3, alpha = 0.9) +
  theme_bw()
Whilst the sample means are being shown, the group means aren't. I get the error:
geom_path: Each group consists of only one observation. Do you need
to adjust the group aesthetic?
Upon adding group=1, I get a weirdly mixed category mean, but not what I am looking for.
I scrolled through a lot of articles already, but couldn't find an answer - I would be so happy if somebody could help me out here! :)
My data (meanplot1) is formatted like this:
treatment sample value measurement
1 control, control 1, initial, 20,
2 control, control 1, 26, NA,
3 control, control 1, 26', 28,
12 control, control 2, initial, 22,
13 control, control 2, 26, NA,
14 control, control 2, 26', 36,
15 control, control 2, 28, 45,
67 stressed, stress 1, initial, 37,
68 stressed, stress 1, 26, NA,
69 stressed, stress 1, 26', 17,
78 stressed, stress 2, initial, 36,
79 stressed, stress 2, 26, NA,
80 stressed, stress 2, 26', 25,
I am hoping to see 6 lines, one mean for stress 1, stress 2, control 1 and control 2, and one mean for all treatment=control, and one for all treatment=stressed
Output of dput(gd):
structure(list(treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("control", "stressed"), class = "factor"), value = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L), .Label = c("26", "26'", "28", "28'",
"30", "30'", "32", "32'", "34", "34'", "initial"), class = "factor"),
measurement = c(NA, 32.3333333333333, 39.5, 30.3333333333333,
31.8333333333333, 31.8333333333333, NA, 36, 34.6666666666667,
36, 24.6666666666667, NA, 25.3333333333333, 33.3333333333333,
32, 50.1666666666667, 39.1666666666667, NA, 33.5, 24.3333333333333,
27.3333333333333, 36)), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -22L), vars = list(treatment), drop = TRUE, .Names = c("treatment",
"value", "measurement"))
Output of dput(meanplot1):
structure(list(treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("control",
"stressed"), class = "factor"), sample = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("control 1",
"control 2", "control 3", "control 4", "control 5", "control 6",
"stress 1", "stress 2", "stress 3", "stress 4", "stress 5", "stress 6"
), class = "factor"), value = structure(c(11L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), .Label = c("26", "26'",
"28", "28'", "30", "30'", "32", "32'", "34", "34'", "initial"
), class = "factor"), measurement = c(20L, NA, 28L, 18L, 17L,
19L, 34L, NA, 23L, 29L, 27L, 22L, NA, 36L, 45L, 31L, 40L, 44L,
NA, 49L, 40L, 39L, 32L, NA, 35L, 57L, 30L, 37L, 29L, NA, 44L,
37L, 46L, 20L, NA, 39L, 27L, 30L, 40L, 25L, NA, 29L, 50L, 30L,
26L, NA, 28L, 45L, 47L, 27L, 35L, NA, 24L, 22L, 35L, 28L, NA,
28L, 45L, 27L, 28L, 24L, NA, 47L, 30L, 39L, 37L, NA, 17L, 29L,
29L, 31L, 29L, NA, 37L, 21L, 27L, 36L, NA, 25L, 41L, 51L, 66L,
50L, NA, 33L, 25L, 22L, 36L, NA, 33L, 45L, 26L, 72L, 59L, NA,
33L, 26L, 25L, 33L, NA, 21L, 33L, 25L, 29L, 21L, NA, 26L, 20L,
16L, 22L, NA, 30L, 27L, 28L, 57L, 41L, NA, 28L, 23L, 17L, 52L,
NA, 26L, 25L, 33L, 46L, 35L, NA, 44L, 31L, 57L)), .Names = c("treatment",
"sample", "value", "measurement"), class = "data.frame", row.names = c(NA,
-132L))
I suppose you are aiming to plot the treatment means.
By default, since you are using a categorical x-axis, the grouping is set to the interaction between x and color. You only want to group by treatment, however. So we'll add the correct grouping to the call.
# Faint per-sample lines, plus the precomputed means in gd drawn as one thick
# line per treatment (explicit group = treatment overrides the default grouping).
ggplot(
  data = meanplot1,
  mapping = aes(x = value, y = measurement, colour = treatment)
) +
  geom_line(mapping = aes(group = sample), alpha = 0.3) +
  geom_line(
    data = gd,
    mapping = aes(group = treatment),
    size = 3,
    alpha = 0.9
  ) +
  theme_bw()
Also note that
# Same figure without the helper table: stat_summary() computes the
# per-treatment mean of measurement at each x value directly from meanplot1.
# NOTE(review): newer ggplot2 releases rename `fun.y` to `fun`; confirm against
# the installed version before changing it.
ggplot(
  data = meanplot1,
  mapping = aes(x = value, y = measurement, colour = treatment)
) +
  geom_line(mapping = aes(group = sample), alpha = 0.3) +
  stat_summary(
    mapping = aes(group = treatment),
    geom = "line",
    fun.y = mean,
    size = 3,
    alpha = 0.9
  ) +
  theme_bw()
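gives the same plot, but without the interruption in the mean lines: stat_summary() drops the NA measurements before averaging, whereas gd contains NA means (for values "26" and "32") that break the line.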