Related
I have data that I want to show in a bar plot, but I couldn't figure out how to do it. I hope you can help me with this!
My table consists of 4 columns: cluster (0:6), IgG_Status (mild_high, mild_low, Severe_High), patient (1-16) and value (normalized value per cluster). This is the code I'm using now to create a bar plot of the sum of values for each cluster, split by IgG_Status (dodged style).
# Sum of normalizedppstatus per cluster, one dodged bar per IgG_status.
ggplot(
  data = mat,
  mapping = aes(x = cluster, group = IgG_status, fill = IgG_status)
) +
  geom_bar(mapping = aes(weight = normalizedppstatus), position = "dodge")
I want to add horizontal lines to this graph that show each patient's contribution to each bar. I managed to do it using facet_grid, but it changed the whole style of the figure, so it doesn't work for me. The code using facet_grid is below:
# One facet per cluster; every row (patient) is drawn as its own
# black-outlined piece of the IgG_status column.
ggplot(
  data = mat,
  mapping = aes(x = IgG_status, y = normalizedppstatus, fill = IgG_status)
) +
  geom_col(color = "black") +
  facet_grid(~cluster) +
  theme(axis.text.x = element_text(angle = 30))
mat structure:
structure(list(cluster = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("0", "1", "2", "3", "4"), class = "factor"), IgG_status = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("mild_high", "mild_low", "Severe_High" ), class = "factor"), patient = structure(c(5L, 11L, 16L, 4L, 6L, 7L, 8L, 12L, 2L, 3L, 9L, 10L, 13L, 14L, 17L, 5L, 11L, 16L, 4L, 6L, 7L, 8L, 12L, 2L, 3L, 9L, 10L, 13L, 14L, 17L, 5L, 11L, 16L, 4L, 6L, 7L, 8L, 12L, 2L, 3L, 9L, 10L, 13L, 14L, 17L, 5L, 11L, 16L, 4L, 6L, 7L, 8L, 12L, 2L, 3L, 9L, 10L, 13L, 14L, 17L, 5L, 16L, 4L, 6L, 7L, 8L, 2L, 3L, 9L, 13L, 14L, 17L), .Label = c("Contact3", "CoV1", "CoV2", "CoV3", "CoV4", "CoV5", "CoV6", "CoV7", "CoV8", "CoV9", "CoV10", "CoV11", "CoV12", "CoV13", "CoV14", "CoV15", "CoV16"), class = "factor"), Freq = c(176L, 164L, 345L, 505L, 277L, 1421L, 679L, 104L, 235L, 933L, 692L, 682L, 133L, 1278L, 330L, 420L, 166L, 288L, 231L, 701L, 1105L, 431L, 506L, 180L, 814L, 410L, 363L, 283L, 182L, 268L, 657L, 155L, 82L, 872L, 385L, 277L, 23L, 298L, 87L, 128L, 469L, 640L, 197L, 148L, 73L, 688L, 220L, 51L, 263L, 456L, 312L, 693L, 303L, 120L, 400L, 373L, 35L, 62L, 170L, 166L, 7L, 530L, 5L, 1L, 80L, 876L, 19L, 7L, 2L, 4L, 15L, 153L), percentperstatus = c(0.0445682451253482, 0.041529501139529, 0.0873638895923018, 0.0467419474268789, 0.0256386523509811, 0.131525360977416, 0.0628470936690115, 0.00962606442058497, 0.0233807581335191, 0.0928265844194607, 0.0688488707591284, 0.0678539448811064, 0.0132325141776938, 
0.127151527211223, 0.0328325539747289, 0.106356039503672, 0.0420359584704989, 0.0729298556596607, 0.021380970011107, 0.0648833765272121, 0.102276934468715, 0.0398926323583858, 0.0468345057386153, 0.0179086658043976, 0.0809869664709979, 0.0407919609989056, 0.0361158093722018, 0.0281564023480251, 0.018107650980002, 0.0266640135309919, 0.166371233223601, 0.0392504431501646, 0.0207647505697645, 0.0807108478341355, 0.0356349500185117, 0.0256386523509811, 0.00212884116993706, 0.0275823768974454, 0.00865585513879216, 0.0127350512386827, 0.0466620236792359, 0.0636752561934136, 0.0196000397970351, 0.0147249029947269, 0.00726295890956124, 0.174221321853634, 0.0557103064066852, 0.0129146619397316, 0.0243428359866716, 0.0422065901517956, 0.0288781932617549, 0.064142910033321, 0.0280451684561274, 0.011939110536265, 0.0397970351208835, 0.0371107352502239, 0.00348224057307731, 0.00616854044373694, 0.0169137399263755, 0.0165157695751667, 0.00177260065839453, 0.134211192707014, 0.00046279155868197, 9.25583117363939e-05, 0.00740466493891151, 0.0810810810810811, 0.00189035916824197, 0.000696448114615461, 0.000198985175604417, 0.000397970351208835, 0.00149238881703313, 0.0152223659337379), normalizedppstatus = c(0.0508788793021933, 0.0474098648043165, 0.0997341668139585, 0.0533603666644943, 0.0292689535961681, 0.150148675307419, 0.0717459187429537, 0.0109890656101137, 0.0266913531758627, 0.105970351119489, 0.0785975165859447, 0.077461714323142, 0.0151061700952755, 0.145155529186181, 0.0374814746724881, 0.142698157688979, 0.0563997480389775, 0.0978501652724429, 0.0286868996290798, 0.0870541845886795, 0.137225212511399, 0.0535240421650797, 0.0628379706160796, 0.0240281006032283, 0.10866041050571, 0.0547306735962423, 0.0484566695498438, 0.0377775137261868, 0.0242950794988198, 0.0357751720092511, 0.291165142146192, 0.0686919285124198, 0.0363402460517317, 0.141251495387918, 0.0623644790416841, 0.0448700277780429, 0.00372567017651619, 0.048271726634862, 0.0151485520846987, 
0.0222875249062233, 0.0816628842267089, 0.111437624531117, 0.0343018938009843, 0.0257699506728207, 0.0127108540480805, 0.309787838022592, 0.0990600644839683, 0.0229639240394654, 0.0432846821009943, 0.0750487263804311, 0.0513491285760844, 0.114054314433418, 0.0498679037133128, 0.0212292686198854, 0.0707642287329512, 0.065987643293477, 0.00619187001413323, 0.0109684554536074, 0.0300747972115043, 0.0293671549241748, 0.00723736731183195, 0.547972096467276, 0.00188953585407682, 0.000377907170815364, 0.0302325736652291, 0.331046681634259, 0.00771816460016861, 0.00284353432637791, 0.000812438378965117, 0.00162487675793023, 0.00609328784223838, 0.0621515359908314)), row.names = c(5L, 11L, 16L, 21L, 23L, 24L, 25L, 29L, 36L, 37L, 43L, 44L, 47L, 48L, 51L, 56L, 62L, 67L, 72L, 74L, 75L, 76L, 80L, 87L, 88L, 94L, 95L, 98L, 99L, 102L, 107L, 113L, 118L, 123L, 125L, 126L, 127L, 131L, 138L, 139L, 145L, 146L, 149L, 150L, 153L, 158L, 164L, 169L, 174L, 176L, 177L, 178L, 182L, 189L, 190L, 196L, 197L, 200L, 201L, 204L, 209L, 220L, 225L, 227L, 228L, 229L, 240L, 241L, 247L, 251L, 252L, 255L), class = "data.frame")
Thanks!
library(tidyverse)
# geom_col() draws one bar per row (i.e. per patient); the white outline
# visually separates the patients within each cluster / IgG_status group.
ggplot(
  data = mat,
  mapping = aes(x = cluster, group = IgG_status, fill = IgG_status)
) +
  geom_col(
    mapping = aes(y = normalizedppstatus),
    color = "white",
    position = "dodge"
  )
By using geom_col, ggplot plots each patient as a bar, rather than calculating one grouped total to display with geom_bar. Adding a white border with the color aesthetic seems like the simplest way to separate the patients visually.
My dataset looks like this:
> head(GLM_df)
hour Feeding Foraging Standing ID Area Feeding_Foraging
1 0 0.119 0.789 0.0339 41361 Seronera 0.908
2 1 0.0920 0.819 0.0339 41361 Seronera 0.911
3 2 0.0847 0.824 0.0678 41361 Seronera 0.909
4 3 0.233 0.632 0.132 41361 Seronera 0.866
5 4 0.254 0.597 0.124 41361 Seronera 0.852
6 5 0.245 0.664 0.0832 41361 Seronera 0.909
I'm trying to run the following glmer() model to test an interaction; the resulting error is shown below:
> m <- glmer(cbind(Feeding_Foraging,Standing) ~ poly(hour,2)*Area+(1|ID) , data=GLM_df , family=binomial)
Error in length(value <- as.numeric(value)) == 1L :
(maxstephalfit) PIRLS step-halvings failed to reduce deviance in pwrssUpdate
In addition: Warning message:
In eval(family$initialize, rho) : non-integer counts in a binomial glm!
I apologize if I'm not asking on the right forum, but does somebody know what causes this error? I've been using this dataset to run other glmer() models without this issue, so I hope somebody can help me.
I can provide a dput() sample of the data below:
> dput(GLM_df)
structure(list(hour = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L), Feeding = c(0.118579234700529,
0.0919594065024507, 0.0846994533575204, 0.233092895639896, 0.254098360072561,
0.244523639258233, 0.238513660654777, 0.245289616923379, 0.211748633393801,
0.253514225911475, 0.275555554923133, 0.222477230819087, 0.232641165221989,
0.238368461591879, 0.30265937999754, 0.433661201190504, 0.178745053292422,
0.12125395428024, 0.10605844594333, 0.163238946470857, 0.174611180767811,
0.22483854891269, 0.177868852050793, 0.183918813004901, 0.241998438164344,
0.161698956409812, 0.158105646267371, 0.36138433432542, 0.468670308578279,
0.333151183206247, 0.32072859671381, 0.301413227120555, 0.295571885509692,
0.313952640445209, 0.343315117609149, 0.309435336266141, 0.345573769698683,
0.307176684176607, 0.322987248803344, 0.303788706042306, 0.266520946564997,
0.179710144515087, 0.151781420416677, 0.272293057460473, 0.384777516681307,
0.358157688483229, 0.370418942683556, 0.295571885509692, 0.194038747691774,
0.0980730512560762, 0.104719324151116, 0.287394007254483, 0.360255008280653,
0.356867030146353, 0.303788706042306, 0.297908422154037, 0.295883423728938,
0.309435336266141, 0.335409835295781, 0.294754097684171, 0.329763205071946,
0.311693988355675, 0.252969034027794, 0.320554854245385, 0.269908924699298,
0.114670029160951, 0.145400728263743, 0.208925318281884, 0.252065573191981,
0.343637782193368, 0.234552332374672, 0.25071038193826, 0.139938227286338,
0.127049180036281, 0.0779234970889187, 0.271038250744065, 0.37923497180722,
0.365027321566604, 0.313661201465914, 0.342076501947147, 0.292896174191167,
0.283060108639971, 0.271038250744065, 0.238251365573412, 0.196721311023918,
0.191256830162143, 0.16601092858074, 0.0626775954845651, 0.134426229199678,
0.105704917790185, 0.11195058182907, 0.140192198660723, 0.14806719253611,
0.21262483463543, 0.226733921295516, 0.21891551021636, 0.120612021581109,
0.140939890386914, 0.0931693986932724, 0.2142076497816, 0.228415300022216,
0.194244079699913, 0.181821493207477, 0.186922931547631, 0.153588342088304,
0.15187488188245, 0.135519125372033, 0.171657558804575, 0.144302772386887,
0.113322027250751, 0.0931693986932724, 0.0657666343717217, 0.126775955993192,
0.0912147959234835, 0.0966201171633936, 0.143219075677262, 0.127049180036281,
0.145683059774935, 0.171657558804575, 0.140731399424803, 0.238570126957016,
0.109339294334254, 0.14013909555517, 0.190856101565613, 0.175240248325904,
0.217486338298665, 0.251366119641673, 0.295081966535877, 0.278688523950551,
0.268852458399355, 0.349726775153633, 0.328961747878886, 0.351912567498343,
0.284153004812326, 0.220218578729553, 0.179437360446302, 0.283460837236502,
0.156693988711413, 0.114187411193102, 0.207187893597627, 0.198761383878981,
0.22134790477432, 0.199890709923748, 0.218466176246294), Foraging = c(0.78939890529209,
0.81876138245603, 0.824408012679865, 0.632422585069486, 0.59741347768171,
0.66404371432296, 0.599672129771244, 0.632422585069486, 0.629034606935185,
0.575956282831139, 0.525136610816626, 0.588378869323575, 0.577085608875906,
0.574826956786372, 0.482222221115483, 0.336377829048438, 0.677595626860163,
0.811985426187429, 0.797304187605459, 0.744225863501412, 0.727285972829908,
0.702440799845036, 0.721639342606074, 0.744225863501412, 0.593480307663729,
0.692276865442133, 0.705828777979336, 0.29136611954987, 0.178520386307389,
0.320647930567756, 0.343470886718772, 0.422913132626516, 0.393706424572198,
0.350480496651808, 0.350091073877751, 0.339966081752254, 0.289107467460336,
0.294403617187519, 0.226644054501503, 0.185602280400827, 0.465282330443979,
0.671948996636328, 0.677595626860163, 0.525136610816626, 0.359125682235886,
0.398652093802729, 0.407725644438271, 0.496903459697453, 0.519489980592792,
0.647103823651456, 0.618870672532282, 0.247583017506598, 0.159987856341983,
0.170810564270999, 0.290898812221001, 0.315807961804469, 0.2952380945605,
0.274543055710583, 0.21405861848537, 0.274947456283643, 0.241067674940635,
0.254098360072561, 0.192437158028286, 0.1589743586095, 0.334732239668921,
0.591766847457876, 0.587638966052866, 0.500018841889913, 0.436807180886641,
0.401884302827407, 0.44922080447396, 0.438017173077463, 0.748633878063245,
0.820765025438681, 0.896174861331183, 0.336612021085371, 0.116546447819948,
0.204633879311769, 0.282720933965792, 0.313952640445209, 0.293235348865346,
0.217959926640019, 0.244687309699503, 0.267759562227, 0.256357012162095,
0.20666666619235, 0.110109289364776, 0.0532396563961557, 0.284590163281268,
0.810928959887485, 0.790163932612739, 0.619999998577049, 0.523384208333367,
0.47682655223493, 0.493009231956877, 0.637874503906291, 0.632422585069486,
0.726775954616143, 0.817486336921616, 0.340983605774792, 0.142779078516963,
0.193598750531475, 0.256357012162095, 0.254682494233647, 0.206783493024567,
0.19198542761038, 0.221428570920375, 0.213793102957603, 0.203278688058049,
0.194157208465701, 0.112932604476694, 0.0948633877604228, 0.380582877086458,
0.787978140268028, 0.810928959887485, 0.719125681409657, 0.625136610587118,
0.562404370293935, 0.366120217738959, 0.535519124454, 0.655009105964824,
0.782513659406253, 0.757377047442085, 0.18996877395901, 0.158105646267371,
0.182574377237322, 0.24367381196702, 0.248087431124608, 0.269869982421893,
0.283586317908142, 0.23846153791425, 0.29272131080359, 0.220218578729553,
0.13834244048395, 0.101639344029024, 0.0846994533575204, 0.23846153791425,
0.745355189546179, 0.686338796239004, 0.605318759995079, 0.500936767000192,
0.414375787195254, 0.393442622047837, 0.509364988467295), Standing = c(0.0338797813430082,
0.0338797813430082, 0.0677595626860163, 0.131754705222809, 0.124225864924363,
0.0831594632964746, 0.162622950446439, 0.101639344029024, 0.112932604476694,
0.0931693986932724, 0.0975737702678635, 0.101639344029024, 0.12046144477514,
0.128743169103431, 0.137059115433078, 0.14761904728025, 0.0677595626860163,
0.0338797813430082, 0.0338797813430082, 0.0639951425367932, 0.0423497266787602,
0.0677595626860163, 0.107285974252859, 0.054207650148813, 0.0790528231336857,
0.0609836064174147, 0.0451730417906775, 0.195749847759603, 0.229629629102611,
0.225865208953388, 0.198259461192418, 0.160928961379289, 0.183201780595526,
0.203278688058049, 0.149321999252517, 0.198605614769358, 0.212958625584623,
0.281462798849606, 0.306128024277895, 0.398379497860889, 0.111677797760286,
0.0677595626860163, 0.0547288775540901, 0.0931693986932724, 0.145830363172079,
0.153350589236774, 0.105403764178248, 0.149071037909236, 0.152459016043537,
0.135519125372033, 0.119882303213721, 0.254098360072561, 0.296740153831865,
0.255227686117328, 0.178182553729895, 0.206102003169966, 0.186338797386545,
0.175045536938875, 0.264028640811029, 0.235903662684649, 0.235855400887864,
0.189259468191977, 0.333151183206247, 0.403169397981797, 0.203278688058049,
0.0884638735067435, 0.116461748366591, 0.127819175066803, 0.183918813004901,
0.155538996165628, 0.179710144515087, 0.15951730382333, 0.190573770054421,
0.167140254625507, 0.11067395238716, 0.392349725875482, 0.526775955075159,
0.469945354112694, 0.421857922529069, 0.365901638504488, 0.43278688425262,
0.506010927800412, 0.515846993351608, 0.493989069904506, 0.555191255556392,
0.608743168001792, 0.768306009165636, 0.947540981431873, 0.590163933071755,
0.169398906715041, 0.163752276491206, 0.297658078942143, 0.42228727459678,
0.412398717726961, 0.432306009936784, 0.283743168747693, 0.300400727908006,
0.183201780595526, 0.132573057429162, 0.444808742148526, 0.6426229493448,
0.637158468483024, 0.575956282831139, 0.58688524455469, 0.657923495757771,
0.690710380928424, 0.664480872791902, 0.633879779965959, 0.690710380928424,
0.731147539305563, 0.828415298645167, 0.933333331191257, 0.504918031628057,
0.161580495635885, 0.141411261257773, 0.231511839177222, 0.389617485444594,
0.325245900892878, 0.467759561767984, 0.370341058128744, 0.244523639258233,
0.255094824229708, 0.184927139830586, 0.643715845517155, 0.774863386199767,
0.676502730687808, 0.544262293832841, 0.456830600044432, 0.468852457940339,
0.48415300435331, 0.450273223010302, 0.43497267659733, 0.449180326837947,
0.608743168001792, 0.724590162271432, 0.816393440749261, 0.525683058902804,
0.196825396373666, 0.2766848809679, 0.298142075818472, 0.393247462017059,
0.468475597191251, 0.426885244921903, 0.380496005852245), ID = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("41361",
"41365", "41366", "41366bis", "41367", "41368"), class = "factor"),
Area = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Loliondo",
"Seronera"), class = "factor"), Feeding_Foraging = c(0.907978139992619,
0.910720788958481, 0.909107466037385, 0.865515480709382,
0.851511837754272, 0.908567353581193, 0.838185790426022,
0.877712201992865, 0.840783240328986, 0.829470508742613,
0.800692165739759, 0.810856100142662, 0.809726774097895,
0.81319541837825, 0.784881601113022, 0.770039030238942, 0.856340680152585,
0.933239380467668, 0.903362633548788, 0.90746480997227, 0.901897153597719,
0.927279348757726, 0.899508194656866, 0.928144676506314,
0.835478745828073, 0.853975821851945, 0.863934424246708,
0.65275045387529, 0.647190694885669, 0.653799113774003, 0.664199483432583,
0.724326359747071, 0.68927831008189, 0.664433137097017, 0.6934061914869,
0.649401418018395, 0.634681237159019, 0.601580301364126,
0.549631303304847, 0.489390986443134, 0.731803277008976,
0.851659141151415, 0.82937704727684, 0.797429668277099, 0.743903198917193,
0.756809782285958, 0.778144587121826, 0.792475345207145,
0.713528728284566, 0.745176874907532, 0.723589996683398,
0.534977024761081, 0.520242864622636, 0.527677594417352,
0.594687518263307, 0.613716383958506, 0.591121518289437,
0.583978391976724, 0.54946845378115, 0.569701553967814, 0.570830880012581,
0.565792348428236, 0.44540619205608, 0.479529212854885, 0.604641164368219,
0.706436876618826, 0.733039694316609, 0.708944160171797,
0.688872754078621, 0.745522085020775, 0.683773136848632,
0.688727555015723, 0.888572105349583, 0.947814205474962,
0.974098358420102, 0.607650271829437, 0.495781419627168,
0.569661200878373, 0.596382135431706, 0.656029142392356,
0.586131523056514, 0.501020035279991, 0.515725560443569,
0.506010927800412, 0.453078323186013, 0.397923496354493,
0.276120217945516, 0.115917251880721, 0.419016392480946,
0.916633877677671, 0.902114514441809, 0.760192197237773,
0.671451400869477, 0.68945138687036, 0.719743153252393, 0.856790014122652,
0.753034606650595, 0.867715845003057, 0.910655735614888,
0.555191255556392, 0.371194378539179, 0.387842830231389,
0.438178505369572, 0.441605425781279, 0.360371835112871,
0.34386030949283, 0.356947696292407, 0.385450661762178, 0.347581460444935,
0.307479235716452, 0.206102003169966, 0.160630022132145,
0.50735883307965, 0.879192936191512, 0.907549077050879, 0.862344757086919,
0.752185790623399, 0.70808743006887, 0.537777776543534, 0.676250523878803,
0.89357923292184, 0.891852953740506, 0.897516142997256, 0.380824875524623,
0.333345894593276, 0.400060715535987, 0.495039931608694,
0.543169397660485, 0.548558506372443, 0.552438776307497,
0.588188313067882, 0.621683058682476, 0.572131146227896,
0.422495445296276, 0.321857922758577, 0.264136813803823,
0.521922375150751, 0.902049178257592, 0.800526207432105,
0.812506653592706, 0.699698150879173, 0.635723691969573,
0.593333331971585, 0.727831164713589)), row.names = c(NA,
-144L), vars = "hour", indices = list(c(0L, 24L, 48L, 72L, 96L,
120L), c(1L, 25L, 49L, 73L, 97L, 121L), c(2L, 26L, 50L, 74L,
98L, 122L), c(3L, 27L, 51L, 75L, 99L, 123L), c(4L, 28L, 52L,
76L, 100L, 124L), c(5L, 29L, 53L, 77L, 101L, 125L), c(6L, 30L,
54L, 78L, 102L, 126L), c(7L, 31L, 55L, 79L, 103L, 127L), c(8L,
32L, 56L, 80L, 104L, 128L), c(9L, 33L, 57L, 81L, 105L, 129L),
c(10L, 34L, 58L, 82L, 106L, 130L), c(11L, 35L, 59L, 83L,
107L, 131L), c(12L, 36L, 60L, 84L, 108L, 132L), c(13L, 37L,
61L, 85L, 109L, 133L), c(14L, 38L, 62L, 86L, 110L, 134L),
c(15L, 39L, 63L, 87L, 111L, 135L), c(16L, 40L, 64L, 88L,
112L, 136L), c(17L, 41L, 65L, 89L, 113L, 137L), c(18L, 42L,
66L, 90L, 114L, 138L), c(19L, 43L, 67L, 91L, 115L, 139L),
c(20L, 44L, 68L, 92L, 116L, 140L), c(21L, 45L, 69L, 93L,
117L, 141L), c(22L, 46L, 70L, 94L, 118L, 142L), c(23L, 47L,
71L, 95L, 119L, 143L)), group_sizes = c(6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L), biggest_group_size = 6L, labels = structure(list(
hour = 0:23), row.names = c(NA, -24L), class = "data.frame", vars = "hour"), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Any input is appreciated!
What I'm trying to do is get a data frame where the repeated rows in the first column act as an index to copy the corresponding rows of the other columns. I know this sounds messy, and my inability to accurately state the issue is one of the reasons I'm having so many problems with this.
I'll provide a reproducible example below.
structure(list(Var1 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L), .Label = c("2016-01", "2016-02", "2016-03", "2016-04",
"2016-05", "2016-06", "2016-07", "2016-08", "2016-09", "2016-10",
"2016-11", "2016-12", "2017-01", "2017-02", "2017-03", "2017-04",
"2017-05"), class = "factor"), Var2 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L
), .Label = c("B2B", "B2C", "B2K"), class = "factor"), Freq = c(5L,
13L, 8L, 13L, 36L, 5L, 18L, 1L, 12L, 24L, 22L, 6L, 24L, 15L,
11L, 26L, 1L, 338L, 285L, 291L, 232L, 142L, 42L, 92L, 9L, 46L,
34L, 45L, 35L, 30L, 31L, 36L, 56L, 9L, 0L, 1L, 0L, 0L, 0L, 0L,
7L, 0L, 13L, 0L, 1L, 0L, 0L, 0L, 0L, 2L, 0L)), .Names = c("Var1",
"Var2", "Freq"), class = "data.frame", row.names = c(NA, -51L
))
basically what I want is:
On Var1 no repeated dates
On the row where the date is repeated, take the value of Var2 and Freq and copy them in two new columns to the index of the unique date
This must be done for every distinct level of Var2
Thank you in advance!
I think what you're trying to describe is a dcast. Does this give you the result you want?
library(reshape2)
# Reshape long -> wide: one row per Var1, one column per level of Var2,
# with the cells taken from Freq.
dcast(data = x, formula = Var1 ~ Var2, value.var = "Freq")
A base R option would be
# Base R: total Freq for every Var1 / Var2 combination.
xtabs(formula = Freq ~ Var1 + Var2, data = df1)
To find genotype frequency across SNPs I need to find the proportion of a certain genotype (XX, YX, or YY) in the total number of samples (XX, YX, and YY). I think I would need to start my dplyr statement with
dat %>% group_by(Assay) %>%
but I don't know how to finish it. A preview of the data, dat, is shown below, and its dput output is at the bottom.
Source: local data frame [143 x 3]
Groups: Assay
Assay Final n
1 One_apoe-83 Invalid 2
2 One_apoe-83 No Call 9
3 One_apoe-83 NTC 2
4 One_apoe-83 XX 4
5 One_apoe-83 YX 41
6 One_apoe-83 YY 134
7 One_CD9-269 Invalid 2
8 One_CD9-269 No Call 5
9 One_CD9-269 NTC 2
10 One_CD9-269 XX 99
.. ... ... ...
I could use a for loop across SNPs to get what I'm looking for with boolean patterning for each genotype but that would be very verbose.
for(i in seq(levels(dat$Assay))) {
storage_df[i,1] <- dat[dat$Assay == levels(dat$Assay)[i],]$XX / (dat[dat$Assay == levels(dat$Assay)[i],]$XX + dat[dat$Assay == levels(dat$Assay)[i],]$YX + dat[dat$Assay == levels(dat$Assay)[i],]$XY) ...
You get the point. How would I do this in dplyr? The whole object is below.
dat <- structure(list(Assay = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L,
12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L,
14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L,
18L, 19L, 19L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 20L, 20L,
21L, 21L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 22L, 22L, 23L,
23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L, 24L), .Label = c("One_apoe-83",
"One_CD9-269", "One_Cytb_26", "One_E2", "One_ghsR-66", "One_IL8r-362",
"One_KPNA-422", "One_lpp1-44", "One_MHC2_190", "One_MHC2_251",
"One_Prl2", "One_redd1-414", "One_STC-410", "One_STR07", "One_sys1-230",
"One_U1004-183", "One_U1105", "One_U1201-492", "One_U1203-175",
"One_U1209-111", "One_U1212-106", "One_U401-224", "One_vamp5-255",
"One_ZNF-61"), class = "factor"), Final = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("Invalid",
"No Call", "NTC", "XX", "YX", "YY"), class = "factor"), n = c(2L,
9L, 2L, 4L, 41L, 134L, 2L, 5L, 2L, 99L, 75L, 9L, 2L, 7L, 2L,
110L, 71L, 2L, 8L, 2L, 110L, 59L, 11L, 2L, 6L, 2L, 67L, 86L,
29L, 2L, 3L, 2L, 152L, 28L, 5L, 2L, 4L, 2L, 78L, 81L, 25L, 2L,
4L, 2L, 115L, 62L, 7L, 2L, 17L, 2L, 80L, 62L, 29L, 2L, 13L, 2L,
59L, 68L, 48L, 2L, 7L, 2L, 48L, 86L, 47L, 2L, 7L, 2L, 42L, 87L,
52L, 2L, 3L, 2L, 47L, 81L, 57L, 2L, 9L, 2L, 40L, 85L, 54L, 2L,
8L, 2L, 52L, 86L, 42L, 2L, 7L, 2L, 9L, 39L, 133L, 2L, 8L, 2L,
101L, 71L, 8L, 2L, 13L, 2L, 20L, 82L, 73L, 2L, 11L, 2L, 27L,
75L, 75L, 2L, 6L, 2L, 3L, 40L, 139L, 2L, 13L, 2L, 59L, 82L, 34L,
2L, 19L, 2L, 20L, 84L, 65L, 2L, 11L, 2L, 119L, 47L, 11L, 2L,
8L, 2L, 51L, 100L, 29L)), class = "data.frame", .Names = c("Assay",
"Final", "n"), row.names = c(NA, -143L))
I hope I am not misunderstanding. Is this what you are looking for?
Assume the data structure is:
df <- structure(list(Assay = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L), .Label = c("One_apoe-83", "One_CD9-269"), class = "factor"),
Final = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L
), .Label = c("Invalid", "No Call", "NTC", "XX", "YX", "YY"
), class = "factor"), n = c(2L, 9L, 2L, 4L, 41L, 134L, 2L,
5L, 2L, 99L)), .Names = c("Assay", "Final", "n"), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10"))
Code
# Share (in percent) of each Final category within its Assay.
df %>%
  group_by(Assay) %>%
  mutate(n_percent = n / sum(n) * 100)
# Assay Final n n_percent
# 1 One_apoe-83 Invalid 2 1.041667
# 2 One_apoe-83 No Call 9 4.687500
# 3 One_apoe-83 NTC 2 1.041667
# 4 One_apoe-83 XX 4 2.083333
# 5 One_apoe-83 YX 41 21.354167
# 6 One_apoe-83 YY 134 69.791667
# 7 One_CD9-269 Invalid 2 1.851852
# 8 One_CD9-269 No Call 5 4.629630
# 9 One_CD9-269 NTC 2 1.851852
# 10 One_CD9-269 XX 99 91.666667
Option 2
Here is the code based on the comment. A line is added to filter out the elements you don't want.
# Drop the Invalid / No Call / NTC rows first, then express each remaining
# count as a percentage of its Assay total.
df %>%
  filter(!(Final %in% c("Invalid", "No Call", "NTC"))) %>%
  group_by(Assay) %>%
  mutate(n_percent = n / sum(n) * 100)
# Source: local data frame [4 x 4]
# Groups: Assay
#
# Assay Final n n_percent
# 1 One_apoe-83 XX 4 2.234637
# 2 One_apoe-83 YX 41 22.905028
# 3 One_apoe-83 YY 134 74.860335
# 4 One_CD9-269 XX 99 100.000000
I would like to create a stacked bar graph that contains two levels of x-axis labels. For each stacked bar there is the primary label (dat$HUC_12_NAM), then I would like to group these stacked bars by dat$HUC_10_NAM and label this group as well. I could likely use annotate to manually define and place the labels, but that would be very time consuming, clunky, and could easily result in mis-labeling.
Here is the data....
dat <- structure(list(HUC_12_NAM = structure(c(3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Apostle Islands",
"Raspberry River-Frontal Lake Superior", "Sand River", "Saxine Creek-Frontal Lake Superior"
), class = "factor"), HUC_10_NAM = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Chequamegon Bay-Frontal Lake Superior",
"Sand River-Frontal Lake Superior"), class = "factor"), variable = structure(c(9L,
8L, 4L, 1L, 6L, 11L, 14L, 13L, 10L, 7L, NA, 5L, 15L, 3L, 2L,
12L, 8L, 6L, 3L, 2L, 4L, 1L, 15L, 5L, 11L, 14L, 10L, 9L, 13L,
7L, 12L, NA, 12L, 4L, 10L, 8L, 3L, NA, 2L, 6L, 1L, 13L, 7L, 11L,
9L, 14L, 5L, 15L, 9L, 1L, 8L, 12L, 10L, 4L, 3L, 11L, NA, 7L,
15L, 13L, 14L, 6L, 5L, 2L), .Label = c("Agriculture", "Barren land",
"Developed - High intensity", "Developed - Medium intensity",
"Developed - Low intensity", "Developed - Open space", "Evergreen forest",
"Deciduous forest", "Mixed forest", "Herbaceous", "Pasture",
"Shrub", "Woody wetland", "Herbaceous wetland", "Water"), class = "factor"),
perc_veg = c(11.8839579283911, 57.2626205743974, 0.00544969027593598,
0.514995731075951, 2.59586913477084, 2.53864738687351, 0.108085523806064,
5.3007320750604, 0.731166778688078, 6.04007338916238, 0,
0.0953695798288797, 0.11807662264528, 0, 0.00363312685062399,
12.8013224581736, 58.9563880536275, 4.47423752571726, 0.0158260043860641,
0.101738599624698, 0.0633040175442563, 0.180868621555018,
1.07390744048292, 0.300694083335217, 2.65876873685876, 0.00226085776943772,
0.065564875313694, 15.484614862879, 2.68363817232258, 7.99665393050123,
5.94153421808234, 0, 2.79708137828397, 0.0260443580892536,
0.0078546476777114, 30.3801236073503, 0.028524773145373,
0, 0.470038653134625, 1.99838773021352, 0.0355526158043779,
4.43084809524794, 23.6515843651171, 0.169081626325472, 32.6501167862089,
0.595713015978007, 0.174455858947064, 2.5845924884764, 23.2366527830367,
0.25141991669822, 52.6482393032942, 3.73494888299886, 0.136312003029156,
0.00605831124574025, 0, 1.85535781900795, 0, 11.0851950018932,
0.427110942824688, 2.85800833017796, 0, 3.54714123438092,
0.146914047709201, 0.0666414237031428)), .Names = c("HUC_12_NAM",
"HUC_10_NAM", "variable", "perc_veg"), row.names = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L,
91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L,
103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L), class = "data.frame")
And here is the current stacked bar plot...
library(ggplot2)
# Stacked bars of perc_veg per HUC_12_NAM, filled by variable.
p <- ggplot() +
  geom_bar(
    data = dat,
    mapping = aes(x = HUC_12_NAM, y = perc_veg, fill = variable),
    stat = "identity"
  ) +
  coord_flip() # flipped so the long x labels fit
p
And the resulting plot...
The next label, or grouping, would be from dat$HUC_10_NAM and in this example would add two additional labels, 'Sand River-Frontal Lake Superior' and 'Chequamegon Bay-Frontal Lake Superior'.
Maybe this would just be too cluttered...especially with the long names. But, I would like to see if there is a way to add these second level labels quickly and easily.
Thanks
-cherrytree
If you're willing to facet instead of adding a second row of labels, then you can do this:
# Same stacked bars, split into one facet column per HUC_10_NAM with the
# scales freed per facet.
ggplot(data = dat) +
  geom_bar(
    mapping = aes(x = HUC_12_NAM, y = perc_veg, fill = variable),
    stat = "identity"
  ) +
  facet_grid(. ~ HUC_10_NAM, scales = "free")
Incidentally, you can reformat the longer labels with a line-break, for example:
# Break the long HUC names at each hyphen so the axis / facet labels wrap.
# Iterates over the two name columns themselves (rather than column indices
# with dat[, x]), so it also works when dat is a tibble.
# Note: gsub() returns character vectors, so both columns stop being factors.
dat[1:2] <- lapply(
  dat[1:2],
  function(col) gsub("-", "\n", col, fixed = TRUE)
)