is there a way to place significance results in my ggplot? - r

I'm looking for a way to place my dunn's test results on my ggplot:
either directly by somehow implementing the necessary code lines on the plot regarding the results of the dunn test
or indirectly by placing just the letters regarding significance by the order in which i know they appear
In the example graphic I attached, I added the letters myself using third-party drawing software to give you an idea of what I'm looking for. Ideally, I would want the lowercase letters to represent significant differences within the same "type" across different "day" values, and capital letters for the opposite (within the same "day" across different "type" values). I do, however, recognize this is a tall order, and would be happy with just the differences within the same type.
here is my dput and str:
> dput(table5)
structure(list(day = c("day 00", "day 00", "day 00", "day 00",
"day 00", "day 00", "day 00", "day 00", "day 07", "day 07", "day 07",
"day 07", "day 07", "day 07", "day 07", "day 07", "day 14", "day 14",
"day 14", "day 14", "day 14", "day 14", "day 14", "day 14", "day 21",
"day 21", "day 21", "day 21", "day 21", "day 21", "day 21", "day 21",
"day 28", "day 28", "day 28", "day 28", "day 28", "day 28", "day 28",
"day 28", "day 00", "day 00", "day 00", "day 00", "day 00", "day 00",
"day 00", "day 00", "day 07", "day 07", "day 07", "day 07", "day 07",
"day 07", "day 07", "day 07", "day 14", "day 14", "day 14", "day 14",
"day 14", "day 14", "day 14", "day 14", "day 21", "day 21", "day 21",
"day 21", "day 21", "day 21", "day 21", "day 21", "day 28", "day 28",
"day 28", "day 28", "day 28", "day 28", "day 28", "day 28", "day 00",
"day 00", "day 00", "day 00", "day 00", "day 00", "day 00", "day 00",
"day 07", "day 07", "day 07", "day 07", "day 07", "day 07", "day 07",
"day 07", "day 14", "day 14", "day 14", "day 14", "day 14", "day 14",
"day 14", "day 14", "day 21", "day 21", "day 21", "day 21", "day 21",
"day 21", "day 21", "day 21", "day 28", "day 28", "day 28", "day 28",
"day 28", "day 28", "day 28", "day 28", "day 00", "day 00", "day 00",
"day 00", "day 00", "day 00", "day 00", "day 00", "day 07", "day 07",
"day 07", "day 07", "day 07", "day 07", "day 07", "day 07", "day 14",
"day 14", "day 14", "day 14", "day 14", "day 14", "day 14", "day 14",
"day 21", "day 21", "day 21", "day 21", "day 21", "day 21", "day 21",
"day 21", "day 28", "day 28", "day 28", "day 28", "day 28", "day 28",
"day 28", "day 28", "day 00", "day 00", "day 00", "day 00", "day 00",
"day 00", "day 00", "day 00", "day 07", "day 07", "day 07", "day 07",
"day 07", "day 07", "day 07", "day 07", "day 14", "day 14", "day 14",
"day 14", "day 14", "day 14", "day 14", "day 14", "day 21", "day 21",
"day 21", "day 21", "day 21", "day 21", "day 21", "day 21", "day 28",
"day 28", "day 28", "day 28", "day 28", "day 28", "day 28", "day 28",
"day 00", "day 00", "day 00", "day 00", "day 00", "day 00", "day 00",
"day 00", "day 07", "day 07", "day 07", "day 07", "day 07", "day 07",
"day 07", "day 07", "day 14", "day 14", "day 14", "day 14", "day 14",
"day 14", "day 14", "day 14", "day 21", "day 21", "day 21", "day 21",
"day 21", "day 21", "day 21", "day 21", "day 28", "day 28", "day 28",
"day 28", "day 28", "day 28", "day 28", "day 28", "day 00", "day 00",
"day 00", "day 00", "day 00", "day 00", "day 00", "day 00", "day 07",
"day 07", "day 07", "day 07", "day 07", "day 07", "day 07", "day 07",
"day 14", "day 14", "day 14", "day 14", "day 14", "day 14", "day 14",
"day 14", "day 21", "day 21", "day 21", "day 21", "day 21", "day 21",
"day 21", "day 21", "day 28", "day 28", "day 28", "day 28", "day 28",
"day 28", "day 28", "day 28"), type = c("control", "control",
"control", "control", "control", "control", "control", "control",
"control", "control", "control", "control", "control", "control",
"control", "control", "control", "control", "control", "control",
"control", "control", "control", "control", "control", "control",
"control", "control", "control", "control", "control", "control",
"control", "control", "control", "control", "control", "control",
"control", "control", "nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_",
"nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_",
"nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_",
"nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_",
"nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_",
"nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_", "nZn1_",
"nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_",
"nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_",
"nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_",
"nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_",
"nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_",
"nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn10_", "nZn100_",
"nZn100_", "nZn100_", "nZn100_", "nZn100_", "nZn100_", "nZn100_",
"nZn100_", "nZn100_", "nZn100_", "nZn100_", "nZn100_", "nZn100_",
"nZn100_", "nZn100_", "nZn100_", "nZn100_", "nZn100_", "nZn100_",
"nZn100_", "nZn100_", "nZn100_", "nZn100_", "nZn100_", "nZn100_",
"nZn100_", "nZn100_", "nZn100_", "nZn100_", "nZn100_", "nZn100_",
"nZn100_", "nZn100_", "nZn100_", "nZn100_", "nZn100_", "nZn100_",
"nZn100_", "nZn100_", "nZn100_", "Zn1_", "Zn1_", "Zn1_", "Zn1_",
"Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_",
"Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_",
"Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_",
"Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn1_",
"Zn1_", "Zn1_", "Zn1_", "Zn1_", "Zn10_", "Zn10_", "Zn10_", "Zn10_",
"Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_",
"Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_",
"Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_",
"Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_",
"Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_", "Zn10_",
"Zn10_", "Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_",
"Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_",
"Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_",
"Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_",
"Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_",
"Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_", "Zn100_"),
TAC = c(0.0134723395589115, 0.0161888871061509, 0.0146337654145718,
0.0153067871292595, 0.012800314735395, 0.0160841665978896,
0.0140621616691814, 0.0135425580967982, 0.0132198270328205,
0.0138496077219653, 0.0135775493518084, 0.0126333962864469,
0.0164821881641534, 0.0132516331108305, 0.0157791571175251,
0.0129960024291699, 0.0146323678504021, 0.0134451215151322,
0.0143262838325461, 0.0153573779185249, 0.0139773746147923,
0.0159350865128266, 0.0156720782857077, 0.0155096081292032,
0.013476349735956, 0.0140104181996115, 0.0129878390010014,
0.0147239859165112, 0.015160930718777, 0.0148955399340424,
0.013274378116328, 0.0153663044374496, 0.0145472559523844,
0.0132898660703847, 0.0139871399975842, 0.0124985111701027,
0.0149240276338179, 0.0129573902698069, 0.0147729343794709,
0.0128674264777598, 0.0147815872982594, 0.0139767796824041,
0.0144185398405766, 0.0155799146991459, 0.0135417909851351,
0.015988596586438, 0.0139603963976125, 0.0126397298299191,
0.013297964384596, 0.012347536157165, 0.0152573470818857,
0.0136566619097667, 0.0125192707022401, 0.0141156296691061,
0.0139603724286662, 0.0141388938152221, 0.0127749097766803,
0.0142082519110294, 0.0149398326676766, 0.0143207529313558,
0.0144381103787128, 0.0149147414885484, 0.0139224295866318,
0.0161358891403436, 0.0151690152511571, 0.0120945286936824,
0.0153132383654698, 0.0131770823852777, 0.0136750345235747,
0.0129352436377984, 0.0162120454010317, 0.0155409171425954,
0.0135940425474181, 0.0142951343511937, 0.0143779323175896,
0.0136891451722703, 0.0140286347004686, 0.0122667606250391,
0.0152446224172418, 0.013442306549535, 0.0129068996979612,
0.0147404146947943, 0.013688825582269, 0.0130193063055386,
0.01285971255513, 0.0151660181611206, 0.0138280467330508,
0.0135147736966651, 0.0158580706409006, 0.0149366602534351,
0.0106554950909403, 0.0179654260106192, 0.0120425346368713,
0.0145387164119486, 0.0139546280207597, 0.0121871897075845,
0.0150418870034593, 0.0148117380734173, 0.0139690179111281,
0.0170751257982307, 0.0129661477952429, 0.0144612227917873,
0.0146065893466387, 0.0126241343210384, 0.0170751257982307,
0.0130964557093226, 0.0134570968344701, 0.0165480203562944,
0.0151921149184481, 0.0130666062376204, 0.012722050697886,
0.0155582048904096, 0.0125288074742436, 0.016985639190516,
0.0176528351294189, 0.0138432089287227, 0.013890319218671,
0.017035215335001, 0.0168839977227436, 0.0133203267470888,
0.013892777179513, 0.0155216139064973, 0.0130076218759369,
0.013903958340264, 0.0135000204009635, 0.0148519977852621,
0.0153029154169557, 0.0141832966293512, 0.0176005510379328,
0.0180687740940438, 0.0177789446952697, 0.0182099087520794,
0.0184723827329167, 0.022483746075728, 0.0196648164641345,
0.0170131886149416, 0.0215058343136062, 0.0211259597744559,
0.0196373761289472, 0.0206737739206, 0.020532594441278, 0.0193494766153245,
0.0211617300063814, 0.0213333413267872, 0.0202163436360403,
0.0236752367085596, 0.0231873026647459, 0.0228522660496144,
0.0238366734630018, 0.0264524093818515, 0.0268093919646026,
0.0252668406573153, 0.0258403852690662, 0.0223986018317785,
0.0272147558779617, 0.0225116847733454, 0.0247724813762193,
0.022691182948792, 0.0235805783268122, 0.0270689051186104,
0.0126334908832258, 0.0164665820507107, 0.0129386884401034,
0.0119158011756844, 0.0130928729787235, 0.0149940706645974,
0.0129535502638655, 0.0162831996423606, 0.0176755444192191,
0.0161755659998132, 0.0174173101524856, 0.0155714069341957,
0.01433383826834, 0.0143819293817603, 0.0185494616259894,
0.0140319779691521, 0.0144114680062016, 0.0174497227904159,
0.0180907703704672, 0.0157478259355293, 0.0158958906812569,
0.0147163839619763, 0.0146701443994308, 0.0180369287296324,
0.0149336258279806, 0.0186097801562105, 0.0137231521985133,
0.0153650910635747, 0.0138998273293687, 0.0155199902217533,
0.0163903022171882, 0.015754928008943, 0.0171808546793322,
0.0154244829039175, 0.0134954450270778, 0.0147187179502944,
0.0160939056001929, 0.0145497150558122, 0.0154571534643691,
0.015511148172344, 0.0132885919777709, 0.0138910418368534,
0.0152496449072613, 0.0132820365830201, 0.013480084079182,
0.016683045565325, 0.0176337406920335, 0.0151657804062655,
0.0125455114843902, 0.0118102856445592, 0.0116410665300014,
0.0146556231989517, 0.014464999427952, 0.0121229802720933,
0.0146834533301593, 0.0121645122630423, 0.0136816673389857,
0.0135984961089614, 0.0164906141382343, 0.0149265724276527,
0.0163311308492402, 0.017967595623527, 0.0143263172313383,
0.0145117513172078, 0.0149694356038913, 0.0136478358101476,
0.0148523043836901, 0.0140267859486034, 0.0136857372651645,
0.0161384954212, 0.0171836598216303, 0.0165288287203719,
0.0163703032374203, 0.0149628937118673, 0.0167639896711626,
0.0144140290861155, 0.0164700832677882, 0.017097353142466,
0.0177233791174971, 0.016410406871025, 0.0145656397252108,
0.0127795571441824, 0.0139787766512734, 0.0145603577832239,
0.0130325210010334, 0.0157142193796273, 0.0165295708322065,
0.0154878492755022, 0.0176888974165639, 0.0186435561581489,
0.0177330425080685, 0.0182856446463086, 0.0219973970170363,
0.0217533371623466, 0.0176290655250839, 0.0202192044566584,
0.01917805317661, 0.0186277616395779, 0.0170154664932417,
0.0195884686724334, 0.0201420675026667, 0.0183148068985733,
0.020836323932372, 0.0207067552945439, 0.018534989031893,
0.019680916901509, 0.0219673944081694, 0.0236890701508884,
0.0235543150426157, 0.0234233849979097, 0.0210565415662947,
0.0232511101944444, 0.0227186732866978, 0.0225332903957415,
0.0234773944195847, 0.0229988542468931, 0.022618525386521,
0.0197686090869307, 0.0186686467858637, 0.0189525178016395
), conditions = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 23L,
23L, 23L, 23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 26L,
26L, 26L, 26L, 26L, 26L, 26L, 26L, 27L, 27L, 27L, 27L, 27L,
27L, 27L, 27L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 29L,
29L, 29L, 29L, 29L, 29L, 29L, 29L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 32L,
32L, 32L, 32L, 32L, 32L, 32L, 32L, 33L, 33L, 33L, 33L, 33L,
33L, 33L, 33L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 35L,
35L, 35L, 35L, 35L, 35L, 35L, 35L), levels = c("controlday 00",
"controlday 07", "controlday 14", "controlday 21", "controlday 28",
"nZn1_day 00", "nZn1_day 07", "nZn1_day 14", "nZn1_day 21",
"nZn1_day 28", "nZn10_day 00", "nZn10_day 07", "nZn10_day 14",
"nZn10_day 21", "nZn10_day 28", "nZn100_day 00", "nZn100_day 07",
"nZn100_day 14", "nZn100_day 21", "nZn100_day 28", "Zn1_day 00",
"Zn1_day 07", "Zn1_day 14", "Zn1_day 21", "Zn1_day 28", "Zn10_day 00",
"Zn10_day 07", "Zn10_day 14", "Zn10_day 21", "Zn10_day 28",
"Zn100_day 00", "Zn100_day 07", "Zn100_day 14", "Zn100_day 21",
"Zn100_day 28"), class = "factor")), class = "data.frame", row.names = c(NA,
-280L))
> str(table5)
'data.frame': 280 obs. of 4 variables:
$ day : chr "day 00" "day 00" "day 00" "day 00" ...
$ type : chr "control" "control" "control" "control" ...
$ TAC : num 0.0135 0.0162 0.0146 0.0153 0.0128 ...
$ conditions: Factor w/ 35 levels "controlday 00",..: 1 1 1 1 1 1 1 1 2 2 ...
and my ggplot plus dunn codes:
library(plotrix)
library(ggplot2)
library(dunn.test)

# NOTE(review): the dput() shown earlier lists the measurement column as
# `TAC`, while this script reads `CAT` -- confirm that `table5` really has a
# `CAT` column before running.

# Per day x type summaries; std.error() comes from plotrix.
aa <- aggregate(CAT ~ day + type, data = table5, FUN = mean)
bb <- aggregate(CAT ~ day + type, data = table5, FUN = sd) # not used below
ee <- aggregate(CAT ~ day + type, data = table5, FUN = std.error)

# One row per day x type holding the mean and its standard error.
cc <- merge(aa, ee, by = c("day", "type"))
colnames(cc)[3:4] <- c("mean", "se")

# Bars and error bars must be dodged by the same amount. With
# position = "dodge" each layer dodges by its own element width, which leaves
# the error bars off-centre relative to the bars, so share one explicit dodge.
dodge <- position_dodge(width = 0.9)

ggplot(cc, aes(x = type, y = mean, fill = day)) +
  geom_bar(stat = "identity", position = dodge) +
  scale_fill_brewer(palette = "Paired") +
  theme_minimal() +
  labs(x = "", y = "ratio", title = "CAT") +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.grid = element_blank(),
    plot.title = element_text(size = 40, hjust = 0),
    legend.text = element_text(size = 30),
    legend.title = element_text(size = 32),
    axis.title.x = element_text(size = 36),
    axis.text.x = element_text(size = 34),
    axis.title.y = element_text(size = 36),
    axis.text.y = element_text(size = 28)
  ) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    position = dodge,
    width = 0.25
  )

# Dunn's test across all type x day combinations, Benjamini-Hochberg adjusted.
dunn.test(table5$CAT, table5$conditions, method = "bh")
All my thanks for any help provided.

Related

tidyr summarize and mutate by multiple groups - calculation

I have some data, see subset below. For each Method I want to calculate the difference in the mean Cq between the 2uL and 4 uL and the 4 uL and the 8uL.
I have a function to calculate the mean for each Method, grouped by Volume. But I can't figure out how to add another column with the difference. I think I might have to summarize the summ table, but I'm getting confused. Any help appreciated. Thanks
# Summarise Cq for every Volume x Method combination: mean, standard
# deviation and coefficient of variation (in percent). NA values are ignored.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned. The result keeps summarise()'s default grouping.
dat_summ <-
  dat %>%
  group_by(Volume, Method) %>%
  summarise(
    mean_Cq = mean(Cq, na.rm = TRUE),
    sd_Cq = sd(Cq, na.rm = TRUE),
    CV = (sd(Cq, na.rm = TRUE) / mean(Cq, na.rm = TRUE)) * 100
  )
what I want, but know is wrong:
dat_summ<-
dat %>%
group_by(Volume,Method) %>%
summarise(mean_Cq = mean(Cq,na.rm=T), sd_Cq=sd(Cq,na.rm=T),
CV=(sd(Cq,na.rm=T)/mean(Cq,na.rm=T))*100)+
**mutate(delta_doub=mean_Cq_for2uL-meanCq_for4uL)**
current output:
> dat_summ
# A tibble: 12 × 5
# Groups: Volume [3]
Volume Method mean_Cq sd_Cq CV
<chr> <fct> <dbl> <dbl> <dbl>
1 2ul 2ew 20.0 0.295 1.47
2 2ul 3ew 21.9 1.79 8.18
3 2ul Manual 22.2 0.248 1.12
4 2ul WN2ew 20.5 0.604 2.94
5 4ul 2ew 19.3 0.278 1.44
6 4ul 3ew 21.2 1.33 6.29
7 4ul Manual 22.2 0.139 0.627
8 4ul WN2ew 19.9 0.493 2.48
9 8ul 2ew 18.8 0.270 1.43
10 8ul 3ew 20.8 1.21 5.81
11 8ul Manual 23.7 1.50 6.35
12 8ul WN2ew 19.5 0.463 2.38
subset of dat:
sample Method Volume Cq
1 Sample 1 2ew 2ul 20.11
2 Sample 2 2ew 2ul 20.12
3 Sample 3 2ew 2ul 19.76
17 Sample 1 WN2ew 2ul 19.89
18 Sample 2 WN2ew 2ul 20.62
19 Sample 3 WN2ew 2ul 21.07
20 Sample 4 WN2ew 2ul 20.08
52 Sample 1 2ew 4ul 19.30
53 Sample 2 2ew 4ul 19.33
54 Sample 3 2ew 4ul 19.16
68 Sample 1 WN2ew 4ul 19.49
69 Sample 2 WN2ew 4ul 19.46
70 Sample 3 WN2ew 4ul 20.42
103 Sample 1 2ew 8ul 18.91
104 Sample 2 2ew 8ul 18.60
105 Sample 3 2ew 8ul 18.42
119 Sample 1 WN2ew 8ul 18.66
120 Sample 2 WN2ew 8ul 19.13
121 Sample 3 WN2ew 8ul 19.52
> dput(dat)
structure(list(sample = c("Sample 1", "Sample 2", "Sample 3",
"Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 9",
"Sample 10", "Sample 11", "Sample 12", "Sample 13", "Sample 14",
"Sample 15", "Sample 16", "Sample 1", "Sample 2", "Sample 3",
"Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 9",
"Sample 10", "Sample 11", "Sample 12", "Sample 13", "Sample 14",
"Sample 15", "Sample 16", "Sample 1", "Sample 2", "Sample 3",
"Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 10",
"Sample 11", "Sample 12", "Sample 13", "Sample 14", "Sample 15",
"Sample 16", "Sample 1", "Sample 2", "Sample 3", "Sample 4",
"Sample 1", "Sample 2", "Sample 3", "Sample 4", "Sample 5", "Sample 6",
"Sample 7", "Sample 8", "Sample 9", "Sample 10", "Sample 11",
"Sample 12", "Sample 13", "Sample 14", "Sample 15", "Sample 16",
"Sample 1", "Sample 2", "Sample 3", "Sample 4", "Sample 5", "Sample 6",
"Sample 7", "Sample 8", "Sample 9", "Sample 10", "Sample 11",
"Sample 12", "Sample 13", "Sample 14", "Sample 15", "Sample 16",
"Sample 1", "Sample 2", "Sample 3", "Sample 4", "Sample 5", "Sample 6",
"Sample 7", "Sample 8", "Sample 10", "Sample 11", "Sample 12",
"Sample 13", "Sample 14", "Sample 15", "Sample 16", "Sample 1",
"Sample 2", "Sample 3", "Sample 4", "Sample 1", "Sample 2", "Sample 3",
"Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 9",
"Sample 10", "Sample 11", "Sample 12", "Sample 13", "Sample 14",
"Sample 15", "Sample 16", "Sample 1", "Sample 2", "Sample 3",
"Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 9",
"Sample 10", "Sample 11", "Sample 12", "Sample 13", "Sample 14",
"Sample 15", "Sample 16", "Sample 1", "Sample 2", "Sample 3",
"Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 10",
"Sample 11", "Sample 12", "Sample 13", "Sample 14", "Sample 15",
"Sample 16", "Sample 1", "Sample 2", "Sample 3", "Sample 4"),
Method = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("2ew", "3ew",
"Manual", "WN2ew"), class = "factor"), Volume = c("2ul",
"2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul",
"2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul",
"2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul",
"2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul",
"2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul",
"2ul", "2ul", "2ul", "2ul", "2ul", "4ul", "4ul", "4ul", "4ul",
"4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul",
"4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul",
"4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul",
"4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul",
"4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul",
"4ul", "4ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul",
"8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul",
"8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul",
"8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul",
"8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul",
"8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul"),
Cq = c(20.11, 20.12, 19.76, 20.07, 20.19, 19.87, 20.33, 19.81,
20.15, 19.79, 19.67, 20.23, 19.9, 20.9, 19.93, 19.96, 19.89,
20.62, 21.07, 20.08, 21.32, 21.15, 21.07, 20.85, 21.16, 21.03,
20.79, 19.39, 20.25, 19.6, 20.14, 20.32, 26.35, 21.36, 21.67,
21.13, 21.28, 21.27, 21.36, 21.08, 20.56, 26.18, 21.31, 21.35,
21.06, 21.15, 21.28, 22.2, 22.18, 21.96, 22.56, 19.3, 19.33,
19.16, 19.27, 19.42, 19.16, 19.53, 19.1, 19.38, 19.08, 19.2,
19.44, 19.18, 20.11, 19.43, 18.81, 19.49, 19.46, 20.42, 19.21,
20.69, 20.39, 20.19, 20.13, 20.29, 20.49, 20.09, 19.19, 19.63,
19.27, 19.82, 19.76, 25.57, 20.45, 20.83, 20.68, 20.72, 21.25,
21.14, 21.06, 20.47, 22.51, 20.49, 20.9, 20.47, 20.24, 20.71,
22.09, 22.07, 22.13, 22.37, 18.91, 18.6, 18.42, 18.64, 19.14,
18.77, 18.77, 18.71, 19.39, 18.7, 18.67, 19.18, 18.79, 19.22,
18.73, NA, 18.66, 19.13, 19.52, 19.02, 20.25, 19.66, 19.78,
19.71, 19.89, 20.25, 19.47, 19.06, 19.49, 18.84, 19.27, 19.22,
24.97, 20.05, 20.33, 20.05, 20.59, 20.39, 20.08, 20.73, 20.3,
20.76, 21.12, 20.81, 20.22, 20.32, 20.69, 22.15, 25.2, 24.69,
22.63)), row.names = c(NA, -153L), class = "data.frame")
If the mean delta should be computed for each 'Method', first create the column grouped by 'Method' (if it should instead be computed across all Methods, no grouping is needed): take the difference between the mean of 'Cq' where 'Volume' is '2ul' and the mean of 'Cq' where 'Volume' is '4ul'. Then include that column in the grouping used to calculate the rest of the summarised columns.
library(dplyr)

# Step 1: per Method, attach the 2ul-minus-4ul difference of mean Cq to every
# row. Step 2: summarise per Volume x Method, carrying delta_doub along as a
# grouping column so that it survives the summarise().
dat %>%
  group_by(Method) %>%
  mutate(
    delta_doub = mean(Cq[Volume == '2ul'], na.rm = TRUE) -
      mean(Cq[Volume == '4ul'], na.rm = TRUE)
  ) %>%
  group_by(Volume, Method, delta_doub) %>%
  summarise(
    mean_Cq = mean(Cq, na.rm = TRUE),
    sd_Cq = sd(Cq, na.rm = TRUE),
    # Reuses the two summaries computed just above.
    CV = (sd_Cq / mean_Cq) * 100,
    .groups = "drop"
  )
-output
# A tibble: 12 × 6
Volume Method delta_doub mean_Cq sd_Cq CV
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 2ul 2ew 0.743 20.0 0.295 1.47
2 2ul 3ew 0.727 21.9 1.79 8.18
3 2ul Manual 0.0600 22.2 0.248 1.12
4 2ul WN2ew 0.638 20.5 0.604 2.94
5 4ul 2ew 0.743 19.3 0.278 1.44
6 4ul 3ew 0.727 21.2 1.33 6.29
7 4ul Manual 0.0600 22.2 0.139 0.627
8 4ul WN2ew 0.638 19.9 0.493 2.48
9 8ul 2ew 0.743 18.8 0.270 1.43
10 8ul 3ew 0.727 20.8 1.21 5.81
11 8ul Manual 0.0600 23.7 1.50 6.35
12 8ul WN2ew 0.638 19.5 0.463 2.38
Or it can be
# Summarise per Volume x Method first, then add two columns computed over the
# whole ungrouped summary table: the difference between the average of the
# per-method means at 2ul vs 4ul, and at 4ul vs 8ul. Both values are constant
# across rows.
dat %>%
  group_by(Volume, Method) %>%
  summarise(
    mean_Cq = mean(Cq, na.rm = TRUE),
    sd_Cq = sd(Cq, na.rm = TRUE),
    # Reuses the two summaries computed just above.
    CV = (sd_Cq / mean_Cq) * 100,
    .groups = 'drop'
  ) %>%
  mutate(
    delta_doub_2_4 = mean(mean_Cq[Volume == '2ul']) -
      mean(mean_Cq[Volume == '4ul']),
    delta_doub_4_8 = mean(mean_Cq[Volume == '4ul']) -
      mean(mean_Cq[Volume == '8ul'])
  )
-output
# A tibble: 12 × 7
Volume Method mean_Cq sd_Cq CV delta_doub_2_4 delta_doub_4_8
<chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2ul 2ew 20.0 0.295 1.47 0.542 -0.0443
2 2ul 3ew 21.9 1.79 8.18 0.542 -0.0443
3 2ul Manual 22.2 0.248 1.12 0.542 -0.0443
4 2ul WN2ew 20.5 0.604 2.94 0.542 -0.0443
5 4ul 2ew 19.3 0.278 1.44 0.542 -0.0443
6 4ul 3ew 21.2 1.33 6.29 0.542 -0.0443
7 4ul Manual 22.2 0.139 0.627 0.542 -0.0443
8 4ul WN2ew 19.9 0.493 2.48 0.542 -0.0443
9 8ul 2ew 18.8 0.270 1.43 0.542 -0.0443
10 8ul 3ew 20.8 1.21 5.81 0.542 -0.0443
11 8ul Manual 23.7 1.50 6.35 0.542 -0.0443
12 8ul WN2ew 19.5 0.463 2.38 0.542 -0.0443

Why is the wilcox.test results overlapping when I try to plot a graph?

I'm trying to get statistics on my faceted graph, but the outputs of the Wilcoxon test overlap like this:
The code I am using is this:
# Box plot of clone counts per time point, one facet per cluster, with the
# result of stat_compare_means() (from ggpubr) printed on each panel.
# Every layer must be chained with `+`: without it after
# stat_compare_means(), labs() is evaluated as a separate statement and the
# axis labels are never applied to the plot.
ggplot(df, aes(y = count, x = time, group = time)) +
  theme_bw() +
  geom_boxplot() +
  theme(legend.position = "none") +
  scale_y_log10(limits = c(1, 250)) +
  facet_wrap(vars(cluster), scales = "fixed") +
  stat_compare_means(method = "wilcox.test") +
  labs(y = "Clone count", x = "Time point")
And my data looks like this:
structure(list(time = c("Day 0", "Day 0", "Day 0", "Day 0", "Day 0",
"Day 0", "Day 0", "Day 0", "Day 0", "Day 0", "Day 1", "Day 1",
"Day 1", "Day 1", "Day 1", "Day 1", "Day 1", "Day 1", "Day 1",
"Day 1", "Day 2", "Day 2", "Day 2", "Day 2", "Day 2", "Day 2",
"Day 2", "Day 2", "Day 2", "Day 2", "Day 2", "Day 2", "Day 2",
"Day 2", "Day 2", "Day 2", "Day 2", "Day 2", "Day 2", "Day 2",
"Day 2", "Day 2", "Day 2", "Day 2", "Day 2"), count = c(1L, 4L,
1L, 1L, 1L, 1L, 2L, 3L, 1L, 1L, 59L, 1L, 1L, 1L, 3L, 1L, 2L,
5L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 54L, 3L, 6L, 1L, 1L, 1L, 2L,
1L, 1L, 7L, 1L, 3L, 1L, 207L, 5L, 3L, 3L, 11L, 2L, 1L), cluster = c("C",
"C", "C", "C", "D", "D", "D", "D", "D", "D", "A", "A", "D", "D",
"D", "C", "C", "C", "C", "C", "D", "D", "D", "D", "D", "B", "C",
"B", "B", "C", "C", "C", "C", "C", "B", "B", "B", "A", "A", "A",
"A", "A", "A", "A", "A")), row.names = c(NA, -45L), class = "data.frame")
Also, how would I add significance bars to this?
It may help to specify the comparisons explicitly.
# Pairs of time points to test; stat_compare_means() (from ggpubr) draws one
# significance bracket per pair.
my_comparisons <- list(
  c("Day 0", "Day 1"),
  c("Day 1", "Day 2"),
  c("Day 0", "Day 2")
)

# The original chained scale_y_log10() and then ylim(). ylim() installs a new
# continuous y scale that replaces the earlier one, so the log scale was
# never in effect and ggplot2 printed a "Scale for y is already present"
# message. Only the scale that actually took effect is kept here.
# NOTE(review): if a log axis is wanted, use a single
# scale_y_log10(limits = ...) with a positive lower limit instead of ylim().
ggplot(df1, aes(y = count, x = time)) +
  theme_bw() +
  geom_boxplot() +
  theme(legend.position = "none") +
  stat_compare_means(method = "wilcox.test", comparisons = my_comparisons) +
  facet_wrap(. ~ (cluster), scales = "fixed") +
  labs(y = "Clone count", x = "Time point") +
  ylim(c(0, 400))

Clustering around fixed vector of values

I have a dataset of brands with different features like calories, sugar content, fiber content, etc.
for eg
Using dput():
structure(list(Row = 1:30, Brands = structure(c(1L, 112L, 223L, 242L,
253L, 264L, 275L, 286L, 297L, 2L, 13L, 24L, 35L, 46L, 57L, 68L, 79L,
90L, 101L, 113L, 124L, 135L, 146L, 157L, 168L, 179L, 190L, 201L,
212L, 224L), .Label = c("Brand 1", "Brand 10", "Brand 100", "Brand
101", "Brand 102", "Brand 103", "Brand 104", "Brand 105", "Brand
106", "Brand 107", "Brand 108", "Brand 109", "Brand 11", "Brand 110",
"Brand 111", "Brand 112", "Brand 113", "Brand 114", "Brand 115",
"Brand 116", "Brand 117", "Brand 118", "Brand 119", "Brand 12",
"Brand 120", "Brand 121", "Brand 122", "Brand 123", "Brand 124",
"Brand 125", "Brand 126", "Brand 127", "Brand 128", "Brand 129",
"Brand 13", "Brand 130", "Brand 131", "Brand 132", "Brand 133",
"Brand 134", "Brand 135", "Brand 136", "Brand 137", "Brand 138",
"Brand 139", "Brand 14", "Brand 140", "Brand 141", "Brand 142",
"Brand 143", "Brand 144", "Brand 145", "Brand 146", "Brand 147",
"Brand 148", "Brand 149", "Brand 15", "Brand 150", "Brand 151",
"Brand 152", "Brand 153", "Brand 154", "Brand 155", "Brand 156",
"Brand 157", "Brand 158", "Brand 159", "Brand 16", "Brand 160",
"Brand 161", "Brand 162", "Brand 163", "Brand 164", "Brand 165",
"Brand 166", "Brand 167", "Brand 168", "Brand 169", "Brand 17",
"Brand 170", "Brand 171", "Brand 172", "Brand 173", "Brand 174",
"Brand 175", "Brand 176", "Brand 177", "Brand 178", "Brand 179",
"Brand 18", "Brand 180", "Brand 181", "Brand 182", "Brand 183",
"Brand 184", "Brand 185", "Brand 186", "Brand 187", "Brand 188",
"Brand 189", "Brand 19", "Brand 190", "Brand 191", "Brand 192",
"Brand 193", "Brand 194", "Brand 195", "Brand 196", "Brand 197",
"Brand 198", "Brand 199", "Brand 2", "Brand 20", "Brand 200", "Brand
201", "Brand 202", "Brand 203", "Brand 204", "Brand 205", "Brand
206", "Brand 207", "Brand 208", "Brand 209", "Brand 21", "Brand 210",
"Brand 211", "Brand 212", "Brand 213", "Brand 214", "Brand 215",
"Brand 216", "Brand 217", "Brand 218", "Brand 219", "Brand 22",
"Brand 220", "Brand 221", "Brand 222", "Brand 223", "Brand 224",
"Brand 225", "Brand 226", "Brand 227", "Brand 228", "Brand 229",
"Brand 23", "Brand 230", "Brand 231", "Brand 232", "Brand 233",
"Brand 234", "Brand 235", "Brand 236", "Brand 237", "Brand 238",
"Brand 239", "Brand 24", "Brand 240", "Brand 241", "Brand 242",
"Brand 243", "Brand 244", "Brand 245", "Brand 246", "Brand 247",
"Brand 248", "Brand 249", "Brand 25", "Brand 250", "Brand 251",
"Brand 252", "Brand 253", "Brand 254", "Brand 255", "Brand 256",
"Brand 257", "Brand 258", "Brand 259", "Brand 26", "Brand 260",
"Brand 261", "Brand 262", "Brand 263", "Brand 264", "Brand 265",
"Brand 266", "Brand 267", "Brand 268", "Brand 269", "Brand 27",
"Brand 270", "Brand 271", "Brand 272", "Brand 273", "Brand 274",
"Brand 275", "Brand 276", "Brand 277", "Brand 278", "Brand 279",
"Brand 28", "Brand 280", "Brand 281", "Brand 282", "Brand 283",
"Brand 284", "Brand 285", "Brand 286", "Brand 287", "Brand 288",
"Brand 289", "Brand 29", "Brand 290", "Brand 291", "Brand 292",
"Brand 293", "Brand 294", "Brand 295", "Brand 296", "Brand 297",
"Brand 298", "Brand 299", "Brand 3", "Brand 30", "Brand 300", "Brand
301", "Brand 302", "Brand 303", "Brand 304", "Brand 305", "Brand
306", "Brand 307", "Brand 31", "Brand 32", "Brand 33", "Brand 34",
"Brand 35", "Brand 36", "Brand 37", "Brand 38", "Brand 39", "Brand
4", "Brand 40", "Brand 41", "Brand 42", "Brand 43", "Brand 44",
"Brand 45", "Brand 46", "Brand 47", "Brand 48", "Brand 49", "Brand
5", "Brand 50", "Brand 51", "Brand 52", "Brand 53", "Brand 54",
"Brand 55", "Brand 56", "Brand 57", "Brand 58", "Brand 59", "Brand
6", "Brand 60", "Brand 61", "Brand 62", "Brand 63", "Brand 64",
"Brand 65", "Brand 66", "Brand 67", "Brand 68", "Brand 69", "Brand 7",
"Brand 70", "Brand 71", "Brand 72", "Brand 73", "Brand 74", "Brand
75", "Brand 76", "Brand 77", "Brand 78", "Brand 79", "Brand 8",
"Brand 80", "Brand 81", "Brand 82", "Brand 83", "Brand 84", "Brand
85", "Brand 86", "Brand 87", "Brand 88", "Brand 89", "Brand 9",
"Brand 90", "Brand 91", "Brand 92", "Brand 93", "Brand 94", "Brand
95", "Brand 96", "Brand 97", "Brand 98", "Brand 99"), class =
"factor"), Fiber = c(82L, 36L, 51L, 86L, 26L, 98L, 91L, 28L, 1L, 88L,
35L, 84L, 27L, 58L, 9L, 43L, 49L, 56L, 66L, 43L, 62L, 73L, 20L, 33L,
17L, 88L, 57L, 45L, 89L, 16L), Sugar = c(77L, 87L, 40L, 69L, 9L, 1L,
54L, 64L, 24L, 52L, 29L, 14L, 76L, 24L, 39L, 54L, 18L, 72L, 54L, 9L,
45L, 65L, 43L, 90L, 40L, 93L, 75L, 50L, 1L, 44L), Calories = c(94L,
14L, 36L, 34L, 40L, 91L, 58L, 82L, 91L, 19L, 60L, 79L, 44L, 60L, 80L,
27L, 17L, 5L, 10L, 89L, 63L, 43L, 29L, 99L, 92L, 19L, 9L, 38L, 43L,
9L), Feature.4 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L,
1L), Feature.5 = c(1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L,
0L), Feature.6 = c(7L, 11L, 45L, 45L, 35L, 28L, 56L, 52L, 1L, 49L,
28L, 68L, 99L, 70L, 62L, 73L, 97L, 2L, 41L, 14L, 68L, 84L, 76L, 2L,
53L, 38L, 3L, 52L, 12L, 70L), Feature.7 = c(54L, 22L, 11L, 67L, 22L,
67L, 69L, 67L, 89L, 24L, 32L, 25L, 90L, 62L, 82L, 100L, 53L, 50L,
75L, 79L, 53L, 4L, 31L, 96L, 55L, 35L, 69L, 74L, 88L, 9L)), row.names
= c(NA, 30L), class = "data.frame")
I want to pick 5 brands among them, say Brand 1,2,3,4&5 and then form clusters or groups of the brands similar in features to each of these 5 brands and keep all the rest of all the brands which are not similar as a separate cluster.
So, I will have 1 cluster for Brand 1, 1 for Brand 2, 1 for Brand 3, and similarly for Brands 4 & 5. And then there will be 1 cluster of those brands which are not similar to any of these 5 brands. The features may be dummy or continuous variables.
I think this is should be easy, however, I couldn't find any package for this in "R".
Here is a simple example using the data you included which I am calling dta. First we compute z-scores for the values except for the dichotomies:
library(fields)
# Start from a copy of the data, then overwrite the continuous columns
# (3-5 and 8-9) with their z-scores; columns 6-7 (the 0/1 dichotomies)
# are left as they are.
dta.zscores <- dta
dta.zscores[c(3:5, 8:9)] <- as.data.frame(scale(dta[c(3:5, 8:9)]))
Now dta.zscores contains the z-scores of the original data so that each variable will be weighted equally. Next we compute the distances from each row to rows 1 - 5 (Brands 1 through 5) using columns 3 through 9:
# Distances from the five reference rows (Brands 1-5) to every row, using
# columns 3 through 9 of the z-scored data: one row per reference brand,
# one column per observation.
dta.dist <- rdist(dta.zscores[seq_len(5), 3:9], dta.zscores[, 3:9])
# For each observation (column): its smallest distance to a reference
# brand, and the index of the reference brand that achieves it.
dta.mindist <- apply(dta.dist, MARGIN = 2, FUN = min)
dta.brand <- apply(dta.dist, MARGIN = 2, FUN = which.min)
# Distribution of the nearest-brand distances, leaving out the five
# reference rows themselves.
quantile(dta.mindist[-seq_len(5)])
# 0% 25% 50% 75% 100%
# 1.131532 1.952891 2.383079 2.908602 3.475676
# Number of observations assigned to each reference brand.
table(dta.brand)
# dta.brand
# 1 2 3 4 5
# 4 2 7 11 6
The matrix dta.dist is 30 columns (each of the observations) by 5 rows (distance to each of the brands). The vector dta.mindist is the minimum distance for each observation. That will be 0 for the first 5 observations. The vector dta.brand indicates which of the 5 brands is the nearest for that observation. The quantile function shows the range of distances to the nearest brand after excluding the first 5 brands, which were used to define the groups. Finally, the table shows how many observations are assigned to each brand.
You would still need to decide how far is too far to assign an observation to a brand and move these observations to another cluster, e.g. cluster 6. Based on the quantiles, 25% of the distances are greater than 2.9. You can specify other quantiles using the probs= argument in quantile(), e.g. .90 or .95 depending on how many observations you want to be in cluster 6.

How do I separate these apparently linked columns?

Running into something strange as I try to use dplyr's select command to reduce the number of columns I have. I name three columns but I keep getting 4. Aside from the star trek chain of command flashbacks I find this behaviour odd and not sure how to get around it. Also, why is this happening?
Here is my dataframe expressing the number of occurrences in each block of time in a day. It's also pretty verbose for just 6 rows of data.
library(dplyr)
library(tidyr)
# Example data as produced by dput(): a 6-row grouped tibble (class
# "grouped_df") with columns Day, Number, time, Start and n. The grouping
# variables recorded in its attributes are Day, Number and time; the remaining
# attributes (indices, group_sizes, labels, ...) are the grouping metadata that
# dput() wrote out along with the data.
test <- structure(list(Day = c("Dec 10", "Dec 10", "Dec 10", "Dec 10",
"Dec 11", "Dec 11"), Number = c(10L, 10L, 10L, 10L, 11L, 11L),
time = c("08:30", "12:00", "15:30", "19:00", "08:30", "12:00"
), Start = structure(c(1544430600, 1544443200, 1544455800,
1544468400, 1544517000, 1544529600), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), n = c(29L, 74L, 20L, 26L, 29L,
32L)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -6L), vars = c("Day", "Number", "time"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L), group_sizes = c(1L, 1L, 1L, 1L,
1L, 1L), biggest_group_size = 1L, labels = structure(list(Day = c("Dec 10",
"Dec 10", "Dec 10", "Dec 10", "Dec 11", "Dec 11"), Number = c(10L,
10L, 10L, 10L, 11L, 11L), time = c("08:30", "12:00", "15:30",
"19:00", "08:30", "12:00")), class = "data.frame", row.names = c(NA,
-6L), vars = c("Day", "Number", "time"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L,
50L, 51L, 52L), group_sizes = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
Day = c("Dec 10", "Dec 10", "Dec 10", "Dec 10", "Dec 11",
"Dec 11", "Dec 11", "Dec 11", "Dec 12", "Dec 12", "Dec 12",
"Dec 12", "Dec 13", "Dec 13", "Dec 13", "Dec 13", "Dec 14",
"Dec 14", "Dec 14", "Dec 14", "Dec 15", "Dec 15", "Dec 15",
"Dec 17", "Dec 17", "Dec 17", "Dec 17", "Dec 18", "Dec 18",
"Dec 18", "Dec 18", "Dec 19", "Dec 19", "Dec 19", "Dec 4",
"Dec 4", "Dec 4", "Dec 4", "Dec 5", "Dec 5", "Dec 5", "Dec 5",
"Dec 6", "Dec 6", "Dec 6", "Dec 6", "Dec 7", "Dec 7", "Dec 7",
"Dec 7", "Dec 8", "Dec 8", "Dec 8"), Number = c(10L, 10L,
10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L,
13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 17L, 17L, 17L,
17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L),
time = c("08:30", "12:00", "15:30", "19:00", "08:30", "12:00",
"15:30", "19:00", "08:30", "12:00", "15:30", "19:00", "08:30",
"12:00", "15:30", "19:00", "08:30", "12:00", "15:30", "19:00",
"08:30", "12:00", "15:30", "08:30", "12:00", "15:30", "19:00",
"08:30", "12:00", "15:30", "19:00", "08:30", "12:00", "15:30",
"08:30", "12:00", "15:30", "19:00", "08:30", "12:00", "15:30",
"19:00", "08:30", "12:00", "15:30", "19:00", "08:30", "12:00",
"15:30")), class = "data.frame", row.names = c(NA,
-53L), vars = c("Day", "Number", "time"), drop = TRUE)))
You can see in the output that there's only 3 variables listed but oddly shows more. And when I select for specific variables or subtract others it won't work.
# Ask for three columns only: Day, time and n.
select(test, Day, time, n)
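The tibble should be ungrouped before selecting variables: select() always keeps the grouping columns of a grouped tibble, which is why `Number` is added back even though it was not requested. This is described in the question "Adding missing grouping variables" message in dplyr in R: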
Without ungrouping:
# Select on the still-grouped tibble: the grouping column `Number` is added
# back to the result, as the message in the output below reports.
test %>%
select(Day, time, n)
> test %>%
+ select(Day, time, n)
Adding missing grouping variables: `Number`
# A tibble: 6 x 4
# Groups: Day, Number, time [6]
Number Day time n
<int> <chr> <chr> <int>
1 10 Dec 10 08:30 29
2 10 Dec 10 12:00 74
3 10 Dec 10 15:30 20
4 10 Dec 10 19:00 26
5 11 Dec 11 08:30 29
6 11 Dec 11 12:00 32
With ungrouping
# Remove the grouping first, so that select() returns only the three
# requested columns (Day, time, n).
test %>%
ungroup() %>%
select(Day, time, n)
> test %>%
+ ungroup() %>%
+ select(Day, time, n)
# A tibble: 6 x 3
Day time n
<chr> <chr> <int>
1 Dec 10 08:30 29
2 Dec 10 12:00 74
3 Dec 10 15:30 20
4 Dec 10 19:00 26
5 Dec 11 08:30 29
6 Dec 11 12:00 32
Ungrouping the dataframe fixes it all.

ggplot assigns wrong colors to hexadecimal colors

I'm assigning hexadecimal colors which are in a df column to ggplot.
However in the plot it shows the names of the hexadecimal colors but not the right colors, colors don't match the hexadecimal colors.
Dataframe and plot code:
str(Trun)
'data.frame': 1043 obs. of 12 variables:
$ X : int 1 2 3 4 5 6 7 8 9 10 ...
$ DE : num 36.5 37 40.2 36.8 38.8 ...
$ hex : chr NA NA NA NA ...
unique(Trun$hex)
[1] NA "#8A8F8C" "#507085" "#3F7767" "#917652" "#DBAD5D"
# Scatter plot of DE against X, with the hex column mapped to colour.
ggplot(data = Trun, mapping = aes(x = X, y = DE, colour = hex)) +
  geom_point() +
  ggtitle("DE with 35* sd values")
Below the data reworked with dput
structure(list(X = 1:50, Sample = structure(c(1L, 12L, 23L, 34L,
45L, 47L, 48L, 49L, 50L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 35L, 36L, 37L, 38L, 39L,
40L, 41L, 42L, 43L, 44L, 46L), .Label = c("Sample 1", "Sample 10",
"Sample 11", "Sample 12", "Sample 13", "Sample 14", "Sample 15",
"Sample 16", "Sample 17", "Sample 18", "Sample 19", "Sample 2",
"Sample 20", "Sample 21", "Sample 22", "Sample 23", "Sample 24",
"Sample 25", "Sample 26", "Sample 27", "Sample 28", "Sample 29",
"Sample 3", "Sample 30", "Sample 31", "Sample 32", "Sample 33",
"Sample 34", "Sample 35", "Sample 36", "Sample 37", "Sample 38",
"Sample 39", "Sample 4", "Sample 40", "Sample 41", "Sample 42",
"Sample 43", "Sample 44", "Sample 45", "Sample 46", "Sample 47",
"Sample 48", "Sample 49", "Sample 5", "Sample 50", "Sample 6",
"Sample 7", "Sample 8", "Sample 9"), class = "factor"), Time = structure(1:50, .Label = c("10:51:04 AM",
"10:51:05 AM", "10:51:06 AM", "10:51:07 AM", "10:51:08 AM", "10:51:09 AM",
"10:51:10 AM", "10:51:11 AM", "10:51:12 AM", "10:51:13 AM", "10:51:14 AM",
"10:51:15 AM", "10:51:16 AM", "10:51:17 AM", "10:51:18 AM", "10:51:19 AM",
"10:51:20 AM", "10:51:21 AM", "10:51:22 AM", "10:51:23 AM", "10:51:24 AM",
"10:51:25 AM", "10:51:26 AM", "10:51:27 AM", "10:51:28 AM", "10:51:29 AM",
"10:51:30 AM", "10:51:31 AM", "10:51:32 AM", "10:51:33 AM", "10:51:34 AM",
"10:51:35 AM", "10:51:36 AM", "10:51:37 AM", "10:51:38 AM", "10:51:39 AM",
"10:51:40 AM", "10:51:41 AM", "10:51:42 AM", "10:51:43 AM", "10:51:44 AM",
"10:51:45 AM", "10:51:46 AM", "10:51:47 AM", "10:51:48 AM", "10:51:49 AM",
"10:51:50 AM", "10:51:51 AM", "10:51:52 AM", "10:51:53 AM"), class = "factor"),
L = c(57.61, 57.16, 53.96, 57.3, 55.27, 57.9, 59.05, 55.13,
53.8, 57.59, 52.23, 57.93, 58.59, 56.27, 58.62, 61.25, 56.76,
56.64, 58.49, 53.99, 53.17, 56.77, 57.35, 53.43, 55.19, 54.5,
53.17, 53.88, 55.15, 61.81, 57.03, 55.97, 54.83, 59.53, 54.29,
56.84, 53.53, 55.38, 57.84, 58.32, 54.67, 52.72, 53.94, 55.17,
58.15, 53.55, 58.75, 56.07, 58.46, 60.33), C = c(4.56, 4.17,
5.14, 3.9, 3.63, 3.47, 4.3, 4.95, 5.76, 3.49, 4.7, 4.64,
5.64, 3.76, 2.25, 4.66, 5.96, 4.13, 5.32, 4.45, 4.11, 3.88,
5.47, 4.17, 5.92, 2.71, 5.2, 4.24, 5.78, 5.37, 4.71, 4.39,
3.83, 5.01, 4.62, 5.08, 4.74, 3.62, 3.59, 4.09, 3.32, 4.06,
4.09, 5.16, 3.1, 5.59, 3.06, 3.67, 4.56, 6.75), h = c(219.98,
226.13, 233.39, 221.78, 213.56, 214.16, 230.93, 229.57, 236.17,
230.59, 235.2, 237.58, 240.43, 228.53, 206.55, 234.13, 241.12,
231.82, 227.03, 231.32, 218.68, 230.31, 228.59, 229.36, 235.56,
197.32, 232.49, 228.39, 244.63, 235.78, 231.22, 228.92, 231.17,
244.2, 228.49, 234.01, 227.85, 226.29, 210.04, 232.38, 222.71,
220.69, 226.08, 233.15, 215.76, 236.26, 206.3, 219.79, 232.37,
246.63), L1 = c(57.61, 57.16, 53.96, 57.3, 55.27, 57.9, 59.05,
55.13, 53.8, 57.59, 52.23, 57.93, 58.59, 56.27, 58.62, 61.25,
56.76, 56.64, 58.49, 53.99, 53.17, 56.77, 57.35, 53.43, 55.19,
54.5, 53.17, 53.88, 55.15, 61.81, 57.03, 55.97, 54.83, 59.53,
54.29, 56.84, 53.53, 55.38, 57.84, 58.32, 54.67, 52.72, 53.94,
55.17, 58.15, 53.55, 58.75, 56.07, 58.46, 60.33), a = c(-3.49,
-2.89, -3.06, -2.91, -3.03, -2.87, -2.71, -3.21, -3.21, -2.22,
-2.68, -2.49, -2.79, -2.49, -2.01, -2.73, -2.88, -2.55, -3.63,
-2.78, -3.21, -2.48, -3.62, -2.72, -3.35, -2.58, -3.16, -2.81,
-2.48, -3.02, -2.95, -2.88, -2.4, -2.18, -3.06, -2.99, -3.18,
-2.5, -3.11, -2.5, -2.44, -3.08, -2.84, -3.1, -2.51, -3.1,
-2.74, -2.82, -2.79, -2.68), b = c(-2.93, -3.01, -4.13, -2.6,
-2.01, -1.95, -3.34, -3.77, -4.79, -2.7, -3.86, -3.92, -4.91,
-2.82, -1, -3.78, -5.22, -3.24, -3.89, -3.47, -2.57, -2.99,
-4.11, -3.17, -4.88, -0.81, -4.12, -3.17, -5.22, -4.44, -3.67,
-3.31, -2.99, -4.51, -3.46, -4.11, -3.52, -2.61, -1.8, -3.24,
-2.25, -2.64, -2.94, -4.13, -1.81, -4.65, -1.35, -2.35, -3.61,
-6.2), DE = c(36.52, 36.95, 40.24, 36.78, 38.77, 36.13, 35.08,
39.04, 40.47, 36.47, 41.92, 36.24, 35.72, 37.8, 35.35, 32.94,
37.57, 37.46, 35.74, 40.14, 40.9, 37.31, 36.9, 40.67, 39.11,
39.48, 41.02, 40.22, 39.15, 32.48, 37.13, 38.15, 39.24, 34.72,
39.84, 37.37, 40.61, 38.67, 36.2, 35.79, 39.35, 41.35, 40.14,
39.03, 35.86, 40.7, 35.25, 37.98, 35.69, 34.2), heihgtmm = c(53.1,
67.01, 80.16, 85.3, 86.37, 92.36, 91.12, 90.56, 91.02, 94.25,
96.79, 94.36, 96.26, 95.4, 93, 91.58, 91.92, 89, 81.52, 68.57,
53.54, 49.43, 54.31, 72.51, 81.95, 82.85, 86.77, 85.47, 90.1,
87.95, 90.1, 86.97, 89.42, 89.65, 87.56, 83.48, 76.43, 63.15,
54.92, 64.79, 67.47, 64.77, 61.6, 63.58, 69.27, 79.75, 83.41,
85.56, 88.47, 90.57), hex = structure(c(1L, 1L, 1L, 1L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 2L
), .Label = c("", "#507085", "#8A8F8C"), class = "factor")), .Names = c("X",
"Sample", "Time", "L", "C", "h", "L1", "a", "b", "DE", "heihgtmm",
"hex"), class = "data.frame", row.names = c(NA, -50L))
I created a new variable containing hex-values as character and added that variable into the aes.
# Create a new character column of colours for the identity scale.
# Rows without a hex value -- either an empty string or NA -- get a red
# fallback ("#CC0000"; substitute any colour you like). The is.na() check is
# needed because `NA == ""` is NA, which would otherwise leave those rows with
# an NA colour. as.character() turns a factor column into plain strings.
Trun$hex2 <- ifelse(
  is.na(Trun$hex) | Trun$hex == "",
  "#CC0000",
  as.character(Trun$hex)
)
# Plot: scale_colour_identity() makes ggplot use the strings in hex2 as the
# colours themselves instead of treating them as category labels.
ggplot(data = Trun, aes(x = X, y = DE, colour = hex2)) +
  geom_point(size = 3) + # larger points for visibility
  ggtitle("DE with 35* sd values") +
  scale_colour_identity()

Resources