stat_compare_means with multiple groups - r

I need some help with stat_compare_means and multiple groups.
Here is what my data look like.
> head(df_annot)
Row.names Diversity_sh Diversity_si Evenness Chao1 Location Bean Fungi Insect
1 R-B1 1.314181 0.6040213 0.3053349 91.00000 Root Bean M- NI
2 R-B2 1.323718 0.6117602 0.3075507 77.43750 Root Bean M- NI
3 R-B3 1.249950 0.5737293 0.2877545 81.50000 Root Bean M- NI
4 R-BF-1 1.177111 0.5414276 0.2693958 92.33333 Root Bean M+ NI
5 R-BF-2 1.191254 0.5252688 0.2742420 79.54545 Root Bean M+ NI
6 R-BF-3 1.397233 0.6285945 0.3179540 85.50000 Root Bean M+ NI
Here is a graph and I would like ALL comparisons labelled.
Here is some code. I know that I don't have my_comparisons correct, but I don't know where to start for the two groups. I want to compare M+/Insect to M-/Insect and M+/Insect to M+/NI etc.., all two-way comparisons. Any suggestions would be great. thanks
my_comparisons<- list( c("M+", "M-"), c("Insect", "NI"))
ggplot(df_annot,aes_string(x="Insect",y=index,fill="Fungi"))+
geom_boxplot(alpha=0.8)+
geom_point(aes(fill=Fungi),size = 3, shape = 21,position = position_jitterdodge(jitter.width = 0.02,jitter.height = 0))+
stat_compare_means(comparison=my_comparisons,label="p.format",method="wilcox.test")+
#ggtitle(df_name)+
ylab(paste(index))+
xlab("")+
# scale_x_discrete(labels= c("M+","M-","soil alone"))+
theme(plot.title = element_text(size = 18, face = "bold"))+
theme(axis.text=element_text(size=14),
axis.title=element_text(size=14)) +
theme(legend.text=element_text(size=14),
legend.title=element_text(size=14)) +
theme(strip.text.x = element_text(size = 14))
dput(df_annot)
structure(list(Row.names = structure(c("R-B1", "R-B2", "R-B3",
"R-BF-1", "R-BF-2", "R-BF-3", "R-BFi-1", "R-BFi-2", "R-Bi-1",
"R-Bi-2", "R-Bi-3"), class = "AsIs"), Diversity_sh = c(1.31418133185869,
1.32371839350534, 1.24994951615418, 1.17711111336449, 1.19125374868316,
1.39723272927515, 1.34145146126423, 1.21674449259962, 1.20721660188555,
1.17245529262564, 1.20912937911657), Diversity_si = c(0.604021268328531,
0.611760247980402, 0.573729285531772, 0.541427625516077, 0.525268755766239,
0.628594506768001, 0.597250229879166, 0.554646956896473, 0.548992316400345,
0.531291238688503, 0.583806537719818), Evenness = c(0.305334910927276,
0.307550737463383, 0.287754490536268, 0.269395848882803, 0.274241968272787,
0.317954009728278, 0.305260435164649, 0.276882141486585, 0.273949061455415,
0.269914321375221, 0.275929262855007), Chao1 = c(91, 77.4375,
81.5, 92.3333333333333, 79.5454545454545, 85.5, 87.5, 90.5454545454545,
89.3333333333333, 88.6666666666667, 88.0769230769231), Location = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Root", "Rhizospheric Soil"
), class = "factor"), Bean = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "Bean", class = "factor"),
Fungi = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L), .Label = c("M+", "M-"), class = "factor"), Insect = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Insect",
"NI"), class = "factor")), row.names = c(NA, -11L), class = "data.frame")

facet_wrap() might help you as discussed here
# Box plot of Evenness by Insect status, one panel per Fungi level.
# Columns are referenced by bare name so ggplot2 resolves them inside `data`;
# using `df_annot$col` in aes()/facet_wrap() bypasses that lookup and produces
# labels such as "df_annot$Insect".
ggplot(df_annot, aes(x = Insect, y = Evenness)) +
  facet_wrap(~Fungi) +
  geom_boxplot(alpha = 0.8) +
  geom_point() +
  stat_compare_means(
    comparisons = list(c("Insect", "NI")),
    label = "p.format",
    method = "wilcox.test"
  )
EDIT
ok here is a - not too elegant - solution without faceting.
Create a new variable containing Insect info and Fungi status:
df_annot$var <- paste(df_annot$Insect,df_annot$Fungi, sep = "_" )
Then build the contrasts
my_comparisons <- rev(list(c("Insect_M-","Insect_M+"),c("NI_M-","Insect_M-"),c("NI_M+","Insect_M-"),
c("Insect_M+", "NI_M-"), c("Insect_M+", "NI_M+"), c("NI_M-","NI_M+")))
and plot your graph
# Plot Evenness for every Insect/Fungi combination (column `var`) and annotate
# each pairwise Wilcoxon comparison listed in `my_comparisons`.
# aes() with bare column names replaces the deprecated aes_string().
ggplot(df_annot, aes(x = var, y = Evenness, fill = Fungi)) +
  geom_boxplot(alpha = 0.8) +
  # `fill = Fungi` is inherited from the plot-level mapping.
  geom_point(
    size = 3,
    shape = 21,
    position = position_jitterdodge(jitter.width = 0.02, jitter.height = 0)
  ) +
  # Argument name written out in full instead of relying on partial matching.
  stat_compare_means(
    comparisons = my_comparisons,
    label = "p.format",
    method = "wilcox.test"
  ) +
  # ggtitle(df_name) +
  ylab("Evenness") +
  xlab("") +
  # scale_x_discrete(labels = c("M+", "M-", "soil alone")) +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    strip.text.x = element_text(size = 14)
  )
you might want to make better names and such. but this could be what you are looking for.

Related

My function aspect ratio not having any effect on my ggplot

My theme(aspect.ratio) is not having any effect on my graph and I am wondering why. When I take it off it stretches the graph too much. I want to have control so I tune my graph. Currently I am getting a wide width but unable to manipulate the height of the graph
tgc <- structure(list(Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Visible", "Remembered"), class = "factor"),
Condition = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L
), .Label = c("CEN", "IPS", "CTL"), class = "factor"), test = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Pre-test", "Post-test"
), class = "factor"), Session = structure(c(1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L), .Label = c("Adaptation", "Post-adaptation"
), class = "factor"), N = c(12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12), Abs_IDE = c(23.7992344166667, 13.0386095591667, 15.7028633063333,
19.84749675725, 18.1572712615, 11.68093831675, 16.1626619356667,
22.9018392689167, 25.980293756, 13.45901734025, 24.4302041906667,
12.0791439846667, 22.4191223275, 8.96559583341667, 16.7862310038333,
18.9797044311667, 20.1929425604167, 11.7699855065833, 13.56023988325,
19.73171153625, 33.5879796116667, 14.6798949093333, 26.1433479941667,
8.74572327816667), sd = c(8.04062173855832, 9.0782958352921,
6.90557155294788, 10.7770226212508, 9.90142743265505, 6.13623590788893,
9.72344405555326, 11.4928324154261, 14.7569847333998, 8.68503337334045,
14.1914679614007, 8.09149043273921, 8.16863922288962, 7.04528824438605,
5.65528757276849, 10.2612900552688, 8.59034167901021, 9.73347852641845,
6.85089019152845, 10.7480537432634, 14.6034318371352, 7.50792731288404,
10.5564213216697, 4.75186261300342), se = c(2.3211275626043,
2.62067827214448, 1.99346679750134, 3.11105845572093, 2.85829589680247,
1.77137872661536, 2.80691652146197, 3.31769494439875, 4.25997455412779,
2.5071531780095, 4.09672392385531, 2.33581208974363, 2.35808302712413,
2.0337995322074, 1.63254090124132, 2.96217928782113, 2.4798180404037,
2.80981322368957, 1.97768164813376, 3.10269586096884, 4.21564765113117,
2.16735192757487, 3.04737634587255, 1.37174457938482), ci = c(5.10876731997174,
5.76807398636459, 4.38759083843585, 6.84739349321937, 6.29106685201305,
3.89877829018243, 6.17798160935384, 7.3021973383327, 9.37614077600327,
5.51820693887549, 9.01682856139793, 5.14108774628837, 5.19010574896418,
4.4763625889898, 3.59319829687477, 6.51971265402513, 5.45804270665608,
6.18435720796999, 4.35284795892798, 6.82898754627016, 9.27857792031489,
4.7703094292883, 6.70723011447976, 3.01918946266214)), row.names = c(NA,
-24L), class = "data.frame")
library(ggh4x)
p <- ggplot(tgc, aes(x = Condition, y = Abs_IDE), fill = test) +
geom_errorbar(aes(ymin=Abs_IDE-se, ymax=Abs_IDE+se, group = test), position = position_dodge(0.5), width=.1) +
geom_bar(aes(fill = test), stat = "identity", width = 0.5, color = "black", position='dodge') + ylim(0,38.5) + theme_bw() + theme(
axis.text.x = element_text(size = 12,face="bold"),#, angle = 10, hjust = .5, vjust = .5),
axis.text.y = element_text(size = 12, face = "bold"),
axis.title.y = element_text(vjust= 1.8, size = 20),
axis.title.x = element_text(vjust= -0.5, size = 20),
axis.title = element_text(face = "bold")) + xlab("space") + ylab("Plot title") + theme(legend.position="top") +
scale_fill_manual(values = c("grey80", "grey20")) +
facet_nested(. ~ Session + Group )
p + guides(fill=guide_legend(title="Test:")) + theme(legend.text=element_text(size=16),legend.title=element_text(size=16) ) +
theme(strip.text = element_text(face="bold", size=12)) + theme(aspect.ratio = 1)
I think I fixed the bug, at least the following should work now with the current github branch:
library(ggplot2)
library(ggh4x) # devtools::install_github("teunbrand/ggh4x")
# tgc <- structure(...) # omitted for brevity
p <- ggplot(tgc, aes(x = Condition, y = Abs_IDE), fill = test) +
geom_errorbar(aes(ymin=Abs_IDE-se, ymax=Abs_IDE+se, group = test), position = position_dodge(0.5), width=.1) +
geom_bar(aes(fill = test), stat = "identity", width = 0.5, color = "black", position='dodge') + ylim(0,38.5) + theme_bw() + theme(
axis.text.x = element_text(size = 12,face="bold"),#, angle = 10, hjust = .5, vjust = .5),
axis.text.y = element_text(size = 12, face = "bold"),
axis.title.y = element_text(vjust= 1.8, size = 20),
axis.title.x = element_text(vjust= -0.5, size = 20),
axis.title = element_text(face = "bold")) + xlab("space") + ylab("Plot title") + theme(legend.position="top") +
scale_fill_manual(values = c("grey80", "grey20")) +
facet_nested(. ~ Session + Group )
p + guides(fill=guide_legend(title="Test:")) + theme(legend.text=element_text(size=16),legend.title=element_text(size=16) ) +
theme(strip.text = element_text(face="bold", size=12)) + theme(aspect.ratio = 2)
Created on 2021-10-28 by the reprex package (v2.0.1)

ggplot with error bars differentiated by shape of points

I am getting an error message that reads 'Error: All unnamed arguments must be length 1'. I am trying to differentiate my lines by the shape of the points, and I keep getting the error above. I want to use all solid lines, differentiated only by point shape. The journal I want to publish my work in requires black and white instead of color.
The plot I want to modify:
tgc <- structure(
list(
GROUP = structure(
c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L),
.Label = c("LLL", "LRL", "RLR", "RRR"),
class = "factor"
),
condition = structure(
c(1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L),
.Label = c("Midline", "No Midline crossing", "Midline crossing"),
class = "factor"
),
names = structure(
c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L),
.Label = c("102", "104"),
class = "factor"
),
Trial_type = structure(
c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L),
.Label = c("retention", "transfer"),
class = "factor"
),
Training = structure(
c(2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L),
.Label = c("right", "left"),
class = "factor"
),
N = c(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8),
NormalizedJerk = c(2060.1571177375, 1092.701687475,
2981.812368875, 1508.28547575, 2089.925186675, 1269.6704558875,
1391.97364445, 914.38364425, 3900.4119165625, 2335.7186722875,
4015.516784, 2563.16723475, 1411.2016892375, 847.952527975,
1853.413925525, 1394.847246875, 6830.81906275, 3915.019566375,
2310.5893110125, 1023.1702538875, 1340.4653657625, 765.2752940875,
5617.967587, 1705.629421875
),
sd = c(1507.9737264907, 946.522319997832,
1403.37144167604, 813.034284948276, 1383.95826055979, 915.414811222361,
580.358119711544, 361.141583497209, 3283.59638643298, 1471.60790379469,
1178.5979495842, 806.56121914288, 701.001824354642, 324.415375522303,
2103.42765701483, 1208.14874080532, 4671.80701687463, 2861.85154237875,
2130.92970547315, 813.649686635084, 489.544827982279, 256.250905151245,
5305.91963495324, 878.475841087918
),
se = c(533.14922392636,
334.646175507445, 496.166731466336, 287.451028112041, 489.303135460484,
323.648010306967, 205.187580982353, 127.682831329662, 1160.9266357632,
520.291964010475, 416.697301221774, 285.16245374901, 247.841571812654,
114.6981559765, 743.673980005258, 427.145083652716, 1651.73321101347,
1011.8173161826, 753.397422485958, 287.668605464989, 173.080233780536,
90.5983763588182, 1875.92587715314, 310.588112170911
),
ci = c(1260.69758461414,
791.31246230768, 1173.24788605882, 679.713672219011, 1157.01806049025,
765.305934151606, 485.191530116531, 301.92191947415, 2745.15527724569,
1230.29499600801, 985.332544042271, 674.302053778614, 586.052191261077,
271.218041235005, 1758.50952839896, 1010.03762375384, 3905.72840792368,
2392.56776402689, 1781.50181629779, 680.228160904389, 409.269718268804,
214.231117892992, 4435.85982330679, 734.424182295757
)
),
row.names = c(NA, -24L),
class = "data.frame"
)
tgc <- summarySE(
data10,
measurevar = "NormalizedJerk",
groupvars = c("GROUP", "condition", "names","Trial_type", "Training")
)
pd <- position_dodge2(0.2)
p <-ggplot(
gc,
aes(
names,
NormalizedJerk,
group = interaction(Training, Trial_type),
color = interaction(Training, Trial_type),
linetype = interaction(Training, Trial_type),
shape = Training
)
) +
geom_errorbar(
aes(ymin = NormalizedJerk - se, ymax = NormalizedJerk + se),
width = .3,
position = pd
) +
geom_line(
position = pd,
size = 1
) +
geom_point(
aes(shape = Training),
position = pd,
size = 1
)+
scale_colour_manual(
name = "Experimental group",
values = c("#999999","#999999","#000000","#000000")
labels = c("RRR","LLL","LRL","RLR")
) +
scale_shape_manual(
name = "Experimental group",
values = c("19","18","19","18")
) +
scale_linetype_manual(
name = "Experimental group",
values = c("solid","solid","solid","solid"),
labels = c("RRR","LLL","LRL","RLR")
) +
theme_bw() +
facet_wrap(.~condition) +
theme(
axis.title.y = element_text(vjust= 1.8, size = 14),
axis.title.x = element_text(vjust= -0.5, size = 14),
axis.title = element_text(face = "bold")
) +
xlab("Block of trials") +
ylab("Normalized Jerk")
p +
scale_x_discrete(
breaks=c("102","104"),
labels=c("Pretest","Posttest")
)
I modified your code a little. Thanks to Nic3500 for formatting the code.
You needed to add the same labels to the shape manual override as the others and turn the error bar legend off. I also increased the size of the legend because the default seems too small when showing different line types.
ggplot(tgc,
aes(
names,
NormalizedJerk,
group = interaction(Training, Trial_type),
color = interaction(Training, Trial_type),
linetype = interaction(Training, Trial_type),
shape = Training
)
) +
geom_errorbar(aes(ymin=NormalizedJerk-se, ymax=NormalizedJerk+se),
show.legend=FALSE, # <- here
width=.3, position=pd) +
geom_line(position=pd, size = 1) +
geom_point(position=pd, size= 2) +
scale_colour_manual(name = "Experimental group",
values=c("#999999","#999999","#000000","#000000"),
labels=c("RRR","LLL","LRL","RLR")) +
scale_shape_manual(name = "Experimental group", values=c(19,18,19,18),
labels=c("RRR","LLL","LRL","RLR")) + # <- here
scale_linetype_manual(name = "Experimental group",
values=c("solid","dashed","solid","dashed"),
labels=c("RRR","LLL","LRL","RLR")) +
theme_bw()+
facet_wrap(.~condition) +
theme(axis.title.y = element_text(vjust= 1.8, size = 14),
axis.title.x = element_text(vjust= -0.5, size = 14),
axis.title = element_text(face = "bold"),
legend.key.width = grid::unit(1.25, "cm")) + # <- here
xlab("Block of trials") + ylab("Normalized Jerk")

error with stat_compare_means and multiple groups

I would like to label my boxplots with pvalues.
Here is my code:
ggplot(df_annot,aes(x=Insect,y=index,fill=Fungi))+geom_boxplot(alpha=0.8)+
geom_point(aes(fill=Fungi),size = 3, shape = 21,position = position_jitterdodge(jitter.width = 0.02,jitter.height = 0))+
facet_wrap(~Location,scales="free" )+
stat_compare_means(aes(group="Insect"))+
guides(fill=guide_legend("M. robertii")) +
scale_x_discrete(labels= c("I+","I-","soil alone"))+
ylab(index_name)+
theme(plot.title = element_text(size = 18, face = "bold"))+
theme(axis.text=element_text(size=14),
axis.title=element_text(size=14)) +
theme(legend.text=element_text(size=14),
legend.title=element_text(size=14)) +
theme(strip.text.x = element_text(size = 14))
Here is the error message that I'm getting:
Warning messages:
1: Unknown or uninitialised column: 'p'.
2: Computation failed in stat_compare_means(): argument "x" is missing, with no default
3: Unknown or uninitialised column: 'p'.
4: Computation failed in stat_compare_means(): argument "x" is missing, with no default
I've tried moving around the aes() from the main ggplot call to the boxplot call. I've tried different inherit.aes in the stat_compare_means().
I've tried first subsetting the root section and making them separately , but the same error.
Any help is appreciated.
thanks
here is my data:
> dput(df_annot)
structure(list(Location = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Root", "Rhizospheric Soil"
), class = "factor"), Bean = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Bean", "No bean"), class = "factor"),
Fungi = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L), .Label = c("M+", "M-"), class = "factor"), Insect = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Insect",
"NI"), class = "factor"), index = c(2.90952191983974, 3.19997588762484,
2.96753469534499, 2.93030877512644, 2.72220793003196, 3.09008037591454,
2.63687890737919, 2.73583925812843, 3.06766793411045, 3.26431040286099,
3.03361194852963, 2.9181623054061)), row.names = c("S-B1",
"S-B2", "S-B3", "S-BF-1", "S-BF-2", "S-BF-3", "S-BFi-1", "S-BFi-2",
"S-BFi-3", "S-Bi-1", "S-Bi-2", "S-Bi-3"), class = "data.frame")
A possible and easy fix for your error may be to use the bare variable name (i.e. remove the double quotes) rather than the quoted name (i.e. a character string) in stat_compare_means(), so the call should look like this:
stat_compare_means(aes(group=Insect))
A working example using ggboxplot() is as follows:
library(ggpubr)

# Box plot of tooth length by dose, coloured by supplement type, with jittered
# points. Stored as `bxp` so that base::boxplot() is not shadowed.
bxp <- ggboxplot(
  ToothGrowth,
  x = "dose", y = "len", add = "jitter",
  color = "supp", group = "supp", palette = "jco", legend.title = "Supplier"
)

# Compare the two supplements within each dose; non-significant results are
# hidden. TRUE/FALSE are spelled out because T/F can be reassigned.
bxp <- bxp +
  stat_compare_means(
    aes(group = supp),
    label = "p.signif",
    method = "wilcox.test",
    hide.ns = TRUE,
    paired = FALSE
  )

# Print the object that was actually created (the original printed the
# undefined name `bxp.legend`).
print(bxp)
There is a warning message for the above example, but I do not know how to improve the code to remove it:
`cols` is now required.
Please use `cols = c(p)`

creating a factor-based in dendrogram with R and ggplot2

This is not so much a coding as general approach call for help ;-) I prepared a table containing taxonomic information about organisms. But I want to use the "names" of these organisms, so no values or anything where you could compute a distance or clustering with (this is also all the information I have). I just want to use these factors to create a plot that shows the relationship. My data looks like this:
test2<-structure(list(genus = structure(c(4L, 2L, 7L, 8L, 6L, 1L, 3L,
5L, 5L), .Label = c("Aminobacter", "Bradyrhizobium", "Hoeflea",
"Hyphomonas", "Mesorhizobium", "Methylosinus", "Ochrobactrum",
"uncultured"), class = "factor"), family = structure(c(4L, 1L,
2L, 3L, 5L, 6L, 6L, 6L, 6L), .Label = c("Bradyrhizobiaceae",
"Brucellaceae", "Hyphomicrobiaceae", "Hyphomonadaceae", "Methylocystaceae",
"Phyllobacteriaceae"), class = "factor"), order = structure(c(1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Caulobacterales",
"Rhizobiales"), class = "factor"), class = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Alphaproteobacteria", class = "factor"),
phylum = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Proteobacteria", class = "factor")), .Names = c("genus",
"family", "order", "class", "phylum"), class = "data.frame", row.names = c(NA,
9L))
is it necessary to set up artificial values to describe a distance between the levels?
Here is an attempt using data.tree library
First create a string variable in the form:
Proteobacteria/Alphaproteobacteria/Caulobacterales/Hyphomonadaceae/Hyphomonas
library(data.tree)
test2$pathString <- with(test2,
paste(phylum,
class,
order,
family,
genus, sep = "/"))
tree_test2 = as.Node(test2)
plot(tree_test2)
many things can be done after like:
Interactive network:
library(networkD3)
test2_Network <- ToDataFrameNetwork(tree_test2, "name")
simpleNetwork(test2_Network)
or graph styled
library(igraph)
plot(as.igraph(tree_test2, directed = TRUE, direction = "climb"))
check out the vignette
using ggplot2:
library(ggraph)
graph = as.igraph(tree_test2, directed = TRUE, direction = "climb")
ggraph(graph, layout = 'kk') +
geom_node_text(aes(label = name))+
geom_edge_link(arrow = arrow(type = "closed", ends = "first",
length = unit(0.20, "inches"),
angle = 15)) +
geom_node_point() +
theme_graph()+
coord_cartesian(xlim = c(-3,3), expand = TRUE)
or perhaps:
ggraph(graph, layout = 'kk') +
geom_node_text(aes(label = name), repel = T)+
geom_edge_link(angle_calc = 'along',
end_cap = circle(3, 'mm'))+
geom_node_point(size = 5) +
theme_graph()+
coord_cartesian(xlim = c(-3,3), expand = TRUE)

Error bars on stacked bar ggplot2

I'm struggling to put error bars into the correct place on a stacked bar. As I read on an earlier post I used ddply in order to stack the error bars. Then that changed the order of the stacking so I ordered the factor. Now it appears the error bars are correct on one set of bars but not the other. What I want is a graph that looks like that below, just with the standard error shown with error bars. I'm listing the dput of the original data and the ddply data as well as the data set.
Suz2$org <- factor(Suz2$org, levels = c('fungi','bacteria'),ordered = TRUE)
library(plyr)
plydat <- ddply(Suz2,.(org, group, time),transform,ybegin = copy - se,yend = copy + se)
colvec <-c("blue", "orange")
ggplot(plydat, aes(time, copy)) +
geom_bar(aes(fill = factor(org)), stat="identity", width = 0.7) +
scale_fill_manual(values = colvec) +
facet_wrap(~group,nrow = 1)+
geom_errorbar(aes(ymax=ybegin , ymin= yend ),width=.5) +
theme(panel.background = element_rect(fill='white', colour='white'),
panel.grid = element_line(color = NA),
panel.grid.minor = element_line(color = NA),
panel.border = element_rect(fill = NA, color = "black"),
axis.text.x = element_text(size=10, colour="black", face = "bold"),
axis.title.x = element_text(vjust=0.1, face = "bold"),
axis.text.y = element_text(size=12, colour="black"),
axis.title.y = element_text(vjust=0.2, size = 12, face = "bold"))
dput(plydat)
structure(list(org = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("fungi", "bacteria"
), class = c("ordered", "factor")), time = structure(c(1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("0W",
"6W"), class = "factor"), copy = c(97800000, 15500000, 40200000,
10400000, 55100000, 14300000, 1.6e+07, 8640000, 2.98e+08, 77900000,
2.33e+08, 2.2e+08, 3.37e+08, 88400000, 3.24e+08, 1.89e+08), group = structure(c(3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L), .Label = c("Native D0",
"Native D707", "Notill D0", "Notill D707"), class = "factor"),
se = c(11100000, 2810000, 7110000, 2910000, 1.7e+07, 1500000,
1930000, 2980000, 43900000, 20100000, 56400000, 41200000,
75700000, 22500000, 57500000, 28100000), ybegin = c(86700000,
12690000, 33090000, 7490000, 38100000, 12800000, 14070000,
5660000, 254100000, 57800000, 176600000, 178800000, 261300000,
65900000, 266500000, 160900000), yend = c(108900000, 18310000,
47310000, 13310000, 72100000, 15800000, 17930000, 11620000,
341900000, 9.8e+07, 289400000, 261200000, 412700000, 110900000,
381500000, 217100000)), .Names = c("org", "time", "copy",
"group", "se", "ybegin", "yend"), row.names = c(NA, -16L), class = "data.frame")
dput(Suz2)
structure(list(org = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("fungi", "bacteria"
), class = c("ordered", "factor")), time = structure(c(1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("0W",
"6W"), class = "factor"), copy = c(97800000, 15500000, 40200000,
10400000, 55100000, 14300000, 1.6e+07, 8640000, 2.98e+08, 77900000,
2.33e+08, 2.2e+08, 3.37e+08, 88400000, 3.24e+08, 1.89e+08), group = structure(c(3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L), .Label = c("Native D0",
"Native D707", "Notill D0", "Notill D707"), class = "factor"),
se = c(11100000, 2810000, 7110000, 2910000, 1.7e+07, 1500000,
1930000, 2980000, 43900000, 20100000, 56400000, 41200000,
75700000, 22500000, 57500000, 28100000)), .Names = c("org",
"time", "copy", "group", "se"), row.names = c(NA, -16L), class = "data.frame")
Suz2
org time copy group se
1 fungi 0W 9.78e+07 Notill D0 11100000
2 fungi 0W 1.55e+07 Notill D707 2810000
3 fungi 0W 4.02e+07 Native D0 7110000
4 fungi 0W 1.04e+07 Native D707 2910000
5 fungi 6W 5.51e+07 Notill D0 17000000
6 fungi 6W 1.43e+07 Notill D707 1500000
7 fungi 6W 1.60e+07 Native D0 1930000
8 fungi 6W 8.64e+06 Native D707 2980000
9 bacteria 0W 2.98e+08 Notill D0 43900000
10 bacteria 0W 7.79e+07 Notill D707 20100000
11 bacteria 0W 2.33e+08 Native D0 56400000
12 bacteria 0W 2.20e+08 Native D707 41200000
13 bacteria 6W 3.37e+08 Notill D0 75700000
14 bacteria 6W 8.84e+07 Notill D707 22500000
15 bacteria 6W 3.24e+08 Native D0 57500000
16 bacteria 6W 1.89e+08 Native D707 28100000
The values for both ybegin and yend, the range of the errorbar, are too low for the bacteria data. Since the bars for bacteria are on top of the fungi bars, the height of the fungi bars (plydat$copy[plydat$org == "fungi"]) has to be added to the errorbar values of the bacteria data.
# Shift the bacteria error-bar limits up by the height of the fungi bar they
# are stacked on. The assignment target and `<-` must be on the same line;
# otherwise R evaluates the subset on its own and then fails to parse `<-`.
# NOTE: this pairs fungi and bacteria rows by position, so both subsets must be
# in the same group/time order (as they are in the data shown).
is_bacteria <- plydat$org == "bacteria"
fungi_copy <- plydat[plydat$org == "fungi", "copy"]
plydat[is_bacteria, ] <- transform(
  plydat[is_bacteria, ],
  ybegin = ybegin + fungi_copy,
  yend = yend + fungi_copy
)
Personally, I'm not really fond of stacked bar charts, especially when the number of stacked bars is large (which is not the case for you). The main problem is the fact that all but the lowest stack do not share the same baseline. In your case, it is hard to compare the orange bacteria bars with each other, as they do not share the same base (y value, copy).
I propose to use a plot called a dotplot:
library(ggplot2)
theme_set(theme_bw())
# Dot plot of copy number per time point, coloured by organism, one panel per
# group, with the axes flipped so that copy is read horizontally.
ggplot(plydat, aes(time, copy, color = org)) +
  geom_point() +
  facet_wrap(~group, ncol = 1) +
  # ybegin = copy - se is the lower limit and yend = copy + se the upper one,
  # so they map to ymin and ymax respectively (the original had them swapped).
  geom_errorbar(aes(ymin = ybegin, ymax = yend), width = 0) +
  coord_flip()
Note that the copy value is not additive here as it was in the stacked barchart. Because they share the same base copy value (0), you can easily compare between different values of bacteria. In addition, I swap the x and y axis to make it easy to compare the value of copy (just remove the coord_flip to see how bad that works in comparing copy).
The only real downside is that there is no easy way of judging the sum of fungi and bacteria. Depending on what the chart is meant to show (the story of the chart) this may or may not be a problem. You could add a separate additional category to org, i.e. both which is the sum of both categories, to remedy this. Of course, interpreting the error in this summed category is non-trivial.
From a combination of the above answers I think I'm going to go with something like this.
# Add the lower (ybegin) and upper (yend) error-bar limits to each row.
plydat <- ddply(Suz2, .(org), transform, ybegin = copy - se, yend = copy + se)
colvec <- c("blue", "orange")

# Dot plot of copy number by time, coloured by organism, one panel per group.
ggplot(plydat, aes(time, copy, color = factor(org))) +
  geom_point(size = 3.5) +
  facet_wrap(~group, ncol = 4) +
  scale_color_manual(values = colvec) +
  # ymin takes the lower limit and ymax the upper one (originally swapped).
  geom_errorbar(
    aes(ymin = ybegin, ymax = yend),
    width = 0.08,
    color = "black",
    size = 0.1
  ) +
  theme(
    panel.background = element_rect(fill = "white", colour = "white"),
    panel.grid = element_line(color = NA),
    panel.grid.minor = element_line(color = NA),
    panel.border = element_rect(fill = NA, color = "black"),
    strip.background = element_blank(),
    axis.text.x = element_text(size = 10, colour = "black", face = "bold"),
    axis.title.x = element_text(vjust = 0.1, face = "bold"),
    axis.text.y = element_text(size = 12, colour = "black"),
    axis.title.y = element_text(vjust = 0.2, size = 12, face = "bold")
  )

Resources