Error bars on stacked bar ggplot2 - r

I'm struggling to put error bars into the correct place on a stacked bar. As I read on an earlier post I used ddply in order to stack the error bars. Then that changed the order of the stacking so I ordered the factor. Now it appears the error bars are correct on one set of bars but not the other. What I want is a graph that looks like that below, just with the standard error shown with error bars. I'm listing the dput of the original data and the ddply data as well as the data set.
# Fix the level order of `org`; the surrounding text does this to control the
# stacking order of the bars.
Suz2$org <- factor(Suz2$org, levels = c("fungi", "bacteria"), ordered = TRUE)
library(plyr)
# Per row, add the lower (ybegin = copy - se) and upper (yend = copy + se)
# limits of the error bar.
plydat <- ddply(Suz2, .(org, group, time), transform,
  ybegin = copy - se, yend = copy + se)
colvec <- c("blue", "orange")
ggplot(plydat, aes(time, copy)) +
  geom_bar(aes(fill = factor(org)), stat = "identity", width = 0.7) +
  scale_fill_manual(values = colvec) +
  facet_wrap(~group, nrow = 1) +
  # ybegin is the lower limit and yend the upper one, so they belong on ymin
  # and ymax respectively.
  # NOTE: both limits are measured from zero, not from the top of the bar
  # stacked underneath.
  geom_errorbar(aes(ymin = ybegin, ymax = yend), width = .5) +
  theme(
    panel.background = element_rect(fill = "white", colour = "white"),
    panel.grid = element_line(color = NA),
    panel.grid.minor = element_line(color = NA),
    panel.border = element_rect(fill = NA, color = "black"),
    axis.text.x = element_text(size = 10, colour = "black", face = "bold"),
    axis.title.x = element_text(vjust = 0.1, face = "bold"),
    axis.text.y = element_text(size = 12, colour = "black"),
    axis.title.y = element_text(vjust = 0.2, size = 12, face = "bold")
  )
dput(plydat)
structure(list(org = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("fungi", "bacteria"
), class = c("ordered", "factor")), time = structure(c(1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("0W",
"6W"), class = "factor"), copy = c(97800000, 15500000, 40200000,
10400000, 55100000, 14300000, 1.6e+07, 8640000, 2.98e+08, 77900000,
2.33e+08, 2.2e+08, 3.37e+08, 88400000, 3.24e+08, 1.89e+08), group = structure(c(3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L), .Label = c("Native D0",
"Native D707", "Notill D0", "Notill D707"), class = "factor"),
se = c(11100000, 2810000, 7110000, 2910000, 1.7e+07, 1500000,
1930000, 2980000, 43900000, 20100000, 56400000, 41200000,
75700000, 22500000, 57500000, 28100000), ybegin = c(86700000,
12690000, 33090000, 7490000, 38100000, 12800000, 14070000,
5660000, 254100000, 57800000, 176600000, 178800000, 261300000,
65900000, 266500000, 160900000), yend = c(108900000, 18310000,
47310000, 13310000, 72100000, 15800000, 17930000, 11620000,
341900000, 9.8e+07, 289400000, 261200000, 412700000, 110900000,
381500000, 217100000)), .Names = c("org", "time", "copy",
"group", "se", "ybegin", "yend"), row.names = c(NA, -16L), class = "data.frame")
dput(Suz2)
structure(list(org = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("fungi", "bacteria"
), class = c("ordered", "factor")), time = structure(c(1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("0W",
"6W"), class = "factor"), copy = c(97800000, 15500000, 40200000,
10400000, 55100000, 14300000, 1.6e+07, 8640000, 2.98e+08, 77900000,
2.33e+08, 2.2e+08, 3.37e+08, 88400000, 3.24e+08, 1.89e+08), group = structure(c(3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L), .Label = c("Native D0",
"Native D707", "Notill D0", "Notill D707"), class = "factor"),
se = c(11100000, 2810000, 7110000, 2910000, 1.7e+07, 1500000,
1930000, 2980000, 43900000, 20100000, 56400000, 41200000,
75700000, 22500000, 57500000, 28100000)), .Names = c("org",
"time", "copy", "group", "se"), row.names = c(NA, -16L), class = "data.frame")
Suz2
org time copy group se
1 fungi 0W 9.78e+07 Notill D0 11100000
2 fungi 0W 1.55e+07 Notill D707 2810000
3 fungi 0W 4.02e+07 Native D0 7110000
4 fungi 0W 1.04e+07 Native D707 2910000
5 fungi 6W 5.51e+07 Notill D0 17000000
6 fungi 6W 1.43e+07 Notill D707 1500000
7 fungi 6W 1.60e+07 Native D0 1930000
8 fungi 6W 8.64e+06 Native D707 2980000
9 bacteria 0W 2.98e+08 Notill D0 43900000
10 bacteria 0W 7.79e+07 Notill D707 20100000
11 bacteria 0W 2.33e+08 Native D0 56400000
12 bacteria 0W 2.20e+08 Native D707 41200000
13 bacteria 6W 3.37e+08 Notill D0 75700000
14 bacteria 6W 8.84e+07 Notill D707 22500000
15 bacteria 6W 3.24e+08 Native D0 57500000
16 bacteria 6W 1.89e+08 Native D707 28100000

The values for both ybegin and yend, the range of the errorbar, are too low for the bacteria data. Since the bars for bacteria are on top of the fungi bars, the height of the fungi bars (plydat$copy[plydat$org == "fungi"]) has to be added to the errorbar values of the bacteria data.
# Shift the error-bar limits of the bacteria rows upwards by the height of the
# fungi bar they are stacked on.
# The `<-` must stay on the same line as its target: a line break before it
# ends the first expression and leaves `<- transform(...)` as a parse error.
# Assumes the fungi and bacteria rows appear in the same group/time order.
is_bacteria <- plydat$org == "bacteria"
fungi_copy <- plydat[plydat$org == "fungi", "copy"]
plydat[is_bacteria, ] <- transform(plydat[is_bacteria, ],
  ybegin = ybegin + fungi_copy,
  yend = yend + fungi_copy)

Personally, I'm not really fond of a stacked bar chart, especially when the number of stacked bars is large (which is not the case for you). The main problem is the fact that all but the lowest stack do not share the same baseline. In your case, it is hard to compare the orange bacteria bars with each other because they do not share the same base (y value, copy).
I propose to use a plot called a dotplot:
library(ggplot2)
theme_set(theme_bw())
# One point per org/time combination, one panel per group stacked vertically;
# coord_flip() puts copy on the horizontal axis.
ggplot(plydat, aes(time, copy, color = org)) +
  geom_point() +
  facet_wrap(~group, ncol = 1) +
  # ybegin is the lower error-bar limit and yend the upper one, so they are
  # mapped to ymin and ymax respectively.
  geom_errorbar(aes(ymin = ybegin, ymax = yend), width = 0) +
  coord_flip()
Note that the copy value is not additive here as it was in the stacked barchart. Because they share the same base copy value (0), you can easily compare between different values of bacteria. In addition, I swap the x and y axis to make it easy to compare the value of copy (just remove the coord_flip to see how bad that works in comparing copy).
The only real downside is that there is no easy way of judging the sum of fungi and bacteria. Depending on what the chart is meant to show (the story of the chart) this may or may not be a problem. You could add a separate additional category to org, i.e. both which is the sum of both categories, to remedy this. Of course, interpreting the error in this summed category is non-trivial.

From a combination of the above answers I think I'm going to go with something like this.
# Per row, add the lower (ybegin = copy - se) and upper (yend = copy + se)
# limits of the error bar.
plydat <- ddply(Suz2, .(org), transform,
  ybegin = copy - se, yend = copy + se)
colvec <- c("blue", "orange")
# Points coloured by org, one panel per group in a single row of four.
ggplot(plydat, aes(time, copy, color = factor(org))) +
  geom_point(size = 3.5) +
  facet_wrap(~group, ncol = 4) +
  scale_color_manual(values = colvec) +
  # Lower limit on ymin, upper limit on ymax; drawn in black regardless of org.
  geom_errorbar(aes(ymin = ybegin, ymax = yend), width = 0.08,
    color = "black", size = 0.1) +
  theme(
    panel.background = element_rect(fill = "white", colour = "white"),
    panel.grid = element_line(color = NA),
    panel.grid.minor = element_line(color = NA),
    panel.border = element_rect(fill = NA, color = "black"),
    strip.background = element_blank(),
    axis.text.x = element_text(size = 10, colour = "black", face = "bold"),
    axis.title.x = element_text(vjust = 0.1, face = "bold"),
    axis.text.y = element_text(size = 12, colour = "black"),
    axis.title.y = element_text(vjust = 0.2, size = 12, face = "bold")
  )

Related

Creating raincloud plot from a data frame in R

I wanted a visualization something like this
I ended up getting like this one
I'm kind of close to what I want to get, except I'm not able to separate them
Here is my data frame
dput(dat_red)
structure(list(FAB = structure(c(5L, 1L, 5L, 3L, 2L, 4L, 6L,
2L, 1L, 6L, 5L, 1L, 5L, 1L, 5L, 6L, 3L, 5L, 2L, 5L, 3L, 3L, 3L,
1L, 3L, 1L, 1L, 1L), .Label = c("M0", "M1", "M2", "M3", "M4",
"M5"), class = "factor"), Risk_Cyto = structure(c(2L, 3L, 2L,
2L, 3L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L), .Label = c("Good", "Intermediate",
"Poor"), class = "factor"), `TCGA-AB-2856` = c(0, 0.203446022561853,
0.057566971226641, 0.050525640210207, 0.050663468813024, 0.108022967842345,
0.03563961790061, 0.091955619434079, 0.09562601922977, 0.072990036124458,
0.05292549370956, 0.134908910498566, 0.056146007781438, 0.166755814327401,
0.072370918290216, 0.092982169160965, 0.053571132330207, 0.026946730545354,
0.096491482450314, 0.086393933157139, 0.086056971395349, 0.059872483122941,
0.05562972070039, 0.080629871622231, 0.06458076058265, 0.109295018454197,
0.15019108327262, 0.122208033564744), `TCGA-AB-2849` = c(0.203446022561853,
0, 0.138756102002674, 0.109150212934145, 0.130381628657973, 0.186028570196918,
0.201142265508601, 0.117008908236162, 0.07523492135779, 0.237542759238287,
0.154026516322799, 0.093169870680731, 0.174873827256869, 0.077917778705184,
0.217466101351585, 0.247196178178148, 0.139168631446623, 0.130879779506245,
0.094044964277672, 0.102330796604311, 0.115883670128914, 0.106007290303468,
0.124207778875499, 0.100051046626221, 0.096898638044544, 0.081075416500332,
0.066801569316824, 0.095571899845876), `TCGA-AB-2971` = c(0.057566971226641,
0.138756102002674, 0, 0.057153443556063, 0.049118618822663, 0.108803803345704,
0.038593571058361, 0.05623480754803, 0.061897696825206, 0.056921365921972,
0.027147582644049, 0.100579305160467, 0.031712766628694, 0.099623521686644,
0.043315406299788, 0.079156224894216, 0.070713735063067, 0.042797402350358,
0.064121331342957, 0.076245258448711, 0.057969352005916, 0.056411884330189,
0.029950269541688, 0.052538503817376, 0.053263317374002, 0.073813902166228,
0.081932722355952, 0.095255347468669), `TCGA-AB-2930` = c(0.050525640210207,
0.109150212934145, 0.057153443556063, 0, 0.040710142137316, 0.087506794353747,
0.076018856821365, 0.054334641613629, 0.043854827190482, 0.121490922447548,
0.060145981627256, 0.070829823037578, 0.0708179998993, 0.083561655580485,
0.106626803408534, 0.149000581782327, 0.049861493156012, 0.018112612744773,
0.05246829209315, 0.041582348253964, 0.053306367816997, 0.035373116643303,
0.042875256342202, 0.03406333799917, 0.036306618864362, 0.045647830531497,
0.084727864328183, 0.079147350281325), `TCGA-AB-2891` = c(0.050663468813024,
0.130381628657973, 0.049118618822663, 0.040710142137316, 0, 0.117167203965628,
0.057145523476846, 0.07089819966556, 0.058848771210843, 0.090222074046894,
0.052188574602838, 0.091623506635555, 0.053000329480576, 0.094592248885481,
0.082033497053918, 0.111240839210373, 0.065982245111563, 0.038618210190806,
0.063406266346048, 0.062231987650712, 0.067503749234478, 0.039970960455281,
0.042758552599394, 0.049740193805893, 0.04884538212911, 0.07959023948363,
0.090749468265183, 0.075792324166325)), class = "data.frame", row.names = c(NA,
-28L))
My code
# Read the tab-separated table; check.names = FALSE keeps column names such as
# "TCGA-AB-2856" unchanged.
dat_red <- read.csv("JSD_test_map_.txt", sep = "\t", check.names = FALSE)
# Reshape to long format, keeping FAB and Risk_Cyto as identifier columns.
# The table that was just read (dat_red) is the one being melted.
# melt() is not base R (presumably reshape2::melt -- confirm which package is
# attached).
df_melt <- melt(dat_red, id.vars = c("FAB", "Risk_Cyto"))
To plot the above I used this tutorial
# Load the raincloud helper script (geom_flat_violin() is presumably defined
# there -- confirm).
source("R_rainclouds.R")
# Half violin nudged to the right, jittered raw points and a narrow box plot,
# all positioned by Risk_Cyto and filled by FAB.
ggplot(df_melt, aes(x = Risk_Cyto, y = value, fill = FAB)) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    adjust = 2,
    alpha = 0.5
  ) +
  geom_point(
    position = position_jitter(width = .15),
    size = .8
  ) +
  # x, y and fill are inherited from the plot-level mapping.
  geom_boxplot(
    outlier.shape = NA,
    alpha = .5,
    width = .1,
    colour = "black"
  ) +
  #theme_jen() +
  labs(
    title = "Raincloud plot of body mass by species",
    x = 'Risk_Cyto',
    y = 'JSD'
  ) +
  easy_remove_legend()
So I have the following group in my metadata or patient info in this subset
> unique(dat_red$FAB)
[1] M4 M0 M2 M1 M3 M5
Levels: M0 M1 M2 M3 M4 M5
> unique(dat_red$Risk_Cyto)
[1] Intermediate Poor Good
Levels: Good Intermediate Poor
My objective is to show The Risk_Cyto as my main group similar to the first figure where They have shown ColonT HeartLV Liver Muscle etc and subsequently I have different FAB subtypes which i want to show similar to Young and Old
Right now everything is kind of stacked or rather messed up in single plot
Any help or suggestion is really appreciated
Put FAB on the x axis and facet by Risk_Cyto
# FAB on the x axis, one facet column per Risk_Cyto level; free x scales and
# spacing are requested so each facet is sized by its own FAB levels.
df_melt %>%
  ggplot(aes(FAB, value, fill = FAB)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), adjust = 2,
    alpha = 0.5) +
  geom_point(position = position_jitter(width = .15), size = .8) +
  geom_boxplot(outlier.shape = NA,
    alpha = .5, width = .1, colour = "black") +
  # The x axis shows FAB, so it is labelled as such.
  # NOTE(review): the title looks left over from a tutorial; the data plotted
  # here is `value` (JSD) by FAB and Risk_Cyto -- consider rewording it.
  labs(title = "Raincloud plot of body mass by species",
    x = "FAB", y = "JSD") +
  facet_grid(. ~ Risk_Cyto, scales = "free_x", space = "free_x") +
  theme_bw(base_size = 16) +
  theme(legend.position = "none",
    strip.background = element_blank(),
    strip.text = element_text(face = 2, size = 22))

stat_compare_means with multiple groups

I need some help with stat_compare_means and multiple groups.
Here is what my data look like.
> head(df_annot)
Row.names Diversity_sh Diversity_si Evenness Chao1 Location Bean Fungi Insect
1 R-B1 1.314181 0.6040213 0.3053349 91.00000 Root Bean M- NI
2 R-B2 1.323718 0.6117602 0.3075507 77.43750 Root Bean M- NI
3 R-B3 1.249950 0.5737293 0.2877545 81.50000 Root Bean M- NI
4 R-BF-1 1.177111 0.5414276 0.2693958 92.33333 Root Bean M+ NI
5 R-BF-2 1.191254 0.5252688 0.2742420 79.54545 Root Bean M+ NI
6 R-BF-3 1.397233 0.6285945 0.3179540 85.50000 Root Bean M+ NI
Here is a graph and I would like ALL comparisons labelled.
Here is some code. I know that I don't have my_comparisons correct, but I don't know where to start for the two groups. I want to compare M+/Insect to M-/Insect and M+/Insect to M+/NI etc.., all two-way comparisons. Any suggestions would be great. thanks
# NOTE: these pairs name levels of two different variables (Fungi and Insect);
# the surrounding text already flags this list as not yet correct.
my_comparisons <- list(c("M+", "M-"), c("Insect", "NI"))
# `index` is expected to hold the name of the y column as a string; it is
# defined outside this snippet.
ggplot(df_annot, aes_string(x = "Insect", y = index, fill = "Fungi")) +
  geom_boxplot(alpha = 0.8) +
  geom_point(aes(fill = Fungi), size = 3, shape = 21,
    position = position_jitterdodge(jitter.width = 0.02, jitter.height = 0)) +
  # Argument name spelled out in full (`comparisons`) instead of relying on
  # partial argument matching.
  stat_compare_means(comparisons = my_comparisons, label = "p.format",
    method = "wilcox.test") +
  #ggtitle(df_name)+
  ylab(paste(index)) +
  xlab("") +
  # scale_x_discrete(labels= c("M+","M-","soil alone"))+
  theme(plot.title = element_text(size = 18, face = "bold")) +
  theme(axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)) +
  theme(legend.text = element_text(size = 14),
    legend.title = element_text(size = 14)) +
  theme(strip.text.x = element_text(size = 14))
dput(df_annot)
structure(list(Row.names = structure(c("R-B1", "R-B2", "R-B3",
"R-BF-1", "R-BF-2", "R-BF-3", "R-BFi-1", "R-BFi-2", "R-Bi-1",
"R-Bi-2", "R-Bi-3"), class = "AsIs"), Diversity_sh = c(1.31418133185869,
1.32371839350534, 1.24994951615418, 1.17711111336449, 1.19125374868316,
1.39723272927515, 1.34145146126423, 1.21674449259962, 1.20721660188555,
1.17245529262564, 1.20912937911657), Diversity_si = c(0.604021268328531,
0.611760247980402, 0.573729285531772, 0.541427625516077, 0.525268755766239,
0.628594506768001, 0.597250229879166, 0.554646956896473, 0.548992316400345,
0.531291238688503, 0.583806537719818), Evenness = c(0.305334910927276,
0.307550737463383, 0.287754490536268, 0.269395848882803, 0.274241968272787,
0.317954009728278, 0.305260435164649, 0.276882141486585, 0.273949061455415,
0.269914321375221, 0.275929262855007), Chao1 = c(91, 77.4375,
81.5, 92.3333333333333, 79.5454545454545, 85.5, 87.5, 90.5454545454545,
89.3333333333333, 88.6666666666667, 88.0769230769231), Location = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Root", "Rhizospheric Soil"
), class = "factor"), Bean = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "Bean", class = "factor"),
Fungi = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L), .Label = c("M+", "M-"), class = "factor"), Insect = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Insect",
"NI"), class = "factor")), row.names = c(NA, -11L), class = "data.frame")
facet_wrap() might help you as discussed here
# Refer to columns by bare name: ggplot2 then looks them up in the layer data,
# which is what lets facet_wrap() split the rows per Fungi level. Writing
# df_annot$col inside aes() bypasses that lookup.
ggplot(df_annot, aes(x = Insect, y = Evenness)) +
  facet_wrap(~Fungi) +
  geom_boxplot(alpha = 0.8) +
  geom_point() +
  # Compare the two x-axis groups within each facet.
  stat_compare_means(comparisons = list(c("Insect", "NI")),
    label = "p.format", method = "wilcox.test")
EDIT
ok here is a - not too elegant - solution without faceting.
Create a new variable containing Insect info and Fungi status:
# Combined group label of the form "<Insect>_<Fungi>", e.g. "NI_M-".
df_annot$var <- with(df_annot, paste(Insect, Fungi, sep = "_"))
Then build the contrasts
# All six pairwise contrasts between the four Insect/Fungi combinations,
# listed directly in the order they are used.
my_comparisons <- list(
  c("NI_M-", "NI_M+"),
  c("Insect_M+", "NI_M+"),
  c("Insect_M+", "NI_M-"),
  c("NI_M+", "Insect_M-"),
  c("NI_M-", "Insect_M-"),
  c("Insect_M-", "Insect_M+")
)
and plot your graph
# One box per combined Insect/Fungi group (`var`), filled by Fungi status.
# Columns are mapped by bare name with aes() rather than the deprecated
# aes_string().
ggplot(df_annot, aes(x = var, y = Evenness, fill = Fungi)) +
  geom_boxplot(alpha = 0.8) +
  geom_point(aes(fill = Fungi), size = 3, shape = 21,
    position = position_jitterdodge(jitter.width = 0.02, jitter.height = 0)) +
  # Argument name spelled out in full (`comparisons`) instead of relying on
  # partial argument matching.
  stat_compare_means(comparisons = my_comparisons, label = "p.format",
    method = "wilcox.test") +
  #ggtitle(df_name)+
  ylab("Evenness") +
  xlab("") +
  # scale_x_discrete(labels= c("M+","M-","soil alone"))+
  theme(plot.title = element_text(size = 18, face = "bold")) +
  theme(axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)) +
  theme(legend.text = element_text(size = 14),
    legend.title = element_text(size = 14)) +
  theme(strip.text.x = element_text(size = 14))
you might want to make better names and such. but this could be what you are looking for.

Convert two ggplots into one

I am facing some problem to have one plot instead of two from separate data frames. I explained the situation a bit below. The data frames look like:
df1 <- structure(list(value = c(9921L, 21583L, 11822L, 1054L, 13832L,
16238L, 13838L, 20801L, 20204L, 13881L, 19935L, 13829L, 14012L,
20654L, 13862L, 21191L, 3777L, 15552L, 13817L, 20428L, 16850L,
21003L, 11072L, 22477L, 12321L, 12856L, 16295L, 11431L, 13469L,
14680L, 10552L, 15272L, 9132L, 9374L, 15123L, 22754L, 10363L,
12160L, 13729L, 11151L, 11451L, 11272L, 14900L, 14688L, 17133L,
7315L, 7268L, 6262L, 72769L, 7650L, 16389L, 13027L, 7134L, 6465L,
6490L, 15183L, 7201L, 14070L, 11210L, 10146L), limit = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1Mbit",
"5Mbit", "10Mbit"), class = "factor")), class = "data.frame", row.names = c(NA,
-60L))
df2 <- structure(list(value = c(37262L, 39881L, 30914L, 32976L, 28657L,
39364L, 39915L, 30115L, 29326L, 36199L, 37976L, 36694L, 33718L,
36945L, 33182L, 35866L, 34188L, 33426L, 32804L, 34986L, 29355L,
30470L, 37420L, 26465L, 28975L, 29144L, 27491L, 30507L, 27146L,
26257L, 31231L, 30521L, 30370L, 31683L, 33774L, 35654L, 34172L,
38554L, 38030L, 33439L, 34817L, 31278L, 33579L, 31175L, 31001L,
29908L, 31658L, 33381L, 28709L, 34794L, 34154L, 30157L, 33362L,
30363L, 31097L, 29116L, 27703L, 31229L, 30196L, 30077L), limit = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("180ms",
"190ms", "200ms"), class = "factor")), class = "data.frame", row.names = c(NA,
-60L))
from the data frames above, I have these plots:
# Stand-alone factors with an explicit level order, used as the x variable of
# the plots below.
limit_bw <- factor(
  df1$limit,
  levels = c("1Mbit", "5Mbit", "10Mbit")
)
# The latency levels are listed from 200ms down to 180ms.
limit_lt <- factor(
  df2$limit,
  levels = c("200ms", "190ms", "180ms")
)
(to use them sequentially)
# One loess-based quantile line per data set; group = 1 puts all rows into a
# single group across the discrete x levels.
bw_line <- ggplot(
  df1,
  aes(x = limit_bw, y = value, group = 1)
) +
  geom_quantile(method = "loess")
lt_line <- ggplot(
  df2,
  aes(x = limit_lt, y = value, group = 1)
) +
  geom_quantile(method = "loess")
(I actually have many data so I used geom_quantile())
And also two plots in a grid using rbind/cbind (which is not what I want now):
# Convert both plots to grobs, stack them vertically with rbind() and draw the
# result.
grid.draw(
  rbind(
    ggplotGrob(
      ggplot(df1, aes(limit_bw, value, group = 1)) +
        geom_quantile(method = "loess") +
        labs(title = "value vs bw", x = "bandwidth", y = "value")
    ),
    ggplotGrob(
      ggplot(df2, aes(limit_lt, value, group = 1)) +
        geom_quantile(method = "loess") +
        labs(title = "value vs latency", x = "latency", y = "value")
    ),
    size = "last"
  )
)
I am seeking your help to merge them together into one plot (putting bw_line and lt_line together in the same graph) showing two x-axes either at the top and bottom or two axes in the bottom mentioning their title. Please note, the value has different range for each of the data set. However I need to show two y-axes for separate ranges for each data frame or may be one y-axis showing all the values (min to max) from the both data frame.
I have actually seen one very close solution here from #RichieCotton, but I could not figure it out for my data since I have factors instead of integer values.
I really appreciate your help. Thank you.
I think it's probably easiest to approach this by combining the data into one data frame first. Here I make combined x-values and map your data to those. Then we map as usual, with the addition of a secondary y axis.
library(tidyverse); library(forcats)
# Create shared x axis and combine data frames
# Each shared position pairs one bandwidth level with one latency level.
limit_combo <- data.frame(
  level_num = 1:3,
  level = as_factor(c("1Mbit\n200ms", "5Mbit\n190ms", "10Mbit\n180ms"))
)
# df1$limit levels are 1Mbit, 5Mbit, 10Mbit, so the factor codes are already
# the shared positions 1..3.
df1b <- df1 %>%
  mutate(level_num = as.numeric(limit)) %>%
  left_join(limit_combo, by = "level_num")
# df2$limit levels are 180ms, 190ms, 200ms; 4 - code reverses them so that
# 200ms lands on position 1.
df2b <- df2 %>%
  mutate(level_num = 4 - as.numeric(limit)) %>%
  left_join(limit_combo, by = "level_num")
# .id = "plot" tags the rows "1" (df1b) / "2" (df2b); rename to bw / lt.
df3 <- bind_rows(df1b, df2b, .id = "plot") %>%
  mutate(plot = if_else(plot == "1", "bw", "lt"))
# plot with adjusted y values and second axis for reference
# Factor applied to the "lt" values so both series fit on one y axis; the
# secondary axis divides by the same factor to show the original values.
lt_scale <- 0.44
ggplot(df3, aes(x = level,
  y = value * if_else(plot == "lt", lt_scale, 1),
  group = plot, color = plot)) +
  geom_quantile(method = "loess") +
  scale_y_continuous("value", sec.axis = sec_axis(~ . / lt_scale)) +
  theme(axis.text.y.left = element_text(color = "#F8766D"),
    axis.text.y.right = element_text(color = "#00BFC4"))
Here is a different approach to create a single plot from the two datasets, which avoids combining both datasets into one and dealing with the factors of limit. df1, df2, limit_bw, and limit_lt are used as given by the OP.
The plot is refined in three steps.
1. Common x axis, common y scale
library(ggplot2)
# Both data sets are drawn against the integer codes of their limit factors,
# so level k of limit_bw and level k of limit_lt share x position k.
ggplot() +
  aes(y = value) +
  geom_quantile(aes(x = as.integer(limit_bw), colour = "bw"),
    data = df1, method = "loess") +
  geom_quantile(aes(x = as.integer(limit_lt), colour = "lt"),
    data = df2, method = "loess") +
  # One tick per level, labelled with both level names on separate lines.
  scale_x_continuous("limit",
    breaks = seq_len(nlevels(limit_bw)),
    labels = paste(levels(limit_bw), levels(limit_lt), sep = "\n")) +
  scale_colour_discrete(NULL)
2. Separate x axes, common y scale
library(ggplot2)
# Same layers as the common-axis version, but the bandwidth levels label the
# bottom axis and the latency levels label a duplicated top axis.
ggplot() +
  aes(y = value) +
  geom_quantile(aes(x = as.integer(limit_bw), colour = "bw"),
    data = df1, method = "loess") +
  geom_quantile(aes(x = as.integer(limit_lt), colour = "lt"),
    data = df2, method = "loess") +
  scale_x_continuous("limit",
    breaks = seq_len(nlevels(limit_bw)),
    labels = levels(limit_bw),
    sec.axis = dup_axis(labels = levels(limit_lt))) +
  # Axis text colours match the line colours: blue for bw, red for lt.
  scale_colour_manual(NULL, values = c(bw = "blue", lt = "red")) +
  theme(axis.text.x.bottom = element_text(color = "blue"),
    axis.text.x.top = element_text(color = "red"))
3. Separate x axes, separate y axes
Here, the y-values of the second dataset are scaled such that the min and max values of the two datasets will coincide.
# compute scaling factor and offset
library(magrittr) # used to improve readability
# Range of the loess-fitted values of each data set.
bw_rng <- loess(df1$value ~ as.integer(limit_bw)) %>% fitted() %>% range()
lt_rng <- loess(df2$value ~ as.integer(limit_lt)) %>% fitted() %>% range()
# Linear map y -> scl * y + ofs that sends lt_rng onto bw_rng.
scl <- diff(bw_rng) / diff(lt_rng)
ofs <- bw_rng[1] - scl * lt_rng[1]
library(ggplot2)
ggplot() +
  geom_quantile(aes(x = as.integer(limit_bw), y = value, colour = "bw"),
    data = df1, method = "loess") +
  # The lt values are transformed onto the bw scale before plotting.
  geom_quantile(
    aes(x = as.integer(limit_lt), y = scl * value + ofs, colour = "lt"),
    data = df2, method = "loess") +
  scale_x_continuous("limit",
    breaks = seq_len(nlevels(limit_bw)),
    labels = levels(limit_bw),
    sec.axis = dup_axis(labels = levels(limit_lt))) +
  # The secondary y axis applies the inverse map so it reads in lt units.
  scale_y_continuous(sec.axis = sec_axis(~ (. - ofs) / scl)) +
  scale_colour_manual(NULL, values = c(bw = "blue", lt = "red")) +
  theme(axis.text.x.bottom = element_text(color = "blue"),
    axis.text.x.top = element_text(color = "red"),
    axis.text.y.left = element_text(color = "blue"),
    axis.text.y.right = element_text(color = "red"))

How to add comparison bars to a plot to denote which comparison a p value corresponds to

I'm using the following data frame:
df1 <- structure(list(Genotype = structure(c(1L, 1L, 1L, 1L, 1L,
2L,2L,2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L,1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L),
.Label= c("miR-15/16 FL", "miR-15/16 cKO"), class = "factor"),
Tissue = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("iLN", "Spleen", "Skin", "Colon"), class = "factor"),
`Cells/SC/Live/CD8—,, CD4+/Foxp3+,Median,<BV421-A>,CD127` = c(518L,
715L, 572L, 599L, 614L, 881L, 743L, 722L, 779L, 843L, 494L,
610L, 613L, 624L, 631L, 925L, 880L, 932L, 876L, 926L, 1786L,
2079L, 2199L, 2345L, 2360L, 2408L, 2509L, 3129L, 3263L, 3714L,
917L, NA, 1066L, 1059L, 939L, 1269L, 1047L, 974L, 1048L,
1084L)),
.Names = c("Genotype", "Tissue", "Cells/SC/Live/CD8—,,CD4+/Foxp3+,Median,<BV421-A>,CD127"),
row.names = c(NA, -40L), class = c("tbl_df", "tbl", "data.frame"))
and trying to make a plot using ggplot2 where box plots and points are displayed grouped by "Tissue" and interleaved by "Genotype". The significance values are displaying properly but I would like to add lines to denote the comparisons being made and have them start at the center of each "miR-15/16 FL" box plot and end at the center of each "miR-15/16 cKO" box plot and sit directly below the significance values. Below is the code I am using to generate the plot:
library(ggplot2)
library(ggpubr)
# Colour and point-shape lookups, named by the Genotype values in order of
# first appearance in df1.
color.groups <- c("black","red")
names(color.groups) <- unique(df1$Genotype)
shape.groups <- c(16, 1)
names(shape.groups) <- unique(df1$Genotype)
# Box plots plus raw points per Tissue, split by Genotype.
# NOTE: y = df1[3] passes the third column of the global df1 as a one-column
# data frame rather than mapping a column by name.
ggplot(df1, aes(x = Tissue, y = df1[3], color = Genotype, shape = Genotype)) +
geom_boxplot(position = position_dodge(), outlier.shape = NA) +
geom_point(position=position_dodge(width=0.75)) +
# Upper y limit is 20% above the overall maximum of the third column.
ylim(0,1.2*max(df1[3], na.rm = TRUE)) +
ylab('MFI CD127 (of CD4+ Foxp3+ T cells') +
scale_color_manual(values=color.groups) +
scale_shape_manual(values=shape.groups) +
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
axis.title.x=element_blank(), aspect.ratio = 1,
text = element_text(size = 9)) +
# One label height per tissue: that tissue's maximum plus 10% of the overall
# maximum. Rows 1-10, 11-20, 21-30 and 31-40 of df1 hold the four tissues.
stat_compare_means(show.legend = FALSE, label = 'p.format', method = 't.test',
label.y = c(0.1*max(df1[3], na.rm = TRUE) + max(df1[3][c(1:10),], na.rm = TRUE),
0.1*max(df1[3], na.rm = TRUE) + max(df1[3][c(11:20),], na.rm = TRUE),
0.1*max(df1[3], na.rm = TRUE) + max(df1[3][c(21:30),], na.rm = TRUE),
0.1*max(df1[3], na.rm = TRUE) + max(df1[3][c(31:40),], na.rm = TRUE)))
Thanks for any help!
I've created the brackets with three calls to geom_segment. These calls use a new dmax data frame created to provide the reference y-values for positioning the brackets and the p-value labels. The values e and r are for tweaking these positions.
I've made a few other changes to your code.
Change the name of the third column to temp and use this name y=temp in the call to ggplot. Your original code uses y=df1[3], which essentially reaches outside the plot environment to the df1 object in the parent environment, which can cause problems. Also, having a short name to refer to makes it easier to generate the dmax data frame and refer to its columns.
Use the dmax data frame for label.y positions in stat_compare_means, which reduces the amount of code needed. (Incidentally, stat_compare_means seems to require hard-coded label.y positions, rather than getting them from an aes mapping of the data.)
Position the p-value labels an absolute distance above each pair of box plots (using the value e), rather than a multiplicative distance. This makes it easier to keep spacing consistent between p-value labels, brackets, and box plots.
# group_by() and summarise() below come from dplyr, so attach it explicitly.
library(dplyr)
# Use a short column name for the third column
names(df1)[3] <- "temp"
# Generate data frame of reference y-values for p-value labels and bracket positions
# (one row per Tissue holding that tissue's maximum temp).
dmax <- df1 %>%
  group_by(Tissue) %>%
  summarise(temp = max(temp, na.rm = TRUE),
    Genotype = NA)
# For tweaking position of brackets
e <- 350 # distance of the p-value label above each tissue's maximum
r <- 0.6 # bracket height as a fraction of e
w <- 0.19 # half-width of the bracket in x-axis units
bcol <- "grey30" # bracket colour
ggplot(df1, aes(x = Tissue, y = temp, color = Genotype, shape = Genotype)) +
  geom_boxplot(position = position_dodge(), outlier.shape = NA) +
  geom_point(position = position_dodge(width = 0.75)) +
  # Upper y limit is 20% above the overall maximum; the column is referenced
  # by its name rather than by position.
  ylim(0, 1.2 * max(df1$temp, na.rm = TRUE)) +
  ylab("MFI CD127 (of CD4+ Foxp3+ T cells)") +
  scale_color_manual(values = color.groups) +
  scale_shape_manual(values = shape.groups) +
  theme_bw() +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.title.x = element_blank(), aspect.ratio = 1,
    text = element_text(size = 9)) +
  stat_compare_means(show.legend = FALSE, label = "p.format", method = "t.test",
    label.y = e + dmax$temp) +
  # Horizontal bar of the bracket, centred on each tissue position.
  geom_segment(data = dmax,
    aes(x = as.numeric(Tissue) - w, xend = as.numeric(Tissue) + w,
      y = temp + r * e, yend = temp + r * e),
    size = 0.3, color = bcol, inherit.aes = FALSE) +
  # Right-hand tick, dropping 60 y-units below the bar.
  geom_segment(data = dmax,
    aes(x = as.numeric(Tissue) + w, xend = as.numeric(Tissue) + w,
      y = temp + r * e, yend = temp + r * e - 60),
    size = 0.3, color = bcol, inherit.aes = FALSE) +
  # Left-hand tick, dropping 60 y-units below the bar.
  geom_segment(data = dmax,
    aes(x = as.numeric(Tissue) - w, xend = as.numeric(Tissue) - w,
      y = temp + r * e, yend = temp + r * e - 60),
    size = 0.3, color = bcol, inherit.aes = FALSE)
To address your comment, here's an example to show that the method above inherently adjusts to any number of x-categories.
Let's begin by adding two new tissue categories:
library(forcats)
# Register two extra factor levels, then relabel every fourth row in rows
# 1-20 and in rows 21-40 so the new levels actually occur in the data.
df1$Tissue <- fct_expand(df1$Tissue, "Tissue 5", "Tissue 6")
df1$Tissue[seq(1, 20, by = 4)] <- "Tissue 5"
df1$Tissue[seq(21, 40, by = 4)] <- "Tissue 6"
# Recompute the per-tissue maxima for the new grouping.
dmax <- summarise(
  group_by(df1, Tissue),
  temp = max(temp, na.rm = TRUE),
  Genotype = NA
)
Now run exactly the same plot code listed above to get the following plot:

ggplot: how to add multiple legends for plot and vertical lines?

I've created a plot which shows the means of two groups and associated 95% confidence band, as below. For the plot, I've already used different line types, fillings, colors.
The data plot_band is as follows.
dput(plot_band)
structure(list(mean = c(0.0909296772008702, 0.0949102886382386,
0.0989192140983566, 0.102428753920507, 0.106190021551613, 0.109834234007574,
0.11282406874623, 0.116443987192088, 0.119646042014149, 0.122877131667032,
0.125734341129646, 0.129194412319665, 0.131921946416482, 0.13467000293138,
0.137801823091921, 0.140320771073742, 0.143300871011905, 0.145703574224808,
0.148502607395268, 0.151216269559201, 0.153957673466713, 0.15642722394871,
0.159399752204122, 0.16158535629103, 0.163992551285173, 0.166446319141126,
0.168796463238069, 0.17130024918415, 0.17319290052143, 0.175970079857704,
0.178037138778032, 0.180359643729028, 0.182563083353043, 0.184882067722455,
0.186933337196788, 0.18928611634363, 0.19095095692481, 0.193552969255731,
0.195137836881874, 0.197581990963152, 0.199824696342001, 0.201576167030431,
0.203292777876833, 0.205785273925517, 0.207611128924057, 0.209067294675698,
0.211624327477106, 0.213018027996152, 0.215073900329166, 0.21654896049152,
0.218432328738047, 0.220299232072702, 0.221520169903876, 0.224082916931098,
0.225373663731495, 0.227623092060467, 0.228971037740905, 0.230665903341562,
0.232255049713341, 0.233816039663021, 0.236156033603955, 0.237722706454038,
0.239326639984125, 0.241061288510212, 0.323782287073584, 0.325539303794681,
0.326575563604555, 0.327932235745535, 0.329326904419804, 0.330270965006864,
0.331794972975829, 0.332736401387824, 0.333736983920265, 0.334858878358806,
0.335995344145518, 0.336884010919713, 0.337760950823761, 0.338470035342276,
0.339694375762279, 0.340590586642847, 0.340934410282471, 0.342186505998774,
0.342699699846757, 0.343822718137376, 0.344352069575663, 0.345191547743302,
0.345986783878912, 0.346908459064914, 0.347636673707646, 0.3483601957891,
0.349017016236978, 0.349393026672962, 0.350215046428817, 0.350578051082168,
0.351357872622786, 0.351833990930714, 0.352451422717008, 0.352852417773313,
0.353786047124291, 0.354360144310735, 0.354804607588953, 0.355216156665893,
0.3556114518015, 0.356570758245453, 0.357097049535425, 0.357671243406622,
0.35787930232607, 0.358500009058086, 0.359107586207553, 0.359418346394681,
0.359923090516015, 0.360327770652831, 0.360646653761867, 0.361526704703965,
0.361860340596181, 0.362284616802613, 0.362408547406209, 0.363068975461424,
0.363173638916247, 0.363746165222553, 0.364318465554143, 0.364550369183249,
0.365263491228022, 0.365588246738469, 0.366124420845147, 0.366327320718437,
0.366730809501062, 0.367298014408034), p2.5 = c(0.00920236578162877,
0.0111305911426958, 0.0131257550019632, 0.015586474005665, 0.017588259827762,
0.0195835240844649, 0.021653464115484, 0.0245221378289171, 0.0263028370478539,
0.0283125178459841, 0.030809139661692, 0.034224299031932, 0.0351514351131448,
0.0374690177003245, 0.0401208217539481, 0.0416432632702995, 0.0436268495854353,
0.0455924496480308, 0.0481710615607138, 0.0498487868097217, 0.052013860735697,
0.0541864115090449, 0.0559355297931858, 0.0582185384506931, 0.0595049507852038,
0.0617291057747846, 0.0624904066599628, 0.064090526611587, 0.0665855608482458,
0.0681610015253132, 0.0689510143842853, 0.0714235246023074, 0.0730718365551066,
0.0733828347805513, 0.0749772653575311, 0.0775677990166739, 0.0782434582066251,
0.0809696065399504, 0.0800620502625316, 0.0822097262074474, 0.0837314882447324,
0.0836800886932387, 0.0843305338836378, 0.0862036703259026, 0.0874082656018874,
0.0881312854081838, 0.0887921830279765, 0.0892805555426737, 0.0901061351380764,
0.0914750995958728, 0.0913838119125662, 0.0926827936869315, 0.0929511644196126,
0.0940218350370357, 0.0944327299872979, 0.0953545299910439, 0.0948298565703383,
0.0957001873318579, 0.0961251564147676, 0.0971098251546806, 0.0974911491380601,
0.0986598120212823, 0.0982370236835561, 0.0987719638365328, 0.114148199394403,
0.125138552629865, 0.133069438084806, 0.140931059768343, 0.147647282172844,
0.155831735418124, 0.163154010787227, 0.16809087346053, 0.173413948644787,
0.178336300631342, 0.183561163161725, 0.189552221591194, 0.192350001446747,
0.19547327255232, 0.19824967633061, 0.202611107184988, 0.205071997319457,
0.206232495037667, 0.208471493073236, 0.209717390943683, 0.211692880593303,
0.213829033311537, 0.215383413348152, 0.216370831366554, 0.216980537940184,
0.217670415960084, 0.218147500129008, 0.219104770868165, 0.220215949003459,
0.219501167154474, 0.219635297722562, 0.220565169003312, 0.218821371303922,
0.218910618214851, 0.219518190869959, 0.219204079206471, 0.219448334243776,
0.219174641398391, 0.217619259716122, 0.217993716481521, 0.218343413130982,
0.217141573568049, 0.216438618727695, 0.215672180354215, 0.214841486865522,
0.214092486614703, 0.216084004877199, 0.213891621307228, 0.213397326450924,
0.212530621813324, 0.212650230928244, 0.211323326285971, 0.211512467761759,
0.209879967307571, 0.208388878793908, 0.209257043929222, 0.207665115418059,
0.207413292377895, 0.204980142991601, 0.206053394727878, 0.205039712521127,
0.203155679138143, 0.202289445844638, 0.201779149557556), p97.5 = c(0.240681337890249,
0.239988615023241, 0.239222274397932, 0.23882694927308, 0.239567463457127,
0.240035884370459, 0.239971640602537, 0.242348644629734, 0.244241554912481,
0.246794068956881, 0.248869825514075, 0.252843804762058, 0.254595507587193,
0.257498240756364, 0.26074636531938, 0.263991307688752, 0.268222101449506,
0.270245299020079, 0.278955701793892, 0.280366963871541, 0.286253886155709,
0.290942761721134, 0.29709853936211, 0.300641051539586, 0.307350564223005,
0.314475951046524, 0.31757563389217, 0.324250050938626, 0.326645521042049,
0.334746718583917, 0.341297900171566, 0.347056902406046, 0.352412986039391,
0.356409285744598, 0.364329251893085, 0.36882469705109, 0.373595444661095,
0.379308956442793, 0.388012909521406, 0.393418480355642, 0.399407258087214,
0.403270925317011, 0.407517084163824, 0.413742327029277, 0.42089783652825,
0.422996679448412, 0.430738094720356, 0.433915405828653, 0.438263395419797,
0.442376801773873, 0.450664409546504, 0.453854917168461, 0.455755257192578,
0.463879371708031, 0.470262095557133, 0.478816677993115, 0.478998770025097,
0.485204929246363, 0.490588733478761, 0.49747652543363, 0.498792119487052,
0.508008619470507, 0.51314092048762, 0.518568532547669, 0.579810955268174,
0.563256045407579, 0.55093710586083, 0.541241619905278, 0.532667775608687,
0.523824194956849, 0.518816497858615, 0.512618467188886, 0.506452368044292,
0.501653171003674, 0.499276681561068, 0.496002704329641, 0.494256887981196,
0.49200837587611, 0.490570113245846, 0.491077058931435, 0.487352049845066,
0.487927727831147, 0.487928022062059, 0.488900063808496, 0.488866145012628,
0.489808465409391, 0.491100206396406, 0.492044173457154, 0.494346147046575,
0.494980820850837, 0.49616843086841, 0.497216550345458, 0.499201695431901,
0.501160614633382, 0.502598288902507, 0.504203085629905, 0.50530488873578,
0.508449115699177, 0.508914783054669, 0.51306711977087, 0.51479783743171,
0.51648055644086, 0.518549503653961, 0.522859455223989, 0.522598786005884,
0.52736459871623, 0.527054294078792, 0.532359397607223, 0.532643025946804,
0.533817320437782, 0.535862852499484, 0.539613602346564, 0.54138065631686,
0.544340213112881, 0.545596882887723, 0.549029532028693, 0.546769636775625,
0.551728290583129, 0.552996735997194, 0.555676593069663, 0.559580922687426,
0.561700216317917, 0.562726465369815, 0.563527127546323, 0.567715046522725,
0.568850181180136, 0.56965258128659, 0.571847219713553), outcome = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("DLT",
"CB"), class = "factor"), exp_X_post = c(721.595263503532, 794.40305777437,
865.319646465533, 933.669956578678, 999.728550839186, 1062.12810757171,
1121.92986212885, 1186.37187215809, 1246.1267376175, 1305.33376392859,
1359.36602305224, 1421.23758898206, 1472.44041133326, 1520.62395309786,
1584.09764621781, 1634.01654454251, 1685.34860459111, 1735.26374323406,
1785.87871337346, 1840.42999799797, 1888.32905203148, 1937.38674685726,
1990.74583676908, 2041.61942276328, 2083.76909363497, 2134.07414000003,
2177.97560514382, 2227.25787768033, 2269.76501622686, 2319.50659548171,
2360.78992430352, 2404.37623851091, 2449.36656617313, 2500.80748523146,
2540.71467060956, 2588.5685157055, 2630.93535458068, 2675.04099554242,
2709.53185769419, 2763.12669881888, 2807.24737149465, 2849.03542063784,
2887.16961904492, 2927.78459960731, 2973.91123171086, 3006.0197134382,
3056.06581532434, 3089.41001229951, 3132.29020081068, 3177.35838641843,
3212.66669292569, 3256.19625640177, 3284.73766167032, 3330.28770837953,
3368.28973519186, 3409.05190043795, 3449.93435443996, 3485.59367731521,
3524.70884576068, 3557.60265444533, 3615.06476720162, 3648.55074883409,
3688.13510762386, 3727.38208940522, 721.595263503532, 794.40305777437,
865.319646465533, 933.669956578678, 999.728550839186, 1062.12810757171,
1121.92986212885, 1186.37187215809, 1246.1267376175, 1305.33376392859,
1359.36602305224, 1421.23758898206, 1472.44041133326, 1520.62395309786,
1584.09764621781, 1634.01654454251, 1685.34860459111, 1735.26374323406,
1785.87871337346, 1840.42999799797, 1888.32905203148, 1937.38674685726,
1990.74583676908, 2041.61942276328, 2083.76909363497, 2134.07414000003,
2177.97560514382, 2227.25787768033, 2269.76501622686, 2319.50659548171,
2360.78992430352, 2404.37623851091, 2449.36656617313, 2500.80748523146,
2540.71467060956, 2588.5685157055, 2630.93535458068, 2675.04099554242,
2709.53185769419, 2763.12669881888, 2807.24737149465, 2849.03542063784,
2887.16961904492, 2927.78459960731, 2973.91123171086, 3006.0197134382,
3056.06581532434, 3089.41001229951, 3132.29020081068, 3177.35838641843,
3212.66669292569, 3256.19625640177, 3284.73766167032, 3330.28770837953,
3368.28973519186, 3409.05190043795, 3449.93435443996, 3485.59367731521,
3524.70884576068, 3557.60265444533, 3615.06476720162, 3648.55074883409,
3688.13510762386, 3727.38208940522)), .Names = c("mean", "p2.5",
"p97.5", "outcome", "exp_X_post"), row.names = c("pi_A[1]", "pi_A[2]",
"pi_A[3]", "pi_A[4]", "pi_A[5]", "pi_A[6]", "pi_A[7]", "pi_A[8]",
"pi_A[9]", "pi_A[10]", "pi_A[11]", "pi_A[12]", "pi_A[13]", "pi_A[14]",
"pi_A[15]", "pi_A[16]", "pi_A[17]", "pi_A[18]", "pi_A[19]", "pi_A[20]",
"pi_A[21]", "pi_A[22]", "pi_A[23]", "pi_A[24]", "pi_A[25]", "pi_A[26]",
"pi_A[27]", "pi_A[28]", "pi_A[29]", "pi_A[30]", "pi_A[31]", "pi_A[32]",
"pi_A[33]", "pi_A[34]", "pi_A[35]", "pi_A[36]", "pi_A[37]", "pi_A[38]",
"pi_A[39]", "pi_A[40]", "pi_A[41]", "pi_A[42]", "pi_A[43]", "pi_A[44]",
"pi_A[45]", "pi_A[46]", "pi_A[47]", "pi_A[48]", "pi_A[49]", "pi_A[50]",
"pi_A[51]", "pi_A[52]", "pi_A[53]", "pi_A[54]", "pi_A[55]", "pi_A[56]",
"pi_A[57]", "pi_A[58]", "pi_A[59]", "pi_A[60]", "pi_A[61]", "pi_A[62]",
"pi_A[63]", "pi_A[64]", "qi_A[1]", "qi_A[2]", "qi_A[3]", "qi_A[4]",
"qi_A[5]", "qi_A[6]", "qi_A[7]", "qi_A[8]", "qi_A[9]", "qi_A[10]",
"qi_A[11]", "qi_A[12]", "qi_A[13]", "qi_A[14]", "qi_A[15]", "qi_A[16]",
"qi_A[17]", "qi_A[18]", "qi_A[19]", "qi_A[20]", "qi_A[21]", "qi_A[22]",
"qi_A[23]", "qi_A[24]", "qi_A[25]", "qi_A[26]", "qi_A[27]", "qi_A[28]",
"qi_A[29]", "qi_A[30]", "qi_A[31]", "qi_A[32]", "qi_A[33]", "qi_A[34]",
"qi_A[35]", "qi_A[36]", "qi_A[37]", "qi_A[38]", "qi_A[39]", "qi_A[40]",
"qi_A[41]", "qi_A[42]", "qi_A[43]", "qi_A[44]", "qi_A[45]", "qi_A[46]",
"qi_A[47]", "qi_A[48]", "qi_A[49]", "qi_A[50]", "qi_A[51]", "qi_A[52]",
"qi_A[53]", "qi_A[54]", "qi_A[55]", "qi_A[56]", "qi_A[57]", "qi_A[58]",
"qi_A[59]", "qi_A[60]", "qi_A[61]", "qi_A[62]", "qi_A[63]", "qi_A[64]"
), class = "data.frame")
Now I want to add some vertical dashed lines. I wish to use a different color for each vertical line and have a legend for those lines as well. The information for those vertical lines is in another data frame, observed_mean:
dput(observed_mean)
structure(list(TRT = structure(1:9, .Label = c("A", "B", "C",
"D", "E", "F", "G", "H", "I"), class = "factor"), gmcmin = c(967.117632548,
1306.76729845833, 2394.519441584, 2404.73065902857, 3047.48745766364,
2550.12866139, 1863.6505272925, 3569.57489109, 3660.40695204)), .Names = c("TRT",
"gmcmin"), row.names = c(NA, -9L), class = "data.frame")
Here is the code to generate the plot:
# Build the x-axis limits and tick spacing from the observed exposure range.
# `range` and `step` are referenced again by the later plotting snippets, so
# both names are kept as they are.
range <- range(plot_band$exp_X_post)
range <- c(floor(range[1]), ceiling(range[2]))  # widen to whole numbers
step <- floor((range[2] - range[1]) / 10)       # one tenth of the span, floored

# Mean curve per outcome with a p2.5-p97.5 ribbon, plus one long-dashed
# vertical line at each value of observed_mean$gmcmin.
ggplot(plot_band, aes(x = exp_X_post, y = mean,
                      color = outcome, linetype = outcome)) +
  # NOTE(review): `linetype = NA` is mapped inside aes() here. This is the
  # mapping the extra legend box is attributed to; it is left unchanged so the
  # snippet still reproduces the behaviour being asked about.
  geom_ribbon(aes(ymin = p2.5, ymax = p97.5, linetype = NA,
                  fill = outcome),
              alpha = 0.4) +
  geom_line(size = 1.5) +
  xlab("Exposure") +
  ylab("Probability of CB/DLT") +
  scale_x_continuous(limits = range,
                     breaks = seq(range[1], range[2], by = step)) +
  geom_vline(xintercept = observed_mean$gmcmin,
             linetype = "longdash") +
  theme_bw() +
  theme(legend.position = "top",
        plot.margin = unit(c(1, 1, 3, 1), "lines"),
        legend.title = element_text(size = 15),
        axis.title.y = element_text(margin = margin(0, 15, 0, 0))) +
  # Give the color, fill and linetype scales the same title.
  scale_color_discrete(name = "Probability (95% CI)") +
  scale_fill_discrete(name = "Probability (95% CI)") +
  scale_linetype_discrete(name = "Probability (95% CI)")
Note: the last three lines are used to change the legend title from variable name outcome to "Probability (95% CI)". NOT sure whether that's the right way though.
Questions:
I wish to put the current legend to the right, then below that I'd like to put the legend for vertical lines. Could anyone give me some clues how to do that?
As shown in the plot, there are two identical legends on top (though not in the same color). The lower one appears if I change the order of the factor outcome with the following code. I am not sure why that happens. How could I get rid of it?
# Re-level `outcome` so that "DLT" is the first level and "CB" the second.
plot_band[["outcome"]] <- factor(plot_band[["outcome"]],
                                 levels = c("DLT", "CB"))
Thanks a lot for any comments/suggestions!!
The extra legend box is showing up because of the linetype = NA in the aes() of geom_ribbon. Moving the linetype out of the mapping (i.e., setting it as a constant outside aes()) will take care of that.
For the line labeling, you can perhaps just put the labels on the plot using geom_text
Here is a full plot that does something like that (now with ggrepel to place the labels more sensibly -- can't believe I didn't start there)
# install.packages("ggrepel")
# (development version: devtools::install_github("slowkow/ggrepel"))
library(ggrepel)

# Same plot as before, with each vertical line labelled by its treatment.
# Relies on `range` and `step` computed earlier for the x-axis limits/breaks.
ggplot(plot_band, aes(x = exp_X_post, y = mean,
                      color = outcome, linetype = outcome)) +
  # linetype is a constant here (outside aes()), so the ribbon draws no
  # outline and the ribbon layer maps nothing to linetype.
  geom_ribbon(aes(ymin = p2.5, ymax = p97.5, fill = outcome),
              alpha = 0.4, linetype = "blank") +
  geom_line(size = 1.5) +
  xlab("Exposure") +
  ylab("Probability of CB/DLT") +
  scale_x_continuous(limits = range,
                     breaks = seq(range[1], range[2], by = step)) +
  geom_vline(xintercept = observed_mean$gmcmin,
             linetype = "longdash") +
  # Place each treatment label at y = 0 on its vertical line; ggrepel nudges
  # overlapping labels apart. inherit.aes = FALSE keeps the plot-level
  # color/linetype mappings (columns observed_mean does not have) out of this
  # layer, instead of overriding them with NA inside aes().
  geom_text_repel(
    mapping = aes(x = gmcmin, y = 0, label = TRT),
    data = observed_mean,
    inherit.aes = FALSE,
    show.legend = FALSE
  ) +
  theme_bw() +
  theme(legend.position = "top",
        plot.margin = unit(c(1, 1, 3, 1), "lines"),
        legend.title = element_text(size = 15),
        axis.title.y = element_text(margin = margin(0, 15, 0, 0))) +
  # Give the color, fill and linetype scales the same title.
  scale_color_discrete(name = "Probability (95% CI)") +
  scale_fill_discrete(name = "Probability (95% CI)") +
  scale_linetype_discrete(name = "Probability (95% CI)")
(Note: the mean labels overlap, so you may need to position them more carefully, e.g., by adding another column to observed_mean giving the position where you want each one plotted).
If you need the labels to be in a legend instead, you can use this code:
# Variant that puts the vertical lines into a legend: each treatment gets its
# own linetype. Relies on `range` computed earlier for the x-axis breaks.
ggplot(plot_band, aes(x = exp_X_post, y = mean, color = outcome)) +
  geom_ribbon(aes(ymin = p2.5, ymax = p97.5, fill = outcome),
              alpha = 0.4, linetype = "blank") +
  # The outcome linetype mapping is intentionally commented out so that the
  # linetype scale is used by the vertical lines only. Uncomment the two
  # marked pieces to restore it.
  geom_line(
    # aes(linetype = outcome),
    size = 1.5
    # , show.legend = FALSE
  ) +
  xlab("Exposure") +
  ylab("Probability of CB/DLT") +
  scale_x_continuous(breaks = pretty(range)) +
  # Mapping linetype to TRT inside aes() is what gives the vertical lines
  # their own entries under the "Treatment" legend title set below.
  geom_vline(
    mapping = aes(xintercept = gmcmin, linetype = TRT),
    data = observed_mean
  ) +
  theme_bw() +
  theme(legend.position = "right",
        plot.margin = unit(c(1, 1, 3, 1), "lines"),
        legend.title = element_text(size = 15),
        axis.title.y = element_text(margin = margin(0, 15, 0, 0))) +
  scale_color_discrete(name = "Probability (95% CI)") +
  scale_fill_discrete(name = "Probability (95% CI)") +
  scale_linetype_discrete(name = "Treatment")
Note that I removed the linetype from the main lines, as it was causing some weirdness with the vertical lines. You can add it back by uncommenting the parts in geom_line(), but note that it then shows up in the list with the treatments. There is probably a way to fix that if you absolutely need it, but my quick tries aren't working. I will note, however, that the linetypes are a bit hard to pick out.
Example plot with both the legend and the labels

Resources