Related
I wanted a visualization something like this
I ended up getting like this one
I'm fairly close to what I want, except that I'm not able to separate the groups.
Here is my data frame
dput(dat_red)
structure(list(FAB = structure(c(5L, 1L, 5L, 3L, 2L, 4L, 6L,
2L, 1L, 6L, 5L, 1L, 5L, 1L, 5L, 6L, 3L, 5L, 2L, 5L, 3L, 3L, 3L,
1L, 3L, 1L, 1L, 1L), .Label = c("M0", "M1", "M2", "M3", "M4",
"M5"), class = "factor"), Risk_Cyto = structure(c(2L, 3L, 2L,
2L, 3L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L), .Label = c("Good", "Intermediate",
"Poor"), class = "factor"), `TCGA-AB-2856` = c(0, 0.203446022561853,
0.057566971226641, 0.050525640210207, 0.050663468813024, 0.108022967842345,
0.03563961790061, 0.091955619434079, 0.09562601922977, 0.072990036124458,
0.05292549370956, 0.134908910498566, 0.056146007781438, 0.166755814327401,
0.072370918290216, 0.092982169160965, 0.053571132330207, 0.026946730545354,
0.096491482450314, 0.086393933157139, 0.086056971395349, 0.059872483122941,
0.05562972070039, 0.080629871622231, 0.06458076058265, 0.109295018454197,
0.15019108327262, 0.122208033564744), `TCGA-AB-2849` = c(0.203446022561853,
0, 0.138756102002674, 0.109150212934145, 0.130381628657973, 0.186028570196918,
0.201142265508601, 0.117008908236162, 0.07523492135779, 0.237542759238287,
0.154026516322799, 0.093169870680731, 0.174873827256869, 0.077917778705184,
0.217466101351585, 0.247196178178148, 0.139168631446623, 0.130879779506245,
0.094044964277672, 0.102330796604311, 0.115883670128914, 0.106007290303468,
0.124207778875499, 0.100051046626221, 0.096898638044544, 0.081075416500332,
0.066801569316824, 0.095571899845876), `TCGA-AB-2971` = c(0.057566971226641,
0.138756102002674, 0, 0.057153443556063, 0.049118618822663, 0.108803803345704,
0.038593571058361, 0.05623480754803, 0.061897696825206, 0.056921365921972,
0.027147582644049, 0.100579305160467, 0.031712766628694, 0.099623521686644,
0.043315406299788, 0.079156224894216, 0.070713735063067, 0.042797402350358,
0.064121331342957, 0.076245258448711, 0.057969352005916, 0.056411884330189,
0.029950269541688, 0.052538503817376, 0.053263317374002, 0.073813902166228,
0.081932722355952, 0.095255347468669), `TCGA-AB-2930` = c(0.050525640210207,
0.109150212934145, 0.057153443556063, 0, 0.040710142137316, 0.087506794353747,
0.076018856821365, 0.054334641613629, 0.043854827190482, 0.121490922447548,
0.060145981627256, 0.070829823037578, 0.0708179998993, 0.083561655580485,
0.106626803408534, 0.149000581782327, 0.049861493156012, 0.018112612744773,
0.05246829209315, 0.041582348253964, 0.053306367816997, 0.035373116643303,
0.042875256342202, 0.03406333799917, 0.036306618864362, 0.045647830531497,
0.084727864328183, 0.079147350281325), `TCGA-AB-2891` = c(0.050663468813024,
0.130381628657973, 0.049118618822663, 0.040710142137316, 0, 0.117167203965628,
0.057145523476846, 0.07089819966556, 0.058848771210843, 0.090222074046894,
0.052188574602838, 0.091623506635555, 0.053000329480576, 0.094592248885481,
0.082033497053918, 0.111240839210373, 0.065982245111563, 0.038618210190806,
0.063406266346048, 0.062231987650712, 0.067503749234478, 0.039970960455281,
0.042758552599394, 0.049740193805893, 0.04884538212911, 0.07959023948363,
0.090749468265183, 0.075792324166325)), class = "data.frame", row.names = c(NA,
-28L))
My code
# Read the tab-separated JSD table. check.names = FALSE keeps the sample IDs
# (e.g. "TCGA-AB-2856") as column names instead of rewriting the hyphens.
dat_red <- read.csv("JSD_test_map_.txt", sep = "\t", check.names = FALSE)
# Reshape to long format: FAB and Risk_Cyto stay as id columns, every other
# column is stacked into variable/value pairs.
# The data frame read above is `dat_red`; the original call melted `JSD_MAP`,
# which is not defined anywhere in this snippet.
# NOTE(review): melt() is presumably reshape2::melt - confirm which package
# is attached.
df_melt <- melt(dat_red, id.vars = c("FAB", "Risk_Cyto"))
To plot the above I used this tutorial
# Load the raincloud helpers (presumably where geom_flat_violin() is defined).
source("R_rainclouds.R")
# Raincloud plot of value per Risk_Cyto group, filled by FAB: a half violin
# nudged to the right, jittered raw points, and a narrow box plot that hides
# outlier markers.
ggplot(df_melt, aes(x = Risk_Cyto, y = value, fill = FAB)) +
  geom_flat_violin(
    position = position_nudge(x = 0.2, y = 0),
    adjust = 2,
    alpha = 0.5
  ) +
  geom_point(position = position_jitter(width = 0.15), size = 0.8) +
  # The box plot uses the same x, y and fill as the plot-level mapping.
  geom_boxplot(
    outlier.shape = NA,
    alpha = 0.5,
    width = 0.1,
    colour = "black"
  ) +
  # theme_jen() +
  labs(
    title = "Raincloud plot of body mass by species",
    x = "Risk_Cyto",
    y = "JSD"
  ) +
  easy_remove_legend()
So I have the following group in my metadata or patient info in this subset
> unique(dat_red$FAB)
[1] M4 M0 M2 M1 M3 M5
Levels: M0 M1 M2 M3 M4 M5
> unique(dat_red$Risk_Cyto)
[1] Intermediate Poor Good
Levels: Good Intermediate Poor
My objective is to show Risk_Cyto as the main grouping, similar to the first figure, where they show ColonT, HeartLV, Liver, Muscle, etc. Within each Risk_Cyto group I then want to show the different FAB subtypes, similar to how Young and Old are shown there.
Right now everything is kind of stacked or rather messed up in single plot
Any help or suggestion is really appreciated
Put FAB on the x axis and facet by Risk_Cyto
# Raincloud plot with FAB on the x axis and one facet per Risk_Cyto level, so
# the FAB subtypes sit side by side inside each risk group.
df_melt %>%
  ggplot(aes(FAB, value, fill = FAB)) +
  # Half violin, shifted right so it does not overlap the points and box.
  geom_flat_violin(position = position_nudge(x = .2, y = 0), adjust = 2,
                   alpha = 0.5) +
  geom_point(position = position_jitter(width = .15), size = .8) +
  geom_boxplot(outlier.shape = NA,
               alpha = .5, width = .1, colour = "black") +
  # Labels describe what is actually plotted: the x axis is FAB, not
  # Risk_Cyto, and the title no longer carries the tutorial's wording.
  labs(title = "Raincloud plot of JSD by FAB subtype within each Risk_Cyto group",
       x = "FAB", y = "JSD") +
  # Free x scale and free panel width: each facet only shows, and only makes
  # room for, the FAB levels present in that Risk_Cyto group.
  facet_grid(. ~ Risk_Cyto, scales = "free_x", space = "free_x") +
  theme_bw(base_size = 16) +
  # fill duplicates the x axis, so the legend is dropped.
  theme(legend.position = "none",
        strip.background = element_blank(),
        strip.text = element_text(face = 2, size = 22))
I need to space the dates on the x axis according to the number of days between samplings. Some samplings are 5 days apart and some are 4 days apart.
data looks like this (also need to add to the labels BBCH):
structure(list(Time = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L,
4L, 5L, 5L), .Label = c("06.05.2016 BBCH 50–51", "09.05.2016 BBCH 51–53",
"13.05.2016 BBCH 55–59", "16.05.2016 BBCH 59–61", "20.05.2016 BBCH 61–64"
), class = "factor"), Mean1 = c(0.9133333, 0.4366667, 0.313333,
0.176, 0.4, 0.1533333, 0.2066667, 0.29, 0.4633333, 0.4833333),
sd = c(2.704973, 1.639598, 0.8780997, 0.5158375, 1.1213943,
0.5203121, 0.5461531, 0.6587969, 0.823153, 0.9965101), n = c(300L,
300L, 300L, 250L, 300L, 300L, 300L, 300L, 300L, 300L), Mean2 = c(0.15617168,
0.09466226, 0.05069711, 0.03262443, 0.06474373, 0.03004023,
0.03153216, 0.03803566, 0.04752476, 0.05753354), SNH = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("OC", "OF"
), class = "factor"), Round = structure(c(1L, 1L, 2L, 2L,
3L, 3L, 4L, 4L, 5L, 5L), .Label = c("Round 1", "Round 2",
"Round 3", "Round 4", "Round 5"), class = "factor")), class = "data.frame", row.names = c(NA,
-10L))
and my script:
# Plot Mean1 over the sampling dates with one line per SNH level and error
# bars of +/- Mean2 around each point.
Pan_16 <- qplot(x = Time,
                y = Mean1,
                group = SNH,
                data = Plant) +
  geom_errorbar(aes(ymin = Mean1 - Mean2,
                    ymax = Mean1 + Mean2),
                width = 0.2, size = 1) +
  # xlim = c() is NULL, so only the y range is zoomed.
  coord_cartesian(xlim = c(), ylim = c(0, 2)) +
  geom_line(size = 1, aes(linetype = SNH)) +
  # Replace the first whitespace of each label with a line break, so the date
  # sits on its own line above the BBCH text.
  scale_x_discrete(labels = function(x) {
    sub("\\s", "\n", x)
  }) +
  # Named greys must not carry a leading "#"; "#gray20" is neither a valid
  # hex code nor a colour name.
  # NOTE(review): no layer maps the colour aesthetic, so this scale has no
  # visible effect unless colour = SNH is added to a mapping.
  scale_color_manual("Field type", values = c("gray20", "gray46")) +
  labs(title = "", x = "", y = "") +
  # Hand-placed asterisk annotations above the first three sampling dates.
  annotate("text", x = 1, y = 1.3, label = c("* * * "), color = "black",
           size = 5, fontface = "bold") +
  annotate("text", x = 2, y = 0.8, label = c(" * * "), color = "black",
           size = 5, fontface = "bold") +
  annotate("text", x = 3, y = 0.8, label = c("* * * "), color = "black",
           size = 5, fontface = "bold") +
  theme(axis.line = element_line(size = 1, colour = "grey80")) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text = element_text(colour = "black")) +
  theme(
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white", colour = "white"))
Sisi, to get you going ... also check that your Time variable is a factor. Always check the data type, if you do not get expected results or errors.
The praise goes to @Rui, who basically gave you the answer.
I stripped off the superfluous stuff from your plot to help you see the major building blocks. You can add these layers for your desired plot/end result.
library(dplyr)
df <- structure(list(Time = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L,
4L, 5L, 5L), .Label = c("06.05.2016 BBCH 50–51", "09.05.2016 BBCH 51–53",
"13.05.2016 BBCH 55–59", "16.05.2016 BBCH 59–61", "20.05.2016 BBCH 61–64"
), class = "factor"), Mean1 = c(0.9133333, 0.4366667, 0.313333,
0.176, 0.4, 0.1533333, 0.2066667, 0.29, 0.4633333, 0.4833333),
sd = c(2.704973, 1.639598, 0.8780997, 0.5158375, 1.1213943,
0.5203121, 0.5461531, 0.6587969, 0.823153, 0.9965101), n = c(300L,
300L, 300L, 250L, 300L, 300L, 300L, 300L, 300L, 300L), Mean2 = c(0.15617168,
0.09466226, 0.05069711, 0.03262443, 0.06474373, 0.03004023,
0.03153216, 0.03803566, 0.04752476, 0.05753354), SNH = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("OC", "OF"
), class = "factor"), Round = structure(c(1L, 1L, 2L, 2L,
3L, 3L, 4L, 4L, 5L, 5L), .Label = c("Round 1", "Round 2",
"Round 3", "Round 4", "Round 5"), class = "factor")), class = "data.frame", row.names = c(NA,
-10L))
# ---------- turn the Time factor into plain character strings
df <- mutate(df, Time = as.character(Time))
# ---------- parse the leading "dd.mm.yyyy" of each label into a Date column
# (the text after the date is ignored by the format string)
df$Date <- as.Date(df$Time, format = "%d.%m.%Y")
# ---------- plot against the real dates so the spacing reflects elapsed days
qplot(x = Date, y = Mean1, group = SNH, data = df) +
  geom_errorbar(
    aes(ymin = Mean1 - Mean2, ymax = Mean1 + Mean2),
    width = 0.2, size = 1
  ) +
  # ---------- date scale; adjust labels and break spacing to taste
  scale_x_date(
    date_labels = "%d %b",       # day and abbreviated month
    date_breaks = "2 days",      # a major break every 2 days
    date_minor_breaks = "1 day"  # minor breaks in between
  )
Amendment to show-case setting of user-defined axis breaks
Scales support the setting of breaks. You can either provide a vector of break values or supply a function that returns the desired breaks.
Below we replace the (regular) and preconfigured break setting of date_breaks by supplying a breaks statement.
# ---------- coerce Time to character
df <- df %>% mutate(Time = as.character(Time))
# ---------- now make a Date column from the leading "dd.mm.yyyy" of each label
df$Date <- as.Date(df$Time, "%d.%m.%Y")
# with the given data frame plot and set time axis
qplot(x = Date, y = Mean1, group = SNH, data = df) +
  geom_errorbar(aes(ymin = Mean1 - Mean2,
                    ymax = Mean1 + Mean2),
                width = 0.2, size = 1) +
  # ------------- set a date scale and "configure" to your liking
  scale_x_date(breaks = unique(df$Date),  # major breaks exactly at the sampling dates
               minor_breaks = "1 day",    # keep minor breaks evenly spaced
               date_labels = "%d %b"      # show day and month
  )  # closing parenthesis was missing, leaving the expression incomplete
This yields:
I am plotting the proportion of deep sleep (y axis) vs days (x axis). I would like to add vertical shaded area for a better understanding (e.g. grey for week-ends, orange for sick period...).
I have tried using geom_ribbon (I created a variable that takes the value 30, which is the top of my y axis, when the date falls on a weekend; this information is given in another column), but instead of rectangles I get trapezoids.
In another post, someone proposed using "geom_rect", or "annotate" if one knows the x and y coordinates, but I don't see how to adapt it to my case, where I want the colored area repeated for every weekend (it is not exactly every 7 days because some data are missing).
Do you have any idea ?
Many thanks in advance !
# Proportion of deep sleep per day, one facet row per MONTH.
ggplot(Sleep.data, aes(x = DATEID)) +
  # Daily values as one connected line, with points coloured by Sign.deep.
  geom_line(aes(y = P.DEEP, group = 1), col = "deepskyblue3") +
  geom_point(aes(y = P.DEEP, group = 1, col = Sign.deep)) +
  guides(col = FALSE) +
  # Ribbon between the `min` and `max.WE` columns of the data.
  geom_ribbon(aes(ymin = min, ymax = max.WE), fill = "#6495ED80") +
  facet_grid(MONTH ~ .) +
  # Horizontal reference lines at 15 (solid) and 20 (dashed).
  geom_hline(yintercept = 15, col = "forestgreen") +
  geom_hline(yintercept = 20, col = "forestgreen", linetype = "dashed") +
  # Grey vertical guides every 7 days.
  geom_vline(xintercept = seq(7, 28, by = 7), col = "grey") +
  scale_x_continuous(breaks = seq(0, 28, by = 7)) +
  scale_y_continuous(breaks = seq(0, 30, by = 5)) +
  labs(
    x = "Days",
    y = "Proportion of deep sleep stage",
    title = "Deep sleep"
  )
Proportion of deep sleep vs time
Head(Sleep.data)
> dput(head(Sleep.data))
structure(list(DATE = structure(c(1L, 4L, 7L, 10L, 13L, 16L), .Label = c("01-Dec-17",
"01-Feb-18", "01-Jan-18", "02-Dec-17", "02-Feb-18", "02-Jan-18",
"03-Dec-17", "03-Feb-18", "03-Jan-18", "04-Dec-17", "04-Feb-18",
"04-Jan-18", "05-Dec-17", "05-Feb-18", "05-Jan-18", "06-Dec-17",
"06-Feb-18", "06-Jan-18", "07-Dec-17", "07-Feb-18", "07-Jan-18",
"08-Dec-17", "08-Jan-18", "09-Dec-17", "09-Feb-18", "09-Jan-18",
"10-Dec-17", "10-Jan-18", "11-Dec-17", "11-Feb-18", "11-Jan-18",
"12-Dec-17", "12-Jan-18", "13-Dec-17", "13-Feb-18", "13-Jan-18",
"14-Dec-17", "14-Feb-18", "14-Jan-18", "15-Dec-17", "15-Jan-18",
"16-Dec-17", "16-Jan-18", "17-Dec-17", "17-Jan-18", "18-Dec-17",
"18-Jan-18", "19-Dec-17", "19-Jan-18", "20-Dec-17", "21-Dec-17",
"21-Jan-18", "22-Dec-17", "22-Jan-18", "23-Dec-17", "23-Jan-18",
"24-Dec-17", "24-Jan-18", "25-Dec-17", "25-Jan-18", "26-Dec-17",
"26-Jan-18", "27-Dec-17", "27-Jan-18", "28-Dec-17", "28-Jan-18",
"29-Dec-17", "29-Jan-18", "30-Dec-17", "30-Jan-18", "31-Dec-17",
"31-Jan-18"), class = "factor"), DATEID = 1:6, MONTH = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Decembre", "Janvier", "Février"
), class = "factor"), DURATION = c(8.08, 7.43, 6.85, 6.23, 7.27,
6.62), D.DEEP = c(1.67, 1.37, 1.62, 1.75, 1.95, 0.9), P.DEEP = c(17L,
17L, 21L, 24L, 25L, 12L), STIMS = c(0L, 0L, 0L, 0L, 390L, 147L
), D.REM = c(1.7, 0.95, 0.95, 1.43, 1.47, 0.72), P.REM = c(17L,
11L, 12L, 20L, 19L, 9L), D.LIGHT = c(4.7, 5.12, 4.27, 3.05, 3.83,
4.98), P.LIGHT = c(49L, 63L, 55L, 43L, 49L, 66L), D.AWAKE = c(1.45,
0.58, 0.47, 0.87, 0.37, 0.85), P.AWAKE = c(15L, 7L, 6L, 12L,
4L, 11L), WAKE.UP = c(-2L, 0L, 2L, -1L, 3L, 1L), AGITATION = c(-1L,
-3L, -1L, -2L, 2L, -1L), FRAGMENTATION = c(1L, -2L, 2L, 1L, 0L,
-1L), PERIOD = structure(c(3L, 3L, 4L, 4L, 4L, 4L), .Label = c("HOLIDAYS",
"SICK", "WE", "WORK"), class = "factor"), SPORT = structure(c(2L,
1L, 2L, 2L, 2L, 1L), .Label = c("", "Day", "Evening"), class = "factor"),
ACTIVITY = structure(c(6L, 1L, 3L, 4L, 5L, 1L), .Label = c("",
"Bkool", "eBike", "Gym", "Natation", "Run"), class = "factor"),
TABLETS = c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5), Ratio = c(1.15,
2.36, 3.45, 2.01, 5.27, 1.06), Sign = structure(c(2L, 2L,
2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor"),
Sign.ratio = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), Sign.deep = structure(c(2L, 2L,
2L, 2L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
Sign.awake = structure(c(1L, 2L, 2L, 1L, 2L, 1L), .Label = c("0",
"1"), class = "factor"), Sign.light = structure(c(2L, 1L,
1L, 2L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
index = structure(c(1L, 1L, 1L, 1L, 2L, 1L), .Label = c("0",
"1"), class = "factor"), min = c(0, 0, 0, 0, 0, 0), max.WE = c(30,
30, 0, 0, 0, 0)), .Names = c("DATE", "DATEID", "MONTH", "DURATION",
"D.DEEP", "P.DEEP", "STIMS", "D.REM", "P.REM", "D.LIGHT", "P.LIGHT",
"D.AWAKE", "P.AWAKE", "WAKE.UP", "AGITATION", "FRAGMENTATION",
"PERIOD", "SPORT", "ACTIVITY", "TABLETS", "Ratio", "Sign", "Sign.ratio",
"Sign.deep", "Sign.awake", "Sign.light", "index", "min", "max.WE"
), row.names = c(NA, 6L), class = "data.frame")
Thanks for adding the data, that makes it easier to understand exactly what you're working with and to confirm that an answer actually addresses your question.
I thought it would be helpful to make a separate table with just the start and end of each contiguous set of rows with the same PERIOD. I did this using dplyr::case_when, assuming we should mark dates as a "start" if they are the first row in the table (row_number() == 1), or they have a different PERIOD value than the prior row. I mark dates as an "end" if they are the last row of the table, or have a different PERIOD than the next row. I only keep the starts and ends, and spread these into new columns called start and end.
library(tidyverse)
# Build one row per contiguous run of identical PERIOD values, with the
# DATEID of its first row (start) and last row (end).
#
# The run id is computed first and the boundaries are then taken per run.
# Tagging rows as "start" or "end" and spreading them loses the end of any
# run that is a single row (that row can only get one tag), leaving end = NA
# and therefore no rectangle for that day.
Period_ranges <- Sleep.data %>%
  # A new run starts on the first row or whenever PERIOD differs from the
  # previous row; cumsum() turns those flags into a running run number.
  mutate(PERIOD_NUM = cumsum(row_number() == 1 | PERIOD != lag(PERIOD))) %>%
  group_by(PERIOD, PERIOD_NUM) %>%
  # Column order (end, then start) matches the output of the spread() version.
  summarise(end = last(DATEID), start = first(DATEID)) %>%
  ungroup()
# Output based on sample data only. If there's a problem with the full data, please add more. To share full data, use `dput(Sleep.data)` or to share 20 rows use `dput(head(Sleep.data, 20))`.
>Period_ranges
PERIOD PERIOD_NUM end start
1 WE 1 2 1
2 WORK 2 6 3
We can now use that in the plot. If you want to toggle the inclusion or fiddle with the appearance separately of different PERIOD types, you could modify the code below with Period_ranges %>% filter(PERIOD == "WE"),
# Shade each contiguous PERIOD run behind the data; the trailing `+` is
# intentional, the asker's remaining layers are appended after it.
ggplot(Sleep.data, aes(x = DATEID)) +
  # Here I specify that this geom should use its own data.
  # I start the rectangles half a day before and end half a day after to fill the space.
  # inherit.aes is spelled FALSE rather than F, since F can be reassigned.
  geom_rect(data = Period_ranges, inherit.aes = FALSE,
            aes(xmin = start - 0.5, xmax = end + 0.5,
                ymin = 0, ymax = 30,
                fill = PERIOD), alpha = 0.5) +
  # Here we can specify the shading color for each type of PERIOD.
  # NOTE(review): only "WE" and "WORK" are listed; add entries for any other
  # PERIOD levels present in the full data.
  scale_fill_manual(values = c(
    "WE" = '#6495ED80',
    "WORK" = "gray60"
  )) +
# rest of your code
Chart based on data sample:
I'm using the following data frame:
df1 <- structure(list(Genotype = structure(c(1L, 1L, 1L, 1L, 1L,
2L,2L,2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L,1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L),
.Label= c("miR-15/16 FL", "miR-15/16 cKO"), class = "factor"),
Tissue = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("iLN", "Spleen", "Skin", "Colon"), class = "factor"),
`Cells/SC/Live/CD8—,, CD4+/Foxp3+,Median,<BV421-A>,CD127` = c(518L,
715L, 572L, 599L, 614L, 881L, 743L, 722L, 779L, 843L, 494L,
610L, 613L, 624L, 631L, 925L, 880L, 932L, 876L, 926L, 1786L,
2079L, 2199L, 2345L, 2360L, 2408L, 2509L, 3129L, 3263L, 3714L,
917L, NA, 1066L, 1059L, 939L, 1269L, 1047L, 974L, 1048L,
1084L)),
.Names = c("Genotype", "Tissue", "Cells/SC/Live/CD8—,,CD4+/Foxp3+,Median,<BV421-A>,CD127"),
row.names = c(NA, -40L), class = c("tbl_df", "tbl", "data.frame"))
and trying to make a plot using ggplot2 where box plots and points are displayed grouped by "Tissue" and interleaved by "Genotype". The significance values are displaying properly but I would like to add lines to denote the comparisons being made and have them start at the center of each "miR-15/16 FL" box plot and end at the center of each "miR-15/16 cKO" box plot and sit directly below the significance values. Below is the code I am using to generate the plot:
library(ggplot2)
library(ggpubr)
# Colour and point shape per genotype, named by the genotype labels so the
# manual scales can look them up.
color.groups <- setNames(c("black", "red"), unique(df1$Genotype))
shape.groups <- setNames(c(16, 1), unique(df1$Genotype))
# Box plots plus dodged points of the third column of df1, grouped by Tissue
# and split by Genotype, with a t-test p-value printed above each tissue.
ggplot(df1, aes(x = Tissue, y = df1[3], color = Genotype, shape = Genotype)) +
  geom_boxplot(position = position_dodge(), outlier.shape = NA) +
  geom_point(position = position_dodge(width = 0.75)) +
  # Leave 20% headroom above the largest value for the p-value labels.
  ylim(0, 1.2 * max(df1[3], na.rm = TRUE)) +
  ylab("MFI CD127 (of CD4+ Foxp3+ T cells") +
  scale_color_manual(values = color.groups) +
  scale_shape_manual(values = shape.groups) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.title.x = element_blank(),
    aspect.ratio = 1,
    text = element_text(size = 9)
  ) +
  # One label height per block of ten rows (rows 1-10, 11-20, 21-30, 31-40):
  # that block's maximum plus 10% of the overall maximum.
  stat_compare_means(
    show.legend = FALSE,
    label = "p.format",
    method = "t.test",
    label.y = c(
      0.1 * max(df1[3], na.rm = TRUE) + max(df1[3][1:10, ], na.rm = TRUE),
      0.1 * max(df1[3], na.rm = TRUE) + max(df1[3][11:20, ], na.rm = TRUE),
      0.1 * max(df1[3], na.rm = TRUE) + max(df1[3][21:30, ], na.rm = TRUE),
      0.1 * max(df1[3], na.rm = TRUE) + max(df1[3][31:40, ], na.rm = TRUE)
    )
  )
Thanks for any help!
I've created the brackets with three calls to geom_segment. These calls use a new dmax data frame created to provide the reference y-values for positioning the brackets and the p-value labels. The values e and r are for tweaking these positions.
I've made a few other changes to your code.
Change the name of the third column to temp and use this name y=temp in the call to ggplot. Your original code uses y=df1[3], which essentially reaches outside the plot environment to the df1 object in the parent environment, which can cause problems. Also, having a short name to refer to makes it easier to generate the dmax data frame and refer to its columns.
Use the dmax data frame for label.y positions in stat_compare_means, which reduces the amount of code needed. (Incidentally, stat_compare_means seems to require hard-coded label.y positions, rather than getting them from an aes mapping of the data.)
Position the p-value labels an absolute distance above each pair of box plots (using the value e), rather than a multiplicative distance. This makes it easier to keep spacing consistent between p-value labels, brackets, and box plots.
# group_by(), summarise() and %>% come from dplyr, which the snippet never
# loaded (only ggplot2 and ggpubr are attached above).
library(dplyr)
# Use a short column name for the third column
names(df1)[3] <- "temp"
# Generate data frame of reference y-values for p-value labels and bracket
# positions: one row per Tissue holding that tissue's maximum value.
# Genotype is filled with NA as a placeholder column.
dmax <- df1 %>%
  group_by(Tissue) %>%
  summarise(temp = max(temp, na.rm = TRUE),
            Genotype = NA)
# For tweaking position of brackets
e <- 350           # distance of the p-value label above each tissue's maximum
r <- 0.6           # bracket height as a fraction of e
w <- 0.19          # horizontal half-width of the bracket around the tissue position
bcol <- "grey30"   # bracket colour
# Box plots and dodged points of temp by Tissue and Genotype, with a t-test
# p-value and a comparison bracket above each tissue.
ggplot(df1, aes(x = Tissue, y = temp, color = Genotype, shape = Genotype)) +
  geom_boxplot(position = position_dodge(), outlier.shape = NA) +
  geom_point(position = position_dodge(width = 0.75)) +
  # 20% headroom above the largest value leaves room for labels and brackets.
  ylim(0, 1.2 * max(df1$temp, na.rm = TRUE)) +
  # Closing parenthesis added to the axis label.
  ylab("MFI CD127 (of CD4+ Foxp3+ T cells)") +
  scale_color_manual(values = color.groups) +
  scale_shape_manual(values = shape.groups) +
  theme_bw() +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        axis.title.x = element_blank(), aspect.ratio = 1,
        text = element_text(size = 9)) +
  # p-value labels sit e units above each tissue's maximum.
  stat_compare_means(show.legend = FALSE, label = "p.format",
                     method = "t.test",
                     label.y = e + dmax$temp) +
  # Horizontal bar of the bracket, r * e above the maximum, spanning +/- w
  # around the tissue's x position.
  geom_segment(data = dmax,
               aes(x = as.numeric(Tissue) - w, xend = as.numeric(Tissue) + w,
                   y = temp + r * e, yend = temp + r * e),
               size = 0.3, color = bcol, inherit.aes = FALSE) +
  # Right-hand tick, dropping 60 units below the bar.
  geom_segment(data = dmax,
               aes(x = as.numeric(Tissue) + w, xend = as.numeric(Tissue) + w,
                   y = temp + r * e, yend = temp + r * e - 60),
               size = 0.3, color = bcol, inherit.aes = FALSE) +
  # Left-hand tick, dropping 60 units below the bar.
  geom_segment(data = dmax,
               aes(x = as.numeric(Tissue) - w, xend = as.numeric(Tissue) - w,
                   y = temp + r * e, yend = temp + r * e - 60),
               size = 0.3, color = bcol, inherit.aes = FALSE)
To address your comment, here's an example to show that the method above inherently adjusts to any number of x-categories.
Let's begin by adding two new tissue categories:
library(forcats)
# Add two extra factor levels and reassign every fourth row in rows 1-20 and
# 21-40 to them, so the plot has six x categories.
df1$Tissue <- fct_expand(df1$Tissue, "Tissue 5", "Tissue 6")
df1$Tissue[seq(1, 20, by = 4)] <- "Tissue 5"
df1$Tissue[seq(21, 40, by = 4)] <- "Tissue 6"
# Recompute the per-tissue maxima used to position labels and brackets.
dmax <- df1 %>%
  group_by(Tissue) %>%
  summarise(
    temp = max(temp, na.rm = TRUE),
    Genotype = NA
  )
Now run exactly the same plot code listed above to get the following plot:
I've created a plot which shows the means of two groups and associated 95% confidence band, as below. For the plot, I've already used different line types, fillings, colors.
The data plot_band is as follows.
dput(plot_band)
structure(list(mean = c(0.0909296772008702, 0.0949102886382386,
0.0989192140983566, 0.102428753920507, 0.106190021551613, 0.109834234007574,
0.11282406874623, 0.116443987192088, 0.119646042014149, 0.122877131667032,
0.125734341129646, 0.129194412319665, 0.131921946416482, 0.13467000293138,
0.137801823091921, 0.140320771073742, 0.143300871011905, 0.145703574224808,
0.148502607395268, 0.151216269559201, 0.153957673466713, 0.15642722394871,
0.159399752204122, 0.16158535629103, 0.163992551285173, 0.166446319141126,
0.168796463238069, 0.17130024918415, 0.17319290052143, 0.175970079857704,
0.178037138778032, 0.180359643729028, 0.182563083353043, 0.184882067722455,
0.186933337196788, 0.18928611634363, 0.19095095692481, 0.193552969255731,
0.195137836881874, 0.197581990963152, 0.199824696342001, 0.201576167030431,
0.203292777876833, 0.205785273925517, 0.207611128924057, 0.209067294675698,
0.211624327477106, 0.213018027996152, 0.215073900329166, 0.21654896049152,
0.218432328738047, 0.220299232072702, 0.221520169903876, 0.224082916931098,
0.225373663731495, 0.227623092060467, 0.228971037740905, 0.230665903341562,
0.232255049713341, 0.233816039663021, 0.236156033603955, 0.237722706454038,
0.239326639984125, 0.241061288510212, 0.323782287073584, 0.325539303794681,
0.326575563604555, 0.327932235745535, 0.329326904419804, 0.330270965006864,
0.331794972975829, 0.332736401387824, 0.333736983920265, 0.334858878358806,
0.335995344145518, 0.336884010919713, 0.337760950823761, 0.338470035342276,
0.339694375762279, 0.340590586642847, 0.340934410282471, 0.342186505998774,
0.342699699846757, 0.343822718137376, 0.344352069575663, 0.345191547743302,
0.345986783878912, 0.346908459064914, 0.347636673707646, 0.3483601957891,
0.349017016236978, 0.349393026672962, 0.350215046428817, 0.350578051082168,
0.351357872622786, 0.351833990930714, 0.352451422717008, 0.352852417773313,
0.353786047124291, 0.354360144310735, 0.354804607588953, 0.355216156665893,
0.3556114518015, 0.356570758245453, 0.357097049535425, 0.357671243406622,
0.35787930232607, 0.358500009058086, 0.359107586207553, 0.359418346394681,
0.359923090516015, 0.360327770652831, 0.360646653761867, 0.361526704703965,
0.361860340596181, 0.362284616802613, 0.362408547406209, 0.363068975461424,
0.363173638916247, 0.363746165222553, 0.364318465554143, 0.364550369183249,
0.365263491228022, 0.365588246738469, 0.366124420845147, 0.366327320718437,
0.366730809501062, 0.367298014408034), p2.5 = c(0.00920236578162877,
0.0111305911426958, 0.0131257550019632, 0.015586474005665, 0.017588259827762,
0.0195835240844649, 0.021653464115484, 0.0245221378289171, 0.0263028370478539,
0.0283125178459841, 0.030809139661692, 0.034224299031932, 0.0351514351131448,
0.0374690177003245, 0.0401208217539481, 0.0416432632702995, 0.0436268495854353,
0.0455924496480308, 0.0481710615607138, 0.0498487868097217, 0.052013860735697,
0.0541864115090449, 0.0559355297931858, 0.0582185384506931, 0.0595049507852038,
0.0617291057747846, 0.0624904066599628, 0.064090526611587, 0.0665855608482458,
0.0681610015253132, 0.0689510143842853, 0.0714235246023074, 0.0730718365551066,
0.0733828347805513, 0.0749772653575311, 0.0775677990166739, 0.0782434582066251,
0.0809696065399504, 0.0800620502625316, 0.0822097262074474, 0.0837314882447324,
0.0836800886932387, 0.0843305338836378, 0.0862036703259026, 0.0874082656018874,
0.0881312854081838, 0.0887921830279765, 0.0892805555426737, 0.0901061351380764,
0.0914750995958728, 0.0913838119125662, 0.0926827936869315, 0.0929511644196126,
0.0940218350370357, 0.0944327299872979, 0.0953545299910439, 0.0948298565703383,
0.0957001873318579, 0.0961251564147676, 0.0971098251546806, 0.0974911491380601,
0.0986598120212823, 0.0982370236835561, 0.0987719638365328, 0.114148199394403,
0.125138552629865, 0.133069438084806, 0.140931059768343, 0.147647282172844,
0.155831735418124, 0.163154010787227, 0.16809087346053, 0.173413948644787,
0.178336300631342, 0.183561163161725, 0.189552221591194, 0.192350001446747,
0.19547327255232, 0.19824967633061, 0.202611107184988, 0.205071997319457,
0.206232495037667, 0.208471493073236, 0.209717390943683, 0.211692880593303,
0.213829033311537, 0.215383413348152, 0.216370831366554, 0.216980537940184,
0.217670415960084, 0.218147500129008, 0.219104770868165, 0.220215949003459,
0.219501167154474, 0.219635297722562, 0.220565169003312, 0.218821371303922,
0.218910618214851, 0.219518190869959, 0.219204079206471, 0.219448334243776,
0.219174641398391, 0.217619259716122, 0.217993716481521, 0.218343413130982,
0.217141573568049, 0.216438618727695, 0.215672180354215, 0.214841486865522,
0.214092486614703, 0.216084004877199, 0.213891621307228, 0.213397326450924,
0.212530621813324, 0.212650230928244, 0.211323326285971, 0.211512467761759,
0.209879967307571, 0.208388878793908, 0.209257043929222, 0.207665115418059,
0.207413292377895, 0.204980142991601, 0.206053394727878, 0.205039712521127,
0.203155679138143, 0.202289445844638, 0.201779149557556), p97.5 = c(0.240681337890249,
0.239988615023241, 0.239222274397932, 0.23882694927308, 0.239567463457127,
0.240035884370459, 0.239971640602537, 0.242348644629734, 0.244241554912481,
0.246794068956881, 0.248869825514075, 0.252843804762058, 0.254595507587193,
0.257498240756364, 0.26074636531938, 0.263991307688752, 0.268222101449506,
0.270245299020079, 0.278955701793892, 0.280366963871541, 0.286253886155709,
0.290942761721134, 0.29709853936211, 0.300641051539586, 0.307350564223005,
0.314475951046524, 0.31757563389217, 0.324250050938626, 0.326645521042049,
0.334746718583917, 0.341297900171566, 0.347056902406046, 0.352412986039391,
0.356409285744598, 0.364329251893085, 0.36882469705109, 0.373595444661095,
0.379308956442793, 0.388012909521406, 0.393418480355642, 0.399407258087214,
0.403270925317011, 0.407517084163824, 0.413742327029277, 0.42089783652825,
0.422996679448412, 0.430738094720356, 0.433915405828653, 0.438263395419797,
0.442376801773873, 0.450664409546504, 0.453854917168461, 0.455755257192578,
0.463879371708031, 0.470262095557133, 0.478816677993115, 0.478998770025097,
0.485204929246363, 0.490588733478761, 0.49747652543363, 0.498792119487052,
0.508008619470507, 0.51314092048762, 0.518568532547669, 0.579810955268174,
0.563256045407579, 0.55093710586083, 0.541241619905278, 0.532667775608687,
0.523824194956849, 0.518816497858615, 0.512618467188886, 0.506452368044292,
0.501653171003674, 0.499276681561068, 0.496002704329641, 0.494256887981196,
0.49200837587611, 0.490570113245846, 0.491077058931435, 0.487352049845066,
0.487927727831147, 0.487928022062059, 0.488900063808496, 0.488866145012628,
0.489808465409391, 0.491100206396406, 0.492044173457154, 0.494346147046575,
0.494980820850837, 0.49616843086841, 0.497216550345458, 0.499201695431901,
0.501160614633382, 0.502598288902507, 0.504203085629905, 0.50530488873578,
0.508449115699177, 0.508914783054669, 0.51306711977087, 0.51479783743171,
0.51648055644086, 0.518549503653961, 0.522859455223989, 0.522598786005884,
0.52736459871623, 0.527054294078792, 0.532359397607223, 0.532643025946804,
0.533817320437782, 0.535862852499484, 0.539613602346564, 0.54138065631686,
0.544340213112881, 0.545596882887723, 0.549029532028693, 0.546769636775625,
0.551728290583129, 0.552996735997194, 0.555676593069663, 0.559580922687426,
0.561700216317917, 0.562726465369815, 0.563527127546323, 0.567715046522725,
0.568850181180136, 0.56965258128659, 0.571847219713553), outcome = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("DLT",
"CB"), class = "factor"), exp_X_post = c(721.595263503532, 794.40305777437,
865.319646465533, 933.669956578678, 999.728550839186, 1062.12810757171,
1121.92986212885, 1186.37187215809, 1246.1267376175, 1305.33376392859,
1359.36602305224, 1421.23758898206, 1472.44041133326, 1520.62395309786,
1584.09764621781, 1634.01654454251, 1685.34860459111, 1735.26374323406,
1785.87871337346, 1840.42999799797, 1888.32905203148, 1937.38674685726,
1990.74583676908, 2041.61942276328, 2083.76909363497, 2134.07414000003,
2177.97560514382, 2227.25787768033, 2269.76501622686, 2319.50659548171,
2360.78992430352, 2404.37623851091, 2449.36656617313, 2500.80748523146,
2540.71467060956, 2588.5685157055, 2630.93535458068, 2675.04099554242,
2709.53185769419, 2763.12669881888, 2807.24737149465, 2849.03542063784,
2887.16961904492, 2927.78459960731, 2973.91123171086, 3006.0197134382,
3056.06581532434, 3089.41001229951, 3132.29020081068, 3177.35838641843,
3212.66669292569, 3256.19625640177, 3284.73766167032, 3330.28770837953,
3368.28973519186, 3409.05190043795, 3449.93435443996, 3485.59367731521,
3524.70884576068, 3557.60265444533, 3615.06476720162, 3648.55074883409,
3688.13510762386, 3727.38208940522, 721.595263503532, 794.40305777437,
865.319646465533, 933.669956578678, 999.728550839186, 1062.12810757171,
1121.92986212885, 1186.37187215809, 1246.1267376175, 1305.33376392859,
1359.36602305224, 1421.23758898206, 1472.44041133326, 1520.62395309786,
1584.09764621781, 1634.01654454251, 1685.34860459111, 1735.26374323406,
1785.87871337346, 1840.42999799797, 1888.32905203148, 1937.38674685726,
1990.74583676908, 2041.61942276328, 2083.76909363497, 2134.07414000003,
2177.97560514382, 2227.25787768033, 2269.76501622686, 2319.50659548171,
2360.78992430352, 2404.37623851091, 2449.36656617313, 2500.80748523146,
2540.71467060956, 2588.5685157055, 2630.93535458068, 2675.04099554242,
2709.53185769419, 2763.12669881888, 2807.24737149465, 2849.03542063784,
2887.16961904492, 2927.78459960731, 2973.91123171086, 3006.0197134382,
3056.06581532434, 3089.41001229951, 3132.29020081068, 3177.35838641843,
3212.66669292569, 3256.19625640177, 3284.73766167032, 3330.28770837953,
3368.28973519186, 3409.05190043795, 3449.93435443996, 3485.59367731521,
3524.70884576068, 3557.60265444533, 3615.06476720162, 3648.55074883409,
3688.13510762386, 3727.38208940522)), .Names = c("mean", "p2.5",
"p97.5", "outcome", "exp_X_post"), row.names = c("pi_A[1]", "pi_A[2]",
"pi_A[3]", "pi_A[4]", "pi_A[5]", "pi_A[6]", "pi_A[7]", "pi_A[8]",
"pi_A[9]", "pi_A[10]", "pi_A[11]", "pi_A[12]", "pi_A[13]", "pi_A[14]",
"pi_A[15]", "pi_A[16]", "pi_A[17]", "pi_A[18]", "pi_A[19]", "pi_A[20]",
"pi_A[21]", "pi_A[22]", "pi_A[23]", "pi_A[24]", "pi_A[25]", "pi_A[26]",
"pi_A[27]", "pi_A[28]", "pi_A[29]", "pi_A[30]", "pi_A[31]", "pi_A[32]",
"pi_A[33]", "pi_A[34]", "pi_A[35]", "pi_A[36]", "pi_A[37]", "pi_A[38]",
"pi_A[39]", "pi_A[40]", "pi_A[41]", "pi_A[42]", "pi_A[43]", "pi_A[44]",
"pi_A[45]", "pi_A[46]", "pi_A[47]", "pi_A[48]", "pi_A[49]", "pi_A[50]",
"pi_A[51]", "pi_A[52]", "pi_A[53]", "pi_A[54]", "pi_A[55]", "pi_A[56]",
"pi_A[57]", "pi_A[58]", "pi_A[59]", "pi_A[60]", "pi_A[61]", "pi_A[62]",
"pi_A[63]", "pi_A[64]", "qi_A[1]", "qi_A[2]", "qi_A[3]", "qi_A[4]",
"qi_A[5]", "qi_A[6]", "qi_A[7]", "qi_A[8]", "qi_A[9]", "qi_A[10]",
"qi_A[11]", "qi_A[12]", "qi_A[13]", "qi_A[14]", "qi_A[15]", "qi_A[16]",
"qi_A[17]", "qi_A[18]", "qi_A[19]", "qi_A[20]", "qi_A[21]", "qi_A[22]",
"qi_A[23]", "qi_A[24]", "qi_A[25]", "qi_A[26]", "qi_A[27]", "qi_A[28]",
"qi_A[29]", "qi_A[30]", "qi_A[31]", "qi_A[32]", "qi_A[33]", "qi_A[34]",
"qi_A[35]", "qi_A[36]", "qi_A[37]", "qi_A[38]", "qi_A[39]", "qi_A[40]",
"qi_A[41]", "qi_A[42]", "qi_A[43]", "qi_A[44]", "qi_A[45]", "qi_A[46]",
"qi_A[47]", "qi_A[48]", "qi_A[49]", "qi_A[50]", "qi_A[51]", "qi_A[52]",
"qi_A[53]", "qi_A[54]", "qi_A[55]", "qi_A[56]", "qi_A[57]", "qi_A[58]",
"qi_A[59]", "qi_A[60]", "qi_A[61]", "qi_A[62]", "qi_A[63]", "qi_A[64]"
), class = "data.frame")
Now I want to add some vertical dashed lines. I wish to use a different color for each vertical line and have a legend for those lines as well. The information for those vertical lines is in another data frame, observed_mean:
dput(observed_mean)
structure(list(TRT = structure(1:9, .Label = c("A", "B", "C",
"D", "E", "F", "G", "H", "I"), class = "factor"), gmcmin = c(967.117632548,
1306.76729845833, 2394.519441584, 2404.73065902857, 3047.48745766364,
2550.12866139, 1863.6505272925, 3569.57489109, 3660.40695204)), .Names = c("TRT",
"gmcmin"), row.names = c(NA, -9L), class = "data.frame")
Here is the code to generate the plot:
# Plot the mean curve and its p2.5-p97.5 band for each outcome against
# exposure, with dashed vertical lines at the values in observed_mean$gmcmin.
#
# x-axis limits: the range of exp_X_post, rounded outward to whole numbers.
# Note that this rebinds the name `range` to a numeric vector of length 2.
range <- range(plot_band$exp_X_post)
range <- c(floor(range[1]), ceiling(range[2]))
# Tick spacing: one tenth of the axis span, rounded down.
step <- floor((range[2] - range[1]) / 10)
ggplot(plot_band, aes(x = exp_X_post, y = mean,
color = outcome, linetype = outcome)) +
# Shaded band between the p2.5 and p97.5 columns. linetype = NA sits inside
# aes() here, so it is an aesthetic mapping rather than a fixed setting.
geom_ribbon(aes(ymin = p2.5, ymax = p97.5, linetype = NA,
fill = outcome),
alpha = 0.4) +
geom_line(size = 1.5) +
xlab("Exposure") +
ylab("Proability of CB/DLT") +
scale_x_continuous(limits = range,
breaks = seq(range[1], range[2], by = step)
) +
# One dashed line per element of observed_mean$gmcmin. xintercept is passed
# as a fixed vector (not through aes()), so no legend is drawn for the lines.
geom_vline(xintercept = observed_mean$gmcmin,
linetype = 'longdash') +
theme_bw() +
# Legend above the panel; plot.margin is c(top, right, bottom, left) in
# lines, and the y-axis title gets a 15-unit margin on its right side.
theme(legend.position = 'top',
plot.margin = unit(c(1, 1, 3, 1), "lines"),
legend.title = element_text(size = 15),
axis.title.y = element_text(margin = margin(0, 15, 0, 0))) +
# Give the colour, fill and linetype scales the same legend title.
scale_color_discrete(name = "Probability (95% CI)") +
scale_fill_discrete(name = "Probability (95% CI)") +
scale_linetype_discrete(name = "Probability (95% CI)")
Note: the last three lines are used to change the legend title from variable name outcome to "Probability (95% CI)". NOT sure whether that's the right way though.
Questions:
I wish to put the current legend to the right, then below that I'd like to put the legend for vertical lines. Could anyone give me some clues how to do that?
As shown in the plot, there are two identical (not same color though) legends on top. The one below comes out if I change the order of the factor outcome with following code. I am not sure why that happens. How could I get rid of that?
plot_band$outcome <- factor(plot_band$outcome, levels = c("DLT", "CB"))
Thanks a lot for any comments/suggestions!!
The extra legend box is showing up because of the linetype = NA in the aes() of geom_ribbon; moving the linetype out of the mapping will take care of that.
For the line labeling, you can perhaps just put the labels on the plot using geom_text
Here is a full plot that does something like that (now with ggrepel to place the labels more sensibly -- can't believe I didn't start there)
# install.packages("devtools")
# devtools::install_github("slowkow/ggrepel")
library(ggrepel)

# Mean curve with its p2.5-p97.5 band per outcome, dashed vertical lines at
# the observed means, and a repelled text label (TRT) at the foot of each line.
#
# Expects in scope:
#   plot_band      data frame with exp_X_post, mean, p2.5, p97.5, outcome
#   observed_mean  data frame with TRT and gmcmin
#   range, step    x-axis limits and tick spacing computed from
#                  plot_band$exp_X_post
ggplot(
  plot_band,
  aes(x = exp_X_post, y = mean, color = outcome, linetype = outcome)
) +
  # linetype is a fixed parameter here, not an aes() mapping, so the ribbon
  # does not contribute a second legend.
  geom_ribbon(
    aes(ymin = p2.5, ymax = p97.5, fill = outcome),
    alpha = 0.4,
    linetype = "blank"
  ) +
  geom_line(size = 1.5) +
  xlab("Exposure") +
  ylab("Probability of CB/DLT") +
  scale_x_continuous(
    limits = range,
    breaks = seq(range[1], range[2], by = step)
  ) +
  geom_vline(xintercept = observed_mean$gmcmin, linetype = "longdash") +
  # The labels come from a different data frame that has no `outcome` or
  # `mean` column, so the plot-level mappings must not be inherited.
  # inherit.aes = FALSE does that directly, instead of overriding each
  # inherited aesthetic with NA (which would also push NA into the colour
  # scale). Labels are drawn in the geom's default colour.
  geom_text_repel(
    mapping = aes(x = gmcmin, y = 0, label = TRT),
    data = observed_mean,
    inherit.aes = FALSE,
    show.legend = FALSE
  ) +
  theme_bw() +
  # plot.margin is c(top, right, bottom, left) in lines.
  theme(
    legend.position = "top",
    plot.margin = unit(c(1, 1, 3, 1), "lines"),
    legend.title = element_text(size = 15),
    axis.title.y = element_text(margin = margin(0, 15, 0, 0))
  ) +
  # Give the colour, fill and linetype scales the same legend title.
  scale_color_discrete(name = "Probability (95% CI)") +
  scale_fill_discrete(name = "Probability (95% CI)") +
  scale_linetype_discrete(name = "Probability (95% CI)")
(Note: the mean labels overlap, so you may need to position them more carefully, e.g., by adding another column to observed_mean giving the position where you want them plotted).
If you need the labels to be in a legend instead, you can use this code:
# Mean curve with its p2.5-p97.5 band per outcome, plus one vertical line per
# treatment whose linetype is mapped to TRT so the lines get their own legend.
#
# Expects in scope:
#   plot_band      data frame with exp_X_post, mean, p2.5, p97.5, outcome
#   observed_mean  data frame with TRT and gmcmin
#   range          numeric x-axis limits computed from plot_band$exp_X_post
ggplot(plot_band, aes(x = exp_X_post, y = mean, color = outcome)) +
  # linetype is a fixed parameter here, not an aes() mapping.
  geom_ribbon(
    aes(ymin = p2.5, ymax = p97.5, fill = outcome),
    alpha = 0.4,
    linetype = "blank"
  ) +
  # linetype is deliberately not mapped to outcome for the curves; the
  # commented-out options are kept so the mapping can be restored.
  geom_line(
    # aes(linetype = outcome),
    size = 1.5
    # , show.legend = FALSE
  ) +
  xlab("Exposure") +
  ylab("Probability of CB/DLT") +
  scale_x_continuous(breaks = pretty(range)) +
  # Mapping linetype inside aes() (with observed_mean as the layer data) is
  # what produces the "Treatment" legend for the vertical lines.
  geom_vline(
    mapping = aes(xintercept = gmcmin, linetype = TRT),
    data = observed_mean
  ) +
  theme_bw() +
  # plot.margin is c(top, right, bottom, left) in lines.
  theme(
    legend.position = "right",
    plot.margin = unit(c(1, 1, 3, 1), "lines"),
    legend.title = element_text(size = 15),
    axis.title.y = element_text(margin = margin(0, 15, 0, 0))
  ) +
  # Colour and fill share one title; linetype is titled separately because
  # it now encodes the treatment.
  scale_color_discrete(name = "Probability (95% CI)") +
  scale_fill_discrete(name = "Probability (95% CI)") +
  scale_linetype_discrete(name = "Treatment")
Note that I removed the linetype from the main lines, as it was causing some weirdness with the vertical lines. You can add it back by uncommenting the parts in geom_line(), but note that it then shows up in the list with the treatments. There is probably a way to fix that if you absolutely need it, but my quick tries aren't working. I will note, however, that the linetypes are a bit hard to pick out.
Example plot with both the legend and the labels