I am plotting the proportion of deep sleep (y axis) vs days (x axis). I would like to add vertical shaded area for a better understanding (e.g. grey for week-ends, orange for sick period...).
I have tried using geom_ribbon (I created a variable taking the value of 30, with is the top of my y axis if the data is during the WE - information given in another column), but instead of getting rectangles, I get trapezes.
In another post, someone proposed the use of "geom_rect", or "annotate" if one's know the x and y coordinates, but I don't see how to adapt it in my case, when I want to have the colored area repeated to all week-end (it is not exactly every 7 days because some data are missing).
Do you have any idea ?
Many thanks in advance !
ggplot(Sleep.data, aes(x = DATEID)) +
geom_line(aes(y = P.DEEP, group = 1), col = "deepskyblue3") +
geom_point(aes(y = P.DEEP, group = 1, col = Sign.deep)) +
guides(col=FALSE) +
geom_ribbon(aes(ymin = min, ymax = max.WE), fill = '#6495ED80') +
facet_grid(MONTH~.) +
geom_hline(yintercept = 15, col = "forestgreen") +
geom_hline(yintercept = 20, col = "forestgreen", linetype = "dashed") +
geom_vline(xintercept = c(7,14,21,28), col = "grey") +
scale_x_continuous(breaks=seq(0,28,7)) +
scale_y_continuous(breaks=seq(0,30,5)) +
labs(x = "Days",y="Proportion of deep sleep stage", title = "Deep sleep")
Proportion of deep sleep vs time
Head(Sleep.data)
> dput(head(Sleep.data))
structure(list(DATE = structure(c(1L, 4L, 7L, 10L, 13L, 16L), .Label = c("01-Dec-17",
"01-Feb-18", "01-Jan-18", "02-Dec-17", "02-Feb-18", "02-Jan-18",
"03-Dec-17", "03-Feb-18", "03-Jan-18", "04-Dec-17", "04-Feb-18",
"04-Jan-18", "05-Dec-17", "05-Feb-18", "05-Jan-18", "06-Dec-17",
"06-Feb-18", "06-Jan-18", "07-Dec-17", "07-Feb-18", "07-Jan-18",
"08-Dec-17", "08-Jan-18", "09-Dec-17", "09-Feb-18", "09-Jan-18",
"10-Dec-17", "10-Jan-18", "11-Dec-17", "11-Feb-18", "11-Jan-18",
"12-Dec-17", "12-Jan-18", "13-Dec-17", "13-Feb-18", "13-Jan-18",
"14-Dec-17", "14-Feb-18", "14-Jan-18", "15-Dec-17", "15-Jan-18",
"16-Dec-17", "16-Jan-18", "17-Dec-17", "17-Jan-18", "18-Dec-17",
"18-Jan-18", "19-Dec-17", "19-Jan-18", "20-Dec-17", "21-Dec-17",
"21-Jan-18", "22-Dec-17", "22-Jan-18", "23-Dec-17", "23-Jan-18",
"24-Dec-17", "24-Jan-18", "25-Dec-17", "25-Jan-18", "26-Dec-17",
"26-Jan-18", "27-Dec-17", "27-Jan-18", "28-Dec-17", "28-Jan-18",
"29-Dec-17", "29-Jan-18", "30-Dec-17", "30-Jan-18", "31-Dec-17",
"31-Jan-18"), class = "factor"), DATEID = 1:6, MONTH = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Decembre", "Janvier", "FĂ©vrier"
), class = "factor"), DURATION = c(8.08, 7.43, 6.85, 6.23, 7.27,
6.62), D.DEEP = c(1.67, 1.37, 1.62, 1.75, 1.95, 0.9), P.DEEP = c(17L,
17L, 21L, 24L, 25L, 12L), STIMS = c(0L, 0L, 0L, 0L, 390L, 147L
), D.REM = c(1.7, 0.95, 0.95, 1.43, 1.47, 0.72), P.REM = c(17L,
11L, 12L, 20L, 19L, 9L), D.LIGHT = c(4.7, 5.12, 4.27, 3.05, 3.83,
4.98), P.LIGHT = c(49L, 63L, 55L, 43L, 49L, 66L), D.AWAKE = c(1.45,
0.58, 0.47, 0.87, 0.37, 0.85), P.AWAKE = c(15L, 7L, 6L, 12L,
4L, 11L), WAKE.UP = c(-2L, 0L, 2L, -1L, 3L, 1L), AGITATION = c(-1L,
-3L, -1L, -2L, 2L, -1L), FRAGMENTATION = c(1L, -2L, 2L, 1L, 0L,
-1L), PERIOD = structure(c(3L, 3L, 4L, 4L, 4L, 4L), .Label = c("HOLIDAYS",
"SICK", "WE", "WORK"), class = "factor"), SPORT = structure(c(2L,
1L, 2L, 2L, 2L, 1L), .Label = c("", "Day", "Evening"), class = "factor"),
ACTIVITY = structure(c(6L, 1L, 3L, 4L, 5L, 1L), .Label = c("",
"Bkool", "eBike", "Gym", "Natation", "Run"), class = "factor"),
TABLETS = c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5), Ratio = c(1.15,
2.36, 3.45, 2.01, 5.27, 1.06), Sign = structure(c(2L, 2L,
2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor"),
Sign.ratio = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), Sign.deep = structure(c(2L, 2L,
2L, 2L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
Sign.awake = structure(c(1L, 2L, 2L, 1L, 2L, 1L), .Label = c("0",
"1"), class = "factor"), Sign.light = structure(c(2L, 1L,
1L, 2L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
index = structure(c(1L, 1L, 1L, 1L, 2L, 1L), .Label = c("0",
"1"), class = "factor"), min = c(0, 0, 0, 0, 0, 0), max.WE = c(30,
30, 0, 0, 0, 0)), .Names = c("DATE", "DATEID", "MONTH", "DURATION",
"D.DEEP", "P.DEEP", "STIMS", "D.REM", "P.REM", "D.LIGHT", "P.LIGHT",
"D.AWAKE", "P.AWAKE", "WAKE.UP", "AGITATION", "FRAGMENTATION",
"PERIOD", "SPORT", "ACTIVITY", "TABLETS", "Ratio", "Sign", "Sign.ratio",
"Sign.deep", "Sign.awake", "Sign.light", "index", "min", "max.WE"
), row.names = c(NA, 6L), class = "data.frame")
Thanks for adding the data, that makes it easier to understand exactly what you're working with and to confirm that an answer actually addresses your question.
I thought it would be helpful to make a separate table with just the start and end of each contiguous set of rows with the same PERIOD. I did this using dplyr::case_when, assuming we should mark dates as a "start" if they are the first row in the table (row_number() == 1), or they have a different PERIOD value than the prior row. I mark dates as an "end" if they are the last row of the table, or have a different PERIOD than the next row. I only keep the starts and ends, and spread these into new columns called start and end.
library(tidyverse)
Period_ranges <- Sleep.data %>%
mutate(period_status = case_when(row_number() == 1 ~ "start",
PERIOD != lag(PERIOD) ~ "start",
row_number() == n() ~ "end",
PERIOD != lead(PERIOD) ~ "end",
TRUE ~ "other")) %>%
filter(period_status %in% c("start", "end")) %>%
select(DATEID, PERIOD, period_status) %>%
mutate(PERIOD_NUM = cumsum(PERIOD != lag(PERIOD) | row_number() == 1)) %>%
spread(period_status, DATEID)
# Output based on sample data only. If there's a problem with the full data, please add more. To share full data, use `dput(Sleep.data)` or to share 20 rows use `dput(head(Sleep.data, 20))`.
>Period_ranges
PERIOD PERIOD_NUM end start
1 WE 1 2 1
2 WORK 2 6 3
We can now use that in the plot. If you want to toggle the inclusion or fiddle with the appearance separately of different PERIOD types, you could modify the code below with Period_ranges %>% filter(PERIOD == "WE"),
ggplot(Sleep.data, aes(x = DATEID)) +
# Here I specify that this geom should use its own data.
# I start the rectangles half a day before and end half a day after to fill the space.
geom_rect(data = Period_ranges, inherit.aes = F,
aes(xmin = start - 0.5, xmax = end + 0.5,
ymin = 0, ymax = 30,
fill = PERIOD), alpha = 0.5) +
# Here we can specify the shading color for each type of PERIOD
scale_fill_manual(values = c(
"WE" = '#6495ED80',
"WORK" = "gray60"
)) +
# rest of your code
Chart based on data sample:
Related
Maybe it`s already too late for working in my code.
I have to redo my figure in ggplot2, and I am unabable to do so.
My errorbars are not showing and I cannot understand why.
dev.new()
ggplot() +
geom_point(data = conf_intervals, aes(y = mean_ALA, x = mean_LIN, shape=trophic,
color=feeding_type_2,
size = 3,
alpha = 0.5)) + scale_shape_manual("Trophic level", values = c(15:19)) +
geom_errorbar(data = conf_intervals,aes(mean_ALA, ymin = mean_ALA - se_ALA,
ymax = mean_ALA + se_ALA,)) +
geom_errorbarh(data = conf_intervals,aes(mean_LIN, ymin = mean_LIN - se_LIN,
ymax = mean_LIN + se_LIN)) +
geom_point(data = CSIA_inverts_basal, aes(y=ALA.d13C, x=LIN.d13C, color=feeding_type_2))
labs (title="Biplot of compound stable isotopes- Centroids with 95 % CI", subtitle="LIN VS. ALA",
y=expression({delta}^13*C[ALA]~'\211'~VPDB),
x=expression({delta}^13*C[LIN]~'\211'~VPDB)) +
# guides(color = FALSE, shape = FALSE) +
theme_classic()
Error: geom_errorbarh requires the following missing aesthetics: y
Run rlang::last_error() to see where the error occurred.
In addition: Warning message:
Ignoring unknown aesthetics: x
Here is my example data:
dput(head(CSIA_inverts_basal))
structure(list(d13C.VPDB = c(-35.10487341, -34.85465717, -34.67216423,
-34.06032315, -33.68548439, -33.4811546), d15.NAIR = c(-6.321847159,
-5.384989361, -2.638749276, -4.986045928, -5.946279778, -6.648526348
), ALA.d13C = c(-43.2375195, -44.77813854, -42.1921855, -41.58363894,
-39.156857, -40.33135344), LIN.d13C = c(-40.864145, -42.32043061,
-41.4247005, -36.08156681, -39.45744387, -37.76516617), combi = structure(c(1L,
1L, 1L, 1L, 1L, 1L), levels = c("epilithon", "fresh.leaves",
"gammarus", "grazing.ephemeroptera", "predatory.plecoptera",
"salmonid.eyes", "shreddering.plecoptera", "submerged.leaves"
), class = "factor"), feeding_type = c("epilithon", "epilithon",
"epilithon", "epilithon", "epilithon", "epilithon"), sampling.time = c("summer",
"fall", "summer", "fall", "fall", "fall"), year = c(2018L, 2016L,
2018L, 2016L, 2016L, 2016L), split = structure(c(2L, 2L, 2L,
2L, 2L, 2L), levels = c("consumer", "resource"), class = "factor"),
split_2 = c("epilithon", "epilithon", "epilithon", "epilithon",
"epilithon", "epilithon"), split_3 = c("epilithon", "epilithon",
"epilithon", "epilithon", "epilithon", "epilithon"), feeding_type_2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), levels = c("Epilithon", "Fresh leaves",
"Grazer", "Salmonid (Eyes)", "Predator", "Submerged leaves",
"Shredder"), class = "factor"), trophic = structure(c(1L,
1L, 1L, 1L, 1L, 1L), levels = c("Base", "Non-predatory invertebrate",
"Predatory invertebrate", "Predator"), class = "factor")), row.names = 2:7, class = "data.frame")
dput(conf_intervals)
structure(list(trophic = structure(c(1L, 1L, 1L, 2L, 2L, 3L,
4L), levels = c("Base", "Non-predatory invertebrate", "Predatory invertebrate",
"Predator"), class = "factor"), feeding_type_2 = structure(c(1L,
2L, 6L, 3L, 7L, 5L, 4L), levels = c("Epilithon", "Fresh leaves",
"Grazer", "Salmonid (Eyes)", "Predator", "Submerged leaves",
"Shredder"), class = "factor"), mean_ALA = c(-42.1, -39.7, -38.7,
-45.7, -40.3, -42.8, -42.7), mean_LIN = c(-39.2, -40, -37.2,
-40.8, -35.9, -36.7, -37.9), se_ALA = c(1.1, 1.1, 1.1, 2.2, 1.2,
1.9, 0.4), se_LIN = c(1.1, 1.1, 0.6, 1.8, 0.9, 1.3, 0.6), N_ALA = c(12L,
14L, 10L, 9L, 14L, 7L, 17L), LIN_N = c(12L, 14L, 10L, 9L, 14L,
7L, 17L)), class = "data.frame", row.names = c(NA, -7L))
Can someone help me?
geom_errorbarh doesn't have an aesthetic called x. It has a y, an xmin and an xmax. I suspect you are mixing up the x and y variables in your errorbar calls too, so check these carefully. Also, you should move alpha and size outside of aes so they don't appear in the legend.
It's also a good idea to make sure your code is formatted in such a way that it is easier to read and debug. Limiting your line length and using inheritance of the data passed to your initial ggplot call helps to simplify things a bit too.
ggplot(conf_intervals) +
geom_point(aes(y = mean_ALA, x = mean_LIN, shape = trophic,
color = feeding_type_2), size = 3, alpha = 0.5) +
scale_shape_manual("Trophic level", values = c(15:19)) +
geom_errorbar(aes(mean_LIN, ymin = mean_ALA - se_ALA,
ymax = mean_ALA + se_ALA)) +
geom_errorbarh(aes(y = mean_ALA, xmin = mean_LIN - se_LIN,
xmax = mean_LIN + se_LIN)) +
geom_point(data = CSIA_inverts_basal,
aes(y = ALA.d13C, x = LIN.d13C, color = feeding_type_2)) +
labs(title = "Biplot of compound stable isotopes- Centroids with 95 % CI",
subtitle = "LIN VS. ALA",
y = expression({delta}^13*C[ALA]~'\211'~VPDB),
x = expression({delta}^13*C[LIN]~'\211'~VPDB)) +
theme_classic()
I wanted a visualization something like this
I ended up getting like this one
I'm kind of close what I want to get except Im not able to separate them
Here is my data frame
dput(dat_red)
structure(list(FAB = structure(c(5L, 1L, 5L, 3L, 2L, 4L, 6L,
2L, 1L, 6L, 5L, 1L, 5L, 1L, 5L, 6L, 3L, 5L, 2L, 5L, 3L, 3L, 3L,
1L, 3L, 1L, 1L, 1L), .Label = c("M0", "M1", "M2", "M3", "M4",
"M5"), class = "factor"), Risk_Cyto = structure(c(2L, 3L, 2L,
2L, 3L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L), .Label = c("Good", "Intermediate",
"Poor"), class = "factor"), `TCGA-AB-2856` = c(0, 0.203446022561853,
0.057566971226641, 0.050525640210207, 0.050663468813024, 0.108022967842345,
0.03563961790061, 0.091955619434079, 0.09562601922977, 0.072990036124458,
0.05292549370956, 0.134908910498566, 0.056146007781438, 0.166755814327401,
0.072370918290216, 0.092982169160965, 0.053571132330207, 0.026946730545354,
0.096491482450314, 0.086393933157139, 0.086056971395349, 0.059872483122941,
0.05562972070039, 0.080629871622231, 0.06458076058265, 0.109295018454197,
0.15019108327262, 0.122208033564744), `TCGA-AB-2849` = c(0.203446022561853,
0, 0.138756102002674, 0.109150212934145, 0.130381628657973, 0.186028570196918,
0.201142265508601, 0.117008908236162, 0.07523492135779, 0.237542759238287,
0.154026516322799, 0.093169870680731, 0.174873827256869, 0.077917778705184,
0.217466101351585, 0.247196178178148, 0.139168631446623, 0.130879779506245,
0.094044964277672, 0.102330796604311, 0.115883670128914, 0.106007290303468,
0.124207778875499, 0.100051046626221, 0.096898638044544, 0.081075416500332,
0.066801569316824, 0.095571899845876), `TCGA-AB-2971` = c(0.057566971226641,
0.138756102002674, 0, 0.057153443556063, 0.049118618822663, 0.108803803345704,
0.038593571058361, 0.05623480754803, 0.061897696825206, 0.056921365921972,
0.027147582644049, 0.100579305160467, 0.031712766628694, 0.099623521686644,
0.043315406299788, 0.079156224894216, 0.070713735063067, 0.042797402350358,
0.064121331342957, 0.076245258448711, 0.057969352005916, 0.056411884330189,
0.029950269541688, 0.052538503817376, 0.053263317374002, 0.073813902166228,
0.081932722355952, 0.095255347468669), `TCGA-AB-2930` = c(0.050525640210207,
0.109150212934145, 0.057153443556063, 0, 0.040710142137316, 0.087506794353747,
0.076018856821365, 0.054334641613629, 0.043854827190482, 0.121490922447548,
0.060145981627256, 0.070829823037578, 0.0708179998993, 0.083561655580485,
0.106626803408534, 0.149000581782327, 0.049861493156012, 0.018112612744773,
0.05246829209315, 0.041582348253964, 0.053306367816997, 0.035373116643303,
0.042875256342202, 0.03406333799917, 0.036306618864362, 0.045647830531497,
0.084727864328183, 0.079147350281325), `TCGA-AB-2891` = c(0.050663468813024,
0.130381628657973, 0.049118618822663, 0.040710142137316, 0, 0.117167203965628,
0.057145523476846, 0.07089819966556, 0.058848771210843, 0.090222074046894,
0.052188574602838, 0.091623506635555, 0.053000329480576, 0.094592248885481,
0.082033497053918, 0.111240839210373, 0.065982245111563, 0.038618210190806,
0.063406266346048, 0.062231987650712, 0.067503749234478, 0.039970960455281,
0.042758552599394, 0.049740193805893, 0.04884538212911, 0.07959023948363,
0.090749468265183, 0.075792324166325)), class = "data.frame", row.names = c(NA,
-28L))
My code
dat_red = read.csv("JSD_test_map_.txt",sep = "\t",check.names = FALSE)
df_melt = melt(JSD_MAP, id.vars=c("FAB","Risk_Cyto")
)
To plot the above I used this tutorial
source("R_rainclouds.R")
df_melt %>% ggplot(aes(x=Risk_Cyto,y=value, fill = FAB)) +
geom_flat_violin(position = position_nudge(x = .2, y = 0),adjust =2, alpha = 0.5) +
geom_point(position = position_jitter(width = .15), size = .8) +
geom_boxplot(aes(x = Risk_Cyto, y = value, fill = FAB),outlier.shape = NA, alpha = .5, width = .1, colour = "black")+
#theme_jen() +
labs(title = "Raincloud plot of body mass by species", x = 'Risk_Cyto', y = 'JSD') +
easy_remove_legend()
So I have the following group in my metadata or patient info in this subset
> unique(dat_red$FAB)
[1] M4 M0 M2 M1 M3 M5
Levels: M0 M1 M2 M3 M4 M5
> unique(dat_red$Risk_Cyto)
[1] Intermediate Poor Good
Levels: Good Intermediate Poor
My objective is to show The Risk_Cyto as my main group similar to the first figure where They have shown ColonT HeartLV Liver Muscle etc and subsequently I have different FAB subtypes which i want to show similar to Young and Old
Right now everything is kind of stacked or rather messed up in single plot
Any help or suggestion is really appreciated
Put FAB on the x axis and facet by Risk_Cyto
df_melt %>%
ggplot(aes(FAB, value, fill = FAB)) +
geom_flat_violin(position = position_nudge(x = .2, y = 0),adjust =2,
alpha = 0.5) +
geom_point(position = position_jitter(width = .15), size = .8) +
geom_boxplot(outlier.shape = NA,
alpha = .5, width = .1, colour = "black")+
labs(title = "Raincloud plot of body mass by species",
x = 'Risk_Cyto', y = 'JSD') +
facet_grid(.~Risk_Cyto, scales = "free_x", space = "free_x") +
theme_bw(base_size = 16) +
theme(legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(face = 2, size = 22))
I want to plot the p values to each panel in a faceted ggplot. If the p value is larger than 0.05, I want to display the p value as it is. If the p value is smaller than 0.05, I want to display the value in scientific notation (i.e, 0.0032 -> 3.20e-3; 0.0000425 -> 4.25e-5).
The code I wrote to do this is:
p1 <- ggplot(data = CD3, aes(location, value, color = factor(location),
fill = factor(location))) +
theme_bw(base_rect_size = 1) +
geom_boxplot(alpha = 0.3, size = 1.5, show.legend = FALSE) +
geom_jitter(width = 0.2, size = 2, show.legend = FALSE) +
scale_color_manual(values=c("#4cdee6", "#e47267", "#13ec87")) +
scale_fill_manual(values=c("#4cdee6", "#e47267", "#13ec87")) +
ylab(expression(paste("Density of clusters, ", mm^{-2}))) +
xlab(NULL) +
stat_compare_means(comparisons = list(c("CT", 'N'), c("IF","N")),
aes(label = ifelse(..p.format.. < 0.05, formatC(..p.format.., format = "e", digits = 2),
..p.format..)),
method = 'wilcox.test', show.legend = FALSE, size = 10) +
#ylab(expression(paste('Density, /', mm^2, )))+
theme(axis.text = element_text(size = 10),
axis.title = element_text(size = 20),
legend.text = element_text(size = 38),
legend.title = element_text(size = 40),
strip.background = element_rect(colour="black", fill="white", size = 2),
strip.text = element_text(margin = margin(10, 10, 10, 10), size = 40),
panel.grid = element_line(size = 1.5))
plot(p1)
This code runs without error, however, the format of numbers isn't changed. What am I doing wrong?
I attached the data to reproduce the plot: donwload data here
EDIT
structure(list(value = c(0.931966449207829, 3.24210526315789,
3.88811650210901, 0.626860993574675, 4.62085308056872, 0.477508650519031,
0.111900110501359, 3.2495164410058, 4.06626506024096, 0.21684918139434,
1.10365086026018, 4.66666666666667, 0.174109967855698, 0.597625869832174,
2.3758865248227, 0.360751947840548, 1.00441501103753, 3.65168539325843
), Criteria = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Density", "Density of cluster",
"nodular count", "Elongated count"), class = "factor"), Case = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L,
6L), .Label = c("Case 1A", "Case 1B", "Case 2", "Case 3", "Case 4",
"Case 5"), class = "factor"), Mark = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("CD3",
"CD4", "CD8", "CD20", "FoxP3"), class = "factor"), location = structure(c(3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L), .Label = c("CT", "IF", "N"), class = "factor")), row.names = c(91L,
92L, 93L, 106L, 107L, 108L, 121L, 122L, 123L, 136L, 137L, 138L,
151L, 152L, 153L, 166L, 167L, 168L), class = "data.frame")
I think your issue came from the stat_compare_means and the use of comparisons.
I'm not totally sure, but I will guess that the output of p value for stat_compare_means is different from compare_means and so, you can't use it for the aes of label.
Let me explain, with your example, you can modify the display of the p.value like this:
library(ggplot2)
library(ggpubr)
ggplot(df, aes(x = location, y = value, color = location))+
geom_boxplot()+
stat_compare_means(ref.group = "N", aes(label = ifelse(p < 0.05,sprintf("p = %2.1e", as.numeric(..p.format..)), ..p.format..)))
You get the correct display of p.value but you lost your bars. So, if you use comparisons argument, you get:
library(ggplot2)
library(ggpubr)
ggplot(df, aes(x = location, y = value, color = location))+
geom_boxplot()+
stat_compare_means(comparisons = list(c("CT","N"), c("IF","N")), aes(label = ifelse(p < 0.05,sprintf("p = %2.1e", as.numeric(..p.format..)), ..p.format..)))
So, now, you get bars but not the correct display.
To circumwent this issue, you can perform the statistics outside of ggplot2 using compare_means functions and use the package ggsignif to display the correct display.
Here, I'm using dplyr and the function mutate to create new columns, but you can do it easily in base R.
library(dplyr)
library(magrittr)
c <- compare_means(value~location, data = df, ref.group = "N")
c %<>% mutate(y_pos = c(5,5.5), labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# A tibble: 2 x 10
.y. group1 group2 p p.adj p.format p.signif method y_pos labels
<chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <dbl> <chr>
1 value N CT 0.00866 0.017 0.0087 ** Wilcoxon 5 8.7e-03
2 value N IF 0.00866 0.017 0.0087 ** Wilcoxon 5.5 8.7e-03
Then, you can plot it:
library(ggplot2)
library(ggpubr)
library(ggsignif)
ggplot(df, aes(x = location, y = value))+
geom_boxplot(aes(colour = location))+
ylim(0,6)+
geom_signif(data = as.data.frame(c), aes(xmin=group1, xmax=group2, annotations=labels, y_position=y_pos),
manual = TRUE)
Does it look what you are trying to plot ?
This might seem a really stupid mistake on my part but whenever I specify geom_point depending on a factor and choose a hollow point and a solid point (shapes 1 and 19), and plot error bars, it crosses the point.
Here are my data frames:
> dput(head(allbins.sum))
structure(list(T = c(0L, 0L, 10L, 10L, 20L, 20L), treatment = structure(c(1L,
2L, 1L, 2L, 1L, 2L), .Label = c("control bead", "dP bead"), class = "factor"),
N = c(3, 3, 3, 3, 3, 3), cellsBase = c(0, 0, 0.013028995209506,
0.135599858885737, -0.0130289952095061, 0.759359209760127
), sd = c(0, 0, 0.0597063567767786, 0.0469731690178533, 0.0983667566897066,
0.183436089048999), se = c(0, 0, 0.034471481157405, 0.0271199717771474,
0.0567920734541125, 0.105906875391532), ci = c(0, 0, 0.148318812500416,
0.116687820597672, 0.244356569875469, 0.455680506502609),
bin = c("BinA", "BinA", "BinA", "BinA", "BinA", "BinA")), .Names = c("T",
"treatment", "N", "cellsBase", "sd", "se", "ci", "bin"), row.names = c(NA,
6L), class = "data.frame")
> dput(head(allbins.fitdata))
structure(list(wellvidbin = structure(c(1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("A1-002-BinA", "A1-002-BinB", "A1-002-BinC", "A1-031-BinA",
"A1-031-BinB", "A1-031-BinC", "A3-004-BinA", "A3-004-BinB", "A3-004-BinC",
"B1-032-BinA", "B1-032-BinB", "B1-032-BinC", "B4-026-BinA", "B4-026-BinB",
"B4-026-BinC", "C4-027-BinA", "C4-027-BinB", "C4-027-BinC"), class = "factor"),
treatment = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("control bead",
"dP bead"), class = "factor"), wellvid = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("A1-002", "A1-031", "A3-004",
"B1-032", "B4-026", "C4-027"), class = "factor"), bin = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("BinA", "BinB", "BinC"), class = "factor"),
T = c(0L, 10L, 20L, 30L, 40L, 50L), T.factor = structure(1:6, .Label = c("0",
"10", "20", "30", "40", "50", "60"), class = "factor"), cells = c(7L,
11L, 26L, 27L, 28L, 36L), cellsS = c(-1.36568429306349, -1.20296446240061,
-0.592765097414793, -0.552085139749072, -0.511405182083351,
-0.185965520757582), cellsBase = c(0, 0.162719830662884,
0.772919195648701, 0.813599153314422, 0.854279110980143,
1.17971877230591), treatT = structure(c(2L, 4L, 6L, 8L, 10L,
12L), .Label = c("control bead.0", "P bead.0", "control bead.10",
"P bead.10", "control bead.20", "P bead.20", "control bead.30",
"P bead.30", "control bead.40", "P bead.40", "control bead.50",
"P bead.50", "control bead.60", "P bead.60"), class = "factor"),
fit = c(0.0285939715820639, 0.304399288764407, 0.58020460594675,
0.856009923129092, 1.13181524031144, 1.40762055749378), se.fit = c(0.157415367032567,
0.132348142293459, 0.114707848741265, 0.108190467052118,
0.114707848741265, 0.132348142293459), upr = c(0.337128090965895,
0.563801647659587, 0.805031989479629, 1.06806323855124, 1.35664262384431,
1.66702291638896), lwr = c(-0.279940147801767, 0.0449969298692267,
0.35537722241387, 0.643956607706942, 0.906987856778556, 1.1482181985986
)), .Names = c("wellvidbin", "treatment", "wellvid", "bin",
"T", "T.factor", "cells", "cellsS", "cellsBase", "treatT", "fit",
"se.fit", "upr", "lwr"), class = c("data.table", "data.frame"
), row.names = c(NA, -6L), .internal.selfref = <pointer: 0x0000000000100788>)
And the code:
ggplot(data=allbins.sum, aes(x=T, y=cellsBase, shape=treatment)) + geom_point(size=5, aes(shape=treatment))+
geom_errorbar(aes(ymin=cellsBase-se, ymax=cellsBase+se), width=2, size=1) +
geom_smooth(data=allbins.fitdata, size=1, aes(y=fit, ymin=lwr, ymax=upr),
color="black", method="lm", stat="identity", alpha=0.2)+
facet_grid(bin~.) +
scale_shape_manual(values=c(1, 19))
This gives me this plot:
Any hints on how to have the hollow circles to be hollowed?
I also tried specifying geom_shape (aes(fill=treatment) and then scale_fill_manual but then it is also applied to my geom_smooth
Thanks for the help!
If you mean that you don't want the line of the error bar to be visible through the 'hollow' points, then plot geom_errorbar first, then plot geom_point second, with solid fill, so it will overlay the error bar.
ggplot(data=allbins.sum, aes(x=T, y=cellsBase)) +
# plotting this first
geom_errorbar(aes(ymin=cellsBase-se, ymax=cellsBase+se), width=2, size=1) +
# plotting this second, with a hollow fillable shape, and black outline
geom_point(size=5, shape = 21, color='black',
aes(fill = treatment)) +
# solid black and solid white fill for the points
scale_fill_manual(values = c('black', 'white')) +
theme_bw()
(The data you posted only has these points for allbins.sum, and the code for allbins.fitdata has an error, so no error bars on this plot)
I would like to display the percentage figures in the stacked bar. However, one group has a really low percentage. Two values are overlapping each other. I change to 'postion='identity'. It still wont work.....any thoughts?
x4.can.m <- structure(list(canopy = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("0%", "1 to 84%",
"85% +"), class = "factor"), YearQuarter = structure(c(1L, 1L,
1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L), .Label = c("2011-09-01",
"2011-12-01", "2012-03-01", "2012-06-01", "2012-09-01"), class = "factor"),
value = c(0.51, 0.01, 0.48, 0.52, 0.01, 0.47, 0.53, 0.01,
0.47, 0.57, 0.01, 0.41, 0.61, 0.01, 0.38)), .Names = c("canopy",
"YearQuarter", "value"), row.names = c(NA, -15L), class = "data.frame")
x4.can.bar <- ggplot(data=x4.can.m, aes(x=factor(YearQuarter), y=value,fill=canopy)) + geom_bar(stat="identity",position = "stack",ymax=100)
x4.can.bar+scale_y_continuous(formatter='percent')+
labs(y="Percentage",x="Year Quarter") +
geom_text(aes(label =paste(round(value*100,0),"%",sep="")),size = 3, hjust = 0.5, vjust = 4,position ="identity")
You need to specify reasonable values for the placement of the labels - if you do this outside the ggplot call, it will be far easier than trying to do so within the call.
You can do this by taking the midpoint of each stacked component.
Using plyr and ddply this is a simple as taking the cumulative sum and subtracting half the current value within each YearQuarter
library(plyr)
x4.can.m <- ddply(x4.can.m, .(YearQuarter), mutate, csum = cumsum(value)-value/2)
x4.can.bar <- ggplot(data=x4.can.m, aes(x=factor(YearQuarter), y=value,fill=canopy)) +
geom_bar(stat="identity",position = "stack",ymax=100)
x4.can.bar +
scale_y_continuous(expand = c(0,0), labels = percent) +
labs(y="Percentage",x="Year Quarter")+
geom_text(aes(y = csum,label =paste(round(value*100,0),"%",sep="")),
size = 3, hjust = 1, vjust = 0)
Note that I am using ggplot2_0.9.2.1, so formatter is no longer a valid argument to scale_y_continuous, replaced with label = percent. See this question and related links
one solution is to change the stack bar to a dodge one
x4.can.bar <- ggplot(data=x4.can.m, aes(x=factor(YearQuarter), y=value,fill=canopy)) +
geom_bar(stat="identity",position = "dodge",ymax=100) +
geom_text(aes(label =paste(round(value*100,0),"%",sep=""),ymax=0),
position=position_dodge(width=0.9), vjust=-0.25)
x4.can.bar