I would like to add summary statistics on a box plot at the max of a dynamic y axis.
In the real data the y axis is a dynamic dropdown, one value is between 0 - 6; and the other between 0 - 100. In the example below I have hard coded where I would like the labels to be, but I cannot hard code them in the real data.
Is there a way to either:
Set labels outside the graph above the y axis? So that the labels will not move even if the axis changes?
Or is there a way to set it to max of Y + n?
Example:
# library
library(ggplot2)
library(ggpubr)
# create a data frame
variety=rep(LETTERS[1:7], each=40)
treatment=rep(c("high","low"),each=20)
note=seq(1:280)+sample(1:150, 280, replace=T)
data=data.frame(variety, treatment , note)
# grouped boxplot
ggplot(data, aes(x = variety, y = note, fill = treatment)) +
geom_boxplot() +
scale_fill_manual(values = c("#79AAB9", "#467786")) +
stat_compare_means(aes(group = treatment), label = "p.format") +
stat_summary(
fun.data = function(x)
data.frame(y = 460, label = paste(round(median(
x
), 1))),
geom = "text",
aes(group = treatment),
hjust = 0.5,
position = position_dodge(0.9)
) +
stat_summary(
fun.data = function(x)
data.frame(y = 445, label = paste("n", length(x))),
geom = "text",
aes(group = treatment),
hjust = 0.5,
position = position_dodge(0.9)
) +
expand_limits(y = 100)
Thanks so much for any help in advance.
Managed to get the following working with suggestion from #MarkNeal
# library
library(ggplot2)
library(ggpubr)
# create a data frame
variety=rep(LETTERS[1:7], each=40)
treatment=rep(c("high","low"),each=20)
note=seq(1:280)+sample(1:150, 280, replace=T)
data=data.frame(variety, treatment , note)
# grouped boxplot
ggplot(data, aes(x = variety, y = note, fill = treatment)) +
geom_boxplot() +
scale_fill_manual(values = c("#79AAB9", "#467786")) +
stat_compare_means(aes(group = treatment), label = "p.format", vjust = 3) +
stat_summary(
fun.data = function(x)
data.frame(y= Inf, label = paste(round(median(
x
), 1))),
geom = "text",
aes(group = treatment),
hjust = 0.5, vjust = 1,
position = position_dodge(0.9)
) +
stat_summary(
fun.data = function(x)
data.frame(y = Inf, label = paste("n", length(x))),
geom = "text",
aes(group = treatment),
hjust = 0.5, vjust = 2,
position = position_dodge(0.9)
)
Related
set.seed(1) # generate random data
day1 = rnorm(20,0,1)
day2 = rnorm(20,5,1)
Subject <- rep(paste0('S',seq(1:20)), 2)
Data <- data.frame(Value = matrix(c(day1,day2),ncol=1))
Day <- rep(c('Day 1', 'Day 2'), each = length(day1))
df <- cbind(Subject, Data, Day)
Using this random data, I'd like to plot individual points with unique color for each subject and a summary point (mean + standard deviation).
It seems that the plot is okay when all points are plotted with the same color because stat_summary(fun.data = mean_sdl) works properly.
ggplot(data = df, mapping = aes(x= Day, y =Value)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color= 'black') +
geom_point(size = 2)
But not when all points have unique color (for each subject).
ggplot(data = df, mapping = aes(x = Day, y = Value,
fill = Subject)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color = 'black') +
geom_point(shape = 21, color = 'white', size = 2)
In your example ggplot assumes that each color corresponds to an individual group, but you want the grouping and color to be separate. Therefore, you need to explicitly define the group to be "Day".
ggplot(data = df, mapping = aes(x = Day, y = Value,
fill = Subject, group = Day)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color = 'black') +
geom_point(shape = 21, color = 'white', size = 2)
Try the following:
ggplot(data = df, mapping = aes(x= Day, y =Value)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color= 'black') +
geom_point(size = 2, aes(color = Subject))
Instead of specifying fill in aes() in the first line (ggplot(...)), I've moved it to the geom_point() element instead. Otherwise, stat_summary() will be doing its calculations grouped using Subject!
I am trying to create a plot in ggplot2 similar to this one:
Here is the code I am using:
Dataset %>%
group_by(Participant, Group, Emotion) %>%
ggplot(aes(y = Score, x = Emotion, fill = Group, colour = Group)) +
geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .4) +
geom_point(aes(y = Score, color = Group), position = position_jitter(width = .15), size = 3, alpha = 0.4) +
stat_summary(aes(y = Score, group = Emotion), fun.y = mean, geom="line", size = 2.2, alpha = 1.2, width = 0.25, colour = 'gray48') +
stat_summary(fun = mean, geom = 'pointrange', width = 0.2, size = 2, alpha = 1.2, position=position_dodge(width=0.3)) +
stat_summary(fun.data = mean_se, geom='errorbar', width = 0.25, size = 2.2, alpha = 1.2, linetype = "solid",position=position_dodge(width=0.3)) +
guides(color = FALSE) +
scale_color_brewer(palette = "Dark2") +
scale_fill_brewer(palette = "Dark2") +
ylim(0, 100) +
graph_theme
What I am failing to do is set up the stat_summary(geom = 'line') to connect the green and orange means within each emotion on the x-axis. Could anyone give any pointers on this? I'd also like all the other features to stay the same if possible (e.g., I wouldn't like to use facet_grid or facet_wrap).
Thank you!
When I change the group argument in stat_summary to 'Group' instead of 'Emotion', means for each group are connected across emotions, but I can't figure out how to connect means of different groups within each emotion:
This is a tricky one because your line needs to connect points that have different x values but even if you jitter in the point layer, they still technically have the same x value so the line doesn't know how to connect them. What others have done is to manually add the jitter to force the points to have a different x position. For more inspiration check out this, this and this. Here's an example:
library(tidyverse)
set.seed(1)
emotion <- c("anger", "fear", "sadness")
group <- letters[1:2]
participant <- 1:10
dat <- expand_grid(emotion, group, participant) %>%
mutate(across(everything(), as.factor),
score = sample(x = 1:100, size = nrow(.), replace = T))
dat %>%
mutate(new_emot = case_when(
group == "a" ~as.numeric(emotion) - 0.125,
group == "b" ~as.numeric(emotion) + 0.125
)) %>%
ggplot(aes(x = emotion, y = score)) +
stat_summary(aes(color = group), fun = mean, geom = "point", position = position_dodge(width = 0.5)) +
stat_summary(aes(color = group), fun.data = mean_se, geom = "errorbar", width = 0.5, position = position_dodge(width = 0.5)) +
stat_summary(aes(x = new_emot, group = emotion), fun = mean, geom = "line") +
theme_bw()
Created on 2021-03-24 by the reprex package (v1.0.0)
Setting geom_line to the same position as pointrange and errorbar will solve the problem.
i.e.,
stat_summary(aes(y = Score, group = Emotion), fun.y = mean, geom="line", size = 2.2, alpha = 1.2, width = 0.25, colour = 'gray48', position=position_dodge(width=0.3))
I have two plots I just want to know how I can add a legend for the blue and gray bar charts and also could you please show me how you could also edit the legend tittle.
X1 <- c(seq(7.912087912,44.83516484,1.538461538))
X2 <- c(seq(7.912087912,49.45054945,1.538461538))
dat2 <- data.frame(x = X2 , y = rnorm(28, 26, 5))
dat1 <- data.frame(x = X1 , y = rnorm(100, 25, 4))
ggplot(NULL) +
geom_bar(dat1, mapping = aes(x = x, y = y), stat = "identity",alpha = 0.3, position = "stack" ) + labs( x = " Time [ S ]", y = "Frequency") + theme_minimal() +
ggtitle("Histogram Of Time In Tank") + theme(plot.title = element_text(hjust = 0.5)) +
theme(plot.title = element_text(hjust = 0.5)) +
geom_bar(dat2, mapping = aes(x = x, y = y ), stat = "identity", alpha = .3, position = "stack", fill='lightblue' , color='lightblue4')
+ scale_linetype_discrete(name = LegendTitle)
If you want a legend in ggplot, you need to have an aesthetic mapping inside your aes() or no legend will appear. Here's how we can set a mapping and then use the scale to set the colors we want
ggplot(NULL) +
geom_bar(dat1, mapping = aes(x = x, y = y, fill="Grey Bars"), stat = "identity",alpha = 0.3, position = "stack" ) +
labs( x = " Time [ S ]", y = "Frequency") +
theme_minimal() +
ggtitle("Histogram Of Time In Tank") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_bar(dat2, mapping = aes(x = x, y = y, fill='Blue Bars') , stat = "identity", alpha = .3, position = "stack", color='lightblue4') +
scale_fill_manual(name="Bars", values=c("Grey Bars" = "grey35", "Blue Bars" = "lightblue"))
I'd really appreciate your help. I simply want to display a mean value at each time point for 2 treatment groups (corresponding the the red line). I've tried with geom_label but it seems to label every single point. Any idea how to label just the means (i.e. 4 value labels in treatment group A and 4 in treatment group B).
# Assign random alpha (0 [50%] or 1 [50%]) values
dat <- ddply(dat, .(id), function(x){
x$alpha = ifelse(runif(n = 1) > 0.50, 1, 0)
x
})
# Plot
ggplot(data = dat, aes(x = week, y = iop, group = id)) +
geom_point(alpha = 0.5) +
geom_line(aes(alpha=alpha, group=id)) +
guides(alpha=FALSE) +
stat_summary(aes(group = 1), geom = "point", fun.y = mean, size = 3, color = "red") +
stat_summary(aes(group = 1), geom = "line", fun.y = mean, size = 2, color = "red") +
geom_label(stat = 'summary', fun.y=mean, aes(label = round(..y.., 2)), nudge_x = 0.1, hjust = 0) +
scale_x_continuous(breaks = seq(0,18,6)) +
facet_grid(. ~ trt) +
theme_classic() +
xlab("Month") + ylab("IOP")
I am try to do a plot whith means lines by group (in ggplot), and i have my code like this
ggplot(gama, aes(x = distancia, y= glipidoscmtejido, colour= estado)) +
geom_point(position=position_dodge(.5), alpha= 1, size=3) +
geom_crossbar(data=gama,aes(x=distancia,ymin=mean(glipidoscmtejido),
ymax=mean(glipidoscmtejido),y=mean(glipidoscmtejido),colour=estado), width = 0.5)
and I get this
but i need the mean line by distancia and by estado. how can i make it?
thanks.
How about this using stat_summary to plot the means per estado per distancia:
# Generate some sample data
set.seed(2017);
df <- cbind.data.frame(
x = rnorm(100),
estado = sample(c("sana", "lesionada"), 100, replace = T),
distancia = sample(c("0-1", "2.5-3.5", "5.6"), 100, replace = T));
require(ggplot2);
ggplot(df, aes(x = distancia, y = x, colour = estado)) +
geom_point(position = position_dodge(width = 0.3)) +
stat_summary(
fun.y = mean,
geom = "errorbar",
aes(ymax = ..y.., ymin = ..y..),
position = position_dodge(width = 0.3),
width = 0.25);