Overlaying line graph to a points chart in ggplot2 - r

I need to overlay the mean of the abiotic line over the point chart. I tried using geom_line as some other answers recommend but it doesn't quite work. I also want the mean of each point to be shown for each level.
sp <- rep(c("A","B"), times = 10)
sp.val <- rnorm(20,5,1)
abitoic <- rnorm(20,40,2)
level <- rep(c("Low","High"), each = 10)
df <- data.frame(sp, sp.val, abitoic, level)
pd = position_dodge(0.5)
ggplot(df, aes(x = level, y = sp.val, col = sp, group = sp)) +
geom_point(aes(fill = sp),colour="white",pch=21, size=4, stroke = 1, alpha = 0.7, position = pd) +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar",
width = 0.5, colour = "black",
position = pd) +
stat_summary(fun = median, color = "black",
geom = "point", size = 7,show.legend = FALSE,
position = pd) +
stat_summary(fun = median,
geom = "line", show.legend = FALSE,
position = pd)+
stat_summary(fun = median,
geom = "point", size = 3,show.legend = FALSE,
position = pd) +
geom_line(aes(x = level, y = abitoic/5, group = level))

Related

stat_summary() and fun.data = mean_sdl not working

set.seed(1) # generate random data
day1 = rnorm(20,0,1)
day2 = rnorm(20,5,1)
Subject <- rep(paste0('S',seq(1:20)), 2)
Data <- data.frame(Value = matrix(c(day1,day2),ncol=1))
Day <- rep(c('Day 1', 'Day 2'), each = length(day1))
df <- cbind(Subject, Data, Day)
Using this random data, I'd like to plot individual points with unique color for each subject and a summary point (mean + standard deviation).
It seems that the plot is okay when all points are plotted with the same color because stat_summary(fun.data = mean_sdl) works properly.
ggplot(data = df, mapping = aes(x= Day, y =Value)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color= 'black') +
geom_point(size = 2)
But not when all points have unique color (for each subject).
ggplot(data = df, mapping = aes(x = Day, y = Value,
fill = Subject)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color = 'black') +
geom_point(shape = 21, color = 'white', size = 2)
In your example ggplot assumes that each color corresponds to an individual group, but you want the grouping and color to be separate. Therefore, you need to explicitly define the group to be "Day".
ggplot(data = df, mapping = aes(x = Day, y = Value,
fill = Subject, group = Day)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color = 'black') +
geom_point(shape = 21, color = 'white', size = 2)
Try the following:
ggplot(data = df, mapping = aes(x= Day, y =Value)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color= 'black') +
geom_point(size = 2, aes(color = Subject))
Instead of specifying fill in aes() in the first line (ggplot(...)), I've moved it to the geom_point() element instead. Otherwise, stat_summary() will be doing its calculations grouped using Subject!

log transform X axis R

I have the following raw data that I plotted in R:
And I would like to edit this plot to look like this version below which was made by log-transforming the X axis using Excel
However, when I run my code below using scale_x_log10(), the output is not the desired plot I was hoping to make. See image below:
Can anyone identify where I have gone wrong?
ggplot(data = data, aes(x = x, y = y, group = group, color = group)) +
stat_summary(fun = "mean", geom = "line", size = 1.2, aes(group = group, linetype = group, color = group)) +
stat_summary(fun = "mean", geom = "point", size = 3, aes(color = group)) +
theme_apa() +
scale_linetype_manual(values = c("solid", "dashed")) +
scale_color_manual(values = c("mediumturquoise", "red")) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_log10(limits = c(.01, 40), breaks = c(.01, .1, 1, 10))
It looks like your first datapoint is at zero - this can't be displayed on a log scale. You'll need to work out if there's a difference in you data in excel, failing that you could achieve a similar result by modifying the lowest value of x with:
ggplot(data = data, aes(x = pmax(x,0.01), y = y, group = group, color = group)) +
stat_summary(fun = "mean", geom = "line", size = 1.2, aes(group = group, linetype = group, color = group)) +
stat_summary(fun = "mean", geom = "point", size = 3, aes(color = group)) +
theme_apa() +
scale_linetype_manual(values = c("solid", "dashed")) +
scale_color_manual(values = c("mediumturquoise", "red")) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_log10(limits = c(.01, 40), breaks = c(.01, .1, 1, 10))

Connecting means with stat_summary (geom = 'line') within ticks on the x-axis

I am trying to create a plot in ggplot2 similar to this one:
Here is the code I am using:
Dataset %>%
group_by(Participant, Group, Emotion) %>%
ggplot(aes(y = Score, x = Emotion, fill = Group, colour = Group)) +
geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .4) +
geom_point(aes(y = Score, color = Group), position = position_jitter(width = .15), size = 3, alpha = 0.4) +
stat_summary(aes(y = Score, group = Emotion), fun.y = mean, geom="line", size = 2.2, alpha = 1.2, width = 0.25, colour = 'gray48') +
stat_summary(fun = mean, geom = 'pointrange', width = 0.2, size = 2, alpha = 1.2, position=position_dodge(width=0.3)) +
stat_summary(fun.data = mean_se, geom='errorbar', width = 0.25, size = 2.2, alpha = 1.2, linetype = "solid",position=position_dodge(width=0.3)) +
guides(color = FALSE) +
scale_color_brewer(palette = "Dark2") +
scale_fill_brewer(palette = "Dark2") +
ylim(0, 100) +
graph_theme
What I am failing to do is set up the stat_summary(geom = 'line') to connect the green and orange means within each emotion on the x-axis. Could anyone give any pointers on this? I'd also like all the other features to stay the same if possible (e.g., I wouldn't like to use facet_grid or facet_wrap).
Thank you!
When I change the group argument in stat_summary to 'Group' instead of 'Emotion', means for each group are connected across emotions, but I can't figure out how to connect means of different groups within each emotion:
This is a tricky one because your line needs to connect points that have different x values but even if you jitter in the point layer, they still technically have the same x value so the line doesn't know how to connect them. What others have done is to manually add the jitter to force the points to have a different x position. For more inspiration check out this, this and this. Here's an example:
library(tidyverse)
set.seed(1)
emotion <- c("anger", "fear", "sadness")
group <- letters[1:2]
participant <- 1:10
dat <- expand_grid(emotion, group, participant) %>%
mutate(across(everything(), as.factor),
score = sample(x = 1:100, size = nrow(.), replace = T))
dat %>%
mutate(new_emot = case_when(
group == "a" ~as.numeric(emotion) - 0.125,
group == "b" ~as.numeric(emotion) + 0.125
)) %>%
ggplot(aes(x = emotion, y = score)) +
stat_summary(aes(color = group), fun = mean, geom = "point", position = position_dodge(width = 0.5)) +
stat_summary(aes(color = group), fun.data = mean_se, geom = "errorbar", width = 0.5, position = position_dodge(width = 0.5)) +
stat_summary(aes(x = new_emot, group = emotion), fun = mean, geom = "line") +
theme_bw()
Created on 2021-03-24 by the reprex package (v1.0.0)
Setting geom_line to the same position as pointrange and errorbar will solve the problem.
i.e.,
stat_summary(aes(y = Score, group = Emotion), fun.y = mean, geom="line", size = 2.2, alpha = 1.2, width = 0.25, colour = 'gray48', position=position_dodge(width=0.3))

custom color for each group + category combination raincloud plot

I have a raincloud plot:
but I would like each combination of TL group and yr to be a different color, as one can do in base boxplot():
I have tried using the following code for the raincloud plot:
Y_C_rain= ggplot(yct_rain, aes(y=d13C, x=lengthcat,fill = yr,color=yr)) +
geom_flat_violin(position = position_nudge(x = .2, y =0), alpha = .8)+
geom_point(aes(y = , color = yr),
position = position_jitter(width = .05), size = 2, alpha = .5) +
geom_boxplot(width = .3, guides = FALSE, outlier.shape = NA, alpha = 0, notch = FALSE) +
stat_summary(fun= mean, geom = "point", shape = 21, size = 3, fill = "black") +
scale_y_continuous (limits = c(-35,-10),expand = c(0,0),breaks=seq(-35,-10,5)) +
ylab("d13C") + xlab("TL group") +
ggtitle("YCT d13C") +
theme_bw() +
scale_colour_discrete(my_clrs_yct)+
scale_fill_discrete(my_clrs_yct)
Y_C_rain
I know that the colors in the rain plot will need to be coded with some variant of scale_fill_xxx but I am hitting a road block since it appears that each point also needs to have its own color. Therefore the variations of scale_fill_xxx with only 6 individual colors listed is not working.
Do you want something like this?
library(dplyr)
library(data.table)
library(ggplot2)
# used geom_flat_violin from https://gist.github.com/dgrtwo/eb7750e74997891d7c20
my_clrs_yct <- c("#404040", "#407a8c", "#7a7a7a", "#404f86", "#a6a6a6", "#3e1451")
## used storms from dplyr as reproducible example
data("storms")
setDT(storms)
storms[, season:= factor(ifelse(month <=6, "Q12", "Q34"))]
ggplot(storms, aes(x=status, y=pressure, color=interaction(status, season),
fill=interaction(status, season))) +
geom_point(aes(color = interaction(status, season)),
position = position_jitterdodge(
jitter.width=.1, dodge.width=.25), size = 2, alpha = .5)+
geom_flat_violin(position = position_nudge(x = .5, y =0), alpha = .5)+
geom_boxplot(width = .3, guides = FALSE, outlier.shape = NA, alpha = 0)+
stat_summary(fun = mean, geom = "point", shape = 21, size = 3,
fill = "black", position = position_nudge(x = c(-.075,.075), y =0)) +
theme_bw() +
scale_colour_manual(values=my_clrs_yct) +
scale_fill_manual(values=my_clrs_yct)

R ggpubr Boxplot adding summary stats label to dynamic Y axis

I would like to add summary statistics on a box plot at the max of a dynamic y axis.
In the real data the y axis is a dynamic dropdown, one value is between 0 - 6; and the other between 0 - 100. In the example below I have hard coded where I would like the labels to be, but I cannot hard code them in the real data.
Is there a way to either:
Set labels outside the graph above the y axis? So that the labels will not move even if the axis changes?
Or is there a way to set it to max of Y + n?
Example:
# library
library(ggplot2)
library(ggpubr)
# create a data frame
variety=rep(LETTERS[1:7], each=40)
treatment=rep(c("high","low"),each=20)
note=seq(1:280)+sample(1:150, 280, replace=T)
data=data.frame(variety, treatment , note)
# grouped boxplot
ggplot(data, aes(x = variety, y = note, fill = treatment)) +
geom_boxplot() +
scale_fill_manual(values = c("#79AAB9", "#467786")) +
stat_compare_means(aes(group = treatment), label = "p.format") +
stat_summary(
fun.data = function(x)
data.frame(y = 460, label = paste(round(median(
x
), 1))),
geom = "text",
aes(group = treatment),
hjust = 0.5,
position = position_dodge(0.9)
) +
stat_summary(
fun.data = function(x)
data.frame(y = 445, label = paste("n", length(x))),
geom = "text",
aes(group = treatment),
hjust = 0.5,
position = position_dodge(0.9)
) +
expand_limits(y = 100)
Thanks so much for any help in advance.
Managed to get the following working with suggestion from #MarkNeal
# library
library(ggplot2)
library(ggpubr)
# create a data frame
variety=rep(LETTERS[1:7], each=40)
treatment=rep(c("high","low"),each=20)
note=seq(1:280)+sample(1:150, 280, replace=T)
data=data.frame(variety, treatment , note)
# grouped boxplot
ggplot(data, aes(x = variety, y = note, fill = treatment)) +
geom_boxplot() +
scale_fill_manual(values = c("#79AAB9", "#467786")) +
stat_compare_means(aes(group = treatment), label = "p.format", vjust = 3) +
stat_summary(
fun.data = function(x)
data.frame(y= Inf, label = paste(round(median(
x
), 1))),
geom = "text",
aes(group = treatment),
hjust = 0.5, vjust = 1,
position = position_dodge(0.9)
) +
stat_summary(
fun.data = function(x)
data.frame(y = Inf, label = paste("n", length(x))),
geom = "text",
aes(group = treatment),
hjust = 0.5, vjust = 2,
position = position_dodge(0.9)
)

Resources