stat_summary() and fun.data = mean_sdl not working - r

set.seed(1) # generate random data
day1 = rnorm(20,0,1)
day2 = rnorm(20,5,1)
Subject <- rep(paste0('S',seq(1:20)), 2)
Data <- data.frame(Value = matrix(c(day1,day2),ncol=1))
Day <- rep(c('Day 1', 'Day 2'), each = length(day1))
df <- cbind(Subject, Data, Day)
Using this random data, I'd like to plot individual points with unique color for each subject and a summary point (mean + standard deviation).
It seems that the plot is okay when all points are plotted with the same color because stat_summary(fun.data = mean_sdl) works properly.
ggplot(data = df, mapping = aes(x= Day, y =Value)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color= 'black') +
geom_point(size = 2)
But not when all points have unique color (for each subject).
ggplot(data = df, mapping = aes(x = Day, y = Value,
fill = Subject)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color = 'black') +
geom_point(shape = 21, color = 'white', size = 2)

In your example ggplot assumes that each color corresponds to an individual group, but you want the grouping and color to be separate. Therefore, you need to explicitly define the group to be "Day".
ggplot(data = df, mapping = aes(x = Day, y = Value,
fill = Subject, group = Day)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color = 'black') +
geom_point(shape = 21, color = 'white', size = 2)

Try the following:
ggplot(data = df, mapping = aes(x= Day, y =Value)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color= 'black') +
geom_point(size = 2, aes(color = Subject))
Instead of specifying fill in aes() in the first line (ggplot(...)), I've moved it to the geom_point() element instead. Otherwise, stat_summary() will be doing its calculations grouped using Subject!

Related

Overlaying line graph to a points chart in ggplot2

I need to overlay the mean of the abiotic line over the point chart. I tried using geom_line as some other answers recommend but it doesn't quite work. I also want the mean of each point to be shown for each level.
sp <- rep(c("A","B"), times = 10)
sp.val <- rnorm(20,5,1)
abitoic <- rnorm(20,40,2)
level <- rep(c("Low","High"), each = 10)
df <- data.frame(sp, sp.val, abitoic, level)
pd = position_dodge(0.5)
ggplot(df, aes(x = level, y = sp.val, col = sp, group = sp)) +
geom_point(aes(fill = sp),colour="white",pch=21, size=4, stroke = 1, alpha = 0.7, position = pd) +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar",
width = 0.5, colour = "black",
position = pd) +
stat_summary(fun = median, color = "black",
geom = "point", size = 7,show.legend = FALSE,
position = pd) +
stat_summary(fun = median,
geom = "line", show.legend = FALSE,
position = pd)+
stat_summary(fun = median,
geom = "point", size = 3,show.legend = FALSE,
position = pd) +
geom_line(aes(x = level, y = abitoic/5, group = level))

log transform X axis R

I have the following raw data that I plotted in R:
And I would like to edit this plot to look like this version below which was made by log-transforming the X axis using Excel
However, when I run my code below using scale_x_log10(), the output is not the desired plot I was hoping to make. See image below:
Can anyone identify where I have gone wrong?
ggplot(data = data, aes(x = x, y = y, group = group, color = group)) +
stat_summary(fun = "mean", geom = "line", size = 1.2, aes(group = group, linetype = group, color = group)) +
stat_summary(fun = "mean", geom = "point", size = 3, aes(color = group)) +
theme_apa() +
scale_linetype_manual(values = c("solid", "dashed")) +
scale_color_manual(values = c("mediumturquoise", "red")) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_log10(limits = c(.01, 40), breaks = c(.01, .1, 1, 10))
It looks like your first datapoint is at zero - this can't be displayed on a log scale. You'll need to work out if there's a difference in you data in excel, failing that you could achieve a similar result by modifying the lowest value of x with:
ggplot(data = data, aes(x = pmax(x,0.01), y = y, group = group, color = group)) +
stat_summary(fun = "mean", geom = "line", size = 1.2, aes(group = group, linetype = group, color = group)) +
stat_summary(fun = "mean", geom = "point", size = 3, aes(color = group)) +
theme_apa() +
scale_linetype_manual(values = c("solid", "dashed")) +
scale_color_manual(values = c("mediumturquoise", "red")) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_log10(limits = c(.01, 40), breaks = c(.01, .1, 1, 10))

Connecting means with stat_summary (geom = 'line') within ticks on the x-axis

I am trying to create a plot in ggplot2 similar to this one:
Here is the code I am using:
Dataset %>%
group_by(Participant, Group, Emotion) %>%
ggplot(aes(y = Score, x = Emotion, fill = Group, colour = Group)) +
geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .4) +
geom_point(aes(y = Score, color = Group), position = position_jitter(width = .15), size = 3, alpha = 0.4) +
stat_summary(aes(y = Score, group = Emotion), fun.y = mean, geom="line", size = 2.2, alpha = 1.2, width = 0.25, colour = 'gray48') +
stat_summary(fun = mean, geom = 'pointrange', width = 0.2, size = 2, alpha = 1.2, position=position_dodge(width=0.3)) +
stat_summary(fun.data = mean_se, geom='errorbar', width = 0.25, size = 2.2, alpha = 1.2, linetype = "solid",position=position_dodge(width=0.3)) +
guides(color = FALSE) +
scale_color_brewer(palette = "Dark2") +
scale_fill_brewer(palette = "Dark2") +
ylim(0, 100) +
graph_theme
What I am failing to do is set up the stat_summary(geom = 'line') to connect the green and orange means within each emotion on the x-axis. Could anyone give any pointers on this? I'd also like all the other features to stay the same if possible (e.g., I wouldn't like to use facet_grid or facet_wrap).
Thank you!
When I change the group argument in stat_summary to 'Group' instead of 'Emotion', means for each group are connected across emotions, but I can't figure out how to connect means of different groups within each emotion:
This is a tricky one because your line needs to connect points that have different x values but even if you jitter in the point layer, they still technically have the same x value so the line doesn't know how to connect them. What others have done is to manually add the jitter to force the points to have a different x position. For more inspiration check out this, this and this. Here's an example:
library(tidyverse)
set.seed(1)
emotion <- c("anger", "fear", "sadness")
group <- letters[1:2]
participant <- 1:10
dat <- expand_grid(emotion, group, participant) %>%
mutate(across(everything(), as.factor),
score = sample(x = 1:100, size = nrow(.), replace = T))
dat %>%
mutate(new_emot = case_when(
group == "a" ~as.numeric(emotion) - 0.125,
group == "b" ~as.numeric(emotion) + 0.125
)) %>%
ggplot(aes(x = emotion, y = score)) +
stat_summary(aes(color = group), fun = mean, geom = "point", position = position_dodge(width = 0.5)) +
stat_summary(aes(color = group), fun.data = mean_se, geom = "errorbar", width = 0.5, position = position_dodge(width = 0.5)) +
stat_summary(aes(x = new_emot, group = emotion), fun = mean, geom = "line") +
theme_bw()
Created on 2021-03-24 by the reprex package (v1.0.0)
Setting geom_line to the same position as pointrange and errorbar will solve the problem.
i.e.,
stat_summary(aes(y = Score, group = Emotion), fun.y = mean, geom="line", size = 2.2, alpha = 1.2, width = 0.25, colour = 'gray48', position=position_dodge(width=0.3))

How to apply position_dodge to geom_point and geom_text in the same plot?

I woul like to be able to make the geom_text inside the geom_point to follow the re-positioning when applying position_dodge. That is, I would like to go from the code below:
Q <- as_tibble(data.frame(series = rep(c("diax","diay"),3),
value = c(3.25,3.30,3.31,3.36,3.38,3.42),
year = c(2018,2018,2019,2019,2020,2020))) %>%
select(year, series, value)
ggplot(data = Q, mapping = aes(x = year, y = value, color = series, label = sprintf("%.2f",value))) +
geom_point(size = 13) +
geom_text(vjust = 0.4,color = "white", size = 4, fontface = "bold", show.legend = FALSE)
which produces the following chart:
to the following change:
ggplot(data = Q, mapping = aes(x = year, y = value, color = series, label = sprintf("%.2f",value))) +
geom_point(size = 13, position = position_dodge(width = 1)) +
geom_text(position = position_dodge(width = 1), vjust = 0.4,
color = "white", size = 4, fontface = "bold",
show.legend = FALSE)
which produces the following chart:
The curious thing about this is the fact that excatly the same change works just fine if I change from geom_point to geom_bar:
ggplot(Q, aes(year, value, fill = factor(series), label = sprintf("%.2f",value))) +
geom_bar(stat = "identity", position = position_dodge(width = 1)) +
geom_text(color = "black", size = 4,fontface= "bold",
position = position_dodge(width = 1), vjust = 0.4, show.legend = FALSE)
This happens because the the dodging is based on the group aesthetic, automatically set in this case to series because of the mapping to color. The issue is that the text layer has it's own color ("white") and so the grouping is dropped. Manually set the grouping, and all is good:
ggplot(Q, aes(x = year, y = value, color = series, label = sprintf("%.2f",value), group = series)) +
geom_point(size = 13, position = position_dodge(width = 1)) +
geom_text(position = position_dodge(width = 1), vjust = 0.4, color = "white", size = 4,
fontface = "bold", show.legend = FALSE)
One patch work would be the following. Since you cannot add labels on top of the data point using geom_text() right away, you may want to go round a bit. I first created a temporary graphic with geom_point(). Then, I accessed to the data frame which is used for drawing the graphic. You can find the values of x and y axis. Using them, I created a new data frame called temp which include the axis information and the label information. Once I had this data frame, I could draw the expected outcome using temp. Make sure that you use inherit.aes = FALSE in geom_text() since you are using another data frame.
library(dplyr)
library(ggplot2)
g <- ggplot(data = Q, aes(x = year, y = value, color = series)) +
geom_point(size = 13, position = position_dodge(width = 1))
temp <- as.data.frame(ggplot_build(g)$data) %>%
select(x, y) %>%
arrange(x) %>%
mutate(label = sprintf("%.2f",Q$value))
ggplot(data = Q, aes(x = year, y = value, color = series)) +
geom_point(size = 13, position = position_dodge(width = 1)) +
geom_text(data = temp, aes(x = x, y = y, label = label),
color = "white", inherit.aes = FALSE)

Display geom_label just for mean values

I'd really appreciate your help. I simply want to display a mean value at each time point for 2 treatment groups (corresponding the the red line). I've tried with geom_label but it seems to label every single point. Any idea how to label just the means (i.e. 4 value labels in treatment group A and 4 in treatment group B).
# Assign random alpha (0 [50%] or 1 [50%]) values
dat <- ddply(dat, .(id), function(x){
x$alpha = ifelse(runif(n = 1) > 0.50, 1, 0)
x
})
# Plot
ggplot(data = dat, aes(x = week, y = iop, group = id)) +
geom_point(alpha = 0.5) +
geom_line(aes(alpha=alpha, group=id)) +
guides(alpha=FALSE) +
stat_summary(aes(group = 1), geom = "point", fun.y = mean, size = 3, color = "red") +
stat_summary(aes(group = 1), geom = "line", fun.y = mean, size = 2, color = "red") +
geom_label(stat = 'summary', fun.y=mean, aes(label = round(..y.., 2)), nudge_x = 0.1, hjust = 0) +
scale_x_continuous(breaks = seq(0,18,6)) +
facet_grid(. ~ trt) +
theme_classic() +
xlab("Month") + ylab("IOP")

Resources