Display geom_label just for mean values - r

I'd really appreciate your help. I simply want to display the mean value at each time point for the 2 treatment groups (corresponding to the red line). I've tried geom_label, but it seems to label every single point. Any idea how to label just the means (i.e. 4 value labels in treatment group A and 4 in treatment group B)?
# Draw one random alpha value (0 or 1, 50% each) per subject id; it is mapped
# to the alpha aesthetic of the individual lines in the plot below.
dat <- ddply(dat, .(id), function(subject_rows) {
  subject_rows$alpha <- if (runif(n = 1) > 0.50) 1 else 0
  subject_rows
})

# Plot: raw points and per-subject lines, with the mean at each week overlaid
# in red, one panel per treatment.
ggplot(data = dat, aes(x = week, y = iop, group = id)) +
  geom_point(alpha = 0.5) +
  geom_line(aes(alpha = alpha, group = id)) +
  guides(alpha = FALSE) +
  # group = 1 overrides the group = id inherited from ggplot(), so these two
  # layers summarise across all subjects.
  stat_summary(
    aes(group = 1),
    geom = "point", fun.y = mean, size = 3, color = "red"
  ) +
  stat_summary(
    aes(group = 1),
    geom = "line", fun.y = mean, size = 2, color = "red"
  ) +
  # NOTE: this layer has no group override, so it keeps group = id from
  # ggplot() and the summary is computed per subject.
  geom_label(
    stat = 'summary', fun.y = mean,
    aes(label = round(..y.., 2)),
    nudge_x = 0.1, hjust = 0
  ) +
  scale_x_continuous(breaks = seq(0, 18, 6)) +
  facet_grid(. ~ trt) +
  theme_classic() +
  xlab("Month") +
  ylab("IOP")

Related

stat_summary() and fun.data = mean_sdl not working

# Simulate 20 subjects measured on two days; the seed makes it reproducible.
set.seed(1)
day1 <- rnorm(20, 0, 1)
day2 <- rnorm(20, 5, 1)
# seq_len(20) replaces the redundant seq(1:20); the 20 labels are repeated
# once per day so they line up with the stacked values below.
Subject <- rep(paste0("S", seq_len(20)), 2)
# Stack both days into a single Value column (no matrix round-trip needed).
Data <- data.frame(Value = c(day1, day2))
Day <- rep(c("Day 1", "Day 2"), each = length(day1))
# Final long-format table with columns Subject, Value and Day (40 rows).
df <- cbind(Subject, Data, Day)
Using this random data, I'd like to plot individual points with unique color for each subject and a summary point (mean + standard deviation).
It seems that the plot is okay when all points are plotted with the same color because stat_summary(fun.data = mean_sdl) works properly.
# Baseline: per-day summary (mean_sdl with mult = 2) as a black pointrange,
# with the raw observations drawn on top in a single colour.
ggplot(data = df, mapping = aes(x = Day, y = Value)) +
  stat_summary(
    fun.data = mean_sdl,
    fun.args = list(mult = 2),
    geom = 'pointrange',
    fatten = 3 * 1.2,
    size = 1.2,
    color = 'black'
  ) +
  geom_point(size = 2)
But not when all points have unique color (for each subject).
# Same plot, but with a per-subject fill mapped in the top-level aes().
# NOTE: because fill = Subject is set globally, stat_summary() inherits it
# and is grouped by Subject.
ggplot(
  data = df,
  mapping = aes(x = Day, y = Value, fill = Subject)
) +
  stat_summary(
    fun.data = mean_sdl,
    fun.args = list(mult = 2),
    geom = 'pointrange',
    fatten = 3 * 1.2,
    size = 1.2,
    color = 'black'
  ) +
  geom_point(shape = 21, color = 'white', size = 2)
In your example ggplot assumes that each color corresponds to an individual group, but you want the grouping and color to be separate. Therefore, you need to explicitly define the group to be "Day".
# Keep the per-subject fill, but set group = Day explicitly so the summary
# is computed per day rather than per fill level.
ggplot(
  data = df,
  mapping = aes(x = Day, y = Value, fill = Subject, group = Day)
) +
  stat_summary(
    fun.data = mean_sdl,
    fun.args = list(mult = 2),
    geom = 'pointrange',
    fatten = 3 * 1.2,
    size = 1.2,
    color = 'black'
  ) +
  geom_point(shape = 21, color = 'white', size = 2)
Try the following:
# Alternative: leave the top-level aes() free of Subject and map it only in
# the point layer, so stat_summary() never sees the per-subject mapping.
ggplot(data = df, mapping = aes(x = Day, y = Value)) +
  stat_summary(
    fun.data = mean_sdl,
    fun.args = list(mult = 2),
    geom = 'pointrange',
    fatten = 3 * 1.2,
    size = 1.2,
    color = 'black'
  ) +
  geom_point(size = 2, aes(color = Subject))
Instead of mapping Subject in aes() in the first line (ggplot(...)), I've mapped it (as color) in the geom_point() layer instead. Otherwise, stat_summary() will be doing its calculations grouped by Subject!

log transform X axis R

I have the following raw data that I plotted in R:
And I would like to edit this plot to look like this version below which was made by log-transforming the X axis using Excel
However, when I run my code below using scale_x_log10(), the output is not the desired plot I was hoping to make. See image below:
Can anyone identify where I have gone wrong?
# Group means as lines and points, drawn on a log10 x axis.
ggplot(data = data, aes(x = x, y = y, group = group, color = group)) +
  stat_summary(
    fun = "mean",
    geom = "line",
    size = 1.2,
    aes(group = group, linetype = group, color = group)
  ) +
  stat_summary(
    fun = "mean",
    geom = "point",
    size = 3,
    aes(color = group)
  ) +
  theme_apa() +
  scale_linetype_manual(values = c("solid", "dashed")) +
  scale_color_manual(values = c("mediumturquoise", "red")) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
  # The x limits start at 0.01 on the log scale.
  scale_x_log10(limits = c(.01, 40), breaks = c(.01, .1, 1, 10))
It looks like your first data point is at zero - this can't be displayed on a log scale. You'll need to work out whether your data in Excel is different; failing that, you could achieve a similar result by raising the lowest value of x to the lower axis limit with pmax():
# Same plot, but x is floored at 0.01 with pmax() so that values below the
# lower axis limit are moved onto it before the log10 scale is applied.
ggplot(
  data = data,
  aes(x = pmax(x, 0.01), y = y, group = group, color = group)
) +
  stat_summary(
    fun = "mean",
    geom = "line",
    size = 1.2,
    aes(group = group, linetype = group, color = group)
  ) +
  stat_summary(
    fun = "mean",
    geom = "point",
    size = 3,
    aes(color = group)
  ) +
  theme_apa() +
  scale_linetype_manual(values = c("solid", "dashed")) +
  scale_color_manual(values = c("mediumturquoise", "red")) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
  scale_x_log10(limits = c(.01, 40), breaks = c(.01, .1, 1, 10))

Connecting means with stat_summary (geom = 'line') within ticks on the x-axis

I am trying to create a plot in ggplot2 similar to this one:
Here is the code I am using:
# Score by Emotion, split by Group: half-violins, jittered raw points, and
# dodged group means with standard-error bars.
Dataset %>%
  group_by(Participant, Group, Emotion) %>%
  ggplot(aes(y = Score, x = Emotion, fill = Group, colour = Group)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .4) +
  geom_point(
    aes(y = Score, color = Group),
    position = position_jitter(width = .15),
    size = 3, alpha = 0.4
  ) +
  # Line through the means. `fun` replaces the deprecated `fun.y`, matching
  # the other stat_summary() layers in this chain.
  # NOTE: group = Emotion is the grouping the question is about; this layer
  # has no position adjustment, unlike the dodged layers below.
  stat_summary(
    aes(y = Score, group = Emotion),
    fun = mean, geom = "line",
    size = 2.2, alpha = 1.2, width = 0.25, colour = 'gray48'
  ) +
  stat_summary(
    fun = mean, geom = 'pointrange',
    width = 0.2, size = 2, alpha = 1.2,
    position = position_dodge(width = 0.3)
  ) +
  stat_summary(
    fun.data = mean_se, geom = 'errorbar',
    width = 0.25, size = 2.2, alpha = 1.2, linetype = "solid",
    position = position_dodge(width = 0.3)
  ) +
  guides(color = FALSE) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  ylim(0, 100) +
  graph_theme
What I am failing to do is set up the stat_summary(geom = 'line') to connect the green and orange means within each emotion on the x-axis. Could anyone give any pointers on this? I'd also like all the other features to stay the same if possible (e.g., I wouldn't like to use facet_grid or facet_wrap).
Thank you!
When I change the group argument in stat_summary to 'Group' instead of 'Emotion', means for each group are connected across emotions, but I can't figure out how to connect means of different groups within each emotion:
This is a tricky one because your line needs to connect points that have different x values but even if you jitter in the point layer, they still technically have the same x value so the line doesn't know how to connect them. What others have done is to manually add the jitter to force the points to have a different x position. For more inspiration check out this, this and this. Here's an example:
library(tidyverse)
set.seed(1)

# Toy data: every combination of 3 emotions, 2 groups and 10 participants,
# each with a random integer score between 1 and 100.
emotion <- c("anger", "fear", "sadness")
group <- letters[1:2]
participant <- 1:10
dat <- expand_grid(emotion, group, participant) %>%
  mutate(
    across(everything(), as.factor),
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    score = sample(x = 1:100, size = nrow(.), replace = TRUE)
  )

dat %>%
  # Numeric x position per row: the emotion's integer code shifted by
  # -0.125 for group "a" and +0.125 for group "b", so the two group means
  # within an emotion get distinct x values for the line layer.
  mutate(new_emot = case_when(
    group == "a" ~ as.numeric(emotion) - 0.125,
    group == "b" ~ as.numeric(emotion) + 0.125
  )) %>%
  ggplot(aes(x = emotion, y = score)) +
  stat_summary(
    aes(color = group),
    fun = mean, geom = "point",
    position = position_dodge(width = 0.5)
  ) +
  stat_summary(
    aes(color = group),
    fun.data = mean_se, geom = "errorbar", width = 0.5,
    position = position_dodge(width = 0.5)
  ) +
  # The line uses the manually shifted x, grouped by emotion, so it joins
  # the two group means inside each emotion.
  stat_summary(aes(x = new_emot, group = emotion), fun = mean, geom = "line") +
  theme_bw()
Created on 2021-03-24 by the reprex package (v1.0.0)
Setting geom_line to the same position as pointrange and errorbar will solve the problem.
i.e.,
# Mean line given the same position_dodge() as the pointrange and errorbar
# layers. `fun` replaces the deprecated `fun.y`, matching those layers.
stat_summary(
  aes(y = Score, group = Emotion),
  fun = mean, geom = "line",
  size = 2.2, alpha = 1.2, width = 0.25, colour = 'gray48',
  position = position_dodge(width = 0.3)
)

R ggpubr Boxplot adding summary stats label to dynamic Y axis

I would like to add summary statistics on a box plot at the max of a dynamic y axis.
In the real data the y axis is a dynamic dropdown, one value is between 0 - 6; and the other between 0 - 100. In the example below I have hard coded where I would like the labels to be, but I cannot hard code them in the real data.
Is there a way to either:
Set labels outside the graph above the y axis? So that the labels will not move even if the axis changes?
Or is there a way to set it to max of Y + n?
Example:
# library
library(ggplot2)
library(ggpubr)
# create a data frame
# (`<-`, TRUE and seq_len() replace `=`, T and seq(1:280); values unchanged)
variety <- rep(LETTERS[1:7], each = 40)
treatment <- rep(c("high", "low"), each = 20)
note <- seq_len(280) + sample(1:150, 280, replace = TRUE)
data <- data.frame(variety, treatment, note)

# grouped boxplot
ggplot(data, aes(x = variety, y = note, fill = treatment)) +
  geom_boxplot() +
  scale_fill_manual(values = c("#79AAB9", "#467786")) +
  stat_compare_means(aes(group = treatment), label = "p.format") +
  # Median of each box, printed at a hard-coded height (y = 460).
  stat_summary(
    fun.data = function(x) {
      data.frame(y = 460, label = paste(round(median(x), 1)))
    },
    geom = "text",
    aes(group = treatment),
    hjust = 0.5,
    position = position_dodge(0.9)
  ) +
  # Group size, printed just below the median label (y = 445).
  stat_summary(
    fun.data = function(x) {
      data.frame(y = 445, label = paste("n", length(x)))
    },
    geom = "text",
    aes(group = treatment),
    hjust = 0.5,
    position = position_dodge(0.9)
  ) +
  expand_limits(y = 100)
Thanks so much for any help in advance.
Managed to get the following working with a suggestion from @MarkNeal:
# library
library(ggplot2)
library(ggpubr)
# create a data frame
# (`<-`, TRUE and seq_len() replace `=`, T and seq(1:280); values unchanged)
variety <- rep(LETTERS[1:7], each = 40)
treatment <- rep(c("high", "low"), each = 20)
note <- seq_len(280) + sample(1:150, 280, replace = TRUE)
data <- data.frame(variety, treatment, note)

# grouped boxplot
ggplot(data, aes(x = variety, y = note, fill = treatment)) +
  geom_boxplot() +
  scale_fill_manual(values = c("#79AAB9", "#467786")) +
  stat_compare_means(
    aes(group = treatment),
    label = "p.format", vjust = 3
  ) +
  # y = Inf places the label at the top of the panel instead of a
  # hard-coded height; vjust then moves it down from that edge.
  stat_summary(
    fun.data = function(x) {
      data.frame(y = Inf, label = paste(round(median(x), 1)))
    },
    geom = "text",
    aes(group = treatment),
    hjust = 0.5, vjust = 1,
    position = position_dodge(0.9)
  ) +
  # Group size, one line lower (vjust = 2) than the median label.
  stat_summary(
    fun.data = function(x) {
      data.frame(y = Inf, label = paste("n", length(x)))
    },
    geom = "text",
    aes(group = treatment),
    hjust = 0.5, vjust = 2,
    position = position_dodge(0.9)
  )

How to create multiple graphs for Y = (Values in different columns for a given variable) and X = Time

In my dat2 file I have a series of body measurements for various subjects. These body measurements are bicep, tricep, suprailiac, sub scapular....ffm (fat free mass). I have taken these measurements at 0,6,12 weeks. The subjects were on either treatment 'a' or treatment 'b'.
I would like to create multiple graphs for both treatments where Y = Body Measurement and X = Time. I know how to do this for just one variable like "weight".
# Mean weight at each time point (points joined by a line) with
# mean_cl_normal error bars, one panel per treatment.
line <- ggplot(dat2, aes(time, weight))
line +
  stat_summary(fun.y = mean, geom = "point") +
  # group = 1 joins the means into a single line within each panel.
  stat_summary(fun.y = mean, geom = "line", aes(group = 1)) +
  stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) +
  facet_wrap(~ treatment, nrow = 4) +
  labs(x = "Time", y = "Weight") +
  scale_x_continuous(expand = c(0, 0), breaks = c(0, 6, 12))
I tried the following code to do this for multiple variables and to print multiple graphs.
# Attempt to plot every body measurement over time by reshaping dat2 to long
# form (one row per subject, time and measurement) before plotting.
# NOTE(review): the result of this first select() is neither assigned nor
# piped onward, so it appears to have no effect on the plot below.
dat2 %>% select(code:solidfatstotallog, time, bicep, tricep, subscapular, suprailiac, weight, wc, bia, bmi, wthr, X.fat, fm, ffm, height, density)
dat2 %>%
# NOTE(review): the next line has no trailing %>%, so the pipeline ends there
# and gather() starts a new expression without the selected data --
# presumably a pipe is missing.
select(code, treatment, time, bicep:ffm)
gather(body_measure, value, -c(code, treatment, time)) %>%
# NOTE(review): y is mapped to body_measure (the key column created by
# gather()), not to value (the measurements) -- confirm this is intended.
ggplot(aes(time, body_measure)) +
stat_summary(fun.y = mean, geom = "point") +
stat_summary(fun.y = mean, geom = "line", aes(group = 1)) +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) +
facet_wrap(~ treatment, nrow = 4) + labs(x = "Time", y = "Body_Measure") +
scale_x_continuous(expand = c(0, 0), breaks = c(0, 6, 12))
However, this just creates one graph of Y = weight and X = Time, only with different labels: the Y axis is now labelled body_measure. What am I missing here?
See example picture for Weight by Time. I would like to create multiple such graphs, ideally on a single page.
So I was able to create a facet_grid, but the graph is not nice to look at. I used the following code.
# One panel per treatment (rows) and body measurement (columns): mean score
# over time with mean_cl_normal error bars.
dat2 %>%
  # drop irrelevant variables
  select(code, treatment, time, bicep:ffm, -c(density, height)) %>%
  # data into long form: one row per subject, time point and measure
  gather(measure, score, -c(code, treatment, time)) %>%
  ggplot(aes(time, score)) +
  stat_summary(fun.y = mean, geom = "point") +
  stat_summary(fun.y = mean, geom = "line", aes(group = 1)) +
  stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) +
  facet_grid(treatment ~ measure) +
  labs(x = "Time", y = "Body_Measure") +
  scale_x_continuous(expand = c(0, 0), breaks = c(0, 6, 12))

Resources