log transform X axis R - r

I have the following raw data that I plotted in R:
And I would like to edit this plot to look like this version below which was made by log-transforming the X axis using Excel
However, when I run my code below using scale_x_log10(), the output is not the desired plot I was hoping to make. See image below:
Can anyone identify where I have gone wrong?
# Per-group mean of y at each x, drawn as a line plus points, with a log10
# x axis restricted to [0.01, 40].
ggplot(data, aes(x = x, y = y, group = group, colour = group)) +
  # Mean line; linetype varies by group (group and colour are inherited).
  stat_summary(
    aes(linetype = group),
    geom = "line",
    fun = "mean",
    size = 1.2
  ) +
  # Mean points on top of the line.
  stat_summary(geom = "point", fun = "mean", size = 3) +
  scale_x_log10(breaks = c(0.01, 0.1, 1, 10), limits = c(0.01, 40)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
  scale_colour_manual(values = c("mediumturquoise", "red")) +
  scale_linetype_manual(values = c("solid", "dashed")) +
  theme_apa()

It looks like your first data point is at zero, which cannot be displayed on a log scale. You'll need to work out whether your data in Excel differ from the data in R; failing that, you could achieve a similar result by raising the lowest value of x to a small positive number with pmax():
# Same plot as in the question, but x is floored at 0.01 with pmax() so that a
# zero value can be placed on the log10 axis.
ggplot(data, aes(x = pmax(x, 0.01), y = y, group = group, colour = group)) +
  # Mean line; linetype varies by group (group and colour are inherited).
  stat_summary(
    aes(linetype = group),
    geom = "line",
    fun = "mean",
    size = 1.2
  ) +
  # Mean points on top of the line.
  stat_summary(geom = "point", fun = "mean", size = 3) +
  scale_x_log10(breaks = c(0.01, 0.1, 1, 10), limits = c(0.01, 40)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
  scale_colour_manual(values = c("mediumturquoise", "red")) +
  scale_linetype_manual(values = c("solid", "dashed")) +
  theme_apa()

Related

ggplot not respecting xlimits in coord_cartesian

A new problem with the same code from an earlier question. Funny because this is the first line of code in my R journey that just keeps stumping me. Following up on the previous post, now the extra line charts are being plotted but I realized that the x axis is not respecting the limits I'm trying to set. Here is the code. Changing the values for xlim does not 'zoom in' on the x axis and seems to have no effect at all:
# Jittered scatter of friend_count against age from `pf`, with the mean and the
# 10% / 90% quantiles overlaid as lines.
ggplot(aes(x = age, y = friend_count), data = pf) +
# Requested x-axis window (ages 13 to 90).
# NOTE(review): coord_trans() is added further down; presumably that later
# coordinate system replaces this one, which would explain why changing xlim
# has no visible effect -- confirm against the ggplot2 documentation.
coord_cartesian(xlim = c(13,90)) +
# Faint orange points, jittered horizontally only (h = 0: no vertical jitter).
geom_point(alpha = 0.05,
position = position_jitter(h = 0),
color = 'orange') +
# Square-root transform of the y axis.
coord_trans(y = "sqrt") +
# Mean friend_count per age.
geom_line(stat = 'summary', fun = mean) +
# 10% quantile per age, dashed blue.
geom_line(stat = 'summary', fun = quantile, fun.args = list(probs = .1),
linetype = 2, color = 'blue') +
# 90% quantile per age, dashed blue.
geom_line(stat = 'summary', fun = quantile, fun.args = list(probs = .9),
linetype = 2, color = 'blue')

stat_summary() and fun.data = mean_sdl not working

# Simulate two days of measurements for 20 subjects. The seed is fixed so the
# example is reproducible.
set.seed(1)
n_subjects <- 20L
day1 <- rnorm(n_subjects, mean = 0, sd = 1)
day2 <- rnorm(n_subjects, mean = 5, sd = 1)
# Subject labels S1..S20, repeated once per day.
Subject <- rep(paste0("S", seq_len(n_subjects)), times = 2)
# Day 1 values followed by day 2 values in a single Value column.
Data <- data.frame(Value = c(day1, day2))
Day <- rep(c("Day 1", "Day 2"), each = n_subjects)
# Long-format data frame with columns Subject, Value, Day (40 rows).
df <- cbind(Subject, Data, Day)
Using this random data, I'd like to plot individual points with unique color for each subject and a summary point (mean + standard deviation).
It seems that the plot is okay when all points are plotted with the same color because stat_summary(fun.data = mean_sdl) works properly.
# All points in one colour, plus a black mean_sdl (mult = 2) pointrange per Day.
ggplot(df, aes(x = Day, y = Value)) +
  stat_summary(
    geom = "pointrange",
    fun.data = mean_sdl,
    fun.args = list(mult = 2),
    colour = "black",
    size = 1.2,
    fatten = 3 * 1.2
  ) +
  geom_point(size = 2)
But not when all points have unique color (for each subject).
# Same plot, but with fill mapped to Subject so each subject's point gets its
# own colour. This is the version reported as not working: because
# fill = Subject is set in ggplot(), it is inherited by stat_summary(), which
# then summarises per Subject instead of per Day.
ggplot(data = df, mapping = aes(x = Day, y = Value,
fill = Subject)) +
# Intended as one mean_sdl (mult = 2) pointrange per Day.
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color = 'black') +
# Filled circles (shape 21) with a white outline, fill taken from Subject.
geom_point(shape = 21, color = 'white', size = 2)
In your example ggplot assumes that each color corresponds to an individual group, but you want the grouping and color to be separate. Therefore, you need to explicitly define the group to be "Day".
# Fill still varies by Subject, but group = Day is set explicitly so that
# stat_summary() computes one mean_sdl (mult = 2) pointrange per Day.
ggplot(df, aes(x = Day, y = Value, group = Day, fill = Subject)) +
  stat_summary(
    geom = "pointrange",
    fun.data = mean_sdl,
    fun.args = list(mult = 2),
    colour = "black",
    size = 1.2,
    fatten = 3 * 1.2
  ) +
  # Filled circles (shape 21) with a white outline.
  geom_point(shape = 21, colour = "white", size = 2)
Try the following:
# Subject is mapped only inside geom_point(), so stat_summary() sees no
# per-subject grouping and summarises per Day.
ggplot(df, aes(x = Day, y = Value)) +
  stat_summary(
    geom = "pointrange",
    fun.data = mean_sdl,
    fun.args = list(mult = 2),
    colour = "black",
    size = 1.2,
    fatten = 3 * 1.2
  ) +
  geom_point(aes(colour = Subject), size = 2)
Instead of specifying fill in aes() in the first line (ggplot(...)), I've mapped Subject to color inside the geom_point() element. Otherwise, stat_summary() will be doing its calculations grouped by Subject!

custom color for each group + category combination raincloud plot

I have a raincloud plot:
but I would like each combination of TL group and yr to be a different color, as one can do in base boxplot():
I have tried using the following code for the raincloud plot:
# Raincloud plot of d13C by length category (TL group) from `yct_rain`,
# with both fill and colour mapped to yr. Stored in Y_C_rain and printed below.
Y_C_rain= ggplot(yct_rain, aes(y=d13C, x=lengthcat,fill = yr,color=yr)) +
# Half-violin nudged 0.2 to the right of each category.
geom_flat_violin(position = position_nudge(x = .2, y =0), alpha = .8)+
# Jittered raw points.
# NOTE(review): `y = ` is left empty in this aes(); presumably y = d13C was
# intended (or the argument should be dropped and inherited) -- confirm.
geom_point(aes(y = , color = yr),
position = position_jitter(width = .05), size = 2, alpha = .5) +
# Transparent boxplot without outlier markers.
# NOTE(review): `guides` does not look like a geom_boxplot() argument;
# show.legend = FALSE may have been intended -- confirm.
geom_boxplot(width = .3, guides = FALSE, outlier.shape = NA, alpha = 0, notch = FALSE) +
# Group mean as a black-filled point.
stat_summary(fun= mean, geom = "point", shape = 21, size = 3, fill = "black") +
# y axis fixed to [-35, -10] with breaks every 5 and no expansion.
scale_y_continuous (limits = c(-35,-10),expand = c(0,0),breaks=seq(-35,-10,5)) +
ylab("d13C") + xlab("TL group") +
ggtitle("YCT d13C") +
theme_bw() +
# NOTE(review): the colour vector is passed positionally to the *_discrete
# scales; presumably scale_colour_manual(values = ...) and
# scale_fill_manual(values = ...) are what is needed here -- confirm.
scale_colour_discrete(my_clrs_yct)+
scale_fill_discrete(my_clrs_yct)
Y_C_rain
I know that the colors in the rain plot will need to be coded with some variant of scale_fill_xxx but I am hitting a road block since it appears that each point also needs to have its own color. Therefore the variations of scale_fill_xxx with only 6 individual colors listed is not working.
Do you want something like this?
library(dplyr)
library(data.table)
library(ggplot2)
# geom_flat_violin() is not part of ggplot2; it is taken from
# https://gist.github.com/dgrtwo/eb7750e74997891d7c20 and must be defined
# (e.g. sourced) before the plot is built.
my_clrs_yct <- c("#404040", "#407a8c", "#7a7a7a", "#404f86", "#a6a6a6", "#3e1451")
## storms from dplyr is used as a reproducible example
data("storms")
setDT(storms)
# Two-level season factor: months 1-6 -> "Q12", months 7-12 -> "Q34".
storms[, season := factor(ifelse(month <= 6, "Q12", "Q34"))]
# Colour and fill are mapped to the status x season combination so that every
# combination gets its own entry of my_clrs_yct.
# NOTE(review): my_clrs_yct holds 6 colours, so this assumes status has
# 3 levels (3 x 2 seasons) -- confirm for your version of dplyr::storms.
ggplot(storms, aes(x = status, y = pressure,
                   colour = interaction(status, season),
                   fill = interaction(status, season))) +
  # Raw points, jittered and dodged by combination (colour is inherited).
  geom_point(position = position_jitterdodge(jitter.width = .1,
                                             dodge.width = .25),
             size = 2, alpha = .5) +
  # Half-violin nudged to the right of each category.
  geom_flat_violin(position = position_nudge(x = .5, y = 0), alpha = .5) +
  # Transparent boxplot; show.legend = FALSE keeps it out of the legend
  # (`guides` is not a geom_boxplot() parameter and was silently ignored).
  geom_boxplot(width = .3, show.legend = FALSE, outlier.shape = NA,
               alpha = 0) +
  # Group means as black-filled points, nudged left/right to sit on their box.
  stat_summary(fun = mean, geom = "point", shape = 21, size = 3,
               fill = "black",
               position = position_nudge(x = c(-.075, .075), y = 0)) +
  theme_bw() +
  scale_colour_manual(values = my_clrs_yct) +
  scale_fill_manual(values = my_clrs_yct)

Editing graph using ggpattern in R

I wrote some code to make a graph (both below)
# Dodged bar chart of mean conflict per condition, split by smoking status.
# The legend title is passed via `name =`; the previous `(title = "...")` was a
# parenthesised assignment handed to the scale as an unnamed positional value.
# NOTE(review): `labels` are applied in the order of the scale's breaks; this
# assumes smoking_status orders smokers before ex-smokers -- confirm.
p <- ggplot(for_plots,
            aes(x = factor(condition), y = conflict, fill = smoking_status)) +
  stat_summary(fun = "mean", geom = "bar", position = "dodge") +
  theme_classic() +
  scale_fill_manual(name = "Smoking status",
                    labels = c("Smokers", "Ex"),
                    values = c("blue", "gold"), guide = "legend") +
  scale_color_manual(name = "Smoking status",
                     labels = c("Smokers", "Ex"),
                     values = c("blue", "gold"), guide = "legend") +
  labs(x = 'Condition', y = 'Conflict (AUC)') +
  scale_x_discrete(labels = c('Animal','Smoking')) +
  # Zoom the y axis to [0, 1.5] and let the bars start at the axis line.
  coord_cartesian(ylim = c(0, 1.5)) +
  scale_y_continuous(expand = c(0, 0))
# Add mean +/- SE error bars, dodged to line up with the bars.
p +
  stat_summary(fun.data = mean_se, geom = "errorbar", width = .08,
               position = position_dodge(0.9))
However, I recently read about 'ggpattern' and wondered if anyone could help me add some diagonal black lines to the yellow bars in my plot (e.g. ex-smokers conflict). I have tried multiple ways, but adding 'geom_col_pattern' to the code seems to mess up the Y axis and provide overall conflict for each condition (animal, smoking) rather than separately for smokers and ex-smokers. I think the 'geom_col_pattern' perhaps is not compatible with the 'stat_summary' I have in my code. Does anyone have any suggestions?
Thank you
Instead of adding a geom_col_pattern on top of your plot, just update the geom argument of stat_summary.
library(ggplot2)
library(ggpattern) # provides the "bar_pattern" geom and scale_pattern_manual()

# Replicate of your data frame (seeded so the example is reproducible)
set.seed(1)
n_obs <- 100
for_plots <- data.frame(
  condition = sample(rep(c("Animal", "Smoking"), 100), n_obs),
  # Explicit level order: without it the values sort as "Ex", "Smokers", and
  # the manual labels/colours/patterns below would land on the wrong group.
  smoking_status = factor(sample(rep(c("Smokers", "Ex"), 100), n_obs),
                          levels = c("Smokers", "Ex")),
  conflict = NA_real_
)
# Conflict scores: 0.8-1.3 for the smoking condition, 0.5-1 for animal.
is_smoking <- for_plots$condition == "Smoking"
n_smoking <- sum(is_smoking)
for_plots$conflict[is_smoking] <-
  sample(seq(0.8, 1.3, length.out = n_smoking), n_smoking)
n_animal <- sum(!is_smoking)
for_plots$conflict[!is_smoking] <-
  sample(seq(0.5, 1, length.out = n_animal), n_animal)

# Same bar chart as in the question, but the bars are drawn with the
# "bar_pattern" geom so that a pattern can be mapped to smoking_status.
p <- ggplot(for_plots,
            aes(x = factor(condition), y = conflict, fill = smoking_status)) +
  stat_summary(aes(pattern = smoking_status),
               fun = "mean", position = "dodge",
               geom = "bar_pattern", pattern_fill = "black",
               colour = "black") + # edited part
  theme_classic() +
  # Legend title goes in `name =` (not a parenthesised `title` assignment).
  scale_fill_manual(name = "Smoking status",
                    labels = c("Smokers", "Ex"),
                    values = c("blue", "gold"), guide = "legend") +
  scale_color_manual(name = "Smoking status",
                     labels = c("Smokers", "Ex"),
                     values = c("blue", "gold"), guide = "legend") +
  labs(x = 'Condition', y = 'Conflict (AUC)') +
  # No pattern for smokers, diagonal stripes for ex-smokers; the same name and
  # labels as the fill scale are given so both can share one legend.
  scale_pattern_manual(name = "Smoking status",
                       labels = c("Smokers", "Ex"),
                       values = c("none", "stripe")) + # edited part
  scale_x_discrete(labels = c('Animal','Smoking')) +
  coord_cartesian(ylim = c(0, 1.5)) +
  scale_y_continuous(expand = c(0, 0))
# Add mean +/- SE error bars, dodged to line up with the bars.
p +
  stat_summary(fun.data = mean_se, geom = "errorbar", width = .08,
               position = position_dodge(0.9))

Display geom_label just for mean values

I'd really appreciate your help. I simply want to display the mean value at each time point for 2 treatment groups (corresponding to the red line). I've tried geom_label, but it seems to label every single point. Any idea how to label just the means (i.e. 4 value labels in treatment group A and 4 in treatment group B)?
# Assign random alpha (0 [50%] or 1 [50%]) values
# One runif() draw per id, so every row of a given id gets the same alpha.
# (ddply and `.()` are presumably from the plyr package -- confirm.)
dat <- ddply(dat, .(id), function(x){
x$alpha = ifelse(runif(n = 1) > 0.50, 1, 0)
x
})
# Plot
# iop over week, one trajectory per id, faceted by treatment (trt).
ggplot(data = dat, aes(x = week, y = iop, group = id)) +
geom_point(alpha = 0.5) +
# Individual lines; alpha of 0 hides a line, alpha of 1 shows it.
geom_line(aes(alpha=alpha, group=id)) +
guides(alpha=FALSE) +
# Overall mean per week in red: aes(group = 1) overrides the per-id grouping.
stat_summary(aes(group = 1), geom = "point", fun.y = mean, size = 3, color = "red") +
stat_summary(aes(group = 1), geom = "line", fun.y = mean, size = 2, color = "red") +
# Label showing the summarised y value rounded to 2 decimals.
# NOTE(review): unlike the red layers above, this layer does not set
# aes(group = 1), so it presumably inherits group = id and summarises per id,
# which would label every point -- confirm.
geom_label(stat = 'summary', fun.y=mean, aes(label = round(..y.., 2)), nudge_x = 0.1, hjust = 0) +
scale_x_continuous(breaks = seq(0,18,6)) +
facet_grid(. ~ trt) +
theme_classic() +
xlab("Month") + ylab("IOP")

Resources