Create ggplot with mean and confidence interval - r

I have created a graph with a curve for each individual and a mean curve that is created the same way. I would like to have a confidence interval on my mean curve. How can I do this? Should the mean curve be created in a different way?
This is my code until now:
DNAMorfR %>%
drop_na(`Normal morphology (%)`) %>%
ggplot(aes(x = Time, y = `Normal morphology (%)`, linetype = Patient, color = Patient, group
= Patient, na.rm = TRUE)) +
geom_line(size = 1) +
theme_minimal() + ggtitle("(A1) Normal morphology") +
geom_point(size = 1.5) +
scale_y_continuous(limits = c(0, 25), breaks=seq(0, 25, by = 5)) +
geom_hline(yintercept = 4, color = "grey", size = 1) +
scale_color_manual(values = c("black", "#FF3333", "#FF9933", "#CC9900"))
And this is my data:
data.frame(
stringsAsFactors = FALSE,
check.names = FALSE,
Patient = c("1","1","1","2","2","2","3","3","3","mean","mean","mean"),
`Normal morphology (%)` = c(7, 2, 3, 1, 3, 3, 6, 7, 8, 7, 9, 8),
Time = as.factor(c("Week 1","Week 2","Week 3","Week 1","Week 2","Week 3","Week 1","Week 2",
"Week 3","Week 1","Week 2","Week 3")))

This could be achieved like so:
Instead of adding the mean as additional rows you could make a summary df using e.g. dplyr::summarize
make use of stat_summay to compute the summary statistics on the fly as I do in my approach below and computed the condfidence interval as mean(x) +/- 1.96 / (length(x) - 1) * sd(x)
library(ggplot2)
library(tidyr)
library(dplyr)
DNAMorfR1 <- DNAMorfR %>%
drop_na(`Normal morphology (%)`) %>%
filter(Patient != "mean")
ggplot(DNAMorfR1, aes(x = Time, y = `Normal morphology (%)`)) +
geom_line(aes(linetype = Patient, color = Patient, group = Patient), size = 1) +
geom_point(aes(color = Patient, group = Patient), size = 1.5) +
stat_summary(aes(color = "mean", linetype = "mean", group = "mean"), geom = "line", fun = "mean") +
stat_summary(aes(color = "mean", group = "mean"), geom = "pointrange", fun = "mean",
fun.min = function(x) mean(x) - 1.96 / (length(x) - 1) * sd(x),
fun.max = function(x) mean(x) + 1.96 / (length(x) - 1) * sd(x), show.legend = FALSE) +
theme_minimal() +
ggtitle("(A1) Normal morphology") +
scale_y_continuous(limits = c(0, 25), breaks=seq(0, 25, by = 5)) +
geom_hline(yintercept = 4, color = "grey", size = 1) +
scale_color_manual(values = c("black", "#FF3333", "#FF9933", "#CC9900"))

You can use geom = "ribbon" to get the 95% CI band to your mean line. Credits to stefan where the main logic is already answered!
DNAMorfR %>%
drop_na(`Normal morphology (%)`) %>%
filter(row_number() <= n()-3) %>%
ggplot(aes(x = Time, y = `Normal morphology (%)`)) +
geom_line(aes(linetype = Patient, color = Patient, group = Patient), size = 1) +
geom_point(aes(color = Patient, group = Patient), size = 2) +
stat_summary(aes(color = "mean", linetype = "mean", group = "mean"), size=1.5, geom = "line", fun = "mean") +
stat_summary(aes(color = "mean", group = "mean"), geom = "ribbon", fun = "mean", size= 0.5, alpha=0.1,
fun.min = function(x) mean(x) - 1.96 / (length(x) - 1) * sd(x),
fun.max = function(x) mean(x) + 1.96 / (length(x) - 1) * sd(x), show.legend = FALSE) +
theme_minimal() +
ggtitle("(A1) Normal morphology") +
scale_y_continuous(limits = c(0, 25), breaks=seq(0, 25, by = 5)) +
geom_hline(yintercept = 4, color = "grey", size = 1) +
scale_color_manual(values = c("black", "#FF3333", "#FF9933", "#CC9900"))

Related

How to manually change line size and alpha values for ggplot2 lines (separated by factor)?

I want to create a graph where I can change the line size for each line c(1,2,3) and the alpha values for each line c(0.5,0.6,0.7). I tried to use scale_size_manual but it didn't make any difference. Any ideas on how to proceed?
var <- c("T","T","T","M","M","M","A","A","A")
val <- rnorm(12,4,5)
x <- c(1:12)
df <- data.frame(var,val,x)
ggplot(aes(x= x , y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey","blue","black")) + geom_smooth(aes(x = x, y = val), formula = "y ~ x", method = "loess",se = FALSE, size = 1) + scale_x_continuous(breaks=seq(1, 12, 1), limits=c(1, 12)) + scale_size_manual(values = c(1,2,3))
To set the size and alpha values for your lines you have to map on aesthetics. Otherwise scale_size_manual will have no effect:
library(ggplot2)
ggplot(aes(x = x, y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey", "blue", "black")) +
geom_smooth(aes(x = x, y = val, size = var, alpha = var), formula = "y ~ x", method = "loess", se = FALSE) +
scale_x_continuous(breaks = seq(1, 12, 1), limits = c(1, 12)) +
scale_size_manual(values = c(1, 2, 3)) +
scale_alpha_manual(values = c(.5, .6, .7))

How to use specific filling colors when using scale_fill_binned()?

I would like to use my own filling colors (ex: c("red", "blue", "grey50", "black")) when using function scale_fill_binned() withing a ggplot code. How can I do this?
Here is a minimal reproducible example:
library(tidyverse)
dat <- mtcars %>%
group_by(cyl) %>%
summarise(n = n(),
mean_hp = mean(hp)) %>%
ungroup
ggplot(data = dat, aes(x = cyl, y = mean_hp, size = n, fill = n)) +
geom_point(shape = 21) +
scale_size_binned(breaks = c(8, 10, 12), guide = guide_bins(show.limits = T)) +
scale_fill_binned(breaks = c(8, 10, 12), guide = guide_bins(show.limits = T), type = "viridis") +
labs(x = "Cylinder", y = "Mean hp", fill = "Nb of cars", size = "Nb of cars") +
theme_minimal()
Here is what the output looks like:
To use this family of functions you need to provide a function that returns a an object with class "ScaleContinuous" "Scale" "ggproto" "gg" (i.e. the equivalent output to scale_fill_viridis_c)!
scale_fill_custom <- function (..., alpha = 1, begin = 0, end = 1, direction = 1,
option = "D", values = NULL, space = "Lab", na.value = "grey50",
guide = "colourbar", aesthetics = "fill") {
continuous_scale(aesthetics, scale_name = "custom",
palette = scales:::gradient_n_pal(c("red", "blue", "grey50", "black"),
values, space), na.value = na.value,
guide = guide, ...)
}
ggplot(data = dat, aes(x = cyl, y = mean_hp, size = n, fill = n)) +
geom_point(shape = 21) +
scale_size_binned(breaks = c(8, 10, 12), guide = guide_bins(show.limits = T)) +
scale_fill_binned(breaks = c(8, 10, 12), guide = guide_bins(show.limits = T),
type = scale_fill_custom) +
labs(x = "Cylinder", y = "Mean hp", fill = "Nb of cars", size = "Nb of cars") +
theme_minimal()
Note that you are using colour as a scale to be translated by the eye into numerically meaningful difference. The colours are interpolated between the manually applied points, so will not actually be your exact colours. If you wish to band your averages by colour it would be preferable to create a factor, then manually apply your theme.
ggplot(data = mutate(dat, n = cut(n, breaks = c(0, 8, 10, 12, 20))),
aes(x = cyl, y = mean_hp, size = n, fill = n)) +
geom_point(shape = 21) +
scale_size_discrete() +
scale_fill_manual(values = c("red", "blue", "grey50", "black")) +
labs(x = "Cylinder", y = "Mean hp", fill = "Nb of cars", size = "Nb of cars") +
theme_minimal()
With the comment of #teunbrand, I was able to come up with something.
cols <- c("red", "blue", "grey50", "black")
ggplot(data = dat, aes(x = cyl, y = mean_hp, size = n, fill = n)) +
geom_point(shape = 21) +
scale_size_binned(breaks = c(8, 10, 12), guide = guide_bins(show.limits = T)) +
labs(x = "Cylinder", y = "Mean hp", fill = "Nb of cars", size = "Nb of cars") +
theme_minimal() +
binned_scale(aesthetics = "fill", scale_name = "custom",
palette = ggplot2:::binned_pal(scales::manual_pal(values = cols)),
guide = "bins",
breaks = c(8, 10, 12), limits = c(min(dat$n), max(dat$n)), show.limits = T)
Here is what the output looks like:

How to add shadow of margin of error to a diagramm

I try to create a survival prediction' diagramm
library("survival")
# fit regression
res.cox <- coxph(Surv(time, status) ~ age + sex + wt.loss, data = lung)
res.cox
Fit a new data
sex_df <- with(lung,
data.frame(sex = c(1, 2),
age = rep(mean(age, na.rm = TRUE), 2),
wt.loss = rep(mean(wt.loss, na.rm = TRUE), 2) ))
The diagramm
library("ggplot2")
fit <- survfit(res.cox, newdata = sex_df)
library(reshape2)
dat = data.frame(surv = fit$surv,lower= fit$lower, upper = fit$upper,time= fit$time)
head(dat)
head(melt(dat, id="time"))
data = melt(dat, id="time")
obj = strsplit(as.character(data$variable), "[.]") # делим текст на объекты по запятой
data$line = sapply(obj, '[', 1)
data$number = sapply(obj, '[', 2)
ggplot(data, aes(x=time, y=value, group=variable)) +
geom_line(aes(linetype=line, color=as.factor(number), size=line)) +
# geom_point(aes(color=number)) +
theme(legend.position="top", axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text=element_text(size=40),
legend.key.size = unit(3,"line"))+
scale_linetype_manual(values=c( 2,1,2))+ # "dotted", "twodash","dotted"
scale_color_manual(values=c("#E7B800", "#2E9FDF", 'red'))+
scale_size_manual(values=c(2, 3.5, 2)) +
scale_x_continuous(limits=c(0, 840),
breaks=seq(0, 840, 120)) + ylab("Surv prob") +
guides(linetype = FALSE, size = FALSE, color = guide_legend(override.aes = list(size=5))) + labs(color='') +
geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' &
data$number == "1"],6),
ymax = rep(data$value[data$line == 'upper' & data$number == "1"],6)),
fill = "#E7B800",alpha=0.1) +
geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & data$number == "2"],6),
ymax = rep(data$value[data$line == 'upper' & data$number == "2"],6)),
fill = "#2E9FDF",alpha=0.1)
The QUESTION
The diagramm is ok but but I have to add with hands this
geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & data$number == "2"],6),
ymax = rep(data$value[data$line == 'upper' & data$number == "2"],6)),
fill = "#2E9FDF",alpha=0.1)
And if there were three, but not two elements in the new data, you would have to rewrite the code. Is it possible to rewrite the code so that it does not depend on the number of elements of new data?
I try to use a loop
temp = list()
uniq <- unique(unlist(data$number))
for (i in 1:length(levels(as.factor(data$number)))) {
n = geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & data$number == uniq[i]],6),
ymax = rep(data$value[data$line == 'upper' & data$number == uniq[i]],6)),
fill = "#2E9FDF", alpha=0.1) #
temp = append(n, temp)
}
temp
but this is an unsuccessful attempt. Thanks for any idea
By reshaping the data.frame so that surv, lower, and upper are separate vectors, you can group the geom_ribbon by your elements rather than the "meaning" of the lines.
Below is the code using the tidyr package; the first section is simply your code for generating the data.
library(survival)
library(reshape2)
library(ggplot2)
# fit regression
res.cox <- coxph(Surv(time, status) ~ age + sex + wt.loss, data = lung)
res.cox
sex_df <- with(lung,
data.frame(sex = c(1, 2),
age = rep(mean(age, na.rm = TRUE), 2),
wt.loss = rep(mean(wt.loss, na.rm = TRUE), 2) ))
fit <- survfit(res.cox, newdata = sex_df)
dat = data.frame(surv = fit$surv,lower= fit$lower, upper = fit$upper,time= fit$time)
head(dat)
head(melt(dat, id="time"))
data = melt(dat, id="time")
# Reformats the data into format with the survival curve and the confidence intervals in their own columns
library(tidyr)
data_wide <- data %>%
separate(col = variable, into = c("type", "sex"), sep = "\\.") %>%
spread(key = type, value = value)
ggplot(data = data_wide) +
geom_line(aes(x = time, y = surv, group = sex, colour = sex),
size = 3.5,
linetype = 1) +
geom_line(aes(x = time, y = lower, group = sex, colour = sex),
size = 2,
linetype = 2) +
geom_line(aes(x = time, y = upper, group = sex, colour = sex),
size = 2,
linetype = 2) +
# Geom_ribbom now grouped by sex
geom_ribbon(aes(x = time, ymin = lower, ymax = upper, group = sex, fill = sex),
alpha = 0.1) +
scale_colour_manual(values = c("#E7B800", "#2E9FDF")) +
scale_fill_manual(values = c("#E7B800", "#2E9FDF")) +
scale_x_continuous(limits = c(0, 840),
breaks = seq(0, 840, 120)) +
theme(legend.position = "top",
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 40),
legend.key.size = unit(3, "line")) +
ylab("Surv prob")
And this is the plot output:
We add another element to test if this works, you will have to add more colours to scale_colour_manual and scale_fill_manual.
library(dplyr)
data_wide2 <- filter(data_wide, sex == "1") %>%
mutate(sex = "3",
surv = surv - 0.2,
upper = upper - 0.2,
lower = lower - 0.2) %>%
rbind(data_wide)
This gives the following plot:

Legend in ggplot2 when separately added elements

I'd like to add a ggplot legend when we have separately added elements.
I'm aware of previous answers to this question, but for some reason I haven't been able to sort out my issue (Add legend to ggplot2 line plot). Thanks!
Fake data
library(ggplot2)
Month <- c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8,
8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12)
RR <- c(81.30271, 56.97511, 88.95428, 78.43363, 51.39398, 83.53967,
95.21302, 63.74089, 94.27137, 92.36272, 69.30449, 100.34571,
128.52172, 89.84833, 133.61527, 128.23847, 91.21498, 145.45016,
124.72499, 85.96523, 153.82113, 123.22878, 97.14116, 154.57004,
111.59289, 83.26763, 105.47873, 95.55557, 88.77395, 87.31212,
93.03579, 65.35055, 93.22413, 103.43140, 70.40292, 91.58487)
MI <- c(66.729379, 41.891775, 52.137614, 59.376967, 30.717318, 49.339675,
62.469691, 25.667561, 60.211374, 48.902722, 18.764486, 65.565712,
69.985054, 27.418330, 89.231939, 55.685138, 11.484980, 90.666826,
44.101654, -8.448102, 87.637798, 55.978782, 15.431156, 92.310042,
69.596228, 34.897628, 56.505393, 69.008904, 61.627285, 36.935451,
76.392457, 47.493886, 53.750796, 88.204738, 54.806257, 54.358201)
PE <- c(14.57333, 15.08333, 36.81667, 19.05667, 20.67667, 34.20000, 32.74333,
38.07333, 34.06000, 43.46000, 50.54000, 34.78000, 58.53667, 62.43000,
44.38333, 72.55333, 79.73000, 54.78333, 80.62333, 94.41333, 66.18333,
67.25000, 81.71000, 62.26000, 41.99667, 48.37000, 48.97333, 26.54667,
27.14667, 50.37667, 16.64333, 17.85667, 39.47333, 15.22667, 15.59667,
37.22667)
tt <- data.frame(Month, RR, MI, PE)
What I've done without sucess
ggplot(data = tt,
aes(x = factor(Month))) +
geom_boxplot(aes(y = RR, x = factor(Month)),
fill = "dodgerblue4", colour = "dodgerblue4",
alpha = 0.6) +
stat_summary(aes(y = RR, x = Month),
fun.y = mean,
geom = "smooth",
colour = "dodgerblue4") +
geom_boxplot(aes(y = MI, group = Month),
fill = "dimgray", colour = "dimgray",
alpha = 0.6) +
stat_summary(aes(y = MI, x = Month),
fun.y = mean,
geom = "smooth",
colour = "dimgray") +
geom_boxplot(aes(y = PE, group = Month),
fill = "firebrick", colour = "firebrick",
alpha = 0.6) +
stat_summary(aes(y = PE, x = Month),
fun.y = mean,
geom = "smooth",
colour = "firebrick") +
labs(x = "Months",
y = "Flux, mm") +
scale_fill_manual("",
breaks = c("dodgerblue4", "dimgray", "firebrick"),
labels = c("dodgerblue4", "dimgray", "firebrick")) +
theme_bw(base_size = 18)
You can fix your code with help of the a named vector to define color. Make sure to use color from named vector with in scope of aes for each element.
One of such option can be as:
# Named vector for color
lineColors <- c("RR" = "dodgerblue4", "MI" = "dimgray", "PE" = "firebrick")
ggplot(data = tt,
aes(x = factor(Month))) +
geom_boxplot(aes(y = RR, x = factor(Month), fill = "RR"),
colour = "dodgerblue4",
alpha = 0.6) +
stat_summary(aes(y = RR, x = Month, colour = "RR"),
fun.y = mean,
geom = "smooth") +
geom_boxplot(aes(y = MI, group = Month, fill = "MI"),
colour = "dimgray",
alpha = 0.6) +
stat_summary(aes(y = MI, x = Month, colour = "MI"),
fun.y = mean,
geom = "smooth"
) +
geom_boxplot(aes(y = PE, group = Month, fill = "PE"),
colour = "firebrick",
alpha = 0.6) +
stat_summary(aes(y = PE, x = Month, colour = "PE"),
fun.y = mean,
geom = "smooth"
) +
labs(x = "Months",
y = "Flux, mm") +
scale_colour_manual(name = "Type", values = lineColors) +
scale_fill_manual(name = "Type", values = lineColors) +
theme_bw(base_size = 18)
A more standard ggplot way, where data is provided in a long format. The boxes are dodged and overplotting is thereby avoided.
cols <- c("dimgray", "firebrick", "dodgerblue4")
ggplot(data.table::melt(setDT(tt), id = "Month", variable.factor = FALSE), aes(x = Month, y = value)) +
geom_boxplot(aes(group = interaction(Month, variable),
fill = variable), alpha = 0.6) +
stat_summary(aes(color = variable), fun.y = mean,
geom = "smooth") +
scale_fill_manual(values = cols) +
scale_color_manual(values = cols) +
scale_x_continuous(breaks = 1:12)

position of geom_vline() legend shifts

I have several plots like the one below. My problem is that the legend for geom_vline() (Type) shifts across plots, sometimes appearing above the "Mean" legend, sometimes below.
How can I specify the position of the geom_vline() legend (or the other legend), such that I do not have variation across plots in my paper?
set.seed(1234)
data <- data.frame(value = rnorm(n = 10000, mean = 50, sd = 20),
Type = sample(letters[1:2], size = 10000, replace = TRUE))
data$value[data$Type == "b"] <- data$value[data$Type == "b"] +
rnorm(sum(data$Type == "b"), mean = 55)
gp <- ggplot(data = data, aes_string(x = "value"))
gp <- gp + geom_density(aes_string(fill = "Type"), alpha = 0.3)
vlines <- data.frame(value = c(mean(data$value[data$Type == "a"]),
mean(data$value[data$Type == "b"])),
Mean = c("A", "B"))
gp2 <- gp + geom_vline(data = vlines, aes(xintercept = value, colour = Mean),
size = 1.05, linetype = "dashed", show.legend = TRUE)
gp3 <- gp2 + geom_vline(xintercept = (50 + 55 + 50) / 2, size = 1.05)
gp3
You can pass the order parameter of guide_legend passed to the guide parameter of the scale_* functions for the guides you want to rearrange. For example:
library(ggplot2)
set.seed(1234)
data <- data.frame(value = rnorm(n = 10000, mean =50, sd = 20),
Type = sample(letters[1:2], size = 10000, replace = TRUE))
data$value[data$Type == "b"] <- data$value[data$Type == "b"] +
rnorm(sum(data$Type == "b"), mean = 55)
vlines <- data.frame(value = c(mean(data$value[data$Type == "a"]),
mean(data$value[data$Type == "b"])),
Mean = c("A", "B"))
ggplot(data, aes(x = value)) +
geom_density(aes(fill = Type), alpha = 0.3) +
geom_vline(data = vlines, aes(xintercept = value, colour = Mean),
size = 1.05, linetype = "dashed", show.legend = TRUE) +
geom_vline(xintercept = (50 + 55 + 50) / 2, size = 1.05) +
scale_fill_discrete(guide = guide_legend(order = 1)) + # fill first
scale_color_discrete(guide = guide_legend(order = 2)) # color second
ggplot(data, aes(x = value)) +
geom_density(aes(fill = Type), alpha = 0.3) +
geom_vline(data = vlines, aes(xintercept = value, colour = Mean),
size = 1.05, linetype = "dashed", show.legend = TRUE) +
geom_vline(xintercept = (50 + 55 + 50) / 2, size = 1.05) +
scale_fill_discrete(guide = guide_legend(order = 2)) + # now fill second
scale_color_discrete(guide = guide_legend(order = 1)) # and color first

Resources