How to add shadow of margin of error to a diagramm - r

I try to create a survival prediction' diagramm
library("survival")
# fit regression
res.cox <- coxph(Surv(time, status) ~ age + sex + wt.loss, data = lung)
res.cox
Fit a new data
sex_df <- with(lung,
data.frame(sex = c(1, 2),
age = rep(mean(age, na.rm = TRUE), 2),
wt.loss = rep(mean(wt.loss, na.rm = TRUE), 2) ))
The diagramm
library("ggplot2")
fit <- survfit(res.cox, newdata = sex_df)
library(reshape2)
dat = data.frame(surv = fit$surv,lower= fit$lower, upper = fit$upper,time= fit$time)
head(dat)
head(melt(dat, id="time"))
data = melt(dat, id="time")
obj = strsplit(as.character(data$variable), "[.]") # делим текст на объекты по запятой
data$line = sapply(obj, '[', 1)
data$number = sapply(obj, '[', 2)
ggplot(data, aes(x=time, y=value, group=variable)) +
geom_line(aes(linetype=line, color=as.factor(number), size=line)) +
# geom_point(aes(color=number)) +
theme(legend.position="top", axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text=element_text(size=40),
legend.key.size = unit(3,"line"))+
scale_linetype_manual(values=c( 2,1,2))+ # "dotted", "twodash","dotted"
scale_color_manual(values=c("#E7B800", "#2E9FDF", 'red'))+
scale_size_manual(values=c(2, 3.5, 2)) +
scale_x_continuous(limits=c(0, 840),
breaks=seq(0, 840, 120)) + ylab("Surv prob") +
guides(linetype = FALSE, size = FALSE, color = guide_legend(override.aes = list(size=5))) + labs(color='') +
geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' &
data$number == "1"],6),
ymax = rep(data$value[data$line == 'upper' & data$number == "1"],6)),
fill = "#E7B800",alpha=0.1) +
geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & data$number == "2"],6),
ymax = rep(data$value[data$line == 'upper' & data$number == "2"],6)),
fill = "#2E9FDF",alpha=0.1)
The QUESTION
The diagramm is ok but but I have to add with hands this
geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & data$number == "2"],6),
ymax = rep(data$value[data$line == 'upper' & data$number == "2"],6)),
fill = "#2E9FDF",alpha=0.1)
And if there were three, but not two elements in the new data, you would have to rewrite the code. Is it possible to rewrite the code so that it does not depend on the number of elements of new data?
I try to use a loop
temp = list()
uniq <- unique(unlist(data$number))
for (i in 1:length(levels(as.factor(data$number)))) {
n = geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & data$number == uniq[i]],6),
ymax = rep(data$value[data$line == 'upper' & data$number == uniq[i]],6)),
fill = "#2E9FDF", alpha=0.1) #
temp = append(n, temp)
}
temp
but this is an unsuccessful attempt. Thanks for any idea

By reshaping the data.frame so that surv, lower, and upper are separate vectors, you can group the geom_ribbon by your elements rather than the "meaning" of the lines.
Below is the code using the tidyr package; the first section is simply your code for generating the data.
library(survival)
library(reshape2)
library(ggplot2)
# fit regression
res.cox <- coxph(Surv(time, status) ~ age + sex + wt.loss, data = lung)
res.cox
sex_df <- with(lung,
data.frame(sex = c(1, 2),
age = rep(mean(age, na.rm = TRUE), 2),
wt.loss = rep(mean(wt.loss, na.rm = TRUE), 2) ))
fit <- survfit(res.cox, newdata = sex_df)
dat = data.frame(surv = fit$surv,lower= fit$lower, upper = fit$upper,time= fit$time)
head(dat)
head(melt(dat, id="time"))
data = melt(dat, id="time")
# Reformats the data into format with the survival curve and the confidence intervals in their own columns
library(tidyr)
data_wide <- data %>%
separate(col = variable, into = c("type", "sex"), sep = "\\.") %>%
spread(key = type, value = value)
ggplot(data = data_wide) +
geom_line(aes(x = time, y = surv, group = sex, colour = sex),
size = 3.5,
linetype = 1) +
geom_line(aes(x = time, y = lower, group = sex, colour = sex),
size = 2,
linetype = 2) +
geom_line(aes(x = time, y = upper, group = sex, colour = sex),
size = 2,
linetype = 2) +
# Geom_ribbom now grouped by sex
geom_ribbon(aes(x = time, ymin = lower, ymax = upper, group = sex, fill = sex),
alpha = 0.1) +
scale_colour_manual(values = c("#E7B800", "#2E9FDF")) +
scale_fill_manual(values = c("#E7B800", "#2E9FDF")) +
scale_x_continuous(limits = c(0, 840),
breaks = seq(0, 840, 120)) +
theme(legend.position = "top",
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 40),
legend.key.size = unit(3, "line")) +
ylab("Surv prob")
And this is the plot output:
We add another element to test if this works, you will have to add more colours to scale_colour_manual and scale_fill_manual.
library(dplyr)
data_wide2 <- filter(data_wide, sex == "1") %>%
mutate(sex = "3",
surv = surv - 0.2,
upper = upper - 0.2,
lower = lower - 0.2) %>%
rbind(data_wide)
This gives the following plot:

Related

Problem with reference to variable in R function

I'd like to write a function to do ANOVA in batch, but there's a problem I can't solve. The problem is with the variable reference. The whole code is written correctly, because everything is calculated "on foot", but when I try to insert this code into the function, it doesn't work. Can someone take a look and point out where the problem is?
library(tidyverse)
library(ggpubr)
library(rstatix)
library(ggprism)
df <- data.frame(
stringsAsFactors = FALSE,
id = c(1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5),
treat = c("o","o","o","o","o","j","j","j","j","j","z","z","z","z","z","w","w","w","w","w"),
vo2 = c("47.48","42.74","45.23","51.65","49.11","51.00","43.82","49.88","54.61","52.20","51.31",
"47.56","50.69","54.88","55.01","51.89","46.10","50.98","53.62","52.77"))
df$vo2 <- as.numeric(df$vo2)
df$treat <- factor(df$treat)
Everything works fine in the code below...
# Summary
group_by(df, treat) %>%
summarise(
N = n(),
Mean = mean(vo2, na.rm = TRUE),
Sd = sd(vo2, na.rm = TRUE))
# ANOVA
res.aov <- anova_test(dv = vo2, wid = id, within = treat, data = df)
get_anova_table(res.aov, correction = c("auto"))
# Pairwise comparisons
pwc <- df %>%
pairwise_t_test(vo2 ~ treat, paired = TRUE, conf.level = 0.95,
detailed = TRUE, p.adjust.method = "bonferroni")
pwc
pwc <- pwc %>% add_xy_position(x = "treat")
ggplot(df, aes(x = treat, y = vo2)) +
stat_boxplot(aes(x = treat, y = vo2, color = treat), geom = 'errorbar', coef=1.5, width=0.4, linetype = 1) +
geom_boxplot(aes(x = treat, y = vo2, color = treat, fill = treat)) +
geom_jitter(aes(x = treat, y = vo2, color = treat, fill = treat), width = 0.2) +
stat_summary(fun = mean, geom = "point", shape = 0, size = 2, color = "black", stroke = 1) +
#xlab(deparse(substitute(x))) + ylab(deparse(substitute(y))) +
theme_prism(base_size = 14) + scale_x_discrete(guide = "prism_bracket") +
scale_fill_prism(palette = "floral") + scale_colour_prism(palette = "floral") +
scale_y_continuous(expand = expansion(mult = c(0.02, 0.05))) +
stat_pvalue_manual(pwc, tip.length = 0, hide.ns = TRUE) +
labs(
subtitle = get_test_label(res.aov, detailed = TRUE),
caption = get_pwc_label(pwc)) +
theme(legend.position = "NULL")
The result of this code is the chart below
But the function doesn't work...
The fix for "deparse(substitute(x)" only partially solved the problem, and I get a warning:
1: In mean.default(~"vo2", na.rm = TRUE): argument is not numeric or logical: returning an
2: In var(if (is.vector(x) || is.factor(x)) x else as.double(x), na.rm = na.rm) : NAs introduced by coercion.
Below is the full code for this function:
my_function <- function(df, x, y) {
x <- deparse(substitute(x))
y <- deparse(substitute(y))
formula <- as.formula(paste0(y, "~", x))
# Summary
a <- group_by(df, {{x}}) %>%
summarise(
N = n(),
Mean = mean({{y}}, na.rm = TRUE),
Sd = sd({{y}}, na.rm = TRUE)
)
# ANOVA
res.aov <<- anova_test(dv = {{y}}, wid = id, within = {{x}}, data = df)
b <- get_anova_table(res.aov, correction = c("auto"))
# Pairwise comparisons
pwc <- pairwise_t_test(data = df, formula, paired = TRUE, conf.level = 0.95,
detailed = TRUE, p.adjust.method = "bonferroni")
pwc2 <<- pwc %>% add_xy_position(x = {{x}})
# Plot
d <- ggplot(df, aes(x = {{x}}, y = {{y}})) +
stat_boxplot(aes(x = {{x}}, y = {{y}}, color = {{x}}), geom = 'errorbar', coef=1.5, width=0.4, linetype = 1) +
geom_boxplot(aes(x = {{x}}, y = {{y}}, color = {{x}}, fill = {{x}})) +
geom_jitter(aes(x = {{x}}, y = {{y}}, color = {{x}}, fill = {{x}}), width = 0.2) +
stat_summary(fun = mean, geom = "point", shape = 0, size = 2, color = "black", stroke = 1) +
stat_pvalue_manual(pwc2, tip.length = 0, hide.ns = TRUE) +
xlab(deparse(substitute(x))) + ylab(deparse(substitute(y))) +
scale_x_discrete(guide = "prism_bracket") +
theme_prism(base_size = 14) +
theme(legend.position = "NULL")
#ggsave(paste0(deparse(substitute(x)), "_",
# deparse(substitute(y)), ".png"), width=160, height=90, units="mm", dpi=600)
output <- list(a,b,pwc2,d)
return(output)
}
my_function(df, treat, vo2)
I have a huge request for tips on how to solve this.
The problem lies in the formula. Try adding the following code:
x1 <- deparse(substitute(x))
y1 <- deparse(substitute(y))
formula <- as.formula(paste0(y1, "~", x1))
# Pairwise comparisons
pwc <- pairwise_t_test(data=df, formula, paired = TRUE, conf.level = 0.95,
detailed = TRUE, p.adjust.method = "bonferroni")

Legend for combined graph

I am trying to reproduce this figure (without the Portugal highlight):
The data (and figure) can be found in this link: https://stat.link/uz49al.
I imported and reshaped the data into a long format, but then I got stuck on how it would be possible to rearrange the legend entries in the same order as in the original.
I would very much appreciate your help!
Thanks!
Here is where I got:
# load data
f5_5_data_before <-
read_excel("uz49al.xlsx", sheet = "Figure1.20", range = "A32:E68")
names(f5_5_data_before)[1] <- "Country"
names(f5_5_data_before)[2] <- "Odds_ratio"
names(f5_5_data_before)[3] <- "SE"
names(f5_5_data_before)[4] <- "sig"
names(f5_5_data_before)[5] <- "non_sig"
f5_5_data_before$Country <- as.factor(f5_5_data_before$Country)
f5_5_data_before <- f5_5_data_before %>%
mutate(
category = case_when(
is.na(sig) ~ "Non-significant",
!is.na(sig) ~ "Significant"
),
value = case_when(
category == "Non-significant" ~ non_sig,
category == "Significant" ~ sig
)
)
f5_5_data_before$group2 <- "Before accounting for reading performance"
f5_5_data_after <-
read_excel("uz49al.xlsx", sheet = "Figure1.20", range = "A32:I68")
f5_5_data_after <- f5_5_data_after[, c(1, 6:9)]
names(f5_5_data_after)[1] <- "Country"
names(f5_5_data_after)[2] <- "Odds_ratio"
names(f5_5_data_after)[3] <- "SE"
names(f5_5_data_after)[4] <- "sig"
names(f5_5_data_after)[5] <- "non_sig"
f5_5_data_after$Country <- as.factor(f5_5_data_after$Country)
f5_5_data_after <- f5_5_data_after %>%
mutate(
category = case_when(
is.na(sig) ~ "Non-significant",
!is.na(sig) ~ "Significant"
),
value = case_when(
category == "Non-significant" ~ non_sig,
category == "Significant" ~ sig
)
)
f5_5_data_after$group2 <- "After accounting for reading performance"
# appending in long format
f5_5_data <- rbind(f5_5_data_after, f5_5_data_before)
# shaded rectangle
rect1 <- data.frame(
xmin = 14.5,
xmax = 15.5,
ymin = -Inf,
ymax = Inf
)
# figure
f5_5 <- ggplot() +
geom_col(data = f5_5_data %>% filter(group2 == "After accounting for reading performance"),
aes(x = reorder(Country,-Odds_ratio),
y = value,
fill = category,
colour = group2),
width=0.5,
) +
geom_point(
data = f5_5_data %>% filter(group2 == "Before accounting for reading performance"),
aes(x = Country,
y = value,
fill = category,
colour = group2),
shape = 23,
size = 3,
) +
geom_rect(
data = rect1,
aes(
xmin = xmin,
xmax = xmax,
ymin = ymin,
ymax = ymax
),
alpha = 0.5,
inherit.aes = FALSE
) +
scale_y_continuous(breaks = pretty_breaks(),
limits = c(0, 25),
expand = c(0, 0)) +
labs(x = NULL,
y = "Odds ratio") +
theme(axis.text.x = element_text(angle = 90))
print(f5_5)
This yields the following output:
As you can see, the legend looks substantially different and essentially I got stuck.
One option to achieve your desired result would be via the ggnewscale package which allows for multiple scales for the same aesthetic. Doing so we could map category on the fill aes in both the geom_col and the geom_point but have two different legends:
Note: I simplified your data wrangling code a bit.
library(readxl)
library(dplyr)
library(ggplot2)
library(ggnewscale)
url <- "https://stat.link/uz49al"
download.file(url, destfile = "uz49al.xlsx")
dat <- read_excel("uz49al.xlsx", sheet = "Figure1.20", range = "A32:I68")
dat <- list(
before = setNames(dat[, 1:5], c("Country", "Odds_ratio", "SE", "sig", "non_sig")),
after = setNames(dat[, c(1, 6:9)], c("Country", "Odds_ratio", "SE", "sig", "non_sig"))
) %>%
bind_rows(.id = "group2")
dat <- dat %>%
mutate(
category = if_else(is.na(sig), "nonsig", "sig"),
value = if_else(is.na(sig), non_sig, sig)
) %>%
select(-sig, -non_sig)
group2_labels <- c(after = "After accounting for reading performance", before = "Before accounting for reading performance")
rect1 <- data.frame(xmin = 14.5, xmax = 15.5, ymin = -Inf, ymax = Inf)
ggplot(dat, aes(x = reorder(Country,-Odds_ratio), y = value)) +
geom_col(data = ~filter(.x, group2 == "after"), aes(fill = category), width = 0.5) +
scale_fill_manual(labels = NULL, values = c(sig = "darkblue", nonsig = "steelblue"),
name = group2_labels[["after"]], guide = guide_legend(title.position = "right")) +
new_scale_fill() +
geom_point(data = ~filter(.x, group2 == "before"), aes(fill = category), size = 3, shape = 23, color = "lightblue") +
geom_rect(data = rect1, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
alpha = 0.5, inherit.aes = FALSE) +
scale_fill_manual(labels = NULL, values = c(nonsig = "white", sig = "lightblue"), breaks = c("sig", "nonsig"),
name = group2_labels[["before"]], guide = guide_legend(title.position = "right")) +
scale_y_continuous(breaks = scales::pretty_breaks(), limits = c(0, 25), expand = c(0, 0)) +
labs(x = NULL, y = "Odds ratio") +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "top")

How to manually change line size and alpha values for ggplot2 lines (separated by factor)?

I want to create a graph where I can change the line size for each line c(1,2,3) and the alpha values for each line c(0.5,0.6,0.7). I tried to use scale_size_manual but it didn't make any difference. Any ideas on how to proceed?
var <- c("T","T","T","M","M","M","A","A","A")
val <- rnorm(12,4,5)
x <- c(1:12)
df <- data.frame(var,val,x)
ggplot(aes(x= x , y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey","blue","black")) + geom_smooth(aes(x = x, y = val), formula = "y ~ x", method = "loess",se = FALSE, size = 1) + scale_x_continuous(breaks=seq(1, 12, 1), limits=c(1, 12)) + scale_size_manual(values = c(1,2,3))
To set the size and alpha values for your lines you have to map on aesthetics. Otherwise scale_size_manual will have no effect:
library(ggplot2)
ggplot(aes(x = x, y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey", "blue", "black")) +
geom_smooth(aes(x = x, y = val, size = var, alpha = var), formula = "y ~ x", method = "loess", se = FALSE) +
scale_x_continuous(breaks = seq(1, 12, 1), limits = c(1, 12)) +
scale_size_manual(values = c(1, 2, 3)) +
scale_alpha_manual(values = c(.5, .6, .7))

plot density plots with confidence intervals of 95% in R

I'm trying draw multiple density plots in one plot for comparison porpuses. I wanted them to have their confidence interval of 95% like in the following figure. I'm working with ggplot2 and my df is a long df of observations for a certain location that I would like to compare for different time intervals.
I've done some experimentation following this example but I don't have the coding knowledge to achieve what I want.
What i managed to do so far:
library(magrittr)
library(ggplot2)
library(dplyr)
build_object <- ggplot_build(
ggplot(data=ex_long, aes(x=val)) + geom_density())
plot_credible_interval <- function(
gg_density, # ggplot object that has geom_density
bound_left,
bound_right
) {
build_object <- ggplot_build(gg_density)
x_dens <- build_object$data[[1]]$x
y_dens <- build_object$data[[1]]$y
index_left <- min(which(x_dens >= bound_left))
index_right <- max(which(x_dens <= bound_right))
gg_density + geom_area(
data=data.frame(
x=x_dens[index_left:index_right],
y=y_dens[index_left:index_right]),
aes(x=x,y=y),
fill="grey",
alpha=0.6)
}
gg_density <- ggplot(data=ex_long, aes(x=val)) +
geom_density()
gg_density %>% plot_credible_interval(tab$q2.5[[40]], tab$q97.5[[40]])
Help would be much apreaciated.
This is obviously on a different set of data, but this is roughly that plot with data from 2 t distributions. I've included the data generation in case it is of use.
library(tidyverse)
x1 <- seq(-5, 5, by = 0.1)
t_dist1 <- data.frame(x = x1,
y = dt(x1, df = 3),
dist = "dist1")
x2 <- seq(-5, 5, by = 0.1)
t_dist2 <- data.frame(x = x2,
y = dt(x2, df = 3),
dist = "dist2")
t_data = rbind(t_dist1, t_dist2) %>%
mutate(x = case_when(
dist == "dist2" ~ x + 1,
TRUE ~ x
))
p <- ggplot(data = t_data,
aes(x = x,
y = y )) +
geom_line(aes(color = dist))
plot_data <- as.data.frame(ggplot_build(p)$data)
bottom <- data.frame(plot_data) %>%
mutate(dist = case_when(
group == 1 ~ "dist1",
group == 2 ~ "dist2"
)) %>%
group_by(dist) %>%
slice_head(n = ceiling(nrow(.) * 0.1)) %>%
ungroup()
top <- data.frame(plot_data) %>%
mutate(dist = case_when(
group == 1 ~ "dist1",
group == 2 ~ "dist2"
)) %>%
group_by(dist) %>%
slice_tail(n = ceiling(nrow(.) * 0.1)) %>%
ungroup()
segments <- t_data %>%
group_by(dist) %>%
summarise(x = mean(x),
y = max(y))
p + geom_area(data = bottom,
aes(x = x,
y = y,
fill = dist),
alpha = 0.25,
position = "identity") +
geom_area(data = top,
aes(x = x,
y = y,
fill = dist),
alpha = 0.25,
position = "identity") +
geom_segment(data = segments,
aes(x = x,
y = 0,
xend = x,
yend = y,
color = dist,
linetype = dist)) +
scale_color_manual(values = c("red", "blue")) +
scale_linetype_manual(values = c("dashed", "dashed"),
labels = NULL) +
ylab("Density") +
xlab("\U03B2 for AQIv") +
guides(color = guide_legend(title = "p.d.f \U03B2",
title.position = "right",
labels = NULL),
linetype = guide_legend(title = "Mean \U03B2",
title.position = "right",
labels = NULL,
override.aes = list(color = c("red", "blue"))),
fill = guide_legend(title = "Rej. area \U03B1 = 0.05",
title.position = "right",
labels = NULL)) +
annotate(geom = "text",
x = c(-4.75, -4),
y = 0.35,
label = c("RK", "OK")) +
theme(panel.background = element_blank(),
panel.border = element_rect(fill = NA,
color = "black"),
legend.position = c(0.2, 0.7),
legend.key = element_blank(),
legend.direction = "horizontal",
legend.text = element_blank(),
legend.title = element_text(size = 8))

position of geom_vline() legend shifts

I have several plots like the one below. My problem is that the legend for geom_vline() (Type) shifts across plots, sometimes appearing above the "Mean" legend, sometimes below.
How can I specify the position of the geom_vline() legend (or the other legend), such that I do not have variation across plots in my paper?
set.seed(1234)
data <- data.frame(value = rnorm(n = 10000, mean = 50, sd = 20),
Type = sample(letters[1:2], size = 10000, replace = TRUE))
data$value[data$Type == "b"] <- data$value[data$Type == "b"] +
rnorm(sum(data$Type == "b"), mean = 55)
gp <- ggplot(data = data, aes_string(x = "value"))
gp <- gp + geom_density(aes_string(fill = "Type"), alpha = 0.3)
vlines <- data.frame(value = c(mean(data$value[data$Type == "a"]),
mean(data$value[data$Type == "b"])),
Mean = c("A", "B"))
gp2 <- gp + geom_vline(data = vlines, aes(xintercept = value, colour = Mean),
size = 1.05, linetype = "dashed", show.legend = TRUE)
gp3 <- gp2 + geom_vline(xintercept = (50 + 55 + 50) / 2, size = 1.05)
gp3
You can pass the order parameter of guide_legend passed to the guide parameter of the scale_* functions for the guides you want to rearrange. For example:
library(ggplot2)
set.seed(1234)
data <- data.frame(value = rnorm(n = 10000, mean =50, sd = 20),
Type = sample(letters[1:2], size = 10000, replace = TRUE))
data$value[data$Type == "b"] <- data$value[data$Type == "b"] +
rnorm(sum(data$Type == "b"), mean = 55)
vlines <- data.frame(value = c(mean(data$value[data$Type == "a"]),
mean(data$value[data$Type == "b"])),
Mean = c("A", "B"))
ggplot(data, aes(x = value)) +
geom_density(aes(fill = Type), alpha = 0.3) +
geom_vline(data = vlines, aes(xintercept = value, colour = Mean),
size = 1.05, linetype = "dashed", show.legend = TRUE) +
geom_vline(xintercept = (50 + 55 + 50) / 2, size = 1.05) +
scale_fill_discrete(guide = guide_legend(order = 1)) + # fill first
scale_color_discrete(guide = guide_legend(order = 2)) # color second
ggplot(data, aes(x = value)) +
geom_density(aes(fill = Type), alpha = 0.3) +
geom_vline(data = vlines, aes(xintercept = value, colour = Mean),
size = 1.05, linetype = "dashed", show.legend = TRUE) +
geom_vline(xintercept = (50 + 55 + 50) / 2, size = 1.05) +
scale_fill_discrete(guide = guide_legend(order = 2)) + # now fill second
scale_color_discrete(guide = guide_legend(order = 1)) # and color first

Resources