Legend for combined graph - r

I am trying to reproduce this figure (without the Portugal highlight):
The data (and figure) can be found in this link: https://stat.link/uz49al.
I imported and reshaped the data into a long format, but then I got stuck on how it would be possible to rearrange the legend entries in the same order as in the original.
I would very much appreciate your help!
Thanks!
Here is where I got:
# load data
f5_5_data_before <-
read_excel("uz49al.xlsx", sheet = "Figure1.20", range = "A32:E68")
names(f5_5_data_before)[1] <- "Country"
names(f5_5_data_before)[2] <- "Odds_ratio"
names(f5_5_data_before)[3] <- "SE"
names(f5_5_data_before)[4] <- "sig"
names(f5_5_data_before)[5] <- "non_sig"
f5_5_data_before$Country <- as.factor(f5_5_data_before$Country)
f5_5_data_before <- f5_5_data_before %>%
mutate(
category = case_when(
is.na(sig) ~ "Non-significant",
!is.na(sig) ~ "Significant"
),
value = case_when(
category == "Non-significant" ~ non_sig,
category == "Significant" ~ sig
)
)
f5_5_data_before$group2 <- "Before accounting for reading performance"
f5_5_data_after <-
read_excel("uz49al.xlsx", sheet = "Figure1.20", range = "A32:I68")
f5_5_data_after <- f5_5_data_after[, c(1, 6:9)]
names(f5_5_data_after)[1] <- "Country"
names(f5_5_data_after)[2] <- "Odds_ratio"
names(f5_5_data_after)[3] <- "SE"
names(f5_5_data_after)[4] <- "sig"
names(f5_5_data_after)[5] <- "non_sig"
f5_5_data_after$Country <- as.factor(f5_5_data_after$Country)
f5_5_data_after <- f5_5_data_after %>%
mutate(
category = case_when(
is.na(sig) ~ "Non-significant",
!is.na(sig) ~ "Significant"
),
value = case_when(
category == "Non-significant" ~ non_sig,
category == "Significant" ~ sig
)
)
f5_5_data_after$group2 <- "After accounting for reading performance"
# appending in long format
f5_5_data <- rbind(f5_5_data_after, f5_5_data_before)
# shaded rectangle
rect1 <- data.frame(
xmin = 14.5,
xmax = 15.5,
ymin = -Inf,
ymax = Inf
)
# figure
f5_5 <- ggplot() +
geom_col(data = f5_5_data %>% filter(group2 == "After accounting for reading performance"),
aes(x = reorder(Country,-Odds_ratio),
y = value,
fill = category,
colour = group2),
width=0.5,
) +
geom_point(
data = f5_5_data %>% filter(group2 == "Before accounting for reading performance"),
aes(x = Country,
y = value,
fill = category,
colour = group2),
shape = 23,
size = 3,
) +
geom_rect(
data = rect1,
aes(
xmin = xmin,
xmax = xmax,
ymin = ymin,
ymax = ymax
),
alpha = 0.5,
inherit.aes = FALSE
) +
scale_y_continuous(breaks = pretty_breaks(),
limits = c(0, 25),
expand = c(0, 0)) +
labs(x = NULL,
y = "Odds ratio") +
theme(axis.text.x = element_text(angle = 90))
print(f5_5)
This yields the following output:
As you can see, the legend looks substantially different and essentially I got stuck.

One option to achieve your desired result would be via the ggnewscale package which allows for multiple scales for the same aesthetic. Doing so we could map category on the fill aes in both the geom_col and the geom_point but have two different legends:
Note: I simplified your data wrangling code a bit.
library(readxl)
library(dplyr)
library(ggplot2)
library(ggnewscale)
url <- "https://stat.link/uz49al"
download.file(url, destfile = "uz49al.xlsx")
dat <- read_excel("uz49al.xlsx", sheet = "Figure1.20", range = "A32:I68")
dat <- list(
before = setNames(dat[, 1:5], c("Country", "Odds_ratio", "SE", "sig", "non_sig")),
after = setNames(dat[, c(1, 6:9)], c("Country", "Odds_ratio", "SE", "sig", "non_sig"))
) %>%
bind_rows(.id = "group2")
dat <- dat %>%
mutate(
category = if_else(is.na(sig), "nonsig", "sig"),
value = if_else(is.na(sig), non_sig, sig)
) %>%
select(-sig, -non_sig)
group2_labels <- c(after = "After accounting for reading performance", before = "Before accounting for reading performance")
rect1 <- data.frame(xmin = 14.5, xmax = 15.5, ymin = -Inf, ymax = Inf)
ggplot(dat, aes(x = reorder(Country,-Odds_ratio), y = value)) +
geom_col(data = ~filter(.x, group2 == "after"), aes(fill = category), width = 0.5) +
scale_fill_manual(labels = NULL, values = c(sig = "darkblue", nonsig = "steelblue"),
name = group2_labels[["after"]], guide = guide_legend(title.position = "right")) +
new_scale_fill() +
geom_point(data = ~filter(.x, group2 == "before"), aes(fill = category), size = 3, shape = 23, color = "lightblue") +
geom_rect(data = rect1, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
alpha = 0.5, inherit.aes = FALSE) +
scale_fill_manual(labels = NULL, values = c(nonsig = "white", sig = "lightblue"), breaks = c("sig", "nonsig"),
name = group2_labels[["before"]], guide = guide_legend(title.position = "right")) +
scale_y_continuous(breaks = scales::pretty_breaks(), limits = c(0, 25), expand = c(0, 0)) +
labs(x = NULL, y = "Odds ratio") +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "top")

Related

Circular stacked barplot in r [duplicate]

This question already has an answer here:
Circular barchart customization from r-graph-gallery
(1 answer)
Closed 8 months ago.
I am trying to run this code from this link https://www.r-graph-gallery.com/299-circular-stacked-barplot.html.
# library
library(tidyverse)
library(viridis)
# Create dataset
data <- data.frame(
individual=paste( "Mister ", seq(1,60), sep=""),
group=c( rep('A', 10), rep('B', 30), rep('C', 14), rep('D', 6)) ,
value1=sample( seq(10,100), 60, replace=T),
value2=sample( seq(10,100), 60, replace=T),
value3=sample( seq(10,100), 60, replace=T)
)
# Transform data in a tidy format (long format)
data <- data %>% gather(key = "observation", value="value", -c(1,2))
# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 2
nObsType <- nlevels(as.factor(data$observation))
to_add <- data.frame( matrix(NA, empty_bar*nlevels(data$group)*nObsType, ncol(data)) )
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each=empty_bar*nObsType )
data <- rbind(data, to_add)
data <- data %>% arrange(group, individual)
data$id <- rep( seq(1, nrow(data)/nObsType) , each=nObsType)
# Get the name and the y position of each label
label_data <- data %>% group_by(id, individual) %>% summarize(tot=sum(value))
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id-0.5) /number_of_bar # I substract 0.5 because the letter must have the angle of the center of the bars. Not extreme right(1) or extreme left (0)
label_data$hjust <- ifelse( angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle+180, angle)
# prepare a data frame for base lines
base_data <- data %>%
group_by(group) %>%
summarize(start=min(id), end=max(id) - empty_bar) %>%
rowwise() %>%
mutate(title=mean(c(start, end)))
# prepare a data frame for grid (scales)
grid_data <- base_data
grid_data$end <- grid_data$end[ c( nrow(grid_data), 1:nrow(grid_data)-1)] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1,]
# Make the plot
p <- ggplot(data) +
# Add the stacked bar
geom_bar(aes(x=as.factor(id), y=value, fill=observation), stat="identity", alpha=0.5) +
scale_fill_viridis(discrete=TRUE) +
# Add a val=100/75/50/25 lines. I do it at the beginning to make sur barplots are OVER it.
geom_segment(data=grid_data, aes(x = end, y = 0, xend = start, yend = 0), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 50, xend = start, yend = 50), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 100, xend = start, yend = 100), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 150, xend = start, yend = 150), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 200, xend = start, yend = 200), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
# Add text showing the value of each 100/75/50/25 lines
ggplot2::annotate("text", x = rep(max(data$id),5), y = c(0, 50, 100, 150, 200), label = c("0", "50", "100", "150", "200") , color="grey", size=6 , angle=0, fontface="bold", hjust=1) +
ylim(-150,max(label_data$tot, na.rm=T)) +
theme_minimal() +
theme(
legend.position = "none",
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1,4), "cm")
) +
coord_polar() +
# Add labels on top of each bar
geom_text(data=label_data, aes(x=id, y=tot+10, label=individual, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=5, angle= label_data$angle, inherit.aes = FALSE ) +
# Add base line information
geom_segment(data=base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha=0.8, size=0.6 , inherit.aes = FALSE ) +
geom_text(data=base_data, aes(x = title, y = -18, label=group), hjust=c(1,1,0,0), colour = "black", alpha=0.8, size=4, fontface="bold", inherit.aes = FALSE)
# Save at png
ggsave(p, file="output.png", width=10, height=10)
However, I am not sure why I am not getting the gaps and the scales in my figure (see below). As depicted, the numbers are printed inside the figure and the gaps between different groups of data are not there.
The original figure should be as follows:
There is a bug in the code. group has to be a factor to make the code adding the gaps work. To fix this add data$group <- factor(data$group).
Note: My guess is that the reason for this bug is that as of version 4.0.0 R treats strings in data frames as strings rather than factors. Hence, for versions < 4.0.0 the code worked fine as is.
# library
library(tidyverse)
library(viridis)
#> Loading required package: viridisLite
# Create dataset
data <- data.frame(
individual = paste("Mister ", seq(1, 60), sep = ""),
group = c(rep("A", 10), rep("B", 30), rep("C", 14), rep("D", 6)),
value1 = sample(seq(10, 100), 60, replace = T),
value2 = sample(seq(10, 100), 60, replace = T),
value3 = sample(seq(10, 100), 60, replace = T)
)
# Convert to factor
data$group <- factor(data$group)
# Transform data in a tidy format (long format)
data <- data %>% gather(key = "observation", value = "value", -c(1, 2))
# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 2
nObsType <- nlevels(as.factor(data$observation))
to_add <- data.frame(matrix(NA, empty_bar * nlevels(data$group) * nObsType, ncol(data)))
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each = empty_bar * nObsType)
data <- rbind(data, to_add)
data <- data %>% arrange(group, individual)
data$id <- rep(seq(1, nrow(data) / nObsType), each = nObsType)
# Get the name and the y position of each label
label_data <- data %>%
group_by(id, individual) %>%
summarize(tot = sum(value))
#> `summarise()` has grouped output by 'id'. You can override using the `.groups`
#> argument.
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar # I substract 0.5 because the letter must have the angle of the center of the bars. Not extreme right(1) or extreme left (0)
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)
# prepare a data frame for base lines
base_data <- data %>%
group_by(group) %>%
summarize(start = min(id), end = max(id) - empty_bar) %>%
rowwise() %>%
mutate(title = mean(c(start, end)))
# prepare a data frame for grid (scales)
grid_data <- base_data
grid_data$end <- grid_data$end[c(nrow(grid_data), 1:nrow(grid_data) - 1)] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1, ]
# Make the plot
ggplot(data) +
# Add the stacked bar
geom_bar(aes(x = as.factor(id), y = value, fill = observation), stat = "identity", alpha = 0.5) +
scale_fill_viridis(discrete = TRUE) +
# Add a val=100/75/50/25 lines. I do it at the beginning to make sur barplots are OVER it.
geom_segment(data = grid_data, aes(x = end, y = 0, xend = start, yend = 0), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
geom_segment(data = grid_data, aes(x = end, y = 50, xend = start, yend = 50), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
geom_segment(data = grid_data, aes(x = end, y = 100, xend = start, yend = 100), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
geom_segment(data = grid_data, aes(x = end, y = 150, xend = start, yend = 150), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
geom_segment(data = grid_data, aes(x = end, y = 200, xend = start, yend = 200), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
# Add text showing the value of each 100/75/50/25 lines
ggplot2::annotate("text", x = rep(max(data$id), 5), y = c(0, 50, 100, 150, 200), label = c("0", "50", "100", "150", "200"), color = "grey", size = 6, angle = 0, fontface = "bold", hjust = 1) +
ylim(-150, max(label_data$tot, na.rm = T)) +
theme_minimal() +
theme(
legend.position = "none",
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1, 4), "cm")
) +
coord_polar() +
# Add labels on top of each bar
geom_text(data = label_data, aes(x = id, y = tot + 10, label = individual, hjust = hjust), color = "black", fontface = "bold", alpha = 0.6, size = 5, angle = label_data$angle, inherit.aes = FALSE) +
# Add base line information
geom_segment(data = base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha = 0.8, size = 0.6, inherit.aes = FALSE) +
geom_text(data = base_data, aes(x = title, y = -18, label = group), hjust = c(1, 1, 0, 0), colour = "black", alpha = 0.8, size = 4, fontface = "bold", inherit.aes = FALSE)
#> Warning: Removed 24 rows containing missing values (position_stack).
#> Warning: Removed 9 rows containing missing values (geom_text).

plot density plots with confidence intervals of 95% in R

I'm trying draw multiple density plots in one plot for comparison porpuses. I wanted them to have their confidence interval of 95% like in the following figure. I'm working with ggplot2 and my df is a long df of observations for a certain location that I would like to compare for different time intervals.
I've done some experimentation following this example but I don't have the coding knowledge to achieve what I want.
What i managed to do so far:
library(magrittr)
library(ggplot2)
library(dplyr)
build_object <- ggplot_build(
ggplot(data=ex_long, aes(x=val)) + geom_density())
plot_credible_interval <- function(
gg_density, # ggplot object that has geom_density
bound_left,
bound_right
) {
build_object <- ggplot_build(gg_density)
x_dens <- build_object$data[[1]]$x
y_dens <- build_object$data[[1]]$y
index_left <- min(which(x_dens >= bound_left))
index_right <- max(which(x_dens <= bound_right))
gg_density + geom_area(
data=data.frame(
x=x_dens[index_left:index_right],
y=y_dens[index_left:index_right]),
aes(x=x,y=y),
fill="grey",
alpha=0.6)
}
gg_density <- ggplot(data=ex_long, aes(x=val)) +
geom_density()
gg_density %>% plot_credible_interval(tab$q2.5[[40]], tab$q97.5[[40]])
Help would be much apreaciated.
This is obviously on a different set of data, but this is roughly that plot with data from 2 t distributions. I've included the data generation in case it is of use.
library(tidyverse)
x1 <- seq(-5, 5, by = 0.1)
t_dist1 <- data.frame(x = x1,
y = dt(x1, df = 3),
dist = "dist1")
x2 <- seq(-5, 5, by = 0.1)
t_dist2 <- data.frame(x = x2,
y = dt(x2, df = 3),
dist = "dist2")
t_data = rbind(t_dist1, t_dist2) %>%
mutate(x = case_when(
dist == "dist2" ~ x + 1,
TRUE ~ x
))
p <- ggplot(data = t_data,
aes(x = x,
y = y )) +
geom_line(aes(color = dist))
plot_data <- as.data.frame(ggplot_build(p)$data)
bottom <- data.frame(plot_data) %>%
mutate(dist = case_when(
group == 1 ~ "dist1",
group == 2 ~ "dist2"
)) %>%
group_by(dist) %>%
slice_head(n = ceiling(nrow(.) * 0.1)) %>%
ungroup()
top <- data.frame(plot_data) %>%
mutate(dist = case_when(
group == 1 ~ "dist1",
group == 2 ~ "dist2"
)) %>%
group_by(dist) %>%
slice_tail(n = ceiling(nrow(.) * 0.1)) %>%
ungroup()
segments <- t_data %>%
group_by(dist) %>%
summarise(x = mean(x),
y = max(y))
p + geom_area(data = bottom,
aes(x = x,
y = y,
fill = dist),
alpha = 0.25,
position = "identity") +
geom_area(data = top,
aes(x = x,
y = y,
fill = dist),
alpha = 0.25,
position = "identity") +
geom_segment(data = segments,
aes(x = x,
y = 0,
xend = x,
yend = y,
color = dist,
linetype = dist)) +
scale_color_manual(values = c("red", "blue")) +
scale_linetype_manual(values = c("dashed", "dashed"),
labels = NULL) +
ylab("Density") +
xlab("\U03B2 for AQIv") +
guides(color = guide_legend(title = "p.d.f \U03B2",
title.position = "right",
labels = NULL),
linetype = guide_legend(title = "Mean \U03B2",
title.position = "right",
labels = NULL,
override.aes = list(color = c("red", "blue"))),
fill = guide_legend(title = "Rej. area \U03B1 = 0.05",
title.position = "right",
labels = NULL)) +
annotate(geom = "text",
x = c(-4.75, -4),
y = 0.35,
label = c("RK", "OK")) +
theme(panel.background = element_blank(),
panel.border = element_rect(fill = NA,
color = "black"),
legend.position = c(0.2, 0.7),
legend.key = element_blank(),
legend.direction = "horizontal",
legend.text = element_blank(),
legend.title = element_text(size = 8))

How to use condition in geom_text / nudge_y

I want text labels were above or under of bar cap depending on where is more space for them. Now it's always down which is not always looks good:
Here is my code:
library(tidyr)
library(ggplot2)
library(dplyr)
library(stringr)
library(purrr)
numa.nodes <- tibble (
numa_name = c("numa_01","numa_01","numa_01","numa_01","numa_01","numa_01","numa_02","numa_02","numa_02","numa_02"),
counter_name =c("cpu01","cpu02","cpu03","cpu04","memory_used","memory_total","cpu01","cpu02","memory_used","memory_total"),
value = c(sample(0:100,4), sample(0:32,1), 32, sample(0:100,1), sample(0:100,1), sample(0:128,1), 128)
)
numa.nodes <- numa.nodes %>% add_row(
numa_name = c("numa_03","numa_03","numa_03","numa_03","numa_03","numa_03","numa_04","numa_04","numa_04","numa_04"),
counter_name =c("cpu01","cpu02","cpu03","cpu04","memory_used","memory_total","cpu01","cpu02","memory_used","memory_total"),
value = c(sample(0:100,4), sample(0:32,1), 32, sample(0:100,1), sample(0:100,1), sample(0:128,1), 128)
)
numa.nodes <- numa.nodes %>% add_row(
numa_name = c("numa_05","numa_05","numa_05","numa_05","numa_05","numa_05","numa_05"),
counter_name =c("cpu01","cpu02","cpu03","cpu04","cpu05","memory_used","memory_total"),
value = c(sample(1:100,5), sample(1:64,1), 64)
)
numa.nodes <- numa.nodes %>% mutate(counter_name=factor(counter_name,levels = unique(counter_name),ordered = T))
memory_columns <- numa.nodes %>% filter(counter_name=='memory_total')
memory_y_scale <- max(memory_columns$value, na.rm = TRUE) + 6
plot_numa = function(num){
df = numa.nodes %>% filter(str_detect(numa_name, num))
cpu_plot = df %>%
filter(str_detect(counter_name, "cpu")) %>%
ggplot(aes(x = counter_name)) +
geom_col(aes(y = 100), fill = "white", color = "black") +
geom_col(aes(y = value), fill = "#00AFBB", color = "black") +
geom_text(aes(y = value, label = paste0(value,"%")), nudge_y = 5, color = "black") +
theme_bw() +
labs(x = "CPU", y = "")
memory_plot = df %>%
filter(str_detect(counter_name, "memory")) %>%
pivot_wider(names_from = counter_name, values_from = value) %>%
ggplot(aes(x = "") ) +
geom_col(aes(y = memory_total), fill = "white", color = "black") +
geom_col(aes(y = memory_used), fill = "#FC4E07", color = "black") +
geom_text(aes(label = paste(memory_total, "GB"), y = memory_total), nudge_y = 5, color = "black") +
geom_text(aes(label = paste(memory_used, "GB"), y = memory_used), nudge_y = -3, color = "black") +
theme_bw() +
ylim(0, memory_y_scale) +
labs(x = "Memory", y = "")
ggpubr::ggarrange(cpu_plot, memory_plot, ncol = 2) %>% ggpubr::annotate_figure(top = paste("NUMA",num))
}
numa_numbers <- unique(numa.nodes$numa_name) %>% str_remove ("numa_")
ggpubr::ggarrange(plotlist = map(.x = numa_numbers, .f = ~plot_numa(num = .x)))
I tried to change this line:
geom_text(aes(label = paste(memory_used, "GB"), y = memory_used), nudge_y = -3, color = "black")
to something like that:
geom_text(aes(label = paste(memory_used, "GB"), y = memory_used),nudge_y = ifelse( (memory_total-memory_used) > 10, 5, -3)
, color = "black")
But I've got an error:
Error in ifelse((memory_total - memory_used) > 10, 5, -3) :
object 'memory_total' not found
Is there a better way to print labels optimal way?
What am I doing wrong?
How to change color of label to more contrast ie black on white, white on red?
Think of it this way: The nudge value will be different (potentially) for every observation in your data frame. That means that this is something that should be handled within aes(), where stuff is designed to change with your data, rather than nudge_y, which is designed to be a constant (and complains if used otherwise).
So, the solution is to do away entirely with nudge_y and build your ifelse() statement directly into aes(y=...).
In this case, here's the replacement for that particular geom_text() line:
# to see the same plot posted here, put this at the top of your code
set.seed(7331)
...
# plot code...
... +
geom_text(aes(
label = paste(memory_used, "GB"),
y = ifelse((memory_total-memory_used > 10), memory_used + 5, memory_used - 3)),
color = "black") +

How to add shadow of margin of error to a diagramm

I try to create a survival prediction' diagramm
library("survival")
# fit regression
res.cox <- coxph(Surv(time, status) ~ age + sex + wt.loss, data = lung)
res.cox
Fit a new data
sex_df <- with(lung,
data.frame(sex = c(1, 2),
age = rep(mean(age, na.rm = TRUE), 2),
wt.loss = rep(mean(wt.loss, na.rm = TRUE), 2) ))
The diagramm
library("ggplot2")
fit <- survfit(res.cox, newdata = sex_df)
library(reshape2)
dat = data.frame(surv = fit$surv,lower= fit$lower, upper = fit$upper,time= fit$time)
head(dat)
head(melt(dat, id="time"))
data = melt(dat, id="time")
obj = strsplit(as.character(data$variable), "[.]") # делим текст на объекты по запятой
data$line = sapply(obj, '[', 1)
data$number = sapply(obj, '[', 2)
ggplot(data, aes(x=time, y=value, group=variable)) +
geom_line(aes(linetype=line, color=as.factor(number), size=line)) +
# geom_point(aes(color=number)) +
theme(legend.position="top", axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text=element_text(size=40),
legend.key.size = unit(3,"line"))+
scale_linetype_manual(values=c( 2,1,2))+ # "dotted", "twodash","dotted"
scale_color_manual(values=c("#E7B800", "#2E9FDF", 'red'))+
scale_size_manual(values=c(2, 3.5, 2)) +
scale_x_continuous(limits=c(0, 840),
breaks=seq(0, 840, 120)) + ylab("Surv prob") +
guides(linetype = FALSE, size = FALSE, color = guide_legend(override.aes = list(size=5))) + labs(color='') +
geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' &
data$number == "1"],6),
ymax = rep(data$value[data$line == 'upper' & data$number == "1"],6)),
fill = "#E7B800",alpha=0.1) +
geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & data$number == "2"],6),
ymax = rep(data$value[data$line == 'upper' & data$number == "2"],6)),
fill = "#2E9FDF",alpha=0.1)
The QUESTION
The diagramm is ok but but I have to add with hands this
geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & data$number == "2"],6),
ymax = rep(data$value[data$line == 'upper' & data$number == "2"],6)),
fill = "#2E9FDF",alpha=0.1)
And if there were three, but not two elements in the new data, you would have to rewrite the code. Is it possible to rewrite the code so that it does not depend on the number of elements of new data?
I try to use a loop
temp = list()
uniq <- unique(unlist(data$number))
for (i in 1:length(levels(as.factor(data$number)))) {
n = geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & data$number == uniq[i]],6),
ymax = rep(data$value[data$line == 'upper' & data$number == uniq[i]],6)),
fill = "#2E9FDF", alpha=0.1) #
temp = append(n, temp)
}
temp
but this is an unsuccessful attempt. Thanks for any idea
By reshaping the data.frame so that surv, lower, and upper are separate vectors, you can group the geom_ribbon by your elements rather than the "meaning" of the lines.
Below is the code using the tidyr package; the first section is simply your code for generating the data.
library(survival)
library(reshape2)
library(ggplot2)
# fit regression
res.cox <- coxph(Surv(time, status) ~ age + sex + wt.loss, data = lung)
res.cox
sex_df <- with(lung,
data.frame(sex = c(1, 2),
age = rep(mean(age, na.rm = TRUE), 2),
wt.loss = rep(mean(wt.loss, na.rm = TRUE), 2) ))
fit <- survfit(res.cox, newdata = sex_df)
dat = data.frame(surv = fit$surv,lower= fit$lower, upper = fit$upper,time= fit$time)
head(dat)
head(melt(dat, id="time"))
data = melt(dat, id="time")
# Reformats the data into format with the survival curve and the confidence intervals in their own columns
library(tidyr)
data_wide <- data %>%
separate(col = variable, into = c("type", "sex"), sep = "\\.") %>%
spread(key = type, value = value)
ggplot(data = data_wide) +
geom_line(aes(x = time, y = surv, group = sex, colour = sex),
size = 3.5,
linetype = 1) +
geom_line(aes(x = time, y = lower, group = sex, colour = sex),
size = 2,
linetype = 2) +
geom_line(aes(x = time, y = upper, group = sex, colour = sex),
size = 2,
linetype = 2) +
# Geom_ribbom now grouped by sex
geom_ribbon(aes(x = time, ymin = lower, ymax = upper, group = sex, fill = sex),
alpha = 0.1) +
scale_colour_manual(values = c("#E7B800", "#2E9FDF")) +
scale_fill_manual(values = c("#E7B800", "#2E9FDF")) +
scale_x_continuous(limits = c(0, 840),
breaks = seq(0, 840, 120)) +
theme(legend.position = "top",
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 40),
legend.key.size = unit(3, "line")) +
ylab("Surv prob")
And this is the plot output:
We add another element to test if this works, you will have to add more colours to scale_colour_manual and scale_fill_manual.
library(dplyr)
data_wide2 <- filter(data_wide, sex == "1") %>%
mutate(sex = "3",
surv = surv - 0.2,
upper = upper - 0.2,
lower = lower - 0.2) %>%
rbind(data_wide)
This gives the following plot:

ggplot with overlay - name the overlay with additional labels

I have a plot with labels on the y axis for the groups within the area plot. I added an overlay and want to name these.
Reproducible data at the bottom. For context I'm showing website session data and want to overlay when TV Campaigns are running.
Here's my ggplot and what it looks like. Below that is the commands to generate random data that I am using.
timeline <- ggplot(dataset, aes(x = Month, y = Sessions,fill = Channel, group = Channel)) +
geom_area(alpha = 0.2) +
stat_summary(aes(group = 2), fun.y = sum, geom = 'line', size = 2, alpha = 0.5) +
theme(axis.text.x=element_text(angle=90, hjust=1),
axis.title.x = element_blank()) +
geom_rect(data = tv_overlay, inherit.aes = FALSE,
aes(xmin = start, xmax = end,
ymin = -Inf, ymax = Inf),
fill = "black", alpha = 0.1)
This produces the following plot. Note the rectangle overlays which are meant to denote a TV campaign. How can I add a label to say "TV Campaign" to these:
Reproducible data which will allow the above commands for timeline <- to run
# dimensions
channels <- c("Facebook", "Youtube", "SEM", "Organic", "Direct", "Email")
last_month <- Sys.Date() %m+% months(-1) %>% floor_date("month")
mts <- seq(from = last_month %m+% months(-23), to = last_month, by = "1 month")
yr_month <- format(mts, "%b-%Y")
dimvars <- expand.grid(Month = yr_month, Channel = channels)
# metrics
rws <- nrow(dimvars)
set.seed(42)
# generates variablility in the random data
randwalk <- function(initial_val, ...){
initial_val + cumsum(rnorm(...))
}
Sessions <- ceiling(randwalk(3000, n = rws, mean = 8, sd = 1500)) %>% abs()
Transactions <- ceiling(randwalk(200, n = rws, mean = 0, sd = 75)) %>% abs()
Revenue <- ceiling(randwalk(10000, n = rws, mean = 0, sd = 3500)) %>% abs()
# make primary df
dataset <- cbind(dimvars, Sessions, Transactions, Revenue)
# make TV and Mass df for overlays
tv_begin <- sample(mts, 4)
tv_end <- tv_begin %m+% months(1)
tv_overlay <- data.frame(start = format(tv_begin, "%b-%Y"), end = format(tv_end, "%b-%Y"))
Map alpha to a character values to get an extra legend entry:
ggplot(dataset, aes(x = Month, y = Sessions,fill = Channel, group = Channel)) +
geom_area(alpha = 0.2) +
stat_summary(aes(group = 2), fun.y = sum, geom = 'line', size = 2, alpha = 0.5) +
geom_rect(aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf, alpha = "TV Campaign"),
tv_overlay, inherit.aes = FALSE, fill = "black") +
scale_alpha_manual(name = '', values = c("TV Campaign" = 0.1)) +
theme(axis.text.x=element_text(angle=90, hjust=1),
axis.title.x = element_blank())

Resources