I'm wanting to use stat_difference() from the ggh4x package to add a shaded area between two lines in my plot. I have melted my example dataset below as I thought this was the correct approach to facet_wrap all the variables in my dataset, but I'm unsure how to use stat_difference() with the categorical variable team. I essentially want the line corresponding to Team A or Team B shaded depending on which one has a higher value, similar to the example here. Any suggestions will be great! Thanks.
library(tidyverse)
library(ggh4x)
library(reshape2)
set.seed(100)
team <- rep(rep(paste("Team", LETTERS[1:2]), each = 20))
week <- rep(c(1:20), times = 2)
var_1 <- rnorm(n = 40, mean = 20, sd = 5)
var_2 <- rnorm(n = 40, mean = 20, sd = 5)
var_3 <- rnorm(n = 40, mean = 250, sd = 50)
var_4 <- rnorm(n = 40, mean = 100, sd = 50)
dat <- data.frame(team, week, var_1, var_2, var_3, var_4)
plot_dat <- melt(dat, id.vars = c("team", "week"))
ggplot(plot_dat, aes(x = week)) +
geom_line(aes(y = value, color = team)) +
facet_wrap(~variable, scales = "free_y")
Following the post you referenced you could achieve your desired result by making separate columns with the values for each team using e.g. pivot_wider, add the lines via two geom_line and then apply stat_difference:
library(tidyverse)
library(ggh4x)
library(reshape2)
plot_dat <- pivot_wider(plot_dat, names_from = team, values_from = value)
ggplot(plot_dat, aes(x = week)) +
geom_line(aes(y = `Team A`, color = "Team A")) +
geom_line(aes(y = `Team B`, color = "Team B")) +
facet_wrap(~variable, scales = "free_y") +
stat_difference(aes(ymin = `Team B`, ymax = `Team A`), alpha = 0.3)
Related
I have the following data:
library(ggplot2)
library(gganimate)
library(tidyverse)
createData<- function(vintage, id){#create data
# Generate a sequence of dates from 2010-01-01 to 2025-12-31 with a quarterly frequency
Dates <- seq(from = as.Date("2010-01-01"), to = as.Date("2025-12-31"), by = "quarter")
RLG<- cumsum(sample(c(-1, 1), 64, TRUE))
df<- data.frame( Dates,RLG, vintage,id)
return(df)
}
#createData
df<- createData("2018-01-01",1) %>%
rbind(createData("2019-01-01",2))%>%
rbind(createData("2020-01-01",3)) %>%
rbind(createData("2021-01-01",4))%>%
rbind(createData("2022-01-01",5))%>%
rbind(createData("2023-01-01",6))%>%
rbind(createData("2024-01-01",7))%>%
rbind(createData("2025-01-01",8))
Which I use to make the following chart:
options(gganimate.nframes = 8*length(unique(df$vintage)), gganimate.res = 30)
p<- ggplot(df) +
aes(x = Dates, y = RLG, group = as.Date(vintage), colour = "RLG") +
geom_line()+
scale_y_continuous(labels = \(x) paste0(x, "%"))+
theme(axis.title = element_blank(),legend.position="none")+
transition_time(id)+
exit_fade(alpha = 0.5)+
shadow_mark(alpha = 0.2)
animate(p, end_pause = 30)
I would like to add a geom_rect which goes from vintage to max(Dates). At each frame, vintage will increase, so the geom_rect will shrink slightly. How can I do this without interfering with the shadow_mark and exit_fades which I am applying to the lines?
If you mean something like a progress bar you could do it like so:
create an DF for the geom which is a subset of the original
df_geom <- df |>
mutate(vintage = as.Date(vintage)) |>
group_by(id) |>
slice(n())
Use geom_segment with the DF from above.
If you want to leave shadow_mark in you can do shadow_mark(exclude_layer = 2).
p <- ggplot(df) +
aes(x = Dates, y = RLG, group = as.Date(vintage), colour = RLG) +
geom_line()+
scale_y_continuous(labels = \(x) paste0(x, "%"))+
theme(axis.title = element_blank(),legend.position="none") +
geom_segment(
data = df_geom,
mapping = aes(x=vintage, xend=Dates,
y = 18, yend = 18),
size = 10, alpha =.4, color ='lightblue'
) +
transition_time(id)+
exit_fade(alpha = 0.5)
# shadow_mark(alpha = 0.2)
animate(p)
I would like to overlay two ggplots from different data sources. I don't think a left_join will work because the dataframes are of two different lengths and would potential change the underlying plots.[Maybe?]
library(tidyverse)
set.seed(123)
player_df <- tibble(name = rep(c("A","B","C","D"), each = 10, times = 1),
pos = rep(c("DEF","DEF","MID","MID"), each = 10, times = 1),
load = c(rnorm(10, mean = 200, sd = 100),
rnorm(10, mean = 300, sd = 50),
rnorm(10, mean = 400, sd = 100),
rnorm(10, mean = 500, sd = 50)))
p1 <- player_df %>%
ggplot(aes(x = load, y = name)) +
geom_point()
pos_df <- tibble(pos = rep(c("DEF","MID"), each = 30, times = 1),
load = (c(rnorm(30, mean = 250, sd = 100),
rnorm(30, mean = 350, sd = 100))))
p2 <- pos_df %>%
ggplot(aes(x = load, y = pos)) +
geom_boxplot()
p1
p2
# add p2 to every p1 player plot by pos
I would like p1 to have the corresponding p2 - by pos - appear behind it. So... add the matching p2 boxplot to each p1 scatterplot.
p1:
p2:
It's not really advisable to attempt to superimpose two plots on each other. A ggplot is made of layers already, so usually it's just a case of superimposing one geom on another. This can be difficult if (as in your case) one of the axes has different labels. However, with a little work it is possible to wrangle your data so that it all sits on a single plot. In your case, you could do something like:
levs <- c("A", "DEF", "B", "C", "MID", "D")
ggplot(within(pos_df, pos <- factor(pos, levs)), aes(x = load, y = pos)) +
geom_boxplot(width = 2.3) +
geom_point(data = within(player_df, pos <- factor(name, levs))) +
scale_y_discrete(limits = c("A", "DEF", "B", " ", "C", "MID", "D"))
Dug into ggplot a bit and re-engineered a boxplot bit by bit.
# manually calculate stats that are used in boxplots
pos_df_summary <- pos_df %>%
group_by(pos, .drop = FALSE) %>%
summarise(min = fivenum(load)[1],
Q1 = fivenum(load)[2],
median = fivenum(load)[3],
Q3 = fivenum(load)[4],
max = fivenum(load)[5]
)
# add the boxplot data to each player
joined_df <- player_df %>%
left_join(., pos_df_summary, by = "pos") %>%
distinct(name, .keep_all = TRUE)
# plot
ggplot(data = NULL, aes(group = name)) +
# create the line from min to max
geom_segment(data = joined_df, aes(y = name, yend = name, x=min, xend=max), color="black") +
#create the box with median line
geom_crossbar(data = joined_df,
aes(y = name, xmin = Q1, xmax = Q3, x = median, fill = "NA"),
color = "black",
fatten = 1) +
scale_fill_manual(values = "white") +
# add the points from the player_df
geom_point(data = player_df,
aes(x = load, y = name, group=name),
color = "red",
show.legend=FALSE) +
theme(legend.position = "none")
There may be some extraneous code in here as I cobbled it from some other resources. Specifically, I'm not sure what the aes(group = name) in the ggplot() call does exactly.
I would like to add a regression line from a linear model to a plot in R.
I have created the following sample dataset:
# Load libraries
library(tidyverse)
library(ggbeeswarm)
# Set seed
set.seed(123)
# Create dataset
ID <- sprintf("ID-%s",seq(1:30))
baseline <- rnorm(30, mean = 50, sd = 3)
df <- data.frame(ID, baseline) %>%
mutate(`1` = baseline - rnorm(1, mean = 5, sd = 4),
`2` = `1` - rnorm(1, mean = 7, sd = 5),
`3` = `2` - rnorm(1, mean = 10, sd = 9)) %>%
pivot_longer(-ID) %>%
rename(time = name) %>%
mutate(time = as.factor(time))
Which creates the following plot:
# Plot
ggplot(data = df, aes(x=time, y = value)) +
geom_quasirandom() +
theme_classic() +
scale_x_discrete(limits = c("baseline", "1", "2", "3") ) +
labs(x = "Time", y = "Value")
If I add geom_smooth(method = 'lm') to the plot, nothing happens and I think it has something to do with the fact that time is a factor. However, adding geom_smooth(method = 'lm', formula = y~as.numeric(x) also doesn't work.
How do I plot a regression line on top of this graph?
EDIT1.0:
I have been able to use geom_smooth to plot a regression line with geom_smooth(aes(x = as.numeric(time), y = value), method = "lm", formula = y~x), but sadly, the regression line is incorrect...
How about using ordered factor to enable overlay with aes(as.numeric(time), value)?
# Load libraries
library(tidyverse)
library(ggbeeswarm)
# Set seed
set.seed(123)
# Create dataset
ID <- sprintf("ID-%s",seq(1:30))
baseline <- rnorm(30, mean = 50, sd = 3)
df <- data.frame(ID, baseline) %>%
mutate(`1` = baseline - rnorm(1, mean = 5, sd = 4),
`2` = `1` - rnorm(1, mean = 7, sd = 5),
`3` = `2` - rnorm(1, mean = 10, sd = 9)) %>%
pivot_longer(-ID) %>%
rename(time = name) %>%
# create ordered factor to allow synchronized order of x after as.numeric
mutate(time = factor(time, ordered = T, c("baseline", "1", "2", "3")))
## rendered results
ggplot(data = df, aes(x=time, y = value)) +
geom_quasirandom() +
theme_classic() +
labs(x = "Time", y = "Value") +
geom_smooth(aes(as.numeric(time), value), method = "lm")
## verify with this
ggplot(data = df, aes(x=time, y = value)) +
geom_point() +
theme_classic() +
labs(x = "Time", y = "Value") +
geom_smooth(aes(as.numeric(time), value), method = "lm")
Created on 2020-04-15 by the reprex package (v0.3.0)
Try this. One option to add a regression line is to compute it manually and add it to the plot via geom_line and mapping ID on the group aesthetic. The red line is the regression line for value ~ time while in case of the blue line I recoded time as a numeric, setting "baseline" to 0.
# Load libraries
library(tidyverse)
library(ggbeeswarm)
# Set seed
set.seed(123)
# Create dataset
ID <- sprintf("ID-%s",seq(1:30))
baseline <- rnorm(30, mean = 50, sd = 3)
df <- data.frame(ID, baseline) %>%
mutate(`1` = baseline - rnorm(1, mean = 5, sd = 4),
`2` = `1` - rnorm(1, mean = 7, sd = 5),
`3` = `2` - rnorm(1, mean = 10, sd = 9)) %>%
pivot_longer(-ID) %>%
rename(time = name) %>%
mutate(time = as.factor(time),
time1 = ifelse(time == "baseline", 0, as.numeric(time)),
smooth = predict(lm(value ~ time, data = .)),
smooth1 = predict(lm(value ~ time1, data = .)))
# Plot
ggplot(data = df, aes(x=time, y = value)) +
geom_quasirandom() +
# Regression line
geom_line(aes(x = time, y = smooth, group = ID), color = "red") +
geom_line(aes(x = time, y = smooth1, group = ID), color = "blue") +
theme_classic() +
scale_x_discrete(limits = c("baseline", "1", "2", "3") ) +
labs(x = "Time", y = "Value")
Created on 2020-04-15 by the reprex package (v0.3.0)
Sample data
df <- data.frame(id = rep(1:6, each = 50), x = rnorm(50*6, mean = 10, sd = 5),
y = rnorm(50*6, mean = 20, sd = 10),
z = rnorm(50*6, mean = 30, sd = 15))
ggplot(df, aes(x)) + geom_histogram() + facet_wrap(~id)
How do I show x, y, z in the same plot for each id in different colours
It's best to reshape data from wide to long first, and then add a fill aesthetic to map what (i.e. x, y, z) to different fill colours:
library(tidyverse)
df %>%
gather(what, val, -id) %>%
ggplot(aes(val, fill = what)) + geom_histogram() + facet_wrap(~id)
I'd like to plot a horizontal facet-wide line with the population median of that facet.
I tried the approach without creating a dummy summary table with the following code:
require(ggplot2)
dt = data.frame(gr = rep(1:2, each = 500),
id = rep(1:5, 2, each = 100),
y = c(rnorm(500, mean = 0, sd = 1), rnorm(500, mean = 1, sd = 2)))
ggplot(dt, aes(x = as.factor(id), y = y)) +
geom_boxplot() +
facet_wrap(~ gr) +
geom_hline(aes(yintercept = median(y), group = gr), colour = 'red')
However, the line is drawn for the median of the entire dataset instead of the median separately for each facet:
In the past, a solution has been suggested to use
geom_line(stat = "hline", yintercept = "median")
but it's been discontinued (produces the error "No stat called StatHline").
Another solution suggested
geom_errorbar(aes(ymax=..y.., ymin=..y.., y = mean))
but it generates
Error in data.frame(y = function (x, ...) :
arguments imply differing number of rows: 0, 1000
Finally, there's a way to plot the median by creating a dummy table with the desired stats but I'd like to avoid it.
You could create an extra column in dt for median per facet.
library(dplyr) # With dplyr for example
dt <- dt %>% group_by(gr) %>%
mutate(med = median(y))
# Rerun ggplot line with yintercept = med
ggplot(dt, aes(x = as.factor(id), y = y)) +
geom_boxplot() +
facet_wrap(~ gr) +
geom_hline(aes(yintercept = med, group = gr), colour = 'red')
If you don't want to add a new column with the computed median, you can add a geom_smooth using a quantile regression :
library(ggplot2)
library(quantreg)
set.seed(1234)
dt <- data.frame(gr = rep(1:2, each = 500),
id = rep(1:5, 2, each = 100),
y = c(rnorm(500, mean = 0, sd = 1),
rnorm(500, mean = 1, sd = 2)))
ggplot(dt, aes(y = y)) +
geom_boxplot(aes(x = as.factor(id))) +
geom_smooth(aes(x = id), method = "rq", formula = y ~ 1, se = FALSE) +
facet_wrap(~ gr)