adding more standard deviations to a plot - r

I'm trying to add more standard deviation to my current plot. I need to add 1std and 3std, I've already addeed the 2std to my plot.
This is my code:
tidyverse_downloads_rollmean <- treasury %>%
tq_mutate(
# tq_mutate args
select = yield,
mutate_fun = rollapply,
# rollapply args
width = 360,
align = "right",
FUN = mean,
# mean args
na.rm = TRUE,
# tq_mutate args
col_rename = "mean_360"
)
This is for the 2std but I need to add 1std and 3std in the same plot:
custom_stat_fun_2 <- function(x, na.rm = TRUE) {
m <- mean(x, na.rm = na.rm)
s <- sd(x, na.rm = na.rm)
hi <- m + 2*s
lo <- m - 2*s
ret <- c(mean = m, stdev = s, hi.95 = hi, lo.95 = lo)
return(ret)
}
I added to my data:
rollstats<- treasury %>%
tq_mutate(
select = yield,
mutate_fun = rollapply,
# rollapply args
width = 360,
align = "right",
by.column = FALSE,
FUN = custom_stat_fun_2,
# FUN args
na.rm = TRUE
)
This is my plot:
rollstats %>%
ggplot(aes(x = date)) +
# Data
geom_line(aes(y = yield), color = "grey40", alpha = 0.5, size =1) +
geom_ribbon(aes(ymin = lo.95, ymax = hi.95), alpha = 0.4) +
geom_point(aes(y = mean), linetype = 2, size = 0.5, alpha = 0.5) +
# Aesthetics
labs(title = "tidyverse packages: Volatility and Trend", x = "",
subtitle = "360-Day Moving Average with 95% Confidence Interval Bands (+/-2 Standard Deviations)") +
scale_color_tq(theme = "light") +
theme_tq() +
theme(legend.position="none")
This is my output:
But I want something like this:
So how can I add the 1std and 2std? Is there another way to plot 1std, 2std and 3std in the same plot? Thanks in advance!

You haven't provided a reprex so hard to help you. Tidyquant has functions to plot the standard deviation bands for you (geom_bbands). But, here's an idea with only ggplot2 using different data. Calculate the 1st, 2nd, and 3rd standard deviations:
library(tidyquant)
custom_stat_fun_2 <- function(x, na.rm = TRUE) {
m <- mean(x, na.rm = na.rm)
s <- sd(x, na.rm = na.rm)
hi1 <- m + s
lo1 <- m - s
hi2 <- m + 2*s
lo2 <- m - 2*s
hi3 <- m + 3*s
lo3 <- m - 3*s
ret <- c(mean = m, stdev = s,hi1 = hi1, lo1 = lo1, hi2=hi2, lo2=lo2, hi3=hi3, lo3=lo3)
return(ret)
}
treasury <- treasuryTR::get_yields("DGS10", format_out = "tibble")
rollstats<- treasury |>
tq_mutate(
select = DGS10,
mutate_fun = rollapply,
# rollapply args
width = 360,
align = "right",
by.column = FALSE,
FUN = custom_stat_fun_2
) |>
na.omit()
Melt the data frame to have one column for hi and one for lo and then set factor levels so they plot in reverse order:
rollsds <- tidyr::pivot_longer(rollstats,cols = starts_with(c("hi", "lo")),
names_to = c(".value", "sd"), names_pattern = "(.*)(\\d)")
rollsds$sd <- factor(as.character(rollsds$sd), levels=c(3,2,1))
Plot
library(ggplot2)
rollstats |>
ggplot(aes(x = date)) +
# Data
geom_ribbon(data=rollsds, aes(ymax = hi, ymin=lo, fill=sd, color=sd), alpha=0.3) +
geom_line(aes(y = mean), linetype = 2, size = 0.5, alpha = 0.5) +
geom_line(aes(y = DGS10), color = "midnightblue", alpha = 0.7, size =1) +
# Aesthetics
theme_tq()

Related

Shading regions of a plot based on whether a condition is satisfied

I'm creating lineplots using ggplot() and geom_line() for a corridor of values that develops over time.
It may happen sometimes that the upper bound is below the lower bound (which I'll call "inversion"), and I would like to highlight regions where this happens in my plot, say by using a different background color.
Searching both Google and StackOverflow has not led me anywhere.
Here is an artificial example:
library(tidyverse)
library(RcppRoll)
set.seed(42)
N <- 100
l <- 5
a <- rgamma(n = N, shape = 2)
d <- tibble(x = 1:N, upper = roll_maxr(a, n = l), lower = roll_minr(a + lag(a), n = l)) %>% mutate(inversion = upper < lower)
dl <- pivot_longer(d, cols = c("upper", "lower"), names_to = "Bounds", values_to = "bound_vals")
ggplot(dl, mapping = aes(x = x, y = bound_vals, color = Bounds)) + geom_line(linewidth = 1) + theme_light()
This produces the following plot:
As you can see, inversion occurs in a few places, e.g. around x = 50. I would like for the plot to have a darker (say gray) background where it does, based on the inversion column already in the tibble. How can I do this?
Thank you very much for the help!
One option to achieve your desired result would be to use ggh4x::stat_difference like so. Note that to this end we have to use the wide dataset and accordingly add the lines via two geom_line.
library(ggplot2)
library(ggh4x)
ggplot(d, mapping = aes(x = x)) +
stat_difference(aes(ymin = lower, ymax = upper)) +
geom_line(aes(y = lower, color = "lower"), linewidth = 1) +
geom_line(aes(y = upper, color = "upper"), linewidth = 1) +
scale_fill_manual(values = c("+" = "transparent", "-" = "darkgrey"),
breaks = "-",
labels = "Inversion") +
theme_light() +
labs(color = "Bounds")
EDIT Of course is it also possible to draw background rects for the intersection regions. But I don't know of any out-of-the-box option, i.e. the tricky part is to compute the x values where the lines intersect which requires some effort and approximation. Here is one approach but probably not the most efficient one.
library(tidyverse)
# Compute intersection points and prepare data to draw rects
n <- 20 # Increase for a better approximation
rect <- data.frame(
x = seq(1, N, length.out = N * n)
)
# Shamefully stolen from ggh4x
rle_id <- function(x) with(rle(x), rep.int(seq_along(values), lengths))
rect <- rect |>
mutate(lower = approx(d$x, d$lower, x)[["y"]],
upper = approx(d$x, d$upper, x)[["y"]],
inversion = upper < lower,
rle = with(rle(inversion & !is.na(inversion)), rep.int(seq_along(values), lengths))
) |>
filter(inversion) |>
group_by(rle) |>
slice(c(1, n())) |>
mutate(label = c("xmin", "xmax")) |>
ungroup() |>
select(x, rle, label) |>
pivot_wider(names_from = label, values_from = x)
ggplot(dl, mapping = aes(x = x, y = bound_vals, color = Bounds)) +
geom_line(linewidth = 1) +
geom_rect(data = rect, aes(xmin = xmin, xmax = xmax, group = rle),
ymin = -Inf, ymax = Inf, fill = "darkgrey", alpha = .3, inherit.aes = FALSE) +
theme_light()
#> Warning: Removed 9 rows containing missing values (`geom_line()`).
Answering myself, the following worked for me in the end (also using actual data and plots grouped with facet_wrap()); h/t to #stefan, whose approach with geom_rect() I recycled:
library(tidyverse)
library(RcppRoll)
set.seed(42)
N <- 100
l <- 5
a <- rgamma(n = N, shape = 2)
d <- tibble(x = 1:N, upper = roll_maxr(a, n = l), lower = roll_minr(a + lag(a), n = l)) %>%
mutate(inversion = upper < lower,
inversionLag = if_else(is.na(lag(inversion)), FALSE, lag(inversion)),
inversionLead = if_else(is.na(lead(inversion)), FALSE, lead(inversion)),
inversionStart = inversion & !inversionLag,
inversionEnd = inversion & !inversionLead
)
dl <- pivot_longer(d, cols = c("upper", "lower"), names_to = "Bounds", values_to = "bound_vals")
iS <- d %>% filter(inversionStart) %>% select(x) %>% rowid_to_column() %>% rename(iS = x)
iE <- d %>% filter(inversionEnd) %>% select(x) %>% rowid_to_column() %>% rename(iE = x)
iD <- iS %>% full_join(iE, by = c("rowid"))
g <- ggplot(dl, mapping = aes(x = x, y = bound_vals, color = Bounds)) +
geom_line(linewidth = 1) +
geom_rect(data = iD, mapping = aes(xmin = iS, xmax = iE, fill = "Inversion"), ymin = -Inf, ymax = Inf, alpha = 0.3, inherit.aes = FALSE) +
scale_fill_manual(name = "Inversions", values = "darkgray") +
theme_light()
g
This gives
which is pretty much what I was after.

Problem with reference to variable in R function

I'd like to write a function to do ANOVA in batch, but there's a problem I can't solve. The problem is with the variable reference. The whole code is written correctly, because everything is calculated "on foot", but when I try to insert this code into the function, it doesn't work. Can someone take a look and point out where the problem is?
library(tidyverse)
library(ggpubr)
library(rstatix)
library(ggprism)
df <- data.frame(
stringsAsFactors = FALSE,
id = c(1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5),
treat = c("o","o","o","o","o","j","j","j","j","j","z","z","z","z","z","w","w","w","w","w"),
vo2 = c("47.48","42.74","45.23","51.65","49.11","51.00","43.82","49.88","54.61","52.20","51.31",
"47.56","50.69","54.88","55.01","51.89","46.10","50.98","53.62","52.77"))
df$vo2 <- as.numeric(df$vo2)
df$treat <- factor(df$treat)
Everything works fine in the code below...
# Summary
group_by(df, treat) %>%
summarise(
N = n(),
Mean = mean(vo2, na.rm = TRUE),
Sd = sd(vo2, na.rm = TRUE))
# ANOVA
res.aov <- anova_test(dv = vo2, wid = id, within = treat, data = df)
get_anova_table(res.aov, correction = c("auto"))
# Pairwise comparisons
pwc <- df %>%
pairwise_t_test(vo2 ~ treat, paired = TRUE, conf.level = 0.95,
detailed = TRUE, p.adjust.method = "bonferroni")
pwc
pwc <- pwc %>% add_xy_position(x = "treat")
ggplot(df, aes(x = treat, y = vo2)) +
stat_boxplot(aes(x = treat, y = vo2, color = treat), geom = 'errorbar', coef=1.5, width=0.4, linetype = 1) +
geom_boxplot(aes(x = treat, y = vo2, color = treat, fill = treat)) +
geom_jitter(aes(x = treat, y = vo2, color = treat, fill = treat), width = 0.2) +
stat_summary(fun = mean, geom = "point", shape = 0, size = 2, color = "black", stroke = 1) +
#xlab(deparse(substitute(x))) + ylab(deparse(substitute(y))) +
theme_prism(base_size = 14) + scale_x_discrete(guide = "prism_bracket") +
scale_fill_prism(palette = "floral") + scale_colour_prism(palette = "floral") +
scale_y_continuous(expand = expansion(mult = c(0.02, 0.05))) +
stat_pvalue_manual(pwc, tip.length = 0, hide.ns = TRUE) +
labs(
subtitle = get_test_label(res.aov, detailed = TRUE),
caption = get_pwc_label(pwc)) +
theme(legend.position = "NULL")
The result of this code is the chart below
But the function doesn't work...
The fix for "deparse(substitute(x)" only partially solved the problem, and I get a warning:
1: In mean.default(~"vo2", na.rm = TRUE): argument is not numeric or logical: returning an
2: In var(if (is.vector(x) || is.factor(x)) x else as.double(x), na.rm = na.rm) : NAs introduced by coercion.
Below is the full code for this function:
my_function <- function(df, x, y) {
x <- deparse(substitute(x))
y <- deparse(substitute(y))
formula <- as.formula(paste0(y, "~", x))
# Summary
a <- group_by(df, {{x}}) %>%
summarise(
N = n(),
Mean = mean({{y}}, na.rm = TRUE),
Sd = sd({{y}}, na.rm = TRUE)
)
# ANOVA
res.aov <<- anova_test(dv = {{y}}, wid = id, within = {{x}}, data = df)
b <- get_anova_table(res.aov, correction = c("auto"))
# Pairwise comparisons
pwc <- pairwise_t_test(data = df, formula, paired = TRUE, conf.level = 0.95,
detailed = TRUE, p.adjust.method = "bonferroni")
pwc2 <<- pwc %>% add_xy_position(x = {{x}})
# Plot
d <- ggplot(df, aes(x = {{x}}, y = {{y}})) +
stat_boxplot(aes(x = {{x}}, y = {{y}}, color = {{x}}), geom = 'errorbar', coef=1.5, width=0.4, linetype = 1) +
geom_boxplot(aes(x = {{x}}, y = {{y}}, color = {{x}}, fill = {{x}})) +
geom_jitter(aes(x = {{x}}, y = {{y}}, color = {{x}}, fill = {{x}}), width = 0.2) +
stat_summary(fun = mean, geom = "point", shape = 0, size = 2, color = "black", stroke = 1) +
stat_pvalue_manual(pwc2, tip.length = 0, hide.ns = TRUE) +
xlab(deparse(substitute(x))) + ylab(deparse(substitute(y))) +
scale_x_discrete(guide = "prism_bracket") +
theme_prism(base_size = 14) +
theme(legend.position = "NULL")
#ggsave(paste0(deparse(substitute(x)), "_",
# deparse(substitute(y)), ".png"), width=160, height=90, units="mm", dpi=600)
output <- list(a,b,pwc2,d)
return(output)
}
my_function(df, treat, vo2)
I have a huge request for tips on how to solve this.
The problem lies in the formula. Try adding the following code:
x1 <- deparse(substitute(x))
y1 <- deparse(substitute(y))
formula <- as.formula(paste0(y1, "~", x1))
# Pairwise comparisons
pwc <- pairwise_t_test(data=df, formula, paired = TRUE, conf.level = 0.95,
detailed = TRUE, p.adjust.method = "bonferroni")

How to pass break values to stat_contour by facet or group

I am trying to use the ks library to calculate the 95% home range for groups within a data set. The problem is that the "break" values which define the cut-off for the 95% contours differ between groups. So far, I have been able to get my plots, but I have to manually add the break values for each group/level and I would really like to find a solution where I can create figures in ggplot where the break values are imported automatically.
require(ks)
require(dplyr)
require(ggplot2)
# define the ks function to pass to a grouped_df
ksFUN = function(data){
H = Hpi(data[,c("x","y")], binned = TRUE) * 1
fhata = kde(data[,c("x","y")], H = H, compute.cont = TRUE,
xmin = c(minX, minY), xmax = c(maxX, maxY))
res95 = data.frame(HR = contourSizes(fhata, cont = 95, approx = TRUE))
dimnames(fhata[['estimate']]) = list(fhata[["eval.points"]][[1]],
fhata[["eval.points"]][[2]])
dat = reshape2::melt(fhata[['estimate']])
dat$breaks50 = fhata[["cont"]]["50%"]
dat$breaks95 = fhata[["cont"]]["5%"]
return(dat)
}
set.seed(100)
# create some data
df1 = data.frame(x = rnorm(100, 0, 5),
y = rnorm(100, 0, 5),
Group = "Test1")
df2 = data.frame(x = rnorm(100, 10, 5),
y = rnorm(100, 10, 5),
Group = "Test2")
df = rbind(df1, df2)
# Set the minimum and maximum x and y values outside
# of the ksFUN to keep the data on the same scale
minX = min(df$x, na.rm = T); maxX = max(df$x, na.rm = T)
minY = min(df$y, na.rm = T); maxY = max(df$y, na.rm = T)
xx = df %>%
group_by(Group) %>%
do(as.data.frame(ksFUN(.)))
# extract the break value for the 95% contour (home range) and 50% (core area)
breaks = xx %>%
group_by(Group) %>%
summarize(breaks95 = mean(breaks95),
breaks50 = mean(breaks50))
breaks
# The only way I have been able to add the breaks is to manually add them
ggplot(data = xx, aes(x = Var1, y = Var2, fill = Group)) +
geom_point(data = df, aes(x = x, y = y, col = Group)) +
stat_contour(data = xx[xx$Group == "Test1",], aes(z = value),
breaks = 0.000587, alpha = 0.3, geom = "polygon") +
stat_contour(data = xx[xx$Group == "Test2",], aes(z = value),
breaks = 0.000527, alpha = 0.3, geom = "polygon")
I would really like to find a solution where I don't have to explicitly pass the break values to the stat_contour functions
Is there a problem with using the breaks column in breaks? e.g.
# base plot
pl <- ggplot(data = xx, aes(x = Var1, y = Var2, fill = Group)) +
geom_point(data = df, aes(x = x, y = y, col = Group))
groups <- unique(xx$Group)
# loop and add for each group
for(i in groups){
pl <- pl + stat_contour(data = xx[xx$Group == i,], aes(z = value),
breaks = breaks[breaks$Group == i, ]$breaks,
alpha = 0.3, geom = "polygon")
}
pl
I get some weird plots, at the edges, especially when I remove the breaks part from stat_contour, which leads me to think there might be a bug in ksFUN

Generating multiple plots containing functions in ggplot2

I am trying to make a composite plot in R using the packages ggplot2 and ggpubr.
I have no problem in making the composite plots except each plot has a normal distribution curve specific to that dataset. When I generate the composite plot, both plots have the same curve, that of the last dataset.
How can I generate the composite plot with each plot having its own specific normal distribution curve?
CODE AND OUTPUT PLOTS
## PLOT 1 ##
results_matrix_C <- data.frame(matrix(rnorm(20), nrow=20))
colnames(results_matrix_C) <- c("X")
m <- mean(results_matrix_C$X)
sd <- sd(results_matrix_C$X)
dnorm_C <- function(x){
norm_C <- dnorm(x, m, sd)
return(norm_C)
}
e = 1
dnorm_one_sd_C <- function(x){
norm_one_sd_C <- dnorm(x, m, sd)
# Have NA values outside interval x in [e]:
norm_one_sd_C[x <= e] <- NA
return(norm_one_sd_C)
}
C <- ggplot(results_matrix_C, aes(x = results_matrix_C$X)) +
geom_histogram(aes(y=..density..), bins = 10, colour = "black", fill = "white") +
stat_function(fun = dnorm_one_sd_C, geom = "area", fill = "#CE9A05", color = "#CE9A05", alpha = 0.25, size = 1) +
stat_function(fun = dnorm_C, colour = "#CE0539", size = 1) +
theme_classic()
## PLOT 2 ##
results_matrix_U <- data.frame(matrix(rnorm(20)+1, nrow=20))
colnames(results_matrix_U) <- c("X")
m <- mean(results_matrix_U$X)
sd <- sd(results_matrix_U$X)
dnorm_U <- function(x){
norm_U <- dnorm(x, m, sd)
return(norm_U)
}
e = 2
dnorm_one_sd_U <- function(x){
norm_one_sd_U <- dnorm(x, m, sd)
# Have NA values outside interval x in [e]:
norm_one_sd_U[x <= e] <- NA
return(norm_one_sd_U)
}
U <- ggplot(results_matrix_U, aes(x = results_matrix_U$X)) +
geom_histogram(aes(y=..density..), bins = 10, colour = "black", fill = "white") +
stat_function(fun = dnorm_one_sd_U, geom = "area", fill = "#CE9A05", color = "#CE9A05", alpha = 0.25, size = 1) +
stat_function(fun = dnorm_U, colour = "#CE0539", size = 1) +
theme_classic()
library(ggpubr)
ggarrange(C, U,
nrow = 1, ncol = 2)
As you can see in the composite plot, the first one has taken the normal distribution curve of the second plot rather than its own one from my initial plot (Plot 1).
UPDATE
Variable "e" refers to the shaded area which is related to the distribution curve.
m = mean of the dataset
sd = standard deviation of the dataset
m and sd are used to generate the normal distribution curves
SOLVED
By inserting the function in full into the stat_function section of the ggplot2 code, this has worked
i.e:
## PLOT 1 ##
results_matrix_C <- data.frame(matrix(rnorm(20), nrow=20))
colnames(results_matrix_C) <- c("X")
mean <- mean(results_matrix_C$X)
sd <- sd(results_matrix_C$X)
e = 1
C <- ggplot(results_matrix_C, aes(x = results_matrix_C$X)) +
geom_histogram(aes(y=..density..), bins = 10, colour = "black", fill = "white") +
stat_function(
fun = function(x, mean, sd, e){
norm_one_sd_C <- dnorm(x, mean, sd)
norm_one_sd_C[x <= e] <- NA
return(norm_one_sd_C)},
args = c(mean = mean, sd = sd, e = e), geom = "area", fill = "#CE9A05", color = "#CE9A05", alpha = 0.25, size = 1) +
stat_function(
fun = function(x, mean, sd){
dnorm(x = x, mean = mean, sd = sd)},
args = c(mean = mean, sd = sd), colour = "#CE0539", size = 1) +
theme_classic()
## PLOT 2 ##
results_matrix_U <- data.frame(matrix(rnorm(20)+1, nrow=20))
colnames(results_matrix_U) <- c("X")
mean <- mean(results_matrix_U$X)
sd <- sd(results_matrix_U$X)
e = 2
U <- ggplot(results_matrix_U, aes(x = results_matrix_U$X)) +
geom_histogram(aes(y=..density..), bins = 10, colour = "black", fill = "white") +
stat_function(
fun = function(x, mean, sd, e){
norm_one_sd_U <- dnorm(x, mean, sd)
norm_one_sd_U[x <= e] <- NA
return(norm_one_sd_U)},
args = c(mean = mean, sd = sd, e = e), geom = "area", fill = "#CE9A05", color = "#CE9A05", alpha = 0.25, size = 1) +
stat_function(
fun = function(x, mean, sd){
dnorm(x = x, mean = mean, sd = sd)},
args = c(mean = mean, sd = sd), colour = "#CE0539", size = 1) +
theme_classic()
library(ggpubr)
ggarrange(C, U,
nrow = 1, ncol = 2)

ggplot2: display blocks of nested split violins

I have the following dataset:
df <- data.frame(dens = rnorm(5000),
split = as.factor(sample(1:2, 5000, replace = T)),
method = as.factor(sample(c("A","B"), 5000, replace = T)),
counts = sample(c(1, 10, 100, 1000, 10000), 5000, replace = T))
What i am wanting to do is to do split violin plots for splits 1 and 2 within groups A and B for each count (which would be in the logscale, but that is not important for this example). We have four groups for each setting but there is a nested aspect to it.
So, I can do the following:
df$key <- factor(paste(df$split, df$method))
and then:
library(ggplot2)
ggplot(df, aes(x = factor(counts), y = dens, fill = split)) +
geom_violin(aes(fill = key), scale = "width", draw_quantiles = c(0.25, 0.5, 0.75)) + scale_fill_manual(values = cbPalette) + theme_bw()
which gives me the following plot:
But what I want is really the light blue and the dark blue to be the two halves of a split violin plot and the light green and the dark green to be the two halves of another split violin plot and these plots should be bunched together. I would also like the different counts to be more separated from each other, but i feel that I can figure that out.
Note that this question is different than the one I have listed or Split violin plot with ggplot2 because we are bunching two different levels of nested split violin plots for each "Counts".
I was trying to follow enter link description here but
I can not tell how to add such a nested groups setting to the code there and am looking for some advice.
Here is what I have tried:
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL){
# By #YAK: https://stackoverflow.com/questions/35717353/split-violin-plot-with-ggplot2
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1,'group']
newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x'])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
quantiles <- create_quantile_segment_frame(data, draw_quantiles, split = TRUE, grp = grp)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
}
)
create_quantile_segment_frame <- function (data, draw_quantiles, split = FALSE, grp = NULL) {
dens <- cumsum(data$density)/sum(data$density)
ecdf <- stats::approxfun(dens, data$y)
ys <- ecdf(draw_quantiles)
violin.xminvs <- (stats::approxfun(data$y, data$xminv))(ys)
violin.xmaxvs <- (stats::approxfun(data$y, data$xmaxv))(ys)
violin.xs <- (stats::approxfun(data$y, data$x))(ys)
if (grp %% 2 == 0) {
data.frame(x = ggplot2:::interleave(violin.xs, violin.xmaxvs),
y = rep(ys, each = 2), group = rep(ys, each = 2))
} else {
data.frame(x = ggplot2:::interleave(violin.xminvs, violin.xs),
y = rep(ys, each = 2), group = rep(ys, each = 2))
}
}
geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
library(ggplot2)
ggplot(df, aes(x = factor(counts), y = dens, fill = interaction(split,method))) +
geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom")
And here is what I get:
As can be seen, the green images are on top of the blues. How do I get around this? Thanks!
EDIT: Folllowing Axeman's suggestion, I am almost there:
levels(df$split) <- factor(0:3)
library(ggplot2)
ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) + geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom") + scale_x_discrete(interaction(df$split,df$counts)[-length(interaction(df$split,df$counts))], drop = FALSE)
So almost there!
Would like two fixes: the white space arising from the last interaction between split and counts, and the scale to only have counts for each bunch.
Wonder if these should be separate questions on Stackoverflow.
Almost there!
library(ggplot2)
ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) + geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom") + scale_x_discrete(limits=levels(interaction(df$split,df$counts))[-length(levels(interaction(df$split,df$counts)))],drop = FALSE)
This yields:
I still need to place the value of counts on the x-axis, in between the two plots.
I think that this question has become too long and the basic parts of this question have been answered. I have put up a new question on how to change the discrete scale. Hopefully, someone will know! Anyway, here is the answer to this question (thanks, Axe!). It is in the edited version of my question.
library(ggplot2)
df <- data.frame(dens = rnorm(5000),
split = factor(sample(1:2, 5000, replace = T)),
method = factor(sample(c("A","B"), 5000, replace = T)),
counts = factor(sample(c(1, 10, 100, 1000, 10000), 5000, replace = T)))
df$key <- factor(paste(df$split, df$method))
levels(df$split) <- factor(0:2)
library(ggplot2)
ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) +
geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) +
theme_light() +
theme(legend.position="bottom") +
scale_x_discrete(
limits = levels(interaction(df$split,df$counts))[-length(levels(interaction(df$split,df$counts)))],
drop = FALSE,
name = "Counts"
)

Resources