How to avoid overlapping labels using split violin plot in ggplot2 (R) - r

I am generating split violin plots using the geom_split_violin function created here: Split violin plot with ggplot2.
Then, I add labels for sample sizes (n = ...) for each split violin. However, unfortunately the labels overlap. How could I please move them slightly to the left and right, so that they do not overlap?
Here is the full code that I am using and below it the result with overlapping "n = ..." labels.
# Create data
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(500), rnorm(300, 0.5), rnorm(400, 1), rnorm(200, 1.5)),
x=c(rep('a', 800), rep('b', 600)),
m=c(rep('i', 300), rep('j', 700), rep('i', 400)))
# Code to create geom_split_violin function from link above
library('ggplot2')
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
# Add labels 'n = ...'
give_n = function(x, y_lo = min(my_data$y)) {
data.frame(y = y_lo * 1.06,
label = paste("n =", length(x)))
}
# Plot data
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin() +
stat_summary(fun.data = give_n, aes(x = as.factor(x)), geom = "text")
Result (note overlapping 'n = ...' labels):

Does adding position_nudge() solve your problem?
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin() +
stat_summary(fun.data = give_n, aes(x = as.factor(x)), geom = "text",
position = position_nudge(x = c(-0.25, 0.25)))

Related

Split violin plot with one group using ggplot

To create a split violin plot in ggplot, I have used the geom_split_violin function created by jan-glx below. This function is very handy for two or more groups, and I was wondering if there was a way to utilize this function for only one group? I am aiming to create a split violin plot where the only categorical variable is gender (male/female).
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
ggplot(my_data, aes(x, y, fill = m)) + geom_split_violin()
There is geom_violinhalf from the see package:
library(tidyverse)
library(see)
ggplot(iris, aes(x = "x", y = Sepal.Length, fill = Species)) +
geom_violinhalf(
data = iris %>% filter(Species == "setosa")
) +
geom_violinhalf(
data = iris %>% filter(Species == "versicolor"),
flip = TRUE
)
Created on 2021-11-25 by the reprex package (v2.0.1)

Add boxplot and stats to split violin plot in ggplot2 (R)

I love the split violin plot and #jan-glx 's awesome geom_split_violin function created here: Split violin plot with ggplot2.
I would love to add split boxplots and stats to this, as I explain below.
First, to be complete, here are the full data and code.
Data (copied from above link)
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
x=c(rep('a', 2000), rep('b', 2000)),
m=c(rep('i', 1000), rep('j', 2000), rep('i', 1000)))
Code to create geom_split_violin function (copied from above link)
library('ggplot2')
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
My attempt to add boxplots and stats
Here is the code that I used to try to add:
Split boxplots.
P values using wilcox.test stats.
Sample sizes (n).
Code:
library(ggpubr)
give.n <- function(x){return(y = -2.6, label = length(x))}
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin() +
geom_boxplot(width = 0.2, notch = TRUE, fill="white", outlier.shape = NA) +
stat_summary(fun.data = give.n, geom = "text") +
stat_compare_means(aes(label = ifelse(p < 1.e-4, sprintf("p = %2.1e",
as.numeric(..p.format..)), sprintf("p = %5.4f",
as.numeric(..p.format..)))), method = "wilcox.test", paired = FALSE) +
stat_summary(fun.data = give.n, geom = "text")
This is the result:
Unfortunately, this throws an error and is not quite where I hoped to get, because it is missing the p values and the sample sizes (n) and the boxplots are not split. I also tried one of #Axeman 's excellent suggestions in another SO answer, but no luck so far.
What I am hoping to achieve is something similar to this (also with p values no longer "NA"):
This seems a big challenge, but I am hoping someone out there might be able to help, as others will probably love this as much as me. Thank you.

aes() and aes_string() produce different behaviors in split_violin_plot

I am trying to use a function from this post to produce a split violin plot.
Here's the code:
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, draw_group = function(self, data, ..., draw_quantiles = NULL){
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1,'group']
newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x'])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
Here's an example dataset:
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
x=c(rep('a', 2000), rep('b', 2000)),
m=c(rep('i', 1000), rep('j', 2000), rep('i', 1000))
)
Use aes() in the plot function, we could get the normal plot
ggplot(my_data, aes(x, y, fill=m)) + geom_split_violin()
But if use aes_string(), I got a weird plot where two split violin on top of each other.
ggplot(my_data, aes_string(x='x', y='y', fill='m')) + geom_split_violin()
I do not have a good understanding of how aes vs. aes_string might lead to this difference. Hope someone could give me a hint. Thanks a lot!
Instead of using aes_string(), you can use tidy evaluation approach in ggplot2 3.0 or higher by:
converting the input strings to variables using sym()
unquote them inside aes() using !! so they get evaluated
library(tidyverse)
ggplot(my_data, aes(x = !!sym('x'), y = !!sym('y'), fill = !!sym('m'))) + geom_split_violin()
Created on 2018-10-04 by the reprex package (v0.2.1.9000)

ggplot split violin plot with horizontal mean lines

I've created these split half violin plots using ggplot.
However, instead of including the boxplot, which shows the median, I'd like to include a horizontal line with the mean.
This means each colored half would have its own mean line: the gold half would have a mean line which would not exactly align with the mean line on the grey half. Importantly, I'd like the mean line to reside only inside the density plot.
How can I achieve this? I can't figure it out and I'd appreciate any help!
Here's some example data:
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000,
1.5)),
x=c(rep('a', 2000), rep('b', 2000)),
m=c(rep('i', 1000), rep('j', 2000), rep('i', 1000))
)
Here's the extension for geom_violin to create split_geom_violin:
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, draw_group = function(self, data, ..., draw_quantiles = NULL){
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1,'group']
newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x'])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
Here's the code for the graph:
library(ggplot2)
ggplot(my_data, aes(x, y, fill=m)) +
geom_split_violin(trim = TRUE) +
geom_boxplot(width = 0.25, notch = FALSE, notchwidth = .4, outlier.shape = NA, coef=0) +
labs(x=NULL,y="GM Attitude Score") +
theme_classic() +
theme(text = element_text(size = 20)) +
scale_x_discrete(labels=c("0" = "Control\nCondition", "1" = "GM\nCondition")) +
scale_fill_manual(values=c("#E69F00", "#999999"),
name="Survey\nPart",
breaks=c("1", "2"),
labels=c("Time 1", "Time 5"))
You can use stat_summary & geom_crossbar while setting all fun.y, fun.ymin & fun.ymax to mean only
library(ggplot2)
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin(trim = TRUE) +
stat_summary(fun.y = mean, fun.ymin = mean, fun.ymax = mean,
geom = "crossbar",
width = 0.25,
position = position_dodge(width = .25),
) +
labs(x = NULL, y = "GM Attitude Score") +
theme_classic() +
theme(text = element_text(size = 20)) +
scale_x_discrete(labels = c("0" = "Control\nCondition", "1" = "GM\nCondition")) +
scale_fill_manual(
values = c("#E69F00", "#999999"),
name = "Survey\nPart",
breaks = c("1", "2"),
labels = c("Time 1", "Time 5")
)
Data & function used:
set.seed(20160229)
my_data <- data.frame(
y = c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
x = c(rep("a", 2000), rep("b", 2000)),
m = c(rep("i", 1000), rep("j", 2000), rep("i", 1000))
)
GeomSplitViolin <- ggproto(
"GeomSplitViolin",
GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data,
xminv = x - violinwidth * (x - xmin),
xmaxv = x + violinwidth * (xmax - x)
)
grp <- data[1, "group"]
newdata <- plyr::arrange(
transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
if (grp %% 2 == 1) y else -y
)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname(
"geom_split_violin",
grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
)
} else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
}
)
geom_split_violin <- function(mapping = NULL,
data = NULL,
stat = "ydensity",
position = "identity", ...,
draw_quantiles = NULL,
trim = TRUE,
scale = "area",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomSplitViolin,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
trim = trim,
scale = scale,
draw_quantiles = draw_quantiles,
na.rm = na.rm, ...
)
)
}
Created on 2018-07-08 by the reprex package (v0.2.0.9000).

ggplot2: display blocks of nested split violins

I have the following dataset:
df <- data.frame(dens = rnorm(5000),
split = as.factor(sample(1:2, 5000, replace = T)),
method = as.factor(sample(c("A","B"), 5000, replace = T)),
counts = sample(c(1, 10, 100, 1000, 10000), 5000, replace = T))
What i am wanting to do is to do split violin plots for splits 1 and 2 within groups A and B for each count (which would be in the logscale, but that is not important for this example). We have four groups for each setting but there is a nested aspect to it.
So, I can do the following:
df$key <- factor(paste(df$split, df$method))
and then:
library(ggplot2)
ggplot(df, aes(x = factor(counts), y = dens, fill = split)) +
geom_violin(aes(fill = key), scale = "width", draw_quantiles = c(0.25, 0.5, 0.75)) + scale_fill_manual(values = cbPalette) + theme_bw()
which gives me the following plot:
But what I want is really the light blue and the dark blue to be the two halves of a split violin plot and the light green and the dark green to be the two halves of another split violin plot and these plots should be bunched together. I would also like the different counts to be more separated from each other, but i feel that I can figure that out.
Note that this question is different than the one I have listed or Split violin plot with ggplot2 because we are bunching two different levels of nested split violin plots for each "Counts".
I was trying to follow enter link description here but
I can not tell how to add such a nested groups setting to the code there and am looking for some advice.
Here is what I have tried:
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL){
# By #YAK: https://stackoverflow.com/questions/35717353/split-violin-plot-with-ggplot2
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1,'group']
newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x'])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
quantiles <- create_quantile_segment_frame(data, draw_quantiles, split = TRUE, grp = grp)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
}
)
create_quantile_segment_frame <- function (data, draw_quantiles, split = FALSE, grp = NULL) {
dens <- cumsum(data$density)/sum(data$density)
ecdf <- stats::approxfun(dens, data$y)
ys <- ecdf(draw_quantiles)
violin.xminvs <- (stats::approxfun(data$y, data$xminv))(ys)
violin.xmaxvs <- (stats::approxfun(data$y, data$xmaxv))(ys)
violin.xs <- (stats::approxfun(data$y, data$x))(ys)
if (grp %% 2 == 0) {
data.frame(x = ggplot2:::interleave(violin.xs, violin.xmaxvs),
y = rep(ys, each = 2), group = rep(ys, each = 2))
} else {
data.frame(x = ggplot2:::interleave(violin.xminvs, violin.xs),
y = rep(ys, each = 2), group = rep(ys, each = 2))
}
}
geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
library(ggplot2)
ggplot(df, aes(x = factor(counts), y = dens, fill = interaction(split,method))) +
geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom")
And here is what I get:
As can be seen, the green images are on top of the blues. How do I get around this? Thanks!
EDIT: Folllowing Axeman's suggestion, I am almost there:
levels(df$split) <- factor(0:3)
library(ggplot2)
ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) + geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom") + scale_x_discrete(interaction(df$split,df$counts)[-length(interaction(df$split,df$counts))], drop = FALSE)
So almost there!
Would like two fixes: the white space arising from the last interaction between split and counts, and the scale to only have counts for each bunch.
Wonder if these should be separate questions on Stackoverflow.
Almost there!
library(ggplot2)
ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) + geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom") + scale_x_discrete(limits=levels(interaction(df$split,df$counts))[-length(levels(interaction(df$split,df$counts)))],drop = FALSE)
This yields:
I still need to place the value of counts on the x-axis, in between the two plots.
I think that this question has become too long and the basic parts of this question have been answered. I have put up a new question on how to change the discrete scale. Hopefully, someone will know! Anyway, here is the answer to this question (thanks, Axe!). It is in the edited version of my question.
library(ggplot2)
df <- data.frame(dens = rnorm(5000),
split = factor(sample(1:2, 5000, replace = T)),
method = factor(sample(c("A","B"), 5000, replace = T)),
counts = factor(sample(c(1, 10, 100, 1000, 10000), 5000, replace = T)))
df$key <- factor(paste(df$split, df$method))
levels(df$split) <- factor(0:2)
library(ggplot2)
ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) +
geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) +
theme_light() +
theme(legend.position="bottom") +
scale_x_discrete(
limits = levels(interaction(df$split,df$counts))[-length(levels(interaction(df$split,df$counts)))],
drop = FALSE,
name = "Counts"
)

Resources