Split violin plot with one group using ggplot - r

To create a split violin plot in ggplot, I have used the geom_split_violin function created by jan-glx below. This function is very handy for two or more groups, and I was wondering if there was a way to utilize this function for only one group? I am aiming to create a split violin plot where the only categorical variable is gender (male/female).
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
ggplot(my_data, aes(x, y, fill = m)) + geom_split_violin()

There is geom_violinhalf from the see package:
library(tidyverse)
library(see)
ggplot(iris, aes(x = "x", y = Sepal.Length, fill = Species)) +
geom_violinhalf(
data = iris %>% filter(Species == "setosa")
) +
geom_violinhalf(
data = iris %>% filter(Species == "versicolor"),
flip = TRUE
)
Created on 2021-11-25 by the reprex package (v2.0.1)

Related

Violin plot in one picture

I´m trying to plot a split violin but the plot of my violin is divided and I need both variables stick together to better comparison
I want something like this plot
plot1
But I obtain this plot
myplot
I am using this code:
library(tidyverse)
my_data <- tibble(Var_1 = c(900, 1500, 350, 1200, 750, 100,125,250,300),
Var_2 = c(385, 988, 150, 355,555, 900,20, 25, 500),
Gender = c("W", "W", "W", "M", "M", "W", "W", "M", "W"),
my_weights = c(2.2, 3.1, 8.2, 4.2, 5.3, 6.8, 12, 25, 1))
my_data %>%
ggplot(aes(x = Gender , y = Var_1, weight = my_weights, fill = Gender))+
geom_split_violin(color = "black", scale = "count")
And for the geom_split_violin () I am used this code from an old post in this page here is the post
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
Any suggestion of how make it work? Thanks
You are using Gender twice: in x and in fill.
If you want the correct plot, you will need an other variable for the x axis. You can also cheat like this:
my_data %>%
ggplot(aes(x = 0 , y = Var_1, weight = my_weights, fill = Gender))+
geom_split_violin(color = "black", scale = "count")

How to avoid overlapping labels using split violin plot in ggplot2 (R)

I am generating split violin plots using the geom_split_violin function created here: Split violin plot with ggplot2.
Then, I add labels for sample sizes (n = ...) for each split violin. However, unfortunately the labels overlap. How could I please move them slightly to the left and right, so that they do not overlap?
Here is the full code that I am using and below it the result with overlapping "n = ..." labels.
# Create data
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(500), rnorm(300, 0.5), rnorm(400, 1), rnorm(200, 1.5)),
x=c(rep('a', 800), rep('b', 600)),
m=c(rep('i', 300), rep('j', 700), rep('i', 400)))
# Code to create geom_split_violin function from link above
library('ggplot2')
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
# Add labels 'n = ...'
give_n = function(x, y_lo = min(my_data$y)) {
data.frame(y = y_lo * 1.06,
label = paste("n =", length(x)))
}
# Plot data
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin() +
stat_summary(fun.data = give_n, aes(x = as.factor(x)), geom = "text")
Result (note overlapping 'n = ...' labels):
Does adding position_nudge() solve your problem?
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin() +
stat_summary(fun.data = give_n, aes(x = as.factor(x)), geom = "text",
position = position_nudge(x = c(-0.25, 0.25)))

Add boxplot and stats to split violin plot in ggplot2 (R)

I love the split violin plot and #jan-glx 's awesome geom_split_violin function created here: Split violin plot with ggplot2.
I would love to add split boxplots and stats to this, as I explain below.
First, to be complete, here are the full data and code.
Data (copied from above link)
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
x=c(rep('a', 2000), rep('b', 2000)),
m=c(rep('i', 1000), rep('j', 2000), rep('i', 1000)))
Code to create geom_split_violin function (copied from above link)
library('ggplot2')
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
My attempt to add boxplots and stats
Here is the code that I used to try to add:
Split boxplots.
P values using wilcox.test stats.
Sample sizes (n).
Code:
library(ggpubr)
give.n <- function(x){return(y = -2.6, label = length(x))}
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin() +
geom_boxplot(width = 0.2, notch = TRUE, fill="white", outlier.shape = NA) +
stat_summary(fun.data = give.n, geom = "text") +
stat_compare_means(aes(label = ifelse(p < 1.e-4, sprintf("p = %2.1e",
as.numeric(..p.format..)), sprintf("p = %5.4f",
as.numeric(..p.format..)))), method = "wilcox.test", paired = FALSE) +
stat_summary(fun.data = give.n, geom = "text")
This is the result:
Unfortunately, this throws an error and is not quite where I hoped to get, because it is missing the p values and the sample sizes (n) and the boxplots are not split. I also tried one of #Axeman 's excellent suggestions in another SO answer, but no luck so far.
What I am hoping to achieve is something similar to this (also with p values no longer "NA"):
This seems a big challenge, but I am hoping someone out there might be able to help, as others will probably love this as much as me. Thank you.

aes() and aes_string() produce different behaviors in split_violin_plot

I am trying to use a function from this post to produce a split violin plot.
Here's the code:
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, draw_group = function(self, data, ..., draw_quantiles = NULL){
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1,'group']
newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x'])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
Here's an example dataset:
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
x=c(rep('a', 2000), rep('b', 2000)),
m=c(rep('i', 1000), rep('j', 2000), rep('i', 1000))
)
Use aes() in the plot function, we could get the normal plot
ggplot(my_data, aes(x, y, fill=m)) + geom_split_violin()
But if use aes_string(), I got a weird plot where two split violin on top of each other.
ggplot(my_data, aes_string(x='x', y='y', fill='m')) + geom_split_violin()
I do not have a good understanding of how aes vs. aes_string might lead to this difference. Hope someone could give me a hint. Thanks a lot!
Instead of using aes_string(), you can use tidy evaluation approach in ggplot2 3.0 or higher by:
converting the input strings to variables using sym()
unquote them inside aes() using !! so they get evaluated
library(tidyverse)
ggplot(my_data, aes(x = !!sym('x'), y = !!sym('y'), fill = !!sym('m'))) + geom_split_violin()
Created on 2018-10-04 by the reprex package (v0.2.1.9000)

ggplot split violin plot with horizontal mean lines

I've created these split half violin plots using ggplot.
However, instead of including the boxplot, which shows the median, I'd like to include a horizontal line with the mean.
This means each colored half would have its own mean line: the gold half would have a mean line which would not exactly align with the mean line on the grey half. Importantly, I'd like the mean line to reside only inside the density plot.
How can I achieve this? I can't figure it out and I'd appreciate any help!
Here's some example data:
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000,
1.5)),
x=c(rep('a', 2000), rep('b', 2000)),
m=c(rep('i', 1000), rep('j', 2000), rep('i', 1000))
)
Here's the extension for geom_violin to create split_geom_violin:
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, draw_group = function(self, data, ..., draw_quantiles = NULL){
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1,'group']
newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x'])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
Here's the code for the graph:
library(ggplot2)
ggplot(my_data, aes(x, y, fill=m)) +
geom_split_violin(trim = TRUE) +
geom_boxplot(width = 0.25, notch = FALSE, notchwidth = .4, outlier.shape = NA, coef=0) +
labs(x=NULL,y="GM Attitude Score") +
theme_classic() +
theme(text = element_text(size = 20)) +
scale_x_discrete(labels=c("0" = "Control\nCondition", "1" = "GM\nCondition")) +
scale_fill_manual(values=c("#E69F00", "#999999"),
name="Survey\nPart",
breaks=c("1", "2"),
labels=c("Time 1", "Time 5"))
You can use stat_summary & geom_crossbar while setting all fun.y, fun.ymin & fun.ymax to mean only
library(ggplot2)
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin(trim = TRUE) +
stat_summary(fun.y = mean, fun.ymin = mean, fun.ymax = mean,
geom = "crossbar",
width = 0.25,
position = position_dodge(width = .25),
) +
labs(x = NULL, y = "GM Attitude Score") +
theme_classic() +
theme(text = element_text(size = 20)) +
scale_x_discrete(labels = c("0" = "Control\nCondition", "1" = "GM\nCondition")) +
scale_fill_manual(
values = c("#E69F00", "#999999"),
name = "Survey\nPart",
breaks = c("1", "2"),
labels = c("Time 1", "Time 5")
)
Data & function used:
set.seed(20160229)
my_data <- data.frame(
y = c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
x = c(rep("a", 2000), rep("b", 2000)),
m = c(rep("i", 1000), rep("j", 2000), rep("i", 1000))
)
GeomSplitViolin <- ggproto(
"GeomSplitViolin",
GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data,
xminv = x - violinwidth * (x - xmin),
xmaxv = x + violinwidth * (xmax - x)
)
grp <- data[1, "group"]
newdata <- plyr::arrange(
transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
if (grp %% 2 == 1) y else -y
)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname(
"geom_split_violin",
grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
)
} else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
}
)
geom_split_violin <- function(mapping = NULL,
data = NULL,
stat = "ydensity",
position = "identity", ...,
draw_quantiles = NULL,
trim = TRUE,
scale = "area",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomSplitViolin,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
trim = trim,
scale = scale,
draw_quantiles = draw_quantiles,
na.rm = na.rm, ...
)
)
}
Created on 2018-07-08 by the reprex package (v0.2.0.9000).

Resources