ggplot split violin plot with horizontal mean lines - r

I've created these split half violin plots using ggplot.
However, instead of including the boxplot, which shows the median, I'd like to include a horizontal line with the mean.
This means each colored half would have its own mean line: the gold half would have a mean line which would not exactly align with the mean line on the grey half. Importantly, I'd like the mean line to reside only inside the density plot.
How can I achieve this? I can't figure it out and I'd appreciate any help!
Here's some example data:
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000,
1.5)),
x=c(rep('a', 2000), rep('b', 2000)),
m=c(rep('i', 1000), rep('j', 2000), rep('i', 1000))
)
Here's the extension for geom_violin to create split_geom_violin:
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, draw_group = function(self, data, ..., draw_quantiles = NULL){
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1,'group']
newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x'])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
Here's the code for the graph:
library(ggplot2)
ggplot(my_data, aes(x, y, fill=m)) +
geom_split_violin(trim = TRUE) +
geom_boxplot(width = 0.25, notch = FALSE, notchwidth = .4, outlier.shape = NA, coef=0) +
labs(x=NULL,y="GM Attitude Score") +
theme_classic() +
theme(text = element_text(size = 20)) +
scale_x_discrete(labels=c("0" = "Control\nCondition", "1" = "GM\nCondition")) +
scale_fill_manual(values=c("#E69F00", "#999999"),
name="Survey\nPart",
breaks=c("1", "2"),
labels=c("Time 1", "Time 5"))

You can use stat_summary & geom_crossbar while setting all fun.y, fun.ymin & fun.ymax to mean only
library(ggplot2)
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin(trim = TRUE) +
stat_summary(fun.y = mean, fun.ymin = mean, fun.ymax = mean,
geom = "crossbar",
width = 0.25,
position = position_dodge(width = .25),
) +
labs(x = NULL, y = "GM Attitude Score") +
theme_classic() +
theme(text = element_text(size = 20)) +
scale_x_discrete(labels = c("0" = "Control\nCondition", "1" = "GM\nCondition")) +
scale_fill_manual(
values = c("#E69F00", "#999999"),
name = "Survey\nPart",
breaks = c("1", "2"),
labels = c("Time 1", "Time 5")
)
Data & function used:
set.seed(20160229)
my_data <- data.frame(
y = c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
x = c(rep("a", 2000), rep("b", 2000)),
m = c(rep("i", 1000), rep("j", 2000), rep("i", 1000))
)
GeomSplitViolin <- ggproto(
"GeomSplitViolin",
GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data,
xminv = x - violinwidth * (x - xmin),
xmaxv = x + violinwidth * (xmax - x)
)
grp <- data[1, "group"]
newdata <- plyr::arrange(
transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
if (grp %% 2 == 1) y else -y
)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname(
"geom_split_violin",
grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
)
} else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
}
)
geom_split_violin <- function(mapping = NULL,
data = NULL,
stat = "ydensity",
position = "identity", ...,
draw_quantiles = NULL,
trim = TRUE,
scale = "area",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomSplitViolin,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
trim = trim,
scale = scale,
draw_quantiles = draw_quantiles,
na.rm = na.rm, ...
)
)
}
Created on 2018-07-08 by the reprex package (v0.2.0.9000).

Related

How to avoid overlapping labels using split violin plot in ggplot2 (R)

I am generating split violin plots using the geom_split_violin function created here: Split violin plot with ggplot2.
Then, I add labels for sample sizes (n = ...) for each split violin. However, unfortunately the labels overlap. How could I please move them slightly to the left and right, so that they do not overlap?
Here is the full code that I am using and below it the result with overlapping "n = ..." labels.
# Create data
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(500), rnorm(300, 0.5), rnorm(400, 1), rnorm(200, 1.5)),
x=c(rep('a', 800), rep('b', 600)),
m=c(rep('i', 300), rep('j', 700), rep('i', 400)))
# Code to create geom_split_violin function from link above
library('ggplot2')
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
# Add labels 'n = ...'
give_n = function(x, y_lo = min(my_data$y)) {
data.frame(y = y_lo * 1.06,
label = paste("n =", length(x)))
}
# Plot data
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin() +
stat_summary(fun.data = give_n, aes(x = as.factor(x)), geom = "text")
Result (note overlapping 'n = ...' labels):
Does adding position_nudge() solve your problem?
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin() +
stat_summary(fun.data = give_n, aes(x = as.factor(x)), geom = "text",
position = position_nudge(x = c(-0.25, 0.25)))

Add boxplot and stats to split violin plot in ggplot2 (R)

I love the split violin plot and #jan-glx 's awesome geom_split_violin function created here: Split violin plot with ggplot2.
I would love to add split boxplots and stats to this, as I explain below.
First, to be complete, here are the full data and code.
Data (copied from above link)
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
x=c(rep('a', 2000), rep('b', 2000)),
m=c(rep('i', 1000), rep('j', 2000), rep('i', 1000)))
Code to create geom_split_violin function (copied from above link)
library('ggplot2')
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
My attempt to add boxplots and stats
Here is the code that I used to try to add:
Split boxplots.
P values using wilcox.test stats.
Sample sizes (n).
Code:
library(ggpubr)
give.n <- function(x){return(y = -2.6, label = length(x))}
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin() +
geom_boxplot(width = 0.2, notch = TRUE, fill="white", outlier.shape = NA) +
stat_summary(fun.data = give.n, geom = "text") +
stat_compare_means(aes(label = ifelse(p < 1.e-4, sprintf("p = %2.1e",
as.numeric(..p.format..)), sprintf("p = %5.4f",
as.numeric(..p.format..)))), method = "wilcox.test", paired = FALSE) +
stat_summary(fun.data = give.n, geom = "text")
This is the result:
Unfortunately, this throws an error and is not quite where I hoped to get, because it is missing the p values and the sample sizes (n) and the boxplots are not split. I also tried one of #Axeman 's excellent suggestions in another SO answer, but no luck so far.
What I am hoping to achieve is something similar to this (also with p values no longer "NA"):
This seems a big challenge, but I am hoping someone out there might be able to help, as others will probably love this as much as me. Thank you.

Add geom_rug like boxplots per group in ggplot2

I'd like to add a rug boxplot per group to the bottom and top of my density plot. I could not find an implementation, so I tried to manually create the boxplots and then add those with annotation_custom to the plot.
Currently there is the problem that x axes of the density plot and the boxplots do not align. I tried to extract the limits of the first plot, but could only find a way to extract the limits of the data.
The second problem is the exact y alignment of the boxplots, this should be the same as geom_rug handles it.
The third problem is to ensure that the same fill colors are used by the density and boxplots. I used a manual approach to solve this, but clearly it would be a lot more general if I do not have to specify the color in multiple places.
set.seed(123)
library(ggplot2)
library(ggpubr)
library(data.table)
Data <- data.table(x = rnorm(100),
group = rep(c("group1", "group2"), times = c(30, 70)))
# Colors for groups
colors <- c("group1" = "#66C2A5", "group2" = "#FC8D62")
p <-
ggplot(Data, aes(x = x, fill = group, color = group)) +
geom_density(alpha = 0.5) +
scale_color_manual(values = colors) +
scale_fill_manual(values = colors)
# Rugs
p +
geom_rug(data = Data[group %in% "group1"]) +
geom_rug(data = Data[group %in% "group2"], sides = "t")
#-----
# Boxplots
boxplot1 <-
ggplot(Data[group %in% "group1"]) +
geom_boxplot(aes(y = x), fill = colors[["group1"]]) +
coord_flip() +
theme_transparent()
boxplot2 <-
ggplot(Data[group %in% "group2"]) +
geom_boxplot(aes(y = x), fill = colors[["group2"]]) +
coord_flip() +
theme_transparent()
boxplot1_grob <- ggplotGrob(boxplot1)
boxplot2_grob <- ggplotGrob(boxplot2)
# Place box plots inside density plot
x <- ggplot_build(p)$layout$panel_scales_x[[1]]$range$range
xmin <- x[1]
xmax <- x[2]
y <- ggplot_build(p)$layout$panel_scales_y[[1]]$range$range
ymin <- y[1]
ymax <- y[2]
yoffset <- (1/28) * ymax
xoffset <- (1/28) * xmax
# Add boxplots with annotation_custom
p2 <- p +
annotation_custom(grob = boxplot1_grob, xmin = xmin, xmax = xmax,
ymin = ymin - yoffset, ymax = ymin + yoffset) +
annotation_custom(grob = boxplot2_grob,
xmin = xmin, xmax = xmax,
ymin = ymax - yoffset, ymax = ymax + yoffset)
p2
# Alignment is not correct
p2 +
geom_rug(data = Data[group %in% "group1"]) +
geom_rug(data = Data[group %in% "group2"], sides = "t")
I made something similar for practice sometime back, & have yet to test it rigorously, but it does seem to work for your use case. If anything breaks, let me know & I'll see if I can fix them:
# with boxplots only
p +
geom_marginboxplot(data = Data[Data$group %in% "group1", ],
aes(y = 1), sides = "b") +
geom_marginboxplot(data = Data[Data$group %in% "group2", ],
aes(y = 1), sides = "t")
# with both boxplots & geom_rug (check that they align exactly)
p +
geom_marginboxplot(data = Data[Data$group %in% "group1", ],
aes(y = 1), sides = "b") +
geom_marginboxplot(data = Data[Data$group %in% "group2", ],
aes(y = 1), sides = "t") +
geom_rug(data = Data[group %in% "group1"]) +
geom_rug(data = Data[group %in% "group2"], sides = "t")
The marginal boxplot's dimensions imitate those of geom_rug, occupying 3% of the plot panel's height / width. Both x & y have to be mapped in aes(), though in this case y isn't actually needed, so I assigned it the value 1 as a placeholder.
Run the following to get geom_marginboxplot:
library(ggplot2)
library(grid)
`%||%` <- function (x, y) if (is.null(x)) y else x
geom_marginboxplot <- function(mapping = NULL, data = NULL,
...,
sides = "bl",
outlier.shape = 16,
outlier.size = 1.5,
outlier.stroke = 0.5,
width = 0.9,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = StatMarginBoxplot,
geom = GeomMarginBoxplot,
position = "identity",
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
sides = sides,
outlier.shape = outlier.shape,
outlier.size = outlier.size,
outlier.stroke = outlier.stroke,
width = width,
notch = FALSE,
notchwidth = 0.5,
varwidth = FALSE,
na.rm = na.rm,
...
)
)
}
StatMarginBoxplot <- ggproto(
"StatMarginBoxplot", Stat,
optional_aes = c("x", "y"),
non_missing_aes = "weight",
setup_data = function(data, params,
sides = "bl") {
if(grepl("l|r", sides)){
data.vertical <- data
data.vertical$orientation <- "vertical"
} else data.vertical <- data.frame()
if(grepl("b|t", sides)){
data.horizontal <- data
data.horizontal$y <- data.horizontal$x
data.horizontal$orientation <- "horizontal"
} else data.horizontal <- data.frame()
data <- remove_missing(rbind(data.vertical,
data.horizontal),
na.rm = FALSE, vars = "x",
"stat_boxplot")
data
},
compute_group = function(data, scales, sides = "bl",
width = 0.9, na.rm = FALSE, coef = 1.5){
if(grepl("l|r", sides)){
df.vertical <- do.call(environment(StatBoxplot$compute_group)$f,
args = list(data = data[data$orientation == "vertical", ],
scales = scales, width = width,
na.rm = na.rm, coef = coef))
df.vertical <- df.vertical[, c("ymin", "lower", "middle", "upper", "ymax", "outliers")]
df.vertical$orientation = "vertical"
} else df.vertical <- data.frame()
if(grepl("b|t", sides)){
df.horizontal <- do.call(environment(StatBoxplot$compute_group)$f,
args = list(data = data[data$orientation == "horizontal", ],
scales = scales, width = width,
na.rm = na.rm, coef = coef))
df.horizontal <- df.horizontal[, c("ymin", "lower", "middle", "upper", "ymax", "outliers")]
df.horizontal$orientation = "horizontal"
} else df.horizontal <- data.frame()
df <- rbind(df.vertical, df.horizontal)
colnames(df) <- gsub("^y", "", colnames(df))
df
}
)
GeomMarginBoxplot <- ggproto(
"GeomMarginBoxplot", Geom,
setup_data = function(data, params, sides = "bl") {
data.vertical <- data[data$orientation == "vertical", ]
if(nrow(data.vertical) > 0) {
colnames(data.vertical)[1:6] <- paste0("y", colnames(data.vertical)[1:6])
}
data.horizontal <- data[data$orientation == "horizontal", ]
if(nrow(data.horizontal) > 0){
colnames(data.horizontal)[1:6] <- paste0("x", colnames(data.horizontal)[1:6])
}
data <- merge(data.vertical, data.horizontal, all = TRUE)
data <- data[, sapply(data, function(x) !all(is.na(x)))]
data
},
draw_group = function(data, panel_params, coord, fatten = 2,
outlier.shape = 19, outlier.stroke = 0.5,
outlier.size = 1.5, width = 0.9,
notch = FALSE, notchwidth = 0.5, varwidth = FALSE,
sides = "bl") {
draw.marginal.box <- function(sides){
if(sides %in% c("l", "b")){
pos1 <- unit(0, "npc"); pos2 <- unit(0.03, "npc")
} else {
pos2 <- unit(0.97, "npc"); pos1 <- unit(1, "npc")
}
if(width > 0 & width < 1){
increment <- (1 - width) / 2
increment <- increment * (pos2 - pos1)
pos1 <- pos1 + increment
pos2 <- pos2 - increment
}
pos3 <- 0.5 * pos1 + 0.5 * pos2
outliers_grob <- NULL
if(sides %in% c("l", "r")) {
data <- data[data$orientation == "vertical", ]
if (!is.null(data$youtliers) && length(data$youtliers[[1]] >= 1)) {
outliers <- data.frame(
y = unlist(data$youtliers[[1]]),
x = 0,
colour = data$colour[1],
fill = data$fill[1],
shape = outlier.shape %||% data$shape[1],
size = outlier.size %||% data$size[1],
stroke = outlier.stroke %||% data$stroke[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
coords <- coord$transform(outliers, panel_params)
x.pos <- rep(pos3, nrow(coords))
y.pos <- unit(coords$y, "native")
outliers_grob <- pointsGrob(
x = x.pos, y = y.pos,
pch = coords$shape,
gp = gpar(col = coords$colour,
fill = alpha(coords$fill, coords$alpha),
fontsize = coords$size * .pt + coords$stroke * .stroke/2,
lwd = coords$stroke * .stroke/2))
}
box.whiskers <- data.frame(
y = c(data$ymin, data$ylower, data$ymiddle, data$yupper, data$ymax),
x = 0,
colour = data$colour[1],
fill = data$fill[1],
size = data$size[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
box.whiskers <- coord$transform(box.whiskers, panel_params)
whiskers_grob <- segmentsGrob(
x0 = rep(pos3, 2),
x1 = rep(pos3, 2),
y0 = unit(c(box.whiskers$y[1], box.whiskers$y[5]), "native"),
y1 = unit(c(box.whiskers$y[2], box.whiskers$y[4]), "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
box_grob <- rectGrob(
x = pos1,
y = unit(box.whiskers$y[4], "native"),
width = pos2 - pos1,
height = unit(box.whiskers$y[4] - box.whiskers$y[2], "native"),
just = c("left", "top"),
gp = gpar(col = box.whiskers$colour,
fill = alpha(box.whiskers$fill, box.whiskers$alpha),
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
median_grob <- segmentsGrob(
x0 = rep(pos1, 2),
x1 = rep(pos2, 2),
y0 = unit(box.whiskers$y[3], "native"),
y1 = unit(box.whiskers$y[3], "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
}
if(sides %in% c("b", "t")) {
data <- data[data$orientation == "horizontal", ]
if (!is.null(data$xoutliers) && length(data$xoutliers[[1]] >= 1)) {
outliers <- data.frame(
x = unlist(data$xoutliers[[1]]),
y = 0,
colour = data$colour[1],
fill = data$fill[1],
shape = outlier.shape %||% data$shape[1],
size = outlier.size %||% data$size[1],
stroke = outlier.stroke %||% data$stroke[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
coords <- coord$transform(outliers, panel_params)
x.pos <- unit(coords$x, "native")
y.pos <- rep(pos3, nrow(coords))
outliers_grob <- pointsGrob(
x = x.pos, y = y.pos,
pch = coords$shape,
gp = gpar(col = coords$colour,
fill = alpha(coords$fill, coords$alpha),
fontsize = coords$size * .pt + coords$stroke * .stroke/2,
lwd = coords$stroke * .stroke/2))
}
box.whiskers <- data.frame(
x = c(data$xmin, data$xlower, data$xmiddle, data$xupper, data$xmax),
y = 0,
colour = data$colour[1],
fill = data$fill[1],
size = data$size[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
box.whiskers <- coord$transform(box.whiskers, panel_params)
whiskers_grob <- segmentsGrob(
y0 = rep(pos3, 2),
y1 = rep(pos3, 2),
x0 = unit(c(box.whiskers$x[1], box.whiskers$x[5]), "native"),
x1 = unit(c(box.whiskers$x[2], box.whiskers$x[4]), "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
box_grob <- rectGrob(
y = pos2,
x = unit(box.whiskers$x[2], "native"),
height = pos2 - pos1,
width = unit(box.whiskers$x[4] - box.whiskers$x[2], "native"),
just = c("left", "top"),
gp = gpar(col = box.whiskers$colour,
fill = alpha(box.whiskers$fill, box.whiskers$alpha),
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
median_grob <- segmentsGrob(
y0 = rep(pos1, 2),
y1 = rep(pos2, 2),
x0 = unit(box.whiskers$x[3], "native"),
x1 = unit(box.whiskers$x[3], "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
}
grobTree(outliers_grob,
whiskers_grob,
box_grob,
median_grob)
}
result <- list()
if(grepl("l", sides)) result$l <- draw.marginal.box("l")
if(grepl("r", sides)) result$r <- draw.marginal.box("r")
if(grepl("b", sides)) result$b <- draw.marginal.box("b")
if(grepl("t", sides)) result$t <- draw.marginal.box("t")
gTree(children = do.call("gList", result))
},
draw_key = draw_key_boxplot,
default_aes = aes(weight = 1, colour = "grey20", fill = "white",
size = 0.5, stroke = 0.5,
alpha = 0.75, shape = 16, linetype = "solid",
sides = "bl"),
optional_aes = c("lower", "upper", "middle", "min", "max")
)
Session info: R 3.5.1, ggplot2 3.0.0.

aes() and aes_string() produce different behaviors in split_violin_plot

I am trying to use a function from this post to produce a split violin plot.
Here's the code:
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, draw_group = function(self, data, ..., draw_quantiles = NULL){
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1,'group']
newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x'])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
Here's an example dataset:
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
x=c(rep('a', 2000), rep('b', 2000)),
m=c(rep('i', 1000), rep('j', 2000), rep('i', 1000))
)
Use aes() in the plot function, we could get the normal plot
ggplot(my_data, aes(x, y, fill=m)) + geom_split_violin()
But if use aes_string(), I got a weird plot where two split violin on top of each other.
ggplot(my_data, aes_string(x='x', y='y', fill='m')) + geom_split_violin()
I do not have a good understanding of how aes vs. aes_string might lead to this difference. Hope someone could give me a hint. Thanks a lot!
Instead of using aes_string(), you can use tidy evaluation approach in ggplot2 3.0 or higher by:
converting the input strings to variables using sym()
unquote them inside aes() using !! so they get evaluated
library(tidyverse)
ggplot(my_data, aes(x = !!sym('x'), y = !!sym('y'), fill = !!sym('m'))) + geom_split_violin()
Created on 2018-10-04 by the reprex package (v0.2.1.9000)

Fill transparency with geom_violin

How can increase the alpha of the fill of violin plots but not the alpha of the boundary line?
Changing alpha as an argument to geom_violin() results in both the fill and line changing.
Here's what can be done if you wish to avoid plotting twice. Since the introduction of the extension mechanism we can easily modify the existing source code to define our own geoms.
First we need to check what's going on in geom_violin. The actual plotting is done with GeomPolygon$draw_panel(newdata, ...). So the trick is to tinker with geom_polygon. The modification required is really simple: in the plotting block
polygonGrob(munched$x, munched$y, default.units = "native",
id = munched$group,
gp = gpar(
col = alpha(first_rows$colour, first_rows$alpha),
fill = alpha(first_rows$fill, first_rows$alpha),
lwd = first_rows$size * .pt,
lty = first_rows$linetype
)
)
just replace colour specification to col = first_rows$colour.
Alright, we're good to go. Just declare our custom geom_violin2, borrowing the code from original source and applying several ad-hoc fixes.
library(grid)
GeomPolygon2 <- ggproto("GeomPolygon2", Geom,
draw_panel = function(data, panel_scales, coord) {
n <- nrow(data)
if (n == 1) return(zeroGrob())
munched <- coord_munch(coord, data, panel_scales)
munched <- munched[order(munched$group), ]
first_idx <- !duplicated(munched$group)
first_rows <- munched[first_idx, ]
ggplot2:::ggname("geom_polygon",
polygonGrob(munched$x, munched$y, default.units = "native",
id = munched$group,
gp = gpar(
col = first_rows$colour,
fill = alpha(first_rows$fill, first_rows$alpha),
lwd = first_rows$size * .pt,
lty = first_rows$linetype
)
)
)
},
default_aes = aes(colour = "NA", fill = "grey20", size = 0.5, linetype = 1,
alpha = NA),
handle_na = function(data, params) {
data
},
required_aes = c("x", "y"),
draw_key = draw_key_polygon
)
`%||%` <- function (a, b)
{
if (!is.null(a))
a
else b
}
GeomViolin2 <- ggproto("GeomViolin", Geom,
setup_data = function(data, params) {
data$width <- data$width %||%
params$width %||% (resolution(data$x, FALSE) * 0.9)
plyr::ddply(data, "group", transform,
xmin = x - width / 2,
xmax = x + width / 2
)
},
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data,
xminv = x - violinwidth * (x - xmin),
xmaxv = x + violinwidth * (xmax - x)
)
newdata <- rbind(
plyr::arrange(transform(data, x = xminv), y),
plyr::arrange(transform(data, x = xmaxv), -y)
)
newdata <- rbind(newdata, newdata[1,])
if (length(draw_quantiles) > 0) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
quantiles <- create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[
rep(1, nrow(quantiles)),
setdiff(names(data), c("x", "y")),
drop = FALSE
]
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_violin", grobTree(
GeomPolygon2$draw_panel(newdata, ...),
quantile_grob)
)
} else {
ggplot2:::ggname("geom_violin", GeomPolygon2$draw_panel(newdata, ...))
}
},
draw_key = draw_key_polygon,
default_aes = aes(weight = 1, colour = "grey20", fill = "white", size = 0.5,
alpha = NA, linetype = "solid"),
required_aes = c("x", "y")
)
geom_violin2 <- function(mapping = NULL, data = NULL, stat = "ydensity",
draw_quantiles = NULL, position = "dodge",
trim = TRUE, scale = "area",
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomViolin2,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
trim = trim,
scale = scale,
draw_quantiles = draw_quantiles,
na.rm = na.rm,
...
)
)
}
Now behold! The colours are questionable, I admit. But you can clearly see that the border is not affected by alpha.
ggplot(mtcars, aes(factor(cyl), mpg)) +
geom_violin2(alpha = 0.7, size = 3, colour = "blue", fill = "red")

Resources