Fill transparency with geom_violin - r

How can increase the alpha of the fill of violin plots but not the alpha of the boundary line?
Changing alpha as an argument to geom_violin() results in both the fill and line changing.

Here's what can be done if you wish to avoid plotting twice. Since the introduction of the extension mechanism we can easily modify the existing source code to define our own geoms.
First we need to check what's going on in geom_violin. The actual plotting is done with GeomPolygon$draw_panel(newdata, ...). So the trick is to tinker with geom_polygon. The modification required is really simple: in the plotting block
polygonGrob(munched$x, munched$y, default.units = "native",
id = munched$group,
gp = gpar(
col = alpha(first_rows$colour, first_rows$alpha),
fill = alpha(first_rows$fill, first_rows$alpha),
lwd = first_rows$size * .pt,
lty = first_rows$linetype
)
)
just replace colour specification to col = first_rows$colour.
Alright, we're good to go. Just declare our custom geom_violin2, borrowing the code from original source and applying several ad-hoc fixes.
library(grid)
GeomPolygon2 <- ggproto("GeomPolygon2", Geom,
draw_panel = function(data, panel_scales, coord) {
n <- nrow(data)
if (n == 1) return(zeroGrob())
munched <- coord_munch(coord, data, panel_scales)
munched <- munched[order(munched$group), ]
first_idx <- !duplicated(munched$group)
first_rows <- munched[first_idx, ]
ggplot2:::ggname("geom_polygon",
polygonGrob(munched$x, munched$y, default.units = "native",
id = munched$group,
gp = gpar(
col = first_rows$colour,
fill = alpha(first_rows$fill, first_rows$alpha),
lwd = first_rows$size * .pt,
lty = first_rows$linetype
)
)
)
},
default_aes = aes(colour = "NA", fill = "grey20", size = 0.5, linetype = 1,
alpha = NA),
handle_na = function(data, params) {
data
},
required_aes = c("x", "y"),
draw_key = draw_key_polygon
)
`%||%` <- function (a, b)
{
if (!is.null(a))
a
else b
}
GeomViolin2 <- ggproto("GeomViolin", Geom,
setup_data = function(data, params) {
data$width <- data$width %||%
params$width %||% (resolution(data$x, FALSE) * 0.9)
plyr::ddply(data, "group", transform,
xmin = x - width / 2,
xmax = x + width / 2
)
},
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data,
xminv = x - violinwidth * (x - xmin),
xmaxv = x + violinwidth * (xmax - x)
)
newdata <- rbind(
plyr::arrange(transform(data, x = xminv), y),
plyr::arrange(transform(data, x = xmaxv), -y)
)
newdata <- rbind(newdata, newdata[1,])
if (length(draw_quantiles) > 0) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
quantiles <- create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[
rep(1, nrow(quantiles)),
setdiff(names(data), c("x", "y")),
drop = FALSE
]
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_violin", grobTree(
GeomPolygon2$draw_panel(newdata, ...),
quantile_grob)
)
} else {
ggplot2:::ggname("geom_violin", GeomPolygon2$draw_panel(newdata, ...))
}
},
draw_key = draw_key_polygon,
default_aes = aes(weight = 1, colour = "grey20", fill = "white", size = 0.5,
alpha = NA, linetype = "solid"),
required_aes = c("x", "y")
)
geom_violin2 <- function(mapping = NULL, data = NULL, stat = "ydensity",
draw_quantiles = NULL, position = "dodge",
trim = TRUE, scale = "area",
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomViolin2,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
trim = trim,
scale = scale,
draw_quantiles = draw_quantiles,
na.rm = na.rm,
...
)
)
}
Now behold! The colours are questionable, I admit. But you can clearly see that the border is not affected by alpha.
ggplot(mtcars, aes(factor(cyl), mpg)) +
geom_violin2(alpha = 0.7, size = 3, colour = "blue", fill = "red")

Related

Adapt `draw_key()` according to own `draw_panel()` for new `ggproto`

Based on the example in Master Software Development in R, I wrote a new geom_my_point(), adapting the alpha depending on the number of data points.
This works fine, but the alpha value of the label is not correct if alpha is explicitly set.
Here the code for the figures:
d <- data.frame(x = runif(200))
d$y <- 1 * d$x + rnorm(200, 0, 0.2)
d$z <- factor(sample(c("group1", "group2"), size = 200, replace = TRUE))
require("ggplot2")
gg1 <- ggplot(d) + geom_my_point(aes(x, y, colour = z)) + ggtitle("gg1")
gg2 <- ggplot(d) + geom_my_point(aes(x, y, colour = z), alpha = 1) + ggtitle("gg2")
gg3 <- ggplot(d) + geom_my_point(aes(x, y, colour = z, alpha = z)) + ggtitle("gg3")
Here the code for the geom_*():
geom_my_point <- function(mapping = NULL, data = NULL, stat = "identity",
position = "identity", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE, ...) {
ggplot2::layer(
geom = GeomMyPoint, mapping = mapping,
data = data, stat = stat, position = position,
show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
GeomMyPoint <- ggplot2::ggproto("GeomMyPoint", ggplot2::Geom,
required_aes = c("x", "y"),
non_missing_aes = c("size", "shape", "colour"),
default_aes = ggplot2::aes(
shape = 19, colour = "black", size = 2,
fill = NA, alpha = NA, stroke = 0.5
),
setup_params = function(data, params) {
n <- nrow(data)
if (n > 100 && n <= 200) {
params$alpha <- 0.3
} else if (n > 200) {
params$alpha <- 0.15
} else {
params$alpha <- 1
}
params
},
draw_panel = function(data, panel_scales, coord, alpha) {
if (is.character(data$shape)) {
data$shape <- translate_shape_string(data$shape)
}
## Transform the data first
coords <- coord$transform(data, panel_scales)
## Get alpha conditional on number of data points
if (any(is.na(coords$alpha))) {
coords$alpha <- alpha
}
## Construct a grid grob
grid::pointsGrob(
x = coords$x,
y = coords$y,
pch = coords$shape,
gp = grid::gpar(
col = alpha(coords$colour, coords$alpha),
fill = alpha(coords$fill, coords$alpha),
fontsize = coords$size * ggplot2::.pt + coords$stroke * ggplot2::.stroke / 2,
lwd = coords$stroke * ggplot2::.stroke / 2
)
)
},
draw_key = function(data, params, size) {
data$alpha <- params$alpha
ggplot2::draw_key_point(data, params, size)
}
)
EDIT:
According to the comment of #teunbrand, the problem for the plot qq2 can be solved by the following adaptions to the draw_key() function:
draw_key = function(data, params, size) {
if (is.na(data$alpha)) {
data$alpha <- params$alpha
}
ggplot2::draw_key_point(data, params, size)
}
But this still does not solve the problem with the graph qq3 - so the underlying question is why alpha is not correctly represented by the data argument of the draw_key() function. Compare also the following plot qq4, in which the size is correctly displayed in the legend (set a browser() w/i draw_key()):
gg4 <- ggplot(d) + geom_my_point(aes(x, y, colour = z, alpha = z, size = z)) + ggtitle("gg4")

Add geom_rug like boxplots per group in ggplot2

I'd like to add a rug boxplot per group to the bottom and top of my density plot. I could not find an implementation, so I tried to manually create the boxplots and then add those with annotation_custom to the plot.
Currently there is the problem that x axes of the density plot and the boxplots do not align. I tried to extract the limits of the first plot, but could only find a way to extract the limits of the data.
The second problem is the exact y alignment of the boxplots, this should be the same as geom_rug handles it.
The third problem is to ensure that the same fill colors are used by the density and boxplots. I used a manual approach to solve this, but clearly it would be a lot more general if I do not have to specify the color in multiple places.
set.seed(123)
library(ggplot2)
library(ggpubr)
library(data.table)
Data <- data.table(x = rnorm(100),
group = rep(c("group1", "group2"), times = c(30, 70)))
# Colors for groups
colors <- c("group1" = "#66C2A5", "group2" = "#FC8D62")
p <-
ggplot(Data, aes(x = x, fill = group, color = group)) +
geom_density(alpha = 0.5) +
scale_color_manual(values = colors) +
scale_fill_manual(values = colors)
# Rugs
p +
geom_rug(data = Data[group %in% "group1"]) +
geom_rug(data = Data[group %in% "group2"], sides = "t")
#-----
# Boxplots
boxplot1 <-
ggplot(Data[group %in% "group1"]) +
geom_boxplot(aes(y = x), fill = colors[["group1"]]) +
coord_flip() +
theme_transparent()
boxplot2 <-
ggplot(Data[group %in% "group2"]) +
geom_boxplot(aes(y = x), fill = colors[["group2"]]) +
coord_flip() +
theme_transparent()
boxplot1_grob <- ggplotGrob(boxplot1)
boxplot2_grob <- ggplotGrob(boxplot2)
# Place box plots inside density plot
x <- ggplot_build(p)$layout$panel_scales_x[[1]]$range$range
xmin <- x[1]
xmax <- x[2]
y <- ggplot_build(p)$layout$panel_scales_y[[1]]$range$range
ymin <- y[1]
ymax <- y[2]
yoffset <- (1/28) * ymax
xoffset <- (1/28) * xmax
# Add boxplots with annotation_custom
p2 <- p +
annotation_custom(grob = boxplot1_grob, xmin = xmin, xmax = xmax,
ymin = ymin - yoffset, ymax = ymin + yoffset) +
annotation_custom(grob = boxplot2_grob,
xmin = xmin, xmax = xmax,
ymin = ymax - yoffset, ymax = ymax + yoffset)
p2
# Alignment is not correct
p2 +
geom_rug(data = Data[group %in% "group1"]) +
geom_rug(data = Data[group %in% "group2"], sides = "t")
I made something similar for practice sometime back, & have yet to test it rigorously, but it does seem to work for your use case. If anything breaks, let me know & I'll see if I can fix them:
# with boxplots only
p +
geom_marginboxplot(data = Data[Data$group %in% "group1", ],
aes(y = 1), sides = "b") +
geom_marginboxplot(data = Data[Data$group %in% "group2", ],
aes(y = 1), sides = "t")
# with both boxplots & geom_rug (check that they align exactly)
p +
geom_marginboxplot(data = Data[Data$group %in% "group1", ],
aes(y = 1), sides = "b") +
geom_marginboxplot(data = Data[Data$group %in% "group2", ],
aes(y = 1), sides = "t") +
geom_rug(data = Data[group %in% "group1"]) +
geom_rug(data = Data[group %in% "group2"], sides = "t")
The marginal boxplot's dimensions imitate those of geom_rug, occupying 3% of the plot panel's height / width. Both x & y have to be mapped in aes(), though in this case y isn't actually needed, so I assigned it the value 1 as a placeholder.
Run the following to get geom_marginboxplot:
library(ggplot2)
library(grid)
`%||%` <- function (x, y) if (is.null(x)) y else x
geom_marginboxplot <- function(mapping = NULL, data = NULL,
...,
sides = "bl",
outlier.shape = 16,
outlier.size = 1.5,
outlier.stroke = 0.5,
width = 0.9,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = StatMarginBoxplot,
geom = GeomMarginBoxplot,
position = "identity",
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
sides = sides,
outlier.shape = outlier.shape,
outlier.size = outlier.size,
outlier.stroke = outlier.stroke,
width = width,
notch = FALSE,
notchwidth = 0.5,
varwidth = FALSE,
na.rm = na.rm,
...
)
)
}
StatMarginBoxplot <- ggproto(
"StatMarginBoxplot", Stat,
optional_aes = c("x", "y"),
non_missing_aes = "weight",
setup_data = function(data, params,
sides = "bl") {
if(grepl("l|r", sides)){
data.vertical <- data
data.vertical$orientation <- "vertical"
} else data.vertical <- data.frame()
if(grepl("b|t", sides)){
data.horizontal <- data
data.horizontal$y <- data.horizontal$x
data.horizontal$orientation <- "horizontal"
} else data.horizontal <- data.frame()
data <- remove_missing(rbind(data.vertical,
data.horizontal),
na.rm = FALSE, vars = "x",
"stat_boxplot")
data
},
compute_group = function(data, scales, sides = "bl",
width = 0.9, na.rm = FALSE, coef = 1.5){
if(grepl("l|r", sides)){
df.vertical <- do.call(environment(StatBoxplot$compute_group)$f,
args = list(data = data[data$orientation == "vertical", ],
scales = scales, width = width,
na.rm = na.rm, coef = coef))
df.vertical <- df.vertical[, c("ymin", "lower", "middle", "upper", "ymax", "outliers")]
df.vertical$orientation = "vertical"
} else df.vertical <- data.frame()
if(grepl("b|t", sides)){
df.horizontal <- do.call(environment(StatBoxplot$compute_group)$f,
args = list(data = data[data$orientation == "horizontal", ],
scales = scales, width = width,
na.rm = na.rm, coef = coef))
df.horizontal <- df.horizontal[, c("ymin", "lower", "middle", "upper", "ymax", "outliers")]
df.horizontal$orientation = "horizontal"
} else df.horizontal <- data.frame()
df <- rbind(df.vertical, df.horizontal)
colnames(df) <- gsub("^y", "", colnames(df))
df
}
)
GeomMarginBoxplot <- ggproto(
"GeomMarginBoxplot", Geom,
setup_data = function(data, params, sides = "bl") {
data.vertical <- data[data$orientation == "vertical", ]
if(nrow(data.vertical) > 0) {
colnames(data.vertical)[1:6] <- paste0("y", colnames(data.vertical)[1:6])
}
data.horizontal <- data[data$orientation == "horizontal", ]
if(nrow(data.horizontal) > 0){
colnames(data.horizontal)[1:6] <- paste0("x", colnames(data.horizontal)[1:6])
}
data <- merge(data.vertical, data.horizontal, all = TRUE)
data <- data[, sapply(data, function(x) !all(is.na(x)))]
data
},
draw_group = function(data, panel_params, coord, fatten = 2,
outlier.shape = 19, outlier.stroke = 0.5,
outlier.size = 1.5, width = 0.9,
notch = FALSE, notchwidth = 0.5, varwidth = FALSE,
sides = "bl") {
draw.marginal.box <- function(sides){
if(sides %in% c("l", "b")){
pos1 <- unit(0, "npc"); pos2 <- unit(0.03, "npc")
} else {
pos2 <- unit(0.97, "npc"); pos1 <- unit(1, "npc")
}
if(width > 0 & width < 1){
increment <- (1 - width) / 2
increment <- increment * (pos2 - pos1)
pos1 <- pos1 + increment
pos2 <- pos2 - increment
}
pos3 <- 0.5 * pos1 + 0.5 * pos2
outliers_grob <- NULL
if(sides %in% c("l", "r")) {
data <- data[data$orientation == "vertical", ]
if (!is.null(data$youtliers) && length(data$youtliers[[1]] >= 1)) {
outliers <- data.frame(
y = unlist(data$youtliers[[1]]),
x = 0,
colour = data$colour[1],
fill = data$fill[1],
shape = outlier.shape %||% data$shape[1],
size = outlier.size %||% data$size[1],
stroke = outlier.stroke %||% data$stroke[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
coords <- coord$transform(outliers, panel_params)
x.pos <- rep(pos3, nrow(coords))
y.pos <- unit(coords$y, "native")
outliers_grob <- pointsGrob(
x = x.pos, y = y.pos,
pch = coords$shape,
gp = gpar(col = coords$colour,
fill = alpha(coords$fill, coords$alpha),
fontsize = coords$size * .pt + coords$stroke * .stroke/2,
lwd = coords$stroke * .stroke/2))
}
box.whiskers <- data.frame(
y = c(data$ymin, data$ylower, data$ymiddle, data$yupper, data$ymax),
x = 0,
colour = data$colour[1],
fill = data$fill[1],
size = data$size[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
box.whiskers <- coord$transform(box.whiskers, panel_params)
whiskers_grob <- segmentsGrob(
x0 = rep(pos3, 2),
x1 = rep(pos3, 2),
y0 = unit(c(box.whiskers$y[1], box.whiskers$y[5]), "native"),
y1 = unit(c(box.whiskers$y[2], box.whiskers$y[4]), "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
box_grob <- rectGrob(
x = pos1,
y = unit(box.whiskers$y[4], "native"),
width = pos2 - pos1,
height = unit(box.whiskers$y[4] - box.whiskers$y[2], "native"),
just = c("left", "top"),
gp = gpar(col = box.whiskers$colour,
fill = alpha(box.whiskers$fill, box.whiskers$alpha),
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
median_grob <- segmentsGrob(
x0 = rep(pos1, 2),
x1 = rep(pos2, 2),
y0 = unit(box.whiskers$y[3], "native"),
y1 = unit(box.whiskers$y[3], "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
}
if(sides %in% c("b", "t")) {
data <- data[data$orientation == "horizontal", ]
if (!is.null(data$xoutliers) && length(data$xoutliers[[1]] >= 1)) {
outliers <- data.frame(
x = unlist(data$xoutliers[[1]]),
y = 0,
colour = data$colour[1],
fill = data$fill[1],
shape = outlier.shape %||% data$shape[1],
size = outlier.size %||% data$size[1],
stroke = outlier.stroke %||% data$stroke[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
coords <- coord$transform(outliers, panel_params)
x.pos <- unit(coords$x, "native")
y.pos <- rep(pos3, nrow(coords))
outliers_grob <- pointsGrob(
x = x.pos, y = y.pos,
pch = coords$shape,
gp = gpar(col = coords$colour,
fill = alpha(coords$fill, coords$alpha),
fontsize = coords$size * .pt + coords$stroke * .stroke/2,
lwd = coords$stroke * .stroke/2))
}
box.whiskers <- data.frame(
x = c(data$xmin, data$xlower, data$xmiddle, data$xupper, data$xmax),
y = 0,
colour = data$colour[1],
fill = data$fill[1],
size = data$size[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
box.whiskers <- coord$transform(box.whiskers, panel_params)
whiskers_grob <- segmentsGrob(
y0 = rep(pos3, 2),
y1 = rep(pos3, 2),
x0 = unit(c(box.whiskers$x[1], box.whiskers$x[5]), "native"),
x1 = unit(c(box.whiskers$x[2], box.whiskers$x[4]), "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
box_grob <- rectGrob(
y = pos2,
x = unit(box.whiskers$x[2], "native"),
height = pos2 - pos1,
width = unit(box.whiskers$x[4] - box.whiskers$x[2], "native"),
just = c("left", "top"),
gp = gpar(col = box.whiskers$colour,
fill = alpha(box.whiskers$fill, box.whiskers$alpha),
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
median_grob <- segmentsGrob(
y0 = rep(pos1, 2),
y1 = rep(pos2, 2),
x0 = unit(box.whiskers$x[3], "native"),
x1 = unit(box.whiskers$x[3], "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
}
grobTree(outliers_grob,
whiskers_grob,
box_grob,
median_grob)
}
result <- list()
if(grepl("l", sides)) result$l <- draw.marginal.box("l")
if(grepl("r", sides)) result$r <- draw.marginal.box("r")
if(grepl("b", sides)) result$b <- draw.marginal.box("b")
if(grepl("t", sides)) result$t <- draw.marginal.box("t")
gTree(children = do.call("gList", result))
},
draw_key = draw_key_boxplot,
default_aes = aes(weight = 1, colour = "grey20", fill = "white",
size = 0.5, stroke = 0.5,
alpha = 0.75, shape = 16, linetype = "solid",
sides = "bl"),
optional_aes = c("lower", "upper", "middle", "min", "max")
)
Session info: R 3.5.1, ggplot2 3.0.0.

ggplot split violin plot with horizontal mean lines

I've created these split half violin plots using ggplot.
However, instead of including the boxplot, which shows the median, I'd like to include a horizontal line with the mean.
This means each colored half would have its own mean line: the gold half would have a mean line which would not exactly align with the mean line on the grey half. Importantly, I'd like the mean line to reside only inside the density plot.
How can I achieve this? I can't figure it out and I'd appreciate any help!
Here's some example data:
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000,
1.5)),
x=c(rep('a', 2000), rep('b', 2000)),
m=c(rep('i', 1000), rep('j', 2000), rep('i', 1000))
)
Here's the extension for geom_violin to create split_geom_violin:
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, draw_group = function(self, data, ..., draw_quantiles = NULL){
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1,'group']
newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x'])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
Here's the code for the graph:
library(ggplot2)
ggplot(my_data, aes(x, y, fill=m)) +
geom_split_violin(trim = TRUE) +
geom_boxplot(width = 0.25, notch = FALSE, notchwidth = .4, outlier.shape = NA, coef=0) +
labs(x=NULL,y="GM Attitude Score") +
theme_classic() +
theme(text = element_text(size = 20)) +
scale_x_discrete(labels=c("0" = "Control\nCondition", "1" = "GM\nCondition")) +
scale_fill_manual(values=c("#E69F00", "#999999"),
name="Survey\nPart",
breaks=c("1", "2"),
labels=c("Time 1", "Time 5"))
You can use stat_summary & geom_crossbar while setting all fun.y, fun.ymin & fun.ymax to mean only
library(ggplot2)
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin(trim = TRUE) +
stat_summary(fun.y = mean, fun.ymin = mean, fun.ymax = mean,
geom = "crossbar",
width = 0.25,
position = position_dodge(width = .25),
) +
labs(x = NULL, y = "GM Attitude Score") +
theme_classic() +
theme(text = element_text(size = 20)) +
scale_x_discrete(labels = c("0" = "Control\nCondition", "1" = "GM\nCondition")) +
scale_fill_manual(
values = c("#E69F00", "#999999"),
name = "Survey\nPart",
breaks = c("1", "2"),
labels = c("Time 1", "Time 5")
)
Data & function used:
set.seed(20160229)
my_data <- data.frame(
y = c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
x = c(rep("a", 2000), rep("b", 2000)),
m = c(rep("i", 1000), rep("j", 2000), rep("i", 1000))
)
GeomSplitViolin <- ggproto(
"GeomSplitViolin",
GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data,
xminv = x - violinwidth * (x - xmin),
xmaxv = x + violinwidth * (xmax - x)
)
grp <- data[1, "group"]
newdata <- plyr::arrange(
transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
if (grp %% 2 == 1) y else -y
)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname(
"geom_split_violin",
grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
)
} else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
}
)
geom_split_violin <- function(mapping = NULL,
data = NULL,
stat = "ydensity",
position = "identity", ...,
draw_quantiles = NULL,
trim = TRUE,
scale = "area",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomSplitViolin,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
trim = trim,
scale = scale,
draw_quantiles = draw_quantiles,
na.rm = na.rm, ...
)
)
}
Created on 2018-07-08 by the reprex package (v0.2.0.9000).

How to modify the backgroup color of label in the multiple-ggproto using ggplot2

I want to draw a graph which is familiar to the enterotype plot in the research. But my new multiple-ggproto seems terrible as showed in p1, owing to the missing backgroup color of the label. I've tried multiple variations of this, for example modify GeomLabel$draw_panel in order to reset the default arguments of geom in ggplot2::ggproto. However, I could not find the labelGrob() function which is removed in ggplot2 and grid package. Thus, the solution of modification didn't work. How to modify the backgroup color of label in the multiple-ggproto. Any ideas? Thanks in advance. Here is my code and two pictures.
p1: the background color of label should be white or the text color should be black.
P2:displays the wrong point color, line color and legend.
geom_enterotype <- function(mapping = NULL, data = NULL, stat = "identity", position = "identity",
alpha = 0.3, prop = 0.5, ..., lineend = "butt", linejoin = "round",
linemitre = 1, arrow = NULL, na.rm = FALSE, parse = FALSE,
nudge_x = 0, nudge_y = 0, label.padding = unit(0.15, "lines"),
label.r = unit(0.15, "lines"), label.size = 0.1,
show.legend = TRUE, inherit.aes = TRUE) {
library(ggplot2)
# create new stat and geom for PCA scatterplot with ellipses
StatEllipse <- ggproto("StatEllipse", Stat,
required_aes = c("x", "y"),
compute_group = function(., data, scales, level = 0.75, segments = 51, ...) {
library(MASS)
dfn <- 2
dfd <- length(data$x) - 1
if (dfd < 3) {
ellipse <- rbind(c(NA, NA))
} else {
v <- cov.trob(cbind(data$x, data$y))
shape <- v$cov
center <- v$center
radius <- sqrt(dfn * qf(level, dfn, dfd))
angles <- (0:segments) * 2 * pi/segments
unit.circle <- cbind(cos(angles), sin(angles))
ellipse <- t(center + radius * t(unit.circle %*% chol(shape)))
}
ellipse <- as.data.frame(ellipse)
colnames(ellipse) <- c("x", "y")
return(ellipse)
})
# write new ggproto
GeomEllipse <- ggproto("GeomEllipse", Geom,
draw_group = function(data, panel_scales, coord) {
n <- nrow(data)
if (n == 1)
return(zeroGrob())
munched <- coord_munch(coord, data, panel_scales)
munched <- munched[order(munched$group), ]
first_idx <- !duplicated(munched$group)
first_rows <- munched[first_idx, ]
grid::pathGrob(munched$x, munched$y, default.units = "native",
id = munched$group,
gp = grid::gpar(col = first_rows$colour,
fill = alpha(first_rows$fill, first_rows$alpha), lwd = first_rows$size * .pt, lty = first_rows$linetype))
},
default_aes = aes(colour = "NA", fill = "grey20", size = 0.5, linetype = 1, alpha = NA, prop = 0.5),
handle_na = function(data, params) {
data
},
required_aes = c("x", "y"),
draw_key = draw_key_path
)
# create a new stat for PCA scatterplot with lines which totally directs to the center
StatConline <- ggproto("StatConline", Stat,
compute_group = function(data, scales) {
library(miscTools)
library(MASS)
df <- data.frame(data$x,data$y)
mat <- as.matrix(df)
center <- cov.trob(df)$center
names(center)<- NULL
mat_insert <- insertRow(mat, 2, center )
for(i in 1:nrow(mat)) {
mat_insert <- insertRow( mat_insert, 2*i, center )
next
}
mat_insert <- mat_insert[-c(2:3),]
rownames(mat_insert) <- NULL
mat_insert <- as.data.frame(mat_insert,center)
colnames(mat_insert) =c("x","y")
return(mat_insert)
},
required_aes = c("x", "y")
)
# create a new stat for PCA scatterplot with center labels
StatLabel <- ggproto("StatLabel" ,Stat,
compute_group = function(data, scales) {
library(MASS)
df <- data.frame(data$x,data$y)
center <- cov.trob(df)$center
names(center)<- NULL
center <- t(as.data.frame(center))
center <- as.data.frame(cbind(center))
colnames(center) <- c("x","y")
rownames(center) <- NULL
return(center)
},
required_aes = c("x", "y")
)
layer1 <- layer(data = data, mapping = mapping, stat = stat, geom = GeomPoint,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...))
layer2 <- layer(stat = StatEllipse, data = data, mapping = mapping, geom = GeomEllipse, position = position, show.legend = FALSE,
inherit.aes = inherit.aes, params = list(na.rm = na.rm, prop = prop, alpha = alpha, ...))
layer3 <- layer(data = data, mapping = mapping, stat = StatConline, geom = GeomPath,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(lineend = lineend, linejoin = linejoin,
linemitre = linemitre, arrow = arrow, na.rm = na.rm, ...))
if (!missing(nudge_x) || !missing(nudge_y)) {
if (!missing(position)) {
stop("Specify either `position` or `nudge_x`/`nudge_y`",
call. = FALSE)
}
position <- position_nudge(nudge_x, nudge_y)
}
layer4 <- layer(data = data, mapping = mapping, stat = StatLabel, geom = GeomLabel,
position = position, show.legend = FALSE, inherit.aes = inherit.aes,
params = list(parse = parse, label.padding = label.padding,
label.r = label.r, label.size = label.size, na.rm = na.rm, ...))
return(list(layer1,layer2,layer3,layer4))
}
# data
data(Cars93, package = "MASS")
car_df <- Cars93[, c(3, 5, 13:15, 17, 19:25)]
car_df <- subset(car_df, Type == "Large" | Type == "Midsize" | Type == "Small")
x1 <- mean(car_df$Price) + 2 * sd(car_df$Price)
x2 <- mean(car_df$Price) - 2 * sd(car_df$Price)
car_df <- subset(car_df, Price > x2 | Price < x1)
car_df <- na.omit(car_df)
# Principal Component Analysis
car.pca <- prcomp(car_df[, -1], scale = T)
car.pca_pre <- cbind(as.data.frame(predict(car.pca)[, 1:2]), car_df[, 1])
colnames(car.pca_pre) <- c("PC1", "PC2", "Type")
xlab <- paste("PC1(", round(((car.pca$sdev[1])^2/sum((car.pca$sdev)^2)), 2) * 100, "%)", sep = "")
ylab <- paste("PC2(", round(((car.pca$sdev[2])^2/sum((car.pca$sdev)^2)), 2) * 100, "%)", sep = "")
head(car.pca_pre)
#plot
library(ggplot2)
p1 <- ggplot(car.pca_pre, aes(PC1, PC2, fill = Type , color= Type ,label = Type)) +
geom_enterotype()
p2 <- ggplot(car.pca_pre, aes(PC1, PC2, fill = Type , label = Type)) +
geom_enterotype()
You can manually change the colour scale to give it more emphasis against the background fill colour:
p3 <- ggplot(car.pca_pre, aes(PC1, PC2, fill = Type , color = Type, label = Type)) +
geom_enterotype() +
scale_colour_manual(values = c("red4", "green4", "blue4"))
p3
You can additionally adjust your fill colours by changing the alpha values, or assigning different colour values to give better contrast to your labels.
p4 <- ggplot(car.pca_pre, aes(PC1, PC2, label = Type, shape = Type, fill = Type, colour = Type)) +
geom_enterotype() +
scale_fill_manual(values = alpha(c("pink", "lightgreen", "skyblue"), 1)) +
scale_colour_manual(values = c("red4", "green4", "blue4"))
p4
Finally, if you want a background white colour to your labels, you have to remove the fill option. You can also additionally assign a shape value.
As you can observe, the background text colour is associated with the shape fill colour, while the text label colour is associated with the line colour, the the shape border colour.
p5 <- ggplot(car.pca_pre, aes(PC1, PC2, label = Type, shape = Type, colour = Type)) +
geom_enterotype() + scale_colour_manual(values = c("red4", "green4", "blue4"))
p5

ggplot: How to add a segment with stat_summary

This question is related to How to change boxplot settings when stat_summary is used, where I managed to build nice unicolor boxplots.
However, due to the "unicolor", the median segment's colour cannot be distinguished from the rest of the box. I've managed to add a black point for the median, but I prefer to add a segment instead. Here is the code:
# Data
xdf2 <- data.frame(month = rep(1:6, each = 100),
grp = rep(c('A','B'), 50*6))
xdf2$m <- rpois(n = nrow(xdf2),10)
# Definition of whiskers
f <- function(x) {
r <- quantile(x, probs = c(0.10, 0.25, 0.5, 0.75, 0.90))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
# Add points outside of whiskers
o <- function(x) {
subset(x, x < quantile(x, probs=0.1) | quantile(x, probs=0.9) < x)
}
# Plot
ggplot(data = xdf2, aes(factor(month), m, colour = grp, fill = grp)) +
stat_summary(fun.data = f, geom="boxplot",
position = position_dodge(width=1), size = 2) +
stat_summary(fun.y = o, geom="point",
position = position_dodge(width = 1)) +
scale_color_manual(values = c("indianred","orange"), labels = c("AAA", "BBB")) +
scale_fill_manual(values = c("indianred", "orange"), labels = c("AAA", "BBB")) +
theme_bw() +
stat_summary(fun.y = "median", geom = "point",
position = position_dodge(width = 1), col = "black", size = 4)
And here is the graph:
I would like to add a segment by building a function which compute the parameters for geom="segment":
# Add function to compute segment parameters
s <- function(x,y,z) {
x2 <- x - z
y2 <- median(y)
x2end <- x + z
y2end <- median(y)
# assemble the named output
out <- c(x = x2, y = y2, xend = x2end, yend = y2end)
names(out) <- c("x","y","xend","yend")
out
}
and replace the geom="point" part with:
stat_summary(fun.y = s(month, m, 0.3), geom = "segment",
position = position_dodge(width = 1), col="black")
What I get is:
Error in s(month, m, 0.3) (from #2) : object 'month' not found
If I could better understand the logic of stat_summary, I could solve this problem. But I find it's not easy. If somebody could help me to solve this problem with stat_summary and geom = "segment", I would be very glad and maybe I will understand better the logic behind.
I would also appreciate alternative solutions for adding a horizontal line to mark the median.
There are so many pieces to a boxplot that it's probably worth the effort to change the underlying ggproto object, rather than recreate outliers / whiskers / boxes / median segments piece by piece, & hope they still stack well together.
Here's the result:
# Data
set.seed(123)
xdf2 <- data.frame(month = rep(1:6,each=100), grp = rep(c('A','B'), 50*6))
xdf2$m <- rpois(n=nrow(xdf2),10)
p <- ggplot(data = xdf2,
aes(factor(month), m, colour = grp, fill = grp)) +
scale_color_manual(values = c("A" = "indianred", "B" = "orange"),
labels = c("A" = "AAA", "B" = "BBB"),
aesthetics = c("color", "fill")) +
theme_bw() +
theme(legend.position = "bottom")
p +
geom_boxplot2(position = position_dodge(width = 1), size = 2,
qs = c(0.10, 0.25, 0.5, 0.75, 0.90),
median.colour = "black")
Here are some more variations with different aesthetic options:
library(dplyr)
cowplot::plot_grid(
p +
labs(subtitle = paste("quantiles = c(0.05, 0.3, 0.5, 0.7, 0.95)",
"median segment color = brown",
sep = "\n")) +
geom_boxplot2(position = position_dodge(width = 0.8), size = 2,
qs = c(0.05, 0.3, 0.5, 0.7, 0.95),
median.colour = "brown"),
p %+% filter(xdf2, !(month == 2 & grp == "B")) +
labs(subtitle = paste("some data missing",
"position = dodge2, preserve = single",
sep = "\n")) +
geom_boxplot2(position = position_dodge2(preserve = "single"), size = 2,
qs = c(0.10, 0.25, 0.5, 0.75, 0.90),
median.colour = "black"),
p %+% filter(xdf2, !(month == 2 & grp == "B")) +
labs(subtitle = paste("some data missing",
"position = dodge, preserve = single",
sep = "\n")) +
geom_boxplot2(position = position_dodge(preserve = "single"), size = 2,
qs = c(0.10, 0.25, 0.5, 0.75, 0.90),
median.colour = "black"),
nrow = 1
)
Code:
# define stat_boxplot2() based on stat_boxplot, but with boxplot quantiles (qs)
# added as a parameter (default values are same as original function), &
# stat = StatBoxplot2 instead of StatBoxplot
stat_boxplot2 <- function (
mapping = NULL, data = NULL, geom = "boxplot", position = "dodge2",
..., coef = 1.5, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
qs = c(0, 0.25, 0.5, 0.75, 1)) {
layer(data = data, mapping = mapping, stat = StatBoxplot2,
geom = geom, position = position, show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm,
coef = coef,
qs = qs, ...))
}
# define StatBoxplot2 based on StatBoxplot, with compute_group function tweaked
# to use qs passed from stat_boxplot2(), & outlier definition simplified to
# include all data points beyond the range of qs values
StatBoxplot2 <- ggproto(
"StatBoxplot2", StatBoxplot,
compute_group = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5,
qs = c(0, 0.25, 0.5, 0.75, 1)) {
if (!is.null(data$weight)) {
mod <- quantreg::rq(y ~ 1, weights = weight, data = data,
tau = qs)
stats <- as.numeric(stats::coef(mod))
}
else {
stats <- as.numeric(stats::quantile(data$y, qs))
}
names(stats) <- c("ymin", "lower", "middle", "upper", "ymax")
iqr <- diff(stats[c(2, 4)])
outliers <- data$y < stats[1] | data$y > stats[5] # change outlier definition
if (length(unique(data$x)) > 1)
width <- diff(range(data$x)) * 0.9
df <- as.data.frame(as.list(stats))
df$outliers <- list(data$y[outliers])
if (is.null(data$weight)) {
n <- sum(!is.na(data$y))
}
else {
n <- sum(data$weight[!is.na(data$y) & !is.na(data$weight)])
}
df$notchupper <- df$middle + 1.58 * iqr/sqrt(n)
df$notchlower <- df$middle - 1.58 * iqr/sqrt(n)
df$x <- if (is.factor(data$x))
data$x[1]
else mean(range(data$x))
df$width <- width
df$relvarwidth <- sqrt(n)
df
}
)
# define geom_boxplot2() based on geom_boxplot, using stat = "boxplot2" by
# default instead of "boxplot", with a new parameter median.colour, &
# geom = GeomBoxplot2 instead of GeomBoxplot
geom_boxplot2 <- function(mapping = NULL, data = NULL, stat = "boxplot2", position = "dodge2",
..., outlier.colour = NULL, outlier.color = NULL, outlier.fill = NULL,
outlier.shape = 19, outlier.size = 1.5, outlier.stroke = 0.5,
outlier.alpha = NULL, notch = FALSE, notchwidth = 0.5, varwidth = FALSE,
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
median.colour = NULL, median.color = NULL) {
if (is.character(position)) {
if (varwidth == TRUE)
position <- position_dodge2(preserve = "single")
}
else {
if (identical(position$preserve, "total") & varwidth ==
TRUE) {
warning("Can't preserve total widths when varwidth = TRUE.",
call. = FALSE)
position$preserve <- "single"
}
}
layer(data = data, mapping = mapping, stat = stat, geom = GeomBoxplot2,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(outlier.colour = outlier.color %||% outlier.colour,
outlier.fill = outlier.fill, outlier.shape = outlier.shape,
outlier.size = outlier.size, outlier.stroke = outlier.stroke,
outlier.alpha = outlier.alpha, notch = notch, notchwidth = notchwidth,
varwidth = varwidth, na.rm = na.rm,
median.colour = median.color %||% median.colour, ...))
}
# define GeomBoxplot2 based on GeomBoxplot, with draw_group function & draw_key
# functions tweaked to use median.colour for the median segment's colour, if available
GeomBoxplot2 <- ggproto(
"GeomBoxplot2",
GeomBoxplot,
draw_group = function (data, panel_params, coord, fatten = 2, outlier.colour = NULL,
outlier.fill = NULL, outlier.shape = 19, outlier.size = 1.5,
outlier.stroke = 0.5, outlier.alpha = NULL, notch = FALSE,
notchwidth = 0.5, varwidth = FALSE, median.colour = NULL) {
common <- data.frame(colour = data$colour, size = data$size,
linetype = data$linetype, fill = alpha(data$fill, data$alpha),
group = data$group, stringsAsFactors = FALSE)
whiskers <- data.frame(x = data$x, xend = data$x,
y = c(data$upper, data$lower),
yend = c(data$ymax, data$ymin),
alpha = NA,
common, stringsAsFactors = FALSE)
box <- data.frame(xmin = data$xmin, xmax = data$xmax, ymin = data$lower,
y = data$middle, ymax = data$upper,
ynotchlower = ifelse(notch, data$notchlower, NA),
ynotchupper = ifelse(notch,
data$notchupper, NA),
notchwidth = notchwidth, alpha = data$alpha,
common, stringsAsFactors = FALSE)
if (!is.null(data$outliers) && length(data$outliers[[1]] >= 1)) {
outliers <- data.frame(y = data$outliers[[1]], x = data$x[1],
colour = outlier.colour %||% data$colour[1], fill = outlier.fill %||%
data$fill[1], shape = outlier.shape %||% data$shape[1],
size = outlier.size %||% data$size[1], stroke = outlier.stroke %||%
data$stroke[1], fill = NA, alpha = outlier.alpha %||%
data$alpha[1], stringsAsFactors = FALSE)
outliers_grob <- GeomPoint$draw_panel(outliers, panel_params,
coord)
}
else {
outliers_grob <- NULL
}
if(is.null(median.colour)){
ggplot2:::ggname(
"geom_boxplot",
grobTree(outliers_grob,
GeomSegment$draw_panel(whiskers, panel_params, coord),
GeomCrossbar$draw_panel(box, fatten = fatten, panel_params, coord)))
} else {
ggplot2:::ggname(
"geom_boxplot",
grobTree(outliers_grob,
GeomSegment$draw_panel(whiskers, panel_params, coord),
GeomCrossbar$draw_panel(box, fatten = fatten, panel_params, coord),
GeomSegment$draw_panel(transform(box, x = xmin, xend = xmax, yend = y,
size = size, alpha = NA,
colour = median.colour),
panel_params,
coord)))
}
},
draw_key = function (data, params, size) {
if(is.null(params$median.colour)){
draw_key_boxplot(data, params, size)
} else {
grobTree(linesGrob(0.5, c(0.1, 0.25)),
linesGrob(0.5, c(0.75, 0.9)),
rectGrob(height = 0.5, width = 0.75),
linesGrob(c(0.125, 0.875), 0.5,
gp = gpar(col = params$median.colour)),
gp = gpar(col = data$colour,
fill = alpha(data$fill, data$alpha),
lwd = data$size * .pt,
lty = data$linetype))
}
}
)

Resources