Related
I have a plot made with ggplot where the legends adds extra black points to all the other legends (see image).
library(tidyverse)
library(ggnewscale)
set.seed(12345)
brks = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)
fd = expand.grid(x = seq(6,16, length.out = 100),
y = seq(6,18, length.out = 100))
fd$z = sample(x = seq(0,1, length.out = 100), size = nrow(fd), replace = T)
df.t = data.frame(s = LETTERS[1:5], l = c(11,12,8,15,14), d = c(13,10,7,16,8))
mypal = data.frame(A = "black", B = "red",C = "blue", D = "green", E = "yellow")
summmmmmmm = expand.grid(s = LETTERS[1:5],
yr = 1995:2012)
summmmmmmm$yr = as.factor(summmmmmmm$yr)
summmmmmmm$l = NA
summmmmmmm$d = NA
summmmmmmm[summmmmmmm$s == "A","l"] = rnorm(n = 18, mean = 11, sd = .5)
summmmmmmm[summmmmmmm$s == "B","l"] = rnorm(n = 18, mean = 12, sd = .5)
summmmmmmm[summmmmmmm$s == "C","l"] = rnorm(n = 18, mean = 8, sd = .5)
summmmmmmm[summmmmmmm$s == "D","l"] = rnorm(n = 18, mean = 15, sd = .5)
summmmmmmm[summmmmmmm$s == "E","l"] = rnorm(n = 18, mean = 14, sd = .5)
summmmmmmm[summmmmmmm$s == "A","d"] = rnorm(n = 18, mean = 13, sd = .5)
summmmmmmm[summmmmmmm$s == "B","d"] = rnorm(n = 18, mean = 10, sd = .5)
summmmmmmm[summmmmmmm$s == "C","d"] = rnorm(n = 18, mean = 8, sd = .5)
summmmmmmm[summmmmmmm$s == "D","d"] = rnorm(n = 18, mean = 16, sd = .5)
summmmmmmm[summmmmmmm$s == "E","d"] = rnorm(n = 18, mean = 9, sd = .5)
ggplot(data = fd, mapping = aes(x = x, y = y, z = z)) +
geom_contour_filled(breaks = brks)+
geom_point(data = df.t,
mapping = aes(x = l, y = d, color = s), inherit.aes = FALSE, size = 5) +
scale_fill_manual(values = alpha(hcl.colors(100, "YlOrRd", rev = TRUE, alpha = 1), .99))+
scale_color_manual(values = alpha(mypal,1),
name = "obj")+
new_scale_color() +
geom_point(data = summmmmmmm,
mapping = aes(x = l, y = d,
color = yr, group = s),
shape = 19,
inherit.aes = FALSE,
show.legend = TRUE) +
geom_path(data = summmmmmmm[order(summmmmmmm$yr),],
mapping = aes(x = l, y = d, color = yr,
group = as.factor(s)), inherit.aes = FALSE,
show.legend = FALSE) +
scale_color_viridis_d(name = "time")
I'd like to get rid of those extra points. Also, I like the 'time' legend to be in 2 columns, but not the other legends. Is there a way to do this?
You need to use guide = guide_legend(ncol = 2) in your viridis scale to get two columns.
You can set show.legend = c(colour = TRUE, fill = FALSE) in the second point layer, to specifically show the legend in colour scales but not in fill scales.
See example below (where I've renamed summmmmmmm to df for my own sanity)
ggplot(data = fd, mapping = aes(x = x, y = y, z = z)) +
geom_contour_filled(breaks = brks)+
geom_point(
data = df.t,
mapping = aes(x = l, y = d, color = s), inherit.aes = FALSE, size = 5
) +
scale_fill_manual(
values = alpha(hcl.colors(100, "YlOrRd", rev = TRUE, alpha = 1), .99)
)+
scale_color_manual(values = alpha(mypal,1), name = "obj")+
new_scale_color() +
geom_point(
data = df,
mapping = aes(x = l, y = d, color = yr, group = s),
shape = 19, inherit.aes = FALSE,
show.legend = c(colour = TRUE, fill = FALSE)
) +
geom_path(
data = df[order(df$yr),],
mapping = aes(x = l, y = d, color = yr, group = as.factor(s)),
inherit.aes = FALSE, show.legend = FALSE
) +
scale_color_viridis_d(name = "time", guide = guide_legend(ncol = 2))
This very useful post shows how to display error bars in one direction only with ggplot. To do this, the geom_errorbar function is modified as follows (as proposed by Sean Hughes):
geom_uperrorbar <- function(mapping = NULL, data = NULL,
stat = "identity", position = "identity",
...,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomUperrorbar,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
na.rm = na.rm,
...
)
)
}
GeomUperrorbar <- ggproto("GeomUperrorbar", Geom,
default_aes = aes(colour = "black", size = 0.5, linetype = 1, width = 0.5,
alpha = NA),
draw_key = draw_key_path,
required_aes = c("x", "y", "ymax"),
setup_data = function(data, params) {
data$width <- data$width %||%
params$width %||% (resolution(data$x, FALSE) * 0.9)
transform(data,
xmin = x - width / 2, xmax = x + width / 2, width = NULL
)
},draw_panel = function(data, panel_scales, coord, width = NULL) {
GeomPath$draw_panel(data.frame(
x = as.vector(rbind(data$xmin, data$xmax, NA, data$x, data$x)),
y = as.vector(rbind(data$ymax, data$ymax, NA, data$ymax, data$y)),
colour = rep(data$colour, each = 5),
alpha = rep(data$alpha, each = 5),
size = rep(data$size, each = 5),
linetype = rep(data$linetype, each = 5),
group = rep(1:(nrow(data)), each = 5),
stringsAsFactors = FALSE,
row.names = 1:(nrow(data) * 5)
), panel_scales, coord)
}
)
"%||%" <- function(a, b) {
if (!is.null(a)) a else b
}
But now I have the case that the last bar is negative. Thus, the error bar at the last bar must head in the other direction. Does anyone have an idea how this could work?
Here is a reproducible example:
df <- data.frame(trt = factor(c(1, 1, 2, 2)), resp = c(1, 5, 3, -2),
group = factor(c(1, 2, 1, 2)), se = c(0.1, 0.3, 0.3, 0.2))
df2 <- df[c(1,3), ]
limits <- aes(ymax = resp + se, ymin = resp - se)
dodge <- position_dodge(width = 0.9)
p <- ggplot(df, aes(fill = group, y = resp, x = trt))
p + geom_bar(position = dodge, stat = "identity") +
geom_errorbar(limits, position = dodge, width = 0.25)
The solution in the linked question will actually produce this result. You just need to make sure that the standard error is subtracted instead of added when the bar points down:
limits <- aes(ymax = resp + se * sign(resp))
Now your plotting code is pretty much unaltered:
dodge <- position_dodge(width = 0.9)
ggplot(df, aes(fill = group, y = resp, x = trt)) +
geom_bar(position = dodge, stat = "identity") +
geom_uperrorbar(limits, position = dodge, width = 0.25)
I'd like to add a rug boxplot per group to the bottom and top of my density plot. I could not find an implementation, so I tried to manually create the boxplots and then add those with annotation_custom to the plot.
Currently there is the problem that x axes of the density plot and the boxplots do not align. I tried to extract the limits of the first plot, but could only find a way to extract the limits of the data.
The second problem is the exact y alignment of the boxplots, this should be the same as geom_rug handles it.
The third problem is to ensure that the same fill colors are used by the density and boxplots. I used a manual approach to solve this, but clearly it would be a lot more general if I do not have to specify the color in multiple places.
set.seed(123)
library(ggplot2)
library(ggpubr)
library(data.table)
Data <- data.table(x = rnorm(100),
group = rep(c("group1", "group2"), times = c(30, 70)))
# Colors for groups
colors <- c("group1" = "#66C2A5", "group2" = "#FC8D62")
p <-
ggplot(Data, aes(x = x, fill = group, color = group)) +
geom_density(alpha = 0.5) +
scale_color_manual(values = colors) +
scale_fill_manual(values = colors)
# Rugs
p +
geom_rug(data = Data[group %in% "group1"]) +
geom_rug(data = Data[group %in% "group2"], sides = "t")
#-----
# Boxplots
boxplot1 <-
ggplot(Data[group %in% "group1"]) +
geom_boxplot(aes(y = x), fill = colors[["group1"]]) +
coord_flip() +
theme_transparent()
boxplot2 <-
ggplot(Data[group %in% "group2"]) +
geom_boxplot(aes(y = x), fill = colors[["group2"]]) +
coord_flip() +
theme_transparent()
boxplot1_grob <- ggplotGrob(boxplot1)
boxplot2_grob <- ggplotGrob(boxplot2)
# Place box plots inside density plot
x <- ggplot_build(p)$layout$panel_scales_x[[1]]$range$range
xmin <- x[1]
xmax <- x[2]
y <- ggplot_build(p)$layout$panel_scales_y[[1]]$range$range
ymin <- y[1]
ymax <- y[2]
yoffset <- (1/28) * ymax
xoffset <- (1/28) * xmax
# Add boxplots with annotation_custom
p2 <- p +
annotation_custom(grob = boxplot1_grob, xmin = xmin, xmax = xmax,
ymin = ymin - yoffset, ymax = ymin + yoffset) +
annotation_custom(grob = boxplot2_grob,
xmin = xmin, xmax = xmax,
ymin = ymax - yoffset, ymax = ymax + yoffset)
p2
# Alignment is not correct
p2 +
geom_rug(data = Data[group %in% "group1"]) +
geom_rug(data = Data[group %in% "group2"], sides = "t")
I made something similar for practice sometime back, & have yet to test it rigorously, but it does seem to work for your use case. If anything breaks, let me know & I'll see if I can fix them:
# with boxplots only
p +
geom_marginboxplot(data = Data[Data$group %in% "group1", ],
aes(y = 1), sides = "b") +
geom_marginboxplot(data = Data[Data$group %in% "group2", ],
aes(y = 1), sides = "t")
# with both boxplots & geom_rug (check that they align exactly)
p +
geom_marginboxplot(data = Data[Data$group %in% "group1", ],
aes(y = 1), sides = "b") +
geom_marginboxplot(data = Data[Data$group %in% "group2", ],
aes(y = 1), sides = "t") +
geom_rug(data = Data[group %in% "group1"]) +
geom_rug(data = Data[group %in% "group2"], sides = "t")
The marginal boxplot's dimensions imitate those of geom_rug, occupying 3% of the plot panel's height / width. Both x & y have to be mapped in aes(), though in this case y isn't actually needed, so I assigned it the value 1 as a placeholder.
Run the following to get geom_marginboxplot:
library(ggplot2)
library(grid)
`%||%` <- function (x, y) if (is.null(x)) y else x
geom_marginboxplot <- function(mapping = NULL, data = NULL,
...,
sides = "bl",
outlier.shape = 16,
outlier.size = 1.5,
outlier.stroke = 0.5,
width = 0.9,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = StatMarginBoxplot,
geom = GeomMarginBoxplot,
position = "identity",
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
sides = sides,
outlier.shape = outlier.shape,
outlier.size = outlier.size,
outlier.stroke = outlier.stroke,
width = width,
notch = FALSE,
notchwidth = 0.5,
varwidth = FALSE,
na.rm = na.rm,
...
)
)
}
StatMarginBoxplot <- ggproto(
"StatMarginBoxplot", Stat,
optional_aes = c("x", "y"),
non_missing_aes = "weight",
setup_data = function(data, params,
sides = "bl") {
if(grepl("l|r", sides)){
data.vertical <- data
data.vertical$orientation <- "vertical"
} else data.vertical <- data.frame()
if(grepl("b|t", sides)){
data.horizontal <- data
data.horizontal$y <- data.horizontal$x
data.horizontal$orientation <- "horizontal"
} else data.horizontal <- data.frame()
data <- remove_missing(rbind(data.vertical,
data.horizontal),
na.rm = FALSE, vars = "x",
"stat_boxplot")
data
},
compute_group = function(data, scales, sides = "bl",
width = 0.9, na.rm = FALSE, coef = 1.5){
if(grepl("l|r", sides)){
df.vertical <- do.call(environment(StatBoxplot$compute_group)$f,
args = list(data = data[data$orientation == "vertical", ],
scales = scales, width = width,
na.rm = na.rm, coef = coef))
df.vertical <- df.vertical[, c("ymin", "lower", "middle", "upper", "ymax", "outliers")]
df.vertical$orientation = "vertical"
} else df.vertical <- data.frame()
if(grepl("b|t", sides)){
df.horizontal <- do.call(environment(StatBoxplot$compute_group)$f,
args = list(data = data[data$orientation == "horizontal", ],
scales = scales, width = width,
na.rm = na.rm, coef = coef))
df.horizontal <- df.horizontal[, c("ymin", "lower", "middle", "upper", "ymax", "outliers")]
df.horizontal$orientation = "horizontal"
} else df.horizontal <- data.frame()
df <- rbind(df.vertical, df.horizontal)
colnames(df) <- gsub("^y", "", colnames(df))
df
}
)
GeomMarginBoxplot <- ggproto(
"GeomMarginBoxplot", Geom,
setup_data = function(data, params, sides = "bl") {
data.vertical <- data[data$orientation == "vertical", ]
if(nrow(data.vertical) > 0) {
colnames(data.vertical)[1:6] <- paste0("y", colnames(data.vertical)[1:6])
}
data.horizontal <- data[data$orientation == "horizontal", ]
if(nrow(data.horizontal) > 0){
colnames(data.horizontal)[1:6] <- paste0("x", colnames(data.horizontal)[1:6])
}
data <- merge(data.vertical, data.horizontal, all = TRUE)
data <- data[, sapply(data, function(x) !all(is.na(x)))]
data
},
draw_group = function(data, panel_params, coord, fatten = 2,
outlier.shape = 19, outlier.stroke = 0.5,
outlier.size = 1.5, width = 0.9,
notch = FALSE, notchwidth = 0.5, varwidth = FALSE,
sides = "bl") {
draw.marginal.box <- function(sides){
if(sides %in% c("l", "b")){
pos1 <- unit(0, "npc"); pos2 <- unit(0.03, "npc")
} else {
pos2 <- unit(0.97, "npc"); pos1 <- unit(1, "npc")
}
if(width > 0 & width < 1){
increment <- (1 - width) / 2
increment <- increment * (pos2 - pos1)
pos1 <- pos1 + increment
pos2 <- pos2 - increment
}
pos3 <- 0.5 * pos1 + 0.5 * pos2
outliers_grob <- NULL
if(sides %in% c("l", "r")) {
data <- data[data$orientation == "vertical", ]
if (!is.null(data$youtliers) && length(data$youtliers[[1]] >= 1)) {
outliers <- data.frame(
y = unlist(data$youtliers[[1]]),
x = 0,
colour = data$colour[1],
fill = data$fill[1],
shape = outlier.shape %||% data$shape[1],
size = outlier.size %||% data$size[1],
stroke = outlier.stroke %||% data$stroke[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
coords <- coord$transform(outliers, panel_params)
x.pos <- rep(pos3, nrow(coords))
y.pos <- unit(coords$y, "native")
outliers_grob <- pointsGrob(
x = x.pos, y = y.pos,
pch = coords$shape,
gp = gpar(col = coords$colour,
fill = alpha(coords$fill, coords$alpha),
fontsize = coords$size * .pt + coords$stroke * .stroke/2,
lwd = coords$stroke * .stroke/2))
}
box.whiskers <- data.frame(
y = c(data$ymin, data$ylower, data$ymiddle, data$yupper, data$ymax),
x = 0,
colour = data$colour[1],
fill = data$fill[1],
size = data$size[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
box.whiskers <- coord$transform(box.whiskers, panel_params)
whiskers_grob <- segmentsGrob(
x0 = rep(pos3, 2),
x1 = rep(pos3, 2),
y0 = unit(c(box.whiskers$y[1], box.whiskers$y[5]), "native"),
y1 = unit(c(box.whiskers$y[2], box.whiskers$y[4]), "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
box_grob <- rectGrob(
x = pos1,
y = unit(box.whiskers$y[4], "native"),
width = pos2 - pos1,
height = unit(box.whiskers$y[4] - box.whiskers$y[2], "native"),
just = c("left", "top"),
gp = gpar(col = box.whiskers$colour,
fill = alpha(box.whiskers$fill, box.whiskers$alpha),
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
median_grob <- segmentsGrob(
x0 = rep(pos1, 2),
x1 = rep(pos2, 2),
y0 = unit(box.whiskers$y[3], "native"),
y1 = unit(box.whiskers$y[3], "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
}
if(sides %in% c("b", "t")) {
data <- data[data$orientation == "horizontal", ]
if (!is.null(data$xoutliers) && length(data$xoutliers[[1]] >= 1)) {
outliers <- data.frame(
x = unlist(data$xoutliers[[1]]),
y = 0,
colour = data$colour[1],
fill = data$fill[1],
shape = outlier.shape %||% data$shape[1],
size = outlier.size %||% data$size[1],
stroke = outlier.stroke %||% data$stroke[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
coords <- coord$transform(outliers, panel_params)
x.pos <- unit(coords$x, "native")
y.pos <- rep(pos3, nrow(coords))
outliers_grob <- pointsGrob(
x = x.pos, y = y.pos,
pch = coords$shape,
gp = gpar(col = coords$colour,
fill = alpha(coords$fill, coords$alpha),
fontsize = coords$size * .pt + coords$stroke * .stroke/2,
lwd = coords$stroke * .stroke/2))
}
box.whiskers <- data.frame(
x = c(data$xmin, data$xlower, data$xmiddle, data$xupper, data$xmax),
y = 0,
colour = data$colour[1],
fill = data$fill[1],
size = data$size[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
box.whiskers <- coord$transform(box.whiskers, panel_params)
whiskers_grob <- segmentsGrob(
y0 = rep(pos3, 2),
y1 = rep(pos3, 2),
x0 = unit(c(box.whiskers$x[1], box.whiskers$x[5]), "native"),
x1 = unit(c(box.whiskers$x[2], box.whiskers$x[4]), "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
box_grob <- rectGrob(
y = pos2,
x = unit(box.whiskers$x[2], "native"),
height = pos2 - pos1,
width = unit(box.whiskers$x[4] - box.whiskers$x[2], "native"),
just = c("left", "top"),
gp = gpar(col = box.whiskers$colour,
fill = alpha(box.whiskers$fill, box.whiskers$alpha),
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
median_grob <- segmentsGrob(
y0 = rep(pos1, 2),
y1 = rep(pos2, 2),
x0 = unit(box.whiskers$x[3], "native"),
x1 = unit(box.whiskers$x[3], "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
}
grobTree(outliers_grob,
whiskers_grob,
box_grob,
median_grob)
}
result <- list()
if(grepl("l", sides)) result$l <- draw.marginal.box("l")
if(grepl("r", sides)) result$r <- draw.marginal.box("r")
if(grepl("b", sides)) result$b <- draw.marginal.box("b")
if(grepl("t", sides)) result$t <- draw.marginal.box("t")
gTree(children = do.call("gList", result))
},
draw_key = draw_key_boxplot,
default_aes = aes(weight = 1, colour = "grey20", fill = "white",
size = 0.5, stroke = 0.5,
alpha = 0.75, shape = 16, linetype = "solid",
sides = "bl"),
optional_aes = c("lower", "upper", "middle", "min", "max")
)
Session info: R 3.5.1, ggplot2 3.0.0.
I've created these split half violin plots using ggplot.
However, instead of including the boxplot, which shows the median, I'd like to include a horizontal line with the mean.
This means each colored half would have its own mean line: the gold half would have a mean line which would not exactly align with the mean line on the grey half. Importantly, I'd like the mean line to reside only inside the density plot.
How can I achieve this? I can't figure it out and I'd appreciate any help!
Here's some example data:
set.seed(20160229)
my_data = data.frame(
y=c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000,
1.5)),
x=c(rep('a', 2000), rep('b', 2000)),
m=c(rep('i', 1000), rep('j', 2000), rep('i', 1000))
)
Here's the extension for geom_violin to create split_geom_violin:
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, draw_group = function(self, data, ..., draw_quantiles = NULL){
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1,'group']
newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x'])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
Here's the code for the graph:
library(ggplot2)
ggplot(my_data, aes(x, y, fill=m)) +
geom_split_violin(trim = TRUE) +
geom_boxplot(width = 0.25, notch = FALSE, notchwidth = .4, outlier.shape = NA, coef=0) +
labs(x=NULL,y="GM Attitude Score") +
theme_classic() +
theme(text = element_text(size = 20)) +
scale_x_discrete(labels=c("0" = "Control\nCondition", "1" = "GM\nCondition")) +
scale_fill_manual(values=c("#E69F00", "#999999"),
name="Survey\nPart",
breaks=c("1", "2"),
labels=c("Time 1", "Time 5"))
You can use stat_summary & geom_crossbar while setting all fun.y, fun.ymin & fun.ymax to mean only
library(ggplot2)
ggplot(my_data, aes(x, y, fill = m)) +
geom_split_violin(trim = TRUE) +
stat_summary(fun.y = mean, fun.ymin = mean, fun.ymax = mean,
geom = "crossbar",
width = 0.25,
position = position_dodge(width = .25),
) +
labs(x = NULL, y = "GM Attitude Score") +
theme_classic() +
theme(text = element_text(size = 20)) +
scale_x_discrete(labels = c("0" = "Control\nCondition", "1" = "GM\nCondition")) +
scale_fill_manual(
values = c("#E69F00", "#999999"),
name = "Survey\nPart",
breaks = c("1", "2"),
labels = c("Time 1", "Time 5")
)
Data & function used:
set.seed(20160229)
my_data <- data.frame(
y = c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5)),
x = c(rep("a", 2000), rep("b", 2000)),
m = c(rep("i", 1000), rep("j", 2000), rep("i", 1000))
)
GeomSplitViolin <- ggproto(
"GeomSplitViolin",
GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data,
xminv = x - violinwidth * (x - xmin),
xmaxv = x + violinwidth * (xmax - x)
)
grp <- data[1, "group"]
newdata <- plyr::arrange(
transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
if (grp %% 2 == 1) y else -y
)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname(
"geom_split_violin",
grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
)
} else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
}
)
geom_split_violin <- function(mapping = NULL,
data = NULL,
stat = "ydensity",
position = "identity", ...,
draw_quantiles = NULL,
trim = TRUE,
scale = "area",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomSplitViolin,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
trim = trim,
scale = scale,
draw_quantiles = draw_quantiles,
na.rm = na.rm, ...
)
)
}
Created on 2018-07-08 by the reprex package (v0.2.0.9000).
This question is related to How to change boxplot settings when stat_summary is used, where I managed to build nice unicolor boxplots.
However, due to the "unicolor", the median segment's colour cannot be distinguished from the rest of the box. I've managed to add a black point for the median, but I prefer to add a segment instead. Here is the code:
# Data
xdf2 <- data.frame(month = rep(1:6, each = 100),
grp = rep(c('A','B'), 50*6))
xdf2$m <- rpois(n = nrow(xdf2),10)
# Definition of whiskers
f <- function(x) {
r <- quantile(x, probs = c(0.10, 0.25, 0.5, 0.75, 0.90))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
# Add points outside of whiskers
o <- function(x) {
subset(x, x < quantile(x, probs=0.1) | quantile(x, probs=0.9) < x)
}
# Plot
ggplot(data = xdf2, aes(factor(month), m, colour = grp, fill = grp)) +
stat_summary(fun.data = f, geom="boxplot",
position = position_dodge(width=1), size = 2) +
stat_summary(fun.y = o, geom="point",
position = position_dodge(width = 1)) +
scale_color_manual(values = c("indianred","orange"), labels = c("AAA", "BBB")) +
scale_fill_manual(values = c("indianred", "orange"), labels = c("AAA", "BBB")) +
theme_bw() +
stat_summary(fun.y = "median", geom = "point",
position = position_dodge(width = 1), col = "black", size = 4)
And here is the graph:
I would like to add a segment by building a function which compute the parameters for geom="segment":
# Add function to compute segment parameters
s <- function(x,y,z) {
x2 <- x - z
y2 <- median(y)
x2end <- x + z
y2end <- median(y)
# assemble the named output
out <- c(x = x2, y = y2, xend = x2end, yend = y2end)
names(out) <- c("x","y","xend","yend")
out
}
and replace the geom="point" part with:
stat_summary(fun.y = s(month, m, 0.3), geom = "segment",
position = position_dodge(width = 1), col="black")
What I get is:
Error in s(month, m, 0.3) (from #2) : object 'month' not found
If I could better understand the logic of stat_summary, I could solve this problem. But I find it's not easy. If somebody could help me to solve this problem with stat_summary and geom = "segment", I would be very glad and maybe I will understand better the logic behind.
I would also appreciate alternative solutions for adding a horizontal line to mark the median.
There are so many pieces to a boxplot that it's probably worth the effort to change the underlying ggproto object, rather than recreate outliers / whiskers / boxes / median segments piece by piece, & hope they still stack well together.
Here's the result:
# Data
set.seed(123)
xdf2 <- data.frame(month = rep(1:6,each=100), grp = rep(c('A','B'), 50*6))
xdf2$m <- rpois(n=nrow(xdf2),10)
p <- ggplot(data = xdf2,
aes(factor(month), m, colour = grp, fill = grp)) +
scale_color_manual(values = c("A" = "indianred", "B" = "orange"),
labels = c("A" = "AAA", "B" = "BBB"),
aesthetics = c("color", "fill")) +
theme_bw() +
theme(legend.position = "bottom")
p +
geom_boxplot2(position = position_dodge(width = 1), size = 2,
qs = c(0.10, 0.25, 0.5, 0.75, 0.90),
median.colour = "black")
Here are some more variations with different aesthetic options:
library(dplyr)
cowplot::plot_grid(
p +
labs(subtitle = paste("quantiles = c(0.05, 0.3, 0.5, 0.7, 0.95)",
"median segment color = brown",
sep = "\n")) +
geom_boxplot2(position = position_dodge(width = 0.8), size = 2,
qs = c(0.05, 0.3, 0.5, 0.7, 0.95),
median.colour = "brown"),
p %+% filter(xdf2, !(month == 2 & grp == "B")) +
labs(subtitle = paste("some data missing",
"position = dodge2, preserve = single",
sep = "\n")) +
geom_boxplot2(position = position_dodge2(preserve = "single"), size = 2,
qs = c(0.10, 0.25, 0.5, 0.75, 0.90),
median.colour = "black"),
p %+% filter(xdf2, !(month == 2 & grp == "B")) +
labs(subtitle = paste("some data missing",
"position = dodge, preserve = single",
sep = "\n")) +
geom_boxplot2(position = position_dodge(preserve = "single"), size = 2,
qs = c(0.10, 0.25, 0.5, 0.75, 0.90),
median.colour = "black"),
nrow = 1
)
Code:
# define stat_boxplot2() based on stat_boxplot, but with boxplot quantiles (qs)
# added as a parameter (default values are same as original function), &
# stat = StatBoxplot2 instead of StatBoxplot
stat_boxplot2 <- function (
mapping = NULL, data = NULL, geom = "boxplot", position = "dodge2",
..., coef = 1.5, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
qs = c(0, 0.25, 0.5, 0.75, 1)) {
layer(data = data, mapping = mapping, stat = StatBoxplot2,
geom = geom, position = position, show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm,
coef = coef,
qs = qs, ...))
}
# define StatBoxplot2 based on StatBoxplot, with compute_group function tweaked
# to use qs passed from stat_boxplot2(), & outlier definition simplified to
# include all data points beyond the range of qs values
StatBoxplot2 <- ggproto(
"StatBoxplot2", StatBoxplot,
compute_group = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5,
qs = c(0, 0.25, 0.5, 0.75, 1)) {
if (!is.null(data$weight)) {
mod <- quantreg::rq(y ~ 1, weights = weight, data = data,
tau = qs)
stats <- as.numeric(stats::coef(mod))
}
else {
stats <- as.numeric(stats::quantile(data$y, qs))
}
names(stats) <- c("ymin", "lower", "middle", "upper", "ymax")
iqr <- diff(stats[c(2, 4)])
outliers <- data$y < stats[1] | data$y > stats[5] # change outlier definition
if (length(unique(data$x)) > 1)
width <- diff(range(data$x)) * 0.9
df <- as.data.frame(as.list(stats))
df$outliers <- list(data$y[outliers])
if (is.null(data$weight)) {
n <- sum(!is.na(data$y))
}
else {
n <- sum(data$weight[!is.na(data$y) & !is.na(data$weight)])
}
df$notchupper <- df$middle + 1.58 * iqr/sqrt(n)
df$notchlower <- df$middle - 1.58 * iqr/sqrt(n)
df$x <- if (is.factor(data$x))
data$x[1]
else mean(range(data$x))
df$width <- width
df$relvarwidth <- sqrt(n)
df
}
)
# define geom_boxplot2() based on geom_boxplot, using stat = "boxplot2" by
# default instead of "boxplot", with a new parameter median.colour, &
# geom = GeomBoxplot2 instead of GeomBoxplot
geom_boxplot2 <- function(mapping = NULL, data = NULL, stat = "boxplot2", position = "dodge2",
..., outlier.colour = NULL, outlier.color = NULL, outlier.fill = NULL,
outlier.shape = 19, outlier.size = 1.5, outlier.stroke = 0.5,
outlier.alpha = NULL, notch = FALSE, notchwidth = 0.5, varwidth = FALSE,
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
median.colour = NULL, median.color = NULL) {
if (is.character(position)) {
if (varwidth == TRUE)
position <- position_dodge2(preserve = "single")
}
else {
if (identical(position$preserve, "total") & varwidth ==
TRUE) {
warning("Can't preserve total widths when varwidth = TRUE.",
call. = FALSE)
position$preserve <- "single"
}
}
layer(data = data, mapping = mapping, stat = stat, geom = GeomBoxplot2,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(outlier.colour = outlier.color %||% outlier.colour,
outlier.fill = outlier.fill, outlier.shape = outlier.shape,
outlier.size = outlier.size, outlier.stroke = outlier.stroke,
outlier.alpha = outlier.alpha, notch = notch, notchwidth = notchwidth,
varwidth = varwidth, na.rm = na.rm,
median.colour = median.color %||% median.colour, ...))
}
# define GeomBoxplot2 based on GeomBoxplot, with draw_group function & draw_key
# functions tweaked to use median.colour for the median segment's colour, if available
GeomBoxplot2 <- ggproto(
"GeomBoxplot2",
GeomBoxplot,
draw_group = function (data, panel_params, coord, fatten = 2, outlier.colour = NULL,
outlier.fill = NULL, outlier.shape = 19, outlier.size = 1.5,
outlier.stroke = 0.5, outlier.alpha = NULL, notch = FALSE,
notchwidth = 0.5, varwidth = FALSE, median.colour = NULL) {
common <- data.frame(colour = data$colour, size = data$size,
linetype = data$linetype, fill = alpha(data$fill, data$alpha),
group = data$group, stringsAsFactors = FALSE)
whiskers <- data.frame(x = data$x, xend = data$x,
y = c(data$upper, data$lower),
yend = c(data$ymax, data$ymin),
alpha = NA,
common, stringsAsFactors = FALSE)
box <- data.frame(xmin = data$xmin, xmax = data$xmax, ymin = data$lower,
y = data$middle, ymax = data$upper,
ynotchlower = ifelse(notch, data$notchlower, NA),
ynotchupper = ifelse(notch,
data$notchupper, NA),
notchwidth = notchwidth, alpha = data$alpha,
common, stringsAsFactors = FALSE)
if (!is.null(data$outliers) && length(data$outliers[[1]] >= 1)) {
outliers <- data.frame(y = data$outliers[[1]], x = data$x[1],
colour = outlier.colour %||% data$colour[1], fill = outlier.fill %||%
data$fill[1], shape = outlier.shape %||% data$shape[1],
size = outlier.size %||% data$size[1], stroke = outlier.stroke %||%
data$stroke[1], fill = NA, alpha = outlier.alpha %||%
data$alpha[1], stringsAsFactors = FALSE)
outliers_grob <- GeomPoint$draw_panel(outliers, panel_params,
coord)
}
else {
outliers_grob <- NULL
}
if(is.null(median.colour)){
ggplot2:::ggname(
"geom_boxplot",
grobTree(outliers_grob,
GeomSegment$draw_panel(whiskers, panel_params, coord),
GeomCrossbar$draw_panel(box, fatten = fatten, panel_params, coord)))
} else {
ggplot2:::ggname(
"geom_boxplot",
grobTree(outliers_grob,
GeomSegment$draw_panel(whiskers, panel_params, coord),
GeomCrossbar$draw_panel(box, fatten = fatten, panel_params, coord),
GeomSegment$draw_panel(transform(box, x = xmin, xend = xmax, yend = y,
size = size, alpha = NA,
colour = median.colour),
panel_params,
coord)))
}
},
draw_key = function (data, params, size) {
if(is.null(params$median.colour)){
draw_key_boxplot(data, params, size)
} else {
grobTree(linesGrob(0.5, c(0.1, 0.25)),
linesGrob(0.5, c(0.75, 0.9)),
rectGrob(height = 0.5, width = 0.75),
linesGrob(c(0.125, 0.875), 0.5,
gp = gpar(col = params$median.colour)),
gp = gpar(col = data$colour,
fill = alpha(data$fill, data$alpha),
lwd = data$size * .pt,
lty = data$linetype))
}
}
)