This question is related to How to change boxplot settings when stat_summary is used, where I managed to build nice unicolor boxplots.
However, due to the "unicolor", the median segment's colour cannot be distinguished from the rest of the box. I've managed to add a black point for the median, but I prefer to add a segment instead. Here is the code:
# Data
xdf2 <- data.frame(month = rep(1:6, each = 100),
grp = rep(c('A','B'), 50*6))
xdf2$m <- rpois(n = nrow(xdf2),10)
# Definition of whiskers
f <- function(x) {
r <- quantile(x, probs = c(0.10, 0.25, 0.5, 0.75, 0.90))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
# Add points outside of whiskers
o <- function(x) {
subset(x, x < quantile(x, probs=0.1) | quantile(x, probs=0.9) < x)
}
# Plot
ggplot(data = xdf2, aes(factor(month), m, colour = grp, fill = grp)) +
stat_summary(fun.data = f, geom="boxplot",
position = position_dodge(width=1), size = 2) +
stat_summary(fun.y = o, geom="point",
position = position_dodge(width = 1)) +
scale_color_manual(values = c("indianred","orange"), labels = c("AAA", "BBB")) +
scale_fill_manual(values = c("indianred", "orange"), labels = c("AAA", "BBB")) +
theme_bw() +
stat_summary(fun.y = "median", geom = "point",
position = position_dodge(width = 1), col = "black", size = 4)
And here is the graph:
I would like to add a segment by building a function which compute the parameters for geom="segment":
# Add function to compute segment parameters
s <- function(x,y,z) {
x2 <- x - z
y2 <- median(y)
x2end <- x + z
y2end <- median(y)
# assemble the named output
out <- c(x = x2, y = y2, xend = x2end, yend = y2end)
names(out) <- c("x","y","xend","yend")
out
}
and replace the geom="point" part with:
stat_summary(fun.y = s(month, m, 0.3), geom = "segment",
position = position_dodge(width = 1), col="black")
What I get is:
Error in s(month, m, 0.3) (from #2) : object 'month' not found
If I could better understand the logic of stat_summary, I could solve this problem. But I find it's not easy. If somebody could help me to solve this problem with stat_summary and geom = "segment", I would be very glad and maybe I will understand better the logic behind.
I would also appreciate alternative solutions for adding a horizontal line to mark the median.
There are so many pieces to a boxplot that it's probably worth the effort to change the underlying ggproto object, rather than recreate outliers / whiskers / boxes / median segments piece by piece, & hope they still stack well together.
Here's the result:
# Data
set.seed(123)
xdf2 <- data.frame(month = rep(1:6,each=100), grp = rep(c('A','B'), 50*6))
xdf2$m <- rpois(n=nrow(xdf2),10)
p <- ggplot(data = xdf2,
aes(factor(month), m, colour = grp, fill = grp)) +
scale_color_manual(values = c("A" = "indianred", "B" = "orange"),
labels = c("A" = "AAA", "B" = "BBB"),
aesthetics = c("color", "fill")) +
theme_bw() +
theme(legend.position = "bottom")
p +
geom_boxplot2(position = position_dodge(width = 1), size = 2,
qs = c(0.10, 0.25, 0.5, 0.75, 0.90),
median.colour = "black")
Here are some more variations with different aesthetic options:
library(dplyr)
cowplot::plot_grid(
p +
labs(subtitle = paste("quantiles = c(0.05, 0.3, 0.5, 0.7, 0.95)",
"median segment color = brown",
sep = "\n")) +
geom_boxplot2(position = position_dodge(width = 0.8), size = 2,
qs = c(0.05, 0.3, 0.5, 0.7, 0.95),
median.colour = "brown"),
p %+% filter(xdf2, !(month == 2 & grp == "B")) +
labs(subtitle = paste("some data missing",
"position = dodge2, preserve = single",
sep = "\n")) +
geom_boxplot2(position = position_dodge2(preserve = "single"), size = 2,
qs = c(0.10, 0.25, 0.5, 0.75, 0.90),
median.colour = "black"),
p %+% filter(xdf2, !(month == 2 & grp == "B")) +
labs(subtitle = paste("some data missing",
"position = dodge, preserve = single",
sep = "\n")) +
geom_boxplot2(position = position_dodge(preserve = "single"), size = 2,
qs = c(0.10, 0.25, 0.5, 0.75, 0.90),
median.colour = "black"),
nrow = 1
)
Code:
# define stat_boxplot2() based on stat_boxplot, but with boxplot quantiles (qs)
# added as a parameter (default values are same as original function), &
# stat = StatBoxplot2 instead of StatBoxplot
stat_boxplot2 <- function (
mapping = NULL, data = NULL, geom = "boxplot", position = "dodge2",
..., coef = 1.5, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
qs = c(0, 0.25, 0.5, 0.75, 1)) {
layer(data = data, mapping = mapping, stat = StatBoxplot2,
geom = geom, position = position, show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm,
coef = coef,
qs = qs, ...))
}
# define StatBoxplot2 based on StatBoxplot, with compute_group function tweaked
# to use qs passed from stat_boxplot2(), & outlier definition simplified to
# include all data points beyond the range of qs values
StatBoxplot2 <- ggproto(
"StatBoxplot2", StatBoxplot,
compute_group = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5,
qs = c(0, 0.25, 0.5, 0.75, 1)) {
if (!is.null(data$weight)) {
mod <- quantreg::rq(y ~ 1, weights = weight, data = data,
tau = qs)
stats <- as.numeric(stats::coef(mod))
}
else {
stats <- as.numeric(stats::quantile(data$y, qs))
}
names(stats) <- c("ymin", "lower", "middle", "upper", "ymax")
iqr <- diff(stats[c(2, 4)])
outliers <- data$y < stats[1] | data$y > stats[5] # change outlier definition
if (length(unique(data$x)) > 1)
width <- diff(range(data$x)) * 0.9
df <- as.data.frame(as.list(stats))
df$outliers <- list(data$y[outliers])
if (is.null(data$weight)) {
n <- sum(!is.na(data$y))
}
else {
n <- sum(data$weight[!is.na(data$y) & !is.na(data$weight)])
}
df$notchupper <- df$middle + 1.58 * iqr/sqrt(n)
df$notchlower <- df$middle - 1.58 * iqr/sqrt(n)
df$x <- if (is.factor(data$x))
data$x[1]
else mean(range(data$x))
df$width <- width
df$relvarwidth <- sqrt(n)
df
}
)
# define geom_boxplot2() based on geom_boxplot, using stat = "boxplot2" by
# default instead of "boxplot", with a new parameter median.colour, &
# geom = GeomBoxplot2 instead of GeomBoxplot
geom_boxplot2 <- function(mapping = NULL, data = NULL, stat = "boxplot2", position = "dodge2",
..., outlier.colour = NULL, outlier.color = NULL, outlier.fill = NULL,
outlier.shape = 19, outlier.size = 1.5, outlier.stroke = 0.5,
outlier.alpha = NULL, notch = FALSE, notchwidth = 0.5, varwidth = FALSE,
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
median.colour = NULL, median.color = NULL) {
if (is.character(position)) {
if (varwidth == TRUE)
position <- position_dodge2(preserve = "single")
}
else {
if (identical(position$preserve, "total") & varwidth ==
TRUE) {
warning("Can't preserve total widths when varwidth = TRUE.",
call. = FALSE)
position$preserve <- "single"
}
}
layer(data = data, mapping = mapping, stat = stat, geom = GeomBoxplot2,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(outlier.colour = outlier.color %||% outlier.colour,
outlier.fill = outlier.fill, outlier.shape = outlier.shape,
outlier.size = outlier.size, outlier.stroke = outlier.stroke,
outlier.alpha = outlier.alpha, notch = notch, notchwidth = notchwidth,
varwidth = varwidth, na.rm = na.rm,
median.colour = median.color %||% median.colour, ...))
}
# define GeomBoxplot2 based on GeomBoxplot, with draw_group function & draw_key
# functions tweaked to use median.colour for the median segment's colour, if available
GeomBoxplot2 <- ggproto(
"GeomBoxplot2",
GeomBoxplot,
draw_group = function (data, panel_params, coord, fatten = 2, outlier.colour = NULL,
outlier.fill = NULL, outlier.shape = 19, outlier.size = 1.5,
outlier.stroke = 0.5, outlier.alpha = NULL, notch = FALSE,
notchwidth = 0.5, varwidth = FALSE, median.colour = NULL) {
common <- data.frame(colour = data$colour, size = data$size,
linetype = data$linetype, fill = alpha(data$fill, data$alpha),
group = data$group, stringsAsFactors = FALSE)
whiskers <- data.frame(x = data$x, xend = data$x,
y = c(data$upper, data$lower),
yend = c(data$ymax, data$ymin),
alpha = NA,
common, stringsAsFactors = FALSE)
box <- data.frame(xmin = data$xmin, xmax = data$xmax, ymin = data$lower,
y = data$middle, ymax = data$upper,
ynotchlower = ifelse(notch, data$notchlower, NA),
ynotchupper = ifelse(notch,
data$notchupper, NA),
notchwidth = notchwidth, alpha = data$alpha,
common, stringsAsFactors = FALSE)
if (!is.null(data$outliers) && length(data$outliers[[1]] >= 1)) {
outliers <- data.frame(y = data$outliers[[1]], x = data$x[1],
colour = outlier.colour %||% data$colour[1], fill = outlier.fill %||%
data$fill[1], shape = outlier.shape %||% data$shape[1],
size = outlier.size %||% data$size[1], stroke = outlier.stroke %||%
data$stroke[1], fill = NA, alpha = outlier.alpha %||%
data$alpha[1], stringsAsFactors = FALSE)
outliers_grob <- GeomPoint$draw_panel(outliers, panel_params,
coord)
}
else {
outliers_grob <- NULL
}
if(is.null(median.colour)){
ggplot2:::ggname(
"geom_boxplot",
grobTree(outliers_grob,
GeomSegment$draw_panel(whiskers, panel_params, coord),
GeomCrossbar$draw_panel(box, fatten = fatten, panel_params, coord)))
} else {
ggplot2:::ggname(
"geom_boxplot",
grobTree(outliers_grob,
GeomSegment$draw_panel(whiskers, panel_params, coord),
GeomCrossbar$draw_panel(box, fatten = fatten, panel_params, coord),
GeomSegment$draw_panel(transform(box, x = xmin, xend = xmax, yend = y,
size = size, alpha = NA,
colour = median.colour),
panel_params,
coord)))
}
},
draw_key = function (data, params, size) {
if(is.null(params$median.colour)){
draw_key_boxplot(data, params, size)
} else {
grobTree(linesGrob(0.5, c(0.1, 0.25)),
linesGrob(0.5, c(0.75, 0.9)),
rectGrob(height = 0.5, width = 0.75),
linesGrob(c(0.125, 0.875), 0.5,
gp = gpar(col = params$median.colour)),
gp = gpar(col = data$colour,
fill = alpha(data$fill, data$alpha),
lwd = data$size * .pt,
lty = data$linetype))
}
}
)
Related
This very useful post shows how to display error bars in one direction only with ggplot. To do this, the geom_errorbar function is modified as follows (as proposed by Sean Hughes):
geom_uperrorbar <- function(mapping = NULL, data = NULL,
stat = "identity", position = "identity",
...,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomUperrorbar,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
na.rm = na.rm,
...
)
)
}
GeomUperrorbar <- ggproto("GeomUperrorbar", Geom,
default_aes = aes(colour = "black", size = 0.5, linetype = 1, width = 0.5,
alpha = NA),
draw_key = draw_key_path,
required_aes = c("x", "y", "ymax"),
setup_data = function(data, params) {
data$width <- data$width %||%
params$width %||% (resolution(data$x, FALSE) * 0.9)
transform(data,
xmin = x - width / 2, xmax = x + width / 2, width = NULL
)
},draw_panel = function(data, panel_scales, coord, width = NULL) {
GeomPath$draw_panel(data.frame(
x = as.vector(rbind(data$xmin, data$xmax, NA, data$x, data$x)),
y = as.vector(rbind(data$ymax, data$ymax, NA, data$ymax, data$y)),
colour = rep(data$colour, each = 5),
alpha = rep(data$alpha, each = 5),
size = rep(data$size, each = 5),
linetype = rep(data$linetype, each = 5),
group = rep(1:(nrow(data)), each = 5),
stringsAsFactors = FALSE,
row.names = 1:(nrow(data) * 5)
), panel_scales, coord)
}
)
"%||%" <- function(a, b) {
if (!is.null(a)) a else b
}
But now I have the case that the last bar is negative. Thus, the error bar at the last bar must head in the other direction. Does anyone have an idea how this could work?
Here is a reproducible example:
df <- data.frame(trt = factor(c(1, 1, 2, 2)), resp = c(1, 5, 3, -2),
group = factor(c(1, 2, 1, 2)), se = c(0.1, 0.3, 0.3, 0.2))
df2 <- df[c(1,3), ]
limits <- aes(ymax = resp + se, ymin = resp - se)
dodge <- position_dodge(width = 0.9)
p <- ggplot(df, aes(fill = group, y = resp, x = trt))
p + geom_bar(position = dodge, stat = "identity") +
geom_errorbar(limits, position = dodge, width = 0.25)
The solution in the linked question will actually produce this result. You just need to make sure that the standard error is subtracted instead of added when the bar points down:
limits <- aes(ymax = resp + se * sign(resp))
Now your plotting code is pretty much unaltered:
dodge <- position_dodge(width = 0.9)
ggplot(df, aes(fill = group, y = resp, x = trt)) +
geom_bar(position = dodge, stat = "identity") +
geom_uperrorbar(limits, position = dodge, width = 0.25)
I'm working on a ggplot2 extension that works on data.frames looking a bit like:
data <- data.frame(
type = c("text", "text", "line", "line"),
label = c("some label", "another one", NA, NA),
x = c(0,10,2,4),
y = c(0,10,3,7),
xend = c(NA, NA, 8, 10),
yend = c(NA, NA, 3, 4)
)
This means we have objects (rows) with different type. Now I want to subset the data inside my Geoms and Stats based on type.
Consider the following example (using ggplot2 standard functions):
library(ggplot2)
ggplot(data, aes(x, y)) +
geom_text(aes(label = label)) +
geom_segment(aes(xend = xend, yend = yend))
This plots what one expects (text as text and lines as segments).
Now I have my own version of geom_text called geom_var:
GeomVar <- ggproto("GeomVar", ggplot2::GeomText,
default_aes = aes(x = x, y = y, label = label, colour = "black",
size = 4, angle = 0, hjust = 0.5, vjust = 0.5,
alpha = NA, family = "Arial", fontface = 1,
lineheight = 1.2, length = 10)
)
geom_var <- function(mapping = aes(label = label), data = NULL, position = "identity",
..., parse = FALSE, nudge_x = 0, nudge_y = 0, check_overlap = FALSE,
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE)
{
ggplot2::layer(data = data, mapping = mapping, stat = StatVar, geom = GeomVar,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(parse = parse, check_overlap = check_overlap,
na.rm = na.rm, ...))
}
StatVar <- ggproto("StatVar", ggplot2::Stat,
compute_group = function(data, scales, length = 5) {
data$label <- sapply(data$label,
function(x) {paste0(strwrap(x, width = length),
collapse = "\n")})
data
}
)
stat_var <- function(mapping = NULL, data = NULL, geom = "var",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...) {
ggplot2::layer(
stat = StatVar, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
When I use my own version the plot looks like:
ggplot(data, aes(x, y)) +
geom_var(aes(label = label)) +
geom_segment(aes(xend = xend, yend = yend))
TL;DR:
How can I change my GeomVar and/or StatVar in order the NAs don't get plotted anymore?
Or: How can I subset my data based on type in my GeomVar and StatVar functions?
(I tried data <- data[data$type == "text", ] in basically every place data occurs in the GeomVar, geom_var, StatVar and stat_var functions..)
You can pass type as an aesthetic in geom_var(aes(...)), and specify the setup_data function for your StatVar to look out for it:
# define setup_data in StatVar
StatVar <- ggproto("StatVar",
ggplot2::Stat,
setup_data = function(data, params){
# print(data) # I like doing this while debugging code to see what data
# actually looks like at this point
data <- data[data$type == "text", ]
},
compute_group = function(data, scales, length = 5) {
data$label <- sapply(data$label,
function(x) {paste0(strwrap(x, width = length),
collapse = "\n")})
data
}
)
# include type as a required aesthetic mapping in GeomVar
GeomVar <- ggproto("GeomVar", ggplot2::GeomText,
required_aes = c("x", "y", "label", "type"),
default_aes = aes(x = x, y = y, label = label, colour = "black",
size = 4, angle = 0, hjust = 0.5, vjust = 0.5,
alpha = NA, family = "Arial", fontface = 1,
lineheight = 1.2, length = 10))
# map type in geom_var
ggplot(data, aes(x, y)) +
geom_var(aes(label = label, type = type)) +
geom_segment(aes(xend = xend, yend = yend))
I'd like to add a rug boxplot per group to the bottom and top of my density plot. I could not find an implementation, so I tried to manually create the boxplots and then add those with annotation_custom to the plot.
Currently there is the problem that x axes of the density plot and the boxplots do not align. I tried to extract the limits of the first plot, but could only find a way to extract the limits of the data.
The second problem is the exact y alignment of the boxplots, this should be the same as geom_rug handles it.
The third problem is to ensure that the same fill colors are used by the density and boxplots. I used a manual approach to solve this, but clearly it would be a lot more general if I do not have to specify the color in multiple places.
set.seed(123)
library(ggplot2)
library(ggpubr)
library(data.table)
Data <- data.table(x = rnorm(100),
group = rep(c("group1", "group2"), times = c(30, 70)))
# Colors for groups
colors <- c("group1" = "#66C2A5", "group2" = "#FC8D62")
p <-
ggplot(Data, aes(x = x, fill = group, color = group)) +
geom_density(alpha = 0.5) +
scale_color_manual(values = colors) +
scale_fill_manual(values = colors)
# Rugs
p +
geom_rug(data = Data[group %in% "group1"]) +
geom_rug(data = Data[group %in% "group2"], sides = "t")
#-----
# Boxplots
boxplot1 <-
ggplot(Data[group %in% "group1"]) +
geom_boxplot(aes(y = x), fill = colors[["group1"]]) +
coord_flip() +
theme_transparent()
boxplot2 <-
ggplot(Data[group %in% "group2"]) +
geom_boxplot(aes(y = x), fill = colors[["group2"]]) +
coord_flip() +
theme_transparent()
boxplot1_grob <- ggplotGrob(boxplot1)
boxplot2_grob <- ggplotGrob(boxplot2)
# Place box plots inside density plot
x <- ggplot_build(p)$layout$panel_scales_x[[1]]$range$range
xmin <- x[1]
xmax <- x[2]
y <- ggplot_build(p)$layout$panel_scales_y[[1]]$range$range
ymin <- y[1]
ymax <- y[2]
yoffset <- (1/28) * ymax
xoffset <- (1/28) * xmax
# Add boxplots with annotation_custom
p2 <- p +
annotation_custom(grob = boxplot1_grob, xmin = xmin, xmax = xmax,
ymin = ymin - yoffset, ymax = ymin + yoffset) +
annotation_custom(grob = boxplot2_grob,
xmin = xmin, xmax = xmax,
ymin = ymax - yoffset, ymax = ymax + yoffset)
p2
# Alignment is not correct
p2 +
geom_rug(data = Data[group %in% "group1"]) +
geom_rug(data = Data[group %in% "group2"], sides = "t")
I made something similar for practice sometime back, & have yet to test it rigorously, but it does seem to work for your use case. If anything breaks, let me know & I'll see if I can fix them:
# with boxplots only
p +
geom_marginboxplot(data = Data[Data$group %in% "group1", ],
aes(y = 1), sides = "b") +
geom_marginboxplot(data = Data[Data$group %in% "group2", ],
aes(y = 1), sides = "t")
# with both boxplots & geom_rug (check that they align exactly)
p +
geom_marginboxplot(data = Data[Data$group %in% "group1", ],
aes(y = 1), sides = "b") +
geom_marginboxplot(data = Data[Data$group %in% "group2", ],
aes(y = 1), sides = "t") +
geom_rug(data = Data[group %in% "group1"]) +
geom_rug(data = Data[group %in% "group2"], sides = "t")
The marginal boxplot's dimensions imitate those of geom_rug, occupying 3% of the plot panel's height / width. Both x & y have to be mapped in aes(), though in this case y isn't actually needed, so I assigned it the value 1 as a placeholder.
Run the following to get geom_marginboxplot:
library(ggplot2)
library(grid)
`%||%` <- function (x, y) if (is.null(x)) y else x
geom_marginboxplot <- function(mapping = NULL, data = NULL,
...,
sides = "bl",
outlier.shape = 16,
outlier.size = 1.5,
outlier.stroke = 0.5,
width = 0.9,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = StatMarginBoxplot,
geom = GeomMarginBoxplot,
position = "identity",
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
sides = sides,
outlier.shape = outlier.shape,
outlier.size = outlier.size,
outlier.stroke = outlier.stroke,
width = width,
notch = FALSE,
notchwidth = 0.5,
varwidth = FALSE,
na.rm = na.rm,
...
)
)
}
StatMarginBoxplot <- ggproto(
"StatMarginBoxplot", Stat,
optional_aes = c("x", "y"),
non_missing_aes = "weight",
setup_data = function(data, params,
sides = "bl") {
if(grepl("l|r", sides)){
data.vertical <- data
data.vertical$orientation <- "vertical"
} else data.vertical <- data.frame()
if(grepl("b|t", sides)){
data.horizontal <- data
data.horizontal$y <- data.horizontal$x
data.horizontal$orientation <- "horizontal"
} else data.horizontal <- data.frame()
data <- remove_missing(rbind(data.vertical,
data.horizontal),
na.rm = FALSE, vars = "x",
"stat_boxplot")
data
},
compute_group = function(data, scales, sides = "bl",
width = 0.9, na.rm = FALSE, coef = 1.5){
if(grepl("l|r", sides)){
df.vertical <- do.call(environment(StatBoxplot$compute_group)$f,
args = list(data = data[data$orientation == "vertical", ],
scales = scales, width = width,
na.rm = na.rm, coef = coef))
df.vertical <- df.vertical[, c("ymin", "lower", "middle", "upper", "ymax", "outliers")]
df.vertical$orientation = "vertical"
} else df.vertical <- data.frame()
if(grepl("b|t", sides)){
df.horizontal <- do.call(environment(StatBoxplot$compute_group)$f,
args = list(data = data[data$orientation == "horizontal", ],
scales = scales, width = width,
na.rm = na.rm, coef = coef))
df.horizontal <- df.horizontal[, c("ymin", "lower", "middle", "upper", "ymax", "outliers")]
df.horizontal$orientation = "horizontal"
} else df.horizontal <- data.frame()
df <- rbind(df.vertical, df.horizontal)
colnames(df) <- gsub("^y", "", colnames(df))
df
}
)
GeomMarginBoxplot <- ggproto(
"GeomMarginBoxplot", Geom,
setup_data = function(data, params, sides = "bl") {
data.vertical <- data[data$orientation == "vertical", ]
if(nrow(data.vertical) > 0) {
colnames(data.vertical)[1:6] <- paste0("y", colnames(data.vertical)[1:6])
}
data.horizontal <- data[data$orientation == "horizontal", ]
if(nrow(data.horizontal) > 0){
colnames(data.horizontal)[1:6] <- paste0("x", colnames(data.horizontal)[1:6])
}
data <- merge(data.vertical, data.horizontal, all = TRUE)
data <- data[, sapply(data, function(x) !all(is.na(x)))]
data
},
draw_group = function(data, panel_params, coord, fatten = 2,
outlier.shape = 19, outlier.stroke = 0.5,
outlier.size = 1.5, width = 0.9,
notch = FALSE, notchwidth = 0.5, varwidth = FALSE,
sides = "bl") {
draw.marginal.box <- function(sides){
if(sides %in% c("l", "b")){
pos1 <- unit(0, "npc"); pos2 <- unit(0.03, "npc")
} else {
pos2 <- unit(0.97, "npc"); pos1 <- unit(1, "npc")
}
if(width > 0 & width < 1){
increment <- (1 - width) / 2
increment <- increment * (pos2 - pos1)
pos1 <- pos1 + increment
pos2 <- pos2 - increment
}
pos3 <- 0.5 * pos1 + 0.5 * pos2
outliers_grob <- NULL
if(sides %in% c("l", "r")) {
data <- data[data$orientation == "vertical", ]
if (!is.null(data$youtliers) && length(data$youtliers[[1]] >= 1)) {
outliers <- data.frame(
y = unlist(data$youtliers[[1]]),
x = 0,
colour = data$colour[1],
fill = data$fill[1],
shape = outlier.shape %||% data$shape[1],
size = outlier.size %||% data$size[1],
stroke = outlier.stroke %||% data$stroke[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
coords <- coord$transform(outliers, panel_params)
x.pos <- rep(pos3, nrow(coords))
y.pos <- unit(coords$y, "native")
outliers_grob <- pointsGrob(
x = x.pos, y = y.pos,
pch = coords$shape,
gp = gpar(col = coords$colour,
fill = alpha(coords$fill, coords$alpha),
fontsize = coords$size * .pt + coords$stroke * .stroke/2,
lwd = coords$stroke * .stroke/2))
}
box.whiskers <- data.frame(
y = c(data$ymin, data$ylower, data$ymiddle, data$yupper, data$ymax),
x = 0,
colour = data$colour[1],
fill = data$fill[1],
size = data$size[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
box.whiskers <- coord$transform(box.whiskers, panel_params)
whiskers_grob <- segmentsGrob(
x0 = rep(pos3, 2),
x1 = rep(pos3, 2),
y0 = unit(c(box.whiskers$y[1], box.whiskers$y[5]), "native"),
y1 = unit(c(box.whiskers$y[2], box.whiskers$y[4]), "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
box_grob <- rectGrob(
x = pos1,
y = unit(box.whiskers$y[4], "native"),
width = pos2 - pos1,
height = unit(box.whiskers$y[4] - box.whiskers$y[2], "native"),
just = c("left", "top"),
gp = gpar(col = box.whiskers$colour,
fill = alpha(box.whiskers$fill, box.whiskers$alpha),
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
median_grob <- segmentsGrob(
x0 = rep(pos1, 2),
x1 = rep(pos2, 2),
y0 = unit(box.whiskers$y[3], "native"),
y1 = unit(box.whiskers$y[3], "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
}
if(sides %in% c("b", "t")) {
data <- data[data$orientation == "horizontal", ]
if (!is.null(data$xoutliers) && length(data$xoutliers[[1]] >= 1)) {
outliers <- data.frame(
x = unlist(data$xoutliers[[1]]),
y = 0,
colour = data$colour[1],
fill = data$fill[1],
shape = outlier.shape %||% data$shape[1],
size = outlier.size %||% data$size[1],
stroke = outlier.stroke %||% data$stroke[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
coords <- coord$transform(outliers, panel_params)
x.pos <- unit(coords$x, "native")
y.pos <- rep(pos3, nrow(coords))
outliers_grob <- pointsGrob(
x = x.pos, y = y.pos,
pch = coords$shape,
gp = gpar(col = coords$colour,
fill = alpha(coords$fill, coords$alpha),
fontsize = coords$size * .pt + coords$stroke * .stroke/2,
lwd = coords$stroke * .stroke/2))
}
box.whiskers <- data.frame(
x = c(data$xmin, data$xlower, data$xmiddle, data$xupper, data$xmax),
y = 0,
colour = data$colour[1],
fill = data$fill[1],
size = data$size[1],
alpha = data$alpha[1],
stringsAsFactors = FALSE
)
box.whiskers <- coord$transform(box.whiskers, panel_params)
whiskers_grob <- segmentsGrob(
y0 = rep(pos3, 2),
y1 = rep(pos3, 2),
x0 = unit(c(box.whiskers$x[1], box.whiskers$x[5]), "native"),
x1 = unit(c(box.whiskers$x[2], box.whiskers$x[4]), "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
box_grob <- rectGrob(
y = pos2,
x = unit(box.whiskers$x[2], "native"),
height = pos2 - pos1,
width = unit(box.whiskers$x[4] - box.whiskers$x[2], "native"),
just = c("left", "top"),
gp = gpar(col = box.whiskers$colour,
fill = alpha(box.whiskers$fill, box.whiskers$alpha),
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
median_grob <- segmentsGrob(
y0 = rep(pos1, 2),
y1 = rep(pos2, 2),
x0 = unit(box.whiskers$x[3], "native"),
x1 = unit(box.whiskers$x[3], "native"),
gp = gpar(col = box.whiskers$colour,
lwd = box.whiskers$size * .pt,
lty = box.whiskers$linetype))
}
grobTree(outliers_grob,
whiskers_grob,
box_grob,
median_grob)
}
result <- list()
if(grepl("l", sides)) result$l <- draw.marginal.box("l")
if(grepl("r", sides)) result$r <- draw.marginal.box("r")
if(grepl("b", sides)) result$b <- draw.marginal.box("b")
if(grepl("t", sides)) result$t <- draw.marginal.box("t")
gTree(children = do.call("gList", result))
},
draw_key = draw_key_boxplot,
default_aes = aes(weight = 1, colour = "grey20", fill = "white",
size = 0.5, stroke = 0.5,
alpha = 0.75, shape = 16, linetype = "solid",
sides = "bl"),
optional_aes = c("lower", "upper", "middle", "min", "max")
)
Session info: R 3.5.1, ggplot2 3.0.0.
I want to draw a graph which is familiar to the enterotype plot in the research. But my new multiple-ggproto seems terrible as showed in p1, owing to the missing backgroup color of the label. I've tried multiple variations of this, for example modify GeomLabel$draw_panel in order to reset the default arguments of geom in ggplot2::ggproto. However, I could not find the labelGrob() function which is removed in ggplot2 and grid package. Thus, the solution of modification didn't work. How to modify the backgroup color of label in the multiple-ggproto. Any ideas? Thanks in advance. Here is my code and two pictures.
p1: the background color of label should be white or the text color should be black.
P2:displays the wrong point color, line color and legend.
geom_enterotype <- function(mapping = NULL, data = NULL, stat = "identity", position = "identity",
alpha = 0.3, prop = 0.5, ..., lineend = "butt", linejoin = "round",
linemitre = 1, arrow = NULL, na.rm = FALSE, parse = FALSE,
nudge_x = 0, nudge_y = 0, label.padding = unit(0.15, "lines"),
label.r = unit(0.15, "lines"), label.size = 0.1,
show.legend = TRUE, inherit.aes = TRUE) {
library(ggplot2)
# create new stat and geom for PCA scatterplot with ellipses
StatEllipse <- ggproto("StatEllipse", Stat,
required_aes = c("x", "y"),
compute_group = function(., data, scales, level = 0.75, segments = 51, ...) {
library(MASS)
dfn <- 2
dfd <- length(data$x) - 1
if (dfd < 3) {
ellipse <- rbind(c(NA, NA))
} else {
v <- cov.trob(cbind(data$x, data$y))
shape <- v$cov
center <- v$center
radius <- sqrt(dfn * qf(level, dfn, dfd))
angles <- (0:segments) * 2 * pi/segments
unit.circle <- cbind(cos(angles), sin(angles))
ellipse <- t(center + radius * t(unit.circle %*% chol(shape)))
}
ellipse <- as.data.frame(ellipse)
colnames(ellipse) <- c("x", "y")
return(ellipse)
})
# write new ggproto
GeomEllipse <- ggproto("GeomEllipse", Geom,
draw_group = function(data, panel_scales, coord) {
n <- nrow(data)
if (n == 1)
return(zeroGrob())
munched <- coord_munch(coord, data, panel_scales)
munched <- munched[order(munched$group), ]
first_idx <- !duplicated(munched$group)
first_rows <- munched[first_idx, ]
grid::pathGrob(munched$x, munched$y, default.units = "native",
id = munched$group,
gp = grid::gpar(col = first_rows$colour,
fill = alpha(first_rows$fill, first_rows$alpha), lwd = first_rows$size * .pt, lty = first_rows$linetype))
},
default_aes = aes(colour = "NA", fill = "grey20", size = 0.5, linetype = 1, alpha = NA, prop = 0.5),
handle_na = function(data, params) {
data
},
required_aes = c("x", "y"),
draw_key = draw_key_path
)
# create a new stat for PCA scatterplot with lines which totally directs to the center
StatConline <- ggproto("StatConline", Stat,
compute_group = function(data, scales) {
library(miscTools)
library(MASS)
df <- data.frame(data$x,data$y)
mat <- as.matrix(df)
center <- cov.trob(df)$center
names(center)<- NULL
mat_insert <- insertRow(mat, 2, center )
for(i in 1:nrow(mat)) {
mat_insert <- insertRow( mat_insert, 2*i, center )
next
}
mat_insert <- mat_insert[-c(2:3),]
rownames(mat_insert) <- NULL
mat_insert <- as.data.frame(mat_insert,center)
colnames(mat_insert) =c("x","y")
return(mat_insert)
},
required_aes = c("x", "y")
)
# create a new stat for PCA scatterplot with center labels
StatLabel <- ggproto("StatLabel" ,Stat,
compute_group = function(data, scales) {
library(MASS)
df <- data.frame(data$x,data$y)
center <- cov.trob(df)$center
names(center)<- NULL
center <- t(as.data.frame(center))
center <- as.data.frame(cbind(center))
colnames(center) <- c("x","y")
rownames(center) <- NULL
return(center)
},
required_aes = c("x", "y")
)
layer1 <- layer(data = data, mapping = mapping, stat = stat, geom = GeomPoint,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...))
layer2 <- layer(stat = StatEllipse, data = data, mapping = mapping, geom = GeomEllipse, position = position, show.legend = FALSE,
inherit.aes = inherit.aes, params = list(na.rm = na.rm, prop = prop, alpha = alpha, ...))
layer3 <- layer(data = data, mapping = mapping, stat = StatConline, geom = GeomPath,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(lineend = lineend, linejoin = linejoin,
linemitre = linemitre, arrow = arrow, na.rm = na.rm, ...))
if (!missing(nudge_x) || !missing(nudge_y)) {
if (!missing(position)) {
stop("Specify either `position` or `nudge_x`/`nudge_y`",
call. = FALSE)
}
position <- position_nudge(nudge_x, nudge_y)
}
layer4 <- layer(data = data, mapping = mapping, stat = StatLabel, geom = GeomLabel,
position = position, show.legend = FALSE, inherit.aes = inherit.aes,
params = list(parse = parse, label.padding = label.padding,
label.r = label.r, label.size = label.size, na.rm = na.rm, ...))
return(list(layer1,layer2,layer3,layer4))
}
# data
data(Cars93, package = "MASS")
car_df <- Cars93[, c(3, 5, 13:15, 17, 19:25)]
car_df <- subset(car_df, Type == "Large" | Type == "Midsize" | Type == "Small")
x1 <- mean(car_df$Price) + 2 * sd(car_df$Price)
x2 <- mean(car_df$Price) - 2 * sd(car_df$Price)
car_df <- subset(car_df, Price > x2 | Price < x1)
car_df <- na.omit(car_df)
# Principal Component Analysis
car.pca <- prcomp(car_df[, -1], scale = T)
car.pca_pre <- cbind(as.data.frame(predict(car.pca)[, 1:2]), car_df[, 1])
colnames(car.pca_pre) <- c("PC1", "PC2", "Type")
xlab <- paste("PC1(", round(((car.pca$sdev[1])^2/sum((car.pca$sdev)^2)), 2) * 100, "%)", sep = "")
ylab <- paste("PC2(", round(((car.pca$sdev[2])^2/sum((car.pca$sdev)^2)), 2) * 100, "%)", sep = "")
head(car.pca_pre)
#plot
library(ggplot2)
p1 <- ggplot(car.pca_pre, aes(PC1, PC2, fill = Type , color= Type ,label = Type)) +
geom_enterotype()
p2 <- ggplot(car.pca_pre, aes(PC1, PC2, fill = Type , label = Type)) +
geom_enterotype()
You can manually change the colour scale to give it more emphasis against the background fill colour:
p3 <- ggplot(car.pca_pre, aes(PC1, PC2, fill = Type , color = Type, label = Type)) +
geom_enterotype() +
scale_colour_manual(values = c("red4", "green4", "blue4"))
p3
You can additionally adjust your fill colours by changing the alpha values, or assigning different colour values to give better contrast to your labels.
p4 <- ggplot(car.pca_pre, aes(PC1, PC2, label = Type, shape = Type, fill = Type, colour = Type)) +
geom_enterotype() +
scale_fill_manual(values = alpha(c("pink", "lightgreen", "skyblue"), 1)) +
scale_colour_manual(values = c("red4", "green4", "blue4"))
p4
Finally, if you want a background white colour to your labels, you have to remove the fill option. You can also additionally assign a shape value.
As you can observe, the background text colour is associated with the shape fill colour, while the text label colour is associated with the line colour, the the shape border colour.
p5 <- ggplot(car.pca_pre, aes(PC1, PC2, label = Type, shape = Type, colour = Type)) +
geom_enterotype() + scale_colour_manual(values = c("red4", "green4", "blue4"))
p5
How can increase the alpha of the fill of violin plots but not the alpha of the boundary line?
Changing alpha as an argument to geom_violin() results in both the fill and line changing.
Here's what can be done if you wish to avoid plotting twice. Since the introduction of the extension mechanism we can easily modify the existing source code to define our own geoms.
First we need to check what's going on in geom_violin. The actual plotting is done with GeomPolygon$draw_panel(newdata, ...). So the trick is to tinker with geom_polygon. The modification required is really simple: in the plotting block
polygonGrob(munched$x, munched$y, default.units = "native",
id = munched$group,
gp = gpar(
col = alpha(first_rows$colour, first_rows$alpha),
fill = alpha(first_rows$fill, first_rows$alpha),
lwd = first_rows$size * .pt,
lty = first_rows$linetype
)
)
just replace colour specification to col = first_rows$colour.
Alright, we're good to go. Just declare our custom geom_violin2, borrowing the code from original source and applying several ad-hoc fixes.
library(grid)
GeomPolygon2 <- ggproto("GeomPolygon2", Geom,
draw_panel = function(data, panel_scales, coord) {
n <- nrow(data)
if (n == 1) return(zeroGrob())
munched <- coord_munch(coord, data, panel_scales)
munched <- munched[order(munched$group), ]
first_idx <- !duplicated(munched$group)
first_rows <- munched[first_idx, ]
ggplot2:::ggname("geom_polygon",
polygonGrob(munched$x, munched$y, default.units = "native",
id = munched$group,
gp = gpar(
col = first_rows$colour,
fill = alpha(first_rows$fill, first_rows$alpha),
lwd = first_rows$size * .pt,
lty = first_rows$linetype
)
)
)
},
default_aes = aes(colour = "NA", fill = "grey20", size = 0.5, linetype = 1,
alpha = NA),
handle_na = function(data, params) {
data
},
required_aes = c("x", "y"),
draw_key = draw_key_polygon
)
`%||%` <- function (a, b)
{
if (!is.null(a))
a
else b
}
GeomViolin2 <- ggproto("GeomViolin", Geom,
setup_data = function(data, params) {
data$width <- data$width %||%
params$width %||% (resolution(data$x, FALSE) * 0.9)
plyr::ddply(data, "group", transform,
xmin = x - width / 2,
xmax = x + width / 2
)
},
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data,
xminv = x - violinwidth * (x - xmin),
xmaxv = x + violinwidth * (xmax - x)
)
newdata <- rbind(
plyr::arrange(transform(data, x = xminv), y),
plyr::arrange(transform(data, x = xmaxv), -y)
)
newdata <- rbind(newdata, newdata[1,])
if (length(draw_quantiles) > 0) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
quantiles <- create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[
rep(1, nrow(quantiles)),
setdiff(names(data), c("x", "y")),
drop = FALSE
]
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_violin", grobTree(
GeomPolygon2$draw_panel(newdata, ...),
quantile_grob)
)
} else {
ggplot2:::ggname("geom_violin", GeomPolygon2$draw_panel(newdata, ...))
}
},
draw_key = draw_key_polygon,
default_aes = aes(weight = 1, colour = "grey20", fill = "white", size = 0.5,
alpha = NA, linetype = "solid"),
required_aes = c("x", "y")
)
geom_violin2 <- function(mapping = NULL, data = NULL, stat = "ydensity",
draw_quantiles = NULL, position = "dodge",
trim = TRUE, scale = "area",
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomViolin2,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
trim = trim,
scale = scale,
draw_quantiles = draw_quantiles,
na.rm = na.rm,
...
)
)
}
Now behold! The colours are questionable, I admit. But you can clearly see that the border is not affected by alpha.
ggplot(mtcars, aes(factor(cyl), mpg)) +
geom_violin2(alpha = 0.7, size = 3, colour = "blue", fill = "red")