Adding boxplot below density plot - r

I'm new to ggplot and I'm trying to create this graph:
But actually, I'm just stuck here:
This is my code :
ggplot(diamonds) +
aes(x = carat, group = cut) +
geom_line(stat = "density", size = 1) +
theme_grey() +
facet_wrap(~cut, nrow = 5, strip.position = "right") +
geom_boxplot(aes())
Does someone know what I can do next?

Edit: As of ggplot2 3.3.0, this can be done in ggplot2 without any extension package.
Under the package's news, under new features:
All geoms and stats that had a direction (i.e. where the x and y axes
had different interpretation), can now freely choose their direction,
instead of relying on coord_flip(). The direction is deduced from
the aesthetic mapping, but can also be specified directly with the new
orientation argument (#thomasp85, #3506).
The following will now work directly (replacing all references to geom_boxploth / stat_boxploth in the original answer with geom_boxplot / stat_boxplot:
library(ggplot2)
ggplot(diamonds, aes(x = carat, y = -0.5)) +
# horizontal boxplots & density plots
geom_boxplot(aes(fill = cut)) +
geom_density(aes(x = carat), inherit.aes = FALSE) +
# vertical lines at Q1 / Q2 / Q3
stat_boxplot(geom = "vline", aes(xintercept = ..xlower..)) +
stat_boxplot(geom = "vline", aes(xintercept = ..xmiddle..)) +
stat_boxplot(geom = "vline", aes(xintercept = ..xupper..)) +
facet_grid(cut ~ .) +
scale_fill_discrete()
Original answer
This can be done easily with a horizontal boxplot geom_boxploth() / stat_boxploth(), found in the ggstance package:
library(ggstance)
ggplot(diamonds, aes(x = carat, y = -0.5)) +
# horizontal box plot
geom_boxploth(aes(fill = cut)) +
# normal density plot
geom_density(aes(x = carat), inherit.aes = FALSE) +
# vertical lines at Q1 / Q2 / Q3
stat_boxploth(geom = "vline", aes(xintercept = ..xlower..)) +
stat_boxploth(geom = "vline", aes(xintercept = ..xmiddle..)) +
stat_boxploth(geom = "vline", aes(xintercept = ..xupper..)) +
facet_grid(cut ~ .) +
# reproduce original chart's color scale (o/w ordered factors will result
# in viridis scale by default, using the current version of ggplot2)
scale_fill_discrete()
If you are limited to the ggplot2 package for one reason or another, it can still be done, but it would be less straightforward, since geom_boxplot() and geom_density() go in different directions.
Alternative 1: calculate the box plot's coordinates, & flip them manually before passing the results to ggplot(). Add a density layer in the normal way:
library(dplyr)
library(tidyr)
p.box <- ggplot(diamonds, aes(x = cut, y = carat)) + geom_boxplot()
p.box.data <- layer_data(p.box) %>%
select(x, ymin, lower, middle, upper, ymax, outliers) %>%
mutate(cut = factor(x, labels = levels(diamonds$cut), ordered = TRUE)) %>%
select(-x)
ggplot(p.box.data) +
# manually plot flipped boxplot
geom_segment(aes(x = ymin, xend = ymax, y = -0.5, yend = -0.5)) +
geom_rect(aes(xmin = lower, xmax = upper, ymin = -0.75, ymax = -0.25, fill = cut),
color = "black") +
geom_point(data = . %>% unnest(outliers),
aes(x = outliers, y = -0.5)) +
# vertical lines at Q1 / Q2 / Q3
geom_vline(data = . %>% select(cut, lower, middle, upper) %>% gather(key, value, -cut),
aes(xintercept = value)) +
# density plot
geom_density(data = diamonds, aes(x = carat)) +
facet_grid(cut ~ .) +
labs(x = "carat") +
scale_fill_discrete()
Alternative 2: calculate the density plot's coordinates, & flip them manually before passing the results to ggplot(). Add a box plot layer in the normal way. Flip the whole chart:
p.density <- ggplot(diamonds, aes(x = carat, group = cut)) + geom_density()
p.density.data <- layer_data(p.density) %>%
select(x, y, group) %>%
mutate(cut = factor(group, labels = levels(diamonds$cut), ordered = TRUE)) %>%
select(-group)
p.density.data <- p.density.data %>%
rbind(p.density.data %>%
group_by(cut) %>%
filter(x == min(x)) %>%
mutate(y = 0) %>%
ungroup())
ggplot(diamonds, aes(x = -0.5, y = carat)) +
# manually flipped density plot
geom_polygon(data = p.density.data, aes(x = y, y = x),
fill = NA, color = "black") +
# box plot
geom_boxplot(aes(fill = cut, group = cut)) +
# vertical lines at Q1 / Q2 / Q3
stat_boxplot(geom = "hline", aes(yintercept = ..lower..)) +
stat_boxplot(geom = "hline", aes(yintercept = ..middle..)) +
stat_boxplot(geom = "hline", aes(yintercept = ..upper..)) +
facet_grid(cut ~ .) +
scale_fill_discrete() +
coord_flip()

Maybe this will help. Although need little upgrade :)
library(tidyverse)
library(magrittr)
library(wrapr)
subplots <-
diamonds$cut %>%
unique() %>%
tibble(Cut = .) %>%
mutate(rn = row_number() - 1) %$%
map2(
.x = Cut,
.y = rn,
~annotation_custom(ggplotGrob(
diamonds %>%
filter(cut == .x) %.>%
ggplot(data = .) +
aes(x = carat, fill = cut) +
annotation_custom(ggplotGrob(
ggplot(data = .) +
geom_boxplot(
aes(x = -1, y = carat),
fill = .y + 1
) +
coord_flip() +
theme_void() +
theme(plot.margin = margin(t = 20))
)) +
geom_line(stat = 'density', size = 1) +
theme_void() +
theme(plot.margin = margin(t = .y * 100 + 10, b = (4 - .y) * 100 + 40))
))
)
ggplot() + subplots

Related

ggplot2 add sum to chart

Using mtcars as an example, I've produced some violin plots. I wanted to add two things to this chart:
for each group, list n
for each group, sum a third variable (e.g. wt)
I can do (1) with the geom_text code below although (n) is actually plotted on the x axis rather than off to the side.
But I can't work out how to do (2).
Any help much appreciated!
library(ggplot2)
library(gridExtra)
library(ggthemes)
result <- mtcars
ggplot(result, aes(x = gear, y = drat, , group=gear)) +
theme_tufte(base_size = 15) + theme(line=element_blank()) +
geom_violin(fill = "white") +
geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
ylab("drat") +
xlab("gear") +
coord_flip()+
geom_text(stat = "count", aes(label = ..count.., y = ..count..))
You can add both of these annotations by creating them in your dataframe temporarily prior to graphing. Using the dplyr package, you can create two new columns, one with the count for each group, and one with the sum of wt for each group. This can then be piped directly into your ggplot using %>% (alternatively, you could save the new dataset and insert it into ggplot the way you have it). Then with some minor edits to your geom_text call and adding a second one, we can create the plot you want. The code looks like this:
library(ggplot2)
library(gridExtra)
library(ggthemes)
library(magrittr)
library(dplyr)
result <- mtcars
result %>%
group_by(gear) %>%
mutate(count = n(), sum_wt = sum(wt)) %>%
ggplot(aes(x = gear, y = drat, , group=gear)) +
theme_tufte(base_size = 15) + theme(line=element_blank()) +
geom_violin(fill = "white") +
geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
ylab("drat") +
xlab("gear") +
coord_flip()+
geom_text(aes(label = paste0("n = ", count),
x = (gear + 0.25),
y = 4.75)) +
geom_text(aes(label = paste0("sum wt = ", sum_wt),
x = (gear - 0.25),
y = 4.75))
The new graph looks like this:
Alternatively, if you create a summary data frame named result_sum, then you can manually add that into the geom_text calls.
result <- mtcars %>%
mutate(gear = factor(as.character(gear)))
result_sum <- result %>%
group_by(gear) %>%
summarise(count = n(), sum_wt = sum(wt))
ggplot(result, aes(x = gear, y = drat, , group=gear)) +
theme_tufte(base_size = 15) +
theme(line=element_blank()) +
geom_violin(fill = "white") +
geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
ylab("drat") +
xlab("gear") +
coord_flip()+
geom_text(data = result_sum, aes(label = paste0("n = ", count),
x = (as.numeric(gear) + 0.25),
y = 4.75)) +
geom_text(data = result_sum, aes(label = paste0("sum wt = ", sum_wt),
x = (as.numeric(gear) - 0.25),
y = 4.75))
This gives you this:
The benefit to this second method is that the text isn't bold like in the first graph. The bold effect occurs in the first graph due to the text being printed over itself for all observations in the dataframe.
Thanks to those who helped.... I used this in the end which plots the calculated values, one set of classes being text based so using vjust to position the vertical offset.
thanks again!
library(ggplot2)
library(gridExtra)
library(ggthemes)
results <- mtcars
results$gear <- as.factor(as.character(results$gear)) #Turn 'gear' to text to simulate classes, then factorise
result_sum <- results %>%
group_by(gear) %>%
summarise(count = n(), sum_wt = sum(wt))
ggplot(results, aes(x = gear, y = drat, group=gear)) +
theme_tufte(base_size = 15) + theme(line=element_blank()) +
geom_violin(fill = "white") +
geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
ylab("drat") +
xlab("gear") +
coord_flip()+
geom_text(data = result_sum, aes(label = paste0("n = ", count), x = (gear), vjust= 0, y = 5.25)) +
geom_text(data = result_sum, aes(label = paste0("sum wt = ", round(sum_wt,0)), x = (gear), vjust= -2, y = 5.25))

ggplot2 - using different types for color scales

I would like to use color scale independently, i.e. for continous and categorical scale.
I have a specific application in mind where I have a simple plot like here
mtcars %>%
mutate(cylcol = factor(if_else(cyl == 6, "six", "not six"))) %>%
ggplot(aes(x = mpg, y = wt)) +
geom_point(aes(color = drat)) +
facet_wrap(~ cyl)
But I would like also highlight border, like in the answer of Seth_P, of a specific condition (I dont't want to use fill background of a facet!). For example
mtcars %>%
mutate(cylcol = factor(if_else(cyl == 6, "six", "not six"))) %>%
ggplot(aes(x = mpg, y = wt)) +
geom_point() +
geom_rect(xmin = -Inf, xmax = Inf, show.legend = FALSE,
ymin = -Inf, ymax = Inf, aes(col = cylcol), fill = "#00000000") +
facet_wrap(~ cyl)
Now I would like to "combine" these two, like that for example:
mtcars %>%
mutate(cylcol = factor(if_else(cyl == 6, "six", "not six"))) %>%
ggplot(aes(x = mpg, y = wt)) +
geom_point(aes(color = drat)) +
geom_rect(xmin = -Inf, xmax = Inf, show.legend = FALSE,
ymin = -Inf, ymax = Inf, aes(col = cylcol), fill = "#00000000") +
facet_wrap(~ cyl)
This yields an error Error: Discrete value supplied to continuous scale. This makes on one hand sense, but on the other hand, since both use independent variables I would like to use "different color scales". I could use overlays, like here, where facet colors are plotted over the plot but I would very much appreciate an easier solution. I'm aware of specifying fill and color separately - but that is not the goal. I really would like to use color on different scales. Any suggestions ?
I'm not aware of any way of having discrete and continuous colour scales simultaneously. However, you can work around it by using a geom_point shape that is filled by fill rather than colour:
library(ggplot2)
library(dplyr)
mtcars %>%
mutate(cylcol = factor(if_else(cyl == 6, "six", "not six"))) %>%
ggplot(aes(x = mpg, y = wt)) +
geom_point(aes(fill = drat), shape = 21) +
geom_rect(aes(colour = cylcol), xmin = -Inf, xmax = Inf,
ymin = -Inf, ymax = Inf, fill = NA) +
facet_wrap(~ cyl)
Here's a second way, which may be a little overcomplicated, but works by splitting the data and effectively doing the faceting manually. The widths need to be adjusted manually according to the y axis label size, the legend size and the plot size.
library(ggplot2)
library(dplyr)
library(purrr)
lims <- list(x = range(mtcars$mpg), y = range(mtcars$wt))
make_plot <- function(data) {
cyl <- data$cyl[1]
if (cyl == 6) {
rec_col <- "light blue"
} else {
rec_col <- "red"
}
p <- data %>%
ggplot(aes(x = mpg, y = wt)) +
geom_point(aes(colour = drat)) +
geom_rect(xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf,
fill = NA, colour = rec_col) +
xlim(lims$x) + ylim(lims$y) +
facet_wrap(~ cyl)
if (cyl != 4) {
p <- p + theme(axis.title.y = element_blank(), axis.ticks.y = element_blank(),
axis.text.y = element_blank())
}
if (cyl != 8) {
p <- p + theme(legend.position = "none")
}
if (cyl != 6) {
p <- p + theme(axis.title.x = element_text(colour = "white"))
}
p
}
mtcars %>%
split(.$cyl) %>%
map(make_plot) %>%
grid.arrange(grobs = ., layout_matrix = matrix(1:3, nrow = 1),
widths = c(0.32, 0.28, 0.4))
Finally, it's worth noting this was considered three years ago but hadley felt there wasn't enough development bandwidth.

ggplot2 and facet_grid : add highest value for each plot

I am using facet_grid() to plot multiple plot divided per groups of data. For each plot, I want to add in the corner the highest value of the Y axis. I've tried several hacks but it never gives me the expected results. This answer partially helps me but the value I want to add will constantly be changing, therefore I don't see how I can apply it.
Here is a minimal example, I'd like to add the red numbers on the graph below:
library(ggplot2)
data <- data.frame('group'=rep(c('A','B'),each=4),'hour'=rep(c(1,2,3,4),2),'value'=c(5,4,2,3,6,7,4,5))
ggplot(data,aes(x = hour, y = value)) +
geom_line() +
geom_point() +
theme(aspect.ratio=1) +
scale_x_continuous(name ="hours", limits=c(1,4)) +
scale_y_continuous(limits=c(1,10),breaks = seq(1, 10, by = 2))+
facet_grid( ~ group)
Thanks for your help!
library(dplyr)
data2 <- data %>% group_by(group) %>% summarise(Max = max(value))
ggplot(data,aes(x = hour, y = value)) +
geom_line() +
geom_point() +
geom_text(aes(label = Max), x = Inf, y = Inf, data2,
hjust = 2, vjust = 2, col = 'red') +
theme(aspect.ratio=1) +
scale_x_continuous(name ="hours", limits=c(1,4)) +
scale_y_continuous(limits=c(1,10),breaks = seq(1, 10, by = 2))+
facet_grid( ~ group)
This does the trick. If you always have fixed ranges you can position the text manually.
library(ggplot2)
data <- data.frame('group'=rep(c('A','B'),each=4),'hour'=rep(c(1,2,3,4),2),'value'=c(5,4,2,3,6,7,4,5))
ggplot(data,aes(x = hour, y = value)) +
geom_line() +
geom_point() +
geom_text(
aes(x, y, label=lab),
data = data.frame(
x=Inf,
y=Inf,
lab=tapply(data$value, data$group, max),
group=unique(data$group)
),
vjust="inward",
hjust = "inward"
) +
theme(aspect.ratio=1) +
scale_x_continuous(name ="hours", limits=c(1,4)) +
scale_y_continuous(limits=c(1,10),breaks = seq(1, 10, by = 2))+
facet_grid( ~ group)

R - ggplot2 geom_line dodge

I am having trouble drawing "dodges" line on "dodged" stacked bars.
dt = mtcars %>% group_by(am, cyl) %>% summarise(m = mean(disp))
dt0 = dt[dt$am == 0, ]
dt1 = dt[dt$am == 1, ]
dt0 %>% ggplot(aes(factor(cyl), m, fill = factor(cyl))) + geom_bar(stat = 'identity', position = 'dodge') +
geom_point(data = dt1, aes(factor(cyl), m, colour = factor(cyl)), position=position_dodge(width=0.9), colour = 'black')
What I would like is to draw a line from the top of the stacked bar to the black points of each cyl.
dt0 %>% ggplot(aes(factor(cyl), m, fill = factor(cyl))) + geom_bar(stat = 'identity', position = 'dodge') +
geom_point(data = dt1, aes(factor(cyl), m, colour = factor(cyl)), position=position_dodge(width=0.9), colour = 'black') +
geom_line(data = dt1, aes(factor(cyl), m, colour = factor(cyl), group = 1), position=position_dodge(width=0.9), colour = 'black')
However, the position=position_dodge(width=0.9) dodge doesn't work here.
Any idea ?
This is much easier to accomplish if you reshape your summary data:
dt <- mtcars %>%
group_by(am, cyl) %>%
summarise(m = mean(disp)) %>%
spread(am, m)
cyl 0 1
* <dbl> <dbl> <dbl>
1 4 135.8667 93.6125
2 6 204.5500 155.0000
3 8 357.6167 326.0000
While "0" and "1" are poor column names, they can still be used in aes() if you quote them in backticks. The calls to position_dodge() also become unnecessary:
dt %>% ggplot(aes(x = factor(cyl), y = `0`, fill = factor(cyl))) +
geom_bar(stat = 'identity') +
geom_point(aes(x = factor(cyl), y = `1`), colour = 'black') +
geom_segment(aes(x = factor(cyl), xend = factor(cyl), y = `0`, yend = `1`))

Line height spacing for text in ggplot

I am trying to reduce the space between my long axis labels. In base R graphics I would use lheight, but is seems to have no effect in ggplot. Is there a ggplot equivalent?
Toy example to show the problem:
library("tidyverse")
df0 <- mtcars %>%
rownames_to_column("car") %>%
mutate(car = str_wrap(car, width = 10))
ggplot(data = df0, aes(x = car, y = mpg)) +
geom_bar(stat = "identity") +
coord_flip()
# has no effect
par(lheight = 0.5)
ggplot(data = df0, aes(x = car, y = mpg)) +
geom_bar(stat = "identity") +
coord_flip()
You may be looking for a combination of options. The closest to lheight is likely setting lineheight in element_text. I also made the font smaller, just to show options.
ggplot(data = df0, aes(x = car, y = mpg)) +
geom_bar(stat = "identity") +
coord_flip() +
theme(axis.text.y = element_text(lineheight = 0.5,
size = 6))
I had a same problem and I found a solution in reducing my list with: slice(1:40)
library("tidyverse")
df0 <- mtcars %>%
rownames_to_column("car") %>%
mutate(car = str_wrap(car, width = 10)) %>% slice(1:40)
ggplot(data = df0, aes(x = car, y = mpg)) +
geom_bar(stat = "identity") +
coord_flip()
# has no effect
par(lheight = 0.6)
ggplot(data = df0, aes(x = car, y = mpg)) +
geom_bar(stat = "identity") +
coord_flip()
ggplot(data = df0, aes(x = car, y = mpg)) +
geom_bar(stat = "identity") +
coord_flip() +
theme(axis.text.y = element_text(lineheight = 0.6, size = 5))
Another option is using guide_axis with n.dodge in scale_y_discrete to automatically dodge the labels like this:
library("tidyverse")
df0 <- mtcars %>%
rownames_to_column("car") %>%
mutate(car = str_wrap(car, width = 10))
ggplot(data = df0, aes(x = car, y = mpg)) +
geom_bar(stat = "identity") +
coord_flip() +
scale_y_discrete(guide = guide_axis(n.dodge = 2)) +
theme(axis.text.y = element_text(size = 5))
Created on 2022-10-20 with reprex v2.0.2

Resources