I would like to chance the distances between groups in an alluvial diagram using ggplot2 and ggalluvial
my example is from https://corybrunson.github.io/ggalluvial/articles/ggalluvial.html
data(vaccinations)
levels(vaccinations$response) <- rev(levels(vaccinations$response))
ggplot(vaccinations,
aes(x = survey, stratum = response, alluvium = subject,
y = freq,
fill = response, label = response)) +
scale_x_discrete(expand = c(.1, .1)) +
geom_flow() +
geom_stratum(alpha = .5) +
geom_text(stat = "stratum", size = 3) +
theme(legend.position = "none") +
ggtitle("vaccination survey responses at three points in time")
gives me:
Now I would like to shorten the distance between ms153_NSA while keeping the same distance between ms432_NSA and ms460_NSA:
I tried to use following (without success):
https://rdrr.io/cran/ggalluvial/man/stat_flow.html
How to increase the space between the bars in a bar plot in ggplot2?
Even google does not show me an asymmetrical alluvial diagram: :'-/
Here's one way to hack it
Data manipulation:
# get layer data calculated by ggalluvial, & shift 2nd position x leftwards by desired amount
library(dplyr)
p <- ggplot(vaccinations,
aes(x = survey, stratum = response, alluvium = subject,
y = freq,
fill = response, label = response)) +
scale_x_discrete(expand = c(.1, .1)) +
geom_flow() +
theme(legend.position = "none") +
ggtitle("vaccination survey responses at three points in time")
shift.amt = 0.2
new.df <- layer_data(p) %>%
mutate(xmin = ifelse(x == 2, xmin - shift.amt, xmin),
xmax = ifelse(x == 2, xmax - shift.amt, xmax),
x = ifelse(x == 2, x - shift.amt, x))
Plot:
library(ggforce) # needed for geom_diagonal_wide
ggplot(new.df, aes(fill = fill)) +
# recreate each geom layer using the modified data
geom_diagonal_wide(data = . %>%
select(alluvium, fill, side, xmin, xmax, ymin, ymax) %>%
group_by(alluvium,) %>%
summarise(fill = fill[side == "start"],
x = list(c(xmax[side == "start"], xmin[side == "end"],
xmin[side == "end"], xmax[side == "start"])),
y = list(c(ymax[side == "start"], ymax[side == "end"],
ymin[side == "end"], ymin[side == "start"]))) %>%
tidyr::unnest(),
aes(x = x, y = y, group = alluvium),
alpha = 0.5) +
geom_rect(data = . %>% group_by(x, stratum, fill) %>%
summarise(xmin = min(xmin), xmax = max(xmax),
ymin = min(ymin), ymax = max(ymax)),
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
alpha = 0.5, colour = "black") +
geom_text(data = . %>% group_by(x, stratum, label) %>%
summarise(y = mean(range(y))),
aes(x = x, y = y, label = label),
inherit.aes = FALSE, size = 3) +
# recreate original x-axis breaks/labels, fill values, & axis titles
scale_x_continuous(breaks = sort(unique(new.df$x)),
labels = layer_scales(p)$x$get_labels()) +
scale_fill_identity() + # by default, this won't create a legend
labs(title = "vaccination survey responses at three points in time",
x = p$labels$x, y = p$labels$y)
Related
I have a dataframe with with a value measured with an instrument at four locations over consecutive time periods. I wish to have the night time (6pm to 6am) shaded grey to help with interpretation. The code below plots this correctly except i also want to use facet_grid(..., scales = 'free_x') to drop the unwanted time periods when the instrument was not collecting data at each location.
require(ggplot2)
df.long <- data.frame(Timestamp = seq.POSIXt(as.POSIXct("2018-07-05 18:00:00", format = '%Y-%m-%d %H:%M:%S'), by = 'hour', length.out = 192),
Location = c(rep('A', 48), rep('B', 48), rep('C', 48), rep('D', 48)),
value = sin(seq(1:192)/4))
shade <- data.frame(dusk = seq.POSIXt(as.POSIXct("2018-07-05 18:00:00", format = '%Y-%m-%d %H:%M:%S'), by = 'day', length.out = 8),
dawn = seq.POSIXt(as.POSIXct("2018-07-06 06:00:00", format = '%Y-%m-%d %H:%M:%S'), by = 'day', length.out = 8),
top = Inf,
bottom = -Inf)
ggplot(df.long) +
geom_rect(data = shade,
aes(xmin = dusk, xmax = dawn,ymin = bottom, ymax = top),
fill = 'light grey', alpha = 0.5) +
geom_line(aes(x = Timestamp, y = value, col = Location)) +
geom_point(aes(x = Timestamp, y = value, fill = Location), pch = 21) +
facet_grid( ~ Location, scales = 'free_x') +
ylab('Flux (mg O2 m-2 h-1)') +
theme_bw()
The X axis is scaled correctly with facet_grid(..., scales = 'free_x') when I exclude the call for geom_rect. How do i go about plotting the shaded geom_rect on the second plot without the X-axis scales expanding?
ggplot(df.long) +
# geom_rect(data = shade, aes(xmin = dusk, xmax = dawn,
# ymin = bottom, ymax = top), fill = 'light grey', alpha = 0.5) +
geom_line(aes(x = Timestamp, y = value, col = Location)) +
geom_point(aes(x = Timestamp, y = value, fill = Location), pch = 21) +
facet_grid( ~ Location, scales = 'free_x') +
ylab('Flux (mg O2 m-2 h-1)') +
theme_bw() +
theme(legend.position = 'none')
I have also tried substituting geom_rect with:
annotate("rect",
xmin = shade$dusk, xmax = shade$dawn, ymin = shade$bottom, ymax = shade$top,
fill = "light grey", alpha = 0.5) +
One possible approach is to define only the relevant shading rectangles for each facet:
library(dplyr)
shade2 <- shade %>%
# replicate shade for each facet
slice(rep(seq(1, n()),
times = n_distinct(df.long$Location))) %>%
mutate(Location = rep(sort(unique(df.long$Location)),
each = n()/4)) %>%
# calculate actual x-range associated with each facet, & join with shade
left_join(df.long %>%
group_by(Location) %>%
summarise(xmin = min(Timestamp),
xmax = max(Timestamp)) %>%
ungroup(),
by = "Location") %>%
# for each shade facet, keep only rows within relevant x-range
filter(dusk >= xmin & dawn <= xmax)
ggplot(df.long) +
geom_rect(data = shade2, # replace shade with shade2, everything else is unchanged
aes(xmin = dusk, xmax = dawn,
ymin = bottom, ymax = top),
fill = 'light grey', alpha = 0.5) +
geom_line(aes(x = Timestamp, y = value, col = Location)) +
geom_point(aes(x = Timestamp, y = value, fill = Location), pch = 21) +
facet_grid( ~ Location, scales = 'free_x') +
ylab('Flux (mg O2 m-2 h-1)') +
theme_bw()
I'm new to ggplot and I'm trying to create this graph:
But actually, I'm just stuck here:
This is my code :
ggplot(diamonds) +
aes(x = carat, group = cut) +
geom_line(stat = "density", size = 1) +
theme_grey() +
facet_wrap(~cut, nrow = 5, strip.position = "right") +
geom_boxplot(aes())
Does someone know what I can do next?
Edit: As of ggplot2 3.3.0, this can be done in ggplot2 without any extension package.
Under the package's news, under new features:
All geoms and stats that had a direction (i.e. where the x and y axes
had different interpretation), can now freely choose their direction,
instead of relying on coord_flip(). The direction is deduced from
the aesthetic mapping, but can also be specified directly with the new
orientation argument (#thomasp85, #3506).
The following will now work directly (replacing all references to geom_boxploth / stat_boxploth in the original answer with geom_boxplot / stat_boxplot:
library(ggplot2)
ggplot(diamonds, aes(x = carat, y = -0.5)) +
# horizontal boxplots & density plots
geom_boxplot(aes(fill = cut)) +
geom_density(aes(x = carat), inherit.aes = FALSE) +
# vertical lines at Q1 / Q2 / Q3
stat_boxplot(geom = "vline", aes(xintercept = ..xlower..)) +
stat_boxplot(geom = "vline", aes(xintercept = ..xmiddle..)) +
stat_boxplot(geom = "vline", aes(xintercept = ..xupper..)) +
facet_grid(cut ~ .) +
scale_fill_discrete()
Original answer
This can be done easily with a horizontal boxplot geom_boxploth() / stat_boxploth(), found in the ggstance package:
library(ggstance)
ggplot(diamonds, aes(x = carat, y = -0.5)) +
# horizontal box plot
geom_boxploth(aes(fill = cut)) +
# normal density plot
geom_density(aes(x = carat), inherit.aes = FALSE) +
# vertical lines at Q1 / Q2 / Q3
stat_boxploth(geom = "vline", aes(xintercept = ..xlower..)) +
stat_boxploth(geom = "vline", aes(xintercept = ..xmiddle..)) +
stat_boxploth(geom = "vline", aes(xintercept = ..xupper..)) +
facet_grid(cut ~ .) +
# reproduce original chart's color scale (o/w ordered factors will result
# in viridis scale by default, using the current version of ggplot2)
scale_fill_discrete()
If you are limited to the ggplot2 package for one reason or another, it can still be done, but it would be less straightforward, since geom_boxplot() and geom_density() go in different directions.
Alternative 1: calculate the box plot's coordinates, & flip them manually before passing the results to ggplot(). Add a density layer in the normal way:
library(dplyr)
library(tidyr)
p.box <- ggplot(diamonds, aes(x = cut, y = carat)) + geom_boxplot()
p.box.data <- layer_data(p.box) %>%
select(x, ymin, lower, middle, upper, ymax, outliers) %>%
mutate(cut = factor(x, labels = levels(diamonds$cut), ordered = TRUE)) %>%
select(-x)
ggplot(p.box.data) +
# manually plot flipped boxplot
geom_segment(aes(x = ymin, xend = ymax, y = -0.5, yend = -0.5)) +
geom_rect(aes(xmin = lower, xmax = upper, ymin = -0.75, ymax = -0.25, fill = cut),
color = "black") +
geom_point(data = . %>% unnest(outliers),
aes(x = outliers, y = -0.5)) +
# vertical lines at Q1 / Q2 / Q3
geom_vline(data = . %>% select(cut, lower, middle, upper) %>% gather(key, value, -cut),
aes(xintercept = value)) +
# density plot
geom_density(data = diamonds, aes(x = carat)) +
facet_grid(cut ~ .) +
labs(x = "carat") +
scale_fill_discrete()
Alternative 2: calculate the density plot's coordinates, & flip them manually before passing the results to ggplot(). Add a box plot layer in the normal way. Flip the whole chart:
p.density <- ggplot(diamonds, aes(x = carat, group = cut)) + geom_density()
p.density.data <- layer_data(p.density) %>%
select(x, y, group) %>%
mutate(cut = factor(group, labels = levels(diamonds$cut), ordered = TRUE)) %>%
select(-group)
p.density.data <- p.density.data %>%
rbind(p.density.data %>%
group_by(cut) %>%
filter(x == min(x)) %>%
mutate(y = 0) %>%
ungroup())
ggplot(diamonds, aes(x = -0.5, y = carat)) +
# manually flipped density plot
geom_polygon(data = p.density.data, aes(x = y, y = x),
fill = NA, color = "black") +
# box plot
geom_boxplot(aes(fill = cut, group = cut)) +
# vertical lines at Q1 / Q2 / Q3
stat_boxplot(geom = "hline", aes(yintercept = ..lower..)) +
stat_boxplot(geom = "hline", aes(yintercept = ..middle..)) +
stat_boxplot(geom = "hline", aes(yintercept = ..upper..)) +
facet_grid(cut ~ .) +
scale_fill_discrete() +
coord_flip()
Maybe this will help. Although need little upgrade :)
library(tidyverse)
library(magrittr)
library(wrapr)
subplots <-
diamonds$cut %>%
unique() %>%
tibble(Cut = .) %>%
mutate(rn = row_number() - 1) %$%
map2(
.x = Cut,
.y = rn,
~annotation_custom(ggplotGrob(
diamonds %>%
filter(cut == .x) %.>%
ggplot(data = .) +
aes(x = carat, fill = cut) +
annotation_custom(ggplotGrob(
ggplot(data = .) +
geom_boxplot(
aes(x = -1, y = carat),
fill = .y + 1
) +
coord_flip() +
theme_void() +
theme(plot.margin = margin(t = 20))
)) +
geom_line(stat = 'density', size = 1) +
theme_void() +
theme(plot.margin = margin(t = .y * 100 + 10, b = (4 - .y) * 100 + 40))
))
)
ggplot() + subplots
I use an example from here. My question is how can I add a specific bounding box to this heatmap, such as add a red line box to the top left four tiles?
require(ggplot2)
require(reshape)
require(scales)
mydf <- data.frame(industry = c('all industries','steel','cars'),
'all regions' = c(250,150,100), americas = c(150,90,60),
europe = c(150,60,40), check.names = FALSE)
mydf
mymelt <- melt(mydf, id.var = c('industry'))
mymelt
ggplot(mymelt, aes(x = industry, y = variable, fill = value)) +
geom_tile() + geom_text(aes(fill = mymelt$value, label = mymelt$value))
A quick and dirty (some hard-coding) possibility is to use geom_rect, where the positions are given by the numerical values of the levels of x and y variables to be bound with a box, plus/minus an offset.
ggplot(mymelt, aes(x = industry, y = variable, fill = value, label = value)) +
geom_tile() +
geom_text() +
geom_rect(aes(xmin = 1 - 0.5, xmax = 2 + 0.5, ymin = 2 - 0.5, ymax = 3 + 0.5),
fill = "transparent", color = "red", size = 1.5)
A less hard-coded version:
# convert x and y variables to factors
ind <- as.factor(mymelt$industry)
vars <- as.factor(mymelt$variable)
# numeric version of the levels to be bound by a box
xmin <- unique(as.numeric(ind[ind == "all industries"]))
xmax <- unique(as.numeric(ind[ind == "cars"]))
ymin <- unique(as.numeric(vars[vars == "americas"]))
ymax <- unique(as.numeric(vars[vars == "europe"]))
# set offset
offset <- 0.5
ggplot(mymelt, aes(x = industry, y = variable, fill = value, label = value)) +
geom_tile() +
geom_text() +
geom_rect(aes(xmin = xmin - offset,
xmax = xmax + offset,
ymin = ymin - offset,
ymax = ymax + offset),
fill = "transparent", color = "red", size = 1.5)
This is my dataset example:
df <- data.frame(group = rep(c("group1","group2","group3", "group4", "group5", "group6"), each=3),
X = paste(letters[1:18]),
Y = c(1:18))
As you can see, there are three variables, two of them categorical (group and X). I have constructed a line chart using ggplot2 where the X axis is X and Y axis is Y.
I want to shade the background using the group variable, so that 6 different colors must appear.
I tried this code:
ggplot(df, aes(x = X, y = Y)) +
geom_rect(xmin = 0, xmax = 3, ymin = -0.5, ymax = Inf,
fill = 'blue', alpha = 0.05) +
geom_point(size = 2.5)
But geom_rect() only colorize the area between 0 and 3, in the X axis.
I guess I can do it manually by replicating the the geom_rect() so many times as groups I have. But I am sure there must be a more beautiful code using the variable itself. Any idea?
To get shading for the entire graph, geom_rect needs the xmin and xmax locations for all the rectangles, so these need to be provided by mapping xmin and xmax to columns in the data, rather than hard-coding them.
ggplot(df, aes(x = X, y = Y)) +
geom_rect(aes(xmin = X, xmax = dplyr::lead(X), ymin = -0.5, ymax = Inf, fill = group),
alpha = 0.5) +
geom_point(size = 2.5) +
theme_classic()
Here is one way:
df2 <- df %>% mutate(Xn=as.numeric(X))
ggplot(df2) +
geom_rect(aes(xmin=Xn-.5, xmax=Xn+.5, ymin=-Inf, ymax=Inf, fill = group), alpha=0.5, stat="identity") +
geom_point(aes(x = Xn, y = Y), size = 2.5) + scale_x_continuous(breaks=df2$Xn, labels=df2$X)
This will get you close - need to add a couple columns to your data frame. Using dplyr here.
df <- df %>%
group_by(group) %>%
mutate(xmin = sort(X)[1],
xmax = sort(X, decreasing = T)[1])
ggplot(df, aes(x = X, y = Y)) +
geom_point(size = 2.5) +
geom_rect(aes(xmin=xmin, xmax = xmax, fill = group), ymin = -0.5, ymax = Inf,
alpha = 0.05)
I use an example from here. My question is how can I add a specific bounding box to this heatmap, such as add a red line box to the top left four tiles?
require(ggplot2)
require(reshape)
require(scales)
mydf <- data.frame(industry = c('all industries','steel','cars'),
'all regions' = c(250,150,100), americas = c(150,90,60),
europe = c(150,60,40), check.names = FALSE)
mydf
mymelt <- melt(mydf, id.var = c('industry'))
mymelt
ggplot(mymelt, aes(x = industry, y = variable, fill = value)) +
geom_tile() + geom_text(aes(fill = mymelt$value, label = mymelt$value))
A quick and dirty (some hard-coding) possibility is to use geom_rect, where the positions are given by the numerical values of the levels of x and y variables to be bound with a box, plus/minus an offset.
ggplot(mymelt, aes(x = industry, y = variable, fill = value, label = value)) +
geom_tile() +
geom_text() +
geom_rect(aes(xmin = 1 - 0.5, xmax = 2 + 0.5, ymin = 2 - 0.5, ymax = 3 + 0.5),
fill = "transparent", color = "red", size = 1.5)
A less hard-coded version:
# convert x and y variables to factors
ind <- as.factor(mymelt$industry)
vars <- as.factor(mymelt$variable)
# numeric version of the levels to be bound by a box
xmin <- unique(as.numeric(ind[ind == "all industries"]))
xmax <- unique(as.numeric(ind[ind == "cars"]))
ymin <- unique(as.numeric(vars[vars == "americas"]))
ymax <- unique(as.numeric(vars[vars == "europe"]))
# set offset
offset <- 0.5
ggplot(mymelt, aes(x = industry, y = variable, fill = value, label = value)) +
geom_tile() +
geom_text() +
geom_rect(aes(xmin = xmin - offset,
xmax = xmax + offset,
ymin = ymin - offset,
ymax = ymax + offset),
fill = "transparent", color = "red", size = 1.5)