Draw arrow between two ggplot pie charts - r

Is there a way to draw an arrow between two pie charts using coordinates from the outer circle of the two pie charts as start and end position? My arrow is drawn by trying with different x's and y's.
#pie chart 1
pie1 <- count(diamonds, cut) %>%
ggplot() +
geom_bar(aes(x = '', y = n, fill = cut), stat = 'identity', width = 1) +
coord_polar('y', start = 0) +
theme_void()+
theme(legend.position = 'none')
#pie chart 2
pie2 <- count(diamonds, color) %>%
ggplot() +
geom_bar(aes(x = '', y = n, fill = color), stat = 'identity', width = 1) +
coord_polar('y', start = 0) +
theme_void()+
theme(legend.position = 'none')
# Plots and arrow combined
grid.newpage()
vp_fig <- viewport() # top plot area
pushViewport(vp_fig)
grid.draw(rectGrob())
vp_pie1 <- viewport(x =.5, y= 1, width = .25, height = .25, just = c('centre', 'top')) #viewport for pie chart 1
pushViewport(vp_pie1)
grid.draw(ggplotGrob(pie1))
popViewport()
vp_pie2 <- viewport(x =.25, y= .5, width = .25, height = .25, just = c('left', 'centre')) #viewport for pie chart 2
pushViewport(vp_pie2)
grid.draw(ggplotGrob(pie2))
popViewport()
upViewport() #move to top plot area
grid.lines(x = c(.45, .37), y = c(.8, .61), arrow = arrow()) # arrow between the pie charts

Here's a possible approach.:
Step 0. Create pie charts, & convert them to a list of grobs:
pie1 <- count(diamonds, fill = cut) %>%
ggplot() +
geom_col(aes(x = '', y = n, fill = fill), width = 1) +
coord_polar('y', start = 0) +
theme_void()+
theme(legend.position = 'none')
pie2 <- pie1 %+% count(diamonds, fill = color)
pie3 <- pie1 %+% count(diamonds, fill = clarity)
pie.list <- list(pie1 = ggplotGrob(pie1),
pie2 = ggplotGrob(pie2),
pie3 = ggplotGrob(pie3))
rm(pie1, pie2, pie3)
Step 1. Define centre coordinates / radius for each pie:
pie.coords <- data.frame(
pie = names(pie.list),
center.x = c(0, 3, 5),
center.y = c(0, 4, 2),
radius = c(1, 1.5, 0.5)
)
Step 2. Calculate the appropriate start & end arrow coordinates for each combination of pies, taking into account each pie's size (assuming each pie can have a different radius value):
arrow.coords <- expand.grid(start = pie.coords$pie,
end = pie.coords$pie,
KEEP.OUT.ATTRS = FALSE,
stringsAsFactors = FALSE) %>%
filter(start != end) %>%
left_join(pie.coords, by = c("start" = "pie")) %>%
left_join(pie.coords, by = c("end" = "pie"))
colnames(arrow.coords) <- colnames(arrow.coords) %>%
gsub(".x$", ".start", .) %>%
gsub(".y$", ".end", .)
arrow.coords <- arrow.coords %>%
mutate(delta.x = center.x.end - center.x.start,
delta.y = center.y.end - center.y.start,
distance = sqrt(delta.x^2 + delta.y^2)) %>%
mutate(start.x = center.x.start + radius.start / distance * delta.x,
start.y = center.y.start + radius.start / distance * delta.y,
end.x = center.x.end - radius.end / distance * delta.x,
end.y = center.y.end - radius.end / distance * delta.y) %>%
select(starts_with("start"),
starts_with("end")) %>%
mutate_at(vars(start, end), factor)
Step 3. Convert pie center / radius into x & y min/max coordinates:
pie.coords <- pie.coords %>%
mutate(xmin = center.x - radius,
xmax = center.x + radius,
ymin = center.y - radius,
ymax = center.y + radius)
Step 4. Define function to create an annotation_custom() layer for each pie (this is optional; I just don't want to type the same thing repeatedly for each pie):
annotation_custom_list <- function(pie.names){
result <- vector("list", length(pie.names) + 1)
for(i in seq_along(pie.names)){
pie <- pie.names[i]
result[[i]] <- annotation_custom(
grob = pie.list[[pie]],
xmin = pie.coords$xmin[pie.coords$pie == pie],
xmax = pie.coords$xmax[pie.coords$pie == pie],
ymin = pie.coords$ymin[pie.coords$pie == pie],
ymax = pie.coords$ymax[pie.coords$pie == pie])
}
# add a blank geom layer to ensure the resulting ggplot's
# scales extend sufficiently to show each pie
result[[length(result)]] <- geom_blank(
data = pie.coords %>% filter(pie %in% pie.names),
aes(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax)
)
return(result)
}
Step 5. Putting it all together:
ggplot() +
# plot pie grobs
annotation_custom_list(c("pie1", "pie2", "pie3")) +
# plot arrows between grobs
# (adjust the filter criteria to only plot between specific pies)
geom_segment(data = arrow.coords %>%
filter(as.integer(start) < as.integer(end)),
aes(x = start.x, y = start.y,
xend = end.x, yend = end.y),
arrow = arrow()) +
# theme_void for clean look
theme_void()

I ended up with this figure which is mostly the code of Z.Lin with a few small modifications:
Step 0
Here I have only added more pies and subsetted the datasets of the pies:
library(tidyverse)
pie1 <- count(diamonds, fill = cut) %>%
ggplot() +
geom_col(aes(x = '', y = n, fill = fill), width = 1) +
coord_polar('y', start = 0) +
scale_fill_manual(values = c('Fair'='green','Good'= 'darkgreen','Very Good'='darkblue','Premium'= 'plum','Ideal'='red'))+
theme_void() +
theme(legend.position = 'none')
pie2 <- pie1 %+% count(subset(diamonds, cut %in% c('Premium', 'Fair')), fill = cut)
pie3 <- pie1 %+% count(subset(diamonds, cut %in% c('Ideal', 'Good')), fill = cut)
pie4 <- pie1 %+% count(subset(diamonds, cut=='Premium'), fill = cut)
pie5 <- pie1 %+% count(subset(diamonds, cut=='Fair'), fill = cut)
pie6 <- pie1 %+% count(subset(diamonds, cut=='Ideal'), fill = cut)
pie7 <- pie1 %+% count(subset(diamonds, cut=='Good'), fill = cut)
pie.list <- list(pie1 = ggplotGrob(pie1),
pie2 = ggplotGrob(pie2),
pie3 = ggplotGrob(pie3),
pie4 = ggplotGrob(pie4),
pie5 = ggplotGrob(pie5),
pie6 = ggplotGrob(pie6),
pie7 = ggplotGrob(pie7))
rm(pie1, pie2, pie3, pie4, pie5, pie6, pie7)
Step 1
No fundamental modifications:
y <- c(1, (1+2*sqrt(3)), (1+4*sqrt(3))) #vector of all y
pie.coords <- data.frame(
pie = names(pie.list),
center.x = c(7,3,11,1,5,9,13),
center.y = c(y[3],y[2],y[2],y[1],y[1],y[1],y[1]),
radius = c(1,1,1,1,1,1,1)
)
Step 2
I modified the length of the arrows by multiplying with a "fudge factor" of .85 (I tried different values until the endpoint fitted with the pies). I wanted only some of the arrows between the pies so I included more filtering. I added a factor for the different colours of arrows.
arrow.coords <- expand.grid(start = pie.coords$pie,
end = pie.coords$pie,
KEEP.OUT.ATTRS = FALSE,
stringsAsFactors = FALSE) %>%
filter(start != end) %>%
filter(start %in% c('pie1', 'pie2', 'pie3')) %>%
filter(end != 'pie1') %>%
left_join(pie.coords, by = c("start" = "pie")) %>%
left_join(pie.coords, by = c("end" = "pie"))
colnames(arrow.coords) <- colnames(arrow.coords) %>%
gsub(".x$", ".start", .) %>%
gsub(".y$", ".end", .)
arrow.coords <- arrow.coords %>%
mutate(delta.x = center.x.end - center.x.start,
delta.y = center.y.end - center.y.start,
distance = sqrt(delta.x^2 + delta.y^2)) %>%
mutate(start.x = center.x.start + radius.start*.85 / distance * delta.x, #multiply with .85 to justify the arrow lengths
start.y = center.y.start + radius.start*.85 / distance * delta.y,
end.x = center.x.end - radius.end*.85 / distance * delta.x,
end.y = center.y.end - radius.end*.85 / distance * delta.y) %>%
select(starts_with("start"),
starts_with("end")) %>%
mutate_at(vars(start, end), factor) %>%
filter(start.y>end.y) %>%
filter(start.y - end.y <4 & abs(start.x-end.x)<4) %>%
mutate(arrowType = factor(paste0(start,end))) %>% #adding factor
mutate(arrowType=recode(arrowType, 'pie1pie2' = 'PremiumFair',
'pie1pie3' = 'IdealGood',
'pie2pie4' = 'Premium',
'pie3pie6' = 'Ideal',
'pie2pie5' = 'Fair',
'pie3pie7'='Good'))
Step 3 and step 4
No changes of the code of Z.Lin.
Step 5
I moved all the filtering of the arrow.coords to Step 2. I modified the formatting of the arrows (thicker and with varying colour) and added labels to the arrows. In addition I added coord_fixed(ratio = 1) to ensure that one unit of x has the same length as one unit of y.
ggplot() +
# plot pie grobs
annotation_custom_list(c("pie1", "pie2", "pie3", "pie4", "pie5", "pie6", "pie7")) +
# plot arrows between grobs
geom_segment(data = arrow.coords,
aes(x = start.x, y = start.y,
xend = end.x, yend = end.y, colour = arrowType),
arrow = arrow(), size = 3, show.legend = FALSE) +
scale_colour_manual(values = c('Fair' = 'green','Good' ='darkgreen', 'Premium'='plum','Ideal' ='red', 'PremiumFair'='plum', 'IdealGood'='red'))+
geom_label(data = arrow.coords, aes(x = (start.x+end.x)/2, y = (start.y+end.y)/2, label = arrowType), size = 8) +
coord_fixed(ratio = 1) +
theme_void() # theme_void for clean look

Related

Shading regions of a plot based on whether a condition is satisfied

I'm creating lineplots using ggplot() and geom_line() for a corridor of values that develops over time.
It may happen sometimes that the upper bound is below the lower bound (which I'll call "inversion"), and I would like to highlight regions where this happens in my plot, say by using a different background color.
Searching both Google and StackOverflow has not led me anywhere.
Here is an artificial example:
library(tidyverse)
library(RcppRoll)
set.seed(42)
N <- 100
l <- 5
a <- rgamma(n = N, shape = 2)
d <- tibble(x = 1:N, upper = roll_maxr(a, n = l), lower = roll_minr(a + lag(a), n = l)) %>% mutate(inversion = upper < lower)
dl <- pivot_longer(d, cols = c("upper", "lower"), names_to = "Bounds", values_to = "bound_vals")
ggplot(dl, mapping = aes(x = x, y = bound_vals, color = Bounds)) + geom_line(linewidth = 1) + theme_light()
This produces the following plot:
As you can see, inversion occurs in a few places, e.g. around x = 50. I would like for the plot to have a darker (say gray) background where it does, based on the inversion column already in the tibble. How can I do this?
Thank you very much for the help!
One option to achieve your desired result would be to use ggh4x::stat_difference like so. Note that to this end we have to use the wide dataset and accordingly add the lines via two geom_line.
library(ggplot2)
library(ggh4x)
ggplot(d, mapping = aes(x = x)) +
stat_difference(aes(ymin = lower, ymax = upper)) +
geom_line(aes(y = lower, color = "lower"), linewidth = 1) +
geom_line(aes(y = upper, color = "upper"), linewidth = 1) +
scale_fill_manual(values = c("+" = "transparent", "-" = "darkgrey"),
breaks = "-",
labels = "Inversion") +
theme_light() +
labs(color = "Bounds")
EDIT Of course is it also possible to draw background rects for the intersection regions. But I don't know of any out-of-the-box option, i.e. the tricky part is to compute the x values where the lines intersect which requires some effort and approximation. Here is one approach but probably not the most efficient one.
library(tidyverse)
# Compute intersection points and prepare data to draw rects
n <- 20 # Increase for a better approximation
rect <- data.frame(
x = seq(1, N, length.out = N * n)
)
# Shamefully stolen from ggh4x
rle_id <- function(x) with(rle(x), rep.int(seq_along(values), lengths))
rect <- rect |>
mutate(lower = approx(d$x, d$lower, x)[["y"]],
upper = approx(d$x, d$upper, x)[["y"]],
inversion = upper < lower,
rle = with(rle(inversion & !is.na(inversion)), rep.int(seq_along(values), lengths))
) |>
filter(inversion) |>
group_by(rle) |>
slice(c(1, n())) |>
mutate(label = c("xmin", "xmax")) |>
ungroup() |>
select(x, rle, label) |>
pivot_wider(names_from = label, values_from = x)
ggplot(dl, mapping = aes(x = x, y = bound_vals, color = Bounds)) +
geom_line(linewidth = 1) +
geom_rect(data = rect, aes(xmin = xmin, xmax = xmax, group = rle),
ymin = -Inf, ymax = Inf, fill = "darkgrey", alpha = .3, inherit.aes = FALSE) +
theme_light()
#> Warning: Removed 9 rows containing missing values (`geom_line()`).
Answering myself, the following worked for me in the end (also using actual data and plots grouped with facet_wrap()); h/t to #stefan, whose approach with geom_rect() I recycled:
library(tidyverse)
library(RcppRoll)
set.seed(42)
N <- 100
l <- 5
a <- rgamma(n = N, shape = 2)
d <- tibble(x = 1:N, upper = roll_maxr(a, n = l), lower = roll_minr(a + lag(a), n = l)) %>%
mutate(inversion = upper < lower,
inversionLag = if_else(is.na(lag(inversion)), FALSE, lag(inversion)),
inversionLead = if_else(is.na(lead(inversion)), FALSE, lead(inversion)),
inversionStart = inversion & !inversionLag,
inversionEnd = inversion & !inversionLead
)
dl <- pivot_longer(d, cols = c("upper", "lower"), names_to = "Bounds", values_to = "bound_vals")
iS <- d %>% filter(inversionStart) %>% select(x) %>% rowid_to_column() %>% rename(iS = x)
iE <- d %>% filter(inversionEnd) %>% select(x) %>% rowid_to_column() %>% rename(iE = x)
iD <- iS %>% full_join(iE, by = c("rowid"))
g <- ggplot(dl, mapping = aes(x = x, y = bound_vals, color = Bounds)) +
geom_line(linewidth = 1) +
geom_rect(data = iD, mapping = aes(xmin = iS, xmax = iE, fill = "Inversion"), ymin = -Inf, ymax = Inf, alpha = 0.3, inherit.aes = FALSE) +
scale_fill_manual(name = "Inversions", values = "darkgray") +
theme_light()
g
This gives
which is pretty much what I was after.

How to substitute x values for legends in ggplot2?

So, I am trying to build a circular barplot similiar as the one that follows:
library(tidyverse)
a <- c("Like", "Dislike", "Neutral")
b <- c("Price", "Functionality", "Functionality: missing pieces", "Durability")
n <- c(10, 0, 5, 6,5,4,19,28,4,6,9,1)
data <- data.frame(a,b,n)
ggplot(data, aes(x=b, y= n, fill = a))+geom_bar(stat="identity")+
ylim(-100, 120)+
coord_polar(start = 0)
But I would like to keep it clean and put the names of the bars in a legend box instead of above the bar. Substituting the names for numbers and then linking it to a legend would be ideal.
You could plot them as shapes:
data <- data.frame(a, b, n) %>%
mutate(b_fct = as.factor(b))
shapes <- data$b_fct %>%
levels() %>%
length() %>%
seq() %>%
as.character() %>%
map_int(utf8ToInt)
ggplot(data, aes(x = b_fct, shape = b, y = n, fill = a)) +
geom_bar(stat = "identity") +
geom_point(y = 120) +
ylim(-100, 120) +
coord_polar(start = 0) +
scale_x_discrete(labels = NULL) +
scale_shape_manual(values = shapes)
You could convert b to a factor and then abuse scale_color_manual a little:
a <- c("Like", "Dislike", "Neutral")
b <- factor(c("Price", "Functionality", "Functionality: missing pieces", "Durability"), c("Price", "Functionality", "Functionality: missing pieces", "Durability"), ordered = T)
n <- c(10, 0, 5, 6,5,4,19,28,4,6,9,1)
ggplot(data, aes(x=as.character(as.numeric(b)), y= n, fill = a, color = b))+geom_bar(stat="identity")+
ylim(-100, 120)+
coord_polar(start = 0) +
labs(x = NULL, y = NULL) +
scale_color_manual(
values = rep('#ffffff00', length(unique(b))),
labels = paste(as.numeric(b), as.character(b), sep = ': '),
guide = guide_legend(override.aes = list(fill = '#ffffff'), keywidth = 0))
Maybe something like this, using the caption feature:
a <- c("Like", "Dislike", "Neutral")
b <- as.character(seq_len(4))
n <- c(10, 0, 5, 6,5,4,19,28,4,6,9,1)
data <- data.frame(a,b,n)
ggplot(data, aes(x=b, y= n, fill = a))+geom_bar(stat="identity")+
ylim(-100, 120)+
coord_polar(start = 0) +
labs(caption=paste((paste(seq_len(4), c("Price", "Functionality", "Functionality: missing pieces", "Durability"), sep=": ")), collapse="\n")) +
theme (plot.caption=element_text(hjust=0))

Discrete legend breaks in ggplot2 [duplicate]

This question already has answers here:
Create discrete color bar with varying interval widths and no spacing between legend levels
(5 answers)
Closed last year.
I'd like to break the legend into categories rather than having a continuous range of colours. Could someone kindly help me for the specific example I am using here? Below is my current trial with colour breaks at 40, 60 and 80. Thank you very much!
library(raster)
library(ggplot2)
library(maptools)
data("wrld_simpl")
#sample raster
r <- raster(ncol=10, nrow=20)
r[] <- 1:ncell(r)
extent(r) <- extent(c(-180, 180, -70, 70))
#plotting
var_df <- as.data.frame(rasterToPoints(r))
p <- ggplot() +
geom_polygon(data = wrld_simpl[wrld_simpl#data$UN!="10",],
aes(x = long, y = lat, group = group),
colour = "black", fill = "grey")
p <- p + geom_raster(data = var_df, aes(x = x, y = y, fill = layer))
p <- p + coord_equal() + theme_bw() +labs(x="", y="")
p <- p + theme(legend.key=element_blank(),
axis.text.y =element_text(size=16),
axis.text.x =element_text(size=16),
legend.text =element_text(size=12),
legend.title=element_text(size=12))
# p <- p + scale_fill_gradientn(colours = rev(terrain.colors(10)))
p <- p + scale_colour_manual(values = c("red", "blue", "green","yellow"),
breaks = c("40", "60", "80", max(var_df$layer)),
labels = c("1-40", "40-60", "60-80", "80+"))
p <- p + geom_polygon(data = wrld_simpl[wrld_simpl#data$UN!="10",],
aes(x = long, y = lat, group = group),
colour = "black", fill = NA)
p
Current continuous legend:
Example of legend with breaks:
Here you go. I took the plot_discrete_cbar() function written by #AF7 from here
library(raster)
library(ggplot2)
library(maptools)
# Plot discrete colorbar function
plot_discrete_cbar = function (
# Vector of breaks. If +-Inf are used, triangles will be added to the sides of the color bar
breaks,
palette = "Greys", # RColorBrewer palette to use
# Alternatively, manually set colors
colors = RColorBrewer::brewer.pal(length(breaks) - 1, palette),
direction = 1, # Flip colors? Can be 1 or -1
spacing = "natural", # Spacing between labels. Can be "natural" or "constant"
border_color = NA, # NA = no border color
legend_title = NULL,
legend_direction = "horizontal", # Can be "horizontal" or "vertical"
font_size = NULL,
expand_size = 1, # Controls spacing around legend plot
spacing_scaling = 1, # Multiplicative factor for label and legend title spacing
width = 0.1, # Thickness of color bar
triangle_size = 0.1 # Relative width of +-Inf triangles
) {
require(ggplot2)
if (!(spacing %in% c("natural", "constant"))) stop("Spacing must be either 'natural' or 'constant'")
if (!(direction %in% c(1, -1))) stop("Direction must be either 1 or -1")
if (!(legend_direction %in% c("horizontal", "vertical"))) {
stop("Legend_direction must be either 'horizontal' or 'vertical'")
}
breaks = as.numeric(breaks)
new_breaks = sort(unique(breaks))
if (any(new_breaks != breaks)) warning("Wrong order or duplicated breaks")
breaks = new_breaks
if (class(colors) == "function") colors = colors(length(breaks) - 1)
if (length(colors) != length(breaks) - 1) {
stop("Number of colors (", length(colors), ") must be equal to number of breaks (",
length(breaks), ") minus 1")
}
if (!missing(colors)) {
warning("Ignoring RColorBrewer palette '", palette, "', since colors were passed manually")
}
if (direction == -1) colors = rev(colors)
inf_breaks = which(is.infinite(breaks))
if (length(inf_breaks) != 0) breaks = breaks[-inf_breaks]
plotcolors = colors
n_breaks = length(breaks)
labels = breaks
if (spacing == "constant") {
breaks = 1:n_breaks
}
r_breaks = range(breaks)
if(is.null(font_size)) {
print("Legend key font_size not set. Use default value = 5")
font_size <- 5
} else {
print(paste0("font_size = ", font_size))
font_size <- font_size
}
cbar_df = data.frame(stringsAsFactors = FALSE,
y = breaks,
yend = c(breaks[-1], NA),
color = as.character(1:n_breaks)
)[-n_breaks,]
xmin = 1 - width/2
xmax = 1 + width/2
cbar_plot = ggplot(cbar_df, aes(xmin = xmin, xmax = xmax,
ymin = y, ymax = yend, fill = color)) +
geom_rect(show.legend = FALSE,
color = border_color)
if (any(inf_breaks == 1)) { # Add < arrow for -Inf
firstv = breaks[1]
polystart = data.frame(
x = c(xmin, xmax, 1),
y = c(rep(firstv, 2), firstv - diff(r_breaks) * triangle_size)
)
plotcolors = plotcolors[-1]
cbar_plot = cbar_plot +
geom_polygon(data = polystart, aes(x = x, y = y),
show.legend = FALSE,
inherit.aes = FALSE,
fill = colors[1],
color = border_color)
}
if (any(inf_breaks > 1)) { # Add > arrow for +Inf
lastv = breaks[n_breaks]
polyend = data.frame(
x = c(xmin, xmax, 1),
y = c(rep(lastv, 2), lastv + diff(r_breaks) * triangle_size)
)
plotcolors = plotcolors[-length(plotcolors)]
cbar_plot = cbar_plot +
geom_polygon(data = polyend, aes(x = x, y = y),
show.legend = FALSE,
inherit.aes = FALSE,
fill = colors[length(colors)],
color = border_color)
}
if (legend_direction == "horizontal") { # horizontal legend
mul = 1
x = xmin
xend = xmax
cbar_plot = cbar_plot + coord_flip()
angle = 0
legend_position = xmax + 0.1 * spacing_scaling
} else { # vertical legend
mul = -1
x = xmax
xend = xmin
angle = -90
legend_position = xmax + 0.2 * spacing_scaling
}
cbar_plot = cbar_plot +
geom_segment(data = data.frame(y = breaks, yend = breaks),
aes(y = y, yend = yend),
x = x - 0.05 * mul * spacing_scaling, xend = xend,
inherit.aes = FALSE) +
annotate(geom = 'text', x = x - 0.1 * mul * spacing_scaling, y = breaks,
label = labels,
size = font_size) +
scale_x_continuous(expand = c(expand_size, expand_size)) +
scale_fill_manual(values = plotcolors) +
theme_void()
if (!is.null(legend_title)) { # Add legend title
cbar_plot = cbar_plot +
annotate(geom = 'text', x = legend_position, y = mean(r_breaks),
label = legend_title,
angle = angle,
size = font_size)
}
return(cbar_plot)
}
Cut data into bins for the discrete colorbar
myvalues <- c(seq(0, 200, 40), Inf)
var_df$cuts <- cut(var_df$layer, myvalues, include.lowest = TRUE)
levels(var_df$cuts)
#> [1] "[0,40]" "(40,80]" "(80,120]" "(120,160]" "(160,200]" "(200,Inf]"
Plot the raster
p <- ggplot() +
geom_polygon(data = wrld_simpl[wrld_simpl#data$UN != "10", ],
aes(x = long, y = lat, group = group),
colour = "black", fill = "grey")
p <- p + geom_raster(data = var_df, aes(x = x, y = y, fill = cuts)) # matching cuts & fill
p <- p + coord_equal() + theme_minimal() + labs(x="", y="")
p <- p + theme(legend.key =element_blank(),
axis.text.y =element_text(size=16),
axis.text.x =element_text(size=16),
legend.text =element_text(size=12),
legend.title=element_text(size=12))
p <- p + scale_fill_brewer("Layer", palette = "YlGnBu", drop = FALSE)
p <- p + geom_polygon(data = wrld_simpl[wrld_simpl#data$UN != "10", ],
aes(x = long, y = lat, group = group),
colour = "black", fill = NA)
p <- p + theme(legend.position = "none")
Plot the discrete colorbar
dbar <- plot_discrete_cbar(myvalues,
palette = "YlGnBu",
legend_title = NULL,
spacing = "natural")
# reduce top and bottom margins
p1 <- p + theme(plot.margin = unit(c(10, 10, -35, 10), "pt"))
dbar <- dbar + theme(plot.margin = unit(c(-35, 10, -30, 10), "pt"))
Combine two plots together
# devtools::install_github('baptiste/egg')
library(egg)
ggarrange(p1, dbar, nrow = 2, ncol = 1, heights = c(1, 0.4))
Created on 2018-10-18 by the reprex package (v0.2.1.9000)

Making a specific quantile plot in R

I am very intrigued by the following visulization (Decile term)
And I wonder how it would be possible to do it in R.
There is of course histograms and density plots, but they do not make such a nice visualization. Especially, I would like to know if it possible to do it with ggplot/tidyverse.
edit in response to the comment
library(dplyr)
library(ggplot2)
someData <- data_frame(x = rnorm(1000))
ggplot(someData, aes(x = x)) +
geom_histogram()
this produces a histogram (see http://www.r-fiddle.org/#/fiddle?id=LQXazwMY&version=1)
But how I can get the coloful bars? How to implement the small rectangles? (The arrows are less relevant).
You have to define a number of breaks, and use approximate deciles that match those histogram breaks. Otherwise, two deciles will end up in one bar.
d <- data_frame(x = rnorm(1000))
breaks <- seq(min(d$x), max(d$x), length.out = 50)
quantiles <- quantile(d$x, seq(0, 1, 0.1))
quantiles2 <- sapply(quantiles, function(x) breaks[which.min(abs(x - breaks))])
d$bar <- as.numeric(as.character(cut(d$x, breaks, na.omit((breaks + dplyr::lag(breaks)) / 2))))
d$fill <- cut(d$x, quantiles2, na.omit((quantiles2 + dplyr::lag(quantiles2)) / 2))
ggplot(d, aes(bar, y = 1, fill = fill)) +
geom_col(position = 'stack', col = 1, show.legend = FALSE, width = diff(breaks)[1])
Or with more distinct colors:
ggplot(d, aes(bar, y = 1, fill = fill)) +
geom_col(position = 'stack', col = 1, show.legend = FALSE, width = diff(breaks)[1]) +
scale_fill_brewer(type = 'qual', palette = 3) # The only qual pallete with enough colors
Add some styling and increase the breaks to 100:
ggplot(d, aes(bar, y = 1, fill = fill)) +
geom_col(position = 'stack', col = 1, show.legend = FALSE, width = diff(breaks)[1], size = 0.3) +
scale_fill_brewer(type = 'qual', palette = 3) +
theme_classic() +
coord_fixed(diff(breaks)[1], expand = FALSE) + # makes square blocks
labs(x = 'x', y = 'count')
And here is a function to make that last one:
decile_histogram <- function(data, var, n_breaks = 100) {
breaks <- seq(min(data[[var]]), max(data[[var]]), length.out = n_breaks)
quantiles <- quantile(data[[var]], seq(0, 1, 0.1))
quantiles2 <- sapply(quantiles, function(x) breaks[which.min(abs(x - breaks))])
data$bar <- as.numeric(as.character(
cut(data[[var]], breaks, na.omit((breaks + dplyr::lag(breaks)) / 2)))
)
data$fill <- cut(data[[var]], quantiles2, na.omit((quantiles2 + dplyr::lag(quantiles2)) / 2))
ggplot2::ggplot(data, ggplot2::aes(bar, y = 1, fill = fill)) +
ggplot2::geom_col(position = 'stack', col = 1, show.legend = FALSE, width = diff(breaks)[1], size = 0.3) +
ggplot2::scale_fill_brewer(type = 'qual', palette = 3) +
ggplot2::theme_classic() +
ggplot2::coord_fixed(diff(breaks)[1], expand = FALSE) +
ggplot2::labs(x = 'x', y = 'count')
}
Use as:
d <- data.frame(x = rnorm(1000))
decile_histogram(d, 'x')

ggplot2: incorrect boxplot width when facetting with facets of different scales

I need a facetted boxplot. The x-axis for the plots is a quantitative variable, and I want to reflect this information on the plot. The scale of the abscissa is very different among the facets.
My problem is that the widths of the boxes are very small for the facet with the large scale.
A possible explanation is that the width of the boxes is the same for all facets, whereas it should ideally be determined by the xlims of each facet individually.
I would be grateful for two inputs:
Do you think this is a bug and should be reported ?
Do you have a solution ?
Thanks in advance !
Remark: transforming the abscissa to a categorical variable could be one solution, but it is not perfect as it would result in a loss of some information.
Minimal working example:
library(tidyverse)
c(1:4,7) %>%
c(.,10*.) %>% # Create abscissa on two different scales
lapply(FUN = function(x) {tibble(x = x, y = rnorm(50), idx = ifelse(test = x<8, yes = 'A', no = 'B'))}) %>% # Create sample (y) and label (idx)
bind_rows() %>%
ggplot(aes(x = x, y = y, group = x)) +
geom_boxplot() +
facet_wrap(~idx, scales = 'free')
Result:
A cumbersome solution would be to redraw the boxplot from scratch, but this is not very satisfying:
draw_boxplot = function(locations, width, ymin, lower, middle, upper, ymax, idx){
local_df = tibble(locations = locations, width = width, ymin = ymin, lower = lower, middle = middle, upper = upper, ymax = ymax, idx = idx)
ggplot(data = local_df) +
geom_rect(aes(xmin = locations - width/2, xmax = locations + width/2, ymin = lower, ymax = upper), fill = 'white', colour = 'black') +
geom_segment(aes(x = locations - width/2, xend = locations + width/2, y = middle, yend = middle), size = 0.8) +
geom_segment(aes(x = locations, xend = locations, y = upper, yend = ymax)) +
geom_segment(aes(x = locations, xend = locations, y = lower, yend = ymin)) +
facet_wrap(~idx, scales = 'free_x')
}
make_boxplot = function(to_plot){
to_plot %>%
cmp_boxplot %>%
(function(x){
draw_boxplot(locations = x$x, width = x$width, ymin = x$y0, lower = x$y25, middle = x$y50, upper = x$y75, ymax = x$y100, idx = x$idx)
})
}
cmp_boxplot = function(to_plot){
to_plot %>%
group_by(idx) %>%
mutate(width = 0.6*(max(x) - min(x))/length(unique(x))) %>% #hand specified width
group_by(x) %>%
mutate(y0 = min(y),
y25 = quantile(y, 0.25),
y50 = median(y),
y75 = quantile(y, 0.75),
y100 = max(y)) %>%
select(-y) %>%
unique()
}
c(1:4,7) %>%
c(.,10*.) %>%
lapply(FUN = function(x) {tibble(x = x, y = rnorm(50), idx = ifelse(test = x<8, yes = 'A', no = 'B'))}) %>%
bind_rows() %>%
make_boxplot
Result:
Since geom_boxplot doesn't allow varying width as an aesthetic, you have to write your own. Fortunately it's not too complicated.
bp_custom <- function(vals, type) {
bp = boxplot.stats(vals)
if(type == "whiskers") {
y = bp$stats[1]
yend = bp$stats[5]
return(data.frame(y = y, yend = yend))
}
if(type == "box") {
ymin = bp$stats[2]
ymax = bp$stats[4]
return(data.frame(ymin = ymin, ymax = ymax))
}
if(type == "median") {
y = median(vals)
yend = median(vals)
return(data.frame(y = y, yend = yend))
}
if(type == "outliers") {
y = bp$out
return(data.frame(y = y))
} else {
return(warning("Type must be one of 'whiskers', 'box', 'median', or 'outliers'."))
}
}
This function does all the computation and returns dataframes suitable for use in stat_summary. Then we call it in several different layers to construct the various bits of a boxplot. Note that you need to compute the width of the boxplot per group of the facet, done below using dplyr in your pipe. I calculated the width such that the range of x gets split up into equal segments based on the number of unique x values, then each box gets about 1/2 the width of that segment. Your data may need a different adjustment.
library(dplyr)
c(1:4,7) %>%
c(.,10*.) %>% # Create abscissa on two different scales
lapply(FUN = function(x) {
tibble(x = x, y = rnorm(50), idx = ifelse(test = x<8, yes = 'A', no = 'B'))
}) %>%
bind_rows() %>%
group_by(idx) %>% # NOTE THIS LINE
mutate(width = 0.25*diff(range(x))/length(unique(x))) %>% # NOTE THIS LINE
ggplot(aes(x = x, y = y, group = x)) +
stat_summary(fun.data = bp_custom, fun.args = "whiskers",
geom = "segment", aes(xend = x)) +
stat_summary(fun.data = bp_custom, fun.args = "box",
geom = "rect", aes(xmin = x - width, xmax = x + width),
fill = "white", color = "black") +
stat_summary(fun.data = bp_custom, fun.args = "median",
geom = "segment", aes(x = x - width, xend = x + width), size = 1.5) +
stat_summary(fun.data = bp_custom, fun.args = "outliers",
geom = "point") +
facet_wrap(~idx, scales = 'free')
As for reporting this as a bug (actually a desired feature), I think it's an infrequent enough use case that they won't prioritize it. If you wrap this code up into a custom geom (based on here) and submit a pull-request, you might get more luck.

Resources