I'm creating lineplots using ggplot() and geom_line() for a corridor of values that develops over time.
It may happen sometimes that the upper bound is below the lower bound (which I'll call "inversion"), and I would like to highlight regions where this happens in my plot, say by using a different background color.
Searching both Google and StackOverflow has not led me anywhere.
Here is an artificial example:
library(tidyverse)
library(RcppRoll)
set.seed(42)
N <- 100
l <- 5
a <- rgamma(n = N, shape = 2)
d <- tibble(x = 1:N, upper = roll_maxr(a, n = l), lower = roll_minr(a + lag(a), n = l)) %>% mutate(inversion = upper < lower)
dl <- pivot_longer(d, cols = c("upper", "lower"), names_to = "Bounds", values_to = "bound_vals")
ggplot(dl, mapping = aes(x = x, y = bound_vals, color = Bounds)) + geom_line(linewidth = 1) + theme_light()
This produces the following plot:
As you can see, inversion occurs in a few places, e.g. around x = 50. I would like for the plot to have a darker (say gray) background where it does, based on the inversion column already in the tibble. How can I do this?
Thank you very much for the help!
One option to achieve your desired result would be to use ggh4x::stat_difference like so. Note that to this end we have to use the wide dataset and accordingly add the lines via two geom_line.
library(ggplot2)
library(ggh4x)
ggplot(d, mapping = aes(x = x)) +
stat_difference(aes(ymin = lower, ymax = upper)) +
geom_line(aes(y = lower, color = "lower"), linewidth = 1) +
geom_line(aes(y = upper, color = "upper"), linewidth = 1) +
scale_fill_manual(values = c("+" = "transparent", "-" = "darkgrey"),
breaks = "-",
labels = "Inversion") +
theme_light() +
labs(color = "Bounds")
EDIT Of course is it also possible to draw background rects for the intersection regions. But I don't know of any out-of-the-box option, i.e. the tricky part is to compute the x values where the lines intersect which requires some effort and approximation. Here is one approach but probably not the most efficient one.
library(tidyverse)
# Compute intersection points and prepare data to draw rects
n <- 20 # Increase for a better approximation
rect <- data.frame(
x = seq(1, N, length.out = N * n)
)
# Shamefully stolen from ggh4x
rle_id <- function(x) with(rle(x), rep.int(seq_along(values), lengths))
rect <- rect |>
mutate(lower = approx(d$x, d$lower, x)[["y"]],
upper = approx(d$x, d$upper, x)[["y"]],
inversion = upper < lower,
rle = with(rle(inversion & !is.na(inversion)), rep.int(seq_along(values), lengths))
) |>
filter(inversion) |>
group_by(rle) |>
slice(c(1, n())) |>
mutate(label = c("xmin", "xmax")) |>
ungroup() |>
select(x, rle, label) |>
pivot_wider(names_from = label, values_from = x)
ggplot(dl, mapping = aes(x = x, y = bound_vals, color = Bounds)) +
geom_line(linewidth = 1) +
geom_rect(data = rect, aes(xmin = xmin, xmax = xmax, group = rle),
ymin = -Inf, ymax = Inf, fill = "darkgrey", alpha = .3, inherit.aes = FALSE) +
theme_light()
#> Warning: Removed 9 rows containing missing values (`geom_line()`).
Answering myself, the following worked for me in the end (also using actual data and plots grouped with facet_wrap()); h/t to #stefan, whose approach with geom_rect() I recycled:
library(tidyverse)
library(RcppRoll)
set.seed(42)
N <- 100
l <- 5
a <- rgamma(n = N, shape = 2)
d <- tibble(x = 1:N, upper = roll_maxr(a, n = l), lower = roll_minr(a + lag(a), n = l)) %>%
mutate(inversion = upper < lower,
inversionLag = if_else(is.na(lag(inversion)), FALSE, lag(inversion)),
inversionLead = if_else(is.na(lead(inversion)), FALSE, lead(inversion)),
inversionStart = inversion & !inversionLag,
inversionEnd = inversion & !inversionLead
)
dl <- pivot_longer(d, cols = c("upper", "lower"), names_to = "Bounds", values_to = "bound_vals")
iS <- d %>% filter(inversionStart) %>% select(x) %>% rowid_to_column() %>% rename(iS = x)
iE <- d %>% filter(inversionEnd) %>% select(x) %>% rowid_to_column() %>% rename(iE = x)
iD <- iS %>% full_join(iE, by = c("rowid"))
g <- ggplot(dl, mapping = aes(x = x, y = bound_vals, color = Bounds)) +
geom_line(linewidth = 1) +
geom_rect(data = iD, mapping = aes(xmin = iS, xmax = iE, fill = "Inversion"), ymin = -Inf, ymax = Inf, alpha = 0.3, inherit.aes = FALSE) +
scale_fill_manual(name = "Inversions", values = "darkgray") +
theme_light()
g
This gives
which is pretty much what I was after.
Related
I have some troubles with my code. I'm very very beginner in R, so I would like some help. I have a dataframe and I need to make an hist chart and then highlight some points. But I cannot understand how to find those points in my dataset. Here is and example of what I have.
x <- c("a","b","c","d","f","g","h","i","j","k")
y <- c(197421,77506,130474,18365,30470,22518,70183,15378,29747,11148)
z <- data.frame(x,y)
hist(z$y)
For example, how can I find in the hist where is the "a" and "h" value placed? and in a barplot? I tried the function points, but I cannot find the coordinates. Please let me know how could I make that . Thanks in advance.
Here is a way with dplyr and ggplot2. The approach is to cut the y variable into bins and then use summarise to create the counts and the labels.
library(dplyr)
library(ggplot2)
z %>%
mutate(bins = cut(y, seq(0, 200000, 50000))) %>%
group_by(bins) %>%
summarise(xes = paste0(x, collapse = ", "),
count = n()) %>%
ggplot() +
geom_bar(aes(x = bins, y = count), stat = "identity", color = "black", fill = "grey") +
geom_text(aes(x = bins, y = count + 0.5, label = xes)) +
xlab("y")
Here is a more complicated way that makes a plot that looks more like what hist() produces.
z2 <- z %>%
mutate(bins = cut(y, seq(0, 200000, 50000))) %>%
group_by(bins) %>%
summarise(xes = paste0(x, collapse = ", "),
count = n()) %>%
separate(bins, into = c("start", "end"), sep = ",") %>%
mutate(across(start:end, ~as.numeric(str_remove(., "\\(|\\]"))))
ggplot() +
geom_histogram(data = z, aes(x = y), breaks = seq(0, 200000, 50000),
color = "black", fill = "grey") +
geom_text(data = z2, aes(x = (start + end) / 2, y = count + 0.5, label = xes))
I've spent the past few days looking through so many forums and sites, so I hope you can help.
You can find the data I've been using here, as well as the three model predictions.
I'm predicting subjective well-being (i.e. positive affect, negative affect, and life satisfaction) from last night's person-centered sleep satisfaction. I came up with three models that I now want to plot next to each other. The problem is that facet_wrap puts the models next to each other alphabetically and not how I want them (positive affect, negative affect, and life satisfaction).
You can view my current graph here
This is my code to get the graph going:
library("afex")
library("tidyverse")
library("tidylog")
theme_set(theme_bw(base_size = 15))
library("sjPlot")
d3 <- read.csv("d3.csv")
d3 <- d3 %>%
group_by(ID) %>%
mutate(SD_person_centred = sleepDur - mean(sleepDur, na.rm = TRUE)) %>%
mutate(sleep_satisfaction_person_centred = Sleep_quality_open - mean(Sleep_quality_open, na.rm = TRUE)) %>%
mutate(MS_person_centred = mid_sleep_modified - mean(mid_sleep_modified, na.rm = TRUE)) %>%
mutate(MS_person_freeday_centred = abs(mid_sleep_modified -
mean(mid_sleep_modified[Routine_work_day_open == "No"], na.rm = TRUE))) %>%
mutate(MS_person_mctq_centred = abs(mid_sleep_modified - MCTQ_MSF_number)) %>%
mutate(sleep_onset_person_centred = Sleep_Onset_open - mean(Sleep_Onset_open, na.rm = TRUE)) %>%
mutate(sleep_efficiency_person_centred = SleepEfficiency_act - mean(SleepEfficiency_act, na.rm = TRUE)) %>%
ungroup
m_p_sls_1 <- readRDS("m_p_sls_1.rds")
m_n_sls_1 <- readRDS("m_n_sls_1.rds")
m_s_sls_1 <- readRDS("m_s_sls_1.rds")
tmp <- get_model_data(m_p_sls_1$full_model, type = "pred", terms = "sleep_satisfaction_person_centred")
tmp$DV <- "positive_affect"
tmp2 <- get_model_data(m_n_sls_1$full_model, type = "pred", terms = "sleep_satisfaction_person_centred")
tmp2$DV <- "negative_affect"
tmp3 <- get_model_data(m_s_sls_1$full_model, type = "pred", terms = "sleep_satisfaction_person_centred")
tmp3$DV <- "life_satisfaction"
tmp <- bind_rows(tmp, tmp2, tmp3)
tmp
tmp$DV
Here I change tmp$DV into a factor as this was the solution I found online. However, this did not change anything:
tmp$DV <- factor(tmp$DV, levels=c("positive_affect","negative_affect","life_satisfaction"))
levels(tmp$DV)
This is my code for the graph:
variable_names <- list(
"positive_affect" = "positive affect" ,
"negative_affect" = "negative affect",
"life_satisfaction" = "life satisfaction"
)
variable_labeller <- function(variable,value){
return(variable_names[value])
}
d3 %>%
pivot_longer(cols="positive_affect":"life_satisfaction", names_to = "DV", values_to = "Score") %>%
ggplot(aes(x = sleep_satisfaction_person_centred, y = Score)) +
geom_ribbon(data = tmp, aes(x = x, ymin = conf.low, ymax = conf.high, y = predicted),
fill = "lightgrey") +
geom_line(data = tmp, aes(x = x, y = predicted, group = 1)) +
geom_point(alpha = 0.2) +
facet_wrap(~DV, scales = "free_y",labeller=variable_labeller) +
labs(y = "Score",
x = "Sleep satisfaction person centered")
When I give the factor of tmp$DV a different name, i.e. tmp$facet and add this to my code, I do get the right order, but the scales are not free on the y-axis anymore. Please have a look here.
tmp$facet <- factor(tmp$DV, levels=c("positive_affect", "negative_affect", "life_satisfaction"))
d3 %>%
pivot_longer(cols="positive_affect":"life_satisfaction", names_to = "DV", values_to = "Score") %>%
ggplot(aes(x = sleep_satisfaction_person_centred, y = Score)) +
geom_ribbon(data = tmp, aes(x = x, ymin = conf.low, ymax = conf.high, y = predicted),
fill = "lightgrey") +
geom_line(data = tmp, aes(x = x, y = predicted, group = 1)) +
geom_point(alpha = 0.2) +
facet_wrap(~facet, scales = "free_y",labeller=variable_labeller) +
labs(y = "Score",
x = "Sleep satisfaction person centered")
When I change pivot_longer to facet in the first row, I get the same graph as the one before.
Sorry for the long post, but I tried to be as clear as possible. Please let me know if I wasn't.
I'd appreciate any kind of hints. Thanks a lot for your time.
All the best,
Anita
Just got the answer from my colleague Henrik Singmann, in case anybody was wondering:
d3 %>%
pivot_longer(cols="positive_affect":"life_satisfaction", names_to = "DV", values_to = "Score") %>%
mutate(DV = factor(DV, levels=c("positive_affect","negative_affect","life_satisfaction"))) %>%
ggplot(aes(x = sleep_satisfaction_person_centred, y = Score)) +
geom_ribbon(data = tmp, aes(x = x, ymin = conf.low, ymax = conf.high, y = predicted),
fill = "lightgrey") +
geom_line(data = tmp, aes(x = x, y = predicted, group = 1)) +
geom_point(alpha = 0.2) +
facet_wrap(~DV, scales = "free_y",labeller=variable_labeller) +
labs(y = "Score",
x = "Sleep satisfaction person centered")
So the factor needs to be defined in d3 before being handed over to ggplot.
Is there a way to draw an arrow between two pie charts using coordinates from the outer circle of the two pie charts as start and end position? My arrow is drawn by trying with different x's and y's.
#pie chart 1
pie1 <- count(diamonds, cut) %>%
ggplot() +
geom_bar(aes(x = '', y = n, fill = cut), stat = 'identity', width = 1) +
coord_polar('y', start = 0) +
theme_void()+
theme(legend.position = 'none')
#pie chart 2
pie2 <- count(diamonds, color) %>%
ggplot() +
geom_bar(aes(x = '', y = n, fill = color), stat = 'identity', width = 1) +
coord_polar('y', start = 0) +
theme_void()+
theme(legend.position = 'none')
# Plots and arrow combined
grid.newpage()
vp_fig <- viewport() # top plot area
pushViewport(vp_fig)
grid.draw(rectGrob())
vp_pie1 <- viewport(x =.5, y= 1, width = .25, height = .25, just = c('centre', 'top')) #viewport for pie chart 1
pushViewport(vp_pie1)
grid.draw(ggplotGrob(pie1))
popViewport()
vp_pie2 <- viewport(x =.25, y= .5, width = .25, height = .25, just = c('left', 'centre')) #viewport for pie chart 2
pushViewport(vp_pie2)
grid.draw(ggplotGrob(pie2))
popViewport()
upViewport() #move to top plot area
grid.lines(x = c(.45, .37), y = c(.8, .61), arrow = arrow()) # arrow between the pie charts
Here's a possible approach.:
Step 0. Create pie charts, & convert them to a list of grobs:
pie1 <- count(diamonds, fill = cut) %>%
ggplot() +
geom_col(aes(x = '', y = n, fill = fill), width = 1) +
coord_polar('y', start = 0) +
theme_void()+
theme(legend.position = 'none')
pie2 <- pie1 %+% count(diamonds, fill = color)
pie3 <- pie1 %+% count(diamonds, fill = clarity)
pie.list <- list(pie1 = ggplotGrob(pie1),
pie2 = ggplotGrob(pie2),
pie3 = ggplotGrob(pie3))
rm(pie1, pie2, pie3)
Step 1. Define centre coordinates / radius for each pie:
pie.coords <- data.frame(
pie = names(pie.list),
center.x = c(0, 3, 5),
center.y = c(0, 4, 2),
radius = c(1, 1.5, 0.5)
)
Step 2. Calculate the appropriate start & end arrow coordinates for each combination of pies, taking into account each pie's size (assuming each pie can have a different radius value):
arrow.coords <- expand.grid(start = pie.coords$pie,
end = pie.coords$pie,
KEEP.OUT.ATTRS = FALSE,
stringsAsFactors = FALSE) %>%
filter(start != end) %>%
left_join(pie.coords, by = c("start" = "pie")) %>%
left_join(pie.coords, by = c("end" = "pie"))
colnames(arrow.coords) <- colnames(arrow.coords) %>%
gsub(".x$", ".start", .) %>%
gsub(".y$", ".end", .)
arrow.coords <- arrow.coords %>%
mutate(delta.x = center.x.end - center.x.start,
delta.y = center.y.end - center.y.start,
distance = sqrt(delta.x^2 + delta.y^2)) %>%
mutate(start.x = center.x.start + radius.start / distance * delta.x,
start.y = center.y.start + radius.start / distance * delta.y,
end.x = center.x.end - radius.end / distance * delta.x,
end.y = center.y.end - radius.end / distance * delta.y) %>%
select(starts_with("start"),
starts_with("end")) %>%
mutate_at(vars(start, end), factor)
Step 3. Convert pie center / radius into x & y min/max coordinates:
pie.coords <- pie.coords %>%
mutate(xmin = center.x - radius,
xmax = center.x + radius,
ymin = center.y - radius,
ymax = center.y + radius)
Step 4. Define function to create an annotation_custom() layer for each pie (this is optional; I just don't want to type the same thing repeatedly for each pie):
annotation_custom_list <- function(pie.names){
result <- vector("list", length(pie.names) + 1)
for(i in seq_along(pie.names)){
pie <- pie.names[i]
result[[i]] <- annotation_custom(
grob = pie.list[[pie]],
xmin = pie.coords$xmin[pie.coords$pie == pie],
xmax = pie.coords$xmax[pie.coords$pie == pie],
ymin = pie.coords$ymin[pie.coords$pie == pie],
ymax = pie.coords$ymax[pie.coords$pie == pie])
}
# add a blank geom layer to ensure the resulting ggplot's
# scales extend sufficiently to show each pie
result[[length(result)]] <- geom_blank(
data = pie.coords %>% filter(pie %in% pie.names),
aes(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax)
)
return(result)
}
Step 5. Putting it all together:
ggplot() +
# plot pie grobs
annotation_custom_list(c("pie1", "pie2", "pie3")) +
# plot arrows between grobs
# (adjust the filter criteria to only plot between specific pies)
geom_segment(data = arrow.coords %>%
filter(as.integer(start) < as.integer(end)),
aes(x = start.x, y = start.y,
xend = end.x, yend = end.y),
arrow = arrow()) +
# theme_void for clean look
theme_void()
I ended up with this figure which is mostly the code of Z.Lin with a few small modifications:
Step 0
Here I have only added more pies and subsetted the datasets of the pies:
library(tidyverse)
pie1 <- count(diamonds, fill = cut) %>%
ggplot() +
geom_col(aes(x = '', y = n, fill = fill), width = 1) +
coord_polar('y', start = 0) +
scale_fill_manual(values = c('Fair'='green','Good'= 'darkgreen','Very Good'='darkblue','Premium'= 'plum','Ideal'='red'))+
theme_void() +
theme(legend.position = 'none')
pie2 <- pie1 %+% count(subset(diamonds, cut %in% c('Premium', 'Fair')), fill = cut)
pie3 <- pie1 %+% count(subset(diamonds, cut %in% c('Ideal', 'Good')), fill = cut)
pie4 <- pie1 %+% count(subset(diamonds, cut=='Premium'), fill = cut)
pie5 <- pie1 %+% count(subset(diamonds, cut=='Fair'), fill = cut)
pie6 <- pie1 %+% count(subset(diamonds, cut=='Ideal'), fill = cut)
pie7 <- pie1 %+% count(subset(diamonds, cut=='Good'), fill = cut)
pie.list <- list(pie1 = ggplotGrob(pie1),
pie2 = ggplotGrob(pie2),
pie3 = ggplotGrob(pie3),
pie4 = ggplotGrob(pie4),
pie5 = ggplotGrob(pie5),
pie6 = ggplotGrob(pie6),
pie7 = ggplotGrob(pie7))
rm(pie1, pie2, pie3, pie4, pie5, pie6, pie7)
Step 1
No fundamental modifications:
y <- c(1, (1+2*sqrt(3)), (1+4*sqrt(3))) #vector of all y
pie.coords <- data.frame(
pie = names(pie.list),
center.x = c(7,3,11,1,5,9,13),
center.y = c(y[3],y[2],y[2],y[1],y[1],y[1],y[1]),
radius = c(1,1,1,1,1,1,1)
)
Step 2
I modified the length of the arrows by multiplying with a "fudge factor" of .85 (I tried different values until the endpoint fitted with the pies). I wanted only some of the arrows between the pies so I included more filtering. I added a factor for the different colours of arrows.
arrow.coords <- expand.grid(start = pie.coords$pie,
end = pie.coords$pie,
KEEP.OUT.ATTRS = FALSE,
stringsAsFactors = FALSE) %>%
filter(start != end) %>%
filter(start %in% c('pie1', 'pie2', 'pie3')) %>%
filter(end != 'pie1') %>%
left_join(pie.coords, by = c("start" = "pie")) %>%
left_join(pie.coords, by = c("end" = "pie"))
colnames(arrow.coords) <- colnames(arrow.coords) %>%
gsub(".x$", ".start", .) %>%
gsub(".y$", ".end", .)
arrow.coords <- arrow.coords %>%
mutate(delta.x = center.x.end - center.x.start,
delta.y = center.y.end - center.y.start,
distance = sqrt(delta.x^2 + delta.y^2)) %>%
mutate(start.x = center.x.start + radius.start*.85 / distance * delta.x, #multiply with .85 to justify the arrow lengths
start.y = center.y.start + radius.start*.85 / distance * delta.y,
end.x = center.x.end - radius.end*.85 / distance * delta.x,
end.y = center.y.end - radius.end*.85 / distance * delta.y) %>%
select(starts_with("start"),
starts_with("end")) %>%
mutate_at(vars(start, end), factor) %>%
filter(start.y>end.y) %>%
filter(start.y - end.y <4 & abs(start.x-end.x)<4) %>%
mutate(arrowType = factor(paste0(start,end))) %>% #adding factor
mutate(arrowType=recode(arrowType, 'pie1pie2' = 'PremiumFair',
'pie1pie3' = 'IdealGood',
'pie2pie4' = 'Premium',
'pie3pie6' = 'Ideal',
'pie2pie5' = 'Fair',
'pie3pie7'='Good'))
Step 3 and step 4
No changes of the code of Z.Lin.
Step 5
I moved all the filtering of the arrow.coords to Step 2. I modified the formatting of the arrows (thicker and with varying colour) and added labels to the arrows. In addition I added coord_fixed(ratio = 1) to ensure that one unit of x has the same length as one unit of y.
ggplot() +
# plot pie grobs
annotation_custom_list(c("pie1", "pie2", "pie3", "pie4", "pie5", "pie6", "pie7")) +
# plot arrows between grobs
geom_segment(data = arrow.coords,
aes(x = start.x, y = start.y,
xend = end.x, yend = end.y, colour = arrowType),
arrow = arrow(), size = 3, show.legend = FALSE) +
scale_colour_manual(values = c('Fair' = 'green','Good' ='darkgreen', 'Premium'='plum','Ideal' ='red', 'PremiumFair'='plum', 'IdealGood'='red'))+
geom_label(data = arrow.coords, aes(x = (start.x+end.x)/2, y = (start.y+end.y)/2, label = arrowType), size = 8) +
coord_fixed(ratio = 1) +
theme_void() # theme_void for clean look
I am trying to use the ks library to calculate the 95% home range for groups within a data set. The problem is that the "break" values which define the cut-off for the 95% contours differ between groups. So far, I have been able to get my plots, but I have to manually add the break values for each group/level and I would really like to find a solution where I can create figures in ggplot where the break values are imported automatically.
require(ks)
require(dplyr)
require(ggplot2)
# define the ks function to pass to a grouped_df
ksFUN = function(data){
H = Hpi(data[,c("x","y")], binned = TRUE) * 1
fhata = kde(data[,c("x","y")], H = H, compute.cont = TRUE,
xmin = c(minX, minY), xmax = c(maxX, maxY))
res95 = data.frame(HR = contourSizes(fhata, cont = 95, approx = TRUE))
dimnames(fhata[['estimate']]) = list(fhata[["eval.points"]][[1]],
fhata[["eval.points"]][[2]])
dat = reshape2::melt(fhata[['estimate']])
dat$breaks50 = fhata[["cont"]]["50%"]
dat$breaks95 = fhata[["cont"]]["5%"]
return(dat)
}
set.seed(100)
# create some data
df1 = data.frame(x = rnorm(100, 0, 5),
y = rnorm(100, 0, 5),
Group = "Test1")
df2 = data.frame(x = rnorm(100, 10, 5),
y = rnorm(100, 10, 5),
Group = "Test2")
df = rbind(df1, df2)
# Set the minimum and maximum x and y values outside
# of the ksFUN to keep the data on the same scale
minX = min(df$x, na.rm = T); maxX = max(df$x, na.rm = T)
minY = min(df$y, na.rm = T); maxY = max(df$y, na.rm = T)
xx = df %>%
group_by(Group) %>%
do(as.data.frame(ksFUN(.)))
# extract the break value for the 95% contour (home range) and 50% (core area)
breaks = xx %>%
group_by(Group) %>%
summarize(breaks95 = mean(breaks95),
breaks50 = mean(breaks50))
breaks
# The only way I have been able to add the breaks is to manually add them
ggplot(data = xx, aes(x = Var1, y = Var2, fill = Group)) +
geom_point(data = df, aes(x = x, y = y, col = Group)) +
stat_contour(data = xx[xx$Group == "Test1",], aes(z = value),
breaks = 0.000587, alpha = 0.3, geom = "polygon") +
stat_contour(data = xx[xx$Group == "Test2",], aes(z = value),
breaks = 0.000527, alpha = 0.3, geom = "polygon")
I would really like to find a solution where I don't have to explicitly pass the break values to the stat_contour functions
Is there a problem with using the breaks column in breaks? e.g.
# base plot
pl <- ggplot(data = xx, aes(x = Var1, y = Var2, fill = Group)) +
geom_point(data = df, aes(x = x, y = y, col = Group))
groups <- unique(xx$Group)
# loop and add for each group
for(i in groups){
pl <- pl + stat_contour(data = xx[xx$Group == i,], aes(z = value),
breaks = breaks[breaks$Group == i, ]$breaks,
alpha = 0.3, geom = "polygon")
}
pl
I get some weird plots, at the edges, especially when I remove the breaks part from stat_contour, which leads me to think there might be a bug in ksFUN
I need a facetted boxplot. The x-axis for the plots is a quantitative variable, and I want to reflect this information on the plot. The scale of the abscissa is very different among the facets.
My problem is that the widths of the boxes are very small for the facet with the large scale.
A possible explanation is that the width of the boxes is the same for all facets, whereas it should ideally be determined by the xlims of each facet individually.
I would be grateful for two inputs:
Do you think this is a bug and should be reported ?
Do you have a solution ?
Thanks in advance !
Remark: transforming the abscissa to a categorical variable could be one solution, but it is not perfect as it would result in a loss of some information.
Minimal working example:
library(tidyverse)
c(1:4,7) %>%
c(.,10*.) %>% # Create abscissa on two different scales
lapply(FUN = function(x) {tibble(x = x, y = rnorm(50), idx = ifelse(test = x<8, yes = 'A', no = 'B'))}) %>% # Create sample (y) and label (idx)
bind_rows() %>%
ggplot(aes(x = x, y = y, group = x)) +
geom_boxplot() +
facet_wrap(~idx, scales = 'free')
Result:
A cumbersome solution would be to redraw the boxplot from scratch, but this is not very satisfying:
draw_boxplot = function(locations, width, ymin, lower, middle, upper, ymax, idx){
local_df = tibble(locations = locations, width = width, ymin = ymin, lower = lower, middle = middle, upper = upper, ymax = ymax, idx = idx)
ggplot(data = local_df) +
geom_rect(aes(xmin = locations - width/2, xmax = locations + width/2, ymin = lower, ymax = upper), fill = 'white', colour = 'black') +
geom_segment(aes(x = locations - width/2, xend = locations + width/2, y = middle, yend = middle), size = 0.8) +
geom_segment(aes(x = locations, xend = locations, y = upper, yend = ymax)) +
geom_segment(aes(x = locations, xend = locations, y = lower, yend = ymin)) +
facet_wrap(~idx, scales = 'free_x')
}
make_boxplot = function(to_plot){
to_plot %>%
cmp_boxplot %>%
(function(x){
draw_boxplot(locations = x$x, width = x$width, ymin = x$y0, lower = x$y25, middle = x$y50, upper = x$y75, ymax = x$y100, idx = x$idx)
})
}
cmp_boxplot = function(to_plot){
to_plot %>%
group_by(idx) %>%
mutate(width = 0.6*(max(x) - min(x))/length(unique(x))) %>% #hand specified width
group_by(x) %>%
mutate(y0 = min(y),
y25 = quantile(y, 0.25),
y50 = median(y),
y75 = quantile(y, 0.75),
y100 = max(y)) %>%
select(-y) %>%
unique()
}
c(1:4,7) %>%
c(.,10*.) %>%
lapply(FUN = function(x) {tibble(x = x, y = rnorm(50), idx = ifelse(test = x<8, yes = 'A', no = 'B'))}) %>%
bind_rows() %>%
make_boxplot
Result:
Since geom_boxplot doesn't allow varying width as an aesthetic, you have to write your own. Fortunately it's not too complicated.
bp_custom <- function(vals, type) {
bp = boxplot.stats(vals)
if(type == "whiskers") {
y = bp$stats[1]
yend = bp$stats[5]
return(data.frame(y = y, yend = yend))
}
if(type == "box") {
ymin = bp$stats[2]
ymax = bp$stats[4]
return(data.frame(ymin = ymin, ymax = ymax))
}
if(type == "median") {
y = median(vals)
yend = median(vals)
return(data.frame(y = y, yend = yend))
}
if(type == "outliers") {
y = bp$out
return(data.frame(y = y))
} else {
return(warning("Type must be one of 'whiskers', 'box', 'median', or 'outliers'."))
}
}
This function does all the computation and returns dataframes suitable for use in stat_summary. Then we call it in several different layers to construct the various bits of a boxplot. Note that you need to compute the width of the boxplot per group of the facet, done below using dplyr in your pipe. I calculated the width such that the range of x gets split up into equal segments based on the number of unique x values, then each box gets about 1/2 the width of that segment. Your data may need a different adjustment.
library(dplyr)
c(1:4,7) %>%
c(.,10*.) %>% # Create abscissa on two different scales
lapply(FUN = function(x) {
tibble(x = x, y = rnorm(50), idx = ifelse(test = x<8, yes = 'A', no = 'B'))
}) %>%
bind_rows() %>%
group_by(idx) %>% # NOTE THIS LINE
mutate(width = 0.25*diff(range(x))/length(unique(x))) %>% # NOTE THIS LINE
ggplot(aes(x = x, y = y, group = x)) +
stat_summary(fun.data = bp_custom, fun.args = "whiskers",
geom = "segment", aes(xend = x)) +
stat_summary(fun.data = bp_custom, fun.args = "box",
geom = "rect", aes(xmin = x - width, xmax = x + width),
fill = "white", color = "black") +
stat_summary(fun.data = bp_custom, fun.args = "median",
geom = "segment", aes(x = x - width, xend = x + width), size = 1.5) +
stat_summary(fun.data = bp_custom, fun.args = "outliers",
geom = "point") +
facet_wrap(~idx, scales = 'free')
As for reporting this as a bug (actually a desired feature), I think it's an infrequent enough use case that they won't prioritize it. If you wrap this code up into a custom geom (based on here) and submit a pull-request, you might get more luck.