Related
I am trying to use the stat_density_2d() function to create a heat map based on GPS coordinates and would like to crop the output to a polygon in ggplot(). Here is an example:
x_coord <- c(-83, -82, -82, -83, -83)
y_coord <- c(41, 41, 42, 42, 41)
xy_coords <- cbind(x_coord, y_coord)
library(sp)
poly <- Polygon(xy_coords)
polys <- Polygons(list(poly), 1)
sp.polys <- SpatialPolygons(list(polys))
set.seed(2)
lon <- c(rnorm(20, -82.2, 0.1), rnorm(20, -82.1, 0.04))
lat <- c(rnorm(20, 41.2, 0.1), rnorm(20, 41.1, 0.02))
lon_lat <- data.frame(lon = lon,
lat = lat)
ggplot() +
geom_polygon(data = sp.polys, aes(x_coord, y_coord), fill = 'transparent', color = 'black') +
stat_density_2d(data = lon_lat, aes(x = lon, y = lat, fill = ..level.., alpha = 0.3), geom =
'polygon') +
scale_fill_gradientn(colours=rev(brewer.pal(7, "Spectral"))) +
scale_x_continuous(limits = c(-83.25, -81.75)) +
scale_y_continuous(limits = c(40.75, 42.25))
which produces this figure
Is there a way to crop the stat_density output to remove the portion outside the square? Any thoughts or suggestions are greatly appreciated.
I was unable to find a way to crop the stat_density_2d output to a polygon but using the code below, can mask the area outside the polygon. The code below assumes that you have already used the code posted in the questions to create the polygon and stat_density_2d output.
library(sp)
library(ggplot2)
library(RColorBrewer)
library(rgeos)
library(raster)
x_coord2 <- c(-83.2, -81.8, -81.8, -83.2, -83.2)
y_coord2 <- c(40.8, 40.8, 42.2, 42.2, 40.8)
xy_coords2 <- cbind(x_coord2, y_coord2)
poly2 <- Polygon(xy_coords2)
polys2 <- Polygons(list(poly2), 1)
sp.polys2 <- SpatialPolygons(list(polys2))
r <- raster(x = extent(sp.polys2))
res(r) <- 0.003
r <- setValues(r, 1)
r <- mask(r, sp.polys, inverse = T)
rdf <- data.frame(rasterToPoints(r))
ggplot() +
stat_density_2d(data = lon_lat, aes(x = lon, y = lat, fill = ..level..), alpha =
0.7, geom = 'polygon') +
scale_fill_gradientn(colours=rev(brewer.pal(7, "Spectral"))) +
scale_x_continuous(limits = c(-83.25, -81.75)) +
scale_y_continuous(limits = c(40.75, 42.25)) +
geom_tile(data = rdf, aes(x, y), fill = 'white') +
geom_polygon(data = sp.polys, aes(x_coord, y_coord), fill = 'transparent', color =
'black') +
theme_bw() +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank())
I would like to produce a graphic combining four facets of a graph with insets in each facet showing a detail of the respective plot. This is one of the things I tried:
#create data frame
n_replicates <- c(rep(1:10,15),rep(seq(10,100,10),15),rep(seq(100,1000,100),15),rep(seq(1000,10000,1000),15))
sim_years <- rep(sort(rep((1:15),10)),4)
sd_data <- rep (NA,600)
for (i in 1:600) {
sd_data[i]<-rnorm(1,mean=exp(0.1 * sim_years[i]), sd= 1/n_replicates[i])
}
max_rep <- sort(rep(c(10,100,1000,10000),150))
data_frame <- cbind.data.frame(n_replicates,sim_years,sd_data,max_rep)
#do first basic plot
library(ggplot2)
plot1<-ggplot(data=data_frame, aes(x=sim_years,y=sd_data,group =n_replicates, col=n_replicates)) +
geom_line() + theme_bw() +
labs(title ="", x = "year", y = "sd")
plot1
#make four facets
my_breaks = c(2, 10, 100, 1000, 10000)
facet_names <- c(
`10` = "2, 3, ..., 10 replicates",
`100` = "10, 20, ..., 100 replicates",
`1000` = "100, 200, ..., 1000 replicates",
`10000` = "1000, 2000, ..., 10000 replicates"
)
plot2 <- plot1 +
facet_wrap( ~ max_rep, ncol=2, labeller = as_labeller(facet_names)) +
scale_colour_gradientn(name = "number of replicates", trans = "log",
breaks = my_breaks, labels = my_breaks, colours = rainbow(20))
plot2
#extract inlays (this is where it goes wrong I think)
library(ggpmisc)
library(tibble)
library(dplyr)
inset <- tibble(x = 0.01, y = 10.01,
plot = list(plot2 +
facet_wrap( ~ max_rep, ncol=2, labeller = as_labeller(facet_names)) +
coord_cartesian(xlim = c(13, 15),
ylim = c(3, 5)) +
labs(x = NULL, y = NULL, color = NULL) +
scale_colour_gradient(guide = FALSE) +
theme_bw(10)))
plot3 <- plot2 +
expand_limits(x = 0, y = 0) +
geom_plot_npc(data = inset, aes(npcx = x, npcy = y, label = plot)) +
annotate(geom = "rect",
xmin = 13, xmax = 15, ymin = 3, ymax = 5,
linetype = "dotted", fill = NA, colour = "black")
plot3
That leads to the following graphic:
As you can see, the colours in the insets are wrong, and all four of them appear in each of the facets even though I only want the corresponding inset of course. I read through a lot of questions here (to even get me this far) and also some examples in the ggpmisc user guide but unfortunately I am still a bit lost on how to achieve what I want. Except maybe to do it by hand extracting four insets and then combining them with plot2. But I hope there will be a better way to do this. Thank you for your help!
Edit: better graphic now thanks to this answer, but problem remains partially unsolved:
The following code does good insets, but unfortunately the colours are not preserved. As in the above version each inset does its own rainbow colours anew instead of inheriting the partial rainbow scale from the facet it belongs to. Does anyone know why and how I could change this? In comments I put another (bad) attempt at solving this, it preserves the colors but has the problem of putting all four insets in each facet.
library(ggpmisc)
library(tibble)
library(dplyr)
# #extract inlays: good colours, but produces four insets.
# fourinsets <- tibble(#x = 0.01, y = 10.01,
# x = c(rep(0.01, 4)),
# y = c(rep(10.01, 4)),
# plot = list(plot2 +
# facet_wrap( ~ max_rep, ncol=2) +
# coord_cartesian(xlim = c(13, 15),
# ylim = c(3, 5)) +
# labs(x = NULL, y = NULL, color = NULL) +
# scale_colour_gradientn(name = "number of replicates", trans = "log", guide = FALSE,
# colours = rainbow(20)) +
# theme(
# strip.background = element_blank(),
# strip.text.x = element_blank()
# )
# ))
# fourinsets$plot
library(purrr)
pp <- map(unique(data_frame$max_rep), function(x) {
plot2$data <- plot2$data %>% filter(max_rep == x)
plot2 +
coord_cartesian(xlim = c(12, 14),
ylim = c(3, 4)) +
labs(x = NULL, y = NULL) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank(),
legend.position = "none",
axis.text=element_blank(),
axis.ticks=element_blank()
)
})
#pp[[2]]
inset_new <- tibble(x = c(rep(0.01, 4)),
y = c(rep(10.01, 4)),
plot = pp,
max_rep = unique(data_frame$max_rep))
final_plot <- plot2 +
geom_plot_npc(data = inset_new, aes(npcx = x, npcy = y, label = plot, vp.width = 0.3, vp.height =0.6)) +
annotate(geom = "rect",
xmin = 12, xmax = 14, ymin = 3, ymax = 4,
linetype = "dotted", fill = NA, colour = "black")
#final_plot
final_plot then looks like this:
I hope this clarifies the problem a bit. Any ideas are very welcome :)
Modifying off #user63230's excellent answer:
pp <- map(unique(data_frame$max_rep), function(x) {
plot2 +
aes(alpha = ifelse(max_rep == x, 1, 0)) +
coord_cartesian(xlim = c(12, 14),
ylim = c(3, 4)) +
labs(x = NULL, y = NULL) +
scale_alpha_identity() +
facet_null() +
theme(
strip.background = element_blank(),
strip.text.x = element_blank(),
legend.position = "none",
axis.text=element_blank(),
axis.ticks=element_blank()
)
})
Explanation:
Instead of filtering the data passed into plot2 (which affects the mapping of colours), we impose a new aesthetic alpha, where lines belonging to the other replicate numbers are assigned 0 for transparency;
Use scale_alpha_identity() to tell ggplot that the alpha mapping is to be used as-is: i.e. 1 for 100%, 0 for 0%.
Add facet_null() to override plot2's existing facet_wrap, which removes the facet for the inset.
Everything else is unchanged from the code in the question.
I think this will get you started although its tricky to get the size of the inset plot right (when you include a legend).
#set up data
library(ggpmisc)
library(tibble)
library(dplyr)
library(ggplot2)
# create data frame
n_replicates <- c(rep(1:10, 15), rep(seq(10, 100, 10), 15), rep(seq(100,
1000, 100), 15), rep(seq(1000, 10000, 1000), 15))
sim_years <- rep(sort(rep((1:15), 10)), 4)
sd_data <- rep(NA, 600)
for (i in 1:600) {
sd_data[i] <- rnorm(1, mean = exp(0.1 * sim_years[i]), sd = 1/n_replicates[i])
}
max_rep <- sort(rep(c(10, 100, 1000, 10000), 150))
data_frame <- cbind.data.frame(n_replicates, sim_years, sd_data, max_rep)
# make four facets
my_breaks = c(2, 10, 100, 1000, 10000)
facet_names <- c(`10` = "2, 3, ..., 10 replicates", `100` = "10, 20, ..., 100 replicates",
`1000` = "100, 200, ..., 1000 replicates", `10000` = "1000, 2000, ..., 10000 replicates")
Get overall plot:
# overall facet plot
overall_plot <- ggplot(data = data_frame, aes(x = sim_years, y = sd_data, group = n_replicates, col = n_replicates)) +
geom_line() +
theme_bw() +
labs(title = "", x = "year", y = "sd") +
facet_wrap(~max_rep, ncol = 2, labeller = as_labeller(facet_names)) +
scale_colour_gradientn(name = "number of replicates", trans = "log", breaks = my_breaks, labels = my_breaks, colours = rainbow(20))
#plot
overall_plot
which gives:
Then from the overall plot you want to extract each plot, see here. We can map over the list to extract one at a time:
pp <- map(unique(data_frame$max_rep), function(x) {
overall_plot$data <- overall_plot$data %>% filter(max_rep == x)
overall_plot + # coord_cartesian(xlim = c(13, 15), ylim = c(3, 5)) +
labs(x = NULL, y = NULL) +
theme_bw(10) +
theme(legend.position = "none")
})
If we look at one of these (I've removed the legend) e.g.
pp[[1]]
#pp[[2]]
#pp[[3]]
#pp[[4]]
Gives:
Then we want to add these inset plots into a dataframe so that each plot has its own row:
inset <- tibble(x = c(rep(0.01, 4)),
y = c(rep(10.01, 4)),
plot = pp,
max_rep = unique(data_frame$max_rep))
Then merge this into the overall plot:
overall_plot +
expand_limits(x = 0, y = 0) +
geom_plot_npc(data = inset, aes(npcx = x, npcy = y, label = plot, vp.width = 0.8, vp.height = 0.8))
Gives:
Here is a solution based on Z. Lin's answer, but using ggforce::facet_wrap_paginate() to do the filtering and keeping colourscales consistent.
First, we can make the 'root' plot containing all the data with no facetting.
library(ggpmisc)
library(tibble)
library(dplyr)
n_replicates <- c(rep(1:10,15),rep(seq(10,100,10),15),rep(seq(100,1000,100),15),rep(seq(1000,10000,1000),15))
sim_years <- rep(sort(rep((1:15),10)),4)
sd_data <- rep (NA,600)
for (i in 1:600) {
sd_data[i]<-rnorm(1,mean=exp(0.1 * sim_years[i]), sd= 1/n_replicates[i])
}
max_rep <- sort(rep(c(10,100,1000,10000),150))
data_frame <- cbind.data.frame(n_replicates,sim_years,sd_data,max_rep)
my_breaks = c(2, 10, 100, 1000, 10000)
facet_names <- c(
`10` = "2, 3, ..., 10 replicates",
`100` = "10, 20, ..., 100 replicates",
`1000` = "100, 200, ..., 1000 replicates",
`10000` = "1000, 2000, ..., 10000 replicates"
)
base <- ggplot(data=data_frame,
aes(x=sim_years,y=sd_data,group =n_replicates, col=n_replicates)) +
geom_line() +
theme_bw() +
scale_colour_gradientn(
name = "number of replicates",
trans = "log10", breaks = my_breaks,
labels = my_breaks, colours = rainbow(20)
) +
labs(title ="", x = "year", y = "sd")
Next, the main plot will be just the root plot with facet_wrap().
main <- base + facet_wrap(~ max_rep, ncol = 2, labeller = as_labeller(facet_names))
Then the new part is to use facet_wrap_paginate with nrow = 1 and ncol = 1 for every max_rep, which we'll use as insets. The nice thing is that this does the filtering and it keeps colour scales consistent with the root plot.
nmax_rep <- length(unique(data_frame$max_rep))
insets <- lapply(seq_len(nmax_rep), function(i) {
base + ggforce::facet_wrap_paginate(~ max_rep, nrow = 1, ncol = 1, page = i) +
coord_cartesian(xlim = c(12, 14), ylim = c(3, 4)) +
guides(colour = "none", x = "none", y = "none") +
theme(strip.background = element_blank(),
strip.text = element_blank(),
axis.title = element_blank(),
plot.background = element_blank())
})
insets <- tibble(x = rep(0.01, nmax_rep),
y = rep(10.01, nmax_rep),
plot = insets,
max_rep = unique(data_frame$max_rep))
main +
geom_plot_npc(data = insets,
aes(npcx = x, npcy = y, label = plot,
vp.width = 0.3, vp.height = 0.6)) +
annotate(geom = "rect",
xmin = 12, xmax = 14, ymin = 3, ymax = 4,
linetype = "dotted", fill = NA, colour = "black")
Created on 2020-12-15 by the reprex package (v0.3.0)
If I want to plot a map on plot, I do this
library(ggplot2)
library(sf)
library(cowplot)
library(gridExtra)
library(raster)
df <- data.frame(x = 1980:1999, y = sample(1:20, 20, replace = T))
shp.dat <- getData('GADM', country='FRA', level=1)
shp.dat <- shp.dat %>% st_as_sf()
p <- ggplot(df, aes(x = x, y = y)) + geom_line() + ylim(0, 100)
p.shp <- ggplot() +
geom_sf(data = shp.dat, fill = ifelse(shp.dat$ID_1 == 1,"red", "grey"))
inset_map <- ggdraw() + draw_plot(p, 0, 0, 1, 1) + draw_plot(p.shp, 0.5, 0.52, 0.5, 0.4)
inset_map
Now I have four plots which I have plotted like this
df <- data.frame(x = 1980:1999,
y1 = sample(1:20, 20, replace = T),
y2 = sample(1:20, 20, replace = T),
y3 = sample(1:20, 20, replace = T),
y4 = sample(1:20, 20, replace = T))
p1 <- ggplot(df, aes(x = x, y = y1)) + geom_line() + ylim(0, 100)
p2 <- ggplot(df, aes(x = x, y = y2)) + geom_line() + ylim(0, 100)
p3 <- ggplot(df, aes(x = x, y = y3)) + geom_line() + ylim(0, 100)
p4 <- ggplot(df, aes(x = x, y = y4)) + geom_line() + ylim(0, 100)
pp <- grid.arrange(p1, p2, p3, p4, ncol = 2)
For each plot, I want to add the map as an inset
inset_map <- ggdraw() + draw_plot(pp, 0, 0, 1, 1) + draw_plot(p.shp, 0.5, 0.52, 0.5, 0.4)
How can I show the map as an inset in each plot separately?
You can apply the same code logic you used to generate the single inset_map, before passing the results to grid.arrange():
plot.list <- list(p1, p2, p3, p4)
plot.list %>%
# add inset map to each plot in the list
lapply(function(p) ggdraw() +
draw_plot(p, 0, 0, 1, 1) +
draw_plot(p.shp, 0.5, 0.52, 0.5, 0.4)) %>%
# convert each plot in the list to grob
lapply(ggplotGrob) %>%
# arrange in grid, as before
grid.arrange(grobs = ., ncol = 2)
I saw this great plot from fivethirty that has a slight overlap of density plots for different colleges. Check out this link at fivethirtyeight.com
How would you replicate this plot with ggplot2?
Specifically how would you get that slight overlap, facet_wrap isn't going to work.
TestFrame <-
data.frame(
Score =
c(rnorm(100, 0, 1)
,rnorm(100, 0, 2)
,rnorm(100, 0, 3)
,rnorm(100, 0, 4)
,rnorm(100, 0, 5))
,Group =
c(rep('Ones', 100)
,rep('Twos', 100)
,rep('Threes', 100)
,rep('Fours', 100)
,rep('Fives', 100))
)
ggplot(TestFrame, aes(x = Score, group = Group)) +
geom_density(alpha = .75, fill = 'black')
As always with ggplot, the key is getting the data in the right format, and then the plotting is pretty straightforward. I'm sure there would be another way to do this, but my approach was to do the density estimation with density() and then to make a sort of manual geom_density() with geom_ribbon(), which takes a ymin and ymax, necessary for moving the shape off the x axis.
The rest of the challenge was in getting the order of the printing correct, since it seems that ggplot will print the widest ribbon first. In the end, the part that requires the bulkiest code is the production of the quartiles.
I also produced some data that is a bit more consistent with the original figure.
library(ggplot2)
library(dplyr)
library(broom)
rawdata <- data.frame(Score = rnorm(1000, seq(1, 0, length.out = 10), sd = 1),
Group = rep(LETTERS[1:10], 10000))
df <- rawdata %>%
mutate(GroupNum = rev(as.numeric(Group))) %>% #rev() means the ordering will be from top to bottom
group_by(Group, GroupNum) %>%
do(tidy(density(.$Score, bw = diff(range(.$Score))/20))) %>% #The original has quite a large bandwidth
group_by() %>%
mutate(ymin = GroupNum * (max(y) / 1.5), #This constant controls how much overlap between groups there is
ymax = y + ymin,
ylabel = ymin + min(ymin)/2,
xlabel = min(x) - mean(range(x))/2) #This constant controls how far to the left the labels are
#Get quartiles
labels <- rawdata %>%
mutate(GroupNum = rev(as.numeric(Group))) %>%
group_by(Group, GroupNum) %>%
mutate(q1 = quantile(Score)[2],
median = quantile(Score)[3],
q3 = quantile(Score)[4]) %>%
filter(row_number() == 1) %>%
select(-Score) %>%
left_join(df) %>%
mutate(xmed = x[which.min(abs(x - median))],
yminmed = ymin[which.min(abs(x - median))],
ymaxmed = ymax[which.min(abs(x - median))]) %>%
filter(row_number() == 1)
p <- ggplot(df, aes(x, ymin = ymin, ymax = ymax)) + geom_text(data = labels, aes(xlabel, ylabel, label = Group)) +
geom_vline(xintercept = 0, size = 1.5, alpha = 0.5, colour = "#626262") +
geom_vline(xintercept = c(-2.5, -1.25, 1.25, 2.5), size = 0.75, alpha = 0.25, colour = "#626262") +
theme(panel.grid = element_blank(),
panel.background = element_rect(fill = "#F0F0F0"),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank())
for (i in unique(df$GroupNum)) {
p <- p + geom_ribbon(data = df[df$GroupNum == i,], aes(group = GroupNum), colour = "#F0F0F0", fill = "black") +
geom_segment(data = labels[labels$GroupNum == i,], aes(x = xmed, xend = xmed, y = yminmed, yend = ymaxmed), colour = "#F0F0F0", linetype = "dashed") +
geom_segment(data = labels[labels$GroupNum == i,], x = min(df$x), xend = max(df$x), aes(y = ymin, yend = ymin), size = 1.5, lineend = "round")
}
p <- p + geom_text(data = labels[labels$Group == "A",], aes(xmed - xlabel/50, ylabel),
label = "Median", colour = "#F0F0F0", hjust = 0, fontface = "italic", size = 4)
Edit
I noticed the original actually does a bit of fudging by stretching out each distribution with a horizontal line (you can see a join if you look closely...). I added something similar with the second geom_segment() in the loop.
Although there is a great & accepted answer available already - I finished my contribution as an alternative avenue without data reformatting.
TestFrame <-
data.frame(
Score =
c(rnorm(50, 3, 2)+rnorm(50, -1, 3)
,rnorm(50, 3, 2)+rnorm(50, -2, 3)
,rnorm(50, 3, 2)+rnorm(50, -3, 3)
,rnorm(50, 3, 2)+rnorm(50, -4, 3)
,rnorm(50, 3, 2)+rnorm(50, -5, 3))
,Group =
c(rep('Ones', 50)
,rep('Twos', 50)
,rep('Threes', 50)
,rep('Fours', 50)
,rep('Fives', 50))
)
require(ggplot2)
require(grid)
spacing=0.05
tm <- theme(legend.position="none", axis.line=element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(),axis.ticks=element_blank(),
axis.title.x=element_blank(),axis.title.y=element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(),
plot.background = element_rect(fill = "transparent",colour = NA),
plot.margin = unit(c(0,0,0,0),"mm"))
firstQuintile = quantile(TestFrame$Score,0.2)
secondQuintile = quantile(TestFrame$Score,0.4)
median = quantile(TestFrame$Score,0.5)
thirdQuintile = quantile(TestFrame$Score,0.6)
fourthQuintile = quantile(TestFrame$Score,0.8)
ymax <- 1.5*max(density(TestFrame[TestFrame$Group=="Ones",]$Score)$y)
xmax <- 1.2*max(TestFrame$Score)
xmin <- 1.2*min(TestFrame$Score)
p0 <- ggplot(TestFrame[TestFrame$Group=="Ones",], aes(x = Score, group = Group)) + geom_density(fill = "transparent",colour = NA)+ylim(0-5*spacing,ymax)+xlim(xmin,xmax)+tm
p0 <- p0 + geom_vline(aes(xintercept=firstQuintile),color="gray",size=1.2)
p0 <- p0 + geom_vline(aes(xintercept=secondQuintile),color="gray",size=1.2)
p0 <- p0 + geom_vline(aes(xintercept=thirdQuintile),color="gray",size=1.2)
p0 <- p0 + geom_vline(aes(xintercept=fourthQuintile),color="gray",size=1.2)
p0 <- p0 + geom_vline(aes(xintercept=median),color="darkgray",size=2)
#previous line is a little hack for creating a working empty grid with proper sizing
p1 <- ggplot(TestFrame[TestFrame$Group=="Ones",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
p2 <- ggplot(TestFrame[TestFrame$Group=="Twos",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
p3 <- ggplot(TestFrame[TestFrame$Group=="Threes",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
p4 <- ggplot(TestFrame[TestFrame$Group=="Fours",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
p5 <- ggplot(TestFrame[TestFrame$Group=="Fives",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
f <- grobTree(ggplotGrob(p1))
g <- grobTree(ggplotGrob(p2))
h <- grobTree(ggplotGrob(p3))
i <- grobTree(ggplotGrob(p4))
j <- grobTree(ggplotGrob(p5))
a1 <- annotation_custom(grob = f, xmin = xmin, xmax = xmax,ymin = -spacing, ymax = ymax)
a2 <- annotation_custom(grob = g, xmin = xmin, xmax = xmax,ymin = -spacing*2, ymax = ymax-spacing)
a3 <- annotation_custom(grob = h, xmin = xmin, xmax = xmax,ymin = -spacing*3, ymax = ymax-spacing*2)
a4 <- annotation_custom(grob = i, xmin = xmin, xmax = xmax,ymin = -spacing*4, ymax = ymax-spacing*3)
a5 <- annotation_custom(grob = j, xmin = xmin, xmax = xmax,ymin = -spacing*5, ymax = ymax-spacing*4)
pfinal <- p0 + a1 + a2 + a3 + a4 + a5
pfinal
Using dedicated geom_joy() from ggjoy package:
library(ggjoy)
ggplot(TestFrame, aes(Score, Group)) +
geom_joy()
# dummy data
set.seed(1)
TestFrame <-
data.frame(
Score =
c(rnorm(100, 0, 1)
,rnorm(100, 0, 2)
,rnorm(100, 0, 3)
,rnorm(100, 0, 4)
,rnorm(100, 0, 5))
,Group =
c(rep('Ones', 100)
,rep('Twos', 100)
,rep('Threes', 100)
,rep('Fours', 100)
,rep('Fives', 100))
)
head(TestFrame)
# Score Group
# 1 -0.6264538 Ones
# 2 0.1836433 Ones
# 3 -0.8356286 Ones
# 4 1.5952808 Ones
# 5 0.3295078 Ones
# 6 -0.8204684 Ones
I saw this great plot from fivethirty that has a slight overlap of density plots for different colleges. Check out this link at fivethirtyeight.com
How would you replicate this plot with ggplot2?
Specifically how would you get that slight overlap, facet_wrap isn't going to work.
TestFrame <-
data.frame(
Score =
c(rnorm(100, 0, 1)
,rnorm(100, 0, 2)
,rnorm(100, 0, 3)
,rnorm(100, 0, 4)
,rnorm(100, 0, 5))
,Group =
c(rep('Ones', 100)
,rep('Twos', 100)
,rep('Threes', 100)
,rep('Fours', 100)
,rep('Fives', 100))
)
ggplot(TestFrame, aes(x = Score, group = Group)) +
geom_density(alpha = .75, fill = 'black')
As always with ggplot, the key is getting the data in the right format, and then the plotting is pretty straightforward. I'm sure there would be another way to do this, but my approach was to do the density estimation with density() and then to make a sort of manual geom_density() with geom_ribbon(), which takes a ymin and ymax, necessary for moving the shape off the x axis.
The rest of the challenge was in getting the order of the printing correct, since it seems that ggplot will print the widest ribbon first. In the end, the part that requires the bulkiest code is the production of the quartiles.
I also produced some data that is a bit more consistent with the original figure.
library(ggplot2)
library(dplyr)
library(broom)
rawdata <- data.frame(Score = rnorm(1000, seq(1, 0, length.out = 10), sd = 1),
Group = rep(LETTERS[1:10], 10000))
df <- rawdata %>%
mutate(GroupNum = rev(as.numeric(Group))) %>% #rev() means the ordering will be from top to bottom
group_by(Group, GroupNum) %>%
do(tidy(density(.$Score, bw = diff(range(.$Score))/20))) %>% #The original has quite a large bandwidth
group_by() %>%
mutate(ymin = GroupNum * (max(y) / 1.5), #This constant controls how much overlap between groups there is
ymax = y + ymin,
ylabel = ymin + min(ymin)/2,
xlabel = min(x) - mean(range(x))/2) #This constant controls how far to the left the labels are
#Get quartiles
labels <- rawdata %>%
mutate(GroupNum = rev(as.numeric(Group))) %>%
group_by(Group, GroupNum) %>%
mutate(q1 = quantile(Score)[2],
median = quantile(Score)[3],
q3 = quantile(Score)[4]) %>%
filter(row_number() == 1) %>%
select(-Score) %>%
left_join(df) %>%
mutate(xmed = x[which.min(abs(x - median))],
yminmed = ymin[which.min(abs(x - median))],
ymaxmed = ymax[which.min(abs(x - median))]) %>%
filter(row_number() == 1)
p <- ggplot(df, aes(x, ymin = ymin, ymax = ymax)) + geom_text(data = labels, aes(xlabel, ylabel, label = Group)) +
geom_vline(xintercept = 0, size = 1.5, alpha = 0.5, colour = "#626262") +
geom_vline(xintercept = c(-2.5, -1.25, 1.25, 2.5), size = 0.75, alpha = 0.25, colour = "#626262") +
theme(panel.grid = element_blank(),
panel.background = element_rect(fill = "#F0F0F0"),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank())
for (i in unique(df$GroupNum)) {
p <- p + geom_ribbon(data = df[df$GroupNum == i,], aes(group = GroupNum), colour = "#F0F0F0", fill = "black") +
geom_segment(data = labels[labels$GroupNum == i,], aes(x = xmed, xend = xmed, y = yminmed, yend = ymaxmed), colour = "#F0F0F0", linetype = "dashed") +
geom_segment(data = labels[labels$GroupNum == i,], x = min(df$x), xend = max(df$x), aes(y = ymin, yend = ymin), size = 1.5, lineend = "round")
}
p <- p + geom_text(data = labels[labels$Group == "A",], aes(xmed - xlabel/50, ylabel),
label = "Median", colour = "#F0F0F0", hjust = 0, fontface = "italic", size = 4)
Edit
I noticed the original actually does a bit of fudging by stretching out each distribution with a horizontal line (you can see a join if you look closely...). I added something similar with the second geom_segment() in the loop.
Although there is a great & accepted answer available already - I finished my contribution as an alternative avenue without data reformatting.
TestFrame <-
data.frame(
Score =
c(rnorm(50, 3, 2)+rnorm(50, -1, 3)
,rnorm(50, 3, 2)+rnorm(50, -2, 3)
,rnorm(50, 3, 2)+rnorm(50, -3, 3)
,rnorm(50, 3, 2)+rnorm(50, -4, 3)
,rnorm(50, 3, 2)+rnorm(50, -5, 3))
,Group =
c(rep('Ones', 50)
,rep('Twos', 50)
,rep('Threes', 50)
,rep('Fours', 50)
,rep('Fives', 50))
)
require(ggplot2)
require(grid)
spacing=0.05
tm <- theme(legend.position="none", axis.line=element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(),axis.ticks=element_blank(),
axis.title.x=element_blank(),axis.title.y=element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(),
plot.background = element_rect(fill = "transparent",colour = NA),
plot.margin = unit(c(0,0,0,0),"mm"))
firstQuintile = quantile(TestFrame$Score,0.2)
secondQuintile = quantile(TestFrame$Score,0.4)
median = quantile(TestFrame$Score,0.5)
thirdQuintile = quantile(TestFrame$Score,0.6)
fourthQuintile = quantile(TestFrame$Score,0.8)
ymax <- 1.5*max(density(TestFrame[TestFrame$Group=="Ones",]$Score)$y)
xmax <- 1.2*max(TestFrame$Score)
xmin <- 1.2*min(TestFrame$Score)
p0 <- ggplot(TestFrame[TestFrame$Group=="Ones",], aes(x = Score, group = Group)) + geom_density(fill = "transparent",colour = NA)+ylim(0-5*spacing,ymax)+xlim(xmin,xmax)+tm
p0 <- p0 + geom_vline(aes(xintercept=firstQuintile),color="gray",size=1.2)
p0 <- p0 + geom_vline(aes(xintercept=secondQuintile),color="gray",size=1.2)
p0 <- p0 + geom_vline(aes(xintercept=thirdQuintile),color="gray",size=1.2)
p0 <- p0 + geom_vline(aes(xintercept=fourthQuintile),color="gray",size=1.2)
p0 <- p0 + geom_vline(aes(xintercept=median),color="darkgray",size=2)
#previous line is a little hack for creating a working empty grid with proper sizing
p1 <- ggplot(TestFrame[TestFrame$Group=="Ones",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
p2 <- ggplot(TestFrame[TestFrame$Group=="Twos",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
p3 <- ggplot(TestFrame[TestFrame$Group=="Threes",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
p4 <- ggplot(TestFrame[TestFrame$Group=="Fours",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
p5 <- ggplot(TestFrame[TestFrame$Group=="Fives",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
f <- grobTree(ggplotGrob(p1))
g <- grobTree(ggplotGrob(p2))
h <- grobTree(ggplotGrob(p3))
i <- grobTree(ggplotGrob(p4))
j <- grobTree(ggplotGrob(p5))
a1 <- annotation_custom(grob = f, xmin = xmin, xmax = xmax,ymin = -spacing, ymax = ymax)
a2 <- annotation_custom(grob = g, xmin = xmin, xmax = xmax,ymin = -spacing*2, ymax = ymax-spacing)
a3 <- annotation_custom(grob = h, xmin = xmin, xmax = xmax,ymin = -spacing*3, ymax = ymax-spacing*2)
a4 <- annotation_custom(grob = i, xmin = xmin, xmax = xmax,ymin = -spacing*4, ymax = ymax-spacing*3)
a5 <- annotation_custom(grob = j, xmin = xmin, xmax = xmax,ymin = -spacing*5, ymax = ymax-spacing*4)
pfinal <- p0 + a1 + a2 + a3 + a4 + a5
pfinal
Using dedicated geom_joy() from ggjoy package:
library(ggjoy)
ggplot(TestFrame, aes(Score, Group)) +
geom_joy()
# dummy data
set.seed(1)
TestFrame <-
data.frame(
Score =
c(rnorm(100, 0, 1)
,rnorm(100, 0, 2)
,rnorm(100, 0, 3)
,rnorm(100, 0, 4)
,rnorm(100, 0, 5))
,Group =
c(rep('Ones', 100)
,rep('Twos', 100)
,rep('Threes', 100)
,rep('Fours', 100)
,rep('Fives', 100))
)
head(TestFrame)
# Score Group
# 1 -0.6264538 Ones
# 2 0.1836433 Ones
# 3 -0.8356286 Ones
# 4 1.5952808 Ones
# 5 0.3295078 Ones
# 6 -0.8204684 Ones