Spread out density plots with ggplot - r

I saw this great plot from fivethirty that has a slight overlap of density plots for different colleges. Check out this link at fivethirtyeight.com
How would you replicate this plot with ggplot2?
Specifically, how would you get that slight overlap? facet_wrap isn't going to work.
# Five zero-centred normal samples of 100 draws each; the standard deviation
# grows from 1 to 5 so every group has a visibly different spread.
TestFrame <- data.frame(
  Score = unlist(lapply(1:5, function(s) rnorm(100, 0, s))),
  Group = rep(c("Ones", "Twos", "Threes", "Fours", "Fives"), each = 100)
)

# All group densities drawn on one shared baseline (no vertical offset yet).
ggplot(TestFrame, aes(x = Score, group = Group)) +
  geom_density(alpha = .75, fill = "black")

As always with ggplot, the key is getting the data in the right format, and then the plotting is pretty straightforward. I'm sure there would be another way to do this, but my approach was to do the density estimation with density() and then to make a sort of manual geom_density() with geom_ribbon(), which takes a ymin and ymax, necessary for moving the shape off the x axis.
The rest of the challenge was in getting the order of the printing correct, since it seems that ggplot will print the widest ribbon first. In the end, the part that requires the bulkiest code is the production of the quartiles.
I also produced some data that is a bit more consistent with the original figure.
library(ggplot2)
library(dplyr)
library(broom)
# Simulated scores: 1000 draws whose means cycle through ten values from 1
# down to 0, matching the cyclic group labels A..J (100 rows per group).
# Group has exactly 1000 entries: rep(..., 10000) made data.frame() recycle
# Score 100 times. It is created as a factor because data.frame() keeps
# strings as character from R 4.0 on, and as.numeric() on characters is NA.
rawdata <- data.frame(
  Score = rnorm(1000, seq(1, 0, length.out = 10), sd = 1),
  Group = factor(rep(LETTERS[1:10], 100))
)

# One kernel density per group, lifted off the x axis by a per-group baseline.
df <- rawdata %>%
  # rev() means the ordering will be from top to bottom
  mutate(GroupNum = rev(as.numeric(factor(Group)))) %>%
  group_by(Group, GroupNum) %>%
  do(tidy(density(.$Score, bw = diff(range(.$Score)) / 20))) %>% # The original has quite a large bandwidth
  ungroup() %>% # the following maxima/minima are taken over all groups
  mutate(
    ymin = GroupNum * (max(y) / 1.5), # This constant controls how much overlap between groups there is
    ymax = y + ymin,
    ylabel = ymin + min(ymin) / 2,
    xlabel = min(x) - mean(range(x)) / 2 # This constant controls how far to the left the labels are
  )
# Get quartiles
# One row per group: the quartiles of Score plus the density-curve point that
# lies closest to the median (used for the dashed median marker).
labels <- rawdata %>%
  # factor() first: as.numeric() on a character column would return NA
  mutate(GroupNum = rev(as.numeric(factor(Group)))) %>%
  group_by(Group, GroupNum) %>%
  mutate(
    q1 = quantile(Score)[2],
    median = quantile(Score)[3],
    q3 = quantile(Score)[4]
  ) %>%
  filter(row_number() == 1) %>%
  select(-Score) %>%
  # explicit keys: the same two columns the implicit natural join matched on
  left_join(df, by = c("Group", "GroupNum")) %>%
  mutate(
    xmed = x[which.min(abs(x - median))],
    yminmed = ymin[which.min(abs(x - median))],
    ymaxmed = ymax[which.min(abs(x - median))]
  ) %>%
  filter(row_number() == 1)
# Base layers: group labels, vertical reference lines and a stripped theme.
p <- ggplot(df, aes(x, ymin = ymin, ymax = ymax)) +
  geom_text(data = labels, aes(xlabel, ylabel, label = Group)) +
  geom_vline(xintercept = 0, size = 1.5, alpha = 0.5, colour = "#626262") +
  geom_vline(
    xintercept = c(-2.5, -1.25, 1.25, 2.5),
    size = 0.75, alpha = 0.25, colour = "#626262"
  ) +
  theme(
    panel.grid = element_blank(),
    panel.background = element_rect(fill = "#F0F0F0"),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  )

# Layers are added one group at a time so the drawing order follows
# unique(df$GroupNum): ribbon, dashed median marker, then the baseline.
for (grp in unique(df$GroupNum)) {
  grp_curve <- df[df$GroupNum == grp, ]
  grp_label <- labels[labels$GroupNum == grp, ]
  p <- p +
    geom_ribbon(
      data = grp_curve, aes(group = GroupNum),
      colour = "#F0F0F0", fill = "black"
    ) +
    geom_segment(
      data = grp_label,
      aes(x = xmed, xend = xmed, y = yminmed, yend = ymaxmed),
      colour = "#F0F0F0", linetype = "dashed"
    ) +
    geom_segment(
      data = grp_label, x = min(df$x), xend = max(df$x),
      aes(y = ymin, yend = ymin), size = 1.5, lineend = "round"
    )
}

# Caption next to the median marker of group A only.
p <- p +
  geom_text(
    data = labels[labels$Group == "A", ],
    aes(xmed - xlabel / 50, ylabel),
    label = "Median", colour = "#F0F0F0", hjust = 0,
    fontface = "italic", size = 4
  )
Edit
I noticed the original actually does a bit of fudging by stretching out each distribution with a horizontal line (you can see a join if you look closely...). I added something similar with the second geom_segment() in the loop.

Although there is a great & accepted answer available already - I finished my contribution as an alternative avenue without data reformatting.
# Five groups of 50 scores; every score is a N(3, 2) draw plus a N(-k, 3)
# draw, with k running from 1 (Ones) to 5 (Fives).
TestFrame <- data.frame(
  Score = unlist(lapply(1:5, function(k) rnorm(50, 3, 2) + rnorm(50, -k, 3))),
  Group = rep(c("Ones", "Twos", "Threes", "Fours", "Fives"), each = 50)
)
require(ggplot2)
require(grid)
# Vertical offset (in density units) between consecutive stacked panels.
spacing <- 0.05

# Theme that removes every non-data element so panels can be overlaid.
tm <- theme(
  legend.position = "none", axis.line = element_blank(),
  axis.text.x = element_blank(), axis.text.y = element_blank(),
  axis.ticks = element_blank(),
  axis.title.x = element_blank(), axis.title.y = element_blank(),
  panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
  panel.background = element_blank(),
  plot.background = element_rect(fill = "transparent", colour = NA),
  plot.margin = unit(c(0, 0, 0, 0), "mm")
)

# Quintiles and median of the pooled scores, used as background guide lines.
firstQuintile <- quantile(TestFrame$Score, 0.2)
secondQuintile <- quantile(TestFrame$Score, 0.4)
median <- quantile(TestFrame$Score, 0.5)
thirdQuintile <- quantile(TestFrame$Score, 0.6)
fourthQuintile <- quantile(TestFrame$Score, 0.8)

# Shared limits: 1.5x the peak density of group "Ones", 1.2x the score range.
ymax <- 1.5 * max(density(TestFrame[TestFrame$Group == "Ones", ]$Score)$y)
xmax <- 1.2 * max(TestFrame$Score)
xmin <- 1.2 * min(TestFrame$Score)

# Background canvas. The fully transparent density is a little hack for
# creating a working empty grid with proper sizing; the lower y limit leaves
# room for the five downward offsets.
p0 <- ggplot(
  TestFrame[TestFrame$Group == "Ones", ],
  aes(x = Score, group = Group)
) +
  geom_density(fill = "transparent", colour = NA) +
  ylim(0 - 5 * spacing, ymax) + xlim(xmin, xmax) + tm +
  # Guide lines are passed as fixed values rather than through aes(), so each
  # is drawn once instead of once per data row.
  geom_vline(
    xintercept = unname(c(
      firstQuintile, secondQuintile, thirdQuintile, fourthQuintile
    )),
    color = "gray", size = 1.2
  ) +
  geom_vline(xintercept = unname(median), color = "darkgray", size = 2)

# Density panel for a single group with a dashed white line at its median.
group_density_panel <- function(group_name) {
  ggplot(
    TestFrame[TestFrame$Group == group_name, ],
    aes(x = Score, group = Group)
  ) +
    geom_density(alpha = .85, fill = "black", color = "white", size = 1) +
    tm + ylim(0, ymax) + xlim(xmin, xmax) +
    geom_segment(
      aes(
        y = 0, x = median(Score),
        yend = max(density(Score)$y), xend = median(Score)
      ),
      color = "white", linetype = 2
    )
}

p1 <- group_density_panel("Ones")
p2 <- group_density_panel("Twos")
p3 <- group_density_panel("Threes")
p4 <- group_density_panel("Fours")
p5 <- group_density_panel("Fives")

# Turn every panel into a grob and place it on the canvas; the k-th panel is
# shifted down by k * spacing so consecutive densities overlap slightly.
panels <- list(p1, p2, p3, p4, p5)
overlays <- lapply(seq_along(panels), function(k) {
  annotation_custom(
    grob = grobTree(ggplotGrob(panels[[k]])),
    xmin = xmin, xmax = xmax,
    ymin = -spacing * k, ymax = ymax - spacing * (k - 1)
  )
})

# Adding a list to a ggplot adds its elements in order.
pfinal <- p0 + overlays
pfinal

Using dedicated geom_joy() from ggjoy package:
library(ggjoy)
# Score on the x axis, one geom_joy() density row per Group on the y axis.
ggplot(TestFrame, aes(x = Score, y = Group)) +
  geom_joy()
# dummy data
# Seeded so the draws are reproducible: five zero-centred normal samples of
# 100 values each, with standard deviations 1 to 5.
set.seed(1)
TestFrame <- data.frame(
  Score = unlist(lapply(1:5, function(s) rnorm(100, 0, s))),
  Group = rep(c("Ones", "Twos", "Threes", "Fours", "Fives"), each = 100)
)
head(TestFrame)
# Score Group
# 1 -0.6264538 Ones
# 2 0.1836433 Ones
# 3 -0.8356286 Ones
# 4 1.5952808 Ones
# 5 0.3295078 Ones
# 6 -0.8204684 Ones

Related

how to graph coordinates below a plot - ggplot2?

I'm trying to make a single gene coverage plot like the one shown here.
I have already plotted the coverage but I still need to insert the exons and introns in the corresponding coordinates. Thank you! My code:
# Smoothed depth per Technology along the coordinate axis, titled "TEX15".
# NOTE(review): `inicio`/`promedio` look like start position and mean depth;
# confirm against the data frame `z`, which is not shown here.
ggplot(
  z,
  aes(
    x = inicio, y = promedio,
    fill = Technology, group = Technology, color = Technology
  )
) +
  stat_smooth(
    geom = "area", method = "loess", span = 1 / 3,
    alpha = 1 / 2
  ) +
  scale_x_continuous(limits = c(30689060, 30748122)) +
  # theme_bw() is added directly. theme_set() returns the *previous* global
  # theme, so `+ theme_set(theme_bw())` styled this plot with the old theme
  # and changed the session-wide default as a side effect.
  theme_bw() +
  theme(legend.text = element_text(size = 12)) +
  xlab("Coordinates") +
  ylab("Depth") +
  ggtitle("TEX15")
Without a minimal reproducible example (MRE) containing some input data and expected output it's difficult to know how to help you. For instance, here is an MRE with some 'fake' data:
library(tidyverse)
# Fake coverage positions: 1000 uniform draws between 0 and 7900.
df <- data.frame(Coverage = runif(1000, 0, 7900))

# Top panel: density of the coverage positions.
p1 <- ggplot(df, aes(x = Coverage)) +
  geom_density(outline.type = "full", fill = "#D6B7C9") +
  theme_minimal(base_size = 14) +
  theme(axis.title.x = element_blank())

# Two tracks of named features, each with start and end coordinates.
features1 <- tibble(
  feature = c("E1", "E5", "L1"),
  start = c(1, 2986, 4775),
  end = c(1950, 3237, 6292)
)
features2 <- tibble(
  feature = c("E2", "L2", "E6", "E7"),
  start = c(1892, 3373, 7125, 7604),
  end = c(2989, 4794, 7601, 7900)
)
# Draw one annotation track: a labelled box per feature spanning start..end,
# with no axes and no legend.
plot_feature_track <- function(features) {
  ggplot(features) +
    geom_rect(
      aes(
        xmin = start, xmax = end,
        ymin = 0, ymax = 1,
        fill = feature
      ),
      color = "black"
    ) +
    geom_text(aes(x = (start + end) / 2, y = 0.5, label = feature)) +
    theme_void() +
    theme(legend.position = "none")
}

p2 <- plot_feature_track(features1)
p3 <- plot_feature_track(features2)

library(patchwork)

# Every panel gets the same x window. Without it each track scales to its own
# data range, so the boxes would not sit under the matching coverage
# coordinates. `&` applies the coord to all panels of the patchwork.
x_limits <- c(0, max(features1$end, features2$end))
(p1 / p2 / p3) +
  plot_layout(nrow = 3, heights = c(1, 0.1, 0.1)) &
  coord_cartesian(xlim = x_limits)
Created on 2022-06-16 by the reprex package (v2.0.1)
Would this approach be suitable for your single gene coverage plot? If not, what would you want to change?

Circular stacked barplot in r [duplicate]

This question already has an answer here:
Circular barchart customization from r-graph-gallery
(1 answer)
Closed 8 months ago.
I am trying to run this code from this link https://www.r-graph-gallery.com/299-circular-stacked-barplot.html.
# library
library(tidyverse)
library(viridis)
# Create dataset
# 60 individuals in four groups (A-D), each with three random values drawn
# from 10..100.
data <- data.frame(
individual=paste( "Mister ", seq(1,60), sep=""),
group=c( rep('A', 10), rep('B', 30), rep('C', 14), rep('D', 6)) ,
value1=sample( seq(10,100), 60, replace=T),
value2=sample( seq(10,100), 60, replace=T),
value3=sample( seq(10,100), 60, replace=T)
)
# Transform data in a tidy format (long format)
# Columns 1 and 2 (individual, group) are kept; value1..value3 become rows.
data <- data %>% gather(key = "observation", value="value", -c(1,2))
# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 2
nObsType <- nlevels(as.factor(data$observation))
# NOTE(review): nlevels()/levels() below only return the groups when
# data$group is a factor. data.frame() keeps strings as character from R 4.0
# on, so nlevels(data$group) is 0 there and no spacer rows are created;
# run data$group <- factor(data$group) beforehand to get the gaps.
to_add <- data.frame( matrix(NA, empty_bar*nlevels(data$group)*nObsType, ncol(data)) )
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each=empty_bar*nObsType )
data <- rbind(data, to_add)
data <- data %>% arrange(group, individual)
# One id per bar: every run of nObsType consecutive rows shares an id.
data$id <- rep( seq(1, nrow(data)/nObsType) , each=nObsType)
# Get the name and the y position of each label
label_data <- data %>%
  group_by(id, individual) %>%
  summarize(tot = sum(value))
number_of_bar <- nrow(label_data)
# Subtract 0.5 because the label must have the angle of the centre of the
# bar, not of its extreme right (1) or extreme left (0).
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar
# Labels on the left half are flipped by 180 degrees and right-aligned.
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# prepare a data frame for base lines
base_data <- data %>%
  group_by(group) %>%
  summarize(start = min(id), end = max(id) - empty_bar) %>%
  rowwise() %>%
  mutate(title = mean(c(start, end)))

# prepare a data frame for grid (scales)
grid_data <- base_data
# Rotate `end` by one position (last value first). The index is written out
# explicitly; the earlier `1:nrow(x)-1` parsed as `(1:n) - 1` and only worked
# because R silently drops a 0 index.
grid_data$end <- grid_data$end[
  c(nrow(grid_data), seq_len(nrow(grid_data) - 1))
] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1, ]
# Make the plot
# y positions of the circular grid lines and of their text labels.
grid_levels <- c(0, 50, 100, 150, 200)
# One grey segment layer per grid level, spanning the gap between groups.
grid_lines <- lapply(grid_levels, function(level) {
  force(level)
  geom_segment(
    data = grid_data,
    aes(x = end, y = level, xend = start, yend = level),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  )
})

p <- ggplot(data) +
  # Add the stacked bar
  geom_bar(
    aes(x = as.factor(id), y = value, fill = observation),
    stat = "identity", alpha = 0.5
  ) +
  scale_fill_viridis(discrete = TRUE) +
  # Add the val = 0/50/100/150/200 lines
  grid_lines +
  # Add text showing the value of each 0/50/100/150/200 line
  ggplot2::annotate(
    "text",
    x = rep(max(data$id), 5), y = grid_levels,
    label = c("0", "50", "100", "150", "200"),
    color = "grey", size = 6, angle = 0, fontface = "bold", hjust = 1
  ) +
  ylim(-150, max(label_data$tot, na.rm = TRUE)) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar() +
  # Add labels on top of each bar
  geom_text(
    data = label_data,
    aes(x = id, y = tot + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 5,
    angle = label_data$angle, inherit.aes = FALSE
  ) +
  # Add base line information
  geom_segment(
    data = base_data,
    aes(x = start, y = -5, xend = end, yend = -5),
    colour = "black", alpha = 0.8, size = 0.6, inherit.aes = FALSE
  ) +
  geom_text(
    data = base_data,
    aes(x = title, y = -18, label = group),
    hjust = c(1, 1, 0, 0), colour = "black", alpha = 0.8, size = 4,
    fontface = "bold", inherit.aes = FALSE
  )

# Save at png
# Arguments are named in full; `file =` only worked through partial matching
# of `filename`.
ggsave(filename = "output.png", plot = p, width = 10, height = 10)
However, I am not sure why I am not getting the gaps and the scales in my figure (see below). As depicted, the numbers are printed inside the figure and the gaps between different groups of data are not there.
The original figure should be as follows:
There is a bug in the code. group has to be a factor to make the code adding the gaps work. To fix this add data$group <- factor(data$group).
Note: My guess is that the reason for this bug is that as of version 4.0.0 R treats strings in data frames as strings rather than factors. Hence, for versions < 4.0.0 the code worked fine as is.
# library
library(tidyverse)
library(viridis)
#> Loading required package: viridisLite
# Create dataset
# 60 individuals in four groups (A-D), each with three random values drawn
# from 10..100.
data <- data.frame(
  individual = paste0("Mister ", seq(1, 60)),
  group = c(rep("A", 10), rep("B", 30), rep("C", 14), rep("D", 6)),
  value1 = sample(seq(10, 100), 60, replace = TRUE),
  value2 = sample(seq(10, 100), 60, replace = TRUE),
  value3 = sample(seq(10, 100), 60, replace = TRUE)
)
# Convert to factor
# nlevels()/levels() below need a factor to see the groups.
data$group <- factor(data$group)
# Transform data in a tidy format (long format)
data <- data %>% gather(key = "observation", value = "value", -c(1, 2))
# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 2
nObsType <- nlevels(as.factor(data$observation))
# Spacer rows: empty_bar all-NA bars (nObsType rows each) per group.
to_add <- data.frame(
  matrix(NA, empty_bar * nlevels(data$group) * nObsType, ncol(data))
)
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each = empty_bar * nObsType)
data <- rbind(data, to_add)
data <- data %>% arrange(group, individual)
# One id per bar: every run of nObsType consecutive rows shares an id.
data$id <- rep(seq(1, nrow(data) / nObsType), each = nObsType)
# Get the name and the y position of each label
label_data <- data %>%
  group_by(id, individual) %>%
  summarize(tot = sum(value))
#> `summarise()` has grouped output by 'id'. You can override using the `.groups`
#> argument.
number_of_bar <- nrow(label_data)
# Subtract 0.5 because the label must have the angle of the centre of the
# bar, not of its extreme right (1) or extreme left (0).
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar
# Labels on the left half are flipped by 180 degrees and right-aligned.
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# prepare a data frame for base lines
base_data <- data %>%
  group_by(group) %>%
  summarize(start = min(id), end = max(id) - empty_bar) %>%
  rowwise() %>%
  mutate(title = mean(c(start, end)))

# prepare a data frame for grid (scales)
grid_data <- base_data
# Rotate `end` by one position (last value first). The index is written out
# explicitly; the earlier `1:nrow(x) - 1` parsed as `(1:n) - 1` and only
# worked because R silently drops a 0 index.
grid_data$end <- grid_data$end[
  c(nrow(grid_data), seq_len(nrow(grid_data) - 1))
] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1, ]
# Make the plot
# y positions of the circular grid lines and of their text labels.
grid_levels <- c(0, 50, 100, 150, 200)
# One grey segment layer per grid level, spanning the gap between groups.
grid_lines <- lapply(grid_levels, function(level) {
  force(level)
  geom_segment(
    data = grid_data,
    aes(x = end, y = level, xend = start, yend = level),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  )
})

ggplot(data) +
  # Add the stacked bar
  geom_bar(
    aes(x = as.factor(id), y = value, fill = observation),
    stat = "identity", alpha = 0.5
  ) +
  scale_fill_viridis(discrete = TRUE) +
  # Add the val = 0/50/100/150/200 lines
  grid_lines +
  # Add text showing the value of each 0/50/100/150/200 line
  ggplot2::annotate(
    "text",
    x = rep(max(data$id), 5), y = grid_levels,
    label = c("0", "50", "100", "150", "200"),
    color = "grey", size = 6, angle = 0, fontface = "bold", hjust = 1
  ) +
  ylim(-150, max(label_data$tot, na.rm = TRUE)) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar() +
  # Add labels on top of each bar
  geom_text(
    data = label_data,
    aes(x = id, y = tot + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 5,
    angle = label_data$angle, inherit.aes = FALSE
  ) +
  # Add base line information
  geom_segment(
    data = base_data,
    aes(x = start, y = -5, xend = end, yend = -5),
    colour = "black", alpha = 0.8, size = 0.6, inherit.aes = FALSE
  ) +
  geom_text(
    data = base_data,
    aes(x = title, y = -18, label = group),
    hjust = c(1, 1, 0, 0), colour = "black", alpha = 0.8, size = 4,
    fontface = "bold", inherit.aes = FALSE
  )
#> Warning: Removed 24 rows containing missing values (position_stack).
#> Warning: Removed 9 rows containing missing values (geom_text).

Adding different geom_segment to every facet

I have the code below, and it works fine. The problem is, I would like to add "k" and plot a straight line similar to "z", but "k" is a vector of different numbers. Each element in "k" should be plotted as a line on the 3 facets created. If k was a singular value, I would just repeat the geom_segment() command with different y limits. Is there an easy way to do this? The final output should look like attached, assuming I could draw straight lines.
# Drop the first three columns of iris; the plot uses Petal.Width and Species.
x <- iris[-1:-3]
bw <- 1
nbin <- 100
y <- head(iris, 50)[2]
z <- 1 # x position where the red segment ends
k <- c(2, 3, 4)

# Count-scaled density of Petal.Width per species, one facet per species.
ggplot(x, aes(x = Petal.Width)) +
  geom_density(aes(y = bw * ..count.., fill = Species), size = 1, alpha = 0.4) +
  # The colour is a fixed setting, so it sits outside aes(). Inside aes() the
  # string "red" was treated as data and mapped through the colour scale,
  # which gives the default palette colour plus a spurious legend.
  geom_segment(aes(x = 5, y = 250, xend = z, yend = 250), color = "red") +
  facet_wrap(~Species) +
  scale_x_continuous(
    labels = scales::math_format(10^.x),
    limits = c(0, 5), expand = c(0, 0)
  ) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA)) +
  annotation_logticks(
    sides = "b",
    short = unit(-1, "mm"), mid = unit(-2, "mm"), long = unit(-3, "mm")
  ) +
  coord_cartesian(clip = "off") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA)
  )
you can try this. Assuming that your plot is saved as p1.
# One row per facet: pairs each value of k with a Species level so that
# facet_wrap(~Species) routes every segment to its own panel.
k_data <- data.frame(k, Species = levels(x$Species))
# inherit.aes = FALSE keeps this layer from picking up the plot-level
# aesthetics, which k_data has no columns for. FALSE is spelled out because
# `F` is an ordinary variable that can be reassigned.
p1 + geom_segment(
  data = k_data, aes(x = 5, y = 200, xend = k, yend = 200),
  color = "blue", inherit.aes = FALSE
)
The idea is to create a data frame with the columns k and Species and use this data exclusively in one geom by setting inherit.aes = F.
In this solution, the value of k is made part of the data set being plotted through a pipe. It is a temporary modification of the data set, since it is not assigned back to it nor to any other data set.
library(ggplot2)
library(dplyr)
# Drop the first three columns of iris; the plot uses Petal.Width and Species.
x <- iris[-1:-3]
str(x)
bw <- 1
nbin <- 100
y <- head(iris, 50)[2]
z <- 1
k <- c(2, 3, 4)

# k is attached as a column only inside the pipe (x itself is not modified);
# each of the three values is repeated for a run of 50 consecutive rows.
x %>%
  mutate(k = rep(k, each = 50)) %>%
  ggplot(aes(x = Petal.Width)) +
  geom_density(
    aes(y = bw * ..count.., fill = Species),
    size = 1, alpha = 0.4
  ) +
  # red segment: ends at the shared value z in every facet
  geom_segment(
    aes(x = 5, y = 250, xend = z, yend = 250),
    color = "red"
  ) +
  # blue segment: ends at the k column, so it differs per facet
  geom_segment(
    aes(x = 5, y = 200, xend = k, yend = 200),
    color = "blue"
  ) +
  facet_wrap(~Species) +
  scale_x_continuous(
    labels = scales::math_format(10^.x),
    limits = c(0, 5), expand = c(0, 0)
  ) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA)) +
  annotation_logticks(
    sides = "b",
    short = unit(-1, "mm"), mid = unit(-2, "mm"), long = unit(-3, "mm")
  ) +
  coord_cartesian(clip = "off") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA)
  )

ggplot lines from point to origin and cosine scores

I am trying to do two things in ggplot.
Firstly: Draw a line from each of the 3 points to the origin at c(0,0)
Secondly: Add the cosine curve similar to the picture below with the values from the cosine calculation.
library(dplyr)
library(ggplot2)
# x and y coordinates of the three points.
points1 <- c(0.025, 0.075, 0.050)
points2 <- c(0.07, 0.0725, 0.074)

# Scatter plot of the points on fixed axis ranges.
ggplot(data.frame(cbind(points1, points2)), aes(x = points1, y = points2)) +
  geom_point() +
  scale_x_continuous(limits = c(0.000, 0.08)) +
  scale_y_continuous(limits = c(0.060, 0.08))

library(text2vec)
library(Matrix)

# Cosine distances computed by dist2() on the sparse coordinate matrix.
m <- Matrix(cbind(points1, points2), sparse = TRUE)
dist2(m, method = "cosine")
EDIT:
Thanks to @astrofunkswag, my solution is the following:
library(dplyr)
library(ggplot2)
library(tidyquant)
# Coordinates of the three points, kept as separate vectors and as a
# two-column data frame (one row per point).
points1 <- c(0.025, 0.075, 0.070)
points2 <- c(0.07, 0.0725, 0.020)
df <- data.frame(points1 = points1, points2 = points2)
# Points along a circular arc centred on the origin.
#
# pt1, pt2: numeric vectors c(x, y); the arc runs from the polar angle of pt1
#           to the polar angle of pt2.
# n.angle:  number of points generated along the arc.
# rad:      radius of the arc.
# Returns a data frame with columns x and y holding n.angle rows.
arc_fun <- function(pt1, pt2, n.angle = 100, rad = .03) {
  start_angle <- atan2(pt1[2], pt1[1])
  end_angle <- atan2(pt2[2], pt2[1])
  # n.angle is honoured here; it used to be reset to 100 inside the body, so
  # the argument had no effect.
  angles <- seq(start_angle, end_angle, length.out = n.angle)
  data.frame(x = rad * cos(angles), y = rad * sin(angles))
}
# One arc per pair of points, each at its own radius so the arcs do not
# lie on top of each other.
arc_df <- arc_fun(pt1 = as.numeric(df[1, ]), pt2 = as.numeric(df[3, ]))
arc_df2 <- arc_fun(
  pt1 = as.numeric(df[2, ]), pt2 = as.numeric(df[3, ]),
  rad = .02
)
arc_df3 <- arc_fun(
  pt1 = as.numeric(df[1, ]), pt2 = as.numeric(df[2, ]),
  rad = .05
)

library(text2vec)
library(Matrix)

# Cosine distances computed by dist2() on the sparse coordinate matrix; the
# matrix and three of its entries are printed for inspection.
m <- Matrix(cbind(points1, points2), sparse = TRUE)
dist <- dist2(m, method = "cosine")
dist
dist[1, 2]
dist[2, 3]
dist[3, 1]
# Final figure: the three points, a grey line from the origin to each point,
# the three coloured arcs and the cosine-distance annotations.
df %>%
  ggplot(aes(x = points1, y = points2)) +
  geom_point(
    shape = 21, colour = "black", fill = "grey", size = 8, stroke = 1
  ) +
  scale_x_continuous(limits = c(0.000, 0.08)) +
  scale_y_continuous(limits = c(0.000, 0.08)) +
  geom_segment(
    aes(xend = points1, yend = points2),
    x = 0, y = 0, color = "grey"
  ) +
  geom_line(data = arc_df, aes(x, y), color = "red") +
  geom_line(data = arc_df2, aes(x, y), color = "blue") +
  geom_line(data = arc_df3, aes(x, y), color = "green") +
  ggtitle("Cosine distance between points in a 2-D space") +
  theme_bw() +
  theme(
    plot.title = element_text(size = 22, face = "bold"),
    # axis.text.x = element_blank(), axis.text.y = element_blank(),
    axis.title.x = element_blank(), axis.title.y = element_blank(),
    panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  geom_text(
    aes(label = c("Doc A", "Doc B", "Doc C"), hjust = -0.25, vjust = 0),
    size = 5
  ) +
  # paste0() throughout: paste() inserted an extra space after "θ = " in two
  # of the three labels, so the annotations were spaced inconsistently.
  annotate(
    "text", x = 0.022, y = 0.035,
    label = paste0("θ = ", round(dist[2, 3], 2)), size = 6
  ) +
  annotate(
    "text", x = 0.0125, y = 0.019,
    label = paste0("θ = ", round(dist[3, 1], 2)), size = 6
  ) +
  annotate(
    "text", x = 0.0125, y = 0.007,
    label = paste0("θ = ", round(dist[1, 2], 2)), size = 6
  )
Which outputs:
The first part is easy using geom_segment. The second part I adapted from this post. It might not be exactly what you're looking for aesthetically, but it works.
# Points plus a straight segment from the origin (0, 0) to every point.
df <- data.frame(points1, points2)
plt <- ggplot(df, aes(x = points1, y = points2)) +
  geom_point() +
  scale_x_continuous(limits = c(0.000, 0.08)) +
  scale_y_continuous(limits = c(0.000, 0.08)) +
  geom_segment(aes(xend = points1, yend = points2), x = 0, y = 0)
# Points along a circular arc centred on the origin.
#
# pt1, pt2: numeric vectors c(x, y); the arc runs from the polar angle of pt1
#           to the polar angle of pt2.
# n.angle:  number of points generated along the arc.
# rad:      radius of the arc.
# Returns a data frame with columns x and y holding n.angle rows.
arc_fun <- function(pt1, pt2, n.angle = 100, rad = .03) {
  start_angle <- atan2(pt1[2], pt1[1])
  end_angle <- atan2(pt2[2], pt2[1])
  # n.angle is honoured here; it used to be reset to 100 inside the body, so
  # the argument had no effect.
  angles <- seq(start_angle, end_angle, length.out = n.angle)
  data.frame(x = rad * cos(angles), y = rad * sin(angles))
}
# One arc per pair of points, each at its own radius so the arcs do not
# lie on top of each other.
arc_df <- arc_fun(pt1 = as.numeric(df[1, ]), pt2 = as.numeric(df[3, ]))
arc_df2 <- arc_fun(
  pt1 = as.numeric(df[2, ]), pt2 = as.numeric(df[3, ]),
  rad = .02
)
arc_df3 <- arc_fun(
  pt1 = as.numeric(df[1, ]), pt2 = as.numeric(df[2, ]),
  rad = .05
)

# Draw the arcs on top of the point-and-segment plot.
plt +
  geom_line(data = arc_df, aes(x, y), color = "red") +
  geom_line(data = arc_df2, aes(x, y), color = "blue") +
  geom_line(data = arc_df3, aes(x, y), color = "green")
There's a lot of options you can play around with, such as color and shape of lines and points.

Comparing 17 density plots in R [duplicate]

I saw this great plot from fivethirty that has a slight overlap of density plots for different colleges. Check out this link at fivethirtyeight.com
How would you replicate this plot with ggplot2?
Specifically, how would you get that slight overlap? facet_wrap isn't going to work.
# Five zero-centred normal samples of 100 draws each; the standard deviation
# grows from 1 to 5 so every group has a visibly different spread.
TestFrame <- data.frame(
  Score = unlist(lapply(1:5, function(s) rnorm(100, 0, s))),
  Group = rep(c("Ones", "Twos", "Threes", "Fours", "Fives"), each = 100)
)

# All group densities drawn on one shared baseline (no vertical offset yet).
ggplot(TestFrame, aes(x = Score, group = Group)) +
  geom_density(alpha = .75, fill = "black")
As always with ggplot, the key is getting the data in the right format, and then the plotting is pretty straightforward. I'm sure there would be another way to do this, but my approach was to do the density estimation with density() and then to make a sort of manual geom_density() with geom_ribbon(), which takes a ymin and ymax, necessary for moving the shape off the x axis.
The rest of the challenge was in getting the order of the printing correct, since it seems that ggplot will print the widest ribbon first. In the end, the part that requires the bulkiest code is the production of the quartiles.
I also produced some data that is a bit more consistent with the original figure.
library(ggplot2)
library(dplyr)
library(broom)
# Simulated scores: 1000 draws whose means cycle through ten values from 1
# down to 0, matching the cyclic group labels A..J (100 rows per group).
# Group has exactly 1000 entries: rep(..., 10000) made data.frame() recycle
# Score 100 times. It is created as a factor because data.frame() keeps
# strings as character from R 4.0 on, and as.numeric() on characters is NA.
rawdata <- data.frame(
  Score = rnorm(1000, seq(1, 0, length.out = 10), sd = 1),
  Group = factor(rep(LETTERS[1:10], 100))
)

# One kernel density per group, lifted off the x axis by a per-group baseline.
df <- rawdata %>%
  # rev() means the ordering will be from top to bottom
  mutate(GroupNum = rev(as.numeric(factor(Group)))) %>%
  group_by(Group, GroupNum) %>%
  do(tidy(density(.$Score, bw = diff(range(.$Score)) / 20))) %>% # The original has quite a large bandwidth
  ungroup() %>% # the following maxima/minima are taken over all groups
  mutate(
    ymin = GroupNum * (max(y) / 1.5), # This constant controls how much overlap between groups there is
    ymax = y + ymin,
    ylabel = ymin + min(ymin) / 2,
    xlabel = min(x) - mean(range(x)) / 2 # This constant controls how far to the left the labels are
  )
# Get quartiles
# One row per group: the quartiles of Score plus the density-curve point that
# lies closest to the median (used for the dashed median marker).
labels <- rawdata %>%
  # factor() first: as.numeric() on a character column would return NA
  mutate(GroupNum = rev(as.numeric(factor(Group)))) %>%
  group_by(Group, GroupNum) %>%
  mutate(
    q1 = quantile(Score)[2],
    median = quantile(Score)[3],
    q3 = quantile(Score)[4]
  ) %>%
  filter(row_number() == 1) %>%
  select(-Score) %>%
  # explicit keys: the same two columns the implicit natural join matched on
  left_join(df, by = c("Group", "GroupNum")) %>%
  mutate(
    xmed = x[which.min(abs(x - median))],
    yminmed = ymin[which.min(abs(x - median))],
    ymaxmed = ymax[which.min(abs(x - median))]
  ) %>%
  filter(row_number() == 1)
# Base layers: group labels, vertical reference lines and a stripped theme.
p <- ggplot(df, aes(x, ymin = ymin, ymax = ymax)) +
  geom_text(data = labels, aes(xlabel, ylabel, label = Group)) +
  geom_vline(xintercept = 0, size = 1.5, alpha = 0.5, colour = "#626262") +
  geom_vline(
    xintercept = c(-2.5, -1.25, 1.25, 2.5),
    size = 0.75, alpha = 0.25, colour = "#626262"
  ) +
  theme(
    panel.grid = element_blank(),
    panel.background = element_rect(fill = "#F0F0F0"),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  )

# Layers are added one group at a time so the drawing order follows
# unique(df$GroupNum): ribbon, dashed median marker, then the baseline.
for (grp in unique(df$GroupNum)) {
  grp_curve <- df[df$GroupNum == grp, ]
  grp_label <- labels[labels$GroupNum == grp, ]
  p <- p +
    geom_ribbon(
      data = grp_curve, aes(group = GroupNum),
      colour = "#F0F0F0", fill = "black"
    ) +
    geom_segment(
      data = grp_label,
      aes(x = xmed, xend = xmed, y = yminmed, yend = ymaxmed),
      colour = "#F0F0F0", linetype = "dashed"
    ) +
    geom_segment(
      data = grp_label, x = min(df$x), xend = max(df$x),
      aes(y = ymin, yend = ymin), size = 1.5, lineend = "round"
    )
}

# Caption next to the median marker of group A only.
p <- p +
  geom_text(
    data = labels[labels$Group == "A", ],
    aes(xmed - xlabel / 50, ylabel),
    label = "Median", colour = "#F0F0F0", hjust = 0,
    fontface = "italic", size = 4
  )
Edit
I noticed the original actually does a bit of fudging by stretching out each distribution with a horizontal line (you can see a join if you look closely...). I added something similar with the second geom_segment() in the loop.
Although there is a great & accepted answer available already - I finished my contribution as an alternative avenue without data reformatting.
# Five groups of 50 scores; every score is a N(3, 2) draw plus a N(-k, 3)
# draw, with k running from 1 (Ones) to 5 (Fives).
TestFrame <- data.frame(
  Score = unlist(lapply(1:5, function(k) rnorm(50, 3, 2) + rnorm(50, -k, 3))),
  Group = rep(c("Ones", "Twos", "Threes", "Fours", "Fives"), each = 50)
)
require(ggplot2)
require(grid)
# Vertical offset (in density units) between consecutive stacked panels.
spacing <- 0.05

# Theme that removes every non-data element so panels can be overlaid.
tm <- theme(
  legend.position = "none", axis.line = element_blank(),
  axis.text.x = element_blank(), axis.text.y = element_blank(),
  axis.ticks = element_blank(),
  axis.title.x = element_blank(), axis.title.y = element_blank(),
  panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
  panel.background = element_blank(),
  plot.background = element_rect(fill = "transparent", colour = NA),
  plot.margin = unit(c(0, 0, 0, 0), "mm")
)

# Quintiles and median of the pooled scores, used as background guide lines.
firstQuintile <- quantile(TestFrame$Score, 0.2)
secondQuintile <- quantile(TestFrame$Score, 0.4)
median <- quantile(TestFrame$Score, 0.5)
thirdQuintile <- quantile(TestFrame$Score, 0.6)
fourthQuintile <- quantile(TestFrame$Score, 0.8)

# Shared limits: 1.5x the peak density of group "Ones", 1.2x the score range.
ymax <- 1.5 * max(density(TestFrame[TestFrame$Group == "Ones", ]$Score)$y)
xmax <- 1.2 * max(TestFrame$Score)
xmin <- 1.2 * min(TestFrame$Score)

# Background canvas. The fully transparent density is a little hack for
# creating a working empty grid with proper sizing; the lower y limit leaves
# room for the five downward offsets.
p0 <- ggplot(
  TestFrame[TestFrame$Group == "Ones", ],
  aes(x = Score, group = Group)
) +
  geom_density(fill = "transparent", colour = NA) +
  ylim(0 - 5 * spacing, ymax) + xlim(xmin, xmax) + tm +
  # Guide lines are passed as fixed values rather than through aes(), so each
  # is drawn once instead of once per data row.
  geom_vline(
    xintercept = unname(c(
      firstQuintile, secondQuintile, thirdQuintile, fourthQuintile
    )),
    color = "gray", size = 1.2
  ) +
  geom_vline(xintercept = unname(median), color = "darkgray", size = 2)

# Density panel for a single group with a dashed white line at its median.
group_density_panel <- function(group_name) {
  ggplot(
    TestFrame[TestFrame$Group == group_name, ],
    aes(x = Score, group = Group)
  ) +
    geom_density(alpha = .85, fill = "black", color = "white", size = 1) +
    tm + ylim(0, ymax) + xlim(xmin, xmax) +
    geom_segment(
      aes(
        y = 0, x = median(Score),
        yend = max(density(Score)$y), xend = median(Score)
      ),
      color = "white", linetype = 2
    )
}

p1 <- group_density_panel("Ones")
p2 <- group_density_panel("Twos")
p3 <- group_density_panel("Threes")
p4 <- group_density_panel("Fours")
p5 <- group_density_panel("Fives")

# Turn every panel into a grob and place it on the canvas; the k-th panel is
# shifted down by k * spacing so consecutive densities overlap slightly.
panels <- list(p1, p2, p3, p4, p5)
overlays <- lapply(seq_along(panels), function(k) {
  annotation_custom(
    grob = grobTree(ggplotGrob(panels[[k]])),
    xmin = xmin, xmax = xmax,
    ymin = -spacing * k, ymax = ymax - spacing * (k - 1)
  )
})

# Adding a list to a ggplot adds its elements in order.
pfinal <- p0 + overlays
pfinal
Using dedicated geom_joy() from ggjoy package:
library(ggjoy)
# Score on the x axis, one geom_joy() density row per Group on the y axis.
ggplot(TestFrame, aes(x = Score, y = Group)) +
  geom_joy()
# dummy data
# Seeded so the draws are reproducible: five zero-centred normal samples of
# 100 values each, with standard deviations 1 to 5.
set.seed(1)
TestFrame <- data.frame(
  Score = unlist(lapply(1:5, function(s) rnorm(100, 0, s))),
  Group = rep(c("Ones", "Twos", "Threes", "Fours", "Fives"), each = 100)
)
head(TestFrame)
# Score Group
# 1 -0.6264538 Ones
# 2 0.1836433 Ones
# 3 -0.8356286 Ones
# 4 1.5952808 Ones
# 5 0.3295078 Ones
# 6 -0.8204684 Ones

Resources