At Facebook research, I found these beautiful bar charts which are connected by lines to indicate rank changes:
https://research.fb.com/do-jobs-run-in-families/
I would like to create them using ggplot2. The bar-chart-part was easy:
library(ggplot2)
library(ggpubr)
state1 <- data.frame(state=c(rep("ALABAMA",3), rep("CALIFORNIA",3)),
value=c(61,94,27,10,30,77),
type=rep(c("state","local","fed"),2),
cumSum=c(rep(182,3), rep(117,3)))
state2 <- data.frame(state=c(rep("ALABAMA",3), rep("CALIFORNIA",3)),
value=c(10,30,7,61,94,27),
type=rep(c("state","local","fed"),2),
cumSum=c(rep(117,3), rep(182,3)))
fill <- c("#40b8d0", "#b2d183", "#F9756D")
p1 <- ggplot(data = state1) +
geom_bar(aes(x = reorder(state, value), y = value, fill = type), stat="identity") +
theme_bw() +
scale_fill_manual(values=fill) +
labs(x="", y="Total budget in 1M$") +
theme(legend.position="none",
legend.direction="horizontal",
legend.title = element_blank(),
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(), panel.background = element_blank()) +
coord_flip()
p2 <- ggplot(data = state2) +
geom_bar(aes(x = reorder(state, value), y = value, fill = type), stat="identity") +
theme_bw() +
scale_fill_manual(values=fill) + labs(x="", y="Total budget in 1M$") +
theme(legend.position="none",
legend.direction="horizontal",
legend.title = element_blank(),
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) +
scale_x_discrete(position = "top") +
scale_y_reverse() +
coord_flip()
p3 <- ggarrange(p1, p2, common.legend = TRUE, legend = "bottom")
But I couldn't come up with a solution to the line-part. When adding lines e.g. to the left side by
p3 + geom_segment(aes(x = rep(1:2, each=3), xend = rep(1:10, each=3),
y = cumSum[order(cumSum)], yend=cumSum[order(cumSum)]+10), size = 1.2)
The problem is that the lines will not be able to cross over to the right side.
It looks like this:
Basically, I would like to connect the 'California' bar on the left with the Caifornia bar on the right.
To do that, I think, I have to get access to the superordinate level of the graph somehow. I've looked into viewports and was able to overlay the two bar charts with a chart made out of geom_segment but then I couldn't figure out the right layout for the lines:
subplot <- ggplot(data = state1) +
geom_segment(aes(x = rep(1:2, each=3), xend = rep(1:2, each=3),
y = cumSum[order(cumSum)], yend =cumSum[order(cumSum)]+10),
size = 1.2)
vp <- viewport(width = 1, height = 1, x = 1, y = unit(0.7, "lines"),
just ="right", "bottom"))
print(p3)
print(subplot, vp = vp)
Help or pointers are greatly appreciated.
This is a really interesting problem. I approximated it using the patchwork library, which lets you add ggplots together and gives you an easy way to control their layout—I much prefer it to doing anything grid.arrange-based, and for some things it works better than cowplot.
I expanded on the dataset just to get some more values in the two data frames.
library(tidyverse)
library(patchwork)
set.seed(1017)
state1 <- data_frame(
state = rep(state.name[1:5], each = 3),
value = floor(runif(15, 1, 100)),
type = rep(c("state", "local", "fed"), times = 5)
)
state2 <- data_frame(
state = rep(state.name[1:5], each = 3),
value = floor(runif(15, 1, 100)),
type = rep(c("state", "local", "fed"), times = 5)
)
Then I made a data frame that assigns ranks to each state based on other values in their original data frame (state1 or state2).
ranks <- bind_rows(
state1 %>% mutate(position = 1),
state2 %>% mutate(position = 2)
) %>%
group_by(position, state) %>%
summarise(state_total = sum(value)) %>%
mutate(rank = dense_rank(state_total)) %>%
ungroup()
I made a quick theme to keep things very minimal and drop axis marks:
theme_min <- function(...) theme_minimal(...) +
theme(panel.grid = element_blank(), legend.position = "none", axis.title = element_blank())
The bump chart (the middle one) is based on the ranks data frame, and has no labels. Using factors instead of numeric variables for position and rank gave me a little more control over spacing, and lets the ranks line up with discrete 1 through 5 values in a way that will match the state names in the bar charts.
p_ranks <- ggplot(ranks, aes(x = as.factor(position), y = as.factor(rank), group = state)) +
geom_path() +
scale_x_discrete(breaks = NULL, expand = expand_scale(add = 0.1)) +
scale_y_discrete(breaks = NULL) +
theme_min()
p_ranks
For the left bar chart, I sort the states by value and turn the values negative to point to the left, then give it the same minimal theme:
p_left <- state1 %>%
mutate(state = as.factor(state) %>% fct_reorder(value, sum)) %>%
arrange(state) %>%
mutate(value = value * -1) %>%
ggplot(aes(x = state, y = value, fill = type)) +
geom_col(position = "stack") +
coord_flip() +
scale_y_continuous(breaks = NULL) +
theme_min() +
scale_fill_brewer()
p_left
The right bar chart is pretty much the same, except the values stay positive and I moved the x-axis to the top (becomes right when I flip the coordinates):
p_right <- state2 %>%
mutate(state = as.factor(state) %>% fct_reorder(value, sum)) %>%
arrange(state) %>%
ggplot(aes(x = state, y = value, fill = type)) +
geom_col(position = "stack") +
coord_flip() +
scale_x_discrete(position = "top") +
scale_y_continuous(breaks = NULL) +
theme_min() +
scale_fill_brewer()
Then because I've loaded patchwork, I can add the plots together and specify the layout.
p_left + p_ranks + p_right +
plot_layout(nrow = 1)
You may want to adjust spacing and margins some more, such as with the expand_scale call with the bump chart. I haven't tried this with axis marks along the y-axes (i.e. bottoms after flipping), but I have a feeling things might get thrown out of whack if you don't add a dummy axis to the ranks. Plenty still to mess around with, but it's a cool visualization project you posed!
Here's a pure ggplot2 solution, which combines the underlying data frames into one & plots everything in a single plot:
Data manipulation:
library(dplyr)
bar.width <- 0.9
# combine the two data sources
df <- rbind(state1 %>% mutate(source = "state1"),
state2 %>% mutate(source = "state2")) %>%
# calculate each state's rank within each data source
group_by(source, state) %>%
mutate(state.sum = sum(value)) %>%
ungroup() %>%
group_by(source) %>%
mutate(source.rank = as.integer(factor(state.sum))) %>%
ungroup() %>%
# calculate the dimensions for each bar
group_by(source, state) %>%
arrange(type) %>%
mutate(xmin = lag(cumsum(value), default = 0),
xmax = cumsum(value),
ymin = source.rank - bar.width / 2,
ymax = source.rank + bar.width / 2) %>%
ungroup() %>%
# shift each data source's coordinates away from point of origin,
# in order to create space for plotting lines
mutate(x = ifelse(source == "state1", -max(xmax) / 2, max(xmax) / 2)) %>%
mutate(xmin = ifelse(source == "state1", x - xmin, x + xmin),
xmax = ifelse(source == "state1", x - xmax, x + xmax)) %>%
# calculate label position for each data source
group_by(source) %>%
mutate(label.x = max(abs(xmax))) %>%
ungroup() %>%
mutate(label.x = ifelse(source == "state1", -label.x, label.x),
hjust = ifelse(source == "state1", 1.1, -0.1))
Plot:
ggplot(df,
aes(x = x, y = source.rank,
xmin = xmin, xmax = xmax,
ymin = ymin, ymax = ymax,
fill = type)) +
geom_rect() +
geom_line(aes(group = state)) +
geom_text(aes(x = label.x, label = state, hjust = hjust),
check_overlap = TRUE) +
# allow some space for the labels; this may be changed
# depending on plot dimensions
scale_x_continuous(expand = c(0.2, 0)) +
scale_fill_manual(values = fill) +
theme_void() +
theme(legend.position = "top")
Data source (same as #camille's):
set.seed(1017)
state1 <- data_frame(
state = rep(state.name[1:5], each = 3),
value = floor(runif(15, 1, 100)),
type = rep(c("state", "local", "fed"), times = 5)
)
state2 <- data_frame(
state = rep(state.name[1:5], each = 3),
value = floor(runif(15, 1, 100)),
type = rep(c("state", "local", "fed"), times = 5)
)
I have a matrix with many zero elements. The column names are labeled on the horizontal axis. I'd like to show explictly the nonzero elements as the bias from the vertical line for each column.
So how should construct a figure such as the example using ggplot2?
An example data can be generated as follow:
set.seed(2018)
N <- 5
p <- 40
dat <- matrix(0.0, nrow=p, ncol=N)
dat[2:7, 1] <- 4*rnorm(6)
dat[4:12, 2] <- 2.6*rnorm(9)
dat[25:33, 3] <- 2.1*rnorm(9)
dat[19:26, 4] <- 3.3*rnorm(8)
dat[33:38, 5] <- 2.9*rnorm(6)
colnames(dat) <- letters[1:5]
print(dat)
Here is another option using facet_wrap and geom_col with theme_minimal.
library(tidyverse)
dat %>%
as.data.frame() %>%
rowid_to_column("row") %>%
gather(key, value, -row) %>%
ggplot(aes(x = row, y = value, fill = key)) +
geom_col() +
facet_wrap(~ key, ncol = ncol(dat)) +
coord_flip() +
theme_minimal()
To further increase the aesthetic similarity to the plot in your original post we can
move the facet strips to the bottom,
rotate strip labels,
add "zero lines" in matching colours,
remove the fill legend, and
get rid of the x & y axis ticks/labels/title.
library(tidyverse)
dat %>%
as.data.frame() %>%
rowid_to_column("row") %>%
gather(key, value, -row) %>%
ggplot(aes(x = row, y = value, fill = key)) +
geom_col() +
geom_hline(data = dat %>%
as.data.frame() %>%
gather(key, value) %>%
count(key) %>%
mutate(y = 0),
aes(yintercept = y, colour = key), show.legend = F) +
facet_wrap(~ key, ncol = ncol(dat), strip.position = "bottom") +
coord_flip() +
guides(fill = FALSE) +
theme_minimal() +
theme(
strip.text.x = element_text(angle = 45),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank())
It would be much easier if you can provide some sample data. Thus I needed to create them and there is no guarantee that this will work for your purpose.
set.seed(123)
# creating some random sample data
df <- data.frame(id = rep(1:100, each = 3),
x = rnorm(300),
group = rep(letters[1:3], each = 100),
bias = sample(0:1, 300, replace = T, prob = c(0.7, 0.3)))
# introducing bias
df$bias <- df$bias*rnorm(nrow(df))
# calculate lower/upper bias for errorbar
df$biaslow <- apply(data.frame(df$bias), 1, function(x){min(0, x)})
df$biasupp <- apply(data.frame(df$bias), 1, function(x){max(0, x)})
Then I used kind of hack to be able to print groups in sufficient distance to make them not overlapped. Based on group I shifted bias variable and also lower and upper bias.
# I want to print groups in sufficient distance
df$bias <- as.numeric(df$group)*5 + df$bias
df$biaslow <- as.numeric(df$group)*5 + df$biaslow
df$biasupp <- as.numeric(df$group)*5 + df$biasupp
And now it is possible to plot it:
library(ggplot2)
ggplot(df, aes(x = x, col = group)) +
geom_errorbar(aes(ymin = biaslow, ymax = biasupp), width = 0) +
coord_flip() +
geom_hline(aes(yintercept = 5, col = "a")) +
geom_hline(aes(yintercept = 10, col = "b")) +
geom_hline(aes(yintercept = 15, col = "c")) +
theme(legend.position = "none") +
scale_y_continuous(breaks = c(5, 10, 15), labels = letters[1:3])
EDIT:
To incorporate special design you can add
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
to your plot.
EDIT2:
To incorporate several horizontal lines, you can create different dataset:
df2 <- data.frame(int = unique(as.numeric(df$group)*5),
gr = levels(df$group))
And use
geom_hline(data = df2, aes(yintercept = int, col = gr))
instead of copy/pasting geom_hline for each group level.
I've drawed bar graph with negative and positive bars which is familiar to the research. However, my code seems extremely inconvenient and verbose usinggraphics::plot() and graphics::text() as showed below. Try as I may, I could find the solution using element_text to fulfill in ggplot2. Please help or try to give some ideas how to achieve this in ggplot2.Thanks in advance.
# my data
df <- data.frame(genus=c("Prevotella","Streptococcus","YRC22","Phascolarctobacterium","SMB53","Epulopiscium",
"CF231","Anaerovibrio","Paludibacter","Parabacteroides","Desulfovibrio","Sutterella",
"Roseburia","Others__0_5_","Akkermansia","Bifidobacterium","Campylobacter","Fibrobacter",
"Coprobacillus","Bulleidia","f_02d06","Dorea","Blautia","Enterococcus","Eubacterium",
"p_75_a5","Clostridium","Coprococcus","Oscillospira","Escherichia","Lactobacillus"),
class=c(rep("groupA",18),rep("groupB",13)),
value=c(4.497311,4.082377,3.578472,3.567310,3.410453,3.390026,
3.363542,3.354532,3.335634,3.284165,3.280838,3.218053,
3.071454,3.026663,3.021749,3.004152,2.917656,2.811455,
-2.997631,-3.074314,-3.117659,-3.151276,-3.170631,-3.194323,
-3.225207,-3.274281,-3.299712,-3.299875,-3.689051,-3.692055,
-4.733154)
)
# bar graph
tiff(file="lefse.tiff",width=2000,height=2000,res=400)
par(mar=c(5,2,1,1))
barplot(df[,3],horiz=T,xlim=c(-6,6),xlab="LDA score (log 10)",
col=c(rep("forestgreen",length(which(df[,2]=="groupA"))),
rep("goldenrod",length(which(df[,2]=="groupB")))))
axis(1,at=seq(-6,6,by=1))
# add text
text(0.85,36.7,label=df[,1][31],cex=0.6);text(0.75,35.4,label=df[,1][30],cex=0.6)
text(0.75,34.1,label=df[,1][29],cex=0.6);text(0.85,33.0,label=df[,1][28],cex=0.6)
text(0.75,31.8,label=df[,1][27],cex=0.6);text(0.6,30.6,label=df[,1][26],cex=0.6)
text(0.8,29.5,label=df[,1][25],cex=0.6);text(0.85,28.3,label=df[,1][24],cex=0.6)
text(0.45,27.1,label=df[,1][23],cex=0.6);text(0.4,25.9,label=df[,1][22],cex=0.6)
text(0.55,24.7,label=df[,1][21],cex=0.6);text(0.55,23.5,label=df[,1][20],cex=0.6)
text(0.85,22.3,label=df[,1][19],cex=0.6);text(-0.75,21.1,label=df[,1][18],cex=0.6)
text(-1,19.9,label=df[,1][17],cex=0.6);text(-1,18.8,label=df[,1][16],cex=0.6)
text(-0.85,17.6,label=df[,1][15],cex=0.6);text(-0.85,16.3,label=df[,1][14],cex=0.6)
text(-0.7,15.1,label=df[,1][13],cex=0.6);text(-0.65,13.9,label=df[,1][12],cex=0.6)
text(-0.85,12.7,label=df[,1][11],cex=0.6);text(-1.05,11.5,label=df[,1][10],cex=0.6)
text(-0.85,10.3,label=df[,1][9],cex=0.6);text(-0.85,9.1,label=df[,1][8],cex=0.6)
text(-0.47,7.9,label=df[,1][7],cex=0.6);text(-0.85,6.7,label=df[,1][6],cex=0.6)
text(-0.49,5.5,label=df[,1][5],cex=0.6);text(-1.44,4.3,label=df[,1][4],cex=0.6)
text(-0.49,3.1,label=df[,1][3],cex=0.6);text(-0.93,1.9,label=df[,1][2],cex=0.6)
text(-0.69,0.7,label=df[,1][1],cex=0.6)
# add lines
segments(0,-1,0,40,lty=3,col="grey")
segments(2,-1,2,40,lty=3,col="grey")
segments(4,-1,4,40,lty=3,col="grey")
segments(6,-1,6,40,lty=3,col="grey")
segments(4,-1,4,40,lty=3,col="grey")
segments(-2,-1,-2,40,lty=3,col="grey")
segments(-4,-1,-4,40,lty=3,col="grey")
segments(-6,-1,-6,40,lty=3,col="grey")
legend("topleft",bty="n",cex=0.65,inset=c(0.01,-0.02),ncol=2,
legend=c("groupA","groupB"),
col=c("forestgreen", "goldenrod"),pch=c(15,15))
dev.off()
Here's a solution using dplyr to create some extra columns for the label position and the justification, and then theming the plot to match reasonably closely what you originally had:
library("dplyr")
library("ggplot2")
df <- df %>%
mutate(
genus = factor(genus, levels = genus[order(value, decreasing = TRUE)]),
label_y = ifelse(value < 0, 0.2, -0.2),
label_hjust = ifelse(value < 0, 0, 1)
)
my_plot <- ggplot(df, aes(x = genus, y = value, fill = class)) +
geom_bar(stat = "identity", col = "black") +
geom_text(aes(y = label_y, label = genus, hjust = label_hjust)) +
coord_flip() +
scale_fill_manual(values = c(groupA = "forestgreen", groupB = "goldenrod")) +
theme_minimal() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
legend.position = "top",
legend.justification = 0.05,
legend.title = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_line(colour = "grey80", linetype = "dashed"),
panel.grid.minor.x = element_blank()) +
scale_y_continuous(expression(log[10](italic("LDA score"))),
breaks = -6:6, limits = c(-6, 6))
print(my_plot)
ggsave("lefse.tiff", width = 5, height = 5, dpi = 400, my_plot)
I would try this:
library(ggplot2)
# change the factor levels so it will be displayed in correct order
df$genus <- factor(df$genus, levels = as.character(df$genus))
ggplot(df, aes(x = genus, y = value)) +
geom_bar(aes(fill = class), stat = 'identity') + # color by class
coord_flip() + # horizontal bars
geom_text(aes(y = 0, label = genus, hjust = as.numeric(value > 0))) + # label text based on value
theme(axis.text.y = element_blank())
In the above, hjust will change the direction of the text relative to its y position (flipped to x now), which is similar to pos parameter in base R plot. So you code could also be simplified with a vector for pos argument to text function.
Two options:
library(ggplot2)
# my data
df <- data.frame(genus=c("Prevotella","Streptococcus","YRC22","Phascolarctobacterium","SMB53","Epulopiscium",
"CF231","Anaerovibrio","Paludibacter","Parabacteroides","Desulfovibrio","Sutterella",
"Roseburia","Others__0_5_","Akkermansia","Bifidobacterium","Campylobacter","Fibrobacter",
"Coprobacillus","Bulleidia","f_02d06","Dorea","Blautia","Enterococcus","Eubacterium",
"p_75_a5","Clostridium","Coprococcus","Oscillospira","Escherichia","Lactobacillus"),
class=c(rep("groupA",18),rep("groupB",13)),
value=c(4.497311,4.082377,3.578472,3.567310,3.410453,3.390026,
3.363542,3.354532,3.335634,3.284165,3.280838,3.218053,
3.071454,3.026663,3.021749,3.004152,2.917656,2.811455,
-2.997631,-3.074314,-3.117659,-3.151276,-3.170631,-3.194323,
-3.225207,-3.274281,-3.299712,-3.299875,-3.689051,-3.692055,
-4.733154)
)
ggplot(df, aes(reorder(genus, -value), value, fill = class)) +
geom_bar(stat = "identity") +
coord_flip() +
geom_text(aes(label = genus,
y = ifelse(value < 1, 1.5, -1.5)), size = 2.5) +
theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
Or this:
library(ggplot2)
# my data
df <- data.frame(genus=c("Prevotella","Streptococcus","YRC22","Phascolarctobacterium","SMB53","Epulopiscium",
"CF231","Anaerovibrio","Paludibacter","Parabacteroides","Desulfovibrio","Sutterella",
"Roseburia","Others__0_5_","Akkermansia","Bifidobacterium","Campylobacter","Fibrobacter",
"Coprobacillus","Bulleidia","f_02d06","Dorea","Blautia","Enterococcus","Eubacterium",
"p_75_a5","Clostridium","Coprococcus","Oscillospira","Escherichia","Lactobacillus"),
class=c(rep("groupA",18),rep("groupB",13)),
value=c(4.497311,4.082377,3.578472,3.567310,3.410453,3.390026,
3.363542,3.354532,3.335634,3.284165,3.280838,3.218053,
3.071454,3.026663,3.021749,3.004152,2.917656,2.811455,
-2.997631,-3.074314,-3.117659,-3.151276,-3.170631,-3.194323,
-3.225207,-3.274281,-3.299712,-3.299875,-3.689051,-3.692055,
-4.733154)
)
ggplot(df, aes(reorder(genus, -value), value, fill = class)) +
geom_bar(stat = "identity") +
coord_flip() +
xlab("genus")