Plotting two way bar chart using ggplot2 - r

I have the following randomly created data:
t<- matrix(sample.int(100,size=20,replace=TRUE),nrow=12,ncol=20)
a = list()
b = list()
for (x in (1:20) ) b[[x]] <- paste0("X_", x)
for (x in (1:12) ) a[[x]] <- paste0("X", x)
row.names(t) <- rbind(a)
colnames(t) <- rbind(b)
t <- as.data.frame(t)
Here t is a hypothetical two way table of frequencies, I am trying to plot a graph like the one given here using ggplot2
I am not sure how can I make t in such a way that it can be used in ggplot2 code given in the link above. Also, I appreciate if you can provide suggestions on how to visualize a larger two way table, for instance, if dimension of t grows to something 30 x 50.

Here's one approach:
EDIT to show values underneath:
t %>%
rownames_to_column() %>%
pivot_longer(-rowname) %>%
mutate(across(rowname:name, fct_inorder)) %>%
ggplot(aes(x = 1, y = value)) +
geom_col() +
geom_text(aes(x = 1, y = 0, label = value), vjust = 1.1, size = 2.5) +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(breaks = NULL) +
facet_grid(rowname~name) +
coord_cartesian(clip = "off")

Here is a modification of Jon Springs code with some "layout" tweaking:
library(tidyverse)
df %>%
rownames_to_column() %>%
pivot_longer(-rowname) %>%
mutate(across(rowname:name, fct_inorder)) %>%
ggplot(aes(x = 1, y = value, fill=value)) +
geom_col(width = 0.5) +
geom_text(aes(x = 1, y = 0, label = value), vjust = 1.1, size = 2.5) +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(breaks = NULL) +
facet_grid(rowname~name, switch = "both") +
coord_cartesian(clip = "off") +
theme_void() +
theme(strip.background = element_blank(),
strip.text.y.left = element_text(angle = 0),
panel.spacing = unit(1, "lines"),
strip.placement = "outside",
strip.switch.pad.grid = unit(0.2, "in"))+
guides(fill="none")

Related

corrplot one column multiple groups single plot

I would like a corrplot, where I only have the first column. I got that solution from here:
Corrplot with only one variable on x axis
library(corrplot)
corrplot(cor(iris[,1:4])[1:4,1, drop=FALSE], cl.pos='n')
but I would like the first column to repeat WITHIN THE SAME PLOT by group:
I am not married to using corrplot but need some similar solution. Thanks for any and all help.
Here is a hacky way to do it without an additional package:
library(corrplot)
Dat<-cor(iris[,1:4])[1:4,1, drop=FALSE]
PlotDat<-cbind(Dat,Dat,Dat)
corrplot(PlotDat, cl.pos='n')
If you want complete control over your plot, you could do something like this using ggplot2
library(tidyverse)
as.data.frame(cor(iris[1:4])[,rep(1, 3)]) %>%
setNames(1:3) %>%
rownames_to_column() %>%
pivot_longer(-rowname) %>%
mutate(rowname = factor(rowname, rev(unique(rowname)))) %>%
ggplot(aes(name, rowname)) +
geom_tile(fill = 'white', color = 'black') +
geom_point(aes(size = abs(value), color = value)) +
coord_equal() +
scale_x_discrete(NULL, labels = rep('Sepal.Length', 3), expand = c(0, 0),
position = 'top') +
scale_y_discrete(NULL, expand = c(0, 0)) +
scale_color_gradientn(colors = c('red3', 'red', 'white', '#4080ce', '#063062'),
limits = c(-1, 1), guide = 'none') +
scale_size_continuous(limits = c(0, 1), range = c(0, 20), guide = 'none') +
theme(axis.text.x = element_text(angle = 90),
plot.margin = margin(20, 20, 20, 20))

Bar charts connected by lines / How to connect two graphs arranged with grid.arrange in R / ggplot2

At Facebook research, I found these beautiful bar charts which are connected by lines to indicate rank changes:
https://research.fb.com/do-jobs-run-in-families/
I would like to create them using ggplot2. The bar-chart-part was easy:
library(ggplot2)
library(ggpubr)
state1 <- data.frame(state=c(rep("ALABAMA",3), rep("CALIFORNIA",3)),
value=c(61,94,27,10,30,77),
type=rep(c("state","local","fed"),2),
cumSum=c(rep(182,3), rep(117,3)))
state2 <- data.frame(state=c(rep("ALABAMA",3), rep("CALIFORNIA",3)),
value=c(10,30,7,61,94,27),
type=rep(c("state","local","fed"),2),
cumSum=c(rep(117,3), rep(182,3)))
fill <- c("#40b8d0", "#b2d183", "#F9756D")
p1 <- ggplot(data = state1) +
geom_bar(aes(x = reorder(state, value), y = value, fill = type), stat="identity") +
theme_bw() +
scale_fill_manual(values=fill) +
labs(x="", y="Total budget in 1M$") +
theme(legend.position="none",
legend.direction="horizontal",
legend.title = element_blank(),
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(), panel.background = element_blank()) +
coord_flip()
p2 <- ggplot(data = state2) +
geom_bar(aes(x = reorder(state, value), y = value, fill = type), stat="identity") +
theme_bw() +
scale_fill_manual(values=fill) + labs(x="", y="Total budget in 1M$") +
theme(legend.position="none",
legend.direction="horizontal",
legend.title = element_blank(),
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) +
scale_x_discrete(position = "top") +
scale_y_reverse() +
coord_flip()
p3 <- ggarrange(p1, p2, common.legend = TRUE, legend = "bottom")
But I couldn't come up with a solution to the line-part. When adding lines e.g. to the left side by
p3 + geom_segment(aes(x = rep(1:2, each=3), xend = rep(1:10, each=3),
y = cumSum[order(cumSum)], yend=cumSum[order(cumSum)]+10), size = 1.2)
The problem is that the lines will not be able to cross over to the right side.
It looks like this:
Basically, I would like to connect the 'California' bar on the left with the Caifornia bar on the right.
To do that, I think, I have to get access to the superordinate level of the graph somehow. I've looked into viewports and was able to overlay the two bar charts with a chart made out of geom_segment but then I couldn't figure out the right layout for the lines:
subplot <- ggplot(data = state1) +
geom_segment(aes(x = rep(1:2, each=3), xend = rep(1:2, each=3),
y = cumSum[order(cumSum)], yend =cumSum[order(cumSum)]+10),
size = 1.2)
vp <- viewport(width = 1, height = 1, x = 1, y = unit(0.7, "lines"),
just ="right", "bottom"))
print(p3)
print(subplot, vp = vp)
Help or pointers are greatly appreciated.
This is a really interesting problem. I approximated it using the patchwork library, which lets you add ggplots together and gives you an easy way to control their layout—I much prefer it to doing anything grid.arrange-based, and for some things it works better than cowplot.
I expanded on the dataset just to get some more values in the two data frames.
library(tidyverse)
library(patchwork)
set.seed(1017)
state1 <- data_frame(
state = rep(state.name[1:5], each = 3),
value = floor(runif(15, 1, 100)),
type = rep(c("state", "local", "fed"), times = 5)
)
state2 <- data_frame(
state = rep(state.name[1:5], each = 3),
value = floor(runif(15, 1, 100)),
type = rep(c("state", "local", "fed"), times = 5)
)
Then I made a data frame that assigns ranks to each state based on other values in their original data frame (state1 or state2).
ranks <- bind_rows(
state1 %>% mutate(position = 1),
state2 %>% mutate(position = 2)
) %>%
group_by(position, state) %>%
summarise(state_total = sum(value)) %>%
mutate(rank = dense_rank(state_total)) %>%
ungroup()
I made a quick theme to keep things very minimal and drop axis marks:
theme_min <- function(...) theme_minimal(...) +
theme(panel.grid = element_blank(), legend.position = "none", axis.title = element_blank())
The bump chart (the middle one) is based on the ranks data frame, and has no labels. Using factors instead of numeric variables for position and rank gave me a little more control over spacing, and lets the ranks line up with discrete 1 through 5 values in a way that will match the state names in the bar charts.
p_ranks <- ggplot(ranks, aes(x = as.factor(position), y = as.factor(rank), group = state)) +
geom_path() +
scale_x_discrete(breaks = NULL, expand = expand_scale(add = 0.1)) +
scale_y_discrete(breaks = NULL) +
theme_min()
p_ranks
For the left bar chart, I sort the states by value and turn the values negative to point to the left, then give it the same minimal theme:
p_left <- state1 %>%
mutate(state = as.factor(state) %>% fct_reorder(value, sum)) %>%
arrange(state) %>%
mutate(value = value * -1) %>%
ggplot(aes(x = state, y = value, fill = type)) +
geom_col(position = "stack") +
coord_flip() +
scale_y_continuous(breaks = NULL) +
theme_min() +
scale_fill_brewer()
p_left
The right bar chart is pretty much the same, except the values stay positive and I moved the x-axis to the top (becomes right when I flip the coordinates):
p_right <- state2 %>%
mutate(state = as.factor(state) %>% fct_reorder(value, sum)) %>%
arrange(state) %>%
ggplot(aes(x = state, y = value, fill = type)) +
geom_col(position = "stack") +
coord_flip() +
scale_x_discrete(position = "top") +
scale_y_continuous(breaks = NULL) +
theme_min() +
scale_fill_brewer()
Then because I've loaded patchwork, I can add the plots together and specify the layout.
p_left + p_ranks + p_right +
plot_layout(nrow = 1)
You may want to adjust spacing and margins some more, such as with the expand_scale call with the bump chart. I haven't tried this with axis marks along the y-axes (i.e. bottoms after flipping), but I have a feeling things might get thrown out of whack if you don't add a dummy axis to the ranks. Plenty still to mess around with, but it's a cool visualization project you posed!
Here's a pure ggplot2 solution, which combines the underlying data frames into one & plots everything in a single plot:
Data manipulation:
library(dplyr)
bar.width <- 0.9
# combine the two data sources
df <- rbind(state1 %>% mutate(source = "state1"),
state2 %>% mutate(source = "state2")) %>%
# calculate each state's rank within each data source
group_by(source, state) %>%
mutate(state.sum = sum(value)) %>%
ungroup() %>%
group_by(source) %>%
mutate(source.rank = as.integer(factor(state.sum))) %>%
ungroup() %>%
# calculate the dimensions for each bar
group_by(source, state) %>%
arrange(type) %>%
mutate(xmin = lag(cumsum(value), default = 0),
xmax = cumsum(value),
ymin = source.rank - bar.width / 2,
ymax = source.rank + bar.width / 2) %>%
ungroup() %>%
# shift each data source's coordinates away from point of origin,
# in order to create space for plotting lines
mutate(x = ifelse(source == "state1", -max(xmax) / 2, max(xmax) / 2)) %>%
mutate(xmin = ifelse(source == "state1", x - xmin, x + xmin),
xmax = ifelse(source == "state1", x - xmax, x + xmax)) %>%
# calculate label position for each data source
group_by(source) %>%
mutate(label.x = max(abs(xmax))) %>%
ungroup() %>%
mutate(label.x = ifelse(source == "state1", -label.x, label.x),
hjust = ifelse(source == "state1", 1.1, -0.1))
Plot:
ggplot(df,
aes(x = x, y = source.rank,
xmin = xmin, xmax = xmax,
ymin = ymin, ymax = ymax,
fill = type)) +
geom_rect() +
geom_line(aes(group = state)) +
geom_text(aes(x = label.x, label = state, hjust = hjust),
check_overlap = TRUE) +
# allow some space for the labels; this may be changed
# depending on plot dimensions
scale_x_continuous(expand = c(0.2, 0)) +
scale_fill_manual(values = fill) +
theme_void() +
theme(legend.position = "top")
Data source (same as #camille's):
set.seed(1017)
state1 <- data_frame(
state = rep(state.name[1:5], each = 3),
value = floor(runif(15, 1, 100)),
type = rep(c("state", "local", "fed"), times = 5)
)
state2 <- data_frame(
state = rep(state.name[1:5], each = 3),
value = floor(runif(15, 1, 100)),
type = rep(c("state", "local", "fed"), times = 5)
)

How to create such a figure using ggplot2 in R?

I have a matrix with many zero elements. The column names are labeled on the horizontal axis. I'd like to show explictly the nonzero elements as the bias from the vertical line for each column.
So how should construct a figure such as the example using ggplot2?
An example data can be generated as follow:
set.seed(2018)
N <- 5
p <- 40
dat <- matrix(0.0, nrow=p, ncol=N)
dat[2:7, 1] <- 4*rnorm(6)
dat[4:12, 2] <- 2.6*rnorm(9)
dat[25:33, 3] <- 2.1*rnorm(9)
dat[19:26, 4] <- 3.3*rnorm(8)
dat[33:38, 5] <- 2.9*rnorm(6)
colnames(dat) <- letters[1:5]
print(dat)
Here is another option using facet_wrap and geom_col with theme_minimal.
library(tidyverse)
dat %>%
as.data.frame() %>%
rowid_to_column("row") %>%
gather(key, value, -row) %>%
ggplot(aes(x = row, y = value, fill = key)) +
geom_col() +
facet_wrap(~ key, ncol = ncol(dat)) +
coord_flip() +
theme_minimal()
To further increase the aesthetic similarity to the plot in your original post we can
move the facet strips to the bottom,
rotate strip labels,
add "zero lines" in matching colours,
remove the fill legend, and
get rid of the x & y axis ticks/labels/title.
library(tidyverse)
dat %>%
as.data.frame() %>%
rowid_to_column("row") %>%
gather(key, value, -row) %>%
ggplot(aes(x = row, y = value, fill = key)) +
geom_col() +
geom_hline(data = dat %>%
as.data.frame() %>%
gather(key, value) %>%
count(key) %>%
mutate(y = 0),
aes(yintercept = y, colour = key), show.legend = F) +
facet_wrap(~ key, ncol = ncol(dat), strip.position = "bottom") +
coord_flip() +
guides(fill = FALSE) +
theme_minimal() +
theme(
strip.text.x = element_text(angle = 45),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank())
It would be much easier if you can provide some sample data. Thus I needed to create them and there is no guarantee that this will work for your purpose.
set.seed(123)
# creating some random sample data
df <- data.frame(id = rep(1:100, each = 3),
x = rnorm(300),
group = rep(letters[1:3], each = 100),
bias = sample(0:1, 300, replace = T, prob = c(0.7, 0.3)))
# introducing bias
df$bias <- df$bias*rnorm(nrow(df))
# calculate lower/upper bias for errorbar
df$biaslow <- apply(data.frame(df$bias), 1, function(x){min(0, x)})
df$biasupp <- apply(data.frame(df$bias), 1, function(x){max(0, x)})
Then I used kind of hack to be able to print groups in sufficient distance to make them not overlapped. Based on group I shifted bias variable and also lower and upper bias.
# I want to print groups in sufficient distance
df$bias <- as.numeric(df$group)*5 + df$bias
df$biaslow <- as.numeric(df$group)*5 + df$biaslow
df$biasupp <- as.numeric(df$group)*5 + df$biasupp
And now it is possible to plot it:
library(ggplot2)
ggplot(df, aes(x = x, col = group)) +
geom_errorbar(aes(ymin = biaslow, ymax = biasupp), width = 0) +
coord_flip() +
geom_hline(aes(yintercept = 5, col = "a")) +
geom_hline(aes(yintercept = 10, col = "b")) +
geom_hline(aes(yintercept = 15, col = "c")) +
theme(legend.position = "none") +
scale_y_continuous(breaks = c(5, 10, 15), labels = letters[1:3])
EDIT:
To incorporate special design you can add
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
to your plot.
EDIT2:
To incorporate several horizontal lines, you can create different dataset:
df2 <- data.frame(int = unique(as.numeric(df$group)*5),
gr = levels(df$group))
And use
geom_hline(data = df2, aes(yintercept = int, col = gr))
instead of copy/pasting geom_hline for each group level.

heatmap in ggplot, different color for each group

I am trying to produce a heatmap in ggplot. I want each group to have different color gradient, but don't know how to do that. My current code looks like this:
## dummy data -----------------------------------------------------------------------------
data <- data.frame(
group = sample(c("Direct Patient Care", "Indirect Patient Care", "Education", "Rounds", "Handoff", "Misce"), 30, replace = T),
pct = rnorm(30, mean = 50, sd = 8)
)
## generate group id
data <- data %>%
group_by(group) %>%
mutate(id = row_number())
data$grpid <- with(data, ifelse(group == "Direct Patient Care", 1, ifelse(group == "Indirect Patient Care", 2,
ifelse(group == "Education", 3,
ifelse(group == "Rounds", 4,
ifelse(group == "Handoff", 5,6 ))))))
## draw graph ------------------------------------------------------------------------------
library(ggplot2)
p <- ggplot(data, aes(x=id, y=group, fill = pct)) +
theme(panel.background = element_rect(fill = "white", colour = "grey50"), aspect.ratio = 0.4) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)+
# guides(fill = guide_legend("Time, %")) +
geom_tile() +
scale_x_continuous (name = " ", breaks = seq(1, 8, by = 1)) +
scale_y_discrete(name = " ") +
theme(axis.text.x = element_text(angle = 0,hjust = 1,vjust = 1), plot.title = element_text(hjust = 0.5) ) +
ggtitle("Heatmap of time spent doing activities across 194 shifts")
p + scale_fill_gradient2(low = "white", high = "red", limits = c(0, 80), breaks = c(0, 10, 20, 30, 40, 50, 60, 70), guide = guide_legend("Time, %")) ## change the color theme ##
And the resulting figure looks like this:
How can I change the color theme for each group, like red for 'Rounds', blue for 'Misce', green for 'Handoff' etc...
Many thanks!
You can do this by creating your own rescaled value in your data and then slightly "hacking" the alpha aesthetic combined with the fill aesthetic:
library(tidyverse)
data %>%
group_by(group) %>%
mutate(rescale = scales::rescale(pct)) %>%
ggplot(., aes(x = factor(id), y = group)) +
geom_tile(aes(alpha = rescale, fill = group), color = "white") +
scale_alpha(range = c(0.1, 1))
First we create a new column called rescale which rescales the pct from 0 to 1 then you force the scale_alpha(range = c(0, 1)) [note, in this case I used c(0.1, 1) so that you can still "see" the zero points.
Finally, you probably want to remove the guides:
data %>%
group_by(group) %>%
mutate(rescale = scales::rescale(pct)) %>%
ggplot(., aes(x = factor(id), y = group)) +
geom_tile(aes(alpha = rescale, fill = group), color = "white") +
scale_alpha(range = c(0.1, 1)) +
theme(legend.position = "none")
N.B. by using aes(x = factor(id)... you can get around manually setting your x-axis since in this case it appears you want to treat it as a factor not a numeric scale.
Finally, if you really want to get fancy, you could double-encode the axis.text.y colors to that of the levels of your factor (i.e., data$group) variable:
data %>%
group_by(group) %>%
mutate(rescale = scales::rescale(pct)) %>%
ggplot(., aes(x = factor(id), y = group)) +
geom_tile(aes(alpha = rescale, fill = group), color = "white") +
scale_alpha(range = c(0.1, 1)) +
theme(legend.position = "none",
axis.text.y = element_text(color = scales::hue_pal()(length(levels(data$group)))),
axis.ticks = element_blank()) +
labs(x = "", y = "")

stacked bar *bringing labels to the graph *

I'm plotting a stacked bar graph and use geom_text to insert the value and name of each stack. The problem is some stacks are very small/narrow, so that the text of two stacks overlap each other and hence is not very readable. How can I modify the code to solve this issue.
Type<-c("ddddddddddd","ddddddddddd","bbbbbbbbbbbbb","ddddddddddd","eeeeeeeeeeeeee","bbbbbbbbbbbbb","ddddddddddd","bbbbbbbbbbbbb","ddddddddddd",
"eeeeeeeeeeeeee","mmmmmmmmmmmmmmmmmmm","bbbbbbbbbbbbb","ddddddddddd","bbbbbbbbbbbbb","eeeeeeeeeeeeee")
Category<-c("mmmmm","mmmmm","gggggggggggggggggg","ffffffffffff","ffffffffffff","ffffffffffff","sanddddddddd","sanddddddddd","yyyyyyyyyyy",
"yyyyyyyyyyy","yyyyyyyyyyy","sssssssssssssss","sssssssssssssss","sssssssssssssss","ttttttttttttt")
Frequency<-c(4,1,30,7,127,11,1,1,6,9,1,200,3,4,5)
Data <- data.frame(Type, Category, Frequency)
p <- ggplot(Data, aes(x = Type, y = Frequency)) +
geom_bar(aes(fill = Category), stat="identity", show.legend = FALSE) +
geom_text(aes(label = Frequency), size = 3) +
geom_text(aes(label = Category), size = 3)
Considering your data, a facetted plot might be a better approach:
# summarise your data
library(dplyr)
d1 <- Data %>%
mutate_each(funs(substr(.,1,2)),Type,Category) %>%
group_by(Type,Category) %>%
summarise(Freq = sum(Frequency)) %>%
mutate(lbl = paste(Category,Freq)) # create a label by pasting the 'Category' and the 'Freq' variables together
# plot
ggplot(d1, aes(x = Category, y = Freq, fill = Category)) +
geom_bar(stat="identity", width = 0.7, position = position_dodge(0.8)) +
geom_text(aes(label = lbl), angle = 90, size = 5, hjust = -0.1, position = position_dodge(0.8)) +
scale_y_continuous(limits = c(0,240)) +
guides(fill = FALSE) +
facet_grid(.~Type, scales = "free", space = "free") +
theme_bw(base_size = 14)
which gives:
In the above plot I shortened the labels on purpose. If you don't want to do that, you could consider this:
d2 <- Data %>%
group_by(Type,Category) %>%
summarise(Freq = sum(Frequency)) %>%
mutate(lbl = paste(Category,Freq))
ggplot(d2, aes(x = Category, y = Freq, fill = Category)) +
geom_bar(stat="identity", width = 0.7, position = position_dodge(0.8)) +
geom_text(aes(y = 5, label = lbl), alpha = 0.6, angle = 90, size = 5, hjust = 0, position = position_dodge(0.8)) +
scale_y_continuous(limits = c(0,240)) +
guides(fill = FALSE) +
facet_grid(.~Type, scales = "free", space = "free") +
theme_bw(base_size = 14) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())
which gives:

Resources