heatmap in ggplot, different color for each group - r

I am trying to produce a heatmap in ggplot. I want each group to have different color gradient, but don't know how to do that. My current code looks like this:
## dummy data -----------------------------------------------------------------------------
data <- data.frame(
group = sample(c("Direct Patient Care", "Indirect Patient Care", "Education", "Rounds", "Handoff", "Misce"), 30, replace = T),
pct = rnorm(30, mean = 50, sd = 8)
)
## generate group id
data <- data %>%
group_by(group) %>%
mutate(id = row_number())
data$grpid <- with(data, ifelse(group == "Direct Patient Care", 1, ifelse(group == "Indirect Patient Care", 2,
ifelse(group == "Education", 3,
ifelse(group == "Rounds", 4,
ifelse(group == "Handoff", 5,6 ))))))
## draw graph ------------------------------------------------------------------------------
library(ggplot2)
p <- ggplot(data, aes(x=id, y=group, fill = pct)) +
theme(panel.background = element_rect(fill = "white", colour = "grey50"), aspect.ratio = 0.4) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)+
# guides(fill = guide_legend("Time, %")) +
geom_tile() +
scale_x_continuous (name = " ", breaks = seq(1, 8, by = 1)) +
scale_y_discrete(name = " ") +
theme(axis.text.x = element_text(angle = 0,hjust = 1,vjust = 1), plot.title = element_text(hjust = 0.5) ) +
ggtitle("Heatmap of time spent doing activities across 194 shifts")
p + scale_fill_gradient2(low = "white", high = "red", limits = c(0, 80), breaks = c(0, 10, 20, 30, 40, 50, 60, 70), guide = guide_legend("Time, %")) ## change the color theme ##
And the resulting figure looks like this:
How can I change the color theme for each group, like red for 'Rounds', blue for 'Misce', green for 'Handoff' etc...
Many thanks!

You can do this by creating your own rescaled value in your data and then slightly "hacking" the alpha aesthetic combined with the fill aesthetic:
library(tidyverse)
data %>%
group_by(group) %>%
mutate(rescale = scales::rescale(pct)) %>%
ggplot(., aes(x = factor(id), y = group)) +
geom_tile(aes(alpha = rescale, fill = group), color = "white") +
scale_alpha(range = c(0.1, 1))
First we create a new column called rescale which rescales the pct from 0 to 1 then you force the scale_alpha(range = c(0, 1)) [note, in this case I used c(0.1, 1) so that you can still "see" the zero points.
Finally, you probably want to remove the guides:
data %>%
group_by(group) %>%
mutate(rescale = scales::rescale(pct)) %>%
ggplot(., aes(x = factor(id), y = group)) +
geom_tile(aes(alpha = rescale, fill = group), color = "white") +
scale_alpha(range = c(0.1, 1)) +
theme(legend.position = "none")
N.B. by using aes(x = factor(id)... you can get around manually setting your x-axis since in this case it appears you want to treat it as a factor not a numeric scale.
Finally, if you really want to get fancy, you could double-encode the axis.text.y colors to that of the levels of your factor (i.e., data$group) variable:
data %>%
group_by(group) %>%
mutate(rescale = scales::rescale(pct)) %>%
ggplot(., aes(x = factor(id), y = group)) +
geom_tile(aes(alpha = rescale, fill = group), color = "white") +
scale_alpha(range = c(0.1, 1)) +
theme(legend.position = "none",
axis.text.y = element_text(color = scales::hue_pal()(length(levels(data$group)))),
axis.ticks = element_blank()) +
labs(x = "", y = "")

Related

Plotting lines in ggplot2

Is it possible to make this graph with ggplot, ordering the graph by the variable "t" in ascending order, distinguishing each "t" according to status (black or white circle, it can be another marker...) and if possible the variable "id " on the ordinate axis.
Id<- c(1,2,3,4)
t<- c(10,5,20,15)
status<- c(0,1,0,1)
df<- data.frame(Id, t, status)
Maybe you want something like this using geom_segment for the lines with geom_vline for the vertical lines. Using shape and fill aesthetics to fill the points with "black" and "white" per status. You can use the following code:
Id<- c(1,2,3,4)
t<- c(10,5,20,15)
status<- c(0,1,0,1)
df<- data.frame(Id, t, status)
library(ggplot2)
library(dplyr)
library(forcats)
df %>%
mutate(Id = as.factor(Id),
status = as.factor(status)) %>%
ggplot(aes(x = t, y = fct_reorder(Id, t, .desc = TRUE), shape = status, fill = status)) +
geom_point() +
geom_segment(aes(x = 0, xend = t, y = Id, yend = Id)) +
geom_vline(xintercept=c(t),linetype="dotted", alpha = 0.4) +
scale_shape_manual(values=c(21, 21), name = "shapes!") +
scale_fill_manual(values=c("black", "white")) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 25)) +
labs(x = "", y = "Id") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "none",
axis.title.y = element_text(angle=0))
Created on 2022-07-25 by the reprex package (v2.0.1)

R: How specify bars color in barplot?

My dataframe has 2 numerical and categorical columns. Negative Bars are filled with white but with with the same coloured borders as corresponding positive bars.
Now I need to have bars with same colour and B category in different colour
df <- data.frame(model = c("A","B","C","D","B","C"),
category = c("origin", "origin","origin","abroad","abroad","abroad"),
pos = c(40,50,45,100,105,80),
neg = c(-10,-5,-4,-16,-7,-2),
Colour = c("chocolate","deeppink4","yellow","steelblue3","deeppink4","yellow"))
Colour <- as.character(df$Colour)
Colour <- c(Colour,"white")
names(Colour) <- c(as.character(df$model),"white")
df <- df %>% pivot_longer(., cols=c('pos','neg'),
names_to = 'sign') %>%
mutate(Groups = paste(category, model),
sign = factor(sign, levels = c("neg", "pos")))
ggplot() +
# plot positive with fill and colour based on model
geom_col(aes(value, tidytext::reorder_within(model, value, category),
fill = model, color = model),
data = df[df$sign == "pos", ],
position = "stack") +
# plot negative with colour from based on model, but fill fixed as "white"
geom_col(aes(value, tidytext::reorder_within(model, value, category),
color = model),
data = df[df$sign == "neg", ],
fill = "white",
position = "stack") +
# the rest is same as OP's code
tidytext::scale_y_reordered() +
labs(fill = "model") +
facet_grid(category ~ ., switch = "y",scales = "free_y") +
theme(axis.text.x = element_text(angle = 90),
strip.background = element_rect(fill = "white"),
strip.placement = "outside",
strip.text.y.left = element_text(angle = 0),
panel.spacing = unit(0, "lines")) +
theme(legend.position="none") +
labs( title = "BarPlot",
subtitle = "changes",
y = " ")
Original output
Expected output:
I've added mod_col to the df so B's green and everything else purple. Then used mod_col for the fill and colour in the geom_s. And added scale_*_identity to use those colours.
library(tidyverse)
df <- data.frame(model = c("A","B","C","D","B","C"),
category = c("origin", "origin","origin","abroad","abroad","abroad"),
pos = c(40,50,45,100,105,80),
neg = c(-10,-5,-4,-16,-7,-2),
Colour = c("chocolate","deeppink4","yellow","steelblue3","deeppink4","yellow")
)
# Colour <- as.character(df$Colour)
# Colour <- c(Colour,"white")
# names(Colour) <- c(as.character(df$model),"white")
df <- df %>% pivot_longer(., cols=c('pos','neg'),
names_to = 'sign') %>%
mutate(Groups = paste(category, model),
sign = factor(sign, levels = c("neg", "pos")),
mod_col = if_else(model == "B", "green", "purple"))
ggplot() +
# plot positive with fill and colour based on model
geom_col(aes(value, tidytext::reorder_within(model, value, category),
fill = mod_col, color = mod_col),
data = df[df$sign == "pos", ],
position = "stack") +
# plot negative with colour from based on model, but fill fixed as "white"
geom_col(aes(value, tidytext::reorder_within(model, value, category),
color = mod_col),
data = df[df$sign == "neg", ],
fill = "white",
position = "stack") +
# the rest is same as OP's code
scale_fill_identity() +
scale_colour_identity() +
tidytext::scale_y_reordered() +
labs(fill = "model") +
facet_grid(category ~ ., switch = "y",scales = "free_y") +
theme(axis.text.x = element_text(angle = 90),
strip.background = element_rect(fill = "white"),
strip.placement = "outside",
strip.text.y.left = element_text(angle = 0),
panel.spacing = unit(0, "lines")) +
theme(legend.position="none") +
labs( title = "BarPlot",
subtitle = "changes",
y = " ")
Created on 2022-06-02 by the reprex package (v2.0.1)

Plotting two way bar chart using ggplot2

I have the following randomly created data:
t<- matrix(sample.int(100,size=20,replace=TRUE),nrow=12,ncol=20)
a = list()
b = list()
for (x in (1:20) ) b[[x]] <- paste0("X_", x)
for (x in (1:12) ) a[[x]] <- paste0("X", x)
row.names(t) <- rbind(a)
colnames(t) <- rbind(b)
t <- as.data.frame(t)
Here t is a hypothetical two way table of frequencies, I am trying to plot a graph like the one given here using ggplot2
I am not sure how can I make t in such a way that it can be used in ggplot2 code given in the link above. Also, I appreciate if you can provide suggestions on how to visualize a larger two way table, for instance, if dimension of t grows to something 30 x 50.
Here's one approach:
EDIT to show values underneath:
t %>%
rownames_to_column() %>%
pivot_longer(-rowname) %>%
mutate(across(rowname:name, fct_inorder)) %>%
ggplot(aes(x = 1, y = value)) +
geom_col() +
geom_text(aes(x = 1, y = 0, label = value), vjust = 1.1, size = 2.5) +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(breaks = NULL) +
facet_grid(rowname~name) +
coord_cartesian(clip = "off")
Here is a modification of Jon Springs code with some "layout" tweaking:
library(tidyverse)
df %>%
rownames_to_column() %>%
pivot_longer(-rowname) %>%
mutate(across(rowname:name, fct_inorder)) %>%
ggplot(aes(x = 1, y = value, fill=value)) +
geom_col(width = 0.5) +
geom_text(aes(x = 1, y = 0, label = value), vjust = 1.1, size = 2.5) +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(breaks = NULL) +
facet_grid(rowname~name, switch = "both") +
coord_cartesian(clip = "off") +
theme_void() +
theme(strip.background = element_blank(),
strip.text.y.left = element_text(angle = 0),
panel.spacing = unit(1, "lines"),
strip.placement = "outside",
strip.switch.pad.grid = unit(0.2, "in"))+
guides(fill="none")

Bar charts connected by lines / How to connect two graphs arranged with grid.arrange in R / ggplot2

At Facebook research, I found these beautiful bar charts which are connected by lines to indicate rank changes:
https://research.fb.com/do-jobs-run-in-families/
I would like to create them using ggplot2. The bar-chart-part was easy:
library(ggplot2)
library(ggpubr)
state1 <- data.frame(state=c(rep("ALABAMA",3), rep("CALIFORNIA",3)),
value=c(61,94,27,10,30,77),
type=rep(c("state","local","fed"),2),
cumSum=c(rep(182,3), rep(117,3)))
state2 <- data.frame(state=c(rep("ALABAMA",3), rep("CALIFORNIA",3)),
value=c(10,30,7,61,94,27),
type=rep(c("state","local","fed"),2),
cumSum=c(rep(117,3), rep(182,3)))
fill <- c("#40b8d0", "#b2d183", "#F9756D")
p1 <- ggplot(data = state1) +
geom_bar(aes(x = reorder(state, value), y = value, fill = type), stat="identity") +
theme_bw() +
scale_fill_manual(values=fill) +
labs(x="", y="Total budget in 1M$") +
theme(legend.position="none",
legend.direction="horizontal",
legend.title = element_blank(),
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(), panel.background = element_blank()) +
coord_flip()
p2 <- ggplot(data = state2) +
geom_bar(aes(x = reorder(state, value), y = value, fill = type), stat="identity") +
theme_bw() +
scale_fill_manual(values=fill) + labs(x="", y="Total budget in 1M$") +
theme(legend.position="none",
legend.direction="horizontal",
legend.title = element_blank(),
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) +
scale_x_discrete(position = "top") +
scale_y_reverse() +
coord_flip()
p3 <- ggarrange(p1, p2, common.legend = TRUE, legend = "bottom")
But I couldn't come up with a solution to the line-part. When adding lines e.g. to the left side by
p3 + geom_segment(aes(x = rep(1:2, each=3), xend = rep(1:10, each=3),
y = cumSum[order(cumSum)], yend=cumSum[order(cumSum)]+10), size = 1.2)
The problem is that the lines will not be able to cross over to the right side.
It looks like this:
Basically, I would like to connect the 'California' bar on the left with the Caifornia bar on the right.
To do that, I think, I have to get access to the superordinate level of the graph somehow. I've looked into viewports and was able to overlay the two bar charts with a chart made out of geom_segment but then I couldn't figure out the right layout for the lines:
subplot <- ggplot(data = state1) +
geom_segment(aes(x = rep(1:2, each=3), xend = rep(1:2, each=3),
y = cumSum[order(cumSum)], yend =cumSum[order(cumSum)]+10),
size = 1.2)
vp <- viewport(width = 1, height = 1, x = 1, y = unit(0.7, "lines"),
just ="right", "bottom"))
print(p3)
print(subplot, vp = vp)
Help or pointers are greatly appreciated.
This is a really interesting problem. I approximated it using the patchwork library, which lets you add ggplots together and gives you an easy way to control their layout—I much prefer it to doing anything grid.arrange-based, and for some things it works better than cowplot.
I expanded on the dataset just to get some more values in the two data frames.
library(tidyverse)
library(patchwork)
set.seed(1017)
state1 <- data_frame(
state = rep(state.name[1:5], each = 3),
value = floor(runif(15, 1, 100)),
type = rep(c("state", "local", "fed"), times = 5)
)
state2 <- data_frame(
state = rep(state.name[1:5], each = 3),
value = floor(runif(15, 1, 100)),
type = rep(c("state", "local", "fed"), times = 5)
)
Then I made a data frame that assigns ranks to each state based on other values in their original data frame (state1 or state2).
ranks <- bind_rows(
state1 %>% mutate(position = 1),
state2 %>% mutate(position = 2)
) %>%
group_by(position, state) %>%
summarise(state_total = sum(value)) %>%
mutate(rank = dense_rank(state_total)) %>%
ungroup()
I made a quick theme to keep things very minimal and drop axis marks:
theme_min <- function(...) theme_minimal(...) +
theme(panel.grid = element_blank(), legend.position = "none", axis.title = element_blank())
The bump chart (the middle one) is based on the ranks data frame, and has no labels. Using factors instead of numeric variables for position and rank gave me a little more control over spacing, and lets the ranks line up with discrete 1 through 5 values in a way that will match the state names in the bar charts.
p_ranks <- ggplot(ranks, aes(x = as.factor(position), y = as.factor(rank), group = state)) +
geom_path() +
scale_x_discrete(breaks = NULL, expand = expand_scale(add = 0.1)) +
scale_y_discrete(breaks = NULL) +
theme_min()
p_ranks
For the left bar chart, I sort the states by value and turn the values negative to point to the left, then give it the same minimal theme:
p_left <- state1 %>%
mutate(state = as.factor(state) %>% fct_reorder(value, sum)) %>%
arrange(state) %>%
mutate(value = value * -1) %>%
ggplot(aes(x = state, y = value, fill = type)) +
geom_col(position = "stack") +
coord_flip() +
scale_y_continuous(breaks = NULL) +
theme_min() +
scale_fill_brewer()
p_left
The right bar chart is pretty much the same, except the values stay positive and I moved the x-axis to the top (becomes right when I flip the coordinates):
p_right <- state2 %>%
mutate(state = as.factor(state) %>% fct_reorder(value, sum)) %>%
arrange(state) %>%
ggplot(aes(x = state, y = value, fill = type)) +
geom_col(position = "stack") +
coord_flip() +
scale_x_discrete(position = "top") +
scale_y_continuous(breaks = NULL) +
theme_min() +
scale_fill_brewer()
Then because I've loaded patchwork, I can add the plots together and specify the layout.
p_left + p_ranks + p_right +
plot_layout(nrow = 1)
You may want to adjust spacing and margins some more, such as with the expand_scale call with the bump chart. I haven't tried this with axis marks along the y-axes (i.e. bottoms after flipping), but I have a feeling things might get thrown out of whack if you don't add a dummy axis to the ranks. Plenty still to mess around with, but it's a cool visualization project you posed!
Here's a pure ggplot2 solution, which combines the underlying data frames into one & plots everything in a single plot:
Data manipulation:
library(dplyr)
bar.width <- 0.9
# combine the two data sources
df <- rbind(state1 %>% mutate(source = "state1"),
state2 %>% mutate(source = "state2")) %>%
# calculate each state's rank within each data source
group_by(source, state) %>%
mutate(state.sum = sum(value)) %>%
ungroup() %>%
group_by(source) %>%
mutate(source.rank = as.integer(factor(state.sum))) %>%
ungroup() %>%
# calculate the dimensions for each bar
group_by(source, state) %>%
arrange(type) %>%
mutate(xmin = lag(cumsum(value), default = 0),
xmax = cumsum(value),
ymin = source.rank - bar.width / 2,
ymax = source.rank + bar.width / 2) %>%
ungroup() %>%
# shift each data source's coordinates away from point of origin,
# in order to create space for plotting lines
mutate(x = ifelse(source == "state1", -max(xmax) / 2, max(xmax) / 2)) %>%
mutate(xmin = ifelse(source == "state1", x - xmin, x + xmin),
xmax = ifelse(source == "state1", x - xmax, x + xmax)) %>%
# calculate label position for each data source
group_by(source) %>%
mutate(label.x = max(abs(xmax))) %>%
ungroup() %>%
mutate(label.x = ifelse(source == "state1", -label.x, label.x),
hjust = ifelse(source == "state1", 1.1, -0.1))
Plot:
ggplot(df,
aes(x = x, y = source.rank,
xmin = xmin, xmax = xmax,
ymin = ymin, ymax = ymax,
fill = type)) +
geom_rect() +
geom_line(aes(group = state)) +
geom_text(aes(x = label.x, label = state, hjust = hjust),
check_overlap = TRUE) +
# allow some space for the labels; this may be changed
# depending on plot dimensions
scale_x_continuous(expand = c(0.2, 0)) +
scale_fill_manual(values = fill) +
theme_void() +
theme(legend.position = "top")
Data source (same as #camille's):
set.seed(1017)
state1 <- data_frame(
state = rep(state.name[1:5], each = 3),
value = floor(runif(15, 1, 100)),
type = rep(c("state", "local", "fed"), times = 5)
)
state2 <- data_frame(
state = rep(state.name[1:5], each = 3),
value = floor(runif(15, 1, 100)),
type = rep(c("state", "local", "fed"), times = 5)
)

ggplot insert line between factor levels

I am making a tilemap in ggplot as below. 2 questions:
1) How can I expand the x-axis limits to label my groups at x = 4?
2) How can I put horizontal lines between Groups (i.e. a line between 1 and 2, 2 and 3, etc.) automatically, not specifying y-value manually?
require(tidyverse)
set.seed(1)
df <- data.frame(ID = as.character(c(1:50)),
Group = sample(1:8, 50, replace = T),
var1 = sample(c('Y', 'N'), 50, replace = T),
var2 = sample(c('Y', 'N'), 50, replace = T),
var3 = sample(c('Y', 'N'), 50, replace = T)) %>%
gather('var', 'y_n', var1:var3) %>%
arrange(-Group) %>%
mutate(ID = factor(ID, levels = unique(ID, ordered = T)))
ggplot(df, aes(var, ID, label = Group))+
geom_tile(aes(fill = y_n), color = 'white')+
scale_fill_manual(values = c('white', 'lightblue'))+
scale_x_discrete(expand = c(0, 0))+
geom_text(x = 3.5, hjust = 'right')
Using facets solves both your problems: if you facet by Group you can edit the facet panel to specify a black border around each group, and it will automatically label each group outside the plot area.
ggplot(df, aes(var, ID, label = Group)) +
geom_tile(aes(fill = y_n), color = 'white') +
scale_fill_manual(values = c('white', 'lightblue')) +
scale_x_discrete(expand = c(0, 0)) +
facet_grid(Group~., scales = "free", space = "free") + #facet by group
theme(strip.background = element_blank(), #remove background for facet labels
panel.border = element_rect(colour = "black", fill = NA), #add black border
panel.spacing = unit(0, "lines")) #remove space between facets
Within facet_grid(), It's important to add scales = "free" so each facet has only the y values present for that group, and add space = free so the size of each group is adjusted based on how many y-values it has.
You can try a geom_hline approach. Added also vertical lines for demonstrantion purposes.
# calculate the group numbers
gr <- df %>%
group_by(Group) %>%
summarise(n=length(unique(ID))) %>%
arrange(-Group) %>%
mutate(nn=cumsum(n))
ggplot(df, aes(var, ID, label = Group))+
geom_tile(aes(fill = y_n), color = 'white')+
scale_fill_manual(values = c('white', 'lightblue'))+
scale_x_discrete(expand = c(0, 0))+
geom_text(x = 3.5, hjust = 'right') +
geom_vline(xintercept = 1:length(unique(df$var))+0.5)+
geom_hline(yintercept = gr$nn+0.5)

Resources