Plot missing legend when multiple plotting - r

I am trying to make multiple plots of my data whilst colour coding them based on a grouping. I want to include legends on each plot that go outside the scope of the box. This works however, my first plot has no legend but the rest of my plots do.
For example:
library(faraway)
library(tidyverse)
library(glue)
data(savings)
group_data <- mapply(function(x, y) {
savings %>% mutate(test = ifelse(.[, y] > x, "Group 1 (GT)", "Group 2 (LT)"))
}, val, names(val), SIMPLIFY = FALSE) %>%
mapply(function(a,z) {
a %>% `colnames<-`(c(names(.)[-length(.)], glue("{z}_group")))
}, ., names(.), SIMPLIFY = FALSE) %>%
Reduce(cbind, .) %>%
.[, !duplicated(names(.))]
nn <- length(val)
ng <- names(group_data)[(length(group_data)-nn+1):length(group_data)]
n2 <- n2mfrow(nn, 2)
par(mfrow=n2, xpd=TRUE)
mapply(function(q, w){
form <- reformulate(q, response='sr')
plot(form, data=group_data, col=c('red', 'blue')[as.factor(group_data[,w])], pch=c(19, 19))
legend( x=0, 26,
legend=c("Group 1 (GT)","Group 2 (LT)"),
col=c("red","blue"), lwd=1, lty=c(0,0),
pch=c(19,19), bty='n' )
},names(val),ng, SIMPLIFY=FALSE)
Plots the following:
The data val:
list(pop15 = 35, pop75 = 2.5, dpi = 2000, ddpi = 7)
With response to the comments by #Harre, the following manipulates x for the missing legend:
if(q == 'pop15'){
legend( x=21, 26,
legend=c("Group 1 (GT)","Group 2 (GT)"),
col=c("red","blue"), lwd=1, lty=c(0,0),
pch=c(19,19), bty='n' )} else{
legend( x=0, 26,
legend=c("Group 1 (GT)","Group 2 (LT)"),
col=c("red","blue"), lwd=1, lty=c(0,0),
pch=c(19,19), bty='n' )
}
And I see all 4 legends now.
Unfortunately, If I add more columns likeso:
savings$status <- savings$pop15+1
val <- c(val, status=list(37))
Then repeat the code I get the following:
With #Harre's answer I got led to the right solution:
group_data <- mapply(function(x, y) {
savings %>% mutate(group = ifelse(.[, y] > x, "Group 1 (GT)", "Group 2 (LT)"))
}, val, names(val), SIMPLIFY = FALSE) %>%
mapply(function(a,z) {
a %>% `colnames<-`(c(names(.)[-length(.)], glue("{z}_group")))
}, ., names(.), SIMPLIFY = FALSE) %>%
Reduce(cbind, .) %>%
.[, !duplicated(names(.))] %>% pivot_longer(-c(1:(length(.)-nn))) %>% dplyr::select(group=value) %>% cbind.data.frame(savings %>% pivot_longer(-c(1)), .)
val_hline <- val %>% unlist() %>% data.frame(hline=.) %>% rownames_to_column() %>% `colnames<-`(c('name', 'hline'))
kop <- inner_join(group_data, val_hline, by='name')
kop %>% ggplot(aes(x = value, y = sr, color = group)) +
geom_point() +
facet_wrap(name ~ ., scales = "free") + theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_blank(),
panel.border = element_rect(colour = "black", fill = NA),
legend.position = "bottom") +
stat_smooth(method='lm') +
geom_vline(aes(xintercept=hline))

A suggested ggplot-solution, in the case you want to explore:
savings |>
pivot_longer(-sr) |>
# I have collected your val's here for illustration; feel free to use the lists
mutate(group = case_when(name == "pop15" & value > 35 ~ "Group 1 (GT)",
name == "pop75" & value > 2.5 ~ "Group 1 (GT)",
name == "dpi" & value > 2000 ~ "Group 1 (GT)",
name == "ddpi" & value > 7 ~ "Group 1 (GT)",
TRUE ~ "Group 2 (GT)")) |>
ggplot(aes(x = value, y = sr, color = group)) +
geom_point() +
facet_wrap(name ~ ., scales = "free") +
theme(legend.position = "bottom")

Related

How to visualize similar resistance pattern in a plot using R

I have a large dataset in which I want to group similar resistance patterns together. A plot to visualize similarity of resistance pattern is needed.
dat <- read.table(text="Id Resistance.Pattern
A SSRRSSSSR
B SSSRSSSSR
C RRRRSSRRR
D SSSSSSSSS
E SSRSSSSSR
F SSSRRSSRR
G SSSSR
H SSSSSSRRR
I RRSSRRRSS", header=TRUE)
I would separate out the values into a wider dataframe and then make a heatmap and dendrogram to compare sillimanites in patterns:
library(tidyverse)
library(ggdendro)
recode_dat <- dat |>
mutate(pat = str_split(Resistance.Pattern, "")) |>
unnest_wider(pat, names_sep = "_") |>
select(starts_with("pat_")) |>
mutate(across(everything(), ~case_when(. == "S" ~ 1, . == "R" ~ 2, is.na(.) ~0)))
rownames(recode_dat) <- dat$Id
dendro <- as.dendrogram(hclust(d = dist(x = scale(recode_dat))))
dendro_plot <- ggdendrogram(data = dendro, rotate = TRUE)
heatmap_plot <- dat |>
mutate(pat = str_split(Resistance.Pattern, "")) |>
unnest_wider(pat, names_sep = "_") |>
pivot_longer(cols = starts_with("pat_"), names_to = "pattern_position") |>
mutate(Id = factor(Id, levels = dat$Id[order.dendrogram(dendro)])) |>
ggplot(aes(pattern_position, Id))+
geom_tile(aes(fill = value))+
scale_x_discrete(labels = \(x) sub(".*_(\\d+$)", "\\1", x))+
theme(legend.position = "top")
cowplot::plot_grid(heatmap_plot, dendro_plot,nrow = 1, align = "h", axis = "tb")
It sounds as though the second column of your data frame represents sensitivity (S) and resistance (R), presumably to antibiotics (though this is not clear in your question). That being the case, you are presumably looking for something like this:
library(tidyverse)
p <- strsplit(dat$Resistance.Pattern, "")
do.call(rbind, lapply(p, \(x) c(x, rep(NA, max(lengths(p)) - length(x))))) %>%
as.data.frame() %>%
cbind(Id = dat$Id) %>%
mutate(Id = factor(Id, rev(Id))) %>%
pivot_longer(V1:V9) %>%
ggplot(aes(name, Id, fill = value)) +
geom_tile(col = "white", size = 2) +
coord_equal() +
scale_fill_manual(values = c("#e02430", "#d8d848"),
labels = c("Resistant", "Sensitive"),
na.value = "gray95") +
scale_x_discrete(name = "Antibiotic", position = "top",
labels = 1:9) +
labs(fill = "Resistance", y = "ID") +
theme_minimal(base_size = 20) +
theme(text = element_text(color = "gray30"))
I'd separate the entries by character, convert the binary data to numeric and plot the matrix as a heatmap and show the character string as rownames.
Whether to use a row and/or column clustering depends on whats desired.
library(dplyr)
library(tidyr) # for unnest_wider
library(gplots) # for heatmap.2
mm <-
dat %>%
group_by(Resistance.Pattern) %>%
summarize(Id, Resistance.Pattern) %>%
mutate(binary = strsplit(Resistance.Pattern, "")) %>%
unnest_wider(binary, names_sep="") %>%
mutate(across(starts_with("binary"), ~ as.numeric(c(R = 1, S = 0)[.x])))
mm2 <- as.matrix(mm[, -c(1,2)]) |> unname() # the numeric part
rownames(mm2) <- apply(as.matrix(mm[,1:2]), 1, paste, collapse=" ")
heatmap.2(mm2, trace="none", Colv="none", dendrogram="row",
col=c("green", "darkgreen"), margins=c(10,10))

Write a function to plot original value, mom and yoy change for time series data in 3 subplots [duplicate]

Given two monthly time series data sample from this link.
I will need to create one plot containing 3 subplots: plot1 for the original values, plot2 for month over month changes, and plot3 for year over year changes.
I'm able to draw the plot with code below, but the code is too redundant. So my question is how could achieve that in a concise way? Thanks.
library(xlsx)
library(ggplot2)
library(reshape)
library(dplyr)
library(tidyverse)
library(lubridate)
library(cowplot)
library(patchwork)
df <- read.xlsx('./sample_data.xlsx', 'Sheet1')
colnames(df)
# df
cols <- c('food_index', 'energy_index')
df <- df %>% mutate(date=as.Date(date)) %>%
mutate(across(-contains('date'), as.numeric)) %>%
mutate(date= floor_date(date, 'month')) %>%
group_by(date) %>%
summarise_at(vars(cols), funs(mean(., na.rm=TRUE))) %>%
mutate(across(cols, list(yoy = ~(. - lag(., 12))/lag(., 12)))*100) %>%
mutate(across(cols, list(mom = ~(. - lag(., 1))/lag(., 1)))*100) %>%
filter(date >= '2018-01-01' & date <= '2021-12-31') %>%
as.data.frame()
df1 <- df %>%
select(!grep('mom|yoy', names(df)))
df1_long <- melt(df1, id.vars = 'date')
plot1 <- ggplot(df1_long[!is.na(df1_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: $'
)
# MoM changes
df2 <- df %>%
select(grep('date|mom', names(df)))
df2_long <- melt(df2, id.vars = 'date')
plot2 <- ggplot(df2_long[!is.na(df2_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: %'
)
# YoY changes
df3 <- df %>%
select(grep('date|yoy', names(df)))
df3_long <- melt(df3, id.vars = 'date')
plot3 <- ggplot(df3_long[!is.na(df3_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: %'
)
plot <- plot1 + plot2 + plot3 + plot_layout(ncol=1)
# plot <- plot_grid(plot1, plot2, plot3, labels = c('Value', 'MoM', 'YoY'), label_size = 12)
plot
Out:
The expected result will be similar to the plot below (the upper plot will display the original data, the middle plot will display the mom changes data, and the lower plot will display the yoy changes data):
References:
https://waterdata.usgs.gov/blog/beyond-basic-plotting/
http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/81-ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/
Side-by-side plots with ggplot2
Maybe this is what you are looking for? By reshaping your data to the right shape, using a plot function and e.g. purrr::map2 you could achieve your desired result without duplicating your code like so.
Using some fake random example data to mimic your true data:
library(tidyr)
library(dplyr)
library(ggplot2)
df_long <- df |>
rename(food_index_raw = food_index, energy_index_raw = energy_index) |>
pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")
plot_fun <- function(x, y, ylab) {
x <- x |>
select(date, variable, value = .data[[y]]) |>
filter(!is.na(value))
ggplot(
x,
aes(
x = date,
y = value,
col = variable
)
) +
geom_line(size = 0.6, alpha = 0.5) +
geom_point(size = 1, alpha = 0.8) +
labs(
x = "",
y = ylab
)
}
yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))
plots <- purrr::map2(yvars, ylabs, plot_fun, x = df_long)
library(patchwork)
wrap_plots(plots) + plot_layout(ncol = 1)
DATA
set.seed(123)
date <- seq.POSIXt(as.POSIXct("2017-01-31"), as.POSIXct("2022-12-31"), by = "month")
food_index <- runif(length(date))
energy_index <- runif(length(date))
df <- data.frame(date, food_index, energy_index)
EDIT Adding subtitles to each plot when using patchwork is (as of the moment) a bit tricky. What I would do in this case would be to use a faceting "hack". To this end I slightly adjusted the function to take a subtitle argument and switched to purrr::pmap:
library(tidyr)
library(dplyr)
library(ggplot2)
df_long <- df |>
rename(food_index_raw = food_index, energy_index_raw = energy_index) |>
pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")
plot_fun <- function(x, y, ylab, subtitle) {
x <- x |>
select(date, variable, value = .data[[y]]) |>
filter(!is.na(value))
ggplot(
x,
aes(
x = date,
y = value,
col = variable
)
) +
geom_line(size = 0.6, alpha = 0.5) +
geom_point(size = 1, alpha = 0.8) +
facet_wrap(~.env$subtitle) +
labs(
x = "",
y = ylab
) +
theme(strip.background = element_blank(), strip.text.x = element_text(hjust = 0))
}
yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))
subtitle <- c("Original", "Month-to-Month", "Year-to-Year")
plots <- purrr::pmap(list(y = yvars, ylab = ylabs, subtitle = subtitle), plot_fun, x = df_long)
library(patchwork)
wrap_plots(plots) + plot_layout(ncol = 1)
The target output is done with facets rather than stitching plots together. You could do this too if you like, but it requires reshaping your data in a different way. Which approach you take is really a matter of taste.
library(ggplot2)
library(dplyr)
yoy <- function(x) 100 * (x - lag(x, 13)) / lag(x, 12)
mom <- function(x) 100 * (x - lag(x)) / lag(x)
df %>%
mutate(date = as.Date(date, origin = "1899-12-30"),
`Actual value (Dollars).Food Index` = food_index,
`Month-on-month change (%).Food Index` = mom(food_index),
`Year-on-year change (%).Food Index` = yoy(food_index),
`Actual value (Dollars).Energy Index` = energy_index,
`Month-on-month change (%).Energy Index` = mom(energy_index),
`Year-on-year change (%).Energy Index` = yoy(energy_index)) %>%
select(-food_index, -energy_index) %>%
tidyr::pivot_longer(-1) %>%
filter(date > as.Date("2018-01-01")) %>%
tidyr::separate(name, into = c("series", "index"), sep = "\\.") %>%
ggplot(aes(date, value, color = index)) +
geom_point(na.rm = TRUE) +
geom_line() +
facet_grid(series~., scales = "free_y") +
theme_bw(base_size = 16)
Reproducible data taken from link in question
df <- structure(list(date = c(42766, 42794, 42825, 42855, 42886, 42916,
42947, 42978, 43008, 43039, 43069, 43100, 43131, 43159, 43190,
43220, 43251, 43281, 43312, 43343, 43373, 43404, 43434, 43465,
43496, 43524, 43555, 43585, 43616, 43646, 43677, 43708, 43738,
43769, 43799, 43830, 43861, 43890, 43921, 43951, 43982, 44012,
44043, 44074, 44104, 44135, 44165, 44196, 44227, 44255, 44286,
44316, 44347, 44377, 44408, 44439, 44469, 44500, 44530, 44561
), food_index = c(58.53, 61.23, 55.32, 55.34, 61.73, 56.91, 54.27,
59.08, 60.11, 66.01, 60.11, 63.41, 69.8, 72.45, 81.11, 89.64,
88.64, 88.62, 98.27, 111.11, 129.39, 140.14, 143.44, 169.21,
177.39, 163.88, 135.07, 151.28, 172.81, 143.82, 162.13, 172.22,
176.67, 179.3, 157.27, 169.12, 192.51, 194.2, 179.4, 169.1, 193.17,
174.92, 181.92, 188.41, 192.14, 203.41, 194.19, 174.3, 174.86,
182.33, 182.82, 185.36, 192.41, 195.59, 202.6, 201.51, 225.01,
243.78, 270.67, 304.57), energy_index = c(127.36, 119.87, 120.96,
112.09, 112.19, 109.24, 109.56, 106.89, 109.35, 108.35, 112.39,
117.77, 119.52, 122.24, 120.91, 125.41, 129.72, 135.25, 139.33,
148.6, 169.62, 184.23, 204.38, 198.55, 189.29, 202.47, 220.23,
240.67, 263.12, 249.74, 240.84, 243.42, 261.2, 256.76, 258.69,
277.98, 289.63, 293.46, 310.81, 318.68, 310.04, 302.17, 298.62,
260.92, 269.29, 258.84, 241.68, 224.18, 216.36, 226.57, 235.98,
253.86, 267.37, 261.99, 273.37, 280.91, 291.84, 297.88, 292.78,
289.79)), row.names = c(NA, 60L), class = "data.frame")

Adding p-values to ggplot facets with different scales

When I try to add p-values to multifacet ggplot with different scales, they are getting shifted, although in facet_wrap to ggplot and add_xy_position to stats I indicate scales="free":
my code chunks:
stat_c <- dat_c %>%
group_by(antigen , region) %>%
wilcox_test(norm_mean ~ genotype , alternative = "greater") %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance("p.adj") %>%
add_xy_position(x = "region" , dodge = 0.8 , scales = "free")
plt_c <- ggplot(dat_c , aes(x = region , y = norm_mean , color = genotype)) +
geom_boxplot(outlier.shape=NA) +
geom_jitter(position=position_jitterdodge()) +
facet_wrap(~antigen , scales = "free" ) +
ggtitle(label = "DRS hit IHC in tent5c KO")
plt_c +
stat_pvalue_manual(stat_c , label = "p.adj") +
scale_y_continuous(expand = expansion(mult = c(0,0.1)))
I'd be supergrateful for any help!
I just ran into the same issue with some data I was working with. The trick is to calculate the pvalues for each facet independently and rbind the tables togehter. This way the x-values for each facet are derived independently.
Your solution may look something like this:
antigens = levels(factor(dat_c$antigen))
i = 0
res.stat.test = ""
for (a in antigens){
print(a)
df = dat_c %>% filter(antigen == a)
stat.test = df %>%
group_by(region) %>%
wilcox_test(., norm_mean ~ genotype, alternative = "greater") %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance("p.adj") %>%
add_xy_position(x = "region", fun = "max", dodge = 0.8)
if (i == 0){
res.stat.test = stat.test
} else {
res.stat.test = rbind(res.stat.test, stat.test)
}
}
plt_c <- ggplot(dat_c , aes(x = region , y = norm_mean , color = genotype)) +
geom_boxplot(outlier.shape=NA) +
geom_jitter(position=position_jitterdodge()) +
facet_wrap(~antigen , scales = "free" ) +
stat_pvalue_manual(res.stat.test, label = "p.adj", tip.length = 0.05, step.increase = 0.01) +
ggtitle(label = "DRS hit IHC in tent5c KO")

How to aggregate data from years to decades and plot them?

This is the graph that I would like to reproduce:
but for that I have to change the years column because on the graph the x axis is in decades. By what means could I accomplish this ?
This is what I did to extract the data from the site (https://ourworldindata.org/famines) :
library(rvest)
library(dplyr)
library(tidyr)
library(ggplot2)
col_link <- "https://ourworldindata.org/famines#famines-by-world-region-since-1860"
col_page <- read_html(col_link)
col_table <- col_page %>% html_nodes("table#tablepress-73") %>%
html_table() %>% . [[1]]
data1 <- col_table %>%
select(Year, `Excess Mortality midpoint`)
Year `Excess Mortality midpoint`
<chr> <chr>
1 1846–52 1,000,000
2 1860-1 2,000,000
3 1863-67 30,000
4 1866-7 961,043
5 1868 100,000
6 1868-70 1,500,000
7 1870–1871 1,000,000
8 1876–79 750,000
9 1876–79 7,176,346
10 1877–79 11,000,000
# ... with 67 more rows
Firstly, to convert the periods to decades, you need to extract a year for each period, based on which the calculation will be made. From your comment above, it looks like you need to extract the end year for each period. Given the data, regular expressions are used below to do this (and packages dplyr and stringr).
col_table <- col_table %>%
mutate(Year = case_when(
grepl("^\\d{4}$",Year) ~ Year,
grepl("\\d{4}[–-]\\d{4}",Year) ~ str_sub(Year, start= -4),
grepl("\\d{4}[–-]\\d{2}$",Year) ~ paste0(str_sub(Year,1,2),str_sub(Year,-2)),
grepl("\\d{4}[–-]\\d{1}$",Year) ~ paste0(str_sub(Year,1,3),str_sub(Year,-1))))
What this part of code is doing, is to detect the different cases and extract the proper year. Below there are examples for all cases, that are present on the dataset and what this part of code will result to.
1868 -> 1868
1878-1880 -> 1880
1846–52 -> 1852
1860-1 -> 1861
Now we have the year, so the next step is to extract the decade. To do so, we need to make sure that Year column is numeric and apply the necessary calculation (check here for it: https://stackoverflow.com/a/48966643/8864619)
col_table <- col_table %>%
mutate(Decade = as.numeric(Year) - as.numeric(Year) %% 10)
To reproduce the plot we need to group by decade and make sure that the Excess Mortality midpoint column is numeric to be able to get the sum of victims per decade.
col_table <- col_table %>%
mutate(`Excess Mortality midpoint` = as.numeric(gsub(",", "", `Excess Mortality midpoint`))) %>%
group_by(Decade) %>%
summarize(val = sum(`Excess Mortality midpoint`)) %>%
ungroup()
For the plot itself, ggplot2 is used:
ylab <- c(5, 10, 15, 20, 25)
options(scipen=999)
p <- ggplot(data = col_table, aes(x=factor(Decade),y=val)) +
geom_bar(stat = "identity", fill = "navy") +
scale_x_discrete(labels = col_table %>% distinct(Decade) %>% mutate(Decade = paste0(Decade,"s")) %>% pull()) +
geom_text(aes(label=format(val,big.mark=",")), size=2,vjust=-0.3) +
scale_y_continuous(labels = paste(ylab, "millions"),breaks = 10^6 * ylab) +
ggtitle('Famine victims worldwide')+
theme(panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(size = 0.05, linetype = 'solid',
colour = "black"),
axis.title.x = element_blank(),
axis.title.y = element_blank())
p
So, putting everything together, the following code should get you a column for the year and a column for the relevant decade, which should be then used to create the plot you want to:
library(rvest)
library(dplyr)
library(stringr)
library(ggplot2)
col_link <- "https://ourworldindata.org/famines#famines-by-world-region-since-1860"
col_page <- read_html(col_link)
col_table <- col_page %>% html_nodes("table#tablepress-73") %>% html_table() %>% . [[1]]
col_table <- col_table %>%
mutate(Year = case_when(
grepl("^\\d{4}$",Year) ~Year,
grepl("\\d{4}[–-]\\d{4}",Year) ~ str_sub(Year, start= -4),
grepl("\\d{4}[–-]\\d{2}$",Year) ~ paste0(str_sub(Year,1,2),str_sub(Year,-2)),
grepl("\\d{4}[–-]\\d{1}$",Year) ~ paste0(str_sub(Year,1,3),str_sub(Year,-1)))) %>%
mutate(Decade = as.numeric(Year) - as.numeric(Year)%%10) %>%
mutate(`Excess Mortality midpoint` = as.numeric(gsub(",", "", `Excess Mortality midpoint`))) %>%
group_by(Decade) %>%
summarize(val = sum(`Excess Mortality midpoint`)) %>%
ungroup()
ylab <- c(5, 10, 15, 20, 25)
options(scipen=999)
p <- ggplot(data = col_table, aes(x=factor(Decade),y=val)) +
geom_bar(stat = "identity", fill = "navy") +
scale_x_discrete(labels = col_table %>% distinct(Decade) %>% mutate(Decade = paste0(Decade,"s")) %>% pull()) +
geom_text(aes(label=format(val,big.mark=",")), size=2,vjust=-0.3) +
scale_y_continuous(labels = paste(ylab, "millions"),breaks = 10^6 * ylab) +
ggtitle('Famine victims worldwide')+
theme(panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(size = 0.05, linetype = 'solid',
colour = "black"),
axis.title.x = element_blank(),
axis.title.y = element_blank())
p
Here's the reproduced plot:
First, strsplit, make a proper year matrix, combine back with famines divided by number of years and reshape to long format (lines 1:6). Next, aggregate sums by decade and barplot it.
r <- strsplit(data1$Year, '-|–|, ') |>
rapply(\(y) unlist(lapply(y, \(x) f(max(as.numeric(y)), x))), how='r') |>
{\(.) t(sapply(., \(x) `length<-`(x, max(lengths(.)))))}() |>
{\(.) cbind(`colnames<-`(., paste0('year.', seq_len(dim(.)[2]))),
n=dim(.)[2] - rowSums(is.na(.)))}() |>
{\(.) data.frame(., f=as.numeric(gsub('\\D', '',
data1$`Excess Mortality midpoint`))/
.[, 'n'])}()|>
reshape(1:3, direction='long') |>
stats:::aggregate.formula(formula=f ~ as.integer(substr(year, 1, 3)),
FUN=sum) |>
t()
## plot
op <- par(mar=c(5, 5, 4, 2)+.1) ## set/store old pars
b <- barplot(r, axes=FALSE, ylim=c(0, max(r[2, ])*1.05),
main='Famine victims', )
abline(h=asq, col='lightgrey', lty=3)
barplot(r, names.arg=paste0(r[1, ], '0s'), col='#20254c',
cex.names=.8, axes=FALSE, add=TRUE)
asq <- seq(0, max(axTicks(2)), 2e6)
axis(2, asq, labels=FALSE)
mtext(paste(asq/1e6, 'Million'), 2, 1, at=asq, las=2)
text(b, r[2, ] + 5e5, labels=formatC(r[2, ], format='d', big.mark=','), cex=.7)
box()
par(op) ## restore old pars
In line 2, I used this helper function f() to fill up the pseudo-years:
f <- \(x1, x2, n1=nchar(x1)) {
u <- lapply(list(x1, x2), as.character)
s <- c(n1 - nchar(u[[2]]) + 1L, n1)
as.integer(`substr<-`(u[[1]], s[1], s[2], u[[2]]))
}
You can refine the aggregation method yourself to make the result exactly look like the original, but maybe this is better :)

Plotting multiple plots with two discrete variables - how to include all discrete variables in both axes

I have a dataset that looks like this:
test<-data.frame("M"=c("a","b","c","a","b","b","c","a","b","c"),
"N"=c(1,3,4,6,6,7,7,8,8,8),
"X"=c(0,1,0,1,1,0,1,0,1,1),
"Y"=c(1,1,0,0,1,0,1,1,1,0))
I'm making a simple plot where I want X and Y on the y axis, M on the x axis, each grid colored if the value of X or Y is 1 and empty if the value of X or Y is 0. I'm repeating this for each categories in N (the categories of N are 1 to 5, 6, 7, 8), then stacking all plots together. Right now, I'm doing this with the following code.
test <- test[order(test$N),]
test1 <- test[c(1:3),]
test2 <- test[c(4:5),]
test3 <- test[c(6:7),]
test4 <- test[c(8:10),] # I'm doing this to "separate" categories of `N` manually
p1 <- test1[,c(1,3:4)] %>%
gather(col_name, value, -M) %>%
ggplot(aes(factor(M), col_name, fill = value == 1))+
geom_tile(colour = 'black')+
scale_fill_manual(values = c('FALSE' = 'white', 'TRUE' = 'red'))
p2 <- test2[,c(1,3:4)] %>%
gather(col_name, value, -M) %>%
ggplot(aes(factor(M), col_name, fill = value == 1))+
geom_tile(colour = 'black')+
scale_fill_manual(values = c('FALSE' = 'white', 'TRUE' = 'yellow'))
p3 <- test3[,c(1,3:4)] %>%
gather(col_name, value, -M) %>%
ggplot(aes(factor(M), col_name, fill = value == 1))+
geom_tile(colour = 'black')+
scale_fill_manual(values = c('FALSE' = 'white', 'TRUE' = 'green'))
p4 <- test4[,c(1,3:4)] %>%
gather(col_name, value, -M) %>%
ggplot(aes(factor(M), col_name, fill = value == 1))+
geom_tile(colour = 'black')+
scale_fill_manual(values = c('FALSE' = 'white', 'TRUE' = 'blue'))
grid.arrange(p1, p2, p3, p4, ncol = 1)
I'm attaching an image of what I have right now. I want to fix these plots so that I would have the same factors of M for all four plots (right now, only p1 and p4 have all three factors (a, b and c) in the x axis but I want to add factor c to p2 and a to p3 so that all x axes are identical to each other. Can anyone give me suggestions on how to do this?
(Also, I'm suspecting that the current way I'm plotting things is probably not the most quickest/easiest way to go, if anyone has suggestions on how to improve things it'd be really helpful!)
To continue using grid.arrange(), instead of facet_wrap(), do the following:
Make M a factor:
test$M <- factor(test$M)
Add the following to each of your plots:
scale_x_discrete(limits = levels(test$M))
Maybe one approach I can suggest you is using facets after applying a smart trick to group your values and avoid splitting in different dataframes. Here the code as an option for you (The colors will be the same across the facets in base of TRUE/FALSE values):
library(tidyverse)
#Code
test %>% mutate(Var=lead(N)) %>%
mutate(Diff=Var-N,Diff=ifelse(row_number()==1,0,Diff)) %>%
mutate(Group=ifelse(Diff==0,N,NA)) %>%
fill(Group) %>% select(-c(N,Var,Diff)) %>%
group_by(Group) %>% mutate(NG=paste0('p',cur_group_id())) %>% ungroup() %>%
select(-Group) %>%
pivot_longer(cols = -c(NG,M)) %>%
ggplot(aes(factor(M), name, fill = value == 1,group=value))+
geom_tile(colour = 'black')+
facet_wrap(.~NG,ncol = 1)+
scale_fill_manual('value',values=c('tomato','cyan3'))+
xlab('M')
Output:
The othe option would be patchwork with a customized function:
library(tidyverse)
library(patchwork)
#Code
data <- test %>% mutate(Var=lead(N)) %>%
mutate(Diff=Var-N,Diff=ifelse(row_number()==1,0,Diff)) %>%
mutate(Group=ifelse(Diff==0,N,NA)) %>%
fill(Group) %>% select(-c(N,Var,Diff)) %>%
group_by(Group) %>% mutate(NG=paste0('p',cur_group_id())) %>% ungroup() %>%
select(-Group) %>%
mutate(M=factor(M,levels = unique(M),ordered = T)) %>%
pivot_longer(cols = -c(NG,M))
#List
List <- split(data,data$NG)
#Function
myfun <- function(x)
{
#Test for color
val <- unique(x$NG)
#Conditioning for color
if(val=='p1') {vcolor=c('FALSE' = 'white', 'TRUE' = 'red')} else
if(val=='p2') {vcolor=c('FALSE' = 'white', 'TRUE' = 'yellow')} else
if(val=='p3') {vcolor=c('FALSE' = 'white', 'TRUE' = 'green')} else
{vcolor=c('FALSE' = 'white', 'TRUE' = 'blue')}
#Update data
x <- x %>% mutate(M=factor(M,levels = c('a','b','c'),ordered = T)) %>% complete(M=M)
#Plot
G <- ggplot(x,aes(factor(M), name, fill = (value == 1 & !is.na(value))))+
geom_tile(colour = 'black')+
scale_fill_manual('value',values=vcolor)+
xlab('M')+
scale_y_discrete(limits=c('X','Y'))+
theme_bw()+
ggtitle(val)
return(G)
}
#Apply
Lplot <- lapply(List,myfun)
#Wrap
GF <- wrap_plots(Lplot,ncol = 1)
Output:
Something like this?
test<-data.frame("M"=c("a","b","c","a","b","b","c","a","b","c"),
"N"=c(1,3,4,6,6,7,7,8,8,8),
"X"=c(0,1,0,1,1,0,1,0,1,1),
"Y"=c(1,1,0,0,1,0,1,1,1,0))
library(tidyverse)
test = mutate(test, N2 = cut(N, breaks = c(0,5:100)))
m = pivot_longer(test, c(X, Y))
ggplot(m, aes(M, name,fill=factor(value))) +
geom_tile(colour = 'black') +
facet_wrap(~N2, scales = 'free') +
scale_fill_manual(values = c(`0` = 'white', `1` = 'red'))

Resources