Nested loop with purrr::walk leaves empty plots - r

The following code produces a grid of plots:
library(tidyverse)
library(grid)
library(patchwork)
exdata <- diamonds %>%
group_by(cut) %>%
nest %>%
crossing(dummy = 1:3) %>%
crossing(cohort = LETTERS[1:3]) %>%
mutate(plots = map(.x = data, ~ ggplot(.x, aes(x = x, y = y)) + geom_point()))
# vars for plot grid
mod <- 'Fair'
colsn <- length(unique(exdata$dummy))
rowsn <- length(exdata$cohort %>% unique)
# create plots
wrap_plots(plotlist = exdata %>% filter(cut == mod) %>% pull(plots), ncol = colsn, nrow = rowsn) + plot_annotation(title = " ")
# add some text across columns
walk2(seq(0.165,0.835, length.out = colsn), unique(exdata$dummy), ~ grid.draw(textGrob(.y, x = .x, y = 0.98, rot = 0)))
Looks like this:
In that code block I have a variable mod <- 'Fair'. I would like to make this a vector and produce a grid for each type of cut in diamonds. Tried:
mods <- exdata$cut %>% unique %>% as.vector
walk(mods, function(.x) {
wrap_plots(plotlist = exdata %>% filter(cut == .x) %>% pull(plots), ncol = colsn, nrow = rowsn) + plot_annotation(title = " ")
walk2(seq(0.165,0.835, length.out = colsn), unique(exdata$dummy), ~ grid.draw(textGrob(.y, x = .x, y = 0.98, rot = 0)))
})
This code seems to run without error, but no plots all returned. The grid itself is, just no plots:
How can I run a nested walk loop to generate a grid of plots for each type of cut?

Just wrapping your plots in a print statement solves the issue
walk(mods, function(.x) {
print(wrap_plots(plotlist = exdata %>% filter(cut == .x) %>% pull(plots), ncol = colsn, nrow = rowsn) + plot_annotation(title = " "))
walk2(seq(0.165,0.835, length.out = colsn), unique(exdata$dummy), ~ grid.draw(textGrob(.y, x = .x, y = 0.98, rot = 0)))
})

Related

How to visualize similar resistance pattern in a plot using R

I have a large dataset in which I want to group similar resistance patterns together. A plot to visualize similarity of resistance pattern is needed.
dat <- read.table(text="Id Resistance.Pattern
A SSRRSSSSR
B SSSRSSSSR
C RRRRSSRRR
D SSSSSSSSS
E SSRSSSSSR
F SSSRRSSRR
G SSSSR
H SSSSSSRRR
I RRSSRRRSS", header=TRUE)
I would separate out the values into a wider dataframe and then make a heatmap and dendrogram to compare sillimanites in patterns:
library(tidyverse)
library(ggdendro)
recode_dat <- dat |>
mutate(pat = str_split(Resistance.Pattern, "")) |>
unnest_wider(pat, names_sep = "_") |>
select(starts_with("pat_")) |>
mutate(across(everything(), ~case_when(. == "S" ~ 1, . == "R" ~ 2, is.na(.) ~0)))
rownames(recode_dat) <- dat$Id
dendro <- as.dendrogram(hclust(d = dist(x = scale(recode_dat))))
dendro_plot <- ggdendrogram(data = dendro, rotate = TRUE)
heatmap_plot <- dat |>
mutate(pat = str_split(Resistance.Pattern, "")) |>
unnest_wider(pat, names_sep = "_") |>
pivot_longer(cols = starts_with("pat_"), names_to = "pattern_position") |>
mutate(Id = factor(Id, levels = dat$Id[order.dendrogram(dendro)])) |>
ggplot(aes(pattern_position, Id))+
geom_tile(aes(fill = value))+
scale_x_discrete(labels = \(x) sub(".*_(\\d+$)", "\\1", x))+
theme(legend.position = "top")
cowplot::plot_grid(heatmap_plot, dendro_plot,nrow = 1, align = "h", axis = "tb")
It sounds as though the second column of your data frame represents sensitivity (S) and resistance (R), presumably to antibiotics (though this is not clear in your question). That being the case, you are presumably looking for something like this:
library(tidyverse)
p <- strsplit(dat$Resistance.Pattern, "")
do.call(rbind, lapply(p, \(x) c(x, rep(NA, max(lengths(p)) - length(x))))) %>%
as.data.frame() %>%
cbind(Id = dat$Id) %>%
mutate(Id = factor(Id, rev(Id))) %>%
pivot_longer(V1:V9) %>%
ggplot(aes(name, Id, fill = value)) +
geom_tile(col = "white", size = 2) +
coord_equal() +
scale_fill_manual(values = c("#e02430", "#d8d848"),
labels = c("Resistant", "Sensitive"),
na.value = "gray95") +
scale_x_discrete(name = "Antibiotic", position = "top",
labels = 1:9) +
labs(fill = "Resistance", y = "ID") +
theme_minimal(base_size = 20) +
theme(text = element_text(color = "gray30"))
I'd separate the entries by character, convert the binary data to numeric and plot the matrix as a heatmap and show the character string as rownames.
Whether to use a row and/or column clustering depends on whats desired.
library(dplyr)
library(tidyr) # for unnest_wider
library(gplots) # for heatmap.2
mm <-
dat %>%
group_by(Resistance.Pattern) %>%
summarize(Id, Resistance.Pattern) %>%
mutate(binary = strsplit(Resistance.Pattern, "")) %>%
unnest_wider(binary, names_sep="") %>%
mutate(across(starts_with("binary"), ~ as.numeric(c(R = 1, S = 0)[.x])))
mm2 <- as.matrix(mm[, -c(1,2)]) |> unname() # the numeric part
rownames(mm2) <- apply(as.matrix(mm[,1:2]), 1, paste, collapse=" ")
heatmap.2(mm2, trace="none", Colv="none", dendrogram="row",
col=c("green", "darkgreen"), margins=c(10,10))

How to refer a variable index inside an argument of purrr::map() "list or atomic vector" argument?

I want to put a variable in the formula argument of regression_plot_function() function in the line
p <- purrr::map(p[1:4], ~regression_plot_function(penguins2, .x)) where I momentarily put .x like the following.
library(palmerpenguins)
library(ggplot2)
library(purrr)
library(dplyr)
library(gridExtra)
set.seed(123)
data("penguins")
index <-
matrix(rnorm(344, 25, 20), nrow = 344, ncol = 45) %>%
as.data.frame()
penguins2 <-
penguins %>% cbind(index)
regression_plot_function <- function(dataset, i_number) {
index <- paste0("V", i_number)
dataset %>%
select(species | island | year | contains(index)) %>%
mutate(year = as.factor(year)) %>%
ggplot(aes(x = year, y = !!sym(index), color = island)) +
geom_point() +
geom_smooth(aes(group = island), method = "loess")
}
p <- list()
p <- purrr::map(p[1:4], ~regression_plot_function(penguins2, .x))
allplot_regression <- cowplot::plot_grid(plotlist = p, nrow = 2, ncol = 2)
allplot_regression
With for loops, I can write it like this
p <- list()
for (i in 1:4) {
p[[i]] <- regression_plot_function(penguins2, i_number = i)
}
But I want to try it using purrr.
Thank you!
Try replacing your loop with the following:
p <- map(1:4, ~ regression_plot_function(penguins2, i_number = .))

Create all possible geom_col charts of the dataset using rowwise and rlang [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 2 years ago.
Improve this question
I want to create and save all the possible geom_col charts of a dataset with categorical variables on Y axis and average (grouping variable being y) value of numeric variables on x axis.
I borrowed ideas from this and this link and created the following code, but struggling to make it work. Please advise.
library(tidyverse)
library(skimr)
cat_vars <- skim(mpg) %>% as.data.frame() %>% filter(skim_type == "character", character.n_unique <= 16) %>% pull(skim_variable)
num_vars <- skim(mpg) %>% as.data.frame() %>% filter(skim_type == "numeric") %>% pull(skim_variable)
vars <- cross_df(list(y= cat_vars, x = num_vars))
plots <- bind_rows(rep(list(mpg), nrow(vars)), .id = "grp") %>%
nest_by(grp) %>%
bind_cols(vars) %>%
rowwise() %>%
mutate(plot = list(~(data %>%
group_by(y) %>%
summarise("{x}" = mean(.data[[x]], na.rm = T)) %>%
ungroup()) %>%
ggplot() +
geom_col(aes(x = .data[[x]],
y = fct_reorder(.data[[y]], .data[[x]], .fun = sum, na.rm = T),
fill = .data[[y]]), width = 0.8) +
xlab(paste0("Avg. ", x)) +
ylab(y) +
theme_classic()))),
filename = paste0(x, "_by_", y, ".pdf")) %>%
select(filename, plot)
pwalk(plots, ggsave, path = getwd())
There is no need to bind_rows, nest, ... Instead:
Put your plotting code in a helper function instead of wrapping everything in a pipeline. This allows for much easier debugging as well as cleaner and clearer code.
Use map2 to loop over the columns of your df vars
One issue with your code was summarise("{x}" = mean(.data[[x]], na.rm = T)) which will create a variable named {x}. Instead use e.g. !!sym(x) := ... to assign the value back to x if x is a string.
library(tidyverse)
library(skimr)
cat_vars <- skim(mpg) %>% as.data.frame() %>% filter(skim_type == "character", character.n_unique <= 16) %>% pull(skim_variable)
num_vars <- skim(mpg) %>% as.data.frame() %>% filter(skim_type == "numeric") %>% pull(skim_variable)
vars <- cross_df(list(y= cat_vars, x = num_vars))
make_plot <- function(data, x, y) {
data <- data %>%
group_by(across(all_of(y))) %>%
summarise(!!sym(x) := mean(.data[[x]], na.rm = T), .groups = "drop") %>%
ungroup()
ggplot(data) +
geom_col(aes(x = .data[[x]],
y = fct_reorder(.data[[y]], .data[[x]], .fun = sum, na.rm = T),
fill = .data[[y]]), width = 0.8) +
xlab(paste0("Avg. ", x)) +
ylab(y) +
theme_classic()
}
plots <- map2(vars$x, vars$y, make_plot, data = mpg)
length(plots)
#> [1] 25
plots[[1]]
# Export
pwalk(list(x = vars$x, y = vars$y, p = plots), function(x, y, p) ggsave(filename = paste0(x, "_by_", y, ".pdf"), plot = p, path = getwd()))

Use scale_x_continuous with labeller function that also takes a data frame as an argument as well as default breaks

Here's a code block:
# scale the log of price per group (cut)
my_diamonds <- diamonds %>%
mutate(log_price = log(price)) %>%
group_by(cut) %>%
mutate(scaled_log_price = scale(log_price) %>% as.numeric) %>% # scale within each group as opposed to overall
nest() %>%
mutate(mean_log_price = map_dbl(data, ~ .x$log_price %>% mean)) %>%
mutate(sd_log_price = map_dbl(data, ~ .x$log_price %>% sd)) %>%
unnest %>%
select(cut, price, price_scaled:sd_log_price) %>%
ungroup
# for each cut, find the back transformed actual values (exp) of each unit of zscore between -3:3
for (i in -3:3) {
my_diamonds <- my_diamonds %>%
mutate(!! paste0('mean_', ifelse(i < 0 , 'less_', 'plus_'), abs(i), 'z') := map2(.x = mean_log_price, .y = sd_log_price, ~ (.x + (i * .y)) %>% exp) %>% unlist)
}
my_diamonds_split <- my_diamonds %>% group_split(cut)
split_names <- my_diamonds %>% mutate(cut = as.character(cut)) %>% group_keys(cut) %>% pull(cut)
names(my_diamonds_split) <- split_names
I now have a variable my_diamonds_split that is a list of data frames. I would like to loop over these data frames and each time create a new ggplot.
I can use a custom labeller function with a single df, but I don't know how to do this within a loop:
labeller <- function(x) {
paste0(x,"\n", scales::dollar(sd(ex_df$price) * x + mean(ex_df$price)))
}
ex_df <- my_diamonds_split$Ideal
ex_df %>%
ggplot(aes(x = scaled_log_price)) +
geom_density() +
scale_x_continuous(label = labeller, limits = c(-3, 3))
This creates a plot for the 'Ideal' cut of diamonds. I also get two data points on the x axis, the zscore values at -2, 0 and 2 as well as the raw dollar values of 3.8K, 3.9K and 11.8K.
When I define the labeller function, I must specify the df to scale with. Tried instead with placing the dot instead of my_df, hoping that on each iteration ggplot would get the value of the df on any iteration:
labeller <- function(x) {
paste0(x,"\n", scales::dollar(sd(.$price) * x + mean(.$price)))
}
ex_df <- my_diamonds_split$Ideal
ex_df %>%
ggplot(aes(x = scaled_log_price)) +
geom_density() +
scale_x_continuous(label = labeller, limits = c(-3, 3))
Returns:
Error in is.data.frame(x) : object '.' not found
I then tried writing the function to accept an argument for the df to scale with:
labeller <- function(x, df) {
paste0(x,"\n", scales::dollar(sd(df$price) * x + mean(df$price)))
}
ex_df <- my_diamonds_split$Ideal
ex_df %>%
ggplot(aes(x = scaled_log_price)) +
geom_density() +
scale_x_continuous(label = labeller(df = ex_df), limits = c(-3, 3)) # because when it comes to running in real life, I will try something like labeller(df = my_diamonds_split[[i]])
Error in paste0(x, "\n", scales::dollar(sd(df$price) * x + mean(df$price))) :
argument "x" is missing, with no default
Bearing in mind that the scaling must be done per iteration, how could I loop over my_diamonds_split, and on each iteration generate a ggplot per above?
labeller <- function(x) {
# how can I make df variable
paste0(x,"\n", scales::dollar(sd(df$price) * x + mean(df$price)))
}
for (i in split_names) {
my_diamonds_split[[i]] %>%
ggplot(aes(x = scaled_log_price)) +
geom_density() +
scale_x_continuous(label = labeller, # <--- here, labeller must be defined with df$price except that will difer on each iteration
limits = c(-3, 3))
}
There's a hacky way to get this result in facets. Basically, after converting to z scores, you add different amounts (say, multiples of 1000) to each group's z scores. Then you set all the breaks to this collection of points and label them with pre-calculated labels.
library(ggplot2)
library(dplyr)
f <- function(x) {
y <- diamonds$price[diamonds$cut == x]
paste(seq(-3, 3), scales::dollar(round(mean(y) + seq(-3, 3) * sd(y))), sep = "\n")
}
breaks <- as.vector(sapply(levels(diamonds$cut), f))
diamonds %>%
group_by(cut) %>%
mutate(z = scale(price) + 3 + 1000 * as.numeric(cut)) %>%
ggplot(aes(z)) +
geom_point(aes(x = z - 2, y = 1), alpha = 0) +
geom_density() +
scale_x_continuous(breaks = as.vector(sapply(1:5 * 1000, "+", 0:6)),
labels = breaks) +
facet_wrap(vars(cut), scales = "free_x") +
theme(text = element_text(size = 16),
axis.text.x = element_text(size = 6))
You would have to increase the plot size to make the dollar values more visible of course.
Created on 2020-08-04 by the reprex package (v0.3.0)

How can we data wrangling to obtain shown ratio/proportion chart shown

Goal is to produce a visualization indicating ratio.
Please help us how can we produce such ratio chart (high lighted) in R ?
library(tidyverse)
# Dataset creation
df <- data.frame(cls = c(rep("A",4),rep("B",4)),
grd = c("A1",rep("A2",3),rep(c("B1","B2"), 2)),
typ = c(rep("m",2),rep("o",2),"m","n",rep("p",2)),
pnts = c(rep(1:4,2)))
df
#### Data wrangling
df1 <- df %>%
group_by(cls) %>%
summarise(cls_pct = sum(pnts))
df1
df2 <- df %>%
group_by(cls,grd) %>%
summarize(grd_pct = sum(pnts))
df2
df3 <- df %>%
group_by(cls,grd,typ) %>%
summarise(typ_pct = sum(pnts))
df3
#### Attempt to combine all df1,df2,df3
# but mutate and summarise are mixing up leading to wrong results
df3 %>%
group_by(cls,grd) %>%
mutate(grd_pct = sum(typ_pct)) %>%
group_by(cls) %>%
mutate(cls_pct = sum(grd_pct))
Attempt to visualize all the ratios in 1 chart
data %>%
pivot_longer(cols = -c(cls:pnts),
names_to = "per_cat",
values_to = "percent") %>%
ggplot(aes(cls,percent, col = typ, fill = grd)) +
geom_bar(stat = "identity") +
coord_flip() +
theme_bw()
plot of the same.
EDIT -- added formula version with more useful output for visualization.
ORIG: At this point it may be worth making a function to reduce copying and pasting, but this may get you what you need:
library(tidyverse)
df %>%
group_by(cls) %>%
mutate(per1 = sum(pnts),
per1_pct = per1 / sum(per1)) %>%
group_by(cls, grd) %>%
mutate(per2 = sum(pnts),
per2_pct = per2 / sum(per2)) %>%
group_by(cls, grd, typ) %>%
mutate(per3 = sum(pnts),
per3_pct = per3 / sum(per3)) %>%
ungroup()
EDIT: Here's a general function to calculate the stats for a given grouping, making it easier to combine a few groupings together in long format better suited for visualization.
df_sum <- function(df, level, ...) {
df %>%
group_by(...) %>%
summarize(grp_ttl = sum(pnts)) %>%
mutate(ttl = sum(grp_ttl),
pct = grp_ttl / ttl) %>%
ungroup() %>%
mutate(level = {{ level }} )
}
df_sum(df, level = 1, cls) %>%
bind_rows(df_sum(df, level = 2, cls, grd)) %>%
bind_rows(df_sum(df, level = 3, cls, grd, typ)) %>%
mutate(label = coalesce(as.character(typ), # This grabs the first non-NA
as.character(grd),
as.character(cls))) -> df_summed
df_summed %>%
ggplot(aes(level, grp_ttl)) +
geom_col(color = "white") +
geom_text(aes(label = paste0(label, "\n", grp_ttl, "/", ttl)),
color = "white",
position = position_stack(vjust = 0.5)) +
scale_x_reverse() + # To make level 1 at the top
coord_flip() # To switch from vertical to horizontal orientation

Resources