lapply function to create many plots in Ggplot - r

I'm trying to plot the line graph for X=date Y=column which is XS1-XS10.
I use the lapply with a next function
plot_data_column = function (column) {
ggplot(data= excess_return, aes(y=column,x=date)) +
geom_line()+
geom_hline(yintercept = mean(excess_return$column), color="red")+
ggtitle(column)+
theme_minimal()
}
and then use lapply to plug in the columns of the dataset into the ggplot.
ggplots = lapply(excess_return[,1:10], plot_data_column)
My problem occurs with geom_hline which doesn't recognize the column and ggtitle(column).
P.s I have tried also like this
ggplots = lapply(colnames(excess_return[,1:10]), plot_data_column)
I wonder why R doesn't accept the XS1 as it were written manually by me?
because this code perfectly works.
ggplot(data= excess_return, aes(y=XS1,x=date)) +
geom_line()+
geom_hline(yintercept = mean(excess_return$XS1), color="red")+
theme_minimal()
The only thing what i want to is to iterate the XS1 to XS10.
Thank you for support

you can adapt this to your data strucutre:
plot_data_column = function (.data, .column) {
ggplot2::ggplot(data= .data, ggplot2::aes(y=!!dplyr::sym(.column),x = Petal.Width)) +
ggplot2::geom_line() +
ggplot2::geom_hline(yintercept = .data %>%
dplyr::pull(!!dplyr::sym(.column)) %>%
mean(),
color="red")+
ggplot2::ggtitle(.column) +
ggplot2::theme_minimal()
}
plots <- names(iris)[1:3] %>%
purrr::map(~plot_data_column(.data = iris, .column = .x))
You need to change the names(iris)[1:3] to your names names(excess_return)[1:10] and x = Petal.Width to x = date.

Another option:
library(tidyverse)
custom_ggplot_function <- function(var, var_name) {
ggplot(mtcars, aes(x = disp, y = var)) +
geom_line() +
geom_hline(yintercept = mean(var), color = "red") +
labs(y = var_name) +
ggtitle(var_name) +
theme_minimal()
}
mtcars %>%
select(mpg, cyl, disp) %>%
map2(.y = names(.), ~ custom_ggplot_function(.x, .y))

Related

for-loop to create ggplots

I trying to make boxplots with ggplot2.
The code I have to make the boxplots with the format that I want is as follows:
p <- ggplot(mg_data, aes(x=Treatment, y=CD68, color=Treatment)) +
geom_boxplot(mg_data, mapping=aes(x=Treatment, y=CD68))
p+ theme_classic() + geom_jitter(shape=16, position=position_jitter(0.2))
I can was able to use the following code to make looped boxplots:
variables <- mg_data %>%
select(10:17)
for(i in variables) {
print(ggplot(mg_data, aes(x = Treatment, y = i, color=Treatment)) +
geom_boxplot())
}
With this code I get the boxplots however, they do not have the name label of what variable is being select for the y-axis, unlike the original code when not using the for loop. I also do not know how to add the formating code to the loop:
p + theme_classic() + geom_jitter(shape=16, position=position_jitter(0.2))
Here is a way. I have tested with built-in data set iris, just change the data name and selected columns and it will work.
suppressPackageStartupMessages({
library(dplyr)
library(ggplot2)
})
variables <- iris %>%
select(1:4) %>%
names()
for(i in variables) {
g <- ggplot(iris, aes(x = Species, y = get(i), color=Species)) +
geom_boxplot() +
ylab(i)
print(g)
}
Edit
Answering to a comment by user TarJae, reproduced here because answers are less deleted than comments:
Could you please expand with saving all four files. Many thanks.
The code above can be made to save the plots with a ggsave instruction at the loop end. The filename is the variable name and the plot is the default, the return value of last_plot().
for(i in variables) {
g <- ggplot(iris, aes(x = Species, y = get(i), color=Species)) +
geom_boxplot() +
ylab(i)
print(g)
ggsave(paste0(i, ".png"), device = "png")
}
Try this:
variables <- mg_data %>%
colnames() %>%
`[`(10:17)
for (i in variables) {
print(ggplot(mg_data, aes(
x = Treatment, y = {{i}}, color = Treatment
)) +
geom_boxplot())
}
Another option is to use lapply. It's approximately the same as using a loop, but it hides the actual looping part and can make your code look a little cleaner.
variables = iris %>%
select(1:4) %>%
names()
lapply(variables, function(x) {
ggplot(iris, aes(x = Species, y = get(x), color=Species)) +
geom_boxplot() + ylab(x)
})

Shorthand functions for multiple geoms in ggplot2

I would like to create shorthand notations or functions that combines multiple geoms for ggplot.
For example, instead of
mtcars %>%
ggplot(aes(x = cyl, y = mpg)) +
geom_point() +
geom_smooth(method = "lm") +
ggpubr::stat_cor()
I would like to be able to create a function to combine the geoms like so
lm_and_cor <- function() {
geom_smooth(method = "lm", se = FALSE) +
stat_cor()
}
mtcars %>%
ggplot(aes(x = cyl, y = mpg)) +
geom_point() +
lm_and_cor()
I am aware that I can create functions that does all of the plotting, basically
plot_data <- function(x) {
x %>%
ggplot(aes(x = cyl, y = mpg)) +
geom_point() +
geom_smooth(method = "lm") +
ggpubr::stat_cor()
}
which to be fair does what I want, to some degree. However, I would instead like to combine multiple geoms in a single function, as the underlying geom (e.g. point, lines, etc.) will not always be the same. Is this doable, and is it feasible?
With ggplot2 you can use list of elements:
lm_and_cor <- function()
list(geom_smooth(method = "lm", se = FALSE),
ggpubr::stat_cor()
)
mtcars %>%
ggplot(aes(x = cyl, y = mpg)) +
geom_point() +
lm_and_cor()
Output:
Do you mean something like this?
You can store multiple geom in a list object.
Edit: I misunderstand the question. This should meet the expectation.
data(iris)
library(ggplot2)
x <- list(geom_point(), geom_line())
ggplot(iris, aes(Sepal.Length, Sepal.Width)) + x
Or if you want to make a function to plot by column use this {{variable}}.
library(dplyr)
plotting <- function(data, x, y){
data %>%
ggplot(aes({{x}}, {{y}})) +
geom_point() +
geom_smooth(method = "lm")}
plotting(iris, Sepal.Length, Sepal.Width)

automagically using labels (haven semantics) in ggplot2 plots

I'm plotting data marked up using haven semantics, i.e. variables and values have labels defined via attributes.
Often, these labels are also what I want in my axis titles and ticks.
library(ggplot2)
mtcars$mpg = haven::labelled(mtcars$mpg, labels = c("low" = 10, "high" = 30))
attributes(mtcars$mpg)$label = "miles per gallon"
ggplot(mtcars, aes(mpg, cyl)) + geom_point() +
scale_x_continuous(attributes(mtcars$mpg)$label,
breaks = attributes(mtcars$mpg)$labels,
labels = names(attributes(mtcars$mpg)$labels))
Could I write a helper that replaces that laborious scale_x_continuous statement with something that can more easily be iterated? E.g. something like
scale_x_continuous(label_from_attr, breaks = breaks_from_attr, labels = value_labels_from_attr). Or maybe even + add_labels_from_attributes() to replace the whole thing?
I'm aware that I can write/use helpers like Hmisc::label to slightly shorten the attribute-code above, but that's not what I want here.
I don't have a good scale, but you can use a function like this:
label_x <- function(p) {
b <- ggplot_build(p)
x <- b$plot$data[[b$plot$labels$x]]
p + scale_x_continuous(
attributes(x)$label,
breaks = attributes(x)$labels,
labels = names(attributes(x)$labels)
)
}
Then use as (+ won't do):
p <- ggplot(mtcars, aes(mpg, cyl)) + geom_point()
label_x(p)
Alternatively, use a pipe:
mtcars %>% { ggplot(., aes(mpg, cyl)) + geom_point() } %>% label_x()
Old solution
use_labelled <- function(l, axis = "x") {
if (axis == "x") {
scale_x_continuous(attributes(l)$label,
breaks = attributes(l)$labels,
labels = names(attributes(l)$labels))
}
if (axis == "y") {
scale_y_continuous(attributes(l)$label,
breaks = attributes(l)$labels,
labels = names(attributes(l)$labels))
}
}
Then you just give:
ggplot(mtcars, aes(mpg, cyl)) + geom_point() + use_labelled(mtcars$cyl)
Or for the y-axis:
ggplot(mtcars, aes(cyl, mpg)) + geom_point() + use_labelled(mtcars$cyl, "y")
Another approach is to write a wrapper for ggplot() that has its own class. Then attributes have full visibility when the corresponding print method is called. See ?ag.print from package 'yamlet' (0.2.1).
library(ggplot2)
library(yamlet)
library(magrittr)
mtcars$disp %<>% structure(label = 'displacement', unit = 'cu. in.')
mtcars$mpg %<>% structure(label = 'mileage', unit = 'miles/gallon')
mtcars$am %<>% factor(levels = c(0,1), labels = c('automatic','manual'))
mtcars$am %<>% structure(label = 'transmission')
agplot(mtcars, aes(disp, mpg, color = am)) + geom_point()

Add titles to ggplots created with map()

What's the easiest way to add titles to each ggplot that I've created below using the map function? I want the titles to reflect the name of each data frame - i.e. 4, 6, 8 (cylinders).
Thanks :)
mtcars_split <-
mtcars %>%
split(mtcars$cyl)
plots <-
mtcars_split %>%
map(~ ggplot(data=.,mapping = aes(y=mpg,x=wt)) +
geom_jitter()
# + ggtitle(....))
plots
Use map2 with names.
plots <- map2(
mtcars_split,
names(mtcars_split),
~ggplot(data = .x, mapping = aes(y = mpg, x = wt)) +
geom_jitter() +
ggtitle(.y)
)
Edit: alistaire pointed out this is the same as imap
plots <- imap(
mtcars_split,
~ggplot(data = .x, mapping = aes(y = mpg, x = wt)) +
geom_jitter() +
ggtitle(.y)
)
Perhaps you'd be interested in using facet_wrap instead
ggplot(mtcars, aes(y=mpg, x=wt)) + geom_jitter() + facet_wrap(~cyl)
You can use purrr::map2():
mtcars_split <- mtcars %>% split(mtcars$cyl)
plots <- map2(mtcars_split, titles,
~ ggplot(data=.x, aes(mpg,wt)) + geom_jitter() + ggtitle(.y)
)
EDIT
Sorry duplicated with Paul's answer.

More compact use of ggplot : grid spaghetti plot

The following code plot the predicted probability of several models against time. Having, all the plots on one graph was not readable so I divided the result in a grid.
I was wondering if it was possible to have only one ggplot with all the models then somehow specify which goes where with grid.arrange
Current :
p2.dat1 <- select(ppf, EXPOSURE, predp.glm.gen,predp.glm1, predp.glm2,predp.glm3,predp.glm4 )
mdf1 <- melt(p2.dat1 , id.vars="EXPOSURE")
plm.plot.all1 <- ggplot(data = mdf1,
aes(x = EXPOSURE, y = value, colour = variable)) +
geom_line()
p2.dat2 <- select(ppf, EXPOSURE, predp.glm.gen, predp.glm5,predp.glm.step )
mdf2 <- melt(p2.dat2 , id.vars="EXPOSURE")
plm.plot.all2 <- ggplot(data = mdf2,
aes(x = EXPOSURE, y = value, colour = variable)) +
geom_line()
grid.arrange(plm.plot.all1, plm.plot.all2, nrow=2)
Expected:
p2.dat <- select(ppf, EXPOSURE, predp.glm.gen,predp.glm1, predp.glm2,predp.glm3,predp.glm4,predp.glm5,predp.glm.step)
mdf <- melt(p2.dat , id.vars="EXPOSURE")
plm.plot.all <- ggplot(data = mdf1,
aes(x = EXPOSURE, y = value, colour = variable)) +
geom_line()
grid.arrange(plm.plot.all[some_selection_somehow], plm.plot.all[same], nrow=2)
Thanks,
You can do this with grid.arrange by writing some helper functions. It can be done more succinctly, but I prefer small focused functions that can be used with pipes.
library(tidyverse)
library(gridExtra)
# Helper Functions ----
plot_function <- function(x) {
ggplot(x, aes(x = EXPOSURE, y = value, colour = variable)) +
geom_line() +
labs(title = unique(x$variable)) +
theme(legend.position = "none")
}
grid_plot <- function(x, selection) {
order <- c(names(x)[grepl(selection,names(x))], names(x)[!grepl(selection,names(x))])
grid.arrange(grobs = x[order], nrow = 2)
}
# Actually make the plot ----
ppf %>%
select(EXPOSURE, predp.glm.gen,predp.glm1, predp.glm2,predp.glm3,predp.glm4,predp.glm5,predp.glm.step) %>%
gather(variable, value, -EXPOSURE) %>%
split(.$variable) %>%
map(plot_function) %>%
grid_plot("predp.glm3")
or you could do this with ggplot, a facet_wrap and factoring the variable column to the proper order. This has the benefits of shared axes across the plots, which facilitates easy comparison. You can alter the helper functions in the first approach to set the axes explicitly to achieve the same effect, but its just easier keeping it in ggplot.
library(tidyverse)
selection <- "predp.glm3"
plot_data <- ppf %>%
select(EXPOSURE, predp.glm.gen,predp.glm1, predp.glm2,predp.glm3,predp.glm4,predp.glm5,predp.glm.step) %>%
gather(variable, value, -EXPOSURE) %>%
mutate(variable = fct_relevel(variable, c(selection, levels(variable)[-grepl(selection, levels(variable))])))
ggplot(plot_data, aes(x = EXPOSURE, y = value, colour = variable)) +
geom_line() +
facet_wrap( ~variable, nrow = 2) +
theme(legend.position = "none")

Resources