Using a function within mapply to create and save scatterplots - r

I am trying to use mapply to automate saving scatterplots from ggplot to a folder.
To do this I have created lists of my x variable and y variable, as well as a list of the grouping variable I would like to colour my points by.
I then tried creating a function, and calling the function with mapply but the only output saved is a single blank image of the last variable in the list. Below is an example dataset.
df <- data.frame("ID" = 1:16)
df$VarA <- c(1,1,1,1,1,1,1,1,1,1,1,14,NA_real_,NA_real_,NA_real_,16)
df$VarB <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16,16)
df$VarC <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14,16)
df$VarD <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14,16)
df$ControlVarA <- factor(c("Group_1","Group_1","Group_1","Group_1","Group_1", "Group_1",
"Group_2","Group_2","Group_2","Group_2","Group_2","Group_2",
"Group_2","Group_2","Group_2","Group_2"))
df$ControlVarB <- factor(c("Group_1","Group_1","Group_1","Group_1","Group_1", "Group_1",
"Group_1","Group_1","Group_1","Group_1","Group_2","Group_2",
"Group_2","Group_2","Group_2","Group_2"))
df$ControlVarC <- factor(c("Group_2","Group_2","Group_2","Group_2","Group_1", "Group_1",
"Group_2","Group_2","Group_2","Group_2","Group_2","Group_2",
"Group_2","Group_2","Group_2","Group_2"))
Below is the code I used to call the lists for the x, y and colouring variable.
x_lists <- df %>% select(VarA:VarB) %>% colnames(.)
y_lists <- df %>% select(VarC:VarD) %>% colnames(.)
ControlVar_list <- df %>% select(contains("ControlVar")) %>% colnames(.)
Below is the function I have created and the mapply
save_plots <- function(dataset, x, y, z) {
first_plot <- ggplot(dataset) +
geom_point(data = subset(dataset, .data[[z]] == 'Group_1'),
aes(x = .data[[x]], y = .data[[y]], color = .data[[z]], size = 3)) +
geom_point(data = subset(dataset, .data[[z]] == 'Group_2'),
aes(x = .data[[x]], y = .data[[y]], color = .data[[z]], size = 3)) +
geom_smooth(aes(x = .data[[x]], y = .data[[y]], size = 0), method = "lm", colour="black", size=0.5) +
stat_cor(aes(x = .data[[x]], y = .data[[y]], color = .data[[z]],
label = ..rr.label..),
label.y.npc="top", label.x.npc = "left", method = "pearson",
size = 5) +
scale_color_manual(values = c("#C5BEC9", "#F2642b", "#F2642b")) +
labs(title = "test",
x = "VarA",
y = "VarB",
colour = "") +
guides(size = FALSE, color = FALSE) +
theme_bw(base_size = 14)
ggsave(sprintf("C:\\Documents\\%s.tiff", y), first_plot)
grDevices::dev.off()
}
mapply(save_plots, x_lists, y_lists, ControlVar_list, MoreArgs = list(dataset = df))

.data will not work with base R subset function. Try using dplyr::filter
library(tidyverse)
library(ggpubr)
save_plots <- function(dataset, x, y, z) {
first_plot <- ggplot(dataset) +
geom_point(data = filter(dataset, .data[[z]] == 'Group_1'),
aes(x = .data[[x]], y = .data[[y]], color = .data[[z]], size = 3)) +
geom_point(data = filter(dataset, .data[[z]] == 'Group_2'),
aes(x = .data[[x]], y = .data[[y]], color = .data[[z]], size = 3)) +
geom_smooth(aes(x = .data[[x]], y = .data[[y]], size = 0), method = "lm", colour="black", size=0.5) +
stat_cor(aes(x = .data[[x]], y = .data[[y]], color = .data[[z]],
label = ..rr.label..),
label.y.npc="top", label.x.npc = "left", method = "pearson",
size = 5) +
scale_color_manual(values = c("#C5BEC9", "#F2642b", "#F2642b")) +
labs(title = "test",
x = "VarA",
y = "VarB",
colour = "") +
guides(size = FALSE, color = FALSE) +
theme_bw(base_size = 14)
ggsave(sprintf("%s.tiff", y), first_plot)
grDevices::dev.off()
}

Related

How to manually change line size and alpha values for ggplot2 lines (separated by factor)?

I want to create a graph where I can change the line size for each line c(1,2,3) and the alpha values for each line c(0.5,0.6,0.7). I tried to use scale_size_manual but it didn't make any difference. Any ideas on how to proceed?
var <- c("T","T","T","M","M","M","A","A","A")
val <- rnorm(12,4,5)
x <- c(1:12)
df <- data.frame(var,val,x)
ggplot(aes(x= x , y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey","blue","black")) + geom_smooth(aes(x = x, y = val), formula = "y ~ x", method = "loess",se = FALSE, size = 1) + scale_x_continuous(breaks=seq(1, 12, 1), limits=c(1, 12)) + scale_size_manual(values = c(1,2,3))
To set the size and alpha values for your lines you have to map on aesthetics. Otherwise scale_size_manual will have no effect:
library(ggplot2)
ggplot(aes(x = x, y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey", "blue", "black")) +
geom_smooth(aes(x = x, y = val, size = var, alpha = var), formula = "y ~ x", method = "loess", se = FALSE) +
scale_x_continuous(breaks = seq(1, 12, 1), limits = c(1, 12)) +
scale_size_manual(values = c(1, 2, 3)) +
scale_alpha_manual(values = c(.5, .6, .7))

plot density plots with confidence intervals of 95% in R

I'm trying draw multiple density plots in one plot for comparison porpuses. I wanted them to have their confidence interval of 95% like in the following figure. I'm working with ggplot2 and my df is a long df of observations for a certain location that I would like to compare for different time intervals.
I've done some experimentation following this example but I don't have the coding knowledge to achieve what I want.
What i managed to do so far:
library(magrittr)
library(ggplot2)
library(dplyr)
build_object <- ggplot_build(
ggplot(data=ex_long, aes(x=val)) + geom_density())
plot_credible_interval <- function(
gg_density, # ggplot object that has geom_density
bound_left,
bound_right
) {
build_object <- ggplot_build(gg_density)
x_dens <- build_object$data[[1]]$x
y_dens <- build_object$data[[1]]$y
index_left <- min(which(x_dens >= bound_left))
index_right <- max(which(x_dens <= bound_right))
gg_density + geom_area(
data=data.frame(
x=x_dens[index_left:index_right],
y=y_dens[index_left:index_right]),
aes(x=x,y=y),
fill="grey",
alpha=0.6)
}
gg_density <- ggplot(data=ex_long, aes(x=val)) +
geom_density()
gg_density %>% plot_credible_interval(tab$q2.5[[40]], tab$q97.5[[40]])
Help would be much apreaciated.
This is obviously on a different set of data, but this is roughly that plot with data from 2 t distributions. I've included the data generation in case it is of use.
library(tidyverse)
x1 <- seq(-5, 5, by = 0.1)
t_dist1 <- data.frame(x = x1,
y = dt(x1, df = 3),
dist = "dist1")
x2 <- seq(-5, 5, by = 0.1)
t_dist2 <- data.frame(x = x2,
y = dt(x2, df = 3),
dist = "dist2")
t_data = rbind(t_dist1, t_dist2) %>%
mutate(x = case_when(
dist == "dist2" ~ x + 1,
TRUE ~ x
))
p <- ggplot(data = t_data,
aes(x = x,
y = y )) +
geom_line(aes(color = dist))
plot_data <- as.data.frame(ggplot_build(p)$data)
bottom <- data.frame(plot_data) %>%
mutate(dist = case_when(
group == 1 ~ "dist1",
group == 2 ~ "dist2"
)) %>%
group_by(dist) %>%
slice_head(n = ceiling(nrow(.) * 0.1)) %>%
ungroup()
top <- data.frame(plot_data) %>%
mutate(dist = case_when(
group == 1 ~ "dist1",
group == 2 ~ "dist2"
)) %>%
group_by(dist) %>%
slice_tail(n = ceiling(nrow(.) * 0.1)) %>%
ungroup()
segments <- t_data %>%
group_by(dist) %>%
summarise(x = mean(x),
y = max(y))
p + geom_area(data = bottom,
aes(x = x,
y = y,
fill = dist),
alpha = 0.25,
position = "identity") +
geom_area(data = top,
aes(x = x,
y = y,
fill = dist),
alpha = 0.25,
position = "identity") +
geom_segment(data = segments,
aes(x = x,
y = 0,
xend = x,
yend = y,
color = dist,
linetype = dist)) +
scale_color_manual(values = c("red", "blue")) +
scale_linetype_manual(values = c("dashed", "dashed"),
labels = NULL) +
ylab("Density") +
xlab("\U03B2 for AQIv") +
guides(color = guide_legend(title = "p.d.f \U03B2",
title.position = "right",
labels = NULL),
linetype = guide_legend(title = "Mean \U03B2",
title.position = "right",
labels = NULL,
override.aes = list(color = c("red", "blue"))),
fill = guide_legend(title = "Rej. area \U03B1 = 0.05",
title.position = "right",
labels = NULL)) +
annotate(geom = "text",
x = c(-4.75, -4),
y = 0.35,
label = c("RK", "OK")) +
theme(panel.background = element_blank(),
panel.border = element_rect(fill = NA,
color = "black"),
legend.position = c(0.2, 0.7),
legend.key = element_blank(),
legend.direction = "horizontal",
legend.text = element_blank(),
legend.title = element_text(size = 8))

How do I place insets at exact positions on a ggplot and set the colors of geom_segment?

I'm creating an illustration of how loess works. My two queries are at the end of this question. First, setup:
library(tidyverse)
data(melanoma, package = "lattice")
mela <- as_tibble(melanoma)
tric = function(x) if_else(abs(x) < 1, (1 - abs(x)^3)^3, 0)
scl = function(x) (x - min(x))/(max(x) - min(x))
mela1 <- mela %>%
slice(1:9) %>%
mutate(dist = abs(year - year[5]),
scaled = scl(dist),
weight = tric(scaled)
)
mod1 <- lm(incidence ~ year, data = mela1, weights = weight) %>%
augment(., mela1)
mela2 <- mela %>%
slice(10:18) %>%
mutate(dist = abs(year - year[5]),
scaled = scl(dist),
weight = tric(scaled)
)
mod2 <- lm(incidence ~ year, data = mela2, weights = weight) %>%
augment(., mela2)
mela3 <- mela %>%
slice(19:27) %>%
mutate(dist = abs(year - year[5]),
scaled = scl(dist),
weight = tric(scaled)
)
mod3 <- lm(incidence ~ year, data = mela3, weights = weight) %>%
augment(., mela3)
mela4 <- mela %>%
slice(28:37) %>%
mutate(dist = abs(year - year[5]),
scaled = scl(dist),
weight = tric(scaled)
)
mod4 <- lm(incidence ~ year, data = mela4, weights = weight) %>%
augment(., mela4)
The main plot:
col <- rainbow_hcl(start = 12, 4, l = 20)
colB <- rainbow_hcl(start = 12, 4, l = 100)
main <- ggplot(data = mela, aes(x = year, y = incidence)) +
# segment 1
geom_segment(
aes(x = 1936, xend = 1944, y = 2.115717, yend = 2.115717)) +
# segment 2
geom_segment(
aes(x = 1945, xend = 1953, y = 3.473217, yend = 3.473217)) +
# segment 3
geom_segment(
aes(x = 1954, xend = 1962, y = 1.170247, yend = 1.170247)) +
# segment 4
geom_segment(
aes(x = 1963, xend = 1972, y = 2.7, yend = 2.7)) +
geom_point(data = mod1, color = col[1], shape = 1) +
geom_point(data = mod2, color = col[2], shape = 0) +
geom_point(data = mod3, color = col[4], shape = 5) +
geom_point(data = mod4, color = col[3], shape = 2) +
geom_line(data = mod1, aes(x = year, y = .fitted), color = col[1]) +
geom_line(data = mod2, aes(x = year, y = .fitted), color = col[2]) +
geom_line(data = mod3, aes(x = year, y = .fitted), color = col[4]) +
geom_line(data = mod4, aes(x = year, y = .fitted), color = col[3]) +
scale_x_continuous(breaks = c(1940, 1949, 1958, 1967))
Insets
inset1 <- ggplot(data = mod1, aes(x = year, y = weight)) +
geom_line(color = col[1]) +
geom_area(fill = colB[1]) +
theme_void()
inset2 <- ggplot(data = mod2, aes(x = year, y = weight)) +
geom_line(color = col[12) +
geom_area(fill = colB[2]) +
theme_void()
inset3 <- ggplot(data = mod3, aes(x = year, y = weight)) +
geom_line(color = col[3]) +
geom_area(fill = colB[3]) +
theme_void()
inset4 <- ggplot(data = mod4, aes(x = year, y = weight)) +
geom_line(color = col[4]) +
geom_area(fill = colB[4]) +
theme_void()
Question 1: How do I place the four insets so that the y = 0 of the weight function is at the height of the corresponding geom_segment? I would like the inset heights = 2 in the main figure coordinates.
Question 2: How do I set the color of each segment to the color of the corresponding inset?
Not sure whether I got everything right. But I tried my best. (; You could simplify your code considerably
... by binding you models data into one dataframe and also the data for the segments.
... mapping on aesthetics and setting the colors and shape via some named vectors and scale_xxx_manual
For your insets there is no need to make separate plots and trying to put them into the main plot. You could simply add them via an additional geom_line and a geom_ribbon. To get the heights of the segments join the segments data to the models data so that you can set the starting value for the geom_ribbon according to the y value of the segment
library(tidyverse)
library(broom)
library(colorspace)
col <- setNames(col, c("mod1", "mod2", "mod4", "mod3"))
colB <- setNames(colB, c("mod1", "mod2", "mod4", "mod3"))
shapes <- setNames(c(1, 0, 5, 2), c("mod1", "mod2", "mod3", "mod4"))
mods <- list(mod1 = mod1, mod2 = mod2, mod3 = mod3, mod4 = mod4) %>%
bind_rows(.id = "mod")
# segments data
dseg <- tribble(
~mod, ~x, ~xend, ~y,
"mod1", 1936, 1944, 2.115717,
"mod2", 1945, 1953, 3.473217,
"mod3", 1954, 1962, 1.170247,
"mod4", 1963, 1972, 2.7,
)
main <- ggplot(data = mela, aes(x = year, y = incidence)) +
geom_segment(data = dseg, aes(x = x, xend = xend, y = y, yend = y, color = mod)) +
geom_point(data = mods, aes(color = mod, shape = mod)) +
geom_line(data = mods, aes(x = year, y = .fitted, color = mod)) +
scale_color_manual(values = col) +
scale_shape_manual(values = shapes) +
scale_x_continuous(breaks = c(1940, 1949, 1958, 1967)) +
guides(color = FALSE, shape = FALSE, fill = FALSE)
mods1 <- left_join(mods, select(dseg, mod, y), by = "mod")
# Add insets
main +
geom_line(data = mods1, aes(x = year, y = weight + y, color = mod, group = mod)) +
geom_ribbon(data = mods1, aes(x = year, ymin = y, ymax = weight + y, fill = mod, group = mod)) +
scale_fill_manual(values = colB)

How to change the colors of the dots in the graph? ggpubr package

I am using the ggerrorplot () function of the ggpubr package to create the graph below. My question is whether there is any way to change the colors of the dots without changing the color of the point that represents the mean and standard deviation? Observe the image:
My code:
# loading packages
library(ggpubr)
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
# Plot
ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
Can you accomplish this by simply passing in color to add.params?
# loading packages
library(ggpubr)
#> Loading required package: ggplot2
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
# Plot
ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2, color = "red"),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
Created on 2021-03-10 by the reprex package (v0.3.0)
Another potential workaround - replicate the plot using ggplot() and geom_linerange(), e.g.
library(ggpubr)
library(ggsci)
library(cowplot)
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
data_summary <- function(data, varname, groupnames){
require(plyr)
summary_func <- function(x, col){
c(mean = mean(x[[col]], na.rm=TRUE),
sd = sd(x[[col]], na.rm=TRUE))
}
data_sum<-ddply(data, groupnames, .fun=summary_func,
varname)
data_sum <- rename(data_sum, c("mean" = varname))
return(data_sum)
}
df2 <- data_summary(df, varname = "VALUE", groupnames = c("TEST", "GROUP"))
# Plot
p1 <- ggplot(df, aes(x = factor(GROUP, levels = c('Group1','Group2','Group1 and Group2')),
y = VALUE, color = TEST)) +
geom_jitter(shape = 21, fill = "black", stroke = 0,
position = position_jitterdodge(jitter.width = 0.2)) +
geom_linerange(data = df2, aes(ymin=VALUE-sd, ymax=VALUE+sd),
position=position_dodge(width = .75)) +
geom_point(data = df2, aes(y = VALUE), size = 3,
position = position_dodge(width = 0.75)) +
scale_color_jco() +
labs(x = '', y = 'Values\n') +
theme_classic(base_size = 14) +
theme(legend.title = element_blank(),
legend.position = "top")
p2 <- ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2, labels = "AUTO")
When you plot them side-by-side you can see that they aren't exactly the same, but this might work for you nonetheless.
Edit
An advantage of this approach is that you can adjust the 'fill' scale separately if you don't want all the dots to be the same colour, but you do want them to be different to the lines, e.g.
p1 <- ggplot(df, aes(x = factor(GROUP, levels = c('Group1','Group2','Group1 and Group2')),
y = VALUE, color = TEST)) +
geom_jitter(aes(fill = TEST), shape = 21, stroke = 0,
position = position_jitterdodge(jitter.width = 0.2)) +
geom_linerange(data = df2, aes(ymin=VALUE-sd, ymax=VALUE+sd),
position=position_dodge(width = .75)) +
geom_point(data = df2, aes(y = VALUE), size = 3,
position = position_dodge(width = 0.75)) +
scale_color_jco() +
scale_fill_npg() +
labs(x = '', y = 'Values\n') +
theme_classic(base_size = 14) +
theme(legend.title = element_blank(),
legend.position = "top")
p2 <- ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2, labels = "AUTO")

Is there a way to label each cluster generated by lda()

using lda() and ggplot2 I can make a canonical plot with confidence ellipses. Is there a way to add labels for each group on the plot (labeling each cluster with a group from figure legend)?
# for the universality lda(Species~., data=iris) would be analogous
m.lda <- lda(Diet ~ ., data = b)
m.sub <- b %>% dplyr::select(-Diet) %>% as.matrix
CVA.scores <- m.sub %*% m.lda$scaling
m.CV <- data.frame(CVA.scores)
m.CV$Diet <- b$Diet
m.cva.plot <-
ggplot(m.CV, aes(x = LD1, y = LD2)) +
geom_point(aes(color=Diet), alpha=0.5) +
labs(x = "CV1", y = "CV2") +
coord_fixed(ratio=1)
chi2 = qchisq(0.05,2, lower.tail=FALSE)
CIregions.mean.and.pop <-
m.CV %>%
group_by(Diet) %>%
summarize(CV1.mean = mean(LD1),
CV2.mean = mean(LD2),
mean.radii = sqrt(chi2/n()),
popn.radii = sqrt(chi2))
m.cva.plot2 <-
m.cva.plot +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = mean.radii),
inherit.aes = FALSE) +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = popn.radii),
linetype = "dashed",
inherit.aes = FALSE)
The labels can be placed with either geom_text or geom_label. In the case below I will use geom_label, with the y coordinate adjusted by adding popn.radii the radii of the outer circles.
The code in the question is adapted to use built-in data set iris, like the question itself says.
m.cva.plot2 +
geom_label(data = CIregions.mean.and.pop,
mapping = aes(x = CV1.mean,
y = CV2.mean + popn.radii,
label = Species),
label.padding = unit(0.20, "lines"),
label.size = 0)
Reproducible code
library(dplyr)
library(ggplot2)
library(ggforce)
library(MASS)
b <- iris
m.lda <- lda(Species~., data=iris) #would be analogous
#m.lda <- lda(Diet ~ ., data = b)
m.sub <- b %>% dplyr::select(-Species) %>% as.matrix
CVA.scores <- m.sub %*% m.lda$scaling
m.CV <- data.frame(CVA.scores)
m.CV$Species <- b$Species
m.cva.plot <-
ggplot(m.CV, aes(x = LD1, y = LD2)) +
geom_point(aes(color=Species), alpha=0.5) +
labs(x = "CV1", y = "CV2") +
coord_fixed(ratio=1)
chi2 = qchisq(0.05,2, lower.tail=FALSE)
CIregions.mean.and.pop <-
m.CV %>%
group_by(Species) %>%
summarize(CV1.mean = mean(LD1),
CV2.mean = mean(LD2),
mean.radii = sqrt(chi2/n()),
popn.radii = sqrt(chi2))
m.cva.plot2 <-
m.cva.plot +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = mean.radii),
inherit.aes = FALSE) +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = popn.radii),
linetype = "dashed",
inherit.aes = FALSE)

Resources