How can I standardize binwidth when making multiple ggplot2 with varying y scale? - r

I have some code to make plots for all of the variables in my data (36) and export them automatically. It works great, but the set binwidth is pretty small. When I try to change the bindwith, it makes it really small or really big for my graphs depending on their y-axis scale. How can I increase it proportionally for all?
# Plot separate ggplot figures in a loop.
library(ggplot2)
# Make list of variable names to loop over.
var_list = combn(names(LessCountS)[1:37], 2, simplify=FALSE)
my_comparisons <- list( c("HC", "IN"), c("IN", "OUT"), c("HC", "OUT") )
symnum.args <- list(
cutpoints = c(0.0001, 0.001, 0.01, 0.05, 1),
symbols = c("***", "**", "*", "NS")
)
# Make plots.
plot_list = list()
for (i in 1:37) {
p = ggplot(LessCount1, aes_string(x=var_list[[i]][1], y=var_list[[i]][2])) +
geom_dotplot(aes(fill= Type),
binaxis = "y", stackratio = .5, binwidth = 90,
stackdir = "center"
) +
theme_gray ()+
labs(x="", y = "Cell Count (cells/\u03bcL)") +
ggtitle(var_list[[i]][2]) +
scale_x_discrete(labels=c("HC" = "Controls", "IN" = "Inpatients",
"OUT" = "Outpatients")) +
theme(plot.title = element_text(hjust = 0.5, vjust = 2), legend.text=element_text(size=12),
axis.text = element_text(size=14),
axis.title = element_text(size = 14)) +
scale_fill_manual(values=c("#CCCCCC", "#990066", "#3366CC")) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median,
geom = "crossbar", width = 0.5, size = .45) +
stat_compare_means(comparisons = my_comparisons, label.y = , label = "p.signif", size = 5, symnum.args = symnum.args) +
stat_compare_means(label.y = )
plot_list[[i]] = p
}
# Save plots to tiff. Makes a separate file for each plot.
for (i in 1:37) {
file_name = paste("LessCount1_plot_", var_list[[i]][2], ".tiff", sep="")
tiff(file_name)
print(plot_list[[i]])
dev.off()
}
Examples of the outcome if I change the bindwidth.
If I dont specify the binwidth they are the same size, but quite small. I want to increase it proportionally for all irrespective of their scale, hoping this is possible!
Thanks in advance,
S

You need to make binwidth conditional on the data rather than setting it to a fixed value. Here's an example:
library("ggplot2")
for (ii in 1:5) {
y <- names(mtcars)[ii]
p <- ggplot(mtcars, aes(x = 1, y = !!sym(y))) +
geom_dotplot(binaxis = "y", stackdir = "center",
# binwidth = 1
binwidth = diff(range(mtcars[, y]))/20)
print(p)
}

Related

SHAP Summary Plot for XGBoost model in R without displaying Mean Absolute SHAP value on the plot

I don't want to display the Mean Absolute Values on my SHAP Summary Plot in R. I want an output similar to the one produced in python. What line of code will help remove the mean absolute values from the summary plot in R?
I'm currently using this line of code:
shap.plot.summary.wrap1(xgb_model, X = x, top_n = 10)
You can do this by sligtly modifying the source code of shap.plot.summary() as below:
shap.plot.summary.edited <- function(data_long,
x_bound = NULL,
dilute = FALSE,
scientific = FALSE,
my_format = NULL){
if (scientific){label_format = "%.1e"} else {label_format = "%.3f"}
if (!is.null(my_format)) label_format <- my_format
# check number of observations
N_features <- setDT(data_long)[,uniqueN(variable)]
if (is.null(dilute)) dilute = FALSE
nrow_X <- nrow(data_long)/N_features # n per feature
if (dilute!=0){
# if nrow_X <= 10, no dilute happens
dilute <- ceiling(min(nrow_X/10, abs(as.numeric(dilute)))) # not allowed to dilute to fewer than 10 obs/feature
set.seed(1234)
data_long <- data_long[sample(nrow(data_long),
min(nrow(data_long)/dilute, nrow(data_long)/2))] # dilute
}
x_bound <- if (is.null(x_bound)) max(abs(data_long$value))*1.1 else as.numeric(abs(x_bound))
plot1 <- ggplot(data = data_long) +
coord_flip(ylim = c(-x_bound, x_bound)) +
geom_hline(yintercept = 0) + # the y-axis beneath
# sina plot:
ggforce::geom_sina(aes(x = variable, y = value, color = stdfvalue),
method = "counts", maxwidth = 0.7, alpha = 0.7) +
# print the mean absolute value:
#geom_text(data = unique(data_long[, c("variable", "mean_value")]),
# aes(x = variable, y=-Inf, label = sprintf(label_format, mean_value)),
# size = 3, alpha = 0.7,
# hjust = -0.2,
# fontface = "bold") + # bold
# # add a "SHAP" bar notation
# annotate("text", x = -Inf, y = -Inf, vjust = -0.2, hjust = 0, size = 3,
# label = expression(group("|", bar(SHAP), "|"))) +
scale_color_gradient(low="#FFCC33", high="#6600CC",
breaks=c(0,1), labels=c(" Low","High "),
guide = guide_colorbar(barwidth = 12, barheight = 0.3)) +
theme_bw() +
theme(axis.line.y = element_blank(),
axis.ticks.y = element_blank(), # remove axis line
legend.position="bottom",
legend.title=element_text(size=10),
legend.text=element_text(size=8),
axis.title.x= element_text(size = 10)) +
# reverse the order of features, from high to low
# also relabel the feature using `label.feature`
scale_x_discrete(limits = rev(levels(data_long$variable))#,
#labels = label.feature(rev(levels(data_long$variable)))
)+
labs(y = "SHAP value (impact on model output)", x = "", color = "Feature value ")
return(plot1)
}

R heatmap with circles

I would like to generate, in R, a heatmap visualization of a matrix using circles, in order to have both the color and diameter of the circles be informative. Something looking like this:
This sort of plotting is called "bubblegum plot" in certain computational biology labs, but I could not find an R function/package to do it.
Any ideas? Thanks!
Not sure whether there is a package which offers this out-of-the-box but using just ggplot2 this could be achieved like so:
library(ggplot2)
set.seed(42)
d <- data.frame(
x = rep(paste("Team", LETTERS[1:8]), 4),
y = rep(paste("Task", 1:4), each = 8),
value = runif(32)
)
ggplot(d, aes(x, forcats::fct_rev(y), fill = value, size = value)) +
geom_point(shape = 21, stroke = 0) +
geom_hline(yintercept = seq(.5, 4.5, 1), size = .2) +
scale_x_discrete(position = "top") +
scale_radius(range = c(1, 15)) +
scale_fill_gradient(low = "orange", high = "blue", breaks = c(0, .5, 1), labels = c("Great", "OK", "Bad"), limits = c(0, 1)) +
theme_minimal() +
theme(legend.position = "bottom",
panel.grid.major = element_blank(),
legend.text = element_text(size = 8),
legend.title = element_text(size = 8)) +
guides(size = guide_legend(override.aes = list(fill = NA, color = "black", stroke = .25),
label.position = "bottom",
title.position = "right",
order = 1),
fill = guide_colorbar(ticks.colour = NA, title.position = "top", order = 2)) +
labs(size = "Area = Time Spent", fill = "Score:", x = NULL, y = NULL)
I wrote an alternative function to perform the plotting, without ggplot and tidyverse. I will soon upload it to the CRAN corto package. Enjoy!
Usage
inputp<-matrix(runif(1000),nrow=50)
inputn<-matrix(rnorm(1000),nrow=50)
colnames(inputp)<-colnames(inputn)<-paste0("Score",1:ncol(inputp))
rownames(inputp)<-rownames(inputn)<-paste0("Car",1:nrow(inputp))
par(las=2,mar=c(0,6,6,10))
bubblegum(inputp,inputn)
BUBBLEGUM function
require(gplots)
require(plotrix)
bubblegum<-function(
inputp,
inputn,
pcr=0.1,
grid=FALSE,
reorder=FALSE,
legend=TRUE,
matrix2col=TRUE
) {
if(nrow(inputp)!=nrow(inputn)|ncol(inputp)!=ncol(inputn)){
warning("inputp and inpute have different sizes!")
}
### Initialize
rownumber<-nrow(inputp)
colnumber<-ncol(inputp)
### Trasform the NESs into colors
if(matrix2col){
colconversion<-matrix2col(inputn,nbreaks=20)
nescolors<-colconversion$colormatrix
} else {
nescolors<-inputn
}
#pradii<-0.3*(-log(inputp)/max(-log(inputp)))
pradii<-inputp
pradii[inputp>0.1]<-pcr*0
pradii[inputp<=0.1]<-pcr*1
pradii[inputp<0.05]<-pcr*2
pradii[inputp<1E-5]<-pcr*3
pradii[inputp<1E-10]<-pcr*4
pradii[inputp<1E-20]<-pcr*5
### Order by sum NES
sumnes<-apply(inputn,1,function(x){sum(abs(x))})
if(reorder){
neworder<-order(sumnes)
pradii<-pradii[neworder,]
nescolors<-nescolors[neworder,]
} else {
pradii<-pradii[nrow(pradii):1,]
nescolors<-nescolors[nrow(nescolors):1,]
}
### Plot
#par(las=2,mar=c(0,20,6,0))
plot(0,ylim=c(0,rownumber+1),xlim=c(0,colnumber+1),xaxt="n",yaxt="n",type="n",frame.plot=FALSE,xlab="",ylab="")#,xaxs="i",yaxs="i")
if(grid){
abline(h=1:rownumber,lty=2)
abline(v=1:colnumber,lty=2)
}
for (i in (1:rownumber)) {
for(j in 1:colnumber) {
radius<-pradii[i,j]
color<-nescolors[i,j]
draw.circle(j,i,radius=radius,col=color,lwd=0.2)
}
}
axis(3,at=1:colnumber,labels=colnames(pradii))
axis(2,at=1:rownumber,labels=rownames(pradii),cex.axis=0.7)
### Enable things to be drawn outside the plot region
par(xpd=TRUE)
### Title
### Legend
if(legend){
#legend(-colnumber,rownumber,c("group A", "group B"), pch = c(1,2), lty = c(1,2))
legend("topright",inset=c(-0.1,0),legend=c(
"<0.1","0.05","<1e-5","<1e-10","<1e-20"
), pch=c(21), title="FDR",pt.bg="white",horiz=FALSE,pt.cex=c(1,1.5,2,2.5,3))
}
if(matrix2col){
extreme<-round(max(abs(inputn)),1)
legend("bottomright", inset=c(-0.1,0),legend=c(
-extreme,-extreme/2,0,extreme/2,extreme
), pch=c(21), title="Score",
pt.bg=colconversion$col[c(1,5,10,15,19)],
horiz=FALSE,pt.cex=3)
}
}
###########################
matrix2col<-function(z,col1="navy",col2="white",col3="red3",nbreaks=100,center=TRUE){
if(center){
extreme=max(abs(z))+0.001
breaks <- seq(-extreme, extreme, length = nbreaks)
}else {
breaks <- seq(min(z), max(z), length = nbreaks)
}
ncol <- length(breaks) - 1
col <- colorpanel(ncol,col1,col2,col3)
CUT <- cut(z, breaks=breaks)
colorlevels <- col[match(CUT, levels(CUT))] # assign colors to heights for each point
names(colorlevels)<-rownames(z)
colormatrix<-matrix(colorlevels,ncol=ncol(z),nrow=nrow(z))
dimnames(colormatrix)<-dimnames(z)
return(list(colormatrix=colormatrix,col=col))
}

I am trying to create an exponent instead of R^2

I am using ggplot2 to create a scatter plot of 2 variables. I want to have these printed out on the caption portion of ggplot:
linear regression equation
r2 value
p-value
I am using brackets, new lines and stored values to concatenate everything together. I have attempted using expression(), parse() and bquote() functions but it only prints out the variable name and not the stored values.
This is the graph I have now. Everything looks great other than the R^2 part. Brackets seem to cause a lot of problems but I want to keep them (looks better in my opinion).This is my ggplot script. I am only concerned about the caption section at the end.
Difficult to work with the code you have provided as an example (see comment re: reproducible example), but I had my students complete a similar exercise for their homework recently, and can provide an example which you can likely generalize from. My approach is to use the TeX() function from the latex2exp package.
A psychologist is interested in whether she can predict GPA in graduate school from students' earlier scores on the Graduate Record Exam (GRE).
Setup the Toy Data and Regression Model
GPA <- c(3.70,3.18,2.90,2.93,3.02,2.65,3.70,3.77,3.41,2.38,
3.54,3.12,3.21,3.35,2.60,3.25,3.48,2.74,2.90,3.28)
GRE <- c(637,562,520,624,500,500,700,680,655,525,
593,656,592,689,550,536,629,541,588,619)
gpa.gre <- data.frame(GPA, GRE)
mod <- lm(GPA ~ GRE, data = gpa.gre)
mod.sum <- summary(mod)
print(cofs <- round(mod$coefficients, digits = 4))
aY <- cofs[[1]]
bY <- cofs[[2]]
print(Rsqr <- round(cor(GPA,GRE)^2, digits = 2))
Generate the Plot
require(ggplot2)
require(latex2exp)
p <- ggplot(data = gpa.gre, aes(x = GRE, y = GPA)) +
geom_smooth(formula = 'y ~ x', color ="grey40", method = "lm",
linetype = 1, lwd = 0.80, se = TRUE, alpha = 0.20) +
geom_point(color = "grey10", size = 1) +
labs(y = "Grade Point Average", x = "GRE Score") +
coord_cartesian(ylim = c(2.28, 3.82), xlim = c(498, 702), clip = "off") +
scale_y_continuous(breaks = seq(2.30, 3.80, 0.25)) +
scale_x_continuous(breaks = seq(500, 700, 50)) +
theme_classic() +
theme(axis.title.x = element_text(margin = unit(c(3.5,0,0,0), "mm"), size = 11.5),
axis.title.y = element_text(margin = unit(c(0,3.5,0,0), "mm"), size = 11.5),
axis.text = element_text(size = 10),
plot.margin = unit(c(0.25,4,1,0.25), "cm"))
# Use TeX function to use LaTeX
str_note <- TeX("\\textit{Note. ***p} < .001")
str_eq <- TeX("$\\hat{\\textit{y}} = 0.4682 + 0.0045 \\textit{x}$")
str_rsq <- TeX("$\\textit{R}^2 = .54***$")
# Create annotations
p + annotate("text", x = 728, y = 3.70, label = str_eq, size = 3.5,
hjust = 0, na.rm = TRUE) +
annotate("text", x = 728, y = 3.57, label = str_rsq, size = 3.5,
hjust = 0, na.rm = TRUE) +
annotate("text", x = 490, y = 1.80, label = str_note, size = 3.5,
hjust = 0, na.rm = TRUE)
Get Result
ggsave(filename = '~/Documents/gregpa.png', # your favourite file path here
width = unit(5, "in"), # width of plot
height = unit(4, "in"), # height of plot
dpi = 400) # resolution in dots per inch

Can I change the color of the labels on my brackets, without changing the color of the bracket itself? (ggplot2)

I'm plotting some percentages of answers on a Likert-scale, and need to mark the sum of answer 1-2 and 4-5. I've done this pretty okay with the brackets, but I'm having some troubles with its colors. I'd like the bracket to be of the same color as the answering alternatives it represents, but I'd still like the text to be black. Is there a neat way to fix this?
Here's my code (a function for creating the plot I need with different questions and different percentages):
### FUNCTION
plot_function <- function (question, percent_1, percent_2, percent_3, percent_4, percent_5, percent_6) {
# Create temporary df
answers <- c(percent_1, percent_2, percent_3, percent_4, percent_5, percent_6)
labs = vector(mode = "list", length = 6)
for (i in 1:6) {
labs[i] <- paste(toString(answers[i]), "%", sep = "")
}
df <- data.frame(Scale, answers, labs)
# Create plot
plot <- ggplot(df, aes(x=Scale, y = answers)) +
geom_bar(aes(fill=factor(..x..)), stat = "identity") +
labs(x = "", y="Percent", fill="Scale", title = question) +
scale_fill_manual(name = "Scale",
labels = Scale,
values = c(color_1, color_2, color_3, color_4, color_5, color_6)) +
theme(panel.background = element_blank(),
panel.grid.minor = element_line(colour = 'grey',
size = 0.25,
linetype = 'dashed')) +
theme_tufte() +
geom_text(label = labs, nudge_y = 1.5, family = font) +
geom_bracket(xmin = 1,
xmax = 2,
y.position = max(answers) + 10,
label = paste(toString(percent_1 + percent_2), "%", sep = ""),
tip.length = c(0.05, 0.05),
family = font,
vjust = -1,
size = 1,
color = color_1) +
geom_bracket(xmin = 4,
xmax = 5,
y.position = max(answers) + 10,
label = paste(toString(percent_4 + percent_5), "%", sep = ""),
tip.length = c(0.05, 0.05),
family = font,
vjust = -1,
size = 1,
color = color_5) +
ylim(c(0, max(answers + 15)))
# Return plot
plot
}
As you can see below, the labels for the brackets take on the same color as the bracket itself. How can I make the label black, while keeping the colored brackets? :)

Produce an inset in each facet of an R ggplot while preserving colours of the original facet content

I would like to produce a graphic combining four facets of a graph with insets in each facet showing a detail of the respective plot. This is one of the things I tried:
#create data frame
n_replicates <- c(rep(1:10,15),rep(seq(10,100,10),15),rep(seq(100,1000,100),15),rep(seq(1000,10000,1000),15))
sim_years <- rep(sort(rep((1:15),10)),4)
sd_data <- rep (NA,600)
for (i in 1:600) {
sd_data[i]<-rnorm(1,mean=exp(0.1 * sim_years[i]), sd= 1/n_replicates[i])
}
max_rep <- sort(rep(c(10,100,1000,10000),150))
data_frame <- cbind.data.frame(n_replicates,sim_years,sd_data,max_rep)
#do first basic plot
library(ggplot2)
plot1<-ggplot(data=data_frame, aes(x=sim_years,y=sd_data,group =n_replicates, col=n_replicates)) +
geom_line() + theme_bw() +
labs(title ="", x = "year", y = "sd")
plot1
#make four facets
my_breaks = c(2, 10, 100, 1000, 10000)
facet_names <- c(
`10` = "2, 3, ..., 10 replicates",
`100` = "10, 20, ..., 100 replicates",
`1000` = "100, 200, ..., 1000 replicates",
`10000` = "1000, 2000, ..., 10000 replicates"
)
plot2 <- plot1 +
facet_wrap( ~ max_rep, ncol=2, labeller = as_labeller(facet_names)) +
scale_colour_gradientn(name = "number of replicates", trans = "log",
breaks = my_breaks, labels = my_breaks, colours = rainbow(20))
plot2
#extract inlays (this is where it goes wrong I think)
library(ggpmisc)
library(tibble)
library(dplyr)
inset <- tibble(x = 0.01, y = 10.01,
plot = list(plot2 +
facet_wrap( ~ max_rep, ncol=2, labeller = as_labeller(facet_names)) +
coord_cartesian(xlim = c(13, 15),
ylim = c(3, 5)) +
labs(x = NULL, y = NULL, color = NULL) +
scale_colour_gradient(guide = FALSE) +
theme_bw(10)))
plot3 <- plot2 +
expand_limits(x = 0, y = 0) +
geom_plot_npc(data = inset, aes(npcx = x, npcy = y, label = plot)) +
annotate(geom = "rect",
xmin = 13, xmax = 15, ymin = 3, ymax = 5,
linetype = "dotted", fill = NA, colour = "black")
plot3
That leads to the following graphic:
As you can see, the colours in the insets are wrong, and all four of them appear in each of the facets even though I only want the corresponding inset of course. I read through a lot of questions here (to even get me this far) and also some examples in the ggpmisc user guide but unfortunately I am still a bit lost on how to achieve what I want. Except maybe to do it by hand extracting four insets and then combining them with plot2. But I hope there will be a better way to do this. Thank you for your help!
Edit: better graphic now thanks to this answer, but problem remains partially unsolved:
The following code does good insets, but unfortunately the colours are not preserved. As in the above version each inset does its own rainbow colours anew instead of inheriting the partial rainbow scale from the facet it belongs to. Does anyone know why and how I could change this? In comments I put another (bad) attempt at solving this, it preserves the colors but has the problem of putting all four insets in each facet.
library(ggpmisc)
library(tibble)
library(dplyr)
# #extract inlays: good colours, but produces four insets.
# fourinsets <- tibble(#x = 0.01, y = 10.01,
# x = c(rep(0.01, 4)),
# y = c(rep(10.01, 4)),
# plot = list(plot2 +
# facet_wrap( ~ max_rep, ncol=2) +
# coord_cartesian(xlim = c(13, 15),
# ylim = c(3, 5)) +
# labs(x = NULL, y = NULL, color = NULL) +
# scale_colour_gradientn(name = "number of replicates", trans = "log", guide = FALSE,
# colours = rainbow(20)) +
# theme(
# strip.background = element_blank(),
# strip.text.x = element_blank()
# )
# ))
# fourinsets$plot
library(purrr)
pp <- map(unique(data_frame$max_rep), function(x) {
plot2$data <- plot2$data %>% filter(max_rep == x)
plot2 +
coord_cartesian(xlim = c(12, 14),
ylim = c(3, 4)) +
labs(x = NULL, y = NULL) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank(),
legend.position = "none",
axis.text=element_blank(),
axis.ticks=element_blank()
)
})
#pp[[2]]
inset_new <- tibble(x = c(rep(0.01, 4)),
y = c(rep(10.01, 4)),
plot = pp,
max_rep = unique(data_frame$max_rep))
final_plot <- plot2 +
geom_plot_npc(data = inset_new, aes(npcx = x, npcy = y, label = plot, vp.width = 0.3, vp.height =0.6)) +
annotate(geom = "rect",
xmin = 12, xmax = 14, ymin = 3, ymax = 4,
linetype = "dotted", fill = NA, colour = "black")
#final_plot
final_plot then looks like this:
I hope this clarifies the problem a bit. Any ideas are very welcome :)
Modifying off #user63230's excellent answer:
pp <- map(unique(data_frame$max_rep), function(x) {
plot2 +
aes(alpha = ifelse(max_rep == x, 1, 0)) +
coord_cartesian(xlim = c(12, 14),
ylim = c(3, 4)) +
labs(x = NULL, y = NULL) +
scale_alpha_identity() +
facet_null() +
theme(
strip.background = element_blank(),
strip.text.x = element_blank(),
legend.position = "none",
axis.text=element_blank(),
axis.ticks=element_blank()
)
})
Explanation:
Instead of filtering the data passed into plot2 (which affects the mapping of colours), we impose a new aesthetic alpha, where lines belonging to the other replicate numbers are assigned 0 for transparency;
Use scale_alpha_identity() to tell ggplot that the alpha mapping is to be used as-is: i.e. 1 for 100%, 0 for 0%.
Add facet_null() to override plot2's existing facet_wrap, which removes the facet for the inset.
Everything else is unchanged from the code in the question.
I think this will get you started although its tricky to get the size of the inset plot right (when you include a legend).
#set up data
library(ggpmisc)
library(tibble)
library(dplyr)
library(ggplot2)
# create data frame
n_replicates <- c(rep(1:10, 15), rep(seq(10, 100, 10), 15), rep(seq(100,
1000, 100), 15), rep(seq(1000, 10000, 1000), 15))
sim_years <- rep(sort(rep((1:15), 10)), 4)
sd_data <- rep(NA, 600)
for (i in 1:600) {
sd_data[i] <- rnorm(1, mean = exp(0.1 * sim_years[i]), sd = 1/n_replicates[i])
}
max_rep <- sort(rep(c(10, 100, 1000, 10000), 150))
data_frame <- cbind.data.frame(n_replicates, sim_years, sd_data, max_rep)
# make four facets
my_breaks = c(2, 10, 100, 1000, 10000)
facet_names <- c(`10` = "2, 3, ..., 10 replicates", `100` = "10, 20, ..., 100 replicates",
`1000` = "100, 200, ..., 1000 replicates", `10000` = "1000, 2000, ..., 10000 replicates")
Get overall plot:
# overall facet plot
overall_plot <- ggplot(data = data_frame, aes(x = sim_years, y = sd_data, group = n_replicates, col = n_replicates)) +
geom_line() +
theme_bw() +
labs(title = "", x = "year", y = "sd") +
facet_wrap(~max_rep, ncol = 2, labeller = as_labeller(facet_names)) +
scale_colour_gradientn(name = "number of replicates", trans = "log", breaks = my_breaks, labels = my_breaks, colours = rainbow(20))
#plot
overall_plot
which gives:
Then from the overall plot you want to extract each plot, see here. We can map over the list to extract one at a time:
pp <- map(unique(data_frame$max_rep), function(x) {
overall_plot$data <- overall_plot$data %>% filter(max_rep == x)
overall_plot + # coord_cartesian(xlim = c(13, 15), ylim = c(3, 5)) +
labs(x = NULL, y = NULL) +
theme_bw(10) +
theme(legend.position = "none")
})
If we look at one of these (I've removed the legend) e.g.
pp[[1]]
#pp[[2]]
#pp[[3]]
#pp[[4]]
Gives:
Then we want to add these inset plots into a dataframe so that each plot has its own row:
inset <- tibble(x = c(rep(0.01, 4)),
y = c(rep(10.01, 4)),
plot = pp,
max_rep = unique(data_frame$max_rep))
Then merge this into the overall plot:
overall_plot +
expand_limits(x = 0, y = 0) +
geom_plot_npc(data = inset, aes(npcx = x, npcy = y, label = plot, vp.width = 0.8, vp.height = 0.8))
Gives:
Here is a solution based on Z. Lin's answer, but using ggforce::facet_wrap_paginate() to do the filtering and keeping colourscales consistent.
First, we can make the 'root' plot containing all the data with no facetting.
library(ggpmisc)
library(tibble)
library(dplyr)
n_replicates <- c(rep(1:10,15),rep(seq(10,100,10),15),rep(seq(100,1000,100),15),rep(seq(1000,10000,1000),15))
sim_years <- rep(sort(rep((1:15),10)),4)
sd_data <- rep (NA,600)
for (i in 1:600) {
sd_data[i]<-rnorm(1,mean=exp(0.1 * sim_years[i]), sd= 1/n_replicates[i])
}
max_rep <- sort(rep(c(10,100,1000,10000),150))
data_frame <- cbind.data.frame(n_replicates,sim_years,sd_data,max_rep)
my_breaks = c(2, 10, 100, 1000, 10000)
facet_names <- c(
`10` = "2, 3, ..., 10 replicates",
`100` = "10, 20, ..., 100 replicates",
`1000` = "100, 200, ..., 1000 replicates",
`10000` = "1000, 2000, ..., 10000 replicates"
)
base <- ggplot(data=data_frame,
aes(x=sim_years,y=sd_data,group =n_replicates, col=n_replicates)) +
geom_line() +
theme_bw() +
scale_colour_gradientn(
name = "number of replicates",
trans = "log10", breaks = my_breaks,
labels = my_breaks, colours = rainbow(20)
) +
labs(title ="", x = "year", y = "sd")
Next, the main plot will be just the root plot with facet_wrap().
main <- base + facet_wrap(~ max_rep, ncol = 2, labeller = as_labeller(facet_names))
Then the new part is to use facet_wrap_paginate with nrow = 1 and ncol = 1 for every max_rep, which we'll use as insets. The nice thing is that this does the filtering and it keeps colour scales consistent with the root plot.
nmax_rep <- length(unique(data_frame$max_rep))
insets <- lapply(seq_len(nmax_rep), function(i) {
base + ggforce::facet_wrap_paginate(~ max_rep, nrow = 1, ncol = 1, page = i) +
coord_cartesian(xlim = c(12, 14), ylim = c(3, 4)) +
guides(colour = "none", x = "none", y = "none") +
theme(strip.background = element_blank(),
strip.text = element_blank(),
axis.title = element_blank(),
plot.background = element_blank())
})
insets <- tibble(x = rep(0.01, nmax_rep),
y = rep(10.01, nmax_rep),
plot = insets,
max_rep = unique(data_frame$max_rep))
main +
geom_plot_npc(data = insets,
aes(npcx = x, npcy = y, label = plot,
vp.width = 0.3, vp.height = 0.6)) +
annotate(geom = "rect",
xmin = 12, xmax = 14, ymin = 3, ymax = 4,
linetype = "dotted", fill = NA, colour = "black")
Created on 2020-12-15 by the reprex package (v0.3.0)

Resources