Save complicated plot() to object - r

I have a series of commands that create a vibration of effects plot. Now, I want to assign the plot to an object (to later make it downloadable via Shiny). However, that does not seem possible. When I try to save the plot to an object, the object returns "Null" and likewise if I try to save it it saves an empty .png file.
See below for the function and some example data.
#some packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(MASS, tidyverse, ggplot2, dplyr, shiny, here, BayesFactor, ggpubr, effsize, DescTools, rqPen)
#plot of p value vs effect size vibration plot
#https://figshare.com/articles/Code_data_and_analysis_script_for_A_Traveler_s_Guide_to_the_Multiverse_Promises_Pitfalls_and_a_Framework_for_the_Evaluation_of_Analytic_Decisions_/12089736 main source
multiverse.vibration <- function(effsize, statistic, alpha = 0.05, threshold = 6, type = c("frequentist")){
#assign colours schemes
point.color <- rgb(0,76,153, alpha=80, maxColorValue=255)
contour.color = rgb(60,130,180, alpha=130, maxColorValue=255)
#vibrations
vibrations <- kde2d(effsize, -log10(statistic), n=50)
if (type == "frequentist"){
#do the plotting.
plot(effsize, -log10(statistic), type="n", las=1, xlab=expression(paste("Effect size")), ylab=expression(paste("-log"[10],"(",italic("p"),"-value)")), main="", cex.lab=1.35, cex.axis=1.2 ) ####the label of the y axis gets cut off by the picture for no reason whatsoever####
#add quantile lines
abline(v=as.numeric(quantile(effsize, probs=0.5)), lty=3, lwd=1.8, col="gray70")
abline(h=-log10(as.numeric(quantile(statistic, probs=0.5))), lty=3, lwd=1.8, col="gray70")
#add data points
points(effsize, -log10(statistic), pch=16, col=point.color, cex=1.5)
#add "vibrations"
contour(vibrations, drawlabels=FALSE, nlevels=5, lwd=1.7, col=contour.color, add=TRUE)
text(as.numeric(quantile(effsize, probs=0.5)), max(-log10(statistic)), "50", pos=2, col="gray40", cex=1)
text(max(effsize), -log10(as.numeric(quantile(statistic, probs=0.5))), "50", pos=3, col="gray40", cex=1)
#add alpha line and label
abline(h=-log10(alpha), lty=3, lwd=1.5, col="red")
text(min(effsize), -log10(alpha), expression(paste(alpha)), pos = 1, cex = 1, col = "red")
}
#...function simplified
}
#and below some data
df_multiverse <- structure(list(transformation = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("square",
"squareroot"), class = "factor"), datatrimming = structure(c(2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("mad",
"notrimming"), class = "factor"), fixedtrimming = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "nofixedtrimming", class = "factor"),
min = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), max = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), DispersionMeasure = c(NA,
2, 2.5, 3, 3.5, 4, 4.5, 5, NA, 2, 2.5, 3, 3.5, 4, 4.5, 5),
NumberOfTrials = c(2481, 2017, 2089, 2152, 2202, 2235, 2271,
2292, 2481, 2017, 2089, 2152, 2202, 2235, 2271, 2292), df = c(21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21
), t.value = c(0.834352731211477, -1.89143806501942, -2.06164045172582,
-2.29402139720537, -2.20170894686594, -1.30874979649765,
-1.46580636234517, -0.933033039387291, -0.381340656529586,
-2.65553835404059, -2.70367808996487, -2.88191068442976,
-2.89698876130645, -2.31203065738409, -2.40524937843272,
-1.99997820996895), p.value = c(0.413473232348569, 0.0724397922282673,
0.0518359697127152, 0.0322027617938105, 0.0390026786336539,
0.204761347160827, 0.157515139319996, 0.361407402521166,
0.706781450011369, 0.0147953018060795, 0.013300947944711,
0.00892256290108781, 0.0086233125398353, 0.0310102245266004,
0.0254623057912856, 0.0586025361696588), estimate = c(0.0513517727014905,
-0.138440596771433, -0.152826845040145, -0.172473124495872,
-0.150035258885051, -0.106059860414446, -0.0904972867538278,
-0.0636909905658258, -0.0224006885730891, -0.132591874705722,
-0.141473579509691, -0.162307800901886, -0.156924178280938,
-0.138723145332572, -0.124862443444392, -0.109932966289113
)), row.names = c("df", "df1", "df2", "df3", "df4", "df5",
"df6", "df7", "df8", "df9", "df10", "df11", "df12", "df13", "df14",
"df15"), class = "data.frame")
#and below a call
object <- multiverse.vibration(df_multiverse$estimate, df_multiverse$p.value, type = "frequentist")
#Now I try to save it
svg(file = "Figure 1.svg", width = 9, height = 9, antialias = "gray")
object
dev.off()
#empty file, does not save plot.
My goal is to save the plot to an object in a way that later allows me to download the object via some command.

Related

R forestplot() function fix section widths

I want to generate multiple forest plots using the forestplot() function in R (from the package R/forestplot), and want to ensure that I can line up the text and graph sections in each so they can be usefully shown as a stacked plot, like this:
(taken from R forestplot package blank lines with section headings)
but with the possibility that there may be different scales in each subplot, lining up the zero effect line in each plot.
Which attributes need changing in the forestplot() call to ensure that this occurs?
EDIT: to provide a minimum code example
library(forestplot)
library(tidyr)
cohort <- data.frame(Age = c(43, 39, 34, 55, 70, 59, 44, 83, 76, 44,
75, 60, 62, 50, 44, 40, 41, 42, 37, 35, 55, 46),
Status = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L), levels = c("-", "+"), class = "factor"),
Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("1","2"), class = "factor"))
age.lm <- lm(Age ~ Group, data = cohort)
status.lm <- glm(Status ~ Group, data = cohort, family=binomial(link=logit))
age.data <- summary(age.lm)$coefficients[2,]
status.data <- summary(status.lm)$coefficients[2,]
age.data <- rbind(c(0,0,0,1,"Group 1", "n=15"),
c(age.data[1], age.data[1]-age.data[2]*1.95, age.data[1]+age.data[2]*1.95, age.data[4], "Group 2", "n=7"))
status.data <- rbind(c(0,0,0,1,"Group 1", "[+13,-2]"),
c(status.data[1], status.data[1]-status.data[2]*1.95, status.data[1]+status.data[2]*1.95, status.data[4], "Group 2", "[+2,-5]"))
colnames(age.data) <- c("mean","lower","upper","p-val","labeltext","numbers")
colnames(status.data) <- c("mean","lower","upper","p-val","labeltext","numbers")
age.data <- data.frame(age.data)
status.data <- data.frame(status.data)
age.data$mean <- as.numeric(age.data$mean)
age.data$lower <- as.numeric(age.data$lower)
age.data$upper <- as.numeric(age.data$upper)
status.data$mean <- exp(as.numeric(status.data$mean))
status.data$lower <- exp(as.numeric(status.data$lower))
status.data$upper <- exp(as.numeric(status.data$upper))
age.plot <- forestplot(age.data,
labeltext = c(labeltext,numbers),
boxsize = 0.1,
xlog = FALSE,
clip=c(-20,20),
xticks=c(-20,-10,0,10,20),
txt_gp = fpTxtGp(ticks=gpar(cex=1)),
align=c("l","c","l"))
status.plot <- forestplot(status.data,
labeltext = c(labeltext,numbers),
boxsize = 0.1,
xlog = TRUE,
clip=c(1/100,100),
xticks=c(log(1e-2),log(1e-1),0,log(1e1),log(1e2)),
txt_gp = fpTxtGp(ticks=gpar(cex=1)),
align=c("l","c","l"))
Note that the age plot is a linear model and the status plot is a logistic model:
I want to be able to arrange the relative sizes of the text to the left and the plot to the right in order that the zero-effect lines (at 0 and at 1 respectively) line up so that the forest plots stack cleanly.
With the align argument you could left "l" align the parts of your plot, so the text can be left aligned. If you want to align your zero-effect lines you could use mar and play with the units to adjust one of the graphs. Here is a reproducible example:
library(forestplot)
library(tidyr)
age.plot <- forestplot(age.data,
labeltext = c(labeltext,numbers),
boxsize = 0.1,
xlog = FALSE,
clip=c(-20,20),
xticks=c(-20,-10,0,10,20),
txt_gp = fpTxtGp(ticks=gpar(cex=1)),
align=c("l","l","l")
)
status.plot <- forestplot(status.data,
labeltext = c(labeltext,numbers),
boxsize = 0.1,
xlog = TRUE,
clip=c(1/100,100),
xticks=c(log(1e-2),log(1e-1),0,log(1e1),log(1e2)),
txt_gp = fpTxtGp(ticks=gpar(cex=1)),
align=c("l","l","l"),
mar = unit(c(0,5,0,10.5), "mm")
)
library(grid)
grid.newpage()
pushViewport(viewport(layout = grid.layout(2, 1)))
pushViewport(viewport(layout.pos.row = 1))
plot(age.plot)
popViewport()
pushViewport(viewport(layout.pos.row = 2))
plot(status.plot)
popViewport(2)
Created on 2022-12-28 with reprex v2.0.2

mice.reuse() question: Error in doTryCatch(return(expr), name, parentenv, handler): Missing left after imputation

I am attempting to impute data in my validation set, which follows the MICE imputation model from my train set using mice.reuse(). Imputation is following data split as they'll be used to train/val ML algorithms. Data (n=720) is comprised of clinical and lab measurement data (numeric or factor).
See below the steps:
# first split data
sample_size <- floor(0.75 * nrow(data))
train_data <- sample(seq_len(nrow(data)), size = sample_size)
train <- data[train_data, ]
val <- data[-train_data, ]
# mice imputation - train set
train_imp <- mice(train,
predictorMatrix = predM,
method = 'pmm',
m = 5, maxit = 10, print = FALSE,
seed = 500)
# apply train data imp model to val data
val_imp <- mice.reuse(train_imp, val, maxit = 1)
After this step, I receive the following error message:
Error in value[[3L]](cond) :
Error in doTryCatch(return(expr), name, parentenv, handler): Missing left after imputation
In addition: There were 50 or more warnings (use warnings() to see the first 50)
The data looks something like this:
structure(list(male = structure(c(2L, 1L, 2L, 1L, 1L), .Label = c("FALSE",
"TRUE"), class = "factor"), age = c(55.7864476386037, 55.895961670089,
41.0376454483231, 29.6563997262149, 57.2183436002738), bmi = c(36.6115389471026,
31.5536591487683, 22.7903289734443, 42.5307689412473, 33.6484537734337
), waist_circum = c(126, 103, 91, 133, 105), bp_sys = c(147,
NA, 100, 160, 135), bp_dia = c(82, NA, 60, 81, 70), t2dm = structure(c(1L,
2L, 1L, 2L, 1L), .Label = c("0", "1"), class = "factor"), hdl = c(NA,
1.35, NA, NA, NA), ldl = c(3.41, 3.28, 2.87, 3.7, 3.59), triglyceride = c(2.54,
1.04, 1.55, NA, 3.43), cholesterol = c(5.08, 5.1, 4.35, 4.82,
6.06), alt = c(41, 26, 48, 31, 31), ast = c(33, 28, 33, 21, 28
), ggt = c(33, 65, 42, 45, 26), alp = c(70, 79, 51, 88, 70),
platelet = c(156, 313, 308, 337, 186), hb = c(14.4, 11.5,
15.3, 14.5, 14.2), tsat = c(19, 18, 28, 25, 32), albu = c(4.9,
4.5, 5.4, 4.3, 4.6), egfr = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ferritin = c(171, 70, 499, 156, 94),
pt = c(1.04, 0.98, 0.99, 1.12, 0.97), bili = c(11, 5, 6,
8, 14), timp1 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), p3np = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), ha = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), glucose = c(5.7, 6.1, 5.4, 13.9, 5.1), insulin_fasted = c(29.8,
36.2, 4.9, 11.3, 14.5), hba1c = c(35.522, 58.475, 40.987,
79.242, 38.801), fibrinogen = c(NA, 5.9, 3.5, 5.9, 3.1),
fib_score = c(3, 0, 0, 0, 2), steatosis_score = c(1, 1, 3,
3, 3), inflam_score = c(2, 0, 1, 0, 1), balloon_score = c(2,
0, 1, 0, 1), fnash = structure(c(2L, 1L, 1L, 1L, 2L), .Label = c("FALSE",
"TRUE"), class = "factor"), f2 = structure(c(2L, 1L, 1L,
1L, 2L), .Label = c("FALSE", "TRUE"), class = "factor"),
f3 = structure(c(2L, 1L, 1L, 1L, 1L), .Label = c("FALSE",
"TRUE"), class = "factor"), nash = structure(c(2L, 1L, 2L,
1L, 2L), .Label = c("FALSE", "TRUE"), class = "factor"),
fib4 = c(1.84300333252591, 0.980635141316159, 0.63463649053119,
0.331915071889987, 1.54703279971167), proc6 = c(9.2, 5.5, 6.2, 5.9, 10.5
), proc4 = c(470.9, 298.5, 458.2, 336.7, 516.3), t2 = c(NA,
60.6857152393886, 48.2999977793012, NA, 93.842859513419),
i7 = c(NA, 113.817410696955, 123.560049194916, NA, 130.116414030393
), fibrosc_stiffness = c(14.1, 7, 5.2, 7, 8.8), prognostic_score = c(-1.908509,
-25.028315, -3.495727, -27.147848, -10.20379), probability_steatosis = c(NA,
0.77446951, 0.114093059, 0.654343677, 0.000105034), diafir.h = c(0.615568767767871,
0.534318429857294, 0.890916911895936, NA, 0.779479718924847
), gdf15 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
))
I can't seem to work out the problem. Any tips here?
I've also tried to apply the ignore arguement in running MICE itself, but this seems to take a very long time.. any one else experience this?

ggplot with tryCatch: want blank plot if there's an error during expression

Some data:
x %>% dput
structure(list(date = structure(c(18782, 18783, 18784, 18785,
18786, 18787, 18789, 18791, 18792, 18793, 18795, 18797, 18798,
18799, 18801, 18803, 18805, 18806), class = "Date"), `Expired Trials` = c(3L,
1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), `Trial Sign Ups` = c(3L, 1L, 1L, 2L, 3L, 4L, 1L, 1L, 1L,
1L, 2L, 1L, 3L, 2L, 2L, 1L, 1L, 1L), `Total Site Conversions` = c(3,
1, 1, 2, 3, 4, 1, 1, 1, 1, 2, 1, 3, 2, 2, 1, 1, 1), `Site Conversion Rate` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), `Trial to Paid Conversion Rate` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_)), row.names = c(NA, -18L), class = c("tbl_df",
"tbl", "data.frame"))
Context is within a shiny app where sometimes field 'Sessions' will exist and others it won't, depending on the users selections. Rather than display the red warning message, I just want nothing or a blank plot shown instead of an error message:
x %>%
ggplot(aes(date, Sessions)) +
geom_col(na.rm = T) +
geom_line(aes(y = `Site Conversion Rate`), na.rm = T)
Error in FUN(X[[i]], ...) : object 'Sessions' not found
Tried:
tryCatch(expr = {x %>%
ggplot(aes(date, Sessions)) +
geom_col(na.rm = T) +
geom_line(aes(y = `Site Conversion Rate`), na.rm = T)
},
error = function(e) {message(''); print(e)},
finally = {ggplot() + theme_void()})
But, this still spits out the error, wanted/expected a blank plot instead.
How can I do this?
Consider using an if/else expression with all i.e. we plot only if all the column names specified in plot are present or else return a blank plot
nm1 <- c("date", "Sessions", "Site Conversion Rate")
if(!all(nm1 %in% names(x))) {
message("Not all columns are found")
ggplot()
} else {x %>%
ggplot(aes(date, Sessions)) +
geom_col(na.rm = TRUE) +
geom_line(aes(y = `Site Conversion Rate`), na.rm = TRUE)}
Or another option is possibly with specifying otherwise
library(purrr)
f1 <- function(x) {
p1 <- x %>%
ggplot(aes(date, Sessions)) +
geom_col(na.rm = TRUE) +
geom_line(aes(y = `Site Conversion Rate`), na.rm = TRUE)
print(p1)
}
f1p <- possibly(f1, otherwise = ggplot())
-testing
f1p(x)
-output
Or a modification of the OP's tryCatch
tryCatch(expr = {print(x %>%
ggplot(aes(date, Sessions)) +
geom_col(na.rm = T) +
geom_line(aes(y = `Site Conversion Rate`), na.rm = TRUE))
},
error = function(e) {message(''); print(e)},
finally = {
ggplot() +
theme_void()
})
<simpleError in FUN(X[[i]], ...): object 'Sessions' not found>

nested if / else if conditional on multiple column values - R

The objective is to populate a new column (df$final.count) according to multiple conditions. An example data frame below:
structure(list(item = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L), .Label = c("a", "b"), class = "factor"), raw.count = c(16,
300, 203, 6, 5, 40, 20, 16, 300, 203), loc = structure(c(4L,
2L, 2L, 2L, 2L, 3L, 3L, 4L, 2L, 3L), .Label = c(" ", "in", "out",
"NA"), class = "factor"), side = structure(c(4L, 2L, 3L, 2L,
3L, 4L, 3L, 4L, 2L, 4L), .Label = c("F", "L", "R", "NA"), class = "factor"),
recount = c(15, NA, NA, 7, NA, NA, 16, 15, NA, NA), final.count = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), EXPECTED = c(15, 60, 120,
7, 5, 40, 16, 15, 300, 203)), row.names = c(NA, 10L), class = "data.frame")
The objective is to populate a new column (df$final.count) according to the following conditions affecting multiple columns:
if there is a number in df$recount THAN df$recount should be used in df$final.count unconditional to other column values
if there is no number (NA) in df$recount AND df$raw.count > 10 AND df$loc is "in" AND df$side is "L" THAN function 0.2*df$raw.count should be used to populate df$final.count
if there is no number (NA) in df$recount AND df$raw.count > 10 AND df$loc is "in" AND df$side is "R" THAN function 0.6*df$raw.count should be used to populate df$final.count (NOTE only side is different)
if df$raw.count =<10 than df$raw.count should be used exept if 1 above holds
if df$loc is "out" than df$final.count <- df$raw.count unconditional to other column values exept if 1 above holds
I have tried various versions of if / else if in a loop, for example:
for (i in 1:nrow(df)) {
if(!is.na(df$recount[i]) {
df$final.count <- df$recount
}
else if(df$item[i] == "a" & df$raw.count[i] > 10 & df$loc[i] == "in" & df$side[i] == "L") {
df$final.count <- 0.2*df$raw.count[i]
}
else if(df$item[i] == "a" & df$raw.count[i] > 10 & df$loc[i] == "in" & df$side[i] == "R") {
df$final.count <- 0.6*df$raw.count[i]
}
else if(df$raw.count <= 10){
df$final.count <- df$raw.count
}
else(df$loc == "out") {
df$final.count <- df$raw.count
}
}
if you use a case_when() from the dplyr-package, it becomes more readable.. you can also loose the for.
library( dplyr )
df %>%
mutate( final.cond = case_when(
!is.na( recount ) ~ recount,
item == "a" & raw.count > 10 & loc == "in" & side == "L" ~ 0.2 * raw.count,
item == "a" & raw.count > 10 & loc == "in" & side == "R" ~ 0.6 * raw.count,
raw.count <= 10 ~ raw.count,
loc == "out" ~ raw.count,
TRUE ~ as.numeric(NA)
))

geom_text labels don't align with geom_bar / position_dodge2 if width is changed

I am creating a barplot in ggplot2 3 which includes facet_grid and position_dodge2(preserve="single") (= same bar width in all facets) as well as geom_text for labeling. It works all fine except when I change the width of the bars with width, e.g to 1.2 (otherwise the bars are rather slim).
Two problems occur:
the labels of geom_text don't align any longer with the bars;
the bars aren't centered on the x axis as they should.
Any solution to this? A workaround with hjust doesn't seem to work since labels are not evenly misaligned when changing width. Or am I getting something wrong regarding the purpose of width ?
This seems related to my question.
Data:
x <- structure(list(SessionLastStage = structure(1:20, .Label = c("1998-1999",
"1999-2000", "2000-2001", "2001-2002", "2002-2003", "2003-2004",
"2004-2005", "2005-2006", "2006-2007", "2007-2008", "2008-2009",
"2009-2010", "2010-2011", "2011-2012", "2012-2013", "2013-2014",
"2014-2015", "2015-2016", "2016-2017", "2017-2018"), class = "factor"),
freq = c(0, 2, 18, 8, 6, 0, 0, 0, 2, 14, 8, 16, 30, 4, 12,
10, 11, 30, 1, 0), Phase = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L
), .Label = c("Introduction", "Maintenance", "Deconsolidation"
), class = "factor")), class = "data.frame", row.names = c(NA,
-20L), .Names = c("SessionLastStage", "freq", "Phase"))
plot command:
x %>%
ggplot()+
geom_bar(aes(x=SessionLastStage, y=freq),
stat="identity",
width=1.2,
position = position_dodge2(preserve="single"))+
geom_text(data=x %>% filter(freq>0),
aes(x=SessionLastStage, y=freq+1, label=freq))+
facet_grid(.~Phase,
scales="free_x",
space = "free_x")+
theme_minimal()+
theme(axis.text=element_text(angle=90))
Output:

Resources