ggsave ggsurvplot with risk.table - r

I am trying to save a ggsurvplot with risk.table using ggsave. However, the output off ggsave is always just the risk.table. I also tried this and this. None is working.
library(data.table)
library(survival)
library(survminer)
OS <- c(c(1:100), seq(1, 75, length = 50), c(1:50))
dead <- rep(1, times = 200)
variable <- c(rep(0, times = 100), rep(1, times = 50), rep(2, times = 50))
dt <- data.table(OS = OS,
dead = dead,
variable = variable)
survfit <- survfit(Surv(OS, dead) ~ variable, data = dt)
ggsurvplot(survfit, data = dt,
risk.table = TRUE)
ggsave("test.png")

The main issue is that a ggsurvplot object is a list of plots. Hence, when using ggsave only the last plot or element of the list is saved.
There is already a GitHub issue on that topic with several workarounds, e.g. using one of the more recent suggestions this works fine for me
library(survival)
library(survminer)
OS <- c(c(1:100), seq(1, 75, length = 50), c(1:50))
dead <- rep(1, times = 200)
variable <- c(rep(0, times = 100), rep(1, times = 50), rep(2, times = 50))
dt <- data.frame(OS = OS,
dead = dead,
variable = variable)
survfit <- survfit(Surv(OS, dead) ~ variable, data = dt)
# add method to grid.draw
grid.draw.ggsurvplot <- function(x){
survminer:::print.ggsurvplot(x, newpage = FALSE)
}
p <- ggsurvplot(survfit, data = dt, risk.table = TRUE)
ggsave("test.png", p, height = 6, width = 6)

Related

Why doesn't this survfit plot start at 100%

When I plot the survfit plot of data with two different censoring events, the overall plot (s0) doesnt start at time = 0, pstate = 100%, but jumps to 100% when the first cencoring event occurs.
Here you can see in an example, where the jump occurs at time 1, that is the first cencoring event.
library(survival)
library(ggfortify)
library(tidyverse)
set.seed(1337)
dummy_data = tibble(time = sample.int(100, 100, replace = TRUE),
event = sample.int(3, 100, replace = TRUE))%>%
mutate(event = factor(event))
kaplanMeier <- survfit(Surv(time, event) ~ 1, data=dummy_data)
autoplot(kaplanMeier, facets = TRUE)
This does seem to be a bug in ggfortify. As a temporary fix, you can set the survival percentage at t = 0 to 100% by doing:
p <- autoplot(kaplanMeier, facets = TRUE)
p$layers[[1]]$data[1, c(5, 7, 8)] <- 1
p

Facebook Prophet: Hyperparameter Tuning on Monthly Data

I am using the Prophet model to forecast revenue for my company and one of the challenges i currently face is being able to modify the code in order to leverage the hyperparameter tuning features for monthly data. From my understanding, the code on the FB prophet site is designed to tune on daily data, not monthly. However, I have read somewhere (can't seem to find the post) where it can be tweaked for monthly data.
Has anyone been able to figure this out? Would love some help! I'm not a programmer and have been leveraging low code platforms to build this out so would really appreciate a fellow coder's help in solving this issue!
Here's the code that I'm using:
# Conditional Install
cond.install <- function(package.name){
options(repos = "http://cran.rstudio.com") #set repo
#check for package in library, if package is missing install
if(package.name%in%rownames(installed.packages())==FALSE) {
install.packages(package.name, .libPaths()[2])}else{require(package.name, character.only = TRUE)}}
# conditionally install package
cond.install('forecast')
cond.install('prophet')
cond.install('rBayesianOptimization')
cond.install('dplyr')
cond.install('lubridate')
library(dplyr)
library(lubridate)
library(forecast)
library(prophet)
library(rBayesianOptimization)
#reading data
cv_set <- read.Alteryx("#1", mode="data.frame")
valid <- read.Alteryx("#2", mode="data.frame")
#make sure the date format is defined
cv_set$ds <- as.Date(cv_set$ds)
date_seq <- as.Date(valid$ds)
#define hyper search parameter
rand_search_grid = data.frame(
changepoint_prior_scale = sort(runif(10, 0.01, 20)),
seasonality_prior_scale = c(sort(sample(c(runif(5, 0.01, 0.05), runif(5, 1, 20)), 5, replace = F)),
sort(sample(c(runif(5, 0.01, 0.05), runif(5, 1, 20)), 5, replace = F))),
n_changepoints = sample(5:50, 10, replace = F)
)
#Define deafult function for prophet. Change Linear to Logistic cap setting
prophet_fit_bayes = function(changepoint_prior_scale, seasonality_prior_scale, n_changepoints) {
error = c()
for (d in date_seq) {
train = subset(cv_set, ds < d)
test = subset(cv_set, ds == d)
m = prophet(train, growth = 'linear',
seasonality.prior.scale = seasonality_prior_scale,
changepoint.prior.scale = changepoint_prior_scale,
n.changepoints = n_changepoints,
weekly.seasonality = F,
daily.seasonality = F)
future = make_future_dataframe(m, periods = 1)
# NOTE: There's a problem in function names with library(caret)
forecast = predict(m, future)
forecast$ds = as.Date(forecast$ds)
error_d = forecast::accuracy(forecast[forecast$ds %in% test$ds, 'yhat'], test$y)[ , 'MAPE']
error = c(error, error_d)
}
## The function wants to _maximize_ the outcome so we return
## the negative of the resampled MAPE value. `Pred` can be used
## to return predicted values but we'll avoid that and use zero
list(Score = -mean(error), Pred = 0)
}
changepoint_bounds = range(rand_search_grid$changepoint_prior_scale)
n_changepoint_bounds = as.integer(range(rand_search_grid$n_changepoints))
seasonality_bounds = range(rand_search_grid$seasonality_prior_scale)
bayesian_search_bounds = list(changepoint_prior_scale = changepoint_bounds,
seasonality_prior_scale = seasonality_bounds,
n_changepoints = as.integer(n_changepoint_bounds))
#rBayesian parameters. Assume n_iteration is 1 for demo purpose
ba_search = BayesianOptimization(prophet_fit_bayes,
bounds = bayesian_search_bounds,
init_grid_dt = rand_search_grid,
init_points = 1,
n_iter = %Question.iteration.var%,
acq = 'ucb',
kappa = 1,
eps = 0,
verbose = TRUE)
best_params_ba = c(ba_search$Best_Par)
#Start Prophet
# Holiday Setting
custom1 <- data_frame(
holiday = 'custom1',
ds = as.Date(c('1991-12-31')))
custom2 <- data_frame(
holiday = 'custom2',
ds = as.Date(c('1992-12-31', '1993-01-01')))
holidays <- bind_rows(custom1, custom2)
if ('%Question.noholiday.var%' == "True") {
m = prophet(cv_set, growth = 'linear',
seasonality.prior.scale = best_params_ba[['seasonality_prior_scale']],
changepoint.prior.scale = best_params_ba[['changepoint_prior_scale']],
n.changepoints = best_params_ba[['n_changepoints']])
}
if ('%Question.holiday.var%' == "True") {
m <- prophet(holidays = holidays, growth = 'linear',
seasonality.prior.scale = best_params_ba[['seasonality_prior_scale']],
changepoint.prior.scale = best_params_ba[['changepoint_prior_scale']],
n.changepoints = best_params_ba[['n_changepoints']])
m <- add_country_holidays(m, country_name = '%Question.country.var%')
m <- fit.prophet(m, cv_set)
}
future <- make_future_dataframe(m, periods = %Question.forecast.var%)
forecast <- predict(m, future)
yhat <- as.data.frame(forecast$yhat)
yhat_l <- as.data.frame(forecast$yhat_lower)
yhat_u <-as.data.frame(forecast$yhat_upper)
trend <- as.data.frame(forecast$trend)
df1 <- cbind(yhat, yhat_l, yhat_u, trend)
write.Alteryx(df1, 1)
AlteryxGraph(3, width=576, height=576)
plot(m, forecast) + add_changepoints_to_plot(m)
invisible(dev.off())
AlteryxGraph(4, width=576, height=576)
prophet_plot_components(m, forecast)
invisible(dev.off())
#Output best params for reference
df5 <- best_params_ba
write.Alteryx(df5, 5)
You can specify custom seasonality. So you would just define a custom seasonality called monthly and define the period length. You can view the documentation here.
# R
m <- prophet(weekly.seasonality=FALSE)
m <- add_seasonality(m, name='monthly', period=30.5, fourier.order=5)
m <- fit.prophet(m, df)
forecast <- predict(m, future)
prophet_plot_components(m, forecast)

Adjust nomogram ticks with (splines) transformation, rms package [R]

I'm using a Cox regression model considering my variable trough splines transformation. All is working nice until the subsequent nomogram... as expected, the scale of my variable is also transformed but I'd like to add some custom ticks inside the region between values 0 and 2 (I guess is the transformed one). Any idea, if you please?
Here's my code...
data <- source("https://pastebin.com/raw/rGtUSTLz")$value
ddist <- datadist(data)
options(datadist = "ddist")
fit <- cph(Surv(time, event) ~ rcs(var, 3), data = data, surv = T, x = T, y = T)
surv <- Survival(fit)
plot(nomogram(fit,
fun = list(function(x) surv(times = 10, lp = x),
function(x) surv(times = 30, lp = x),
function(x) surv(times = 60, lp = x)),
funlabel = paste("c", 1:3), lp = T))
... and these are the real and the desired outputs.
Thanks in advance for your help!
I have had this issue too. My answer is a work around using another package, regplot. Alternatively, if you know what the point values are at the tick marks you want plotted, then you can supply those instead of using the output from regplot. Basically, you need to modify the tick marks and points that are output from the nomogram function and supplied to plot the nomogram.
This method also provides a way to remove points / tick marks by editing the nomogram output.
data <- source("https://pastebin.com/raw/rGtUSTLz")$value
ddist <- datadist(data)
options(datadist = "ddist")
fit <- cph(Surv(time, event) ~ rcs(var, 3), data = data, surv = T, x = T, y = T)
surv <- Survival(fit)
nom1 <- nomogram(fit, fun = list(function(x) surv(times = 10, lp = x),
function(x) surv(times = 30, lp = x),
function(x) surv(times = 60, lp = x)),
funlabel = paste("c", 1:3), lp = T)
library(regplot)
# call regplot with points = TRUE to get output
regplot(fit, fun = list(function(x) surv(times = 10, lp = x),
function(x) surv(times = 30, lp = x),
function(x) surv(times = 60, lp = x)),
funlabel = paste("c", 1:3), points = TRUE)
# look at the points supplied through regplot and take those.
nom1_edit <- nom1
# now we edit the ticks supplied for var and their corresponding point value
nom1_edit[[1]][1] <- list(c(0, 0.06, 0.15, 0.3, 2,4,6,8,10,12,14,16))
nom1_edit[[1]][2] <- list(c(0, 10, 21, 32, 42.41191, 50.63878, 58.86565,
67.09252, 75.31939, 83.54626, 91.77313, 100.00000))
nom1_edit$var$points <- c(0, 10, 21, 32, 42.41191, 50.63878, 58.86565,
67.09252, 75.31939, 83.54626, 91.77313, 100.00000)
# plot the edited nomogram with new points
plot(nom1_edit)

Generalized estimating equations working by themselves but not within functions (R)

I am trying to write a function to run GEE using the geepack package. It works fine "on its own" but not within a function, please see example below:
library(geepack)
library(pstools)
df <- data.frame(study_id = c(1:20),
leptin = runif(20),
insulin = runif(20),
age = runif(20, min = 20, max = 45),
sex = sample(c(0,1), size = 20, replace = TRUE))
#Works
geepack::geeglm(leptin ~ insulin + age + sex, id = study_id, data = df)
#Doesn't work
model_function_covariates_gee <- function(x,y) {
M1 <- paste0(x, "~", y, "+ age + sex")
M1_fit <- geepack::geeglm(M1, id = study_id, data = df)
s <- summary(M1_fit)
return(s)
}
model_function_covariates_gee("leptin", "insulin")
Error message:
Error in mcall$formula[3] <- switch(match(length(sformula), c(0, 2, 3)), :
incompatible types (from language to character) in subassignment type fix
Does anyone know why this is? I've fiddled around with it but can't get it to change. Thanks in advance.

Nomogram with R

view("Mydata")
summary(Mydata)
dim(Mydata)
x = as.matrix(Mydata[,-c(1,2)])
str(x)
time = Mydata$DFT
#time <- as.numeric(time)
event = Mydata$Rec
library(survival)
y = Surv(time, event)
suppressMessages(library(doParallel))
registerDoParallel(detectCores())
aenetfit = fit_aenet(x, y, nfolds = 10, rule = "lambda.1se",
seed = c(5, 7), parallel = TRUE)
names(aenetfit)
fit = aenetfit$model_init
alpha = aenetfit$alpha_init
lambda = aenetfit$lambda_init
adapen = aenetfit$pen_factor
suppressMessages(library("rms"))
x.df = as.data.frame(x)
dd = datadist(x.df)
options(datadist = "dd")
nom <- as_nomogram(aenetfit, x, time, event, pred.at = 365 * 2,
funlabel = "2-Year Overall Survival Probability")
plot(nom)
I intend to create a nomogram. I have used my own data named
"Mydata".
It has 9 columns (the time to event column is DFT, while the output of interest is Rec which is binary).
When I run the code, everything worked except the plot (nom). It gave the below error.
Thank you.

Resources