Nomogram with R - r

view("Mydata")
summary(Mydata)
dim(Mydata)
x = as.matrix(Mydata[,-c(1,2)])
str(x)
time = Mydata$DFT
#time <- as.numeric(time)
event = Mydata$Rec
library(survival)
y = Surv(time, event)
suppressMessages(library(doParallel))
registerDoParallel(detectCores())
aenetfit = fit_aenet(x, y, nfolds = 10, rule = "lambda.1se",
seed = c(5, 7), parallel = TRUE)
names(aenetfit)
fit = aenetfit$model_init
alpha = aenetfit$alpha_init
lambda = aenetfit$lambda_init
adapen = aenetfit$pen_factor
suppressMessages(library("rms"))
x.df = as.data.frame(x)
dd = datadist(x.df)
options(datadist = "dd")
nom <- as_nomogram(aenetfit, x, time, event, pred.at = 365 * 2,
funlabel = "2-Year Overall Survival Probability")
plot(nom)
I intend to create a nomogram. I have used my own data named
"Mydata".
It has 9 columns (the time to event column is DFT, while the output of interest is Rec which is binary).
When I run the code, everything worked except the plot (nom). It gave the below error.
Thank you.

Related

ggsave ggsurvplot with risk.table

I am trying to save a ggsurvplot with risk.table using ggsave. However, the output off ggsave is always just the risk.table. I also tried this and this. None is working.
library(data.table)
library(survival)
library(survminer)
OS <- c(c(1:100), seq(1, 75, length = 50), c(1:50))
dead <- rep(1, times = 200)
variable <- c(rep(0, times = 100), rep(1, times = 50), rep(2, times = 50))
dt <- data.table(OS = OS,
dead = dead,
variable = variable)
survfit <- survfit(Surv(OS, dead) ~ variable, data = dt)
ggsurvplot(survfit, data = dt,
risk.table = TRUE)
ggsave("test.png")
The main issue is that a ggsurvplot object is a list of plots. Hence, when using ggsave only the last plot or element of the list is saved.
There is already a GitHub issue on that topic with several workarounds, e.g. using one of the more recent suggestions this works fine for me
library(survival)
library(survminer)
OS <- c(c(1:100), seq(1, 75, length = 50), c(1:50))
dead <- rep(1, times = 200)
variable <- c(rep(0, times = 100), rep(1, times = 50), rep(2, times = 50))
dt <- data.frame(OS = OS,
dead = dead,
variable = variable)
survfit <- survfit(Surv(OS, dead) ~ variable, data = dt)
# add method to grid.draw
grid.draw.ggsurvplot <- function(x){
survminer:::print.ggsurvplot(x, newpage = FALSE)
}
p <- ggsurvplot(survfit, data = dt, risk.table = TRUE)
ggsave("test.png", p, height = 6, width = 6)

How to select appropriate sin() terms to fit a time series using R

I want to fit a time series with sin() function because it has a form of some periods (crests and troughs). However, for now I only guessed it, e.g., 1 month, two months, ..., 1 year, 2 year. Is there some function in R to estimate the multiple periods in a data series?
Below is an example which I want to fit it using the combination of sin() functions. The expression in lm() is a try after several guesses (red line in the Figure below). How can I find the sin() terms with appropriate periods?
t <- 1:365
y <- c(-1,-1.3,-1.6,-1.8,-2.1,-2.3,-2.5,-2.7,-2.9,-3,-2,-1.1,-0.3,0.5,1.1,1.6,2.1,2.5,2.8,3.1,3.4,3.7,4.2,4.6,5,5.3,5.7,5.9,6.2,5.8,5.4,5,4.6,4.2,3.9,3.6,3.4,3.1,2.9,2.8,2.6,2.5,2.3,1.9,1.5,1.1,0.8,0.5,0.2,0,-0.1,-0.3,-0.4,-0.5,-0.5,-0.6,-0.7,-0.8,-0.9,-0.8,-0.6,-0.3,-0.1,0.1,0.4,0.6,0.9,1.1,1.3,1.5,1.7,2.1,2.4,2.7,3,3.3,3.5,3.8,4.3,4.7,5.1,5.5,5.9,6.2,6.4,6.6,6.7,6.8,6.8,6.9,7,6.9,6.8,6.7,
6.5,6.4,6.4,6.3,6.2,6,5.9,5.7,5.6,5.5,5.4,5.4,5.1,4.9,4.8,4.6,4.5,4.4,4.3,3.9,3.6,3.3,3,2.8,2.6,2.4,2.6,2.5,2.4,2.3,2.3,2.2,2.2,2.3,2.4,2.4,2.5,2.5,2.6,2.6,2.4,2.1,1.9,1.8,1.6,1.4,1.3,1,0.7,0.5,0.2,0,-0.2,-0.4,-0.2,-0.1,0.1,0.1,0.1,0.1,0.1,0.1,0,0,-0.1,-0.1,-0.2,-0.2,-0.3,-0.3,-0.4,-0.5,-0.5,-0.6,-0.7,-0.7,-0.8,-0.8,-0.8,-0.9,-0.9,-0.9,-1.3,-1.6,-1.9,-2.1,-2.3,-2.6,-2.9,-2.9,-2.9,-2.9,
-2.9,-3,-3,-3,-2.8,-2.7,-2.5,-2.4,-2.3,-2.2,-2.1,-2,-2,-1.9,-1.9,-1.8,-1.8,-1.8,-1.9,-1.9,-2,-2.1,-2.2,-2.2,-2.3,-2.4,-2.5,-2.6,-2.7,-2.8,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.8,-2.8,-2.7,-2.7,-2.6,-2.6,-2.8,-3,-3.1,-3.3,-3.4,-3.5,-3.6,-3.5,-3.4,-3.3,-3.3,-3.2,-3,-2.9,-2.8,-2.8,-2.7,-2.6,-2.6,-2.6,-2.5,-2.6,-2.7,-2.8,-2.8,-2.9,-3,-3,-3,-3,-2.9,-2.9,-2.9,-2.9,-2.9,-2.8,
-2.7,-2.6,-2.5,-2.4,-2.3,-2.3,-2.1,-1.9,-1.8,-1.7,-1.5,-1.4,-1.3,-1.5,-1.7,-1.8,-1.9,-2,-2.1,-2.2,-2.4,-2.5,-2.6,-2.7,-2.8,-2.8,-2.9,-3.1,-3.2,-3.3,-3.4,-3.5,-3.5,-3.6,-3.6,-3.5,-3.4,-3.3,-3.2,-3.1,-3,-2.7,-2.3,-2,-1.8,-1.5,-1.3,-1.1,-0.9,-0.7,-0.6,-0.5,-0.3,-0.2,-0.1,-0.3,-0.5,-0.6,-0.7,-0.8,-0.9,-1,-1.1,-1.1,-1.2,-1.2,-1.2,-1.2,-1.2,-0.8,-0.4,-0.1,0.2,0.5,0.8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0.6,0.3,0,-0.2,-0.5,-0.7,-0.8)
dt <- data.frame(t = t, y = y)
plot(x = dt$t, y = dt$y)
lm <- lm(y ~ sin(2*3.1416/365*t)+cos(2*3.1416/365*t)+
sin(2*2*3.1416/365*t)+cos(2*2*3.1416/365*t)+
sin(2*4*3.1416/365*t)+cos(2*4*3.1416/365*t)+
sin(2*5*3.1416/365*t)+cos(2*5*3.1416/365*t)+
sin(2*6*3.1416/365*t)+cos(2*6*3.1416/365*t)+
sin(2*0.5*3.1416/365*t)+cos(2*0.5*3.1416/365*t),
data = dt)
summary(lm)$adj.r.squared
plot(dt$y); lines(predict(lm), type = "l", col = "red")
Package forecast has the fourier function (see here), which allows you to model fourier series terms based on time series objects.
For example:
library(forecast)
dt$y <- ts(dt$y, frequency = 365)
lm<- lm(y ~ fourier(y, K=6), dt)
plot(dt$t, dt$y); lines(predict(lm), type = "l", col = "red")
Following my comment to the question,
In catastrophic-failure's answer replace Mod by Re as in SleuthEye's answer. Then call nff(y, 20, col = "red").
I realized that there is another correction to function nff to be made:
substitute length(x) or xlen for the magical number 73.
Here is the function corrected.
nff = function(x = NULL, n = NULL, up = 10L, plot = TRUE, add = FALSE, main = NULL, ...){
#The direct transformation
#The first frequency is DC, the rest are duplicated
dff = fft(x)
#The time
xlen <- length(x)
t = seq_along(x)
#Upsampled time
nt = seq(from = 1L, to = xlen + 1L - 1/up, by = 1/up)
#New spectrum
ndff = array(data = 0, dim = c(length(nt), 1L))
ndff[1] = dff[1] #Always, it's the DC component
if(n != 0){
ndff[2:(n+1)] <- dff[2:(n+1)] #The positive frequencies always come first
#The negative ones are trickier
ndff[(length(ndff) - n + 1):length(ndff)] <- dff[(xlen - n + 1L):xlen]
}
#The inverses
indff = fft(ndff/xlen, inverse = TRUE)
idff = fft(dff/xlen, inverse = TRUE)
if(plot){
if(!add){
plot(x = t, y = x, pch = 16L, xlab = "Time", ylab = "Measurement",
main = ifelse(is.null(main), paste(n, "harmonics"), main))
lines(y = Re(idff), x = t, col = adjustcolor(1L, alpha = 0.5))
}
lines(y = Re(indff), x = nt, ...)
}
ret = data.frame(time = nt, y = Mod(indff))
return(ret)
}
y <- c(-1,-1.3,-1.6,-1.8,-2.1,-2.3,-2.5,-2.7,-2.9,-3,-2,-1.1,-0.3,0.5,1.1,1.6,2.1,2.5,2.8,3.1,3.4,3.7,4.2,4.6,5,5.3,5.7,5.9,6.2,5.8,5.4,5,4.6,4.2,3.9,3.6,3.4,3.1,2.9,2.8,2.6,2.5,2.3,1.9,1.5,1.1,0.8,0.5,0.2,0,-0.1,-0.3,-0.4,-0.5,-0.5,-0.6,-0.7,-0.8,-0.9,-0.8,-0.6,-0.3,-0.1,0.1,0.4,0.6,0.9,1.1,1.3,1.5,1.7,2.1,2.4,2.7,3,3.3,3.5,3.8,4.3,4.7,5.1,5.5,5.9,6.2,6.4,6.6,6.7,6.8,6.8,6.9,7,6.9,6.8,6.7,
6.5,6.4,6.4,6.3,6.2,6,5.9,5.7,5.6,5.5,5.4,5.4,5.1,4.9,4.8,4.6,4.5,4.4,4.3,3.9,3.6,3.3,3,2.8,2.6,2.4,2.6,2.5,2.4,2.3,2.3,2.2,2.2,2.3,2.4,2.4,2.5,2.5,2.6,2.6,2.4,2.1,1.9,1.8,1.6,1.4,1.3,1,0.7,0.5,0.2,0,-0.2,-0.4,-0.2,-0.1,0.1,0.1,0.1,0.1,0.1,0.1,0,0,-0.1,-0.1,-0.2,-0.2,-0.3,-0.3,-0.4,-0.5,-0.5,-0.6,-0.7,-0.7,-0.8,-0.8,-0.8,-0.9,-0.9,-0.9,-1.3,-1.6,-1.9,-2.1,-2.3,-2.6,-2.9,-2.9,-2.9,-2.9,
-2.9,-3,-3,-3,-2.8,-2.7,-2.5,-2.4,-2.3,-2.2,-2.1,-2,-2,-1.9,-1.9,-1.8,-1.8,-1.8,-1.9,-1.9,-2,-2.1,-2.2,-2.2,-2.3,-2.4,-2.5,-2.6,-2.7,-2.8,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.8,-2.8,-2.7,-2.7,-2.6,-2.6,-2.8,-3,-3.1,-3.3,-3.4,-3.5,-3.6,-3.5,-3.4,-3.3,-3.3,-3.2,-3,-2.9,-2.8,-2.8,-2.7,-2.6,-2.6,-2.6,-2.5,-2.6,-2.7,-2.8,-2.8,-2.9,-3,-3,-3,-3,-2.9,-2.9,-2.9,-2.9,-2.9,-2.8,
-2.7,-2.6,-2.5,-2.4,-2.3,-2.3,-2.1,-1.9,-1.8,-1.7,-1.5,-1.4,-1.3,-1.5,-1.7,-1.8,-1.9,-2,-2.1,-2.2,-2.4,-2.5,-2.6,-2.7,-2.8,-2.8,-2.9,-3.1,-3.2,-3.3,-3.4,-3.5,-3.5,-3.6,-3.6,-3.5,-3.4,-3.3,-3.2,-3.1,-3,-2.7,-2.3,-2,-1.8,-1.5,-1.3,-1.1,-0.9,-0.7,-0.6,-0.5,-0.3,-0.2,-0.1,-0.3,-0.5,-0.6,-0.7,-0.8,-0.9,-1,-1.1,-1.1,-1.2,-1.2,-1.2,-1.2,-1.2,-0.8,-0.4,-0.1,0.2,0.5,0.8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0.6,0.3,0,-0.2,-0.5,-0.7,-0.8)
res <- nff(y, 20, col = "red")
str(res)
#> 'data.frame': 3650 obs. of 2 variables:
#> $ time: num 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 ...
#> $ y : num 1.27 1.31 1.34 1.37 1.4 ...
Created on 2022-10-17 with reprex v2.0.2
The functions sinusoid and mvrm from package BNSP allow one to specify the number of harmonics and if that number is too high, the algorithm can remove some of the unnecessary terms and avoid overfitting.
# Specify the model
model <- y ~ sinusoid(t, harmonics = 20, amplitude = 1, period = 365)
# Fit the model
m1 <- mvrm(formula = model, data = dt, sweeps = 5000, burn = 3000, thin = 2, seed = 1, StorageDir = getwd())
# ggplot
plotOptionsM <- list(geom_point(data = dt, aes(x = t, y = y)))
plot(x = m1, term = 1, plotOptions = plotOptionsM, intercept = TRUE, quantiles = c(0.005, 0.995), grid = 100)
In this particular example, among the 20 harmonics, the 19 appear to be important.

Facebook Prophet: Hyperparameter Tuning on Monthly Data

I am using the Prophet model to forecast revenue for my company and one of the challenges i currently face is being able to modify the code in order to leverage the hyperparameter tuning features for monthly data. From my understanding, the code on the FB prophet site is designed to tune on daily data, not monthly. However, I have read somewhere (can't seem to find the post) where it can be tweaked for monthly data.
Has anyone been able to figure this out? Would love some help! I'm not a programmer and have been leveraging low code platforms to build this out so would really appreciate a fellow coder's help in solving this issue!
Here's the code that I'm using:
# Conditional Install
cond.install <- function(package.name){
options(repos = "http://cran.rstudio.com") #set repo
#check for package in library, if package is missing install
if(package.name%in%rownames(installed.packages())==FALSE) {
install.packages(package.name, .libPaths()[2])}else{require(package.name, character.only = TRUE)}}
# conditionally install package
cond.install('forecast')
cond.install('prophet')
cond.install('rBayesianOptimization')
cond.install('dplyr')
cond.install('lubridate')
library(dplyr)
library(lubridate)
library(forecast)
library(prophet)
library(rBayesianOptimization)
#reading data
cv_set <- read.Alteryx("#1", mode="data.frame")
valid <- read.Alteryx("#2", mode="data.frame")
#make sure the date format is defined
cv_set$ds <- as.Date(cv_set$ds)
date_seq <- as.Date(valid$ds)
#define hyper search parameter
rand_search_grid = data.frame(
changepoint_prior_scale = sort(runif(10, 0.01, 20)),
seasonality_prior_scale = c(sort(sample(c(runif(5, 0.01, 0.05), runif(5, 1, 20)), 5, replace = F)),
sort(sample(c(runif(5, 0.01, 0.05), runif(5, 1, 20)), 5, replace = F))),
n_changepoints = sample(5:50, 10, replace = F)
)
#Define deafult function for prophet. Change Linear to Logistic cap setting
prophet_fit_bayes = function(changepoint_prior_scale, seasonality_prior_scale, n_changepoints) {
error = c()
for (d in date_seq) {
train = subset(cv_set, ds < d)
test = subset(cv_set, ds == d)
m = prophet(train, growth = 'linear',
seasonality.prior.scale = seasonality_prior_scale,
changepoint.prior.scale = changepoint_prior_scale,
n.changepoints = n_changepoints,
weekly.seasonality = F,
daily.seasonality = F)
future = make_future_dataframe(m, periods = 1)
# NOTE: There's a problem in function names with library(caret)
forecast = predict(m, future)
forecast$ds = as.Date(forecast$ds)
error_d = forecast::accuracy(forecast[forecast$ds %in% test$ds, 'yhat'], test$y)[ , 'MAPE']
error = c(error, error_d)
}
## The function wants to _maximize_ the outcome so we return
## the negative of the resampled MAPE value. `Pred` can be used
## to return predicted values but we'll avoid that and use zero
list(Score = -mean(error), Pred = 0)
}
changepoint_bounds = range(rand_search_grid$changepoint_prior_scale)
n_changepoint_bounds = as.integer(range(rand_search_grid$n_changepoints))
seasonality_bounds = range(rand_search_grid$seasonality_prior_scale)
bayesian_search_bounds = list(changepoint_prior_scale = changepoint_bounds,
seasonality_prior_scale = seasonality_bounds,
n_changepoints = as.integer(n_changepoint_bounds))
#rBayesian parameters. Assume n_iteration is 1 for demo purpose
ba_search = BayesianOptimization(prophet_fit_bayes,
bounds = bayesian_search_bounds,
init_grid_dt = rand_search_grid,
init_points = 1,
n_iter = %Question.iteration.var%,
acq = 'ucb',
kappa = 1,
eps = 0,
verbose = TRUE)
best_params_ba = c(ba_search$Best_Par)
#Start Prophet
# Holiday Setting
custom1 <- data_frame(
holiday = 'custom1',
ds = as.Date(c('1991-12-31')))
custom2 <- data_frame(
holiday = 'custom2',
ds = as.Date(c('1992-12-31', '1993-01-01')))
holidays <- bind_rows(custom1, custom2)
if ('%Question.noholiday.var%' == "True") {
m = prophet(cv_set, growth = 'linear',
seasonality.prior.scale = best_params_ba[['seasonality_prior_scale']],
changepoint.prior.scale = best_params_ba[['changepoint_prior_scale']],
n.changepoints = best_params_ba[['n_changepoints']])
}
if ('%Question.holiday.var%' == "True") {
m <- prophet(holidays = holidays, growth = 'linear',
seasonality.prior.scale = best_params_ba[['seasonality_prior_scale']],
changepoint.prior.scale = best_params_ba[['changepoint_prior_scale']],
n.changepoints = best_params_ba[['n_changepoints']])
m <- add_country_holidays(m, country_name = '%Question.country.var%')
m <- fit.prophet(m, cv_set)
}
future <- make_future_dataframe(m, periods = %Question.forecast.var%)
forecast <- predict(m, future)
yhat <- as.data.frame(forecast$yhat)
yhat_l <- as.data.frame(forecast$yhat_lower)
yhat_u <-as.data.frame(forecast$yhat_upper)
trend <- as.data.frame(forecast$trend)
df1 <- cbind(yhat, yhat_l, yhat_u, trend)
write.Alteryx(df1, 1)
AlteryxGraph(3, width=576, height=576)
plot(m, forecast) + add_changepoints_to_plot(m)
invisible(dev.off())
AlteryxGraph(4, width=576, height=576)
prophet_plot_components(m, forecast)
invisible(dev.off())
#Output best params for reference
df5 <- best_params_ba
write.Alteryx(df5, 5)
You can specify custom seasonality. So you would just define a custom seasonality called monthly and define the period length. You can view the documentation here.
# R
m <- prophet(weekly.seasonality=FALSE)
m <- add_seasonality(m, name='monthly', period=30.5, fourier.order=5)
m <- fit.prophet(m, df)
forecast <- predict(m, future)
prophet_plot_components(m, forecast)

Error in nomogram factor name(s) not in the design: model.type x time event x.df lambda pred.at

I am using the "smart" dataset from the package "hdnom". Below mentioned is my code. I get the following error. Please let me know where I am going wrong. I am unable to understand the error.
Error in nomogram(fit, model.type = "aenet", x, time, event, x.df, lambda = lambda, :
factor name(s) not in the design: model.type x time event x.df lambda pred.at
data("smart")
summary(smart)
dim(smart)
x = as.matrix(smart[,-c(1,2)])
str(x)
time = smart$TEVENT
#time <- as.numeric(time)
event = smart$EVENT
library(survival)
y = Surv(time, event)
suppressMessages(library(doParallel))
registerDoParallel(detectCores())
aenetfit = fit_aenet(x, y, nfolds = 10, rule = "lambda.1se",
seed = c(5, 7), parallel = TRUE)
names(aenetfit)
fit = aenetfit$model_init
alpha = aenetfit$alpha_init
lambda = aenetfit$lambda_init
adapen = aenetfit$pen_factor
suppressMessages(library("rms"))
x.df = as.data.frame(x)
dd = datadist(x.df)
options(datadist = "dd")
nom = nomogram(fit, model.type = "aenet", x, time, event, x.df,lambda = lambda, pred.at = 365 * 2,funlabel = "2-Year Overall Survival Probability")
In version 4.0 and 5.0 of hdnom the function hdnom.nomogram was available, as documented here and here. The syntax was:
nom <- hdnom.nomogram(fit, model.type = "aenet", x, time, event, x.df,
lambda = lambda, pred.at = 365 * 2,
funlabel = "2-Year Overall Survival Probability")
In version 6.0 this function was replaced by hdnom::as_nomogram:
nom <- as_nomogram(aenetfit, x, time, event, pred.at = 365 * 2,
funlabel = "2-Year Overall Survival Probability")
plot(nom)
From the help of rms::nomogram: this function accepts as input (the fit argument) only regression models that were created with rms; in addition, model.type and lambda are not valid arguments of this function.

Error in VAR: Different row size of y and exogen

I am attempting a VAR model in R with an exogenous variable on:
vndata <- read.csv("vndata.txt", sep="")
names(vndata)
da <- data.frame(vndata[2:dim(vndata),])
# STOCK PRICE MODEL
y <- da[, c("irate", "stockp", "mrate", "frate")]
x <- data.frame(da[, c("cdi")])
library("vars")
VARselect(y, lag.max = 8,exogen = x)
var1 <- restrict(VAR(y, p = 2,exogen = x),method = c("ser"),thresh = 1.56)
Then, I want to plot the impulse response function:
plot(irf(var1, impulse = c("irate"), response = c("frate"), boot = T,
cumulative = FALSE,n.ahead = 20))
however, it produces the warning:
Error in VAR(y = ysampled, p = 2, exogen = x) :
Different row size of y and exogen.
I can not figure what happen. I have use dim() to make sure that y and x have the same row size.
Try this, it worked for me:
.GlobalEnv$exogen <- x
VARselect(y, lag.max = 8,exogen = .GlobalEnv$exogen)

Resources