Adjust nomogram ticks with (splines) transformation, rms package [R]

I'm using a Cox regression model, passing my variable through a splines transformation. All works nicely until the subsequent nomogram: as expected, the scale of my variable is also transformed, but I'd like to add some custom ticks in the region between the values 0 and 2 (I guess that is the transformed part). Any ideas, please?
Here's my code...
data <- source("https://pastebin.com/raw/rGtUSTLz")$value
ddist <- datadist(data)
options(datadist = "ddist")
fit <- cph(Surv(time, event) ~ rcs(var, 3), data = data, surv = T, x = T, y = T)
surv <- Survival(fit)
plot(nomogram(fit,
              fun = list(function(x) surv(times = 10, lp = x),
                         function(x) surv(times = 30, lp = x),
                         function(x) surv(times = 60, lp = x)),
              funlabel = paste("c", 1:3), lp = T))
... and these are the real and the desired outputs.
Thanks in advance for your help!

I have had this issue too. My answer is a workaround using another package, regplot. Alternatively, if you know what the point values are at the tick marks you want plotted, you can supply those directly instead of using the output from regplot. Basically, you need to modify the tick marks and point values that are output from the nomogram function and supplied to plot the nomogram.
This method also provides a way to remove points / tick marks by editing the nomogram output.
data <- source("https://pastebin.com/raw/rGtUSTLz")$value
ddist <- datadist(data)
options(datadist = "ddist")
fit <- cph(Surv(time, event) ~ rcs(var, 3), data = data, surv = T, x = T, y = T)
surv <- Survival(fit)
nom1 <- nomogram(fit, fun = list(function(x) surv(times = 10, lp = x),
                                 function(x) surv(times = 30, lp = x),
                                 function(x) surv(times = 60, lp = x)),
                 funlabel = paste("c", 1:3), lp = T)
library(regplot)
# call regplot with points = TRUE to get output
regplot(fit, fun = list(function(x) surv(times = 10, lp = x),
                        function(x) surv(times = 30, lp = x),
                        function(x) surv(times = 60, lp = x)),
        funlabel = paste("c", 1:3), points = TRUE)
# look at the points supplied through regplot and take those.
nom1_edit <- nom1
# now we edit the ticks supplied for var and their corresponding point value
nom1_edit[[1]][1] <- list(c(0, 0.06, 0.15, 0.3, 2, 4, 6, 8, 10, 12, 14, 16))
nom1_edit[[1]][2] <- list(c(0, 10, 21, 32, 42.41191, 50.63878, 58.86565,
                            67.09252, 75.31939, 83.54626, 91.77313, 100.00000))
nom1_edit$var$points <- c(0, 10, 21, 32, 42.41191, 50.63878, 58.86565,
                          67.09252, 75.31939, 83.54626, 91.77313, 100.00000)
# plot the edited nomogram with new points
plot(nom1_edit)
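For what it's worth, rms::nomogram also accepts per-predictor axis values as named arguments (through its ... argument), which may avoid the manual editing entirely. A sketch, untested against this data, assuming the predictor is named var as above:
nom2 <- nomogram(fit,
                 var = c(0, 0.06, 0.15, 0.3, seq(2, 16, by = 2)), # custom ticks
                 fun = list(function(x) surv(times = 10, lp = x),
                            function(x) surv(times = 30, lp = x),
                            function(x) surv(times = 60, lp = x)),
                 funlabel = paste("c", 1:3), lp = T)
plot(nom2)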

Related

Drawing a partially transparent density polygon

How can I make this red polygon partially transparent so I can see the points underneath it?
library(ks)
set.seed(1234)
x <- runif(1000) + -150
y <- runif(1000) + 20
my.data <- data.frame(x,y)
my.matrix <- as.matrix(my.data)
my_gps_hpi <- Hpi(x = my.matrix, pilot = "samse", pre = "scale")
my.fhat <- kde(x = my.matrix, compute.cont = TRUE, h = my_gps_hpi,
               xmin = c(min(my.data$x), min(my.data$y)),
               xmax = c(max(my.data$x), max(my.data$y)),
               bgridsize = c(100, 100))
my.contours <- c(75)
contourLevels(my.fhat, cont = my.contours)
contourSizes(my.fhat, cont = my.contours, approx = TRUE)
plot(my.data$x, my.data$y)
plot(my.fhat, lwd = 3, display = "filled.contour", cont = my.contours, add = TRUE)
png(file="transparent_polygon_June21_2021.png")
plot(my.data$x, my.data$y)
plot(my.fhat, lwd = 3, display = "filled.contour", cont = my.contours, add = TRUE)
dev.off()
I think I have figured out a solution by digging around in the source code in the file kde.R.
I made several changes to my code:
1. Changed my.fhat to fhat because the source code might expect fhat.
2. Changed my.contours to contours for the same reason.
3. Changed contourLevels(my.fhat, cont = my.contours) to hts <- contourLevels(fhat, cont = contours) for the same reason.
4. Extracted col.fun from the source code and changed it to return the color of my choice: col.fun <- function(n) {rgb(255, 0, 0, 127, maxColorValue = 255)}.
5. Modified the plot statement to that shown in the code below.
Here is the modified R code:
setwd('C:/Users/mark_/Documents/ctmm/density_in_R/')
set.seed(1234)
library(ks)
x <- runif(1000) + -150
y <- runif(1000) + 20
my.data <- data.frame(x,y)
my.matrix <- as.matrix(my.data)
gps_hpi <- Hpi(x = my.matrix, pilot = "samse", pre = "scale")
fhat <- kde(x = my.matrix, compute.cont = TRUE, h = gps_hpi,
            xmin = c(min(my.data$x), min(my.data$y)),
            xmax = c(max(my.data$x), max(my.data$y)),
            bgridsize = c(100, 100))
contours <- c(75)
hts <- contourLevels(fhat, cont = contours)
contourSizes(fhat, cont = contours, approx = TRUE)
col.fun <- function(n) {rgb(255, 0, 0, 127, maxColorValue=255)}
col.fun(1)
plot(fhat, lwd = 3, display = "filled.contour", cont = contours, col.fun = col.fun(1), drawpoints=TRUE)
png(file="transparent_polygon_June22_2021.png")
plot(fhat, lwd = 3, display = "filled.contour", cont = contours, col.fun = col.fun(1), drawpoints=TRUE)
dev.off()
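The essential trick here, isolated from ks, is just the alpha channel: any base-graphics fill color with an alpha value below the maximum is drawn semi-transparently, so points underneath stay visible. A minimal, self-contained sketch:
# Minimal demo of the alpha-channel trick, independent of ks
set.seed(1234)
x <- runif(1000) - 150
y <- runif(1000) + 20
plot(x, y)
polygon(c(-149.9, -149.6, -149.6, -149.9), c(20.2, 20.2, 20.8, 20.8),
        border = NA, col = rgb(1, 0, 0, alpha = 0.5)) # 50% transparent red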

How to extract the Prediction Intervals of a Gaussian Process Regression via caret kernlab package?

I am trying to use a Gaussian Process Regression (GPR) model to predict hourly streamflow discharges in a river. I've got good results applying the caret::train() function with the kernlab method (thanks Kuhn!).
Since uncertainty quantification is one of the main inherent advantages of GPR, I would like to know if anyone could help me access the results related to the prediction interval of the test dataset.
I'll put in an extract of the code I've been working on. Since my real data are huge (and, sincerely, I don't know how to put them here), I'll use data(airquality) as an example. The main goal in this particular example is to predict airquality$Ozone, using as predictors the lag-variables of airquality$Temp.
rm(list = ls())
data(airquality)
airquality = na.omit(as.data.frame(airquality)); str(airquality)
library(tidyverse)
library(magrittr)
airquality$Ozone %>% plot(type = 'l')
lines(airquality$Temp, col = 2)
legend("topleft", legend = c("Ozone", "Temperature"),
col=c(1, 2), lty = 1:1, cex = 0.7, text.font = 4, inset = 0.01,
box.lty=0, lwd = 1)
attach(airquality)
df_lags <- airquality %>%
    mutate(Temp_lag1 = lag(n = 1L, Temp)) %>%
    na.omit()
ESM_train = data.frame(df_lags[1:81, ]) # Training Observed 75% dataset
ESM_test = data.frame(df_lags[82:nrow(df_lags), ]) # Testing Observed 25% dataset
grid_gaussprRadial = expand.grid(.sigma = c(0.001, 0.01, 0.05, 0.1, 0.5, 1, 2)) # Sigma parameters searching for GPR
# TRAIN MODEL ############################
# Tuning set
library(caret)
set.seed(111)
cvCtrl <- trainControl(method = "repeatedcv",
                       repeats = 1,
                       number = 20,
                       allowParallel = TRUE,
                       verboseIter = TRUE,
                       savePredictions = "final")
# Train (aprox. 4 seconds time-simulation)
attach(ESM_train)
set.seed(111)
system.time(Model_train <- caret::train(Ozone ~ Temp + Temp_lag1,
                                        trControl = cvCtrl,
                                        data = ESM_train,
                                        metric = "MAE", # using MAE since minimum values are my focus
                                        preProcess = c("center", "scale"),
                                        method = "gaussprRadial", # RBF kernel function
                                        tuneGrid = grid_gaussprRadial,
                                        maxit = 1000,
                                        linout = 1)) # regression type
plot(Model_train)
Model_train
ESM_results_train <- Model_train$resample %>% mutate(Model = "") # K-fold Training measures
# Select the interested TRAIN data and arrange them as dataframe
Ozone_Obs_Tr = Model_train$pred$obs
Ozone_sim = Model_train$pred$pred
Resid = Ozone_Obs_Tr - Ozone_sim
train_results = data.frame(Ozone_Obs_Tr,
                           Ozone_sim,
                           Resid)
# Plot Obs x Simulated train results
library(ggplot2)
ggplot(data = train_results, aes(x = Ozone_Obs_Tr, y = Ozone_sim)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "black")
# TEST MODEL ############################
# From "ESM_test" dataframe, we predict ESM Ozone time series, adding it in "ESM_forecasted" dataframe
ESM_forecasted = ESM_test %>%
    mutate(Ozone_Pred = predict(Model_train, newdata = ESM_test, variance.model = TRUE))
str(ESM_forecasted)
# Select the interested TEST data and arrange them as a dataframe
Ozone_Obs = ESM_forecasted$Ozone
Ozone_Pred = ESM_forecasted$Ozone_Pred
# Plot Obs x Predicted TEST results
ggplot(data = ESM_forecasted, aes(x = Ozone_Obs, y = Ozone_Pred)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "black")
# Model performance #####
library(hydroGOF)
gof_TR = gof(Ozone_sim, Ozone_Obs_Tr)
gof_TEST = gof(Ozone_Pred,Ozone_Obs)
Performances = data.frame(
Train = gof_TR,
Test = gof_TEST
); Performances
# Plot the TEST prediction
attach(ESM_forecasted)
plot(Ozone_Obs, type = "l", xlab = "", ylab = "", ylim = range(Ozone_Obs, Ozone_Pred))
lines(Ozone_Pred , col = "coral2", lty = 2, lwd = 2)
legend("top", legend = c("Ozone Obs Test", "Ozone Pred Test"),
col=c(1, "coral2"), lty = 1:2, cex = 0.7, text.font = 4, inset = 0.01, box.lty=0, lwd = 2)
These last lines generate the following plot:
The next, and last, step would be to extract the prediction intervals, which is based on a gaussian distribution around each prediction point, to plot it together with this last plot.
The caret::train() call with kernlab returned better predictions than, for instance, plain kernlab::gausspr() or tgp::bgp(). For both of those I could find the prediction interval.
For example, to pick up the prediction intervals via tgp::bgp(), it can be done by typing:
Upper_Bound <- Ozone_Pred$ZZ.q2 # upper quantile, roughly Ozone_Pred + 2 * sigma
Lower_Bound <- Ozone_Pred$ZZ.q1 # lower quantile, roughly Ozone_Pred - 2 * sigma
Therefore, via caret::train() with kernlab, I hope the required standard deviations can be found by typing something like
Model_train$...
or maybe
Ozone_Pred$...
Moreover, at https://stats.stackexchange.com/questions/414079/can-mad-median-absolute-deviation-or-mae-mean-absolute-error-be-used-to-calc, Stephan Kolassa explained that we could estimate the prediction intervals through the MAE, or even the RMSE. But I am not sure this applies here, since the MAE I got is just from comparing observed vs. predicted Ozone data in this example.
This solution is very important to me! I think I am close to obtaining my main results, but I don't know what else to try. Thanks a lot, friends!
I don't really know how the caret framework works, but getting a prediction interval for a GP regression with a Gaussian likelihood is easy enough to do manually.
First we just need a function for the squared exponential kernel, also called the radial basis function kernel, which is what you were using. Here sf is the scale factor (unused in the kernlab implementation) and ell is the length scale, called sigma in the kernlab implementation:
covSEiso <- function(x1, x2 = x1, sf = 1.0, ell = 1.0) {
    sf  <- sf^2
    ell <- -0.5 * (1 / (ell^2))
    n <- nrow(x1)
    m <- nrow(x2)
    result <- matrix(0, nrow = n, ncol = m)
    for ( j in 1:m ) {
        for ( i in 1:n ) {
            result[i, j] <- sf * exp(ell * sum((x1[i, ] - x2[j, ])^2))
        }
    }
    return(result)
}
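As a quick sanity check of the kernel: identical inputs give covariance sf^2 on the diagonal, and off-diagonal entries decay with squared distance relative to the length scale.
# the diagonal is sf^2 = 1; points 9 units apart are essentially uncorrelated
toy <- matrix(c(0, 1, 10), ncol = 1)
round(covSEiso(toy, sf = 1, ell = 1), 3)
#       [,1]  [,2] [,3]
# [1,] 1.000 0.607    0
# [2,] 0.607 1.000    0
# [3,] 0.000 0.000    1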
I'm not sure what your code says about which length scale to use; below I will use a length scale of 25 and scale factor of 50 (obtained via GPML's hyperparameter optimization routines). Then we use the covSEiso() function above to get the relevant covariances, and the rest is application of basic Gaussian identities. I would refer you to Chapter 2 of Rasmussen and Williams (2006) (graciously provided for free online).
data(airquality)
library(tidyverse)
library(magrittr)
df_lags <- airquality %>%
    mutate(Temp_lag1 = lag(n = 1L, Temp)) %>%
    na.omit()
ESM_train <- data.frame(df_lags[1:81, ]) # Training Data 75% dataset
ESM_test <- data.frame(df_lags[82:nrow(df_lags), ]) # Testing Data 25% dataset
## For convenience I'll define separately the training and test inputs
X <- ESM_train[ , c("Temp", "Temp_lag1")]
Xstar <- ESM_test[ , c("Temp", "Temp_lag1")]
## Get the kernel manually
K <- covSEiso(X, ell = 25, sf = 50)
## We also need covariance between the test cases
Kstar <- covSEiso(Xstar, X, ell = 25, sf = 50)
Ktest <- covSEiso(Xstar, ell = 25, sf = 50)
## Now the 95% credible region for the posterior is
## (diag(nrow(K)) folds an observation-noise variance of 1 into the
## training covariance)
predictive_mean <- Kstar %*% solve(K + diag(nrow(K))) %*% ESM_train$Ozone
predictive_var <- Ktest - (Kstar %*% solve(K + diag(nrow(K))) %*% t(Kstar))
## Then for the prediction interval we also add observation noise, here a
## flat 25 on the standard-deviation scale (strictly, variances rather than
## standard deviations add, so treat this as a rough widening)
z <- sqrt(diag(predictive_var)) + 25
interval_high <- predictive_mean + 2 * z
interval_low <- predictive_mean - 2 * z
Then we can check out the prediction intervals
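A plotting sketch for these manual intervals, mirroring the base-graphics code used for the gpmlr version below:
# Sketch: visualize the manual GP prediction interval against Temp
idx <- order(ESM_test$Temp)
plot(ESM_test$Temp, ESM_test$Ozone, pch = 19, xlab = "Temp", ylab = "Ozone",
     ylim = range(c(interval_high, interval_low, ESM_test$Ozone)))
polygon(c(ESM_test$Temp[idx], rev(ESM_test$Temp[idx])),
        c(interval_high[idx], rev(interval_low[idx])),
        border = NA, col = "#80808080")
lines(ESM_test$Temp[idx], predictive_mean[idx])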
This is all pretty easy to do via my gpmlr package (available on GitHub), which can call GPML from R if you have Octave installed:
data(airquality)
library(tidyverse)
library(magrittr)
library(gpmlr)
df_lags <- airquality %>%
    mutate(Temp_lag1 = lag(n = 1L, Temp)) %>%
    na.omit()
ESM_train <- data.frame(df_lags[1:81, ]) # Training Data 75% dataset
ESM_test <- data.frame(df_lags[82:nrow(df_lags), ]) # Testing Data 25% dataset
X <- as.matrix(ESM_train[ , c("Temp", "Temp_lag1")])
y <- ESM_train$Ozone
Xs <- as.matrix(ESM_test[ , c("Temp", "Temp_lag1")])
ys <- ESM_test$Ozone
hyp0 <- list(mean = numeric(), cov = c(0, 0), lik = 0)
hyp <- set_hyperparameters(hyp0, "infExact", "meanZero", "covSEiso", "likGauss",
                           X, y)
gp_res <- gp(hyp, "infExact", "meanZero", "covSEiso", "likGauss", X, y, Xs, ys)
predictive_mean <- gp_res$YMU
interval_high <- gp_res$YMU + 2 * sqrt(gp_res$YS2)
interval_low <- gp_res$YMU - 2 * sqrt(gp_res$YS2)
Then just plot the predictions, as above:
plot(NULL, xlab = "", ylab = "", xaxt = "n", yaxt = "n",
     xlim = range(ESM_test$Temp), ylim = range(c(interval_high, interval_low)))
axis(1, tick = FALSE, line = -0.75)
axis(2, tick = FALSE, line = -0.75)
mtext("Temp", 1, 1.5)
mtext("Ozone", 2, 1.5)
idx <- order(ESM_test$Temp)
polygon(c(ESM_test$Temp[idx], rev(ESM_test$Temp[idx])),
        c(interval_high[idx], rev(interval_low[idx])),
        border = NA, col = "#80808080")
lines(ESM_test$Temp[idx], predictive_mean[idx])
points(ESM_test$Temp, ESM_test$Ozone, pch = 19)
plot(NULL, xlab = "", ylab = "", xaxt = "n", yaxt = "n",
     xlim = range(ESM_test$Temp_lag1), ylim = range(c(interval_high, interval_low)))
axis(1, tick = FALSE, line = -0.75)
axis(2, tick = FALSE, line = -0.75)
mtext("Temp_lag1", 1, 1.5)
mtext("Ozone", 2, 1.5)
idx <- order(ESM_test$Temp_lag1)
polygon(c(ESM_test$Temp_lag1[idx], rev(ESM_test$Temp_lag1[idx])),
        c(interval_high[idx], rev(interval_low[idx])),
        border = NA, col = "#80808080")
lines(ESM_test$Temp_lag1[idx], predictive_mean[idx])
points(ESM_test$Temp_lag1, ESM_test$Ozone, pch = 19)

How can I predict values in factorial experiments (2^k) with centre points in R?

How can I predict values in factorial experiments with center points in R, using the FrF2 package with the predict function, or using the broom package?
My code:
library(FrF2)
plan.person = FrF2(nfactors = 5, resolution = 5, replications = 2,
                   ncenter = 1, randomize = FALSE,
                   factor.names = list(A = c(8, 5),
                                       B = c(70, 30),
                                       C = c(0.5, 0),
                                       D = c(1000, 700),
                                       E = c(70, 10)))
resp <- c(84.55, 66.34, -1, 69.18, 73.01, 64.52, 0.73, 47.61, 68.18, 59.87,
          26, 72.57, 78.08, 73.81, 26, 59.38, 71.41, 88.64, 64.92, 4, 68.81,
          80, 69.66, -1.36, 54.50, 79.24, 78.53, -1, 72.63, 89.97, 87.98,
          -11, 65.68, 82.46)
newplan <- add.response(design = plan.person, response = resp)
model <- lm(newplan, use.center = T)
# summary(model)
d <- within(newplan, {
    A <- as.numeric(as.character(A))
    B <- as.numeric(as.character(B))
    C <- as.numeric(as.character(C))
    D <- as.numeric(as.character(D))
    E <- as.numeric(as.character(E))
})
A = seq(5, 8, 1)
B = seq(30, 70, length.out = length(A))
C = seq(0, 0.5, length.out = length(A))
D = seq(700, 1000, length.out = length(A))
E = seq(10, 70, length.out = length(A))
data <- expand.grid(A = A, B = B, C = C, D = D, E = E)
data$p <- predict(model, newdata = data)
Because of the center point, the following message appears:
Error in model.frame.default(Terms, newdata, na.action = na.action, xlev = object$xlevels) :
  variable lengths differ (found in 'center')
"A two-level experiment with center points can detect, but not fit, quadratic effects."
(https://www.itl.nist.gov/div898/handbook/pri/section3/pri336.htm)
That is, R can't predict these values because you need to make additional assumptions about what the curve looks like to predict points not at your design points.
Note that, computationally, you can get the software to work by adding a center term. The error occurs because this term is in the regression but not in the data set. You could add one with data$center <- FALSE (since none of the points in data are at the center), but this will not do the right thing: it will not take the potential curvature into account when predicting non-central points; it would simply predict a twisted plane (that is, linear with interactions) with a single bump at the center.
Of course, it's also equivalent to just fitting the model with use.center=FALSE, as the center point doesn't affect the fit of the other points.
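A minimal sketch of those two computational workarounds (neither models the curvature the center point is there to detect; depending on how the design columns are coded, you may still need the numeric conversion shown in the question):
# (a) supply the missing 'center' indicator so predict() can build its frame
data$center <- FALSE
data$p <- predict(model, newdata = data)
# (b) equivalently, refit without the center term
model_nc <- lm(newplan, use.center = FALSE)
data$p_nc <- predict(model_nc, newdata = data)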
If you remove the center value, you can do the following after model <- lm(newplan, use.center = T):
1. Filter the p-values < 0.05:
library(dplyr)
coe <- broom::tidy(model) %>%
    slice(-7) %>% # remove the center term
    filter(p.value < 0.05)
m_beta <- coe$estimate
2. Build a grid:
A = seq(5, 8, 0.5)
B = seq(30, 70, length.out = length(A))
exp <- expand.grid(A = A, B = B) %>%
    mutate(bo = as.numeric(1)) %>% # intercept column
    mutate(ult = A * B) %>%        # interaction column
    select(bo, A, B, ult) %>%
    as.matrix()
3. Multiply the grid by the retained coefficients:
reg <- t(m_beta %*% t(exp))
exp <- cbind(exp, reg) %>%
    as.data.frame() %>%
    rename(reg = V5)
But I believe this only solves, or simplifies, the computational problem. The linear regression itself should probably be redone as well; still, with this code you can explore and see what other errors exist.

Graphical output of density for the function gammamixEM (package mixtools)

I'm using the function gammamixEM from the package mixtools. How can I return the graphical output of density as in the function normalmixEM (i.e., the second plot in plot(..., which = 2))?
Update:
Here is a reproducible example for the function gammamixEM:
library(mixtools)
x <- c(rgamma(200, shape = 0.2, scale = 14),
       rgamma(200, shape = 32, scale = 10),
       rgamma(200, shape = 5, scale = 6))
out <- gammamixEM(x, lambda = c(1, 1, 1)/3, verb = TRUE)
Here is a reproducible example for the function normalmixEM:
data(faithful)
attach(faithful)
out <- normalmixEM(waiting, arbvar = FALSE, epsilon = 1e-03)
plot(out, which=2)
I would like to obtain this graphical output of density from the function gammamixEM.
Here you go.
out <- normalmixEM(waiting, arbvar = FALSE, epsilon = 1e-03)
x <- out
whichplots <- 2
density = 2 %in% whichplots
loglik = 1 %in% whichplots
def.par <- par(ask=(loglik + density > 1), "mar") # only ask and mar are changed
mix.object <- x
k <- ncol(mix.object$posterior)
x <- sort(mix.object$x)
a <- hist(x, plot = FALSE)
maxy <- max(max(a$density), .3989*mix.object$lambda/mix.object$sigma)
I just had to dig into the source code of plot.mixEM
So, now to do this with gammamixEM:
x <- c(rgamma(200, shape = 0.2, scale = 14),
       rgamma(200, shape = 32, scale = 10),
       rgamma(200, shape = 5, scale = 6))
gammamixEM.out <- gammamixEM(x, lambda = c(1, 1, 1)/3, verb = TRUE)
mix.object <- gammamixEM.out
k <- ncol(mix.object$posterior)
x <- sort(mix.object$x)
a <- hist(x, plot = FALSE)
maxy <- max(a$density) # gammamixEM output has no $sigma, so bound by the histogram
main2 <- "Density Curves"
xlab2 <- "Data"
col2 <- 2:(k+1)
hist(x, prob = TRUE, main = main2, xlab = xlab2,
     ylim = c(0, maxy))
for (i in 1:k) {
    # use the fitted gamma parameters (in mixtools, the rows of gamma.pars
    # are the shape and scale estimates) rather than a normal density
    lines(x, mix.object$lambda[i] *
              dgamma(x, shape = mix.object$gamma.pars[1, i],
                     scale = mix.object$gamma.pars[2, i]),
          col = col2[i], lwd = 2)
}
I believe it should be pretty straightforward to continue this example a bit if you want to add labels, smooth lines, etc.; one possible continuation is sketched below. Here's the source of the plot.mixEM function.
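For instance, a sketch reusing the objects above overlays the full mixture density and adds a legend:
# overlay the overall mixture density and label the curves
mix_dens <- rowSums(sapply(1:k, function(i)
    mix.object$lambda[i] * dgamma(x, shape = mix.object$gamma.pars[1, i],
                                  scale = mix.object$gamma.pars[2, i])))
lines(x, mix_dens, lty = 2)
legend("topright", legend = c(paste("component", 1:k), "mixture"),
       col = c(col2, 1), lty = c(rep(1, k), 2), lwd = 2)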

Cycle for plotting multiple graphs according to the number of factors

I have a factor vector containing 25 unique levels for categorizing two numeric variables (x, y).
I want to plot a scatterplot for each single factor level:
for (factor in Coordinates$matrixID) {
    dev.new()
    plot(grid, type = "n")
    vectorField(Coordinates$Angle, Coordinates$Length, Coordinates$x, Coordinates$y,
                scale = 0.15, headspan = 0, vecspec = "deg")
}
This loop generates 63 identical graphs of the overall data. I want 25 different graphs, one for each factor level.
Could you please help me?
Thanks
EDIT: Example given
library(VecStatGraphs2D)
Data <- data.frame(x = sample(1:100),
                   y = sample(1:100),
                   angle = sample(1:100),
                   lenght = sample(1:100),
                   matrixID = sample(letters[1:25], 20, replace = TRUE))
for (factor in Data$matrixID) {
    dev.new()
    plot(grid, type = "n")
    vectorField(Data$angle, Data$lenght, Data$x, Data$y,
                scale = 0.15, headspan = 0, vecspec = "deg")
}
Not so tidy, but you may try something like:
library(plotrix)
library(VecStatGraphs2D)
Data <- data.frame(x = sample(1:100),
                   y = sample(1:100),
                   angle = sample(1:100),
                   lenght = sample(1:100),
                   matrixID = sample(letters[1:4], 20, replace = TRUE))
for (i in unique(Data$matrixID)) {
    dev.new()
    Data1 <- subset(Data, matrixID == i)
    plot(0:100, type = "n")
    vectorField(Data1$angle, Data1$lenght, Data1$x, Data1$y,
                scale = 0.15, headspan = 0, vecspec = "deg")
}
for your example, and
for (i in unique(Coordinates$matrixID)) {
    dev.new()
    Coordinates1 <- subset(Coordinates, matrixID == i)
    plot(grid, type = "n")
    vectorField(Coordinates1$Angle, Coordinates1$Length, Coordinates1$x, Coordinates1$y,
                scale = 0.15, headspan = 0, vecspec = "deg")
}
in your code.
Is this what you're trying to achieve?
# Dummy dataset
library(plotrix)
Data <- data.frame(x = sample(1:100),
                   y = sample(1:100),
                   angle = sample(1:100),
                   lenght = sample(1:100),
                   matrixID = sample(letters[1:4], 20, replace = TRUE))
# Get the levels of matrixID (wrap in factor() in case strings are not factors)
lev <- levels(factor(Data$matrixID))
# Plot each graph
for (i in lev) {
    temp <- subset(Data, matrixID == i)
    plot(temp$x, temp$y, type = "n", main = i)
    with(temp, vectorField(u = angle, v = lenght, x = x, y = y,
                           scale = 0.15, headspan = 0, vecspec = "deg"))
}
