Find value of covariate given a probability in R

Given a fractional polynomial GLM, I am looking to find the value of a covariate that gives me an output of a given probability.
My data is simulated using:
# FUNCTIONS ====================================================================
logit <- function(p){
  x = log(p/(1-p))
  x
}
sigmoid <- function(x){
  p = 1/(1 + exp(-x))
  p
}
beta_duration <- function(D, select){
  logit(
    switch(select,
           0.05 + 0.9 / (1 + exp(-2*D + 25)),
           0.9 * exp(-exp(-0.5 * (D - 11))),
           0.9 * exp(-exp(-(D - 11))),
           0.9 * exp(-2 * exp(-(D - 9))),
           sigmoid(0.847 + 0.210 * (D - 10)),
           0.7 + 0.0015 * (D - 10) ^ 2,
           0.7 - 0.0015 * (D - 10) ^ 2 + 0.03 * (D - 10)
    )
  )
}
beta_sex <- function(sex, OR = 1){
  ifelse(sex == "Female", -0.5 * log(OR), 0.5 * log(OR))
}
plot_beta_duration <- function(select){
  x <- seq(10, 20, by = 0.01)
  y <- beta_duration(x, select)
  data.frame(x = x,
             y = y) %>%
    ggplot(aes(x = x, y = y)) +
    geom_line() +
    ylim(0, 1)
}
# DATA SIMULATION ==============================================================
duration <- c(10, 12, 14, 18, 20)
sex <- factor(c("Female", "Male"))
eta <- function(duration, sex, duration_select, sex_OR, noise_sd){
  beta_sex(sex, sex_OR) + beta_duration(duration, duration_select) +
    rnorm(length(duration), 0, noise_sd)
}
sim_data <- function(durations_type, sex_OR, noise_sd, p_female, n, seed){
  set.seed(seed)
  data.frame(
    duration = sample(duration, n, TRUE),
    sex = sample(sex, n, TRUE, c(p_female, 1 - p_female))
  ) %>%
    rowwise() %>%
    mutate(eta = eta(duration, sex, durations_type, sex_OR, noise_sd),
           p = sigmoid(eta),
           cured = sample(0:1, 1, prob = c(1 - p, p)))
}
# DATA SIM PARAMETERS
durations_type <- 4  # See beta_duration for functions
sex_OR <- 3          # Odds of cure for male vs female (ref)
noise_sd <- 1
p_female <- 0.7      # proportion of females in the sample
n <- 500
data <- sim_data(durations_type = 1, # See beta_duration for functions
                 sex_OR = 3,         # Odds of cure for male vs female (ref)
                 noise_sd = 1,
                 p_female = 0.7,     # proportion of females in the sample
                 n = 500,
                 seed = 21874564)
And my model is fitted by:
library(mfp)
model1 <- mfp(cured ~ fp(duration) + sex,
              family = binomial(link = "logit"),
              data = data)
summary(model1)
For each level of sex (i.e. "Male" or "Female"), I want to find the value of duration that gives a probability equal to some value, say frontier <- 0.8.
So far, I can only think of approximating it with a grid of candidate values:
pred_duration <- seq(10, 20, by = 0.1)
pred <- data.frame(expand.grid(duration = pred_duration,
                               sex = sex),
                   p = predict(model1,
                               newdata = expand.grid(duration = pred_duration,
                                                     sex = sex),
                               type = "response"))
pred[which(pred$p > 0.8), ] %>%
  group_by(sex) %>%
  summarize(min(duration))
But I am really after an exact solution.

The function uniroot finds the point at which the output of a function equals 0. If you create a function that takes duration as input, calculates the predicted probability for that duration, and subtracts the desired probability, then this function will have an output of 0 at the desired value of duration, and uniroot will find that value for you. Wrapping this process in a little helper function makes it very easy to use:
find_prob <- function(p) {
  f <- function(v) {
    predict(model1, type = 'response',
            newdata = data.frame(duration = v, sex = 'Male')) - p
  }
  uniroot(f, interval = range(data$duration), tol = 1e-9)$root
}
So, for example, to find the duration that gives an 80% probability, we just do:
find_prob(0.8)
#> [1] 12.86089
To prove that this is the correct value, we can feed it directly into predict to see what the predicted probability is, given sex = 'Male' and duration = 12.86089:
predict(model1, type = 'response',
        newdata = data.frame(sex = 'Male', duration = find_prob(0.8)))
#> 1
#> 0.8
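To answer the original question for both levels of sex, one possible extension (a sketch, not part of the original answer) is to make the sex level an argument of the helper and loop over the factor levels; note that uniroot() will throw an error for a level whose predicted probabilities never cross the target within the interval:
find_prob_by_sex <- function(p, sex_level) {
  # same idea as find_prob(), but with the sex level as an argument
  f <- function(v) {
    predict(model1, type = 'response',
            newdata = data.frame(duration = v, sex = sex_level)) - p
  }
  uniroot(f, interval = range(data$duration), tol = 1e-9)$root
}
sapply(levels(data$sex), function(s) find_prob_by_sex(0.8, s))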


How can I find the maximum output of a function

If I have a GLM, is there any way I can efficiently find the maximum output by varying one covariate while holding the others constant?
Using my simulated data and the fractional polynomial GLM from the previous question (the simulation and model-fitting code are identical to the blocks shown above):
Given that I am holding sex constant, is there any way to find the value of duration within a certain range that gives the highest predicted value? Something more efficient than:
range <- seq(10, 20, by = 1e-4)
range[which.max(predict(model1, type = "response",
                        newdata = data.frame(duration = range, sex = "Male")))]
You can use optimize here. Just create a function which returns a prediction based on the value of duration:
f <- function(x) predict(model1, list(sex = 'Male', duration = x))
And we can find the value of duration which produces the maximum log odds within the range 0-20 by doing:
optimise(f, c(0, 20), maximum = TRUE)$maximum
#> [1] 17.95679
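Since the logistic link is monotone, the duration that maximises the log odds also maximises the predicted probability. As a quick check (an added note, not part of the original answer), the same search can be run on the response scale:
# same search on the probability scale; the result should match the
# log-odds maximiser because the inverse link is strictly increasing
f_resp <- function(x) predict(model1, newdata = list(sex = 'Male', duration = x),
                              type = 'response')
optimise(f_resp, c(0, 20), maximum = TRUE)$maximum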

Calculate/approach individual face probabilities of 10-faced dice, based on summed 2-roll dice experiment

I have a biochemistry problem that can be simplified as a two-roll dice experiment (I think...).
Assume there is an unfair die with 10 faces, i.e. the individual face probabilities are not 1/10. We want to know these probabilities.
The given dataset that we have, however, is a histogram of summed faces from rolling the (same) die twice. So the range of the observed bins is 2-20 (2 = 1+1; 3 = 1+2, 2+1; 4 = 2+2, 1+3, 3+1; etc.).
The probabilities of the summed faces are sums of products of the individual probabilities (s: observed probabilities of summed faces; p: probabilities of individual faces) and can be written as follows:
s2 ~ p1^2
s3 ~ 2*p1*p2
s4 ~ 2*p1*p3 + p2^2
s5 ~ 2*p1*p4 + 2*p2*p3
s6 ~ 2*p1*p5 + 2*p2*p4 + p3^2
s7 ~ 2*p1*p6 + 2*p2*p5 + 2*p3*p4
s8 ~ 2*p1*p7 + 2*p2*p6 + 2*p3*p5 + p4^2
s9 ~ 2*p1*p8 + 2*p2*p7 + 2*p3*p6 + 2*p4*p5
s10 ~ 2*p1*p9 + 2*p2*p8 + 2*p3*p7 + 2*p4*p6 + p5^2
s11 ~ 2*p1*p10 + 2*p2*p9 + 2*p3*p8 + 2*p4*p7 + 2*p5*p6
s12 ~ 2*p2*p10 + 2*p3*p9 + 2*p4*p8 + 2*p5*p7 + p6^2
s13 ~ 2*p3*p10 + 2*p4*p9 + 2*p5*p8 + 2*p6*p7
s14 ~ 2*p4*p10 + 2*p5*p9 + 2*p6*p8 + p7^2
s15 ~ 2*p5*p10 + 2*p6*p9 + 2*p7*p8
s16 ~ 2*p6*p10 + 2*p7*p9 + p8^2
s17 ~ 2*p7*p10 + 2*p8*p9
s18 ~ 2*p8*p10 + p9^2
s19 ~ 2*p9*p10
s20 ~ p10^2
In this case there are 20-1=19 known values and 10 unknowns, so the system is over-determined. It is also easy to solve by hand using algebra. As far as I can remember, the quadratic terms will give 2 possible solutions per individual face, but probabilities are always positive, so practically there should be one solution. Right?
Is there a way to solve this system in R? I am familiar with linear inverse problems in R, but I don't know how to approach this (quadratic?) system in R.
Here is some code to simulate the problem:
options(stringsAsFactors = FALSE)
library(gtools)
library(dplyr)
dice <- data.frame(face = 1:10)
### functions
split_dice_faces <- function(summed_face){
  face_face <- strsplit(x = as.character(summed_face), split = "[/_\\|]")[[1]]
  names(face_face) <- c("face1", "face2")
  as.numeric(face_face)
}
sum_dice_faces <- function(face_face){
  sapply(face_face, function(face_face_i){
    face1 <- split_dice_faces(face_face_i)[1]
    face2 <- split_dice_faces(face_face_i)[2]
    sum(c(face1[1], face2[1]))
  })
}
simulate_2_rolls <- function(dice_pool){
  dice_perm <- data.frame(permutations(n = dim(dice_pool)[1], r = 2,
                                       v = as.character(dice_pool$face),
                                       repeats.allowed = T))
  dice_perm$face_face <- paste(dice_perm[[1]], "|", dice_perm[[2]], sep = "")
  dice_perm$prob <- dice_pool$prob[match(dice_perm[[1]], dice_pool$face)] *
    dice_pool$prob[match(dice_perm[[2]], dice_pool$face)]
  dice_perm$summed_face <- sum_dice_faces(dice_perm$face_face)
  dice_perm <- dice_perm %>% arrange(summed_face) %>%
    select(one_of(c("face_face", "summed_face", "prob")))
  dice_perm
}
summarise_2_rolls_experiment <- function(simulate_2_rolls_df){
  simulate_2_rolls_df %>% group_by(summed_face) %>% summarise(prob = sum(prob))
}
from_face_probs_to_summed_observations <- function(face_probs){
  face_probs %>%
    data.frame(face = dice$face, prob = .) %>%
    simulate_2_rolls() %>%
    summarise_2_rolls_experiment() %>%
    pull(prob)
}
generate_formulas <- function() {
  output <-
    dice_sum_probs %>% group_by(summed_face) %>% group_split() %>%
    sapply(function(i){
      left_hand <- paste("s", i$summed_face[1], sep = "")
      right_hand <-
        sapply(strsplit(i$face_face, "\\|"), function(row){
          row_i <- as.numeric(row)
          row_i <- row_i[order(row_i)]
          row_i <- paste("p", row_i, sep = "")
          if(row_i[1] == row_i[2]){
            paste(row_i[1], "^2", sep = "")
          } else {
            paste(row_i, collapse = "*")
          }
        })
      right_hand <-
        paste(sapply(unique(right_hand), function(right_hand_i){
          fact <- sum(right_hand == right_hand_i)
          if(fact > 1){fact <- paste(fact, "*", sep = "")} else {fact <- ""}
          paste(fact, right_hand_i, sep = "")
        }), collapse = " + ")
      paste(left_hand, "~", right_hand)
    })
  return(output)
}
To simulate a dataset:
### random individual probabilities
dice_probs <- data.frame(face = dice$face,
                         prob = runif(n = dim(dice)[1]) %>% (function(x){x / sum(x)}))
dice_probs
### simulate an infinite number of trials, observations expressed as probabilities
dice_sum_probs <- simulate_2_rolls(dice_probs)
dice_sum_probs
### sum experiment outcomes with the same sum
dice_sum_probs_summary <- dice_sum_probs %>% group_by(summed_face) %>%
  summarise(prob = sum(prob))
### plot, this is the given dataset
with(data = dice_sum_probs_summary, barplot(prob, names.arg = summed_face))
### how to calculate / approach p1, p2, ..., p10?
Thanks!
If we create a multiplication table of the probabilities, outer(p, p), and then sum its entries over constant values of outer(1:10, 1:10, `+`) using tapply, we get the following nonlinear regression problem:
nls(prob ~ tapply(outer(p, p), outer(1:10, 1:10, `+`), sum),
    dice_sum_probs_summary, algorithm = "port",
    start = list(p = sqrt(dice_sum_probs_summary$prob[seq(1, 19, 2)])),
    lower = numeric(10), upper = rep(1, 10))
giving:
Nonlinear regression model
  model: prob ~ tapply(outer(p, p), outer(1:10, 1:10, `+`), sum)
   data: dice_sum_probs_summary
     p1      p2      p3      p4      p5      p6      p7      p8      p9     p10
0.06514 0.04980 0.14439 0.06971 0.06234 0.19320 0.09491 0.01237 0.11936 0.18878
residual sum-of-squares: 1.33e-30
which is consistent with
> dice_probs
   face       prob
1     1 0.06513537
2     2 0.04980455
3     3 0.14438749
4     4 0.06971313
5     5 0.06234477
6     6 0.19319613
7     7 0.09491289
8     8 0.01236557
9     9 0.11936244
10   10 0.18877766
We can alternatively express it as follows, where X is a 19 x 100 matrix of zeros and ones: each row corresponds to a possible outcome of rolling the two dice (i.e. 2:20) and each column to a pair of indexes from 1:10 and 1:10. An entry equals one if the column's index pair sums to the face total represented by its row, and zero otherwise.
g <- c(outer(1:10, 1:10, `+`))
X <- + outer(2:20, g, `==`)
nls(prob ~ X %*% kronecker(p, p), dice_sum_probs_summary, alg = "port",
    start = list(p = sqrt(dice_sum_probs_summary$prob[seq(1, 19, 2)])),
    lower = numeric(10), upper = rep(1, 10))
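One caveat worth adding (not part of the original answer): nls() does not constrain the estimates to sum to 1, so with real, noisy histograms it can help to renormalise the fitted probabilities and compare the implied summed-face distribution with the observed one:
fit <- nls(prob ~ X %*% kronecker(p, p), dice_sum_probs_summary, alg = "port",
           start = list(p = sqrt(dice_sum_probs_summary$prob[seq(1, 19, 2)])),
           lower = numeric(10), upper = rep(1, 10))
p_hat <- coef(fit) / sum(coef(fit))  # renormalise so the probabilities sum to 1
# implied vs. observed probabilities of the summed faces
cbind(observed = dice_sum_probs_summary$prob,
      implied = as.vector(X %*% kronecker(p_hat, p_hat)))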

LSTM understanding, possible overfit

Following this blog post, I'm trying to understand LSTMs for time series forecasting.
The thing is, the results on the test data are too good. What am I missing?
Also, every time I re-run the fit it seems to get better. Is the net re-using the same weights?
The structure is very simple: the input_shape is [1, 1, 1].
Even with Epochs = 1, it learns the test data all too well.
Here's a reproducible example:
library(keras)
library(ggplot2)
library(dplyr)
Data creation and prep:
# create some fake time series
set.seed(123)
df_timeseries <- data.frame(
  ts = 1:2500,
  value = arima.sim(list(order = c(1,1,0), ar = 0.7), n = 2500)[-1] # fake data
)
#plot(df_timeseries$value, type = "l")
# first order difference
diff_serie <- diff(df_timeseries$value, differences = 1)
# Lagged data ---
lag_transform <- function(x, k = 1){
  lagged = c(rep(NA, k), x[1:(length(x)-k)])
  DF = as.data.frame(cbind(lagged, x))
  colnames(DF) <- c(paste0('x-', k), 'x')
  DF[is.na(DF)] <- 0
  return(DF)
}
supervised <- lag_transform(diff_serie, 1) # "supervised" form
# head(supervised, 3)
#          x-1          x
# 1  0.0000000  0.1796152
# 2  0.1796152 -0.3470608
# 3 -0.3470608 -1.3107662
# Split Train/Test ---
N = nrow(supervised)
n = round(N * 0.8, digits = 0)
train = supervised[1:n, ]     # train set: 1999 obs
test = supervised[(n+1):N, ]  # test set: 500 obs
# Normalize Data --- !!! used min/max just from the train set
scale_data = function(train, test, feature_range = c(0, 1)) {
  x = train
  fr_min = feature_range[1]
  fr_max = feature_range[2]
  std_train = ((x - min(x)) / (max(x) - min(x)))
  std_test = ((test - min(x)) / (max(x) - min(x)))
  scaled_train = std_train * (fr_max - fr_min) + fr_min
  scaled_test = std_test * (fr_max - fr_min) + fr_min
  return(list(scaled_train = as.vector(scaled_train),
              scaled_test = as.vector(scaled_test),
              scaler = c(min = min(x), max = max(x))))
}
Scaled = scale_data(train, test, c(-1, 1))
# Split ---
y_train = Scaled$scaled_train[, 2]
x_train = Scaled$scaled_train[, 1]
y_test = Scaled$scaled_test[, 2]
x_test = Scaled$scaled_test[, 1]
# reverse function for scaling back to original values
invert_scaling = function(scaled, scaler, feature_range = c(0, 1)){
  min = scaler[1]
  max = scaler[2]
  t = length(scaled)
  mins = feature_range[1]
  maxs = feature_range[2]
  inverted_dfs = numeric(t)
  for(i in 1:t){
    X = (scaled[i] - mins)/(maxs - mins)
    rawValues = X * (max - min) + min
    inverted_dfs[i] <- rawValues
  }
  return(inverted_dfs)
}
Model and Fit:
# Model ---
# Reshape
dim(x_train) <- c(length(x_train), 1, 1)
# specify required arguments
X_shape2 = dim(x_train)[2]
X_shape3 = dim(x_train)[3]
batch_size = 1 # must be a common factor of both the train and test samples
units = 30     # can adjust this in the model tuning phase
model <- keras_model_sequential()
model %>% # [1, 1, 1]
  layer_lstm(units, batch_input_shape = c(batch_size, X_shape2, X_shape3), stateful = F) %>%
  layer_dense(units = 10) %>%
  layer_dense(units = 1)
model %>% compile(
  loss = 'mean_squared_error',
  optimizer = optimizer_adam(lr = 0.02, decay = 1e-6),
  metrics = c('mean_absolute_percentage_error')
)
# Fit ---
Epochs = 1
for(i in 1:Epochs){
  model %>% fit(x_train, y_train, epochs = 1, batch_size = batch_size, verbose = 1, shuffle = F)
  model %>% reset_states()
}
# Predictions Test data ---
L = length(x_test)
scaler = Scaled$scaler
predictions = numeric(L)
for(i in 1:L){
  X = x_test[i]
  dim(X) = c(1,1,1) # essentially predicting point by point
  yhat = model %>% predict(X, batch_size = batch_size)
  # invert scaling
  yhat = invert_scaling(yhat, scaler, c(-1, 1))
  # invert differencing
  yhat = yhat + df_timeseries$value[(n+i)] # could the problem be here?
  # store
  predictions[i] <- yhat
}
[Plot: comparison of actual vs. predicted values on the test data]
Code for the plot and MAPE on Test data:
# Now for the comparison:
df_plot = tibble(
  data = 1:nrow(test),
  actual = df_timeseries$value[(n+1):N],
  predict = predictions
)
df_plot %>%
  gather("key", "value", -data) %>%
  ggplot(aes(x = data, y = value, color = key)) +
  geom_line() +
  theme_minimal()
# mape
mape_function <- function(v_actual, v_pred) {
  diff <- (v_actual - v_pred)/v_actual
  sum(abs(diff))/length(diff)
}
mape_function(df_plot$actual, df_plot$predict)
# [1] 0.00348043 - MAPE on test data
Update, based on nicola's comment:
By changing the prediction part, where I reverse the difference, the plot does make more sense.
But still, how can I fix this? I need to plot the actual values, not the differences. How can I measure my performance and tell whether the net is overfitting?
predict_diff = numeric(L)
for(i in 1:L){
  X = x_test[i]
  dim(X) = c(1,1,1) # essentially predicting point by point
  yhat = model %>% predict(X, batch_size = batch_size)
  # invert scaling
  yhat = invert_scaling(yhat, scaler, c(-1, 1))
  # invert differencing
  predict_diff[i] <- yhat
  yhat = yhat + df_timeseries$value[(n+i)] # could the problem be here?
  # store
  #predictions[i] <- yhat
}
df_plot = tibble(
  data = 1:nrow(test),
  actual = test$x,
  predict = predict_diff
)
df_plot %>%
  gather("key", "value", -data) %>%
  ggplot(aes(x = data, y = value, color = key)) +
  geom_line() +
  theme_minimal()
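A sketch of one possible fix (my addition, using the objects defined above, not part of the original post): reconstruct the levels recursively, feeding each predicted (scaled) difference back in as the next input and adding it to the previously reconstructed level rather than to the observed one, so no test-set information leaks into the forecast:
level_forecast <- numeric(L)
last_level <- df_timeseries$value[n + 1]  # last level known at the end of training
prev_diff_scaled <- x_test[1]             # last known scaled difference seeds the input
for(i in 1:L){
  X <- prev_diff_scaled
  dim(X) <- c(1, 1, 1)
  yhat_scaled <- model %>% predict(X, batch_size = batch_size)
  yhat <- invert_scaling(yhat_scaled, scaler, c(-1, 1))
  last_level <- last_level + yhat              # accumulate predicted differences
  level_forecast[i] <- last_level
  prev_diff_scaled <- as.numeric(yhat_scaled)  # feed the prediction back in
}
Plotting level_forecast against df_timeseries$value[(n+2):(N+1)] then shows an honest multi-step forecast, which is a fairer way to judge whether the net generalises or overfits.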

Adapting the meansd moderator option in sjPlot interaction

I am using sjPlot, the sjp.int function, to plot an interaction of an lme.
The options for the moderator values are means +/- sd, quartiles, all, max/min. Is there a way to plot the mean +/- 2sd?
Typically it would be like this:
model <- lme(outcome ~ var1+var2*time, random=~1|ID, data=mydata, na.action="na.omit")
sjp.int(model, show.ci=T, mdrt.values="meansd")
Many thanks
Reproducible example:
#create data
mydata <- data.frame(SID = sample(1:150, 400, replace = TRUE),
                     age = sample(50:70, 400, replace = TRUE),
                     sex = sample(c("Male", "Female"), 200, replace = TRUE),
                     time = seq(0.7, 6.2, length.out = 400),
                     Vol = rnorm(400),
                     HCD = rnorm(400))
mydata$time <- as.numeric(mydata$time)
#insert random NAs
NAins <- NAinsert <- function(df, prop = .1){
  n <- nrow(df)
  m <- ncol(df)
  num.to.na <- ceiling(prop*n*m)
  id <- sample(0:(m*n-1), num.to.na, replace = FALSE)
  rows <- id %/% m + 1
  cols <- id %% m + 1
  sapply(seq(num.to.na), function(x){
    df[rows[x], cols[x]] <<- NA
  })
  return(df)
}
mydata2 <- NAins(mydata, 0.1)
#run the lme, which gives an error message
model = lme(Vol ~ age + sex*time + time*HCD, random = ~time|SID,
            na.action = "na.omit", data = mydata2)
summary(model)
mydf <- ggpredict(model, terms = c("time", "HCD [-2.5, -0.5, 2.0]"))
#lmer works
model2 = lmer(Vol ~ age + sex*time + time*HCD + (time|SID),
              control = lmerControl(check.nobs.vs.nlev = "ignore",
                                    check.nobs.vs.rankZ = "ignore",
                                    check.nobs.vs.nRE = "ignore"),
              na.action = "na.omit", data = mydata2)
summary(model2)
mydf <- ggpredict(model2, terms = c("time", "HCD [-2.5, -0.5, 2.0]"))
#plotting gives problems (jittered lines)
plot(mydf)
With sjPlot, it's currently not possible. However, I have written a package especially dedicated to computing and plotting marginal effects: ggeffects. This package is a bit more flexible (for marginal effects plots).
The ggeffects package has a ggpredict() function, with which you can compute marginal effects at specific values. Once you know the sd of the model term in question, you can specify these values in the function call to plot your interaction:
library(ggeffects)
# plot interaction for time and var2, for values
# 10, 30 and 50 of var2
mydf <- ggpredict(model, terms = c("time", "var2 [10,30,50]"))
plot(mydf)
There are some examples in the package vignette; see especially this section.
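For the specific mean +/- 2 sd case asked about here, one option (a sketch, not from the original answer, reusing the model and data names from the typical example above) is to compute those values from the data and paste them into the terms specification:
# mean +/- 2 SD of the moderator var2, passed as specific values to ggpredict()
m <- mean(mydata$var2, na.rm = TRUE)
s <- sd(mydata$var2, na.rm = TRUE)
vals <- paste0("var2 [", paste(round(c(m - 2*s, m, m + 2*s), 2), collapse = ", "), "]")
mydf <- ggpredict(model, terms = c("time", vals))
plot(mydf)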
Edit
Here are the results, based on your reproducible example (note that the GitHub version is currently required!):
# requires at least the GitHub version 0.1.0.9000!
library(ggeffects)
library(nlme)
library(lme4)
library(glmmTMB)
#create data
mydata <-
data.frame(
SID = sample(1:150, 400, replace = TRUE),
age = sample(50:70, 400, replace = TRUE),
sex = sample(c("Male", "Female"), 200, replace = TRUE),
time = seq(0.7, 6.2, length.out = 400),
Vol = rnorm(400),
HCD = rnorm(400)
)
mydata$time <- as.numeric(mydata$time)
#insert random NAs
NAins <- NAinsert <- function(df, prop = .1) {
n <- nrow(df)
m <- ncol(df)
num.to.na <- ceiling(prop * n * m)
id <- sample(0:(m * n - 1), num.to.na, replace = FALSE)
rows <- id %/% m + 1
cols <- id %% m + 1
sapply(seq(num.to.na), function(x) {
df[rows[x], cols[x]] <<- NA
})
return(df)
}
mydata2 <- NAins(mydata, 0.1)
# run the lme, works now
model = lme(
  Vol ~ age + sex * time + time * HCD,
  random = ~ time | SID,
  na.action = "na.omit",
  data = mydata2
)
summary(model)
mydf <- ggpredict(model, terms = c("time", "HCD [-2.5, -0.5, 2.0]"))
plot(mydf)
[lme plot]
# lmer also works
model2 <- lmer(
  Vol ~ age + sex * time + time * HCD + (time | SID),
  control = lmerControl(
    check.nobs.vs.nlev = "ignore",
    check.nobs.vs.rankZ = "ignore",
    check.nobs.vs.nRE = "ignore"
  ),
  na.action = "na.omit",
  data = mydata2
)
summary(model2)
mydf <- ggpredict(model2, terms = c("time", "HCD [-2.5, -0.5, 2.0]"), ci.lvl = NA)
# plotting works, but only w/o CI
plot(mydf)
[lmer plot]
# glmmTMB also works
model3 <- glmmTMB(
  Vol ~ age + sex * time + time * HCD + (time | SID),
  data = mydata2
)
summary(model3)
mydf <- ggpredict(model3, terms = c("time", "HCD [-2.5, -0.5, 2.0]"))
plot(mydf)
plot(mydf, facets = T)
[glmmTMB plots]

How to change the position of axis values in nomogram, rms package?

How do I change the orientation of the axis value labels in a nomogram from the rms R package, from horizontal to something vertical? Below is code adapted from the rms documentation. The axis values are drawn horizontally; I want them vertical, and I can't seem to figure it out. I appreciate any help! Thanks
library(rms)
n <- 1000    # define sample size
set.seed(17) # so we can reproduce the results
d <- data.frame(age = rnorm(n, 50, 10),
                blood.pressure = rnorm(n, 120, 15),
                cholesterol = rnorm(n, 200, 25),
                sex = factor(sample(c('female','male'), n, TRUE)))
# Specify population model for log odds that Y=1
# Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
d <- upData(d,
            L = .4*(sex=='male') + .045*(age-50) +
              (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')),
            y = ifelse(runif(n) < plogis(L), 1, 0))
ddist <- datadist(d); options(datadist='ddist')
f <- lrm(y ~ lsp(age,50) + sex * rcs(cholesterol, 4) + blood.pressure,
         data=d)
nom <- nomogram(f, fun=function(x)1/(1+exp(-x)), # or fun=plogis
                fun.at=c(.001,.01,.05,seq(.1,.9,by=.1),.95,.99,.999),
                funlabel="Risk of Death")
# Instead of fun.at, could have specified fun.lp.at=logit of
# sequence above - faster and slightly more accurate
plot(nom, xfrac=.45)
print(nom)
nom <- nomogram(f, age=seq(10,90,by=10))
plot(nom, xfrac=.45)
g <- lrm(y ~ sex + rcs(age, 3) * rcs(cholesterol, 3), data=d)
nom <- nomogram(g, interact=list(age=c(20,40,60)),
                conf.int=c(.7,.9,.95))
plot(nom, col.conf=c(1,.5,.2), naxes=7)
w <- upData(d,
            cens = 15 * runif(n),
            h = .02 * exp(.04 * (age - 50) + .8 * (sex == 'Female')),
            d.time = -log(runif(n)) / h,
            death = ifelse(d.time <= cens, 1, 0),
            d.time = pmin(d.time, cens))
f <- psm(Surv(d.time,death) ~ sex * age, data=w, dist='lognormal')
med <- Quantile(f)
surv <- Survival(f) # This would also work if f was from cph
plot(nomogram(f, fun=function(x) med(lp=x), funlabel="Median Survival Time"))
nom <- nomogram(f, fun=list(function(x) surv(3, x),
                            function(x) surv(6, x)),
                funlabel=c("3-Month Survival Probability",
                           "6-month Survival Probability"))
plot(nom, xfrac=.7)
[Figure: the resulting nomogram]
