Modelling generic variables in a Latent class model with gmnl() - r

I have the problem in fomulating a model, where at least one variable is to be estimated independently from the classes, so one and the same coefficient for all classes. How could one do this?
I am working with the R package gmnl.
install.packages("gmnl")
library(gmnl)
library(mlogit)
#browseURL("https://cran.r-project.org/web/packages/poLCA/index.html")
## Examples using the Fishing data set from the AER package
data("Electricity", package = "mlogit")
Electr <- mlogit.data(Electricity, id.var = "id", choice = "choice",
varying = 3:26, shape = "wide", sep = "")
Elec.lc <- gmnl(choice ~ pf + cl + loc + wk + tod + seas| 0 | 0 | 0 | 1,
data = Electr,
subset = 1:3000,
model = 'lc',
panel = TRUE,
Q = 2)
summary(Elec.lc)
How would you model one of the variables pf, cl, loc, wk, tod, or seas independently from the class? Thank you!

Thanks to Mauricio Sarrias I can present this work-around, which should solve the problem:
###################################
library("gmnl")
library("mlogit")
# Data
data("Electricity", package = "mlogit")
Electr <- mlogit.data(Electricity, id.var = "id", choice = "choice",
varying = 3:26, shape = "wide", sep = "")
# ASCs
Electr$asc2 <- as.numeric(Electr$alt == 2)
Electr$asc3 <- as.numeric(Electr$alt == 3)
Electr$asc4 <- as.numeric(Electr$alt == 4)
# We estimate a MNL for the initial values of LC-MNL
init_mnl <- gmnl(choice ~ asc2 + asc3 + asc4 + pf + cl| 0,
data = Electr)
summary(init_mnl)
# Work on initial values for LC-MNL
init <- coef(init_mnl)
Q <- 2 # Number of Classes
init.shift <- seq(-0.02, 0.02, length.out = Q)
lc.mean <- c()
for(i in 1:Q){
lc.mean <- c(lc.mean, init + init.shift[i])
}
lc.names <- c()
lc.nalpha <- c()
for (i in 1:Q){
lc.names <- c(lc.names, paste('class', i, names(init), sep = '.'))
}
names(lc.mean) <- lc.names
# Now we fix pf coefficient = 0 in the second class
lc.mean[c("class.2.pf")] <- 0
start_lc <- c(lc.mean, # Var coefficients
0) #Constant for second class
# Estimate LC with price coefficient held fixed at 0 in class 2
lc <- gmnl(choice ~ asc2 + asc3 + asc4 + pf + cl| 0 | 0 | 0 |1,
data = Electr,
model = "lc",
iterlim = 500,
start = start_lc,
fixed = c(rep(FALSE, 8), TRUE, rep(FALSE, 2)), # note that class.2.pf is fixed at 0
print.level = 3,
print.init = TRUE,
Q = 2)
summary(lc)
########################

Related

How do I add subscripts to labels in ggplot?

I'm doing an analysis on air pollutants using Bayesian Kernel Machine Regression, using the bkmr package in R.
https://jenfb.github.io/bkmr/overview.html
The link is to Jennifer Bobb's instructions on how to use this package. I don't think it is relevant to the issue though. What I want to do is have PM2.5, O3, and NO2 show up in my charts with the 2.5, 3, and 2 as subscripts. I'm trying to use this function and getting no luck:
colnames(dat) <- c("LTE4", "$O[3]", "$PM[2.5]", "$NO[2]", "Diethyl", "Dimethyl", "age", "tmpf", "relh", "sex", "agany", "agself", "asthma")
When I do this what happens I just see these labels show up in the plots with with the $ and [] instead of subscripted numbers. Any ideas?
This is the full code I am using:
### January BKMR Analysis ###
## Hierarchical Variable Selection ##
## Updated June 6, 2022 ##
# Reading in necessary packages
library(tidyverse)
library(bkmr)
trio_semipro <- readRDS("C:/Users/Matt/OneDrive/Documents/Fresno Thesis/Thesis Code/trio_semipro.rds")
trio_semipro
dim(trio_semipro)
head(trio_semipro)
trio_semipro$log_lte4 <- log(trio_semipro$Final)
# Separating out dataframes for winter and summer to run separate models for each season
trio_semipro_w <- trio_semipro %>%
filter(visit_month == 1)
trio_semipro_s <- trio_semipro %>%
filter(visit_month == 2)
# Summer and Winter Dataframes
trio_semipro_w
trio_semipro_s
head(trio_semipro_w)
#view(trio_semipro_w)
dat = cbind(trio_semipro_w$log_lte4, trio_semipro_w$O3,
trio_semipro_w$PM25, trio_semipro_w$NO2, trio_semipro_w$diethyl, trio_semipro_w$dimethyl,
trio_semipro_w$age, trio_semipro_w$tmpf, trio_semipro_w$relh, trio_semipro_w$sex, trio_semipro_w$agriculture_anyone,
trio_semipro_w$agriculture_self, trio_semipro_w$asthma)
head(dat)
colnames(dat) = c("LTE4", "$O[3]", "$PM[2.5]", "$NO[2]", "Diethyl", "Dimethyl", "age", "tmpf", "relh", "sex", "agany", "agself", "asthma")
dat = as.data.frame(dat)
dat$sex
# recode the binary variable to be 0, 1 and NA
dat$agself = dat$agself-1
dat$agself[which(dat$agself==2)]=NA
dat$agself
# recode sex variable
dat$sex = dat$sex -1
# recode agany variable
dat$agany = dat$agany - 1
dat$agany[which(dat$agany==2)]=NA
#recode asthma variable
dat$asthma = dat$asthma - 1
dat$asthma[which(dat$asthma==2)]=NA
dat$asthma
dat$sex
dat$agany
# good
head(dat)
complete_dat = dat[-which(apply(dat, 1, anyNA)),]
dim(complete_dat)
# Fit BKMR
zscaled <- apply(complete_dat[,(2:6)], 2, scale)
yscaled <- scale(complete_dat$lte4)
xscaled <- cbind(scale(complete_dat[,7:9]), complete_dat[,10:13])
fit_bkmr = kmbayes(y=yscaled, Z= zscaled, X = xscaled,
iter = 20000, varsel = TRUE, groups=c(1,1,1,2,2), verbose=FALSE)
plot(fit_bkmr$sigsq.eps, type = "l")
TracePlot(fit = fit_bkmr, par = "beta", comp = 4)
TracePlot(fit = fit_bkmr, par = "sigsq.eps")
TracePlot(fit = fit_bkmr, par = "r", comp = 1)
# Estimating posterior inclusion probabilities
ExtractPIPs(fit_bkmr)
# Estimating h
y <- yscaled
Z <- zscaled
X <- xscaled
med_vals <- apply(Z, 2, median)
Znew <- matrix(med_vals, nrow = 1)
# Summarize model output
pred.resp.univar <- PredictorResponseUnivar(fit = fit_bkmr)
library(ggplot2) # Using ggplot to plot cross sections of h
ggplot(pred.resp.univar, aes(z, est, ymin = est - 1.96*se, ymax = est + 1.96*se)) +
geom_smooth(stat = "identity") +
geom_hline(yintercept = 0, lty = 5, col = "red2", alpha = 0.4) +
facet_wrap(~ variable, nrow = 1) +
ylab("h(z)")
# visualze the bivarate exposure-response function for two predictors, where
# all of the other predictors are fixed at a particular percentile.
pred.resp.bivar <- PredictorResponseBivar(fit = fit_bkmr, min.plot.dist = 1)
ggplot(pred.resp.bivar, aes(z1, z2, fill = est)) +
geom_raster() +
facet_grid(variable2 ~ variable1) +
scale_fill_gradientn(colours=c("#0000FFFF","#FFFFFFFF","#FF0000FF")) +
xlab("expos1") +
ylab("expos2") +
ggtitle("h(expos1, expos2)")

`gmnl` unable to include Alternative Specific Constants (ASC) in Mixed-mixed MNL model

I am trying to estimate a Mixed-mixed multinomial logit model using the gmnl package. It works perfectly when not including Alternative Specific Constants (ASC), but it produces a weird error when incorporating them. The code below was taken (and adapted) from the original article published of the package.
Data preparation
options(digits = 3)
library("gmnl")
library("mlogit")
data("Electricity", package = "mlogit")
Electr <- mlogit.data(Electricity,
id.var = "id",
choice = "choice",
varying = 3:26,
shape = "wide",
sep = "")
####Alternative Specific Constants
Electr$asc2 <- as.numeric(Electr$alt == 2)
Electr$asc3 <- as.numeric(Electr$alt == 3)
Electr$asc4 <- as.numeric(Electr$alt == 4)
Latent Class Models (with ASC)
The code below works perfectly, even including the ASC in the second part of the formula (LC_ASC_in_formula) or explicitly with the regressors (LC_ASC_in_variables).
LC_ASC_in_formula <- gmnl(choice ~ pf + cl + loc + wk + tod + seas | 1 | 0 | 0 | 1,
data = Electr,
subset = 1:3000,
model = "lc",
panel = TRUE,
Q = 2)
summary(LC_ASC_in_formula)
LC_ASC_in_variables <- gmnl( choice ~ pf + cl + loc + wk + tod + seas +asc2 +asc3 +asc4 | 0 | 0 | 0 | 1,
data = Electr,
subset = 1:3000,
model = "lc",
panel = TRUE,
Q = 2)
summary(LC_ASC_in_variables)
## Are they the same?
logLik(LC_ASC_in_variables) == logLik(LC_ASC_in_formula)
## [1] TRUE
Mixed-mixed MNL model
This model is basically a Latent Class model, but inside each class, the parameters are random (follow a previously specified parametric distribution).
Mixed-mixed MNL model WITHOUT ASC
The model works just fine when the ASCs are omitted.
MM_no_ASC <- gmnl(choice ~ pf + cl + loc + wk + tod + seas | 0 | 0 | 0 | 1,
data = Electr,
subset = 1:3000,
model = "mm",
R = 5,
panel = TRUE,
ranp = c(pf = "n",cl = "n",loc = "n",wk = "n", tod = "n",seas= "n"),
Q = 2,
iterlim = 500)
However, it fails to estimate the model when including the ASC:
As part of the variables in the model.
MM_ASC_in_variables <- gmnl( choice ~ pf + cl + loc + wk + tod + seas +
asc2 +asc3 +asc4 | 0 | 0 | 0 | 1 ,
data = Electr,
subset = 1:3000,
model = "mm",
R = 5,
panel = TRUE,
ranp = c(pf = "n",cl = "n",loc = "n",wk = "n", tod = "n",seas= "n"),
Q = 2,
iterlim = 500)
> Error in if (distr == "n") { : missing value where TRUE/FALSE needed
and when including them in the third part of the formula.
MM_ASC_in_formula <- gmnl( choice ~ pf + cl + loc + wk + tod + seas | 1 | 0 | 0 | 1 ,
data = Electr,
subset = 1:3000,
model = "mm",
R = 5,
panel = TRUE,
ranp = c(pf = "n",cl = "n",loc = "n",wk = "n", tod = "n",seas= "n"),
Q = 2,
iterlim = 500)
> Error in if (distr == "n") { : missing value where TRUE/FALSE needed
Howeve, both ways to include the ASC parameters fail to initialize the model estimation. Hopefully, someone could help me to solve this issue. Thank you in advance.
Bonus1: Traceback of the error.
I reduced the number of observations included in the estimation (subset = 1:20) to see better the traceback() of the error shown below. But I couldn't spot the error myself.
MM_ASC_in_formula <- gmnl( choice ~ pf + cl + loc + wk + tod + seas | 1 | 0 | 0 | 1 ,
data = Electr,
subset = 1:20,
model = "mm",
R = 5,
panel = TRUE,
ranp = c(pf = "n",cl = "n",loc = "n",wk = "n", tod = "n",seas= "n"),
Q = 2,
iterlim = 500)
# Error in if (distr == "n") { : missing value where TRUE/FALSE needed
traceback()
# Estimating MM-MNL model
# Error in if (distr == "n") { : missing value where TRUE/FALSE needed
# > traceback()
# 14: Makeh.rcoef(beta[, q], stds[, q], ranp, Omega[, ((i - 1) * R +
# 1):(i * R), drop = FALSE], correlation, Pi = NULL, Slist = NULL,
# mvar = NULL)
# 13: fnOrig(theta, ...)
# 12: logLikFunc(theta, fnOrig = function (theta, y, X, H, Q, id = NULL,
# ranp, R, correlation, weights = NULL, haltons = NULL, seed = 12345,
# gradient = TRUE, get.bi = FALSE)
# {
# K <- ncol(X[[1]])
# J <- length(X)
# N <- nrow(X[[1]])
# panel <- !is.null(id)
# if (panel) {
# n <- length(unique(id))
# if (length(weights) == 1)
# weights <- rep(weights, N)
# }
# beta <- matrix(theta[1L:(K * Q)], nrow = K, ncol = Q)
# nstds <- if (!correlation)
# K * Q
# else (0.5 * K * (K + 1)) * Q
# stds <- matrix(theta[(K * Q + 1):(K * Q + nstds)], ncol = Q)
# rownames(beta) <- colnames(X[[1]])
# colnames(beta) <- colnames(stds) <- paste("class", 1:Q, sep = ":")
# gamma <- theta[-c(1L:(K * Q + nstds))]
# ew <- lapply(H, function(x) exp(crossprod(t(x), gamma)))
# sew <- suml(ew)
# Wnq <- lapply(ew, function(x) {
# v <- x/sew
# v[is.na(v)] <- 0
# as.vector(v)
# })
# Wnq <- Reduce(cbind, Wnq)
# set.seed(seed)
# Omega <- make.draws(R * ifelse(panel, n, N), K, haltons)
# XBr <- vector(mode = "list", length = J)
# for (j in 1:J) XBr[[j]] <- array(NA, dim = c(N, R, Q))
# nind <- ifelse(panel, n, N)
# if (panel)
# theIds <- unique(id)
# if (get.bi)
# bi <- array(NA, dim = c(nind, R, Q, K), dimnames = list(NULL,
# NULL, NULL, colnames(X[[1]])))
# for (i in 1:nind) {
# if (panel) {
# anid <- theIds[i]
# theRows <- which(id == anid)
# }
# else theRows <- i
# for (q in 1:Q) {
# bq <- Makeh.rcoef(beta[, q], stds[, q], ranp, Omega[,
# ((i - 1) * R + 1):(i * R), drop = FALSE], correlation,
# Pi = NULL, Slist = NULL, mvar = NULL)
# for (j in 1:J) {
# XBr[[j]][theRows, , q] <- crossprod(t(X[[j]][theRows,
# , drop = FALSE]), bq$br)
# }
# if (get.bi)
# bi[i, , q, ] <- t(bq$br)
# }
# }
# EXB <- lapply(XBr, function(x) exp(x))
# SEXB <- suml.array(EXB)
# Pntirq <- lapply(EXB, function(x) x/SEXB)
# Pnrq <- suml.array(mapply("*", Pntirq, y, SIMPLIFY = FALSE))
# if (panel)
# Pnrq <- apply(Pnrq, c(2, 3), tapply, id, prod)
# Pnq <- apply(Pnrq, c(1, 3), mean)
# WPnq <- Wnq * Pnq
# Ln <- apply(WPnq, 1, sum)
# if (get.bi)
# Qir <- list(wnq = Wnq, Ln = Ln, Pnrq = Pnrq)
# lnL <- if (panel)
# sum(log(Ln) * weights[!duplicated(id)])
# else sum(log(Ln) * weights)
# if (gradient) {
# lambda <- mapply(function(y, p) y - p, y, Pntirq, SIMPLIFY = FALSE)
# Wnq.mod <- aperm(repmat(Wnq/Ln, dimen = c(1, 1, R)),
# c(1, 3, 2))
# Qnq.mod <- Wnq.mod * Pnrq
# if (panel)
# Qnq.mod <- Qnq.mod[id, , ]
# eta <- lapply(lambda, function(x) x * Qnq.mod)
# dUdb <- dUds <- vector(mode = "list", length = J)
# for (j in 1:J) {
# dUdb[[j]] <- array(NA, dim = c(N, K, Q))
# dUds[[j]] <- array(NA, dim = c(N, nrow(stds), Q))
# }
# for (i in 1:nind) {
# if (panel) {
# anid <- theIds[i]
# theRows <- which(id == anid)
# }
# else theRows <- i
# for (q in 1:Q) {
# bq <- Makeh.rcoef(beta[, q], stds[, q], ranp,
# Omega[, ((i - 1) * R + 1):(i * R), drop = FALSE],
# correlation, Pi = NULL, Slist = NULL, mvar = NULL)
# for (j in 1:J) {
# dUdb[[j]][theRows, , q] <- tcrossprod(eta[[j]][theRows,
# , q, drop = TRUE], bq$d.mu)
# dUds[[j]][theRows, , q] <- tcrossprod(eta[[j]][theRows,
# , q, drop = TRUE], bq$d.sigma)
# }
# }
# }
# if (correlation) {
# vecX <- c()
# for (i in 1:K) {
# vecX <- c(vecX, i:K)
# }
# Xac <- lapply(X, function(x) x[, vecX])
# }
# else {
# Xac <- X
# }
# Xr <- lapply(X, function(x) x[, rep(1:K, Q)])
# Xacr <- lapply(Xac, function(x) x[, rep(1:ncol(Xac[[1]]),
# Q)])
# dUdb <- lapply(dUdb, function(x) matrix(x, nrow = N))
# dUds <- lapply(dUds, function(x) matrix(x, nrow = N))
# grad.beta <- suml(mapply("*", Xr, dUdb, SIMPLIFY = FALSE))/R
# grad.stds <- suml(mapply("*", Xacr, dUds, SIMPLIFY = FALSE))/R
# Qnq <- WPnq/Ln
# if (panel) {
# Wnq <- Wnq[id, ]
# H <- lapply(H, function(x) x[id, ])
# Qnq <- Qnq[id, ]
# }
# Wg <- vector(mode = "list", length = Q)
# IQ <- diag(Q)
# for (q in 1:Q) Wg[[q]] <- rowSums(Qnq * (repRows(IQ[q,
# ], N) - repCols(Wnq[, q], Q)))
# grad.gamma <- suml(mapply("*", H, Wg, SIMPLIFY = FALSE))
# gari <- cbind(grad.beta, grad.stds, grad.gamma)
# colnames(gari) <- names(theta)
# attr(lnL, "gradient") <- gari * weights
# }
# if (get.bi) {
# Pnjq <- lapply(Pntirq, function(x) apply(x, c(1, 3),
# mean))
# if (panel)
# Wnq <- Wnq[id, ]
# Pw <- lapply(Pnjq, function(x) x * Wnq)
# attr(lnL, "prob.alt") <- sapply(Pw, function(x) apply(x,
# 1, sum))
# attr(lnL, "prob.ind") <- Ln
# attr(lnL, "bi") <- bi
# attr(lnL, "Qir") <- Qir
# attr(lnL, "Wnq") <- Wnq
# }
# lnL
# },# weights = 1, R = 5, seed = 12345, ranp = c(pf = "n", cl = "n",
# loc = "n", wk = "n", tod = "n", seas = "n"), id = structure(c(1L,
# 1L, 1L, 1L, 1L), .Label = "1", class = "factor"), H = list(
# `1` = structure(0, .Dim = c(1L, 1L), .Dimnames = list(
# "1", "(class)2")), `2` = structure(1, .Dim = c(1L,
# 1L), .Dimnames = list("2", "(class)2"))), correlation = FALSE,
# haltons = NA, Q = 2)
# 11: eval(f, sys.frame(sys.parent()))
# 10: eval(f, sys.frame(sys.parent()))
# 9: callWithoutArgs(theta, fName = fName, args = names(formals(sumt)),
# ...)
# 8: (function (theta, fName, ...)
#
# 7: do.call(callWithoutSumt, argList)
# 6: maxOptim(fn = fn, grad = grad, hess = hess, start = start, method = "BFGS",
# fixed = fixed, constraints = constraints, finalHessian = finalHessian,
# parscale = parscale, control = mControl, ...)
# 5: maxRoutine(fn = logLik, grad = grad, hess = hess, start = start,
# constraints = constraints, ...)
# 4: maxLik(method = "bfgs", iterlim = 500, start = c(`class.1.2:(intercept)` = -4.85114128700713,
# `class.1.3:(intercept)` = -7.69322200825539, `class.1.4:(intercept)` = 5.01582959989182,
# class.1.pf = -1.60963678008691, class.1.cl = 0.109892050051351,
# class.1.loc = 18.3461318629584, class.1.wk = 5.01552145983325,
# class.1.tod = 6.12905713997904, class.1.seas = -4.37562129235275,
# `class.2.2:(intercept)` = -4.81114128700713, `class.2.3:(intercept)` = -7.6532220082554,
# `class.2.4:(intercept)` = 5.05582959989182, class.2.pf = -1.56963678008691,
# class.2.cl = 0.149892050051351, class.2.loc = 18.3861318629584,
# class.2.wk = 5.05552145983325, class.2.tod = 6.16905713997903,
# class.2.seas = -4.33562129235275, class.1.sd.pf = 0.08, class.1.sd.cl = 0.08,
# class.1.sd.loc = 0.08, class.1.sd.wk = 0.08, class.1.sd.tod = 0.08,
# class.1.sd.seas = 0.08, class.2.sd.pf = 0.12, class.2.sd.cl = 0.12,
# class.2.sd.loc = 0.12, class.2.sd.wk = 0.12, class.2.sd.tod = 0.12,
# class.2.sd.seas = 0.12, `(class)2` = 0), X = Xl, y = yl, gradient = gradient,
# weights = weights, logLik = ll.mnlogit, R = R, seed = seed,
# ranp = ranp, id = id, H = Hl, correlation = correlation,
# haltons = haltons, Q = Q)
# 3: eval(opt, sys.frame(which = nframe))
# 2: eval(opt, sys.frame(which = nframe))
# 1: gmnl(choice ~ pf + cl + loc + wk + tod + seas | 1 | 0 | 0 | 1,
# data = Electr, subset = 1:20, model = "mm", R = 5, panel = TRUE,
# ranp = c(pf = "n", cl = "n", loc = "n", wk = "n", tod = "n",
# seas = "n"), Q = 2, iterlim = 500)
Bonus2 :sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19044)
Matrix products: default
attached base packages:
[1] grid stats graphics grDevices utils datasets
[7] methods base
other attached packages:
[1] here_1.0.1 strucchange_1.5-2 sandwich_3.0-1
[4] zoo_1.8-9 partykit_1.2-15 mvtnorm_1.1-3
[7] libcoin_1.0-9 mlogit_1.1-1 dfidx_0.0-4
[10] gmnl_1.1-3.2 Formula_1.2-4 maxLik_1.5-2
[13] miscTools_0.6-26 dplyr_1.0.7 nnet_7.3-17
Thank you in advance.

Creating new datasets for each iteration of a power analysis

I have the following code to estimate the power for my study which runs perfectly fine. The issue is that I am running n = 1000 iterations, but each iteration generates the exact same dataset. I think this is because the commands in the function that I created (powercrosssw) draw on the data definitions above that are fixed in value? How do I ensure that each dataset (named dx below) that is generated is different (i.e. the values for u_3, error, and y are different for each iteration) so that I am calculating the power appropriately?
library(simstudy)
library(nlme)
library(gendata)
library(data.table)
library(geepack)
set.seed(12345)
clusterDef <- defDataAdd(varname = "u_3", dist = "normal", formula = 0, variance = 25.77) #cluster-level random effect
patError <- defDataAdd(varname = "error", dist = "normal", formula = 0, variance = 38.35) #error term
#Generate cluster-level data
cohortsw <- genData(3, id = "cluster")
cohortsw <- addColumns(clusterDef, cohortsw)
cohortswTm <- addPeriods(cohortsw, nPeriods = 6, idvars = "cluster", perName = "period")
cohortstep <- trtStepWedge(cohortswTm, "cluster", nWaves = 3, lenWaves = 1, startPer = 1, grpName = "Ijt")
cohortstep
#Generate individual patient-level data
pat <- genCluster(cohortswTm, cLevelVar = "timeID", numIndsVar = 5, level1ID = "id")
pat
dx <- merge(pat[, .(cluster, period, id)], cohortstep, by = c("cluster", "period"))
dx <- addColumns(patError, dx)
setkey(dx, id, cluster, period)
#Define outcome y
outDef <- defDataAdd(varname = "y", formula = "17.87 + 5.0*Ijt - 5.42*I(period == 1) - 5.72*I(period == 2) - 7.03*I(period == 3) - 6.13*I(period == 4) - 9.13*I(period == 5) + u_3 + error", dist = "normal")
dx <- addColumns(outDef, dx)
#Fit GLMM model to simulated dataset
model1 <- lme(y ~ factor(period) + factor(Ijt), random = ~1|cluster, data = dx, method = "REML")
summary(model1)
#Power analysis
powercrosssw <- function(nclus = 3, clsize = 5) {
cohortsw <- genData(nclus, id = "cluster")
cohortsw <- addColumns(clusterDef, cohortsw)
cohortswTm <- addPeriods(cohortsw, nPeriods = 6, idvars = "cluster", perName = "period")
cohortstep <- trtStepWedge(cohortswTm, "cluster", nWaves = 3, lenWaves = 1, startPer = 1, grpName = "Ijt")
pat <- genCluster(cohortswTm, cLevelVar = "timeID", numIndsVar = clsize, level1ID = "id")
dx <- merge(pat[, .(cluster, period, id)], cohortstep, by = c("cluster", "period"))
dx <- addColumns(patError, dx)
setkey(dx, id, cluster, period)
return(dx)
}
bresult <- NULL
presult <- NULL
eresult <- NULL
intercept <- NULL
trt <- NULL
timecoeff1 <- NULL
timecoeff2 <- NULL
timecoeff3 <- NULL
timecoeff4 <- NULL
timecoeff5 <- NULL
ranclus <- NULL
error <- NULL
i=1
while (i < 1000) {
cohortsw <- powercrosssw()
#Fit multi-level model to simulated dataset
model1 <- tryCatch(lme(y ~ factor(period) + factor(Ijt), data = dx, random = ~1|cluster, method = "REML"),
warning = function(w) { "warning" }
)
if (! is.character(model1)) {
coeff <- coef(summary(model1))["factor(Ijt)1", "Value"]
pvalue <- coef(summary(model1))["factor(Ijt)1", "p-value"]
error <- coef(summary(model1))["factor(Ijt)1", "Std.Error"]
bresult <- c(bresult, coeff)
presult <- c(presult, pvalue)
eresult <- c(eresult, error)
i <- i + 1
}
}

Rolling window with Copulas

I would like to apply a rolling window to fit a student t Copula and then to do a forecast based on the results from the fitting process. I already tried it with a for loop, but it always state errors according to the fit Copula command.
#Students t Copula
windowsSize <- 4000 # training data size
testsize <- 351 # number of observations to forecast
for(k in 0:33) # run 34 experiments
{
A <- k*testsize + 1
B <- A + windowsSize - 1
start_obs <- A
end_obs <- B
lgYen_roll <- lgYenUSD[A:B]
lgEuro_roll <- lgEuroUSD[A:B]
ElgYen_roll <- ElgYenUSD[A:B]
ElgEuro_roll <- ElgEuroUSD[A:B
StdlgYen_roll <- StdlgYenUSD[A:B]
StdlgEuro_roll <- StdlgEuroUSD[A:B]
CopYenEuro_roll <- pobs(as.matrix(cbind(lgYen_roll,lgEuro_roll)))
YenEuro_fit_t_roll <- fitCopula(t.cop,CopYenEuro_roll,method=c('ml'), posDef = is(t.cop, "ellipCopula"),
start = NULL, lower = NULL, upper = NULL,
optim.method = optimMeth(t.cop, method,dim=d),
optim.control = list(maxit=1000),
estimate.variance = NA, hideWarnings = FALSE)
Here occurs already the first error: "Error in if (any(u < 0) || any(u > 1)) stop("'u' must be in [0,1] -- probably rather use pobs(.)") :
missing value where TRUE/FALSE needed"
CO_YenEuro_roll_rho <- coef(YenEuro_fit_t_roll)[1]
CO_YenEuro_roll_df <- coef(YenEuro_fit_t_roll)[2]
YenEurocopula_dist_t_roll <- mvdc(copula=tCopula(param = CO_YenEuro_roll_rho,dim=2), margins=c("norm","norm"),
paramMargins = list(list(mean=ElgYen_roll, sd=StdlgYen_roll),
list(mean=ElgEuro_roll, sd=StdlgEuro_roll)),
check = TRUE, fixupNames = TRUE)
YenEurocopula_random_t_roll.dist <- rMvdc(351,YenEurocopula_dist_t_roll)
#Prediction
A <- B + 1
B <- B + testsize
lgYen_roll <- lgYenUSD[A:B]
lgEuro_roll <- lgEuroUSD[A:B]
ElgYen_roll <- ElgYenUSD[A:B]
ElgEuro_roll <- ElgEuroUSD[A:B]
StdlgYen_roll <- StdlgYenUSD[A:B]
StdlgEuro_roll <- StdlgEuroUSD[A:B]
predict_EXT <- matrix(0, testsize, 1)
for(i in 1:testsize) # do the forecast based on the Copula Fit results
{
predict_EXT[i] <- fitCopula(t.cop,CopYenEuro_rolling[i],method=c('ml'), posDef = is(t.cop, "ellipCopula"),
start = NULL, lower = NULL, upper = NULL,
optim.method = optimMeth(t.cop, method,dim=d),
optim.control = list(maxit=1000),
estimate.variance = NA, hideWarnings = TRUE)
YenEurocopula_dist_t_roll <- mvdc(copula=tCopula(param = CO_YenEuro_roll_rho[i],dim=2), margins=c("norm","norm"),
paramMargins = list(list(mean=ElgYen_roll[i], sd=StdlgYen_roll[i]),
list(mean=ElgEuro_roll[i], sd=StdlgEuro_roll[i])),
check = TRUE, fixupNames = TRUE)
YenEurocopula_random_t_roll.dist <- rMvdc(351,YenEurocopula_dist_t_roll[i])
}}
Maybe someone has a solution to this problem?

How to calculate mean sojourn time in each nonabsorbing state using R package MSTATE

I am working on a survival analysis and cannot seem to figure out how do to this.
From the MSTATE tutorial the following is a block of code for as simple Cox-regression. How does one calculate the mean sojourn time in each nonabsorbing state?
Code:
library(mstate)
data(ebmt3)
tmat <- trans.illdeath(names=c("Tx","PR","RelDeath"))
ebmt3$prtime <- ebmt3$prtime/365.25
ebmt3$rfstime <- ebmt3$rfstime/365.25
covs <- c("dissub", "age", "drmatch", "tcd", "prtime")
msbmt <- msprep(time = c(NA, "prtime", "rfstime"), status = c(NA, "prstat", "rfsstat"), data = ebmt3, trans = tmat, keep = covs)
expcovs <- expand.covs(msbmt, covs[2:3], append = FALSE)
msbmt <- expand.covs(msbmt, covs, append = TRUE, longnames = FALSE)
c1 <- coxph(Surv(Tstart, Tstop, status) ~ dissub1.1 + dissub2.1 +
age1.1 + age2.1 + drmatch.1 + tcd.1 + dissub1.2 + dissub2.2 +
age1.2 + age2.2 + drmatch.2 + tcd.2 + dissub1.3 + dissub2.3 +
age1.3 + age2.3 + drmatch.3 + tcd.3 + strata(trans), data = msbmt,
method = "breslow")
newd <- data.frame(dissub = rep(0, 3), age = rep(0, 3), drmatch = rep(0,
3), tcd = rep(0, 3), trans = 1:3)
newd$dissub <- factor(newd$dissub, levels = 0:2, labels = levels(ebmt3$dissub))
newd$age <- factor(newd$age, levels = 0:2, labels = levels(ebmt3$age))
newd$drmatch <- factor(newd$drmatch, levels = 0:1, labels = levels(ebmt3$drmatch))
newd$tcd <- factor(newd$tcd, levels = 0:1, labels = levels(ebmt3$tcd))
attr(newd, "trans") <- tmat
class(newd) <- c("msdata", "data.frame")
newd <- expand.covs(newd, covs[1:4], longnames = FALSE)
newd$strata = 1:3
newd
msf1 <- msfit(c1, newdata = newd, trans = tmat)
Thanks!
I think you are looking for the ELOS function in mstate - it stands for the Expected Length of Stay in a state - to complete your example you would need to calculate the transition probabilities using probtrans and then you can calculate ELOS for every state.
pt <- probtrans(msf1,predt=0)
# ELOS until last observed time point
ELOS(pt)

Resources