Censoring in rjags - Invalid parent values - r

I'm having trouble reimplementing a WinBUGS model in rjags. I'm getting the Invalid parent values error, which is the error you get when censoring is not set up correctly, but I can't see my mistake.
This is the original model on WinBugs:
model {
  for (i in 1:N) {
    times[i] ~ dweib(v, lambda[i]) T(censor[i],)
    lambda[i] <- exp(beta0 + beta1*type[i])
    S[i] <- exp(-lambda[i]*pow(times[i], v))
    f[i] <- lambda[i]*v*pow(times[i], v-1)*S[i]
    h[i] <- f[i]/S[i]
  }
  beta0 ~ dnorm(0.0, 0.0001)
  beta1 ~ dnorm(0.0, 0.0001)
  v ~ dexp(0.001)
  median0 <- pow(log(2) * exp(-beta0), 1/v)
  median1 <- pow(log(2) * exp(-beta0-beta1), 1/v)
}
Setting up a reproducible example:
type <- as.factor(c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))
censor <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,882,892,1031,
1033,1306,1335,0,1452,1472,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,381,0,0,0,0,0,0,0,0,0,529,0,
0,0,0,0,0,0,0,0,945,0,0,1180,0,0,1277,1397,1512,1519)
times <- c(17,42,44,48,60,72,74,95,103,108,122,144,167,170,183,185,193,195,197,208,234,235,254,307,315,401,
445,464,484,528,542,567,577,580,795,855,NA,NA,NA,NA,NA,NA,1366,NA,NA,1,63,105,129,182,216,250,262,
301,301,342,354,356,358,380,NA,383,383,388,394,408,460,489,499,524,NA,535,562,675,676,748,748,778,
786,797,NA,955,968,NA,1245,1271,NA,NA,NA,NA)
library(dplyr)
library(tibble)

df <- tibble(type = type, censor = censor, time = times) %>%
mutate(censor_limit = replace(censor, censor == 0, max(times, na.rm = TRUE))) %>%
mutate(is_censored = ifelse(is.na(time), 1, 0)) %>%
mutate(time_init = ifelse(is_censored == 1, censor_limit + 1, NA))
df$censor <- NULL
head(df)
And this is the rjags part:
library(rjags)

m <- textConnection("model {
  for (i in 1:N) {
    isCensored[i] ~ dinterval(times[i], censorLimit[i])
    times[i] ~ dweib(v, lambda[i])
    lambda[i] <- exp(beta0 + beta1*type[i])
    S[i] <- exp(-lambda[i]*pow(times[i], v))
    f[i] <- lambda[i]*v*pow(times[i], v-1)*S[i]
    h[i] <- f[i]/S[i]
  }
  beta0 ~ dnorm(0.0, 0.0001)
  beta1 ~ dnorm(0.0, 0.0001)
  v ~ dexp(0.001)
  # Median survival time
  median0 <- pow(log(2) * exp(-beta0), 1/v)
  median1 <- pow(log(2) * exp(-beta0-beta1), 1/v)
}")
d <- list(N = nrow(df), times = df$time, type = df$type, isCensored = df$is_censored,
censorLimit = df$censor_limit)
inits1 = function() {
inits = list(v = 1, beta0 = 0, beta1=0, times = df$time_init)
}
mod <- jags.model(m, data = d, inits = inits1, n.chains = 3)
update(mod, 1e3)
mod_sim <- coda.samples(model = mod, variable.names = c("lambda", "median0", "median1"), n.iter = 5e3)
mod_csim <- as.mcmc(do.call(rbind, mod_sim))
Output:
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph information:
Observed stochastic nodes: 164
Unobserved stochastic nodes: 19
Total graph size: 910
Initializing model
Deleting model
Error in jags.model(m, data = d, inits = inits1, n.chains = 3): Error in node h[35]
Invalid parent values
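For reference, here is a generic minimal sketch of the dinterval right-censoring convention the model above relies on (it is not a fix for this particular model): censored rows carry time = NA, an indicator of 1 and the censoring time as the limit; fully observed rows carry an indicator of 0 and a limit at least as large as the observed time; and the censored times need initial values above their limits.
library(rjags)

sketch <- textConnection("model {
  for (i in 1:N) {
    isCensored[i] ~ dinterval(t[i], censorLimit[i])
    t[i] ~ dexp(rate)
  }
  rate ~ dgamma(0.001, 0.001)
}")

d_sketch <- list(N = 5,
                 t = c(2.1, 0.7, NA, 3.4, NA),          # NA marks right-censored rows
                 isCensored = c(0, 0, 1, 0, 1),
                 censorLimit = c(10, 10, 4.0, 10, 2.5)) # limits >= observed times

# censored times must be initialised strictly above their censoring limits
ini_sketch <- function() list(rate = 1, t = c(NA, NA, 4.1, NA, 2.6))

sk <- jags.model(sketch, data = d_sketch, inits = ini_sketch, n.chains = 1)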

Related

Why does rjags give Dimension mismatch taking subset of y error here?

I have written this model, but rjags gives a dimension mismatch error. What's happening?
Error in jags.model(textConnection(model1), data = jags_data, n.chains = n_chains, :
RUNTIME ERROR:
Compilation error on line 8.
Dimension mismatch taking subset of y
library(rjags)
model1 <- "model {
C <- 10000
for (j in 1:nobs){
zeros[j] ~ dpois(phi[j])
phi[j] <- -log(L[j]) + C
L[j] <- add[j]*(lambda[j]^y[j])*(1-lambda[j])^(1-y[j])
add[j] = ifelse(lambda[j] == 0.5, 2, aux[j])
aux[j] = 2*arctanh(1 - 2*lambda[j] + 10^(-323))/(1 - 2*lambda[j] + 10^(-323))
logit(lambda[j]) <- inprod(X[j, ], beta)
}
beta[1] ~ dnorm(0,1)
beta[2] ~ dgamma(1,1)
}"
n_chains = 1
n_adapt = 5000
n_iter = 10000
n_thin = 1
n_burnin = 5000
# generate data
n = 100
Ffun = plogis
design_mat = cbind(1, matrix(seq(0,1,by = 0.2), ncol=1))
gen_data = function(n, beta) {
X = design_mat[sample(nrow(design_mat), size = n, replace = T), ]
lambda = Ffun(X %*% beta)
y = rcbern(n,lambda)
idx = is.nan(y)
y[idx] = runif(length(idx))
list(X = X, y = y)
}
rcbern = function(n,lam){
x = runif(n)
y = log((x*(2*lam-1) - (lam-1))/(1-lam))/log(lam/(1-lam))
return(y)
}
beta = as.matrix(c(-3, 5))
jags_data = gen_data(n, beta)
jags_data$nobs = n
jg_model <- jags.model(textConnection(model1),
data = jags_data,
n.chains = n_chains,
n.adapt = n_adapt)
update(jg_model, n.iter = n_burnin)
result <- coda.samples(jg_model,
variable.names = c("beta"),
n.iter = n_iter,
thin = n_thin,
n.chains = n_chains)
beta_est = list(apply(result[[1]],2,median))
As suggested by @user20650, the issue is that the model indexes y as a vector while your data-generating function returns it as a matrix. Try this code, with a slight change in gen_data():
library(rjags)
model1 <- "model {
C <- 10000
for (j in 1:nobs){
zeros[j] ~ dpois(phi[j])
phi[j] <- -log(L[j]) + C
L[j] <- add[j]*(lambda[j]^y[j])*(1-lambda[j])^(1-y[j])
add[j] = ifelse(lambda[j] == 0.5, 2, aux[j])
aux[j] = 2*arctanh(1 - 2*lambda[j] + 10^(-323))/(1 - 2*lambda[j] + 10^(-323))
logit(lambda[j]) <- inprod(X[j, ], beta)
}
beta[1] ~ dnorm(0,1)
beta[2] ~ dgamma(1,1)
}"
n_chains = 1
n_adapt = 5000
n_iter = 10000
n_thin = 1
n_burnin = 5000
# generate data
n = 100
Ffun = plogis
design_mat = cbind(1, matrix(seq(0,1,by = 0.2), ncol=1))
gen_data = function(n, beta) {
X = design_mat[sample(nrow(design_mat), size = n, replace = T), ]
lambda = Ffun(X %*% beta)
y = rcbern(n,lambda)
y <- as.vector(y)
idx = is.nan(y)
y[idx] = runif(length(idx))
list(X = X, y = y)
}
rcbern = function(n,lam){
x = runif(n)
y = log((x*(2*lam-1) - (lam-1))/(1-lam))/log(lam/(1-lam))
return(y)
}
beta = as.matrix(c(-3, 5))
jags_data = gen_data(n, beta)
jags_data$nobs = n
jg_model <- jags.model(textConnection(model1),
data = jags_data,
n.chains = n_chains,
n.adapt = n_adapt)
update(jg_model, n.iter = n_burnin)
result <- coda.samples(jg_model,
variable.names = c("beta"),
n.iter = n_iter,
thin = n_thin,
n.chains = n_chains)
beta_est = list(apply(result[[1]],2,median))
Output:
beta_est
[[1]]
beta[1] beta[2]
-0.006031984 0.692007301
You can also try y <- y[, 1, drop = TRUE] in the same function instead of as.vector().
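A small check of the shape issue (a sketch that reuses design_mat, beta, Ffun and rcbern as defined above): X %*% beta produces a one-column matrix and rcbern() keeps that shape, while the model subsets y[j] as a plain vector.
lam <- Ffun(design_mat %*% beta)               # 6 x 1 matrix
dim(rcbern(nrow(design_mat), lam))             # c(6, 1): still a matrix
dim(as.vector(rcbern(nrow(design_mat), lam)))  # NULL: a plain vector, which JAGS accepts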

How can I solve a dimension mismatch in a JAGS model?

I'm quite new to Bayesian analysis and I'm trying to practice with an example of classic capture-recapture models: Mh2.
This is my code:
nind <- dim(venados)[1]
K <- 43
ntraps <- 13
M <- 150
nz <- M - nind
Yaug <- array(0, dim = c(M, ntraps, K))
Yaug[1:nind,,] <- venados
y <- apply(Yaug, c(1,3), sum)
y[y > 1] <- 1
# Bundle data
data1 <- list(y = y, nz = nz, nind = nind, K = K, sup = Buffer)
# Model JAGS
sink("Mh2_jags.txt")
cat("
model{
  # Priors
  p0 ~ dunif(0,1)
  mup <- log(p0/(1-p0))
  sigmap ~ dunif(0,10)
  taup <- 1/(sigmap*sigmap)
  psi ~ dunif(0,1)
  # Likelihood
  for (i in 1:(nind+nz)) {
    z[i] ~ dbern(psi)
    lp[i] ~ dnorm(mup,taup)
    logit(p[i]) <- lp[i]
    y[i] ~ dbin(mu[i],K)
  } # i
  N <- sum(z[1:(nind+nz)])
  D <- N/sup*100
} # model
",fill = TRUE)
sink()
# Initial values
inits <- function(){list(z = as.numeric(y >= 1), psi = 0.6, p0 = runif(1), sigmap = runif(1, 0.7, 1.2), lp = rnorm(M, -0.2))}
params1 <- c("p0","sigmap","psi","N","D")
# MCMC
ni <- 10000; nt <- 1; nb <- 1000; nc <- 3
# JAGS and posteriors
fM2 <- jags(data1, inits, params1, "Mh2_jags.txt", n.chains = nc, n.thin = nt, n.iter = ni, n.burnin = nb)
I received this error message:
Processing function input.......
Done.
Compiling model graph
Resolving undeclared variables
Deleting model
Error in jags.model(file = model.file, data = data, inits = inits, n.chains = n.chains, :
RUNTIME ERROR:
Compilation error on line 16.
Dimension mismatch in subset expression of y
I have read that some letters, such as s and n, have to be changed, but I do not know what to do. I would appreciate any advice.
Thank you very much
The issue is that y is two-dimensional but the model assumes it is one-dimensional. If you are assuming that the secondary surveys are i.i.d. Bernoulli trials (and each session had K trials), then you just need to take the row sums of the y matrix. Assuming this is the case, you only need to modify a couple of lines at the top of the script.
nind <- dim(venados)[1]
K <- 43
ntraps <- 13
M <- 150
nz <- M - nind
Yaug <- array(0, dim = c(M, ntraps, K))
Yaug[1:nind,,] <- venados
y <- apply(Yaug, c(1,3), sum)
y[y > 1] <- 1
# Take the rowSum
y_vector <- rowSums(y)
# Use y_vector instead of y
data1 <- list(y = y_vector, nz = nz, nind = nind, K = K, sup = Buffer)
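A quick dimension check (a sketch, assuming y and y_vector were built as above) makes the mismatch visible:
dim(y)            # M x K matrix: one detection indicator per individual and occasion
length(y_vector)  # M: one count per individual, matching the scalar y[i] the model indexes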
Conversely, if you wanted to include covariates for the observational process (and those covariates vary by survey) you would use the matrix y and modify the model.
sink("Mh2_jags_Kloop.txt")
cat("
model{
  # Priors
  p0 ~ dunif(0,1)
  mup <- log(p0/(1-p0))
  sigmap ~ dunif(0,10)
  taup <- 1/(sigmap*sigmap)
  psi ~ dunif(0,1)
  # Likelihood
  for (i in 1:(nind+nz)) {
    z[i] ~ dbern(psi)
    lp[i] ~ dnorm(mup,taup)
    logit(p[i]) <- lp[i]
    # Loop over the K survey occasions
    for (j in 1:K) {
      y[i,j] ~ dbern(p[i]*z[i])
    }
  } # i
  N <- sum(z[1:(nind+nz)])
  D <- N/sup*100
} # model
",fill = TRUE)
sink()
Finally, you don't specify what mu is within the model. I think you want it to be p, but you also need to link the latent state model to the observation model (if z = 0, that individual cannot be detected). In this case you would interpret psi as the probability that each of the nind + nz individuals is present at your site.
# Model JAGS
sink("Mh2_jags.txt")
cat("
model{
  # Priors
  p0 ~ dunif(0,1)
  mup <- log(p0/(1-p0))
  sigmap ~ dunif(0,10)
  taup <- 1/(sigmap*sigmap)
  psi ~ dunif(0,1)
  # Likelihood
  for (i in 1:(nind+nz)) {
    z[i] ~ dbern(psi)
    lp[i] ~ dnorm(mup,taup)
    logit(p[i]) <- lp[i]
    y[i] ~ dbin(p[i] * z[i],K)
  } # i
  N <- sum(z[1:(nind+nz)])
  D <- N/sup*100
} # model
",fill = TRUE)
sink()

Estimating logistic parameters and random effects with nlme

I have managed to fit logistic growth curves for 129 fish belonging to 3 groups. Unfortunately the parameters I got were not consistent, and the models I tried very often crashed. I therefore simulated a data set on which I tried to fit these parameters and to add a random effect to handle the individual variability. I must have missed something with nlme, as I was able to get either consistent coefficients or a consistent variance estimate, but not both.
set.seed(100)
# coefficients for each group
cf <- structure(c(58.8007098743483, 68.9526514961022, 75.7517805503469,
68.2111807884739, 79.0803042994813, 75.2743397284317, 29.8661527230426,
32.7502759832602, 30.7439702116961), .Dim = c(3L, 3L), .Dimnames = list(
c("gr1", "gr2", "gr3"), c("Asym_mean", "xmid_mean", "scal_mean"
)))
# one curve for each individual
nl <- c(68, 38, 23)
Time <- 1:130
tab <- expand.grid(Individual = 1:sum(nl), Time = Time)
tab <- tab[do.call(order, tab),]
tab$Li <- numeric(nrow(tab))
tab$group <- factor(rep(c("gr1", "gr2", "gr3"), nl*130))
for (i in 1:sum(nl)) {
auxi <- tab$Individual %in% i
sec <- unique(tab$group[auxi])
Asym1 <- rnorm(1, cf[sec, "Asym_mean"], 13)
xmid1 <- rnorm(1, cf[sec, "xmid_mean"], 15)
scal1 <- rnorm(1, cf[sec, "scal_mean"], 4.6)
crois <- sort(SSlogis(Time, Asym1, xmid1, scal1) + rnorm(130, 0, 0.3))
tab$Li[auxi] <- crois
}
tab$Individual <- factor(tab$Individual)
Once I got this data set, I tried the following model:
# Initialising coefficients
cfs <- coef(nlsList(Li ~ SSlogis(Time, Asym, xmid, scal)|Individual, data = tab))
cfs <- aggregate(. ~ fac, cbind(cfs, fac = rep(levels(tab$group), nl)), mean)
debt <- lapply(cfs[-1], function(x) c(x[1], x[-1]-x[1]))
debt <- unlist(debt)
# control arguments
lmc <- lmeControl(1e3, 1e3, niterEM=200, msMaxEval = 1e3)
# logistic model for each group
nlme(Li ~ Asym/(1+exp((xmid-Time)/scal)), data = tab,
fixed = Asym + xmid + scal ~ group,
random = Asym + xmid + scal ~ 1|Individual ,
start = debt,
control = lmc)
And I got the following message: "Error in nlme.formula(Li ~ Asym/(1 + exp((xmid - Time)/scal)), data = tab, :
step halving factor reduced below minimum in PNLS step"
I have tried many different formulas, but I was not able to get both the coefficient and the random-effect estimates.
Regards,
Maxime
Well, I didn't find a fully satisfying answer to this issue. I tried ADMB, but I ran into other problems: either I was not able to code the model or I was not able to compile the .tpl file.
I finally used JAGS to do it, with the R2jags package.
I hope it can be useful for someone else:
# The code of the Bayesian model, stored in the file "growth.txt"
model {
  for (i in 1:K) {
    for (j in 1:n) {
      Y[j, i] ~ dnorm(eta[j, i], tauC)
      eta[j, i] <- phi1[i] / (1 + exp(-(x[j]-phi2[i])/phi3[i]))
    }
    ## random effect of the i-th individual
    phi1[i] <- mu1 + a2*gr2[i] + a3*gr3[i] + a[i]
    a[i] ~ dnorm(0, tau1)
    phi2[i] <- mu2 + b2*gr2[i] + b3*gr3[i] + b[i]
    b[i] ~ dnorm(0, tau2)
    phi3[i] <- mu3 + c2*gr2[i] + c3*gr3[i] + c[i]
    c[i] ~ dnorm(0, tau3)
  }
  ## priors
  tauC ~ dgamma(1.0E-3, 1.0E-3)
  logSigma <- -0.5*log(tauC)
  logSigmaA <- -0.5*log(tau1)
  logSigmaB <- -0.5*log(tau2)
  logSigmaC <- -0.5*log(tau3)
  mu1 ~ dnorm(0, 1.0E-4)
  mu2 ~ dnorm(0, 1.0E-4)
  mu3 ~ dnorm(0, 1.0E-4)
  a2 ~ dnorm(0, 1.0E-4)
  a3 ~ dnorm(0, 1.0E-4)
  c2 ~ dnorm(0, 1.0E-4)
  c3 ~ dnorm(0, 1.0E-4)
  b2 ~ dnorm(0, 1.0E-4)
  b3 ~ dnorm(0, 1.0E-4)
  tau1 ~ dgamma(1.0E-3, 1.0E-3)
  tau2 ~ dgamma(1.0E-3, 1.0E-3)
  tau3 ~ dgamma(1.0E-3, 1.0E-3)
}
And the associated R code:
library(R2jags)
library(nlme)    # for nlsList()
library(dplyr)
library(tidyr)
tabw <- spread(tab[-4], Individual, Li,-2, drop = TRUE)
x <- tabw[,1] # Time
# each Individual belongs to one of the three groups
grs <- unique(tab[c(1,4)])
grs <- grs$group[match(colnames(tabw)[-1], grs$Individual)]
# dummy variable
gr2 <- (grs %in% "gr2")*1
gr3 <- (grs %in% "gr3")*1
BUGSData<-list(n = length(x), K = ncol(tabw)-1, x = tabw[,1], Y = tabw[,-1], gr2 = gr2, gr3 = gr3)
cfs <- coef(nlsList(Li ~ SSlogis(Time, Asym, xmid, scal)|Individual, data = tab))
cfs <- cbind(cfs, gr = grs) %>% group_by(gr) %>% summarise_all(funs(mean, sd))
cfs <- cfs %>% mutate(Asym_mean = Asym_mean-Asym_mean[1]*0^((1:n())==1),
xmid_mean = xmid_mean-xmid_mean[1]*0^((1:n())==1),
scal_mean = scal_mean-scal_mean[1]*0^((1:n())==1))
debt <- c(unlist(cfs[2:4]), cfs %>% select(ends_with("sd")) %>% colMeans())
names(debt) <- c("mu1", "a2", "a3", "mu2", "b2", "b3", "mu3", "c2", "c3", "tau1", "tau2", "tau3")
debt <- as.list(debt)
set.seed(1001) ## set RNG seed for R
inits<-c(debt, tauC = 0.1,
.RNG.name="base::Wichmann-Hill", ## set RNG seed/type for JAGS
.RNG.seed=round(runif(1)*1000000))
tfit_jags <- jags(model="growth.txt",
data=BUGSData,
parameters.to.save= c(names(debt),
"logSigma", "logSigmaA", "logSigmaB", "logSigmaC",
"phi1", "phi2", "phi3"),
n.chains=1,
inits=list(inits),
progress.bar="none",
n.iter = 2e3, # 1e6
n.burnin = 1e3 # 1e5,
) # n.thin = 1e3
Finally, I also did it with ADMB and the R2admb package, using the following code.
This code was freely adapted from the Orange-trees example that can be found here:
https://github.com/admb-project/admb-examples/tree/master/growth-models/orange-trees
The code for the growth6.tpl file:
DATA_SECTION
init_int n // Number of data points
init_vector y(1,n) // Response vector
init_vector t(1,n) // Primary covariate
init_int M // Number of groups
init_vector ngroup(1,M) // Group indicator
init_int m // Number of parameters in nonlinear regression model
init_vector gr2(1,M) // dummy variable for being in group 2
init_vector gr3(1,M) // dummy variable for being in group 3
PARAMETER_SECTION
init_bounded_vector beta(1,3,-40,40,1) // Fixed effects parameters
init_bounded_number log_sigma(-5,5.0,1) // log(residual variance)
init_bounded_number log_sigma_u(-10,5,2) // 0.5*log(variance component)
init_bounded_number log_sigma_v(-10,5,3) // 0.5*log(variance component)
init_bounded_number log_sigma_w(-10,5,4) // 0.5*log(variance component)
init_bounded_vector beta2(1,3,-40,40,1) // Fixed effects for group 2
init_bounded_vector beta3(1,3,-40,40,1) // Fixed effects for group 3
random_effects_vector u(1,M,2) // Unscaled random effects
random_effects_vector v(1,M,3)
random_effects_vector w(1,M,3)
objective_function_value g
PRELIMINARY_CALCS_SECTION
cout << setprecision(4); //
GLOBALS_SECTION
#include <df1b2fun.h>
//#include <fvar.hpp>
PROCEDURE_SECTION
int i,ii,iii;
g = 0.0;
ii = 0;
iii = 0;
for(i=1;i<=(int) M;i++) // loop on individuals
{
fit_individual_tree(beta(1),beta(2),beta(3),beta2(1),beta2(2),beta2(3),beta3(1),beta3(2),beta3(3),u(i),v(i),w(i),i,ii,iii,log_sigma,log_sigma_u,log_sigma_v,log_sigma_w);
}
SEPARABLE_FUNCTION void fit_individual_tree(const dvariable& beta1,const dvariable& beta2,const dvariable& beta3,const dvariable& a1,const dvariable& a2,const dvariable& a3,const dvariable& b1,const dvariable& b2,const dvariable& b3,const dvariable& u1,const dvariable& v1,const dvariable& w1,int i,int& ii,int& iii,const dvariable& log_sigma,const dvariable& log_sigma_u,const dvariable& log_sigma_v,const dvariable& log_sigma_w)
int j;
int g1;
int g2;
int g3;
iii++;
dvar_vector a(1,3); // Basic model function parameters
g2 = gr2(iii);
g3 = gr3(iii);
g1 = 1-g2-g3;
a(1) = 62.26 + beta1*g1 + a1*g2 + b1*g3 + u1;
a(2) = 72.90 + beta2*g1 + a2*g2 + b2*g3 + v1;
a(3) = 31.35 + beta3*g1 + a3*g2 + b3*g3 + w1;
dvariable tmp, f;
dvariable sigma = mfexp(log_sigma);
// Random effects contribution
g -= -(log_sigma_u);
g -= -.5*(square(u1/mfexp(log_sigma_u)));
g -= -(log_sigma_v);
g -= -.5*(square(v1/mfexp(log_sigma_v)));
g -= -(log_sigma_w);
g -= -.5*(square(w1/mfexp(log_sigma_w)));
for(j=1;j<=ngroup(i);j++)
{
g -= -log_sigma;
ii++;
f = a(1)/(1+mfexp(-(t(ii)-a(2))/a(3)));
tmp = y(ii) - f;
tmp /= sigma;
g -= -0.5*tmp*tmp;
}
REPORT_SECTION
//report << beta0+beta << endl;
report << exp(log_sigma) << endl;
report << exp(log_sigma_u) << endl;
TOP_OF_MAIN_SECTION
arrmblsize = 40000000L;
gradient_structure::set_GRADSTACK_BUFFER_SIZE(300000000);
gradient_structure::set_CMPDIF_BUFFER_SIZE(20000000);
gradient_structure::set_MAX_NVAR_OFFSET(1000000);
Then the R code to estimate the parameters:
library(dplyr)
library(tidyr)
library(nlme)
library(R2admb)
set.seed(100)
# coefficients for each group
cf <- structure(c(58.8007098743483, 68.9526514961022, 75.7517805503469,
68.2111807884739, 79.0803042994813, 75.2743397284317, 29.8661527230426,
32.7502759832602, 30.7439702116961), .Dim = c(3L, 3L), .Dimnames = list(
c("gr1", "gr2", "gr3"), c("Asym_mean", "xmid_mean", "scal_mean"
)))
nl <- c(68, 38, 23)
Time <- 1:130
tab <- expand.grid(Individual = 1:sum(nl), Time = Time)
tab <- tab[do.call(order, tab),]
tab$Li <- numeric(nrow(tab))
tab$group <- factor(rep(c("gr1", "gr2", "gr3"), nl*130))
for (i in 1:sum(nl)) {
auxi <- tab$Individual %in% i
sec <- unique(tab$group[auxi])
Asym1 <- rnorm(1, cf[sec, "Asym_mean"], 13)
xmid1 <- rnorm(1, cf[sec, "xmid_mean"], 15)
scal1 <- rnorm(1, cf[sec, "scal_mean"], 4.6)
crois <- sort(SSlogis(Time, Asym1, xmid1, scal1) + rnorm(130, 0, 0.3))
tab$Li[auxi] <- crois
}
tab$Individual <- factor(tab$Individual)
grs <- unique(tab[c("Individual", "group")])
gr2 <- as.integer((grs$group == "gr2")*1)
gr3 <- as.integer((grs$group == "gr3")*1)
do_admb("growth6",
data =
list(n = nrow(tab), y = tab$Li, t = tab$Time, M = 129, ngroup = rep(130, 129), m=3,
gr2 = gr2, gr3 = gr3),
params =
list(beta = rep(0, 3),
log_sigma = 1, log_sigma_u = 1, log_sigma_v = 1, log_sigma_w = 1,
beta2 = rep(0, 3), beta3 = rep(0, 3),
u = rep(0, 129), v = rep(0, 129), w = rep(0, 129)),
run.opts = run.control(clean_files = "none")
)
ted <- read_admb("growth6")
cfe <- matrix(coef(ted)[grep("beta", names(coef(ted)))]+c(62.26, 72.90, 31.35), 3)
rownames(cfe) <- sprintf("phi%d", 1:3)
colnames(cfe) <- sprintf("gr%d", 1:3)
# we can compare with
coef(nlsList(Li ~ SSlogis(Time, phi1, phi2, phi3)|group, tab))
I hope this can help someone else.
Max

R: using bootstrap prediction on mixed model

library(nlme)
library(bootstrap)
y = Loblolly$height
x = Loblolly
theta.fit = function(x, y){
nlme(height ~ SSasymp(age, Asym, R0, lrc),
data = x,
fixed = Asym + R0 + lrc ~ 1,
random = Asym ~ 1,
start = c(Asym = 103, R0 = -8.5, lrc = -3.3))
}
theta.predict = function(fit, x){
(fit$fitted)[,1]
}
sq.err <- function(y,yhat) { (y-yhat)^2}
results <- bootpred(x,y,20,theta.fit,theta.predict,
err.meas=sq.err)
I am using the bootpred function to obtain estimates of prediction error. However, when I run the last line, I get the following error:
Error in model.frame.default(formula = ~height + age, data = c(" 4.51", :
'data' must be a data.frame, not a matrix or an array
I then tried x = data.frame(x) but that did not solve my problem.
The problem comes about because the example dataset used is a groupedData:
library(nlme)
library(bootstrap)
y = Loblolly$height
x = Loblolly
class(x)
[1] "nfnGroupedData" "nfGroupedData" "groupedData" "data.frame"
Inside the bootpred function it is converted into a matrix again, and it can be quite a mess converting back and forth, especially when you need the factor column for mixed models.
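A small illustration of that conversion (not part of the fix): as.matrix() on a data.frame with a factor column, such as Seed here, coerces every column to character, which matches the data = c(" 4.51", ... seen in the error message.
# height becomes a character column after the matrix coercion
str(as.matrix(Loblolly)[, "height"])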
What you can do is write theta.fit and theta.predict so that they take a data.frame:
theta.fit = function(df){
nlme(height ~ SSasymp(age, Asym, R0, lrc),
data = df,
fixed = Asym + R0 + lrc ~ 1,
random = Asym ~ 1,
start = c(Asym = 103, R0 = -8.5, lrc = -3.3))
}
theta.predict = function(fit, df){
predict(fit,df)
}
sq.err <- function(y,yhat) { (y-yhat)^2}
Now alter the bootpred function to use df. I guess you can provide y again, or specify the column to use in the data.frame:
bootpred_df = function (df,y,nboot, theta.fit, theta.predict, err.meas, ...)
{
call <- match.call()
n <- length(y)
saveii <- NULL
fit0 <- theta.fit(df, ...)
yhat0 <- theta.predict(fit0, df)
app.err <- mean(err.meas(y, yhat0))
err1 <- matrix(0, nrow = nboot, ncol = n)
err2 <- rep(0, nboot)
for (b in 1:nboot) {
ii <- sample(1:n, replace = TRUE)
saveii <- cbind(saveii, ii)
fit <- theta.fit(df[ii, ], ...)
yhat1 <- theta.predict(fit, df[ii, ])
yhat2 <- theta.predict(fit, df)
err1[b, ] <- err.meas(y, yhat2)
err2[b] <- mean(err.meas(y[ii], yhat1))
}
optim <- mean(apply(err1, 1, mean,na.rm=TRUE) - err2)
junk <- function(x, i) {
sum(x == i)
}
e0 <- 0
for (i in 1:n) {
o <- apply(saveii, 2, junk, i)
if (sum(o == 0) == 0)
cat("increase nboot for computation of the .632 estimator",
fill = TRUE)
e0 <- e0 + (1/n) * sum(err1[o == 0, i])/sum(o == 0)
}
err.632 <- 0.368 * app.err + 0.632 * e0
return(list(app.err, optim, err.632, call = call))
}
We can run it now, but because of the nature of this data there will be instances where the group (Seed) has an uneven distribution, making some of the variables hard to estimate. Most likely this problem would be better addressed by refining the code. In any case, if you are lucky, it works like below:
bootpred_df(Loblolly,Loblolly$height,20,theta.fit,theta.predict,err.meas=sq.err)
[[1]]
[1] 0.4337236
[[2]]
[1] 0.1777644
[[3]]
[1] 0.6532417
$call
bootpred_df(df = Loblolly, y = Loblolly$height, nboot = 20, theta.fit = theta.fit,
theta.predict = theta.predict, err.meas = sq.err)

Package dglm in R

I am trying to fit a double GLM in R using the dglm package, in combination with the statmod package, which provides the tweedie family. A reproduction of the problem:
library(dglm)
library(statmod)
p <- 1.5
y <- runif(10)
x <- runif(10)
dglm(y ~ x, ~x, family = tweedie(link.power = 0, var.power = p))
# doesn't work
dglm(y ~ x, ~x, family = tweedie(link.power = 0, var.power = 1.5))
# works
var.power needs to be held in a variable, since I want to use a loop in which dglm runs on every entry of a vector of values.
You can fix the problem by forcing dglm to evaluate the call where you input p. In the dglm function, at about line 73, this:
if (family$family == "Tweedie") {
tweedie.p <- call$family$var.power
}
should be:
if (family$family == "Tweedie") {
tweedie.p <- eval(call$family$var.power)
}
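The reason the eval() matters is that match.call() stores the unevaluated call, so call$family$var.power is the symbol p rather than the number 1.5 until it is evaluated. A small standalone illustration of the mechanism (not dglm code):
library(statmod)   # for tweedie()

f <- function(family) {
  cl <- match.call()                            # the unevaluated call
  list(raw = cl$family$var.power,               # the symbol `p`
       evaluated = eval(cl$family$var.power))   # the number 1.5
}
p <- 1.5
f(family = tweedie(link.power = 0, var.power = p))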
You can make your own function with the patch like this:
dglm.nograpes <- function (formula = formula(data), dformula = ~1, family = gaussian,
dlink = "log", data = sys.parent(), subset = NULL, weights = NULL,
contrasts = NULL, method = "ml", mustart = NULL, betastart = NULL,
etastart = NULL, phistart = NULL, control = dglm.control(...),
ykeep = TRUE, xkeep = FALSE, zkeep = FALSE, ...)
{
call <- match.call()
if (is.character(family))
family <- get(family, mode = "function", envir = parent.frame())
if (is.function(family))
family <- family()
if (is.null(family$family)) {
print(family)
stop("'family' not recognized")
}
mnames <- c("", "formula", "data", "weights", "subset")
cnames <- names(call)
cnames <- cnames[match(mnames, cnames, 0)]
mcall <- call[cnames]
mcall[[1]] <- as.name("model.frame")
mframe <<- eval(mcall, sys.parent())
mf <- match.call(expand.dots = FALSE)
y <- model.response(mframe, "numeric")
if (is.null(dim(y))) {
N <- length(y)
}
else {
N <- dim(y)[1]
}
nobs <- N
mterms <- attr(mframe, "terms")
X <- model.matrix(mterms, mframe, contrasts)
weights <- model.weights(mframe)
if (is.null(weights))
weights <- rep(1, N)
if (is.null(weights))
weights <- rep(1, N)
if (!is.null(weights) && any(weights < 0)) {
stop("negative weights not allowed")
}
offset <- model.offset(mframe)
if (is.null(offset))
offset <- rep(0, N)
if (!is.null(offset) && length(offset) != NROW(y)) {
stop(gettextf("number of offsets is %d should equal %d (number of observations)",
length(offset), NROW(y)), domain = NA)
}
mcall$formula <- formula
mcall$formula[3] <- switch(match(length(dformula), c(0, 2,
3)), 1, dformula[2], dformula[3])
mframe <- eval(mcall, sys.parent())
dterms <- attr(mframe, "terms")
Z <- model.matrix(dterms, mframe, contrasts)
doffset <- model.extract(mframe, offset)
if (is.null(doffset))
doffset <- rep(0, N)
name.dlink <- substitute(dlink)
if (is.name(name.dlink)) {
if (is.character(dlink)) {
name.dlink <- dlink
}
else {
dlink <- name.dlink <- as.character(name.dlink)
}
}
else {
if (is.call(name.dlink))
name.dlink <- deparse(name.dlink)
}
if (!is.null(name.dlink))
name.dlink <- name.dlink
if (family$family == "Tweedie") {
tweedie.p <- eval(call$family$var.power)
}
Digamma <- family$family == "Gamma" || (family$family ==
"Tweedie" && tweedie.p == 2)
if (Digamma) {
linkinv <- make.link(name.dlink)$linkinv
linkfun <- make.link(name.dlink)$linkfun
mu.eta <- make.link(name.dlink)$mu.eta
valid.eta <- make.link(name.dlink)$valid.eta
init <- expression({
if (any(y <= 0)) {
print(y)
print(any(y <= 0))
stop("non-positive values not allowed for the DM gamma family")
}
n <- rep.int(1, nobs)
mustart <- y
})
dfamily <- structure(list(family = "Digamma", variance = varfun.digamma,
dev.resids = function(y, mu, wt) {
wt * unitdeviance.digamma(y, mu)
}, aic = function(y, n, mu, wt, dev) NA, link = name.dlink,
linkfun = linkfun, linkinv = linkinv, mu.eta = mu.eta,
initialize = init, validmu = function(mu) {
all(mu > 0)
}, valideta = valid.eta))
}
else {
eval(substitute(dfamily <- Gamma(link = lk), list(lk = name.dlink)))
}
dlink <- as.character(dfamily$link)
logdlink <- dlink == "log"
if (!is.null(call$method)) {
name.method <- substitute(method)
if (!is.character(name.method))
name.method <- deparse(name.method)
list.methods <- c("ml", "reml", "ML", "REML", "Ml", "Reml")
i.method <- pmatch(method, list.methods, nomatch = 0)
if (!i.method)
stop("Method must be ml or reml")
method <- switch(i.method, "ml", "reml", "ml", "reml",
"ml", "reml")
}
reml <- method == "reml"
if (is.null(mustart)) {
etastart <- NULL
eval(family$initialize)
mu <- mustart
mustart <- NULL
}
if (!is.null(betastart)) {
eta <- X %*% betastart
mu <- family$linkinv(eta + offset)
}
else {
if (!is.null(mustart)) {
mu <- mustart
eta <- family$linkfun(mu) - offset
}
else {
eta <- lm.fit(X, family$linkfun(mu) - offset, singular.ok = TRUE)$fitted.values
mu <- family$linkinv(eta + offset)
}
}
d <- family$dev.resids(y, mu, weights)
if (!is.null(phistart)) {
phi <- phistart
deta <- dfamily$linkfun(phi) - doffset
}
else {
deta <- lm.fit(Z, dfamily$linkfun(d + (d == 0)/6) - doffset,
singular.ok = TRUE)$fitted.values
if (logdlink)
deta <- deta + 1.27036
phi <- dfamily$linkinv(deta + offset)
}
if (any(phi <= 0)) {
cat("Some values for phi are non-positive, suggesting an inappropriate model",
"Try a different link function.\n")
}
zm <- as.vector(eta + (y - mu)/family$mu.eta(eta))
wm <- as.vector(eval(family$variance(mu)) * weights/phi)
mfit <- lm.wfit(X, zm, wm, method = "qr", singular.ok = TRUE)
eta <- mfit$fitted.values
mu <- family$linkinv(eta + offset)
cat("family:", family$family, "\n")
if (family$family == "Tweedie") {
cat("p:", tweedie.p, "\n")
if ((tweedie.p > 0) & (any(mu < 0))) {
cat("Some values for mu are negative, suggesting an inappropriate model.",
"Try a different link function.\n")
}
}
d <- family$dev.resids(y, mu, weights)
const <- dglm.constant(y, family, weights)
if (Digamma) {
h <- 2 * (lgamma(weights/phi) + (1 + log(phi/weights)) *
weights/phi)
}
else {
h <- log(phi/weights)
}
m2loglik <- const + sum(h + d/phi)
if (reml)
m2loglik <- m2loglik + 2 * log(abs(prod(diag(mfit$R))))
m2loglikold <- m2loglik + 1
epsilon <- control$epsilon
maxit <- control$maxit
trace <- control$trace
iter <- 0
while (abs(m2loglikold - m2loglik)/(abs(m2loglikold) + 1) >
epsilon && iter < maxit) {
hdot <- 1/dfamily$mu.eta(deta)
if (Digamma) {
delta <- 2 * weights * (log(weights/phi) - digamma(weights/phi))
u <- 2 * weights^2 * (trigamma(weights/phi) - phi/weights)
fdot <- phi^2/u * hdot
}
else {
delta <- phi
u <- phi^2
fdot <- hdot
}
wd <- 1/(fdot^2 * u)
if (reml) {
h <- hat(mfit$qr)
delta <- delta - phi * h
wd <- wd - 2 * (h/hdot^2/phi^2) + h^2
}
if (any(wd < 0)) {
cat(" Some weights are negative; temporarily fixing. This may be a sign of an inappropriate model.\n")
wd[wd < 0] <- 0
}
if (any(is.infinite(wd))) {
cat(" Some weights are negative; temporarily fixing. This may be a sign of an inappropriate model.\n")
wd[is.infinite(wd)] <- 100
}
zd <- deta + (d - delta) * fdot
dfit <- lm.wfit(Z, zd, wd, method = "qr", singular.ok = TRUE)
deta <- dfit$fitted.values
phi <- dfamily$linkinv(deta + doffset)
if (any(is.infinite(phi))) {
cat("*** Some values for phi are infinite, suggesting an inappropriate model",
"Try a different link function. Making an attempt to continue...\n")
phi[is.infinite(phi)] <- 10
}
zm <- eta + (y - mu)/family$mu.eta(eta)
fam.wt <- expression(weights * family$variance(mu))
wm <- eval(fam.wt)/phi
mfit <- lm.wfit(X, zm, wm, method = "qr", singular.ok = TRUE)
eta <- mfit$fitted.values
mu <- family$linkinv(eta + offset)
if (family$family == "Tweedie") {
if ((tweedie.p > 0) & (any(mu < 0))) {
cat("*** Some values for mu are negative, suggesting an inappropriate model.",
"Try a different link function. Making an attempt to continue...\n")
mu[mu <= 0] <- 1
}
}
d <- family$dev.resids(y, mu, weights)
m2loglikold <- m2loglik
if (Digamma) {
h <- 2 * (lgamma(weights/phi) + (1 + log(phi/weights)) *
weights/phi)
}
else {
h <- log(phi/weights)
}
m2loglik <- const + sum(h + d/phi)
if (reml) {
m2loglik <- m2loglik + 2 * log(abs(prod(diag(mfit$R))))
}
iter <- iter + 1
if (trace)
cat("DGLM iteration ", iter, ": -2*log-likelihood = ",
format(round(m2loglik, 4)), " \n", sep = "")
}
mfit$formula <- call$formula
mfit$call <- call
mfit$family <- family
mfit$linear.predictors <- mfit$fitted.values + offset
mfit$fitted.values <- mu
mfit$prior.weights <- weights
mfit$terms <- mterms
mfit$contrasts <- attr(X, "contrasts")
intercept <- attr(mterms, "intercept")
mfit$df.null <- N - sum(weights == 0) - as.integer(intercept)
mfit$call <- call
mfit$deviance <- sum(d/phi)
mfit$aic <- NA
mfit$null.deviance <- glm.fit(x = X, y = y, weights = weights/phi,
offset = offset, family = family)
if (length(mfit$null.deviance) > 1)
mfit$null.deviance <- mfit$null.deviance$null.deviance
if (ykeep)
mfit$y <- y
if (xkeep)
mfit$x <- X
class(mfit) <- c("glm", "lm")
dfit$family <- dfamily
dfit$prior.weights <- rep(1, N)
dfit$linear.predictors <- dfit$fitted.values + doffset
dfit$fitted.values <- phi
dfit$terms <- dterms
dfit$aic <- NA
call$formula <- call$dformula
call$dformula <- NULL
call$family <- call(dfamily$family, link = name.dlink)
dfit$call <- call
dfit$residuals <- dfamily$dev.resid(d, phi, wt = rep(1/2,
N))
dfit$deviance <- sum(dfit$residuals)
dfit$null.deviance <- glm.fit(x = Z, y = d, weights = rep(1/2,
N), offset = doffset, family = dfamily)
if (length(dfit$null.deviance) > 1)
dfit$null.deviance <- dfit$null.deviance$null.deviance
if (ykeep)
dfit$y <- d
if (zkeep)
dfit$z <- Z
dfit$formula <- as.vector(attr(dterms, "formula"))
dfit$iter <- iter
class(dfit) <- c("glm", "lm")
out <- c(mfit, list(dispersion.fit = dfit, iter = iter, method = method,
m2loglik = m2loglik))
class(out) <- c("dglm", "glm", "lm")
out
}
And then run it like this:
dglm.nograpes(y~x,~x,family=tweedie(link.power=0, var.power=p))
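Since the original goal was to loop over several var.power values, a usage sketch could look like the following (the grid of p values is made up for illustration). Assigning p inside a top-level for loop keeps it visible where the patched call is evaluated:
p_values <- c(1.1, 1.3, 1.5, 1.7)   # hypothetical grid of Tweedie power values
fits <- vector("list", length(p_values))
for (k in seq_along(p_values)) {
  p <- p_values[k]
  fits[[k]] <- dglm.nograpes(y ~ x, ~x, family = tweedie(link.power = 0, var.power = p))
}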
