Estimating logsitic parameters and random effects with nlme - r
I have managed to fit logistic curves to fit growth models for 129 fish belonging to 3 groups. Unfortunately the parameters I got were not consistent and very often the models I tried have crashed. Therefore I've simulated a data set on which I've tried to fit these parameters and to add a random effect to handle the individual vaiability. I must have missed something with nlme as I was either able to get consistent coefficients or consitent variance estimation but not the two.
set.seed(100)
# coefficients for each group
cf <- structure(c(58.8007098743483, 68.9526514961022, 75.7517805503469,
68.2111807884739, 79.0803042994813, 75.2743397284317, 29.8661527230426,
32.7502759832602, 30.7439702116961), .Dim = c(3L, 3L), .Dimnames = list(
c("gr1", "gr2", "gr3"), c("Asym_mean", "xmid_mean", "scal_mean"
)))
# one curve for each individual
nl <- c(68, 38, 23)
Time <- 1:130
tab <- expand.grid(Individual = 1:sum(nl), Time = Time)
tab <- tab[do.call(order, tab),]
tab$Li <- numeric(nrow(tab))
tab$group <- factor(rep(c("gr1", "gr2", "gr3"), nl*130))
for (i in 1:sum(nl)) {
auxi <- tab$Individu %in% i
sec <- unique(tab$group[auxi])
Asym1 <- rnorm(1, cf[sec, "Asym_mean"], 13)
xmid1 <- rnorm(1, cf[sec, "xmid_mean"], 15)
scal1 <- rnorm(1, cf[sec, "scal_mean"], 4.6)
crois <- sort(SSlogis(Time, Asym1, xmid1, scal1) + rnorm(130, 0, 0.3))
tab$Li[auxi] <- crois
}
tab$Individual <- factor(tab$Individual)
Once I got this data set I tried the following model :
# Initialising coefficients
cfs <- coef(nlsList(Li ~ SSlogis(Time, Asym, xmid, scal)|Individual, data = tab))
cfs <- aggregate(. ~ fac, cbind(cfs, fac = rep(levels(tab$group), nl)), mean)
debt <- lapply(cfs[-1], function(x) c(x[1], x[-1]-x[1]))
debt <- unlist(debt)
# control arguments
lmc <- lmeControl(1e3, 1e3, niterEM=200, msMaxEval = 1e3)
# logistic model for each group
nlme(Li ~ Asym/(1+exp((xmid-Time)/scal)), data = tab,
fixed = Asym + xmid + scal ~ group,
random = Asym + xmid + scal ~ 1|Individual ,
start = debt,
control = lmc)
And I got the following message : "Error in nlme.formula(Li ~ Asym/(1 + exp((xmid - Time)/scal)), data = tab, :
step halving factor reduced below minimum in PNLS step"
I have tried many different formulas and I was not able to get coefficients and random effects estimations.
Regards,
Maxime
Well I didin't find a satisfying answer to this issue. I've tried ADMB, but I've encountered different issues, either I was not able to code this model or I was not able to compile the .ptl file.
I finally used jags to dot it, with the library R2jags.
I hope it could be useful for someone else :
# the code of the bayesian model stored in the file "growth.txt"
model {
for (i in 1:K) {
for (j in 1:n) {
Y[j, i] ~ dnorm(eta[j, i], tauC)
eta[j, i] <- phi1[i] / (1 + exp(-(x[j]-phi2[i])/phi3[i]))
}
## random effect of iˆth tree
phi1[i] <- mu1 + a2*gr2[i] + a3*gr3[i] + a[i]
a[i] ~ dnorm(0, tau1)
phi2[i] <- mu2 + b2*gr2[i] + b3*gr3[i] + b[i]
b[i] ~ dnorm(0, tau2)
phi3[i] <- mu3 + c2*gr2[i] + c3*gr3[i] + c[i]
c[i] ~ dnorm(0, tau3)
}
## priors
tauC ~ dgamma(1.0E-3, 1.0E-3)
logSigma <- -0.5*log(tauC)
logSigmaA <- -0.5*log(tau1)
logSigmaB <- -0.5*log(tau2)
logSigmaC <- -0.5*log(tau3)
mu1 ~ dnorm(0, 1.0E-4)
mu2 ~ dnorm(0, 1.0E-4)
mu3 ~ dnorm(0, 1.0E-4)
a2 ~ dnorm(0, 1.0E-4)
a3 ~ dnorm(0, 1.0E-4)
c2 ~ dnorm(0, 1.0E-4)
c3 ~ dnorm(0, 1.0E-4)
b2 ~ dnorm(0, 1.0E-4)
b3 ~ dnorm(0, 1.0E-4)
c2 ~ dnorm(0, 1.0E-4)
c3 ~ dnorm(0, 1.0E-4)
tau1 ~ dgamma(1.0E-3, 1.0E-3)
tau2 ~ dgamma(1.0E-3, 1.0E-3)
tau3 ~ dgamma(1.0E-3, 1.0E-3)
}
And the associated R lines :
#
library(tidyr)
tabw <- spread(tab[-4], Individual, Li,-2, drop = TRUE)
x <- tabw[,1] # Time
# each Individual belong to one of the three groups
grs <- unique(tab[c(1,4)])
grs <- grs$group[match(colnames(tabw)[-1], grs$Individual)]
# dummy variable
gr2 <- (grs %in% "gr2")*1
gr3 <- (grs %in% "gr3")*1
BUGSData<-list(n = length(x), K = ncol(tabw)-1, x = tabw[,1], Y = tabw[,-1], gr2 = gr2, gr3 = gr3)
cfs <- coef(nlsList(Li ~ SSlogis(Time, Asym, xmid, scal)|Individual, data = tab))
cfs <- cbind(cfs, gr = grs) %>% group_by(gr) %>% summarise_all(funs(mean, sd))
cfs <- cfs %>% mutate(Asym_mean = Asym_mean-Asym_mean[1]*0^((1:n())==1),
xmid_mean = xmid_mean-xmid_mean[1]*0^((1:n())==1),
scal_mean = scal_mean-scal_mean[1]*0^((1:n())==1))
debt <- c(unlist(cfs[2:4]), cfs %>% select(ends_with("sd")) %>% colMeans())
names(debt) <- c("mu1", "a2", "a3", "mu2", "b2", "b3", "mu3", "c2", "c3", "tau1", "tau2", "tau3")
debt <- as.list(debt)
set.seed(1001) ## set RNG seed for R
inits<-c(debt, tauC = 0.1,
.RNG.name="base::Wichmann-Hill", ## set RNG seed/type for JAGS
.RNG.seed=round(runif(1)*1000000))
tfit_jags <- jags(model="growth.txt",
data=BUGSData,
parameters.to.save= c(names(debt),
"logSigma", "logSigmaA", "logSigmaB", "logSigmaC",
"phi1", "phi2", "phi3"),
n.chains=1,
inits=list(inits),
progress.bar="none",
n.iter = 2e3, # 1e6
n.burnin = 1e3 # 1e5,
) # n.thin = 1e3
Finally, I dit it also with ADMB and the library R2 admb with the following code.
This code was freely adapted from the Orange example that can be found here :
https://github.com/admb-project/admb-examples/tree/master/growth-models/orange-trees
The code for growth6.tpl file :
DATA_SECTION
init_int n // Number of data points
init_vector y(1,n) // Response vector
init_vector t(1,n) // Primary covariate
init_int M // Number of groups
init_vector ngroup(1,M) // Group indicator
init_int m // Number of parameters in nonlinear regression model
init_vector gr2(1,M) // dummy variable for being in group 2
init_vector gr3(1,M) // dummy variable for being in group 3
PARAMETER_SECTION
init_bounded_vector beta(1,3,-40,40,1) // Fixed effects parameters
init_bounded_number log_sigma(-5,5.0,1) // log(residual variance)
init_bounded_number log_sigma_u(-10,5,2) // 0.5*log(variance component)
init_bounded_number log_sigma_v(-10,5,3) // 0.5*log(variance component)
init_bounded_number log_sigma_w(-10,5,4) // 0.5*log(variance component)
init_bounded_vector beta2(1,3,-40,40,1) // Fixed effects for group 2
init_bounded_vector beta3(1,3,-40,40,1) // Fixed effects for group 3
random_effects_vector u(1,M,2) // Unscaled random effects
random_effects_vector v(1,M,3)
random_effects_vector w(1,M,3)
objective_function_value g
PRELIMINARY_CALCS_SECTION
cout << setprecision(4); //
GLOBALS_SECTION
#include <df1b2fun.h>
//#include <fvar.hpp>
PROCEDURE_SECTION
int i,ii,iii;
g = 0.0;
ii = 0;
iii = 0;
for(i=1;i<=(int) M;i++) // loop on individuals
{
fit_individual_tree(beta(1),beta(2),beta(3),beta2(1),beta2(2),beta2(3),beta3(1),beta3(2),beta3(3),u(i),v(i),w(i),i,ii,iii,log_sigma,log_sigma_u,log_sigma_v,log_sigma_w);
}
SEPARABLE_FUNCTION void fit_individual_tree(const dvariable& beta1,const dvariable& beta2,const dvariable& beta3,const dvariable& a1,const dvariable& a2,const dvariable& a3,const dvariable& b1,const dvariable& b2,const dvariable& b3,const dvariable& u1,const dvariable& v1,const dvariable& w1,int i,int& ii,int& iii,const dvariable& log_sigma,const dvariable& log_sigma_u,const dvariable& log_sigma_v,const dvariable& log_sigma_w)
int j;
int g1;
int g2;
int g3;
iii++;
dvar_vector a(1,3); // Basic model function parameters
g2 = gr2(iii);
g3 = gr3(iii);
g1 = 1-g2-g3;
a(1) = 62.26 + beta1*g1 + a1*g2 + b1*g3 + u1;
a(2) = 72.90 + beta2*g1 + a2*g2 + b2*g3 + v1;
a(3) = 31.35 + beta3*g1 + a3*g2 + b3*g3 + w1;
dvariable tmp, f;
dvariable sigma = mfexp(log_sigma);
// Random effects contribution
g -= -(log_sigma_u);
g -= -.5*(square(u1/mfexp(log_sigma_u)));
g -= -(log_sigma_v);
g -= -.5*(square(v1/mfexp(log_sigma_v)));
g -= -(log_sigma_w);
g -= -.5*(square(w1/mfexp(log_sigma_w)));
for(j=1;j<=ngroup(i);j++)
{
g -= -log_sigma;
ii++;
f = a(1)/(1+mfexp(-(t(ii)-a(2))/a(3)));
tmp = y(ii) - f;
tmp /= sigma;
g -= -0.5*tmp*tmp;
}
REPORT_SECTION
//report << beta0+beta << endl;
report << exp(log_sigma) << endl;
report << exp(log_sigma_u) << endl;
TOP_OF_MAIN_SECTION
arrmblsize = 40000000L;
gradient_structure::set_GRADSTACK_BUFFER_SIZE(300000000);
gradient_structure::set_CMPDIF_BUFFER_SIZE(20000000);
gradient_structure::set_MAX_NVAR_OFFSET(1000000);
Then the R code to estimate parameters :
library(dplyr)
library(tidyr)
library(nlme)
library(R2admb)
set.seed(100)
# coefficients for each group
# coefficients for each group
cf <- structure(c(58.8007098743483, 68.9526514961022, 75.7517805503469,
68.2111807884739, 79.0803042994813, 75.2743397284317, 29.8661527230426,
32.7502759832602, 30.7439702116961), .Dim = c(3L, 3L), .Dimnames = list(
c("gr1", "gr2", "gr3"), c("Asym_mean", "xmid_mean", "scal_mean"
)))
nl <- c(68, 38, 23)
Time <- 1:130
tab <- expand.grid(Individual = 1:sum(nl), Time = Time)
tab <- tab[do.call(order, tab),]
tab$Li <- numeric(nrow(tab))
tab$group <- factor(rep(c("gr1", "gr2", "gr3"), nl*130))
for (i in 1:sum(nl)) {
auxi <- tab$Individu %in% i
sec <- unique(tab$group[auxi])
Asym1 <- rnorm(1, cf[sec, "Asym_mean"], 13)
xmid1 <- rnorm(1, cf[sec, "xmid_mean"], 15)
scal1 <- rnorm(1, cf[sec, "scal_mean"], 4.6)
crois <- sort(SSlogis(Time, Asym1, xmid1, scal1) + rnorm(130, 0, 0.3))
tab$Li[auxi] <- crois
}
tab$Individual <- factor(tab$Individual)
grs <- unique(tab[c("Individual", "group")])
gr2 <- as.integer((grs$group == "gr2")*1)
gr3 <- as.integer((grs$group == "gr3")*1)
do_admb("growth6",
data =
list(n = nrow(tab), y = tab$Li, t = tab$Time, M = 129, ngroup = rep(130, 129), m=3,
gr2 = gr2, gr3 = gr3),
params =
list(beta = rep(0, 3),
log_sigma = 1, log_sigma_u = 1, log_sigma_v = 1, log_sigma_w = 1,
beta2 = rep(0, 3), beta3 = rep(0, 3),
u = rep(0, 129), v = rep(0, 129), w = rep(0, 129)),
run.opts = run.control(clean_files = "none")
)
ted <- read_admb("growth6")
cfe <- matrix(coef(ted)[grep("beta", names(coef(ted)))]+c(62.26, 72.90, 31.35), 3)
rownames(cfe) <- sprintf("phi%d", 1:3)
colnames(cfe) <- sprintf("gr%d", 1:3)
# we can compare with
coef(nlsList(Li ~ SSlogis(Time, phi1, phi2, phi3)|group, tab))
I hope this could help someone-else.
Max
Related
JAGS mixture of normals with regression for the mean of the normals
I'm working on a dataset where the target variable y is likely distributed as a mixture of normals. The data is the score difference between two teams in an fps game, and my objective is to predict which team will win. To do so I have 4 variables: abilityA: the mean of the ranks of players in team A abilityB: the mean of the ranks of players in team B mapA: the mean of the kda (kills\deaths\assists) of the players of team A in that particular map mapB: the mean of the kda (kills\deaths\assists) of the players of team B in that particular map My JAGS code is the following: # MODEL SPECIFICATION model { # Likelihood: for( i in 1 : N ) { Y[i] ~ dnorm(mu[z[i]], precision) mu[z[i]] = b0 + b1*abilityA[i] + b2*mapA[i] - (b3 + b4*abilityB[i] + b5*mapB[i]) z[i] ~ dcat( omega ) } #Priors b0 ~ dnorm(0, 0.5) b1 ~ dnorm(0, 0.5) b2 ~ dnorm(0, 0.5) b3 ~ dnorm(0, 0.5) b4 ~ dnorm(0, 0.5) b5 ~ dnorm(0, 0.5) precision ~ dgamma( 0.01 , 0.01 ) sd_n = sqrt(1/precision) omega ~ ddirch( c(1,1,1) ) } ...but I get the following error: Error in jags.model(model.file, data = data, inits = init.values, n.chains = n.chains, : RUNTIME ERROR: Compilation error on line 11. Unknown variable z Either supply values for this variable with the data or define it on the left hand side of a relation. Reproducible example With the following R code the error is reproducible library(readr) library(dplyr) library(MASS) library(R2jags) library(VGAM) library(mixtools) curve(dnormm(x, p = c(0.3,0.7), mu = c(-2,3), sigma = c(1.3,1.3)), from = -10, to = 10, ylab = "Mixture", lwd = 3, col = "darkgreen") data.mixture <- list( Y = rnormm(1000, p = c(0.3,0.7), mu = c(-2,3), sigma = c(1.3,1.3)), N = 1000 ) parameters <- c("sd_n") iters = 1000 dugongjags = jags(data=data.mixture, parameters.to.save=parameters, model.file="./temp.txt", n.chains=1, n.iter=iters, n.thin = 1, quiet = T) This is jags code to save in a file called temp.txt # MODEL SPECIFICATION model { # Likelihood: for( i in 1 : N ) { Y[i] ~ dnorm(mu[z[i]], precision) mu[z[i]] = b z[i] ~ dcat( omega ) } #Priors b ~ dnorm(0,0.5) precision ~ dgamma( 0.01 , 0.01 ) sd_n = sqrt(1/precision) omega ~ ddirch( c(1,1,1) ) } I referred to this site to build the model. The thing is that in the following examples the means of the normals (mu[i]) is set with the prior, but is not possible in my case because of the dependency on the data (abilityA, mapA, ...). http://doingbayesiandataanalysis.blogspot.com/2012/06/mixture-of-normal-distributions.html https://www.coursera.org/lecture/mcmc-bayesian-statistics/mixture-model-in-jags-KDEVZ Any idea?
How could I solve Dimension mismatch in Jags model.?
I'm super new in bayesian analysis and I'm trying to practice with an example for Classic Capture-recapture models: Mh2 This is my code nind <- dim(venados)[1] K <- 43 ntraps <- 13 M <- 150 nz <- M - nind Yaug <- array(0, dim = c(M, ntraps, K)) Yaug[1:nind,,] <- venados y <- apply(Yaug, c(1,3), sum) y[y > 1] <- 1 Bundle data data1 <- list(y = y, nz = nz, nind = nind, K = K, sup = Buffer) # Model JAGS sink("Mh2_jags.txt") cat(" model{ # Priors p0 ~ dunif(0,1) mup <- log(p0/(1-p0)) sigmap ~ dunif(0,10) taup <- 1/(sigmap*sigmap) psi ~ dunif(0,1) # Likelihood for (i in 1:(nind+nz)) { z[i] ~ dbern(psi) lp[i] ~ dnorm(mup,taup) logit(p[i]) <- lp[i] y[i] ~ dbin(mu[i],K) } # i N <- sum(z[1:(nind+nz)]) D <- N/sup*100 } # modelo ",fill = TRUE) sink() # Inicial values inits <- function(){list(z = as.numeric(y >= 1), psi = 0.6, p0 = runif(1), sigmap = runif(1, 0.7, 1.2), lp = rnorm(M, -0.2))} params1 <- c("p0","sigmap","psi","N","D") # MCMC ni <- 10000; nt <- 1; nb <- 1000; nc <- 3 # JAGS and posteriors fM2 <- jags(data1, inits, params1, "Mh2_jags.txt", n.chains = nc, n.thin = nt, n.iter = ni, n.burnin = nb) I received this error message Processing function input....... Done. Compiling model graph Resolving undeclared variables Deleting model Error in jags.model(file = model.file, data = data, inits = inits, n.chains = n.chains, : RUNTIME ERROR: Compilation error on line 16. Dimension mismatch in subset expression of y I have read that some letters as s and n have to be changed. However, I do not know what to do. Please if you could give an advice. Thank you very much
The issue is because y is two dimensional but the model assumes it is one dimensional. If you are assuming that the secondary surveys are i.i.d. Bernoulli trials (and each session had K trials)n then you would just need to take the sum of the rows of the y matrix. Assuming this is the case then you just need to modify a couple lines at the top of this script. nind <- dim(venados)[1] K <- 43 ntraps <- 13 M <- 150 nz <- M - nind Yaug <- array(0, dim = c(M, ntraps, K)) Yaug[1:nind,,] <- venados y <- apply(Yaug, c(1,3), sum) y[y > 1] <- 1 # Take the rowSum y_vector <- rowSums(y) # Use y_vector instead of y data1 <- list(y = y_vector, nz = nz, nind = nind, K = K, sup = Buffer) Conversely, if you wanted to include covariates for the observational process (and those covariates vary by survey) you would use the matrix y and modify the model. sink("Mh2_jags_Kloop.txt") cat(" model{ # Priors p0 ~ dunif(0,1) mup <- log(p0/(1-p0)) sigmap ~ dunif(0,10) taup <- 1/(sigmap*sigmap) psi ~ dunif(0,1) # Likelihood for (i in 1:(nind+nz)) { z[i] ~ dbern(psi) lp[i] ~ dnorm(mup,taup) logit(p[i]) <- lp[i] # Loop over K surveys for(j in 1:K){ y[i,j] ~ dbern(p[i]*z[i]) } } # i N <- sum(z[1:(nind+nz)]) D <- N/sup*100 } # modelo ",fill = TRUE) sink() Finally, you don't specify what mu is within the model. I think you want it to be p, but you also need to link the latent state model to the observational state model (if z=0 then that individual cannot be sampled. In this case you would interpret psi as the probability that nind+nz individuals are at your site. # Model JAGS sink("Mh2_jags.txt") cat(" model{ # Priors p0 ~ dunif(0,1) mup <- log(p0/(1-p0)) sigmap ~ dunif(0,10) taup <- 1/(sigmap*sigmap) psi ~ dunif(0,1) # Likelihood for (i in 1:(nind+nz)) { z[i] ~ dbern(psi) lp[i] ~ dnorm(mup,taup) logit(p[i]) <- lp[i] y[i] ~ dbin(p[i] * z[i],K) } # i N <- sum(z[1:(nind+nz)]) D <- N/sup*100 } # modelo ",fill = TRUE) sink()
Censoring in rjags - Invalid parent values
I'm having troubles reimplementing a model from winbugs on rjags. I'm getting the Invalid parent values error which is the error you get when censoring was not correctly setup, but I can't see my mistake. This is the original model on WinBugs: model { for(i in 1 : N) { times[i] ~ dweib(v, lambda[i]) T(censor[i],) lambda[i] <- exp(beta0 + beta1*type[i]) S[i] <- exp(-lambda[i]*pow(times[i],v)); f[i] <- lambda[i]*v*pow(times[i],v-1)*S[i] h[i] <- f[i]/S[i] } beta0 ~ dnorm(0.0, 0.0001) beta1 ~ dnorm(0.0, 0.0001) v ~ dexp(0.001) median0 <- pow(log(2) * exp(-beta0), 1/v) median1 <- pow(log(2) * exp(-beta0-beta1), 1/v) } Setting up a reproducible example: type <- as.factor(c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) censor <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,882,892,1031, 1033,1306,1335,0,1452,1472,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,381,0,0,0,0,0,0,0,0,0,529,0, 0,0,0,0,0,0,0,0,945,0,0,1180,0,0,1277,1397,1512,1519) times <-c (17,42,44,48,60,72,74,95,103,108,122,144,167,170,183,185,193,195,197,208,234,235,254,307,315,401, 445,464,484,528,542,567,577,580,795,855,NA,NA,NA,NA,NA,NA,1366,NA,NA,1,63,105,129,182,216,250,262, 301,301,342,354,356,358,380,NA,383,383,388,394,408,460,489,499,524,NA,535,562,675,676,748,748,778, 786,797,NA,955,968,NA,1245,1271,NA,NA,NA,NA) df <- tibble(type = type, censor = censor, time = times) %>% mutate(censor_limit = replace(censor, censor == 0, max(times, na.rm = TRUE))) %>% mutate(is_censored = ifelse(is.na(time), 1, 0)) %>% mutate(time_init = ifelse(is_censored == 1, censor_limit + 1, NA)) df$censor <- NULL head(df) And this is the rjags part: m <- textConnection("model { for(i in 1 : N) { isCensored[i] ~ dinterval(times[i], censorLimit[i]) times[i] ~ dweib(v, lambda[i]) lambda[i] <- exp(beta0 + beta1*type[i]) S[i] <- exp(-lambda[i]*pow(times[i],v)); f[i] <- lambda[i]*v*pow(times[i],v-1)*S[i] h[i] <- f[i]/S[i] } beta0 ~ dnorm(0.0, 0.0001) beta1 ~ dnorm(0.0, 0.0001) v ~ dexp(0.001) # Median survival time median0 <- pow(log(2) * exp(-beta0), 1/v) median1 <- pow(log(2) * exp(-beta0-beta1), 1/v) }") d <- list(N = nrow(df), times = df$time, type = df$type, isCensored = df$is_censored, censorLimit = df$censor_limit) inits1 = function() { inits = list(v = 1, beta0 = 0, beta1=0, times = df$time_init) } mod <- jags.model(m, data = d, inits = inits1, n.chains = 3) update(mod, 1e3) mod_sim <- coda.samples(model = mod, variable.names = c("lambda", "median0", "median1"), n.iter = 5e3) mod_csim <- as.mcmc(do.call(rbind, mod_sim)) Output: Compiling model graph Resolving undeclared variables Allocating nodes Graph information: Observed stochastic nodes: 164 Unobserved stochastic nodes: 19 Total graph size: 910 Initializing model Deleting model Error in jags.model(m, data = d, inits = inits1, n.chains = 3): Error in node h[35] Invalid parent values
How to fit a model with and without an interaction in a JAGS regression model
I'm using this tutorial to wrap my head around JAGS code. In the section 'Same model with an additional categorical predictor' it states that "This model includes an interaction between sex and body length". How can I remove this so that there's no interaction? Here's the full setup and model in R and JAGS. First the data: set.seed(42) samplesize <- 50 # Larger sample size because we're fitting a more complex model b_length <- sort(rnorm(samplesize)) # Body length sex <- sample(c(0, 1), size = samplesize, replace = T) # Sex (0: female, 1: male) int_true_f <- 30 # Intercept of females int_true_m_diff <- 5 # Difference between intercepts of males and females slope_true_f <- 10 # Slope of females slope_true_m_diff <- -3 # Difference between slopes of males and females mu <- int_true_f + sex * int_true_m_diff + (slope_true_f + sex * slope_true_m_diff) * b_length # True means sigma <- 5 # True standard deviation of normal distributions b_mass <- rnorm(samplesize, mean = mu, sd = sigma) # Body mass (response variable) # Combine into a data frame: snakes2 <- data.frame(b_length = b_length, b_mass = b_mass, sex = sex) head(snakes2) jagsdata_s2 <- with(snakes2, list(b_mass = b_mass, b_length = b_length, sex = sex, N = length(b_mass))) JAGS code: lm2_jags <- function(){ # Likelihood: for (i in 1:N){ b_mass[i] ~ dnorm(mu[i], tau) # tau is precision (1 / variance) mu[i] <- alpha[1] + sex[i] * alpha[2] + (beta[1] + beta[2] * sex[i]) * b_length[i] } # Priors: for (i in 1:2){ alpha[i] ~ dnorm(0, 0.01) beta[i] ~ dnorm(0, 0.01) } sigma ~ dunif(0, 100) tau <- 1 / (sigma * sigma) } Initial values and run: init_values <- function(){ list(alpha = rnorm(2), beta = rnorm(2), sigma = runif(1)) } params <- c("alpha", "beta", "sigma") fit_lm2 <- jags(data = jagsdata_s2, inits = init_values, parameters.to.save = params, model.file = lm2_jags, n.chains = 3, n.iter = 12000, n.burnin = 2000, n.thin = 10, DIC = F)
The interaction term is contained in your calculation of mu. The sex changes how the formula between body length and body mass is defined, via the slope terms. To build a model where sex and body length are treated as independent with respect to how they affect body mass, you could do something like this: mu <- int_true_f + (sex * int_true_m_diff) + b_length The JAGS code would then become lm2_jags <- function(){ # Likelihood: for (i in 1:N){ b_mass[i] ~ dnorm(mu[i], tau) # tau is precision (1 / variance) mu[i] <- alpha[1] + (sex[i] * alpha[2]) + (b_length[i] * alpha[3]) } # Priors: for (i in 1:3){ alpha[i] ~ dnorm(0, 0.01) } sigma ~ dunif(0, 100) tau <- 1 / (sigma * sigma) }
R: using bootstrap prediction on mixed model
library(nlme) library(bootstrap) y = Loblolly$height x = Loblolly theta.fit = function(x, y){ nlme(height ~ SSasymp(age, Asym, R0, lrc), data = x, fixed = Asym + R0 + lrc ~ 1, random = Asym ~ 1, start = c(Asym = 103, R0 = -8.5, lrc = -3.3)) } theta.predict = function(fit, x){ (fit$fitted)[,1] } sq.err <- function(y,yhat) { (y-yhat)^2} results <- bootpred(x,y,20,theta.fit,theta.predict, err.meas=sq.err) I am using the bootpred function to obtain estimates of prediction error. However, when I run the last line, I get the following error: Error in model.frame.default(formula = ~height + age, data = c(" 4.51", : 'data' must be a data.frame, not a matrix or an array I then tried x = data.frame(x) but that did not solve my problem.
The problem comes about because the example dataset used is a groupedData: library(nlme) library(bootstrap) y = Loblolly$height x = Loblolly class(x) [1] "nfnGroupedData" "nfGroupedData" "groupedData" "data.frame" And inside the bootpred function, it is converted into a matrix again. It can be quite a mess converting back and forth, especially when you need the factor column for linear mixed models. What you can do write theta.fit and theta.predict to take in a data.frame: theta.fit = function(df){ nlme(height ~ SSasymp(age, Asym, R0, lrc), data = df, fixed = Asym + R0 + lrc ~ 1, random = Asym ~ 1, start = c(Asym = 103, R0 = -8.5, lrc = -3.3)) } theta.predict = function(fit, df){ predict(fit,df) } sq.err <- function(y,yhat) { (y-yhat)^2} And now alter the bootpred function and use df, I guess you can provide y again, or specific the column to use in the data.frame: bootpred_df = function (df,y,nboot, theta.fit, theta.predict, err.meas, ...) { call <- match.call() n <- length(y) saveii <- NULL fit0 <- theta.fit(df, ...) yhat0 <- theta.predict(fit0, df) app.err <- mean(err.meas(y, yhat0)) err1 <- matrix(0, nrow = nboot, ncol = n) err2 <- rep(0, nboot) for (b in 1:nboot) { ii <- sample(1:n, replace = TRUE) saveii <- cbind(saveii, ii) fit <- theta.fit(df[ii, ], ...) yhat1 <- theta.predict(fit, df[ii, ]) yhat2 <- theta.predict(fit, df) err1[b, ] <- err.meas(y, yhat2) err2[b] <- mean(err.meas(y[ii], yhat1)) } optim <- mean(apply(err1, 1, mean,na.rm=TRUE) - err2) junk <- function(x, i) { sum(x == i) } e0 <- 0 for (i in 1:n) { o <- apply(saveii, 2, junk, i) if (sum(o == 0) == 0) cat("increase nboot for computation of the .632 estimator", fill = TRUE) e0 <- e0 + (1/n) * sum(err1[o == 0, i])/sum(o == 0) } err.632 <- 0.368 * app.err + 0.632 * e0 return(list(app.err, optim, err.632, call = call)) } We can run it now.. but because of the nature of this data, there will be instances where the group (Seed) has an uneven distribution making some of the variables hard to estimate.. Most likely this problem might be better addressed by refining the code. In any case, if you are lucky it works like below: bootpred_df(Loblolly,Loblolly$height,20,theta.fit,theta.predict,err.meas=sq.err) [[1]] [1] 0.4337236 [[2]] [1] 0.1777644 [[3]] [1] 0.6532417 $call bootpred_df(df = Loblolly, y = Loblolly$height, nboot = 20, theta.fit = theta.fit, theta.predict = theta.predict, err.meas = sq.err)