Convert multilevel jags model from wide to long format - r

I have a multi-level jags model. I'm trying to convert it from wide to long format as described here: http://jeromyanglim.tumblr.com/post/37361593128/jags-converting-multilevel-model-from-wide-to However my model is more complex than the example so I'm having some trouble making this work. To illustrate the difficulties I've made a repeatable example. This first block creates data and sets jags parameters:
library(ecodist)
library(runjags)
set.seed(10)
##### population n
n <- 250
# num outputs
num.ys <- 10
# Vector binary to indicate which domains have correlation with independent variables
corr.vec <- c(0, 0, 0, 1, 1, 0, 0, 1, 1, 1)
correlation = 0.99
# Simulate one outcome vector. The length `n` and the target `correlation`
# are read from the enclosing environment rather than passed in.
#   i    - outcome index supplied by the caller (not used in the body)
#   var1 - predictor the outcome is correlated with when sw1 == 1
#   sw1  - switch: 1 = correlated outcome via corgen(), anything else = noise
sim.fn <- function(i, var1, sw1) {
  # Uncorrelated case: plain normal noise with mean 0 and sd 5
  if (sw1 != 1) {
    return(rnorm(n, 0, 5))
  }
  sim <- corgen(n, var1, correlation)
  # Undo the scaling recorded in the attributes of sim$y
  as.numeric(sim$y * attr(sim$y, 'scaled:scale') + attr(sim$y, 'scaled:center'))
}
##### Generate data
# One predictor column, then one simulated outcome column per entry of corr.vec.
df0 <- data.frame(var1 = rnorm(n, 15, 2))
# vapply() pins the result to a numeric n x num.ys matrix (sapply() would
# silently change shape on unexpected output); seq_len() is safe for num.ys == 0.
df1 <- data.frame(
  df0,
  vapply(
    seq_len(num.ys),
    function(i) sim.fn(i, df0$var1, corr.vec[i]),
    numeric(n)
  )
)
out.names <- paste0("y_", seq_len(num.ys))
names(df1) <- c("var1", out.names)
### Jags parameters
# Nodes to monitor
parameters <- c("B1O", "b1", "b1o", "nu", "sd")
# Number of steps to "tune" the samplers.
adaptSteps <- 1000
# Number of steps to "burn-in" the samplers.
burnInSteps <- 10000
# Number of chains to run.
nChains <- 2
# Total number of steps in chains to save.
numSavedSteps <- 1000
# Number of steps to "thin" (1=keep every step).
thinSteps <- 2
# Steps per chain.
nPerChain <- ceiling(numSavedSteps * thinSteps / nChains)
Ok, so this next section is the 'wide format' JAGS model that provides the correct estimates in the object mcmcChain:
modelstring = "
model {
for( i in 1 : nData ) {
for(np in 1:nVars){
y[i, np] ~ dt( mu[i,np], tau, nu)
mu[i, np] <- b0s[i] + (b1 + b1o[np]) * x1[i]
}
}
#Random effects
for(i in 1:nData){
b0s[i] ~ dnorm(0, b0stau)
}
#Outcome level
for (np in 1:nVars){
b1o[np] ~ dnorm(0, b1otau)
}
##### Priors
#Overarching Level
b1 ~ dnorm(0, 0.0001)
#
b0stau <- pow(b0ssd, -2)
b0ssd ~ dt(0, 1/625, 1)T(0,)
# tau & nu priors
nuI ~ dunif(0.001,0.5)
nu <- 1/nuI
tau <- pow(sd, -2)
sd ~ dunif(0, 10)
b1otau <- pow(b1osd, -2)
b1osd ~ dt(0, 1/625, 1)T(0,)
b1dtau <- pow(b1dsd, -2)
b1dsd ~ dt(0, 1/625, 1)T(0,)
#Transformations
for(np in 1:nVars){
B1O[np] <- b1 + b1o[np]
}
}
" # close quote for modelstring
writeLines(modelstring,con="model.jags.no_dom.test.txt")
# Standardise each outcome column and the predictor, then bundle them for JAGS
zy <- df1[, out.names]
sc_ys <- data.frame(lapply(zy, scale))
dataList <- list(
  y = as.matrix(sc_ys),
  x1 = as.numeric(scale(df1$var1)),
  nVars = num.ys,
  nData = nrow(df1)
)
# Run this model via run.jags
codaSamples <- run.jags(
  model = "model.jags.no_dom.test.txt",
  data = dataList,
  method = "parallel",
  n.chains = nChains,
  monitor = parameters,
  adapt = adaptSteps,
  burnin = burnInSteps,
  sample = nPerChain,
  thin = thinSteps
)
# Summary table of the monitored nodes
mcmcChain <- data.frame(summary(codaSamples))
mcmcChain
So the B1O outputs are close to the correlations the data was generated from.
Next is my attempt at the "long format" model analogous to the explanation in the link above.
modelstring = "
model {
for( i in 1 : nData ) {
y[i] ~ dt( mu[i] , tau, nu )
mu[i] <- b0s[i] + (b1 + b1o[idx[i]]) * x1[i]
}
#Random effects
for(i in 1:nData){
b0s[i] ~ dnorm(0, b0stau)
}
#Outcome level
for (y in 1:nVars){
b1o[y] ~ dnorm(0, b1otau[y])
}
##### Priors
#Overarching Level
b1 ~ dnorm(0, 0.0001)
b0stau <- pow(b0ssd, -2)
b0ssd ~ dt(0, 1/625, 1)T(0,)
for (y in 1:nVars){
b1otau[y] <- pow(b1osd[y], -2)
b1osd[y] ~ dt(0, 1/625, 1)T(0,)
}
tau <- pow(sd, -2)
sd ~ dunif(0, 10)
nuI ~ dunif(0.001,0.5)
nu <- 1/nuI
#Transformations
for(j in 1:nVars){
B1O[j] <- b1 + b1o[j]
}
}
" # close quote for modelstring
writeLines(modelstring,con="model.jags.no_dom.long.test.txt")
# Restructure data into long format
dataList2 = list( y = unlist(sc_ys), x1 = rep (as.numeric(scale(df1$var1,)), length(out.names)),
idx = rep(1:length(out.names), each=nrow(df1)),
nVars = length(out.names), nData = nrow(df1))
codaSamples2 <- run.jags(model="model.jags.no_dom.long.test.txt" , data=dataList2 , method ="parallel", n.chains=nChains, monitor=parameters,
adapt = adaptSteps, burnin = burnInSteps, sample=nPerChain, thin=thinSteps)
mcmcChain2 <- data.frame(summary( codaSamples2 ))
mcmcChain2
So the results in mcmcChain2 don't match those of mcmcChain, but I cannot see where I'm going wrong. Can anyone help please ? Thanks.

Your matrix df1 has nData * nVars elements, but your long format model is only using the first nData elements (i.e. in effect you are just using the first column of the data). The maximum for the main data loop needs to be adjusted to be equal to nData*nVars and not just nData.
Also you need a vector representing the row number of the original df1 so that you can index your random effect b0s correctly as e.g. b0s[dfrow[i]]. Also, it is hard to follow the data specification (e.g. what is length(out.names)) so I'm not sure if you have already done this, but either x1 needs to be repeated nVars times or you should use the same x1[dfrow[i]] indexing as for the random effect (preferably the latter for the sake of readability of your model code).
Matt

Related

Jags: Attempt to redefine node error, mixed effect regression

I want to perform a mixed effect regression in rjags, with a random slope and intercept. I define the following toy dataset:
library(ggplot2)
library(data.table)
global_slope <- 1
global_int <- 1
Npoints_per_group <- 50
N_groups <- 10
pentes <- rnorm(N_groups,-1,.5)
centers_x <- seq(0,10,length = N_groups)
center_y <- global_slope*centers_x + global_int
group_spread <- 2
group_names <- sample(LETTERS,N_groups)
# Build one data.table per group and stack them. The list is handed straight
# to rbindlist() because `%>%` is not provided by ggplot2 or data.table, the
# only packages attached for this snippet.
df <- rbindlist(lapply(seq_len(N_groups), function(i) {
  half_width <- group_spread / 2
  x <- seq(centers_x[i] - half_width, centers_x[i] + half_width,
           length.out = Npoints_per_group)
  # Line through (centers_x[i], center_y[i]) with slope pentes[i], plus
  # standard normal noise
  y <- pentes[i] * (x - centers_x[i]) + center_y[i] + rnorm(Npoints_per_group)
  data.table(x = x, y = y, ID = group_names[i])
}))
ggplot(df,aes(x,y,color = as.factor(ID)))+
geom_point()
This is a typical case of Simpson's paradox: there is an overall increasing trend even though the trend within each group (given by the ID variable) is decreasing.
I define the following model:
library(rjags)
model_code_simpson <-
" model
{
# first level
for (i in 1:n) {
y[i] ~ dnorm(alpha[i] + beta[i] * x[i], tau)
alpha[i] = alpha[group[i]] # random intercept
beta[i] = beta[group[i]] # random slope
}
# second level
for(j in 1:J){
alpha[j] ~ dnorm(mu.alpha, tau.alpha)
beta[j] ~ dnorm(mu.beta, tau.beta)
}
# Priors
mu.alpha ~ dnorm(0,0.001)
mu.beta ~ dnorm(0,0.001)
sigma ~ dunif(0,10)
sigma.alpha ~ dunif(0,10)
sigma.beta ~ dunif(0,10)
# Derived quantities
tau <- pow(sigma,-2)
tau.alpha <- pow(sigma.alpha,-2)
tau.beta <- pow(sigma.beta,-2)
}
"
# Choose the parameters to watch
model_parameters <- c("mu.alpha","tau.alpha","tau.beta","tau")
# define numeric grouping variable
df[,ID2 := .GRP,by = ID]
model_data <- list(n = nrow(df),
y = df$y,
x = df$x,
group = df$ID2,
J = df[,uniqueN(ID)])
model <- jags.model(textConnection(model_code_simpson),
data = model_data,
n.chains = 2)
I get the following error:
Compiling model graph
Resolving undeclared variables
Allocating nodes
Deleting model
Error in jags.model(textConnection(model_code_simpson), data = model_data, :
RUNTIME ERROR:
Compilation error on line 8.
Attempt to redefine node beta[1]
I do not understand what is happening, and related questions did not help me much.
You defined beta twice (and alpha as well, in exactly the same way). First, beta is a vector of length n when you are looping through the data. Second, beta is a vector of length J when you are creating the random effects. This "redefining" is causing this issue, but it is an easy fix. You just need to remove the first instance of alpha and beta in your model and it will compile (i.e., just move your nested indexing inside of dnorm() and you are good to go).
model_code_simpson <-
" model
{
# first level
for (i in 1:n) {
y[i] ~ dnorm(
alpha[group[i]] + beta[group[i]] * x[i],
tau
)
}
# second level
for(j in 1:J){
alpha[j] ~ dnorm(mu.alpha, tau.alpha)
beta[j] ~ dnorm(mu.beta, tau.beta)
}
# Priors
mu.alpha ~ dnorm(0,0.001)
mu.beta ~ dnorm(0,0.001)
sigma ~ dunif(0,10)
sigma.alpha ~ dunif(0,10)
sigma.beta ~ dunif(0,10)
# Derived quantities
tau <- pow(sigma,-2)
tau.alpha <- pow(sigma.alpha,-2)
tau.beta <- pow(sigma.beta,-2)
}
"

How to specify nested model

I am using runjags to model some hierarchical data. I can model one level of the hierarchy but I do not know how to extend it to more levels. I am trying to do this using method 3 from page 24 of "Bayesian Hierarchical Modelling using WinBUGS", by Nicky Best et al which uses a nested loop (as opposed to nested indexing).
For one level I can model
filestring <-
"model{
for(j in 1:Ninner){
for(i in 1:N){
y[j,i] ~ dnorm(beta + alpha[j], py)
}
alpha[j] ~ dnorm(0, taua)
}
beta ~ dnorm(0, 0.001)
taua ~ dgamma(0.01, 0.01)
py ~ dgamma(0.01, 0.1)
}"
INITS <- list(list(.RNG.seed=1, .RNG.name="base::Wichmann-Hill"),
list(.RNG.seed=2, .RNG.name="base::Wichmann-Hill"))
results <- run.jags(filestring, monitor=c("py", "beta", "alpha"), data=jags_data, sample=1e3,
n.chains=2, inits=INITS, summarise=FALSE)
I then tried to add another level using
for(k in 1:Nouter){
for(j in 1:Ninner){
for(i in 1:N){
y[j,i] ~ dnorm(beta + alpha_in[j] + alpha_out[k], py)
} } }
but receive the error
Compilation error on line 5.
Attempt to redefine node y[1,1]
How do I extend this to model another level of which the first one is nested? Thank you.
Below is some reproducible data which shows the structure of the data. I wish to estimate random estimates for both outer_grp and the inner_grp.
library(data.table)
library(runjags)
set.seed(12345)
dat <- data.table(outer_grp=rep(1:5, each=10), inner_grp=rep(1:10, each=5), y=rnorm(50), x=rnorm(50), time=1:5)
wdat = dcast(dat, inner_grp + outer_grp ~ time, value.var=c("y", "x"))
jags_data = c(setNames(
lapply(split.default(wdat, substr(names(wdat), 1, 1)),as.matrix),
c("inner_grp", "outer_grp","x", "y")),
N=5, Nouter=5, Ninner=10)
EDIT
Perhaps it is enough to model like??
filestring <-
"model{
for(j in 1:Ninner){
for(i in 1:N){
y[j,i] ~ dnorm(beta + alpha_in[j] + alpha_out[outer_grp[j]], py)
}
}
for(i in 1:Ninner){ alpha_in[i] ~ dnorm(0, taua) }
for(i in 1:Nouter){ alpha_out[i] ~ dnorm(0, taub) }
beta ~ dnorm(0, 0.001)
taua ~ dgamma(0.01, 0.01)
taub ~ dgamma(0.01, 0.01)
py ~ dgamma(0.01, 0.1)
}"
It is possible to add the outer group intercept by using nested indexing while still using the loop format. I'll use the Pastes dataset from lme4 for comparison.
filestring <-
"model{
for(j in 1:Ninner){
for(i in 1:N){
y[j,i] ~ dnorm(beta + alpha_in[j] + alpha_out[batch[j]], py)
}
}
for(i in 1:Ninner){ alpha_in[i] ~ dnorm(0, taua) }
for(i in 1:Nouter){ alpha_out[i] ~ dnorm(0, taub) }
beta ~ dnorm(0, 0.001)
taua <- 1/(sa*sa)
sa ~ dunif(0,100)
taub <- 1/(sb*sb)
sb ~dunif(0,100)
py ~ dgamma(0.001, 0.001)
}"
INITS <- list(list(.RNG.seed=1, .RNG.name="base::Wichmann-Hill"),
list(.RNG.seed=2, .RNG.name="base::Wichmann-Hill"))
results <- run.jags(filestring, monitor=c("py", "beta", "alpha_in", "alpha_out", "sa", "sb"),
data=jags_data, burnin=1e4, sample=1e4, n.chains=2,
inits=INITS, summarise=0)
summary(results, vars=c("py", "beta", "sa", "sb"))
Compare to lme4
fm1 <- lmer(strength ~ (1|batch) + (1|sample), Pastes)
print(summary(fm1), corr=FALSE)
Data used
library(lme4); library(data.table); library(runjags)
data(Pastes); setDT(Pastes)
Pastes[,time := sequence(.N), by=sample]
# Change format to match question
wdat = dcast(Pastes, batch + sample ~ time, value.var="strength")
jags_data = list(y=as.matrix(wdat[,3:4]), batch=wdat$batch, N=2, Ninner=length(unique(wdat$sample)), Nouter=length(unique(wdat$batch)))

Multiplying a vector of parameters by a matrix of independent variables in JAGS

I am fitting a multivariate model in JAGS using the Dirichlet distribution. I have a matrix y of proportional abundances for 3 species.
#generate 3 columns of species proportional abundance data
y <- matrix(abs(rnorm(3 * 100)), nrow = 100, ncol = 3)
# Normalise every row to sum to 1. rowSums(y) has one entry per row and is
# recycled down the columns, so element [i, j] is divided by the sum of row i.
y <- y / rowSums(y)
I have a matrix x of predictor values, the first of which is an intercept.
#generate 2 columns of predictors and an intercept
x <- matrix(rnorm(2 * 100, mean = 20, sd = 4), nrow = 100, ncol = 2)
# Prepend the intercept column of ones
x <- cbind(1, x)
I specify a multivariate jags model, jags.model:
jags.model = "
model {
#setup parameter priors for each species * predictor combination.
for(j in 1:N.spp){
for(k in 1:N.preds){
m[k,j] ~ dgamma(1.0E-3, 1.0E-3)
}
}
#go ahead and fit means of species abundances as a linear combination of predictor and parameters.
for(i in 1:N){
for(j in 1:N.spp){
log(a0[i,j]) <- m[,j] * x[i,]
}
y[i,1:N.spp] ~ ddirch(a0[i,1:N.spp])
}
} #close model loop.
"
I setup the JAGS data object, jags.data:
jags.data <- list(y = as.matrix(y), x = as.matrix(x),
N.spp = ncol(y), N.preds = ncol(x), N = nrow(y))
I fit the JAGS model using the runjags package in R.
jags.out <- runjags::run.jags(jags.model,
data=jags.data,
adapt = 100,
burnin = 200,
sample = 400,
n.chains=3,
monitor=c('m'))
I get the following error:
Error: The following error occured when compiling and adapting the model using rjags:
Error in rjags::jags.model(model, data = dataenv, n.chains = length(runjags.object$end.state), :
RUNTIME ERROR:
Invalid vector argument to exp
What am I doing wrong here? For reference, spelling out each parameter by predictor combination still fits fine:
jags.model = "
model {
#setup parameter priors for each species * predictor combination.
for(j in 1:N.spp){
for(k in 1:N.preds){
m[k,j] ~ dgamma(1.0E-3, 1.0E-3)
}
}
#go ahead and fit means of species abundances as a linear combination of predictor and parameters.
for(i in 1:N){
for(j in 1:N.spp){
log(a0[i,j]) <- m[1,j] * x[i,1] + m[2,j] * x[i,2] + m[3,j] * x[i,3]
}
y[i,1:N.spp] ~ ddirch(a0[i,1:N.spp])
}
} #close model loop.
"
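The solution to this problem is to take a dot product (inner product) in JAGS. The expression m[,j] * x[i,] multiplies element-wise and returns a vector, whereas the log link needs a single number. Change the line: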
log(a0[i,j]) <- m[,j] * x[i,]
to:
log(a0[i,j]) <- inprod(m[,j] , x[i,])
And everything should work fine. Full model below.
jags.model = "
model {
#setup parameter priors for each species * predictor combination.
for(j in 1:N.spp){
for(k in 1:N.preds){
m[k,j] ~ dgamma(1.0E-3, 1.0E-3)
}
}
#go ahead and fit means of species abundances as a linear combination of predictor and parameters.
for(i in 1:N){
for(j in 1:N.spp){
log(a0[i,j]) <- inprod(m[,j] , x[i,])
}
y[i,1:N.spp] ~ ddirch(a0[i,1:N.spp])
}
} #close model loop.
"

OpenBUGS error undefined variable

I'm working on a binomial mixture model using OpenBUGS and R package R2OpenBUGS. I've successfully built simpler models, but once I add another level for imperfect detection, I consistently receive the error variable X is not defined in model or in data set. I've tried a number of different things, including changing the structure of my data and entering my data directly into OpenBUGS. I'm posting this in the hope that someone else has experience with this error, and perhaps knows why OpenBUGS is not recognizing variable X even though it is clearly defined as far as I can tell.
I've also gotten the error expected the collection operator c error pos 8 - this is not an error I've been getting previously, but I am similarly stumped.
Both the model and the data-simulation function come from Kery's Introduction to WinBUGS for Ecologists (2010). I will note that the data set here is in lieu of my own data, which is similar.
I am including the function to build the dataset as well as the model. Apologies for the length.
# Simulate data for a binomial mixture model: by default 180 sites, 3 sampling
# rounds, a 3-level treatment factor 'trt', and a continuous covariate 'X'.
#
# Arguments:
#   nsite        number of sites; must be a multiple of ntrt so that every
#                treatment level gets the same number of sites
#   nrep         number of repeat counts per site
#   xmin, xmax   range of the uniform covariate X
#   alpha.vec    coefficients for the columns of model.matrix(~ trt * X),
#                i.e. a vector of length 2 * ntrt
#   beta0, beta1 intercept and slope of detection probability (logit scale)
#   ntrt         number of treatment levels (at least 2)
#
# Returns a list holding the inputs plus X, lam, N, totalN, p, the
# nsite x nrep count matrix y, and the treatment factor trt.
data.fn <- function(nsite = 180, nrep = 3, xmin = -1, xmax = 1, alpha.vec = c(0.01,0.2,0.4,1.1,0.01,0.2), beta0 = 1, beta1 = -1, ntrt = 3){
  stopifnot(
    ntrt >= 2,
    nsite %% ntrt == 0,
    length(alpha.vec) == 2 * ntrt
  )
  y <- array(dim = c(nsite, nrep)) # Array for counts
  X <- sort(runif(n = nsite, min = xmin, max = xmax)) # covariate values, sorted
  # Relationship expected abundance - covariate
  # Equal-sized treatment groups derived from nsite and ntrt (60 per group
  # with the defaults).
  x2 <- rep(seq_len(ntrt), each = nsite / ntrt) # Indicator for population
  # Keep the original labels for the 3-level default; generic ones otherwise.
  trt.labels <- if (ntrt == 3) c("CT", "CM", "CC") else paste0("T", seq_len(ntrt))
  trt <- factor(x2, labels = trt.labels)
  Xmat <- model.matrix(~ trt*X)
  lin.pred <- Xmat[,] %*% alpha.vec # Value of lin.predictor
  lam <- exp(lin.pred)
  # Add Poisson noise: draw N from Poisson(lambda)
  N <- rpois(n = nsite, lambda = lam)
  totalN <- sum(N)
  # Observation process
  # Relationship detection prob - covariate
  p <- plogis(beta0 + beta1 * X)
  # Make a 'census' (i.e., go out and count things)
  for (i in seq_len(nrep)) {
    y[, i] <- rbinom(n = nsite, size = N, prob = p)
  }
  # Return stuff
  list(nsite = nsite, nrep = nrep, ntrt = ntrt, X = X, alpha.vec = alpha.vec, beta0 = beta0, beta1 = beta1, lam = lam, N = N, totalN = totalN, p = p, y = y, trt = trt)
}
data <- data.fn()
And here is the model:
sink("nmix1.txt")
cat("
model {
# Priors
for (i in 1:3){ # 3 treatment levels (factor)
alpha0[i] ~ dnorm(0, 0.01)
alpha1[i] ~ dnorm(0, 0.01)
}
beta0 ~ dnorm(0, 0.01)
beta1 ~ dnorm(0, 0.01)
# Likelihood
for (i in 1:180) { # 180 sites
C[i] ~ dpois(lambda[i])
log(lambda[i]) <- log.lambda[i]
log.lambda[i] <- alpha0[trt[i]] + alpha1[trt[i]]*X[i]
for (j in 1:3){ # each site sampled 3 times
y[i,j] ~ dbin(p[i,j], C[i])
lp[i,j] <- beta0 + beta1*X[i]
p[i,j] <- exp(lp[i,j])/(1+exp(lp[i,j]))
}
}
# Derived quantities
}
",fill=TRUE)
sink()
# Bundle data
trt <- data$trt
y <- data$y
X <- data$X
ntrt <- 3
# Standardise covariates
s.X <- (X - mean(X))/sd(X)
win.data <- list(C = y, trt = as.numeric(trt), X = s.X)
# Inits function
inits <- function(){ list(alpha0 = rnorm(ntrt, 0, 2),
alpha1 = rnorm(ntrt, 0, 2),
beta0 = rnorm(1,0,2), beta1 = rnorm(1,0,2))}
# Parameters to estimate
parameters <- c("alpha0", "alpha1", "beta0", "beta1")
# MCMC settings
ni <- 1200
nb <- 200
nt <- 2
nc <- 3
# Start Markov chains
out <- bugs(data = win.data, inits, parameters, "nmix1.txt", n.thin=nt,
n.chains=nc, n.burnin=nb, n.iter=ni, debug = TRUE)
Note: This answer has gone through a major revision, after I noticed another problem with the code.
If I understand your model correctly, you are mixing up the y and N from the simulated data, and what is passed as C to Bugs. You are passing the y variable (a matrix) to the C variable in the Bugs model, but this is accessed as a vector. From what I can see C is representing the number of "trials" in your binomial draw (actual abundances), i.e. N in your data set. The variable y (a matrix) is called the same thing in both the simulated data and in the Bugs model.
This is a reformulation of your model, as I understand it, and this runs ok:
sink("nmix1.txt")
cat("
model {
# Priors
for (i in 1:3){ # 3 treatment levels (factor)
alpha0[i] ~ dnorm(0, 0.01)
alpha1[i] ~ dnorm(0, 0.01)
}
beta0 ~ dnorm(0, 0.01)
beta1 ~ dnorm(0, 0.01)
# Likelihood
for (i in 1:180) { # 180 sites
C[i] ~ dpois(lambda[i])
log(lambda[i]) <- log.lambda[i]
log.lambda[i] <- alpha0[trt[i]] + alpha1[trt[i]]*X[i]
for (j in 1:3){ # each site sampled 3 times
y[i,j] ~ dbin(p[i,j], C[i])
lp[i,j] <- beta0 + beta1*X[i]
p[i,j] <- exp(lp[i,j])/(1+exp(lp[i,j]))
}
}
# Derived quantities
}
",fill=TRUE)
sink()
# Bundle data
trt <- data$trt
y <- data$y
X <- data$X
N<- data$N
ntrt <- 3
# Standardise covariates
s.X <- (X - mean(X))/sd(X)
win.data <- list(y = y, trt = as.numeric(trt), X = s.X, C= N)
# Inits function
inits <- function(){ list(alpha0 = rnorm(ntrt, 0, 2),
alpha1 = rnorm(ntrt, 0, 2),
beta0 = rnorm(1,0,2), beta1 = rnorm(1,0,2))}
# Parameters to estimate
parameters <- c("alpha0", "alpha1", "beta0", "beta1")
# MCMC settings
ni <- 1200
nb <- 200
nt <- 2
nc <- 3
# Start Markov chains
out <- bugs(data = win.data, inits, parameters, "nmix1.txt", n.thin=nt,
n.chains=nc, n.burnin=nb, n.iter=ni, debug = TRUE)
Overall, the results from this model look ok, but there are long autocorrelation lags for beta0 and beta1. The estimate of beta1 also seems a bit off (approximately -0.4), so you might want to recheck the Bugs model specification so that it matches the simulation model (i.e. that you are fitting the correct statistical model). At the moment, I'm not sure that it does, but I don't have the time to check further right now.
I got the same message trying to pass a factor to OpenBUGS. Like so,
# site is still a factor here; `...` stands for the remaining data elements
Ndata <- list(yrs = N$yrs, site = N$site, ...)
The variable "site" was not passed by the "bugs" function; it simply was not in the list passed
to OpenBUGS.
I solved the problem by passing site as numeric,
# site converted from factor to numeric; `...` stands for the remaining data elements
Ndata <- list(yrs = N$yrs, site = as.numeric(N$site), ...)

Two models in one Winbugs script

I am conducting a Bayesian analysis using Winbugs from R. I need to combine two Winbugs scripts into one: however, I am receiving an error message (Variable x2 is not defined in model or in data set). Here is the winbugs code:
model{
# Model’s likelihood
for (i in 1:n) {
tto[i] ~ dnorm( mu[i], tau ) # stochastic componenent
b[i] ~ dnorm(0.0, tau2)
# link and linear predictor
mu[i] <- 1 - (beta.concern2*concern2[i] + beta.concern3*concern3[i] + b[i])
}
for (i in 1:1002) {
# Linear regression on logit
logit(p[i]) <- beta.concern2*x2[i,1] + beta.concern2*x2[i,2]
# Likelihood function for each data point
y2[i] ~ dbern(p[i])
}
s2<-1/tau
s <-sqrt(s2)
a2<-1/tau2
a <-sqrt(a2)
}
where x2 is a 1002*2 matrix and y2 is a vector
This is the R code definining the data:
combined.data <- list(n=n,tto=tto,concern2=concern2,
concern3=concern3,y2=y2, x2=x2)
Anyone know what is wrong?
I'm going to be making quite a few assumptions here...
Perhaps you could add a diagram illustrating the relationships between the variables, and which are deterministic vs stochastic. I find this helpful when making models in BUGS. Also, it would be helpful to have the dimensions of all your data, the meaning of n and perhaps some context or detail on what you're modelling and the nodes in which you're interested.
I'm guessing that y is a binary (0,1) vector of length 1002, and has corresponding values for x2[,1] and x2[,2] (herein x1, x2) and concern2, concern3 (herein c2, c3) and tto i.e.
nrow(x2) == 1002
Here's some sample data with nrow == 10 to work with:
y <- sample(x=c(0,1), size=10, replace=TRUE, prob=c(0.5,0.5))
x2 <- matrix(rnorm(20), nrow=10, ncol=2)
c2 <- rnorm(10)
c3 <- rnorm(10)
tto <- rnorm(10)
It appears that you're trying to determine the values of beta.concern2 (herein b2) for both values of x2 in the logit. Not sure why you'd want to fit it with the same parameter for two different predictors. In case this is a typo I'm giving b2 and b3 as parameters instead. I hope you'll be able to adapt this to your needs.
The product of these values of b2, b3 (stochastic) and c2, c3 (given) are used to generate a variable mu, which also has an error term. (I'm presuming b[i] (herein b1[i]) is a normally distributed error term.)
Then tto is a normally distributed variable which depends on the value of mu, and itself has an error term. I have set the precision of the error terms as being equal in both cases.
So for such a model:
# library() stops with an error if rjags is missing; require() would only
# return FALSE and let the script fail later.
library(rjags)
### The data
# The two columns of the x2 matrix are passed as separate vectors x1 and x2.
dataList <- list(
  x1 = x2[, 1],
  x2 = x2[, 2],
  y = y,
  c2 = c2,
  c3 = c3,
  tto = tto,
  nRowX = nrow(x2)
)
### make sure logistic model can be fitted
f1 <- stats::glm(dataList$y ~ dataList$x1 + dataList$x2 - 1, family = binomial(logit))
show(f1)
### set some approximate initial values
# The model is fitted without an intercept (`- 1`), so coef(f1) holds exactly
# two entries: the x1 slope first and the x2 slope second. `[[` drops the
# coefficient names so plain numbers are passed on as initial values.
b1Init <- 0.1 # arbitrary
b2Init <- coef(f1)[[1]]
b3Init <- coef(f1)[[2]]
initsList <- list(
  b1 = b1Init,
  b2 = b2Init,
  b3 = b3Init
)
### Model: varying parameters (b2, b3) per observation; 2x error terms
# dnorm() in JAGS takes a precision (1 / variance) as its second argument, so
# the fixed standard deviation SD is converted with pow(SD, -2).
modelstring <- "
model {
for(i in 1:nRowX){
tto[i] ~ dnorm(mu[i], prec)
mu[i] <- 1 - (b1 + b2*c2[i] + b3*c3[i])
y[i] ~ dbern(L[i]) # L for logit
L[i] <- 1/(1+exp(- ( b2*x1[i] + b3*x2[i]) ))
}
b1 ~ dnorm(0, prec) # precision
prec <- pow(SD, -2) # precision = 1 / SD^2
SD <- 0.5
b2 ~ dnorm(0, 1.4) # arbitrary
b3 ~ dnorm(0, 1.4)
}
"
# Write the model to disk so jags.model() can read it by file name
writeLines(modelstring,con="model.txt")
parameters <- c("b1", "b2", "b3") # to monitor
adaptSteps <- 1e4 # "tune in" samplers
burnInSteps <- 2e4 # "burn in" samplers
nChains <- 3
numSavedSteps <- 2e3
thinSteps <- 1 # Steps to "thin" (1=keep every step).
nPerChain <- ceiling((numSavedSteps * thinSteps) / nChains) # Steps per chain
# Drop a model left over from an earlier run. The guard avoids the
# "object 'jagsModel' not found" warning that a bare rm() gives in a fresh session.
if (exists("jagsModel", inherits = FALSE)) rm(jagsModel)
jagsModel <- rjags::jags.model(
  "model.txt",
  data = dataList,
  inits = initsList,
  n.chains = nChains,
  n.adapt = adaptSteps
)
# Burn-in
stats::update(jagsModel, n.iter = burnInSteps)
### MCMC chain
MCMC1 <- as.matrix(rjags::coda.samples(
  jagsModel,
  variable.names = parameters,
  n.iter = nPerChain,
  thin = thinSteps
))
### Extract chain values
b2Sample <- as.vector(MCMC1[, grep("b2", colnames(MCMC1))])

Resources