I am using runjags to model some hierarchical data. I can model one level of the hierarchy, but I do not know how to extend it to more levels. I am trying to do this using method 3 from page 24 of "Bayesian Hierarchical Modelling using WinBUGS" by Nicky Best et al., which uses a nested loop (as opposed to nested indexing).
For one level I can model
filestring <-
"model{
for(j in 1:Ninner){
for(i in 1:N){
y[j,i] ~ dnorm(beta + alpha[j], py)
}
alpha[j] ~ dnorm(0, taua)
}
beta ~ dnorm(0, 0.001)
taua ~ dgamma(0.01, 0.01)
py ~ dgamma(0.01, 0.1)
}"
INITS <- list(list(.RNG.seed=1, .RNG.name="base::Wichmann-Hill"),
list(.RNG.seed=2, .RNG.name="base::Wichmann-Hill"))
results <- run.jags(filestring, monitor=c("py", "beta", "alpha"), data=jags_data, sample=1e3,
n.chains=2, inits=INITS, summarise=FALSE)
I then tried to add another level using
for(k in 1:Nouter){
for(j in 1:Ninner){
for(i in 1:N){
y[j,i] ~ dnorm(beta + alpha_in[j] + alpha_out[k], py)
} } }
but receive the error
Compilation error on line 5.
Attempt to redefine node y[1,1]
How do I extend this to model another level in which the first level is nested? Thank you.
Below is some reproducible data showing the structure of the data. I wish to estimate random intercepts for both outer_grp and inner_grp.
library(data.table)
library(runjags)
set.seed(12345)
dat <- data.table(outer_grp=rep(1:5, each=10), inner_grp=rep(1:10, each=5), y=rnorm(50), x=rnorm(50), time=1:5)
wdat = dcast(dat, inner_grp + outer_grp ~ time, value.var=c("y", "x"))
jags_data = c(setNames(
lapply(split.default(wdat, substr(names(wdat), 1, 1)),as.matrix),
c("inner_grp", "outer_grp","x", "y")),
N=5, Nouter=5, Ninner=10)
EDIT
Perhaps it is enough to model it like this?
filestring <-
"model{
for(j in 1:Ninner){
for(i in 1:N){
y[j,i] ~ dnorm(beta + alpha_in[j] + alpha_out[outer_grp[j]], py)
}
}
for(i in 1:Ninner){ alpha_in[i] ~ dnorm(0, taua) }
for(i in 1:Nouter){ alpha_out[i] ~ dnorm(0, taub) }
beta ~ dnorm(0, 0.001)
taua ~ dgamma(0.01, 0.01)
taub ~ dgamma(0.01, 0.01)
py ~ dgamma(0.01, 0.1)
}"
It is possible to add the outer group intercept by using nested indexing while still using the loop format. I'll use the Pastes dataset from lme4 for comparison.
filestring <-
"model{
for(j in 1:Ninner){
for(i in 1:N){
y[j,i] ~ dnorm(beta + alpha_in[j] + alpha_out[batch[j]], py)
}
}
for(i in 1:Ninner){ alpha_in[i] ~ dnorm(0, taua) }
for(i in 1:Nouter){ alpha_out[i] ~ dnorm(0, taub) }
beta ~ dnorm(0, 0.001)
taua <- 1/(sa*sa)
sa ~ dunif(0,100)
taub <- 1/(sb*sb)
sb ~ dunif(0,100)
py ~ dgamma(0.001, 0.001)
}"
INITS <- list(list(.RNG.seed=1, .RNG.name="base::Wichmann-Hill"),
list(.RNG.seed=2, .RNG.name="base::Wichmann-Hill"))
results <- run.jags(filestring, monitor=c("py", "beta", "alpha_in", "alpha_out", "sa", "sb"),
data=jags_data, burnin=1e4, sample=1e4, n.chains=2,
inits=INITS, summarise=0)
summary(results, vars=c("py", "beta", "sa", "sb"))
Compare to lme4
fm1 <- lmer(strength ~ (1|batch) + (1|sample), Pastes)
print(summary(fm1), corr=FALSE)
Data used
library(lme4); library(data.table); library(runjags)
data(Pastes); setDT(Pastes)
Pastes[,time := sequence(.N), by=sample]
# Change format to match question
wdat = dcast(Pastes, batch + sample ~ time, value.var="strength")
jags_data = list(y=as.matrix(wdat[,3:4]), batch=wdat$batch, N=2, Ninner=length(unique(wdat$sample)), Nouter=length(unique(wdat$batch)))
Related
I want to perform a mixed effect regression in rjags, with a random slope and intercept. I define the following toy dataset:
library(ggplot2)
library(data.table)
library(magrittr) # provides the %>% pipe used below
global_slope <- 1
global_int <- 1
Npoints_per_group <- 50
N_groups <- 10
pentes <- rnorm(N_groups,-1,.5)
centers_x <- seq(0,10,length = N_groups)
center_y <- global_slope*centers_x + global_int
group_spread <- 2
group_names <- sample(LETTERS,N_groups)
df <- lapply(1:N_groups,function(i){
x <- seq(centers_x[i]-group_spread/2,centers_x[i]+group_spread/2,length = Npoints_per_group)
y <- pentes[i]*(x- centers_x[i])+center_y[i]+rnorm(Npoints_per_group)
data.table(x = x,y = y,ID = group_names[i])
}) %>% rbindlist()
ggplot(df,aes(x,y,color = as.factor(ID)))+
geom_point()
This is a typical Simpson's paradox situation: an overall increasing trend, but a decreasing trend within each group (given by the ID variable).
I define the following model:
library(rjags)
model_code_simpson <-
" model
{
# first level
for (i in 1:n) {
y[i] ~ dnorm(alpha[i] + beta[i] * x[i], tau)
alpha[i] = alpha[group[i]] # random intercept
beta[i] = beta[group[i]] # random slope
}
# second level
for(j in 1:J){
alpha[j] ~ dnorm(mu.alpha, tau.alpha)
beta[j] ~ dnorm(mu.beta, tau.beta)
}
# Priors
mu.alpha ~ dnorm(0,0.001)
mu.beta ~ dnorm(0,0.001)
sigma ~ dunif(0,10)
sigma.alpha ~ dunif(0,10)
sigma.beta ~ dunif(0,10)
# Derived quantities
tau <- pow(sigma,-2)
tau.alpha <- pow(sigma.alpha,-2)
tau.beta <- pow(sigma.beta,-2)
}
"
# Choose the parameters to watch
model_parameters <- c("mu.alpha","tau.alpha","tau.beta","tau")
# define numeric grouping variable
df[,ID2 := .GRP,by = ID]
model_data <- list(n = nrow(df),
y = df$y,
x = df$x,
group = df$ID2,
J = df[,uniqueN(ID)])
model <- jags.model(textConnection(model_code_simpson),
data = model_data,
n.chains = 2)
I get the following error:
Compiling model graph
Resolving undeclared variables
Allocating nodes
Deleting model
Error in jags.model(textConnection(model_code_simpson), data = model_data, :
RUNTIME ERROR:
Compilation error on line 8.
Attempt to redefine node beta[1]
I do not understand what is happening, and related questions did not help me much.
You defined beta twice (and alpha has the same problem). First, beta is a vector of length n when you loop through the data. Second, beta is a vector of length J when you create the random effects. This "redefining" is what causes the error, but it is an easy fix: remove those first instances of alpha and beta from your model and it will compile (i.e., just move the nested indexing inside dnorm() and you are good to go).
model_code_simpson <-
" model
{
# first level
for (i in 1:n) {
y[i] ~ dnorm(
alpha[group[i]] + beta[group[i]] * x[i],
tau
)
}
# second level
for(j in 1:J){
alpha[j] ~ dnorm(mu.alpha, tau.alpha)
beta[j] ~ dnorm(mu.beta, tau.beta)
}
# Priors
mu.alpha ~ dnorm(0,0.001)
mu.beta ~ dnorm(0,0.001)
sigma ~ dunif(0,10)
sigma.alpha ~ dunif(0,10)
sigma.beta ~ dunif(0,10)
# Derived quantities
tau <- pow(sigma,-2)
tau.alpha <- pow(sigma.alpha,-2)
tau.beta <- pow(sigma.beta,-2)
}
"
I'm trying to fit a simplex model with the Poisson trick; the likelihood is the simplex likelihood (Likelihood Simplex). The code is below
model{
for (i in 1:n){
y[i] ~ dpois(lambda[i])
lambda[i] <- 0.5*log(phi[i]*(y[i]*(1-y[i]))^3) + 0.5*(1/phi[i])*d[i]
d[i] <- ((y[i]-mu[i])^2)/(y[i]*(1-y[i])*mu[i]^2*(1-mu[i])^2)
mu[i] <- beta0+beta1*income[i] + beta2*person[i]
log(phi[i]) <- -delta0
}
beta0 ~ dnorm(0,.001)
beta1 ~ dnorm(0,.001)
beta2 ~ dnorm(0,.001)
delta0 ~ dnorm(0,.001)
}"
When I try to run the code with JAGS in R, I get the following error
RUNTIME ERROR:
Possible directed cycle involving some or all
of the following nodes:
It then lists all of the d[], y[] and lambda[] nodes.
I found that someone had a similar problem (JAGS error), but it looks like I'm not making the same mistake.
Any help?
EDIT:
Second attempt
regmodel = "
data{
for(i in 1:n) {
zeros[i] <- 0
}
}
model{
C <- 1000
for (i in 1:n){
zeros[i] ~ dpois(lambda[i])
lambda[i] <- -l[i] + C
l[i] <-
0.5*log(phi[i]*(y[i]*(1-y[i]))^3) +
0.5*(1/phi[i])*((y[i]-mu[i])^2)/(y[i]*(1-y[i])*mu[i]^2*(1-mu[i])^2)
mu[i]<- beta0 + beta1*income[i] + beta2*person[i]
log(phi[i]) <- -delta0
}
beta0 ~ dnorm(0,.001)
beta1 ~ dnorm(0,.001)
beta2 ~ dnorm(0,.001)
delta0 ~ dnorm(0,.001)
}"
But the error now is
Error in jags.model(file = "ModeloSimplex.txt", data = reg.dat, n.chains = 3, :
Error in node ((0.5*(1/phi[1])*((y[1]-mu[1])^2))/(y[1]*(1-y[1])*(mu[1]^2)*((1-mu[1])^2)))
Invalid parent values
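"Invalid parent values" means dpois() is being handed a lambda[i] that is negative, infinite, or NaN. Two likely culprits in this model: with the default initial values beta0 = beta1 = beta2 = 0, mu[i] starts at 0, so the mu[i]^2 factor in the denominator makes l[i] infinite; and nothing forces -l[i] + C to stay non-negative. A hedged sketch of one common adjustment (my assumption, not something from the thread) is to put a logit link on mu so it stays in (0,1) and to choose C generously; note also that if l[i] is the negative log-likelihood, as it appears to be for the standard simplex density, the zeros trick wants lambda[i] <- l[i] + C rather than -l[i] + C:
regmodel = "
data{
for(i in 1:n) { zeros[i] <- 0 }
}
model{
C <- 10000                      # must exceed max(l[i]) so that lambda[i] >= 0
for (i in 1:n){
zeros[i] ~ dpois(lambda[i])
lambda[i] <- l[i] + C           # l[i] = negative log-likelihood of y[i]
l[i] <-
0.5*log(phi[i]*(y[i]*(1-y[i]))^3) +
0.5*(1/phi[i])*((y[i]-mu[i])^2)/(y[i]*(1-y[i])*mu[i]^2*(1-mu[i])^2)
logit(mu[i]) <- beta0 + beta1*income[i] + beta2*person[i]   # keeps mu[i] in (0,1)
log(phi[i]) <- -delta0
}
beta0 ~ dnorm(0,.001)
beta1 ~ dnorm(0,.001)
beta2 ~ dnorm(0,.001)
delta0 ~ dnorm(0,.001)
}"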
I have a multi-level jags model. I'm trying to convert it from wide to long format as described here: http://jeromyanglim.tumblr.com/post/37361593128/jags-converting-multilevel-model-from-wide-to However, my model is more complex than the example, so I'm having some trouble making this work. To illustrate the difficulties I've made a reproducible example. This first block creates data and sets the JAGS parameters:
library(ecodist)
library(runjags)
set.seed(10)
##### population n
n <- 250
# num outputs
num.ys <- 10
# Binary vector indicating which outcomes (domains) are correlated with the independent variable
corr.vec <- c(0, 0, 0, 1, 1, 0, 0, 1, 1, 1)
correlation = 0.99
# Function to simulate correlated outcome
sim.fn <- function(i, var1, sw1) {
if(sw1 ==1){
temp <- corgen(n , var1, correlation )
temp <- as.numeric(temp$y * attr(temp$y,'scaled:scale') + attr(temp$y,'scaled:center'))
} else {
temp <- rnorm(n, 0, 5)
}
return(temp)
}
##### Generate data
df0 <- data.frame(var1=rnorm(n, 15, 2))
df1 <- data.frame(df0, sapply(1:num.ys, function(i) sim.fn(i, df0$var1, corr.vec[i])))
out.names <- paste0("y_", 1:num.ys)
names(df1) <- c("var1", out.names)
### Jags parameters
parameters = c("B1O", "b1", "b1o", "nu", "sd")
adaptSteps = 1000 # Number of steps to "tune" the samplers.
burnInSteps = 10000 # Number of steps to "burn-in" the samplers.
nChains = 2 # Number of chains to run.
numSavedSteps=1000 # Total number of steps in chains to save.
thinSteps=2 # Number of steps to "thin" (1=keep every step).
nPerChain = ceiling( ( numSavedSteps * thinSteps ) / nChains ) # Steps per chain.
OK, so this next section is the 'wide format' JAGS model that provides the correct estimates in the object mcmcChain:
modelstring = "
model {
for( i in 1 : nData ) {
for(np in 1:nVars){
y[i, np] ~ dt( mu[i,np], tau, nu)
mu[i, np] <- b0s[i] + (b1 + b1o[np]) * x1[i]
}
}
#Random effects
for(i in 1:nData){
b0s[i] ~ dnorm(0, b0stau)
}
#Outcome level
for (np in 1:nVars){
b1o[np] ~ dnorm(0, b1otau)
}
##### Priors
#Overarching Level
b1 ~ dnorm(0, 0.0001)
#
b0stau <- pow(b0ssd, -2)
b0ssd ~ dt(0, 1/625, 1)T(0,)
# tau & nu priors
nuI ~ dunif(0.001,0.5)
nu <- 1/nuI
tau <- pow(sd, -2)
sd ~ dunif(0, 10)
b1otau <- pow(b1osd, -2)
b1osd ~ dt(0, 1/625, 1)T(0,)
b1dtau <- pow(b1dsd, -2)
b1dsd ~ dt(0, 1/625, 1)T(0,)
#Transformations
for(np in 1:nVars){
B1O[np] <- b1 + b1o[np]
}
}
" # close quote for modelstring
writeLines(modelstring,con="model.jags.no_dom.test.txt")
zy <- (df1[, out.names])
sc_ys <- data.frame(lapply(zy, function(x) scale(x)) )
dataList = list( y = as.matrix(sc_ys), x1 = as.numeric(scale(df1$var1)),
nVars = num.ys, nData = nrow(df1))
# Run this model via run.jags
codaSamples <- run.jags(model="model.jags.no_dom.test.txt" , data=dataList , method ="parallel", n.chains=nChains, monitor=parameters,
adapt = adaptSteps, burnin = burnInSteps, sample=nPerChain, thin=thinSteps)
mcmcChain <- data.frame(summary( codaSamples ))
mcmcChain
So the B1O outputs are close to the correlations the data was generated from.
Next is my attempt at the "long format" model, analogous to the explanation in the link above.
modelstring = "
model {
for( i in 1 : nData ) {
y[i] ~ dt( mu[i] , tau, nu )
mu[i] <- b0s[i] + (b1 + b1o[idx[i]]) * x1[i]
}
#Random effects
for(i in 1:nData){
b0s[i] ~ dnorm(0, b0stau)
}
#Outcome level
for (y in 1:nVars){
b1o[y] ~ dnorm(0, b1otau[y])
}
##### Priors
#Overarching Level
b1 ~ dnorm(0, 0.0001)
b0stau <- pow(b0ssd, -2)
b0ssd ~ dt(0, 1/625, 1)T(0,)
for (y in 1:nVars){
b1otau[y] <- pow(b1osd[y], -2)
b1osd[y] ~ dt(0, 1/625, 1)T(0,)
}
tau <- pow(sd, -2)
sd ~ dunif(0, 10)
nuI ~ dunif(0.001,0.5)
nu <- 1/nuI
#Transformations
for(j in 1:nVars){
B1O[j] <- b1 + b1o[j]
}
}
" # close quote for modelstring
writeLines(modelstring,con="model.jags.no_dom.long.test.txt")
# Restructure data into long format
dataList2 = list( y = unlist(sc_ys), x1 = rep(as.numeric(scale(df1$var1)), length(out.names)),
idx = rep(1:length(out.names), each=nrow(df1)),
nVars = length(out.names), nData = nrow(df1))
codaSamples2 <- run.jags(model="model.jags.no_dom.long.test.txt" , data=dataList2 , method ="parallel", n.chains=nChains, monitor=parameters,
adapt = adaptSteps, burnin = burnInSteps, sample=nPerChain, thin=thinSteps)
mcmcChain2 <- data.frame(summary( codaSamples2 ))
mcmcChain2
So the results in mcmcChain2 don't match those of mcmcChain, but I cannot see where I'm going wrong. Can anyone help please? Thanks.
Your response matrix has nData * nVars elements, but your long-format model only uses the first nData of them (i.e. in effect you are just using the first column of the data). The upper limit of the main data loop needs to be nData*nVars, not just nData.
Also you need a vector representing the row number of the original df1 so that you can index your random effect b0s correctly as e.g. b0s[dfrow[i]]. Also, it is hard to follow the data specification (e.g. what is length(out.names)) so I'm not sure if you have already done this, but either x1 needs to be repeated nVars times or you should use the same x1[dfrow[i]] indexing as for the random effect (preferably the latter for the sake of readability of your model code).
Matt
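A minimal sketch of the long-format model with both of those changes (nTotal, dfrow and the new object/file names are placeholders added here; the priors follow the wide-format model rather than the per-outcome b1otau[y] of the attempt above):
modelstring = "
model {
for(i in 1:nTotal) {                # nTotal = nData * nVars long-format rows
y[i] ~ dt( mu[i], tau, nu )
mu[i] <- b0s[dfrow[i]] + (b1 + b1o[idx[i]]) * x1[dfrow[i]]
}
#Random effects
for(i in 1:nData){
b0s[i] ~ dnorm(0, b0stau)
}
#Outcome level
for (np in 1:nVars){
b1o[np] ~ dnorm(0, b1otau)
}
##### Priors
b1 ~ dnorm(0, 0.0001)
b0stau <- pow(b0ssd, -2)
b0ssd ~ dt(0, 1/625, 1)T(0,)
b1otau <- pow(b1osd, -2)
b1osd ~ dt(0, 1/625, 1)T(0,)
tau <- pow(sd, -2)
sd ~ dunif(0, 10)
nuI ~ dunif(0.001,0.5)
nu <- 1/nuI
#Transformations
for(np in 1:nVars){
B1O[np] <- b1 + b1o[np]
}
}
" # close quote for modelstring
writeLines(modelstring, con="model.jags.no_dom.long.fix.txt")
dataList3 = list( y = unlist(sc_ys),
                  x1 = as.numeric(scale(df1$var1)),
                  dfrow = rep(1:nrow(df1), times=length(out.names)),  # original row of df1 for each long-format element
                  idx = rep(1:length(out.names), each=nrow(df1)),
                  nVars = length(out.names), nData = nrow(df1),
                  nTotal = nrow(df1) * length(out.names))
codaSamples3 <- run.jags(model="model.jags.no_dom.long.fix.txt", data=dataList3, method="parallel",
                         n.chains=nChains, monitor=parameters, adapt=adaptSteps,
                         burnin=burnInSteps, sample=nPerChain, thin=thinSteps)
data.frame(summary(codaSamples3))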
I am trying to fit a logistic regression model in JAGS, but I have data in the form of (# success y, # attempts n), rather than a binary variable. In R, one can fit a model to data such as these by using glm(y/n ~ ) with the "weights" argument, but I am not sure how to fit this in JAGS.
Here is a simple example that I hope addresses what I am trying to ask. Note that I am using the rjags package. Thanks for any help!
y <- rbinom(10, 500, 0.2)
n <- sample(500:600, 10)
p <- y/n
x <- sample(0:100, 10) # some covariate
data <- data.frame(y, n, p, x)
model <- "model{
# Specify likelihood
for(i in 1:10){
y[i] ~ dbin(p[i], n[i])
logit(p[i]) <- b0 + b1*x
}
# Specify priors
b0 ~ dnorm(0, 0.0001)
b1 ~ dnorm(0, 0.0001)
}"
You don't need to compute p in your data set at all. Just let it be a logical node in your model. I prefer the R2jags interface, which allows you to specify a BUGS model in the form of an R function ...
jagsdata <- data.frame(y=rbinom(10, 500, 0.2),
n=sample(500:600, 10),
x=sample(0:100, 10))
model <- function() {
## Specify likelihood
for(i in 1:10){
y[i] ~ dbin(p[i], n[i])
logit(p[i]) <- b0 + b1*x[i]
}
## Specify priors
b0 ~ dnorm(0, 0.0001)
b1 ~ dnorm(0, 0.0001)
}
Now run it:
library("R2jags")
jags(model.file=model,data=jagsdata,
parameters.to.save=c("b0","b1"))
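If you'd rather stay with rjags, as in the question, the same model works there too; a minimal sketch (chain and iteration settings are arbitrary):
library(rjags)
model_string <- "model{
  for(i in 1:10){
    y[i] ~ dbin(p[i], n[i])
    logit(p[i]) <- b0 + b1*x[i]
  }
  b0 ~ dnorm(0, 0.0001)
  b1 ~ dnorm(0, 0.0001)
}"
jm <- jags.model(textConnection(model_string),
                 data = list(y = jagsdata$y, n = jagsdata$n, x = jagsdata$x),
                 n.chains = 2)
update(jm, 1000)  # burn-in
samp <- coda.samples(jm, variable.names = c("b0", "b1"), n.iter = 5000)
summary(samp)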
Using JAGS I am trying to estimate a model including a unit-specific time trend.
However, the problem is that I don't know how to model this and so far I have been unable to find a solution.
As an example, consider we have the following data:
rain<-rnorm(200) # Explanatory variable
n1<-rnorm(200) # Some noise
gdp<-rain+n1 # Outcome variable
ccode<-rep(1:10,20) # Unit codes
year<-rep(1:20,10) # Years
Using normal linear regression, we would estimate the model as:
m1<-lm(gdp~rain+factor(ccode)*year)
Here factor(ccode)*year gives the unit-specific time trend. Now I want to estimate the model using JAGS, so I create parameters for the indexing:
N<-200
J<-max(ccode)
T<-max(year)
And estimate the model,
library(R2jags)
library(rjags)
set.seed(42); runif(1)
dat<-list(gdp=gdp,
rain=rain,
ccode=ccode,
year=year,
N=N,J=J,T=T)
parameters<-c("b1","b0")
model.file <- "~/model.txt"
system.time(m1<-jags(data=dat,inits=NULL,parameters.to.save=parameters,
model.file=model.file,
n.chains=4,n.iter=500,n.burnin=125,n.thin=2))
with the following model file, and this is where the error is at the moment:
# Simple model
model {
# For N observations
for(i in 1:N) {
gdp[i] ~ dnorm(yhat[i], tau)
yhat[i] <- b1*rain[i] + b0[ccode[i]*year[i]]
}
for(t in 1:T) {
for(j in 1:J) {
b0[t,j] ~ dnorm(0, .01)
}
}
# Priors
b1 ~ dnorm(0, .01)
# Hyperpriors
tau <- pow(sd, -2)
sd ~ dunif(0,20)
}
I am fairly sure that the way in which I define b0 and the indexing is incorrect given the error I get when using the code: Compilation error on line 7. Dimension mismatch taking subset of b0.
However, I don't know how to solve this, so I wondered whether anyone here has suggestions?
Your lm example can also be written:
m1 <- lm(gdp ~ -1 + rain + factor(ccode) + factor(ccode):year)
The equivalent JAGS model, giving each unit its own intercept b0[j] and its own year slope b2[j], would be:
M <- function() {
for(i in 1:N) {
gdp[i] ~ dnorm(yhat[i], sd^-2)
yhat[i] <- b0[ccode[i]] + b1*rain[i] + b2[ccode[i]]*year[i]
}
b1 ~ dnorm(0, 0.001)
for (j in 1:J) {
b0[j] ~ dnorm(0, 0.001)
b2[j] ~ dnorm(0, 0.001)
}
sd ~ dunif(0, 100)
}
parameters<-c('b0', 'b1', 'b2')
mj <- jags(dat, NULL, parameters, M, 3)
Comparing coefficients:
par(mfrow=c(1, 2), mar=c(5, 5, 1, 1))
plot(mj$BUGSoutput$summary[grep('^b0', row.names(mj$BUGSoutput$summary)), '50%'],
coef(m1)[grep('^factor\\(ccode\\)\\d+$', names(coef(m1)))],
xlab='JAGS estimate', ylab='lm estimate', pch=20, las=1,
main='b0')
abline(0, 1)
plot(mj$BUGSoutput$summary[grep('^b2', row.names(mj$BUGSoutput$summary)), '50%'],
coef(m1)[grep('^factor\\(ccode\\)\\d+:', names(coef(m1)))],
xlab='JAGS estimate', ylab='lm estimate', pch=20, las=1,
main='b2')
abline(0, 1)