I am trying to fit a multinomial logistic regression model using rjags for the outcome is a categorical (nominal) variable (Outcome) with 3 levels, and the explanatory variables are Age (continuous) and Group (categorical with 3 levels). In doing so, I would like to obtain the Posterior means and 95% quantile-based regions for Age and Group.
I am not really great at for loop which I think is the reason why my written code for the model isn't working working properly.
My beta priors follow a Normal distribution, βj ∼ Normal(0,100) for j ∈ {0, 1, 2}.
Reproducible R code
library(rjags)
set.seed(1)
data <- data.frame(Age = round(runif(119, min = 1, max = 18)),
Group = c(rep("pink", 20), rep("blue", 18), rep("yellow", 81)),
Outcome = c(rep("A", 45), rep("B", 19), rep("C", 55)))
X <- as.matrix(data[,c("Age", "Group")])
J <- ncol(X)
N <- nrow(X)
## Step 1: Specify model
cat("
model {
for (i in 1:N){
##Sampling model
yvec[i] ~ dmulti(p[i,1:J], 1)
#yvec[i] ~ dcat(p[i, 1:J]) # alternative
for (j in 1:J){
log(q[i,j]) <- beta0 + beta1*X[i,1] + beta2*X[i,2]
p[i,j] <- q[i,j]/sum(q[i,1:J])
}
##Priors
beta0 ~ dnorm(0, 0.001)
beta1 ~ dnorm(0, 0.001)
beta2 ~ dnorm(0, 0.001)
}
}",
file="model.txt")
##Step 2: Specify data list
dat.list <- list(yvec = data$Outcome, X=X, J=J, N=N)
## Step 3: Compile and adapt model in JAGS
jagsModel<-jags.model(file = "model.txt",
data = dat.list,
n.chains = 3,
n.adapt = 3000
)
Error message:
Sources I have been looking at for help:
http://people.bu.edu/dietze/Bayes2018/Lesson21_GLM.pdf
Dirichlet Multinomial model in JAGS with categorical X
Reference from http://www.stats.ox.ac.uk/~nicholls/MScMCMC15/jags_user_manual.pdf, page 31
I have just started to learn how to use the rjags package so any hint/explanation and link to relevant sources would be greatly appreciated!
I will include an approach to your issue. I have taken the same priors you defined for coefficients. I only need to mention that as you have a factor in Group I will use one of its levels as reference (in this case pink) so its effect will be taken into account by the constant in the model. Next the code:
library(rjags)
#Data
set.seed(1)
data <- data.frame(Age = round(runif(119, min = 1, max = 18)),
Group = c(rep("pink", 20), rep("blue", 18), rep("yellow", 81)),
Outcome = c(rep("A", 45), rep("B", 19), rep("C", 55)))
#Input Values we will avoid pink because it is used as reference level
#so constant absorbs the effect of that level
r1 <- as.numeric(data$Group=='pink')
r2 <- as.numeric(data$Group=='blue')
r3 <- as.numeric(data$Group=='yellow')
age <- data$Age
#Output 2 and 3
o1 <- as.numeric(data$Outcome=='A')
o2 <- as.numeric(data$Outcome=='B')
o3 <- as.numeric(data$Outcome=='C')
#Dim, all have the same length
N <- length(r2)
## Step 1: Specify model
model.string <- "
model{
for (i in 1:N){
## outcome levels B, C
o1[i] ~ dbern(pi1[i])
o2[i] ~ dbern(pi2[i])
o3[i] ~ dbern(pi3[i])
## predictors
logit(pi1[i]) <- b1+b2*age[i]+b3*r2[i]+b4*r3[i]
logit(pi2[i]) <- b1+b2*age[i]+b3*r2[i]+b4*r3[i]
logit(pi3[i]) <- b1+b2*age[i]+b3*r2[i]+b4*r3[i]
}
## priors
b1 ~ dnorm(0, 0.001)
b2 ~ dnorm(0, 0.001)
b3 ~ dnorm(0, 0.001)
b4 ~ dnorm(0, 0.001)
}
"
#Model
model.spec<-textConnection(model.string)
## fit model w JAGS
jags <- jags.model(model.spec,
data = list('r2'=r2,'r3'=r3,
'o1'=o1,'o2'=o2,'o3'=o3,
'age'=age,'N'=N),
n.chains=3,
n.adapt=3000)
#Update the model
#Update
update(jags, n.iter=1000,progress.bar = 'none')
#Sampling
results <- coda.samples(jags,variable.names=c("b1","b2","b3","b4"),n.iter=1000,
progress.bar = 'none')
#Results
Res <- do.call(rbind.data.frame, results)
With the results of chains for parameters saved in Res, you can compute posterior media and credible intervals using next code:
#Posterior means
apply(Res,2,mean)
b1 b2 b3 b4
-0.79447801 0.00168827 0.07240954 0.08650250
#Lower CI limit
apply(Res,2,quantile,prob=0.05)
b1 b2 b3 b4
-1.45918662 -0.03960765 -0.61027923 -0.42674155
#Upper CI limit
apply(Res,2,quantile,prob=0.95)
b1 b2 b3 b4
-0.13005617 0.04013478 0.72852243 0.61216838
The b parameters belong to the each of the variables considered (age and the levels of Group). Final values could change because of the mixed chains!
Related
I am performing a logistic regression in R and I attempt to plot the logit of the probability vs the probability of obtaining 1. I would like to plot all the values predicted as 1(positive) using one colour and the values predicted as 0 (negatives) with another colour. This is, to plot the values>0.5 with one colour and the values<0.5 with another colour. Any ideas of how can I do that? Here is my code:
pdgng<-data$pdgng
ec<-data$ec
logitp <- 0.497-1.699 * (log(pdgng)) +3.829 * (log(ec))
logistic<-exp(logitp)/(1+exp(logitp))
op5<-par(cex.lab=0.9,font.lab=2,cex.axis=1,bty="n")
plot(logitp,logistic,ylab="Probability",xlab="logitp"
abline(h=0.5, col="blue",lwd=1,lty=2)
I have tried to use a loop but I can't figure out how to apply it to my case.
Regards,
Antonela
Thank you for your answer... Here are the complete code and the data.
pregnancystate<-c("pregnancystate_nomales.csv")
data<-read.csv(file = "pregnancystate_nomales.csv", dec = ".",sep = ",", header =
TRUE)
cat(file = "preg.bug", "
#Likelihood:
model {
for(i in 1: 69){
pregnancy[i] ~ dbern(p[i])
logit(p[i]) <- b0+ b1 * (log(pdgng[i])) + b2 * (log(ec[i]))
}
#priors:
b0 ~ dunif(0,1)
b1 ~ dnorm(0, 0.0001)
b2 ~ dnorm(0, 0.0001)
}"
)
pdg<-data$pdgng
ec<-data$ec
obs<-69 #number of observations
inits <- function() {
list(b0=runif(0,0.05),b1 = rnorm(1,0,1), b2 = rnorm(1,0,1))
}
parameters <- c( "b0", "b1", "b2")
ni <- 100000
nt <- 1
nb <- 50000
nc <- 3
library(jagsUI)
pregn <- jags(data=data, parameters.to.save = parameters, model.file
="preg.bug" , n.chains = nc, n.thin = nt,
n.iter = ni, n.burnin = nb)
print(pregn)
###Logistic curve
pdgng<-data$pdgng
ec<-data$ec
logitp <- 0.497-1.699 * (log(pdgng)) +3.829 * (log(ec))
logistic<-exp(logitp)/(1+exp(logitp))
op5<-par(cex.lab=0.9,font.lab=2,cex.axis=1,bty="n")
plot(logitp,logistic,ylab="Probability",xlab="logitp")
abline(h=0.5, col="blue",lwd=1,lty=2)
My data:
pregnancy,pdgng,ec ,logit(p),probability
0,143997.3937,746.5102301,2.736380058,0.939139522
1,45109.3079,1418.995961,4.659342216,0.990616199
0,52683.58472,56.85769855,-0.802104453,0.309575536
1,138659.7743,852.4850646,2.984848323,0.951884912
0,52689.26541,47.78030436,-1.091276892,0.251377908
0,35554.31237,54.64659066,-0.578210329,0.359344501
1,44957.50427,881.71069,3.870957685,0.979586972
0,17346.58536,34.3035689,-0.82320263,0.305084254
1,87056.00603,959.375944,3.524232772,0.971369456
0,65611.00906,75.41128029,-0.494460715,0.378843302
0,40403.69619,50.41093373,-0.806534415,0.308629482
1,70574.46512,201.8534225,1.088218252,0.748046058
0,42819.85124,63.11200154,-0.47587338,0.383227039
0,24432.8854,55.57544032,-0.273726259,0.431992534
0,40603.1634,66.64685283,-0.346121395,0.414323288
0,42019.87914,52.39963524,-0.771133528,0.316233952
0,31035.0976,74.1115375,0.028378308,0.507094101
1,72415.54675,245.6925039,1.395895237,0.801531717
1,84035.46785,125.7413611,0.172889837,0.543115117
0,54189.70386,72.94486225,-0.408776022,0.399205644
1,27718.06513,59.68819086,-0.248043693,0.438305071
0,34963.48171,61.44647007,-0.370936316,0.408314794
0,107577.8631,100.9496181,-0.374125172,0.407544612
0,45300.54732,74.97611356,-0.23107752,0.442486313
1,87096.67182,967.8444156,3.538495187,0.97176345
0,51185.37663,139.2228995,0.707560153,0.669861819
0,55756.69828,54.10187088,-0.926462682,0.28364291
0,62001.79489,72.06355191,-0.528232186,0.370929297
1,90068.53164,1229.299153,3.911209047,0.980376504
1,49585.43419,167.4432442,1.037725477,0.738410897
1,49404.23826,89.63236934,0.001765223,0.500441306
1,61502.94127,665.0569165,3.171324365,0.959740787
1,67251.66979,233.6405586,1.366821986,0.796866212
1,92243.7537,327.5783641,1.695613274,0.844960934
0,54199.09589,49.26495746,-1.06124114,0.257072343
1,123323.012,1444.722047,3.94799264,0.981071798
0,53346.41158,41.17624229,-1.347644582,0.206255721
0,29770.49904,54.5633406,-0.449895094,0.389385709
1,109766.4443,780.8332001,3.011143597,0.953075026
1,98604.30654,172.0454198,0.576164889,0.640184471
1,312081.4201,215.5575672,0.101773998,0.52542156
0,57012.73092,55.32276656,-0.905791033,0.287861894
1,60997.32874,384.736727,2.267748818,0.906170555
1,97002.76256,214.6832766,0.956219147,0.722364177
1,153642.8724,1119.772165,3.362507665,0.966512036
0,38540.42815,53.2720665,-0.679986605,0.336264292
1,26926.33036,154.1974377,1.350760623,0.794253953
0,40106.0074,73.12316073,-0.182908088,0.454400038
1,231120.767,555.4809947,1.896414313,0.869485158
1,69794.21866,192.9932648,1.021808213,0.735324668
1,38155.6105,155.9863581,1.113037873,0.752695031
0,23854.68994,43.47440207,-0.664222643,0.339791692
1,34365.31734,204.2199999,1.637949837,0.837255778
1,81997.44506,602.4558705,2.795054752,0.942408006
0,51168.54791,54.91864886,-0.838270405,0.301899182
1,45249.61128,277.0660431,1.942187893,0.87459231
1,19571.31501,33.21986659,-0.96548945,0.275780463
1,102338.8059,986.4792596,3.451336152,0.969270963
0,36239.49787,24.65023113,-1.915433194,0.128371691
0,19273.67035,18.41201813,-1.935041598,0.126193601
0,33700.59761,31.68973854,-1.444385314,0.190867176
0,27424.33371,33.6980347,-1.190374235,0.233192011
0,30118.85279,31.41423431,-1.376085775,0.201638377
0,24570.3708,34.2774031,-1.081053394,0.253306723
0,24154.21332,36.994861,-0.94166181,0.280564787
1,92503.08903,310.94613,1.60693896,0.832985967
1,47316.63823,165.8834488,1.056687739,0.742057059
1,16917.84884,260.5582078,2.565165807,0.928585784
1,36961.88734,338.2647117,2.422986594,0.918563435
1s are pregnant females and 0s are not pregnant females. I intend to logitp vs probability and use different a colour to the dots which belong to pregnant females and another colour to those that belong to not pregnant.
I hope this clarify my question.
Regards,
Antonela
Is there way to get predict behavior with standard errors from lfe::felm if the fixed effects are swept out using the projection method in felm? This question is very similar to the question here, but none of the answers to that question can be used to estimate standard errors or confidence/prediction intervals. I know that there's currently no predict.felm, but I am wondering if there are workarounds similar to those linked above that might also work for estimating the prediction interval
library(DAAG)
library(lfe)
model1 <- lm(data = cps1, re74 ~ age + nodeg + marr)
predict(model1, newdata = data.frame(age=40, nodeg = 0, marr=1), se.fit = T, interval="prediction")$fit
# Result: fit lwr upr
# 1 18436.18 2339.335 34533.03
model2 <- felm(data = cps1, re74 ~ age | nodeg + marr)
predict(model2, newdata = data.frame(age=40, nodeg = 0, marr=1), se.fit = T, interval="prediction")$fit
# Does not work
The goal is to estimate a prediction interval for yhat, for which I think I'd need to compute the full variance-covariance matrix (including the fixed effects). I haven't been able to figure out how to do this, and I'm wondering if it's even computationally feasible.
After conversations with several people, I don't believe it is possible to obtain an estimate the distribution of yhat=Xb (where X includes both the covariates and the fixed effects) directly from felm, which is what this question boils down to. It is possible bootstrap them, however. The following code does so in parallel. There is scope for performance improvements, but this gives the general idea.
Note: here I do not compute full prediction interval, just the SEs on Xb, but obtaining the prediction interval is straightforward - just add the root of sigma^2 to the SE.
library(DAAG)
library(lfe)
library(parallel)
model1 <- lm(data = cps1, re74 ~ age + nodeg + marr)
yhat_lm <- predict(model1, newdata = data.frame(age=40, nodeg = 0, marr=1), se.fit = T)
set.seed(42)
boot_yhat <- function(b) {
print(b)
n <- nrow(cps1)
boot <- cps1[sample(1:n, n, replace=T),]
lm.model <- lm(data=demeanlist(boot[, c("re74", "age")], list(factor(boot$nodeg), factor(boot$marr))),
formula = re74 ~ age)
fe <- getfe(felm(data = boot, re74 ~ age | nodeg + marr))
bootResult <- predict(lm.model, newdata = data.frame(age = 40)) +
fe$effect[fe$fe == "nodeg" & fe$idx==0] +
fe$effect[fe$fe == "marr" & fe$idx==1]
return(bootResult)
}
B = 1000
yhats_boot <- mclapply(1:B, boot_yhat)
plot(density(rnorm(10000, mean=yhat_lm$fit, sd=yhat_lm$se.fit)))
lines(density(yhats), col="red")
From your first model predict(.) yields this:
# fit lwr upr
# 1 18436.18 2339.335 34533.03
Following 李哲源 we can achieve these results manually, too.
beta.hat.1 <- coef(model1) # save coefficients
# model matrix: age=40, nodeg = 0, marr=1:
X.1 <- cbind(1, matrix(c(40, 0, 1), ncol=3))
pred.1 <- as.numeric(X.1 %*% beta.hat.1) # prediction
V.1 <- vcov(model1) # save var-cov matrix
se2.1 <- unname(rowSums((X.1 %*% V.1) * X.1)) # prediction var
alpha.1 <- qt((1-0.95)/2, df = model1$df.residual) # 5 % level
pred.1 + c(alpha.1, -alpha.1) * sqrt(se2.1) # 95%-CI
# [1] 18258.18 18614.18
sigma2.1 <- sum(model1$residuals ^ 2) / model1$df.residual # sigma.sq
PI.1 <- pred.1 + c(alpha.1, -alpha.1) * sqrt(se2.1 + sigma2.1) # prediction interval
matrix(c(pred.1, PI.1), nrow = 1, dimnames = list(1, c("fit", "lwr", "upr")))
# fit lwr upr
# 1 18436.18 2339.335 34533.03
Now, your linked example applied to multiple FE, we get this results:
lm.model <- lm(data=demeanlist(cps1[, c(8, 2)],
list(as.factor(cps1$nodeg),
as.factor(cps1$marr))), re74 ~ age)
fe <- getfe(model2)
predict(lm.model, newdata = data.frame(age = 40)) + fe$effect[fe$idx=="1"]
# [1] 15091.75 10115.21
The first value is with and the second without added FE (try fe$effect[fe$idx=="1"]).
Now we're following the manual approach above.
beta.hat <- coef(model2) # coefficient
x <- 40 # age = 40
pred <- as.numeric(x %*% beta.hat) # prediction
V <- model2$vcv # var/cov
se2 <- unname(rowSums((x %*% V) * x)) # prediction var
alpha <- qt((1-0.95)/2, df = model2$df.residual) # 5% level
pred + c(alpha, -alpha) * sqrt(se2) # CI
# [1] 9599.733 10630.697
sigma2 <- sum(model2$residuals ^ 2) / model2$df.residual # sigma^2
PI <- pred + c(alpha, -alpha) * sqrt(se2 + sigma2) # PI
matrix(c(pred, PI), nrow = 1, dimnames = list(1, c("fit", "lwr", "upr"))) # output
# fit lwr upr
# 1 10115.21 -5988.898 26219.33
As we see, the fit is the same as the linked example approach, but now with prediction interval. (Disclaimer: The logic of the approach should be straightforward, the values of the PI should still be evaluated, e.g. in Stata with reghdfe.)
Edit: In case you want to achieve exactly the same output from felm() which predict.lm() yields with the linear model1, you simply need to "include" again the fixed effects in your model (see model3 below). Just follow the same approach then. For more convenience you easily could wrap it into a function.
library(DAAG)
library(lfe)
model3 <- felm(data = cps1, re74 ~ age + nodeg + marr)
pv <- c(40, 0, 1) # prediction x-values
predict0.felm <- function(mod, pv.=pv) {
beta.hat <- coef(mod) # coefficient
x <- cbind(1, matrix(pv., ncol=3)) # prediction vector
pred <- as.numeric(x %*% beta.hat) # prediction
V <- mod[['vcv'] ] # var/cov
se2 <- unname(rowSums((x %*% V) * x)) # prediction var
alpha <- qt((1-0.95)/2, df = mod[['df.residual']]) # 5% level
CI <- structure(pred + c(alpha, -alpha) * sqrt(se2),
names=c("CI lwr", "CI upr")) # CI
sigma2 <- sum(mod[['residuals']] ^ 2) / mod[['df.residual']] # sigma^2
PI <- pred + c(alpha, -alpha) * sqrt(se2 + sigma2) # PI
mx <- matrix(c(pred, PI), nrow = 1,
dimnames = list(1, c("PI fit", "PI lwr", "PI upr"))) # output
list(CI, mx)
}
predict0.felm(model3)[[2]]
# PI fit PI lwr PI upr
# 1 18436.18 2339.335 34533.03
By this with felm() you can achieve the same prediction interval as with predict.lm().
I am trying to fit a logistic regression model in JAGS, but I have data in the form of (# success y, # attempts n), rather than a binary variable. In R, one can fit a model to data such as these by using glm(y/n ~ ) with the "weights" argument, but I am not sure how to fit this in JAGS.
Here is a simple example that I hope addresses what I am trying to ask. Note that I am using the rjags package. Thanks for any help!
y <- rbinom(10, 500, 0.2)
n <- sample(500:600, 10)
p <- y/n
x <- sample(0:100, 10) # some covariate
data <- data.frame(y, n, p, x)
model <- "model{
# Specify likelihood
for(i in 1:10){
y[i] ~ dbin(p[i], n[i])
logit(p[i]) <- b0 + b1*x
}
# Specify priors
b0 ~ dnorm(0, 0.0001)
b1 ~ dnorm(0, 0.0001)
}"
You don't need to compute p in your data set at all. Just let it be a logical node in your model. I prefer the R2jags interface, which allows you to specify a BUGS model in the form of an R function ...
jagsdata <- data.frame(y=rbinom(10, 500, 0.2),
n=sample(500:600, 10),
x=sample(0:100, 10))
model <- function() {
## Specify likelihood
for(i in 1:10){
y[i] ~ dbin(p[i], n[i])
logit(p[i]) <- b0 + b1*x[i]
}
## Specify priors
b0 ~ dnorm(0, 0.0001)
b1 ~ dnorm(0, 0.0001)
}
Now run it:
library("R2jags")
jags(model.file=model,data=jagsdata,
parameters.to.save=c("b0","b1"))
I'm working on a binomial mixture model using OpenBUGS and R package R2OpenBUGS. I've successfully built simpler models, but once I add another level for imperfect detection, I consistently receive the error variable X is not defined in model or in data set. I've tried a number of different things, including changing the structure of my data and entering my data directly into OpenBUGS. I'm posting this in the hope that someone else has experience with this error, and perhaps knows why OpenBUGS is not recognizing variable X even though it is clearly defined as far as I can tell.
I've also gotten the error expected the collection operator c error pos 8 - this is not an error I've been getting previously, but I am similarly stumped.
Both the model and the data-simulation function come from Kery's Introduction to WinBUGS for Ecologists (2010). I will note that the data set here is in lieu of my own data, which is similar.
I am including the function to build the dataset as well as the model. Apologies for the length.
# Simulate data: 200 sites, 3 sampling rounds, 3 factors of the level 'trt',
# and continuous covariate 'X'
data.fn <- function(nsite = 180, nrep = 3, xmin = -1, xmax = 1, alpha.vec = c(0.01,0.2,0.4,1.1,0.01,0.2), beta0 = 1, beta1 = -1, ntrt = 3){
y <- array(dim = c(nsite, nrep)) # Array for counts
X <- sort(runif(n = nsite, min = xmin, max = xmax)) # covariate values, sorted
# Relationship expected abundance - covariate
x2 <- rep(1:ntrt, rep(60, ntrt)) # Indicator for population
trt <- factor(x2, labels = c("CT", "CM", "CC"))
Xmat <- model.matrix(~ trt*X)
lin.pred <- Xmat[,] %*% alpha.vec # Value of lin.predictor
lam <- exp(lin.pred)
# Add Poisson noise: draw N from Poisson(lambda)
N <- rpois(n = nsite, lambda = lam)
table(N) # Distribution of abundances across sites
sum(N > 0) / nsite # Empirical occupancy
totalN <- sum(N) ; totalN
# Observation process
# Relationship detection prob - covariate
p <- plogis(beta0 + beta1 * X)
# Make a 'census' (i.e., go out and count things)
for (i in 1:nrep){
y[,i] <- rbinom(n = nsite, size = N, prob = p)
}
# Return stuff
return(list(nsite = nsite, nrep = nrep, ntrt = ntrt, X = X, alpha.vec = alpha.vec, beta0 = beta0, beta1 = beta1, lam = lam, N = N, totalN = totalN, p = p, y = y, trt = trt))
}
data <- data.fn()
And here is the model:
sink("nmix1.txt")
cat("
model {
# Priors
for (i in 1:3){ # 3 treatment levels (factor)
alpha0[i] ~ dnorm(0, 0.01)
alpha1[i] ~ dnorm(0, 0.01)
}
beta0 ~ dnorm(0, 0.01)
beta1 ~ dnorm(0, 0.01)
# Likelihood
for (i in 1:180) { # 180 sites
C[i] ~ dpois(lambda[i])
log(lambda[i]) <- log.lambda[i]
log.lambda[i] <- alpha0[trt[i]] + alpha1[trt[i]]*X[i]
for (j in 1:3){ # each site sampled 3 times
y[i,j] ~ dbin(p[i,j], C[i])
lp[i,j] <- beta0 + beta1*X[i]
p[i,j] <- exp(lp[i,j])/(1+exp(lp[i,j]))
}
}
# Derived quantities
}
",fill=TRUE)
sink()
# Bundle data
trt <- data$trt
y <- data$y
X <- data$X
ntrt <- 3
# Standardise covariates
s.X <- (X - mean(X))/sd(X)
win.data <- list(C = y, trt = as.numeric(trt), X = s.X)
# Inits function
inits <- function(){ list(alpha0 = rnorm(ntrt, 0, 2),
alpha1 = rnorm(ntrt, 0, 2),
beta0 = rnorm(1,0,2), beta1 = rnorm(1,0,2))}
# Parameters to estimate
parameters <- c("alpha0", "alpha1", "beta0", "beta1")
# MCMC settings
ni <- 1200
nb <- 200
nt <- 2
nc <- 3
# Start Markov chains
out <- bugs(data = win.data, inits, parameters, "nmix1.txt", n.thin=nt,
n.chains=nc, n.burnin=nb, n.iter=ni, debug = TRUE)
Note: This answer has gone through a major revision, after I noticed another problem with the code.
If I understand your model correctly, you are mixing up the y and N from the simulated data, and what is passed as C to Bugs. You are passing the y variable (a matrix) to the C variable in the Bugs model, but this is accessed as a vector. From what I can see C is representing the number of "trials" in your binomial draw (actual abundances), i.e. N in your data set. The variable y (a matrix) is called the same thing in both the simulated data and in the Bugs model.
This is a reformulation of your model, as I understand it, and this runs ok:
sink("nmix1.txt")
cat("
model {
# Priors
for (i in 1:3){ # 3 treatment levels (factor)
alpha0[i] ~ dnorm(0, 0.01)
alpha1[i] ~ dnorm(0, 0.01)
}
beta0 ~ dnorm(0, 0.01)
beta1 ~ dnorm(0, 0.01)
# Likelihood
for (i in 1:180) { # 180 sites
C[i] ~ dpois(lambda[i])
log(lambda[i]) <- log.lambda[i]
log.lambda[i] <- alpha0[trt[i]] + alpha1[trt[i]]*X[i]
for (j in 1:3){ # each site sampled 3 times
y[i,j] ~ dbin(p[i,j], C[i])
lp[i,j] <- beta0 + beta1*X[i]
p[i,j] <- exp(lp[i,j])/(1+exp(lp[i,j]))
}
}
# Derived quantities
}
",fill=TRUE)
sink()
# Bundle data
trt <- data$trt
y <- data$y
X <- data$X
N<- data$N
ntrt <- 3
# Standardise covariates
s.X <- (X - mean(X))/sd(X)
win.data <- list(y = y, trt = as.numeric(trt), X = s.X, C= N)
# Inits function
inits <- function(){ list(alpha0 = rnorm(ntrt, 0, 2),
alpha1 = rnorm(ntrt, 0, 2),
beta0 = rnorm(1,0,2), beta1 = rnorm(1,0,2))}
# Parameters to estimate
parameters <- c("alpha0", "alpha1", "beta0", "beta1")
# MCMC settings
ni <- 1200
nb <- 200
nt <- 2
nc <- 3
# Start Markov chains
out <- bugs(data = win.data, inits, parameters, "nmix1.txt", n.thin=nt,
n.chains=nc, n.burnin=nb, n.iter=ni, debug = TRUE)
Overall, the results from this model looks ok, but there are long autocorrelation lags for beta0 and beta1. The estimate of beta1 also seems a bit off(~= -0.4), so you might want to recheck the Bugs model specification, so that it is matching the simulation model (i.e. that you are fitting the correct statistical model). At the moment, I'm not sure that it does, but I don't have the time to check further right now.
I got the same message trying to pass a factor to OpenBUGS. Like so,
Ndata <- list(yrs=N$yrs, site=N$site), ... )
The variable "site" was not passed by the "bugs" function. It simply was not in list passed
to OpenBUGS
I solved the problem by passing site as numeric,
Ndata <- list(yrs=N$yrs, site=as.numeric(N$site)), ... )
I am conducting a Bayesian analysis using Winbugs from R. I need to combine two Winbugs scripts into one: however, I am receiving an error message (Variable x2 is not defined in model or in data set). Here is the winbugs code:
model{
# Model’s likelihood
for (i in 1:n) {
tto[i] ~ dnorm( mu[i], tau ) # stochastic componenent
b[i] ~ dnorm(0.0, tau2)
# link and linear predictor
mu[i] <- 1 - (beta.concern2*concern2[i] + beta.concern3*concern3[i] + b[i])
}
for (i in 1:1002) {
# Linear regression on logit
logit(p[i]) <- beta.concern2*x2[i,1] + beta.concern2*x2[i,2]
# Likelihood function for each data point
y2[i] ~ dbern(p[i])
}
s2<-1/tau
s <-sqrt(s2)
a2<-1/tau2
a <-sqrt(a2)
}
where x2 is a 1002*2 matrix and y is a vector
This is the R code definining the data:
combined.data <- list(n=n,tto=tto,concern2=concern2,
concern3=concern3,y2=y2, x2=x2)
Anyone know what is wrong?
I'm going to be making quite a few assumptions here...
Perhaps you could add a diagram illustrating the relationships between the variables, and which are deterministic vs stochastic. I find this helpful when making models in BUGS. Also, it would be helpful to have the dimensions of all your data, the meaning of n and perhaps some context or detail on what you're modelling and the nodes in which you're interested.
I'm guessing that y is a binary (0,1) vector of length 1002, and has corresponding values for x2[,1] and x2[,2] (herein x1, x2) and concern2, concern3 (herein c2, c3) and tto i.e.
nrow(x2) == 1002
Here's some sample data with of nrow==10 to work with:
y <- sample(x=c(0,1), size=10, replace=TRUE, prob=c(0.5,0.5))
x2 <- matrix(rnorm(20), nrow=10, ncol=2)
c2 <- rnorm(10)
c3 <- rnorm(10)
tto <- rnorm(10)
It appears that you're trying to determine the values of beta.concern2 (herein b2) for both values of x2 in the logit. Not sure why you'd want to fit it with the same parameter for two different predictors. In case this is a typo I'm giving b2 and b3 as parameters instead. I hope you'll be able to adapt this to your needs.
The product of these values of b2, b3 (stochastic) and c2, c3 (given) are used to generate a variable mu, which also has an error term. (I'm presuming b[i] (herein b1[i]) is a normally distributed error term.)
Then tto is a normally distributed variable which depends on the value of mu, and itself has an error term. I have set the precision of the error terms as being equal in both cases.
So for such a model:
require(rjags)
### The data
dataList <- list(
x1 = x2[,1],
x2 = x2[,2],
y = y,
c2 = c2,
c3 = c3,
tto = tto,
nRowX = nrow(x2)
)
### make sure logistic model can be fitted
f1 <- stats::glm(dataList$y ~ dataList$x1 + dataList$x2 -1, family=binomial(logit))
show(f1)
### set some approximate initial values
b1Init <- 0.1 # arbitrary
b2Init <- f1$coef[2]
b3Init <- f1$coef[3]
initsList <- list(
b1 = b1Init,
b2 = b2Init,
b3 = b3Init)
### Model: varying parameters (b2, b3) per observation; 2x error terms
modelstring <- "
model {
for(i in 1:nRowX){
tto[i] ~ dnorm(mu[i], prec)
mu[i] <- 1 - (b1 + b2*c2[i] + b3*c3[i])
y[i] ~ dbern(L[i]) # L for logit
L[i] <- 1/(1+exp(- ( b2*x1[i] + b3*x2[i]) ))
}
b1 ~ dnorm(0, prec) # precision
prec <- 1/sqrt(SD) # convert to Std Deviation
SD <- 0.5
b2 ~ dnorm(0, 1.4) # arbitrary
b3 ~ dnorm(0, 1.4)
}
"
writeLines(modelstring,con="model.txt")
parameters <- c("b1","b2","b3") # to monitor
adaptSteps <- 1e4 # "tune in" samplers
burnInSteps <- 2e4 # "burn in" samplers
nChains <- 3
numSavedSteps <-2e3
thinSteps <- 1 # Steps to "thin" (1=keep every step).
nPerChain <- ceiling(( numSavedSteps * thinSteps ) / nChains) # Steps per chain
rm(jagsModel) # in case already present
jagsModel <- rjags::jags.model(
"model.txt", data=dataList,
inits=initsList, n.chains=nChains,
n.adapt=adaptSteps)
stats::update(jagsModel, n.iter=burnInSteps)
### MCMC chain
MCMC1 <- as.matrix(rjags::coda.samples(
jagsModel, variable.names=parameters,
n.iter=nPerChain, thin=thinSteps))
### Extract chain values
b2Sample <- as.vector(MCMC1[,grep("b2",colnames(MCMC1))])