all,
I'm trying to use JAGS model in R. The R package is "R2jags".
I'm confused by the error shown below.
Compiling model graph
Resolving undeclared variables
Allocating nodes
Deleting model
Error in jags.model(model.file, data = data, inits = init.values, n.chains = n.chains, :
RUNTIME ERROR:
Non-conforming parameters in function inprod
The data processing part,
library(R2jags)
# Load the CSV; its first column holds the row labels and is moved into rownames.
data <- read.table(
  file = "H:/mlp2020/data109.csv",
  header = TRUE,
  sep = ",",
  check.names = FALSE,
  stringsAsFactors = FALSE
)
rownames(data) <- data[, 1]
data <- data[, -1]

# Group column shifted by one; used as z[i] in the model.
z <- as.vector(data$group) + 1

# Column ranges for the response block and the covariate block.
Y_input <- data[, 1:1356]
X_input <- data[, 1358:1368]
Y <- Y_input
X <- X_input

# Dimensions handed to JAGS (hard-coded; the trailing comments name the
# expressions they stand for).
N <- 1702 # nrow(Y_input)
P <- 1356 # ncol(Y_input)
R <- 11 # ncol(X_input)

# Data are given to jags() by object name; phi and delta are monitored.
jags.data <- list("X", "Y", "z", "N", "P", "R")
jags.params <- c("phi", "delta")
# Initial values for the indicator nodes phi and delta.
#
# In the model, phi is indexed as phi[j] for j in 1:P and delta as delta[r, j]
# for r in 1:R and j in 1:P, so the initial values are drawn with those shapes
# (a single scalar draw would not match the dimensions of the nodes).
# Relies on the globals P and R set in the data-processing step.
#
# Returns a named list with a length-P 0/1 vector `phi` and an R x P 0/1
# matrix `delta`, each entry drawn as Bernoulli(0.1).
jags.inits <- function() {
  list(
    "phi" = rbinom(P, 1, 0.1),
    "delta" = matrix(rbinom(R * P, 1, 0.1), nrow = R, ncol = P)
  )
}
my model code is like,
# JAGS model written as an R function (the form passed to jags() as model.file).
# Uses the data nodes Y, X, z and the dimensions N, P, R.
model <- function(){
# k[i,j] combines the indicator phi[j] with the group code z[i].
for (j in 1:P){
for (i in 1:N){
k[i,j] <- 1+phi[j]*z[i] #k=1 phi[j]=0,k=2 phi[j]=1 z[i]=1,k=3 phi[j]=1 z[i]=2
}
phi[j] ~ dbern(w)
}
for (j in 1:P){
for (i in 1:N){
Y[i,j] ~ dnorm(mu[i,j], tau[j])
# NOTE: beta is defined below element-wise as beta[r,j] (r in 1:R, j in 1:P),
# but it is passed here as a whole, next to the single row X[i,].
mu[i,j] = e_1[j] + e_2[j]*equals(k[i,j],2) + inprod(X[i,],beta)
}
for (r in 1:R){
beta[r,j]<-delta[r,j]*beta_0[j]
# spike and slab prior for beta: delta[r,j] switches beta[r,j] on or off
delta[r,j] ~ dbern(t)
}
# spike and slab prior for beta: common slab value beta_0[j] for column j
beta_0[j] ~ dnorm(0,tau_beta[j])
tau_beta[j] ~ dgamma(2, 15) # input
e_1[j]~dnorm(0,tau_1[j]) #tau_mu[j]=1
tau_1[j] ~ dgamma(2, 15) # input
e_2[j]~dnorm(0,tau_2[j]) #tau_mu[j]=1
tau_2[j] ~ dgamma(2, 15) # input
# sigma[j] is derived from the precision tau[j]
sigma[j] <- 1.0/sqrt(tau[j])
# input
tau[j] ~ dgamma(10, 1)
}
# t ~ dbeta(a_t,b_t) # input
# w ~ dbeta(a_w,b_w) # input
# Fixed hyperparameters are used instead of the commented-out a_*/b_* inputs.
w ~ dbeta(1,0.1)
t ~ dbeta(0.4,1.6)
}
# Fit the model; jags.params is passed positionally as the third argument.
# n.iter=10 requests only ten iterations.
jagsfit <- jags(data=jags.data, inits=jags.inits, jags.params,
n.iter=10,
model.file=model)
Any help would be greatly appreciated!
inprod is the dot product. Its two arguments must be vectors of the same lengths, but the second argument in your code is a matrix.
I have not checked your model, but perhaps you want inprod(X[i,],beta[,j]).
Related
I'm writing a loop that fits models across several datasets and outcome variables, and I'm saving the results in a matrix. In the current version, I account for potential errors such as missing covariates. The more data I include, the more kinds of errors I get that I need to account for in the loop. I would like to modify the code below so that it records "NA" whenever the model stops due to an error, regardless of the error type.
I would appreciate any thoughts.
# Names of the datasets and of the outcome variables to loop over.
datasets <- list('data_01','data_02','data_03','data_04')
outcomes <- list('var_01','var_02','var_03','var_04','var_05')
# One two-column matrix per dataset: column 1 = outcome name, column 2 = ICC.
results <- vector("list", length(datasets))
for (i in 1:length(datasets)) {
results [[i]] <- matrix(NA, nrow=length(outcomes), ncol=2)
}
for (j in seq_along(outcomes)) {
# NOTE: `surveys` is not defined anywhere in this snippet.
for (i in seq_along(surveys)) {
# Branch taken when the outcome is not among the names of the dataset.
if ("TRUE" %in% (!(outcomes[[j]] %in% names(datasets[[i]]))))
{
results[[i]][j, 1] <- outcomes[[j]]
results[[i]][j, 2] <- "NA"
}
else
{
results[[i]][j, 1] <- outcomes[[j]]
# NOTE: the formula below has no left-hand side (no outcome).
fit <- glmer(~ RS_AGE + RS_MARITAL + (1|FW_ID) + (1|RS_CLID), data = datasets[[i]], family =
binomial, nAGQ=0, control = glmerControl(optimizer = "nloptwrap"))
# Squared theta values from the fit, then the ICC of the second component.
SI <- getME(fit,"theta")^2
ICC <- SI[[2]] /(SI[[1]]+SI[[2]]+3.29)
results[[i]][j, 2] <- ICC
}
}
}
Without the data I can't test, but this should work:
# Names of the datasets and of the outcome variables to loop over.
datasets <- list('data_01','data_02','data_03','data_04')
outcomes <- list('var_01','var_02','var_03','var_04','var_05')

# One two-column matrix per dataset: column 1 = outcome name, column 2 = ICC
# (NA when the outcome is missing from the dataset or the model fails).
results <- vector("list", length(datasets))
for (i in seq_along(datasets)) {
  results[[i]] <- matrix(NA, nrow = length(outcomes), ncol = 2)
}

for (j in seq_along(outcomes)) {
  for (i in seq_along(datasets)) {
    results[[i]][j, 1] <- outcomes[[j]]
    if (any(!(outcomes[[j]] %in% names(datasets[[i]])))) {
      # Outcome not available in this dataset.
      results[[i]][j, 2] <- NA
    } else {
      # Build "<outcome> ~ RS_AGE + RS_MARITAL + (1|FW_ID) + (1|RS_CLID)".
      form <- reformulate(
        c("RS_AGE", "RS_MARITAL", "(1|FW_ID)", "(1|RS_CLID)"),
        response = outcomes[[j]]
      )
      # try() turns any fitting error into a "try-error" object so the loop
      # keeps going instead of stopping.
      fit <- try(glmer(form, data = datasets[[i]], family = binomial,
                       nAGQ = 0,
                       control = glmerControl(optimizer = "nloptwrap")))
      if (!inherits(fit, "try-error")) {
        SI <- getME(fit, "theta")^2
        ICC <- SI[[2]] / (SI[[1]] + SI[[2]] + 3.29)
        results[[i]][j, 2] <- ICC
      } else {
        results[[i]][j, 2] <- NA
      }
    }
  }
}
Try replacing
# Model-fitting lines as they appear in the question (the part to be replaced).
fit <- glmer(~ RS_AGE + RS_MARITAL + (1|FW_ID) + (1|RS_CLID), data = datasets[[i]], family =
binomial, nAGQ=0, control = glmerControl(optimizer = "nloptwrap"))
SI <- getME(fit,"theta")^2
ICC <- SI[[2]] /(SI[[1]]+SI[[2]]+3.29)
results[[i]][j, 2] <- ICC
by
# Build "<outcome> ~ RS_AGE + RS_MARITAL + (1|FW_ID) + (1|RS_CLID)" so the
# model has a response.
form <- reformulate(
  c("RS_AGE", "RS_MARITAL", "(1|FW_ID)", "(1|RS_CLID)"),
  response = outcomes[[j]]
)
# Fit the model; on any error, return the condition object instead of stopping.
fit <- tryCatch(
  glmer(form, data = datasets[[i]], family = binomial, nAGQ = 0,
        control = glmerControl(optimizer = "nloptwrap")),
  error = function(e) e
)
if (!inherits(fit, "error")) {
  SI <- getME(fit, "theta")^2
  ICC <- SI[[2]] / (SI[[1]] + SI[[2]] + 3.29)
} else {
  ICC <- NA
}
# Store the value, as the replaced lines did.
results[[i]][j, 2] <- ICC
library(GLMsData)
data(fluoro)
# Candidate transformation powers.
lambda <- seq(-2,2,0.5)
lm.out <- list()
# NOTE: `i in length(lambda)` runs the body once, with i equal to length(lambda).
for(i in length(lambda)){
# NOTE: the test is on the index i, and the whole lambda vector is used below.
if(i != 0){
y <- (fluoro$Dose^lambda-1)/lambda
} else {
y <- log(fluoro$Dose)
}
# NOTE: y[i] is a single element, while Time is a full column of fluoro.
lm.out[[i]] <- lm(y[i]~Time, data = fluoro, na.exclude = T)
}
print(lm.out)
Error in model.frame.default(formula = y[i] ~ Time, data = fluoro, drop.unused.levels = TRUE) : variable lengths differ (found for 'Time')
I am trying to apply various transformations to the response variable, fit the corresponding models, and obtain residual plots for each model.
I need help. Thanks.
Here is a corrected version of the for loop in the question.
data(fluoro, package = "GLMsData")
lambda <- seq(-2, 2, 0.5)

# One fitted model per transformation power, in the order of `lambda`.
lm.out <- vector("list", length(lambda))
for (idx in seq_along(lambda)) {
  lam <- lambda[idx]
  # Power transform of the response; the log is used when the power is zero.
  y <- if (lam == 0) {
    log(fluoro$Dose)
  } else {
    (fluoro$Dose^lam - 1) / lam
  }
  lm.out[[idx]] <- lm(y ~ Time, data = fluoro, na.action = na.exclude)
}
print(lm.out)
And a version with a boxcox function defined and used in a lapply loop.
# Power transform of x with a single power `lambda`:
# log(x) when lambda is 0, otherwise (x^lambda - 1) / lambda.
# With na.rm = TRUE, missing values are dropped from x first.
boxcox <- function(x, lambda, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  if (lambda != 0) {
    return((x^lambda - 1) / lambda)
  }
  log(x)
}
# Fit one model per value in lambda, transforming Dose inside the formula.
lm_out2 <- lapply(lambda, \(l){
lm(boxcox(Dose, lambda = l) ~ Time, data = fluoro, na.action = na.exclude)
})
Check that both ways above produce the same results.
# Coefficients from the for-loop fits and from the lapply fits.
coef_list <- sapply(lm.out, coef)
coef_list2 <- sapply(lm_out2, coef)
identical(coef_list, coef_list2)
#[1] TRUE
# Coefficient p-values taken from the model summaries.
smry_list <- lapply(lm.out, summary)
smry_list2 <- lapply(lm_out2, summary)
pval_list <- sapply(smry_list, \(fit) fit$coefficients[, "Pr(>|t|)"])
pval_list2 <- sapply(smry_list2, \(fit) fit$coefficients[, "Pr(>|t|)"])
identical(pval_list, pval_list2)
#[1] TRUE
# R-squared values taken from the same summaries.
R2_list <- sapply(smry_list, "[[", "r.squared")
R2_list2 <- sapply(smry_list2, "[[", "r.squared")
identical(R2_list, R2_list2)
#[1] TRUE
I am running 503 separate regressions, each with a different dependent variable, the same three independent variables, and one AR(1) term.
Data:
# Fake data: 50 daily dates followed by 506 columns of uniform random numbers.
set.seed(333)
df <- data.frame(
  seq(from = as.Date("2017/1/1"), to = as.Date("2017/2/19"), by = "days"),
  matrix(runif(50 * 506), nrow = 50, ncol = 506)
)
# Column names: Date, var1 ... var503, then the three factor columns.
names(df) <- c("Date", paste0("var", 1:503), "mktrf", "smb", "hml")
I create the AR(1) process as follows, using a function called lagpad:
# Shift the rows of x by k positions and pad the gap with NA.
#
# x: a vector, matrix or data frame. Objects without a dim attribute are
#    treated as a single column and returned as a vector; everything else is
#    converted with as.matrix() and returned as a matrix without dimnames.
# k: number of rows to shift. k > 0 moves values down (NA rows on top);
#    k <= 0 moves values up (NA rows at the bottom).
#
# When abs(k) is at least the number of rows, the result is all NA. The result
# always has the same number of rows as the input.
lagpad <- function(x, k = 1) {
  was_vector <- is.null(dim(x))
  # as.matrix() also handles data frames, which matrix(x, nrow(x)) does not.
  x <- if (was_vector) matrix(x) else unname(as.matrix(x))

  n_row <- nrow(x)
  n_col <- ncol(x)
  # Never shift by more rows than exist.
  shift <- min(abs(k), n_row)
  n_keep <- n_row - shift
  pad <- matrix(NA, nrow = shift, ncol = n_col)

  if (k > 0) {
    kept <- x[seq_len(n_keep), , drop = FALSE]
    x <- rbind(pad, kept)
  } else {
    kept <- x[shift + seq_len(n_keep), , drop = FALSE]
    x <- rbind(kept, pad)
  }

  if (was_vector) as.vector(x) else x
}
Then I store my necessary variables for regression:
All the independent variables (the three factor columns):
x = df[,505:507]
All the dependent variables (one column per regression):
# Columns 2 to 504 of df, i.e. var1 ... var503.
y <- df[,2:504]
AR(1) process
# k = -1 takes lagpad's else-branch: the first row is dropped and NA is added
# at the end. NOTE: y is a data frame here, so is.vector(y) is FALSE inside lagpad.
y_lag <- lagpad(y, -1)
Fit all the models
# One lm per column of y; the same whole y_lag object appears in every fit.
list_models_AR= lapply(y, function(y)
with(x, lm(y ~ mktrf + smb + hml + y_lag, na.action = na.exclude)))
I'm having trouble figuring out how to use lapply in this case, since there are multiple components of y_lag that need to be called, one for each regression.
I am not sure of your lag term, it seems to be actually the next y-column. If so you can create 502 models as follows:
# Regress each column of y on the three columns of x and on the next column
# of y, giving ncol(y) - 1 models.
# seq_len() yields an empty sequence when y has fewer than two columns, whereas
# 1:(ncol(y) - 1) would count down and index columns that do not exist.
list_models_AR <- lapply(seq_len(max(ncol(y) - 1, 0)), function(i) {
  lm(y[, i] ~ x[, 1] + x[, 2] + x[, 3] + y[, i + 1], na.action = na.exclude)
})
I'm relatively new to JAGS and am running it through the R package jagsUI. I am building occupancy models, but want to summarize results as I go. So I have a matrix of 0s and 1s:
# 10 x 10 matrix of Bernoulli(0.5) draws used as example 0/1 data.
mat1 <- matrix(rbinom(10*10,1,.5),10,10)
y=mat1
That I want to run through the following model:
# Bundle data and summarize data bundle
# M = number of rows (sites), T = number of columns (surveys) of mat1.
str( win.data <- list(y = mat1, M = nrow(mat1), T = ncol(mat1)) )
# Specify model in BUGS language
# sink() sends the text printed by cat() into model.txt until sink() is called again.
# NOTE: in the model text, the statements after "# Derived quantities" use i and j
# after both for loops have been closed.
sink("model.txt")
cat("
model {
# Priors
psi0 ~ dunif(0, 1)
p ~ dunif(0, 1)
for(t in 1:(T-1)){
rho[t] ~ dunif(-1,1)
}
beta0 ~ dnorm(0, 0.1)
# Likelihood
for (i in 1:M) { # Loop over sites
z[i,1] ~ dbern(psi0) # State model
y[i,1] ~ dbern(z[i,1]*p)
for (j in 2:T) { # Loop over replicate surveys
logit(psi[i,j])<- beta0 + rho[j-1]*z[i,j-1]
z[i,j] ~ dbern(psi[i,j])
y[i,j] ~ dbern(z[i,j]*p) # Observation model
}
}
# Derived quantities
coln[i,j] <- ifelse(z[i,j]-z[i,j-1]==1,1,0) # colonized
ext[i,j] <- ifelse(z[i,j-1]-z[i,j]==1,1,0) # went extinct
tot.coln[,j] <- sum(coln[,j]) # sum of colonized each survey
tot.ext[,j] <- sum(ext[,j]) # sum of extinctions each survey
Nocc[,j] <- sum(z[,j]) # total sites occupied each survey
coln.rate[,j] <- tot.coln[,j]/Nocc[,j]
ext.rate[,j] <- tot.ext[,j]/Nocc[,j]
}
",fill = TRUE)
sink()
# Initial values
# Row-wise maximum of y (one value per site).
zst <- apply(y, 1, max, na.rm=TRUE) # Avoid data/model/inits conflict
y<- as.matrix(y)
# NOTE: this overwrites the row maxima above; z is initialised with the full matrix y.
zst<- y
inits <- function(){list(z = zst)}
# Parameters monitored
params <- c("psi0", "p", "beta0", "coln.rate", "ext.rate")
# MCMC settings
# ni = iterations, nt = thinning, nb = burn-in, nc = chains (see the jags() call below).
ni <- 2000 ; nt <- 1 ; nb <- 1000 ; nc <- 3
# Call JAGS and summarize posteriors
library(jagsUI)
fm <- jags(win.data, inits, params, "model.txt", n.chains = nc,
n.thin = nt, n.iter = ni, n.burnin = nb)
print(fm, dig = 3)
The model runs except for the piece after "# Derived quantities". Basically I want to calculate the rate of change from 0 to 1 and from 1 to 0 in each survey. A couple of my thoughts on why it doesn't work. 1) z[i,j] isn't really 0s and 1s. 2) the calculations shouldn't go under Derived quantities. 3) ifelse from the JAGS manual isn't doing what I think.
I also tried using the "step" function replacing the first two lines after Derived quantities with:
# Variant of the two ifelse() lines: step() is applied to the difference minus 0.5.
coln[i,j] <- step(z[i,j]-z[i,j-1]-0.5) # colonized
ext[i,j] <- step(z[i,j-1]-z[i,j]-0.5) # went extinct
But no luck there. Any ideas?
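You are indexing i and j here without looping through them. To make this work you need to put these statements inside another nested for loop. I also wrote the extinction test as z[i,j]-z[i,j-1]==-1 (equivalent to your z[i,j-1]-z[i,j]==1, but mirroring the colonization line) and indexed the derived quantities by j-1, so that they run from 1 to T-1.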
# Derived quantities, one value per transition from survey s to survey s+1.
for (s in 1:(T-1)) {
  for (i in 1:M) {
    coln[i,s] <- ifelse(z[i,s+1] - z[i,s] == 1, 1, 0)  # colonized
    ext[i,s] <- ifelse(z[i,s] - z[i,s+1] == 1, 1, 0)   # went extinct
  }
  Nocc[s] <- sum(z[,s])          # total sites occupied at survey s
  tot.coln[s] <- sum(coln[,s])   # sum of colonizations for this transition
  tot.ext[s] <- sum(ext[,s])     # sum of extinctions for this transition
  coln.rate[s] <- tot.coln[s] / Nocc[s]
  ext.rate[s] <- tot.ext[s] / Nocc[s]
}
I am just really getting into trying to write MLE commands in R that function and look similar to native R functions. In this attempt I am trying to do a simple MLE with
y=b0 + x*b1 + u
and
u~N(0,sd=s0 + z*s1)
However, even such a simple command I am having difficulty coding. I have written a similar command in Stata in a handful of lines
Here is the code I have written so far in R.
# Fit y = X1 %*% b + u by minimising a negative log "likelihood" with optim(),
# where the standard deviation of u is modelled as X2 %*% s.
#
# beta:    formula for the mean equation.
# sigma:   formula for the standard-deviation equation (defaults to beta).
# data:    optional data source forwarded to model.frame().
# beta0, sigma0: optional starting values; default to vectors of ones.
# con1, con2:    if TRUE, a column of ones is appended to the respective
#                design matrix.
#
# Returns the list produced by optim() (with hessian = T).
normalreg <- function (beta, sigma=NULL, data, beta0=NULL, sigma0=NULL,
con1 = T, con2 = T) {
# If a formula for sigma is not specified
# assume it is the same as the formula for the beta.
# NOTE(review): mf below is built from match.call(), i.e. from the call as
# written, so this local assignment does not add a sigma entry to mf - confirm
# that the sigma-omitted path works as intended.
if (is.null(sigma)) sigma=beta
# Grab the call expression
mf <- match.call(expand.dots = FALSE)
# Find the position of each argument
m <- match(c("beta", "sigma", "data", "subset", "weights", "na.action",
"offset"), names(mf), 0L)
# Keep only the function name and the matched arguments
mf <- mf[c(1L, m)]
# Since I have two formulas I will call them both formula
names(mf)[2:3] <- "formula"
# Drop unused levels
mf$drop.unused.levels <- TRUE
# Divide mf into data1 and data2
# (the next line is immediately overwritten by the two that follow)
data1 <- data2 <- mf
# data1 keeps the first formula, data2 keeps the second
data1 <- mf[-3]
data2 <- mf[-2]
# Replace the called function with model.frame, then evaluate both calls
# in the caller's frame to obtain the model frames
data1[[1L]] <- data2[[1L]] <- as.name("model.frame")
data1 <- as.matrix(eval(data1, parent.frame()))
data2 <- as.matrix(eval(data2, parent.frame()))
# First column of the first model frame is the response
y <- data1[,1]
data1 <- data1[,-1]
if (con1) data1 <- cbind(data1,1)
data2 <- unlist(data2[,-1])
if (con2) data2 <- cbind(data2,1)
data1 <- as.matrix(data1) # Ensure our data is read as matrix
data2 <- as.matrix(data2) # Ensure our data is read as matrix
# Starting values, when given, must match the number of columns
if (!is.null(beta0)) if (length(beta0)!=ncol(data1))
stop("Length of beta0 need equal the number of ind. data2iables in the first equation")
if (!is.null(sigma0)) if (length(sigma0)!=ncol(data2))
stop("Length of beta0 need equal the number of ind. data2iables in the second equation")
# Set initial parameter estimates
if (is.null(beta0)) beta0 <- rep(1, ncol(data1))
if (is.null(sigma0)) sigma0 <- rep(1, ncol(data2))
# Define the function to be minimised by optim().
# est holds the mean coefficients followed by the sd coefficients.
normMLE <- function(est=c(beta0,sigma0), data1=data1, data2=data2, y=y) {
data1est <- as.matrix(est[1:ncol(data1)], nrow=ncol(data1))
data2est <- as.matrix(est[(ncol(data1)+1):(ncol(data1)+ncol(data2))],
nrow=ncol(data1))
# NOTE(review): pnorm() is the normal CDF; a density-based likelihood would
# use dnorm() - confirm which is intended. The sd passed here, data2%*%data2est,
# is not constrained to be positive.
ps <-pnorm(y-data1%*%data1est,
sd=data2%*%data2est)
# Estimate a vector of log likelihoods based on coefficient estimates
llk <- log(ps)
-sum(llk)
}
# data1, data2 and y are forwarded to normMLE through optim()'s extra arguments
results <- optim(c(beta0,sigma0), normMLE, hessian=T,
data1=data1, data2=data2, y=y)
results
}
# Simulated example: the error standard deviation 2+z*2 grows with z = x^2.
x <-rnorm(10000)
z<-x^2
y <-x*2 + rnorm(10000, sd=2+z*2) + 10
# No data argument is given; the formulas refer to the vectors defined above.
normalreg(y~x, y~z)
At this point the biggest issue is finding an optimization routine that does not fail when some of the values return NA because the standard deviation goes negative. Any suggestions? Sorry for the huge amount of code.
Francis
I include a check to see if any of the standard deviations are less than or equal to 0 and return a likelihood of 0 if that is the case. Seems to work for me. You can figure out the details of wrapping it into your function.
# Model: y = b0 + x*b1 + u, with u ~ N(0, sd = s0 + z*s1)
#
# Log-likelihood of the model for par = c(b0, b1, s0, s1).
# Returns log(0), i.e. -Inf, as soon as any implied standard deviation
# is less than or equal to zero.
ll <- function(par, x, z, y) {
  sds <- par[3] + z * par[4]
  if (!all(sds > 0)) {
    return(-Inf)
  }
  sum(dnorm(y, mean = par[1] + x * par[2], sd = sds, log = TRUE))
}
# Parameter values used to simulate the data.
n <- 100
b0 <- 10
b1 <- 2
s0 <- 2
s1 <- 2

# Simulate x, the variance driver z = x^2, and the response y.
x <- rnorm(n = n)
z <- x^2
y <- b0 + b1 * x + rnorm(n = n, sd = s0 + s1 * z)

# fnscale = -1 makes optim() maximise ll instead of minimising it.
optim(
  par = rep(1, 4),
  fn = ll,
  x = x,
  z = z,
  y = y,
  control = list(fnscale = -1)
)
With that said it probably wouldn't be a bad idea to parameterize the standard deviation in such a way that it is impossible to go negative...