Related
I'm trying to run the following function mentioned below using OptimParallel in R on a certain data set. The code is as follows:
install.packages("optimParallel")
install.packages('parallel')
library(parallel)
library(optimParallel)
library(doParallel)
library(data.table)
library(Rlab)
library(HDInterval)
library(mvtnorm)
library(matrixStats)
library(dplyr)
library(cold)
## Bolus data:
data("bolus")
d1 <- bolus
d1$group <- ifelse(d1$group == "2mg",1,0)
colnames(d1) <- c("index",'group',"time","y")
d2 <- d1 %>% select(index, y, group, time)
colnames(d2) <- c('index','y','x1','x2') ### Final data
## Modification of the objective function:
## Another approach:
dpd_poi <- function(x,fixed = c(rep(FALSE,5))){
params <- fixed
dpd_1 <- function(p){
params[!fixed] <- p
alpha <- params[1]
beta_0 <- params[2]
beta_1 <- params[3]
beta_2 <- params[4]
rho <- params[5]
add_pi <- function(d){
k <- beta_0+(d[3]*beta_1)+(d[4]*beta_2)
k1 <- exp(k) ## for Poisson regression
d <- cbind(d,k1)
}
dat_split <- split(x , f = x$index)
result <- lapply(dat_split, add_pi)
result <- rbindlist(result)
result <- as.data.frame(result)
colnames(result) <- c('index','y','x1','x2','lamb')
result_split <- split(result, f = result$index)
expression <- function(d){
bin <- as.data.frame(combn(d$y , 2))
pr <- as.data.frame(combn(d$lamb , 2))
## Evaluation of the probabilities:
f_jk <- function(u,v){
dummy_func <- function(x,y){
ppois(x, lambda = y)
}
dummy_func_1 <- function(x,y){
ppois(x-1, lambda = y)
}
k <- mapply(dummy_func,u,v)
k_1 <- mapply(dummy_func_1,u,v)
inv1 <- inverseCDF(as.matrix(k), pnorm)
inv2 <- inverseCDF(as.matrix(k_1), pnorm)
mean <- rep(0,2)
lower <- inv2
upper <- inv1
corr <- diag(2)
corr[lower.tri(corr)] <- rho
corr[upper.tri(corr)] <- rho
prob <- pmvnorm(lower = lower, upper = upper, mean = mean, corr = corr)
prob <- (1+(1/alpha))*(prob^alpha)
## First expression: (changes for Poisson regression)
lam <- as.vector(t(v))
v1 <- rpois(1000, lambda = lam[1])
v2 <- rpois(1000, lambda = lam[2])
all_possib <- as.data.frame(rbind(v1,v2))
new_func <- function(u){
k <- mapply(dummy_func,u,v)
k_1 <- mapply(dummy_func_1,u,v)
inv1_1 <- inverseCDF(as.matrix(k), pnorm)
inv2_1 <- inverseCDF(as.matrix(k_1), pnorm)
mean1 <- rep(0,2)
lower1 <- inv2_1
upper1 <- inv1_1
corr1 <- diag(2)
corr1[lower.tri(corr1)] <- rho
corr1[upper.tri(corr1)] <- rho
prob1 <- pmvnorm(lower = lower1, upper = upper1, mean = mean1, corr = corr1)
prob1 <- prob1^(alpha)
}
val <- apply(all_possib, 2, new_func)
val_s <- mean(val) ## approximation
return(val_s - prob)
}
final_res <- mapply(f_jk, bin, pr)
final_value <- sum(final_res)
}
u <- sapply(result_split,expression)
return(sum(u))
}
}
## run the objective function:
cl <- makeCluster(25)
setDefaultCluster(cl=cl)
clusterExport(cl,c('d2','val'))
clusterEvalQ(cl,c(library(data.table), library(Rlab),library(HDInterval),library(mvtnorm),library(matrixStats),library(dplyr),library(cold)))
val <- dpd_poi(d2, c(0.5,FALSE,FALSE,FALSE,FALSE))
optimParallel(par = c(beta_0 =1, beta_1 =0.1 ,beta_2 = 1,rho=0.2),fn = val ,method = "L-BFGS-B",lower = c(-10,-10,-10,0),upper = c(Inf,Inf,Inf,1))
stopCluster(cl)
After running for some time, it returns the following error:
checkForRemoteErrors(val)
9 nodes produced errors; first error: missing value where TRUE/FALSE needed
However, when I make a minor change in the objective function (pick 2 random numbers from rpois instead of 1000) and run the same code using optim, it converges and gives me a proper result. This is a Monte Carlo simulation and it does not make sense to draw so few Poisson variables. I have to use optimParllel, otherwise, it takes way too long to converge. I could also run this code using simulated data.
I'm unable to figure out where the issue truly lies. I truly appreciate any help in this regard.
all,
I'm trying to use JAGS model in R. The R package is "R2jags".
I'm confused with the error like beblow.
Compiling model graph
Resolving undeclared variables
Allocating nodes
Deleting model
Error in jags.model(model.file, data = data, inits = init.values, n.chains = n.chains, :
RUNTIME ERROR:
Non-conforming parameters in function inprod
The data processing part,
library(R2jags)
data <- read.table("H:/mlp2020/data109.csv",sep = ",",check.names = F,stringsAsFactors = F,header = T)
rownames(data) <- data[,1]
data <- data[,-1]
z <- as.vector(data$group)
z <- z+1
Y_input <- data[,c(1:1356)]
X_input <- data[,c(1358:1368)]
N <- 1702 #nrow(Y_input)
P <- 1356 #ncol(Y_input)
R <- 11 #ncol(X_input)
Y <- Y_input
X <- X_input
jags.data <- list("X","Y","z","N","P","R")
jags.params <- c("phi","delta")
jags.inits <- function(){
list("phi"=rbinom(1,1,0.1),"delta"=rbinom(1,1,0.1))
}
my model code is like,
model <- function(){
for (j in 1:P){
for (i in 1:N){
k[i,j] <- 1+phi[j]*z[i] #k=1 phi[j]=0,k=2 phi[j]=1 z[i]=1,k=3 phi[j]=1 z[i]=2
}
phi[j] ~ dbern(w)
}
for (j in 1:P){
for (i in 1:N){
Y[i,j] ~ dnorm(mu[i,j], tau[j])
mu[i,j] = e_1[j] + e_2[j]*equals(k[i,j],2) + inprod(X[i,],beta)
}
for (r in 1:R){
beta[r,j]<-delta[r,j]*beta_0[j]
#spike and slab pior for beta
delta[r,j] ~ dbern(t)
}
#spike and slab pior for beta
beta_0[j] ~ dnorm(0,tau_beta[j])
tau_beta[j] ~ dgamma(2, 15) # input
e_1[j]~dnorm(0,tau_1[j]) #tau_mu[j]=1
tau_1[j] ~ dgamma(2, 15) # input
e_2[j]~dnorm(0,tau_2[j]) #tau_mu[j]=1
tau_2[j] ~ dgamma(2, 15) # input
sigma[j] <- 1.0/sqrt(tau[j])
# input
tau[j] ~ dgamma(10, 1)
}
# t ~ dbeta(a_t,b_t) # input
# w ~ dbeta(a_w,b_w) # input
w ~ dbeta(1,0.1)
t ~ dbeta(0.4,1.6)
}
jagsfit <- jags(data=jags.data, inits=jags.inits, jags.params,
n.iter=10,
model.file=model)
Any help would be greatly appreciated!
inprod is the dot product. Its two arguments must be vectors of the same lengths, but the second argument in your code is a matrix.
I have not checked your model, but perhaps you want inprod(X[i,],beta[,j]).
I am trying to take bootstrap samples and conduct stepwise regression in R. I am trying to look at the distribution of the regression coefficients, but the "bhat" matrix I am working on is mostly printing out NAs with the exception of the first row (which is the same number across all columns). How can I fix this?
B <- 100 #number of bootstrap samples
n <- nrow(dat)
d <- ncol(dat) - 1
bhat <- matrix(NA, nrow = B, ncol = ncol(dat) - 1)
for(b in 1:B) {
s <- sample(1:n, n, replace=TRUE)
samp <- as.matrix(dat[s, c(1:15)])
samp <- as.data.frame(samp)
#stepwise regression
null <- lm(dat$Y ~ 1, data=samp)
full <- lm(dat$Y ~ ., data=samp)
fit <- step(null, scope=formula(full), direction="forward", k=log(n), trace=0)
bhat[b,] <- coef(fit)
}
Every time you do a step the number of coefficients are different, it's not the same dimension as your matrix, and you cannot "slot" it in, you can do:
dat = data.frame(Y=rnorm(100),matrix(rnorm(100*15),ncol=15))
colnames(dat)[-1] = paste0("Var",1:15)
B <- 100 #number of bootstrap samples
n <- nrow(dat)
d <- ncol(dat) - 1
full_mdl = colnames(model.matrix(Y~.,data=dat))
bhat <- matrix(NA, nrow = B, ncol = length(full_mdl))
colnames(bhat) = full_mdl
for(b in 1:B) {
samp <- dat[sample(1:n, n, replace=TRUE), c(1:15)]
null <- lm(dat$Y ~ 1, data=samp)
full <- lm(dat$Y ~ ., data=samp)
fit <- step(null, scope=formula(full), direction="forward", k=log(n), trace=0)
bhat[b,names(coef(fit))] <- coef(fit)
}
I'm relatively new to JAGS and am running it through the R package jagsUI. I am building occupancy models, but want to summarize results as I go. So I have a matrix of 0s and 1s:
mat1 <- matrix(rbinom(10*10,1,.5),10,10)
y=mat1
That I want to run through the following model:
# Bundle data and summarize data bundle
str( win.data <- list(y = mat1, M = nrow(mat1), T = ncol(mat1)) )
# Specify model in BUGS language
sink("model.txt")
cat("
model {
# Priors
psi0 ~ dunif(0, 1)
p ~ dunif(0, 1)
for(t in 1:(T-1)){
rho[t] ~ dunif(-1,1)
}
beta0 ~ dnorm(0, 0.1)
# Likelihood
for (i in 1:M) { # Loop over sites
z[i,1] ~ dbern(psi0) # State model
y[i,1] ~ dbern(z[i,1]*p)
for (j in 2:T) { # Loop over replicate surveys
logit(psi[i,j])<- beta0 + rho[j-1]*z[i,j-1]
z[i,j] ~ dbern(psi[i,j])
y[i,j] ~ dbern(z[i,j]*p) # Observation model
}
}
# Derived quantities
coln[i,j] <- ifelse(z[i,j]-z[i,j-1]==1,1,0) # colonized
ext[i,j] <- ifelse(z[i,j-1]-z[i,j]==1,1,0) # went extinct
tot.coln[,j] <- sum(coln[,j]) # sum of colonized each survey
tot.ext[,j] <- sum(ext[,j]) # sum of extinctions each survey
Nocc[,j] <- sum(z[,j]) # total sites occupied each survey
coln.rate[,j] <- tot.coln[,j]/Nocc[,j]
ext.rate[,j] <- tot.ext[,j]/Nocc[,j]
}
",fill = TRUE)
sink()
# Initial values
zst <- apply(y, 1, max, na.rm=TRUE) # Avoid data/model/inits conflict
y<- as.matrix(y)
zst<- y
inits <- function(){list(z = zst)}
# Parameters monitored
params <- c("psi0", "p", "beta0", "coln.rate", "ext.rate")
# MCMC settings
ni <- 2000 ; nt <- 1 ; nb <- 1000 ; nc <- 3
# Call JAGS and summarize posteriors
library(jagsUI)
fm <- jags(win.data, inits, params, "model.txt", n.chains = nc,
n.thin = nt, n.iter = ni, n.burnin = nb)
print(fm, dig = 3)
The model runs except for the piece after "# Derived quantities". Basically I want to calculate the rate of change from 0 to 1 and from 1 to 0 in each survey. A couple of my thoughts on why it doesn't work. 1) z[i,j] isn't really 0s and 1s. 2) the calculations shouldn't go under Derived quantities. 3) ifelse from the JAGS manual isn't doing what I think.
I also tried using the "step" function replacing the first two lines after Derived quantities with:
coln[i,j] <- step(z[i,j]-z[i,j-1]-0.5) # colonized
ext[i,j] <- step(z[i,j-1]-z[i,j]-0.5) # went extinct
But no luck there. Any ideas?
You are indexing i and j here without looping through them. To make this work you would need to set it up within another nested for loop. Also, your extinction calculation was incorrect.
for(j in 2:T){
for(i in 1:M){
coln[i,j-1] <- ifelse(z[i,j]-z[i,j-1]==1,1,0) # colonized
ext[i,j-1] <- ifelse(z[i,j]-z[i,j-1]==-1,1,0) # went extinct
}
tot.coln[j-1] <- sum(coln[,j-1]) # sum of colonized each survey
tot.ext[j-1] <- sum(ext[,j-1]) # sum of extinctions each survey
Nocc[j-1] <- sum(z[,j-1]) # total sites occupied each survey
coln.rate[j-1] <- tot.coln[j-1]/Nocc[j-1]
ext.rate[j-1] <- tot.ext[j-1]/Nocc[j-1]
}
I have written a custom likelihood function that fits a multi-data model that integrates mark-recapture and telemetry data (sensu Royle et al. 2013 Methods in Ecology and Evolution). The likelihood function is designed to be flexible in terms of whether and how many covariates are specified for different linear models in different likelihood components which is determined by values supplied as function arguments (i.e., data matrices "detcovs" and "dencovs" in my code). The likelihood function works when I directly supply it to optimization functions (e.g., optim or nlm), but does not play nice with the mle2 function in the bbmle package. My problem is that I continually run into the following error: "some named arguments in 'start' are not arguments to the specified log-likelihood function". This is my first attempt at writing custom likelihood functions so I'm sure there are general coding conventions of which I'm unaware that make such tasks much more efficient and amendable to the mle2 function. Below is my likelihood function, code creating the staring value objects, and code calling the mle2 function. Any advice how to solve the error problem and general comments on writing cleaner functions is welcome. Many thanks in advance.
Edit: As requested, I have simplified the likelihood function and provided code to simulate reproducible data to which the model can be fit. Included in the simulation code are 2 custom functions and use of the raster function from the raster package. Hopefully, I have sufficiently simplified everything to enable others to troubleshoot. Again, many thanks for your help!
Jared
Likelihood function:
CSCR.RSF.intlik2.EXAMPLE <- function(alpha0,sigma,alphas=NULL,betas=NULL,n0,yscr=NULL,K=NULL,X=X,trapcovs=NULL,Gden=NULL,Gdet=NULL,ytel=NULL,stel=NULL,
dencovs=NULL,detcovs=NULL){
#
# this version of the code handles a covariate on log(Density). This is starting value 5
#
# start = vector of starting values
# yscr = nind x ntraps encounter matrix
# K = number of occasions
# X = trap locations
# Gden = matrix with grid cell coordinates for density raster
# Gdet = matrix with gride cell coordinates for RSF raster
# dencovs = all covariate values for all nGden pixels in density raster
# trapcovs = covariate value at trap locations
# detcovs = all covariate values for all nGrsf pixels in RSF raster
# ytel = nguys x nGdet matrix of telemetry fixes in each nGdet pixels
# stel = home range center of telemetered individuals, IF you wish to estimate it. Not necessary
# alphas = starting values for RSF/detfn coefficients excluding sigma and intercept
# alpha0 = starting values for RSF/detfn intercept
# sigma = starting value for RSF/detfn sigma
# betas = starting values for density function coefficients
# n0 = starting value for number of undetected individuals on log scale
#
n0 = exp(n0)
nGden = nrow(Gden)
D = e2dist(X,Gden)
nGdet <- nrow(Gdet)
alphas = alphas
loglam = alpha0 -(1/(2*sigma*sigma))*D*D + as.vector(trapcovs%*%alphas) # ztrap recycled over nG
psi = exp(as.vector(dencovs%*%betas))
psi = psi/sum(psi)
probcap = 1-exp(-exp(loglam))
#probcap = (exp(theta0)/(1+exp(theta0)))*exp(-theta1*D*D)
Pm = matrix(NA,nrow=nrow(probcap),ncol=ncol(probcap))
ymat = yscr
ymat = rbind(yscr,rep(0,ncol(yscr)))
lik.marg = rep(NA,nrow(ymat))
for(i in 1:nrow(ymat)){
Pm[1:length(Pm)] = (dbinom(rep(ymat[i,],nGden),rep(K,nGden),probcap[1:length(Pm)],log=TRUE))
lik.cond = exp(colSums(Pm))
lik.marg[i] = sum( lik.cond*psi )
}
nv = c(rep(1,length(lik.marg)-1),n0)
part1 = lgamma(nrow(yscr)+n0+1) - lgamma(n0+1)
part2 = sum(nv*log(lik.marg))
out = -1*(part1+ part2)
lam = t(exp(a0 - (1/(2*sigma*sigma))*t(D2)+ as.vector(detcovs%*%alphas)))# recycle zall over all ytel guys
# lam is now nGdet x nG!
denom = rowSums(lam)
probs = lam/denom # each column is the probs for a guy at column [j]
tel.loglik = -1*sum( ytel*log(probs) )
out = out + tel.loglik
out
}
Data simulation code:
library(raster)
library(bbmle)
e2dist <- function (x, y){
i <- sort(rep(1:nrow(y), nrow(x)))
dvec <- sqrt((x[, 1] - y[i, 1])^2 + (x[, 2] - y[i, 2])^2)
matrix(dvec, nrow = nrow(x), ncol = nrow(y), byrow = F)
}
spcov <- function(R) {
v <- sqrt(nrow(R))
D <- as.matrix(dist(R))
V <- exp(-D/2)
cov1 <- t(chol(V)) %*% rnorm(nrow(R))
Rd <- as.data.frame(R)
colnames(Rd) <- c("x", "y")
Rd$C <- as.numeric((cov1 - mean(cov1)) / sd(cov1))
return(Rd)
}
set.seed(1234)
co <- seq(0.3, 0.7, length=5)
X <- cbind(rep(co, each=5),
rep(co, times=5))
B <- 10
co <- seq(0, 1, length=B)
Z <- cbind(rep(co, each=B), rep(co, times=B))
dencovs <- cbind(spcov(Z),spcov(Z)[,3]) # ordered as reading raster image from left to right, bottom to top
dimnames(dencovs)[[2]][3:4] <- c("dencov1","dencov2")
denr.list <- vector("list",2)
for(i in 1:2){
denr.list[[i]] <- raster(
list(x=seq(0,1,length=10),
y=seq(0,1,length=10),
z=t(matrix(dencovs[,i+2],10,10,byrow=TRUE)))
)
}
B <- 20
co <- seq(0, 1, length=B)
Z <- cbind(rep(co, each=B), rep(co, times=B))
detcovs <- cbind(spcov(Z),spcov(Z)[,3]) # ordered as reading raster image from left to right, bottom to top
dimnames(detcovs)[[2]][3:4] <- c("detcov1","detcov2")
detcov.raster.list <- vector("list",2)
trapcovs <- matrix(0,J,2)
for(i in 1:2){
detr.list[[i]] <- raster(
list(x=seq(0,1,length=20),
y=seq(0,1,length=20),
z=t(matrix(detcovs[,i+2],20,20,byrow=TRUE)))
)
trapcovs[,i] <- extract(detr.list[[i]],X)
}
alpha0 <- -3
sigma <- 0.15
alphas <- c(1,-1)
beta0 <- 3
betas <- c(-1,1)
pixelArea <- (dencovs$y[2] - dencovs$y[1])^2
mu <- exp(beta0 + as.matrix(dencovs[,3:4])%*%betas)*pixelArea
EN <- sum(mu)
N <- rpois(1, EN)
pi <- mu/sum(mu)
s <- dencovs[sample(1:nrow(dencovs), size=N, replace=TRUE, prob=pi),1:2]
J <- nrow(X)
K <- 10
yc <- d <- p <- matrix(NA, N, J)
D <- e2dist(s,X)
loglam <- t(alpha0 - t((1/(2*sigma*sigma))*D*D) + as.vector(trapcovs%*%alphas))
p <- 1-exp(-exp(loglam))
for(i in 1:N) {
for(j in 1:J) {
yc[i,j] <- rbinom(1, K, p[i,j])
}
}
detected <- apply(yc>0, 1, any)
yscr <- yc[detected,]
ntel <- 5
nfixes <- 100
poss.tel <- which(s[,1]>0.2 & s[,1]<0.8 & s[,2]>0.2 & s[,2]<0.8)
stel.id <- sample(poss.tel,ntel)
stel <- s[stel.id,]
ytel <- matrix(NA,ntel,nrow(detcovs))
d <- e2dist(stel,detcovs[,1:2])
lam <- t(exp(1 - t((1/(2*sigma*sigma))*d*d) + as.vector(as.matrix(detcovs[,3:4])%*%alphas)))
for(i in 1:ntel){
ytel[i,] <- rmultinom(1,nfixes,lam[i,]/sum(lam[i,]))
}
Specify starting values and call mle2 function:
start1 <- list(alpha0=alpha0,sigma=sigma,alphas=alphas,betas=betas,n0=log(N-nrow(yscr)))
parnames(CSCR.RSF.intlik2.EXAMPLE) <- names(start)
out1 <- mle2(CSCR.RSF.intlik2.EXAMPLE,start=start1,method="SANN",optimizer="optim",
data=list(yscr=yscr,K=K,X=X,trapcovs=trapcovs,Gden=dencovs[,1:2],Gdet=detcovs[,1:2],
ytel=ytel,stel=stel,dencovs=as.matrix(dencovs[,3:4]),detcovs=as.matrix(detcovs[,3:4]))
)