I posted another question about this code here: "Maximum Likelihood Method for an ageing SIR model".
I solved that problem but have encountered another. Here is my code:
# ---- set initial conditions ----
M_inits <- P[1]
Sv0_inits <- P[2] - 0.000049*P[2] - 0.5*P[2] - 0.000012*P[2] - 0.20*P[2] - 0.20*P[2]
Sv_inits <- P_new[-1] - 0.000049*P_new[2:100] - 0.5*P_new[2:100] - 0.000012*P_new[2:100] - 0.20*P_new[2:100] - 0.20*P_new[2:100]
Iv0_inits <- 0.000049*P[2]
Iv_inits <- 0.000049*P_new[2:100]
Sz0_inits <- 0.5*P[2]-0.000012*P[2]
Sz_inits <- 0.5*P_new[2:100] - 0.000012*P_new[2:100]
Iz0_inits <- 0.000012*P[2]
Iz_inits <- 0.000012*P_new[2:100]
Rz0_inits <- 0.40*P[2]
Rz_inits <- 0.40*P_new[2:100]
B10_inits <- 0
B1_inits <- 0*P_new[2:100]
VInc0_inits <- 0
VInc_inits <- c(rep(0,99))
ZInc0_inits <- 0
ZInc_inits <- c(rep(0,99))
inits_all <- c(M=M_inits, Sv0=Sv0_inits, Sv=Sv_inits,
               Iv0=Iv0_inits, Iv=Iv_inits,
               Sz0=Sz0_inits, Sz=Sz_inits,
               Iz0=Iz0_inits, Iz=Iz_inits,
               Rz0=Rz0_inits, Rz=Rz_inits,
               B10=B10_inits, B1=B1_inits,
               VInc0=VInc0_inits, VInc=VInc_inits,
               ZInc0=ZInc0_inits, ZInc=ZInc_inits)
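(As a quick aside, one way to sanity-check inits like these is to confirm that the compartments of each age class sum back to the population; this is a sketch of mine, not from the original post, and assumes the compartments above should partition P_new.)
# sanity check (sketch): the age-class compartments should account for the population
chk <- Sv_inits + Iv_inits + Sz_inits + Iz_inits + Rz_inits - P_new[2:100]
summary(chk)  # values near zero indicate consistent split proportions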
T0 <- 0
maxtime <- 1
# ---- Maximum likelihood estimation function for closed SIR model ----
mle.sir <- function(b) {
  beta <- rep(0.4, 100)
  gamma <- 1/7   # Recovery rate for varicella - needs to be 7 days
  sigma <- 1/28  # Recovery rate for zoster - needs to be 28 days
  zeta <- 0.124  # Scaling of FOI from zoster to varicella
  wb <- 1/912
  C <- read.csv("/Users/laurenadams/Documents/gitrepo/Varicella/Input CSV/Contact Matrix.csv")
  C <- as.matrix(C)
  T0 <- 0
  maxTime <- 10
  results <- lsoda(inits_all, seq(T0, maxTime, 1), varicella_fun_novax,
                   parms=c(beta, C, R, gamma, sigma, zeta, wb, mort, ageing))
  Y <- as.numeric(results[10,])
  Y <- as.numeric(Y[603:702])
  Y <- Y*100000
  Y <- round(Y)
  nll <- -sum(dpois(x=var.data$rounded, lambda=Y, log=TRUE))
  return(nll)
  #return(results)
}
# initial - Initial estimates of beta and gamma
initial <- list(b=beta)
# fit0 - preliminary fit of model to data using the initial estimates
fit0 <- mle2(mle.sir, start=initial, fixed=list(C, R, sigma, zeta, wb, mort, ageing))
At this point I get the following error:
Error in validObject(.Object) :
invalid class “mle2” object: invalid object for slot "fullcoef" in class "mle2": got class "NULL", should be or extend class "numeric"
I tried to define the fullcoef term as follows:
fit0 <- mle2(mle.sir, start=initial, fixed=list(C, R, sigma, zeta, wb, mort, ageing), fullcoef=list(beta, C, R, sigma, zeta, wb, mort, ageing))
But it still doesn't work.
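(For reference, and not a fix for the error above: a minimal sketch of the calling convention mle2 expects. The names in start must match the formal arguments of the negative log-likelihood function, and fixed must likewise be a named list; the toy objects x and nll_fun here are hypothetical.)
library(bbmle)
set.seed(1)
x <- rpois(50, lambda=3)                          # hypothetical data
nll_fun <- function(lambda) -sum(dpois(x, lambda, log=TRUE))
fit <- mle2(nll_fun, start=list(lambda=mean(x)))  # names in start match the formals
coef(fit)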
I am trying to use bridge sampling in RStudio to simulate paths for the variance gamma process. My code is:
sigma = 0.5054
theta = 0.2464
nu = 0.1184
mu=1
N=2^(k)
k=5
V_<-rep(NA,252)
V_[0]<-0
G_[N]<-rgamma(1, shape=N*1/nu, scale=nu)
G_<-0
V<-rnorm(theta*G[N],sigma^2*G[N])
for(l in 1:k){
  n <- 2^(k-l)
  for(j in 1:2^i-1){
    i <- (2*j-1)*n
    d1 <- (n)*mu^2/nu
    d2 <- (n)*mu^2/nu
    Y <- rbeta(1,d1,d2)
    G_[i] <- G_[i-1]+(G[i+n]-G[i-n])*Y
    G[i]
    print(G_[i])
    Z <- rnorm(0,(G_[i+n]-G_[i])*sigma^2*Y)
    V_[i] <- Y*V_[i+n]+(1-Y)*V_[i-n]+Z
    print(V_[i])
  }
}
ts.plot(V[i])
I'm not sure what I've done wrong. The algorithm I am trying to follow is as below in the picture:
Based on your code, I simulated a numerical sequence, which can be roughly validated by using VarianceGamma::vgFit to estimate the parameters.
Note that the time index starts from 1 due to R syntax, and the square root of the variance is used for the standard deviation in rnorm. I probably shouldn't add the change due to the interest rate vgC at the end, since it is not included in your algorithm; please set it to 0 if it doesn't make sense.
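(A one-line illustration of the rnorm point, since it is easy to trip over: rnorm takes a standard deviation, not a variance. The values of mu, sigma and G here are arbitrary toy numbers.)
mu <- 0; sigma <- 0.5; G <- 2
rnorm(1, mean=mu, sd=sqrt(sigma^2*G))  # a draw from N(mu, sigma^2*G)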
Simulation by Brownian bridge:
# Brownian-Gamma Bridge Sampling (BGBS) of a VG process
set.seed(1)
M <- 10
nt <- 2^M + 1 #number of observations
T <- nt - 1 #total time
T_ <- seq(0, T, length.out=nt) #fixed time increments
#random time increments
#T_ = c(0, runif(nt-2), 1)
#T_ = sort(T_) * T
r <- 1 + 0.2 #interest rate
vgC <- (r-1)
sigma <- 0.5054
theta <- 0.2464
nu <- 0.1184
V_ <- G_ <- rep(NA,nt)
V_[1] <- 0
G_[1] <- 0
G_[nt] <- rgamma(1, shape=T/nu, scale=nu)
V_[nt] <- rnorm(1, theta*G_[nt], sqrt(sigma^2*G_[nt]))
for (k in 1:M)
{
  n <- 2^(M-k)
  for (j in 1:2^(k-1))
  {
    i <- (2*j-1) * n
    Y <- rbeta(1, (T_[i+1]-T_[i-n+1])/nu, (T_[i+n+1]-T_[i+1])/nu)
    G_[i+1] <- G_[i-n+1] + (G_[i+n+1] - G_[i-n+1]) * Y
    Z <- rnorm(1, sd=sqrt((G_[i+n+1] - G_[i+1]) * sigma^2 * Y))
    V_[i+1] <- Y * V_[i+n+1] + (1-Y) * V_[i-n+1] + Z
  }
}
V_ <- V_ + vgC*T_ # changes due to interest rate
plot(T_, V_)
The estimated parameters roughly match the real ones:
#Estimated parameters:
library(VarianceGamma)
dV <- V_[2:nt] - V_[1:(nt-1)]
vgFit(dV)
> vgC sigma theta nu
> 0.2996 0.5241 0.1663 0.1184
#Real parameters:
c(vgC, sigma, theta, nu)
> vgC sigma theta nu
> 0.2000 0.5054 0.2464 0.1184
EDIT
As you commented, there is another similar algorithm, and it can be implemented in a similar way.
Your code could be modified as below:
set.seed(1)
M <- 7
nt <- 2^M + 1
T <- nt - 1
T_ <- seq(0, T, length.out=nt)
sigma=0.008835
theta= -0.003856
nu=0.263743
vgc=0.004132
V_ <- G_ <- rep(1,nt)
G_[T+1] <- rgamma(1, shape=T/nu, scale=nu) #
V_[T+1] <- rnorm(1, theta*G_[T+1], sqrt(sigma^2*G_[T+1])) #
V_[1] <- 0
G_[1] <- 0
for (m in 1:M){ #
  Y <- rbeta(1, T/(2^m*nu), T/(2^m*nu))
  for (j in 1:2^(m-1)){ #
    i <- (2*j-1)
    G_[i*T/(2^m)+1] = G_[(i-1)*T/(2^m)+1] + (-G_[(i-1)*T/(2^m)+1] + G_[(i+1)*T/(2^m)+1])*Y #
    b = G_[T*(i+1)/2^m+1] - G_[T*(i)/2^m+1] #
    Z_i <- rnorm(1, sd=sqrt(b*sigma^2*Y))  # sd is the sqrt of the variance
    #V_[i] <- Y* V_[i+1] + (1-Y)*V_[i-1] + Z_i
    V_[i*T/(2^m)+1] <- Y*V_[(i+1)*T/(2^m)+1] + (1-Y)*V_[(i-1)*T/(2^m)+1] + Z_i
  }
}
V_ <- V_ + vgc*T_
V_
ts.plot(V_, main="BRIDGE", xlab="Time increment")
Ryan again. I have found another algorithm for bridge sampling which I tried on my own, but I am not convinced that my answers are correct. I have added my code, output and algorithm below, and also the output I think it should look like. I have used a similar format to your code:
set.seed(1)
M <- 7
nt <- 2^M + 1 #number of observations
T <- nt - 1 #total time
T_ <- seq(0, T, length.out=nt) #fixed time increments
sigma=0.008835
theta= -0.003856
nu=0.263743
vgc=0.004132
V_ <- G_ <- rep(1,nt)
G_[T] <- rgamma(1, shape=T/nu, scale=nu)
V_[T] <- rnorm(1, theta*G_[T], sqrt(sigma^2*G_[T]))
V_[1] <- 0
G_[1] <- 0
for (m in 2:M){
  Y <- rbeta(1,T/(2^m*nu), T/(2^m*nu))
  for (j in 2:2^(m-1)){
    i <- (2*j-1)
    G_[i*T/(2^m)] = G_[(i-1)*T/(2^m)]+(G_[(i-1)*T/(2^m)]+G_[(i+1)*T/(2^m)])*Y
    b=G_[T*(i)/2^m] - G_[T*(i-1)/2^m]
    Z_i <- rnorm(1, sd=b*sigma^2*Y)
    V_[i] <- Y* V_[i+1] + (1-Y)*V_[i-1] + Z_i
  }
}
V_ <- V_ + vgc*T_ # changes due to interest rate
V_
ts.plot(V_, main="BRIDGE", xlab="Time increment")
However, this is how the plot from my output looks (figure 1):
But as the variance gamma process is a jump process with finite activity, the path should look like the image below (just an image from Google of variance gamma paths); the sequential-sampling one looks like that, and my aim is to compare it to bridge sampling for simulating paths. But my output looks really different. Please let me know your thoughts, and if there is an issue in my code let me know, thanks. Here is the algorithm for it, much like the one above but slightly different (image not shown).
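(As an aside, for the comparison mentioned above: a sequential-sampling sketch of a VG path under the same parameters, using the standard gamma time-change construction. This is an editorial addition, not code from the thread.)
# Sequential sampling of a VG path (sketch): gamma subordinator increments,
# then conditionally normal increments of the time-changed Brownian motion
set.seed(1)
nt <- 2^7 + 1
dt <- 1                                   # unit time steps, matching T_ above
sigma <- 0.008835; theta <- -0.003856; nu <- 0.263743; vgc <- 0.004132
dG <- rgamma(nt - 1, shape = dt/nu, scale = nu)
dV <- rnorm(nt - 1, mean = theta*dG, sd = sqrt(sigma^2*dG))
V_seq <- c(0, cumsum(dV)) + vgc * (0:(nt - 1))
ts.plot(V_seq, main = "SEQUENTIAL", xlab = "Time increment")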
Very new to R and RStudio and the whole concept of a coding language. I'm trying to create reproducible code so I can properly ask a question.
The first error says:
Error in colSums(cTrain * log(pTrain) + cCar * log(pCar) + cSM * log(pSM)) :
'x' must be an array of at least two dimensions
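(For reference, base R's colSums() raises exactly this error whenever its argument is a plain vector rather than a matrix or array; a minimal illustration:)
colSums(c(1, 2, 3))
# Error in colSums(c(1, 2, 3)) : 'x' must be an array of at least two dimensions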
Using this code, where can I fix this so that 'x' can have two dimensions?
mydata <- structure(list(LUGGAGE=c(0,1,0,1,0), GA=c(0,0,0,0,0), TRAIN_AV=c(1,1,1,1,1), CAR_AV=c(1,1,1,1,1), SM_AV=c(1,1,1,1,1),
TRAIN_TT=c(114,142,235,193,227), TRAIN_CO=c(40,109,124,90,94),
SM_TxT=c(44,91,179,119,108), SM_CO=c(46,132,132,127,118),
CAR_TT=c(140,110,170,150,286), CAR_CO=c(123,104,80,95,169), CHOICE=c(2,2,3,3,2)),
.Names=c("Luggage","GA","TRAIN_AV","CAR_AV","SM_AV","TRAIN_TT","TRAIN_CO","SM_TT","SM_CO","CAR_TT","CAR_CO","CHOICE"),
row.names=c(NA,5L), class="data.frame")
## Initial value of parameters
initPar <- 8
### Log-Likelihood Function of the Logit Model
library("maxLik")
loglik <- function(x) {
  ## Parameters
  # Alternative Specific Constants
  asc_train <- x[1]
  asc_sm <- x[2]
  # Travel Time to Destination
  ttime <- x[3]
  # Travel Cost to Destination
  tcost_train <- x[4]
  tcost_car <- x[5]
  tcost_sm <- x[6]
  # Effect of Swiss Annual Season Ticket
  ga <- x[7]
  # Effect of luggage
  luggage <- x[8]
  ## Log-Likelihood Variable
  LL = 0
  ## Utility Function Vin
  train <- asc_train*matrix(1, nrow=nrow(mydata), ncol=1) + tcost_train*mydata$TRAIN_CO + ttime*mydata$TRAIN_TT/100 + ga*mydata$GA + luggage*mydata$LUGGAGE
  car <- tcost_car*mydata$CAR_CO + ttime*mydata$CAR_TT/100 + luggage*mydata$LUGGAGE
  sm <- asc_sm*matrix(1, nrow=nrow(mydata), ncol=1) + tcost_sm*mydata$SM_CO + ttime*mydata$SM_TT/100 + ga*mydata$GA + luggage*mydata$LUGGAGE
  ## exp(Vin) and Control for Mode Availability
  train <- mydata$TRAIN_AV *exp(train)
  car <- mydata$CAR_AV *exp(car)
  sm <- mydata$SM_AV *exp(sm)
  ## Choice Probabilities
  deno <- (train + car + sm)
  ## Individual Choice Probabilities
  pTrain <- mydata$TRAIN_AV *(train / deno)
  pCar <- mydata$CAR_AV *(car / deno)
  pSM <- mydata$SM_AV *(sm / deno)
  pTrain <- (pTrain!=0) *pTrain + (pTrain==0)
  pCar <- (pCar!=0) *pCar + (pCar==0)
  pSM <- (pSM!=0) *pSM + (pSM==0)
  ## Choice Results
  cTrain <- mydata$CHOICE == "1"
  cCar <- mydata$CHOICE == "3"
  cSM <- mydata$CHOICE == "2"
  ## Log-Likelihood Function
  LL <- colSums(cTrain*log(pTrain) + cCar*log(pCar) + cSM*log(pSM))
}
### Maximization of Log-Likelihood Function ###
# Parameter Optimization
result <- maxLik(loglik, start=numeric(initPar))
# Parameter Estimation, Hessian Matrix Calculation
parameters <- result$estimate
hessianMatrix <- result$hessian
# T-Statistic Calculation
tval <- parameters/sqrt(-diag(solve(hessianMatrix)))
# L(0), Log-Likelihood When All parameters = 0
L0 <- loglik(numeric(initPar))
# LL, Maximum Likelihood
LL <- result$maximum
Nicely asked question with a reproducible example; upvoted!
Your problem was very simple. Your function looks for a variable called mydata$LUGGAGE that doesn't exist. R is case sensitive and your column is called mydata$Luggage.
All you have to do is
names(mydata)[1] <- "LUGGAGE"
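(A tiny illustration of the case sensitivity, with a hypothetical data frame df:)
df <- data.frame(Luggage = 1:3)
df$Luggage  # 1 2 3
df$LUGGAGE  # NULL -- no column by that name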
Now run your script and you should get this result:
result <- maxLik(loglik, start=numeric(initPar))
result
# Maximum Likelihood estimation
# Newton-Raphson maximisation, 30 iterations
# Return code 2: successive function values within tolerance limit
# Log-Likelihood: -1.744552e-07 (8 free parameter(s))
# Estimate(s): -277.7676 -250.6531 8.651811 -1.680196 -4.208955 -1.281697 0 354.4692
I have a likelihood function that contains a bivariate normal CDF. I keep getting values close to one for the correlation, even when the true value is zero.
The R package sampleSelection maximizes a likelihood function that contains a bivariate normal CDF (as in Van de Ven and Van Praag (1981)). I tried looking at the source code for the package, but couldn't find how they write the likelihood. For reference, Van de Ven and Van Praag's paper:
The Demand for Deductibles in Private Health Insurance: A Probit Model with Sample Selection.
The likelihood function is Equation (19), where H denotes the standard normal CDF and H_2 denotes the bivariate normal CDF.
My question:
Can someone tell me how the likelihood function is written in the sampleSelection package? or
Can someone tell me why I'm getting values close to one for the correlation in the code below?
Here's the code that's keeping me up at night:
########################################################
#
# Trying to code Van de Ven and Van Praag (1981)
#
#
########################################################
library(MASS)
library(pbivnorm)
library(mnormt)
library(maxLik)
library(sampleSelection)
set.seed(1)
# Sample size
full_sample <- 1000
# Parameters
rho <- .1
beta0 <- 0
beta1 <- 1
gamma0 <- .2
gamma1 <- .5
gamma2 <- .5
varcovar <- matrix(c(1,rho,rho,1), nrow = 2, ncol = 2)
# Vectors for storing values
y <- rep(0,full_sample)
s <- rep(0,full_sample)
outcome <- rep(0,full_sample)
select <- rep(0,full_sample)
#######################
# Simulate data
#######################
x <- rnorm(full_sample)
z <- rnorm(full_sample)
errors <- mvrnorm(full_sample, rep(0,2), varcovar)
# note: 1st element for selection eq; 2nd outcome
s <- gamma0 + gamma1*x + gamma2*z + errors[,1]
y <- beta0 + beta1*x + errors[,2]
for(i in 1:full_sample){
  if(s[i]>=0){
    select[i] <- 1
    if(y[i]>=0){
      outcome[i] <- 1
    }else{
      outcome[i] <- 0
    }
  }else{
    outcome[i] <- NA
    select[i] <- 0
  }
}
#######################################
# Writing the log likelihood
##
# Note: vega1= beta0,
# vega2= beta1,
# vega3= gamma0,
# vega4= gamma1,
# vega5= gamma2,
# vega6= rho
#######################################
first.lf <- function(vega) {
  # Transforming this parameter because
  # correlation is bounded between -1 and 1
  corr <- tanh(vega[6])
  # Set up vectors for writing the log likelihood
  y0 <- 1 - outcome
  for(i in 1:full_sample) {
    if(is.na(y0[i])){ y0[i] <- 0 }
    if(is.na(outcome[i])){ outcome[i] <- 0 }
  }
  yt0 <- t(y0)
  yt1 <- t(outcome)
  missing <- 1 - select
  ytmiss <- t(missing)
  # Terms in the selection and outcome equations
  A <- vega[3] + vega[4]*x + vega[5]*z
  B <- vega[1] + vega[2]*x
  term1 <- pbivnorm(A, B, corr)
  term0 <- pbivnorm(A, -B, corr)
  term_miss <- pnorm(-A)
  log_term1 <- log(term1)
  log_term0 <- log(term0)
  log_term_miss <- log(term_miss)
  # The log likelihood
  logl <- sum(yt1 %*% log_term1 + yt0 %*% log_term0 + ytmiss %*% log_term_miss)
  return(logl)
}
startv <- c(beta0,beta1,gamma0,gamma1,gamma2,rho)
# Maximizing my likelihood gives
maxLik(logLik = first.lf, start = startv, method="BFGS")
# tanh(7.28604) = 0.9999991, far from the true value of .1
# Using sampleSelection package for comparison
outcomeF<-factor(outcome)
selectEq <- select ~ x + z
outcomeEq <- outcomeF ~ x
selection( selectEq, outcomeEq)
# Notice the value of -0.2162 for rho compared to my 0.9999991
It happens that there is a typo in the paper in equation (19). The terms from i = N_1 + 1 to N should have -rho rather than rho. Hence, using
term0 <- pbivnorm(A,-B,-corr)
gives
maxLik(logLik = first.lf, start = startv, method="BFGS")
# Maximum Likelihood estimation
# BFGS maximization, 40 iterations
# Return code 0: successful convergence
# Log-Likelihood: -832.5119 (6 free parameter(s))
# Estimate(s): 0.3723783 0.9307454 0.1349979 0.4693686 0.4572421 -0.219618
as needed.
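(In the paper's notation, and as implemented in the code above with the sign fix, my reading of the corrected equation (19) is:)
$$\log L \;=\; \sum_{i:\,y_i=1}\log H_2(A_i,\,B_i;\,\rho) \;+\; \sum_{i:\,y_i=0}\log H_2(A_i,\,-B_i;\,-\rho) \;+\; \sum_{i:\,s_i=0}\log H(-A_i),$$
where $A_i=\gamma_0+\gamma_1 x_i+\gamma_2 z_i$ is the selection index and $B_i=\beta_0+\beta_1 x_i$ is the outcome index.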
I'm working on a binomial mixture model using OpenBUGS and the R package R2OpenBUGS. I've successfully built simpler models, but once I add another level for imperfect detection, I consistently receive the error "variable X is not defined in model or in data set". I've tried a number of different things, including changing the structure of my data and entering my data directly into OpenBUGS. I'm posting this in the hope that someone else has experience with this error, and perhaps knows why OpenBUGS is not recognizing variable X even though it is clearly defined as far as I can tell.
I've also gotten the error "expected the collection operator c error pos 8"; this is not an error I've been getting previously, but I am similarly stumped.
Both the model and the data-simulation function come from Kéry's Introduction to WinBUGS for Ecologists (2010). I will note that the data set here is in lieu of my own data, which is similar.
I am including the function to build the dataset as well as the model. Apologies for the length.
# Simulate data: 180 sites, 3 sampling rounds, 3 levels of the factor 'trt',
# and continuous covariate 'X'
data.fn <- function(nsite = 180, nrep = 3, xmin = -1, xmax = 1, alpha.vec = c(0.01,0.2,0.4,1.1,0.01,0.2), beta0 = 1, beta1 = -1, ntrt = 3){
  y <- array(dim = c(nsite, nrep))  # Array for counts
  X <- sort(runif(n = nsite, min = xmin, max = xmax))  # covariate values, sorted
  # Relationship expected abundance - covariate
  x2 <- rep(1:ntrt, rep(60, ntrt))  # Indicator for population
  trt <- factor(x2, labels = c("CT", "CM", "CC"))
  Xmat <- model.matrix(~ trt*X)
  lin.pred <- Xmat[,] %*% alpha.vec  # Value of lin. predictor
  lam <- exp(lin.pred)
  # Add Poisson noise: draw N from Poisson(lambda)
  N <- rpois(n = nsite, lambda = lam)
  table(N)  # Distribution of abundances across sites
  sum(N > 0) / nsite  # Empirical occupancy
  totalN <- sum(N) ; totalN
  # Observation process
  # Relationship detection prob - covariate
  p <- plogis(beta0 + beta1 * X)
  # Make a 'census' (i.e., go out and count things)
  for (i in 1:nrep){
    y[,i] <- rbinom(n = nsite, size = N, prob = p)
  }
  # Return stuff
  return(list(nsite = nsite, nrep = nrep, ntrt = ntrt, X = X, alpha.vec = alpha.vec, beta0 = beta0, beta1 = beta1, lam = lam, N = N, totalN = totalN, p = p, y = y, trt = trt))
}
data <- data.fn()
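(A quick look at the simulated objects, as a sketch:)
str(data$y)      # 180 x 3 matrix of counts
table(data$trt)  # 60 sites per treatment level
range(data$X)    # covariate spans roughly (-1, 1)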
And here is the model:
sink("nmix1.txt")
cat("
model {
  # Priors
  for (i in 1:3){ # 3 treatment levels (factor)
    alpha0[i] ~ dnorm(0, 0.01)
    alpha1[i] ~ dnorm(0, 0.01)
  }
  beta0 ~ dnorm(0, 0.01)
  beta1 ~ dnorm(0, 0.01)
  # Likelihood
  for (i in 1:180) { # 180 sites
    C[i] ~ dpois(lambda[i])
    log(lambda[i]) <- log.lambda[i]
    log.lambda[i] <- alpha0[trt[i]] + alpha1[trt[i]]*X[i]
    for (j in 1:3){ # each site sampled 3 times
      y[i,j] ~ dbin(p[i,j], C[i])
      lp[i,j] <- beta0 + beta1*X[i]
      p[i,j] <- exp(lp[i,j])/(1+exp(lp[i,j]))
    }
  }
  # Derived quantities
}
",fill=TRUE)
sink()
# Bundle data
trt <- data$trt
y <- data$y
X <- data$X
ntrt <- 3
# Standardise covariates
s.X <- (X - mean(X))/sd(X)
win.data <- list(C = y, trt = as.numeric(trt), X = s.X)
# Inits function
inits <- function(){ list(alpha0 = rnorm(ntrt, 0, 2),
alpha1 = rnorm(ntrt, 0, 2),
beta0 = rnorm(1,0,2), beta1 = rnorm(1,0,2))}
# Parameters to estimate
parameters <- c("alpha0", "alpha1", "beta0", "beta1")
# MCMC settings
ni <- 1200
nb <- 200
nt <- 2
nc <- 3
# Start Markov chains
out <- bugs(data = win.data, inits, parameters, "nmix1.txt", n.thin=nt,
n.chains=nc, n.burnin=nb, n.iter=ni, debug = TRUE)
Note: This answer has gone through a major revision, after I noticed another problem with the code.
If I understand your model correctly, you are mixing up the y and N from the simulated data and what is passed as C to Bugs. You are passing the y variable (a matrix) to the C variable in the Bugs model, but C is accessed as a vector. From what I can see, C represents the number of "trials" in your binomial draw (the actual abundances), i.e. N in your data set. The variable y (a matrix) is called the same thing in both the simulated data and the Bugs model.
This is a reformulation of your model, as I understand it, and this runs ok:
sink("nmix1.txt")
cat("
model {
  # Priors
  for (i in 1:3){ # 3 treatment levels (factor)
    alpha0[i] ~ dnorm(0, 0.01)
    alpha1[i] ~ dnorm(0, 0.01)
  }
  beta0 ~ dnorm(0, 0.01)
  beta1 ~ dnorm(0, 0.01)
  # Likelihood
  for (i in 1:180) { # 180 sites
    C[i] ~ dpois(lambda[i])
    log(lambda[i]) <- log.lambda[i]
    log.lambda[i] <- alpha0[trt[i]] + alpha1[trt[i]]*X[i]
    for (j in 1:3){ # each site sampled 3 times
      y[i,j] ~ dbin(p[i,j], C[i])
      lp[i,j] <- beta0 + beta1*X[i]
      p[i,j] <- exp(lp[i,j])/(1+exp(lp[i,j]))
    }
  }
  # Derived quantities
}
",fill=TRUE)
sink()
# Bundle data
trt <- data$trt
y <- data$y
X <- data$X
N<- data$N
ntrt <- 3
# Standardise covariates
s.X <- (X - mean(X))/sd(X)
win.data <- list(y = y, trt = as.numeric(trt), X = s.X, C= N)
# Inits function
inits <- function(){ list(alpha0 = rnorm(ntrt, 0, 2),
alpha1 = rnorm(ntrt, 0, 2),
beta0 = rnorm(1,0,2), beta1 = rnorm(1,0,2))}
# Parameters to estimate
parameters <- c("alpha0", "alpha1", "beta0", "beta1")
# MCMC settings
ni <- 1200
nb <- 200
nt <- 2
nc <- 3
# Start Markov chains
out <- bugs(data = win.data, inits, parameters, "nmix1.txt", n.thin=nt,
n.chains=nc, n.burnin=nb, n.iter=ni, debug = TRUE)
Overall, the results from this model look ok, but there are long autocorrelation lags for beta0 and beta1. The estimate of beta1 also seems a bit off (~ -0.4), so you might want to recheck the Bugs model specification, so that it matches the simulation model (i.e. that you are fitting the correct statistical model). At the moment, I'm not sure that it does, but I don't have the time to check further right now.
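(If you want to inspect the mixing directly, a sketch, assuming out is the bugs() fit above; with n.chains > 1 the summary includes Rhat and n.eff:)
print(out)  # summary table including Rhat and n.eff per parameter
plot(out)   # R2OpenBUGS summary plot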
I got the same message trying to pass a factor to OpenBUGS, like so:
Ndata <- list(yrs=N$yrs, site=N$site, ... )
The variable "site" was not passed by the bugs function; it simply was not in the list passed to OpenBUGS.
I solved the problem by passing site as numeric:
Ndata <- list(yrs=N$yrs, site=as.numeric(N$site), ... )
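(A small illustration with a hypothetical factor, showing the integer codes that end up being passed:)
site <- factor(c("A", "B", "A"))
as.numeric(site)  # 1 2 1 -- the underlying integer codes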
I am trying to use the command mle2, in the package bbmle. I am looking at p2 of "Maximum likelihood estimation and analysis with the bbmle package" by Bolker. Somehow I fail to enter the right start values. Here's the reproducible code:
l.lik.probit <- function(par, ivs, dv){
  Y <- as.matrix(dv)
  X <- as.matrix(ivs)
  K <- ncol(X)
  b <- as.matrix(par[1:K])
  phi <- pnorm(X %*% b)
  sum(Y * log(phi) + (1 - Y) * log(1 - phi))
}
n=200
set.seed(1000)
x1 <- rnorm(n)
x2 <- rnorm(n)
x3 <- rnorm(n)
x4 <- rnorm(n)
latentz<- 1 + 2.0 * x1 + 3.0 * x2 + 5.0 * x3 + 8.0 * x4 + rnorm(n,0,5)
y <- latentz
y[latentz < 1] <- 0
y[latentz >=1] <- 1
x <- cbind(1,x1,x2,x3,x4)
values.start <-c(1,1,1,1,1)
foo2<-mle2(l.lik.probit, start=list(dv=0,ivs=values.start),method="BFGS",optimizer="optim", data=list(Y=y,X=x))
And this is the error I get:
Error in mle2(l.lik.probit, start = list(Y = 0, X = values.start), method = "BFGS", :
some named arguments in 'start' are not arguments to the specified log-likelihood function
Any idea why? Thanks for your help!
You've missed a couple of things, but the most important is that by default mle2 takes a list of parameters; you can make it take a parameter vector instead, but you have to work a little bit harder.
I have tweaked the code slightly in places. (I changed the log-likelihood function to a negative log-likelihood function, without which this would never work!)
l.lik.probit <- function(par, ivs, dv){
  K <- ncol(ivs)
  b <- as.matrix(par[1:K])
  phi <- pnorm(ivs %*% b)
  -sum(dv * log(phi) + (1 - dv) * log(1 - phi))
}
n <- 200
set.seed(1000)
dat <- data.frame(x1=rnorm(n),
x2=rnorm(n),
x3=rnorm(n),
x4=rnorm(n))
beta <- c(1,2,3,5,8)
mm <- model.matrix(~x1+x2+x3+x4,data=dat)
latentz<- rnorm(n,mean=mm%*%beta,sd=5)
y <- latentz
y[latentz < 1] <- 0
y[latentz >=1] <- 1
x <- mm
values.start <- rep(1,5)
Now we do the fit. The main thing is to specify vecpar=TRUE and to use parnames to let mle2 know the names of the elements in the parameter vector ...
library("bbmle")
names(values.start) <- parnames(l.lik.probit) <- paste0("b",0:4)
m1 <- mle2(l.lik.probit, start=values.start,
vecpar=TRUE,
method="BFGS",optimizer="optim",
data=list(dv=y,ivs=x))
As pointed out above for this particular example you have just re-implemented the probit regression (although I understand that you now want to extend this to allow for heteroscedasticity in some way ...)
dat2 <- data.frame(dat,y)
m2 <- glm(y~x1+x2+x3+x4,family=binomial(link="probit"),
data=dat2)
As a final note, I would say that you should check out the parameters argument, which allows you to specify a sub-linear model for any one of the parameters, and the formula interface:
m3 <- mle2(y~dbinom(prob=pnorm(eta),size=1),
parameters=list(eta~x1+x2+x3+x4),
start=list(eta=0),
data=dat2)
PS confint(m1) appears to work fine (giving profile CIs as requested) with this set-up.
ae <- function(x,y) all.equal(unname(coef(x)),unname(coef(y)),tol=5e-5)
ae(m1,m2) && ae(m2,m3)