I wrote the R function below to do the following task:
Simulate a time series of length 10 from an ARIMA model with the arima.sim() function.
Split the series into blocks of sizes 2, 3, 4, 5, 6, 7, 8, and 9.
For each block size, resample the blocks with replacement to form a new series, and obtain the best ARIMA model for that series with the auto.arima() function.
Obtain the RMSE for each block size.
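As a small illustration of steps 3 and 4, here is a hypothetical toy run for a single block size (l = 3), using the same split/sample idiom as the function below:
demo_ts <- arima.sim(n = 10, model = list(ar = 0.6, order = c(1, 0, 0)), sd = 1)
l <- 3                                                                    # one candidate block size
m <- ceiling(length(demo_ts) / l)                                         # number of blocks
blk <- split(demo_ts, rep(1:m, each = l, length.out = length(demo_ts)))   # consecutive blocks
res <- unlist(sample(blk, 10, replace = TRUE), use.names = FALSE)         # block-bootstrap series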
## Load packages and prepare multicore process
library(forecast)
library(future.apply)
plan(multisession)
library(parallel)
library(foreach)
library(doParallel)
n_cores <- detectCores()
cl <- makeCluster(n_cores)
registerDoParallel(cores = detectCores())
## simulate ARIMA(1,0, 0)
#n=10; phi <- 0.6; order <- c(1, 0, 0)
bootstrap1 <- function(n, phi){
ts <- arima.sim(n, model = list(ar=phi, order = c(1, 0, 0)), sd = 1)
########################################################
## create a vector of block sizes
t <- length(ts) # the length of the time series
lb <- seq(n-2)+1 # vector of block sizes to be 1 < l < n (i.e to be between 1 and n exclusively)
########################################################
## This section creates a matrix to store the RMSE for each block size
BOOTSTRAP <- matrix(nrow = 1, ncol = length(lb))
colnames(BOOTSTRAP) <-lb
########################################################
## This section uses foreach() to carry out the work in the braces
BOOTSTRAP <- foreach(b = 1:length(lb), .combine = 'cbind') %do%{
l <- lb[b]# block size at each instance
m <- ceiling(t / l) # number of blocks
blk <- split(ts, rep(1:m, each=l, length.out = t)) # divides the series into blocks
######################################################
res<-sample(blk, replace=T, 10) # resamples the blocks
res.unlist <- unlist(res, use.names = FALSE) # unlist the bootstrap series
train <- head(res.unlist, round(length(res.unlist) - 10)) # Train set
test <- tail(res.unlist, length(res.unlist) - length(train)) # Test set
nfuture <- forecast::forecast(train, model = forecast::auto.arima(train), lambda=0, biasadj=TRUE, h = length(test))$mean # forecast of the test set
RMSE <- Metrics::rmse(test, nfuture) # RETURN RMSE
BOOTSTRAP[b] <- RMSE
}
BOOTSTRAPS <- matrix(BOOTSTRAP, nrow = 1, ncol = length(lb))
colnames(BOOTSTRAPS) <- lb
BOOTSTRAPS
return(list(BOOTSTRAPS))
}
If the function is called as below:
bootstrap1(10, 0.6)
I get the following result:
##$BOOTSTRAPS
## 2 3 4 5 6 7 8 9
##[1,] 1.287224 2.264574 2.998069 2.349261 1.677791 1.183126 2.021157 1.357658
My attempt to use the MonteCarlo() function to run my function three (3) separate and distinct times:
param_list=list("n"=10, "phi"=0.6)
library(MonteCarlo)
MC_result<-MonteCarlo(func = bootstrap1, nrep=3, param_list = param_list)
I got the following error message:
Error in MonteCarlo(func = bootstrap1, nrep = 3, param_list = param_list) :
func has to return a list with named components. Each component has to be scalar.
Please help me figure out what I did wrong, either in my function or in the MonteCarlo() call.
Based on the error message, I would try replacing the end of your function with something like:
names(BOOTSTRAPS) <- letters[seq_along(BOOTSTRAPS)]
return(as.list(BOOTSTRAPS))
Then the resulting output is a named list with one scalar component per block size, which is the form MonteCarlo() expects.
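For context, a minimal sketch of how the tail of bootstrap1() and the MonteCarlo() call could look with that change (an assumption: the cbind-ed foreach result BOOTSTRAP still holds one RMSE per block size):
BOOTSTRAPS <- as.vector(BOOTSTRAP)                     # flatten the cbind-ed foreach result
names(BOOTSTRAPS) <- letters[seq_along(BOOTSTRAPS)]    # one syntactic name per block size
return(as.list(BOOTSTRAPS))                            # named list of scalars, as MonteCarlo() requires
param_list <- list("n" = 10, "phi" = 0.6)
MC_result <- MonteCarlo(func = bootstrap1, nrep = 3, param_list = param_list)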
Related
I wrote an R function and would like to run it k times, getting the same result whenever it is run with the same seed in the same environment. I thought of setting a seed, but I cannot reproduce the same result when I run the same function with the same seed in two consecutive trials.
## Load packages and prepare multicore process
library(forecast)
library(future.apply)
plan(multisession)
library(parallel)
library(foreach)
library(doParallel)
n_cores <- detectCores()
cl <- makeCluster(n_cores)
registerDoParallel(cores = n_cores)
bootstrap1 <- function(n, phi){
ts <- arima.sim(n, model = list(ar=phi, order = c(1, 1, 0)), sd = 1)
#ts <- numeric(n)
#ts[1] <- rnorm(1)
#for(i in 2:length(ts))
# ts[i] <- 2 * ts[i - 1] + rnorm(1)
########################################################
## create a vector of block sizes
t <- length(ts) # the length of the time series
lb <- seq(n-2)+1 # vector of block sizes to be 1 < l < n (i.e to be between 1 and n exclusively)
########################################################
## This section creates a matrix to store the RMSE for each block size
BOOTSTRAP <- matrix(nrow = 1, ncol = length(lb))
colnames(BOOTSTRAP) <-lb
#BOOTSTRAP <- list(length(lb))
########################################################
## This section uses foreach() to carry out the work in the braces
BOOTSTRAP <- foreach(b = 1:length(lb), .combine = 'cbind') %dopar%{
l <- lb[b]# block size at each instance
m <- ceiling(t / l) # number of blocks
blk <- split(ts, rep(1:m, each=l, length.out = t)) # divides the series into blocks
######################################################
res<-sample(blk, replace=T, 1000) # resamples the blocks
res.unlist <- unlist(res, use.names = FALSE) # unlist the bootstrap series
train <- head(res.unlist, round(length(res.unlist) - 10)) # Train set
test <- tail(res.unlist, length(res.unlist) - length(train)) # Test set
nfuture <- forecast::forecast(train, model = forecast::auto.arima(train), lambda=0, biasadj=TRUE, h = length(test))$mean # forecast of the test set
RMSE <- Metrics::rmse(test, nfuture) # RETURN RMSE
BOOTSTRAP[b] <- RMSE
}
BOOTSTRAPS <- matrix(BOOTSTRAP, nrow = 1, ncol = length(lb))
colnames(BOOTSTRAPS) <- lb
BOOTSTRAPS
return(list("BOOTSTRAPS" = BOOTSTRAPS))
}
First Trial
set.seed(123, kind = "L'Ecuyer-CMRG")
t(replicate(3, bootstrap1(10, 0.5)$BOOTSTRAPS[1,]))
# 2 3 4 5 6 7 8 9
#[1,] 3.353364 4.097191 3.759332 3.713234 4.541143 4.151920 4.603380 5.237056
#[2,] 4.490765 5.037171 4.289265 3.964172 3.225878 5.345506 4.646740 2.593153
#[3,] 4.514881 4.838114 3.701961 5.069747 4.165742 4.130256 3.951216 4.133241
Second Trial
set.seed(123, kind = "L'Ecuyer-CMRG")
t(replicate(3, bootstrap1(10, 0.5)$BOOTSTRAPS[1,]))
# 2 3 4 5 6 7 8 9
#[1,] 3.271285 3.701031 2.725770 3.867532 3.283368 3.713057 3.274201 4.141896
#[2,] 3.987040 3.767720 5.440987 3.850190 3.306520 5.399880 5.337676 3.288834
#[3,] 5.157924 3.895024 3.996077 4.855608 4.443317 5.224098 5.335144 2.918870
How do I set the seed, or what else should I do, to get the same result each time?
Edit
I am operating R on Windows.
You should only be setting the seed once. It seems like you're setting the seed twice: once with set.seed(1) inside your bootstrap function, and again with set.seed(123, kind = "L'Ecuyer-CMRG") outside your bootstrap function.
Try using only one set.seed() call (with the same value before each trial), and see if that fixes it.
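A minimal sketch of that suggestion (assumptions: bootstrap1() itself contains no set.seed() call, and all random draws follow the session's seeded RNG stream):
set.seed(123, kind = "L'Ecuyer-CMRG")                         # set once before the first run
run1 <- t(replicate(3, bootstrap1(10, 0.5)$BOOTSTRAPS[1, ]))
set.seed(123, kind = "L'Ecuyer-CMRG")                         # reset to the same value before the second run
run2 <- t(replicate(3, bootstrap1(10, 0.5)$BOOTSTRAPS[1, ]))
all.equal(run1, run2)  # TRUE only if the %dopar% workers also draw from this seeded stream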
I have an R function that I rewrote to make room for foreach() and parallel processing, but the resulting matrix has different column names.
The original function below:
# simulate arima(1,0,0)
library(forecast)
n=10
phi <- 0.6
set.seed(106100125)
ar1 <- arima.sim(n, model = list(ar=phi, order = c(1, 0, 0)), sd = 1)
auto.arima(ar1)
ts <- ar1
t <- length(ts) # the length of the time series
li <- seq(n-2)+1 # vector of block sizes to be 1 < l < n (i.e to be between 1 and n exclusively)
# vector to store block means
RMSEblk <- matrix(nrow = 1, ncol = length(li))
colnames(RMSEblk) <-li
for (b in 1:length(li)){
l <- li[b]# block size
m <- ceiling(t / l) # number of blocks
blk <- split(ts, rep(1:m, each=l, length.out = t)) # divides the series into blocks
# initialize vector to receive result from for loop
singleblock <- vector()
for(i in 1:10){
res<-sample(blk, replace=T, 10) # resamples the blocks
res.unlist<-unlist(res, use.names = F) # unlist the bootstrap series
# Split the series into train and test set
train <- head(res.unlist, round(length(res.unlist) * 0.6))
h <- length(res.unlist) - length(train)
test <- tail(res.unlist, h)
# Forecast for train set
model <- auto.arima(train)
future <- forecast(test, model=model,h=h)
nfuture <- as.numeric(future$mean) # makes the `future` object a vector
RMSE <- rmse(test, nfuture) # use the `rmse` function from `Metrics` package
singleblock[i] <- RMSE # Assign RMSE value to final result vector element I
}
RMSEblk[b] <- mean(singleblock) # store into matrix
}
RMSEblk
The Desired Arrangement of Result
## 2 3 4 5 6 7 8 9
##[1,] 1.022961 1.440676 1.54268 1.074976 1.205165 1.186345 1.436563 1.501218
The modified function
## Load packages and prepare multicore process
library(forecast)
library(future.apply)
plan(multisession)
library(parallel)
library(foreach)
library(doParallel)
n_cores <- detectCores()
cl <- makeCluster(n_cores)
registerDoParallel(cores = detectCores())
#########################################################
## simulate ARIMA(1,0, 0)
n=10
phi <- 0.6
set.seed(106100125)
ar1 <- arima.sim(n, model = list(ar=phi, order = c(1, 0, 0)), sd = 1)
auto.arima(ar1)
ts <- ar1
########################################################
## create a vector of block sizes
t <- length(ts) # the length of the time series
li <- seq(n-2)+1 # vector of block sizes to be 1 < l < n (i.e to be between 1 and n exclusively)
########################################################
## This section creates a matrix to store the RMSE for each block size
RMSEblk <- matrix(nrow = 1, ncol = length(li))
colnames(RMSEblk) <-li
########################################################
## This section uses foreach() to carry out the work in the braces
RMSEblk <- foreach(b = 1:length(li), .combine = 'cbind') %do%{
#for (b in 1:length(li)){
l <- li[b]# block size
m <- ceiling(t / l) # number of blocks
blk <- split(ts, rep(1:m, each=l, length.out = t)) # divides the series into blocks
######################################################
## This subsection initializes a vector to receive results from the loop
singleblock <- vector()
singleblock <- foreach(i = 1:10, .packages = c("forecast"), .combine = 'c') %dopar% { ### Replacement parallel foreach loop
#for(i in 1:10){
res<-sample(blk, replace=T, 10) # resamples the blocks
res.unlist <- unlist(res, use.names = FALSE) # unlist the bootstrap series
train <- head(res.unlist, round(length(res.unlist) * 0.8)) # Train set
test <- tail(res.unlist, length(res.unlist) - length(train)) # Test set
nfuture <- forecast(train, model = auto.arima(train), lambda=0, biasadj=TRUE, h = length(test))$mean # forecast of the test set
RMSE <- accuracy(nfuture, test) # RETURN RMSE
singleblock[i] <- RMSE # Assign RMSE value to final result vector element I
}
RMSEblk[b] <- mean(singleblock) # store into matrix
}
stopCluster(cl)
RMSEblk[] <- future_vapply(b=1:length(li), RMSEblk[b], numeric(1))
RMSEblk
The Undesired Arrangement of Result
## Error: ‘is.function(FUN)’ is not TRUE
##>
##> RMSEblk
##result.1 result.2 result.3 result.4 result.5 result.6 result.7 result.8
##[1,] 34.4202 20.54789 24.77103 25.49809 15.42677 30.80389 18.28471 15.92572
What I want
I want the result of the second function to be presented and arranged like the result of the first function. That means instead of result.1 I want 2 as the column name, instead of result.8 I want 9, and so on.
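A minimal sketch of one way to get that arrangement (an assumption, reusing the labelling idiom from the original function): coerce the cbind-ed foreach output back into a one-row matrix and assign the block sizes as column names.
RMSEblk <- matrix(RMSEblk, nrow = 1, ncol = length(li))  # one row, one column per block size
colnames(RMSEblk) <- li                                  # block sizes 2, 3, ..., 9 as column names
RMSEblk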
I have R code that contains some nested for loops within which I use the rmse() function from the Metrics package. The rmse() call works on its own, but inside my nested loops it does not.
Here is what I want to do in R:
Generate a time series data set of length 50.
Slice the same series into chunks of sizes 2, 3, ..., 48, 49, giving 48 different time series formed from step 1 above.
Divide each of the 48 series into train and test sets so I can use the rmse() function from the Metrics package to get the root mean squared error (RMSE) for each of the 48 subseries formed in step 2.
Tabulate the RMSE for each series according to its chunk size.
Obtain the best ARIMA model for each of the 48 series.
My R code
# simulate arima(1,0,0)
library(forecast)
library(Metrics)
n <- 50
phi <- 0.5
set.seed(1)
wn <- rnorm(n, mean=0, sd=1)
ar1 <- sqrt((wn[1])^2/(1-phi^2))
for(i in 2:n){
ar1[i] <- ar1[i - 1] * phi + wn[i]
}
ts <- ar1
t<-length(ts)# the length of the time series
li <- seq(n-2)+1 # vector of block sizes(i.e to be between 1 and n exclusively)
RMSEblk<-matrix(nrow = 1, ncol = length(li))#vector to store block means
colnames(RMSEblk)<-li
for (b in 1:length(li)){
l<- li[b]# block size
m <- ceiling(t / l) # number of blocks
blk<-split(ts, rep(1:m, each=l, length.out = t)) # divides the series into blocks
singleblock <- vector() #initialize vector to receive result from for loop
for(i in 1:10){
res<-sample(blk, replace=T, 100) # resamples the blocks
res.unlist<-unlist(res, use.names = F) # unlist the bootstrap series
# Split the series into train and test set
train <- head(res.unlist, round(length(res.unlist) * 0.6))
h <- length(res.unlist) - length(train)
test <- tail(res.unlist, h)
# Forecast for train set
model <- auto.arima(train)
future <- forecast(test, model=model,h=h)
nfuture <- as.numeric(out$mean) # makes the `future` object a vector
# use the `rmse` function from `Metrics` package
RMSE <- rmse(test, nn)
singleblock[i] <- RMSE # Assign RMSE value to final result vector element i
}
#singleblock
RMSEblk[b]<-mean(singleblock) #store into matrix
}
RMSEblk
The error I got
#Error in rmse(test, nn): unused argument (nn)
#Traceback:
But when I wrote
library(forecast)
train <- head(ar1, round(length(ar1) * 0.6))
h <- length(ar1) - length(train)
test <- tail(ar1, h)
model <- auto.arima(train)
#forecast <- predict(model, h)
out <- forecast(test, model=model,h=h)
nn <- as.numeric(out$mean)
rmse(test, nn)
It did work.
Please point out what I am missing.
I am able to run your code after making two very small corrections in your for loop. See the two commented lines:
for (b in 1:length(li)){
l<- li[b]
m <- ceiling(t / l)
blk<-split(ts, rep(1:m, each=l, length.out = t))
singleblock <- vector()
for(i in 1:10){
res<-sample(blk, replace=T, 100)
res.unlist<-unlist(res, use.names = F)
train <- head(res.unlist, round(length(res.unlist) * 0.6))
h <- length(res.unlist) - length(train)
test <- tail(res.unlist, h)
model <- auto.arima(train)
future <- forecast(test, model=model,h=h)
nfuture <- as.numeric(future$mean) # EDITED: `future` instead of `out`
RMSE <- rmse(test, nfuture) # EDITED: `nfuture` instead of `nn`
singleblock[i] <- RMSE
}
RMSEblk[b]<-mean(singleblock)
}
It is possible that these typos did not result in errors because nn and out were defined in the global environment while you ran the for loop. A good debugging tip is to restart R and try to reproduce the problem.
Your loop code does not define nn; the other code that works does. To start with a clean slate, use this as the first executable line:
rm(list=ls())
I want to perform a bootstrap simulation 1000 times and compute percentile confidence intervals 1000 times, for each of the sample sizes n = 10, 20, ..., 100. I've solved the problem, but instead of repeating this huge computation 10 times across roughly 300 lines of code, is there a way to shorten it, for example by running the same function 10 times in a loop? I tried a for loop, but it did not work. Here is the code that does work:
B = 1000 # number of replicates
kHat = Parameters[1] # approx = 2.06786
gammaHat = Parameters[2] # approx = 0.51144
TheoreticalMean = kHat/gammaHat
TheoreticalVariance = kHat/gammaHat^2
PercCoverage = vector("numeric", 10L)
n = 10 # sample size
getCI = function(B, k, gamma, n) {
getM = function(orgData, idx) {
bsM = mean(orgData[idx])
bsS2M = (((n-1) / n) * var(orgData[idx])) / n
c(bsM, bsS2M)
}
F = rgamma(n, kHat, gammaHat) # simulated data: original sample
M = mean(F) # M from original sample
S2M = (((n-1)/n)*var(F))/n # S^2(M) from original sample
# bootstrap
boots = t(replicate(B, getM(F, sample(seq(along=F), replace=TRUE))))
Mstar = boots[,1] # M* for each replicate
S2Mstar = boots[,2] # S^2*(M) for each replicate
biasM = mean(Mstar)-M # bias of estimator M
# indices for sorted vector of estimates
idx = trunc((B+1)*c(0.05/2,1-0.05/2))
ciPerc = sort(Mstar)[idx] # percentile CI
c(perc=ciPerc)
}
# 1000 bootstraps
Nrep <- 1000 # number of bootstraps
CIs <- t(replicate(Nrep, getCI(B, kHat, gammaHat, n)))
# coverage probabilities
PercCoverage[1] = sum((CIs[,"perc1"]<TheoreticalMean) & (CIs[,"perc2"]>TheoreticalMean)) / Nrep
However, here I need to script this for n=10, n=20 and so on to n=100, and each time I need to change PercCoverage[1] to PercCoverage[2]...PercCoverage[10] in order to store these values in an array for later plotting.
I tried setting n = c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100) and placing all of the above in a for loop, but the getCI function needs a single numeric value for n.
EDIT: For loop attempt:
n = c(10,20,30,40,50,60,70,80,90,100)
B = 1000 # number of replicates
kHat = Parameters[1] # approx = 2.06786
gammaHat = Parameters[2] # approx = 0.51144
TheoreticalMean = kHat/gammaHat
TheoreticalVariance = kHat/gammaHat^2
PercCoverage = vector("numeric", 10L)
for (i in length(n)){
getCI = function(B, k, gamma, n[i]) {
getM = function(orgData, idx) {
bsM = mean(orgData[idx])
bsS2M = (((n[i]-1) / n[i]) * var(orgData[idx])) / n[i]
c(bsM, bsS2M)
}
F = rgamma(n[i], kHat, gammaHat) # simulated data: original sample
M = mean(F) # M from original sample
S2M = (((n[i]-1)/n[i])*var(F))/n[i] # S^2(M) from original sample
# bootstrap
boots = t(replicate(B, getM(F, sample(seq(along=F), replace=TRUE))))
Mstar = boots[,1] # M* for each replicate
S2Mstar = boots[,2] # S^2*(M) for each replicate
biasM = mean(Mstar)-M # bias of estimator M
# indices for sorted vector of estimates
idx = trunc((B+1)*c(0.05/2,1-0.05/2))
ciPerc = sort(Mstar)[idx] # percentile CI
c(perc=ciPerc)
}
# 1000 bootstraps
Nrep <- 1000 # number of bootstraps
CIs <- t(replicate(Nrep, getCI(B, kHat, gammaHat, n[i])))
# coverage probabilities
PercCoverage[i] = sum((CIs[,"perc1"]<TheoreticalMean) & (CIs[,"perc2"]>TheoreticalMean)) / Nrep
}
Consider defining multiple functions: a master one, bootstrap_proc, plus getCI and getM. Then pass your sequence of sample sizes to lapply (for a list return) or sapply (for a numeric vector), with each call invoking the master function and returning a coverage probability (the last line of the function). Be sure to remove the hard-coded n = 10.
Define Functions
B = 1000 # number of replicates
kHat = Parameters[1] # approx = 2.06786
gammaHat = Parameters[2] # approx = 0.51144
TheoreticalMean = kHat/gammaHat
TheoreticalVariance = kHat/gammaHat^2
bootstrap_proc <- function(n) {
Nrep <- 1000 # 1000 bootstraps
CIs <- t(replicate(Nrep, getCI(B, kHat, gammaHat, n)))
# coverage probabilities
sum((CIs[,"perc1"]<TheoreticalMean) & (CIs[,"perc2"]>TheoreticalMean)) / Nrep
}
getCI <- function(B, k, gamma, n) {
F <- rgamma(n, kHat, gammaHat) # simulated data: original sample
M <- mean(F) # M from original sample
S2M <- (((n-1)/n)*var(F))/n # S^2(M) from original sample
# bootstrap
boots <- t(replicate(B, getM(F, sample(seq(along=F), replace=TRUE),n)))
Mstar <- boots[,1] # M* for each replicate
S2Mstar <- boots[,2] # S^2*(M) for each replicate
biasM <- mean(Mstar)-M # bias of estimator M
# indices for sorted vector of estimates
idx <- trunc((B+1)*c(0.05/2,1-0.05/2))
ciPerc <- sort(Mstar)[idx] # percentile CI
c(perc=ciPerc)
}
getM <- function(orgData, idx, n) {
bsM <- mean(orgData[idx])
bsS2M <- (((n-1) / n) * var(orgData[idx])) / n
c(bsM, bsS2M)
}
Call Function
sample_sizes <- c(10,20,30,40,50,60,70,80,90,100)
# LIST
PercCoverage <- lapply(sample_sizes, bootstrap_proc)
# VECTOR
PercCoverage <- sapply(sample_sizes, bootstrap_proc)
# VECTOR
PercCoverage <- vapply(sample_sizes, bootstrap_proc, numeric(1))
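A possible follow-up for the plotting step mentioned in the question (a sketch, assuming the numeric-vector form of PercCoverage from sapply or vapply and the 95% intervals defined in getCI):
plot(sample_sizes, PercCoverage, type = "b",
     xlab = "Sample size n", ylab = "Percentile CI coverage")
abline(h = 0.95, lty = 2)  # nominal 95% coverage for reference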
I have written a custom likelihood function that fits a multi-data model integrating mark-recapture and telemetry data (sensu Royle et al. 2013, Methods in Ecology and Evolution). The likelihood function is designed to be flexible in terms of whether, and how many, covariates are specified for different linear models in different likelihood components, which is determined by the values supplied as function arguments (i.e., the data matrices "detcovs" and "dencovs" in my code). The likelihood function works when I supply it directly to optimization functions (e.g., optim or nlm), but it does not play nicely with the mle2 function in the bbmle package. My problem is that I continually run into the following error: "some named arguments in 'start' are not arguments to the specified log-likelihood function". This is my first attempt at writing custom likelihood functions, so I'm sure there are general coding conventions I'm unaware of that would make such tasks more efficient and amenable to mle2. Below are my likelihood function, the code creating the starting-value objects, and the code calling mle2. Any advice on how to solve the error and general comments on writing cleaner functions are welcome. Many thanks in advance.
Edit: As requested, I have simplified the likelihood function and provided code to simulate reproducible data to which the model can be fit. Included in the simulation code are 2 custom functions and use of the raster function from the raster package. Hopefully, I have sufficiently simplified everything to enable others to troubleshoot. Again, many thanks for your help!
Jared
Likelihood function:
CSCR.RSF.intlik2.EXAMPLE <- function(alpha0,sigma,alphas=NULL,betas=NULL,n0,yscr=NULL,K=NULL,X=X,trapcovs=NULL,Gden=NULL,Gdet=NULL,ytel=NULL,stel=NULL,
dencovs=NULL,detcovs=NULL){
#
# this version of the code handles a covariate on log(Density). This is starting value 5
#
# start = vector of starting values
# yscr = nind x ntraps encounter matrix
# K = number of occasions
# X = trap locations
# Gden = matrix with grid cell coordinates for density raster
# Gdet = matrix with gride cell coordinates for RSF raster
# dencovs = all covariate values for all nGden pixels in density raster
# trapcovs = covariate value at trap locations
# detcovs = all covariate values for all nGrsf pixels in RSF raster
# ytel = nguys x nGdet matrix of telemetry fixes in each nGdet pixels
# stel = home range center of telemetered individuals, IF you wish to estimate it. Not necessary
# alphas = starting values for RSF/detfn coefficients excluding sigma and intercept
# alpha0 = starting values for RSF/detfn intercept
# sigma = starting value for RSF/detfn sigma
# betas = starting values for density function coefficients
# n0 = starting value for number of undetected individuals on log scale
#
n0 = exp(n0)
nGden = nrow(Gden)
D = e2dist(X,Gden)
nGdet <- nrow(Gdet)
alphas = alphas
loglam = alpha0 -(1/(2*sigma*sigma))*D*D + as.vector(trapcovs%*%alphas) # ztrap recycled over nG
psi = exp(as.vector(dencovs%*%betas))
psi = psi/sum(psi)
probcap = 1-exp(-exp(loglam))
#probcap = (exp(theta0)/(1+exp(theta0)))*exp(-theta1*D*D)
Pm = matrix(NA,nrow=nrow(probcap),ncol=ncol(probcap))
ymat = yscr
ymat = rbind(yscr,rep(0,ncol(yscr)))
lik.marg = rep(NA,nrow(ymat))
for(i in 1:nrow(ymat)){
Pm[1:length(Pm)] = (dbinom(rep(ymat[i,],nGden),rep(K,nGden),probcap[1:length(Pm)],log=TRUE))
lik.cond = exp(colSums(Pm))
lik.marg[i] = sum( lik.cond*psi )
}
nv = c(rep(1,length(lik.marg)-1),n0)
part1 = lgamma(nrow(yscr)+n0+1) - lgamma(n0+1)
part2 = sum(nv*log(lik.marg))
out = -1*(part1+ part2)
lam = t(exp(a0 - (1/(2*sigma*sigma))*t(D2)+ as.vector(detcovs%*%alphas)))# recycle zall over all ytel guys
# lam is now nGdet x nG!
denom = rowSums(lam)
probs = lam/denom # each column is the probs for a guy at column [j]
tel.loglik = -1*sum( ytel*log(probs) )
out = out + tel.loglik
out
}
Data simulation code:
library(raster)
library(bbmle)
e2dist <- function (x, y){
i <- sort(rep(1:nrow(y), nrow(x)))
dvec <- sqrt((x[, 1] - y[i, 1])^2 + (x[, 2] - y[i, 2])^2)
matrix(dvec, nrow = nrow(x), ncol = nrow(y), byrow = F)
}
spcov <- function(R) {
v <- sqrt(nrow(R))
D <- as.matrix(dist(R))
V <- exp(-D/2)
cov1 <- t(chol(V)) %*% rnorm(nrow(R))
Rd <- as.data.frame(R)
colnames(Rd) <- c("x", "y")
Rd$C <- as.numeric((cov1 - mean(cov1)) / sd(cov1))
return(Rd)
}
set.seed(1234)
co <- seq(0.3, 0.7, length=5)
X <- cbind(rep(co, each=5),
rep(co, times=5))
B <- 10
co <- seq(0, 1, length=B)
Z <- cbind(rep(co, each=B), rep(co, times=B))
dencovs <- cbind(spcov(Z),spcov(Z)[,3]) # ordered as reading raster image from left to right, bottom to top
dimnames(dencovs)[[2]][3:4] <- c("dencov1","dencov2")
denr.list <- vector("list",2)
for(i in 1:2){
denr.list[[i]] <- raster(
list(x=seq(0,1,length=10),
y=seq(0,1,length=10),
z=t(matrix(dencovs[,i+2],10,10,byrow=TRUE)))
)
}
B <- 20
co <- seq(0, 1, length=B)
Z <- cbind(rep(co, each=B), rep(co, times=B))
detcovs <- cbind(spcov(Z),spcov(Z)[,3]) # ordered as reading raster image from left to right, bottom to top
dimnames(detcovs)[[2]][3:4] <- c("detcov1","detcov2")
detcov.raster.list <- vector("list",2)
trapcovs <- matrix(0,J,2)
for(i in 1:2){
detr.list[[i]] <- raster(
list(x=seq(0,1,length=20),
y=seq(0,1,length=20),
z=t(matrix(detcovs[,i+2],20,20,byrow=TRUE)))
)
trapcovs[,i] <- extract(detr.list[[i]],X)
}
alpha0 <- -3
sigma <- 0.15
alphas <- c(1,-1)
beta0 <- 3
betas <- c(-1,1)
pixelArea <- (dencovs$y[2] - dencovs$y[1])^2
mu <- exp(beta0 + as.matrix(dencovs[,3:4])%*%betas)*pixelArea
EN <- sum(mu)
N <- rpois(1, EN)
pi <- mu/sum(mu)
s <- dencovs[sample(1:nrow(dencovs), size=N, replace=TRUE, prob=pi),1:2]
J <- nrow(X)
K <- 10
yc <- d <- p <- matrix(NA, N, J)
D <- e2dist(s,X)
loglam <- t(alpha0 - t((1/(2*sigma*sigma))*D*D) + as.vector(trapcovs%*%alphas))
p <- 1-exp(-exp(loglam))
for(i in 1:N) {
for(j in 1:J) {
yc[i,j] <- rbinom(1, K, p[i,j])
}
}
detected <- apply(yc>0, 1, any)
yscr <- yc[detected,]
ntel <- 5
nfixes <- 100
poss.tel <- which(s[,1]>0.2 & s[,1]<0.8 & s[,2]>0.2 & s[,2]<0.8)
stel.id <- sample(poss.tel,ntel)
stel <- s[stel.id,]
ytel <- matrix(NA,ntel,nrow(detcovs))
d <- e2dist(stel,detcovs[,1:2])
lam <- t(exp(1 - t((1/(2*sigma*sigma))*d*d) + as.vector(as.matrix(detcovs[,3:4])%*%alphas)))
for(i in 1:ntel){
ytel[i,] <- rmultinom(1,nfixes,lam[i,]/sum(lam[i,]))
}
Specify starting values and call mle2 function:
start1 <- list(alpha0=alpha0,sigma=sigma,alphas=alphas,betas=betas,n0=log(N-nrow(yscr)))
parnames(CSCR.RSF.intlik2.EXAMPLE) <- names(start)
out1 <- mle2(CSCR.RSF.intlik2.EXAMPLE,start=start1,method="SANN",optimizer="optim",
data=list(yscr=yscr,K=K,X=X,trapcovs=trapcovs,Gden=dencovs[,1:2],Gdet=detcovs[,1:2],
ytel=ytel,stel=stel,dencovs=as.matrix(dencovs[,3:4]),detcovs=as.matrix(detcovs[,3:4]))
)
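The error message indicates that mle2() checks every name in start against the formal arguments of the log-likelihood function (or against its parnames attribute). A minimal toy illustration of that check, using a hypothetical scalar-parameter model unrelated to the likelihood above:
library(bbmle)
set.seed(1)
y <- rnorm(20, mean = 2, sd = 1)
nll <- function(mu, logsd) -sum(dnorm(y, mean = mu, sd = exp(logsd), log = TRUE))
fit <- mle2(nll, start = list(mu = 0, logsd = 0))  # every name in `start` is a formal argument, so this runs
# mle2(nll, start = list(mu = 0, sigma = 1))       # `sigma` is not a formal argument and would trigger the same error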