I want to perform a bootstrap simulation 1000 times and compute percentile confidence intervals 1000 times for different sample sizes n = 10, 20, ..., 100. I've solved this problem, and I'm just asking: instead of repeating this huge computation 10 times, covering 300 lines of code, is there a way to shorten it, like running this function over and over again 10 times? I tried a for loop but it did not work. Here is the code that does work:
B = 1000 # number of replicates
kHat = Parameters[1] # approx = 2.06786
gammaHat = Parameters[2] # approx = 0.51144
TheoreticalMean = kHat/gammaHat
TheoreticalVariance = kHat/gammaHat^2
PercCoverage = vector("numeric", 10L)
n = 10 # sample size
getCI = function(B, k, gamma, n) {
  getM = function(orgData, idx) {
    bsM = mean(orgData[idx])
    bsS2M = (((n-1) / n) * var(orgData[idx])) / n
    c(bsM, bsS2M)
  }
  F = rgamma(n, kHat, gammaHat) # simulated data: original sample
  M = mean(F) # M from original sample
  S2M = (((n-1)/n)*var(F))/n # S^2(M) from original sample
  # bootstrap
  boots = t(replicate(B, getM(F, sample(seq(along=F), replace=TRUE))))
  Mstar = boots[,1] # M* for each replicate
  S2Mstar = boots[,2] # S^2*(M) for each replicate
  biasM = mean(Mstar)-M # bias of estimator M
  # indices for sorted vector of estimates
  idx = trunc((B+1)*c(0.05/2,1-0.05/2))
  ciPerc = sort(Mstar)[idx] # percentile CI
  c(perc=ciPerc)
}
# 1000 bootstraps
Nrep <- 1000 # number of bootstraps
CIs <- t(replicate(Nrep, getCI(B, kHat, gammaHat, n)))
# coverage probabilities
PercCoverage[1] = sum((CIs[,"perc1"]<TheoreticalMean) & (CIs[,"perc2"]>TheoreticalMean)) / Nrep
However, I need to run this for n=10, n=20, and so on up to n=100, and each time change PercCoverage[1] to PercCoverage[2], ..., PercCoverage[10] in order to store the values in an array for later plotting.
I tried setting n=c(10,20,30,40,50,60,70,80,90,100) and then placing all of the above in a for loop, but the function getCI needs a single numeric value for n.
EDIT: For loop attempt:
n = c(10,20,30,40,50,60,70,80,90,100)
B = 1000 # number of replicates
kHat = Parameters[1] # approx = 2.06786
gammaHat = Parameters[2] # approx = 0.51144
TheoreticalMean = kHat/gammaHat
TheoreticalVariance = kHat/gammaHat^2
PercCoverage = vector("numeric", 10L)
for (i in length(n)){
  getCI = function(B, k, gamma, n[i]) {
    getM = function(orgData, idx) {
      bsM = mean(orgData[idx])
      bsS2M = (((n[i]-1) / n[i]) * var(orgData[idx])) / n[i]
      c(bsM, bsS2M)
    }
    F = rgamma(n[i], kHat, gammaHat) # simulated data: original sample
    M = mean(F) # M from original sample
    S2M = (((n[i]-1)/n[i])*var(F))/n[i] # S^2(M) from original sample
    # bootstrap
    boots = t(replicate(B, getM(F, sample(seq(along=F), replace=TRUE))))
    Mstar = boots[,1] # M* for each replicate
    S2Mstar = boots[,2] # S^2*(M) for each replicate
    biasM = mean(Mstar)-M # bias of estimator M
    # indices for sorted vector of estimates
    idx = trunc((B+1)*c(0.05/2,1-0.05/2))
    ciPerc = sort(Mstar)[idx] # percentile CI
    c(perc=ciPerc)
  }
  # 1000 bootstraps
  Nrep <- 1000 # number of bootstraps
  CIs <- t(replicate(Nrep, getCI(B, kHat, gammaHat, n[i])))
  # coverage probabilities
  PercCoverage[i] = sum((CIs[,"perc1"]<TheoreticalMean) & (CIs[,"perc2"]>TheoreticalMean)) / Nrep
}
Consider defining multiple functions: a master function bootstrap_proc, plus getCI and getM. Then pass your sequence of sample sizes to lapply (for a list return) or sapply (for a numeric vector), with each call invoking the master function and returning a coverage probability (the last line of the function). Be sure to remove the hard-coded n = 10.
Define Functions
B = 1000 # number of replicates
kHat = Parameters[1] # approx = 2.06786
gammaHat = Parameters[2] # approx = 0.51144
TheoreticalMean = kHat/gammaHat
TheoreticalVariance = kHat/gammaHat^2
bootstrap_proc <- function(n) {
  Nrep <- 1000 # 1000 bootstraps
  CIs <- t(replicate(Nrep, getCI(B, kHat, gammaHat, n)))
  # coverage probabilities
  sum((CIs[,"perc1"]<TheoreticalMean) & (CIs[,"perc2"]>TheoreticalMean)) / Nrep
}
getCI <- function(B, k, gamma, n) {
  F <- rgamma(n, kHat, gammaHat) # simulated data: original sample
  M <- mean(F) # M from original sample
  S2M <- (((n-1)/n)*var(F))/n # S^2(M) from original sample
  # bootstrap
  boots <- t(replicate(B, getM(F, sample(seq(along=F), replace=TRUE), n)))
  Mstar <- boots[,1] # M* for each replicate
  S2Mstar <- boots[,2] # S^2*(M) for each replicate
  biasM <- mean(Mstar)-M # bias of estimator M
  # indices for sorted vector of estimates
  idx <- trunc((B+1)*c(0.05/2,1-0.05/2))
  ciPerc <- sort(Mstar)[idx] # percentile CI
  c(perc=ciPerc)
}
getM <- function(orgData, idx, n) {
  bsM <- mean(orgData[idx])
  bsS2M <- (((n-1) / n) * var(orgData[idx])) / n
  c(bsM, bsS2M)
}
Call Function
sample_sizes <- c(10,20,30,40,50,60,70,80,90,100)
# LIST
PercCoverage <- lapply(sample_sizes, bootstrap_proc)
# VECTOR
PercCoverage <- sapply(sample_sizes, bootstrap_proc)
# VECTOR
PercCoverage <- vapply(sample_sizes, bootstrap_proc, numeric(1))
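For the later plotting the question mentions, a minimal sketch (assuming the numeric-vector version of PercCoverage from sapply or vapply above):
# Plot coverage probability against sample size
# (assumes PercCoverage is the numeric vector from sapply/vapply above)
plot(sample_sizes, PercCoverage, type = "b",
     xlab = "sample size n", ylab = "coverage probability",
     main = "Percentile CI coverage by sample size")
abline(h = 0.95, lty = 2) # nominal 95% level for reference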
I have an R function that I rewrote to use the foreach function and parallel processing, but the resulting matrix has different column names.
The original function is below:
# simulate arima(1,0,0)
library(forecast)
library(Metrics) # for rmse()
n=10
phi <- 0.6
set.seed(106100125)
ar1 <- arima.sim(n, model = list(ar=phi, order = c(1, 0, 0)), sd = 1)
auto.arima(ar1)
ts <- ar1
t <- length(ts) # the length of the time series
li <- seq(n-2)+1 # vector of block sizes to be 1 < l < n (i.e to be between 1 and n exclusively)
# vector to store block means
RMSEblk <- matrix(nrow = 1, ncol = length(li))
colnames(RMSEblk) <-li
for (b in 1:length(li)){
  l <- li[b] # block size
  m <- ceiling(t / l) # number of blocks
  blk <- split(ts, rep(1:m, each=l, length.out = t)) # divides the series into blocks
  # initialize vector to receive result from for loop
  singleblock <- vector()
  for(i in 1:10){
    res <- sample(blk, replace=T, 10) # resamples the blocks
    res.unlist <- unlist(res, use.names = F) # unlist the bootstrap series
    # Split the series into train and test set
    train <- head(res.unlist, round(length(res.unlist) * 0.6))
    h <- length(res.unlist) - length(train)
    test <- tail(res.unlist, h)
    # Forecast for train set
    model <- auto.arima(train)
    future <- forecast(test, model=model, h=h)
    nfuture <- as.numeric(future$mean) # makes the `future` object a vector
    RMSE <- rmse(test, nfuture) # use the `rmse` function from `Metrics` package
    singleblock[i] <- RMSE # Assign RMSE value to final result vector element i
  }
  RMSEblk[b] <- mean(singleblock) # store into matrix
}
RMSEblk
The Desired Arrangement of Result
## 2 3 4 5 6 7 8 9
##[1,] 1.022961 1.440676 1.54268 1.074976 1.205165 1.186345 1.436563 1.501218
The modified function
## Load packages and prepare multicore process
library(forecast)
library(future.apply)
plan(multisession)
library(parallel)
library(foreach)
library(doParallel)
n_cores <- detectCores()
cl <- makeCluster(n_cores)
registerDoParallel(cores = detectCores())
#########################################################
## simulate ARIMA(1,0, 0)
n=10
phi <- 0.6
set.seed(106100125)
ar1 <- arima.sim(n, model = list(ar=phi, order = c(1, 0, 0)), sd = 1)
auto.arima(ar1)
ts <- ar1
########################################################
## create a vector of block sizes
t <- length(ts) # the length of the time series
li <- seq(n-2)+1 # vector of block sizes to be 1 < l < n (i.e to be between 1 and n exclusively)
########################################################
## This section creates a matrix to store block means
RMSEblk <- matrix(nrow = 1, ncol = length(li))
colnames(RMSEblk) <-li
########################################################
## This section uses the foreach function to do the work in the braces
RMSEblk <- foreach(b = 1:length(li), .combine = 'cbind') %do% {
  #for (b in 1:length(li)){
  l <- li[b] # block size
  m <- ceiling(t / l) # number of blocks
  blk <- split(ts, rep(1:m, each=l, length.out = t)) # divides the series into blocks
  ######################################################
  ## This subsection initializes the vector to receive results from the for loop
  singleblock <- vector()
  singleblock <- foreach(i = 1:10, .packages = c("forecast"), .combine = 'c') %dopar% { ### Replacement parallel foreach loop
    #for(i in 1:10){
    res <- sample(blk, replace=T, 10) # resamples the blocks
    res.unlist <- unlist(res, use.names = FALSE) # unlist the bootstrap series
    train <- head(res.unlist, round(length(res.unlist) * 0.8)) # Train set
    test <- tail(res.unlist, length(res.unlist) - length(train)) # Test set
    nfuture <- forecast(train, model = auto.arima(train), lambda=0, biasadj=TRUE, h = length(test))$mean # makes the forecast of the test set
    RMSE <- accuracy(nfuture, test) # RETURN RMSE
    singleblock[i] <- RMSE # Assign RMSE value to final result vector element i
  }
  RMSEblk[b] <- mean(singleblock) # store into matrix
}
stopCluster(cl)
RMSEblk[] <- future_vapply(b=1:length(li), RMSEblk[b], numeric(1))
RMSEblk
The Undesired Arrangement of Result
## Error: ‘is.function(FUN)’ is not TRUE
##>
##> RMSEblk
##result.1 result.2 result.3 result.4 result.5 result.6 result.7 result.8
##[1,] 34.4202 20.54789 24.77103 25.49809 15.42677 30.80389 18.28471 15.92572
What I want
I want the result of the second function to be presented and arranged like the result of the first function. That is, instead of result.1 I want 2 as the column name, and instead of result.8 I want 9 as the column name in the output of the second function.
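One simple way to get that arrangement, a sketch rather than a rework of the parallel code: since li already holds the block sizes, the columns of the cbind-combined foreach result can be renamed after the call:
# rename the columns of the combined result to the block sizes
# (li is the vector of block sizes defined above)
colnames(RMSEblk) <- li
RMSEblk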
I am trying to write a function to calculate a gradient in R. The function must specifically do so using a for loop. I am writing this function in order to illustrate how a for loop is less efficient than vectorized programming when trying to calculate the gradient.
The function takes in a design matrix X and a vector of coefficients beta in order to calculate the gradient of a cost function (the design matrix is a matrix of covariates with ones in the first column).
The cost function is the MSE, $L(\beta) = \sum_{i=1}^{n}\bigl(y_i - x_i^{\top}\beta\bigr)^2$ (up to the constant $1/n$ factor). I am calculating the gradient using an analytical solution, taking the partial derivative of the loss function. This gives us the partial derivative $\frac{\partial L}{\partial \beta_{0}} = -2\sum_{i=1}^{n}\bigl(y_i - x_i^{\top}\beta\bigr)$ for the first coefficient $\beta_{0}$, and then similarly for all other coefficients $\beta_{j}$, $\frac{\partial L}{\partial \beta_{j}} = -2\sum_{i=1}^{n} x_{ij}\bigl(y_i - x_i^{\top}\beta\bigr)$.
I managed to calculate the answer (implementing the above) using vectorized programming as the line of code below.
-2*t(X)%*%(y-X%*%beta)
My attempt at using a for loop does not work, but it looked like this:
# Initialize the matrix to hold gradient
gradient = matrix(0, nrow = nrow(beta))
for(i in 1:nrow(beta)){
  if(i == 1){
    gradient[i] = -2*sum(y - X%*%beta) # first value
  } else if(i>1){
    gradient[i] = -2*sum( t(X)%*%(y - X%*%beta) ) * apply(X[,-1],2,sum)[i-1]
  }
}
Below is the code to generate data to use and the two implementations that I tried in R. The code generates the data and can be used to test fixing the for loop if copied into R.
# Values
# Random data Generated
n = 4
p = 3
beta_0 = 2
beta = matrix(c(beta_0,3,1,4), nrow= (p+1) ) # coefficients
X = runif(n=(n*p), min=-5,max= 5) # covariates
X = matrix(X, nrow = n, ncol = p)
X = cbind(1, X) # make a design matrix
y = apply(X[,-1],1,mean) # Response (Some function) #
# Print all to show all initial values
print(list("Initial Values:"="","n:"=n," p:" = p, "beta_0:" = beta_0," beta:"=beta,
" X:"=X," y:" = y))
# Function 1
# Find the gradient (using a for loop)
# The partial derivative of the loss function
df_ols = function(beta,X,y){
  begin.time <- proc.time()
  # Initialize the matrix to hold gradient
  gradient = matrix(0, nrow = nrow(beta))
  for(i in 1:nrow(beta)){
    if(i == 1){
      gradient[i] = -2*sum(y - X%*%beta) # first value
    } else if(i>1){
      gradient[i] = -2*sum( t(X)%*%(y - X%*%beta) ) * apply(X[,-1],2,sum)[i-1]
    }
  }
  end.time <- proc.time()
  time <- (end.time-begin.time)[3]
  print(list("gradient 1"=gradient,"time"=time))
}
df_ols(beta,X,y)
# Function 2
# Find the gradient Approach 2 using vectorized programming
gradient_3 <- function(X, beta){
  begin.time <- proc.time()
  # Finding the gradient
  grad_3 <- -2*t(X)%*%(y-X%*%beta)
  grad_3 <- matrix(grad_3, ncol = 1, nrow = ncol(X)) # Turn into a column matrix
  end.time <- proc.time()
  time <- (end.time-begin.time)[3]
  print(list("gradient 3"= grad_3 ,"time"=time))
}
gradient_3(X, beta) # Assumed Correct
I apologize if I was too wordy. Any help would be appreciated.
I managed to get the for loop to work; the answer is below.
# Find the gradient (using a for loop) explicit, element-wise formulations
# The partial derivative of the loss function
df_ols = function(beta,X,y){
  begin.time <- proc.time()
  # Initialize the matrix to hold gradient
  gradient = matrix(0, nrow = nrow(beta))
  for(i in 1:nrow(beta)){
    if(i == 1){
      gradient[i] = -2*sum(y - X%*%beta) # first value
    } else if(i>1){
      gradient[i] = -2*t(X[,i])%*%(y-X%*%beta)
      #gradient[i] = -2*sum( t(X)%*%(y - X%*%beta) ) * apply(X[,-1],2,sum)[i-1]
    }
  }
  end.time <- proc.time()
  time <- (end.time-begin.time)[3]
  print(list("gradient 1"=gradient,"time"=time))
}
df_ols(beta,X,y)
Running both the vectorized and element-wise formulations, we find the vectorized approach is much faster. We implement the full process below.
# Random data Generated
n = 100000*12
p = 3
beta_0 = 2
beta = matrix(c(beta_0,3,1,4), nrow= (p+1) ) # coefficients
X = runif(n=(n*p), min=-5,max= 5) # covariates
X = matrix(X, nrow = n, ncol = p)
X = cbind(1, X) # make a design matrix
y = apply(X[,-1],1,mean) # Response (Some function)
# Print parameters. To show all initial values
print(list("Initial Values:" = '',"n:"=n," p:" = p, "beta_0:" = beta_0," beta:"=beta,
" X:"=round(X,digits=2)," y:" = round(y,digits=2)))
# Function 3
# Find the gradient Approach 3, using vectorized programming
gradient_3 <- function(X, beta, y){
  begin.time <- proc.time()
  # Find the partial derivative of the rest (j'th)
  grad_3 <- -2*t(X)%*%(y-X%*%beta)
  grad_3 <- matrix(grad_3, ncol = 1, nrow = ncol(X)) # Turn into a column matrix
  end.time <- proc.time()
  time <- (end.time-begin.time)[3]
  print(list("gradient 3"= grad_3 ,"time"=time))
}
Showing the results
> df_ols(beta,X,y) # Elementwise
$`gradient 1`
[,1]
[1,] 4804829
[2,] 53311879
[3,] 13471077
[4,] 73259191
$time
elapsed
3.59
> gradient_3(X, beta,y) # Vectorized Programming solution
$`gradient 3`
[,1]
[1,] 4804829
[2,] 53311879
[3,] 13471077
[4,] 73259191
$time
elapsed
0.89
I am trying to write code to solve the following problem (as stated in HW5 of the Caltech course Learning from Data):
In this problem you will create your own target function f (probability in this case) and data set D to see how Logistic Regression works. For simplicity, we will take f to be a 0/1 probability so y is a deterministic function of x. Take d = 2 so you can visualize the problem, and let X = [-1, 1]×[-1, 1] with uniform probability of picking each x in X. Choose a line in the plane as the boundary between f(x) = 1 (where y has to be +1) and f(x) = 0 (where y has to be -1) by taking two random, uniformly distributed points from X and taking the line passing through them as the boundary between y = ±1. Pick N = 100 training points at random from X, and evaluate the outputs yn for each of these points xn. Run Logistic Regression with Stochastic Gradient Descent to find g, and estimate Eout (the cross-entropy error) by generating a sufficiently large, separate set of points to evaluate the error. Repeat the experiment for 100 runs with different targets and take the average. Initialize the weight vector of Logistic Regression to all zeros in each run. Stop the algorithm when |w(t-1) - w(t)| < 0.01, where w(t) denotes the weight vector at the end of epoch t. An epoch is a full pass through the N data points (use a random permutation of 1, 2, ..., N to present the data points to the algorithm within each epoch, and use different permutations for different epochs). Use a learning rate of 0.01.
I am required to calculate the nearest value to Eout for N=100, and the average number of epochs for the required criterion.
I wrote and ran the code but I'm not getting the right answers (according to the provided solutions, Eout should be near 0.1 and the average number of epochs near 350). With the stated stopping criterion |w(t-1) - w(t)| < 0.01, the required number of epochs comes out far too small (around 10), leaving the error far too big (around 2). I then tried replacing the criterion with |w(t-1) - w(t)| < 0.001 (rather than 0.01). Then the average required number of epochs was about 250 and the out-of-sample error was about 0.35.
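For reference, the quantities I am trying to compute (the standard logistic-regression forms, as I understand them) are the cross-entropy error over a large test set of size $M$, $E_{\mathrm{out}} \approx \frac{1}{M}\sum_{m=1}^{M}\ln\bigl(1+e^{-y_m \mathbf{w}^{\top}\mathbf{x}_m}\bigr)$, and, for a single training point $(\mathbf{x}_n, y_n)$, the per-point gradient used by the SGD update, $\nabla e_n(\mathbf{w}) = \dfrac{-y_n\,\mathbf{x}_n}{1+e^{\,y_n \mathbf{w}^{\top}\mathbf{x}_n}}$, with $\mathbf{w} \leftarrow \mathbf{w} - \eta\,\nabla e_n(\mathbf{w})$ and learning rate $\eta = 0.01$.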
Is there something wrong with my code/solution, or is it possible that the answers provided are faulty? I've added comments to indicate what I intend to do at each step. Thanks in advance.
library(pracma)
h<- 0 # h will later be updated to number of required epochs
p<- 0 # p will later be updated to Eout
C <- matrix(ncol=10000, nrow=2) # Testing set, used to calculate out of sample error
d <- matrix(ncol=10000, nrow=1)
for(i in 1:10000){
  C[, i] <- c(runif(2, min = -1, max = 1)) # Sample data
  d[1, i] <- sign(C[2, i] - f(C[1, i]))
}
for(g in 1:100){ # 100 runs of the experiment
  x <- runif(2, min = -1, max = 1)
  y <- runif(2, min = -1, max = 1)
  fit = (lm(y~x))
  t <- summary(fit)$coefficients[,1]
  f <- function(x){ # Target function
    t[2]*x + t[1]
  }
  A <- matrix(ncol=100, nrow=2) # Sample data
  b <- matrix(ncol=100, nrow=1)
  norm_vec <- function(x) {sqrt(sum(x^2))} # vector norm calculator
  w <- c(0,0) # weights initialized to zero
  for(i in 1:100){
    A[, i] <- c(runif(2, min = -1, max = 1)) # Sample data
    b[1, i] <- sign(A[2, i] - f(A[1, i]))
  }
  q <- matrix(nrow = 2, ncol = 1000) # q tracks the weight vector at the end of each epoch
  l = 1
  while(l < 1001){
    E <- function(z){ # cross entropy error function
      x = z[1]
      y = z[2]
      v = z[3]
      return(log(1 + exp(-v*t(w)%*%c(x, y))))
    }
    err <- function(xn1, xn2, yn){ # gradient of error function
      return(c(-yn*xn1, -yn*xn2)*(exp(-yn*t(w)*c(xn1,xn2))/(1+exp(-yn*t(w)*c(xn1,xn2)))))
    }
    e = matrix(nrow = 2, ncol = 100) # e will track the required gradient at each data point
    e[,1:100] = 0
    perm = sample(100, 100, replace = FALSE, prob = NULL) # Random permutation of the data indices
    for(j in 1:100){ # One complete Epoch
      r = A[,perm[j]] # pick the perm[j]th entry in A
      s = b[perm[j]] # pick the perm[j]th entry in b
      e[,perm[j]] = err(r[1], r[2], s) # Gradient of the error
      w = w - 0.01*e[,perm[j]] # update the weight vector according to the formula involving step size and gradient
    }
    q[,l] = w # the lth entry is the weight vector at the end of the lth epoch
    if(l > 1 & norm_vec(q[,l] - q[,l-1])<0.001){ # given criterion to terminate the algorithm
      break
    }
    l = l+1 # move to the next epoch
  }
  for(n in 1:10000){
    p[g] = mean(E(c(C[1,n], C[2, n], d[n]))) # average over 10000 data points, of the error function, in experiment no. g
  }
  h[g] = l # gth entry in the vector h, tracks the number of epochs in the gth iteration of the experiment
}
mean(h) # Mean number of epochs needed
mean(p) # average Eout, over 100 experiments
I am new to R and am trying to build a cumulative binomial distribution table, and I got stuck in the loop.
r = readline("please enter an interger n:")
p = seq(from = 0.1, to = 1,by = 0.1 )
r = seq(from = 0, to = 100)
n <- ""
for (each in r) {
x=qbinom(x,r,p)
}
print(x)
As an alternative to the loop: you can use expand.grid to create all combinations of k and p, and further avoid the loop since pbinom can take vectors.
# Create input values
p = 1:9/10
k = 0:25
n = 25
# Create permutations of `k` and `p`: use this to make grid of values
ex <- expand.grid(p=p, k=k)
# Find probabilities for each value set
ex$P <- with(ex, pbinom(k, n, p ))
# Reshape to your required table format
round(reshape2::dcast(k ~ p, data=ex, value.var = "P"), 3)
Loop approach
# Values to match new example
p = 1:19/20
k = 0:25
n = 4
# Create matrix to match the dimensions of our required output
# We will fill this as we iterate through the loop
mat1 <- mat2 <- matrix(0, ncol=length(p), nrow=length(k), dimnames=list(k, p))
# Loop through the values of k
# We will also use the fact that you can pass vectors to `pbinom`
# so for each value of `k`, we pass the vector of `p`
# So we will update each row of our output matrix with
# each iteration of the loop
for(i in seq_along(k)){
mat1[i, ] <- pbinom(k[i], n, p)
}
Just for completeness, we could have updated the columns of our output matrix instead - that is, for each value of p pass the vector k:
for(j in seq_along(p)){
mat2[, j] <- pbinom(k, n, p[j])
}
# Check that they give the same result
all.equal(mat1, mat2)
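A quick spot check on either matrix (a sketch, assuming n = 4 as above):
# P(X <= 3) for a Binomial(n = 4, p = 0.5) should appear in row "3", column "0.5"
mat1["3", "0.5"]
pbinom(3, 4, 0.5) # 0.9375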
I have written a custom likelihood function that fits a multi-data model integrating mark-recapture and telemetry data (sensu Royle et al. 2013, Methods in Ecology and Evolution). The likelihood function is designed to be flexible in terms of whether and how many covariates are specified for the different linear models in the different likelihood components, which is determined by values supplied as function arguments (i.e., the data matrices "detcovs" and "dencovs" in my code). The likelihood function works when I supply it directly to optimization functions (e.g., optim or nlm), but it does not play nice with the mle2 function in the bbmle package. My problem is that I continually run into the following error: "some named arguments in 'start' are not arguments to the specified log-likelihood function". This is my first attempt at writing custom likelihood functions, so I'm sure there are general coding conventions of which I'm unaware that would make such tasks much more efficient and amenable to the mle2 function. Below are my likelihood function, the code creating the starting-value objects, and the code calling the mle2 function. Any advice on how to solve the error and general comments on writing cleaner functions are welcome. Many thanks in advance.
Edit: As requested, I have simplified the likelihood function and provided code to simulate reproducible data to which the model can be fit. Included in the simulation code are 2 custom functions and use of the raster function from the raster package. Hopefully, I have sufficiently simplified everything to enable others to troubleshoot. Again, many thanks for your help!
Jared
Likelihood function:
CSCR.RSF.intlik2.EXAMPLE <- function(alpha0,sigma,alphas=NULL,betas=NULL,n0,yscr=NULL,K=NULL,X=X,trapcovs=NULL,Gden=NULL,Gdet=NULL,ytel=NULL,stel=NULL,
                                     dencovs=NULL,detcovs=NULL){
  #
  # this version of the code handles a covariate on log(Density). This is starting value 5
  #
  # start = vector of starting values
  # yscr = nind x ntraps encounter matrix
  # K = number of occasions
  # X = trap locations
  # Gden = matrix with grid cell coordinates for density raster
  # Gdet = matrix with grid cell coordinates for RSF raster
  # dencovs = all covariate values for all nGden pixels in density raster
  # trapcovs = covariate value at trap locations
  # detcovs = all covariate values for all nGrsf pixels in RSF raster
  # ytel = nguys x nGdet matrix of telemetry fixes in each nGdet pixels
  # stel = home range center of telemetered individuals, IF you wish to estimate it. Not necessary
  # alphas = starting values for RSF/detfn coefficients excluding sigma and intercept
  # alpha0 = starting values for RSF/detfn intercept
  # sigma = starting value for RSF/detfn sigma
  # betas = starting values for density function coefficients
  # n0 = starting value for number of undetected individuals on log scale
  #
  n0 = exp(n0)
  nGden = nrow(Gden)
  D = e2dist(X,Gden)
  nGdet <- nrow(Gdet)
  alphas = alphas
  loglam = alpha0 -(1/(2*sigma*sigma))*D*D + as.vector(trapcovs%*%alphas) # ztrap recycled over nG
  psi = exp(as.vector(dencovs%*%betas))
  psi = psi/sum(psi)
  probcap = 1-exp(-exp(loglam))
  #probcap = (exp(theta0)/(1+exp(theta0)))*exp(-theta1*D*D)
  Pm = matrix(NA,nrow=nrow(probcap),ncol=ncol(probcap))
  ymat = yscr
  ymat = rbind(yscr,rep(0,ncol(yscr)))
  lik.marg = rep(NA,nrow(ymat))
  for(i in 1:nrow(ymat)){
    Pm[1:length(Pm)] = (dbinom(rep(ymat[i,],nGden),rep(K,nGden),probcap[1:length(Pm)],log=TRUE))
    lik.cond = exp(colSums(Pm))
    lik.marg[i] = sum( lik.cond*psi )
  }
  nv = c(rep(1,length(lik.marg)-1),n0)
  part1 = lgamma(nrow(yscr)+n0+1) - lgamma(n0+1)
  part2 = sum(nv*log(lik.marg))
  out = -1*(part1+ part2)
  lam = t(exp(a0 - (1/(2*sigma*sigma))*t(D2)+ as.vector(detcovs%*%alphas))) # recycle zall over all ytel guys
  # lam is now nGdet x nG!
  denom = rowSums(lam)
  probs = lam/denom # each column is the probs for a guy at column [j]
  tel.loglik = -1*sum( ytel*log(probs) )
  out = out + tel.loglik
  out
}
Data simulation code:
library(raster)
library(bbmle)
e2dist <- function (x, y){
i <- sort(rep(1:nrow(y), nrow(x)))
dvec <- sqrt((x[, 1] - y[i, 1])^2 + (x[, 2] - y[i, 2])^2)
matrix(dvec, nrow = nrow(x), ncol = nrow(y), byrow = F)
}
spcov <- function(R) {
v <- sqrt(nrow(R))
D <- as.matrix(dist(R))
V <- exp(-D/2)
cov1 <- t(chol(V)) %*% rnorm(nrow(R))
Rd <- as.data.frame(R)
colnames(Rd) <- c("x", "y")
Rd$C <- as.numeric((cov1 - mean(cov1)) / sd(cov1))
return(Rd)
}
set.seed(1234)
co <- seq(0.3, 0.7, length=5)
X <- cbind(rep(co, each=5),
rep(co, times=5))
B <- 10
co <- seq(0, 1, length=B)
Z <- cbind(rep(co, each=B), rep(co, times=B))
dencovs <- cbind(spcov(Z),spcov(Z)[,3]) # ordered as reading raster image from left to right, bottom to top
dimnames(dencovs)[[2]][3:4] <- c("dencov1","dencov2")
denr.list <- vector("list",2)
for(i in 1:2){
denr.list[[i]] <- raster(
list(x=seq(0,1,length=10),
y=seq(0,1,length=10),
z=t(matrix(dencovs[,i+2],10,10,byrow=TRUE)))
)
}
B <- 20
co <- seq(0, 1, length=B)
Z <- cbind(rep(co, each=B), rep(co, times=B))
detcovs <- cbind(spcov(Z),spcov(Z)[,3]) # ordered as reading raster image from left to right, bottom to top
dimnames(detcovs)[[2]][3:4] <- c("detcov1","detcov2")
detcov.raster.list <- vector("list",2)
trapcovs <- matrix(0,J,2)
for(i in 1:2){
detr.list[[i]] <- raster(
list(x=seq(0,1,length=20),
y=seq(0,1,length=20),
z=t(matrix(detcovs[,i+2],20,20,byrow=TRUE)))
)
trapcovs[,i] <- extract(detr.list[[i]],X)
}
alpha0 <- -3
sigma <- 0.15
alphas <- c(1,-1)
beta0 <- 3
betas <- c(-1,1)
pixelArea <- (dencovs$y[2] - dencovs$y[1])^2
mu <- exp(beta0 + as.matrix(dencovs[,3:4])%*%betas)*pixelArea
EN <- sum(mu)
N <- rpois(1, EN)
pi <- mu/sum(mu)
s <- dencovs[sample(1:nrow(dencovs), size=N, replace=TRUE, prob=pi),1:2]
J <- nrow(X)
K <- 10
yc <- d <- p <- matrix(NA, N, J)
D <- e2dist(s,X)
loglam <- t(alpha0 - t((1/(2*sigma*sigma))*D*D) + as.vector(trapcovs%*%alphas))
p <- 1-exp(-exp(loglam))
for(i in 1:N) {
for(j in 1:J) {
yc[i,j] <- rbinom(1, K, p[i,j])
}
}
detected <- apply(yc>0, 1, any)
yscr <- yc[detected,]
ntel <- 5
nfixes <- 100
poss.tel <- which(s[,1]>0.2 & s[,1]<0.8 & s[,2]>0.2 & s[,2]<0.8)
stel.id <- sample(poss.tel,ntel)
stel <- s[stel.id,]
ytel <- matrix(NA,ntel,nrow(detcovs))
d <- e2dist(stel,detcovs[,1:2])
lam <- t(exp(1 - t((1/(2*sigma*sigma))*d*d) + as.vector(as.matrix(detcovs[,3:4])%*%alphas)))
for(i in 1:ntel){
ytel[i,] <- rmultinom(1,nfixes,lam[i,]/sum(lam[i,]))
}
Specify starting values and call mle2 function:
start1 <- list(alpha0=alpha0,sigma=sigma,alphas=alphas,betas=betas,n0=log(N-nrow(yscr)))
parnames(CSCR.RSF.intlik2.EXAMPLE) <- names(start)
out1 <- mle2(CSCR.RSF.intlik2.EXAMPLE,start=start1,method="SANN",optimizer="optim",
data=list(yscr=yscr,K=K,X=X,trapcovs=trapcovs,Gden=dencovs[,1:2],Gdet=detcovs[,1:2],
ytel=ytel,stel=stel,dencovs=as.matrix(dencovs[,3:4]),detcovs=as.matrix(detcovs[,3:4]))
)
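For what it's worth, when a likelihood function takes vector-valued parameters, bbmle::mle2 is typically driven through parnames() plus vecpar = TRUE with a single flattened parameter vector. A minimal sketch using a hypothetical toy likelihood (not the model above), just to show the calling pattern:
library(bbmle)
# hypothetical toy data and negative log-likelihood taking ONE parameter vector
set.seed(1)
xdat <- rnorm(50, mean = 2, sd = 1.5)
nll <- function(pars) {
  mu <- pars["mu"]
  logsd <- pars["logsd"]
  -sum(dnorm(xdat, mean = mu, sd = exp(logsd), log = TRUE))
}
parnames(nll) <- c("mu", "logsd") # declare the parameter names for mle2
fit <- mle2(nll, start = c(mu = 0, logsd = 0), vecpar = TRUE)
coef(fit)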