I need to run a coverage probability test on different sample sizes and censoring proportions. I need to replicate 1000 bootstrap samples using the boot function in R. I have run the code for up to 3-8 hours and I have no idea regarding the runtime.
set.seed(20)
lambda <- 0.02
beta <- 0.5
alpha <- 0.05
n <- 140
N <- 1000
lambda_hat <- NULL
beta_hat <- NULL
cp <- NULL
bp_lambda <- matrix(NA, nrow=N, ncol=2)
bp_beta <- matrix(NA, nrow=N, ncol=2)
for (i in 1:N) {
u <- runif(n)
c_i <- rexp(n, 0.0001)
t_i <- (log(1 - (1/lambda)*log(1 - u)))^(1/beta)
s_i <- 1*(t_i < c_i)
t <- pmin(t_i, c_i)
data <- data.frame(u, t_i, c_i, s_i, t)
estimates.boot <- function(data, j){
data <- data[j, ]
data0 <- data[which(data$s_i == 0), ] #uncensored data
data1 <- data[which(data$s_i == 1), ] #right censored data
data
library(maxLik)
LLF <- function(para) {
t1 <- data$t_i
lambda <- para[1]
beta <- para[2]
e <- s_i*log(lambda*t1^(beta - 1)*beta*exp(t1^beta)*exp(lambda*(1 - exp(t1^beta))))
r <- (1 - s_i)*log(exp(lambda*(1 - exp(t1^beta))))
f <- sum(e + r)
return(f)
}
mle <- maxLik(LLF, start=c(para=c(0.02, 0.5)))
lambda_hat[i] <- mle$estimate[1]
beta_hat[i] <- mle$estimate[2]
return(c(lambda_hat[i], beta_hat[i]))
}
library(boot)
bootstrap <- boot(data, estimates.boot, 1000)
bootlambda <- bootstrap$t[, 1]
klambda <- bootlambda[order(bootlambda)]
bp_lambda[i, ] <- c(klambda[25], klambda[975])
bootbeta <- bootstrap$t[, 2]
kbeta <- bootbeta[order(bootbeta)]
bp_beta[i, ] <- c(kbeta[25], kbeta[975])
}
left_lambda <- sum(bp_lambda[, 1]>lambda)/N
right_lambda <- sum(bp_lambda[, 2]<lambda)/N
total_lambda <- left_lambda + right_lambda
left_beta <- sum(bp_beta[, 1] > beta)/N
right_beta <- sum(bp_beta[, 2]<beta)/N
total_beta <- left_beta + right_beta
sealphahat <- sqrt(alpha*(1 - alpha)/N)
antilambda <- total_lambda>(alpha + 2.58*sealphahat)
conlambda <- total_lambda<(alpha - 2.58*sealphahat)
asymlambda <- (max(left_lambda, right_lambda)/min(left_lambda, right_lambda)) > 1.5
antibeta <- total_beta > (alpha + 2.58*sealphahat)
conbeta <- total_beta < (alpha - 2.58*sealphahat)
asymbeta <- (max(left_beta, right_beta)/min(left_beta, right_beta)) > 1.5
anti <- antilambda + antibeta
con <- conlambda + conbeta
asym <- asymlambda + asymbeta
cbind(anti, con, asym)
Anyone have idea how to speed up the operation?
Basically, you want to apply a random sampling to an estimation function (inner bootstrap) and randomly sample the entire process again (outer bootstrap).
Consequently we could write an estimation function estimate() using replicate() (to avoid boot:boot) and a function for the inner bootstrap innerBoot(). In the latter we could use matrixStats::rowQuantiles for fast computation of the quantiles you want.
I essentially used your code, just fixed a few issues that prevented the code from running.
estimate <- function() {
u <- runif(n)
c_i <- rexp(n, 0.0001)
t_i <- (log(1 - (1/lambda)*log(1 - u)))^(1/beta)
s_i <- 1*(t_i < c_i)
t <- pmin(t_i, c_i)
LLF <- function(para) {
lambda <- para[1]
beta <- para[2]
e <- s_i*log(lambda*t_i^(beta - 1)*beta*exp(t_i^beta)*exp(lambda*(1 - exp(t_i^beta))))
r <- (1 - s_i)*log(exp(lambda*(1 - exp(t_i^beta))))
return(sum(e + r))
}
mle <- maxLik::maxLik(LLF, start=c(para=c(0.02, 0.5)))
return(setNames(mle$estimate, c('lambda_hat', 'beta_hat')))
}
innerBoot <- function() {
boot <- replicate(N, estimate())
return(matrixStats::rowQuantiles(boot, p=c(.025, .975)))
}
We also perform the outer bootstrap with replicate(). I wrap it here in system.time() to get a time measurement.
lambda <- 0.02
beta <- 0.5
alpha <- 0.05
n <- 140
# N <- 1000
N <- 10 ## for testing
seed <- 42
set.seed(seed)
tm <- system.time(
BA <- replicate(N, innerBoot())
)
I got these measurements,
tm
# user system elapsed ## N = 10
# 1.055 0.000 1.057
# user system elapsed ## N = 100
# 102.012 0.227 102.489
which indicates that for N <- 1000 about 167 minutes are to be expected.
The result is an array of dim 2x2xN.
> dim(BA)
[1] 2 2 100
To calculate the summaries we may easily refer to the respective cells.
boot_sum <- function(BA) {
left_lambda <- sum(BA[1, 1, ] > lambda)/N
right_lambda <- sum(BA[1, 2, ] >< lambda)/N
left_beta <- sum(BA[2, 1, ] > beta)/N
right_beta <- sum(BA[2, 2, ] < beta)/N
total_lambda <- left_lambda + right_lambda
total_beta <- left_beta + right_beta
sealphahat <- sqrt(alpha*(1 - alpha)/N)
antilambda <- total_lambda > (alpha + 2.58*sealphahat)
conlambda <- total_lambda < (alpha - 2.58*sealphahat)
asymlambda <- (max(left_lambda, right_lambda)/min(left_lambda, right_lambda)) > 1.5
antibeta <- total_beta > (alpha + 2.58*sealphahat)
conbeta <- total_beta < (alpha - 2.58*sealphahat)
asymbeta <- (max(left_beta, right_beta)/min(left_beta, right_beta)) > 1.5
anti <- antilambda + antibeta
con <- conlambda + conbeta
asym <- asymlambda + asymbeta
return(cbind(anti, con, asym))
}
boot_sum(BA)
# anti con asym
# [1,] 2 0 2
Note: You should definitely check the code in the body of estimate() (i.e. run it manually several times without bootstrapping), as it throws warnings every now and then, probably there is a mistake in how you define LLF().
Warning messages:
1: In log(lambda * t_i^(beta - 1) * beta * exp(t_i^beta) * exp(lambda * :
NaNs produced
2: In log(lambda * t_i^(beta - 1) * beta * exp(t_i^beta) * exp(lambda * :
NaNs produced
Also I'm not sure if the summary calculation currently makes much sense.
My advice is to check your likelihood function and the summary by 1. manually run the lines, 2. starting with a very small N like 10 or so, to see if calculations make sense.
Once you've checked that, it's worth to wait the ~167 minutes to wait for the result.
Or parallelize innerBoot(), which is about 80% faster overall (using 7 cores), as follows:
innerBootParallel <- function() {
boot <- parSapply(cl, 1:N, function(i) estimate())
return(matrixStats::rowQuantiles(boot, p=c(.025, .975)))
}
library(parallel)
cl <- makeCluster(detectCores() - 1)
clusterExport(cl, c('estimate', 'n', 'lambda', 'N', 'beta'))
clusterSetRNGStream(cl, seed)
BA <- replicate(N, innerBootParallel())
stopCluster(cl)
boot_sum(BA)
Related
I have wrote a simulation code for censored observations to find bootstrap-t confidence interval. However, I encountered some problem where my 'btAlpha' and 'btLambda' cannot compute the correct answer hence I cannot go to the next step which is to calculate the total error probabilities.
This is my code :
#BOOTSTRAP-T (20%)
library(survival)
n <- 100
N <- 1000
alpha <- 1
lambda <- 0.5
alphaHat <- NULL
lambdaHat <- NULL
cp <- NULL
btAlpha <- matrix (NA, nrow=N, ncol=2)
btLambda <- matrix (NA, nrow=N, ncol=2)
for (i in 1:1000) {
u <- runif(n)
c1 <- rexp(n, 0.1)
t1 <- -(log(1 - u^(1/alpha))/lambda)
t <- pmin(t1, c1)
ci <- 1*(t1 < c1) #censored data
cp[i] <- length(ci[ci == 0])/n #censoring proportion
#FUNCTION TO CALL OUT
estBoot < -function(data, j) {
dat <- data [j, ]
data0 <- dat[which(dat$ci == 0), ] # right censored data
data1 <- dat[which(dat$ci == 1), ] # uncensored data
dat
#MAXIMUM LIKELIHOOD ESTIMATION
library(maxLik)
LLF <- function(para) {
alpha <- para[1]
lambda <- para[2]
a <- sum(log(alpha*lambda*(1 - exp(-lambda*data1$t1))^(alpha - 1)*
exp(-lambda*data1$t1)))
b <- sum(log(1 - (1 - exp(-lambda*data0$t1)^(alpha))))
l <- a + b
return(l)
}
mle <- maxLik(LLF, start=c(alpha=1, lambda=0.5))
alphaHat <- mle$estimate[1]
lambdaHat <- mle$estimate[2]
observedDi <- solve(-mle$hessian)
return(c(alphaHat, lambdaHat, observedDi[1, 1], observedDi[2, 2]))
}
library(boot)
bt <- boot(dat, estBoot, R=1000)
bootAlphaHat <- bt$t[, 1] #t is from bootstrap
bootAlphaHat0 <- bt$t0[1] #t0 is from original set
seAlphaHat <- sqrt(bt$t[, 2])
seAlphaHat0 <- sqrt(bt$t0[2]) #same as 'original' in bt
zAlpha <- (bootAlphaHat - bootAlphaHat0)/seAlphaHat
kAlpha <- zAlpha[order(zAlpha)]
ciAlpha <- c(kAlpha[25], kAlpha[975])
btAlpha[i, ] <- rev(bootAlphaHat0 - ciAlpha*seAlphaHat0)
bootLambdaHat <- bt$t[, 2]
bootLambdaHat0 <- bt$t0[2]
seLambdaHat <- sqrt(bt$t[, 4])
seLambdaHat0 <- sqrt(bt$t0[4])
zLambda <- (bootLambdaHat - bootLambdaHat0)/seLambdaHat
kLambda <- zLambda[order(zLambda)]
ciLambda <- c(kLambda[25], kLambda[975])
btLambda[i, ] <- rev(bootLambdaHat0 - ciLambda*seLambdaHat0)
}
leftAlpha <- sum(btAlpha[, 1] > alpha)/N
rightAlpha <- sum(btAlpha[, 2] < alpha)/N
totalEAlpha <- leftAlpha + rightAlpha
leftLambda <- sum(btLambda[, 1] > lambda)/N
rightLambda <- sum(btLambda[, 2] < lambda)/N
totalELambda <- leftLambda + rightLambda
#alpha=0.05
sealphaHat <- sqrt(0.05*(1 - 0.05)/N)
antiAlpha <- totalEAlpha > (0.05 + 2.58*sealphaHat)
conAlpha <- totalEAlpha < (0.05 - 2.58*sealphaHat )
asymAlpha <- (max(leftAlpha, rightAlpha)/min(leftAlpha, rightAlpha)) > 1.5
antiLambda <- totalELambda > (0.05 + 2.58 *sealphaHat)
conLambda <- totalELambda < (0.05 - 2.58 *sealphaHat)
asymLambda <- (max(leftLambda, rightLambda)/min(leftLambda, rightLambda)) > 1.5
anti <- antiAlpha + antiLambda
con <- conAlpha + conLambda
asym <- asymAlpha + asymLambda
My 'btAlpha[i,]' and 'btLambda[i,]' is two matrix data frame and only computed NA values hence I cannot calculate the next step which is total error probabilities etc. It should be simulated 1000 values through specified formula but I didnt get the desired output. I have tried to run this without using loops and same problems encountered. Do you guys have any idea? I could really use and truly appreciate your help.
I want to create a custom bootstrap function because I want to better understand what bootstrap is doing and it seems like the other bootstrap libraries out there does not solve my issue.
The Problem: I would like to create my own wald confidence interval function where it takes in the bootstrap data, outputs the confidence interval, test the confidence interval is within a range, and gets the coverage.
Right now, I am getting this type of error:
Error in bootresults[i,}<-waldCI(y=bootdata[i], n=numTrials):number of
items to replace is not a multiple of replacement length
The goal: My goal is to get the bootresults dataset to return 4 columns(p value,One that shows the upper bound, lower bound, and whether or not the p is in the interval) and get a graph similar to this one:
Wald interval chart
Code:
set.seed(42)
samples10 <- list()
i <- 1
while(i < 100) {
sample10[[i]] <- rbinom(1500, size=10, prob=i*.01) ## rows=1500 ;columns=10
i <- i + 1
}
sample10 <- data.frame(samples10)
colnames(sample10) <- c(seq(.01, .99, .01)) ## p-values
waldconfidenceinterval <- function(y, n, alpha=0.05) {
p <- colSums(y)/(n*200)
sd <- sqrt(p*((1 - p)/(n*200)))
z <- qnorm(c(alpha/2, 1 - alpha/2))
ci <- p + z*sd
return(ci)
}
B <- 200
numTrials <- 10
bootresults <- matrix(ncol=length(sample10), nrow=B) ## rows=200, cols=99
## empty matrix in the beginning
set.seed(42)
for(i in seq_len(B)) {
bootdata <- sample10[sample(B, replace=T), ]
bootresults[i, ] <- waldCI(y=bootdata[i], n=numTrials)
## Pseudocode:
# boot_test_data$in_interval <-
# ifelse(boot_test_data$lower1 < i/100 & i/100 < boot_test_data$upper1, 1, 0)
# coverage[i] <- sum(boot_test_data$in_interval) / length(boot_test_data$in_interval)
}
Any help is greatly appreciated since I am fairly new to R.
Looks like that you want to initialize a three-dimensional array bootresults rather than a two-dimensional matrix. In your waldCI() you may use colMeans.
waldCI <- function(y, alpha=0.05) {
p <- colMeans(y)
se <- sqrt(p*(1 - p)/nrow(y))
z <- qnorm(1 - alpha/2)
ci <- p + z*se %*% cbind(lower=-1, upper=1)
return(ci)
}
B <- 200
numTrials <- 10
## initialize array
bootresults1 <- array(dim=c(ncol(samples10), 4, B),
dimnames=list(c(), c("p.values", "lower", "upper", "in.int"), c()))
set.seed(42)
for(i in seq_len(B)) {
samp <- samples10[sample(nrow(samples10), numTrials, replace=F), ]
ci <- waldCI(samp)
bootresults1[,,i] <- cbind(p.values, ci, in.int=ci[, 1] < p.values & p.values < ci[, 2])
}
coverage <- rowMeans(bootresults[,4,])
plot(p.values, coverage, type="l", main="My Plot")
Similar approach, more R-ish, though:
p.values <- seq(.01, .99, .01)
set.seed(42)
samples10 <- `colnames<-`(sapply(p.values, function(pr) rbinom(1.5e3, 1, pr)), p.values)
BOOT <- function(numTrials, ...) {
samp <- samples10[sample(nrow(samples10), numTrials, replace=F), ]
ci <- waldCI(samp, ...)
cbind(p.values, ci, in.int=ci[, 1] < p.values & p.values < ci[, 2])
}
B <- 200
numTrials <- 10
set.seed(42)
bootresults2 <- replicate(B, BOOT(numTrials=10))
stopifnot(all.equal(bootresults1, bootresults2))
Data:
Note, that I used rbinom(..., size=1, ...) to create your sample data. The use of "p" as an object name suggested that the data should be binomial.
set.seed(42)
samples10 <- matrix(nrow=1500, ncol=99, dimnames=list(c(), c(seq(.01, .99, .01))))
i <- 1
while (i < 100) {
samples10[, i] <- rbinom(1500, size=1, prob=i*.01) ## rows=1500 ;columns=10
i <- i + 1
}
Without a while loop, you could proceed vectorized:
p.values <- seq(.01, .99, .01)
set.seed(42)
samples10 <- `colnames<-`(sapply(p.values, function(pr) rbinom(1.5e3, 1, pr)), p.values)
I am trying to code gradient descent in R. The goal is to collect a data frame of each estimate so I can plot the algorithm's search through the parameter space.
I am using the built-in dataset data(cars) in R. Unfortunately something is way off in my function. The estimates just increase linearly with each iteration! But I cannot figure out where I err.
Any tips?
Code:
GradientDescent <- function(b0_start, b1_start, x, y, niter=10, alpha=0.1) {
# initialize
gradient_b0 = 0
gradient_b1 = 0
x <- as.matrix(x)
y <- as.matrix(y)
N = length(y)
results <- matrix(nrow=niter, ncol=2)
# gradient
for(i in 1:N){
gradient_b0 <- gradient_b0 + (-2/N) * (y[i] - (b0_start + b1_start*x[i]))
gradient_b1 <- gradient_b1 + (-2/N) * x[i] * (y[i] - (b0_start + b1_start*x[i]))
}
# descent
b0_hat <- b0_start
b1_hat <- b1_start
for(i in 1:niter){
b0_hat <- b0_hat - (alpha*gradient_b0)
b1_hat <- b1_hat - (alpha*gradient_b1)
# collect
results[i,] <- c(b0_hat,b1_hat)
}
# return
df <- data.frame(results)
colnames(df) <- c("b0", "b1")
return(df)
}
> test <- GradientDescent(0,0,cars$speed, cars$dist, niter=1000)
> head(test,2); tail(test,2)
b0 b1
1 8.596 153.928
2 17.192 307.856
b0 b1
999 8587.404 153774.1
1000 8596.000 153928.0
Here is a solution for cars dataset:
# dependent and independent variables
y <- cars$dist
x <- cars$speed
# number of iterations
iter_n <- 100
# initial value of the parameter
theta1 <- 0
# learning rate
alpha <- 0.001
m <- nrow(cars)
yhat <- theta1*x
# a tibble to record the parameter update and cost
library(tibble)
results <- data_frame(theta1 = as.numeric(),
cost = NA,
iteration = 1)
# run the gradient descent
for (i in 1:iter_n){
theta1 <- theta1 - alpha * ((1 / m) * (sum((yhat - y) * x)))
yhat <- theta1*x
cost <- (1/m)*sum((yhat-y)^2)
results[i, 1] = theta1
results[i, 2] <- cost
results[i, 3] <- i
}
# print the parameter value after the defined iteration
print(theta1)
# 2.909132
Checking whether cost is decreasing:
library(ggplot2)
ggplot(results, aes(x = iteration, y = cost))+
geom_line()+
geom_point()
I wrote a more detailed blog post here.
I have a code which has been used for some paper.
After defining the function to be optimized, the author used the Nelder-Mead method to estimate the parameters needed. When I run the code, it freezes after 493 function evaluations have been used, it doesn't show any kind of error message or anything. I've been trying to find some info but I haven't been lucky. How can I modify the optim command in order to evaluate all possible combinations, and/or what is preventing the function from being optimized?
Here's the code. It's relatively long, BUT the second-to-last line (system.time(stcopfit...)) is the ONLY ONE I need to make work / fix / modify. So you can just copy&paste the code (as I said, taken from the author of the mentioned paper) and let it run, you don't have to go through the all code, just the last few lines. This is the data over which to run the optimization, i.e. a matrix of [0,1] uniform variables of dimension 2172x9.
Any help is appreciated, thanks!
Here's a screenshot in RStudio (it took around 2 minutes to arrive at 493, and then it's been stuck like this for the last 30 minutes):
Code:
#download older version of "sn" package
url <- "https://cran.r-project.org/src/contrib/Archive/sn/sn_1.0-0.tar.gz"
install.packages(url, repos=NULL, type="source")
install.packages(signal)
library(sn)
library(signal)
#1. redefine qst function
qst <- function (p, xi = 0, omega = 1, alpha = 0, nu = Inf, tol = 1e-08)
{
if (length(alpha) > 1)
stop("'alpha' must be a single value")
if (length(nu) > 1)
stop("'nu' must be a single value")
if (nu <= 0)
stop("nu must be non-negative")
if (nu == Inf)
return(qsn(p, xi, omega, alpha))
if (nu == 1)
return(qsc(p, xi, omega, alpha))
if (alpha == Inf)
return(xi + omega * sqrt(qf(p, 1, nu)))
if (alpha == -Inf)
return(xi - omega * sqrt(qf(1 - p, 1, nu)))
na <- is.na(p) | (p < 0) | (p > 1)
abs.alpha <- abs(alpha)
if (alpha < 0)
p <- (1 - p)
zero <- (p == 0)
one <- (p == 1)
x <- xa <- xb <- xc <- fa <- fb <- fc <- rep(NA, length(p))
nc <- rep(TRUE, length(p))
nc[(na | zero | one)] <- FALSE
fc[!nc] <- 0
xa[nc] <- qt(p[nc], nu)
xb[nc] <- sqrt(qf(p[nc], 1, nu))
fa[nc] <- pst(xa[nc], 0, 1, abs.alpha, nu) - p[nc]
fb[nc] <- pst(xb[nc], 0, 1, abs.alpha, nu) - p[nc]
regula.falsi <- FALSE
while (sum(nc) > 0) {
xc[nc] <- if (regula.falsi)
xb[nc] - fb[nc] * (xb[nc] - xa[nc])/(fb[nc] - fa[nc])
else (xb[nc] + xa[nc])/2
fc[nc] <- pst(xc[nc], 0, 1, abs.alpha, nu) - p[nc]
pos <- (fc[nc] > 0)
xa[nc][!pos] <- xc[nc][!pos]
fa[nc][!pos] <- fc[nc][!pos]
xb[nc][pos] <- xc[nc][pos]
fb[nc][pos] <- fc[nc][pos]
x[nc] <- xc[nc]
nc[(abs(fc) < tol)] <- FALSE
regula.falsi <- !regula.falsi
}
x <- replace(x, zero, -Inf)
x <- replace(x, one, Inf)
Sign <- function(x) sign(x)+ as.numeric(x==0)
q <- as.numeric(xi + omega * Sign(alpha)* x)
names(q) <- names(p)
return(q)
}
#2. initial parameter setting
mkParam <- function(Omega, delta, nu){
ndim <- length(delta)+1;
R <- diag(ndim);
for (i in 2:ndim){
R[i,1] <- R[1,i] <- delta[i-1];
if (i>=3){for (j in 2:(i-1)){R[i,j] <- R[j,i] <- Omega[i-1,j-1];}}
}
LTR <- t(chol(R));
Mtheta <- matrix(0, nrow=ndim, ncol=ndim);
for (i in 2:ndim){
Mtheta[i,1] <- acos(LTR[i,1]);
cumsin <- sin(Mtheta[i,1]);
if (i >=3){for (j in 2:(i-1)){
Mtheta[i,j] <- acos(LTR[i,j]/cumsin);
cumsin <- cumsin*sin(Mtheta[i,j]);}
}
}
c(Mtheta[lower.tri(Mtheta)], log(nu-2));
}
#3. from internal to original parameters
paramToExtCorr <- function(param){
ntheta <- dim*(dim+1)/2;
theta <- param[1:ntheta];
ndim <- (1+sqrt(1+8*length(theta)))/2;
LTR <- diag(ndim);
for (i in 2:ndim){
LTR[i,1] <- cos(theta[i-1]);
cumsin <- sin(theta[i-1]);
if (i >=3){for (j in 2:(i-1)){
k <- i+ndim*(j-1)-j*(j+1)/2;
LTR[i,j] <- cumsin*cos(theta[k]);
cumsin <- cumsin*sin(theta[k]);}
}
LTR[i,i] <- cumsin;
}
R <- LTR %*% t(LTR);
R;
}
#4. show estimated parameters and log likelihood
resultVec <- function(fit){
R <- paramToExtCorr(fit$par);
logLik <- -fit$value;
Omega <- R[-1, -1];
delta <- R[1, -1];
ntheta <- dim*(dim+1)/2;
nu <- exp(fit$par[ntheta+1])+2;
c(Omega[lower.tri(Omega)], delta, nu, logLik);
}
#5. negative log likelihood for multivariate skew-t copula
stcopn11 <- function(param){
N <- nrow(udat);
mpoints <- 150;
npar <- length(param);
nu <- exp(param[npar])+2;
R <- paramToExtCorr(param);
Omega <- R[-1, -1];
delta <- R[1, -1];
zeta <- delta/sqrt(1-delta*delta);
iOmega <- solve(Omega);
alpha <- iOmega %*% delta / sqrt(1-(t(delta) %*% iOmega %*% delta)[1,1]);
ix <- matrix(0, nrow=N, ncol=dim);
lm <- matrix(0, nrow=N, ncol=dim);
for (j in 1:dim){
minx <- qst(min(udat[,j]), alpha=zeta[j], nu=nu);
maxx <- qst(max(udat[,j]), alpha=zeta[j], nu=nu);
xx <- seq(minx, maxx, length=mpoints);
px <- sort(pst(xx, alpha=zeta[j], nu=nu));
ix[,j] <- pchip(px, xx, udat[,j]);
lm[,j] <- dst(ix[,j], alpha=zeta[j], nu=nu, log=TRUE);
}
lc <- dmst(ix, Omega=Omega, alpha=alpha, nu=nu, log=TRUE);
-sum(lc)+sum(lm)
}
#6. sample setting
dim <- 9;
smdelta <- c(-0.36,-0.33,-0.48,-0.36,-0.33,-0.48,-0.36,-0.33,-0.48);
smdf <- 5;
smOmega <- cor(udat);
smzeta <- smdelta/sqrt(1-smdelta*smdelta);
iOmega <- solve(smOmega);
smalpha <- iOmega %*% smdelta /sqrt(1-(t(smdelta) %*% iOmega %*% smdelta)[1,1]);
#7. estimation
iniPar <- mkParam(diag(dim),numeric(dim),6);
system.time(stcopfit<-optim(iniPar,stcopn11,control=list(reltol=1e-8,trace=6)));
resultVec(stcopfit);
The parameters you arrive at by step 493 lead to an infinite loop in your qst function: not having any idea what this very complex code is actually doing, I'm afraid I can't diagnose further. Here's what I did to get that far:
I stated cur.params <- NULL in the global environment, then put cur.params <<- params within stcopn11; this saves the current set of parameters to the global environment, so that when you break out of the optim() call manually (via Control-C or ESC depending on your platform) you can inspect the current set of parameters, and restart from them easily
I put in old-school debugging statements (e.g. cat("entering stcopn11\n") and cat("leaving stcopn11\n") at the beginning and at the next-to-last line of the objective function, a few within stopc11 to indicate progress markers within)
once I had the "bad" parameters I used debug(stcopn11) and stcopn11(cur.param) to step through the function
I discovered that it was hanging on dimension 3 (j==3 in the for loop within stcopn11) and particularly on the first qst() call
I added a maxit=1e5 argument to qst; initialized it <- 1 before the while loop; set it <- it+1 each time through the loop; changed the stopping criterion to while (sum(nc) > 0 && it<maxit); and added if (it==maxit) stop("hit max number of iterations in qst") right after the loop
1e5 iterations in qst took 74 seconds; I have no idea whether it might stop eventually, but didn't want to wait to find out.
This was my modified version of stcopn11:
cur.param <- NULL ## set parameter placeholder
##5. negative log likelihood for multivariate skew-t copula
stcopn11 <- function(param,debug=FALSE) {
cat("stcopn11\n")
cur.param <<- param ## record current params outside function
N <- nrow(udat)
mpoints <- 150
npar <- length(param)
nu <- exp(param[npar])+2
R <- paramToExtCorr(param)
Omega <- R[-1, -1]
delta <- R[1, -1]
zeta <- delta/sqrt(1-delta*delta)
cat("... solving iOmega")
iOmega <- solve(Omega)
alpha <- iOmega %*% delta /
sqrt(1-(t(delta) %*% iOmega %*% delta)[1,1])
ix <- matrix(0, nrow=N, ncol=dim)
lm <- matrix(0, nrow=N, ncol=dim)
cat("... entering dim loop\n")
for (j in 1:dim){
if (debug) cat(j,"\n")
minx <- qst(min(udat[,j]), alpha=zeta[j], nu=nu)
maxx <- qst(max(udat[,j]), alpha=zeta[j], nu=nu)
xx <- seq(minx, maxx, length=mpoints)
px <- sort(pst(xx, alpha=zeta[j], nu=nu))
ix[,j] <- pchip(px, xx, udat[,j])
lm[,j] <- dst(ix[,j], alpha=zeta[j], nu=nu, log=TRUE)
}
lc <- dmst(ix, Omega=Omega, alpha=alpha, nu=nu, log=TRUE)
cat("leaving stcopn11\n")
-sum(lc)+sum(lm)
}
I want set up a model for interest rate in binomial tree. The interest rate is path dependent. I want return interest rate (discount factor and payoff) at every step in all scenarios(2^N). The reason I want to return every single interest rate is that I want use the interest rate is compute discount factor. I know how to do this in a complex way. Here I want to use a double loop (or something simpler) to get the results.
w is for "0" or "1" dummy variable matrix representing all scenarios.
r is interest rate. if there is a head(1), then r1=r0+u=r0+0.005; if there is a tail(0), then r1=r0-d.
D is discount factor. D1=1/(1+r0), D2=D1/(1+r1)...
P is payoff.
In this case, period N is 10. therefore, I can compute step by step. However,if N gets larger and larger, I cannot use my method. I want a simple way to compute this. Thank you.
#Real Price
N <- 10
r0 <- 0.06
K <- 0.05
u <- 0.005
d <- 0.004
q <- 0.5
w <- expand.grid(rep(list(0:1),N))
r <- D <- P <- matrix(0,0,nrow=2^N,ncol=N)
for(i in 1:dim(w)[1])
{
r[i,1] <- r0 + u*w[i,1] - d*(1-w[i,1])
r[i,2] <- r[i,1] + u*w[i,2] - d*(1-w[i,2])
r[i,3] <- r[i,2]+u*w[i,3]-d*(1-w[i,3])
r[i,4] <- r[i,3]+u*w[i,4]-d*(1-w[i,4])
r[i,5] <- r[i,4]+u*w[i,5]-d*(1-w[i,5])
r[i,6] <- r[i,5]+u*w[i,6]-d*(1-w[i,6])
r[i,7] <- r[i,6]+u*w[i,7]-d*(1-w[i,7])
r[i,8] <- r[i,7]+u*w[i,8]-d*(1-w[i,8])
r[i,9] <- r[i,8]+u*w[i,9]-d*(1-w[i,9])
r[i,10] <- r[i,9]*+u*w[i,10]-d*(1-w[i,10])
D[i,1] <- 1/(1+r0)
D[i,2] <- D[i,1]/(1+r[i,1])
D[i,3] <- D[i,2]/(1+r[i,2])
D[i,4] <- D[i,3]/(1+r[i,3])
D[i,5] <- D[i,4]/(1+r[i,4])
D[i,6] <- D[i,5]/(1+r[i,5])
D[i,7] <- D[i,6]/(1+r[i,6])
D[i,8] <- D[i,7]/(1+r[i,7])
D[i,9] <- D[i,8]/(1+r[i,8])
D[i,10] <- D[i,9]/(1+r[i,9])
P[i,1] <- D[i,1]*pmax(K-r0,0)*(0.5^N)
P[i,2] <- D[i,2]*pmax(K-r[i,1],0)*(0.5^N)
P[i,3] <- D[i,3]*pmax(K-r[i,2],0)*(0.5^N)
P[i,4] <- D[i,4]*pmax(K-r[i,3],0)*(0.5^N)
P[i,5] <- D[i,5]*pmax(K-r[i,4],0)*(0.5^N)
P[i,6] <- D[i,6]*pmax(K-r[i,5],0)*(0.5^N)
P[i,7] <- D[i,7]*pmax(K-r[i,6],0)*(0.5^N)
P[i,8] <- D[i,8]*pmax(K-r[i,7],0)*(0.5^N)
P[i,9] <- D[i,9]*pmax(K-r[i,8],0)*(0.5^N)
P[i,10] <- D[i,10]*pmax(K-r[i,9],0)*(0.5^N)
}
true.price <- sum(P)
#> true.price
# > true.price
# [1] 0.00292045
You can just use a nested loop, looping over 2:(ncol(w)) within the i loop:
for(i in 1:nrow(w)) {
r[i, 1] <- r0 + u*w[i, 1] - d*(1-w[i, 1])
D[i, 1] <- 1/(1+r0)
P[i, 1] <- D[i, 1]*pmax(K-r0, 0)*(0.5^N)
for (j in 2:(ncol(w))) {
r[i,j] <- r[i, j-1] + u*w[i, j] - d*(1-w[i, j])
D[i,j] <- D[i, j-1]/(1+r[i, j-1])
P[i,j] <- D[i, j]*pmax(K-r[i, j-1], 0)*(0.5^N)
}
}
true.price <- sum(P)