Bootstrap-t confidence interval for censored observations in R

I have written simulation code for censored observations to find a bootstrap-t confidence interval. However, I ran into a problem: my 'btAlpha' and 'btLambda' matrices are filled with NA rather than the computed intervals, so I cannot move on to the next step, which is calculating the total error probabilities.
This is my code:
#BOOTSTRAP-T (20%)
library(survival)
n <- 100
N <- 1000
alpha <- 1
lambda <- 0.5
cp <- NULL
btAlpha <- matrix(NA, nrow=N, ncol=2)
btLambda <- matrix(NA, nrow=N, ncol=2)
library(maxLik)
library(boot)
for (i in 1:N) {
  u <- runif(n)
  c1 <- rexp(n, 0.1)                  # censoring times
  t1 <- -log(1 - u^(1/alpha))/lambda  # exponentiated-exponential lifetimes
  t <- pmin(t1, c1)                   # observed times
  ci <- 1*(t1 < c1)                   # event indicator: 1 = uncensored, 0 = censored
  cp[i] <- sum(ci == 0)/n             # censoring proportion
  dat <- data.frame(t1, c1, t, ci)    # data frame passed to boot()
  # STATISTIC FUNCTION FOR boot()
  estBoot <- function(data, j) {
    d <- data[j, ]
    data0 <- d[d$ci == 0, ]           # right-censored observations
    data1 <- d[d$ci == 1, ]           # uncensored observations
    # MAXIMUM LIKELIHOOD ESTIMATION
    LLF <- function(para) {
      alpha <- para[1]
      lambda <- para[2]
      # uncensored: log density; censored: log survival, both at the observed time t
      a <- sum(log(alpha*lambda*(1 - exp(-lambda*data1$t))^(alpha - 1)*
                     exp(-lambda*data1$t)))
      b <- sum(log(1 - (1 - exp(-lambda*data0$t))^alpha))
      a + b
    }
    mle <- maxLik(LLF, start=c(alpha=1, lambda=0.5))
    alphaHat <- mle$estimate[1]
    lambdaHat <- mle$estimate[2]
    observedDi <- solve(-mle$hessian) # inverse observed information = variance estimates
    c(alphaHat, lambdaHat, observedDi[1, 1], observedDi[2, 2])
  }
  bt <- boot(dat, estBoot, R=1000)
  bootAlphaHat <- bt$t[, 1]           # t holds the bootstrap replicates
  bootAlphaHat0 <- bt$t0[1]           # t0 holds the original-sample estimates
  seAlphaHat <- sqrt(bt$t[, 3])       # column 3 (variance of alphaHat), not column 2
  seAlphaHat0 <- sqrt(bt$t0[3])
  zAlpha <- (bootAlphaHat - bootAlphaHat0)/seAlphaHat
  kAlpha <- sort(zAlpha)
  ciAlpha <- kAlpha[c(25, 975)]       # 2.5% and 97.5% points of the t statistics
  btAlpha[i, ] <- rev(bootAlphaHat0 - ciAlpha*seAlphaHat0)
  bootLambdaHat <- bt$t[, 2]
  bootLambdaHat0 <- bt$t0[2]
  seLambdaHat <- sqrt(bt$t[, 4])
  seLambdaHat0 <- sqrt(bt$t0[4])
  zLambda <- (bootLambdaHat - bootLambdaHat0)/seLambdaHat
  kLambda <- sort(zLambda)
  ciLambda <- kLambda[c(25, 975)]
  btLambda[i, ] <- rev(bootLambdaHat0 - ciLambda*seLambdaHat0)
}
leftAlpha <- sum(btAlpha[, 1] > alpha)/N
rightAlpha <- sum(btAlpha[, 2] < alpha)/N
totalEAlpha <- leftAlpha + rightAlpha
leftLambda <- sum(btLambda[, 1] > lambda)/N
rightLambda <- sum(btLambda[, 2] < lambda)/N
totalELambda <- leftLambda + rightLambda
# nominal level 0.05; 2.58 is roughly qnorm(0.995)
sealphaHat <- sqrt(0.05*(1 - 0.05)/N)
antiAlpha <- totalEAlpha > (0.05 + 2.58*sealphaHat)
conAlpha <- totalEAlpha < (0.05 - 2.58*sealphaHat)
asymAlpha <- (max(leftAlpha, rightAlpha)/min(leftAlpha, rightAlpha)) > 1.5
antiLambda <- totalELambda > (0.05 + 2.58*sealphaHat)
conLambda <- totalELambda < (0.05 - 2.58*sealphaHat)
asymLambda <- (max(leftLambda, rightLambda)/min(leftLambda, rightLambda)) > 1.5
anti <- antiAlpha + antiLambda
con <- conAlpha + conLambda
asym <- asymAlpha + asymLambda
My 'btAlpha[i, ]' and 'btLambda[i, ]' matrices contain only NA values, so I cannot calculate the next step, the total error probabilities. Each should hold 1000 simulated intervals from the formula above, but I don't get the desired output. I have tried running this without the loop and hit the same problem. Do you have any idea? I would truly appreciate your help.
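A useful first step, independent of the bootstrap layer, is to check that the maximum likelihood step behaves on a single simulated dataset, so that likelihood bugs surface immediately rather than as NA intervals. A minimal sketch, assuming the same exponentiated-exponential model and true values as above:
library(maxLik)
set.seed(1)
n <- 100; alpha <- 1; lambda <- 0.5
u <- runif(n)
c1 <- rexp(n, 0.1)
t1 <- -log(1 - u^(1/alpha))/lambda # true lifetimes
tt <- pmin(t1, c1)                 # observed times
ev <- t1 < c1                      # TRUE = event observed
LLF <- function(para) {
  a <- para[1]; l <- para[2]
  if (a <= 0 || l <= 0) return(NA) # keep the optimizer in the valid region
  sum(log(a*l*(1 - exp(-l*tt[ev]))^(a - 1)*exp(-l*tt[ev]))) + # log density, uncensored
    sum(log(1 - (1 - exp(-l*tt[!ev]))^a))                     # log survival, censored
}
mle <- maxLik(LLF, start=c(alpha=1, lambda=0.5))
summary(mle) # estimates should land near alpha = 1, lambda = 0.5
If this step already fails or returns NaN, no amount of bootstrapping downstream can work.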


Speed up loop operation

I need to run a coverage probability test on different sample sizes and censoring proportions, replicating 1000 bootstrap samples with the boot function in R. I have had the code running for 3-8 hours without finishing, and I have no idea about the expected runtime.
set.seed(20)
lambda <- 0.02
beta <- 0.5
alpha <- 0.05
n <- 140
N <- 1000
lambda_hat <- NULL
beta_hat <- NULL
cp <- NULL
bp_lambda <- matrix(NA, nrow=N, ncol=2)
bp_beta <- matrix(NA, nrow=N, ncol=2)
for (i in 1:N) {
u <- runif(n)
c_i <- rexp(n, 0.0001)
t_i <- (log(1 - (1/lambda)*log(1 - u)))^(1/beta)
s_i <- 1*(t_i < c_i)
t <- pmin(t_i, c_i)
data <- data.frame(u, t_i, c_i, s_i, t)
estimates.boot <- function(data, j){
data <- data[j, ]
data0 <- data[which(data$s_i == 0), ] # right-censored observations (s_i = 0)
data1 <- data[which(data$s_i == 1), ] # uncensored observations (s_i = 1)
library(maxLik)
LLF <- function(para) {
t1 <- data$t_i
d1 <- data$s_i # use the resampled indicator, not the outer s_i
lambda <- para[1]
beta <- para[2]
e <- d1*log(lambda*t1^(beta - 1)*beta*exp(t1^beta)*exp(lambda*(1 - exp(t1^beta))))
r <- (1 - d1)*log(exp(lambda*(1 - exp(t1^beta))))
f <- sum(e + r)
return(f)
}
mle <- maxLik(LLF, start=c(lambda=0.02, beta=0.5))
return(mle$estimate)
}
library(boot)
bootstrap <- boot(data, estimates.boot, 1000)
bootlambda <- bootstrap$t[, 1]
klambda <- bootlambda[order(bootlambda)]
bp_lambda[i, ] <- c(klambda[25], klambda[975])
bootbeta <- bootstrap$t[, 2]
kbeta <- bootbeta[order(bootbeta)]
bp_beta[i, ] <- c(kbeta[25], kbeta[975])
}
left_lambda <- sum(bp_lambda[, 1]>lambda)/N
right_lambda <- sum(bp_lambda[, 2]<lambda)/N
total_lambda <- left_lambda + right_lambda
left_beta <- sum(bp_beta[, 1] > beta)/N
right_beta <- sum(bp_beta[, 2]<beta)/N
total_beta <- left_beta + right_beta
sealphahat <- sqrt(alpha*(1 - alpha)/N)
antilambda <- total_lambda>(alpha + 2.58*sealphahat)
conlambda <- total_lambda<(alpha - 2.58*sealphahat)
asymlambda <- (max(left_lambda, right_lambda)/min(left_lambda, right_lambda)) > 1.5
antibeta <- total_beta > (alpha + 2.58*sealphahat)
conbeta <- total_beta < (alpha - 2.58*sealphahat)
asymbeta <- (max(left_beta, right_beta)/min(left_beta, right_beta)) > 1.5
anti <- antilambda + antibeta
con <- conlambda + conbeta
asym <- asymlambda + asymbeta
cbind(anti, con, asym)
Does anyone have an idea how to speed up the operation?
Basically, you want to apply random resampling to an estimation function (inner bootstrap) and randomly repeat the entire process (outer bootstrap).
Consequently we can write an estimation function estimate() using replicate() (to avoid boot::boot) and a function innerBoot() for the inner bootstrap. In the latter we can use matrixStats::rowQuantiles for fast computation of the quantiles you want.
I have essentially used your code, just fixing a few issues that prevented it from running.
estimate <- function() {
u <- runif(n)
c_i <- rexp(n, 0.0001)
t_i <- (log(1 - (1/lambda)*log(1 - u)))^(1/beta)
s_i <- 1*(t_i < c_i)
t <- pmin(t_i, c_i)
LLF <- function(para) {
lambda <- para[1]
beta <- para[2]
e <- s_i*log(lambda*t_i^(beta - 1)*beta*exp(t_i^beta)*exp(lambda*(1 - exp(t_i^beta))))
r <- (1 - s_i)*log(exp(lambda*(1 - exp(t_i^beta))))
return(sum(e + r))
}
mle <- maxLik::maxLik(LLF, start=c(para=c(0.02, 0.5)))
return(setNames(mle$estimate, c('lambda_hat', 'beta_hat')))
}
innerBoot <- function() {
boot <- replicate(N, estimate())
return(matrixStats::rowQuantiles(boot, probs=c(.025, .975)))
}
We also perform the outer bootstrap with replicate(). I wrap it here in system.time() to get a time measurement.
lambda <- 0.02
beta <- 0.5
alpha <- 0.05
n <- 140
# N <- 1000
N <- 10 ## for testing
seed <- 42
set.seed(seed)
tm <- system.time(
BA <- replicate(N, innerBoot())
)
I got these measurements,
tm
# user system elapsed ## N = 10
# 1.055 0.000 1.057
# user system elapsed ## N = 100
# 102.012 0.227 102.489
which indicates that about 167 minutes are to be expected for N <- 1000: both the inner and the outer loop scale with N, so the runtime grows roughly quadratically (102 s at N = 100, times 100).
The result is an array of dim 2x2xN.
> dim(BA)
[1] 2 2 100
To calculate the summaries we may easily refer to the respective cells.
boot_sum <- function(BA) {
left_lambda <- sum(BA[1, 1, ] > lambda)/N
right_lambda <- sum(BA[1, 2, ] < lambda)/N
left_beta <- sum(BA[2, 1, ] > beta)/N
right_beta <- sum(BA[2, 2, ] < beta)/N
total_lambda <- left_lambda + right_lambda
total_beta <- left_beta + right_beta
sealphahat <- sqrt(alpha*(1 - alpha)/N)
antilambda <- total_lambda > (alpha + 2.58*sealphahat)
conlambda <- total_lambda < (alpha - 2.58*sealphahat)
asymlambda <- (max(left_lambda, right_lambda)/min(left_lambda, right_lambda)) > 1.5
antibeta <- total_beta > (alpha + 2.58*sealphahat)
conbeta <- total_beta < (alpha - 2.58*sealphahat)
asymbeta <- (max(left_beta, right_beta)/min(left_beta, right_beta)) > 1.5
anti <- antilambda + antibeta
con <- conlambda + conbeta
asym <- asymlambda + asymbeta
return(cbind(anti, con, asym))
}
boot_sum(BA)
# anti con asym
# [1,] 2 0 2
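(For context on these flags: sealphahat = sqrt(alpha*(1 - alpha)/N) is the Monte Carlo standard error of an estimated error probability at the nominal level 0.05, and 2.58 is roughly qnorm(0.995). So anti/con flag intervals whose total error rate differs from 0.05 at about the 1% level, and asym flags left/right error probabilities that differ by more than a factor of 1.5.)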
Note: You should definitely check the code in the body of estimate() (i.e. run it manually several times without bootstrapping), as it throws warnings every now and then; probably there is a mistake in how you define LLF().
Warning messages:
1: In log(lambda * t_i^(beta - 1) * beta * exp(t_i^beta) * exp(lambda * :
NaNs produced
2: In log(lambda * t_i^(beta - 1) * beta * exp(t_i^beta) * exp(lambda * :
NaNs produced
Also, I'm not sure the summary calculation currently makes much sense.
My advice is to check your likelihood function and the summary by (1) manually running the lines and (2) starting with a very small N, like 10 or so, to see whether the calculations make sense.
Once you've checked that, it's worth waiting the ~167 minutes for the result.
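For instance, a quick manual check might look like this (a sketch; estimate() reads n, lambda, and beta from the global environment):
n <- 140; lambda <- 0.02; beta <- 0.5
set.seed(1)
for (k in 1:5) print(estimate()) # watch for warnings and NaN/NA estimates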
Or parallelize innerBoot(), which is about 80% faster overall (using 7 cores), as follows:
innerBootParallel <- function() {
boot <- parSapply(cl, 1:N, function(i) estimate())
return(matrixStats::rowQuantiles(boot, probs=c(.025, .975)))
}
library(parallel)
cl <- makeCluster(detectCores() - 1)
clusterExport(cl, c('estimate', 'n', 'lambda', 'N', 'beta'))
clusterSetRNGStream(cl, seed)
BA <- replicate(N, innerBootParallel())
stopCluster(cl)
boot_sum(BA)

How to perform a bootstrap and find confidence interval in R

I want to create a custom bootstrap function because I want to better understand what bootstrap is doing, and it seems the other bootstrap libraries out there do not solve my issue.
The Problem: I would like to create my own Wald confidence interval function that takes in the bootstrap data, outputs the confidence interval, tests whether the true value lies within the interval, and gets the coverage.
Right now, I am getting this type of error:
Error in bootresults[i, ] <- waldCI(y = bootdata[i], n = numTrials) :
  number of items to replace is not a multiple of replacement length
The goal: I want the bootresults dataset to have four columns (the p value, the lower bound, the upper bound, and an indicator for whether p is in the interval) and to get a graph similar to this one:
[image: Wald interval chart]
Code:
set.seed(42)
samples10 <- list()
i <- 1
while(i < 100) {
samples10[[i]] <- rbinom(1500, size=10, prob=i*.01) ## 1500 rows; 99 columns in the end
i <- i + 1
}
sample10 <- data.frame(samples10)
colnames(sample10) <- c(seq(.01, .99, .01)) ## p-values
waldCI <- function(y, n, alpha=0.05) {
p <- colSums(y)/(n*200)
sd <- sqrt(p*((1 - p)/(n*200)))
z <- qnorm(c(alpha/2, 1 - alpha/2))
ci <- p + z*sd
return(ci)
}
B <- 200
numTrials <- 10
bootresults <- matrix(ncol=length(sample10), nrow=B) ## rows=200, cols=99
## empty matrix in the beginning
set.seed(42)
for(i in seq_len(B)) {
bootdata <- sample10[sample(B, replace=T), ]
bootresults[i, ] <- waldCI(y=bootdata[i], n=numTrials)
## Pseudocode:
# boot_test_data$in_interval <-
# ifelse(boot_test_data$lower1 < i/100 & i/100 < boot_test_data$upper1, 1, 0)
# coverage[i] <- sum(boot_test_data$in_interval) / length(boot_test_data$in_interval)
}
Any help is greatly appreciated since I am fairly new to R.
It looks like you want to initialize a three-dimensional array bootresults rather than a two-dimensional matrix. In your waldCI() you may use colMeans; the standard Wald interval is p +/- z*sqrt(p*(1 - p)/n) with p the column mean and n the number of rows, which the version below computes for all columns at once.
waldCI <- function(y, alpha=0.05) {
p <- colMeans(y)
se <- sqrt(p*(1 - p)/nrow(y))
z <- qnorm(1 - alpha/2)
ci <- p + z*se %*% cbind(lower=-1, upper=1)
return(ci)
}
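A quick usage check (a sketch, using the Bernoulli samples10 matrix built in the Data section below):
samp <- samples10[1:10, ] # ten draws per p-value column
ci <- waldCI(samp)
head(ci, 3)               # one (lower, upper) row per column of samples10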
B <- 200
numTrials <- 10
## initialize array
bootresults1 <- array(dim=c(ncol(samples10), 4, B),
dimnames=list(c(), c("p.values", "lower", "upper", "in.int"), c()))
p.values <- seq(.01, .99, .01)
set.seed(42)
for(i in seq_len(B)) {
samp <- samples10[sample(nrow(samples10), numTrials, replace=F), ]
ci <- waldCI(samp)
bootresults1[,,i] <- cbind(p.values, ci, in.int=ci[, 1] < p.values & p.values < ci[, 2])
}
coverage <- rowMeans(bootresults1[, 4, ])
plot(p.values, coverage, type="l", main="My Plot")
Similar approach, more R-ish, though:
p.values <- seq(.01, .99, .01)
set.seed(42)
samples10 <- `colnames<-`(sapply(p.values, function(pr) rbinom(1.5e3, 1, pr)), p.values)
BOOT <- function(numTrials, ...) {
samp <- samples10[sample(nrow(samples10), numTrials, replace=F), ]
ci <- waldCI(samp, ...)
cbind(p.values, ci, in.int=ci[, 1] < p.values & p.values < ci[, 2])
}
B <- 200
numTrials <- 10
set.seed(42)
bootresults2 <- replicate(B, BOOT(numTrials=10))
stopifnot(all.equal(bootresults1, bootresults2))
Data:
Note that I used rbinom(..., size=1, ...) to create your sample data. The use of "p" as an object name suggested that the data should be Bernoulli, i.e. binomial with size 1.
set.seed(42)
samples10 <- matrix(nrow=1500, ncol=99, dimnames=list(c(), c(seq(.01, .99, .01))))
i <- 1
while (i < 100) {
samples10[, i] <- rbinom(1500, size=1, prob=i*.01) ## 1500 rows; 99 columns
i <- i + 1
}
Without a while loop, you could proceed vectorized:
p.values <- seq(.01, .99, .01)
set.seed(42)
samples10 <- `colnames<-`(sapply(p.values, function(pr) rbinom(1.5e3, 1, pr)), p.values)

coding gradient descent in R

I am trying to code gradient descent in R. The goal is to collect a data frame of each estimate so I can plot the algorithm's search through the parameter space.
I am using the built-in dataset data(cars) in R. Unfortunately something is way off in my function: the estimates just increase linearly with each iteration! But I cannot figure out where I'm going wrong.
Any tips?
Code:
GradientDescent <- function(b0_start, b1_start, x, y, niter=10, alpha=0.1) {
# initialize
gradient_b0 = 0
gradient_b1 = 0
x <- as.matrix(x)
y <- as.matrix(y)
N = length(y)
results <- matrix(nrow=niter, ncol=2)
# gradient
for(i in 1:N){
gradient_b0 <- gradient_b0 + (-2/N) * (y[i] - (b0_start + b1_start*x[i]))
gradient_b1 <- gradient_b1 + (-2/N) * x[i] * (y[i] - (b0_start + b1_start*x[i]))
}
# descent
b0_hat <- b0_start
b1_hat <- b1_start
for(i in 1:niter){
b0_hat <- b0_hat - (alpha*gradient_b0)
b1_hat <- b1_hat - (alpha*gradient_b1)
# collect
results[i,] <- c(b0_hat,b1_hat)
}
# return
df <- data.frame(results)
colnames(df) <- c("b0", "b1")
return(df)
}
> test <- GradientDescent(0,0,cars$speed, cars$dist, niter=1000)
> head(test,2); tail(test,2)
b0 b1
1 8.596 153.928
2 17.192 307.856
b0 b1
999 8587.404 153774.1
1000 8596.000 153928.0
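The linear growth is the giveaway: the gradient is computed once from the starting values and never recomputed inside the descent loop, so every iteration takes the identical step. A minimal repair of the original function (a sketch, keeping the question's names) moves the gradient computation inside the loop; note that with the unscaled cars data a learning rate as large as 0.1 still diverges, so something like alpha = 0.001 is needed (the answer below uses 0.001 as well):
GradientDescent <- function(b0_start, b1_start, x, y, niter=10, alpha=0.001) {
  N <- length(y)
  b0_hat <- b0_start
  b1_hat <- b1_start
  results <- matrix(nrow=niter, ncol=2)
  for (i in 1:niter) {
    resid <- y - (b0_hat + b1_hat*x)   # residuals at the current estimates
    gradient_b0 <- (-2/N)*sum(resid)   # recomputed every iteration
    gradient_b1 <- (-2/N)*sum(resid*x)
    b0_hat <- b0_hat - alpha*gradient_b0
    b1_hat <- b1_hat - alpha*gradient_b1
    results[i, ] <- c(b0_hat, b1_hat)
  }
  setNames(data.frame(results), c("b0", "b1"))
}
test <- GradientDescent(0, 0, cars$speed, cars$dist, niter=1000)
tail(test, 1) # should now settle down instead of growing without bound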
Here is a solution for the cars dataset:
# dependent and independent variables
y <- cars$dist
x <- cars$speed
# number of iterations
iter_n <- 100
# initial value of the parameter
theta1 <- 0
# learning rate
alpha <- 0.001
m <- nrow(cars)
yhat <- theta1*x
# a tibble to record the parameter update and cost
library(tibble)
results <- tibble(theta1 = numeric(), # data_frame() is deprecated; tibble() is the current name
cost = numeric(),
iteration = integer())
# run the gradient descent
for (i in 1:iter_n){
theta1 <- theta1 - alpha * ((1 / m) * (sum((yhat - y) * x)))
yhat <- theta1*x
cost <- (1/m)*sum((yhat-y)^2)
results[i, 1] <- theta1
results[i, 2] <- cost
results[i, 3] <- i
}
# print the parameter value after the defined iteration
print(theta1)
# 2.909132
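As a cross-check: for the cost J(theta1) = (1/m) * sum((theta1*x_i - y_i)^2), the gradient is dJ/dtheta1 = (2/m) * sum((theta1*x_i - y_i)*x_i); the update above drops the constant factor 2, which merely rescales the learning rate. Setting the gradient to zero gives the closed form theta1 = sum(x_i*y_i)/sum(x_i^2), which for the cars data is the no-intercept least-squares slope of about 2.909, matching the printed value.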
Checking whether cost is decreasing:
library(ggplot2)
ggplot(results, aes(x = iteration, y = cost))+
geom_line()+
geom_point()
I wrote a more detailed blog post here.

Simulating two conditional variables with specified means and a correlation using R

This question arose from one of my previous questions, where the problem of generating two correlated series was resolved with some limitations. We were trying to produce two correlated series following exponential distributions with given parameters: a variable tr with mean 1 and another variable t with mean 2, with a correlation of -0.5, satisfying the condition t > tr. The following code was tried in R.
rho <- -0.5
mu <- rep(0,2)
Sigma <- matrix(rho, nrow=2, ncol=2) + diag(2)*(1 - rho)
library(MASS)
compute.tr.t <- function(req.n, paccept) {
req.n <- round(req.n / paccept)
rawvars <- mvrnorm(req.n, mu=mu, Sigma=Sigma)
pvars <- pnorm(rawvars)
tr <- qexp(pvars[,1], 1/1)
t <- qexp(pvars[,2], 1/2)
keep <- which(t > tr)
return(data.frame(t=t[keep],tr=tr[keep]))
}
n <- 100 # target sample size (assumed; matches the answer below)
req.n <- n
paccept <- 1
res <- data.frame()
while (req.n > 0) {
new.res <- compute.tr.t(req.n, paccept)
res <- rbind(res, new.res)
req.n <- n - nrow(res)
paccept <- nrow(new.res)/n # update paccept according to the last step
}
Two problems occurred because of pruning the data that do not satisfy the condition t > tr:
The means were not preserved.
The correlation was not preserved.
See the output below. Imposing the condition preferentially discards draws with large tr or small t, so both locations shift.
mean(res$tr)
[1] 0.4660927
mean(res$t)
[1] 2.859441
print(cor(res$tr,res$t))
[1] -0.237159
My question: is there a way to achieve two correlated and conditional variables (such that t > tr) while keeping the series means close to the specified means? We may go along with a reduced correlation, but is it possible to at least preserve the means?
Updated answer (every element of t strictly greater than tr):
n <- 100
rho <- 0.5
mu <- rep(0,2)
Sigma <- matrix(rho, nrow=2, ncol=2) + diag(2)*(1 - rho)
library(MASS)
compute.tr.t <- function(req.n, paccept) {
req.n <- round(req.n / paccept)
rawvars <- mvrnorm(req.n, mu=mu, Sigma=Sigma)
pvars <- pnorm(rawvars)
tr <- qexp(pvars[,1], 1/1)
t <- qexp(pvars[,2], 1/2)
tr <- tr[(tr - mean(tr))^2 < .25] # can play with this value
t <- t[(t - mean(t))^2 < .25]
m <- min(length(t), length(tr))
t <- t[1:m]
tr <- tr[1:m]
return(data.frame(t=t,tr=tr))
}
req.n <- n
paccept <- 1
res <- data.frame()
while (req.n > 0) {
new.res <- compute.tr.t(req.n, paccept)
res <- rbind(res, new.res)
req.n <- n - nrow(res)
paccept <- nrow(new.res) / n
}
mean(res$t)
[1] 1.972218
mean(res$tr)
[1] 0.590776
table(res$t > res$tr) # should be all TRUE; rarely you'll get one trivial FALSE that you can kick out
TRUE
132
cor(res$t,res$tr) # suffered a little but not too bad, can probably improve
[1] 0.2527064
Original answer (mean(t) > mean(tr), but not element-wise):
n <- 100
rho <- 0.5
mu <- rep(0,2)
Sigma <- matrix(rho, nrow=2, ncol=2) + diag(2)*(1 - rho)
library(MASS)
compute.tr.t <- function(req.n, paccept) {
req.n <- round(req.n / paccept)
rawvars <- mvrnorm(req.n, mu=mu, Sigma=Sigma)
pvars <- pnorm(rawvars)
tr <- qexp(pvars[,1], 1/1)
t <- qexp(pvars[,2], 1/2)
return(data.frame(t=t, tr=tr)) # no pruning here: only the means are targeted
}
req.n <- n
paccept <- 1
res <- data.frame()
while (req.n > 0) {
new.res <- compute.tr.t(req.n, paccept)
res <- rbind(res, new.res)
req.n <- n - nrow(res)
paccept <- nrow(new.res)/n # update paccept according to the last step
}
mean(res$tr)
[1] 0.9399213
mean(res$t)
[1] 1.795431
print(cor(res$tr,res$t))
[1] 0.5075668
Since there's some randomness in this, for good measure I ran it a second time and got the following result:
mean(res$tr)
[1] 1.001255
mean(res$t)
[1] 1.922343
print(cor(res$tr,res$t))
[1] 0.6648311
After you run it once, if you don't quite like the results, a simple hack to meet any desired level of precision is:
while(
(cor(res$tr,res$t) > .55 | cor(res$tr,res$t) < .45)
){
n <- 100
rho <- 0.5
mu <- rep(0,2)
Sigma <- matrix(rho, nrow=2, ncol=2) + diag(2)*(1 - rho)
library(MASS)
compute.tr.t <- function(req.n, paccept) {
req.n <- round(req.n / paccept)
rawvars <- mvrnorm(req.n, mu=mu, Sigma=Sigma)
pvars <- pnorm(rawvars)
tr <- qexp(pvars[,1], 1/1)
t <- qexp(pvars[,2], 1/2)
keep <- which(t > tr)
return(data.frame(t=t,tr=tr))
}
req.n <- n
paccept <- 1
res <- data.frame()
while (req.n > 0) {
new.res <- compute.tr.t(req.n, paccept)
res <- rbind(res, new.res)
req.n <- n - nrow(res)
paccept <- nrow(new.res)/n # update paccept according to the last step
}
}
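A different tactic, not used above, is worth noting if preserving both means exactly and guaranteeing t > tr matter more than hitting a specific correlation: build t constructively as tr plus an independent exponential gap. A minimal sketch:
set.seed(1)
n <- 1e5
tr <- rexp(n, 1) # mean 1
g <- rexp(n, 1)  # independent positive gap, mean 1
t <- tr + g      # mean 2, and t > tr by construction
c(mean(tr), mean(t), cor(tr, t)) # ~1, ~2, ~0.707
The price is that the correlation is fixed at cor(tr, tr + g) = 1/sqrt(2), about 0.71, and positive rather than the requested -0.5, so this only helps when the mean and ordering constraints dominate.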

double for loop in Binomial Tree in R

I want to set up a binomial-tree model for an interest rate. The rate is path dependent, and I want to return the interest rate (plus the discount factor and payoff) at every step in all 2^N scenarios. The reason I want every single interest rate is that I use it to compute the discount factors. I know how to do this the long way; here I want a double loop (or something simpler) to get the results.
w is a 0/1 dummy matrix representing all scenarios.
r is the interest rate: on a head (1), r1 = r0 + u = r0 + 0.005; on a tail (0), r1 = r0 - d.
D is the discount factor: D1 = 1/(1+r0), D2 = D1/(1+r1), and so on.
P is the payoff.
In this case N is 10, so I can compute step by step. However, if N gets larger, my method no longer works. I want a simple way to compute this. Thank you.
#Real Price
N <- 10
r0 <- 0.06
K <- 0.05
u <- 0.005
d <- 0.004
q <- 0.5
w <- expand.grid(rep(list(0:1),N))
r <- D <- P <- matrix(0, nrow=2^N, ncol=N)
for(i in 1:dim(w)[1])
{
r[i,1] <- r0 + u*w[i,1] - d*(1-w[i,1])
r[i,2] <- r[i,1] + u*w[i,2] - d*(1-w[i,2])
r[i,3] <- r[i,2] + u*w[i,3] - d*(1-w[i,3])
r[i,4] <- r[i,3] + u*w[i,4] - d*(1-w[i,4])
r[i,5] <- r[i,4] + u*w[i,5] - d*(1-w[i,5])
r[i,6] <- r[i,5] + u*w[i,6] - d*(1-w[i,6])
r[i,7] <- r[i,6] + u*w[i,7] - d*(1-w[i,7])
r[i,8] <- r[i,7] + u*w[i,8] - d*(1-w[i,8])
r[i,9] <- r[i,8] + u*w[i,9] - d*(1-w[i,9])
r[i,10] <- r[i,9] + u*w[i,10] - d*(1-w[i,10])
D[i,1] <- 1/(1+r0)
D[i,2] <- D[i,1]/(1+r[i,1])
D[i,3] <- D[i,2]/(1+r[i,2])
D[i,4] <- D[i,3]/(1+r[i,3])
D[i,5] <- D[i,4]/(1+r[i,4])
D[i,6] <- D[i,5]/(1+r[i,5])
D[i,7] <- D[i,6]/(1+r[i,6])
D[i,8] <- D[i,7]/(1+r[i,7])
D[i,9] <- D[i,8]/(1+r[i,8])
D[i,10] <- D[i,9]/(1+r[i,9])
P[i,1] <- D[i,1]*pmax(K-r0,0)*(0.5^N)
P[i,2] <- D[i,2]*pmax(K-r[i,1],0)*(0.5^N)
P[i,3] <- D[i,3]*pmax(K-r[i,2],0)*(0.5^N)
P[i,4] <- D[i,4]*pmax(K-r[i,3],0)*(0.5^N)
P[i,5] <- D[i,5]*pmax(K-r[i,4],0)*(0.5^N)
P[i,6] <- D[i,6]*pmax(K-r[i,5],0)*(0.5^N)
P[i,7] <- D[i,7]*pmax(K-r[i,6],0)*(0.5^N)
P[i,8] <- D[i,8]*pmax(K-r[i,7],0)*(0.5^N)
P[i,9] <- D[i,9]*pmax(K-r[i,8],0)*(0.5^N)
P[i,10] <- D[i,10]*pmax(K-r[i,9],0)*(0.5^N)
}
true.price <- sum(P)
# > true.price
# [1] 0.00292045
You can just use a nested loop, looping over 2:(ncol(w)) within the i loop:
for(i in 1:nrow(w)) {
r[i, 1] <- r0 + u*w[i, 1] - d*(1-w[i, 1])
D[i, 1] <- 1/(1+r0)
P[i, 1] <- D[i, 1]*pmax(K-r0, 0)*(0.5^N)
for (j in 2:(ncol(w))) {
r[i,j] <- r[i, j-1] + u*w[i, j] - d*(1-w[i, j])
D[i,j] <- D[i, j-1]/(1+r[i, j-1])
P[i,j] <- D[i, j]*pmax(K-r[i, j-1], 0)*(0.5^N)
}
}
true.price <- sum(P)
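Since the question also asks for "something simpler", the same three recursions can be vectorized with row-wise cumulative sums and products, avoiding loops entirely. A sketch, reusing w, r0, u, d, K, and N from above:
steps <- as.matrix(w)*u - (1 - as.matrix(w))*d # +u on a head, -d on a tail
r <- r0 + t(apply(steps, 1, cumsum))           # path-dependent rates
rPrev <- cbind(r0, r[, -N])                    # rate in force over each period
D <- 1/t(apply(1 + rPrev, 1, cumprod))         # D[, j] = 1/prod(1 + r_0 ... r_{j-1})
P <- D*pmax(K - rPrev, 0)*(0.5^N)              # discounted payoffs
true.price <- sum(P)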
