I have wrote a simulation code for censored observations to find bootstrap-t confidence interval. However, I encountered some problem where my 'btAlpha' and 'btLambda' cannot compute the correct answer hence I cannot go to the next step which is to calculate the total error probabilities.
This is my code :
#BOOTSTRAP-T (20%)
library(survival)
n <- 100
N <- 1000
alpha <- 1
lambda <- 0.5
alphaHat <- NULL
lambdaHat <- NULL
cp <- NULL
btAlpha <- matrix (NA, nrow=N, ncol=2)
btLambda <- matrix (NA, nrow=N, ncol=2)
for (i in 1:1000) {
u <- runif(n)
c1 <- rexp(n, 0.1)
t1 <- -(log(1 - u^(1/alpha))/lambda)
t <- pmin(t1, c1)
ci <- 1*(t1 < c1) #censored data
cp[i] <- length(ci[ci == 0])/n #censoring proportion
#FUNCTION TO CALL OUT
estBoot < -function(data, j) {
dat <- data [j, ]
data0 <- dat[which(dat$ci == 0), ] # right censored data
data1 <- dat[which(dat$ci == 1), ] # uncensored data
dat
#MAXIMUM LIKELIHOOD ESTIMATION
library(maxLik)
LLF <- function(para) {
alpha <- para[1]
lambda <- para[2]
a <- sum(log(alpha*lambda*(1 - exp(-lambda*data1$t1))^(alpha - 1)*
exp(-lambda*data1$t1)))
b <- sum(log(1 - (1 - exp(-lambda*data0$t1)^(alpha))))
l <- a + b
return(l)
}
mle <- maxLik(LLF, start=c(alpha=1, lambda=0.5))
alphaHat <- mle$estimate[1]
lambdaHat <- mle$estimate[2]
observedDi <- solve(-mle$hessian)
return(c(alphaHat, lambdaHat, observedDi[1, 1], observedDi[2, 2]))
}
library(boot)
bt <- boot(dat, estBoot, R=1000)
bootAlphaHat <- bt$t[, 1] #t is from bootstrap
bootAlphaHat0 <- bt$t0[1] #t0 is from original set
seAlphaHat <- sqrt(bt$t[, 2])
seAlphaHat0 <- sqrt(bt$t0[2]) #same as 'original' in bt
zAlpha <- (bootAlphaHat - bootAlphaHat0)/seAlphaHat
kAlpha <- zAlpha[order(zAlpha)]
ciAlpha <- c(kAlpha[25], kAlpha[975])
btAlpha[i, ] <- rev(bootAlphaHat0 - ciAlpha*seAlphaHat0)
bootLambdaHat <- bt$t[, 2]
bootLambdaHat0 <- bt$t0[2]
seLambdaHat <- sqrt(bt$t[, 4])
seLambdaHat0 <- sqrt(bt$t0[4])
zLambda <- (bootLambdaHat - bootLambdaHat0)/seLambdaHat
kLambda <- zLambda[order(zLambda)]
ciLambda <- c(kLambda[25], kLambda[975])
btLambda[i, ] <- rev(bootLambdaHat0 - ciLambda*seLambdaHat0)
}
leftAlpha <- sum(btAlpha[, 1] > alpha)/N
rightAlpha <- sum(btAlpha[, 2] < alpha)/N
totalEAlpha <- leftAlpha + rightAlpha
leftLambda <- sum(btLambda[, 1] > lambda)/N
rightLambda <- sum(btLambda[, 2] < lambda)/N
totalELambda <- leftLambda + rightLambda
#alpha=0.05
sealphaHat <- sqrt(0.05*(1 - 0.05)/N)
antiAlpha <- totalEAlpha > (0.05 + 2.58*sealphaHat)
conAlpha <- totalEAlpha < (0.05 - 2.58*sealphaHat )
asymAlpha <- (max(leftAlpha, rightAlpha)/min(leftAlpha, rightAlpha)) > 1.5
antiLambda <- totalELambda > (0.05 + 2.58 *sealphaHat)
conLambda <- totalELambda < (0.05 - 2.58 *sealphaHat)
asymLambda <- (max(leftLambda, rightLambda)/min(leftLambda, rightLambda)) > 1.5
anti <- antiAlpha + antiLambda
con <- conAlpha + conLambda
asym <- asymAlpha + asymLambda
My 'btAlpha[i,]' and 'btLambda[i,]' is two matrix data frame and only computed NA values hence I cannot calculate the next step which is total error probabilities etc. It should be simulated 1000 values through specified formula but I didnt get the desired output. I have tried to run this without using loops and same problems encountered. Do you guys have any idea? I could really use and truly appreciate your help.
I'm trying to learn how to make a loop in R
I have this:
sigma2 <- 0.4
a0 <- -0.1260805
b <- 0.1260805
tt <- 1:50, 1:50
z <- rnorm(50, 0, sigma2)
y <- rep(1, 50)
for(i in 1:50){
y[i]=exp(a0 + b*tt[i])*exp(z[i])
}
y
and I want to kind of test the code above 1000 times, since I want to test the hypothesis at the 0.05 level
can I treid this, and seens to be wrong:
aa <- rep(1, 1000)
for(i in 1:1000){
y[i]=exp(a0 + b1*tt[i])*exp(z[i])
}
Thanks for help!
I think this is what you want (or at least closer):
# re-write original code with vectorization:
n <- 50
sigma2 <- 0.4
a0 <- -0.1260805
b <- 0.1260805
tt <- 1:n
z <- rnorm(n, 0, sigma2)
y <- exp(a0 + b*tt)*exp(z)
# do it 20 times
result <- replicate(20, exp(a0 + b*tt)*exp(rnorm(n, 0, sigma2)))
result is a 50x20 matrix - one column per repetition.
I am trying to code gradient descent in R. The goal is to collect a data frame of each estimate so I can plot the algorithm's search through the parameter space.
I am using the built-in dataset data(cars) in R. Unfortunately something is way off in my function. The estimates just increase linearly with each iteration! But I cannot figure out where I err.
Any tips?
Code:
GradientDescent <- function(b0_start, b1_start, x, y, niter=10, alpha=0.1) {
# initialize
gradient_b0 = 0
gradient_b1 = 0
x <- as.matrix(x)
y <- as.matrix(y)
N = length(y)
results <- matrix(nrow=niter, ncol=2)
# gradient
for(i in 1:N){
gradient_b0 <- gradient_b0 + (-2/N) * (y[i] - (b0_start + b1_start*x[i]))
gradient_b1 <- gradient_b1 + (-2/N) * x[i] * (y[i] - (b0_start + b1_start*x[i]))
}
# descent
b0_hat <- b0_start
b1_hat <- b1_start
for(i in 1:niter){
b0_hat <- b0_hat - (alpha*gradient_b0)
b1_hat <- b1_hat - (alpha*gradient_b1)
# collect
results[i,] <- c(b0_hat,b1_hat)
}
# return
df <- data.frame(results)
colnames(df) <- c("b0", "b1")
return(df)
}
> test <- GradientDescent(0,0,cars$speed, cars$dist, niter=1000)
> head(test,2); tail(test,2)
b0 b1
1 8.596 153.928
2 17.192 307.856
b0 b1
999 8587.404 153774.1
1000 8596.000 153928.0
Here is a solution for cars dataset:
# dependent and independent variables
y <- cars$dist
x <- cars$speed
# number of iterations
iter_n <- 100
# initial value of the parameter
theta1 <- 0
# learning rate
alpha <- 0.001
m <- nrow(cars)
yhat <- theta1*x
# a tibble to record the parameter update and cost
library(tibble)
results <- data_frame(theta1 = as.numeric(),
cost = NA,
iteration = 1)
# run the gradient descent
for (i in 1:iter_n){
theta1 <- theta1 - alpha * ((1 / m) * (sum((yhat - y) * x)))
yhat <- theta1*x
cost <- (1/m)*sum((yhat-y)^2)
results[i, 1] = theta1
results[i, 2] <- cost
results[i, 3] <- i
}
# print the parameter value after the defined iteration
print(theta1)
# 2.909132
Checking whether cost is decreasing:
library(ggplot2)
ggplot(results, aes(x = iteration, y = cost))+
geom_line()+
geom_point()
I wrote a more detailed blog post here.
I am trying to implement the Regularized Latent Semantic Indexing (RLSI) algorithm on R.
The original paper can be found here:
http://research.microsoft.com/en-us/people/hangli/sigirfp372-wang.pdf
Below is my code.
Here, I generate a matrix D from two matrices U and V. Each column of U correspond to a topic vector, and it is made to be sparse. After that, I apply RLSI on the D matrix to see if I can factorize it into two matrices, one of which has sparse vectors like U.
However, the resulting U is far from being sparse. Actually, every element of it is filled with numbers.
Is there something wrong with my code?
Thank you very much in advance.
library(magrittr)
# functions
updateU <- function(D,U,V){
S <- V %*% t(V)
R <- D %*% t(V)
for(m in 1:M){
u_m <- rep(0, K)
u_previous <- u_m
diff_u <- 100
while(diff_u > 0.1){
for(k in 1:K){
w_mk <- R[m,k] - S[k,-k] %*% U[m,-k]
in_hinge <- (abs(w_mk) - 0.5 * lambda_1)
u_m[k] <- (ifelse(in_hinge > 0, in_hinge, 0) * ifelse(w_mk >= 0, 1, -1)) / S[k,k]
}
diff_u <- sum(u_m - u_previous)
u_previous <- u_m
}
U[m,] <- u_m
}
return(U)
}
updateV <- function(D,U,V){
Sigma <- solve(t(U) %*% U + lambda_2 * diag(K))
Phi <- t(U) %*% D
V <- Sigma %*% Phi
return(V)
}
# Set constants
M <- 5000
N <- 1000
K <- 30
lambda_1 <- 1
lambda_2 <- 0.5
# Create D
originalU <- c(rpois(50000, lambda = 10), rep(0, 100000)) %>% sample(., 150000) %>% matrix(., M, K)
originalV <- rpois(30000, lambda = 5) %>% sample(., 30000) %>% matrix(., K, N)
D <- originalU %*% originalV
# Initialize U and V
V <- matrix(rpois(30000, lambda = 5), K, N)
U <- matrix(0, M, K)
# Run RLSI (iterate 100 times for now)
for(t in 1:100){
cat(t,":")
U <- updateU(D,U,V)
V <- updateV(D,U,V)
loss <- sum((D - U %*% V) ^ 2)
cat(loss, "\n")
}
I've got it. Each row in U has to be set to a zero vector each time updateU function is run.
I want set up a model for interest rate in binomial tree. The interest rate is path dependent. I want return interest rate (discount factor and payoff) at every step in all scenarios(2^N). The reason I want to return every single interest rate is that I want use the interest rate is compute discount factor. I know how to do this in a complex way. Here I want to use a double loop (or something simpler) to get the results.
w is for "0" or "1" dummy variable matrix representing all scenarios.
r is interest rate. if there is a head(1), then r1=r0+u=r0+0.005; if there is a tail(0), then r1=r0-d.
D is discount factor. D1=1/(1+r0), D2=D1/(1+r1)...
P is payoff.
In this case, period N is 10. therefore, I can compute step by step. However,if N gets larger and larger, I cannot use my method. I want a simple way to compute this. Thank you.
#Real Price
N <- 10
r0 <- 0.06
K <- 0.05
u <- 0.005
d <- 0.004
q <- 0.5
w <- expand.grid(rep(list(0:1),N))
r <- D <- P <- matrix(0,0,nrow=2^N,ncol=N)
for(i in 1:dim(w)[1])
{
r[i,1] <- r0 + u*w[i,1] - d*(1-w[i,1])
r[i,2] <- r[i,1] + u*w[i,2] - d*(1-w[i,2])
r[i,3] <- r[i,2]+u*w[i,3]-d*(1-w[i,3])
r[i,4] <- r[i,3]+u*w[i,4]-d*(1-w[i,4])
r[i,5] <- r[i,4]+u*w[i,5]-d*(1-w[i,5])
r[i,6] <- r[i,5]+u*w[i,6]-d*(1-w[i,6])
r[i,7] <- r[i,6]+u*w[i,7]-d*(1-w[i,7])
r[i,8] <- r[i,7]+u*w[i,8]-d*(1-w[i,8])
r[i,9] <- r[i,8]+u*w[i,9]-d*(1-w[i,9])
r[i,10] <- r[i,9]*+u*w[i,10]-d*(1-w[i,10])
D[i,1] <- 1/(1+r0)
D[i,2] <- D[i,1]/(1+r[i,1])
D[i,3] <- D[i,2]/(1+r[i,2])
D[i,4] <- D[i,3]/(1+r[i,3])
D[i,5] <- D[i,4]/(1+r[i,4])
D[i,6] <- D[i,5]/(1+r[i,5])
D[i,7] <- D[i,6]/(1+r[i,6])
D[i,8] <- D[i,7]/(1+r[i,7])
D[i,9] <- D[i,8]/(1+r[i,8])
D[i,10] <- D[i,9]/(1+r[i,9])
P[i,1] <- D[i,1]*pmax(K-r0,0)*(0.5^N)
P[i,2] <- D[i,2]*pmax(K-r[i,1],0)*(0.5^N)
P[i,3] <- D[i,3]*pmax(K-r[i,2],0)*(0.5^N)
P[i,4] <- D[i,4]*pmax(K-r[i,3],0)*(0.5^N)
P[i,5] <- D[i,5]*pmax(K-r[i,4],0)*(0.5^N)
P[i,6] <- D[i,6]*pmax(K-r[i,5],0)*(0.5^N)
P[i,7] <- D[i,7]*pmax(K-r[i,6],0)*(0.5^N)
P[i,8] <- D[i,8]*pmax(K-r[i,7],0)*(0.5^N)
P[i,9] <- D[i,9]*pmax(K-r[i,8],0)*(0.5^N)
P[i,10] <- D[i,10]*pmax(K-r[i,9],0)*(0.5^N)
}
true.price <- sum(P)
#> true.price
# > true.price
# [1] 0.00292045
You can just use a nested loop, looping over 2:(ncol(w)) within the i loop:
for(i in 1:nrow(w)) {
r[i, 1] <- r0 + u*w[i, 1] - d*(1-w[i, 1])
D[i, 1] <- 1/(1+r0)
P[i, 1] <- D[i, 1]*pmax(K-r0, 0)*(0.5^N)
for (j in 2:(ncol(w))) {
r[i,j] <- r[i, j-1] + u*w[i, j] - d*(1-w[i, j])
D[i,j] <- D[i, j-1]/(1+r[i, j-1])
P[i,j] <- D[i, j]*pmax(K-r[i, j-1], 0)*(0.5^N)
}
}
true.price <- sum(P)