I want to estimate confidence intervals using the delta approach (example output from my model was attached as an image). The intervals are very wide compared to the MLE or bootstrap estimates. Can anyone suggest a way to derive accurate CIs using the delta approach? Thank you.
## Snippet from a larger function: `object`, `subset`, `parm`, `f`, `null`,
## `method`, `scale`, `cilevel`, `name`, `ciname`, and `cf` come from the
## enclosing scope.
Mod1 <- Mod0 <- model.matrix(object)[subset, ]
n <- nrow(Mod0)
Nvec <- matrix(rep(c(1/n, 0, 0, 1/n), each = n), n * 2, 2)
if (method == 'delta') {
  if (scale == 'linear') {
    df <- deriv(~ y/x, c('x', 'y'))  # y depends on x
  }
  out <- sapply(parm, function(p) {
    Mod0[, p] <- 0
    Mod1[, p] <- 1
    Mod <- rbind(Mod0, Mod1)
    allpreds <- family(object)$linkinv(Mod %*% coef(object))
    avgpreds <- t(Nvec) %*% allpreds  # average predictions with parm set to 0 and to 1
    val <- f(avgpreds)
    ## Covariance of the two average predictions, pushed through the inverse link
    V <- sweep(chol(vcov(object)) %*% t(Mod), 2, allpreds * (1 - allpreds), '*') %*% Nvec
    V <- t(V) %*% V
    dxdy <- matrix(attr(eval(df, list('x' = avgpreds[1], 'y' = avgpreds[2])), 'gradient'))
    se <- sqrt(t(dxdy) %*% V %*% dxdy)  # delta-method standard error
    z <- abs((val - null) / se)
    ## p-value now uses z, consistent with the reported Z-value
    out <- c(val, se, z, 2 * pnorm(z, lower.tail = FALSE),
             val + qnorm(cilevel[1]) * se, val + qnorm(cilevel[2]) * se)
    names(out) <- c(name, 'Std. Error', 'Z-value', 'p-value', ciname)
    out
  })
  out <- t(out)
  rownames(out) <- names(cf)
  return(out)
}  ## end if delta
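For reference, here is a minimal, self-contained sketch of the same delta-method calculation for a ratio of two fitted probabilities; all names below are hypothetical and not tied to the function above.
# Toy data: two groups, binary outcome
set.seed(1)
grp <- rep(0:1, each = 50)
out <- rbinom(100, 1, ifelse(grp == 0, 0.3, 0.5))
fit <- glm(out ~ grp, family = binomial)

X  <- cbind(1, 0:1)                        # design rows for grp = 0 and grp = 1
p  <- as.vector(plogis(X %*% coef(fit)))   # fitted probabilities
D  <- diag(p * (1 - p)) %*% X              # Jacobian of p w.r.t. the coefficients
Vp <- D %*% vcov(fit) %*% t(D)             # covariance of the two probabilities

# Delta method for the ratio p2/p1: the gradient of y/x is c(-y/x^2, 1/x)
rr   <- p[2] / p[1]
grad <- c(-p[2] / p[1]^2, 1 / p[1])
se   <- sqrt(as.numeric(t(grad) %*% Vp %*% grad))
rr + c(-1, 1) * qnorm(0.975) * se          # Wald-type 95% CI for the ratio
If intervals on the ratio scale come out implausibly wide, a common remedy is to apply the delta method to log(p2/p1) instead and exponentiate the endpoints, which respects the positivity of the ratio.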
With a lot of help from contributors to Stack Overflow I have managed to put together a function that derives the weights of a 2-asset portfolio which maximises the Sharpe ratio. No short sales are allowed and the weights sum to 1. What I would like to do now is to constrain the weight of asset A to within 10% of a user-defined value. For example, with a target of 60% +/- 10% (relative), asset A should be no less than 54% and no more than 66%. In the example below I would then end up with weights of (0.54, 0.46) instead of the unconstrained (0.2430, 0.7570). I assume this can be done by tweaking bVect, but I am not sure how to go about it. Any help would be appreciated.
asset_A <- c(0.034320510,-0.001209628,0.031900161,0.023163947,-0.001872938,-0.010322489,0.006090395,-0.003270854,0.017778990,0.017204915)
asset_B <- c(0.047103261,0.055175057,0.021019816,0.020602347,0.007281368,-0.006547404,0.019155238,0.005494798,0.025429958,0.014929124)
require(quadprog)
HR_solve <- function(asset_A, asset_B) {
  vol_A  <- sd(asset_A)
  vol_B  <- sd(asset_B)
  cor_AB <- cor(cbind(asset_A, asset_B), method = "pearson")
  ret_A_B <- as.matrix(c(mean(asset_A), mean(asset_B)))
  vol_AB  <- c(vol_A, vol_B)
  covmat  <- diag(as.vector(vol_AB)) %*% cor_AB %*% diag(as.vector(vol_AB))
  # Constraints: weights sum to 1 (equality), each weight >= 0
  aMat  <- cbind(rep(1, nrow(covmat)), diag(1, nrow(covmat)))
  bVec  <- c(1, 0, 0)
  zeros <- array(0, dim = c(nrow(covmat), 1))
  # Minimum-variance portfolio as the starting point
  minw <- solve.QP(covmat, zeros, aMat, bVec, meq = 1, factorized = FALSE)$solution
  rp <- as.numeric(t(minw) %*% ret_A_B)
  sp <- sqrt(t(minw) %*% covmat %*% minw)
  wp <- t(matrix(minw))
  # Trace the frontier by targeting returns above the min-variance return
  sret  <- sort(seq(t(minw) %*% ret_A_B, max(ret_A_B), length.out = 100))
  aMatt <- cbind(ret_A_B, aMat)
  for (ri in sret[-1]) {
    bVect  <- c(ri, bVec)
    result <- tryCatch(solve.QP(covmat, zeros, aMatt, bVect, meq = 2, factorized = FALSE),
                       warning = function(w) NULL, error = function(e) NULL)
    if (!is.null(result)) {
      wp <- rbind(wp, as.vector(result$solution))
      rp <- c(rp, t(as.vector(result$solution) %*% ret_A_B))
      sp <- c(sp, sqrt(t(as.vector(result$solution)) %*% covmat %*% as.vector(result$solution)))
    }
  }
  # Pick the frontier portfolio with the highest Sharpe ratio
  HR_weights <- wp[which.max(rp / sp), ]
  as.matrix(HR_weights)
}
HR_solve(asset_A,asset_B)
[,1]
[1,] 0.2429662
[2,] 0.7570338
I think you should take a look at the link below.
http://economistatlarge.com/portfolio-theory/r-optimized-portfolio/r-code-graph-efficient-frontier
I think you'll learn a lot from that. I'll post the code here, in case the link gets shut down sometime in the future.
# Economist at Large
# Modern Portfolio Theory
# Use solve.QP to solve for efficient frontier
# Last Edited 5/3/13
# This file uses the solve.QP function in the quadprog package to solve for the
# efficient frontier.
# Since the efficient frontier is a parabolic function, we can find the solution
# that minimizes portfolio variance and then vary the risk premium to find
# points along the efficient frontier. Then simply find the portfolio with the
# largest Sharpe ratio (expected return / sd) to identify the most
# efficient portfolio
library(stockPortfolio) # Base package for retrieving returns
library(ggplot2) # Used to graph efficient frontier
library(reshape2) # Used to melt the data
library(quadprog) #Needed for solve.QP
# Create the portfolio using ETFs, incl. hypothetical non-efficient allocation
stocks <- c(
"VTSMX" = .0,
"SPY" = .20,
"EFA" = .10,
"IWM" = .10,
"VWO" = .30,
"LQD" = .20,
"HYG" = .10)
# Retrieve returns, from earliest start date possible (where all stocks have
# data) through most recent date
returns <- getReturns(names(stocks[-1]), freq="week") #Currently, drop index
#### Efficient Frontier function ####
eff.frontier <- function(returns, short = "no", max.allocation = NULL,
                         risk.premium.up = .5, risk.increment = .005) {
  # returns: an m x n matrix with one column per security
  # short: whether short-selling is allowed; default is "no" (prohibited)
  # max.allocation: the maximum % allowed for any one security
  #   (reduces concentration)
  # risk.premium.up: the upper limit of the risk premium modeled
  #   (see the for loop below)
  # risk.increment: the increment ("by") value used in the for loop
  covariance <- cov(returns)
  print(covariance)
  n <- ncol(covariance)

  # Create initial Amat and bvec assuming only the equality constraint
  # (short-selling allowed, no allocation constraints)
  Amat <- matrix(1, nrow = n)
  bvec <- 1
  meq <- 1

  # Then modify Amat and bvec if short-selling is prohibited
  if (short == "no") {
    Amat <- cbind(1, diag(n))
    bvec <- c(bvec, rep(0, n))
  }

  # And modify Amat and bvec if a max allocation (concentration) is specified
  if (!is.null(max.allocation)) {
    if (max.allocation > 1 | max.allocation < 0) {
      stop("max.allocation must be greater than 0 and less than 1")
    }
    if (max.allocation * n < 1) {
      stop("Need to set max.allocation higher; not enough assets to add to 1")
    }
    Amat <- cbind(Amat, -diag(n))
    bvec <- c(bvec, rep(-max.allocation, n))
  }

  # Calculate the number of loops
  loops <- risk.premium.up / risk.increment + 1
  loop <- 1

  # Initialize a matrix to contain allocations and statistics
  # This is not necessary, but it speeds up processing and uses less memory
  eff <- matrix(nrow = loops, ncol = n + 3)
  colnames(eff) <- c(colnames(returns), "Std.Dev", "Exp.Return", "sharpe")

  # Loop through the quadratic program solver
  for (i in seq(from = 0, to = risk.premium.up, by = risk.increment)) {
    dvec <- colMeans(returns) * i  # This moves the solution along the frontier
    sol <- solve.QP(covariance, dvec = dvec, Amat = Amat, bvec = bvec, meq = meq)
    eff[loop, "Std.Dev"] <- sqrt(sum(sol$solution * colSums(covariance * sol$solution)))
    eff[loop, "Exp.Return"] <- as.numeric(sol$solution %*% colMeans(returns))
    eff[loop, "sharpe"] <- eff[loop, "Exp.Return"] / eff[loop, "Std.Dev"]
    eff[loop, 1:n] <- sol$solution
    loop <- loop + 1
  }
  return(as.data.frame(eff))
}
# Run the eff.frontier function based on no short and 50% alloc. restrictions
eff <- eff.frontier(returns=returns$R, short="no", max.allocation=.50,
risk.premium.up=1, risk.increment=.001)
# Find the optimal portfolio
eff.optimal.point <- eff[eff$sharpe==max(eff$sharpe),]
# graph efficient frontier
# Start with color scheme
ealred <- "#7D110C"
ealtan <- "#CDC4B6"
eallighttan <- "#F7F6F0"
ealdark <- "#423C30"
ggplot(eff, aes(x=Std.Dev, y=Exp.Return)) + geom_point(alpha=.1, color=ealdark) +
  # geom_point has no label aesthetic; dropping label=sharpe avoids a warning
  geom_point(data=eff.optimal.point, aes(x=Std.Dev, y=Exp.Return),
             color=ealred, size=5) +
annotate(geom="text", x=eff.optimal.point$Std.Dev,
y=eff.optimal.point$Exp.Return,
label=paste("Risk: ",
round(eff.optimal.point$Std.Dev*100, digits=3),"\nReturn: ",
round(eff.optimal.point$Exp.Return*100, digits=4),"%\nSharpe: ",
round(eff.optimal.point$sharpe*100, digits=2), "%", sep=""),
hjust=0, vjust=1.2) +
ggtitle("Efficient Frontier\nand Optimal Portfolio") +
labs(x="Risk (standard deviation of portfolio)", y="Return") +
theme(panel.background=element_rect(fill=eallighttan),
text=element_text(color=ealdark),
plot.title=element_text(size=24, color=ealred))
ggsave("Efficient Frontier.png")
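One practical note: the stockPortfolio package may no longer be available on CRAN. If getReturns cannot be used, a roughly equivalent weekly-return matrix can be built with quantmod; a sketch under that assumption (tickers taken from the script above):
# Hedged alternative to stockPortfolio::getReturns using quantmod
library(quantmod)
tickers <- c("SPY", "EFA", "IWM", "VWO", "LQD", "HYG")
rets <- do.call(merge, lapply(tickers, function(s) {
  weeklyReturn(Ad(getSymbols(s, auto.assign = FALSE)))  # weekly returns on adjusted close
}))
colnames(rets) <- tickers
returns <- list(R = na.omit(as.matrix(rets)))  # mimics the returns$R slot used above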
OK, I have found a way to do this... if you think there is a more elegant way, please let me know...
require(quadprog)
HR_solve <- function(asset_A, asset_B, mean_A, range_A) {
  vol_A  <- sd(asset_A)
  vol_B  <- sd(asset_B)
  cor_AB <- cor(cbind(asset_A, asset_B), method = "pearson")
  ret_A_B <- as.matrix(c(mean(asset_A), mean(asset_B)))
  vol_AB  <- c(vol_A, vol_B)
  covmat  <- diag(as.vector(vol_AB)) %*% cor_AB %*% diag(as.vector(vol_AB))
  # Unconstrained step: weights sum to 1, both weights >= 0
  bVec  <- c(1, 0, 0)
  aMat  <- cbind(rep(1, nrow(covmat)), diag(1, nrow(covmat)))
  zeros <- array(0, dim = c(nrow(covmat), 1))
  minw  <- solve.QP(covmat, zeros, aMat, bVec, meq = 1, factorized = FALSE)$solution
  rp <- as.numeric(t(minw) %*% ret_A_B)
  sp <- sqrt(t(minw) %*% covmat %*% minw)
  wp <- t(matrix(minw))
  sret <- sort(seq(t(minw) %*% ret_A_B, max(ret_A_B), length.out = 1000))
  # Bounds on asset A: mean_A * (1 - range_A) <= w_A <= mean_A * (1 + range_A)
  min_A <- mean_A * (1 - range_A)
  max_A <- mean_A * (1 + range_A)
  # Reuse the w >= 0 columns of aMat for the lower bounds (w_A >= min_A, w_B >= 0)
  # and append -diag(2) for the upper bounds (w_A <= max_A, w_B <= 1)
  aMatt <- cbind(ret_A_B, aMat, -diag(2))
  bVec  <- c(1, min_A, 0, -max_A, -1)
  for (ri in sret[-1]) {
    bVect  <- c(ri, bVec)
    result <- tryCatch(solve.QP(covmat, zeros, aMatt, bVect, meq = 2, factorized = FALSE),
                       warning = function(w) NULL, error = function(e) NULL)
    if (!is.null(result)) {
      wp <- rbind(wp, as.vector(result$solution))
      rp <- c(rp, t(as.vector(result$solution) %*% ret_A_B))
      sp <- c(sp, sqrt(t(as.vector(result$solution)) %*% covmat %*% as.vector(result$solution)))
    }
  }
  HR_weights <- wp[which.max(rp / sp), ]
  as.matrix(HR_weights)
}
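For example, calling it with the 60% +/- 10% band from the question:
HR_solve(asset_A, asset_B, mean_A = 0.6, range_A = 0.1)
# For these returns the optimum should sit at asset A's lower bound,
# i.e. weights of approximately (0.54, 0.46)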
Just change aMat and bVec:
# asset A to be no less than 54% or more than 66%
aMat <- cbind(rep(1,nrow(covmat)),diag(1,nrow(covmat)),c(1,0),c(-1,0))
bVec <- c(1,0,0,.54,-.66)
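Spelled out, the columns of this aMat and the entries of bVec give the following constraints once passed to solve.QP (with meq = 1 the first is an equality and the rest are >= inequalities; covmat and zeros as defined in the question's function):
#   w_A + w_B  =  1      (budget, equality)
#   w_A       >=  0
#   w_B       >=  0
#   w_A       >=  0.54
#  -w_A       >= -0.66   (i.e. w_A <= 0.66)
solve.QP(covmat, zeros, aMat, bVec, meq = 1, factorized = FALSE)$solution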
I wrote code for Newton-Raphson for logistic regression. Unfortunately, on many data sets it does not converge. There is a mistake somewhere but I do not know where. Can anyone help figure out what the problem is?
First, the data: y is the response (0,1), Z is a 115 x 30 matrix of explanatory variables, and I need to estimate the 30 parameters.
y = c(rep(0,60),rep(1,55))
X = sample(c(0,1),size=3450,replace=T)
Z = t(matrix(X,ncol=115))
# The code is:
B = matrix(rep(0, 30*10), ncol = 10)
B[,1] = matrix(rep(0, 30), ncol = 1)
for (i in 2:10) {
  print(i)
  p <- exp(Z %*% as.matrix(B[,i])) / (1 + exp(Z %*% as.matrix(B[,i])))
  v.2 <- diag(as.vector(1 * p * (1-p)))
  score.2 <- t(Z) %*% (y - p)  # score function
  increm <- solve(t(Z) %*% v.2 %*% Z)
  B[,i] = as.matrix(B[,i-1]) + increm %*% score.2
  if (B[,i] - B[i-1] == matrix(rep(0.0001, 30), ncol = 1)) {
    return(B)
  }
}
Found it! You're updating p based on B[,i]; you should be using B[,i-1] ...
While I was finding the answer I cleaned up your code and incorporated the result in a function. R's built-in glm seems to work (see below). One note: this approach is likely to be unstable, since fitting a binary model with 30 predictors and only 115 binary responses, without any penalization or shrinkage, is extremely optimistic ...
set.seed(101)
n.obs <- 115
n.zero <- 60
n.pred <- 30
y <- c(rep(0,n.zero),rep(1,n.obs-n.zero))
X <- sample(c(0,1),size=n.pred*n.obs,replace=TRUE)
Z <- t(matrix(X,ncol=n.obs))
R's built-in glm fitter does work (it uses iteratively reweighted least squares, not N-R):
g1 <- glm(y~.-1,data.frame(y,Z),family="binomial")
(If you want to view the results, library("arm"); coefplot(g1).)
The NRfit function below implements the Newton-Raphson update B_{m+1} = B_m + (X^T V_m X)^{-1} X^T (Y - P_m):
NRfit <- function(y,X,start,n.iter=100,tol=1e-4,verbose=TRUE) {
## used X rather than Z just because it's more standard notation
n.pred <- ncol(X)
B <- matrix(NA,ncol=n.iter,
nrow=n.pred)
B[,1] <- start
for (i in 2:n.iter) {
if (verbose) cat(i,"\n")
p <- plogis(X %*% B[,i-1])
v.2 <- diag(c(p*(1-p)))
score.2 <- t(X) %*% (y - p) # score function
increm <- solve(t(X) %*% v.2 %*% X)
B[,i] <- B[,i-1]+increm%*%score.2
if (all(abs(B[,i]-B[,i-1]) < tol)) return(B)
}
B
}
matplot(res1 <- t(NRfit(y,Z,start=coef(g1))))
matplot(res2 <- t(NRfit(y,Z,start=rep(0,ncol(Z)))))
all.equal(res2[6,],unname(coef(g1))) ## TRUE
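Following up on the stability point above: with 30 predictors and only 115 observations, some shrinkage is advisable. A hedged sketch with the glmnet package (glmnet is not part of the original code, just one standard choice):
# Ridge-penalized logistic fit via glmnet (alpha = 0); an illustration only,
# reusing the y and Z simulated above
library(glmnet)
cvfit <- cv.glmnet(Z, y, family = "binomial", alpha = 0)  # cross-validate lambda
coef(cvfit, s = "lambda.min")                             # shrunken coefficients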
I am working on ridge regression and want to write my own function. I tried the following; it works for an individual value of k but not for a sequence of values.
dt <- longley
# Assumption: rename the longley columns so the predictors are X1..X6 and the
# response (Employed) is Y, matching the names used below
names(dt) <- c(paste0("X", 1:6), "Y")
attach(dt)
library(MASS)
X <- as.matrix(cbind(X1, X2, X3, X4, X5, X6))
Y <- as.matrix(Y)
sx<-scale(X)/sqrt(nrow(X)-1)
sy<-scale(Y)/sqrt(nrow(Y)-1)
rxx<-cor(sx)
rxy<-cor(sx,sy)
for (k in 0:1) {
  res <- solve(rxx + k*diag(rxx)) %*% rxy
  k <- k + 0.01
}
I need help optimizing the code too.
poly.kernel <- function(v1, v2=v1, p=1) {
  ((as.matrix(v1) %*% t(v2)) + 1)^p
}
KernelRidgeReg <- function(TrainObjects, TrainLabels, TestObjects, TestLabels, lambda){
  X <- TrainObjects
  y <- TrainLabels
  kernel <- poly.kernel(X)
  design.mat <- cbind(1, kernel)
  I <- rbind(0, cbind(0, kernel))
  M <- crossprod(design.mat) + lambda*I
  # crossprod(X) is just t(X) %*% X; it looks neater in my opinion
  M.inv <- solve(M)  # inverse of M
  k <- as.matrix(diag(poly.kernel(cbind(TrainObjects, TrainLabels))))
  # Removing diag still gives the same MSE, but will output a vector of predictions
  Labels <- rbind(0, as.matrix(TrainLabels))
  y.hat <- t(Labels) %*% M.inv %*% rbind(0, k)
  y.true <- TestLabels  # originally the free variable Y.test; pass the labels in instead
  MSE <- mean((y.hat - y.true)^2)
  return(list(MSE=MSE, y.hat=y.hat))
}
A kernel with p=1 gives you ridge regression.
R's built-in solve function can fail when the matrix is (numerically) singular. You may want to write your own function (for example, one based on a pseudo-inverse) to avoid that.
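Coming back to the sequence-of-k part of the question: below is a minimal sketch (not from the original thread) that loops over a grid of ridge constants and stores the coefficient paths, reusing the rxx and rxy computed in the question. Note the ridge term is k times the identity, k * diag(ncol(rxx)), rather than k * diag(rxx):
# Hypothetical sketch: coefficient paths over a grid of ridge constants k
ks <- seq(0, 1, by = 0.01)
coefs <- sapply(ks, function(k) {
  # ridge adds k to the diagonal: k * I, not k * diag(rxx)
  solve(rxx + k * diag(ncol(rxx))) %*% rxy
})
colnames(coefs) <- ks   # one column of coefficients per k
# If solve() fails on a (near-)singular matrix, MASS::ginv() is a drop-in fallback
matplot(ks, t(coefs), type = "l",
        xlab = "k", ylab = "standardized coefficients")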