From clustering to portfolio selection in R

I need help making a Markowitz portfolio selection; my script is the following.
This is the matrix of stock returns:
a <- c(0.00444, -0.00553, -0.01007, -0.00012, 0.04133, -0.02472, -0.01771,
-0.00994, -0.06933, 0.00476)
b <- c(-0.01926, 0.06008, 0.02839, 0.00930, 0.02072, 0.02072, 0.03670, -0.02049,
-0.01644, 0.02375)
c <- c(-0.00719, 0.02296, -0.06438, 0.008805, -0.00603, -0.00663, -0.01160,
-0.00298, 0.00713, 0.00790)
d <- c(-0.01753, 0.00809, 0.02301, -0.00631, -0.026114, 0.03157, -0.02488,
-0.01013, -0.03231, -0.00763)
e <- c(-0.02153, 0.00863, -0.02929, -0.01424, -0.01940, -0.02313, -0.04146,
-0.02610, 0.00050, -0.04700)
stocks <- cbind(a,b,c,d,e)
And the following is the market index:
rmkt <- c(-0.01159, -0.02787, -0.05936, -0.09417, -0.03027, -0.03161, -0.03166,
-0.04092, -0.02841, -0.009781)
The time horizon is 10 periods, so:
time <- 1:10
I used the following code for clustering (it needs the expectreg and cluster packages):
library(expectreg)  # expectreg.ls(), rb()
library(cluster)    # clusGap(), maxSE()
Nsim <- 10
opt_num_centers <- rep(0, Nsim)
est_centers_coefs <- est_centers_fits <- rep(list(0), Nsim)
for(kk in 1:Nsim){
  # `stocks_list` and `societies_stocks` were never defined above, so take
  # the columns of the return matrix directly (one column per stock)
  societies_stocks <- ncol(stocks)
  stock_mat <- stocks[, 1:societies_stocks]

  # B-spline design matrix and third-order difference penalty;
  # basis_array() is not a base R function and is assumed to be defined
  # elsewhere in the session
  nseg <- 30
  B <- basis_array(time, min(time), max(time), ndt = nseg,
                   deg = 3, max_derivs = 0, sparse = FALSE)[[1]]
  M <- ncol(B)
  P <- crossprod(diff(diag(ncol(B)), diff = 3))

  est_coef <- matrix(0, nrow = M, ncol = ncol(stock_mat))
  est_bet  <- matrix(0, nrow = nrow(stock_mat), ncol = ncol(stock_mat))

  # Penalized expectile regression of each stock on the market index
  smooth_par <- NULL
  for(i in 1:ncol(stock_mat)){
    bb <- expectreg.ls(stock_mat[, i] ~ rb(time, "special", by = rmkt, B = B, P = P),
                       smooth = "schall", expectiles = 0.5)
    smooth_par <- c(smooth_par, bb$lambda$time)
    est_coef[, i] <- bb$coefficients$time[, 1]
    est_bet[, i]  <- B %*% bb$coefficients$time[, 1]
  }

  # Pick the number of clusters with the gap statistic, then run k-means
  sc_est_coef <- scale(est_coef)
  aa <- clusGap(t(sc_est_coef), kmeans, 10, B = 100, nstart = 50, iter.max = 50)
  n_clust <- maxSE(aa$Tab[, "gap"], aa$Tab[, "SE.sim"], "Tibs2001SEmax")
  opt_num_centers[kk] <- n_clust
  km <- kmeans(t(sc_est_coef), n_clust, nstart = 100, iter.max = 100)
  est_centers_coefs[[kk]] <- t(km$centers)
  est_centers_fits[[kk]]  <- B %*% est_centers_coefs[[kk]]
  cat("\t", "simulation =", kk, "\n", "\t", "# Clust =", opt_num_centers[kk], "\n")
}
Considering all of this, how can I proceed to the portfolio selection?
Thank you all.
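A minimal sketch of the classical Markowitz step that could follow, assuming one works directly on the return matrix stocks and uses the quadprog package (neither is prescribed by the question): a long-only minimum-variance portfolio solves a small quadratic program.
library(quadprog)
mu    <- colMeans(stocks)  # sample expected returns (would enter a target-return constraint)
Sigma <- cov(stocks)       # sample covariance matrix
k     <- ncol(stocks)
# minimize w' Sigma w  subject to  sum(w) = 1 (equality) and w >= 0 (no short selling)
Amat <- cbind(rep(1, k), diag(k))
bvec <- c(1, rep(0, k))
qp <- solve.QP(Dmat = 2*Sigma, dvec = rep(0, k), Amat = Amat, bvec = bvec, meq = 1)
w <- setNames(qp$solution, colnames(stocks))
round(w, 4)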

Related

R code for simulating stochastic asset price path

Consider the following model for the evolution of an asset's price:
X_t = X_{t-1} * exp(mu + sigma * Z_t), where Z_t = +1 with probability p, Z_t = -1 with probability 1 - p, and X_1 = x0.
This is what I have done (in R). I could not find a function that randomly outputs +1 or -1, so I decided to adapt the built-in rbinom function.
## This code is in R
rm(list = ls())
library(dplyr)  # for if_else()

path <- function(T, mu, sigma, p, x0) {
  x <- rep(NA, T)
  x[1] <- x0
  for(i in 2:T){
    z <- if_else(rbinom(1, 1, p) == 0, -1, 1)
    x[i] <- x[i-1] * exp(mu + sigma*z)
  }
  return(x)
}
## Just some testing
x_sim <- path(T = 4, mu = 0, sigma = 0.01, p = 0.5, x0 = 100)
## Actual answer
Np <- 10000
mc <- matrix(nrow = 17, ncol = Np)
for(j in 1:Np){
  mc[, j] <- path(T = 17, mu = 0, sigma = 0.01, p = 0.5, x0 = 100)
}
# did each path ever get back to 100 or above after the first step?
test <- mc[2:nrow(mc), ] >= 100
sum_test <- colSums(test)
comp <- sum(sum_test >= 1)/length(sum_test)
prob <- 1 - comp  # probability a path never reaches 100 again
Does this make sense? Any help/tips/advice would be much appreciated. Thanks!
Staying close to your code, I came up with this. Intuitively, the probability should be rather low given the parameters: I get about 6.7%, which is roughly what your code gives when I run it with the parameters from the assignment.
simpath <- function(t, mu, sigma, p, x0, seed){
  # set seed
  if(!missing(seed)){
    set.seed(seed)
  }
  # set up matrix for storing the results
  res <- matrix(c(1:t, rep(NA, t*2)), ncol = 3)
  colnames(res) <- c('t', 'z_t', 'x_t')
  res[, 'z_t'] <- sample(c(1, -1), size = t, prob = c(p, 1-p), replace = TRUE)
  res[1, 3] <- x0
  for(i in 2:t){
    res[i, 3] <- res[i-1, 3] * exp(mu + sigma*res[i, 2])
  }
  return(res)
}
x_sim <- simpath(t = 4, mu = 0, sigma = 0.01, p = 0.5, x0 = 100, seed = 123)
x_sim2 <- simpath(t = 36, mu = 0, sigma = 0.03, p = 0.5, x0 = 100, seed = 123)
## Actual answer
Np <- 100000
mc <- matrix(nrow = 36, ncol = Np)
for (j in 1:Np){
  mc[, j] <- simpath(t = 36, mu = 0, sigma = 0.03, p = 0.5, x0 = 100)[, 3]
}
test <- mc > 100
sum_test <- colSums(test)
comp <- sum(sum_test == 0)/length(sum_test)
prob <- comp
> prob
[1] 0.06759
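For reference, a compact vectorized version of the same Monte Carlo (a sketch under the model above, not part of the original answer) simulates all paths at once by cumulating the log-returns:
set.seed(123)
Np <- 100000; tt <- 36; mu <- 0; sigma <- 0.03; p <- 0.5; x0 <- 100
z <- matrix(sample(c(1, -1), (tt-1)*Np, replace = TRUE, prob = c(p, 1-p)),
            nrow = tt - 1)
x <- x0 * exp(apply(mu + sigma*z, 2, cumsum))  # prices at t = 2, ..., tt
prob <- mean(colSums(x > 100) == 0)            # share of paths never above x0
prob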

Why does rjags give Dimension mismatch taking subset of y error here?

I have written this model, but rjags gives a dimension-mismatch error. What's happening?
Error in jags.model(textConnection(model1), data = jags_data, n.chains = n_chains, :
RUNTIME ERROR:
Compilation error on line 8.
Dimension mismatch taking subset of y
library(rjags)
model1 <- "model {
  C <- 10000
  for (j in 1:nobs){
    zeros[j] ~ dpois(phi[j])
    phi[j] <- -log(L[j]) + C
    L[j] <- add[j]*(lambda[j]^y[j])*(1-lambda[j])^(1-y[j])
    add[j] = ifelse(lambda[j] == 0.5, 2, aux[j])
    aux[j] = 2*arctanh(1 - 2*lambda[j] + 10^(-323))/(1 - 2*lambda[j] + 10^(-323))
    logit(lambda[j]) <- inprod(X[j, ], beta)
  }
  beta[1] ~ dnorm(0,1)
  beta[2] ~ dgamma(1,1)
}"
n_chains = 1
n_adapt = 5000
n_iter = 10000
n_thin = 1
n_burnin = 5000
# generate data
n = 100
Ffun = plogis
design_mat = cbind(1, matrix(seq(0, 1, by = 0.2), ncol = 1))
gen_data = function(n, beta) {
  X = design_mat[sample(nrow(design_mat), size = n, replace = T), ]
  lambda = Ffun(X %*% beta)
  y = rcbern(n, lambda)
  idx = is.nan(y)
  y[idx] = runif(length(idx))
  list(X = X, y = y)
}
rcbern = function(n, lam){
  x = runif(n)
  y = log((x*(2*lam-1) - (lam-1))/(1-lam))/log(lam/(1-lam))
  return(y)
}
beta = as.matrix(c(-3, 5))
jags_data = gen_data(n, beta)
jags_data$nobs = n
jg_model <- jags.model(textConnection(model1),
                       data = jags_data,
                       n.chains = n_chains,
                       n.adapt = n_adapt)
update(jg_model, n.iter = n_burnin)
result <- coda.samples(jg_model,
                       variable.names = c("beta"),
                       n.iter = n_iter,
                       thin = n_thin,
                       n.chains = n_chains)
beta_est = list(apply(result[[1]], 2, median))
As suggested by @user20650, the issue is that the model indexes y as a vector while your gen_data() returns it as a matrix (X %*% beta is a matrix, and rcbern() propagates that shape). Try this code with a slight change in gen_data():
library(rjags)
model1 <- "model {
  C <- 10000
  for (j in 1:nobs){
    zeros[j] ~ dpois(phi[j])
    phi[j] <- -log(L[j]) + C
    L[j] <- add[j]*(lambda[j]^y[j])*(1-lambda[j])^(1-y[j])
    add[j] = ifelse(lambda[j] == 0.5, 2, aux[j])
    aux[j] = 2*arctanh(1 - 2*lambda[j] + 10^(-323))/(1 - 2*lambda[j] + 10^(-323))
    logit(lambda[j]) <- inprod(X[j, ], beta)
  }
  beta[1] ~ dnorm(0,1)
  beta[2] ~ dgamma(1,1)
}"
n_chains = 1
n_adapt = 5000
n_iter = 10000
n_thin = 1
n_burnin = 5000
# generate data
n = 100
Ffun = plogis
design_mat = cbind(1, matrix(seq(0, 1, by = 0.2), ncol = 1))
gen_data = function(n, beta) {
  X = design_mat[sample(nrow(design_mat), size = n, replace = T), ]
  lambda = Ffun(X %*% beta)
  y = rcbern(n, lambda)
  y <- as.vector(y)  # the fix: collapse the n x 1 matrix to a plain vector
  idx = is.nan(y)
  y[idx] = runif(length(idx))
  list(X = X, y = y)
}
rcbern = function(n, lam){
  x = runif(n)
  y = log((x*(2*lam-1) - (lam-1))/(1-lam))/log(lam/(1-lam))
  return(y)
}
beta = as.matrix(c(-3, 5))
jags_data = gen_data(n, beta)
jags_data$nobs = n
jg_model <- jags.model(textConnection(model1),
                       data = jags_data,
                       n.chains = n_chains,
                       n.adapt = n_adapt)
update(jg_model, n.iter = n_burnin)
result <- coda.samples(jg_model,
                       variable.names = c("beta"),
                       n.iter = n_iter,
                       thin = n_thin,
                       n.chains = n_chains)
beta_est = list(apply(result[[1]], 2, median))
Output:
beta_est
[[1]]
beta[1] beta[2]
-0.006031984 0.692007301
You can also try y <- y[, 1, drop = TRUE] in the same function instead of as.vector().
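A quick way to see the original mismatch (a diagnostic sketch, not part of the answer): lambda = Ffun(X %*% beta) is an n x 1 matrix, so rcbern() returns a matrix too, and JAGS then cannot take the subset y[j] of it.
dat <- gen_data(n, beta)  # with the original gen_data(), i.e. without as.vector()
str(dat$y)                # num [1:100, 1] ... -> a matrix, not a vector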

Constraints in constrOptim.nl in r

I am using the constrOptim.nl() optimizer from the R package alabama.
I need to minimize a function with the following constraints:
Alpha < sqrt(2*omega) and omega > 0
In my code expressed as:
theta[3] < sqrt(2*theta[1]) and theta[1] > 0
I write these conditions in the hin() function shown in the full code below.
But when I call the optimizer and run it, I get the following warning:
1: In sqrt(2 * theta[1]) : NaNs produced
Why? Did I set the proper conditions?
This is my whole code:
library(alabama)  # for constrOptim.nl()
data <- read.delim(file = file, header = FALSE)
ind <- seq(from = 1, to = NROW(data), by = 1)
data <- data.frame(ind = ind, Ret = data$V1, Ret2 = data$V1^2)
colnames(data)[1] <- "Ind"
colnames(data)[2] <- "Ret"
colnames(data)[3] <- "Ret2"
T <- length(data$Ret)
m <- arima(x = data$Ret2, order = c(3,0,0), include.mean = TRUE, method = c("ML"))
b_not <- m$coef
omega <- 0.1
alpha <- 0.005
beta <- 0.9
theta <- c(omega,beta,alpha) # "some" value of theta
s0 <- theta[1]/(1-theta[2])
theta[3] < sqrt(2*theta[1]) # check whether the Feller condition is verified
N <- 30000
reps <- 1
rho <- -0.8
n <- 100
heston.II <- function(theta){
  set.seed(5)
  u  <- rnorm(n = N*reps, mean = 0, sd = 1)
  u1 <- rnorm(n = N*reps, mean = 0, sd = 1)
  u2 <- rho*u + sqrt((1-rho^2))*u1
  sigma   <- matrix(0, nrow = N*reps, ncol = 1)
  ret.int <- matrix(0, nrow = N*reps, ncol = 1)
  sigma[1,1] <- s0
  for (i in 2:(N*reps)) {
    sigma[i,1] <- theta[1] + theta[2]*sigma[i-1,1] + theta[3]*sqrt(sigma[i-1,1])*u1[i]
    # if(sigma[i,1] < 0.00000001){ sigma[i,1] = s0}
  }
  for (i in 1:(N*reps)) {
    ret.int[i,1] <- sqrt(sigma[i,1])*u2[i]
  }
  ret <- matrix(0, nrow = N*reps/n, ncol = 1)
  ret[1,1] <- sum(ret.int[1:n], 1)
  for (i in 2:((N*reps)/n)) {
    ret[i,] <- sum(ret.int[(n*i):(n*(i+1))])
    ret[((N*reps)/n),] <- sum(ret.int[(n*(i-1)):(n*i)])
  }
  ret2 <- ret^2
  model <- arima(x = ret2, order = c(3,0,0), include.mean = TRUE)
  beta_hat <- model$coef
  m1 <- beta_hat[1] - b_not[1]
  m2 <- beta_hat[2] - b_not[2]
  m3 <- beta_hat[3] - b_not[3]
  m4 <- beta_hat[4] - b_not[4]
  D <- cbind(m1, m2, m3, m4)
  DD <- (D)%*%t(D)/1000
  DD <- as.numeric(DD)
  return(DD)
}
heston.sim <- heston.II(theta)
hin <- function(theta){
  h <- rep(NA, 2)
  h[1] <- theta[1]                     # omega > 0
  h[2] <- sqrt(2*theta[1]) - theta[3]  # alpha < sqrt(2*omega)
  return(h)
}
hin(theta = theta)
.opt <- constrOptim.nl(par = theta, fn = heston.II, hin = hin)
.opt
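One plausible reading of the warning, offered as a hedged note rather than a confirmed diagnosis: constrOptim.nl() can evaluate hin() at intermediate points where theta[1] < 0, and sqrt() of a negative number yields NaN with exactly this warning. Since alpha (theta[3]) is nonnegative here, the Feller condition can be written without the square root:
hin2 <- function(theta){
  h <- rep(NA, 2)
  h[1] <- theta[1]                 # omega > 0
  h[2] <- 2*theta[1] - theta[3]^2  # same as alpha < sqrt(2*omega) when alpha >= 0
  return(h)
}
.opt2 <- constrOptim.nl(par = theta, fn = heston.II, hin = hin2)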

loop with certain values is not working

I just need help with the first loop! I would like to run the loop for each value of m (see the first line of the code), but it only runs for 1:10. The outcome should be stored in the last rows, msediff1 to msediff100. I also need the graphics for each value of m! Thanks in advance!
m = c(1,2,3,4,5,6,7,8,9,10,25,50,100)
for (m in 1:length(unique(m))){
n <- 150
x1 <- rnorm(n = n, mean = 10, sd = 4)
R <- 100 # Number of reps
results.true <- matrix(NA , ncol = 2, nrow = R)
colnames(results.true) <- c("beta0.hat", "beta1.hat")
results.diff <- matrix(NA, ncol = 2, nrow = R)
colnames(results.diff) <- c("beta0.hat", "betadiff.hat")
sigma <- 1.2
beta <- c(1.2)
X <- cbind(x1)
if (m==1){d0 <- .7071; d <- c(-.7071)}
if (m==2){d0 = .8090; d = c(-.5,-.309)}
if (m==3){d0 = .8582; d = c(-.3832,-.2809,-.1942) }
if (m==4){d0 = .8873; d = c(-.3090,-.2464,-.1901,-.1409)}
if (m==5){d0 <- .9064; d <- c(-.2600,-.2167,-.1774,-.1420,-.1103)}
if (m==6){d0 = .92; d = c(-.2238,-.1925,-.1635,-.1369,-.1126,-.0906)}
if (m==7){d0 = .9302; d = c(-.1965,-.1728,-.1506,-.1299,-.1107,-.093,-.0768)}
if (m==8){d0 = .9380; d = c(-.1751,-.1565,-.1389,-.1224,-.1069,-.0925,-.0791,-.0666)}
if (m==9){d0 = .9443; d = c(-.1578,-.1429,-.1287,-.1152,-.1025,-.0905,-.0792,-.0687,-.0538)}
if (m==10){d0 <- .9494;
d <- c(-.1437, -.1314, -.1197, -.1085, -.0978, -.0877, -.0782, -.0691, -.0606, -.0527)}
if (m==25){d0 <- 0.97873;
d <- c(-0.06128, -0.05915, -0.05705, -0.05500, -0.05298, -0.05100, -0.04906, -0.04715, -0.04528, -0.04345, -0.04166, -0.03990, -0.03818, -0.03650, -0.03486, -0.03325, -0.03168, -0.03015, -0.02865, -0.02719,
-0.02577, -0.02438, -0.02303, -0.02171, -0.02043) }
if (m==50) {d0 <- 0.98918;
d <- c(-0.03132, -0.03077, -0.03023, -0.02969, -0.02916, -0.02863, -0.02811, -0.02759, -0.02708, -0.02657, -0.02606, -0.02556, -0.02507, -0.02458, -0.02409, -0.02361, -0.02314, -0.02266, -0.02220, -0.02174, -0.02128, -0.02083, -0.02038, -0.01994, -0.01950, -0.01907, -0.01864, -0.01822, -0.01780, -0.01739,-0.01698,-0.01658,-0.01618,-0.01578,-0.01539,-0.01501,-0.01463,-0.01425,-0.01388,-0.01352,
-0.01316,-0.01280,-0.01245,-0.01210,-0.01176,-0.01142,-0.01108,-0.01075,-0.01043,-0.01011) }
if (m==100) { d0 <- 0.99454083;
d <- c(-0.01583636,-0.01569757,-0.01555936,-0.01542178,-0.01528478,-0.01514841,-0.01501262,-0.01487745,-0.01474289,-0.01460892,
-0.01447556,-0.01434282,-0.01421067,-0.01407914,-0.01394819,-0.01381786,-0.01368816,-0.01355903,-0.01343053,-0.01330264,
-0.01317535,-0.01304868,-0.01292260,-0.01279714,-0.01267228,-0.01254803,-0.01242439,-0.01230136,-0.01217894,-0.01205713,
-0.01193592,-0.01181533,-0.01169534,-0.01157596,-0.01145719,-0.01133903,-0.01122148,-0.01110453,-0.01098819,-0.01087247,
-0.01075735,-0.01064283,-0.01052892,-0.01041563,-0.01030293,-0.01019085,-0.01007937,-0.00996850,-0.00985823,-0.00974857,
-0.00963952,-0.00953107,-0.00942322,-0.00931598,-0.00920935,-0.00910332,-0.00899789,-0.00889306,-0.00878884,-0.00868522,
-0.00858220,-0.00847978,-0.00837797,-0.00827675,-0.00817614,-0.00807612,-0.00797670,-0.00787788,-0.00777966,-0.00768203,
-0.00758500,-0.00748857,-0.00739273,-0.00729749,-0.00720284,-0.00710878,-0.00701532,-0.00692245,-0.00683017,-0.00673848,
-0.00664738,-0.00655687,-0.00646694,-0.00637761,-0.00628886,-0.00620070,-0.00611312,-0.00602612,-0.00593971,-0.00585389,
-0.00576864,-0.00568397,-0.00559989,-0.00551638,-0.00543345,-0.00535110,-0.00526933,-0.00518813,-0.00510750,-0.00502745) }
for(r in 1:R){
  u <- rnorm(n = n, mean = 0, sd = sigma)
  y <- X%*%beta + u
  yy <- d0*y[(m+1):n]
  Xd <- d0*x1[(m+1):n]
  for (i in 1:m) {
    yy <- yy + d[i]*y[(m+1-i):(n-i)]
    Xd <- Xd + d[i]*x1[(m+1-i):(n-i)]
  }
  reg.true <- lm(y ~ x1)
  reg.diff <- lm(yy ~ Xd)
  results.true[r, ] <- coef(reg.true)
  results.diff[r, ] <- coef(reg.diff)
}
results.true
results.diff
beta
apply(results.true, MARGIN = 2, FUN = mean)
apply(results.diff, MARGIN = 2, FUN = mean)
co <- 2
dens.true <- density(results.true[, co])
dens.diff <- density(results.diff[, co])
win.graph()
plot(dens.true,
     xlim = range(c(results.true[, co], results.diff[, co])),
     ylim = range(c(dens.true$y, dens.diff$y)),
     main = "beta estimation true vs. diff", lwd = 2)
lines(density(results.diff[, co]), col = "red", lwd = 2)
abline(v = beta, col = "blue", lwd = 2)
legend(x = 1.24, y = 12, c("outcome true", "outcome diff"), lty = c(1,1), col = c("black", "red"))
legend(x = 1.12, y = 12, c("m=", m))
#Mean Squared Error
mse=mean(reg.true$residuals^2)
if (m==1) {msediff1=mean(reg.diff$residuals^2)}
if (m==2) {msediff2=mean(reg.diff$residuals^2)}
if (m==3) {msediff3=mean(reg.diff$residuals^2)}
if (m==4) {msediff4=mean(reg.diff$residuals^2)}
if (m==5) {msediff5=mean(reg.diff$residuals^2)}
if (m==6) {msediff6=mean(reg.diff$residuals^2)}
if (m==7) {msediff7=mean(reg.diff$residuals^2)}
if (m==8) {msediff8=mean(reg.diff$residuals^2)}
if (m==9) {msediff9=mean(reg.diff$residuals^2)}
if (m==10) {msediff10=mean(reg.diff$residuals^2)}
if (m==25) {msediff25=mean(reg.diff$residuals^2)}
if (m==50) {msediff50=mean(reg.diff$residuals^2)}
if (m==100) {msediff100=mean(reg.diff$residuals^2)}
}
I can see an error in the code.
m = c(1,2,3,4,5,6,7,8,9,10,25,50,100)
for (m in 1:length(unique(m))){
As soon as the loop starts, m is overwritten: it becomes the loop index 1, 2, ..., 13, not the vector from the first line, so the branches for m = 25, 50 and 100 are never reached.
Try for (ind in 1:length(unique(m))){ instead, if that's not the intention.
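A sketch of what I take the intent to be (the msediff vector here is a hypothetical replacement for the msediff1 ... msediff100 variables): loop over the values of m themselves and store each MSE by name.
m_values <- c(1:10, 25, 50, 100)
msediff <- setNames(numeric(length(m_values)), paste0("msediff", m_values))
for (m in m_values) {
  # ... original loop body unchanged: the if (m == ...) blocks now see
  # every intended value, including 25, 50 and 100 ...
  # at the end of each iteration:
  # msediff[paste0("msediff", m)] <- mean(reg.diff$residuals^2)
}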

specClust() in kknn - arpack iteration limit increase

I am applying spectral clustering to a dataset with 4200 rows and 2 columns.
spec <- specClust(df1, centers=7, nn = 7, method = "symmetric")
I get the error below.
Error in .Call("R_igraph_arpack", func, extra, options, env, sym, PACKAGE = "igraph") :
At arpack.c:944 : ARPACK error, Maximum number of iterations reached
In addition: Warning message:
In .Call("R_igraph_arpack", func, extra, options, env, sym, PACKAGE = "igraph") :
At arpack.c:776 :ARPACK solver failed to converge (1001 iterations, 0/7 eigenvectors converged)
How do I increase the ARPACK iteration limit? This doesn't work:
spec <- specClust(df1, centers=7, nn = 7, method = "symmetric",iter.max=301000)
Digging into specClust(), the ... argument does not pass anything on to the arpack() call.
The simplest thing to do, I think, is to copy the specClust() code, add a maxiter option to the arpack() call, and source the modified function in your script.
specClust2 <- function (data, centers = NULL, nn = 7, method = "symmetric",
                        gmax = NULL, max.iter = 10000, ...)
{
    ## Copied from kknn::specClust; when sourced outside the package, the
    ## unexported helpers must be pulled in first, e.g.
    ## mydist <- kknn:::mydist; Laplacian <- kknn:::Laplacian
    ## AUC <- kknn:::AUC; getClosest <- kknn:::getClosest
    ## arpack() comes from the igraph package.
    call = match.call()
    if (is.data.frame(data))
        data = as.matrix(data)
    da = apply(data, 1, paste, collapse = "#")
    indUnique = which(!duplicated(da))
    indAll = match(da, da[indUnique])
    data2 = data
    data = data[indUnique, ]
    n <- nrow(data)
    data = scale(data, FALSE, TRUE)
    if (is.null(gmax)) {
        if (!is.null(centers))
            gmax = centers - 1L
        else gmax = 1L
    }
    test = TRUE
    while (test) {
        DC = mydist(data, nn)
        sif <- rbind(1:n, as.vector(DC[[2]]))
        g <- graph(sif, directed = FALSE)
        g <- decompose(g, min.vertices = 4)
        if (length(g) > 1) {
            if (length(g) >= gmax)
                nn = nn + 2
            else test = FALSE
        }
        else test = FALSE
    }
    W <- DC[[1]]
    n <- nrow(data)
    wi <- W[, nn]
    SC <- matrix(1, nrow(W), nn)
    SC[] <- wi[DC[[2]]] * wi
    W = W^2/SC
    alpha = 1/(2 * (nn + 1))
    qua = abs(qnorm(alpha))
    W = W * qua
    W = dnorm(W, sd = 1)
    DC[[1]] = W
    L = Laplacian(DC, nn, method)
    f <- function(x, extra) as.vector(extra %*% x)
    if (is.null(centers))
        kmax = 25
    else kmax = max(centers)
    ### add the maxiter parameter to the arpack call, below ###
    U <- arpack(f, extra = L, options = list(n = n, which = "SM",
        nev = kmax, ncv = 2 * kmax, mode = 1, maxiter = max.iter), sym = TRUE)
    ind <- order(U[[1]])
    U[[2]] = U[[2]][indAll, ind]
    U[[1]] = U[[1]][ind]
    if (is.null(centers)) {
        tmp = which.max(diff(U[[1]])) + 1
        centers = which.min(AUC(U[[1]][1:tmp]))
    }
    if (method == "symmetric") {
        rs = sqrt(rowSums(U[[2]]^2))
        U[[2]] = U[[2]]/rs
    }
    result = kmeans(U[[2]], centers = centers, nstart = 20, ...)
    archeType = getClosest(U[[2]][indAll, ], result$centers)
    result$eigenvalue = U[[1]]
    result$eigenvector = U[[2]]
    result$data = data2
    result$indAll = indAll
    result$indUnique = indUnique
    result$L = L
    result$archetype = archeType
    result$call = call
    class(result) = c("specClust", "kmeans")
    result
}
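With the modified function sourced (together with the kknn internals noted above), the call from the question works with a higher iteration cap, for example:
spec <- specClust2(df1, centers = 7, nn = 7, method = "symmetric", max.iter = 30000)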
