Suppressing causal tree to print in console - r

I am using the causalTree package. When I launch the causalTree function, something is printed in the console.
library(causalTree)
n <- 1000
p <- 2
y <- rnorm(n)
X <- matrix(rnorm(n * p), ncol = p)
D <- rbinom(n, 1, 0.5)
tree <- causalTree(y ~ ., data = data.frame(y, X), treatment = D, split.Rule = "CT", cv.option = "CT", split.Honest = T, cv.Honest = T, split.Bucket = F, xval = 5, cp = 0, minsize = 20, propensity = 0.5)
# [1] 2
# [1] "CT"
Is there a way to avoid this?

Probably the causalTree has multiple print statements. You can prevent this to use invisible(capture.output(...)) like this:
library(causalTree)
n <- 1000
p <- 2
y <- rnorm(n)
X <- matrix(rnorm(n * p), ncol = p)
D <- rbinom(n, 1, 0.5)
invisible(capture.output(tree <- causalTree(y ~ ., data = data.frame(y, X),
treatment = D, split.Rule = "CT",
cv.option = "CT", split.Honest = T,
cv.Honest = T, split.Bucket = F, xval = 5,
cp = 0, minsize = 20, propensity = 0.5)))
Created on 2022-07-19 by the reprex package (v2.0.1)

Related

from clustering to portfolio selection in R

I need help.
I need to make a portfolio selection (Markowitz) and my script is the following:
The following is the matrix of stock returns:
a <- c(0.00444, -0.00553, -0.01007, -0.00012, 0.04133, -0.02472, -0.01771,
-0.00994, -0.06933, 0.00476)
b <- c(-0.01926, 0.06008, 0.02839, 0.00930, 0.02072, 0.02072, 0.03670, -0.02049,
-0.01644, 0.02375)
c <- c(-0.00719, 0.02296, -0.06438, 0.008805, -0.00603, -0.00663, -0.01160,
-0.00298, 0.00713, 0.00790)
d <- c(-0.01753, 0.00809, 0.02301, -0.00631, -0.026114,0.03157, -0.02488,
-0.01013, -0.03231, -0.00763)
e <- c(-0.02153, 0.00863, -0.02929, -0.01424, -0.01940, -0.02313, -0.04146,
-0.02610, 0.00050, -0.04700)
stocks <- cbind(a,b,c,d,e)
And the following is the market index:
rmkt <- c(-0.01159, -0.02787, -0.05936, -0.09417, -0.03027,-0.03161, -0.03166,
-0.04092, -0.02841, -0.009781)
for example the time is 10 so:
time <- 1:10
I used the following code for clustering:
Nsim = 10
opt_num_centers = rep(0, Nsim)
est_centers_coefs = est_centers_fits = rep(list(0), Nsim)
for(kk in 1:Nsim){
rmkt_list <- list()
for(i in 1:5){
rmkt_list[[i]] = rmkt
}
stock_list_mat = do.call("cbind", stocks_list)
class(stock_list_mat)
stock_mat <- stock_list_mat[, c(1:societies_stocks)]
class(stock_mat)
nseg = 30
B = basis_array(time, min(time), max(time), ndt = nseg,
deg = 3, max_derivs = 0, sparse = FALSE) [[1]]
M <- ncol(B)
P <- crossprod(diff(diag(ncol(B)), diff = 3))
dim(stock_mat)
class(stock_mat)
est_coef = matrix(0, nrow = M, ncol(stock_mat))
est_bet = matrix(0, nrow = nrow(stock_mat), ncol(stock_mat))
dim(est_coef)
dim(est_bet)
smooth_par = NULL
for(i in 1:ncol(stock_mat)){
bb = expectreg.ls(stock_mat[ , i] ~ rb(time,"special", by = rmkt, B = B, P = P),
smooth = "schall", expectiles = 0.5)
smooth_par = c(smooth_par, bb$lambda$time)
est_coef[, i] = bb$coefficients$time[, 1]
est_bet[, i] = B %*% bb$coefficients$time[, 1]
}
sc_est_coef = scale(est_coef)
aa = (clusGap(t(sc_est_coef), kmeans, 10, B = 100, nstart = 50, iter.max = 50))
n_clust = maxSE(aa$Tab[, "gap"], aa$Tab[, "SE.sim"] ,"Tibs2001SEmax")
opt_num_centers[kk] = n_clust
km = kmeans(t(sc_est_coef), n_clust, nstart = 100, iter.max = 100)
est_centers_coefs[[kk]] = t(km$centers)
est_centers_fits[[kk]] = B %*% est_centers_coefs[[kk]]
cat("\t", "simulation =", kk, "\n","\t", "# Clust =", opt_num_centers[kk], "\n")
}
Considering everything how can I proceed to make a portfolio selection?
Thank u all.

SIR model (gray) suceptible and (red) infected. keep getting Error in infected & close_contact : non-conformable arrays

It wont print out the gray squares that are meant to represent the people susceptible and red squares that are meant to represent those infected
initial1=function(m,l){
x = matrix(0, nrow = m, ncol = l)
new_infections=x
close_contact=sample(c("S","I"), size=m*l, prob=c(0.9,0.1), replace=TRUE)
close_contact[TRUE] = 1
set.seed(12)
close_contact = sample(c(T, F), size = m*l, replace = TRUE)
close_contact = matrix(close_contact, nrow = m, ncol = l)
infected = x[-50, ] == 1
new_infections[-1, ][infected & close_contact] = 1
}
plot_infection= function(initial1){
sir_colors=c("White", "Grey", "Red")
x1=matrix(initial1, nrow = m)
x1
z=matrix(0, nrow=m, ncol=l)
z[x1=="I"]=1
par(mfrow = c(1,2))
image(z, col = sir_colors)
x2=matrix(initial2, nrow=m)
z1=matrix(0, nrow=m, ncol=l)
z1[x2=="S"]=1
image(z1, col=sir_colors)
}
plot_infection(initial1(m,l))
The following functions are much simpler and plot a graph according to the problem description.
initial1 <- function(m, l){
new_infections <- sample(c(S = 2L, I = 3L), size = m*l, prob = c(0.9, 0.1), replace = TRUE)
new_infections <- matrix(new_infections, nrow = m, ncol = l)
close_contact <- sample(c(TRUE, FALSE), size = m*l, replace = TRUE)
close_contact <- matrix(close_contact, nrow = m, ncol = l)
new_infections[close_contact] <- 1L
new_infections
}
plot_infection <- function(x){
sir_colors <- c("white", "grey", "red")
image(x, col = sir_colors)
}
m <- 50
l <- 50
set.seed(12)
init <- initial1(m, l)
plot_infection(init)

loop with certain values is not working

I just need help for the first loop! I would like to run the loop for each certain value of m (see first line in code) but its running only for 1:10? The outcome shoud be stored in the last rows msediff1 to msediff100! Also i need the graphics for each value of m!Thanks in advance!
m = c(1,2,3,4,5,6,7,8,9,10,25,50,100)
for (m in 1:length(unique(m))){
n <- 150
x1 <- rnorm(n = n, mean = 10, sd = 4)
R <- 100 # Number of reps
results.true <- matrix(NA , ncol = 2, nrow = R)
colnames(results.true) <- c("beta0.hat", "beta1.hat")
results.diff <- matrix(NA, ncol = 2, nrow = R)
colnames(results.diff) <- c("beta0.hat", "betadiff.hat")
sigma <- 1.2
beta <- c(1.2)
X <- cbind(x1)
if (m==1){d0 <- .7071; d <- c(-.7071)}
if (m==2){d0 = .8090; d = c(-.5,-.309)}
if (m==3){d0 = .8582; d = c(-.3832,-.2809,-.1942) }
if (m==4){d0 = .8873; d = c(-.3090,-.2464,-.1901,-.1409)}
if (m==5){d0 <- .9064; d <- c(-.2600,-.2167,-.1774,-.1420,-.1103)}
if (m==6){d0 = .92; d = c(-.2238,-.1925,-.1635,-.1369,-.1126,-.0906)}
if (m==7){d0 = .9302; d = c(-.1965,-.1728,-.1506,-.1299,-.1107,-.093,-.0768)}
if (m==8){d0 = .9380; d = c(-.1751,-.1565,-.1389,-.1224,-.1069,-.0925,-.0791,-.0666)}
if (m==9){d0 = .9443; d = c(-.1578,-.1429,-.1287,-.1152,-.1025,-.0905,-.0792,-.0687,-.0538)}
if (m==10){d0 <- .9494;
d <- c(-.1437, -.1314, -.1197, -.1085, -.0978, -.0877, -.0782, -.0691, -.0606, -.0527)}
if (m==25){d0 <- 0.97873;
d <- c(-0.06128, -0.05915, -0.05705, -0.05500, -0.05298, -0.05100, -0.04906, -0.04715, -0.04528, -0.04345, -0.04166, -0.03990, -0.03818, -0.03650, -0.03486, -0.03325, -0.03168, -0.03015, -0.02865, -0.02719,
-0.02577, -0.02438, -0.02303, -0.02171, -0.02043) }
if (m==50) {d0 <- 0.98918;
d <- c(-0.03132, -0.03077, -0.03023, -0.02969, -0.02916, -0.02863, -0.02811, -0.02759, -0.02708, -0.02657, -0.02606, -0.02556, -0.02507, -0.02458, -0.02409, -0.02361, -0.02314, -0.02266, -0.02220, -0.02174, -0.02128, -0.02083, -0.02038, -0.01994, -0.01950, -0.01907, -0.01864, -0.01822, -0.01780, -0.01739,-0.01698,-0.01658,-0.01618,-0.01578,-0.01539,-0.01501,-0.01463,-0.01425,-0.01388,-0.01352,
-0.01316,-0.01280,-0.01245,-0.01210,-0.01176,-0.01142,-0.01108,-0.01075,-0.01043,-0.01011) }
if (m==100) { d0 <- 0.99454083;
d <- c(-0.01583636,-0.01569757,-0.01555936,-0.01542178,-0.01528478,-0.01514841,-0.01501262,-0.01487745,-0.01474289,-0.01460892,
-0.01447556,-0.01434282,-0.01421067,-0.01407914,-0.01394819,-0.01381786,-0.01368816,-0.01355903,-0.01343053,-0.01330264,
-0.01317535,-0.01304868,-0.01292260,-0.01279714,-0.01267228,-0.01254803,-0.01242439,-0.01230136,-0.01217894,-0.01205713,
-0.01193592,-0.01181533,-0.01169534,-0.01157596,-0.01145719,-0.01133903,-0.01122148,-0.01110453,-0.01098819,-0.01087247,
-0.01075735,-0.01064283,-0.01052892,-0.01041563,-0.01030293,-0.01019085,-0.01007937,-0.00996850,-0.00985823,-0.00974857,
-0.00963952,-0.00953107,-0.00942322,-0.00931598,-0.00920935,-0.00910332,-0.00899789,-0.00889306,-0.00878884,-0.00868522,
-0.00858220,-0.00847978,-0.00837797,-0.00827675,-0.00817614,-0.00807612,-0.00797670,-0.00787788,-0.00777966,-0.00768203,
-0.00758500,-0.00748857,-0.00739273,-0.00729749,-0.00720284,-0.00710878,-0.00701532,-0.00692245,-0.00683017,-0.00673848,
-0.00664738,-0.00655687,-0.00646694,-0.00637761,-0.00628886,-0.00620070,-0.00611312,-0.00602612,-0.00593971,-0.00585389,
-0.00576864,-0.00568397,-0.00559989,-0.00551638,-0.00543345,-0.00535110,-0.00526933,-0.00518813,-0.00510750,-0.00502745) }
for(r in 1:R){
u <- rnorm(n = n, mean = 0, sd = sigma)
y <- X%*%beta + u
yy = d0* y[(m+1):n]; Xd <- d0* x1[(m+1):n];
for (i in 1:m) { yy <- yy + d[i]* y[(m+1-i):(n-i) ]
Xd = Xd + d[i]* x1[(m+1-i):(n-i)] }
reg.true <- lm(y ~ x1)
reg.diff <- lm(yy ~ Xd)
results.true[r, ] <- coef(reg.true)
results.diff[r, ] <- coef(reg.diff)
}
results.true
results.diff
beta
apply(results.true, MARGIN = 2, FUN = mean)
apply(results.diff, MARGIN = 2, FUN = mean)
co <- 2
dens.true <- density(results.true[, co])
dens.diff <- density(results.diff[, co])
win.graph()
plot(dens.true,
xlim = range(c(results.true[, co], results.diff[, co])),
ylim = range(c(dens.true$y, dens.diff$yy)),
main = "beta estimation true vs. diff", lwd = 2,)
lines(density(results.diff[, co]), col = "red", lwd = 2)
abline(v = beta, col = "blue", lwd = 2)
legend(x=1.24,y=12,c("outcome true","outcome diff"),lty=c(1,1),col =c("black","red") )
legend(x=1.12,y=12,c("m=",m))
#Mean Squared Error
mse=mean(reg.true$residuals^2)
if (m==1) {msediff1=mean(reg.diff$residuals^2)}
if (m==2) {msediff2=mean(reg.diff$residuals^2)}
if (m==3) {msediff3=mean(reg.diff$residuals^2)}
if (m==4) {msediff4=mean(reg.diff$residuals^2)}
if (m==5) {msediff5=mean(reg.diff$residuals^2)}
if (m==6) {msediff6=mean(reg.diff$residuals^2)}
if (m==7) {msediff7=mean(reg.diff$residuals^2)}
if (m==8) {msediff8=mean(reg.diff$residuals^2)}
if (m==9) {msediff9=mean(reg.diff$residuals^2)}
if (m==10) {msediff10=mean(reg.diff$residuals^2)}
if (m==25) {msediff25=mean(reg.diff$residuals^2)}
if (m==50) {msediff50=mean(reg.diff$residuals^2)}
if (m==100) {msediff100=mean(reg.diff$residuals^2)}
}
I can see an error in the code.
m = c(1,2,3,4,5,6,7,8,9,10,25,50,100)
for (m in 1:length(unique(m))){
As soon as the loop starts, m is changed. It's not what's in the first line anymore...
Try, for (ind in 1:length(unique(m))){ if that's not the intention.

R Package Deepnet: Why sae_dnn_train does not work with large data sets

I am trying sae.dnn.train() with 5000 cases, 55-inputs and 3 hidden layers.
Why function nn.predict returns NaN? (vector)
I am using the following command
Nrow <-5000
Ncol <- 55
v <- c(rnorm(Nrow*Ncol,1, 0.5))
x <- matrix(v, nrow=Nrow, ncol=Ncol)
y <- c(rep(1, Nrow/2), rep(0, Nrow/2))
dnn <- sae.dnn.train(x, y, hidden = c(100,90,80),activationfun = "tanh", learningrate = 0.6, momentum = 0.5, learningrate_scale = 1.0,output = "sigm", sae_output = "linear", numepochs = 10, batchsize = 100, hidden_dropout = 0, visible_dropout = 0)
yy <- nn.predict(dnn, x)

When is it advisable to use sparse matrices in R?

I have been tinkering with power simulations recently and I have the following code:
library(MASS)
library(Matrix)
simdat <- data.frame(mmm = rep(rep(factor(1:2,
labels=c("m1", "m2")),
each = 2),
times = 2800),
ttt = rep(factor(1:2,
labels = c("t1", "t2")),
times = 5600),
sss = rep(factor(1:70),
each = 160),
iii = rep(rep(factor(1:40),
each = 4),
times = 70))
beta <- c(1, 2)
X1 <- model.matrix(~ mmm,
data = simdat)
Z1 <- model.matrix(~ ttt,
data = simdat)
X1 and Z1 are 11200x2 matrices. With the help of Stackoverflow I managed to make my calculations a lot more efficient than they were before:
funab <- function(){
ran_sub <- mvrnorm(70, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
ran_ite <- mvrnorm(40, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
Mb <- as.vector(X1 %*% beta)
M1 <- rowSums(Z1 * ran_sub[rep(1:70,
each = 160),])
M2 <- rowSums(Z1 * ran_ite[rep(rep(1:40, each = 4),
times = 70),])
Mout <- Mb + M1 + M2
Y <- as.vector(Mout) + rnorm(length(Mout), mean = 0 , sd = 0.27)
}
Y will then be a vector of length 11200. I then replicate this function a lot (say 1000 times):
sim <- replicate(n = 1000,
expr = funab()},
simplify = FALSE)
sim will be a 11200x1000 list. Given that I want to do this a lot more and possibly include more code into funab() I wonder if it is advisable to use sparse matrices for X1 and Z1 in the calculations in funab() as it is now?
Ok, I've tried to follow an advice given in the comments to my question and ran a test with the microbenchmark package. To make copy and pasting easier I will repeat the code from above:
library(MASS)
library(Matrix)
simdat <- data.frame(mmm = rep(rep(factor(1:2,
labels=c("m1", "m2")),
each = 2),
times = 2800),
ttt = rep(factor(1:2,
labels = c("t1", "t2")),
times = 5600),
sss = rep(factor(1:70),
each = 160),
iii = rep(rep(factor(1:40),
each = 4),
times = 70))
beta <- c(1, 2)
X1 <- model.matrix(~ mmm,
data = simdat)
Z1 <- model.matrix(~ ttt,
data = simdat)
I now create the same matrices as sparse matrices:
sparseX1 <- sparse.model.matrix(~ mmm,
data = simdat)
sparseZ1 <- sparse.model.matrix(~ ttt,
data = simdat)
I then set up the two functions:
funab_sparse <- function(){
ran_sub <- mvrnorm(70, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
ran_ite <- mvrnorm(40, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
Mb <- as.vector(sparseX1 %*% beta)
M1 <- Matrix::rowSums(sparseZ1 * ran_sub[rep(1:70,
each = 160),])
M2 <- Matrix::rowSums(sparseZ1 * ran_ite[rep(rep(1:40, each = 4),
times = 70),])
Mout <- Mb + M1 + M2
Y <- as.vector(Mout) + rnorm(length(Mout), mean = 0 , sd = 0.27)
}
funab <- function(){
ran_sub <- mvrnorm(70, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
ran_ite <- mvrnorm(40, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
Mb <- as.vector(X1 %*% beta)
M1 <- rowSums(Z1 * ran_sub[rep(1:70,
each = 160),])
M2 <- rowSums(Z1 * ran_ite[rep(rep(1:40, each = 4),
times = 70),])
Mout <- Mb + M1 + M2
Y <- as.vector(Mout) + rnorm(length(Mout), mean = 0 , sd = 0.27)
}
library(microbenchmark)
res <- microbenchmark(funab(), funab_sparse(), times = 1000)
and get the results:
> res <- microbenchmark(funab(), funab_sparse(), times = 1000)
> res
Unit: milliseconds
expr min lq median uq max neval
funab() 2.200342 2.277006 2.309587 2.481627 69.99895 1000
funab_sparse() 8.419564 8.568157 9.666248 9.874024 75.88907 1000
Assuming that I did not make any substantial mistakes I can conclude that with this particular way of doing the calculations using sparse matrices will not speed up my code.

Resources