When is it advisable to use sparse matrices in R? - r

I have been tinkering with power simulations recently and I have the following code:
library(MASS)
library(Matrix)
simdat <- data.frame(mmm = rep(rep(factor(1:2,
labels=c("m1", "m2")),
each = 2),
times = 2800),
ttt = rep(factor(1:2,
labels = c("t1", "t2")),
times = 5600),
sss = rep(factor(1:70),
each = 160),
iii = rep(rep(factor(1:40),
each = 4),
times = 70))
beta <- c(1, 2)
X1 <- model.matrix(~ mmm,
data = simdat)
Z1 <- model.matrix(~ ttt,
data = simdat)
X1 and Z1 are 11200x2 matrices. With the help of Stackoverflow I managed to make my calculations a lot more efficient than they were before:
funab <- function(){
ran_sub <- mvrnorm(70, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
ran_ite <- mvrnorm(40, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
Mb <- as.vector(X1 %*% beta)
M1 <- rowSums(Z1 * ran_sub[rep(1:70,
each = 160),])
M2 <- rowSums(Z1 * ran_ite[rep(rep(1:40, each = 4),
times = 70),])
Mout <- Mb + M1 + M2
Y <- as.vector(Mout) + rnorm(length(Mout), mean = 0 , sd = 0.27)
}
Y will then be a vector of length 11200. I then replicate this function a lot (say 1000 times):
sim <- replicate(n = 1000,
expr = funab()},
simplify = FALSE)
sim will be a 11200x1000 list. Given that I want to do this a lot more and possibly include more code into funab() I wonder if it is advisable to use sparse matrices for X1 and Z1 in the calculations in funab() as it is now?

Ok, I've tried to follow an advice given in the comments to my question and ran a test with the microbenchmark package. To make copy and pasting easier I will repeat the code from above:
library(MASS)
library(Matrix)
simdat <- data.frame(mmm = rep(rep(factor(1:2,
labels=c("m1", "m2")),
each = 2),
times = 2800),
ttt = rep(factor(1:2,
labels = c("t1", "t2")),
times = 5600),
sss = rep(factor(1:70),
each = 160),
iii = rep(rep(factor(1:40),
each = 4),
times = 70))
beta <- c(1, 2)
X1 <- model.matrix(~ mmm,
data = simdat)
Z1 <- model.matrix(~ ttt,
data = simdat)
I now create the same matrices as sparse matrices:
sparseX1 <- sparse.model.matrix(~ mmm,
data = simdat)
sparseZ1 <- sparse.model.matrix(~ ttt,
data = simdat)
I then set up the two functions:
funab_sparse <- function(){
ran_sub <- mvrnorm(70, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
ran_ite <- mvrnorm(40, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
Mb <- as.vector(sparseX1 %*% beta)
M1 <- Matrix::rowSums(sparseZ1 * ran_sub[rep(1:70,
each = 160),])
M2 <- Matrix::rowSums(sparseZ1 * ran_ite[rep(rep(1:40, each = 4),
times = 70),])
Mout <- Mb + M1 + M2
Y <- as.vector(Mout) + rnorm(length(Mout), mean = 0 , sd = 0.27)
}
funab <- function(){
ran_sub <- mvrnorm(70, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
ran_ite <- mvrnorm(40, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
Mb <- as.vector(X1 %*% beta)
M1 <- rowSums(Z1 * ran_sub[rep(1:70,
each = 160),])
M2 <- rowSums(Z1 * ran_ite[rep(rep(1:40, each = 4),
times = 70),])
Mout <- Mb + M1 + M2
Y <- as.vector(Mout) + rnorm(length(Mout), mean = 0 , sd = 0.27)
}
library(microbenchmark)
res <- microbenchmark(funab(), funab_sparse(), times = 1000)
and get the results:
> res <- microbenchmark(funab(), funab_sparse(), times = 1000)
> res
Unit: milliseconds
expr min lq median uq max neval
funab() 2.200342 2.277006 2.309587 2.481627 69.99895 1000
funab_sparse() 8.419564 8.568157 9.666248 9.874024 75.88907 1000
Assuming that I did not make any substantial mistakes I can conclude that with this particular way of doing the calculations using sparse matrices will not speed up my code.

Related

from clustering to portfolio selection in R

I need help.
I need to make a portfolio selection (Markowitz) and my script is the following:
The following is the matrix of stock returns:
a <- c(0.00444, -0.00553, -0.01007, -0.00012, 0.04133, -0.02472, -0.01771,
-0.00994, -0.06933, 0.00476)
b <- c(-0.01926, 0.06008, 0.02839, 0.00930, 0.02072, 0.02072, 0.03670, -0.02049,
-0.01644, 0.02375)
c <- c(-0.00719, 0.02296, -0.06438, 0.008805, -0.00603, -0.00663, -0.01160,
-0.00298, 0.00713, 0.00790)
d <- c(-0.01753, 0.00809, 0.02301, -0.00631, -0.026114,0.03157, -0.02488,
-0.01013, -0.03231, -0.00763)
e <- c(-0.02153, 0.00863, -0.02929, -0.01424, -0.01940, -0.02313, -0.04146,
-0.02610, 0.00050, -0.04700)
stocks <- cbind(a,b,c,d,e)
And the following is the market index:
rmkt <- c(-0.01159, -0.02787, -0.05936, -0.09417, -0.03027,-0.03161, -0.03166,
-0.04092, -0.02841, -0.009781)
for example the time is 10 so:
time <- 1:10
I used the following code for clustering:
Nsim = 10
opt_num_centers = rep(0, Nsim)
est_centers_coefs = est_centers_fits = rep(list(0), Nsim)
for(kk in 1:Nsim){
rmkt_list <- list()
for(i in 1:5){
rmkt_list[[i]] = rmkt
}
stock_list_mat = do.call("cbind", stocks_list)
class(stock_list_mat)
stock_mat <- stock_list_mat[, c(1:societies_stocks)]
class(stock_mat)
nseg = 30
B = basis_array(time, min(time), max(time), ndt = nseg,
deg = 3, max_derivs = 0, sparse = FALSE) [[1]]
M <- ncol(B)
P <- crossprod(diff(diag(ncol(B)), diff = 3))
dim(stock_mat)
class(stock_mat)
est_coef = matrix(0, nrow = M, ncol(stock_mat))
est_bet = matrix(0, nrow = nrow(stock_mat), ncol(stock_mat))
dim(est_coef)
dim(est_bet)
smooth_par = NULL
for(i in 1:ncol(stock_mat)){
bb = expectreg.ls(stock_mat[ , i] ~ rb(time,"special", by = rmkt, B = B, P = P),
smooth = "schall", expectiles = 0.5)
smooth_par = c(smooth_par, bb$lambda$time)
est_coef[, i] = bb$coefficients$time[, 1]
est_bet[, i] = B %*% bb$coefficients$time[, 1]
}
sc_est_coef = scale(est_coef)
aa = (clusGap(t(sc_est_coef), kmeans, 10, B = 100, nstart = 50, iter.max = 50))
n_clust = maxSE(aa$Tab[, "gap"], aa$Tab[, "SE.sim"] ,"Tibs2001SEmax")
opt_num_centers[kk] = n_clust
km = kmeans(t(sc_est_coef), n_clust, nstart = 100, iter.max = 100)
est_centers_coefs[[kk]] = t(km$centers)
est_centers_fits[[kk]] = B %*% est_centers_coefs[[kk]]
cat("\t", "simulation =", kk, "\n","\t", "# Clust =", opt_num_centers[kk], "\n")
}
Considering everything how can I proceed to make a portfolio selection?
Thank u all.

Why does function return NULL?

A beginner in R over here, so apologies for the basic question.
Why does ATE return a null vector instead of saving the values of the difference of the means?
fun.cluster <- function(M, N){
set.seed(02139)
J <- 1:M # vector J_i
df <- as.data.frame(matrix(data=1:N, nrow = N, ncol = 1)) #data frame of all original values
df$cluster <- cut(df$V1, M, labels = 1:M) #breaking the dataframe into clusters
df$cluster <- as.numeric(df$cluster)
Y1 <- as.vector(sample(J, 5)) # assigning treatment
df$treatment <- ifelse(df$cluster %in% Y1, df$treatment <- 1, df$treatment <- 0)
#Inducing intracluster correlation:
mu_0j <- runif(n = 50, min = -1, max = 1)
df$V1[df$treatment==0] <- mu_0j
mu_1j <- runif(n=50, min = -0.5, max = 1.5)
df$V1[df$treatment==0] <- mu_1j
# drawing values
y_0i <- rnorm(n = 50, mean = mu_0j, sd = 1)
y_1i <- rnorm(n = 50, mean = mu_1j, sd = 1)
D_i <- as.vector(c(y_0i, y_1i))
# calculating ATE:
ATE[i] <- mean(y_1i - y_0i)
}
ATE <- c()
for(i in 1:10){
fun.cluster(M = 10, N = 100)
}

Performing t-Test Selection manually

I’m trying to write simulation code, that generates data and runs t-test selection (discarding those predictors whose t-test p-value exceeds 0.05, retaining the rest) on it. The simulation is largely an adaptation of Applied Econometrics with R by Kleiber and Zeileis (2008, pp. 183–189).
When running the code, it usually fails. Yet with certain seeds (e.g. 1534) it produces plausible output. If it does not produce output (e.g. 1911), it fails due to: "Error in x[, ii] : subscript out of bounds", which traces back to na.omit.data.frame(). So, for some reason, the way I attempt to handle the NAs seems to fail, but I'm unable to figure out in how so.
coef <- rep(coef[,3], length.out = pdim+1)
err <- as.vector(rnorm(nobs, sd = sd))
uX <- c(rep(1, times = nobs))
pX <- matrix(scale(rnorm(nobs)), byrow = TRUE, ncol = pdim, nrow = nobs)
X <- cbind(uX, pX)
y <- coef %*% t(X) + err
y <- matrix(y)
tTp <- (summary(lm(y ~ pX)))$coefficients[,4]
tTp <- tTp[2:length(tTp)]
TTT <- matrix(c(tTp, rep(.7, ncol(pX)-length(tTp))))
tX <- matrix(NA, ncol = ncol(pX), nrow = nrow(pX))
for(i in 1:ncol(pX)) {ifelse(TTT[i,] < ALPHA, tX[,i] <- pX[,i], NA)}
tX <- matrix(Filter(function(x)!all(is.na(x)), tX), nrow = nobs)
TTR <- lm(y ~ tX)
The first block is unlikely to the cause of the error. It merely generates the data and works well on its own and with other methods, like PCA, as well. The second block pulls the p-values from the regression output; removes the p-value of the intercept (beta_0); and fills the vector with as many 7s as necessary to have the same length as the number of variables, to ensure the same dimension for matrix calculations. Seven is arbitrary and could be any number larger than 0.05 to not pass the test of the loop. This becomes – I believe – necessary, if R discards predictors due to multicollinearity.
The final block creates an empty matrix of the original dimensions; inserts the original data, if the t-test p-value is lower than 0.05, else retains the NA; while the penultimate line removes all columns containing NAs ((exclusively NA or one NA is the same here) taken from mnel’s answer to Remove columns from dataframe where ALL values are NA); lastly, the modified data is again put in the shape of a linear regression.
Does anyone know what causes this behavior or how it would work as intended? I would expect it to either work or not, but not kind of both. Ideally, the former.
A working version of the code is:
set.seed(1534)
Sim_TTS <- function(nobs = c(1000, 15000), pdim = pdims, coef = coef100,
model = c("MLC", "MHC"), ...){
DGP_TTS <- function(nobs = 1000, model = c("MLC", "MHC"), coef = coef100,
sd = 1, pdim = pdims, ALPHA = 0.05)
{
model <- match.arg(model)
if(model == "MLC") {
coef <- rep(coef[,1], length.out = pdim+1)
err <- as.vector(rnorm(nobs, sd = sd))
uX <- c(rep(1, times = nobs))
pX <- matrix(scale(rnorm(nobs)), byrow = TRUE, ncol = pdim, nrow = nobs)
X <- cbind(uX, pX)
y <- coef %*% t(X) + err
y <- matrix(y)
tTp <- (summary(lm(y ~ pX)))$coefficients[,4]
tTp <- tTp[2:length(tTp)]
TTT <- matrix(c(tTp, rep(.7, ncol(pX)-length(tTp))))
tX <- matrix(NA, ncol = ncol(pX), nrow = nrow(pX))
for(i in 1:ncol(pX)) {ifelse(TTT[i,] < ALPHA, tX[,i] <- pX[,i], NA)}
tX <- matrix(Filter(function(x)!all(is.na(x)), tX), nrow = nobs)
TTR <- lm(y ~ tX)
} else {
coef <- rep(coef[,2], length.out = pdim+1)
err <- as.vector(rnorm(nobs, sd = sd))
uX <- c(rep(1, times = nobs))
pX <- matrix(scale(rnorm(nobs)), byrow = TRUE, ncol = pdim, nrow = nobs)
X <- cbind(uX, pX)
y <- coef %*% t(X) + err
y <- matrix(y)
tTp <- (summary(lm(y ~ pX)))$coefficients[,4]
tTp <- tTp[2:length(tTp)]
TTT <- matrix(c(tTp, rep(.7, ncol(pX)-length(tTp))))
tX <- matrix(NA, ncol = ncol(pX), nrow = nrow(pX))
for(i in 1:ncol(pX)) {ifelse(TTT[i,] < ALPHA, tX[,i] <- pX[,i], NA)}
tX <- matrix(Filter(function(x)!all(is.na(x)), tX), nrow = nobs)
TTR <- lm(y ~ tX)
}
return(TTR)
}
PG_TTS <- function(nrep = 1, ...)
{
rsq <- matrix(rep(NA, nrep), ncol = 1)
rsqad <- matrix(rep(NA, nrep), ncol = 1)
pastr <- matrix(rep(NA, nrep), ncol = 1)
vmat <- cbind(rsq, rsqad, pastr)
colnames(vmat) <- c("R sq.", "adj. R sq.", "p*")
for(i in 1:nrep) {
vmat[i,1] <- summary(DGP_TTS(...))$r.squared
vmat[i,2] <- summary(DGP_TTS(...))$adj.r.squared
vmat[i,3] <- length(DGP_TTS(...)$coefficients)-1
}
return(c(mean(vmat[,1]), mean(vmat[,2]), round(mean(vmat[,3]))))
}
SIM_TTS <- function(...)
{
prs <- expand.grid(pdim = pdim, nobs = nobs, model = model)
nprs <- nrow(prs)
pow <- matrix(rep(NA, 3 * nprs), ncol = 3)
for(i in 1:nprs) pow[i,] <- PG_TTS(pdim = prs[i,1],
nobs = prs[i,2], model = as.character(prs[i,3]), ...)
rval <- rbind(prs, prs, prs)
rval$stat <- factor(rep(1:3, c(nprs, nprs, nprs)),
labels = c("R sq.", "adj. R sq.", "p*"))
rval$power <- c(pow[,1], pow[,2], pow[,3])
rval$nobs <- factor(rval$nobs)
return(rval)
}
psim_TTS <- SIM_TTS()
tab_TTS <- xtabs(power ~ pdim + stat + model + nobs, data = psim_TTS)
ftable(tab_TTS, row.vars = c("model", "nobs", "stat"), col.vars = "pdim")}
FO_TTS <- Sim_TTS()
FO_TTS
}
Preceeded by:
pdims <- seq(12, 100, 4)
coefLC12 <- c(0, rep(0.2, 4), rep(0.1, 4), rep(0, 4))/1.3
rtL <- c(0.2, rep(0, 3))/1.3
coefLC100 <- c(coefLC12, rep(rtL, 22))
coefHC12 <- c(0, rep(0.8, 4), rep(0.4, 4), rep(0, 4))/1.1
rtH <- c(0.8, rep(0, 3))/1.1
coefHC100 <- c(coefHC12, rep(rtH, 22))
coef100 <- cbind(coefLC100, coefHC100)
I’m aware that model selection via the significance of individual predictors is not recommended, but that is the whole point – it is meant to be compared to more sophisticated methods.

Bootstrapping of multiple values using boot::boot()

I try to estimate confidence intervals for several parameters of a nonlinear model using bootstrapping. Right now, I do bootstrapping for for each parameter individually. Therefore I have to gererate the model serveral times.
Here is an example:
library(boot)
# generate some data:
x <- rnorm(300, mean = 5, sd = 2)
y <- xvalues^2*rnorm(300, mean = 1.5, sd = 1) + rnorm(300, mean = 3, sd = 1)
data <- data.frame(x = x, y = y)
# this is my model: nls(y ~ b1*x^2+b2, data = data, start = list(b1 = 1.5,b2 = 3))
# functions for bootstrapping:
getParamB1 <- function(x1, idx){
data <- x1 %>%
dplyr::slice(idx)
model <- nls(y ~ b1*x^2+b2, data = data, start = list(b1 = 1.5,b2 = 3))
coef(model)[['b1']]
}
getParamB2 <- function(x1, idx){
data <- x1 %>%
dplyr::slice(idx)
model <- nls(y ~ b1*x^2+b2, data = data, start = list(b1 = 1.5,b2 = 3))
coef(model)[['b2']]
}
# Calculate bootstrap confidence intervals
btrpB1 <- boot(data, statistic = getParamB1, R=200)
btrpB2 <- boot(data, statistic = getParamB2, R=200)
ciB1 <- boot.ci(btrpB1)
ciB2 <- boot.ci(btrpB2)
This is of course not very nice code. Is there a way to estiamte confidence intervals for several parameters (here b1 and b2) at once?
How about this?
library(boot)
# generate some data:
x <- rnorm(300, mean = 5, sd = 2)
y <- x^2 * rnorm(300, mean = 1.5, sd = 1) + rnorm(300, mean = 3, sd = 1)
df <- data.frame(x = x, y = y)
m1 <- nls(y ~ b1 * x^2 + b2, data = df, start = list(b1 = 1.5, b2 = 3))
boot.coef <- function(mod, data, indices) {
assign(deparse(mod$data), data[indices, ])
m <- eval(mod$call)
return(coef(m))
}
results <- boot(data = df, statistic = boot.coef,
R = 1000, mod = m1)

loop with certain values is not working

I just need help for the first loop! I would like to run the loop for each certain value of m (see first line in code) but its running only for 1:10? The outcome shoud be stored in the last rows msediff1 to msediff100! Also i need the graphics for each value of m!Thanks in advance!
m = c(1,2,3,4,5,6,7,8,9,10,25,50,100)
for (m in 1:length(unique(m))){
n <- 150
x1 <- rnorm(n = n, mean = 10, sd = 4)
R <- 100 # Number of reps
results.true <- matrix(NA , ncol = 2, nrow = R)
colnames(results.true) <- c("beta0.hat", "beta1.hat")
results.diff <- matrix(NA, ncol = 2, nrow = R)
colnames(results.diff) <- c("beta0.hat", "betadiff.hat")
sigma <- 1.2
beta <- c(1.2)
X <- cbind(x1)
if (m==1){d0 <- .7071; d <- c(-.7071)}
if (m==2){d0 = .8090; d = c(-.5,-.309)}
if (m==3){d0 = .8582; d = c(-.3832,-.2809,-.1942) }
if (m==4){d0 = .8873; d = c(-.3090,-.2464,-.1901,-.1409)}
if (m==5){d0 <- .9064; d <- c(-.2600,-.2167,-.1774,-.1420,-.1103)}
if (m==6){d0 = .92; d = c(-.2238,-.1925,-.1635,-.1369,-.1126,-.0906)}
if (m==7){d0 = .9302; d = c(-.1965,-.1728,-.1506,-.1299,-.1107,-.093,-.0768)}
if (m==8){d0 = .9380; d = c(-.1751,-.1565,-.1389,-.1224,-.1069,-.0925,-.0791,-.0666)}
if (m==9){d0 = .9443; d = c(-.1578,-.1429,-.1287,-.1152,-.1025,-.0905,-.0792,-.0687,-.0538)}
if (m==10){d0 <- .9494;
d <- c(-.1437, -.1314, -.1197, -.1085, -.0978, -.0877, -.0782, -.0691, -.0606, -.0527)}
if (m==25){d0 <- 0.97873;
d <- c(-0.06128, -0.05915, -0.05705, -0.05500, -0.05298, -0.05100, -0.04906, -0.04715, -0.04528, -0.04345, -0.04166, -0.03990, -0.03818, -0.03650, -0.03486, -0.03325, -0.03168, -0.03015, -0.02865, -0.02719,
-0.02577, -0.02438, -0.02303, -0.02171, -0.02043) }
if (m==50) {d0 <- 0.98918;
d <- c(-0.03132, -0.03077, -0.03023, -0.02969, -0.02916, -0.02863, -0.02811, -0.02759, -0.02708, -0.02657, -0.02606, -0.02556, -0.02507, -0.02458, -0.02409, -0.02361, -0.02314, -0.02266, -0.02220, -0.02174, -0.02128, -0.02083, -0.02038, -0.01994, -0.01950, -0.01907, -0.01864, -0.01822, -0.01780, -0.01739,-0.01698,-0.01658,-0.01618,-0.01578,-0.01539,-0.01501,-0.01463,-0.01425,-0.01388,-0.01352,
-0.01316,-0.01280,-0.01245,-0.01210,-0.01176,-0.01142,-0.01108,-0.01075,-0.01043,-0.01011) }
if (m==100) { d0 <- 0.99454083;
d <- c(-0.01583636,-0.01569757,-0.01555936,-0.01542178,-0.01528478,-0.01514841,-0.01501262,-0.01487745,-0.01474289,-0.01460892,
-0.01447556,-0.01434282,-0.01421067,-0.01407914,-0.01394819,-0.01381786,-0.01368816,-0.01355903,-0.01343053,-0.01330264,
-0.01317535,-0.01304868,-0.01292260,-0.01279714,-0.01267228,-0.01254803,-0.01242439,-0.01230136,-0.01217894,-0.01205713,
-0.01193592,-0.01181533,-0.01169534,-0.01157596,-0.01145719,-0.01133903,-0.01122148,-0.01110453,-0.01098819,-0.01087247,
-0.01075735,-0.01064283,-0.01052892,-0.01041563,-0.01030293,-0.01019085,-0.01007937,-0.00996850,-0.00985823,-0.00974857,
-0.00963952,-0.00953107,-0.00942322,-0.00931598,-0.00920935,-0.00910332,-0.00899789,-0.00889306,-0.00878884,-0.00868522,
-0.00858220,-0.00847978,-0.00837797,-0.00827675,-0.00817614,-0.00807612,-0.00797670,-0.00787788,-0.00777966,-0.00768203,
-0.00758500,-0.00748857,-0.00739273,-0.00729749,-0.00720284,-0.00710878,-0.00701532,-0.00692245,-0.00683017,-0.00673848,
-0.00664738,-0.00655687,-0.00646694,-0.00637761,-0.00628886,-0.00620070,-0.00611312,-0.00602612,-0.00593971,-0.00585389,
-0.00576864,-0.00568397,-0.00559989,-0.00551638,-0.00543345,-0.00535110,-0.00526933,-0.00518813,-0.00510750,-0.00502745) }
for(r in 1:R){
u <- rnorm(n = n, mean = 0, sd = sigma)
y <- X%*%beta + u
yy = d0* y[(m+1):n]; Xd <- d0* x1[(m+1):n];
for (i in 1:m) { yy <- yy + d[i]* y[(m+1-i):(n-i) ]
Xd = Xd + d[i]* x1[(m+1-i):(n-i)] }
reg.true <- lm(y ~ x1)
reg.diff <- lm(yy ~ Xd)
results.true[r, ] <- coef(reg.true)
results.diff[r, ] <- coef(reg.diff)
}
results.true
results.diff
beta
apply(results.true, MARGIN = 2, FUN = mean)
apply(results.diff, MARGIN = 2, FUN = mean)
co <- 2
dens.true <- density(results.true[, co])
dens.diff <- density(results.diff[, co])
win.graph()
plot(dens.true,
xlim = range(c(results.true[, co], results.diff[, co])),
ylim = range(c(dens.true$y, dens.diff$yy)),
main = "beta estimation true vs. diff", lwd = 2,)
lines(density(results.diff[, co]), col = "red", lwd = 2)
abline(v = beta, col = "blue", lwd = 2)
legend(x=1.24,y=12,c("outcome true","outcome diff"),lty=c(1,1),col =c("black","red") )
legend(x=1.12,y=12,c("m=",m))
#Mean Squared Error
mse=mean(reg.true$residuals^2)
if (m==1) {msediff1=mean(reg.diff$residuals^2)}
if (m==2) {msediff2=mean(reg.diff$residuals^2)}
if (m==3) {msediff3=mean(reg.diff$residuals^2)}
if (m==4) {msediff4=mean(reg.diff$residuals^2)}
if (m==5) {msediff5=mean(reg.diff$residuals^2)}
if (m==6) {msediff6=mean(reg.diff$residuals^2)}
if (m==7) {msediff7=mean(reg.diff$residuals^2)}
if (m==8) {msediff8=mean(reg.diff$residuals^2)}
if (m==9) {msediff9=mean(reg.diff$residuals^2)}
if (m==10) {msediff10=mean(reg.diff$residuals^2)}
if (m==25) {msediff25=mean(reg.diff$residuals^2)}
if (m==50) {msediff50=mean(reg.diff$residuals^2)}
if (m==100) {msediff100=mean(reg.diff$residuals^2)}
}
I can see an error in the code.
m = c(1,2,3,4,5,6,7,8,9,10,25,50,100)
for (m in 1:length(unique(m))){
As soon as the loop starts, m is changed. It's not what's in the first line anymore...
Try, for (ind in 1:length(unique(m))){ if that's not the intention.

Resources