loop with certain values is not working - r

I just need help for the first loop! I would like to run the loop for each certain value of m (see first line in code) but its running only for 1:10? The outcome shoud be stored in the last rows msediff1 to msediff100! Also i need the graphics for each value of m!Thanks in advance!
m = c(1,2,3,4,5,6,7,8,9,10,25,50,100)
for (m in 1:length(unique(m))){
n <- 150
x1 <- rnorm(n = n, mean = 10, sd = 4)
R <- 100 # Number of reps
results.true <- matrix(NA , ncol = 2, nrow = R)
colnames(results.true) <- c("beta0.hat", "beta1.hat")
results.diff <- matrix(NA, ncol = 2, nrow = R)
colnames(results.diff) <- c("beta0.hat", "betadiff.hat")
sigma <- 1.2
beta <- c(1.2)
X <- cbind(x1)
if (m==1){d0 <- .7071; d <- c(-.7071)}
if (m==2){d0 = .8090; d = c(-.5,-.309)}
if (m==3){d0 = .8582; d = c(-.3832,-.2809,-.1942) }
if (m==4){d0 = .8873; d = c(-.3090,-.2464,-.1901,-.1409)}
if (m==5){d0 <- .9064; d <- c(-.2600,-.2167,-.1774,-.1420,-.1103)}
if (m==6){d0 = .92; d = c(-.2238,-.1925,-.1635,-.1369,-.1126,-.0906)}
if (m==7){d0 = .9302; d = c(-.1965,-.1728,-.1506,-.1299,-.1107,-.093,-.0768)}
if (m==8){d0 = .9380; d = c(-.1751,-.1565,-.1389,-.1224,-.1069,-.0925,-.0791,-.0666)}
if (m==9){d0 = .9443; d = c(-.1578,-.1429,-.1287,-.1152,-.1025,-.0905,-.0792,-.0687,-.0538)}
if (m==10){d0 <- .9494;
d <- c(-.1437, -.1314, -.1197, -.1085, -.0978, -.0877, -.0782, -.0691, -.0606, -.0527)}
if (m==25){d0 <- 0.97873;
d <- c(-0.06128, -0.05915, -0.05705, -0.05500, -0.05298, -0.05100, -0.04906, -0.04715, -0.04528, -0.04345, -0.04166, -0.03990, -0.03818, -0.03650, -0.03486, -0.03325, -0.03168, -0.03015, -0.02865, -0.02719,
-0.02577, -0.02438, -0.02303, -0.02171, -0.02043) }
if (m==50) {d0 <- 0.98918;
d <- c(-0.03132, -0.03077, -0.03023, -0.02969, -0.02916, -0.02863, -0.02811, -0.02759, -0.02708, -0.02657, -0.02606, -0.02556, -0.02507, -0.02458, -0.02409, -0.02361, -0.02314, -0.02266, -0.02220, -0.02174, -0.02128, -0.02083, -0.02038, -0.01994, -0.01950, -0.01907, -0.01864, -0.01822, -0.01780, -0.01739,-0.01698,-0.01658,-0.01618,-0.01578,-0.01539,-0.01501,-0.01463,-0.01425,-0.01388,-0.01352,
-0.01316,-0.01280,-0.01245,-0.01210,-0.01176,-0.01142,-0.01108,-0.01075,-0.01043,-0.01011) }
if (m==100) { d0 <- 0.99454083;
d <- c(-0.01583636,-0.01569757,-0.01555936,-0.01542178,-0.01528478,-0.01514841,-0.01501262,-0.01487745,-0.01474289,-0.01460892,
-0.01447556,-0.01434282,-0.01421067,-0.01407914,-0.01394819,-0.01381786,-0.01368816,-0.01355903,-0.01343053,-0.01330264,
-0.01317535,-0.01304868,-0.01292260,-0.01279714,-0.01267228,-0.01254803,-0.01242439,-0.01230136,-0.01217894,-0.01205713,
-0.01193592,-0.01181533,-0.01169534,-0.01157596,-0.01145719,-0.01133903,-0.01122148,-0.01110453,-0.01098819,-0.01087247,
-0.01075735,-0.01064283,-0.01052892,-0.01041563,-0.01030293,-0.01019085,-0.01007937,-0.00996850,-0.00985823,-0.00974857,
-0.00963952,-0.00953107,-0.00942322,-0.00931598,-0.00920935,-0.00910332,-0.00899789,-0.00889306,-0.00878884,-0.00868522,
-0.00858220,-0.00847978,-0.00837797,-0.00827675,-0.00817614,-0.00807612,-0.00797670,-0.00787788,-0.00777966,-0.00768203,
-0.00758500,-0.00748857,-0.00739273,-0.00729749,-0.00720284,-0.00710878,-0.00701532,-0.00692245,-0.00683017,-0.00673848,
-0.00664738,-0.00655687,-0.00646694,-0.00637761,-0.00628886,-0.00620070,-0.00611312,-0.00602612,-0.00593971,-0.00585389,
-0.00576864,-0.00568397,-0.00559989,-0.00551638,-0.00543345,-0.00535110,-0.00526933,-0.00518813,-0.00510750,-0.00502745) }
for(r in 1:R){
u <- rnorm(n = n, mean = 0, sd = sigma)
y <- X%*%beta + u
yy = d0* y[(m+1):n]; Xd <- d0* x1[(m+1):n];
for (i in 1:m) { yy <- yy + d[i]* y[(m+1-i):(n-i) ]
Xd = Xd + d[i]* x1[(m+1-i):(n-i)] }
reg.true <- lm(y ~ x1)
reg.diff <- lm(yy ~ Xd)
results.true[r, ] <- coef(reg.true)
results.diff[r, ] <- coef(reg.diff)
}
results.true
results.diff
beta
apply(results.true, MARGIN = 2, FUN = mean)
apply(results.diff, MARGIN = 2, FUN = mean)
co <- 2
dens.true <- density(results.true[, co])
dens.diff <- density(results.diff[, co])
win.graph()
plot(dens.true,
xlim = range(c(results.true[, co], results.diff[, co])),
ylim = range(c(dens.true$y, dens.diff$yy)),
main = "beta estimation true vs. diff", lwd = 2,)
lines(density(results.diff[, co]), col = "red", lwd = 2)
abline(v = beta, col = "blue", lwd = 2)
legend(x=1.24,y=12,c("outcome true","outcome diff"),lty=c(1,1),col =c("black","red") )
legend(x=1.12,y=12,c("m=",m))
#Mean Squared Error
mse=mean(reg.true$residuals^2)
if (m==1) {msediff1=mean(reg.diff$residuals^2)}
if (m==2) {msediff2=mean(reg.diff$residuals^2)}
if (m==3) {msediff3=mean(reg.diff$residuals^2)}
if (m==4) {msediff4=mean(reg.diff$residuals^2)}
if (m==5) {msediff5=mean(reg.diff$residuals^2)}
if (m==6) {msediff6=mean(reg.diff$residuals^2)}
if (m==7) {msediff7=mean(reg.diff$residuals^2)}
if (m==8) {msediff8=mean(reg.diff$residuals^2)}
if (m==9) {msediff9=mean(reg.diff$residuals^2)}
if (m==10) {msediff10=mean(reg.diff$residuals^2)}
if (m==25) {msediff25=mean(reg.diff$residuals^2)}
if (m==50) {msediff50=mean(reg.diff$residuals^2)}
if (m==100) {msediff100=mean(reg.diff$residuals^2)}
}

I can see an error in the code.
m = c(1,2,3,4,5,6,7,8,9,10,25,50,100)
for (m in 1:length(unique(m))){
As soon as the loop starts, m is changed. It's not what's in the first line anymore...
Try, for (ind in 1:length(unique(m))){ if that's not the intention.

Related

Residual plots of a regression model in ggplot2

I'm trying to transform two residual plots performed below into ggplot2.
As a description, in order to perform these graphs, it is necessary to previously define some functions associated with the specifics of the class of the adopted model, which I am providing below.
The model is in the fit argument whose data is from the nlme library, and the graphs are plotted at the end of the code using the qqPlot2 function.
rm(list = ls()); cat('\014')
library(ggplot2)
library(dplyr)
library(plotly)
library(nlme)
library(lme4)
library(MASS)
library(tidyverse)
library(splines)
library(gamlss)
library(gridExtra)
library(hnp)
library(car)
extract.lmeDesign2 <- function(m){
start.level = 1
data <- getData(m)
grps <- nlme::getGroups(m)
n <- length(grps)
X <- list()
grp.dims <- m$dims$ncol
Zt <- model.matrix(m$modelStruct$reStruct, data)
cov <- as.matrix(m$modelStruct$reStruct)
i.col <- 1
n.levels <- length(m$groups)
Z <- matrix(0, n, 0)
if (start.level <= n.levels) {
for (i in 1:(n.levels - start.level + 1)) {
if (length(levels(m$groups[[n.levels - i + 1]])) != 1)
{
X[[1]] <- model.matrix(~m$groups[[n.levels - i +
1]] - 1,
contrasts.arg = c("contr.treatment",
"contr.treatment"))
}
else X[[1]] <- matrix(1, n, 1)
X[[2]] <- as.matrix(Zt[, i.col:(i.col + grp.dims[i] -
1)])
i.col <- i.col + grp.dims[i]
Z <- cbind(mgcv::tensor.prod.model.matrix(X),Z)
}
Vr <- matrix(0, ncol(Z), ncol(Z))
start <- 1
for (i in 1:(n.levels - start.level + 1)) {
k <- n.levels - i + 1
for (j in 1:m$dims$ngrps[i]) {
stop <- start + ncol(cov[[k]]) - 1
Vr[ncol(Z) + 1 - (stop:start),ncol(Z) + 1 - (stop:start)] <- cov[[k]]
start <- stop + 1
}
}
}
X <- if (class(m$call$fixed) == "name" && !is.null(m$data$X)) {
m$data$X
} else {
model.matrix(formula(eval(m$call$fixed)),data)
}
y <- as.vector(matrix(m$residuals, ncol = NCOL(m$residuals))[,NCOL(m$residuals)] +
matrix(m$fitted, ncol = NCOL(m$fitted))[,NCOL(m$fitted)])
return(list(
Vr = Vr,
X = X,
Z = Z,
sigmasq = m$sigma ^ 2,
lambda = unique(diag(Vr)),
y = y,
k = n.levels
)
)
}
fit = lme(distance ~ age, method="REML",data = Orthodont)
data.fit <- extract.lmeDesign2(fit)
data <- getData(fit)
y <- data.fit$y
X <- data.fit$X
N <- length(y)
id <- sort(as.numeric(getGroups(fit, level = 1)), index.return = TRUE)$x
n <- length(as.numeric(names(table(id))))
vecni <- (table(id))
p <- ncol(X)
n.levels <- length(fit$groups)
start.level <- 1
Cgrps <- nlme::getGroups(fit, level = start.level)
CCind <- levels((Cgrps))
sigma2 <- fit$sigma^2
obs <- numeric()
for (i in 1:n)
{
obs <- append(obs,1:vecni[i])
}
if (n.levels > 1) {
lZi <- list()
lgi <- list()
numrow <- numeric()
mgroups <- fit$groups
for (n in 1:length(CCind)) {
dgi <- data.frame(as.matrix(mgroups[mgroups == CCind[n], ]))
nrowzi <- dim(dgi)[1]
ncolzi <- 0
girep <- as.numeric(length(levels(dgi[,1])))
for (k in 2:n.levels) {
girep <- c(girep,as.numeric(length(levels(dgi[,k]))))
}
for (k in 1:n.levels) {
ncolzi <- ncolzi + as.numeric(length(levels(dgi[,k])))
}
auxi <- as.vector(table(dgi[,1]))
for (i in 2:n.levels) {
auxi <- c(auxi,as.vector(table(dgi[,i])))
}
l <- 1
Zi <- matrix(0,nrowzi,ncolzi)
for (j in 1:ncolzi) {
Zi[l:(l + auxi[j] - 1),j] <- rep(1,auxi[j])
l <- l + auxi[j]
if (l == (nrowzi + 1)) l <- 1
}
lZi[[n]] <- Zi
numrow[n] <- dim(Zi)[1]
comp.var <- as.matrix(fit1$modelStruct$reStruct)
auxg <- rep(as.numeric(comp.var[1])*sigma2,girep[1])
for (i in 2:length(girep)) {
auxg <- c(auxg,rep(as.numeric(comp.var[i])*sigma2,girep[i]))
}
lgi[[n]] <- diag(auxg)
}
q <- dim(lgi[[1]])[1]
for (h in 2:length(CCind)) {
q <- c(q,dim(lgi[[h]])[1])
}
Z <- lZi[[1]]
for (k in 2:length(CCind)) {
Z <- bdiag(Z,(lZi[[k]]))
}
Z <- as.matrix(Z)
nrowZi <- lZi[[1]]
for (h in 2:length(CCind)) {
nrowZi <- c(nrowZi,dim(lZi[[h]])[1])
}
Gam <- lgi[[1]]
for (k in 2:length(CCind)) {
Gam <- bdiag(Gam,(lgi[[k]]))
}
Gam <- as.matrix(Gam)
}else{
mataux <- model.matrix(fit$modelStruct$reStruct,data)
mataux <- as.data.frame(cbind(mataux,id))
lZi <- list()
lgi <- list()
for (i in (as.numeric(unique(id)))) {
lZi[[i]] <- as.matrix((subset(split(mataux,id == i,
drop = T)$`TRUE`,select = -id)))
lgi[[i]] <- getVarCov(fit,type = "random.effects")
}
Z <- as.matrix(bdiag(lZi))
g <- getVarCov(fit,type = "random.effects")
q <- dim(g)[1]
Gam <- as.matrix(kronecker(diag(length(as.numeric(unique(id)))),g))
}
if (n.levels > 1) {
if (!inherits(fit, "lme"))
stop("object does not appear to be of class lme")
grps <- nlme::getGroups(fit)
n <- length(grps)
n.levels <- length(fit$groups)
if (is.null(fit$modelStruct$corStruct))
n.corlevels <- 0
else n.corlevels <- length(all.vars(nlme::getGroupsFormula(fit$modelStruct$corStruct)))
if (n.levels < n.corlevels) {
getGroupsFormula(fit$modelStruct$corStruct)
vnames <- all.vars(nlme::getGroupsFormula(fit$modelStruct$corStruct))
lab <- paste(eval(parse(text = vnames[1]), envir = fit$data))
if (length(vnames) > 1)
for (i in 2:length(vnames)) {
lab <- paste(lab, "/", eval(parse(text = vnames[i]),
envir = fit$data), sep = "")
}
grps <- factor(lab)
}
if (n.levels >= start.level || n.corlevels >= start.level) {
if (n.levels >= start.level)
Cgrps <- nlme::getGroups(fit, level = start.level)
else Cgrps <- grps
Cind <- sort(as.numeric(Cgrps), index.return = TRUE)$ix
rCind <- 1:n
rCind[Cind] <- 1:n
Clevel <- levels(Cgrps)
n.cg <- length(Clevel)
size.cg <- array(0, n.cg)
for (i in 1:n.cg) size.cg[i] <- sum(Cgrps == Clevel[i])
}
else {
n.cg <- 1
Cind <- 1:n
}
if (is.null(fit$modelStruct$varStruct))
w <- rep(fit$sigma, n)
else {
w <- 1/nlme::varWeights(fit$modelStruct$varStruct)
group.name <- names(fit$groups)
order.txt <- paste("ind<-order(data[[\"", group.name[1],
"\"]]", sep = "")
if (length(fit$groups) > 1)
for (i in 2:length(fit$groups)) order.txt <- paste(order.txt,
",data[[\"", group.name[i], "\"]]", sep = "")
order.txt <- paste(order.txt, ")")
eval(parse(text = order.txt))
w[ind] <- w
w <- w * fit$sigma
}
w <- w[Cind]
if (is.null(fit$modelStruct$corStruct))
lR <- array(1, n)
else {
c.m <- nlme::corMatrix(fit$modelStruct$corStruct)
if (!is.list(c.m)) {
lR <- c.m
lR <- lR[Cind, ]
lR <- lR[, Cind]
}
else {
lR <- list()
ind <- list()
for (i in 1:n.cg) {
lR[[i]] <- matrix(0, size.cg[i], size.cg[i])
ind[[i]] <- 1:size.cg[i]
}
Roff <- cumsum(c(1, size.cg))
gr.name <- names(c.m)
n.g <- length(c.m)
j0 <- rep(1, n.cg)
ii <- 1:n
for (i in 1:n.g) {
Clev <- unique(Cgrps[grps == gr.name[i]])
if (length(Clev) > 1)
stop("inner groupings not nested in outer!!")
k <- (1:n.cg)[Clevel == Clev]
j1 <- j0[k] + nrow(c.m[[i]]) - 1
lR[[k]][j0[k]:j1, j0[k]:j1] <- c.m[[i]]
ind1 <- ii[grps == gr.name[i]]
ind2 <- rCind[ind1]
ind[[k]][j0[k]:j1] <- ind2 - Roff[k] + 1
j0[k] <- j1 + 1
}
for (k in 1:n.cg) {
lR[[k]][ind[[k]], ] <- lR[[k]]
lR[[k]][, ind[[k]]] <- lR[[k]]
}
}
}
if (is.list(lR)) {
for (i in 1:n.cg) {
wi <- w[Roff[i]:(Roff[i] + size.cg[i] - 1)]
lR[[i]] <- as.vector(wi) * t(as.vector(wi) * lR[[i]])
}
}
else if (is.matrix(lR)) {
lR <- as.vector(w) * t(as.vector(w) * lR)
}
else {
lR <- w^2 * lR
}
if (is.list(lR)) {
R <- lR[[1]]
for (k in 2:n.cg) {
R <- bdiag(R,lR[[k]])
}
R <- as.matrix(R)
}
else{
R <- diag(lR)
}
}else{
R <- getVarCov(fit,type = "conditional",individual = 1)[[1]]
for (i in 2:length(as.numeric(unique(id)))) {
R <- as.matrix(bdiag(R,getVarCov(fit,
type = "conditional",individual = i)[[1]] ) )
}
}
sqrt.matrix <- function(mat) {
mat <- as.matrix(mat)
singular_dec <- svd(mat,LINPACK = F)
U <- singular_dec$u
V <- singular_dec$v
D <- diag(singular_dec$d)
sqrtmatrix <- U %*% sqrt(D) %*% t(V)
}
V <- (Z %*% Gam %*% t(Z)) + R
iV <- solve(V)
varbeta <- solve((t(X) %*% iV %*% X))
Q <- (iV - iV %*% X %*% (varbeta) %*% t(X) %*% iV )
zq <- t(Z) %*% Q
norm.frob.ZtQ <- sum(diag(zq %*% t(zq)))
eblue <- as.vector(fixef(fit))
eblup <- Gam %*% t(Z) %*% iV %*% (y - X %*% eblue)
predm <- X %*% eblue
predi <- X %*% eblue + Z %*% eblup
resm <- (y - predm)
resc <- (y - predi)
var.resm <- V - X %*% solve(t(X) %*% iV %*% X) %*% t(X)
var.resc <- R %*% Q %*% R
ident <- diag(N)
auxnum <- (R %*% Q %*% Z %*% Gam %*% t(Z) %*% Q %*% R)
auxden <- R %*% Q %*% R
CF <- diag(auxnum)/diag(auxden)
rescp <- resc/sqrt(diag(var.resc))
R.half <- sqrt.matrix(R)
auxqn <- eigen((R.half %*% Q %*% R.half), symmetric = T, only.values = FALSE)
lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(sqrt.matrix(R[1:N,1:N]))
var.resmcp <- lt %*% var.resc[1:N,1:N] %*% t(lt)
resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
if (n.levels > 1) {
aux <- Gam %*% t(Z) %*% Q %*% Z %*% Gam
qm <- q - 1
dm <- matrix(0,length(CCind),1)
gbi <- aux[1:(q[1]),(1:q[1])]
eblupi <- eblup[1:(q[1]),]
dmi <- t(eblupi) %*% ginv(gbi) %*% eblupi
dm[1] <- dmi
for (j in 2:length(CCind)) {
gbi <- aux[((j - 1)*q[(j - 1)] + 1 ):(q[j] + q[(j - 1)]),((j - 1)*q[(j - 1)] + 1 ):(q[j] + q[(j - 1)])]
eblupi <- eblup[((j - 1)*q[(j - 1)] + 1 ):(q[j] + q[(j - 1)]),]
dmi <- t(eblupi) %*% ginv(gbi) %*% eblupi
dm[j] <- dmi
}
}else{
aux <- Gam %*% t(Z) %*% Q %*% Z %*% Gam
qm <- q - 1
dm <- matrix(0,n,1)
for (j in 1:length(CCind))
{
if (q == 1)
{
gbi <- aux[j,j]
eblupi <- eblup[(q*j - qm):(q*j)]
dmi <- t(eblupi) %*% ginv(gbi) %*% eblupi
dm[j] <- dmi
}
else
{
gbi <- aux[(q*j - qm):(q*j),(q*j - qm):(q*j)]
cat(gbi,'\n','\t')
eblupi <- eblup[(q*j - qm):(q*j)]
dmi <- t(eblupi) %*% ginv(gbi) %*% eblupi
dm[j] <- dmi
}
}
}
qqPlot2 <- function(x, distribution="norm", ..., ylab=deparse(substitute(x)),
xlab=paste(distribution, "quantiles"), main = NULL,
las = par("las"),
envelope = .95,
col = palette()[1],
col.lines = palette()[2], lwd = 2, pch = 1, cex = par("cex"),
cex.lab = par("cex.lab"), cex.axis = par("cex.axis"),
line = c("quartiles", "robust", "none"),
labels = if (!is.null(names(x))) names(x) else seq(along = x),
id.method = "y",
id.n = if (id.method[1] == "identify") Inf else 0,
id.cex = 1, id.col=palette()[1], grid = TRUE)
{
line <- match.arg(line)
good <- !is.na(x)
ord <- order(x[good])
ord.x <- x[good][ord]
ord.lab <- labels[good][ord]
q.function <- eval(parse(text = paste("q", distribution, sep = "")))
d.function <- eval(parse(text = paste("d", distribution, sep = "")))
n <- length(ord.x)
P <- ppoints(n)
z <- q.function(P, ...)
plot(z, ord.x, type = "n", xlab = xlab,
ylab = ylab, main = main,
las = las,cex.lab = cex.lab, cex.axis = cex.axis)
if (grid) {
grid(lty = 1, equilogs = FALSE)
box()}
points(z, ord.x, col = col, pch = pch, cex = cex)
if (line == "quartiles" || line == "none") {
Q.x <- quantile(ord.x, c(.25,.75))
Q.z <- q.function(c(.25,.75), ...)
b <- (Q.x[2] - Q.x[1])/(Q.z[2] - Q.z[1])
a <- Q.x[1] - b*Q.z[1]
abline(a, b, col = col.lines, lwd = lwd)
}
if (line == "robust") {
coef <- coef(rlm(ord.x ~ z))
a <- coef[1]
b <- coef[2]
abline(a, b)
}
conf <- if (envelope == FALSE) .95 else envelope
zz <- qnorm(1 - (1 - conf)/2)
SE <- (b/d.function(z, ...))*sqrt(P*(1 - P)/n)
fit.value <- a + b*z
upper <- fit.value + zz*SE
lower <- fit.value - zz*SE
if (envelope != FALSE) {
lines(z, upper, lty = 2, lwd = lwd, col = col.lines)
lines(z, lower, lty = 2, lwd = lwd, col = col.lines)
}
}
x11()
qqPlot2(resmcp, ylab = "Resíduos",
xlab = "Quantil N(0,1)", pch = 20)
qqPlot2(dm, distribution = 'chisq', df = q, pch = 20,
ylab = expression(paste("Quantis de Mahalanobis")),
xlab = "Quantis da Qui-quadrado")
My attempt to reproduce them in ggplot2 was as follows:
P1 = qqPlot2(resmcp, ylab = "Resíduos",
xlab = "Quantil N(0,1)", pch = 20)
PP1 = ggplot(data = P1, aes(resmcp)) +
geom_point(aes(y = resmcp), show.legend = FALSE)
P2 = qqPlot2(dm, distribution = 'chisq', df = q, pch = 20,
ylab = expression(paste("Quantis de Mahalanobis")),
xlab = "Quantis da Qui-quadrado")
PP2 = ggplot(data = P2, aes(dm)) +
geom_point(aes(y = dm), show.legend = FALSE)
x11()
gridExtra::grid.arrange(PP1,PP2, ncol = 2)
However, something is happening, as I have gotten the following result:
See my attempt below for the quantile mahalanobis distance graph vs. chi-square quantiles:
gVals <- function(y, dist, conf){ # distribution; confidence interval
y <- sort(y) # make sure they're in order
p <- ppoints(length(y))
if(dist == "chisq") {
zi <- qchisq(p, df = length(p) - 1)
zd <- dchisq(zi, df = length(p) - 1)
qz <- qchisq(c(.25, .75), length(p) - 1)
} else {
zi <- qnorm(p)
zd <- dnorm(zi)
qz <- qnorm(c(.25, .75))
}
# if quartiles preferred
qx <- quantile(y, c(.25, .75))
b <- (qx[2] - qx[1]) / (qz[2] - qz[1])
a <- qx[1] - b * qz[1]
# if robust preferred
# coef <- coef(rlm(y~zi))
# a <- coef[1]
# b <- coef[2]
z <- qnorm(1 - (1 - conf)/2) # z = 1.96 for 95%...
se <- (b / zd) * sqrt(p * (1 - p)/length(p))
ft <- a + b * zi
uc <- ft + z * se
dc <- ft - z * se
dff = data.frame(z = zi, y = y, uc = uc, dc = dc)
list(a = a, b = b, dff = dff) # returns intercept, slope, and data frame
}
cdf <- gVals(dm, "chisq", .95) # dm is defined in the previous code above
ggplot(cdf$dff, aes(x = z, y = y)) +
geom_point() +
geom_abline(intercept = cdf$a[[1]], slope = cdf$b[[1]]) +
annotate("line", x = cdf$dff$z, y = cdf$dff$uc, color = "red", lty = 2) +
annotate("line", x = cdf$dff$z, y = cdf$dff$dc, color = "red", lty = 2)
Note that the x axis should go from 0 to 8, and the y axis should go from 0 to 14. Also, the shape of the simulation envelope is not similar. I am not able to fix this problem.
Update
Instead of having the code for the option quartile commented out, I have commented out the code for the option robust in the function. Additionally, instead of returning a data frame, it returns a list. FYI, you only need the MASS package if you use the robust option (for the function rlm).
This function is based on the code used in qqPlot2 in your question. However, it doesn't return a plot; it returns data.
library(car)
library(MASS)
library(tidyverse)
gVals <- function(y, dist, conf){ # distribution; confidence interval
y <- sort(y) # make sure they're in order
p <- ppoints(length(y))
if(dist == "chisq") {
zi <- qchisq(p, df = length(p) - 1)
zd <- dchisq(zi, df = length(p) - 1)
qz <- qchisq(c(.25, .75), length(p) - 1)
} else {
zi <- qnorm(p)
zd <- dnorm(zi)
qz <- qnorm(c(.25, .75))
}
# if quartiles preferred
qx <- quantile(y, c(.25, .75))
b <- (qx[2] - qx[1]) / (qz[2] - qz[1])
a <- qx[1] - b * qz[1]
# if robust preferred
# coef <- coef(rlm(y~zi))
# a <- coef[1]
# b <- coef[2]
z <- qnorm(1 - (1 - conf)/2) # z = 1.96 for 95%...
se <- (b / zd) * sqrt(p * (1 - p)/length(p))
ft <- a + b * zi
uc <- ft + z * se
dc <- ft - z * se
dff = data.frame(z = zi, y = y, uc = uc, dc = dc)
list(a = a, b = b, dff = dff) # returns intercept, slope, and data frame
}
Here is a comparison with some arbitrary data.
data(mtcars)
qqPlot2(mtcars$mpg)
qqPlot2(mtcars$mpg, dist = "chisq", df = 31)
ndf <- gVals(mtcars$mpg, "norm", .95)
ggplot(ndf$dff, aes(x = z, y = y)) +
geom_point() +
geom_abline(intercept = ndf$a[[1]], slope = ndf$b[[1]]) +
annotate("line", x = ndf$dff$z, y = ndf$dff$uc, color = "red", lty = 2) +
annotate("line", x = ndf$dff$z, y = ndf$dff$dc, color = "red", lty = 2)
cdf <- gVals(mtcars$mpg, "chisq", .95)
ggplot(cdf$dff, aes(x = z, y = y)) +
geom_point() +
geom_abline(intercept = cdf$a[[1]], slope = cdf$b[[1]]) +
annotate("line", x = cdf$dff$z, y = cdf$dff$uc, color = "red", lty = 2) +
annotate("line", x = cdf$dff$z, y = cdf$dff$dc, color = "red", lty = 2)
I managed to solve it through the library qqplotr.
library(qqplotr)
dist <- "chisq"
dpar <- list(df = q)
QT <- data.frame(QUANTIS = dm); ggplot(QT, aes(sample = QUANTIS)) +
stat_qq_band(distribution = dist, dparams = dpar) +
stat_qq_point(distribution = dist, dparams = dpar) +
stat_qq_line(distribution = dist, dparams = dpar, color = "blue");
qqPlot2(dm, distribution = 'chisq', df = q, pch = 20,
ylab = expression(paste("Quantis de Mahalanobis")),
xlab = "Quantis da Qui-quadrado")

from clustering to portfolio selection in R

I need help.
I need to make a portfolio selection (Markowitz) and my script is the following:
The following is the matrix of stock returns:
a <- c(0.00444, -0.00553, -0.01007, -0.00012, 0.04133, -0.02472, -0.01771,
-0.00994, -0.06933, 0.00476)
b <- c(-0.01926, 0.06008, 0.02839, 0.00930, 0.02072, 0.02072, 0.03670, -0.02049,
-0.01644, 0.02375)
c <- c(-0.00719, 0.02296, -0.06438, 0.008805, -0.00603, -0.00663, -0.01160,
-0.00298, 0.00713, 0.00790)
d <- c(-0.01753, 0.00809, 0.02301, -0.00631, -0.026114,0.03157, -0.02488,
-0.01013, -0.03231, -0.00763)
e <- c(-0.02153, 0.00863, -0.02929, -0.01424, -0.01940, -0.02313, -0.04146,
-0.02610, 0.00050, -0.04700)
stocks <- cbind(a,b,c,d,e)
And the following is the market index:
rmkt <- c(-0.01159, -0.02787, -0.05936, -0.09417, -0.03027,-0.03161, -0.03166,
-0.04092, -0.02841, -0.009781)
for example the time is 10 so:
time <- 1:10
I used the following code for clustering:
Nsim = 10
opt_num_centers = rep(0, Nsim)
est_centers_coefs = est_centers_fits = rep(list(0), Nsim)
for(kk in 1:Nsim){
rmkt_list <- list()
for(i in 1:5){
rmkt_list[[i]] = rmkt
}
stock_list_mat = do.call("cbind", stocks_list)
class(stock_list_mat)
stock_mat <- stock_list_mat[, c(1:societies_stocks)]
class(stock_mat)
nseg = 30
B = basis_array(time, min(time), max(time), ndt = nseg,
deg = 3, max_derivs = 0, sparse = FALSE) [[1]]
M <- ncol(B)
P <- crossprod(diff(diag(ncol(B)), diff = 3))
dim(stock_mat)
class(stock_mat)
est_coef = matrix(0, nrow = M, ncol(stock_mat))
est_bet = matrix(0, nrow = nrow(stock_mat), ncol(stock_mat))
dim(est_coef)
dim(est_bet)
smooth_par = NULL
for(i in 1:ncol(stock_mat)){
bb = expectreg.ls(stock_mat[ , i] ~ rb(time,"special", by = rmkt, B = B, P = P),
smooth = "schall", expectiles = 0.5)
smooth_par = c(smooth_par, bb$lambda$time)
est_coef[, i] = bb$coefficients$time[, 1]
est_bet[, i] = B %*% bb$coefficients$time[, 1]
}
sc_est_coef = scale(est_coef)
aa = (clusGap(t(sc_est_coef), kmeans, 10, B = 100, nstart = 50, iter.max = 50))
n_clust = maxSE(aa$Tab[, "gap"], aa$Tab[, "SE.sim"] ,"Tibs2001SEmax")
opt_num_centers[kk] = n_clust
km = kmeans(t(sc_est_coef), n_clust, nstart = 100, iter.max = 100)
est_centers_coefs[[kk]] = t(km$centers)
est_centers_fits[[kk]] = B %*% est_centers_coefs[[kk]]
cat("\t", "simulation =", kk, "\n","\t", "# Clust =", opt_num_centers[kk], "\n")
}
Considering everything how can I proceed to make a portfolio selection?
Thank u all.

Why does rjags give Dimension mismatch taking subset of y error here?

I have written this model but rjags gives dimension mismatch error; What's happening?
Error in jags.model(textConnection(model1), data = jags_data, n.chains = n_chains, :
RUNTIME ERROR:
Compilation error on line 8.
Dimension mismatch taking subset of y
library(rjags)
model1 <- "model {
C <- 10000
for (j in 1:nobs){
zeros[j] ~ dpois(phi[j])
phi[j] <- -log(L[j]) + C
L[j] <- add[j]*(lambda[j]^y[j])*(1-lambda[j])^(1-y[j])
add[j] = ifelse(lambda[j] == 0.5, 2, aux[j])
aux[j] = 2*arctanh(1 - 2*lambda[j] + 10^(-323))/(1 - 2*lambda[j] + 10^(-323))
logit(lambda[j]) <- inprod(X[j, ], beta)
}
beta[1] ~ dnorm(0,1)
beta[2] ~ dgamma(1,1)
}"
n_chains = 1
n_adapt = 5000
n_iter = 10000
n_thin = 1
n_burnin = 5000
# generate data
n = 100
Ffun = plogis
design_mat = cbind(1, matrix(seq(0,1,by = 0.2), ncol=1))
gen_data = function(n, beta) {
X = design_mat[sample(nrow(design_mat), size = n, replace = T), ]
lambda = Ffun(X %*% beta)
y = rcbern(n,lambda)
idx = is.nan(y)
y[idx] = runif(length(idx))
list(X = X, y = y)
}
rcbern = function(n,lam){
x = runif(n)
y = log((x*(2*lam-1) - (lam-1))/(1-lam))/log(lam/(1-lam))
return(y)
}
beta = as.matrix(c(-3, 5))
jags_data = gen_data(n, beta)
jags_data$nobs = n
jg_model <- jags.model(textConnection(model1),
data = jags_data,
n.chains = n_chains,
n.adapt = n_adapt)
update(jg_model, n.iter = n_burnin)
result <- coda.samples(jg_model,
variable.names = c("beta"),
n.iter = n_iter,
thin = n_thin,
n.chains = n_chains)
beta_est = list(apply(result[[1]],2,median))
As suggested by #user20650 the issue is that you are indexing y as vector and your functions are generating as a matrix. Try this code with a slight change in gen_data():
library(rjags)
model1 <- "model {
C <- 10000
for (j in 1:nobs){
zeros[j] ~ dpois(phi[j])
phi[j] <- -log(L[j]) + C
L[j] <- add[j]*(lambda[j]^y[j])*(1-lambda[j])^(1-y[j])
add[j] = ifelse(lambda[j] == 0.5, 2, aux[j])
aux[j] = 2*arctanh(1 - 2*lambda[j] + 10^(-323))/(1 - 2*lambda[j] + 10^(-323))
logit(lambda[j]) <- inprod(X[j, ], beta)
}
beta[1] ~ dnorm(0,1)
beta[2] ~ dgamma(1,1)
}"
n_chains = 1
n_adapt = 5000
n_iter = 10000
n_thin = 1
n_burnin = 5000
# generate data
n = 100
Ffun = plogis
design_mat = cbind(1, matrix(seq(0,1,by = 0.2), ncol=1))
gen_data = function(n, beta) {
X = design_mat[sample(nrow(design_mat), size = n, replace = T), ]
lambda = Ffun(X %*% beta)
y = rcbern(n,lambda)
y <- as.vector(y)
idx = is.nan(y)
y[idx] = runif(length(idx))
list(X = X, y = y)
}
rcbern = function(n,lam){
x = runif(n)
y = log((x*(2*lam-1) - (lam-1))/(1-lam))/log(lam/(1-lam))
return(y)
}
beta = as.matrix(c(-3, 5))
jags_data = gen_data(n, beta)
jags_data$nobs = n
jg_model <- jags.model(textConnection(model1),
data = jags_data,
n.chains = n_chains,
n.adapt = n_adapt)
update(jg_model, n.iter = n_burnin)
result <- coda.samples(jg_model,
variable.names = c("beta"),
n.iter = n_iter,
thin = n_thin,
n.chains = n_chains)
beta_est = list(apply(result[[1]],2,median))
Output:
beta_est
[[1]]
beta[1] beta[2]
-0.006031984 0.692007301
You can also try y <- y[,1,drop=T] in the same function instead of as.vector()

Constraints in constrOptim.nl in r

I am using R package costrOptim.nl.
I need to minimize a function with the following constraints:
Alpha < sqrt(2*omega) and omega > 0
In my code expressed as:
theta[3] < sqrt(2*theta[1]) and theta[1] > 0
I write these conditions as:
Image
But when I call optimizer and run it.
I'm getting the following problem:
1: In sqrt(2 * theta[1]) : NaNs produced
Why? Did I set the proper conditions?
This is my whole code.
data <- read.delim(file = file, header = FALSE)
ind <- seq(from = 1, to = NROW(data), by = 1)
data <- data.frame(ind = ind, Ret = data$V1, Ret2 = data$V1^2)
colnames(data)[1] <- "Ind"
colnames(data)[2] <- "Ret"
colnames(data)[3] <- "Ret2"
T <- length(data$Ret)
m <- arima(x = data$Ret2, order = c(3,0,0), include.mean = TRUE, method = c("ML"))
b_not <- m$coef
omega <- 0.1
alpha <- 0.005
beta <- 0.9
theta <- c(omega,beta,alpha) # "some" value of theta
s0 <- theta[1]/(1-theta[2])
theta[3] < sqrt(2*theta[1]) # check whether the Feller condition is verified
N <- 30000
reps <- 1
rho <- -0.8
n <- 100
heston.II <- function(theta){
set.seed(5)
u <- rnorm(n = N*reps,mean = 0, sd = 1)
u1 <- rnorm(n = N*reps,mean = 0, sd = 1)
u2 <- rho*u + sqrt((1-rho^2))*u1
sigma <- matrix(0, nrow = N*reps, ncol = 1)
ret.int <- matrix(0, nrow = N*reps, ncol = 1)
sigma[1,1] <- s0
for (i in 2:(N*reps)) {
sigma[i,1] <- theta[1] + theta[2]*sigma[i-1,1] + theta[3]*sqrt(sigma[i-1,1])*u1[i]
# if(sigma[i,1] < 0.00000001){ sigma[i,1] = s0}
}
for (i in 1:(N*reps)) {
ret.int[i,1] <- sqrt(sigma[i,1])*u2[i]
}
ret <- matrix(0, nrow = N*reps/n, ncol = 1)
ret[1,1] <- sum(ret.int[1:n],1)
for (i in 2:((N*reps)/n)) {
ret[i,] <- sum(ret.int[(n*i):(n*(i+1))])
ret[((N*reps)/n),] <- sum(ret.int[(n*(i-1)):(n*i)])
}
ret2 <- ret^2
model <- arima(x = ret2, order = c(3,0,0), include.mean = TRUE)
beta_hat <- model$coef
m1 <- beta_hat[1] - b_not[1]
m2 <- beta_hat[2] - b_not[2]
m3 <- beta_hat[3] - b_not[3]
m4 <- beta_hat[4] - b_not[4]
D <- cbind(m1,m2,m3,m4)
DD <- (D)%*%t(D)/1000
DD <- as.numeric(DD)
return(DD)
}
heston.sim <- heston.II(theta)
hin <- function(theta){
h <- rep(NA, 2)
h[1] <- theta[1]
h[2] <- sqrt(2*theta[1]) - theta[3]
return(h)
}
hin(theta = theta)
.opt <- constrOptim.nl(par = theta, fn = heston.II, hin = hin)
.opt

with ggplot in R

I need little help. I try to do plot with ggplot package. When I want to make plot, depends of more than 1 factor (for example here: plot changes when średnia1 and odchylenie1 change):
alpha = 0.05
N = 100
sample_l = 10
srednia1 = seq(-7, 7, by = 1)
odchylenie1 = seq(1, 10, by = 1)
srednia2 = 2
odchylenie2 = 2
prob = 0.7
params = expand.grid(sample_l, srednia1, odchylenie1, srednia2, odchylenie2, prob)
str(params)
names(params) = c("dlugość", "średnia1", "odchylenie1", "średnia2", "odchyelnie2", "prawdopodobienstwo")
set.seed(100)
now <- Sys.time()
powers <- sapply(1:nrow(params), function(p){
l <- params[p, 1]
par_1 <- c(params[p, 2],params[p, 3])
par_2 <- c(params[p, 4], params[p, 5])
p <- params[p,6]
p_sim <-sapply(rep(l, N), function(x){
my_sample <- rmix(l,"norm", par_1, "norm", par_2, p)
shapiro.test(my_sample)$p.value
})
mean(p_sim < alpha)
})
Sys.time() - now
power_df <- bind_cols(params, power = powers)
power_df %>% ggplot(aes(x = średnia1,
y = power,
col = factor(odchylenie1))) +
geom_line()
it work perfect, but now, when I want to make plot only depends of 1 factor - prob something goes wrong. I have error : Error: Aesthetics must be either length 1 or the same as the data (150): x, y. Here is a code:
alpha = 0.05
N = 100
sample_l = 10
srednia1 = 2
odchylenie1 = 2
srednia2 = 1
odchylenie2 = 1
prob = seq(0.1,0.9,by=0.1)
set.seed(100)
now <- Sys.time()
powers <- sapply(1:nrow(params), function(p){
l <- params[p, 1]
par_1 <- c(params[p, 2],params[p, 3])
par_2 <- c(params[p, 4], params[p, 5])
p <- params[p,6]
p_sim <-sapply(rep(l, N), function(x){
my_sample <- rmix(l,"norm", par_1, "norm", par_2, p)
shapiro.test(my_sample)$p.value
})
mean(p_sim < alpha)
})
Sys.time() - now
power_df <- bind_cols(params, power = powers)
power_df %>% ggplot(aes(x = prob, y = power)) + geom_line()
PLEASE HELP ME :(

Resources