Related
I'm trying to transform two residual plots performed below into ggplot2.
As a description, in order to perform these graphs, it is necessary to previously define some functions associated with the specifics of the class of the adopted model, which I am providing below.
The model is in the fit argument whose data is from the nlme library, and the graphs are plotted at the end of the code using the qqPlot2 function.
rm(list = ls()); cat('\014')
library(ggplot2)
library(dplyr)
library(plotly)
library(nlme)
library(lme4)
library(MASS)
library(tidyverse)
library(splines)
library(gamlss)
library(gridExtra)
library(hnp)
library(car)
# Extract the design components of a fitted `lme` object `m`.
#
# Returns a list with:
#   Vr      - square matrix (ncol(Z) x ncol(Z)) filled block-wise from the
#             matrices in as.matrix(m$modelStruct$reStruct)
#   X       - fixed-effects model matrix
#   Z       - random-effects design matrix, built level by level
#   sigmasq - m$sigma^2
#   lambda  - unique diagonal entries of Vr
#   y       - last column of the residuals plus last column of the fitted values
#   k       - number of grouping levels (length(m$groups))
extract.lmeDesign2 <- function(m){
# start.level is fixed at 1, so every grouping level is processed.
start.level = 1
data <- getData(m)
grps <- nlme::getGroups(m)
n <- length(grps)
# X is first used as a two-element scratch list for the tensor product below;
# it is overwritten with the fixed-effects matrix further down.
X <- list()
grp.dims <- m$dims$ncol
Zt <- model.matrix(m$modelStruct$reStruct, data)
cov <- as.matrix(m$modelStruct$reStruct)
i.col <- 1
n.levels <- length(m$groups)
# Start with an n x 0 matrix; each level's columns are prepended with cbind().
Z <- matrix(0, n, 0)
if (start.level <= n.levels) {
for (i in 1:(n.levels - start.level + 1)) {
# X[[1]]: indicator matrix of the grouping factor (a column of ones when the
# factor has a single level).
if (length(levels(m$groups[[n.levels - i + 1]])) != 1)
{
X[[1]] <- model.matrix(~m$groups[[n.levels - i +
1]] - 1,
contrasts.arg = c("contr.treatment",
"contr.treatment"))
}
else X[[1]] <- matrix(1, n, 1)
# X[[2]]: the columns of Zt that belong to this level (grp.dims[i] columns).
X[[2]] <- as.matrix(Zt[, i.col:(i.col + grp.dims[i] -
1)])
i.col <- i.col + grp.dims[i]
Z <- cbind(mgcv::tensor.prod.model.matrix(X),Z)
}
Vr <- matrix(0, ncol(Z), ncol(Z))
start <- 1
for (i in 1:(n.levels - start.level + 1)) {
k <- n.levels - i + 1
for (j in 1:m$dims$ngrps[i]) {
# `stop` is a local index here (it shadows base::stop inside this function).
stop <- start + ncol(cov[[k]]) - 1
# Blocks are written from the bottom-right corner of Vr backwards.
Vr[ncol(Z) + 1 - (stop:start),ncol(Z) + 1 - (stop:start)] <- cov[[k]]
start <- stop + 1
}
}
}
# Use a pre-computed m$data$X when the `fixed` argument of the call is a bare
# name and that element exists; otherwise rebuild the matrix from the formula.
X <- if (class(m$call$fixed) == "name" && !is.null(m$data$X)) {
m$data$X
} else {
model.matrix(formula(eval(m$call$fixed)),data)
}
# Reconstruct the response as residuals + fitted values, taking the last column
# of each.
y <- as.vector(matrix(m$residuals, ncol = NCOL(m$residuals))[,NCOL(m$residuals)] +
matrix(m$fitted, ncol = NCOL(m$fitted))[,NCOL(m$fitted)])
return(list(
Vr = Vr,
X = X,
Z = Z,
sigmasq = m$sigma ^ 2,
lambda = unique(diag(Vr)),
y = y,
k = n.levels
)
)
}
# Fit the mixed model and collect the quantities used by the residual
# diagnostics further down.
fit <- lme(distance ~ age, method = "REML", data = Orthodont)
data.fit <- extract.lmeDesign2(fit)
data <- getData(fit)

# Response, fixed-effects design and their dimensions.
y <- data.fit$y
X <- data.fit$X
N <- length(y)
p <- ncol(X)

# Sorted numeric codes of the level-1 grouping factor and per-group counts.
id <- sort(as.numeric(getGroups(fit, level = 1)))
vecni <- table(id)
n <- length(as.numeric(names(vecni)))

n.levels <- length(fit$groups)
start.level <- 1
Cgrps <- nlme::getGroups(fit, level = start.level)
CCind <- levels(Cgrps)
sigma2 <- fit$sigma^2

# Within-group observation index: 1..n_i for each group, concatenated.
obs <- numeric()
for (i in seq_len(n)) {
  obs <- c(obs, 1:vecni[i])
}
# Build the random-effects design matrix Z, the random-effects covariance
# matrix Gam, and q (dimension of the random effects per level-1 unit).
#
# Changes relative to the previous version:
#   * `fit1` (undefined) replaced by `fit`;
#   * `nrowZi` is now a vector of row counts (it used to start with a matrix);
#   * the group loop no longer uses `n` as its index, which overwrote the
#     global number of units;
#   * grouping columns are forced to be factors (R >= 4.0 no longer converts
#     strings to factors by default, which made every `levels()` call NULL).
if (n.levels > 1) {
  lZi <- list()
  lgi <- list()
  numrow <- numeric()
  mgroups <- fit$groups
  # Variance components do not depend on the group, so compute them once.
  comp.var <- as.matrix(fit$modelStruct$reStruct)
  for (g in seq_along(CCind)) {
    dgi <- data.frame(as.matrix(mgroups[mgroups == CCind[g], ]),
                      stringsAsFactors = TRUE)
    nrowzi <- dim(dgi)[1]
    # Number of distinct groups at each nesting level inside this unit.
    girep <- as.numeric(length(levels(dgi[, 1])))
    for (k in 2:n.levels) {
      girep <- c(girep, as.numeric(length(levels(dgi[, k]))))
    }
    ncolzi <- 0
    for (k in 1:n.levels) {
      ncolzi <- ncolzi + as.numeric(length(levels(dgi[, k])))
    }
    # Number of observations in each of those groups.
    auxi <- as.vector(table(dgi[, 1]))
    for (i in 2:n.levels) {
      auxi <- c(auxi, as.vector(table(dgi[, i])))
    }
    # Indicator matrix: one column per group; the row pointer wraps around
    # when a nesting level has been filled completely.
    l <- 1
    Zi <- matrix(0, nrowzi, ncolzi)
    for (j in seq_len(ncolzi)) {
      Zi[l:(l + auxi[j] - 1), j] <- rep(1, auxi[j])
      l <- l + auxi[j]
      if (l == (nrowzi + 1)) l <- 1
    }
    lZi[[g]] <- Zi
    numrow[g] <- dim(Zi)[1]
    # Diagonal covariance: variance component of each level repeated once per
    # group of that level.
    auxg <- rep(as.numeric(comp.var[1]) * sigma2, girep[1])
    for (i in 2:length(girep)) {
      auxg <- c(auxg, rep(as.numeric(comp.var[i]) * sigma2, girep[i]))
    }
    lgi[[g]] <- diag(auxg)
  }
  q <- dim(lgi[[1]])[1]
  for (h in 2:length(CCind)) {
    q <- c(q, dim(lgi[[h]])[1])
  }
  Z <- lZi[[1]]
  for (k in 2:length(CCind)) {
    Z <- bdiag(Z, (lZi[[k]]))
  }
  Z <- as.matrix(Z)
  nrowZi <- dim(lZi[[1]])[1]
  for (h in 2:length(CCind)) {
    nrowZi <- c(nrowZi, dim(lZi[[h]])[1])
  }
  Gam <- lgi[[1]]
  for (k in 2:length(CCind)) {
    Gam <- bdiag(Gam, (lgi[[k]]))
  }
  Gam <- as.matrix(Gam)
} else {
  # Single grouping level: split the random-effects model matrix by unit.
  # NOTE(review): `id` is sorted, so this assumes the rows of `data` are
  # already ordered by group -- confirm for other data sets.
  mataux <- model.matrix(fit$modelStruct$reStruct, data)
  mataux <- as.data.frame(cbind(mataux, id))
  g <- getVarCov(fit, type = "random.effects")
  unit.ids <- as.numeric(unique(id))
  lZi <- list()
  lgi <- list()
  for (i in unit.ids) {
    lZi[[i]] <- as.matrix(subset(split(mataux, id == i, drop = TRUE)$`TRUE`,
                                 select = -id))
    lgi[[i]] <- g
  }
  Z <- as.matrix(bdiag(lZi))
  q <- dim(g)[1]
  Gam <- as.matrix(kronecker(diag(length(unit.ids)), g))
}
# Build R, the covariance matrix of the conditional (within-group) errors.
# With more than one grouping level it is assembled from the variance weights
# (varStruct) and correlation matrices (corStruct) of the fit; with a single
# level it is the block-diagonal of getVarCov(fit, type = "conditional").
if (n.levels > 1) {
if (!inherits(fit, "lme"))
stop("object does not appear to be of class lme")
grps <- nlme::getGroups(fit)
n <- length(grps)
n.levels <- length(fit$groups)
if (is.null(fit$modelStruct$corStruct))
n.corlevels <- 0
else n.corlevels <- length(all.vars(nlme::getGroupsFormula(fit$modelStruct$corStruct)))
# When the correlation structure groups more deeply than the random effects,
# rebuild the grouping factor from the corStruct variables ("a/b/..." labels).
if (n.levels < n.corlevels) {
getGroupsFormula(fit$modelStruct$corStruct)
vnames <- all.vars(nlme::getGroupsFormula(fit$modelStruct$corStruct))
lab <- paste(eval(parse(text = vnames[1]), envir = fit$data))
if (length(vnames) > 1)
for (i in 2:length(vnames)) {
lab <- paste(lab, "/", eval(parse(text = vnames[i]),
envir = fit$data), sep = "")
}
grps <- factor(lab)
}
# Cind sorts observations by the start.level grouping; rCind is its inverse
# permutation; size.cg holds the number of observations per group.
if (n.levels >= start.level || n.corlevels >= start.level) {
if (n.levels >= start.level)
Cgrps <- nlme::getGroups(fit, level = start.level)
else Cgrps <- grps
Cind <- sort(as.numeric(Cgrps), index.return = TRUE)$ix
rCind <- 1:n
rCind[Cind] <- 1:n
Clevel <- levels(Cgrps)
n.cg <- length(Clevel)
size.cg <- array(0, n.cg)
for (i in 1:n.cg) size.cg[i] <- sum(Cgrps == Clevel[i])
}
else {
n.cg <- 1
Cind <- 1:n
}
# w: per-observation residual standard deviation (sigma, scaled by the inverse
# variance weights when a varStruct is present).
if (is.null(fit$modelStruct$varStruct))
w <- rep(fit$sigma, n)
else {
w <- 1/nlme::varWeights(fit$modelStruct$varStruct)
group.name <- names(fit$groups)
# Builds and evaluates the text "ind<-order(data[["g1"]],data[["g2"]],...)";
# the resulting `ind` is used to reorder the weights.
order.txt <- paste("ind<-order(data[[\"", group.name[1],
"\"]]", sep = "")
if (length(fit$groups) > 1)
for (i in 2:length(fit$groups)) order.txt <- paste(order.txt,
",data[[\"", group.name[i], "\"]]", sep = "")
order.txt <- paste(order.txt, ")")
eval(parse(text = order.txt))
w[ind] <- w
w <- w * fit$sigma
}
w <- w[Cind]
# lR: correlation part. A vector of ones without corStruct, a single matrix, or
# a list with one matrix per start.level group.
if (is.null(fit$modelStruct$corStruct))
lR <- array(1, n)
else {
c.m <- nlme::corMatrix(fit$modelStruct$corStruct)
if (!is.list(c.m)) {
lR <- c.m
lR <- lR[Cind, ]
lR <- lR[, Cind]
}
else {
lR <- list()
ind <- list()
for (i in 1:n.cg) {
lR[[i]] <- matrix(0, size.cg[i], size.cg[i])
ind[[i]] <- 1:size.cg[i]
}
# Roff[k] is the first (sorted) row index of group k.
Roff <- cumsum(c(1, size.cg))
gr.name <- names(c.m)
n.g <- length(c.m)
j0 <- rep(1, n.cg)
ii <- 1:n
# Place each inner-group correlation block inside the matrix of its outer group.
for (i in 1:n.g) {
Clev <- unique(Cgrps[grps == gr.name[i]])
if (length(Clev) > 1)
stop("inner groupings not nested in outer!!")
k <- (1:n.cg)[Clevel == Clev]
j1 <- j0[k] + nrow(c.m[[i]]) - 1
lR[[k]][j0[k]:j1, j0[k]:j1] <- c.m[[i]]
ind1 <- ii[grps == gr.name[i]]
ind2 <- rCind[ind1]
ind[[k]][j0[k]:j1] <- ind2 - Roff[k] + 1
j0[k] <- j1 + 1
}
# Permute rows and columns of every block according to ind[[k]].
for (k in 1:n.cg) {
lR[[k]][ind[[k]], ] <- lR[[k]]
lR[[k]][, ind[[k]]] <- lR[[k]]
}
}
}
# Scale correlations by the standard deviations: diag(w) %*% lR %*% diag(w).
if (is.list(lR)) {
for (i in 1:n.cg) {
wi <- w[Roff[i]:(Roff[i] + size.cg[i] - 1)]
lR[[i]] <- as.vector(wi) * t(as.vector(wi) * lR[[i]])
}
}
else if (is.matrix(lR)) {
lR <- as.vector(w) * t(as.vector(w) * lR)
}
else {
lR <- w^2 * lR
}
# Assemble the full matrix R.
# NOTE(review): `2:n.cg` counts backwards when n.cg == 1 -- confirm that case
# cannot occur here.
if (is.list(lR)) {
R <- lR[[1]]
for (k in 2:n.cg) {
R <- bdiag(R,lR[[k]])
}
R <- as.matrix(R)
}
else{
R <- diag(lR)
}
}else{
# Single grouping level: block-diagonal of the per-individual conditional
# covariance matrices returned by getVarCov().
R <- getVarCov(fit,type = "conditional",individual = 1)[[1]]
for (i in 2:length(as.numeric(unique(id)))) {
R <- as.matrix(bdiag(R,getVarCov(fit,
type = "conditional",individual = i)[[1]] ) )
}
}
# Matrix square root via the singular value decomposition.
#
# mat: a square (symmetric positive semi-definite) matrix, or anything
#      coercible to one with as.matrix().
# Returns U %*% sqrt(D) %*% t(V), where mat = U %*% D %*% t(V).
#
# Fixes: the `LINPACK` argument of svd() is defunct and must not be passed;
# diag() with a single number builds an identity matrix of that size, so the
# singular values are placed on the diagonal with an explicit dimension
# (this makes 1 x 1 input work).
sqrt.matrix <- function(mat) {
  mat <- as.matrix(mat)
  singular_dec <- svd(mat)
  root_d <- sqrt(singular_dec$d)
  singular_dec$u %*% diag(root_d, nrow = length(root_d)) %*% t(singular_dec$v)
}
# Marginal covariance of y and its inverse.
V <- (Z %*% Gam %*% t(Z)) + R
iV <- solve(V)
# varbeta = (X' V^-1 X)^-1; Q = V^-1 - V^-1 X varbeta X' V^-1.
varbeta <- solve((t(X) %*% iV %*% X))
Q <- (iV - iV %*% X %*% (varbeta) %*% t(X) %*% iV )
zq <- t(Z) %*% Q
# trace(zq zq'), i.e. the squared Frobenius norm of Z'Q.
norm.frob.ZtQ <- sum(diag(zq %*% t(zq)))
# Fixed-effects estimates and predicted random effects.
eblue <- as.vector(fixef(fit))
eblup <- Gam %*% t(Z) %*% iV %*% (y - X %*% eblue)
# Marginal (predm) and conditional (predi) predictions with their residuals.
predm <- X %*% eblue
predi <- X %*% eblue + Z %*% eblup
resm <- (y - predm)
resc <- (y - predi)
var.resm <- V - X %*% solve(t(X) %*% iV %*% X) %*% t(X)
var.resc <- R %*% Q %*% R
ident <- diag(N)
# CF: element-wise ratio of the diagonals of auxnum and auxden.
auxnum <- (R %*% Q %*% Z %*% Gam %*% t(Z) %*% Q %*% R)
auxden <- R %*% Q %*% R
CF <- diag(auxnum)/diag(auxden)
# Conditional residuals divided by their standard deviations.
rescp <- resc/sqrt(diag(var.resc))
R.half <- sqrt.matrix(R)
auxqn <- eigen((R.half %*% Q %*% R.half), symmetric = T, only.values = FALSE)
# lt: linear transformation built from the first N - p eigenpairs of
# R^(1/2) Q R^(1/2) and the inverse square root of R.
lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(sqrt.matrix(R[1:N,1:N]))
var.resmcp <- lt %*% var.resc[1:N,1:N] %*% t(lt)
# Transformed conditional residuals, standardised; used in the normal QQ plot.
resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
# Mahalanobis-type distance of the predicted random effects of every level-1
# unit: dm[j] = eblup_j' ginv(aux_jj) eblup_j, where aux_jj is the diagonal
# block of `aux` that belongs to unit j.
#
# Changes relative to the previous version:
#   * multi-level case: the block of unit j is now located with cumulative
#     sums of q (the old index formula was only correct for j <= 2);
#   * the q == 1 and q > 1 cases of the single-level branch are merged
#     (drop = FALSE keeps a 1 x 1 block a matrix);
#   * a leftover debugging cat() of every block was removed.
aux <- Gam %*% t(Z) %*% Q %*% Z %*% Gam
qm <- q - 1
if (n.levels > 1) {
  # q is a vector here: one block size per level-1 unit.
  block.end <- cumsum(q)
  block.start <- block.end - q + 1
  dm <- matrix(0, length(CCind), 1)
  for (j in seq_along(CCind)) {
    idx <- block.start[j]:block.end[j]
    gbi <- aux[idx, idx, drop = FALSE]
    eblupi <- eblup[idx, ]
    dmi <- t(eblupi) %*% ginv(gbi) %*% eblupi
    dm[j] <- dmi
  }
} else {
  # q is a scalar here: every unit has q random effects.
  dm <- matrix(0, n, 1)
  for (j in seq_along(CCind)) {
    idx <- (q * j - qm):(q * j)
    gbi <- aux[idx, idx, drop = FALSE]
    eblupi <- eblup[idx]
    dmi <- t(eblupi) %*% ginv(gbi) %*% eblupi
    dm[j] <- dmi
  }
}
# Quantile-comparison plot with a pointwise confidence envelope (base graphics).
#
# x            numeric vector (or one-column matrix); NAs are dropped.
# distribution root name of the reference distribution ("norm", "chisq", ...);
#              the functions q<distribution> and d<distribution> are used.
# ...          extra arguments passed to those functions (e.g. df = 2).
# envelope     confidence level of the envelope, or FALSE for no envelope.
# line         "quartiles" (line through the quartile pairs), "robust"
#              (rlm fit) or "none" (no line drawn; the quartile line is still
#              used as the centre of the envelope).
# col, col.lines, lwd, pch, cex, cex.lab, cex.axis, las, main, xlab, ylab,
# grid         usual graphical settings.
# labels, id.method, id.n, id.cex, id.col are accepted for compatibility but
# not used by this function.
#
# Called for its side effect (the plot); returns NULL invisibly.
#
# Fixes: line = "none" no longer draws the reference line; the robust line now
# honours col.lines and lwd like the quartile line; the quantile and density
# functions are looked up with get() instead of eval(parse()).
qqPlot2 <- function(x, distribution="norm", ..., ylab=deparse(substitute(x)),
                    xlab=paste(distribution, "quantiles"), main = NULL,
                    las = par("las"),
                    envelope = .95,
                    col = palette()[1],
                    col.lines = palette()[2], lwd = 2, pch = 1, cex = par("cex"),
                    cex.lab = par("cex.lab"), cex.axis = par("cex.axis"),
                    line = c("quartiles", "robust", "none"),
                    labels = if (!is.null(names(x))) names(x) else seq(along = x),
                    id.method = "y",
                    id.n = if (id.method[1] == "identify") Inf else 0,
                    id.cex = 1, id.col=palette()[1], grid = TRUE)
{
  line <- match.arg(line)
  good <- !is.na(x)
  ord <- order(x[good])
  ord.x <- x[good][ord]

  q.function <- get(paste0("q", distribution), mode = "function")
  d.function <- get(paste0("d", distribution), mode = "function")

  # Theoretical quantiles at the plotting positions.
  n <- length(ord.x)
  P <- ppoints(n)
  z <- q.function(P, ...)

  plot(z, ord.x, type = "n", xlab = xlab,
       ylab = ylab, main = main,
       las = las, cex.lab = cex.lab, cex.axis = cex.axis)
  if (grid) {
    graphics::grid(lty = 1, equilogs = FALSE)
    box()
  }
  points(z, ord.x, col = col, pch = pch, cex = cex)

  # Intercept a and slope b of the reference line.
  if (line == "robust") {
    line.coef <- coef(rlm(ord.x ~ z))
    a <- line.coef[1]
    b <- line.coef[2]
  } else {
    Q.x <- quantile(ord.x, c(.25, .75))
    Q.z <- q.function(c(.25, .75), ...)
    b <- (Q.x[2] - Q.x[1]) / (Q.z[2] - Q.z[1])
    a <- Q.x[1] - b * Q.z[1]
  }
  if (line != "none") {
    abline(a, b, col = col.lines, lwd = lwd)
  }

  # Pointwise envelope around the reference line.
  if (envelope != FALSE) {
    zz <- qnorm(1 - (1 - envelope) / 2)
    SE <- (b / d.function(z, ...)) * sqrt(P * (1 - P) / n)
    fit.value <- a + b * z
    lines(z, fit.value + zz * SE, lty = 2, lwd = lwd, col = col.lines)
    lines(z, fit.value - zz * SE, lty = 2, lwd = lwd, col = col.lines)
  }
  invisible(NULL)
}
# Base-graphics diagnostics: normal QQ plot of the transformed conditional
# residuals and chi-square QQ plot of the Mahalanobis distances.
# The residual axis label had been corrupted by an encoding round-trip; it is
# restored to "Resíduos".
x11()
qqPlot2(resmcp, ylab = "Resíduos",
        xlab = "Quantil N(0,1)", pch = 20)
qqPlot2(dm, distribution = "chisq", df = q, pch = 20,
        ylab = expression(paste("Quantis de Mahalanobis")),
        xlab = "Quantis da Qui-quadrado")
My attempt to reproduce them in ggplot2 was as follows:
# ggplot2 versions of the two QQ plots.
#
# qqPlot2() only draws on the base graphics device and returns NULL, so its
# result cannot be used as `data`; with data = NULL the old code plotted each
# sample against itself. The quantiles, reference line and envelope are
# therefore computed here (same formulas as qqPlot2 with line = "quartiles")
# and handed to ggplot() as a data frame.

# Data for a QQ plot: theoretical quantiles z, ordered sample x, quartile line
# `fit` and the pointwise envelope (lower, upper) at confidence level `conf`.
qq_envelope_data <- function(x, qfun, dfun, conf = 0.95, ...) {
  x <- sort(as.vector(x))          # sort() also drops NAs
  n <- length(x)
  P <- ppoints(n)
  z <- qfun(P, ...)
  Q.x <- quantile(x, c(.25, .75))
  Q.z <- qfun(c(.25, .75), ...)
  b <- unname((Q.x[2] - Q.x[1]) / (Q.z[2] - Q.z[1]))
  a <- unname(Q.x[1] - b * Q.z[1])
  SE <- (b / dfun(z, ...)) * sqrt(P * (1 - P) / n)
  fit <- a + b * z
  zz <- qnorm(1 - (1 - conf) / 2)
  data.frame(z = z, x = x, fit = fit,
             lower = fit - zz * SE, upper = fit + zz * SE)
}

# One QQ panel: points, reference line and dashed envelope.
qq_envelope_plot <- function(d, xlab, ylab) {
  ggplot(d, aes(x = z, y = x)) +
    geom_point(show.legend = FALSE) +
    geom_line(aes(y = fit), colour = "red") +
    geom_line(aes(y = lower), colour = "red", linetype = 2) +
    geom_line(aes(y = upper), colour = "red", linetype = 2) +
    labs(x = xlab, y = ylab)
}

PP1 <- qq_envelope_plot(
  qq_envelope_data(resmcp, qnorm, dnorm),
  xlab = "Quantil N(0,1)", ylab = "Resíduos"
)
PP2 <- qq_envelope_plot(
  qq_envelope_data(dm, qchisq, dchisq, df = q),
  xlab = "Quantis da Qui-quadrado", ylab = "Quantis de Mahalanobis"
)
x11()
gridExtra::grid.arrange(PP1, PP2, ncol = 2)
However, something is happening, as I have gotten the following result:
See my attempt below for the quantile mahalanobis distance graph vs. chi-square quantiles:
# Data for a quantile-comparison plot with a pointwise confidence envelope.
#
# y     numeric sample (sorted internally; sort() drops NAs).
# dist  "chisq" for a chi-square reference; anything else means normal.
# conf  confidence level of the envelope (e.g. .95).
# df    degrees of freedom of the chi-square reference. Defaults to
#       length(y) - 1 (the previous hard-coded value); pass the real value,
#       e.g. df = q for Mahalanobis distances of q random effects.
#
# Returns a list: a (intercept) and b (slope) of the line through the
# quartiles, and dff, a data frame with z (theoretical quantiles), y (ordered
# sample), uc / dc (upper / lower envelope).
gVals <- function(y, dist, conf, df = NULL){
  y <- sort(y)
  p <- ppoints(length(y))
  if (dist == "chisq") {
    if (is.null(df)) df <- length(p) - 1
    zi <- qchisq(p, df = df)
    zd <- dchisq(zi, df = df)
    qz <- qchisq(c(.25, .75), df)
  } else {
    zi <- qnorm(p)
    zd <- dnorm(zi)
    qz <- qnorm(c(.25, .75))
  }
  # Line through the first and third quartiles.
  qx <- quantile(y, c(.25, .75))
  b <- (qx[2] - qx[1]) / (qz[2] - qz[1])
  a <- qx[1] - b * qz[1]
  # For a robust line use instead: coef(rlm(y ~ zi)) -> a = [1], b = [2].
  z <- qnorm(1 - (1 - conf) / 2)   # 1.96 for a 95% envelope
  se <- (b / zd) * sqrt(p * (1 - p) / length(p))
  ft <- a + b * zi
  dff <- data.frame(z = zi, y = y, uc = ft + z * se, dc = ft - z * se)
  list(a = a, b = b, dff = dff)
}
# Chi-square QQ plot of the Mahalanobis distances (dm comes from the model
# code above): points, quartile line and the dashed red envelope.
cdf <- gVals(dm, "chisq", .95)
ggplot(data = cdf$dff, mapping = aes(x = z, y = y)) +
  geom_point() +
  geom_abline(slope = cdf$b[[1]], intercept = cdf$a[[1]]) +
  annotate("line", x = cdf$dff$z, y = cdf$dff$uc, color = "red", lty = 2) +
  annotate("line", x = cdf$dff$z, y = cdf$dff$dc, color = "red", lty = 2)
Note that the x axis should go from 0 to 8, and the y axis should go from 0 to 14. Also, the shape of the simulation envelope is not similar. I am not able to fix this problem.
Update
Instead of having the code for the option quartile commented out, I have commented out the code for the option robust in the function. Additionally, instead of returning a data frame, it returns a list. FYI, you only need the MASS package if you use the robust option (for the function rlm).
This function is based on the code used in qqPlot2 in your question. However, it doesn't return a plot; it returns data.
library(car)
library(MASS)
library(tidyverse)
# Compute the data behind a QQ plot with a pointwise confidence envelope
# (no plotting is done here).
#
# y     numeric sample; it is sorted first (sort() drops NAs).
# dist  "chisq" selects a chi-square reference, any other value the normal.
# conf  confidence level of the envelope, e.g. .95.
# df    chi-square degrees of freedom. NULL (default) keeps the earlier
#       behaviour of using length(y) - 1; supply the true value when it is
#       known (for example the number of random effects).
#
# Value: list(a = intercept, b = slope, dff = data frame with columns
# z, y, uc, dc) where uc / dc are the upper / lower envelope limits.
gVals <- function(y, dist, conf, df = NULL){
  y <- sort(y)
  n <- length(y)
  p <- ppoints(n)
  probs <- c(.25, .75)
  if (dist == "chisq") {
    if (is.null(df)) df <- n - 1
    zi <- qchisq(p, df = df)
    zd <- dchisq(zi, df = df)
    qz <- qchisq(probs, df)
  } else {
    zi <- qnorm(p)
    zd <- dnorm(zi)
    qz <- qnorm(probs)
  }
  # Quartile-based reference line (the robust alternative would be
  # coef(rlm(y ~ zi)), which needs MASS).
  qx <- quantile(y, probs)
  b <- (qx[2] - qx[1]) / (qz[2] - qz[1])
  a <- qx[1] - b * qz[1]
  z <- qnorm(1 - (1 - conf) / 2)   # normal critical value, 1.96 for 95%
  se <- (b / zd) * sqrt(p * (1 - p) / n)
  ft <- a + b * zi
  uc <- ft + z * se
  dc <- ft - z * se
  list(a = a, b = b, dff = data.frame(z = zi, y = y, uc = uc, dc = dc))
}
Here is a comparison with some arbitrary data.
# Side-by-side check on mtcars$mpg: base-graphics qqPlot2() first, then the
# same plots rebuilt with ggplot2 from the output of gVals().
data(mtcars)
mpg <- mtcars$mpg
qqPlot2(mtcars$mpg)
qqPlot2(mtcars$mpg, distribution = "chisq", df = 31)

# Normal reference.
ndf <- gVals(mpg, "norm", .95)
ggplot(data = ndf$dff, mapping = aes(x = z, y = y)) +
  geom_point() +
  geom_abline(slope = ndf$b[[1]], intercept = ndf$a[[1]]) +
  annotate("line", x = ndf$dff$z, y = ndf$dff$uc, color = "red", lty = 2) +
  annotate("line", x = ndf$dff$z, y = ndf$dff$dc, color = "red", lty = 2)

# Chi-square reference.
cdf <- gVals(mpg, "chisq", .95)
ggplot(data = cdf$dff, mapping = aes(x = z, y = y)) +
  geom_point() +
  geom_abline(slope = cdf$b[[1]], intercept = cdf$a[[1]]) +
  annotate("line", x = cdf$dff$z, y = cdf$dff$uc, color = "red", lty = 2) +
  annotate("line", x = cdf$dff$z, y = cdf$dff$dc, color = "red", lty = 2)
I managed to solve it through the library qqplotr.
# Chi-square QQ plot of the Mahalanobis distances with qqplotr (band, points
# and line share the same distribution settings), followed by the
# base-graphics version for comparison.
library(qqplotr)
dist <- "chisq"
dpar <- list(df = q)
QT <- data.frame(QUANTIS = dm)
ggplot(data = QT, mapping = aes(sample = QUANTIS)) +
  stat_qq_band(distribution = dist, dparams = dpar) +
  stat_qq_point(distribution = dist, dparams = dpar) +
  stat_qq_line(distribution = dist, dparams = dpar, color = "blue")
qqPlot2(dm, distribution = "chisq", df = q, pch = 20,
        xlab = "Quantis da Qui-quadrado",
        ylab = expression(paste("Quantis de Mahalanobis")))
I need your help: I need to combine two vectors (z and Num1, or z and Num2) so that the final vector contains the 10 values of z and 90 values from Num1 (or Num2).
Code that I have now:
# Two vectors of length 100, each made of 90 random normal values and 10 NAs
# in random positions, plus the positions of the NAs.
#
# Changes: Num1 / Num2 now hold 90 values (100 values + 10 NAs gave 110
# elements, and the loops over 1:100 never looked at the last 10); the NA
# positions are obtained with which() instead of growing a vector in a loop.
I <- seq_len(100)
z <- rep(NA, 10)
Num1 <- rnorm(length(I) - length(z))
Num2 <- rnorm(length(I) - length(z))
# sample() without a size argument returns a random permutation.
vect_1 <- sample(c(Num1, z))
vect_2 <- sample(c(Num2, z))
vect_1_NA <- is.na(vect_1)
vect_2_NA <- is.na(vect_2)
# Indices of the missing values in each vector.
NA1 <- which(vect_1_NA)
NA2 <- which(vect_2_NA)
I'm running a mediation analysis on a dataset in r and can't figure out how to get psych::mediate to work--I've done the same on another dataset before and didn't change anything, but it's not working with this new data for some reason.
I tried:
1. Turning 'condition' into a condition.f factor
2. Explicitly naming DATA a "data.frame"
3. Specifying different parameters such as "z" or "mod" in the function
4. Checked capitalization on all the variable column names.
None of the above seem to work.
# Mediation model: Share_T -> Seff -> DV_See, fitted with psych::mediate and
# printed in full.
library(psych)
DATA <- STEX_S1_FINALCLEAN
Mediation_RA <- psych::mediate(
  y = "DV_See",
  x = "Share_T",
  m = "Seff",
  data = DATA
)
print(Mediation_RA, short = FALSE)
I'd expect a full output with mediation values, but have gotten:
Error in psych::mediate(y = "DV_See", x = "Share_T", m = "Seff", data = DATA) :
object 'ex' not found
I don't see an object 'ex' anywhere, and that's not the name of any column in the DATA data frame.
Following the suggestion of #r2evans, you can use the following modified function:
# Mediation / moderation analysis; per the surrounding text this is a modified
# copy of psych::mediate. The modification visible here is that `ex` is set to
# NULL when `y` is not a formula, so the later references to `ex` no longer
# fail with "object 'ex' not found".
#
# Arguments
#   y, x, m  dependent, independent and mediating variables, given as column
#            names or column numbers of `data`; `y` may also be a formula, in
#            which case x, m, mod, ex and z are taken from fparse(y).
#   data     raw data, or a square covariance/correlation matrix.
#   mod, z   moderator variables / variables passed on as `z` to matReg().
#   n.obs    number of observations (used when `data` is a square matrix;
#            set to 1000 with a message if missing).
#   use      passed to cov().
#   n.iter   passed to psych:::boot.mediate().
#   alpha    two-sided level for the bootstrap quantile intervals.
#   std      if TRUE the covariance matrix is converted with cov2cor().
#   plot     if TRUE a diagram is drawn at the end.
#   zero     if TRUE the data are mean-centred (scale(..., scale = FALSE))
#            when moderators or squared terms are present.
#   main     title passed to mediate.diagram().
#
# Returns a list of class c("psych", "mediate") holding the regression
# coefficients (a, b, c, cprime, ab, ...), the bootstrap results, the data,
# the covariance matrix C and the call.
#
# Depends on fparse(), matReg(), moderate.diagram(), mediate.diagram() and
# psych:::boot.mediate(), none of which are defined in this block.
mymediate <- function (y, x, m = NULL, data, mod = NULL, z = NULL, n.obs = NULL,
use = "pairwise", n.iter = 5000, alpha = 0.05, std = FALSE,
plot = TRUE, zero = TRUE, main = "Mediation")
{
cl <- match.call()
# Formula interface: pull all variable roles out of the parsed formula.
if (class(y) == "formula") {
ps <- fparse(y)
y <- ps$y
x <- ps$x
m <- ps$m
mod <- ps$prod
ex <- ps$ex
x <- x[!ps$x %in% ps$m]
z <- ps$z
print(str(ps))
} else {
# Non-formula interface: there are no squared terms, so define ex explicitly.
ex = NULL
}
all.ab <- NULL
# Column numbers are translated into column names.
if (is.numeric(y))
y <- colnames(data)[y]
if (is.numeric(x))
x <- colnames(data)[x]
if (!is.null(m))
if (is.numeric(m))
m <- colnames(data)[m]
if (!is.null(mod)) {
if (is.numeric(mod)) {
nmod <- length(mod)
mod <- colnames(data)[mod]
}
}
if (is.null(mod)) {
nmod <- 0
}
else {
nmod <- length(mod)
}
var.names <- list(IV = x, DV = y, med = m, mod = mod, z = z,
ex = ex)
if (any(!(unlist(var.names) %in% colnames(data)))) {
stop("Variable names not specified correctly")
}
# A square `data` is treated as a covariance/correlation matrix.
if (ncol(data) == nrow(data)) {
raw <- FALSE
if (nmod > 0) {
stop("Moderation Analysis requires the raw data")
}
else {
data <- data[c(y, x, m, z), c(y, x, m, z)]
}
}
else {
data <- data[, c(y, x, m, z, ex)]
}
if (nmod == 1) {
mod <- c(x, mod)
nmod <- length(mod)
}
if (!is.matrix(data))
data <- as.matrix(data)
if ((dim(data)[1] != dim(data)[2])) {
# Raw data: compute the covariance (or correlation) matrix.
n.obs = dim(data)[1]
if (!is.null(mod))
if (zero)
data <- scale(data, scale = FALSE)
C <- cov(data, use = use)
raw <- TRUE
if (std) {
C <- cov2cor(C)
}
}
else {
raw <- FALSE
C <- data
nvar <- ncol(C)
if (is.null(n.obs)) {
n.obs <- 1000
message("The data matrix was a correlation matrix and the number of subjects was not specified. \n n.obs arbitrarily set to 1000")
}
if (!is.null(m)) {
# Simulate raw data with the given covariance structure (eigen decomposition
# applied to standard normal draws) for use by the bootstrap.
message("The replication data matrices were simulated based upon the specified number of subjects and the observed correlation matrix.")
eX <- eigen(C)
data <- matrix(rnorm(nvar * n.obs), n.obs)
data <- t(eX$vectors %*% diag(sqrt(pmax(eX$values,
0)), nvar) %*% t(data))
colnames(data) <- c(y, x, m)
}
}
if ((nmod > 0) | (!is.null(ex))) {
if (!raw) {
stop("Moderation analysis requires the raw data")
}
else {
if (zero) {
data <- scale(data, scale = FALSE)
}
}
}
# Product (interaction) terms.
# NOTE(review): `ps` only exists when `y` was a formula; with a non-formula
# `y` and a non-NULL `mod` this block would fail on `ps$prod` -- confirm.
if (nmod > 0) {
prods <- matrix(NA, ncol = length(ps$prod), nrow = nrow(data))
colnames(prods) <- paste0("V", 1:length(ps$prod))
for (i in 1:length(ps$prod)) {
prods[, i] <- apply(data[, ps$prod[[i]]], 1, prod)
colnames(prods)[i] <- paste0(ps$prod[[i]], collapse = "*")
}
data <- cbind(data, prods)
x <- c(x, colnames(prods))
}
# Squared terms for the variables listed in `ex`.
if (!is.null(ex)) {
quads <- matrix(NA, ncol = length(ex), nrow = nrow(data))
colnames(quads) <- ex
for (i in 1:length(ex)) {
quads[, i] <- data[, ex[i]] * data[, ex[i]]
colnames(quads)[i] <- paste0(ex[i], "^2")
}
data <- cbind(data, quads)
x <- c(x, colnames(quads))
}
# Recompute C so that it includes any product / squared columns.
if (raw) {
C <- cov(data, use = use)
}
if (std) {
C <- cov2cor(C)
}
xy <- c(x, y)
numx <- length(x)
numy <- length(y)
if (!is.null(m)) {
numm <- length(m)
nxy <- numx + numy
m.matrix <- C[c(x, m), c(x, m), drop = FALSE]
}
else {
numm <- 0
nxy <- numx
}
df <- n.obs - nxy - 1
xy.matrix <- C[c(x, m), y, drop = FALSE]
# Regression of y on x (the "total" model).
total.reg <- matReg(x, y, m = m, z = z, C = C, n.obs = n.obs)
direct <- total.reg$beta
if (!is.null(z)) {
colnames(direct) <- paste0(colnames(direct), "*")
rownames(direct) <- paste0(rownames(direct), "*")
}
if (numm > 0) {
# a: x -> m; b: m -> y given x; cprime: coefficients of y on (x, m).
a.reg <- matReg(x = x, y = m, C = C, z = z, n.obs = n.obs)
b.reg <- matReg(c(x, m), y, C = C, z = z, n.obs = n.obs)
cprime.reg <- matReg(c(x, m), y, C = C, n.obs = n.obs,
z = z)
a <- a.reg$beta
b <- b.reg$beta[-(1:numx), , drop = FALSE]
c <- total.reg$beta
cprime <- cprime.reg$beta
# all.ab[i, j]: product a[i, j] * b[j, 1] for predictor i and mediator j.
all.ab <- matrix(NA, ncol = numm, nrow = numx)
for (i in 1:numx) {
all.ab[i, ] <- a[i, ] * t(b[, 1])
}
colnames(all.ab) <- m
rownames(all.ab) <- x
ab <- a %*% b
indirect <- c - ab
if (is.null(n.obs)) {
message("Bootstrap is not meaningful unless raw data are provided or the number of subjects is specified.")
mean.boot <- sd.boot <- ci.quant <- boot <- se <- tvalue <- prob <- NA
}
else {
# Bootstrap results: column means, standard deviations and quantile intervals.
boot <- psych:::boot.mediate(data, x, y, m, z, n.iter = n.iter,
std = std, use = use)
mean.boot <- colMeans(boot)
sd.boot <- apply(boot, 2, sd)
ci.quant <- apply(boot, 2, function(x) quantile(x,
c(alpha/2, 1 - alpha/2), na.rm = TRUE))
mean.boot <- matrix(mean.boot, nrow = numx)
sd.boot <- matrix(sd.boot, nrow = numx)
ci.ab <- matrix(ci.quant, nrow = 2 * numx * numy)
boots <- list(mean = mean.boot, sd = sd.boot, ci = ci.quant,
ci.ab = ci.ab)
}
}
else {
# No mediator: all mediation-specific results are NA.
a.reg <- b.reg <- reg <- NA
a <- b <- c <- ab <- cprime <- boot <- boots <- indirect <- cprime.reg <- NA
}
# Variable names are marked with "*" when `z` was supplied.
if (!is.null(z)) {
var.names$IV <- paste0(var.names$IV, "*")
var.names$DV <- paste0(var.names$DV, "*")
var.names$med <- paste0(var.names$med, "*")
colnames(C) <- rownames(C) <- paste0(colnames(C), "*")
}
result <- list(var.names = var.names, a = a, b = b, ab = ab,
all.ab = all.ab, c = c, direct = direct, indirect = indirect,
cprime = cprime, total.reg = total.reg, a.reg = a.reg,
b.reg = b.reg, cprime.reg = cprime.reg, boot = boots,
boot.values = boot, sdnames = colnames(data), data = data,
C = C, Call = cl)
class(result) <- c("psych", "mediate")
if (plot) {
if (is.null(m)) {
moderate.diagram(result)
}
else {
mediate.diagram(result, main = main)
}
}
return(result)
}
You can test the mymediate function using the following example:
# Example call: four predictors, mediators given as columns 5 and 6 of the
# R.kerch matrix, 767 observations and 50 bootstrap iterations.
library(psych)
predictors <- c("Intelligence", "Siblings", "FatherEd", "FatherOcc")
mod.k2 <- mymediate(
  y = "OccupAsp",
  x = predictors,
  m = 5:6,
  data = R.kerch,
  n.obs = 767,
  n.iter = 50
)
print(mod.k2)
I have to run 1000 iterations of this SIMPLS function to get the value of the coefficient. My problem is how to get the value of the coefficient for each iteration. Can I print the output for each iteration?
# SIMPLS regression of y on x with `a` components.
#
# y  response: numeric vector or n x m matrix.
# x  n x k matrix of explanatory variables.
# a  number of components to extract.
#
# Both x and y are mean-centred. Returns a list with the k x m coefficient
# matrix (coef), fitted values on the original scale (fit), residuals (res),
# SSE, the residual scale, standardised residuals (stdres), leverages and the
# score / weight / loading matrices Ts, Rs, Ps, Cs.
simpls <- function(y, x, a) {
  n <- nrow(x)
  k <- ncol(x)
  m <- NCOL(y)
  y <- as.matrix(y)

  Ps <- matrix(0, k, a)
  Cs <- matrix(0, m, a)
  Rs <- matrix(0, k, a)
  Ts <- matrix(0, n, a)

  # Centre the columns of x and y.
  x <- sweep(x, 2, colMeans(x))
  my <- colMeans(y)
  y <- sweep(y, 2, my)

  Snew <- crossprod(x, y)
  for (h in seq_len(a)) {
    # Weight vector: dominant left singular vector of the deflated x'y.
    rs <- svd(Snew)$u[, 1, drop = FALSE]
    rs <- rs / norm(rs, type = "F")
    ts <- x %*% rs
    ts <- ts / norm(ts, type = "F")
    Rs[, h] <- rs
    Ts[, h] <- ts
    Ps[, h] <- crossprod(x, ts)
    Cs[, h] <- crossprod(y, ts)
    # Deflate: remove the projection onto the loadings found so far.
    Ph <- Ps[, seq_len(h), drop = FALSE]
    Snew <- Snew - Ph %*% solve(crossprod(Ph), crossprod(Ph, Snew))
  }

  beta <- Rs %*% solve(crossprod(Ps, Rs), t(Cs))
  yfit <- x %*% beta
  res <- y - yfit
  SSE <- sum(res^2)
  list(
    coef = beta,
    fit = sweep(yfit, 2, my, "+"),
    res = res,
    SSE = SSE,
    scale = sqrt(SSE / (n - a)),
    stdres = apply(res, 2, function(r) (r - mean(r)) / sqrt(var(r))),
    leverage = diag(Ts %*% solve(crossprod(Ts), t(Ts))),
    Ts = Ts, Rs = Rs, Ps = Ps, Cs = Cs
  )
}

# Simulation: 1000 data sets, one column of coefficients per iteration.
#
# Changes: simpls() is defined once, outside the loop, and actually called;
# the simulated pieces are plain matrices instead of indexed assignments into
# objects that did not exist; `coef` has one column per iteration (it had 10
# columns for 1000 iterations).
n <- 10       # observations
k <- 20       # explanatory variables
a <- 2        # latent components
n_iter <- 1000
coef <- matrix(0, nrow = k, ncol = n_iter)
for (i in seq_len(n_iter)) {
  scores <- matrix(rnorm(n * a, 0, 1), ncol = a)                  # n x a
  loadings <- matrix(rnorm(k * a, 0, 1), ncol = a)                # k x a
  B <- matrix(rnorm(k, 0, 0.001), nrow = k, ncol = 1)             # k x 1
  e <- matrix(rcauchy(n, location = 0, scale = 1), nrow = n, ncol = 1)  # standard Cauchy errors
  x <- scores %*% t(loadings)                                     # explanatory variables
  y <- scores %*% (t(loadings) %*% B) + e                         # response
  coef[, i] <- simpls(y, x, a)$coef
}
# Column i holds the coefficients of iteration i.
print(coef)
You can just add your coef to a vector for every iteration. I've created an example here:
# Collect one value per iteration: slot i of coef_vector receives the
# coefficient computed in iteration i.
n_rounds <- 10
coef_vector <- numeric(n_rounds)
for (i in seq_len(n_rounds)) {
  loop_coef <- i * 2
  coef_vector[i] <- loop_coef
}
Result:
> coef_vector
[1] 2 4 6 8 10 12 14 16 18 20
>
Of course, if your coef is more complex than a variable, you can add it to a list instead of a vector.
I am trying to take a derivative of a double sum function. I am running into this error:
Error in deriv.f.1(X = X.data, y = y.vec, alpha = alpha.vector[1, ]) :
object 'L_D_grad' not found
I have tried to move the {} brackets around, double check if I missed a closing/opening bracket, if I have extra opening/closing bracket. However, the error still exists.
# Generate Sample Data
# Simulate n observations: x ~ N(mean 5, sd 10) and a class label y that is
# 1 when x < 2.843 and -1 otherwise. Returns a data frame with columns x, y.
gen.sample <- function(n) {
  x <- rnorm(n, mean = 5, sd = 10)
  below_cut <- x < 2.843
  data.frame(x = x, y = ifelse(below_cut, 1, -1))
}
##
# Gradient of the objective with respect to each alpha[k]:
#   1 - sum_{l != k} alpha[l] y[k] y[l] X[k] X[l] - alpha[k] y[k]^2 X[k]^2
# X, y, alpha: vectors indexed 1..length(X). Intended to return a vector of
# length(X).
deriv.f.1 <- function(X,y,alpha){
N <- length(X)
# NOTE(review): `<` is a comparison, not an assignment, so L_D_grad is never
# created here; this line is what raises "object 'L_D_grad' not found".
# The intended statement is `L_D_grad <- numeric(N)`.
L_D_grad < numeric(N)
xy.alpha.sum <- numeric(N)
for(k in 1:N){
# Cross terms for the current k; the l == k term is set to zero and handled
# separately below.
for(l in 1:N){
if(l == k){
xy.alpha.sum[l] = 0}
else{
xy.alpha.sum[l] <- alpha[l]*y[k]*y[l]*X[k]*X[l]}
}
L_D_grad[k] <- 1 - sum(xy.alpha.sum) - alpha[k]*(y[k])^2*(X[k])^2
}
return(L_D_grad)
}
## Illustration
# Illustration: simulate a sample, build the alpha values and evaluate the
# gradient.
set.seed(4997)
options(digits = 4,scipen = -4)
# NOTE(review): N is used below but is not assigned anywhere in this snippet;
# it has to be defined first (e.g. N <- 10).
sample.data <- gen.sample(n=N)
X.data <- sample.data$x
y.vec <- sample.data$y
# N x N matrix whose rows are all seq(-5, 5, length.out = N).
alpha.vector <- matrix(rep(seq(from=-5,to = 5, length.out = N),N*N),
ncol = N, nrow = N, byrow = TRUE)
alpha_vec <- alpha.vector[1,]
deriv.f.1(X = X.data, y = y.vec, alpha = alpha_vec)
Thanks in advance!
Here is my code:
# Generate Sample Data
# Draw n values x from a normal distribution (mean 5, sd 10) and label each
# one: y = 1 if x < 2.843, otherwise y = -1. Returns data.frame(x, y).
gen.sample <- function(n) {
  cutoff <- 2.843
  x <- rnorm(n, mean = 5, sd = 10)
  y <- ifelse(x < cutoff, 1, -1)
  data.frame(x = x, y = y)
}
##
# Gradient of the objective with respect to each alpha[k]:
#   1 - sum_{l != k} alpha[l] y[k] y[l] X[k] X[l] - alpha[k] y[k]^2 X[k]^2
#
# The excluded l == k term is exactly the last term, so the expression
# collapses to 1 - y[k] X[k] * sum_l alpha[l] y[l] X[l]. This is computed for
# all k at once instead of with an N x N double loop.
#
# X, y, alpha: numeric vectors; the first length(X) elements of y and alpha
#              are used.
# Returns a numeric vector of length(X) (numeric(0) for empty input, where the
# previous 1:N loop misbehaved).
deriv.f.1 <- function(X, y, alpha) {
  N <- length(X)
  idx <- seq_len(N)
  yx <- y[idx] * X[idx]
  L_D_grad <- 1 - yx * sum(alpha[idx] * yx)
  L_D_grad
}
## Illustration
# Illustration: simulate N observations, build the alpha values and evaluate
# the gradient at the first row of alpha.vector.
set.seed(4997)
options(digits = 4, scipen = -4)
N <- 10
sample.data <- gen.sample(n = N)
X.data <- sample.data$x
y.vec <- sample.data$y
# N x N matrix whose rows are all seq(-5, 5, length.out = N). The sequence is
# repeated N times (one copy per row); it used to be repeated N * N times,
# which supplies N^3 values for N^2 cells.
alpha.vector <- matrix(rep(seq(from = -5, to = 5, length.out = N), N),
                       ncol = N, nrow = N, byrow = TRUE)
alpha_vec <- alpha.vector[1, ]
deriv.f.1(X = X.data, y = y.vec, alpha = alpha_vec)
Where:
#sample.data
#x y
#1 -5.303e+00 1
#2 1.493e+01 -1
#3 9.797e+00 -1
#4 1.991e+01 -1
#5 -1.454e+01 1
#6 1.423e+01 -1
#7 1.025e+01 -1
#8 5.455e+00 -1
#9 3.719e+00 -1
#10 2.021e+01 -1
And deriv.f.1(X = X.data, y = y.vec, alpha = alpha_vec)
# -1.271e+01 -3.759e+01 -2.432e+01 -5.046e+01 -3.659e+01 -3.577e+01 -2.548e+01 -1.310e+01
# -8.612e+00 -5.123e+01
I made two changes:
Assign N a value: N=10
Correct the assignment of L_D_grad: L_D_grad <- numeric(N)