Saving huge model object to file - r

Say you have a model object of class 'varrest' returned from a VAR() regression operation.
I want to save the model to a file, but not all data which was used to estimate the coefficients.
How can one just save the model specification wihtout the training data?
Because when I save the model it has a file size of over 1GB and therefore loading does take its time.
Can one save objects without some attributes?

The predict.varest function starts out with this code:
K <- object$K
p <- object$p
obs <- object$obs
type <- object$type
data.all <- object$datamat
ynames <- colnames(object$y)
You can then investigate how much pruning you might achieve:
data(Canada)
tcan <-
VAR(Canada, p = 2, type = "trend")
names(tcan)
# [1] "varresult" "datamat" "y" "type" "p"
# [6] "K" "obs" "totobs" "restrictions" "call"
object.size(tcan[c("K","p", "obs", "type", "datamat", "y")] )
#15080 bytes
object.size(tcan)
#252032 bytes
So the difference is substantial, but just saving those items is not sufficient because the next line in predict.varest is:
B <- Bcoef(object)
You will need to add that object to the list above and then construct a new predict-function that accepts something less than the large 'varresult' node of the model object. Also turned out that there was a downstream call to an internal function that needs to be stored. (You will need to decide in advance what interval you need for prediction.)
tsmall <- c( tcan[c("K","p", "obs", "type", "datamat", "y", "call")] )
tsmall[["Bco"]] <- Bcoef(tcan)
tsmall$sig.y <- vars:::.fecov(x = tcan, n.ahead = 10)
And the modified predict function will be:
sm.predict <- function (object, ..., n.ahead = 10, ci = 0.95, dumvar = NULL)
{
K <- object$K
p <- object$p
obs <- object$obs
type <- object$type
data.all <- object$datamat
ynames <- colnames(object$y)
n.ahead <- as.integer(n.ahead)
Z <- object$datamat[, -c(1:K)]
# This used to be a call to Bcoef(object)
B <- object$Bco
if (type == "const") {
Zdet <- matrix(rep(1, n.ahead), nrow = n.ahead, ncol = 1)
colnames(Zdet) <- "const"
}
else if (type == "trend") {
trdstart <- nrow(Z) + 1 + p
Zdet <- matrix(seq(trdstart, length = n.ahead), nrow = n.ahead,
ncol = 1)
colnames(Zdet) <- "trend"
}
else if (type == "both") {
trdstart <- nrow(Z) + 1 + p
Zdet <- matrix(c(rep(1, n.ahead), seq(trdstart, length = n.ahead)),
nrow = n.ahead, ncol = 2)
colnames(Zdet) <- c("const", "trend")
}
else if (type == "none") {
Zdet <- NULL
}
if (!is.null(eval(object$call$season))) {
season <- eval(object$call$season)
seas.names <- paste("sd", 1:(season - 1), sep = "")
cycle <- tail(data.all[, seas.names], season)
seasonal <- as.matrix(cycle, nrow = season, ncol = season -
1)
if (nrow(seasonal) >= n.ahead) {
seasonal <- as.matrix(cycle[1:n.ahead, ], nrow = n.ahead,
ncol = season - 1)
}
else {
while (nrow(seasonal) < n.ahead) {
seasonal <- rbind(seasonal, cycle)
}
seasonal <- seasonal[1:n.ahead, ]
}
rownames(seasonal) <- seq(nrow(data.all) + 1, length = n.ahead)
if (!is.null(Zdet)) {
Zdet <- as.matrix(cbind(Zdet, seasonal))
}
else {
Zdet <- as.matrix(seasonal)
}
}
if (!is.null(eval(object$call$exogen))) {
if (is.null(dumvar)) {
stop("\nNo matrix for dumvar supplied, but object varest contains exogenous variables.\n")
}
if (!all(colnames(dumvar) %in% colnames(data.all))) {
stop("\nColumn names of dumvar do not coincide with exogen.\n")
}
if (!identical(nrow(dumvar), n.ahead)) {
stop("\nRow number of dumvar is unequal to n.ahead.\n")
}
if (!is.null(Zdet)) {
Zdet <- as.matrix(cbind(Zdet, dumvar))
}
else {
Zdet <- as.matrix(dumvar)
}
}
Zy <- as.matrix(object$datamat[, 1:(K * (p + 1))])
yse <- matrix(NA, nrow = n.ahead, ncol = K)
# This used to be a call to vars:::.fecov
sig.y <- object$sig.y
for (i in 1:n.ahead) {
yse[i, ] <- sqrt(diag(sig.y[, , i]))
}
yse <- -1 * qnorm((1 - ci)/2) * yse
colnames(yse) <- paste(ci, "of", ynames)
forecast <- matrix(NA, ncol = K, nrow = n.ahead)
lasty <- c(Zy[nrow(Zy), ])
for (i in 1:n.ahead) {
lasty <- lasty[1:(K * p)]; print(lasty); print(B)
Z <- c(lasty, Zdet[i, ]) ;print(Z)
forecast[i, ] <- B %*% Z
temp <- forecast[i, ]
lasty <- c(temp, lasty)
}
colnames(forecast) <- paste(ynames, ".fcst", sep = "")
lower <- forecast - yse
colnames(lower) <- paste(ynames, ".lower", sep = "")
upper <- forecast + yse
colnames(upper) <- paste(ynames, ".upper", sep = "")
forecasts <- list()
for (i in 1:K) {
forecasts[[i]] <- cbind(forecast[, i], lower[, i], upper[,
i], yse[, i])
colnames(forecasts[[i]]) <- c("fcst", "lower", "upper",
"CI")
}
names(forecasts) <- ynames
result <- list(fcst = forecasts, endog = object$y, model = object,
exo.fcst = dumvar)
class(result) <- "varprd"
return(result)
}

Either
set the attributes you do not want to NULL, or
copy the parts you want to a new object, or
call the save() function with proper indexing.

Related

Regarding parallelisation of ensemblegamma0() function of ensembleBMA package

I was using the ensmebleBMA package in R I and fitting gamma0() model for precipitation forecast in R
But it is taking a long time if I do it for a season and so many grids.
I am using the ensemblegamma0() function to fit the model.
Hope that it will be done fast if I can parallelize the function. I tried parallelizing a source code but was not working properly. I seek help from someone having any idea about it.
There is a for loop inside this function() (ensemblegamma0()) which will fit the model for multiple days as each day forecasting is independent which could be parallelized
I am sharing the method I tried here
sample method
library(future.apply)
plan(multiprocess) ## => parallelize on your local computer
X <- 1:5
y <- future_lapply(X, function(x) {
tmp <- sqrt(x)
tmp
})
source code used
ensembleBMAgamma0 <-
function(ensembleData, trainingDays, dates = NULL,
control = controlBMAgamma0(), exchangeable = NULL)
{
#
# copyright 2006-present, University of Washington. All rights reserved.
# for terms of use, see the LICENSE file
#
if (!inherits(ensembleData,"ensembleData")) stop("not an ensembleData object")
if (missing(trainingDays)) stop("trainingDays must be specified")
call <- match.call()
warmStart <- FALSE
if (missing(trainingDays)) stop("trainingDays must be specified")
ensMemNames <- ensembleMembers(ensembleData)
nForecasts <- length(ensMemNames)
exchangeable <- getExchangeable( exchangeable, ensembleGroups(ensembleData),
nForecasts)
# remove instances missing all forecasts, obs or dates
M <- !dataNA(ensembleData)
if (!all(M)) ensembleData <- ensembleData[M,]
nObs <- nrow(ensembleData)
if (!nObs) stop("no data")
Dates <- as.character(ensembleValidDates(ensembleData))
DATES <- sort(unique(Dates))
julianDATES <- ymdhTOjul(DATES)
incr <- min(1,min(diff(julianDATES))) ## incr may be fractional for hours
forecastHour <- ensembleFhour(ensembleData)
lag <- ceiling( forecastHour / 24 )
## dates that can be modeled by the training data (ignoring gaps)
dates <- getDates( DATES, julianDATES, dates, trainingDays, lag, incr)
juliandates <- ymdhTOjul(dates)
nDates <- length(dates)
if (is.null(control$prior)) {
# accomodates saved mean as an additional parameter
prob0coefs <- array( NA, c(3, nForecasts, nDates),
dimnames = list(NULL, ensMemNames, dates))
}
else {
prob0coefs <- array( NA, c(4, nForecasts, nDates),
dimnames = list(NULL, ensMemNames, dates))
}
biasCoefs <- array( NA, c(2, nForecasts, nDates),
dimnames = list(NULL, ensMemNames, dates))
varCoefs <- array( NA, c(2, nDates), dimnames = list(NULL, dates))
weights <- array( NA, c(nForecasts, nDates),
dimnames = list(ensMemNames, dates))
trainTable <- rep(0, nDates)
names(trainTable) <- dates
nIter <- loglikelihood <- rep(0, nDates)
names(nIter) <- names(loglikelihood) <- dates
obs <- dataVerifObs(ensembleData)
K <- 1:nForecasts
L <- length(juliandates)
twin <- 1:trainingDays
cat("\n")
l <- 0
for(i in seq(along = juliandates)) {
I <- (juliandates[i]-lag*incr) >= julianDATES
if (!any(I)) stop("insufficient training data")
j <- which(I)[sum(I)]
if (j != l) {
D <- as.logical(match(Dates, DATES[j:1], nomatch=0))
nonz <- sum(obs[D] != 0)
if (is.null(control$prior) && nonz < control$rainobs) {
cat("insufficient nonzero training obs for date", dates[i], "...\n")
next
}
twin <- (j+1) - (1:trainingDays)
if (is.null(control$prior)) {
# attempt to extend the training period
while (TRUE) {
D <- as.logical(match(Dates, DATES[twin], nomatch=0))
if (!any(D)) stop("this should not happen")
d <- ensembleValidDates(ensembleData[D,])
# if (length(unique(d)) != trainingDays) stop("wrong # of training days")
nonz <- sum(obs[D] != 0)
if (nonz >= control$rainobs) break
if (min(twin) == 1) break
twin <- max(twin):(min(twin)-1)
}
if (nonz < control$rainobs) {
cat("insufficient nonzero training obs for date", dates[i], "...\n")
next
}
}
cat("modeling for date", dates[i], "...")
kNA <- apply(ensembleForecasts(ensembleData[D,]), 2,
function(x) all(is.na(x)))
if (any(kNA)) {
if (!is.null(x <- exchangeable)) x <- exchangeable[-K[kNA]]
fit <- fitBMAgamma0(ensembleData[D,-K[kNA]], control = control,
exchangeable = x)
}
else {
fit <- fitBMAgamma0(ensembleData[D,], control = control,
exchangeable = exchangeable)
}
l <- j ## last model fit
trainTable[i] <- length(unique(Dates[D]))
nIter[i] <- fit$nIter
loglikelihood[i] <- fit$loglikelihood
if (warmStart) control$start$weights <- weights[,i]
cat("\n")
}
else {
trainTable[i] <- -abs(trainTable[i-1])
nIter[i] <- -abs(nIter[i-1])
loglikelihood[i] <- loglikelihood[i-1]
}
prob0coefs[,K[!kNA],i] <- fit$prob0coefs
biasCoefs[,K[!kNA],i] <- fit$biasCoefs
varCoefs[,i] <- fit$varCoefs
weights[K[!kNA],i] <- fit$weights
}
structure(list(training = list(days=trainingDays,lag=lag,table=trainTable),
prob0coefs = prob0coefs, biasCoefs = biasCoefs,
varCoefs = varCoefs, weights = weights, nIter = nIter,
exchangeable = exchangeable, power = fit$power,
call = match.call()),
forecastHour = forecastHour,
initializationTime = ensembleItime(ensembleData),
class = c("ensembleBMAgamma0","ensembleBMA"))
}
edited source code for loop
y=future_lapply(juliandates, function (i) {
I <- (juliandates[i]-lag*incr) >= julianDATES
if (!any(I)) stop("insufficient training data")
j <- which(I)[sum(I)]
if (j != l) {
D <- as.logical(match(Dates, DATES[j:1], nomatch=0))
nonz <- sum(obs[D] != 0)
if (is.null(control$prior) && nonz < control$rainobs) {
cat("insufficient nonzero training obs for date", dates[i], "...\n")
next
}
twin <- (j+1) - (1:trainingDays)
if (is.null(control$prior)) {
# attempt to extend the training period
while (TRUE) {
D <- as.logical(match(Dates, DATES[twin], nomatch=0))
if (!any(D)) stop("this should not happen")
d <- ensembleValidDates(ensembleData[D,])
# if (length(unique(d)) != trainingDays) stop("wrong # of training days")
nonz <- sum(obs[D] != 0)
if (nonz >= control$rainobs) break
if (min(twin) == 1) break
twin <- max(twin):(min(twin)-1)
}
if (nonz < control$rainobs) {
cat("insufficient nonzero training obs for date", dates[i], "...\n")
next
}
}
cat("modeling for date", dates[i], "...")
kNA <- apply(ensembleForecasts(ensembleData[D,]), 2,
function(x) all(is.na(x)))
if (any(kNA)) {
if (!is.null(x <- exchangeable)) x <- exchangeable[-K[kNA]]
fit <- fitBMAgamma0(ensembleData[D,-K[kNA]], control = control,
exchangeable = x)
}
else {
fit <- fitBMAgamma0(ensembleData[D,], control = control,
exchangeable = exchangeable)
}
l <- j ## last model fit
trainTable[i] <- length(unique(Dates[D]))
nIter[i] <- fit$nIter
loglikelihood[i] <- fit$loglikelihood
if (warmStart) control$start$weights <- weights[,i]
cat("\n")
}
else {
trainTable[i] <- -abs(trainTable[i-1])
nIter[i] <- -abs(nIter[i-1])
loglikelihood[i] <- loglikelihood[i-1]
}
prob0coefs[,K[!kNA],i] <- fit$prob0coefs
biasCoefs[,K[!kNA],i] <- fit$biasCoefs
varCoefs[,i] <- fit$varCoefs
weights[K[!kNA],i] <- fit$weights
}
structure(list(training = list(days=trainingDays,lag=lag,table=trainTable),
prob0coefs = prob0coefs, biasCoefs = biasCoefs,
varCoefs = varCoefs, weights = weights, nIter = nIter,
exchangeable = exchangeable, power = fit$power,
call = match.call()),
forecastHour = forecastHour,
initializationTime = ensembleItime(ensembleData),
class = c("ensembleBMAgamma0","ensembleBMA"))
})
i am getting some error after running this

Error in Psych::Mediate: Object Not Found

I'm running a mediation analysis on a dataset in r and can't figure out how to get psych::mediate to work--I've done the same on another dataset before and didn't change anything, but it's not working with this new data for some reason.
I tried:
1. Turning 'condition' into a condition.f factor
2. Explicitly naming DATA a "data.frame"
3. Specifying different parameters such as "z" or "mod" in the function
4. Checked capitalization on all the variable column names.
None of the above seem to work.
library(psych)
DATA = STEX_S1_FINALCLEAN
Mediation_RA = psych::mediate( y = "DV_See", x = "Share_T", m = "Seff", data = DATA)
print(Mediation_RA,short=F)
I'd expect a full output with mediation values, but have gotten:
Error in psych::mediate(y = "DV_See", x = "Share_T", m = "Seff", data = DATA) :
object 'ex' not found
I don't see and object 'ex' anywhere, and that's not a name of any columns in the DATA data frame.
Following the suggestion of #r2evans, you can use the following modified function:
mymediate <- function (y, x, m = NULL, data, mod = NULL, z = NULL, n.obs = NULL,
use = "pairwise", n.iter = 5000, alpha = 0.05, std = FALSE,
plot = TRUE, zero = TRUE, main = "Mediation")
{
cl <- match.call()
if (class(y) == "formula") {
ps <- fparse(y)
y <- ps$y
x <- ps$x
m <- ps$m
mod <- ps$prod
ex <- ps$ex
x <- x[!ps$x %in% ps$m]
z <- ps$z
print(str(ps))
} else {
ex = NULL
}
all.ab <- NULL
if (is.numeric(y))
y <- colnames(data)[y]
if (is.numeric(x))
x <- colnames(data)[x]
if (!is.null(m))
if (is.numeric(m))
m <- colnames(data)[m]
if (!is.null(mod)) {
if (is.numeric(mod)) {
nmod <- length(mod)
mod <- colnames(data)[mod]
}
}
if (is.null(mod)) {
nmod <- 0
}
else {
nmod <- length(mod)
}
var.names <- list(IV = x, DV = y, med = m, mod = mod, z = z,
ex = ex)
if (any(!(unlist(var.names) %in% colnames(data)))) {
stop("Variable names not specified correctly")
}
if (ncol(data) == nrow(data)) {
raw <- FALSE
if (nmod > 0) {
stop("Moderation Analysis requires the raw data")
}
else {
data <- data[c(y, x, m, z), c(y, x, m, z)]
}
}
else {
data <- data[, c(y, x, m, z, ex)]
}
if (nmod == 1) {
mod <- c(x, mod)
nmod <- length(mod)
}
if (!is.matrix(data))
data <- as.matrix(data)
if ((dim(data)[1] != dim(data)[2])) {
n.obs = dim(data)[1]
if (!is.null(mod))
if (zero)
data <- scale(data, scale = FALSE)
C <- cov(data, use = use)
raw <- TRUE
if (std) {
C <- cov2cor(C)
}
}
else {
raw <- FALSE
C <- data
nvar <- ncol(C)
if (is.null(n.obs)) {
n.obs <- 1000
message("The data matrix was a correlation matrix and the number of subjects was not specified. \n n.obs arbitrarily set to 1000")
}
if (!is.null(m)) {
message("The replication data matrices were simulated based upon the specified number of subjects and the observed correlation matrix.")
eX <- eigen(C)
data <- matrix(rnorm(nvar * n.obs), n.obs)
data <- t(eX$vectors %*% diag(sqrt(pmax(eX$values,
0)), nvar) %*% t(data))
colnames(data) <- c(y, x, m)
}
}
if ((nmod > 0) | (!is.null(ex))) {
if (!raw) {
stop("Moderation analysis requires the raw data")
}
else {
if (zero) {
data <- scale(data, scale = FALSE)
}
}
}
if (nmod > 0) {
prods <- matrix(NA, ncol = length(ps$prod), nrow = nrow(data))
colnames(prods) <- paste0("V", 1:length(ps$prod))
for (i in 1:length(ps$prod)) {
prods[, i] <- apply(data[, ps$prod[[i]]], 1, prod)
colnames(prods)[i] <- paste0(ps$prod[[i]], collapse = "*")
}
data <- cbind(data, prods)
x <- c(x, colnames(prods))
}
if (!is.null(ex)) {
quads <- matrix(NA, ncol = length(ex), nrow = nrow(data))
colnames(quads) <- ex
for (i in 1:length(ex)) {
quads[, i] <- data[, ex[i]] * data[, ex[i]]
colnames(quads)[i] <- paste0(ex[i], "^2")
}
data <- cbind(data, quads)
x <- c(x, colnames(quads))
}
if (raw) {
C <- cov(data, use = use)
}
if (std) {
C <- cov2cor(C)
}
xy <- c(x, y)
numx <- length(x)
numy <- length(y)
if (!is.null(m)) {
numm <- length(m)
nxy <- numx + numy
m.matrix <- C[c(x, m), c(x, m), drop = FALSE]
}
else {
numm <- 0
nxy <- numx
}
df <- n.obs - nxy - 1
xy.matrix <- C[c(x, m), y, drop = FALSE]
total.reg <- matReg(x, y, m = m, z = z, C = C, n.obs = n.obs)
direct <- total.reg$beta
if (!is.null(z)) {
colnames(direct) <- paste0(colnames(direct), "*")
rownames(direct) <- paste0(rownames(direct), "*")
}
if (numm > 0) {
a.reg <- matReg(x = x, y = m, C = C, z = z, n.obs = n.obs)
b.reg <- matReg(c(x, m), y, C = C, z = z, n.obs = n.obs)
cprime.reg <- matReg(c(x, m), y, C = C, n.obs = n.obs,
z = z)
a <- a.reg$beta
b <- b.reg$beta[-(1:numx), , drop = FALSE]
c <- total.reg$beta
cprime <- cprime.reg$beta
all.ab <- matrix(NA, ncol = numm, nrow = numx)
for (i in 1:numx) {
all.ab[i, ] <- a[i, ] * t(b[, 1])
}
colnames(all.ab) <- m
rownames(all.ab) <- x
ab <- a %*% b
indirect <- c - ab
if (is.null(n.obs)) {
message("Bootstrap is not meaningful unless raw data are provided or the number of subjects is specified.")
mean.boot <- sd.boot <- ci.quant <- boot <- se <- tvalue <- prob <- NA
}
else {
boot <- psych:::boot.mediate(data, x, y, m, z, n.iter = n.iter,
std = std, use = use)
mean.boot <- colMeans(boot)
sd.boot <- apply(boot, 2, sd)
ci.quant <- apply(boot, 2, function(x) quantile(x,
c(alpha/2, 1 - alpha/2), na.rm = TRUE))
mean.boot <- matrix(mean.boot, nrow = numx)
sd.boot <- matrix(sd.boot, nrow = numx)
ci.ab <- matrix(ci.quant, nrow = 2 * numx * numy)
boots <- list(mean = mean.boot, sd = sd.boot, ci = ci.quant,
ci.ab = ci.ab)
}
}
else {
a.reg <- b.reg <- reg <- NA
a <- b <- c <- ab <- cprime <- boot <- boots <- indirect <- cprime.reg <- NA
}
if (!is.null(z)) {
var.names$IV <- paste0(var.names$IV, "*")
var.names$DV <- paste0(var.names$DV, "*")
var.names$med <- paste0(var.names$med, "*")
colnames(C) <- rownames(C) <- paste0(colnames(C), "*")
}
result <- list(var.names = var.names, a = a, b = b, ab = ab,
all.ab = all.ab, c = c, direct = direct, indirect = indirect,
cprime = cprime, total.reg = total.reg, a.reg = a.reg,
b.reg = b.reg, cprime.reg = cprime.reg, boot = boots,
boot.values = boot, sdnames = colnames(data), data = data,
C = C, Call = cl)
class(result) <- c("psych", "mediate")
if (plot) {
if (is.null(m)) {
moderate.diagram(result)
}
else {
mediate.diagram(result, main = main)
}
}
return(result)
}
You can test the mymediate function using the following example:
library(psych)
mod.k2 <- mymediate(y="OccupAsp", x=c("Intelligence","Siblings","FatherEd","FatherOcc"),
m= c(5:6), data=R.kerch, n.obs=767, n.iter=50)
print(mod.k2)

R Factor Analysis with factanal() for huge amount of predictors results in a system that is computationally singular

I am trying to run Factor analysis for a dataset with around 150 variables but only have around around 80 observations.
I tried the factanal() function in R and R reported error:
Error in solve.default(cv) :
system is computationally singular: reciprocal condition number = 3.0804e-20
Any suggestions on alternative methods / packages?
A demonstration on a dummy dataset would be:
# This will work (dataset with 80 obs and 15 predictors)
set.seed(1234)
fake_df = as.data.frame(matrix(rnorm(80*15), nrow = 80))
factanal(fake_df, factors = 2, rotation = "varimax")
# This will not (dataset with 80 obs and 150 predictors)
set.seed(1234)
fake_df = as.data.frame(matrix(rnorm(80*150), nrow = 80))
factanal(fake_df, factors = 2, rotation = "varimax")
So far I've replaced the solve function in the factanal() source code with a numerical solving function one that I created below, but it did not resolve the issue:
solve_G = function(M){
library(matrixcalc)
if(!is.singular.matrix(M)){
return(solve(M))
} else{
s = svd(M)
U = s$u
V = s$v
D_Inv = diag(1/s$d)
Num_Inv = V %*% D_Inv %*% t(U)
cat("Singular Matrix! SVD Used.\n")
return(Num_Inv)
}
}
And after you replace "solve" with "solve_G", a new error occurred:
Error in factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt) :
could not find function "factanal.fit.mle"
P.S. Here is the new "factanal" function named my_factanal:
The error above occurred when running the line:
nfit <- factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt)
And to run this, Set x to be a 80* 150 numerical dataframe, set factors = 2, set scores = "regression", rotation = "varimax":
my_factanal = function (x, factors, data = NULL, covmat = NULL, n.obs = NA,
subset, na.action, start = NULL, scores = c("none", "regression",
"Bartlett"), rotation = "varimax", control = NULL, ...)
{
sortLoadings <- function(Lambda) {
cn <- colnames(Lambda)
Phi <- attr(Lambda, "covariance")
ssq <- apply(Lambda, 2L, function(x) -sum(x^2))
Lambda <- Lambda[, order(ssq), drop = FALSE]
colnames(Lambda) <- cn
neg <- colSums(Lambda) < 0
Lambda[, neg] <- -Lambda[, neg]
if (!is.null(Phi)) {
unit <- ifelse(neg, -1, 1)
attr(Lambda, "covariance") <- unit %*% Phi[order(ssq),
order(ssq)] %*% unit
}
Lambda
}
cl <- match.call()
na.act <- NULL
if (is.list(covmat)) {
if (any(is.na(match(c("cov", "n.obs"), names(covmat)))))
stop("'covmat' is not a valid covariance list")
cv <- covmat$cov
n.obs <- covmat$n.obs
have.x <- FALSE
}
else if (is.matrix(covmat)) {
cv <- covmat
have.x <- FALSE
}
else if (is.null(covmat)) {
if (missing(x))
stop("neither 'x' nor 'covmat' supplied")
have.x <- TRUE
if (inherits(x, "formula")) {
mt <- terms(x, data = data)
if (attr(mt, "response") > 0)
stop("response not allowed in formula")
attr(mt, "intercept") <- 0
mf <- match.call(expand.dots = FALSE)
names(mf)[names(mf) == "x"] <- "formula"
mf$factors <- mf$covmat <- mf$scores <- mf$start <- mf$rotation <- mf$control <- mf$... <- NULL
mf[[1L]] <- quote(stats::model.frame)
mf <- eval.parent(mf)
na.act <- attr(mf, "na.action")
if (.check_vars_numeric(mf))
stop("factor analysis applies only to numerical variables")
z <- model.matrix(mt, mf)
}
else {
z <- as.matrix(x)
if (!is.numeric(z))
stop("factor analysis applies only to numerical variables")
if (!missing(subset))
z <- z[subset, , drop = FALSE]
}
covmat <- cov.wt(z)
cv <- covmat$cov
n.obs <- covmat$n.obs
}
else stop("'covmat' is of unknown type")
scores <- match.arg(scores)
if (scores != "none" && !have.x)
stop("requested scores without an 'x' matrix")
p <- ncol(cv)
if (p < 3)
stop("factor analysis requires at least three variables")
dof <- 0.5 * ((p - factors)^2 - p - factors)
if (dof < 0)
stop(sprintf(ngettext(factors, "%d factor is too many for %d variables",
"%d factors are too many for %d variables"), factors,
p), domain = NA)
sds <- sqrt(diag(cv))
cv <- cv/(sds %o% sds)
cn <- list(nstart = 1, trace = FALSE, lower = 0.005)
cn[names(control)] <- control
more <- list(...)[c("nstart", "trace", "lower", "opt", "rotate")]
if (length(more))
cn[names(more)] <- more
if (is.null(start)) {
start <- (1 - 0.5 * factors/p)/diag(solve_G(cv))
if ((ns <- cn$nstart) > 1)
start <- cbind(start, matrix(runif(ns - 1), p, ns -
1, byrow = TRUE))
}
start <- as.matrix(start)
if (nrow(start) != p)
stop(sprintf(ngettext(p, "'start' must have %d row",
"'start' must have %d rows"), p), domain = NA)
nc <- ncol(start)
if (nc < 1)
stop("no starting values supplied")
best <- Inf
for (i in 1L:nc) {
nfit <- factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt)
if (cn$trace)
cat("start", i, "value:", format(nfit$criteria[1L]),
"uniqs:", format(as.vector(round(nfit$uniquenesses,
4))), "\\n")
if (nfit$converged && nfit$criteria[1L] < best) {
fit <- nfit
best <- fit$criteria[1L]
}
}
if (best == Inf)
stop(ngettext(nc, "unable to optimize from this starting value",
"unable to optimize from these starting values"),
domain = NA)
load <- fit$loadings
if (rotation != "none") {
rot <- do.call(rotation, c(list(load), cn$rotate))
load <- if (is.list(rot)) {
load <- rot$loadings
fit$rotmat <- if (inherits(rot, "GPArotation"))
t(solve_G(rot$Th))
else rot$rotmat
rot$loadings
}
else rot
}
fit$loadings <- sortLoadings(load)
class(fit$loadings) <- "loadings"
fit$na.action <- na.act
if (have.x && scores != "none") {
Lambda <- fit$loadings
zz <- scale(z, TRUE, TRUE)
switch(scores, regression = {
sc <- zz %*% solve(cv, Lambda)
if (!is.null(Phi <- attr(Lambda, "covariance"))) sc <- sc %*%
Phi
}, Bartlett = {
d <- 1/fit$uniquenesses
tmp <- t(Lambda * d)
sc <- t(solve(tmp %*% Lambda, tmp %*% t(zz)))
})
rownames(sc) <- rownames(z)
colnames(sc) <- colnames(Lambda)
if (!is.null(na.act))
sc <- napredict(na.act, sc)
fit$scores <- sc
}
if (!is.na(n.obs) && dof > 0) {
fit$STATISTIC <- (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3) *
fit$criteria["objective"]
fit$PVAL <- pchisq(fit$STATISTIC, dof, lower.tail = FALSE)
}
fit$n.obs <- n.obs
fit$call <- cl
fit
}

Prevent a function to block

In the package shapes there is a function called
estcov
that uses some method to give mean of tensors, but this is not the point of the question.
Everytime this function is called appears a plot, i want to stop it plotting without touching the code of the function,there is some trick to do this?
Here there is a riproducible code
S <- array(0,c(5,5,10) )
for (i in 1:10){
tem <- diag(5)+.1*matrix(rnorm(25),5,5)
S[,,i]<- tem
}
estcov( S , method="Procrustes")
The best way to do that is to send everything to a NULL dev, and close it afterward :
pdf(file = NULL)
estcov( S , method="Procrustes")
dev.off()
You could create a wrapper around estcov that redirects graphics to a temporary file, which is deleted afterwards.
estcov_no_plot <- function(...) {
temp_plot <- tempfile()
png(temp_plot)
on.exit({
dev.off(dev.cur())
file.remove(temp_plot)
})
shapes::estcov(...)
}
This example uses a new name for the function to remind you it's not the original. You could name it estcov, which would replace the package's function in your environment, but that might cause confusion.
What you can do is remove the part with the plot yourself (commented out below):
my_estcov <- function (S, method = "Riemannian", weights = 1, alpha = 1/2,
MDSk = 2)
{
out <- list(mean = 0, sd = 0, pco = 0, eig = 0, dist = 0)
M <- dim(S)[3]
if (length(weights) == 1) {
weights <- rep(1, times = M)
}
if (method == "Procrustes") {
dd <- estSS(S, weights)
}
if (method == "ProcrustesShape") {
dd <- estShape(S, weights)
}
if (method == "Riemannian") {
dd <- estLogRiem2(S, weights)
}
if (method == "Cholesky") {
dd <- estCholesky(S, weights)
}
if (method == "Power") {
dd <- estPowerEuclid(S, weights, alpha)
}
if (method == "Euclidean") {
dd <- estEuclid(S, weights)
}
if (method == "LogEuclidean") {
dd <- estLogEuclid(S, weights)
}
if (method == "RiemannianLe") {
dd <- estRiemLe(S, weights)
}
out$mean <- dd
sum <- 0
for (i in 1:M) {
sum <- sum + weights[i] * distcov(S[, , i], dd, method = method)^2/sum(weights)
}
out$sd <- sqrt(sum)
dist <- matrix(0, M, M)
for (i in 2:M) {
for (j in 1:(i - 1)) {
dist[i, j] <- distcov(S[, , i], S[, , j], method = method)
dist[j, i] <- dist[i, j]
}
}
out$dist <- dist
if (M > MDSk) {
ans <- cmdscale(dist, k = MDSk, eig = TRUE, add = TRUE,
x.ret = TRUE)
out$pco <- ans$points
out$eig <- ans$eig
#if (MDSk > 2) {
# shapes3d(out$pco[, 1:min(MDSk, 3)], axes3 = TRUE)
#}
#if (MDSk == 2) {
# plot(out$pco, type = "n", xlab = "MDS1", ylab = "MDS2")
# text(out$pco[, 1], out$pco[, 2], 1:length(out$pco[,
# 1]))
#}
}
out
}
You can even add a parameter (a logical plot = F) to control when plot or outputs.

prediction.strength in Package fpc

I am using the function prediction.strength in the r Package fpc with k-medoids algorithms.
here is my code
prediction.strength(data,2,6,M=10,clustermethod=pamkCBI,DIST,krange=2:6,diss=TRUE,usepam=TRUE)
somehow I get the error message
Error in switch(method, kmeans = kmeans(xdata[indvec[[l]][[i]], ], k, :
EXPR must be a length 1 vector
Does anybody have experience with this r command? There are simple examples like
iriss <- iris[sample(150,20),-5]
prediction.strength(iriss,2,3,M=3,method="pam")
but my problem is that I am using dissimilarity matrix instead of the data itself for the k-medoids algorithms. I don't know how should I correct my code in this case.
Please note that in the package help the following is stated for the prediction.strength:
xdats - data (something that can be coerced into a matrix). Note that this can currently
not be a dissimilarity matrix.
I'm afraid you'll have to hack the function to get it to handle a distance matrix. I'm using the following:
pred <- function (distance, Gmin = 2, Gmax = 10, M = 50,
classification = "centroid", cutoff = 0.8, nnk = 1, ...)
{
require(cluster)
require(class)
xdata <- as.matrix(distance)
n <- nrow(xdata)
nf <- c(floor(n/2), n - floor(n/2))
indvec <- clcenters <- clusterings <- jclusterings <- classifications <- list()
prederr <- list()
dist <- as.matrix(distance)
for (k in Gmin:Gmax) {
prederr[[k]] <- numeric(0)
for (l in 1:M) {
nperm <- sample(n, n)
indvec[[l]] <- list()
indvec[[l]][[1]] <- nperm[1:nf[1]]
indvec[[l]][[2]] <- nperm[(nf[1] + 1):n]
for (i in 1:2) {
clusterings[[i]] <- as.vector(pam(as.dist(dist[indvec[[l]][[i]],indvec[[l]][[i]]]), k, diss=TRUE))
jclusterings[[i]] <- rep(-1, n)
jclusterings[[i]][indvec[[l]][[i]]] <- clusterings[[i]]$clustering
centroids <- clusterings[[i]]$medoids
j <- 3 - i
classifications[[j]] <- classifdist(as.dist(dist), jclusterings[[i]],
method = classification, centroids = centroids,
nnk = nnk)[indvec[[l]][[j]]]
}
ps <- matrix(0, nrow = 2, ncol = k)
for (i in 1:2) {
for (kk in 1:k) {
nik <- sum(clusterings[[i]]$clustering == kk)
if (nik > 1) {
for (j1 in (1:(nf[i] - 1))[clusterings[[i]]$clustering[1:(nf[i] -
1)] == kk]) {
for (j2 in (j1 + 1):nf[i]) if (clusterings[[i]]$clustering[j2] ==
kk)
ps[i, kk] <- ps[i, kk] + (classifications[[i]][j1] ==
classifications[[i]][j2])
}
ps[i, kk] <- 2 * ps[i, kk]/(nik * (nik -
1))
}
}
}
prederr[[k]][l] <- mean(c(min(ps[1, ]), min(ps[2,
])))
}
}
mean.pred <- numeric(0)
if (Gmin > 1)
mean.pred <- c(1)
if (Gmin > 2)
mean.pred <- c(mean.pred, rep(NA, Gmin - 2))
for (k in Gmin:Gmax) mean.pred <- c(mean.pred, mean(prederr[[k]]))
optimalk <- max(which(mean.pred > cutoff))
out <- list(predcorr = prederr, mean.pred = mean.pred, optimalk = optimalk,
cutoff = cutoff, method = clusterings[[1]]$clustermethod,
Gmax = Gmax, M = M)
class(out) <- "predstr"
out
}

Resources