Calculating p-values for divisive hierarchical clustering in R (pvclust package)

Would anyone know how to calculate significance values for clusters in the R package pvclust using a divisive hierarchical clustering method (e.g. diana from the cluster package)? The pvclust package supports only agglomerative hierarchical clustering methods (implemented by the hclust function), but I have been hoping that forcing pvclust to use diana instead of hclust might be possible. I tried modifying some of the internal pvclust functions as follows, but the only result was an error:
library(pvclust)
library(cluster)
pvclust.nonparallel <- function (data, method.hclust, method.dist, use.cor, nboot, r,
                                 store, weight, iseed, quiet)
{
    if (!is.null(iseed))
        set.seed(seed = iseed)
    n <- nrow(data)
    p <- ncol(data)
    if (is.function(method.dist)) {
        distance <- method.dist(data)
    }
    else {
        distance <- pvclust:::dist.pvclust(data, method = method.dist,
                                           use.cor = use.cor)
    }
    ####### replace hclust with diana
    # data.hclust <- hclust(distance, method = method.hclust) # original version
    data.hclust <- diana(distance, diss = TRUE)
    if (method.hclust == "ward" && getRversion() >= "3.1.0") {
        method.hclust <- "ward.D"
    }
    size <- floor(n * r)
    rl <- length(size)
    if (rl == 1) {
        if (r != 1)
            warning("Relative sample size r is set to 1.0. AU p-values are not calculated\n")
        r <- list(1)
    }
    else r <- as.list(size/n)
    mboot <- lapply(r, boot.hclust, data = data, object.hclust = data.hclust,
                    nboot = nboot, method.dist = method.dist, use.cor = use.cor,
                    method.hclust = method.hclust, store = store, weight = weight,
                    quiet = quiet)
    result <- pvclust:::pvclust.merge(data = data, object.hclust = data.hclust,
                                      mboot = mboot)
    return(result)
}
boot.hclust <- function (r, data, object.hclust, method.dist, use.cor, method.hclust,
                         nboot, store, weight = FALSE, quiet = FALSE)
{
    n <- nrow(data)
    size <- round(n * r, digits = 0)
    if (size == 0)
        stop("invalid scale parameter(r)")
    r <- size/n
    pattern <- pvclust:::hc2split(object.hclust)$pattern
    edges.cnt <- table(factor(pattern)) - table(factor(pattern))
    st <- list()
    rp <- as.character(round(r, digits = 2))
    if (r == 1)
        rp <- paste(rp, ".0", sep = "")
    if (!quiet)
        cat(paste("Bootstrap (r = ", rp, ")... ", sep = ""))
    w0 <- rep(1, n)
    na.flag <- 0
    for (i in 1:nboot) {
        if (weight && r > 10) {
            w1 <- as.vector(rmultinom(1, size, w0))
            suppressWarnings(distance <- distw.pvclust(data,
                w1, method = method.dist, use.cor = use.cor))
        }
        else {
            smpl <- sample(1:n, size, replace = TRUE)
            if (is.function(method.dist)) {
                suppressWarnings(distance <- method.dist(data[smpl, ]))
            }
            else {
                suppressWarnings(distance <- pvclust:::dist.pvclust(data[smpl, ],
                    method = method.dist, use.cor = use.cor))
            }
        }
        if (all(is.finite(distance))) {
            ####### replace hclust with diana
            # x.hclust <- hclust(distance, method = method.hclust) # original version
            x.hclust <- diana(distance, diss = TRUE)
            pattern.i <- pvclust:::hc2split(x.hclust)$pattern
            edges.cnt <- edges.cnt + table(factor(pattern.i, levels = pattern))
        }
        else {
            x.hclust <- NULL
            na.flag <- 1
        }
        if (store)
            st[[i]] <- x.hclust
    }
    if (!quiet)
        cat("Done.\n")
    if (na.flag == 1)
        warning(paste("inappropriate distance matrices are omitted in computation: r = ",
            r), call. = FALSE)
    boot <- list(edges.cnt = edges.cnt, method.dist = method.dist,
        use.cor = use.cor, method.hclust = method.hclust, nboot = nboot,
        size = size, r = r, store = st)
    class(boot) <- "boot.hclust"
    return(boot)
}
assignInNamespace("pvclust.nonparallel", pvclust.nonparallel, ns = "pvclust")
assignInNamespace("boot.hclust", boot.hclust, ns = "pvclust")
data(lung)
res.pv <- pvclust(t(lung), method.dist = "euclidean")
plot(res.pv)
# Error in barplot.default(w, xlab = xlab, horiz = TRUE, space = 0, axes = FALSE, :
# argument 9 matches multiple formal arguments
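For what it's worth, a likely culprit is that diana() returns an object of class "diana"/"twins" rather than "hclust", so the downstream pvclust code (hc2split, pvclust.merge, plot.pvclust) receives a structure it does not expect. A sketch of the usual workaround, coercing with as.hclust() at both substitution points (not tested against every pvclust version):

# in pvclust.nonparallel:
data.hclust <- as.hclust(diana(distance, diss = TRUE))
# in boot.hclust:
x.hclust <- as.hclust(diana(distance, diss = TRUE))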

Related

Couldn't find function in R even when I've installed the packages

I have this code for my paper, and R still cannot find the function even though I've properly re-installed the necessary packages.
expoUtility <- function(x, alpha, param_beta, W){
    (1 - exp(-alpha*(W + x)^(1 - param_beta)))/alpha
}

Round.Probability.Table <- cbind(Round.Probability.Table, c(1:10))
Round.Probability.Table <- Round.Probability.Table[, 1:3]
names(Round.Probability.Table) <- c("Round", "CasesAtEnd", "Probability")
for (i in 1:9) {
    Round.Probability.Table$Probability[i] <- 1/choose(Round.Probability.Table$CasesAtEnd[i], Round.Probability.Table$CasesAtEnd[i+1])
}

LL.expoUtility <- function (parameters) {
    alpha <- parameters[1]
    param_beta <- parameters[2]
    W <- parameters[3]
    sigma <- parameters[4]
    LL <- foreach(i = 1:nrow(data), .combine = "c") %dopar% {
        sv <- expoUtility(data$Bank.Offer[i], alpha = alpha, param_beta = param_beta, W = W)
        cv <- combn(Set.Cases.Remaining[[i]], Round.Probability.Table[1 + match(data$Round[i], Round.Probability.Table$Round), 2]) %>%
            apply(2, mean) %>%
            sapply(FUN = predictedBankOffer, b = data$Bank.Offer.Percent[i], r = data$Round[i]) %>%
            sapply(FUN = expoUtility, alpha = alpha, param_beta = param_beta, W = W) * Round.Probability.Table$Probability[data$Round[i]]
        cv <- sum(cv)
        delta <- combn(Set.Cases.Remaining[[i]], Round.Probability.Table[1 + match(data$Round[i], Round.Probability.Table$Round), 2]) %>%
            apply(2, mean) %>%
            sapply(FUN = predictedBankOffer, b = data$Bank.Offer.Percent[i], r = data$Round[i]) %>%
            sapply(FUN = expoUtility, alpha = alpha, param_beta = param_beta, W = W) - cv
        delta <- delta^2 %>%
            sum() %>%
            sqrt()
        if (data$Answer[i] == 0) {
            z <- (cv - sv)/(delta*sigma)
        } else {
            z <- (sv - cv)/(delta*sigma)
        }
    }
    LL <- sapply(LL, FUN = pnorm, mean = 0, sd = 1, log.p = TRUE)
    return(LL)
}

LL.logUtility <- function (parameters) {
    sigma <- parameters[1]
    LL <- foreach(i = 1:nrow(data), .combine = "c") %dopar% {
        sv <- log(data$Bank.Offer[i])
        cv <- combn(Set.Cases.Remaining[[i]], Round.Probability.Table[1 + match(data$Round[i], Round.Probability.Table$Round), 2]) %>%
            apply(2, mean) %>%
            sapply(FUN = predictedBankOffer, b = data$Bank.Offer.Percent[i], r = data$Round[i]) %>%
            sapply(FUN = log) * Round.Probability.Table$Probability[data$Round[i]]
        cv <- sum(cv)
        delta <- combn(Set.Cases.Remaining[[i]], Round.Probability.Table[1 + match(data$Round[i], Round.Probability.Table$Round), 2]) %>%
            apply(2, mean) %>%
            sapply(FUN = predictedBankOffer, b = data$Bank.Offer.Percent[i], r = data$Round[i]) %>%
            sapply(FUN = log) - cv
        delta <- delta^2 %>%
            sum() %>%
            sqrt()
        if (data$Answer[i] == 0) {
            z <- (cv - sv)/(delta*sigma)
        } else {
            z <- (sv - cv)/(delta*sigma)
        }
    }
    LL <- sapply(LL, FUN = pnorm, mean = 0, sd = 1, log.p = TRUE)
    return(LL)
}

ptm.1 <- proc.time()
mle.1 <- maxLik(logLik = LL.expoUtility, start = c(0.1233, 0.958, 82370, 0.1625), method = "NM", tol = 1e-20, iterlim = 3)
Error in { : task 1 failed - "could not find function "expoUtility""
Called from: e$fun(obj, substitute(ex), parent.frame(), e$data)
I am guessing that the problem lies in the foreach and doParallel packages. I am also using Windows, and I got the code from a macOS machine. Could that affect the code?
I think the problem lies in the foreach call: there is a parameter .packages that loads the package for every worker. From the help of foreach:
.packages: character vector of packages that the tasks depend on. If
ex requires a R package to be loaded, this option can be used to load
that package on each of the workers.
So I think you need to declare the packages that those functions come from in the foreach call, like this:
LL <- foreach(i = 1:nrow(data), .packages = c("name of the package you are using"),
              .combine = "c") %dopar% {
    # rest of your code
}
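.packages only covers functions that live in packages, though. expoUtility and predictedBankOffer are defined in the script itself, and the %>% pipe comes from magrittr, so on a Windows PSOCK cluster the workers may also need the .export argument. A hedged sketch using the names from the question (the cluster setup itself is an assumption):

library(foreach)
library(doParallel)

cl <- makeCluster(2)   # assumption: a small PSOCK cluster, as used on Windows
registerDoParallel(cl)

LL <- foreach(i = 1:nrow(data), .combine = "c",
              .packages = "magrittr",   # provides %>% on each worker
              .export = c("expoUtility", "predictedBankOffer",
                          "Round.Probability.Table", "Set.Cases.Remaining")) %dopar% {
    i   # loop body from the question goes here, unchanged
}

stopCluster(cl)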

R factor analysis with factanal() for a huge number of predictors results in a system that is computationally singular

I am trying to run factor analysis on a dataset with around 150 variables but only around 80 observations.
I tried the factanal() function in R, and R reported an error:
Error in solve.default(cv) :
system is computationally singular: reciprocal condition number = 3.0804e-20
Any suggestions on alternative methods / packages?
A demonstration on a dummy dataset would be:
# This will work (dataset with 80 obs and 15 predictors)
set.seed(1234)
fake_df = as.data.frame(matrix(rnorm(80*15), nrow = 80))
factanal(fake_df, factors = 2, rotation = "varimax")
# This will not (dataset with 80 obs and 150 predictors)
set.seed(1234)
fake_df = as.data.frame(matrix(rnorm(80*150), nrow = 80))
factanal(fake_df, factors = 2, rotation = "varimax")
So far I've replaced the solve function in the factanal() source code with a numerical solver that I created (below), but it did not resolve the issue:
solve_G <- function(M){
    library(matrixcalc)
    if (!is.singular.matrix(M)) {
        return(solve(M))
    } else {
        # Fall back on an SVD-based inverse when M is singular
        s <- svd(M)
        U <- s$u
        V <- s$v
        D_Inv <- diag(1/s$d)   # note: 1/s$d is Inf for zero singular values
        Num_Inv <- V %*% D_Inv %*% t(U)
        cat("Singular Matrix! SVD Used.\n")
        return(Num_Inv)
    }
}
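As an aside, because 1/s$d is Inf for exactly zero singular values, the fallback above can still return a matrix full of Inf. A Moore-Penrose pseudoinverse truncates small singular values instead; a minimal sketch using MASS::ginv() (a hedged alternative, not what factanal() itself does):

library(MASS)
library(matrixcalc)
solve_G <- function(M){
    # ginv() zeroes (rather than reciprocates) singular values below a tolerance
    if (!is.singular.matrix(M)) solve(M) else ginv(M)
}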
After replacing "solve" with "solve_G", a new error occurred:
Error in factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt) :
could not find function "factanal.fit.mle"
P.S. Here is the new "factanal" function, named my_factanal. The error above occurred when running the line:
nfit <- factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt)
To reproduce it, set x to an 80 × 150 numeric data frame, factors = 2, scores = "regression", and rotation = "varimax":
my_factanal <- function (x, factors, data = NULL, covmat = NULL, n.obs = NA,
    subset, na.action, start = NULL, scores = c("none", "regression", "Bartlett"),
    rotation = "varimax", control = NULL, ...)
{
    sortLoadings <- function(Lambda) {
        cn <- colnames(Lambda)
        Phi <- attr(Lambda, "covariance")
        ssq <- apply(Lambda, 2L, function(x) -sum(x^2))
        Lambda <- Lambda[, order(ssq), drop = FALSE]
        colnames(Lambda) <- cn
        neg <- colSums(Lambda) < 0
        Lambda[, neg] <- -Lambda[, neg]
        if (!is.null(Phi)) {
            unit <- ifelse(neg, -1, 1)
            attr(Lambda, "covariance") <- unit %*% Phi[order(ssq), order(ssq)] %*% unit
        }
        Lambda
    }
    cl <- match.call()
    na.act <- NULL
    if (is.list(covmat)) {
        if (any(is.na(match(c("cov", "n.obs"), names(covmat)))))
            stop("'covmat' is not a valid covariance list")
        cv <- covmat$cov
        n.obs <- covmat$n.obs
        have.x <- FALSE
    }
    else if (is.matrix(covmat)) {
        cv <- covmat
        have.x <- FALSE
    }
    else if (is.null(covmat)) {
        if (missing(x))
            stop("neither 'x' nor 'covmat' supplied")
        have.x <- TRUE
        if (inherits(x, "formula")) {
            mt <- terms(x, data = data)
            if (attr(mt, "response") > 0)
                stop("response not allowed in formula")
            attr(mt, "intercept") <- 0
            mf <- match.call(expand.dots = FALSE)
            names(mf)[names(mf) == "x"] <- "formula"
            mf$factors <- mf$covmat <- mf$scores <- mf$start <- mf$rotation <- mf$control <- mf$... <- NULL
            mf[[1L]] <- quote(stats::model.frame)
            mf <- eval.parent(mf)
            na.act <- attr(mf, "na.action")
            if (.check_vars_numeric(mf))
                stop("factor analysis applies only to numerical variables")
            z <- model.matrix(mt, mf)
        }
        else {
            z <- as.matrix(x)
            if (!is.numeric(z))
                stop("factor analysis applies only to numerical variables")
            if (!missing(subset))
                z <- z[subset, , drop = FALSE]
        }
        covmat <- cov.wt(z)
        cv <- covmat$cov
        n.obs <- covmat$n.obs
    }
    else stop("'covmat' is of unknown type")
    scores <- match.arg(scores)
    if (scores != "none" && !have.x)
        stop("requested scores without an 'x' matrix")
    p <- ncol(cv)
    if (p < 3)
        stop("factor analysis requires at least three variables")
    dof <- 0.5 * ((p - factors)^2 - p - factors)
    if (dof < 0)
        stop(sprintf(ngettext(factors, "%d factor is too many for %d variables",
            "%d factors are too many for %d variables"), factors, p), domain = NA)
    sds <- sqrt(diag(cv))
    cv <- cv/(sds %o% sds)
    cn <- list(nstart = 1, trace = FALSE, lower = 0.005)
    cn[names(control)] <- control
    more <- list(...)[c("nstart", "trace", "lower", "opt", "rotate")]
    if (length(more))
        cn[names(more)] <- more
    if (is.null(start)) {
        start <- (1 - 0.5 * factors/p)/diag(solve_G(cv))
        if ((ns <- cn$nstart) > 1)
            start <- cbind(start, matrix(runif(ns - 1), p, ns - 1, byrow = TRUE))
    }
    start <- as.matrix(start)
    if (nrow(start) != p)
        stop(sprintf(ngettext(p, "'start' must have %d row",
            "'start' must have %d rows"), p), domain = NA)
    nc <- ncol(start)
    if (nc < 1)
        stop("no starting values supplied")
    best <- Inf
    for (i in 1L:nc) {
        nfit <- factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt)
        if (cn$trace)
            cat("start", i, "value:", format(nfit$criteria[1L]),
                "uniqs:", format(as.vector(round(nfit$uniquenesses, 4))), "\n")
        if (nfit$converged && nfit$criteria[1L] < best) {
            fit <- nfit
            best <- fit$criteria[1L]
        }
    }
    if (best == Inf)
        stop(ngettext(nc, "unable to optimize from this starting value",
            "unable to optimize from these starting values"), domain = NA)
    load <- fit$loadings
    if (rotation != "none") {
        rot <- do.call(rotation, c(list(load), cn$rotate))
        load <- if (is.list(rot)) {
            load <- rot$loadings
            fit$rotmat <- if (inherits(rot, "GPArotation"))
                t(solve_G(rot$Th))
            else rot$rotmat
            rot$loadings
        }
        else rot
    }
    fit$loadings <- sortLoadings(load)
    class(fit$loadings) <- "loadings"
    fit$na.action <- na.act
    if (have.x && scores != "none") {
        Lambda <- fit$loadings
        zz <- scale(z, TRUE, TRUE)
        switch(scores, regression = {
            sc <- zz %*% solve(cv, Lambda)
            if (!is.null(Phi <- attr(Lambda, "covariance"))) sc <- sc %*% Phi
        }, Bartlett = {
            d <- 1/fit$uniquenesses
            tmp <- t(Lambda * d)
            sc <- t(solve(tmp %*% Lambda, tmp %*% t(zz)))
        })
        rownames(sc) <- rownames(z)
        colnames(sc) <- colnames(Lambda)
        if (!is.null(na.act))
            sc <- napredict(na.act, sc)
        fit$scores <- sc
    }
    if (!is.na(n.obs) && dof > 0) {
        fit$STATISTIC <- (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3) * fit$criteria["objective"]
        fit$PVAL <- pchisq(fit$STATISTIC, dof, lower.tail = FALSE)
    }
    fit$n.obs <- n.obs
    fit$call <- cl
    fit
}
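A note on that error: factanal.fit.mle() and .check_vars_numeric() are unexported internals of the stats namespace, so a copy of factanal() living in the global environment cannot see them. Qualifying them with ::: (a sketch, not an officially supported interface) makes the copy self-contained:

# inside my_factanal, qualify the unexported stats internals:
nfit <- stats:::factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt)
# and likewise:
if (stats:::.check_vars_numeric(mf))
    stop("factor analysis applies only to numerical variables")

More fundamentally, with n = 80 observations the sample covariance matrix of p = 150 variables has rank at most 79, so it is necessarily singular, and the maximum-likelihood fit in factanal() cannot invert it no matter how solve() is patched. A hedged alternative, assuming the psych package is acceptable: its fa() function with the minres criterion avoids the explicit inverse, although results on p >> n data should be interpreted cautiously:

# a sketch, not a drop-in replacement for factanal()
library(psych)
set.seed(1234)
fake_df <- as.data.frame(matrix(rnorm(80 * 150), nrow = 80))
fa_fit <- fa(fake_df, nfactors = 2, rotate = "varimax", fm = "minres")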

specClust() in kknn - arpack iteration limit increase

I am applying spectral clustering to a dataset with 4200 rows and 2 columns.
spec <- specClust(df1, centers=7, nn = 7, method = "symmetric")
I get the error below:
Error in .Call("R_igraph_arpack", func, extra, options, env, sym, PACKAGE = "igraph") :
At arpack.c:944 : ARPACK error, Maximum number of iterations reached
In addition: Warning message:
In .Call("R_igraph_arpack", func, extra, options, env, sym, PACKAGE = "igraph") :
At arpack.c:776 : ARPACK solver failed to converge (1001 iterations, 0/7 eigenvectors converged)
How do I increase the number of ARPACK iterations? Because this doesn't work:
spec <- specClust(df1, centers = 7, nn = 7, method = "symmetric", iter.max = 301000)
Digging into specClust shows that the ... argument does not pass anything on to the arpack call.
The simplest thing to do, I think, is to copy the specClust code, add a max.iter argument that is forwarded to arpack, and source the function in your script.
specClust2 <- function (data, centers = NULL, nn = 7, method = "symmetric",
    gmax = NULL, max.iter = 10000, ...)
{
    call = match.call()
    if (is.data.frame(data))
        data = as.matrix(data)
    da = apply(data, 1, paste, collapse = "#")
    indUnique = which(!duplicated(da))
    indAll = match(da, da[indUnique])
    data2 = data
    data = data[indUnique, ]
    n <- nrow(data)
    data = scale(data, FALSE, TRUE)
    if (is.null(gmax)) {
        if (!is.null(centers))
            gmax = centers - 1L
        else gmax = 1L
    }
    test = TRUE
    while (test) {
        DC = mydist(data, nn)
        sif <- rbind(1:n, as.vector(DC[[2]]))
        g <- graph(sif, directed = FALSE)
        g <- decompose(g, min.vertices = 4)
        if (length(g) > 1) {
            if (length(g) >= gmax)
                nn = nn + 2
            else test = FALSE
        }
        else test = FALSE
    }
    W <- DC[[1]]
    n <- nrow(data)
    wi <- W[, nn]
    SC <- matrix(1, nrow(W), nn)
    SC[] <- wi[DC[[2]]] * wi
    W = W^2/SC
    alpha = 1/(2 * (nn + 1))
    qua = abs(qnorm(alpha))
    W = W * qua
    W = dnorm(W, sd = 1)
    DC[[1]] = W
    L = Laplacian(DC, nn, method)
    f <- function(x, extra) as.vector(extra %*% x)
    if (is.null(centers))
        kmax = 25
    else kmax = max(centers)
    ###
    # add the maxiter parameter to the arpack call, below
    ###
    U <- arpack(f, extra = L, options = list(n = n, which = "SM",
        nev = kmax, ncv = 2 * kmax, mode = 1, maxiter = max.iter), sym = TRUE)
    ind <- order(U[[1]])
    U[[2]] = U[[2]][indAll, ind]
    U[[1]] = U[[1]][ind]
    if (is.null(centers)) {
        tmp = which.max(diff(U[[1]])) + 1
        centers = which.min(AUC(U[[1]][1:tmp]))
    }
    if (method == "symmetric") {
        rs = sqrt(rowSums(U[[2]]^2))
        U[[2]] = U[[2]]/rs
    }
    result = kmeans(U[[2]], centers = centers, nstart = 20, ...)
    archeType = getClosest(U[[2]][indAll, ], result$centers)
    result$eigenvalue = U[[1]]
    result$eigenvector = U[[2]]
    result$data = data2
    result$indAll = indAll
    result$indUnique = indUnique
    result$L = L
    result$archetype = archeType
    result$call = call
    class(result) = c("specClust", "kmeans")
    result
}
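The copied body still calls unexported kknn helpers (mydist, Laplacian, AUC, getClosest). One way to let the sourced copy find them, assuming those helpers do live in kknn's namespace, is to re-parent the function onto that namespace; the df1 call below simply mirrors the question:

# let the sourced copy see kknn's unexported helpers
environment(specClust2) <- asNamespace("kknn")
spec <- specClust2(df1, centers = 7, nn = 7, method = "symmetric", max.iter = 301000)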

Modify SPDEP package - insert new function

I am trying to modify the stsls function of the R package spdep.
The function computes a spatial autoregressive model using two-stage least squares; in both stages it uses the same spatial weights matrix.
What I want is to create a new function, say stslsm, which uses two different spatial matrices: one for the first stage (inlistw) and one for the second stage (listw).
For this reason I took the stsls function, added a new argument, inlistw, and modified the script accordingly. For those who are interested, the code is at the bottom of this post, but please consider that this is just a first attempt.
Now the problem is that I don't know how to insert this new function in the spdep package.
I read different posts on this issue. The most common suggestion is:
unlockBinding("spdep", loadNamespace("spdep"));
assignInNamespace("stslsm", stslsm, ns=asNamespace("spdep"), envir=loadNamespace("spdep"));
assign("stslsm", stslsm, envir=env);
lockBinding(stslsm, loadNamespace("spdep"));
But after the second line of code I get the following error:
Error in bindingIsLocked(x, ns) : no binding for "stslsm"
I am stuck here. Do you have any suggestions?
stslsm <- function (formula, data = list(), listw, inlistw, zero.policy = NULL,
    na.action = na.fail, robust = FALSE, HC = NULL, legacy = FALSE,
    W2X = TRUE) {
    if (!inherits(listw, "listw"))
        stop("No neighbourhood list")
    if (is.null(zero.policy))
        zero.policy <- get("zeroPolicy", envir = .spdepOptions)
    stopifnot(is.logical(zero.policy))
    if (class(formula) != "formula")
        formula <- as.formula(formula)
    mt <- terms(formula, data = data)
    mf <- lm(formula, data, na.action = na.action, method = "model.frame")
    na.act <- attr(mf, "na.action")
    if (!is.null(na.act)) {
        subset <- !(1:length(listw$neighbours) %in% na.act)
        listw <- subset(listw, subset, zero.policy = zero.policy)
    }
    y <- model.extract(mf, "response")
    if (any(is.na(y)))
        stop("NAs in dependent variable")
    X <- model.matrix(mt, mf)
    if (any(is.na(X)))
        stop("NAs in independent variable")
    if (robust) {
        if (is.null(HC))
            HC <- "HC0"
        if (!any(HC %in% c("HC0", "HC1")))
            stop("HC must be one of HC0, HC1")
    }
    Wy <- lag.listw(listw, y, zero.policy = zero.policy)
    dim(Wy) <- c(nrow(X), 1)
    colnames(Wy) <- c("Rho")
    n <- NROW(X)
    m <- NCOL(X)
    xcolnames <- colnames(X)
    K <- ifelse(xcolnames[1] == "(Intercept)", 2, 1)
    if (m > 1) {
        WX <- matrix(nrow = n, ncol = (m - (K - 1)))
        if (W2X)
            WWX <- matrix(nrow = n, ncol = ncol(WX))
        for (k in K:m) {
            wx <- lag.listw(inlistw, X[, k], zero.policy = zero.policy)
            if (W2X)
                wwx <- lag.listw(inlistw, wx, zero.policy = zero.policy)
            if (any(is.na(wx)))
                stop("NAs in lagged independent variable")
            WX[, (k - (K - 1))] <- wx
            if (W2X)
                WWX[, (k - (K - 1))] <- wwx
        }
        if (W2X)
            inst <- cbind(WX, WWX)
        else inst <- WX
    }
    if (K == 2 && listw$style != "W") {
        wx1 <- as.double(rep(1, n))
        wx <- lag.listw(inlistw, wx1, zero.policy = zero.policy)
        if (W2X)
            wwx <- lag.listw(inlistw, wx, zero.policy = zero.policy)
        if (m > 1) {
            inst <- cbind(wx, inst)
            if (W2X)
                inst <- cbind(wwx, inst)
        }
        else {
            inst <- matrix(wx, nrow = n, ncol = 1)
            if (W2X)
                inst <- cbind(inst, wwx)
        }
    }
    result <- tsls(y = y, yend = Wy, X = X, Zinst = inst, robust = robust,
        HC = HC, legacy = legacy)
    result$zero.policy <- zero.policy
    result$robust <- robust
    if (robust)
        result$HC <- HC
    result$legacy <- legacy
    result$listw_style <- listw$style
    result$call <- match.call()
    class(result) <- "stsls"
    result
}
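A note on the error itself: assignInNamespace() can only replace a binding that already exists in the namespace, and stslsm is a new name, hence "no binding for stslsm". A common workaround (a sketch, under the assumption that the function only needs unexported helpers such as spdep's tsls and the .spdepOptions environment) is to keep the new function in your workspace and give it the spdep namespace as its enclosing environment:

# stslsm is the function defined above; it never has to live inside spdep.
# Re-parenting it onto the namespace lets it find tsls() and .spdepOptions
# without ::: qualifiers:
environment(stslsm) <- asNamespace("spdep")
# hypothetical call, with lw1/lw2 two listw objects you have built:
res <- stslsm(y ~ x1 + x2, data = mydata, listw = lw2, inlistw = lw1)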

Saving huge model object to file

Say you have a model object of class 'varest', as returned by a VAR() regression.
I want to save the model to a file, but without all the data that was used to estimate the coefficients.
How can one save just the model specification without the training data?
When I save the model as-is, the file is over 1 GB, so loading it takes a long time.
Can one save objects without some of their attributes?
The predict.varest function starts out with this code:
K <- object$K
p <- object$p
obs <- object$obs
type <- object$type
data.all <- object$datamat
ynames <- colnames(object$y)
You can then investigate how much pruning you might achieve:
data(Canada)
tcan <- VAR(Canada, p = 2, type = "trend")
names(tcan)
# [1] "varresult"    "datamat"      "y"            "type"         "p"
# [6] "K"            "obs"          "totobs"       "restrictions" "call"
object.size(tcan[c("K", "p", "obs", "type", "datamat", "y")])
# 15080 bytes
object.size(tcan)
# 252032 bytes
So the difference is substantial, but just saving those items is not sufficient because the next line in predict.varest is:
B <- Bcoef(object)
You will need to add that object to the list above and then construct a new predict function that accepts something less than the large 'varresult' node of the model object. It also turned out that there was a downstream call to an internal function whose result needs to be stored. (You will need to decide in advance what interval you need for prediction.)
tsmall <- c( tcan[c("K","p", "obs", "type", "datamat", "y", "call")] )
tsmall[["Bco"]] <- Bcoef(tcan)
tsmall$sig.y <- vars:::.fecov(x = tcan, n.ahead = 10)
And the modified predict function will be:
sm.predict <- function (object, ..., n.ahead = 10, ci = 0.95, dumvar = NULL)
{
    K <- object$K
    p <- object$p
    obs <- object$obs
    type <- object$type
    data.all <- object$datamat
    ynames <- colnames(object$y)
    n.ahead <- as.integer(n.ahead)
    Z <- object$datamat[, -c(1:K)]
    # This used to be a call to Bcoef(object)
    B <- object$Bco
    if (type == "const") {
        Zdet <- matrix(rep(1, n.ahead), nrow = n.ahead, ncol = 1)
        colnames(Zdet) <- "const"
    }
    else if (type == "trend") {
        trdstart <- nrow(Z) + 1 + p
        Zdet <- matrix(seq(trdstart, length = n.ahead), nrow = n.ahead, ncol = 1)
        colnames(Zdet) <- "trend"
    }
    else if (type == "both") {
        trdstart <- nrow(Z) + 1 + p
        Zdet <- matrix(c(rep(1, n.ahead), seq(trdstart, length = n.ahead)),
            nrow = n.ahead, ncol = 2)
        colnames(Zdet) <- c("const", "trend")
    }
    else if (type == "none") {
        Zdet <- NULL
    }
    if (!is.null(eval(object$call$season))) {
        season <- eval(object$call$season)
        seas.names <- paste("sd", 1:(season - 1), sep = "")
        cycle <- tail(data.all[, seas.names], season)
        seasonal <- as.matrix(cycle, nrow = season, ncol = season - 1)
        if (nrow(seasonal) >= n.ahead) {
            seasonal <- as.matrix(cycle[1:n.ahead, ], nrow = n.ahead,
                ncol = season - 1)
        }
        else {
            while (nrow(seasonal) < n.ahead) {
                seasonal <- rbind(seasonal, cycle)
            }
            seasonal <- seasonal[1:n.ahead, ]
        }
        rownames(seasonal) <- seq(nrow(data.all) + 1, length = n.ahead)
        if (!is.null(Zdet)) {
            Zdet <- as.matrix(cbind(Zdet, seasonal))
        }
        else {
            Zdet <- as.matrix(seasonal)
        }
    }
    if (!is.null(eval(object$call$exogen))) {
        if (is.null(dumvar)) {
            stop("\nNo matrix for dumvar supplied, but object varest contains exogenous variables.\n")
        }
        if (!all(colnames(dumvar) %in% colnames(data.all))) {
            stop("\nColumn names of dumvar do not coincide with exogen.\n")
        }
        if (!identical(nrow(dumvar), n.ahead)) {
            stop("\nRow number of dumvar is unequal to n.ahead.\n")
        }
        if (!is.null(Zdet)) {
            Zdet <- as.matrix(cbind(Zdet, dumvar))
        }
        else {
            Zdet <- as.matrix(dumvar)
        }
    }
    Zy <- as.matrix(object$datamat[, 1:(K * (p + 1))])
    yse <- matrix(NA, nrow = n.ahead, ncol = K)
    # This used to be a call to vars:::.fecov
    sig.y <- object$sig.y
    for (i in 1:n.ahead) {
        yse[i, ] <- sqrt(diag(sig.y[, , i]))
    }
    yse <- -1 * qnorm((1 - ci)/2) * yse
    colnames(yse) <- paste(ci, "of", ynames)
    forecast <- matrix(NA, ncol = K, nrow = n.ahead)
    lasty <- c(Zy[nrow(Zy), ])
    for (i in 1:n.ahead) {
        lasty <- lasty[1:(K * p)]; print(lasty); print(B)
        Z <- c(lasty, Zdet[i, ]); print(Z)
        forecast[i, ] <- B %*% Z
        temp <- forecast[i, ]
        lasty <- c(temp, lasty)
    }
    colnames(forecast) <- paste(ynames, ".fcst", sep = "")
    lower <- forecast - yse
    colnames(lower) <- paste(ynames, ".lower", sep = "")
    upper <- forecast + yse
    colnames(upper) <- paste(ynames, ".upper", sep = "")
    forecasts <- list()
    for (i in 1:K) {
        forecasts[[i]] <- cbind(forecast[, i], lower[, i], upper[, i], yse[, i])
        colnames(forecasts[[i]]) <- c("fcst", "lower", "upper", "CI")
    }
    names(forecasts) <- ynames
    result <- list(fcst = forecasts, endog = object$y, model = object,
        exo.fcst = dumvar)
    class(result) <- "varprd"
    return(result)
}
Either set the attributes you do not want to NULL, copy the parts you want to a new object, or call the save() function with proper indexing.
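As a worked example of the last option (using saveRDS() rather than save(); a sketch assuming the tsmall list and sm.predict function from above):

# save only the pruned pieces, not the full varest object
saveRDS(tsmall, file = "tcan_small.rds")
tsmall2 <- readRDS("tcan_small.rds")
# n.ahead must match the horizon used when sig.y was precomputed (10 here)
pred <- sm.predict(tsmall2, n.ahead = 10, ci = 0.95)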
