How to manage 3D arrays in agent-based models with R? - r

I am building an agent-based model with R but I have memory issues by trying to work with large 3D arrays. In the 3D arrays, the first dimension corresponds to the time (from 1 to 3650 days), the second dimension defines the properties of individuals or landscape cells and the third dimension represents each individual or landscape cell. At each time step (1 day), each 3D array is filled using several functions. Ideally, I would like to run my ABM over large landscapes (for example, 90000 cells) containing a large number of individuals (for example, 720000). Actually, this is impossible because of memory issues.
Currently, the 3D arrays are defined at initialization so that data are stored in the array at each time step. However, to fill one 3D array at t from the model, I need to only keep data at t – 1 and t – tf – 1, where tf is a duration parameter that is fixed (e.g., tf = 320 days). Here an example of one model function that is used to fill one 3D array (there are 3 duration parameters):
library(ff)
library(magrittr)
library(dplyr)
library(tidyr)
library(gtools)
## Define parameters
tf1 <- 288
tf2 <- 292
tf3 <- 150
## Define 3D array
col_array <- c(letters[seq( from = 1, to = 9 )])
s_array <- ff(-999, dim=c(3650, 9, 2500), dimnames=list(NULL, col_array, as.character(seq(1, 2500, 1))),
filename="s_array.ffd", vmode="double", overwrite = t) ## 3th dimension = patch ID
## Define initial array
initial_s_array <- matrix(sample.int(100, size = 2500*9, replace = TRUE), nrow = 2500, ncol = 9, dimnames=list(NULL, col_array))
## Loop over time
line <- 1
for(t in 1:3650){
print(t)
s_array[t,c("a", "b", "c", "d", "e", "f", "g", "h", "i"),] <- func(v_t_1 = ifelse((t - 1) >= 1, list(s_array[(t - 1),,]), NA)[[1]],
v_t_tf1_1 = ifelse((t - tf1 - 1) >= 1, list(s_array[(t - tf1 - 1),,]), NA)[[1]],
v_t_tf2_1 = ifelse((t - tf2 - 1) >= 1, list(s_array[(t - tf2 - 1),,]), NA)[[1]],
v_t_tf3_1 = ifelse((t - tf3 - 1) >= 1, list(s_array[(t - tf3 - 1),,]), NA)[[1]],
v_t_0 = initial_s_array, columns_names = col_array)
line <- line + 1
}
func <- function(v_t_1, v_t_tf1_1, v_t_tf2_1, v_t_tf3_1, v_t_0, columns_names){
## Data at t-1
dt_t_1 <- (ifelse(!(all(is.na(v_t_1))),
list(v_t_1 %>%
as.data.frame.table(stringsAsFactors = FALSE) %>%
dplyr::mutate_all(as.character)), NA))[[1]]
## Data at t-tf1-1
dt_t_tf1_1 <- (ifelse(!(all(is.na(v_t_tf1_1))),
list(v_t_tf1_1 %>%
as.data.frame.table(stringsAsFactors = FALSE) %>%
dplyr::mutate_all(as.character)), NA))[[1]]
## Data at t-tf2-1
dt_t_tf2_1 <- (ifelse(!(all(is.na(v_t_tf2_1))),
list(v_t_tf2_1 %>%
as.data.frame.table(stringsAsFactors = FALSE) %>%
dplyr::mutate_all(as.character)), NA))[[1]]
## Data at t-tf3-1
dt_t_tf3_1 <- (ifelse(!(all(is.na(v_t_tf3_1))),
list(v_t_tf3_1 %>%
as.data.frame.table(stringsAsFactors = FALSE) %>%
dplyr::mutate_all(as.character)), NA))[[1]]
## Format data at t-1
dt_t_1_reshape <- (ifelse(!(all(is.na(dt_t_1))),
list(dt_t_1 %>%
dplyr::rename(ID = Var2) %>%
tidyr::spread(Var1, Freq) %>%
dplyr::select(ID, columns_names) %>%
dplyr::arrange(match(ID, mixedsort(colnames(v_t_1))))), NA))[[1]]
## Format data at t-tf1-1
dt_t_tf1_1_reshape <- (ifelse(!(all(is.na(dt_t_tf1_1))),
list(dt_t_tf1_1 %>%
dplyr::rename(ID = Var2) %>%
tidyr::spread(Var1, Freq) %>%
dplyr::select(ID, columns_names) %>%
dplyr::arrange(match(ID, mixedsort(colnames(v_t_tf1_1))))), NA))[[1]]
## Format data at t-tf2-1
dt_t_tf2_1_reshape <- (ifelse(!(all(is.na(dt_t_tf2_1))),
list(dt_t_tf2_1 %>%
dplyr::rename(ID = Var2) %>%
tidyr::spread(Var1, Freq) %>%
dplyr::select(ID, columns_names) %>%
dplyr::arrange(match(ID, mixedsort(colnames(v_t_tf2_1))))), NA))[[1]]
## Format data at t-tf3-1
dt_t_tf3_1_reshape <- (ifelse(!(all(is.na(dt_t_tf3_1))),
list(dt_t_tf3_1 %>%
dplyr::rename(ID = Var2) %>%
tidyr::spread(Var1, Freq) %>%
dplyr::select(ID, columns_names) %>%
dplyr::arrange(match(ID, mixedsort(colnames(v_t_tf3_1))))), NA))[[1]]
## Retrieve data
a_t_1 <- (ifelse((!(all(is.na(dt_t_1_reshape)))), list(as.numeric(dt_t_1_reshape[,c("a")])), list(v_t_0[,c("a")])))[[1]]
d_t_1 <- (ifelse((!(all(is.na(dt_t_1_reshape)))), list(as.numeric(dt_t_1_reshape[,c("d")])), list(v_t_0[,c("d")])))[[1]]
g_t_1 <- (ifelse((!(all(is.na(dt_t_1_reshape)))), list(as.numeric(dt_t_1_reshape[,c("f")])), list(v_t_0[,c("f")])))[[1]]
a_t_tf1_1 <- (ifelse(!(all(is.na(dt_t_tf1_1_reshape))), list(as.numeric(dt_t_tf1_1_reshape[,c("a")])), 0))[[1]]
d_t_tf2_1 <- (ifelse(!(all(is.na(dt_t_tf2_1_reshape))), list(as.numeric(dt_t_tf2_1_reshape[,c("d")])), 0))[[1]]
g_t_tf3_1 <- (ifelse(!(all(is.na(dt_t_tf3_1_reshape))), list(as.numeric(dt_t_tf3_1_reshape[,c("f")])), 0))[[1]]
b_t_1 <- (ifelse((!(all(is.na(dt_t_1_reshape)))), list(as.numeric(dt_t_1_reshape[,c("b")])), list(v_t_0[,c("b")])))[[1]]
e_t_1 <- (ifelse((!(all(is.na(dt_t_1_reshape)))), list(as.numeric(dt_t_1_reshape[,c("e")])), list(v_t_0[,c("e")])))[[1]]
h_t_1 <- (ifelse((!(all(is.na(dt_t_1_reshape)))), list(as.numeric(dt_t_1_reshape[,c("h")])), list(v_t_0[,c("h")])))[[1]]
b_t_tf1_1 <- (ifelse(!(all(is.na(dt_t_tf1_1_reshape))), list(as.numeric(dt_t_tf1_1_reshape[,c("b")])), 0))[[1]]
e_t_tf2_1 <- (ifelse(!(all(is.na(dt_t_tf2_1_reshape))), list(as.numeric(dt_t_tf2_1_reshape[,c("e")])), 0))[[1]]
h_t_tf3_1 <- (ifelse(!(all(is.na(dt_t_tf3_1_reshape))), list(as.numeric(dt_t_tf3_1_reshape[,c("h")])), 0))[[1]]
## Define discrete equations
a_t <- round(0.4*a_t_1 + 0.5*a_t_tf1_1)
b_t <- round(0.5*b_t_1 + 0.6*b_t_tf1_1)
c_t <- a_t + b_t
d_t <- round(0.7*d_t_1 + 0.7*d_t_tf2_1)
e_t <- round(0.9*e_t_1 + 0.4*e_t_tf2_1)
f_t <- d_t + e_t
g_t <- round(0.3*g_t_1 + 0.2*g_t_tf3_1)
h_t <- round(0.5*h_t_1 + 0.1*h_t_tf3_1)
i_t <- g_t + h_t
## Update the values
dt_array <- as.matrix(cbind(a_t, b_t, c_t, d_t, e_t, f_t, g_t, h_t, i_t))
## print(dt_array)
## Build the output matrix
dt_array <- t(dt_array)
return(dt_array)
}
The function “func” takes as argument data at t – 1 and t – tf – 1 providing one 3D array “s_array”. The function returns a data frame that is used to fill the 3D array.
I think that I could reduce the first dimension of my arrays by keeping only data at t – 1 and t – tf – 1 (rather than keep data at each time step from 1 to 3650 days). However, I don’t know how to manage these new 3D arrays in the ABM at each time step (i.e., how to initialize the 3D arrays and to only store data at t – 1 and t – tf – 1)?
Edit:
I have tested the example with 90000 observations for the 3rd dimension. The number of rows (i.e., 3650) in each array is too large.
> s_array <- ff(-999, dim=c(3650, 9, 90000), dimnames=list(NULL, col_array, as.character(seq(1, 90000, 1))),
+ filename="s_array.ffd", vmode="double", overwrite = TRUE)
Error in if (length < 0 || length > .Machine$integer.max) stop("length must be between 1 and .Machine$integer.max") :
missing value where TRUE/FALSE needed
In addition: Warning message:
In ff(-999, dim = c(3650, 9, 90000), dimnames = list(NULL, col_array, :
NAs introduced by coercion to integer range
Is there a way to reduce the number of rows and apply the function that is used to fill the arrays ?

The reason I said that R probably isn't ideal is because of its copy-on-modify semantics,
so every time you change something in any array/matrix/data frame,
a copy must be made.
I think R can do some clever things with its memory management,
but still.
Using ff to avoid having all time slices in RAM at the same time is probably indeed advantageous,
but your code is probably changing storage formats far too often,
juggling data structures back and forth.
I think I packed the logic in a couple R6 class
(along with some improvements),
maybe it can get you started with improving your code's memory usage:
suppressPackageStartupMessages({
library(R6)
library(ff)
})
SArray <- R6::R6Class(
"SArray",
public = list(
time_slices = NULL,
initialize = function(sdim, sdimnames) {
self$time_slices <- lapply(1L:sdim[1L], function(ignored) {
ff(NA_real_, vmode="double", dim=sdim[-1L], dimnames=sdimnames[-1L])#, FF_RETURN=FALSE)
})
names(self$time_slices) <- sdimnames[[1L]]
}
)
)
`[.SArray` <- function(s_array, i, j, ...) {
s_array$time_slices[[i]][j,]
}
`[<-.SArray` <- function(s_array, i, j, ..., value) {
s_array$time_slices[[i]][j,] <- value
s_array
}
dim.SArray <- function(x) {
c(length(x$time_slices), dim(x$time_slices[[1L]]))
}
ABM <- R6::R6Class(
"ABM",
public = list(
s_array = NULL,
tf1 = NULL,
tf2 = NULL,
tf3 = NULL,
initialize = function(sdim, sdimnames, tfs) {
self$tf1 <- tfs[1L]
self$tf2 <- tfs[2L]
self$tf3 <- tfs[3L]
self$s_array <- SArray$new(sdim, sdimnames)
},
init_abm = function(seed = NULL) {
set.seed(seed)
sdim <- dim(self$s_array)
s_init <- matrix(sample.int(100L, size = 6L * sdim[3L], replace=TRUE),
nrow=6L, ncol=sdim[3L],
dimnames=list(c("a", "b", "d", "e", "g", "h"),
as.character(1:sdim[3L])))
self$a(1L, s_init["a", ])
self$b(1L, s_init["b", ])
self$c(1L)
self$d(1L, s_init["d", ])
self$e(1L, s_init["e", ])
self$f(1L)
self$g(1L, s_init["g", ])
self$h(1L, s_init["h", ])
self$i(1L)
private$t <- 1L
invisible()
},
can_advance = function() {
private$t < dim(self$s_array)[1L]
},
advance = function(verbose = FALSE) {
t <- private$t + 1L
if (verbose) print(t)
self$a(t)
self$b(t)
self$c(t)
self$d(t)
self$e(t)
self$f(t)
self$g(t)
self$h(t)
self$i(t)
private$t <- t
invisible()
},
# get time slice at t - tf - 1 for given letter
s_tf = function(t, tf, letter) {
t_tf_1 <- t - tf - 1L
if (t_tf_1 > 0L)
self$s_array[t_tf_1, letter, ]
else
0
},
# discrete equations
a = function(t, t_0) {
if (t < 2L) {
t <- 1L
t_1 <- t_0
t_tf_1 <- 0
}
else {
t_1 <- self$s_array[t - 1L, "a", ]
t_tf_1 <- self$s_tf(t, self$tf1, "a")
}
self$s_array[t, "a", ] <- round(0.4 * t_1 + 0.5 * t_tf_1)
invisible()
},
b = function(t, t_0) {
if (t < 2L) {
t <- 1L
t_1 <- t_0
t_tf_1 <- 0
}
else {
t_1 <- self$s_array[t - 1L, "b", ]
t_tf_1 <- self$s_tf(t, self$tf1, "b")
}
self$s_array[t, "b", ] <- round(0.5 * t_1 + 0.6 * t_tf_1)
invisible()
},
c = function(t) {
if (t < 1L) stop("t must be positive")
a_t <- self$s_array[t, "a", ]
b_t <- self$s_array[t, "b", ]
self$s_array[t, "c", ] <- a_t + b_t
invisible()
},
d = function(t, t_0) {
if (t < 2L) {
t <- 1L
t_1 <- t_0
t_tf_1 <- 0
}
else {
t_1 <- self$s_array[t - 1L, "d", ]
t_tf_1 <- self$s_tf(t, self$tf2, "d")
}
self$s_array[t, "d", ] <- round(0.7 * t_1 + 0.7 * t_tf_1)
invisible()
},
e = function(t, t_0) {
if (t < 2L) {
t <- 1L
t_1 <- t_0
t_tf_1 <- 0
}
else {
t_1 <- self$s_array[t - 1L, "e", ]
t_tf_1 <- self$s_tf(t, self$tf2, "e")
}
self$s_array[t, "e", ] <- round(0.9 * t_1 + 0.4 * t_tf_1)
invisible()
},
f = function(t) {
if (t < 1L) stop("t must be positive")
d_t <- self$s_array[t, "d", ]
e_t <- self$s_array[t, "e", ]
self$s_array[t, "f", ] <- d_t + e_t
invisible()
},
g = function(t, t_0) {
if (t < 2L) {
t <- 1L
t_1 <- t_0
t_tf_1 <- 0
}
else {
t_1 <- self$s_array[t - 1L, "g", ]
t_tf_1 <- self$s_tf(t, self$tf3, "g")
}
self$s_array[t, "g", ] <- round(0.3 * t_1 + 0.2 * t_tf_1)
invisible()
},
h = function(t, t_0) {
if (t < 2L) {
t <- 1L
t_1 <- t_0
t_tf_1 <- 0
}
else {
t_1 <- self$s_array[t - 1L, "h", ]
t_tf_1 <- self$s_tf(t, self$tf3, "h")
}
self$s_array[t, "h", ] <- round(0.5 * t_1 + 0.1 * t_tf_1)
invisible()
},
i = function(t) {
if (t < 1L) stop("t must be positive")
g_t <- self$s_array[t, "g", ]
h_t <- self$s_array[t, "h", ]
self$s_array[t, "i", ] <- g_t + h_t
invisible()
}
),
private = list(
t = NULL
)
)
max_t <- 10
abm <- ABM$new(c(max_t, 9, 2500),
list(NULL, letters[1:9], as.character(1:2500)),
c(288L, 292L, 150L))
abm$init_abm()
while (abm$can_advance()) {
abm$advance(TRUE)
}
anyNA(abm$s_array[])
# FALSE
Some of the functions under discrete equations encapsulate the logic for initialization when t < 2L.
The SArray class separates the 3D array into a list of 2D arrays to work around the .Machine$integer.max limit.

Related

Incorrect number of probabilities dimsensions when optimizing a multi-season elo model

My data frame looks like this:
and the code to compute the initial/optimized elo
# Elo Rating System
library(eurolig)
library(tidyverse)
library(lubridate)
plldf2 <- read_csv('pll_elor.csv',show_col_types = FALSE)
plldf3 <- plldf2[(plldf2$season == 2019),]
# Helpers -----------------------------------------------------------------
# Expected win probability before a game
getExpectedProb <- function(r_team, r_opp, home_adv, s) {
1 / (1 + 10 ^ ((r_opp - r_team - home_adv) / s))
}
# Get Elo rating for next season
getCarryOver <- function(rating, c) {
c * rating + 1505 * (1 - c)
}
# Get margin of victory multiplier
getMovMultiplier <- function(points_diff, elo_diff) {
((points_diff + 3) ^ 0.8) / (7.5 + 0.006 * elo_diff)
}
getEloSummary <- function(df) {
df %>%
pivot_longer(
cols = ends_with("_new"),
names_to = "type",
values_to = "elo"
) %>%
select(
season,
date,
team,
opp,
type,
elo
) %>%
mutate(
team = ifelse(type == "elo_home_new", team, opp),
order = rank(date),
team_id = paste0(team, "-", season)
) %>%
select(season, date, team, elo, order, team_id)
}
# Algorithm ---------------------------------------------------------------
# For a single season
getSeasonElo <- function(df, k, home_adv, s, initial_elo) {
team_ratings <- initial_elo
# Data frame to store the subsequent values obtained by the algorithm
ratings_df <- df %>%
mutate(
home_adv = NA,
win_points_home = NA,
win_points_away = NA,
expected_prob_home = NA,
expected_prob_away = NA,
mov_home = NA,
mov_away = NA,
elo_home_prev = NA,
elo_away_prev = NA,
elo_home_new = NA,
elo_away_new = NA,
prob_pred = NA
)
for (i in 1:nrow(df)) {
team_home <- df$team[i]
team_away <- df$opp[i]
elo_home <- team_ratings[[team_home]]
elo_away <- team_ratings[[team_away]]
# Home advantage set to 0 for Final 4 games
h <- ifelse(df$phase[i] == "ff", 0, home_adv)
# Assign 1 for wins and 0 for losses
win_points_home <- ifelse(
df$score[i] > df$opp_score[i],
1,
0
)
win_points_away <- ifelse(win_points_home == 0, 1, 0)
# Find pre-game win probabilities
expected_prob_home <- getExpectedProb(
r_team = elo_home,
r_opp = elo_away,
home_adv = h,
s = s)
expected_prob_away <- 1 - expected_prob_home
# Margin of victory multiplier
points_diff_abs <- abs(df$score[i] - df$opp_score[i])
elo_diff_home <- elo_home + h - elo_away
elo_diff_away <- elo_away - elo_home - h
mov_home <- getMovMultiplier(points_diff_abs, elo_diff_home)
mov_away <- getMovMultiplier(points_diff_abs, elo_diff_away)
# Update Elo ratings
elo_home_new <- elo_home + k * (win_points_home - expected_prob_home) * mov_home
elo_away_new <- elo_away + k * (win_points_away - expected_prob_away) * mov_away
team_ratings[[team_home]] <- elo_home_new
team_ratings[[team_away]] <- elo_away_new
prob_pred <- sample(
x = c(team_home, team_away),
size = length(list(expected_prob_home,expected_prob_away)),
prob = c(expected_prob_home, expected_prob_away) #ISSUE HERE
)
ratings_df$home_adv[i] <- h
ratings_df$win_points_home[i] <- win_points_home
ratings_df$win_points_away[i] <- win_points_away
ratings_df$expected_prob_home[i] <- expected_prob_home
ratings_df$expected_prob_away[i] <- expected_prob_away
ratings_df$mov_home[i] <- mov_home
ratings_df$mov_away[i] <- mov_away
ratings_df$elo_home_prev[i] <- elo_home
ratings_df$elo_away_prev[i] <- elo_away
ratings_df$elo_home_new[i] <- elo_home_new
ratings_df$elo_away_new[i] <- elo_away_new
ratings_df$prob_pred[i] <- prob_pred
}
ratings_df <- ratings_df %>%
mutate(
winner = ifelse(score > opp_score, team, opp),
winner_pred = ifelse(elo_home_prev + home_adv >= elo_away_prev,
team, opp),
correct_pred = ifelse(winner == winner_pred, TRUE, FALSE)
)
list(ratings_df = ratings_df, team_elo = team_ratings)
}
# Algorithm for several seasons
getElo <- function(df, k, home_adv, s, carry) {
df <- arrange(df, season)
season_results <- split(df, df$season)
# Start with first season
teams <- sort(unique(season_results[[1]]$team))
# Since it is the first season overall, all teams start with 1300 Elo points
initial_ratings <- as.list(rep(1300, length(teams)))
names(initial_ratings) <- teams
first_season_ratings <- getSeasonElo(
season_results[[1]],
k = k,
home_adv = home_adv,
s = s,
initial_elo = initial_ratings
)
# TODO: Use the last recorded Elo rating, not last season
elo_final <- tibble(
season = unique(season_results[[1]]$season),
team = names(first_season_ratings$team_elo),
elo = unlist(first_season_ratings$team_elo)
)
season_ratings <- vector("list", length(season_results))
season_ratings[[1]] <- first_season_ratings
for (i in 2:length(season_ratings)) {
teams <- sort(unique(season_results[[i]]$team))
teams_new <- teams[!teams %in% elo_final$team]
teams_new_elo <- as.list(rep(1300, length(teams_new)))
names(teams_new_elo) <- teams_new
teams_old <- teams[teams %in% elo_final$team]
teams_old_elo <- vector("list", length(teams_old))
names(teams_old_elo) <- teams_old
for (j in seq_along(teams_old)) {
elo_team <- elo_final %>%
filter(team == teams_old[j])
teams_old_elo[[j]] <- elo_team$elo[which.max(elo_team$season)]
}
teams_old_elo <- lapply(teams_old_elo, getCarryOver, c = carry)
initial_elo <- c(teams_new_elo, teams_old_elo)
season_ratings[[i]] <- getSeasonElo(
season_results[[i]],
k = k,
home_adv = home_adv,
s = s,
initial_elo = initial_elo
)
elo_final_season <- tibble(
season = unique(season_results[[i]]$season),
team = names(season_ratings[[i]]$team_elo),
elo = unlist(season_ratings[[i]]$team_elo)
)
elo_final <- bind_rows(elo_final, elo_final_season)
}
output_df <- map_df(season_ratings, function(x) x$ratings_df)
output_df
}
# Tunning -----------------------------------------------------------------
# Grid optimization
k <- seq(10, 50, by = 5)
h <- seq(0, 150, by = 25)
c <- seq(0.5, 1, by = 0.1)
grid_df <- expand_grid(k, h, c)
checkAccuracy <- function(df, k, h, c) {
df <- getElo(df, k, h, s = 400, c)
sum(df$correct_pred) / nrow(df)
}
n <- nrow(grid_df)
accuracy <- numeric(n)
for (i in 1:n) {
acc <- checkAccuracy(
df = results,
k = grid_df$k[i],
h = grid_df$h[i],
c = grid_df$c[i]
)
accuracy[i] <- acc
}
acc_df <- cbind(grid_df, accuracy) %>%
as_tibble() %>%
arrange(desc(accuracy))
# Ratings -----------------------------------------------------------------
elo_df <- getElo(plldf2, k = 25, home_adv = 100, s = 400, carry = 0.8)
elo_summary <- getEloSummary(elo_df) %>%
left_join(teaminfo, by = c("team" = "team", "season"))
tester <- getSeasonElo(
plldf3,
k = 25,
home_adv = 100,
s = 400,
initial_elo = initial_ratings)
getEloSummary(tester)
elo_summary %>%
ggplot(aes(order, elo, group = team_id)) +
geom_line()
but when I attempt to run the 'getElo' function to optimize the model it says there is a incorrect number of probabilities and I get the error below. However when I subset my data and run it for a single season using the 'getSeasonElo' function it computes it without issue. I assumed the problem was coming from the 'size' parameter being set to 1 by default, which I've fixed to account for the multi-season calculation, but the incorrect number is still happening? Not sure what I missed.
prob_pred <- sample(
x = c(team_home, team_away),
size = length(list(expected_prob_home,expected_prob_away)),
prob = c(expected_prob_home, expected_prob_away) #ISSUE HERE
)

R Factor Analysis with factanal() for huge amount of predictors results in a system that is computationally singular

I am trying to run Factor analysis for a dataset with around 150 variables but only have around around 80 observations.
I tried the factanal() function in R and R reported error:
Error in solve.default(cv) :
system is computationally singular: reciprocal condition number = 3.0804e-20
Any suggestions on alternative methods / packages?
A demonstration on a dummy dataset would be:
# This will work (dataset with 80 obs and 15 predictors)
set.seed(1234)
fake_df = as.data.frame(matrix(rnorm(80*15), nrow = 80))
factanal(fake_df, factors = 2, rotation = "varimax")
# This will not (dataset with 80 obs and 150 predictors)
set.seed(1234)
fake_df = as.data.frame(matrix(rnorm(80*150), nrow = 80))
factanal(fake_df, factors = 2, rotation = "varimax")
So far I've replaced the solve function in the factanal() source code with a numerical solving function one that I created below, but it did not resolve the issue:
solve_G = function(M){
library(matrixcalc)
if(!is.singular.matrix(M)){
return(solve(M))
} else{
s = svd(M)
U = s$u
V = s$v
D_Inv = diag(1/s$d)
Num_Inv = V %*% D_Inv %*% t(U)
cat("Singular Matrix! SVD Used.\n")
return(Num_Inv)
}
}
And after you replace "solve" with "solve_G", a new error occurred:
Error in factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt) :
could not find function "factanal.fit.mle"
P.S. Here is the new "factanal" function named my_factanal:
The error above occurred when running the line:
nfit <- factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt)
And to run this, Set x to be a 80* 150 numerical dataframe, set factors = 2, set scores = "regression", rotation = "varimax":
my_factanal = function (x, factors, data = NULL, covmat = NULL, n.obs = NA,
subset, na.action, start = NULL, scores = c("none", "regression",
"Bartlett"), rotation = "varimax", control = NULL, ...)
{
sortLoadings <- function(Lambda) {
cn <- colnames(Lambda)
Phi <- attr(Lambda, "covariance")
ssq <- apply(Lambda, 2L, function(x) -sum(x^2))
Lambda <- Lambda[, order(ssq), drop = FALSE]
colnames(Lambda) <- cn
neg <- colSums(Lambda) < 0
Lambda[, neg] <- -Lambda[, neg]
if (!is.null(Phi)) {
unit <- ifelse(neg, -1, 1)
attr(Lambda, "covariance") <- unit %*% Phi[order(ssq),
order(ssq)] %*% unit
}
Lambda
}
cl <- match.call()
na.act <- NULL
if (is.list(covmat)) {
if (any(is.na(match(c("cov", "n.obs"), names(covmat)))))
stop("'covmat' is not a valid covariance list")
cv <- covmat$cov
n.obs <- covmat$n.obs
have.x <- FALSE
}
else if (is.matrix(covmat)) {
cv <- covmat
have.x <- FALSE
}
else if (is.null(covmat)) {
if (missing(x))
stop("neither 'x' nor 'covmat' supplied")
have.x <- TRUE
if (inherits(x, "formula")) {
mt <- terms(x, data = data)
if (attr(mt, "response") > 0)
stop("response not allowed in formula")
attr(mt, "intercept") <- 0
mf <- match.call(expand.dots = FALSE)
names(mf)[names(mf) == "x"] <- "formula"
mf$factors <- mf$covmat <- mf$scores <- mf$start <- mf$rotation <- mf$control <- mf$... <- NULL
mf[[1L]] <- quote(stats::model.frame)
mf <- eval.parent(mf)
na.act <- attr(mf, "na.action")
if (.check_vars_numeric(mf))
stop("factor analysis applies only to numerical variables")
z <- model.matrix(mt, mf)
}
else {
z <- as.matrix(x)
if (!is.numeric(z))
stop("factor analysis applies only to numerical variables")
if (!missing(subset))
z <- z[subset, , drop = FALSE]
}
covmat <- cov.wt(z)
cv <- covmat$cov
n.obs <- covmat$n.obs
}
else stop("'covmat' is of unknown type")
scores <- match.arg(scores)
if (scores != "none" && !have.x)
stop("requested scores without an 'x' matrix")
p <- ncol(cv)
if (p < 3)
stop("factor analysis requires at least three variables")
dof <- 0.5 * ((p - factors)^2 - p - factors)
if (dof < 0)
stop(sprintf(ngettext(factors, "%d factor is too many for %d variables",
"%d factors are too many for %d variables"), factors,
p), domain = NA)
sds <- sqrt(diag(cv))
cv <- cv/(sds %o% sds)
cn <- list(nstart = 1, trace = FALSE, lower = 0.005)
cn[names(control)] <- control
more <- list(...)[c("nstart", "trace", "lower", "opt", "rotate")]
if (length(more))
cn[names(more)] <- more
if (is.null(start)) {
start <- (1 - 0.5 * factors/p)/diag(solve_G(cv))
if ((ns <- cn$nstart) > 1)
start <- cbind(start, matrix(runif(ns - 1), p, ns -
1, byrow = TRUE))
}
start <- as.matrix(start)
if (nrow(start) != p)
stop(sprintf(ngettext(p, "'start' must have %d row",
"'start' must have %d rows"), p), domain = NA)
nc <- ncol(start)
if (nc < 1)
stop("no starting values supplied")
best <- Inf
for (i in 1L:nc) {
nfit <- factanal.fit.mle(cv, factors, start[, i], max(cn$lower, 0), cn$opt)
if (cn$trace)
cat("start", i, "value:", format(nfit$criteria[1L]),
"uniqs:", format(as.vector(round(nfit$uniquenesses,
4))), "\\n")
if (nfit$converged && nfit$criteria[1L] < best) {
fit <- nfit
best <- fit$criteria[1L]
}
}
if (best == Inf)
stop(ngettext(nc, "unable to optimize from this starting value",
"unable to optimize from these starting values"),
domain = NA)
load <- fit$loadings
if (rotation != "none") {
rot <- do.call(rotation, c(list(load), cn$rotate))
load <- if (is.list(rot)) {
load <- rot$loadings
fit$rotmat <- if (inherits(rot, "GPArotation"))
t(solve_G(rot$Th))
else rot$rotmat
rot$loadings
}
else rot
}
fit$loadings <- sortLoadings(load)
class(fit$loadings) <- "loadings"
fit$na.action <- na.act
if (have.x && scores != "none") {
Lambda <- fit$loadings
zz <- scale(z, TRUE, TRUE)
switch(scores, regression = {
sc <- zz %*% solve(cv, Lambda)
if (!is.null(Phi <- attr(Lambda, "covariance"))) sc <- sc %*%
Phi
}, Bartlett = {
d <- 1/fit$uniquenesses
tmp <- t(Lambda * d)
sc <- t(solve(tmp %*% Lambda, tmp %*% t(zz)))
})
rownames(sc) <- rownames(z)
colnames(sc) <- colnames(Lambda)
if (!is.null(na.act))
sc <- napredict(na.act, sc)
fit$scores <- sc
}
if (!is.na(n.obs) && dof > 0) {
fit$STATISTIC <- (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3) *
fit$criteria["objective"]
fit$PVAL <- pchisq(fit$STATISTIC, dof, lower.tail = FALSE)
}
fit$n.obs <- n.obs
fit$call <- cl
fit
}

How to remove extra points on graph

I have written the following R function:
#initialprob <- c(0.4,0.5,0.1)
f1 <- function(n,m,priceinitial,delta,mean, sd, ninterval){
initialprob <- c(1/3, 1/3, 1/3)
traders <- vector(mode="character", length=n)
traderscurrent <- vector(mode="character", length=n)
price <- vector(mode="numeric")
pricecurrent <- vector(mode="numeric")
for(nint in 1:ninterval)
{
print(initialprob)
L = floor(rnorm(1,mean,sd))
print(L)
for(i in 1:n)
{
traders[i] = sample(c("B", "S", "N"), size=1, prob=initialprob)
}
print(table(traders))
for(step in 1:L)
{
for(i in 1:n)
{
b <- sample(traders[-i], m)
#print(b)
#table(b)
traderscurrent[i] <- sample(b,1)
}
print(table(traderscurrent))
buy = length(which(traderscurrent == "B"))
sell = length(which(traderscurrent == "S"))
neutral = length(which(traderscurrent == "N"))
total = buy+neutral+sell
buyprop = buy/total
sellprop = sell/total
neutralprop = neutral/total
pricecurrent[step] = priceinitial+buy*delta-sell*delta
priceinitial = pricecurrent[step]
traders <- traderscurrent
#print(nint)
#print
initialprob <- c(sellprop,buyprop,neutralprop)
print(initialprob)
}
a <- runif(1,0,1)
b <- runif(1,0,1)
c <- runif(1,0,1)
total = a+b+c
initialprob <- c(a/total, b/total, c/total)
print(initialprob)
price <- append(price,pricecurrent, after=length(price))
#price <- price[-step]
plot(price)
}
print(price)
#plot(price)
}
When I call f1(1000,100,100,10,10,1,100) I get a graph that looks like this:
How would I fix this? It seems that there is a problem with appending the prices with previous prices. Maybe the last price of the previous iteration is being added twice?
The problem seems to be that the pricecurrent vector is not being reset after each iteration. Try adding pricecurrent <- vector(mode="numeric") just after the first for loop declaration:
for(nint in 1:ninterval)
{
pricecurrent <- vector(mode="numeric") ## <- added
print(initialprob)
L = floor(rnorm(1,mean,sd))
print(L)
for(i in 1:n)
....

Trying to create R UDF in Vertica

So I have a long function that I run in R every month. My goal is to create a Vertica UDF using vertica's ability to run functions written in R. My hope is that this can then be automated from my companies' data warehouse. I've looked all over the internet for an example that resembles mine but cannot find one. My function takes two dataframes as input and one dataframe as output. Below is the function and factory function code.
Any help would be appreciated.
Thanks,
Ben
TV_Attribution_Function <- function(MAP.data, tv_data) {
# Inputs are two queries
# MAP.data = visits data
# tv_data = Data with TV spots
MAP.data$Date_time <- as.POSIXct(as.character(MAP.data$Date_time), format="%F %R")
tv_data$IMPRESSIONS <- as.numeric(tv_data$IMPRESSIONS)
tv_data$Date_time <- as.POSIXct(as.character(tv_data$Date_time), format="%F %R")
missing <- tv_data[tv_data$IMPRESSIONS <= 0,]
tv_data[which(tv_data$IMPRESSIONS == 0),'IMPRESSIONS'] <- 1
#replace nas w/ 0
tv_data[is.na(tv_data)] <- 0
MAP.data[is.na(MAP.data)] <- 0
for(i in c(1:8,10:12)){
if(class(tv_data[,i])[1] == 'character'){tv_data[,i] <- as.factor(tv_data[,i])}
}
tv_data$Feed <- factor(tv_data$Feed, levels = c("",unique(tv_data$Feed)))
tv_data$SPOT_LENGTH <- as.integer(tv_data$SPOT_LENGTH)
for(i in 2:9){
if(class(MAP.data[,i])[1] == 'character'){MAP.data[,i] <- as.factor(MAP.data[,i])}
if(typeof(MAP.data[,i])[1] == 'double'){MAP.data[,i] <- as.integer(MAP.data[,i])}
}
visits_data <- MAP.data
span = 0.2
sd_x = 1.0
span_b = 0.35
sd_b = 1.0
minutes_gap = 5
days <- unique(visits_data$Day_Only, na.rm=TRUE)
final_outcome <- data.frame()
for (i in 1:length(days)){
outcome_day <- subset(visits_data, Day_Only == days[i])
outcome_2 <- outcome_day$Visits_Count
outcome_2[which(is.na(outcome_2))] <- 0
t <- 1:nrow(outcome_day)
output_set <- cbind(data.frame(outcome_day$Date_time),data.frame(outcome_day$Day_Only), data.frame(outcome_day$Visits_Minute))
v.lo <- loess(outcome_2 ~ t, span = span)
v.sd <- sd(v.lo$residuals)
baseline <- v.lo$fitted + sd_x * v.sd
direct_response <- outcome_2 - baseline
direct_response[direct_response < 0] <- 0
direct_response <- data.frame(direct_response)
colnames(direct_response) <- c('mapped_visits')
output_set <- cbind(output_set, direct_response)
final_outcome <- rbind(final_outcome, output_set)
}
colnames(final_outcome)[1] <- c("Date_time")
temp_imps <- aggregate(IMPRESSIONS ~ Date_time, data = tv_data, FUN = sum)
t_imps_plus <- merge(final_outcome, temp_imps, by = 'Date_time', all.x = TRUE, all.y = FALSE)
t_imps_plus$lagged_imps <- rep(0, nrow(t_imps_plus))
for (i in 1:nrow(t_imps_plus)){
t_imps_plus$lagged_imps[i] <- sum(t_imps_plus$IMPRESSIONS[max(1,i-minutes_gap+1):i], na.rm = TRUE)
}
tv_data$mapped_visits <- rep(0,nrow(tv_data))
for (n in (1:nrow(tv_data))){
num <- match(tv_data[n,1], t_imps_plus$Date_time)
if (is.na(num) == FALSE) {
tv_data$mapped_visits[n] <- tv_data$IMPRESSIONS[n] * minutes_gap * sum(t_imps_plus$mapped_visits[num:(num+minutes_gap-1)] / t_imps_plus$lagged_imps[num:(num+minutes_gap-1)])
} else {tv_data$mapped_visits[n] <- NA}
}
#Next for visits that resulted in a seeker signup
final_outcome <- data.frame()
# Loop for each day to create baseline visits and detect spikes
for(i in 1:length(days)){
outcome_day <- subset(visits_data, Day_Only == days[i])
outcome_2 <- outcome_day$new_seekers
outcome_2[which(is.na(outcome_2))] <- 0
t <- c(1:length(outcome_2))
output_set <- cbind(data.frame(outcome_day$Date_time),data.frame(outcome_day$Day_Only), data.frame(outcome_day$Visits_Minute))
v.lo <- loess(outcome_2 ~ t, span = span_b)
v.sd <- sd(v.lo$residuals)
baseline <- v.lo$fitted + sd_b * v.sd
#shouldnt direct response be outcome_2 - baseline, not fitted???
direct_response <- outcome_2 - v.lo$fitted
direct_response[direct_response < 0] <- 0
direct_response <- data.frame(direct_response)
colnames(direct_response) <- c('mapped_ns_visits')
output_set <- cbind(output_set, direct_response)
final_outcome <- rbind(final_outcome, output_set)
}
colnames(final_outcome)[1] <- c("Date_time")
temp_imps <- aggregate(IMPRESSIONS ~ Date_time, data = tv_data, FUN = sum)
#basically left join
t_imps_plus <- merge(final_outcome, temp_imps, by = 'Date_time', all.x = TRUE, all.y = FALSE)
t_imps_plus$lagged_imps <- rep(0, nrow(t_imps_plus))
for (i in 1:nrow(t_imps_plus)){
t_imps_plus$lagged_imps[i] <- sum(t_imps_plus$IMPRESSIONS[max(1,i-minutes_gap+1):i], na.rm = TRUE)
}
tv_data$mapped_ns_visits <- rep(0,nrow(tv_data))
for (n in 1:nrow(tv_data)){
num <- match(tv_data[n,1], t_imps_plus$Date_time)
if (is.na(num) == FALSE) {
tv_data$mapped_ns_visits[n] <- sum(t_imps_plus$mapped_ns_visits[num:(num+minutes_gap-1)]) * tv_data$IMPRESSIONS[n]*minutes_gap / sum(t_imps_plus$lagged_imps[num:(num+minutes_gap-1)])
} else {n}
}
final_outcome <- data.frame()
# Loop for each day to create baseline visits and detect spikes
for (i in 1:length(days)){
outcome_day <- subset(visits_data, Day_Only == days[i])
outcome_2 <- outcome_day$new_sitters
outcome_2[which(is.na(outcome_2))] <- 0
t <- c(1:nrow(outcome_day))
output_set <- cbind(data.frame(outcome_day$Date_time),data.frame(outcome_day$Day_Only), data.frame(outcome_day$Visits_Minute))
v.lo <- loess(outcome_2 ~ t, span = span_b)
v.sd <- sd(v.lo$residuals)
baseline <- v.lo$fitted + sd_b * v.sd
direct_response <- outcome_2 - baseline
direct_response[direct_response < 0] <- 0
direct_response <- data.frame(direct_response)
colnames(direct_response) <- c('mapped_np_visits')
output_set <- cbind(output_set, direct_response)
final_outcome <- rbind(final_outcome, output_set)
}
colnames(final_outcome)[1] <- c("Date_time")
#Fill in total impressions including lags
temp_imps <- aggregate(IMPRESSIONS ~ Date_time, data = tv_data, FUN = sum)
t_imps_plus <- merge(final_outcome, temp_imps, by = 'Date_time', all.x = TRUE, all.y = FALSE)
t_imps_plus$lagged_imps <- rep(0, nrow(t_imps_plus))
for (i in 1:nrow(t_imps_plus)){
t_imps_plus$lagged_imps[i] <- sum(t_imps_plus$IMPRESSIONS[max(1,i-minutes_gap+1):i], na.rm = TRUE)
}
#Add mapped visits data to the tv_data table and account for any overlapping spots using ratio of impressions
tv_data$mapped_np_visits <- rep(0,nrow(tv_data))
for (n in 1:nrow(tv_data)){
num <- match(tv_data[n,1], t_imps_plus$Date_time) # mapped new members when spot aired
if (is.na(num) == FALSE) {
tv_data$mapped_np_visits[n] <- tv_data$IMPRESSIONS[n] * minutes_gap *
sum(t_imps_plus$mapped_np_visits[num:(num+minutes_gap-1)] / t_imps_plus$lagged_imps[num:(num+minutes_gap-1)])
} else {n}
}
for (n in 1:nrow(tv_data)){
num <- match(tv_data[n,1], visits_data$Date_time)
if(is.na(num) == FALSE){
temp <- visits_data$new_seekers[num:(num+minutes_gap-1)]
tv_data$total_ns_visits[n] <- sum(temp, na.rm = T)
} else if(is.na(num) == TRUE){tv_data$total_ns_visits[n] <- 0}
}
## Add total new provider visits ####
for (n in 1:nrow(tv_data)){
num <- match(tv_data[n,1], visits_data$Date_time)
if(is.na(num) == FALSE){
temp <- visits_data$new_sitters[num:(num+minutes_gap-1)]
tv_data$total_np_visits[n] <- sum(temp, na.rm = T)
} else if(is.na(num) == TRUE){tv_data$total_np_visits[n] <- 0}
}
### add total day1 prems #####
for (n in 1:nrow(tv_data)){
num <- match(tv_data[n,1], visits_data$Date_time)
if(is.na(num) == FALSE){
temp <- visits_data$day1_premiums[num:(num+minutes_gap-1)]
tv_data$day1_premiums[n] <- sum(temp, na.rm = T)
} else if(is.na(num) == TRUE){tv_data$day1_premiums[n] <- 0}
}
# add week1 prems ###########
for (n in 1:nrow(tv_data)){
num <- match(tv_data[n,1], visits_data$Date_time)
if(is.na(num) == FALSE){
temp <- visits_data$week1_premiums[num:(num+minutes_gap-1)]
tv_data$week1_premiums[n] <- sum(temp, na.rm = T)
} else if(is.na(num) == TRUE){tv_data$week1_premiums[n] <- 0}
}
tv_data$attr_premiums <- tv_data$week1_premiums * tv_data$mapped_ns_visits / tv_data$total_ns_visits
return(tv_data)
}
# Factory Function
TV_Attribution_Function_Factory <- function() {
list (
name = TV_Attribution_Function
,udxtype=c("scalar")
,intype = c("any")
,outtype = c("any")
)
}

Saving huge model object to file

Say you have a model object of class 'varrest' returned from a VAR() regression operation.
I want to save the model to a file, but not all data which was used to estimate the coefficients.
How can one just save the model specification wihtout the training data?
Because when I save the model it has a file size of over 1GB and therefore loading does take its time.
Can one save objects without some attributes?
The predict.varest function starts out with this code:
K <- object$K
p <- object$p
obs <- object$obs
type <- object$type
data.all <- object$datamat
ynames <- colnames(object$y)
You can then investigate how much pruning you might achieve:
data(Canada)
tcan <-
VAR(Canada, p = 2, type = "trend")
names(tcan)
# [1] "varresult" "datamat" "y" "type" "p"
# [6] "K" "obs" "totobs" "restrictions" "call"
object.size(tcan[c("K","p", "obs", "type", "datamat", "y")] )
#15080 bytes
object.size(tcan)
#252032 bytes
So the difference is substantial, but just saving those items is not sufficient because the next line in predict.varest is:
B <- Bcoef(object)
You will need to add that object to the list above and then construct a new predict-function that accepts something less than the large 'varresult' node of the model object. Also turned out that there was a downstream call to an internal function that needs to be stored. (You will need to decide in advance what interval you need for prediction.)
tsmall <- c( tcan[c("K","p", "obs", "type", "datamat", "y", "call")] )
tsmall[["Bco"]] <- Bcoef(tcan)
tsmall$sig.y <- vars:::.fecov(x = tcan, n.ahead = 10)
And the modified predict function will be:
sm.predict <- function (object, ..., n.ahead = 10, ci = 0.95, dumvar = NULL)
{
K <- object$K
p <- object$p
obs <- object$obs
type <- object$type
data.all <- object$datamat
ynames <- colnames(object$y)
n.ahead <- as.integer(n.ahead)
Z <- object$datamat[, -c(1:K)]
# This used to be a call to Bcoef(object)
B <- object$Bco
if (type == "const") {
Zdet <- matrix(rep(1, n.ahead), nrow = n.ahead, ncol = 1)
colnames(Zdet) <- "const"
}
else if (type == "trend") {
trdstart <- nrow(Z) + 1 + p
Zdet <- matrix(seq(trdstart, length = n.ahead), nrow = n.ahead,
ncol = 1)
colnames(Zdet) <- "trend"
}
else if (type == "both") {
trdstart <- nrow(Z) + 1 + p
Zdet <- matrix(c(rep(1, n.ahead), seq(trdstart, length = n.ahead)),
nrow = n.ahead, ncol = 2)
colnames(Zdet) <- c("const", "trend")
}
else if (type == "none") {
Zdet <- NULL
}
if (!is.null(eval(object$call$season))) {
season <- eval(object$call$season)
seas.names <- paste("sd", 1:(season - 1), sep = "")
cycle <- tail(data.all[, seas.names], season)
seasonal <- as.matrix(cycle, nrow = season, ncol = season -
1)
if (nrow(seasonal) >= n.ahead) {
seasonal <- as.matrix(cycle[1:n.ahead, ], nrow = n.ahead,
ncol = season - 1)
}
else {
while (nrow(seasonal) < n.ahead) {
seasonal <- rbind(seasonal, cycle)
}
seasonal <- seasonal[1:n.ahead, ]
}
rownames(seasonal) <- seq(nrow(data.all) + 1, length = n.ahead)
if (!is.null(Zdet)) {
Zdet <- as.matrix(cbind(Zdet, seasonal))
}
else {
Zdet <- as.matrix(seasonal)
}
}
if (!is.null(eval(object$call$exogen))) {
if (is.null(dumvar)) {
stop("\nNo matrix for dumvar supplied, but object varest contains exogenous variables.\n")
}
if (!all(colnames(dumvar) %in% colnames(data.all))) {
stop("\nColumn names of dumvar do not coincide with exogen.\n")
}
if (!identical(nrow(dumvar), n.ahead)) {
stop("\nRow number of dumvar is unequal to n.ahead.\n")
}
if (!is.null(Zdet)) {
Zdet <- as.matrix(cbind(Zdet, dumvar))
}
else {
Zdet <- as.matrix(dumvar)
}
}
Zy <- as.matrix(object$datamat[, 1:(K * (p + 1))])
yse <- matrix(NA, nrow = n.ahead, ncol = K)
# This used to be a call to vars:::.fecov
sig.y <- object$sig.y
for (i in 1:n.ahead) {
yse[i, ] <- sqrt(diag(sig.y[, , i]))
}
yse <- -1 * qnorm((1 - ci)/2) * yse
colnames(yse) <- paste(ci, "of", ynames)
forecast <- matrix(NA, ncol = K, nrow = n.ahead)
lasty <- c(Zy[nrow(Zy), ])
for (i in 1:n.ahead) {
lasty <- lasty[1:(K * p)]; print(lasty); print(B)
Z <- c(lasty, Zdet[i, ]) ;print(Z)
forecast[i, ] <- B %*% Z
temp <- forecast[i, ]
lasty <- c(temp, lasty)
}
colnames(forecast) <- paste(ynames, ".fcst", sep = "")
lower <- forecast - yse
colnames(lower) <- paste(ynames, ".lower", sep = "")
upper <- forecast + yse
colnames(upper) <- paste(ynames, ".upper", sep = "")
forecasts <- list()
for (i in 1:K) {
forecasts[[i]] <- cbind(forecast[, i], lower[, i], upper[,
i], yse[, i])
colnames(forecasts[[i]]) <- c("fcst", "lower", "upper",
"CI")
}
names(forecasts) <- ynames
result <- list(fcst = forecasts, endog = object$y, model = object,
exo.fcst = dumvar)
class(result) <- "varprd"
return(result)
}
Either
set the attributes you do not want to NULL, or
copy the parts you want to a new object, or
call the save() function with proper indexing.

Resources