Error in density.default(x = neg) : object 'neg' not found

I tried to rewrite the plot_densities function below to use ggplot2.
plot_densities <- function(density) {
  neg_density <- density[[1]]
  pos_density <- density[[2]]
  plot(
    pos_density,
    ylim = range(c(neg_density$y, pos_density$y)),
    main = "Coverage plot of Sample 5",
    xlab = "length 21",
    col = 'blue',
    type = 'h'
  )
  lines(neg_density, type = 'h', col = 'red')
}
Unfortunately, the new function below causes Error in density.default(x = neg) : object 'neg' not found
plot_densities2 <- function(density) {
  neg_density <- density[[1]]
  pos_density <- density[[2]]
  densities <- append(neg_density, pos_density)
  ggplot(as.data.frame(densities), aes(x = x, y = y)) +
    theme_bw() +
    geom_density(alpha = 0.5)
}
The full code can be found below, and the data can be downloaded from here
#apt update && apt install zlib1g-dev
#install if necessary
source("http://bioconductor.org/biocLite.R")
biocLite("Rsamtools")
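#NOTE (hedged aside): biocLite() is the legacy Bioconductor installer; on
#current R/Bioconductor releases the equivalent would be:
#  install.packages("BiocManager")
#  BiocManager::install("Rsamtools")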
#load library
library(Rsamtools)
extracting_pos_neg_reads <- function(bam_fn) {
  #read in entire BAM file
  bam <- scanBam(bam_fn)
  #names of the BAM fields
  names(bam[[1]])
  # [1] "qname"  "flag"   "rname"  "strand" "pos"    "qwidth" "mapq"   "cigar"
  # [9] "mrnm"   "mpos"   "isize"  "seq"    "qual"
  #distribution of BAM flags
  table(bam[[1]]$flag)
  #      0       4      16
  #1472261  775200 1652949
  #function for collapsing the list of lists into a single list
  #as per the Rsamtools vignette
  .unlist <- function(x) {
    ## do.call(c, ...) coerces factor to integer, which is undesired
    x1 <- x[[1L]]
    if (is.factor(x1)) {
      structure(unlist(x), class = "factor", levels = levels(x1))
    } else {
      do.call(c, x)
    }
  }
  #store names of BAM fields
  bam_field <- names(bam[[1]])
  #go through each BAM field and unlist
  list <- lapply(bam_field, function(y)
    .unlist(lapply(bam, "[[", y)))
  #store as data frame
  bam_df <- do.call("DataFrame", list)
  names(bam_df) <- bam_field
  dim(bam_df)
  #[1] 3900410      13
  #---------
  #use chr22 as an example
  #how many entries on the negative strand of chr22?
  ###table(bam_df$rname == 'chr22' & bam_df$flag == 16)
  #  FALSE    TRUE
  #3875997   24413
  #function for checking negative strand
  check_neg <- function(x) {
    if (intToBits(x)[5] == 1) {
      return(TRUE)
    } else {
      return(FALSE)
    }
  }
  #test neg function with subset of chr22
  test <- subset(bam_df)#, rname == 'chr22')
  dim(test)
  #[1] 56426    13
  table(apply(as.data.frame(test$flag), 1, check_neg))
  #number same as above
  #FALSE  TRUE
  #32013 24413
  #function for checking positive strand
  check_pos <- function(x) {
    if (intToBits(x)[3] == 1) {
      return(FALSE)
    } else if (intToBits(x)[5] != 1) {
      return(TRUE)
    } else {
      return(FALSE)
    }
  }
  #check pos function
  table(apply(as.data.frame(test$flag), 1, check_pos))
  #looks OK
  #FALSE  TRUE
  #24413 32013
  #store the mapped positions on the plus and minus strands
  neg <- bam_df[apply(as.data.frame(bam_df$flag), 1, check_neg), 'pos']
  length(neg)
  #[1] 24413
  pos <- bam_df[apply(as.data.frame(bam_df$flag), 1, check_pos), 'pos']
  length(pos)
  #[1] 32013
  #calculate the densities
  neg_density <- density(neg)
  pos_density <- density(pos)
  #display the negative strand with negative values
  neg_density$y <- neg_density$y * -1
  return(list(neg_density, pos_density))
}
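#hedged aside (untested; scanBamFlag()/ScanBamParam() are the documented
#Rsamtools API): the per-row apply() calls above could be avoided by letting
#Rsamtools filter on the flag bits directly. For reference, intToBits() is
#little-endian, so bit 5 tests the 0x10 flag (16, read reverse strand) and
#bit 3 the 0x4 flag (4, read unmapped). Something like:
#  p_neg <- ScanBamParam(flag = scanBamFlag(isMinusStrand = TRUE), what = "pos")
#  neg  <- scanBam(bam_fn, param = p_neg)[[1]]$pos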
plot_densities <- function(density) {
  neg_density <- density[[1]]
  pos_density <- density[[2]]
  plot(
    pos_density,
    ylim = range(c(neg_density$y, pos_density$y)),
    main = "Coverage plot of Sample 5",
    xlab = "length 21",
    col = 'blue',
    type = 'h'
  )
  lines(neg_density, type = 'h', col = 'red')
}
plot_densities2 <- function(density) {
  neg_density <- density[[1]]
  pos_density <- density[[2]]
  densities <- append(neg_density, pos_density)
  ggplot(as.data.frame(densities), aes(x = x, y = y)) +
    theme_bw() +
    geom_density(alpha = 0.5)
}
filenames <- c("~/sample5-21.sam-uniq.sorted.bam", "~/sample5-24.sam-uniq.sorted.bam")
for (i in filenames) {
  print(i)
  density <- extracting_pos_neg_reads(i)
  plot_densities2(density)
}

Density objects do not play well with append and as.data.frame: they contain elements of differing lengths (the call, the bandwidth, and so on) that cause problems here but are unnecessary for plotting. What we can do instead is pick only the x and y elements to construct the relevant data frame:
plot_densities2 <- function(density) {
  densities <- cbind(
    rbind(data.frame(density[[1]][1:2]), data.frame(density[[2]][1:2])),
    id = rep(c("neg", "pos"), each = length(density[[1]]$x))
  )
  print(ggplot(data = densities, aes(x = x, y = y, fill = id)) +
          theme_bw() + geom_area(alpha = 0.5))
}
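The key point is that geom_density() estimates a density from raw observations, whereas density() has already been run here, so the precomputed x/y pairs should be drawn directly with geom_area() (or geom_line()). A minimal usage sketch with simulated positions (hypothetical data, not the BAM files above):
library(ggplot2)
neg_density <- density(rnorm(1000, mean = 100, sd = 20))
pos_density <- density(rnorm(1000, mean = 150, sd = 25))
#mirror the negative strand below the axis, as in the question
neg_density$y <- neg_density$y * -1
plot_densities2(list(neg_density, pos_density))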

Related

Can you have a rolling window filter in gganimate?

I am looking to have each frame of a scatter plot be filtered by another vector with a certain bin width, and to have it roll through those bins. For example, I can do this by:
library(ggplot2)
library(gganimate)
#example data
iris <- datasets::iris
#plot x and y
g <- ggplot(iris) + geom_point(aes(x = Petal.Width, y = Petal.Length))
#filter x and y by a third value with a bin width of 2, stepping through by 0.5
g + transition_filter(transition_length = 1,
                      filter_length = 1,
                      4 < Sepal.Length & Sepal.Length < 6,
                      4.5 < Sepal.Length & Sepal.Length < 6.5,
                      5 < Sepal.Length & Sepal.Length < 7,
                      5.5 < Sepal.Length & Sepal.Length < 7.5,
                      6 < Sepal.Length & Sepal.Length < 8)
However, writing out each filter condition is tedious, and I would like to filter a different dataset with a bin width of ~20, stepping through by 1 over a 300-point range, so writing 100+ filters is not practical.
Is there another way to do this?
A while ago I wanted this exact function but didn't see anything in gganimate to do it, so I wrote something that would get the job done. Below is what I came up with; I ended up rebuilding gganimate with this function included, to avoid using :::.
I wrote this a while ago, so I don't recall the exact intention of each argument (ALWAYS REMEMBER TO DOCUMENT YOUR CODE).
Here is what I recall:
span : an expression that can be evaluated within the data layers
size : how much data is shown at once
enter_length/exit_length : don't exactly recall how these work in relation to each other or to size/span
range : a subset range
retain_data_order : logical - don't remember why this is here (sorry!)
library(gganimate)
#> Loading required package: ggplot2
library(rlang)
library(tweenr)
library(stringi)
get_row_event <- gganimate:::get_row_event
is_placeholder <- gganimate:::is_placeholder
recast_event_times <- gganimate:::recast_event_times
recast_times <- gganimate:::recast_times
TransitionSpan <- ggplot2::ggproto('TransitionSpan',
TransitionEvents,
finish_data = function (self, data, params)
{
lapply(data, function(d) {
split_panel <- stri_match(d$group, regex = "^(.+)<(.*)>(.*)$")
if (is.na(split_panel[1]))
return(list(d))
d$group <- match(d$group, unique(d$group))
empty_d <- d[0, , drop = FALSE]
d <- split(d, as.integer(split_panel[, 3]))
frames <- rep(list(empty_d), params$nframes)
frames[as.integer(names(d))] <- d
frames
})
},
setup_params = function(self, data, params) {
# browser()
params$start <- get_row_event(data, params$span_quo, "start")
time_class <- if (is_placeholder(params$start))
NULL
else params$start$class
end_quo <- expr(!!params$span_quo + diff(range(!!params$span_quo))*!!params$size_quo)
params$end <- get_row_event(data, end_quo, "end",
time_class)
params$enter_length <- get_row_event(data, params$enter_length_quo,
"enter_length", time_class)
params$exit_length <- get_row_event(data, params$exit_length_quo,
"exit_length", time_class)
params$require_stat <- is_placeholder(params$start) || is_placeholder(params$end) ||
is_placeholder(params$enter_length) || is_placeholder(params$exit_length)
static = lengths(params$start$values) == 0
params$row_id <- Map(function(st, end, en, ex, s) if (s)
character(0)
else paste(st, end, en, ex, sep = "_"), st = params$start$values,
end = params$end$values, en = params$enter_length$values,
ex = params$exit_length$values, s = static)
params
},
setup_params2 = function(self, data, params, row_vars) {
late_start <- FALSE
if (is_placeholder(params$start)) {
params$start <- get_row_event(data, params$start_quo, 'start', after = TRUE)
late_start <- TRUE
} else {
params$start$values <- lapply(row_vars$start, as.numeric)
}
size <- expr(!!params$size_quo)
time_class <- params$start$class
if (is_placeholder(params$end)) {
params$end <- get_row_event(data, params$end_quo, 'end', time_class, after = TRUE)
} else {
params$end$values <- lapply(row_vars$end, as.numeric)
}
if (is_placeholder(params$enter_length)) {
params$enter_length <- get_row_event(data, params$enter_length_quo, 'enter_length', time_class, after = TRUE)
} else {
params$enter_length$values <- lapply(row_vars$enter_length, as.numeric)
}
if (is_placeholder(params$exit_length)) {
params$exit_length <- get_row_event(data, params$exit_length_quo, 'exit_length', time_class, after = TRUE)
} else {
params$exit_length$values <- lapply(row_vars$exit_length, as.numeric)
}
times <- recast_event_times(params$start, params$end, params$enter_length, params$exit_length)
params$span_size <- diff(times$start$range)*eval_tidy(size)
range <- if (is.null(params$range)) {
low <- min(unlist(Map(function(start, enter) {
start - (if (length(enter) == 0) 0 else enter)
}, start = times$start$values, enter = times$enter_length$values)))
high <- max(unlist(Map(function(start, end, exit) {
(if (length(end) == 0) start else end) + (if (length(exit) == 0) 0 else exit)
}, start = times$start$values, end = times$end$values, exit = times$exit_length$values)))
range <- c(low, high)
} else {
if (!inherits(params$range, time_class)) {
stop('range must be given in the same class as time', call. = FALSE)
}
as.numeric(params$range)
}
full_length <- diff(range)
frame_time <- recast_times(
seq(range[1], range[2], length.out = params$nframes),
time_class
)
frame_length <- full_length / params$nframes
rep_frame <- round(params$span_size/frame_length)
lowerl <- c(rep(frame_time[1],rep_frame), frame_time[2:(params$nframes-rep_frame+1)])
upperl <- c(frame_time[1:(params$nframes-rep_frame)], rep(frame_time[params$nframes-rep_frame+1], rep_frame))
start <- lapply(times$start$values, function(x) {
round((params$nframes - 1) * (x - range[1])/full_length) + 1
})
end <- lapply(times$end$values, function(x) {
if (length(x) == 0) return(numeric())
round((params$nframes - 1) * (x - range[1])/full_length) + 1
})
enter_length <- lapply(times$enter_length$values, function(x) {
if (length(x) == 0) return(numeric())
round(x / frame_length)
})
exit_length <- lapply(times$exit_length$values, function(x) {
if (length(x) == 0) return(numeric())
round(x / frame_length)
})
params$range <- range
params$frame_time <- frame_time
static = lengths(start) == 0
params$row_id <- Map(function(st, end, en, ex, s) if (s) character(0) else paste(st, end, en, ex, sep = '_'),
st = start, end = end, en = enter_length, ex = exit_length, s = static)
params$lowerl <- lowerl
params$upperl <- upperl
params$frame_span <- upperl - lowerl
params$frame_info <- data.frame(
frame_time = frame_time,
lowerl = lowerl,
upperl = upperl,
frame_span = upperl - lowerl
)
params$nframes <- nrow(params$frame_info)
params
},
expand_panel = function(self, data, type, id, match, ease, enter, exit, params, layer_index) {
#browser()
row_vars <- self$get_row_vars(data)
if (is.null(row_vars))
return(data)
data$group <- paste0(row_vars$before, row_vars$after)
start <- as.numeric(row_vars$start)
end <- as.numeric(row_vars$end)
if (is.na(end[1]))
end <- NULL
enter_length <- as.numeric(row_vars$enter_length)
if (is.na(enter_length[1]))
enter_length <- NULL
exit_length <- as.numeric(row_vars$exit_length)
if (is.na(exit_length[1]))
exit_length <- NULL
data$.start <- start
all_frames <- tween_events(data, c(ease,"linear"),
params$nframes, !!start, !!end, c(1, params$nframes),
enter, exit, !!enter_length, !!exit_length)
if(params$retain_data_order){
all_frames <- all_frames[order(as.numeric(all_frames$.id)),]
} else {
all_frames <- all_frames[order(all_frames$.start, as.numeric(all_frames$.id)),]
}
all_frames$group <- paste0(all_frames$group, '<', all_frames$.frame, '>')
all_frames$.frame <- NULL
all_frames$.start <- NULL
all_frames
})
transition_span <- function(span, size = 0.5, enter_length = NULL, exit_length = NULL,
                            range = NULL, retain_data_order = TRUE) {
  span_quo <- enquo(span)
  size_quo <- enquo(size)
  enter_length_quo <- enquo(enter_length)
  exit_length_quo <- enquo(exit_length)
  gganimate:::require_quo(span_quo, "span")
  ggproto(NULL, TransitionSpan,
          params = list(span_quo = span_quo,
                        size_quo = size_quo, range = range,
                        enter_length_quo = enter_length_quo,
                        exit_length_quo = exit_length_quo,
                        retain_data_order = retain_data_order))
}
g <- ggplot(iris) +
  geom_point(aes(x = Petal.Width, y = Petal.Length, color = Sepal.Length)) +
  viridis::scale_color_viridis()
a <- g + transition_span(Sepal.Length, .1, 1, 1)
animate(a, renderer = gganimate::gifski_renderer())
Created on 2021-08-11 by the reprex package (v2.0.0)
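If rebuilding gganimate is not appealing, a lighter-weight sketch (untested, and assuming transition_filter() quotes its ... arguments, as it does in current gganimate) is to generate the filter expressions programmatically and splice them in with rlang:
library(rlang)
#one filter per window: bin width 2, stepping by 0.5, as in the question
starts <- seq(4, 6, by = 0.5)
filters <- lapply(starts, function(lo) expr(!!lo < Sepal.Length & Sepal.Length < !!(lo + 2)))
anim <- inject(g + transition_filter(transition_length = 1, filter_length = 1, !!!filters))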

Cannot update/edit ggplot2 object exported from a package (`gratia`) in R

I hope I am missing something painfully obvious here.
I wish to update (e.g., fix the title, labs, etc.) a ggplot object produced by gratia::draw(), but I am not really sure why I am unable to update the object.
Is there a simple solution?
# devtools::install_github('gavinsimpson/gratia')
library('mgcv')
library('gratia')
dat <- gamSim(1, n = 400, dist = "normal", scale = 2, verbose = FALSE)
mod <- gam(y ~ s(x0), data = dat, method = "REML")
draw(mod)
p <- draw(mod)
# p is a gg object
class(p)
#> [1] "gg" "ggplot"
So, why can't I update p?
p + ggtitle("My title")
Created on 2019-02-26 by the reprex package (v0.2.1)
The object returned by draw.gam is the output of cowplot::plot_grid (not a pure ggplot2 graphical object). I made a small change to the draw.gam function of gratia.
Now the p object can be manipulated:
# The modified draw.gam function
mydraw.gam <- function(object, parametric = TRUE, select = NULL,
                       scales = c("free", "fixed"), align = "hv", axis = "lrtb",
                       n = 100, unconditional = FALSE,
                       overall_uncertainty = TRUE, dist = 0.1, ...) {
  scales <- match.arg(scales)
  S <- smooths(object)
  select <- gratia:::check_user_select_smooths(smooths = S, select = select)
  d <- gratia:::smooth_dim(object)
  take <- d <= 2L
  select <- select[take]
  S <- S[take]
  d <- d[take]
  is_re <- vapply(object[["smooth"]], gratia:::is_re_smooth, logical(1L))
  is_by <- vapply(object[["smooth"]], gratia:::is_by_smooth, logical(1L))
  if (any(is_by)) {
    S <- vapply(strsplit(S, ":"), `[[`, character(1L), 1L)
  }
  npara <- 0
  nsmooth <- length(S)
  if (isTRUE(parametric)) {
    terms <- parametric_terms(object)
    npara <- length(terms)
    p <- vector("list", length = npara)
  }
  g <- l <- vector("list", length = nsmooth)
  for (i in unique(S)) {
    eS <- evaluate_smooth(object, smooth = i, n = n, unconditional = unconditional,
                          overall_uncertainty = overall_uncertainty, dist = dist)
    l[S == i] <- split(eS, eS[["smooth"]])
  }
  l <- l[select]
  d <- d[select]
  g <- g[select]
  if (length(g) == 0L) {
    message("Unable to draw any of the model terms.")
    return(invisible(g))
  }
  for (i in seq_along(l)) {
    g[[i]] <- draw(l[[i]])
  }
  if (isTRUE(parametric)) {
    for (i in seq_along(terms)) {
      p[[i]] <- evaluate_parametric_term(object, term = terms[i])
      g[[i + length(g)]] <- draw(p[[i]])
    }
  }
  if (isTRUE(identical(scales, "fixed"))) {
    wrapper <- function(x) {
      range(x[["est"]] + (2 * x[["se"]]), x[["est"]] - (2 * x[["se"]]))
    }
    ylims <- range(unlist(lapply(l, wrapper)))
    if (isTRUE(parametric)) {
      ylims <- range(ylims, unlist(lapply(p, function(x) range(x[["upper"]],
                                                               x[["lower"]]))))
    }
    gg <- seq_along(g)[c(d == 1L, rep(TRUE, npara))]
    for (i in gg) {
      g[[i]] <- g[[i]] + lims(y = ylims)
    }
  }
  g
}
# Example no. 1
dat <- gamSim(1, n = 400, dist = "normal", scale = 2, verbose = FALSE)
mod <- gam(y ~ s(x0), data = dat, method = "REML")
p <- mydraw.gam(mod)
p[[1]] + ggtitle("My title")
# Example no. 2
mod <- gam(y ~ s(x0) + x1, data = dat, method = "REML")
p <- mydraw.gam(mod)
# Plot graphs separately
p[[1]] + ggtitle("My title")
p[[2]] + ggtitle("My title")
# Arrange the two plots on the same figure
cowplot::plot_grid(plotlist = p)
Replies from Gavin Simpson and Hao Ye, respectively:
I think the only way to change the title(s) on the individual plots of smooths would be to use draw(evaluate_smooth(model, "smooth"), title = "My title") individually at the moment.
You might be able to hack a title in a different way:
draw(mod) +
  ggplot2::coord_cartesian(clip = "off") +
  ggplot2::theme(plot.margin = ggplot2::unit(c(0.05, 0, 0, 0), "npc")) +
  ggplot2::annotate("text", x = 0.5, y = 1, vjust = 0, label = "TITLE", size = 6)
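As a further hedged note: newer gratia releases compose the panels with patchwork rather than cowplot, and patchwork objects accept a global title via patchwork::plot_annotation() (or & to modify every panel), so on a current install something like this may work:
draw(mod) + patchwork::plot_annotation(title = "My title")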

How do I speed up my function, specifically the ggplot commands?

I put together a function to identify outliers. It takes a data frame and then shows plots of the data with lines to indicate potential outliers. It also gives a table with the outliers marked.
But it is SLOOOW. The problem is that it takes a really long time for the plots to load.
I was curious whether you might have advice on how to speed this up.
Related: Is the default plotting system faster than ggplot?
I'll start with the dependencies:
#These next four functions are not mine. They're used in GetOutliers()
ExtractDetails <- function(x, down, up){
  outClass <- rep("N", length(x))
  indexLo <- which(x < down)
  indexHi <- which(x > up)
  outClass[indexLo] <- "L"
  outClass[indexHi] <- "U"
  index <- union(indexLo, indexHi)
  values <- x[index]
  outClass <- outClass[index]
  nOut <- length(index)
  maxNom <- max(x[which(x <= up)])
  minNom <- min(x[which(x >= down)])
  outList <- list(nOut = nOut, lowLim = down,
                  upLim = up, minNom = minNom,
                  maxNom = maxNom, index = index,
                  values = values,
                  outClass = outClass)
  return(outList)
}
Hampel <- function(x, t = 3){
  mu <- median(x, na.rm = TRUE)
  sig <- mad(x, na.rm = TRUE)
  if (sig == 0) {
    message("Hampel identifier implosion: MAD scale estimate is zero")
  }
  up <- mu + t * sig
  down <- mu - t * sig
  out <- list(up = up, down = down)
  return(out)
}
ThreeSigma <- function(x, t = 3){
  mu <- mean(x, na.rm = TRUE)
  sig <- sd(x, na.rm = TRUE)
  if (sig == 0) {
    message("All non-missing x-values are identical")
  }
  up <- mu + t * sig
  down <- mu - t * sig
  out <- list(up = up, down = down)
  return(out)
}
BoxplotRule <- function(x, t = 1.5){
  xL <- quantile(x, na.rm = TRUE, probs = 0.25, names = FALSE)
  xU <- quantile(x, na.rm = TRUE, probs = 0.75, names = FALSE)
  Q <- xU - xL
  if (Q == 0) {
    message("Boxplot rule implosion: interquartile distance is zero")
  }
  up <- xU + t * Q
  down <- xL - t * Q  #lower fence uses the first quartile (the original had xU here, a bug)
  out <- list(up = up, down = down)
  return(out)
}
FindOutliers <- function(x, t3 = 3, tH = 3, tb = 1.5){
  threeLims <- ThreeSigma(x, t = t3)
  HampLims <- Hampel(x, t = tH)
  boxLims <- BoxplotRule(x, t = tb)
  n <- length(x)
  nMiss <- length(which(is.na(x)))
  threeList <- ExtractDetails(x, threeLims$down, threeLims$up)
  HampList <- ExtractDetails(x, HampLims$down, HampLims$up)
  boxList <- ExtractDetails(x, boxLims$down, boxLims$up)
  sumFrame <- data.frame(method = "ThreeSigma", n = n,
                         nMiss = nMiss, nOut = threeList$nOut,
                         lowLim = threeList$lowLim,
                         upLim = threeList$upLim,
                         minNom = threeList$minNom,
                         maxNom = threeList$maxNom)
  upFrame <- data.frame(method = "Hampel", n = n,
                        nMiss = nMiss, nOut = HampList$nOut,
                        lowLim = HampList$lowLim,
                        upLim = HampList$upLim,
                        minNom = HampList$minNom,
                        maxNom = HampList$maxNom)
  sumFrame <- rbind.data.frame(sumFrame, upFrame)
  upFrame <- data.frame(method = "BoxplotRule", n = n,
                        nMiss = nMiss, nOut = boxList$nOut,
                        lowLim = boxList$lowLim,
                        upLim = boxList$upLim,
                        minNom = boxList$minNom,
                        maxNom = boxList$maxNom)
  sumFrame <- rbind.data.frame(sumFrame, upFrame)
  threeFrame <- data.frame(index = threeList$index,
                           values = threeList$values,
                           type = threeList$outClass)
  HampFrame <- data.frame(index = HampList$index,
                          values = HampList$values,
                          type = HampList$outClass)
  boxFrame <- data.frame(index = boxList$index,
                         values = boxList$values,
                         type = boxList$outClass)
  outList <- list(summary = sumFrame, threeSigma = threeFrame,
                  Hampel = HampFrame, boxplotRule = boxFrame)
  return(outList)
}
#strip non-numeric variables out of a dataframe
num_vars <- function(df){
  X <- which(sapply(df, is.numeric))
  num_vars <- df[names(X)]
  return(num_vars)
}
This is the function:
GetOutliers <- function(df){
  library('dplyr')
  library('ggplot2')
  #strip out the non-numeric columns
  df_out <- num_vars(df)
  #initialize the data frame
  df_out$Hampel <- NA
  df_out$threeSigma <- NA
  df_out$boxplotRule <- NA
  df_out_id <- df_out
  #identify outliers for each column
  for (i in 1:length(names(num_vars(df)))){
    #find the outliers
    Outs <- FindOutliers(df_out[[i]])
    OutsSum <- Outs$summary
    #re-enter the outlier status
    df_out$Hampel <- NA
    df_out$threeSigma <- NA
    df_out$boxplotRule <- NA
    ifelse(is.na(Outs$Hampel), print(), df_out[unlist(Outs$Hampel[1]),]$Hampel <- TRUE)
    ifelse(is.na(Outs$threeSigma), print(), df_out[unlist(Outs$threeSigma[1]),]$threeSigma <- TRUE)
    ifelse(is.na(Outs$boxplotRule), print(), df_out[unlist(Outs$boxplotRule[1]),]$boxplotRule <- TRUE)
    #visualize the outliers and print outlier information
    Temp <- df_out
    A <- colnames(Temp)[i]
    AA <- paste(A, "Index")
    colnames(Temp)[i] <- 'curr_column'
    #table with outlier status
    X <- arrange(subset(Temp, Hampel == TRUE | boxplotRule == TRUE | threeSigma == TRUE), desc(curr_column))
    #scatterplot with labels
    Y <- ggplot(Temp, aes(seq_along(curr_column), curr_column)) + geom_point() +
      geom_hline(yintercept = OutsSum$lowLim[1], linetype = 'dashed') +
      geom_hline(yintercept = OutsSum$lowLim[2], linetype = 'dashed') +
      geom_hline(yintercept = OutsSum$lowLim[3], linetype = 'dashed') +
      geom_hline(yintercept = OutsSum$upLim[1], linetype = 'dashed') +
      geom_hline(yintercept = OutsSum$upLim[2], linetype = 'dashed') +
      geom_hline(yintercept = OutsSum$upLim[3], linetype = 'dashed') +
      geom_text(aes(40, OutsSum$lowLim[1], label = "ThreeSigma Lower", vjust = -1)) +
      geom_text(aes(40, OutsSum$lowLim[2], label = "Hampel Lower", vjust = -1)) +
      geom_text(aes(40, OutsSum$lowLim[3], label = "Boxplot Lower", vjust = -1)) +
      geom_text(aes(40, OutsSum$upLim[1], label = "ThreeSigma Upper", vjust = -1)) +
      geom_text(aes(40, OutsSum$upLim[2], label = "Hampel Upper", vjust = -1)) +
      geom_text(aes(40, OutsSum$upLim[3], label = "Boxplot Upper", vjust = -1)) +
      xlab(AA) + ylab(A)
    #scatterplot without labels
    Z <- ggplot(Temp, aes(seq_along(curr_column), curr_column)) + geom_point() +
      geom_hline(yintercept = OutsSum$lowLim[1], linetype = 'dashed') +
      geom_hline(yintercept = OutsSum$lowLim[2], linetype = 'dashed') +
      geom_hline(yintercept = OutsSum$lowLim[3], linetype = 'dashed') +
      geom_hline(yintercept = OutsSum$upLim[1], linetype = 'dashed') +
      geom_hline(yintercept = OutsSum$upLim[2], linetype = 'dashed') +
      geom_hline(yintercept = OutsSum$upLim[3], linetype = 'dashed') +
      xlab(AA) + ylab(A)
    U <- ggplot(Temp, aes(curr_column)) + geom_density() + xlab(A)
    print(A)
    print(X)
    print(OutsSum)
    print(Z)
    print(Y)
    print(U)
    #mark the extreme outliers; the rest are reasonable outliers
    A <- colnames(df_out_id[i])
    Q <- as.numeric(readline(prompt = "Enter the index for final Extreme value on the upper limit (if none, enter 0): "))
    W <- as.numeric(readline(prompt = "Enter the index for first Extreme value on the lower limit (if none, enter 0): "))
    col <- df_out_id[i]
    df_out_id[i] <- sapply(col[[1]], function(x){
      if (Q > 1 & x %in% X$curr_column[1:Q]) return('Extreme')
      if (W > 1 & x %in% X$curr_column[W:length(X$curr_column)]) return('Extreme')
      else if (x %in% X$curr_column[Q + 1:length(X$curr_column)]) return('Reasonable')
      else return('Non-Outlier')
    })
  }
  #return a dataframe with outlier status, excluding the outlier ID columns
  summary(df_out_id)
  return(df_out_id[1:(length(names(df_out_id)) - 3)])
}
Example:
library('ISLR')
data(Carseats)
GetOutliers(Carseats)
It'll show you the outliers for each numeric variable, plot the variable density and then a scatterplot with identifier lines, and accept input so you can mark some outliers as reasonable and others as extreme.
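One hedged suggestion for the slow plots (a sketch against the code above, not a tested fix): geom_text(aes(...)) with constant values draws one label per row of Temp, so each plot paints thousands of overlapping text strings; annotate() draws each label exactly once, and the six geom_hline() layers can collapse into one layer driven by a small data frame:
lims <- data.frame(
  y = c(OutsSum$lowLim, OutsSum$upLim),
  lab = c("ThreeSigma Lower", "Hampel Lower", "Boxplot Lower",
          "ThreeSigma Upper", "Hampel Upper", "Boxplot Upper")
)
Y <- ggplot(Temp, aes(seq_along(curr_column), curr_column)) + geom_point() +
  geom_hline(data = lims, aes(yintercept = y), linetype = 'dashed') +
  annotate("text", x = 40, y = lims$y, label = lims$lab, vjust = -1) +
  xlab(AA) + ylab(A)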

Changing the font size of multipanel Taylor plot using openair package in R

I have created a multipanel Taylor plot using openair package. I want to change the font size of 'correlation' and 'observed' and make it sentence case. I have used the following code:
TaylorDiagram(data, obs = "Observed", mod = "Predicted", group = "Method", type = "Station")
The task is now achieved by changing the source code of TaylorDiagram from the openair package, using the following code:
library(lattice)
library(dplyr)
TaylorDiagram1 <- function(mydata, obs = "obs", mod = "mod", group = NULL, type = "default",
normalise = FALSE, cols = "brewer1",
rms.col = "darkgoldenrod", cor.col = "black", arrow.lwd = 3,
annotate = "centred\nRMS error",
key = TRUE, key.title = group, key.columns = 1,
key.pos = "right", strip = TRUE, auto.text = TRUE, ...) {
## get rid of R check annoyances
sd.mod <- R <- NULL
## greyscale handling
## set graphics
current.strip <- trellis.par.get("strip.background")
current.font <- trellis.par.get("fontsize")
## reset graphic parameters
on.exit(trellis.par.set(
fontsize = current.font
))
if (length(cols) == 1 && cols == "greyscale") {
trellis.par.set(list(strip.background = list(col = "white")))
## other local colours
method.col <- "greyscale"
} else {
method.col <- "default"
}
## extra.args setup
extra.args <- list(...)
## label controls (some local xlab, ylab management in code)
extra.args$xlab <- if ("xlab" %in% names(extra.args)) {
quickText(extra.args$xlab, auto.text)
} else {
NULL
}
extra.args$ylab <- if ("ylab" %in% names(extra.args)) {
quickText(extra.args$ylab, auto.text)
} else {
NULL
}
extra.args$main <- if ("main" %in% names(extra.args)) {
quickText(extra.args$main, auto.text)
} else {
quickText("", auto.text)
}
if ("fontsize" %in% names(extra.args)) {
trellis.par.set(fontsize = list(text = extra.args$fontsize))
}
if (!"layout" %in% names(extra.args)) {
extra.args$layout <- NULL
}
if (!"pch" %in% names(extra.args)) {
extra.args$pch <- 20
}
if (!"cex" %in% names(extra.args)) {
extra.args$cex <- 2
}
## #######################################################################################
## check to see if two data sets are present
combine <- FALSE
if (length(mod) == 2) combine <- TRUE
if (any(type %in% dateTypes)) {
vars <- c("date", obs, mod)
} else {
vars <- c(obs, mod)
}
## assume two groups do not exist
twoGrp <- FALSE
if (!missing(group)) if (any(group %in% type)) stop("Can't have 'group' also in 'type'.")
mydata <- cutData(mydata, type, ...)
if (missing(group)) {
if ((!"group" %in% type) & (!"group" %in% c(obs, mod))) {
mydata$group <- factor("group")
group <- "group"
npol <- 1
}
## don't overwrite a
} else { ## means that group is there
mydata <- cutData(mydata, group, ...)
}
## if group is present, need to add that list of variables unless it is
## a pre-defined date-based one
if (!missing(group)) {
npol <- length(unique((mydata[[group[1]]])))
## if group is of length 2
if (length(group) == 2L) {
twoGrp <- TRUE
grp1 <- group[1]
grp2 <- group[2]
if (missing(key.title)) key.title <- grp1
vars <- c(vars, grp1, grp2)
mydata$newgrp <- paste(mydata[[group[1]]], mydata[[group[2]]], sep = "-")
group <- "newgrp"
}
if (group %in% dateTypes | any(type %in% dateTypes)) {
vars <- unique(c(vars, "date", group))
} else {
vars <- unique(c(vars, group))
}
}
## data checks, for base and new data if necessary
mydata <- checkPrep(mydata, vars, type)
# check mod and obs are numbers
mydata <- checkNum(mydata, vars = c(obs, mod))
## remove missing data
mydata <- na.omit(mydata)
legend <- NULL
## function to calculate stats for TD
calcStats <- function(mydata, obs = obs, mod = mod) {
R <- cor(mydata[[obs]], mydata[[mod]], use = "pairwise")
sd.obs <- sd(mydata[[obs]])
sd.mod <- sd(mydata[[mod]])
if (normalise) {
sd.mod <- sd.mod / sd.obs
sd.obs <- 1
}
res <- data.frame(R, sd.obs, sd.mod)
res
}
vars <- c(group, type)
results <- group_by(mydata, UQS(syms(vars))) %>%
do(calcStats(., obs = obs, mod = mod[1]))
results.new <- NULL
if (combine) {
results.new <- group_by(mydata, UQS(syms(vars))) %>%
do(calcStats(., obs = obs, mod = mod[2]))
}
## if no group to plot, then add a dummy one to make xyplot work
if (is.null(group)) {
results$MyGroupVar <- factor("MyGroupVar")
group <- "MyGroupVar"
}
## set up colours
myColors <- openColours(cols, npol)
pch.orig <- extra.args$pch
## combined colours if two groups
if (twoGrp) {
myColors <- rep(
openColours(cols, length(unique(mydata[[grp1]]))),
each = length(unique(mydata[[grp2]]))
)
extra.args$pch <- rep(extra.args$pch, each = length(unique(mydata[[grp2]])))
}
## basic function for lattice call + defaults
temp <- paste(type, collapse = "+")
myform <- formula(paste("R ~ sd.mod", "|", temp, sep = ""))
scales <- list(x = list(rot = 0), y = list(rot = 0))
pol.name <- sapply(levels(mydata[, group]), function(x) quickText(x, auto.text))
if (key & npol > 1 & !combine) {
thecols <- unique(myColors)
if (twoGrp) {
pol.name <- levels(factor(mydata[[grp1]]))
}
key <- list(
points = list(col = thecols), pch = pch.orig,
cex = extra.args$cex, text = list(lab = pol.name, cex = 0.8),
space = key.pos, columns = key.columns,
title = quickText(key.title, auto.text),
cex.title = 0.8, lines.title = 3
)
} else if (key & npol > 1 & combine) {
key <- list(
lines = list(col = myColors[1:npol]), lwd = arrow.lwd,
text = list(lab = pol.name, cex = 0.8), space = key.pos,
columns = key.columns,
title = quickText(key.title, auto.text),
cex.title = 0.8, lines.title = 3
)
} else {
key <- NULL
}
## special wd layout
if (length(type) == 1 & type[1] == "wd" & is.null(extra.args$layout)) {
## re-order to make sensible layout
wds <- c("NW", "N", "NE", "W", "E", "SW", "S", "SE")
mydata$wd <- ordered(mydata$wd, levels = wds)
## see if wd is actually there or not
wd.ok <- sapply(wds, function(x) {
if (x %in% unique(mydata$wd)) FALSE else TRUE
})
skip <- c(wd.ok[1:4], TRUE, wd.ok[5:8])
mydata$wd <- factor(mydata$wd) ## remove empty factor levels
extra.args$layout <- c(3, 3)
if (!"skip" %in% names(extra.args)) {
extra.args$skip <- skip
}
}
if (!"skip" %in% names(extra.args)) {
extra.args$skip <- FALSE
}
## proper names of labelling ####################################################
stripName <- sapply(levels(mydata[, type[1]]), function(x) quickText(x, auto.text))
if (strip) strip <- strip.custom(factor.levels = stripName)
if (length(type) == 1) {
strip.left <- FALSE
} else { ## two conditioning variables
stripName <- sapply(levels(mydata[, type[2]]), function(x) quickText(x, auto.text))
strip.left <- strip.custom(factor.levels = stripName)
}
## #############################################################################
## no strip needed for single panel
if (length(type) == 1 & type[1] == "default") strip <- FALSE
## not sure how to evaluate "group" in xyplot, so change to a fixed name
id <- which(names(results) == group)
names(results)[id] <- "MyGroupVar"
maxsd <- 1.2 * max(results$sd.obs, results$sd.mod)
# xlim, ylim handling
if (!"ylim" %in% names(extra.args)) {
extra.args$ylim <- 1.12 * c(0, maxsd)
}
if (!"xlim" %in% names(extra.args)) {
extra.args$xlim <- 1.12 * c(0, maxsd)
}
## xlab, ylab local management
if (is.null(extra.args$ylab)) {
extra.args$ylab <- if (normalise) "standard deviation (normalised)" else "Standard deviation"
}
if (is.null(extra.args$xlab)) {
extra.args$xlab <- extra.args$ylab
}
## plot
xyplot.args <- list(
x = myform, data = results, groups = results$MyGroupVar,
aspect = 1,
type = "n",
as.table = TRUE,
scales = scales,
key = key,
par.strip.text = list(cex = 0.8),
strip = strip,
strip.left = strip.left,
panel = function(x, y, ...) {
## annotate each panel but don't need to do this for each grouping value
panel.taylor.setup(
x, y,
results = results, maxsd = maxsd,
cor.col = cor.col, rms.col = rms.col,
annotate = annotate, ...
)
## plot data in each panel
panel.superpose(
x, y,
panel.groups = panel.taylor, ...,
results = results, results.new = results.new,
combine = combine, myColors = myColors,
arrow.lwd = arrow.lwd
)
}
)
## reset for extra.args
xyplot.args <- listUpdate(xyplot.args, extra.args)
## plot
plt <- do.call(xyplot, xyplot.args)
if (length(type) == 1) plot(plt) else plot(useOuterStrips(plt, strip = strip, strip.left = strip.left))
newdata <- results
output <- list(plot = plt, data = newdata, call = match.call())
class(output) <- "openair"
invisible(output)
}
panel.taylor.setup <- function(x, y, subscripts, results, maxsd, cor.col, rms.col,
col.symbol, annotate, group.number, type, ...) {
## note, this assumes for each level of type there is a single measured value
## therefore, only the first is used i.e. results$sd.obs[subscripts[1]]
## This does not matter if normalise = TRUE because all sd.obs = 1.
## The data frame 'results' should contain a grouping variable 'MyGroupVar',
## 'type' e.g. season, R (correlation coef), sd.obs and sd.mod
xcurve <- cos(seq(0, pi / 2, by = 0.01)) * maxsd
ycurve <- sin(seq(0, pi / 2, by = 0.01)) * maxsd
llines(xcurve, ycurve, col = "black")
xcurve <- cos(seq(0, pi / 2, by = 0.01)) * results$sd.obs[subscripts[1]]
ycurve <- sin(seq(0, pi / 2, by = 0.01)) * results$sd.obs[subscripts[1]]
llines(xcurve, ycurve, col = "black", lty = 5)
corr.lines <- c(0.2, 0.4, 0.6, 0.8, 0.9)
## grid line with alpha transparency
theCol <- t(col2rgb(cor.col)) / 255
for (gcl in corr.lines) llines(
c(0, maxsd * gcl), c(0, maxsd * sqrt(1 - gcl ^ 2)),
col = rgb(theCol, alpha = 0.4), alpha = 0.5
)
bigtick <- acos(seq(0.1, 0.9, by = 0.1))
medtick <- acos(seq(0.05, 0.95, by = 0.1))
smltick <- acos(seq(0.91, 0.99, by = 0.01))
lsegments(
cos(bigtick) * maxsd, sin(bigtick) *
maxsd, cos(bigtick) * 0.96 * maxsd, sin(bigtick) * 0.96 * maxsd,
col = cor.col
)
lsegments(
cos(medtick) * maxsd, sin(medtick) *
maxsd, cos(medtick) * 0.98 * maxsd, sin(medtick) * 0.98 * maxsd,
col = cor.col
)
lsegments(
cos(smltick) * maxsd, sin(smltick) *
maxsd, cos(smltick) * 0.99 * maxsd, sin(smltick) * 0.99 * maxsd,
col = cor.col
)
## arcs for standard deviations (3 by default)
gamma <- pretty(c(0, maxsd), n = 5)
if (gamma[length(gamma)] > maxsd) {
gamma <- gamma[-length(gamma)]
}
labelpos <- seq(45, 70, length.out = length(gamma))
## some from plotrix
for (gindex in 1:length(gamma)) {
xcurve <- cos(seq(0, pi, by = 0.03)) * gamma[gindex] +
results$sd.obs[subscripts[1]]
endcurve <- which(xcurve < 0)
endcurve <- ifelse(length(endcurve), min(endcurve) - 1, 105)
ycurve <- sin(seq(0, pi, by = 0.03)) * gamma[gindex]
maxcurve <- xcurve * xcurve + ycurve * ycurve
startcurve <- which(maxcurve > maxsd * maxsd)
startcurve <- ifelse(length(startcurve), max(startcurve) + 1, 0)
llines(
xcurve[startcurve:endcurve], ycurve[startcurve:endcurve],
col = rms.col, lty = 5
)
ltext(
xcurve[labelpos[gindex]], ycurve[labelpos[gindex]],
gamma[gindex],
cex = 0.7, col = rms.col, pos = 1,
srt = 0, font = 2
)
ltext(
1.1 * maxsd, 1.05 * maxsd,
labels = annotate, cex = 0.7,
col = rms.col, pos = 2
)
}
## angles for R key
angles <- 180 * c(bigtick, acos(c(0.95, 0.99))) / pi
ltext(
cos(c(bigtick, acos(c(0.95, 0.99)))) *
1.06 * maxsd, sin(c(bigtick, acos(c(0.95, 0.99)))) *
1.06 * maxsd, c(seq(0.1, 0.9, by = 0.1), 0.95, 0.99),
cex = 0.7,
adj = 0.5, srt = angles, col = cor.col
)
ltext(
0.82 * maxsd, 0.82 * maxsd, "Correlation",
srt = 315, cex = 0.7,
col = cor.col
)
## measured point and text
lpoints(results$sd.obs[subscripts[1]], 0, pch = 20, col = "purple", cex = 1.5)
ltext(results$sd.obs[subscripts[1]], 0, "Observed", col = "purple", cex = 0.7, pos = 3)
}
panel.taylor <- function(x, y, subscripts, results, results.new, maxsd, cor.col,
rms.col, combine, col.symbol, myColors, group.number,
type, arrow.lwd, ...) {
R <- NULL
sd.mod <- NULL ## avoid R NOTEs
## Plot actual results by type and group if given
results <- transform(results, x = sd.mod * R, y = sd.mod * sin(acos(R)))
if (combine) {
results.new <- transform(results.new, x = sd.mod * R, y = sd.mod * sin(acos(R)))
larrows(
results$x[subscripts], results$y[subscripts],
results.new$x[subscripts], results.new$y[subscripts],
angle = 30, length = 0.1, col = myColors[group.number], lwd = arrow.lwd
)
} else {
lpoints(
results$x[subscripts], results$y[subscripts],
col.symbol = myColors[group.number], ...
)
}
}
startYear <- function(dat) as.numeric(format(min(dat[order(dat)]), "%Y"))
endYear <- function(dat) as.numeric(format(max(dat[order(dat)]), "%Y"))
startMonth <- function(dat) as.numeric(format(min(dat[order(dat)]), "%m"))
endMonth <- function(dat) as.numeric(format(max(dat[order(dat)]), "%m"))
## these are pre-defined type that need a field "date"; used by cutData
dateTypes <- c("year", "hour", "month", "season", "weekday", "weekend",
"monthyear", "gmtbst", "bstgmt", "dst", "daylight",
"seasonyear", "yearseason")
checkPrep <- function(mydata, Names, type, remove.calm = TRUE, remove.neg = TRUE,
strip.white = TRUE, wd = "wd") {
## deal with conditioning variable if present, if user-defined, must exist in data
## pre-defined types
## existing conditioning variables that only depend on date (which is checked)
conds <- c(
"default", "year", "hour", "month", "season", "weekday",
"weekend", "monthyear", "gmtbst", "bstgmt", "dst", "daylight",
"yearseason", "seasonyear"
)
all.vars <- unique(c(names(mydata), conds))
varNames <- c(Names, type) ## names we want to be there
matching <- varNames %in% all.vars
if (any(!matching)) {
## not all variables are present
stop(cat("Can't find the variable(s)", varNames[!matching], "\n"))
}
## add type to names if not in pre-defined list
if (any(type %in% conds == FALSE)) {
ids <- which(type %in% conds == FALSE)
Names <- c(Names, type[ids])
}
## if type already present in data frame
if (any(type %in% names(mydata))) {
ids <- which(type %in% names(mydata))
Names <- unique(c(Names, type[ids]))
}
## just select data needed
mydata <- mydata[, Names]
## if site is in the data set, check none are missing
## seems to be a problem for some KCL data...
if ("site" %in% names(mydata)) { ## split by site
## remove any NA sites
if (anyNA(mydata$site)) {
id <- which(is.na(mydata$site))
mydata <- mydata[-id, ]
}
}
## sometimes ratios are considered which can results in infinite values
## make sure all infinite values are set to NA
mydata[] <- lapply(mydata, function(x) {
replace(x, x == Inf | x == -Inf, NA)
})
if ("ws" %in% Names) {
if ("ws" %in% Names & is.numeric(mydata$ws)) {
## check for negative wind speeds
if (any(sign(mydata$ws[!is.na(mydata$ws)]) == -1)) {
if (remove.neg) { ## remove negative ws only if TRUE
warning("Wind speed <0; removing negative data")
mydata$ws[mydata$ws < 0] <- NA
}
}
}
}
## round wd to make processing obvious
  ## data already rounded to nearest 10 degrees will not be affected
## data not rounded will be rounded to nearest 10 degrees
## assumes 10 is average of 5-15 etc
if (wd %in% Names) {
if (wd %in% Names & is.numeric(mydata[, wd])) {
## check for wd <0 or > 360
if (any(sign(mydata[[wd]][!is.na(mydata[[wd]])]) == -1 |
mydata[[wd]][!is.na(mydata[[wd]])] > 360)) {
warning("Wind direction < 0 or > 360; removing these data")
mydata[[wd]][mydata[[wd]] < 0] <- NA
mydata[[wd]][mydata[[wd]] > 360] <- NA
}
if (remove.calm) {
if ("ws" %in% names(mydata)) {
mydata[[wd]][mydata$ws == 0] <- NA ## set wd to NA where there are calms
mydata$ws[mydata$ws == 0] <- NA ## remove calm ws
}
mydata[[wd]][mydata[[wd]] == 0] <- 360 ## set any legitimate wd to 360
## round wd for use in functions - except windRose/pollutionRose
mydata[[wd]] <- 10 * ceiling(mydata[[wd]] / 10 - 0.5)
mydata[[wd]][mydata[[wd]] == 0] <- 360 # angles <5 should be in 360 bin
}
mydata[[wd]][mydata[[wd]] == 0] <- 360 ## set any legitimate wd to 360
}
}
## make sure date is ordered in time if present
if ("date" %in% Names) {
if ("POSIXlt" %in% class(mydata$date)) {
stop("date should be in POSIXct format not POSIXlt")
}
## if date in format dd/mm/yyyy hh:mm (basic check)
if (length(grep("/", as.character(mydata$date[1]))) > 0) {
mydata$date <- as.POSIXct(strptime(mydata$date, "%d/%m/%Y %H:%M"), "GMT")
}
## try and work with a factor date - but probably a problem in original data
if (is.factor(mydata$date)) {
warning("date field is a factor, check date format")
mydata$date <- as.POSIXct(mydata$date, "GMT")
}
mydata <- arrange(mydata, date)
## make sure date is the first field
if (names(mydata)[1] != "date") {
mydata <- mydata[c("date", setdiff(names(mydata), "date"))]
}
## check to see if there are any missing dates, stop if there are
ids <- which(is.na(mydata$date))
if (length(ids) > 0) {
mydata <- mydata[-ids, ]
warning(paste(
"Missing dates detected, removing",
length(ids), "lines"
), call. = FALSE)
}
## daylight saving time can cause terrible problems - best avoided!!
if (any(dst(mydata$date))) {
warning("Detected data with Daylight Saving Time, converting to UTC/GMT")
mydata$date <- lubridate::force_tz(mydata$date, tzone = "GMT")
}
}
if (strip.white) {
## set panel strip to white
suppressWarnings(trellis.par.set(list(strip.background = list(col = "white"))))
}
## return data frame
return(mydata)
}
# function to check variables are numeric, if not force with warning
checkNum <- function(mydata, vars) {
for (i in seq_along(vars)) {
if (!is.numeric(mydata[[vars[i]]])) {
mydata[[vars[i]]] <- as.numeric(as.character(mydata[[vars[i]]]))
warning(
paste(vars[i], "is not numeric, forcing to numeric..."),
call. = FALSE
)
}
}
return(mydata)
}
## listUpdate function
# [in development]
listUpdate <- function(a, b, drop.dots = TRUE,
subset.a = NULL, subset.b = NULL) {
if (drop.dots) {
a <- a[names(a) != "..."]
b <- b[names(b) != "..."]
}
if (!is.null(subset.a)) {
a <- a[names(a) %in% subset.a]
}
if (!is.null(subset.b)) {
b <- b[names(b) %in% subset.b]
}
if (length(names(b) > 0)) {
a <- modifyList(a, b)
}
a
}
Then use the following code for plotting:
TaylorDiagram1(data, obs = "Observed", mod = "Predicted", group = "Method", type = "Station",
               scales = list(alternating = 1), normalise = TRUE, fontsize = 12,
               rms.col = "black", auto.text = FALSE, xlab = "Standard deviation",
               cex = 1, ylab = "Standard deviation",
               par.settings = list(grid.pars = list(fontfamily = "serif")))
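For reference, the font-size control in TaylorDiagram1 appears to come from these added lines near the top of the function, which route the fontsize argument into lattice's trellis settings (restored afterwards by the on.exit() call):
if ("fontsize" %in% names(extra.args)) {
  trellis.par.set(fontsize = list(text = extra.args$fontsize))
}
The sentence-case labels are simply the literal "Correlation" and "Observed" strings in the ltext() calls inside panel.taylor.setup.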

Saving huge model object to file

Say you have a model object of class 'varest' returned from a VAR() regression operation.
I want to save the model to a file, but not all the data which was used to estimate the coefficients.
How can one save just the model specification without the training data?
When I save the model it has a file size of over 1 GB, so loading it takes its time.
Can one save objects without some attributes?
The predict.varest function starts out with this code:
K <- object$K
p <- object$p
obs <- object$obs
type <- object$type
data.all <- object$datamat
ynames <- colnames(object$y)
You can then investigate how much pruning you might achieve:
data(Canada)
tcan <- VAR(Canada, p = 2, type = "trend")
names(tcan)
# [1] "varresult" "datamat" "y" "type" "p"
# [6] "K" "obs" "totobs" "restrictions" "call"
object.size(tcan[c("K","p", "obs", "type", "datamat", "y")] )
#15080 bytes
object.size(tcan)
#252032 bytes
So the difference is substantial, but just saving those items is not sufficient because the next line in predict.varest is:
B <- Bcoef(object)
You will need to add that object to the list above and then construct a new predict function that accepts something less than the large 'varresult' node of the model object. It also turned out that there was a downstream call to an internal function that needs to be stored. (You will need to decide in advance what prediction horizon you need.)
tsmall <- c( tcan[c("K","p", "obs", "type", "datamat", "y", "call")] )
tsmall[["Bco"]] <- Bcoef(tcan)
tsmall$sig.y <- vars:::.fecov(x = tcan, n.ahead = 10)
And the modified predict function will be:
sm.predict <- function(object, ..., n.ahead = 10, ci = 0.95, dumvar = NULL) {
  K <- object$K
  p <- object$p
  obs <- object$obs
  type <- object$type
  data.all <- object$datamat
  ynames <- colnames(object$y)
  n.ahead <- as.integer(n.ahead)
  Z <- object$datamat[, -c(1:K)]
  # This used to be a call to Bcoef(object)
  B <- object$Bco
  if (type == "const") {
    Zdet <- matrix(rep(1, n.ahead), nrow = n.ahead, ncol = 1)
    colnames(Zdet) <- "const"
  }
  else if (type == "trend") {
    trdstart <- nrow(Z) + 1 + p
    Zdet <- matrix(seq(trdstart, length = n.ahead), nrow = n.ahead, ncol = 1)
    colnames(Zdet) <- "trend"
  }
  else if (type == "both") {
    trdstart <- nrow(Z) + 1 + p
    Zdet <- matrix(c(rep(1, n.ahead), seq(trdstart, length = n.ahead)),
                   nrow = n.ahead, ncol = 2)
    colnames(Zdet) <- c("const", "trend")
  }
  else if (type == "none") {
    Zdet <- NULL
  }
  if (!is.null(eval(object$call$season))) {
    season <- eval(object$call$season)
    seas.names <- paste("sd", 1:(season - 1), sep = "")
    cycle <- tail(data.all[, seas.names], season)
    seasonal <- as.matrix(cycle, nrow = season, ncol = season - 1)
    if (nrow(seasonal) >= n.ahead) {
      seasonal <- as.matrix(cycle[1:n.ahead, ], nrow = n.ahead, ncol = season - 1)
    }
    else {
      while (nrow(seasonal) < n.ahead) {
        seasonal <- rbind(seasonal, cycle)
      }
      seasonal <- seasonal[1:n.ahead, ]
    }
    rownames(seasonal) <- seq(nrow(data.all) + 1, length = n.ahead)
    if (!is.null(Zdet)) {
      Zdet <- as.matrix(cbind(Zdet, seasonal))
    }
    else {
      Zdet <- as.matrix(seasonal)
    }
  }
  if (!is.null(eval(object$call$exogen))) {
    if (is.null(dumvar)) {
      stop("\nNo matrix for dumvar supplied, but object varest contains exogenous variables.\n")
    }
    if (!all(colnames(dumvar) %in% colnames(data.all))) {
      stop("\nColumn names of dumvar do not coincide with exogen.\n")
    }
    if (!identical(nrow(dumvar), n.ahead)) {
      stop("\nRow number of dumvar is unequal to n.ahead.\n")
    }
    if (!is.null(Zdet)) {
      Zdet <- as.matrix(cbind(Zdet, dumvar))
    }
    else {
      Zdet <- as.matrix(dumvar)
    }
  }
  Zy <- as.matrix(object$datamat[, 1:(K * (p + 1))])
  yse <- matrix(NA, nrow = n.ahead, ncol = K)
  # This used to be a call to vars:::.fecov
  sig.y <- object$sig.y
  for (i in 1:n.ahead) {
    yse[i, ] <- sqrt(diag(sig.y[, , i]))
  }
  yse <- -1 * qnorm((1 - ci) / 2) * yse
  colnames(yse) <- paste(ci, "of", ynames)
  forecast <- matrix(NA, ncol = K, nrow = n.ahead)
  lasty <- c(Zy[nrow(Zy), ])
  for (i in 1:n.ahead) {
    lasty <- lasty[1:(K * p)]
    Z <- c(lasty, Zdet[i, ])
    forecast[i, ] <- B %*% Z
    temp <- forecast[i, ]
    lasty <- c(temp, lasty)
  }
  colnames(forecast) <- paste(ynames, ".fcst", sep = "")
  lower <- forecast - yse
  colnames(lower) <- paste(ynames, ".lower", sep = "")
  upper <- forecast + yse
  colnames(upper) <- paste(ynames, ".upper", sep = "")
  forecasts <- list()
  for (i in 1:K) {
    forecasts[[i]] <- cbind(forecast[, i], lower[, i], upper[, i], yse[, i])
    colnames(forecasts[[i]]) <- c("fcst", "lower", "upper", "CI")
  }
  names(forecasts) <- ynames
  result <- list(fcst = forecasts, endog = object$y, model = object,
                 exo.fcst = dumvar)
  class(result) <- "varprd"
  return(result)
}
Either:
- set the attributes you do not want to NULL, or
- copy the parts you want to a new object, or
- call the save() function with proper indexing.
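A minimal end-to-end sketch of the second option, assuming the tsmall list and sm.predict() from above (saveRDS()/readRDS() are base R):
#persist only the pruned pieces, then predict from the reloaded object
saveRDS(tsmall, "tcan_small.rds")
tsmall2 <- readRDS("tcan_small.rds")
pred <- sm.predict(tsmall2, n.ahead = 10)  #horizon must match the stored sig.y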
