Cannot release memory with knitr - r

I have an issue with knitr where I can run the code in the console without a problem but run out of memory when I knit the document. The markdown document is similar to
---
title: "xyz"
output:
  html_document:
    toc: true
date: "`r format(Sys.time(), '%d %B, %Y')`"
author: Me
bibliography: ../ref.bib
---
```{r setup, include = FALSE, cache = FALSE}
options(width = 100, digits = 3, scipen = 8)
knitr::opts_chunk$set(
error = FALSE, cache = FALSE,
cache.path = "some-path-cache/", fig.path = "some-path-fig/",
warnings = TRUE, message = TRUE, dpi = 128, cache.lazy = FALSE)
```
[some code]
```{r load_dat}
big_dat <- func_to_get_big_dat()
some_subset <- func_to_get_subset()
```
[some code where both big_dat and some_subset is used, some objects are assigned and some are subsequently removed with rm]
```{r reduce_mem}
dat_fit <- big_dat[some_subset, ]
rm(big_dat)
```
```{r log_to_show}
sink("some-log-file")
print(gc())
print(sapply(ls(), function(x) paste0(class(get(x)), collapse = ";")))
print(sort(sapply(ls(), function(x) object.size(get(x)))))
sink()
```
```{r some_chunk_that_requires_a_lot_of_memory, cache = 1}
...
```
When I knit the document using knitr then I run out of memory in the some_chunk_that_requires_a_lot_of_memory and the content of some-log-file is
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3220059 172 5684620 304 5684620 304
Vcells 581359200 4436 1217211123 9287 981188369 7486
[output abbreviated (the other variables are "function"s, "character"s, and "matrix"s]
dat_fit X1 some_subset
"data.frame" "integer" "integer"
[output abbreviated]
X1 some_subset dat_fit
5235568 5235568 591631352
so the objects in the .GlobalEnv are far from summing to the 4436 MB (there are not many objects and they are far smaller than 50 MB each). Running the code in the console does not yield any issues and the print(gc()) shows a much smaller figure.
My questions are
Can I do something to figure out why I use much more memory when I knit the document? Clearly, some objects that take up a lot of space must be assigned somewhere. Can I find all assigned objects and check their size?
Do you have any suggestions as to why gc releases less memory when I knit the document? Is there somewhere where knitr assigns some object that may take up a lot of memory?
The data set is proprietary and I have tried but failed to make small example where I can reproduce the result. As a note, I do cache some output from some chunks between load_dat and reduce_mem. I use cache.lazy = FALSE to avoid this issue. Here is my sessionInfo
library(knitr)
sessionInfo()
#R R version 3.4.2 (2017-09-28)
#R Platform: x86_64-w64-mingw32/x64 (64-bit)
#R Running under: Windows 7 x64 (build 7601) Service Pack 1
#R
#R Matrix products: default
#R
#R locale:
#R [1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252
#R [4] LC_NUMERIC=C LC_TIME=English_United States.1252
#R
#R attached base packages:
#R [1] stats graphics grDevices utils datasets methods base
#R
#R other attached packages:
#R [1] knitr_1.17
#R
#R loaded via a namespace (and not attached):
#R [1] compiler_3.4.2 tools_3.4.2 yaml_2.1.16
Regarding question 1.
I also added the following to the log_to_show chunk to figure out if there are objects in other environments in the session that takes up a lot of space
# Test whether an environment already occurs in a collection.
#
# Arguments:
#   l         a list of environments (or NULL) to search through.
#   this_env  the environment to look for.
#
# Returns TRUE as soon as an element of `l` is identical() to `this_env`,
# and FALSE when no element matches (including when `l` is empty).
is_env_in_list <- function(l, this_env) {
  for (candidate in l) {
    if (identical(candidate, this_env)) {
      return(TRUE)
    }
  }
  FALSE
}
# Drop repeated entries from a list of environments.
#
# An entry is dropped when an identical() entry occurs at an earlier position,
# so the first occurrence of every environment is the one that is kept and the
# original order is preserved.
#
# Arguments:
#   objs  a list of environments.
#
# Returns `objs` without the repeated entries.
remove_dup_envs <- function(objs) {
  is_repeat <- logical(length(objs))
  for (pos in seq_along(objs)) {
    # compare against every earlier entry; stop at the first match
    for (prev in seq_len(pos - 1L)) {
      if (identical(objs[[prev]], objs[[pos]])) {
        is_repeat[pos] <- TRUE
        break
      }
    }
  }
  objs[!is_repeat]
}
# attempt to write function to get all unique environments
#
# Recursively collects environments reachable from `this_env`, either through
# parent.env() or through the objects assigned in an environment (objects that
# are environments themselves, or the enclosing environment of functions).
#
# Arguments:
#   this_env  environment to start from (defaults to .GlobalEnv).
#   out       environments collected so far (NULL or a list); used to avoid
#             visiting the same environment twice.
#   only_new  if TRUE return only what this call found, otherwise return it
#             combined with `out`.
get_env <- function(this_env = .GlobalEnv, out = NULL, only_new = FALSE){
# already collected: nothing new to report
if(is_env_in_list(out, this_env))
return(if(only_new) NULL else out)
# the empty environment is never collected
if(identical(this_env, emptyenv()))
return(if(only_new) NULL else out)
new. <- this_env # not emptyenv or in list so we add it
# add parent env
p_env <- parent.env(this_env)
if(!is_env_in_list(out, p_env))
new. <- c(new., get_env(p_env, out, only_new = only_new))
# look through assigned objects, find environments and add these
objs <- lapply(ls(envir = this_env), function(x){
o <- try(get(x, envir = this_env), silent = TRUE)
# NOTE(review): the NULL below is evaluated and then discarded, so a
# "try-error" object is still returned as `o`. It is neither a function nor an
# environment, so the is.environment() filter further down removes it.
if(inherits(o, "try-error"))
NULL
o
})
# replace every function that has an enclosing environment by that environment
objs <- lapply(objs, function(x){
if(is.function(x) && !is.null(environment(x)))
return(environment(x))
x
})
# nothing assigned in this environment
if(length(objs) == 0)
return(if(only_new) new. else remove_dup_envs(c(new., out)))
is_env <- which(sapply(objs, is.environment))
# none of the assigned objects leads to an environment
if(length(is_env) == 0)
return(if(only_new) new. else remove_dup_envs(c(new., out)))
objs <- remove_dup_envs(objs[is_env])
# keep only the environments that have not been collected yet
keep <- which(!sapply(objs, is_env_in_list, l = c(new., out)))
# NOTE(review): unlike the other non-`only_new` returns, this one does not
# pass the result through remove_dup_envs().
if(length(keep) == 0L)
return(if(only_new) new. else c(new., out))
objs <- objs[keep]
# recurse into each remaining environment, asking only for what is new
for(o in objs){
ass_envs <- get_env(o, out = c(new., out), only_new = TRUE)
new. <- c(new., ass_envs)
}
return(if(only_new) new. else remove_dup_envs(c(new., out)))
}
# collect the environments reachable from the knitr namespace, name them with
# environmentName() and print them in alphabetical order
tmp <- get_env(asNamespace("knitr"))
names(tmp) <- sapply(tmp, environmentName)
print(tmp <- tmp[order(names(tmp))])
# for every environment: object.size() of each object listed by ls(), keeping
# only the largest ones (tail() of the sorted sizes)
out <- lapply(tmp, function(x){
o <- sapply(ls(envir = x), function(z){
r <- try(object.size(get(z, envir = x)), silent = TRUE)
# objects that cannot be retrieved or sized are counted as 0
if(inherits(r, "try-error"))
return(0)
r
})
# environments without listed objects yield NULL
if(length(o) == 0L)
return(NULL)
tail(sort(o))
})
# keep only the environments whose largest object exceeds 10^7
max_val <- sapply(out, max)
keep <- which(max_val > 10^7)
out <- out[keep]
max_val <- max_val[keep]
tmp <- tmp[keep]
# print the remaining environments and their sizes, ordered by largest object
ord <- order(max_val)
print(tmp <- tmp[ord])
print(out <- out[ord])
It shows no objects that are larger than dat_fit.

Related

indirect indexing/subscripting inside %dopar%

I'm not understanding how to do indirect subscripting in %dopar% or in llply( .parallel = TRUE). My actual use-case is a list of formulas, then generating a list of glmer results in a first foreach %dopar%, then calling PBmodcomp on specific pairs of results in a separate foreach %dopar%. My toy example, using numeric indices rather than names of objects in the lists, works fine for %do% but not %dopar%, and fine for alply without .parallel = TRUE but not with .parallel = TRUE. [My real example with glmer and indexing lists by names rather than by integers works with %do% but not %dopar%.]
# Minimal reproduction: the same indirect subscript MO[mB[i]] evaluated with
# sequential and parallel foreach, and with sequential and parallel plyr.
library(doParallel)
library(foreach)
library(plyr)
# two-worker PSOCK cluster, registered as the foreach parallel backend
cl <- makePSOCKcluster(2) # tiny for toy example
registerDoParallel(cl)
# mB holds the positions to look up in MO
mB <- c(1,2,1,3,4,10)
MO <- c("Full", "noYS", "noYZ", "noYSZS", "noS", "noZ",
"noY", "justS", "justZ", "noSZ", "noYSZ")
# Works
# (sequential foreach)
testouts <- foreach(i = 1:length(mB)) %do% {
# mB[i]
MO[mB[i]]
}
testouts
# all NA
# (same loop body, run on the registered parallel backend)
testouts2 <- foreach(i = 1:length(mB)) %dopar% {
# mB[i]
MO[mB[i]]
}
testouts2
# Works
# (sequential plyr)
testouts3 <- alply(mB, 1, .fun = function(i) { MO[mB[i]]} )
testouts3
# fails "$ operator is invalid for atomic vectors"
# (plyr with .parallel = TRUE, exporting every object of the global environment)
testouts4 <- alply(mB, 1, .fun = function(i) { MO[mB[i]]},
.parallel = TRUE,
.paropts = list(.export=ls(.GlobalEnv)))
testouts4
stopCluster(cl)
I've tried various combinations of double brackets like MO[mB[[i]]], to no avail. mB[i] instead of MO[mB[i]] works in all 4 and returns a list of the numbers. I've tried .export(c("MO", "mB")) but just get the message that those objects are already exported.
I assume that there's something I misunderstand about evaluation of expressions like MO[mB[i]] in different environments, but there may be other things I misunderstand, too.
sessionInfo() R version 3.5.1 (2018-07-02) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 7 x64 (build
7601) Service Pack 1
Matrix products: default
locale: [1] LC_COLLATE=English_United States.1252 [2]
LC_CTYPE=English_United States.1252 [3] LC_MONETARY=English_United
States.1252 [4] LC_NUMERIC=C [5]
LC_TIME=English_United States.1252
attached base packages: [1] parallel stats graphics grDevices
utils datasets methods [8] base
other attached packages: [1] plyr_1.8.4 doParallel_1.0.13
iterators_1.0.9 foreach_1.5.0
loaded via a namespace (and not attached): [1] compiler_3.5.1
tools_3.5.1 listenv_0.7.0 Rcpp_0.12.17 [5]
codetools_0.2-15 digest_0.6.15 globals_0.12.1 future_1.8.1
[9] fortunes_1.5-5
The problem appears to be with version 1.5.0 of foreach on r-forge. Version 1.4.4 from CRAN works fine for both foreach %dopar% and llply( .parallel = TRUE). For anyone finding this post when searching for %dopar% with lists, here's the code where mList is a named list of formulas, and tList is a named list of pairs of model names to be compared.
# Named list of comparisons; each element holds the names of the two fitted
# models (entries of `modouts`) that are passed to PBmodcomp().
tList <- list(Z1 = c("Full", "noYZ"),
              Z2 = c("noYS", "noYSZS"),
              S1 = c("Full", "noYS"),
              S2 = c("noYZ", "noYSZS"),
              A1 = c("noYSZS", "noY"),
              A2 = c("noSZ", "noYSZ")
)
cl <- makePSOCKcluster(params$nCores) # value from YAML params:
registerDoParallel(cl)
# first run the models
# seq_along() instead of 1:length() so that an empty mList runs zero iterations
modouts <- foreach(imod = seq_along(mList),
                   .packages = "lme4") %dopar% {
  glmer(as.formula(mList[[imod]]),
        data = dsn,
        family = poisson,
        control = glmerControl(optimizer = "bobyqa",
                               optCtrl = list(maxfun = 100000),
                               check.conv.singular = "warning")
  )
}
names(modouts) <- names(mList)
####
# now run the parametric bootstrap tests
nSim <- 500
testouts <- foreach(i = seq_along(tList),
                    .packages = "pbkrtest") %dopar% {
  PBmodcomp(modouts[[tList[[i]][1]]],
            modouts[[tList[[i]][2]]],
            nsim = nSim)
}
names(testouts) <- names(tList)
# R is case-sensitive: the cluster object created above is `cl`, not `Cl`
stopCluster(cl)

Memory usage keep growing until crash

I'm running some scripts from R that get info from some websites. The problem is that even though I clean the session with gc(), the memory keeps growing until my session crashes.
Here is the script:
library(XML)
library(RJDBC)
library(RCurl)
# Read the list of URLs stored in table `tabla` and process each one with
# procesarHtml(), logging the outcome to the file at `log_path`.
#
# Arguments:
#   tabla  name of the database table holding the URLs.
#
# `log_file`, `drv` and `con` are assigned globally with `<<-` on purpose:
# procesarHtml() writes to the global `log_file`.
procesarPublicaciones <- function(tabla){
  log_file <<- file(log_path, open="a")
  # close the log even when something below fails
  on.exit(close(log_file), add = TRUE)
  drv <<- JDBC("oracle.jdbc.OracleDriver", classPath="C:/jdbc/jre6/ojdbc6.jar"," ")
  con <<- dbConnect(drv, "server_path", "user", "password")
  query <- paste("SELECT * FROM",tabla,sep=' ')
  listUrl <- NULL
  bool <- tryCatch(
    {
      ## Get a list of URLs from a DB
      listUrl <- dbGetQuery(con, query)
      # the row check must be the LAST expression: it is the value of the
      # tryCatch(); previously the result of dbDisconnect() was returned instead
      isTRUE(nrow(listUrl) > 0)
    },
    error = function(e) FALSE,
    # disconnect on success and on failure alike
    finally = try(dbDisconnect(con), silent = TRUE)
  )
  if( bool ) {
    file.create(data_file)
    apply(listUrl,c(1),procesarHtml)
  }else{
    cat("\n",getTime(),"\t[ERROR]\t\t", file=log_file)
  }
  cat( "\n",getTime(),"\t[INFO]\t\t FINISH", file=log_file)
}
# Download one page, extract the wanted fields with XPath and append them to
# `data_file`; memory usage before and after gc() is written to `log_file`.
#
# Arguments:
#   pUrl  the URL to fetch (one row handed over by apply() in
#         procesarPublicaciones(); assumed to hold a single URL -- TODO confirm).
#
# Relies on the globals `curlHandle`, `xPath.info1`, `xPath.info2`,
# `data_file`, `log_file` and `getTime()`.
procesarHtml <- function(pUrl){
  headerGatherer <- basicHeaderGatherer()
  # use the function argument; `theUrl` was never defined
  html <- getURI(pUrl, headerfunction = headerGatherer$update, curl = curlHandle)
  heatherValue <- headerGatherer$value()
  if ( heatherValue["status"] == "200" ){
    doc <- htmlParse(html)
    # `tryCatch(` has to open on the same line, otherwise R parses `tryCatch`
    # and the parenthesised expression as two separate statements
    tryCatch(
      {
        ## Here I get all the info that I need from the web and write it on a file.
        ## here is a simplification
        info1 <- xpathSApply(doc, xPath.info1, xmlValue)
        info2 <- xpathSApply(doc, xPath.info2, xmlValue)
        data <- data.frame(col1 = info1, col2=info2)
        write.table(data, file=data_file , sep=";", row.names=FALSE, col.names=FALSE, append=TRUE)
      },
      error = function(e)
      {
        ## LOG ERROR
      },
      # htmlParse() allocates the document at the C level; rm() + gc() do not
      # release it, XML::free() does
      finally = free(doc)
    )
    # only remove what actually exists (an error may have skipped assignments)
    rm(list = intersect(c("info1", "info2", "data", "doc"), ls()))
  }else{
    ## LOG INFO
  }
  rm(headerGatherer,html,heatherValue)
  cat("\n",getTime(),"\t[INFO]\t\t memory used: ", memory.size()," MB", file=log_file)
  gc()
  cat("\n",getTime(),"\t[INFO]\t\t memory used after gc(): ", memory.size()," MB", file=log_file)
}
Even though I remove all internal variables with rm() and use gc(), memory keeps growing. It seems that all the html that I get from the web is kept in memory.
Here is my Session Info:
> sessionInfo()
R version 3.2.0 (2015-04-16)
Platform: i386-w64-mingw32/i386 (32-bit)
Running under: Windows XP (build 2600) Service Pack 3
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] RCurl_1.95-4.6 bitops_1.0-6 RJDBC_0.2-5 rJava_0.9-6 DBI_0.3.1
[6] XML_3.98-1.1
loaded via a namespace (and not attached):
[1] tools_3.2.0
--------------------EDIT 2015-06-08 --------------------
I'm still having the problem, but I found the same issue on other post, which is apparently resolved.
Serious Memory Leak When Iteratively Parsing XML Files
When using the XML package, you'll want to use free() to release the memory allocated by htmlParse() (or any of the other html parsing functions that allocate memory at the C level). I usually place a call to free(doc) as soon as I don't need the html doc any more.
So in your case, I would try placing free(doc) on its own line prior to rm(info1, info2, data, doc) in your function, like this:
free(doc)
rm(info1, info2, data, doc)
In fact the call to free() may be sufficient enough that you could remove the rm() call completely.
I had a related issue using htmlParse. It led to Windows crashing (out of memory) before my 10,000 iterations completed.
Answer:
in addition to free/remove - do a garbage collect gc() (as suggested in Serious Memory Leak When Iteratively Parsing XML Files ) every n iterations

R markdown presentation not displaying plots

I have Rstudio on Windows (sessionInfo() below) and am trying to build an r presentation using markdown. When I try to knit HTML or PDF it does not seem to be retaining the folder where plots should be generated from and as a result my presentations are missing plots. I have confirmed that it does work with a basic html_document though.
Does anyone have any ideas on how to resolve?
MWE (rstudio default with headers for slides)
---
title: "plottest2"
author: "AN Other"
date: "Monday, June 30, 2014"
output: html_document
---
## Area 1 ##
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```{r}
summary(cars)
```
## Area 2 ##
You can also embed plots, for example:
```{r, echo=FALSE}
plot(cars)
```
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
This generates using the knit html command, but change html_document to ioslides_presentation and it won't pick up the plot
SessionInfo
> sessionInfo()
R version 3.1.0 (2014-04-10)
Platform: x86_64-w64-mingw32/x64 (64-bit)
locale:
[1] LC_COLLATE=English_United Kingdom.1252 LC_CTYPE=English_United Kingdom.1252 LC_MONETARY=English_United Kingdom.1252 LC_NUMERIC=C
[5] LC_TIME=English_United Kingdom.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] lattice_0.20-29 ggplot2_1.0.0
loaded via a namespace (and not attached):
[1] colorspace_1.2-4 digest_0.6.4 evaluate_0.5.5 formatR_0.10 grid_3.1.0 gtable_0.1.2 htmltools_0.2.4 knitr_1.6 labeling_0.2 MASS_7.3-31
[11] munsell_0.4.2 plyr_1.8.1 proto_0.3-10 Rcpp_0.11.2 reshape2_1.4 rmarkdown_0.2.49 scales_0.2.4 stringr_0.6.2 tools_3.1.0 yaml_2.1.13
C:\Program Files\R\R-3.1.0\library\base\R.Rprofile
### This is the system Rprofile file. It is always run on startup.
### Additional commands can be placed in site or user Rprofile files
#
# Copyright (C) 1995-2012 The R Core Team
### (see ?Rprofile).
### Notice that it is a bad idea to use this file as a template for
### personal startup files, since things will be executed twice and in
### the wrong environment (user profiles are run in .GlobalEnv).
.GlobalEnv <- globalenv()
attach(NULL, name = "Autoloads")
.AutoloadEnv <- as.environment(2)
assign(".Autoloaded", NULL, envir = .AutoloadEnv)
T <- TRUE
F <- FALSE
R.version <- structure(R.Version(), class = "simple.list")
version <- R.version # for S compatibility
## for backwards compatibility only
R.version.string <- R.version$version.string
## NOTA BENE: options() for non-base package functionality are in places like
## --------- ../utils/R/zzz.R
options(keep.source = interactive())
options(warn = 0)
# options(repos = c(CRAN="#CRAN#"))
# options(BIOC = "http://www.bioconductor.org")
options(timeout = 60)
options(encoding = "native.enc")
options(show.error.messages = TRUE)
## keep in sync with PrintDefaults() in ../../main/print.c :
options(scipen = 0)
options(max.print = 99999)# max. #{entries} in internal printMatrix()
options(add.smooth = TRUE)# currently only used in 'plot.lm'
options(stringsAsFactors = TRUE)
if(!interactive() && is.null(getOption("showErrorCalls")))
options(showErrorCalls = TRUE)
local({dp <- Sys.getenv("R_DEFAULT_PACKAGES")
if(identical(dp, "")) # marginally faster to do methods last
dp <- c("datasets", "utils", "grDevices", "graphics",
"stats", "methods")
else if(identical(dp, "NULL")) dp <- character(0)
else dp <- strsplit(dp, ",")[[1]]
dp <- sub("[[:blank:]]*([[:alnum:]]+)", "\\1", dp) # strip whitespace
options(defaultPackages = dp)
})
## Expand R_LIBS_* environment variables.
Sys.setenv(R_LIBS_SITE =
.expand_R_libs_env_var(Sys.getenv("R_LIBS_SITE")))
Sys.setenv(R_LIBS_USER =
.expand_R_libs_env_var(Sys.getenv("R_LIBS_USER")))
.First.sys <- function()
{
for(pkg in getOption("defaultPackages")) {
res <- require(pkg, quietly = TRUE, warn.conflicts = FALSE,
character.only = TRUE)
if(!res)
warning(gettextf('package %s in options("defaultPackages") was not found', sQuote(pkg)),
call.=FALSE, domain = NA)
}
}
.OptRequireMethods <- function()
{
if("methods" %in% getOption("defaultPackages")) {
res <- require("methods", quietly = TRUE, warn.conflicts = FALSE,
character.only = TRUE)
if(!res)
warning('package "methods" in options("defaultPackages") was not found', call.=FALSE)
}
}
if(nzchar(Sys.getenv("R_BATCH"))) {
.Last.sys <- function()
{
cat("> proc.time()\n")
print(proc.time())
}
## avoid passing on to spawned R processes
## A system has been reported without Sys.unsetenv, so try this
try(Sys.setenv(R_BATCH=""))
}
###-*- R -*-
## this will break if R is on a network share
.Library <- file.path(chartr("\\", "/", R.home()), "library")
.Library.site <- Sys.getenv("R_LIBS_SITE")
.Library.site <- if(!nchar(.Library.site)) file.path(R.home(), "site-library") else unlist(strsplit(.Library.site, ";"))
.Library.site <- .Library.site[file.exists(.Library.site)]
if(!nzchar(Sys.getenv("R_LIBS_USER")))
Sys.setenv(R_LIBS_USER=
file.path(Sys.getenv("R_USER"), "R",
"win-library",
paste(R.version$major,
sub("\\..*$", "", R.version$minor),
sep=".")
))
invisible(.libPaths(c(unlist(strsplit(Sys.getenv("R_LIBS"), ";")),
unlist(strsplit(Sys.getenv("R_LIBS_USER"), ";"))
)))
local({
popath <- Sys.getenv("R_TRANSLATIONS", "")
if(!nzchar(popath)) {
paths <- file.path(.libPaths(), "translations", "DESCRIPTION")
popath <- dirname(paths[file.exists(paths)][1])
}
bindtextdomain("R", popath)
bindtextdomain("R-base", popath)
bindtextdomain("RGui", popath)
assign(".popath", popath, .BaseNamespaceEnv)
})
if(nzchar(Sys.getenv("R_PAPERSIZE"))) {
options(papersize = Sys.getenv("R_PAPERSIZE"))
} else {
if(grepl("(canada|united.states)", Sys.getlocale("LC_MONETARY"),
ignore.case = TRUE)) options(papersize = "letter")
else options(papersize = "a4")
}
options(pager = if(length(grep("--ess", commandArgs()))) "console" else "internal",
useFancyQuotes = (.Platform$GUI == "Rgui"),
pdfviewer = Sys.getenv("R_PDFVIEWER", file.path(R.home("bin"), "open.exe")))
if(.Platform$GUI == "Rgui")
Sys.setenv(GFORTRAN_STDOUT_UNIT = "-1", GFORTRAN_STDERR_UNIT = "-1")
local({
br <- Sys.getenv("R_BROWSER", NA_character_)
if(!is.na(br)) options(browser = br)
tests_startup <- Sys.getenv("R_TESTS")
if(nzchar(tests_startup)) source(tests_startup)
})
C:\Program Files\R\R-3.1.0\etc\Rprofile.site
# Things you might want to change
# options(papersize="a4")
# options(editor="notepad")
# options(pager="internal")
# set the default help type
# options(help_type="text")
options(help_type="html")
# set a site library
# .Library.site <- file.path(chartr("\\", "/", R.home()), "site-library")
# set a CRAN mirror
# local({r <- getOption("repos")
# r["CRAN"] <- "http://my.local.cran"
# options(repos=r)})
# Give a fortune cookie, but only to interactive sessions
# (This would need the fortunes package to be installed.)
# if (interactive())
# fortunes::fortune()
I have found the same issue with RStudio-0.98.983 and R-3.1.1-win. Uninstalling both and reinstalling did NOT solve the issue for me. I have tried with RStudio-0.98.994 and it did not work either...
Update: This was reported (see link in the comments below) and a solution was found by the RStudio team. It seems it is an issue with the Lua base64 encoder on Windows, which is used in ioslides. The solution is to install the packages httpuv or catools. After restarting RStudio, the issue should be fixed (at least it was for me!).
I had a similar problem with a chart not being displayed. It turned out that the problem was that the name of the .Rpres file I was using had spaces in it. Once I replaced the spaces with underscores the plot appeared again.
Use "Example_File_Name.Rpres" not "Example File Name.Rpres".
I had the same problem, and a different solution worked for me.
- don't save the rmarkdown with any numbers in the document name,
- and also don't include the .html in the document name of the markdown file you wish to save
Using just a name without the two above should create one rmd-file and one html-file in your designated folder. The rmd-file will not include plots, the html-file however should include them in its presentation.
This is a localised issue - an install on a fresh computer did not have this error. It could be due to having previous versions of R hanging around - suggest taking the route of completely uninstalling R and Rstudio.
Uninstalling R and Rstudio works.

Missing object error when using step() within a user-defined function

5 days and still no answer
As can be seen by Simon's comment, this is a reproducible and very strange issue. It seems that the issue only arises when a stepwise regression with very high predictive power is wrapped in a function.
I have been struggling with this for a while and any help would be much appreciated. I am trying to write a function that runs several stepwise regressions and outputs all of them to a list. However, R is having trouble reading the dataset that I specify in my function arguments. I found several similar errors on various boards (here, here, and here), however none of them seemed to ever get resolved. It all comes down to some weird issues with calling step() in a user-defined function. I am using the following script to test my code. Run the whole thing several times until an error arises (trust me, it will):
# Random test data: binary response `a`, factor `b`, numeric `c` and `d`.
# `replace = TRUE` is spelled out instead of the partially matched `rep = T`
# (T can be reassigned, TRUE cannot).
test.df <- data.frame(a = sample(0:1, 100, replace = TRUE),
                      b = as.factor(sample(0:5, 100, replace = TRUE)),
                      c = runif(100, 0, 100),
                      d = rnorm(100, 50, 50))
test.df$b[10:100] <- test.df$a[10:100] #making sure that at least one of the variables has some predictive power
# Fit a binomial glm, run a backward stepwise selection and then a stepwise
# search within the two-way interaction scope `~.^2`.
#
# Arguments:
#   modeling.formula  model formula for the initial fit.
#   dataset           data frame the model is fitted on.
#   outfile           optional file; when given, printed output (including the
#                     step() trace) is appended to it instead of the console.
#
# Returns a list with the initial fit (modInitial) and the two stepwise fits
# (modStep1, modStep2).
#
# NOTE(review): the fitted call stores `data = dataset`, a name that only
# exists inside this function; the second step() can fail with
# "object 'dataset' not found".
stepModel <- function(modeling.formula, dataset, outfile = NULL) {
  if (!is.null(outfile)) {
    sink(file = outfile,
         append = TRUE, type = "output")
    # Undo the diversion when the function exits, also on error. Registering
    # it here (instead of an unconditional sink() at the end) avoids the
    # "no sink to remove" warning when no outfile was given.
    on.exit(sink(), add = TRUE)
    print("")
    print("Models run at:")
    print(Sys.time())
  }
  model.initial <- glm(modeling.formula,
                       family = binomial,
                       data = dataset)
  model.stepwise1 <- step(model.initial, direction = "backward")
  model.stepwise2 <- step(model.stepwise1, scope = ~.^2)
  output <- list(modInitial = model.initial, modStep1 = model.stepwise1, modStep2 = model.stepwise2)
  return(output)
}
blah <- stepModel(a~., dataset = test.df)
This returns the following error message (if the error does not show up right away, keep re-running the test.df script as well as the call for stepModel(), it will show up eventually):
Error in is.data.frame(data) : object 'dataset' not found
I have determined that everything runs fine up until model.stepwise2 starts to get built. Somehow, the temporary object 'dataset' works just fine for the first stepwise regression, but fails to be recognized by the second. I found this by commenting out part of the function as can be seen below. This code will run fine, proving that the object 'dataset' was originally being recognized:
# Reduced variant of stepModel(): fits the binomial glm and runs only the
# backward stepwise selection, returning that single fit.
#
# Arguments:
#   modeling.formula  model formula for the initial fit.
#   dataset           data frame the model is fitted on.
#   outfile           optional file; when given, printed output is appended to
#                     it instead of the console.
stepModel1 <- function(modeling.formula, dataset, outfile = NULL) {
  if (!is.null(outfile)) {
    sink(file = outfile,
         append = TRUE, type = "output")
    # the sink was opened but never closed; close it on exit (also on error)
    on.exit(sink(), add = TRUE)
    print("")
    print("Models run at:")
    print(Sys.time())
  }
  model.initial <- glm(modeling.formula,
                       family = binomial,
                       data = dataset)
  model.stepwise1 <- step(model.initial, direction = "backward")
  # model.stepwise2 <- step(model.stepwise1, scope = ~.^2)
  # output <- list(modInitial = model.initial, modStep1 = model.stepwise1, modStep2 = model.stepwise2)
  return(model.stepwise1)
}
blah1 <- stepModel1(a~., dataset = test.df)
EDIT - before anyone asks, all the summary() functions were there because the full function (i edited it so that you could focus in on the error) has another piece that defines a file to which you can output stepwise trace. I just got rid of them
EDIT 2 - session info
sessionInfo()
R version 2.15.1 (2012-06-22)
Platform: x86_64-pc-mingw32/x64 (64-bit)
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] tcltk stats graphics grDevices utils datasets methods base
other attached packages:
[1] sqldf_0.4-6.4 RSQLite.extfuns_0.0.1 RSQLite_0.11.3 chron_2.3-43
[5] gsubfn_0.6-5 proto_0.3-10 DBI_0.2-6 ggplot2_0.9.3.1
[9] caret_5.15-61 reshape2_1.2.2 lattice_0.20-6 foreach_1.4.0
[13] cluster_1.14.2 plyr_1.8
loaded via a namespace (and not attached):
[1] codetools_0.2-8 colorspace_1.2-1 dichromat_2.0-0 digest_0.6.2 grid_2.15.1
[6] gtable_0.1.2 iterators_1.0.6 labeling_0.1 MASS_7.3-18 munsell_0.4
[11] RColorBrewer_1.0-5 scales_0.2.3 stringr_0.6.2 tools_2.15
EDIT 3 - this performs all the same operations as the function, just without using a function. This will run fine every time, even when the algorithm doesn't converge:
# Same steps as stepModel(), executed at top level: here `dataset` is an
# ordinary top-level variable rather than a function argument.
modeling.formula <- a~.
dataset <- test.df
outfile <- NULL
# outfile is NULL above, so this branch is skipped and no sink is opened
if (is.null(outfile) == FALSE){
sink(file = outfile,
append = TRUE, type = "output")
print("")
print("Models run at:")
print(Sys.time())
}
# initial binomial fit
model.initial <- glm(modeling.formula,
family = binomial,
data = dataset)
# backward selection, then a stepwise search within the scope ~.^2
model.stepwise1 <- step(model.initial, direction = "backward")
model.stepwise2 <- step(model.stepwise1, scope = ~.^2)
output <- list(modInitial = model.initial, modStep1 = model.stepwise1, modStep2 = model.stepwise2)
Using do.call to refer to the data set in the calling environment works for me. See https://stackoverflow.com/a/7668846/210673 for the original suggestion. Here's a version that works (with sink code removed).
# Fit a binomial glm on a data set that is passed by *name*, then run a
# backward stepwise selection followed by a stepwise search within `~.^2`.
#
# Arguments:
#   modeling.formula  model formula for the initial fit.
#   dataset           character string naming the data frame; it is turned
#                     into a symbol so that the fitted call records
#                     `data = <that name>` rather than a local variable.
#
# Returns a list with elements modInitial, modStep1 and modStep2.
stepModel2 <- function(modeling.formula, dataset) {
  glm.args <- list(modeling.formula,
                   family = "binomial",
                   data = as.name(dataset))
  fit.start <- do.call("glm", glm.args)
  fit.backward <- step(fit.start, direction = "backward")
  fit.scope <- step(fit.backward, scope = ~.^2)
  list(modInitial = fit.start, modStep1 = fit.backward, modStep2 = fit.scope)
}
blah <- stepModel2(a~., dataset = "test.df")
It fails for me consistently with set.seed(6) with the original code. The reason it fails is that the dataset variable is not present within the step function, and although it's not needed in making model.stepwise1, it is needed for model.stepwise2 when model.stepwise1 keeps a linear term. So that's the case when your version fails. Calling the dataset from the global environment as I do here fixes this issue.

addOBV throwing error

I am trying to plot a graph with price and a few technical indicators such as ADX, RSI, and OBV. I cannot figure out why addOBV is giving an error and why the addADX lines are not showing at all in the chart.
Here my code:
tmp <- read.csv(paste("ProcessedQuotes/",Nifty[x,],".csv", sep=""),
as.is=TRUE, header=TRUE, row.names=NULL)
tmp$Date<-as.Date(tmp$Date)
ydat = xts(tmp[,-1],tmp$Date)
lineChart(ydat, TA=NULL, name=paste(Nifty[x,]," Technical Graph"))
plot(addSMA(10))
plot(addEMA(10))
plot(addRSI())
plot(addADX())
plot(addOBV())
Error for addOBV is:
Error in try.xts(c(2038282, 1181844, -1114409, 1387404, 3522045, 4951254, :
Error in as.xts.double(x, ..., .RECLASS = TRUE) :
order.by must be either 'names()' or otherwise specified
Below you can see DIn is not shown fully in the graphs.
> class(ydat)
[1] "xts" "zoo"
> head(ydat)
Open High Low Close Volume Trades Sma20 Sma50 DIp DIn DX ADX aroonUp aroonDn oscillator macd signal RSI14
I don't know why that patch doesn't work for you, but you can just create a new function (or you could mask the one from quantmod). Let's just make a new, patched version called addOBV2 which is the code for addOBV except for the one patched line. (x <- as.matrix(lchob@xdata) is replaced with x <- try.xts(lchob@xdata, error=FALSE)).
# Patched copy of quantmod's addOBV(): adds an On Balance Volume indicator to
# the current chart. The only functional difference from addOBV() is that the
# chart data is converted with try.xts() instead of as.matrix().
#
# Arguments:
#   ...     graphical parameters, merged with the default `col = 4`.
#   on      NA to draw the indicator in a new panel; otherwise stored in the
#           `on` slot of the indicator object.
#   legend  stored in the indicator's `params` slot.
#
# Returns the "chobTA" object: invisibly, after redrawing the chart, when
# called from top level; visibly when called from another function.
#
# S4 slots are accessed with `@` (the `#` characters that had replaced it
# turned the rest of each line into a comment).
addOBV2 <- function (..., on = NA, legend = "auto")
{
    stopifnot("package:TTR" %in% search() || require("TTR", quietly = TRUE))
    lchob <- quantmod:::get.current.chob()
    x <- try.xts(lchob@xdata, error=FALSE)
    #x <- as.matrix(lchob@xdata)
    x <- OBV(price = Cl(x), volume = Vo(x))
    yrange <- NULL
    chobTA <- new("chobTA")
    if (NCOL(x) == 1) {
        chobTA@TA.values <- x[lchob@xsubset]
    }
    else chobTA@TA.values <- x[lchob@xsubset, ]
    chobTA@name <- "chartTA"
    if (any(is.na(on))) {
        chobTA@new <- TRUE
    }
    else {
        chobTA@new <- FALSE
        chobTA@on <- on
    }
    chobTA@call <- match.call()
    legend.name <- gsub("^.*[(]", " On Balance Volume (", deparse(match.call()))#,
        #extended = TRUE)
    gpars <- c(list(...), list(col=4))[unique(names(c(list(col=4), list(...))))]
    chobTA@params <- list(xrange = lchob@xrange, yrange = yrange,
        colors = lchob@colors, color.vol = lchob@color.vol, multi.col = lchob@multi.col,
        spacing = lchob@spacing, width = lchob@width, bp = lchob@bp,
        x.labels = lchob@x.labels, time.scale = lchob@time.scale,
        isLogical = is.logical(x), legend = legend, legend.name = legend.name,
        pars = list(gpars))
    # called from top level: register the indicator on the chart and redraw it
    if (is.null(sys.call(-1))) {
        TA <- lchob@passed.args$TA
        lchob@passed.args$TA <- c(TA, chobTA)
        lchob@windows <- lchob@windows + ifelse(chobTA@new, 1,
            0)
        chartSeries.chob <- quantmod:::chartSeries.chob
        do.call("chartSeries.chob", list(lchob))
        invisible(chobTA)
    }
    else {
        return(chobTA)
    }
}
Now it works.
# reproduce your data
ydat <- getSymbols("ZEEL.NS", src="yahoo", from="2012-09-11",
to="2013-01-18", auto.assign=FALSE)
lineChart(ydat, TA=NULL, name=paste("ZEEL Technical Graph"))
plot(addSMA(10))
plot(addEMA(10))
plot(addRSI())
plot(addADX())
plot(addOBV2())
This code reproduces the error:
library(quantmod)
getSymbols("AAPL")
lineChart(AAPL, 'last 6 months')
addOBV()
Session Info:
sessionInfo()
R version 2.15.0 (2012-03-30)
Platform: x86_64-apple-darwin9.8.0/x86_64 (64-bit)
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] quantmod_0.3-17 TTR_0.21-1 xts_0.9-1 zoo_1.7-9 Defaults_1.1-1 rgeos_0.2-11
[7] sp_1.0-5 sos_1.3-5 brew_1.0-6
loaded via a namespace (and not attached):
[1] grid_2.15.0 lattice_0.20-6 tools_2.15.0
Googling around, the error seems to be related to the fact that addOBV converts the data into a matrix, which causes problems with TTR::OBV. A patch has been posted on RForge.

Resources