R package gamlss used inside foreach() fails to find an object - r

I'm trying to get leave-one-out predicted values. Please help me with this "can't find object" issue. I have searched for similar issues, but haven't managed to figure it out. This is on Windows 10.
Thanks in advance
library('gamlss')
library('foreach')
library('doParallel')
registerDoParallel(cores = 4)
# Generate data
set.seed(314)
sample.size <- 30
input.processed.cut <- data.frame(TP = round(runif(sample.size) * 100),
FP = round(runif(sample.size) * 100),
x = runif(sample.size))
# Fit Beta-binomial
model3 <- gamlss(formula = cbind(TP, FP) ~ x,
family = BB,
data = input.processed.cut)
# Get the leave-one-out values
# Leave-one-out predictions of "mu" for a fitted model.
# For every row i of input.data the model is refitted with update() on the
# data without row i, and predict() is then asked for the response-scale
# "mu" of the held-out row. Iterations run through foreach's %dopar% with
# the gamlss package requested on the workers; per-row results are stacked
# with rbind.
# Arguments:
#   model.obj  - fitted model object handed to update()
#   input.data - data frame whose rows are left out one at a time
# Returns a data frame with one column, `result`, taken from the first
# column of the stacked predictions.
# NOTE(review): this version is the one reported to fail with
# "object 'input.data' not found"; predict() receives no `data` argument here.
loo_predict.mu <- function(model.obj, input.data) {
yhat <- foreach(i = 1 : nrow(input.data), .packages="gamlss", .combine = rbind) %dopar% {
# Refit on everything except row i.
updated.model.obj <- update(model.obj, data = input.data[-i, ])
# Predict the held-out row on the response scale.
predict(updated.model.obj, what = "mu", newdata = input.data[i,], type = "response")
}
return(data.frame(result = yhat[, 1], row.names = NULL))
}
par.run <- loo_predict.mu(model3, input.processed.cut)
# Error in { : task 1 failed - "object 'input.data' not found"
> version
_
platform x86_64-w64-mingw32
arch x86_64
os mingw32
system x86_64, mingw32
status
major 3
minor 4.3
year 2017
month 11
day 30
svn rev 73796
language R
version.string R version 3.4.3 (2017-11-30)
nickname Kite-Eating Tree

I got a response from gamlss team and verified that their solution works. The only thing to change was to provide "data" along with "newdata" to predict().
# Leave-one-out predictions of "mu" for a fitted gamlss model.
#
# For every row i of `input.data` the model is refitted on the data without
# row i and the response-scale "mu" of the held-out row is predicted.
# Iterations are dispatched with foreach's %dopar%; gamlss is loaded on each
# worker through `.packages`.
#
# Arguments:
#   model.obj  - fitted model object; refitted with update() per iteration
#   input.data - data frame whose rows are left out one at a time
# Returns:
#   A data frame with a single column `result` holding one prediction per
#   row of `input.data`, with default (integer) row names.
loo_predict.mu <- function(model.obj, input.data) {
  # seq_len() rather than 1:nrow(): an empty data frame must produce no
  # iterations instead of the index sequence c(1, 0).
  yhat <- foreach(i = seq_len(nrow(input.data)), .packages = "gamlss",
                  .combine = rbind) %dopar% {
    # drop = FALSE keeps the subsets as data frames even when input.data
    # has a single column.
    training.data <- input.data[-i, , drop = FALSE]
    held.out.row <- input.data[i, , drop = FALSE]
    updated.model.obj <- update(model.obj, data = training.data)
    # The training data is passed explicitly as `data`: without it predict()
    # fails on the workers with "object 'input.data' not found".
    predict(updated.model.obj, what = "mu", data = training.data,
            newdata = held.out.row, type = "response")
  }
  data.frame(result = yhat[, 1], row.names = NULL)
}

Related

Error in readRDS(file.rds)

Summary:
I installed the rstan package, now I am facing this error:
Error in readRDS(file.rds) : unknown input format
Description:
> traceback()
5: readRDS(file.rds)
4: is(obj <- readRDS(file.rds), "stanmodel")
3: stan_model(file, model_name = model_name, model_code = model_code,
stanc_ret = NULL, boost_lib = boost_lib, eigen_lib = eigen_lib,
save_dso = save_dso, verbose = verbose)
2: stan(file = fName, data = datalist, init = list(inlist), chains = 1,
iter = n.it, pars = c("f", "tau"), warmup = n.warm, seed = computerID +
1000) at functions.r#131
1: RunModel(11, myT, nu, lambda, rho, BDRi, Pars, computerID, n.it,
n.warm, getwd())
Environment:
RStan Version: 2.16.2
R Version 3.4.1 (2017-06-30)
Operating System: Windows 10
Please test whether this works:
install.packages('rstan')
library(rstan)
saveRDS(mtcars, file = 'test.rds')
readRDS('test.rds')
Works fine for me
My problem is solved: I now write the model code to a file with the writeLines command and pass that file to stan().
# Write the Stan program to a .stan file and fit the model from that file.
library(rstan)
# Stan model code held as a single character string.
scode <- "
parameters {
real y[2];
}
model {
y[1] ~ normal(0, 1);
y[2] ~ double_exponential(0, 2);
}
"
# Save the model code to "mymodel.stan" in the current working directory.
writeLines(scode, "mymodel.stan")
# Fit from the file just written, with 10 iterations and verbose output off.
mymodel <- stan("mymodel.stan", iter = 10, verbose = FALSE)

"Error in install.packages : missing value where TRUE/FALSE needed" in RStudio but not in R. Why?

When I execute:
install.packages(c("rj", "rj.gd"), repos="http://download.walware.de/rj-2.1")
It works in R but not in RStudio. Why?
In RStudio I get:
> install.packages(c("rj", "rj.gd"), repos="http://download.walware.de/rj-2.1")
Error in install.packages : missing value where TRUE/FALSE needed
traceback:
> traceback()
3: installed.packages()
2: as.data.frame(installed.packages(), stringsAsFactors = F)
1: (function ()
{
uniqueLibPaths <- .rs.uniqueLibraryPaths()
x <- suppressWarnings(library(lib.loc = uniqueLibPaths))
x <- x$results[x$results[, 1] != "base", ]
pkgs.name <- x[, 1]
pkgs.library <- x[, 2]
pkgs.desc <- x[, 3]
pkgs.url <- file.path("help/library", pkgs.name, "html",
"00Index.html")
loaded.pkgs <- .rs.pathPackage()
pkgs.loaded <- !is.na(match(normalizePath(paste(pkgs.library,
pkgs.name, sep = "/")), loaded.pkgs))
instPkgs <- as.data.frame(installed.packages(), stringsAsFactors = F)
pkgs.version <- character(length = length(pkgs.name))
for (i in 1:length(pkgs.name)) {
pkgs.version[[i]] <- .rs.packageVersion(pkgs.name[[i]],
pkgs.library[[i]], instPkgs)
}
pkgs.library <- .rs.createAliasedPath(pkgs.library)
packages = data.frame(name = pkgs.name, library = pkgs.library,
version = pkgs.version, desc = pkgs.desc, url = pkgs.url,
loaded = pkgs.loaded, check.rows = TRUE, stringsAsFactors = FALSE)
packages[order(packages$name), ]
})()
I use Windows 10 Enterprise with RStudio 1.0.143 as well as:
> R.version
_
platform x86_64-w64-mingw32
arch x86_64
os mingw32
system x86_64, mingw32
status
major 3
minor 4.0
year 2017
month 04
day 21
svn rev 72570
language R
version.string R version 3.4.0 (2017-04-21)
nickname You Stupid Darkness

R: Plot trees from h2o.randomForest() and h2o.gbm()

Looking for an efficient way to plot trees in RStudio, H2O's Flow, or a local HTML page from h2o's RF and GBM models, similar to the one in the image in the link below. Specifically, how do you plot trees for the fitted model objects rf1 and gbm1 produced by the code below, perhaps by parsing the output of h2o.download_pojo(rf1) or h2o.download_pojo(gbm1)?
# # The following two commands remove any previously installed H2O packages for R.
# if ("package:h2o" %in% search()) { detach("package:h2o", unload=TRUE) }
# if ("h2o" %in% rownames(installed.packages())) { remove.packages("h2o") }
# # Next, we download packages that H2O depends on.
# pkgs <- c("methods","statmod","stats","graphics","RCurl","jsonlite","tools","utils")
# for (pkg in pkgs) {
# if (! (pkg %in% rownames(installed.packages()))) { install.packages(pkg) }
# }
#
# # Now we download, install h2o package
# install.packages("h2o", type="source", repos=(c("http://h2o-release.s3.amazonaws.com/h2o/rel-turchin/3/R")))
library(h2o)
h2o.init(nthreads = -1, max_mem_size = "2G")
h2o.removeAll() ##clean slate - just in case the cluster was already running
## Load data - available to download from link below
## https://www.dropbox.com/s/gu8e2o0mzlozbu4/SampleData.csv?dl=0
# Import the sample data and split it 40% / 30% / remainder into
# training, validation and test frames.
df <- h2o.importFile(path = normalizePath("../SampleData.csv"))
splits <- h2o.splitFrame(df, ratios = c(0.4, 0.3), seed = 1234)
train <- h2o.assign(splits[[1]], "train.hex")
valid <- h2o.assign(splits[[2]], "valid.hex")
# Two ratios produce three splits; the test frame is the third one.
# (It was previously assigned splits[[2]], duplicating the validation frame.)
test <- h2o.assign(splits[[3]], "test.hex")

# Column layout: response in column 1, predictors in columns 2 to 169.
predictor_col_start_pos <- 2
predictor_col_end_pos <- 169
predicted_col_pos <- 1

# Random forest, scored on the validation frame after every iteration.
rf1 <- h2o.randomForest(
  training_frame = train, validation_frame = valid,
  x = predictor_col_start_pos:predictor_col_end_pos, y = predicted_col_pos,
  model_id = "rf_covType_v1", ntrees = 2000, stopping_rounds = 10,
  score_each_iteration = TRUE, seed = 2001
)

# Gradient boosting machine on the same predictors and response.
gbm1 <- h2o.gbm(
  training_frame = train, validation_frame = valid,
  x = predictor_col_start_pos:predictor_col_end_pos, y = predicted_col_pos,
  model_id = "gbm_covType2", seed = 2002, ntrees = 20,
  learn_rate = 0.2, max_depth = 10, stopping_rounds = 2,
  stopping_tolerance = 0.01, score_each_iteration = TRUE
)

## Next step would be to plot trees for fitted models rf1 and gbm1
# print the model, POJO (Plain Old Java Object) to screen
h2o.download_pojo(rf1)
h2o.download_pojo(gbm1)
I think it may be the solution you are looking for;
# Train a small GBM on the public airlines sample and export it as a MOJO
# zip so that its trees can be rendered with the PrintMojo tool.
library(h2o)
h2o.init()
df <- h2o.importFile("http://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip")
model <- h2o.gbm(
  model_id = "model",
  training_frame = df,
  x = c("Year", "Month", "DayofMonth", "DayOfWeek", "UniqueCarrier"),
  y = "IsDepDelayed",
  max_depth = 3,
  ntrees = 5
)
# Write the MOJO into the working directory; the genmodel jar is not
# downloaded alongside it.
h2o.download_mojo(model, path = getwd(), get_genmodel_jar = FALSE)
Now download the latest stable h2o release from http://www.h2o.ai/download/ and run the PrintMojo tool from the command line.
java -cp h2o.jar hex.genmodel.tools.PrintMojo --tree 0 -i model.zip -o model.gv
dot -Tpng model.gv -o model.png
open model.png
More info: http://docs.h2o.ai/h2o/latest-stable/h2o-genmodel/javadoc/index.html
New Tree API introduced in 3.22.0.1 (October 2018) changes the whole game of visualizing H2O trees. General workflow may look like this:
and detailed example with code can be found here: Finally, You Can Plot H2O Decision Trees in R.

R: h2o: saving a deeplearning model: automatically generated long file name too long for windows

I have no problems saving an h2o glm model (as this has a shorter file name), but I am having problems saving an h2o deeplearning model using exactly the same saving procedure.
I tried:
# Reproduction: train a deep learning model on synthetic data and save it.
library(h2o)
localH2O = h2o.init()
# Three uniform random predictors and a response computed from them.
a <- runif(1000)
b <- runif(1000)
c <- runif(1000)
d <- 5*a+2*b^2+c*a
df1 <- data.frame(a,b,c,d)
# Convert the data frame to an H2O frame.
df1.hex <- as.h2o(df1)
# Columns 1:3 are the predictors and column 4 the response; no model_id is given.
test.dl <- h2o.deeplearning(x = 1:3, y = 4, training_frame = df1.hex)
# Save the model under C:/ with the name "modeldl" and show the returned path.
dlmodel.path = h2o.saveModel(test.dl, dir = "file:///C:/", name = "modeldl")
dlmodel.path
But get an error:
Error in .h2o.doSafeREST(conn = conn, h2oRestApiVersion = h2oRestApiVersion, :
FS IO Failure:
accessed path : file:///C://modeldl/modelmetrics_DeepLearningModel__9fe11910a85d1371379ac7d536d64359_-5064771152374762981_on_Key_Frame__C__Users_store_AppData_Local_Temp_RtmpGGylNe_file1f18787f2989_csv_1.hex_2.DeepLearningModel__9fe11910a85d1371379ac7d536d64359.temporary.train.chunks8_-6759658083019717917.bin
I am using a Windows 10 computer. As has been pointed out by RHA, the file path/name is extremely long and is too long for Windows. How can I overcome this? Most of the file path characters are generated automatically by the h2o program. I am using the latest h2o update.
From sessionInfo(): other attached packages: [1] h2o_3.0.0.30
I would be grateful for your help.
Have you tried to add model_id = "something" to your h2o.deeplearning command?
test.dl <- h2o.deeplearning(x = 1:3, y = 4, training_frame = df1.hex, model_id = "myTest.dl")
I hope it could fix your problem.

Using [R] packages caret and gbm: "Error in vector(type, length) : vector: cannot make a vector of mode 'NULL'"

I'm trying to train a boosting model on a data frame, using the Caret and gbm packages in R. I've been able to build models successfully with default parameters; however, I continue to hit this error, when I attempt to customize the summary function:
Error in vector(type, length) :
vector: cannot make a vector of mode 'NULL'.
This is the first question I've posted, as I'm usually able to root up info to solve the problem. In this case, I can't seem to find a similar issue.
The following code is intended to reproduce the error. Let me know if it doesn't, or if I should include additional info, as I'm more than happy to do so.
System.info:
sysname: Windows
release: 7 x64
version: build 7601, Service Pack 1
version.string: R version 3.1.3 (2015-03-09)
system: x86_64, mingw32
library(plyr)
library(caret)
library(dplyr)
example <- data.frame(response = rnorm(100), predictor1 = rnorm(100), predictor2 = rnorm(100))
# Summary function for caret::trainControl(summaryFunction = ...):
# mean absolute error between observed and predicted values.
#
# Arguments:
#   data  - data frame with numeric columns `obs` and `pred`
#   lev   - unused; kept for the summary-function signature
#   model - unused; kept for the summary-function signature
# Returns:
#   A named numeric vector of length one, c(AE = <mean absolute error>).
#   A summary function should return one named value per metric; the
#   previous version returned one absolute error per row and named only
#   the first element "AE".
aeSummary <- function(data, lev = NULL, model = NULL) {
  c(AE = mean(abs(data$obs - data$pred)))
}
# Train a gbm model on `example` with a single-row tuning grid, using the
# custom summary function so the model is evaluated on the "AE" metric.
modelFit <- train(response ~ .,
data = example,
method = "gbm",
# One fixed combination of the gbm tuning parameters.
tuneGrid = data.frame(n.trees = 5,
interaction.depth = 5,
shrinkage = 0.05,
n.minobsinnode = 6),
# "AE" must match a name in the vector returned by aeSummary.
metric = "AE",
# Smaller "AE" is better.
maximize = FALSE,
trControl = trainControl(
summaryFunction = aeSummary))

Resources