How to save All models from h2o automl - r

I'm trying to save all the models from an h2o.automl run (part of the h2o package). Currently I am able to save a single model using h2o.saveModel(aml@leader, path = "/home/data/user").
How can I save all the models?
Here is my attempt on a sample dataset:
library(h2o)
# Initialise the H2O session used by everything below.
h2o.init()
# Build the URL of the sample prostate CSV and import it into H2O under the
# frame key "prostate.hex".
prostate_url <- paste(
  "https://raw.github.com",
  "h2oai/h2o-2/master/smalldata/logreg/prostate.csv",
  sep = "/"
)
prostate.hex <- h2o.importFile(
  path = prostate_url,
  destination_frame = "prostate.hex"
)
Get data from github or import via readr:
library(readr)
# Alternative to the URL import: read the CSV from disk with readr and push
# the resulting data frame into H2O under the key "prostate.hex".
prostate <- read_csv("/home/data/user/prostate.csv")
prostate.hex <- as.h2o(prostate, "prostate.hex")
# Run AutoML on CAPSULE for at most 180 seconds, with stacked ensembles
# excluded from the candidate algorithms.
predictors <- c("AGE", "RACE", "PSA", "DCAPS")
aml <- h2o.automl(
  x = predictors,
  y = "CAPSULE",
  training_frame = prostate.hex,
  max_runtime_secs = 180,
  exclude_algos = c("StackedEnsemble")
)
Now I'm trying to save the models within aml:
# The leaderboard is an S4 slot, so it is reached with `@` (a `#` here would
# start a comment and leave the call unclosed). Convert the H2OFrame column to
# a local data frame first, then to a tibble; tibble is namespace-qualified
# because it is not attached by the libraries loaded above.
mod_ids <- tibble::as_tibble(as.data.frame(aml@leaderboard$model_id))
Now I can't figure out how to save the models:
# Print every model id on the leaderboard. seq_len() keeps the loop from
# running at all when mod_ids has zero rows (1:0 would iterate twice).
for (i in seq_len(nrow(mod_ids))) {
  print(mod_ids[i, ])
  # Save attempt left disabled, as in the question:
  # h2o.saveModel(object = aml@leaderboard[[i]], "/home/data/user/")
}
Here is what I've tried:
tutorial automl
H2O AUTOML: How to save reuse and build on top of existing automl models

Try this, it'll do your job:
# Pull every model id off the AutoML leaderboard as a plain character vector
# (the leaderboard is an S4 slot, hence `@`), so the loop does not depend on
# any helper object and is simply skipped when the leaderboard is empty.
model_ids <- as.vector(aml@leaderboard$model_id)
for (model_id in model_ids) {
  aml1 <- h2o.getModel(model_id) # get model object in environment
  # pass that model object to h2o.saveModel as an argument
  h2o.saveModel(object = aml1, path = "C:/Users/sm/Documents/stack/models")
}

Related

azure machine learning and R using azuremlsdk - supported R version and custom_docker_image

So we have to move away from using SQL Server Machine Learning services as it only supports R 3.5.2 even for SQL Server 2019!
I am trying hard to go all 21st century and deploy some of our on prem R trained models as web service as described by one of the Microsoft evangelists David Smith (see code below).
Looking at r_environment I noticed to my horror that, if I do not use a custom docker image, the predefined images only support R 3.6?! Is this correct? If so, how do I create a custom docker image, and why does Microsoft suggest using Azure ML when it also has restrictions on the R version?
PS:
Some code to possibly replicate my issues:
Train model locally:
library(datasets)
library(caret)
# Load iris and switch to C:/Data so that model.rds is written there.
data(iris)
setwd("C:/Data")
# Use the rows selected by createDataPartition (p = 0.80) for training and
# the remaining rows for testing.
index <- createDataPartition(iris$Species, p = 0.80, list = FALSE)
trainset <- iris[index, ]
testset <- iris[-index, ]
# Fit an rpart classifier on all predictors with cross-validation, then
# persist the fitted object for deployment.
model <- train(
  Species ~ .,
  data = trainset,
  method = "rpart",
  trControl = trainControl(method = "cv")
)
saveRDS(model, "model.rds")
I can deploy this model in Azure ML:
Scoring script score.r
library(jsonlite)
# Entry point of the scoring script.
#
# Reads model.rds from the directory named by the AZUREML_MODEL_DIR
# environment variable, logs a message, and returns a scoring function.
# The returned function takes a JSON string, converts it to a data frame,
# predicts with the loaded model and returns the predictions as JSON.
init <- function() {
  model_dir <- Sys.getenv("AZUREML_MODEL_DIR")
  rds_file <- file.path(model_dir, "model.rds")
  model <- readRDS(rds_file)
  message("iris classfication model loaded")

  score <- function(data) {
    vars <- as.data.frame(fromJSON(data))
    prediction <- predict(model, newdata = vars)
    toJSON(prediction)
  }
  score
}
Failing code:
library(azuremlsdk)
# Authenticate interactively and open the target workspace.
interactive_auth <- interactive_login_authentication(tenant_id = "xxx")
ws <- get_workspace(
  name = "amazing_work_space",
  subscription_id = "xxx",
  resource_group = "xxx",
  auth = interactive_auth
)
# Fetch the model to be served from the workspace.
model <- get_model(ws, name = "iris_classification")
# Environment declared with a name and version only; no custom docker image
# is passed here.
r_env <- r_environment(
  name = "myr_env",
  version = "1"
)
# Scoring entry script and its source directory, bound to the environment.
inference_config <- inference_config(
  entry_script = "score.R",
  source_directory = ".",
  environment = r_env
)
# Deploy as a container instance with 1 core and 0.5 GB of memory, then
# block until the deployment finishes while streaming its output.
aci_config <- aci_webservice_deployment_config(
  cpu_cores = 1,
  memory_gb = 0.5
)
aci_service <- deploy_model(
  ws,
  "xxx",
  list(model),
  inference_config,
  aci_config
)
wait_for_deployment(aci_service, show_output = TRUE)

Please select a longer horizon when the forecasts are first computed in forecast package in r

When I run the following code, I do NOT get this error:
## https://www.dataiku.com/learn/guide/code/r/time_series.html
library(readxl)
library(forecast)
library(dplyr)
library(prophet)
library(rstan)
library(Hmisc)
library(caret)
# Read the series: first column as text (period label), second as numeric.
data <- read_excel(
  "Time Series/Items.xlsx",
  col_types = c("text", "numeric")
)
# Fraction of the rows used for training.
Nper <- 0.75
# Names of the models to evaluate; each is passed to gkuniforecast().
stmodels <- c(
  "meanf", "naive", "snaive", "rwf", "croston", "stlf", "ses", "holt",
  "hw", "splinef", "thetaf", "ets", "auto.arima", "tbats", "prophet"
)
#' Fit one univariate forecasting model and score it against a hold-out tail.
#'
#' The first `N = ceiling(Np * nrow(data))` rows are used for training and the
#' remaining rows for testing. The forecast is plotted as a side effect.
#'
#' @param data Data frame holding the series. For `model == "prophet"` it must
#'   have exactly two columns (renamed to `ds` and `y`); `ds` is parsed as a
#'   date with format "%Y-%b-%d" after "-01" is appended.
#' @param Np Fraction of rows used for training.
#' @param Ncolumn Column passed to `pull()` to extract the series (ignored for
#'   prophet).
#' @param tsfreq Frequency passed to `ts()` (ignored for prophet).
#' @param model Either "prophet" or the name of a forecasting/model function
#'   that accepts the training series as its first argument.
#' @return For prophet, a 2 x 8 matrix in which only the "Test set" RMSE is
#'   filled in; otherwise the result of `accuracy()` on the forecast.
gkuniforecast <- function(data, Np, Ncolumn, tsfreq, model) {
  ## Preparation
  N <- ceiling(Np * nrow(data))

  ## Models
  if (model == "prophet") {
    df <- data
    names(df) <- c("ds", "y")
    df$ds <- as.Date(paste(df$ds, "-01", sep = ""), "%Y-%b-%d")
    train.df <- df[1:N, ]
    # Pad the training span with NA so the test values line up with the
    # forecast rows.
    na.df <- data.frame(ds = rep(NA, N), y = rep(NA, N))
    test.df <- rbind(na.df, df[(N + 1):nrow(data), ])
    m <- prophet(train.df)
    future <- make_future_dataframe(m, periods = nrow(data) - N, freq = "month")
    pro_forecast <- predict(m, future)
    # Plot objects are not auto-printed inside a function, so print it.
    print(plot(m, pro_forecast))
    ## prophet_plot_components(m, forecast)
    acc <- matrix(
      NA,
      nrow = 2, ncol = 8,
      dimnames = list(
        c("Training set", "Test set"),
        c("ME", "RMSE", "MAE", "MPE", "MAPE", "MASE", "ACF1", "Theil's U")
      )
    )
    # Compare against the observed values only (column y), not the whole
    # data frame, which would also include the date column.
    acc["Test set", "RMSE"] <-
      sqrt(mean((pro_forecast$yhat - test.df$y)^2, na.rm = TRUE))
  } else {
    x <- pull(data, Ncolumn)
    train.x <- ts(x[1:N], frequency = tsfreq)
    test.x <- ts(c(rep(NA, N), x[(N + 1):NROW(x)]), frequency = tsfreq)
    # With Np == 1 there is no hold-out tail, so forecast NROW(x) steps ahead.
    horizon <- if (Np == 1) NROW(x) else NROW(x) - N
    # These functions return forecasts directly, so the horizon has to be
    # given when they are called; the others return a model that is then
    # passed to forecast().
    direct_forecasters <- c(
      "meanf", "naive", "snaive", "rwf", "croston", "stlf",
      "ses", "holt", "hw", "splinef", "thetaf"
    )
    fit_fun <- match.fun(model)
    if (model %in% direct_forecasters) {
      fc <- fit_fun(train.x, h = horizon)
    } else {
      fc <- forecast(fit_fun(train.x), h = horizon)
    }
    plot(fc)
    lines(test.x)
    acc <- accuracy(fc, test.x)
  }
  acc
}
# Score every model in stmodels on column 2 of data, using a yearly
# frequency of 12 and Nper of the rows for training.
acc <- lapply(
  stmodels,
  gkuniforecast,
  data = data,
  Np = Nper,
  Ncolumn = 2,
  tsfreq = 12
)
But when I run this code, I do:
##Forecast data prep
## Builds a training series from the first N values of column 1 and a test
## series padded with N leading NAs, both with frequency 5.
## NOTE(review): N and acc are not defined in this snippet; presumably they are
## left over in the workspace from an earlier run - confirm before running.
tsfreq=5
x=pull(data,1)
train.x = ts(x[1:N], frequency=tsfreq)
test.x <- ts(c(rep(NA, N), x[(N+1):NROW(x)]), frequency=tsfreq)
stmodels=c("meanf","naive","snaive","rwf","croston","stlf","ses","holt","hw"##,"splinef"
,"thetaf","ets","auto.arima","tbats")
## For each model name the loop builds one string of five statements and
## evaluates it:
##   m_<name> = <name>(train.x)
##   f_<name> = forecast(m_<name>, h=NROW(x)-N)
##   plot(f_<name>); lines(test.x)
##   acc[["<name>"]] = accuracy(f_<name>, test.x)
## This leaves m_<name> and f_<name> behind as variables for every model.
## NOTE(review): <name>(train.x) is called without a horizon for every entry;
## for functions that return forecasts directly the horizon presumably has to
## be given in that first call rather than in forecast() - verify against the
## forecast package documentation.
for (i in 1:length(stmodels)){
str1=paste0("m_",stmodels[i]," = ",stmodels[i],"(train.x)")
str2=paste0("f_",stmodels[i]," = forecast(m_",stmodels[i],", h=NROW(x)-N)")
str3=paste0("plot(f_",stmodels[i],")")
str4="lines(test.x)"
str5=paste0('acc[["',stmodels[i],'"]]=accuracy(f_',stmodels[i],',test.x)')
str=paste0(str1,";",str2,";",str3,";",str4,";",str5)
eval(parse(text=str))
}
There seems to be a problem with 'hw' (splinef is commented out, because it gives me another error), but I do not understand why in the first dataset, I get no errors and I do with the second dataset. What is also different is the frequency.
Again the error is:
Please select a longer horizon when the forecasts are first computed
You are mixing functions that create forecasts directly (like meanf()) with functions that generate models (like ets()). For functions that generate forecasts directly, you need to specify the forecast horizon when you call the function. See https://otexts.org/fpp2/the-forecast-package-in-r.html for a list of functions that produce forecasts directly.

Plot a Neural Net Curve Using neuralnet and ROCR package

Here I have a classification task and I need to use neuralnet and ROCR packages. The problem is that I got the error messages when I use prediction function.
Here is my code:
#load packages
require(neuralnet)
library(ROCR)
#create data set
# Read the comma-separated training and test sets (header row present).
train <- read.table(file = "train.txt", header = TRUE, sep = ",")
test <- read.table(file = "test.txt", header = TRUE, sep = ",")
# build model and make predictions
nn.sag <- neuralnet(
  Type ~ Area + Perimeter + Compactness + Length + Width + Asymmetry + Groove,
  data = train,
  hidden = 5,
  algorithm = "sag",
  err.fct = "sse",
  linear.output = FALSE
)
# Score the test set on every column except the last one.
prob <- compute(nn.sag, test[, -ncol(test)])
prob.result <- prob$net.result
# Build the prediction object and plot true-positive vs false-positive rate.
nn.pred <- prediction(prob.result, test$Type)
pref <- performance(nn.pred, "tpr", "fpr")
plot(pref)
And here I got the error message for the 'prediction' function:
'$ operator is invalid for atomic vectors'
The dataset looks like (only training dataset here):
Area,Perimeter,Compactness,Length,Width,Asymmetry,Groove,Type
14.8,14.52,0.8823,5.656,3.288,3.112,5.309,1
14.79,14.52,0.8819,5.545,3.291,2.704,5.111,1
14.99,14.56,0.8883,5.57,3.377,2.958,5.175,1
19.14,16.61,0.8722,6.259,3.737,6.682,6.053,0
15.69,14.75,0.9058,5.527,3.514,1.599,5.046,1
14.11,14.26,0.8722,5.52,3.168,2.688,5.219,1
13.16,13.55,0.9009,5.138,3.201,2.461,4.783,1
16.16,15.33,0.8644,5.845,3.395,4.266,5.795,0
15.01,14.76,0.8657,5.789,3.245,1.791,5.001,1
14.11,14.1,0.8911,5.42,3.302,2.7,5,1
17.98,15.85,0.8993,5.979,3.687,2.257,5.919,0
21.18,17.21,0.8989,6.573,4.033,5.78,6.231,0
14.29,14.09,0.905,5.291,3.337,2.699,4.825,1
14.59,14.28,0.8993,5.351,3.333,4.185,4.781,1
11.42,12.86,0.8683,5.008,2.85,2.7,4.607,1
12.11,13.47,0.8392,5.159,3.032,1.502,4.519,1
15.6,15.11,0.858,5.832,3.286,2.725,5.752,0
15.38,14.66,0.899,5.477,3.465,3.6,5.439,0
18.94,16.49,0.875,6.445,3.639,5.064,6.362,0
12.36,13.19,0.8923,5.076,3.042,3.22,4.605,1
14.01,14.29,0.8625,5.609,3.158,2.217,5.132,1
17.12,15.55,0.8892,5.85,3.566,2.858,5.746,0
15.78,14.91,0.8923,5.674,3.434,5.593,5.136,1
16.19,15.16,0.8849,5.833,3.421,0.903,5.307,1
14.43,14.4,0.8751,5.585,3.272,3.975,5.144,1
13.8,14.04,0.8794,5.376,3.155,1.56,4.961,1
14.46,14.35,0.8818,5.388,3.377,2.802,5.044,1
18.59,16.05,0.9066,6.037,3.86,6.001,5.877,0
18.75,16.18,0.8999,6.111,3.869,4.188,5.992,0
15.49,14.94,0.8724,5.757,3.371,3.412,5.228,1
12.73,13.75,0.8458,5.412,2.882,3.533,5.067,1
13.5,13.85,0.8852,5.351,3.158,2.249,5.176,1
14.38,14.21,0.8951,5.386,3.312,2.462,4.956,1
14.86,14.67,0.8676,5.678,3.258,2.129,5.351,1
18.45,16.12,0.8921,6.107,3.769,2.235,5.794,0
17.32,15.91,0.8599,6.064,3.403,3.824,5.922,0
20.2,16.89,0.8894,6.285,3.864,5.173,6.187,0
20.03,16.9,0.8811,6.493,3.857,3.063,6.32,0
18.14,16.12,0.8772,6.059,3.563,3.619,6.011,0
13.99,13.83,0.9183,5.119,3.383,5.234,4.781,1
15.57,15.15,0.8527,5.92,3.231,2.64,5.879,0
16.2,15.27,0.8734,5.826,3.464,2.823,5.527,1
20.97,17.25,0.8859,6.563,3.991,4.677,6.316,0
14.16,14.4,0.8584,5.658,3.129,3.072,5.176,1
13.45,14.02,0.8604,5.516,3.065,3.531,5.097,1
15.5,14.86,0.882,5.877,3.396,4.711,5.528,1
16.77,15.62,0.8638,5.927,3.438,4.92,5.795,0
12.74,13.67,0.8564,5.395,2.956,2.504,4.869,1
14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
14.28,14.17,0.8944,5.397,3.298,6.685,5.001,1
14.34,14.37,0.8726,5.63,3.19,1.313,5.15,1
14.03,14.16,0.8796,5.438,3.201,1.717,5.001,1
19.11,16.26,0.9081,6.154,3.93,2.936,6.079,0
14.52,14.6,0.8557,5.741,3.113,1.481,5.487,1
18.43,15.97,0.9077,5.98,3.771,2.984,5.905,0
18.81,16.29,0.8906,6.272,3.693,3.237,6.053,0
13.78,14.06,0.8759,5.479,3.156,3.136,4.872,1
14.69,14.49,0.8799,5.563,3.259,3.586,5.219,1
18.85,16.17,0.9056,6.152,3.806,2.843,6.2,0
12.88,13.5,0.8879,5.139,3.119,2.352,4.607,1
12.78,13.57,0.8716,5.262,3.026,1.176,4.782,1
14.33,14.28,0.8831,5.504,3.199,3.328,5.224,1
19.46,16.5,0.8985,6.113,3.892,4.308,6.009,0
19.38,16.72,0.8716,6.303,3.791,3.678,5.965,0
15.26,14.85,0.8696,5.714,3.242,4.543,5.314,1
20.24,16.91,0.8897,6.315,3.962,5.901,6.188,0
19.94,16.92,0.8752,6.675,3.763,3.252,6.55,0
20.71,17.23,0.8763,6.579,3.814,4.451,6.451,0
16.17,15.38,0.8588,5.762,3.387,4.286,5.703,0
13.02,13.76,0.8641,5.395,3.026,3.373,4.825,1
16.53,15.34,0.8823,5.875,3.467,5.532,5.88,0
13.89,14.02,0.888,5.439,3.199,3.986,4.738,1
18.98,16.57,0.8687,6.449,3.552,2.144,6.453,0
17.08,15.38,0.9079,5.832,3.683,2.956,5.484,1
15.03,14.77,0.8658,5.702,3.212,1.933,5.439,1
16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1
18.65,16.41,0.8698,6.285,3.594,4.391,6.102,0
20.1,16.99,0.8746,6.581,3.785,1.955,6.449,0
17.99,15.86,0.8992,5.89,3.694,2.068,5.837,0
15.88,14.9,0.8988,5.618,3.507,0.7651,5.091,1
13.22,13.84,0.868,5.395,3.07,4.157,5.088,1
18.3,15.89,0.9108,5.979,3.755,2.837,5.962,0
19.51,16.71,0.878,6.366,3.801,2.962,6.185,0
The prediction() function is available in both neuralnet and ROCR package in R. So do not load both packages together. First load neuralnet, train your model and then detach it using detach() and then load ROCR package. Try following code:
# load packages
# Only neuralnet is attached at first; ROCR is attached after neuralnet has
# been detached because both packages provide a prediction() function.
# library() is used so that a missing package stops the script immediately.
library(neuralnet)
# create data set
train <- read.table(file = "train.txt", header = TRUE, sep = ",")
test <- read.table(file = "test.txt", header = TRUE, sep = ",")
# build model and make predictions
nn.sag <- neuralnet(
  Type ~ Area + Perimeter + Compactness + Length + Width + Asymmetry + Groove,
  data = train,
  hidden = 5,
  algorithm = "sag",
  err.fct = "sse",
  linear.output = FALSE
)
prob <- compute(nn.sag, test[, -ncol(test)])
prob.result <- prob$net.result
# Drop neuralnet so that prediction() below resolves to ROCR's version.
# TRUE is spelled out because T is an ordinary, reassignable variable.
detach(package:neuralnet, unload = TRUE)
library(ROCR)
nn.pred <- prediction(prob.result, test$Type)
pref <- performance(nn.pred, "tpr", "fpr")
plot(pref)
Or simply keep both packages loaded and call the ROCR version explicitly with ROCR::prediction(prob.result, test$Type),
which selects the right package.

R: Plot trees from h2o.randomForest() and h2o.gbm()

Looking for an efficient way to plot trees in rstudio, H2O's Flow or in local html page from h2o's RF and GBM models similar to the one in the image in link below. Specifically, how do you plot trees for the objects, (fitted models) rf1 and gbm2 produced by code below perhaps by parsing h2o.download_pojo(rf1) or h2o.download_pojo(gbm1)?
# # The following two commands remove any previously installed H2O packages for R.
# if ("package:h2o" %in% search()) { detach("package:h2o", unload=TRUE) }
# if ("h2o" %in% rownames(installed.packages())) { remove.packages("h2o") }
# # Next, we download packages that H2O depends on.
# pkgs <- c("methods","statmod","stats","graphics","RCurl","jsonlite","tools","utils")
# for (pkg in pkgs) {
# if (! (pkg %in% rownames(installed.packages()))) { install.packages(pkg) }
# }
#
# # Now we download, install h2o package
# install.packages("h2o", type="source", repos=(c("http://h2o-release.s3.amazonaws.com/h2o/rel-turchin/3/R")))
library(h2o)
h2o.init(nthreads = -1, max_mem_size = "2G")
h2o.removeAll() ## clean slate - just in case the cluster was already running
## Load data - available to download from link below
## https://www.dropbox.com/s/gu8e2o0mzlozbu4/SampleData.csv?dl=0
df <- h2o.importFile(path = normalizePath("../SampleData.csv"))
## Two ratios (0.4, 0.3) produce three frames: train, validation and the
## remaining rows, which become the test set.
splits <- h2o.splitFrame(df, c(0.4, 0.3), seed = 1234)
train <- h2o.assign(splits[[1]], "train.hex")
valid <- h2o.assign(splits[[2]], "valid.hex")
## The test set is the third split; using splits[[2]] again would make it a
## copy of the validation frame.
test <- h2o.assign(splits[[3]], "test.hex")
## Column 1 is the response; columns 2 to 169 are the predictors.
predictor_col_start_pos <- 2
predictor_col_end_pos <- 169
predicted_col_pos <- 1
rf1 <- h2o.randomForest(
  training_frame = train,
  validation_frame = valid,
  x = predictor_col_start_pos:predictor_col_end_pos,
  y = predicted_col_pos,
  model_id = "rf_covType_v1",
  ntrees = 2000,
  stopping_rounds = 10,
  score_each_iteration = TRUE,
  seed = 2001
)
gbm1 <- h2o.gbm(
  training_frame = train,
  validation_frame = valid,
  x = predictor_col_start_pos:predictor_col_end_pos,
  y = predicted_col_pos,
  model_id = "gbm_covType2",
  seed = 2002,
  ntrees = 20,
  learn_rate = 0.2,
  max_depth = 10,
  stopping_rounds = 2,
  stopping_tolerance = 0.01,
  score_each_iteration = TRUE
)
## Next step would be to plot trees for fitted models rf1 and gbm1
# print the model, POJO (Plain Old Java Object) to screen
h2o.download_pojo(rf1)
h2o.download_pojo(gbm1)
I think it may be the solution you are looking for;
library(h2o)
h2o.init()
# Import the zipped airlines sample data straight from its URL.
df <- h2o.importFile(
  "http://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip"
)
# Small GBM (5 trees, depth 3) predicting IsDepDelayed.
predictors <- c("Year", "Month", "DayofMonth", "DayOfWeek", "UniqueCarrier")
model <- h2o.gbm(
  model_id = "model",
  training_frame = df,
  x = predictors,
  y = "IsDepDelayed",
  max_depth = 3,
  ntrees = 5
)
# Export the model as a MOJO into the current working directory.
h2o.download_mojo(model, getwd(), FALSE)
Now download the latest stable h2o release from http://www.h2o.ai/download/ and run the PrintMojo tool from the command line.
# Run PrintMojo from h2o.jar on model.zip and write tree 0 to model.gv.
java -cp h2o.jar hex.genmodel.tools.PrintMojo --tree 0 -i model.zip -o model.gv
# Render the Graphviz file model.gv as a PNG image.
dot -Tpng model.gv -o model.png
# View the image. NOTE(review): `open` is presumably the macOS opener - use
# your platform's equivalent if it is not available.
open model.png
More info: http://docs.h2o.ai/h2o/latest-stable/h2o-genmodel/javadoc/index.html
New Tree API introduced in 3.22.0.1 (October 2018) changes the whole game of visualizing H2O trees. General workflow may look like this:
and detailed example with code can be found here: Finally, You Can Plot H2O Decision Trees in R.

R: h2o: saving a deeplearning model: automatically generated long file name too long for windows

I have no problems saving an h2o GLM model (as this has a shorter file name), but I am having problems saving an h2o deeplearning model using exactly the same saving procedure.
I tried:
library(h2o)
localH2O <- h2o.init()
# Three uniform random predictors and a response computed from them.
a <- runif(1000)
b <- runif(1000)
c <- runif(1000)
d <- 5 * a + 2 * b^2 + c * a
df1 <- data.frame(a, b, c, d)
df1.hex <- as.h2o(df1)
# Columns 1-3 are the predictors, column 4 is the response.
test.dl <- h2o.deeplearning(
  x = 1:3,
  y = 4,
  training_frame = df1.hex
)
# Save the model and show the path returned by h2o.saveModel().
dlmodel.path <- h2o.saveModel(test.dl, dir = "file:///C:/", name = "modeldl")
dlmodel.path
But get an error:
Error in .h2o.doSafeREST(conn = conn, h2oRestApiVersion = h2oRestApiVersion, :
FS IO Failure:
accessed path : file:///C://modeldl/modelmetrics_DeepLearningModel__9fe11910a85d1371379ac7d536d64359_-5064771152374762981_on_Key_Frame__C__Users_store_AppData_Local_Temp_RtmpGGylNe_file1f18787f2989_csv_1.hex_2.DeepLearningModel__9fe11910a85d1371379ac7d536d64359.temporary.train.chunks8_-6759658083019717917.bin
I am using a Windows 10 computer. As has been pointed out by RHA, the file path/name is extremely long and is too long for Windows. How can I overcome this? Most of the file path characters are generated automatically by the h2o program. I am using the latest h2o update.
From sessionInfo(): other attached packages: [1] h2o_3.0.0.30
I would be grateful for your help.
Have you tried to add model_id = "something" to your h2o.deeplearning command?
# Same call as in the question, with an explicit model_id supplied.
test.dl <- h2o.deeplearning(
  x = 1:3,
  y = 4,
  training_frame = df1.hex,
  model_id = "myTest.dl"
)
I hope it could fix your problem.

Resources