I am trying to run pdredge using the following example code, where the data is located at
https://github.com/aditibhaskar/help/blob/master/gages_urbanizing_and_ref_with_trends_cut_to_20years_2018-12-02.Rdata
# Fit a global linear model and run an all-subsets model selection on a cluster.
library(MuMIn)
require(snow)
require(parallel)
# Candidate predictors; two of them are log-transformed inside the formula.
variable.list <- c("log(hden_change_divided_by_hdenStart)", "hden_peak_change", "DRAIN_SQKM", "PPTAVG_BASIN", "SNOW_PCT_PRECIP", "log(HIRES_LENTIC_PCT)", "FRAGUN_BASIN", "BFI_AVE", "CLAYAVE", "WTDEPAVE", "AWCAVE", "RD_STR_INTERS", "RIP800_FOREST", "BAS_COMPACTNESS", "STREAMS_KM_SQ_KM", "RRMEAN", "SLOPE_PCT", "PCT_1ST_ORDER", "FRESHW_WITHDRAWAL")
# Global model: the response is the difference between the two slope columns,
# the right-hand side is every predictor joined with "+".
# `gages` is not created in this snippet; presumably it comes from the .Rdata
# file linked above.
y.365 <- lm(paste0("(SlopePct.365 - Ref1.SlopePct.365) ~", paste(variable.list, collapse="+")), data=gages)
# Make model fitting fail on missing values instead of dropping rows.
options(na.action = "na.fail")
no_cores <- detectCores() - 1
# NOTE(review): `cl` is created here but never used or stopped below; only
# `clust` is passed to pdredge().
cl <- makeCluster(no_cores)
# Use a "SOCK" cluster when the snow package is installed, otherwise "PSOCK".
clusterType <- if(length(find.package("snow", quiet = TRUE))) "SOCK" else "PSOCK"
# NOTE(review): the result of try() is not checked before it is used.
clust <- try(makeCluster(getOption("cl.cores", no_cores), type = clusterType))
# Nothing in this snippet copies `gages` to the workers in `clust`; the model
# call stored in y.365 refers to it by name (data=gages).
dredge.365 <- pdredge(y.365, rank="AICc", trace=2, cluster=clust)
From this I get these errors:
"2097152: In is.data.frame(data):
object 'gages' not found (model 2097151 skipped)
Error in pdredge(y.365, rank="AICc", trace=2, cluster=clust) :
the result is empty
What am I doing wrong? Thanks.
You forgot to export the model's data to the cluster nodes:
clusterExport(clust, "gages")
Related
I am trying to train multiple ANNs in parallel in R. The problem is that I don't know how to install Neuralnet on the VPS from my local machine.
I'm looking for some instructions like the following, which works perfectly to install R and Future package on VPS:
# Install the "future" package on a remote machine through a PSOCK worker.
library(parallel)
# Start an R worker on the remote host, connecting as the given user.
cl <- parallelly::makeClusterPSOCK("VPS ip", user = "my_user")
# On that worker, create the per-user library directory named by R_LIBS_USER.
clusterEvalQ(cl[1], { dir.create(Sys.getenv("R_LIBS_USER"), recursive=TRUE) })
# On that worker, install "future" from the CRAN cloud mirror.
# NOTE(review): presumably the same call with another package name (e.g.
# "neuralnet") installs that package on the worker — confirm on the VPS.
clusterEvalQ(cl[1], { install.packages("future", repos = "https://cloud.r-project.org") })
stopCluster(cl)
The script that implements parallel coding in R that I am testing is the following:
library(parallelly)
library(future)
library(parallel)
library(neuralnet)
data(iris)
head(iris)
nrow(iris)
# Worker task: hold out a random third of iris, fit a neural network on the
# remaining rows, and score the held-out rows.
#
# x: the task index supplied by the caller; it is returned unchanged.
# NOTE(review): the confusion table is computed but not returned — confirm
# whether the caller is meant to receive it instead of `x`.
red <- function(x) {
  n_obs <- nrow(iris)
  held_out_rows <- sample(n_obs, n_obs / 3)
  held_out <- iris[held_out_rows, ]
  fitting <- iris[-held_out_rows, ]

  feature_cols <- c("Sepal.Length", "Sepal.Width",
                    "Petal.Length", "Petal.Width")
  net <- neuralnet(as.numeric(Species) ~ Sepal.Length + Sepal.Width +
                     Petal.Length + Petal.Width,
                   fitting, hidden = c(10, 5))
  scores <- compute(net, held_out[, feature_cols])

  # Map the rounded network output back onto the species labels.
  predicted <- levels(iris$Species)[round(scores$net.result)]
  scored <- data.frame(Real = held_out$Species, Predicted = predicted)
  mse <- table(scored$Predicted, scored$Real)
  x
}
# Build the worker list: one remote host followed by three local workers.
local_workers <- rep("localhost", times=3)
workers <- rep("VPS ip", times =1)
workers <- c(workers,local_workers)
# Time cluster start-up plus the 20 parallel calls to red().
system.time({
cl <- parallelly::makeClusterPSOCK(workers, user = "my_user")
# Only the `iris` data set is sent to the workers here.
# NOTE(review): library(neuralnet) above attaches the package in the master
# session only; nothing in this snippet attaches it on the workers, e.g. via
# clusterEvalQ(cl, library(neuralnet)), and it must also be installed on every
# worker host.
parallel::clusterExport(cl, "iris")
resultados <- parSapply(cl, 1:20, red)})
#resultados
stopCluster(cl)
And the error that occurs when running it is:
Error in checkForRemoteErrors(val) :
4 nodes produced errors; first error: could not find function "neuralnet"
I am an absolute newbie to R-Studio and want to use svm() of the e1071 package.
I went through David Meyer's paper.
I can't get classAgreement() to run.
What do I need to do before I can use classAgreement()?
Thanks a lot!
# Train an SVM on two thirds of the Glass data and evaluate on the rest.
library(e1071)
data(Glass, package="mlbench")
index <- 1:nrow(Glass)
# Randomly pick a third of the row numbers as the test set.
testindex <- sample(index, trunc(length(index)/3))
testset <- Glass[testindex,]
trainset <- Glass[-testindex,]
svm.model <- svm(Type ~ ., data = trainset, cost = 100, gamma = 1)
# Column 10 is dropped for prediction and used as the true label below
# (presumably the Type column — confirm against names(Glass)).
svm.pred <- predict(svm.model, testset[,-10])
# The contingency table is printed here but not assigned to any object.
table(pred = svm.pred, true = testset[,10])
# `table` on this line is therefore still the base R function, not the result
# of the previous line.
classAgreement(table)
Running your code the classAgreement(table) throws the following error:
Error in sum(tab) : invalid 'type' (closure) of argument
This happens because `table` here still refers to the base R function: the previous line prints the contingency table but never assigns it to an object, which I think is what you intended to do.
So you can either do one of the following:
svm.tab <- table(pred = svm.pred, true = testset[,10])
classAgreement(svm.tab)
Or just in one go
classAgreement(table(pred = svm.pred, true = testset[,10]))
I'm trying to set up an LSTM RNN using mxnet in R. However, when I try to train my network I get the messages below, and R reports a fatal error every time:
"[00:36:08] d:\program files (x86)\jenkins\workspace\mxnet\mxnet\src\operator\tensor./matrix_op-inl.h:155: Using target_shape will be deprecated.
[00:36:08] d:\program files (x86)\jenkins\workspace\mxnet\mxnet\src\operator\tensor./matrix_op-inl.h:155: Using target_shape will be deprecated.
[00:36:08] d:\program files (x86)\jenkins\workspace\mxnet\mxnet\src\operator\tensor./matrix_op-inl.h:155: Using target_shape will be deprecated."
here is my code:
# install.packages("drat", repos="https://cran.rstudio.com")
# drat:::addRepo("dmlc")
# install.packages("mxnet")
# Load the input table and split it chronologically into training (first 80%
# of rows) and test (remaining rows) sets.
# NOTE(review): rm(list = ls()) wipes the whole global environment; avoid it
# when this code is sourced from a larger session.
rm(list = ls())
require(mxnet)
require(mlbench)
inputData <- read.table(file.path(getwd(), "Data", "input.csv"),
                        header = TRUE, sep = ",")
inputData$X <- as.Date(inputData$X)
inputData <- na.omit(inputData)

# `1:nrow(inputData)*0.8` parses as (1:n) * 0.8 because `:` binds tighter than
# `*`. That yields fractional indices (0.8, 1.6, ...) which R truncates, so the
# first one becomes 0 and is dropped while other rows are selected twice.
# Build the first-80% index explicitly instead.
n.rows <- nrow(inputData)
index <- seq_len(floor(n.rows * 0.8))
# Explicit test index: `-index` would select nothing if `index` were empty.
test.index <- setdiff(seq_len(n.rows), index)

# Keep the dates of each split, then remove the date column (column 1).
train.dates <- inputData[index, 1]
test.dates <- inputData[test.index, 1]
inputData[, 1] <- NULL

train <- inputData[index, ]
test <- inputData[test.index, ]

# The last column is the target; all other columns are features.
train.x <- data.matrix(train[, -ncol(train)])
test.x <- data.matrix(test[, -ncol(test)])
train.y <- train[, ncol(train)]
test.y <- test[, ncol(test)]
# Build the label array for `X`: every element is replaced by the element that
# follows it in column-major order, and the last element wraps around to the
# first.
#
# X: a matrix; only its first two dimensions are used.
# Returns an array with the same dim as X, initialised to 0 and then filled
# with the shifted values.
get.label <- function(X) {
  label <- array(0, dim = dim(X))
  n <- dim(X)[1] * dim(X)[2]
  # seq_len() gives an empty index when X has no cells; the previous
  # 0:(w-1) / 1:d loops counted backwards in that case.
  idx <- seq_len(n)
  # idx %% n + 1 maps 1..(n-1) to 2..n and n back to 1.
  label[idx] <- X[idx %% n + 1]
  label
}
# Build the training/validation inputs and train the LSTM.
# The feature matrices are transposed before being passed on, and labels are
# derived from the transposed data with get.label().
X.train.label <- get.label(t(train.x))
X.val.label <- get.label(t(test.x))
X.train <- list(data=t(train.x), label=X.train.label)
X.val <- list(data=t(test.x), label=X.val.label)
# Hyperparameters passed to mx.lstm() below.
batch.size = 1
# NOTE(review): seq.len is 32 while input.size and num.label below are 15;
# 32 appears nowhere else in this script.
seq.len = 32
num.hidden = 16
num.embed = 16
num.lstm.layer = 1
num.round = 1
learning.rate= 0.1
wd=0.00001
clip_gradient=1
update.period = 1
# Train on the CPU with uniformly initialised weights.
model <- mx.lstm(X.train, X.val,
ctx=mx.cpu(),
num.round=num.round,
update.period=update.period,
num.lstm.layer=num.lstm.layer,
seq.len=seq.len,
num.hidden=num.hidden,
num.embed=num.embed,
num.label=15,
batch.size=batch.size,
input.size=15,
initializer=mx.init.uniform(0.1),
learning.rate=learning.rate,
wd=wd,
clip_gradient=clip_gradient)
Input dataset consists of Date column, 15 features, and the target value.
Please help me. Thanks in advance!
The message that you receive is a warning, and you can ignore it. The real problem is the mismatch of shapes. If I run your code I receive:
[14:06:36] src/ndarray/ndarray.cc:348: Check failed: from.shape() == to->shape() operands shape mismatchfrom.shape = (1,15) to.shape=(1,32)
To fix this problem set seq.len = 15, since you have 15 features. If you update the seq.len and run your code, you will see that training started (notice, I also receive the same warning as you):
[14:08:17] src/operator/tensor/./matrix_op-inl.h:159: Using target_shape will be deprecated.
[14:08:17] src/operator/tensor/./matrix_op-inl.h:159: Using target_shape will be deprecated.
[14:08:17] src/operator/tensor/./matrix_op-inl.h:159: Using target_shape will be deprecated.
Iter [1] Train: Time: 0.263811111450195 sec, NLL=2.71622828266634, Perp=15.1231742012938
Iter [1] Val: NLL=2.51107457406329, Perp=12.3181597260587
Progress has been made on getting the parallel processing part working but saving the vector with the fetch distances is not working properly. The error I get is
df_Test_Fetch <- data.frame(x_lake_length)
Error in data.frame(x_lake_length) : object 'x_lake_length' not found
write.table(df_Test_Fetch,file="C:/tempTest_Fetch.csv",row.names=TRUE,col.names=TRUE, sep=",")
Error in is.data.frame(x) : object 'df_Test_Fetch' not found
I have tried altering the code below so that the result of the foreach step is assigned to x_lake_length, but that did not produce the vector I was hoping for. How can I get the actual results saved to a CSV file? I am running a Windows 8 computer with R x64 3.3.0.
Thank you in advance
Jen
Here is the full code.
# make sure there is no preexisting data
rm(x_lake_length)
# Libraries ---------------------------------------------------------------
if (!require("pacman")) install.packages("pacman")
pacman::p_load(lakemorpho,rgdal,maptools,sp,doParallel,foreach,
doParallel)
# HPC ---------------------------------------------------------------------
cores_2_use <- detectCores() - 2
cl <- makeCluster(cores_2_use, useXDR = F)
clusterSetRNGStream(cl, 9956)
registerDoParallel(cl, cores_2_use)
# Data --------------------------------------------------------------------
ogrDrivers()
dsn <- system.file("vectors", package = "rgdal")[1]
# the line below is commented out but when I run the script on my data the line below is what I use instead of the one above
# then making the name changes as needed
# dsn<-setwd("J:\\Elodea\\ByHUC6\\")
ogrListLayers(dsn)
ogrInfo(dsn=dsn, layer="trin_inca_pl03")
owd <- getwd()
setwd(dsn)
ogrInfo(dsn="trin_inca_pl03.shp", layer="trin_inca_pl03")
setwd(owd)
x <- readOGR(dsn=dsn, layer="trin_inca_pl03")
summary(x)
# Analysis ----------------------------------------------------------------
# Measures lake i of `x` and stores the value in position i of x_lake_length.
# x_lake_length is created inside the function body, so it is a local
# variable: nothing is written to an object of that name in the calling
# environment.
myfun <- function(x,i){tmp<-lakeMorphoClass(x[i,],NULL,NULL,NULL)
x_lake_length<-vector("numeric",length = nrow(x))
x_lake_length[i]<-lakeMaxLength(tmp,200)
print(i)
# Sys.sleep() is the last expression evaluated, so its value, not
# x_lake_length, is what the function returns.
Sys.sleep(0.1)}
# The value of this foreach() call is not assigned to anything.
foreach(i = 1:nrow(x),.combine=cbind,.packages=c("lakemorpho","rgdal")) %dopar% (
myfun(x,i)
)
options(digits=10)
df_Test_Fetch <- data.frame(x_lake_length)
write.table(df_Test_Fetch,file="C:/temp/Test_Fetch.csv",row.names=TRUE,col.names=TRUE, sep=",")
print(proc.time())
I think this is what you want, though without understanding the subject matter I can't be 100% sure.
What I did was add a return() to your parallelized function and assigned the value of that returned object to x_lake_length when you call the foreach. But I'm only guessing that that's what you were trying to do, so please correct me if I'm wrong.
# make sure there is no preexisting data
rm(x_lake_length)
# Libraries ---------------------------------------------------------------
if (!require("pacman")) install.packages("pacman")
pacman::p_load(lakemorpho,rgdal,maptools,sp,doParallel,foreach,
doParallel)
# HPC ---------------------------------------------------------------------
cores_2_use <- detectCores() - 2
cl <- makeCluster(cores_2_use, useXDR = F)
clusterSetRNGStream(cl, 9956)
registerDoParallel(cl, cores_2_use)
# Data --------------------------------------------------------------------
ogrDrivers()
dsn <- system.file("vectors", package = "rgdal")[1]
# the line below is commented out but when I run the script on my data the line below is what I use instead of the one above
# then making the name changes as needed
# dsn<-setwd("J:\\Elodea\\ByHUC6\\")
ogrListLayers(dsn)
ogrInfo(dsn=dsn, layer="trin_inca_pl03")
owd <- getwd()
setwd(dsn)
ogrInfo(dsn="trin_inca_pl03.shp", layer="trin_inca_pl03")
setwd(owd)
x <- readOGR(dsn=dsn, layer="trin_inca_pl03")
summary(x)
# Analysis ----------------------------------------------------------------
# Compute the maximum lake length for row i of `x`.
#
# x: the spatial object read with readOGR(); row i is passed to
#    lakeMorphoClass().
# i: index of the row (lake) to measure.
# Returns the value of lakeMaxLength(tmp, 200) for that lake.
myfun <- function(x, i) {
  tmp <- lakeMorphoClass(x[i, ], NULL, NULL, NULL)
  lake_length <- lakeMaxLength(tmp, 200)
  print(i)
  Sys.sleep(0.1)
  # Return only this lake's value. Returning a full-length vector with a
  # single filled slot made foreach() bind nrow(x) mostly-zero columns into an
  # n x n matrix instead of one vector of lengths.
  return(lake_length)
}
# .combine = c concatenates the per-lake values into one numeric vector, in
# the order of i. seq_len() also copes with an object that has no rows.
x_lake_length <- foreach(i = seq_len(nrow(x)), .combine = c,
                         .packages = c("lakemorpho", "rgdal")) %dopar% {
  myfun(x, i)
}
# Release the worker processes now that the parallel step is finished.
stopCluster(cl)
options(digits=10)
df_Test_Fetch <- data.frame(x_lake_length)
write.table(df_Test_Fetch,file="C:/temp/Test_Fetch.csv",row.names=TRUE,col.names=TRUE, sep=",")
print(proc.time())
I'm playing around with the German Credit dataset from the "caret" package.
First, I build a very simple model:
# Fit a random forest to the GermanCredit data with a seeded train/test split.
library(caret)
library(randomForest)
library(pmml)
data(GermanCredit)
# Drop the columns flagged by nearZeroVar().
GermanCredit <- GermanCredit[, -nearZeroVar(GermanCredit)]
# Remove one indicator column from each of several groups
# (presumably to avoid redundant dummy variables — confirm).
GermanCredit$CheckingAccountStatus.lt.0 <- NULL
GermanCredit$SavingsAccountBonds.lt.100 <- NULL
GermanCredit$EmploymentDuration.lt.1 <- NULL
GermanCredit$EmploymentDuration.Unemployed <- NULL
GermanCredit$Personal.Male.Married.Widowed <- NULL
GermanCredit$Property.Unknown <- NULL
GermanCredit$Housing.ForFree <- NULL
# Seeded 80/20 partition on the Class column.
set.seed(100)
inTrain <- createDataPartition(GermanCredit$Class, p = .8)[[1]]
GermanCreditTrain <- GermanCredit[ inTrain, ]
GermanCreditTest <- GermanCredit[-inTrain, ]
# Separate seed for the model fit itself.
set.seed(1056)
credit.rf <- randomForest(Class~., data = GermanCreditTrain, ntree = 500)
Now, if I predict the outcome Class on the test set, and do this several times, and then compare the results:
credit.pred1 <- predict(credit.rf, GermanCreditTest)
credit.pred2 <- predict(credit.rf, GermanCreditTest)
credit.pred3 <- predict(credit.rf, GermanCreditTest)
all.equal(credit.pred1, credit.pred2)
all.equal(credit.pred2, credit.pred3)
all.equal(credit.pred1, credit.pred3)
I get the same predictions for all 3 passes. That is when I type the code manually into the RStudio console. But if I copy-paste the code from my text editor (which I've posted here: https://gist.github.com/anonymous/32b3c8194362d2e10527), I get a message saying that there are 3 string differences in the second and third comparisons!
How is this possible?
Try using caret's train function:
credit.rf <- train(Class~., data = GermanCreditTrain, method="rf")
instead of
credit.rf <- randomForest(Class~., data = GermanCreditTrain, ntree = 500)
I was able to reproduce the issue but am not sure what is causing it. However, the above seems to work when pasted:
credit.rf <- train(Class~., data = GermanCreditTrain, method="rf")
>
> credit.pred1 <- predict(credit.rf, GermanCreditTest)
> credit.pred2 <- predict(credit.rf, GermanCreditTest)
> credit.pred3 <- predict(credit.rf, GermanCreditTest)
>
> all.equal(credit.pred1, credit.pred2)
[1] TRUE
> all.equal(credit.pred2, credit.pred3)
[1] TRUE
> all.equal(credit.pred1, credit.pred3)
[1] TRUE