Calling a function within foreach loop in R - r

I'm trying to use a foreach loop but if I try to fit a Gumbel distribution within the loop I get the error
"Error in { : task 1 failed - "The dgumbel function must be defined""
I have read many posts about functions and foreach loops and they all seem to suggest putting ".export = functionname" into the foreach line. I did this and don't understand why it is still not running. If I exclude the Gumbel fistdist line and fit only normal distributions, it all works fine.
If I use a normal for loop, the fitdist of a Gumbel distribution works fine,too.
# create data that will be used to fit a distribution
cum_prec <- matrix (nrow= 80, ncol=10)
x = c(1:6, 8:10)
for ( i in x){
cum_prec[,i] <- rnorm(80,400, 50)}
cum_prec[,7] <- rGumbel(80,400, 50)
# package fitdistrplus does not work for the gumbel distribution. Therefore, the gumbel distribution has to be added manually.
dgumbel <- function(x,a,b) {1/b*exp((a-x)/b)*exp(-exp((a-x)/b))}
pgumbel <- function(q,a,b) {exp(-exp((a-q)/b))}
qgumbel <- function(p,a,b) {a-b*log(-log(p))}
library(doParallel)
cl <- makeCluster(4)
registerDoParallel(cl)
clim <- foreach (n=1:10, .export=c("dgumbel", "pgumbel","qgumbel"), .combine= cbind, .packages= "fitdistrplus") %dopar%
{
rep=10000
normal <- matrix(nrow=rep, ncol=10)
x = c(1:6, 8:10)
for ( i in x){
Normal<-fitdist(cum_prec[,i],"norm")
normal[,i] <- rnorm(rep,Normal$estimate[1], Normal$estimate[2])
}
Gumbel<-fitdist(cum_prec[,7],"gumbel", start=list(a=0,b=1),optim.method="Nelder-Mead")
normal[,7] <- rGumbel(rep,Gumbel$estimate[1], Gumbel$estimate[2])
SU <- vector("numeric", rep)
for (i in 1:rep){
su = (quantile(normal[,1], probs=runif(1,0,1))+ quantile(normal[,2], probs=runif(1,0,1)) + quantile(normal[,3], probs=runif(1,0,1))
+ quantile(normal[,4], probs=runif(1,0,1))+ quantile(normal[,5], probs=runif(1,0,1))+ quantile(normal[,6], probs=runif(1,0,1))
+ quantile(normal[,7], probs=runif(1,0,1))+ quantile(normal[,8], probs=runif(1,0,1))+ quantile(normal[,9], probs=runif(1,0,1))
+ quantile(normal[,10], probs=runif(1,0,1)))/10
SU[i] <- su
}
}
stopCluster(cl)

Related

terra package returns error when try to run parallel operations

I'm working with raster package and I try to switch to terra but for some reasons that I don't understand, terra cannot reproduce the same operation of raster when working in parallel with packages such snowfall and future.apply. Here is a reproducible example.
library(terra)
r <- rast()
r[] <- 1:ncell(r)
m <- rast()
m[] <- c(rep(1,ncell(m)/5),rep(2,ncell(m)/5),rep(3,ncell(m)/5),rep(4,ncell(m)/5),rep(5,ncell(m)/5))
ms <- separate(m,other=NA)
plot(ms)
mymask <- function(ind){
tipo <- tipo_tav[ind]
mask <- ms[[ind]]
masked <-
terra::mask(
r,
mask
)
richard <- function(x){
k <-0.2
v <-0.3
a <-200
y0 <-2
y <- k/v*x*(1-((x/a)^v))+y0
return(y)
}
pred <- richard(masked)
pred <- clamp(pred,lower=0)
return(pred)
}
#the sequential usage works fine, faster than the `raster` counterpart
system.time(x <- mymask(1))#0.03
#when I try to run my function in parallel I receive an error
plan(multisession,workers=5)
system.time(pred_list <- future_lapply(1:5, FUN = mymask))
Error in .External(list(name = "CppMethod__invoke_notvoid", address = <pointer: (nil)>, :
NULL value as symbol address.
the exactly same code works well if I change rast with raster and terra::mask with raster::mask. See below:
library(raster)
r <- raster(r)
ms <- stack(ms)
mymask <- function(ind){
tipo <- tipo_tav[ind]
mask <- ms[[ind]]
masked <-
raster::mask(
r,
mask
)
richard <- function(x){
k <-0.2
v <-0.3
a <-200
y0 <-2
y <- k/v*x*(1-((x/a)^v))+y0
return(y)
}
pred <- richard(masked)
pred <- clamp(pred,lower=0)
return(pred)
}
#this works fine
system.time(x <- mymask(1))#0.06
#this works too
plan(multisession,workers=5)
system.time(pred_list <- future_lapply(1:5, FUN = mymask))#15.48
The same behavior if I use snowfall instead of future
library(snowfall)
sfInit(parallel = TRUE, cpus =5)
sfLibrary(terra)
sfExportAll()
system.time(pred_list <- sfLapply(1:5, fun = mymask))
sfStop()
this return the same error of future_lapply
Why is this happening? I've never seen such an error. I was hoping to take advantage of the higher speed of terra but so I'm stuck.
A SpatRaster cannot be serialized, you cannot send it to parallel compute nodes. Have a look here for more discussion.
Instead you can (a) send and receive filenames; (b) parallelize your custom function that you supply to app or lapp; (c) use the cores=n argument (where available, e.g. app and predict); (d) use a mechanism like wrap; (e) send a filename and a vector to make a SpatExtent to process and create a virtual raster from the output tiles (see ?vrt).
For example, you could do use a function like this (Option "a")
prich <- function(filein, fileout) {
r <- rast(filein)
richard <- function(x) {
k <-0.2
v <-0.3
a <-200
y0 <-2
y <- k/v*x*(1-((x/a)^v))+y0
y[y<0] <- 0
return(y)
}
x <- app(masked, richard, filename=fileout, overwrite=TRUE)
return(TRUE)
}
I use app because it is much more efficient for large rasters --- as it could avoid writing temp files for each of the 10 arithmetic operations with a SpatRaster. Given that you want to parallelize this relatively simple function, I assume the files are very large.
Or option "c":
richard <- function(x) {
k <-0.2
v <-0.3
a <-200
y0 <-2
y <- k/v*x*(1-((x/a)^v))+y0
y[y<0] <- 0
return(y)
}
x <- app(masked, richard, cores=12)
In neither case I included the masking. You could include it in option "a" but mask is disk I/O intensive, not computationally intensive, so it might be as efficient to do it in one step rather than in parallel.
With wrap you could do something like this
f <- function(w) {
x <- rast(w)
y <- richard(x)
wrap(y)
}
r <- rast(nrow=10, ncol=10, vals=1:100)
x <- f(wrap(r))
x <- rast(x)
Where f would be run in parallel. That only works for small rasters, but you could parallelize over tiles, and you can create tiles with terra::makeTiles.
More internal parallelization options will be coming, but don't hold your breath.

R - cpv (trotter package) and %dopar%

I'd like to know whether the cpv function within the trotter package works with %dopar%? I'm getting the following error:
task 1 failed - "object of type 'S4' is not subsettable"
Here's a small example:
library(doParallel)
library(trotter)
registerDoParallel(cores = 2)
x <- letters
combos <- cpv(2, 1:4)
print(combos)
num_combos <- length(combos)
results_list <- foreach(combo_num=1:num_combos) %dopar% { # many iterations
y <- x[combos[combo_num]]
# time consuming stuff follows that involves using y
}
Replacing %dopar% with %do% (or simply using a for loop) and it works fine.
Depending on the cluster type one needs to explicitly specify the used packages via the .packages argument. The following should work:
library(doParallel)
library(trotter)
cl <- makePSOCKcluster(2)
registerDoParallel(cl=cl)
x <- letters
combos <- cpv(2, 1:4)
num_combos <- length(combos)
rl <- foreach(combo_num=1:num_combos, .packages="trotter") %dopar% {
x[combos[combo_num]]
}

Use set.seed() with foreach() in R

I am currently running a simulation using a for loop in R, but want to switch over to a foreach loop since it is faster. I use set.seed() in the for loop, and would like to use this again with foreach so I can obtain identical results.
For example, suppose I have
x <- c()
for (i in 1:10){
set.seed(i)
x[i] <- rnorm(1)
}
How can I do this same thing using foreach? I don't think this works:
x <- foreach(i = 1:10, ...) %dopar% {set.seed(i) ... }
This works:
library (foreach)
fn<-function(i)
{
set.seed(i)
y <- rnorm(1)
return(y)
}
x<-foreach(i=1:10) %do% fn(i)
print(x)

R foreach could not find function "%dopar%"

When I using the doParallel library, I encountered this weird error, the system throws this
" Error in { : task 1 failed -could not find function "%dopar%"
To be specific, this is what I did
library(doParallel)
cl <- makeCluster(4)
registerDoParallel(cl)
# Read the data
coin95 <-read.csv('~/Documents/coin95.csv')
coin95 <- coin95[,!(names(coin95) %in% c("X"))]
coin95[c("Person")] <- sapply(coin95[c("Person")],as.character)
# create the name list
coin95_name <- as.character(coin95$Person)
coin95_name <- unique(coin95_name)
n <- as.numeric(length(coin95_name))
# the average counting process
ntw <- function(now){
foreach (Ii = coin95_name,.combine = "+",.export = c("coin95","n")) %dopar% {
time <-subset(coin95, subset = coin95$Person == Ii)$duration
stepfun(time,seq(0,length(time)))(now)/n
}
}
# the average cumulative hazard
lambda <- function(now,params){
b <- params[1]
sigma <- params[2]
mu <- params[3]
xi <- params[4]
beta1 <- params[5]
beta2 <- params[6]
k <- function(spread){
L0 <- (1+(spread -mu)*xi/sigma)^(-1/xi)
return(L0)
}
foreach(Ii = coin95_name,.combine = "+",.export = c("coin95","n")) %dopar% {
time <- subset(coin95, subset = coin95$Person == Ii)$duration
noncov <- subset(coin95, subset = coin95$Person == Ii)$noncovered
reim <- subset(coin95, subset = coin95$Person == Ii)$reimbursement
(b*now+sum( exp(-k(now-time[(time < now)])+beta1*noncov[(time < now)]+beta2*reim[(time <now)]) ))/n
}
}
So far, everything is GOOD, I have created two functions ntw and lambda using the foreach. They worked perfectly.
Then I create the third function also using the foreach:
# the distance
Time <- coin95$duration
Time <- sort(as.double(Time))
jl <- function(params){
res<-foreach(Ii = Time,.combine = "rbind",.export = c("ntw","lambda")) %dopar% {
(ntw(Ii)-ntw(Ii-1e-7)) * (ntw(Ii)- lambda(Ii,params))^2
}
return(sqrt(sum(res)))
}
guess<-c(0.0,1.3333,0.0,0.1,-1.2,3e-3)
Type jl(guess):
> jl(guess)
Show Traceback
Rerun with Debug
Error in { : task 1 failed -could not find function "%dopar%"
Any Idea what's going wrong ?
Quick fix for problem with foreach %dopar% is to reinstall these packages:
install.packages("doSNOW")
install.packages("doParallel")
install.packages("doMPI")
Above packages are responsible for parallelism in R. Bug which existed in old versions of these packages is now removed. I should mention that it will most likely help even though you are not using these packages in your code.

nlsBoot and foreach %dopar%: scoping issues

I would like to do bootstrap of residuals for nls fits in a loop. I use nlsBoot and in order to decrease computation time I would like to do that in parallel (on a Windows 7 system at the moment). Here is some code, which reproduces my problem:
#function for fitting
Falge2000 <- function(GP2000,alpha,PAR) {
(GP2000*alpha*PAR)/(GP2000+alpha*PAR-GP2000/2000*PAR)
}
#some data
PAR <- 10:1600
GPP <- Falge2000(-450,-0.73,PAR) + rnorm(length(PAR),sd=0.0001)
df1 <- data.frame(PAR,GPP)
#nls fit
mod <- nls(GPP~Falge2000(GP2000,alpha,PAR),start=list(GP2000=-450,alpha=-0.73),data=df1, upper=c(0,0),algorithm="port")
#bootstrap of residuals
library(nlstools)
summary(nlsBoot(mod,niter=5))
#works
#now do it several times
#and in parallel
library(foreach)
library(doParallel)
cl <- makeCluster(1)
registerDoParallel(cl)
ttt <- foreach(1:5, .packages='nlstools',.export="df1") %dopar% {
res <- nlsBoot(mod,niter=5)
summary(res)
}
#Error in { :
#task 1 failed - "Procedure aborted: the fit only converged in 1 % during bootstrapping"
stopCluster(cl)
I suspect this an issue with environments and after looking at the code of nlsBoot the problem seems to arise from the use of an anonymous function in a lapply call:
l1 <- lapply(1:niter, function(i) {
data2[, var1] <- fitted1 + sample(scale(resid1, scale = FALSE),
replace = TRUE)
nls2 <- try(update(nls, start = as.list(coef(nls)), data = data2),
silent = TRUE)
if (inherits(nls2, "nls"))
return(list(coef = coef(nls2), rse = summary(nls2)$sigma))
})
if (sum(sapply(l1, is.null)) > niter/2)
stop(paste("Procedure aborted: the fit only converged in",
round(sum(sapply(l1, is.null))/niter), "% during bootstrapping"))
Is there a way to use nlsBoot in a parallel loop? Or do I need to modify the function? (I could try to use a for loop instead of lapply.)
By moving the creation of the mod object into the %dopar% loop, it looks like everything works OK. Also, this automatically exports the df1 object, so you can remove the .export argument.
ttt <- foreach(1:5, .packages='nlstools') %dopar% {
mod <- nls(GPP~Falge2000(GP2000,alpha,PAR),start=list(GP2000=-450,alpha=-0.73),data=df1, upper=c(0,0),algorithm="port")
res <- nlsBoot(mod,niter=5)
capture.output(summary(res))
}
However, you might need to work out what you want returned. Using capture.output was just to see if things were working, since summary(res) seemed to only return NULL.

Resources