Parallel computing problems - r

I seem to have run into a problem when trying to run parallel computations in R:
library(parallel)
library(foreach)
library(doParallel)
library(snow)
cl <- makeCluster(detectCores())
Loading required package: Rmpi
Error : .onLoad failed in loadNamespace() for 'Rmpi', details:
call: inDL(x, as.logical(local), as.logical(now), ...)
error: unable to load shared object 'C:/Users/PCCasa/Documents/R/win- library/3.2/Rmpi/libs/x64/Rmpi.dll':
LoadLibrary failure: The system is unable to find the package specified.
Error in makeMPIcluster(spec, ...) :
the `Rmpi' package is needed for MPI clusters.
registerDoParallel(cl)
Error in registerDoParallel(cl) : Object 'cl' not found
Windows produces an error which advises me to either repair or reinstall msmpi.dll. Could you kindly let me know what the best procedure would be to solve this issue?

None, the Rmpi spawn function is not implemented for Windows. Here is the relevant excerpt from the Rmpi source code:
if (.Platform$OS=="windows"){
#stop("Spawning is not implemented. Please use mpiexec with Rprofile.")
workdrive <- unlist(strsplit(getwd(),":"))[1]
workdir <- unlist(strsplit(getwd(),"/"))
if (length(workdir) > 1)
workdir <-paste(workdir, collapse="\\")
else
workdir <- paste(workdir,"\\")
localhost <- Sys.getenv("COMPUTERNAME")
networkdrive <-NULL #.Call("RegQuery", as.integer(2),paste("NETWORK\\",workdrive,sep=""),
#PACKAGE="Rmpi")
remotepath <-networkdrive[which(networkdrive=="RemotePath")+1]
mapdrive <- as.logical(mapdrive && !is.null(remotepath))
arg <- c(Rscript, R.home(), workdrive, workdir, localhost, mapdrive, remotepath)
if (.Platform$r_arch == "i386")
realns <- mpi.comm.spawn(slave = system.file("Rslaves32.cmd",
package = "Rmpi"), slavearg = arg, nslaves = nslaves,
info = 0, root = root, intercomm = intercomm, quiet = quiet)
else
realns <- mpi.comm.spawn(slave = system.file("Rslaves64.cmd",
package = "Rmpi"), slavearg = arg, nslaves = nslaves,
info = 0, root = root, intercomm = intercomm, quiet = quiet)
}

Related

Error running Rmpi when doing parallel computing

I'm trying to run parallel computations in R with the lines below:
library(parallel)
library(snow)
library(snowFT)
library(VGAM)
library(dplyr)
library(Rmpi)
nCores <- detectCores() - 1
cl <- makeCluster(nCores)
Then R returns an error
Error in Rmpi::mpi.comm.spawn(slave = mpitask, slavearg = args, nslaves = count, : Internal MPI error!, error stack: MPI_Comm_spawn(cmd="C:/R/R-40~1.2/bin/x64/Rscript.exe", argv=0x00000223DB137530, maxprocs=11, MPI_INFO_NULL, root=0, MPI_COMM_SELF, intercomm=0x00000223DCFCD998, errors=0x00000223DA9FC9E8) failed Internal MPI error! FAILspawn not supported without process manager
3. Rmpi::mpi.comm.spawn(slave = mpitask, slavearg = args, nslaves = count, intercomm = intercomm)
2. makeMPIcluster(spec, ...)
1. makeCluster(nCores)
I've tried to install MPICH2 on Windows from here, but the final cmd command mpiexec -validate always returns FAIL.
Could you please elaborate on how to solve this issue?
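The problem is that makeCluster() is defined by more than one of the attached packages: both parallel and snow export it, and because snow is attached after parallel, a bare makeCluster(nCores) call resolves to the snow version (which is why the traceback shows makeMPIcluster). As such, I use parallel::makeCluster(nCores) to solve the issue.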

multiple errors in R

I am trying to get three_d_array for dataset using R language. I have the same data description in different files so I ran my code for each file by changing the name only. The following is my code
#Sys.setenv(JAVA_HOME="C:\\Program Files\\Java\\jre7")
install.packages('lazyeval')
install.packages("xlsx")
install.packages("rJava")
options(java.parameters = "-Xmx10000m")
#options(java.parameters="-Xmx4000m")
options(java.home="C:/Program Files/Java/jre7/")
library(rJava)
library(xlsx)
raw23370 <- read.xlsx("C:\\Users\\Taqwa\\Desktop\\stations\\stations\\23370.xlsx",1)
out <- createWorkbook()
three_d_array <- array(0,dim = c(92, 6, 600))
#options(java.parameters = "-Xmx1000m")
for (i in 1:600){
candy = 1:nrow(raw23370)
B=sample(candy,nrow(raw23370) , replace=T)
a=raw23370[B,]
assign(paste0("sheet", i), createSheet(wb = out, sheetName = as.character(i)))
addDataFrame(x = a, sheet = eval(as.name(paste0("sheet", i))))
}
saveWorkbook(out, "C:\\Users\\Taqwa\\Desktop\\stations\\23370_output.xlsx")
I kept getting multiple errors after running the code for file number 20.
The following are my errors:
library(rJava)
Error : .onLoad failed in loadNamespace() for 'rJava', details:
call: inDL(x, as.logical(local), as.logical(now), ...)
error: unable to load shared object 'C:/Users/Taqwa/Documents/R/win-library/3.1/rJava/libs/x64/rJava.dll':
LoadLibrary failure: The specified module could not be found.
Error: package or namespace load failed for ‘rJava’
> library(xlsx)
Loading required package: rJava
Error : .onLoad failed in loadNamespace() for 'rJava', details:
call: inDL(x, as.logical(local), as.logical(now), ...)
error: unable to load shared object 'C:/Users/Taqwa/Documents/R/win-library/3.1/rJava/libs/x64/rJava.dll':
LoadLibrary failure: The specified module could not be found.
Error: package ‘rJava’ could not be loaded
> raw23370 <- read.xlsx("C:\\Users\\Taqwa\\Desktop\\stations\\stations\\23370.xlsx",1)
Error: could not find function "read.xlsx"
> out <- createWorkbook()
Error: could not find function "createWorkbook"
> three_d_array <- array(0,dim = c(92, 6, 600))
> #options(java.parameters = "-Xmx1000m")
> for (i in 1:600){
+ candy = 1:nrow(raw23370)
+
+ B=sample(candy,nrow(raw23370) , replace=T)
+
+ a=raw23370[B,]
+ assign(paste0("sheet", i), createSheet(wb = out, sheetName = as.character(i)))
+ # you don't really need your three_d_array
+ addDataFrame(x = a, sheet = eval(as.name(paste0("sheet", i))))
+ }
Error in nrow(raw23370) : object 'raw23370' not found
> saveWorkbook(out, "C:\\Users\\Taqwa\\Desktop\\stations\\23370_output.xlsx")
Error: could not find function "saveWorkbook"
Could anyone provide help?

SparkR Null Pointer Exception when trying to create a data frame

When trying to create a data frame in sparkR, I get an error regarding a Null Pointer Exception. I have pasted my code, and the error message below. Do I need to install any more packages in order for this code to run?
CODE
SPARK_HOME <- "C:\\Users\\erer\\Downloads\\spark-1.5.2-bin-hadoop2.4\\spark-1.5.2-bin-hadoop2.4"
Sys.setenv('SPARKR_SUBMIT_ARGS'='"--packages" "com.databricks:spark-csv_2.10:1.2.0" "sparkr-shell"')
library(SparkR, lib.loc = "C:\\Users\\erer\\Downloads\\spark-1.5.2-bin-hadoop2.4\\R\\lib")
library(SparkR)
library(rJava)
sc <- sparkR.init(master = "local", sparkHome = SPARK_HOME)
sqlContext <- sparkRSQL.init(sc)
localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
df <- createDataFrame(sqlContext, localDF)
ERROR:
Error in invokeJava(isStatic = FALSE, objId$id, methodName, ...) :
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, localhost): java.lang.NullPointerException
at java.lang.ProcessBuilder.start(Unknown Source)
at org.apache.hadoop.util.Shell.runCommand(Shell.java:445)
at org.apache.hadoop.util.Shell.run(Shell.java:418)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:650)
at org.apache.hadoop.fs.FileUtil.chmod(FileUtil.java:873)
at org.apache.hadoop.fs.FileUtil.chmod(FileUtil.java:853)
at org.apache.spark.util.Utils$.fetchFile(Utils.scala:381)
at org.apache.spark.executor.Executor$$anonfun$org$apache$spark$executor$Executor$$updateDependencies$5.apply(Executor.scala:405)
at org.apache.spark.executor.Executor$$anonfun$org$apache$spark$executor$Executor$$updateDependencies$5.apply(Executor.scala:397)
at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:7
You need to point library SparkR to the directory where the local SparkR code is, specified in the lib.loc parameter (if you downloaded a Spark binary, the SPARK_HOME/R/lib will be already populated for you):
`library(SparkR, lib.loc = "/home/kris/spark/spark-1.5.2-bin-hadoop2.6/R/lib")`
See also this tutorial on R-bloggers on how to run Spark from Rstudio: http://www.r-bloggers.com/sparkr-with-rstudio-in-ubuntu-12-04/

Passing a C++ package library to a doParallel cluster

I have some plyr code that is meant to run an Rcpp function that I have written:
nodes = detectCores()
cl = makeCluster(nodes)
registerDoParallel(cl)
l = llply(mylist, function(x) {
.Call("myfancyfunction", PACKAGE = "mypackage", ...)
}, .parallel = TRUE, .paropts = list(.packages = "mypackage"))
However, even when I include the package I get the error:
Error in do.ply(i) :
task 1 failed - ""myfancyfunction" not available for .Call() for package "mypackage""
How do I make my libraries accessible to the parallel processes?
The error doesn't indicate any issue with plyr or parallel processing; it means you haven't properly registered your C/C++ routine in your package. For example, this works:
library(plyr)
library(doParallel)
nodes = detectCores()
cl = makeCluster(nodes)
registerDoParallel(cl)
l <- llply(list(1,2), function(x) {
.Call("endpoints", 1L, 1L, 1L, TRUE, PACKAGE="xts")
}, .parallel=TRUE, .paropts=list(.packages="xts"))
library(mypackage); .Call("myfancyfunction", x, PACKAGE="mypackage") in a normal R session likely throws the same error.
See Registering native routines in Writing R Extensions for details on ways you can register myfancyfunction.

Problems installing R packages

I'm setting up a new laptop running Gentoo and wish to install R (as I do on all of my computers!).
However, I've hit a bit of a problem when it comes to installing packages.
I first tried to:
> install.packages(c("ggplot2", "plyr", "reshape2"))
And it duly downloaded all of the packages and their dependencies. However, they didn't install, reporting:
Error in library(data.table) : there is no package called ‘data.table’
Calls: .First -> library
Execution halted
Error in library(data.table) : there is no package called ‘data.table’
Calls: .First -> library
Execution halted
Error in library(data.table) : there is no package called ‘data.table’
Calls: .First -> library
Execution halted
Error in library(data.table) : there is no package called ‘data.table’
Calls: .First -> library
Execution halted
Error in library(data.table) : there is no package called ‘data.table’
Calls: .First -> library
Execution halted
Error in library(data.table) : there is no package called ‘data.table’
Calls: .First -> library
Execution halted
Error in library(data.table) : there is no package called ‘data.table’
Calls: .First -> library
Execution halted
Error in library(data.table) : there is no package called ‘data.table’
Calls: .First -> library
Not a problem, I'll just install the data.table package. Unfortunately...
> install.packages("data.table")
trying URL 'http://cran.uk.r-project.org/src/contrib/data.table_1.8.2.tar.gz'
Content type 'application/x-gzip' length 818198 bytes (799 Kb)
opened URL
==================================================
downloaded 799 Kb
Error in library(data.table) : there is no package called ‘data.table’
Calls: .First -> library
Execution halted
The downloaded source packages are in
‘/tmp/RtmpbQtALj/downloaded_packages’
Updating HTML index of packages in '.Library'
Making packages.html ... done
Warning message:
In install.packages("data.table") :
installation of package ‘data.table’ had non-zero exit status
And there is no indication of why installation failed at all, so I've no idea how to go about solving this. A traceback() isn't available either.
GCC is installed and configured as the output of gcc-config shows (and the fact that I can install other software from source no problem).
# gcc-config -l
[1] x86_64-pc-linux-gnu-4.6.3 *
Stumped as to how to go about solving this one. Any thoughts or ideas on how to get more information out of install.packages() welcome.
EDIT : contents of .First as requested....
> .First
function ()
{
library(data.table)
library(foreign)
library(ggplot2)
library(Hmisc)
library(lattice)
library(plyr)
library(rms)
library(xtable)
cat("\nWelcome at", date(), "\n")
}
EDIT 2 : No Rprofile.site but there is /usr/lib64/R/library/base/R/Rprofile which has....
# cat /usr/lib64/R/library/base/R/Rprofile
### This is the system Rprofile file. It is always run on startup.
### Additional commands can be placed in site or user Rprofile files
### (see ?Rprofile).
### Notice that it is a bad idea to use this file as a template for
### personal startup files, since things will be executed twice and in
### the wrong environment (user profiles are run in .GlobalEnv).
.GlobalEnv <- globalenv()
attach(NULL, name = "Autoloads")
.AutoloadEnv <- as.environment(2)
assign(".Autoloaded", NULL, envir = .AutoloadEnv)
T <- TRUE
F <- FALSE
R.version <- structure(R.Version(), class = "simple.list")
version <- R.version # for S compatibility
## for backwards compatibility only
R.version.string <- R.version$version.string
## NOTA BENE: options() for non-base package functionality are in places like
## --------- ../utils/R/zzz.R
options(keep.source = interactive())
options(warn = 0)
# options(repos = c(CRAN="#CRAN#"))
# options(BIOC = "http://www.bioconductor.org")
options(timeout = 60)
options(encoding = "native.enc")
options(show.error.messages = TRUE)
## keep in sync with PrintDefaults() in ../../main/print.c :
options(scipen = 0)
options(max.print = 99999)# max. #{entries} in internal printMatrix()
options(add.smooth = TRUE)# currently only used in 'plot.lm'
options(stringsAsFactors = TRUE)
if(!interactive() && is.null(getOption("showErrorCalls")))
options(showErrorCalls = TRUE)
local({dp <- Sys.getenv("R_DEFAULT_PACKAGES")
if(identical(dp, "")) # marginally faster to do methods last
dp <- c("datasets", "utils", "grDevices", "graphics",
"stats", "methods")
else if(identical(dp, "NULL")) dp <- character(0)
else dp <- strsplit(dp, ",")[[1]]
dp <- sub("[[:blank:]]*([[:alnum:]]+)", "\\1", dp) # strip whitespace
options(defaultPackages = dp)
})
## Expand R_LIBS_* environment variables.
Sys.setenv(R_LIBS_SITE =
.expand_R_libs_env_var(Sys.getenv("R_LIBS_SITE")))
Sys.setenv(R_LIBS_USER =
.expand_R_libs_env_var(Sys.getenv("R_LIBS_USER")))
.First.sys <- function()
{
for(pkg in getOption("defaultPackages")) {
res <- require(pkg, quietly = TRUE, warn.conflicts = FALSE,
character.only = TRUE)
if(!res)
warning(gettextf('package %s in options("defaultPackages") was not found', sQuote(pkg)),
call.=FALSE, domain = NA)
}
}
.OptRequireMethods <- function()
{
if("methods" %in% getOption("defaultPackages")) {
res <- require("methods", quietly = TRUE, warn.conflicts = FALSE,
character.only = TRUE)
if(!res)
warning('package "methods" in options("defaultPackages") was not found', call.=FALSE)
}
}
if(nzchar(Sys.getenv("R_BATCH"))) {
.Last.sys <- function()
{
cat("> proc.time()\n")
print(proc.time())
}
## avoid passing on to spawned R processes
## A system has been reported without Sys.unsetenv, so try this
try(Sys.setenv(R_BATCH=""))
}
###-*- R -*- Unix Specific ----
.Library <- file.path(R.home(), "library")
.Library.site <- Sys.getenv("R_LIBS_SITE")
.Library.site <- if(!nchar(.Library.site)) file.path(R.home(), "site-library") else unlist(strsplit(.Library.site, ":"))
.Library.site <- .Library.site[file.exists(.Library.site)]
invisible(.libPaths(c(unlist(strsplit(Sys.getenv("R_LIBS"), ":")),
unlist(strsplit(Sys.getenv("R_LIBS_USER"), ":")
))))
local({
## we distinguish between R_PAPERSIZE as set by the user and by configure
papersize <- Sys.getenv("R_PAPERSIZE_USER")
if(!nchar(papersize)) {
lcpaper <- Sys.getlocale("LC_PAPER") # might be null: OK as nchar is 0
papersize <- if(nchar(lcpaper))
if(length(grep("(_US|_CA)", lcpaper))) "letter" else "a4"
else Sys.getenv("R_PAPERSIZE")
}
options(papersize = papersize,
printcmd = Sys.getenv("R_PRINTCMD"),
dvipscmd = Sys.getenv("DVIPS", "dvips"),
texi2dvi = Sys.getenv("R_TEXI2DVICMD"),
browser = Sys.getenv("R_BROWSER"),
pager = file.path(R.home(), "bin", "pager"),
pdfviewer = Sys.getenv("R_PDFVIEWER"),
useFancyQuotes = TRUE)
})
## non standard settings for the R.app GUI of the Mac OS X port
if(.Platform$GUI == "AQUA") {
## this is set to let RAqua use both X11 device and X11/TclTk
if (Sys.getenv("DISPLAY") == "")
Sys.setenv("DISPLAY" = ":0")
## this is to allow gfortran compiler to work
Sys.setenv("PATH" = paste(Sys.getenv("PATH"),":/usr/local/bin",sep = ""))
}## end "Aqua"
local({
tests_startup <- Sys.getenv("R_TESTS")
if(nzchar(tests_startup)) source(tests_startup)
})
Looks like data.table is not installed for the user that is running the install.packages command. I think wrapping that .First function in if (interactive()) { } would be a good idea in general. Otherwise, you need to install data.table and any other packages that load at startup, since install.packages launches a new R process that runs the .Rprofile file when starting.
WARNING: You're using a non-UTF8 locale, therefore only ASCII characters will work.
Please read R for Mac OS X FAQ (see Help) section 9 and adjust your system preferences accordingly.
[History restored from /Users/carlosaburto/.Rapp.history]
defaults write org.R-project.R force.LANG en_US.UTF-8
Error: unexpected symbol in "defaults write"
starting httpd help server ... done

Resources