Is it possible to request that parallel::mclapply() abandon all further processing as soon as possible if it encounters an error (e.g., a stop()) in any one of its processes?
Here is another approach: The idea is to modify parallel::mclapply() at the three places indicated with #!!. The new argument stop.on.error can be used to specify whether the execution should stop when an error occurs.
library(parallel)
Mclapply <- function (X, FUN, ..., mc.preschedule = TRUE,
mc.set.seed = TRUE, mc.silent = FALSE,
mc.cores = getOption("mc.cores", 2L),
mc.cleanup = TRUE, mc.allow.recursive = TRUE,
affinity.list = NULL, stop.on.error=FALSE)
{
stop.on.error <- stop.on.error[1] #!!
stopifnot(is.logical(stop.on.error)) #!!
cores <- as.integer(mc.cores)
if ((is.na(cores) || cores < 1L) && is.null(affinity.list))
stop("'mc.cores' must be >= 1")
parallel:::.check_ncores(cores)
if (parallel:::isChild() && !isTRUE(mc.allow.recursive))
return(lapply(X = X, FUN = FUN, ...))
if (!is.vector(X) || is.object(X))
X <- as.list(X)
if (!is.null(affinity.list) && length(affinity.list) < length(X))
stop("affinity.list and X must have the same length")
if (mc.set.seed)
mc.reset.stream()
if (length(X) < 2) {
old.aff <- mcaffinity()
mcaffinity(affinity.list[[1]])
res <- lapply(X = X, FUN = FUN, ...)
mcaffinity(old.aff)
return(res)
}
if (length(X) < cores)
cores <- length(X)
if (cores < 2L && is.null(affinity.list))
return(lapply(X = X, FUN = FUN, ...))
jobs <- list()
parallel:::prepareCleanup()
on.exit(parallel:::cleanup(mc.cleanup))
if (!mc.preschedule) {
FUN <- match.fun(FUN)
if (length(X) <= cores && is.null(affinity.list)) {
jobs <- lapply(seq_along(X), function(i) mcparallel(FUN(X[[i]],
...), name = names(X)[i], mc.set.seed = mc.set.seed,
silent = mc.silent))
res <- mccollect(jobs)
if (length(res) == length(X))
names(res) <- names(X)
has.errors <- sum(sapply(res, inherits, "try-error"))
}
else {
sx <- seq_along(X)
res <- vector("list", length(sx))
names(res) <- names(X)
fin <- rep(FALSE, length(X))
if (!is.null(affinity.list)) {
cores <- max(unlist(x = affinity.list, recursive = TRUE))
d0 <- logical(cores)
cpu.map <- lapply(sx, function(i) {
data <- d0
data[as.vector(affinity.list[[i]])] <- TRUE
data
})
ava <- do.call(rbind, cpu.map)
}
else {
ava <- matrix(TRUE, nrow = length(X), ncol = cores)
}
jobid <- integer(cores)
for (i in 1:cores) {
jobid[i] <- match(TRUE, ava[, i])
ava[jobid[i], ] <- FALSE
}
if (anyNA(jobid)) {
unused <- which(is.na(jobid))
jobid <- jobid[-unused]
ava <- ava[, -unused, drop = FALSE]
}
jobs <- lapply(jobid, function(i) mcparallel(FUN(X[[i]],
...), mc.set.seed = mc.set.seed, silent = mc.silent,
mc.affinity = affinity.list[[i]]))
jobsp <- parallel:::processID(jobs)
has.errors <- 0L
delivered.result <- 0L
while (!all(fin)) {
s <- parallel:::selectChildren(jobs[!is.na(jobsp)], -1)
if (is.null(s))
break
if (is.integer(s))
for (ch in s) {
ji <- match(TRUE, jobsp == ch)
ci <- jobid[ji]
r <- parallel:::readChild(ch)
if (is.raw(r)) {
child.res <- unserialize(r)
if (inherits(child.res, "try-error")){
if(stop.on.error) #!!
stop("error in process X = ", ci, "\n", attr(child.res, "condition")$message) #!!
has.errors <- has.errors + 1L
}
if (!is.null(child.res))
res[[ci]] <- child.res
delivered.result <- delivered.result +
1L
}
else {
fin[ci] <- TRUE
jobsp[ji] <- jobid[ji] <- NA
if (any(ava)) {
nexti <- which.max(ava[, ji])
if (!is.na(nexti)) {
jobid[ji] <- nexti
jobs[[ji]] <- mcparallel(FUN(X[[nexti]],
...), mc.set.seed = mc.set.seed,
silent = mc.silent, mc.affinity = affinity.list[[nexti]])
jobsp[ji] <- parallel:::processID(jobs[[ji]])
ava[nexti, ] <- FALSE
}
}
}
}
}
nores <- length(X) - delivered.result
if (nores > 0)
warning(sprintf(ngettext(nores, "%d parallel function call did not deliver a result",
"%d parallel function calls did not deliver results"),
nores), domain = NA)
}
if (has.errors)
warning(gettextf("%d function calls resulted in an error",
has.errors), domain = NA)
return(res)
}
if (!is.null(affinity.list))
warning("'mc.preschedule' must be false if 'affinity.list' is used")
sindex <- lapply(seq_len(cores), function(i) seq(i, length(X),
by = cores))
schedule <- lapply(seq_len(cores), function(i) X[seq(i, length(X),
by = cores)])
ch <- list()
res <- vector("list", length(X))
names(res) <- names(X)
cp <- rep(0L, cores)
fin <- rep(FALSE, cores)
dr <- rep(FALSE, cores)
inner.do <- function(core) {
S <- schedule[[core]]
f <- parallel:::mcfork()
if (isTRUE(mc.set.seed))
parallel:::mc.advance.stream()
if (inherits(f, "masterProcess")) {
on.exit(parallel:::mcexit(1L, structure("fatal error in wrapper code",
class = "try-error")))
if (isTRUE(mc.set.seed))
parallel:::mc.set.stream()
if (isTRUE(mc.silent))
parallel:::closeStdout(TRUE)
parallel:::sendMaster(try(lapply(X = S, FUN = FUN, ...), silent = TRUE))
parallel:::mcexit(0L)
}
jobs[[core]] <<- ch[[core]] <<- f
cp[core] <<- parallel:::processID(f)
NULL
}
job.res <- lapply(seq_len(cores), inner.do)
ac <- cp[cp > 0]
has.errors <- integer(0)
while (!all(fin)) {
s <- parallel:::selectChildren(ac[!fin], -1)
if (is.null(s))
break
if (is.integer(s))
for (ch in s) {
a <- parallel:::readChild(ch)
if (is.integer(a)) {
core <- which(cp == a)
fin[core] <- TRUE
}
else if (is.raw(a)) {
core <- which(cp == attr(a, "pid"))
job.res[[core]] <- ijr <- unserialize(a)
if (inherits(ijr, "try-error")){
has.errors <- c(has.errors, core)
if(stop.on.error) #!!
stop("error in one of X = ", paste(schedule[[core]], collapse=", "), "\n", attr(ijr, "condition")$message) #!!
}
dr[core] <- TRUE
}
else if (is.null(a)) {
core <- which(cp == ch)
fin[core] <- TRUE
}
}
}
for (i in seq_len(cores)) {
this <- job.res[[i]]
if (inherits(this, "try-error")) {
for (j in sindex[[i]]) res[[j]] <- this
}
else if (!is.null(this))
res[sindex[[i]]] <- this
}
nores <- cores - sum(dr)
if (nores > 0)
warning(sprintf(ngettext(nores, "scheduled core %s did not deliver a result, all values of the job will be affected",
"scheduled cores %s did not deliver results, all values of the jobs will be affected"),
paste(which(dr == FALSE), collapse = ", ")), domain = NA)
if (length(has.errors)) {
if (length(has.errors) == cores)
warning("all scheduled cores encountered errors in user code")
else warning(sprintf(ngettext(has.errors, "scheduled core %s encountered error in user code, all values of the job will be affected",
"scheduled cores %s encountered errors in user code, all values of the jobs will be affected"),
paste(has.errors, collapse = ", ")), domain = NA)
}
res
}
Tests:
f <- function(x, errorAt=1, sleep=2){
if(x==errorAt) stop("-->> test error <<--")
Sys.sleep(sleep)
x
}
options(mc.cores=2)
Mclapply(X=1:4, FUN=f, stop.on.error=TRUE)
## Error in Mclapply(X = 1:4, FUN = f, stop.on.error = TRUE) :
## error in one of X = 1, 3
## -->> test error <<--
Mclapply(X=1:4, FUN=f, errorAt=3, stop.on.error=TRUE)
## Error in Mclapply(X = 1:4, FUN = f, errorAt = 3, stop.on.error = TRUE) :
## error in one of X = 1, 3
## -->> test error <<--
Mclapply(X=1:4, FUN=f, errorAt=Inf, stop.on.error=TRUE)
## [[1]]
## [1] 1
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
##
## [[4]]
## [1] 4
Mclapply(X=1:4, FUN=f, mc.preschedule=FALSE, stop.on.error=TRUE)
## Error in Mclapply(X = 1:4, FUN = f, mc.preschedule = FALSE, stop.on.error = TRUE) :
## error in process X = 1
## -->> test error <<--
Mclapply(X=1:4, FUN=f, errorAt=3, mc.preschedule=FALSE, stop.on.error=TRUE)
## Error in Mclapply(X = 1:4, FUN = f, errorAt = 3, mc.preschedule = FALSE, :
## error in process X = 3
## -->> test error <<--
Mclapply(X=1:4, FUN=f, errorAt=Inf, mc.preschedule=FALSE, stop.on.error=TRUE)
## [[1]]
## [1] 1
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
##
## [[4]]
## [1] 4
This approach uses many internal functions of the package parallel (e.g., parallel:::isChild()). It worked with R version 3.6.0.
Terminating the evaluations in all processes of a cluster upon an error in one process is not possible with a standard mclapply() call. The reason is that the processes do not communicate with one another until they are done.
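A small illustration of that behaviour (an ad-hoc example, timings approximate):
library(parallel)
f <- function(x) { if (x == 1) stop("boom"); Sys.sleep(2); x }
system.time(res <- mclapply(1:4, f, mc.cores = 2))
## elapsed stays around 4 s: the worker handling x = 2 and x = 4 keeps running;
## the error only shows up afterwards as "try-error" elements in res (plus a warning)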
Using the R package future one can achieve such a behavior. The idea is to
create futures and evaluate them in parallel
check every 2 seconds whether one of the futures has resolved to an error
if an error is detected, kill all processes of the cluster
A sketch how this could work:
library(future)
library(parallel)
library(tools)
parallelLapply <- function(x, fun, checkInterval=2, nProcess=2){
## setup cluster and get process IDs of process in cluster
cl <- makeCluster(spec=nProcess)
pids <- unlist(parLapply(cl=cl, X=1:nProcess, function(x) Sys.getpid()))
plan(cluster, workers=cl)
## create futures and start their evaluation
fList <- lapply(seq_along(x), function(i) futureCall(function(x) try(fun(x), silent=TRUE), list(x=x[[i]])))
## check every 2 second whether an error occurred or whether all are resolved
while(TRUE){
Sys.sleep(checkInterval)
## check for errors
errorStatus <- unlist(lapply(fList, function(x)
resolved(x) && class(value(x))=="try-error"))
if(any(unlist(errorStatus))){
lapply(pids, pskill)
results <- NULL
cat("an error occurred in one future: all process of the cluster were killed.\n")
break
}
## check if all resolved without error
allResolved <- all(unlist(lapply(fList, resolved)))
if(allResolved){
results <- lapply(fList, value)
cat("all futures are resolved sucessfully.\n")
break
}
}
results
}
## test 1: early termination because x=1 results in an error.
f1 <- function(x){
if(x==1) stop()
Sys.sleep(15)
x
}
parallelLapply(x=1:5, fun=f1)
# an error occurred in one future: all processes of the cluster were killed.
# NULL
## test 2: no error
f2 <- function(x){
Sys.sleep(15)
x
}
parallelLapply(x=1:5, fun=f2)
## all futures are resolved successfully.
## [[1]]
## [1] 1
##
## [[2]]
## [1] 2
Note:
Additional adjustments are needed if the function passed to fun depends on additional arguments (a sketch follows this note).
On Linux one can use makeForkCluster() instead of makeCluster() for convenience. Then the usage is closer to mclapply().
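A minimal sketch of such an adjustment (untested, and using plan(multisession) instead of an explicit cluster for brevity): extra arguments are captured with ... in the wrapper and handed to futureCall() through its args list, where do.call() applies them to fun.
library(future)
plan(multisession, workers = 2)
fun <- function(x, a) x + a    # a function that needs an extra argument
extra <- list(a = 10)          # inside parallelLapply() this would be extra <- list(...)
f <- futureCall(function(x, extra) try(do.call(fun, c(list(x), extra)), silent = TRUE),
                args = list(x = 1, extra = extra))
value(f)
## [1] 11
In parallelLapply() the same pattern would replace the futureCall() line inside the lapply().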
Here is a cleaner version of the suggestion from Ivo Welch. Note that this does not stop running processes when an error occurs, but rather prevents the start of new evaluations of FUN.
library(parallel)
mcLapply <- function(X, FUN, ..., mc.preschedule=TRUE,
mc.set.seed=TRUE, mc.silent=FALSE,
mc.cores=getOption("mc.cores", 2L),
mc.cleanup=TRUE, mc.allow.recursive=TRUE,
affinity.list=NULL){
tmpFileName <- tempfile()
fn <- function(X){
if(file.exists(tmpFileName))
return(NA)
o <- try(do.call("FUN", c(X, list(...))), silent=TRUE)
if(inherits(o, "try-error")){
file.create(tmpFileName)
}
o
}
ret <- mclapply(X=X, FUN=fn, mc.preschedule=mc.preschedule,
mc.set.seed=mc.set.seed, mc.silent=mc.silent,
mc.cores=mc.cores, mc.cleanup=mc.cleanup,
mc.allow.recursive=mc.allow.recursive,
affinity.list=affinity.list)
if(file.exists(tmpFileName))
file.remove(tmpFileName)
ret
}
## test 1: early termination because x=1 results in an error.
f1 <- function(x){
if(x==1) stop()
Sys.sleep(1)
x
}
mcLapply(X=1:3, FUN=f1)
## [[1]]
## [1] "Error in FUN(1L) : \n"
## attr(,"class")
## [1] "try-error"
## attr(,"condition")
## <simpleError in FUN(1L): >
##
## [[2]]
## [1] NA
##
## [[3]]
## [1] NA
## test 2: no error
f2 <- function(x, a){
Sys.sleep(1)
x+a
}
mcLapply(X=1:2, FUN=f2, a=10)
## [[1]]
## [1] 11
##
## [[2]]
## [1] 12
The following is ugly, but workable. It uses the filesystem as a global shared variable.
options( mc.cores=2 )
if (!exists("touchFile"))
touchFile <- function(filename) { system(paste0("touch ", filename)); }
tfnm <- paste0("mytemporary",as.numeric(Sys.time()))
mfun <- function( i ) {
if (file.exists( tfnm )) stop("done due to process ", i)
message("Mfun(", i,")")
if ( i == 3 ) { message("creating ", tfnm); touchFile(tfnm); stop("goodbye"); }
Sys.sleep( i%%3 )
}
v <- mclapply( 1:10, mfun )
if (file.exists(tfnm)) file.remove(tfnm)
This would be nicer to implement by mclapply itself.
Related
I have code with three nested for loops running on data that contains many missing values. The major problem is the unacceptably long run time: it looks like it would take more than a month, even though I keep my PC on for most of the day.
The structure below gives 100% correct results when I test it with a small number of data points. But once the number of columns and rows grows to 2781 and 280, respectively, it takes forever, even though I can see from the environment window in RStudio that it is still running correctly whenever I refresh it.
My data also has a lot of missing values, probably around 40%, and I think this is making the computation time much longer as well.
The data dimension is 315 * 2781.
However, I am trying to achieve an output in a 280 * 2781 matrix form.
May I please get help minimizing the run time of the following code? Any help would be much appreciated!
options(java.parameters = "-Xmx8000m")
memory.limit(size=8e+6)
data=read.table("C:/Data/input.txt",T,sep="\t");
data=data.frame(data)[,-1]
corr<-NULL
corr2<-NULL
corr3<-NULL
for(i in 1:280)
{
corr2<-NULL
for(j in 1:2781)
{
data2<-data[,-j]
corr<-NULL
for(k in 1:2780)
{
ifelse((is.error(grangertest(data[i:(i+35),j] ~ data2[i:(i+35),k], order = 1, na.action = na.omit)$P[2])==TRUE) ||
       (grangertest(data[i:(i+35),j] ~ data2[i:(i+35),k], order = 1, na.action = na.omit)$P[2])>0.05 ||
       (is.na(grangertest(data[i:(i+35),j] ~ data2[i:(i+35),k], order = 1, na.action = na.omit)$P[2])==TRUE),
       corr<-cbind(corr,0),
       corr<-cbind(corr,1))
}
corr2<-rbind(corr2,corr)
}
corr3<-rbind(corr3,rowSums(corr2))
}
The snippet of my data is as below:
> dput(data[1:30, 1:10])
structure(c(0.567388170165941, 0.193093325709924, 0.965938209090382,
0.348295788047835, 0.496113050729036, 0.0645384560339153, 0.946750836912543,
0.642093246569857, 0.565092500532046, 0.0952424583956599, 0.444063827162609,
0.709971546428278, 0.756330407923087, 0.601746253203601, 0.341865634545684,
0.953319212188944, 0.0788604547269642, 0.990508111426607, 0.35519331949763,
0.697004508692771, 0.285368352662772, 0.274287624517456, 0.575733694015071,
0.12937490013428, 0.00476219342090189, 0.684308280004188, 0.189448777819052,
0.615732178557664, 0.404873769031838, 0.357331350911409, 0.565436001634225,
0.380773033713922, 0.348490287549794, 0.0473814208526164, 0.389312234241515,
0.562123290728778, 0.30642102798447, 0.911173274740577, 0.566258994862437,
0.837928073247895, 0.107747194357216, 0.253737836843356, 0.651503744535148,
0.187739939894527, 0.951192815322429, 0.740037888288498, 0.0817571650259197,
0.740519099170342, 0.601534485351294, 0.120900869136676, 0.415282893227413,
0.591146623482928, 0.698511375114322, 0.08557975362055, 0.139396222075447,
0.303953414550051, 0.0743798329494894, 0.0293272000271827, 0.335832208395004,
0.665010208031163, 0.0319741254206747, 0.678886031731963, 0.154593498911709,
0.275712370406836, 0.828485634410754, 0.921500099124387, 0.651940459152684,
0.00574865937232971, 0.82236105017364, 0.55089360428974, 0.209424041677266,
0.861786168068647, 0.672873278381303, 0.301034058211371, 0.180336013436317,
0.481560358777642, 0.901354183442891, 0.986482679378241, 0.90117057505995,
0.476308439625427, 0.638073122361675, 0.27481731469743, 0.689271076582372,
0.324349449947476, 0.56620552809909, 0.867861548438668, 0.78374840435572,
0.0668482843320817, 0.276675389613956, 0.990600393852219, 0.990227151894942,
0.417612489778548, 0.391012848122045, 0.348758921027184, 0.0799746725242585,
0.88941288786009, 0.511429069796577, 0.0338982092216611, 0.240115304477513,
0.0268365524243563, 0.67206134647131, 0.816803207853809, 0.344421110814437,
0.864659120794386, 0.84128700569272, 0.116056860191748, 0.303730394458398,
0.48192183743231, 0.341675494797528, 0.0622653553728014, 0.823110743425786,
0.483212807681412, 0.968748248415068, 0.953057422768325, 0.116025703493506,
0.327919023809955, 0.590675016632304, 0.832283023977652, 0.342327545629814,
0.576901035616174, 0.942689201096073, 0.59300709143281, 0.565881528891623,
0.600007816683501, 0.133237989619374, 0.873827134957537, 0.744597729761153,
0.755133397178724, 0.0245723063126206, 0.97799762734212, 0.636845340020955,
0.73828601022251, 0.644093665992841, 0.57204390084371, 0.496023115236312,
0.703613247489557, 0.149237307952717, 0.0871439634356648, 0.0632112647872418,
0.83703236351721, 0.433215840253979, 0.430483993608505, 0.924051651498303,
0.913056606892496, 0.914889572421089, 0.215407102368772, 0.76880722376518,
0.269207723205909, 0.865548757137731, 0.28798541566357, 0.391722843516618,
0.649806497385725, 0.459413924254477, 0.907465039752424, 0.48731207777746,
0.554472463205457, 0.779784266138449, 0.566323830280453, 0.208658932242543,
0.958056638715789, 0.61858483706601, 0.838681482244283, 0.286310768220574,
0.895410191034898, 0.448722236789763, 0.297688684659079, 0.33291415637359,
0.0115265529602766, 0.850776052568108, 0.764857453294098, 0.469730701530352,
0.222089925780892, 0.0496484278701246, 0.32886885642074, 0.356443469878286,
0.612877089297399, 0.727906176587567, 0.0292073413729668, 0.429160050582141,
0.232313714455813, 0.678631312213838, 0.642334033036605, 0.99107678886503,
0.542449960019439, 0.835914565017447, 0.52798323193565, 0.303808332188055,
0.919654499506578, 0.944237019168213, 0.52141259261407, 0.794379767496139,
0.72268659202382, 0.114752230467275, 0.175116094760597, 0.437696389388293,
0.852590200025588, 0.511136321350932, 0.30879021063447, 0.174206420546398,
0.14262041519396, 0.375411552377045, 0.0204910831525922, 0.852757754037157,
0.631567053496838, 0.475924106314778, 0.508682047016919, 0.307679089019075,
0.70284536993131, 0.851252349093556, 0.0868967010173947, 0.586291917832568,
0.0529140203725547, 0.440692059928551, 0.207642213441432, 0.777513341512531,
0.141496006632224, 0.548626560717821, 0.419565241318196, 0.0702310993801802,
0.499403427587822, 0.189343606121838, 0.370725362794474, 0.888076487928629,
0.83070912421681, 0.466137421084568, 0.177098380634561, 0.91202046489343,
0.142300580162555, 0.823691181838512, 0.41561916610226, 0.939948018174618,
0.806491429451853, 0.795849160756916, 0.566376683535054, 0.36814984655939,
0.307756055146456, 0.602875682059675, 0.506007500691339, 0.538658684119582,
0.420845189364627, 0.663071365095675, 0.958144341595471, 0.793743418296799,
0.983086514985189, 0.266262857476249, 0.817585011478513, 0.122843299992383,
0.989197303075343, 0.71584410732612, 0.500571243464947, 0.397394519997761,
0.659465527161956, 0.459530522814021, 0.602246116613969, 0.250076721422374,
0.17533828667365, 0.6599256307818, 0.184704560553655, 0.15679649473168,
0.513444944983348, 0.205572377191857, 0.430164282443002, 0.131548407254741,
0.914019819349051, 0.935795902274549, 0.857401241315529, 0.977940042736009,
0.41389597626403, 0.179183913161978, 0.431347143370658, 0.477178965462372,
0.121315707685426, 0.107695729471743, 0.634954946814105, 0.859707030234858,
0.855825762730092, 0.708672808250412, 0.674073817208409, 0.672288877889514,
0.622144045541063, 0.433355041313916, 0.952878215815872, 0.229569894727319,
0.289388840552419, 0.937473804224283, 0.116283216979355, 0.659604362910613,
0.240837284363806, 0.726138337515295, 0.68390148691833, 0.381577257299796,
0.899390475358814, 0.26472729514353, 0.0383855854161084, 0.855232689995319,
0.655799814499915, 0.335587574867532, 0.163842789363116, 0.0353666560258716,
0.048316186061129), .Dim = c(30L, 10L))
I converted just the inner loop to mapply and did a quick speed test:
library(lmtest)
data <- matrix(runif(315*2781), nrow = 315)
get01 <- function(x, y) {
try(gt <- grangertest(x ~ y, order = 1, na.action = na.omit)$P[2])
if (exists("gt")) {
if (gt > 0.05 || is.na(gt)) {
return(0)
} else {
return(1)
}
} else {
return(0)
}
}
i <- 1; j <- 1
system.time(corr <- mapply(function(k) {get01(data[i:(i+35),j], data[i:(i+35),k])}, (1:2781)[-j]))
#> user system elapsed
#> 21.505 0.014 21.520
It would need to perform that mapply 778680 times (280 * 2781 combinations of i and j), so at roughly 21.5 seconds each that puts it at about 200 days. You'll either need a different approach to the Granger test or several cores. Here's the command to replace the full loop:
corr3 <- t(mapply(function(i) colSums(mapply(function(j) mapply(function(k) {get01(data[i:(i+35),j], data[i:(i+35),k])}, (1:2781)[-j]), 1:2781)), 1:280))
Replace the outer mapply with simplify2array(parLapply(...)) to parallelize:
library(parallel)
cl <- makeCluster(detectCores())
clusterExport(cl, list("data", "get01"))
parLapply(cl, cl, function(x) require(lmtest))
corr3 <- t(simplify2array(parLapply(cl, 1:280, function(i) colSums(mapply(function(j) mapply(function(k) {get01(data[i:(i+35),j], data[i:(i+35),k])}, (1:2781)[-j]), 1:2781)))))
stopCluster(cl)
Here is a version, not parallelized, that speeds up the code in the question by a factor greater than 4.
Some bottlenecks in the question's code are easy to detect:
The matrices corr, corr2 and corr3 are grown inside the loops. The solution is to reserve the memory beforehand (a toy illustration follows this list);
The test grangertest is called 3 times per inner iteration when only one is needed;
To cbind with 0 or 1 is in fact creating a vector, not a matrix.
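A toy illustration of the first point (timings vary by machine): growing an object with cbind copies it on every iteration, while filling a pre-allocated object does not.
n <- 1e4
system.time({v <- NULL;       for (i in 1:n) v <- cbind(v, i)})  # grown element by element: slow
system.time({v <- integer(n); for (i in 1:n) v[i] <- i})         # pre-allocated: fast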
Here is a comparative test between the question's code and the function below.
library(lmtest)
# avoids loading an extra package
is.error <- function(x){
inherits(x, c("error", "try-error"))
}
Lag <- 5L
nr <- nrow(data)
nc <- ncol(data)
t0 <- system.time({
corr<-NULL
corr2<-NULL
corr3<-NULL
for(i in 1:(nr - Lag))
{
corr2<-NULL
data3 <- data[i:(i + Lag), ]
for(j in 1:nc)
{
data2<-data[,-j]
corr<-NULL
for(k in 1:(nc - 1L))
{
ifelse((is.error(grangertest(data[i:(i+Lag),j] ~ data2[i:(i+Lag),k], order = 1, na.action = na.omit)$P[2])==TRUE) ||
(grangertest(data[i:(i+Lag),j] ~ data2[i:(i+Lag),k], order = 1, na.action = na.omit)$P[2])>0.05 ||
(is.na(grangertest(data[i:(i+Lag),j] ~ data2[i:(i+Lag),k], order = 1, na.action = na.omit)$P[2])==TRUE),
corr<-cbind(corr,0),
corr<-cbind(corr,1)
)
}
corr2 <- rbind(corr2, corr)
}
corr3<-rbind(corr3, rowSums(corr2))
}
corr3
})
I will use a simplified version of lmtest::grangertest.
granger_test <- function (x, y, order = 1, na.action = na.omit, ...) {
xnam <- deparse(substitute(x))
ynam <- deparse(substitute(y))
n <- length(x)
all <- cbind(x = x[-1], y = y[-1], x_1 = x[-n], y_1 = y[-n])
y <- as.vector(all[, 2])
lagX <- as.matrix(all[, (1:order + 2)])
lagY <- as.matrix(all[, (1:order + 2 + order)])
fm <- lm(y ~ lagY + lagX)
rval <- lmtest::waldtest(fm, 2, ...)
attr(rval, "heading") <- c("Granger causality test\n", paste("Model 1: ",
ynam, " ~ ", "Lags(", ynam, ", 1:", order, ") + Lags(",
xnam, ", 1:", order, ")\nModel 2: ", ynam, " ~ ", "Lags(",
ynam, ", 1:", order, ")", sep = ""))
rval
}
And now the function to run the tests.
f_Rui <- function(data, Lag){
nr <- nrow(data)
nc <- ncol(data)
corr3 <- matrix(0, nrow = nr - Lag, ncol = nc)
data3 <- matrix(0, nrow = Lag + 1L, ncol = nc)
data2 <- matrix(0, nrow = Lag + 1L, ncol = nc - 1L)
for(i in 1:(nr - Lag)) {
corr2 <- matrix(0, nrow = nc, ncol = nc - 1L)
data3[] <- data[i:(i + Lag), ]
for(j in 1:nc) {
corr <- integer(nc - 1L)
data2[] <- data3[, -j]
for(k in 1:(nc - 1L)){
res <- tryCatch(
granger_test(x = data2[, k], y = data3[, j], order = 1, na.action = na.omit),
error = function(e) e
)
if(!inherits(res, "error") && !is.na(res[['Pr(>F)']][2]) && res[['Pr(>F)']][2] <= 0.05) {
corr[k] <- 1L
}
}
corr2[j, ] <- corr
}
corr3[i, ] <- rowSums(corr2)
}
corr3
}
The results are identical and the timings much better.
t1 <- system.time({
res <- f_Rui(data, 5L)
})
identical(corr3, res)
#[1] TRUE
times <- rbind(t0, t1)
t(t(times)/t1)
# user.self sys.self elapsed user.child sys.child
#t0 4.682908 1.736111 4.707783 NaN NaN
#t1 1.000000 1.000000 1.000000 NaN NaN
I am trying to implement NLPCA (Nonlinear PCA) on a data set using the homals package in R but I keep on getting the following error message:
Error in dimnames(x) <- dn : length of 'dimnames' [1] not equal to array extent
The data set I use can be found in the UCI ML Repository and it's called dat when imported in R: https://archive.ics.uci.edu/ml/datasets/South+German+Credit+%28UPDATE%29
Here is my code (some code is provided once the data set is downloaded):
nlpcasouthgerman <- homals(dat, rank=1, level=c('nominal','numerical',rep('nominal',2),
'numerical','nominal',
rep('ordinal',2), rep('nominal',2),
'ordinal','nominal','numerical',
rep('nominal',2), 'ordinal',
'nominal','ordinal',rep('nominal',3)),
active=c(FALSE, rep(TRUE, 20)), ndim=3, verbose=1)
I am trying to predict the first attribute, therefore I set it to be active=FALSE.
The output looks like this (skipped all iteration messages):
Iteration: 1 Loss Value: 0.000047
Iteration: 2 Loss Value: 0.000044
...
Iteration: 37 Loss Value: 0.000043
Iteration: 38 Loss Value: 0.000043
Error in dimnames(x) <- dn :
length of 'dimnames' [1] not equal to array extent
I don't understand why this error comes up. I have used the same code on some other data set and it worked fine so I don't see why this error persists. Any suggestions about what might be going wrong and how I could fix this issue?
Thanks!
It seems the error comes from code in the homals function that generates NAs, specifically, for your data, from the levels of number_credits. These break sort(as.numeric((rownames(clist[[i]])))), and the function's attempt to catch that situation fails because not all of the levels produce an NA value.
So either you have to modify the homals function to take care of such an edge case, or change the problematic factor levels. This might be something to file as a bug report to the package maintainer.
As a work-around in your case you could do something like:
levels(dat$number_credits)[1] <- "_1"
and the function should run without problems.
Edit:
I think one solution would be to change one line of code in the homals function, but there is no guarantee this works as intended. Better submit a bug report to the package author/maintainer - see https://cran.r-project.org/web/packages/homals/ for the address.
Using rnames <- as.numeric(rownames(clist[[i]]))[order(as.numeric(rownames(clist[[i]])))] instead of rnames <- sort(as.numeric((rownames(clist[[i]])))) would allow the following code to identify NAs, but I am not sure why the author did not try to preserve factor levels outright.
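The difference matters because sort() silently drops the NAs produced by the coercion, while subsetting with order() keeps them, so the is.na() check a few lines further down can detect the problem. A tiny illustration with made-up level names:
x <- c("_1", "2", "3")                  # "_1" cannot be coerced to a number
sort(as.numeric(x))                     # NA dropped: length 2, the check never sees it
as.numeric(x)[order(as.numeric(x))]     # NA kept: length 3, is.na() can flag it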
Anyway, you could run a modified function in your local environment, which would require to explicitly call internal (not exported) homals functions, as shown below. Not necessarily the best approach, but would help you out in a pinch.
homals <- function (data, ndim = 2, rank = ndim, level = "nominal", sets = 0,
active = TRUE, eps = 0.000001, itermax = 1000, verbose = 0) {
dframe <- data
name <- deparse(substitute(dframe))
nobj <- nrow(dframe)
nvar <- ncol(dframe)
vname <- names(dframe)
rname <- rownames(dframe)
for (j in 1:nvar) {
dframe[, j] <- as.factor(dframe[, j])
levfreq <- table(dframe[, j])
if (any(levfreq == 0)) {
newlev <- levels(dframe[, j])[-which(levfreq == 0)]
}
else {
newlev <- levels(dframe[, j])
}
dframe[, j] <- factor(dframe[, j], levels = sort(newlev))
}
varcheck <- apply(dframe, 2, function(tl) length(table(tl)))
if (any(varcheck == 1))
stop("Variable with only 1 value detected! Can't proceed with estimation!")
active <- homals:::checkPars(active, nvar)
rank <- homals:::checkPars(rank, nvar)
level <- homals:::checkPars(level, nvar)
if (length(sets) == 1)
sets <- lapply(1:nvar, "c")
if (!all(sort(unlist(sets)) == (1:nvar))) {
print(cat("sets union", sort(unlist(sets)), "\n"))
stop("inappropriate set structure !")
}
nset <- length(sets)
mis <- rep(0, nobj)
for (l in 1:nset) {
lset <- sets[[l]]
if (all(!active[lset]))
(next)()
jset <- lset[which(active[lset])]
for (i in 1:nobj) {
if (any(is.na(dframe[i, jset])))
dframe[i, jset] <- NA
else mis[i] <- mis[i] + 1
}
}
for (j in 1:nvar) {
k <- length(levels(dframe[, j]))
if (rank[j] > min(ndim, k - 1))
rank[j] <- min(ndim, k - 1)
}
x <- cbind(homals:::orthogonalPolynomials(mis, 1:nobj, ndim))
x <- homals:::normX(homals:::centerX(x, mis), mis)$q
y <- lapply(1:nvar, function(j) homals:::computeY(dframe[, j], x))
sold <- homals:::totalLoss(dframe, x, y, active, rank, level, sets)
iter <- pops <- 0
repeat {
iter <- iter + 1
y <- homals:::updateY(dframe, x, y, active, rank, level, sets,
verbose = verbose)
smid <- homals:::totalLoss(dframe, x, y, active, rank, level,
sets)/(nobj * nvar * ndim)
ssum <- homals:::totalSum(dframe, x, y, active, rank, level, sets)
qv <- homals:::normX(homals:::centerX((1/mis) * ssum, mis), mis)
z <- qv$q
snew <- homals:::totalLoss(dframe, z, y, active, rank, level,
sets)/(nobj * nvar * ndim)
if (verbose > 0)
cat("Iteration:", formatC(iter, digits = 3, width = 3),
"Loss Value: ", formatC(c(smid), digits = 6,
width = 6, format = "f"), "\n")
r <- abs(qv$r)/2
ops <- sum(r)
aps <- sum(La.svd(crossprod(x, mis * z), 0, 0)$d)/ndim
if (iter == itermax) {
stop("maximum number of iterations reached")
}
if (smid > sold) {
warning(cat("Loss function increases in iteration ",
iter, "\n"))
}
if ((ops - pops) < eps)
break
else {
x <- z
pops <- ops
sold <- smid
}
}
ylist <- alist <- clist <- ulist <- NULL
for (j in 1:nvar) {
gg <- dframe[, j]
c <- homals:::computeY(gg, z)
d <- as.vector(table(gg))
lst <- homals:::restrictY(d, c, rank[j], level[j])
y <- lst$y
a <- lst$a
u <- lst$z
ylist <- c(ylist, list(y))
alist <- c(alist, list(a))
clist <- c(clist, list(c))
ulist <- c(ulist, list(u))
}
dimlab <- paste("D", 1:ndim, sep = "")
for (i in 1:nvar) {
if (ndim == 1) {
ylist[[i]] <- cbind(ylist[[i]])
ulist[[i]] <- cbind(ulist[[i]])
clist[[i]] <- cbind(clist[[i]])
}
options(warn = -1)
# Here is the line that I changed in the code:
# rnames <- sort(as.numeric((rownames(clist[[i]]))))
rnames <- as.numeric(rownames(clist[[i]]))[order(as.numeric(rownames(clist[[i]])))]
options(warn = 0)
if ((any(is.na(rnames))) || (length(rnames) == 0))
rnames <- rownames(clist[[i]])
if (!is.matrix(ulist[[i]]))
ulist[[i]] <- as.matrix(ulist[[i]])
rownames(ylist[[i]]) <- rownames(ulist[[i]]) <- rownames(clist[[i]]) <- rnames
rownames(alist[[i]]) <- paste(1:dim(alist[[i]])[1])
colnames(clist[[i]]) <- colnames(ylist[[i]]) <- colnames(alist[[i]]) <- dimlab
colnames(ulist[[i]]) <- paste(1:dim(as.matrix(ulist[[i]]))[2])
}
names(ylist) <- names(ulist) <- names(clist) <- names(alist) <- colnames(dframe)
rownames(z) <- rownames(dframe)
colnames(z) <- dimlab
dummymat <- as.matrix(homals:::expandFrame(dframe, zero = FALSE, clean = FALSE))
dummymat01 <- dummymat
dummymat[dummymat == 2] <- NA
dummymat[dummymat == 0] <- Inf
scoremat <- array(NA, dim = c(dim(dframe), ndim), dimnames = list(rownames(dframe),
colnames(dframe), paste("dim", 1:ndim, sep = "")))
for (i in 1:ndim) {
catscores.d1 <- do.call(rbind, ylist)[, i]
dummy.scores <- t(t(dummymat) * catscores.d1)
freqlist <- apply(dframe, 2, function(dtab) as.list(table(dtab)))
cat.ind <- sequence(sapply(freqlist, length))
scoremat[, , i] <- t(apply(dummy.scores, 1, function(ds) {
ind.infel <- which(ds == Inf)
ind.minfel <- which(ds == -Inf)
ind.nan <- which(is.nan(ds))
ind.nael <- which((is.na(ds) + (cat.ind != 1)) ==
2)
ds[-c(ind.infel, ind.minfel, ind.nael, ind.nan)]
}))
}
disc.mat <- apply(scoremat, 3, function(xx) {
apply(xx, 2, function(cols) {
(sum(cols^2, na.rm = TRUE))/nobj
})
})
result <- list(datname = name, catscores = ylist, scoremat = scoremat,
objscores = z, cat.centroids = clist, ind.mat = dummymat01,
loadings = alist, low.rank = ulist, discrim = disc.mat,
ndim = ndim, niter = iter, level = level, eigenvalues = r,
loss = smid, rank.vec = rank, active = active, dframe = dframe,
call = match.call())
class(result) <- "homals"
result
}
I have used tryCatch so that if an error happens during execution of the R code it will not break.
I want to write the error to a file, but I am not sure how that can be done.
Below is the code used:
library(forecast)
library(data.table)
library(RODBC)
forecast_data <- data.frame(Project_ID=character(),
Period_End=character(),
Point_Forecast=character(),
Lower_Limit_95=character(),
Upper_Limit_95=character(),
stringsAsFactors=FALSE)
Data <- read.csv("Data.csv", header=TRUE,na.strings=c("NULL",""))[ ,c('Project_ID', 'Period_End_Date', 'Overall_Backlog_Processing_Efficiency_Incident')]
result = tryCatch({
if (nrow(Data) >= 1)
{
backlog <- as.vector(Data$Overall_Backlog_Processing_Efficiency_Incident)
i <- 1
datalist = list()
for (i in 1:8) {
backlogts <- tbats(backlog)
fc2 <- forecast(backlogts, h=1)
fc2
fc_2 <- as.data.frame(fc2)
fc_2$i <- i # maybe you want to keep track of which iteration produced it?
datalist[[i]] <- fc_2 # add it to your list
backlog <- append(backlog,round(fc_2[1,1], digits = 2))
i <- i +1
}
forecast_data = do.call(rbind, datalist)
forecast_data$`Point Forecast` <- round(forecast_data$`Point Forecast` , digits = 3)
nextweekday <- function(date, wday) {
date <- as.Date(date)
diff <- wday - wday(date)
if( diff < 0 )
diff <- diff + 7
return(date + diff)
}
a <- tail(Data$Period_End_Date, n=1)
a <- as.Date(a, "%d-%b-%y")
b <- tail(Data$Project_ID, n=1)
Period_End_Date <- data.table(date=seq(as.Date(nextweekday(a,1)), by=7, length=8), key="date")
forecast_data = cbind(forecast_data, Period_End_Date)
names(forecast_data)[names(forecast_data) == 'date'] <- 'Period_End'
forecast_data$Period_End <- as.character(forecast_data$Period_End)
forecast_data$Project_ID <- b
forecast_data <- forecast_data[c(8,7,1,4,5)]
names(forecast_data)[names(forecast_data) == 'Lo 95'] <- 'Lower_Limit_95'
names(forecast_data)[names(forecast_data) == 'Hi 95'] <- 'Upper_Limit_95'
names(forecast_data)[names(forecast_data) == 'Point Forecast'] <- 'Point_Forecast'
}
},
warning = function(w) {},
error = function(e) {
forecast_data <- data.frame(Project_ID=character(),
Period_End=character(),
Point_Forecast=character(),
Lower_Limit_95=character(),
Upper_Limit_95=character(),
stringsAsFactors=FALSE)
print(paste("MY_ERROR: ",e))
})
I tried to print the error with print(paste("MY_ERROR: ", err)) under error = function(e), but it is not working.
Is there anything I am missing? Please advise.
Does this help?
foo <- function(x){
output <- tryCatch(x, error = function(e) e)
ifelse(is(output, "error"), "error", "no error")
}
foo() # error
foo(1) # no error
Alternatively, use purrr::safely():
library(purrr)
foo <- function(x){
  x
}
foo_safe <- safely(foo)   # wraps foo so it returns a list with $result and $error
foo_safe()   # error is captured in $error
foo_safe(10) # no error: $result is 10
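Coming back to the original goal of writing the error to a file: a minimal sketch (the file name error_log.txt and the message format are just examples) of an error handler that appends the message to a log file instead of printing it.
result <- tryCatch({
  stop("something went wrong")                    # stand-in for the forecasting code
}, error = function(e) {
  # append a time-stamped message to a log file
  cat(format(Sys.time()), "MY_ERROR:", conditionMessage(e), "\n",
      file = "error_log.txt", append = TRUE)
  NULL                                            # value returned when an error occurs
})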
I am trying to add a progress bar to a bootstrap function in R.
I tried to make the example function as simple as possible (hence i'm using mean in this example).
library(boot)
v1 <- rnorm(1000)
rep_count = 1
m.boot <- function(data, indices) {
d <- data[indices]
setWinProgressBar(pb, rep_count)
rep_count <- rep_count + 1
Sys.sleep(0.01)
mean(d, na.rm = T)
}
tot_rep <- 200
pb <- winProgressBar(title = "Bootstrap in progress", label = "",
min = 0, max = tot_rep, initial = 0, width = 300)
b <- boot(v1, m.boot, R = tot_rep)
close(pb)
The bootstrap functions properly, but the problem is that the value of rep_count does not increase in the loop and the progress bar stays frozen during the process.
If I check the value of rep_count after the bootstrap is complete, it is still 1.
What am I doing wrong? Maybe the boot function does not simply run m.boot in a loop, so the variables inside it are not incremented?
Thank you.
You could use the package progress as below:
library(boot)
library(progress)
v1 <- rnorm(1000)
#add progress bar as parameter to function
m.boot <- function(data, indices, prog) {
#display progress with each run of the function
prog$tick()
d <- data[indices]
Sys.sleep(0.01)
mean(d, na.rm = T)
}
tot_rep <- 200
#initialize progress bar object
pb <- progress_bar$new(total = tot_rep + 1)
#perform bootstrap
boot(data = v1, statistic = m.boot, R = tot_rep, prog = pb)
I haven't quite figured out yet why it's necessary to set the number of iterations for progress_bar to one more than the total number of bootstrap replicates (parameter R), but this is what was necessary in my own code, otherwise it throws an error. It seems the bootstrap function is run one more time than you specify in parameter R, so if the progress bar is set to run only R times, it thinks the job is finished before it really is.
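One way to see where the extra evaluation comes from is to count how often the statistic runs (a quick check, not part of the original answer):
library(boot)
calls <- 0
invisible(boot(rnorm(100), function(d, i) { calls <<- calls + 1; mean(d[i]) }, R = 10))
calls
## [1] 11   # R = 10 replicates plus one evaluation on the original data (t0)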
The pbapply package was designed to work with vectorized functions. There are two ways to achieve that in the context of this question: (1) write a wrapper as was suggested, which will not produce the same object of class 'boot'; (2) rewrite the line lapply(seq_len(RR), fn) as pblapply(seq_len(RR), fn). Option 2 can be done either by locally copying/updating the boot function as shown in the example below, or by asking the package maintainer, Brian Ripley, whether he would consider adding a progress bar directly or through pbapply as a dependency.
My solution (changes indicated by comments):
library(boot)
library(pbapply)
boot2 <- function (data, statistic, R, sim = "ordinary", stype = c("i",
"f", "w"), strata = rep(1, n), L = NULL, m = 0, weights = NULL,
ran.gen = function(d, p) d, mle = NULL, simple = FALSE, ...,
parallel = c("no", "multicore", "snow"), ncpus = getOption("boot.ncpus",
1L), cl = NULL)
{
call <- match.call()
stype <- match.arg(stype)
if (missing(parallel))
parallel <- getOption("boot.parallel", "no")
parallel <- match.arg(parallel)
have_mc <- have_snow <- FALSE
if (parallel != "no" && ncpus > 1L) {
if (parallel == "multicore")
have_mc <- .Platform$OS.type != "windows"
else if (parallel == "snow")
have_snow <- TRUE
if (!have_mc && !have_snow)
ncpus <- 1L
loadNamespace("parallel")
}
if (simple && (sim != "ordinary" || stype != "i" || sum(m))) {
warning("'simple=TRUE' is only valid for 'sim=\"ordinary\", stype=\"i\", n=0', so ignored")
simple <- FALSE
}
if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
runif(1)
seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
n <- NROW(data)
if ((n == 0) || is.null(n))
stop("no data in call to 'boot'")
temp.str <- strata
strata <- tapply(seq_len(n), as.numeric(strata))
t0 <- if (sim != "parametric") {
if ((sim == "antithetic") && is.null(L))
L <- empinf(data = data, statistic = statistic, stype = stype,
strata = strata, ...)
if (sim != "ordinary")
m <- 0
else if (any(m < 0))
stop("negative value of 'm' supplied")
if ((length(m) != 1L) && (length(m) != length(table(strata))))
stop("length of 'm' incompatible with 'strata'")
if ((sim == "ordinary") || (sim == "balanced")) {
if (isMatrix(weights) && (nrow(weights) != length(R)))
stop("dimensions of 'R' and 'weights' do not match")
}
else weights <- NULL
if (!is.null(weights))
weights <- t(apply(matrix(weights, n, length(R),
byrow = TRUE), 2L, normalize, strata))
if (!simple)
i <- index.array(n, R, sim, strata, m, L, weights)
original <- if (stype == "f")
rep(1, n)
else if (stype == "w") {
ns <- tabulate(strata)[strata]
1/ns
}
else seq_len(n)
t0 <- if (sum(m) > 0L)
statistic(data, original, rep(1, sum(m)), ...)
else statistic(data, original, ...)
rm(original)
t0
}
else statistic(data, ...)
pred.i <- NULL
fn <- if (sim == "parametric") {
ran.gen
data
mle
function(r) {
dd <- ran.gen(data, mle)
statistic(dd, ...)
}
}
else {
if (!simple && ncol(i) > n) {
pred.i <- as.matrix(i[, (n + 1L):ncol(i)])
i <- i[, seq_len(n)]
}
if (stype %in% c("f", "w")) {
f <- freq.array(i)
rm(i)
if (stype == "w")
f <- f/ns
if (sum(m) == 0L)
function(r) statistic(data, f[r, ], ...)
else function(r) statistic(data, f[r, ], pred.i[r,
], ...)
}
else if (sum(m) > 0L)
function(r) statistic(data, i[r, ], pred.i[r, ],
...)
else if (simple)
function(r) statistic(data, index.array(n, 1, sim,
strata, m, L, weights), ...)
else function(r) statistic(data, i[r, ], ...)
}
RR <- sum(R)
res <- if (ncpus > 1L && (have_mc || have_snow)) {
if (have_mc) {
parallel::mclapply(seq_len(RR), fn, mc.cores = ncpus)
}
else if (have_snow) {
list(...)
if (is.null(cl)) {
cl <- parallel::makePSOCKcluster(rep("localhost",
ncpus))
if (RNGkind()[1L] == "L'Ecuyer-CMRG")
parallel::clusterSetRNGStream(cl)
res <- parallel::parLapply(cl, seq_len(RR), fn)
parallel::stopCluster(cl)
res
}
else parallel::parLapply(cl, seq_len(RR), fn)
}
}
else pblapply(seq_len(RR), fn) #### changed !!!
t.star <- matrix(, RR, length(t0))
for (r in seq_len(RR)) t.star[r, ] <- res[[r]]
if (is.null(weights))
weights <- 1/tabulate(strata)[strata]
boot.return(sim, t0, t.star, temp.str, R, data, statistic,
stype, call, seed, L, m, pred.i, weights, ran.gen, mle)
}
## Functions not exported by boot
isMatrix <- boot:::isMatrix
index.array <- boot:::index.array
boot.return <- boot:::boot.return
## Now the example
m.boot <- function(data, indices) {
d <- data[indices]
mean(d, na.rm = T)
}
tot_rep <- 200
v1 <- rnorm(1000)
b <- boot2(v1, m.boot, R = tot_rep)
The increased rep_count is a local variable and lost after each function call. In the next iteration the function gets rep_count from the global environment again, i.e., its value is 1.
You can use <<-:
rep_count <<- rep_count + 1
This assigns to the rep_count first found on the search path outside the function. Of course, using <<- is usually not recommended because side effects of functions should be avoided, but here you have a legitimate use case. However, you should probably wrap the whole thing in a function to avoid a side effect on the global environment.
There might be better solutions ...
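A minimal sketch of that wrapping idea (using txtProgressBar from base utils so it is not Windows-only; max is R + 1 because boot() also evaluates the statistic once on the original sample):
library(boot)
boot_with_bar <- function(data, statistic, R) {
  rep_count <- 0
  pb <- txtProgressBar(min = 0, max = R + 1, style = 3)
  on.exit(close(pb))
  wrapped <- function(d, i) {
    rep_count <<- rep_count + 1        # updates the counter inside boot_with_bar(), not the global environment
    setTxtProgressBar(pb, rep_count)
    statistic(d, i)
  }
  boot(data, wrapped, R = R)
}
b <- boot_with_bar(rnorm(1000), function(d, i) mean(d[i], na.rm = TRUE), R = 200)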
I think I found a possible solution. This merges the answer of @Roland with the convenience of the pbapply package, using its functions startpb(), setpb() and closepb().
library(boot)
library(pbapply)
v1 <- rnorm(1000)
rep_count = 1
tot_rep = 200
m.boot <- function(data, indices) {
d <- data[indices]
setpb(pb, rep_count)
rep_count <<- rep_count + 1
Sys.sleep(0.01) #Just to slow down the process
mean(d, na.rm = T)
}
pb <- startpb(min = 0, max = tot_rep)
b <- boot(v1, m.boot, R = tot_rep)
closepb(pb)
rep_count = 1
As previously suggested, wrapping everything in a function avoids messing with the rep_count variable.
The progress bar from the package dplyr works well:
library(dplyr)
library(boot)
v1 <- rnorm(1000)
m.boot <- function(data, indices) {
d <- data[indices]
p$tick()$print() # update progress bar
Sys.sleep(0.01)
mean(d, na.rm = T)
}
tot_rep <- 200
p <- progress_estimated(tot_rep+1) # init progress bar
b <- boot(v1, m.boot, R = tot_rep)
You can use the package pbapply
library(boot)
library(pbapply)
v1 <- rnorm(1000)
rep_count = 1
# your m.boot function ....
m.boot <- function(data, indices) {
d <- data[indices]
mean(d, na.rm = T)
}
# ... wraped in `bootfunc`
bootfunc <- function(x) { boot(x, m.boot, R = 200) }
# apply function to v1 , returning progress bar
pblapply(v1, bootfunc)
# > b <- pblapply(v1, bootfunc)
# > |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% Elapsed time: 02s
How can I write a function to check cases (x, y) using the following two tests:
One
if y==rank(y)
Two
xranks <- rank(x)
yranks <- rank(y)
meanx <- mean(xranks)
meany <- mean(yranks)
covariance.term <- cov(xranks-meanx,y-meany)
sd.x <- sd(xranks)
sd.y <- sd(yranks)
if -1<= covariance.term/(sd.x*sd.y) <=1
The function should return TRUE if both tests are passed, or FALSE otherwise, with warnings about which tests failed.
The following should do what you want, but as you didn't provide test cases, I am not sure if it works.
check.xy <- function(x,y) {
xranks <- rank(x)
yranks <- rank(y)
meanx <- mean(xranks)
meany <- mean(yranks)
covariance.term <- cov(xranks-meanx,y-meany)
sd.x <- sd(xranks)
sd.y <- sd(yranks)
testA <- all(y == rank(y))
testB <- isTRUE(all(-1 <= covariance.term/(sd.x*sd.y) & covariance.term/(sd.x*sd.y) <= 1))
if (testA & testB) return(TRUE)
else if (testA) warning("test two failed")
else if (testB) warning("test one failed")
else warning("tests one and two failed")
FALSE
}
I think it is best to define each test in its own function, especially since we want warnings about which tests failed.
The two tests share the same environment, which is why I defined them as nested functions.
multitest <- function(x, y){
  test.covariance <- function(){
    xranks <- rank(x)
    yranks <- rank(y)
    meanx <- mean(xranks)
    meany <- mean(yranks)
    covariance.term <- cov(xranks - meanx, y - meany)
    sd.x <- sd(xranks)
    sd.y <- sd(yranks)
    cov.norm <- covariance.term/(sd.x*sd.y)
    res <- !is.na(cov.norm) && cov.norm >= -1 && cov.norm <= 1
    if(!res) warning('test covariance range failed')
    res
  }
  test.rank <- function(){
    res <- all(y == rank(y))
    if(!res) warning('test rank failed')
    res
  }
  test.covariance() && test.rank()
}
Some tests:
success
x <- 1:10
y <- 1:10
multitest(x,y)
[1] TRUE
failure rank
x <- rnorm(10)
y <- rnorm(10)
multitest(x,y)
[1] FALSE
Warning message:
In test.rank() : test rank failed
failure covariance
x <- rep(10,10)
y <- 1:10
multitest(x,y)
[1] FALSE
Warning message:
In test.covariance() : test covariance range failed