I am trying to implement NLPCA (Nonlinear PCA) on a data set using the homals package in R but I keep on getting the following error message:
Error in dimnames(x) <- dn : length of 'dimnames' [1] not equal to array extent
The data set I use can be found in the UCI ML Repository; I call it dat when imported into R: https://archive.ics.uci.edu/ml/datasets/South+German+Credit+%28UPDATE%29
Here is my code (some import code is provided with the data set once it is downloaded):
nlpcasouthgerman <- homals(dat, rank=1, level=c('nominal','numerical',rep('nominal',2),
'numerical','nominal',
rep('ordinal',2), rep('nominal',2),
'ordinal','nominal','numerical',
rep('nominal',2), 'ordinal',
'nominal','ordinal',rep('nominal',3)),
active=c(FALSE, rep(TRUE, 20)), ndim=3, verbose=1)
I am trying to predict the first attribute, so I set it to active=FALSE.
The output looks like this (most iteration messages skipped):
Iteration: 1 Loss Value: 0.000047
Iteration: 2 Loss Value: 0.000044
...
Iteration: 37 Loss Value: 0.000043
Iteration: 38 Loss Value: 0.000043
Error in dimnames(x) <- dn :
length of 'dimnames' [1] not equal to array extent
I don't understand why this error comes up. I have used the same code on another data set and it worked fine, so I don't see why the error occurs here. Any suggestions about what might be going wrong and how I could fix this issue?
Thanks!
It seems the error comes from code in the homals function that generates NAs, specifically for your data in the number_credits levels. This causes problems with sort(as.numeric((rownames(clist[[i]])))) and with the attempt to catch the error, since one of the levels does not give an NA value.
So you either have to modify the homals function to take care of such an edge case, or change the problematic factor levels. This might be something to file as a bug report with the package maintainer.
As a work-around in your case you could do something like:
levels(dat$number_credits)[1] <- "_1"
and the function should run without problems.
Edit:
I think one solution would be to change one line of code in the homals function, but there is no guarantee this works as intended. It would be better to submit a bug report to the package author/maintainer - see https://cran.r-project.org/web/packages/homals/ for the address.
Using rnames <- as.numeric(rownames(clist[[i]]))[order(as.numeric(rownames(clist[[i]])))] instead of rnames <- sort(as.numeric((rownames(clist[[i]])))) would allow the following code to identify NAs, but I am not sure why the author did not try to preserve factor levels outright.
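To see why this one-line change matters, here is a small standalone illustration (the level names are made up): sort() silently drops NA values, so the vector of row names can end up shorter than the number of category rows, whereas indexing with order() keeps the NAs so that the is.na() check afterwards can still catch them.
x <- c("1", "A", "B")   # hypothetical level names; only "1" parses as a number
suppressWarnings(sort(as.numeric(x)))
# [1] 1                       (NAs dropped, length no longer matches)
suppressWarnings(as.numeric(x)[order(as.numeric(x))])
# [1]  1 NA NA                (NAs kept, same length as the levels)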
Anyway, you could run a modified function in your local environment, which requires explicitly calling internal (not exported) homals functions, as shown below. Not necessarily the best approach, but it would help you out in a pinch.
homals <- function (data, ndim = 2, rank = ndim, level = "nominal", sets = 0,
active = TRUE, eps = 0.000001, itermax = 1000, verbose = 0) {
dframe <- data
name <- deparse(substitute(dframe))
nobj <- nrow(dframe)
nvar <- ncol(dframe)
vname <- names(dframe)
rname <- rownames(dframe)
for (j in 1:nvar) {
dframe[, j] <- as.factor(dframe[, j])
levfreq <- table(dframe[, j])
if (any(levfreq == 0)) {
newlev <- levels(dframe[, j])[-which(levfreq == 0)]
}
else {
newlev <- levels(dframe[, j])
}
dframe[, j] <- factor(dframe[, j], levels = sort(newlev))
}
varcheck <- apply(dframe, 2, function(tl) length(table(tl)))
if (any(varcheck == 1))
stop("Variable with only 1 value detected! Can't proceed with estimation!")
active <- homals:::checkPars(active, nvar)
rank <- homals:::checkPars(rank, nvar)
level <- homals:::checkPars(level, nvar)
if (length(sets) == 1)
sets <- lapply(1:nvar, "c")
if (!all(sort(unlist(sets)) == (1:nvar))) {
print(cat("sets union", sort(unlist(sets)), "\n"))
stop("inappropriate set structure !")
}
nset <- length(sets)
mis <- rep(0, nobj)
for (l in 1:nset) {
lset <- sets[[l]]
if (all(!active[lset]))
(next)()
jset <- lset[which(active[lset])]
for (i in 1:nobj) {
if (any(is.na(dframe[i, jset])))
dframe[i, jset] <- NA
else mis[i] <- mis[i] + 1
}
}
for (j in 1:nvar) {
k <- length(levels(dframe[, j]))
if (rank[j] > min(ndim, k - 1))
rank[j] <- min(ndim, k - 1)
}
x <- cbind(homals:::orthogonalPolynomials(mis, 1:nobj, ndim))
x <- homals:::normX(homals:::centerX(x, mis), mis)$q
y <- lapply(1:nvar, function(j) homals:::computeY(dframe[, j], x))
sold <- homals:::totalLoss(dframe, x, y, active, rank, level, sets)
iter <- pops <- 0
repeat {
iter <- iter + 1
y <- homals:::updateY(dframe, x, y, active, rank, level, sets,
verbose = verbose)
smid <- homals:::totalLoss(dframe, x, y, active, rank, level,
sets)/(nobj * nvar * ndim)
ssum <- homals:::totalSum(dframe, x, y, active, rank, level, sets)
qv <- homals:::normX(homals:::centerX((1/mis) * ssum, mis), mis)
z <- qv$q
snew <- homals:::totalLoss(dframe, z, y, active, rank, level,
sets)/(nobj * nvar * ndim)
if (verbose > 0)
cat("Iteration:", formatC(iter, digits = 3, width = 3),
"Loss Value: ", formatC(c(smid), digits = 6,
width = 6, format = "f"), "\n")
r <- abs(qv$r)/2
ops <- sum(r)
aps <- sum(La.svd(crossprod(x, mis * z), 0, 0)$d)/ndim
if (iter == itermax) {
stop("maximum number of iterations reached")
}
if (smid > sold) {
warning(cat("Loss function increases in iteration ",
iter, "\n"))
}
if ((ops - pops) < eps)
break
else {
x <- z
pops <- ops
sold <- smid
}
}
ylist <- alist <- clist <- ulist <- NULL
for (j in 1:nvar) {
gg <- dframe[, j]
c <- homals:::computeY(gg, z)
d <- as.vector(table(gg))
lst <- homals:::restrictY(d, c, rank[j], level[j])
y <- lst$y
a <- lst$a
u <- lst$z
ylist <- c(ylist, list(y))
alist <- c(alist, list(a))
clist <- c(clist, list(c))
ulist <- c(ulist, list(u))
}
dimlab <- paste("D", 1:ndim, sep = "")
for (i in 1:nvar) {
if (ndim == 1) {
ylist[[i]] <- cbind(ylist[[i]])
ulist[[i]] <- cbind(ulist[[i]])
clist[[i]] <- cbind(clist[[i]])
}
options(warn = -1)
# Here is the line that I changed in the code:
# rnames <- sort(as.numeric((rownames(clist[[i]]))))
rnames <- as.numeric(rownames(clist[[i]]))[order(as.numeric(rownames(clist[[i]])))]
options(warn = 0)
if ((any(is.na(rnames))) || (length(rnames) == 0))
rnames <- rownames(clist[[i]])
if (!is.matrix(ulist[[i]]))
ulist[[i]] <- as.matrix(ulist[[i]])
rownames(ylist[[i]]) <- rownames(ulist[[i]]) <- rownames(clist[[i]]) <- rnames
rownames(alist[[i]]) <- paste(1:dim(alist[[i]])[1])
colnames(clist[[i]]) <- colnames(ylist[[i]]) <- colnames(alist[[i]]) <- dimlab
colnames(ulist[[i]]) <- paste(1:dim(as.matrix(ulist[[i]]))[2])
}
names(ylist) <- names(ulist) <- names(clist) <- names(alist) <- colnames(dframe)
rownames(z) <- rownames(dframe)
colnames(z) <- dimlab
dummymat <- as.matrix(homals:::expandFrame(dframe, zero = FALSE, clean = FALSE))
dummymat01 <- dummymat
dummymat[dummymat == 2] <- NA
dummymat[dummymat == 0] <- Inf
scoremat <- array(NA, dim = c(dim(dframe), ndim), dimnames = list(rownames(dframe),
colnames(dframe), paste("dim", 1:ndim, sep = "")))
for (i in 1:ndim) {
catscores.d1 <- do.call(rbind, ylist)[, i]
dummy.scores <- t(t(dummymat) * catscores.d1)
freqlist <- apply(dframe, 2, function(dtab) as.list(table(dtab)))
cat.ind <- sequence(sapply(freqlist, length))
scoremat[, , i] <- t(apply(dummy.scores, 1, function(ds) {
ind.infel <- which(ds == Inf)
ind.minfel <- which(ds == -Inf)
ind.nan <- which(is.nan(ds))
ind.nael <- which((is.na(ds) + (cat.ind != 1)) ==
2)
ds[-c(ind.infel, ind.minfel, ind.nael, ind.nan)]
}))
}
disc.mat <- apply(scoremat, 3, function(xx) {
apply(xx, 2, function(cols) {
(sum(cols^2, na.rm = TRUE))/nobj
})
})
result <- list(datname = name, catscores = ylist, scoremat = scoremat,
objscores = z, cat.centroids = clist, ind.mat = dummymat01,
loadings = alist, low.rank = ulist, discrim = disc.mat,
ndim = ndim, niter = iter, level = level, eigenvalues = r,
loss = smid, rank.vec = rank, active = active, dframe = dframe,
call = match.call())
class(result) <- "homals"
result
}
abcdefghij.onestep <- function (x, params) {
Susceptible <- x[2]
Exposed <- x[3]
Infected_Multibacillary <- x[4]
Infected_Paucibacillary <- x[5]
Exposed_Detected_Diagnosis <- x[6]
Treated <- x[7]
Disability <- x[8]
Recovered <- x[9]
Relapse_Multibacillary <-x[10]
Relapse_Paucibacillary <-x[11]
N <- Susceptible + Exposed + Infected_Multibacillary + Infected_Paucibacillary + Exposed_Detected_Diagnosis + Treated + Disability + Recovered + Relapse_Multibacillary + Relapse_Paucibacillary
m12 <- params["m12"]
m25 <- params["m25"]
m23 <- params["m23"]
m24 <- params["m24"]
m35 <- params["m35"]
m45 <- params["m45"]
m37 <- params["m37"]
m56 <- params["m56"]
m67 <- params["m67"]
m68 <- params["m68"]
m89 <- params["m89"]
m810 <- params["m810"]
m96 <- params["m96"]
m97 <- params["m97"]
m106 <- params["m106"]
mu <- params["mu"]
rates <- c(
birth=mu*N, susceptible_exposed=m12*Susceptible*Infected_Multibacillary -m25*Infected_Multibacillary * Exposed_Detected_Diagnosis -m23*Exposed*Infected_Multibacillary -m24*Exposed*Infected_Paucibacillary,
exposed_infected_multibacillary=m23*Exposed*Infected_Paucibacillary-m35*Infected_Multibacillary*Exposed_Detected_Diagnosis-m45* Exposed_Detected_Diagnosis* Infected_Paucibacillary-m37*Disability*Infected_Multibacillary,
exposed_infected_paucibacillary=m24*Exposed*Infected_Paucibacillary-m45*Infected_Paucibacillary*Exposed_Detected_Diagnosis,
infected_multibacillary_exposed_detected=m35*Infected_Multibacillary*Exposed_Detected_Diagnosis+m45*Infected_Paucibacillary* Exposed_Detected_Diagnosis-m56* Exposed_Detected_Diagnosis*Treated,
exposed_detected_treatment=m56*Exposed_Detected_Diagnosis*Treated-m67*Treated* Disability-m68*Treated*Recovered,
infected_multibacillary_disability=m37*Disability*Infected_Multibacillary+m67*Treated* Disability,
treated_recovered=m68* Treated*Recovered-m89*Recovered*Infected_Multibacillary-m810* Recovered*Infected_Paucibacillary,
relapse_multibacillary_treatment=-m96*Relapse_Multibacillary* Treated-m97*Relapse_Multibacillary*Disability,
relapse_paucibacillary_treatment=-m106*Relapse_Paucibacillary*Treated,
susceptible_death=mu*Susceptible,
exposed_death=mu*Exposed,
infected_multibacillary_death=mu*Infected_Multibacillary,
infected_paucibacillary_death=mu*Infected_Paucibacillary,
exposed_detected_death=mu* Exposed_Detected_Diagnosis,
treatment_death=mu*Treated,
disability_death=mu*Disability,
recovered_death=mu*Recovered,
relapse_multibacillary_death=mu*Relapse_Multibacillary,
relapse_paucibacillary_death=mu*Relapse_Paucibacilary
)
transitions <- list(
birth=c(1,0,0,0,0,0,0,0,0,0),
susceptible_exposed=c(-1,1,0,0,0,0,0,0,0,0,0,0),
exposed_infected_multibacillary=c(0,-1,1,0,0,0,0,0,0,0),
exposed_infected_paucibacillary=c(0,-1,0,1,0,0,0,0,0,0),
infected_multibacillary_exposed_detected=c(0,0,-1,0,1,0,0,0,0,0),
exposed_detected_treatment=c(0,0,0,0,-1,1,0,0,0,0),
infected_multibacillary_disability=c(0,0,-1,0,0,0,1,0,0,0),
treated_recovered=c(0,0,0,0,0,-1,0,1,0,0),
relapse_multibacillary_treatment=c(0,0,0,0,0,1,0,0,-1,0),
relapse_paucibacillary_treatment=c(0,0,0,0,0,1,0,0,0,-1),
susceptible_death=c(-1,0,0,0,0,0,0,0,0,0),
exposed_death= c(0,-1,0,0,0,0,0,0,0,0),
infected_multibacillary_death= c(0,0,-1,0,0,0,0,0,0,0),
infected_paucibacillary_death= c(0,0,0,-1,0,0,0,0,0,0),
exposed_detected_death= c(0,0,0,0,-1,0,0,0,0,0),
treatment_death= c(0,0,0,0,0,-1,0,0,0,0),
disability_death= c(0,0,0,0,0,0,-1,0,0,0),
recovered_death= c(0,0,0,0,0,0,0,-1,0,0),
relapse_multibacillary_death= c(0,0,0,0,0,0,0,0,-1,0),
relapse_paucibacillary_death= c(0,0,0,0,0,0,0,0,0,-1)
)
total.rate <- sum(rates)
if (total.rate==0)
tau <- Inf
else
tau <- rexp(n=1,rate=total.rate)
event <- sample.int(n=6,size=1,prob=rates/total.rate)
x+c(tau,transitions[[event]])
}
abcdefghij.simul <- function (x, params, maxstep = 10000) {
output <- array(dim=c(maxstep+1,4))
colnames(output) <- names(x)
output[1,] <-x
k <- 1
while ((k <= maxstep) && (x["Exposed"] > 0)) {
k <- k+1
output[k,] <- x <- abcdefghij.onestep(x,params)
}
as.data.frame(output[1:k,])
}
And in R this happens:
> set.seed(56856583)
> nsims <- 1
> xstart <- c(time=1,Susceptible=100000,Exposed=1,Infected_Multibacillary=1,Infected_Paucibacillary=1,Exposed_Detected_Diagnosis=1,Treated=1,Disability=1,Recovered=1,Relapse_Multibacillary=1,Relapse_Paucibacillary=1)
> library(plyr)
> simdat <- rdply(nsims, abcdefghij.simul(xstart,params))
Error in dimnames(x) <- dn :
length of 'dimnames' [2] not equal to array extent
Called from: `colnames<-`(`*tmp*`, value = names(x))
Browse[1]>
Change this line:
output <- array(dim=c(maxstep+1,4))
To this instead:
output <- array(dim=c(maxstep+1,11))
Your xstart variable has 11 elements. If you want them all on one row, you have to create something that is 11 columns wide, not 4. Perhaps you had just 4 values in the beginning.
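For reference, the error itself is easy to reproduce in isolation; here is a minimal sketch (the names are just placeholders):
x <- setNames(rep(1, 11), paste0("v", 1:11))  # stands in for your 11-element xstart
out <- array(dim = c(5, 4))
# colnames(out) <- names(x)   # Error in dimnames(x) <- dn :
#                             #   length of 'dimnames' [2] not equal to array extent
out <- array(dim = c(5, 11))
colnames(out) <- names(x)     # works: 11 names for 11 columns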
Furthermore you don't seem to define params anywhere. The code won't run until you do.
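For example, something along these lines is needed before calling abcdefghij.simul() (the numbers below are arbitrary placeholders, not meaningful rates):
params <- c(m12 = 1e-6, m25 = 1e-6, m23 = 1e-6, m24 = 1e-6,
m35 = 1e-6, m45 = 1e-6, m37 = 1e-6, m56 = 1e-6,
m67 = 1e-6, m68 = 1e-6, m89 = 1e-6, m810 = 1e-6,
m96 = 1e-6, m97 = 1e-6, m106 = 1e-6, mu = 1/70)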
Is it possible to request that parallel::mclapply() abandons all further processing asap if it encounters an error (e.g., a stop()) in any one of its processes?
Here is another approach: The idea is to modify parallel::mclapply() at the three places indicated with #!!. The new argument stop.on.error can be used to specify whether the execution should stop when an error occurs.
library(parallel)
Mclapply <- function (X, FUN, ..., mc.preschedule = TRUE,
mc.set.seed = TRUE, mc.silent = FALSE,
mc.cores = getOption("mc.cores", 2L),
mc.cleanup = TRUE, mc.allow.recursive = TRUE,
affinity.list = NULL, stop.on.error=FALSE)
{
stop.on.error <- stop.on.error[1] #!!
stopifnot(is.logical(stop.on.error)) #!!
cores <- as.integer(mc.cores)
if ((is.na(cores) || cores < 1L) && is.null(affinity.list))
stop("'mc.cores' must be >= 1")
parallel:::.check_ncores(cores)
if (parallel:::isChild() && !isTRUE(mc.allow.recursive))
return(lapply(X = X, FUN = FUN, ...))
if (!is.vector(X) || is.object(X))
X <- as.list(X)
if (!is.null(affinity.list) && length(affinity.list) < length(X))
stop("affinity.list and X must have the same length")
if (mc.set.seed)
mc.reset.stream()
if (length(X) < 2) {
old.aff <- mcaffinity()
mcaffinity(affinity.list[[1]])
res <- lapply(X = X, FUN = FUN, ...)
mcaffinity(old.aff)
return(res)
}
if (length(X) < cores)
cores <- length(X)
if (cores < 2L && is.null(affinity.list))
return(lapply(X = X, FUN = FUN, ...))
jobs <- list()
parallel:::prepareCleanup()
on.exit(parallel:::cleanup(mc.cleanup))
if (!mc.preschedule) {
FUN <- match.fun(FUN)
if (length(X) <= cores && is.null(affinity.list)) {
jobs <- lapply(seq_along(X), function(i) mcparallel(FUN(X[[i]],
...), name = names(X)[i], mc.set.seed = mc.set.seed,
silent = mc.silent))
res <- mccollect(jobs)
if (length(res) == length(X))
names(res) <- names(X)
has.errors <- sum(sapply(res, inherits, "try-error"))
}
else {
sx <- seq_along(X)
res <- vector("list", length(sx))
names(res) <- names(X)
fin <- rep(FALSE, length(X))
if (!is.null(affinity.list)) {
cores <- max(unlist(x = affinity.list, recursive = TRUE))
d0 <- logical(cores)
cpu.map <- lapply(sx, function(i) {
data <- d0
data[as.vector(affinity.list[[i]])] <- TRUE
data
})
ava <- do.call(rbind, cpu.map)
}
else {
ava <- matrix(TRUE, nrow = length(X), ncol = cores)
}
jobid <- integer(cores)
for (i in 1:cores) {
jobid[i] <- match(TRUE, ava[, i])
ava[jobid[i], ] <- FALSE
}
if (anyNA(jobid)) {
unused <- which(is.na(jobid))
jobid <- jobid[-unused]
ava <- ava[, -unused, drop = FALSE]
}
jobs <- lapply(jobid, function(i) mcparallel(FUN(X[[i]],
...), mc.set.seed = mc.set.seed, silent = mc.silent,
mc.affinity = affinity.list[[i]]))
jobsp <- parallel:::processID(jobs)
has.errors <- 0L
delivered.result <- 0L
while (!all(fin)) {
s <- parallel:::selectChildren(jobs[!is.na(jobsp)], -1)
if (is.null(s))
break
if (is.integer(s))
for (ch in s) {
ji <- match(TRUE, jobsp == ch)
ci <- jobid[ji]
r <- parallel:::readChild(ch)
if (is.raw(r)) {
child.res <- unserialize(r)
if (inherits(child.res, "try-error")){
if(stop.on.error) #!!
stop("error in process X = ", ci, "\n", attr(child.res, "condition")$message) #!!
has.errors <- has.errors + 1L
}
if (!is.null(child.res))
res[[ci]] <- child.res
delivered.result <- delivered.result +
1L
}
else {
fin[ci] <- TRUE
jobsp[ji] <- jobid[ji] <- NA
if (any(ava)) {
nexti <- which.max(ava[, ji])
if (!is.na(nexti)) {
jobid[ji] <- nexti
jobs[[ji]] <- mcparallel(FUN(X[[nexti]],
...), mc.set.seed = mc.set.seed,
silent = mc.silent, mc.affinity = affinity.list[[nexti]])
jobsp[ji] <- parallel:::processID(jobs[[ji]])
ava[nexti, ] <- FALSE
}
}
}
}
}
nores <- length(X) - delivered.result
if (nores > 0)
warning(sprintf(ngettext(nores, "%d parallel function call did not deliver a result",
"%d parallel function calls did not deliver results"),
nores), domain = NA)
}
if (has.errors)
warning(gettextf("%d function calls resulted in an error",
has.errors), domain = NA)
return(res)
}
if (!is.null(affinity.list))
warning("'mc.preschedule' must be false if 'affinity.list' is used")
sindex <- lapply(seq_len(cores), function(i) seq(i, length(X),
by = cores))
schedule <- lapply(seq_len(cores), function(i) X[seq(i, length(X),
by = cores)])
ch <- list()
res <- vector("list", length(X))
names(res) <- names(X)
cp <- rep(0L, cores)
fin <- rep(FALSE, cores)
dr <- rep(FALSE, cores)
inner.do <- function(core) {
S <- schedule[[core]]
f <- parallel:::mcfork()
if (isTRUE(mc.set.seed))
parallel:::mc.advance.stream()
if (inherits(f, "masterProcess")) {
on.exit(parallel:::mcexit(1L, structure("fatal error in wrapper code",
class = "try-error")))
if (isTRUE(mc.set.seed))
parallel:::mc.set.stream()
if (isTRUE(mc.silent))
parallel:::closeStdout(TRUE)
parallel:::sendMaster(try(lapply(X = S, FUN = FUN, ...), silent = TRUE))
parallel:::mcexit(0L)
}
jobs[[core]] <<- ch[[core]] <<- f
cp[core] <<- parallel:::processID(f)
NULL
}
job.res <- lapply(seq_len(cores), inner.do)
ac <- cp[cp > 0]
has.errors <- integer(0)
while (!all(fin)) {
s <- parallel:::selectChildren(ac[!fin], -1)
if (is.null(s))
break
if (is.integer(s))
for (ch in s) {
a <- parallel:::readChild(ch)
if (is.integer(a)) {
core <- which(cp == a)
fin[core] <- TRUE
}
else if (is.raw(a)) {
core <- which(cp == attr(a, "pid"))
job.res[[core]] <- ijr <- unserialize(a)
if (inherits(ijr, "try-error")){
has.errors <- c(has.errors, core)
if(stop.on.error) #!!
stop("error in one of X = ", paste(schedule[[core]], collapse=", "), "\n", attr(ijr, "condition")$message) #!!
}
dr[core] <- TRUE
}
else if (is.null(a)) {
core <- which(cp == ch)
fin[core] <- TRUE
}
}
}
for (i in seq_len(cores)) {
this <- job.res[[i]]
if (inherits(this, "try-error")) {
for (j in sindex[[i]]) res[[j]] <- this
}
else if (!is.null(this))
res[sindex[[i]]] <- this
}
nores <- cores - sum(dr)
if (nores > 0)
warning(sprintf(ngettext(nores, "scheduled core %s did not deliver a result, all values of the job will be affected",
"scheduled cores %s did not deliver results, all values of the jobs will be affected"),
paste(which(dr == FALSE), collapse = ", ")), domain = NA)
if (length(has.errors)) {
if (length(has.errors) == cores)
warning("all scheduled cores encountered errors in user code")
else warning(sprintf(ngettext(has.errors, "scheduled core %s encountered error in user code, all values of the job will be affected",
"scheduled cores %s encountered errors in user code, all values of the jobs will be affected"),
paste(has.errors, collapse = ", ")), domain = NA)
}
res
}
Tests:
f <- function(x, errorAt=1, sleep=2){
if(x==errorAt) stop("-->> test error <<--")
Sys.sleep(sleep)
x
}
options(mc.cores=2)
Mclapply(X=1:4, FUN=f, stop.on.error=TRUE)
## Error in Mclapply(X = 1:4, FUN = f, stop.on.error = TRUE) :
## error in one of X = 1, 3
## -->> test error <<--
Mclapply(X=1:4, FUN=f, errorAt=3, stop.on.error=TRUE)
## Error in Mclapply(X = 1:4, FUN = f, errorAt = 3, stop.on.error = TRUE) :
## error in one of X = 1, 3
## -->> test error <<--
Mclapply(X=1:4, FUN=f, errorAt=Inf, stop.on.error=TRUE)
## [[1]]
## [1] 1
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
##
## [[4]]
## [1] 4
Mclapply(X=1:4, FUN=f, mc.preschedule=FALSE, stop.on.error=TRUE)
## Error in Mclapply(X = 1:4, FUN = f, mc.preschedule = FALSE, stop.on.error = TRUE) :
## error in process X = 1
## -->> test error <<--
Mclapply(X=1:4, FUN=f, errorAt=3, mc.preschedule=FALSE, stop.on.error=TRUE)
## Error in Mclapply(X = 1:4, FUN = f, errorAt = 3, mc.preschedule = FALSE, :
## error in process X = 3
## -->> test error <<--
Mclapply(X=1:4, FUN=f, errorAt=Inf, mc.preschedule=FALSE, stop.on.error=TRUE)
## [[1]]
## [1] 1
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
##
## [[4]]
## [1] 4
This approach uses many internal functions of the package parallel (e.g., parallel:::isChild()). It worked with R version 3.6.0.
Terminating the evaluations in all processes of a cluster upon an error in one process is not possible with a standard mclapply() call. The reason for this is that the processes do not communicate among each other until they are done.
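To illustrate the default behaviour on a Unix-alike system, a quick check along these lines: a plain mclapply() call records the error for the affected element(s) and simply keeps going with the rest.
library(parallel)
# one element fails; the others still return their values (2 cores, Unix-alike)
res <- mclapply(1:3, function(i) if (i == 2) stop("boom") else i, mc.cores = 2)
sapply(res, inherits, "try-error")
# [1] FALSE  TRUE FALSE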
Using the R package future one can achieve such a behavior. The idea is to
create futures and evaluate them in parallel
check every 2 seconds whether one future has resolved to an error
if an error is detected, kill all processes of the cluster
A sketch of how this could work:
library(future)
library(parallel)
library(tools)
parallelLapply <- function(x, fun, checkInterval=2, nProcess=2){
## setup cluster and get process IDs of process in cluster
cl <- makeCluster(spec=nProcess)
pids <- unlist(parLapply(cl=cl, X=1:nProcess, function(x) Sys.getpid()))
plan(cluster, workers=cl)
## create futures and start their evaluation
fList <- lapply(x, function(xi) futureCall(function(x) try(fun(x), silent=TRUE), list(x=xi)))
## check every checkInterval seconds whether an error occurred or whether all are resolved
while(TRUE){
Sys.sleep(checkInterval)
## check for errors
errorStatus <- unlist(lapply(fList, function(x)
resolved(x) && class(value(x))=="try-error"))
if(any(unlist(errorStatus))){
lapply(pids, pskill)
results <- NULL
cat("an error occurred in one future: all process of the cluster were killed.\n")
break
}
## check if all resolved without error
allResolved <- all(unlist(lapply(fList, resolved)))
if(allResolved){
results <- lapply(fList, value)
cat("all futures are resolved sucessfully.\n")
break
}
}
results
}
## test 1: early termination because x=1 results in an error.
f1 <- function(x){
if(x==1) stop()
Sys.sleep(15)
x
}
parallelLapply(x=1:5, fun=f1)
# an error occurred in one future: all processes of the cluster were killed.
# NULL
## test 2: no error
f2 <- function(x){
Sys.sleep(15)
x
}
parallelLapply(x=1:5, fun=f2)
## all futures are resolved successfully.
## [[1]]
## [1] 1
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
##
## [[4]]
## [1] 4
##
## [[5]]
## [1] 5
Note:
Additional adjustments are needed if the function passed to fun depends on additional arguments.
On Linux one can use makeForkCluster() instead of makeCluster() for convenience. Then the usage is closer to mclapply(); see the sketch below.
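A minimal sketch of that cluster setup (assuming a Unix-alike system; it mirrors the first lines of parallelLapply() above with only the cluster creation swapped):
library(parallel)
library(future)
nProcess <- 2
# fork workers inherit the parent environment, so fun and its data need not be exported
cl <- makeForkCluster(nnodes = nProcess)   # instead of makeCluster(spec = nProcess)
pids <- unlist(parLapply(cl = cl, X = 1:nProcess, function(x) Sys.getpid()))
plan(cluster, workers = cl)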
Here is a cleaner version of the suggestion from Ivo Welch. Note that this does not stop running processes when an error occurs, but rather prevents the start of new evaluations of FUN.
library(parallel)
mcLapply <- function(X, FUN, ..., mc.preschedule=TRUE,
mc.set.seed=TRUE, mc.silent=FALSE,
mc.cores=getOption("mc.cores", 2L),
mc.cleanup=TRUE, mc.allow.recursive=TRUE,
affinity.list=NULL){
tmpFileName <- tempfile()
fn <- function(X){
if(file.exists(tmpFileName))
return(NA)
o <- try(do.call("FUN", c(X, list(...))), silent=TRUE)
if(class(o)=="try-error"){
file.create(tmpFileName)
}
o
}
ret <- mclapply(X=X, FUN=fn, mc.preschedule=mc.preschedule,
mc.set.seed=mc.set.seed, mc.silent=mc.silent,
mc.cores=mc.cores, mc.cleanup=mc.cleanup,
mc.allow.recursive=mc.allow.recursive,
affinity.list=affinity.list)
if(file.exists(tmpFileName))
file.remove(tmpFileName)
ret
}
## test 1: early termination because x=1 results in an error.
f1 <- function(x){
if(x==1) stop()
Sys.sleep(1)
x
}
mcLapply(X=1:3, FUN=f1)
## [[1]]
## [1] "Error in FUN(1L) : \n"
## attr(,"class")
## [1] "try-error"
## attr(,"condition")
## <simpleError in FUN(1L): >
##
## [[2]]
## [1] NA
##
## [[3]]
## [1] NA
## test 2: no error
f2 <- function(x, a){
Sys.sleep(1)
x+a
}
mcLapply(X=1:2, FUN=f2, a=10)
## [[1]]
## [1] 11
##
## [[2]]
## [1] 12
The following is ugly, but workable. It uses the filesystem as a global shared variable.
options( mc.cores=2 )
if (!exists("touchFile"))
touchFile <- function(filename) { system(paste0("touch ", filename)); }
tfnm <- paste0("mytemporary",as.numeric(Sys.time()))
mfun <- function( i ) {
if (file.exists( tfnm )) stop("done due to process ", i)
message("Mfun(", i,")")
if ( i == 3 ) { message("creating ", tfnm); touchFile(tfnm); stop("goodbye"); }
Sys.sleep( i%%3 )
}
v <- mclapply( 1:10, mfun )
if (file.exists(tfnm)) file.remove(tfnm)
This would be nicer to implement by mclapply itself.
I have used tryCatch so that if an error happens during the execution of the R code it will not break.
I want to write the error to a file, but I am not sure how that can be done.
Below is the code I used:
library(forecast)
library(data.table)
library(RODBC)
forecast_data <- data.frame(Project_ID=character(),
Period_End=character(),
Point_Forecast=character(),
Lower_Limit_95=character(),
Upper_Limit_95=character(),
stringsAsFactors=FALSE)
Data <- read.csv("Data.csv", header=TRUE,na.strings=c("NULL",""))[ ,c('Project_ID', 'Period_End_Date', 'Overall_Backlog_Processing_Efficiency_Incident')]
result = tryCatch({
if (nrow(Data) >= 1)
{
backlog <- as.vector(Data$Overall_Backlog_Processing_Efficiency_Incident)
i <- 1
datalist = list()
for (i in 1:8) {
backlogts <- tbats(backlog)
fc2 <- forecast(backlogts, h=1)
fc2
fc_2 <- as.data.frame(fc2)
fc_2$i <- i # maybe you want to keep track of which iteration produced it?
datalist[[i]] <- fc_2 # add it to your list
backlog <- append(backlog,round(fc_2[1,1], digits = 2))
i <- i +1
}
forecast_data = do.call(rbind, datalist)
forecast_data$`Point Forecast` <- round(forecast_data$`Point Forecast` , digits = 3)
nextweekday <- function(date, wday) {
date <- as.Date(date)
diff <- wday - wday(date)
if( diff < 0 )
diff <- diff + 7
return(date + diff)
}
a <- tail(Data$Period_End_Date, n=1)
a <- as.Date(a, "%d-%b-%y")
b <- tail(Data$Project_ID, n=1)
Period_End_Date <- data.table(date=seq(as.Date(nextweekday(a,1)), by=7, length=8), key="date")
forecast_data = cbind(forecast_data, Period_End_Date)
names(forecast_data)[names(forecast_data) == 'date'] <- 'Period_End'
forecast_data$Period_End <- as.character(forecast_data$Period_End)
forecast_data$Project_ID <- b
forecast_data <- forecast_data[c(8,7,1,4,5)]
names(forecast_data)[names(forecast_data) == 'Lo 95'] <- 'Lower_Limit_95'
names(forecast_data)[names(forecast_data) == 'Hi 95'] <- 'Upper_Limit_95'
names(forecast_data)[names(forecast_data) == 'Point Forecast'] <- 'Point_Forecast'
}
},
warning = function(w) {},
error = function(e) {
forecast_data <- data.frame(Project_ID=character(),
Period_End=character(),
Point_Forecast=character(),
Lower_Limit_95=character(),
Upper_Limit_95=character(),
stringsAsFactors=FALSE)
print(paste("MY_ERROR: ",e))
})
I tried to print the error with print(paste("MY_ERROR: ", e)) under error = function(e), but it is not working.
Is there anything I am missing? Please advise.
Does this help?
foo <- function(x){
output <- tryCatch(x, error = function(e) e)
ifelse(is(output, "error"), "error", "no error")
}
foo() # error
foo(1) # no error
Alternatively, use purrr::safely():
library(purrr)
foo <- function(x){
x
}
foo_safe <- safely(foo)
foo_safe()   # the error is captured in the $error element instead of being thrown
foo_safe(10) # $result is 10, $error is NULL
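Regarding the part of the question about writing the error to a file: a minimal sketch using the error handler (the file name error_log.txt is just a placeholder):
result <- tryCatch({
  stop("something went wrong")   # stands in for the forecasting code above
}, error = function(e) {
  # append a timestamped message to a log file and return a fallback value
  write(paste(Sys.time(), "MY_ERROR:", conditionMessage(e)),
        file = "error_log.txt", append = TRUE)
  NULL
})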
I am wondering whether a proper framework for interval manipulation and comparison exists in R.
After some search, I was only able to find the following:
- the findInterval function in the base package (but I find it hard to understand)
- some answers here and there about union and intersection (notably: http://r.789695.n4.nabble.com/Union-Intersect-two-continuous-sets-td4224545.html)
Would you know of an initiative to implement a comprehensive set of tools to easily handle frequent tasks in interval manipulation, like inclusion/setdiff/union/intersection/etc. (e.g. see here for a list of functionalities)?
Or would you have advice on developing such an approach?
Below are some drafts on my side for doing so. They are surely awkward and still have some bugs, but they might illustrate what I am looking for.
Preliminary aspects about the options taken:
- should deal seamlessly with intervals or interval sets
- intervals are represented as 2-column data.frames (lower boundary, upper boundary), on one row
- interval sets are represented as 2 columns with several rows
- a third column might be needed for identification of interval sets
UNION
interval_union <- function(df){ # for data frame
df <- interval_clean(df)
if(is.empty(df)){
return(as.data.frame(NULL))
} else {
if(is.POSIXct(df[,1])) {
dated <- TRUE
df <- colwise(as.numeric)(df)
} else {
dated <- FALSE
}
M <- as.matrix(df)
o <- order(c(M[, 1], M[, 2]))
n <- cumsum( rep(c(1, -1), each=nrow(M))[o])
startPos <- c(TRUE, n[-1]==1 & n[-length(n)]==0)
endPos <- c(FALSE, n[-1]==0 & n[-length(n)]==1)
M <- M[o]
if(dated == TRUE) {
df2 <- colwise(mkDateTime)(as.data.frame(cbind(M[startPos], M[endPos])), from.s = TRUE)
} else {
df2 <- as.data.frame(cbind(M[startPos], M[endPos]))
}
colnames(df2) <- colnames(df)
# print(df2)
return(df2)
}
}
union_1_1 <- function(test, ref){
names(ref) <- names(test)
tmp <- interval_union(as.data.frame(rbind(test, ref)))
return(tmp)
}
union_1_n <- function(test, ref){
return(union_1_1(test, ref))
}
union_n_n <- function(test, ref){
testnn <- adply(.data = test, 1, union_1_n, ref, .expand = FALSE)
return(testnn)
}
ref_interval_union <- function(df, ref){
tmp0 <- adply(df, 1, union_1_1, ref, .expand = FALSE) # set to FALSE to keep ID
return(tmp0)
}
INTERSECTION
interval_intersect <- function(df){
# adapted from : http://r.789695.n4.nabble.com/Union-Intersect-two-continuous-sets-td4224545.html
M <- as.matrix(df)
L <- max(M[, 1])
R <- min(M[, 2])
Inew <- if (L <= R) c(L, R) else c()
if (!is.empty(Inew)){
df2 <- t(as.data.frame(Inew))
colnames(df2) <- colnames(df)
rownames(df2) <- NULL
} else {
df2 <- NULL
}
return(as.data.frame(df2))
}
ref_interval_intersect <- function(df, ref){
tmpfun <- function(a, b){
names(b) <- names(a)
tmp <- interval_intersect(as.data.frame(rbind(a, b)))
return(tmp)
}
tmp0 <- adply(df, 1, tmpfun, ref, .expand = FALSE) # [,3:4]
#if(!is.empty(tmp0)) colnames(tmp0) <- colnames(df)
return(tmp0)
}
int_1_1 <- function(test, ref){
te <- as.vector(test)
re <- as.vector(ref)
names(re) <- names(te)
tmp0 <- c(max(te[1, 1], re[1, 1]), min(te[1, 2], re[1, 2]))
if(tmp0[1]>tmp0[2]) tmp0 <- NULL # inverse of a correct interval --> VOID
if(!is.empty(tmp0)){
tmp1 <- colwise(mkDateTime)(as.data.frame(t(as.data.frame(tmp0))))
colnames(tmp1) <- colnames(test)
} else {
tmp1 <- data.frame(NULL)
}
return(tmp1)
}
int_1_n <- function(test, ref){
test1 <- adply(.data = ref, 1, int_1_1, test = test, .expand = FALSE)
if(is.empty(test1)){
return(data.frame(NULL))
} else {
testn <- interval_union(test1[,2:3])
return(testn)
}
}
int_n_n <- function(test, ref){
testnn <- adply(.data = test, 1, int_1_n, ref, .expand = FALSE)
# return(testnn[,2:3]) # return interval set without index (1st column)
return(testnn) # return interval set with index (1st column) --> useful to go with merge, to keep metadata along with the interval description
}
int_intersect <- function(df, ref){
mycols <- colnames(df)
df$X1 <- 1:nrow(df)
test <- df[, 1:2]
tmp <- int_n_n(test, ref)
intersection <- merge(tmp, df, by = "X1", suffixes = c("", "init"))
return(intersection[,mycols])
}
EXCLUSION
excl_1_1 <- function(test, ref){
te <- as.vector(test)
re <- as.vector(ref)
names(re) <- names(te)
if(te[1] < re[1]){ # Lower Bound
if(te[2] > re[1]){ # overlap
x <- unlist(c(te[1], re[1]))
} else { # no overlap
x <- unlist(c(te[1], te[2]))
}
} else { # test > ref on lower bound side
x <- NULL
}
if(te[2] > re[2]){ # Upper Bound
if(te[1] < re[2]){ # overlap
y <- unlist(c(re[2], te[2]))
} else { # no overlap
y <- unlist(c(te[1], te[2]))
}
} else { # test < ref on upper bound side
y <- NULL
}
if(is.empty(x) & is.empty(y)){
tmp0 <- NULL
tmp1 <- tmp0
} else {
tmp0 <- as.data.frame(rbind(x, y))
colnames(tmp0) <- colnames(test)
tmp1 <- interval_union(tmp0)
}
return(tmp1)
}
excl_1_n <- function(test, ref){
testn0 <- adply(.data = ref, 1, excl_1_1, test = test, .expand=FALSE)
# loop to successively intersect the interval sets, to handle disjoint intervals (identified by X1, column 1)
tmp <- range(testn0)
names(tmp) <- colnames(testn0)[2:3]
tmp <- as.data.frame(t(tmp))
for(i in unique(testn0[,1])){
tmp <- int_n_n(tmp, testn0[testn0[,1]==i, 2:3])
}
return(tmp)
}
INCLUSION
incl_1_1 <- function(test, ref){
te <- as.vector(test)
re <- as.vector(ref)
if(te[1] >= re[1] & te[2] <= re[2]){ return(TRUE) } else { return(FALSE) }
}
incl_1_n <- function(test, ref){
testn <- adply(.data = ref, 1, incl_1_1, test = test)
return(any(testn[,ncol(testn)]))
}
incl_n_n <- function(test, ref){
testnn <- aaply(.data = test, 1, incl_1_n, ref, .expand = FALSE)
names(testnn) <- NULL
return(testnn)
}
flat_incl_n_n <- function(test, ref){
ref <- interval_union(ref)
return(incl_n_n(test, ref))
}
# testing for a vector, instead of an interval set
incl_x_1 <- function(x, ref){
test <- (x>=ref[1,1] & x<ref[1,2])
return(test)
}
incl_x_n <- function(x, ref){
test <- any(x>=ref[,1] & x<ref[,2])
return(test)
}
I think you might be able to make good use of the many interval-related functions in the sets package.
Here's a small example illustrating the package's support for interval construction, intersection, symmetric difference, union, and complementation, as well as its test for inclusion in an interval. These and many other related functions are documented on the help page for ?interval.
library(sets)
i1 <- interval(1,6)
i2 <- interval(5,10)
i3 <- interval(200,400)
i4 <- interval(202,402)
i5 <- interval_union(interval_intersection(i1,i2),
interval_symdiff(i3,i4))
i5
# [5, 6] U [200, 202) U (400, 402]
interval_complement(i5)
# [-Inf, 5) U (6, 200) U [202, 400] U (402, Inf]
interval_contains_element(i5, 5.5)
# [1] TRUE
interval_contains_element(i5, 201)
# [1] TRUE
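The plain set difference asked about in the question (setdiff) can also be built from the functions shown above, by intersecting one interval with the complement of the other; a small sketch:
interval_intersection(i1, interval_complement(i2))
# expected: [1, 5)   (the part of i1 not covered by i2)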
If your intervals are currently encoded in a two-column data.frame, you could use something like mapply() to convert them to intervals of the type used by the sets package:
df <- data.frame(lBound = c(1,5,100), uBound = c(10, 6, 200))
Ints <- with(df, mapply("interval", l=lBound, r=uBound, SIMPLIFY=FALSE))
Ints
# [[1]]
# [1, 10]
# [[2]]
# [5, 6]
# [[3]]
# [100, 200]