error: replacement has 1 row, data has 0 - r

corr <- function(directory, threshold = 0) {
setwd("c:/users/hp1/desktop")
files_full <- list.files(directory,full.names = TRUE)
cr <- data.frame()
j = 1
for(i in 1:332)
{
y <- read.csv(files_full[i])
z <- y[complete.cases(y),]
if (nrow(z) > threshold){
cr[j] <- cor(z[,'sulfate'], z[,'nitrate'], method = "pearson")
j = j+1
}
}
cr
}
it's showing the following error:
Error in [<-.data.frame(*tmp*, j, value = -0.222552560758546) :
replacement has 1 row, data has 0
I was expecting as j increments, values would get added to the cr dtaframe. however that is not happening.
please suggest necessary changes

You could try something like this. If you provide a reproducible example I can show you how to clean the result. sapply will try to simplify the result, but you can stop it by specifying simplify = FALSE and remove unwanted list elements.
setwd("c:/users/hp1/desktop") # I would use this outside a function
corr <- function(directory, threshold = 0) {
files_full <- list.files(directory, full.names = TRUE)
sapply(files_full, FUN = function(x) {
y <- read.csv(x)
z <- y[complete.cases(y),]
if (nrow(z) > threshold){
out <- cor(z[,'sulfate'], z[,'nitrate'], method = "pearson")
} else {
return(NA) # or some other way you want to handle the result
}
})
}

Related

homals package for Nonlinear PCA in R: Error in dimnames(x) <- dn : length of 'dimnames' [1] not equal to array extent

I am trying to implement NLPCA (Nonlinear PCA) on a data set using the homals package in R but I keep on getting the following error message:
Error in dimnames(x) <- dn : length of 'dimnames' [1] not equal to array extent
The data set I use can be found in the UCI ML Repository and it's called dat when imported in R: https://archive.ics.uci.edu/ml/datasets/South+German+Credit+%28UPDATE%29
Here is my code (some code is provided once the data set is downloaded):
nlpcasouthgerman <- homals(dat, rank=1, level=c('nominal','numerical',rep('nominal',2),
'numerical','nominal',
rep('ordinal',2), rep('nominal',2),
'ordinal','nominal','numerical',
rep('nominal',2), 'ordinal',
'nominal','ordinal',rep('nominal',3)),
active=c(FALSE, rep(TRUE, 20)), ndim=3, verbose=1)
I am trying to predict the first attribute, therefore I set it to be active=FALSE.
The output looks like this (skipped all iteration messages):
Iteration: 1 Loss Value: 0.000047
Iteration: 2 Loss Value: 0.000044
...
Iteration: 37 Loss Value: 0.000043
Iteration: 38 Loss Value: 0.000043
Error in dimnames(x) <- dn :
length of 'dimnames' [1] not equal to array extent
I don't understand why this error comes up. I have used the same code on some other data set and it worked fine so I don't see why this error persists. Any suggestions about what might be going wrong and how I could fix this issue?
Thanks!
It seems the error comes from code generating NAs in the homals function, specifically for your data for the number_credits levels, which causes problems with sort(as.numeric((rownames(clist[[i]])))) and the attempt to catch the error, since one of the levels does not give an NA value.
So either you have to modify the homals function to take care of such an edge case, or change problematic factor levels. This might be something to file as a bug report to the package maintainer.
As a work-around in your case you could do something like:
levels(dat$number_credits)[1] <- "_1"
and the function should run without problems.
Edit:
I think one solution would be to change one line of code in the homals function, but no guarantee this does work as intended. Better submit a bug report to the package author/maintainer - see https://cran.r-project.org/web/packages/homals/ for the address.
Using rnames <- as.numeric(rownames(clist[[i]]))[order(as.numeric(rownames(clist[[i]])))] instead of rnames <- sort(as.numeric((rownames(clist[[i]])))) would allow the following code to identify NAs, but I am not sure why the author did not try to preserve factor levels outright.
Anyway, you could run a modified function in your local environment, which would require to explicitly call internal (not exported) homals functions, as shown below. Not necessarily the best approach, but would help you out in a pinch.
homals <- function (data, ndim = 2, rank = ndim, level = "nominal", sets = 0,
active = TRUE, eps = 0.000001, itermax = 1000, verbose = 0) {
dframe <- data
name <- deparse(substitute(dframe))
nobj <- nrow(dframe)
nvar <- ncol(dframe)
vname <- names(dframe)
rname <- rownames(dframe)
for (j in 1:nvar) {
dframe[, j] <- as.factor(dframe[, j])
levfreq <- table(dframe[, j])
if (any(levfreq == 0)) {
newlev <- levels(dframe[, j])[-which(levfreq == 0)]
}
else {
newlev <- levels(dframe[, j])
}
dframe[, j] <- factor(dframe[, j], levels = sort(newlev))
}
varcheck <- apply(dframe, 2, function(tl) length(table(tl)))
if (any(varcheck == 1))
stop("Variable with only 1 value detected! Can't proceed with estimation!")
active <- homals:::checkPars(active, nvar)
rank <- homals:::checkPars(rank, nvar)
level <- homals:::checkPars(level, nvar)
if (length(sets) == 1)
sets <- lapply(1:nvar, "c")
if (!all(sort(unlist(sets)) == (1:nvar))) {
print(cat("sets union", sort(unlist(sets)), "\n"))
stop("inappropriate set structure !")
}
nset <- length(sets)
mis <- rep(0, nobj)
for (l in 1:nset) {
lset <- sets[[l]]
if (all(!active[lset]))
(next)()
jset <- lset[which(active[lset])]
for (i in 1:nobj) {
if (any(is.na(dframe[i, jset])))
dframe[i, jset] <- NA
else mis[i] <- mis[i] + 1
}
}
for (j in 1:nvar) {
k <- length(levels(dframe[, j]))
if (rank[j] > min(ndim, k - 1))
rank[j] <- min(ndim, k - 1)
}
x <- cbind(homals:::orthogonalPolynomials(mis, 1:nobj, ndim))
x <- homals:::normX(homals:::centerX(x, mis), mis)$q
y <- lapply(1:nvar, function(j) homals:::computeY(dframe[, j], x))
sold <- homals:::totalLoss(dframe, x, y, active, rank, level, sets)
iter <- pops <- 0
repeat {
iter <- iter + 1
y <- homals:::updateY(dframe, x, y, active, rank, level, sets,
verbose = verbose)
smid <- homals:::totalLoss(dframe, x, y, active, rank, level,
sets)/(nobj * nvar * ndim)
ssum <- homals:::totalSum(dframe, x, y, active, rank, level, sets)
qv <- homals:::normX(homals:::centerX((1/mis) * ssum, mis), mis)
z <- qv$q
snew <- homals:::totalLoss(dframe, z, y, active, rank, level,
sets)/(nobj * nvar * ndim)
if (verbose > 0)
cat("Iteration:", formatC(iter, digits = 3, width = 3),
"Loss Value: ", formatC(c(smid), digits = 6,
width = 6, format = "f"), "\n")
r <- abs(qv$r)/2
ops <- sum(r)
aps <- sum(La.svd(crossprod(x, mis * z), 0, 0)$d)/ndim
if (iter == itermax) {
stop("maximum number of iterations reached")
}
if (smid > sold) {
warning(cat("Loss function increases in iteration ",
iter, "\n"))
}
if ((ops - pops) < eps)
break
else {
x <- z
pops <- ops
sold <- smid
}
}
ylist <- alist <- clist <- ulist <- NULL
for (j in 1:nvar) {
gg <- dframe[, j]
c <- homals:::computeY(gg, z)
d <- as.vector(table(gg))
lst <- homals:::restrictY(d, c, rank[j], level[j])
y <- lst$y
a <- lst$a
u <- lst$z
ylist <- c(ylist, list(y))
alist <- c(alist, list(a))
clist <- c(clist, list(c))
ulist <- c(ulist, list(u))
}
dimlab <- paste("D", 1:ndim, sep = "")
for (i in 1:nvar) {
if (ndim == 1) {
ylist[[i]] <- cbind(ylist[[i]])
ulist[[i]] <- cbind(ulist[[i]])
clist[[i]] <- cbind(clist[[i]])
}
options(warn = -1)
# Here is the line that I changed in the code:
# rnames <- sort(as.numeric((rownames(clist[[i]]))))
rnames <- as.numeric(rownames(clist[[i]]))[order(as.numeric(rownames(clist[[i]])))]
options(warn = 0)
if ((any(is.na(rnames))) || (length(rnames) == 0))
rnames <- rownames(clist[[i]])
if (!is.matrix(ulist[[i]]))
ulist[[i]] <- as.matrix(ulist[[i]])
rownames(ylist[[i]]) <- rownames(ulist[[i]]) <- rownames(clist[[i]]) <- rnames
rownames(alist[[i]]) <- paste(1:dim(alist[[i]])[1])
colnames(clist[[i]]) <- colnames(ylist[[i]]) <- colnames(alist[[i]]) <- dimlab
colnames(ulist[[i]]) <- paste(1:dim(as.matrix(ulist[[i]]))[2])
}
names(ylist) <- names(ulist) <- names(clist) <- names(alist) <- colnames(dframe)
rownames(z) <- rownames(dframe)
colnames(z) <- dimlab
dummymat <- as.matrix(homals:::expandFrame(dframe, zero = FALSE, clean = FALSE))
dummymat01 <- dummymat
dummymat[dummymat == 2] <- NA
dummymat[dummymat == 0] <- Inf
scoremat <- array(NA, dim = c(dim(dframe), ndim), dimnames = list(rownames(dframe),
colnames(dframe), paste("dim", 1:ndim, sep = "")))
for (i in 1:ndim) {
catscores.d1 <- do.call(rbind, ylist)[, i]
dummy.scores <- t(t(dummymat) * catscores.d1)
freqlist <- apply(dframe, 2, function(dtab) as.list(table(dtab)))
cat.ind <- sequence(sapply(freqlist, length))
scoremat[, , i] <- t(apply(dummy.scores, 1, function(ds) {
ind.infel <- which(ds == Inf)
ind.minfel <- which(ds == -Inf)
ind.nan <- which(is.nan(ds))
ind.nael <- which((is.na(ds) + (cat.ind != 1)) ==
2)
ds[-c(ind.infel, ind.minfel, ind.nael, ind.nan)]
}))
}
disc.mat <- apply(scoremat, 3, function(xx) {
apply(xx, 2, function(cols) {
(sum(cols^2, na.rm = TRUE))/nobj
})
})
result <- list(datname = name, catscores = ylist, scoremat = scoremat,
objscores = z, cat.centroids = clist, ind.mat = dummymat01,
loadings = alist, low.rank = ulist, discrim = disc.mat,
ndim = ndim, niter = iter, level = level, eigenvalues = r,
loss = smid, rank.vec = rank, active = active, dframe = dframe,
call = match.call())
class(result) <- "homals"
result
}

Error in strsplit(word, NULL) : non-character argument with spell checker

I try to do a spelling checker with R that correct a spelling mistake of a word or a document.
I try with this R code to do a correction for a word, which it works very well:
> Correct("speling", dtm = counts)
$l4
[1] "spelling"
but when I try to do the correction of a document, I get this error :
> CorrectDocument("the quick bruwn fowx jumpt ovre tha lasy dog", dtm = counts)
Error in strsplit(word, NULL) : non-character argument
# This is a text processing function, which I
# borrowed from a CMU Data mining course professor.
strip.text <- function(txt) {
# remove apostrophes (so "don't" -> "dont", "Jane's" -> "Janes", etc.)
txt <- gsub("'","",txt)
# convert to lowercase
txt <- tolower(txt)
# change other non-alphanumeric characters to spaces
txt <- gsub("[^a-z0-9]"," ",txt)
# change digits to #
txt <- gsub("[0-9]+"," ",txt)
# split and make one vector
txt <- unlist(strsplit(txt," "))
# remove empty words
txt <- txt[txt != ""]
return(txt)
}
# Words within 1 transposition.
Transpositions <- function(word = FALSE) {
N <- nchar(word)
if (N > 2) {
out <- rep(word, N - 1)
word <- unlist(strsplit(word, NULL))
# Permutations of the letters
perms <- matrix(c(1:(N - 1), 2:N), ncol = 2)
reversed <- perms[, 2:1]
trans.words <- matrix(rep(word, N - 1), byrow = TRUE, nrow = N - 1)
for(i in 1:(N - 1)) {
trans.words[i, perms[i, ]] <- trans.words[i, reversed[i, ]]
out[i] <- paste(trans.words[i, ], collapse = "")
}
}
else if (N == 2) {
out <- paste(word[2:1], collapse = "")
}
else {
out <- paste(word, collapse = "")
}
return(out)
}
# Single letter deletions.
Deletes <- function(word = FALSE) {
N <- nchar(word)
word <- unlist(strsplit(word, NULL))
out <- list()
for(i in 1:N) {
out[i] <- paste(word[-i], collapse = "")
}
return(out)
}
# Single-letter insertions.
Insertions <- function(word = FALSE) {
N <- nchar(word)
out <- list()
for (letter in letters) {
out[[letter]] <- rep(word, N + 1)
for (i in 1:(N + 1)) {
out[[letter]][i] <- paste(substr(word, i - N, i - 1), letter,
substr(word, i, N), sep = "")
}
}
out <- unlist(out)
return(out)
}
# Single-letter replacements.
Replaces <- function(word = FALSE) {
N <- nchar(word)
out <- list()
for (letter in letters) {
out[[letter]] <- rep(word, N)
for (i in 1:N) {
out[[letter]][i] <- paste(substr(word, i - N, i - 1), letter,
substr(word, i + 1, N + 1), sep = "")
}
}
out <- unlist(out)
return(out)
}
# All Neighbors with distance "1"
Neighbors <- function(word) {
neighbors <- c(word, Replaces(word), Deletes(word),
Insertions(word), Transpositions(word))
return(neighbors)
}
# Probability as determined by our corpus.
Probability <- function(word, dtm) {
# Number of words, total
N <- length(dtm)
word.number <- which(names(dtm) == word)
count <- dtm[word.number]
pval <- count/N
return(pval)
}
# Correct a single word.
Correct <- function(word, dtm) {
neighbors <- Neighbors(word)
# If it is a word, just return it.
if (word %in% names(dtm)) {
out <- word
}
# Otherwise, check for neighbors.
else {
# Which of the neighbors are known words?
known <- which(neighbors %in% names(dtm))
N.known <- length(known)
# If there are no known neighbors, including the word,
# look farther away.
if (N.known == 0) {
print(paste("Having a hard time matching '", word, "'...", sep = ""))
neighbors <- unlist(lapply(neighbors, Neighbors))
}
# Then out non-words.
neighbors <- neighbors[which(neighbors %in% names(dtm))]
N <- length(neighbors)
# If we found some neighbors, find the one with the highest
# p-value.
if (N > 1) {
P <- 0*(1:N)
for (i in 1:N) {
P[i] <- Probability(neighbors[i], dtm)
}
out <- neighbors[which.max(P)]
}
# If no neighbors still, return the word.
else {
out <- word
}
}
return(out)
}
# Correct an entire document.
CorrectDocument <- function(document, dtm) {
by.word <- unlist(strsplit(document, " "))
N <- length(by.word)
for (i in 1:N) {
by.word[i] <- Correct(by.word[i], dtm = dtm)
}
corrected <- paste(by.word, collapse = " ")
return(corrected)
}
words <- scan("http://norvig.com/big.txt", what = character())
words <- strip.text(words)
counts <- table(words)
Correct("speling", dtm = counts)
#---correct a document
CorrectDocument("the quick bruwn fowx jumpt ovre tha lasy dog", dtm = counts)
Any idea please?
Thank you
The function Correct has a bug, you should add an unlist, i.e. the line :
Correct <- function(word, dtm) {
neighbors <- Neighbors(word)
should be changed as :
Correct <- function(word, dtm) {
neighbors <- unlist(Neighbors(word))
EDIT :
Here's a function which correct the lines of a document file (overwriting it) :
CorrectDocumentFile <- function(file,dtm){
# read the file lines
textLines <- unlist(readLines(file))
# for each line not empty or blank, correct the text
for(i in which(!grepl("^\\s*$",textLines))){
line <- textLines[[i]]
textLines[i] <- CorrectDocument(line,dtm)
}
# overwrite the file with the correction
writeLines(textLines, file)
}
Usage:
CorrectDocumentFile(file="fileToBeCorrected.txt", dtm=counts)

Add a progress bar to boot function in R

I am trying to add a progress bar to a bootstrap function in R.
I tried to make the example function as simple as possible (hence i'm using mean in this example).
library(boot)
v1 <- rnorm(1000)
rep_count = 1
m.boot <- function(data, indices) {
d <- data[indices]
setWinProgressBar(pb, rep_count)
rep_count <- rep_count + 1
Sys.sleep(0.01)
mean(d, na.rm = T)
}
tot_rep <- 200
pb <- winProgressBar(title = "Bootstrap in progress", label = "",
min = 0, max = tot_rep, initial = 0, width = 300)
b <- boot(v1, m.boot, R = tot_rep)
close(pb)
The bootstrap functions properly, but the problem is that the value of rep_count does not increase in the loop and the progress bar stays frozen during the process.
If I check the value of rep_count after the bootstrap is complete, it is still 1.
What am i doing wrong? maybe the boot function does not simply insert the m.boot function in a loop and so the variables in it are not increased?
Thank you.
You could use the package progress as below:
library(boot)
library(progress)
v1 <- rnorm(1000)
#add progress bar as parameter to function
m.boot <- function(data, indices, prog) {
#display progress with each run of the function
prog$tick()
d <- data[indices]
Sys.sleep(0.01)
mean(d, na.rm = T)
}
tot_rep <- 200
#initialize progress bar object
pb <- progress_bar$new(total = tot_rep + 1)
#perform bootstrap
boot(data = v1, statistic = m.boot, R = tot_rep, prog = pb)
I haven't quite figured out yet why it's necessary to set the number of iterations for progress_bar to be +1 the total bootstrap replicates (parameter R), but this is what was necessary in my own code, otherwise it throws an error. It seems like the bootstrap function is run one more time than you specify in parameter R, so if the progress bar is set to only run R times, it thinks the job is finished before it really is.
The pbapply package was designed to work with vectorized functions. There are 2 ways to achieve that in the context of this question: (1) write a wrapper as was suggested, which will not produce the same object of class 'boot'; (2) alternatively, the line lapply(seq_len(RR), fn) can be written as pblapply(seq_len(RR), fn). Option 2 can happen either by locally copying/updating the boot function as shown in the example below, or asking the package maintainer, Brian Ripley, if he would consider adding a progress bar directly or through pbapply as dependency.
My solution (changes indicated by comments):
library(boot)
library(pbapply)
boot2 <- function (data, statistic, R, sim = "ordinary", stype = c("i",
"f", "w"), strata = rep(1, n), L = NULL, m = 0, weights = NULL,
ran.gen = function(d, p) d, mle = NULL, simple = FALSE, ...,
parallel = c("no", "multicore", "snow"), ncpus = getOption("boot.ncpus",
1L), cl = NULL)
{
call <- match.call()
stype <- match.arg(stype)
if (missing(parallel))
parallel <- getOption("boot.parallel", "no")
parallel <- match.arg(parallel)
have_mc <- have_snow <- FALSE
if (parallel != "no" && ncpus > 1L) {
if (parallel == "multicore")
have_mc <- .Platform$OS.type != "windows"
else if (parallel == "snow")
have_snow <- TRUE
if (!have_mc && !have_snow)
ncpus <- 1L
loadNamespace("parallel")
}
if (simple && (sim != "ordinary" || stype != "i" || sum(m))) {
warning("'simple=TRUE' is only valid for 'sim=\"ordinary\", stype=\"i\", n=0', so ignored")
simple <- FALSE
}
if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
runif(1)
seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
n <- NROW(data)
if ((n == 0) || is.null(n))
stop("no data in call to 'boot'")
temp.str <- strata
strata <- tapply(seq_len(n), as.numeric(strata))
t0 <- if (sim != "parametric") {
if ((sim == "antithetic") && is.null(L))
L <- empinf(data = data, statistic = statistic, stype = stype,
strata = strata, ...)
if (sim != "ordinary")
m <- 0
else if (any(m < 0))
stop("negative value of 'm' supplied")
if ((length(m) != 1L) && (length(m) != length(table(strata))))
stop("length of 'm' incompatible with 'strata'")
if ((sim == "ordinary") || (sim == "balanced")) {
if (isMatrix(weights) && (nrow(weights) != length(R)))
stop("dimensions of 'R' and 'weights' do not match")
}
else weights <- NULL
if (!is.null(weights))
weights <- t(apply(matrix(weights, n, length(R),
byrow = TRUE), 2L, normalize, strata))
if (!simple)
i <- index.array(n, R, sim, strata, m, L, weights)
original <- if (stype == "f")
rep(1, n)
else if (stype == "w") {
ns <- tabulate(strata)[strata]
1/ns
}
else seq_len(n)
t0 <- if (sum(m) > 0L)
statistic(data, original, rep(1, sum(m)), ...)
else statistic(data, original, ...)
rm(original)
t0
}
else statistic(data, ...)
pred.i <- NULL
fn <- if (sim == "parametric") {
ran.gen
data
mle
function(r) {
dd <- ran.gen(data, mle)
statistic(dd, ...)
}
}
else {
if (!simple && ncol(i) > n) {
pred.i <- as.matrix(i[, (n + 1L):ncol(i)])
i <- i[, seq_len(n)]
}
if (stype %in% c("f", "w")) {
f <- freq.array(i)
rm(i)
if (stype == "w")
f <- f/ns
if (sum(m) == 0L)
function(r) statistic(data, f[r, ], ...)
else function(r) statistic(data, f[r, ], pred.i[r,
], ...)
}
else if (sum(m) > 0L)
function(r) statistic(data, i[r, ], pred.i[r, ],
...)
else if (simple)
function(r) statistic(data, index.array(n, 1, sim,
strata, m, L, weights), ...)
else function(r) statistic(data, i[r, ], ...)
}
RR <- sum(R)
res <- if (ncpus > 1L && (have_mc || have_snow)) {
if (have_mc) {
parallel::mclapply(seq_len(RR), fn, mc.cores = ncpus)
}
else if (have_snow) {
list(...)
if (is.null(cl)) {
cl <- parallel::makePSOCKcluster(rep("localhost",
ncpus))
if (RNGkind()[1L] == "L'Ecuyer-CMRG")
parallel::clusterSetRNGStream(cl)
res <- parallel::parLapply(cl, seq_len(RR), fn)
parallel::stopCluster(cl)
res
}
else parallel::parLapply(cl, seq_len(RR), fn)
}
}
else pblapply(seq_len(RR), fn) #### changed !!!
t.star <- matrix(, RR, length(t0))
for (r in seq_len(RR)) t.star[r, ] <- res[[r]]
if (is.null(weights))
weights <- 1/tabulate(strata)[strata]
boot.return(sim, t0, t.star, temp.str, R, data, statistic,
stype, call, seed, L, m, pred.i, weights, ran.gen, mle)
}
## Functions not exported by boot
isMatrix <- boot:::isMatrix
index.array <- boot:::index.array
boot.return <- boot:::boot.return
## Now the example
m.boot <- function(data, indices) {
d <- data[indices]
mean(d, na.rm = T)
}
tot_rep <- 200
v1 <- rnorm(1000)
b <- boot2(v1, m.boot, R = tot_rep)
The increased rep_count is a local variable and lost after each function call. In the next iteration the function gets rep_count from the global environment again, i.e., its value is 1.
You can use <<-:
rep_count <<- rep_count + 1
This assigns to the rep_count first found on the search path outside the function. Of course, using <<- is usually not recommended because side effects of functions should be avoided, but here you have a legitimate use case. However, you should probably wrap the whole thing in a function to avoid a side effect on the global environment.
There might be better solutions ...
I think i found a possible solution. This merges the answer of #Roland with the convenience of the pbapply package, using its functions startpb(), closepb(), etc..
library(boot)
library(pbapply)
v1 <- rnorm(1000)
rep_count = 1
tot_rep = 200
m.boot <- function(data, indices) {
d <- data[indices]
setpb(pb, rep_count)
rep_count <<- rep_count + 1
Sys.sleep(0.01) #Just to slow down the process
mean(d, na.rm = T)
}
pb <- startpb(min = 0, max = tot_rep)
b <- boot(v1, m.boot, R = tot_rep)
closepb(pb)
rep_count = 1
As previously suggested, wrapping everything in a function avoids messing with the rep_count variable.
The progress bar from the package dplyr works well:
library(dplyr)
library(boot)
v1 <- rnorm(1000)
m.boot <- function(data, indices) {
d <- data[indices]
p$tick()$print() # update progress bar
Sys.sleep(0.01)
mean(d, na.rm = T)
}
tot_rep <- 200
p <- progress_estimated(tot_rep+1) # init progress bar
b <- boot(v1, m.boot, R = tot_rep)
You can use the package pbapply
library(boot)
library(pbapply)
v1 <- rnorm(1000)
rep_count = 1
# your m.boot function ....
m.boot <- function(data, indices) {
d <- data[indices]
mean(d, na.rm = T)
}
# ... wraped in `bootfunc`
bootfunc <- function(x) { boot(x, m.boot, R = 200) }
# apply function to v1 , returning progress bar
pblapply(v1, bootfunc)
# > b <- pblapply(v1, bootfunc)
# > |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% Elapsed time: 02s

R: can `assign` be used for models?

Can the assign function in R be used to assign object names to models (e.g., gls models)? I'm guessing not b/c I keep getting a warning:
> Warning messages:
In assign(paste0(deparse(substitute(mod)), "_", i, j), update(mod, :
only the first element is used as variable name
As a result the objects are not created
Is there a way to do this?
Here is my fucntion code if it helps:
#Choose best corARMA structure for model of choice:
corARMA.chooser <- function(mod,min = 0,max = 3 ) {
#This function creates 1. object for each combo of ARMA {0:3} 2. AIC table comparing all of these models
mod <- get('mod')
aic.arma <- AIC(mod)
ps <- 0
qs <- 0
for(i in min:max) {
js <- if(i == 0) c(1:max) else c(min:max)
for(j in js) {
arma <- corARMA(p = i, q = j)
assign(paste0(deparse(substitute(mod)),'_',i,j), update(mod, .~., correlation = arma), envir = .GlobalEnv)
aic.arma <- c(aic.arma, AIC(get(paste0(deparse(substitute(mod)),'_',i,j))))
ps <- c(ps, i)
qs <- c(qs, i)
}
aic.arma.out <- data.frame(ps, qs, aic.arma)
aic.arma.out
}
}
Update:
I tried using the list approach, but I get the error:
Error in names(mod.list) <- c(names(mod.list), paste0(deparse(substitute(mod)), :
'names' attribute [1275] must be the same length as the vector [1]
EDIT: what actually tears your variable name apart is this line mod <- get('mod') where you overwrite your named instance of mod why do you actually do this? If change your function to this it behaves as I'd expect it to:
corARMA.chooser <- function(modIn,min = 0,max = 3 ) {
#This function creates 1. object for each combo of ARMA {0:3} 2. AIC table comparing all of these models mod <- get('modIn') aic.arma <- AIC(modIn) ps <- 0 qs <- 0 for(i in min:max) {
js <- if(i == 0) c(1:max) else c(min:max)
for(j in js) {
arma <- corARMA(p = i, q = j)
browser()
assign(paste0(deparse(substitute(modIn)),'_',i,j), update(mod, .~., correlation = arma), envir = .GlobalEnv)
aic.arma <- c(aic.arma, AIC(get(paste0(deparse(substitute(mod)),'_',i,j))))
ps <- c(ps, i)
qs <- c(qs, i)
}
aic.arma.out <- data.frame(ps, qs, aic.arma)
aic.arma.out
}
}
hope this is what you were trying to achieve.
Still not sure why the code works alone but not in the function, but it is clear that the deparse(substitute(mod)) is for some reason pulls mod apart to all of its parts first in the function, vs. simply creating a name of the object itself.
Here is my new code that works:
corARMA.chooser <- function(mod,p = 1,q = 0 ) {
#This function creates 1. object for each combo of ARMA {0:3} 2. AIC table comparing all of these models
mod.list <- NULL
nms <- NULL
aic.arma <- AIC(mod)
ps <- 0
qs <- 0
for(i in c(p)) {
js <- if(i == 0) c(q[q>0]) else c(q)
for(j in c(js)) {
arma <- corARMA(p = i, q = j)
mod.list <- c(mod.list, list(update(mod, .~., correlation = arma)))
names(mod.list) <- c(names(mod.list), paste0(deparse(substitute(mod)),'_',i,j))
aic.arma <- c(aic.arma, AIC(eval(parse(text=(paste0('mod.list$',deparse(substitute(mod)),'_',i,j))))))
ps <- c(ps, i)
qs <- c(qs, j)
}
}
assign(paste0(deparse(substitute(mod)),'_','ARMA'),mod.list, envir = .GlobalEnv)
aic.arma.out <- data.frame(p = ps, q = qs, AIC = aic.arma)
aic.arma.out
}

If error in loop create vector of "n" and continue

I have a loop in R which tests every possible combination of ARIMA with specific conditions and tests the lags. However during the loop there is an error
Error in optim(init[mask], armafn, method = optim.method, hessian = TRUE, :
non-finite finite-difference value [1]
When this error occurs I want it to create a vector of "n" which will be put into a matrix with the rest of the models. I have tried tryCatch but this for some reason stops the rest of the iterations from happening.
Here is my code:
N<- c(155782.7, 159463.7, 172741.1, 204547.2, 126049.3, 139881.9, 140747.3, 251963.0, 182444.3, 207780.8, 189251.2, 318053.7, 230569.2, 247826.8, 237019.6, 383909.5, 265145.5, 264816.4, 239607.0, 436403.1, 276767.7, 286337.9, 270022.7, 444672.9, 263717.2, 343143.9, 271701.7)
aslog<-"n"
library(gtools)
library(forecast)
a<-permutations(n=3,r=6,v=c(0:2),repeats.allowed=TRUE)
a<-a[ifelse((a[,1]+a[,4]>2|a[,2]+a[,5]>2|a[,3]+a[,6]>2),FALSE,TRUE),]
namWA<-matrix(0,ncol=1,nrow=length(a[,1]))
namWS<-matrix(0,ncol=1,nrow=length(a[,1]))
Arimafit<-matrix(0,ncol=length(N),nrow=length(a[,1]),byrow=TRUE)
tota<-matrix(0,ncol=1,nrow=length(a[,1]))
totb<-matrix(0,ncol=1,nrow=length(a[,1]))
for(i in 1:length(a[,1])){
namWA[i]<-paste("orderWA",i,sep=".")
assign(namWA[i],a[i,c(1:3)])
namWS[i]<-paste("orderWS",i,sep=".")
assign(namWS[i],a[i,c(4:6)])
ArimaW1 <- Arima(N, order= a[i,c(1:3)], seasonal=list(order=a[i,c(4:6)]),method="ML")
if(aslog=="y"){Arimafit[i,]<-c(exp(fitted(ArimaW1)))}else{Arimafit[i,]<-c(fitted(ArimaW1))}
nnn<-c(N)
arimab<-c(Arimafit[i,])
fullres<-nnn-arimab
v<-acf(fullres,plot=FALSE)
w<-pacf(fullres,plot=FALSE)
if(v$acf[2]>0.4|v$acf[2]<(-0.4)|v$acf[3]>0.4|v$acf[3]<(-0.4)|v$acf[4]>0.4|v$acf[4]<(-0.4)|v$acf[5]>0.4|v$acf[5]<(-0.4)|v$acf[6]>0.4|v$acf[6]<(-0.4)|v$acf[7]>0.4|v$acf[7]<(-0.4)|w$acf[1]>0.4|w$acf[1]<(-0.4)|w$acf[2]>0.4|w$acf[2]<(-0.4)|w$acf[3]>0.4|w$acf[3]<(-0.4)|w$acf[4]>0.4|w$acf[4]<(-0.4)|w$acf[5]>0.4|w$acf[5]<(-0.4)|w$acf[6]>0.4|w$acf[6]<(-0.4))
tota[i]<-"n" else{
tota[i]<-sum(abs(v$acf[2:7]))
totb[i]<-sum(abs(w$acf[1:6]))}
}
I tried doing
ArimaW1<-tryCatch(Arima(N, order= a[i,c(1:3)], seasonal=list(order=a[i,c(4:6)]),method="ML"),error=function(e) NULL)
and this gave another error
Error in Arimafit[i, ] <- c(fitted(ArimaW1)) :
number of items to replace is not a multiple of replacement length
then i tried:
ArimaW1<-tryCatch(Arima(N, order= a[i,c(1:3)], seasonal=list(order=a[i,c(4:6)]),method="ML"),error=function(e) matrix("n",ncol=length(Arimafit[1,])))
but this gave an error:
Error: $ operator is invalid for atomic vectors
and also gave a matrix with all the fitted ARIMA values up to iteration 68, after that it gives everything as 0.0
is there a way to get the loop to continue the iterations, filling a vector with a value which goes into the matrix Arimafit like the iterations that do work so that i can carry on with the code?
I just found out the way to do what i wanted to do. This may help other people so I wont delete it, ill just post the solution :)
library(gtools)
a<-permutations(n=3,r=6,v=c(0:2),repeats.allowed=TRUE)
a<-a[ifelse((a[,1]+a[,4]>2|a[,2]+a[,5]>2|a[,3]+a[,6]>2),FALSE,TRUE),]
namWA<-matrix(0,ncol=1,nrow=length(a[,1]))
namWS<-matrix(0,ncol=1,nrow=length(a[,1]))
Arimafit<-matrix(0,ncol=length(N),nrow=length(a[,1]),byrow=TRUE)
tota<-matrix(0,ncol=1,nrow=length(a[,1]))
totb<-matrix(0,ncol=1,nrow=length(a[,1]))
arimaerror<-matrix(0,ncol=length(N),nrow=1)
for(i in 1:length(a[,1])){
namWA[i]<-paste("orderWA",i,sep=".")
assign(namWA[i],a[i,c(1:3)])
namWS[i]<-paste("orderWS",i,sep=".")
assign(namWS[i],a[i,c(4:6)])
ArimaW1 <- try(Arima(N, order= a[i,c(1:3)], seasonal=list(order=a[i,c(4:6)]),method="ML"))
if(is(ArimaW1,"try-error"))
ArimaW1<-arimaerror else
ArimaW1<-ArimaW1
arimafitted<-try(fitted(ArimaW1))
if(is(arimafitted,"try-error"))
fitarima<-arimaerror else
fitarima<-arimafitted
if(aslog=="y"){Arimafit[i,]<-c(exp(fitarima))}else{Arimafit[i,]<-c(fitarima)}
nnn<-c(N)
arimab<-c(Arimafit[i,])
fullres<-nnn-arimab
v<-acf(fullres,plot=FALSE)
w<-pacf(fullres,plot=FALSE)
if(v$acf[2]>0.4|v$acf[2]<(-0.4)|v$acf[3]>0.4|v$acf[3]<(-0.4)|v$acf[4]>0.4|v$acf[4]<(-0.4)|v$acf[5]>0.4|v$acf[5]<(-0.4)|v$acf[6]>0.4|v$acf[6]<(-0.4)|v$acf[7]>0.4|v$acf[7]<(-0.4)|w$acf[1]>0.4|w$acf[1]<(-0.4)|w$acf[2]>0.4|w$acf[2]<(-0.4)|w$acf[3]>0.4|w$acf[3]<(-0.4)|w$acf[4]>0.4|w$acf[4]<(-0.4)|w$acf[5]>0.4|w$acf[5]<(-0.4)|w$acf[6]>0.4|w$acf[6]<(-0.4))
tota[i]<-"n" else{
tota[i]<-sum(abs(v$acf[2:7]))
totb[i]<-sum(abs(w$acf[1:6]))}
}
Here is a further adaption to what i wanted to achieve
a <- permutations(n = 3, r = 6, v = c(0:2), repeats.allowed = TRUE)
a <- a[ifelse((a[, 1] + a[, 4] > 2 | a[, 2] + a[, 5] > 2 | a[, 3] + a[, 6] > 2),
FALSE, TRUE), ]
Arimafit <- matrix(0,
ncol = length(Data.new),
nrow = length(a[, 1]),
byrow = TRUE)
totb <- matrix(0, ncol = 1, nrow = length(a[, 1]))
arimaerror <- matrix(0, ncol = length(Data.new), nrow = 1)
for (i in 1:length(a[, 1])){
ArimaData.new <- try(Arima(Data.new,
order = a[i, c(1:3)],
seasonal = list(order = a[i, c(4:6)]),
method = "ML"),
silent = TRUE)
if (is(ArimaData.new, "try-error")){
ArimaData.new <- arimaerror
} else {
ArimaData.new <- ArimaData.new
}
arimafitted <- try(fitted(ArimaData.new), silent = TRUE)
if (is(arimafitted, "try-error")){
fitarima <- arimaerror
} else {
fitarima <- arimafitted
}
if (as.log == "log"){
Arimafit[i, ] <- c(exp(fitarima))
Datanew <- c(exp(Data.new))
} else {
if (as.log == "sqrt"){
Arimafit[i, ] <- c((fitarima)^2)
Datanew <- c((Data.new)^2)
} else {
Arimafit[i, ] <- c(fitarima)
Datanew <- c(Data.new)
}
}
data <- c(Datanew)
arima.fits <- c(Arimafit[i, ])
fullres <- data - arima.fits
v <- acf(fullres, plot = FALSE)
w <- pacf(fullres, plot = FALSE)
if (v$acf[2]>0.4|v$acf[2]<(-0.4)|v$acf[3]>0.4|v$acf[3]<(-0.4)|v$acf[4]>0.4|v$acf[4]<(-0.4)|v$acf[5]>0.4|v$acf[5]<(-0.4)|v$acf[6]>0.4|v$acf[6]<(-0.4)|v$acf[7]>0.4|v$acf[7]<(-0.4)|w$acf[1]>0.4|w$acf[1]<(-0.4)|w$acf[2]>0.4|w$acf[2]<(-0.4)|w$acf[3]>0.4|w$acf[3]<(-0.4)|w$acf[4]>0.4|w$acf[4]<(-0.4)|w$acf[5]>0.4|w$acf[5]<(-0.4)|w$acf[6]>0.4|w$acf[6]<(-0.4)){
totb[i] <- "n"
} else {
totb[i] <- sum(abs(w$acf[1:4]))
}
j <- match(min(totb), totb)
order.arima <- a[j, c(1:3)]
order.seasonal.arima <- a[j, c(4:6)]
}

Resources