If error in loop create vector of "n" and continue - r

I have a loop in R which tests every possible combination of ARIMA with specific conditions and tests the lags. However during the loop there is an error
Error in optim(init[mask], armafn, method = optim.method, hessian = TRUE, :
non-finite finite-difference value [1]
When this error occurs I want it to create a vector of "n" which will be put into a matrix with the rest of the models. I have tried tryCatch but this for some reason stops the rest of the iterations from happening.
Here is my code:
N<- c(155782.7, 159463.7, 172741.1, 204547.2, 126049.3, 139881.9, 140747.3, 251963.0, 182444.3, 207780.8, 189251.2, 318053.7, 230569.2, 247826.8, 237019.6, 383909.5, 265145.5, 264816.4, 239607.0, 436403.1, 276767.7, 286337.9, 270022.7, 444672.9, 263717.2, 343143.9, 271701.7)
aslog<-"n"
library(gtools)
library(forecast)
a<-permutations(n=3,r=6,v=c(0:2),repeats.allowed=TRUE)
a<-a[ifelse((a[,1]+a[,4]>2|a[,2]+a[,5]>2|a[,3]+a[,6]>2),FALSE,TRUE),]
namWA<-matrix(0,ncol=1,nrow=length(a[,1]))
namWS<-matrix(0,ncol=1,nrow=length(a[,1]))
Arimafit<-matrix(0,ncol=length(N),nrow=length(a[,1]),byrow=TRUE)
tota<-matrix(0,ncol=1,nrow=length(a[,1]))
totb<-matrix(0,ncol=1,nrow=length(a[,1]))
for(i in 1:length(a[,1])){
namWA[i]<-paste("orderWA",i,sep=".")
assign(namWA[i],a[i,c(1:3)])
namWS[i]<-paste("orderWS",i,sep=".")
assign(namWS[i],a[i,c(4:6)])
ArimaW1 <- Arima(N, order= a[i,c(1:3)], seasonal=list(order=a[i,c(4:6)]),method="ML")
if(aslog=="y"){Arimafit[i,]<-c(exp(fitted(ArimaW1)))}else{Arimafit[i,]<-c(fitted(ArimaW1))}
nnn<-c(N)
arimab<-c(Arimafit[i,])
fullres<-nnn-arimab
v<-acf(fullres,plot=FALSE)
w<-pacf(fullres,plot=FALSE)
if(v$acf[2]>0.4|v$acf[2]<(-0.4)|v$acf[3]>0.4|v$acf[3]<(-0.4)|v$acf[4]>0.4|v$acf[4]<(-0.4)|v$acf[5]>0.4|v$acf[5]<(-0.4)|v$acf[6]>0.4|v$acf[6]<(-0.4)|v$acf[7]>0.4|v$acf[7]<(-0.4)|w$acf[1]>0.4|w$acf[1]<(-0.4)|w$acf[2]>0.4|w$acf[2]<(-0.4)|w$acf[3]>0.4|w$acf[3]<(-0.4)|w$acf[4]>0.4|w$acf[4]<(-0.4)|w$acf[5]>0.4|w$acf[5]<(-0.4)|w$acf[6]>0.4|w$acf[6]<(-0.4))
tota[i]<-"n" else{
tota[i]<-sum(abs(v$acf[2:7]))
totb[i]<-sum(abs(w$acf[1:6]))}
}
I tried doing
ArimaW1<-tryCatch(Arima(N, order= a[i,c(1:3)], seasonal=list(order=a[i,c(4:6)]),method="ML"),error=function(e) NULL)
and this gave another error
Error in Arimafit[i, ] <- c(fitted(ArimaW1)) :
number of items to replace is not a multiple of replacement length
then i tried:
ArimaW1<-tryCatch(Arima(N, order= a[i,c(1:3)], seasonal=list(order=a[i,c(4:6)]),method="ML"),error=function(e) matrix("n",ncol=length(Arimafit[1,])))
but this gave an error:
Error: $ operator is invalid for atomic vectors
and also gave a matrix with all the fitted ARIMA values up to iteration 68, after that it gives everything as 0.0
is there a way to get the loop to continue the iterations, filling a vector with a value which goes into the matrix Arimafit like the iterations that do work so that i can carry on with the code?

I just found out the way to do what i wanted to do. This may help other people so I wont delete it, ill just post the solution :)
library(gtools)
a<-permutations(n=3,r=6,v=c(0:2),repeats.allowed=TRUE)
a<-a[ifelse((a[,1]+a[,4]>2|a[,2]+a[,5]>2|a[,3]+a[,6]>2),FALSE,TRUE),]
namWA<-matrix(0,ncol=1,nrow=length(a[,1]))
namWS<-matrix(0,ncol=1,nrow=length(a[,1]))
Arimafit<-matrix(0,ncol=length(N),nrow=length(a[,1]),byrow=TRUE)
tota<-matrix(0,ncol=1,nrow=length(a[,1]))
totb<-matrix(0,ncol=1,nrow=length(a[,1]))
arimaerror<-matrix(0,ncol=length(N),nrow=1)
for(i in 1:length(a[,1])){
namWA[i]<-paste("orderWA",i,sep=".")
assign(namWA[i],a[i,c(1:3)])
namWS[i]<-paste("orderWS",i,sep=".")
assign(namWS[i],a[i,c(4:6)])
ArimaW1 <- try(Arima(N, order= a[i,c(1:3)], seasonal=list(order=a[i,c(4:6)]),method="ML"))
if(is(ArimaW1,"try-error"))
ArimaW1<-arimaerror else
ArimaW1<-ArimaW1
arimafitted<-try(fitted(ArimaW1))
if(is(arimafitted,"try-error"))
fitarima<-arimaerror else
fitarima<-arimafitted
if(aslog=="y"){Arimafit[i,]<-c(exp(fitarima))}else{Arimafit[i,]<-c(fitarima)}
nnn<-c(N)
arimab<-c(Arimafit[i,])
fullres<-nnn-arimab
v<-acf(fullres,plot=FALSE)
w<-pacf(fullres,plot=FALSE)
if(v$acf[2]>0.4|v$acf[2]<(-0.4)|v$acf[3]>0.4|v$acf[3]<(-0.4)|v$acf[4]>0.4|v$acf[4]<(-0.4)|v$acf[5]>0.4|v$acf[5]<(-0.4)|v$acf[6]>0.4|v$acf[6]<(-0.4)|v$acf[7]>0.4|v$acf[7]<(-0.4)|w$acf[1]>0.4|w$acf[1]<(-0.4)|w$acf[2]>0.4|w$acf[2]<(-0.4)|w$acf[3]>0.4|w$acf[3]<(-0.4)|w$acf[4]>0.4|w$acf[4]<(-0.4)|w$acf[5]>0.4|w$acf[5]<(-0.4)|w$acf[6]>0.4|w$acf[6]<(-0.4))
tota[i]<-"n" else{
tota[i]<-sum(abs(v$acf[2:7]))
totb[i]<-sum(abs(w$acf[1:6]))}
}

Here is a further adaption to what i wanted to achieve
a <- permutations(n = 3, r = 6, v = c(0:2), repeats.allowed = TRUE)
a <- a[ifelse((a[, 1] + a[, 4] > 2 | a[, 2] + a[, 5] > 2 | a[, 3] + a[, 6] > 2),
FALSE, TRUE), ]
Arimafit <- matrix(0,
ncol = length(Data.new),
nrow = length(a[, 1]),
byrow = TRUE)
totb <- matrix(0, ncol = 1, nrow = length(a[, 1]))
arimaerror <- matrix(0, ncol = length(Data.new), nrow = 1)
for (i in 1:length(a[, 1])){
ArimaData.new <- try(Arima(Data.new,
order = a[i, c(1:3)],
seasonal = list(order = a[i, c(4:6)]),
method = "ML"),
silent = TRUE)
if (is(ArimaData.new, "try-error")){
ArimaData.new <- arimaerror
} else {
ArimaData.new <- ArimaData.new
}
arimafitted <- try(fitted(ArimaData.new), silent = TRUE)
if (is(arimafitted, "try-error")){
fitarima <- arimaerror
} else {
fitarima <- arimafitted
}
if (as.log == "log"){
Arimafit[i, ] <- c(exp(fitarima))
Datanew <- c(exp(Data.new))
} else {
if (as.log == "sqrt"){
Arimafit[i, ] <- c((fitarima)^2)
Datanew <- c((Data.new)^2)
} else {
Arimafit[i, ] <- c(fitarima)
Datanew <- c(Data.new)
}
}
data <- c(Datanew)
arima.fits <- c(Arimafit[i, ])
fullres <- data - arima.fits
v <- acf(fullres, plot = FALSE)
w <- pacf(fullres, plot = FALSE)
if (v$acf[2]>0.4|v$acf[2]<(-0.4)|v$acf[3]>0.4|v$acf[3]<(-0.4)|v$acf[4]>0.4|v$acf[4]<(-0.4)|v$acf[5]>0.4|v$acf[5]<(-0.4)|v$acf[6]>0.4|v$acf[6]<(-0.4)|v$acf[7]>0.4|v$acf[7]<(-0.4)|w$acf[1]>0.4|w$acf[1]<(-0.4)|w$acf[2]>0.4|w$acf[2]<(-0.4)|w$acf[3]>0.4|w$acf[3]<(-0.4)|w$acf[4]>0.4|w$acf[4]<(-0.4)|w$acf[5]>0.4|w$acf[5]<(-0.4)|w$acf[6]>0.4|w$acf[6]<(-0.4)){
totb[i] <- "n"
} else {
totb[i] <- sum(abs(w$acf[1:4]))
}
j <- match(min(totb), totb)
order.arima <- a[j, c(1:3)]
order.seasonal.arima <- a[j, c(4:6)]
}

Related

How to minimize the unacceptably long run time of the created R code

There is a code with three for loops running with data containing enough missing values. The major problem is with the unacceptably long run time which seems to take at least more than a month although I try to keep my PC opened during most of the day.
The structure below is 100% correct from what I am trying to achieve when I test with a very few data points. But as the number of columns and rows become 2781 and 280, respectively, I perceive it takes forever although I am 100% sure that this is running correctly even when I see the updated environment window of my R-Studio each time I refresh it.
My data also has lots of missing values, probably 40% or something. I think this is making the computation time extremely longer as well.
The data dimension is 315 * 2781.
However, I am trying to achieve an output in a 280 * 2781 matrix form.
May I please get help minimizing the run time of this following code?
It would be very appreciated if I can!
options(java.parameters = "- Xmx8000m")
memory.limit(size=8e+6)
data=read.table("C:/Data/input.txt",T,sep="\t");
data=data.frame(data)[,-1]
corr<-NULL
corr2<-NULL
corr3<-NULL
for(i in 1:280)
{
corr2<-NULL
for(j in 1:2781)
{
data2<-data[,-j]
corr<-NULL
for(k in 1:2780)
{
ifelse((is.error(grangertest(data[i:(i+35),j] ~ data2[i:(i+35),k], order = 1, na.action = na.omit)$P[2])==TRUE) || (grangertest(data[i:(i+35),j] ~ data2[i:(i+35),k], order = 1, na.action = na.omit)$P[2])>0.05|| (is.na(grangertest(data[i:(i+35),j] ~ data2[i:(i+35),k], order = 1, na.action = na.omit)$P[2])==TRUE),corr<-cbind(corr,0),corr<-cbind(corr,1))
}
corr2<-rbind(corr2,corr)
}
corr3<-rbind(corr3,rowSums(corr2))
}
The snippet of my data is as below:
> dput(data[1:30, 1:10])
structure(c(0.567388170165941, 0.193093325709924, 0.965938209090382,
0.348295788047835, 0.496113050729036, 0.0645384560339153, 0.946750836912543,
0.642093246569857, 0.565092500532046, 0.0952424583956599, 0.444063827162609,
0.709971546428278, 0.756330407923087, 0.601746253203601, 0.341865634545684,
0.953319212188944, 0.0788604547269642, 0.990508111426607, 0.35519331949763,
0.697004508692771, 0.285368352662772, 0.274287624517456, 0.575733694015071,
0.12937490013428, 0.00476219342090189, 0.684308280004188, 0.189448777819052,
0.615732178557664, 0.404873769031838, 0.357331350911409, 0.565436001634225,
0.380773033713922, 0.348490287549794, 0.0473814208526164, 0.389312234241515,
0.562123290728778, 0.30642102798447, 0.911173274740577, 0.566258994862437,
0.837928073247895, 0.107747194357216, 0.253737836843356, 0.651503744535148,
0.187739939894527, 0.951192815322429, 0.740037888288498, 0.0817571650259197,
0.740519099170342, 0.601534485351294, 0.120900869136676, 0.415282893227413,
0.591146623482928, 0.698511375114322, 0.08557975362055, 0.139396222075447,
0.303953414550051, 0.0743798329494894, 0.0293272000271827, 0.335832208395004,
0.665010208031163, 0.0319741254206747, 0.678886031731963, 0.154593498911709,
0.275712370406836, 0.828485634410754, 0.921500099124387, 0.651940459152684,
0.00574865937232971, 0.82236105017364, 0.55089360428974, 0.209424041677266,
0.861786168068647, 0.672873278381303, 0.301034058211371, 0.180336013436317,
0.481560358777642, 0.901354183442891, 0.986482679378241, 0.90117057505995,
0.476308439625427, 0.638073122361675, 0.27481731469743, 0.689271076582372,
0.324349449947476, 0.56620552809909, 0.867861548438668, 0.78374840435572,
0.0668482843320817, 0.276675389613956, 0.990600393852219, 0.990227151894942,
0.417612489778548, 0.391012848122045, 0.348758921027184, 0.0799746725242585,
0.88941288786009, 0.511429069796577, 0.0338982092216611, 0.240115304477513,
0.0268365524243563, 0.67206134647131, 0.816803207853809, 0.344421110814437,
0.864659120794386, 0.84128700569272, 0.116056860191748, 0.303730394458398,
0.48192183743231, 0.341675494797528, 0.0622653553728014, 0.823110743425786,
0.483212807681412, 0.968748248415068, 0.953057422768325, 0.116025703493506,
0.327919023809955, 0.590675016632304, 0.832283023977652, 0.342327545629814,
0.576901035616174, 0.942689201096073, 0.59300709143281, 0.565881528891623,
0.600007816683501, 0.133237989619374, 0.873827134957537, 0.744597729761153,
0.755133397178724, 0.0245723063126206, 0.97799762734212, 0.636845340020955,
0.73828601022251, 0.644093665992841, 0.57204390084371, 0.496023115236312,
0.703613247489557, 0.149237307952717, 0.0871439634356648, 0.0632112647872418,
0.83703236351721, 0.433215840253979, 0.430483993608505, 0.924051651498303,
0.913056606892496, 0.914889572421089, 0.215407102368772, 0.76880722376518,
0.269207723205909, 0.865548757137731, 0.28798541566357, 0.391722843516618,
0.649806497385725, 0.459413924254477, 0.907465039752424, 0.48731207777746,
0.554472463205457, 0.779784266138449, 0.566323830280453, 0.208658932242543,
0.958056638715789, 0.61858483706601, 0.838681482244283, 0.286310768220574,
0.895410191034898, 0.448722236789763, 0.297688684659079, 0.33291415637359,
0.0115265529602766, 0.850776052568108, 0.764857453294098, 0.469730701530352,
0.222089925780892, 0.0496484278701246, 0.32886885642074, 0.356443469878286,
0.612877089297399, 0.727906176587567, 0.0292073413729668, 0.429160050582141,
0.232313714455813, 0.678631312213838, 0.642334033036605, 0.99107678886503,
0.542449960019439, 0.835914565017447, 0.52798323193565, 0.303808332188055,
0.919654499506578, 0.944237019168213, 0.52141259261407, 0.794379767496139,
0.72268659202382, 0.114752230467275, 0.175116094760597, 0.437696389388293,
0.852590200025588, 0.511136321350932, 0.30879021063447, 0.174206420546398,
0.14262041519396, 0.375411552377045, 0.0204910831525922, 0.852757754037157,
0.631567053496838, 0.475924106314778, 0.508682047016919, 0.307679089019075,
0.70284536993131, 0.851252349093556, 0.0868967010173947, 0.586291917832568,
0.0529140203725547, 0.440692059928551, 0.207642213441432, 0.777513341512531,
0.141496006632224, 0.548626560717821, 0.419565241318196, 0.0702310993801802,
0.499403427587822, 0.189343606121838, 0.370725362794474, 0.888076487928629,
0.83070912421681, 0.466137421084568, 0.177098380634561, 0.91202046489343,
0.142300580162555, 0.823691181838512, 0.41561916610226, 0.939948018174618,
0.806491429451853, 0.795849160756916, 0.566376683535054, 0.36814984655939,
0.307756055146456, 0.602875682059675, 0.506007500691339, 0.538658684119582,
0.420845189364627, 0.663071365095675, 0.958144341595471, 0.793743418296799,
0.983086514985189, 0.266262857476249, 0.817585011478513, 0.122843299992383,
0.989197303075343, 0.71584410732612, 0.500571243464947, 0.397394519997761,
0.659465527161956, 0.459530522814021, 0.602246116613969, 0.250076721422374,
0.17533828667365, 0.6599256307818, 0.184704560553655, 0.15679649473168,
0.513444944983348, 0.205572377191857, 0.430164282443002, 0.131548407254741,
0.914019819349051, 0.935795902274549, 0.857401241315529, 0.977940042736009,
0.41389597626403, 0.179183913161978, 0.431347143370658, 0.477178965462372,
0.121315707685426, 0.107695729471743, 0.634954946814105, 0.859707030234858,
0.855825762730092, 0.708672808250412, 0.674073817208409, 0.672288877889514,
0.622144045541063, 0.433355041313916, 0.952878215815872, 0.229569894727319,
0.289388840552419, 0.937473804224283, 0.116283216979355, 0.659604362910613,
0.240837284363806, 0.726138337515295, 0.68390148691833, 0.381577257299796,
0.899390475358814, 0.26472729514353, 0.0383855854161084, 0.855232689995319,
0.655799814499915, 0.335587574867532, 0.163842789363116, 0.0353666560258716,
0.048316186061129), .Dim = c(30L, 10L))
I converted just the inner loop to mapply and did a quick speed test:
library(lmtest)
data <- matrix(runif(315*2781), nrow = 315)
get01 <- function(x, y) {
try(gt <- grangertest(x ~ y, order = 1, na.action = na.omit)$P[2])
if (exists("gt")) {
if (gt > 0.05 || is.na(gt)) {
return(0)
} else {
return(1)
}
} else {
return(0)
}
}
i <- 1; j <- 1
system.time(corr <- mapply(function(k) {get01(data[i:(i+35),j], data[i:(i+35),k])}, (1:2781)[-j]))
#> user system elapsed
#> 21.505 0.014 21.520
It would need to perform that mapply 778680 times, so that puts it at about 200 days. You'll either need a different approach with the Granger test or several cores. Here's the command to replace the full loop:
corr3 <- t(mapply(function(i) colSums(mapply(function(j) mapply(function(k) {get01(data[i:(i+35),j], data[i:(i+35),k])}, (1:2781)[-j]), 1:2781)), 1:280))
Replace that first mapply with simplify2array(parLapply to parallelize:
library(parallel)
cl <- makeCluster(detectCores())
clusterExport(cl, list("data", "get01"))
parLapply(cl, cl, function(x) require(lmtest))
corr3 <- t(simplify2array(parLapply(cl, 1:280, function(i) colSums(mapply(function(j) mapply(function(k) {get01(data[i:(i+35),j], data[i:(i+35),k])}, (1:2781)[-j]), 1:2781)))))
stopCluster(cl)
Here is a version, not parallelized, that speeds up the code in the question by a factor greater than 4.
Some bottlenecks in the question's code are easy to detect:
The matrices corr? are extended inside the loops. The solution is to reserve memory beforehand;
The test grangertest is called 3 times per inner iteration when only one is needed;
To cbind with 0 or 1 is in fact creating a vector, not a matrix.
Here is a comparative test between the question's code and the function below.
library(lmtest)
# avoids loading an extra package
is.error <- function(x){
inherits(x, c("error", "try-error"))
}
Lag <- 5L
nr <- nrow(data)
nc <- ncol(data)
t0 <- system.time({
corr<-NULL
corr2<-NULL
corr3<-NULL
for(i in 1:(nr - Lag))
{
corr2<-NULL
data3 <- data[i:(i + Lag), ]
for(j in 1:nc)
{
data2<-data[,-j]
corr<-NULL
for(k in 1:(nc - 1L))
{
ifelse((is.error(grangertest(data[i:(i+Lag),j] ~ data2[i:(i+Lag),k], order = 1, na.action = na.omit)$P[2])==TRUE) ||
(grangertest(data[i:(i+Lag),j] ~ data2[i:(i+Lag),k], order = 1, na.action = na.omit)$P[2])>0.05 ||
(is.na(grangertest(data[i:(i+Lag),j] ~ data2[i:(i+Lag),k], order = 1, na.action = na.omit)$P[2])==TRUE),
corr<-cbind(corr,0),
corr<-cbind(corr,1)
)
}
corr2 <- rbind(corr2, corr)
}
corr3<-rbind(corr3, rowSums(corr2))
}
corr3
})
I will use a simplified version of lmtest::grangertest.
granger_test <- function (x, y, order = 1, na.action = na.omit, ...) {
xnam <- deparse(substitute(x))
ynam <- deparse(substitute(y))
n <- length(x)
all <- cbind(x = x[-1], y = y[-1], x_1 = x[-n], y_1 = y[-n])
y <- as.vector(all[, 2])
lagX <- as.matrix(all[, (1:order + 2)])
lagY <- as.matrix(all[, (1:order + 2 + order)])
fm <- lm(y ~ lagY + lagX)
rval <- lmtest::waldtest(fm, 2, ...)
attr(rval, "heading") <- c("Granger causality test\n", paste("Model 1: ",
ynam, " ~ ", "Lags(", ynam, ", 1:", order, ") + Lags(",
xnam, ", 1:", order, ")\nModel 2: ", ynam, " ~ ", "Lags(",
ynam, ", 1:", order, ")", sep = ""))
rval
}
And now the function to run the tests.
f_Rui <- function(data, Lag){
nr <- nrow(data)
nc <- ncol(data)
corr3 <- matrix(0, nrow = nr - Lag, ncol = nc)
data3 <- matrix(0, nrow = Lag + 1L, ncol = nc)
data2 <- matrix(0, nrow = Lag + 1L, ncol = nc - 1L)
for(i in 1:(nr - Lag)) {
corr2 <- matrix(0, nrow = nc, ncol = nc - 1L)
data3[] <- data[i:(i + Lag), ]
for(j in 1:nc) {
corr <- integer(nc - 1L)
data2[] <- data3[, -j]
for(k in 1:(nc - 1L)){
res <- tryCatch(
grangertest(x = data2[, k], y = data3[, j], order = 1, na.action = na.omit),
error = function(e) e
)
if(!inherits(res, "error") && !is.na(res[['Pr(>F)']][2]) && res[['Pr(>F)']][2] <= 0.05) {
corr[k] <- 1L
}
}
corr2[j, ] <- corr
}
corr3[i, ] <- rowSums(corr2)
}
corr3
}
The results are identical and the timings much better.
t1 <- system.time({
res <- f_Rui(data, 5L)
})
identical(corr3, res)
#[1] TRUE
times <- rbind(t0, t1)
t(t(times)/t1)
# user.self sys.self elapsed user.child sys.child
#t0 4.682908 1.736111 4.707783 NaN NaN
#t1 1.000000 1.000000 1.000000 NaN NaN

how to solve error non-numeric argument to binary operation

I want to calculate the stock return but I got error in
return=function(x)
{
n=length(x)
x=matrix(x,nrow=n,ncol=1)
return_data=matrix(nrow=n-1,ncol=1)
for(i in 1:n-1)
{
return_data[i]=log(x[i+1,]/x[i,])
}
return_data
}
R_JSMR=return(JSMR)
Error in x[i + 1, ]/x[i, ] : non-numeric argument to binary operator
how do I solve this? please help me, thank you very much :)
The error is to have the for loop in 1:n-1 instead of 1:(n-1). Here is the function corrected and a vectorized version of it.
log_returns <- function(x) {
n <- length(x)
return_data <- matrix(nrow = n - 1, ncol = 1)
for(i in 1:(n-1)) {
return_data[i] <- log(x[i+1]/x[i])
}
return_data
}
log_returns2 <- function(x) {
return_data <- log(x[-1]/x[-length(x)])
matrix(return_data, ncol = 1)
}
JSMR <- 1:10
R_JSMR <- log_returns(JSMR)
R_JSMR2 <- log_returns2(JSMR)
identical(R_JSMR,R_JSMR2)
#[1] TRUE
If you don't need to return a matrix, here are two one-liners.
log_returns3 <- function(x) log(x[-1]/x[-length(x)])
log_returns4 <- function(x) diff(log(x))

rowSums - 'x' must be an array of at least two dimensions

This is really hard to explain but basically I have a dataset where people completed a wordsearch task. By using the following code I indexed the letters of the wordsearch by finding their numbers in the descriptions. I put them into a matrix so that I can use them to index from the dataset later:
number <- "#101"
wordsearch <- matrix(rep(0, times = 16 * ncol(data)), nrow = 16, ncol = ncol(data))
for (i in 1:9){
for (j in 1:ncol(data)){
wordsearch[i,j] <- grepl(number, data[1, j], fixed = T)
}
number <- paste("#10", (i+1), sep = "")
}
number <- "#110"
for (i in 10:15) {
for (j in 34:217){
wordsearch[i,j] <- grepl(number, data[1, j], fixed = T)
}
number <- paste("#1", (i+1), sep = "")
}
number <- "#3"
for (j in 1:ncol(data)){
wordsearch[16,j] <- grepl(number, data[1, j], fixed = T)
}
This part works perfectly. Then I want to sum the number of letters people found for each word and create new columns for each word and add to the dataset. First I got the error that 'x must be numeric' so I did data[is.na(data] <- 0
And then I did the following code:
col <- seq(261, by = 1, length.out = 16)
for (i in 1:16){
d2[, col[i]] <- rowSums(d2[, wordsearch[i, ] == 1])
}
I literally just did that with another dataset and it worked fine but now I'm getting the "x' must be an array of at least two dimensions". Can someone help?

homals package for Nonlinear PCA in R: Error in dimnames(x) <- dn : length of 'dimnames' [1] not equal to array extent

I am trying to implement NLPCA (Nonlinear PCA) on a data set using the homals package in R but I keep on getting the following error message:
Error in dimnames(x) <- dn : length of 'dimnames' [1] not equal to array extent
The data set I use can be found in the UCI ML Repository and it's called dat when imported in R: https://archive.ics.uci.edu/ml/datasets/South+German+Credit+%28UPDATE%29
Here is my code (some code is provided once the data set is downloaded):
nlpcasouthgerman <- homals(dat, rank=1, level=c('nominal','numerical',rep('nominal',2),
'numerical','nominal',
rep('ordinal',2), rep('nominal',2),
'ordinal','nominal','numerical',
rep('nominal',2), 'ordinal',
'nominal','ordinal',rep('nominal',3)),
active=c(FALSE, rep(TRUE, 20)), ndim=3, verbose=1)
I am trying to predict the first attribute, therefore I set it to be active=FALSE.
The output looks like this (skipped all iteration messages):
Iteration: 1 Loss Value: 0.000047
Iteration: 2 Loss Value: 0.000044
...
Iteration: 37 Loss Value: 0.000043
Iteration: 38 Loss Value: 0.000043
Error in dimnames(x) <- dn :
length of 'dimnames' [1] not equal to array extent
I don't understand why this error comes up. I have used the same code on some other data set and it worked fine so I don't see why this error persists. Any suggestions about what might be going wrong and how I could fix this issue?
Thanks!
It seems the error comes from code generating NAs in the homals function, specifically for your data for the number_credits levels, which causes problems with sort(as.numeric((rownames(clist[[i]])))) and the attempt to catch the error, since one of the levels does not give an NA value.
So either you have to modify the homals function to take care of such an edge case, or change problematic factor levels. This might be something to file as a bug report to the package maintainer.
As a work-around in your case you could do something like:
levels(dat$number_credits)[1] <- "_1"
and the function should run without problems.
Edit:
I think one solution would be to change one line of code in the homals function, but no guarantee this does work as intended. Better submit a bug report to the package author/maintainer - see https://cran.r-project.org/web/packages/homals/ for the address.
Using rnames <- as.numeric(rownames(clist[[i]]))[order(as.numeric(rownames(clist[[i]])))] instead of rnames <- sort(as.numeric((rownames(clist[[i]])))) would allow the following code to identify NAs, but I am not sure why the author did not try to preserve factor levels outright.
Anyway, you could run a modified function in your local environment, which would require to explicitly call internal (not exported) homals functions, as shown below. Not necessarily the best approach, but would help you out in a pinch.
homals <- function (data, ndim = 2, rank = ndim, level = "nominal", sets = 0,
active = TRUE, eps = 0.000001, itermax = 1000, verbose = 0) {
dframe <- data
name <- deparse(substitute(dframe))
nobj <- nrow(dframe)
nvar <- ncol(dframe)
vname <- names(dframe)
rname <- rownames(dframe)
for (j in 1:nvar) {
dframe[, j] <- as.factor(dframe[, j])
levfreq <- table(dframe[, j])
if (any(levfreq == 0)) {
newlev <- levels(dframe[, j])[-which(levfreq == 0)]
}
else {
newlev <- levels(dframe[, j])
}
dframe[, j] <- factor(dframe[, j], levels = sort(newlev))
}
varcheck <- apply(dframe, 2, function(tl) length(table(tl)))
if (any(varcheck == 1))
stop("Variable with only 1 value detected! Can't proceed with estimation!")
active <- homals:::checkPars(active, nvar)
rank <- homals:::checkPars(rank, nvar)
level <- homals:::checkPars(level, nvar)
if (length(sets) == 1)
sets <- lapply(1:nvar, "c")
if (!all(sort(unlist(sets)) == (1:nvar))) {
print(cat("sets union", sort(unlist(sets)), "\n"))
stop("inappropriate set structure !")
}
nset <- length(sets)
mis <- rep(0, nobj)
for (l in 1:nset) {
lset <- sets[[l]]
if (all(!active[lset]))
(next)()
jset <- lset[which(active[lset])]
for (i in 1:nobj) {
if (any(is.na(dframe[i, jset])))
dframe[i, jset] <- NA
else mis[i] <- mis[i] + 1
}
}
for (j in 1:nvar) {
k <- length(levels(dframe[, j]))
if (rank[j] > min(ndim, k - 1))
rank[j] <- min(ndim, k - 1)
}
x <- cbind(homals:::orthogonalPolynomials(mis, 1:nobj, ndim))
x <- homals:::normX(homals:::centerX(x, mis), mis)$q
y <- lapply(1:nvar, function(j) homals:::computeY(dframe[, j], x))
sold <- homals:::totalLoss(dframe, x, y, active, rank, level, sets)
iter <- pops <- 0
repeat {
iter <- iter + 1
y <- homals:::updateY(dframe, x, y, active, rank, level, sets,
verbose = verbose)
smid <- homals:::totalLoss(dframe, x, y, active, rank, level,
sets)/(nobj * nvar * ndim)
ssum <- homals:::totalSum(dframe, x, y, active, rank, level, sets)
qv <- homals:::normX(homals:::centerX((1/mis) * ssum, mis), mis)
z <- qv$q
snew <- homals:::totalLoss(dframe, z, y, active, rank, level,
sets)/(nobj * nvar * ndim)
if (verbose > 0)
cat("Iteration:", formatC(iter, digits = 3, width = 3),
"Loss Value: ", formatC(c(smid), digits = 6,
width = 6, format = "f"), "\n")
r <- abs(qv$r)/2
ops <- sum(r)
aps <- sum(La.svd(crossprod(x, mis * z), 0, 0)$d)/ndim
if (iter == itermax) {
stop("maximum number of iterations reached")
}
if (smid > sold) {
warning(cat("Loss function increases in iteration ",
iter, "\n"))
}
if ((ops - pops) < eps)
break
else {
x <- z
pops <- ops
sold <- smid
}
}
ylist <- alist <- clist <- ulist <- NULL
for (j in 1:nvar) {
gg <- dframe[, j]
c <- homals:::computeY(gg, z)
d <- as.vector(table(gg))
lst <- homals:::restrictY(d, c, rank[j], level[j])
y <- lst$y
a <- lst$a
u <- lst$z
ylist <- c(ylist, list(y))
alist <- c(alist, list(a))
clist <- c(clist, list(c))
ulist <- c(ulist, list(u))
}
dimlab <- paste("D", 1:ndim, sep = "")
for (i in 1:nvar) {
if (ndim == 1) {
ylist[[i]] <- cbind(ylist[[i]])
ulist[[i]] <- cbind(ulist[[i]])
clist[[i]] <- cbind(clist[[i]])
}
options(warn = -1)
# Here is the line that I changed in the code:
# rnames <- sort(as.numeric((rownames(clist[[i]]))))
rnames <- as.numeric(rownames(clist[[i]]))[order(as.numeric(rownames(clist[[i]])))]
options(warn = 0)
if ((any(is.na(rnames))) || (length(rnames) == 0))
rnames <- rownames(clist[[i]])
if (!is.matrix(ulist[[i]]))
ulist[[i]] <- as.matrix(ulist[[i]])
rownames(ylist[[i]]) <- rownames(ulist[[i]]) <- rownames(clist[[i]]) <- rnames
rownames(alist[[i]]) <- paste(1:dim(alist[[i]])[1])
colnames(clist[[i]]) <- colnames(ylist[[i]]) <- colnames(alist[[i]]) <- dimlab
colnames(ulist[[i]]) <- paste(1:dim(as.matrix(ulist[[i]]))[2])
}
names(ylist) <- names(ulist) <- names(clist) <- names(alist) <- colnames(dframe)
rownames(z) <- rownames(dframe)
colnames(z) <- dimlab
dummymat <- as.matrix(homals:::expandFrame(dframe, zero = FALSE, clean = FALSE))
dummymat01 <- dummymat
dummymat[dummymat == 2] <- NA
dummymat[dummymat == 0] <- Inf
scoremat <- array(NA, dim = c(dim(dframe), ndim), dimnames = list(rownames(dframe),
colnames(dframe), paste("dim", 1:ndim, sep = "")))
for (i in 1:ndim) {
catscores.d1 <- do.call(rbind, ylist)[, i]
dummy.scores <- t(t(dummymat) * catscores.d1)
freqlist <- apply(dframe, 2, function(dtab) as.list(table(dtab)))
cat.ind <- sequence(sapply(freqlist, length))
scoremat[, , i] <- t(apply(dummy.scores, 1, function(ds) {
ind.infel <- which(ds == Inf)
ind.minfel <- which(ds == -Inf)
ind.nan <- which(is.nan(ds))
ind.nael <- which((is.na(ds) + (cat.ind != 1)) ==
2)
ds[-c(ind.infel, ind.minfel, ind.nael, ind.nan)]
}))
}
disc.mat <- apply(scoremat, 3, function(xx) {
apply(xx, 2, function(cols) {
(sum(cols^2, na.rm = TRUE))/nobj
})
})
result <- list(datname = name, catscores = ylist, scoremat = scoremat,
objscores = z, cat.centroids = clist, ind.mat = dummymat01,
loadings = alist, low.rank = ulist, discrim = disc.mat,
ndim = ndim, niter = iter, level = level, eigenvalues = r,
loss = smid, rank.vec = rank, active = active, dframe = dframe,
call = match.call())
class(result) <- "homals"
result
}

Multiple loop Syntax Error

I cannot figure out what's going wrong with my loop and it is already too complicated for my current level. I have already tried applybut obviously I do something wrong, so I didn't use it at all.
library('wavelets')
library('benford.analysis')
indeces <- ls() # my initial datasets
wfilters <- array(c("haar","la8","d4","c6")) # filter option in "modwt" function
wfiltname <- array(c("h","l","d","c")) # to rename the new objects
for (i in 1:nrow(as.array(indeces))) {
x <- get(as.matrix(indeces[i]))
x <- x[,2]
# Creates modwt objects equal to the number of filters
for (j in 1:nrow(as.array(wfilters))) {
x <- wavelets::modwt(x, filter = wfilters[j], n.levels = 4,
boundary = "periodic")
# A loop that creates a matrix with benford fun output per modwt n.levels option
for (l in 1:4) {
x <- as.matrix(x#W$W[l]) # n.levels are represented as x#W$W1, x#W$W2,...
x <- benford.analysis::benford(x, number.of.digits = 1,
sign = "both", discrete = T,
round = 3) # accepts matrices
x[,l] <- x$bfd$data.dist # it always has 9 elements
}
assign(paste0("b", wfiltname[j], indeces[i]), x)
}
}
The above loop should be reproducible with any data (where the values are in second column). The error I get is the following:
Error in array(x, c(length(x), 1L), if (!is.null(names(x))) list(names(x), :
'data' must be of a vector type, was 'NULL'
Thanks to #Cath and #jogo I made it work after some improvements. Here's the correct code:
temp <- list.files(path = "...")
list2env(
lapply(setNames(temp, make.names(gsub("*.csv$", "", temp))),
read.csv), envir = .GlobalEnv)
rm(temp)
indeces <- ls()
wfilters <- array(c("haar","la8","d4","c6"))
wfiltname <- array(c("h","l","d","c"))
k <- data.frame(matrix(nrow = 9,ncol = 4))
nlvl <- 4
for (i in 1:length(indeces)) {
x <- as.matrix(get(indeces[i]))
for (j in 1:length(wfilters)) {
y <- wavelets::modwt(as.matrix(x), filter = wfilters[j], n.levels = nlvl,
boundary = "periodic")
y <- as.matrix(y#W)
for(m in 1:nlvl) {
z <- as.matrix(y[[m]])
z <- benford.analysis::benford(z, number.of.digits = 1, sign = "both", discrete = TRUE, round = 16)
k[m] <- as.data.frame(z$bfd$data.dist)
colnames(k)[m] <- paste0(wfilters[j], "W", m)
}
assign(paste0(indeces[i], wfiltname[j]), k)
}
}
rm(x,y,z,i,j,m,k)
I would appreciate if there is a way to write it more efficiently. Thank you very much

Resources