rowSums - 'x' must be an array of at least two dimensions - r

This is really hard to explain but basically I have a dataset where people completed a wordsearch task. By using the following code I indexed the letters of the wordsearch by finding their numbers in the descriptions. I put them into a matrix so that I can use them to index from the dataset later:
number <- "#101"
wordsearch <- matrix(rep(0, times = 16 * ncol(data)), nrow = 16, ncol = ncol(data))
for (i in 1:9){
for (j in 1:ncol(data)){
wordsearch[i,j] <- grepl(number, data[1, j], fixed = T)
}
number <- paste("#10", (i+1), sep = "")
}
number <- "#110"
for (i in 10:15) {
for (j in 34:217){
wordsearch[i,j] <- grepl(number, data[1, j], fixed = T)
}
number <- paste("#1", (i+1), sep = "")
}
number <- "#3"
for (j in 1:ncol(data)){
wordsearch[16,j] <- grepl(number, data[1, j], fixed = T)
}
This part works perfectly. Then I want to sum the number of letters people found for each word and create new columns for each word and add to the dataset. First I got the error that 'x must be numeric' so I did data[is.na(data] <- 0
And then I did the following code:
col <- seq(261, by = 1, length.out = 16)
for (i in 1:16){
d2[, col[i]] <- rowSums(d2[, wordsearch[i, ] == 1])
}
I literally just did that with another dataset and it worked fine but now I'm getting the "x' must be an array of at least two dimensions". Can someone help?

Related

Reading England and Wales Charity Commission bcp files in R

I'm trying to read .bcp files provided by https://register-of-charities.charitycommission.gov.uk/register/full-register-download in R. I have been trying previously answered questions here, but readChar does not seem to read everything in all files, namely it breaks for extract_charity.bcp.
So I have thought of readBin and tried to read extract_charity.bcp like this:
library(stringr)
b <- readBin("extract_charity.bcp", "character", n = 300000, size = NA_integer_,
endian = .Platform$endian)
c<- paste0(b, collapse = "" ) #put it back as one large character string
d<- str_locate_all(c, "\\*\\#\\#\\*\\d") #find row breaks followed by a digit
e <- d[[1]]
flags <- e[,1]
f <- c()
f[1] <- substr(c, 1, flags[1]-1)
for (i in 2:length(flags)) f[i]<- substr(c, flags[i-1]+4, flags[i]-1) #removes row breaks
export <- matrix(nrow = 372432, ncol = 18)
exportF <- matrix(nrow = 0, ncol = 18)
for (j in 1:length(flags)) {
new_row <- str_split( f[j], "\\#\\*\\*\\#" )[[1]] #removes column breaks
if (length(new_row)==18) { export[j, ] <- new_row #if correct number of columns
} else { print(j)
exportF <- rbind(exportF, new_row) }}
However, there are 49 errors - all of the same type. There is a strange character string inserted at various places across the table - currently it is "P`j[Ÿ " but when I run the script again, it is "°Tj[Ÿ ", so it provides different string every time I run the script, so I cannot run the script to remove it manually:
str_replace_all(c, problem, "")
Error in stri_replace_all_regex(string, pattern, fix_replacement(replacement), :
Missing closing bracket on a bracket expression. (U_REGEX_MISSING_CLOSE_BRACKET)
Just to let the world know, this can be done.
In first pass, the file is parsed and problems are stored in exportF, where it is identified and deleted from original parsing output. Then in the second pass, that is parsed correctly.
This is a mess, but it works, and pretty fast, too.
library(stringr)
library(stringi)
b <- readBin("extract_charity.bcp", "character", n = 300000, size = NA_integer_)
c<- paste0(b, collapse = "" )
tt<- str_locate_all(c, "\\*\\#\\#\\*\\d")
e <- tt[[1]]
flags <- e[,1]
f <- c()
f[1] <- substr(c, 1, flags[1]-1)
for (i in 2:length(flags)) {
f[i]<- substr(c, flags[i-1]+4, flags[i]-1)
}
export <- matrix(nrow = length(flags), ncol = 18)
exportF <- matrix(nrow = 0, ncol = 18)
for (j in 1:length(flags)) {
new_row <- str_split( f[j], "\\#\\*\\*\\#" )[[1]]
if (length(new_row)==18) { export[j, ] <- new_row
} else {print(flags[j])
exportF <- rbind(exportF, new_row) }}
#go trough the first line and see where the problem is and locate its position
problem <- str_sub(as.character(exportF[1,8]), 5, 10)
#CHECK TO SEE IF CORRECT
problem %in% str_sub(exportF[1,8], 5, 10)
problem %in% exportF[1,8]
str_detect(c,problem )
str_detect(b[324],problem )
#d <-stri_replace_all_charclass(b, problem, "")
str_detect(d,problem )
r<- gsub(problem, "", b )
str_detect(r,problem )
#now go again but with clean data
r<- paste0(r, collapse = "" )
tt<- str_locate_all(r, "\\*\\#\\#\\*\\d")
e <- tt[[1]]
flags <- e[,1]
f <- c()
f[1] <- substr(r, 1, flags[1]-1)
for (i in 2:length(flags)) {
f[i]<- substr(r, flags[i-1]+4, flags[i]-1)
}
#g<- str_split(f[372432], "\\#\\*\\*\\#")[[1]]
export <- matrix(nrow = 372434, ncol = 18)
exportF <- matrix(nrow = 0, ncol = 18)
for (j in 1:length(flags)) {
new_row <- str_split( f[j], "\\#\\*\\*\\#" )[[1]]
if (length(new_row)==18) { export[j, ] <- new_row
} else {print(flags[j])
exportF <- rbind(exportF, new_row) }}
write.csv(export, "extract_charity2021.csv", row.names = F)
leaving it here for either future myself or someone in need of doing this.

a simple for loop in r to simulate a small set of data, print result works fine but setting a dataset creates a dataset with only 2 entries

I am trying to get a for loop to work. I have other iterations of this code but cannot for the life of me figure why this particular version is not working correctly.
result <- data.frame(matrix(nrow = 101, ncol = 2))
colnames(result) <- c("i", "Ratio")
for (i in seq(0, 2, length.out = 101)){
p <- (1-pnorm(.5 + i))/(1-pnorm( i ))
result[i, 1] <- i
result[i, 2] <- p
}
result
for (i in seq(0,2, length.out = 101)){
p <- (1-pnorm(.5 + i))/(1-pnorm( i ))
print (p)
}
The first block of code yields a dataframe with 2 entries and the rest NA. The second works. Changing the seq from (0,2) to (0,3) yields a dataframe with 3 entries, (0,4) with 4 entries and so forth.
I am really struggling to understand what is causing this behavior with my code.
just use
counter<-0
for (i in seq(0, 2, length.out = 101)){
p <- (1-pnorm(.5 + i))/(1-pnorm( i ))
result[counter, 1] <- i
result[counter, 2] <- p
counter<-counter+1;
}
result

Loop for value matching won't work across data frames for multiple instances

Can anyone tell me what’s preventing this loop from running?
For each row i, in column 3 of the data frame ‘depth.df’, the loop preforms a mathematical function, using a second data frame, 'linker.df' (it multiplies i by a constant / a value from linker.df which is found by matching the value of i.
If I run the loop for a single instance of i, (lets say its = 50) it runs fine:
cor.depth <- function(depth.df){
result <- seq(from=1, to=(nrow(depth.df)))
x <- 8971
for(i in 1:nrow(depth.df)){
result[i] <- depth.df[i,3]*(x /( linker.df [i,2][ linker.df [i,1] == 50]))
return(result)
}
}
>97,331
but if I run it to loop over each instance of i, it always returns an error:
cor.depth <- function(depth.df){
result <- seq(from=1, to=(nrow(depth.df)))
x <- 8971
for(i in 1:nrow(depth.df)){
result[i] <- depth.df[i,3]*(x /( linker.df [i,2][ linker.df [i,1] %in% depth.df[i,3]]))
return(result)
}
}
Error in result[i] <- depth.df[i, 3] * (all_SC_bins/(depth.ea.bin.all[, :
replacement has length zero
EDIT
Here is a reproducible data set provided to illustrate data structure and issue
#make some data as an example
#make some data as an example
linker.data <- sample(x=40:50, replace = FALSE)
linker.df <- data.frame(
X = linker.data
, Y = sample(x=2000:3000, size = 11, replace = TRUE)
)
depth.df <- data.frame(
X = sample(x=9000:9999, size = 300, replace = TRUE)
, Y = sample(x=c("A","G","T","C"), size = 300, replace = TRUE)
, Z = sample(linker.data, size = 300, replace = TRUE)
)
cor.depth <- function(depth.df){
result <- seq(from=1, to=(nrow(depth.df)))
x <- 8971
for(i in 1:nrow(depth.df)){
result[i] <- depth.df[i,3]*(x /( linker.df [i,2][ linker.df [i,1] %in% depth.df[i,3]]))
return(result)
}
}
Error emerges because denominator returns integer(0) or numeric(0) or a FALSE result on most rows. Your loop attempts to find exact row number, i, where both dataframes' respective X and Z match. Likely, you intended where any of the rows match which would entail using a second, nested loop with an if conditional on matches.
cor.depth <- function(depth.df){
result <- seq(from=1, to=(nrow(depth.df)))
x <- 8971
for(i in 1:nrow(depth.df)){
for (j in 1:nrow(linker.df)){
if (linker.df[j,1] == depth.df[i,3]) {
result[i] <- depth.df[i,3]*(x /( linker.df[j,2]))
}
}
}
return(result)
}
Nonetheless, consider merge a more efficient, vectorized approach which matches any rows between both sets on ids. The setNames below renames columns to avoid duplicate headers:
mdf <- merge(setNames(linker.df, paste0(names(linker.df), "_l")),
setNames(depth.df, paste0(names(depth.df), "_d")),
by.x="X_l", by.y="Z_d")
mdf$result <- mdf$X_l * (8971 / mdf$Y_l)
And as comparison, the two approaches would be equivalent:
depth.df$result <- cor.depth(depth.df)
depth.df <- with(depth.df, depth.df[order(Z),]) # ORDER BY Z
mdf <- with(mdf, mdf[order(X_l),]) # ORDER BY X_L
all.equal(depth.df$result, mdf$result)
# [1] TRUE

Multiple loop Syntax Error

I cannot figure out what's going wrong with my loop and it is already too complicated for my current level. I have already tried applybut obviously I do something wrong, so I didn't use it at all.
library('wavelets')
library('benford.analysis')
indeces <- ls() # my initial datasets
wfilters <- array(c("haar","la8","d4","c6")) # filter option in "modwt" function
wfiltname <- array(c("h","l","d","c")) # to rename the new objects
for (i in 1:nrow(as.array(indeces))) {
x <- get(as.matrix(indeces[i]))
x <- x[,2]
# Creates modwt objects equal to the number of filters
for (j in 1:nrow(as.array(wfilters))) {
x <- wavelets::modwt(x, filter = wfilters[j], n.levels = 4,
boundary = "periodic")
# A loop that creates a matrix with benford fun output per modwt n.levels option
for (l in 1:4) {
x <- as.matrix(x#W$W[l]) # n.levels are represented as x#W$W1, x#W$W2,...
x <- benford.analysis::benford(x, number.of.digits = 1,
sign = "both", discrete = T,
round = 3) # accepts matrices
x[,l] <- x$bfd$data.dist # it always has 9 elements
}
assign(paste0("b", wfiltname[j], indeces[i]), x)
}
}
The above loop should be reproducible with any data (where the values are in second column). The error I get is the following:
Error in array(x, c(length(x), 1L), if (!is.null(names(x))) list(names(x), :
'data' must be of a vector type, was 'NULL'
Thanks to #Cath and #jogo I made it work after some improvements. Here's the correct code:
temp <- list.files(path = "...")
list2env(
lapply(setNames(temp, make.names(gsub("*.csv$", "", temp))),
read.csv), envir = .GlobalEnv)
rm(temp)
indeces <- ls()
wfilters <- array(c("haar","la8","d4","c6"))
wfiltname <- array(c("h","l","d","c"))
k <- data.frame(matrix(nrow = 9,ncol = 4))
nlvl <- 4
for (i in 1:length(indeces)) {
x <- as.matrix(get(indeces[i]))
for (j in 1:length(wfilters)) {
y <- wavelets::modwt(as.matrix(x), filter = wfilters[j], n.levels = nlvl,
boundary = "periodic")
y <- as.matrix(y#W)
for(m in 1:nlvl) {
z <- as.matrix(y[[m]])
z <- benford.analysis::benford(z, number.of.digits = 1, sign = "both", discrete = TRUE, round = 16)
k[m] <- as.data.frame(z$bfd$data.dist)
colnames(k)[m] <- paste0(wfilters[j], "W", m)
}
assign(paste0(indeces[i], wfiltname[j]), k)
}
}
rm(x,y,z,i,j,m,k)
I would appreciate if there is a way to write it more efficiently. Thank you very much

If error in loop create vector of "n" and continue

I have a loop in R which tests every possible combination of ARIMA with specific conditions and tests the lags. However during the loop there is an error
Error in optim(init[mask], armafn, method = optim.method, hessian = TRUE, :
non-finite finite-difference value [1]
When this error occurs I want it to create a vector of "n" which will be put into a matrix with the rest of the models. I have tried tryCatch but this for some reason stops the rest of the iterations from happening.
Here is my code:
N<- c(155782.7, 159463.7, 172741.1, 204547.2, 126049.3, 139881.9, 140747.3, 251963.0, 182444.3, 207780.8, 189251.2, 318053.7, 230569.2, 247826.8, 237019.6, 383909.5, 265145.5, 264816.4, 239607.0, 436403.1, 276767.7, 286337.9, 270022.7, 444672.9, 263717.2, 343143.9, 271701.7)
aslog<-"n"
library(gtools)
library(forecast)
a<-permutations(n=3,r=6,v=c(0:2),repeats.allowed=TRUE)
a<-a[ifelse((a[,1]+a[,4]>2|a[,2]+a[,5]>2|a[,3]+a[,6]>2),FALSE,TRUE),]
namWA<-matrix(0,ncol=1,nrow=length(a[,1]))
namWS<-matrix(0,ncol=1,nrow=length(a[,1]))
Arimafit<-matrix(0,ncol=length(N),nrow=length(a[,1]),byrow=TRUE)
tota<-matrix(0,ncol=1,nrow=length(a[,1]))
totb<-matrix(0,ncol=1,nrow=length(a[,1]))
for(i in 1:length(a[,1])){
namWA[i]<-paste("orderWA",i,sep=".")
assign(namWA[i],a[i,c(1:3)])
namWS[i]<-paste("orderWS",i,sep=".")
assign(namWS[i],a[i,c(4:6)])
ArimaW1 <- Arima(N, order= a[i,c(1:3)], seasonal=list(order=a[i,c(4:6)]),method="ML")
if(aslog=="y"){Arimafit[i,]<-c(exp(fitted(ArimaW1)))}else{Arimafit[i,]<-c(fitted(ArimaW1))}
nnn<-c(N)
arimab<-c(Arimafit[i,])
fullres<-nnn-arimab
v<-acf(fullres,plot=FALSE)
w<-pacf(fullres,plot=FALSE)
if(v$acf[2]>0.4|v$acf[2]<(-0.4)|v$acf[3]>0.4|v$acf[3]<(-0.4)|v$acf[4]>0.4|v$acf[4]<(-0.4)|v$acf[5]>0.4|v$acf[5]<(-0.4)|v$acf[6]>0.4|v$acf[6]<(-0.4)|v$acf[7]>0.4|v$acf[7]<(-0.4)|w$acf[1]>0.4|w$acf[1]<(-0.4)|w$acf[2]>0.4|w$acf[2]<(-0.4)|w$acf[3]>0.4|w$acf[3]<(-0.4)|w$acf[4]>0.4|w$acf[4]<(-0.4)|w$acf[5]>0.4|w$acf[5]<(-0.4)|w$acf[6]>0.4|w$acf[6]<(-0.4))
tota[i]<-"n" else{
tota[i]<-sum(abs(v$acf[2:7]))
totb[i]<-sum(abs(w$acf[1:6]))}
}
I tried doing
ArimaW1<-tryCatch(Arima(N, order= a[i,c(1:3)], seasonal=list(order=a[i,c(4:6)]),method="ML"),error=function(e) NULL)
and this gave another error
Error in Arimafit[i, ] <- c(fitted(ArimaW1)) :
number of items to replace is not a multiple of replacement length
then i tried:
ArimaW1<-tryCatch(Arima(N, order= a[i,c(1:3)], seasonal=list(order=a[i,c(4:6)]),method="ML"),error=function(e) matrix("n",ncol=length(Arimafit[1,])))
but this gave an error:
Error: $ operator is invalid for atomic vectors
and also gave a matrix with all the fitted ARIMA values up to iteration 68, after that it gives everything as 0.0
is there a way to get the loop to continue the iterations, filling a vector with a value which goes into the matrix Arimafit like the iterations that do work so that i can carry on with the code?
I just found out the way to do what i wanted to do. This may help other people so I wont delete it, ill just post the solution :)
library(gtools)
a<-permutations(n=3,r=6,v=c(0:2),repeats.allowed=TRUE)
a<-a[ifelse((a[,1]+a[,4]>2|a[,2]+a[,5]>2|a[,3]+a[,6]>2),FALSE,TRUE),]
namWA<-matrix(0,ncol=1,nrow=length(a[,1]))
namWS<-matrix(0,ncol=1,nrow=length(a[,1]))
Arimafit<-matrix(0,ncol=length(N),nrow=length(a[,1]),byrow=TRUE)
tota<-matrix(0,ncol=1,nrow=length(a[,1]))
totb<-matrix(0,ncol=1,nrow=length(a[,1]))
arimaerror<-matrix(0,ncol=length(N),nrow=1)
for(i in 1:length(a[,1])){
namWA[i]<-paste("orderWA",i,sep=".")
assign(namWA[i],a[i,c(1:3)])
namWS[i]<-paste("orderWS",i,sep=".")
assign(namWS[i],a[i,c(4:6)])
ArimaW1 <- try(Arima(N, order= a[i,c(1:3)], seasonal=list(order=a[i,c(4:6)]),method="ML"))
if(is(ArimaW1,"try-error"))
ArimaW1<-arimaerror else
ArimaW1<-ArimaW1
arimafitted<-try(fitted(ArimaW1))
if(is(arimafitted,"try-error"))
fitarima<-arimaerror else
fitarima<-arimafitted
if(aslog=="y"){Arimafit[i,]<-c(exp(fitarima))}else{Arimafit[i,]<-c(fitarima)}
nnn<-c(N)
arimab<-c(Arimafit[i,])
fullres<-nnn-arimab
v<-acf(fullres,plot=FALSE)
w<-pacf(fullres,plot=FALSE)
if(v$acf[2]>0.4|v$acf[2]<(-0.4)|v$acf[3]>0.4|v$acf[3]<(-0.4)|v$acf[4]>0.4|v$acf[4]<(-0.4)|v$acf[5]>0.4|v$acf[5]<(-0.4)|v$acf[6]>0.4|v$acf[6]<(-0.4)|v$acf[7]>0.4|v$acf[7]<(-0.4)|w$acf[1]>0.4|w$acf[1]<(-0.4)|w$acf[2]>0.4|w$acf[2]<(-0.4)|w$acf[3]>0.4|w$acf[3]<(-0.4)|w$acf[4]>0.4|w$acf[4]<(-0.4)|w$acf[5]>0.4|w$acf[5]<(-0.4)|w$acf[6]>0.4|w$acf[6]<(-0.4))
tota[i]<-"n" else{
tota[i]<-sum(abs(v$acf[2:7]))
totb[i]<-sum(abs(w$acf[1:6]))}
}
Here is a further adaption to what i wanted to achieve
a <- permutations(n = 3, r = 6, v = c(0:2), repeats.allowed = TRUE)
a <- a[ifelse((a[, 1] + a[, 4] > 2 | a[, 2] + a[, 5] > 2 | a[, 3] + a[, 6] > 2),
FALSE, TRUE), ]
Arimafit <- matrix(0,
ncol = length(Data.new),
nrow = length(a[, 1]),
byrow = TRUE)
totb <- matrix(0, ncol = 1, nrow = length(a[, 1]))
arimaerror <- matrix(0, ncol = length(Data.new), nrow = 1)
for (i in 1:length(a[, 1])){
ArimaData.new <- try(Arima(Data.new,
order = a[i, c(1:3)],
seasonal = list(order = a[i, c(4:6)]),
method = "ML"),
silent = TRUE)
if (is(ArimaData.new, "try-error")){
ArimaData.new <- arimaerror
} else {
ArimaData.new <- ArimaData.new
}
arimafitted <- try(fitted(ArimaData.new), silent = TRUE)
if (is(arimafitted, "try-error")){
fitarima <- arimaerror
} else {
fitarima <- arimafitted
}
if (as.log == "log"){
Arimafit[i, ] <- c(exp(fitarima))
Datanew <- c(exp(Data.new))
} else {
if (as.log == "sqrt"){
Arimafit[i, ] <- c((fitarima)^2)
Datanew <- c((Data.new)^2)
} else {
Arimafit[i, ] <- c(fitarima)
Datanew <- c(Data.new)
}
}
data <- c(Datanew)
arima.fits <- c(Arimafit[i, ])
fullres <- data - arima.fits
v <- acf(fullres, plot = FALSE)
w <- pacf(fullres, plot = FALSE)
if (v$acf[2]>0.4|v$acf[2]<(-0.4)|v$acf[3]>0.4|v$acf[3]<(-0.4)|v$acf[4]>0.4|v$acf[4]<(-0.4)|v$acf[5]>0.4|v$acf[5]<(-0.4)|v$acf[6]>0.4|v$acf[6]<(-0.4)|v$acf[7]>0.4|v$acf[7]<(-0.4)|w$acf[1]>0.4|w$acf[1]<(-0.4)|w$acf[2]>0.4|w$acf[2]<(-0.4)|w$acf[3]>0.4|w$acf[3]<(-0.4)|w$acf[4]>0.4|w$acf[4]<(-0.4)|w$acf[5]>0.4|w$acf[5]<(-0.4)|w$acf[6]>0.4|w$acf[6]<(-0.4)){
totb[i] <- "n"
} else {
totb[i] <- sum(abs(w$acf[1:4]))
}
j <- match(min(totb), totb)
order.arima <- a[j, c(1:3)]
order.seasonal.arima <- a[j, c(4:6)]
}

Resources