How to read 3 correlation matrix as array - r

I would like to read 3 independent correlation matrices into one array.
I have followed the approach indicated here.
However, I am getting an error and don't know why. I would appreciate it if someone could look at my code and help me.
Here is my code and simulated data.
dataDir <- getwd()
## Each matrix is in a csv file
## Every block below draws a 20 x 5 matrix of standard-normal values,
## correlates its rows (giving a 20 x 20 matrix) and writes that to a csv file.
## m1
set.seed(22)
li.A <- matrix(rnorm(100), nrow = 20)
rownames(li.A) <- LETTERS[1:20]
colnames(li.A) <- paste0("S_", 1:5)
m1 <- cor(t(li.A))
write.csv(m1, file = "m1.csv")
## m2
set.seed(42)
pa.A <- matrix(rnorm(100), nrow = 20)
rownames(pa.A) <- LETTERS[1:20]
colnames(pa.A) <- paste0("S_", 1:5)
m2 <- cor(t(pa.A))
write.csv(m2, file = "m2.csv")
## m3
set.seed(44)
li.B <- matrix(rnorm(100), nrow = 20)
rownames(li.B) <- LETTERS[1:20]
colnames(li.B) <- paste0("S_", 1:5)
m3 <- cor(t(li.B))
write.csv(m3, file = "m3.csv")
## List the files in dataDir whose names match the pattern ".csv"
fileList <- dir(path=dataDir,pattern = ".csv")
## Read all matrices into an array
A <- array(as.numeric(NA),dim=c(20,20,3)) # There are 3 matrices of size 20 x 20
## Fill slice i of A with the contents of the i-th file
for (i in 1:length(fileList)){
## NOTE(review): sep = ';' does not match comma-separated files such as those written by
## write.csv(); each line is then read as a single field, which row.names=1 consumes,
## so nothing is left to assign into A[,,i]
A[,,i] <- as.matrix(read.delim(file.path(dataDir,fileList[i]), sep = ';', header=TRUE, row.names=1))
}
here is the error.
Error in A[, , i] <- as.matrix(read.delim(file.path(dataDir, fileList[i]), :
replacement has length zero
Thank you!

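The issue is the `sep = ';'` argument: the files are comma-separated, so it should be `sep = ","`. With `sep = ';'` each line is read as a single string column instead of multiple columns, and `row.names = 1` then uses up that one column, so the indexed assignment receives a zero-length replacement and raises the error. The corrected code is: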
## Read every csv file into its own 20 x 20 slice of a 3-d array.
## fileList and dataDir are the objects created in the question's code.
## Sizing the third dimension from fileList keeps the array and the loop in step.
A <- array(NA_real_, dim = c(20, 20, length(fileList))) # one 20 x 20 slice per file
## seq_along() yields an empty sequence (instead of c(1, 0)) when no file was found
for (i in seq_along(fileList)) {
  ## write.csv() produces comma-separated files, so the separator must be ","
  A[, , i] <- as.matrix(read.delim(file.path(dataDir, fileList[i]),
                                   sep = ",", header = TRUE, row.names = 1))
}
dim(A)
#[1] 20 20 3

Related

Optimizing an R code with For Loop with matrix

I need to convert the following code to one with for loop, what is the easiest way to do it?
# Fix the random seed so the draws below are reproducible
set.seed(123)
# Number of rows in every simulated matrix
iter <- 1000
# The same three steps are repeated for each sample size (2, 5, 10, 20):
# fill an iter x size matrix with Cauchy(0, 1) draws, take the median of
# every row, and plot a histogram of those medians.
s1 <- 2
mat1 <- matrix(data = rcauchy(iter*s1,0,1),nrow = iter,ncol = s1)
sets1 <- apply(mat1,1,median)
hist(sets1)
# Sample size 5
s2 <- 5
mat2 <- matrix(data = rcauchy(iter*s2,0,1),nrow = iter,ncol = s2)
sets2 <- apply(mat2,1,median)
hist(sets2)
# Sample size 10
s3 <- 10
mat3 <- matrix(data = rcauchy(iter*s3,0,1),nrow = iter,ncol = s3)
sets3 <- apply(mat3,1,median)
hist(sets3)
# Sample size 20
s4 <-20
mat4 <- matrix(data = rcauchy(iter*s4,0,1),nrow = iter,ncol = s4)
sets4 <- apply(mat4,1,median)
hist(sets4)
I tried the following:
# Attempted loop version of the repeated code
set.seed(1234)
iter <- 1000
size <- c(2,5,10,20)
# NOTE(review): size holds four values, but `:` takes a single end point,
# so 2:size does not step through the four sample sizes
for(i in 2:size){
for (j in 1:iter){
# NOTE(review): mat is not created anywhere in this snippet before it is indexed,
# and rcauchy(i*j, ...) draws i*j values for a matrix declared as iter x i
mat[] <- matrix(data = rcauchy(i*j,0,1),nrow=iter,ncol=i)
s <- apply(mat,1,median)
hist(s)
}
}
But it does not work, please help
The easiest way is to wrap the creation of the matrix in an lapply call.
# Reproducible simulation: one matrix of Cauchy(0, 1) draws per sample size
set.seed(123)
iter <- 1000
size <- c(2, 5, 10, 20)
# For each sample size, build an iter x size matrix, plot the histogram of its
# row medians, and keep the matrix itself as the list element.
returnmatrix <- lapply(size, function(sample_size) {
  draws <- matrix(
    data = rcauchy(sample_size * iter, location = 0, scale = 1),
    nrow = iter,
    ncol = sample_size
  )
  # Named `s` so the histogram's default x-axis label stays "s"
  s <- apply(draws, MARGIN = 1, FUN = median)
  hist(s, main = paste("Histogram when S=", sample_size))
  draws
})
The lapply call will plot the histograms and return the matrices as a list, in case additional processing is desired.

Trying to perform MNIST example using data locally in R and TensorFlow

I am trying to perform this MNIST example, but instead of using
input_dataset <- tf$examples$tutorials$mnist$input_data
mnist <- input_dataset$read_data_sets("MNIST-data", one_hot = TRUE)
I am trying to import the dataset locally in my Documents/MNIST/MNIST-data folder.
The first attempt, I tried from an example:
# Open the image file in binary mode
images <- file("t10k-images-idx3-ubyte", "rb")
# Read (and discard) the first four big-endian integers, presumably the file header
readBin(images, integer(), n = 4, endian = "big")
# First image: 28 * 28 single-byte values arranged as a 28 x 28 matrix.
# signed = FALSE keeps byte values above 127 positive (pixel intensities are
# presumably unsigned 0-255 - confirm against the file format).
m <- matrix(readBin(images, integer(), size = 1, n = 28 * 28, signed = FALSE,
                    endian = "big"), 28, 28)
image(m)
# 5 x 5 grid of plots without margins
par(mfrow = c(5, 5))
par(mar = c(0, 0, 0, 0))
# Plot the next 25 images; the columns are reversed so the digits are drawn upright
for (i in 1:25) {
  m <- matrix(readBin(images, integer(), size = 1, n = 28 * 28, signed = FALSE,
                      endian = "big"), 28, 28)
  image(m[, 28:1])
}
# Release the file connection once all images have been read
close(images)
I wasn't sure how to proceed from there, so I tried another example I found on GitHub:
# Load the four MNIST files found in `dir`.
#
# Args:
#   dir: directory containing train-images-idx3-ubyte, train-labels-idx1-ubyte,
#        t10k-images-idx3-ubyte and t10k-labels-idx1-ubyte.
#
# Returns:
#   A list with elements `train` and `test`, each a list of
#     n  - number of images read from the image file header,
#     x  - integer matrix with one image per row (rows * cols columns),
#     y  - integer vector of labels,
#     yy - one-hot matrix of the labels (one row per label, 10 columns).
load.mnist <- function(dir) {
  # Read one image file: three header integers (count, rows, cols) follow the
  # first header word, then one unsigned byte per pixel.
  load.image.file <- function(filename) {
    f <- file(filename, "rb")
    # Close the connection even if a read fails part-way through
    on.exit(close(f), add = TRUE)
    readBin(f, "integer", n = 1, size = 4, endian = "big")  # first header word, unused
    n_images <- readBin(f, "integer", n = 1, size = 4, endian = "big")
    n_row <- readBin(f, "integer", n = 1, size = 4, endian = "big")
    n_col <- readBin(f, "integer", n = 1, size = 4, endian = "big")
    pixels <- readBin(f, "integer", n = n_images * n_row * n_col, size = 1,
                      signed = FALSE)
    list(n = n_images, x = matrix(pixels, ncol = n_row * n_col, byrow = TRUE))
  }
  # Read one label file: a header word, the label count, then one unsigned byte
  # per label.
  load.label.file <- function(filename) {
    f <- file(filename, "rb")
    on.exit(close(f), add = TRUE)
    readBin(f, "integer", n = 1, size = 4, endian = "big")  # first header word, unused
    n_labels <- readBin(f, "integer", n = 1, size = 4, endian = "big")
    readBin(f, "integer", n = n_labels, size = 1, signed = FALSE)
  }
  # One-hot encode labels: row i gets a 1 in column y[i] + 1.
  # Matrix indexing handles an empty label vector, unlike a 1:n loop.
  one.hot <- function(y) {
    yy <- matrix(0, nrow = length(y), ncol = 10)
    yy[cbind(seq_along(y), y + 1)] <- 1
    yy
  }
  mnist <- list()
  mnist$train <- load.image.file(file.path(dir, "train-images-idx3-ubyte"))
  mnist$test <- load.image.file(file.path(dir, "t10k-images-idx3-ubyte"))
  mnist$train$y <- load.label.file(file.path(dir, "train-labels-idx1-ubyte"))
  mnist$train$yy <- one.hot(mnist$train$y)
  mnist$test$y <- load.label.file(file.path(dir, "t10k-labels-idx1-ubyte"))
  mnist$test$yy <- one.hot(mnist$test$y)
  mnist
}
# Draw one digit from a flat vector of pixel values.
#
# Args:
#   arr784: vector of pixel values, reshaped into a matrix with 28 rows.
#   col:    colour palette passed to image(); defaults to 12 grey levels.
#   ...:    further arguments forwarded to image().
show.digit <- function(arr784, col = gray(12:1 / 12), ...) {
  pixels <- matrix(arr784, nrow = 28)
  # Reverse the column order before plotting
  flipped <- pixels[, 28:1]
  image(flipped, col = col, ...)
}
# The function is defined as show.digit (with a dot), so call it by that name.
# `train` is presumably the `train` element of the list returned by load.mnist() - TODO confirm
show.digit(train$x[5,])
Again, I wasn't sure how to proceed or how that could be implemented in the MNIST For ML Beginners example.
The third attempt I've tried was from the Iris dataset TF example:
library(tensorflow)
library(tfdatasets)
# Build a record spec from the CSV file itself, then a line-by-line dataset that uses it
mnist_train_spec <- csv_record_spec("mnist_train.csv")
dataset <- text_line_dataset("mnist_train.csv", record_spec = mnist_train_spec)
str(dataset)
# Second spec: column names "label", "P2", ..., "P785" given explicitly.
# NOTE(review): only `names` is passed here (no example file, types or defaults);
# presumably this is the call that raises the error quoted below - confirm
mnist_train_spec <- csv_record_spec(
names = c("label", paste("P", as.character(c(2:785)), sep = ""))
)
This last attempt throws out an error - Error in delim_record_spec(example_file, delim = ",", skip, names, types, :
You must provide an example_file if you don't explicitly specify names and types (or defaults)
My main question is, how can I locally import MNIST data files into R, and still be able to run the MNIST example without using the two lines of code I first mentioned?

Creating large matrix whilst manipulating columns

I am a bit stumped on how to create a large matrix. The code I have so far are as given below. What I desire is that the rows are positions and columns are 'beta_values' from individual text files.
#Number of files = 300
#Matrix 33000 x 300
# All files in the working directory whose names end in "txt"
file.list <- list.files(pattern = "txt$")
# for each file, read it with fread and select only the 1st, 2nd, 4th and 12th columns
columns = c('ID','S','E','Name','Alias','version','cdrive','positional','Contour','total_Contour','M_values','beta_values')
#Number of rows in the matrix
nr=33000
# NOTE(review): matrix("numeric", ...) fills every cell with the string "numeric";
# it does not create an empty numeric matrix
mat <- matrix("numeric", nrow = nr, ncol = length(file.list))
for (i in 1:length(file.list)) {
# NOTE(review): colClasses is given the vector of column names here - presumably
# col.names was intended; confirm
fs <- fread(file.list[i], colClasses = columns, select=c(1,2,4,12))
# Creating Position values given by paste(fs$V1,'_', fs$V2,'_',fs$V4, sep="") and 'beta_values' given by fs$V12
fs_reorder <- data.frame(paste(fs$V1,'_', fs$V2,'_',fs$V4, sep=""), fs$V12)
# NOTE(review): fs_reorder has two columns (position label and beta value), so
# as.matrix() supplies two columns' worth of values for the single column mat[,i]
mat[,i] <- as.matrix(fs_reorder)
}
Error:
Error in mat[, i] <- as.matrix(fs_reorder) :
number of items to replace is not a multiple of replacement length
Quick note:
> i=1
> fs <- fread(file.list[i], select=c(1,2,4,12))
> mat <- matrix(nrow = nr, ncol = length(file.list))
> fs_reorder <- data.frame(paste(fs$V1,'_', fs$V2,'_',fs$V4, sep=""), fs$V12)
> mat[,i] <- as.matrix(fs_reorder)
Error in mat[, i] <- as.matrix(fs_reorder) :
number of items to replace is not a multiple of replacement length
> mat<- as.matrix(fs_reorder)
So this works for 1 file.
Looping through the files:
# All files in the working directory whose names end in "txt"
file.list_main <- list.files(pattern = "txt$")
# Work on the first two files only
file.list = file.list_main[1:2]
# NOTE(review): position_mat is not defined anywhere in this snippet
n = length(position_mat)
k = length(file.list)
# n x k matrix of NA, one column per file
mat <- matrix(nrow=n, ncol=length(file.list))
for (i in 1:length(file.list)) {
fs <- fread(file.list[i], select=c(1,2,4,12))
fs_reorder <- data.frame(paste(fs$V1,'_', fs$V2,'_',fs$V4, sep=""), fs$V12)
# positions is a character vector built with paste(); betas is column V12 of the file
positions = (paste(fs$V1,'_', fs$V2,'_',fs$V4, sep=""))
betas = fs$V12
for(j in 1:k){
# NOTE(review): this inner loop reuses i, the index of the outer loop over files
for(i in 1:n){
# NOTE(review): positions[i] is a character string, so `*` cannot be applied to it
mat[i,j] = (positions[i]*betas[j])
}
}
}
Error:
Error in positions[i] * betas[j] :
non-numeric argument to binary operator
For reproducible analysis, please find the example below. Any help is very much appreciated.
# Simulated stand-in for the real files: three "files" of n beta values each
set.seed(20430)
n = 1000
k = 3
fileA = rnorm(n)
fileB = rnorm(n)
fileC = rnorm(n)
# Character labels, one per row
positions = paste("loveletters_",rnorm(n),sep="")
# n x 3 numeric matrix with one column per simulated file
betas <- cbind(fileA, fileB, fileC)
for(j in 1:k){
for(i in 1:n){
# NOTE(review): x is never created in this snippet; positions[i] is a character
# string, so `*` cannot be applied to it; and betas[j] with a single index picks
# the j-th element of the matrix, not its j-th column
x[i,j] = (positions[i]*betas[j])
}
}
Results:
Error in positions[i] * betas[j] :
non-numeric argument to binary operator
> length(positions)
[1] 1000
> ncol(betas)
[1] 3
> nrow(betas)
[1] 1000

Read large number of .asc-files, delete rows and save as raster in R

I need to read a large number of .asc-files, delete rows and transform them to a raster-stack by stack(). The source of the zip-packed data is here: ftp://ftp.dwd.de/pub/CDC/grids_germany/monthly/radiation_global/
I've already unzipped the files. But the code I wrote is really slow; my computer can't finish running it:
# Collect every .asc file below "mydirectory" (searched recursively, full paths)
files <- list.files("mydirectory", pattern="\\.asc$",
full.names=TRUE, recursive=TRUE)
# Each step below rebinds i to a list with one element per file
i <- lapply(files, readLines) #read data
i <- lapply(i, function(x){x[-(1:28)]}) #delete rows
i <- lapply(i, function(x){gsub('-999', ' NA ', x, fixed = TRUE)}) #convert '-999' to NA
# NOTE(review): textConnection() opens a new connection for every file and nothing
# in this snippet closes it
i <- lapply(i, function(x){scan(textConnection(x), what = double(), n = 866*654)}) #convert to numeric
i <- lapply(i, function(x){matrix(x, nrow = 866, ncol = 654, byrow = TRUE)}) #convert to matrix
r <- lapply(i, function(x){raster(x)}) #rasterize data
st <- stack(r) #convert to stack-raster
I wonder if there is a better way to convert this data to raster files. Other .asc files have only 6 lines as a header, like here: ftp://ftp.dwd.de/pub/CDC/grids_germany/monthly/precipitation/01_Jan/ . I read that data with a much simpler function that uses only the stack() function:
# Stack all .asc files found under `directory`, crop the stack and write it out.
#
# Args:
#   directory: folder searched recursively for files ending in ".asc".
#   output:    file name passed to writeRaster().
#   clipping:  object whose extent() is used to crop the stack.
#
# Relies on `gk3` being defined outside this function; it is assigned as the
# stack's crs.
loadRaster <- function(directory, output, clipping){
  asc_paths <- list.files(
    path = directory,
    pattern = "\\.asc$",
    full.names = TRUE,
    recursive = TRUE
  )
  layers <- stack(asc_paths)
  crs(layers) <- gk3
  clip_extent <- extent(clipping)
  cropped <- crop(layers, clip_extent)
  writeRaster(cropped, filename = output)
}
#You can ignore the code below "stc <-stack(files)"
Finally I got it working by using textConnection() stepwise (opening the connections and closing them again), because there is a limit on the number of open connections – and that was maybe the cause of the slowness.
# Collect every .asc file below "mydirectory" (searched recursively, full paths)
files <- list.files(path = "mydirectory", pattern = "\\.asc$",
                    full.names = TRUE, recursive = TRUE)
# Parse one .asc file into a numeric vector of 866 * 654 cell values.
read_asc_values <- function(path) {
  lines <- readLines(path)[-(1:28)]                   # drop the leading 28 lines
  lines <- gsub("-999", " NA ", lines, fixed = TRUE)  # '-999' becomes NA
  # scan(text = ) opens and closes its own text connection, so open connections
  # never accumulate. This removes the need for fixed batches of 100 files and
  # for closeAllConnections(), and it works for any number of files.
  scan(text = lines, what = double(), n = 866 * 654)
}
i <- lapply(files, read_asc_values)
# Label each element with characters 92-97 of its file path
names(i) <- substr(files, 92, 97)
# One 866 x 654 matrix per file, filled row by row
m <- lapply(i, function(x) matrix(x, nrow = 866, ncol = 654, byrow = TRUE))
r <- lapply(m, raster)
stc <- stack(r)

Errors while using cbind with a matrix

I have a list of 40 data sets that all have the same columns. I want to bind the 7th column of each data set. I thought about doing this with a matrix and cbind. This is my code:
# Build a 766 x length(ListeActions) matrix with one column per data set.
# The argument q is overwritten on the first line of the body, so the value
# passed in is not used. ListeActions is read from outside the function.
RetRates <- function(q) {
# Zero-filled matrix: rep(0, 766) is recycled to fill all cells
q <- matrix(nrow = 766, ncol = length(ListeActions),
data = rep(0, 766), byrow = TRUE)
# NOTE(review): s is assigned but not used anywhere in this function
s <- 0
for (i in 1:length(ListeActions)) {
x <- ListeActions[[i]]
# NOTE(review): cbind(q[,i], x[,9]) produces a two-column result, which is then
# assigned into the single column q[,i]
q[,i] <- cbind(q[,i], x[,9]) ## I need the 9th column
}
return(q)
}
# Same zero-filled matrix built outside the function, then replaced by its result
Hedi <- matrix(nrow = 766, ncol = length(ListeActions),
data = rep(0, 766), byrow = TRUE)
Hedi <- RetRates(Hedi)
I get these warnings :
Warning messages: 1: In replace(q[, i], 1:766, x[, 9]) : the number
of objects to be replaced is not a multiple of the size of the
replacement !
Let's take a smaller example: cbind the 5th columns of each of these 3 matrices
d1 <- matrix(runif(30), 5, 6)
d2 <- matrix(rnorm(30), 5, 6)
d3 <- matrix(rnorm(30), 5, 6)
First we put the 3 matrices in a list
M <- list(d1=d1, d2=d2, d3=d3)
Then we could use, as in your question, a for loop
# One result column per matrix in the list M, filled with that matrix's 5th column
res1 <- matrix(NA, nrow = 5, ncol = length(M))
# seq_along() gives an empty sequence for an empty list, where 1:length(M) would give c(1, 0)
for (i in seq_along(M)) {
  res1[, i] <- M[[i]][, 5]
}
Or we could use some magical R functions to get the result in one slightly more obscure command
res2 <- do.call(cbind, lapply(M, "[",,5))

Resources