Creating large matrix whilst manipulating columns - r

I am a bit stumped on how to create a large matrix. The code I have so far is given below. What I want is a matrix whose rows are positions and whose columns are the 'beta_values' from the individual text files.
#Number of files = 300
#Matrix 33000 x 300
# List the input files: names in the working directory that end in "txt".
file.list <- list.files(pattern = "txt$")
# For each file, read it with fread() and keep only columns 1, 2, 4 and 12.
# `columns` holds twelve names, presumably the column names of the input files.
columns = c('ID','S','E','Name','Alias','version','cdrive','positional','Contour','total_Contour','M_values','beta_values')
# Number of rows in the matrix
nr=33000
# Preallocate an nr x (number of files) matrix. The first argument of matrix()
# is the data, so every cell is filled with the string "numeric".
mat <- matrix("numeric", nrow = nr, ncol = length(file.list))
for (i in 1:length(file.list)) {
# NOTE(review): `columns` contains column names but is passed as colClasses;
# col.names may have been intended -- confirm against the fread() documentation.
fs <- fread(file.list[i], colClasses = columns, select=c(1,2,4,12))
# Build the position key as V1_V2_V4 and pair it with the 'beta_values' in V12.
fs_reorder <- data.frame(paste(fs$V1,'_', fs$V2,'_',fs$V4, sep=""), fs$V12)
# as.matrix(fs_reorder) has two columns, while mat[, i] is a single column;
# this is the assignment that raises the error quoted below.
mat[,i] <- as.matrix(fs_reorder)
}
Error:
Error in mat[, i] <- as.matrix(fs_reorder) :
number of items to replace is not a multiple of replacement length
Quick note:
> i=1
> fs <- fread(file.list[i], select=c(1,2,4,12))
> mat <- matrix(nrow = nr, ncol = length(file.list))
> fs_reorder <- data.frame(paste(fs$V1,'_', fs$V2,'_',fs$V4, sep=""), fs$V12)
> mat[,i] <- as.matrix(fs_reorder)
Error in mat[, i] <- as.matrix(fs_reorder) :
number of items to replace is not a multiple of replacement length
> mat<- as.matrix(fs_reorder)
So this works for 1 file.
Looping through the files:
# Second attempt: fill the matrix cell by cell, using only the first two files.
file.list_main <- list.files(pattern = "txt$")
file.list = file.list_main[1:2]
# NOTE(review): position_mat is not defined anywhere in this snippet -- confirm
# where it comes from.
n = length(position_mat)
k = length(file.list)
mat <- matrix(nrow=n, ncol=length(file.list))
for (i in 1:length(file.list)) {
fs <- fread(file.list[i], select=c(1,2,4,12))
# fs_reorder is built here but not referenced again in this snippet.
fs_reorder <- data.frame(paste(fs$V1,'_', fs$V2,'_',fs$V4, sep=""), fs$V12)
positions = (paste(fs$V1,'_', fs$V2,'_',fs$V4, sep=""))
betas = fs$V12
# The nested loops visit every (row, column) cell on each pass of the outer
# loop; the innermost index is also named i, the same name as the file index.
for(j in 1:k){
for(i in 1:n){
# positions[i] is a character string (built with paste() above), so `*` fails
# with "non-numeric argument to binary operator".
mat[i,j] = (positions[i]*betas[j])
}
}
}
Error:
Error in positions[i] * betas[j] :
non-numeric argument to binary operator
For reproducible analysis, please find the example below. Any help is very much appreciated.
# Self-contained example: three simulated "files" of n values each, plus one
# character label per row.
set.seed(20430)
n = 1000
k = 3
fileA = rnorm(n)
fileB = rnorm(n)
fileC = rnorm(n)
# positions is a character vector ("loveletters_" followed by a random number).
positions = paste("loveletters_",rnorm(n),sep="")
# betas is an n x 3 numeric matrix with columns fileA, fileB and fileC.
betas <- cbind(fileA, fileB, fileC)
# NOTE(review): x is never created in this snippet before it is indexed below.
for(j in 1:k){
for(i in 1:n){
# positions[i] is character, so `*` raises "non-numeric argument to binary
# operator". betas[j] uses a single index on a matrix, which selects the j-th
# element in column order rather than column j; betas[i, j] addresses one cell.
x[i,j] = (positions[i]*betas[j])
}
}
Results:
Error in positions[i] * betas[j] :
non-numeric argument to binary operator
> length(positions)
[1] 1000
> ncol(betas)
[1] 3
> nrow(betas)
[1] 1000

Related

How to read 3 correlation matrix as array

I would like to read 3 independent correlation matrix in one array.
I have followed the approach indicated here.
However, I am getting an error and don't know why. I would appreciate it if someone could look at my code and help me.
Here are my codes and simulated data.
# Simulate three 20 x 20 correlation matrices and write each one to its own
# csv file in the working directory.
dataDir <- getwd()
## Each matrix is in a csv file
set.seed(22)
## m1
li.A <- matrix(rnorm(100), nrow = 20)
rownames(li.A) <- LETTERS[1:20]
colnames(li.A) <- paste0("S_", 1:5)
# cor() correlates columns, so transpose to correlate the 20 rows with each other.
m1 <- cor(t(li.A))
write.csv(m1, file = "m1.csv")
# m2
set.seed(42)
pa.A <- matrix(rnorm(100), nrow = 20)
rownames(pa.A) <- LETTERS[1:20]
colnames(pa.A) <- paste0("S_", 1:5)
m2 <- cor(t(pa.A))
write.csv(m2, file = "m2.csv")
# m3
set.seed(44)
li.B <- matrix(rnorm(100), nrow = 20)
rownames(li.B) <- LETTERS[1:20]
colnames(li.B) <- paste0("S_", 1:5)
m3 <- cor(t(li.B))
write.csv(m3, file = "m3.csv")
# `pattern` is a regular expression: escape the dot and anchor at the end so
# that only names ending in ".csv" are matched.
fileList <- dir(path = dataDir, pattern = "\\.csv$")
## Read all matrices into an array
A <- array(as.numeric(NA),dim=c(20,20,3)) # There are 3 matrices of size 20 x 20
for (i in 1:length(fileList)){
# NOTE(review): the files were written with write.csv(), i.e. comma-separated,
# but are read here with sep = ';'. With row.names = 1 that presumably leaves
# no data columns, which would explain "replacement has length zero" -- confirm.
A[,,i] <- as.matrix(read.delim(file.path(dataDir,fileList[i]), sep = ';', header=TRUE, row.names=1))
}
here is the error.
Error in A[, , i] <- as.matrix(read.delim(file.path(dataDir, fileList[i]), :
replacement has length zero
Thank you!
The issue is the separator: the files are comma-separated, so sep = ';' should be sep = ','. With the wrong separator each line is read as a single string column instead of multiple columns, so the indexed assignment fails with the error shown. The corrected code is:
# Read every correlation matrix listed in `fileList` into one 3-D array.
A <- array(NA_real_, dim = c(20, 20, 3)) # There are 3 matrices of size 20 x 20
# seq_along() makes the loop a no-op when no files were found, whereas
# 1:length(fileList) would iterate over c(1, 0).
for (i in seq_along(fileList)) {
  # The files are comma-separated and their first column holds the row names.
  A[, , i] <- as.matrix(
    read.csv(file.path(dataDir, fileList[i]), header = TRUE, row.names = 1)
  )
}
dim(A)
#[1] 20 20 3

Get correlations for all combinations between two differently sized dataframes

Is there an R function to calculate all possible correlations and provide p-values between rows in two data frames (with similar number of columns but varying rows), similar as to the cor() function in R?
I found cor.test(), but it only takes a dataframe of similar size.
To the best of my knowledge, the function cor.test only accepts vectors of numeric values that have the same length.
You can achieve what you are looking for with, e.g., the function corrplot::cor.mtest.
Here is a reproducible example. First load the library and create the fake data...
library(corrplot)
# Fix the RNG seed so that the simulated data, and therefore the p-values
# computed from it, are the same on every run.
set.seed(42)
# df1 and df2 have different numbers of rows (genes) but share the same
# n columns (subjects).
nbgene1 <- 100
nbgene2 <- 200
n <- 10
df1 <- matrix(rnorm(nbgene1 * n), nbgene1, n)
rownames(df1) <- paste0("Df1_gene", 1:nbgene1)
colnames(df1) <- paste0("Subject", 1:n)
df2 <- matrix(rnorm(nbgene2 * n), nbgene2, n)
rownames(df2) <- paste0("Df2_gene", 1:nbgene2)
colnames(df2) <- paste0("Subject", 1:n)
The function cor.mtest only accepts a single data-frame, with individuals as rows and variables as columns, so you need to combine the two data-frames...
df_combined <- rbind(df1, df2)
... and input the transposed data-frame to cor.mtest (because in your case, rows are genes and columns are individuals).
res_cortest <- cor.mtest(t(df_combined))
Then all you need to do is extract the correct p-values from the result.
pval <- res_cortest$p[1:nbgene1, (nbgene1+1):(nbgene1+nbgene2)]
You may want to rename the rows and columns of this matrix for a more interpretable result.
dimnames(pval) <- list(rownames(df1), rownames(df2))
Also, don't forget to correct for multiple testing !
# For example with Benjamini and Hochberg's method
padj <- matrix(p.adjust(pval, "BH"), nbgene1, nbgene2, dimnames = dimnames(pval))
What's even more interesting than using cor.mtest is to look at what's inside!
> corrplot::cor.mtest
function (mat, ...)
{
mat <- as.matrix(mat)
n <- ncol(mat)
p.mat <- lowCI.mat <- uppCI.mat <- matrix(NA, n, n)
diag(p.mat) <- 0
diag(lowCI.mat) <- diag(uppCI.mat) <- 1
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(x = mat[, i], y = mat[, j], ...)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
if (!is.null(tmp$conf.int)) {
lowCI.mat[i, j] <- lowCI.mat[j, i] <- tmp$conf.int[1]
uppCI.mat[i, j] <- uppCI.mat[j, i] <- tmp$conf.int[2]
}
}
}
list(p = p.mat, lowCI = lowCI.mat, uppCI = uppCI.mat)
}
It's a simple for loop!
An equivalent of this loop in the context of our reproducible example would be...
# p-value of the correlation test between every row of df1 and every row of
# df2; rows of the result follow df1, columns follow df2.
pval <- matrix(NA_real_, nrow(df1), nrow(df2),
               dimnames = list(rownames(df1),
                               rownames(df2)))
# seq_len() yields an empty sequence for zero rows, unlike 1:0.
for (i in seq_len(nrow(df1))) {
  for (j in seq_len(nrow(df2))) {
    pval[i, j] <- cor.test(df1[i, ], df2[j, ])$p.value
  }
}
The multiple correction step is the same.

Creating or initializing an empty matrix in R

I am using R v 3.0.0 (2013-04-03) and RStudio v 1.1.463 under Win-7 64-bit.
In the following source code:
# Problem 1 - Matrix powers in R
#
# R does not have a built-in command for taking matrix powers.
# Write a function matrixpower with two arguments mat and k that
# will take integer powers k of a matrix mat.
# Multiply a square matrix by itself (the same result as mat1 %*% mat1),
# using explicit loops.
#
# mat1: a square numeric matrix.
# Returns a matrix with the same dimensions whose [i, j] entry is the dot
# product of row i and column j of mat1.
matrixMul <- function(mat1)
{
  rows <- nrow(mat1)
  cols <- ncol(mat1)
  # A matrix can only be multiplied by itself when it is square.
  stopifnot(rows == cols)
  # The empty first argument is legal R: `data` falls back to its default (NA).
  matOut <- matrix(, nrow = rows, ncol = cols) # empty matrix
  for (i in seq_len(rows))
  {
    for (j in seq_len(cols))
    {
      # One cell holds one number: the sum of the element-wise products of
      # row i and column j.
      matOut[i, j] <- sum(mat1[i, ] * mat1[, j])
    }
  }
  matOut
}
# Raise a square matrix to a non-negative integer power.
#
# mat1: a square numeric matrix.
# k:    a single non-negative whole number; k = 0 gives the identity matrix.
# Returns the product of k copies of mat1 (mat1 %*% mat1 %*% ... %*% mat1).
matrixpower <- function(mat1, k)
{
  stopifnot(is.matrix(mat1), nrow(mat1) == ncol(mat1),
            length(k) == 1L, k >= 0, k == round(k))
  # Start from the identity so that the loop applies exactly k multiplications.
  matOut <- diag(nrow(mat1))
  # seq_len(k) iterates k times; `for (i in k)` would run the body only once.
  for (i in seq_len(k))
  {
    matOut <- matOut %*% mat1
  }
  matOut
}
mat1 <- matrix(c(1,2,3,4,5,6,7,8,9), nrow = 3, ncol=3)
power1 <- matrixMul(mat1)
the declaration
matOut <- matrix(, nrow = rows, ncol = cols) # empty matrix
is giving the following syntax error even before compilation:
missing argument to function call
I am following these instructions.
What am I doing wrong here?
Try this:
matOut = matrix(numeric(rows*cols), nrow = rows, ncol = cols) # empty matrix

Why calling rbind on data.frame with 0 columns drops all the rows?

I noticed a discrepancy with rbind behaviour between matrix and data.frame objects.
With matrix objects everything works as expected:
mat1 <- matrix(nrow=2, ncol=0)
mat2 <- matrix(nrow=2, ncol=0)
dim(rbind(mat1, mat2))
[1] 4 0
But if we turn them into data.frame objects, all of a sudden the result loses the number of rows:
> dim(rbind(as.data.frame(mat1), as.data.frame(mat2)))
[1] 0 0
What I would like to understand is - is this behaviour intentional? And if so what is the reasoning for dropping the number of rows in this situation?
EDIT: As noted by #PoGibas - this behaviour is documented in ?rbind. No reason is given and it would probably be hard to infer one. So the question becomes:
How to rbind an arbitrary number of data.frames while always preserving their number of rows?
Workaround could be to use cbind and transposition:
m <- matrix(nrow = 2, ncol = 0)
as.data.frame(t(cbind(as.data.frame(t(m)), as.data.frame(t(m)))))
# Returns: data frame with 0 columns and 4 rows
Here cbind creates a data.frame with 0 rows and 4 columns and we transpose it to matrix with 4 rows and 0 columns.
Another solution is just brutal modification of original base::rbind.data.frame (source on github) function.
You have to remove/comment out two parts there:
Removal of arguments if their length is not a positive integer (length(data.frame()) returns 0). Comment out this part:
allargs <- allargs[lengths(allargs) > 0L]
Return of empty data.frame if attribute names is empty (you can't set attribute to an empty data.frame - names(as.data.frame(mat1)) <- "" returns an error). Comment out this part:
if(nvar == 0L) return(structure(list(), class = "data.frame", row.names = integer()))
Result:
m <- matrix(nrow = 2, ncol = 0)
dim(rbind.data.frame2(as.data.frame(m), as.data.frame(m)))
# Returns: [1] 4 0
Code:
# rbind.data.frame2: a copy of base::rbind.data.frame in which two statements
# are commented out (the blocks fenced by '# >>>' below), so that row-binding
# data frames that have zero columns still yields the combined number of rows.
#
# Arguments:
#   ...              objects to bind by row; the body handles data frames,
#                    matrices (coerced with as.data.frame), lists and vectors.
#   deparse.level    not referenced anywhere in this body.
#   make.row.names   if TRUE, row labels are built with the local helper
#                    Make.row.names(); otherwise rlabs stays NULL.
#   stringsAsFactors forwarded to as.data.frame().
#                    NOTE(review): the default calls default.stringsAsFactors(),
#                    which is believed to be deprecated in recent R releases --
#                    confirm before running this on a current R version.
# Value: a data frame. It is built with structure(value, class = cl, ...) when
#   some argument was (or was coerced to) a data frame, and with
#   as.data.frame(value, ...) otherwise. Two early returns exist: the first
#   argument when no argument has any rows, and an empty data frame when there
#   are no arguments at all.
rbind.data.frame2 <- function(..., deparse.level = 1, make.row.names = TRUE,
stringsAsFactors = default.stringsAsFactors())
{
# Helper: returns NULL when the names are identical, otherwise the positions
# of nmi within clabs; stops when the two name sets cannot be matched 1-1.
match.names <- function(clabs, nmi)
{
if(identical(clabs, nmi)) NULL
else if(length(nmi) == length(clabs) && all(nmi %in% clabs)) {
## we need 1-1 matches here
m <- pmatch(nmi, clabs, 0L)
if(any(m == 0L))
stop("names do not match previous names")
m
} else stop("names do not match previous names")
}
if(make.row.names)
Make.row.names <- function(nmi, ri, ni, nrow)
{
if(nzchar(nmi)) {
if(ni == 0L) character() # PR8506
else if(ni > 1L) paste(nmi, ri, sep = ".")
else nmi
}
else if(nrow > 0L && identical(ri, seq_len(ni)) &&
identical(unlist(rlabs, FALSE, FALSE), seq_len(nrow)))
as.integer(seq.int(from = nrow + 1L, length.out = ni))
else ri
}
allargs <- list(...)
# Disabled on purpose: this filter removed every zero-length argument, which
# includes data frames with zero columns.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# allargs <- allargs[lengths(allargs) > 0L]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
if(length(allargs)) {
## drop any zero-row data frames, as they may not have proper column
## types (e.g. NULL).
nr <- vapply(allargs, function(x)
if(is.data.frame(x)) .row_names_info(x, 2L)
else if(is.list(x)) length(x[[1L]])
# mismatched lists are checked later
else length(x), 1L)
if(any(nr > 0L)) allargs <- allargs[nr > 0L]
else return(allargs[[1L]]) # pretty arbitrary
}
n <- length(allargs)
if(n == 0L)
return(structure(list(),
class = "data.frame",
row.names = integer()))
nms <- names(allargs)
if(is.null(nms))
nms <- character(n)
# Bookkeeping for the first pass: cl = class of the first data frame seen,
# perm/rows = per-argument column permutation and target row indices,
# nrow = running row count, value/clabs = result columns and column labels.
cl <- NULL
perm <- rows <- vector("list", n)
rlabs <- if(make.row.names) rows # else NULL
nrow <- 0L
value <- clabs <- NULL
all.levs <- list()
for(i in seq_len(n)) {
## check the arguments, develop row and column labels
xi <- allargs[[i]]
nmi <- nms[i]
## coerce matrix to data frame
if(is.matrix(xi)) allargs[[i]] <- xi <-
as.data.frame(xi, stringsAsFactors = stringsAsFactors)
if(inherits(xi, "data.frame")) {
if(is.null(cl))
cl <- oldClass(xi)
ri <- attr(xi, "row.names")
ni <- length(ri)
if(is.null(clabs)) ## first time
clabs <- names(xi)
else {
if(length(xi) != length(clabs))
stop("numbers of columns of arguments do not match")
pi <- match.names(clabs, names(xi))
if( !is.null(pi) ) perm[[i]] <- pi
}
rows[[i]] <- seq.int(from = nrow + 1L, length.out = ni)
if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
nrow <- nrow + ni
if(is.null(value)) { ## first time ==> setup once:
value <- unclass(xi)
nvar <- length(value)
all.levs <- vector("list", nvar)
has.dim <- facCol <- ordCol <- logical(nvar)
for(j in seq_len(nvar)) {
xj <- value[[j]]
facCol[j] <-
if(!is.null(levels(xj))) {
all.levs[[j]] <- levels(xj)
TRUE # turn categories into factors
} else
is.factor(xj)
ordCol[j] <- is.ordered(xj)
has.dim[j] <- length(dim(xj)) == 2L
}
}
else for(j in seq_len(nvar)) {
xij <- xi[[j]]
if(is.null(pi) || is.na(jj <- pi[[j]])) jj <- j
if(facCol[jj]) {
if(length(lij <- levels(xij))) {
all.levs[[jj]] <- unique(c(all.levs[[jj]], lij))
ordCol[jj] <- ordCol[jj] & is.ordered(xij)
} else if(is.character(xij))
all.levs[[jj]] <- unique(c(all.levs[[jj]], xij))
}
}
}
else if(is.list(xi)) {
ni <- range(lengths(xi))
if(ni[1L] == ni[2L])
ni <- ni[1L]
else stop("invalid list argument: all variables should have the same length")
rows[[i]] <- ri <-
as.integer(seq.int(from = nrow + 1L, length.out = ni))
nrow <- nrow + ni
if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
if(length(nmi <- names(xi)) > 0L) {
if(is.null(clabs))
clabs <- nmi
else {
if(length(xi) != length(clabs))
stop("numbers of columns of arguments do not match")
pi <- match.names(clabs, nmi)
if( !is.null(pi) ) perm[[i]] <- pi
}
}
}
else if(length(xi)) { # 1 new row
rows[[i]] <- nrow <- nrow + 1L
if(make.row.names)
rlabs[[i]] <- if(nzchar(nmi)) nmi else as.integer(nrow)
}
}
nvar <- length(clabs)
if(nvar == 0L)
nvar <- max(lengths(allargs)) # only vector args
# Disabled on purpose: this early return produced an empty data frame whenever
# the result has no columns.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# if(nvar == 0L)
# return(structure(list(), class = "data.frame",
# row.names = integer()))
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
pseq <- seq_len(nvar)
if(is.null(value)) { # this happens if there has been no data frame
value <- list()
value[pseq] <- list(logical(nrow)) # OK for coercion except to raw.
all.levs <- vector("list", nvar)
has.dim <- facCol <- ordCol <- logical(nvar)
}
names(value) <- clabs
# Columns for which levels were collected are rebuilt as factors over the
# union of those levels.
for(j in pseq)
if(length(lij <- all.levs[[j]]))
value[[j]] <-
factor(as.vector(value[[j]]), lij, ordered = ordCol[j])
# Two-dimensional (matrix-like) columns are extended to rmax rows.
if(any(has.dim)) {
rmax <- max(unlist(rows))
for(i in pseq[has.dim])
if(!inherits(xi <- value[[i]], "data.frame")) {
dn <- dimnames(xi)
rn <- dn[[1L]]
if(length(rn) > 0L) length(rn) <- rmax
pi <- dim(xi)[2L]
length(xi) <- rmax * pi
value[[i]] <- array(xi, c(rmax, pi), list(rn, dn[[2L]]))
}
}
# Second pass: copy each argument's columns into `value` at the row positions
# recorded in rows[[i]], applying the column permutation perm[[i]] if any.
for(i in seq_len(n)) {
xi <- unclass(allargs[[i]])
if(!is.list(xi))
if(length(xi) != nvar)
xi <- rep(xi, length.out = nvar)
ri <- rows[[i]]
pi <- perm[[i]]
if(is.null(pi)) pi <- pseq
for(j in pseq) {
jj <- pi[j]
xij <- xi[[j]]
if(has.dim[jj]) {
value[[jj]][ri, ] <- xij
## copy rownames
rownames(value[[jj]])[ri] <- rownames(xij)
} else {
## coerce factors to vectors, in case lhs is character or
## level set has changed
value[[jj]][ri] <- if(is.factor(xij)) as.vector(xij) else xij
## copy names if any
if(!is.null(nm <- names(xij))) names(value[[jj]])[ri] <- nm
}
}
}
# Flatten the collected row labels and de-duplicate them.
if(make.row.names) {
rlabs <- unlist(rlabs)
if(anyDuplicated(rlabs))
rlabs <- make.unique(as.character(rlabs), sep = "")
}
# Assemble the result: keep the class of the first data frame seen, or build a
# plain data frame when no argument was a data frame.
if(is.null(cl)) {
as.data.frame(value, row.names = rlabs, fix.empty.names = TRUE,
stringsAsFactors = stringsAsFactors)
} else {
structure(value, class = cl,
row.names = if(is.null(rlabs)) .set_row_names(nrow) else rlabs)
}
}

Errors while using cbind with a matrix

I have a list of 40 data sets who all have the same columns. I want to bind the 7th column of each data set. I thought about doing this with a matrix using cbind. This is my code:
# RetRates: builds a 766 x length(ListeActions) matrix, one column per element
# of ListeActions, and returns it.
# q: overwritten on the first line of the body, so the value passed in is
#    never read.
# ListeActions is not a parameter; it is looked up outside the function.
RetRates <- function(q) {
# Start from a matrix of zeros (the 766 zeros are recycled to fill it).
q <- matrix(nrow = 766, ncol = length(ListeActions),
data = rep(0, 766), byrow = TRUE)
# s is assigned here but not used again in this function.
s <- 0
for (i in 1:length(ListeActions)) {
x <- ListeActions[[i]]
# cbind(q[, i], x[, 9]) is a two-column result, yet it is assigned to the
# single column q[, i] -- presumably the source of the length warning quoted
# below; confirm.
q[,i] <- cbind(q[,i], x[,9]) ## I need the 9th column
}
return(q)
}
Hedi <- matrix(nrow = 766, ncol = length(ListeActions),
data = rep(0, 766), byrow = TRUE)
Hedi <- RetRates(Hedi)
I get these warnings :
Warning messages: 1: In replace(q[, i], 1:766, x[, 9]) : the number
of objects to be replaced is not a multiple of the size of the
replacement !
Let's take a smaller example: cbind the 5th columns of each of these 3 matrices
d1 <- matrix(runif(30), 5, 6)
d2 <- matrix(rnorm(30), 5, 6)
d3 <- matrix(rnorm(30), 5, 6)
First we put the 3 matrices in a list
M <- list(d1=d1, d2=d2, d3=d3)
Then we could use, as in your question, a for loop
# Preallocate one column per matrix in M, then copy the 5th column of each.
res1 <- matrix(NA_real_, nrow = 5, ncol = length(M))
# seq_along() iterates zero times for an empty list, unlike 1:length(M).
for (i in seq_along(M)) {
  res1[, i] <- M[[i]][, 5]
}
Or we could use some magical R functions to get the result in one slightly more obscure command
res2 <- do.call(cbind, lapply(M, "[",,5))

Resources