Efficient way to turn a matrix into a triplet (i,j,v) - r

The What
Given some matrix:
mat <- matrix(1:10,ncol=2)
I want to transform it to the following triplet format: (i,j,v) where i is the row index, j is the column index and v is the value at i,j (you can see why at the bottom)
What I have tried:
matrixToTriplet <- function(mat) {
i <- 1:nrow(mat)
nj <- ncol(mat)
j <- 1:nj
output <- matrix(numeric(0), ncol=3)
for(i_ in i) {
curr <- c(rep(i_, times=nj),j,mat[i_,])
output <- rbind(output, matrix(curr, ncol=3))
}
output
}
The output should be:
> matrixToTriplet(mat)
[,1] [,2] [,3]
[1,] 1 1 1
[2,] 1 2 6
[3,] 2 1 2
[4,] 2 2 7
[5,] 3 1 3
[6,] 3 2 8
[7,] 4 1 4
[8,] 4 2 9
[9,] 5 1 5
[10,] 5 2 10
I also had another version using apply and sapply instead of for but those would explode very quickly. The kind of sizes I am working on is quite large, 1600x1600 matrices.
The Why
One might ask "why"?, the reason for this is that I need the is and js as features for a model to predict v. If there is a better way to do this I am interested to hear.

For those who really like expand.grid:
cbind(expand.grid(seq(nrow(mat)), seq(ncol(mat))), as.vector(mat))

You can do this with row and col:
x <- t(mat)
cbind(c(col(x)), c(row(x)), c(x))
# [,1] [,2] [,3]
# [1,] 1 1 1
# [2,] 1 2 6
# [3,] 2 1 2
# [4,] 2 2 7
# [5,] 3 1 3
# [6,] 3 2 8
# [7,] 4 1 4
# [8,] 4 2 9
# [9,] 5 1 5
# [10,] 5 2 10
If the row order does not matter in the final output, we can also do it with cbind(c(row(mat)), c(col(mat)), c(mat)) directly.
A benchmark will be helpful when talking about efficiency:
library(microbenchmark)
bmf <- function(mat, ...){
microbenchmark(
a = {x <- t(mat);cbind(c(col(x)), c(row(x)), c(x))},
a2 = {cbind(c(row(mat)), c(col(mat)), c(mat))},
b = {cbind(which(mat > 0, arr.ind = TRUE), val = c(mat))},
c = {cbind(expand.grid(seq(nrow(mat)), seq(ncol(mat))), as.vector(mat))},
...)
}
mat <- matrix(seq_len(10*10), 10, 10)
bmf(mat, times = 10)
# Unit: microseconds
# expr min lq mean median uq max neval
# a 7.985 9.239 18.2556 15.0415 22.756 47.065 10
# a2 4.310 4.681 5.5257 5.2405 5.755 9.099 10
# b 17.032 21.672 35.8950 28.7505 59.170 68.436 10
# c 216.101 228.736 267.7217 243.9465 288.455 380.096 10'
mat <- matrix(seq_len(1000*1000), 1000, 1000)
bmf(mat, times = 10)
# Unit: milliseconds
# expr min lq mean median uq max neval
# a 17.70805 20.51167 36.73432 21.79357 24.56775 111.6796 10
# a2 14.61793 20.95486 37.70526 25.58968 30.91322 98.44344 10
# b 41.74630 45.49698 76.61307 47.86678 122.90142 178.8363 10
# c 14.40912 17.84025 25.39672 19.29968 20.12222 85.2515 10

The simplest way would be to use which argument with arr.ind= TRUE parameter which exactly does what you want however, the issue is it expects a logical value. So we need to find a condition in which all the values turn out to be TRUE. In this case I see all values are greater than 0. So we can do
#as.vector suggested by #snoram and verified by #mt1022 that it is faster
cbind(which(mat > 0, arr.ind = TRUE), val = as.vector(mat))
# row col val
# [1,] 1 1 1
# [2,] 2 1 2
# [3,] 3 1 3
# [4,] 4 1 4
# [5,] 5 1 5
# [6,] 1 2 6
# [7,] 2 2 7
# [8,] 3 2 8
# [9,] 4 2 9
#[10,] 5 2 10
If you are not able to find any such condition which would make all the values to be TRUE we could just create a new matrix with same dimensions as mat with all values as TRUE using relist
cbind(which(relist(TRUE, mat), arr.ind = TRUE), value = as.vector(mat))
# row col value
# [1,] 1 1 1
# [2,] 2 1 2
# [3,] 3 1 3
# [4,] 4 1 4
# [5,] 5 1 5
# [6,] 1 2 6
# [7,] 2 2 7
# [8,] 3 2 8
# [9,] 4 2 9
#[10,] 5 2 10

Just for fun, here is an option using the Matrix package.
mat <- matrix(1:10,ncol=2)
#create sparse matrix
library(Matrix)
M <- Matrix(mat, sparse = TRUE)
#turn into triplet representation
M <- as(M, "TsparseMatrix")
#indices are zero-based within Matrix package
m <- cbind(M#i + 1, M#j + 1, M#x) #do you really need a matrix as output?
m[order(m[,1]),] #probably you don't need this step
# [,1] [,2] [,3]
# [1,] 1 1 1
# [2,] 1 2 6
# [3,] 2 1 2
# [4,] 2 2 7
# [5,] 3 1 3
# [6,] 3 2 8
# [7,] 4 1 4
# [8,] 4 2 9
# [9,] 5 1 5
#[10,] 5 2 10

Related

How can I replicate a column if a negative value occur in the column?

I have following matrix (example):
mat <- matrix(c(seq(8,-1),
seq(9,0),
seq(6,-3)),10,3)
Now I would like to restart the column if a negative value occur in the column.
So I would like to get following matrix:
solution_mat <- mat
solution_mat[10,1] <-8
solution_mat[8:10,3] <- 6:4
Thanks for help!
Try the following:
apply(mat, 2, function(x){
inx <- which(x < 0)
x[inx] <- x[seq_along(inx)]
x
})
# [,1] [,2] [,3]
# [1,] 8 9 6
# [2,] 7 8 5
# [3,] 6 7 4
# [4,] 5 6 3
# [5,] 4 5 2
# [6,] 3 4 1
# [7,] 2 3 0
# [8,] 1 2 6
# [9,] 0 1 5
#[10,] 8 0 4
If the numbers are strictly descending series as in your example, you could take the value of each column modulo x[1] + 1, where x[1] is the first value in the column. This has the advantage that it will continue to work if there are more negative numbers than positive ones.
apply(mat, 2, function(x) x %% (x[1] + 1))
#> [,1] [,2] [,3]
#> [1,] 8 9 6
#> [2,] 7 8 5
#> [3,] 6 7 4
#> [4,] 5 6 3
#> [5,] 4 5 2
#> [6,] 3 4 1
#> [7,] 2 3 0
#> [8,] 1 2 6
#> [9,] 0 1 5
#> [10,] 8 0 4

Creating matrix from random block rows of another matrix without loop (in R)?

I am trying to create a matrix by drawing random block rows from another matrix. I have managed to do so with a loop.
set.seed(1)
a_matrix <- matrix(1:10,10,5) # the matrix with original sample
b_matrix <- matrix(NA,10, 5) # a matrix to store the bootstrap sample
S2<- seq(from =1 , to = 10, by =2) #[1] 1 3 5 7 9
m <- 2 # block size of m
for (r in S2){ start_point<-sample(1:(nrow(a_matrix)-1), 1, replace=T)
#randomly choose a number 1 to length of a_matrix -1
b_block <- a_matrix[start_point:(start_point+(m-1)), 1:ncol(a_matrix)]
# randomly select blocks from matrix a
b_matrix[r,]<-as.matrix((b_block)[1,])
b_matrix[(r+1),]<-as.matrix((b_block)[2,]) # put the blocks into matrix b
}
b_matrix
#we now have a b_matrix that is made of random blocks (size m=2)
#of the original a_matrix
The loop method works but it is clearly not very efficient and it is not possible to extend it to other block size (for e.g. having a blocksize of 3) .What is a cleaner and expandable approach ? Thanks in advance
Here I tried to clean it up a bit and generalize the use of m:
random_block_sample <- function(a_matrix, m = 2L) {
N <- nrow(a_matrix)
stopifnot(m <= N)
n <- ceiling(N / m)
s <- sample(N - m + 1L, n, TRUE) # start_point
i <- unlist(lapply(s, seq, length.out = m))
b_matrix <- a_matrix[i, , drop = FALSE]
head(b_matrix, N)
}
set.seed(1L)
random_block_sample(a_matrix, m = 2L)
# [,1] [,2] [,3] [,4] [,5]
# [1,] 3 3 3 3 3
# [2,] 4 4 4 4 4
# [3,] 4 4 4 4 4
# [4,] 5 5 5 5 5
# [5,] 6 6 6 6 6
# [6,] 7 7 7 7 7
# [7,] 9 9 9 9 9
# [8,] 10 10 10 10 10
# [9,] 2 2 2 2 2
# [10,] 3 3 3 3 3
set.seed(1L)
random_block_sample(a_matrix, m = 5L)
# [,1] [,2] [,3] [,4] [,5]
# [1,] 2 2 2 2 2
# [2,] 3 3 3 3 3
# [3,] 4 4 4 4 4
# [4,] 5 5 5 5 5
# [5,] 6 6 6 6 6
# [6,] 3 3 3 3 3
# [7,] 4 4 4 4 4
# [8,] 5 5 5 5 5
# [9,] 6 6 6 6 6
# [10,] 7 7 7 7 7

Repeatedly Complementary subsets

I want to repeatedly divide a set into two complementary subsets with known size and keep them as the columns of two matrix. For example assume the main set is {1, 2, ..., 10}, the size of first sample is 8 and I want to repeat sampling 3 times. I want to have:
[,1] [,2] [,3]
[1,] 10 9 1
[2,] 8 1 10
[3,] 3 7 5
[4,] 4 2 3
[5,] 1 8 8
[6,] 6 4 2
[7,] 9 5 7
[8,] 5 10 6
and
[,1] [,2] [,3]
[1,] 2 3 4
[2,] 7 6 9
Any idea how to implement it in R avoiding for loops?
I would use replicate + sample, like this:
set.seed(1) # Just so you can replicate my results
A <- replicate(3, sample(10, 8, FALSE)) # Change 3 to the number of replications
A
# [,1] [,2] [,3]
# [1,] 3 7 8
# [2,] 4 1 9
# [3,] 5 2 4
# [4,] 7 8 6
# [5,] 2 5 7
# [6,] 8 10 2
# [7,] 9 4 3
# [8,] 6 6 1
For the other set, I would use apply + setdiff, like this:
B <- apply(A, 2, function(x) setdiff(1:10, x))
B
# [,1] [,2] [,3]
# [1,] 1 3 5
# [2,] 10 9 10
Another option as suggested by #thelatemail (which would be more efficient) is to just create use replicate to create your original matrix, and use basic subsetting to create your separate matrices.
A <- replicate(3, sample(10))
B <- A[-(seq_len(8)), ]
A <- A[seq_len(8), ]

Create a block circulant matrix in R

I am trying to create a block circulant matrix in R. The structure of a block circulant matrix is given below.
C0 C1 ... Cn-1
Cn-1 C0 C1 ... Cn-2
Cn-2 Cn-1 .... Cn-3
and so on
I have the blocks
C0 .... Cn-1
What is the easiest way to create the matrix. Is there a function already available?
Thanks for a challenging question! Here is a solution summing kronecker products of your matrices with sub- and super-diagonals.
Sample data, a list of matrices:
C <- lapply(1:3, matrix, nrow = 2, ncol = 2)
My solution:
bcm <- function(C) {
require(Matrix)
n <- length(C)
Reduce(`+`, lapply((-n+1):(n-1),
function(i) kronecker(as.matrix(bandSparse(n, n, -i)),
C[[1 + (i %% n)]])))
}
bcm(C)
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 1 3 3 2 2
# [2,] 1 1 3 3 2 2
# [3,] 2 2 1 1 3 3
# [4,] 2 2 1 1 3 3
# [5,] 3 3 2 2 1 1
# [6,] 3 3 2 2 1 1
I don't know if this is particularly efficient, but as I interpret your question it does what you want.
rotList <- function(L,n) {
if (n==0) return(L)
c(tail(L,n),head(L,-n))
}
rowFun <- function(n,matList) do.call(rbind,rotList(matList,n))
bcMat <- function(matList) {
n <- length(matList)
do.call(cbind,lapply(0:(n-1),rowFun,matList))
}
Example:
bcMat(list(diag(3),matrix(1:9,nrow=3),matrix(4,nrow=3,ncol=3)))
I think what you are looking for is circulant.matrix from the lgcp package.
If x is a matrix whose columns are the bases of the sub-blocks of a
block circulant matrix, then this function returns the block circulant
matrix of interest.
eg
x <- matrix(1:8,ncol=4)
circulant(x)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
# [1,] 1 2 3 4 5 6 7 8
# [2,] 2 1 4 3 6 5 8 7
# [3,] 7 8 1 2 3 4 5 6
# [4,] 8 7 2 1 4 3 6 5
# [5,] 5 6 7 8 1 2 3 4
# [6,] 6 5 8 7 2 1 4 3
# [7,] 3 4 5 6 7 8 1 2
# [8,] 4 3 6 5 8 7 2 1
Alternative approach
Here is a highly inefficient approach using kronecker and Reduce
bcirc <- function(list.blocks){
P <- lapply(seq_along(list.blocks), function(x,y) x ==y, x = circulant(seq_along(list.blocks)))
Reduce('+',Map(P = P, A=list.blocks, f = function(P,A) kronecker(P,A)))
}
benchmarking with #flodel and #Ben Bolker
lbirary(microbenchmark)
microbenchmark(bcm(C), bcirc(C), bcMat(C))
Unit: microseconds
expr min lq median uq max neval
bcm(C) 10836.719 10925.7845 10992.8450 11141.1240 21622.927 100
bcirc(C) 444.983 455.7275 479.5790 487.0370 569.105 100
bcMat(C) 288.558 296.4350 309.8945 348.4215 2190.231 100
Is something like this what you are looking for?
> vec <- 1:4
> sapply(rev(seq_along(vec)),function(x) c(tail(vec,x),head(vec,-x)) )
[,1] [,2] [,3] [,4]
[1,] 1 2 3 4
[2,] 2 3 4 1
[3,] 3 4 1 2
[4,] 4 1 2 3

How can I separate a matrix into smaller ones in R?

I have the following matrix
2 4 1
6 32 1
4 2 1
5 3 2
4 2 2
I want to make the following two matrices based on 3rd column
first
2 4
6 32
4 2
second
5 3
4 2
Best I can come up with, but I get an error
x <- cbind(mat[,1], mat[,2]) if mat[,3]=1
y <- cbind(mat[,1], mat[,2]) if mat[,3]=2
If mat is your matrix:
mat <- matrix(1:15,ncol=3)
mat[,3] <- c(1,1,1,2,2)
> mat
[,1] [,2] [,3]
[1,] 1 6 1
[2,] 2 7 1
[3,] 3 8 1
[4,] 4 9 2
[5,] 5 10 2
Then you can use split:
> lapply( split( mat[,1:2], mat[,3] ), matrix, ncol=2)
$`1`
[,1] [,2]
[1,] 1 6
[2,] 2 7
[3,] 3 8
$`2`
[,1] [,2]
[1,] 4 9
[2,] 5 10
The lapply of matrix is necessary because split drops the attributes that make a vector a matrix, so you need to add them back in.
Yet another example:
#test data
mat <- matrix(1:15,ncol=3)
mat[,3] <- c(1,1,1,2,2)
#make a list storing a matrix for each id as components
result <- lapply(by(mat,mat[,3],identity),as.matrix)
Final product:
> result
$`1`
V1 V2 V3
1 1 6 1
2 2 7 1
3 3 8 1
$`2`
V1 V2 V3
4 4 9 2
5 5 10 2
If you have a matrix A, this will get the first two columns when the third column is 1:
A[A[,3] == 1,c(1,2)]
You can use this to obtain matrices for any value in the third column.
Explanation: A[,3] == 1 returns a vector of booleans, where the i-th position is TRUE if A[i,3] is 1. This vector of booleans can be used to index into a matrix to extract the rows we want.
Disclaimer: I have very little experience with R, this is the MATLAB-ish way to do it.
split.data.frame could be used also to split a matrix.
mat <- matrix(1:15,ncol=3)
mat[,3] <- c(1,1,1,2,2)
x <- split.data.frame(mat[,-3], mat[,3])
x
#$`1`
# [,1] [,2]
#[1,] 1 6
#[2,] 2 7
#[3,] 3 8
#
#$`2`
# [,1] [,2]
#[1,] 4 9
#[2,] 5 10
str(x)
#List of 2
# $ 1: num [1:3, 1:2] 1 2 3 6 7 8
# $ 2: num [1:2, 1:2] 4 5 9 10
Or split the index and and use it in lapply to subset.
lapply(split(seq_along(mat[,3]), mat[,3]), \(i) mat[i, -3, drop=FALSE])
#$`1`
# [,1] [,2]
#[1,] 1 6
#[2,] 2 7
#[3,] 3 8
#
#$`2`
# [,1] [,2]
#[1,] 4 9
#[2,] 5 10
This is a functional version of pedrosorio's idea:
getthird <- function(mat, idx) mat[mat[,3]==idx, 1:2]
sapply(unique(mat[,3]), getthird, mat=mat) #idx gets sent the unique values
#-----------
[[1]]
[,1] [,2]
[1,] 1 6
[2,] 2 7
[3,] 3 8
[[2]]
[,1] [,2]
[1,] 4 9
[2,] 5 10
We can use by or tapply
> by(seq_along(mat[, 3]), mat[, 3], function(k) mat[k, -3])
mat[, 3]: 1
[,1] [,2]
[1,] 1 6
[2,] 2 7
[3,] 3 8
------------------------------------------------------------
mat[, 3]: 2
[,1] [,2]
[1,] 4 9
[2,] 5 10
> tapply(seq_along(mat[, 3]), mat[, 3], function(k) mat[k, -3])
$`1`
[,1] [,2]
[1,] 1 6
[2,] 2 7
[3,] 3 8
$`2`
[,1] [,2]
[1,] 4 9
[2,] 5 10

Resources