Say I have a list of indices, like:
l <- list(c(1,2,3), c(1), c(1,5), c(2, 3, 5))
Which specify the non-zero elements in a matrix, like:
(m <- matrix(c(1,1,1,0,0, 1,0,0,0,0, 1,0,0,0,5, 0,1,1,0,1), nrow=4, byrow=TRUE))
[,1] [,2] [,3] [,4] [,5]
[1,] 1 1 1 0 0
[2,] 1 0 0 0 0
[3,] 1 0 0 0 5
[4,] 0 1 1 0 1
What is the fastest way, using R, to make m from l, giving that the matrix is very big, say 50.000 rows and 2000 columns?
Try
d1 <- stack(setNames(l, seq_along(l)))
library(Matrix)
m1 <- sparseMatrix(as.numeric(d1[,2]), d1[,1], x=1)
as.matrix(m1)
# [,1] [,2] [,3] [,4] [,5]
#[1,] 1 1 1 0 0
#[2,] 1 0 0 0 0
#[3,] 1 0 0 0 1
#[4,] 0 1 1 0 1
Or instead of stack, we could use melt
library(reshape2)
d2 <- melt(l)
sparseMatrix(d2[,2], d2[,1],x=1)
Or using only base R
Un1 <- unlist(l)
m1 <- matrix(0, nrow=length(l), ncol=max(Un1))
m1[cbind(as.numeric(d1$ind), d1$values)] <- 1
m1
For me, the following is at least 3 times faster than the suggestions above, on data the size as specified in the question (5e4 x 2e3):
unlist_l <- unlist(l)
M <- matrix(0, nrow = length(l), ncol = max(unique(unlist_l)))
ij <- cbind(rep(1:length(l), lengths(l)), unlist_l)
M[ij] <- 1
Performance might depend on data size and degree of sparsity.
Related
How do I make a matrix with the sequence 1:16, where all values except 2,3,6,9 and 16 are equal to 0?
I've tried a bunch of different things.
You could do this:
m1 <- 1:16
m2 <- rep(0, 16)
indices <- c(2,3,6,9,16)
m2[indices] <- m1[indices]
matrix(m2, nrow = 4, byrow = TRUE)
# [,1] [,2] [,3] [,4]
# [1,] 0 2 3 0
# [2,] 0 6 0 0
# [3,] 9 0 0 0
# [4,] 0 0 0 16
Depends if the values you want to be non-zero are always going to be at their indices when the matrix is created by row.
You can generalise this method into a function:
create_matrix <- function(max_val, nrow, non_zero_indices) {
m1 <- 1:max_val
m2 <- rep(0, max_val)
m2[non_zero_indices] <- m1[non_zero_indices]
matrix(m2, nrow = nrow, byrow = TRUE)
}
create_matrix(16,4, c(2,3,6,9,16))
# [,1] [,2] [,3] [,4]
# [1,] 0 2 3 0
# [2,] 0 6 0 0
# [3,] 9 0 0 0
# [4,] 0 0 0 16
#akrun's suggestion in the comments will also work if you add byrow=TRUE, so it looks like:
matrix(replace(1:16, !1:16 %in% c(2, 3, 6, 9, 16), 0), 4, 4, byrow=TRUE)
It's a matter of taste.
EDIT: Generation of indices
No one asked for this but I noticed that your indices follow a sequence - specifically they are OEIS A081660 + 1. So instead of typing them directly you could generate them with:
get_indices <- function(n) {
2^(n+1)/3+n+(-1)^n/3 + 1
}
get_indices(0:4)
# [1] 2 3 6 9 16
Suppose that we have a (4,4) matrix. My goal is to change iteratively that cells (1,1),(2,1),(3,1),(1,2),(2,2),(1,3)
I wrote the following
for(i in 1:3){
for(j in 1:3){
if(i>j){
A[i,j] = A[i,j] + sample(c(-1,1),prob=c(0.5,0.5))
}
}
However, it doesn't change the correct cells and misses cells that have to be changed.
The matrix A can be of the form
A = matrix(c(1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0),4,4,byrow=T)
I think that the following chunk of code might be the solution, at least it gives the correct answer for a few runs that I did.
A = matrix(c(1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0),4,4,byrow=T)
k = 0
for(i in 1:3){
for(j in 1:(3-k)){
A[i,j] = A[i,j] + sample(c(-1,1),prob=c(0.5,0.5), size = 1)
}
k = k + 1
}
I think you simple forgot to set the size= parameter of sample to get one draw of the Rademacher universe.
set.seed(42)
for (i in 1:3) {
for (j in 1:3) {
if (i > j) {
A[i, j] <- A[i, j] + sample(c(-1, 1), size=1, prob=c(0.5, 0.5))
}
}
}
A
# [,1] [,2] [,3] [,4]
# [1,] 1 1 1 1
# [2,] 0 1 1 0
# [3,] 0 2 0 0
# [4,] 1 0 0 0
Another idea is to use a permutation matrix, which you may subset to your needs, and over which you may loop.
id <- RcppAlgos::permuteGeneral(ncol(B) - 1, ncol(B) - 2, repetition=T)
(id <- id[c(1, 4, 7, 2, 5, 3), ])
# [,1] [,2]
# [1,] 1 1
# [2,] 2 1
# [3,] 3 1
# [4,] 1 2
# [5,] 2 2
# [6,] 1 3
set.seed(42)
for (i in 1:nrow(id)) {
A[id[i, 1], id[i, 2]] <- A[id[i, 1], id[i, 2]] +
sample(c(-1, 1), size=1, prob=c(0.5, 0.5))
}
A
# [,1] [,2] [,3] [,4]
# [1,] 0 0 0 1
# [2,] 0 0 1 0
# [3,] 2 1 0 0
# [4,] 1 0 0 0
We can create a row/column index (vectorized approach) by cbinding the vector of index. Use the index to subset the cells of the matrix and assign (<-) after adding the sample output to those elements
n <- 3
j1 <- rep(seq_len(n), rev(seq_len(n)))
i1 <- ave(j1, j1, FUN = seq_along)
ind <- cbind(i1, j1)
ind
# i1 j1
#[1,] 1 1
#[2,] 2 1
#[3,] 3 1
#[4,] 1 2
#[5,] 2 2
#[6,] 1 3
A[ind] <- A[ind] + sample(c(-1,1),prob=c(0.5,0.5),
size = nrow(ind), replace= TRUE)
I am trying to create the following matrix A for n rows and n+1 columns. n will likely be around 20 or 30, but for the purpose of the question I put it at 4 and 5.
Here is what I have so far:
N <- 5 # n+1
n <- 4 # n
columns <- list()
# first column:
columns[1] <- c(-1, 1, rep(0, N-2))
# all other columns:
for(i in N:2) {
columns[i] <- c((rep(0, N-i), 1, -2, 1, rep(0, i-3)))
}
# combine into matrix:
A <- cbind(columns)
I keep getting the following error msg:
In columns[1] <- c(-1, 1, rep(0, N - 2)) :
number of items to replace is not a multiple of replacement length
And later
"for(i in N:2) {
columns[i] <- c((rep(0, N-i),"
}
Error: unexpected '}' in "}"
I guess you can try the for loop below to create your matrix A:
N <- 5
n <- 4
A <- matrix(0,n,N)
for (i in 1:nrow(A)) {
if (i == 1) {
A[i,1:2] <- c(-1,1)
} else {
A[i,i+(-1:1)] <- c(1,-2,1)
}
}
such that
> A
[,1] [,2] [,3] [,4] [,5]
[1,] -1 1 0 0 0
[2,] 1 -2 1 0 0
[3,] 0 1 -2 1 0
[4,] 0 0 1 -2 1
Another solution is to use outer, and this method would be faster and looks more compact than the for loop approach, i.e.,
A <- `diag<-`(replace(z<-abs(outer(1:n,1:N,"-")),!z %in% c(0,1),0),
c(-1,rep(-2,length(diag(z))-1)))
I thought this would be fast compared to the loop, but when I tested on a 5000x5001 example, the loop in ThomasIsCoding's answer was about 5x faster. Go with that one!
N = 5
n = N - 1
A = matrix(0, nrow = n, ncol = N)
delta = row(A) - col(A)
diag(A) = -2
A[delta %in% c(1, -1)] = 1
A[1, 1] = -1
A
# [,1] [,2] [,3] [,4] [,5]
# [1,] -1 1 0 0 0
# [2,] 1 -2 1 0 0
# [3,] 0 1 -2 1 0
# [4,] 0 0 1 -2 1
You could use data.table::shift to shift the vector c(1, -2, 1, 0) by all increments from -1 (backwards shift / lead by 1) to n - 1 (forward shift / lagged by n - 1) and then cbind all the shifted outputs together. The first-row first-column element doesn't follow this pattern so that's fixed at the end.
library(data.table)
out <- do.call(cbind, shift(c(1, -2, 1, 0), seq(-1, n - 1), fill = 0))
out[1, 1] <- -1
out
# [,1] [,2] [,3] [,4] [,5]
# [1,] -1 1 0 0 0
# [2,] 1 -2 1 0 0
# [3,] 0 1 -2 1 0
# [4,] 0 0 1 -2 1
I have a set of vectors, and would like to stack them on top of each other to create diagonal entries to a matrix.
What would be an easy way to create the example_out matrix from c1 and c2?
c1 <- seq(1, 4)
c2 <- seq(5, 8)
example_out <- matrix(c(1,0,0,0,5,2,0,0,0,6,3,0,0,0,7,4,0,0,0,8), nrow=5, byrow=T)
example_out
Create a matrix out of 0s, then fill main diagonal with c1 and sub diagonal with c2.
example_out <- matrix(rep(0, 20), 5, 4)
diag(example_out) <- c1
diag(example_out[-1, ]) <- c2
Yielding
> example_out
[,1] [,2] [,3] [,4]
[1,] 1 0 0 0
[2,] 5 2 0 0
[3,] 0 6 3 0
[4,] 0 0 7 4
[5,] 0 0 0 8
Data
c1 <- seq(1, 4)
c2 <- seq(5, 8)
Another simple approach, augmenting two diagonal matrices with 0 rows and adding them:
rbind(diag(c1), 0) + rbind(0, diag(c2))
# [,1] [,2] [,3] [,4]
# [1,] 1 0 0 0
# [2,] 5 2 0 0
# [3,] 0 6 3 0
# [4,] 0 0 7 4
# [5,] 0 0 0 8
Here is an alternative approach replaceing entries in a numeric vector before casting as matrix
matrix(unlist(sapply(seq_along(c1), function(i)
replace(rep(0, length(c1) + 1), i:(i+1), c(c1[i], c2[i])))),
ncol = length(c1))
# [,1] [,2] [,3] [,4]
#[1,] 1 0 0 0
#[2,] 5 2 0 0
#[3,] 0 6 3 0
#[4,] 0 0 7 4
#[5,] 0 0 0 8
Update
I was curious to see how the different methods compared in terms of performance/runtime. Here is a short microbenchmark analysis using two larger vectors c1 and c2.
set.seed(2017)
c1 <- sample(1000)
c2 <- sample(1000)
library(microbenchmark)
library(Matrix)
res <- microbenchmark(
method_jaySF = {
example_out <- matrix(0, length(c1) + 1, length(c2))
diag(example_out) <- c1
diag(example_out[-1, ]) <- c2
},
method_Roland = {
bandSparse(length(c1) + 1, length(c2), 0:-1, list(c1, c2))
},
method_Onyambu = {
a = matrix(0,length(c1)+1,length(c2))
a[row(a)==col(a)]=c1
a[row(a)==col(a)+1]=c2
},
method_Gregor = {
rbind(diag(c1), 0) + rbind(0, diag(c2))
},
method_Maurits = {
matrix(unlist(sapply(seq_along(c1), function(i)
replace(rep(0, length(c1) + 1), i:(i+1), c(c1[i], c2[i])))),
ncol = length(c1))
}
)
res;
#Unit: microseconds
# expr min lq mean median uq max
# method_jaySF 31894.439 37850.81 58452.41 40560.992 46224.579 208862.631
# method_Roland 940.535 1342.32 1675.29 1457.928 1869.621 8228.287
# method_Onyambu 55033.797 66083.67 124364.44 73143.798 195886.534 274383.132
# method_Gregor 37784.977 44049.87 69918.85 47539.793 53122.162 243774.715
# method_Maurits 14961.924 21378.77 42834.89 23536.966 27270.953 186088.146
autoplot(res)
You should create a sparse matrix. Use the Matrix package:
c1 <- seq(1, 4)
c2 <- seq(5, 8)
library(Matrix)
bandSparse(5, 4, 0:-1, list(c1, c2))
#5 x 4 sparse Matrix of class "dgCMatrix"
#
#[1,] 1 . . .
#[2,] 5 2 . .
#[3,] . 6 3 .
#[4,] . . 7 4
#[5,] . . . 8
a = matrix(0,length(c1)+1,length(c2))
a[row(a)==col(a)]=c1
a[row(a)==col(a)+1]=c2
a
[,1] [,2] [,3] [,4]
[1,] 1 0 0 0
[2,] 5 2 0 0
[3,] 0 6 3 0
[4,] 0 0 7 4
[5,] 0 0 0 8
I am looking for an easier way to do the following:
m <- matrix(0, nrow=3, 3)
v <- c(1, 3, 2)
for (i in 1:nrow(m)) {
m[[i, v[i]]] = 1
}
The above code creates the following index matrix:
[,1] [,2] [,3]
[1,] 1 0 0
[2,] 0 0 1
[3,] 0 1 0
There surely must be a better way to do this?!
One way to do it without pre-defining the matrix would be to use outer:
num.col <- 3
outer(v, seq_len(num.col), "==") * 1
# [,1] [,2] [,3]
# [1,] 1 0 0
# [2,] 0 0 1
# [3,] 0 1 0