Create matrix with for-loop - r

I am trying to create the following matrix A for n rows and n+1 columns. n will likely be around 20 or 30, but for the purpose of the question I put it at 4 and 5.
Here is what I have so far:
N <- 5 # n+1
n <- 4 # n
columns <- list()
# first column:
columns[1] <- c(-1, 1, rep(0, N-2))
# all other columns:
for(i in N:2) {
columns[i] <- c((rep(0, N-i), 1, -2, 1, rep(0, i-3)))
}
# combine into matrix:
A <- cbind(columns)
I keep getting the following error msg:
In columns[1] <- c(-1, 1, rep(0, N - 2)) :
number of items to replace is not a multiple of replacement length
And later
"for(i in N:2) {
columns[i] <- c((rep(0, N-i),"
}
Error: unexpected '}' in "}"

I guess you can try the for loop below to create your matrix A:
N <- 5
n <- 4
A <- matrix(0,n,N)
for (i in 1:nrow(A)) {
if (i == 1) {
A[i,1:2] <- c(-1,1)
} else {
A[i,i+(-1:1)] <- c(1,-2,1)
}
}
such that
> A
[,1] [,2] [,3] [,4] [,5]
[1,] -1 1 0 0 0
[2,] 1 -2 1 0 0
[3,] 0 1 -2 1 0
[4,] 0 0 1 -2 1
Another solution is to use outer, and this method would be faster and looks more compact than the for loop approach, i.e.,
A <- `diag<-`(replace(z<-abs(outer(1:n,1:N,"-")),!z %in% c(0,1),0),
c(-1,rep(-2,length(diag(z))-1)))

I thought this would be fast compared to the loop, but when I tested on a 5000x5001 example, the loop in ThomasIsCoding's answer was about 5x faster. Go with that one!
N = 5
n = N - 1
A = matrix(0, nrow = n, ncol = N)
delta = row(A) - col(A)
diag(A) = -2
A[delta %in% c(1, -1)] = 1
A[1, 1] = -1
A
# [,1] [,2] [,3] [,4] [,5]
# [1,] -1 1 0 0 0
# [2,] 1 -2 1 0 0
# [3,] 0 1 -2 1 0
# [4,] 0 0 1 -2 1

You could use data.table::shift to shift the vector c(1, -2, 1, 0) by all increments from -1 (backwards shift / lead by 1) to n - 1 (forward shift / lagged by n - 1) and then cbind all the shifted outputs together. The first-row first-column element doesn't follow this pattern so that's fixed at the end.
library(data.table)
out <- do.call(cbind, shift(c(1, -2, 1, 0), seq(-1, n - 1), fill = 0))
out[1, 1] <- -1
out
# [,1] [,2] [,3] [,4] [,5]
# [1,] -1 1 0 0 0
# [2,] 1 -2 1 0 0
# [3,] 0 1 -2 1 0
# [4,] 0 0 1 -2 1

Related

Making a matrix in R without typing out specific elements, excluding some

How do I make a matrix with the sequence 1:16, where all values except 2,3,6,9 and 16 are equal to 0?
I've tried a bunch of different things.
You could do this:
m1 <- 1:16
m2 <- rep(0, 16)
indices <- c(2,3,6,9,16)
m2[indices] <- m1[indices]
matrix(m2, nrow = 4, byrow = TRUE)
# [,1] [,2] [,3] [,4]
# [1,] 0 2 3 0
# [2,] 0 6 0 0
# [3,] 9 0 0 0
# [4,] 0 0 0 16
Depends if the values you want to be non-zero are always going to be at their indices when the matrix is created by row.
You can generalise this method into a function:
create_matrix <- function(max_val, nrow, non_zero_indices) {
m1 <- 1:max_val
m2 <- rep(0, max_val)
m2[non_zero_indices] <- m1[non_zero_indices]
matrix(m2, nrow = nrow, byrow = TRUE)
}
create_matrix(16,4, c(2,3,6,9,16))
# [,1] [,2] [,3] [,4]
# [1,] 0 2 3 0
# [2,] 0 6 0 0
# [3,] 9 0 0 0
# [4,] 0 0 0 16
#akrun's suggestion in the comments will also work if you add byrow=TRUE, so it looks like:
matrix(replace(1:16, !1:16 %in% c(2, 3, 6, 9, 16), 0), 4, 4, byrow=TRUE)
It's a matter of taste.
EDIT: Generation of indices
No one asked for this but I noticed that your indices follow a sequence - specifically they are OEIS A081660 + 1. So instead of typing them directly you could generate them with:
get_indices <- function(n) {
2^(n+1)/3+n+(-1)^n/3 + 1
}
get_indices(0:4)
# [1] 2 3 6 9 16

Changing particular cells of a Matrix iteratively in R

Suppose that we have a (4,4) matrix. My goal is to change iteratively that cells (1,1),(2,1),(3,1),(1,2),(2,2),(1,3)
I wrote the following
for(i in 1:3){
for(j in 1:3){
if(i>j){
A[i,j] = A[i,j] + sample(c(-1,1),prob=c(0.5,0.5))
}
}
However, it doesn't change the correct cells and misses cells that have to be changed.
The matrix A can be of the form
A = matrix(c(1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0),4,4,byrow=T)
I think that the following chunk of code might be the solution, at least it gives the correct answer for a few runs that I did.
A = matrix(c(1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0),4,4,byrow=T)
k = 0
for(i in 1:3){
for(j in 1:(3-k)){
A[i,j] = A[i,j] + sample(c(-1,1),prob=c(0.5,0.5), size = 1)
}
k = k + 1
}
I think you simple forgot to set the size= parameter of sample to get one draw of the Rademacher universe.
set.seed(42)
for (i in 1:3) {
for (j in 1:3) {
if (i > j) {
A[i, j] <- A[i, j] + sample(c(-1, 1), size=1, prob=c(0.5, 0.5))
}
}
}
A
# [,1] [,2] [,3] [,4]
# [1,] 1 1 1 1
# [2,] 0 1 1 0
# [3,] 0 2 0 0
# [4,] 1 0 0 0
Another idea is to use a permutation matrix, which you may subset to your needs, and over which you may loop.
id <- RcppAlgos::permuteGeneral(ncol(B) - 1, ncol(B) - 2, repetition=T)
(id <- id[c(1, 4, 7, 2, 5, 3), ])
# [,1] [,2]
# [1,] 1 1
# [2,] 2 1
# [3,] 3 1
# [4,] 1 2
# [5,] 2 2
# [6,] 1 3
set.seed(42)
for (i in 1:nrow(id)) {
A[id[i, 1], id[i, 2]] <- A[id[i, 1], id[i, 2]] +
sample(c(-1, 1), size=1, prob=c(0.5, 0.5))
}
A
# [,1] [,2] [,3] [,4]
# [1,] 0 0 0 1
# [2,] 0 0 1 0
# [3,] 2 1 0 0
# [4,] 1 0 0 0
We can create a row/column index (vectorized approach) by cbinding the vector of index. Use the index to subset the cells of the matrix and assign (<-) after adding the sample output to those elements
n <- 3
j1 <- rep(seq_len(n), rev(seq_len(n)))
i1 <- ave(j1, j1, FUN = seq_along)
ind <- cbind(i1, j1)
ind
# i1 j1
#[1,] 1 1
#[2,] 2 1
#[3,] 3 1
#[4,] 1 2
#[5,] 2 2
#[6,] 1 3
A[ind] <- A[ind] + sample(c(-1,1),prob=c(0.5,0.5),
size = nrow(ind), replace= TRUE)

How to write the function to create a diagonal matrix from upper right to lower left in R?

I would like to write one function whose input is a square matrix, and it returns a square matrix whose numbers from the upper right corner down to lower left corner are preserved and other numbers are zero.
For example
suppose A is a 4*4 matrix in the following.(sorry I do not know how to type the matrix expression)
[1,2,3,4]
[5,6,7,8]
[9,10,11,12]
[13,14,15,16]
How can I write a function in R without any loops to transform the matrix into this?
[0,0,0,4]
[0,0,7,0]
[0,10,0,0]
[13,0,0,0]
This feels like a gymnastics exercise...
xy <- matrix(1:16, ncol = 4, byrow = TRUE)
xy <- apply(xy, MARGIN = 1, rev)
xy[lower.tri(xy)] <- 0
xy[upper.tri(xy)] <- 0
t(apply(xy, MARGIN = 1, rev))
[,1] [,2] [,3] [,4]
[1,] 0 0 0 4
[2,] 0 0 7 0
[3,] 0 10 0 0
[4,] 13 0 0 0
Here is another option.
mat <- matrix(1:16, 4, byrow = TRUE)
idx <- cbind(seq_len(nrow(mat)),
ncol(mat):1)
values <- mat[idx]
mat <- matrix(0, nrow = dim(mat)[1], ncol = dim(mat)[2])
mat[idx] <- values
mat
# [,1] [,2] [,3] [,4]
#[1,] 0 0 0 4
#[2,] 0 0 7 0
#[3,] 0 10 0 0
#[4,] 13 0 0 0
A non-apply solution using some maths to generate the indices stealing xy from #Roman
xy <- matrix(1:16, ncol = 4, byrow = TRUE)
ind <- nrow(xy)
xy[setdiff(1:length(xy), seq(ind, by = ind -1, length.out = ind))] <- 0
xy
# [,1] [,2] [,3] [,4]
#[1,] 0 0 0 4
#[2,] 0 0 7 0
#[3,] 0 10 0 0
#[4,] 13 0 0 0
Trying it on 5 X 5 matrix
xy <- matrix(1:25, 5, byrow = TRUE)
ind <- nrow(xy)
xy[setdiff(1:length(xy), seq(ind, by = ind -1, length.out = ind))] <- 0
xy
# [,1] [,2] [,3] [,4] [,5]
#[1,] 0 0 0 0 5
#[2,] 0 0 0 9 0
#[3,] 0 0 13 0 0
#[4,] 0 17 0 0 0
#[5,] 21 0 0 0 0
This answer takes a slightly different approach than the other answers. Instead of trying to zero out everything except for the diagonal, we can just build the diagonal by itself:
m <- matrix(rep(0,16), nrow = 4, byrow = TRUE)
for (i in 0:15) {
row <- floor(i / 4)
col <- i %% 4
if (i == 3 + (row*3)) {
m[row+1, col+1] <- i+1
}
}
m
[,1] [,2] [,3] [,4]
[1,] 0 0 0 4
[2,] 0 0 7 0
[3,] 0 10 0 0
[4,] 13 0 0 0
I just thought about a way to reverse the original diag function from base R.
You can see it by just typing diag in the console.
Here the highlighted change I made in my diag_reverse:
y <- x[((m - 1L):0L * (dim(x)[1L])) + (1L:m)] # m is min(dim(x))
And here's the complete function (I kept all the code except that one line):
diag_reverse <- function (x = 1, nrow, ncol, names = TRUE)
{
if (is.matrix(x)) {
if (nargs() > 1L && (nargs() > 2L || any(names(match.call()) %in%
c("nrow", "ncol"))))
stop("'nrow' or 'ncol' cannot be specified when 'x' is a matrix")
if ((m <- min(dim(x))) == 0L)
return(vector(typeof(x), 0L))
y <- x[((m - 1L):0L * (dim(x)[1L])) + (1L:m)] # HERE I made the change
if (names) {
nms <- dimnames(x)
if (is.list(nms) && !any(vapply(nms, is.null, NA)) &&
identical((nm <- nms[[1L]][seq_len(m)]), nms[[2L]][seq_len(m)]))
names(y) <- nm
}
return(y)
}
if (is.array(x) && length(dim(x)) != 1L)
stop("'x' is an array, but not one-dimensional.")
if (missing(x))
n <- nrow
else if (length(x) == 1L && nargs() == 1L) {
n <- as.integer(x)
x <- 1
}
else n <- length(x)
if (!missing(nrow))
n <- nrow
if (missing(ncol))
ncol <- n
.Internal(diag(x, n, ncol))
}
Then we can call it:
m <- matrix(1:16,nrow=4,ncol=4,byrow = T)
diag_reverse(m)
#[1] 4 7 10 13
I'll test it on other matrices to see if it gives always the correct answer.
The apply family are really just loops with a bow tie.
Here is a way to do it without apply. With some input checking and should work on any size matrix.
off_diag = function(X)
{
if(!is.matrix(X)) stop('Argument is not a matrix')
n <- nrow(X)
if(ncol(X) != n) stop('Matrix is not square')
if(n<2) return(X)
Y <- X * c(0,rep(rep(c(0,1),c(n-2,1)),n),rep(0,n-1))
return(Y)
}
Now it can handle numeric vectors, character vectors and NAs.
mat <- matrix(1:16, 4, byrow = TRUE)
off_diag(mat)
# [,1] [,2] [,3] [,4]
# [1,] 0 0 0 4
# [2,] 0 0 7 0
# [3,] 0 10 0 0
# [4,] 13 0 0 0
Edit: improvement
I realised my function will fail if there are NAs since NA*0 is NA, additionally it will not work on characters, but doesn't check the matrix has mode as numeric. So instead I use the same setup to make a logical vector
minor_diag = function(X)
{
if(!is.matrix(X)) stop('Argument is not a matrix')
n <- nrow(X)
if(ncol(X) != n) stop('Matrix is not square')
if(n<2) return(X)
index = c(TRUE,rep(rep(c(TRUE,FALSE),c(n-2,1)),n),rep(TRUE,n-1))
X[index]=0
return(X)
}
mat <- matrix(letters[1:16], 4, byrow = TRUE)
minor_diag(mat)
## [,1] [,2] [,3] [,4]
## [1,] "0" "0" "0" "d"
## [2,] "0" "0" "g" "0"
## [3,] "0" "j" "0" "0"
## [4,] "m" "0" "0" "0"
minor_diag(matrix(NA,2,2))
## [,1] [,2]
## [1,] 0 NA
## [2,] NA 0
A one liner without loops
#setup
n <- 5
A <- matrix(1:(n^2), n)
#solution
diag(diag(A[n:1,]))[n:1,]

create a X % probability matrix from list of matrices

I have a list of matrices (some hundred thousands). I want to create a single matrix where the cells correspond to e.g. the 95%. With that I mean this: if e.g. cell mat[1,2] is positive (i.e. >0) in 95% of the matrices it is scored a 1, and if e.g. cell mat[2,1] is negative (i.e. <0) in 95% of the matrices it is scored a -1. If they fall below this threshold they are scored a 0.
#Dummy data
listX <- list()
for(i in 1:10){listX[[i]]<-matrix(rnorm(n = 25, mean = 0.5, sd = 1),5,5)}
listX2 <- listX
for(i in 1:10) { listX2[[i]] <- ifelse(listX[[i]] >0, 1, -1) }
For the sake of the dummy data, the 95% can be changed to say 60%, such that the cells that keep their sign in 6 out of 10 matrices are kept and scored either 1 or -1 and the rest 0.
I'm stuck on this, hence cannot provide any more code.
I would do:
listX <- list()
set.seed(20)
# I set seed for reproducability, and changed your mean so you could see the negatives
for(i in 1:10){listX[[i]]<-matrix(rnorm(n = 25, mean = 0, sd = 1),5,5)}
threshold <- 0.7
(Reduce('+',lapply(listX,function(x){x > 0}))/length(listX) >= threshold) - (Reduce('+',lapply(listX,function(x){x < 0}))/length(listX) >= threshold)
[,1] [,2] [,3] [,4] [,5]
[1,] 1 0 0 -1 1
[2,] -1 1 -1 -1 1
[3,] 0 0 0 1 1
[4,] 0 1 0 0 0
[5,] 0 0 0 0 0
This basically checks both conditions, and adds the two checks together. To break down one of the conditions (Reduce('+',lapply(listX,function(x){x > 0}))/length(listX) > threshold)
lapply(listX,function(x){x > 0}) loops through each matrix and converts it to a a matrix of true/false for every value that is above zero.
Reduce('+',lapply(listX,function(x){x > 0}))/length(listX) then adds these all together (Reduce), and divides by the number of obeservations. If the proportion is greater than our threshold, we set that value to one, and if not it is zero.
We then subtract the same matrix with x < 0 as the test, which gives -1 in each case where enough sub-values are negative.
You can change the list to an array and then take the mean over the dimensions.
arr <- simplify2array(listX)
grzero = rowMeans(arr > 0, dims = 2)
lezero = rowMeans(arr < 0, dims = 2)
prop = 0.6
1* (grzero >= prop) + -1* (lezero >= prop)
Test case showing which answers work so far! (edit)
Below you'll find my original answer. It ended up producing comparable results to the other answers on test cases involving randomly seeded data. To triple check, I created a small test data set with a known answer. It turns out that only answer by #Chris passes right now (though #user20650 should be ok if using >= on this example as indicated in comments). Here it is in case anyone else wants to use it:
listX <- list(
matrix(c(1,0,-1,1), nrow = 2),
matrix(c(1,0,-1,1), nrow = 2),
matrix(c(1,0, 1,0), nrow = 2)
)
# With any threshold < .67,
# result should be...
matrix(c(1, 0, -1, 1), nrow = 2)
#> [,1] [,2]
#> [1,] 1 -1
#> [2,] 0 1
# Otherwise...
matrix(c(1, 0, 0, 0), nrow = 2)
#> [,1] [,2]
#> [1,] 1 0
#> [2,] 0 0
# #Chris answer passes
threshold <- 0.5
(Reduce('+',lapply(listX,function(x){x > 0}))/length(listX) >= threshold) - (Reduce('+',lapply(listX,function(x){x < 0}))/length(listX) >= threshold)
#> [,1] [,2]
#> [1,] 1 -1
#> [2,] 0 1
threshold <- 1.0
(Reduce('+',lapply(listX,function(x){x > 0}))/length(listX) >= threshold) - (Reduce('+',lapply(listX,function(x){x < 0}))/length(listX) >= threshold)
#> [,1] [,2]
#> [1,] 1 0
#> [2,] 0 0
# My function fails...
prob_matrix(listX, .5)
#> [,1] [,2]
#> [1,] 1 -1
#> [2,] 0 1
prob_matrix(listX, 1)
#> [,1] [,2]
#> [1,] 1 0
#> [2,] 0 1
# #user20650 answer fails...
arr <- simplify2array(listX)
grzero = rowSums(arr > 0, dims = 2) / length(listX)
lezero = rowSums(arr < 0, dims = 2) / length(listX)
prop = 0.5
1* (grzero > prop) + -1* (lezero > prop)
#> [,1] [,2]
#> [1,] 1 -1
#> [2,] 0 1
arr <- simplify2array(listX)
grzero = rowSums(arr > 0, dims = 2) / length(listX)
lezero = rowSums(arr < 0, dims = 2) / length(listX)
prop = 1.0
1* (grzero > prop) + -1* (lezero > prop)
#> [,1] [,2]
#> [1,] 0 0
#> [2,] 0 0
Original answer
Here's one approach...
Combine sign and Reduce to do a cumulative sum of the signs of values in each cell, returning a single matrix.
Any cells where this value is less than the threshold number (your probability * number of matrices in the list) is converted to 0.
Return the sign() of all cells.
Below is an example with a wrapper function:
Toy data...
set.seed(12)
listX <- list()
for(i in 1:10){listX[[i]]<-matrix(rnorm(n = 25, mean = 0, sd = 1), 5, 5)}
Function...
prob_matrix <- function(matrix_list, prob) {
# Sum the signs of values in each cell
matrix_list <- lapply(matrix_list, sign)
x <- Reduce(`+`, matrix_list)
# Convert cells below prob to 0, others to relevant sign
x[abs(x) < (prob * length(matrix_list)) / 2] <- 0
sign(x)
}
Example cases...
prob_matrix(listX, .2)
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] -1 1 0 1 0
#> [2,] -1 0 -1 -1 0
#> [3,] 1 -1 1 1 1
#> [4,] 0 -1 1 1 -1
#> [5,] -1 0 -1 0 -1
prob_matrix(listX, .4)
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] -1 1 0 1 0
#> [2,] -1 0 -1 -1 0
#> [3,] 1 -1 1 1 1
#> [4,] 0 -1 1 1 -1
#> [5,] -1 0 -1 0 -1
prob_matrix(listX, .6)
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0 1 0 1 0
#> [2,] -1 0 0 -1 0
#> [3,] 1 -1 0 1 1
#> [4,] 0 0 0 1 -1
#> [5,] -1 0 0 0 -1
prob_matrix(listX, .8)
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0 1 0 1 0
#> [2,] -1 0 0 -1 0
#> [3,] 1 -1 0 1 1
#> [4,] 0 0 0 1 -1
#> [5,] -1 0 0 0 -1

R: List of indices to binary matrix

Say I have a list of indices, like:
l <- list(c(1,2,3), c(1), c(1,5), c(2, 3, 5))
Which specify the non-zero elements in a matrix, like:
(m <- matrix(c(1,1,1,0,0, 1,0,0,0,0, 1,0,0,0,5, 0,1,1,0,1), nrow=4, byrow=TRUE))
[,1] [,2] [,3] [,4] [,5]
[1,] 1 1 1 0 0
[2,] 1 0 0 0 0
[3,] 1 0 0 0 5
[4,] 0 1 1 0 1
What is the fastest way, using R, to make m from l, giving that the matrix is very big, say 50.000 rows and 2000 columns?
Try
d1 <- stack(setNames(l, seq_along(l)))
library(Matrix)
m1 <- sparseMatrix(as.numeric(d1[,2]), d1[,1], x=1)
as.matrix(m1)
# [,1] [,2] [,3] [,4] [,5]
#[1,] 1 1 1 0 0
#[2,] 1 0 0 0 0
#[3,] 1 0 0 0 1
#[4,] 0 1 1 0 1
Or instead of stack, we could use melt
library(reshape2)
d2 <- melt(l)
sparseMatrix(d2[,2], d2[,1],x=1)
Or using only base R
Un1 <- unlist(l)
m1 <- matrix(0, nrow=length(l), ncol=max(Un1))
m1[cbind(as.numeric(d1$ind), d1$values)] <- 1
m1
For me, the following is at least 3 times faster than the suggestions above, on data the size as specified in the question (5e4 x 2e3):
unlist_l <- unlist(l)
M <- matrix(0, nrow = length(l), ncol = max(unique(unlist_l)))
ij <- cbind(rep(1:length(l), lengths(l)), unlist_l)
M[ij] <- 1
Performance might depend on data size and degree of sparsity.

Resources