Convert frequency vector to logical matrix - r

I would like to convert a frequency vector (i.e. the colSums() of a matrix) to one of the possible versions of the original logical matrix in R.
Something like:
s <- c(1,2,3)
# Some function of s
# Example output:
[,1] [,2] [,3]
[1,] 0 0 1
[2,] 1 0 0
[3,] 0 1 0
[4,] 0 0 1
[5,] 0 0 1
[6,] 0 1 0
The order of rows is not important.
Could someone give me a hint on how to do this?
Edit: Rowsums are always 1. The output can be considered a multinomial dataset where each row reflects an observation.

set.seed(523)
s <- c(1, 2, 3)
n <- 6
sapply(s, function(i) sample(c(rep(1, i), rep(0, n - i))))
# [,1] [,2] [,3]
# [1,] 0 1 1
# [2,] 1 0 0
# [3,] 0 1 0
# [4,] 0 0 1
# [5,] 0 0 0
# [6,] 0 0 1

s <- c(1,2,3)
result = matrix(0, nrow = max(s), ncol = length(s))
for (i in seq_along(s)) result[1:s[i], i] = 1
result
# [,1] [,2] [,3]
# [1,] 1 1 1
# [2,] 0 1 1
# [3,] 0 0 1
Keeping rowsums as 1
s <- c(1,2,3)
result = matrix(0, nrow = sum(s), ncol = length(s))
result[cbind(1:sum(s), rep(seq_along(s), times = s))] = 1
result
# [,1] [,2] [,3]
# [1,] 1 0 0
# [2,] 0 1 0
# [3,] 0 1 0
# [4,] 0 0 1
# [5,] 0 0 1
# [6,] 0 0 1

# Input:
s <- c(1,2,3)
# ...
set.seed(1) # For reproducibility
nr <- sum(s)
nc <- length(s)
mat <- matrix(0L, nrow = nr, ncol = nc)
mat[cbind(seq_len(nr), sample(rep(seq_len(nc), s)))] <- 1L
# Output:
[,1] [,2] [,3]
[1,] 1 0 0
[2,] 0 0 1
[3,] 0 1 0
[4,] 0 0 1
[5,] 0 1 0
[6,] 0 0 1

Related

Copying the diagonal of a dataframe/matrix in the first row, by group

Say I have the following matrix mat3, where column 1 is a variable defining 2 groups:
mat1 <- diag(1, 5, 5)
mat1[,1] <- 1
mat2 <- diag(3, 5, 5)
mat2[,1] <- 3
mat3 <- rbind(mat1, mat2)
mat3
In mat3, how do I copy the diagonals of mat1 and mat2 in their respective first rows (i.e. rows 1 and 6)? The pseudocode would be: diag(mat3) by mat3[,1]
I tried the following but it did not work:
fnc <- function(x) {
res <- x
res[1,] <- diag(x)
res <<- res
}
by(mat3, as.factor(mat3[,1]), fnc)
res
In practice, I need to apply this operation to a dataframe.
Thanks a lot!
do.call(rbind, lapply(split.data.frame(mat3, mat3[,1]), \(x) {
x[1, ] <- diag(x); x
}))
[,1] [,2] [,3] [,4] [,5]
[1,] 1 1 1 1 1
[2,] 1 1 0 0 0
[3,] 1 0 1 0 0
[4,] 1 0 0 1 0
[5,] 1 0 0 0 1
[6,] 3 3 3 3 3
[7,] 3 3 0 0 0
[8,] 3 0 3 0 0
[9,] 3 0 0 3 0
[10,] 3 0 0 0 3
If mat3 is a data.frame, you can revise the anonymous function as
\(x) {
x[1, ] <- diag(as.matrix(x)); x
}
Here's an approch that just finds the start of each matrix assuming they are square:
idx <- seq(from=1, to=nrow(mat3), by=ncol(mat3))
for(i in idx) mat3[i, ] <- rep(mat3[i, 1], ncol(mat3))
mat3
# [,1] [,2] [,3] [,4] [,5]
# [1,] 1 1 1 1 1
# [2,] 1 1 0 0 0
# [3,] 1 0 1 0 0
# [4,] 1 0 0 1 0
# [5,] 1 0 0 0 1
# [6,] 3 3 3 3 3
# [7,] 3 3 0 0 0
# [8,] 3 0 3 0 0
# [9,] 3 0 0 3 0
# [10,] 3 0 0 0 3

How to replace a spot of cells by surrounding cells value in a matrix (R)

I obtain a matrix from a calculation. This matrix can have spots of 1 while the rest of the matrix presents values of 0, 2 and 3.
> ##Working folder
> setwd("C:/Users/laure/Desktop/Code")
> ##Load matrix from excel
> mat <- read.csv("test.csv", header = TRUE)
> mat <- as.matrix(mat)
> mat
X1 X1.1 X0 X0.1 X0.2 X0.3 X1.2 X1.3
[1,] 1 0 0 0 3 0 0 2
[2,] 1 2 0 3 1 1 0 0
[3,] 0 0 0 0 3 1 0 0
[4,] 2 0 0 0 0 0 0 0
[5,] 0 0 0 0 0 0 2 2
[6,] 0 0 1 0 0 0 2 2
[7,] 0 1 1 1 0 0 2 2
[8,] 0 0 1 1 0 0 2 2
[9,] 0 0 1 0 0 0 0 0
[10,] 0 0 0 0 0 0 0 0
[11,] 0 0 0 0 2 0 0 1
[12,] 0 0 0 0 2 2 1 1
[13,] 1 1 0 0 0 0 1 1
[14,] 1 1 0 0 0 1 1 1
[15,] 1 1 1 0 0 1 1 1
I would like to replace all the spots of 1 that are completely surrounded by cell values of 0 (in this example have only 1 spot completely surrounded by 0). I do not want to include the spot of 1 on the borders as there are not completely surrounded. I do not know the location and the number of the spots as it depends of the calculation performed before.
I can find the all spots having value of 1 using clump function but how to find the surrounding cells value and replace the spot cells.
####convert matrix into raster
r <-raster(mat)
####select cells with criteria based on cell value
rx <- r == 1
###extract IDs of clumps according the criteria
rc <- clump(rx)
f <- freq(rc, useNA="no")
> f
value count
[1,] 1 2
[2,] 2 3
[3,] 3 7
[4,] 4 11
[5,] 5 7
Adopting the approach from compute_neighb_sum you can use the following code:
embed_matrix <- function(mx) {
cbind(Inf, rbind(Inf, mx, Inf), Inf)
}
disembed_matrix <- function(mx) {
mx[-c(1, nrow(mx)), -c(1, ncol(mx)), drop = FALSE]
}
is_valid_idx <- function(idx, dim) {
rowSums(t(t(idx) > dim | t(idx) < 0)) == 0
}
sum_neighbor_cells <- function(m, include_corner = TRUE, include_element = FALSE) {
em <- embed_matrix(m)
dims <- dim(em)
offsets <- as.matrix(expand.grid(r = -1:1, c = -1:1))
exclude_offsets <- matrix(integer(0), ncol = 2)
if (!include_element) {
exclude_offsets <- rbind(exclude_offsets, c(0, 0))
}
if (!include_corner) {
exclude_offsets <- rbind(exclude_offsets,
matrix(c(-1, -1, 1, 1, -1, 1, -1, 1), ncol = 2))
}
dupes <- duplicated(rbind(offsets, exclude_offsets), fromLast = TRUE)
offsets <- offsets[!dupes[seq_len(nrow(offsets))], , drop = FALSE]
idx <- cbind(c(row(em)), c(col(em)))
res <- apply(offsets, 1, function(row) {
t(t(idx) + row)
})
dim(res) <- c(dim(idx), nrow(offsets))
idx <- aperm(res, 3:1)
res <- apply(idx, 3, function(i) {
valid_idx <- i[is_valid_idx(i, dims), ,
drop = FALSE]
sum(em[valid_idx])
})
dim(res) <- dims
res <- disembed_matrix(res)
res
}
Then you can use sum_neighbor_cells to get the sum of all neighboring cells (with ot without the corner cells):
set.seed(123)
(m <- matrix(sample(0:1, 25, TRUE), 5))
# [,1] [,2] [,3] [,4] [,5]
# [1,] 0 1 1 0 0
# [2,] 0 1 1 1 1
# [3,] 0 1 1 0 0
# [4,] 1 0 0 0 0
# [5,] 0 0 1 0 0
sum_neighbor_cells(m, include_corner = FALSE)
# [,1] [,2] [,3] [,4] [,5]
# [1,] Inf Inf Inf Inf Inf
# [2,] Inf 3 4 2 Inf
# [3,] Inf 2 2 2 Inf
# [4,] Inf 2 2 0 Inf
# [5,] Inf Inf Inf Inf Inf
sum_neighbor_cells(m, include_corner = TRUE)
# [,1] [,2] [,3] [,4] [,5]
# [1,] Inf Inf Inf Inf Inf
# [2,] Inf 5 6 4 Inf
# [3,] Inf 4 4 4 Inf
# [4,] Inf 4 3 2 Inf
# [5,] Inf Inf Inf Inf Inf
With this function you can get the indices of cells with have only 0's as neighboring cells easily via:
(idx <- which(sum_neighbor_cells(m, include_corner = FALSE) == 0, arr.ind = TRUE))
# row col
# [1,] 4 4
m[idx] <- NA
m
# [,1] [,2] [,3] [,4] [,5]
# [1,] 0 1 1 0 0
# [2,] 0 1 1 1 1
# [3,] 0 1 1 0 0
# [4,] 1 0 0 NA 0
# [5,] 0 0 1 0 0

How to pad vectors and matrices with 0s

vec_length <- 4
nonzero_ind <- c(1, 3)
zero_ind <- c(2, 4)
Vector:
I currently have a vector called vec that contains 2 elements.
vec <- c(22, -5)
I want to obtain a vector of length vec_length that expands vec by adding 0s. The position of these 0s are specified by zero_ind. That is, I want a vector that looks like:
> expanded_vec
[1] 22 0 -5 0
Matrix
I currently have a 2x2 matrix called mat that corresponds to vec.
> mat
[,1] [,2]
[1,] 1 2
[2,] 2 3
I want to obtain a vec_length by vec_length matrix where there are 0s in the (i,j)-th position if either i OR j is one of the nonzero_ind. That is, I want a matrix that looks like:
> expected_mat
[,1] [,2] [,3] [,4]
[1,] 1 0 2 0
[2,] 0 0 0 0
[3,] 2 0 3 0
[4,] 0 0 0 0
For the vector:
"[<-"(numeric(vec_length), nonzero_ind, vec)
#[1] 22 0 -5 0
"[<-"(numeric(vec_length), seq_len(vec_length)[-zero_ind], vec)
#[1] 22 0 -5 0
x <- numeric(vec_length)
(x[nonzero_ind] <- vec)
#[1] 22 0 -5 0
x <- numeric(vec_length)
(x[-zero_ind] <- vec)
#[1] 22 0 -5 0
and the Matrix:
i <- "[<-"(rep(NA,vec_length), nonzero_ind, seq_along(nonzero_ind))
mat <- mat[i, i]
mat[is.na(mat)] <- 0
mat
# [,1] [,2] [,3] [,4]
#[1,] 1 0 2 0
#[2,] 0 0 0 0
#[3,] 2 0 3 0
#[4,] 0 0 0 0
or
mat_out <- matrix(0, vec_length, vec_length)
mat_out[nonzero_ind, nonzero_ind] <- mat
mat_out
# [,1] [,2] [,3] [,4]
#[1,] 1 0 2 0
#[2,] 0 0 0 0
#[3,] 2 0 3 0
#[4,] 0 0 0 0
Maybe you can try kronecker like below
> kronecker(vec,c(1,0))
[1] 22 0 -5 0
> kronecker(mat,matrix(c(1,0,0,0),nrow(mat)))
[,1] [,2] [,3] [,4]
[1,] 1 0 2 0
[2,] 0 0 0 0
[3,] 2 0 3 0
[4,] 0 0 0 0
One example:
# Input data
vec_inp <- c(22, -5)
mat_inp <- matrix(c(1, 2, 2, 3), ncol = 2)
# Vector case
update_vec <- function(inp, len, nzi) {
replace(x = vector(mode = 'double', length = len), list = nzi, values = inp)
}
update_vec(vec_inp, vec_length, nonzero_ind)
# [1] 22 0 -5 0
# Matrix case, work column by column
mat_out <- matrix(vector(mode = 'double', length = vec_length^2), ncol = vec_length)
for (i in seq_along(nonzero_ind)) {
mat_out[, nonzero_ind[i]] <- update_vec(mat_inp[, i], vec_length, nonzero_ind)
}
mat_out
# [,1] [,2] [,3] [,4]
# [1,] 1 0 2 0
# [2,] 0 0 0 0
# [3,] 2 0 3 0
# [4,] 0 0 0 0

Using objects inside list as function arguments in lapply

I am trying to learn different ways to use to the objects inside a list as the FUN arguments in lapply. Take this data:
A <- list(a = matrix(0, ncol = 3, nrow = 3), b = matrix(0, ncol = 3, nrow = 3))
B <- list(a = matrix(1, ncol = 1, nrow = 3), b = matrix(1, ncol = 1, nrow = 3))
D <- mapply(FUN="list", A, B, SIMPLIFY=F, USE.NAMES=F)
D <- lapply(D, `names<-`, c("first", "second"))
D
[[1]]
[[1]]$`first`
[,1] [,2] [,3]
[1,] 0 0 0
[2,] 0 0 0
[3,] 0 0 0
[[1]]$second
[,1]
[1,] 1
[2,] 1
[3,] 1
[[2]]
[[2]]$`first`
[,1] [,2] [,3]
[1,] 0 0 0
[2,] 0 0 0
[3,] 0 0 0
[[2]]$second
[,1]
[1,] 1
[2,] 1
[3,] 1
Desired result:
[[1]]
[,1] [,2] [,3]
[1,] 0 0 0
[2,] 0 0 0
[3,] 0 0 0
[4,] 1 1 1
[[2]]
[,1] [,2] [,3]
[1,] 0 0 0
[2,] 0 0 0
[3,] 0 0 0
[4,] 1 1 1
This is how I would normally do it:
lapply(D, function(x) rbind(x$first, as.numeric(x$second)))
Now I am wondering whether there is a way to avoid using function(x) and repeating all those xs. Something like:
lapply(D, "rbind", <args>)
How can I let rbind (or any other function) know that I am referring to objects within the frame of lapply?
Thank you,
K.
To "avoid using function(x) and repeating all those xs", we could use with():
lapply(D, with, rbind(first, as.numeric(second)))
Update
As per the commments changed the code to keep only the right solution.
As some of the commentators said, the problem is that B would need to be transposed, to find an elegant solution. You should have a look to library(purrr) because with that the whole problem reduces to:
map2(A, B, ~ rbind(.x, t(.y)))
# $`a`
# [,1] [,2] [,3]
# [1,] 0 0 0
# [2,] 0 0 0
# [3,] 0 0 0
# [4,] 1 1 1
# $b
# [,1] [,2] [,3]
# [1,] 0 0 0
# [2,] 0 0 0
# [3,] 0 0 0
# [4,] 1 1 1
What map2 does is that it takes 2 lists and applies the function to each element of these lists. The ~ syntax is a shortcut for function(.)

Obtain all possible matrices by swapping only two positions in any given column

Let's start with the following matrix.
M <- matrix(c(0,0,1,1,0,0,1,1,
0,1,1,0,0,1,1,0,
0,0,0,0,1,1,1,1,
0,1,0,1,1,0,1,0,
0,0,1,1,1,1,0,0,
0,1,1,0,1,0,0,1),nrow = 8,ncol = 6)
Here is the M
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0 0 0 0 0
[2,] 0 1 0 1 0 1
[3,] 1 1 0 0 1 1
[4,] 1 0 0 1 1 0
[5,] 0 0 1 1 1 1
[6,] 0 1 1 0 1 0
[7,] 1 1 1 1 0 0
[8,] 1 0 1 0 0 1
If I pick a random column, say 4, I want to swap two positions in that column. One such possibility is swapping 5th and 6th position is given by
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0 0 0 0 0
[2,] 0 1 0 1 0 1
[3,] 1 1 0 0 1 1
[4,] 1 0 0 1 1 0
[5,] 0 0 1 0 1 1
[6,] 0 1 1 1 1 0
[7,] 1 1 1 1 0 0
[8,] 1 0 1 0 0 1
I want to do this for every possible swap in each column and then for all columns to obtain all the possible matrices.
Here's another solution:
# Return all unique permutations for c(0,0,0,0,1,1,1,1)
library(gtools)
perms = unique(permutations(8, 8, M[,1], set = FALSE))
# Create nested list
Mat_list = lapply(vector("list", ncol(M)), function(x) vector("list", nrow(perms)))
# Loop through every column and every permutations replacing each column
# with each unique permutation one at a time
for(ii in 1:ncol(M)){
for(jj in 1:nrow(perms)){
New_Mat = M
New_Mat[,ii] = perms[jj,]
Mat_list[[ii]][[jj]] = New_Mat
}
}
Result:
> Mat_list[[1]][[2]]
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0 0 0 0 0
[2,] 0 1 0 1 0 1
[3,] 1 1 0 0 1 1
[4,] 1 0 0 1 1 0
[5,] 0 0 1 1 1 1
[6,] 1 1 1 0 1 0
[7,] 0 1 1 1 0 0
[8,] 1 0 1 0 0 1
Note:
Instead of creating a super long list, I've created a nested list of matrices with 8 elements and n sub-elements per element (where n is the number of unique permutations). You can unlist the result if you prefer the long list form.
This code gives every permutation of 0s and 1s by column. I used a smaller toy example here, because the number of possibilities can get very large -- prod(choose(nrow(M), colSums(M))). As a note, this will likely not run on a standard computer for the matrix given, because of memory requirements.
library(gtools)
set.seed(1234)
M <- matrix(sample(0:1, 16, replace = TRUE), ncol = 4)
M
# [,1] [,2] [,3] [,4]
# [1,] 0 1 1 0
# [2,] 1 1 1 1
# [3,] 1 0 1 0
# [4,] 1 0 1 1
perm1s <- function(n, N) {
unique(permutations(N, N, c(rep(0, N - n), rep(1, n)), FALSE, FALSE))
}
createMat <- function(vec, lst) {
tmp <- lapply(seq_along(vec), function(x) lst[[x]][vec[x], ])
do.call(cbind, tmp)
}
makeMats <- function(M) {
sums <- colSums(M)
rows <- nrow(M)
rowPerm <- lapply(sums, perm1s, N = rows)
comb <- expand.grid(lapply(sapply(rowPerm, nrow), seq))
comb <- lapply(split(comb, seq(nrow(comb))), unlist)
mats <- lapply(comb, createMat, lst = rowPerm)
mats
}
res <- makeMats(M)
res[[1]]
# [,1] [,2] [,3] [,4]
# [1,] 0 0 1 0
# [2,] 1 0 1 0
# [3,] 1 1 1 1
# [4,] 1 1 1 1
To hold other columns constant when varying 1 column -- sum(choose(nrow(M), colSums(M))) possibilities:
makeMats2 <- function(M) {
sums <- colSums(M)
rows <- nrow(M)
rowPerm <- lapply(sums, perm1s, N = rows)
ind <- rep(seq_along(rowPerm), sapply(rowPerm, nrow))
rowPerm <- lapply(rowPerm, function(x) split(x, seq(nrow(x))))
rowPerm <- unlist(rowPerm, recursive = FALSE)
mats <- rep(list(M), length(rowPerm))
mats <- mapply(function(x, y, z) {x[ , y] <- z; x},
x = mats, y = ind, z = rowPerm, SIMPLIFY = FALSE)
mats
}

Resources