Convert frequency vector to logical matrix

Convert frequency vector to logical matrix - r

I would like to convert a frequency vector (i.e. the colSums() of a matrix) to one of the possible versions of the original logical matrix in R.
Something like:
s <- c(1,2,3)
# Some function of s
# Example output:
[,1] [,2] [,3]
[1,] 0 0 1
[2,] 1 0 0
[3,] 0 1 0
[4,] 0 0 1
[5,] 0 0 1
[6,] 0 1 0
The order of rows is not important.
Could someone give me a hint on how to do this?
Edit: Rowsums are always 1. The output can be considered a multinomial dataset where each row reflects an observation.

set.seed(523)
s <- c(1, 2, 3)
n <- 6
sapply(s, function(i) sample(c(rep(1, i), rep(0, n - i))))
# [,1] [,2] [,3]
# [1,] 0 1 1
# [2,] 1 0 0
# [3,] 0 1 0
# [4,] 0 0 1
# [5,] 0 0 0
# [6,] 0 0 1

s <- c(1,2,3)
result = matrix(0, nrow = max(s), ncol = length(s))
for (i in seq_along(s)) result[1:s[i], i] = 1
result
# [,1] [,2] [,3]
# [1,] 1 1 1
# [2,] 0 1 1
# [3,] 0 0 1
Keeping rowsums as 1
s <- c(1,2,3)
result = matrix(0, nrow = sum(s), ncol = length(s))
result[cbind(1:sum(s), rep(seq_along(s), times = s))] = 1
result
# [,1] [,2] [,3]
# [1,] 1 0 0
# [2,] 0 1 0
# [3,] 0 1 0
# [4,] 0 0 1
# [5,] 0 0 1
# [6,] 0 0 1

# Input:
s <- c(1,2,3)
# ...
set.seed(1) # For reproducibility
nr <- sum(s)
nc <- length(s)
mat <- matrix(0L, nrow = nr, ncol = nc)
mat[cbind(seq_len(nr), sample(rep(seq_len(nc), s)))] <- 1L
# Output:
[,1] [,2] [,3]
[1,] 1 0 0
[2,] 0 0 1
[3,] 0 1 0
[4,] 0 0 1
[5,] 0 1 0
[6,] 0 0 1

Related

Copying the diagonal of a dataframe/matrix in the first row, by group

Say I have the following matrix mat3, where column 1 is a variable defining 2 groups:
mat1 <- diag(1, 5, 5)
mat1[,1] <- 1
mat2 <- diag(3, 5, 5)
mat2[,1] <- 3
mat3 <- rbind(mat1, mat2)
mat3
In mat3, how do I copy the diagonals of mat1 and mat2 in their respective first rows (i.e. rows 1 and 6)? The pseudocode would be: diag(mat3) by mat3[,1]
I tried the following but it did not work:
fnc <- function(x) {
res <- x
res[1,] <- diag(x)
res <<- res
}
by(mat3, as.factor(mat3[,1]), fnc)
res
In practice, I need to apply this operation to a dataframe.
Thanks a lot!

do.call(rbind, lapply(split.data.frame(mat3, mat3[,1]), \(x) {
x[1, ] <- diag(x); x
}))
[,1] [,2] [,3] [,4] [,5]
[1,] 1 1 1 1 1
[2,] 1 1 0 0 0
[3,] 1 0 1 0 0
[4,] 1 0 0 1 0
[5,] 1 0 0 0 1
[6,] 3 3 3 3 3
[7,] 3 3 0 0 0
[8,] 3 0 3 0 0
[9,] 3 0 0 3 0
[10,] 3 0 0 0 3
If mat3 is a data.frame, you can revise the anonymous function as
\(x) {
x[1, ] <- diag(as.matrix(x)); x
}

Here's an approch that just finds the start of each matrix assuming they are square:
idx <- seq(from=1, to=nrow(mat3), by=ncol(mat3))
for(i in idx) mat3[i, ] <- rep(mat3[i, 1], ncol(mat3))
mat3
# [,1] [,2] [,3] [,4] [,5]
# [1,] 1 1 1 1 1
# [2,] 1 1 0 0 0
# [3,] 1 0 1 0 0
# [4,] 1 0 0 1 0
# [5,] 1 0 0 0 1
# [6,] 3 3 3 3 3
# [7,] 3 3 0 0 0
# [8,] 3 0 3 0 0
# [9,] 3 0 0 3 0
# [10,] 3 0 0 0 3

How to replace a spot of cells by surrounding cells value in a matrix (R)

I obtain a matrix from a calculation. This matrix can have spots of 1 while the rest of the matrix presents values of 0, 2 and 3.
> ##Working folder
> setwd("C:/Users/laure/Desktop/Code")
> ##Load matrix from excel
> mat <- read.csv("test.csv", header = TRUE)
> mat <- as.matrix(mat)
> mat
X1 X1.1 X0 X0.1 X0.2 X0.3 X1.2 X1.3
[1,] 1 0 0 0 3 0 0 2
[2,] 1 2 0 3 1 1 0 0
[3,] 0 0 0 0 3 1 0 0
[4,] 2 0 0 0 0 0 0 0
[5,] 0 0 0 0 0 0 2 2
[6,] 0 0 1 0 0 0 2 2
[7,] 0 1 1 1 0 0 2 2
[8,] 0 0 1 1 0 0 2 2
[9,] 0 0 1 0 0 0 0 0
[10,] 0 0 0 0 0 0 0 0
[11,] 0 0 0 0 2 0 0 1
[12,] 0 0 0 0 2 2 1 1
[13,] 1 1 0 0 0 0 1 1
[14,] 1 1 0 0 0 1 1 1
[15,] 1 1 1 0 0 1 1 1
I would like to replace all the spots of 1 that are completely surrounded by cell values of 0 (in this example have only 1 spot completely surrounded by 0). I do not want to include the spot of 1 on the borders as there are not completely surrounded. I do not know the location and the number of the spots as it depends of the calculation performed before.
I can find the all spots having value of 1 using clump function but how to find the surrounding cells value and replace the spot cells.
####convert matrix into raster
r <-raster(mat)
####select cells with criteria based on cell value
rx <- r == 1
###extract IDs of clumps according the criteria
rc <- clump(rx)
f <- freq(rc, useNA="no")
> f
value count
[1,] 1 2
[2,] 2 3
[3,] 3 7
[4,] 4 11
[5,] 5 7

Adopting the approach from compute_neighb_sum you can use the following code:
embed_matrix <- function(mx) {
cbind(Inf, rbind(Inf, mx, Inf), Inf)
}
disembed_matrix <- function(mx) {
mx[-c(1, nrow(mx)), -c(1, ncol(mx)), drop = FALSE]
}
is_valid_idx <- function(idx, dim) {
rowSums(t(t(idx) > dim | t(idx) < 0)) == 0
}
sum_neighbor_cells <- function(m, include_corner = TRUE, include_element = FALSE) {
em <- embed_matrix(m)
dims <- dim(em)
offsets <- as.matrix(expand.grid(r = -1:1, c = -1:1))
exclude_offsets <- matrix(integer(0), ncol = 2)
if (!include_element) {
exclude_offsets <- rbind(exclude_offsets, c(0, 0))
}
if (!include_corner) {
exclude_offsets <- rbind(exclude_offsets,
matrix(c(-1, -1, 1, 1, -1, 1, -1, 1), ncol = 2))
}
dupes <- duplicated(rbind(offsets, exclude_offsets), fromLast = TRUE)
offsets <- offsets[!dupes[seq_len(nrow(offsets))], , drop = FALSE]
idx <- cbind(c(row(em)), c(col(em)))
res <- apply(offsets, 1, function(row) {
t(t(idx) + row)
})
dim(res) <- c(dim(idx), nrow(offsets))
idx <- aperm(res, 3:1)
res <- apply(idx, 3, function(i) {
valid_idx <- i[is_valid_idx(i, dims), ,
drop = FALSE]
sum(em[valid_idx])
})
dim(res) <- dims
res <- disembed_matrix(res)
res
}
Then you can use sum_neighbor_cells to get the sum of all neighboring cells (with ot without the corner cells):
set.seed(123)
(m <- matrix(sample(0:1, 25, TRUE), 5))
# [,1] [,2] [,3] [,4] [,5]
# [1,] 0 1 1 0 0
# [2,] 0 1 1 1 1
# [3,] 0 1 1 0 0
# [4,] 1 0 0 0 0
# [5,] 0 0 1 0 0
sum_neighbor_cells(m, include_corner = FALSE)
# [,1] [,2] [,3] [,4] [,5]
# [1,] Inf Inf Inf Inf Inf
# [2,] Inf 3 4 2 Inf
# [3,] Inf 2 2 2 Inf
# [4,] Inf 2 2 0 Inf
# [5,] Inf Inf Inf Inf Inf
sum_neighbor_cells(m, include_corner = TRUE)
# [,1] [,2] [,3] [,4] [,5]
# [1,] Inf Inf Inf Inf Inf
# [2,] Inf 5 6 4 Inf
# [3,] Inf 4 4 4 Inf
# [4,] Inf 4 3 2 Inf
# [5,] Inf Inf Inf Inf Inf
With this function you can get the indices of cells with have only 0's as neighboring cells easily via:
(idx <- which(sum_neighbor_cells(m, include_corner = FALSE) == 0, arr.ind = TRUE))
# row col
# [1,] 4 4
m[idx] <- NA
m
# [,1] [,2] [,3] [,4] [,5]
# [1,] 0 1 1 0 0
# [2,] 0 1 1 1 1
# [3,] 0 1 1 0 0
# [4,] 1 0 0 NA 0
# [5,] 0 0 1 0 0

How to pad vectors and matrices with 0s

vec_length <- 4
nonzero_ind <- c(1, 3)
zero_ind <- c(2, 4)
Vector:
I currently have a vector called vec that contains 2 elements.
vec <- c(22, -5)
I want to obtain a vector of length vec_length that expands vec by adding 0s. The position of these 0s are specified by zero_ind. That is, I want a vector that looks like:
> expanded_vec
[1] 22 0 -5 0
Matrix
I currently have a 2x2 matrix called mat that corresponds to vec.
> mat
[,1] [,2]
[1,] 1 2
[2,] 2 3
I want to obtain a vec_length by vec_length matrix where there are 0s in the (i,j)-th position if either i OR j is one of the nonzero_ind. That is, I want a matrix that looks like:
> expected_mat
[,1] [,2] [,3] [,4]
[1,] 1 0 2 0
[2,] 0 0 0 0
[3,] 2 0 3 0
[4,] 0 0 0 0

For the vector:
"[<-"(numeric(vec_length), nonzero_ind, vec)
#[1] 22 0 -5 0
"[<-"(numeric(vec_length), seq_len(vec_length)[-zero_ind], vec)
#[1] 22 0 -5 0
x <- numeric(vec_length)
(x[nonzero_ind] <- vec)
#[1] 22 0 -5 0
x <- numeric(vec_length)
(x[-zero_ind] <- vec)
#[1] 22 0 -5 0
and the Matrix:
i <- "[<-"(rep(NA,vec_length), nonzero_ind, seq_along(nonzero_ind))
mat <- mat[i, i]
mat[is.na(mat)] <- 0
mat
# [,1] [,2] [,3] [,4]
#[1,] 1 0 2 0
#[2,] 0 0 0 0
#[3,] 2 0 3 0
#[4,] 0 0 0 0
or
mat_out <- matrix(0, vec_length, vec_length)
mat_out[nonzero_ind, nonzero_ind] <- mat
mat_out
# [,1] [,2] [,3] [,4]
#[1,] 1 0 2 0
#[2,] 0 0 0 0
#[3,] 2 0 3 0
#[4,] 0 0 0 0

Maybe you can try kronecker like below
> kronecker(vec,c(1,0))
[1] 22 0 -5 0
> kronecker(mat,matrix(c(1,0,0,0),nrow(mat)))
[,1] [,2] [,3] [,4]
[1,] 1 0 2 0
[2,] 0 0 0 0
[3,] 2 0 3 0
[4,] 0 0 0 0

One example:
# Input data
vec_inp <- c(22, -5)
mat_inp <- matrix(c(1, 2, 2, 3), ncol = 2)
# Vector case
update_vec <- function(inp, len, nzi) {
replace(x = vector(mode = 'double', length = len), list = nzi, values = inp)
}
update_vec(vec_inp, vec_length, nonzero_ind)
# [1] 22 0 -5 0
# Matrix case, work column by column
mat_out <- matrix(vector(mode = 'double', length = vec_length^2), ncol = vec_length)
for (i in seq_along(nonzero_ind)) {
mat_out[, nonzero_ind[i]] <- update_vec(mat_inp[, i], vec_length, nonzero_ind)
}
mat_out
# [,1] [,2] [,3] [,4]
# [1,] 1 0 2 0
# [2,] 0 0 0 0
# [3,] 2 0 3 0
# [4,] 0 0 0 0

Using objects inside list as function arguments in lapply

I am trying to learn different ways to use to the objects inside a list as the FUN arguments in lapply. Take this data:
A <- list(a = matrix(0, ncol = 3, nrow = 3), b = matrix(0, ncol = 3, nrow = 3))
B <- list(a = matrix(1, ncol = 1, nrow = 3), b = matrix(1, ncol = 1, nrow = 3))
D <- mapply(FUN="list", A, B, SIMPLIFY=F, USE.NAMES=F)
D <- lapply(D, `names<-`, c("first", "second"))
D
[[1]]
[[1]]$`first`
[,1] [,2] [,3]
[1,] 0 0 0
[2,] 0 0 0
[3,] 0 0 0
[[1]]$second
[,1]
[1,] 1
[2,] 1
[3,] 1
[[2]]
[[2]]$`first`
[,1] [,2] [,3]
[1,] 0 0 0
[2,] 0 0 0
[3,] 0 0 0
[[2]]$second
[,1]
[1,] 1
[2,] 1
[3,] 1
Desired result:
[[1]]
[,1] [,2] [,3]
[1,] 0 0 0
[2,] 0 0 0
[3,] 0 0 0
[4,] 1 1 1
[[2]]
[,1] [,2] [,3]
[1,] 0 0 0
[2,] 0 0 0
[3,] 0 0 0
[4,] 1 1 1
This is how I would normally do it:
lapply(D, function(x) rbind(x$first, as.numeric(x$second)))
Now I am wondering whether there is a way to avoid using function(x) and repeating all those xs. Something like:
lapply(D, "rbind", <args>)
How can I let rbind (or any other function) know that I am referring to objects within the frame of lapply?
Thank you,
K.

To "avoid using function(x) and repeating all those xs", we could use with():
lapply(D, with, rbind(first, as.numeric(second)))

Update
As per the commments changed the code to keep only the right solution.
As some of the commentators said, the problem is that B would need to be transposed, to find an elegant solution. You should have a look to library(purrr) because with that the whole problem reduces to:
map2(A, B, ~ rbind(.x, t(.y)))
# $`a`
# [,1] [,2] [,3]
# [1,] 0 0 0
# [2,] 0 0 0
# [3,] 0 0 0
# [4,] 1 1 1
# $b
# [,1] [,2] [,3]
# [1,] 0 0 0
# [2,] 0 0 0
# [3,] 0 0 0
# [4,] 1 1 1
What map2 does is that it takes 2 lists and applies the function to each element of these lists. The ~ syntax is a shortcut for function(.)

Obtain all possible matrices by swapping only two positions in any given column

Let's start with the following matrix.
M <- matrix(c(0,0,1,1,0,0,1,1,
0,1,1,0,0,1,1,0,
0,0,0,0,1,1,1,1,
0,1,0,1,1,0,1,0,
0,0,1,1,1,1,0,0,
0,1,1,0,1,0,0,1),nrow = 8,ncol = 6)
Here is the M
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0 0 0 0 0
[2,] 0 1 0 1 0 1
[3,] 1 1 0 0 1 1
[4,] 1 0 0 1 1 0
[5,] 0 0 1 1 1 1
[6,] 0 1 1 0 1 0
[7,] 1 1 1 1 0 0
[8,] 1 0 1 0 0 1
If I pick a random column, say 4, I want to swap two positions in that column. One such possibility is swapping 5th and 6th position is given by
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0 0 0 0 0
[2,] 0 1 0 1 0 1
[3,] 1 1 0 0 1 1
[4,] 1 0 0 1 1 0
[5,] 0 0 1 0 1 1
[6,] 0 1 1 1 1 0
[7,] 1 1 1 1 0 0
[8,] 1 0 1 0 0 1
I want to do this for every possible swap in each column and then for all columns to obtain all the possible matrices.

Here's another solution:
# Return all unique permutations for c(0,0,0,0,1,1,1,1)
library(gtools)
perms = unique(permutations(8, 8, M[,1], set = FALSE))
# Create nested list
Mat_list = lapply(vector("list", ncol(M)), function(x) vector("list", nrow(perms)))
# Loop through every column and every permutations replacing each column
# with each unique permutation one at a time
for(ii in 1:ncol(M)){
for(jj in 1:nrow(perms)){
New_Mat = M
New_Mat[,ii] = perms[jj,]
Mat_list[[ii]][[jj]] = New_Mat
}
}
Result:
> Mat_list[[1]][[2]]
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0 0 0 0 0
[2,] 0 1 0 1 0 1
[3,] 1 1 0 0 1 1
[4,] 1 0 0 1 1 0
[5,] 0 0 1 1 1 1
[6,] 1 1 1 0 1 0
[7,] 0 1 1 1 0 0
[8,] 1 0 1 0 0 1
Note:
Instead of creating a super long list, I've created a nested list of matrices with 8 elements and n sub-elements per element (where n is the number of unique permutations). You can unlist the result if you prefer the long list form.

This code gives every permutation of 0s and 1s by column. I used a smaller toy example here, because the number of possibilities can get very large -- prod(choose(nrow(M), colSums(M))). As a note, this will likely not run on a standard computer for the matrix given, because of memory requirements.
library(gtools)
set.seed(1234)
M <- matrix(sample(0:1, 16, replace = TRUE), ncol = 4)
M
# [,1] [,2] [,3] [,4]
# [1,] 0 1 1 0
# [2,] 1 1 1 1
# [3,] 1 0 1 0
# [4,] 1 0 1 1
perm1s <- function(n, N) {
unique(permutations(N, N, c(rep(0, N - n), rep(1, n)), FALSE, FALSE))
}
createMat <- function(vec, lst) {
tmp <- lapply(seq_along(vec), function(x) lst[[x]][vec[x], ])
do.call(cbind, tmp)
}
makeMats <- function(M) {
sums <- colSums(M)
rows <- nrow(M)
rowPerm <- lapply(sums, perm1s, N = rows)
comb <- expand.grid(lapply(sapply(rowPerm, nrow), seq))
comb <- lapply(split(comb, seq(nrow(comb))), unlist)
mats <- lapply(comb, createMat, lst = rowPerm)
mats
}
res <- makeMats(M)
res[[1]]
# [,1] [,2] [,3] [,4]
# [1,] 0 0 1 0
# [2,] 1 0 1 0
# [3,] 1 1 1 1
# [4,] 1 1 1 1
To hold other columns constant when varying 1 column -- sum(choose(nrow(M), colSums(M))) possibilities:
makeMats2 <- function(M) {
sums <- colSums(M)
rows <- nrow(M)
rowPerm <- lapply(sums, perm1s, N = rows)
ind <- rep(seq_along(rowPerm), sapply(rowPerm, nrow))
rowPerm <- lapply(rowPerm, function(x) split(x, seq(nrow(x))))
rowPerm <- unlist(rowPerm, recursive = FALSE)
mats <- rep(list(M), length(rowPerm))
mats <- mapply(function(x, y, z) {x[ , y] <- z; x},
x = mats, y = ind, z = rowPerm, SIMPLIFY = FALSE)
mats
}

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Convert frequency vector to logical matrix - r

set.seed(523) s <- c(1, 2, 3) n <- 6 sapply(s, function(i) sample(c(rep(1, i), rep(0, n - i)))) # [,1] [,2] [,3] # [1,] 0 1 1 # [2,] 1 0 0 # [3,] 0 1 0 # [4,] 0 0 1 # [5,] 0 0 0 # [6,] 0 0 1

# Input: s <- c(1,2,3) # ... set.seed(1) # For reproducibility nr <- sum(s) nc <- length(s) mat <- matrix(0L, nrow = nr, ncol = nc) mat[cbind(seq_len(nr), sample(rep(seq_len(nc), s)))] <- 1L # Output: [,1] [,2] [,3] [1,] 1 0 0 [2,] 0 0 1 [3,] 0 1 0 [4,] 0 0 1 [5,] 0 1 0 [6,] 0 0 1

Related

Copying the diagonal of a dataframe/matrix in the first row, by group

How to replace a spot of cells by surrounding cells value in a matrix (R)

How to pad vectors and matrices with 0s

Using objects inside list as function arguments in lapply

Obtain all possible matrices by swapping only two positions in any given column

Categories

Resources