Turn numeric vector into boolean matrix [duplicate] - r

This question already has answers here:
Generate a dummy-variable
(17 answers)
Closed 5 years ago.
I have a column vector in a dataframe and would like to turn it into a binary matrix so I can do matrix multiplication with it later on.
y_labels
1
4
4
3
desired output
1 0 0 0
0 0 0 1
0 0 0 1
0 0 1 0
In Octave I would do something like y_matrix = (y_labels == [1 2 3 4]). However, I can't figure out how to get this in R. Anybody know how?

We can use model.matrix to change it to binary
model.matrix(~ -1 + factor(y_labels, levels = 1:4), df1)
or with table
with(df1, table(1:nrow(df1), factor(y_labels, levels = 1:4)))
# 1 2 3 4
# 1 1 0 0 0
# 2 0 0 0 1
# 3 0 0 0 1
# 4 0 0 1 0
Or more compactly
+(sapply(1:4, `==`, df1$y_labels))
# [,1] [,2] [,3] [,4]
#[1,] 1 0 0 0
#[2,] 0 0 0 1
#[3,] 0 0 0 1
#[4,] 0 0 1 0

Here's another option:
Start by creating a matrix of zeros:
m <- matrix(0, nrow = nrow(df), ncol = max(df$y_labels))
Then insert 1s at the correct positions:
m[col(m) == df$y_labels] <- 1
The result is:
[,1] [,2] [,3] [,4]
[1,] 1 0 0 0
[2,] 0 0 0 1
[3,] 0 0 0 1
[4,] 0 0 1 0

How about (where vec is your numeric vector):
m <- matrix(0, length(vec), max(vec))
m[cbind(seq_along(vec), vec)] <- 1
# [,1] [,2] [,3] [,4]
#[1,] 1 0 0 0
#[2,] 0 0 0 1
#[3,] 0 0 0 1
#[4,] 0 0 1 0

In base R:
df1 <- data.frame(y_labels = c(1,4,4,3))
t(sapply(df1$y_labels,function(x) c(rep(0,x-1),1,rep(0,max(df1$y_labels)-x))))
or
t(sapply(df1$y_labels,function(x) `[<-`(numeric(max(df1$y_labels)),x,1)))
output:
# [,1] [,2] [,3] [,4]
# [1,] 1 0 0 0
# [2,] 0 0 0 1
# [3,] 0 0 0 1
# [4,] 0 0 1 0

Related

Creating a specific matrix in R

I want to create the following matrix
A <- matrix(0,n,n)
for(i in 1:n){
for(j in 1:n){
if(abs(i - j) == 1) A1[i,j] <- 1
}
}
Is there another way to create such a matrix? I just want to avoid using for-loop.
A simple option is using outer + abs
> +(abs(outer(1:n,1:n,`-`))==1)
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] 0 1 0 0 0 0 0
[2,] 1 0 1 0 0 0 0
[3,] 0 1 0 1 0 0 0
[4,] 0 0 1 0 1 0 0
[5,] 0 0 0 1 0 1 0
[6,] 0 0 0 0 1 0 1
[7,] 0 0 0 0 0 1 0
where n <- 7
Create a matrix with 0 values
Subtract row index with column index.
Replace values in matrix with 1 where the difference is 1 or -1
n <- 5
A <- matrix(0,n,n)
inds <- row(A) - col(A)
A[abs(inds) == 1] <- 1
A
# [,1] [,2] [,3] [,4] [,5]
#[1,] 0 1 0 0 0
#[2,] 1 0 1 0 0
#[3,] 0 1 0 1 0
#[4,] 0 0 1 0 1
#[5,] 0 0 0 1 0
where row(A) - col(A) (inds) returns :
inds
# [,1] [,2] [,3] [,4] [,5]
#[1,] 0 -1 -2 -3 -4
#[2,] 1 0 -1 -2 -3
#[3,] 2 1 0 -1 -2
#[4,] 3 2 1 0 -1
#[5,] 4 3 2 1 0
Using tidyverse - crossing to get the combinations of sequence, then
get the absolute difference (-) between the columns, check if it is equal to 1, and reshape from 'long' to 'wide' with pivot_wider
library(dplyr)
library(tidyr)
crossing(n1 = 1:n, n2 = 1:n) %>%
mutate(new = +(abs((n1 - n2)) == 1)) %>%
pivot_wider(names_from = n2, values_from = new)
-output
# A tibble: 5 x 6
# n1 `1` `2` `3` `4` `5`
# <int> <int> <int> <int> <int> <int>
#1 1 0 1 0 0 0
#2 2 1 0 1 0 0
#3 3 0 1 0 1 0
#4 4 0 0 1 0 1
#5 5 0 0 0 1 0
Or another option with diag from base R
+(abs(row(diag(n)) - col(diag(n))) == 1)
-output
# [,1] [,2] [,3] [,4] [,5]
#[1,] 0 1 0 0 0
#[2,] 1 0 1 0 0
#[3,] 0 1 0 1 0
#[4,] 0 0 1 0 1
#[5,] 0 0 0 1 0
You can reduce the amount of code by using the function stats::toeplitz, which follows the idea in the answer by Ronak Shah.
f1 <- function(n)
{
A <- matrix(0,n,n)
inds <- row(A) - col(A)
A[abs(inds) == 1] <- 1
A
}
n <- 10
A1 <- f1(n)
A2 <- toeplitz(c(0,1,rep(0,n-2)))
all.equal(A1, A2)
#[1] TRUE

R: Matrix Combination with specific number of values

I want to make all combinations of my Matrix.
Ex. a binary 5 X 5 matrix where I only have two 1 rows (see below)
Com 1:
1 1 0 0 0
1 1 0 0 0
1 1 0 0 0
1 1 0 0 0
1 1 0 0 0
Com 2:
1 0 1 0 0
1 1 0 0 0
1 1 0 0 0
1 1 0 0 0
1 1 0 0 0
.
.
.
Com ?:
0 0 0 1 1
0 0 0 1 1
0 0 0 1 1
0 0 0 1 1
0 0 0 1 1
I tried using Combination package in R, but couldn't find a solution.
Using RcppAlgos (I am the author) we can accomplish this with 2 calls. It's quite fast as well:
library(tictoc)
library(RcppAlgos)
tic("RcppAlgos solution")
## First we generate the permutations of the multiset c(1, 1, 0, 0, 0)
binPerms <- permuteGeneral(1:0, 5, freqs = c(2, 3))
## Now we generate the permutations with repetition choose 5
## and select the rows from binPerms above
allMatrices <- permuteGeneral(1:nrow(binPerms), 5,
repetition = TRUE,
FUN = function(x) {
binPerms[x, ]
})
toc()
RcppAlgos solution: 0.108 sec elapsed
Here is the output:
allMatrices[1:3]
[[1]]
[,1] [,2] [,3] [,4] [,5]
[1,] 1 1 0 0 0
[2,] 1 1 0 0 0
[3,] 1 1 0 0 0
[4,] 1 1 0 0 0
[5,] 1 1 0 0 0
[[2]]
[,1] [,2] [,3] [,4] [,5]
[1,] 1 1 0 0 0
[2,] 1 1 0 0 0
[3,] 1 1 0 0 0
[4,] 1 1 0 0 0
[5,] 1 0 1 0 0
[[3]]
[,1] [,2] [,3] [,4] [,5]
[1,] 1 1 0 0 0
[2,] 1 1 0 0 0
[3,] 1 1 0 0 0
[4,] 1 1 0 0 0
[5,] 1 0 0 1 0
len <- length(allMatrices)
len
[1] 100000
allMatrices[(len - 2):len]
[[1]]
[,1] [,2] [,3] [,4] [,5]
[1,] 0 0 0 1 1
[2,] 0 0 0 1 1
[3,] 0 0 0 1 1
[4,] 0 0 0 1 1
[5,] 0 0 1 1 0
[[2]]
[,1] [,2] [,3] [,4] [,5]
[1,] 0 0 0 1 1
[2,] 0 0 0 1 1
[3,] 0 0 0 1 1
[4,] 0 0 0 1 1
[5,] 0 0 1 0 1
[[3]]
[,1] [,2] [,3] [,4] [,5]
[1,] 0 0 0 1 1
[2,] 0 0 0 1 1
[3,] 0 0 0 1 1
[4,] 0 0 0 1 1
[5,] 0 0 0 1 1
The code I've written below worked for me. A list of 100,000 5x5 matrices. Each of the rows has two places set to 1.
n <- 5 # No of columns
k <- 2 # No. of ones
m <- 5 # No of rows in matrix
nck <- combn(1:n,k,simplify = F)
possible_rows <-lapply(nck,function(x){
arr <- numeric(n)
arr[x] <- 1
matrix(arr,nrow=1)
})
mat_list <- possible_rows
for(i in 1:(m-1)){
list_of_lists <- lapply(mat_list,function(x){
lapply(possible_rows,function(y){
rbind(x,y)
})
})
mat_list <- Reduce(c,list_of_lists)
print(c(i,length(mat_list)))
}

R: List of indices, including empty ones, to binary matrix

Say I have a list of indices, including elements that are empty, like:
l <- list(c(1,2,3), c(1), c(1,5), NULL, c(2, 3, 5))
Which specify the non-zero elements in a matrix, like:
(m <- matrix(c(1,1,1,0,0, 1,0,0,0,0, 1,0,0,0,5, 0,0,0,0,0, 0,1,1,0,1), nrow=5, byrow=TRUE))
[,1] [,2] [,3] [,4] [,5]
[1,] 1 1 1 0 0
[2,] 1 0 0 0 0
[3,] 1 0 0 0 5
[4,] 0 0 0 0 0
[5,] 0 1 1 0 1
What is the fastest way, using R, to make m from l, giving that the matrix is very big, say 50.000 rows and 2000 columns?
You can Filter the non-NULL list elements from 'l' and use melt to reshape it to 'data.frame' with 'key/value' columns or `row/column' index columns.
library(reshape2)
d2 <- melt(Filter(Negate(is.null), setNames(l, seq_along(l))))
Un1 <- unlist(l)
m1 <- matrix(0, nrow=length(l), ncol=max(Un1))
m1[cbind(as.numeric(d2[,2]), d2[,1])] <- 1
m1
# [,1] [,2] [,3] [,4] [,5]
#[1,] 1 1 1 0 0
#[2,] 1 0 0 0 0
#[3,] 1 0 0 0 1
#[4,] 0 0 0 0 0
#[5,] 0 1 1 0 1
Or
library(Matrix)
as.matrix(sparseMatrix(as.numeric(d2[,2]), d2[,1], x=1))
# [,1] [,2] [,3] [,4] [,5]
#[1,] 1 1 1 0 0
#[2,] 1 0 0 0 0
#[3,] 1 0 0 0 1
#[4,] 0 0 0 0 0
#[5,] 0 1 1 0 1
You can do:
do.call(rbind, lapply(l, function(x) (1:max(unlist(l)) %in% x)+0L))
# [,1] [,2] [,3] [,4] [,5]
#[1,] 1 1 1 0 0
#[2,] 1 0 0 0 0
#[3,] 1 0 0 0 1
#[4,] 0 0 0 0 0
#[5,] 0 1 1 0 1
Even if akrun solution should be faster!

Generate large matrix filled with 0's or 1's in R

In R language, I am trying to generate a large matrix filled with 0's and 1's.
I have generated a large matrix but its filled with values between 0 and 1.
Here is how I did that:
NCols=500
NRows=700
mr<-matrix(runif(NCols*NRows), ncol=NCols)
I think you are asking how to generate a matrix with just zero and 1
Here is how I would do it
onezero <- function(nrow,ncol)
matrix(sample(c(0,1), replace=T, size=nrow*ncol), nrow=nrow)
With nrow and ncol the rows and columns of the matrix
R> onezero(5,5)
[,1] [,2] [,3] [,4] [,5]
[1,] 1 1 0 1 0
[2,] 1 1 1 1 0
[3,] 1 1 0 0 0
[4,] 1 0 0 1 0
[5,] 0 0 0 0 0
You can use rbinomtoo. And can change the probability of success on each trial. In this case, it's .5.
nrow<-700
ncol<-500
mat01 <- matrix(rbinom(nrow*ncol,1,.5),nrow,ncol)
> number.of.columns = 5
> number.of.rows = 10
> matrix.size = number.of.columns*number.of.rows
> ones.and.zeros.samples = sample(0:1, matrix.size, replace=TRUE)
> A = matrix(ones.and.zeros.samples, number.of.rows)
> A
[,1] [,2] [,3] [,4] [,5]
[1,] 0 0 0 1 1
[2,] 1 0 0 0 1
[3,] 0 1 1 0 0
[4,] 0 0 1 1 1
[5,] 1 0 1 1 0
[6,] 0 1 0 1 1
[7,] 0 0 1 1 0
[8,] 0 1 0 0 0
[9,] 0 0 0 0 0
[10,] 0 0 0 1 1

Convert a string into a similarity matrix

I have number of strings in an idiosyncratic format, representing sets. In R, I'd like to convert them into a similarity matrix.
For example, a string showing that 1+2 comprise a set, 3 is alone in a set, and 4,5, and 6 comprise a set is:
"1+2,3,4+5+6"
For the example above, I'd like to be able to produce
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 1 0 0 0 0
[2,] 1 1 0 0 0 0
[3,] 0 0 1 0 0 0
[4,] 0 0 0 1 1 1
[5,] 0 0 0 1 1 1
[6,] 0 0 0 1 1 1
It seems like this should be a painfully simple task. How would I go about it?
Here's an approach:
out <- lapply(unlist(strsplit("1+2,3,4+5+6", ",")), function(x) {
as.numeric(unlist(strsplit(x, "\\+")))
})
x <- table(unlist(out), rep(seq_along(out), sapply(out, length)))
matrix(x %*% t(x), nrow(x))
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 1 0 0 0 0
## [2,] 1 1 0 0 0 0
## [3,] 0 0 1 0 0 0
## [4,] 0 0 0 1 1 1
## [5,] 0 0 0 1 1 1
## [6,] 0 0 0 1 1 1
Pseudocode:
Split at , to get an array of strings, each describing a set.
For each element of the array:
Split at + to get an array of set members
Mark every possible pairing of members of this set on the matrix
You can create a matrix in R with:
m = mat.or.vec(6, 6)
By default, the matrix should initialize with all entries 0. You can assign new values with:
m[2,3] = 1
Here's another approach:
# write a simple function
similarity <- function(string){
sets <- gsub("\\+", ":", strsplit(string, ",")[[1]])
n <- as.numeric(tail(strsplit(gsub("[[:punct:]]", "", string), "")[[1]], 1))
mat <- mat.or.vec(n, n)
ind <- suppressWarnings(lapply(sets, function(x) eval(parse(text=x))))
for(i in 1:length(ind)){
mat[ind[[i]], ind[[i]]] <- 1
}
return(mat)
}
# Use that function
> similarity("1+2,3,4+5+6")
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 1 0 0 0 0
[2,] 1 1 0 0 0 0
[3,] 0 0 1 0 0 0
[4,] 0 0 0 1 1 1
[5,] 0 0 0 1 1 1
[6,] 0 0 0 1 1 1
# Using other string
> similarity("1+2,3,5+6+7, 8")
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 1 1 0 0 0 0 0 0
[2,] 1 1 0 0 0 0 0 0
[3,] 0 0 1 0 0 0 0 0
[4,] 0 0 0 0 0 0 0 0
[5,] 0 0 0 0 1 1 1 0
[6,] 0 0 0 0 1 1 1 0
[7,] 0 0 0 0 1 1 1 0
[8,] 0 0 0 0 0 0 0 1

Resources