Select columns which contain a certain number in matrix - r

I have a matrix A:
1 0 2
1 3 1
I want to choose all columns which contain the number 1. My desired output:
1 2
1 1

DATA
A = matrix(c(1, 1, 0, 3, 2, 1), nrow = 2)
A
# [,1] [,2] [,3]
#[1,] 1 0 2
#[2,] 1 3 1
1
A[,colSums(A == 1, na.rm = TRUE) > 0, drop = FALSE]
# [,1] [,2]
#[1,] 1 2
#[2,] 1 1
2
A[,apply(A, 2, function(x) any(x == 1)), drop = FALSE]
# [,1] [,2]
#[1,] 1 2
#[2,] 1 1

Related

Converting a list of lists into a data frame

I have a function that first generates a list of vectors (generated by using lapply), and then cbinds it to a column vector. I thought this would produce a dataframe. However, it produces a list of lists.
The cbind function isn't working as I thought it would.
Here's a small example of what the function is generating
col_test <- c(1, 2, 1, 1, 2)
lst_test <- list(c(1, 2 , 3), c(2, 2, 2), c(1, 1, 2), c(1, 2, 2), c(1, 1, 1))
a_df <- cbind(col_test, lst_test)
Typing
> a_df[1,]
gives the output
$`col_test`
[1] 1
$lst_test
[1] 1 2 3
I'd like the data frame to be
[,1] [,2] [,3] [,4]
[1,] 1 1 2 3
[2,] 2 2 2 2
[3,] 1 1 1 2
[4,] 1 1 2 2
[5,] 2 1 1 1
How do I get it into this form?
data.frame(col_test,t(as.data.frame(lst_test)))
do.call(rbind, Map(c, col_test, lst_test))
# [,1] [,2] [,3] [,4]
#[1,] 1 1 2 3
#[2,] 2 2 2 2
#[3,] 1 1 1 2
#[4,] 1 1 2 2
#[5,] 2 1 1 1
col_test <- c(1, 2, 1, 1, 2)
lst_test <- list(c(1, 2 , 3), c(2, 2, 2), c(1, 1, 2), c(1, 2, 2), c(1, 1, 1))
name the sublists so we can use bind_rows
names(lst_test) <- 1:length(lst_test)
lst_test1 <- bind_rows(lst_test)
the bind_rows function binds by cols in this case so we need to pivot it
lst_test_pivot <- t(lst_test1)
but this gives us a matrix, so we need to cast it back to a dataframe
lst_test_pivot_df <- as.data.frame(lst_test_pivot)
now it works as
cbind(col_test, lst_test_pivot_df)
now produces
col_test V1 V2 V3
1 1 1 2 3
2 2 2 2 2
3 1 1 1 2
4 1 1 2 2
5 2 1 1 1
This should do the trick. Note that we are using do.call so that the individual elements of lst_test are sent as parameters to cbind, which prevents cbind from creating a list-of-lists. t is used to transpose the resulting matrix to your preferred orientation, and finally, one more cbind with col_test inserts that data as well.
library(tidyverse)
mat.new <- do.call(cbind, lst_test) %>%
t %>%
cbind(col_test, .) %>%
unname
[,1] [,2] [,3] [,4]
[1,] 1 1 2 3
[2,] 2 2 2 2
[3,] 1 1 1 2
[4,] 1 1 2 2
[5,] 2 1 1 1

splitting list elements expanding the list

I'm doing some kind of optical character recognition and face the following issue. I store the glyphs in a list of binary matrices and they can be of different size, but their maximum possible width is wid = 3 columns (may be any defined constant, not just 3). In some cases after the first stage of processing I get data which look like this:
myll <- list(matrix(c(0, 0, 0, 1, 1, 0), ncol = 2),
matrix(c(0), ncol = 1),
matrix(c(1, 1, 0), ncol = 3),
matrix(c(1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1), ncol = 7),
matrix(c(1, 1, 1, 1), ncol = 2))
# [[1]]
# [,1] [,2]
# [1,] 0 1
# [2,] 0 1
# [3,] 0 0
#
# [[2]]
# [,1]
# [1,] 0
#
# [[3]]
# [,1] [,2] [,3]
# [1,] 1 1 0
#
# [[4]]
# [,1] [,2] [,3] [,4] [,5] [,6] [,7]
# [1,] 1 1 1 0 0 0 1
# [2,] 0 1 0 1 0 0 1
# [3,] 1 1 1 1 0 0 1
#
# [[5]]
# [,1] [,2]
# [1,] 1 1
# [2,] 1 1
So, some glyphs may be not separated for some reasons. This happens only with glyphs of maximum possible width. Moreover, there may be some junk at the end of the matrix. I have to split them into matrices of width ncol = wid leaving the last piece (junk) as is. Then I store this matrices in separate elements of list to get the following output:
# [[1]]
# [,1] [,2]
# [1,] 0 1
# [2,] 0 1
# [3,] 0 0
#
# [[2]]
# [,1]
# [1,] 0
#
# [[3]]
# [,1] [,2] [,3]
# [1,] 1 1 0
#
# [[4]]
# [,1] [,2] [,3]
# [1,] 1 1 1
# [2,] 0 1 0
# [3,] 1 1 1
#
# [[5]]
# [,1] [,2] [,3]
# [1,] 0 0 0
# [2,] 1 0 0
# [3,] 1 0 0
#
# [[6]]
# [,1]
# [1,] 1
# [2,] 1
# [3,] 1
#
# [[7]]
# [,1] [,2]
# [1,] 1 1
# [2,] 1 1
At the moment I can make it with the help of this functions
checkGlyphs <- function(gl_m, wid = 3) {
if (ncol(gl_m) > wid)
return(list(gl_m[,1:wid], matrix(gl_m[,-(1:wid)], nrow = nrow(gl_m)))) else
return(gl_m)
}
separateGlyphs <- function(myll, wid = 3) {
require("magrittr")
presplit <- lapply(myll, checkGlyphs, wid)
total_new_length <-
presplit[unlist(lapply(presplit, is.list))] %>% lapply(length) %>% unlist() %>% sum() +
as.integer(!unlist(lapply(presplit, is.list))) %>% sum()
splitted <- vector("list", length = total_new_length)
spl_index <- 1
for (i in 1:length(presplit))
{
if (!is.list(presplit[[i]]))
{
splitted[[spl_index]] <- presplit[[i]]
spl_index <- spl_index + 1
} else
{
for (j in 1:length(presplit[[i]]))
{
splitted[[spl_index]] <- presplit[[i]][[j]]
spl_index <- spl_index + 1
}
}
}
if (any(lapply(splitted, ncol) > wid)) return(separateGlyphs(splitted, wid)) else
return(splitted)
}
But I believe there is more fast and convenient way to achieve the same result (without using for loops and this enlooped reassignment of elements and then recursion if needed O_o).
I will be thankful for any suggestions on the point or, alternatively, for recommending some OCR-packages for R.
This should do the trick, with the values in final being what you're after.
combined <- do.call(cbind, lapply(myll, unlist))
idx <- seq(1, ncol(combined), 2)
final <- do.call(list, lapply(idx, function(x) combined[, x:(x+1)]))

R mean values of a sum of Matrices without divide for points equal zero

A = matrix(c(1,2,3, 0, 2, 2, 0,2 ,3), nrow=3, ncol=3)
[,1] [,2] [,3]
[1,] 1 0 0
[2,] 2 2 2
[3,] 3 2 3
B = matrix(c(1,2,3, 1, 4, 2, 2,2 ,1), nrow=3, ncol=3)
[,1] [,2] [,3]
[1,] 1 1 2
[2,] 2 4 2
[3,] 3 2 1
C = A + B /(Sum numbers diff of zero)
C = matrix(c(1,2,3, 1, 3, 2, 2,2 ,2), nrow=3, ncol=3)
[,1] [,2] [,3]
[1,] 1 1 2
[2,] 2 3 2
[3,] 3 2 2
I need do it for a list of N matrices (mat_vect[[i]]):
list_mat_vect[[i]] <- assign(paste("a", i, sep = ""), mat_vect[[i]])
Sum matrix and get mean value
mat_sum_mean = Reduce("+", list_mat_vect) / length(file_list)
Here is dividing for all numbers, including the zeros. I dont want that.
You can do
(A+B)/((A!=0) + (B!=0))
to get
[,1] [,2] [,3]
[1,] 1 1 2
[2,] 2 3 2
[3,] 3 2 2
Here != tests for equality with zero returning TRUE or FALSE. When we add those up, the TRUEs are treated like 1 and the FALSEs become 0.
You can do this with a list of matrices as well
list_mat_vect<-list(A,B)
Reduce("+", list_mat_vect) / Reduce("+", lapply(list_mat_vect, function(x) x!=0))

Subset selection from binary matrix with dynamic column indices

A few questions, for which the R language might have elegant solutions....
Given, a matrix m containing binary values 1 and 0, and a vector v of column indices
how would I write a function to extract the all rows in m that have
the value of 1 in each of the columns indexed by the integers in v?
as an extra feature, how would one return the row indices along with
the corresponding rows?
Probably best if I illustrating, with an example....
Assuming the logic I'm asking for resides in function selectByIndices( matrix, indexVector).
so if we have the matrix (or perhaps the equivalent dataframe):
>(m= matrix(c( 1, 0, 1, 1, 1,0, 1, 1, 0, 1,1, 0, 1, 1, 0,1, 1, 1,
0, 1,0, 1, 0, 0, 1), 5))
[,1] [,2] [,3] [,4] [,5]
[1,] 1 0 1 1 0
[2,] 0 1 0 1 1
[3,] 1 1 1 1 0
[4,] 1 0 1 0 0
[5,] 1 1 0 1 1
and index vectors:
>c1 = c(1,3,4)
>c2 = c(4,5)
>c3 = c(1,3,5)
The function would behave something like this:
>selectByIndices( m, c1)
[,1] [,2] [,3] [,4] [,5]
[1,] 1 0 1 1 0
[3,] 1 1 1 1 0
>selectByIndices( m, c2)
[,1] [,2] [,3] [,4] [,5]
[2,] 0 1 0 1 1
[5,] 1 1 0 1 1
>selectByIndices( m, c3)
#no rows (i.e. empty collection) returned
Hoping it's clear enough, thanks in advance for your help.
## Create a function that extracts the qualifying rows
f <- function(m, j) {
m[rowSums(m[, j]) == length(j),]
# m[apply(m[, j], 1, function(X) all(X==1)),] ## This would also work
# which(rowSums(m[, j]) == length(j)) ## & this would get row indices
}
## Try it out
f(m, c1)
# [,1] [,2] [,3] [,4] [,5]
# [1,] 1 0 1 1 0
# [2,] 1 1 1 1 0
f(m, c2)
# [,1] [,2] [,3] [,4] [,5]
# [1,] 0 1 0 1 1
# [2,] 1 1 0 1 1
> selectRows <- function(mat, rown) suppressWarnings(mat[apply( mat[, rown], 1, all) , ])
> selectRows(m, c1)
[,1] [,2] [,3] [,4] [,5]
[1,] 1 0 1 1 0
[2,] 1 1 1 1 0
> whichRows <-function(mat, rown) suppressWarnings( which( apply( mat[, rown], 1, all) ) )
> whichRows(m, c1)
[1] 1 3

Filtering out rows in a matrix containing only 0 in R

I have the matrix
m <- matrix(c(1, 0, 3, 4, 0, 6), 3)
I need to filter out rows where both columns have a value of 0 in effect returning the matrix:
m <- matrix(c(1, 3, 4, 6), 3)
I have tried
m[m[, 1] > 0 & m[, 2] > 0]
but this returns a vector instead of a matrix stripped of rows with only 0. This should be simple but I am stuck.
Thanks,
-Elizabeth
In case you had many columns
m
[,1] [,2]
[1,] 1 4
[2,] 0 0
[3,] 3 6
m^2
[,1] [,2]
[1,] 1 16
[2,] 0 0
[3,] 9 36
rowSums(m^2)
[1] 17 0 45
m[rowSums(m^2)>0,]
[,1] [,2]
[1,] 1 4
[2,] 3 6
You are just missing a "," in your own solution.
Use
m[m[,1]>0 & m[,2]>0,]
and it will work:
> m[m[,1]>0 & m[,2]>0,]
[,1] [,2]
[1,] 1 4
[2,] 3 6

Resources