Count the number of `TRUE` in a list - r

I have a list as such
$`1`
[1] TRUE
$`5`
[1] TRUE
$`14`
[1] FALSE
$`17`
[1] TRUE
$`19`
[1] TRUE
$`20`
[1] TRUE
Is there an easy way to count the total number of TRUE values in the list?
I tried doing this trucount <- function(z){sum(z,na.rm = TRUE)} , but it doesn't work.
In the above example, the solution would return 5

You can use isTRUE():
> ll = list(`1`=TRUE, `5`=TRUE, `14`=TRUE, `17`=TRUE, `19`=TRUE, `20`=TRUE, `21`=FALSE)
> length(which(sapply(ll, isTRUE)))
[1] 6

Related

R - find cases in list of lists which meet specific condition

I have a large list of lists with three columns (TRUE/FALSE) per case. I want to find out for which cases all three columns = TRUE.
Example data:
l1 <- list( c("TRUE","FALSE","FALSE") , c("FALSE","FALSE","FALSE") , c("FALSE","FALSE","FALSE") )
l2 <- list( c("TRUE","TRUE","TRUE") , c("TRUE","TRUE","TRUE") , c("FALSE","FALSE","FALSE") )
l3 <- list( c("TRUE","TRUE","TRUE") , c("FALSE","FALSE","FALSE") , c("TRUE","FALSE","FALSE") )
mylist <- list(l1,l2,l3)
In the output I need to see which cases in which lists meet the condition, so something like
l2[[1]]
l3[[1]]
Hope that someone can help! Thank you so much in advance!
With rapply:
rapply(mylist, \(x) all(x == "TRUE"), how = "list")
output
[[1]]
[[1]][[1]]
[1] FALSE
[[1]][[2]]
[1] FALSE
[[1]][[3]]
[1] FALSE
[[2]]
[[2]][[1]]
[1] TRUE
[[2]][[2]]
[1] TRUE
[[2]][[3]]
[1] FALSE
[[3]]
[[3]][[1]]
[1] TRUE
[[3]][[2]]
[1] FALSE
[[3]][[3]]
[1] FALSE
Or, if you want a more compact result:
rapply(mylist, \(x) all(x == "TRUE"), how = "list") |>
lapply(\(x) which(unlist(x)))
[[1]]
integer(0)
[[2]]
[1] 1 2
[[3]]
[1] 1
Another compact solution with rrapply::rrapply:
rrapply::rrapply(mylist, \(x) all(x == "TRUE"), how = "melt")
L1 L2 value
1 2 1 TRUE, TRUE, TRUE
2 2 2 TRUE, TRUE, TRUE
3 3 1 TRUE, TRUE, TRUE
Note: your real data probably contains logical vectors, e.g. c(TRUE, FALSE) (without the quotes), rather than character strings. In that case, all(x) is sufficient.
Perhaps this helps
which(sapply(mylist, \(x) sapply(x, \(y) all(y == 'TRUE'))), arr.ind = TRUE)
or use
lapply(mylist, \(x) Filter(\(y) all(y == 'TRUE'), x))
[[1]]
list()
[[2]]
[[2]][[1]]
[1] "TRUE" "TRUE" "TRUE"
[[2]][[2]]
[1] "TRUE" "TRUE" "TRUE"
[[3]]
[[3]][[1]]
[1] "TRUE" "TRUE" "TRUE"
Or maybe
> lapply(mylist, \(x) lapply(x, \(y) all(y == 'TRUE')))
[[1]]
[[1]][[1]]
[1] FALSE
[[1]][[2]]
[1] FALSE
[[1]][[3]]
[1] FALSE
[[2]]
[[2]][[1]]
[1] TRUE
[[2]][[2]]
[1] TRUE
[[2]][[3]]
[1] FALSE
[[3]]
[[3]][[1]]
[1] TRUE
[[3]][[2]]
[1] FALSE
[[3]][[3]]
[1] FALSE
Here is another trick using lapply
lapply(
mylist,
function(x) {
as.list(
colMeans(type.convert(list2DF(x), as.is = TRUE)) == 1
)
}
)
which gives
[[1]]
[[1]][[1]]
[1] FALSE
[[1]][[2]]
[1] FALSE
[[1]][[3]]
[1] FALSE
[[2]]
[[2]][[1]]
[1] TRUE
[[2]][[2]]
[1] TRUE
[[2]][[3]]
[1] FALSE
[[3]]
[[3]][[1]]
[1] TRUE
[[3]][[2]]
[1] FALSE
[[3]][[3]]
[1] FALSE

Identifying patterns in two strings in R

I want to check whether ColA contains any string that does not appear in ColB. However, I am not interested in certain strings, for example, oil. I would like to have an indicator variable as follows:
ColA ColB Ind
-------------------------- ------------------------ -----
coconut+grape+pine grape+coconut TRUE
orange+apple+grape+pine grape+coconut TRUE
grape+pine grape+oil TRUE
oil+grape grape+apple FALSE
grape grape+oil FALSE
grape+pine grape+orange+pine FALSE
Any Suggestions using R?
Many thanks!
Since we need to split the strings, we'll start with strsplit,
strsplit(dat$ColA, '+', fixed = TRUE)
# [[1]]
# [1] "coconut" "grape" "pine"
# [[2]]
# [1] "orange" "apple" "grape" "pine"
# [[3]]
# [1] "grape" "pine"
# [[4]]
# [1] "oil" "grape"
# [[5]]
# [1] "grape"
# [[6]]
# [1] "grape" "pine"
From here, we want to determine what is in ColA that is not in ColB. I'll use Map to run setdiff on each set (ColA's [[1]] with ColB's [[1]], etc).
Map(setdiff, strsplit(dat$ColA, '+', fixed = TRUE), strsplit(dat$ColB, '+', fixed = TRUE))
# [[1]]
# [1] "pine"
# [[2]]
# [1] "orange" "apple" "pine"
# [[3]]
# [1] "pine"
# [[4]]
# [1] "oil"
# [[5]]
# character(0)
# [[6]]
# character(0)
To determine which one has "new words", we can just check for non-zero length using lengths(.) > 0:
lengths(Map(setdiff, strsplit(dat$ColA, '+', fixed = TRUE), strsplit(dat$ColB, '+', fixed = TRUE))) > 0
# [1] TRUE TRUE TRUE TRUE FALSE FALSE
But since you don't care about oil, we need to remove that as well.
lapply(Map(setdiff, strsplit(dat$ColA, '+', fixed = TRUE), strsplit(dat$ColB, '+', fixed = TRUE)), setdiff, "oil")
# [[1]]
# [1] "pine"
# [[2]]
# [1] "orange" "apple" "pine"
# [[3]]
# [1] "pine"
# [[4]]
# character(0)
# [[5]]
# character(0)
# [[6]]
# character(0)
lengths(lapply(Map(setdiff, strsplit(dat$ColA, '+', fixed = TRUE), strsplit(dat$ColB, '+', fixed = TRUE)),
setdiff, "oil")) > 0
# [1] TRUE TRUE TRUE FALSE FALSE FALSE
@akrun suggested a tidyverse variant:
library(dplyr)
library(purrr) # map2_lgl
library(stringr) # str_extract_all
dat %>%
mutate(
new = map2_lgl(
str_extract_all(ColB, "\\w+"), str_extract_all(ColA, "\\w+"),
~ !all(setdiff(.y, "oil") %in% .x)
)
)
# ColA ColB Ind new
# 1 coconut+grape+pine grape+coconut TRUE TRUE
# 2 orange+apple+grape+pine grape+coconut TRUE TRUE
# 3 grape+pine grape+oil TRUE TRUE
# 4 oil+grape grape+apple FALSE FALSE
# 5 grape grape+oil FALSE FALSE
# 6 grape+pine grape+orange+pine FALSE FALSE
Data
dat <- structure(list(ColA = c("coconut+grape+pine", "orange+apple+grape+pine", "grape+pine", "oil+grape", "grape", "grape+pine"), ColB = c("grape+coconut", "grape+coconut", "grape+oil", "grape+apple", "grape+oil", "grape+orange+pine"), Ind = c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE)), class = "data.frame", row.names = c(NA, -6L))
Here's a solution similar to r2evans's that calls strsplit only once with the help of do.call.
rid <- function(x) x[!x %in% z] ## helper FUN to get rid of the oil
z <- "oil"
L <- sapply(unname(dat), strsplit, "\\+")
dat$ind <- sapply(1:nrow(L), function(x) length(do.call(setdiff, rev(Map(rid, L[x,]))))) > 0
dat
# V1 V2 ind
# 1 grape+coconut coconut+grape+pine TRUE
# 2 grape+coconut orange+apple+grape+pine TRUE
# 3 grape+oil grape+pine TRUE
# 4 grape+apple oil+grape FALSE
# 5 grape+oil grape FALSE
# 6 grape+orange+pine grape+pine FALSE
Data:
dat <- structure(list(V1 = c("grape+coconut", "grape+coconut", "grape+oil",
"grape+apple", "grape+oil", "grape+orange+pine"), V2 = c("coconut+grape+pine",
"orange+apple+grape+pine", "grape+pine", "oil+grape", "grape",
"grape+pine")), row.names = c(NA, -6L), class = "data.frame")

get indices for grep in R

I am working on text mining using the tm package. I have a corpus with 320 documents in it, and I would like to search for a keyword in the corpus contents so that it returns the numbers of the matching documents. So I have written:
miningCases <- lapply(myCorpusCopy,function(x){grepl(as.character(x), pattern = "\\<mining")})
Here are the first 8 results, when I print miningCases
[[1]]
[1] TRUE
[[2]]
[1] FALSE
[[3]]
[1] FALSE
[[4]]
[1] FALSE
[[5]]
[1] FALSE
[[6]]
[1] FALSE
[[7]]
[1] TRUE
[[8]]
[1] FALSE
I want to get something like 1 7, indicating that the pattern was found in the 1st and 7th documents. Is there any way to do this?

How to use lapply to specific indices of a list?

When using lapply to apply a function to a list, how would I do this for every fourth element in the list?
For example, if I have a list of length 100, how would I apply my function to list[1], list[5], list[9], list[13] and so on?
Use lapply on the subset you're interested in.
> l <- as.list(1:15)
> lapply(l[seq(1,length(l),by=4)], identity)
[[1]]
[1] 1
[[2]]
[1] 5
[[3]]
[1] 9
[[4]]
[1] 13
Or, if you want to replace the values in the original list:
s <- seq(1,length(l),by=4)
l[s] <- lapply(l[s], function(x) x*2)
Or use (using @Joshua Ulrich's data)
s1 <- (seq_along(l)-1)%%4+1==1
l[s1]
# [[1]]
#[1] 1
#[[2]]
#[1] 5
#[[3]]
#[1] 9
#[[4]]
#[1] 13
Or
s2 <- c(TRUE, rep(FALSE,3)) # The logical index will recycle to the length of `l`
This recycling is equivalent to expanding the index explicitly, as shown below:
rep(s2, length.out=length(l))
# [1] TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
#[13] TRUE FALSE FALSE
l[s2]
# [[1]]
#[1] 1
#[[2]]
#[1] 5
#[[3]]
#[1] 9
#[[4]]
#[1] 13
and then use lapply

Comparing multi-dimensional lists in R

I have the following two lists:
First list:
[[1]]
[1] "ab" "iew" "rer" "fdd"
[[2]]
[1] "ff" "de"
[[3]]
[1] "cc"
Second list:
[[1]]
[1] "iew" "vfr"
[[2]]
[1] "ff" "cdc"
[[3]]
[1] "vf" "cde"
My goal is to compare these two multi-dimensional lists, so that the result would be:
[[1]]
[1] FALSE TRUE FALSE FALSE
[[2]]
[1] TRUE FALSE
[[3]]
[1] FALSE
What is the best vectorized way to perform this intersect() function?
Here's an alternative using mapply
> mapply("%in%", Firt.list, Second.list)
[[1]]
[1] FALSE TRUE FALSE FALSE
[[2]]
[1] TRUE FALSE
[[3]]
[1] FALSE
Where Firt.list and Second.list are:
Firt.list <- list(c("ab", "iew", "rer", "fdd" ), c("ff", "de"), c("cc"))
Second.list <- list(c("iew", "vfr"), c("ff", "cdc"), c("vf", "cde"))
If you want to know which values are the intersects of the lists, then try this
> mapply("intersect", Firt.list, Second.list)
[[1]]
[1] "iew"
[[2]]
[1] "ff"
[[3]]
character(0)

Resources