r extract element from the list with for-loop - r

I have the following list of values:
$`1`
[1] "S21_027_1" "5_G3_A_1_counts.txt"
$`5`
[1] "S21_027_13" "5_G3_A_12_counts.txt"
$`9`
[1] "S21_027_17" "5_G3_A_15_counts.txt"
$`14`
[1] "S21_027_21" "5_G3_A_22_counts.txt"
$`18`
[1] "S21_027_25" "5_G3_A_26_counts.txt"
$`22`
[1] "S21_027_29" "5_G3_A_29_counts.txt"
I try to extract only stuff which starts with S21_027.
I tried to use for loop however it keeps just one element.
My attempt to extract it:
order_column <- c()
for (i in length(order_col))
{
v <- order_col[[i]][[1]]
print(v)
order_column <- c(v, order_column)
}

Using base R
lapply(order_col, grep, pattern = 'S21_027', value = TRUE)
[[1]]
[1] "S21_027_1"
[[2]]
[1] "S21_027_13"
[[3]]
[1] "S21_027_17"

Does this work:
lst <- list(c('S21_027_1','5_G3_A_1_counts.txt'),
c('S21_027_13','5_G3_A_12_counts.txt'),
c('S21_027_17','5_G3_A_15_counts.txt'))
sapply(lst, function(x) x[grepl('^S21_027', x)])
[1] "S21_027_1" "S21_027_13" "S21_027_17"

You may use -
library(purrr)
library(stringr)
map(order_col, str_subset, "S21_027")
#[[1]]
#[1] "S21_027_1"
#[[2]]
#[1] "S21_027_13"
#[[3]]
#[1] "S21_027_17"
Or to extract the 1st element -
map_chr(order_col, head, 1)
#[1] "S21_027_1" "S21_027_13" "S21_027_17"

Related

Iteratively extract repeated word forms across speaking turns

I'm working on speaking turns in conversation. My interest is in the words that get repeated from a prior turn to a next turn:
turnsX <- data.frame(
speaker = c("A","B","A","B"),
speech = c("let's have a look",
"yeah let's take a look",
"yeah okay so where to start",
"let's start here"), stringsAsFactors = F
)
I want to extract the repeated word forms. To this end I've run a for loop, iteratively defining each speech turn as a regex pattern for the next speech turn and str_extracting the words that get repeated from turn to turn:
library(stringr)
pattern <- c()
extracted <- c()
for(i in 1:nrow(turnsX)){
pattern[i] <- paste0(unlist(str_split(turnsX$speech[i], " ")), collapse = "|")
extracted[i+1] <- str_extract_all(turnsX$speech[i+1], pattern[i])
}
The result however is partly incorrect:
extracted
[[1]]
NULL
[[2]]
[1] "a" "let's" "a" "a" "look"
[[3]]
[1] "yeah" "a" "a"
[[4]]
[1] "start"
[[5]]
[1] NA
The correct result should be:
extracted
[[1]]
NULL
[[2]]
[1] "let's" "a" "look"
[[3]]
[1] "yeah"
[[4]]
[1] "start"
Where's the mistake? How can the code be mended, or what other approach is there, to get the correct result?
Maybe you can use Map and %in%.
x <- strsplit(turnsX$speech, " ")
Map(function(y,z) y[y %in% z], x[-length(x)], x[-1])
#[[1]]
#[1] "let's" "a" "look"
#
#[[2]]
#[1] "yeah"
#
#[[3]]
#[1] "start"
Here's a base R approach using Map :
tmp <- strsplit(turnsX$speech, ' ')
c(NA, Map(intersect, tmp[-1], tmp[-length(tmp)]))
#[[1]]
#[1] NA
#[[2]]
#[1] "let's" "a" "look"
#[[3]]
#[1] "yeah"
#[[4]]
#[1] "start"
You want the word boundaries "\\b"
library(stringr)
pattern <- c()
extracted <- c()
for(i in 2:nrow(turnsX)){
pattern[i - 1] <- paste0(unlist(str_split(turnsX$speech[i - 1], " ")), collapse = "|\\b")
extracted[i] <- str_extract_all(turnsX$speech[i], pattern[i - 1])
}
# [[1]]
# NULL
#
# [[2]]
# [1] "let's" "a" "look"
#
# [[3]]
# [1] "yeah"
#
# [[4]]
# [1] "start"

R - lapply several functions in one lapply by elements

In R, when I run two functions in lapply, it runs the first function on the entire list, then run the second function on the list. Is it possible to force it runs both functions on the first element on the list before moving onto the second element?
I am using the function print and nchar for illustration purpose -- I wrote more complex functions that generate data.frame.
lapply(c("a","bb","cdd"), function(x) {
print(x)
nchar(x)
})
the output would be
[1] "a"
[1] "bb"
[1] "cdd"
[[1]]
[1] 1
[[2]]
[1] 2
[[3]]
[1] 3
I would like to have something like this:
[[1]]
[1] "a"
[1] 1
[[2]]
[1] "bb"
[1] 2
[[3]]
[1] "cdd"
[1] 3
is this possible?
Juan Antonio Roladan Diaz and cash2 both suggested using list, which kind of works:
lapply(c("a","bb","cdd"), function(x) {
list(x, nchar(x))
})
[[1]]
[[1]][[1]]
[1] "a"
[[1]][[2]]
[1] 1
[[2]]
[[2]][[1]]
[1] "bb"
[[2]][[2]]
[1] 2
[[3]]
[[3]][[1]]
[1] "cdd"
[[3]][[2]]
[1] 3
But it is a bit too messy.
using print gives a better result,
lapply(c("a","bb","cdd"), function(x) {
print(x)
print(nchar(x))
})
[1] "a"
[1] 1
[1] "bb"
[1] 2
[1] "cdd"
[1] 3
[[1]]
[1] 1
[[2]]
[1] 2
[[3]]
[1] 3
but is there a way to suppress nchar from being print out again?
invisible(lapply(c("a","bb","cdd"), function(x) { print(x); print(nchar(x)) }))
This happens because the function prints x, then returns nchar(x); the returned elements are put into a list by lapply and returned, and printed out on the REPL.
Replace nchar(x) with print(nchar(x)). Or, if you want the list returned, just return list(x, nchar(x)) from the inner function.

Check list in list in R

I have 2 lists, I want to check if the second list in the first list, if yes, paste letters "a","b"... to each element in the first list
list1 <- list("Year","Age","Enrollment","SES","BOE")
list2 <- list("Year","Enrollment","SES")
I try to use lapply
text <- letters[1:length(list2)]
listText<- lapply(list1,function(i) ifelse(i %in% list2,paste(i,text[i],sep="^"),i))
I got wrong output
> listText
[[1]]
[1] "Year^NA"
[[2]]
[1] "Age"
[[3]]
[1] "Enrollment^NA"
[[4]]
[1] "SES^NA"
[[5]]
[1] "BOE"
This is the output I want
[[1]]
[1] "Year^a"
[[2]]
[1] "Age"
[[3]]
[1] "Enrollment^b"
[[4]]
[1] "SES^c"
[[5]]
[1] "BOE"
We can use match to find the index and then use it to subset the first list and paste the letters
i1 <- match(unlist(list2), unlist(list1))
list1[i1] <- paste(list1[i1], letters[seq(length(i1))], sep="^")
You just need change to :
text <- as.character(letters[1:length(list2)])
names(text) <- unlist(list2)
The result is :
> listText
[[1]]
[1] "Year^a"
[[2]]
[1] "Age"
[[3]]
[1] "Enrollment^b"
[[4]]
[1] "SES^c"
[[5]]
[1] "BOE"

Separate string into list in r

I have a string in R that looks like this:
"{[PP]}{[BGH]}{[AC]}{[ETL]}....{[D]}"
I want to convert it into a list so that:
List[[1]] = {[PP]}
List[[2]] = {[BGH]}
....
List[[N]] = {[D]}
If there were commas you could do strsplit but I want to keep the brackets and not get rid of them. Not sure how to do this in R
without regular expressions:
s <- "{[PP]}{[BGH]}{[AC]}{[ETL]}{[D]}"
as.list(paste("{", strsplit(s, "\\{")[[1]][-1], sep = ""))
[[1]]
[1] "{[PP]}"
[[2]]
[1] "{[BGH]}"
[[3]]
[1] "{[AC]}"
[[4]]
[1] "{[ETL]}"
[[5]]
[1] "{[D]}"
strsplit still works if you pass this regular expression (?<=})(?={) which constrains the position to split on:
strsplit(s, "(?<=})(?={)", perl = T)
# [[1]]
# [1] "{[PP]}" "{[BGH]}" "{[AC]}" "{[ETL]}" "{[D]}"
Or as #thelatemail suggested:
strsplit(s, "(?<=})", perl = T)
obligatory stringi answer:
library(stringi)
dat <- "{[PP]}{[BGH]}{[AC]}{[ETL]}{[more]{[D]}"
as.list(stri_match_all_regex(dat, "(\\{\\[[[:alpha:]]+\\]\\})")[[1]][,2])
## [[1]]
## [1] "{[PP]}"
##
## [[2]]
## [1] "{[BGH]}"
##
## [[3]]
## [1] "{[AC]}"
##
## [[4]]
## [1] "{[ETL]}"
##
## [[5]]
## [1] "{[D]}"
There is a convenient function in qdap for this i.e. bracketXtract
library(qdap)
setNames(as.list(bracketXtract(s, "curly", TRUE)), NULL)
#[[1]]
#[1] "{[PP]}"
#[[2]]
#[1] "{[BGH]}"
#[[3]]
#[1] "{[AC]}"
#[[4]]
#[1] "{[ETL]}"
#[[5]]
#[1] "{[D]}"
By default, with = FALSE. So without using with = TRUE, it will remove the bracket.
data
s <- "{[PP]}{[BGH]}{[AC]}{[ETL]}{[D]}"

Remove NULL elements from list of lists

How do I remove the null elements from a list of lists, like below, in R:
lll <- list(list(NULL),list(1),list("a"))
The object I want would look like:
lll <- list(list(1),list("a"))
I saw a similar answer here: How can I remove an element from a list? but was not able to extend it from simple lists to a list of lists.
EDIT
Bad example above on my part. Both answers work on simpler case (above). What if list is like:
lll <- list(list(NULL),list(1,2,3),list("a","b","c"))
How to get:
lll <- list(list(1,2,3),list("a","b","c"))
This recursive solution has the virtue of working on even more deeply nested lists.
It's closely modeled on Gabor Grothendieck's answer to this quite similar question. My modification of that code is needed if the function is to also remove objects like list(NULL) (not the same as NULL), as you are wanting.
## A helper function that tests whether an object is either NULL _or_
## a list of NULLs
is.NullOb <- function(x) is.null(x) | all(sapply(x, is.null))
## Recursively step down into list, removing all such objects
rmNullObs <- function(x) {
x <- Filter(Negate(is.NullOb), x)
lapply(x, function(x) if (is.list(x)) rmNullObs(x) else x)
}
rmNullObs(lll)
# [[1]]
# [[1]][[1]]
# [1] 1
#
#
# [[2]]
# [[2]][[1]]
# [1] "a"
Here is an example of its application to a more deeply nested list, on which the other currently proposed solutions variously fail.
LLLL <- list(lll)
rmNullObs(LLLL)
# [[1]]
# [[1]][[1]]
# [[1]][[1]][[1]]
# [[1]][[1]][[1]][[1]]
# [1] 1
#
#
# [[1]][[1]][[2]]
# [[1]][[1]][[2]][[1]]
# [1] "a"
Here's an option using Filter and Negate combination
Filter(Negate(function(x) is.null(unlist(x))), lll)
# [[1]]
# [[1]][[1]]
# [1] 1
#
#
# [[2]]
# [[2]][[1]]
# [1] "a"
Using purrr
purrr::map(lll, ~ purrr::compact(.)) %>% purrr::keep(~length(.) != 0)
[[1]]
[[1]][[1]]
[1] 1
[[1]][[2]]
[1] 2
[[1]][[3]]
[1] 3
[[2]]
[[2]][[1]]
[1] "a"
[[2]][[2]]
[1] "b"
[[2]][[3]]
[1] "c"
For this particular example you can also use unlist with its recursive argument.
lll[!sapply(unlist(lll, recursive=FALSE), is.null)]
# [[1]]
# [[1]][[1]]
# [1] 1
#
#
# [[2]]
# [[2]][[1]]
# [1] "a"
Since you have lists in lists, you probably need to run l/sapply twice, like:
lll[!sapply(lll,sapply,is.null)]
#[[1]]
#[[1]][[1]]
#[1] 1
#
#
#[[2]]
#[[2]][[1]]
#[1] "a"
There is a new package rlist on CRAN, thanks to Kun Ren for making our life easier.
list.clean(.data, fun = is.null, recursive = FALSE)
or for recursive removal of NULL:
list.clean(.data, fun = is.null, recursive = TRUE)
Quick fix on Josh O'Brien's solution. There's a bit of an issue with lists of functions
is.NullOb <- function(x) if(!(is.function(x))) is.null(x) | all(sapply(x, is.null)) else FALSE
## Recursively step down into list, removing all such objects
rmNullObs <- function(x) {
if(!(is.function(x))) {
x = x[!(sapply(x, is.NullOb))]
lapply(x, function(x) if (is.list(x)) rmNullObs(x) else x)
}
}

Resources