R remove an object from a list of vectors - r

I have a list of vectors and I would like to remove a specific object. Any ideas how to achieve that?
Let's say I would like to remove the object F. How can I do that?
blocks <- list(
c("A", "B"),
c("C"),
c("D","E", "F")
)

We could also use setdiff with Map
Map(setdiff, blocks, 'F')
#[[1]]
#[1] "A" "B"
#[[2]]
#[1] "C"
#[[3]]
#[1] "D" "E"
or with lapply
lapply(blocks, setdiff, 'F')
#[[1]]
#[1] "A" "B"
#[[2]]
#[1] "C"
#[[3]]
#[1] "D" "E"

If you wanted to remove the third element of the third element of your list, you could try:
blocks[[3]] <- blocks[[3]][-3]
blocks
# [[1]]
# [1] "A" "B"
#
# [[2]]
# [1] "C"
#
# [[3]]
# [1] "D" "E"
If you wanted to remove all elements equal to "F", you could use lapply and a user-defined function to process each vector in the list, removing all "F" elements.
lapply(blocks, function(x) x[x != "F"])
# [[1]]
# [1] "A" "B"
#
# [[2]]
# [1] "C"
#
# [[3]]
# [1] "D" "E"

Related

multiple data.table columns to one column of vectors

I have a data.table like this:
tab = data.table(V1 = c('a', 'b', 'c'),
V2 = c('d', 'e', 'f'),
V3 = c('g', 'h', 'i'),
id = c(1,2,3))
From the columns V1,V2,V3 of this table, I'd like to get for row i a vector of c(V1[i],V2[i], V3[i])
I can get a list of the desired vectors like this:
lapply(1:tab[, .N], function(x) tab[x, c(V1, V2, V3)])
Which returns:
[[1]]
[1] "a" "d" "g"
[[2]]
[1] "b" "e" "h"
[[3]]
[1] "c" "f" "i"
But I think this is probably slow and not very data.table-like.
Also, I'd like to generalize it, so that I don't have to explicitly type V1, V2, V3, but can rather pass a vector of column names to be processed this way.
Try this?
> asplit(unname(tab[, V1:V3]), 1)
[[1]]
"a" "d" "g"
[[2]]
"b" "e" "h"
[[3]]
"c" "f" "i"
Using split
split(as.matrix(tab[, V1:V3]), tab$id)
$`1`
[1] "a" "d" "g"
$`2`
[1] "b" "e" "h"
$`3`
[1] "c" "f" "i"
as.list(transpose(tab[, .(V1, V2, V3)]))
Or as a function
# Select the columns named in `cols` from `DT` (via .SD/.SDcols), transpose the
# selection with transpose(), and return the transposed result as a list.
# NOTE(review): assumes DT is a data.table and transpose() is data.table's — confirm.
tdt <- function(DT, cols) as.list(transpose(DT[, .SD, .SDcols = cols]))
tdt(tab, c('V1', 'V2', 'V3'))
# $V1
# [1] "a" "d" "g"
#
# $V2
# [1] "b" "e" "h"
#
# $V3
# [1] "c" "f" "i"
tab[, 1:3] |> transpose() |> as.list()
$V1
[1] "a" "d" "g"
$V2
[1] "b" "e" "h"
$V3
[1] "c" "f" "i"

Create a list containing a variable number of lists

I need to create a list from rows of a dataframe in the following format:
df <- data.frame(y1 = c("a", "d"), y2 = c("b", "e"), y3 = c("c", "f"))
df$y1 <- as.character(df$y1)
df$y2 <- as.character(df$y2)
df$y3 <- as.character(df$y3)
x <- list(
list(y1 = df$y1[1],
y2 = df$y2[1],
y3 = df$y3[1]),
list(y1 = df$y1[2],
y2 = df$y2[2],
y3 = df$y3[2])
)
> x
[[1]]
[[1]]$`y1`
[1] "a"
[[1]]$y2
[1] "b"
[[1]]$y3
[1] "c"
[[2]]
[[2]]$`y1`
[1] "d"
[[2]]$y2
[1] "e"
[[2]]$y3
[1] "f"
This is an example when there are two rows in the dataframe. How can I achieve this when the number of rows in the dataframe is variable? So for every row in the dataframe, there should be a list.
We may also use apply by going over the rows and applying as.list to each:
apply(df, 1, as.list)
[[1]]
[[1]]$y1
[1] "a"
[[1]]$y2
[1] "b"
[[1]]$y3
[1] "c"
[[2]]
[[2]]$y1
[1] "d"
[[2]]$y2
[1] "e"
[[2]]$y3
[1] "f"
We first split every row of the dataframe and then for every row we convert each element into separate list element using as.list
lapply(split(df, 1:nrow(df)), as.list)
#$`1`
#$`1`$y1
#[1] "a"
#$`1`$y2
#[1] "b"
#$`1`$y3
#[1] "c"
#$`2`
#$`2`$y1
#[1] "d"
#$`2`$y2
#[1] "e"
#$`2`$y3
#[1] "f"
We can use transpose from purrr
library(purrr)
transpose(df)
#[[1]]
#[[1]]$y1
#[1] "a"
#[[1]]$y2
#[1] "b"
#[[1]]$y3
#[1] "c"
#[[2]]
#[[2]]$y1
#[1] "d"
#[[2]]$y2
#[1] "e"
#[[2]]$y3
#[1] "f"

R - How do I check if an element is in a list of vectors?

Ok, my question might be a bit weirder than what the title suggests.
I have this list:
x <- list(
c("a", "d"),
c("a", "c"),
c("d", "e"),
c("e", "f"),
c("b", "c"),
c("f", "c"), # row 6
c("c", "e"),
c("f", "b"),
c("b", "a")
)
And I need to copy this stuff in another list called T. The only condition is that both letters of the pair must not be in T already. If one of them is already in T and the other isn't it's fine.
Basically in this example I would take the first 5 positions and copy them in T one after another because either one or both letters are new to T.
Then I would skip the 6th position because the letter "f" was already in the 4th position of T and the letter "c" is already in the 2nd and 5th positions of T.
Then I would skip the remaining 3 positions for the same reason (the letters "c", "e", "f", "b", "a" are already in T at this point)
I tried doing this
for(i in 1:length(T){
if (!( *first letter* %in% T && *second letter* %in% T)) {
T[[i]] <- c(*first letter*, *second letter*)
}
}
But it's like the "if" isn't even there, and I'm pretty sure I'm using %in% in the wrong way.
Any suggestions? I hope what I wrote makes sense, I'm new to R and to this site in general.
Thanks for your time
Effectively, for each element of the list, you want to lose it if both of its elements exist in earlier elements. A logical index is helpful here.
# Make a logical vector the length of x.
lose <- logical(length(x))
Now you can run a loop over the length of lose and compare it against all previous elements of x. Using seq_len saves us the headache of having to guard against the special case of i = 1 (seq_len(0) returns a zero-length integer instead of 0).
for (i in seq_along(lose)){
lose[i] <- all(x[[i]] %in% unique(unlist(x[seq_len(i - 1)])))
}
Now let's use the logical vector to subset x to T
T <- x[!lose]
T
#> [[1]]
#> [1] "a" "d"
#>
#> [[2]]
#> [1] "a" "c"
#>
#> [[3]]
#> [1] "d" "e"
#>
#> [[4]]
#> [1] "e" "f"
#>
#> [[5]]
#> [1] "b" "c"
# Created on 2018-07-19 by the [reprex package](http://reprex.tidyverse.org) (v0.2.0).
You can put the set of all previous elements in a list cum.sets, then use Map to check if all elements of the current vector are in the lagged cumulative set.
cum.sets <- lapply(seq_along(x), function(y) unlist(x[1:y]))
keep <- unlist(
Map(function(x, y) !all(x %in% y)
, x
, c(NA, cum.sets[-length(cum.sets)])))
x[keep]
# [[1]]
# [1] "a" "d"
#
# [[2]]
# [1] "a" "c"
#
# [[3]]
# [1] "d" "e"
#
# [[4]]
# [1] "e" "f"
#
# [[5]]
# [1] "b" "c"
tidyverse version (same output)
library(tidyverse)
cum.sets <- imap(x, ~ unlist(x[1:.y]))
keep <- map2_lgl(x, lag(cum.sets), ~!all(.x %in% .y))
x[keep]
You can use Reduce. In this case, if not all of the new values are already in the list, the new vector is concatenated to the list; otherwise it is dropped. The initial value is the first element of the list:
Reduce(function(i, y) c(i, if(!all(y %in% unlist(i))) list(y)), x[-1],init = x[1])
[[1]]
[1] "a" "d"
[[2]]
[1] "a" "c"
[[3]]
[1] "d" "e"
[[4]]
[1] "e" "f"
[[5]]
[1] "b" "c"
The most straightforward option is that you could store unique entries in another vector as you're looping through your input data.
Here's a solution without considering the positions (1 or 2) of the letters in your output list or the order of your input list.
dat <- list(c("a", "d"), c("a", "c"), c("d", "e"), c("e", "f"), c("b", "c"),
            c("f", "c"), c("c", "e"), c("f", "b"), c("b", "a"))
# Dat collects the vectors that are kept; idx holds every letter seen so far.
# idx is a plain character vector so that %in% compares strings directly
# instead of relying on the implicit coercion of a list.
Dat <- list()
idx <- character(0)
for (i in dat) {
  # Keep a vector unless all of its letters have already been observed.
  if (!all(i %in% idx)) {
    Dat <- append(Dat, list(i))
    # union() appends only the letters not previously observed, and works
    # for vectors of any length (not just pairs).
    idx <- union(idx, i)
  }
}
print(Dat)
#> [[1]]
#> [1] "a" "d"
#>
#> [[2]]
#> [1] "a" "c"
#>
#> [[3]]
#> [1] "d" "e"
#>
#> [[4]]
#> [1] "e" "f"
#>
#> [[5]]
#> [1] "b" "c"
On another note, I'd advise against using T as your vector name as it's used as TRUE in R.
We can unlist, check duplicated values with duplicated, reformat as a matrix and filter out pairs of TRUE values:
x[colSums(matrix(duplicated(unlist(x)), nrow = 2)) != 2]
# [[1]]
# [1] "a" "d"
#
# [[2]]
# [1] "a" "c"
#
# [[3]]
# [1] "d" "e"
#
# [[4]]
# [1] "e" "f"
#
# [[5]]
# [1] "b" "c"
#
And I recommend you don't use T as a variable name: it means TRUE by default (though it's discouraged to use it as such), and this could lead to unpleasant debugging.

Print only first 5 elements of the list

I create a dataframe called df and give column names to it.
Then I create a new list called test_list. I loop through dataframe(df) and sort them in order.
Now, how do I print or extract only the first 5 elements in the list (test_list)?
df<- data.frame(45,67,78,89,45,65,54,67,87)
colnames(df) <- c("A","B","C","D","E","F","G","H","I")
test_list <- list()
for(i in 1:nrow(df))
{
test_list[[i]] <- colnames(sort(df[i,c(1:9)], decreasing = TRUE))
}
I tried,
test_list[[1]]
#gives output
#[1] "D" "I" "C" "B" "H" "F" "G" "A" "E"
test_list[c(1,2,3,4,5)]
#gives output
#[[1]]
#[1] "D" "I" "C" "B" "H" "F" "G" "A" "E"
#[[2]]
#NULL
#[[3]]
#NULL
#[[4]]
#NULL
#[[5]]
#NULL
But, I need
#output as
#D
#I
#C
#B
#H
Using head
head(test_list[[1]],5)
[1] "D" "I" "C" "B" "H"
The way you formatted your desired output, it looks like you want a list with 9 elements, not a list with one element that is a vector with 9 values. Can you say which one you prefer? If it's the former:
for(i in 1:ncol(df))
{
test_list[[i]] <- colnames(sort(df[1,c(1:9)], decreasing = TRUE)[i])
}
head(test_list,5)
[[1]]
[1] "D"
[[2]]
[1] "I"
[[3]]
[1] "C"
[[4]]
[1] "B"
[[5]]
[1] "H"

Finding Elements of Lists in R

Right now I'm working with a character vector in R that I split word by word using strsplit. I'm wondering if there's a function that I can use to check the whole list, and see if a specific word is in the list, and (if possible) say which elements of the list it is in.
ex.
a = c("a","b","c")
b= c("b","d","e")
c = c("a","e","f")
If z=list(a,b,c), then f("a",z) would optimally yield [1] 1 3, and f("b",z) would optimally yield [1] 1 2
Any assistance would be wonderful.
As alexwhan says, grep is the function to use. However, be careful about using it with a list. It isn't doing what you might think it's doing. For example:
grep("c", z)
[1] 1 2 3 # ?
grep(",", z)
[1] 1 2 3 # ???
What's happening behind the scenes is that grep coerces its 2nd argument to character, using as.character. When applied to a list, what as.character returns is the character representation of that list as obtained by deparsing it. (Modulo an unlist.)
as.character(z)
[1] "c(\"a\", \"b\", \"c\")" "c(\"b\", \"d\", \"e\")" "c(\"a\", \"e\", \"f\")"
cat(as.character(z))
c("a", "b", "c") c("b", "d", "e") c("a", "e", "f")
This is what grep is working on.
If you want to run grep on a list, a safer method is to use lapply. This returns another list, which you can operate on to extract what you're interested in.
res <- lapply(z, function(ch) grep("a", ch))
res
[[1]]
[1] 1
[[2]]
integer(0)
[[3]]
[1] 1
# which vectors contain a search term
sapply(res, function(x) length(x) > 0)
[1] TRUE FALSE TRUE
Much faster than grep is:
# x is the list to search and "A" is the search term: one TRUE/FALSE per list element
sapply(x, function(y) "A" %in% y)
and if you want the index of course just use which():
which(sapply(x, function(y) "A" %in% y))
Evidence!
# Benchmark fixture: 26 random draws of 10 letters (with replacement), named A..Z.
# `replace = TRUE` is spelled out: `rep=T` relied on partial argument matching
# and on `T`, which is an ordinary variable that can be reassigned.
x <- setNames(replicate(26, list(sample(LETTERS, 10, replace = TRUE))), LETTERS)
head(x)
$A
[1] "A" "M" "B" "X" "B" "J" "P" "L" "M" "L"
$B
[1] "H" "G" "F" "R" "B" "E" "D" "I" "L" "R"
$C
[1] "P" "R" "C" "N" "K" "E" "R" "S" "N" "P"
$D
[1] "F" "B" "B" "Z" "E" "Y" "J" "R" "H" "P"
$E
[1] "O" "P" "E" "X" "S" "Q" "S" "A" "H" "B"
$F
[1] "Y" "P" "T" "T" "P" "N" "K" "P" "G" "P"
system.time(replicate(1000, grep("A", x)))
user system elapsed
0.11 0.00 0.11
system.time(replicate(1000, sapply(x, function(y) "A" %in% y)))
user system elapsed
0.05 0.00 0.05
You're looking for grep():
grep("a", z)
#[1] 1 3
grep("b", z)
#[1] 1 2

Resources