multiple data.table columns to one column of vectors - r

I have a data.table like this:
tab = data.table(V1 = c('a', 'b', 'c'),
V2 = c('d', 'e', 'f'),
V3 = c('g', 'h', 'i'),
id = c(1,2,3))
From the columns V1,V2,V3 of this table, I'd like to get for row i a vector of c(V1[i],V2[i], V3[i])
I can get a list of the desired vectors like this:
# Build one character vector per row from the columns V1, V2 and V3.
# seq_len() yields an empty sequence for a zero-row table, whereas 1:N would
# iterate over c(1, 0).
lapply(seq_len(nrow(tab)), function(i) tab[i, c(V1, V2, V3)])
Which returns:
[[1]]
[1] "a" "d" "g"
[[2]]
[1] "b" "e" "h"
[[3]]
[1] "c" "f" "i"
But I think this is probably slow and not very data.table-like.
Also, I'd like to generalize it, so that I don't have to explicitly type V1, V2, V3, but can instead pass a vector of column names to be processed this way.

Try this?
> asplit(unname(tab[, V1:V3]), 1)
[[1]]
"a" "d" "g"
[[2]]
"b" "e" "h"
[[3]]
"c" "f" "i"

Using split
split(as.matrix(tab[, V1:V3]), tab$id)
$`1`
[1] "a" "d" "g"
$`2`
[1] "b" "e" "h"
$`3`
[1] "c" "f" "i"

as.list(transpose(tab[, .(V1, V2, V3)]))
Or as a function
# Select the columns given in `cols` from the data.table `DT` and return a
# list holding one vector per row of that selection.
tdt <- function(DT, cols) {
  picked <- DT[, .SD, .SDcols = cols]
  # transpose() turns rows into columns; as.list() then splits them apart.
  as.list(transpose(picked))
}
tdt(tab, c("V1", "V2", "V3"))
# $V1
# [1] "a" "d" "g"
#
# $V2
# [1] "b" "e" "h"
#
# $V3
# [1] "c" "f" "i"

tab[, 1:3] |> transpose() |> as.list()
$V1
[1] "a" "d" "g"
$V2
[1] "b" "e" "h"
$V3
[1] "c" "f" "i"

Related

How to code to get an output vector list of unique elements based satisfying two conditions?

I'm trying to get a list of unique elements based on conditions on two columns in R.
For example, I have 4 groups and I want to get unique list of names of participants who are in group-1.
This requires to specify the two conditions in the code:
Unique(df$participants XXX_group_XXX).
How do I code this condition specifically to get an output vector satisfying both conditions?
A simple solution using only base R:
# Reproducible toy data: 100 names drawn from A-J, each paired with a group
# number between 1 and 5.
set.seed(1001)  # 7 * 11 * 13
name <- sample(LETTERS[1:10], size = 100, replace = TRUE)
G <- sample(1:5, size = 100, replace = TRUE)
# For every group, keep the distinct names.
U <- tapply(X = name, INDEX = G, FUN = unique)
> U
$`1`
[1] "G" "F" "D" "B" "J" "A" "E" "H" "C"
$`2`
[1] "C" "J" "D" "B" "F" "G"
$`3`
[1] "C" "G" "H" "D" "F" "E" "I" "B" "J"
$`4`
[1] "F" "B" "G" "E" "I" "C" "H" "D" "J"
$`5`
[1] "G" "D" "A" "H" "F" "E" "B" "J" "C"
Would this work for you? I need to create a data frame first. Then I filter for the group you wish to see and get the unique values per group.
library(dplyr)
# Seed the random number generator so the sampled data is reproducible;
# merely assigning to a variable named `seed` has no effect on sample().
set.seed(123)
# create some data: 100 rows with a random letter as name and a group in 1-4
data <- data.frame(
  name = sample(LETTERS, size = 100, replace = TRUE),
  group = sample(c(1, 2, 3, 4), size = 100, replace = TRUE)
)
# base R
unique(data[data$group == 1, 1])
# or:
unique(data[data$group == 1, "name"])
# tidyverse
data %>%
filter(group == 1) %>%
distinct(name) %>%
pull() # if you want a vector to be returned

Show adjacent members in a list

I want to inspect adjacent elements in a list based on a match. For example, in a list of randomly ordered letters, I want to know what the neighboring letters of m are. My current solution is:
library(stringr)
ltrs <- sample(letters)
ltrs[(str_which(ltrs,'m')-2):(str_which(ltrs,'m')+2)]
[1] "j" "f" "m" "q" "a"
To me, the repetition of str_which() feels unnecessary. Is there a simpler way to achieve the same result?
First, I regenerate random data with a seed for reproducibility:
set.seed(42)
ltrs <- sample(letters)
ltrs
# [1] "q" "e" "a" "j" "d" "r" "z" "o" "g" "v" "i" "y" "n" "t" "w" "b" "c" "p" "x"
# [20] "l" "m" "s" "u" "h" "f" "k"
Use -2:2 and then (as a precaution) remove those below 1 or above the length of the vector:
# Indices of the target letter and its two neighbours on each side.
ind <- -2:2 + which(ltrs == "m")
# Keep only valid positions. The upper bound must be inclusive (<=),
# otherwise the last element of ltrs can never be returned.
ind <- ind[0 < ind & ind <= length(ltrs)]
ltrs[ind]
# [1] "x" "l" "m" "s" "u"
If your target is more than one (not just "m"), then we can use a different approach.
ind <- which(ltrs %in% c("m", "f"))
ind <- lapply(ind, function(z) { z <- z + -2:2; z[0 < z & z <= length(ltrs)]; })
ind
# [[1]]
# [1] 19 20 21 22 23
# [[2]]
# [1] 23 24 25 26
lapply(ind, function(z) ltrs[z])
# [[1]]
# [1] "x" "l" "m" "s" "u"
# [[2]]
# [1] "u" "h" "f" "k"
Or, if you don't care about keeping them grouped, we can try this:
ind <- which(ltrs %in% c("m", "f"))
ind <- unique(sort(outer(-2:2, ind, `+`)))
ind <- ind[0 < ind & ind <= length(ltrs)]
ltrs[ind]
# [1] "x" "l" "m" "s" "u" "h" "f" "k"
If you don't have duplicates, you can try the code like below
ltrs[seq_along(ltrs)%in% (which(ltrs=="m")+(-2:2))]
otherwise
ltrs[seq_along(ltrs) %in% c(outer(which(ltrs == "m"), -2:2, `+`))]
You can also use the slider::slide function (using data provided by @r2evans):
slider::slide(ltrs, ~ .x, .before = 2, .after = 2)[[which(ltrs == "m")]]
# [1] "x" "l" "m" "s" "u"
slider::slide(ltrs, ~ .x, .before = 2, .after = 2)[which(ltrs %in% c("m","f"))]
# [[1]]
# [1] "x" "l" "m" "s" "u"
#
# [[2]]
# [1] "u" "h" "f" "k"

Group a DNA sequence in codons

I have generated a random DNA sequence
# Alphabet to draw from and a random sequence of 15 letters.
base <- c("A", "G", "U")
# TRUE rather than T: T is an ordinary variable that can be reassigned.
seq <- sample(base, 15, replace = TRUE)
[1] "A" "G" "A" "U" "A" "G" "U" "A" "U" "A" "G" "U" "G" "U" "G"
How can I group the resulting sequence to codons (set of three nucleotides) in order to look for the stop codons?
I need something like these:
new_seq <- c("AGA","UAG", "UAU", "AGU", "GUG")
Convert to 3 column matrix, then paste:
base <- c("A", "G", "U")
set.seed(1)
# TRUE rather than T: T is an ordinary variable that can be reassigned.
x <- sample(base, 15, replace = TRUE)
x
# [1] "A" "U" "A" "G" "A" "U" "U" "G" "G" "U" "U" "A" "A" "A" "G"
# One codon per row of a 3-column matrix (filled row by row); pasting the
# three columns together element-wise yields one string per codon.
do.call(paste0, as.data.frame(matrix(x, ncol = 3, byrow = TRUE)))
# [1] "AUA" "GAU" "UGG" "UUA" "AAG"
We can use gl to create the group, and using tapply do a group by paste
unname(tapply(seq, as.integer(gl(length(seq), 3,
length(seq))), FUN = paste, collapse=""))
#[1] "GAU" "UUG" "AAG" "GGU" "AGA"
NOTE: This would also work when the length is not a multiple of 3
Or another option is to split after pasting into a single string
strsplit(paste(seq, collapse=""), "(?<=...)", perl = TRUE)[[1]]
#[1] "GAU" "UUG" "AAG" "GGU" "AGA"

Print only first 5 elements of the list

I create a dataframe called df and give column names to it.
Then I create a new list called test_list. I loop through dataframe(df) and sort them in order.
Now, how do I print or extract only the first 5 elements in the list (test_list)?
# One-row data frame with nine numeric columns named A to I.
df <- data.frame(45, 67, 78, 89, 45, 65, 54, 67, 87)
colnames(df) <- c("A", "B", "C", "D", "E", "F", "G", "H", "I")
# For every row, store the column names ordered by decreasing value.
# unlist() turns the row into a named numeric vector, so sort() does not have
# to order a data frame (which raises a warning in recent R versions).
# seq_len() also copes with a zero-row data frame, unlike 1:nrow(df).
test_list <- vector("list", nrow(df))
for (i in seq_len(nrow(df))) {
  test_list[[i]] <- names(sort(unlist(df[i, 1:9]), decreasing = TRUE))
}
I tried,
test_list[[1]]
#gives output
#[1] "D" "I" "C" "B" "H" "F" "G" "A" "E"
test_list[c(1,2,3,4,5)]
#gives output
#[[1]]
#[1] "D" "I" "C" "B" "H" "F" "G" "A" "E"
#[[2]]
#NULL
#[[3]]
#NULL
#[[4]]
#NULL
#[[5]]
#NULL
But, I need
#output as
#D
#I
#C
#B
#H
Using head
head(test_list[[1]],5)
[1] "D" "I" "C" "B" "H"
The way you formatted your desired output, it looks like you want a list with 9 elements, not a list with one element that is a vector with 9 values. Can you say which one you prefer? If it's the former:
# Column names of the first row ordered by decreasing value; computed once
# instead of re-sorting on every iteration.
sorted_names <- colnames(sort(df[1, c(1:9)], decreasing = TRUE))
# Store each name as its own list element.
for (i in seq_along(sorted_names)) {
  test_list[[i]] <- sorted_names[i]
}
head(test_list, 5)
[[1]]
[1] "D"
[[2]]
[1] "I"
[[3]]
[1] "C"
[[4]]
[1] "B"
[[5]]
[1] "H"

R remove an object from a list of vectors

I have a list of vectors and I would like to remove a specific object. Any ideas how to achieve that?
Let's say I would like to remove the object F. How can I do that?
blocks <- list(
c("A", "B"),
c("C"),
c("D","E", "F")
)
We could also use setdiff with Map
Map(setdiff, blocks, 'F')
#[[1]]
#[1] "A" "B"
#[[2]]
#[1] "C"
#[[3]]
#[1] "D" "E"
or with lapply
lapply(blocks, setdiff, 'F')
#[[1]]
#[1] "A" "B"
#[[2]]
#[1] "C"
#[[3]]
#[1] "D" "E"
If you wanted to remove the third element of the third element of your list, you could try:
blocks[[3]] <- blocks[[3]][-3]
blocks
# [[1]]
# [1] "A" "B"
#
# [[2]]
# [1] "C"
#
# [[3]]
# [1] "D" "E"
If you wanted to remove all elements equal to "F", you could use lapply and a user-defined function to process each vector in the list, removing all "F" elements.
lapply(blocks, function(x) x[x != "F"])
# [[1]]
# [1] "A" "B"
#
# [[2]]
# [1] "C"
#
# [[3]]
# [1] "D" "E"

Resources