Extract rows from data frame as list of vectors - r

For example, I have the data
a <- c("a", "b", "c")
b <- c("x", "y", "z")
df <- data.frame(a = a, b = b)
I want to do something to df so my result is
list(c("a", "x"), c("b", "y"), c("c", "z"))
I want the solution to be vectorized, and I'm having trouble utilizing the right *apply functions...

If you want to split up a data.frame by rows, try
split(df, seq.int(nrow(df)))

Here's another way.
as.data.frame(t(df),stringsAsFactors=FALSE,row.names=NA)
# V1 V2 V3
# 1 a b c
# 2 x y z
This produces a data frame, which is in fact a list of vectors. If you must have a "true" list, you could use this:
as.list(as.data.frame(t(df),stringsAsFactors=FALSE,row.names=NA))
# $V1
# [1] "a" "x"
#
# $V2
# [1] "b" "y"
#
# $V3
# [1] "c" "z"

Related

Turn a datatable into a two-dimensional list in R

I have a data.table (see dt) that I want to turn into a 2-dimensional list for future use. (For example, a, b and c are column names of another data.table; I want to take the value of whichever of a, b and c is non-missing and impute it into x, and so on.) The 2-dimensional list will act as a reference object for the fcoalesce function.
# example
dt <- data.table(col1 = c("a", "b", "c", "d", "e", "f"),
col2 = c("x", "x", "x", "y", "y", "z"))
# desirable result
list.1 <- list(c("a", "b", "c"), c("d", "e"), c("f"))
list.2 <- list("x", "y", "z")
list(list.1, list.2)
Since the actual dt is much larger than the example dt, is there a more efficient way to do it?
You can use split():
lst1 <- split(dt$col1, dt$col2)
lst2 <- as.list(names(lst1))
result <- list(unname(lst1), lst2)
result
# [[1]]
# [[1]][[1]]
# [1] "a" "b" "c"
#
# [[1]][[2]]
# [1] "d" "e"
#
# [[1]][[3]]
# [1] "f"
#
#
# [[2]]
# [[2]][[1]]
# [1] "x"
#
# [[2]][[2]]
# [1] "y"
#
# [[2]][[3]]
# [1] "z"

Save unique values of variable for each combination of two variables in a dataset

I have a (large) dataset with three variables. For each combination of sub1 and sub2, I would like to save all unique IVs in a separate vector or dataset, ignoring id, and name it using the pattern "sub1.and.sub2.IV". As my dataset is quite large, I would like to avoid using which and instead extract all combinations automatically.
id sub1 sub2 IV
<chr> <chr> <chr> <chr>
1 3 a a p
2 3 a a f
3 6 a b z
4 6 a b e
5 7 a c b
6 7 a c b
In the end, I would have three vector or datasets:
> a.and.a.IV
[1] "p" "f"
> a.and.b.IV
[1] "z" "e"
> a.and.c.IV
[1] "b"
MRE example:
structure(list(id = c("3", "3", "6", "6", "7", "7"), sub1 = c("a",
"a", "a", "a", "a", "a"), sub2 = c("a", "a", "b", "b", "c", "c"
), IV = c("p", "f", "z", "e", "b", "b")), row.names = c(NA, -6L
), class = c("tbl_df", "tbl", "data.frame"))
Maybe split
> split(df$IV, df[c("sub1","sub2")])
$a.a
[1] "p" "f"
$a.b
[1] "z" "e"
$a.c
[1] "b" "b"
One possibility could be:
a.and.a.IV<-unique(df[which(df$sub1 == "a" & df$sub2=="a"),]$IV)
a.and.b.IV<-unique(df[which(df$sub1 == "a" & df$sub2=="b"),]$IV)
a.and.c.IV<-unique(df[which(df$sub1 == "a" & df$sub2=="c"),]$IV)
> a.and.a.IV
[1] "p" "f"
> a.and.b.IV
[1] "z" "e"
> a.and.c.IV
[1] "b"
I used @ThomasIsCoding's comment to search for more solutions. I found three ways to split the data frame into a list, plus a for loop that turns each list element into its own data frame. The for loop stays the same for every solution:
Solution 1:
Using a custom function by @romainfrancois to split the data and name the resulting data.frames after the corresponding combinations of sub1 and sub2.
library(dplyr, warn.conflicts = FALSE)
# Split a data frame by the given grouping columns and name each piece
# after its combination of group values, joined with " / ".
#   .tbl  data frame (or tibble) to split
#   ...   grouping columns, forwarded to group_by()
# Returns the list produced by group_split(), with names built from
# group_keys().
named_group_split <- function(.tbl, ...) {
  by_group <- group_by(.tbl, ...)
  # Splice the key columns into paste() so each group gets one label.
  group_labels <- rlang::eval_bare(
    rlang::expr(paste(!!!group_keys(by_group), sep = " / "))
  )
  pieces <- group_split(by_group)
  rlang::set_names(pieces, group_labels)
}
df_split1 <- df %>%
named_group_split(sub1, sub2) %>%
unique()
# Create one data frame per list element, named after that element.
# seq_along() avoids iterating over c(1, 0) when df_split1 is empty.
for (i in seq_along(df_split1)) {
  assign(names(df_split1)[i], as.data.frame(df_split1[[i]]))
}
Solution 2:
Using dplyr::group_split to split the dataset into a list with all the original variables and their respective names. Unfortunately, this solution is not able to name the data.frames. Solution found here.
df_split2 <- df %>%
group_split(sub1, sub2)
# group_split() returns an unnamed list, so names(df_split2[i]) is NULL and
# assign() would fail on the resulting empty name. Fall back to positional
# names (df_split2_1, df_split2_2, ...) so the loop can run.
# seq_along() also avoids iterating over c(1, 0) when the list is empty.
for (i in seq_along(df_split2)) {
  assign(paste0("df_split2_", i), as.data.frame(df_split2[[i]]))
}
Solution 3:
Using base::split splits the dataset into a list that contains only the IV values; the same for loop is then applied.
df_split3 <- split(df$IV, df[c("sub1","sub2")])
# Create one data frame per sub1/sub2 combination, named after the list
# element (e.g. "a.a"). seq_along() avoids iterating over c(1, 0) when
# df_split3 is empty.
for (i in seq_along(df_split3)) {
  assign(names(df_split3)[i], as.data.frame(df_split3[[i]]))
}

Finding specific elements in lists

I am stuck at one of the challenges proposed in a tutorial I am reading.
# Using the following code:
# Named list with a character vector, an integer vector and the lowercase
# alphabet. Elements are separated by commas and the call is closed.
challenge_list <- list(
  words = c("alpha", "beta", "gamma"),
  numbers = 1:10,
  letter = letters
)
# challenge_list
# Extract the following things:
#
# - The word "gamma"
# - The letters "a", "e", "i", "o", and "u"
# - The numbers less than or equal to 3
I have tried the following:
## 1
challenge_list$"gamma"
## 2
challenge_list [[1]["gamma"]]
But nothing works.
> challenge_list$words[challenge_list$words == "gamma"]
[1] "gamma"
> challenge_list$letter[challenge_list$letter %in% c("a","e","i","o","u")]
[1] "a" "e" "i" "o" "u"
> challenge_list$numbers[challenge_list$numbers<=3]
[1] 1 2 3
We can write a function that subsets its input differently depending on whether it is numeric, and then use Map to pair each element of the original list with its corresponding filter value and apply f1 to each pair. This returns a new list with the filtered values.
# Filter x against y:
#   numeric x     -> keep the values that are <= y
#   any other x   -> keep the values that occur in y
f1 <- function(x, y) {
  if (is.numeric(x)) {
    keep <- x <= y
  } else {
    keep <- x %in% y
  }
  x[keep]
}
out <- Map(f1, challenge_list, list('gamma', 3, c("a","e","i","o","u")))
out
-output
#$words
#[1] "gamma"
#$numbers
#[1] 1 2 3
#$letter
#[1] "a" "e" "i" "o" "u"
Try this. Most R objects can be filtered using brackets. For lists you need a combination like [[ ]][ ]: the first pair selects the object inside the list and the second selects elements inside that object. For vectors the task is easier, since a single pair of brackets with a condition is enough to extract elements. Here is the code:
#Data
challenge_list <- list(
  words = c("alpha", "beta", "gamma"),
  numbers = 1:10,
  letter = letters
)
#Code
# "gamma" is the third element of the first list entry.
challenge_list[[1]][3]
# letter and numbers only exist inside the list, so reach them through it.
challenge_list$letter[challenge_list$letter %in% c("a", "e", "i", "o", "u")]
challenge_list$numbers[challenge_list$numbers <= 3]
As I have noticed your data is in a list, you can also play with the position of the elements like this:
#Data 2
challenge_list <- list(
  words = c("alpha", "beta", "gamma"),
  numbers = 1:10,
  letter = letters
)
#Code 2
# The requested word "gamma" sits at position 3 of the first list entry.
challenge_list[[1]][3]
challenge_list[[3]][challenge_list[[3]] %in% c("a", "e", "i", "o", "u")]
challenge_list[[2]][challenge_list[[2]] <= 3]
Output:
challenge_list[[1]][3]
[1] "gamma"
challenge_list[[3]][challenge_list[[3]] %in% c("a", "e", "i", "o","u")]
[1] "a" "e" "i" "o" "u"
challenge_list[[2]][challenge_list[[2]]<=3]
[1] 1 2 3

Using grep in list in order to fill a new df in R

Hello I have a list :
list=c("OK_67J","GGT_je","Ojj_OK_778","JUu3","JJE")
and I would like to transform it into a data frame:
COL1 COL2
OK_67J A
GGT_je B
Ojj_OK_778 A
JUu3 B
JJE B
where COL2 is "A" if the element contains the pattern OK_ and "B" if not.
I tried :
COL2<-rep("Virus",length(list))
list[grep("OK_",tips)]<-"A"
df <- data.frame(COL1=list,COL2=COL2)
Use grepl :
ifelse(grepl('OK_', list), "A", "B")
#[1] "A" "B" "A" "B" "B"
You can also do it without ifelse :
c("B", "A")[grepl('OK_', list) + 1]
It is better not to use list as a variable name, since list is a built-in function in R.
When you exchange your list[grep("OK_",tips)]<-"A" with COL2[grep("OK_",list)] <- "A" your solution will work.
list <- c("OK_67J", "GGT_je", "Ojj_OK_778", "JUu3", "JJE")
# Label each element "A" when it contains "OK_" and "B" otherwise.
COL2 <- ifelse(grepl("OK_", list), "A", "B")
df <- data.frame(COL1 = list, COL2 = COL2)
df
# COL1 COL2
#1 OK_67J A
#2 GGT_je B
#3 Ojj_OK_778 A
#4 JUu3 B
#5 JJE B
First off, list is not a list but a character vector:
list=c("OK_67J","GGT_je","Ojj_OK_778","JUu3","JJE")
class(list)
[1] "character"
To transform it to a dataframe:
df <- data.frame(v1 = list)
To add the new column use grepl:
df$v2 <- ifelse(grepl("OK_", df$v1), "A", "B")
or use str_detect:
library(stringr)
df$v2 <- ifelse(str_detect(df$v1, "OK_"), "A", "B")
Result:
df
v1 v2
1 OK_67J A
2 GGT_je B
3 Ojj_OK_778 A
4 JUu3 B
5 JJE B

Inserting values in one list into another list by index

I have two lists x and y, and a vector of indices where.
x <- list(a = 1:4, b = letters[1:6])
y <- list(a = c(20, 50), b = c("abc", "xyz"))
where <- c(2, 4)
I want to insert y into x at the indices in where, so that the result is
list(a = c(1,20,2,50,3,4), b = c("a", "abc", "b", "xyz", "c", "d", "e", "f"))
#$a
#[1] 1 20 2 50 3 4
#
#$b
#[1] "a" "abc" "b" "xyz" "c" "d" "e" "f"
I've been trying it with append, but it's not working.
lapply(seq(x), function(i) append(x[[i]], y[[i]], after = where[i]))
#[[1]]
#[1] 1 2 20 50 3 4
#
#[[2]]
#[1] "a" "b" "c" "d" "abc" "xyz" "e" "f"
This is appending at the wrong index. Plus, I want to retain the list names in the process. I also don't know if append is the right function for this, since I've literally never seen it used anywhere.
What's the best way to insert values from one list into another list using an index vector?
How about an mapply solution
x <- list(a = 1:4, b = letters[1:6])
y <- list(a = c(20, 50), b = c("abc", "xyz"))
where <- c(2, 4)
# For each pair of elements from x and y, build a vector in which the
# values of y sit at the positions given by `where` and the values of x
# fill the remaining positions.
Map(
  function(x, y, w) {
    merged <- vector(class(x), length(x) + length(y))
    merged[-w] <- x
    merged[w] <- y
    merged
  },
  x, y,
  MoreArgs = list(where)
)
which returns
$a
[1] 1 20 2 50 3 4
$b
[1] "a" "abc" "b" "xyz" "c" "d" "e" "f"
which seems to be the results you desire.
Here I created an APPEND function that is an iterative (via Reduce) version of append:
# Insert the values of y into x one at a time.
#   x      vector to insert into
#   where  positions the inserted values should occupy in the result
#   y      values to insert, paired element-wise with `where`
# Each value is appended after position `where - 1` of the vector built
# so far, starting from x.
APPEND <- function(x, where, y) {
  insertions <- Map(list, y, where - 1)
  insert_one <- function(acc, ins) {
    append(acc, ins[[1]], ins[[2]])
  }
  Reduce(insert_one, insertions, init = x)
}
Then you just need to call that function via Map:
Map(APPEND, x, list(where), y)

Resources