Names of nested list containing dots (e.g. "c.2") - r

How can I get the names of the leaves of a nested list (which also contains a data frame), e.g.
p <- list(a=1,b=list(b1=2,b2=3),c=list(c1=list(c11='a',c12='x'),c.2=data.frame("t"=1)))
into a vector format:
[[1]]
[1] "a"
[[2]]
[1] "b" "b1"
[[3]]
[1] "b" "b2"
[[4]]
[1] "c" "c1" "c11"
[[5]]
[1] "c" "c1" "c12"
[[6]]
[1] "c" "c.2"
The problem is that my list contains names with a dot (e.g. "c.2"). By using unlist, one gets "c.c.2", and I (or possibly strsplit) can't tell whether a dot is a delimiter inserted by unlist or part of the name. That is the difference to this question.
It should ignore data.frames, i.e. treat them as leaves. My approach so far is adapted from here, but struggles with the dots created by unlist:
# Return the dot-joined name path of every leaf of a nested list.
#
#   l:        a (possibly nested) named list.
#   maxDepth: maximum number of list levels to descend into; deeper lists are
#             reported as leaves.
#
# Data frames are treated as leaves. The paths are the names that unlist()
# builds, so components are joined with "." and a name that itself contains a
# dot (e.g. "c.2") cannot be told apart from a separator in the result.
listNames <- function(l, maxDepth = 2) {
  # Replace every leaf by TRUE so that unlist() only has to build the names.
  listNames_rec <- function(node, depth) {
    # Scalar conditions: use the short-circuiting `||`, not elementwise `|`.
    if (!is.list(node) || is.data.frame(node) || depth >= maxDepth) {
      return(TRUE)
    }
    lapply(node, listNames_rec, depth + 1)
  }
  names(unlist(listNames_rec(l, 0)))
}
listNames(p, maxDepth = 3)
[1] "a" "b.b1" "b.b2" "c.c1.c11" "c.c1.c12" "c.c.2"

Like this?
# Recursively replace every "." in the names of a nested list by `s`.
#
#   L: a (possibly nested) list; anything that is not a plain list, including
#      a data frame, is returned untouched.
#   s: replacement string for the literal dot.
#
# Returns a list of the same shape with the renamed elements.
subnames <- function(L, s) {
  if (!is.list(L) || is.data.frame(L)) {
    return(L)
  }
  nms <- names(L)
  # An unnamed list has NULL names; assigning gsub()'s character(0) result
  # would pad the names with NA, so leave such lists unnamed.
  if (!is.null(nms)) {
    names(L) <- gsub(".", s, nms, fixed = TRUE)
  }
  lapply(L, subnames, s)
}
# Replace every "." inside the list names by ":" before calling listNames(),
# so that the only dots left in `res` are the separators added by unlist().
# NOTE: this assumes that no name in `p` already contains ":".
res <- listNames(subnames(p, ":"), maxDepth = 3)
# Inner gsub(): turn the separator dots into "$".
# Outer gsub(): turn the ":" placeholders back into the original ".".
gsub(":", ".",
gsub(".", "$", res, fixed = TRUE),
fixed = TRUE
)
#[1] "a" "b$b1" "b$b2" "c$c1$c11" "c$c1$c12" "c$c.2"

Not a full answer but I imagine rrapply package could help you here?
One option could be to extract all names:
library(rrapply)
library(dplyr)
rrapply(p, how = "melt") %>%
select(-value)
# L1 L2 L3
# 1 a <NA> <NA>
# 2 b b1 <NA>
# 3 b b2 <NA>
# 4 c c1 c11
# 5 c c1 c12
# 6 c c.2 t
The problem here is that data.frame names are included above too so you could extract them separately:
#extract data frame name
rrapply(p, classes = "data.frame", how = "melt") %>%
select(-value)
# L1 L2
# 1 c c.2
Then you could play around with these two datasets and perhaps extract duplicates but keep dataframe names
rrapply(p, how = "melt") %>%
bind_rows(rrapply(p, classes = "data.frame", how = "melt"))
#then filter etc...

A way might be:
# Collect the name path of every leaf of a nested list, one character vector
# per leaf, so that names containing "." stay intact.
#
#   l: list to walk; data frames and non-list values are leaves.
#   n: number of levels that may still be descended; once it drops below 1
#      the current node is reported as a leaf.
#   N: names accumulated so far, innermost first (pass NULL at the top level).
#
# Returns an unnamed list of character vectors, each ordered from root to leaf.
listNames <- function(l, n, N) {
  # Scalar conditions: use the short-circuiting `||`, not elementwise `|`.
  if (!is.list(l) || is.data.frame(l) || n < 1) {
    return(list(rev(N)))
  }
  # Prepend each child's name to the path collected so far.
  child_paths <- lapply(names(l), c, N)
  nested <- Map(listNames, l, n = n - 1, N = child_paths)
  # Splice the per-child results into one flat, unnamed list.
  unlist(nested, recursive = FALSE, use.names = FALSE)
}
listNames(p, 3, NULL)
#[[1]]
#[1] "a"
#
#[[2]]
#[1] "b" "b1"
#
#[[3]]
#[1] "b" "b2"
#
#[[4]]
#[1] "c" "c1" "c11"
#
#[[5]]
#[1] "c" "c1" "c12"
#
#[[6]]
#[1] "c" "c.2"

Related

Extract colnames from a nested list of data.frames

I have a nested list of data.frames, what is the easiest way to get the column names of all data.frames?
Example:
d = data.frame(a = 1:3, b = 1:3, c = 1:3)
l = list(a = d, list(b = d, c = d))
Result:
$a
[1] "a" "b" "c"
$b
[1] "a" "b" "c"
$c
[1] "a" "b" "c"
There are already a couple of answers. But let me leave another approach. I used rapply2() in the rawr package.
devtools::install_github('raredd/rawr')
library(rawr)
library(purrr)
rapply2(l = l, FUN = colnames) %>%
flatten
$a
[1] "a" "b" "c"
$b
[1] "a" "b" "c"
$c
[1] "a" "b" "c"
Here is a base R solution.
You can define a custom function to flatten your nested list (it can deal with nested lists of any depth, e.g., more than 2 levels), i.e.,
# Flatten a nested list until none of its elements is a plain list any more.
#
#   x: a list whose elements are either plain lists (class exactly "list") or
#      leaves such as data frames.
#
# Returns a one-level list of the leaves. Names are the ones produced by
# unlist(recursive = FALSE), i.e. nested names are joined with ".".
flatten <- function(x) {
  # One TRUE/FALSE per element. sapply(x, class) would return a matrix or a
  # list when elements carry several classes, giving a mask of the wrong
  # length.
  is_list <- vapply(x, function(el) identical(class(el), "list"), logical(1))
  r <- c(x[!is_list], unlist(x[is_list], recursive = FALSE))
  if (!any(is_list)) {
    return(r)
  }
  # Peeling off one level may have exposed further lists.
  flatten(r)
}
and then use the following code to achieve the colnames
out <- Map(colnames,flatten(l))
such that
> out
$a
[1] "a" "b" "c"
$b
[1] "a" "b" "c"
$c
[1] "a" "b" "c"
Example with a deeper nested list
l <- list(a = d, list(b = d, list(c = list(e = list(f= list(g = d))))))
> l
$a
a b c
1 1 1 1
2 2 2 2
3 3 3 3
[[2]]
[[2]]$b
a b c
1 1 1 1
2 2 2 2
3 3 3 3
[[2]][[2]]
[[2]][[2]]$c
[[2]][[2]]$c$e
[[2]][[2]]$c$e$f
[[2]][[2]]$c$e$f$g
a b c
1 1 1 1
2 2 2 2
3 3 3 3
and you will get
> out
$a
[1] "a" "b" "c"
$b
[1] "a" "b" "c"
$c.e.f.g
[1] "a" "b" "c"
Here is an attempt to do this as Vectorized as possible,
i1 <- names(unlist(l, TRUE, TRUE))
#[1] "a.a1" "a.a2" "a.a3" "a.b1" "a.b2" "a.b3" "a.c1" "a.c2" "a.c3" "b.a1" "b.a2" "b.a3" "b.b1" "b.b2" "b.b3" "b.c1" "b.c2" "b.c3" "c.a1" "c.a2" "c.a3" "c.b1" "c.b2" "c.b3" "c.c1" "c.c2" "c.c3"
i2 <- names(split(i1, gsub('\\d+', '', i1)))
#[1] "a.a" "a.b" "a.c" "b.a" "b.b" "b.c" "c.a" "c.b" "c.c"
We can now split i2 on everything before the dot, which will give,
split(i2, sub('\\..*', '', i2))
# $a
# [1] "a.a" "a.b" "a.c"
# $b
# [1] "b.a" "b.b" "b.c"
# $c
# [1] "c.a" "c.b" "c.c"
To get them fully cleaned, we need to loop over and apply a simple regex,
lapply(split(i2, sub('\\..*', '', i2)), function(i)sub('.*\\.', '', i))
which gives,
$a
[1] "a" "b" "c"
$b
[1] "a" "b" "c"
$c
[1] "a" "b" "c"
The Code compacted
i1 <- names(unlist(l, TRUE, TRUE))
i2 <- names(split(i1, gsub('\\d+', '', i1)))
final_res <- lapply(split(i2, sub('\\..*', '', i2)), function(i)sub('.*\\.', '', i))
Try this
d = data.frame(a = 1:3, b = 1:3, c = 1:3)
l = list(a = d, list(b = d, c = d))
# Apply `f` to every data frame found in a nested list.
#
#   x: a data frame, or a (possibly nested) list containing data frames.
#   f: function applied to each data frame, e.g. names.
#
# Returns f(x) for a data frame; for a list, a list of the same shape with
# each data frame replaced by its result. Other leaves yield NULL.
foo <- function(x, f) {
  # Test for data frames first: they are lists too.
  if (is.data.frame(x)) {
    return(f(x))
  }
  # A non-list leaf cannot be descended into; lapply() over it would call
  # foo() on the same value again and never terminate.
  if (!is.list(x)) {
    return(NULL)
  }
  lapply(x, foo, f = f)
}
foo(l, names)
The crux here is that data.frames are actually a special kind of list, so it matters what you test for (and in which order).
Small explanation: this calls for recursion, since every element you look at is either a data frame or another list, so at each step you decide whether to apply names (data frame) or to go deeper into the recursion and call foo again (list).
First create l1, a nested list with only the colnames
l1 <- lapply(l, function(x) if(is.data.frame(x)){
list(colnames(x)) #necessary to list it for the unlist() step afterwards
}else{
lapply(x, colnames)
})
Then unlist l1
unlist(l1, recursive=F)
Here is one way using purrr functions map_depth and vec_depth
library(purrr)
return_names <- function(x) {
if(inherits(x, "list"))
return(map_depth(x, vec_depth(x) - 2, names))
else return(names(x))
}
map(l, return_names)
#$a
#[1] "a" "b" "c"
#[[2]]
#[[2]]$b
#[1] "a" "b" "c"
#[[2]]$c
#[1] "a" "b" "c"
Using an external package, this is also straightforward with rrapply() in the rrapply-package (and works for arbitrary levels of nesting):
library(rrapply)
rrapply(l, classes = "data.frame", f = colnames, how = "flatten")
#> $a
#> [1] "a" "b" "c"
#>
#> $b
#> [1] "a" "b" "c"
#>
#> $c
#> [1] "a" "b" "c"
## deeply nested list
l2 <- list(a = d, list(b = d, list(c = list(e = list(f = list(g = d))))))
rrapply(l2, classes = "data.frame", f = colnames, how = "flatten")
#> $a
#> [1] "a" "b" "c"
#>
#> $b
#> [1] "a" "b" "c"
#>
#> $g
#> [1] "a" "b" "c"

Creating several new vectors from an original vector with separators

I'm trying to create several vectors from an original vector.
I read some posts but couldn't find something to solve my problem.
My original vector is looking like this:
> orig_vec
[1] "A" "B" "C" "D;" "1" "2;" "a1" "a2" "a3"
I want vectors that look like this:
> vector1
[1] "A" "B" "C" "D"
> vector2
[1] "1" "2"
> vector3
[1] "a1" "a2" "a3"
So what I need is a code which recognizes the semicolons as separators and creates new vectors depending on the number of separated values in "orig_vec".
I also have the problem that the "orig_vec" can change.
When it looks like this:
> orig_vec
[1] "A" "B" "C" "D" "E;" "1" "2;" "a1" "a2" "a3;" "b1"
I need to get automatically these vectors:
> vector1
[1] "A" "B" "C" "D" "E"
> vector2
[1] "1" "2"
> vector3
[1] "a1" "a2" "a3"
> vector4
[1] "b1"
I'm sorry that I can't provide more code or any idea of a solution.
This should work:
x <- c("A", "B", "C", "D;", "1", "2;", "a1", "a2", "a3")
# Group id per element: the running count of ";" seen *before* it, so the
# element carrying the semicolon still belongs to the group it closes.
# simplify = FALSE keeps the result a list even when every group happens to
# have the same length (sapply() would otherwise collapse it into a matrix).
parts <- sapply(split(x, c(0, cumsum(grepl(";", x))[-length(x)])),
  function(x) gsub(";", "", x),
  simplify = FALSE)
parts
$`0`
[1] "A" "B" "C" "D"
$`1`
[1] "1" "2"
$`2`
[1] "a1" "a2" "a3"
We use the cumsum() of condition grepl(";", x) to create a vector for subsetting with split(), then remove the semicolons by sapply()ing gsub().
I like @LAP's answer as well; here's another option:
vec <- c("A", "B", "C", "D;", "1", "2;", "a1", "a2", "a3;", "b1")
# Positions of the elements that close a group.
ix <- grep(";", vec)
# Each group runs from just after the previous separator up to the next one.
starts <- c(1, ix + 1)
ends <- c(ix, length(vec))
# If the last element ends in ";", the final range would be reversed
# (length + 1 down to length); drop it.
keep <- starts <= ends
# SIMPLIFY = FALSE: always return a list, even when all groups have the same
# length (mapply() would otherwise collapse them into a matrix).
pieces <- mapply(function(x, ix1, ix2) x[ix1:ix2],
  x = list(sub(";", "", vec)),
  ix1 = starts[keep],
  ix2 = ends[keep],
  SIMPLIFY = FALSE)
pieces
[[1]]
[1] "A" "B" "C" "D"
[[2]]
[1] "1" "2"
[[3]]
[1] "a1" "a2" "a3"
[[4]]
[1] "b1"
You'll notice most people are giving you answers that result in a list of vectors, rather than a handful of vectors assigned to variable names. It's generally much cleaner and easier to work with lists of objects rather than objects scattered around in your namespace. Just an added $.02.
Here is one way, based on the idea of first joining on a space then successively splitting, first on ; and then on a space:
s <- c("A", "B", "C", "D;", "1" , "2;" ,"a1", "a2", "a3")
s <- paste0(s,collapse = ' ')
s <- unlist(strsplit(s, ';'))
vectors <- lapply(s,function(x) unlist(strsplit(trimws(x),' ')))
> vectors
[[1]]
[1] "A" "B" "C" "D"
[[2]]
[1] "1" "2"
[[3]]
[1] "a1" "a2" "a3"
Just throwing in a tidyverse approach that works in a single pipe.
Similar to other answers, collapse the vector into a single string, then split that string on each ;. I'm using a space as the collapse so I can use str_trim easily later on.
library(tidyverse)
x %>%
paste(collapse = " ") %>%
strsplit(split = ";", fixed = T)
#> [[1]]
#> [1] "A B C D E" " 1 2" " a1 a2 a3" " b1"
Since strsplit gives you a list and, at least in this scenario, you're only interested in the first list entry, pull it out with [[ and trim the beginning and trailing spaces of those vectors. The map gives you a list of vectors of one string each.
x %>%
paste(collapse = " ") %>%
strsplit(split = ";", fixed = T) %>%
`[[`(1) %>%
map(str_trim)
#> [[1]]
#> [1] "A B C D E"
#>
#> [[2]]
#> [1] "1 2"
#>
#> [[3]]
#> [1] "a1 a2 a3"
#>
#> [[4]]
#> [1] "b1"
Then split each vector by the spaces, and flatten into one list of vectors.
All in one pipe:
x %>%
paste(collapse = " ") %>%
strsplit(split = ";", fixed = T) %>%
`[[`(1) %>%
map(str_trim) %>%
map(str_split, " ") %>%
flatten()
#> [[1]]
#> [1] "A" "B" "C" "D" "E"
#>
#> [[2]]
#> [1] "1" "2"
#>
#> [[3]]
#> [1] "a1" "a2" "a3"
#>
#> [[4]]
#> [1] "b1"
Created on 2019-02-13 by the reprex package (v0.2.1)

Creating strings from dataframe

My dataframe
x1 <- data.frame(C1 = letters[1:4], C3=1:4, C3=letters[11:14])
I need a list where each list element holds two values from a row, like this:
x2 <- list(c("a", "1"), c("b", "2"), c("c", "3"), c("d", "4"))
Basically each two values from a row need to be a listelement so that I can process them later on!
I tried
lapply(X = x2, MARGIN = 1, FUN = paste, collapse = "")
But that did not give me the desired output!
Is this what you want?
paste0(x1[,1], x1[,2])
# [1] "a1" "b2" "c3" "d4"
How about:
as.list(paste0(x1[,1], x1[,2]))
# [[1]]
# [1] "a1"
#
# [[2]]
# [1] "b2"
#
# [[3]]
# [1] "c3"
#
# [[4]]
# [1] "d4"
It doesn't matter how many rows you have. You just need to specify the columns you want pasted into a string.
Here is a method using lapply:
# seq_len() instead of 1:nrow(): yields an empty sequence (not c(1, 0)) when
# x1 has no rows.
lapply(seq_len(nrow(x1)), function(i) c(x1[i, 1], x1[i, 2]))
The result is
[[1]]
[1] "a" "1"
[[2]]
[1] "b" "2"
[[3]]
[1] "c" "3"
[[4]]
[1] "d" "4"
data
x1 <- data.frame(C1 = letters[1:4], C3=1:4, C3=letters[11:14],
stringsAsFactors = F)
Note that I used the stringsAsFactors = F argument to construct the data. If I didn't do this, then the character columns (C1 and the second C3) would be factors, so I'd have to wrap x1[i, 1] in as.character.
If there are multiple columns, we can use do.call
as.list(do.call(paste0, x1[-3]))

Applying as.numeric only to elements of a list that can be coerced to numeric (in R)

I have a function which returns a list containing individual character vectors which I would like to convert to numeric. Most of the time, all the elements of the list can easily be coerced to numeric:
and so a simple lapply(x, FUN = as.numeric) works fine.
e.g.
l <- list(a = c("1","1"), b = c("2","2"))
l
$a
[1] "1" "1"
$b
[1] "2" "2"
lapply(l, FUN = as.numeric)
$a
[1] 1 1
$b
[1] 2 2
However, in some situations, vectors contain true characters:
e.g.
l <- list(a = c("1","1"), b = c("a","b"))
l
$a
[1] "1" "1"
$b
[1] "a" "b"
lapply(l, FUN = as.numeric)
$a
[1] 1 1
$b
[1] NA NA
The solution I have come up with works but feels a little convoluted:
l.id <- unlist(lapply(l, FUN = function(x){all(!is.na(suppressWarnings(as.numeric(x))))}))
l.id
a b
TRUE FALSE
l[l.id] <- lapply(l[l.id], FUN = as.numeric)
l
$a
[1] 1 1
$b
[1] "a" "b"
So I was just wondering if anyone out there had a more streamlined and elegant solution to suggest.
Thanks!
One option would be to check whether all the elements in the vector have only numbers and if so convert to numeric or else stay as the same.
lapply(l, function(x) if(all(grepl('^[0-9.]+$', x))) as.numeric(x) else x)
Or we can use type.convert to automatically convert the class, but the character vectors will be converted to factor class.
lapply(l, type.convert)
You could also do something like
lapply(l, function(x) if(is.numeric(t <- type.convert(x))) t else x)
# $a
# [1] 1 1
#
# $b
# [1] "a" "b"
This doesn't convert anything other than when a numeric results from type.convert(). Or, for this simple case we can use as.is = TRUE but note that this will not always give us what we want.
lapply(l, type.convert, as.is = TRUE)
# $a
# [1] 1 1
#
# $b
# [1] "a" "b"

Inserting values in one list into another list by index

I have two lists x and y, and a vector of indices where.
x <- list(a = 1:4, b = letters[1:6])
y <- list(a = c(20, 50), b = c("abc", "xyz"))
where <- c(2, 4)
I want to insert y into x at the indices in where, so that the result is
list(a = c(1,20,2,50,3,4), b = c("a", "abc", "b", "xyz", "c", "d", "e", "f"))
#$a
#[1] 1 20 2 50 3 4
#
#$b
#[1] "a" "abc" "b" "xyz" "c" "d" "e" "f"
I've been trying it with append, but it's not working.
lapply(seq(x), function(i) append(x[[i]], y[[i]], after = where[i]))
#[[1]]
#[1] 1 2 20 50 3 4
#
#[[2]]
#[1] "a" "b" "c" "d" "abc" "xyz" "e" "f"
This is appending at the wrong index. Plus, I want to retain the list names in the process. I also don't know if append is the right function for this, since I've literally never seen it used anywhere.
What's the best way to insert values from one list into another list using an index vector?
How about an mapply solution
x <- list(a = 1:4, b = letters[1:6])
y <- list(a = c(20, 50), b = c("abc", "xyz"))
where <- c(2, 4)
mapply(function(x,y,w) {
r <- vector(class(x), length(x)+length(y))
r[-w] <- x
r[w] <- y
r
}, x, y, MoreArgs=list(where), SIMPLIFY=FALSE)
which returns
$a
[1] 1 20 2 50 3 4
$b
[1] "a" "abc" "b" "xyz" "c" "d" "e" "f"
which seems to be the results you desire.
Here I created an APPEND function that is an iterative (via Reduce) version of append:
# Insert the values of `y` into `x` one after another, so that the i-th value
# ends up at position where[i] of the growing result.
#
#   x:     vector to insert into.
#   where: target positions of the inserted values.
#   y:     values to insert, paired element-wise with `where`.
#
# Returns `x` with all values of `y` inserted.
APPEND <- function(x, where, y) {
  # One (value, after) pair per insertion; append() inserts *after* an index,
  # hence the shift by one.
  insertions <- Map(list, y, where - 1)
  insert_one <- function(acc, pair) {
    append(acc, pair[[1]], pair[[2]])
  }
  Reduce(insert_one, insertions, init = x)
}
Then you just need to call that function via Map:
Map(APPEND, x, list(where), y)

Resources