I have a list of lists and I want the sub-lists to all have the same length
i.e. to pad them with NAs if needed so they all reach the length of the longest list.
Mock example
list1 <- list(1, 2, 3)
list2 <- list(1, 2, 3, 4, 5)
list3 <- list(1, 2, 3, 4, 5, 6)
list_lists <- list(list1, list2, list3)
My best attempt yet
max_length <- max(unlist(lapply (list_lists, FUN = length)))
# returns the length of the longest list
list_lists <- lapply (list_lists, function (x) length (x) <- max_length)
Problem, it is replacing all my sub-lists into an integer = max_length...
list_lists [[1]]
> [1] 6
Can someone help?
Try this (where ls is your list):
lapply(lapply(sapply(ls, unlist), "length<-", max(lengths(ls))), as.list)
In lists, NULL would seem more appropriate than NA, and could be added with vector:
list_lists <- list(list(1, 2, 3),
list(1, 2, 3, 4, 5),
list(1, 2, 3, 4, 5, 6))
list_lists2 <- Map(function(x, y){c(x, vector('list', length = y))},
list_lists,
max(lengths(list_lists)) - lengths(list_lists))
str(list_lists2)
#> List of 3
#> $ :List of 6
#> ..$ : num 1
#> ..$ : num 2
#> ..$ : num 3
#> ..$ : NULL
#> ..$ : NULL
#> ..$ : NULL
#> $ :List of 6
#> ..$ : num 1
#> ..$ : num 2
#> ..$ : num 3
#> ..$ : num 4
#> ..$ : num 5
#> ..$ : NULL
#> $ :List of 6
#> ..$ : num 1
#> ..$ : num 2
#> ..$ : num 3
#> ..$ : num 4
#> ..$ : num 5
#> ..$ : num 6
If you really want NAs, just change vector to rep:
list_lists3 <- Map(function(x, y){c(x, rep(NA, y))},
list_lists,
max(lengths(list_lists)) - lengths(list_lists))
str(list_lists3)
#> List of 3
#> $ :List of 6
#> ..$ : num 1
#> ..$ : num 2
#> ..$ : num 3
#> ..$ : logi NA
#> ..$ : logi NA
#> ..$ : logi NA
#> $ :List of 6
#> ..$ : num 1
#> ..$ : num 2
#> ..$ : num 3
#> ..$ : num 4
#> ..$ : num 5
#> ..$ : logi NA
#> $ :List of 6
#> ..$ : num 1
#> ..$ : num 2
#> ..$ : num 3
#> ..$ : num 4
#> ..$ : num 5
#> ..$ : num 6
Note the types in the latter won't match up unless you specify NA_real_ or coerce NA to match the type of x.
Here is your code fixed.
The function should return x, not length(x).
Also, I used vectors, not lists for clarity.
list1 <- c(1, 2, 3)
list2 <- c(1, 2, 3, 4, 5)
list3 <- c(1, 2, 3, 4, 5, 6)
list_lists <- list(list1, list2, list3)
max_length <- max(unlist(lapply (list_lists, FUN = length)))
list_lists <- lapply (list_lists, function (x) {length (x) <- max_length;x})
# [[1]]
# [1] 1 2 3 NA NA NA
#
# [[2]]
# [1] 1 2 3 4 5 NA
#
# [[3]]
# [1] 1 2 3 4 5 6
For original lists the result is:
# [[1]]
# [[1]][[1]]
# [1] 1
#
# [[1]][[2]]
# [1] 2
#
# [[1]][[3]]
# [1] 3
#
# [[1]][[4]]
# NULL
#
# [[1]][[5]]
# NULL
#
# [[1]][[6]]
# NULL
#
#
# [[2]]
# [[2]][[1]]
# [1] 1
#
# [[2]][[2]]
# [1] 2
#
# [[2]][[3]]
# [1] 3
#
# [[2]][[4]]
# [1] 4
#
# [[2]][[5]]
# [1] 5
#
# [[2]][[6]]
# NULL
#
#
# [[3]]
# [[3]][[1]]
# [1] 1
#
# [[3]][[2]]
# [1] 2
#
# [[3]][[3]]
# [1] 3
#
# [[3]][[4]]
# [1] 4
#
# [[3]][[5]]
# [1] 5
#
# [[3]][[6]]
# [1] 6
Try this:
funJoeOld <- function(ls) {
list_length <- sapply(ls, length)
max_length <- max(list_length)
lapply(seq_along(ls), function(x) {
if (list_length[x] < max_length) {
c(ls[[x]], lapply(1:(max_length - list_length[x]), function(y) NA))
} else {
ls[[x]]
}
})
}
funJoeOld(list_lists)[[1]]
[[1]]
[1] 1
[[2]]
[1] 2
[[3]]
[1] 3
[[4]]
[1] NA
[[5]]
[1] NA
[[6]]
[1] NA
Edit
Just wanted to illuminate how using the right tools in R makes a huge difference. Although my solution gives correct results, it is very inefficient. By replacing sapply(ls, length) with lengths as well as lapply(1:z, function(y) NA) with as.list(rep(NA, z)), we obtain almost a 15x speed up. Observe:
funJoeNew <- function(ls) {
list_length <- lengths(ls)
max_length <- max(list_length)
lapply(seq_along(ls), function(x) {
if (list_length[x] < max_length) {
c(ls[[x]], as.list(rep(NA, max_length - list_length[x])))
} else {
ls[[x]]
}
})
}
funAlistaire <- function(ls) {
Map(function(x, y){c(x, rep(NA, y))},
ls,
max(lengths(ls)) - lengths(ls))
}
fun989 <- function(ls) {
lapply(lapply(sapply(ls, unlist), "length<-", max(lengths(ls))), as.list)
}
Compare equality
set.seed(123)
samp_list <- lapply(sample(1000, replace = TRUE), function(x) {lapply(1:x, identity)})
## have to unlist as the NAs in 989 are of the integer
## variety and the NAs in Joe/Alistaire are logical
identical(sapply(fun989(samp_list), unlist), sapply(funJoeNew(samp_list), unlist))
[1] TRUE
identical(funJoeNew(samp_list), funAlistaire(samp_list))
[1] TRUE
Benchmarks
microbenchmark(funJoeOld(samp_list), funJoeNew(samp_list), fun989(samp_list),
funAlistaire(samp_list), times = 30, unit = "relative")
Unit: relative
expr min lq mean median uq max neval cld
funJoeOld(samp_list) 21.825878 23.269846 17.434447 20.803035 18.851403 4.8056784 30 c
funJoeNew(samp_list) 1.827741 1.841071 2.253294 1.667047 1.780324 2.4659653 30 ab
fun989(samp_list) 3.108230 3.563780 3.170320 3.790048 3.888632 0.9890681 30 b
funAli(samp_list) 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 30 a
There are two take aways here:
Having a good understanding of the apply family of functions makes for
concise and efficient code (as can be seen in #alistaire's and #989's solution).
Understanding the nuances of base R in general can have considerable consequences
Not sure if you are you looking for this and you may use lengths function for lists:
list_lists <- list(unlist(list1), unlist(list2), unlist(list3))
list_lists1 <- lapply(list_lists, `length<-`, max(lengths(list_lists)))
list_lists1
> list_lists1
[[1]]
[1] 1 2 3 NA NA NA
[[2]]
[1] 1 2 3 4 5 NA
[[3]]
[1] 1 2 3 4 5 6
OR for lists of the lists, you can go one step further:
list_lists2 <- lapply(list_lists1,as.list)
> list_lists2
[[1]]
[[1]][[1]]
[1] 1
[[1]][[2]]
[1] 2
[[1]][[3]]
[1] 3
[[1]][[4]]
[1] NA
[[1]][[5]]
[1] NA
[[1]][[6]]
[1] NA
[[2]]
[[2]][[1]]
[1] 1
[[2]][[2]]
[1] 2
[[2]][[3]]
[1] 3
[[2]][[4]]
[1] 4
[[2]][[5]]
[1] 5
[[2]][[6]]
[1] NA
[[3]]
[[3]][[1]]
[1] 1
[[3]][[2]]
[1] 2
[[3]][[3]]
[1] 3
[[3]][[4]]
[1] 4
[[3]][[5]]
[1] 5
[[3]][[6]]
[1] 6
>
Related
After a global tidyverse update, I have noted a change of behaviour in my code and after many researches I am desperately unable to solve the issue. Basically I need to convert a list of elements (including lists) to a dataframe.
Here is a reprex:
Data
x <- list(
col1 = list("a", "b", "c", NA),
col2 = list(1, 2, 3, 4),
col3 = list("value1", "value2", "value1", c("value1", "value2")))
Expected behaviour and output (before tidyverse update):
x <- data.frame((sapply(x, c)))
x <- purrr::map_df(x, function(x) sapply(x, function(x) unlist(x))) %>% as.data.frame()
> x
# col1 col2 col3
# 1 a 1 value1
# 2 b 2 value2
# 3 c 3 value1
# 4 <NA> 4 value1, value2
> str(x)
# 'data.frame': 4 obs. of 3 variables:
# $ col1: chr "a" "b" "c" NA
# $ col2: num 1 2 3 4
# $ col3:List of 4
# ..$ : chr "value1"
# ..$ : chr "value2"
# ..$ : chr "value1"
# ..$ : chr "value1" "value2"
Problem encountered after update
x <- data.frame((sapply(x, c)))
x <- purrr::map_df(x, function(x) sapply(x, function(x) unlist(x)))
# Error: Argument 1 must have names.
# Run `rlang::last_error()` to see where the error occurred.
# In addition: Warning message:
# Outer names are only allowed for unnamed scalar atomic inputs
> rlang::last_error()
# <error/rlang_error>
# Argument 1 must have names.
# Backtrace:
# 1. purrr::map_df(x, function(x) sapply(x, function(x) unlist(x)))
# 2. dplyr::bind_rows(res, .id = .id)
# Run `rlang::last_trace()` to see the full context.
This error seems to be well known, and I have explored many options with the purrr::flatten_() family, and others found on Stackoverflow but was not able to solve.
Thank you if any help is providable!
The first part of your attempt gives you a list for every column irrespective of it's length.
x <- data.frame((sapply(x, c)))
str(x)
#'data.frame': 4 obs. of 3 variables:
# $ col1:List of 4
# ..$ : chr "a"
# ..$ : chr "b"
# ..$ : chr "c"
# ..$ : logi NA
# $ col2:List of 4
# ..$ : num 1
# ..$ : num 2
# ..$ : num 3
# ..$ : num 4
# $ col3:List of 4
# ..$ : chr "value1"
# ..$ : chr "value2"
# ..$ : chr "value1"
# ..$ : chr "value1" "value2"
You can unlist the above for columns with only 1 element.
x[] <- lapply(x, function(p) if(max(lengths(p)) == 1) unlist(p) else p)
x
# col1 col2 col3
#1 a 1 value1
#2 b 2 value2
#3 c 3 value1
#4 <NA> 4 value1, value2
str(x)
#'data.frame': 4 obs. of 3 variables:
# $ col1: chr "a" "b" "c" NA
# $ col2: num 1 2 3 4
# $ col3:List of 4
# ..$ : chr "value1"
# ..$ : chr "value2"
# ..$ : chr "value1"
# ..$ : chr "value1" "value2"
One option utilizing dplyr, tibble and purrr could be:
imap_dfc(x, ~ tibble(!!.y := .x)) %>%
mutate(across(where(~ all(lengths(.) == 1)), ~ unlist(.)))
col1 col2 col3
<chr> <dbl> <list>
1 a 1 <chr [1]>
2 b 2 <chr [1]>
3 c 3 <chr [1]>
4 <NA> 4 <chr [2]>
no tidyverse solution, but it seems to work..
library( rlist )
as.data.frame( rlist::list.cbind( x ) )
# col1 col2 col3
# 1 a 1 value1
# 2 b 2 value2
# 3 c 3 value1
# 4 NA 4 value1, value2
I have a list with the following structure:
> str(test)
List of 5
$ :List of 2
..$ : num [1:4] 0.0544 0.0839 0.0486 0.043
..$ : num [1:4] 0.0799 0.2434 0.0373 0.2166
$ :List of 2
..$ : num [1:6] 0.938 1.047 1.022 0.689 0.39 ...
..$ : num [1:6] 0.871 0.25 0.824 0.664 0.481 ...
$ :List of 2
..$ : num [1:4] 0.000598 0.000923 0.000535 0.000473
..$ : num [1:4] 0.001039 0.003164 0.000485 0.002816
$ :List of 2
..$ : num [1:6] 0.01032 0.01152 0.01124 0.00758 0.00429 ...
..$ : num [1:6] 0.01133 0.00325 0.01071 0.00864 0.00625 ...
$ :List of 2
..$ : num -0.659
..$ : num -0.962
I want to compute the mean of each entry, i.e. for the first entry I want:
x <- mean(c(0.0544, 0.0839, 0.0486, 0.043))
y <- mean(c(0.0799, 0.2434, 0.0373, 0.2166))
And finally the mean of those two results = mean(c(x,y).
What is the most efficient way to do this?
Sample data - one (parent) list of (child) lists.
set.seed(1)
foo <- list(
list(runif(4),runif(4)),
list(runif(6),runif(6))
)
#> [[1]]
#> [[1]][[1]]
#> [1] 0.2655087 0.3721239 0.5728534 0.9082078
#>
#> [[1]][[2]]
#> [1] 0.2016819 0.8983897 0.9446753 0.6607978
#>
#>
#> [[2]]
#> [[2]][[1]]
#> [1] 0.62911404 0.06178627 0.20597457 0.17655675 0.68702285 0.38410372
#>
#> [[2]][[2]]
#> [1] 0.7698414 0.4976992 0.7176185 0.9919061 0.3800352 0.7774452
Get the means of each child list:
foo_means <- lapply(foo, lapply, mean)
#> [[1]]
#> [[1]][[1]]
#> [1] 0.5296734
#>
#> [[1]][[2]]
#> [1] 0.6763862
#>
#>
#> [[2]]
#> [[2]][[1]]
#> [1] 0.3574264
#>
#> [[2]][[2]]
#> [1] 0.6890909
To get the average of (child) averages for each (parent) list element:
lapply(foo_means, function(x) mean(unlist(x)))
#> [[1]]
#> [1] 0.6030298
#>
#> [[2]]
#> [1] 0.5232587
The first question appears to be a perfect use for rapply. Using the same sample data as provided above in #ngm's answer, you can do:
foo2 <- rapply(foo, mean, how = "replace")
foo2
#[[1]]
#[[1]][[1]]
#[1] 0.5296734
#
#[[1]][[2]]
#[1] 0.6763862
#
#
#[[2]]
#[[2]][[1]]
#[1] 0.3574264
#
#[[2]][[2]]
#[1] 0.6890909
Since the elements of your list are also lists, then use some nested lapply's to get the means of each list.
lapply(test, function(x) lapply(x, mean))
If you like a matrix as an output, you can do
sapply(test, function(x) sapply(x, mean))
With the sapply, you can get the means of each pair with
colMeans(sapply(test, function(x) sapply(x, mean, na.rm = TRUE)))
This has been answered many times.
It is as simple as
mean(unlist(lapply(mylist,sapply,mean)))
Follow this link.
Grouping functions (tapply, by, aggregate) and the *apply family
You can also use modify_depth function from the purrr package. Take example posted by #ngm
library(purrr)
set.seed(1)
foo <- list(list(runif(4), runif(4)),
list(runif(6), runif(6)))
modify_depth(foo, 2, mean)
#> [[1]]
#> [[1]][[1]]
#> [1] 0.5296734
#>
#> [[1]][[2]]
#> [1] 0.6763862
#>
#>
#> [[2]]
#> [[2]][[1]]
#> [1] 0.3574264
#>
#> [[2]][[2]]
#> [1] 0.6890909
Edit: to calculate the mean of all elements in the 1st level of that list
modify_depth(foo, 2, mean) %>%
map(flatten_dbl) %>%
map_dbl(mean)
modify_depth(foo, 2, mean) %>%
simplify_all() %>%
map_dbl(mean)
[1] 0.60303 0.52326
Created on 2018-04-20 by the reprex package (v0.2.0).
I want to interleave interElem after every 2 list elements.
Data:
listi <- c(rbind(letters[1:4], list(c(13,37))))
interElem <- c("inter","leavistan")
looks like:
> listi
[[1]]
[1] "a"
[[2]]
[1] 13 37
[[3]]
[1] "b"
[[4]]
[1] 13 37
[[5]]
[1] "c"
[[6]]
[1] 13 37
[[7]]
[1] "d"
[[8]]
[1] 13 37
>
Desired result (list element numbers are not accurate)
> listi
[[1]]
[1] "a"
[[2]]
[1] 13 37
[[XXX]]
[1] "inter" "leavistan"
[[3]]
[1] "b"
[[4]]
[1] 13 37
[[XXX]]
[1] "inter" "leavistan"
[[5]]
[1] "c"
[[6]]
[1] 13 37
[[XXX]]
[1] "inter" "leavistan"
[[7]]
[1] "d"
[[8]]
[1] 13 37
>
Here's the length of the new list
len = length(listi) + max(0, floor((length(listi) - 1) / 2))
and the index of the elements that should be the original values
idx = seq_len(len) %% 3 != 0
Use these to create a new list and insert the old and interstitial values
res = vector("list", len)
res[idx] = l
res[!idx] = list(v)
Package as a function for robustness and reuse.
fun = function(l, v, n = 2) {
## validate arguments
stopifnot(
is.numeric(n), length(n) == 1, !is.na(n),
is.list(l), is.vector(v)
)
## length and index for original values
len = length(l) + max(0, floor((length(l) - 1) / n))
idx = seq_len(len) %% (n + 1) != 0
## create and populate result
res = vector("list", len)
res[idx] = l
res[!idx] = list(v)
res
}
with
> str(fun(listi, interElem))
List of 11
$ : chr "a"
$ : num [1:2] 13 37
$ : chr [1:2] "inter" "leavistan"
$ : chr "b"
$ : num [1:2] 13 37
$ : chr [1:2] "inter" "leavistan"
$ : chr "c"
$ : num [1:2] 13 37
$ : chr [1:2] "inter" "leavistan"
$ : chr "d"
$ : num [1:2] 13 37
We can create a grouping variable to split every 2 elements with gl, then append the 'interElem' at the end of each nested list and flatten it with do.call(c
res <- head(do.call(c, lapply(split(listi, as.integer(gl(length(listi), 2,
length(listi)))), function(x) c(x, list(interElem )))), -1)
names(res) <- NULL
Or another option is to convert it to matrix, rbind with 'interElem' and concatenate to list
head(c(rbind(matrix(listi, nrow=2), list(interElem))), -1)
#[[1]]
#[1] "a"
#[[2]]
#[1] 13 37
#[[3]]
#[1] "inter" "leavistan"
#[[4]]
#[1] "b"
#[[5]]
#[1] 13 37
#[[6]]
#[1] "inter" "leavistan"
#[[7]]
#[1] "c"
#[[8]]
#[1] 13 37
#[[9]]
#[1] "inter" "leavistan"
#[[10]]
#[1] "d"
#[[11]]
#[1] 13 37
Or we can use append in a for loop
listn <- listi
i1 <- seq(2, length(listi), by = 2)
i2 <- i1 + (seq_along(i1) - 1)
for(i in seq_along(i2)) listn <- append(listn, list(interElem), after = i2[i])
head(listn, -1)
This works on your example and on lists with odd length, including 2 an 1. If the length of the list is odd, matrix() and cbind() rise warnings. Empty list as an entry returns insert list. All what I am trying to do here is to form the vector of indices for a following subsetting of the aggregated listiand list(interElem).
l <- length(listi)
i <- rbind(matrix(1:l, nrow = 2), rep(l+1,l%/%2))[1:(l + l%/%2 - (l+1)%%2)]
c(listi, list(interElem))[i]
I have a nested list containing NULL elements, and I'd like to replace those with something else. For example:
l <- list(
NULL,
1,
list(
2,
NULL,
list(
3,
NULL
)
)
)
I want to replace the NULL elements with NA. The natural way to do this is to recursively loop over the list using rapply. I tried:
rapply(l, function(x) NA, classes = "NULL", how = "replace")
rapply(l, function(x) if(is.null(x)) NA else x, how = "replace")
Unfortunately, neither of these methods work, since rapply apparently ignores NULL elements.
How can I manipulate the NULL elements in a nested list?
I'm going to go with "use a version of rapply doesn't doesn't have weird behaviour with NULL". This is the simplest implementation I can think of:
simple_rapply <- function(x, fn)
{
if(is.list(x))
{
lapply(x, simple_rapply, fn)
} else
{
fn(x)
}
}
(rawr::rapply2, as mentioned in the comments by #rawr is a more sophisticated attempt.)
Now I can do the replacement using
simple_rapply(l, function(x) if(is.null(x)) NA else x)
This is what William Dunlap suggested in 2010 when this question was asked on Rhelp:
replaceInList <- function (x, FUN, ...)
{
if (is.list(x)) {
for (i in seq_along(x)) {
x[i] <- list(replaceInList(x[[i]], FUN, ...))
}
x
}
else FUN(x, ...)
}
replaceInList(l, function(x)if(is.null(x))NA else x)
This is a hack, but as far as hacks go, I think I'm somewhat happy with it.
lna <- eval(parse(text = gsub("NULL", "NA", deparse(l))))
str(lna)
#> List of 3
#> $ : logi NA
#> $ : num 1
#> $ :List of 3
#> ..$ : num 2
#> ..$ : logi NA
#> ..$ :List of 2
#> .. ..$ : num 3
#> .. ..$ : logi NA
Update:
If for some reason you needed "NULL" as a character entry in the list (corner case, much?) you can still use the above hack since it replaces the contents of the string, not the quotes, thus it just requires another step
l2 <- list(
NULL,
1,
list(
2,
"NULL",
list(
3,
NULL
)
)
)
lna2 <- eval(parse(text = gsub("NULL", "NA", deparse(l2))))
lna2_2 <- eval(parse(text = gsub('\\"NA\\"', '\"NULL\"', deparse(lna2))))
str(lna2_2)
#> List of 3
#> $ : logi NA
#> $ : num 1
#> $ :List of 3
#> ..$ : num 2
#> ..$ : chr "NULL"
#> ..$ :List of 2
#> .. ..$ : num 3
#> .. ..$ : logi NA
I wrapped the replacement inside the sapply, which makes it more readable/understandable to me, albeit less general.
replace_null <- function(x) {
lapply(x, function(x) {
if (is.list(x)){
replace_null(x)
} else{
if(is.null(x)) NA else(x)
}
})
}
replace_null(l)
This can also be done with rrapply() in the rrapply-package. Below are a few different ways we could replace the NULL elements in a nested list by NA values:
library(rrapply)
l <- list(
NULL,
1,
list(
2,
NULL,
list(
3,
NULL
)
)
)
## replace NULL by NA using only f
rrapply(l, f = function(x) if(is.null(x)) NA else x, how = "replace")
#> [[1]]
#> [1] NA
#>
#> [[2]]
#> [1] 1
#>
#> [[3]]
#> [[3]][[1]]
#> [1] 2
#>
#> [[3]][[2]]
#> [1] NA
#>
#> [[3]][[3]]
#> [[3]][[3]][[1]]
#> [1] 3
#>
#> [[3]][[3]][[2]]
#> [1] NA
## replace NULL by NA using condition argument
rrapply(l, condition = is.null, f = function(x) NA, how = "replace")
#> [[1]]
#> [1] NA
#>
#> [[2]]
#> [1] 1
#>
#> [[3]]
#> [[3]][[1]]
#> [1] 2
#>
#> [[3]][[2]]
#> [1] NA
#>
#> [[3]][[3]]
#> [[3]][[3]][[1]]
#> [1] 3
#>
#> [[3]][[3]][[2]]
#> [1] NA
## replace NULL by NA using condition and deflt arguments
rrapply(l, condition = Negate(is.null), deflt = NA, how = "list")
#> [[1]]
#> [1] NA
#>
#> [[2]]
#> [1] 1
#>
#> [[3]]
#> [[3]][[1]]
#> [1] 2
#>
#> [[3]][[2]]
#> [1] NA
#>
#> [[3]][[3]]
#> [[3]][[3]][[1]]
#> [1] 3
#>
#> [[3]][[3]][[2]]
#> [1] NA
We can also prune the NULL elements from the list altogether by setting how = "prune":
## keep only non-NULL elements
rrapply(l, condition = Negate(is.null), how = "prune")
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [[2]][[1]]
#> [1] 2
#>
#> [[2]][[2]]
#> [[2]][[2]][[1]]
#> [1] 3
Is there a (built'in/easy) way to recursively display the names of a interlinked list as a tree? (with possibly an output similar to the tree shell command. )
For instance with list X, with two column A and B, A consiting in two subcolumn a1 and a2
nametree(x)
X
├── A
│ ├── a1
│ └── a2
└── B
names(X) would just display [1] "A" "B"
Here is a recursive solution:
nametree <- function(X, prefix = "")
if( is.list(X) )
for( i in seq_along(X) ) {
cat( prefix, names(X)[i], "\n", sep="" )
nametree(X[[i]], paste0(prefix, " "))
}
X <- list(X = list( A = list( a1=1:10, a2=1:10 ), B = 1:10 ))
nametree(X)
# X
# A
# a1
# a2
# B
Displaying the tree structure with branches rather than spaces is slightly trickier:
nametree <- function(X, prefix1 = "", prefix2 = "", prefix3 = "", prefix4 = "")
if( is.list(X) )
for( i in seq_along(X) ) {
cat( if(i<length(X)) prefix1 else prefix3, names(X)[i], "\n", sep="" )
prefix <- if( i<length(X) ) prefix2 else prefix4
nametree(
X[[i]],
paste0(prefix, "├──"),
paste0(prefix, "│ "),
paste0(prefix, "└──"),
paste0(prefix, " ")
)
}
nametree(X)
# X
# +--A
# ¦ +--a1
# ¦ +--a2
# +--B
# +--C
# +--a
# +--b
A simple example:
> mylist <- list(A=data.frame(A1=1:3,A2=4:6),B=7:9)
> out <- lapply(mylist,names)
$A
[1] "A1" "A2"
$B
NULL
This assumes you only have dataframes one level below the list...so it's not recursive per se, but it sounds like this is similar to your data structure.
DrMike and Henrik's suggestion to use str(mylist) will be recursive and is, in fact, able to control both how deep into the structure and the display of the output.
SimonO101's example of recursion:
> df <- data.frame( A = runif(3) , B = runif(3) )
> ll <- list( A = df , B = list( C = df , D = df ) , E = 1 )
> str(ll)
List of 3
$ A:'data.frame': 3 obs. of 2 variables:
..$ A: num [1:3] 0.948 0.356 0.467
..$ B: num [1:3] 0.2319 0.7574 0.0312
$ B:List of 2
..$ C:'data.frame': 3 obs. of 2 variables:
.. ..$ A: num [1:3] 0.948 0.356 0.467
.. ..$ B: num [1:3] 0.2319 0.7574 0.0312
..$ D:'data.frame': 3 obs. of 2 variables:
.. ..$ A: num [1:3] 0.948 0.356 0.467
.. ..$ B: num [1:3] 0.2319 0.7574 0.0312
$ E: num 1
Some examples of output:
> str(mylist)
List of 2
$ A:'data.frame': 3 obs. of 2 variables:
..$ A1: int [1:3] 1 2 3
..$ A2: int [1:3] 4 5 6
$ B: int [1:3] 7 8 9
> str(mylist, give.attr=FALSE, give.length=FALSE, give.head=FALSE, vec.len=0,
indent.str="|", comp.str="----")
List of 2
|----A:'data.frame': 3 obs. of 2 variables:
| ..$ A1:NULL ...
| ..$ A2:NULL ...
|----B:NULL ...
You can use the data.tree package. For example:
x <- list( A = list( a1 = list(data = 1:10), b1 = list(data = 1:100 )), B = list(data = c(1, 3, 5) ))
library(data.tree)
xtree <- FromListSimple(x, nodeName = "X")
xtree
This prints out:
levelName
1 X
2 ¦--A
3 ¦ ¦--a1
4 ¦ °--b1
5 °--B
Or you can convert the data into a printable format:
print(xtree, maxData = function(node) if (is.null(node$data)) 0 else max(node$data))
Which shows:
levelName maxData
1 X 0
2 ¦--A 0
3 ¦ ¦--a1 10
4 ¦ °--b1 100
5 °--B 5
Finally, to show the names of a node:
names(xtree$children)
This prints:
[1] "A" "B"
Here's what I came up with, see function definition at the bottom.
sample data:
# a short list
l1 <- list(a = factor("1"), b = c(u = 3, v = 4), d= list(x=5, y =6), e= 8, f = 9)
# a longer list
l2 <- replicate(100, l1, simplify = F)
default way of printing a short list:
print_list(l1)
#> $a
#> [1] 1
#> Levels: 1
#> $b
#> u v
#> 3 4
#> $d
#> $x
#> [1] 5
#> $y
#> [1] 6
#> $e
#> [1] 8
#> $f
#> [1] 9
restrict to first 3 items when named :
print_list(l1,n_named = 3)
#> $a
#> [1] 1
#> Levels: 1
#> $b
#> u v
#> 3 4
#> $d
#> $x
#> [1] 5
#> $y
#> [1] 6
#> # + 2 named items
pass parameters to print()
print_list(l1, quote = TRUE)
#> $a
#> [1] "1"
#> Levels: "1"
#> $b
#> u v
#> 3 4
#> $d
#> $x
#> [1] 5
#> $y
#> [1] 6
#> $e
#> [1] 8
#> $f
#> [1] 9
use str() rather than print() on non list items:
print_list(l1, fun = str)
#> $a
#> Factor w/ 1 level "1": 1
#> $b
#> Named num [1:2] 3 4
#> - attr(*, "names")= chr [1:2] "u" "v"
#> $d
#> $x
#> num 5
#> $y
#> num 6
#> $e
#> num 8
#> $f
#> num 9
use invisible rather than print to display only names:
print_list(l1, fun = invisible)
#> $a
#>
#> $b
#>
#> $d
#> $x
#>
#> $y
#>
#> $e
#>
#> $f
#>
print long list with restrictions:
print_list(l2,n_named = 3, n_unnamed = 2)
#> [[1]]
#> $a
#> [1] 1
#> Levels: 1
#> $b
#> u v
#> 3 4
#> $d
#> $x
#> [1] 5
#> $y
#> [1] 6
#> # + 2 named items
#> [[2]]
#> $a
#> [1] 1
#> Levels: 1
#> $b
#> u v
#> 3 4
#> $d
#> $x
#> [1] 5
#> $y
#> [1] 6
#> # + 2 named items
#> # + 98 items
function code
#' print list nicely
#'
#' #param l list to print
#' #param n_named max number of named items to display if list/sublist contains only named items
#' #param n_unnamed max number of items to display if list/sublist contains unnamed items
#' #param fun function to use to print non list items
#' #param ... additional arguments passed to fun
#'
#' #return unchanged input
#' #export
print_list <- function(l,
n_named = 20,
n_unnamed = 6,
fun = print,
...){
dots <- list(...)
fun0 <- function(l) do.call(fun, c(list(l),dots))
print_list0(l, nm = NULL, i = NULL, indent = -2,
n_named = n_named, n_unnamed = n_unnamed , fun = fun0)
}
print_list0 <- function(l, nm = NULL, i = NULL, indent=-2,
n_named = 20,
n_unnamed = 6,
fun){
if(!is.null(nm)){
if(nm!=""){
cat(strrep(" ", indent), "$", nm,"\n",sep="")
} else {
cat(strrep(" ", indent), "[[", i,"]]\n",sep="")
}
}
if(is.data.frame(l) || !is.list(l)){
output <- capture.output(fun(l))
output <- paste(strrep(" ", indent), output, collapse="\n")
cat(output,"\n")
} else {
nm = allNames(l)
named <- all(nm != "")
if(named && length(l) > n_named){
n_unshowed <- length(l) - n_named
l <- l[seq_len(n_named)]
nm <- nm[seq_len(n_named)]
Map(print_list0, l, nm, i = seq_along(l), indent=indent+2,
n_named = n_named, n_unnamed = n_unnamed,
fun = replicate(length(l), fun))
cat(strrep(" ", indent+2), "# + ", n_unshowed, " named items\n",sep="")
} else if(length(l) > n_unnamed){
n_unshowed <- length(l) - n_unnamed
l <- l[seq_len(n_unnamed)]
nm <- nm[seq_len(n_unnamed)]
Map(print_list0, l, nm, i = seq_along(l), indent=indent+2,
n_named = n_named, n_unnamed = n_unnamed,
fun = replicate(length(l), fun))
cat(strrep(" ", indent+2), "# + ", n_unshowed, " items\n",sep="")
} else {
Map(print_list0, l, nm, i = seq_along(l), indent=indent+2,
n_named = n_named, n_unnamed = n_unnamed,
fun = replicate(length(l), fun))
}
}
invisible(l)
}