recursively change names in nested lists in R - r

I have data in a nested list structure in R and I'd like to use a lookup table to change names no matter where they are in the structure.
Example
# build up an example
x <- as.list(c("a" = NA))
x[[1]] <- vector("list", 4)
names(x[[1]]) <- c("b","c","d","e")
x$a$b <- vector("list", 2)
names(x$a$b) <- c("d","f")
x$a$c <- 3
x$a$d <- 27
x$a$e <- "d"
x$a$b$d <- "data"
x$a$b$f <- "more data"
# make a lookup table for names I want to change from; to
lkp <- data.frame(matrix(data = c("a","z","b","bee","d","dee"),
ncol = 2,
byrow = TRUE), stringsAsFactors = FALSE)
names(lkp) <- c("from","to")
Output from the above
> x
$a
$a$b
$a$b$d
[1] "data"
$a$b$f
[1] "more data"
$a$c
[1] 3
$a$d
[1] 27
$a$e
[1] "d"
> lkp
from to
1 a z
2 b bee
3 d dee
Here is what I came up with to do this for only the first level:
> for(i in 1:nrow(lkp)){
+ names(x)[names(x) == lkp$from[[i]]] <- lkp$to[[i]]
+ }
> x
$z
$z$b
$z$b$d
[1] "data"
$z$b$f
[1] "more data"
$z$c
[1] 3
$z$d
[1] 27
$z$e
[1] "d"
So that works fine but uses a loop and only gets at the first level. I've tried various versions of the *apply world but have not yet been able to get something useful.
Thanks in advance for any thoughts
EDIT:
Interestingly rapply fails miserably (or, I fail miserably in my attempt!) when trying to access and modify names. Here's an example of just trying to change all names the same
> namef <- function(x) names(x) <- "z"
> rapply(x, namef, how = "list")
$a
$a$b
$a$b$d
[1] "z"
$a$b$f
[1] "z"
$a$c
[1] "z"
$a$d
[1] "z"
$a$e
[1] "z"

I used a character vector for look-up instead of your data.frame, but it will be easy to change it if you really want a data.frame.
lkp2 <- lkp$to
names(lkp2) <- lkp$from
#' Recursively rename the elements of a nested list.
#'
#' @param nested_list A (possibly nested) list.
#' @param lookup Named character vector: the names are the old element
#'   names, the values are their replacements. Defaults to `lkp2`.
#' @return A list with the same structure as `nested_list` in which every
#'   element name found in `names(lookup)` is replaced, at every depth.
rename <- function(nested_list, lookup = lkp2) {
  old_names <- names(nested_list)
  # Unnamed lists have NULL names: there is nothing to translate, and
  # sub-assigning into NULL names would pad the result with NA names.
  if (!is.null(old_names)) {
    found <- old_names %in% names(lookup)
    old_names[found] <- unname(lookup[old_names[found]])
    names(nested_list) <- old_names
  }
  # Descend into sub-lists; non-list elements are returned untouched.
  lapply(nested_list, function(element) {
    if (is.list(element)) {
      rename(element, lookup)
    } else {
      element
    }
  })
}
rename(x)
# $z
# $z$bee
# $z$bee$dee
# [1] "data"
#
# $z$bee$f
# [1] "more data"
#
#
# $z$c
# [1] 3
#
# $z$dee
# [1] 27
#
# $z$e
# [1] "d"
I am not sure this is the best way to do it, but it seems to do the job, and if you're only working with small lists (like XML documents) then there is no need to worry much about performance.
You might want to name the function with a better name.

Using an external package you can also do this with rrapply in the rrapply-package (extension of base rapply):
library(rrapply) ## v1.2.1
rrapply(list(x),
classes = "list",
f = function(x) {
newnames <- lkp$to[match(names(x), lkp$from)]
names(x)[!is.na(newnames)] <- newnames[!is.na(newnames)]
return(x)
},
how = "recurse"
)[[1]]
#> $z
#> $z$bee
#> $z$bee$dee
#> [1] "data"
#>
#> $z$bee$f
#> [1] "more data"
#>
#>
#> $z$c
#> [1] 3
#>
#> $z$dee
#> [1] 27
#>
#> $z$e
#> [1] "d"
Here, the f function achieves essentially the same as OP's for-loop. how = "recurse" tells the function to continue recursion after the application of f.
Note that the input is wrapped as list(x) so that the f function also modifies the name(s) of the list itself.
Update
rrapply v1.2.5 contains a dedicated option how = "names" to replace names in a nested list, which is a bit less convoluted:
rrapply(
x,
f = function(x, .xname) {
newname <- lkp$to[match(.xname, lkp$from)]
return(ifelse(is.na(newname), .xname, newname))
},
how = "names"
)
#> $z
#> $z$bee
#> $z$bee$dee
#> [1] "data"
#>
#> $z$bee$f
#> [1] "more data"
#>
#>
#> $z$c
#> [1] 3
#>
#> $z$dee
#> [1] 27
#>
#> $z$e
#> [1] "d"

Related

Add positional index to a list

I would like to add a sequential element onto a list. Suppose I have the following list
lst <- list("A"=list(e1="a",e2="!"), "B"=list(e1="b", e2="#"))
$A
$A$e1
[1] "a"
$A$e2
[1] "!"
$B
$B$e1
[1] "b"
$B$e2
[1] "#"
I would like to append a e3 which is the position index of that element in the list so essentially I would like my list to be:
$A
$A$e1
[1] "a"
$A$e2
[1] "!"
$A$e3
[1] 1
$B
$B$e1
[1] "b"
$B$e2
[1] "#"
$B$e3
[1] 2
setNames(lapply(seq_along(lst), function(i){
temp = lst[[i]]
temp$e3 = i
temp
}), names(lst))
#$`A`
#$`A`$`e1`
#[1] "a"
#$`A`$e2
#[1] "!"
#$`A`$e3
#[1] 1
#$B
#$B$`e1`
#[1] "b"
#$B$e2
#[1] "#"
#$B$e3
#[1] 2
Here is a solution that doesn't assume that the sub-lists have the same known number of elements.
library("tidyverse")
library("glue")
lst <- list("A"=list(e1="a",e2="!"), "B"=list(e1="b", e2="#"))
# The part
# `setNames(list(.y), glue("e{length(.x) + 1}"))`
# creates a one-element list named accordingly to append to the previous list
map2(lst, seq(lst),
~ append(.x, setNames(list(.y), glue("e{length(.x) + 1}") )))
#> $A
#> $A$e1
#> [1] "a"
#>
#> $A$e2
#> [1] "!"
#>
#> $A$e3
#> [1] 1
#>
#>
#> $B
#> $B$e1
#> [1] "b"
#>
#> $B$e2
#> [1] "#"
#>
#> $B$e3
#> [1] 2
# If naming the additional element is not important, then this can be simplified to
map2(lst, seq(lst), append)
# or
map2(lst, seq(lst), c)
Created on 2019-03-06 by the reprex package (v0.2.1)
Another option using Map
Map(function(x, y) c(x, "e3" = y), x = lst, y = seq_along(lst))
#$A
#$A$e1
#[1] "a"
#$A$e2
#[1] "!"
#$A$e3
#[1] 1
#$B
#$B$e1
#[1] "b"
#$B$e2
#[1] "#"
#$B$e3
#[1] 2
This could be written even more concisely as
Map(c, lst, e3 = seq_along(lst))
Thanks to #thelatemail
We can use a for loop as well
for(i in seq_along(lst)) lst[[i]]$e3 <- i
Assuming I understood correctly that you want to add a 3rd element to each nested list which contains the index of that list in its parent list, this works:
library(rlist)
lst <- list("A"=list(e1="a",e2="!"), "B"=list(e1="b", e2="#"))
# seq_along() yields no iterations for an empty list, whereas
# seq(1:length(lst)) would still try to visit positions 1 and 2.
for (i in seq_along(lst)) {
  # Append the element's own position in `lst` as `e3`.
  lst[[i]] <- list.append(lst[[i]], e3 = i)
}
lst
We can loop along the length of lst with lapply, adding this sequential index to each element.
lst2 <- lapply(seq_along(lst), function(i) {
df <- lst[[i]]
df$e3 <- i
return(df)
})
names(lst2) <- names(lst) # Preserve names from lst
Or, if you're not scared about modifying in place:
lapply(seq_along(lst), function(i) {
lst[[i]]$e3 <<- i
})
Both give the same output:
$A
$A$e1
[1] "a"
$A$e2
[1] "!"
$A$e3
[1] 1
$B
$B$e1
[1] "b"
$B$e2
[1] "#"
$B$e3
[1] 2

Appending list of dates to list of lists removes date format

I'm trying to append a list of dates to a list of lists such as myList below. This is working as expected except the date format for the date element in each list element is lost. Any ideas?
myList<-list(list("event"="A"),
list("event"="B"),
list("event"="C"))
dates<-as.Date(c("2011-06-05","2012-01-12","2016-05-09"))
outList<-mapply(FUN="c",myList,eventDate=as.list(dates),SIMPLIFY = FALSE)
I'm looking to achieve the below
[[1]]
[[1]]$event
[1] "A"
[[1]]$eventDate
[1] "2011-06-05"
[[2]]
[[2]]$event
[1] "B"
[[2]]$eventDate
[1] "2012-01-12"
[[3]]
[[3]]$event
[1] "C"
[[3]]$eventDate
[1] "2016-05-09"
Using Map, you can also create a small (lambda) function like so:
# Three single-field event records, plus the date to attach to each one.
myList <- lapply(c("A", "B", "C"), function(label) list(event = label))
dates <- as.Date(c("2011-06-05", "2012-01-12", "2016-05-09"))

# Walk records and dates in parallel. Assigning with `$<-` stores each date
# as one list element instead of combining it with the record via c().
attach_date <- function(record, when) {
  record$eventDate <- when
  record
}
outList <- Map(attach_date, myList, dates)
outList
#> [[1]]
#> [[1]]$event
#> [1] "A"
#>
#> [[1]]$eventDate
#> [1] "2011-06-05"
#>
#>
#> [[2]]
#> [[2]]$event
#> [1] "B"
#>
#> [[2]]$eventDate
#> [1] "2012-01-12"
#>
#>
#> [[3]]
#> [[3]]$event
#> [1] "C"
#>
#> [[3]]$eventDate
#> [1] "2016-05-09"
The reason why you get the dates converted to numbers, is that the c function converts all elements to the lowest common type (usually characters, in this case numeric).
For example:
c(123, as.Date("2016-01-01"))
#> [1] 123 16801
It may be better to index as c could coerce it to integer storage value
for(i in seq_along(myList)) myList[[i]][['eventDate']] <- dates[i]
An additional list wrapper to insulate each Date element will also work here. I constructed that by running an lapply with the list function on the dates vector:
Map("c", myList, eventDate=lapply(dates, list))
[[1]]
[[1]]$event
[1] "A"
[[1]]$eventDate
[1] "2011-06-05"
[[2]]
[[2]]$event
[1] "B"
[[2]]$eventDate
[1] "2012-01-12"
[[3]]
[[3]]$event
[1] "C"
[[3]]$eventDate
[1] "2016-05-09"

R: Trying to assign to a list passed as an argument

I want to pass the name of a list as an argument to a function, and then assign to that list inside the function. The following illustrates the problem.
names <- c("a", "b", "c")
mylist <- list()
foo <- function(listname) {
for(name in names){
listname[[name]] <<- 7
}
}
This doesn't work, and generates an error that "listname" is not known. Can anyone clear this up, please? I would be most grateful.
I think you've got a couple of things going wrong. First off, I don't think you have anything stored in mylist, i.e. there are no names or anything there for your function to check. Try setting up your list something like this:
mylist <- list("a", "b", "c")
names(mylist) <- c("a", "b", "c")
mylist
#> $a
#> [1] "a"
#>
#> $b
#> [1] "b"
#>
#> $c
#> [1] "c"
In your function, you need to a) do names(listname) so you are checking the names of the actual list and b) return the list after you do the reassignment. Then, it can assign to the list and it will return those values.
#' Set every named element of a list to 7.
#'
#' @param listname A list; only elements reachable through `names(listname)`
#'   are overwritten.
#' @return The modified copy of `listname`. The caller's object is not
#'   changed, so the result must be assigned to be kept.
foo <- function(listname) {
  keys <- names(listname)
  for (i in seq_along(keys)) {
    listname[[keys[[i]]]] <- 7
  }
  listname
}
foo(mylist)
#> $a
#> [1] 7
#>
#> $b
#> [1] 7
#>
#> $c
#> [1] 7
Notice that mylist still has the same values in it; nothing there got reassigned.
mylist
#> $a
#> [1] "a"
#>
#> $b
#> [1] "b"
#>
#> $c
#> [1] "c"

Remove empty elements from list with character(0)

How can I remove empty elements from a list, i.e. elements that are zero-length vectors such as
character(0), integer(0) etc...
list2
# $`hsa:7476`
# [1] "1","2","3"
#
# $`hsa:656`
# character(0)
#
# $`hsa:7475`
# character(0)
#
# $`hsa:7472`
# character(0)
I don't know how to deal with them. I mean if NULL it is much simpler. How can I remove these elements such that just hsa:7476 remains in the list.
Another option (I think more efficient) is to keep only the elements whose length is greater than 0:
l[lapply(l,length)>0] ## you can use sapply,rapply
[[1]]
[1] 1 2 3
[[2]]
[1] "foo"
One possible approach is
Filter(length, l)
# [[1]]
# [1] 1 2 3
#
# [[2]]
# [1] "foo"
where
l <- list(1:3, "foo", character(0), integer(0))
This works due to the fact that positive integers get coerced to TRUE by Filter and, hence, are kept, while zero doesn't:
as.logical(0:2)
# [1] FALSE TRUE TRUE
For the sake of completeness, the purrr package from the popular tidyverse has some useful functions for working with lists - compact (introduction) does the trick, too, and works fine with magrittr's %>% pipes:
l <- list(1:3, "foo", character(0), integer(0))
library(purrr)
compact(l)
# [[1]]
# [1] 1 2 3
#
# [[2]]
# [1] "foo"
or
list(1:3, "foo", character(0), integer(0)) %>% compact
Use lengths() to define lengths of the list elements:
l <- list(1:3, "foo", character(0), integer(0))
l[lengths(l) > 0L]
#> [[1]]
#> [1] 1 2 3
#>
#> [[2]]
#> [1] "foo"
#>
Funny enough, none of the many solutions above remove the empty/blank character string: "". But the trivial solution is not easily found: L[L != ""].
To summarize, here are some various ways to remove unwanted items from an array list.
# Our Example List:
L <- list(1:3, "foo", "", character(0), integer(0))
# 1. Using the *purrr* package:
library(purrr)
compact(L)
# 2. Using the *Filter* function:
Filter(length, L)
# 3. Using *lengths* in a sub-array specification:
L[lengths(L) > 0]
# 4. Using *lapply* (with *length*) in a sub-array specification:
L[lapply(L,length)>0]
# 5. Using a sub-array specification:
L[L != ""]
# 6. Combine (3) & (5)
L[lengths(L) > 0 & L != ""]

How can I remove an element from a list?

I have a list and I want to remove a single element from it. How can I do this?
I've tried looking up what I think the obvious names for this function would be in the reference manual and I haven't found anything appropriate.
If you don't want to modify the list in-place (e.g. for passing the list with an element removed to a function), you can use indexing: negative indices mean "don't include this element".
x <- list("a", "b", "c", "d", "e"); # example list
x[-2]; # without 2nd element
x[-c(2, 3)]; # without 2nd and 3rd
Also, logical index vectors are useful:
x[x != "b"]; # without elements that are "b"
This works with dataframes, too:
df <- data.frame(number = 1:5, name = letters[1:5])
df[df$name != "b", ]; # rows without "b"
df[df$number %% 2 == 1, ] # rows with odd numbers only
I don't know R at all, but a bit of creative googling led me here: http://tolstoy.newcastle.edu.au/R/help/05/04/1919.html
The key quote from there:
I do not find explicit documentation for R on how to remove elements from lists, but trial and error tells me
myList[[5]] <- NULL
will remove the 5th element and then "close up" the hole caused by deletion of that element. That suffles the index values, So I have to be careful in dropping elements. I must work from the back of the list to the front.
A response to that post later in the thread states:
For deleting an element of a list, see R FAQ 7.1
And the relevant section of the R FAQ says:
... Do not set x[i] or x[[i]] to NULL, because this will remove the corresponding component from the list.
Which seems to tell you (in a somewhat backwards way) how to remove an element.
I would like to add that if it's a named list you can simply use within.
l <- list(a = 1, b = 2)
> within(l, rm(a))
$b
[1] 2
So you can overwrite the original list
l <- within(l, rm(a))
to remove element named a from list l.
Here is how to remove the last element of a list in R:
x <- list("a", "b", "c", "d", "e")
x[length(x)] <- NULL
If x might be a vector then you would need to create a new object:
x <- c("a", "b", "c", "d", "e")
x <- x[-length(x)]
This works for both lists and vectors.
Removing Null elements from a list in single line :
# Keep the non-NULL elements with a logical mask. x[-which(...)] would
# return an empty list when x has no NULLs, since x[-integer(0)] is empty.
x <- x[!vapply(x, is.null, logical(1))]
Cheers
If you have a named list and want to remove a specific element you can try:
lst <- list(a = 1:4, b = 4:8, c = 8:10)
if("b" %in% names(lst)) lst <- lst[ - which(names(lst) == "b")]
This will make a list lst with elements a, b, c. The second line removes element b after it checks that it exists (to avoid the problem #hjv mentioned).
or better:
lst$b <- NULL
This way it is not a problem to try to delete a non-existent element (e.g. lst$g <- NULL)
Use a negative index (-) with the position of the element; for example, to remove the 3rd element use your_list[-3].
Input
my_list <- list(a = 3, b = 3, c = 4, d = "Hello", e = NA)
my_list
# $`a`
# [1] 3
# $b
# [1] 3
# $c
# [1] 4
# $d
# [1] "Hello"
# $e
# [1] NA
Remove single element from list
my_list[-3]
# $`a`
# [1] 3
# $b
# [1] 3
# $d
# [1] "Hello"
# $e
[1] NA
Remove multiple elements from list
my_list[c(-1,-3,-2)]
# $`d`
# [1] "Hello"
# $e
# [1] NA
my_list[c(-3:-5)]
# $`a`
# [1] 3
# $b
# [1] 3
my_list[-seq(1:2)]
# $`c`
# [1] 4
# $d
# [1] "Hello"
# $e
# [1] NA
There's the rlist package (http://cran.r-project.org/web/packages/rlist/index.html) to deal with various kinds of list operations.
Example (http://cran.r-project.org/web/packages/rlist/vignettes/Filtering.html):
library(rlist)
devs <-
list(
p1=list(name="Ken",age=24,
interest=c("reading","music","movies"),
lang=list(r=2,csharp=4,python=3)),
p2=list(name="James",age=25,
interest=c("sports","music"),
lang=list(r=3,java=2,cpp=5)),
p3=list(name="Penny",age=24,
interest=c("movies","reading"),
lang=list(r=1,cpp=4,python=2)))
list.remove(devs, c("p1","p2"))
Results in:
# $p3
# $p3$name
# [1] "Penny"
#
# $p3$age
# [1] 24
#
# $p3$interest
# [1] "movies" "reading"
#
# $p3$lang
# $p3$lang$r
# [1] 1
#
# $p3$lang$cpp
# [1] 4
#
# $p3$lang$python
# [1] 2
Don't know if you still need an answer to this but I found from my limited (3 weeks worth of self-teaching R) experience with R that, using the NULL assignment is actually wrong or sub-optimal especially if you're dynamically updating a list in something like a for-loop.
To be more precise, using
myList[[5]] <- NULL
will throw the error
myList[[5]] <- NULL : replacement has length zero
or
more elements supplied than there are to replace
What I found to work more consistently is
# Single brackets with a negative index drop that element and return a list;
# double brackets extract one element and reject a negative index here.
myList <- myList[-5]
Just wanted to quickly add (because I didn't see it in any of the answers) that, for a named list, you can also do l["name"] <- NULL. For example:
l <- list(a = 1, b = 2, cc = 3)
l['b'] <- NULL
In the case of named lists I find those helper functions useful
#' Select the elements of a named list by name.
#'
#' @param list A named list.
#' @param names Character vector of element names to keep.
#' @return The elements of `list` whose name occurs in `names`, in their
#'   original order; an empty list when none match.
member <- function(list, names) {
  keep <- base::names(list) %in% names
  list[keep]
}
#' Drop the elements of a named list by name.
#'
#' @param list A named list.
#' @param names Character vector of element names to leave out.
#' @return The elements of `list` whose name does not occur in `names`, in
#'   their original order.
exclude <- function(list, names) {
  drop <- base::names(list) %in% names
  list[!drop]
}
aa <- structure(list(a = 1:10, b = 4:5, fruits = c("apple", "orange"
)), .Names = c("a", "b", "fruits"))
> aa
## $a
## [1] 1 2 3 4 5 6 7 8 9 10
## $b
## [1] 4 5
## $fruits
## [1] "apple" "orange"
> member(aa,"fruits")
## $fruits
## [1] "apple" "orange"
> exclude(aa,"fruits")
## $a
## [1] 1 2 3 4 5 6 7 8 9 10
## $b
## [1] 4 5
Using lapply and grep:
lst <- list(a = 1:4, b = 4:8, c = 8:10)
# say you want to remove a and c
toremove <- c("a", "c")
# Collect the positions whose name matches, then keep every other position.
# setdiff() leaves the list intact when nothing matches, whereas a negative
# index built from an empty grep() result would return an empty list.
hits <- unlist(lapply(toremove, function(x) grep(x, names(lst))))
lstnew <- lst[setdiff(seq_along(lst), hits)]
# or
pattern <- "a|c"
lstnew <- lst[setdiff(seq_along(lst), grep(pattern, names(lst)))]
You can also negatively index from a list using the extract function of the magrittr package to remove a list item.
a <- seq(1,5)
b <- seq(2,6)
c <- seq(3,7)
l <- list(a,b,c)
library(magrittr)
extract(l,-1) #simple one-function method
[[1]]
[1] 2 3 4 5 6
[[2]]
[1] 3 4 5 6 7
There are a few options in the purrr package that haven't been mentioned:
pluck and assign_in work well with nested values and you can access it using a combination of names and/or indices:
library(purrr)
l <- list("a" = 1:2, "b" = 3:4, "d" = list("e" = 5:6, "f" = 7:8))
# select values (by name and/or index)
all.equal(pluck(l, "d", "e"), pluck(l, 3, "e"), pluck(l, 3, 1))
[1] TRUE
# or if element location stored in a vector use !!!
pluck(l, !!! as.list(c("d", "e")))
[1] 5 6
# remove values (modifies in place)
pluck(l, "d", "e") <- NULL
# assign_in to remove values with name and/or index (does not modify in place)
assign_in(l, list("d", 1), NULL)
$a
[1] 1 2
$b
[1] 3 4
$d
$d$f
[1] 7 8
Or you can remove values using list_modify by assigning zap() or NULL:
all.equal(list_modify(l, a = zap()), list_modify(l, a = NULL))
[1] TRUE
You can remove or keep elements using a predicate function with discard and keep:
# remove numeric elements
discard(l, is.numeric)
$d
$d$e
[1] 5 6
$d$f
[1] 7 8
# keep numeric elements
keep(l, is.numeric)
$a
[1] 1 2
$b
[1] 3 4
Here is a simple solution that can be done using base R. It removes the number 5 from the original list of numbers. You can use the same method to remove whatever element you want from a list.
#the original list
original_list <- 1:10
#the list element to remove
remove <- 5
#the new list: every element of the original that does not equal `remove`
#(the comparison is vectorised, so no explicit loop or counter is needed)
new_list <- original_list[original_list != remove]
The new_list variable no longer contains 5.
new_list
# [1] 1 2 3 4 6 7 8 9 10
How about this? Again, using indices
> m <- c(1:5)
> m
[1] 1 2 3 4 5
> m[1:length(m)-1]
[1] 1 2 3 4
or
> m[-(length(m))]
[1] 1 2 3 4
You can use which.
x <- 1:5
x
#[1] 1 2 3 4 5
# Remove the positions reported by which(). setdiff() leaves x unchanged when
# no element equals 4, whereas x[-which(x == 4)] would then return an empty
# vector.
x <- x[setdiff(seq_along(x), which(x == 4))]
x
#[1] 1 2 3 5
if you'd like to avoid numeric indices, you can use
# Index `a` by the names to keep; setdiff() alone returns only those names.
a <- a[setdiff(names(a), c("name1", ..., "namen"))]
to delete the elements named name1...namen from a. This works for lists
> l <- list(a=1,b=2)
> l[setdiff(names(l),"a")]
$b
[1] 2
as well as for vectors
> v <- c(a=1,b=2)
> v[setdiff(names(v),"a")]
b
2

Resources