interleave list after every n elements - r

I want to interleave interElem after every 2 list elements.
Data:
listi <- c(rbind(letters[1:4], list(c(13,37))))
interElem <- c("inter","leavistan")
looks like:
> listi
[[1]]
[1] "a"
[[2]]
[1] 13 37
[[3]]
[1] "b"
[[4]]
[1] 13 37
[[5]]
[1] "c"
[[6]]
[1] 13 37
[[7]]
[1] "d"
[[8]]
[1] 13 37
>
Desired result (list element numbers are not accurate)
> listi
[[1]]
[1] "a"
[[2]]
[1] 13 37
[[XXX]]
[1] "inter" "leavistan"
[[3]]
[1] "b"
[[4]]
[1] 13 37
[[XXX]]
[1] "inter" "leavistan"
[[5]]
[1] "c"
[[6]]
[1] 13 37
[[XXX]]
[1] "inter" "leavistan"
[[7]]
[1] "d"
[[8]]
[1] 13 37
>

Here's the length of the new list
len = length(listi) + max(0, floor((length(listi) - 1) / 2))
and the index of the elements that should be the original values
idx = seq_len(len) %% 3 != 0
Use these to create a new list and insert the old and interstitial values
res = vector("list", len)
res[idx] = l
res[!idx] = list(v)
Package as a function for robustness and reuse.
fun = function(l, v, n = 2) {
## validate arguments
stopifnot(
is.numeric(n), length(n) == 1, !is.na(n),
is.list(l), is.vector(v)
)
## length and index for original values
len = length(l) + max(0, floor((length(l) - 1) / n))
idx = seq_len(len) %% (n + 1) != 0
## create and populate result
res = vector("list", len)
res[idx] = l
res[!idx] = list(v)
res
}
with
> str(fun(listi, interElem))
List of 11
$ : chr "a"
$ : num [1:2] 13 37
$ : chr [1:2] "inter" "leavistan"
$ : chr "b"
$ : num [1:2] 13 37
$ : chr [1:2] "inter" "leavistan"
$ : chr "c"
$ : num [1:2] 13 37
$ : chr [1:2] "inter" "leavistan"
$ : chr "d"
$ : num [1:2] 13 37

We can create a grouping variable to split every 2 elements with gl, then append the 'interElem' at the end of each nested list and flatten it with do.call(c
res <- head(do.call(c, lapply(split(listi, as.integer(gl(length(listi), 2,
length(listi)))), function(x) c(x, list(interElem )))), -1)
names(res) <- NULL
Or another option is to convert it to matrix, rbind with 'interElem' and concatenate to list
head(c(rbind(matrix(listi, nrow=2), list(interElem))), -1)
#[[1]]
#[1] "a"
#[[2]]
#[1] 13 37
#[[3]]
#[1] "inter" "leavistan"
#[[4]]
#[1] "b"
#[[5]]
#[1] 13 37
#[[6]]
#[1] "inter" "leavistan"
#[[7]]
#[1] "c"
#[[8]]
#[1] 13 37
#[[9]]
#[1] "inter" "leavistan"
#[[10]]
#[1] "d"
#[[11]]
#[1] 13 37
Or we can use append in a for loop
listn <- listi
i1 <- seq(2, length(listi), by = 2)
i2 <- i1 + (seq_along(i1) - 1)
for(i in seq_along(i2)) listn <- append(listn, list(interElem), after = i2[i])
head(listn, -1)

This works on your example and on lists with odd length, including 2 an 1. If the length of the list is odd, matrix() and cbind() rise warnings. Empty list as an entry returns insert list. All what I am trying to do here is to form the vector of indices for a following subsetting of the aggregated listiand list(interElem).
l <- length(listi)
i <- rbind(matrix(1:l, nrow = 2), rep(l+1,l%/%2))[1:(l + l%/%2 - (l+1)%%2)]
c(listi, list(interElem))[i]

Related

Convert a string with curly brackets to a nested list object

We have a string with curly brackets to represent nested lists:
x <- "{{1,2,3,4},{1,2,3,4,{Axe,Bat,Cat,Donkey}},{1,2,3,4}}"
How can we convert it to a nested list R object?
So far I have below solution, it is not ideal. The idea is to convert it to a valid JSON format, then use jsonlite::fromJSON. I would like to skip/improve quoting step, so that numbers read are numeric. Yes, I could loop through the nested list and convert "numbers" to numeric, but would like to avoid.
Any other non-JSON solutions are welcome, too.
library(jsonlite)
# translate brackets to JSON square brackets
x <- chartr("{}", "[]", x)
# wrap in quotes
x <- gsub("(\\w+)", '"\\1"', x)
# finally read as JSON
fromJSON(x)
# [[1]]
# [1] "1" "2" "3" "4"
#
# [[2]]
# [[2]][[1]]
# [1] "1"
#
# [[2]][[2]]
# [1] "2"
#
# [[2]][[3]]
# [1] "3"
#
# [[2]][[4]]
# [1] "4"
#
# [[2]][[5]]
# [1] "Axe" "Bat" "Cat" "Donkey"
#
#
# [[3]]
# [1] "1" "2" "3" "4"
This solution does not use any packages. It replaces innermost lists with c(...) and then quotes the words that start with a letter and replaces { and } with list( and ) respectively. Then it parses and evaluates that to get the result.
x1 <- gsub("\\{([^{]+)\\}", "c(\\1)", x) # x is from question
x2 <- gsub("([a-zA-Z]\\w+)", "'\\1'", x1)
x3 <- gsub("\\{", "list(", x2)
x4 <- gsub("\\}", ")", x3)
result <- eval(parse(text = x4))
str(result)
giving:
List of 3
$ : num [1:4] 1 2 3 4
$ :List of 5
..$ : num 1
..$ : num 2
..$ : num 3
..$ : num 4
..$ : chr [1:4] "Axe" "Bat" "Cat" "Donkey"
$ : num [1:4] 1 2 3 4
This could also be written as a pipeline using magrittr:
library(magrittr)
x %>%
gsub("\\{([^{]+)\\}", "c(\\1)", .) %>%
gsub("([a-zA-Z]\\w+)", "'\\1'", .) %>%
gsub("\\{", "list(", .) %>%
gsub("\\}", ")", .) %>%
parse(text = .) %>%
eval
You could do:
jsonlite::fromJSON(txt=gsub('([A-Za-z]+)','"\\1"',chartr('{}','[]',x)))
[[1]]
[1] 1 2 3 4
[[2]]
[[2]][[1]]
[1] 1
[[2]][[2]]
[1] 2
[[2]][[3]]
[1] 3
[[2]][[4]]
[1] 4
[[2]][[5]]
[1] "Axe" "Bat" "Cat" "Donkey"
[[3]]
[1] 1 2 3 4
Or you could use alpha ie:
jsonlite::fromJSON(txt=gsub('([[:alpha:]]+)','"\\1"',chartr('{}','[]',x)))
The issue here is to obtain a good regex such that it does not include the numeric elements.

Split data frame into list while keeping column values as sub list names

I have following data frame and I want to keep first column values as sub list names and second column values as its elements.
Input-
var <- c("a","b","c","d")
val <- c("0-1-2-NA","0-1","0-2-4","0-NA")
dt <- data.frame(var,val, stringsAsFactors = FALSE)
> dt
var val
1 a 0-1-2-NA
2 b 0-1
3 c 0-2-4
4 d 0-NA
Desired Output-
$a
[1] 0 1 2 NA
$b
[1] 0 1
$c
[1] 0 2 4
$d
[1] 0 NA
Note- I also want to split values by - before creating list.
A base R solution with strsplit.
dat <- strsplit(dt$val, split = "-")
names(dat) <- dt$var
dat
# $a
# [1] "0" "1" "2" "NA"
#
# $b
# [1] "0" "1"
#
# $c
# [1] "0" "2" "4"
#
# $d
# [1] "0" "NA"
To automatically convert strings, add type.convert (or as.integer or as.numeric):
res <- with(dt, lapply(strsplit(setNames(val, var), "-"), type.convert))
str(res)
# List of 4
# $ a: int [1:4] 0 1 2 NA
# $ b: int [1:2] 0 1
# $ c: int [1:3] 0 2 4
# $ d: int [1:2] 0 NA

Equalizing the lengths of all the lists within a list?

I have a list of lists and I want the sub-lists to all have the same length
i.e. to pad them with NAs if needed so they all reach the length of the longest list.
Mock example
list1 <- list(1, 2, 3)
list2 <- list(1, 2, 3, 4, 5)
list3 <- list(1, 2, 3, 4, 5, 6)
list_lists <- list(list1, list2, list3)
My best attempt yet
max_length <- max(unlist(lapply (list_lists, FUN = length)))
# returns the length of the longest list
list_lists <- lapply (list_lists, function (x) length (x) <- max_length)
Problem, it is replacing all my sub-lists into an integer = max_length...
list_lists [[1]]
> [1] 6
Can someone help?
Try this (where ls is your list):
lapply(lapply(sapply(ls, unlist), "length<-", max(lengths(ls))), as.list)
In lists, NULL would seem more appropriate than NA, and could be added with vector:
list_lists <- list(list(1, 2, 3),
list(1, 2, 3, 4, 5),
list(1, 2, 3, 4, 5, 6))
list_lists2 <- Map(function(x, y){c(x, vector('list', length = y))},
list_lists,
max(lengths(list_lists)) - lengths(list_lists))
str(list_lists2)
#> List of 3
#> $ :List of 6
#> ..$ : num 1
#> ..$ : num 2
#> ..$ : num 3
#> ..$ : NULL
#> ..$ : NULL
#> ..$ : NULL
#> $ :List of 6
#> ..$ : num 1
#> ..$ : num 2
#> ..$ : num 3
#> ..$ : num 4
#> ..$ : num 5
#> ..$ : NULL
#> $ :List of 6
#> ..$ : num 1
#> ..$ : num 2
#> ..$ : num 3
#> ..$ : num 4
#> ..$ : num 5
#> ..$ : num 6
If you really want NAs, just change vector to rep:
list_lists3 <- Map(function(x, y){c(x, rep(NA, y))},
list_lists,
max(lengths(list_lists)) - lengths(list_lists))
str(list_lists3)
#> List of 3
#> $ :List of 6
#> ..$ : num 1
#> ..$ : num 2
#> ..$ : num 3
#> ..$ : logi NA
#> ..$ : logi NA
#> ..$ : logi NA
#> $ :List of 6
#> ..$ : num 1
#> ..$ : num 2
#> ..$ : num 3
#> ..$ : num 4
#> ..$ : num 5
#> ..$ : logi NA
#> $ :List of 6
#> ..$ : num 1
#> ..$ : num 2
#> ..$ : num 3
#> ..$ : num 4
#> ..$ : num 5
#> ..$ : num 6
Note the types in the latter won't match up unless you specify NA_real_ or coerce NA to match the type of x.
Here is your code fixed.
The function should return x, not length(x).
Also, I used vectors, not lists for clarity.
list1 <- c(1, 2, 3)
list2 <- c(1, 2, 3, 4, 5)
list3 <- c(1, 2, 3, 4, 5, 6)
list_lists <- list(list1, list2, list3)
max_length <- max(unlist(lapply (list_lists, FUN = length)))
list_lists <- lapply (list_lists, function (x) {length (x) <- max_length;x})
# [[1]]
# [1] 1 2 3 NA NA NA
#
# [[2]]
# [1] 1 2 3 4 5 NA
#
# [[3]]
# [1] 1 2 3 4 5 6
For original lists the result is:
# [[1]]
# [[1]][[1]]
# [1] 1
#
# [[1]][[2]]
# [1] 2
#
# [[1]][[3]]
# [1] 3
#
# [[1]][[4]]
# NULL
#
# [[1]][[5]]
# NULL
#
# [[1]][[6]]
# NULL
#
#
# [[2]]
# [[2]][[1]]
# [1] 1
#
# [[2]][[2]]
# [1] 2
#
# [[2]][[3]]
# [1] 3
#
# [[2]][[4]]
# [1] 4
#
# [[2]][[5]]
# [1] 5
#
# [[2]][[6]]
# NULL
#
#
# [[3]]
# [[3]][[1]]
# [1] 1
#
# [[3]][[2]]
# [1] 2
#
# [[3]][[3]]
# [1] 3
#
# [[3]][[4]]
# [1] 4
#
# [[3]][[5]]
# [1] 5
#
# [[3]][[6]]
# [1] 6
Try this:
funJoeOld <- function(ls) {
list_length <- sapply(ls, length)
max_length <- max(list_length)
lapply(seq_along(ls), function(x) {
if (list_length[x] < max_length) {
c(ls[[x]], lapply(1:(max_length - list_length[x]), function(y) NA))
} else {
ls[[x]]
}
})
}
funJoeOld(list_lists)[[1]]
[[1]]
[1] 1
[[2]]
[1] 2
[[3]]
[1] 3
[[4]]
[1] NA
[[5]]
[1] NA
[[6]]
[1] NA
Edit
Just wanted to illuminate how using the right tools in R makes a huge difference. Although my solution gives correct results, it is very inefficient. By replacing sapply(ls, length) with lengths as well as lapply(1:z, function(y) NA) with as.list(rep(NA, z)), we obtain almost a 15x speed up. Observe:
funJoeNew <- function(ls) {
list_length <- lengths(ls)
max_length <- max(list_length)
lapply(seq_along(ls), function(x) {
if (list_length[x] < max_length) {
c(ls[[x]], as.list(rep(NA, max_length - list_length[x])))
} else {
ls[[x]]
}
})
}
funAlistaire <- function(ls) {
Map(function(x, y){c(x, rep(NA, y))},
ls,
max(lengths(ls)) - lengths(ls))
}
fun989 <- function(ls) {
lapply(lapply(sapply(ls, unlist), "length<-", max(lengths(ls))), as.list)
}
Compare equality
set.seed(123)
samp_list <- lapply(sample(1000, replace = TRUE), function(x) {lapply(1:x, identity)})
## have to unlist as the NAs in 989 are of the integer
## variety and the NAs in Joe/Alistaire are logical
identical(sapply(fun989(samp_list), unlist), sapply(funJoeNew(samp_list), unlist))
[1] TRUE
identical(funJoeNew(samp_list), funAlistaire(samp_list))
[1] TRUE
Benchmarks
microbenchmark(funJoeOld(samp_list), funJoeNew(samp_list), fun989(samp_list),
funAlistaire(samp_list), times = 30, unit = "relative")
Unit: relative
expr min lq mean median uq max neval cld
funJoeOld(samp_list) 21.825878 23.269846 17.434447 20.803035 18.851403 4.8056784 30 c
funJoeNew(samp_list) 1.827741 1.841071 2.253294 1.667047 1.780324 2.4659653 30 ab
fun989(samp_list) 3.108230 3.563780 3.170320 3.790048 3.888632 0.9890681 30 b
funAli(samp_list) 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 30 a
There are two take aways here:
Having a good understanding of the apply family of functions makes for
concise and efficient code (as can be seen in #alistaire's and #989's solution).
Understanding the nuances of base R in general can have considerable consequences
Not sure if you are you looking for this and you may use lengths function for lists:
list_lists <- list(unlist(list1), unlist(list2), unlist(list3))
list_lists1 <- lapply(list_lists, `length<-`, max(lengths(list_lists)))
list_lists1
> list_lists1
[[1]]
[1] 1 2 3 NA NA NA
[[2]]
[1] 1 2 3 4 5 NA
[[3]]
[1] 1 2 3 4 5 6
OR for lists of the lists, you can go one step further:
list_lists2 <- lapply(list_lists1,as.list)
> list_lists2
[[1]]
[[1]][[1]]
[1] 1
[[1]][[2]]
[1] 2
[[1]][[3]]
[1] 3
[[1]][[4]]
[1] NA
[[1]][[5]]
[1] NA
[[1]][[6]]
[1] NA
[[2]]
[[2]][[1]]
[1] 1
[[2]][[2]]
[1] 2
[[2]][[3]]
[1] 3
[[2]][[4]]
[1] 4
[[2]][[5]]
[1] 5
[[2]][[6]]
[1] NA
[[3]]
[[3]][[1]]
[1] 1
[[3]][[2]]
[1] 2
[[3]][[3]]
[1] 3
[[3]][[4]]
[1] 4
[[3]][[5]]
[1] 5
[[3]][[6]]
[1] 6
>

Filtering list in R

I have a list
A <- c(1,2,3,4,5,6,7,8,9,10)
B <- c("a" ,"b", "c" ,"d","b" ,"f" ,"g" ,"a" ,"b" ,"a")
C <- c(25, 26, 27, 28, 29, 30, 31, 32, 10, 15)
mylist <- list(A,B,C)
mylist
[[1]]
[1] 1 2 3 4 5 6 7 8 9 10
[[2]]
[1] "a" "b" "c" "d" "b" "f" "g" "a" "b" "a"
[[3]]
[1] 25 26 27 28 29 30 31 32 10 15
I would like to select all components A,B,C of the list where second component B has value "a" or "b" .
Sample output
mylist
[[1]]
[1] 1 2 6 8 9 10
[[2]]
[1] "a" "b" "b" "a" "b" "a"
[[3]]
[1] 25 26 29 32 10 15
How can I do that? Note that each component have same length.
To stay with a list, why not simply:
lapply(mylist, `[`, is.element(B, letters[1:2]))
#[[1]]
#[1] 1 2 5 8 9 10
#[[2]]
#[1] "a" "b" "b" "a" "b" "a"
#[[3]]
#[1] 25 26 29 32 10 15
I would go with a data.frame or data.table for this use case:
Using your original list (with a 10 added to A to have the same number of entries as B and C):
>df <- data.frame(A=mylist[[1]],B=mylist[[2]],C=mylist[[3]],stringsAsFactors=F)
> df[df$B %in% c("a","b"),]
A B C
1 1 a 25
2 2 b 26
5 5 b 29
8 8 a 32
9 9 b 10
10 10 a 15
This will subset the data.frame by where B values are a or b. If you build your list at first, you may avoid the list step and build the data.frame directly.
If you really want a list at end:
> as.list(df[df$B %in% c("a","b"),])
$A
[1] 1 2 5 8 9 10
$B
[1] "a" "b" "b" "a" "b" "a"
$C
[1] 25 26 29 32 10 15
If you wish to avoid the named entries, use unname: as.list(unname(df[..]))
Here is a simple solution.
First, I create mylist :
mylist <- list(1:10, letters[1:10], 25:15)
Then I create a function which returns TRUE if the condition is TRUE and FALSE otherwise
> filt <- function(x) {
+ x[2] %in% c("a", "b")
+ }
>
Then I use sapply to apply the function to mylist and I select only the components I need :
> mylist[sapply(mylist, filt) == TRUE]
[[1]]
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"

R the same element to sublists

I have a list m containing sublists. One sublist is like:
> m[[1]]
$input
$input$a
[1] 1
$input$b
[1] 2
$input$c
[1] 3
$output
$output$y
[1] "big dog"
The entire list is:
[[1]]
[[1]]$input
[[1]]$input$a
[1] 1
[[1]]$input$b
[1] 2
[[1]]$input$c
[1] 3
[[1]]$output
[[1]]$output$y
[1] "big dog"
[[2]]
[[2]]$input
[[2]]$input$a
[1] 12
[[2]]$input$b
[1] 89
[[2]]$input$c
[1] 67
[[2]]$output
[[2]]$output$y
[1] "fat cat"
[[3]]
[[3]]$input
[[3]]$input$a
[1] 7
[[3]]$input$b
[1] 4
[[3]]$input$c
[1] 97
[[3]]$output
[[3]]$output$y
[1] "fat cat"
And the code is:
m<-list(
list(
input=list(a=1,b=2,c=3),
output=list(y="big dog")
),
list(
input=list(a=12,b=89,c=67),
output=list(y="fat cat")
),
list(
input=list(a=7,b=4,c=97),
output=list(y="fat cat")
)
)
Now I want to add to each subsublist named 'input' the same variable named type and containing value "pet" to have:
m
[[1]]
[[1]]$input
[[1]]$input$a
[1] 1
[[1]]$input$b
[1] 2
[[1]]$input$c
[1] 3
[[1]]$input$type
[1] "pet"
[[1]]$output
[[1]]$output$y
[1] "big dog"
[[2]]
[[2]]$input
[[2]]$input$a
[1] 12
[[2]]$input$b
[1] 89
[[2]]$input$c
[1] 67
[[2]]$input$type
[1] "pet"
[[2]]$output
[[2]]$output$y
[1] "fat cat"
[[3]]
[[3]]$input
[[3]]$input$a
[1] 7
[[3]]$input$b
[1] 4
[[3]]$input$c
[1] 97
[[3]]$input$type
[1] "pet"
[[3]]$output
[[3]]$output$y
[1] "fat cat"
I tried:
Map(function(u) c(u$input, type="pet"))
But it does not work as it filter the list :(
Do yo have any idea?
This should do the trick:
m2 <- lapply(m, modifyList, list(input=list(type="pet")))
## Check that it worked
str(m2[3])
# List of 1
# $ :List of 2
# ..$ input :List of 4
# .. ..$ a : num 7
# .. ..$ b : num 4
# .. ..$ c : num 97
# .. ..$ type: chr "pet"
# ..$ output:List of 1
# .. ..$ y: chr "fat cat"
If the formulation above seems a bit opaque, try running the following, which will likely help you to see how modifyList() works in this case:
m[[1]]
list(input=list(type="pet"))
modifyList(m[[1]], list(input=list(type="pet")))
If I understand you correctly, I think you just want
m<-lapply(m, function(x) {x$input$type<-"pet";x})
That will add $type="pet" to every input in your list. Resulting in
list(
list(
input = list(a = 1, b = 2, c = 3, type = "pet"),
output = list(y = "big dog")
),
list(
input = list(a = 12, b = 89, c = 67, type = "pet"),
output = list(y = "fat cat")
),
list(
input = list(a = 7, b = 4, c = 97, type = "pet"),
output = list(y = "fat cat")
)
)

Resources