multicombine in parallel processing in R - r

I use the following code:
library(foreach)
library(doParallel)
N<-5
cl<-makeCluster(4)
registerDoParallel(cl)
comb <- function(x, ...) {
lapply(seq_along(x),
function(i) c(x[[i]], lapply(list(...), function(y) y[[i]])))
}
oper <- foreach(i=1:10, .combine='comb', .multicombine=TRUE,
.init=list(list(), list(), list())) %dopar% {
list(i+4, i+3, i+2)
}
stopCluster(cl)
If I need to insert K different functions. Is there a way to define in the .init=list(list(), list(), list()) a list that is a function of K(K=3 in this case) instead of adding ,list()? Is each oper runs on separate core (CPU)?
The output is:
> oper[[1]]
[[1]]
[1] 5
[[2]]
[1] 6
[[3]]
[1] 7
[[4]]
[1] 8
[[5]]
[1] 9
[[6]]
[1] 10
[[7]]
[1] 11
[[8]]
[1] 12
[[9]]
[1] 13
[[10]]
[1] 14
> oper[[2]]
[[1]]
[1] 4
[[2]]
[1] 5
[[3]]
[1] 6
[[4]]
[1] 7
[[5]]
[1] 8
[[6]]
[1] 9
[[7]]
[1] 10
[[8]]
[1] 11
[[9]]
[1] 12
[[10]]
[1] 13
> oper[[3]]
[[1]]
[1] 3
[[2]]
[1] 4
[[3]]
[1] 5
[[4]]
[1] 6
[[5]]
[1] 7
[[6]]
[1] 8
[[7]]
[1] 9
[[8]]
[1] 10
[[9]]
[1] 11
[[10]]
[1] 12
I would like to add some additional functions (K) without any need to add the ,list() at the relevant place. So when I write oper[[K]] I'll get the relevant result.

Related

Remove nested loops in R with map or apply

I have a list of vectors in R:
test_list <- list()
test_list[[1]] <- c(1,2,3,4,5,6,7)
test_list[[2]] <- c(1,2,3)
test_list[[3]] <- c(6,7)
test_list[[4]] <- c(9,10,11)
And I want to check the intersection of each vector with all the other vectors. A nested loop approach would look like this:
for(i in test_list) {
for(j in test_list) {
intersect(i, j)
}
}
And the results would look like this:
[1] 1 2 3 4 5 6 7
[1] 1 2 3
[1] 6 7
numeric(0)
[1] 1 2 3
[1] 1 2 3
numeric(0)
numeric(0)
[1] 6 7
numeric(0)
[1] 6 7
numeric(0)
numeric(0)
numeric(0)
numeric(0)
[1] 9 10 11
I have seen that I can remove one of the foor loops using map or apply:
get_overlap_cells <- function(x) {
for(i in test_list) {
overlaping_cells <- intersect(i, x)
}
}
r <- map(test_list, get_overlap_cells)
However, I would like to remove both loops, any ideas on how to achieve this?
Thank you!
combins <- expand.grid(seq_along(test_list), seq_along(test_list))
mapply( function(x,y) intersect(test_list[[x]],test_list[[y]]),
combins[,1], combins[,2])
[[1]]
[1] 1 2 3 4 5 6 7
[[2]]
[1] 1 2 3
[[3]]
[1] 6 7
[[4]]
numeric(0)
[[5]]
[1] 1 2 3
[[6]]
[1] 1 2 3
[[7]]
numeric(0)
[[8]]
numeric(0)
[[9]]
[1] 6 7
[[10]]
numeric(0)
[[11]]
[1] 6 7
[[12]]
numeric(0)
[[13]]
numeric(0)
[[14]]
numeric(0)
[[15]]
numeric(0)
[[16]]
[1] 9 10 11

How to split a list into n groups in all possible combinations of group length and elements within group in R?

My goal is to divide a list into n groups in all possible combinations (where the group has a variable length).
I found the same question answered here (Python environment), but I'm unable to replicate it in the R environment.
Could anyone kindly help me? Thanks a lot.
If you want an easy implementation for the similar objective, you can try listParts from package partitions, e.g.,
> x <- 4
> partitions::listParts(x)
[[1]]
[1] (1,2,3,4)
[[2]]
[1] (1,2,4)(3)
[[3]]
[1] (1,2,3)(4)
[[4]]
[1] (1,3,4)(2)
[[5]]
[1] (2,3,4)(1)
[[6]]
[1] (1,4)(2,3)
[[7]]
[1] (1,2)(3,4)
[[8]]
[1] (1,3)(2,4)
[[9]]
[1] (1,4)(2)(3)
[[10]]
[1] (1,2)(3)(4)
[[11]]
[1] (1,3)(2)(4)
[[12]]
[1] (2,4)(1)(3)
[[13]]
[1] (2,3)(1)(4)
[[14]]
[1] (3,4)(1)(2)
[[15]]
[1] (1)(2)(3)(4)
where x is the number of elements in the set, and all partitions denotes the indices of elements.
If you want to choose the number of partitions, below is a user function that may help
f <- function(x, n) {
res <- listParts(x)
subset(res, lengths(res) == n)
}
such that
> f(x, 2)
[[1]]
[1] (1,2,4)(3)
[[2]]
[1] (1,2,3)(4)
[[3]]
[1] (1,3,4)(2)
[[4]]
[1] (2,3,4)(1)
[[5]]
[1] (1,4)(2,3)
[[6]]
[1] (1,2)(3,4)
[[7]]
[1] (1,3)(2,4)
> f(x, 3)
[[1]]
[1] (1,4)(2)(3)
[[2]]
[1] (1,2)(3)(4)
[[3]]
[1] (1,3)(2)(4)
[[4]]
[1] (2,4)(1)(3)
[[5]]x
[1] (2,3)(1)(4)
[[6]]
[1] (3,4)(1)(2)
Update
Given x <- LETTERS[1:4], we can run
res <- rapply(listParts(length(x)), function(v) x[v], how = "replace")
such that
> res
[[1]]
[1] (A,B,C,D)
[[2]]
[1] (A,B,D)(C)
[[3]]
[1] (A,B,C)(D)
[[4]]
[1] (A,C,D)(B)
[[5]]
[1] (B,C,D)(A)
[[6]]
[1] (A,D)(B,C)
[[7]]
[1] (A,B)(C,D)
[[8]]
[1] (A,C)(B,D)
[[9]]
[1] (A,D)(B)(C)
[[10]]
[1] (A,B)(C)(D)
[[11]]
[1] (A,C)(B)(D)
[[12]]
[1] (B,D)(A)(C)
[[13]]
[1] (B,C)(A)(D)
[[14]]
[1] (C,D)(A)(B)
[[15]]
[1] (A)(B)(C)(D)

Why `lapply` returns result of assignment automatically?

q <- lapply(1:3, function(x) x ** 2)
## returns nothing, because it is an assignment
# however, how you explain this?:
> lapply(list(1:3, 4:6, 7:9, 10:11), function(v) q <- lapply(v, function(x) x ** 2))
[[1]]
[[1]][[1]]
[1] 1
[[1]][[2]]
[1] 4
[[1]][[3]]
[1] 9
[[2]]
[[2]][[1]]
[1] 16
[[2]][[2]]
[1] 25
[[2]][[3]]
[1] 36
[[3]]
[[3]][[1]]
[1] 49
[[3]][[2]]
[1] 64
[[3]][[3]]
[1] 81
[[4]]
[[4]][[1]]
[1] 100
[[4]][[2]]
[1] 121
# while this gives the same but is logical (q is stated as return value).
> lapply(list(1:3, 4:6, 7:9, 10:11), function(v) {q <- lapply(v, function(x) x ** 2);q})
[[1]]
[[1]][[1]]
[1] 1
[[1]][[2]]
[1] 4
[[1]][[3]]
[1] 9
[[2]]
[[2]][[1]]
[1] 16
[[2]][[2]]
[1] 25
[[2]][[3]]
[1] 36
[[3]]
[[3]][[1]]
[1] 49
[[3]][[2]]
[1] 64
[[3]][[3]]
[1] 81
[[4]]
[[4]][[1]]
[1] 100
[[4]][[2]]
[1] 121
why in the second expression, although the inner lapply is just assigned to q but q not called at end of function, the value of the assignment
is returned to the outer lapply and thus collected?
Please, anybody has an explanation for this phenomenon?
It also works with =
lapply(list(1:3, 4:6, 7:9, 10:11), function(v) q = lapply(c(v), function(x) x ** 2))
[[1]]
[[1]][[1]]
[1] 1
[[1]][[2]]
[1] 4
[[1]][[3]]
[1] 9
[[2]]
[[2]][[1]]
[1] 16
[[2]][[2]]
[1] 25
[[2]][[3]]
[1] 36
[[3]]
[[3]][[1]]
[1] 49
[[3]][[2]]
[1] 64
[[3]][[3]]
[1] 81
[[4]]
[[4]][[1]]
[1] 100
[[4]][[2]]
[1] 121
The answer lies in the return value of an assignment operation. The assignment operator <- not only writes a value to a variable in the calling environment, it actually invisibly returns the assigned value itself to the caller.
Remember all operations in R are actually functions. When you do
x <- 3
You are actually doing
`<-`(x, 3)
Which not only creates the symbol "x" in the calling environment and assigns the value 3 to that symbol, but invisibly returns the value 3 to the caller. To see this, consider:
y <- 2
y
#> [1] 2
y <- `<-`(x, 3)
y
#> [1] 3
Or equivalently,
y <- (x <- 4)
y
#> [1] 4
And in fact, because of R's order of evaluation, we can even do:
y <- x <- 5
y
#> [1] 5
Which is a neat way of setting multiple variables to the same value on the same line.
Now consider the lambda function you use inside your lapply:
function(v) q <- lapply(v, function(x) x ** 2)
Look what happens when we consider this function as a stand-alone:
func <- function(v) q <- lapply(v, function(x) x ** 2)
func(1:3)
As predicted, nothing happens. But what happens when we do:
a <- func(1:3)
If func(1:3) doesn't return anything, then presumably a should be empty now.
But it isn't...
a
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 4
#>
#> [[3]]
#> [1] 9
Because the value of assignation was returned to the caller invisibly, we were able to assign it to a value in the calling scope. Therefore, doing
lapply(list(1:3, 4:6, 7:9, 10:11), function(v) q <- lapply(v, function(x) x ** 2))
assigns the value of your inner function applied to all the list elements to a new list. This list is not returned invisibly, but just returned as normal.
So this is expected behaviour.

R: relisting a flat list

This question has a nice solution of flattening lists while preserving their data types (which unlist does not):
flatten = function(x, unlist.vectors=F) {
while(any(vapply(x, is.list, logical(1)))) {
if (! unlist.vectors)
x = lapply(x, function(x) if(is.list(x)) x else list(x))
x = unlist(x, recursive=F)
}
x
}
If I give it the following list, it behaves as expected:
> a = list(c(1,2,3), list(52, 561), "a")
> flatten(a)
[[1]]
[1] 1 2 3
[[2]]
[1] 52
[[3]]
[1] 561
[[4]]
[1] "a"
Now I'd like to restructure the flat list like a. relist fails miserably:
> relist(flatten(a), skeleton=a)
[[1]]
[[1]][[1]]
[1] 1 2 3
[[1]][[2]]
[1] 52
[[1]][[3]]
[1] 561
[[2]]
[[2]][[1]]
[[2]][[1]][[1]]
[1] "a"
[[2]][[2]]
[[2]][[2]][[1]]
NULL
[[3]]
[[3]][[1]]
NULL
Now, I could of course do relist(unlist(b), a) but that loses data types again. What is a good way to restructure a flat list?
Bonus points if it handles the analogous attribute to unlist.vectors correctly.
One way to do it is:
relist2 = function(x, like, relist.vectors=F) {
if (! relist.vectors)
like = rapply(a, function(f) NA, how='replace')
lapply(relist(x, skeleton=like), function(e) unlist(e, recursive=F))
}
This retains the classes and distinguishes between lists and vectors:
> relist2(flatten(a), like=a)
[[1]]
[1] 1 2 3
[[2]]
[[2]][[1]]
[1] 52
[[2]][[2]]
[1] 561
[[3]]
[1] "a"
> relist2(flatten(a, unlist.vectors=T), like=a, relist.vectors=T)
[[1]]
[1] 1 2 3
[[2]]
[[2]][[1]]
[1] 52
[[2]][[2]]
[1] 561
[[3]]
[1] "a"

adding a field to each element of a list

I have a list
> (mylist <- list(list(a=1),list(a=2),list(a=3)))
[[1]]
[[1]]$a
[1] 1
[[2]]
[[2]]$a
[1] 2
[[3]]
[[3]]$a
[1] 3
and I want to add field b to each sublist from 11:13 to get something like
> (mylist <- list(list(a=1,b=11),list(a=2,b=12),list(a=3,b=13)))
[[1]]
[[1]]$a
[1] 1
[[1]]$b
[1] 11
[[2]]
[[2]]$a
[1] 2
[[2]]$b
[1] 12
[[3]]
[[3]]$a
[1] 3
[[3]]$b
[1] 13
How do I do this?
(note that I have a large number of such relatively small lists, so this will be called in apply and has to be reasonably fast).
mylist <- list(list(a=1),list(a=2),list(a=3))
b.vals <- 11:13
mylist <- lapply(
1:length(mylist),
function(x) {
mylist[[x]]$b <- b.vals[[x]]
mylist[[x]]
} )

Resources