improve readability of output list of foreach loop - r

I have a simple example of foreach nested loop below. How can I improve the readability of the result res?
Below is another example of nested for loop with more readable results, each element of list can be easily identified e.g. res_desired[["B"]][[3]]
library(foreach)
library(doFuture)
registerDoFuture()
plan(multicore, workers = 1)
# foreach loop
res <- foreach(a = c("A", "B", "C", "D")) %:%
foreach(b = 1:4) %do%
{
paste0(rep(a, b))
}
# for loop
res_desired <- list()
for(a in c("A", "B", "C", "D"))
{
for(b in 1:4)
{
res_desired[[a]][[b]] <- paste0(rep(a, b))
}
}

The .final may be useful. According to ?foreach
.final - function of one argument that is called to return final result.
With one more nested loop
res_1 <- foreach(a = c("A", "B", "C", "D"),
.final = function(x) setNames(x, c("A", "B", "C", "D"))) %:%
foreach(c = c("a", "b", "c"),
.final = function(x) setNames(x, c("a", "b", "c"))) %:%
foreach(b = 1:4) %do% {paste0(rep(a, b))}
-checking
> res_1[["B"]][["c"]][[2]]
[1] "B" "B"
similar to
res_desired_1 <- list()
for(a in c("A", "B", "C", "D")) {
for(c in c("a", "b", "c")) {
for(b in 1:4) {
res_desired_1[[a]][[c]][[b]] <- paste0(rep(a, b))
}
}
}
> res_desired_1[["B"]][["c"]][[2]]
[1] "B" "B"

Related

Fastest way of automatically renaming dataframes, if they fulfill certain conditions

I would like to automatically rename dataframes if they fulfill certain conditions. I have two question about this.
In the code below, the rm part fails, but I do not understand why.
I am wondering if there is a faster/better way to do this (for example by first putting the df's in a list, renaming and unlisting).
Example data:
df_a <- data.frame(
A = c("a", "b", "c"),
B = c("a", "b", "c"),
C = c("a", "b", "c")
)
df_b <- data.frame(
same_as_A = c("a", "b", "c"),
same_as_B = c("a", "b", "c"),
same_as_C = c("a", "b", "c")
)
My attempt is the following (where the condition is that more than 2 columns match):
# names of the data
names_of_dataset_X <- c("A", "B", "C")
names_of_dataset_Y <- c("same_as_A", "same_as_B", "same_as_C")
dfs <- ls()
for (i in seq_along(dfs)) {
if ( sum( names( get( dfs[i] ) ) %in% names_of_dataset_X) > 2) {
dataset_X <- copy(get( dfs[i] ))
rm(get( dfs[i] ))
} else if (TRUE) {
dataset_Y <- copy(get( dfs[i] ))
rm(get( dfs[i] ))
}
}

access a nested list element from a vector of names specifying the path

Hullo,
If I've got a function
foo <- function(list, name)
where I would want
foo(list, c("a", "b", "c"))
to return
list[[a]][[b]][[c]]
and also fail gracefully if list[[a]][[b]][[c]] doesn't exist,
How do I accomplish this ideally in base R?
We could use pluck
foo <- function(list, name) {
purrr::pluck(list, !!! name)
}
-testing
> lst1 <- list(a = list(b = list(c = 1:2)), b = list(d = list(e = 1:5)))
> foo(lst1, c("a", "b", "c"))
[1] 1 2
Or simply in base R
foo <- function(list, name) {
list[[name]]
}
> foo(lst1, c("a", "b", "c"))
[1] 1 2

wilcox test inside rows

I have a data frame:
structure(list(groups = c("A", "A", "A", "A", "B", "B", "B",
"B", "C", "C", "C", "C", "D", "D", "D", "D"), weight = c(50.34869444,
49.20443342, 50.62727386, 50.12316397, 49.84571613, 50.88337532,
48.23188285, 51.13725686, 51.19946209, 49.02212935, 50.00188434,
49.70067628, 50.50444172, 48.88528478, 49.2378029, 49.11125589
), height = c(149.5389985, 150.7241218, 149.6922257, 149.6660622,
150.2770344, 149.6382699, 150.1900336, 151.264749, 151.3418096,
149.9407582, 150.2397936, 149.3163071, 148.079746, 149.1675788,
147.5201934, 150.8203477), age = c(10.18377395, 8.388813147,
9.858806212, 9.859746016, 9.584814407, 9.081315423, 10.67367302,
10.26713746, 10.96606861, 11.58603799, 10.34936347, 9.93621052,
9.584046986, 8.413787028, 10.39826156, 9.977231496), month_birth = c(3.627272074,
1.989467718, 2.175805989, 1.095100584, 2.16437856, 1.215151355,
2.63897628, 0.942159155, 1.155299136, 0.404000756, 1.695590789,
2.739378326, 1.950649717, 1.312775225, 1.904828579, 1.325257624
)), class = "data.frame", row.names = c(NA, -16L))
I want to use wilcox test to compare columns within each group individually
What was I trying to do:
wilcox.fun <- function(dat, col,group.labels) {
c1 <- combn(unique(group.labels),2)
sigs <- list()
for(i in 1:ncol(c1)) {
sigs[[i]] <- wilcox.test(
dat[c1[i,],col],
dat[c1[i,],col]
)
}
names(sigs) <- paste("Group",c1[1,],"by Group",c1[2,])
tests <- data.frame(Test=names(sigs),
W=unlist(lapply(sigs,function(x) x$statistic)),
p=unlist(lapply(sigs,function(x) x$p.value)),row.names=NULL)
return(tests)
}
debug(test.fun)
tests <- lapply(colnames(data[,c(2:6)]),function(x) wilcox.fun(data,group.labels=c(2:6),x))
names(tests) <- colnames(data[,c(2:6)])
I want to use the wilcox test to compare not between groups, but within the same group between the selected columns.
You can try this code to apply wilcox.test for every combination of variables within each group.
wilcox.fun <- function(dat) {
do.call(rbind, combn(names(dat)[-1], 2, function(x) {
test <- wilcox.test(dat[[x[1]]], dat[[x[2]]])
data.frame(Test = sprintf('Group %s by Group %s', x[1], x[2]),
W = test$statistic,
p = test$p.value)
}, simplify = FALSE))
}
result <- purrr::map_df(split(data, data$groups), wilcox.fun, .id = 'Group')

Recursive function to check all possible paths (from raw material to product)

I am struggling with a recursive function, who's goal is to determine which raw materials belong to which product. I clouldn't figure out, how to handle multiple possible paths in data frame "db". The wanted function should give: A-B-C-E, A-B-C-F, A-B-D-F for db. My function works for "da". I added it to show what I am after, and it is a bit like bill of materials explosion, but not exactly.
da <- data.frame(parent = c("A", "B", "B", "C", "D"),
child = c("B", "C", "D", "E", "F"),
stringsAsFactors = FALSE)
db <- data.frame(parent = c("A", "B", "B", "C", "D", "C"),
child = c("B", "C", "D", "E", "F", "F"),
stringsAsFactors = FALSE)
my_path <- function(a, df) {
b <- df$parent[df$child == a]
if (length(b) == 0) {
return(a)
} else {
return(c(my_path(b, df), a))
}
}
end_points <- da$child[is.na(match(da$child, da$parent))]
lapply(end_points, function(x) my_path(x, da)) # -> ok
end_points <- db$child[is.na(match(db$child, db$parent))]
lapply(end_points, function(x) my_path(x, db)) # -> not ok
Thx & kind regards
This is a job for igraph:
#the data
db <- data.frame(parent = c("A", "B", "B", "C", "D", "C"),
child = c("B", "C", "D", "E", "F", "F"),
stringsAsFactors = FALSE)
#create a graph
library(igraph)
g <- graph_from_data_frame(db)
#plot the graph
plot(g)
#find all vertices that have no ingoing resp. outgoing edges
starts <- V(g)[degree(g, mode = "in") == 0]
finals <- V(g)[degree(g, mode = "out") == 0]
#find paths, you need to loop if starts is longer than 1
res <- all_simple_paths(g, from = starts[[1]], to = finals)
#[[1]]
#+ 4/6 vertices, named, from 4b85bd1:
#[1] A B C E
#
#[[2]]
#+ 4/6 vertices, named, from 4b85bd1:
#[1] A B C F
#
#[[3]]
#+ 4/6 vertices, named, from 4b85bd1:
#[1] A B D F
#coerce to vectors
lapply(res, as_ids)

Matching vectors by last element

I am wondering how can I join list of vectors to a data.frame or just to vectors to append new item to each vector with a match.
# list of vectors that should be extended with values from vp
# based on last item match to vc
lst <- list(c("a", "b", "c"),
c("b", "d"),
c("f", "e")
)
vc <- c("c", "c", "d")
vp <- c("k", "l", "m")
# expected output:
expect <- list (c("a", "b", "c", "k"),
c("a", "b", "c", "l"),
c("b", "d", "m"),
c("f", "e"))
It is worth noticing that if last item in lst matches several values in vc, vector is duplicated. Vector stays unchanged if it does not match values in vc
Try this one:
L <- lapply(lst, function(v) vp[vc %in% v[length(v)]])
pv <- function(v1, v2) {
if (length(v2) == 0) {
list(v1)
}
else {
lapply(v2, function(v) c(v1,v))
}
}
L2 <- mapply(pv, lst, L)
unlist(L2, recursive=F)
Here is my solution:
l=lapply(lst, function(v) vp[vc %in% v])
res=sapply(1:length(lst), function(i)
{
x=lst[[i]]
y=l[[i]]
if (length(y)>0)
sapply(1:length(y), function(j) list(c(x, y[j])))
else
list(x)
}
)
unlist(res, recursive = FALSE)

Resources