I have a simple example of foreach nested loop below. How can I improve the readability of the result res?
Below is another example of nested for loop with more readable results, each element of list can be easily identified e.g. res_desired[["B"]][[3]]
library(foreach)
library(doFuture)
registerDoFuture()
plan(multicore, workers = 1)
# foreach loop
res <- foreach(a = c("A", "B", "C", "D")) %:%
foreach(b = 1:4) %do%
{
paste0(rep(a, b))
}
# for loop
res_desired <- list()
for(a in c("A", "B", "C", "D"))
{
for(b in 1:4)
{
res_desired[[a]][[b]] <- paste0(rep(a, b))
}
}
The .final may be useful. According to ?foreach
.final - function of one argument that is called to return final result.
With one more nested loop
res_1 <- foreach(a = c("A", "B", "C", "D"),
.final = function(x) setNames(x, c("A", "B", "C", "D"))) %:%
foreach(c = c("a", "b", "c"),
.final = function(x) setNames(x, c("a", "b", "c"))) %:%
foreach(b = 1:4) %do% {paste0(rep(a, b))}
-checking
> res_1[["B"]][["c"]][[2]]
[1] "B" "B"
similar to
res_desired_1 <- list()
for(a in c("A", "B", "C", "D")) {
for(c in c("a", "b", "c")) {
for(b in 1:4) {
res_desired_1[[a]][[c]][[b]] <- paste0(rep(a, b))
}
}
}
> res_desired_1[["B"]][["c"]][[2]]
[1] "B" "B"
Related
I would like to automatically rename dataframes if they fulfill certain conditions. I have two question about this.
In the code below, the rm part fails, but I do not understand why.
I am wondering if there is a faster/better way to do this (for example by first putting the df's in a list, renaming and unlisting).
Example data:
df_a <- data.frame(
A = c("a", "b", "c"),
B = c("a", "b", "c"),
C = c("a", "b", "c")
)
df_b <- data.frame(
same_as_A = c("a", "b", "c"),
same_as_B = c("a", "b", "c"),
same_as_C = c("a", "b", "c")
)
My attempt is the following (where the condition is that more than 2 columns match):
# names of the data
names_of_dataset_X <- c("A", "B", "C")
names_of_dataset_Y <- c("same_as_A", "same_as_B", "same_as_C")
dfs <- ls()
for (i in seq_along(dfs)) {
if ( sum( names( get( dfs[i] ) ) %in% names_of_dataset_X) > 2) {
dataset_X <- copy(get( dfs[i] ))
rm(get( dfs[i] ))
} else if (TRUE) {
dataset_Y <- copy(get( dfs[i] ))
rm(get( dfs[i] ))
}
}
Hullo,
If I've got a function
foo <- function(list, name)
where I would want
foo(list, c("a", "b", "c"))
to return
list[[a]][[b]][[c]]
and also fail gracefully if list[[a]][[b]][[c]] doesn't exist,
How do I accomplish this ideally in base R?
We could use pluck
foo <- function(list, name) {
purrr::pluck(list, !!! name)
}
-testing
> lst1 <- list(a = list(b = list(c = 1:2)), b = list(d = list(e = 1:5)))
> foo(lst1, c("a", "b", "c"))
[1] 1 2
Or simply in base R
foo <- function(list, name) {
list[[name]]
}
> foo(lst1, c("a", "b", "c"))
[1] 1 2
I have a data frame:
structure(list(groups = c("A", "A", "A", "A", "B", "B", "B",
"B", "C", "C", "C", "C", "D", "D", "D", "D"), weight = c(50.34869444,
49.20443342, 50.62727386, 50.12316397, 49.84571613, 50.88337532,
48.23188285, 51.13725686, 51.19946209, 49.02212935, 50.00188434,
49.70067628, 50.50444172, 48.88528478, 49.2378029, 49.11125589
), height = c(149.5389985, 150.7241218, 149.6922257, 149.6660622,
150.2770344, 149.6382699, 150.1900336, 151.264749, 151.3418096,
149.9407582, 150.2397936, 149.3163071, 148.079746, 149.1675788,
147.5201934, 150.8203477), age = c(10.18377395, 8.388813147,
9.858806212, 9.859746016, 9.584814407, 9.081315423, 10.67367302,
10.26713746, 10.96606861, 11.58603799, 10.34936347, 9.93621052,
9.584046986, 8.413787028, 10.39826156, 9.977231496), month_birth = c(3.627272074,
1.989467718, 2.175805989, 1.095100584, 2.16437856, 1.215151355,
2.63897628, 0.942159155, 1.155299136, 0.404000756, 1.695590789,
2.739378326, 1.950649717, 1.312775225, 1.904828579, 1.325257624
)), class = "data.frame", row.names = c(NA, -16L))
I want to use wilcox test to compare columns within each group individually
What was I trying to do:
wilcox.fun <- function(dat, col,group.labels) {
c1 <- combn(unique(group.labels),2)
sigs <- list()
for(i in 1:ncol(c1)) {
sigs[[i]] <- wilcox.test(
dat[c1[i,],col],
dat[c1[i,],col]
)
}
names(sigs) <- paste("Group",c1[1,],"by Group",c1[2,])
tests <- data.frame(Test=names(sigs),
W=unlist(lapply(sigs,function(x) x$statistic)),
p=unlist(lapply(sigs,function(x) x$p.value)),row.names=NULL)
return(tests)
}
debug(test.fun)
tests <- lapply(colnames(data[,c(2:6)]),function(x) wilcox.fun(data,group.labels=c(2:6),x))
names(tests) <- colnames(data[,c(2:6)])
I want to use the wilcox test to compare not between groups, but within the same group between the selected columns.
You can try this code to apply wilcox.test for every combination of variables within each group.
wilcox.fun <- function(dat) {
do.call(rbind, combn(names(dat)[-1], 2, function(x) {
test <- wilcox.test(dat[[x[1]]], dat[[x[2]]])
data.frame(Test = sprintf('Group %s by Group %s', x[1], x[2]),
W = test$statistic,
p = test$p.value)
}, simplify = FALSE))
}
result <- purrr::map_df(split(data, data$groups), wilcox.fun, .id = 'Group')
I am struggling with a recursive function, who's goal is to determine which raw materials belong to which product. I clouldn't figure out, how to handle multiple possible paths in data frame "db". The wanted function should give: A-B-C-E, A-B-C-F, A-B-D-F for db. My function works for "da". I added it to show what I am after, and it is a bit like bill of materials explosion, but not exactly.
da <- data.frame(parent = c("A", "B", "B", "C", "D"),
child = c("B", "C", "D", "E", "F"),
stringsAsFactors = FALSE)
db <- data.frame(parent = c("A", "B", "B", "C", "D", "C"),
child = c("B", "C", "D", "E", "F", "F"),
stringsAsFactors = FALSE)
my_path <- function(a, df) {
b <- df$parent[df$child == a]
if (length(b) == 0) {
return(a)
} else {
return(c(my_path(b, df), a))
}
}
end_points <- da$child[is.na(match(da$child, da$parent))]
lapply(end_points, function(x) my_path(x, da)) # -> ok
end_points <- db$child[is.na(match(db$child, db$parent))]
lapply(end_points, function(x) my_path(x, db)) # -> not ok
Thx & kind regards
This is a job for igraph:
#the data
db <- data.frame(parent = c("A", "B", "B", "C", "D", "C"),
child = c("B", "C", "D", "E", "F", "F"),
stringsAsFactors = FALSE)
#create a graph
library(igraph)
g <- graph_from_data_frame(db)
#plot the graph
plot(g)
#find all vertices that have no ingoing resp. outgoing edges
starts <- V(g)[degree(g, mode = "in") == 0]
finals <- V(g)[degree(g, mode = "out") == 0]
#find paths, you need to loop if starts is longer than 1
res <- all_simple_paths(g, from = starts[[1]], to = finals)
#[[1]]
#+ 4/6 vertices, named, from 4b85bd1:
#[1] A B C E
#
#[[2]]
#+ 4/6 vertices, named, from 4b85bd1:
#[1] A B C F
#
#[[3]]
#+ 4/6 vertices, named, from 4b85bd1:
#[1] A B D F
#coerce to vectors
lapply(res, as_ids)
I am wondering how can I join list of vectors to a data.frame or just to vectors to append new item to each vector with a match.
# list of vectors that should be extended with values from vp
# based on last item match to vc
lst <- list(c("a", "b", "c"),
c("b", "d"),
c("f", "e")
)
vc <- c("c", "c", "d")
vp <- c("k", "l", "m")
# expected output:
expect <- list (c("a", "b", "c", "k"),
c("a", "b", "c", "l"),
c("b", "d", "m"),
c("f", "e"))
It is worth noticing that if last item in lst matches several values in vc, vector is duplicated. Vector stays unchanged if it does not match values in vc
Try this one:
L <- lapply(lst, function(v) vp[vc %in% v[length(v)]])
pv <- function(v1, v2) {
if (length(v2) == 0) {
list(v1)
}
else {
lapply(v2, function(v) c(v1,v))
}
}
L2 <- mapply(pv, lst, L)
unlist(L2, recursive=F)
Here is my solution:
l=lapply(lst, function(v) vp[vc %in% v])
res=sapply(1:length(lst), function(i)
{
x=lst[[i]]
y=l[[i]]
if (length(y)>0)
sapply(1:length(y), function(j) list(c(x, y[j])))
else
list(x)
}
)
unlist(res, recursive = FALSE)