Matching vectors by last element - r

I am wondering how can I join list of vectors to a data.frame or just to vectors to append new item to each vector with a match.
# list of vectors that should be extended with values from vp
# based on last item match to vc
lst <- list(c("a", "b", "c"),
c("b", "d"),
c("f", "e")
)
vc <- c("c", "c", "d")
vp <- c("k", "l", "m")
# expected output:
expect <- list (c("a", "b", "c", "k"),
c("a", "b", "c", "l"),
c("b", "d", "m"),
c("f", "e"))
It is worth noticing that if last item in lst matches several values in vc, vector is duplicated. Vector stays unchanged if it does not match values in vc

Try this one:
L <- lapply(lst, function(v) vp[vc %in% v[length(v)]])
pv <- function(v1, v2) {
if (length(v2) == 0) {
list(v1)
}
else {
lapply(v2, function(v) c(v1,v))
}
}
L2 <- mapply(pv, lst, L)
unlist(L2, recursive=F)

Here is my solution:
l=lapply(lst, function(v) vp[vc %in% v])
res=sapply(1:length(lst), function(i)
{
x=lst[[i]]
y=l[[i]]
if (length(y)>0)
sapply(1:length(y), function(j) list(c(x, y[j])))
else
list(x)
}
)
unlist(res, recursive = FALSE)

Related

Get elements in a vector that are not present in other vectors [duplicate]

This question already has answers here:
How to tell what is in one vector and not another?
(6 answers)
Closed 6 months ago.
I have the following 3 vectors:
vec <- c("a", "b", "c", "d", "e", "f", "g", "h")
vec1 <- c("a", "d", "f", "h")
vec2 <- c("b", "c")
I would to get a new vector that contain the elements present in vec, that are not present in vec1 and vec2
output desired:
output <- c("e", "g")
Thanks all
You could use setdiff() and use c() to concatenate vec1 and vec2.
vec <- c("a", "b", "c", "d", "e", "f", "g", "h")
vec1 <- c("a", "d", "f", "h")
vec2 <- c("b", "c")
setdiff(vec, c(vec1, vec2))
Output:
> setdiff(vec, c(vec1, vec2))
[1] "e" "g"
vec[which(!vec %in% c(vec1, vec2))]
vec %in% c(vec1, vec2) returns TRUE for elements of vec that are in vec1 and vec2 combined and FALSE otherwise. !vec %in% c(vec1, vec2) does the opposite, i.e., it returns TRUE for elements of vec that are not in vec1 or vec2. which returnes the position index for those returned TRUE, which can be used by vec[] to extract the original elements from vec.

Fastest way of automatically renaming dataframes, if they fulfill certain conditions

I would like to automatically rename dataframes if they fulfill certain conditions. I have two question about this.
In the code below, the rm part fails, but I do not understand why.
I am wondering if there is a faster/better way to do this (for example by first putting the df's in a list, renaming and unlisting).
Example data:
df_a <- data.frame(
A = c("a", "b", "c"),
B = c("a", "b", "c"),
C = c("a", "b", "c")
)
df_b <- data.frame(
same_as_A = c("a", "b", "c"),
same_as_B = c("a", "b", "c"),
same_as_C = c("a", "b", "c")
)
My attempt is the following (where the condition is that more than 2 columns match):
# names of the data
names_of_dataset_X <- c("A", "B", "C")
names_of_dataset_Y <- c("same_as_A", "same_as_B", "same_as_C")
dfs <- ls()
for (i in seq_along(dfs)) {
if ( sum( names( get( dfs[i] ) ) %in% names_of_dataset_X) > 2) {
dataset_X <- copy(get( dfs[i] ))
rm(get( dfs[i] ))
} else if (TRUE) {
dataset_Y <- copy(get( dfs[i] ))
rm(get( dfs[i] ))
}
}

improve readability of output list of foreach loop

I have a simple example of foreach nested loop below. How can I improve the readability of the result res?
Below is another example of nested for loop with more readable results, each element of list can be easily identified e.g. res_desired[["B"]][[3]]
library(foreach)
library(doFuture)
registerDoFuture()
plan(multicore, workers = 1)
# foreach loop
res <- foreach(a = c("A", "B", "C", "D")) %:%
foreach(b = 1:4) %do%
{
paste0(rep(a, b))
}
# for loop
res_desired <- list()
for(a in c("A", "B", "C", "D"))
{
for(b in 1:4)
{
res_desired[[a]][[b]] <- paste0(rep(a, b))
}
}
The .final may be useful. According to ?foreach
.final - function of one argument that is called to return final result.
With one more nested loop
res_1 <- foreach(a = c("A", "B", "C", "D"),
.final = function(x) setNames(x, c("A", "B", "C", "D"))) %:%
foreach(c = c("a", "b", "c"),
.final = function(x) setNames(x, c("a", "b", "c"))) %:%
foreach(b = 1:4) %do% {paste0(rep(a, b))}
-checking
> res_1[["B"]][["c"]][[2]]
[1] "B" "B"
similar to
res_desired_1 <- list()
for(a in c("A", "B", "C", "D")) {
for(c in c("a", "b", "c")) {
for(b in 1:4) {
res_desired_1[[a]][[c]][[b]] <- paste0(rep(a, b))
}
}
}
> res_desired_1[["B"]][["c"]][[2]]
[1] "B" "B"

How do I perform a permutation in R with fixed objects in a nucleotide string?

I need to see all possible variations of these nucleotide string: GCGCTAAGCAAAAAACAT
with two caveats:
1. everything that is not Bold is fixed
2. Bold can be either A or C
I used the permutations function:
library(gtools)
x <- c('a', 'c')
permutations(n=2, r=8, v=x, repeats.allwed=T)
It works, the problem is that I would need the list of results to be in the nucleotide sequence, or else I will spend more time copying and pasting the results in the nucleotide sequence than doing the permutation by hand.
Thanks a lot!!!
Giacomo
I would use sprintf:
library(gtools)
x <- c('a', 'c')
p <- permutations(n=2, r=8, v=x, repeats.allowed=T)
#split columns
p <- asplit(p, 2)
#insert into format string
do.call(sprintf, c(p, fmt = "GCGCT%s%sGC%s%s%s%s%s%sCAT"))
#[1] "GCGCTaaGCaaaaaaCAT" "GCGCTaaGCaaaaacCAT" "GCGCTaaGCaaaacaCAT" "GCGCTaaGCaaaaccCAT" "GCGCTaaGCaaacaaCAT" ...
You might want to create the permutations from upper-case letters.
Here is a solution, resulting in a matrix with each line being a permutation
library(gtools)
x <- c('A', 'C')
perms <- permutations(n = 2, r = 8, v = x, repeats.allowed = TRUE)
nucleotid <- c("G", "C", "G", "C", "T", "A",
"A", "G", "C", "A", "A", "A",
"A", "A", "A", "C", "A", "T")
# Looping throught each permutation to create the string
result <- apply(perms, 1, function(x){
y <- nucleotid
y[c(6,7,10:15)] <- x
y
})
t(result)

Recursive function to check all possible paths (from raw material to product)

I am struggling with a recursive function, who's goal is to determine which raw materials belong to which product. I clouldn't figure out, how to handle multiple possible paths in data frame "db". The wanted function should give: A-B-C-E, A-B-C-F, A-B-D-F for db. My function works for "da". I added it to show what I am after, and it is a bit like bill of materials explosion, but not exactly.
da <- data.frame(parent = c("A", "B", "B", "C", "D"),
child = c("B", "C", "D", "E", "F"),
stringsAsFactors = FALSE)
db <- data.frame(parent = c("A", "B", "B", "C", "D", "C"),
child = c("B", "C", "D", "E", "F", "F"),
stringsAsFactors = FALSE)
my_path <- function(a, df) {
b <- df$parent[df$child == a]
if (length(b) == 0) {
return(a)
} else {
return(c(my_path(b, df), a))
}
}
end_points <- da$child[is.na(match(da$child, da$parent))]
lapply(end_points, function(x) my_path(x, da)) # -> ok
end_points <- db$child[is.na(match(db$child, db$parent))]
lapply(end_points, function(x) my_path(x, db)) # -> not ok
Thx & kind regards
This is a job for igraph:
#the data
db <- data.frame(parent = c("A", "B", "B", "C", "D", "C"),
child = c("B", "C", "D", "E", "F", "F"),
stringsAsFactors = FALSE)
#create a graph
library(igraph)
g <- graph_from_data_frame(db)
#plot the graph
plot(g)
#find all vertices that have no ingoing resp. outgoing edges
starts <- V(g)[degree(g, mode = "in") == 0]
finals <- V(g)[degree(g, mode = "out") == 0]
#find paths, you need to loop if starts is longer than 1
res <- all_simple_paths(g, from = starts[[1]], to = finals)
#[[1]]
#+ 4/6 vertices, named, from 4b85bd1:
#[1] A B C E
#
#[[2]]
#+ 4/6 vertices, named, from 4b85bd1:
#[1] A B C F
#
#[[3]]
#+ 4/6 vertices, named, from 4b85bd1:
#[1] A B D F
#coerce to vectors
lapply(res, as_ids)

Resources