Unique Sets Permutations R [duplicate] - r

This question already has answers here:
How to generate permutations or combinations of object in R?
(3 answers)
Closed 4 years ago.
Is there a way to generate all the unique sets of the following permutations, where I am able to change N and R easily.
library(gtools)
x <- c("A","B","C","D")
x <- permutations(n=4,r=2,v=x)
x
[,1] [,2]
[1,] "A" "B"
[2,] "A" "C"
[3,] "A" "D"
[4,] "B" "A"
[5,] "B" "C"
[6,] "B" "D"
[7,] "C" "A"
[8,] "C" "B"
[9,] "C" "D"
[10,] "D" "A"
[11,] "D" "B"
[12,] "D" "C"
For example sets 1 and 4 are not unique, AB and BA contain the same characters.
The following list is unique, and this is what I want.
[,1] [,2]
[1,] "A" "B"
[2,] "A" "C"
[3,] "A" "D"
[4,] "B" "C"
[5,] "B" "D"
[6,] "C" "D"

combn would give you what you need:
#combn gives you the combinations, t is only used to transpose the matrix
t(combn(x, 2))
# [,1] [,2]
#[1,] "A" "B"
#[2,] "A" "C"
#[3,] "A" "D"
#[4,] "B" "C"
#[5,] "B" "D"
#[6,] "C" "D"

Related

Cycle each consecutive row of a matrix to the right by 1 position

I don't even really know how to describe what I want to do, so hopefully the title makes at least some sense.
Better if I show you:
I have a simple 3x5 matrix of letters a to e:
matrix(data = rep(letters[1:5], 3), nrow = 3, ncol = 5, byrow = TRUE)
It gives this:
[,1] [,2] [,3] [,4] [,5]
[1,] "a" "b" "c" "d" "e"
[2,] "a" "b" "c" "d" "e"
[3,] "a" "b" "c" "d" "e"
I would like to change it to this without typing it manually:
[,1] [,2] [,3] [,4] [,5]
[1,] "a" "b" "c" "d" "e"
[2,] "e" "a" "b" "c" "d"
[3,] "d" "e" "a" "b" "c"
I'm thinking some kind of loop system or similar, but I have no idea where to start.
For the simple case you might try this for loop.
# Rotate row i of m3 to the right by (i - 1) positions; row 1 is left as is.
# The loop bound uses m3 itself (the earlier version read nrow(m), an object
# that is not defined in this snippet).
# Only valid while nrow(m3) <= ncol(m3); see the generalised loop for more rows.
n <- ncol(m3)
for (i in seq_len(nrow(m3))[-1]) {
  # Move the last (i - 1) entries to the front and keep the rest in order.
  m3[i, ] <- c(m3[i, (n - i + 2):n], m3[i, 1:(n - i + 1)])
}
m3
# [,1] [,2] [,3] [,4] [,5]
# [1,] "a" "b" "c" "d" "e"
# [2,] "e" "a" "b" "c" "d"
# [3,] "d" "e" "a" "b" "c"
To let the pattern repeat for a longer matrix, we might generalize:
# Generalised version: row i is rotated right by (i - 1) %% n positions, so
# the pattern repeats every n rows, where n is the number of columns of m7.
n <- ncol(m7)
for (i in seq_len(nrow(m7))[-1]) {
  # j is the position of row i within the repeating cycle, in 1..n.
  # j == 1 means "no shift", so those rows are left untouched.
  j <- (i - 1) %% n + 1
  if (j > 1) {
    m7[i, ] <- c(m7[i, (n - j + 2):n], m7[i, 1:(n - j + 1)])
  }
}
m7
# [,1] [,2] [,3] [,4] [,5]
# [1,] "a" "b" "c" "d" "e"
# [2,] "e" "a" "b" "c" "d"
# [3,] "d" "e" "a" "b" "c"
# [4,] "c" "d" "e" "a" "b"
# [5,] "b" "c" "d" "e" "a"
# [6,] "a" "b" "c" "d" "e"
# [7,] "e" "a" "b" "c" "d"
Data:
m3 <- matrix(data=letters[1:5], nrow=3, ncol=5, byrow=TRUE)
m7 <- matrix(data=letters[1:5], nrow=7, ncol=5, byrow=TRUE)
You can create a variable called ord: ord <- seq_len(ncol(m))
Within the map2 function, use ord and max(ord) to build the integer indices that will be used to subset the matrix.
Then rbind the results together with do.call(rbind, ...),
where m is the matrix.
library(purrr)
# For each column offset x in ord, build a left-rotation of the LAST row of m
# (y is always nrow(m)). The resulting list is then indexed with
# c(1, rev(ord)), i.e. 1, n, n-1, ..., 1, so that each successive row is
# shifted one position to the right, and the pieces are stacked with rbind.
# The result therefore has ncol(m) + 1 rows.
do.call(rbind, map2(ord, nrow(m), \(x,y)
# columns x..n first, followed by the columns that come before x
m[y, c(x:max(ord),
ord[- (x:max(ord))])]
)[c(1,rev(ord))]
)
[,1] [,2] [,3] [,4] [,5]
[1,] "a" "b" "c" "d" "e"
[2,] "e" "a" "b" "c" "d"
[3,] "d" "e" "a" "b" "c"
[4,] "c" "d" "e" "a" "b"
[5,] "b" "c" "d" "e" "a"
[6,] "a" "b" "c" "d" "e"

What's the most economical way to fill a matrix with random values?

I'm trying to find the most economical and elegant code for a simple task: fill an empty matrix with randomly sampled values (here, A, B, or C). For illustration, let's take this matrix:
x <- matrix(NA, nrow=8, ncol=4)
[,1] [,2] [,3] [,4]
[1,] NA NA NA NA
[2,] NA NA NA NA
[3,] NA NA NA NA
[4,] NA NA NA NA
[5,] NA NA NA NA
[6,] NA NA NA NA
[7,] NA NA NA NA
[8,] NA NA NA NA
To fill it I've used two codes so far, each successfully doing the job. The first uses sapply:
x[] <- sapply(x, function(i) sample(LETTERS[1:3], 1, replace = F))
x
[,1] [,2] [,3] [,4]
[1,] "C" "A" "B" "C"
[2,] "B" "B" "B" "B"
[3,] "A" "B" "B" "B"
[4,] "B" "C" "A" "C"
[5,] "B" "A" "C" "A"
[6,] "A" "B" "C" "A"
[7,] "A" "C" "C" "A"
[8,] "C" "B" "B" "C"
while the second is a for loop:
# Fill x one row at a time: each row receives ncol(x) letters drawn with
# replacement from A, B, C. seq_len() keeps the loop safe for a 0-row matrix.
for (i in seq_len(nrow(x))) {
  x[i, ] <- sample(LETTERS[1:3], ncol(x), replace = TRUE)
}
x
[,1] [,2] [,3] [,4]
[1,] "C" "A" "C" "C"
[2,] "C" "A" "B" "B"
[3,] "C" "C" "A" "B"
[4,] "C" "C" "A" "C"
[5,] "A" "C" "C" "C"
[6,] "B" "C" "A" "A"
[7,] "C" "C" "B" "A"
[8,] "B" "C" "B" "C"
I like neither of them as they both look bulky. Is there a better way to get the expected result, that is, is there a shorter and/or more elegant way?
How about assigning it directly?
x[] <- sample(LETTERS, length(x), replace = TRUE)
x
# [,1] [,2] [,3] [,4]
#[1,] "A" "H" "V" "A"
#[2,] "X" "M" "Y" "O"
#[3,] "A" "W" "N" "I"
#[4,] "H" "Y" "Y" "C"
#[5,] "W" "N" "O" "P"
#[6,] "Y" "H" "P" "J"
#[7,] "I" "Y" "N" "H"
#[8,] "S" "F" "Z" "I"
If you want to include only the first three LETTERS, this would work:
x[] <- sample(LETTERS[1:3], length(x), replace = TRUE)
We can use replace without changing the original matrix
replace(x, TRUE, sample(LETTERS, length(x), replace = TRUE))
# [,1] [,2] [,3] [,4]
#[1,] "B" "O" "S" "D"
#[2,] "N" "C" "Q" "Z"
#[3,] "X" "X" "Z" "X"
#[4,] "O" "G" "R" "R"
#[5,] "L" "B" "S" "U"
#[6,] "Y" "I" "O" "A"
#[7,] "L" "Y" "P" "M"
#[8,] "R" "X" "H" "T"

calculate the repeatence of combinations elements in R

Suppose I have two vectors like this:
l1 = c('C','D','E','F')
l2 = c('G','C','D','F')
I generate all combinations of two elements using combn function:
l1_vector = t(combn(l1,2))
l2_vector = t(combn(l2,2))
> l1_vector
[,1] [,2]
[1,] "C" "D"
[2,] "C" "E"
[3,] "C" "F"
[4,] "D" "E"
[5,] "D" "F"
[6,] "E" "F"
> l2_vector
[,1] [,2]
[1,] "G" "C"
[2,] "G" "D"
[3,] "G" "F"
[4,] "C" "D"
[5,] "C" "F"
[6,] "D" "F"
Now I want to count the rows that appear in both l1_vector and l2_vector. In the example above, the count should be 3 (["C","D"],["C","F"],["D","F"]).
How can I do that without using a loop?
As mentioned in the comments, you can use the merge function for this. Since the default behavior of merge is to use all of the available columns, it will return only those rows that are perfect matches.
> merge(l1_vector, l2_vector)
V1 V2
1 C D
2 C F
3 D F
>
> nrow(merge(l1_vector, l2_vector))
[1] 3
While merge is perfectly fine for your case, there are some alternatives.
If you just need the number of repeated elements:
choose(length(intersect(l1, l2)), 2)
[1] 3
If you need the repeated elements:
t(combn(intersect(l1, l2), 2))
[,1] [,2]
[1,] "C" "D"
[2,] "C" "F"
[3,] "D" "F"

Exclude rows where element has been previously met for N times

I have following input data:
# [,1] [,2]
#[1,] "A" "B"
#[2,] "A" "C"
#[3,] "A" "D"
#[4,] "B" "C"
#[5,] "B" "D"
#[6,] "C" "D"
Next I want to exclude rows where the first or second element has already appeared N times in previous rows. For example, if N = 2, then the following rows need to be excluded:
#[3,] "A" "D" - element "A" has been 2 times
#[5,] "B" "D" - element "B" has been 2 times
#[6,] "C" "D" - element "C" has been 2 times
Note: Exclusions must be taken into account immediately. For example, if an element has appeared 5 times but, after removing the excluded rows, it has been kept only once, then the next row containing this element must be kept, because it now appears for the 2nd time.
Example (N=2):
Input data:
[,1] [,2]
[1,] "A" "B"
[2,] "A" "C"
[3,] "A" "D"
[4,] "A" "E"
[5,] "B" "C"
[6,] "B" "D"
[7,] "B" "E"
[8,] "C" "D"
[9,] "C" "E"
[10,] "D" "E"
Output data:
[,1] [,2]
[1,] "A" "B"
[2,] "A" "C"
[5,] "B" "C"
[10,] "D" "E"
There are possibly more elegant solutions... but this seems to work:
v <- c("A", "B", "C", "D", "E")
cmb <- t(combn(v, 2))
n <- 2
# Go through each letter and keep at most n of the rows that contain it.
# Each pass works on the already-filtered matrix, so earlier removals are
# taken into account immediately.
for (l in v) {
  # Flag the rows of the current matrix that use this letter
  has_letter <- apply(cmb, 1, function(x) l %in% x)
  # Take the first n rows containing the letter ...
  keep <- head(which(has_letter), n)
  # ... then append all the rows not containing it. drop = FALSE keeps both
  # pieces as matrices even when only one (or no) row is selected.
  cmb <- rbind(cmb[keep, , drop = FALSE], cmb[!has_letter, , drop = FALSE])
}
cmb
which outputs:
[,1] [,2]
[1,] "D" "E"
[2,] "B" "C"
[3,] "A" "C"
[4,] "A" "B"

R generate all possible combinations of size m from of a character vector of n elements [duplicate]

This question already has answers here:
Generate list of all possible combinations of elements of vector
(10 answers)
Closed 8 years ago.
so, I have this vector c("T", "A", "C", "G") for genomic data. I want to generate all possible combinations of size 3, with repeats such as:
T T T
T T A
T T C
T T G
T A T
..
that would give me 4^3=64 combinations. Combinations of size 4 would give 4^4, and for size 5 should give 4^5=1024 rows.
I searched through SOF, and think expand.grid() would do that, but I couldn't find out how to use it to get the desired output. Any idea?
x <- c("T", "A", "C", "G")
do.call(expand.grid, rep(list(x), 3))
permutations from gtools is designed to do just this:
library(gtools)
data <- c("T", "A", "C", "G")
permutations(4, 3, data, repeats.allowed = TRUE)
## [,1] [,2] [,3]
## [1,] "A" "A" "A"
## [2,] "A" "A" "C"
## [3,] "A" "A" "G"
## [4,] "A" "A" "T"
## [5,] "A" "C" "A"
## [6,] "A" "C" "C"
## [7,] "A" "C" "G"
## [8,] "A" "C" "T"
## [9,] "A" "G" "A"
## [10,] "A" "G" "C"
## [11,] "A" "G" "G"
## [12,] "A" "G" "T"
## [13,] "A" "T" "A"
## [14,] "A" "T" "C"
## [15,] "A" "T" "G"
## [16,] "A" "T" "T"
## [17,] "C" "A" "A"
## [18,] "C" "A" "C"
## [19,] "C" "A" "G"
## [20,] "C" "A" "T"
…

Resources