I have the following input data:
# [,1] [,2]
#[1,] "A" "B"
#[2,] "A" "C"
#[3,] "A" "D"
#[4,] "B" "C"
#[5,] "B" "D"
#[6,] "C" "D"
Next, I want to exclude rows whose first or second element has already appeared N times in earlier rows. For example, if N = 2, the following rows need to be excluded:
#[3,] "A" "D" - element "A" has been 2 times
#[5,] "B" "D" - element "B" has been 2 times
#[6,] "C" "D" - element "C" has been 2 times
Note: Exclusions must take effect immediately. For example, if an element originally appears 5 times but, after earlier rows have been removed, it has been kept only once, then the next row containing this element must be kept, because the element then appears only 2 times.
Example (N=2):
Input data:
[,1] [,2]
[1,] "A" "B"
[2,] "A" "C"
[3,] "A" "D"
[4,] "A" "E"
[5,] "B" "C"
[6,] "B" "D"
[7,] "B" "E"
[8,] "C" "D"
[9,] "C" "E"
[10,] "D" "E"
Output data:
[,1] [,2]
[1,] "A" "B"
[2,] "A" "C"
[5,] "B" "C"
[10,] "D" "E"
There are possibly more elegant solutions... but this seems to work:
# Build every 2-element combination of v (one combination per row), then
# limit each letter to at most n rows.
v <- c("A", "B", "C", "D", "E")
cmb <- t(combn(v, 2))
n <- 2
# Go through each letter
for (l in v) {
  # Flag the combinations (rows) that contain this letter
  rows <- apply(cmb, 1, function(x) l %in% x)
  # Indices of at most the first n such rows; head() also handles the cases
  # where fewer than n rows match or where n is 0 (1:n would yield c(1, 0))
  rows.2 <- head(which(rows), n)
  # Put the kept rows containing the letter first, then append all rows not
  # containing it; drop = FALSE keeps single-row selections as matrices.
  # Note that this reorders the rows of cmb on every pass.
  cmb <- rbind(cmb[rows.2, , drop = FALSE], cmb[!rows, , drop = FALSE])
}
cmb
which outputs:
[,1] [,2]
[1,] "D" "E"
[2,] "B" "C"
[3,] "A" "C"
[4,] "A" "B"
Related
I don't even really know how to describe what I want to do, so hopefully the title makes at least some sense.
Better if I show you:
I have a simple 3x5 matrix of letters a to e:
matrix(data = rep(letters[1:5], 3), nrow = 3, ncol = 5, byrow = TRUE)
It gives this:
[,1] [,2] [,3] [,4] [,5]
[1,] "a" "b" "c" "d" "e"
[2,] "a" "b" "c" "d" "e"
[3,] "a" "b" "c" "d" "e"
I would like to change it to this without typing it manually:
[,1] [,2] [,3] [,4] [,5]
[1,] "a" "b" "c" "d" "e"
[2,] "e" "a" "b" "c" "d"
[3,] "d" "e" "a" "b" "c"
I'm thinking some kind of loop system or similar, but I have no idea where to start.
For the simple case you might try this for loop.
# Shift row i of m3 to the right by (i - 1) positions, wrapping the trailing
# elements around to the front. Row 1 is left untouched.
# Intended for the simple case where m3 has no more rows than columns.
n <- ncol(m3)
for (i in seq_len(nrow(m3))[-1]) {
  # Last (i - 1) elements first, followed by the remaining leading elements
  m3[i, ] <- c(m3[i, (n - i + 2):n], m3[i, 1:(n - i + 1)])
}
m3
# [,1] [,2] [,3] [,4] [,5]
# [1,] "a" "b" "c" "d" "e"
# [2,] "e" "a" "b" "c" "d"
# [3,] "d" "e" "a" "b" "c"
To let the pattern repeat for a longer matrix, we might generalize:
# Shift each row of m7 to the right, repeating the shift pattern every n rows
# (n = number of columns), so the matrix may have more rows than columns.
n <- ncol(m7)
for (i in seq_len(nrow(m7))[-1]) {
  # Position of row i within the repeating cycle, in 1..n
  j <- (i - 1) %% n + 1
  # j == 1 means the row is at the start of a cycle and stays unshifted
  if (j > 1) m7[i, ] <- c(m7[i, (n - j + 2):n], m7[i, 1:(n - j + 1)])
}
m7
# [,1] [,2] [,3] [,4] [,5]
# [1,] "a" "b" "c" "d" "e"
# [2,] "e" "a" "b" "c" "d"
# [3,] "d" "e" "a" "b" "c"
# [4,] "c" "d" "e" "a" "b"
# [5,] "b" "c" "d" "e" "a"
# [6,] "a" "b" "c" "d" "e"
# [7,] "e" "a" "b" "c" "d"
Data:
# Example matrices: the letters "a" to "e" laid out along each row,
# recycled to fill 3 and 7 rows respectively.
m3 <- matrix(letters[seq_len(5)], nrow = 3, ncol = 5, byrow = TRUE)
m7 <- matrix(letters[seq_len(5)], nrow = 7, ncol = 5, byrow = TRUE)
You can create a variable called ord: ord <- seq_len(ncol(m))
Within the map2 call, use ord and max(ord) to build the integer indices that are used to subset the matrix.
Then bind the results together row-wise with do.call(rbind, ...).
Here m is the matrix.
library(purrr)
# Build one rotated row per element of ord and stack them with rbind.
# Requires `m` (the matrix) and `ord` to exist already; the comments below
# assume ord is seq_len(ncol(m)).
# map2 passes each element of ord as x and nrow(m) as y.
do.call(rbind, map2(ord, nrow(m), \(x,y)
# Row y of m, with columns x..max(ord) first, then the remaining columns
m[y, c(x:max(ord),
ord[- (x:max(ord))])]
# Reorder the resulting list: element 1 first, then all elements in reverse
)[c(1,rev(ord))]
)
[,1] [,2] [,3] [,4] [,5]
[1,] "a" "b" "c" "d" "e"
[2,] "e" "a" "b" "c" "d"
[3,] "d" "e" "a" "b" "c"
[4,] "c" "d" "e" "a" "b"
[5,] "b" "c" "d" "e" "a"
[6,] "a" "b" "c" "d" "e"
Consider a vector:
dim <- c("a", "b", "c", "d")
I want to be able to create versions of the vector by dropping some variables and then using the updated vector for my loop.
For eg:
I want it to iterate over all possible vectors that can result from this:
dim <- c("a", "b", "d")
And so on and so forth. Could I do this in a loop, or in some way where I do not have to specify anything by hand? Order doesn't matter, so I do not want both a,b,c and c,a,b.
You can get this with:
dim <- c("a", "b", "c", "d")
> Map(combn, list(dim), 1:length(dim))
[[1]] # All combinations of size 1
[,1] [,2] [,3] [,4]
[1,] "a" "b" "c" "d"
[[2]] # All combinations of size 2
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "a" "a" "a" "b" "b" "c"
[2,] "b" "c" "d" "c" "d" "d"
[[3]] # All combinations of size 3
[,1] [,2] [,3] [,4]
[1,] "a" "a" "a" "b"
[2,] "b" "b" "c" "c"
[3,] "c" "d" "d" "d"
[[4]] # All combinations of size 4
[,1]
[1,] "a"
[2,] "b"
[3,] "c"
[4,] "d"
if you are looking for all combinations of dim you can check out the function combn from the combinat package:
# All combinations of size 1, returned as a list (simplify = FALSE)
combinat::combn(letters[1:4], 1, simplify = FALSE)
[[1]]
[1] "a"
[[2]]
[1] "b"
[[3]]
[1] "c"
[[4]]
[1] "d"
# All combinations of size 2, 3 and 4, each returned as a list
combinat::combn(letters[1:4], 2, simplify = FALSE)
combinat::combn(letters[1:4], 3, simplify = FALSE)
combinat::combn(letters[1:4], 4, simplify = FALSE)
In base R I would use either of the for-loop, sapply or lapply
for-loop
# Print each version of `dim` that has exactly one element left out
for (i in seq_along(dim)) print(dim[-i])
[1] "b" "c" "d"
[1] "a" "c" "d"
[1] "a" "b" "d"
[1] "a" "b" "c"
sapply
t( sapply(seq_along(dim), function(i) dim[-i]) )
[,1] [,2] [,3]
[1,] "b" "c" "d"
[2,] "a" "c" "d"
[3,] "a" "b" "d"
[4,] "a" "b" "c"
lapply
lapply(seq_along(dim), function(i) dim[-i])
[[1]]
[1] "b" "c" "d"
[[2]]
[1] "a" "c" "d"
[[3]]
[1] "a" "b" "d"
[[4]]
[1] "a" "b" "c"
I'm trying to find the most economical and elegant code for a simple task: fill an empty matrix with randomly sampled values (here, A, B, or C). For illustration, let's take this matrix:
x <- matrix(NA, nrow=8, ncol=4)
[,1] [,2] [,3] [,4]
[1,] NA NA NA NA
[2,] NA NA NA NA
[3,] NA NA NA NA
[4,] NA NA NA NA
[5,] NA NA NA NA
[6,] NA NA NA NA
[7,] NA NA NA NA
[8,] NA NA NA NA
To fill it I've used two codes so far, each successfully doing the job. The first uses sapply:
# Draw one letter from A-C per cell of x; vapply guarantees that every draw
# is a single character value. x[] keeps the matrix dimensions.
x[] <- vapply(x, function(i) sample(LETTERS[1:3], 1, replace = FALSE), character(1))
x
[,1] [,2] [,3] [,4]
[1,] "C" "A" "B" "C"
[2,] "B" "B" "B" "B"
[3,] "A" "B" "B" "B"
[4,] "B" "C" "A" "C"
[5,] "B" "A" "C" "A"
[6,] "A" "B" "C" "A"
[7,] "A" "C" "C" "A"
[8,] "C" "B" "B" "C"
while the second is a forloop:
# Fill x row by row with letters drawn (with replacement) from A-C.
# seq_len() is safe for a matrix with zero rows, and ncol(x) replaces the
# hard-coded row width so the loop works for any number of columns.
for (i in seq_len(nrow(x))) {
  x[i, ] <- sample(LETTERS[1:3], ncol(x), replace = TRUE)
}
x
[,1] [,2] [,3] [,4]
[1,] "C" "A" "C" "C"
[2,] "C" "A" "B" "B"
[3,] "C" "C" "A" "B"
[4,] "C" "C" "A" "C"
[5,] "A" "C" "C" "C"
[6,] "B" "C" "A" "A"
[7,] "C" "C" "B" "A"
[8,] "B" "C" "B" "C"
I like neither of them as they both look bulky. Is there a better way to get the expected result, that is, is there a shorter and/or more elegant way?
How about assigning it directly?
x[] <- sample(LETTERS, length(x), replace = TRUE)
x
# [,1] [,2] [,3] [,4]
#[1,] "A" "H" "V" "A"
#[2,] "X" "M" "Y" "O"
#[3,] "A" "W" "N" "I"
#[4,] "H" "Y" "Y" "C"
#[5,] "W" "N" "O" "P"
#[6,] "Y" "H" "P" "J"
#[7,] "I" "Y" "N" "H"
#[8,] "S" "F" "Z" "I"
If you want to include only the first three LETTERS, this would work:
x[] <- sample(LETTERS[1:3], length(x), replace = TRUE)
We can use replace without changing the original matrix
replace(x, TRUE, sample(LETTERS, length(x), replace = TRUE))
# [,1] [,2] [,3] [,4]
#[1,] "B" "O" "S" "D"
#[2,] "N" "C" "Q" "Z"
#[3,] "X" "X" "Z" "X"
#[4,] "O" "G" "R" "R"
#[5,] "L" "B" "S" "U"
#[6,] "Y" "I" "O" "A"
#[7,] "L" "Y" "P" "M"
#[8,] "R" "X" "H" "T"
This question already has answers here:
How to generate permutations or combinations of object in R?
(3 answers)
Closed 4 years ago.
Is there a way to generate all the unique sets of the following permutations, where I am able to change N and R easily.
library(gtools)
x <- c("A","B","C","D")
x <- permutations(n=4,r=2,v=x)
x
[,1] [,2]
[1,] "A" "B"
[2,] "A" "C"
[3,] "A" "D"
[4,] "B" "A"
[5,] "B" "C"
[6,] "B" "D"
[7,] "C" "A"
[8,] "C" "B"
[9,] "C" "D"
[10,] "D" "A"
[11,] "D" "B"
[12,] "D" "C"
For example sets 1 and 4 are not unique, AB and BA contain the same characters.
The following list is unique, and this is what I want.
[,1] [,2]
[1,] "A" "B"
[2,] "A" "C"
[3,] "A" "D"
[4,] "B" "C"
[5,] "B" "D"
[6,] "C" "D"
combn would give you what you need:
#combn gives you the combinations, t is only used to transpose the matrix
t(combn(x, 2))
# [,1] [,2]
#[1,] "A" "B"
#[2,] "A" "C"
#[3,] "A" "D"
#[4,] "B" "C"
#[5,] "B" "D"
#[6,] "C" "D"
Suppose I have two vectors like this:
# Two example character vectors that share the elements "C", "D" and "F"
l1 <- c("C", "D", "E", "F")
l2 <- c("G", "C", "D", "F")
I generate all combinations of two elements using combn function:
# All 2-element combinations of each vector, one combination per row
l1_vector <- t(combn(l1, m = 2))
l2_vector <- t(combn(l2, m = 2))
> l1_vector
[,1] [,2]
[1,] "C" "D"
[2,] "C" "E"
[3,] "C" "F"
[4,] "D" "E"
[5,] "D" "F"
[6,] "E" "F"
> l2_vector
[,1] [,2]
[1,] "G" "C"
[2,] "G" "D"
[3,] "G" "F"
[4,] "C" "D"
[5,] "C" "F"
[6,] "D" "F"
Now I want to count the rows that appear in both l1_vector and l2_vector. In the example above, the count should be 3 (["C","D"], ["C","F"], ["D","F"]).
How can I do that without using a loop?
As mentioned in the comments, you can use the merge function for this. Since the default behavior of merge is to use all of the available columns, it will return only those rows that are perfect matches.
> merge(l1_vector, l2_vector)
V1 V2
1 C D
2 C F
3 D F
>
> nrow(merge(l1_vector, l2_vector))
[1] 3
While merge is perfectly fine for your case, there is also an alternative.
If you just need the number of repeated elements:
choose(length(intersect(l1, l2)), 2)
[1] 3
If you need the repeated elements:
t(combn(intersect(l1, l2), 2))
[,1] [,2]
[1,] "C" "D"
[2,] "C" "F"
[3,] "D" "F"