I have this vector:
Vec=c("a" , "b", "c ", "d")
I want this as data frame:
[,1] [,2] [,3] [,4]
[1,] a b c d
[2,] a b c d
[3,] a b c d
[4,] a b c d
[5,] a b c d
Another option:
t(replicate(5, Vec))
# [,1] [,2] [,3] [,4]
#[1,] "a" "b" "c " "d"
#[2,] "a" "b" "c " "d"
#[3,] "a" "b" "c " "d"
#[4,] "a" "b" "c " "d"
#[5,] "a" "b" "c " "d"
One way using rbind and do.call would be:
do.call(rbind, replicate(5, Vec, simplify = FALSE))
[,1] [,2] [,3] [,4]
[1,] "a" "b" "c " "d"
[2,] "a" "b" "c " "d"
[3,] "a" "b" "c " "d"
[4,] "a" "b" "c " "d"
[5,] "a" "b" "c " "d"
You can replace 5 with any number you like.
replicate returns Vec 5 times as a list (simplify = FALSE keeps the result as a list instead of simplifying it to a matrix). These list elements are then rbind-ed using do.call.
Update:
Actually using matrix is probably the best:
> matrix(Vec, nrow=5, ncol=length(Vec), byrow=TRUE)
[,1] [,2] [,3] [,4]
[1,] "a" "b" "c " "d"
[2,] "a" "b" "c " "d"
[3,] "a" "b" "c " "d"
[4,] "a" "b" "c " "d"
[5,] "a" "b" "c " "d"
Change the nrow argument to whatever number you want and you'll have it ready.
All 3 answers will need to use as.data.frame to convert to a data.frame so I am excluding this from the microbenchmark:
Microbenchmark
> microbenchmark::microbenchmark(t(replicate(5, Vec)),
+ do.call(rbind, replicate(5, Vec, simplify = FALSE)),
+ matrix(Vec, nrow=5, ncol=4, byrow=TRUE),
+ times=1000)
Unit: microseconds
expr min lq mean median uq max neval
t(replicate(5, Vec)) 52.854 59.013 68.393740 63.374 70.815 1749.326 1000
do.call(rbind, replicate(5, Vec, simplify = FALSE)) 18.986 23.092 27.325856 25.144 27.710 105.708 1000
matrix(Vec, nrow = 5, ncol = 4, byrow = TRUE) 1.539 2.566 3.474166 3.079 3.593 29.763 1000
As you can see the matrix solution is by far the best.
Related
I don't even really know how to describe what I want to do, so hopefully the title makes at least some sense.
Better if I show you:
I have a simple 3x5 matrix of letters a to e:
matrix(data = rep(letters[1:5], 3), nrow = 3, ncol = 5, byrow = TRUE)
It gives this:
[,1] [,2] [,3] [,4] [,5]
[1,] "a" "b" "c" "d" "e"
[2,] "a" "b" "c" "d" "e"
[3,] "a" "b" "c" "d" "e"
I would like to change it to this without typing it manually:
[,1] [,2] [,3] [,4] [,5]
[1,] "a" "b" "c" "d" "e"
[2,] "e" "a" "b" "c" "d"
[3,] "d" "e" "a" "b" "c"
I'm thinking some kind of loop system or similar, but I have no idea where to start.
For the simple case you might try this for loop.
# Rotate row i of m3 to the right by (i - 1) positions; row 1 is left untouched.
# This simple form is only valid while nrow(m3) <= ncol(m3).
n <- ncol(m3)
for (i in seq_len(nrow(m3))[-1]) {
  # The last (i - 1) entries move to the front; the first (n - i + 1) follow.
  m3[i, ] <- c(m3[i, (n - i + 2):n], m3[i, 1:(n - i + 1)])
}
m3
# [,1] [,2] [,3] [,4] [,5]
# [1,] "a" "b" "c" "d" "e"
# [2,] "e" "a" "b" "c" "d"
# [3,] "d" "e" "a" "b" "c"
To let the pattern repeat for a longer matrix, we might generalize:
# Rotate row i of m7 to the right by (i - 1) positions, wrapping around every
# n rows so the pattern repeats for matrices with more rows than columns.
n <- ncol(m7)
for (i in seq_len(nrow(m7))[-1]) {
  # Number of positions to rotate; 0 means the row keeps its original order.
  shift <- (i - 1) %% n
  if (shift > 0) {
    # The last `shift` entries move to the front; the remaining ones follow.
    m7[i, ] <- m7[i, c((n - shift + 1):n, seq_len(n - shift))]
  }
}
m7
# [,1] [,2] [,3] [,4] [,5]
# [1,] "a" "b" "c" "d" "e"
# [2,] "e" "a" "b" "c" "d"
# [3,] "d" "e" "a" "b" "c"
# [4,] "c" "d" "e" "a" "b"
# [5,] "b" "c" "d" "e" "a"
# [6,] "a" "b" "c" "d" "e"
# [7,] "e" "a" "b" "c" "d"
Data:
m3 <- matrix(data=letters[1:5], nrow=3, ncol=5, byrow=TRUE)
m7 <- matrix(data=letters[1:5], nrow=7, ncol=5, byrow=TRUE)
You can create a variable called ord: ord <- seq_len(ncol(m))
Within map2, use ord and max(ord) to build the column indices that are used to subset the matrix.
Then rbind the results with do.call(rbind, ...).
Here m is the matrix.
library(purrr)
# Build one rotated copy of the last row of `m` per starting column, then stack them.
do.call(rbind, map2(ord, nrow(m), \(x,y)
# Columns x..max(ord) come first, followed by the columns before x (none when x is 1).
m[y, c(x:max(ord),
ord[- (x:max(ord))])]
# Reorder the list: the unrotated row first, then the rotations in reverse order.
# Index 1 appears again at the end of rev(ord), so the result has length(ord) + 1 rows.
)[c(1,rev(ord))]
)
[,1] [,2] [,3] [,4] [,5]
[1,] "a" "b" "c" "d" "e"
[2,] "e" "a" "b" "c" "d"
[3,] "d" "e" "a" "b" "c"
[4,] "c" "d" "e" "a" "b"
[5,] "b" "c" "d" "e" "a"
[6,] "a" "b" "c" "d" "e"
I have put the data and output here. In the first row, anything that is not A, B, C or D should be returned as NA; in the second row, anything that is not A, C, B or E should be returned as NA.
Here is an example showing one way to do it
> t(mapply(function(a, b) b[match(a, b)], asplit(x, 1), strsplit(y, "")))
[,1] [,2] [,3] [,4]
[1,] NA "B" "C" "A"
[2,] NA "B" "C" NA
Data
> x <- rbind(c("E", "B", "C", "A"), c("S", "B", "C", "D"))
> y <- c("ABCD", "ACBE")
> x
[,1] [,2] [,3] [,4]
[1,] "E" "B" "C" "A"
[2,] "S" "B" "C" "D"
> y
[1] "ABCD" "ACBE"
I have a string that is a concatenation of m possible types of elements - for the sake of simplicity m = 4 with A, B, C and D.
Whenever there are single elements more than once, I would have to split the string so that there are no repetitions left. However, I would like to generate all possible strings without repetitions.
To make this a little bit clearer, here is an example:
For A B A C D
String: A B C D
String: B A C D
This gets more complicated when there are several different elements that show up more than once:
For A B A C B D
String: A B C D
String: A C B D
String: B A C D
String: A C B D
Is there a smart way to compute this in R?
vec <- c("A", "B", "A", "C", "B", "D")
# Positions of each distinct element, keyed by element in order of first appearance.
positions <- split(seq_along(vec), factor(vec, levels = unique(vec)))
# One row per way of keeping exactly one occurrence of every element.
picks <- as.matrix(expand.grid(positions))
# Ordering the elements by their kept positions yields each repetition-free string.
out <- t(vapply(
  seq_len(nrow(picks)),
  function(i) colnames(picks)[order(picks[i, ])],
  character(ncol(picks))
))
# [,1] [,2] [,3] [,4]
# [1,] "A" "B" "C" "D"
# [2,] "B" "A" "C" "D"
# [3,] "A" "C" "B" "D"
# [4,] "A" "C" "B" "D"
out
First vector:
vec <- c("A","B","A","C","D")
# ...
# [,1] [,2] [,3] [,4]
# [1,] "A" "B" "C" "D"
# [2,] "B" "A" "C" "D"
If you are starting and ending with strings rather than vectors, then know that you can wrap the above with:
strsplit("ABACBD", "")[[1]]
# [1] "A" "B" "A" "C" "B" "D"
apply(out, 1, paste, collapse = "")
# [1] "ABCD" "BACD" "ACBD" "ACBD"
Assume we have the following permutations of the letters, "a", "b", and "c":
library(combinat)
do.call(rbind, permn(letters[1:3]))
# [,1] [,2] [,3]
# [1,] "a" "b" "c"
# [2,] "a" "c" "b"
# [3,] "c" "a" "b"
# [4,] "c" "b" "a"
# [5,] "b" "c" "a"
# [6,] "b" "a" "c"
Is it possible to perform some function on a given permutation "on-the-fly" (i.e., a particular row) without storing the result?
That is, if the row == "a" "c" "b" or row == "b" "c" "a", do not store the result. The desired result in this case would be:
# [,1] [,2] [,3]
# [1,] "a" "b" "c"
# [2,] "c" "a" "b"
# [3,] "c" "b" "a"
# [4,] "b" "a" "c"
I know I can apply a function to all the permutations on the fly within combinat::permn with the fun argument such as:
# Collapse each permutation to a string; the two excluded permutations are
# mapped to NA, so the returned list still has one element per permutation.
permn(letters[1:3], fun = function(x) {
  res <- paste0(x, collapse = "")
  # `%in%` gives the single TRUE/FALSE that `if` expects; the elementwise `|`
  # is meant for vector masks, not for scalar conditions.
  if (res %in% c("acb", "bca")) NA else res
})
But this still stores an NA and the returned list has 6 elements instead of the desired 4 elements:
# [[1]]
# [1] "abc"
#
# [[2]]
# [1] NA
#
# [[3]]
# [1] "cab"
#
# [[4]]
# [1] "cba"
#
# [[5]]
# [1] NA
#
# [[6]]
# [1] "bac"
Note, I am not interested in subsequently removing the NA values; I am specifically interested in not appending to the result list "on-the-fly" for a given permutation.
We could use a magrittr pipeline where we rbind the input matrix to the Rows to be checked and omit the duplicate rows.
library(combinat)
library(magrittr)
Rows <- rbind(c("a", "c", "b"), c("b", "c", "a"))
do.call(rbind, permn(letters[1:3])) %>%
subset(tail(!duplicated(rbind(Rows, .)), -nrow(Rows)))
giving:
[,1] [,2] [,3]
[1,] "a" "b" "c"
[2,] "c" "a" "b"
[3,] "c" "b" "a"
[4,] "b" "a" "c"
You can return NULL for the particular condition that you want to ignore and rbind the result which will ignore the NULL elements and bind only the combinations that you need.
do.call(rbind, combinat::permn(letters[1:3], function(x) {
  # Drop a permutation only when it equals one of the excluded ones in full.
  # A single all() around an elementwise `|` would also drop mixtures such as
  # c("a", "c", "a"), where each position matches a different excluded vector.
  excluded <- all(x == c("a", "c", "b")) || all(x == c("b", "c", "a"))
  # When excluded, the `if` yields NULL, which rbind silently skips.
  if (!excluded) x
}))
# [,1] [,2] [,3]
#[1,] "a" "b" "c"
#[2,] "c" "a" "b"
#[3,] "c" "b" "a"
#[4,] "b" "a" "c"
Similarly,
do.call(rbind, permn(letters[1:3],function(x) {
res <- paste0(x, collapse = "")
if (!res %in% c("acb","bca"))
return(res)
}))
# [,1]
#[1,] "abc"
#[2,] "cab"
#[3,] "cba"
#[4,] "bac"
Suppose I have a structure like the following:
data = structure(list(person = structure(list(name = "A, B",
gender = "F", dead = NULL), .Names = c("name",
"gender", "dead")), person = structure(list(name = "C",
gender = "M", dead = "RIP"), .Names = c("name",
"gender", "dead"))), .Names = c("person", "person"))
and I want to convert it into a matrix
data = matrix(unlist(data), nrow = length(data), ncol=length(data[[1]]), byrow = TRUE)
How do I avoid recycling the elements when using matrix or even before that using only the base functions without plyr's rbind.fill?
The result is:
> data
[,1] [,2] [,3]
[1,] "A, B" "F" "C"
[2,] "M" "RIP" "A, B"
and I would like to get NA or "" where the value is NULL. For instance:
> data
[,1] [,2] [,3]
[1,] "A, B" "F" ""
[2,] "C" "M" "RIP"
Any help would be appreciated.
You could try the new stri_list2matrix function in the stringi package.
library(stringi)
stri_list2matrix(lapply(data, unlist), byrow=TRUE, fill="")
# [,1] [,2] [,3]
# [1,] "A, B" "F" ""
# [2,] "C" "M" "RIP"
Or for NA instead of "", leave out the fill argument
stri_list2matrix(lapply(data, unlist), byrow=TRUE)
# [,1] [,2] [,3]
# [1,] "A, B" "F" NA
# [2,] "C" "M" "RIP"
Or if you prefer a base R answer, to avoid problems you could make all vectors the same length first with length<-. This will append NA to all shorter vectors and make them the same length as the longest vector.
# Pad every record to the length of the longest one, then stack them as rows.
len <- max(lengths(data)) ## get length of longest record
rows <- lapply(unname(data), function(x) {
  vals <- unname(unlist(x)) # unlist() drops NULL fields, so vals may be short
  length(vals) <- len       # extending a vector pads it with NA
  vals
})
# rbind always gives one row per record; t(sapply(...)) would flip the shape
# when len is 1, because sapply then simplifies to a plain vector.
do.call(rbind, rows)
# [,1] [,2] [,3]
# [1,] "A, B" "F" NA
# [2,] "C" "M" "RIP"