I have a list object. I used strsplit(data, ";") to unlist. It is characters "A";"B" so on and each row has different length. Therefore, I wrote a for loop to create a matrix. I want to have column 1 all same object "A".
Here is the code I wrote but it does not work as I wanted.
myList <- list()
myList[[1]] <- c("A", "C", 0, 0)
myList[[2]] <- c("A", "B", "C")
myList[[3]] <- c("A", 0, 0, 0, 0)
myList[[4]] <- c("B", "A")
myList[[5]] <- c("Aa", "A", "B", 0, 0)
myList[[6]] <- c("Aa", "A", "C", 0, 0)
myList[[7]] <- c("C", "A", 0, 0)
myList
TD=TD2=matrix(0,length(myList),5)
for(i in 1:length(myList))
{
m1=length(myList[[i]])
TD[i,1:m1]=matrix( myList[[i]] , ncol = m1 , byrow = TRUE )
}
for(j in 1:length(myList)){
TD2[j,]=TD[j,order(TD[j,],decreasing = T)]
}
Desired output to be
[,1] [,2] [,3] [,4] [,5]
[1,] "A" "C" "0" "0" "0"
[2,] "A" "C" "B" "0" "0"
[3,] "A" "0" "0" "0" "0"
[4,] "A" "B" "0" "0" "0"
[5,] "A" "B" "Aa" "0" "0"
[6,] "A" "C" "Aa" "0" "0"
[7,] "A" "C" "0" "0" "0"
You can define a factor object with custom order with factor(..., ordered = T) and sort it.
ord <- names(sort(table(unlist(myList))[-1], dec = T))
len <- max(lengths(myList))
t(sapply(myList, function(x){
y <- sort(factor(x, levels = c(ord, "0"), ordered = T))[1:len]
replace(y, is.na(y), "0")
}))
# [,1] [,2] [,3] [,4] [,5]
# [1,] "A" "C" "0" "0" "0"
# [2,] "A" "C" "B" "0" "0"
# [3,] "A" "0" "0" "0" "0"
# [4,] "A" "B" "0" "0" "0"
# [5,] "A" "B" "Aa" "0" "0"
# [6,] "A" "C" "Aa" "0" "0"
# [7,] "A" "C" "0" "0" "0"
Not exactly what you want, but since you can create a frequency table, this should also work:
TD = matrix(0,length(myList),5)
for (i in 1:length(myList)) {
myList[[i]] = sort(myList[[i]][which(myList[[i]] != "0")])
for (j in 1:length(myList[[i]])) TD[i, j] = myList[[i]][j]
}
> TD
[,1] [,2] [,3] [,4] [,5]
[1,] "A" "C" "0" "0" "0"
[2,] "A" "B" "C" "0" "0"
[3,] "A" "0" "0" "0" "0"
[4,] "A" "B" "0" "0" "0"
[5,] "A" "Aa" "B" "0" "0"
[6,] "A" "Aa" "C" "0" "0"
[7,] "A" "C" "0" "0" "0"
Related
I have following matrix with numbers of 0 and 1 with always the same number of strings per column, but also containing columns with only one string. I would like to to split each number into separate columns, that only one number per column and row occurs. But I would like to leave the columns with only one string as it is:
r1 <- c("0","001","0001","01","100")
r2 <- c("1","001","0001","10","100")
r3 <- c("0","100","1000","10","010")
r4 <- c("0","010","0100","10","001")
r5<- c("0","010","0010","10","001")
n.mat <- rbind(r1,r2,r3,r4,r5)
The output:
r1 <- c("0","0","0","1","0","0","0","1","0","1","1","0","0")
r2 <- c("1","0","0","1","0","0","0","1","1","0","1","0","0")
r3 <- c("0","1","0","0","1","0","0","0","1","0","0","1","0")
r4 <- c("0","0","1","0","0","1","0","0","1","0","0","0","1")
r5 <- c("0","0","1","0","0","0","1","0","1","0","0","0","1")
n.mat_new <- rbind(r1,r2,r3,r4,r5)
My code, but it crashes, because of the columns with only one string:
n.mat <- do.call(cbind, apply(n.mat, 2, function(x) {
tmp <-strsplit(x, '')
t(sapply(tmp, `[`, 1:max(lengths(tmp))))
}))
There's no need for apply or paste for this specific problem. Simply transpose the matrix, split all the strings, and re-construct the matrix according to the number of rows in the original matrix.
matrix(unlist(strsplit(t(n.mat), "")), nrow = nrow(n.mat), byrow = TRUE)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
# [1,] "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
# [2,] "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
# [3,] "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
# [4,] "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
# [5,] "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
If you want further optimizations, you can do something like the following, which will retain the rownames
matrix(unlist(strsplit(t(n.mat), "", TRUE), use.names = FALSE),
nrow = nrow(n.mat), byrow = TRUE,
dimnames = list(rownames(n.mat), NULL))
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
# r1 "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
# r2 "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
# r3 "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
# r4 "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
# r5 "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
By avoiding apply, you're only calling strsplit once, so you're going to notice much better performance if you have a lot of rows to process.
On my Chromebook (so these times are likely to be slow to begin with) testing with 10,000 rows, I get the following:
nrow(n.mat)
# [1] 10000
bench::mark(am_opt(), am(), gki(), jay(), check = FALSE)
# # A tibble: 4 x 13
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
# <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm>
# 1 am_opt() 28.3ms 40.1ms 27.4 2.75MB 0 14 0 511ms
# 2 am() 36.1ms 41.2ms 24.6 2.75MB 0 13 0 528ms
# 3 gki() 220.3ms 229.4ms 4.39 3.43MB 0 3 0 683ms
# 4 jay() 975.8ms 975.8ms 1.02 3.51MB 1.02 1 1 976ms
# # … with 4 more variables: result <list>, memory <list>, time <list>, gc <list>
I didn't benchmark Karthik's answer because just running it once took more than 1 minute.
system.time(karthik())
# user system elapsed
# 81.341 0.000 81.343
Where the functions are directly copied from the other answers:
am_opt <- function() {
matrix(unlist(strsplit(t(n.mat), "", TRUE), use.names = FALSE),
nrow = nrow(n.mat), byrow = TRUE,
dimnames = list(rownames(n.mat), NULL))
}
am <- function() matrix(unlist(strsplit(t(n.mat), "")), nrow = nrow(n.mat), byrow = TRUE)
gki <- function() matrix(unlist(apply(n.mat, 1, strsplit, split = "")), nrow(n.mat), byrow=TRUE)
jay <- function() t(apply(n.mat, 1, function(x) el(strsplit(Reduce(paste0, x), ""))))
karthik <- function() bind_rows(apply(n.mat, 2, strsplit, split = '')) %>% t
Collapse paste0 using Reduce and use strsplit on "".
t(apply(n.mat, 1, function(x) el(strsplit(Reduce(paste0, x), ""))))
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
# r1 "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
# r2 "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
# r3 "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
# r4 "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
# r5 "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
Does this work:
library(dplyr)
bind_rows(apply(n.mat, 2, strsplit, split = '')) %>% t
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
r1 "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
r2 "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
r3 "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
r4 "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
r5 "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
You can use strsplit in apply, unlist the result and create with this a matrix.
matrix(unlist(apply(n.mat, 1, strsplit, split = "")), nrow(n.mat), byrow=TRUE)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
#[1,] "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
#[2,] "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
#[3,] "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
#[4,] "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
#[5,] "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
This question already has answers here:
Creating a symmetric matrix in R
(7 answers)
Closed 2 years ago.
I have a matrix of characters:
mat1
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "0" "B" "A" "C" "D" "D"
[2,] "0" "0" "B" "C" "C" "C"
[3,] "0" "0" "0" "D" "D" "C"
[4,] "0" "0" "0" "0" "B" "B"
[5,] "0" "0" "0" "0" "0" "A"
[6,] "0" "0" "0" "0" "0" "0"
I want to have a Symmetrical matrix of that, as below:
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "0" "B" "A" "C" "D" "D"
[2,] "B" "0" "B" "C" "C" "C"
[3,] "A" "B" "0" "D" "D" "C"
[4,] "C" "C" "D" "0" "B" "B"
[5,] "D" "C" "D" "B" "0" "A"
[6,] "D" "C" "C" "B" "A" "0"
You can set the lower triangular part of the matrix as equal to the lower triangular part of the transposed matrix, by using the lower.tri functions on the matrix mat1:
mat1[lower.tri(mat1)] <- t(mat1)[lower.tri(mat1)]
Is there a way in R to replace values in each row of a matrix/dataframe with a specific value from that row?
For example, I have the following matrix:
df<-cbind(c("A","C","G","T"),c("T","G","C","A"),c(0,1,0,1),c(1,0,1,0),c(0,1,0,1))
df
# [,1] [,2] [,3] [,4] [,5]
#[1,] "A" "T" "0" "1" "0"
#[2,] "C" "G" "1" "0" "1"
#[3,] "G" "C" "0" "1" "0"
#[4,] "T" "A" "1" "0" "1"
and I want to replace the zeros in each row with the corresponding letter from the first column of that row, such that the new matrix will look like this:
newdf
# [,1] [,2] [,3] [,4] [,5]
#[1,] "A" "T" "A" "1" "A"
#[2,] "C" "G" "1" "C" "1"
#[3,] "G" "C" "G" "1" "G"
#[4,] "T" "A" "1" "T" "1"
The closest I have been able to get is with the following commands, but it does not replace the zeros with the correct values from column 1.
df[df==0]<-NA
df[, 3:ncol(df)][is.na(df[, 3:ncol(df)])] <- df[,1]
We can replicate the first column to make the lengths equal and then do the assignment based on the logical matrix. It will subset the elements that are of the same length as in the rhs
i1 <- df == 0
newdf <- df
newdf[i1] <- df[,1][row(df)][i1]
newdf
[,1] [,2] [,3] [,4] [,5]
#[1,] "A" "T" "A" "1" "A"
#[2,] "C" "G" "1" "C" "1"
#[3,] "G" "C" "G" "1" "G"
#[4,] "T" "A" "1" "T" "1"
I wish to find the row number, based on multiple parameters. I have made this test matrix:
data=
[,1] [,2] [,3]
[1,] "1" "a" "0"
[2,] "2" "b" "0"
[3,] "3" "c" "0"
[4,] "4" "a" "0"
[5,] "1" "b" "0"
[6,] "2" "c" "0"
[7,] "3" "a" "0"
[8,] "4" "b" "0"
Then I want to get the row number where
data[,1]==1 and data[,2]=='b'
I'm trying to create a list of permutations of a list, such that, for example, perms(list("a", "b", "c")) returns
list(list("a", "b", "c"), list("a", "c", "b"), list("b", "a", "c"),
list("b", "c", "a"), list("c", "a", "b"), list("c", "b", "a"))
I'm not sure how to proceed, any help would be greatly appreciated.
A while back I had to do this in base R without loading any packages.
permutations <- function(n){
if(n==1){
return(matrix(1))
} else {
sp <- permutations(n-1)
p <- nrow(sp)
A <- matrix(nrow=n*p,ncol=n)
for(i in 1:n){
A[(i-1)*p+1:p,] <- cbind(i,sp+(sp>=i))
}
return(A)
}
}
Usage:
> matrix(letters[permutations(3)],ncol=3)
[,1] [,2] [,3]
[1,] "a" "b" "c"
[2,] "a" "c" "b"
[3,] "b" "a" "c"
[4,] "b" "c" "a"
[5,] "c" "a" "b"
[6,] "c" "b" "a"
combinat::permn will do that work:
> library(combinat)
> permn(letters[1:3])
[[1]]
[1] "a" "b" "c"
[[2]]
[1] "a" "c" "b"
[[3]]
[1] "c" "a" "b"
[[4]]
[1] "c" "b" "a"
[[5]]
[1] "b" "c" "a"
[[6]]
[1] "b" "a" "c"
Note that calculation is huge if the element is large.
base R can also provide the answer:
all <- expand.grid(p1 = letters[1:3], p2 = letters[1:3], p3 = letters[1:3], stringsAsFactors = FALSE)
perms <- all[apply(all, 1, function(x) {length(unique(x)) == 3}),]
You can try permutations() from the gtools package, but unlike permn() from combinat, it doesn't output a list:
> library(gtools)
> permutations(3, 3, letters[1:3])
[,1] [,2] [,3]
[1,] "a" "b" "c"
[2,] "a" "c" "b"
[3,] "b" "a" "c"
[4,] "b" "c" "a"
[5,] "c" "a" "b"
[6,] "c" "b" "a"
A solution in base R, no dependencies on other packages:
> getPermutations <- function(x) {
if (length(x) == 1) {
return(x)
}
else {
res <- matrix(nrow = 0, ncol = length(x))
for (i in seq_along(x)) {
res <- rbind(res, cbind(x[i], Recall(x[-i])))
}
return(res)
}
}
> getPermutations(letters[1:3])
[,1] [,2] [,3]
[1,] "a" "b" "c"
[2,] "a" "c" "b"
[3,] "b" "a" "c"
[4,] "b" "c" "a"
[5,] "c" "a" "b"
[6,] "c" "b" "a"
I hope this helps.
# Another recursive implementation
# for those who like to roll their own, no package required
permutations <- function( x, prefix = c() )
{
if(length(x) == 0 ) return(prefix)
do.call(rbind, sapply(1:length(x), FUN = function(idx) permutations( x[-idx], c( prefix, x[idx])), simplify = FALSE))
}
permutations(letters[1:3])
# [,1] [,2] [,3]
#[1,] "a" "b" "c"
#[2,] "a" "c" "b"
#[3,] "b" "a" "c"
#[4,] "b" "c" "a"
#[5,] "c" "a" "b"
#[6,] "c" "b" "a"
Try:
> a = letters[1:3]
> eg = expand.grid(a,a,a)
> eg[!(eg$Var1==eg$Var2 | eg$Var2==eg$Var3 | eg$Var1==eg$Var3),]
Var1 Var2 Var3
6 c b a
8 b c a
12 c a b
16 a c b
20 b a c
22 a b c
As suggested by #Adrian in comments, last line can be replaced by:
eg[apply(eg, 1, anyDuplicated) == 0, ]
A fun solution "probabilistic" using sample for base R:
elements <- c("a", "b", "c")
k <- length(elements)
res=unique(t(sapply(1:200, function(x) sample(elements, k))))
# below, check you have all the permutations you need (if not, try again)
nrow(res) == factorial(k)
res
basically you call many random samples, hoping to get them all, and you unique them.
We can use base function combn with a little modifcation:
combn_n <- function(x) {
m <- length(x) - 1 # number of elements to choose: n-1
xr <- rev(x) # reversed x
part_1 <- rbind(combn(x, m), xr, deparse.level = 0)
part_2 <- rbind(combn(xr, m), x, deparse.level = 0)
cbind(part_1, part_2)
}
combn_n(letters[1:3])
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "a" "a" "b" "c" "c" "b"
[2,] "b" "c" "c" "b" "a" "a"
[3,] "c" "b" "a" "a" "b" "c"
Behold, the purrr 🐾 solution:
> map(1:3, ~ c('a', 'b', 'c')) %>%
cross() %>%
keep(~ length(unique(.x)) == 3) %>%
map(unlist)
#> [[1]]
#> [1] "c" "b" "a"
#>
#> [[2]]
#> [1] "b" "c" "a"
#>
#> [[3]]
#> [1] "c" "a" "b"
#>
#> [[4]]
#> [1] "a" "c" "b"
#>
#> [[5]]
#> [1] "b" "a" "c"
#>
#> [[6]]
#> [1] "a" "b" "c"
In case this helps, there is the "arrangements" package, that allows you to simply do :
> abc = letters[1:3]
> permutations(abc)
[,1] [,2] [,3]
[1,] "a" "b" "c"
[2,] "a" "c" "b"
[3,] "b" "a" "c"
[4,] "b" "c" "a"
[5,] "c" "a" "b"
[6,] "c" "b" "a"
A generic version of rnso's answer is:
get_perms <- function(x){
stopifnot(is.atomic(x)) # for the matrix call to make sense
out <- as.matrix(expand.grid(
replicate(length(x), x, simplify = FALSE), stringsAsFactors = FALSE))
out[apply(out,1, anyDuplicated) == 0, ]
}
Here are two examples:
get_perms(letters[1:3])
#R> Var1 Var2 Var3
#R> [1,] "c" "b" "a"
#R> [2,] "b" "c" "a"
#R> [3,] "c" "a" "b"
#R> [4,] "a" "c" "b"
#R> [5,] "b" "a" "c"
#R> [6,] "a" "b" "c"
get_perms(letters[1:4])
#R> Var1 Var2 Var3 Var4
#R> [1,] "d" "c" "b" "a"
#R> [2,] "c" "d" "b" "a"
#R> [3,] "d" "b" "c" "a"
#R> [4,] "b" "d" "c" "a"
#R> [5,] "c" "b" "d" "a"
#R> [6,] "b" "c" "d" "a"
#R> [7,] "d" "c" "a" "b"
#R> [8,] "c" "d" "a" "b"
#R> [9,] "d" "a" "c" "b"
#R> [10,] "a" "d" "c" "b"
#R> [11,] "c" "a" "d" "b"
#R> [12,] "a" "c" "d" "b"
#R> [13,] "d" "b" "a" "c"
#R> [14,] "b" "d" "a" "c"
#R> [15,] "d" "a" "b" "c"
#R> [16,] "a" "d" "b" "c"
#R> [17,] "b" "a" "d" "c"
#R> [18,] "a" "b" "d" "c"
#R> [19,] "c" "b" "a" "d"
#R> [20,] "b" "c" "a" "d"
#R> [21,] "c" "a" "b" "d"
#R> [22,] "a" "c" "b" "d"
#R> [23,] "b" "a" "c" "d"
#R> [24,] "a" "b" "c" "d"
One can also slightly alter Rick's answer by using lapply, only doing a single rbind, and reduce the number of [s]/[l]apply calls:
permutations <- function(x, prefix = c()){
if(length(x) == 1) # was zero before
return(list(c(prefix, x)))
out <- do.call(c, lapply(1:length(x), function(idx)
permutations(x[-idx], c(prefix, x[idx]))))
if(length(prefix) > 0L)
return(out)
do.call(rbind, out)
}
What about
pmsa <- function(l) {
pms <- function(n) if(n==1) return(list(1)) else unlist(lapply(pms(n-1),function(v) lapply(0:(n-1),function(k) append(v,n,k))),recursive = F)
lapply(pms(length(l)),function(.) l[.])
}
This gives a list. Then
pmsa(letters[1:3])