I have following matrix with numbers of 0 and 1 with always the same number of strings per column, but also containing columns with only one string. I would like to to split each number into separate columns, that only one number per column and row occurs. But I would like to leave the columns with only one string as it is:
r1 <- c("0","001","0001","01","100")
r2 <- c("1","001","0001","10","100")
r3 <- c("0","100","1000","10","010")
r4 <- c("0","010","0100","10","001")
r5<- c("0","010","0010","10","001")
n.mat <- rbind(r1,r2,r3,r4,r5)
The output:
r1 <- c("0","0","0","1","0","0","0","1","0","1","1","0","0")
r2 <- c("1","0","0","1","0","0","0","1","1","0","1","0","0")
r3 <- c("0","1","0","0","1","0","0","0","1","0","0","1","0")
r4 <- c("0","0","1","0","0","1","0","0","1","0","0","0","1")
r5 <- c("0","0","1","0","0","0","1","0","1","0","0","0","1")
n.mat_new <- rbind(r1,r2,r3,r4,r5)
My code, but it crashes, because of the columns with only one string:
n.mat <- do.call(cbind, apply(n.mat, 2, function(x) {
tmp <-strsplit(x, '')
t(sapply(tmp, `[`, 1:max(lengths(tmp))))
}))
There's no need for apply or paste for this specific problem. Simply transpose the matrix, split all the strings, and re-construct the matrix according to the number of rows in the original matrix.
matrix(unlist(strsplit(t(n.mat), "")), nrow = nrow(n.mat), byrow = TRUE)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
# [1,] "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
# [2,] "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
# [3,] "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
# [4,] "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
# [5,] "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
If you want further optimizations, you can do something like the following, which will retain the rownames
matrix(unlist(strsplit(t(n.mat), "", TRUE), use.names = FALSE),
nrow = nrow(n.mat), byrow = TRUE,
dimnames = list(rownames(n.mat), NULL))
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
# r1 "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
# r2 "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
# r3 "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
# r4 "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
# r5 "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
By avoiding apply, you're only calling strsplit once, so you're going to notice much better performance if you have a lot of rows to process.
On my Chromebook (so these times are likely to be slow to begin with) testing with 10,000 rows, I get the following:
nrow(n.mat)
# [1] 10000
bench::mark(am_opt(), am(), gki(), jay(), check = FALSE)
# # A tibble: 4 x 13
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
# <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm>
# 1 am_opt() 28.3ms 40.1ms 27.4 2.75MB 0 14 0 511ms
# 2 am() 36.1ms 41.2ms 24.6 2.75MB 0 13 0 528ms
# 3 gki() 220.3ms 229.4ms 4.39 3.43MB 0 3 0 683ms
# 4 jay() 975.8ms 975.8ms 1.02 3.51MB 1.02 1 1 976ms
# # … with 4 more variables: result <list>, memory <list>, time <list>, gc <list>
I didn't benchmark Karthik's answer because just running it once took more than 1 minute.
system.time(karthik())
# user system elapsed
# 81.341 0.000 81.343
Where the functions are directly copied from the other answers:
am_opt <- function() {
matrix(unlist(strsplit(t(n.mat), "", TRUE), use.names = FALSE),
nrow = nrow(n.mat), byrow = TRUE,
dimnames = list(rownames(n.mat), NULL))
}
am <- function() matrix(unlist(strsplit(t(n.mat), "")), nrow = nrow(n.mat), byrow = TRUE)
gki <- function() matrix(unlist(apply(n.mat, 1, strsplit, split = "")), nrow(n.mat), byrow=TRUE)
jay <- function() t(apply(n.mat, 1, function(x) el(strsplit(Reduce(paste0, x), ""))))
karthik <- function() bind_rows(apply(n.mat, 2, strsplit, split = '')) %>% t
Collapse paste0 using Reduce and use strsplit on "".
t(apply(n.mat, 1, function(x) el(strsplit(Reduce(paste0, x), ""))))
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
# r1 "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
# r2 "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
# r3 "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
# r4 "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
# r5 "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
Does this work:
library(dplyr)
bind_rows(apply(n.mat, 2, strsplit, split = '')) %>% t
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
r1 "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
r2 "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
r3 "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
r4 "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
r5 "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
You can use strsplit in apply, unlist the result and create with this a matrix.
matrix(unlist(apply(n.mat, 1, strsplit, split = "")), nrow(n.mat), byrow=TRUE)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
#[1,] "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
#[2,] "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
#[3,] "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
#[4,] "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
#[5,] "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
I have a list object. I used strsplit(data, ";") to unlist. It is characters "A";"B" so on and each row has different length. Therefore, I wrote a for loop to create a matrix. I want to have column 1 all same object "A".
Here is the code I wrote but it does not work as I wanted.
myList <- list()
myList[[1]] <- c("A", "C", 0, 0)
myList[[2]] <- c("A", "B", "C")
myList[[3]] <- c("A", 0, 0, 0, 0)
myList[[4]] <- c("B", "A")
myList[[5]] <- c("Aa", "A", "B", 0, 0)
myList[[6]] <- c("Aa", "A", "C", 0, 0)
myList[[7]] <- c("C", "A", 0, 0)
myList
TD=TD2=matrix(0,length(myList),5)
for(i in 1:length(myList))
{
m1=length(myList[[i]])
TD[i,1:m1]=matrix( myList[[i]] , ncol = m1 , byrow = TRUE )
}
for(j in 1:length(myList)){
TD2[j,]=TD[j,order(TD[j,],decreasing = T)]
}
Desired output to be
[,1] [,2] [,3] [,4] [,5]
[1,] "A" "C" "0" "0" "0"
[2,] "A" "C" "B" "0" "0"
[3,] "A" "0" "0" "0" "0"
[4,] "A" "B" "0" "0" "0"
[5,] "A" "B" "Aa" "0" "0"
[6,] "A" "C" "Aa" "0" "0"
[7,] "A" "C" "0" "0" "0"
You can define a factor object with custom order with factor(..., ordered = T) and sort it.
ord <- names(sort(table(unlist(myList))[-1], dec = T))
len <- max(lengths(myList))
t(sapply(myList, function(x){
y <- sort(factor(x, levels = c(ord, "0"), ordered = T))[1:len]
replace(y, is.na(y), "0")
}))
# [,1] [,2] [,3] [,4] [,5]
# [1,] "A" "C" "0" "0" "0"
# [2,] "A" "C" "B" "0" "0"
# [3,] "A" "0" "0" "0" "0"
# [4,] "A" "B" "0" "0" "0"
# [5,] "A" "B" "Aa" "0" "0"
# [6,] "A" "C" "Aa" "0" "0"
# [7,] "A" "C" "0" "0" "0"
Not exactly what you want, but since you can create a frequency table, this should also work:
TD = matrix(0,length(myList),5)
for (i in 1:length(myList)) {
myList[[i]] = sort(myList[[i]][which(myList[[i]] != "0")])
for (j in 1:length(myList[[i]])) TD[i, j] = myList[[i]][j]
}
> TD
[,1] [,2] [,3] [,4] [,5]
[1,] "A" "C" "0" "0" "0"
[2,] "A" "B" "C" "0" "0"
[3,] "A" "0" "0" "0" "0"
[4,] "A" "B" "0" "0" "0"
[5,] "A" "Aa" "B" "0" "0"
[6,] "A" "Aa" "C" "0" "0"
[7,] "A" "C" "0" "0" "0"
This question already has answers here:
Creating a symmetric matrix in R
(7 answers)
Closed 2 years ago.
I have a matrix of characters:
mat1
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "0" "B" "A" "C" "D" "D"
[2,] "0" "0" "B" "C" "C" "C"
[3,] "0" "0" "0" "D" "D" "C"
[4,] "0" "0" "0" "0" "B" "B"
[5,] "0" "0" "0" "0" "0" "A"
[6,] "0" "0" "0" "0" "0" "0"
I want to have a Symmetrical matrix of that, as below:
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "0" "B" "A" "C" "D" "D"
[2,] "B" "0" "B" "C" "C" "C"
[3,] "A" "B" "0" "D" "D" "C"
[4,] "C" "C" "D" "0" "B" "B"
[5,] "D" "C" "D" "B" "0" "A"
[6,] "D" "C" "C" "B" "A" "0"
You can set the lower triangular part of the matrix as equal to the lower triangular part of the transposed matrix, by using the lower.tri functions on the matrix mat1:
mat1[lower.tri(mat1)] <- t(mat1)[lower.tri(mat1)]
Is there a way in R to replace values in each row of a matrix/dataframe with a specific value from that row?
For example, I have the following matrix:
df<-cbind(c("A","C","G","T"),c("T","G","C","A"),c(0,1,0,1),c(1,0,1,0),c(0,1,0,1))
df
# [,1] [,2] [,3] [,4] [,5]
#[1,] "A" "T" "0" "1" "0"
#[2,] "C" "G" "1" "0" "1"
#[3,] "G" "C" "0" "1" "0"
#[4,] "T" "A" "1" "0" "1"
and I want to replace the zeros in each row with the corresponding letter from the first column of that row, such that the new matrix will look like this:
newdf
# [,1] [,2] [,3] [,4] [,5]
#[1,] "A" "T" "A" "1" "A"
#[2,] "C" "G" "1" "C" "1"
#[3,] "G" "C" "G" "1" "G"
#[4,] "T" "A" "1" "T" "1"
The closest I have been able to get is with the following commands, but it does not replace the zeros with the correct values from column 1.
df[df==0]<-NA
df[, 3:ncol(df)][is.na(df[, 3:ncol(df)])] <- df[,1]
We can replicate the first column to make the lengths equal and then do the assignment based on the logical matrix. It will subset the elements that are of the same length as in the rhs
i1 <- df == 0
newdf <- df
newdf[i1] <- df[,1][row(df)][i1]
newdf
[,1] [,2] [,3] [,4] [,5]
#[1,] "A" "T" "A" "1" "A"
#[2,] "C" "G" "1" "C" "1"
#[3,] "G" "C" "G" "1" "G"
#[4,] "T" "A" "1" "T" "1"
I want use for loop function to output n/2 NewMatrix, NewMatrix <- OldMatrix[,c(i,i+1)] , i= ncol(OldMatrix).
And give them names as the format: NewMatrix_i, in order to I can access individual matrix easily.
OldMatrix
A B C D E F G H I J K
[1,] "1" "1" "C" "F" "F" "B" "F" "F" "1" "1" "4"
[2,] "1" "1" "C" "F" "F" "B" "F" "F" "1" "1" "4"
[3,] "1" "1" "C" "F" "F" "B" "F" "F" "1" "1" "4"
[4,] "5" "5" "C" "F" "F" "B" "F" "F" "1" "1" "4"
[5,] "5" "5" "C" "F" "F" "B" "F" "F" "1" "1" "4"
[6,] "5" "5" "C" "F" "F" "B" "F" "F" "1" "1" "4"
May be this helps:
indx <- 1:(ncol(OldMatrix)-1)
lst1 <- setNames(lapply(indx, function(i) OldMatrix[,c(i, i+1)]), paste("NewMatrix",indx, sep="_"))
lst1$NewMatrix_10
# J K
#[1,] "1" "4"
#[2,] "1" "4"
#[3,] "1" "4"
#[4,] "1" "4"
#[5,] "1" "4"
#[6,] "1" "4"