Sum partially overlapping square matrix / array - r

I have two square matrix / array like that
## Matrix 1
t1 <- c(2,1,1,1,1,0,1,0,1)
column.names <- c("A","B","C")
row.names <- c("A","B","C")
m1 <- array(t1,dim = c(3,3),dimnames = list(row.names,column.names))
m1
A B C
A 2 1 1
B 1 1 0
C 1 0 1
## Matrix 2
t2 <- c(1,0,0,0,1,1,0,1,1)
column.names <- c("A","B","D")
row.names <- c("A","B","D")
m2 <- array(t2,dim = c(3,3),dimnames = list(row.names,column.names))
m2
A B D
A 1 0 0
B 0 1 1
D 0 1 1
I need to sum up them (each existing column/row pairs) and to keep all possible combinations, like that :
A B C D
A 3 1 1 0
B 1 2 0 1
C 1 0 1 0
D 0 1 0 1
I have to compute this process a lot of times, so I am looking for a fast and lightweight solution.
Any help would be awsome, I am stuck ;)

A base R option using xtabs + expand.grid
as.data.frame.matrix(
xtabs(
p ~ .,
do.call(
rbind,
lapply(
list(m1, m2),
function(x) cbind(expand.grid(dimnames(x)), p = c(x))
)
)
)
)
gives
A B C D
A 3 1 1 0
B 1 2 0 1
C 1 0 1 0
D 0 1 0 1
Another option using igraph
library(igraph)
get.adjacency(
graph_from_data_frame(
do.call(
rbind,
lapply(
list(m1, m2),
function(x) {
get.data.frame(
graph_from_adjacency_matrix(
x,
"undirected"
)
)
}
)
), FALSE
),
sparse = FALSE
)
which gives
A B C D
A 3 1 1 0
B 1 2 0 1
C 1 0 1 0
D 0 1 0 1

Make m1 and m2 of same dimensions by including all the rownames and colnames available in both of them. Replace non-existent value with 0. You can then add both of them together.
cols <- unique(c(colnames(m1), colnames(m2)))
rows <- unique(c(rownames(m1), rownames(m2)))
dummy_m1 <- matrix(0, nrow = length(cols), ncol = length(rows),
dimnames = list(cols, rows))
dummy_m2 <- dummy_m1
dummy_m1[rownames(m1), colnames(m1)] <- m1
dummy_m2[rownames(m2), colnames(m2)] <- m2
dummy_m1 + dummy_m2
# A B C D
#A 3 1 1 0
#B 1 2 0 1
#C 1 0 1 0
#D 0 1 0 1

Related

In R, change the values of some items in a matrix without causing a copy of the entire matrix?

I have a "small" square matrix that I want to add to a "big" matrix. The big matrix contains all the rows and columns of the small matrix plus extras. I want to add the values where the indices are in common and just keep the values from the big one where that index is not contained in the small one. Unfortunately, all the data is copied on the addition so it takes a long time and can temporarily spike memory when the matrices are large.
I have tried adding subsets using matrices and data.frames, as well as a data.table method using rbindlist. Both the data.frame and matrix methods seem to cause a memory copy (why?) and the rbindlist method is not ideal because it requires a melt and dcast and temporarily spiking the memory by spiking the number of rows.
Is there any way to just change the values of some items in a matrix without causing a copy of the entire matrix?
Here are my attempts:
MList <- list(M1,M2)
unionCols <- Reduce(union, lapply(MList, colnames))
MTotal <- matrix(as.double(rep(0,(length(unionCols))^2)), nrow = length(unionCols))
rownames(MTotal) <- colnames(MTotal) <- unionCols
DFTotal <- as.data.frame(MTotal)
DFList <- lapply(MList, as.data.frame)
for(i in 1:length(MList)){
tracemem(MTotal)
tracemem(DFTotal)
mCol <- match(colnames(MList[[i]]), colnames(MTotal))
MTotal[mCol,mCol] <- MTotal[mCol,mCol] + MList[[i]] # this causes a copy
DFTotal[mCol,mCol] <- DFTotal[mCol,mCol] + DFList[[i]] # this causes a copy
}
M1
M2
MTotal
# rbindlist method
.AggDMCMatsSingleM2 <- function(M1, M2){
.MyMelt <- function(M){
DT <- setnames(reshape2::melt(M, id.vars = colnames(M)), c('Var1','Var2'), c('row','col'))
}
M_total <- as.matrix(data.table::dcast(rbindlist(lapply(list(M1,M2), .MyMelt)),
formula = as.formula(row ~ col),
value.var = 'value',
fun.aggregate = sum,
fill = 0),
rownames = 'row')
return(M_total)
}
M1
M2
.AggDMCMatsSingleM2(M1,M2)
If I follow what you are asking we can directly add and write to the big matrix using the bracket notation row/col names of the small matrix:
big_matrix<-matrix(data=rep(1, 25), nrow=5,
dimnames = list(c(LETTERS[1:5]),
c(letters[1:5])))
# a b c d e
#A 1 1 1 1 1
#B 1 1 1 1 1
#C 1 1 1 1 1
#D 1 1 1 1 1
#E 1 1 1 1 1
small_matrix<-matrix(data=c(1:9), nrow=3,
dimnames = list(c(LETTERS[2:4]),
c(letters[2:4])))
# b c d
#B 1 4 7
#C 2 5 8
#D 3 6 9
big_matrix[rownames(small_matrix), colnames(small_matrix)] <-
big_matrix[rownames(small_matrix), colnames(small_matrix)] + small_matrix
# a b c d e
#A 1 1 1 1 1
#B 1 2 5 8 1
#C 1 3 6 9 1
#D 1 4 7 10 1
#E 1 1 1 1 1
More complex test:
big_matrix<-matrix(data=rep(1, 25), nrow=5,
dimnames = list(c(LETTERS[1:5]),
c(letters[1:5])))
# a b c d e
#A 1 1 1 1 1
#B 1 1 1 1 1
#C 1 1 1 1 1
#D 1 1 1 1 1
#E 1 1 1 1 1
small_matrix<-matrix(data=c(1:9), nrow=3,
dimnames = list(c("A", "D", "C"),
c(letters[c(2:4)])))
# b c d
#A 1 4 7
#D 2 5 8
#C 3 6 9
big_matrix[rownames(small_matrix), colnames(small_matrix)] <-
big_matrix[rownames(small_matrix), colnames(small_matrix)] + small_matrix
big_matrix
# a b c d e
#A 1 2 5 8 1
#B 1 1 1 1 1
#C 1 4 7 10 1
#D 1 3 6 9 1
#E 1 1 1 1 1

Generate pairwise movement data from sequence

I have a sequence which looks like this
SEQENCE
1 A
2 B
3 B
4 C
5 A
Now from this sequence, I want to get the matrix like this where i the row and jth column element denotes how many times movement occurred from ith row node to jth column node
A B C
A 0 1 0
B 0 1 1
C 1 0 0
How Can I get this in R
1) Use table like this:
s <- DF[, 1]
table(tail(s, -1), head(s, -1))
giving:
A B C
A 0 0 1
B 1 1 0
C 0 1 0
2) or like this. Since embed does not work with factors we convert the factor to character,
s <- as.character(DF[, 1])
do.call(table, data.frame(embed(s, 2)))
giving:
X2
X1 A B C
A 0 0 1
B 1 1 0
C 0 1 0
3) xtabs also works:
s <- as.character(DF[, 1])
xtabs(data = data.frame(embed(s, 2)))
giving:
X2
X1 A B C
A 0 0 1
B 1 1 0
C 0 1 0
Note: The input DF in reproducible form is:
Lines <- " SEQENCE
1 A
2 B
3 B
4 C
5 A"
DF <- read.table(text = Lines, header = TRUE)

Special occurrency counting in data table

(preamble)
I don't know if this is the right place for that...I actually have a problem solving/optimization issue for the counting over a table. So if it's not. very sorry and deserve the minusrating.
Here's the data frame
dat <- data.frame(id=letters[1:5],matrix(c(0,0,1,0,0, 0,1,0,1,1, 0,0,2,1,0, 1,0,2,1,1, 0,0,2,0,0, 0,1,2,1,0),5,6))
#
# id X1 X2 X3 X4 X5 X6
# 1 a 0 0 0 1 0 0
# 2 b 0 1 0 0 0 1
# 3 c 1 0 2 2 2 2
# 4 d 0 1 1 1 0 1
# 5 e 0 1 0 1 0 0
I would like to count along every row, how many times we get to 1 and how many times from 1 we go to 0. so the final results should be
# id N1 N0
# a 1 1
# b 2 1
# c 1 1
# d 2 1
# e 2 2
I actually found an algorithm but it's more C/FORTRAN style (here below) and I can't believe there's not an esaier and more elegant way to get this in R. Thanks a lot for any help or hint.
nr <- nrow(dat)
nc <- ncol(dat)
rownames(dat) <- seq(1,nr,1)
colnames(dat) <- seq(1,nc,1)
dat$N1 <- NULL
dat$N2 <- NULL
for (i in 1:nr) {
n1 <- 0
n0 <- 0
j <- 2
while (!(j>nc)) {
k <- j
if (dat[i,k] == 1) {
n1 <- n1 + 1
k <- j + 1
while (!(k>nc)) {
if (dat[i,k] == 0) {
n0 <- n0 + 1
break
}
k <- k + 1
}
}
j <- k
j <- j + 1
}
dat$N1[i] <- n1
dat$N0[i] <- n0
}
Not sure if I totally got it, but you can try:
cbind(dat["id"],N0=rowSums(dat[,3:7]==1 & dat[,2:6]!=1)+(dat[,2]==1),
N1=rowSums(dat[,3:7]==0 & dat[,2:6]==1))
# id N0 N1
#1 a 1 1
#2 b 2 1
#3 c 1 1
#4 d 2 1
#5 e 2 2
Here's another way, using rle wrapped in data.table syntax:
library(data.table)
setDT(dat)
melt(dat, id="id")[, with(rle(value), list(
n1 = sum(values==1),
n1to0 = sum("10" == do.call(paste0, shift(values, 1:0, fill=0)))
)), by=id]
# id n1 n1to0
# 1: a 1 1
# 2: b 2 1
# 3: c 1 1
# 4: d 2 1
# 5: e 2 2
Notes.
shift with n=1:0 returns the lagged vector (lag of 1) and the vector itself (lag of 0).
melt creates a value column; and rle contains a values vector.

How to Fill in Empty Matrix in R with Loop [duplicate]

This question already has answers here:
Reshape dataframe and create similarity matrix
(2 answers)
Closed 7 years ago.
I have a blank matrix called Trial that is 5000 X 5000, but i'll put a small snippet.
a b c d e f
a
b
c
d
e
f
and I want to fill the Matrix, with a Data Table I have.
Name Value
-----------
Cat A
Cat B
Cat E
Dog D
Dog C
Dog F
So basically in the end, I want the matrix to be filled like this:
a b c d e f
a 1 1 0 0 1 0
b 1 1 0 0 1 0
c 0 0 1 1 0 1
d 0 0 1 1 0 1
e 1 1 0 0 1 0
f 0 0 1 1 0 1
So all the values relating to the Name will be 1, and if they don't relate they will be 0. For example, A and F don't relate because they are different names (cat and dog), thus they will get a 0.
Here is a way with loops
dd <- read.table(header = TRUE, text="Name Value
Cat A
Cat B
Cat E
Dog D
Dog C
Dog F")
o <- order(dd$Value)
sapply(1:nrow(dd), function(x) dd$Name %in% dd[x, 'Name'] + 0L)[o, o]
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 1 0 0 1 0
# [2,] 1 1 0 0 1 0
# [3,] 0 0 1 1 0 1
# [4,] 0 0 1 1 0 1
# [5,] 1 1 0 0 1 0
# [6,] 0 0 1 1 0 1
or with an explicit for loop
mm <- matrix(nrow = nrow(dd), ncol = nrow(dd))
for (ii in 1:nrow(mm))
mm[ii, ] <- dd$Name %in% dd[ii, 'Name'] + 0L
mm[o, o]
For 5000 x 5000, takes less than 2 seconds on my crummy laptop
dd <- data.frame(Name = sample(LETTERS, 5000, replace = TRUE), Value = 1:5000)
o <- order(dd$Value)
system.time({
oo <- sapply(1:nrow(dd), function(x) dd$Name %in% dd[x, 'Name'] + 0L)[o, o]
})
# user system elapsed
# 1.680 0.188 1.874
system.time({
mm <- matrix(nrow = nrow(dd), ncol = nrow(dd))
for (ii in 1:nrow(mm))
mm[ii, ] <- dd$Name %in% dd[ii, 'Name'] + 0L
mm[o, o]
})
# user system elapsed
# 1.918 0.152 2.073

combine tables into a data frame

How do I turn a list of tables into a data frame?
I have:
> (tabs <- list(table(c('a','a','b')),table(c('c','c','b')),table(c()),table(c('b','b'))))
[[1]]
a b
2 1
[[2]]
b c
1 2
[[3]]
< table of extent 0 >
[[4]]
b
2
I want:
> data.frame(a=c(2,0,0),b=c(1,1,2),c=c(0,2,0))
a b c
1 2 1 0
2 0 1 2
3 0 0 0
4 0 2 0
PS. Please do not assume that the tables were created by table calls! They were not!
c_names <- unique(unlist(sapply(tabs, names)))
df <- do.call(rbind, lapply(tabs, `[`, c_names))
colnames(df) <- c_names
df[is.na(df)] <- 0
This assumes the tables are one dimensional.
all.names <- unique(unlist(lapply(tabs, names)))
df <- as.data.frame(do.call(rbind,
lapply(
tabs, function(x) as.list(replace(c(x)[all.names], is.na(c(x)[all.names]), 0))
) ) )
names(df) <- all.names
df
There is probably a cleaner way to do this.
# a b c
# 1 2 1 0
# 2 0 1 2
# 3 0 0 0
# 4 0 2 0
tabs <- list(table(c('a','a','b')),table(c('c','c','b')),table(c()),table(c('b','b')))
dat.names <- unique(unlist(sapply(tabs, names)))
dat <- matrix(0, nrow = length(tabs), ncol = length(dat.names))
colnames(dat) <- dat.names
for (ii in 1:length(tabs)) {
dat[ii, ] <- tabs[[ii]][match(colnames(dat), names(tabs[[ii]]) )]
}
dat[is.na(dat)] <- 0
> dat
a b c
[1,] 2 1 0
[2,] 0 1 2
[3,] 0 0 0
[4,] 0 2 0
Here is a pretty clean approach:
library(reshape2)
newTabs <- melt(tabs)
newTabs
# Var1 value L1
# 1 a 2 1
# 2 b 1 1
# 3 b 1 2
# 4 c 2 2
# 5 b 2 4
newTabs$L1 <- factor(newTabs$L1, seq_along(tabs))
dcast(newTabs, L1 ~ Var1, fill = 0, drop = FALSE)
# L1 a b c
# 1 1 2 1 0
# 2 2 0 1 2
# 3 3 0 0 0
# 4 4 0 2 0
This makes use of the fact that there is a melt method for lists (see reshape2:::melt.list) which automatically adds in a variable (L1 for an unnested list) that identifies the index of the list element. Since your list has some items which are empty, they won't show up in your melted list, so you need to factor the "L1" column, specifying the levels you want. dcast takes care of restructuring your output and allows you to specify the desired fill value.

Resources