R - Create matrix from 3 raw vector - r

I have 3 vectors as the following:
A <- c("A", "B", "C", "D", "E")
B <- c("1/1/1", "1/1/1", "2/1/1", "2/1/1", "3/1/1")
C <- c(1, 1, -1, 1, -1)
and I want to create a matrix like the following using these 3 vectors:
- 1/1/1 2/1/1 3/1/1
A 1 0 0
B 1 0 0
C 0 -1 0
D 0 1 0
E 0 0 -1
where vector A and B are rows and columns respectively and I have the data as C.
Any help would be appreciated.

Use ?xtabs
xtabs(C ~ A+B)
# B
#A 1/1/1 2/1/1 3/1/1
# A 1 0 0
# B 1 0 0
# C 0 -1 0
# D 0 1 0
# E 0 0 -1

You can try:
`[<-`(array(0,c(length(unique(A)),length(unique(B))),
list(unique(A),unique(B))),
cbind(A,B),C)
# 1/1/1 2/1/1 3/1/1
#A 1 0 0
#B 1 0 0
#C 0 -1 0
#D 0 1 0
#E 0 0 -1

Another option is acast from reshape2 after creating a data.frame
library(reshape2)
acast(data.frame(A, B, C), A~B, value.var = "C", fill =0)
# 1/1/1 2/1/1 3/1/1
#A 1 0 0
#B 1 0 0
#C 0 -1 0
#D 0 1 0
#E 0 0 -1

Related

Data frame into a symmetric matrix while keeping all row and column

The question R DataFrame into square (symmetric) matrix is about transforming a data frame into a symmetric matrix given the following example:
# data
x <- structure(c(1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0),
.Dim = c(3L,5L),
.Dimnames = list(c("X", "Y", "Z"), c("A", "B", "C", "D", "E")))
x
# A B C D E
#X 1 1 0 0 0
#Y 1 1 0 0 0
#Z 0 0 0 1 0
# transformation
x <- x %*% t(x)
diag(x) <- 1
x
# X Y Z
#X 1 2 0
#Y 2 1 0
#Z 0 0 1
Now, I'm trying to keep the columns c("A", "B", "C", "D", "E") in the matrix, like this:
# X Y Z A B C D E
#X 1 0 0 1 1 0 0 0
#Y 0 1 0 1 1 0 0 0
#Z 0 0 1 0 0 0 1 0
#A 1 1 0 1 0 0 0 0
#B 1 1 0 0 1 0 0 0
#C 0 0 0 0 0 1 0 0
#D 0 0 1 0 0 0 1 0
#E 0 0 0 0 0 0 0 1
I'm pretty sure there's an easy way to do it similar to the first transformation including each case. Can anyone suggest a solution?
Thank you in advance!
This is just an augmentation, isn't it?
X <- as.matrix(x)
rbind(cbind(diag(nrow(X)), X), cbind(t(X), diag(ncol(X))))
build it step by step: thanks to #李哲源 for pointing out m <- diag(sum(dim(x))).
m <- diag(sum(dim(x)))
colnames(m) = rownames(m) = c(rownames(x),colnames(x))
m[dimnames(x)[[1]],dimnames(x)[[2]]] <- x
m[dimnames(x)[[2]],dimnames(x)[[1]]] <- t(x)
# X Y Z A B C D E
#X 1 0 0 1 1 0 0 0
#Y 0 1 0 1 1 0 0 0
#Z 0 0 1 0 0 0 1 0
#A 1 1 0 1 0 0 0 0
#B 1 1 0 0 1 0 0 0
#C 0 0 0 0 0 1 0 0
#D 0 0 1 0 0 0 1 0
#E 0 0 0 0 0 0 0 1
You can wrap it into a function and then call it:
makeSymMat <-function(x) {
x <- as.matrix(x)
m <- diag(sum(dim(x)))
colnames(m) = rownames(m) = c(rownames(x),colnames(x))
m[dimnames(x)[[1]],dimnames(x)[[2]]] <- x
m[dimnames(x)[[2]],dimnames(x)[[1]]] <- t(x)
return(m)
}
makeSymMat(x)

In an NxN matrix give value "True" to pairs from data frame

I have created a 5x5 matrix where rows and columns have the same names and a data frame with name pairs:
N <- 5
Names <- letters[1:N]
mat <- matrix(rep(0, N*N), nrow = N, ncol = N, dimnames = list(Names, Names))
a b c d e
a 0 0 0 0 0
b 0 0 0 0 0
c 0 0 0 0 0
d 0 0 0 0 0
e 0 0 0 0 0
The data frame then consist of different pairs:
col1 col2
1 a c
2 c b
3 d b
4 d e
How can I match these in so that col1 only refers to rows in my matrix and col2 only to columns? The above should compute to the following result:
a b c d e
a 0 0 1 0 0
b 0 0 0 0 0
c 0 1 0 0 0
d 0 1 0 0 1
e 0 0 0 0 0
You can use match to create a "key" of which combinations need to be replaced with 1, like this:
key <- vapply(seq_along(mydf),
function(x) match(mydf[[x]],
dimnames(mat)[[x]]),
numeric(nrow(mydf)))
Then, use matrix indexing to replace the relevant values.
mat[key] <- 1
mat
a b c d e
a 0 0 1 0 0
b 0 0 0 0 0
c 0 1 0 0 0
d 0 1 0 0 1
e 0 0 0 0 0
You could also do:
mat[as.matrix(d1)] <- 1
mat
# a b c d e
#a 0 0 1 0 0
#b 0 0 0 0 0
#c 0 1 0 0 0
#d 0 1 0 0 1
#e 0 0 0 0 0
data
d1 <- structure(list(col1 = c("a", "c", "d", "d"), col2 = c("c", "b",
"b", "e")), .Names = c("col1", "col2"), class = "data.frame",
row.names = c("1", "2", "3", "4"))

Transforming dataframe into expanded matrix in r

Say I have the following dataframe:
dfx <- data.frame(Var1=c("A", "B", "C", "D", "B", "C", "D", "C", "D", "D"),
Var2=c("E", "E", "E", "E", "A", "A", "A", "B", "B", "C"),
Var1out = c(1,-1,-1,-1,1,-1,-1,1,-1,-1),
Var2out= c(-1,1,1,1,-1,1,1,-1,1,1))
dfx
Var1 Var2 Var1out Var2out
1 A E 1 -1
2 B E -1 1
3 C E -1 1
4 D E -1 1
5 B A 1 -1
6 C A -1 1
7 D A -1 1
8 C B 1 -1
9 D B -1 1
10 D C -1 1
What you see here are 10 rows that correspond to match-ups between players A, B, C, D and E. They play each other once and the winner of each match-up is denoted by a +1 and the loser of each match-up is denoted by a -1 (put into the respective column Player Var1 result in Var1out, Player Var2 result in Var2out).
Desired output.
I wish to transform this dataframe to this output matrix (the order of rows are not important to me, but as you can see each row refers to a unique match-up):
A B C D E
1 1 0 0 0 -1
2 0 -1 0 0 1
3 0 0 -1 0 1
4 0 0 0 -1 1
5 -1 1 0 0 0
6 1 0 -1 0 0
7 1 0 0 -1 0
8 0 -1 1 0 0
9 0 1 0 -1 0
10 0 0 1 -1 0
What I've done:
I managed to make this matrix in a roundabout way. As roundabout ways tend to be slow and less satisfactory, I was wondering if anyone can spot a better way.
I first made sure that my two columns containing players had factor levels that contained every possible player that ever occurs (you'll note for instance that player E never occurs in Var1).
# Making sure Var1 and Var2 have same factor levels
levs <- unique(c(levels(dfx$Var1), levels(dfx$Var2))) #get all possible levels of factors
dfx$Var1 <- factor(dfx$Var1, levels=levs)
dfx$Var2 <- factor(dfx$Var2, levels=levs)
I next split the dataframe into two - one for Var1 and Var1out, and one for Var2 and Var2out:
library(dplyr)
temp.Var1 <- dfx %>% select(Var1, Var1out)
temp.Var2 <- dfx %>% select(Var2, Var2out)
Here I use model.matrix to expand columns by factor level:
mat.Var1<-with(temp.Var1, data.frame(model.matrix(~Var1+0)))
mat.Var2<-with(temp.Var2, data.frame(model.matrix(~Var2+0)))
I then replace for each row the column with a '1' indicating the presence of that factor, with the correct result and add these matrices:
mat1 <- apply(mat.Var1, 2, function(x) ifelse(x==1, x<-temp.Var1$Var1out, x<-0) )
mat2 <- apply(mat.Var2, 2, function(x) ifelse(x==1, x<-temp.Var2$Var2out, x<-0) )
matX <- mat1+mat2
matX
Var1A Var1B Var1C Var1D Var1E
1 1 0 0 0 -1
2 0 -1 0 0 1
3 0 0 -1 0 1
4 0 0 0 -1 1
5 -1 1 0 0 0
6 1 0 -1 0 0
7 1 0 0 -1 0
8 0 -1 1 0 0
9 0 1 0 -1 0
10 0 0 1 -1 0
Although this works, I have a sense that I am probably missing simpler solutions for this problem. Thanks.
Create an empty matrix and use matrix indexing to fill the relevant values in:
cols <- unique(unlist(dfx[1:2]))
M <- matrix(0, nrow = nrow(dfx), ncol = length(cols), dimnames = list(NULL, cols))
M[cbind(sequence(nrow(dfx)), match(dfx$Var1, cols))] <- dfx$Var1out
M[cbind(sequence(nrow(dfx)), match(dfx$Var2, cols))] <- dfx$Var2out
M
# A B C D E
# [1,] 1 0 0 0 -1
# [2,] 0 -1 0 0 1
# [3,] 0 0 -1 0 1
# [4,] 0 0 0 -1 1
# [5,] -1 1 0 0 0
# [6,] 1 0 -1 0 0
# [7,] 1 0 0 -1 0
# [8,] 0 -1 1 0 0
# [9,] 0 1 0 -1 0
# [10,] 0 0 1 -1 0
Another way is to use acast
library(reshape2)
#added `use.names=FALSE` from #Ananda Mahto's comments
dfy <- data.frame(Var=unlist(dfx[,1:2], use.names=FALSE),
VarOut=unlist(dfx[,3:4], use.names=FALSE), indx=1:nrow(dfx))
acast(dfy, indx~Var, value.var="VarOut", fill=0)
# A B C D E
#1 1 0 0 0 -1
#2 0 -1 0 0 1
#3 0 0 -1 0 1
#4 0 0 0 -1 1
#5 -1 1 0 0 0
#6 1 0 -1 0 0
#7 1 0 0 -1 0
#8 0 -1 1 0 0
#9 0 1 0 -1 0
#10 0 0 1 -1 0
Or use spread
library(tidyr)
spread(dfy,Var, VarOut , fill=0)[,-1]
# A B C D E
#1 1 0 0 0 -1
#2 0 -1 0 0 1
#3 0 0 -1 0 1
#4 0 0 0 -1 1
#5 -1 1 0 0 0
#6 1 0 -1 0 0
#7 1 0 0 -1 0
#8 0 -1 1 0 0
#9 0 1 0 -1 0
#10 0 0 1 -1 0

Delete columns from a square matrix that sum to zero along with corresponding rows

I have a binary transition matrix. I want to delete rows associated with columns that sum to zero. For example, if
A B C D E
A 0 0 0 1 0
B 1 0 0 1 0
C 0 0 1 1 0
D 0 0 1 0 0
E 0 0 1 1 0
column B and E sum to zero. I know how to get rid of the columns like this,
> a.adj=a[,!!colSums(a)]
> a.adj
A C D
A 0 0 1
B 1 0 1
C 0 1 1
D 0 1 0
E 0 1 1
but how can I at the same time delete rows B and E to get
A C D
A 0 0 1
C 0 1 1
D 0 1 0
If the rownames and colnames are in the same order
indx <- !!colSums(a)
a[indx,indx]
# A C D
#A 0 0 1
#C 0 1 1
#D 0 1 0
Use names to select both columns and rows
> ind <- colnames(a[,!!colSums(a)])
> a[ind, ind]
A C D
A 0 0 1
C 0 1 1
D 0 1 0

How to do row-wise subtraction and replace a specific number with zero?

Step 1: I have a simplified dataframe like this:
df1 = data.frame (B=c(1,0,1), C=c(1,1,0)
, D=c(1,0,1), E=c(1,1,0), F=c(0,0,1)
, G=c(0,1,0), H=c(0,0,1), I=c(0,1,0))
B C D E F G H I
1 1 1 1 1 0 0 0 0
2 0 1 0 1 0 1 0 1
3 1 0 1 0 1 0 1 0
Step 2: I want to do row wise subtraction, i.e. (row1 - row2), (row1 - row3) and (row2 - row3)
row1-row2 1 0 1 0 0 -1 0 -1
row1-row3 0 1 0 1 -1 0 -1 0
row2-row3 -1 1 -1 1 -1 1 -1 1
step 3: replace all -1 to 0
row1-row2 1 0 1 0 0 0 0 0
row1-row3 0 1 0 1 0 0 0 0
row2-row3 0 1 0 1 0 1 0 1
Could you mind to teach me how to do so?
I like using the plyr library for things like this using the combn function to generate all possible pairs of rows/columns.
require(plyr)
combos <- combn(nrow(df1), 2)
adply(combos, 2, function(x) {
out <- data.frame(df1[x[1] , ] - df1[x[2] , ])
out[out == -1] <- 0
return(out)
}
)
Results in:
X1 B C D E F G H I
1 1 1 0 1 0 0 0 0 0
2 2 0 1 0 1 0 0 0 0
3 3 0 1 0 1 0 1 0 1
If necessary, you can drop the first column, plyr spits that out automagically for you.
Similar questions:
Sum pairwise rows with R?
Chi Square Analysis using for loop in R
Compare one row to all other rows in a file using R
For the record, I would do this:
cmb <- combn(seq_len(nrow(df1)), 2)
out <- df1[cmb[1,], ] - df1[cmb[2,], ]
out[out < 0] <- 0
rownames(out) <- apply(cmb, 2,
function(x) paste("row", x[1], "-row", x[2], sep = ""))
This yields (the last line above is a bit of sugar, and may not be needed):
> out
B C D E F G H I
row1-row2 1 0 1 0 0 0 0 0
row1-row3 0 1 0 1 0 0 0 0
row2-row3 0 1 0 1 0 1 0 1
Which is fully vectorised and exploits indices to extend/extract the elements of df1 required for the row-by-row operation.
> df2 <- rbind(df1[1,]-df1[2,], df1[1,]-df1[3,], df1[2,]-df1[3,])
> df2
B C D E F G H I
1 1 0 1 0 0 -1 0 -1
2 0 1 0 1 -1 0 -1 0
21 -1 1 -1 1 -1 1 -1 1
> df2[df2==-1] <- 0
> df2
B C D E F G H I
1 1 0 1 0 0 0 0 0
2 0 1 0 1 0 0 0 0
21 0 1 0 1 0 1 0 1
If you'd like to change the name of the rows to those in your example:
> rownames(df2) <- c('row1-row2', 'row1-row3', 'row2-row3')
> df2
B C D E F G H I
row1-row2 1 0 1 0 0 0 0 0
row1-row3 0 1 0 1 0 0 0 0
row2-row3 0 1 0 1 0 1 0 1
Finally, if the number of rows is not known ahead of time, the following should do the trick:
df1 = data.frame (B=c(1,0,1), C=c(1,1,0), D=c(1,0,1), E=c(1,1,0), F=c(0,0,1), G=c(0,1,0), H=c(0,0,1), I=c(0,1,0))
n <- length(df1[,1])
ret <- data.frame()
for (i in 1:(n-1)) {
for (j in (i+1):n) {
diff <- df1[i,] - df1[j,]
rownames(diff) <- paste('row', i, '-row', j, sep='')
ret <- rbind(ret, diff)
}
}
ret[ret==-1] <- 0
print(ret)

Resources