Shifting rows in R - r

My data is as follows:
1 2 3 4 5
0 1 2 3 4
0 0 1 2 3
0 0 0 0 1
0 0 0 0 1
How can I make the data so that it will look like this:
1 2 3 4 5
1 2 3 4 0
1 2 3 0 0
0 1 0 0 0
1 0 0 0 0
So that the first row doesn't shift, the second row is shifted left by 1, the third row by 2, the fourth row by 3, and the last row by 4?
I tried to at first shift all the rows below the first row to the left by 1, but apparently, it doesn't work.
# Attempt: move every row except the first one column to the left.
nc <- ncol(df)
# The right-hand side selects columns 2:(nc+1), i.e. one column past the
# last column of df (nc is ncol(df)).
df[-(1), 2:nc] <- df[-(1), 2:(nc+1)]
# NOTE(review): column 10 is hard-coded, while the example data shown has
# only 5 columns - presumably left over from a wider real dataset.
df[-(1), 10] <- 0
df

You can use the shift function from data.table with fill = 0. If you want the output as a data.frame, put data.frame() around the last line.
library(data.table)
mat <- as.matrix(df)
# Lead row i by (i - 1) positions; the vacated tail is filled with 0.
lead_row <- function(i) shift(mat[i, ], n = i - 1, type = "lead", fill = 0)
# sapply() returns the shifted rows as columns, hence the transpose.
t(sapply(seq(nrow(mat)), lead_row))
# [,1] [,2] [,3] [,4] [,5]
# [1,] 1 2 3 4 5
# [2,] 1 2 3 4 0
# [3,] 1 2 3 0 0
# [4,] 0 1 0 0 0
# [5,] 1 0 0 0 0

A base R option:
m <- as.matrix(read.table(text = "1 2 3 4 5
0 1 2 3 4
0 0 1 2 3
0 0 0 0 1
0 0 0 0 1"))
# Shift row i of m left by (i - 1) positions and pad the right with zeros.
# The shift is capped at ncol(m), so matrices with more rows than columns
# yield all-zero rows instead of a subscript error from a descending
# i:ncol(m) sequence.
shift_left <- function(i) {
  k <- min(i - 1, ncol(m))
  c(m[i, seq_len(ncol(m) - k) + k], rep(0, k))
}
# seq_len() is safe for a zero-row matrix, unlike 1:nrow(m).
shifted <- do.call(rbind, lapply(seq_len(nrow(m)), shift_left))
shifted
# V1 V2 V3 V4 V5
#[1,] 1 2 3 4 5
#[2,] 1 2 3 4 0
#[3,] 1 2 3 0 0
#[4,] 0 1 0 0 0
#[5,] 1 0 0 0 0

Related

Count number of pairs across elements in a list in R?

Similar questions have been asked about counting pairs, however none seem to be specifically useful for what I'm trying to do.
What I want is to count the number of pairs across multiple list elements and turn it into a matrix. For example, if I have a list like so:
myList <- list(
a = c(2,4,6),
b = c(1,2,3,4),
c = c(1,2,5,7),
d = c(1,2,4,5,8)
)
We can see that the pair 1:2 appears 3 times (once each in b, c, and d). The pair 1:3 appears only once in b. The pair 1:4 appears 2 times (once each in b and d)... etc.
I would like to count the number of times a pair appears and then turn it into a symmetrical matrix. For example, my desired output would look something like the matrix I created manually (where each element of the matrix is the total count for that pair of values):
> myMatrix
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 0 3 1 2 2 0 1 1
[2,] 3 0 1 3 2 1 1 1
[3,] 1 1 0 1 0 0 0 0
[4,] 2 3 1 0 1 1 0 1
[5,] 2 2 0 1 0 0 1 1
[6,] 0 1 0 1 0 0 0 0
[7,] 1 1 0 0 1 0 0 0
[8,] 1 1 0 1 1 0 0 0
Any suggestions are greatly appreciated
Inspired by @akrun's answer, I think you can use a cross product to get this very quickly and simply:
# stack() gives one (values, ind) row per list entry; table() turns that into
# a value-by-element count table.
incidence <- table(stack(myList))
# incidence %*% t(incidence): cell [a, b] sums, over the list elements, the
# product of the counts of a and b in that element.
out <- tcrossprod(incidence)
# The diagonal pairs each value with itself, so blank it out.
diag(out) <- 0
# values
#values 1 2 3 4 5 6 7 8
# 1 0 3 1 2 2 0 1 1
# 2 3 0 1 3 2 1 1 1
# 3 1 1 0 1 0 0 0 0
# 4 2 3 1 0 1 1 0 1
# 5 2 2 0 1 0 0 1 1
# 6 0 1 0 1 0 0 0 0
# 7 1 1 0 0 1 0 0 0
# 8 1 1 0 1 1 0 0 0
Original answer:
Use combn to get the combinations, as well as reversing each combination.
Then convert to a data.frame and table the results.
# For every list element, enumerate each pair in both orders, (a, b) and
# (b, a), so that the final table comes out symmetric.
pair_list <- lapply(
  myList,
  \(x) combn(x, m = 2, FUN = \(cm) rbind(cm, rev(cm)), simplify = FALSE)
)
# Flatten one level into a plain list of 2 x 2 matrices (argument spelled
# out in full rather than relying on partial matching of `rec`), stack them
# into a two-column data frame and cross-tabulate the two columns.
tab <- data.frame(do.call(rbind, unlist(pair_list, recursive = FALSE)))
table(tab)
# X2
#X1 1 2 3 4 5 6 7 8
# 1 0 3 1 2 2 0 1 1
# 2 3 0 1 3 2 1 1 1
# 3 1 1 0 1 0 0 0 0
# 4 2 3 1 0 1 1 0 1
# 5 2 2 0 1 0 0 1 1
# 6 0 1 0 1 0 0 0 0
# 7 1 1 0 0 1 0 0 0
# 8 1 1 0 1 1 0 0 0
We could loop over the list, get the pairwise combinations with combn, stack it to a two column dataset, convert the 'values' column to factor with levels specified as 1 to 8, get the frequency count (table), do a cross product (crossprod), convert the output back to logical, and then Reduce the list elements by adding elementwise and finally assign the diagonal elements to 0. (If needed, set the names attribute of dimnames to NULL.)
# For each list element x:
#   - combn(x, 2, simplify = FALSE) lists every pair of values, and
#     setNames() labels each pair "a_b" via combn(x, 2, paste, collapse="_").
#   - stack() turns that named list into a two-column data frame
#     (values, ind); transform() makes `values` a factor with levels 1:8 so
#     every element produces a table with the same eight value columns.
#   - [2:1] puts `ind` first, so table() is pair-by-value and crossprod()
#     returns a value-by-value matrix; `> 0` converts it to logical.
# Reduce(`+`, ...) then adds the per-element logical matrices together.
out <- Reduce(`+`, lapply(myList, function(x)
crossprod(table(transform(stack(setNames(
combn(x,
2, simplify = FALSE), combn(x, 2, paste, collapse="_"))),
values = factor(values, levels = 1:8))[2:1]))> 0))
# A value is not paired with itself.
diag(out) <- 0
# Drop the axis labels carried over from table().
names(dimnames(out)) <- NULL
-output
> out
1 2 3 4 5 6 7 8
1 0 3 1 2 2 0 1 1
2 3 0 1 3 2 1 1 1
3 1 1 0 1 0 0 0 0
4 2 3 1 0 1 1 0 1
5 2 2 0 1 0 0 1 1
6 0 1 0 1 0 0 0 0
7 1 1 0 0 1 0 0 0
8 1 1 0 1 1 0 0 0
I thought of a solution based on @TarJae's answer. It is not an elegant one, but it was a fun challenge!
Libraries
library(tidyverse)
Code
# One row per pair (V1, V2) for every list element, bound into one tibble.
map_df(myList,function(x) as_tibble(t(combn(x,2)))) %>%
# Number of times each (V1, V2) pair occurs, stored in column n.
count(V1,V2) %>%
# Keep a copy of the counts as temp_df while passing them down the pipe.
# NOTE(review): relies on this assignment being visible to the bind_rows()
# step below - confirm with the installed magrittr version.
{. -> temp_df} %>%
# Append the mirrored pairs (V1 and V2 swapped) to fill both triangles.
bind_rows(
temp_df %>%
rename(V2 = V1, V1 = V2)
) %>%
# Add every cell of the 8 x 8 grid; no `by` is given, so the join uses the
# columns the two tables share.
full_join(
expand_grid(V1 = 1:8,V2 = 1:8)
) %>%
# Grid cells with no observed pair get a count of 0.
replace_na(replace = list(n = 0)) %>%
arrange(V2,V1) %>%
# Spread V1 across the columns; V2 stays as the first column of the result.
pivot_wider(names_from = V1,values_from = n) %>%
as.matrix()
Output
V2 1 2 3 4 5 6 7 8
[1,] 1 0 3 1 2 2 0 1 1
[2,] 2 3 0 1 3 2 1 1 1
[3,] 3 1 1 0 1 0 0 0 0
[4,] 4 2 3 1 0 1 1 0 1
[5,] 5 2 2 0 1 0 0 1 1
[6,] 6 0 1 0 1 0 0 0 0
[7,] 7 1 1 0 0 1 0 0 0
[8,] 8 1 1 0 1 1 0 0 0
First identify the possible combination of each vector from the list to a tibble then I bind them to one tibble and count the combinations.
library(tidyverse)
# Build the two-column pair table for every element of myList rather than
# hard-coding four of them; this works for a list of any length and avoids
# creating a variable called `c`.
pair_tables <- lapply(myList, function(x) as_tibble(t(combn(x, 2))))
# Stack the per-element tables and count how often each (V1, V2) pair occurs.
bind_rows(pair_tables) %>%
  count(V1, V2)
V1 V2 n
<dbl> <dbl> <int>
1 1 2 3
2 1 3 1
3 1 4 2
4 1 5 2
5 1 7 1
6 1 8 1
7 2 3 1
8 2 4 3
9 2 5 2
10 2 6 1
11 2 7 1
12 2 8 1
13 3 4 1
14 4 5 1
15 4 6 1
16 4 8 1
17 5 7 1
18 5 8 1

How to find number of times a change happens in a matrix, using R

Let's say I have a matrix
>tmp
[,1] [,2] [,3]
[1,] 0 0 3
[2,] 0 2 0
[3,] 1 0 0
[4,] 1 0 0
[5,] 0 2 0
[6,] 1 0 0
[7,] 0 0 3
[8,] 0 0 3
[9,] 0 2 0
I now want to count number of changes in the matrix, so let's say in the first row I have a 3, then it changes to 2 in the next row and so on. I want to add these changes to a table like this:
1 2 3
1 1 1 1
2 2 0 0
3 0 2 1
So it says that 1 changes to 1, 1 time. 1 changes to 2, 1 time. 2 changes to 1, 2 times and so on. I have tried thinking about it for some time, but i can't figure out a smart method. I was thinking of using the function table() in R, but i am not sure how to. Does anyone have a smart solution to this problem?
Thanks!
# Read the matrix row by row (hence the transpose) into one vector and keep
# only the non-zero entries.
t2 <- as.vector(t(tmp))
t2 <- t2[t2 != 0]
# Pair every entry with its successor: all but the last against all but the
# first.
trans <- data.frame(from = head(t2, -1), to = tail(t2, -1))
with(trans, table(from, to))
# to
# from 1 2 3
# 1 1 1 1
# 2 2 0 0
# 3 0 2 1
You could, of course, skip the data frame entirely and jump to table(from = t2[-length(t2)], to = t2[-1]).
Using this data:
tmp = as.matrix(read.table(text = " 0 0 3
0 2 0
1 0 0
1 0 0
0 2 0
1 0 0
0 0 3
0 0 3
0 2 0"))
library(zoo)
library(magrittr)
# Tabulate transitions between the non-zero entries of consecutive rows.
# NOTE(review): apply() only returns a plain vector here when every row has
# exactly one non-zero entry, as in the sample data - confirm for real data.
tmp %>%
apply(1, function(x) x[x!=0]) %>% # Get non-zero element from each row
rollapplyr(2, I) %>% # Make matrix whose rows are all 2-windows of above
{table(from = .[,1], to = .[,2])} # make into table
# to
# from 1 2 3
# 1 1 1 1
# 2 2 0 0
# 3 0 2 1
Data used
tmp <- data.table::fread("
a b c d
[1,] 0 0 3
[2,] 0 2 0
[3,] 1 0 0
[4,] 1 0 0
[5,] 0 2 0
[6,] 1 0 0
[7,] 0 0 3
[8,] 0 0 3
[9,] 0 2 0
")[, -'a']
tmp <- as.matrix(tmp)

Is there a way to generate a matrix in R of 0's and 1's to satisfy specific row and column totals?

I want to generate a 7 column by 10 row matrix with a total of exactly 20 randomly generated 1's, but with at least two 1's per row and two 1's per column. How could I do that?
Also, how would the code be different if I wanted to set a range of acceptable row and column totals instead of minimums?
Thanks!
I feel like there should be a more elegant solution, but here's a ball of duct tape:
# Draw a random nrow x ncol matrix of 0s and 1s containing exactly n ones,
# with at least two ones in every row and every column (rejection sampling).
#
# n     number of ones to place
# nrow  number of rows
# ncol  number of columns
#
# Returns the first sampled matrix that satisfies both constraints.
# Stops with an error when the constraints cannot be met, where the loop
# would otherwise never terminate.
matbuilder <- function(n,nrow,ncol) {
  min_ones <- 2
  # n ones must fit in the matrix and cover every row and column twice.
  if (n > nrow * ncol || n < min_ones * max(nrow, ncol)) {
    stop("cannot place ", n, " ones in a ", nrow, " x ", ncol,
         " matrix with at least ", min_ones, " per row and column",
         call. = FALSE)
  }
  # The cell values never change between attempts, only their order.
  cells <- c(rep(1, n), rep(0, nrow * ncol - n))
  repeat {
    trial <- matrix(sample(cells), nrow = nrow, ncol = ncol)
    if (all(rowSums(trial) >= min_ones) && all(colSums(trial) >= min_ones)) {
      return(trial)
    }
  }
}
x <- matbuilder(20, 10, 7)
x
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1 1 0 0 0 0 0
## [2,] 0 0 0 0 1 0 1
## [3,] 0 1 0 0 0 1 0
## [4,] 1 0 0 0 1 0 0
## [5,] 0 1 0 0 0 1 0
## [6,] 0 0 1 1 0 0 0
## [7,] 0 0 1 1 0 0 0
## [8,] 0 0 0 0 1 0 1
## [9,] 0 0 0 0 0 1 1
## [10,] 0 1 1 0 0 0 0
sum(x)
## [1] 20
rowSums(x)
## [1] 2 2 2 2 2 2 2 2 2 2
colSums(x)
## [1] 2 4 3 2 3 3 3
Or, to give a range of acceptable row/column totals...
# Draw a random nrow x ncol matrix of 0s and 1s containing exactly n ones
# whose row totals lie in [rowmin, rowmax] and whose column totals lie in
# [colmin, colmax] (rejection sampling).
#
# n               number of ones to place
# nrow, ncol      matrix dimensions
# rowmin, rowmax  inclusive bounds on every row total
# colmin, colmax  inclusive bounds on every column total
# ntimeout        maximum number of random draws to try
#
# Returns the first matrix that satisfies all bounds. If none is found
# within ntimeout draws, prints a timeout message and returns NA rather
# than the last rejected matrix.
matbuilder <- function(n,nrow,ncol,rowmin,rowmax,colmin,colmax,ntimeout=100000) {
  # The cell values never change between attempts, only their order.
  cells <- c(rep(1, n), rep(0, nrow * ncol - n))
  for (attempt in seq_len(ntimeout)) {
    trial <- matrix(sample(cells), nrow = nrow, ncol = ncol)
    row_totals <- rowSums(trial)
    col_totals <- colSums(trial)
    if (all(row_totals >= rowmin) && all(row_totals <= rowmax) &&
        all(col_totals >= colmin) && all(col_totals <= colmax)) {
      # Returning here means a success on the final attempt is never
      # reported as a timeout.
      return(trial)
    }
  }
  cat("sorry boss, timeout.")
  NA
}
x <- matbuilder(25,10,7,rowmin=2,rowmax=3,colmin=2,colmax=4)
x
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1 0 0 0 0 1 1
## [2,] 0 1 1 0 0 0 1
## [3,] 1 0 0 0 1 0 0
## [4,] 1 0 1 1 0 0 0
## [5,] 1 0 0 0 0 1 1
## [6,] 0 1 1 1 0 0 0
## [7,] 0 0 0 1 0 0 1
## [8,] 0 0 1 0 1 0 0
## [9,] 0 0 0 1 1 0 0
## [10,] 0 0 0 0 1 1 0
sum(x)
## [1] 25
rowSums(x)
## [1] 3 3 2 3 3 3 2 2 2 2
colSums(x)
## [1] 4 2 4 4 4 3 4
This one involves sampling a matrix of indices such that each row is repeated twice and columns are repeated at least 2 times.
# Fix the RNG so the printed sums below are reproducible.
set.seed(42)
# 10 x 7 matrix of zeros to be filled in.
m = matrix(rep(0, 70), nrow = 10)
#Sample rows 1-10 twice
rows = sample(c(1:10, 1:10))
#Sample columns 1-7 twice and additional 6 to make 20
columns = sample(c(sample(1:7, 6, replace = TRUE), 1:7, 1:7))
#Create a matrix of indices that should be 1
inds = cbind(rows, columns)
#Remove duplicates in inds if any (Refer: https://stackoverflow.com/q/44555420/7128934)
# NOTE(review): every duplicated (row, column) pair receives the same single
# replacement column (size = 1), and the result is not re-checked for new
# duplicates - confirm that sum(m) is still 20 when using other seeds.
inds[,2] = replace(x = inds[,2],
list = duplicated(inds),
values = sample(x = columns[!(columns %in% inds[,2][duplicated(inds)])],
size = 1))
# Set every sampled (row, column) cell to 1.
m[inds] = 1
#Check
rowSums(m)
#[1] 2 2 2 2 2 2 2 2 2 2
colSums(m)
#[1] 4 2 2 3 2 2 5
sum(m)
#[1] 20

R: Matrix counting matches when 2 teams interacted from schedule with 3 participants per match

I'd like to make some calculations on FIRST robotics teams and need to build, for lack of better words, a binary interaction matrix, that is, a matrix recording when two teams were on the same alliance. Each alliance has three teams, so there are 9 values from each match added to the matrix, when considering (i,j), (j,i), and (i,i).
The full data I'm using is here: http://frc-events.firstinspires.org/2016/MOKC/qualifications
But for simplicity, here is an example of 9 teams playing 1 match each.
> data.frame(Team.1=1:3,Team.2=4:6,Team.3=7:9)
Team.1 Team.2 Team.3
1 1 4 7
2 2 5 8
3 3 6 9
The matrix should count each binary interaction, (1,4),(4,7),(3,6),(6,3),(9,9), etc, and will be an N x N matrix, where in the above example N=9. Here's the matrix that represents the above lists:
> matrix(data=c(1,0,0,1,0,0,1,0,0,+
+ 0,1,0,0,1,0,0,1,0,+
+ 0,0,1,0,0,1,0,0,1,+
+ 1,0,0,1,0,0,1,0,0,+
+ 0,1,0,0,1,0,0,1,0,+
+ 0,0,1,0,0,1,0,0,1,+
+ 1,0,0,1,0,0,1,0,0,+
+ 0,1,0,0,1,0,0,1,0,+
+ 0,0,1,0,0,1,0,0,1),9,9)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] 1 0 0 1 0 0 1 0 0
[2,] 0 1 0 0 1 0 0 1 0
[3,] 0 0 1 0 0 1 0 0 1
[4,] 1 0 0 1 0 0 1 0 0
[5,] 0 1 0 0 1 0 0 1 0
[6,] 0 0 1 0 0 1 0 0 1
[7,] 1 0 0 1 0 0 1 0 0
[8,] 0 1 0 0 1 0 0 1 0
[9,] 0 0 1 0 0 1 0 0 1
In the real data, the team numbers are not sequential and would be more like 5732, 1345, 3451, etc., and there are more matches per team, meaning the matrix values would be between 0 and the maximum number of matches any of the teams played. This can be seen in the real data.
Thanks to anyone that can help.
There is probably a more elegant approach, but here is one using data.table.
library(data.table)
dat <- data.table(Team.1 = 1:3, Team.2 = 4:6, Team.3 = 7:9)
# Add a match id (modifies dat by reference).
dat[, match := seq_len(.N)]
# Turn to long format: one row per (match, team); the column holding the
# original column names is not needed.
mdat <- melt(dat, id.vars = "match", value.name = "team")[, variable := NULL]
# Merge with itself on match: every team of a match is paired with every
# team of the same match, including itself.
dat2 <- merge(mdat, mdat, by = "match", all = TRUE, allow.cartesian = TRUE)
# Reshape to a team-by-team table counting the matches shared by each pair.
# Argument names are written out in full; value.var is given explicitly so
# dcast() does not have to guess which column to aggregate.
dcast(dat2, team.x ~ team.y, fun.aggregate = length, value.var = "match")
team.x 1 2 3 4 5 6 7 8 9
1: 1 1 0 0 1 0 0 1 0 0
2: 2 0 1 0 0 1 0 0 1 0
3: 3 0 0 1 0 0 1 0 0 1
4: 4 1 0 0 1 0 0 1 0 0
5: 5 0 1 0 0 1 0 0 1 0
6: 6 0 0 1 0 0 1 0 0 1
7: 7 1 0 0 1 0 0 1 0 0
8: 8 0 1 0 0 1 0 0 1 0
9: 9 0 0 1 0 0 1 0 0 1
And, because I can, one in base-R. A case where I think the use of a for-loop is justified (as you keep modifying the same object).
# Keep only the team columns: dat may also carry a `match` id column
# (created with dat[, match := ...]) which must not be counted as a team.
team_cols <- setdiff(names(dat), "match")
teams <- as.matrix(as.data.frame(dat)[, team_cols, drop = FALSE])
# Map team numbers to matrix positions so that non-sequential team numbers
# (e.g. 5732, 1345) work; the dimnames keep the original numbers visible.
team_ids <- sort(unique(as.vector(teams)))
nteams <- length(team_ids)
# Make matrix to put results in.
res <- matrix(0, nrow = nteams, ncol = nteams,
              dimnames = list(team_ids, team_ids))
# For each match, generate every (team, team) combination, including a
# team with itself, and add one to the corresponding cells.
for (i in seq_len(nrow(teams))) {
  x <- match(teams[i, ], team_ids)
  coords <- as.matrix(expand.grid(x, x))
  res[coords] <- res[coords] + 1
}
Here is my suggestion with base functions. I tried to create a matrix. My approach was to look for the position indexes for 1.
library(magrittr)
# Example schedule: one row per match, one column per alliance slot.
mydf <- data.frame(Team.1 = 1:3, Team.2 = 4:6,Team.3 = 7:9)
### Create a matrix with position indexes
# For each match (row of mydf), collect every ordered pair of distinct teams.
lapply(1:nrow(mydf), function(x){
a <- t(combn(mydf[x, ], 2)) # Get some combination
b <- a[, 2:1] # Get other combination by reversing columns
foo <- rbind(a, b)
foo
}) %>%
do.call(rbind, .) -> ana
# Flatten to a plain matrix with the same number of rows.
ana <- matrix(unlist(ana), nrow = nrow(ana))
### Another set: Get indexes for self (e.g., (1,1), (2,2), (3,3))
foo <- rep(1:max(mydf), times = 2)
matrix(foo, nrow = length(foo) / 2) -> bob
### A matrix with all position indexes
cammy <- rbind(ana, bob)
### Create a plain matrix
# Sized by the largest team number, so team numbers are used directly as
# row/column positions.
mat <- matrix(0, nrow = max(mydf), ncol = max(mydf))
### Fill in the matrix with 1
# Every indexed cell is set to 1; repeated pairs are not counted.
mat[cammy] <- 1
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
# [1,] 1 0 0 1 0 0 1 0 0
# [2,] 0 1 0 0 1 0 0 1 0
# [3,] 0 0 1 0 0 1 0 0 1
# [4,] 1 0 0 1 0 0 1 0 0
# [5,] 0 1 0 0 1 0 0 1 0
# [6,] 0 0 1 0 0 1 0 0 1
# [7,] 1 0 0 1 0 0 1 0 0
# [8,] 0 1 0 0 1 0 0 1 0
# [9,] 0 0 1 0 0 1 0 0 1
EDIT
Here is a revised version based on the previous idea. This is not concise like Heroka's idea with base functions. In my modified data, team 1 and 4 had two matches. The idea here is that I counted how many times each pair appeared in the data set. The dplyr part is doing that. In the for loop, I filled in the matrix, mat by going through each row of cammy.
mydf <- data.frame(Team.1=c(1:3,1),Team.2=c(4:6,4),Team.3=c(7:9,5))
# Team.1 Team.2 Team.3
#1 1 4 7
#2 2 5 8
#3 3 6 9
#4 1 4 5
library(dplyr)
# For each match (row of mydf), collect every ordered pair of distinct teams.
lapply(1:nrow(mydf), function(x){
a <- t(combn(mydf[x, ], 2)) # Get some combination
b <- a[, 2:1] # Get other combination by reversing columns
foo <- rbind(a, b)
foo
}) %>%
do.call(rbind, .) -> ana
# Flatten to a two-column data frame (columns X1, X2 are used below).
ana <- data.frame(matrix(unlist(ana), nrow = nrow(ana)))
### Another set: Get indexes for self (e.g., (1,1), (2,2), (3,3))
foo <- rep(1:max(mydf), times = 2)
data.frame(matrix(foo, nrow = length(foo) / 2)) -> bob
# Count how many times each (X1, X2) pair occurs; bob contributes each
# self-pair exactly once.
cammy <- bind_rows(ana, bob) %>%
group_by(X1, X2) %>%
mutate(total = n()) %>%
as.matrix
### Create a plain matrix
# Sized by the largest team number, so team numbers are used directly as
# row/column positions.
mat <- matrix(0, nrow = max(mydf), ncol = max(mydf))
# Write each pair's count (third column of cammy) into its cell.
for(i in 1:nrow(cammy)){
mat[cammy[i, 1], cammy[i, 2]] <- cammy[i, 3]
}
print(mat)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
# [1,] 1 0 0 2 1 0 1 0 0
# [2,] 0 1 0 0 1 0 0 1 0
# [3,] 0 0 1 0 0 1 0 0 1
# [4,] 2 0 0 1 1 0 1 0 0
# [5,] 1 1 0 1 1 0 0 1 0
# [6,] 0 0 1 0 0 1 0 0 1
# [7,] 1 0 0 1 0 0 1 0 0
# [8,] 0 1 0 0 1 0 0 1 0
# [9,] 0 0 1 0 0 1 0 0 1

How to transform a directed Dataset into a Matrix with R

I have a Dataset in R which looks like this:
ID LinkedTo
1 Null
2 1
3 1
4 3
5 4
I want transform it into a Matrix which looks similar to this:
0 0 0 0 0
1 0 0 0 0
1 0 0 0 0
0 0 1 0 0
0 0 0 1 0
Another option is to model your directed dataset as a directed graph and extract its adjacency matrix.
library(igraph)
# Edge list: each row is a directed edge ID -> LinkedTo (the row with
# "Null" is left out because it has no edge).
dat <- read.table(text='ID LinkedTo
2 1
3 1
4 3
5 4',header=TRUE)
# graph_from_data_frame() and as_adjacency_matrix() are the current names of
# the deprecated graph.data.frame() and get.adjacency().
gg <- graph_from_data_frame(dat)
# Convert the adjacency matrix returned by igraph to a base R matrix.
as.matrix(as_adjacency_matrix(gg))
2 3 4 5 1
2 0 0 0 0 1
3 0 0 0 0 1
4 0 1 0 0 0
5 0 0 1 0 0
1 0 0 0 0 0
It's more convenient if you replace "Null" by NA in your dataset. Something like
i <- structure(list(ID = c(1, 2, 3, 4, 5),
LinkedTo = c(NA, 1, 1, 3, 4)),
.Names = c("ID", "LinkedTo"),
row.names = c(NA, -5L), class = "data.frame")
i
# ID LinkedTo
# 1 1 NA
# 2 2 1
# 3 3 1
# 4 4 3
# 5 5 4
Then you can do
n <- nrow(i)
m <- matrix(0, n, n)
# Column-major position of cell (ID, LinkedTo) in an n x n matrix.
linear_idx <- i$ID + (i$LinkedTo - 1) * n
m[linear_idx] <- 1
(It would work the same way if i was a matrix, but you would have to change i$ID and i$LinkedTo to i[, 1] and i[, 2] resp)
You can start by replacing the nulls with zeros, I think.
Then you can do a little for loop:
# Example data: each id is linked to a randomly drawn position.
df <- data.frame(id = 1:5, pos = sample(1:5))
# One row per record and one column per id, all zeros to start with.
m <- matrix(0, nrow = nrow(df), ncol = max(df$id))
# Mark cell (row, pos) for every record in a single vectorised assignment.
# This replaces the 1:nrow(df) loop, so no loop variable `i` is left behind
# to overwrite another object of that name.
m[cbind(seq_len(nrow(df)), df$pos)] <- 1
Using @konvas's i dataset:
# Recode the missing link as 0 so that the row of the index matrix holding
# it does not address any cell of m.
no_link <- is.na(i[, 2])
i[no_link, 2] <- 0
size <- nrow(i)
m <- matrix(0, size, size)
# Each (ID, LinkedTo) row of i is used as a (row, column) position in m.
m[as.matrix(i)] <- 1
m
# [,1] [,2] [,3] [,4] [,5]
#[1,] 0 0 0 0 0
#[2,] 1 0 0 0 0
#[3,] 1 0 0 0 0
#[4,] 0 0 1 0 0
#[5,] 0 0 0 1 0
table should also work if you combine it with factor. (I say "should" because your conditions aren't clearly specified and your sample data are not reproducible.)
Using @konvas's "i" sample data, try:
> table(i$ID, factor(i$LinkedTo, 1:5))
1 2 3 4 5
1 0 0 0 0 0
2 1 0 0 0 0
3 1 0 0 0 0
4 0 0 1 0 0
5 0 0 0 1 0

Resources