subsetting rows from a list using another list in R - r

I'm trying to subset number of rows in a list using R.
I have 2 lists one has matrix with n rows and p columns the second list has the number of rows that I need to subset.
mat <- list(a = matrix(rnorm(8*4),8), b = matrix(rnorm(15*4),15), c = matrix(rnorm(7*4),7))
rw <- list(a = 6, b = 7, c = 4)
Both list have common names, in the above example, I would like to retain for element a first 6 rows, for b first 7 rows and c 4 rows.
How would you do that in R

One solution with Map:
Map(function(x, y) x[1:y, ], mat, rw)
# $a
# [,1] [,2] [,3] [,4]
# [1,] 1.3331549 -0.6985623 -1.1842788 -0.1496880
# [2,] 0.2096395 -0.2901906 0.4210395 0.9116542
# [3,] 0.1763317 1.3858205 -1.1567526 -1.1794618
# [4,] 1.3596395 0.5815012 -0.3681799 -0.6569447
# [5,] 0.2251352 0.2331387 -1.2509844 -1.1346729
# [6,] 0.6796729 1.1274772 0.3992489 0.2305927
#
# $b
# [,1] [,2] [,3] [,4]
# [1,] 0.30700748 -1.2173855 -0.3377885 -0.6748974
# [2,] 1.09506443 -0.6142685 -1.1301122 -0.7792081
# [3,] -0.61049306 -1.3414474 0.9771373 1.0191636
# [4,] 0.66687294 -0.5269721 0.9971987 -0.6514121
# [5,] 0.54623236 0.9020964 0.3252700 -0.3925129
# [6,] -0.04848903 -0.5204047 0.3344675 -0.3232105
# [7,] -0.56502719 -0.3743275 2.1760364 -0.2941956
#
# $c
# [,1] [,2] [,3] [,4]
# [1,] -0.3225609 -0.40126955 -1.787255 -1.5005721
# [2,] 0.3474430 -1.16657015 1.106033 0.3114282
# [3,] 0.4099467 -0.04353555 0.838330 0.3282246
# [4,] -1.4648740 0.51279791 0.198768 -0.3394502

Related

Change elements in one matrix based on positions given by another matrix in R

Let's say I have a symmetric matrix A, for example:
> A <- matrix(runif(16),nrow = 4,byrow = T)
> ind <- lower.tri(A)
> A[ind] <- t(A)[ind]
> A
[,1] [,2] [,3] [,4]
[1,] 0.4212778 0.6874073 0.1551896 0.46757640
[2,] 0.6874073 0.5610995 0.1779030 0.54072946
[3,] 0.1551896 0.1779030 0.9515304 0.79429777
[4,] 0.4675764 0.5407295 0.7942978 0.01206526
I also have a 4 x 3 matrix B that gives specific positions of matrix A, for example:
> B<-matrix(c(1,2,4,2,1,3,3,2,4,4,1,3),nrow=4,byrow = T)
> B
[,1] [,2] [,3]
[1,] 1 2 4
[2,] 2 1 3
[3,] 3 2 4
[4,] 4 1 3
The B matrix represents the following positions of A: (1,1), (1,2), (1,4), (2,2), (2,1), (2,3), (3,3), (3,2), (3,4), (4,4), (4,1), (4,3).
I want to change the values of A that are NOT in the positions given by B, replacing them by Inf. The result I want is:
[,1] [,2] [,3] [,4]
[1,] 0.4212778 0.6874073 Inf 0.46757640
[2,] 0.6874073 0.5610995 0.1779030 Inf
[3,] Inf 0.1779030 0.9515304 0.79429777
[4,] 0.4675764 Inf 0.7942978 0.01206526
How can I do that quickly avoiding a for loop (which I'm able to code)? I've seen many similar posts, but no one gave me what I want. Thank you!
You want to do something like matrix subsetting (e.g., P[Q]) except that you can't use negative indexing in matrix subsetting (e.g., P[-Q] is not allowed). Here's a work-around.
Store the elements you want to retain from A in a 2-column matrix where each row is a coordinate of A:
Idx <- cbind(rep(1:4, each=ncol(B)), as.vector(t(B)))
Create a matrix where all values are Inf, and then overwrite the values you wanted to "keep" from A:
Res <- matrix(Inf, nrow=nrow(A), ncol=ncol(A))
Res[Idx] <- A[Idx]
Result
Res
# [,1] [,2] [,3] [,4]
#[1,] 0.9043131 0.639718071 Inf 0.19158238
#[2,] 0.6397181 0.601327568 0.007363378 Inf
#[3,] Inf 0.007363378 0.752123162 0.61428003
#[4,] 0.1915824 Inf 0.614280026 0.02932679
Here is a one-liner
A[cbind(1:nrow(A), sum(c(1:ncol(A))) - rowSums(B))] <- Inf
[,1] [,2] [,3] [,4]
[1,] 0.4150663 0.23440503 Inf 0.6665222
[2,] 0.2344050 0.38736067 0.01352211 Inf
[3,] Inf 0.01352211 0.88319263 0.9942303
[4,] 0.6665222 Inf 0.99423028 0.7630221
Another way would be to identify the cells with an apply and set then to inf.
cnum <- 1:ncol(A)
A[cbind(1:nrow(A), apply(B, 1, function(x) cnum[-which(cnum %in% x)]))] <- Inf
A
# [,1] [,2] [,3] [,4]
# [1,] 0.9148060 0.9370754 Inf 0.8304476
# [2,] 0.9370754 0.5190959 0.7365883 Inf
# [3,] Inf 0.7365883 0.4577418 0.7191123
# [4,] 0.8304476 Inf 0.7191123 0.9400145
Note: set.seed(42).
A <- matrix(runif(16),nrow = 4,byrow = T)
ind <- lower.tri(A)
A[ind] <- t(A)[ind]
## >A[]
## [,1] [,2] [,3] [,4]
## [1,] 0.07317535 0.167118857 0.0597721 0.2128698
## [2,] 0.16711886 0.008661005 0.6419335 0.6114373
## [3,] 0.05977210 0.641933514 0.7269202 0.3547959
## [4,] 0.21286984 0.611437278 0.3547959 0.4927997
The first thing to notice is that the matrix B is not very helpful in its current form, because the information we need is the rows and each value in B
B<-matrix(c(1,2,4,2,1,3,3,2,4,4,1,3),nrow=4,byrow = T)
> B
## [,1] [,2] [,3]
## [1,] 1 2 4
## [2,] 2 1 3
## [3,] 3 2 4
## [4,] 4 1 3
So we can create that simply by using melt and use Var1 and value.
>melt(B)
## Var1 Var2 value
## 1 1 1 1
## 2 2 1 2
## 3 3 1 3
## 4 4 1 4
## 5 1 2 2
## 6 2 2 1
## 7 3 2 2
## 8 4 2 1
## 9 1 3 4
## 10 2 3 3
## 11 3 3 4
## 12 4 3 3
We need to replace the non existing index in A by inf. This is not easy to do directly. So an easy way out would be to create another matrix of Inf and fill the values of A according to the index of melt(B)
> C<-matrix(Inf,nrow(A),ncol(A))
idx <- as.matrix(melt(B)[,c("Var1","value")])
C[idx]<-A[idx]
> C
## [,1] [,2] [,3] [,4]
## [1,] 0.07317535 0.167118857 0.0597721 0.2128698
## [2,] 0.16711886 0.008661005 0.6419335 Inf
## [3,] Inf 0.641933514 0.7269202 0.3547959
## [4,] 0.21286984 Inf 0.3547959 0.4927997
Another approach that accomplishes matrix subsetting (e.g., P[Q]) would be to create the index Q manually. Here's one approach.
Figure out which column index is "missing" from each row of B:
col_idx <- apply(B, 1, function(x) (1:nrow(A))[-match(x, 1:nrow(A))])
Create subsetting matrix Q
Idx <- cbind(1:nrow(A), col_idx)
Do the replacement
A[Idx] <- Inf
Of course, you can make this a one-liner if you really want to:
A[cbind(1:nrow(A), apply(B, 1, function(x) (1:nrow(A))[-match(x, 1:nrow(A))])]

R - Dividing columns of matrix list by vector list

I have a list of matrices and a list of vectors, and I want to divide the columns of each matrix with the corresponding vector element.
For example, given
set.seed(230)
data <- list(cbind(c(NA, rnorm(6)),c(rnorm(6),NA)), cbind(runif(7), runif(7)))
divisors <- list(c(0.5,2), c(3,4))
I'm looking for a vectorized function that produces output that looks the same as
for(i in 1:length(data)){
for(j in 1:ncol(data[[i]])){data[[i]][,j] <- data[[i]][,j] / divisors[[i]][j]}
}
i.e.
[[1]]
[,1] [,2]
[1,] NA 0.28265752
[2,] -0.46967014 -0.07132588
[3,] 0.20253439 -0.37432527
[4,] 0.65736410 0.06630705
[5,] 0.72349294 0.67202129
[6,] 0.88532648 -0.80892508
[7,] 0.08162027 NA
[[2]]
[,1] [,2]
[1,] 0.26597435 0.18120979
[2,] 0.31213250 0.16493883
[3,] 0.19250804 0.14104145
[4,] 0.21196882 0.10172964
[5,] 0.10389773 0.04979742
[6,] 0.02754329 0.15064043
[7,] 0.25771766 0.23042586
The closest I have been able to come is
Map(`/`, data, divisors)
But that divides rows (rather than columns) of the matrix by the vector. Any help appreciated.
Transpose your matrices before and after:
lapply(Map(`/`, lapply(data, t), divisors), t)
# [[1]]
# [,1] [,2]
# [1,] NA 0.28265752
# [2,] -0.46967014 -0.07132588
# [3,] 0.20253439 -0.37432527
# [4,] 0.65736410 0.06630705
# [5,] 0.72349294 0.67202129
# [6,] 0.88532648 -0.80892508
# [7,] 0.08162027 NA
#
# [[2]]
# [,1] [,2]
# [1,] 0.26597435 0.18120979
# [2,] 0.31213250 0.16493883
# [3,] 0.19250804 0.14104145
# [4,] 0.21196882 0.10172964
# [5,] 0.10389773 0.04979742
# [6,] 0.02754329 0.15064043
# [7,] 0.25771766 0.23042586
I prefer the transpose approach above, but another option is to expand your divisor vectors into matrices of the same dimensions as in data:
div_mat = Map(matrix, data = divisors, nrow = sapply(data, nrow), ncol = 2, byrow = T)
Map("/", data, div_mat)

Initializing a matrix in R

I want to initialise a matrix with randomly generated numbers such that the sum of numbers in a row/column is 1 in 1 go.Both do not need to be 1 simultaneously i.e. either row sum is 1 or column sum is 1
For sum of rows = 1 you could try something like:
num_rows <- 5
num_cols <- 5
random_uniform_matrix <- matrix(runif(num_rows * num_cols), nrow = num_rows, ncol = num_cols)
random_uniform_matrix_normalised <- random_uniform_matrix / rowSums(random_uniform_matrix)
random_uniform_matrix_normalised
# [,1] [,2] [,3] [,4] [,5]
# [1,] 0.23587728 0.09577532 0.28102271 0.03763127 0.34969342
# [2,] 0.07252286 0.42979916 0.19738456 0.19545165 0.10484177
# [3,] 0.12868304 0.30537875 0.08245634 0.26911364 0.21436823
# [4,] 0.31938540 0.37610285 0.18834984 0.10297283 0.01318908
# [5,] 0.10775810 0.09167090 0.54077248 0.16717661 0.09262190

Match two lists of matrices (row-wisely) in R

a and b are two lists in which the object number are the same. The objects containing in both a and b are matrix".
a<-list(matrix(c(1,2,2),1,3),matrix(c(2,1,1,1,2,2),2,3),matrix(,0,3))
b<-list(matrix(c(2,2,2),1,3),matrix(c(1,1,2),1,3),matrix(c(1,2,1),1,3))
> a
[[1]]
[,1] [,2] [,3]
[1,] 1 2 2
[[2]]
[,1] [,2] [,3]
[1,] 2 1 2
[2,] 1 1 2
[[3]]
[,1] [,2] [,3]
> b
[[1]]
[,1] [,2] [,3]
[1,] 2 2 2
[[2]]
[,1] [,2] [,3]
[1,] 1 1 2
[[3]]
[,1] [,2] [,3]
[1,] 1 2 1
From above, we can see there are 3 objects (i.e. matrix) in a and b. The row number in each matrix in list a varies, while the row number in each matrix of list b are the same (nrow=1). I want to compare each row of a with the corresponding row in b, and to calculate how many positions (in respective columns) are the same.
Let's take the second object in both a and b for an example. In a, object 2 is a matrix (ncol=3, nrow=2), we need to compare each row with the second matrix in b. The first row in the second object of a is 2 1 2, we can see that the second and third numbers are the same with the second object in b (1 1 2), so the output is 2, then we continually use the second row (1 1 2) for comparison, we find that the three number are all the same with the object 2 in list b, so the output is 3.
The expected result is as follows:
[[1]]
[1] 2
[[2]]
[1] 2 3
[[3]]
[1] 0
I used the following code to calculate:
Map(function(a,b) matrix(sapply(1:3, function(x) {a[,x]==b[,x]}),ncol=3),a,b)
[[1]]
[,1] [,2] [,3]
[1,] FALSE TRUE TRUE
[[2]]
[,1] [,2] [,3]
[1,] FALSE TRUE TRUE
[2,] TRUE TRUE TRUE
[[3]]
[,1] [,2] [,3]
[1,] Logical,0 Logical,0 Logical,0
But I don't know how to go on to sum the result, since there is Logical,0 in the third object. Could you help me with this? Thank you. Or whether there are other ways to solve this problem. Thanks!
Here's an option:
Map(function(x,y) apply(x, 1, function(z) rowSums(z == y)), a, b)
#[[1]]
#[1] 2
#
#[[2]]
#[1] 2 3
#
#[[3]]
#numeric(0)

Pairwise Lepage statistic between columns

I wonder how to compute pairwise Lepage statistic between columns on data like:
> cbind(v1=rnorm(10), v2=rnorm(10), v3=rnorm(10), v4=rnorm(10))
v1 v2 v3 v4
[1,] -2.47148729 0.61727115 1.28285770 0.72974010
[2,] 0.42657513 0.77615280 1.88207246 0.41295301
[3,] -0.32480814 -1.75461602 -0.16589154 -0.52731722
[4,] 0.02760296 -2.08827618 -0.47176830 -0.17416765
[5,] -0.52760532 -0.20514629 0.15589594 -0.54623986
[6,] -0.47143259 -0.56666084 -1.35046101 -0.92754741
[7,] 0.61071291 -1.65132215 1.61024187 0.83128254
[8,] -0.17746888 -1.09887111 -0.32012303 0.69382341
[9,] -0.38707069 -0.69628506 0.04597653 0.13479181
[10,] 0.52030680 1.11764587 -1.10243994 -0.83949756
I'm thinking of having something like:
v1.v1 v1.v2 v1.v3 v1.v4 ... v4.v4
[1,] 0 1 2 5 ... 0
Like what cor(x) does when x is a matrix. I guess dplyr might be an answer? Or there is a multisample version pLepage()?
Consider using base R's sapply. Not familiar with LePage test in R, but using correlation and your example data:
rdmatrix <- cbind(v1=rnorm(10), v2=rnorm(10), v3=rnorm(10), v4=rnorm(10))
corrmatrix <- sapply(1:ncol(rdmatrix),
function(x,y) cor(rdmatrix[,x], rdmatrix[,y]), 1:ncol(rdmatrix))
# [,1] [,2] [,3] [,4]
# [1,] 1.0000000 -0.4613219 -0.5661391 -0.1703655
# [2,] -0.4613219 1.0000000 0.1965278 0.2111900
# [3,] -0.5661391 0.1965278 1.0000000 -0.3305471
# [4,] -0.1703655 0.2111900 -0.3305471 1.0000000
To flatten it out in a matrix of one row, consider the below using outer() for all combination set of column names and do.call(cbind, ...) to flatten:
# MATRIX OF ALL COLS PAIRINGS
cols <- outer(colnames(rdmatrix), colnames(rdmatrix),
function(y,x) paste0(x, '.', y)) # NOTICE INVERSION OF X AND Y
# FLATTEN COL NAMES
cols <- do.call(cbind, as.list(cols))
# FLATTEN CORR MATRIX DATA
finalmatrix <- do.call(cbind, as.list(corrmatrix))
# NAME MATRIX COLUMNS
colnames(finalmatrix) <- cols[1,]
# v1.v1 v1.v2 v1.v3 v1.v4
# [1,] 1 -0.4613219 -0.5661391 -0.1703655
# v2.v1 v2.v2 v2.v3 v2.v4
# [1,] -0.4613219 1 0.1965278 0.21119
# v3.v1 v3.v2 v3.v3 v3.v4
# [1,] -0.5661391 0.1965278 1 -0.3305471
# v4.v1 v4.v2 v4.v3 v4.v4
# [1,] -0.1703655 0.21119 -0.3305471 1

Resources