Construct and inverse matrix from list using R - r

I have a relationship matrix generated from GCTA, that I can import into R using the following function
ReadGRMBin=function(prefix, AllN=F, size=4){
sum_i=function(i){
return(sum(1:i))
}
BinFileName=paste(prefix,".grm.bin",sep="")
NFileName=paste(prefix,".grm.N.bin",sep="")
IDFileName=paste(prefix,".grm.id",sep="")
id = read.table(IDFileName)
n=dim(id)[1]
BinFile=file(BinFileName, "rb");
grm=readBin(BinFile, n=n*(n+1)/2, what=numeric(0), size=size)
NFile=file(NFileName, "rb");
if(AllN==T){
N=readBin(NFile, n=n*(n+1)/2, what=numeric(0), size=size)
}
else N=readBin(NFile, n=1, what=numeric(0), size=size)
i=sapply(1:n, sum_i)
return(list(diag=grm[i], off=grm[-i], id=id, N=N))
}
It then lists the off diagonal and diagonal.
$ diag: num [1:850] 0.878 0.815 1.11 1.161 1.062 ...
$ off : num [1:360825] 0.0181 -0.0304 -0.0663 -0.0211 -0.0583 ...
$ n : int 850
I wish to create a grm I can inverse from this and ideally in the output row, column, value
I have tried the following code but it doesn't read the off diagonal in the correct format
m <- matrix(NA, ncol = length(grm$diag), nrow = length(grm$diag))
m[lower.tri(m)] <- grm$off
m[upper.tri(m)] <- t(m)[upper.tri(t(m))]
diag(m) <- grm$diag
m
want=cbind(which(!is.na(m),arr.ind = TRUE),na.omit(as.vector(m)))
Instead of reading the diagonal values as
2 1, 3 1, 3 2, 4 1, 4 2 etc
It is reading the diagonal going length wise as
2 1, 3 1, 4 1, 5 1, 6 1 etc
So the resulting matrix (shortened) ends up like this
[,1] [,2] [,3] [,4] [,5]
[1,] 0.87798703 0.018129893 -0.03044302 -0.066282429 -0.02106927
[2,] 0.01812989 0.814602911 0.07577287 -0.004078172 -0.03182918
[3,] -0.03044302 0.075772874 1.10976517 -0.055698857 -0.03960679
[4,] -0.06628243 -0.004078172 -0.05569886 1.160611629 -0.01021352
[5,] -0.02106927 -0.031829182 -0.03960679 -0.010213521 1.06245303
When preference is this
[,1] [,2] [,3] [,4] [,5]
[1,] 0.87798703 0.018129893 -0.03044302 -0.02106927 -0.04011643
[2,] 0.01812989 0.814602911 -0.06628243 -0.00582625 -0.06237402
[3,] -0.03044302 -0.06628243 1.10976517 0.1315616 -0.1601102
[4,] -0.02106927 -0.00582625 0.1315616 1.160611629 -0.1388046
[5,] -0.04011643 -0.06237402 -0.1601102 -0.1388046 1.06245303
If you know how to amend the above code to give the wanted format it would be much appreciated.
The end desired output would be the inverse of the matrix in long format if possible. Thanks
1 1 12456
1 2 78910
1 3 34568
1 4 68942

One simple solution is to adapt your code to fill the upper triangle before the lower (since it is the upper triangle that should be filled in column order):
grm = list(
diag = 1:5 * 11,
off = 0:9)
m <- diag(grm$diag)
m[upper.tri(m)] <- grm$off
m[lower.tri(m)] <- t(m)[lower.tri(t(m))]
# [,1] [,2] [,3] [,4] [,5]
# [1,] 11 0 1 3 6
# [2,] 0 22 2 4 7
# [3,] 1 2 33 5 8
# [4,] 3 4 5 44 9
# [5,] 6 7 8 9 55

Related

Solve linear equation system b=0

I found this code to resolve a linear equation system with b=0, but I would like to know why with the first matrix only one column is returned and with the second matrix two columns are returned.
library(MASS)
Null(t(A))
R > (A <- matrix(c(1,2,3,2,4,7), ncol = 3, byrow = TRUE))
[,1] [,2] [,3]
[1,] 1 2 3
[2,] 2 4 7
R > Null(t(A))
[,1]
[1,] -8.944272e-01
[2,] 4.472136e-01
[3,] 7.771561e-16
R > (A <- matrix(c(1,2,3,2,4,6), ncol = 3, byrow = TRUE))
[,1] [,2] [,3]
[1,] 1 2 3
[2,] 2 4 6
R > Null(t(A))
[,1] [,2]
[1,] -0.5345225 -0.8017837
[2,] 0.7745419 -0.3381871
[3,] -0.3381871 0.4927193
library(MASS)
A <- matrix(c(1,2,3,2,4,7), ncol = 3, byrow = T)
t(A)
#> [,1] [,2]
#> [1,] 1 2
#> [2,] 2 4
#> [3,] 3 7
B <- matrix(c(1,2,3,2,4,6), ncol = 3, byrow = T)
t(B)
#> [,1] [,2]
#> [1,] 1 2
#> [2,] 2 4
#> [3,] 3 6
From the above, you can see that in your last case, all the rows are linearly combination of one another. In your 1st case, 2 rows are linear combinations.
You have a rank of 2 vs 1 and thus answers of 2 vs 1.

extract every two elements in matrix row in r in sequence to calculate euclidean distance

How to extract every two elements in sequence in a matrix and return the result as a matrix so that I could feed the answer in a formula for calculation:
For example, I have a one row matrix with 6 columns:
[,1][,2][,3][,4][,5][,6]
[1,] 2 1 5 5 10 1
I want to extract column 1 and two in first iteration, 3 and 4 in second iteration and so on. The result has to be in the form of matrix.
[1,] 2 1
[2,] 5 5
[3,] 10 1
My original codes:
data <- matrix(c(1,1,1,2,2,1,2,2,5,5,5,6,10,1,10,2,11,1,11,2), ncol = 2)
Center Matrix:
[,1][,2][,3][,4][,5][,6]
[1,] 2 1 5 5 10 1
[2,] 1 1 2 1 10 1
[3,] 5 5 5 6 11 2
[4,] 2 2 5 5 10 1
[5,] 2 1 5 6 5 5
[6,] 2 2 5 5 11 1
[7,] 2 1 5 5 10 1
[8,] 1 1 5 6 11 1
[9,] 2 1 5 5 10 1
[10,] 5 6 11 1 10 2
objCentroidDist <- function(data, centers) {
resultMatrix <- matrix(NA, nrow=dim(data)[1], ncol=dim(centers)[1])
for(i in 1:nrow(centers)) {
resultMatrix [,i] <- sqrt(rowSums(t(t(data)-centers[i, ])^2))
}
resultMatrix
}
objCentroidDist(data,centers)
I want the Result matrix to be as per below:
[1,][,2][,3]
[1,]
[2,]
[3,]
[4,]
[5,]
[7,]
[8,]
[9,]
[10]
My concern is, how to calculate the data-centers distance if the dimensions of the data matrix are two, and centers matrix are six. (to calculate the distance from the data matrix and every two columns in centers matrix). Each row of the centers matrix has three centers.
Something like this maybe?
m <- matrix(c(2,1,5,5,10,1), ncol = 6)
list.seq.pairs <- lapply(seq(1, ncol(m), 2), function(x) {
m[,c(x, x+1)]
})
> list.seq.pairs
[[1]]
[1] 2 1
[[2]]
[1] 5 5
[[3]]
[1] 10 1
And, in case you're wanting to iterate over multiple rows in a matrix,
you can expand on the above like this:
mm <- matrix(1:18, ncol = 6, byrow = TRUE)
apply(mm, 1, function(x) {
lapply(seq(1, length(x), 2), function(y) {
x[c(y, y+1)]
})
})
EDIT:
I'm really not sure what you're after exactly. I think, if you want each row transformed into a 2 x 3 matrix:
mm <- matrix(1:18, ncol = 6, byrow = TRUE)
list.mats <- lapply(1:nrow(mm), function(x){
a = matrix(mm[x,], ncol = 2, byrow = TRUE)
})
> list.mats
[[1]]
[,1] [,2]
[1,] 1 2
[2,] 3 4
[3,] 5 6
[[2]]
[,1] [,2]
[1,] 7 8
[2,] 9 10
[3,] 11 12
[[3]]
[,1] [,2]
[1,] 13 14
[2,] 15 16
[3,] 17 18
If, however, you want to get to your results matrix- I think it's probably easiest to do whatever calculations you need to do while you're dealing with each row:
results <- t(apply(mm, 1, function(x) {
sapply(seq(1, length(x), 2), function(y) {
val1 = x[y] # Get item one
val2 = x[y+1] # Get item two
val1 / val2 # Do your calculation here
})
}))
> results
[,1] [,2] [,3]
[1,] 0.5000000 0.7500 0.8333333
[2,] 0.8750000 0.9000 0.9166667
[3,] 0.9285714 0.9375 0.9444444
That said, I don't understand what you're trying to do so this may miss the mark. You may have more luck if you ask a new question where you show example input and the actual expected output that you're after, with the actual values you expect.

Split a matrix in blocks of size n with offset i (vectorized method)

I want to split matrices of size k x l into blocks of size n x n considering an ofset o (Like Mathematica's Partition function does).
For example, given a matrix A like
A <- matrix(seq(1:16), nrow = 4, ncol = 4)
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 4 8 12 16
and block size = 3, offset = 1, I want as output the four submatrices that I'd get from
A[1:3, 1:3]
A[1:3, 2:4]
A[2:4, 1:3]
A[2:4, 2:4]
If offset were equal to 2 or 3, the output for this example should be only the submatrix that I get from
A[1:3, 1:3]
How can I vectorize this?
There might be a more elegant way. Here is how I'd do it by writing a myPartition function which simulates the mathematica Partition function. Firstly use Map to construct possible index along the row and column axis where we use seq to take offset into consideration, and then use cross2 from purrr to construct a list of all possible combinations of the subset index. Finally use lapply to subset the matrix and return a list of subset matrix;
The testing results on offset 1, 2 and 3 are as follows which seems to behave as expected:
library(purrr)
ind <- function(k, n, o) Map(`:`, seq(1, k-n+1, by = o), seq(n, k, by = o))
# this is a little helper function that generates subset index according to dimension of the
# matrix, the first sequence construct the starting point of the subset index with an interval
# of o which is the offset while the second sequence construct the ending point of the subset index
# use Map to construct vector from start to end which in OP's case will be 1:3 and 2:4.
myPartition <- function(mat, n, o) {
lapply(cross2(ind(nrow(mat),n,o), ind(ncol(mat),n,o)), function(i) mat[i[[1]], i[[2]]])
}
# This is basically an lapply. we use cross2 to construct combinations of all subset index
# which will be 1:3 and 1:3, 1:3 and 2:4, 2:4 and 1:3 and 2:4 and 2:4 in OP's case. Use lapply
# to loop through the index and subset.
# Testing case for offset = 1
myPartition(A, 3, 1)
# [[1]]
# [,1] [,2] [,3]
# [1,] 1 5 9
# [2,] 2 6 10
# [3,] 3 7 11
# [[2]]
# [,1] [,2] [,3]
# [1,] 2 6 10
# [2,] 3 7 11
# [3,] 4 8 12
# [[3]]
# [,1] [,2] [,3]
# [1,] 5 9 13
# [2,] 6 10 14
# [3,] 7 11 15
# [[4]]
# [,1] [,2] [,3]
# [1,] 6 10 14
# [2,] 7 11 15
# [3,] 8 12 16
# Testing case for offset = 2
myPartition(A, 3, 2)
# [[1]]
# [,1] [,2] [,3]
# [1,] 1 5 9
# [2,] 2 6 10
# [3,] 3 7 11
# Testing case for offset = 3
myPartition(A, 3, 3)
# [[1]]
# [,1] [,2] [,3]
# [1,] 1 5 9
# [2,] 2 6 10
# [3,] 3 7 11
How about this using base R, the idea is to generate all possible windows (i.e. winds) of size n*n while taking into account the offset. Then print all possible permutations of winds's elements in matrix A (i.e. perms). It works for any A of size k*l.
A <- matrix(seq(1:16), nrow = 4, ncol = 4)
c <- ncol(A); r <- nrow(A)
offset <- 1; size <- 3
sq <- seq(1, max(r,c), offset)
winds <- t(sapply(sq, function(x) c(x,(x+size-1))))
winds <- winds[winds[,2]<=max(r, c),] # check the range
if (is.vector(winds)) dim(winds) <- c(1,2) # vector to matrix
perms <- expand.grid(list(1:nrow(winds), 1:nrow(winds)))
out=apply(perms, 1, function(x) {
a11 <- winds[x[1],1];a12 <- winds[x[1],2];a21 <- winds[x[2],1];a22 <- winds[x[2],2]
if (ifelse(r<c, a12<=r, a22<=c)) { # check the range
cat("A[", a11, ":", a12, ", ", a21, ":", a22, "]", sep="", "\n")
print(A[a11:a12, a21:a22])
}
})
# A[1:3, 1:3]
# [,1] [,2] [,3]
# [1,] 1 5 9
# [2,] 2 6 10
# [3,] 3 7 11
# A[2:4, 1:3]
# [,1] [,2] [,3]
# [1,] 2 6 10
# [2,] 3 7 11
# [3,] 4 8 12
# A[1:3, 2:4]
# [,1] [,2] [,3]
# [1,] 5 9 13
# [2,] 6 10 14
# [3,] 7 11 15
# A[2:4, 2:4]
# [,1] [,2] [,3]
# [1,] 6 10 14
# [2,] 7 11 15
# [3,] 8 12 16
For size=3 and offset=2 or offset=3:
# A[1:3, 1:3]
# [,1] [,2] [,3]
# [1,] 1 5 9
# [2,] 2 6 10
# [3,] 3 7 11
For offset=2 and size=2:
# A[1:2, 1:2]
# [,1] [,2]
# [1,] 1 5
# [2,] 2 6
# A[3:4, 1:2]
# [,1] [,2]
# [1,] 3 7
# [2,] 4 8
# A[1:2, 3:4]
# [,1] [,2]
# [1,] 9 13
# [2,] 10 14
# A[3:4, 3:4]
# [,1] [,2]
# [1,] 11 15
# [2,] 12 16

How to change elements in one matrix indexed in another matrix?

I have two matrices A and B. Matrix B is a two-column matrix, each row containing one index of an element in matrix A. I want to change those elements in matrix A, which are indexed by each row in matrix B, to 0.
Is there a way to avoid using the loop shown below?
> A <- matrix(1:12, 3, 4)
> B <- matrix(c(1, 2, 2, 2, 3, 4), byrow = TRUE, ncol = 2)
> A
[,1] [,2] [,3] [,4]
[1,] 1 4 7 10
[2,] 2 5 8 11
[3,] 3 6 9 12
> B
[,1] [,2]
[1,] 1 2
[2,] 2 2
[3,] 3 4
> for (i in 1:nrow(B)) {
+ A[B[i, 1], B[i, 2]] <- 0
+ }
> A
[,1] [,2] [,3] [,4]
[1,] 1 0 7 10
[2,] 2 0 8 11
[3,] 3 6 9 0
It's done like this
A[B] <- 0;

Build a square-ish matrix with a specified number of cells

I would like to write a function that transforms an integer, n, (specifying the number of cells in a matrix) into a square-ish matrix that contain the sequence 1:n. The goal is to make the matrix as "square" as possible.
This involves a couple of considerations:
How to maximize "square"-ness? I was thinking of a penalty equal to the difference in the dimensions of the matrix, e.g. penalty <- abs(dim(mat)[1]-dim(mat)[2]), such that penalty==0 when the matrix is square and is positive otherwise. Ideally this would then, e.g., for n==12 lead to a preference for a 3x4 rather than 2x6 matrix. But I'm not sure the best way to do this.
Account for odd-numbered values of n. Odd-numbered values of n do not necessarily produce an obvious choice of matrix (unless they have an integer square root, like n==9. I thought about simply adding 1 to n, and then handling as an even number and allowing for one blank cell, but I'm not sure if this is the best approach. I imagine it might be possible to obtain a more square matrix (by the definition in 1) by adding more than 1 to n.
Allow the function to trade-off squareness (as described in #1) and the number of blank cells (as described in #2), so the function should have some kind of parameter(s) to address this trade-off. For example, for n==11, a 3x4 matrix is pretty square but not as square as a 4x4, but the 4x4 would have many more blank cells than the 3x4.
The function needs to optionally produce wider or taller matrices, so that n==12 can produce either a 3x4 or a 4x3 matrix. But this would be easy to handle with a t() of the resulting matrix.
Here's some intended output:
> makemat(2)
[,1]
[1,] 1
[2,] 2
> makemat(3)
[,1] [,2]
[1,] 1 3
[2,] 2 4
> makemat(9)
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
> makemat(11)
[,1] [,2] [,3] [,4]
[1,] 1 4 7 10
[2,] 2 5 8 11
[3,] 3 6 9 12
Here's basically a really terrible start to this problem.
makemat <- function(n) {
n <- abs(as.integer(n))
d <- seq_len(n)
out <- d[n %% d == 0]
if(length(out)<2)
stop('n has fewer than two factors')
dim1a <- out[length(out)-1]
m <- matrix(1:n, ncol=dim1a)
m
}
As you'll see I haven't really been able to account for odd-numbered values of n (look at the output of makemat(7) or makemat(11) as described in #2, or enforce the "squareness" rule described in #1, or the trade-off between them as described in #3.
I think the logic you want is already in the utility function n2mfrow(), which as its name suggests is for creating input to the mfrow graphical parameter and takes an integer input and returns the number of panels in rows and columns to split the display into:
> n2mfrow(11)
[1] 4 3
It favours tall layouts over wide ones, but that is easily fixed via rev() on the output or t() on a matrix produced from the results of n2mfrow().
makemat <- function(n, wide = FALSE) {
if(isTRUE(all.equal(n, 3))) {
dims <- c(2,2)
} else {
dims <- n2mfrow(n)
}
if(wide)
dims <- rev(dims)
m <- matrix(seq_len(prod(dims)), nrow = dims[1], ncol = dims[2])
m
}
Notice I have to special-case n = 3 as we are abusing a function intended for another use and a 3x1 layout on a plot makes more sense than a 2x2 with an empty space.
In use we have:
> makemat(2)
[,1]
[1,] 1
[2,] 2
> makemat(3)
[,1] [,2]
[1,] 1 3
[2,] 2 4
> makemat(9)
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
> makemat(11)
[,1] [,2] [,3]
[1,] 1 5 9
[2,] 2 6 10
[3,] 3 7 11
[4,] 4 8 12
> makemat(11, wide = TRUE)
[,1] [,2] [,3] [,4]
[1,] 1 4 7 10
[2,] 2 5 8 11
[3,] 3 6 9 12
Edit:
The original function padded seq_len(n) with NA, but I realised the OP wanted to have a sequence from 1 to prod(nrows, ncols), which is what the version above does. The one below pads with NA.
makemat <- function(n, wide = FALSE) {
if(isTRUE(all.equal(n, 3))) {
dims <- c(2,2)
} else {
dims <- n2mfrow(n)
}
if(wide)
dims <- rev(dims)
s <- rep(NA, prod(dims))
ind <- seq_len(n)
s[ind] <- ind
m <- matrix(s, nrow = dims[1], ncol = dims[2])
m
}
I think this function implicitly satisfies your constraints. The parameter can range from 0 to Inf. The function always returns either a square matrix with sides of ceiling(sqrt(n)), or a (maybe) rectangular matrix with rows floor(sqrt(n)) and just enough columns to "fill it out". The parameter trades off the selection between the two: if it is less than 1, then the second, more rectangular matrices are preferred, and if greater than 1, the first, always square matrices are preferred. A param of 1 weights them equally.
makemat<-function(n,param=1,wide=TRUE){
if (n<1) stop('n must be positive')
s<-sqrt(n)
bottom<-n-(floor(s)^2)
top<-(ceiling(s)^2)-n
if((bottom*param)<top) {
rows<-floor(s)
cols<-rows + ceiling(bottom / rows)
} else {
cols<-rows<-ceiling(s)
}
if(!wide) {
hold<-rows
rows<-cols
cols<-hold
}
m<-seq.int(rows*cols)
dim(m)<-c(rows,cols)
m
}
Here is an example where the parameter is set to default, and equally trades off the distance equally:
lapply(c(2,3,9,11),makemat)
# [[1]]
# [,1] [,2]
# [1,] 1 2
#
# [[2]]
# [,1] [,2]
# [1,] 1 3
# [2,] 2 4
#
# [[3]]
# [,1] [,2] [,3]
# [1,] 1 4 7
# [2,] 2 5 8
# [3,] 3 6 9
#
# [[4]]
# [,1] [,2] [,3] [,4]
# [1,] 1 4 7 10
# [2,] 2 5 8 11
# [3,] 3 6 9 12
Here is an example of using the param with 11, to get a 4x4 matrix.
makemat(11,3)
# [,1] [,2] [,3] [,4]
# [1,] 1 5 9 13
# [2,] 2 6 10 14
# [3,] 3 7 11 15
# [4,] 4 8 12 16
What about something fairly simple and you can handle the exceptions and other requests in a wrapper?
library(taRifx)
neven <- 8
nodd <- 11
nsquareodd <- 9
nsquareeven <- 16
makemat <- function(n) {
s <- seq(n)
if( odd(n) ) {
s[ length(s)+1 ] <- NA
n <- n+1
}
sq <- sqrt( n )
dimx <- ceiling( sq )
dimy <- floor( sq )
if( dimx*dimy < length(s) ) dimy <- ceiling( sq )
l <- dimx*dimy
ldiff <- l - length(s)
stopifnot( ldiff >= 0 )
if( ldiff > 0 ) s[ seq( length(s) + 1, length(s) + ldiff ) ] <- NA
matrix( s, nrow = dimx, ncol = dimy )
}
> makemat(neven)
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 NA
> makemat(nodd)
[,1] [,2] [,3]
[1,] 1 5 9
[2,] 2 6 10
[3,] 3 7 11
[4,] 4 8 NA
> makemat(nsquareodd)
[,1] [,2] [,3]
[1,] 1 5 9
[2,] 2 6 NA
[3,] 3 7 NA
[4,] 4 8 NA
> makemat(nsquareeven)
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 4 8 12 16

Resources