Generate covariance matrix from correlation matrix - r

I have a correlation matrix:
a <- matrix(c(1, .8, .8, .8, 1, .8, .8, .8, 1), 3)
## [,1] [,2] [,3]
## [1,] 1.0 0.8 0.8
## [2,] 0.8 1.0 0.8
## [3,] 0.8 0.8 1.0
I would now like to create a covariance matrix from the correlation matrix. How can this be done in R?
I tried:
e1.sd <- 3
e2.sd <- 10
e3.sd <- 3
e.cov <- a * as.matrix(c, e1.sd, e2.sd, e3.sd) %*% t(as.matrix(c(e1.sd, e2.sd, e3.sd)))
But I get the error:
Error in a * as.matrix(c, e1.sd, e2.sd, e3.sd) %*% t(as.matrix(c(e1.sd, :
non-conformable arrays
What am I doing wrong?

If you know the standard deviations of your individual variables, you can:
stdevs <- c(e1.sd, e2.sd, e3.sd)
#stdevs is the vector that contains the standard deviations of your variables
b <- stdevs %*% t(stdevs)
# b is an n*n matrix whose generic term is stdev[i]*stdev[j] (n is your number of variables)
a_covariance <- b * a #your covariance matrix
On the other hand, if you don't know the standard deviations, it's impossible.

require(MBESS)
a <- matrix(c(1,.8,.8,.8,1,.8,.8,.8,1),3)
> cor2cov(a,c(3,10,3))
[,1] [,2] [,3]
[1,] 9.0 24 7.2
[2,] 24.0 100 24.0
[3,] 7.2 24 9.0

Building on S4M's answer, in base R, I would write this function:
cor2cov <- function(V, sd) {
V * tcrossprod(sd)
}
tcrossprod will calculate the product of each combination of elements of the sd vector (equivalent to x %*% t(x)), which we then (scalar) multiply by the variance-covariance matrix
Here's a quick check that the function is correct using the built in mtcars data set:
all.equal(
cor2cov(cor(mtcars), sapply(mtcars, sd)),
cov(mtcars)
)

The answer marked as correct is wrong.
The correct solution seems to be the one provided by MBESS package, so see the post from dayne.
> a
[,1] [,2] [,3]
[1,] 1.0 0.8 0.8
[2,] 0.8 1.0 0.8
[3,] 0.8 0.8 1.0
> b <- c(3,10,3)
> b %*% t(b)
[,1] [,2] [,3]
[1,] 9 30 9
[2,] 30 100 30
[3,] 9 30 9
> c <- b %*% t(b)
> c %*% a
[,1] [,2] [,3]
[1,] 40.2 44.4 40.2
[2,] 134.0 148.0 134.0
[3,] 40.2 44.4 40.2
> cor2cov(cor.mat=a, b )
[,1] [,2] [,3]
[1,] 9.0 24 7.2
[2,] 24.0 100 24.0
[3,] 7.2 24 9.0
> a %*% c
[,1] [,2] [,3]
[1,] 40.2 134 40.2
[2,] 44.4 148 44.4
[3,] 40.2 134 40.2
>

Related

element-wise averages of two (or more) nested lists of matrices

I have two lists A_1 and A_2, each contains two matrices.
A_1 <- list(a=matrix(1:8, 2), b=matrix(2:9, 2))
A_2 <- list(a=matrix(10:17, 2), b=matrix(5:12, 2))
I'd like to calculate element-wise averages of these two lists which results a list of
tibble::lst((A_1$a + A_2$a)/2, (A_1$b + A_2$b)/2)
I used
purrr::pmap(list(A_1 , A_2), mean)
but got
Error in mean.default(.l[[1L]][[i]], .l[[2L]][[i]], ...) :
'trim' must be numeric of length one`
or
purrr::map2(A_1, A_2, mean)
Error in mean.default(.x[[i]], .y[[i]], ...) :
'trim' must be numeric of length one`
In base R, We could use:
A <-list(A_1, A_2)
lapply(Reduce(\(x, y)Map('+', x, y), A), '/', length(A))
$a
[,1] [,2] [,3] [,4]
[1,] 5.5 7.5 9.5 11.5
[2,] 6.5 8.5 10.5 12.5
$b
[,1] [,2] [,3] [,4]
[1,] 3.5 5.5 7.5 9.5
[2,] 4.5 6.5 8.5 10.5
This code is generic in that we can use to find the mean of several lists.
Note that A_1 and A_2 must have the same number of matrices, not necessarily 2. Can be 10 etc. Also note that each corresponding matrix has the same dimensions. Example below:
B_1 <- list(matrix(c(1,2,3,4), 2), matrix(c(1,3,4,2), 2),
matrix(c(1:10), 5), matrix(c(1:20), 5))
B_2 <- lapply(B_1, '*', 2) # In this case, its B_1 * 2
B_3 <- lapply(B_2, '*', 3) #
Now you could use the code provide above:
B <-list(B_1, B_2, B_3)
lapply(Reduce(\(x, y)Map('+', x, y), B), '/', length(B))
Your mistake is in using the second matrix as trim= argument of mean whcih is the second. You need to concatenate the matrices. Example:
mean(1:3, 2:4)
# Error in mean.default(1:3, 2:4) : 'trim' must be numeric of length one
mean(c(1:3, 2:4))
# [1] 2.5
As solution you may use Map
Map(\(x, y) (x + y)/2, A_1, A_2)
# $a
# [,1] [,2] [,3] [,4]
# [1,] 5.5 7.5 9.5 11.5
# [2,] 6.5 8.5 10.5 12.5
#
# $b
# [,1] [,2] [,3] [,4]
# [1,] 3.5 5.5 7.5 9.5
# [2,] 4.5 6.5 8.5 10.5
Or, why not using arrays?
AA_1 <- array(unlist(A_1), dim=c(dim(A_1$a), length(A_1)))
AA_2 <- array(unlist(A_2), dim=c(dim(A_2$a), length(A_2)))
(AA_1 + AA_2)/2
# , , 1
#
# [,1] [,2] [,3] [,4]
# [1,] 5.5 7.5 9.5 11.5
# [2,] 6.5 8.5 10.5 12.5
#
# , , 2
#
# [,1] [,2] [,3] [,4]
# [1,] 3.5 5.5 7.5 9.5
# [2,] 4.5 6.5 8.5 10.5
in base R:
item_names <- names(A_1)
structure(
lapply(item_names, function(name){
0.5 * (A_1[[name]] + A_2[[name]])
## or, if you want the scalar mean:
## mean(A_1[[name]] + A_2[[name]])
}),
names = item_names
)
#> $a
#> [,1] [,2] [,3] [,4]
#> [1,] 5.5 7.5 9.5 11.5
#> [2,] 6.5 8.5 10.5 12.5
#>
#> $b
#> [,1] [,2] [,3] [,4]
#> [1,] 3.5 5.5 7.5 9.5
#> [2,] 4.5 6.5 8.5 10.5

Dividing a list of matrices by a matrix

I have a list of matrices that I like to divide the values in each matrix by a different value.
l1 <- list(1,2,3,4,5,6)
l2 <- list(7,8,9,10,11,12)
mat <- Map(
function(x, y) outer(unlist(x), unlist(y), `+`) / 2,
split(l1, ceiling(seq_along(l1) / 3)),
split(l2, ceiling(seq_along(l2) / 3))
)
For example the output below shows one of the elements in the mat list:
$`1`
[,1] [,2] [,3]
[1,] 4.0 4.5 5.0
[2,] 4.5 5.0 5.5
[3,] 5.0 5.5 6.0
I would like to divide the values in the matrix by another matrix with different values
Maybe a matrix that looks like this (I wasn't sure how to create a matrix in r)
2 1 2
3 2 3
1 2 3
My desired output would then look like this:
[,1] [,2] [,3]
[1,] 4.0/2 4.5/1 5.0/2
[2,] 4.5/3 5.0/2 5.5/3
[3,] 5.0/1 5.5/2 6.0/3
How could I do create this output? How do I create a matrix with my desired values in R?
Thank you.
If your matrices are the same dimensions you can divide them with the / operator.
# create matrix to divide by
mat_div <- matrix(c(2,3,1,1,2,2,2,3,3), nrow = 3)
# divide list of matricies
lapply(mat, `/`, mat_div)
#------
$`1`
[,1] [,2] [,3]
[1,] 2.0 4.50 2.500000
[2,] 1.5 2.50 1.833333
[3,] 5.0 2.75 2.000000
$`2`
[,1] [,2] [,3]
[1,] 3.5 7.50 4.000000
[2,] 2.5 4.00 2.833333
[3,] 8.0 4.25 3.000000
We can use Map
mat <- Map(`/`, mat, list(mat2))
-otuput
mat
$`1`
[,1] [,2] [,3]
[1,] 2.0 4.50 2.500000
[2,] 1.5 2.50 1.833333
[3,] 5.0 2.75 2.000000
$`2`
[,1] [,2] [,3]
[1,] 3.5 7.50 4.000000
[2,] 2.5 4.00 2.833333
[3,] 8.0 4.25 3.000000
data
mat2 <- cbind(c(2, 3, 1), c(1, 2, 2), c(2, 3, 3))

R: Calculation with each element in a matrix across lists

I think my example is something special. Since I am not advanced in the use of lapply I am stucking with the following calculation. Here is a short reproducivle example: Assume I've a list containing three matrices:
list <- list(est1=matrix(sample(c(0,0.4,0.2,1), replace=TRUE, size=10), ncol=2), est2=matrix(sample(c(0,0.4,0.2,1), replace=TRUE, size=10), ncol=2),
est3=matrix(sample(c(0,0.4,0.2,1), replace=TRUE, size=10), ncol=2))
$`est1`
[,1] [,2]
[1,] 0.4 1.0
[2,] 0.0 0.4
[3,] 0.0 0.0
[4,] 0.0 0.4
[5,] 0.0 1.0
$est2
[,1] [,2]
[1,] 0.0 0.2
[2,] 0.4 0.4
[3,] 1.0 0.0
[4,] 0.2 1.0
[5,] 0.4 0.4
$est3
[,1] [,2]
[1,] 1.0 0.2
[2,] 0.4 1.0
[3,] 1.0 0.0
[4,] 1.0 0.2
[5,] 0.4 0.4
Each matrix contains coefficient estimates for different iterations. Each element inside one matrix belongs to one coefficient. I want to calculate the percentage over the three Matrices at which a coefficient is different from zero.
Expected Output:
[,1] [,2]
0.67 1
0.67 1
0.67 0
0.67 1
0.67 1
Please do not call your list list. In the following, it will be called z.
z <- list(est1=matrix(sample(c(0,0.4,0.2,1), replace=TRUE, size=10), ncol=2), est2=matrix(sample(c(0,0.4,0.2,1), replace=TRUE, size=10), ncol=2),
est3=matrix(sample(c(0,0.4,0.2,1), replace=TRUE, size=10), ncol=2))
For the kind of problems that you describe, I like to use arrays, so the first step is to transform your list into an array.
library(abind)
A <- abind(list, along=3)
Then, you can apply a function along the third dimension:
apply(A, 1:2, function(x) 100 * sum(x!=0) / length(x))
[,1] [,2]
[1,] 100.0 100.0
[2,] 100.0 66.7
[3,] 100.0 66.7
[4,] 100.0 66.7
[5,] 66.7 66.7
Maybe the following does what you want.
I start by setting the RNG seed to make the results reproducible
set.seed(2081) # Make the results reproducible
list <- list(est1 = matrix(sample(c(0,0.4,0.2,1), replace=TRUE, size=10), ncol=2),
est2 = matrix(sample(c(0,0.4,0.2,1), replace=TRUE, size=10), ncol=2),
est3 = matrix(sample(c(0,0.4,0.2,1), replace=TRUE, size=10), ncol=2))
zeros <- sapply(list, `==`, 0)
res <- rowSums(zeros) / ncol(zeros)
matrix(res, ncol = 2)
# [,1] [,2]
#[1,] 0.3333333 0.3333333
#[2,] 0.0000000 0.6666667
#[3,] 0.0000000 0.3333333
#[4,] 0.3333333 0.3333333
#[5,] 0.6666667 0.3333333
EDIT.
The following uses rowMeans and is simpler. The result is identical() to res above.
res2 <- rowMeans(zeros)
identical(res, res2)
#[1] TRUE
matrix(res2, ncol = 2)

apply and lapply in one function return an NAN

I have a function return list of list, I would like to find the standard deviation of the matrices of my output. The output of my function is a list of two list. I tried this code but it return me NAN. Since my function is complex, then I use this example from another question please see here since it is quite close to what I am trying to do.
> A <- matrix(c(1:9), 3, 3)
> A
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
> B <- matrix(c(2:10), 3, 3)
> B
[,1] [,2] [,3]
[1,] 2 5 8
[2,] 3 6 9
[3,] 4 7 10
> my.list1 <- list(A, B)
so the mean of the first list is:
[,1] [,2] [,3]
[1,] 1.5 4.5 7.5
[2,] 2.5 5.5 8.5
[3,] 3.5 6.5 9.5
Then the standard deviation will be:
[,1] [,2] [,3]
[1,] 0.7071068 0.7071068 0.7071068
[2,] 0.7071068 0.7071068 0.7071068
[3,] 0.7071068 0.7071068 0.7071068
> c <- matrix(c(1:9), 3, 3)
> c
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
> d <- matrix(c(2:10), 3, 3)
> d
[,1] [,2] [,3]
[1,] 2 5 8
[2,] 3 6 9
[3,] 4 7 10
> my.list2 <- list(c, d)
my.list <-list(my.list1,my.list2)
How can I get the standard deviation of my matrices on an element by element for the list?
Try ?rapply
> rapply(my.list, sd)
[1] 2.738613 2.738613 2.738613 2.738613
You could bind your lists into an array, or perhaps make your function return an array(?), then you could use apply() to apply your chosen functions...
A <- matrix(1:9, 3, 3)
B <- matrix(2:10, 3, 3)
my.list1 <- list(A, B)
c <- matrix(1:9, 3, 3)
d <- matrix(2:10, 3, 3)
my.list2 <- list(c, d)
Create array from all 4 lists
my.array1 <- abind::abind(c(my.list1, my.list2), along = 3)
Find the mean() of the required dimension
apply(my.array1, c(1, 2), mean)
apply(my.array1, c(1,2), sd)
Output
[,1] [,2] [,3]
[1,] 1.5 4.5 7.5
[2,] 2.5 5.5 8.5
[3,] 3.5 6.5 9.5

Solving non-square linear system with R

How to solve a non-square linear system with R : A X = B ?
(in the case the system has no solution or infinitely many solutions)
Example :
A=matrix(c(0,1,-2,3,5,-3,1,-2,5,-2,-1,1),3,4,T)
B=matrix(c(-17,28,11),3,1,T)
A
[,1] [,2] [,3] [,4]
[1,] 0 1 -2 3
[2,] 5 -3 1 -2
[3,] 5 -2 -1 1
B
[,1]
[1,] -17
[2,] 28
[3,] 11
If the matrix A has more rows than columns, then you should use least squares fit.
If the matrix A has fewer rows than columns, then you should perform singular value decomposition. Each algorithm does the best it can to give you a solution by using assumptions.
Here's a link that shows how to use SVD as a solver:
http://www.ecse.rpi.edu/~qji/CV/svd_review.pdf
Let's apply it to your problem and see if it works:
Your input matrix A and known RHS vector B:
> A=matrix(c(0,1,-2,3,5,-3,1,-2,5,-2,-1,1),3,4,T)
> B=matrix(c(-17,28,11),3,1,T)
> A
[,1] [,2] [,3] [,4]
[1,] 0 1 -2 3
[2,] 5 -3 1 -2
[3,] 5 -2 -1 1
> B
[,1]
[1,] -17
[2,] 28
[3,] 11
Let's decompose your A matrix:
> asvd = svd(A)
> asvd
$d
[1] 8.007081e+00 4.459446e+00 4.022656e-16
$u
[,1] [,2] [,3]
[1,] -0.1295469 -0.8061540 0.5773503
[2,] 0.7629233 0.2908861 0.5773503
[3,] 0.6333764 -0.5152679 -0.5773503
$v
[,1] [,2] [,3]
[1,] 0.87191556 -0.2515803 -0.1764323
[2,] -0.46022634 -0.1453716 -0.4694190
[3,] 0.04853711 0.5423235 0.6394484
[4,] -0.15999723 -0.7883272 0.5827720
> adiag = diag(1/asvd$d)
> adiag
[,1] [,2] [,3]
[1,] 0.1248895 0.0000000 0.00000e+00
[2,] 0.0000000 0.2242431 0.00000e+00
[3,] 0.0000000 0.0000000 2.48592e+15
Here's the key: the third eigenvalue in d is very small; conversely, the diagonal element in adiag is very large. Before solving, set it equal to zero:
> adiag[3,3] = 0
> adiag
[,1] [,2] [,3]
[1,] 0.1248895 0.0000000 0
[2,] 0.0000000 0.2242431 0
[3,] 0.0000000 0.0000000 0
Now let's compute the solution (see slide 16 in the link I gave you above):
> solution = asvd$v %*% adiag %*% t(asvd$u) %*% B
> solution
[,1]
[1,] 2.411765
[2,] -2.282353
[3,] 2.152941
[4,] -3.470588
Now that we have a solution, let's substitute it back to see if it gives us the same B:
> check = A %*% solution
> check
[,1]
[1,] -17
[2,] 28
[3,] 11
That's the B side you started with, so I think we're good.
Here's another nice SVD discussion from AMS:
http://www.ams.org/samplings/feature-column/fcarc-svd
Aim is to solve Ax = b
where A is p by q, x is q by 1 and b is p by 1 for x given A and b.
Approach 1: Generalized Inverse: Moore-Penrose
https://en.wikipedia.org/wiki/Generalized_inverse
Multiplying both sides of the equation, we get
A'Ax = A' b
where A' is the transpose of A. Note that A'A is q by q matrix now. One way to solve this now multiply both sides of the equation by the inverse of A'A. Which gives,
x = (A'A)^{-1} A' b
This is the theory behind generalized inverse. Here G = (A'A)^{-1} A' is pseudo-inverse of A.
library(MASS)
ginv(A) %*% B
# [,1]
#[1,] 2.411765
#[2,] -2.282353
#[3,] 2.152941
#[4,] -3.470588
Approach 2: Generalized Inverse using SVD.
#duffymo used SVD to obtain a pseudoinverse of A.
However, last elements of svd(A)$d may not be quite as small as in this example. So, probably one shouldn't use that method as is. Here's an example where none of the last elements of A is close to zero.
A <- as.matrix(iris[11:13, -5])
A
# Sepal.Length Sepal.Width Petal.Length Petal.Width
# 11 5.4 3.7 1.5 0.2
# 12 4.8 3.4 1.6 0.2
# 13 4.8 3.0 1.4 0.1
svd(A)$d
# [1] 10.7820526 0.2630862 0.1677126
One option would be to look as the singular values in cor(A)
svd(cor(A))$d
# [1] 2.904194e+00 1.095806e+00 1.876146e-16 1.155796e-17
Now, it is clear there is only two large singular values are present. So, one now can apply svd on A to get pseudo-inverse as below.
svda <- svd(A)
G = svda$v[, 1:2] %*% diag(1/svda$d[1:2]) %*% t(svda$u[, 1:2])
# to get x
G %*% B

Resources