How to keep certain values in an array in R? - r

Suppose I have a data array,
dat <- array(NA, c(115,45,248))
Q1: What should I do if I want to get a new data array,
datnew <- array(NA, c(115,45,248))
in which all the positive values remain and the negative values are changed to NA?
Q2: What should I do if I want to get a new data array,
datnew <- array(NA,c(115,45,31))
by averaging over the third dimension, but averaging only every 8 consecutive values (248 / 8 = 31 groups)?
Thanks a lot.

For question 2,
you can reverse the order of the dimensions, then add a dimension representing the groups to average over, then use apply:
# Small stand-in for the real data: a 2 x 2 x 8 array.
tmp <- array( 1:32, c(2,2,8) )
# aperm() without a permutation argument reverses the dimensions (8 x 2 x 2);
# re-dimensioning to c(4,2,2,2) then splits the leading dimension of 8 into
# 4 values per group x 2 groups. For the 115 x 45 x 248 array in the
# question the corresponding shape would be c(8, 31, 45, 115).
tmp2 <- array( aperm(tmp), c(4,2,2,2) )
# Average over the first dimension (the values inside each group). The
# result is indexed as [group, original dim 2, original dim 1].
apply( tmp2, 2:4, mean )

Answer to Q1:
dat[dat < 0] <- NA
We index dat as if it were a plain vector (an array is just a vector with a dim attribute), so the logical mask dat < 0 selects the individual negative elements.
Answer to Q2:
Following Greg's nice, succinct solution, the solution I had in mind when posting my comment earlier was this (using Greg's tmp)
foo <- function(x, grp) aggregate(x, by = list(grp = grp), mean)$x
apply(tmp, 2:1, foo, grp = gl(2,4))
Examples:
Q1
> dat <- array(rnorm(3*3*3), c(3,3,3))
> dat
, , 1
[,1] [,2] [,3]
[1,] 0.1427815 0.1642626 -0.6876034
[2,] 0.6791252 2.1420478 -0.7073936
[3,] -0.9695173 -1.1050933 -0.3068230
, , 2
[,1] [,2] [,3]
[1,] 0.8246182 0.5132398 2.5428203
[2,] -0.4328711 0.9080648 -0.1231653
[3,] -0.7798170 -1.1160706 -0.9237559
, , 3
[,1] [,2] [,3]
[1,] -0.79505298 0.8795420 0.4520150
[2,] 0.04154077 -1.0422061 0.4657002
[3,] -0.67168971 0.7925304 -0.5461143
> dat[dat < 0] <- NA
> dat
, , 1
[,1] [,2] [,3]
[1,] 0.1427815 0.1642626 NA
[2,] 0.6791252 2.1420478 NA
[3,] NA NA NA
, , 2
[,1] [,2] [,3]
[1,] 0.8246182 0.5132398 2.542820
[2,] NA 0.9080648 NA
[3,] NA NA NA
, , 3
[,1] [,2] [,3]
[1,] NA 0.8795420 0.4520150
[2,] 0.04154077 NA 0.4657002
[3,] NA 0.7925304 NA
Q2
> foo <- function(x, grp) aggregate(x, by = list(grp = grp), mean)$x
> apply(tmp, 2:1, foo, grp = gl(2,4))
, , 1
[,1] [,2]
[1,] 7 9
[2,] 23 25
, , 2
[,1] [,2]
[1,] 8 10
[2,] 24 26
> all.equal(apply(tmp, 2:1, foo, grp = gl(2,4)), apply( tmp2, 2:4, mean ))
[1] TRUE

For question 1:
# Keep the non-negative values and replace the negative ones with NA.
# ifelse(test, yes, no) returns `yes` where the test is TRUE, so NA must be
# the `yes` branch of the `tmp1 < 0` test.
tmp2 <- ifelse(tmp1 < 0, NA, tmp1)
For question 2 see Greg's solution.

Related

Matching matrix rows with list elements in R

A reproducible data:
dat1 <- matrix(0, nrow = 9, ncol = 2)
dat1[,1] <- rep(1:3,3)
dat1[,2] <- c(1,1,1,2,2,2,3,3,3)
dat2 <- list()
dat2[[1]] <- matrix(c(1,2,1,3), nrow = 2, ncol = 2)
dat2[[2]] <- matrix(c(1,1,2,3,1,3), nrow = 3, ncol = 2 )
> dat1
[,1] [,2]
[1,] 1 1
[2,] 2 1
[3,] 3 1
[4,] 1 2
[5,] 2 2
[6,] 3 2
[7,] 1 3
[8,] 2 3
[9,] 3 3
> dat2
[[1]]
[,1] [,2]
[1,] 1 1
[2,] 2 3
[[2]]
[,1] [,2]
[1,] 1 3
[2,] 1 1
[3,] 2 3
I have a matrix (dat1) and a list (dat2).
Some rows of dat1 are the same as rows of the matrices stored in dat2. My objective is to find the row numbers of dat1 that match the rows of each dat2 element and store them in a list. An example of the desired output:
> ex.result
[[1]]
[,1]
[1,] 1
[2,] 8
[[2]]
[,1]
[1,] 7
[2,] 1
[3,] 8
I am looking for a fast way to do this without using time consuming loops.
A slightly different approach:
# For each matrix in dat2, find the row number of dat1 that equals each of
# its rows, and return the row numbers as a one-column matrix.
lapply(dat2, function(m) {
  # sweep() compares every row of dat1 with r element-wise; a dat1 row
  # matches when all of its comparisons are TRUE.
  hits <- apply(m, 1, function(r) {
    which(apply(sweep(dat1, 2, r, "=="), 1, all))
  })
  # Plain call instead of the magrittr pipe, so no package has to be loaded.
  as.matrix(hits)
})
Output:
[[1]]
[,1]
[1,] 1
[2,] 8
[[2]]
[,1]
[1,] 7
[2,] 1
[3,] 8
Here is an option:
# Build the string key of every dat1 row once, instead of recomputing all of
# them for each row that is looked up.
dat1_keys <- apply(dat1, 1, toString)
lapply(dat2, function(mat) {
  # match() is vectorised: it returns, for each row key of mat, the index of
  # the first dat1 row with the same key (NA when there is none).
  match(apply(mat, 1, toString), dat1_keys)
})
#[[1]]
#[1] 1 8
#
#[[2]]
#[1] 7 1 8
This returns a list with integer vectors instead of a list with array/matrix entries though.
In the same vein as above, using Map() and vector recycling:
# Coercing to a data.frame to recycle the vector that is used to search:
setNames(
Map(function(x, y){
matrix(
match(
apply(y, 1, paste, collapse = ", "),
x
)
)
},
data.frame(apply(dat1, 1, paste, collapse = ", ")),
dat2),
seq_len(length(dat2)))

Find common values between matrices and return matrix with row-col position

I'd like to find the values shared between two matrices at the same position, and return their locations (row-col) in a matrix.
set.seed(123)
m <- matrix(sample(4), 2, 2, byrow = T)
# m
# [,1] [,2]
# [1,] 2 3
# [2,] 1 4
m2 <- matrix(sample(4), 2, 2, byrow = F)
# m2
# [,1] [,2]
# [1,] 4 2
# [2,] 1 3
Expected output:
# [,1] [,2]
# [1,] NA NA
# [2,] "2-1" NA
Bonus if this could be generalized to non-identical matrices (different dim).
Equal sizes
One option would be
replace(m * NA, m == m2, paste(row(m), col(m), sep = "-")[m == m2])
# [,1] [,2]
# [1,] NA NA
# [2,] "2-1" NA
Different sizes
I believe that in this case, regardless of the approach, you will first need to trim both matrices to be of equal size.
set.seed(12)
(m <- matrix(sample(6), 2, 3, byrow = TRUE))
# [,1] [,2] [,3]
# [1,] 1 5 4
# [2,] 6 3 2
(m2 <- matrix(sample(6), 3, 2, byrow = FALSE))
# [,1] [,2]
# [1,] 2 5
# [2,] 4 3
# [3,] 1 6
out <- matrix(NA, max(nrow(m), nrow(m2)), max(ncol(m), ncol(m2)))
mrow <- min(nrow(m), nrow(m2))
mcol <- min(ncol(m), ncol(m2))
mTrim <- m[1:mrow, 1:mcol]
m2Trim <- m2[1:mrow, 1:mcol]
out[1:mrow, 1:mcol][mTrim == m2Trim] <- paste(row(mTrim), col(mTrim), sep = "-")[mTrim == m2Trim]
out
# [,1] [,2] [,3]
# [1,] NA "1-2" NA
# [2,] NA "2-2" NA
# [3,] NA NA NA
This function gives the desired output, but works on the condition that dim() is equal between the two matrices.
In order to generalize this to non-identical matrices, one solution would be to subset the bigger matrix first.
The key is which(mat1==mat2, arr.ind=T) to get row-col index:
which(m==m2, arr.ind=T)
row col
[1,] 2 1
Inside a function:
# Mark the cells in which two equally sized matrices hold the same value.
#
# mat1, mat2: matrices with identical dimensions.
# Returns a character matrix shaped like mat1: every matching cell holds its
# "row-col" position, every other cell is NA.
# Stops with an error when the dimensions differ.
find_in_matr <- function(mat1, mat2) {
  # identical() also copes with inputs that have a different number of
  # dimensions, where an element-wise `==` on dim() would recycle.
  if (!identical(dim(mat1), dim(mat2))) {
    stop("mat1 and mat2 need to have the same dim()!")
  }
  m <- mat1
  # Copy mat1's dims and empty the values. Using a character NA keeps the
  # result type the same whether or not any cell matches.
  m[] <- NA_character_
  loc <- which(mat1 == mat2, arr.ind = TRUE) # find positions (both indxs)
  # paste() is vectorised and returns character(0) when nothing matches.
  m[loc] <- paste(loc[, 1], loc[, 2], sep = "-") # paste indxs
  m
}
Example:
set.seed(123)
m <- matrix(sample(4), 2, 2, byrow = T)
# m
# [,1] [,2]
# [1,] 2 3
# [2,] 1 4
m2 <- matrix(sample(4), 2, 2, byrow = F)
# m2
# [,1] [,2]
# [1,] 4 2
# [2,] 1 3
find_in_matr(m, m2)
# [,1] [,2]
# [1,] NA NA
# [2,] "2-1" NA
Silly piped version
library(magrittr)
(m == m2) %>%
`[<-`(!., NA) %>%
`[<-`((w <- which(., arr = T)), apply(w, 1, paste, collapse = '-'))
# [,1] [,2]
# [1,] NA NA
# [2,] "2-1" NA
I tried to do it with ifelse():
# Label every cell with its own "row-col" position, then blank out the cells
# where the two matrices differ. Building the labels from row() and col()
# keeps each label aligned with its cell even when several cells match.
x <- paste(row(m), col(m), sep = "-")
ifelse(m != m2, NA, x)
# [,1] [,2]
# [1,] NA NA
# [2,] "2-1" NA
This method can deal with any dimensions.
e.g.
set.seed(999)
m1 <- matrix(sample(1:3, 12, replace = TRUE), 3, 4)
m2 <- matrix(sample(1:3, 12, replace = TRUE), 3, 4)
# One label per cell (same length as the matrices), so ifelse() never has to
# recycle a shorter vector and each match is labelled with its own position.
x <- paste(row(m1), col(m1), sep = "-")
ifelse(m1 != m2, NA, x)
# [,1] [,2] [,3] [,4]
# [1,] NA "1-2" NA "1-4"
# [2,] NA NA "2-3" NA
# [3,] "3-1" NA NA "3-4"

Apply function on each element of a list of matrices

I have a list of matrices.
(below is a simplified example, I actually have a list of 3 matrices, the first one being in 2D, while the second and third ones are in 3D)
> a <- matrix(-1:2, ncol = 2)
> b <- array(c(-2:5), dim=c(2, 2, 2))
> c_list <- list(a,b)
> c_list
[[1]]
[,1] [,2]
[1,] -1 1
[2,] 0 2
[[2]]
, , 1
[,1] [,2]
[1,] -2 0
[2,] -1 1
, , 2
[,1] [,2]
[1,] 2 4
[2,] 3 5
I'd like to apply the function max(0,c_list) to each and every element (without a loop), in order to have the same type of object as "c_list" but with the negative values replaced by zeros.
> output
[[1]]
[,1] [,2]
[1,] 0 1
[2,] 0 2
[[2]]
, , 1
[,1] [,2]
[1,] 0 0
[2,] 0 1
, , 2
[,1] [,2]
[1,] 2 4
[2,] 3 5
I've managed to do it for a single matrix or for a list with mapply or lapply, but not for a list of matrices.
Answer : either Sotos' answer
output <- lapply(c_list, function(i)replace(i, i < 0, 0))
or Moody_Mudskipper's answer
output <- lapply(c_list,pmax,0)
You can use pmax, it will preserve the format of the source matrix and vectorized so faster than looping with max.
lapply(c_list,pmax,0)
Using apply and lapply:
a <- matrix(-1:2, ncol = 2)
b <- matrix(-3:0, ncol = 2)
c <- list(a,b)
d <- lapply(c, function(m) {
apply(m, c(1, 2), function(x) max(0, x))
})
Output:
> d
[[1]]
[,1] [,2]
[1,] 0 1
[2,] 0 2
[[2]]
[,1] [,2]
[1,] 0 0
[2,] 0 0

Saving covariance matrix in a for loop - R

So there is a hobby project I am currently working on in order to improve my R skills. What I created with my previous code are various subsets of data "returnseries.1, returnseries.2, returnseries.3, ... "(from 1 to 119) which are stored each in a 252x6 matrix.
Now I am building a for loop to calculate the covariance matrix for each subset.
My code goes as the following:
for(k in 1:119){
covmat[k] = matrix(c(cov(returnseries[k])),nrow=6, ncol=6)
}
For some reason I get the error that: "My column index must be at most 7 not 8."
And I don't get why. I tried several other versions of the code, but none of them gives me an answer. I thought it had to do with the naming, but using returnseries.[k] gives me an error saying that returnseries. is not defined.
I would be delighted if somebody could provide a quick solution.
You can use an array. A 3D array in this case.
Generate some data.
> xy <- list(one = matrix(rnorm(9), ncol = 3),
+ two = matrix(rnorm(9), ncol = 3),
+ three = matrix(rnorm(9), ncol = 3))
> xy
$one
[,1] [,2] [,3]
[1,] 0.1341714 -1.27229790 0.22431441
[2,] 1.0853899 0.02335881 -0.05600098
[3,] -1.5645181 0.83745858 -1.47670091
$two
[,1] [,2] [,3]
[1,] 1.4891642 -0.3766222 -0.86981432
[2,] 0.3424295 -1.7882177 1.79601480
[3,] -1.1583058 -0.1604330 0.02690498
$three
[,1] [,2] [,3]
[1,] -0.1511346 -0.3672432 -0.3008405
[2,] -1.9881830 -0.8545396 -0.7108430
[3,] 0.1637134 -0.7958267 1.1923535
Create empty array
> N <- 3
> ar <- array(rep(NA, 3*3*N), dim = c(3, 3, N))
> ar
, , 1
[,1] [,2] [,3]
[1,] NA NA NA
[2,] NA NA NA
[3,] NA NA NA
, , 2
[,1] [,2] [,3]
[1,] NA NA NA
[2,] NA NA NA
[3,] NA NA NA
, , 3
[,1] [,2] [,3]
[1,] NA NA NA
[2,] NA NA NA
[3,] NA NA NA
Fill in values.
> for (i in 1:N) {
+ ar[,, i] <- xy[[i]]
+ }
>
> ar
, , 1
[,1] [,2] [,3]
[1,] 0.1341714 -1.27229790 0.22431441
[2,] 1.0853899 0.02335881 -0.05600098
[3,] -1.5645181 0.83745858 -1.47670091
, , 2
[,1] [,2] [,3]
[1,] 1.4891642 -0.3766222 -0.86981432
[2,] 0.3424295 -1.7882177 1.79601480
[3,] -1.1583058 -0.1604330 0.02690498
, , 3
[,1] [,2] [,3]
[1,] -0.1511346 -0.3672432 -0.3008405
[2,] -1.9881830 -0.8545396 -0.7108430
[3,] 0.1637134 -0.7958267 1.1923535
You can do all sorts of wonderful things with this now. For example, do row sums.
> apply(ar, MARGIN = 3, FUN = rowSums)
[,1] [,2] [,3]
[1,] -0.9138121 0.2427277 -0.8192183
[2,] 1.0527477 0.3502266 -3.5535656
[3,] -2.2037604 -1.2918338 0.5602402
Here's proof for the first matrix. Compare it to the first column:
> rowSums(xy[[1]])
[1] -0.9138121 1.0527477 -2.2037604

How to find covariance matrix by category in data.table in R

Suppose that my data contain 3 categories. I want to find the covariance matrix of each category, and expect the result to be an array.
set.seed(7)
x <- rbind(cbind(rnorm(120,-1,0.1), rnorm(120,-0.5,0.1)),
cbind(rnorm(120,-0.4,0.1), rnorm(120,0,0.1)),
cbind(rnorm(120,.2,0.1), rnorm(120,0,0.1)))
lab <- c(rep(1, 120), rep(2, 120), rep(3, 120))
newx <- cbind(x, lab = lab)
s <- sapply(1:3, function(k){ var(newx[lab == k, -3]) })
dim(s) <- c(2,2,3)
which returns:
, , 1
[,1] [,2]
[1,] 0.008880447 -0.001116058
[2,] -0.001116058 0.009229061
, , 2
[,1] [,2]
[1,] 0.012193536 -0.001217923
[2,] -0.001217923 0.009391710
, , 3
[,1] [,2]
[1,] 0.010752319 0.001231336
[2,] 0.001231336 0.008226595
I know how to do it by using sapply, but if I want to use data.table, how can I do it ?
I have tried:
library(data.table)
dt <- data.table(newx)
dt[,lapply(.SD, var), by = lab]
but it does not provide the return as I expected.
If you need the result as a 3-dimensional array, then obviously that is not a data.table. However, you can use data.table for the calculation.
library(data.table)
DT <- as.data.table(newx)
result <- DT[,var(.SD),by=lab]
result <- as.matrix(result$V1)
dim(result) <- c(2,2,3)
result
# , , 1
#
# [,1] [,2]
# [1,] 0.008880447 -0.001116058
# [2,] -0.001116058 0.009229061
#
# , , 2
#
# [,1] [,2]
# [1,] 0.012193536 -0.001217923
# [2,] -0.001217923 0.009391710
#
# , , 3
#
# [,1] [,2]
# [1,] 0.010752319 0.001231336
# [2,] 0.001231336 0.008226595

Resources