Class of output object differs as input data differs - r

I am trying to draw a variable number of samples for each of n attempts. In this example n = 8 because length(n.obs) == 8. Once all of the samples have been drawn I want to combine them into a matrix.
Here is my first attempt:
set.seed(1234)
n.obs <- c(2,1,2,2,2,2,2,2)
my.samples <- sapply(1:8, function(x) sample(1:4, size=n.obs[x], prob=c(0.1,0.2,0.3,0.4), replace=TRUE))
my.samples
This approach produces a list.
class(my.samples)
#[1] "list"
I identify the number of columns needed in the output matrix using:
max.len <- max(sapply(my.samples, length))
max.len
#[1] 2
The output matrix can be created using:
corrected.list <- lapply(my.samples, function(x) {c(x, rep(NA, max.len - length(x)))})
output.matrix <- do.call(rbind, corrected.list)
output.matrix[is.na(output.matrix)] <- 0
output.matrix
# [,1] [,2]
#[1,] 4 3
#[2,] 3 0
#[3,] 3 2
#[4,] 3 4
#[5,] 4 3
#[6,] 3 3
#[7,] 3 4
#[8,] 1 4
The above approach seems to work fine as along as n.obs includes multiple values and at least one element in n.obs > 1. However, I want the code to be flexible enough to handle each of the following n.obs:
The above sapply statement returns a 2 x 8 matrix with the following n.obs.
set.seed(1234)
n.obs <- c(2,2,2,2,2,2,2,2)
The above sapply statement returns an integer with the following n.obs.
set.seed(3333)
n.obs <- c(1,1,1,1,1,1,1,1)
The above sapply statement returns a list with the following n.obs.
n.obs <- c(0,0,0,0,0,0,0,0)
Here are example desired results for each of the above three n.obs:
desired.output <- matrix(c(4, 3,
3, 3,
2, 3,
4, 4,
3, 3,
3, 3,
4, 1,
4, 2), ncol = 2, byrow = TRUE)
desired.output <- matrix(c(2,
3,
4,
2,
3,
4,
4,
1), ncol = 1, byrow = TRUE)
desired.output <- matrix(c(0,
0,
0,
0,
0,
0,
0,
0), ncol = 1, byrow = TRUE)
How can I generalize the code so that it always returns a matrix with eight rows regardless of the n.obs used as input? One way would be to use a series of if statements to handle problematic cases, but I thought there might be a simpler and more efficient solution.

We can write a function :
get_matrix <- function(n.obs) {
nr <- length(n.obs)
my.samples <- sapply(n.obs, function(x)
sample(1:4, size=x, prob=c(0.1,0.2,0.3,0.4), replace=TRUE))
max.len <- max(lengths(my.samples))
mat <- matrix(c(sapply(my.samples, `[`, 1:max.len)), nrow = nr, byrow = TRUE)
mat[is.na(mat)] <- 0
mat
}
Checking output :
get_matrix(c(2,1,2,2,2,2,2,2))
# [,1] [,2]
#[1,] 1 4
#[2,] 4 0
#[3,] 4 3
#[4,] 4 4
#[5,] 4 2
#[6,] 4 3
#[7,] 4 4
#[8,] 4 4
get_matrix(c(1,1,1,1,1,1,1,1))
# [,1]
#[1,] 4
#[2,] 4
#[3,] 3
#[4,] 4
#[5,] 2
#[6,] 4
#[7,] 1
#[8,] 4
get_matrix(c(0,0,0,0,0,0,0,0))
# [,1]
#[1,] 0
#[2,] 0
#[3,] 0
#[4,] 0
#[5,] 0
#[6,] 0
#[7,] 0
#[8,] 0

You could Vectorize the sample function on the size= argument.
samplev <- Vectorize(sample, "size", SIMPLIFY=F)
Wrap samplev into a function and assign maximal length using length<- in an lapply.
FUN <- function(n.obs, prob.=c(.1,.2,.3,.4)) {
s <- do.call(rbind, lapply(
samplev(1:4, size=n.obs, prob=prob., replace=TRUE),
`length<-`, max(n.obs)))
if (!all(dim(s))) matrix(0, length(n.obs))
else ({s[is.na(s)] <- 0; s})
}
Results:
set.seed(1234)
FUN(c(2,1,2,2,2,2,2,2))
# [,1] [,2]
# [1,] 4 3
# [2,] 3 0
# [3,] 3 2
# [4,] 3 4
# [5,] 4 3
# [6,] 3 3
# [7,] 3 4
# [8,] 1 4
FUN(c(2,2,2,2,2,2,2,2))
# [,1] [,2]
# [1,] 2 4
# [2,] 4 4
# [3,] 4 4
# [4,] 4 4
# [5,] 4 4
# [6,] 2 3
# [7,] 1 2
# [8,] 4 3
FUN(c(1,1,1,1,1,1,1,1))
# [,1]
# [1,] 4
# [2,] 4
# [3,] 3
# [4,] 4
# [5,] 2
# [6,] 4
# [7,] 4
# [8,] 1
FUN(c(0,0,0,0,0,0,0,0))
# [,1]
# [1,] 0
# [2,] 0
# [3,] 0
# [4,] 0
# [5,] 0
# [6,] 0
# [7,] 0
# [8,] 0
FUN(c(3, 4))
# [,1] [,2] [,3] [,4]
# [1,] 2 3 3 0
# [2,] 4 3 4 3

Related

Subtraction of list of matrix within a list in R

I have a list of which consists of list of matrices as follows:
$A
$A[[1]]
$A[[1]][[1]]
[,1] [,2]
[1,] 1 6
[2,] 2 7
[3,] 3 8
[4,] 4 9
[5,] 5 10
$A[[2]]
$A[[2]][[1]]
[,1] [,2]
[1,] 2 7
[2,] 3 8
[3,] 4 9
[4,] 5 10
[5,] 6 11
$B
$B[[1]]
$B[[1]][[1]]
[,1] [,2]
[1,] 11 16
[2,] 12 17
[3,] 13 18
[4,] 14 19
[5,] 15 20
$B[[2]]
$B[[2]][[1]]
[,1] [,2]
[1,] 12 17
[2,] 13 18
[3,] 14 19
[4,] 15 20
[5,] 16 21
$C
$C[[1]]
$C[[1]][[1]]
[,1] [,2]
[1,] 0 0
[2,] 0 0
[3,] 0 0
[4,] 0 0
[5,] 0 0
$C[[2]]
$C[[2]][[1]]
[,1] [,2]
[1,] 0 0
[2,] 0 0
[3,] 0 0
[4,] 0 0
[5,] 0 0
The above list of matrices was created using the codes below:
A_mat1 <- matrix(as.numeric(c(1:10)), nrow = 5, ncol = 2)
B_mat1 <- matrix(as.numeric(c(11:20)), nrow = 5, ncol = 2)
C_mat1 <- matrix(as.numeric(0), nrow = 5, ncol = 2)
A_mat2 <- matrix(as.numeric(c(2:11)), nrow = 5, ncol = 2)
B_mat2 <- matrix(as.numeric(c(12:21)), nrow = 5, ncol = 2)
C_mat2 <- matrix(as.numeric(0), nrow = 5, ncol = 2)
my_matrix_name <- as.vector(c("A_mat1", "B_mat1", "C_mat1", "A_mat2", "B_mat2", "C_mat2"))
my_list = list(A_mat1, B_mat1, C_mat1, A_mat2, B_mat2, C_mat2)
names(my_list) <- my_matrix_name
my_A <- as.vector(my_matrix_name[substring(my_matrix_name,1,1) == "A"])
my_B <- as.vector(my_matrix_name[substring(my_matrix_name,1,1) == "B"])
my_C <- as.vector(my_matrix_name[substring(my_matrix_name,1,1) == "C"])
A = list()
for(i in seq_len(length(my_A))){
A[[i]] <- list(my_list[[paste0(my_A[[i]])]])
}
B = list()
for(i in seq_len(length(my_B))){
B[[i]] <- list(my_list[[paste0(my_B[[i]])]])
}
C = list()
for(i in seq_len(length(my_C))){
C[[i]] <- list(my_list[[paste0(my_C[[i]])]])
}
my_group = list(A,B,C)
names(my_group) <- c("A", "B", "C")
my_group
Where group C matrices have values of 0. Now I want to create a formula to overwrite group C equals to group A minus group B i.e. C = A - B
I have tried using
my_group[[3]] = my_group[[1]] - my_group[[2]]
and
my_group[[3]] = Map("-", my_group[[1]] , my_group[[2]])
but it gives error message: non-numeric argument to binary operator. Can anyone help me please? Thanks!
Because you have nested lists, your Map function is actually trying to subtract a list from another list. You need your mapping function to extract the first element of each list and subtract those
Map(function(A, B) A[[1]] - B[[1]], my_group$A , my_group$B)
#> [[1]]
#> [,1] [,2]
#> [1,] -10 -10
#> [2,] -10 -10
#> [3,] -10 -10
#> [4,] -10 -10
#> [5,] -10 -10
#>
#> [[2]]
#> [,1] [,2]
#> [1,] -10 -10
#> [2,] -10 -10
#> [3,] -10 -10
#> [4,] -10 -10
#> [5,] -10 -10
An option with pmap
library(purrr)
pmap(my_group[c('A', 'B')], ~ ..1[[1]] - ..2[[1]])
#[[1]]
# [,1] [,2]
#[1,] -10 -10
#[2,] -10 -10
#[3,] -10 -10
#[4,] -10 -10
#[5,] -10 -10
#[[2]]
# [,1] [,2]
#[1,] -10 -10
#[2,] -10 -10
#[3,] -10 -10
#[4,] -10 -10
#[5,] -10 -10

R Get complement of small matrix in larger matrix

I have a n×2 matrix A and and m×2 matrix B with m<n. I want to find the complement of B in A, i.e. all rows from A that are not in B. How would I do that in base r?
setdiff does not work as it does not respect the matrix structure. rbind+duplicate does also not work since there may be rows in B that are not in A at all.
We can paste the values row-wise and check if they are present in B using %in% :
A[!paste(A[, 1], A[, 2], sep = '-') %in% paste(B[, 1], B[, 2], sep = '-'),]
Using reproducible data :
A <- matrix(1:16, ncol = 2)
B <- matrix(c(2, 10, 1, 2, 5, 13, 6, 14, 8, 16), ncol = 2, byrow = TRUE)
A
# [,1] [,2]
#[1,] 1 9
#[2,] 2 10
#[3,] 3 11
#[4,] 4 12
#[5,] 5 13
#[6,] 6 14
#[7,] 7 15
#[8,] 8 16
B
# [,1] [,2]
#[1,] 2 10
#[2,] 1 2
#[3,] 5 13
#[4,] 6 14
#[5,] 8 16
A[!paste(A[, 1], A[, 2], sep = '-') %in% paste(B[, 1], B[, 2], sep = '-'),]
# [,1] [,2]
#[1,] 1 9
#[2,] 3 11
#[3,] 4 12
#[4,] 7 15

Combining vectors in a subset of list of lists

I'd like to get a matrix mat out by combining vectors in a subset of list of lists. Following the way to do the same using a for loop. I am wondering if there is a faster way to do it.
i <- 1 # the subset
mat<- matrix(NA, ncol = p, nrow = n)
for (j in 1 : p) {
mat[, j] <- list_of_list[[j]][[i]]$the_vector
}
EDIT:
I am after the vectors indexed/subseted by 'i' at any given time. Also, the list_of_list has objects other than the_vector as well.
EDIT 2:
Adding a working example below.
lst <- list()
list_of_list <- list()
lst[[1]] <- list(a="a", c="b1", the_vector = 1:5)
lst[[2]] <- list(a="b", c="b2", the_vector = 1:5+1)
lst[[3]] <- list(a="c", c="b3", the_vector = 1:5+2)
list_of_list[[1]] <- lst
lst[[1]] <- list(a="a", c="b1", the_vector = 1:5*0)
lst[[2]] <- list(a="b", c="b2", the_vector = 1:5*1)
lst[[3]] <- list(a="c", c="b3", the_vector = 1:5*22)
list_of_list[[2]] <- lst
i <- 1 # the subset
p <- 2 # length of the list of list
n <- 5 # length of the vector
mat<- matrix(NA, ncol = p, nrow = n)
for (j in 1 : p) {
mat[, j] <- list_of_list[[j]][[i]]$the_vector
}
You may try the sapply() function:
i <- 1L
mat <- sapply(list_of_list, function(.x) .x[[i]]$the_vector)
mat
[,1] [,2]
[1,] 1 0
[2,] 2 0
[3,] 3 0
[4,] 4 0
[5,] 5 0
I have not benchmarked the code to make sure this is faster in terms of execution speed but it definitely requires fewer key strokes.
sapply() applies a function over a list or vector and is a kind of implied for loop.
You can just unlist your list then reshape it as a matrix:
matrix(unlist(list(list(1,2,3,4),list(5,6,7,8),list(9,10,11,12))), nrow=3, byrow = T)
[,1] [,2] [,3] [,4]
[1,] 1 2 3 4
[2,] 5 6 7 8
[3,] 9 10 11 12
I am not sure if you are looking for something like this. It will give you a list of 3 matrices corresponding to vector from list_of_list's child lists.
mapply(list_of_list[[1]],list_of_list[[2]],
FUN = function(x,y){t(mapply(x$the_vector,y$the_vector,
FUN = function(u,v){matrix(c(u,v),ncol=2,byrow = F,dimnames = NULL)},
SIMPLIFY = T))},SIMPLIFY = F)
#[[1]]
# [,1] [,2]
#[1,] 1 0
#[2,] 2 0
#[3,] 3 0
#[4,] 4 0
#[5,] 5 0
#[[2]]
# [,1] [,2]
#[1,] 2 1
#[2,] 3 2
#[3,] 4 3
#[4,] 5 4
#[5,] 6 5
#[[3]]
# [,1] [,2]
#[1,] 3 22
#[2,] 4 44
#[3,] 5 66
#[4,] 6 88
#[5,] 7 110
Here is another solution really similar to #TUSHAr but that might maybe more modular:
## Lapply wrapping function that outputs a matrix
lapply.wrapper <- function(i, list_of_list) {
matrix(unlist(lapply(list_of_list, function(X, i) X[[i]]$the_vector, i = i)), ncol = length(list_of_list))
}
## Using the wrapper on the first subset:
lapply.wrapper(1, list_of_list)
# [,1] [,2]
#[1,] 1 0
#[2,] 2 0
#[3,] 3 0
#[4,] 4 0
#[5,] 5 0
## Applying the function to all subsets
sapply(1:length(list_of_list[[1]]), lapply.wrapper, list_of_list, simplify = FALSE)
#[[1]]
# [,1] [,2]
#[1,] 1 0
#[2,] 2 0
#[3,] 3 0
#[4,] 4 0
#[5,] 5 0
#
#[[2]]
# [,1] [,2]
#[1,] 2 1
#[2,] 3 2
#[3,] 4 3
#[4,] 5 4
#[5,] 6 5
#
#[[3]]
# [,1] [,2]
#[1,] 3 22
#[2,] 4 44
#[3,] 5 66
#[4,] 6 88
#[5,] 7 110

Minimum of cells in two matrices within a moving kernel

I have two matrices m1 and m2.
m1 <- matrix(1:16, ncol = 4)
m2 <- matrix(16:1, ncol = 4)
# > m1
# [,1] [,2] [,3] [,4]
# [1,] 1 5 9 13
# [2,] 2 6 10 14
# [3,] 3 7 11 15
# [4,] 4 8 12 16
# > m2
# [,1] [,2] [,3] [,4]
# [1,] 16 12 8 4
# [2,] 15 11 7 3
# [3,] 14 10 6 2
# [4,] 13 9 5 1
I want to find the minimum between the two matrices for each cell within a moving kernel of 3x3. The outer margines should be ignored, i.e. they can be filled with NAs and the min function should then have na.rm = TRUE. The result should look like this:
# > m3
# [,1] [,2] [,3] [,4]
# [1,] 1 1 3 3
# [2,] 1 1 2 2
# [3,] 2 2 1 1
# [4,] 3 3 1 1
I have already tried a combination of pmin{base} and runmin{caTools} like this:
pmin(runmin(m1, 3, endrule = "keep"),
runmin(m2, 3, endrule = "keep"))
However, this did not work. Probably due to the fact that
"If x is a matrix than each column will be processed separately."
(from ?runmin)
Is there any package, that performs such operations, or is it possible to apply?
Here is a base R approach:
m = pmin(m1, m2)
grid = expand.grid(seq(nrow(m)), seq(ncol(m)))
x = apply(grid, 1, function(u) {
min(m[max(1,u[1]-1):min(nrow(m), u[1]+1), max(1,u[2]-1):min(ncol(m), u[2]+1)])
})
dim(x) = dim(m)
#> x
# [,1] [,2] [,3] [,4]
#[1,] 1 1 3 3
#[2,] 1 1 2 2
#[3,] 2 2 1 1
#[4,] 3 3 1 1

R Create Matrix From an Operation on a "Row" Vector and a "Column" Vector

First create a "row" vector and a "column" vector in R:
> row.vector <- seq(from = 1, length = 4, by = 1)
> col.vector <- {t(seq(from = 1, length = 3, by = 2))}
From that I'd like to create a matrix by, e.g., multiplying each value in the row vector with each value in the column vector, thus creating from just those two vectors:
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 6 10
[3,] 3 9 15
[4,] 4 12 20
Can this be done with somehow using apply()? sweep()? ...a for loop?
Thank you for any help!
Simple matrix multiplication will work just fine
row.vector %*% col.vector
# [,1] [,2] [,3]
# [1,] 1 3 5
# [2,] 2 6 10
# [3,] 3 9 15
# [4,] 4 12 20
You'd be better off working with two actual vectors, instead of a vector and a matrix:
outer(row.vector,as.vector(col.vector))
# [,1] [,2] [,3]
#[1,] 1 3 5
#[2,] 2 6 10
#[3,] 3 9 15
#[4,] 4 12 20
Here's a way to get there with apply. Is there a reason why you're not using matrix?
> apply(col.vector, 2, function(x) row.vector * x)
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 6 10
## [3,] 3 9 15
## [4,] 4 12 20

Resources