Create matrix from smaller matrices in R - r

Is there a general function to build a matrix from smaller blocks, i.e. build matrix
A B
C D
from matrices A, B, C, D?
Of course there is this obvious way to create an empty big matrix and use sub-indexing, but isn't there anything simpler, easier and possibly faster?

Here are some base R solutions. Maybe you can use
M <- rbind(cbind(A,B),cbind(C,D))
or
u <- list(list(A,B),list(C,D))
M <- do.call(rbind,Map(function(x) do.call(cbind,x),u))
Example
A <- matrix(1:4,nrow = 2)
B <- matrix(1:6,nrow = 2)
C <- matrix(1:6,ncol = 2)
D <- matrix(1:9,nrow = 3)
such that
> M
[,1] [,2] [,3] [,4] [,5]
[1,] 1 3 1 3 5
[2,] 2 4 2 4 6
[3,] 1 4 1 4 7
[4,] 2 5 2 5 8
[5,] 3 6 3 6 9

Related

Generating Possible Sequences in R

I am trying to solve the following problem in R. Generically, given a sequence [a,b], I am to generate lists from this sequence that have a length n, whose elements pairwise at least have a difference of d.
I was thinking of using seq() but you can only create evenly-spaced sequences using this function.
This may be what you are after, generate all permutations of the possible different values that could exist in the sequence for size n and then check which satisfy your requirements of having their terminal value be b.
This is quite intensive and slow for larger vectors, but should return all possible valid sequences (unless I've made a mistake).
# sequence length of n which includes a, b
# therefore need to find n - 1 values (then check that last val of cumsum == b)
# vals must be greater than or equal to d
# vals have upper bound is if all but one value was d, b - ((n - 1) * d)
library(gtools)
library(matrixStats)
# parameters
a = 1
b = 20
n = 5
d = 2
# possible values that differences can be
poss_diffs <- d:(b - ((n - 1) * d))
# generate all possible permutations of differences
diff_perms_n <- permutations(n = length(poss_diffs), r = n - 1, v = poss_diffs)
# turn differences into sequences, add column for the a value
seqs_n <- matrixStats::rowCumsums(cbind(a, diff_perms_n))
# filter to only valid sequences, last column == b
valid_seqs <- seqs_n[seqs_n[, ncol(seqs_n)] == b, ]
# check that diffs are all greater than d
valid_seqs_diffs <- matrixStats::rowDiffs(valid_seqs)
print(head(valid_seqs))
print(head(valid_seqs_diffs))
# > print(head(valid_seqs))
# [,1] [,2] [,3] [,4] [,5]
# [1,] 1 3 6 10 20
# [2,] 1 3 6 11 20
# [3,] 1 3 6 12 20
# [4,] 1 3 6 14 20
# [5,] 1 3 6 15 20
# [6,] 1 3 6 16 20
# > print(head(valid_seqs_diffs))
# [,1] [,2] [,3] [,4]
# [1,] 2 3 4 10
# [2,] 2 3 5 9
# [3,] 2 3 6 8
# [4,] 2 3 8 6
# [5,] 2 3 9 5
# [6,] 2 3 10 4

R - How to rbind two lists while alternating their list elements

I'd like to know how to rbind two lists containing vectors into a data frame. e.g.
a<-list(c(1,2,3,4,5), c(2,3,4,5,6))
b<-list(c(3,4,5,6,7), c(4,5,6,7,8))
How to make a data frame from the two lists as the following:
1 2 3 4 5
3 4 5 6 7
2 3 4 5 6
4 5 6 7 8
So I need to take the first element of each list and then rbind them. Then take the second element of each list and then rbind to the previous data frame. I know I could use a for loop but is there a better and faster way to do this?
A variation on #DiscoSuperfly's answer that will work with objects of uneven length, like:
a <- list(c(1,2,3,4,5), c(2,3,4,5,6), c(1,1,1,1,1))
b <- list(c(3,4,5,6,7), c(4,5,6,7,8))
An answer:
L <- list(a,b)
L <- lapply(L, `length<-`, max(lengths(L)))
do.call(rbind, do.call(Map, c(rbind, L)))
# [,1] [,2] [,3] [,4] [,5]
#[1,] 1 2 3 4 5
#[2,] 3 4 5 6 7
#[3,] 2 3 4 5 6
#[4,] 4 5 6 7 8
#[5,] 1 1 1 1 1
A solution using the purrr package.
library(purrr)
map2_dfr(a, b, ~data.frame(rbind(.x, .y)))
X1 X2 X3 X4 X5
1 1 2 3 4 5
2 3 4 5 6 7
3 2 3 4 5 6
4 4 5 6 7 8
Reduce(rbind,Map(rbind,a,b))
[,1] [,2] [,3] [,4] [,5]
[1,] 1 2 3 4 5
[2,] 3 4 5 6 7
[3,] 2 3 4 5 6
[4,] 4 5 6 7 8
Of the answers given, this seems the fastest when using two lists, thanks in large part to #thelatemail's suggested edit (thanks!).
Try this:
rbab<-do.call(rbind,c(a,b)); rbind(rbab[c(TRUE,FALSE),],rbab[c(FALSE,TRUE),])
Output:
[,1] [,2] [,3] [,4] [,5]
[1,] 1 2 3 4 5
[2,] 3 4 5 6 7
[3,] 2 3 4 5 6
[4,] 4 5 6 7 8
Using c(TRUE,FALSE) above rbinds every other line a and b; then we flip that to c(FALSE,TRUE) to get the rest. Finally, we rbind it all together.
EDIT: Speed Test
Here's a larger scale speed test, for an objective comparison, which used two lists of 6000 elements each instead of the original a and b provided. A total of 100 iterations were used to estimate these statistics.
#Sample used:
a<-list(c(1,2,3,4,5),c(2,3,4,5,6))
b<-list(c(3,4,5,6,7),c(4,5,6,7,8))
a<-a[rep(1:2,3e3)]
b<-a[rep(1:2,3e3)]
#Here is the collaboration version (with #thelatemail):
func1 <- function(){
rbab<-do.call(rbind,c(a,b)); rbind(rbab[c(TRUE,FALSE),],rbab[c(FALSE,TRUE),])
}
#Here is my original version:
func2 <- function(){
rbind(do.call(rbind,c(a,b))[c(TRUE,FALSE),],do.call(rbind,c(a,b))[c(FALSE,TRUE),])
}
#Here's a base-R translation of #ycw's answer (*translated by thelatemail)
func3 <- function(){
do.call(rbind, Map(rbind, a, b))
}
#Here is #Onyambu's answer (also a great answer for its brevity!):
func4 <- function(){
Reduce(rbind,Map(rbind,a,b))
}
microbenchmark::microbenchmark(
func1(),func2(),func3(),func4()
)
Unit: microseconds
expr min lq mean median uq max neval
func1() 4.39 6.46 14.74 15.85 20.24 31.94 100
func2() 5789.26 6578.83 7114.21 7027.57 7531.52 9411.05 100
func3() 10279.50 10970.70 11611.90 11245.47 11866.70 16315.00 100
func4() 251098.18 265936.30 273667.45 275778.04 281740.77 291279.20 100
I created a new list with both a and b, and then make it a matrix. I am sure there is a more elegant way to do this.
a <- list(c(1,2,3,4,5), c(2,3,4,5,6), c(1,1,1,1,1))
b <- list(c(3,4,5,6,7), c(4,5,6,7,8))
# empty list
ab <- vector("list", length = length(a) + length(b))
# put a and b in correct locations
ab[seq(1, length(ab), 2)] <- a
ab[seq(2, length(ab), 2)] <- b
# make the matrix
res <- t(matrix(unlist(ab), nrow=5, ncol=length(a) + length(b)))
> ab <-rbind(unlist(a), unlist(b))
> ab <- rbind(ab[,1:5], ab[,6:10])
> ab
[,1] [,2] [,3] [,4] [,5]
[1,] 1 2 3 4 5
[2,] 3 4 5 6 7
[3,] 2 3 4 5 6
[4,] 4 5 6 7 8
I would do:
d <- t(as.data.frame(c(a,b)))
rbind( d[ seq(1,nrow(d),by=2) ,] , d[ seq(2,nrow(d),by=2) ,])

How to delete an element at a time from a vector while retaining the others?

I have a vector x containing 5 elements.
x <- (1,2,3,4,5)
I would want to delete one element at each iteration and retain other elements in the vector.(as shown below)
x <- (2,3,4,5) #vector after first iteration
x <- (1,3,4,5) #vector after second iteration
x <- (1,2,4,5) #vector after third iteration
x <- (1,2,3,5) #vector after fourth iteration
and also, is it possible to store these new vectors in a list?
is there a way to extend this to multiple vectors?
You could use combn:
combn(5,4)
[,1] [,2] [,3] [,4] [,5]
[1,] 1 1 1 1 2
[2,] 2 2 2 3 3
[3,] 3 3 4 4 4
[4,] 4 5 5 5 5
To get the data as a list:
as.list(data.frame(combn(5,4)))
To use this on multiple vectors or a matrix, first transform it into a data.frame, to make it easier for lapply to go over the length (columns) of the data.frame. Then you can use lapply with combn like so:
mat <- data.frame(matrix(1:10,5))
lapply(mat, function(x) combn(x,length(x)-1))
$X1
[,1] [,2] [,3] [,4] [,5]
[1,] 1 1 1 1 2
[2,] 2 2 2 3 3
[3,] 3 3 4 4 4
[4,] 4 5 5 5 5
$X2
[,1] [,2] [,3] [,4] [,5]
[1,] 6 6 6 6 7
[2,] 7 7 7 8 8
[3,] 8 8 9 9 9
[4,] 9 10 10 10 10
We can do
lapply(seq_along(x), function(i) x[-i])
drop_n <- function(n, x) x[-n]
lapply(1:5, drop_n, x)
Here you have a way to get what you want. You only need to change the parameter n to make it more general
# Generate a list
L <- list()
# Define the number of elements
n <- 5
# Define the values
values <- 1:n
# Complete the list
for (i in 1:n){
L[[i]] <- values[-i]
}

Least square optimization in R

I am wondering how one could solve the following problem in R.
We have a v vector (of n elements) and a B matrix (of dimension m x n).
E.g:
> v
[1] 2 4 3 1 5 7
> B
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 2 1 5 5 3 4
[2,] 4 5 6 3 2 5
[3,] 3 7 5 1 7 6
I am looking for the m-long vector u such that
sum( ( v - ( u %*% B) )^2 )
is minimized (i.e. minimizes the sum of squares).
You are describing linear regression, which can be done with the lm function:
coefficients(lm(v~t(B)+0))
# t(B)1 t(B)2 t(B)3
# 0.2280676 -0.1505233 0.7431653

Build a square-ish matrix with a specified number of cells

I would like to write a function that transforms an integer, n, (specifying the number of cells in a matrix) into a square-ish matrix that contain the sequence 1:n. The goal is to make the matrix as "square" as possible.
This involves a couple of considerations:
How to maximize "square"-ness? I was thinking of a penalty equal to the difference in the dimensions of the matrix, e.g. penalty <- abs(dim(mat)[1]-dim(mat)[2]), such that penalty==0 when the matrix is square and is positive otherwise. Ideally this would then, e.g., for n==12 lead to a preference for a 3x4 rather than 2x6 matrix. But I'm not sure the best way to do this.
Account for odd-numbered values of n. Odd-numbered values of n do not necessarily produce an obvious choice of matrix (unless they have an integer square root, like n==9. I thought about simply adding 1 to n, and then handling as an even number and allowing for one blank cell, but I'm not sure if this is the best approach. I imagine it might be possible to obtain a more square matrix (by the definition in 1) by adding more than 1 to n.
Allow the function to trade-off squareness (as described in #1) and the number of blank cells (as described in #2), so the function should have some kind of parameter(s) to address this trade-off. For example, for n==11, a 3x4 matrix is pretty square but not as square as a 4x4, but the 4x4 would have many more blank cells than the 3x4.
The function needs to optionally produce wider or taller matrices, so that n==12 can produce either a 3x4 or a 4x3 matrix. But this would be easy to handle with a t() of the resulting matrix.
Here's some intended output:
> makemat(2)
[,1]
[1,] 1
[2,] 2
> makemat(3)
[,1] [,2]
[1,] 1 3
[2,] 2 4
> makemat(9)
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
> makemat(11)
[,1] [,2] [,3] [,4]
[1,] 1 4 7 10
[2,] 2 5 8 11
[3,] 3 6 9 12
Here's basically a really terrible start to this problem.
makemat <- function(n) {
n <- abs(as.integer(n))
d <- seq_len(n)
out <- d[n %% d == 0]
if(length(out)<2)
stop('n has fewer than two factors')
dim1a <- out[length(out)-1]
m <- matrix(1:n, ncol=dim1a)
m
}
As you'll see I haven't really been able to account for odd-numbered values of n (look at the output of makemat(7) or makemat(11) as described in #2, or enforce the "squareness" rule described in #1, or the trade-off between them as described in #3.
I think the logic you want is already in the utility function n2mfrow(), which as its name suggests is for creating input to the mfrow graphical parameter and takes an integer input and returns the number of panels in rows and columns to split the display into:
> n2mfrow(11)
[1] 4 3
It favours tall layouts over wide ones, but that is easily fixed via rev() on the output or t() on a matrix produced from the results of n2mfrow().
makemat <- function(n, wide = FALSE) {
if(isTRUE(all.equal(n, 3))) {
dims <- c(2,2)
} else {
dims <- n2mfrow(n)
}
if(wide)
dims <- rev(dims)
m <- matrix(seq_len(prod(dims)), nrow = dims[1], ncol = dims[2])
m
}
Notice I have to special-case n = 3 as we are abusing a function intended for another use and a 3x1 layout on a plot makes more sense than a 2x2 with an empty space.
In use we have:
> makemat(2)
[,1]
[1,] 1
[2,] 2
> makemat(3)
[,1] [,2]
[1,] 1 3
[2,] 2 4
> makemat(9)
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
> makemat(11)
[,1] [,2] [,3]
[1,] 1 5 9
[2,] 2 6 10
[3,] 3 7 11
[4,] 4 8 12
> makemat(11, wide = TRUE)
[,1] [,2] [,3] [,4]
[1,] 1 4 7 10
[2,] 2 5 8 11
[3,] 3 6 9 12
Edit:
The original function padded seq_len(n) with NA, but I realised the OP wanted to have a sequence from 1 to prod(nrows, ncols), which is what the version above does. The one below pads with NA.
makemat <- function(n, wide = FALSE) {
if(isTRUE(all.equal(n, 3))) {
dims <- c(2,2)
} else {
dims <- n2mfrow(n)
}
if(wide)
dims <- rev(dims)
s <- rep(NA, prod(dims))
ind <- seq_len(n)
s[ind] <- ind
m <- matrix(s, nrow = dims[1], ncol = dims[2])
m
}
I think this function implicitly satisfies your constraints. The parameter can range from 0 to Inf. The function always returns either a square matrix with sides of ceiling(sqrt(n)), or a (maybe) rectangular matrix with rows floor(sqrt(n)) and just enough columns to "fill it out". The parameter trades off the selection between the two: if it is less than 1, then the second, more rectangular matrices are preferred, and if greater than 1, the first, always square matrices are preferred. A param of 1 weights them equally.
makemat<-function(n,param=1,wide=TRUE){
if (n<1) stop('n must be positive')
s<-sqrt(n)
bottom<-n-(floor(s)^2)
top<-(ceiling(s)^2)-n
if((bottom*param)<top) {
rows<-floor(s)
cols<-rows + ceiling(bottom / rows)
} else {
cols<-rows<-ceiling(s)
}
if(!wide) {
hold<-rows
rows<-cols
cols<-hold
}
m<-seq.int(rows*cols)
dim(m)<-c(rows,cols)
m
}
Here is an example where the parameter is set to default, and equally trades off the distance equally:
lapply(c(2,3,9,11),makemat)
# [[1]]
# [,1] [,2]
# [1,] 1 2
#
# [[2]]
# [,1] [,2]
# [1,] 1 3
# [2,] 2 4
#
# [[3]]
# [,1] [,2] [,3]
# [1,] 1 4 7
# [2,] 2 5 8
# [3,] 3 6 9
#
# [[4]]
# [,1] [,2] [,3] [,4]
# [1,] 1 4 7 10
# [2,] 2 5 8 11
# [3,] 3 6 9 12
Here is an example of using the param with 11, to get a 4x4 matrix.
makemat(11,3)
# [,1] [,2] [,3] [,4]
# [1,] 1 5 9 13
# [2,] 2 6 10 14
# [3,] 3 7 11 15
# [4,] 4 8 12 16
What about something fairly simple and you can handle the exceptions and other requests in a wrapper?
library(taRifx)
neven <- 8
nodd <- 11
nsquareodd <- 9
nsquareeven <- 16
makemat <- function(n) {
s <- seq(n)
if( odd(n) ) {
s[ length(s)+1 ] <- NA
n <- n+1
}
sq <- sqrt( n )
dimx <- ceiling( sq )
dimy <- floor( sq )
if( dimx*dimy < length(s) ) dimy <- ceiling( sq )
l <- dimx*dimy
ldiff <- l - length(s)
stopifnot( ldiff >= 0 )
if( ldiff > 0 ) s[ seq( length(s) + 1, length(s) + ldiff ) ] <- NA
matrix( s, nrow = dimx, ncol = dimy )
}
> makemat(neven)
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 NA
> makemat(nodd)
[,1] [,2] [,3]
[1,] 1 5 9
[2,] 2 6 10
[3,] 3 7 11
[4,] 4 8 NA
> makemat(nsquareodd)
[,1] [,2] [,3]
[1,] 1 5 9
[2,] 2 6 NA
[3,] 3 7 NA
[4,] 4 8 NA
> makemat(nsquareeven)
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 4 8 12 16

Resources