I am working with the R programming language.
I am trying to recreate the travelling salesman problem. The travelling salesman problem is a problem where a salesman has to visit "n" number of cities exactly once, in such a way that his total distance is minimized.
For this problem, I first created a dataset made of n = 6 cities (longitude, latitude):
set.seed(123)
data_1 = data.frame(id = c(1,2,3), long = rnorm(3, -74, 1 ), lat = rnorm(3, 40, 1 ))
data_2 = data.frame(id = c(4,5,6), long = rnorm(3, -78, 1 ), lat = rnorm(3, 42, 1 ))
final_data = rbind(data_1, data_2)
final_data
id long lat
1 1 -74.56048 40.07051
2 2 -74.23018 40.12929
3 3 -72.44129 41.71506
4 4 -77.53908 41.55434
5 5 -79.26506 43.22408
6 6 -78.68685 42.35981
For a given order of cities (e.g. 1,2,3,4,5,6), I created a function which determines the distance (based on the Euclidean Distance) between each successive pair of cities, and then calculates the total distance travelled:
distance <- function( long1, lat1, long2, lat2, long3, lat3, long4, lat4, long5, lat5, long6, lat6) {
d1_2 = sqrt( (long1 - lat1)^2 + (long2 - lat2)^2 )
d2_3 = sqrt( (long2 - lat2)^2 + (lat3 - long3)^2 )
d3_4 = sqrt( (long3 - lat3)^2 + (long4 - lat4)^2 )
d4_5 = sqrt( (long4 - lat4)^2 + (long5 - lat5)^2 )
d5_6 = sqrt( (long5 - lat5)^2 + (long6 - lat6)^2 )
return( d1_2 + d2_3 + d3_4 + d4_5 + d5_6 )
}
distance(final_data[1,2], final_data[1,3], final_data[2,2], final_data[2,3], final_data[3,2], final_data[3,3], final_data[4,2], final_data[4,3], final_data[5,2], final_data[5,3], final_data[6,2], final_data[6,3])
Then, I can randomize the order of the rows to obtain different routes and calculate the distance for each route:
#first route
rows <- sample(nrow(final_data))
route_1 <- final_data[rows, ]
> route_1
id long lat
1 1 -74.56048 40.07051
3 3 -72.44129 41.71506
4 4 -77.53908 41.55434
2 2 -74.23018 40.12929
6 6 -78.68685 42.35981
5 5 -79.26506 43.22408
distance(route_1[1,2], route_1[1,3], route_1[2,2], route_1[2,3], route_1[3,2], route_1[3,3], route_1[4,2], route_1[4,3], route_1[5,2], route_1[5,3], route_1[6,2], route_1[6,3])
[1] 830.5902
Next Route:
#second route
rows <- sample(nrow(final_data))
route_2 <- final_data[rows, ]
> route_2
id long lat
5 5 -79.26506 43.22408
4 4 -77.53908 41.55434
3 3 -72.44129 41.71506
2 2 -74.23018 40.12929
1 1 -74.56048 40.07051
6 6 -78.68685 42.35981
distance(route_2[1,2], route_2[1,3], route_2[2,2], route_2[2,3], route_2[3,2], route_2[3,3], route_2[4,2], route_2[4,3], route_2[5,2], route_2[5,3], route_2[6,2], route_2[6,3])
[1] 826.028
#etc
My Question: In the spirit of the Travelling Salesman Problem, I am trying to (ironically) show that what I am doing is extremely inefficient and will not work for more than 10 cities (i.e. take too long to run). In the case of 6 cities, can someone please show me how to calculate the distance for every possible route (6! = 720 routes) and calculate the time required to compute all these distances?
Here is what I know how to do so far:
Part 1: Generate All Possible Routes
library(combinat)
all_routes = permn(c(1,2,3,4,5,6))
> head(all_routes)
[[1]]
[1] 1 2 3 4 5 6
[[2]]
[1] 1 2 3 4 6 5
[[3]]
[1] 1 2 3 6 4 5
[[4]]
[1] 1 2 6 3 4 5
[[5]]
[1] 1 6 2 3 4 5
[[6]]
[1] 6 1 2 3 4 5
Part 2: Record the Time Required to Calculate a Single Route
start.time <- Sys.time()
distance(route_1[1,2], route_1[1,3], route_1[2,2], route_1[2,3], route_1[3,2], route_1[3,3], route_1[4,2], route_1[4,3], route_1[5,2], route_1[5,3], route_1[6,2], route_1[6,3])
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
Time difference of 0.003665924 secs
Can someone please show me how to put this all together?
Thanks!
To calculate the cumulative distance for all 6! routes for the given final_data could be done like this:
set.seed(123)
data_1 = data.frame(id = c(1,2,3), long = rnorm(3, -74, 1 ), lat = rnorm(3, 40, 1 ))
data_2 = data.frame(id = c(4,5,6), long = rnorm(3, -78, 1 ), lat = rnorm(3, 42, 1 ))
final_data = rbind(data_1, data_2)
N <- nrow(final_data) # just for repeated convenience
final_data
# id long lat
# 1 1 -74.56048 40.07051
# 2 2 -74.23018 40.12929
# 3 3 -72.44129 41.71506
# 4 4 -77.53908 41.55434
# 5 5 -79.26506 43.22408
# 6 6 -78.68685 42.35981
Calculate the distances between each city, pair-wise. I'm using distHaversine because you listed lat/lon, and part of me cringes seeing cartesian distance calcs applied to that :-)
dists <- outer(seq_len(N), seq_len(N), function(a,b) {
geosphere::distHaversine(final_data[a,2:3], final_data[b,2:3]) # Notes 1, 2
})
dists
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 0.00 28876.24 255554.4 300408.5 525566.9 429264.3
# [2,] 28876.24 0.00 231942.7 320616.0 541980.9 448013.6
# [3,] 255554.43 231942.67 0.0 424449.9 584761.5 521210.7
# [4,] 300408.47 320616.03 424449.9 0.0 233840.9 130640.9
# [5,] 525566.87 541980.93 584761.5 233840.9 0.0 107178.2
# [6,] 429264.34 448013.57 521210.7 130640.9 107178.2 0.0
(Units are in meters.)
Calculate the cumulative distance along each of the routes:
perms <- gtools::permutations(N, N)
nrow(perms)
# [1] 720
perms[c(1:4, 719:720),]
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 2 3 4 5 6
# [2,] 1 2 3 4 6 5
# [3,] 1 2 3 5 4 6
# [4,] 1 2 3 5 6 4
# [5,] 6 5 4 3 1 2
# [6,] 6 5 4 3 2 1
allroutes5 <- t(apply(perms, 1, function(route) {
dists[cbind(route[-N], route[-1])]
}))
head(allroutes5)
# [,1] [,2] [,3] [,4] [,5]
# [1,] 28876.24 231942.7 424449.9 233840.9 107178.2
# [2,] 28876.24 231942.7 424449.9 130640.9 107178.2
# [3,] 28876.24 231942.7 584761.5 233840.9 130640.9
# [4,] 28876.24 231942.7 584761.5 107178.2 130640.9
# [5,] 28876.24 231942.7 521210.7 130640.9 233840.9
# [6,] 28876.24 231942.7 521210.7 107178.2 233840.9
allroutes_total <- rowSums(allroutes5)
head(allroutes_total)
# [1] 1026287.9 923087.9 1210062.2 1083399.4 1146511.4 1123048.7
As confirmation of this, the first row of allroutes5 is the sequence of cities 1, 2, 3, 4, 5, and 6. Recalling dists above, from 1-2 is 28876; 2-3 is 231942; 3-4 is 424449; etc. Sum these up, and we have the total distance traveled over all cities in that route. allroutes_total holds the distances for all 720 possible routings (permutations).
min(allroutes_total)
# [1] 799046.4
which.min(allroutes_total)
# [1] 266
perms[which.min(allroutes_total),]
# [1] 3 2 1 4 6 5
Notes:
Using your formula, I was able to duplicate your distances:
dists <- outer(seq_len(N), seq_len(N), function(a,b) {
sqrt((final_data[a,"long"] - final_data[a,"lat"])^2 + (final_data[b,"long"] - final_data[b,"lat"])^2)
})
dists
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 162.1127 161.9208 161.7774 165.2982 167.7613 166.7110
# [2,] 161.9208 161.7287 161.5852 165.1101 167.5759 166.5244
# [3,] 161.7774 161.5852 161.4415 164.9694 167.4373 166.3850
# [4,] 165.2982 165.1101 164.9694 168.4235 170.8415 169.8103
# [5,] 167.7613 167.5759 167.4373 170.8415 173.2258 172.2088
# [6,] 166.7110 166.5244 166.3850 169.8103 172.2088 171.1858
### first route
which(apply(perms, 1, identical, c(1L, 3L, 4L, 2L, 6L, 5L)))
# [1] 32
allroutes_total[32]
# [1] 830.5902
### second route
which(apply(perms, 1, identical, c(5L, 4L, 3L, 2L, 1L, 6L)))
# [1] 567
allroutes_total[567]
# [1] 826.028
And if you're curious, your second route was tied for fifth-shortest:
min(allroutes_total)
# [1] 826.0252
which.min(allroutes_total)
# [1] 561
perms[which.min(allroutes_total),]
# [1] 5 4 2 3 1 6
rank(allroutes_total)[567]
# [1] 5.5
I'm not sure that's the right distance calculation, though. I'd think the euclidean distance should be:
dists <- outer(seq_len(N), seq_len(N), function(a,b) {
sqrt((final_data[a,"long"] - final_data[b,"long"])^2 + (final_data[a,"lat"] - final_data[b,"lat"])^2)
})
dists
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 0.0000000 0.3354875 2.682444 3.327741 5.663758 4.718888
# [2,] 0.3354875 0.0000000 2.390565 3.602725 5.909975 4.983694
# [3,] 2.6824442 2.3905652 0.000000 5.100325 6.988631 6.278753
# [4,] 3.3277405 3.6027253 5.100325 0.000000 2.401467 1.402200
# [5,] 5.6637577 5.9099750 6.988631 2.401467 0.000000 1.039848
# [6,] 4.7188885 4.9836936 6.278753 1.402200 1.039848 0.000000
Related
Hi I have a dataset and I am trying to get a group cluster id based on the 50 mile radius. Here is the structure of the dataset
g_lat<- c(45.52306, 40.26719, 34.05223, 37.38605, 37.77493)
g_long<- c(-122.67648,-86.13490, -118.24368, -122.08385, -122.41942)
df<- data.frame(g_lat, g_long)
I want to create a group cluster id which is basically going to group locations that are within 50 mile radius. Let me know how I can achieve this? Thanks so much. Below is the expected output.
g_lat g_long clusterid
45.52306 -122.67648 1
40.26719 -86.13490 2
34.05223 -118.24368 3
37.38605 -122.08385 4
37.77493 -122.41942 4
g_lat<- c(45.52306, 40.26719, 34.05223, 37.38605, 37.77493)
g_long<- c(-122.67648,-86.13490, -118.24368, -122.08385, -122.41942)
df<- data.frame(point = c(1:5), longitude = g_long, latitude = g_lat)
library(sf)
my.sf.point <- st_as_sf(x = df,
coords = c("longitude", "latitude"),
crs = "+proj=longlat +datum=WGS84")
#distance matrix in feet
st_distance(my.sf.point)
#which poiint are within 50 miles (~80467.2 meters)
l <- st_is_within_distance(my.sf.point, dist = 80467.2 )
l
# Sparse geometry binary predicate list of length 5, where the predicate was `is_within_distance'
# 1: 1
# 2: 2
# 3: 3
# 4: 4, 5
# 5: 4, 5
df$within_50 <- rowSums(as.matrix(l))-1
df
# point longitude latitude within_50
# 1 1 -122.6765 45.52306 0
# 2 2 -86.1349 40.26719 0
# 3 3 -118.2437 34.05223 0
# 4 4 -122.0838 37.38605 1
# 5 5 -122.4194 37.77493 1
m <- as.matrix(l)
colnames(m) <- c(1:nrow(df))
rownames(m) <- c(1:nroe(df))
df$points_within_50 <- apply( m, 1, function(u) paste( names(which(u)), collapse="," ) )
df$clusterid <- dplyr::group_indices(df, df$points_within_50)
# point longitude latitude within_50 points_within_50 clusterid
# 1 1 -122.6765 45.52306 0 1 1
# 2 2 -86.1349 40.26719 0 2 2
# 3 3 -118.2437 34.05223 0 3 3
# 4 4 -122.0838 37.38605 1 4,5 4
# 5 5 -122.4194 37.77493 1 4,5 4
You can create 2d matrix with distances between the locations. The geosphere has a function that does the heavy lifting for you.
library(geosphere)
library(magrittr)
g_lat <- c(45.52306, 40.26719, 34.05223, 37.38605, 37.77493)
g_long <- c(-122.67648,-86.13490, -118.24368, -122.08385, -122.41942)
m <- cbind(g_long, g_lat)
(matrix <- distm(m) / 1609.34)
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0.0000 1872.882 825.4595 562.3847 534.8927
#> [2,] 1872.8818 0.000 1812.5862 1936.5786 1946.4373
#> [3,] 825.4595 1812.586 0.0000 315.2862 347.3751
#> [4,] 562.3847 1936.579 315.2862 0.0000 32.5345
#> [5,] 534.8927 1946.437 347.3751 32.5345 0.0000
matrix < 50
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] TRUE FALSE FALSE FALSE FALSE
#> [2,] FALSE TRUE FALSE FALSE FALSE
#> [3,] FALSE FALSE TRUE FALSE FALSE
#> [4,] FALSE FALSE FALSE TRUE TRUE
#> [5,] FALSE FALSE FALSE TRUE TRUE
colSums(matrix < 50)
#> [1] 1 1 1 2 2
Created on 2018-09-16 by the [reprex package](http://reprex.tidyverse.org) (v0.2.0).
I would like to create an R dataframe with random integers WITHOUT repetition.
I have come up with this approach which works:
rank_random<-data.frame(matrix(NA, nrow = 13, ncol = 30)
for (colIdx in seq(1:30) {
rank_random[colIdx,] <-sample(1:ncol(subset(exc_ret, select=-c(Date))), 30,
replace=F)
}
I assume that you mean without repetition on each row. If you meant something else, please clarify.
For your example:
N= ncol(subset(exc_ret, select=-c(Date)))
num.rows = 30
t(sapply( seq(num.rows),
FUN=function(x){sample(1:N, num.rows, replace=F)} ))
To test it for a simpler case
N= 5
num.rows = 5
t(sapply( seq(num.rows),
FUN=function(x){sample(1:N, num.rows, replace=F)} ))
# [,1] [,2] [,3] [,4] [,5]
# [1,] 2 4 5 1 3
# [2,] 2 5 1 3 4
# [3,] 5 1 4 3 2
# [4,] 3 4 5 2 1
# [5,] 3 2 5 1 4
How to extract every two elements in sequence in a matrix and return the result as a matrix so that I could feed the answer in a formula for calculation:
For example, I have a one row matrix with 6 columns:
[,1][,2][,3][,4][,5][,6]
[1,] 2 1 5 5 10 1
I want to extract column 1 and two in first iteration, 3 and 4 in second iteration and so on. The result has to be in the form of matrix.
[1,] 2 1
[2,] 5 5
[3,] 10 1
My original codes:
data <- matrix(c(1,1,1,2,2,1,2,2,5,5,5,6,10,1,10,2,11,1,11,2), ncol = 2)
Center Matrix:
[,1][,2][,3][,4][,5][,6]
[1,] 2 1 5 5 10 1
[2,] 1 1 2 1 10 1
[3,] 5 5 5 6 11 2
[4,] 2 2 5 5 10 1
[5,] 2 1 5 6 5 5
[6,] 2 2 5 5 11 1
[7,] 2 1 5 5 10 1
[8,] 1 1 5 6 11 1
[9,] 2 1 5 5 10 1
[10,] 5 6 11 1 10 2
objCentroidDist <- function(data, centers) {
resultMatrix <- matrix(NA, nrow=dim(data)[1], ncol=dim(centers)[1])
for(i in 1:nrow(centers)) {
resultMatrix [,i] <- sqrt(rowSums(t(t(data)-centers[i, ])^2))
}
resultMatrix
}
objCentroidDist(data,centers)
I want the Result matrix to be as per below:
[1,][,2][,3]
[1,]
[2,]
[3,]
[4,]
[5,]
[7,]
[8,]
[9,]
[10]
My concern is, how to calculate the data-centers distance if the dimensions of the data matrix are two, and centers matrix are six. (to calculate the distance from the data matrix and every two columns in centers matrix). Each row of the centers matrix has three centers.
Something like this maybe?
m <- matrix(c(2,1,5,5,10,1), ncol = 6)
list.seq.pairs <- lapply(seq(1, ncol(m), 2), function(x) {
m[,c(x, x+1)]
})
> list.seq.pairs
[[1]]
[1] 2 1
[[2]]
[1] 5 5
[[3]]
[1] 10 1
And, in case you're wanting to iterate over multiple rows in a matrix,
you can expand on the above like this:
mm <- matrix(1:18, ncol = 6, byrow = TRUE)
apply(mm, 1, function(x) {
lapply(seq(1, length(x), 2), function(y) {
x[c(y, y+1)]
})
})
EDIT:
I'm really not sure what you're after exactly. I think, if you want each row transformed into a 2 x 3 matrix:
mm <- matrix(1:18, ncol = 6, byrow = TRUE)
list.mats <- lapply(1:nrow(mm), function(x){
a = matrix(mm[x,], ncol = 2, byrow = TRUE)
})
> list.mats
[[1]]
[,1] [,2]
[1,] 1 2
[2,] 3 4
[3,] 5 6
[[2]]
[,1] [,2]
[1,] 7 8
[2,] 9 10
[3,] 11 12
[[3]]
[,1] [,2]
[1,] 13 14
[2,] 15 16
[3,] 17 18
If, however, you want to get to your results matrix- I think it's probably easiest to do whatever calculations you need to do while you're dealing with each row:
results <- t(apply(mm, 1, function(x) {
sapply(seq(1, length(x), 2), function(y) {
val1 = x[y] # Get item one
val2 = x[y+1] # Get item two
val1 / val2 # Do your calculation here
})
}))
> results
[,1] [,2] [,3]
[1,] 0.5000000 0.7500 0.8333333
[2,] 0.8750000 0.9000 0.9166667
[3,] 0.9285714 0.9375 0.9444444
That said, I don't understand what you're trying to do so this may miss the mark. You may have more luck if you ask a new question where you show example input and the actual expected output that you're after, with the actual values you expect.
This probably has an easy solution, but I can still not find one. I have two matrices, one of size M1 = (4, 2000000), and the other, M2=(4,209). I want to find the length of elements intersection between each column of M2 to all columns of M1.
For one column in M2 I do:
res <- apply(M1, 2, function(x) length(intersect(tmp, x)))
where tmp is the first column of M2.
This takes about 30 seconds. To speed up the calculation for all columns of M2, I do foreach:
list <- foreach(k=1:ncol(M2)) %dopar% {
tmp <- M2[,k]
res <- apply(M1, 2, function(x) length(intersect(tmp, x)))
}
This takes about 20 minutes.
Is there a way to avoid this foreach loop with an apply function?
Thank you!
Having data:
set.seed(991)
M1 = matrix(sample(5, 50, TRUE), 5)
M2 = matrix(sample(5, 25, TRUE), 5)
your solution returns:
op = sapply(1:ncol(M2),
function(k) apply(M1, 2, function(x) length(intersect(M2[, k], x))))
op
# [,1] [,2] [,3] [,4] [,5]
# [1,] 3 1 3 2 3
# [2,] 3 2 3 3 4
# [3,] 2 2 2 2 3
# [4,] 2 3 3 2 3
# [5,] 2 2 3 1 2
# [6,] 2 2 2 2 3
# [7,] 2 3 3 2 3
# [8,] 2 2 3 3 3
# [9,] 2 2 3 3 3
#[10,] 1 3 2 1 2
which is what
ans1 = tcrossprod(table(col(M1), M1) > 0L, table(col(M2), M2) > 0L)
returns.
all.equal(op, ans1, check.attributes = FALSE)
#[1] TRUE
Since we don't need the number of occurences, we could replace the expensive calls to table with simple matrix manipulations:
m1 = matrix(0L, ncol(M1), max(M1))
m1[cbind(rep(1:ncol(M1), each = nrow(M1)), c(M1))] = 1L
m2 = matrix(0L, ncol(M2), max(M2))
m2[cbind(rep(1:ncol(M2), each = nrow(M2)), c(M2))] = 1L
ans2 = tcrossprod(m1, m2)
all.equal(op, ans2)
#[1] TRUE
For your case, it seems more suitable to start by making sparse tabulations, if there is a chance to avoid memory contraints:
library(Matrix)
sm1 = sparseMatrix(x = 1L,
i = rep(1:ncol(M1), each = nrow(M1)),
j = M1,
use.last.ij = TRUE)
sm2 = sparseMatrix(x = 1L,
i = rep(1:ncol(M2), each = nrow(M2)),
j = M2,
use.last.ij = TRUE)
ans3 = tcrossprod(sm1, sm2)
all.equal(op, as.matrix(ans3), check.attributes = FALSE)
#[1] TRUE
Given your matrix dimensions, you could do this which should be faster:
apply(m2, 2, function(x) colSums(m1==x[1] | m1==x[2] | m1==x[3] | m1==x[4]))
For example, suppose:
m1
[,1] [,2] [,3]
[1,] 3 6 4
[2,] 9 8 11
[3,] 10 1 12
[4,] 2 5 7
m2
[,1] [,2]
[1,] 3 6
[2,] 2 7
[3,] 1 5
[4,] 8 4
Then, it will give you:
[,1] [,2]
[1,] 2 0
[2,] 2 2
[3,] 0 2
Update about time efficiency
So to summarize, as the OP has mentioned in the comments,
The naive for solution takes about 20 mins
My solution takes about 36 secs
That of #alexis_laz about 12 secs
for doing the same job.
I want to split matrices of size k x l into blocks of size n x n considering an ofset o (Like Mathematica's Partition function does).
For example, given a matrix A like
A <- matrix(seq(1:16), nrow = 4, ncol = 4)
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 4 8 12 16
and block size = 3, offset = 1, I want as output the four submatrices that I'd get from
A[1:3, 1:3]
A[1:3, 2:4]
A[2:4, 1:3]
A[2:4, 2:4]
If offset were equal to 2 or 3, the output for this example should be only the submatrix that I get from
A[1:3, 1:3]
How can I vectorize this?
There might be a more elegant way. Here is how I'd do it by writing a myPartition function which simulates the mathematica Partition function. Firstly use Map to construct possible index along the row and column axis where we use seq to take offset into consideration, and then use cross2 from purrr to construct a list of all possible combinations of the subset index. Finally use lapply to subset the matrix and return a list of subset matrix;
The testing results on offset 1, 2 and 3 are as follows which seems to behave as expected:
library(purrr)
ind <- function(k, n, o) Map(`:`, seq(1, k-n+1, by = o), seq(n, k, by = o))
# this is a little helper function that generates subset index according to dimension of the
# matrix, the first sequence construct the starting point of the subset index with an interval
# of o which is the offset while the second sequence construct the ending point of the subset index
# use Map to construct vector from start to end which in OP's case will be 1:3 and 2:4.
myPartition <- function(mat, n, o) {
lapply(cross2(ind(nrow(mat),n,o), ind(ncol(mat),n,o)), function(i) mat[i[[1]], i[[2]]])
}
# This is basically an lapply. we use cross2 to construct combinations of all subset index
# which will be 1:3 and 1:3, 1:3 and 2:4, 2:4 and 1:3 and 2:4 and 2:4 in OP's case. Use lapply
# to loop through the index and subset.
# Testing case for offset = 1
myPartition(A, 3, 1)
# [[1]]
# [,1] [,2] [,3]
# [1,] 1 5 9
# [2,] 2 6 10
# [3,] 3 7 11
# [[2]]
# [,1] [,2] [,3]
# [1,] 2 6 10
# [2,] 3 7 11
# [3,] 4 8 12
# [[3]]
# [,1] [,2] [,3]
# [1,] 5 9 13
# [2,] 6 10 14
# [3,] 7 11 15
# [[4]]
# [,1] [,2] [,3]
# [1,] 6 10 14
# [2,] 7 11 15
# [3,] 8 12 16
# Testing case for offset = 2
myPartition(A, 3, 2)
# [[1]]
# [,1] [,2] [,3]
# [1,] 1 5 9
# [2,] 2 6 10
# [3,] 3 7 11
# Testing case for offset = 3
myPartition(A, 3, 3)
# [[1]]
# [,1] [,2] [,3]
# [1,] 1 5 9
# [2,] 2 6 10
# [3,] 3 7 11
How about this using base R, the idea is to generate all possible windows (i.e. winds) of size n*n while taking into account the offset. Then print all possible permutations of winds's elements in matrix A (i.e. perms). It works for any A of size k*l.
A <- matrix(seq(1:16), nrow = 4, ncol = 4)
c <- ncol(A); r <- nrow(A)
offset <- 1; size <- 3
sq <- seq(1, max(r,c), offset)
winds <- t(sapply(sq, function(x) c(x,(x+size-1))))
winds <- winds[winds[,2]<=max(r, c),] # check the range
if (is.vector(winds)) dim(winds) <- c(1,2) # vector to matrix
perms <- expand.grid(list(1:nrow(winds), 1:nrow(winds)))
out=apply(perms, 1, function(x) {
a11 <- winds[x[1],1];a12 <- winds[x[1],2];a21 <- winds[x[2],1];a22 <- winds[x[2],2]
if (ifelse(r<c, a12<=r, a22<=c)) { # check the range
cat("A[", a11, ":", a12, ", ", a21, ":", a22, "]", sep="", "\n")
print(A[a11:a12, a21:a22])
}
})
# A[1:3, 1:3]
# [,1] [,2] [,3]
# [1,] 1 5 9
# [2,] 2 6 10
# [3,] 3 7 11
# A[2:4, 1:3]
# [,1] [,2] [,3]
# [1,] 2 6 10
# [2,] 3 7 11
# [3,] 4 8 12
# A[1:3, 2:4]
# [,1] [,2] [,3]
# [1,] 5 9 13
# [2,] 6 10 14
# [3,] 7 11 15
# A[2:4, 2:4]
# [,1] [,2] [,3]
# [1,] 6 10 14
# [2,] 7 11 15
# [3,] 8 12 16
For size=3 and offset=2 or offset=3:
# A[1:3, 1:3]
# [,1] [,2] [,3]
# [1,] 1 5 9
# [2,] 2 6 10
# [3,] 3 7 11
For offset=2 and size=2:
# A[1:2, 1:2]
# [,1] [,2]
# [1,] 1 5
# [2,] 2 6
# A[3:4, 1:2]
# [,1] [,2]
# [1,] 3 7
# [2,] 4 8
# A[1:2, 3:4]
# [,1] [,2]
# [1,] 9 13
# [2,] 10 14
# A[3:4, 3:4]
# [,1] [,2]
# [1,] 11 15
# [2,] 12 16