R: Find all combinations without replacement of a sparse matrix - r

I want to find all possible combinations (without replacement) of a big sparse matrix. Every combination can choose at most one time from each row and column. My goal is to find the combination that maximizes the sum of the chosen entries.
Say I have the following matrix:
6 8 . .
. 5 7 .
. 6 . 9
There are 4 possible combinations (in terms of i and j): [(1,1),(2,2),(3,4)],[(1,1),(2,3),(3,2)],[(1,2),(2,3),(3,2)],[(1,2),(2,3),(3,4)]
My outcome should be the sum of entries for each possible combination, where my final goal is to find the combination that maximizes this outcome ([(1,2),(2,3),(3,4)] = 8 + 7 + 9 = 24 in this example).
Edit: here is the full code that generates the sparse matrix of which I want to find the optimal combination:
library(data.table)
library(ggplot2)
library(haven)
library(Matrix)
library(evd)
set.seed(12345)
N1 <- 100
M <- 100
I1 <- 10
I2 <- 2
I <- I1 * I2
N <- N1 * I2
J <- 5
p_c_A = 0.02
p_c_B = 0.01
p_0 = 0.05
p_1 = 0.2
dt_workers<- data.table(worker_id = 1:N,
firm_id = sample.int(M, N, replace = TRUE),
worker_type = sample.int(I1, N, replace = TRUE))
dt_workers[, worker_ethnicity := 1 * (worker_id > N1)]
dt_firms <- data.table(firm_id = 1:M,
firm_type = sample(J) )
sys_util <- matrix(NA, nrow=I1, ncol=J)
for(i in 1:dim(sys_util)[1]){
for(j in 1:dim(sys_util)[2]){
sys_util[i,j] <- i * j}
}
joint_surplus
con_A <- matrix(data = runif(N1 * M), nrow = N1, ncol = M)
con_B <- matrix(data = runif(N1 * M), nrow = N1, ncol = M)
con_A <- 1 * (con_A < p_c_A)
con_B <- 1 * (con_B < p_c_B)
p_meet_A <- con_A * p_1 + (1 - con_A) * p_0
p_meet_B <- con_B * p_1 + (1 - con_B) * p_0
meet_A <- matrix(data = runif(N1 * M), nrow = N1, ncol = M)
meet_B <- matrix(data = runif(N1 * M), nrow = N1, ncol = M)
meet_A <- 1* ( meet_A < p_meet_A )
meet_B <- 1* ( meet_B < p_meet_B )
meet <- rbind(meet_A,meet_B)
meet_sparse <- Matrix(meet, sparse = TRUE)
util <- which (meet_sparse>0, arr.ind=T)
n_draws <- dim(util)[1]
mu = 0
sigma = 10
idio = rgumbel(n=n_draws, loc=mu, scale=sigma)
util <- cbind(util,idio)
sys <- vector()
for(k in 1:dim(util)[1]){
g <- util[k,1]
f <- util[k,2]
i <- dt_workers[g, worker_type]
j <- dt_firms[f, firm_type]
sys[k] = sys_util[i,j]
}
util <- cbind(util,sys)
total_util = util[,3] + util[,4]
M <- sparseMatrix(
i = util[,1],
j = util[,2],
x = total_util
)
dat <- as.data.frame(summary(M))
dat <-dat[order(dat$i, dat$j),]
rownames(dat) <- NULL

library(Matrix)
M <- sparseMatrix(
i = c(1, 1, 2, 2, 3, 3),
j = c(1, 2, 2, 3, 2, 4),
x = c(6, 8, 5, 7, 6, 9)
)
#> 3 x 4 sparse Matrix of class "dgCMatrix"
#>
#> [1,] 6 8 . .
#> [2,] . 5 7 .
#> [3,] . 6 . 9
dat <- as.data.frame(summary(M))
#> i j x
#> 1 1 1 6
#> 2 1 2 8
#> 3 2 2 5
#> 4 3 2 6
#> 5 2 3 7
#> 6 3 4 9
row_indices <- unique(dat$i)
col_indices <- split(dat$j, dat$i)
#> $`1`
#> [1] 1 2
#>
#> $`2`
#> [1] 2 3
#>
#> $`3`
#> [1] 2 4
all_combinations_with_atmost_one_per_row <- do.call(expand.grid, col_indices)
#> 1 2 3
#> 1 1 2 2
#> 2 2 2 2
#> 3 1 3 2
#> 4 2 3 2
#> 5 1 2 4
#> 6 2 2 4
#> 7 1 3 4
#> 8 2 3 4
more_than_one_per_col <- apply(all_combinations_with_atmost_one_per_row, MARGIN = 1, anyDuplicated)
#> [1] 3 2 0 3 0 2 0 0
combinations <- all_combinations_with_atmost_one_per_row[!more_than_one_per_col, , drop = FALSE]
#> 1 2 3
#> 3 1 3 2
#> 5 1 2 4
#> 7 1 3 4
#> 8 2 3 4
lapply(
split(combinations, 1:nrow(combinations)),
function(cols) {
elements <- data.frame(i = row_indices, j = unlist(cols))
elements$value <- M[as.matrix(elements)]
list(elements = elements, sum = sum(elements$value))
}
)
#> $`1`
#> $`1`$elements
#> i j value
#> 1 1 1 6
#> 2 2 3 7
#> 3 3 2 6
#>
#> $`1`$sum
#> [1] 19
#>
#>
#> $`2`
#> $`2`$elements
#> i j value
#> 1 1 1 6
#> 2 2 2 5
#> 3 3 4 9
#>
#> $`2`$sum
#> [1] 20
#>
#>
#> $`3`
#> $`3`$elements
#> i j value
#> 1 1 1 6
#> 2 2 3 7
#> 3 3 4 9
#>
#> $`3`$sum
#> [1] 22
#>
#>
#> $`4`
#> $`4`$elements
#> i j value
#> 1 1 2 8
#> 2 2 3 7
#> 3 3 4 9
#>
#> $`4`$sum
#> [1] 24
Created on 2019-04-10 by the reprex package (v0.2.1)
And the best combination is found with res[[which.max(sapply(res, `[[`, "sum"))]]
$elements
i j value
1 1 2 8
2 2 3 7
3 3 4 9
$sum
[1] 24

I found a solution using linear programming with the help of Aurèle:
f.con <- matrix(,nrow = dim(dat)[1],ncol=0)
for(k in 1: N){
vec <- 1 * (dat[,1] == k)
f.con <- cbind(f.con , vec )
}
for(k in 1: M){
vec <- 1 * (dat[,2] == k)
f.con <- cbind(f.con , vec )
}
f.con <- t(f.con)
f.obj <- dat[,3]
f.dir <- rep ("<=", dim(f.con)[1])
f.rhs <- rep (1, dim(f.con)[1])
res = lp (direction = "max", f.obj, f.con, f.dir, f.rhs , all.int=TRUE)$solution

Related

Repeat sequence every nth element x times

I have a vector e.g:
v <- c(1, 2, 3, 4)
I want to repeat the sequence of every nth element x times e.g:
x=2
n= 2
[1] 1, 2, 1, 2, 3, 4, 3, 4
I know that
rep(v, times=n)
[1] 1, 2, 3, 4, 1, 2, 3, 4
rep(v, each=n)
[1] 1, 1, 2, 2, 3, 3, 4, 4
Thanks!
You could split the vector and then repeat:
fun <- function(v, m, n) {
unlist(by(v, ceiling(seq_along(v) / m), rep, n), use.names = FALSE)
}
v <- c(1, 2, 3, 4)
fun(v, 2, 2)
# [1] 1 2 1 2 3 4 3 4
fun(v, 3, 3)
# [1] 1 2 3 1 2 3 1 2 3 4 4 4
Another split option:
unlist(rep(split(v,(seq_along(v)-1) %/% n), each = x), use.names = FALSE)
#[1] 1 2 1 2 3 4 3 4
We may create a grouping index with gl and use tapply
f_rep <- function(v, x, n)
{
unname(unlist(tapply(v, as.integer(gl(length(v), x,
length(v))), rep, times = n)))
}
-testing
> x <- 2
> n <- 2
> f_rep(v, x, n)
[1] 1 2 1 2 3 4 3 4
> f_rep(v, 3, 3)
[1] 1 2 3 1 2 3 1 2 3 4 4 4
With sequence:
v <- 9:1
x <- 2L
n <- 3L
v[sequence(rep(n, x*(length(v) %/% n)), rep(seq(1, length(v), n), each = x))]
#> [1] 9 8 7 9 8 7 6 5 4 6 5 4 3 2 1 3 2 1
If it needs to handle vectors whose lengths are not multiples of n:
v <- 1:5
x <- 3L
n <- 3L
v[
sequence(
c(
rep(n, x*(length(v) %/% n)),
rep(length(v) %% n, x)
),
c(
rep(seq(1, length(v), n), each = x),
length(v) - length(v) %% n + 1L
)
)
]
#> [1] 1 2 3 1 2 3 1 2 3 4 5 4 5 4 5

R function for returning multiplication result of a function

appreciate your guidance as im new to R programme. basically i've created a function to check whether the value is even or odd.
i wish to create a new result column whereby 'even' results in the original value * 2, and 'odd' results in the original value - 5.
not sure where i've gone wrong with the second part of the code but i am trying to figure out where can i include my 'check' column in the second function to specify it should be checked for even or odd.
i only learnt about ifelse(check(df$check) but it doesnt seem to work in my instance.
much appreciated!
## print 'odd' or 'even' results in df
check = function(df,col){
df['check'] =
ifelse(df[,col] %% 2 ==0, 'even', 'odd')
return(df)
}
# multiplication and subtraction for odd_even results
checkresult = function(df,col){
df['res'] =
ifelse(check(df) == 'even', df[,col] * 2, df[,col]-5)
return(df)
}
checkresult(df)
The simplest way to do it is by not implementing a new function, just use ifelse() as it was intended:
set.seed(100)
df <- data.frame(x = sample(1:15, size = 100, replace = T))
df$res <- ifelse(df$x %% 2 == 0, df$x * 2, df$x - 5)
df |> head()
#> x res
#> 1 10 20
#> 2 7 2
#> 3 6 12
#> 4 3 -2
#> 5 9 4
#> 6 10 20
If you need to implement a function that returns a dataframe:
set.seed(100)
df <- data.frame(x = sample(1:15, size = 100, replace = T))
my_func <- function(dframe, column){
vec <- dframe[,column]
dframe$res <- ifelse(vec %% 2 == 0, vec * 2, vec - 5)
return(dframe)
}
my_func(df, "x") |> head()
#> x res
#> 1 10 20
#> 2 7 2
#> 3 6 12
#> 4 3 -2
#> 5 9 4
#> 6 10 20
Edit 1
If you want the parity of the number, one extra line is required:
set.seed(100)
df <- data.frame(x = sample(1:15, size = 100, replace = T))
my_func <- function(dframe, column){
vec <- dframe[,column]
dframe$parity <- ifelse(vec %% 2 == 0, "EVEN", "ODD")
dframe$res <- ifelse(vec %% 2 == 0, vec * 2, vec - 5)
return(dframe)
}
my_func(df, "x") |> head()
#> x parity res
#> 1 10 EVEN 20
#> 2 7 ODD 2
#> 3 6 EVEN 12
#> 4 3 ODD -2
#> 5 9 ODD 4
#> 6 10 EVEN 20

how to name the result of the list of a list in R

Suppose I have the following list of a list.
s <- c(1,2,3)
ss <- c(4,5,6)
S <- list(s,ss)
h <- c(4,8,7)
hh <- c(0,3,4)
H <- list(h,hh)
HH <- list(S,H)
I would like to name the result of the list as follows:
$First_model
$Res_1[[1]]
[1] 1 2 3
$Res_2[[2]]
[1] 4 5 6
$Second_model
$Res_1[[1]]
[1] 4 8 7
$Res_2[[2]]
[1] 0 3 4
I tried the following, but it is not what I expected.
names1 <- c("First","Second")
# And second you paste them to your list
names(HH) <- paste0(names1,"_result", sep = "")
You can try the following -
names1 <- c("First","Second")
lapply(setNames(HH, paste0(names1, '_Model')), function(x)
setNames(x, paste0('Res_', seq_along(x))))
#$First_Model
#$First_Model$Res_1
#[1] 1 2 3
#$First_Model$Res_2
#[1] 4 5 6
#$Second_Model
#$Second_Model$Res_1
#[1] 4 8 7
#$Second_Model$Res_2
#[1] 0 3 4
library(purrr)
s <- c(1,2,3)
ss <- c(4,5,6)
S <- list(s,ss)
h <- c(4,8,7)
hh <- c(0,3,4)
H <- list(h,hh)
HH <- list(S,H)
names1 <- c("First","Second")
set_names(HH, names1) %>% map(~set_names(..1, c('Res_1', 'Res_2')))
#> $First
#> $First$Res_1
#> [1] 1 2 3
#>
#> $First$Res_2
#> [1] 4 5 6
#>
#>
#> $Second
#> $Second$Res_1
#> [1] 4 8 7
#>
#> $Second$Res_2
#> [1] 0 3 4
Created on 2021-06-03 by the reprex package (v2.0.0)

R - Sum list of matrix with different columns

I have a large list of matrix with different columns and I would like to sum these matrix counting 0 if column X does not exist in one matrix.
If you have used the function rbind.fill from plyr I would like something similar but with sum function. Of course I could build a function to do that, but I'm thinking about a native function efficiently programmed in Frotrain or C due to my large data.
Here an example:
This is the easy example where I have the same columns:
aa <- list(
m1 = matrix(c(1,2,3,4,5,6,7,8,9), nrow = 3, dimnames = list(c(1,2,3),c('a','b','c'))),
m2 = matrix(c(1,2,3,4,5,6,7,8,9), nrow = 3, dimnames = list(c(1,2,3),c('a','b','c')))
)
aa
Reduce('+',aa)
Giving the results:
> aa
$m1
a b c
1 1 4 7
2 2 5 8
3 3 6 9
$m2
a b c
1 1 4 7
2 2 5 8
3 3 6 9
> Reduce('+',aa)
a b c
1 2 8 14
2 4 10 16
3 6 12 18
And with my data:
bb <- list(
m1 = matrix(c(1,2,3,7,8,9), nrow = 3, dimnames = list(c(1,2,3),c('a','c'))),
m2 = matrix(c(1,2,3,4,5,6,7,8,9), nrow = 3, dimnames = list(c(1,2,3),c('a','b','c')))
)
bb
Reduce('+',bb)
Here I would like to have b = c(0,0,0) in the first matrix to sum them.
> bb
$m1
a c
1 1 7
2 2 8
3 3 9
$m2
a b c
1 1 4 7
2 2 5 8
3 3 6 9
Many thanks!
Xevi
One option would be
un1 <- sort(unique(unlist(lapply(bb, colnames))))
bb1 <- lapply(bb, function(x) {
nm1 <- setdiff(un1, colnames(x))
m1 <- matrix(0, nrow = nrow(x), ncol = length(nm1), dimnames = list(NULL, nm1))
cbind(x, m1)[, un1]})
and use the Reduce
Reduce(`+`, bb1)
# a b c
# 1 2 4 14
# 2 4 5 16
# 3 6 6 18

How to use R to produce sequence (1,2,3...n,2,3...n,3,4..n...n-1,n)

I don't know what it is called, nested arithmetic progression maybe?
If n is a integer, say n=50, What I would like is
(1,2,3...n,2,3...n,3,4..n...n-1,n)
it is like concatenation of
1:n, 2:n, ...,n-1:n
Is there an easy way of doing this?
Thanks!
The subject says the last subsequence is n but the body of the question says it is (n-1):n. I have assumed (n-1):n but to get the other just change each n-1 in the code to n and each 2 in the code to 1.
1) lapply Assuming we want 1:n, 2:n, ..., (n-1):n iterate over the starting value of each subsequence like this:
n <- 4
unlist(lapply(seq_len(n-1), seq, n))
## [1] 1 2 3 4 2 3 4 3 4
2) sequence Another approach is to transform sequence(seq(n, 2)) like this:
s <- sequence(seq(n, 2))
s + cumsum(s == 1) - 1
## [1] 1 2 3 4 2 3 4 3 4
3) outer
m <- outer(seq_len(n), seq_len(n-1), ">=") * seq(n)
m[m > 0]
## [1] 1 2 3 4 2 3 4 3 4
3a) This variation of (3) also works:
m <- outer(seq_len(n), seq_len(n-1), "+") - 1
m[m <= n]
## [1] 1 2 3 4 2 3 4 3 4
4) Reduce
f <- function(x, y) c(x, seq(y, n))
Reduce(f, 1:(n-1), c())
## [1] 1 2 3 4 2 3 4 3 4
5) Recursion
Recurse <- function(v) {
if (length(v) > 2) c(v, Recall(tail(v, -1))) else v
}
Recurse(1:n)
## [1] 1 2 3 4 2 3 4 3 4
Using Rcpp
library(Rcpp)
cppFunction('Rcpp::NumericVector mySeq( int n ) {
Rcpp::IntegerVector vec = seq(0, n);
int total_n = sum( vec );
Rcpp::NumericVector out(total_n);
size_t i, j;
int idx = 0;
int x = 1;
for( i = 0; i < n; i++ ) {
x = i + 1;
for( j = i; j < n; j++) {
out[idx] = x;
x++;
idx++;
}
}
return out;
}')
mySeq(5)
# [1] 1 2 3 4 5 2 3 4 5 3 4 5 4 5 5
mySeq(10)
# [1] 1 2 3 4 5 6 7 8 9 10 2 3 4 5 6 7 8 9 10 3 4 5 6 7 8 9 10 4 5 6 7 8 9 10 5 6 7 8
# [39] 9 10 6 7 8 9 10 7 8 9 10 8 9 10 9 10 10
And as ever with these multi-option answers, here's a benchmark
library(microbenchmark)
n <- 10000
microbenchmark(
rcpp = { mySeq(n) },
lapply = { lapn(n) },
sequence = { seqn(n) },
outer = { outn(n) },
outer2 = { outn2(n) },
# reduce = { reducen(n) }, ## takes too long
# recurse = { recursen(n) }, ## takes too long
times = 10
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# rcpp 213.9762 220.3786 245.6753 230.6847 262.8544 326.5764 10
# lapply 250.5695 260.5681 288.2523 278.9582 302.9768 367.5507 10
# sequence 1356.2691 1430.5877 1472.6946 1455.7467 1485.3578 1753.4076 10
# outer 2381.8864 2459.8159 2497.1630 2478.9865 2526.9577 2662.0489 10
# outer2 2509.8079 2531.1497 2651.6906 2636.3873 2785.3693 2820.2356 10
Functions
lapn <- function(n) { unlist(lapply(seq_len(n-1), seq, n)) }
seqn <- function(n) {
s <- sequence(seq(n, 2))
s + cumsum(s == 1) - 1
return(s)
}
outn <- function(n) {
m <- outer(seq_len(n), seq_len(n-1), ">=") * seq(n)
m[m > 0]
}
outn2 <- function(n) {
m <- outer(seq_len(n), seq_len(n-1), "+") - 1
m[m <= n]
}
reducen <- function(n) {
f <- function(x, y) c(x, seq(y, n))
Reduce(f, 1:(n-1), c())
}
recursen <- function(n) {
Recurse <- function(v) {
if (length(v) > 2) c(v, Recall(tail(v, -1))) else v
}
Recurse(1:n)
}
For example,
> n <- 4
> X <- matrix(1:n, nrow = n, ncol = n)
> X
[,1] [,2] [,3] [,4]
[1,] 1 1 1 1
[2,] 2 2 2 2
[3,] 3 3 3 3
[4,] 4 4 4 4
> lower.tri(X, diag = TRUE)
[,1] [,2] [,3] [,4]
[1,] TRUE FALSE FALSE FALSE
[2,] TRUE TRUE FALSE FALSE
[3,] TRUE TRUE TRUE FALSE
[4,] TRUE TRUE TRUE TRUE
> x <- X[lower.tri(X, diag = TRUE)]
> x
[1] 1 2 3 4 2 3 4 3 4 4
> x[-length(x)]
[1] 1 2 3 4 2 3 4 3 4

Resources