How to speed up a Rcpp double for loop - r

Since I am not an expert in C++ (or Rcpp) I was wondering if there are other / better ways to write this little function below, which uses 2 for loops to fill up a matrix based on the values of 2 other matrices.
Here is some example data:
library(Rcpp)
sizem <- 50000
n <- 500
k <- 5
d <- matrix(nrow = n, ncol = k, 0)
x <- matrix(nrow = n, ncol = sizem, runif(sizem*n, 0, 500))
rownames(x) <- paste0("p-",1:n)
v <- matrix(nrow = k, ncol = sizem, sample(0:300, size = sizem*k, T))
and the function currently looks like this:
cppFunction("
NumericMatrix compdistc(NumericMatrix d, NumericMatrix x, NumericMatrix v, int k, int n) {
NumericMatrix out = clone(d);
int i,j;
for (j=0; j < k; ++j) {
for (i=0; i < n; ++i) {
out(i,j) = sum(pow((x(i,_) - v(j,_)), 2));
}
}
return(out);
}
")
out <- compdistc(d, x, v, k, n)
and this is my current benchmark:
mc <- microbenchmark(times = 10,
out = compdistc(d, x, v, k, n)
); mc
Unit: seconds
expr min lq mean median uq max neval
out 2.123477 2.135013 2.174556 2.148467 2.204108 2.354089 10

Related

Why Rcpp code is much slowly than raw R code

I want to implement RRHO analysis described in this manuscript https://academic.oup.com/nar/article/38/17/e169/1033168,
Maybe it's more clear and easy to see following R code to implement RRHO analysis. calculate_hyper_overlap function is what I try to do.
## Compute the overlaps between two *character* atomic vector:
hyper_test <- function(sample1, sample2, n) {
count <- length(intersect(sample1, sample2))
m <- length(sample1)
k <- length(sample2)
# under-enrichment
if (count <= m * k / n) {
sign <- -1L
pvalue <- stats::phyper(
q = count, m = m, n = n - m,
k = k, lower.tail = TRUE, log.p = FALSE
)
} else {
# over-enrichment
sign <- 1L
pvalue <- stats::phyper(
q = count, m = m, n = n - m,
k = k, lower.tail = FALSE, log.p = FALSE
)
}
c(count = count, pvalue = pvalue, sign = sign)
}
calculate_hyper_overlap <- function(sample1, sample2, n, stepsize) {
row_ids <- seq.int(stepsize, length(sample1), by = stepsize)
col_ids <- seq.int(stepsize, length(sample2), by = stepsize)
indexes <- expand.grid(
row_ids = row_ids,
col_ids = col_ids
)
overlaps <- apply(as.matrix(indexes), 1L, function(x) {
hyper_test(
sample1[seq_len(x[["row_ids"]])],
sample2[seq_len(x[["col_ids"]])],
n = n
)
}, simplify = FALSE)
overlaps <- data.table::transpose(overlaps)
number_of_obj <- length(row_ids)
matrix_counts <- matrix(
overlaps[[1L]],
nrow = number_of_obj
)
matrix_pvals <- matrix(
overlaps[[2L]],
nrow = number_of_obj
)
matrix_signs <- matrix(
overlaps[[3L]],
nrow = number_of_obj
)
list(
counts = matrix_counts,
pvalue = matrix_pvals,
signs = matrix_signs
)
}
The Rcpp code I use is here:
// [[Rcpp::export]]
List calculate_hyper_overlap_cpp(CharacterVector sample1, CharacterVector sample2, int n, int stepsize)
{
int list1_len = floor((sample1.size() - stepsize) / stepsize) + 1;
int list2_len = floor((sample2.size() - stepsize) / stepsize) + 1;
IntegerMatrix counts(list1_len, list2_len);
NumericMatrix pvalue(list1_len, list2_len);
IntegerMatrix signs(list1_len, list2_len);
for (int i = 0; i < list1_len; i++)
{
for (int j = 0; j < list2_len; j++)
{
CharacterVector list1 = sample1[Range(0, (i + 1) * stepsize - 1)];
CharacterVector list2 = sample2[Range(0, (j + 1) * stepsize - 1)];
int count = intersect(list1, list2).size();
counts(i, j) = count;
int m = list1.size(), k = list2.size();
if (count <= m * k / n)
// under-enrichment
{
pvalue(i, j) = R::phyper(count, m, n - m, k, true, false);
signs(i, j) = -1;
}
else
// over-enrichment
{
pvalue(i, j) = R::phyper(count, m, n - m, k, false, false);
signs(i, j) = 1;
}
}
}
return List::create(Named("counts") = counts,
Named("pvalue") = pvalue,
Named("signs") = signs);
}
here is the test:
n <- 200
sample1 <- rnorm(n)
sample2 <- rnorm(n)
names(sample1) <- names(sample2) <- paste0("gene", seq_len(n))
bench_res <- bench::mark(
res1 <- calculate_hyper_overlap_cpp(
names(sample1), names(sample2),
n = n, stepsize = 3L
),
res2 <- calculate_hyper_overlap(
names(sample1), names(sample2),
n = n, stepsize = 3L
),
check = FALSE
)
dplyr::select(bench_res, where(~ !is.list(.x)))
The test results
The first line is the time by Rcpp code and the second by raw R code

Vectorizing nested ifelse

I'm trying to fasten my function in R. It contains of three ifelse statements where one of it is nested. For the single one I conducted vectorization which reduced my computation time. Unfortunately I don't see how I can vectorize the nested one. Every way I apply it returns an error. Furthemore if there is any another quirk I can use to speed it up?
cont.run <- function(reps=10000, n=10000, d=0.005, l=10 ,s=0.1) {
r <- rep(0, reps)
theta <- rep(0, n)
for (t in 1:reps) {
epsilon <- rnorm(1, 0, d)
Zt = sum(ifelse(epsilon > theta, 1,
ifelse(epsilon < -theta, -1, 0)))
r[t] <- Zt / (l * n)
theta <- ifelse(runif(n) < s, abs(r[t]), theta)
}
return(mean(r))
}
system.time(cont.run())
I got:
cont.run <- function(reps=10000, n=10000, d=0.005, l=10 ,s=0.1) {
r <- rep(0, reps)
theta <- rep(0, n)
for (t in 1:reps) {
epsilon <- rnorm(1, 0, d)
Zt = rep(NA, length(theta))
Zt = sum(Zt[epsilon > theta, 1])
Zt = sum(Zt[epsilon < -theta, -1])
r[t] <- Zt / (l * n)
theta = rep(theta, length(s))
theta[runif(n) < s] = abs(r[t])
}
return(mean(r))
}
system.time(cont.run())
Here's a little bit improved code.
Main change is that we don't use double ifelse, but instead perform two sums on TRUE vectors (sum(epsilon > theta) - sum(epsilon < -theta)) (we don't care about zeroes here). I added a couple of other improvements (eg., replaced rep with numeric, moved some operations outside the for loop).
contRun <- function(reps = 1e4, n = 1e4, d = 5e-3, l = 10, s = 0.1) {
# Replace rep with numeric
r <- numeric(reps)
theta <- numeric(n)
# Define before loop
ln <- l * n
# Don't use t as it's a function in base R
for (i in 1:reps) {
epsilon <- rnorm(1, 0, d)
# Sum two TRUE vectors
r[i] <- (sum(epsilon > theta) - sum(epsilon < -theta)) / ln
# Define before ifelse
absr <- abs(r[i])
theta <- ifelse(runif(n) < s, absr, theta)
}
return(mean(r))
}
library(microbenchmark)
microbenchmark(cont.run(), contRun())
Unit: seconds
expr min lq mean median uq max neval
cont.run() 13.652324 13.749841 13.769848 13.766342 13.791573 13.853786 100
contRun() 6.533654 6.559969 6.581068 6.577265 6.596459 6.770318 100
PS. For this kind of computing you might one to set seed (set.seed() before the for loop) to make sure that you can reproduce your results.
Furthemore if there is any another quirk I can use to speed it up?
In addition to PoGibas answer, you can avoid calling ifelse and get a faster function as follows
contRun <- function(reps = 1e4, n = 1e4, d = 5e-3, l = 10, s = 0.1) {
# Replace rep with numeric
r <- numeric(reps)
theta <- numeric(n)
# Define before loop
ln <- l * n
# Don't use t as it's a function in base R
for (i in 1:reps) {
epsilon <- rnorm(1, 0, d)
# Sum two TRUE vectors
r[i] <- (sum(epsilon > theta) - sum(epsilon < -theta)) / ln
# Define before ifelse
absr <- abs(r[i])
theta <- ifelse(runif(n) < s, absr, theta)
}
mean(r)
}
contRun2 <- function(reps = 1e4, n = 1e4, d = 5e-3, l = 10, s = 0.1) {
r <- numeric(reps)
theta <- numeric(n)
ln <- l * n
for (i in 1:reps) {
epsilon <- rnorm(1, 0, d)
r[i] <- (sum(epsilon > theta) - sum(epsilon < -theta)) / ln
absr <- abs(r[i])
# avoid ifelse
theta[runif(n) < s] <- absr
}
mean(r)
}
contRun3 <- function(reps = 1e4, n = 1e4, d = 5e-3, l = 10, s = 0.1) {
r <- numeric(reps)
theta <- numeric(n)
ln <- l * n
for (i in 1:reps) {
epsilon <- rnorm(1, 0, d)
r[i] <- (sum(epsilon > theta) - sum(epsilon < -theta)) / ln
absr <- abs(r[i])
# replace runif
theta[sample(c(T, F), prob = c(s, 1 - s), size = n, replace = TRUE)] <- absr
}
mean(r)
}
# gives the same
set.seed(1)
o1 <- contRun()
set.seed(1)
o2 <- contRun2()
set.seed(1)
o3 <- contRun3()
all.equal(o1, o2)
#R [1] TRUE
all.equal(o1, o3) # likely will not match
#R [1] [1] "Mean relative difference: 0.1508537"
# but distribution is the same
set.seed(1)
c1 <- replicate(10000, contRun2(reps = 100, n = 100))
c2 <- replicate(10000, contRun3(reps = 100, n = 100))
par(mfcol = c(1, 2), mar = c(5, 4, 2, .5))
hist(c1, breaks = seq(-.015, .015, length.out = 26))
hist(c2, breaks = seq(-.015, .015, length.out = 26))
# the latter is faster
microbenchmark::microbenchmark(
contRun = {set.seed(1); contRun ()},
contRun2 = {set.seed(1); contRun2()},
contRun3 = {set.seed(1); contRun3()},
times = 5)
#R Unit: seconds
#R expr min lq mean median uq max neval
#R contRun 7.121264 7.371242 7.388159 7.384997 7.443940 7.619352 5
#R contRun2 3.811267 3.887971 3.892523 3.892158 3.921148 3.950070 5
#R contRun3 1.920594 1.920754 1.998829 1.999755 2.009035 2.144005 5
The only bottleneck now is runif in contRun2. Replacing it with sample yields quite an improvement.

How can I subset rows or columns of a bigstatsr::FBM in Rcpp and store them in a vector?

I have a function that computes basic summary statistics from the rows (or columns) of a given Matrix and I am now trying to also use this function with a bigstatsr::FBM (I am aware that using columns should be more efficient).
The reason I want to store the rows / columns in a vector is that I would like to compute quantiles with std::nth_element. If there is a different way to do that with out the vector I would be equally happy.
This is the code I use for a regular matrix.
// [[Rcpp::plugins(cpp11)]]
// [[Rcpp::depends(RcppEigen)]]
#include <RcppEigen.h>
using namespace Rcpp;
// [[Rcpp::export]]
Eigen::MatrixXd summaryC(Eigen::MatrixXd x,int nrow) {
Eigen::MatrixXd result(nrow, 5);
int indices[6] = {-1, 0, 249, 500, 750, 999};
for (int i = 0; i < nrow; i++) {
Eigen::VectorXd v = x.row(i);
for (int q = 0; q < 5; ++q) {
std::nth_element(v.data() + indices[q] + 1,
v.data() + indices[q+1],
v.data() + v.size());
result(i,q) = v[indices[q+1]];
}
}
return result;
}
/*** R
x <- matrix(as.numeric(1:1000000), ncol = 1000)
summaryC(x = x, nrow = 1000)
***/
However I struggle to do this with an FBM as I am not fully grasping the intricacies of how the FBM - Pointer works.
I tried the following without success:
// [[Rcpp::depends(BH, bigstatsr, RcppEigen)]]
// [[Rcpp::plugins(cpp11)]]
#include <bigstatsr/BMAcc.h>
#include <RcppEigen.h>
// [[Rcpp::export]]
Eigen::MatrixXd summaryCbig(Environment fbm,int nrow, Eigen::VecttorXi ind_col) {
Eigen::MatrixXd result(nrow, 5);
XPtr<FBM> xpMat = fbm["address"];
BMAcc<double> macc(xpMat);
int indices[6] = {-1, 0, 249, 500, 750, 999};
for (int i = 0; i < nrow; i++) {
Eigen::VectorXd v = macc.row(i); // this does not work
Eigen::VectorXd v = macc(i,_); // this does not work
SubBMAcc<double> maccr(XPtr, i, ind_col -1); // This did not work with Eigen::VectorXi, but works with const NumericVector&
Eigen::VectorXd v = maccr // this does not work even for appropriate ind_col
for (int q = 0; q < 5; ++q) {
std::nth_element(v.data() + indices[q] + 1,
v.data() + indices[q+1],
v.data() + v.size());
macc(i,q) = v[indices[q+1]];
}
}
}
/*** R
x <- matrix(as.numeric(1:1000000), ncol = 1000)
summaryCbig(x = x, nrow = 1000, ind_col = 1:1000)
***/
Any help would be greatly appreciated, thank you!
Update - the big_apply - approach
I implemented the approach twice with two differently sized matrices X1 and X2. Code for X1:
X1 <- FBM(1000, 1000, init 1e6)
X2 <- FBM(10000, 10000, init = 9999)
library(bigstatsr)
microbenchmark::microbenchmark(
big_apply(X, a.FUN = function(X, ind) {
matrixStats::rowQuantiles(X1[ind, ])
}, a.combine = "rbind", ind = rows_along(X), ncores = nb_cores(), block.size = 500),
big_apply(X, a.FUN = function(X, ind) {
matrixStats::rowQuantiles(X1[ind, ])
}, a.combine = "rbind", ind = rows_along(X), ncores = 1, block.size = 500),
times = 5
)
When using X1 and block.size = 500, having 4 cores instead of 1 makes the task 5-10 times slower on my PC (4 CPU and using windows, unfortunately).
using the bigger matrix X2 and leaving block.size with the default takes 10 times longer with 4 cores instead of the non-parallelized version.
Result for X2:
min lq mean median uq max neval
16.149055 19.13568 19.369975 20.139363 20.474103 20.951676 5
1.297259 2.67385 2.584647 2.858035 2.867537 3.226552 5
Assuming you have
library(bigstatsr)
X <- FBM(1000, 1000, init = 1:1e6)
I would not reinvent the wheel and use:
big_apply(X, a.FUN = function(X, ind) {
matrixStats::rowQuantiles(X[ind, ])
}, a.combine = "rbind", ind = rows_along(X), ncores = nb_cores(), block.size = 500)
Choose the block.size (number of rows) wisely.
Function big_apply() is very useful if you want to apply an R(cpp) function to blocks of the FBM.
Edit: Of course, parallelism will me slower for small matrices, because of OVERHEAD of parallelism (usually, 1-3 seconds). See the results for X1 and X2:
library(bigstatsr)
X1 <- FBM(1000, 1000, init = 1e6)
microbenchmark::microbenchmark(
PAR = big_apply(X1, a.FUN = function(X, ind) {
matrixStats::rowQuantiles(X[ind, ])
}, a.combine = "rbind", ind = rows_along(X1), ncores = nb_cores(), block.size = 500),
SEQ = big_apply(X1, a.FUN = function(X, ind) {
matrixStats::rowQuantiles(X[ind, ])
}, a.combine = "rbind", ind = rows_along(X1), ncores = 1, block.size = 500),
times = 5
)
Unit: milliseconds
expr min lq mean median uq max neval cld
PAR 1564.20591 1602.0465 1637.77552 1629.9803 1651.04509 1741.59974 5 b
SEQ 68.92936 69.1002 76.70196 72.9173 85.31751 87.24543 5 a
X2 <- FBM(10000, 10000, init = 9999)
microbenchmark::microbenchmark(
PAR = big_apply(X2, a.FUN = function(X, ind) {
matrixStats::rowQuantiles(X[ind, ])
}, a.combine = "rbind", ind = rows_along(X2), ncores = nb_cores(), block.size = 500),
SEQ = big_apply(X2, a.FUN = function(X, ind) {
matrixStats::rowQuantiles(X[ind, ])
}, a.combine = "rbind", ind = rows_along(X2), ncores = 1, block.size = 500),
times = 5
)
Unit: seconds
expr min lq mean median uq max neval cld
PAR 4.757409 4.958869 5.071982 5.083381 5.218098 5.342153 5 a
SEQ 10.842828 10.846281 11.177460 11.360162 11.416967 11.421065 5 b
The bigger your matrix is, the more you will gain from parallelism.

Reducing nested for loop to single loop in R

This nested for loop can take quite some time to run depending on inputs to specs, perms and K. 'pop' is just an array to store all values. Perms is a large value, say 10,000.
K <- 1
N <- 100
Hstar <- 10
perms <- 10000
specs <- 1:N
pop <- array(dim = c(c(perms, N), K))
haps <- as.character(1:Hstar)
probs <- rep(1/Hstar, Hstar)
for(j in 1:perms){
for(i in 1:K){
if(i == 1){
pop[j, specs, i] <- sample(haps, size = N, replace = TRUE, prob = probs)
}
else{
pop[j ,, 1] <- sample(haps[s1], size = N, replace = TRUE, prob = probs[s1])
pop[j ,, 2] <- sample(haps[s2], size = N, replace = TRUE, prob = probs[s2])
}
}
}
HAC.mat <- array(dim = c(c(perms, N), K))
for(k in specs){
for(j in 1:perms){
for(i in 1:K){
ind.index <- sample(specs, size = k, replace = FALSE)
hap.plot <- pop[sample(1:nrow(pop), size = 1, replace = TRUE), ind.index, sample(1:K, size = 1, replace = TRUE)]
HAC.mat[j, k, i] <- length(unique(hap.plot))
}
}
}
means <- apply(HAC.mat, MARGIN = 2, mean)
lower <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.025))
upper <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.975))
par(mfrow = c(1, 2))
plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, Hstar))
polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
lines(specs, means, lwd = 2)
HAC.bar <- barplot(N*probs, xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = 1:Hstar)
To make the loop run faster, I am thinking to condense the above loop into a single loop and having a single index (i) run from 1:(specs*perms) and using modular arithmetic with floor and ceiling functions to get the job done. I am not quite certain how best to implement this.
Let's use RcppArmadillo.
But first, I need to change 2 things to your code:
It is easier (and faster) to work with pop as an array of integers rather than characters. It is easy to make a correspondence table using unique and match.
I need to permute the first two dimensions of pop so that the accesses are more contiguous.
New code to generate pop:
K <- 1
N <- 100
Hstar <- 10
perms <- 10000
specs <- 1:N
pop <- array(dim = c(N, perms, K))
haps <- 1:Hstar
probs <- rep(1/Hstar, Hstar)
for(j in 1:perms){
for(i in 1:K){
if(i == 1){
pop[, j, i] <- sample(haps, size = N, replace = TRUE, prob = probs)
}
else{
pop[, j, 1] <- sample(haps[s1], size = N, replace = TRUE, prob = probs[s1])
pop[, j, 2] <- sample(haps[s2], size = N, replace = TRUE, prob = probs[s2])
}
}
}
RcppArmadillo code to generate HAC.mat:
// [[Rcpp::depends(RcppArmadillo)]]
#define ARMA_DONT_PRINT_OPENMP_WARNING
#include <RcppArmadillo.h>
#include <RcppArmadilloExtensions/sample.h>
#include <set>
using namespace Rcpp;
int sample_one(int n) {
return n * unif_rand();
}
int sample_n_distinct(const IntegerVector& x,
int k,
const int * pop_ptr) {
IntegerVector ind_index = RcppArmadillo::sample(x, k, false);
std::set<int> distinct_container;
for (int i = 0; i < k; i++) {
distinct_container.insert(pop_ptr[ind_index[i]]);
}
return distinct_container.size();
}
// [[Rcpp::export]]
arma::Cube<int> fillCube(const arma::Cube<int>& pop,
const IntegerVector& specs,
int perms,
int K) {
int N = specs.size();
arma::Cube<int> res(perms, N, K);
IntegerVector specs_C = specs - 1;
const int * pop_ptr;
int i, j, k;
for (i = 0; i < K; i++) {
for (k = 0; k < N; k++) {
for (j = 0; j < perms; j++) {
pop_ptr = &(pop(0, sample_one(perms), sample_one(K)));
res(j, k, i) = sample_n_distinct(specs_C, k + 1, pop_ptr);
}
}
}
return res;
}
In R:
Rcpp::sourceCpp('cube-sample.cpp')
HAC.mat <- fillCube(pop, specs, perms, K)
This is 10 times as fast as your version on my computer.

R - Vectorized implementation of ternary function

I have three vectors X, Y and Z of equal length n. I need to create an n x n x n array of a function f(X[i],Y[j],Z[k]). The straightforward way to do this is to sequentially loop through each element of each of the 3 vectors. However, the time required to compute the array grows exponentially with n. Is there a way to implement this using vectorized operations?
EDIT: As mentioned in the comments, I have added a simple example of what's needed.
set.seed(1)
X = rnorm(10)
Y = seq(11,20)
Z = seq(21,30)
F = array(0, dim=c( length(X),length(Y),length(Z) ) )
for (i in 1:length(X))
for (j in 1:length(Y))
for (k in 1:length(Z))
F[i,j,k] = X[i] * (Y[j] + Z[k])
Thanks.
You can use nested outer :
set.seed(1)
X = rnorm(10)
Y = seq(11,20)
Z = seq(21,30)
F = array(0, dim = c( length(X),length(Y),length(Z) ) )
for (i in 1:length(X))
for (j in 1:length(Y))
for (k in 1:length(Z))
F[i,j,k] = X[i] * (Y[j] + Z[k])
F2 <- outer(X, outer(Y, Z, "+"), "*")
> identical(F, F2)
[1] TRUE
A microbenchmark including the expand.grid solution proposed by Nick K :
X = rnorm(100)
Y = seq(1:100)
Z = seq(101:200)
forLoop <- function(X, Y, Z) {
F = array(0, dim = c( length(X),length(Y),length(Z) ) )
for (i in 1:length(X))
for (j in 1:length(Y))
for (k in 1:length(Z))
F[i,j,k] = X[i] * (Y[j] + Z[k])
return(F)
}
nestedOuter <- function(X, Y, Z) {
outer(X, outer(Y, Z, "+"), "*")
}
expandGrid <- function(X, Y, Z) {
df <- expand.grid(X = X, Y = Y, Z = Z)
G <- df$X * (df$Y + df$Z)
dim(G) <- c(length(X), length(Y), length(Z))
return(G)
}
library(microbenchmark)
mbm <- microbenchmark(
forLoop = F1 <- forLoop(X, Y, Z),
nestedOuter = F2 <- nestedOuter(X, Y, Z),
expandGrid = F3 <- expandGrid(X, Y, Z),
times = 50L)
> mbm
Unit: milliseconds
expr min lq mean median uq max neval
forLoop 3261.872552 3339.37383 3458.812265 3388.721159 3524.651971 4074.40422 50
nestedOuter 3.293461 3.36810 9.874336 3.541637 5.126789 54.24087 50
expandGrid 53.907789 57.15647 85.612048 88.286431 103.516819 235.45443 50
Here's as an additional option, a possible Rcpp implementation (in case you like your loops). I wasn't able to outperform #Juliens solution though (maybe someone can), but they are more or less have the same timing
library(Rcpp)
cppFunction('NumericVector RCPP(NumericVector X, NumericVector Y, NumericVector Z){
int nrow = X.size(), ncol = 3, indx = 0;
double temp(1) ;
NumericVector out(pow(nrow, ncol)) ;
IntegerVector dim(ncol) ;
for (int l = 0; l < ncol; l++){
dim[l] = nrow;
}
for (int j = 0; j < nrow; j++) {
for (int k = 0; k < nrow; k++) {
temp = Y[j] + Z[k] ;
for (int i = 0; i < nrow; i++) {
out[indx] = X[i] * temp ;
indx += 1 ;
}
}
}
out.attr("dim") = dim;
return out;
}')
Validating
identical(RCPP(X, Y, Z), F)
## [1] TRUE
A quick benchmark
set.seed(123)
X = rnorm(100)
Y = 1:100
Z = 101:200
nestedOuter <- function(X, Y, Z) outer(X, outer(Y, Z, "+"), "*")
library(microbenchmark)
microbenchmark(
nestedOuter = nestedOuter(X, Y, Z),
RCPP = RCPP(X, Y, Z),
unit = "relative",
times = 1e4)
# Unit: relative
# expr min lq mean median uq max neval
# nestedOuter 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 10000
# RCPP 1.164254 1.141713 1.081235 1.100596 1.080133 0.7092394 10000
You could use expand.grid as follows:
df <- expand.grid(X = X, Y = Y, Z = Z)
G <- df$X * (df$Y + df$Z)
dim(G) <- c(length(X), length(Y), length(Z))
all.equal(F, G)
If you had a vectorised function, this would work just as well. If not, you could use plyr::daply.

Resources