Vectorizing double summations using R

I am struggling with translating this double summation into R using vectorization:
q(z, c) = ( sum_i sum_j |z[i] - z[j]| * c[i, j] ) / ( sum_i sum_j c[i, j] )
All I have been able to do so far is this:
c <- matrix(1:9, 3)
z <- 1:3
sum(abs(outer(z, z,"-")) * c)/sum(c)
But I don't think it's necessarily correct. I tried a for-loop version, but it is long and my answer is likely wrong anyway. Anyone keen on this? What am I missing (or doing wrong)? Any help would be appreciated.

Here's a double-loop version:
q = function(z, c) {
  num = 0
  for (i in 1:length(z)) {
    for (j in 1:length(z)) {
      num = num + abs(z[i] - z[j]) * c[i, j]
    }
  }
  num / sum(c)
}
Here's your vectorised version, functionised:
q2 = function(z, c) {
  sum(c * abs(outer(z, z, "-")) / sum(c))
}
Not a great difference in timing between them really for a small matrix:
> microbenchmark::microbenchmark(q(z,c), q2(z,c))
Unit: microseconds
expr min lq mean median uq max neval cld
q(z, c) 15.368 15.7505 16.59644 16.0225 16.6290 30.346 100 b
q2(z, c) 12.232 12.8885 13.79178 13.2225 13.6585 44.085 100 a
But for a larger test it's a big win:
> c2 = matrix(runif(100*100),100,100)
> z2 = runif(100)
> microbenchmark::microbenchmark(q(z2,c2), q2(z2,c2))
Unit: microseconds
expr min lq mean median uq max neval cld
q(z2, c2) 7437.031 7588.131 8046.92272 7794.927 8332.104 10729.799 100 b
q2(z2, c2) 74.742 78.647 94.20153 86.113 100.125 188.428 100 a
Numeric difference is within floating point tolerance:
> q(z2,c2) - q2(z2,c2)
[1] 6.661338e-16
So unless anyone has faster code, I'd stick with what you've got.
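As a side note, abs(outer(z, z, "-")) is just the matrix of pairwise absolute differences of z, so an equivalent formulation via dist() is possible (a sketch of my own; not benchmarked here):
# dist() on a numeric vector gives |z[i] - z[j]| with a zero diagonal,
# so the weighted sum matches q() and q2() exactly.
q3 <- function(z, c) sum(as.matrix(dist(z)) * c) / sum(c)
q3(1:3, matrix(1:9, 3))  # should equal q(1:3, matrix(1:9, 3))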

As perfectly explained by @Spacedman, your approach is already very efficient, but if you still want to go faster you could try Rcpp:
library(Rcpp)
sourceCpp(code = '
#include <Rcpp.h>

// [[Rcpp::export]]
double qRcpp(const Rcpp::NumericVector z, const Rcpp::NumericMatrix cm) {
  int zlen = z.length();
  if (!(zlen == cm.nrow() && cm.nrow() == cm.ncol()))
    Rcpp::stop("Invalid sizes");
  double num = 0;
  for (int i = 0; i < zlen; i++) {
    for (int j = 0; j < zlen; j++) {
      num = num + std::abs(z[i] - z[j]) * cm(i, j);
    }
  }
  return num / Rcpp::sum(cm);
}
')
Benchmark:
c2 = matrix(runif(100*100),100,100)
z2 = runif(100)
microbenchmark::microbenchmark(q(z2,c2), q2(z2,c2),qRcpp(z2,c2))
# Unit: microseconds
# expr min lq mean median uq max neval
# q(z2, c2) 10273.035 10976.3050 11680.85554 11348.763 11765.2010 44115.632 100
# q2(z2, c2) 64.292 67.9455 80.56427 75.543 86.3565 244.019 100
# qRcpp(z2, c2) 21.042 21.9180 25.30515 24.256 26.8860 56.403 100
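A quick sanity check one might also run (my own addition, hedged):
# The Rcpp version should agree with the vectorised R version
# up to floating-point rounding.
all.equal(qRcpp(z2, c2), q2(z2, c2))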

Related

Count number of palindromes within a string

I have written the below code to count the number of palindromic strings in a given string:
countPalindromes <- function(str) {
  len <- nchar(str)
  count <- 0
  for (i in 1:len) {
    for (j in i:len) {
      subs <- substr(str, i, j)
      rev <- paste(rev(substring(subs, 1:nchar(subs), 1:nchar(subs))), collapse = "")
      if (subs == rev) {
        count <- count + 1
      }
    }
  }
  count
}
This works fine, but it needs to be optimized so that it runs faster.
Please suggest some ways to optimize this piece of code.
Here's a solution that uses the wonderful stringi package - just as Andre suggested - together with a wee bit of vectorization.
cp <- function(s) {
  lenstr <- stringi::stri_length(s) # Get the length
  res <- sapply(1:lenstr, function(i) {
    # Get all substrings starting at position i
    sub_string <- stringi::stri_sub(s, i, i:lenstr)
    # Count the ones that read the same reversed
    sum(sub_string == stringi::stri_reverse(sub_string))
  })
  sum(res)
}
This should give the same result as your function
> cp("enafdemderredmedfane")
[1] 30
> countPalindromes("enafdemderredmedfane")
[1] 30
There is not much speedup for short strings, but for longer strings you can really see a benefit:
> microbenchmark::microbenchmark(countPalindromes("howdoyoudo"), cp("howdoyoudo"))
Unit: microseconds
expr min lq mean median uq max neval cld
countPalindromes("howdoyoudo") 480.979 489.6180 508.9044 494.9005 511.201 662.605 100 b
cp("howdoyoudo") 156.117 163.1555 175.4785 169.5640 179.993 324.145 100 a
Compared to
> microbenchmark::microbenchmark(countPalindromes("enafdemderredmedfane"), cp("enafdemderredmedfane"))
Unit: microseconds
expr min lq mean median uq max neval cld
countPalindromes("enafdemderredmedfane") 2031.565 2115.0305 2475.5974 2222.354 2384.151 6696.484 100 b
cp("enafdemderredmedfane") 324.991 357.6055 430.8334 387.242 478.183 1298.390 100 a
Working with a vector of characters makes the process faster. I am thinking of eliminating the double for loop, but I cannot find an efficient way.
countPalindromes_new <- function(str) {
  len <- nchar(str)
  strsp <- strsplit(str, "")[[1]]
  count <- 0
  for (i in 1:len) {
    for (j in i:len) {
      if (all(strsp[i:j] == strsp[j:i])) {
        count <- count + 1
      }
    }
  }
  count
}
> microbenchmark::microbenchmark(countPalindromes("howdoyoudo"), cp("howdoyoudo"), countPalindromes_new("howdoyoudo"))
Unit: microseconds
expr min lq mean median uq max neval
countPalindromes("howdoyoudo") 869.121 933.1215 1069.68001 963.201 1022.081 6712.751 100
cp("howdoyoudo") 192.000 202.8805 243.11972 219.308 258.987 477.441 100
countPalindromes_new("howdoyoudo") 49.068 53.3340 62.32815 57.387 63.574 116.481 100
> microbenchmark::microbenchmark(countPalindromes("enafdemderredmedfane"), cp("enafdemderredmedfane"), countPalindromes_new("enafdemderredmedfane"))
Unit: microseconds
expr min lq mean median uq max neval
countPalindromes("enafdemderredmedfane") 3578.029 3800.9620 4170.0888 3987.416 4173.6550 10205.445 100
cp("enafdemderredmedfane") 391.254 438.4010 609.8782 481.708 534.6135 6116.270 100
countPalindromes_new("enafdemderredmedfane") 200.534 214.1875 235.3501 223.148 245.5475 448.854 100
UPDATE (new version without the length-1 comparisons):
countPalindromes_new2 <- function(str) {
  len <- nchar(str)
  strsp <- strsplit(str, "")[[1]]
  count <- len
  for (i in 1:(len - 1)) {
    for (j in (i + 1):len) {
      if (all(strsp[i:j] == strsp[j:i])) {
        count <- count + 1
      }
    }
  }
  count
}
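A quick check (my own, hedged) that the update still agrees with the earlier functions on the example string:
# countPalindromes_new2 starts the count at nchar(str), since every single
# character is trivially a palindrome, and then only tests substrings of
# length >= 2, so it should reproduce the earlier result of 30.
countPalindromes_new2("enafdemderredmedfane")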
Simply put: normally I'm against pulling new libraries in everywhere, but stringi is THE library for working with strings in R. Note that this snippet checks whether each whole string is a palindrome, rather than counting palindromic substrings:
string_vec <- c("anna","nothing","abccba")
string_rev <- stringi::stri_reverse(string_vec)
sum(string_vec == string_rev)
# evaluates to 2

Make cumulative sum faster

I'm trying to take cumulative sums for each column of a matrix. Here's my code in R:
library(microbenchmark)
testMatrix <- matrix(1:65536, ncol = 256)
microbenchmark(apply(testMatrix, 2, cumsum), times = 10000L)
Unit: milliseconds
expr min lq mean median uq max neval
apply(testMatrix, 2, cumsum) 1.599051 1.766112 2.329932 2.15326 2.221538 93.84911 10000
I used Rcpp for comparison:
library(Rcpp)
cppFunction('NumericMatrix apply_cumsum_col(NumericMatrix m) {
  for (int j = 0; j < m.ncol(); ++j) {
    for (int i = 1; i < m.nrow(); ++i) {
      m(i, j) += m(i - 1, j);
    }
  }
  return m;
}')
microbenchmark(apply_cumsum_col(testMatrix), times=10000L);
Unit: microseconds
expr min lq mean median uq max neval
apply_cumsum_col(testMatrix) 205.833 257.719 309.9949 265.986 276.534 96398.93 10000
So the C++ code is 7.5 times as fast. Is it possible to do better than apply(testMatrix, 2, cumsum) in pure R? It feels like I have an order of magnitude overhead for no reason.
It is difficult to beat C++ with just R code. The fastest approach I can think of is to split your matrix into a list. That way, R uses primitive functions and doesn't copy the object with each iteration (apply is essentially a prettied-up loop). You can see that C++ still wins out, but there is a significant speedup with the list approach if you really just want to use R code.
fun1 <- function() {
  apply(testMatrix, 2, cumsum)
}

testList <- split(testMatrix, col(testMatrix))
fun2 <- function() {
  lapply(testList, cumsum)
}

microbenchmark(fun1(),
               fun2(),
               apply_cumsum_col(testMatrix),
               times = 100L)
Unit: microseconds
expr min lq mean median uq max neval
fun1() 3298.534 3411.9910 4376.4544 3477.608 3699.2485 9249.919 100
fun2() 558.800 596.0605 766.2377 630.841 659.3015 5153.100 100
apply_cumsum_col(testMatrix) 219.651 282.8570 576.9958 311.562 339.5680 4915.290 100
EDIT
Please note that this method is slower than fun1 if you include the time to split the matrix into a list.
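A hedged sketch of what timing the split step together with the lapply might look like (not run in the original answer; numbers will vary by machine):
# Include the cost of splitting the matrix into a list in the timing.
fun2_with_split <- function() {
  lapply(split(testMatrix, col(testMatrix)), cumsum)
}
microbenchmark(fun1(), fun2_with_split(), times = 100L)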
Using a byte-compiled for loop is slightly faster than the apply call on my system. I expected it to be faster because it does less work than apply. As expected, the R loop is still slower than the simple C++ function you wrote.
colCumsum <- compiler::cmpfun(function(x) {
  for (i in 1:ncol(x))
    x[, i] <- cumsum(x[, i])
  x
})
testMatrix <- matrix(1:65536, ncol=256)
m <- testMatrix
require(microbenchmark)
microbenchmark(colCumsum(m), apply_cumsum_col(m), apply(m, 2, cumsum), times=100L)
# Unit: microseconds
# expr min lq median uq max neval
# colCumsum(m) 1478.671 1540.5945 1586.1185 2199.9530 37377.114 100
# apply_cumsum_col(m) 178.214 192.4375 204.3905 234.8245 1616.030 100
# apply(m, 2, cumsum) 1879.850 1940.1615 1991.3125 2745.8975 4346.802 100
all.equal(colCumsum(m), apply(m, 2, cumsum))
# [1] TRUE
Maybe it is too late, but I will write my answer so anyone else can see it.
First of all, in your C++ code you need to clone your matrix; otherwise you are writing into R's memory (modifying the input in place), which is forbidden by CRAN. So your code becomes:
rcpp_apply <- cppFunction('NumericMatrix apply_cumsum_col(NumericMatrix m) {
  NumericMatrix g = clone(m);
  for (int j = 0; j < m.ncol(); ++j) {
    for (int i = 1; i < m.nrow(); ++i) {
      g(i, j) += g(i - 1, j);
    }
  }
  return g;
}')
Since your matrix is of integer type, you can change the C++ argument to an IntegerMatrix.
rcpp_apply_integer <- cppFunction('IntegerMatrix apply_cumsum_col(IntegerMatrix m) {
  IntegerMatrix g = clone(m);
  for (int j = 0; j < m.ncol(); ++j) {
    for (int i = 1; i < m.nrow(); ++i) {
      g(i, j) += g(i - 1, j);
    }
  }
  return g;
}')
This improved the speed by about a factor of 2. Here is a benchmark:
microbenchmark::microbenchmark(R=apply(testMatrix, 2, cumsum),Rcpp=rcpp_apply(testMatrix),Rcpp_integer=rcpp_apply_integer(testMatrix), times=10)
Unit: microseconds
expr min lq mean median uq max neval
R 1552.217 1706.165 1770.1264 1740.0345 1897.884 1940.989 10
Rcpp 502.900 523.838 637.7188 665.0605 699.134 743.471 10
Rcpp_integer 220.455 274.645 274.9327 275.8770 277.930 316.109 10
all.equal(rcpp_apply(testMatrix),rcpp_apply_integer(testMatrix))
[1] TRUE
If your matrix has large values then you have to use NumericMatrix.
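A small illustration of why (my own example): cumulative sums of integers that exceed .Machine$integer.max overflow, while doubles are fine.
big <- rep(.Machine$integer.max %/% 2L, 3L)
cumsum(big)              # integer overflow: NA with a warning
cumsum(as.numeric(big))  # works when accumulated as doubles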

Faster way to find the first TRUE value in a vector

In one function I very often need to use code like:
which(x==1)[1]
which(x>1)[1]
x[x>10][1]
where x is a numeric vector. summaryRprof() shows that I spend >80% of the time on relational operators. I wonder if there is a function that does the comparison only until the first TRUE value is reached, to speed up my code. A for loop is slower than the options provided above.
I don't know of a pure R way to do this, so I wrote a C function to do it for the quantstrat package. This function was written with a specific purpose in mind, so it's not as general as I would like. For example, you may notice that it only works on real/double/numeric data, so be sure to coerce Data to that before calling the .firstCross function.
#include <R.h>
#include <Rinternals.h>

SEXP firstCross(SEXP x, SEXP th, SEXP rel, SEXP start)
{
    int i, int_rel, int_start;
    double *real_x = NULL, real_th;

    if (ncols(x) > 1)
        error("only univariate data allowed");

    /* this currently only works for real x and th arguments
     * support for other types may be added later */
    real_th = asReal(th);
    int_rel = asInteger(rel);
    int_start = asInteger(start) - 1;

    switch (int_rel) {
        case 1: /* >  */
            real_x = REAL(x);
            for (i = int_start; i < nrows(x); i++)
                if (real_x[i] > real_th)
                    return(ScalarInteger(i + 1));
            break;
        case 2: /* <  */
            real_x = REAL(x);
            for (i = int_start; i < nrows(x); i++)
                if (real_x[i] < real_th)
                    return(ScalarInteger(i + 1));
            break;
        case 3: /* == */
            real_x = REAL(x);
            for (i = int_start; i < nrows(x); i++)
                if (real_x[i] == real_th)
                    return(ScalarInteger(i + 1));
            break;
        case 4: /* >= */
            real_x = REAL(x);
            for (i = int_start; i < nrows(x); i++)
                if (real_x[i] >= real_th)
                    return(ScalarInteger(i + 1));
            break;
        case 5: /* <= */
            real_x = REAL(x);
            for (i = int_start; i < nrows(x); i++)
                if (real_x[i] <= real_th)
                    return(ScalarInteger(i + 1));
            break;
        default:
            error("unsupported relationship operator");
    }
    /* return number of observations if relationship is never TRUE */
    return(ScalarInteger(nrows(x)));
}
And here's the R function that calls it:
.firstCross <- function(Data, threshold = 0, relationship, start = 1) {
  rel <- switch(relationship[1],
                '>'    = ,
                'gt'   = 1,
                '<'    = ,
                'lt'   = 2,
                '=='   = ,
                'eq'   = 3,
                '>='   = ,
                'gte'  = ,
                'gteq' = ,
                'ge'   = 4,
                '<='   = ,
                'lte'  = ,
                'lteq' = ,
                'le'   = 5)
  .Call('firstCross', Data, threshold, rel, start)
}
Some benchmarks, just for fun.
> library(quantstrat)
> library(microbenchmark)
> firstCross <- quantstrat:::.firstCross
> set.seed(21)
> x <- rnorm(1e6)
> microbenchmark(which(x > 3)[1], firstCross(x,3,">"), times=10)
Unit: microseconds
expr min lq median uq max neval
which(x > 3)[1] 9482.081 9578.072 9597.3870 9690.448 9820.176 10
firstCross(x, 3, ">") 11.370 11.675 31.9135 34.443 38.614 10
> which(x>3)[1]
[1] 919
> firstCross(x,3,">")
[1] 919
Note that firstCross will yield a larger relative speedup the larger Data is (because R's relational operators have to finish comparing the entire vector).
> x <- rnorm(1e7)
> microbenchmark(which(x > 3)[1], firstCross(x,3,">"), times=10)
Unit: microseconds
expr min lq median uq max neval
which(x > 3)[1] 94536.21 94851.944 95799.857 96154.756 113962.794 10
firstCross(x, 3, ">") 5.08 5.507 25.845 32.164 34.183 10
> which(x>3)[1]
[1] 97
> firstCross(x,3,">")
[1] 97
...and it won't be appreciably faster if the first TRUE value is near the end of the vector.
> microbenchmark(which(x==last(x))[1], firstCross(x,last(x),"eq"),times=10)
Unit: milliseconds
expr min lq median uq max neval
which(x == last(x))[1] 92.56311 93.85415 94.38338 98.18422 106.35253 10
firstCross(x, last(x), "eq") 86.55415 86.70980 86.98269 88.32168 92.97403 10
> which(x==last(x))[1]
[1] 10000000
> firstCross(x,last(x),"eq")
[1] 10000000
Base R provides Position and Find for locating the first index and value, respectively, for which a predicate returns a true value. These higher-order functions return immediately upon the first hit.
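For example (a tiny illustration of the idea, not part of the original benchmark):
# Both stop scanning at the first element that satisfies the predicate.
Position(function(v) v > 3, c(1, 5, 2, 7))  # index of the first hit: 2
Find(function(v) v > 3, c(1, 5, 2, 7))      # value of the first hit: 5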
f <- function(x) {
  r <- vector("list", 3)
  r[[1]] <- which(x == 1)[1]
  r[[2]] <- which(x > 1)[1]
  r[[3]] <- x[x > 10][1]
  return(r)
}

p <- function(f, b) function(a) f(a, b)

g <- function(x) {
  r <- vector("list", 3)
  r[[1]] <- Position(p(`==`, 1), x)
  r[[2]] <- Position(p(`>`, 1), x)
  r[[3]] <- Find(p(`>`, 10), x)
  return(r)
}
The relative performance depends greatly on the probability of finding a hit early relative to the cost of the predicate vs the overhead of Position/Find.
library(microbenchmark)
set.seed(1)
x<-sample(1:100,1e5,replace=TRUE)
microbenchmark(f(x),g(x))
Unit: microseconds
expr min lq mean median uq max neval cld
f(x) 5034.283 5410.1205 6313.861 5798.4780 6948.5675 26735.52 100 b
g(x) 587.463 650.4795 1013.183 734.6375 950.9845 20285.33 100 a
y<-rep(0,1e5)
microbenchmark(f(y),g(y))
Unit: milliseconds
expr min lq mean median uq max neval cld
f(y) 3.470179 3.604831 3.791592 3.718752 3.866952 4.831073 100 a
g(y) 131.250981 133.687454 137.199230 134.846369 136.193307 177.082128 100 b
That's a nice question and answer... Just to add: any() is no faster than which() or match(), but both are quicker than [], which I guess creates a big, useless vector of TRUEs and FALSEs. So I'm guessing no, short of the answer above.
v=rep('A', 10e6)
v[5e6]='B'
v[10e6]='B'
microbenchmark(which(v=='B')[1])
Unit: milliseconds
expr min lq median uq max neval
which(v == "B")[1] 332.3788 337.6718 344.4076 347.1194 503.4022 100
microbenchmark(any(v=='B'))
Unit: milliseconds
expr min lq median uq max neval
any(v == "B") 334.4466 335.114 335.6714 347.5474 356.0261 100
microbenchmark(v[v=='B'][1])
Unit: milliseconds
expr min lq median uq max neval
v[v == "B"][1] 601.5923 605.3331 609.191 612.0689 707.1409 100
microbenchmark(match("B", v))
Unit: milliseconds
expr min lq median uq max neval
match("B", v) 339.2872 344.7648 350.5444 359.6746 915.6446 100
Any other ideas out there?
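For what it's worth, one pure-R compromise is to scan the vector in blocks and stop at the first block that contains a hit, so an early match avoids comparing the whole vector. A minimal sketch (my own, not benchmarked here):
first_match_chunked <- function(x, value, block = 10000L) {
  n <- length(x)
  start <- 1L
  while (start <= n) {
    end <- min(start + block - 1L, n)
    hit <- which(x[start:end] == value)
    if (length(hit)) return(start + hit[1L] - 1L)  # stop at the first block with a match
    start <- end + 1L
  }
  NA_integer_  # no match anywhere
}
first_match_chunked(v, "B")  # should give 5e6 for the vector above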

How to speed up this simple function in R

I am trying to do Conway–Maxwell–Poisson regression using COMPoissonReg in R. However, it is extremely slow for large datasets, so I tried to profile it and check the source code.
The majority of the time (>95%) is spent in one function, COMPoissonReg:::computez, which is equivalent to:
test <- function(lambda, nu, max = 100)
{
  forans <- matrix(0, ncol = max + 1, nrow = length(lambda))
  for (j in 1:max) {
    temp <- matrix(0, ncol = j, nrow = length(lambda))
    for (i in 1:j) {
      temp[, i] <- lambda / (i^nu)
    }
    for (k in 1:length(lambda)) {
      forans[k, j + 1] <- prod(temp[k, ])
    }
  }
  forans[, 1] <- rep(1, length(lambda))
  ans <- rowSums(forans)
  return(ans)
}
Here nu is the exponent in the series Z(lambda, nu) = sum over s of lambda^s / (s!)^nu, lambda is a vector, and max is the upper limit of the summation index s (set to 100 here as an approximation of infinity).
The question doesn't really need special background stats knowledge, but the link or link2 is here just in case.
A simple script to test performance: this takes 8 seconds, and if I lazily compile it with cmpfun it takes 4 seconds. I believe it can be improved further (without rewriting it in C; I am aiming for around ~0.05 seconds so that I don't have to refactor the package code that calls this function iteratively).
lambda <- rnorm(10000, 1.5, 0.3)
Rprof(tmp <- tempfile())
sum(log(test(lambda, 1.2)))
Rprof()
summaryRprof(tmp)
Update
I realized another issue: floating-point limitations. Computing the power series directly is dangerous because it overflows very quickly, especially when vectorized; e.g. lambda^100 certainly overflows if lambda > 10000. In other languages I might build each term incrementally with a reduce/fold, but I fear that Reduce is slow in R.
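For what it's worth, one way to sidestep the overflow entirely is to keep the whole series on the log scale and only exponentiate after subtracting the per-row maximum. A sketch of the standard log-sum-exp trick (my own, assuming lambda > 0; not taken from the answers below):
# Compute log(Z) = log(sum_{s=0}^{max} lambda^s / (s!)^nu) stably, per lambda value.
log_test <- function(lambda, nu, max = 100) {
  # s * log(lambda) - nu * log(s!) for s = 0..max, one row per lambda value
  logterms <- sweep(outer(log(lambda), 0:max), 2, nu * lfactorial(0:max))
  m <- apply(logterms, 1, max)
  m + log(rowSums(exp(logterms - m)))
}
# e.g. sum(log_test(lambda, 1.2)) in place of sum(log(test(lambda, 1.2)))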
You can make it much faster than the function you are using by avoiding loops. For example:
test2 <- function(lambda, nu, max = 100) {
  len <- length(lambda)
  mm <- matrix(rep(lambda, each = max + 1), max + 1, len)
  mm <- mm^(0:max)
  mm <- mm / factorial(0:max)^nu
  colSums(mm)
}
This runs about 50 times faster with lambda of length 100:
> require(microbenchmark)
> lam<-rnorm(100)
> max(abs(test(lam,1.2)-test2(lam,1.2)))
[1] 4.510281e-16
> microbenchmark(test(lam,1.2),test2(lam,1.2),times=10)
Unit: milliseconds
expr min lq median uq max neval
test(lam, 1.2) 77.124705 77.422619 78.241945 79.635746 81.260280 10
test2(lam, 1.2) 1.335716 1.373116 1.401411 1.507765 1.562447 10
You can probably optimize it a little more, but this should get most of the gains, unless there is some kind of builtin function you can exploit rather than doing the sum explicitly.
On input of length 10000, it takes 0.148 seconds on my machine, versus 6.850 seconds for test:
> lam<-rnorm(10000)
> max(abs(test(lam,1.2)-test2(lam,1.2)))
[1] 3.552714e-15
> system.time(test2(lam,1.2))
user system elapsed
0.132 0.016 0.148
> system.time(test(lam,1.2))
user system elapsed
6.780 0.056 6.850
OK, here's an Rcpp answer. As expected, it's a lot faster than either of the others.
require(Rcpp)
rcppfun <- "
Rcpp::NumericVector myfun(Rcpp::NumericVector lambda,
                          Rcpp::NumericVector weights)
{
  int num = lambda.size();
  int max = weights.size();
  std::vector<double> r(num);
  for (int i = 0; i < num; i++) {
    double total = 0;
    double prod = 1;
    for (int j = 0; j < max; j++) {
      total += prod / weights[j];
      prod *= lambda[i];
    }
    r[i] = total;
  }
  return Rcpp::wrap(r);
}
"
testRcpp <- cppFunction(rcppfun)

test5 <- function(lambda, nu, max = 100) {
  wts <- factorial(0:max)^nu
  testRcpp(lambda, wts)
}
This is about 40x faster than my original test2 and about 12x faster than @NealFultz's improved logarithmic implementation.
> lam<-abs(rnorm(10000))
> max(abs(test5(lam,1.2)-test2(lam,1.2)))
[1] 7.105427e-15
> microbenchmark(test2(lam,1.2),test3(lam,1.2),test4(lam,1.2),test5(lam,1.2))
Unit: milliseconds
expr min lq median uq max neval
test2(lam, 1.2) 125.601616 126.790516 127.700099 135.182263 222.340179 100
test3(lam, 1.2) 125.523424 126.666410 126.921035 131.316254 178.633839 100
test4(lam, 1.2) 41.734015 42.640340 43.190553 50.932952 97.765219 100
test5(lam, 1.2) 3.432029 3.501046 3.519007 3.532603 3.754232 100
On edit, here's one more Rcpp version that should at least partially address the overflow issue, by computing each term incrementally, rather than the numerator and denominator separately.
rcppfun2<-"
Rcpp::NumericVector myfun2(Rcpp::NumericVector lambda, Rcpp::NumericVector nu){
int num = lambda.size();
int max = nu.size();
std::vector<double> r(num);
for(int i=0; i<num; i++){
double term = 1;
double total = 1;
for(int j=0; j< max; j++){
term *= (lambda[i]/nu[j]);
total += term;
}
r[i]=total;
}
return Rcpp::wrap(r);
}
"
testRcpp2<-cppFunction(rcppfun2)
test6<-function(lambda,nu,max=100){
testRcpp2(lambda,(1:max)^nu)
}
> lam<-abs(rnorm(10000))
> max(abs(test2(lam,1.2)-test6(lam,1.2)))
[1] 1.065814e-14
> microbenchmark(test5(lam,1.2),test6(lam,1.2))
Unit: milliseconds
expr min lq median uq max neval
test5(lam, 1.2) 3.416786 3.426013 3.435492 3.444196 3.604486 100
test6(lam, 1.2) 3.554147 3.572285 3.580865 3.588030 3.840713 100
I slept on it and came up with another big improvement, if you can use the gsl package. All you're doing is evaluating a polynomial:
require(gsl)
test5 <- function(lambda, nu, max = 100) {
  gsl_poly(factorial(0:max)^-nu, lambda)
}
R>microbenchmark(test2(1:50,5.1), test4(1:50,5.1), test5(1:50,5.1))
Unit: microseconds
expr min lq median uq max neval
test2(1:50, 5.1) 4518.957 4838.5185 5318.5040 5617.6330 19978.039 100
test4(1:50, 5.1) 2043.422 2268.3490 2472.0430 2727.1045 10328.376 100
test5(1:50, 5.1) 311.144 407.2465 476.0755 540.6095 1138.766 100
Following up on @mrips: sometimes working on the log scale can be a bit faster, because you can multiply instead of exponentiate:
test4 <- function(lambda, nu, max = 100) {
  mm <- matrix(log(lambda), max, length(lambda), byrow = TRUE)
  mm <- mm * 1:max - nu * lfactorial(1:max)
  1 + colSums(exp(mm))
}
I've also factored out the special case where s = 0. Here are my timings:
R>microbenchmark(test2(1:50,5), test4(1:50,5))
Unit: microseconds
expr min lq median uq max neval
test2(1:50, 5) 952.360 1432.6600 1436.4525 1440.1860 3467.981 100
test4(1:50, 5) 695.189 1041.4785 1042.8315 1045.6525 2970.441 100

increase efficiency and speed of R function

When using R I always have in mind: "avoid loops if possible". However, I am stuck right now; I haven't been able to figure out a CRANtastic way to code what I need.
For the record, after several comments: the statement above isn't quite right here, and there is no need to avoid the loop to improve efficiency.
I have two string vectors as input, let us call them a and b; they can only contain the letters "M", "I" and "D".
a = c("M","I","D","D","M","M","M","M","M","M")
b = c("M","M","M","M","M","M","D","M","M")
My desired output is:
d = c("M","I","D","D","M","M","M","M","I","M","M")
The following function gives me such output:
my.function <- function(a, b)
{
  nrow.df = length(a) + length(which(b == "D"))
  my.df = data.frame(a = rep(NA, nrow.df),
                     b = rep(NA, nrow.df),
                     d = rep(NA, nrow.df))
  my.df$a[1:length(a)] = a
  my.df$b[1:length(b)] = b
  for (i in 1:nrow.df)
  {
    if (my.df$a[i] == "D") {
      my.df$d[i] = "D"
      my.df$b[(i + 1):nrow.df] = my.df$b[i:(nrow.df - 1)]
    } else if (my.df$b[i] == "D") {
      my.df$d[i] = "I"
      my.df$a[(i + 1):nrow.df] = my.df$a[i:(nrow.df - 1)]
    } else if (my.df$a[i] == "I") {
      my.df$d[i] = "I"
    } else if (my.df$b[i] == "I") {
      my.df$d[i] = "D"
    } else {
      my.df$d[i] = my.df$a[i]
    }
  }
  return(my.df$d)
}
> d = my.function(a,b)
> d
[1] "M" "I" "D" "D" "M" "M" "M" "M" "I" "M" "M"
The function logic is as follows: whenever there is a "D" in a, it puts a "D" in d and shifts the vector b by 1; and vice versa, whenever there is a "D" in b, it puts an "I" in d and shifts a by 1.
Next, when there is an "I" in a but not a "D" in b, it puts an "I" in d; and vice versa, whenever there is an "I" in b and not a "D" in a, it puts a "D" in d. Otherwise, d = a.
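For example, on a pair of tiny (hypothetical) inputs the rule plays out like this:
# The "D" in a emits "D" and shifts b right by one; the shifted "D" in b is
# then met one position later and emits "I".
a_small <- c("D", "M", "M")
b_small <- c("M", "D", "M")
my.function(a_small, b_small)
# [1] "D" "M" "I" "M"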
It is not a complex function, but I am struggling to make it efficient in R. I am applying this function millions of times with mclapply, so a fast implementation would save me a lot of time.
Do you recommend using Rcpp? Would it be much faster? Is there any slowdown in passing data between R and C++ millions of times, or is that handled automatically by Rcpp?
Building on my comment, if speed is one concern, step 1 is to not unnecessarily use data.frames. This answer doesn't address the loop (as others have already said, there is nothing wrong with using a loop in R if it is done properly).
Here is a very slightly modified version of your function, using vectors instead of data.frames to store the data.
my.function.v <- function(a, b) {
  nrow.df = length(a) + length(which(b == "D"))
  A <- B <- D <- vector(length = nrow.df)
  A[1:length(a)] = a
  B[1:length(b)] = b
  for (i in 1:nrow.df)
  {
    if (A[i] == "D") {
      D[i] = "D"
      B[(i + 1):nrow.df] = B[i:(nrow.df - 1)]
    } else if (B[i] == "D") {
      D[i] = "I"
      A[(i + 1):nrow.df] = A[i:(nrow.df - 1)]
    } else if (A[i] == "I") {
      D[i] = "I"
    } else if (B[i] == "I") {
      D[i] = "D"
    } else {
      D[i] = A[i]
    }
  }
  return(D)
}
Notice the relative difference in speed below:
library(microbenchmark)
microbenchmark(my.function(a, b), my.function.v(a, b), f(a, b))
# Unit: microseconds
# expr min lq median uq max neval
# my.function(a, b) 1448.416 1490.8780 1511.3435 1547.3880 6674.332 100
# my.function.v(a, b) 157.248 165.8725 171.6475 179.1865 324.722 100
# f(a, b) 168.874 177.5455 184.8775 193.3455 416.551 100
As can be seen, @mrip's function also fares much better than your original function.
I don't see any easy way to avoid a loop here. However, there is still a more efficient way of doing this. The problem is that you are actually shifting a and b every time you come across the character D, and shifting a vector like this is an O(n) operation, so the running time of this loop would actually be O(n^2).
You can simplify the code and get slightly better performance like this:
f <- function(a, b) {
  aSkipped <- 0
  bSkipped <- 0
  d <- rep(0, length(a) + sum(b == "D"))
  for (i in 1:length(d)) {
    if (a[i - aSkipped] == "D") {
      d[i] = "D"
      bSkipped <- bSkipped + 1
    } else if (b[i - bSkipped] == "D") {
      d[i] = "I"
      aSkipped <- aSkipped + 1
    } else if (a[i - aSkipped] == "I") {
      d[i] = "I"
    } else if (b[i - bSkipped] == "I") {
      d[i] = "D"
    } else {
      d[i] = a[i - aSkipped]
    }
  }
  d
}
On edit: you will really see large performance improvements when the input gets big. For small strings with not too many "D"s, this and Ananda Mahto's solution run in about the same time:
> set.seed(123)
> a<-c(sample(c("M","I"),500,T))
> b<-c(sample(c("M","I"),500,T))
> a[sample(500,50)]<-"D"
> b[sample(500,50)]<-"D"
> microbenchmark(f(a,b),my.function.v(a,b))
Unit: milliseconds
expr min lq median uq max neval
f(a, b) 4.259970 4.324046 4.368018 4.463925 9.694951 100
my.function.v(a, b) 4.442873 4.497172 4.533196 4.639543 9.901044 100
But for strings of length 50000 with 5000 "D"s the difference is substantial:
> set.seed(123)
> a<-c(sample(c("M","I"),50000,T))
> b<-c(sample(c("M","I"),50000,T))
> a[sample(50000,5000)]<-"D"
> b[sample(50000,5000)]<-"D"
> system.time(f(a,b))
user system elapsed
0.460 0.000 0.463
> system.time(my.function.v(a,b))
user system elapsed
7.056 0.008 7.077
OK, here's the Rcpp solution, and as expected, it beats the R solution by a lot:
rcppFun<-"
CharacterVector fcpp(CharacterVector a,CharacterVector b,int size){
int aSkipped = 0;
int bSkipped = 0;
int asize = a.size();
Rcpp::CharacterVector d(size);
for(int i=0; i<size; i++){
if(i-aSkipped<asize && a[i-aSkipped][0] == 'D') {
d[i] = \"D\";
bSkipped++;
} else if (b[i-bSkipped][0] == 'D') {
d[i] = \"I\";
aSkipped++;
} else if (a[i-aSkipped][0] == 'I') {
d[i] = \"I\";
} else if (b[i-bSkipped][0] == 'I') {
d[i] = \"D\";
} else {
d[i] = a[i-aSkipped];
}
}
return d;
}"
require("Rcpp")
fcpp<-cppFunction(rcppFun)
f3<-function(a,b){
fcpp(a,b,as.integer(length(a)+sum(b=="D")))
}
Warning: that function does no parameter checking at all, so if you feed it bad data you can easily get a seg fault.
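If it helps, here is a minimal sketch (my own) of the kind of guard one could wrap around the Rcpp call; it catches the most obvious bad inputs, but it does not bound-check the index arithmetic inside the C++ code:
f3_safe <- function(a, b) {
  stopifnot(is.character(a), is.character(b),
            all(a %in% c("M", "I", "D")),
            all(b %in% c("M", "I", "D")))
  fcpp(a, b, as.integer(length(a) + sum(b == "D")))
}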
If you are going to be calling this a lot, Rcpp is definitely the way to go:
> with(ab(10),microbenchmark(f(a,b),f3(a,b),f2(a,b),my.function.v(a,b)))
Unit: microseconds
expr min lq median uq max neval
f(a, b) 103.993 107.5155 108.6815 109.7455 178.801 100
f3(a, b) 7.354 8.1305 8.5575 9.1220 18.014 100
f2(a, b) 87.081 90.4150 92.2730 94.2585 146.502 100
my.function.v(a, b) 84.389 86.5140 87.6090 88.8340 109.106 100
> with(ab(100),microbenchmark(f(a,b),f3(a,b),f2(a,b),my.function.v(a,b)))
Unit: microseconds
expr min lq median uq max neval
f(a, b) 992.082 1018.9850 1032.0180 1071.0690 2784.710 100
f3(a, b) 12.873 14.3605 14.7370 15.5095 35.582 100
f2(a, b) 119.396 125.4405 129.3015 134.9915 1909.930 100
my.function.v(a, b) 769.618 786.7865 802.2920 824.0820 905.737 100
> with(ab(1000),microbenchmark(f(a,b),f3(a,b),f2(a,b),my.function.v(a,b)))
Unit: microseconds
expr min lq median uq max neval
f(a, b) 9816.295 10065.065 10233.1350 10392.696 12383.373 100
f3(a, b) 66.057 67.869 83.9075 87.231 1167.086 100
f2(a, b) 1637.972 1760.258 2667.6985 3138.229 47610.317 100
my.function.v(a, b) 9692.885 10272.425 10997.2595 11402.602 54315.922 100
> with(ab(10000),microbenchmark(f(a,b),f3(a,b),f2(a,b)))
Unit: microseconds
expr min lq median uq max neval
f(a, b) 101644.922 103311.678 105185.5955 108342.4960 144620.777 100
f3(a, b) 607.702 610.039 669.8515 678.1845 785.415 100
f2(a, b) 221305.641 247952.345 254478.1580 341195.5510 656408.378 100
Just for the sake of showing how it might be done, it can be done without a loop in R; here's one way. It's faster when the length is roughly 1000 or less, but slower when larger. One takeaway is that you surely could speed this up in Rcpp.
f2 <- function(a, b) {
  da <- which(a == "D")
  db <- which(b == "D")
  dif <- outer(da, db, `<`)
  da <- da + rowSums(!dif)
  db <- db + colSums(dif)
  ia <- which(a == "I")
  ia <- ia + colSums(outer(db, ia, `<`))
  ib <- which(b == "I")
  ib <- ib + colSums(outer(da, ib, `<`))
  out <- rep("M", length(a) + length(db))
  out[da] <- "D"
  out[db] <- "I"
  out[ia] <- "I"
  out[ib] <- "D"
  out
}
For generating data
ab <- function(N) {
  set.seed(123)
  a <- c(sample(c("M", "I"), N, TRUE))
  b <- c(sample(c("M", "I"), N, TRUE))
  a[sample(N, N / 10)] <- "D"
  b[sample(N, N / 10)] <- "D"
  list(a = a, b = b)
}
Timings:
> library(microbenchmark)
> with(ab(10), microbenchmark(my.function.v(a, b), f(a, b), f2(a,b)))
Unit: microseconds
expr min lq median uq max neval
my.function.v(a, b) 79.102 86.9005 89.3680 93.2410 279.761 100
f(a, b) 84.334 91.1055 94.1790 98.2645 215.579 100
f2(a, b) 94.807 101.5405 105.1625 108.9745 226.149 100
> with(ab(100), microbenchmark(my.function.v(a, b), f(a, b), f2(a,b)))
Unit: microseconds
expr min lq median uq max neval
my.function.v(a, b) 732.849 750.4480 762.906 845.0835 1953.371 100
f(a, b) 789.380 805.8905 819.022 902.5865 1921.064 100
f2(a, b) 124.442 129.1450 134.543 137.5910 237.498 100
> with(ab(1000), microbenchmark(my.function.v(a, b), f(a, b), f2(a,b)))
Unit: milliseconds
expr min lq median uq max neval
my.function.v(a, b) 10.146865 10.387144 10.695895 11.123164 13.08263 100
f(a, b) 7.776286 7.973918 8.266882 8.633563 9.98204 100
f2(a, b) 1.322295 1.355601 1.385302 1.465469 1.85349 100
> with(ab(10000), microbenchmark(my.function.v(a, b), f(a, b), f2(a,b), times=10))
Unit: milliseconds
expr min lq median uq max neval
my.function.v(a, b) 429.4030 435.00373 439.06706 442.51650 465.00124 10
f(a, b) 80.7709 83.71715 85.14887 88.02067 89.00047 10
f2(a, b) 164.7807 170.37608 175.94281 247.78353 251.14653 10
