I am using RcppParallel to speed up some calculations. However, I am running out of memory in the process, so I would like to save results within the Parallel loop that are pass some relevance threshold. Below is a toy example to illustrate my point:
#include <Rcpp.h>
#include <RcppParallel.h>
using namespace Rcpp;
// [[Rcpp::depends(RcppParallel)]]
// [[Rcpp::plugins(cpp11)]]
struct Example : public RcppParallel::Worker {
RcppParallel::RVector<double> xvals, xvals_output, yvals;
Example(const NumericVector & xvals, NumericVector & yvals, NumericVector & xvals_output) :
xvals(xvals), xvals_output(xvals_output), yvals(yvals) {}
void operator()(std::size_t begin, size_t end) {
for(std::size_t i=begin; i < end; i++) {
double y = xvals[i] * (xvals[i] - 1);
// if(y < 0) {
// xvals_output.push_back(xvals[i]);
// yvals.push_back(y);
// }
xvals_output[i] = xvals[i];
yvals[i] = y;
}
}
};
// [[Rcpp::export]]
List find_values(NumericVector xvals) {
NumericVector xvals_output(xvals.size());
NumericVector yvals(xvals.size());
Example ex(xvals, yvals, xvals_output);
parallelFor(0, xvals.size(), ex);
List L = List::create(xvals_output, yvals);
return(L);
}
The R code would be:
find_values(seq(-10,10, by=0.5))
The commented out code is what I would like to do.
That is, I would like to initialize an empty vector, and append only the y-values that pass a certain threshold and also the associated x-values.
In my real usage, I am calculating a MxN matrix, so memory is an issue.
What is the correct way to approach this issue?
If anyone ever comes across a similar problem, here's a solution using "concurrent_vector" from TBB (which RcppParallel uses under the hood and is available as a header).
#include <Rcpp.h>
#include <RcppParallel.h>
#include <tbb/concurrent_vector.h>
using namespace Rcpp;
// [[Rcpp::depends(RcppParallel)]]
// [[Rcpp::plugins(cpp11)]]
struct Example : public RcppParallel::Worker {
RcppParallel::RVector<double> xvals;
tbb::concurrent_vector< std::pair<double, double> > &output;
Example(const NumericVector & xvals, tbb::concurrent_vector< std::pair<double, double> > &output) :
xvals(xvals), output(output) {}
void operator()(std::size_t begin, size_t end) {
for(std::size_t i=begin; i < end; i++) {
double y = xvals[i] * (xvals[i] - 1);
if(y < 0) {
output.push_back( std::pair<double, double>(xvals[i], y) );
}
}
}
};
// [[Rcpp::export]]
List find_values(NumericVector xvals) {
tbb::concurrent_vector< std::pair<double, double> > output;
Example ex(xvals,output);
parallelFor(0, xvals.size(), ex);
NumericVector xout(output.size());
NumericVector yout(output.size());
for(int i=0; i<output.size(); i++) {
xout[i] = output[i].first;
yout[i] = output[i].second;
}
List L = List::create(xout, yout);
return(L);
}
Output:
> find_values(seq(-10,10, by=0.5))
[[1]]
[1] 0.5
[[2]]
[1] -0.25
Related
This is a rather simple question, but I haven't been able to quite find the answer on the web yet.
Wishing my latest attempt, here is latest compiler output:
note: candidate function not viable: no known conversion from 'double (unsigned int, const double *, void *, void )' to 'nlopt_func' (aka 'double ()(unsigned int, const double *, double *, void *)') for 2nd argument
From this error I surmise that I am now wrapping or 'type casting' the data argument correctly and also the parameter vector. The discrepency between the third input, the gradient, confuses me. As I am calling a gradient free optimization routine.
Here is a simple linear regression with a constant and a variable:
#include "RcppArmadillo.h"
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(nloptr)]]
//#include <vector>
#include <nloptrAPI.h>
using namespace arma;
using namespace Rcpp;
typedef struct {
arma::mat data_in;
} *my_func_data;
typedef struct {
double a, b;
} my_theta;
double myfunc(unsigned n, const double *theta, void *grad, void *data){
my_func_data &temp = (my_func_data &) data;
arma::mat data_in = temp->data_in;
my_theta *theta_temp = (my_theta *) theta;
double a = theta_temp->a, b = theta_temp->b;
int Len = arma::size(data_in)[0];
arma::vec Y1 = data_in(span(0, Len-1), 1);
arma::vec Y2 = data_in(span(0, Len-1), 2);
arma::vec res = data_in(span(0, Len-1), 0) - a*Y1 - b*Y2 ;
return sum(res);
}
// [[Rcpp::export]]
void test_nlopt_c() {
arma::mat data_in(10,3);
data_in(span(0,9),0) = arma::regspace(40, 49);
data_in(span(0,9),1) = arma::ones(10);
data_in(span(0,9),2) = arma::regspace(10, 19);
my_func_data &temp = (my_func_data &) data_in;
double lb[2] = { 0, 0,}; /* lower bounds */
nlopt_opt opt;
opt = nlopt_create(NLOPT_LN_NELDERMEAD, 2); /* algorithm and dimensionality */
nlopt_set_lower_bounds(opt, lb);
nlopt_set_min_objective(opt, myfunc, &data_in );
nlopt_set_xtol_rel(opt, 1e-4);
double minf; /* the minimum objective value, upon return */
double x[2] = {0.5, 0.5}; /* some initial guess */
nlopt_result result = nlopt_optimize(opt, x, &minf);
Rcpp::Rcout << "result:" << result;
return;
}
Got it figured out, stupid answer turns out to be correct, just change 'void' to 'double', no clue why. Anyway, the example code needs some improving but it works.
#include "RcppArmadillo.h"
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(nloptr)]]
//#include <vector>
#include <nloptrAPI.h>
using namespace arma;
using namespace Rcpp;
typedef struct {
arma::mat data_in;
} *my_func_data;
typedef struct {
double a, b;
} my_theta;
double myfunc(unsigned n, const double *theta, double *grad, void *data){
my_func_data &temp = (my_func_data &) data;
arma::mat data_in = temp->data_in;
my_theta *theta_temp = (my_theta *) theta;
double a = theta_temp->a, b = theta_temp->b;
int Len = arma::size(data_in)[0];
arma::vec Y1 = data_in(span(0, Len-1), 1);
arma::vec Y2 = data_in(span(0, Len-1), 2);
arma::vec res = data_in(span(0, Len-1), 0) - a*Y1 - b*Y2 ;
return sum(res);
}
// [[Rcpp::export]]
void test_nlopt_c() {
arma::mat data_in(10,3);
data_in(span(0,9),0) = arma::regspace(40, 49);
data_in(span(0,9),1) = arma::ones(10);
data_in(span(0,9),2) = arma::regspace(10, 19);
my_func_data &temp = (my_func_data &) data_in;
double lb[2] = { 0, 0,}; /* lower bounds */
nlopt_opt opt;
opt = nlopt_create(NLOPT_LN_NELDERMEAD, 2); /* algorithm and dimensionality */
nlopt_set_lower_bounds(opt, lb);
nlopt_set_min_objective(opt, myfunc, &data_in );
nlopt_set_xtol_rel(opt, 1e-4);
double minf; /* the minimum objective value, upon return */
double x[2] = {0.5, 0.5}; /* some initial guess */
nlopt_result result = nlopt_optimize(opt, x, &minf);
Rcpp::Rcout << "result:" << result;
return;
}
I have to convert individual elements of Rcpp::IntegerVector into their string form so I can add another string to them. My code looks like this:
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
Rcpp::String int_to_char_single_fun(int x){
// Obtain environment containing function
Rcpp::Environment base("package:base");
// Make function callable from C++
Rcpp::Function int_to_string = base["as.character"];
// Call the function and receive its list output
Rcpp::String res = int_to_string(Rcpp::_["x"] = x); // example of original param
// Return test object in list structure
return (res);
}
//[[Rcpp::export]]
Rcpp::CharacterVector add_chars_to_int(Rcpp::IntegerVector x){
int n = x.size();
Rcpp::CharacterVector BASEL_SEG(n);
for(int i = 0; i < n; i++){
BASEL_SEG[i] = "B0" + int_to_char_single_fun(x[i]);
}
return BASEL_SEG;
}
/*** R
int_vec <- as.integer(c(1,2,3,4,5))
BASEL_SEG_char <- add_chars_to_int(int_vec)
*/
I get the following error:
no match for 'operator+'(operand types are 'const char[3]' and 'Rcpp::String')
I cannot import any C++ libraries like Boost to do this and can only use Rcpp functionality to do this. How do I add string to integer here in Rcpp?
We basically covered this over at the Rcpp Gallery when we covered Boost in an example for lexical_cast (though that one went the other way). So rewriting it quickly yields this:
Code
// We can now use the BH package
// [[Rcpp::depends(BH)]]
#include <Rcpp.h>
#include <boost/lexical_cast.hpp>
using namespace Rcpp;
using boost::lexical_cast;
using boost::bad_lexical_cast;
// [[Rcpp::export]]
std::vector<std::string> lexicalCast(std::vector<int> v) {
std::vector<std::string> res(v.size());
for (unsigned int i=0; i<v.size(); i++) {
try {
res[i] = lexical_cast<std::string>(v[i]);
} catch(bad_lexical_cast &) {
res[i] = "(failed)";
}
}
return res;
}
/*** R
lexicalCast(c(42L, 101L))
*/
Output
R> Rcpp::sourceCpp("/tmp/lexcast.cpp")
R> lexicalCast(c(42L, 101L))
[1] "42" "101"
R>
Alternatives
Because converting numbers to strings is as old as computing itself you could also use:
itoa()
snprintf()
streams
and probably a few more I keep forgetting.
As others have pointed out, there are several ways to do this. Here are two very straightforward approaches.
1. std::to_string
Rcpp::CharacterVector add_chars_to_int1(Rcpp::IntegerVector x){
int n = x.size();
Rcpp::CharacterVector BASEL_SEG(n);
for(int i = 0; i < n; i++){
BASEL_SEG[i] = "B0" + std::to_string(x[i]);
}
return BASEL_SEG;
}
2. Creating a new Rcpp::CharacterVector
Rcpp::CharacterVector add_chars_to_int2(Rcpp::IntegerVector x){
int n = x.size();
Rcpp::CharacterVector BASEL_SEG(n);
Rcpp::CharacterVector myIntToStr(x.begin(), x.end());
for(int i = 0; i < n; i++){
BASEL_SEG[i] = "B0" + myIntToStr[i];
}
return BASEL_SEG;
}
Calling them:
add_chars_to_int1(int_vec) ## using std::to_string
[1] "B01" "B02" "B03" "B04" "B05"
add_chars_to_int2(int_vec) ## converting to CharacterVector
[1] "B01" "B02" "B03" "B04" "B05"
I was trying to use rcpp/armadillo with openmp to speed up a loop in R. The loop takes a matrix with each row containing indices of a location vector(or matrix if it's 2D locations) as input(and other matrix/vec to be used). Inside the loop, I extracted each row of input indices matrix and find the corresponding locations, calculate distance matrix, and covariance matrix, do cholesky and backsolve, save the backsolve results to a new matrix. Here is the rcpp code:
`#include <iostream>
#include <RcppArmadillo.h>
#include <omp.h>
#include <Rcpp.h>
// [[Rcpp::plugins(openmp)]]
using namespace Rcpp;
using namespace arma;
using namespace std;
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::export]]
mat NZentries_new2 (int m, int nnp, const mat& locs, const umat& revNNarray, const mat& revCondOnLatent, const vec& nuggets, const vec covparms){
// initialized the output matrix
mat Lentries=zeros(nnp,m+1);
// initialized objects in parallel part
int n0; //number of !is_na elements
uvec inds;//
vec revCon_row;//
uvec inds00;//
vec nug;//
mat covmat;//
vec onevec;//
vec M;//
mat dist;//
int k;//
omp_set_num_threads(2);// selects the number of cores to use.
#pragma omp parallel for shared(locs,revNNarray,revCondOnLatent,nuggets,nnp,m,Lentries) private(k,M,dist,onevec,covmat,nug,n0,inds,revCon_row,inds00) default(none) schedule(static)
for (k = 0; k < nnp; k++) {
// extract a row to work with
inds=revNNarray.row(k).t();
revCon_row=revCondOnLatent.row(k).t();
if (k < m){
n0=k+1;
} else {
n0=m+1;
}
// extract locations
inds00=inds(span(m+1-n0,m))-ones<uvec>(n0);
nug=nuggets.elem(inds00) % (ones(n0)-revCon_row(span(m+1-n0,m))); // vec is vec, cannot convert to mat
dist=calcPWD2(locs.rows(inds00));
#pragma omp critical
{
//calculate covariance matrix
covmat= MaternFun(dist,covparms) + diagmat(nug) ; // summation from arma
}
// get last row of inverse Cholesky
onevec = zeros(n0);
onevec[n0-1] = 1;
M=solve(chol(covmat,"upper"),onevec);
// save the entries to matrix
Lentries(k,span(0,n0-1)) = M.t();
}
return Lentries;
}`
The current version works fine but speed is slow(almost the same as no parallel version), if I take the line in omp critical bracket out, it cause segment fault and R will be crashed. This MaterFun is a function I defined as below with several other small functions. So my question is that why MaternFun has to stay in the critical part.
// [[Rcpp::export]]
mat MaternFun( mat distmat, vec covparms ){
int d1 = distmat.n_rows;
int d2 = distmat.n_cols;
int j1;
int j2;
mat covmat(d1,d2);
double scaledist;
double normcon = covparms(0)/(pow(2.0,covparms(2)-1)*Rf_gammafn(covparms(2)));
for (j1 = 0; j1 < d1; j1++){
for (j2 = 0; j2 < d2; j2++){
if ( distmat(j1,j2) == 0 ){
covmat(j1,j2) = covparms(0);
} else {
scaledist = distmat(j1,j2)/covparms(1);
covmat(j1,j2) = normcon*pow( scaledist, covparms(2) )*
Rf_bessel_k(scaledist,covparms(2),1.0);
}
}
}
return covmat;
}
// [[Rcpp::export]]
double dist2(double lat1,double long1,double lat2,double long2) {
double dist = sqrt(pow(lat1 - lat2, 2) + pow(long1 - long2, 2)) ;
return (dist) ;
}
// [[Rcpp::export]]
mat calcPWD2( mat x) {//Rcpp::NumericMatrix
int outrows = x.n_rows ;
int outcols = x.n_rows ;
mat out(outrows, outcols) ;
for (int arow = 0 ; arow < outrows ; arow++) {
for (int acol = 0 ; acol < outcols ; acol++) {
out(arow, acol) = dist2(x(arow, 0),x(arow, 1),
x(acol, 0),x(acol, 1)) ; //extract element from mat
}
}
return (out) ;
}
Here is some sample inputs for testing the MaterFun in R:
library(fields)
distmat=rdist(1:5) # distance matrix
covparms=c(1,0.2,1.5)
The issue is there are two calls to R math functions (Rf_bessel_k and Rf_gammafn) that require the access to be single threaded instead of parallel.
To get around this, let's add a dependency on boost via BH to obtain the cyl_bessel_k and tgamma functions. Alternatively, there is always the option of reimplementing R's besselK and gamma in C++ so it doesn't use the single-threaded R variant.
This gives:
#include <Rcpp.h>
#include <boost/math/special_functions/bessel.hpp>
#include <boost/math/special_functions/gamma.hpp>
// [[Rcpp::depends(BH)]]
// [[Rcpp::export]]
double besselK_boost(double x, double v) {
return boost::math::cyl_bessel_k(v, x);
}
// [[Rcpp::export]]
double gamma_fn_boost(double x) {
return boost::math::tgamma(x);
}
Test Code
x0 = 9.536743e-07
nu = -10
all.equal(besselK(x0, nu), besselK_boost(x0, nu))
# [1] TRUE
x = 2
all.equal(gamma(x), gamma_fn_boost(x))
# [1] TRUE
Note: The order of parameters for boost's variant differs from R's:
cyl_bessel_k(v, x)
Rf_bessel_k(x, v, expon.scaled = FALSE)
From here, we can modify the MaternFun. Unfortunately, because calcPWD2 is missing, the furthest we can go is switching to use boost and incorporating in OpenMP protections.
#include <RcppArmadillo.h>
#include <boost/math/special_functions/bessel.hpp>
#include <boost/math/special_functions/gamma.hpp>
#ifdef _OPENMP
#include <omp.h>
#endif
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(BH)]]
// [[Rcpp::plugins(openmp)]]
// [[Rcpp::export]]
arma::mat MaternFun(arma::mat distmat, arma::vec covparms) {
int d1 = distmat.n_rows;
int d2 = distmat.n_cols;
int j1;
int j2;
arma::mat covmat(d1,d2);
double scaledist;
double normcon = covparms(0) /
(pow(2.0, covparms(2) - 1) * boost::math::tgamma(covparms(2)));
for (j1 = 0; j1 < d1; ++j1){
for (j2 = 0; j2 < d2; ++j2){
if ( distmat(j1, j2) == 0 ){
covmat(j1, j2) = covparms(0);
} else {
scaledist = distmat(j1, j2)/covparms(1);
covmat(j1, j2) = normcon * pow( scaledist, covparms(2) ) *
boost::math::cyl_bessel_k(covparms(2), scaledist);
}
}
}
return covmat;
}
I have written parallel implementation of sums in groups using RcppParallel.
// [[Rcpp::depends(RcppParallel)]]
#include <Rcpp.h>
#include <RcppParallel.h>
using namespace Rcpp;
using namespace RcppParallel;
struct SumsG: public Worker
{
const RVector<double> v;
const RVector<int> gi;
RVector<double> sg;
SumsG(const NumericVector v, const IntegerVector gi, NumericVector sg): v(v), gi(gi), sg(sg) {}
SumsG(const SumsG& p, Split): v(p.v), gi(p.gi), sg(p.sg) {}
void operator()(std::size_t begin, std::size_t end) {
for (std::size_t i = begin; i < end; i++) {
sg[gi[i]] += v[i];
}
}
void join(const SumsG& p) {
for(std::size_t i = 0; i < sg.length(); i++) {
sg[i] += p.sg[i];
}
}
};
// [[Rcpp::export]]
List sumsingroups(NumericVector v, IntegerVector gi, int ni) {
NumericVector sg(ni);
SumsG p(v, gi, sg);
parallelReduce(0, v.length(), p);
return List::create(_["sg"] = p.sg);
}
It compiles using Rcpp::sourceCpp. Now when I call it from R sumsingroups(1:10, rep(0:1, each = 5), 2) several times I get the right answer (15 40) and then something different (usually some multiplicative of the right answer). Running
res <- sumsingroups(1:10, rep(0:1, each = 5), 2)
for(i in 1:1000) {
tmp <- sumsingroups(1:10, rep(0:1, each = 5), 2)
if(res[[1]][1] != tmp[[1]][1]) break
Sys.sleep(0.1)
}
breaks at random iteration returning
$sg
[1] 60 160
or
$sg
[1] 30 80
I am new to Rcpp and RcppParallel and do not know what could cause such behavior.
Update. Things that did not help:
Added for (std::size_t i = 0; i < sg.length(); i++) sg[i] = 0; to
both of constructors.
Changed names so that they are different in
Worker definition and in function implementation.
Try this.
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::depends(RcppParallel)]]
#include <RcppParallel.h>
using namespace RcppParallel;
struct SumsInGroups5: public Worker
{
const RVector<double> v;
const RVector<int> g;
std::vector<double> s;
SumsInGroups5(const NumericVector v, const IntegerVector g): v(v), g(g), s(*std::max_element(g.begin(), g.end()) + 1, 0.0){ }
SumsInGroups5(const SumsInGroups5& p, Split): v(p.v), g(p.g), s(*std::max_element(g.begin(), g.end()) + 1, 0.0) {}
void operator()(std::size_t begin, std::size_t end) {
for (std::size_t i = begin; i < end; ++i) {
s[g[i]]+=v[i];
}
}
void join(const SumsInGroups5& rhs) {
for(std::size_t i = 0; i < s.size(); i++) {
s[i] += rhs.s[i];
}
}
};
// [[Rcpp::export]]
NumericVector sg5(NumericVector v, IntegerVector g) {
SumsInGroups5 p(v, g);
parallelReduce(0, v.length(), p);
return wrap(p.s);
}
/*** R
a <- 1:10
g <- c(rep(0,5),rep(1,5))
bb <- lapply(1:10000,function(x)sg5(a,g))
cc<-do.call("rbind",bb)
unique(cc)
*/
Compared to my other tries this code did not produce weird result in the same cases other code did. Not very assuring.
I have written a function that desirably sums values in groups. It takes two vectors of the same length: v and g and should return a vector of length the same as unique elements in g. Groups are encoded as integers starting from zero. Using Rcpp::sourceCpp the code compiles but when called from R (sg(runif(6), rep(0:1,each = 3)) for example) returns numeric(0).
// [[Rcpp::depends(RcppParallel)]]
#include <Rcpp.h>
#include <RcppParallel.h>
using namespace Rcpp;
using namespace RcppParallel;
struct SumsInGroups: public Worker
{
const RVector<double> v;
const RVector<int> g;
RVector<double> s;
SumsInGroups(const NumericVector v, const IntegerVector g, NumericVector s): v(v), g(g), s(s) {}
SumsInGroups(const SumsInGroups& p, Split): v(p.v), g(p.g), s(p.s) {}
void operator()(std::size_t begin, std::size_t end) {
for (std::size_t i = begin; i < end; ++i) {
if (s[g[i]] != s[g[i]]) s[g[i]] = v[i];
else s[g[i]] += v[i];
}
}
void join(const SumsInGroups& rhs) {
for(std::size_t i = 0; i < s.length(); i++) {
s[i] += rhs.s[i];
}
}
};
// [[Rcpp::export]]
RVector<double> sg(NumericVector v, IntegerVector g) {
NumericVector s;
SumsInGroups p(v, g, s);
parallelReduce(0, v.length(), p);
return p.s;
}
I am very new to RcppParallel so any comments and suggestions are welcomed.
You need to initialize s. I suggest initializing with zeroes. Here is the code which worked for me. Note that since I initialize with zeroes, I do not need the checking you do in operator ().
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::depends(RcppParallel)]]
#include <RcppParallel.h>
using namespace Rcpp;
using namespace RcppParallel;
struct SumsInGroups: public Worker
{
const RVector<double> v;
const RVector<int> g;
RVector<double> s;
SumsInGroups(const NumericVector v, const IntegerVector g, NumericVector s): v(v), g(g), s(s) {}
SumsInGroups(const SumsInGroups& p, Split): v(p.v), g(p.g), s(p.s) {}
void operator()(std::size_t begin, std::size_t end) {
for (std::size_t i = begin; i < end; ++i) {
s[g[i]] += v[i];
}
}
void join(const SumsInGroups& rhs) {
for(std::size_t i = 0; i < s.length(); i++) {
s[i] += rhs.s[i];
}
}
};
// [[Rcpp::export]]
RVector<double> sg(NumericVector v, IntegerVector g) {
NumericVector s(*std::max_element(g.begin(), g.end()) + 1);
SumsInGroups p(v, g, s);
parallelReduce(0, v.length(), p);
return p.s;
}
/*** R
set.seed(101)
o <- runif(15)
i <-sample(0:3,15, rep = TRUE)
sg(o, i)
tapply(o, i, sum)
*/