Weird access with external pointers - r

I made a small reproducible example:
#include <Rcpp.h>
using namespace Rcpp;
// Minimal value type wrapping a single integer; used in this example as the
// object handed to R through an external pointer.
class Index {
public:
  // Stores `i_` as the wrapped value.
  Index(int i_) : i(i_) {}

  // Returns the wrapped value. Declared const so it can also be called
  // through const objects, references and pointers.
  int getI() const { return i; }

private:
  int i;  // the wrapped value
};
// Builds an Index from `i`, prints `i` and the stored value, and returns an
// external pointer (XPtr) wrapping the address of that Index.
// NOTE(review): `ind` is a local variable of this function, so the address
// wrapped below refers to an object whose lifetime ends when the function
// returns; reading through the returned pointer afterwards is not valid.
// NOTE(review): the second XPtr argument `true` presumably requests a delete
// finalizer, while `ind` was not allocated with `new` -- confirm against the
// Rcpp XPtr documentation.
// [[Rcpp::export]]
SEXP getXPtrIndex(int i) {
Rcout << "getXPtrIndex: i = " << i << std::endl;
Index ind(i);
Rcout << "getXPtrIndex: ind.i = " << ind.getI() << std::endl;
return XPtr<Index>(&ind, true);
}
// Reads an Index back through the external pointer `ptr` and prints its value
// twice: first through the pointer itself, then through a by-value copy of
// the pointed-to object.
// [[Rcpp::export]]
void getXPtrIndexValue(SEXP ptr) {
  XPtr<Index> handle(ptr);
  Rcout << "getXPtrIndexValue: ind_ptr->i = " << handle->getI() << std::endl;

  // Copy the pointee so the second print goes through an independent object.
  Index snapshot(*handle);
  Rcout << "getXPtrIndexValue: ind.i = " << snapshot.getI() << std::endl;
}
Basically, I define a small class, along with a function that returns an external pointer to an object of this class. The last function prints the value read through that pointer when it is passed back to C++, which is where the weird access shows up.
Results in R:
> (extptr <- getXPtrIndex(10))
getXPtrIndex: i = 10
getXPtrIndex: ind.i = 10
<pointer: 0x7ffeeec31b00>
> getXPtrIndexValue(extptr)
getXPtrIndexValue: ind_ptr->i = 33696400
getXPtrIndexValue: ind.i = 0
Why can't I access 10?
I'm using Rcpp version 0.12.12 (the latest I think).

It seems to have something to do with the temporary object---by the time your second function runs the "content" of the first is already gone.
So either just make
Index ind(10);
a global, and comment out the line in your first function. Then all is peachy (I changed the R invocation slightly):
R> extptr <- getXPtrIndex(10)
getXPtrIndex: i = 10
getXPtrIndex: ind.i = 10
R> getXPtrIndexValue(extptr)
getXPtrIndexValue: ind_ptr->i = 10
getXPtrIndexValue: ind.i = 10
R>
Or it also works the same way when you make your Index object static to ensure persistence. Corrected example below.
#include <Rcpp.h>
using namespace Rcpp;
// Minimal value type wrapping a single integer; used in this example as the
// object handed to R through an external pointer.
class Index {
public:
  // Stores `i_` as the wrapped value.
  Index(int i_) : i(i_) {}

  // Returns the wrapped value. Declared const so it can also be called
  // through const objects, references and pointers.
  int getI() const { return i; }

private:
  int i;  // the wrapped value
};
// Returns an external pointer to a function-local static Index holding `i`,
// printing `i` and the stored value along the way.
//
// The Index is static, so its address stays valid after this function
// returns. Two details follow from that choice:
//  * the static initializer runs only on the first call, so the object is
//    re-assigned on every call to reflect the current `i`;
//  * the object was not allocated with `new`, so the XPtr is created without
//    a delete finalizer (second argument `false`); with `true`, garbage
//    collection of the pointer would try to `delete` a non-heap object.
// Every pointer returned by this function refers to the same Index object,
// so a later call changes what earlier pointers observe.
// [[Rcpp::export]]
SEXP getXPtrIndex(int i) {
  Rcout << "getXPtrIndex: i = " << i << std::endl;
  static Index ind(i);
  ind = Index(i);  // refresh: the initializer above is skipped after call one
  Rcout << "getXPtrIndex: ind.i = " << ind.getI() << std::endl;
  return XPtr<Index>(&ind, false);
}
// Reads an Index back through the external pointer `ptr` and prints its value
// twice: first through the pointer itself, then through a by-value copy of
// the pointed-to object.
// [[Rcpp::export]]
void getXPtrIndexValue(SEXP ptr) {
  XPtr<Index> handle(ptr);
  Rcout << "getXPtrIndexValue: ind_ptr->i = " << handle->getI() << std::endl;

  // Copy the pointee so the second print goes through an independent object.
  Index snapshot(*handle);
  Rcout << "getXPtrIndexValue: ind.i = " << snapshot.getI() << std::endl;
}
/*** R
extptr <- getXPtrIndex(10)
getXPtrIndexValue(extptr)
*/

Related

Plot 2D histogram of 4 columns data file by using ROOT

I need to plot 2D histograms of four columns of datasets in a data file.
here is my code:
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include "TFile.h"
#include "TGraphErrors.h"
#include "TH2.h"
using namespace std;
// Reads whitespace-separated numbers from "test.txt" into a fixed 1839 x 4
// array (echoing each input line), prints the array, then creates a TH2D with
// 256 x 256 bins spanning [-0.5, 255.5] on both axes.
// NOTE(review): the closing brace of main is not present in this snippet.
int main() {
// reading an ascii file
int row = 0;
double my_array[1839][4];
ifstream myfile ("test.txt");
if ( myfile.is_open() ) {
string line;
while( getline(myfile,line)) {
cout << line << endl;
stringstream ss(line);
int col=0;
// NOTE(review): neither row nor col is checked against the array bounds
// (1839 rows, 4 columns), so a longer file or a wider line writes past the
// array.
while (ss >> my_array[row][col]) col++;
row++;
}
}
myfile.close();
// NOTE(review): this loop always visits 1839 rows, independent of how many
// rows were actually read above; unread rows are uninitialized.
for (int i=0; i <1839; ++i) {
cout << "my_array " << my_array[i][0] << ", " << my_array[i][1] << "," << m\
y_array[i][2] << my_array[i][3] << endl;
}
// Histogram object named "hitm" with title "hitmap".
// NOTE(review): nothing in this snippet fills, draws or writes the histogram.
TH2D *hitm = new TH2D("hitm","hitmap", 256,-0.5,255.5,256,-0.5,255.5);
for (int i=0; i <1839; ++i) {
// The four column values of row i are copied into locals but not used.
double x = my_array[i][0];
double y = my_array[i][1];
double z = my_array[i][2];
double d = my_array[i][3];
// NOTE(review): this return sits inside the loop body, so the function exits
// during the first iteration.
return 0;
}
'
It doesn't work yet; all it does is print the columns to the terminal. Can you please help me with that?
thanks in advance

Rcpp: Calculation in loop stops with error "Not a matrix"

in an R script I source a cpp file to make some calculations. In that R script, a function defined in the cpp file is called and a matrix and an integer is provided. After a few rounds through the loop it gives the error "Not a matrix" (in line of code resid = (x(_,j) - x(_,i))*(x(_,j) - x(_,i));), even though for the rounds before it worked.
R script:
## all together
# rm(list=ls())
# Driver script: compiles ~/test.cpp with sourceCpp() and calls
# some_function() on a random n x n matrix for n = 45 and n = 46.
library(RcppArmadillo)
library(Rcpp)
sourceCpp("~/test.cpp",verbose = FALSE)
cat("start loop")
for(n in c(45:46)){
cat("\n", n, "\n")
# n x n matrix filled with n^2 draws from rnorm(mean = 1, sd = 1)
p_m <- matrix(data=rnorm(n^2,1,1),nrow = n, ncol=n)
print(class(p_m))
# 10 passes of the C++ routine; the returned vector is printed
print(some_function(p_m,nosamples=10))
}
cat("finished")
I start this R script via the command line. R version R-4.1.0. In R-Studio it crashes with a fatal error.
The cpp file:
// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadillo.h>
using namespace Rcpp;
// First version of some_function: for each of `nosamples` passes it walks all
// column pairs (j, i) with j < i of `x`, stores sqrt(acc) per pair in `out`,
// and copies the smallest positive entry of `out` into `out2`, which is
// returned. Progress is traced on std::cout.
// NOTE(review): several indexing issues are visible; see the inline notes.
// [[Rcpp::export]]
NumericVector some_function(NumericMatrix x,int nosamples) {
int ncol = x.ncol();
NumericVector out2(nosamples);
int loops;
int loops2;
double result=0;
NumericVector::iterator it;
// NOTE(review): acc is set once here and never reset, so it keeps
// accumulating across all pairs and all passes.
double acc = 0;
// assumes x is square (nrow == ncol), as in the calling R script -- resid is
// sized by ncol but later receives a column difference; TODO confirm
NumericVector resid(ncol);
// One slot per unordered column pair.
NumericVector out(ncol*(ncol-1)/2);
loops2=0;
std::cout << nosamples << std::endl;
std::cout << (ncol-1) << std::endl;
std::cout << ncol*(ncol-1)/2 << std::endl;
while(loops2 < (nosamples)){
std::cout << "loops2:" << std::endl;
std::cout << loops2 << std::endl;
loops=0;
int i;
int j;
for(j=0;j<(ncol-1);++j){
std::cout << " j: " << j << std::endl;
for (i = (j+1); i < (ncol); ++i) {
std::cout << " i: " << i << std::endl;
// Element-wise squared difference between columns j and i.
resid = (x(_,j) - x(_,i))*(x(_,j) - x(_,i)); //here it stops
std::cout << " i: " << i << std::endl;
for(int ii=0; ii<ncol;++ii){
// NOTE(review): indexes resid with the outer index i, not the loop
// variable ii, so the same element is added ncol times.
acc += resid[i];
}
result=sqrt(acc);
// NOTE(review): loops is incremented before the write, so out[0] is skipped
// and the last pair writes index ncol*(ncol-1)/2, one past the length out was
// created with.
loops += 1;
out[loops] = result;
std::cout << " i: " << i << std::endl;
}
}
std::cout << "loops:" << std::endl;
std::cout << loops << std::endl;
// Keeps only the positive entries; out may now be shorter than
// ncol*(ncol-1)/2, yet the next pass writes the same indices again.
out = out[out > 0];
it = std::min_element(out.begin(), out.end());
// Replaces out with the minimum found above (presumably leaving a length-1
// vector -- TODO confirm against Rcpp assignment semantics).
out= *it;
std::cout << out << std::endl;
// NOTE(review): loops2 is incremented before the write, so out2[0] is never
// set and the final pass writes out2[nosamples], one past the end.
loops2 += 1;
out2[loops2]=out[0];
}
std::cout << "cpp finished" << std::endl;
return(out2);
}
Can someone explain what the problem is about?
Thanks and kind regards
Edit
I adapted some things in the cpp file (shown below) and the error disappeared. First I thought, everything is fine. But when I increase the number of loops, another problem occurs: the function breaks, but no error is shown. It breaks after loop number 543 ("loop2: 543"). At least it does the same in each round of the while loop with the same data.
I adapted the R script and the cpp file to make this problem (at least on my machine) reproducible.
I know this code seems to be somehow meaningless, but it is part of a bigger program and I wanted to give here a minimum example.
The R script:
## all together
# rm(list=ls())
# Driver script: compiles ~/test.cpp with sourceCpp() and calls
# some_function() on a random n x n matrix for n = 100 and n = 101.
library(RcppArmadillo)
library(Rcpp)
sourceCpp("~/test.cpp",verbose = FALSE)
cat("start loop")
for(n in c(100:101)){
cat("\n", n, "\n")
# n x n matrix filled with n^2 draws from rnorm(mean = 1, sd = 1)
p_m <- matrix(data=rnorm(n^2,1,1),nrow = n, ncol=n)
print(class(p_m))
# 800 passes of the C++ routine; the returned vector is printed
print(some_function(p_m,nosamples=800))
}
cat("finished")
The cpp file:
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppEigen)]]
#include <RcppArmadillo.h>
#include <RcppEigen.h>
using namespace Rcpp;
using Eigen::Map;
using Eigen::VectorXd;
typedef Map<VectorXd> MapVecd;
// Second version of some_function: for each of `nosamples` passes it walks
// all column pairs (j, i) with j < i of `x`, stores sqrt(acc) per pair in
// `out`, takes the minimum of the positive entries through an Eigen map, and
// stores that minimum in out2[loops2]. Returns out2.
// NOTE(review): some indexing issues remain; see the inline notes.
// [[Rcpp::export]]
NumericVector some_function(NumericMatrix x,int nosamples) {
int ncol = x.ncol();
// One slot per unordered column pair.
NumericVector out(ncol*(ncol-1)/2);
NumericVector out2(nosamples);
NumericVector out3(ncol*(ncol-1)/2);
// assumes x is square (nrow == ncol), as in the calling R script -- resid is
// sized by ncol but later receives a column difference; TODO confirm
NumericVector resid(ncol);
int loops;
int loops2;
double result=0;
// NOTE(review): acc is set once here and never reset, so it keeps
// accumulating across all pairs and all passes.
double acc = 0;
// Debug switch: the inner-loop traces below print only when this equals 1.
int show_cout=0;
loops2=0;
std::cout << nosamples << std::endl;
std::cout << (ncol-1) << std::endl;
std::cout << ncol*(ncol-1)/2 << std::endl;
while(loops2 < (nosamples)){
std::cout << "loops2:" << loops2 << std::endl;
loops=0;
int i;
int j;
for(j=0;j<(ncol-1);++j){
// std::cout << " j: " << j << std::endl;
for (i = (j+1); i < (ncol); ++i) {
if(show_cout==1){
std::cout << " i: " << i << std::endl;
}
// Element-wise squared difference between columns j and i.
resid = (x(_,j) - x(_,i))*(x(_,j) - x(_,i));
if(show_cout==1){
std::cout << " i: " << i << std::endl;
}
for(int ii=0; ii<ncol;++ii){
acc += resid[ii];
}
result=sqrt(acc);
// NOTE(review): loops is incremented before the write, so out[0] is skipped
// and the last pair writes index ncol*(ncol-1)/2, one past the length out was
// created with.
loops += 1;
out[loops] = result;
if(show_cout==1){
std::cout << " i: " << i << std::endl;
}
}
}
// std::cout << "loops:" << loops << std::endl;
//
// Keeps only the positive entries; out may now be shorter than
// ncol*(ncol-1)/2, yet the next pass writes the same indices again.
out = out[out > 0];
// View the filtered vector through Eigen and take its smallest coefficient.
const MapVecd xy(as<MapVecd>(out));
out3=xy.minCoeff();
out2[loops2]=out3[0];
loops2 += 1;
}
std::cout << "cpp finished" << std::endl;
return(out2);
}
Two things here:
Use out[loops++] = result; instead of loops += 1; out[loops] = result; because you were starting at 1, and probably accessing the last element outside of the range of this vector.
Use
for(int ii=0; ii<ncol;++ii){ double eps = x(ii, j) - x(ii, i); acc += eps * eps; }
instead of relying on this resid vector.

Dealing with NA values using Rcpp

I'm testing a piece of my code, which is shown below:
#include <Rcpp.h>
using namespace Rcpp;
// Question version of testOutMat: draws one binomial(1, col_prob[j]) value
// per (i, j) cell, traces it on Rcout, and tries to collect the positions of
// the cells that came out as 1 into a two-column matrix.
// NOTE(review): the position buffers are indexed by row only; see the inline
// notes.
// [[Rcpp::export]]
NumericMatrix testOutMat(const int& ncols, const int& nrows, const NumericVector& col_prob){
//Store row and column positions
// Both buffers have nrows elements and are created with no_init, i.e. their
// contents are not set here.
NumericVector col_pos = no_init(nrows);
NumericVector row_pos = no_init(nrows);
int row_val;
int nz_counter=0;
for(int j=0; j<ncols; ++j){
for(int i=0; i<nrows; ++i){
row_val = R::rbinom(1,col_prob[j]);
Rcout << "i,j: " << i << "," << j << std::endl;
Rcout << "val: " << row_val << std::endl;
// NOTE(review): slot i is written for every column j, so each pass over a
// new column overwrites what the previous column stored in the same slot.
if(row_val==1){ //if (i,j)th entry is a 1, save location
row_pos[i] = i;
col_pos[i] = j;
nz_counter += 1;
} else{ //assign as NA
row_pos[i] = NA_REAL;
col_pos[i] = NA_REAL;
}
Rcout << "row_pos[i]: " << row_pos[i] << std::endl;
Rcout << "col_pos[i]: " << col_pos[i] << std::endl;
Rcout << "num non-zeros: " << nz_counter << std::endl;
}
}
// Output has one row per counted 1; created with no_init, so rows that are
// not assigned below keep whatever the storage held.
NumericMatrix out = no_init(nz_counter,2);
Rcout << "Printing output matrix" << std::endl;
// NOTE(review): this reads the first nz_counter slots of the buffers, which
// are not necessarily the slots holding non-NA positions; nz_counter counts
// across all columns and can also exceed nrows, the buffer length.
for(int i=0; i<nz_counter; ++i){
if(!Rcpp::NumericVector::is_na(row_pos[i])){
out(i,0) = row_pos[i];
out(i,1) = col_pos[i];
}
Rcout << "row_pos[i]: " << row_pos[i] << std::endl;
Rcout << "col_pos[i]: " << col_pos[i] << std::endl;
}
return out;
}
/*** R
set.seed(1)
res <- testOutMat(ncols=5,nrows=5,col_prob = runif(20, 0.1, 0.2))
*/
From the output, I have that the entries (i,j)={(0,0),(3,1)} are non-zero, so that res should be a 2x2 matrix with 0 0 in the first row and 3 1 in the second. However, I get something very different:
[,1] [,2]
[1,] 64 1024
[2,] 1 4
I suspect that this is due to how I'm handling NAs. The overall goal of the function is to generate the row and column indices for non-zero elements (generated by the call to rbinom).
I've tried debugging this for some time now and I can't seem to get a fix.
The problem here is that you're writing over row_pos and col_pos over and over again (ncols times) without any kind of keeping track of the prior result. That, coupled with your no_init() use, is what's causing the end result you see. We can change your code just a bit to ensure that row_pos and col_pos don't get overwritten:
#include <Rcpp.h>
using namespace Rcpp;
// Draws an nrows x ncols matrix of binomial(1, col_prob[j]) deviates, one
// probability per column, and returns the zero-based (row, column) positions
// of every entry equal to 1.
//
// ncols     number of columns to simulate
// nrows     number of rows to simulate
// col_prob  success probabilities; element j is used for column j, so at
//           least ncols values are required (extra values are ignored)
//
// Returns an IntegerMatrix with one row per non-zero entry: column 0 holds
// the row index, column 1 the column index, ordered column by column.
// Raises an R error if a dimension is negative or col_prob is too short.
// [[Rcpp::export]]
IntegerMatrix testOutMat(const int ncols, const int nrows,
                         const NumericVector& col_prob) {
  if (ncols < 0 || nrows < 0) {
    stop("ncols and nrows must be non-negative");
  }
  // Guard the col_prob[j] reads below against a too-short probability vector.
  if (col_prob.size() < ncols) {
    stop("col_prob must contain at least ncols probabilities");
  }

  // Pass 1: simulate column by column and count the ones, so the result can
  // be allocated at its final size instead of growing a vector per hit.
  IntegerMatrix binomial_deviates(nrows, ncols);
  int nz_counter = 0;
  for (int j = 0; j < ncols; ++j) {
    binomial_deviates(_, j) = rbinom(nrows, 1, col_prob[j]);
    for (int i = 0; i < nrows; ++i) {
      if (binomial_deviates(i, j) == 1) {
        ++nz_counter;
      }
    }
  }

  // Pass 2: record the positions in the same column-major visiting order.
  IntegerMatrix out(nz_counter, 2);
  int next = 0;
  for (int j = 0; j < ncols; ++j) {
    for (int i = 0; i < nrows; ++i) {
      if (binomial_deviates(i, j) == 1) {
        out(next, 0) = i;
        out(next, 1) = j;
        ++next;
      }
    }
  }
  return out;
}
/*** R
set.seed(1)
res <- testOutMat(ncols=5,nrows=5,col_prob = runif(20, 0.1, 0.2))
*/
Result:
> set.seed(1)
> res <- testOutMat(ncols=5,nrows=5,col_prob = runif(20, 0.1, 0.2))
> res
[,1] [,2]
[1,] 0 0
[2,] 3 1

Strange behavior when incrementally sampling using RcppArmadillo::sample

I'm trying to implement some draws using a polya urn scheme using Rcpp. Basically, I have a matrix I'm drawing from, and a 2nd matrix with weights proportional to the probabilities. After each draw, I need to increase the weight of whichever cell I drew.
I was running into some indexing errors, which led me to examine the sampling more generally, and I found that my weight matrix was getting modified by RcppArmadillo::sample. Two questions: (1) Is this behavior that I should have expected, or is this a bug which I should report somewhere? (2) Any ideas on a current work-around? Here's a reproducible example:
#include <RcppArmadilloExtensions/sample.h>
// [[Rcpp::depends(RcppArmadillo)]]
using namespace Rcpp ;
// [[Rcpp::export]]
void sampler(int N, int inc, NumericMatrix& weight_matrix, int reps) {
IntegerVector wm_tmp = seq_along(weight_matrix);
Rcout << "Initial weight_matrix:\n" << weight_matrix << "\n";
int x_ind;
for(int i = 0; i < reps; ++i) {
x_ind = RcppArmadillo::sample(wm_tmp, 1, true, weight_matrix)(0) - 1;
Rcout << "Weight matrix after sample: (rep = " << i << ")\n" << weight_matrix << "\n";
Rcout << "x_ind: " << x_ind << "\n";
// get indices
weight_matrix[x_ind] = weight_matrix[x_ind] + inc;
Rcout << "Add increment of " << inc << " to weight_matrix:\n" << weight_matrix << "\n";
}
}
//
// // [[Rcpp::export]]
// IntegerVector seq_cpp(IntegerMatrix x) {
// IntegerVector tmp = seq_along(x);
// IntegerVector ret = RcppArmadillo::sample(tmp, 2, true);
// return ret;
// }
/*** R
weight_matrix <- matrix(1, 5, 2)
sampler(5, 1, weight_matrix, 3)
weight_matrix <- matrix(1, 5, 2)
sampler(5, 0, weight_matrix, 3)
*/
Thanks!
That is known and documented behaviour.
You could do
i) Use Rcpp::clone() to create a distinct copy of your SEXP (ie NumericMatrix).
ii) Use an Armadillo matrix instead and pass as const arma::mat & m.
There are architectural reasons having to do with the way R organizes its data structure which mean that we cannot give you fast access (no copies!) and also protect against writes.

command : Delete[] x

I have the following simple code. I dynamically allocate memory for three doubles, assign a number to each of them, and then deallocate the memory. As one can see by running the code, the only difference between the output before and after the deletion (delete[] x) is in the first double of the array. I can't understand why the content of the first element changed while the pointer x still holds the same memory address.
#include <iostream>
#include <cmath>
// Allocates an array of three doubles, prints its contents and address,
// releases it with delete[], then prints the contents and address again.
// argc and argv are not used.
int main(int argc, char * argv[])
{
double * x;
x = new double [3];
x[0] = 1; x[1]=3; x[2]=5;
std::cout << x[0] << " " << x[1] << " " << x[2] << "\n";
std::cout << x << "\n";
delete[] x;
// NOTE(review): the elements are read here after delete[] has released the
// array; the values printed by this line carry no guarantee.
std::cout << x[0] << " " << x[1] << " " << x[2] << "\n";
// delete[] does not modify the pointer variable itself, so x still holds the
// old address.
std::cout << x << "\n";
return 0;
}
To my understanding, this is undefined behaviour; x is read after it is deleted.

Resources