Rcpp: Calculation in loop stops with error "Not a matrix" - r

In an R script I source a cpp file to make some calculations. In that R script, a function defined in the cpp file is called, and a matrix and an integer are passed to it. After a few rounds through the loop it gives the error "Not a matrix" (in the line of code resid = (x(_,j) - x(_,i))*(x(_,j) - x(_,i));), even though it worked in the earlier rounds.
R script:
## Driver: compile test.cpp, then call some_function() on random square matrices
# rm(list = ls())
library(RcppArmadillo)
library(Rcpp)

sourceCpp("~/test.cpp", verbose = FALSE)

cat("start loop")
for (n in 45:46) {
  cat("\n", n, "\n")
  # n x n matrix filled with draws from rnorm(mean = 1, sd = 1)
  p_m <- matrix(data = rnorm(n^2, mean = 1, sd = 1), nrow = n, ncol = n)
  print(class(p_m))
  print(some_function(p_m, nosamples = 10))
}
cat("finished")
I start this R script via the command line. R version R-4.1.0. In R-Studio it crashes with a fatal error.
The cpp file:
// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadillo.h>
using namespace Rcpp;
// [[Rcpp::export]]
NumericVector some_function(NumericMatrix x,int nosamples) {
// Runs `nosamples` passes. Each pass walks every column pair (j, i) with j < i,
// stores sqrt of a running sum in `out`, and then copies the smallest positive
// entry of `out` into `out2`, which is returned at the end.
// NOTE(review): this is the failing version discussed in the question; the
// NOTE(review) comments below point at the indexing problems visible in the code.
int ncol = x.ncol();
NumericVector out2(nosamples);
int loops;
int loops2;
double result=0;
NumericVector::iterator it;
double acc = 0;
NumericVector resid(ncol);
// One slot per unordered column pair: ncol*(ncol-1)/2 entries, valid indices 0..size-1.
NumericVector out(ncol*(ncol-1)/2);
loops2=0;
std::cout << nosamples << std::endl;
std::cout << (ncol-1) << std::endl;
std::cout << ncol*(ncol-1)/2 << std::endl;
while(loops2 < (nosamples)){
std::cout << "loops2:" << std::endl;
std::cout << loops2 << std::endl;
loops=0;
int i;
int j;
for(j=0;j<(ncol-1);++j){
std::cout << " j: " << j << std::endl;
for (i = (j+1); i < (ncol); ++i) {
std::cout << " i: " << i << std::endl;
// Squared difference of columns j and i (presumably element-wise via Rcpp sugar — TODO confirm).
resid = (x(_,j) - x(_,i))*(x(_,j) - x(_,i)); //here it stops
std::cout << " i: " << i << std::endl;
// NOTE(review): the body indexes with the outer `i`, not the loop variable `ii`,
// so the same element is added ncol times. `acc` is also never reset, so it keeps
// growing across pairs and across passes.
for(int ii=0; ii<ncol;++ii){
acc += resid[i];
}
result=sqrt(acc);
// NOTE(review): `loops` is incremented before it is used as an index, so slot 0 is
// never written and the last write of a pass targets index ncol*(ncol-1)/2, which is
// one past the end of `out` as it was allocated.
loops += 1;
out[loops] = result;
std::cout << " i: " << i << std::endl;
}
}
std::cout << "loops:" << std::endl;
std::cout << loops << std::endl;
// Keep only the positive entries, then locate the smallest of them.
out = out[out > 0];
it = std::min_element(out.begin(), out.end());
// NOTE(review): this assigns a single double to `out`; it appears to leave `out` as a
// length-1 vector, in which case every later pass writes far beyond its end via
// out[loops] — TODO confirm against the Rcpp Vector assignment semantics.
out= *it;
std::cout << out << std::endl;
// NOTE(review): `loops2` is incremented before the write, so out2[0] is never set and
// the final pass writes out2[nosamples], one past the end of `out2`.
loops2 += 1;
out2[loops2]=out[0];
}
std::cout << "cpp finished" << std::endl;
return(out2);
}
Can someone explain what the problem is about?
Thanks and kind regards
Edit
I adapted some things in the cpp file (shown below) and the error disappeared. First I thought, everything is fine. But when I increase the number of loops, another problem occurs: the function breaks, but no error is shown. It breaks after loop number 543 ("loop2: 543"). At least it does the same in each round of the while loop with the same data.
I adapted the R script and the cpp file to make this problem (at least on my machine) reproducible.
I know this code seems to be somehow meaningless, but it is part of a bigger program and I wanted to give here a minimum example.
The R script:
## Driver: compile test.cpp, then call some_function() on larger random square matrices
# rm(list = ls())
library(RcppArmadillo)
library(Rcpp)

sourceCpp("~/test.cpp", verbose = FALSE)

cat("start loop")
for (n in 100:101) {
  cat("\n", n, "\n")
  # n x n matrix filled with draws from rnorm(mean = 1, sd = 1)
  p_m <- matrix(data = rnorm(n^2, mean = 1, sd = 1), nrow = n, ncol = n)
  print(class(p_m))
  print(some_function(p_m, nosamples = 800))
}
cat("finished")
The cpp file:
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppEigen)]]
#include <RcppArmadillo.h>
#include <RcppEigen.h>
using namespace Rcpp;
using Eigen::Map;
using Eigen::VectorXd;
typedef Map<VectorXd> MapVecd;
// [[Rcpp::export]]
NumericVector some_function(NumericMatrix x,int nosamples) {
// Edited version from the question. Runs `nosamples` passes; each pass walks every
// column pair (j, i) with j < i, stores sqrt of a running sum in `out`, and writes
// the minimum of the positive entries of `out` into out2[loops2]. Returns `out2`.
// NOTE(review): this version still crashes according to the question; the
// NOTE(review) comments below mark the indexing problems visible in the code.
int ncol = x.ncol();
// One slot per unordered column pair: valid indices 0..ncol*(ncol-1)/2 - 1.
NumericVector out(ncol*(ncol-1)/2);
NumericVector out2(nosamples);
NumericVector out3(ncol*(ncol-1)/2);
NumericVector resid(ncol);
int loops;
int loops2;
double result=0;
double acc = 0;
// Debug switch: set to 0 here and never changed, so the guarded prints below do not run.
int show_cout=0;
loops2=0;
std::cout << nosamples << std::endl;
std::cout << (ncol-1) << std::endl;
std::cout << ncol*(ncol-1)/2 << std::endl;
while(loops2 < (nosamples)){
std::cout << "loops2:" << loops2 << std::endl;
loops=0;
int i;
int j;
for(j=0;j<(ncol-1);++j){
// std::cout << " j: " << j << std::endl;
for (i = (j+1); i < (ncol); ++i) {
if(show_cout==1){
std::cout << " i: " << i << std::endl;
}
// Squared difference of columns j and i (presumably element-wise via Rcpp sugar — TODO confirm).
resid = (x(_,j) - x(_,i))*(x(_,j) - x(_,i));
if(show_cout==1){
std::cout << " i: " << i << std::endl;
}
// Adds the first ncol entries of resid to acc.
// NOTE(review): `acc` is never reset, so it accumulates across pairs and passes.
for(int ii=0; ii<ncol;++ii){
acc += resid[ii];
}
result=sqrt(acc);
// NOTE(review): `loops` is incremented before it is used as an index, so slot 0 is
// not written here and the last write of a pass targets index ncol*(ncol-1)/2,
// one past the end of `out` as it was allocated.
loops += 1;
out[loops] = result;
if(show_cout==1){
std::cout << " i: " << i << std::endl;
}
}
}
// std::cout << "loops:" << loops << std::endl;
//
// NOTE(review): this replaces `out` with only its positive entries, so `out` can become
// shorter than ncol*(ncol-1)/2 while the next pass still writes up to that index.
out = out[out > 0];
// View `out` through an Eigen map and take its smallest coefficient.
const MapVecd xy(as<MapVecd>(out));
// Assigns a scalar to a NumericVector; presumably out3 becomes length 1 — TODO confirm.
out3=xy.minCoeff();
out2[loops2]=out3[0];
loops2 += 1;
}
std::cout << "cpp finished" << std::endl;
return(out2);
}

Two things here:
Use out[loops++] = result; instead of loops += 1; out[loops] = result; because you were starting at 1, and probably accessing the last element outside of the range of this vector.
Use
for(int ii=0; ii<ncol;++ii){ double eps = x(ii, j) - x(ii, i); acc += eps * eps; }
instead of relying on this resid vector.

Related

Passing map by reference in C++ and mutating its value

I am passing a map by reference to the canSum function, where I mutate it by adding pairs, but when I iterate over the map at the end I find that its contents have not been updated.
canSum function is a recursive function which takes a number (targetSum) and an array and finds if it is possible to form targetSum by any combinations of number in the array (numbers can be repeated).
#include<iostream>
#include<vector>
#include<map>
using namespace std;
/// Decide whether `targetSum` can be written as a sum of elements of `a`
/// (each element may be used any number of times), memoising results in `m`.
///
/// @param targetSum  value to build; a negative value is reported as unreachable.
/// @param a          candidate numbers, assumed to be positive. Taken by const
///                   reference so the recursion does not copy the vector on
///                   every call.
/// @param m          memo table mapping an already-solved target to its answer;
///                   updated in place, so the caller sees the new entries.
/// @return true if some combination of elements of `a` sums to `targetSum`.
bool canSum(int targetSum, const std::vector<int>& a, std::map<int, bool> &m){
    // One lookup serves both the "is it cached?" test and the cached value.
    const auto memo = m.find(targetSum);
    if (memo != m.end())
        return memo->second;
    if (targetSum == 0)
        return true;
    if (targetSum < 0)
        return false;
    for (int num : a)
    {
        // A zero never changes the remaining target; recursing on it would
        // call canSum with the same arguments forever.
        if (num == 0)
            continue;
        if (canSum(targetSum - num, a, m))
        {
            m[targetSum] = true;
            return true;
        }
    }
    m[targetSum] = false;
    return false;
}
// Reads a target and a list of numbers from stdin, echoes the numbers, prints the
// memo map, and finally reports whether canSum() can build the target.
int main(){
int targetSum, t;
vector<int> a;
map<int, bool> m;
// Seed the memo with the base case 0 -> true.
m[0] = true;
cout << "enter target" << endl;
cin >> targetSum;
cout << "enter array, press esc to stop entering"<<endl;
// Keeps reading until extraction of an int fails (non-numeric input or end of input).
while(cin>>t){
a.push_back(t);
}
for (int j = 0; j < a.size(); j++)
{
cout << a[j]<<" ";
}
cout << endl;
// NOTE(review): the map is printed here, before canSum() has been called, so at this
// point it can only contain the single entry seeded above. This is the behaviour the
// question asks about.
for (auto itr = m.begin(); itr != m.end(); ++itr) {
cout << '\t' << itr->first
<< '\t' << itr->second << '\n';
}
// canSum() takes `m` by reference and adds entries to it during this call.
if(canSum(targetSum, a,m)){
cout << endl << "true" << endl;
}
else cout << endl << "false" << endl;
return 0;
}
Please help me. Thank you.
The for loop that prints the map should come after the function call, like this:
// Call canSum() first: it receives `m` by reference and fills it in.
if(canSum(targetSum, a,m)){
cout << endl << "true" << endl;
}
else cout << endl << "false" << endl;
// Print afterwards, so the entries added by canSum() are included.
for (auto itr = m.begin(); itr != m.end(); ++itr) {
cout << '\t' << itr->first
<< '\t' << itr->second << '\n';
}
Instead of
// Original order: the map is printed before canSum() has run...
for (auto itr = m.begin(); itr != m.end(); ++itr) {
cout << '\t' << itr->first
<< '\t' << itr->second << '\n';
}
// ...so whatever canSum() adds to `m` here is never shown.
if(canSum(targetSum, a,m)){
cout << endl << "true" << endl;
}
else cout << endl << "false" << endl;
To see mutations in the map due to the function

Where to initialize array then to scatter it. MPI_Scatter

I need to send array pieces to all processes using MPI_Scatter then to get sum of all elements. Where should I initialize array then to scatter it? In root rank?
If I initialize the array on the root rank, the other ranks don't get their data. Otherwise I can initialize the array on every rank (outside of if(rank == root)...else), but that means I create the array several times.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <iostream>
#include <time.h>
using namespace std;
// Generates size*2 random ints on rank 0, tries to scatter two per rank, sums each
// rank's share locally and reduces the local sums onto rank 0.
int main(int argc, char* argv[])
{
int size;
int rank;
srand(time(NULL));
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// Two elements per rank: arr_size = 2*size, hence block = 2.
int arr_size = size * 2;
int block = arr_size / (size);
// Receive buffer for this rank's share; allocated but not initialised.
int* B = new int[block];
if (rank == 0)
{
int* A = new int[arr_size];
cout << "generated array: " << endl;
for (int i = 0; i < arr_size; i++)
{
A[i] = rand() % 100;
cout << A[i] << " ";
}
cout << endl;
// NOTE(review): this call sits inside the rank == 0 branch, so only rank 0 ever
// executes it. MPI_Scatter is a collective operation (see the answer below), so the
// other ranks never receive anything and go on to read their uninitialised B.
MPI_Scatter(A, block, MPI_INT, B, block, MPI_INT, 0, MPI_COMM_WORLD);
}
cout << "process " << rank << " received: " << endl;
for (int i = 0; i < block; i++)
{
cout << B[i] << " ";
}
cout << endl;
int local_sum = 0;
for (int i = 0; i < block; i++)
{
local_sum += B[i];
}
cout << "sum in process " << rank << " = " << local_sum << endl;
cout << endl;
// Combine all local sums on rank 0; global_sum is only printed on rank 0.
int global_sum;
MPI_Reduce(&local_sum, &global_sum, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0)
{
cout << "sum = " << global_sum << endl;
}
// NOTE(review): neither A nor B is released with delete[] before exit.
MPI_Finalize();
return 0;
}
I get something like this (only root rank got its data):
process 1 received:
process 3 received:
-842150451 -842150451
-842150451 -842150451
sum in process 1 = -1684300902
sum in process 3 = -1684300902
process 2 received:
-842150451 -842150451
sum in process 2 = -1684300902
process 0 received:
4 9
sum in process 0 = 13
sum = -757935397
MPI_Scatter() is a collective operation and must hence be invoked by all the ranks.
Declare int *A = NULL; on all ranks and only allocate and populate on rank zero.
// Send buffer: declared on every rank, but only allocated and filled on rank 0.
int* A = NULL;
// Receive buffer: every rank gets `block` elements.
int* B = new int[block];
if (rank == 0)
{
A = new int[arr_size];
cout << "generated array: " << endl;
for (int i = 0; i < arr_size; i++)
{
A[i] = rand() % 100;
cout << A[i] << " ";
}
cout << endl;
}
// Outside the branch, so every rank takes part in the collective call.
MPI_Scatter(A, block, MPI_INT, B, block, MPI_INT, 0, MPI_COMM_WORLD);

command : Delete[] x

I have the following simple code. I dynamically allocate memory for 3 doubles, assign a number to each of them, and then deallocate the memory. As one can see by running the code, the only difference in the output before and after the deletion (delete[] x) is in the first double of the array. I can't understand why the content of the first element changed while x itself still holds the same memory address.
#include <iostream>
#include <cmath>
// Allocates three doubles, prints them and the pointer, frees the array, then prints
// the same things again.
int main(int argc, char * argv[])
{
double * x;
x = new double [3];
x[0] = 1; x[1]=3; x[2]=5;
std::cout << x[0] << " " << x[1] << " " << x[2] << "\n";
std::cout << x << "\n";
delete[] x;
// NOTE(review): the array was released on the previous line, so reading x[0..2] here
// is undefined behaviour (see the answer below); whatever gets printed is not meaningful.
std::cout << x[0] << " " << x[1] << " " << x[2] << "\n";
// Prints the pointer variable itself, not the memory it used to point to.
std::cout << x << "\n";
return 0;
}
To my understanding, this is undefined behaviour; x is read after it is deleted.

Weird access with external pointers

I made a small reproducible example:
#include <Rcpp.h>
using namespace Rcpp;
// Minimal value holder: wraps one int and exposes it through getI().
class Index {
public:
    // Stores i_ as the wrapped value.
    Index(int i_) : i(i_) {}
    // Returns the wrapped value. Const-qualified because it does not modify the
    // object, so it can also be called on const Index objects and references.
    int getI() const { return i; }
private:
    int i;
};
// [[Rcpp::export]]
SEXP getXPtrIndex(int i) {
// Prints i, builds a local Index from it, prints the stored value and returns an
// external pointer that wraps the address of that local object.
Rcout << "getXPtrIndex: i = " << i << std::endl;
Index ind(i);
Rcout << "getXPtrIndex: ind.i = " << ind.getI() << std::endl;
// NOTE(review): `ind` is a local variable, so it stops existing when this function
// returns; the pointer handed back to R then refers to a dead object, which matches
// the garbage values shown in the question. The `true` argument presumably also asks
// Rcpp to delete the pointee on garbage collection — TODO confirm in the XPtr docs.
return XPtr<Index>(&ind, true);
}
// [[Rcpp::export]]
void getXPtrIndexValue(SEXP ptr) {
    // Reports the wrapped Index twice: first read through the external pointer,
    // then read from a by-value copy of the pointee.
    XPtr<Index> handle(ptr);
    Rcout << "getXPtrIndexValue: ind_ptr->i = "
          << handle->getI()
          << std::endl;

    Index snapshot(*handle);
    Rcout << "getXPtrIndexValue: ind.i = "
          << snapshot.getI()
          << std::endl;
}
Basically, I define a small class, along with a function to get an external pointer of an element of this class. The last function is used to print the weird accessor when returning the class element back to C++.
Results in R:
> (extptr <- getXPtrIndex(10))
getXPtrIndex: i = 10
getXPtrIndex: ind.i = 10
<pointer: 0x7ffeeec31b00>
> getXPtrIndexValue(extptr)
getXPtrIndexValue: ind_ptr->i = 33696400
getXPtrIndexValue: ind.i = 0
Why can't I access 10?
I'm using Rcpp version 0.12.12 (the latest I think).
The problem is the lifetime of the local object: ind only exists while getXPtrIndex is running, so by the time your second function runs, the object the external pointer refers to is already gone.
So either just make
Index ind(10);
a global, and comment out the line in your first function. Then all is peachy (I changed the R invocation slightly):
R> extptr <- getXPtrIndex(10)
getXPtrIndex: i = 10
getXPtrIndex: ind.i = 10
R> getXPtrIndexValue(extptr)
getXPtrIndexValue: ind_ptr->i = 10
getXPtrIndexValue: ind.i = 10
R>
Alternatively, it works the same way when you make your Index object static to ensure persistence. Corrected example below.
#include <Rcpp.h>
using namespace Rcpp;
// Minimal value holder: wraps one int and exposes it through getI().
class Index {
public:
    // Stores i_ as the wrapped value.
    Index(int i_) : i(i_) {}
    // Returns the wrapped value. Const-qualified because it does not modify the
    // object, so it can also be called on const Index objects and references.
    int getI() const { return i; }
private:
    int i;
};
// [[Rcpp::export]]
SEXP getXPtrIndex(int i) {
    // Returns an external pointer to a function-local static Index holding `i`.
    // The object has static storage duration, so it is still alive when
    // getXPtrIndexValue() dereferences the pointer later.
    Rcout << "getXPtrIndex: i = " << i << std::endl;

    // A local static is initialised on the first call only, so assign on every
    // call; otherwise later calls would keep reporting the first value.
    static Index ind(i);
    ind = Index(i);
    Rcout << "getXPtrIndex: ind.i = " << ind.getI() << std::endl;

    // Pass `false` so no delete-finalizer is registered: the object was not
    // created with `new`, so it must not be deleted when R garbage-collects
    // the pointer.
    // Every pointer returned by this function refers to the same object. For
    // independent objects, allocate with `new Index(i)` and keep the finalizer.
    return XPtr<Index>(&ind, false);
}
// [[Rcpp::export]]
void getXPtrIndexValue(SEXP ptr) {
    // Reports the wrapped Index twice: first read through the external pointer,
    // then read from a by-value copy of the pointee.
    XPtr<Index> handle(ptr);
    Rcout << "getXPtrIndexValue: ind_ptr->i = "
          << handle->getI()
          << std::endl;

    Index snapshot(*handle);
    Rcout << "getXPtrIndexValue: ind.i = "
          << snapshot.getI()
          << std::endl;
}
/*** R
extptr <- getXPtrIndex(10)
getXPtrIndexValue(extptr)
*/

dynamic arrays c++ Access violation writing location

What is wrong with my code ? I have the error like this.
Unhandled exception at 0x00d21673 in mnozenie_macierzy.exe : 0xC0000005: Access violation writing location 0xcdcdcdcd.
It creates the first array and half of the second one. The program is supposed to multiply the two matrices.
Sorry for my English if It isn't correct. I hope you understand me.
#include <iostream>
#include <time.h>
using namespace std;
// Fills a rows x cols matrix with rand() values in row-major order and echoes each
// cell as "<label>i][j] : \t<value>\t", emitting a line break before every row.
static void fill_and_print(const char *label, int **matrix, int rows, int cols)
{
    for (int r = 0; r < rows; ++r)
    {
        std::cout << std::endl;
        for (int col = 0; col < cols; ++col)
        {
            int &cell = matrix[r][col];
            cell = rand();
            std::cout << label << r << "][" << col << "] : \t" << cell << "\t";
        }
    }
}

// Randomises and prints tab1 (a rows, b columns) and then tab2 (c rows, d columns).
// Both matrices must already be allocated by the caller with at least those sizes.
void losowa_tablica(int **tab1, int **tab2, int a, int b, int c, int d)
{
    fill_and_print("tab1[", tab1, a, b);
    std::cout << std::endl;
    fill_and_print("tab2[", tab2, c, d);
    std::cout << std::endl << std::endl;
}
// Allocates a result matrix and fills cell (i, j) with a sum of products
// tab1[i][g] * tab2[g][j]; returns the newly allocated matrix (the caller owns it).
// Parameter c is not used in the body.
int **mnozenie(int **tab1, int **tab2, int a, int b, int c, int d)
{
int g, suma, i, j;
// `a` row pointers are allocated here...
int **mac=new int*[a];
// NOTE(review): ...but this loop runs to d, not a. If d > a it writes past the end of
// `mac`; if d < a, rows d..a-1 are never allocated although the loop below writes to
// them. The loop variable also shadows the outer `i`.
for(int i=0; i<d; i++)
mac[i]=new int[d];
for(i=0; i<a; i++)
for(j=0; j<d; j++)
{
g=b-1, suma=0;
// NOTE(review): the loop counts g down from b-1 and stops as soon as g reaches 0, so
// the g == 0 term is never added. When b == 1, g starts at 0, is decremented to -1 and
// the loop continues with negative indices.
do
{
suma+=tab1[i][g]*tab2[g][j];
g--;
}while(g!=0);
mac[i][j]=suma;
}
return mac;
}
// Reads the dimensions of two matrices, allocates and randomises them, and multiplies
// them when the column count of the first equals the row count of the second.
int main()
{
int a,b,c,d;
cout << "Podaj liczbe wierszy pierwszej macierzy: " << endl;
cin >> a;
cout << "Podaj liczbe kolumn pierwszej macierzy: " << endl;
cin >> b;
cout << "Podaj liczbe wierszy drugiej macierzy: " << endl;
cin >> c;
cout << "Podaj liczbe kolumn drugiej macierzy: " << endl;
cin >> d;
// `a` row pointers for the first matrix.
int **tab1=new int*[a];
// NOTE(review): the loop bound is b (columns), not a (rows). If b > a it writes past
// the end of tab1; if b < a, rows b..a-1 stay unallocated and losowa_tablica() later
// writes through them.
for(int i=0; i<b; i++)
tab1[i]=new int[b];
// `c` row pointers for the second matrix.
int **tab2=new int*[c];
// NOTE(review): same pattern: the bound is d (columns) instead of c (rows).
for(int i=0; i<d; i++)
tab2[i]=new int[d];
losowa_tablica(tab1, tab2, a, b, c, d);
if ( b==c )
{
cout << "Mnozenie wykonalne" << endl;
// NOTE(review): the result is stored but never read or freed.
int **mno=mnozenie(tab1, tab2, a, b, c, d);
}
else cout << "Mnozenie niewykonalne" << endl;
system("pause");
}
Your code yields undefined behavior:
int **tab1=new int*[a]; // allocates an array of 'a' row pointers
for(int i=0; i<b; i++) // if b > a, the next line eventually writes past the end of tab1 (UB)
tab1[i]=new int[b];
int **tab2=new int*[c]; // allocates an array of 'c' row pointers
for(int i=0; i<d; i++) // if d > c, the next line eventually writes past the end of tab2 (UB)
tab2[i]=new int[d];
int **mac=new int*[a]; // allocates an array of 'a' row pointers
for(int i=0; i<d; i++) // if d > a, the next line eventually writes past the end of mac (UB)
mac[i]=new int[d];
In practice, the above code will most likely perform a memory access violation at some point.

Resources