OpenCL distance between strings - opencl

I have this C++ code from the web that computes for distance between two strings. Can someone help me convert this to OpenCL for parallelism? I'm having hard time learning OpenCL.
#include <stdio.h>
#include <math.h>
#include <string.h>
#define MIN(x,y) ((x) < (y) ? (x) : (y))
int main()
{
int d[100][100];
int i,j,m,n,temp,tracker;
char s[] = "Sanfoundry";
char t[] = "Education";
m = strlen(s);
n = strlen(t);
for(i=0;i<=m;i++)
d[0][i] = i;
for(j=0;j<=n;j++)
d[j][0] = j;
for (j=1;j<=m;j++)
{
for(i=1;i<=n;i++)
{
if(s[i-1] == t[j-1])
{
tracker = 0;
} else {
tracker = 1;
}
temp = MIN((d[i-1][j]+1),(d[i][j-1]+1));
d[i][j] = MIN(temp,(d[i-1][j-1]+tracker));
}
}
printf("the Levinstein distance is %d\n",d[n][m]);
return 0;
}
Specifically, what part of the code must be put in a kernel? Also, what memory objects needed to be created?
Thanks

Related

create new vector which uses range typed from keybord and steps by 0.25

I have to create a code to create new vector which is created of range typed by user.
For ex: we start with 3 and end with 9. So i need crate vector forom 3 to 9 stepped by 0.25
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
int main()
{
float starts=0;
float ends=0;
float stepo = 0.25;
float tab[]={};
int startInt = 0;
int endInt = 0;
int counter = 0;
printf("Podaj poczatek:\n");// "Type starting int"
scanf("%d", &endInt);
printf("Podaj koniec:\n"); // "Type ending int"
scanf("%d", &startInt);
int diff = startInt - endInt;
printf("%d\n", diff);
starts= startInt;
ends= endInt;
for (int i = 0; i< (diff)*4; i++) {
tab[i]= ends;
printf("%f\n", tab[i]);**strong text**
ends = ends + stepo;
}
return 0;
}
I solved it by my own ;-) "float tab[]={};" it was a problem.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
int main()
{
float starts=0;
float ends=0;
float stepo = 0.25;
printf("Podaj poczatek:\n");
scanf("%f", &ends);
printf("Podaj koniec:\n");
scanf("%f", &starts);
float diff = starts - ends;
printf("%f\n", diff);
int tabSize = diff *4;
float tab[tabSize];
for (int i = 0; i<=diff*4; i++){
tab[i]= ends;
printf("%8.2f\n", tab[i]);
ends = ends + stepo;
}
return 0;
}

How can I convert CGAL points into PCL point cloud?

I would like to utilize functions and classes from both library (CGAL and PCL). Therefore, it needs to convert the processed data from one into another.
So, How can I convert points in CGAL into pointcloud in pcl and vice-versa?
Given the headers:
#include <pcl/point_types.h>
#include <pcl/conversions.h>
#include <CGAL/Surface_mesh.h>
#include <CGAL/Simple_cartesian.h>
Here is a function to convert from PCL to CGAL
int convert_mesh_from_PCL_to_CGAL(pcl::PolygonMesh::Ptr PCL_mesh, CGAL_Mesh& CGAL_mesh)
{
pcl::PointCloud<pcl::PointXYZ>::Ptr mesh_cloud (new pcl::PointCloud<pcl::PointXYZ>);
pcl::fromPCLPointCloud2( PCL_mesh->cloud, *mesh_cloud );
// clear and reserve the
CGAL_mesh.clear();
int n = mesh_cloud->size();
int f = PCL_mesh->polygons.size();
int e = 0;
CGAL_mesh.reserve(n, 2*f, e);
//copy the vertices
double x, y, z;
for (int i=0; i<mesh_cloud->size(); i++)
{
Point p;
x = mesh_cloud->points[i].x;
y = mesh_cloud->points[i].y;
z = mesh_cloud->points[i].z;
p = Point(x, y, z);
CGAL_mesh.add_vertex(p);
}
// copy the faces
std::vector <int> vertices;
for(int i=0; i<PCL_mesh->polygons.size(); i++)
{
vertices.resize(3);
vertices[0] = PCL_mesh->polygons[i].vertices[0];
vertices[1] = PCL_mesh->polygons[i].vertices[1];
vertices[2] = PCL_mesh->polygons[i].vertices[2];
CGAL_mesh.add_face(CGAL_Mesh::Vertex_index (vertices[0]),
CGAL_Mesh::Vertex_index (vertices[1]),
CGAL_Mesh::Vertex_index (vertices[2]));
}
return 0;
}
For CGAL to PCL I had some commented code, I'll try to test it and update it later but that might give you an idea on how to do it.
int convert_mesh_from_CGAL_to_PCL(CGAL_Mesh CGAL_mesh, pcl::PolygonMesh::Ptr old_PCL_mesh, pcl::PolygonMesh::Ptr PCL_mesh)
{
pcl::PointCloud<pcl::PointXYZ>::Ptr mesh_cloud (new pcl::PointCloud<pcl::PointXYZ>);
pcl::fromPCLPointCloud2( old_PCL_mesh->cloud, *mesh_cloud );
int i=0;
BOOST_FOREACH(CGAL_vertex v, vertices(CGAL_mesh))
{
mesh_cloud->points[i].x = CGAL_mesh[v].point.x();
mesh_cloud->points[i].y = CGAL_mesh[v].point.y();
mesh_cloud->points[i].z = CGAL_mesh[v].point.z();
i++;
}
//BOOST_FOREACH(CGAL_vertex v, vertices(CGAL_mesh))
//BOOST_FOREACH(CGAL_face f, faces(CGAL_mesh))
pcl::toPCLPointCloud2( *mesh_cloud, PCL_mesh->cloud );
return 0;
}
It's not that complicated. Here's a simple example:
First, some headers you might need:
#include <CGAL/Simple_cartesian.h>
#include <pcl/point_types.h>
#include <pcl/point_cloud.h>
#include <utility> // std::tuple
#include <vector>
Some using declarations make life easy.
(CGAL examples tend to use typedefs a lot, like this one.)
// CGAL using declarations
using Kernel = CGAL::Simple_cartesian<float>;
using Point = Kernel::Point_3;
using Vector = Kernel::Vector_3;
using Color = std::array<unsigned char, 3>;
using PNC = std::tuple<Point, Vector, Color>;
// PCL using declarations
using Cloud = pcl::PointCloud<pcl::PointXYZRGBNormal>;
To convert CGAL points to PCL point cloud:
Cloud cgal2pcl(const std::vector<PNC> & points)
{
Cloud cloud;
for (const auto & pnc : points) {
const Point & p = std::get<0>(pnc);
const Vector & v = std::get<1>(pnc);
const Color & c = std::get<2>(pnc);
pcl::PointXYZRGBNormal pt;
pt.x = p.x();
pt.y = p.y();
pt.z = p.z();
pt.normal_x = v.x();
pt.normal_y = v.y();
pt.normal_z = v.z();
pt.r = c[0];
pt.g = c[1];
pt.b = c[2];
cloud.points.push_back(pt);
}
return cloud;
}
And from PCL point cloud to CGAL points:
std::vector<PNC> pcl2cgal(const Cloud & cloud)
{
std::vector<PNC> points;
points.reserve(cloud.points.size());
for (const auto & pt : cloud.points) {
Point p (pt.x, pt.y, pt.z );
Vector n (pt.normal_x, pt.normal_y, pt.normal_z);
Color c { {pt.r, pt.g, pt.b} };
points.push_back(std::make_tuple(p, n, c));
}
return points;
}
For your concrete example, you might want to change std::tuple<Point, Vector, Color> to std::pair<Point, Vector> to store pcl::PointNormal.
And if you need pcl::PointXYZ, then std::vector<Point> might just meet the demands.

R function callback from C

I have the following C function callThisFromR.c which I want to call from the R language,
#include <stdio.h>
#include <stdint.h>
#include <string.h>
void callThisFromR (
// some integer
int32_t integerFromR, // this is the length of the double vector `vec` to be passed again to R
// This is the R function passed to C: `getFuncFromR(vecLen, vec)`
double (* getFuncFromR) (
int32_t ,
double []
),
char inputString[] ,
int32_t inputStringLen
) {
printf("This is the integer passed from R: %d\n", integerFromR);
printf("This is the string passed from R: %s\n", inputString);
printf("This is the string length passed from R: %d\n", inputStringLen);
// Generate some double data to pass to R
double *vec;
vec = (double *) malloc( integerFromR * sizeof(*vec) );
for( int i=0; i<integerFromR; i++ ) {
vec[i] = (double) (i+1);
}
double result = getFuncFromR(integerFromR,vec);
printf("This is the result of the R callback function passed from R: %f\n", result );
}
This C function is supposed to be called from within R and expects to receive an int32 integer integerFromR, a string inputString and its length inputStringLen, as well as a callback function getFuncFromR from R. For example, here is the main.c C-equivalent of what I need to have in R,
#include <stdio.h>
#include <stdint.h>
#include <string.h>
double getFuncFromR (
int32_t vecLen,
double vec[]
)
{
double vecSum = 0.;
for(int i = 0; i < vecLen; i++){
vecSum += vec[i];
}
return vecSum;
}
int main(int argc, char *argv[])
{
char inputString[] = "ThisIsTheStringPassedFromR2C";
int32_t inputStringLen = strlen(inputString);
int32_t integerFromR = 3;
callThisFromR( integerFromR
, &getFuncFromR
, inputString
, inputStringLen
);
return 0;
}
Easy in C. But to call callThisFromR() C-function from R and pass an R function getFuncFromR() has so far been like a nightmare. How can I write the C wrapper function? My exhaustive web search for an answer (including the Rcpp and R-Foreign-Language-Extension documentation) have led to nothing.

RcppParallel RVector push_back or something similar?

I am using RcppParallel to speed up some calculations. However, I am running out of memory in the process, so I would like to save results within the Parallel loop that are pass some relevance threshold. Below is a toy example to illustrate my point:
#include <Rcpp.h>
#include <RcppParallel.h>
using namespace Rcpp;
// [[Rcpp::depends(RcppParallel)]]
// [[Rcpp::plugins(cpp11)]]
struct Example : public RcppParallel::Worker {
RcppParallel::RVector<double> xvals, xvals_output, yvals;
Example(const NumericVector & xvals, NumericVector & yvals, NumericVector & xvals_output) :
xvals(xvals), xvals_output(xvals_output), yvals(yvals) {}
void operator()(std::size_t begin, size_t end) {
for(std::size_t i=begin; i < end; i++) {
double y = xvals[i] * (xvals[i] - 1);
// if(y < 0) {
// xvals_output.push_back(xvals[i]);
// yvals.push_back(y);
// }
xvals_output[i] = xvals[i];
yvals[i] = y;
}
}
};
// [[Rcpp::export]]
List find_values(NumericVector xvals) {
NumericVector xvals_output(xvals.size());
NumericVector yvals(xvals.size());
Example ex(xvals, yvals, xvals_output);
parallelFor(0, xvals.size(), ex);
List L = List::create(xvals_output, yvals);
return(L);
}
The R code would be:
find_values(seq(-10,10, by=0.5))
The commented out code is what I would like to do.
That is, I would like to initialize an empty vector, and append only the y-values that pass a certain threshold and also the associated x-values.
In my real usage, I am calculating a MxN matrix, so memory is an issue.
What is the correct way to approach this issue?
If anyone ever comes across a similar problem, here's a solution using "concurrent_vector" from TBB (which RcppParallel uses under the hood and is available as a header).
#include <Rcpp.h>
#include <RcppParallel.h>
#include <tbb/concurrent_vector.h>
using namespace Rcpp;
// [[Rcpp::depends(RcppParallel)]]
// [[Rcpp::plugins(cpp11)]]
struct Example : public RcppParallel::Worker {
RcppParallel::RVector<double> xvals;
tbb::concurrent_vector< std::pair<double, double> > &output;
Example(const NumericVector & xvals, tbb::concurrent_vector< std::pair<double, double> > &output) :
xvals(xvals), output(output) {}
void operator()(std::size_t begin, size_t end) {
for(std::size_t i=begin; i < end; i++) {
double y = xvals[i] * (xvals[i] - 1);
if(y < 0) {
output.push_back( std::pair<double, double>(xvals[i], y) );
}
}
}
};
// [[Rcpp::export]]
List find_values(NumericVector xvals) {
tbb::concurrent_vector< std::pair<double, double> > output;
Example ex(xvals,output);
parallelFor(0, xvals.size(), ex);
NumericVector xout(output.size());
NumericVector yout(output.size());
for(int i=0; i<output.size(); i++) {
xout[i] = output[i].first;
yout[i] = output[i].second;
}
List L = List::create(xout, yout);
return(L);
}
Output:
> find_values(seq(-10,10, by=0.5))
[[1]]
[1] 0.5
[[2]]
[1] -0.25

Why does mclapply function in R is more efficient than Rcpp + OpenMP?

I have a function with a loop (EstimateUniques) that is parallelized with OpenMP. I suggested that multithreading should be more efficient than multiprocessing, but when I compare this function with the simple run of "mclapply", it showed lower performance. What is the proper way to achieve the same level of parallelization in c++ as in R? Am I doing something wrong?
Performance comparison (time in seconds):
#Cores CPP R
1 1.721s 1.538s
2 1.945s 1.080s
3 2.858s 0.801s
R code:
Rcpp::sourceCpp('ReproducibleExample.cpp')
arr <- 1:10000
n_rep <- 150
n_iters <- 200
EstimateUniquesR <- function(arr, n_iters, n_rep, cores) {
parallel::mclapply(1:n_iters, function(i)
GetNumberOfUniqSamples(arr, i * 10, n_rep), mc.cores=cores)
}
cpp_times <- sapply(1:3, function(threads)
system.time(EstimateUniques(arr, n_iters, n_rep, threads))['elapsed'])
r_times <- sapply(1:3, function(cores)
system.time(EstimateUniquesR(arr, n_iters, n_rep, cores))['elapsed'])
data.frame(CPP=cpp_times, R=r_times)
Example.cpp file:
// [[Rcpp::plugins(openmp)]]
// [[Rcpp::plugins(cpp11)]]
#include <algorithm>
#include <vector>
#include <omp.h>
// [[Rcpp::export]]
int GetNumberOfUniqSamples(const std::vector<int> &bs_array, int size, unsigned n_rep) {
unsigned long sum = 0;
for (unsigned i = 0; i < n_rep; ++i) {
std::vector<int> uniq_vals(size);
for (int try_num = 0; try_num < size; ++try_num) {
uniq_vals[try_num] = bs_array[rand() % bs_array.size()];
}
std::sort(uniq_vals.begin(), uniq_vals.end());
sum += std::distance(uniq_vals.begin(), std::unique(uniq_vals.begin(), uniq_vals.end()));
}
return std::round(double(sum) / n_rep);
}
// [[Rcpp::export]]
std::vector<int> EstimateUniques(const std::vector<int> &bs_array, const int n_iters,
const int n_rep = 1000, const int threads=1) {
std::vector<int> uniq_counts(n_iters);
#pragma omp parallel for num_threads(threads) schedule(dynamic)
for (int i = 0; i < n_iters; ++i) {
uniq_counts[i] = GetNumberOfUniqSamples(bs_array, (i + 1) * 10, n_rep);
}
return uniq_counts;
}
I tried to use other types of scheduling in OpenMP, but they gave even worse results.

Resources