Arduino: float function returns inf - arduino

I have a function (shown below) that I need some advice on. The function returns the slope of a line which is fit (via the least squares method) to n data points. To give you a context, my project is a barometric pressure based altimeter which uses this function to determine velocity based on the n most recent altitude-time pairs. These altitude-time pairs are stored in 2 global arrays(times[] and alts[]).
My problem is not that this method doesn't work. It usually does. But sometimes I will run the altimeter and this function will return the value 'inf' interspersed with a bunch of other wrong values (I have also seen 'NaN' but that is more rare). There are a few areas of suspicion I have at this point but I would like a fresh perspective. Here is some further contextual information that may or may not be of use:
I am using interrupts for a quadrature encoder
The times[] array is of type unsigned long
The alts[] array is of type float
n is a const int, in this case n = 9
On the ATMEGA328 a double is the same as a float.. Arduino-double
// Velocity estimate: slope of the least-squares line through the n
// (times[i], alts[i]) pairs held in the global arrays times[] and alts[].
// Returns (n*sum(t*y) - sum(t)*sum(y)) / (n*sum(t*t) - sum(t)^2).
// The /1000 and /1000000 factors rescale the time values
// (presumably milliseconds to seconds -- confirm against the code that fills times[]).
// NOTE(review): nothing guards the denominator. It is the difference of two
// float products; if that difference evaluates to zero the result is inf or NaN.
float velF() { // uses the last n data points, fits a line to them,
// and uses the slope of that line as the velocity at that moment
float sumTY = 0, sumT = 0, sumY = 0, sumT2 = 0;
for (int i = 0; i < n; i++) {
// accumulate sum(t*y), sum(t), sum(y) and sum(t*t) with the time rescaled
sumTY += (float)times[i] * alts[i] / 1000;
sumT += (float)times[i] / 1000;
sumY += alts[i];
sumT2 += (float)times[i] * times[i] / 1000000;
}
// closed-form least-squares slope; see the note above about a zero denominator
return (n*sumTY - sumT*sumY) / (n*sumT2 - sumT*sumT);
}
Any help or advice would be greatly appreciated!

Code is certainly performing division by zero.
For a variety of reasons, n*sumT2 - sumT*sumT can be zero (as @John Bollinger pointed out). In most of these cases, the numerator (dividend) of the division will also be zero, and a return value of zero would be acceptable.
/*
 * Least-squares slope of alts[] against times[] over the n stored samples,
 * with the time values rescaled by 1/1000 as they are accumulated.
 * Returns 0 instead of dividing when the denominator is exactly zero.
 */
float velF(void) {
    float sum_ty = 0;
    float sum_t = 0;
    float sum_y = 0;
    float sum_tt = 0;

    size_t k = 0;
    while (k < n) {
        // ensure the inputs are reasonable before using them
        assert(alts[k] >= ALT_MIN && alts[k] <= ALT_MAX);
        assert(times[k] >= TIME_MIN && times[k] <= TIME_MAX);

        const float t = (float)times[k];
        sum_ty += t * alts[k] / 1000;
        sum_t += t / 1000;
        sum_y += alts[k];
        sum_tt += t * times[k] / 1000000;
        k++;
    }

    const float denom = n*sum_tt - sum_t*sum_t;
    if (denom == 0) {
        // degenerate fit (no usable spread in the time values): report zero slope
        return 0;
    }
    return (n*sum_ty - sum_t*sum_y) / denom;
}
Side note: could factor out the division for improved accuracy and speed. Suggest performing the last calculation as double.
/*
 * Least-squares slope of alts[] against times[] over the n stored samples.
 *
 * The timestamps are taken relative to times[0] before they are converted to
 * float. The slope of a fitted line does not change when every time value is
 * shifted by the same amount, but the shift keeps the summed values small, so
 * d = n*sum(t*t) - sum(t)^2 no longer subtracts two huge, nearly equal numbers
 * (which loses all significant digits in single precision and produced the
 * inf/NaN results).
 *
 * The scaling is applied once at the end: the *1000 converts the slope from
 * "per time unit" to "per 1000 time units" (presumably ms -> s; confirm
 * against the code that fills times[]).
 *
 * Returns 0 when there are no samples or the denominator is exactly zero.
 */
float velF(void) {
    if (n <= 0) return 0;

    float sumTY = 0, sumT = 0, sumY = 0, sumT2 = 0;
    const unsigned long t0 = times[0];
    for (size_t i = 0; i < n; i++) {
        // Unsigned subtraction followed by a cast to signed yields the signed
        // offset from t0, so samples stored before times[0] (e.g. in a ring
        // buffer) come out as small negative numbers instead of huge ones.
        float tf = (float)(long)(times[i] - t0);
        sumTY += tf * alts[i];
        sumT += tf;
        sumY += alts[i];
        sumT2 += tf * tf;
    }
    double nd = n;
    double sumTd = sumT;
    double d = nd*sumT2 - sumTd*sumTd;
    if (d == 0) return 0;
    return (nd*sumTY - sumTd*sumY)*1000 / d;
}

Related

How do I plot E8 (Exceptional Lie Group order 8) in 2D?

For the last week or so I have been struggling to find a resource that will allow me to make something like the 2D petrie polygon diagrams in this article.
My main trouble is finding out what the rules are for the edge and node connections.
I.e. in this plot, is there a simple way to make the image from scratch (even if it not fully representative of the bigger theory behind it)?
Any help is massively appreciated!
K
Here is how I solved this problem!
e8
// to run
// clink -c Ex8
// ./Ex8
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "dislin.h"
// method to generate all permutations of a set with repeated elements:
// the root system
// Table of generated vectors, one 8-component vector per row;
// rows are appended by permutations().
float root_sys[240][8];
// Number of rows of root_sys written so far (index of the next free row).
int count = 0;
/// Decides whether base[curr] may be swapped into position `start`.
/// Returns 0 if the value base[curr] already occurs somewhere in
/// base[start .. curr-1] (the swap would repeat an arrangement), 1 otherwise.
int shouldSwap(float base[], int start, int curr)
{
    int pos = start;
    while (pos < curr) {
        if (base[pos] == base[curr]) {
            return 0;
        }
        pos++;
    }
    return 1;
}
/// Recursively enumerates the arrangements of base[0..n-1], skipping swaps that
/// shouldSwap() rejects as duplicates. Each completed arrangement is copied
/// into root_sys[count] and count is incremented. base[] is restored to its
/// incoming order before the function returns.
void permutations(float base[], int index, int n)
{
    if (index >= n) {
        // every position is fixed: store this arrangement as the next row
        for (int k = 0; k < n; k++) {
            root_sys[count][k] = base[k];
        }
        count++;
        return;
    }

    for (int j = index; j < n; j++) {
        if (!shouldSwap(base, index, j)) {
            continue;
        }
        // place base[j] at the current position ...
        float held = base[index];
        base[index] = base[j];
        base[j] = held;

        permutations(base, index + 1, n);

        // ... and undo the swap before trying the next candidate
        held = base[index];
        base[index] = base[j];
        base[j] = held;
    }
}
// Squared Euclidean distance between two 8-component vectors:
// the sum over all components of (vect_0[i] - vect_1[i])^2.
// (Despite its name this is not a dot product; see inner_product_plus.)
float inner_product(float * vect_0, float * vect_1){
    float acc = 0;
    int k = 0;
    while (k < 8) {
        acc = acc + ((vect_0[k] - vect_1[k]) * (vect_0[k] - vect_1[k]));
        k++;
    }
    return acc;
}
/// Dot product of two 8-component vectors: the sum of vect_0[i] * vect_1[i].
float inner_product_plus(float * vect_0, float * vect_1){
    float total = 0;
    int k = 0;
    while (k < 8) {
        total = total + (vect_0[k] * vect_1[k]);
        k++;
    }
    return total;
}
// Builds the 240 vectors of the root system from the base vectors below,
// marks every pair of roots whose squared distance is 2 as connected,
// projects each root onto the two 8-component vectors re[] and im[],
// and draws the resulting points and connections with DISLIN.
int main(void){
    // base vectors; their distinct permutations make up the E8 root system
    float base_sys[8][8] = {
        {1,1,0,0,0,0,0,0},
        {1,-1,0,0,0,0,0,0},
        {-1,-1,0,0,0,0,0,0},
        {0.5,0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5},
        {0.5,0.5,0.5,0.5,-0.5,-0.5,-0.5,-0.5},
        {0.5,0.5,0.5,0.5,0.5,0.5,-0.5,-0.5},
        {0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5},
        {-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5}
    };
    // permute the base vectors; this fills root_sys
    for(int i = 0; i < 8; i++){
        permutations(base_sys[i],0,8);
    }
    // calculate the distance between all pairs of roots and record the
    // connections. Every entry is written explicitly (1 or 0): the array is
    // an uninitialised local and all of it is read again in the drawing loop.
    int distance_matrix[240][240];
    for(int i = 0; i < 240; i++){
        for(int ii = 0; ii < 240; ii++){
            float dist = inner_product(root_sys[i], root_sys[ii]);
            // connecting (squared) distance in E8; the components are multiples
            // of 0.5, so the sum is exact and == is safe here
            distance_matrix[i][ii] = (dist == 2) ? 1 : 0;
        }
    }
    // use another program to calculate eigenvectors of root system . . . after some fiddling, these vectors appear
    float re[8] = {0.438217070641, 0.205187681291,
                   0.36459828198, 0.0124511903657,
                   -0.0124511903657, -0.36459828198,
                   -0.205187681291, -0.67645247517};
    float im[8] = {-0.118465163028, 0.404927414852,
                   0.581970822973, 0.264896157496,
                   0.501826483552, 0.345040496917,
                   0.167997088796, 0.118465163028};
    // 2D coordinates of every root after projection
    float rings_x[240];
    float rings_y[240];
    for(int i = 0; i < 240; i++){
        rings_x[i] = inner_product_plus(root_sys[i], re);
        rings_y[i] = inner_product_plus(root_sys[i], im);
    }
    // graph the system using the DISLIN library
    scrmod("revers");
    setpag("da4l");
    metafl("cons");
    disini();
    graf(-1.2, 1.2, -1.2, 1.2, -1.2, 1.2, -1.2, 1);
    // a connection is drawn for every pair flagged in the distance matrix
    for(int i = 0; i < 240; i++){
        for(int ii = 0; ii < 240; ii++){
            if(distance_matrix[i][ii] == 1){
                rline(rings_x[i], rings_y[i], rings_x[ii], rings_y[ii]);
                // clear the mirrored entry so the same edge is not drawn twice
                distance_matrix[ii][i] = 0;
            }
        }
    }
    // More DISLIN functions
    titlin("E8", 1);
    name("R-axis", "x");
    name("I-axis", "y");
    marker(21);
    hsymbl(15);
    qplsca(rings_x, rings_y, 240);
    return 0;
}
Extra points to anyone who can explain how to rotate the 2d plot to create a 3-d animation of this object

Image processing in MPI

This is my attempt to code the classical smoothing pixel average algorithm in MPI. I almost got it working, but something weird happens with the halo exchange, as you can see from the lines right at the edges. I can't seem to find the bug. Am I properly exchanging halos? What section of the final array should I gather?
https://pastebin.com/4rtFnSJ5
int next = rank + 1;
int prev = rank - 1;
if (next >= size) {
next = MPI_PROC_NULL;
}
if (prev < 0) {
prev = MPI_PROC_NULL;
}
int rows = y / px;
int cols = x;
int d = 1;
for (int iter = 0; iter < TotalIter; iter++) {
for (int i = 0; i < rows + 2; i++)
for (int j = 0; j < cols + 2; j++)
for (int k = 0; k < rgb; k++)
new[i][j * rgb + k] = 0;
for (int i = 1; i < rows + 1; i++) {
int iMin = -min(d, i - 1);
int iMax = min(d, (rows + 1 - i - 1));
for (int j = 1; j < cols + 1; j++) {
int jMin = -min(d, j - 1);
int iMax = min(d, (cols + 1 - j - 1));
int counter = 0;
for (int p = iMin; p <= iMax; p++)
for (int q = jMin; q <= jMax; q++) {
counter = counter + 1;
for (int k = 0; k < rgb; k++) {
new[i][j * rgb + k] += old[i + p][(j + q) * rgb + k];
}
}
for (int k = 0; k < rgb; k++) {
new[i][j * rgb + k] -= old[i][j * rgb + k];
new[i][j * rgb + k] /= (counter - 1);
}
}
}
for (int i = 2; i < rows; i++)
for (int j = 2; j < cols; j++)
for (int k = 0; k < rgb; k++) {
old[i][j * rgb + k] = new[i][j * rgb + k];
}
MPI_Sendrecv(&old[rows][1], cols * rgb, MPI_INT, next, 1, &old[0][1],
cols * rgb, MPI_INT, prev, 1, MPI_COMM_WORLD, &status);
MPI_Sendrecv(&old[1][1], cols * rgb, MPI_INT, prev, 2, &old[rows + 1][1],
cols * rgb, MPI_INT, next, 2, MPI_COMM_WORLD, &status);
}
for (int i = 1; i< rows+1; i++)
for (int j = 1; j< cols+1; j++)
for (int k = 0; k< rgb; k++) {
buf[i-1][(j-1)*rgb+k] = old[i][j*rgb+k] ;
}
MPI_Gather(&buf[0][0], rows *cols *rgb, MPI_INT, &Finalbuffer[0][0],
rows *cols *rgb, MPI_INT, 0, MPI_COMM_WORLD);
The output looks like this when run on 8 MPI processes. I can clearly see delimiting lines. For that reason I thought I was not doing halo exchanges properly.
OK, so there are a bunch of issues here.
First, your code could only ever work with d=1 since you only swap halos of depth 1. If you want to process neighbours of distance d, you need to swap halos of depth d.
Second, you do the first halo swap after your first sweep through the arrays so you are reading junk halo data on iteration 1 - you need to do a halo swap before you start processing your arrays.
Third, when you copy back new to old you start from index 2 : you need to include all the pixels from 1 to lrows and 1 to lcols.
Finally, your logic of Imin, Imax etc seems wrong. You don't want to truncate the range at the edges in the parallel program - you need to go off the edges to pick up the halo data. I just set Imin = -d, Imax = d etc.
With these fixes the code seems to run OK, i.e. there are no obvious halo effects, but it still gives different results on different numbers of processes.
PS I was also flattered to see you used the "arraymalloc2d" code from one of my own MPI examples - http://www.archer.ac.uk/training/course-material/2018/07/intro-epcc/exercises/cfd.tar.gz ; I'm glad to see that these training codes are proving useful to people!

MPI program runtime error MPI_GATHER, qsub mpijobparallel

I am trying to run this fast fourier implementation code. It compiles fine but gives this error at runtime. I have no idea about the error or what it means. Can anyone help me out?
I compiled and run the program by:
mpicc -o exec test.c
./exec
CODE:
This is the code that I found on GitHub. It's the parallel version of the fast Fourier algorithm.
#include <stdio.h>
#include <mpi.h> //To use MPI
#include <complex.h> //to use complex numbers
#include <math.h> //for cos() and sin()
#include "timer.h" //to use timer
#define PI 3.14159265
#define bigN 16384 //Problem Size
#define howmanytimesavg 3
int main()
{
int my_rank,comm_sz;
MPI_Init(NULL,NULL); //start MPI
MPI_Comm_size(MPI_COMM_WORLD,&comm_sz); ///how many processes are we
using?
MPI_Comm_rank(MPI_COMM_WORLD,&my_rank); //which process is this?
double start,finish;
double avgtime = 0;
FILE *outfile;
int h;
if(my_rank == 0) //if process 0 open outfile
{
outfile = fopen("ParallelVersionOutput.txt", "w"); //open from current
directory
}
for(h = 0; h < howmanytimesavg; h++) //loop to run multiple times for AVG
time.
{
if(my_rank == 0) //If it's process 0 starts timer
{
start = MPI_Wtime();
}
int i,k,n,j; //Basic loop variables
double complex evenpart[(bigN / comm_sz / 2)]; //array to save the data
for EVENHALF
double complex oddpart[(bigN / comm_sz / 2)]; //array to save the data
for ODDHALF
double complex evenpartmaster[ (bigN / comm_sz / 2) * comm_sz]; //array
to save the data for EVENHALF
double complex oddpartmaster[ (bigN / comm_sz / 2) * comm_sz]; //array
to save the data for ODDHALF
double storeKsumreal[bigN]; //store the K real variable so we can abuse
symmerty
double storeKsumimag[bigN]; //store the K imaginary variable so we can
abuse symmerty
double subtable[(bigN / comm_sz)][3]; //Each process owns a subtable
from the table below
double table[bigN][3] = //TABLE of numbers to use
{
0,3.6,2.6, //n, Real,Imaginary CREATES TABLE
1,2.9,6.3,
2,5.6,4.0,
3,4.8,9.1,
4,3.3,0.4,
5,5.9,4.8,
6,5.0,2.6,
7,4.3,4.1,
};
if(bigN > 8) //Everything after row 8 is all 0's
{
for(i = 8; i < bigN; i++)
{
table[i][0] = i;
for(j = 1; j < 3;j++)
{
table[i][j] = 0.0; //set to 0.0
}
}
}
int sendandrecvct = (bigN / comm_sz) * 3; //how much to send and
recieve??
MPI_Scatter(table,sendandrecvct,MPI_DOUBLE,subtable,sendandrecvct,MPI_DOUBLE,0,MPI_COMM_WORLD); //scatter the table to subtables
for (k = 0; k < bigN / 2; k++) //K coeffiencet Loop
{
/* Variables used for the computation */
double sumrealeven = 0.0; //sum of real numbers for even
double sumimageven = 0.0; //sum of imaginary numbers for even
double sumrealodd = 0.0; //sum of real numbers for odd
double sumimagodd = 0.0; //sum of imaginary numbers for odd
for(i = 0; i < (bigN/comm_sz)/2; i++) //Sigma loop EVEN and ODD
{
double factoreven , factorodd = 0.0;
int shiftevenonnonzeroP = my_rank * subtable[2*i][0]; //used to shift index numbers for correct results for EVEN.
int shiftoddonnonzeroP = my_rank * subtable[2*i + 1][0]; //used to shift index numbers for correct results for ODD.
/* -------- EVEN PART -------- */
double realeven = subtable[2*i][1]; //Access table for real number at spot 2i
double complex imaginaryeven = subtable[2*i][2]; //Access table for imaginary number at spot 2i
double complex componeeven = (realeven + imaginaryeven * I); //Create the first component from table
if(my_rank == 0) //if proc 0, dont use shiftevenonnonzeroP
{
factoreven = ((2*PI)*((2*i)*k))/bigN; //Calculates the even factor for Cos() and Sin()
// *********Reduces computational time*********
}
else //use shiftevenonnonzeroP
{
factoreven = ((2*PI)*((shiftevenonnonzeroP)*k))/bigN; //Calculates the even factor for Cos() and Sin()
// *********Reduces computational time*********
}
double complex comptwoeven = (cos(factoreven) - (sin(factoreven)*I)); //Create the second component
evenpart[i] = (componeeven * comptwoeven); //store in the evenpart array
/* -------- ODD PART -------- */
double realodd = subtable[2*i + 1][1]; //Access table for real number at spot 2i+1
double complex imaginaryodd = subtable[2*i + 1][2]; //Access table for imaginary number at spot 2i+1
double complex componeodd = (realodd + imaginaryodd * I); //Create the first component from table
if (my_rank == 0)//if proc 0, dont use shiftoddonnonzeroP
{
factorodd = ((2*PI)*((2*i+1)*k))/bigN;//Calculates the odd factor for Cos() and Sin()
// *********Reduces computational time*********
}
else //use shiftoddonnonzeroP
{
factorodd = ((2*PI)*((shiftoddonnonzeroP)*k))/bigN;//Calculates the odd factor for Cos() and Sin()
// *********Reduces computational time*********
}
double complex comptwoodd = (cos(factorodd) - (sin(factorodd)*I));//Create the second component
oddpart[i] = (componeodd * comptwoodd); //store in the oddpart array
}
/*Process ZERO gathers the even and odd part arrays and creates a evenpartmaster and oddpartmaster array*/
MPI_Gather(evenpart,(bigN / comm_sz / 2),MPI_DOUBLE_COMPLEX,evenpartmaster,(bigN / comm_sz / 2), MPI_DOUBLE_COMPLEX,0,MPI_COMM_WORLD);
MPI_Gather(oddpart,(bigN / comm_sz / 2),MPI_DOUBLE_COMPLEX,oddpartmaster,(bigN / comm_sz / 2), MPI_DOUBLE_COMPLEX,0,MPI_COMM_WORLD);
if(my_rank == 0)
{
for(i = 0; i < (bigN / comm_sz / 2) * comm_sz; i++) //loop to sum the EVEN and ODD parts
{
sumrealeven += creal(evenpartmaster[i]); //sums the realpart of the even half
sumimageven += cimag(evenpartmaster[i]); //sums the imaginarypart of the even half
sumrealodd += creal(oddpartmaster[i]); //sums the realpart of the odd half
sumimagodd += cimag(oddpartmaster[i]); //sums the imaginary part of the odd half
}
storeKsumreal[k] = sumrealeven + sumrealodd; //add the calculated reals from even and odd
storeKsumimag[k] = sumimageven + sumimagodd; //add the calculated imaginary from even and odd
storeKsumreal[k + bigN/2] = sumrealeven - sumrealodd; //ABUSE symmetry Xkreal + N/2 = Evenk - OddK
storeKsumimag[k + bigN/2] = sumimageven - sumimagodd; //ABUSE symmetry Xkimag + N/2 = Evenk - OddK
if(k <= 10) //Do the first 10 K's
{
if(k == 0)
{
fprintf(outfile," \n\n TOTAL PROCESSED SAMPLES : %d\n",bigN);
}
fprintf(outfile,"================================\n");
fprintf(outfile,"XR[%d]: %.4f XI[%d]: %.4f \n",k,storeKsumreal[k],k,storeKsumimag[k]);
fprintf(outfile,"================================\n");
}
}
}
if(my_rank == 0)
{
GET_TIME(finish); //stop timer
double timeElapsed = finish-start; //Time for that iteration
avgtime = avgtime + timeElapsed; //AVG the time
fprintf(outfile,"Time Elaspsed on Iteration %d: %f Seconds\n", (h+1),timeElapsed);
}
}
if(my_rank == 0)
{
avgtime = avgtime / howmanytimesavg; //get avg time
fprintf(outfile,"\nAverage Time Elaspsed: %f Seconds", avgtime);
fclose(outfile); //CLOSE file ONLY proc 0 can.
}
MPI_Barrier(MPI_COMM_WORLD); //wait to all proccesses to catch up before finalize
MPI_Finalize(); //End MPI
return 0;
}
ERROR:
Fatal error in PMPI_Gather: Invalid datatype, error stack:
PMPI_Gather(904): MPI_Gather(sbuf=0x7fffb62799a0, scount=8192,
MPI_DATATYPE_NULL, rbuf=0x7fffb6239980, rcount=8192, MPI_DATATYPE_NULL,
root=0, MPI_COMM_WORLD) failed
PMPI_Gather(815): Datatype for argument sendtype is a null datatype
[unset]: write_line error; fd=-1 buf=:cmd=abort exitcode=537490947
:
system msg for write_line failure : Bad file descriptor
There is no MPI_DATATYPE_NULL in your code, but you only use MPI_DOUBLE_COMPLEX. Note the latter type is a Fortran datatype, and using it in C is not correct strictly speaking.
My guess is that MPI_DOUBLE_COMPLEX is causing the issue (type not defined or not initialized because you invoked the C version of MPI_Init()).
You can obviously rewrite your code in Fortran, or use your own derived datatype for a C double complex number.
Meanwhile, I suggest you write simple C and Fortran helloworld programs that use MPI_DOUBLE_COMPLEX (MPI_Bcast() of one element for example) to confirm the issue is with MPI_DOUBLE_COMPLEX and is restricted to C or not.

R Weighted moving average with partial averages

I am trying to code in R a (centered) weighted moving average function that returns a vector of the same size as the input vector.
The following code almost gives me what I want but it does not work for the first and last values of my vector
# Reproducible example: len random integers in [0, len) and a normalised,
# symmetric 5-point weight vector.
set.seed(0)
len=10
x=floor(len*runif(len))
weights=c(1,3,0,3,1)
weights=weights/sum(weights)
# Both calls return the centered weighted moving average only for positions
# where the whole window fits, i.e. the result is shorter than x.
# (rollapply() is provided by the zoo package.)
rollapply(x,width=length(weights), function(x) sum(x*weights),align="center")
na.omit(filter(x,sides=2,weights))
Setting partial=TRUE in the rollapply function is sort of what I want to do. Anyway it does not work since my function does not support an x of changing sizes.
I could do the latter and manually add the edge computations with a loop. It would work, but I would like to find a nicer (computationally faster) way to do it.
For a more rigorous description of my needs here is a mathematical version
r is the vector my function would return
x and the weights w as inputs :
With Rcpp, you can do:
#include <Rcpp.h>
using namespace Rcpp;
// Centered weighted moving average of x with weights w.
//
// For position i the window is x[i - size .. i - size + w.size() - 1], paired
// with w[0 .. w.size() - 1], where size = (w.size() - 1) / 2. Where the window
// sticks out of x (near either end, or when x is shorter than the window) only
// the overlapping part is used and the result is divided by the sum of the
// weights actually used. Returns a vector of the same length as x.
// [[Rcpp::export]]
NumericVector roll_mean(const NumericVector& x,
                        const NumericVector& w) {
  int n = x.size();
  int w_size = w.size();
  int size = (w_size - 1) / 2;       // window points to the left of the centre
  int right = w_size - 1 - size;     // window points to the right of the centre
  if (right < 0) right = 0;          // empty w: keep the loop bounds inside x
  NumericVector res(n);
  int i, ind_x, ind_w;
  double w_sum = Rcpp::sum(w), tmp_wsum, tmp_xwsum, tmp_w;

  // beginning: the window starts before x[0]
  int lim1 = (size < n) ? size : n;
  for (i = 0; i < lim1; i++) {
    tmp_xwsum = tmp_wsum = 0;
    // last element of the window, clamped to the end of x, and the weight
    // that belongs to that element
    int last = (i + right < n) ? (i + right) : (n - 1);
    ind_w = (w_size - 1) - (i + right - last);
    for (ind_x = last; ind_x >= 0; ind_x--, ind_w--) {
      tmp_w = w[ind_w];
      tmp_wsum += tmp_w;
      tmp_xwsum += x[ind_x] * tmp_w;
    }
    res[i] = tmp_xwsum / tmp_wsum;
  }

  // middle: the whole window lies inside x
  int lim2 = n - right;
  for (; i < lim2; i++) {
    tmp_xwsum = 0;
    for (ind_x = i - size, ind_w = 0; ind_w < w_size; ind_x++, ind_w++) {
      tmp_xwsum += x[ind_x] * w[ind_w];
    }
    res[i] = tmp_xwsum / w_sum;
  }

  // end: the window runs past the last element of x
  for (; i < n; i++) {
    tmp_xwsum = tmp_wsum = 0;
    for (ind_x = i - size, ind_w = 0; ind_x < n; ind_x++, ind_w++) {
      tmp_w = w[ind_w];
      tmp_wsum += tmp_w;
      tmp_xwsum += x[ind_x] * tmp_w;
    }
    res[i] = tmp_xwsum / tmp_wsum;
  }

  return res;
}
I use this function in one of my packages.
Just put that in a .cpp file and source it with Rcpp::sourceCpp.

Different results GPU & CPU when more than one 8 work items per group

I'm new to OpenCL. As my first project, I tried to write code that checks for intersections between many polylines and a single polygon.
I'm running the code in both cpu and gpu.. and get different results.
First I sent NULL as local parameter when called clEnqueueNDRangeKernel.
/* local_work_size is NULL: the work-group size is left to the OpenCL runtime. */
/* NOTE(review): the wait list count is 2 - confirm evtCalcBounds refers to two events. */
clEnqueueNDRangeKernel(command_queue, kIntersect, 1, NULL, &global, NULL, 2, &evtCalcBounds, &evtKernel);
After trying many things I saw that if I send 1 as the local size it works well, returning the same results for the CPU and GPU.
/* Explicit local work size of 1, passed as local_work_size below. */
size_t local = 1;
clEnqueueNDRangeKernel(command_queue, kIntersect, 1, NULL, &global, &local, 2, &evtCalcBounds, &evtKernel);
I played a bit more and found that the CPU returns a wrong result when I run the kernel with a local size of 8 or more (for some reason).
I'm not using any local memory, just globals and privates.
I didn't add the code because I think it is irrelevant to the problem (note that for a single work group it works well), and it is long. If it is needed, I will try to simplify it.
The code flow is going like this:
I have polylines coordinates stored in a big buffer. and the single polygon in another. In addition i'm providing another buffer with single int that holds the current results count. All buffers are __global arguments.
In the kernel I'm simply checking for intersections between all the lines of "polyline[get_global_id(0)]" and the lines of the polygon. If true,
i'm using atomic_inc for the results count. There is no read and write memory from the same buffer, no barriers or mem fences,... the atomic_inc is the only thread safe mechanism i'm using.
-- UPDATE --
Added my code:
I know that i can maybe have better use of open cl functions for calculating some vectors, but for now, i'm simply convert code from my old regular CPU single threaded program to CL. so this is not my concern now.
// Even-odd (crossing-number) test: returns true when the point (x, y) lies
// inside the polygon. The vertex count is read from polygon[4] and the
// vertices from polygon[5] onwards as x,y pairs.
bool isPointInPolygon(float x, float y, __global float* polygon) {
    const uint vertexCount = convert_uint(polygon[4]);
    const int firstCoord = 5;
    bool inside = false;

    // walk every edge (previous vertex -> current vertex), starting with the
    // closing edge from the last vertex to the first
    uint prev = vertexCount - 1;
    for (uint cur = 0; cur < vertexCount; cur++) {
        const uint curIdx = firstCoord + cur * 2;
        const uint prevIdx = firstCoord + prev * 2;

        // the edge counts only if its end points lie on opposite sides of the
        // horizontal line through y (this also guarantees a non-zero divisor)
        if ((polygon[curIdx + 1] > y) != (polygon[prevIdx + 1] > y)) {
            if (x < (polygon[prevIdx] - polygon[curIdx]) * (y - polygon[curIdx + 1]) / (polygon[prevIdx + 1] - polygon[curIdx + 1]) + polygon[curIdx]) {
                inside = !inside;
            }
        }
        prev = cur;
    }
    return inside;
}
// Returns true when two axis-aligned rectangles, each given as
// (minX, minY, maxX, maxY), overlap or touch; false when they are separated
// along the X or the Y axis.
bool isRectanglesIntersected(float p_dblMinX1, float p_dblMinY1,
                             float p_dblMaxX1, float p_dblMaxY1,
                             float p_dblMinX2, float p_dblMinY2,
                             float p_dblMaxX2, float p_dblMaxY2) {
    // separated horizontally
    if (p_dblMinX1 > p_dblMaxX2 || p_dblMaxX1 < p_dblMinX2) {
        return false;
    }
    // separated vertically
    if (p_dblMinY1 > p_dblMaxY2 || p_dblMaxY1 < p_dblMinY2) {
        return false;
    }
    return true;
}
bool isLinesIntersects(
double Ax, double Ay,
double Bx, double By,
double Cx, double Cy,
double Dx, double Dy) {
double distAB, theCos, theSin, newX, ABpos;
// Fail if either line is undefined.
if (Ax == Bx && Ay == By || Cx == Dx && Cy == Dy)
return false;
// (1) Translate the system so that point A is on the origin.
Bx -= Ax; By -= Ay;
Cx -= Ax; Cy -= Ay;
Dx -= Ax; Dy -= Ay;
// Discover the length of segment A-B.
distAB = sqrt(Bx*Bx + By*By);
// (2) Rotate the system so that point B is on the positive X axis.
theCos = Bx / distAB;
theSin = By / distAB;
newX = Cx*theCos + Cy*theSin;
Cy = Cy*theCos - Cx*theSin; Cx = newX;
newX = Dx*theCos + Dy*theSin;
Dy = Dy*theCos - Dx*theSin; Dx = newX;
// Fail if the lines are parallel.
return (Cy != Dy);
}
// Tests one polyline against the polygon.
//
// polygon:   [bounds x4, vertexCount, x,y, x,y, ...] - vertices start at index 5.
// polylines: flat buffer; the polyline at startIdx is stored as
//            [pointCount, x,y, x,y, ...].
//
// Returns true as soon as isLinesIntersects() accepts a pair made of one
// polyline segment and one polygon edge (the closing edge from the last vertex
// back to the first is included). If no pair is accepted, returns whether the
// last polyline point lies inside the polygon. An empty polyline never
// intersects.
bool isPolygonInersectsPolyline(__global float* polygon, __global float* polylines, uint startIdx) {
    const uint polylineLength = convert_uint(polylines[startIdx]);
    if (polylineLength == 0) {
        // no points: nothing to test, and nothing valid to read below
        return false;
    }
    const uint start = startIdx + 1;
    const uint polygonLength = convert_uint(polygon[4]);
    const uint polygonLength2 = polygonLength * 2;
    const uint startPolygonIdx = 5;

    float x1 = polylines[start];
    float y1 = polylines[start + 1];
    float x2;
    float y2;

    // "seg + 1 < length" avoids the unsigned wrap-around of "length - 1"
    for (uint seg = 0; seg + 1 < polylineLength; seg++)
    {
        x2 = polylines[start + (seg * 2) + 2];
        y2 = polylines[start + (seg * 2) + 3];

        // Start from the first polygon VERTEX (index 5 and 6); indices 0..3 of
        // the buffer hold the bounding rectangle, not a vertex.
        float polyX1 = polygon[startPolygonIdx];
        float polyY1 = polygon[startPolygonIdx + 1];
        for (uint edge = 0; edge < polygonLength; ++edge)
        {
            // next vertex; the modulo wraps the last edge back to vertex 0
            float polyX2 = polygon[startPolygonIdx + (edge * 2 + 2) % polygonLength2];
            float polyY2 = polygon[startPolygonIdx + (edge * 2 + 3) % polygonLength2];
            if (isLinesIntersects(x1, y1, x2, y2, polyX1, polyY1, polyX2, polyY2)) {
                return true;
            }
            polyX1 = polyX2;
            polyY1 = polyY2;
        }
        x1 = x2;
        y1 = y2;
    }
    // No intersection found till now so we check containing
    return isPointInPolygon(x1, y1, polygon);
}
// One work-item per polyline: work-item i tests polyline i against the polygon.
// A polyline is tested in detail only if its bounding rectangle passes
// isRectanglesIntersected() against the polygon's rectangle. For every hit the
// shared counter is incremented atomically and the polyline index is stored in
// the output slot that the increment returned, so the order of the indices in
// output depends on scheduling.
// NOTE(review): i is not checked against the number of polylines - confirm
// that the global work size never exceeds it.
__kernel void calcIntersections(__global float* polylines, // My flat points array - [pntCount, x,y,x,y,...., pntCount, x,y,... ]
                                __global float* pBounds, // The rectangle bounds of each polyline - set of 4 values [top, left, bottom, right....]
                                __global uint* pStarts, // The start index of each polyline in the polylines array
                                __global float* polygon, // The polygon i want to intersect with - first 4 items are the rectangle bounds [top, left, bottom, right, pntCount, x,y,x,y,x,y....]
                                __global float* output, // Result array for saving the intersections polylines indices
                                __global uint* resCount) // The result count
{
    const int gid = get_global_id(0);
    const uint polylineStart = convert_uint(pStarts[gid]);

    // cheap rejection: the bounding rectangles do not even touch
    if (!isRectanglesIntersected(pBounds[gid * 4], pBounds[gid * 4 + 1], pBounds[gid * 4 + 2], pBounds[gid * 4 + 3],
                                 polygon[0], polygon[1], polygon[2], polygon[3])) {
        return;
    }

    // detailed test against the polygon
    if (!isPolygonInersectsPolyline(polygon, polylines, polylineStart)) {
        return;
    }

    // reserve the next free output slot and record this polyline's index
    const int slot = atomic_inc(resCount);
    output[slot] = gid;
}
Can anyone explain it to me ?

Resources