OpenCL Newbie question :)
I'm trying to write an opencl kernel like
// Kernel skeleton: receives the input image and the result buffer as
// __global double pointers. The body is a placeholder.
__kernel void NLLSQ
(
__global double* image,
__global double* nllsqResult
)
{
//Do some stuff
}
Which works fine until I try to put a loop in. ie:
// Same kernel skeleton with the placeholder work wrapped in a loop that
// runs two times (i = 0 and i = 1).
__kernel void NLLSQ
(
__global double* image,
__global double* nllsqResult
)
{
for (int i = 0; i < 2; i++)
{
//Do some stuff
}
}
Which causes my computer to crash and the monitor to go black. I think the problem is that I've sent too much work to the graphics card.
My full code looks like this
// Enable double-precision support: prefer the Khronos extension, fall back to
// the AMD vendor extension, and stop the build if neither macro is defined.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#elif defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#else
#error "Double precision floating point not supported by OpenCL implementation."
#endif
// Limits a pixel coordinate to the inclusive ranges 0..width and 0..height,
// component by component, and returns the limited coordinate.
// NOTE(review): the upper bounds are inclusive, so callers presumably pass the
// largest valid index (e.g. 1911 / 1071) rather than the image size - confirm.
int2 clipPixel(int2 coordinate, int width, int height)
{
    const int2 lowerBound = (int2)(0, 0);
    const int2 upperBound = (int2)(width, height);
    // Lower limit first, then upper limit, applied to both components at once.
    return min(upperBound, max(lowerBound, coordinate));
}
/* Converts a 2D coordinate (X, Y) into a linear index for a row-major
 * buffer whose rows are `width` elements long. */
int Coord2Index(int X, int Y, int width)
{
    const int rowStart = width * Y;
    return rowStart + X;
}
//2D Gaussian 'bubble' Function
double f(int x, int y, double a, double b, double s)
{
return a + b*exp(-(x*x+y*y)/(s*s));
}
// (∂f/∂b)
double dfdb(int x, int y, double s)
{
return exp(-(x*x+y*y)/(s*s));
}
// (∂f/∂σ)
double dfds(int x, int y, double b, double s)
{
double v = -(x*x + y*y);
return b * exp(v/(s*s))*-2*v/(s*s*s);
}
//Non-Linear Least Squares
// Performs one Gauss-Newton update of the bubble model f(x, y) = a + b*exp(-(x^2+y^2)/s^2)
// against the 9x9 window whose top-left pixel is (get_global_id(0), get_global_id(1)),
// starting from the guess (a, b, s) = (0, 1, 1), and stores the updated s.
//   image       - input pixels, read with a row stride of 1912
//   nllsqResult - output, written with a row stride of 1904
//                 (presumably 1912 minus the 8-pixel window margin - confirm)
__kernel void NLLSQ
(
__global double* image,
__global double* nllsqResult
)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int indexRslt = Coord2Index( x, y, 1904 );
    double3 B = (double3)(0, 1, 1); //initial guess
    double JTJ[3][3];
    double JTdY[3];

    //The normal-equation sums must start at zero because they are accumulated below.
    for (int r = 0; r < 3; r++)
    {
        JTdY[r] = 0;
        for (int c = 0; c < 3; c++)
        {
            JTJ[r][c] = 0;
        }
    }

    //Accumulate JT * J and JT * ΔY one window pixel at a time, so neither the
    //81x3 Jacobian nor the 81-entry residual vector has to be stored.
    for (int j = -4; j <= 4; j++)
    {
        for (int i = -4; i <= 4; i++)
        {
            //Each residual is taken at its own pixel of the window.
            const int index = Coord2Index( x + i + 4, y + j + 4, 1912 );
            const double dY = image[index] - f( i, j, B.x, B.y, B.z);

            //One row of the Jacobian: (∂f/∂a, ∂f/∂b, ∂f/∂σ)
            double Jrow[3];
            Jrow[0] = 1;
            Jrow[1] = dfdb(i, j, B.z);
            Jrow[2] = dfds(i, j, B.y, B.z);

            for (int r = 0; r < 3; r++)
            {
                JTdY[r] += Jrow[r] * dY;
                for (int c = 0; c < 3; c++)
                {
                    JTJ[r][c] += Jrow[r] * Jrow[c];
                }
            }
        }
    }

    //TO DO: might have to make this next part more general if I decide not to use a 9x9 bubble template size
    // (JT * J) * ΔB = JT * ΔY is a system of the form Ax = b
    // A = (JT * J), ΔB = x, JT * ΔY = b
    //Solve using cramer's rule http://en.wikipedia.org/wiki/Cramer%27s_rule
    // xi = det(Ai)/det(A)
    //determinant of A
    double detA =
        JTJ[0][0] * (JTJ[1][1] * JTJ[2][2] - JTJ[1][2] * JTJ[2][1]) -
        JTJ[0][1] * (JTJ[1][0] * JTJ[2][2] - JTJ[1][2] * JTJ[2][0]) +
        JTJ[0][2] * (JTJ[1][0] * JTJ[2][1] - JTJ[1][1] * JTJ[2][0]) ;
    double detA1 =
        JTdY[0] * (JTJ[1][1] * JTJ[2][2] - JTJ[1][2] * JTJ[2][1]) -
        JTJ[0][1] * ( JTdY[1] * JTJ[2][2] - JTJ[1][2] * JTdY[2] ) +
        JTJ[0][2] * ( JTdY[1] * JTJ[2][1] - JTJ[1][1] * JTdY[2] ) ;
    double detA2 =
        JTJ[0][0] * (JTdY[1] * JTJ[2][2] - JTJ[1][2] * JTdY[2] ) -
        JTdY[0] * (JTJ[1][0] * JTJ[2][2] - JTJ[1][2] * JTJ[2][0]) +
        JTJ[0][2] * (JTJ[1][0] * JTdY[2] - JTdY[1] * JTJ[2][0]) ;
    double detA3 =
        JTJ[0][0] * (JTJ[1][1] * JTdY[2] - JTdY[1] * JTJ[2][1]) -
        JTJ[0][1] * (JTJ[1][0] * JTdY[2] - JTdY[1] * JTJ[2][0]) +
        JTdY[0] * (JTJ[1][0] * JTJ[2][1] - JTJ[1][1] * JTJ[2][0]) ;

    // B(k+1) = B(k) + ΔB
    //A singular system has no unique step; keep the current estimate instead of
    //dividing by zero and writing NaN/Inf.
    if (detA != 0.0)
    {
        B.x = B.x + (detA1/detA);
        B.y = B.y + (detA2/detA);
        B.z = B.z + (detA3/detA);
    }
    nllsqResult[indexRslt] = B.z;
}
I would like to use a for loop as such
//Non-Linear Least Squares
// Runs up to 10 Gauss-Newton iterations of the bubble model
// f(x, y) = a + b*exp(-(x^2+y^2)/s^2) against the 9x9 window whose top-left pixel is
// (get_global_id(0), get_global_id(1)), starting from (a, b, s) = (0, 1, 1),
// and stores the final s.
//   image       - input pixels, read with a row stride of 1912
//   nllsqResult - output, written with a row stride of 1904
//                 (presumably 1912 minus the 8-pixel window margin - confirm)
__kernel void NLLSQ
(
__global double* image,
__global double* nllsqResult
)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int indexRslt = Coord2Index( x, y, 1904 );
    double3 B = (double3)(0, 1, 1); //initial guess
    double JTJ[3][3];
    double JTdY[3];

    for (int iters = 0; iters < 10; iters++)
    {
        //The normal-equation sums are rebuilt from zero in every iteration.
        for (int r = 0; r < 3; r++)
        {
            JTdY[r] = 0;
            for (int c = 0; c < 3; c++)
            {
                JTJ[r][c] = 0;
            }
        }

        //Accumulate JT * J and JT * ΔY one window pixel at a time. The residual is
        //re-evaluated with the current B so that each step refines the last one.
        for (int j = -4; j <= 4; j++)
        {
            for (int i = -4; i <= 4; i++)
            {
                //Each residual is taken at its own pixel of the window.
                const int index = Coord2Index( x + i + 4, y + j + 4, 1912 );
                const double dY = image[index] - f( i, j, B.x, B.y, B.z);

                //One row of the Jacobian: (∂f/∂a, ∂f/∂b, ∂f/∂σ)
                double Jrow[3];
                Jrow[0] = 1;
                Jrow[1] = dfdb(i, j, B.z);
                Jrow[2] = dfds(i, j, B.y, B.z);

                for (int r = 0; r < 3; r++)
                {
                    JTdY[r] += Jrow[r] * dY;
                    for (int c = 0; c < 3; c++)
                    {
                        JTJ[r][c] += Jrow[r] * Jrow[c];
                    }
                }
            }
        }

        //TO DO: might have to make this next part more general if I decide not to use a 9x9 bubble template size
        // (JT * J) * ΔB = JT * ΔY is a system of the form Ax = b
        // A = (JT * J), ΔB = x, JT * ΔY = b
        //Solve using cramer's rule http://en.wikipedia.org/wiki/Cramer%27s_rule
        // xi = det(Ai)/det(A)
        //determinant of A
        double detA =
            JTJ[0][0] * (JTJ[1][1] * JTJ[2][2] - JTJ[1][2] * JTJ[2][1]) -
            JTJ[0][1] * (JTJ[1][0] * JTJ[2][2] - JTJ[1][2] * JTJ[2][0]) +
            JTJ[0][2] * (JTJ[1][0] * JTJ[2][1] - JTJ[1][1] * JTJ[2][0]) ;

        //A singular system has no unique step; stop iterating and keep the
        //current estimate instead of dividing by zero.
        if (detA == 0.0)
        {
            break;
        }

        double detA1 =
            JTdY[0] * (JTJ[1][1] * JTJ[2][2] - JTJ[1][2] * JTJ[2][1]) -
            JTJ[0][1] * ( JTdY[1] * JTJ[2][2] - JTJ[1][2] * JTdY[2] ) +
            JTJ[0][2] * ( JTdY[1] * JTJ[2][1] - JTJ[1][1] * JTdY[2] ) ;
        double detA2 =
            JTJ[0][0] * (JTdY[1] * JTJ[2][2] - JTJ[1][2] * JTdY[2] ) -
            JTdY[0] * (JTJ[1][0] * JTJ[2][2] - JTJ[1][2] * JTJ[2][0]) +
            JTJ[0][2] * (JTJ[1][0] * JTdY[2] - JTdY[1] * JTJ[2][0]) ;
        double detA3 =
            JTJ[0][0] * (JTJ[1][1] * JTdY[2] - JTdY[1] * JTJ[2][1]) -
            JTJ[0][1] * (JTJ[1][0] * JTdY[2] - JTdY[1] * JTJ[2][0]) +
            JTdY[0] * (JTJ[1][0] * JTJ[2][1] - JTJ[1][1] * JTJ[2][0]) ;

        // B(k+1) = B(k) + ΔB
        B.x = B.x + (detA1/detA);
        B.y = B.y + (detA2/detA);
        B.z = B.z + (detA3/detA);
    }
    nllsqResult[indexRslt] = B.z;
}
It seems that your kernel takes too long and the Timeout Detection and Recovery (TDR) mechanism of Windows kicks in. You can disable TDR by changing the registry values as described here: MSDN. However, if you disable TDR, your screen may hang until the computation of your kernel is finished. If you have an infinite loop in your kernel, nothing will stop it, and since your computer no longer responds, killing the task would be very difficult. Good thing there are power and reset buttons.
Related
I have black color png with transparent background.
I am trying to change color using hue-rotate(180deg) and invert(100%) CSS but failed.
In the case of other color png, all is good.
/* Rotates the hue of the element by 180 degrees; declared in both the -webkit- prefixed and the unprefixed form. */
.huerotate{-webkit-filter: hue-rotate(180deg); filter: hue-rotate(180deg);}
<img src="blackXXX.png" class="huerotate"/>
Is it possible or impossible?
Yes, you can do it... the black is tricky.
Here's how:
/* Filter chain applied to the black image; according to the accompanying answer it turns black into blue. */
background: url(black.png);
filter: brightness(0.9) invert(.7) sepia(.5) hue-rotate(100deg) saturate(200%);
This makes black -> blue.
This website can help you generate the FILTER from a HEX color:
https://isotropic.co/tool/hex-color-to-css-filter/
If you need a custom color, just try
this fiddle
The javascript:
'use strict';
/**
 * Mutable RGB colour whose channels are kept inside 0..255.
 * The filter methods modify the instance in place.
 */
class Color {
  constructor(r, g, b) {
    this.set(r, g, b);
  }

  /** Returns a CSS `rgb(...)` string with every channel rounded. */
  toString() {
    const [red, green, blue] = [this.r, this.g, this.b].map((channel) => Math.round(channel));
    return `rgb(${red}, ${green}, ${blue})`;
  }

  /** Stores the three channels after clamping each of them. */
  set(r, g, b) {
    [this.r, this.g, this.b] = [r, g, b].map((channel) => this.clamp(channel));
  }

  /** Applies a hue rotation; `angle` is given in degrees. */
  hueRotate(angle = 0) {
    const radians = angle / 180 * Math.PI;
    const s = Math.sin(radians);
    const c = Math.cos(radians);

    const rotation = [
      0.213 + c * 0.787 - s * 0.213,
      0.715 - c * 0.715 - s * 0.715,
      0.072 - c * 0.072 + s * 0.928,
      0.213 - c * 0.213 + s * 0.143,
      0.715 + c * 0.285 + s * 0.140,
      0.072 - c * 0.072 - s * 0.283,
      0.213 - c * 0.213 - s * 0.787,
      0.715 - c * 0.715 + s * 0.715,
      0.072 + c * 0.928 + s * 0.072,
    ];
    this.multiply(rotation);
  }

  /** Applies the grayscale matrix; `value` 1 is the full effect, 0 is none. */
  grayscale(value = 1) {
    const keep = 1 - value;
    this.multiply([
      0.2126 + 0.7874 * keep,
      0.7152 - 0.7152 * keep,
      0.0722 - 0.0722 * keep,
      0.2126 - 0.2126 * keep,
      0.7152 + 0.2848 * keep,
      0.0722 - 0.0722 * keep,
      0.2126 - 0.2126 * keep,
      0.7152 - 0.7152 * keep,
      0.0722 + 0.9278 * keep,
    ]);
  }

  /** Applies the sepia matrix; `value` 1 is the full effect, 0 is none. */
  sepia(value = 1) {
    const keep = 1 - value;
    this.multiply([
      0.393 + 0.607 * keep,
      0.769 - 0.769 * keep,
      0.189 - 0.189 * keep,
      0.349 - 0.349 * keep,
      0.686 + 0.314 * keep,
      0.168 - 0.168 * keep,
      0.272 - 0.272 * keep,
      0.534 - 0.534 * keep,
      0.131 + 0.869 * keep,
    ]);
  }

  /** Applies the saturation matrix; `value` 1 leaves the colour unchanged. */
  saturate(value = 1) {
    const amount = value;
    this.multiply([
      0.213 + 0.787 * amount,
      0.715 - 0.715 * amount,
      0.072 - 0.072 * amount,
      0.213 - 0.213 * amount,
      0.715 + 0.285 * amount,
      0.072 - 0.072 * amount,
      0.213 - 0.213 * amount,
      0.715 - 0.715 * amount,
      0.072 + 0.928 * amount,
    ]);
  }

  /** Multiplies the colour by a row-major 3x3 matrix given as 9 numbers. */
  multiply(matrix) {
    // Snapshot the channels so every output row sees the same input colour.
    const { r, g, b } = this;
    const row = (start) => this.clamp(r * matrix[start] + g * matrix[start + 1] + b * matrix[start + 2]);
    this.r = row(0);
    this.g = row(3);
    this.b = row(6);
  }

  brightness(value = 1) {
    this.linear(value);
  }

  contrast(value = 1) {
    this.linear(value, -(0.5 * value) + 0.5);
  }

  /** Applies `channel * slope + intercept * 255` to every channel. */
  linear(slope = 1, intercept = 0) {
    const transfer = (channel) => this.clamp(channel * slope + intercept * 255);
    this.r = transfer(this.r);
    this.g = transfer(this.g);
    this.b = transfer(this.b);
  }

  /** Blends every channel towards its inverse; `value` 1 inverts fully. */
  invert(value = 1) {
    const flip = (channel) => this.clamp((value + channel / 255 * (1 - 2 * value)) * 255);
    this.r = flip(this.r);
    this.g = flip(this.g);
    this.b = flip(this.b);
  }

  /** Returns the colour as `{ h, s, l }`, each scaled by 100. */
  hsl() {
    // Code taken from https://stackoverflow.com/a/9493060/2688027, licensed under CC BY-SA.
    const r = this.r / 255;
    const g = this.g / 255;
    const b = this.b / 255;
    const max = Math.max(r, g, b);
    const min = Math.min(r, g, b);
    const l = (max + min) / 2;
    let h;
    let s;

    if (max === min) {
      // Achromatic: no hue, no saturation.
      h = 0;
      s = 0;
    } else {
      const d = max - min;
      s = l > 0.5 ? d / (2 - max - min) : d / (max + min);
      if (max === r) {
        h = (g - b) / d + (g < b ? 6 : 0);
      } else if (max === g) {
        h = (b - r) / d + 2;
      } else if (max === b) {
        h = (r - g) / d + 4;
      }
      h /= 6;
    }

    return {
      h: h * 100,
      s: s * 100,
      l: l * 100,
    };
  }

  /** Limits `value` to the range 0..255. */
  clamp(value) {
    return value > 255 ? 255 : (value < 0 ? 0 : value);
  }
}
class Solver {
constructor(target, baseColor) {
this.target = target;
this.targetHSL = target.hsl();
this.reusedColor = new Color(0, 0, 0);
}
solve() {
const result = this.solveNarrow(this.solveWide());
return {
values: result.values,
loss: result.loss,
filter: this.css(result.values),
};
}
solveWide() {
const A = 5;
const c = 15;
const a = [60, 180, 18000, 600, 1.2, 1.2];
let best = { loss: Infinity };
for (let i = 0; best.loss > 25 && i < 3; i++) {
const initial = [50, 20, 3750, 50, 100, 100];
const result = this.spsa(A, a, c, initial, 1000);
if (result.loss < best.loss) {
best = result;
}
}
return best;
}
solveNarrow(wide) {
const A = wide.loss;
const c = 2;
const A1 = A + 1;
const a = [0.25 * A1, 0.25 * A1, A1, 0.25 * A1, 0.2 * A1, 0.2 * A1];
return this.spsa(A, a, c, wide.values, 500);
}
spsa(A, a, c, values, iters) {
const alpha = 1;
const gamma = 0.16666666666666666;
let best = null;
let bestLoss = Infinity;
const deltas = new Array(6);
const highArgs = new Array(6);
const lowArgs = new Array(6);
for (let k = 0; k < iters; k++) {
const ck = c / Math.pow(k + 1, gamma);
for (let i = 0; i < 6; i++) {
deltas[i] = Math.random() > 0.5 ? 1 : -1;
highArgs[i] = values[i] + ck * deltas[i];
lowArgs[i] = values[i] - ck * deltas[i];
}
const lossDiff = this.loss(highArgs) - this.loss(lowArgs);
for (let i = 0; i < 6; i++) {
const g = lossDiff / (2 * ck) * deltas[i];
const ak = a[i] / Math.pow(A + k + 1, alpha);
values[i] = fix(values[i] - ak * g, i);
}
const loss = this.loss(values);
if (loss < bestLoss) {
best = values.slice(0);
bestLoss = loss;
}
}
return { values: best, loss: bestLoss };
function fix(value, idx) {
let max = 100;
if (idx === 2 /* saturate */) {
max = 7500;
} else if (idx === 4 /* brightness */ || idx === 5 /* contrast */) {
max = 200;
}
if (idx === 3 /* hue-rotate */) {
if (value > max) {
value %= max;
} else if (value < 0) {
value = max + value % max;
}
} else if (value < 0) {
value = 0;
} else if (value > max) {
value = max;
}
return value;
}
}
loss(filters) {
// Argument is array of percentages.
const color = this.reusedColor;
color.set(0, 0, 0);
color.invert(filters[0] / 100);
color.sepia(filters[1] / 100);
color.saturate(filters[2] / 100);
color.hueRotate(filters[3] * 3.6);
color.brightness(filters[4] / 100);
color.contrast(filters[5] / 100);
const colorHSL = color.hsl();
return (
Math.abs(color.r - this.target.r) +
Math.abs(color.g - this.target.g) +
Math.abs(color.b - this.target.b) +
Math.abs(colorHSL.h - this.targetHSL.h) +
Math.abs(colorHSL.s - this.targetHSL.s) +
Math.abs(colorHSL.l - this.targetHSL.l)
);
}
css(filters) {
function fmt(idx, multiplier = 1) {
return Math.round(filters[idx] * multiplier);
}
return `filter: invert(${fmt(0)}%) sepia(${fmt(1)}%) saturate(${fmt(2)}%) hue-rotate(${fmt(3, 3.6)}deg) brightness(${fmt(4)}%) contrast(${fmt(5)}%);`;
}
}
/**
 * Parses a 3- or 6-digit hex colour, with or without a leading '#'.
 * @param {string} hex colour string such as "#03F" or "0033FF"
 * @returns {number[]|null} [r, g, b] with each entry 0..255, or null when the
 *   string is not a hex colour
 */
function hexToRgb(hex) {
  // Widen the shorthand form ("03F") to the full form ("0033FF") by doubling each digit.
  const expanded = hex.replace(
    /^#?([a-f\d])([a-f\d])([a-f\d])$/i,
    (m, r, g, b) => r + r + g + g + b + b
  );

  const match = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(expanded);
  if (!match) {
    return null;
  }

  // Capture groups 1..3 hold the red, green and blue digit pairs.
  return match.slice(1, 4).map((pair) => parseInt(pair, 16));
}
// Wires the "execute" button: parses the hex colour typed by the user, solves
// for a matching filter and shows the target colour, the filter and its loss.
$(document).ready(() => {
  $('button.execute').click(() => {
    const rgb = hexToRgb($('input.target').val());
    // hexToRgb returns null for unparseable input, so test for that before
    // reading .length.
    if (!rgb || rgb.length !== 3) {
      alert('Invalid format!');
      return;
    }

    const color = new Color(rgb[0], rgb[1], rgb[2]);
    const solver = new Solver(color);
    const result = solver.solve();

    // Lower loss means the filtered colour is closer to the target.
    let lossMsg;
    if (result.loss < 1) {
      lossMsg = 'This is a perfect result.';
    } else if (result.loss < 5) {
      lossMsg = 'This is close enough.';
    } else if (result.loss < 15) {
      lossMsg = 'The color is somewhat off. Consider running it again.';
    } else {
      lossMsg = 'The color is extremely off. Run it again!';
    }

    $('.realPixel').css('background-color', color.toString());
    $('.filterPixel').attr('style', result.filter);
    $('.filterDetail').text(result.filter);
    $('.lossDetail').html(`Loss: ${result.loss.toFixed(1)}. <b>${lossMsg}</b>`);
  });
});
I have a question about the math involved to copy a path.
Let's say I have this path:
http://imgur.com/a/42l0t
I want an exact copy of this path besides the black one. I wrote a small C# program that calculates the angle between two points. Depending on the angle, an offset to the X or Y value is added.
It kind of works, this is the result:
http://imgur.com/bJQDCgq
As you can see, it's not that pretty.
Now, my real question is: What is the proper math to use for this?
Hopefully someone knows an answer, because I'm kinda stuck on this one.
Regards,
Sascha
Code:
/// <summary>
/// Draws the given path in black and, in red, a second path built by shifting
/// each point by OFFSET in a direction chosen from the angle to the next point.
/// </summary>
/// <param name="points">Vertices of the original path, in drawing order.</param>
void Plot(List<Point> points)
{
    // CreateGraphics hands out a disposable drawing surface; release it when done.
    using (Graphics g = pictureBox.CreateGraphics())
    {
        g.Clear(Color.White);

        for (int i = 0; i < points.Count - 1; i++)
        {
            g.DrawLine(Pens.Black, points[i], points[i + 1]);
        }

        List<Point> points2 = new List<Point>();
        for (int i = 0; i < points.Count - 1; i++)
        {
            Point current = points[i];
            Point next = points[i + 1];
            var angle = getAngleFromPoint(current, next);
            Debug.WriteLine(angle);

            // True when the next point has the larger Y coordinate.
            bool yIncreases = current.Y < next.Y;

            // The angle ranges are disjoint, so at most one branch runs. Angles
            // outside [0, 360) and the empty cases add no point for this segment.
            if (angle >= 0 && angle < 90)
            {
                int dx = yIncreases ? -OFFSET : OFFSET;
                points2.Add(new Point(current.X + dx, current.Y));
            }
            else if (angle >= 90 && angle < 135)
            {
                if (yIncreases)
                {
                    points2.Add(new Point(current.X - OFFSET / 2, current.Y + OFFSET));
                }
            }
            else if (angle >= 135 && angle < 180)
            {
                points2.Add(new Point(current.X - OFFSET, current.Y));
            }
            else if (angle >= 180 && angle < 225)
            {
                if (current.X < next.X)
                {
                    points2.Add(new Point(current.X, current.Y - OFFSET));
                }
                else if (yIncreases) // \
                {
                    points2.Add(new Point(current.X - OFFSET, current.Y));
                }
            }
            else if (angle >= 225 && angle < 270)
            {
                if (yIncreases)
                {
                    points2.Add(new Point(current.X - OFFSET / 2, current.Y - OFFSET));
                }
            }
            else if (angle >= 270 && angle < 315)
            {
                points2.Add(new Point(current.X, current.Y - OFFSET));
            }
            else if (angle >= 315 && angle < 360)
            {
                if (yIncreases)
                {
                    points2.Add(new Point(current.X + OFFSET, current.Y));
                }
                else
                {
                    // NOTE(review): literal 10 rather than OFFSET - looks like a leftover
                    // tuning value; confirm the intended horizontal shift.
                    points2.Add(new Point(current.X + 10, current.Y - OFFSET));
                }
            }
        }

        for (int i = 0; i < points2.Count - 1; i++)
        {
            g.DrawLine(Pens.Red, points2[i], points2[i + 1]);
        }
    }
}
I think if I decrease the angle steps (from 45 degrees to maybe 30 degrees) I could improve the result, but there must be a better solution.
I suppose one way to tackle this is to split it into line-pairs (ie: three points)
Find the parallel line (at distance d) for each line in the pair. Then find where these parallel lines intersect to give you the location of a point on the new line.
In very rough pseudo-code:
points a, b, c
distance d
lineab = findLineParallelTo(line(a,b), d)
linebc = findLineParallelTo(line(b,c), d)
return intersect(lineab, linebc)
I implemented the solution from #Jack and it works great:
/// <summary>
/// A line described by two points, P and Q.
/// </summary>
public class Line
{
    /// <summary>First point of the line.</summary>
    public PointF P { get; private set; }

    /// <summary>Second point of the line.</summary>
    public PointF Q { get; private set; }

    /// <summary>Never assigned inside this class, so it keeps its default value.</summary>
    public float Pitch { get; private set; }

    public Line()
    {
    }

    public Line(float px, float py, float qx, float qy)
        : this(new PointF(px, py), new PointF(qx, qy))
    {
    }

    public Line(PointF p, PointF q)
    {
        P = p;
        Q = q;
    }

    /// <summary>
    /// Returns a new line shifted by <paramref name="distance"/> along the
    /// direction perpendicular to this line.
    /// http://stackoverflow.com/questions/2825412/draw-a-parallel-line
    /// </summary>
    public Line FindParallelLine(float distance)
    {
        float dx = P.X - Q.X;
        float dy = P.Y - Q.Y;
        float length = (float)Math.Sqrt(dx * dx + dy * dy);

        // Both end points are moved by the same perpendicular displacement.
        float shiftX = distance * (Q.Y - P.Y) / length;
        float shiftY = distance * (P.X - Q.X) / length;

        PointF shiftedP = new PointF(P.X + shiftX, P.Y + shiftY);
        PointF shiftedQ = new PointF(Q.X + shiftX, Q.Y + shiftY);
        return new Line(shiftedP, shiftedQ);
    }

    public override string ToString()
    {
        return string.Format("P({0}|{1}), Q({2}|{3}) - Pitch: {4}", P.X, P.Y, Q.X, Q.Y, Pitch);
    }
}
/// <summary>
/// Intersects the two infinite lines through a.P/a.Q and b.P/b.Q.
/// </summary>
/// <returns>
/// The intersection point, or PointF.Empty when the lines are parallel.
/// </returns>
private PointF FindIntersection(Line a, Line b)
{
    PointF A = a.P;
    PointF B = a.Q;
    PointF C = b.P;
    PointF D = b.Q;

    float dy1 = B.Y - A.Y;
    float dx1 = B.X - A.X;
    float dy2 = D.Y - C.Y;
    float dx2 = D.X - C.X;

    // Cross product of the two direction vectors; zero means parallel lines.
    float cross = dx1 * dy2 - dy1 * dx2;
    if (cross == 0)
    {
        return PointF.Empty;
    }

    // Solve A + t * (B - A) = C + u * (D - C) for t. Unlike the slope form
    // y = A.Y + (dy1 / dx1) * (x - A.X), this stays finite when the first
    // line is vertical (dx1 == 0).
    float t = ((C.X - A.X) * dy2 - (C.Y - A.Y) * dx2) / cross;
    return new PointF(A.X + t * dx1, A.Y + t * dy1);
}
/// <summary>
/// Offsets the segments a-b and b-c by <paramref name="distance"/> and
/// returns the point where the two offset lines meet.
/// </summary>
private PointF FindIntersection(PointF a, PointF b, PointF c, float distance)
{
    Line firstOffset = new Line(a, b).FindParallelLine(distance);
    Line secondOffset = new Line(b, c).FindParallelLine(distance);
    return FindIntersection(firstOffset, secondOffset);
}
/// <summary>
/// Computes the offset corner for every run of three consecutive points.
/// Corners that come back empty or contain NaN are left out.
/// </summary>
private List<PointF> FindIntersections(PointF[] points, float distance)
{
    List<PointF> corners = new List<PointF>();
    int lastStart = points.Length - 2;

    for (int start = 0; start < lastStart; start++)
    {
        PointF corner = FindIntersection(points[start], points[start + 1], points[start + 2], distance);

        bool unusable = corner.IsEmpty || float.IsNaN(corner.X) || float.IsNaN(corner.Y);
        if (unusable)
        {
            continue;
        }

        corners.Add(corner);
    }

    return corners;
}
/// <summary>
/// Start point of the offset path: the first end of the first segment after
/// that segment has been shifted by <paramref name="distance"/>.
/// </summary>
private PointF GetFirstPoint(PointF[] points, float distance)
{
    Line firstSegment = new Line(points[0], points[1]);
    return firstSegment.FindParallelLine(distance).P;
}
/// <summary>
/// End point of the offset path: the second end of the last segment after
/// that segment has been shifted by <paramref name="distance"/>.
/// </summary>
private PointF GetLastPoint(PointF[] points, float distance)
{
    int last = points.Length - 1;
    Line lastSegment = new Line(points[last - 1], points[last]);
    return lastSegment.FindParallelLine(distance).Q;
}
Example call:
// Example: offsets a fixed nine-point path by the distance typed into
// textBox1 and draws the original in black and the offset copy in red.
OFFSET = float.Parse(textBox1.Text);
List<PointF> points = new List<PointF>
{
    new PointF(200, 180),
    new PointF(160, 160),
    new PointF(100, 160),
    new PointF(60, 140),
    new PointF(40, 100),
    new PointF(80, 60),
    new PointF(140, 100),
    new PointF(180, 140),
    new PointF(220, 80),
};

// Convert once and reuse the array for every call below.
PointF[] path = points.ToArray();
List<PointF> intersections = FindIntersections(path, OFFSET);
intersections.Insert(0, GetFirstPoint(path, OFFSET));
intersections.Add(GetLastPoint(path, OFFSET));

// CreateGraphics hands out a disposable drawing surface; release it when done.
using (Graphics g = pictureBox.CreateGraphics())
{
    g.Clear(Color.White);
    g.DrawLines(Pens.Black, path);
    // Connect the intersection points.
    g.DrawLines(Pens.Red, intersections.ToArray());
}
Example image:
http://imgur.com/onUstGT
Thanks again #Jack !
Using Qt, my negative int is not rounding properly. Here is my source code:
// Scales intCalibrate by 100 * 5/9 in single precision and rounds half away
// from zero by subtracting or adding 0.5 before the assignment.
// NOTE(review): the declarations are not visible here - presumably
// intCalibrateCalculate is an integer, so the assignment truncates; confirm.
if (intCalibrate < 0)
{
// Negative input: the 65535 offset is added inside the same floating-point
// expression, i.e. before the result is converted on assignment.
intCalibrateCalculate = (intCalibrate * 100.0f * 5.0f/9.0f - 0.5f) + 65535;
}
else
{
intCalibrateCalculate = (intCalibrate * 100.0f * 5.0f/9.0f + 0.5f);
}
It is rounding correctly if it is positive. Not sure why negative isn't working.
I figured it out.
// Scales intCalibrate by 100 * 5/9 in single precision and rounds half away
// from zero by subtracting or adding 0.5 before the assignment.
// NOTE(review): the declarations are not visible here - presumably
// intCalibrateCalculate is an integer, so the assignment truncates; confirm.
if (intCalibrate < 0)
{
// Negative input: the rounded value is stored first and the 65535 offset is
// added in a separate statement, after the conversion on assignment.
intCalibrateCalculate = (intCalibrate * 100.0f * 5.0f/9.0f - 0.5f);
// NOTE(review): a 16-bit two's-complement wrap would add 65536 - confirm that 65535 is intended.
intCalibrateCalculate = intCalibrateCalculate + 65535;
}
else
{
intCalibrateCalculate = (intCalibrate * 100.0f * 5.0f/9.0f + 0.5f);
}
I'm using Incanter and Parallel Colt for a project, and need to have a function that returns the modified Bessel function of an order n for a value v.
The Colt library has two methods for order 0 and order 1, but beyond that, only a method that return the Bessel function of order n for a value v (cern.jet.math.tdouble.Bessel/jn).
I'm trying to build the R function, dskellam(x,lambda1, lambda2) for the Skellam distribution, in Clojure/Java
Is there something I can do with the return value of the Bessel method to convert it to a modified Bessel?
No, the difference isn't a simple transformation, as these links make clear:
http://mathworld.wolfram.com/BesselFunctionoftheFirstKind.html
http://mathworld.wolfram.com/ModifiedBesselFunctionoftheFirstKind.html
I'd have a look at "Numerical Recipes" or Abramowitz & Stegun. It wouldn't be hard to implement your own in a short period of time.
Here's a Java implementation of the modified Bessel functions:
package math;
/**
 * Functions that are not part of standard libraries: modified Bessel
 * functions of the first kind, I0(x), I1(x) and In(x) for n &gt;= 2.
 * User: Michael
 * Date: 1/9/12
 * Time: 9:22 PM
 */
public class Functions {

    /**
     * Controls how far above n the downward recurrence in {@link #bessi} starts;
     * larger values give more accurate results. The previous value of 4.0 started
     * the recurrence too low once x grew beyond a few units.
     */
    public static final double ACC = 40.0;
    /** Magnitude above which the recurrence values are rescaled to avoid overflow. */
    public static final double BIGNO = 1.0e10;
    /** Rescaling factor applied when {@link #BIGNO} is exceeded. */
    public static final double BIGNI = 1.0e-10;

    /**
     * Prints a table of I0, I1 and I2.
     *
     * @param args optional xmin (default 0.0), xmax (default 4.0) and step dx (default 0.1)
     * @throws IllegalArgumentException if dx is not positive
     */
    public static void main(String[] args) {
        double xmin = ((args.length > 0) ? Double.parseDouble(args[0]) : 0.0);
        double xmax = ((args.length > 1) ? Double.parseDouble(args[1]) : 4.0);
        double dx = ((args.length > 2) ? Double.parseDouble(args[2]) : 0.1);
        if (!(dx > 0.0)) {
            // A zero or negative step would never reach xmax.
            throw new IllegalArgumentException("Step dx must be positive");
        }
        System.out.printf("%10s %10s %10s %10s\n", "x", "bessi0(x)", "bessi1(x)", "bessi2(x)");
        for (double x = xmin; x < xmax; x += dx) {
            System.out.printf("%10.6f %10.6f %10.6f %10.6f\n", x, bessi0(x), bessi1(x), bessi(2, x));
        }
    }

    /**
     * Modified Bessel function I0(x).
     *
     * @param x any real argument
     * @return I0(x); the function is even, so the sign of x does not matter
     */
    public static final double bessi0(double x) {
        double answer;
        double ax = Math.abs(x);
        if (ax < 3.75) { // polynomial fit
            double y = x / 3.75;
            y *= y;
            answer = 1.0 + y * (3.5156229 + y * (3.0899424 + y * (1.2067492 + y * (0.2659732 + y * (0.360768e-1 + y * 0.45813e-2)))));
        } else { // asymptotic form: polynomial in 1/|x| times exp(|x|)/sqrt(|x|)
            double y = 3.75 / ax;
            answer = 0.39894228 + y * (0.1328592e-1 + y * (0.225319e-2 + y * (-0.157565e-2 + y * (0.916281e-2 + y * (-0.2057706e-1 + y * (0.2635537e-1 + y * (-0.1647633e-1 + y * 0.392377e-2)))))));
            answer *= (Math.exp(ax) / Math.sqrt(ax));
        }
        return answer;
    }

    /**
     * Modified Bessel function I1(x).
     *
     * @param x any real argument
     * @return I1(x); the function is odd, so I1(-x) = -I1(x)
     */
    public static final double bessi1(double x) {
        double answer;
        double ax = Math.abs(x);
        if (ax < 3.75) { // polynomial fit
            double y = x / 3.75;
            y *= y;
            answer = ax * (0.5 + y * (0.87890594 + y * (0.51498869 + y * (0.15084934 + y * (0.2658733e-1 + y * (0.301532e-2 + y * 0.32411e-3))))));
        } else { // asymptotic form: polynomial in 1/|x| times exp(|x|)/sqrt(|x|)
            double y = 3.75 / ax;
            answer = 0.2282967e-1 + y * (-0.2895312e-1 + y * (0.1787654e-1 - y * 0.420059e-2));
            answer = 0.39894228 + y * (-0.3988024e-1 + y * (-0.362018e-2 + y * (0.163801e-2 + y * (-0.1031555e-1 + y * answer))));
            answer *= (Math.exp(ax) / Math.sqrt(ax));
        }
        // Both branches evaluate the function at |x|; restore the sign for x < 0.
        return (x < 0.0) ? -answer : answer;
    }

    /**
     * Modified Bessel function In(x) for integer order n &gt;= 2, computed by
     * downward recurrence and normalised with {@link #bessi0}.
     *
     * @param n order, must be at least 2
     * @param x any real argument
     * @return In(x)
     * @throws IllegalArgumentException if n is less than 2
     */
    public static final double bessi(int n, double x) {
        if (n < 2)
            throw new IllegalArgumentException("Function order must be greater than 1");
        if (x == 0.0) {
            return 0.0;
        } else {
            double tox = 2.0/Math.abs(x);
            double ans = 0.0;
            double bip = 0.0;
            double bi = 1.0;
            // Recur downward from an even order well above n.
            for (int j = 2*(n + (int)Math.sqrt(ACC*n)); j > 0; --j) {
                double bim = bip + j*tox*bi;
                bip = bi;
                bi = bim;
                if (Math.abs(bi) > BIGNO) { // rescale to prevent overflow
                    ans *= BIGNI;
                    bi *= BIGNI;
                    bip *= BIGNI;
                }
                if (j == n) {
                    ans = bip;
                }
            }
            // bi now holds the unnormalised order-0 value; scale with the true I0.
            ans *= bessi0(x)/bi;
            // In(-x) = (-1)^n In(x): only odd orders change sign for negative x.
            return (((x < 0.0) && ((n % 2) != 0)) ? -ans : ans);
        }
    }
}
Bicubic interpolation is one of the common interpolation methods, but I cannot find any working implementation in OpenCL. I decided to write bicubic interpolation in OpenCL myself, but ...
I have a problem with the kernel program.
When I execute the kernel, the program fails with the error CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST. There is no other information about the cause of the error. I am using the JavaCL binding from Google Code (http://code.google.com/p/javacl) and the AMD Accelerated Parallel Processing SDK 2.3 on Ubuntu Linux 10.10; the hardware is an AMD Radeon HD 5xxx.
I don't have an OpenCL debugger for the AMD APP SDK on Ubuntu.
If I uncomment float4 val=read_imagef(signal, sampler, (float2)(x+iX,y+iY)); and comment out the bicubic interpolation calculation "float4 val=...", everything works without any error (but using bilinear interpolation). I think the error is caused by an invalid memory access or a register memory overflow.
// Integer-coordinate read_imagef requires a nearest-filter sampler with
// unnormalised coordinates.
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP_TO_EDGE;
// Catmull-Rom basis, row-major 4x4: row r holds the weights of the four samples
// for the power t^(3-r). Program-scope data lives in the __constant address space.
__constant float CATMULL_ROM[16]={-0.5F,1.5F,-1.5F,0.5F,1.0F,-2.5F,2.0F,-0.5F,-0.5F,0.0F,0.5F,0.0F,0.0F,1.0F,0.0F,0.0F};
// Bicubic (Catmull-Rom) upscaling of the x channel of `signal` by an integer factor.
// Each work-item fits the bicubic patch over the 4x4 source samples starting at
// (get_global_id(0)-2, get_global_id(1)-2) and writes the scale x scale block of
// output pixels that the patch covers.
//   scale   - integer magnification factor
//   signal  - source image (only the x channel is read)
//   upscale - destination image; writes outside its bounds are skipped
__kernel void bicubicUpscale(int scale,read_only image2d_t signal, write_only image2d_t upscale) {
    const int x = get_global_id(0)-2, y = get_global_id(1)-2;
    const int2 outDim = get_image_dim(upscale);
    float C[16];
    float T[16];
    for (int i = 0; i < 16; i++)
    {
        C[i]=0.0F;
        T[i]=0.0F;
    }
    // T[4*i+j]: coefficient of tx^(3-j) for source row i.
    for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++)
            for (int k = 0; k < 4; k++)
            {
                T[4*i+j] += read_imagef(signal, sampler, (int2)(x+k,y+i)).x * CATMULL_ROM[4*j+k];
            }
    // C[4*i+j]: coefficient of ty^(3-i) * tx^(3-j) of the bicubic patch.
    for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++)
            for (int k = 0; k < 4; k++)
            {
                C[4*i+j] += CATMULL_ROM[4*i+k] * T[4*k+j];
            }
    // The patch interpolates between the 2nd and 3rd sample in each direction;
    // with pixel centres at +0.5 that block starts at scale*(id - 0.5). floor()
    // keeps neighbouring work-items from landing on the same pixel when scale is odd.
    const int xOff = (int)floor(scale*1.5F) + x*scale;
    const int yOff = (int)floor(scale*1.5F) + y*scale;
    for (int i = 0; i < scale; i++)
    {
        for (int j = 0; j < scale; j++)
        {
            const int2 dst = (int2)(xOff+j, yOff+i);
            // write_imagef is undefined outside the image, so skip such pixels.
            if (dst.x < 0 || dst.y < 0 || dst.x >= outDim.x || dst.y >= outDim.y)
            {
                continue;
            }
            // Row index i selects ty (the outer polynomial, over the rows of C);
            // column index j selects tx (the inner polynomial).
            const float ty=(float)i/(float) scale;
            const float tx=(float)j/(float) scale;
            const float val= ty * (ty * (ty * (tx * (tx * (tx * C[0] + C[1]) + C[2]) + C[3])
                + (tx * (tx * (tx * C[4] + C[5]) + C[6]) + C[7]))
                + (tx * (tx * (tx * C[8] + C[9]) + C[10]) + C[11]))
                + (tx * (tx * (tx * C[12] + C[13]) + C[14]) + C[15]);
            write_imagef(upscale, dst, (float4)(val));
        }
    }
}
I rewrote this program to use local memory, but it still does not work correctly.
// Integer-coordinate read_imagef requires a nearest-filter sampler with
// unnormalised coordinates.
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP_TO_EDGE;
// Catmull-Rom basis, row-major 4x4: row r holds the weights of the four samples
// for the power t^(3-r). Program-scope data lives in the __constant address space.
__constant float CATMULL_ROM[16]={-0.5F,1.5F,-1.5F,0.5F,1.0F,-2.5F,2.0F,-0.5F,-0.5F,0.0F,0.5F,0.0F,0.0F,1.0F,0.0F,0.0F};
// Bicubic (Catmull-Rom) upscaling that keeps the per-work-item coefficient
// tables in local memory.
//   sharedBuffer - local scratch space; the host must provide at least
//                  32 floats per work-item of the work-group
//   scale        - integer magnification factor
//   signal       - source image (only the x channel is read)
//   upscale      - destination image; writes outside its bounds are skipped
__kernel void bicubicUpscale(local float* sharedBuffer,int scale,read_only image2d_t signal, write_only image2d_t upscale) {
    const int x = get_global_id(0)-2, y = get_global_id(1)-2;
    const int2 outDim = get_image_dim(upscale);
    // Local memory is per work-group, so the slice is selected with the LOCAL id.
    // Each work-item owns 32 consecutive floats: C in the first 16, T in the last 16.
    const int slot = (int)(get_local_id(1)*get_local_size(0) + get_local_id(0));
    local float* C = &sharedBuffer[slot*32];
    local float* T = C + 16;
    for (int i = 0; i < 32; i++){
        C[i]=0.0F;
    }
    // T[4*i+j]: coefficient of tx^(3-j) for source row i.
    for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++)
            for (int k = 0; k < 4; k++){
                T[4*i+j] += read_imagef(signal, sampler, (int2)(x+k,y+i)).x * CATMULL_ROM[4*j+k];
            }
    // C[4*i+j]: coefficient of ty^(3-i) * tx^(3-j) of the bicubic patch.
    for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++)
            for (int k = 0; k < 4; k++){
                C[4*i+j] += CATMULL_ROM[4*i+k] * T[4*k+j];
            }
    // No barrier is needed: every work-item reads only the slice it wrote itself.
    // The patch interpolates between the 2nd and 3rd sample in each direction;
    // with pixel centres at +0.5 that block starts at scale*(id - 0.5).
    const int xOff = (int)floor(scale*1.5F) + x*scale;
    const int yOff = (int)floor(scale*1.5F) + y*scale;
    for (int i = 0; i < scale; i++)
        for (int j = 0; j < scale; j++)
        {
            const int2 dst = (int2)(xOff+j, yOff+i);
            // write_imagef is undefined outside the image, so skip such pixels.
            if (dst.x < 0 || dst.y < 0 || dst.x >= outDim.x || dst.y >= outDim.y)
            {
                continue;
            }
            // Row index i selects ty (the outer polynomial, over the rows of C);
            // column index j selects tx (the inner polynomial).
            const float ty=(float)i/(float) scale;
            const float tx=(float)j/(float) scale;
            const float val= ty * (ty * (ty * (tx * (tx * (tx * C[0] + C[1]) + C[2]) + C[3])
                + (tx * (tx * (tx * C[4] + C[5]) + C[6]) + C[7]))
                + (tx * (tx * (tx * C[8] + C[9]) + C[10]) + C[11]))
                + (tx * (tx * (tx * C[12] + C[13]) + C[14]) + C[15]);
            write_imagef(upscale, dst, (float4)(val));
        }
}
Do you know of any solution to this problem?
Java sources + maven2 build. Use command "mvn clean compile exec:java" to compile and run demo.
Regards,
Igor
I fixed it! This kernel is not optimal from a performance point of view, but it is functionally correct.
Please use such parameters for enqueueNDRange:
// Arguments are passed in the kernel's parameter order: scale, input image, output image.
kernelBicubic.getKernel().setArgs(scaleFactor, inImage, imageOut);
// The global work size is one larger than the input image in each dimension;
// lastEvent is passed in and replaced by the event of this launch.
lastEvent=kernelBicubic.getKernel().enqueueNDRange(queue,
new int[]{(int) inImage.getWidth()+1,(int) inImage.getHeight()+1},lastEvent);
Kernel code:
// Integer-coordinate read_imagef requires a nearest-filter sampler with
// unnormalised coordinates.
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP_TO_EDGE;
// Catmull-Rom basis, row-major 4x4: row r holds the weights of the four samples
// for the power t^(3-r). Program-scope data lives in the __constant address space.
__constant float CATMULL_ROM[16]={-0.5F, 1.5F,-1.5F, 0.5F, 1.0F,-2.5F, 2.0F,-0.5F,-0.5F, 0.0F, 0.5F, 0.0F, 0.0F, 1.0F, 0.0F, 0.0F};
// Coefficient j of the horizontal cubic through the four samples of source row y+i.
inline float calcT(image2d_t signal,int x,int y,int i,int j){
    return read_imagef(signal, sampler, (int2)(x  ,y+i)).x * CATMULL_ROM[4*j]
          +read_imagef(signal, sampler, (int2)(x+1,y+i)).x * CATMULL_ROM[4*j+1]
          +read_imagef(signal, sampler, (int2)(x+2,y+i)).x * CATMULL_ROM[4*j+2]
          +read_imagef(signal, sampler, (int2)(x+3,y+i)).x * CATMULL_ROM[4*j+3];
}
// Coefficient (i, j) of the bicubic patch: basis row i applied across the four
// source rows to their horizontal coefficient j.
inline float C(image2d_t signal,int x,int y,int i,int j){
    return CATMULL_ROM[4*i  ] * calcT(signal,x,y,0,j)
          +CATMULL_ROM[4*i+1] * calcT(signal,x,y,1,j)
          +CATMULL_ROM[4*i+2] * calcT(signal,x,y,2,j)
          +CATMULL_ROM[4*i+3] * calcT(signal,x,y,3,j);
}
// Bicubic (Catmull-Rom) upscaling of the x channel of `signal` by an integer factor.
// Each work-item fits the patch over the 4x4 source samples starting at
// (get_global_id(0)-2, get_global_id(1)-2) and writes the scale x scale block of
// output pixels that the patch covers.
//   scale   - integer magnification factor
//   signal  - source image (only the x channel is read)
//   upscale - destination image; writes outside its bounds are skipped
__kernel void bicubicUpscale(int scale,read_only image2d_t signal, write_only image2d_t upscale) {
    const int x = get_global_id(0)-2, y = get_global_id(1)-2;
    const int2 outDim = get_image_dim(upscale);
    float C0 =C(signal,x,y,0,0);
    float C1 =C(signal,x,y,0,1);
    float C2 =C(signal,x,y,0,2);
    float C3 =C(signal,x,y,0,3);
    float C4 =C(signal,x,y,1,0);
    float C5 =C(signal,x,y,1,1);
    float C6 =C(signal,x,y,1,2);
    float C7 =C(signal,x,y,1,3);
    float C8 =C(signal,x,y,2,0);
    float C9 =C(signal,x,y,2,1);
    float C10=C(signal,x,y,2,2);
    float C11=C(signal,x,y,2,3);
    float C12=C(signal,x,y,3,0);
    float C13=C(signal,x,y,3,1);
    float C14=C(signal,x,y,3,2);
    float C15=C(signal,x,y,3,3);
    // Output block origin, scale*(id - 0.5). Taking floor() once, instead of
    // truncating xOff+j per pixel, keeps neighbouring work-items from writing
    // the same pixel when scale is odd.
    const int xOff=(int)floor(scale*1.5F) + x*scale;
    const int yOff=(int)floor(scale*1.5F) + y*scale;
    for (int i = 0; i < scale; i++)
    {
        for (int j = 0; j < scale; j++)
        {
            const int2 dst = (int2)(xOff+j, yOff+i);
            // write_imagef is undefined outside the image, so skip such pixels.
            if (dst.x < 0 || dst.y < 0 || dst.x >= outDim.x || dst.y >= outDim.y)
            {
                continue;
            }
            // iX follows the row index i and iY the column index j, matching the
            // row-major layout of the coefficients C0..C15.
            float iY=(float)j/(float) scale;
            float iX=(float)i/(float) scale;
            float val= iX * (iX * (iX * (iY * (iY * (iY * C0 + C1) + C2) + C3)
                + (iY * (iY * (iY * C4 + C5) + C6) + C7))
                + (iY * (iY * (iY * C8 + C9) + C10) + C11))
                + (iY * (iY * (iY * C12 + C13) + C14) + C15);
            write_imagef(upscale, dst, (float4)(val));
        }
    }
}