I'm new to OpenCL, and as my first project I tried to write code that checks intersections between many polylines and a single polygon.
I'm running the code on both CPU and GPU and get different results.
At first I passed NULL as the local work size when calling clEnqueueNDRangeKernel:
clEnqueueNDRangeKernel(command_queue, kIntersect, 1, NULL, &global, NULL, 2, &evtCalcBounds, &evtKernel);
After trying many things I saw that if I pass 1 as the local size it works fine and returns the same results on the CPU and GPU:
size_t local = 1;
clEnqueueNDRangeKernel(command_queue, kIntersect, 1, NULL, &global, &local, 2, &evtCalcBounds, &evtKernel);
Playing a bit more, I found that the CPU returns wrong results when I run the kernel with a local size of 8 or more (for some reason).
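To rule out a bad launch, this is a hedged host-side sanity check I can add (variable names mirror the call above; device is assumed to be the device the queue was created on):
size_t maxKernelWg = 0, maxDeviceWg = 0;
clGetKernelWorkGroupInfo(kIntersect, device, CL_KERNEL_WORK_GROUP_SIZE,
                         sizeof(maxKernelWg), &maxKernelWg, NULL);
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                sizeof(maxDeviceWg), &maxDeviceWg, NULL);

size_t local = 8;
/* In OpenCL 1.x 'global' must be a multiple of 'local', otherwise the enqueue fails. */
cl_int err = clEnqueueNDRangeKernel(command_queue, kIntersect, 1, NULL,
                                    &global, &local, 2, &evtCalcBounds, &evtKernel);
if (err != CL_SUCCESS) { /* handle CL_INVALID_WORK_GROUP_SIZE etc. */ }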
I'm not using any local memory, just globals and privates.
I didn't add the code at first because I thought it was irrelevant to the problem (note that with a single work-group it works fine), and it is long. If needed, I will try to simplify it.
The code flow goes like this:
I have the polyline coordinates stored in one big buffer and the single polygon in another. In addition, I provide another buffer with a single int that holds the current result count. All buffers are __global arguments.
In the kernel I simply check for intersections between all the lines of polyline[get_global_id(0)] and the lines of the polygon. If an intersection is found, I use atomic_inc on the result count. There is no reading and writing from the same buffer, and no barriers or memory fences; the atomic_inc is the only thread-safe mechanism I'm using.
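After the kernel event completes, the host just reads back resCount and the first resCount entries of output; a hedged sketch (buffer handle names are illustrative):
/* Hedged host-side sketch: read back the result count and the matching indices. */
cl_uint count = 0;
clEnqueueReadBuffer(command_queue, resCountBuf, CL_TRUE, 0,
                    sizeof(cl_uint), &count, 1, &evtKernel, NULL);
float *hits = (float *)malloc(count * sizeof(float));
clEnqueueReadBuffer(command_queue, outputBuf, CL_TRUE, 0,
                    count * sizeof(float), hits, 0, NULL, NULL);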
-- UPDATE --
Added my code:
I know I could probably make better use of OpenCL built-in functions for some of the vector math, but for now I'm simply converting code from my old single-threaded CPU program to CL, so that is not my concern right now.
bool isPointInPolygon(float x, float y, __global float* polygon) {
bool blnInside = false;
uint length = convert_uint(polygon[4]);
int s = 5;
uint j = length - 1;
for (uint i = 0; i < length; j = i++) {
uint realIdx = s + i * 2;
uint realInvIdx = s + j * 2;
if (((polygon[realIdx + 1] > y) != (polygon[realInvIdx + 1] > y)) &&
(x < (polygon[realInvIdx] - polygon[realIdx]) * (y - polygon[realIdx + 1]) / (polygon[realInvIdx + 1] - polygon[realIdx + 1]) + polygon[realIdx]))
blnInside = !blnInside;
}
return blnInside;
}
bool isRectanglesIntersected(float p_dblMinX1, float p_dblMinY1,
float p_dblMaxX1, float p_dblMaxY1,
float p_dblMinX2, float p_dblMinY2,
float p_dblMaxX2, float p_dblMaxY2) {
bool blnResult = true;
if (p_dblMinX1 > p_dblMaxX2 ||
p_dblMaxX1 < p_dblMinX2 ||
p_dblMinY1 > p_dblMaxY2 ||
p_dblMaxY1 < p_dblMinY2) {
blnResult = false;
}
return blnResult;
}
bool isLinesIntersects(
double Ax, double Ay,
double Bx, double By,
double Cx, double Cy,
double Dx, double Dy) {
double distAB, theCos, theSin, newX, ABpos;
// Fail if either line is undefined.
if (Ax == Bx && Ay == By || Cx == Dx && Cy == Dy)
return false;
// (1) Translate the system so that point A is on the origin.
Bx -= Ax; By -= Ay;
Cx -= Ax; Cy -= Ay;
Dx -= Ax; Dy -= Ay;
// Discover the length of segment A-B.
distAB = sqrt(Bx*Bx + By*By);
// (2) Rotate the system so that point B is on the positive X axis.
theCos = Bx / distAB;
theSin = By / distAB;
newX = Cx*theCos + Cy*theSin;
Cy = Cy*theCos - Cx*theSin; Cx = newX;
newX = Dx*theCos + Dy*theSin;
Dy = Dy*theCos - Dx*theSin; Dx = newX;
// Fail if the lines are parallel.
return (Cy != Dy);
}
bool isPolygonInersectsPolyline(__global float* polygon, __global float* polylines, uint startIdx) {
uint polylineLength = convert_uint(polylines[startIdx]);
uint start = startIdx + 1;
float x1 = polylines[start];
float y1 = polylines[start + 1];
float x2;
float y2;
int polygonLength = convert_uint(polygon[4]);
int polygonLength2 = polygonLength * 2;
int startPolygonIdx = 5;
for (int currPolyineIdx = 0; currPolyineIdx < polylineLength - 1; currPolyineIdx++)
{
x2 = polylines[start + (currPolyineIdx*2) + 2];
y2 = polylines[start + (currPolyineIdx*2) + 3];
float polyX1 = polygon[0];
float polyY1 = polygon[1];
for (int currPolygonIdx = 0; currPolygonIdx < polygonLength; ++currPolygonIdx)
{
float polyX2 = polygon[startPolygonIdx + (currPolygonIdx * 2 + 2) % polygonLength2];
float polyY2 = polygon[startPolygonIdx + (currPolygonIdx * 2 + 3) % polygonLength2];
if (isLinesIntersects(x1, y1, x2, y2, polyX1, polyY1, polyX2, polyY2)) {
return true;
}
polyX1 = polyX2;
polyY1 = polyY2;
}
x1 = x2;
y1 = y2;
}
// No intersection found so far, so check containment instead
return isPointInPolygon(x1, y1, polygon);
}
__kernel void calcIntersections(__global float* polylines, // My flat points array - [pntCount, x,y,x,y,...., pntCount, x,y,... ]
__global float* pBounds, // The rectangle bounds of each polyline - set of 4 values [top, left, bottom, right....]
__global uint* pStarts, // The start index of each polyline in the polylines array
__global float* polygon, // The polygon i want to intersect with - first 4 items are the rectangle bounds [top, left, bottom, right, pntCount, x,y,x,y,x,y....]
__global float* output, // Result array for saving the intersections polylines indices
__global uint* resCount) // The result count
{
int i = get_global_id(0);
uint start = convert_uint(pStarts[i]);
if (isRectanglesIntersected(pBounds[i * 4], pBounds[i * 4 + 1], pBounds[i * 4 + 2], pBounds[i * 4 + 3],
polygon[0], polygon[1], polygon[2], polygon[3])) {
if (isPolygonInersectsPolyline(polygon, polylines, start)){
int oldVal = atomic_inc(resCount);
output[oldVal] = i;
}
}
}
Can anyone explain this to me?
Currently, I have an OpenCL kernel for BVH ray traversal, shown below. I'd be glad if someone had some pointers on optimizing this rather large kernel.
The thing is, I'm running this code with an SAH BVH, and I'd like to get performance similar to Timo Aila's traversal in his paper (Understanding the Efficiency of Ray Traversal on GPUs). Of course, his code uses a SplitBVH (which I might consider using in place of the SAH BVH, but in my opinion it has really slow build times). But I'm asking about traversal, not the BVH (and so far I've only worked with scenes where a SplitBVH won't give much advantage over an SAH BVH).
First of all, here is what I have so far (standard while-while traversal kernel).
__constant sampler_t sampler = CLK_FILTER_NEAREST;
// Inline definition of horizontal max
inline float max4(float a, float b, float c, float d)
{
return max(max(max(a, b), c), d);
}
// Inline definition of horizontal min
inline float min4(float a, float b, float c, float d)
{
return min(min(min(a, b), c), d);
}
// Traversal kernel
__kernel void traverse( __read_only image2d_t nodes,
__global const float4* triangles,
__global const float4* rays,
__global float4* result,
const int num,
const int w,
const int h)
{
// Ray index
int idx = get_global_id(0);
if(idx < num)
{
// Stack
int todo[32];
int todoOffset = 0;
// Current node
int nodeNum = 0;
float tmin = 0.0f;
float depth = 2e30f;
// Fetch ray origin, direction and compute invdirection
float4 origin = rays[2 * idx + 0];
float4 direction = rays[2 * idx + 1];
float4 invdir = native_recip(direction);
float4 temp = (float4)(0.0f, 0.0f, 0.0f, 1.0f);
// Traversal loop
while(true)
{
// Fetch node information
int2 nodeCoord = (int2)((nodeNum << 2) % w, (nodeNum << 2) / w);
int4 specs = read_imagei(nodes, sampler, nodeCoord + (int2)(3, 0));
// While node isn't leaf
while(specs.z == 0)
{
// Fetch child bounding boxes
float4 n0xy = read_imagef(nodes, sampler, nodeCoord);
float4 n1xy = read_imagef(nodes, sampler, nodeCoord + (int2)(1, 0));
float4 nz = read_imagef(nodes, sampler, nodeCoord + (int2)(2, 0));
// Test ray against child bounding boxes
float oodx = origin.x * invdir.x;
float oody = origin.y * invdir.y;
float oodz = origin.z * invdir.z;
float c0lox = n0xy.x * invdir.x - oodx;
float c0hix = n0xy.y * invdir.x - oodx;
float c0loy = n0xy.z * invdir.y - oody;
float c0hiy = n0xy.w * invdir.y - oody;
float c0loz = nz.x * invdir.z - oodz;
float c0hiz = nz.y * invdir.z - oodz;
float c1loz = nz.z * invdir.z - oodz;
float c1hiz = nz.w * invdir.z - oodz;
float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), tmin);
float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), depth);
float c1lox = n1xy.x * invdir.x - oodx;
float c1hix = n1xy.y * invdir.x - oodx;
float c1loy = n1xy.z * invdir.y - oody;
float c1hiy = n1xy.w * invdir.y - oody;
float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), tmin);
float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), depth);
bool traverseChild0 = (c0max >= c0min);
bool traverseChild1 = (c1max >= c1min);
nodeNum = specs.x;
int nodeAbove = specs.y;
// We hit just one of the two children
if(traverseChild0 != traverseChild1)
{
if(traverseChild1)
{
nodeNum = nodeAbove;
}
}
// We hit either both or none
else
{
// If we hit none, pop node from stack (or exit traversal, if stack is empty)
if (!traverseChild0)
{
if(todoOffset == 0)
{
break;
}
nodeNum = todo[--todoOffset];
}
// If we hit both
else
{
// Sort them (so nearest goes 1st, further 2nd)
if(c1min < c0min)
{
unsigned int tmp = nodeNum;
nodeNum = nodeAbove;
nodeAbove = tmp;
}
// Push further on stack
todo[todoOffset++] = nodeAbove;
}
}
// Fetch next node information
nodeCoord = (int2)((nodeNum << 2) % w, (nodeNum << 2) / w);
specs = read_imagei(nodes, sampler, nodeCoord + (int2)(3, 0));
}
// If node is leaf & has some primitives
if(specs.z > 0)
{
// Loop through primitives & perform intersection with them (Woop triangles)
for(int i = specs.x; i < specs.y; i++)
{
// Fetch first point from global memory
float4 v0 = triangles[i * 4 + 0];
float o_z = v0.w - origin.x * v0.x - origin.y * v0.y - origin.z * v0.z;
float i_z = 1.0f / (direction.x * v0.x + direction.y * v0.y + direction.z * v0.z);
float t = o_z * i_z;
if(t > 0.0f && t < depth)
{
// Fetch second point from global memory
float4 v1 = triangles[i * 4 + 1];
float o_x = v1.w + origin.x * v1.x + origin.y * v1.y + origin.z * v1.z;
float d_x = direction.x * v1.x + direction.y * v1.y + direction.z * v1.z;
float u = o_x + t * d_x;
if(u >= 0.0f && u <= 1.0f)
{
// Fetch third point from global memory
float4 v2 = triangles[i * 4 + 2];
float o_y = v2.w + origin.x * v2.x + origin.y * v2.y + origin.z * v2.z;
float d_y = direction.x * v2.x + direction.y * v2.y + direction.z * v2.z;
float v = o_y + t * d_y;
if(v >= 0.0f && u + v <= 1.0f)
{
// We got successful hit, store the information
depth = t;
temp.x = u;
temp.y = v;
temp.z = t;
temp.w = as_float(i);
}
}
}
}
}
// Pop node from stack (if empty, finish traversal)
if(todoOffset == 0)
{
break;
}
nodeNum = todo[--todoOffset];
}
// Store the ray traversal result in global memory
result[idx] = temp;
}
}
The first question of the day is: how could one write the persistent while-while and speculative while-while kernels in OpenCL?
Regarding persistent while-while: do I get it right that I actually just launch the kernel with a global work size equal to the local work size, and both of these equal to the warp/wavefront size of the GPU?
I get that with CUDA the persistent thread implementation looks like this:
do
{
    volatile int& jobIndexBase = nextJobArray[threadIdx.y];
    if (threadIdx.x == 0)
    {
        jobIndexBase = atomicAdd(&warpCounter, WARP_SIZE);
    }
    index = jobIndexBase + threadIdx.x;
    if (index >= totalJobs)
        return;

    /* Perform work for task numbered 'index' */
}
while(true);
What could the equivalent look like in OpenCL? I know I'll have to add some barriers in there; I also know that one should go right after the part where I atomically add WARP_SIZE to warpCounter.
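For concreteness, here is how I currently imagine the OpenCL equivalent, assuming a 1D work-group whose size equals the warp/wavefront width and enough work-groups launched to fill the device (names are carried over from the CUDA snippet or made up):
// Hedged sketch: a persistent-threads outer loop in OpenCL C. warpCounter is a
// __global counter the host zeroes before launch; names are illustrative only.
__kernel void traversePersistent(__global volatile uint* warpCounter,
                                 const uint totalJobs
                                 /* ...plus the usual traversal arguments... */)
{
    __local uint jobIndexBase;            // shared by the warp-sized work-group
    const uint lane  = get_local_id(0);
    const uint batch = get_local_size(0); // == warp/wavefront size by assumption

    while (true)
    {
        if (lane == 0)
            jobIndexBase = atomic_add(warpCounter, batch);
        barrier(CLK_LOCAL_MEM_FENCE);     // publish the new base to all lanes

        uint base = jobIndexBase;
        barrier(CLK_LOCAL_MEM_FENCE);     // keep lane 0 from overwriting it too early

        if (base >= totalJobs)
            return;                       // uniform exit, so the barriers stay legal

        uint index = base + lane;
        if (index < totalJobs)
        {
            /* perform the while-while traversal for ray 'index' */
        }
    }
}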
Regarding speculative traversal: well, I don't really have any idea how this should be implemented in OpenCL, so any hints are welcome. I also have no idea where to put the barriers (because putting them around a simulated __any results in a driver crash).
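The closest I have come to a simulated __any is a __local vote like the sketch below (the caller declares __local int votes; inside the kernel and passes its address); the problem is that its barriers must be reached by the whole work-group, which is exactly what I can't guarantee inside the divergent traversal loop:
// Hedged sketch: emulating CUDA's __any() at work-group scope in OpenCL 1.x.
// Only legal when every work-item in the group reaches the call (uniform control flow).
inline int workgroup_any(int predicate, __local int *votes)
{
    if (get_local_id(0) == 0)
        *votes = 0;
    barrier(CLK_LOCAL_MEM_FENCE);        // reset is visible to everyone
    if (predicate)
        atomic_or(votes, 1);
    barrier(CLK_LOCAL_MEM_FENCE);        // all votes are in
    int result = *votes;
    barrier(CLK_LOCAL_MEM_FENCE);        // don't let the next reset race with this read
    return result;
}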
If you made it here, thanks for reading & any hints, answers, etc. are welcome!
An optimization you can do is to use vector variables and the fused multiply-add function to speed up your setup math. As for the rest of the kernel, it is slow because it is branchy. If you can make assumptions about the data, you might be able to reduce the execution time by reducing the code branches. I have not double-checked the float4 swizzles (the .xxyy and .x/.y/.z/.w after the float4 variables), so do verify them.
float4 n0xy = read_imagef(nodes, sampler, nodeCoord);
float4 n1xy = read_imagef(nodes, sampler, nodeCoord + (int2)(1, 0));
float4 nz = read_imagef(nodes, sampler, nodeCoord + (int2)(2, 0));
float4 oodf4 = -origin * invdir;
// n0xy holds (c0.lo.x, c0.hi.x, c0.lo.y, c0.hi.y), so pair it with invdir.xxyy and oodf4.xxyy
float4 c0xyf4 = fma(n0xy, invdir.xxyy, oodf4.xxyy);
// nz holds (c0.lo.z, c0.hi.z, c1.lo.z, c1.hi.z)
float4 c0zc1z = fma(nz, (float4)(invdir.z), (float4)(oodf4.z));
float c0min = max4(min(c0xyf4.x, c0xyf4.y), min(c0xyf4.z, c0xyf4.w), min(c0zc1z.x, c0zc1z.y), tmin);
float c0max = min4(max(c0xyf4.x, c0xyf4.y), max(c0xyf4.z, c0xyf4.w), max(c0zc1z.x, c0zc1z.y), depth);
float4 c1xy = fma(n1xy, invdir.xxyy, oodf4.xxyy);
float c1min = max4(min(c1xy.x, c1xy.y), min(c1xy.z, c1xy.w), min(c0zc1z.z, c0zc1z.w), tmin);
float c1max = min4(max(c1xy.x, c1xy.y), max(c1xy.z, c1xy.w), max(c0zc1z.z, c0zc1z.w), depth);
I have been grappling with the Gribb/Hartmann method of extracting the frustum planes for some time now, with little success. I want to build a camera view frustum to cull my scene.
I am working with column-major matrices in a right-handed coordinate system. (OpenGL style - I'm using C# and PlayStation Mobile, but the math should be the same.)
I want to get my planes in world space, so I build my frustum from the view-projection matrix (that's projectionMatrix * viewMatrix). The view matrix is the inverse of the camera's world transform.
The problem is; regardless of what I tweak, I can't seem to get a correct frustum. I think that I may be missing something obvious.
If I "strafe" my camera left or right while still looking down the z-axis, the normals of my planes change so that they are always pointing at the origin of the scene - which makes me think that they are not in world-space...
The planes can be extracted from a projection matrix using the Gribb/Hartmann method as follows (column-major):
void extract_planes_from_projmat(
const float mat[4][4],
float left[4], float right[4],
float bottom[4], float top[4],
float near[4], float far[4])
{
for (int i = 4; i--; ) left[i] = mat[i][3] + mat[i][0];
for (int i = 4; i--; ) right[i] = mat[i][3] - mat[i][0];
for (int i = 4; i--; ) bottom[i] = mat[i][3] + mat[i][1];
for (int i = 4; i--; ) top[i] = mat[i][3] - mat[i][1];
for (int i = 4; i--; ) near[i] = mat[i][3] + mat[i][2];
for (int i = 4; i--; ) far[i] = mat[i][3] - mat[i][2];
}
Where mat is the product of the projection matrix and the model-view matrix.
See:
https://fgiesen.wordpress.com/2012/08/31/frustum-planes-from-the-projection-matrix/
http://www8.cs.umu.se/kurser/5DV051/HT12/lab/plane_extraction.pdf
Note: if the matrix components aren't normalized and you require a Hessian Normal Form plane, then you will need to normalize the resulting planes.
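For reference, normalization is just a division by the length of the plane's normal; a minimal sketch, assuming the same float[4] = {a, b, c, d} plane layout used above:
#include <math.h>

/* Bring a plane (a, b, c, d) into Hessian normal form by dividing through
 * by the length of its normal (a, b, c). */
void normalize_plane(float plane[4])
{
    float mag = sqrtf(plane[0] * plane[0] + plane[1] * plane[1] + plane[2] * plane[2]);
    plane[0] /= mag;
    plane[1] /= mag;
    plane[2] /= mag;
    plane[3] /= mag;
}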
The missing part:
comboMatrix = projection_matrix * Matrix4_Transpose(modelview_matrix)
Then the world-space frustum plane extraction for OpenGL is exactly as mentioned in the Gribb/Hartmann method:
// Left clipping plane
p_planes[0].a = comboMatrix._41 + comboMatrix._11;
p_planes[0].b = comboMatrix._42 + comboMatrix._12;
p_planes[0].c = comboMatrix._43 + comboMatrix._13;
p_planes[0].d = comboMatrix._44 + comboMatrix._14;
// Right clipping plane
p_planes[1].a = comboMatrix._41 - comboMatrix._11;
p_planes[1].b = comboMatrix._42 - comboMatrix._12;
p_planes[1].c = comboMatrix._43 - comboMatrix._13;
p_planes[1].d = comboMatrix._44 - comboMatrix._14;
// Top clipping plane
p_planes[2].a = comboMatrix._41 - comboMatrix._21;
p_planes[2].b = comboMatrix._42 - comboMatrix._22;
p_planes[2].c = comboMatrix._43 - comboMatrix._23;
p_planes[2].d = comboMatrix._44 - comboMatrix._24;
// Bottom clipping plane
p_planes[3].a = comboMatrix._41 + comboMatrix._21;
p_planes[3].b = comboMatrix._42 + comboMatrix._22;
p_planes[3].c = comboMatrix._43 + comboMatrix._23;
p_planes[3].d = comboMatrix._44 + comboMatrix._24;
// Near clipping plane
p_planes[4].a = comboMatrix._41 + comboMatrix._31;
p_planes[4].b = comboMatrix._42 + comboMatrix._32;
p_planes[4].c = comboMatrix._43 + comboMatrix._33;
p_planes[4].d = comboMatrix._44 + comboMatrix._34;
// Far clipping plane
p_planes[5].a = comboMatrix._41 - comboMatrix._31;
p_planes[5].b = comboMatrix._42 - comboMatrix._32;
p_planes[5].c = comboMatrix._43 - comboMatrix._33;
p_planes[5].d = comboMatrix._44 - comboMatrix._34;
These planes now are in world-space and can be used to frustum cull world-space objects.
for(int i = 0; i < 6; i++)
{
var dist = dot3(world_space_point.xyz, p_planes[i].xyz) + p_planes[i].d + sphere_radius;
if(dist < 0) return false; // sphere culled
}
I'm trying to code correct 2D affine texture mapping in GLSL.
Explanation:
...None of these images is correct for my purposes. The right one (labeled Correct) has perspective correction, which I do not want. So this: "Getting to know the Q texture coordinate" solution (without further improvements) is not what I'm looking for.
I'd like to simply "stretch" the texture inside the quadrilateral, something like this:
but composed of two triangles. Any advice (GLSL), please?
This works well as long as you have a trapezoid, and its parallel edges are aligned with one of the local axes. I recommend playing around with my Unity package.
GLSL:
varying vec2 shiftedPosition, width_height;
#ifdef VERTEX
void main() {
gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
shiftedPosition = gl_MultiTexCoord0.xy; // left and bottom edges zeroed.
width_height = gl_MultiTexCoord1.xy;
}
#endif
#ifdef FRAGMENT
uniform sampler2D _MainTex;
void main() {
gl_FragColor = texture2D(_MainTex, shiftedPosition / width_height);
}
#endif
C#:
// Zero out the left and bottom edges,
// leaving a right trapezoid with two sides on the axes and a vertex at the origin.
var shiftedPositions = new Vector2[] {
Vector2.zero,
new Vector2(0, vertices[1].y - vertices[0].y),
new Vector2(vertices[2].x - vertices[1].x, vertices[2].y - vertices[3].y),
new Vector2(vertices[3].x - vertices[0].x, 0)
};
mesh.uv = shiftedPositions;
var widths_heights = new Vector2[4];
widths_heights[0].x = widths_heights[3].x = shiftedPositions[3].x;
widths_heights[1].x = widths_heights[2].x = shiftedPositions[2].x;
widths_heights[0].y = widths_heights[1].y = shiftedPositions[1].y;
widths_heights[2].y = widths_heights[3].y = shiftedPositions[2].y;
mesh.uv2 = widths_heights;
I recently managed to come up with a generic solution to this problem for any type of quadrilateral. The calculations and GLSL may be of help. There's a working demo in Java (that runs on Android), but it is compact and readable and should be easily portable to Unity or iOS: http://www.bitlush.com/posts/arbitrary-quadrilaterals-in-opengl-es-2-0
In case anyone's still interested, here's a C# implementation that takes a quad defined by the clockwise screen verts (x0,y0) (x1,y1) ... (x3,y3), an arbitrary pixel at (x,y) and calculates the u and v of that pixel. It was originally written to CPU-render an arbitrary quad to a texture, but it's easy enough to split the algorithm across CPU, Vertex and Pixel shaders; I've commented accordingly in the code.
float Ax, Bx, Cx, Dx, Ay, By, Cy, Dy, A, B, C;
//These are all uniforms for a given quad. Calculate on CPU.
Ax = (x3 - x0) - (x2 - x1);
Bx = (x0 - x1);
Cx = (x2 - x1);
Dx = x1;
Ay = (y3 - y0) - (y2 - y1);
By = (y0 - y1);
Cy = (y2 - y1);
Dy = y1;
float ByCx_plus_AyDx_minus_BxCy_minus_AxDy = (By * Cx) + (Ay * Dx) - (Bx * Cy) - (Ax * Dy);
float ByDx_minus_BxDy = (By * Dx) - (Bx * Dy);
A = (Ay*Cx)-(Ax*Cy);
//These must be calculated per-vertex, and passed through as interpolated values to the pixel-shader
B = (Ax * y) + ByCx_plus_AyDx_minus_BxCy_minus_AxDy - (Ay * x);
C = (Bx * y) + ByDx_minus_BxDy - (By * x);
//These must be calculated per-pixel using the interpolated B, C and x from the vertex shader along with some of the other uniforms.
u = ((-B) - Mathf.Sqrt((B*B-(4.0f*A*C))))/(A*2.0f);
v = (x - (u * Cx) - Dx)/((u*Ax)+Bx);
Tessellation solves this problem. Subdividing the quad adds vertices, which give the interpolator more hints for the pixels in between; see the sketch below the link.
Check out this link.
https://www.youtube.com/watch?v=8TleepxIORU&feature=youtu.be
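In other words: dice the quad into an N x N grid and interpolate both positions and UVs bilinearly, so each small triangle's affine interpolation error becomes negligible. A rough CPU-side sketch of that idea (the corner order and names are mine, not from the video):
// Hedged sketch of the subdivision idea. 'quad' holds 4 corners (x, y) in
// bottom-left, bottom-right, top-right, top-left order; 'out_pos' and 'out_uv'
// must each hold (n+1)*(n+1) pairs of floats.
void subdivide_quad(const float quad[4][2], int n, float *out_pos, float *out_uv)
{
    for (int j = 0; j <= n; ++j) {
        for (int i = 0; i <= n; ++i) {
            float u = (float)i / n, v = (float)j / n;
            // bilinear blend of the four corners
            float x = (1-u)*(1-v)*quad[0][0] + u*(1-v)*quad[1][0]
                    + u*v*quad[2][0] + (1-u)*v*quad[3][0];
            float y = (1-u)*(1-v)*quad[0][1] + u*(1-v)*quad[1][1]
                    + u*v*quad[2][1] + (1-u)*v*quad[3][1];
            int idx = (j * (n + 1) + i) * 2;
            out_pos[idx] = x;  out_pos[idx + 1] = y;
            out_uv[idx]  = u;  out_uv[idx + 1]  = v;
        }
    }
}
The resulting (N+1) x (N+1) grid is then drawn as the usual two triangles per cell.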
I had a similar question ( https://gamedev.stackexchange.com/questions/174857/mapping-a-texture-to-a-2d-quadrilateral/174871 ), and at gamedev they suggested using an imaginary Z coordinate, which I calculate using the following C code, which appears to work in the general case (not just for trapezoids):
//usual euclidean distance
float distance(int ax, int ay, int bx, int by) {
int x = ax-bx;
int y = ay-by;
return sqrtf((float)(x*x + y*y));
}
void gfx_quad(gfx_t *dst //destination texture, we are rendering into
,gfx_t *src //source texture
,int *quad // quadrilateral vertices
)
{
int *v = quad; //quad vertices
float z = 20.0;
float top = distance(v[0],v[1],v[2],v[3]); //top
float bot = distance(v[4],v[5],v[6],v[7]); //bottom
float lft = distance(v[0],v[1],v[4],v[5]); //left
float rgt = distance(v[2],v[3],v[6],v[7]); //right
// By default all vertices lie on the screen plane
float az = 1.0;
float bz = 1.0;
float cz = 1.0;
float dz = 1.0;
// Move Z away from the screen plane, based on the distance ratios.
if (top<bot) {
az *= top/bot;
bz *= top/bot;
} else {
cz *= bot/top;
dz *= bot/top;
}
if (lft<rgt) {
az *= lft/rgt;
cz *= lft/rgt;
} else {
bz *= rgt/lft;
dz *= rgt/lft;
}
// draw our quad as two textured triangles
gfx_textured(dst, src
, v[0],v[1],az, v[2],v[3],bz, v[4],v[5],cz
, 0.0,0.0, 1.0,0.0, 0.0,1.0);
gfx_textured(dst, src
, v[2],v[3],bz, v[4],v[5],cz, v[6],v[7],dz
, 1.0,0.0, 0.0,1.0, 1.0,1.0);
}
I'm doing it in software to scale and rotate 2D sprites; for an OpenGL 3D app you will need to do it in the pixel/fragment shader, unless you are able to map these imaginary az, bz, cz, dz into your actual 3D space and use the usual pipeline. DMGregory gave exact code for OpenGL shaders: https://gamedev.stackexchange.com/questions/148082/how-can-i-fix-zig-zagging-uv-mapping-artifacts-on-a-generated-mesh-that-tapers
I came up with this issue as I was trying to implement a homography warping in OpenGL. Some of the solutions that I found relied on a notion of depth, but this was not feasible in my case since I am working on 2D coordinates.
I based my solution on this article, and it seems to work for all cases that I could try. I am leaving it here in case it is useful for someone else as I could not find something similar. The solution makes the following assumptions:
The vertex coordinates are the 4 points of a quad in Lower Right, Upper Right, Upper Left, Lower Left order.
The coordinates are given in OpenGL's reference system (range [-1, 1], with origin at bottom left corner).
std::vector<cv::Point2f> points;
// Convert points to homogeneous coordinates to simplify the problem.
Eigen::Vector3f p0(points[0].x, points[0].y, 1);
Eigen::Vector3f p1(points[1].x, points[1].y, 1);
Eigen::Vector3f p2(points[2].x, points[2].y, 1);
Eigen::Vector3f p3(points[3].x, points[3].y, 1);
// Compute the intersection point between the lines described by opposite vertices using cross products. Normalization is only required at the end.
// See https://leimao.github.io/blog/2D-Line-Mathematics-Homogeneous-Coordinates/ for a quick summary of this approach.
auto line1 = p2.cross(p0);
auto line2 = p3.cross(p1);
auto intersection = line1.cross(line2);
intersection = intersection / intersection(2);
// Compute the distance from each vertex to the intersection point.
std::vector<float> distances;
for (const auto &pt : points) {
auto distance = std::sqrt(std::pow(pt.x - intersection(0), 2) +
std::pow(pt.y - intersection(1), 2));
distances.push_back(distance);
}
// Assumes same order as above.
std::vector<cv::Point2f> texture_coords_unnormalized = {
{1.0f, 1.0f},
{1.0f, 0.0f},
{0.0f, 0.0f},
{0.0f, 1.0f}
};
std::vector<float> texture_coords;
for (int i = 0; i < texture_coords_unnormalized.size(); ++i) {
float u_i = texture_coords_unnormalized[i].x;
float v_i = texture_coords_unnormalized[i].y;
float d_i = distances.at(i);
float d_i_2 = distances.at((i + 2) % 4);
float scale = (d_i + d_i_2) / d_i_2;
texture_coords.push_back(u_i*scale);
texture_coords.push_back(v_i*scale);
texture_coords.push_back(scale);
}
Pass the texture coordinates to your shader (use vec3). Then:
gl_FragColor = vec4(texture2D(textureSampler, textureCoords.xy/textureCoords.z).rgb, 1.0);
Thanks for the answers, but after experimenting I found a solution.
The two triangles on the left have UVs (STRQ) assigned according to this, and the two triangles on the right are a modified version of this perspective correction.
Numbers and shader:
tri1 = [Vec2(-0.5, -1), Vec2(0.5, -1), Vec2(1, 1)]
tri2 = [Vec2(-0.5, -1), Vec2(1, 1), Vec2(-1, 1)]
d1 = length of top edge = 2
d2 = length of bottom edge = 1
tri1_uv = [Vec4(0, 0, 0, d2 / d1), Vec4(d2 / d1, 0, 0, d2 / d1), Vec4(1, 1, 0, 1)]
tri2_uv = [Vec4(0, 0, 0, d2 / d1), Vec4(1, 1, 0, 1), Vec4(0, 1, 0, 1)]
Only the triangles on the right are rendered using this GLSL shader (the left side uses the fixed pipeline):
void main()
{
    gl_FragColor = texture2D(colormap, vec2(gl_TexCoord[0].x / gl_TexCoord[0].w, gl_TexCoord[0].y));
}
So only U is perspective-corrected and V is linear.
I am experimenting with Kinect on WinRT for a Metro app.
I am trying to obtain the angle at the elbow.
Normally I would do the following:
Vector3D handLeftVector = new Vector3D(HandLeftX, HandLeftY, HandLeftZ);
handLeftVector.Normalize();
Vector3D ElbowLeftEVector = new Vector3D(ElbowLeftX, ElbowLeftY, ElbowLeftZ);
ElbowLeftEVector.Normalize();
Vector3D ShoulderLeftVector = new Vector3D(ShoulderLeftX, ShoulderLeftY, ShoulderLeftZ);
ShoulderLeftVector.Normalize();
Vector3D leftElbowV1 = ShoulderLeftVector - ElbowLeftEVector;
Vector3D leftElbowV2 = handLeftVector - ElbowLeftEVector;
double leftElbowAngle = Vector3D.AngleBetween(leftElbowV1, leftElbowV2);
However, the Vector3D class isn't available in WinRT.
I decided to replicate the Vector3D method as below. However, the result doesn't seem to be as expected. Did I make a mistake anywhere?
double leftElbowV1X = ShoulderLeftX - ElbowLeftX;
double leftElbowV1Y = ShoulderLeftY - ElbowLeftY;
double leftElbowV1Z = ShoulderLeftZ - ElbowLeftZ;
double leftElbowV2X = handLeftX - ElbowLeftX;
double leftElbowV2Y = handLeftY - ElbowLeftY;
double leftElbowV2Z = handLeftZ - ElbowLeftZ;
double product = leftElbowV1X * leftElbowV2X + leftElbowV1Y * leftElbowV2Y + leftElbowV1Z * leftElbowV2Z;
double magnitudeA = Math.Sqrt(Math.Pow(leftElbowV1X, 2) + Math.Pow(leftElbowV1Y, 2) + Math.Pow(leftElbowV1Z, 2));
double magnitudeB = Math.Sqrt(Math.Pow(leftElbowV2X, 2) + Math.Pow(leftElbowV2Y, 2) + Math.Pow(leftElbowV2Z, 2));
magnitudeA = Math.Abs(magnitudeA);
magnitudeB = Math.Abs(magnitudeB);
double cosDelta = product / (magnitudeA * magnitudeB);
double angle = Math.Acos(cosDelta) * 180.0 / Math.PI;
And is there a need to normalize it?
I managed to resolve it, but I'm wondering whether there is a more efficient way of doing this.
Not sure if this helps, but this is some old angle code I use; it returns degrees:
float AngleBetween(Vector3 from, Vector3 dest) {
float len = from.magnitude * dest.magnitude;
if(len < Mathf.Epsilon) len = Mathf.Epsilon;
float f = Vector3.Dot(from,dest) / len;
if(f>1.0f)f=1.0f;
else if ( f < -1.0f) f = -1.0f;
return Mathf.Acos(f) * 180.0f / (float)Math.PI;
}
It's obviously using API specific syntax but I think the method is clear.
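For WinRT specifically, where neither Vector3 nor Mathf is available, the same method can be written against plain System.Math; a hedged sketch (the method name and clamping epsilon are my own choices):
// Hedged sketch: the same angle computation with no engine types, only System.Math.
static double AngleBetweenDegrees(double ax, double ay, double az,
                                  double bx, double by, double bz)
{
    double len = Math.Sqrt(ax * ax + ay * ay + az * az) *
                 Math.Sqrt(bx * bx + by * by + bz * bz);
    if (len < 1e-9) len = 1e-9;                      // avoid division by zero

    double f = (ax * bx + ay * by + az * bz) / len;  // cosine of the angle
    f = Math.Max(-1.0, Math.Min(1.0, f));            // clamp for Acos
    return Math.Acos(f) * 180.0 / Math.PI;
}

// Usage with the elbow vectors from the question:
// double leftElbowAngle = AngleBetweenDegrees(leftElbowV1X, leftElbowV1Y, leftElbowV1Z,
//                                             leftElbowV2X, leftElbowV2Y, leftElbowV2Z);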