Libavcodec mpeg4 avi encoding framerate and timebase problem - frame-rate

I try to generate a video with a timebase more precise than the 1/fps (camera frame rate are not constant). But the generated AVI does not seem to take into account the framerate that I indicate.
#include <iostream>
extern "C" {
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
}
#pragma comment(lib,"avcodec.lib")
#pragma comment(lib,"avformat.lib")
constexpr int TIMEFACTOR = 10;
static void encodeFrame(AVCodecContext *avctx, AVFormatContext *ctx, AVFrame* frame)
{
int ret = avcodec_send_frame(avctx, frame);
AVPacket packet;
av_init_packet(&packet);
ret = 0;
while (ret >= 0) {
ret = avcodec_receive_packet(avctx, &packet);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
return; // nothing to write
};
//packet.pts = (frame) ? frame->pts : packet.pts;
av_packet_rescale_ts(&packet, avctx->time_base, ctx->streams[0]->time_base);
packet.duration = TIMEFACTOR;
av_interleaved_write_frame(ctx, &packet);
av_packet_unref(&packet);
}
}
static void fill_yuv_frame(AVFrame* frame, int width, int height, int frameId)
{
// Y
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
frame->data[0][y * frame->linesize[0] + x] = x + y + frameId * 3;
}
}
// Cb and Cr
for (int y = 0; y < height / 2; y++) {
for (int x = 0; x < width / 2; x++) {
frame->data[1][y * frame->linesize[1] + x] = 128 + y + frameId * 2;
frame->data[2][y * frame->linesize[2] + x] = 64 + x + frameId * 5;
}
}
}
int main(int argc, char** argv)
{
const char filename[] = "output.avi";
const char encoder[] = "mpeg4";
constexpr int WIDTH = 640;
constexpr int HEIGHT = 480;
av_log_set_level(AV_LOG_DEBUG);
//delete file because libavcodec reload file
remove(filename);
AVFormatContext* ofmtCtx;
int ret = avformat_alloc_output_context2(&ofmtCtx, NULL, "avi", filename);
AVCodec* avcodec = avcodec_find_encoder_by_name(encoder);
AVCodecContext* avctx = avcodec_alloc_context3(avcodec);
avctx->width = WIDTH ;
avctx->height = HEIGHT ;
avctx->sample_aspect_ratio = { 1, 1 };
avctx->pix_fmt = AV_PIX_FMT_YUV420P; //Do not work with other type
avctx->codec_id = AV_CODEC_ID_MPEG4;
avctx->bit_rate = 4 * 1000 * 1000;
avctx->time_base = av_make_q(1, 25 * TIMEFACTOR);
avctx->framerate = av_make_q(25, 1);
avctx->ticks_per_frame = TIMEFACTOR;
avctx->gop_size = 10;
avctx->max_b_frames = 1;
AVStream* m_outStream = avformat_new_stream(ofmtCtx, NULL);
ret = avcodec_open2(avctx, avcodec, NULL);
ret = avcodec_parameters_from_context(m_outStream->codecpar, avctx);
m_outStream->time_base = avctx->time_base;
m_outStream->r_frame_rate = avctx->framerate;
m_outStream->avg_frame_rate = avctx->framerate;
ret = avio_open(&(ofmtCtx->pb),
filename,
AVIO_FLAG_WRITE);
ret = avformat_write_header(ofmtCtx, NULL);
av_dump_format(ofmtCtx, 0, filename, 1);
AVFrame * avframe = av_frame_alloc();
avframe->format = avctx->pix_fmt;
avframe->width = avctx->width;
avframe->height = avctx->height;
ret = av_frame_get_buffer(avframe, 0);
ret = av_frame_make_writable(avframe);
for (int i = 0; i < 25; ++i) {
fflush(stdout);
fill_yuv_frame(avframe, avctx->width, avctx->height, i);
avframe->pts = i * TIMEFACTOR;
encodeFrame(avctx, ofmtCtx, avframe);
}
encodeFrame(avctx, ofmtCtx, NULL);
av_write_trailer(ofmtCtx);
return 0;
}
And this is dump output :
[mpeg4 # 000002AA64FA1880] intra_quant_bias = 0 inter_quant_bias = -64
[file # 000002AA64F69AC0] Setting default whitelist 'file,crypto'
[avi # 000002AA64F73400] reserve_index_space:0 master_index_max_size:256
[avi # 000002AA64F73400] duration_est:36000.000, filesize_est:18.4GiB, master_index_max_size:256
Output #0, avi, to 'output.avi':
Metadata:
ISFT : Lavf58.12.100
Stream #0:0, 0, 1/250: Video: mpeg4, 1 reference frame (FMP4 / 0x34504D46), yuv420p, 640x480 (0x0) [SAR 1:1 DAR 4:3], 0/1, q=2-31, 4000 kb/s, 25 fps, 25 tbr, 250 tbn
But if I open the video in ffmpeg command line fps is 250, and video content 250 frames, with many dropframe.

Related

OpenCL generate SHA-256 hash

I need help with OpenCL.
The task is as follows:
There is an input parameter of type string. It is necessary to generate a SHA-256 hash using the resources of the video card.
It is necessary to create a cycle to select a hash. Each time add some postfix to the original string.
Result*Hash should start with 5 zeros "00000 ...".
For example, the entrance. parameter: "strela".
SHA-256: "7d7ceecdee08ea1c0ac46b27657a79395af36526b3214b59a92f8351ccf8f762"
Next, you need to add a postfix. For example, "strela1"
Here the hash will be: a2afd15651f44f19f3e4e216bf3ead22d5f5937e9f9dc250382ff1f764ba219f
then continue to add the postfix until the resulting hash begins to start with "00000.."
It is necessary to use all the cores of the video card, i.e. use parallelization. Each core will use its postfix.
As soon as some kernel computes the hash we need, interrupt all calculations on the cores and display the hash we need.
Source:
main.cpp
#define _CRT_SECURE_NO_WARNINGS
#include "sha256.h"
#include <stdio.h>
#include < string.h >
void crypt_and_print(char input[])
{
char result[65];
char diff[65] = "00000";
char *istr;
char buffer2[20];
int temp;
char str2[20];
for (int i = 0; i < 1; i++)
{
char string[] = "1qqq";
sprintf(buffer2, "%d", i);
temp = 8 - strlen(buffer2);
str2[0] = '\0';
while (strlen(str2) != temp)
strcat(str2, "0");
strcat(str2, buffer2);
strcat(string, str2);
sha256_crypt(string, result);
istr = strstr(result, diff);
if (istr != NULL) {
printf(istr);
break;
}
}
}
int main()
{
char result[65];
sha256_init(2048);
crypt_and_print((char*)"");
}
sha256.c
#define _CRT_SECURE_NO_WARNINGS
#include "sha256.h"
static cl_platform_id platform_id = NULL;
static cl_device_id device_id = NULL;
static cl_uint ret_num_devices;
static cl_uint ret_num_platforms;
static cl_context context;
static cl_int ret;
static char* source_str;
static size_t source_size;
static cl_program program;
static cl_kernel kernel;
static cl_command_queue command_queue;
static cl_mem pinned_saved_keys, pinned_partial_hashes, buffer_out, buffer_keys, data_info;
static cl_uint *partial_hashes;
static cl_uint *res_hashes;
static char *saved_plain;
static unsigned int datai[3];
static int have_full_hashes;
static size_t kpc = 4;
static size_t global_work_size=3;
static size_t local_work_size=1;
static size_t string_len;
void load_source();
void createDevice();
void createkernel();
void create_clobj();
void crypt_all();
void sha256_init(size_t user_kpc)
{
kpc = user_kpc;
load_source();
createDevice();
createkernel();
create_clobj();
}
void sha256_crypt(char input[], char* output)
{
int i;
string_len = strlen(input);
global_work_size = 3;
datai[0] = SHA256_PLAINTEXT_LENGTH;
datai[1] = global_work_size;
datai[2] = string_len;
memcpy(saved_plain, input, string_len+1);
crypt_all();
for(i=0; i<SHA256_RESULT_SIZE; i++)
{
sprintf(output+i*8,"%08x", partial_hashes[i]);
}
printf("'%s':\n%s\n", input, output);
}
void crypt_all()
{
//printf("%s\n",saved_plain);
ret = clEnqueueWriteBuffer(command_queue, data_info, CL_TRUE, 0, sizeof(unsigned int) * 3, datai, 0, NULL, NULL);
ret = clEnqueueWriteBuffer(command_queue, buffer_keys, CL_TRUE, 0, SHA256_PLAINTEXT_LENGTH * kpc, saved_plain, 0, NULL, NULL);
// printf("%s\n",buffer_keys);
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
ret = clFinish(command_queue);
// read back partial hashes
ret = clEnqueueReadBuffer(command_queue, buffer_out, CL_TRUE, 0, sizeof(cl_uint) * SHA256_RESULT_SIZE, partial_hashes, 0, NULL, NULL);
have_full_hashes = 0;
}
void load_source()
{
FILE *fp;
fp = fopen("/sha256.cl", "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(MAX_SOURCE_SIZE);
source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
fclose( fp );
}
void create_clobj(){
pinned_saved_keys = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, (SHA256_PLAINTEXT_LENGTH)*kpc, NULL, &ret);
saved_plain = (char*)clEnqueueMapBuffer(command_queue, pinned_saved_keys, CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0, (SHA256_PLAINTEXT_LENGTH)*kpc, 0, NULL, NULL, &ret);
memset(saved_plain, 0, SHA256_PLAINTEXT_LENGTH * kpc);
res_hashes = (cl_uint *)malloc(sizeof(cl_uint) * SHA256_RESULT_SIZE);
memset(res_hashes, 0, sizeof(cl_uint) * SHA256_RESULT_SIZE);
pinned_partial_hashes = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(cl_uint) * SHA256_RESULT_SIZE, NULL, &ret);
partial_hashes = (cl_uint *) clEnqueueMapBuffer(command_queue, pinned_partial_hashes, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint) * SHA256_RESULT_SIZE, 0, NULL, NULL, &ret);
memset(partial_hashes, 0, sizeof(cl_uint) * SHA256_RESULT_SIZE);
buffer_keys = clCreateBuffer(context, CL_MEM_READ_ONLY, (SHA256_PLAINTEXT_LENGTH) * kpc, NULL, &ret);
buffer_out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_uint) * SHA256_RESULT_SIZE, NULL, &ret);
data_info = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned int) * 3, NULL, &ret);
clSetKernelArg(kernel, 0, sizeof(data_info), (void *) &data_info);
clSetKernelArg(kernel, 1, sizeof(buffer_keys), (void *) &buffer_keys);
clSetKernelArg(kernel, 2, sizeof(buffer_out), (void *) &buffer_out);
}
void createDevice()
{
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);
context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
}
void createkernel()
{
program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
kernel = clCreateKernel(program, "sha256_crypt_kernel", &ret);
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
}
sha256.cl
#ifndef uint32_t
#define uint32_t unsigned int
#endif
#define H0 0x6a09e667
#define H1 0xbb67ae85
#define H2 0x3c6ef372
#define H3 0xa54ff53a
#define H4 0x510e527f
#define H5 0x9b05688c
#define H6 0x1f83d9ab
#define H7 0x5be0cd19
uint rotr(uint x, int n) {
if (n < 32) return (x >> n) | (x << (32 - n));
return x;
}
uint ch(uint x, uint y, uint z) {
return (x & y) ^ (~x & z);
}
uint maj(uint x, uint y, uint z) {
return (x & y) ^ (x & z) ^ (y & z);
}
uint sigma0(uint x) {
return rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22);
}
uint sigma1(uint x) {
return rotr(x, 6) ^ rotr(x, 11) ^ rotr(x, 25);
}
uint gamma0(uint x) {
return rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3);
}
uint gamma1(uint x) {
return rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10);
}
__kernel void sha256_crypt_kernel(__global uint *data_info,__global char *plain_key, __global uint *digest){
int t, gid, msg_pad;
int stop, mmod;
uint i, ulen, item, total;
uint W[80], temp, A,B,C,D,E,F,G,H,T1,T2;
uint num_keys = data_info[1];
int current_pad;
//printf(get_global_id(0));
uint K[64]={
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
msg_pad=0;
ulen = data_info[2];
total = ulen%64>=56?2:1 + ulen/64;
//printf("ulen: %u total:%u\n", ulen, total);
digest[0] = H0;
digest[1] = H1;
digest[2] = H2;
digest[3] = H3;
digest[4] = H4;
digest[5] = H5;
digest[6] = H6;
digest[7] = H7;
for(item=0; item<total; item++)
{
A = digest[0];
B = digest[1];
C = digest[2];
D = digest[3];
E = digest[4];
F = digest[5];
G = digest[6];
H = digest[7];
#pragma unroll
for (t = 0; t < 80; t++){
W[t] = 0x00000000;
}
msg_pad=item*64;
if(ulen > msg_pad)
{
current_pad = (ulen-msg_pad)>64?64:(ulen-msg_pad);
}
else
{
current_pad =-1;
}
// printf("current_pad: %d\n",current_pad);
if(current_pad>0)
{
i=current_pad;
stop = i/4;
// printf("i:%d, stop: %d msg_pad:%d\n",i,stop, msg_pad);
for (t = 0 ; t < stop+get_global_id(0) ; t++){
W[t] = ((uchar) plain_key[msg_pad + t * 4]) << 24;
W[t] |= ((uchar) plain_key[msg_pad + t * 4 + 1]) << 16;
W[t] |= ((uchar) plain_key[msg_pad + t * 4 + 2]) << 8;
W[t] |= (uchar) plain_key[msg_pad + t * 4 + 3];
// printf("W[%u]: %u\n",t,W[t]);
}
mmod = i % 4;
if ( mmod == 3){
W[t] = ((uchar) plain_key[msg_pad + t * 4]) << 24;
W[t] |= ((uchar) plain_key[msg_pad + t * 4 + 1]) << 16;
W[t] |= ((uchar) plain_key[msg_pad + t * 4 + 2]) << 8;
W[t] |= ((uchar) 0x80) ;
} else if (mmod == 2) {
W[t] = ((uchar) plain_key[msg_pad + t * 4]) << 24;
W[t] |= ((uchar) plain_key[msg_pad + t * 4 + 1]) << 16;
W[t] |= 0x8000 ;
} else if (mmod == 1) {
W[t] = ((uchar) plain_key[msg_pad + t * 4]) << 24;
W[t] |= 0x800000 ;
} else /*if (mmod == 0)*/ {
W[t] = 0x80000000 ;
}
if (current_pad<56)
{
W[15] = ulen*8 ;
// printf("ulen avlue 2 :w[15] :%u\n", W[15]);
}
}
else if(current_pad <0)
{
if( ulen%64==0)
W[0]=0x80000000;
W[15]=ulen*8;
//printf("ulen avlue 3 :w[15] :%u\n", W[15]);
}
for (t = 0; t < 64; t++) {
if (t >= 16)
W[t] = gamma1(W[t - 2]) + W[t - 7] + gamma0(W[t - 15]) + W[t - 16];
T1 = H + sigma1(E) + ch(E, F, G) + K[t] + W[t];
T2 = sigma0(A) + maj(A, B, C);
H = G; G = F; F = E; E = D + T1; D = C; C = B; B = A; A = T1 + T2;
}
digest[0] += A;
digest[1] += B;
digest[2] += C;
digest[3] += D;
digest[4] += E;
digest[5] += F;
digest[6] += G;
digest[7] += H;
}
printf("hi");
}
How can i use here paralelism (all GPU cores) to calculate needed hash code?
Is it real to do task like this using OPENCL ?

msp430f2618 fading LED using pwm

I'm trying to write code in C to fade an off-board LED using PWM and the MSP430f2618. I can get the LED to turn on but it stays at full intensity. I am trying to read in an array of frequency values and fade the LED based on the frequency values.
int main(void)
{
WDTCTL = WDTPW | WDTHOLD; // Stop watchdog timer
int array_size = 0, i = 0, delay = 0;
double frequency[50] = {0.0};
array_size = sizeof(frequency);
frequency [0] = 60.0;
for (i = 1; i < array_size; i++)
{
if (frequency[i - 1] < 61)
{
frequency[i] = frequency[i-1] + 0.1;
}
else
{
frequency[i] = 60.0;
}
}
P4OUT &= 0;
P4DIR |= (BIT1 + BIT2); //P4.1 and P4.2 output
P4SEL &= ~(BIT1 + BIT2); //P4.1 and P4.2 TBx options, timer select
TBCCR0 = 512-1;
TBCCTL1 = OUTMOD_7;
TBCCTL2 = OUTMOD_7;
for (i = 0; i < array_size; i++)
{
P4OUT &= 0;
if ((frequency[i] < 60.2) && (frequency[i] >=60.0))
{
//TBCCR1 = 3200;
TBCCR1 = 384;
}
else if ((frequency[i] < 60.4) && (frequency[i] >=60.2))
{
//TBCCR1 = 2560;
TBCCR1 = 256;
}
else if ((frequency[i] < 60.6) && (frequency[i] >=60.4))
{
//TBCCR1 = 1920;
TBCCR1 = 128;
}
else if ((frequency[i] < 60.8) && (frequency[i] >=60.6))
{
//TBCCR1 = 1280;
TBCCR1 = 64;
}
else if ((frequency[i] < 61) && (frequency[i] >=60.8))
{
//TBCCR1 = 640;
TBCCR1 = 32;
}
else
{
TBCCR2 = 512;
}
P4OUT ^= BIT1;
for (delay = 0; delay < 32000; delay++);
}
TBCTL = TBSSEL_2 + MC_1; // ACLK, up mode
__bis_SR_register(LPM0_bits); // Enter LPM3
return 0;
}
The timer is not running until you start it by setting the MC field. That initialization must be done at the beginning.

.bss will not fit into region ram

I've been working on an LED wall and came into a RAM issue. Basically I am using teensy 3.0 and trying to load the following script, however, the script errors out with .bss will not fit into region 'RAM'
Please help! Any information would be greatly appreciated! Thanks!
/*
Nike NFL draft LED wall program
OctoWS2811 BasicTest.ino - Basic RGB LED Test
http://www.pjrc.com/teensy/td_libs_OctoWS2811.html
Copyright (c) 2013 Paul Stoffregen, PJRC.COM, LLC
*/
#include <OctoWS2811.h>
const int ledsPerStrip = 290;
DMAMEM int displayMemory[ledsPerStrip*6];
int drawingMemory[ledsPerStrip*6];
const int config = WS2811_GRB | WS2811_800kHz;
OctoWS2811 leds(ledsPerStrip, displayMemory, drawingMemory, config);
#define ORANGE 0xE05800
#define WHITE 0xFFFFFF
#define BLACK 0x000000
#define BLACK2 0x1E1E1E
void setup() {
leds.begin();
leds.show();
}
static int widths[] = { 30, 30, 50, 90, 40, 60 };
static int speeds[] = { 5, 5, 10, 16, 11, 13 };
static int locations[] = { 0, 0, 0, 0, 0, 0 };
static int counter = 0;
//static int location = 0;
static boolean reverse = false;
int blend(int source , float alpha) {
int source_r = (source >> 16);
int source_g = ((source >> 8) & 0x00FF);
int source_b = (source & 0x0000FF);
source_r = source_r * alpha;
source_g = source_g * alpha;
source_b = source_b * alpha;
return source_b | (source_g << 8) | (source_r << 16);
}
void loop() {
int microsec = 2000000 / leds.numPixels(); // change them all in 2 seconds
int location;
int offset;
int width;
int current;
int min;
int color;
float alpha = 0.95; // Set brightness of head
int head_width = 3; // Set width of head
delay(20);
int i;
for(i = 0; i < 6; ++i) {
location = locations[i];
width = widths[i];
color = 0xFFFFFF;
offset = i*ledsPerStrip;
location = location + speeds[i];
if(location > ledsPerStrip + width) {
location = 0;
}
locations[i] = location;
if(location < width) {
current = location;
min = 0;
} else if(location >= width) {
current = location;
min = location - width + 1;
}
for(current; current >= 0; --current) {
if(current >= min) {
if(current < ledsPerStrip) {
if(!reverse) {
leds.setPixel(current + offset, color);
} else {
leds.setPixel((ledsPerStrip - current) + offset, color);
}
}
if(current < (location - head_width)) {
color = blend(color, alpha);
}
} else {
if(!reverse) {
leds.setPixel(current + offset, BLACK);
} else {
leds.setPixel((ledsPerStrip - current) + offset, BLACK);
}
}
}
}
leds.show();
counter++;
}
And the returned error:
This report would have more information with
"Show verbose output during compilation"
enabled in File > Preferences.
Arduino: 1.0.5 (Windows 7), Board: "Teensy 3.0"
c:/program files/arduino/hardware/tools/arm-none-eabi/bin/../lib/gcc/arm-none-eabi/4.7.2/../../../../arm-none-eabi/bin/ld.exe: Nike_NFL_Program.cpp.elf section .bss' will not fit in regionRAM'
c:/program files/arduino/hardware/tools/arm-none-eabi/bin/../lib/gcc/arm-none-eabi/4.7.2/../../../../arm-none-eabi/bin/ld.exe: region `RAM' overflowed by 1028 bytes
collect2.exe: error: ld returned 1 exit status
Thanks!
Your simply running out of memory. sizing it down from 6 to 5 compiles. Note that the 3.0 has 16384 of SRAM. Each multiple of consumes a large chunk of the limited 16384 of SRAM.
I think you should put the code of static or variable to DRAM.
Such as the code you supply.
From:
const int ledsPerStrip = 290;
DMAMEM int displayMemory[ledsPerStrip*6];
int drawingMemory[ledsPerStrip*6];
const int config = WS2811_GRB | WS2811_800kHz;
static int widths[] = { 30, 30, 50, 90, 40, 60 };
static int speeds[] = { 5, 5, 10, 16, 11, 13 };
static int locations[] = { 0, 0, 0, 0, 0, 0 };
static int counter = 0;
To:
#include "link_defs.h"
__SECTION(dram.rodata) const int ledsPerStrip = 290;
__SECTION(dram.bss) DMAMEM int displayMemory[ledsPerStrip*6];
__SECTION(dram.bss) int drawingMemory[ledsPerStrip*6];
__SECTION(dram.rodata) const int config = WS2811_GRB | WS2811_800kHz;
__SECTION(dram.data) static int widths[] = { 30, 30, 50, 90, 40, 60 };
__SECTION(dram.data) static int speeds[] = { 5, 5, 10, 16, 11, 13 };
__SECTION(dram.data) static int locations[] = { 0, 0, 0, 0, 0, 0 };
__SECTION(dram.data) static int counter = 0;

How do you represent an image in OpenCL

I have sample code but it completely leaves out what my (void*)should_be!
I setup a cl_image_desc, cl_image_format, buffer, origin, and region:
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = width;
desc.image_height = height;
desc.image_depth = 0;
desc.image_array_size = 0;
desc.image_row_pitch = 0;
desc.image_slice_pitch = 0;
desc.num_mip_levels = 0;
desc.num_samples = 0;
desc.buffer = NULL;
cl_image_format format;
format.image_channel_order = CL_R;
format.image_channel_data_type = CL_FLOAT;
cl_mem bufferSourceImage = clCreateImage(context, CL_MEM_READ_ONLY, &format, &desc, NULL, NULL);
size_t origin[3] = {0, 0, 0};
size_t region[3] = {width, height,1};
In this next snippet sourceImage is a void pointer to my image. But what is my image? For every pixel there are r, g, b, a, x, and y values.
clEnqueueWriteImage(queue, bufferSourceImage, CL_TRUE, origin, region, 0, 0, sourceImage, 0, NULL, NULL);
How do I turn my image (a bunch of (r,g,b,a,x,y)'s) into a suitable array?
This is the kernel they provide:
__kernel void convolution(__read_only image2d_t sourceImage, __write_only image2d_t outputImage, int rows, int cols, __constant float* filter, int filterWidth, sampler_t sampler)
{
int column = get_global_id(0);
int row = get_global_id(1);
int halfWidth = (int)(filterWidth/2);
float4 sum = {0.0f, 0.0f, 0.0f, 0.0f};
int filterIdx = 0;
int2 coords;
for(int i = -halfWidth; i <= halfWidth; i++)
{
coords.y = row + i;
for(int i2 = -halfWidth; i2 <= halfWidth; i2++)
{
coords.x = column + i2;
float4 pixel;
pixel = read_imagef(sourceImage, sampler, coords);
sum.x += pixel.x * filter[filterIdx++];
}
}
if(myRow < rows && myCol < cols)
{
coords.x = column;
coords.y = row;
write_imagef(outputImage, coords, sum);
}
}
Set up the cl_image_format as you like and then you just have to follow that format what you selected. Currently your channel (R, G, B, A) data should be represented as "single precision floating-point value" - image_channel_data_type = CL_FLOAT, and you can take only one channel of those and feed it into the expected R channel (image_channel_order = CL_R).
Your kernel expect float:
float4 pixel;
pixel = read_imagef(sourceImage, sampler, coords);

OpenCL RGB->HSL and back

Ive been through every resource and cant fix my problem.
My host code calls the rgb2hsl kernel, then calls the hsl2rgb kernel. I should end up with the same image that I started with, but I do not. My new image hue is off in certain areas.
The red areas should not be there.
Here is the screen shot of what happens:
Here is the original picture
Here is the code:
#define E .0000001f
bool fEqual(float x, float y)
{
return (x+E > y && x-E < y);
}
__kernel void rgb2hsl(__global float *values, int numValues)
{
// thread index and total
int idx = get_global_id(0);
int idxVec3 = idx*3;
float3 gMem;
if (idx < numValues)
{
gMem.x = values[idxVec3];
gMem.y = values[idxVec3+1];
gMem.z = values[idxVec3+2];
}
barrier(CLK_LOCAL_MEM_FENCE);
gMem /= 255.0f; //convert from 256 color to float
//calculate chroma
float M = max(gMem.x, gMem.y);
M = max(M, gMem.z);
float m = min(gMem.x, gMem.y);
m = min(m, gMem.z);
float chroma = M-m; //calculate chroma
float lightness = (M+m)/2.0f;
float saturation = chroma/(1.0f-fabs(2.0f*lightness-1.0f));
float hue = 0;
if (fEqual(gMem.x, M))
hue = (int)((gMem.y - gMem.z)/chroma) % 6;
if (fEqual(gMem.y, M))
hue = (((gMem.z - gMem.x))/chroma) + 2;
if (fEqual(gMem.z, M))
hue = (((gMem.x - gMem.y))/chroma) + 4;
hue *= 60.0f;
barrier(CLK_LOCAL_MEM_FENCE);
if (idx < numValues)
{
values[idxVec3] = hue;
values[idxVec3+1] = saturation;
values[idxVec3+2] = lightness;
}
}
__kernel void hsl2rgb(__global float *values, int numValues)
{
// thread index and total
int idx = get_global_id(0);
int idxVec3 = idx*3;
float3 gMem;
if (idx < numValues)
{
gMem.x = values[idxVec3];
gMem.y = values[idxVec3+1];
gMem.z = values[idxVec3+2];
}
barrier(CLK_LOCAL_MEM_FENCE);
float3 rgb = (float3)(0,0,0);
//calculate chroma
float chroma = (1.0f - fabs( (float)(2.0f*gMem.z - 1.0f) )) * gMem.y;
float H = gMem.x/60.0f;
float x = chroma * (1.0f - fabs( fmod(H, 2.0f) - 1.0f ));
switch((int)H)
{
case 0:
rgb = (float3)(chroma, x, 0);
break;
case 1:
rgb = (float3)(x, chroma, 0);
break;
case 2:
rgb = (float3)(0, chroma, x);
break;
case 3:
rgb = (float3)(0, x, chroma);
break;
case 4:
rgb = (float3)(x, 0, chroma);
break;
case 5:
rgb = (float3)(chroma, 0, x);
break;
default:
rgb = (float3)(0, 0, 0);
}
barrier(CLK_LOCAL_MEM_FENCE);
rgb += gMem.z - .5f*chroma;
rgb *= 255;
if (idx < numValues)
{
values[idxVec3] = rgb.x;
values[idxVec3+1] = rgb.y;
values[idxVec3+2] = rgb.z;
}
}
The problem was this line:
hue = (int)((gMem.y - gMem.z)/chroma) % 6;
It should be
hue = fmod((gMem.y - gMem.z)/chroma, 6.0f);
I did some more changes to remove artifacts:
#define E .0000001f
bool fEqual(float x, float y)
{
return (x+E > y && x-E < y);
}
__kernel void rgb2hsl(__global float *values, int numValues)
{
// thread index and total
int idx = get_global_id(0);
int idxVec3 = idx*3;
float3 gMem;
if (idx < numValues)
{
gMem.x = values[idxVec3];
gMem.y = values[idxVec3+1];
gMem.z = values[idxVec3+2];
}
barrier(CLK_LOCAL_MEM_FENCE);
gMem /= 255.0f; //convert from 256 color to float
//calculate chroma
float M = max(gMem.x, gMem.y);
M = max(M, gMem.z);
float m = min(gMem.x, gMem.y);
m = min(m, gMem.z);
float chroma = M-m; //calculate chroma
float lightness = (M+m)/2.0f;
float saturation = chroma/(1.0f-fabs(2.0f*lightness-1.0f));
float hue = 0;
if (fEqual(gMem.x, M))
hue = fmod((gMem.y - gMem.z)/chroma, 6.0f);
if (fEqual(gMem.y, M))
hue = (((gMem.z - gMem.x))/chroma) + 2;
if (fEqual(gMem.z, M))
hue = (((gMem.x - gMem.y))/chroma) + 4;
hue *= 60.0f;
barrier(CLK_LOCAL_MEM_FENCE);
if (M == m)
hue = saturation = 0;
barrier(CLK_GLOBAL_MEM_FENCE);
if (idx < numValues)
{
//NOTE: ARTIFACTS SHOW UP if we do not cast to integer!
values[idxVec3] = (int)hue;
values[idxVec3+1] = saturation;
values[idxVec3+2] = lightness;
}
}

Resources