Arduino Nano IoT cannot read onboard IMU and analogRead() in the same loop - arduino

#include <Arduino_LSM6DS3.h>
#include <avr/dtostrf.h>
// One-time initialisation: open the serial port, then start the on-board IMU.
// If the IMU cannot be started the sketch reports it and halts.
void setup()
{
  Serial.begin(9600);
  while (!Serial) {
    // Block until the serial port reports ready.
  }

  const bool imuStarted = IMU.begin();
  if (!imuStarted) {
    Serial.println("Failed to initialize IMU!");
    for (;;) {
      // Nothing useful can run without the IMU: stay here forever.
    }
  }

  Serial.println("Starting");
}
// Once per second: collect one line of IMU readings and one line of flex
// sensor readings, then print both lines to the serial port.
void loop()
{
  // Output lines start empty because the readers append to them.
  char imuLine[100] = "";
  char flexLine[100] = "";
  // Scratch space the readers use while formatting a single number.
  char imuScratch[8];
  char flexScratch[8];

  readIMU(imuLine, imuScratch);
  readFlex(flexLine, flexScratch);

  Serial.println(imuLine);
  Serial.println(flexLine);

  delay(1000);
}
// Appends `value`, formatted with two decimals and followed by ", ", to the
// NUL-terminated text in `message` (capacity 100 bytes, as declared in
// readIMU's signature). Appends are bounded so `message` is never overrun.
static void appendImuField(char *message, float value)
{
  // Largest finite float printed with two decimals is '-' + 39 digits + ".00"
  // (43 characters), so 48 bytes always hold the text plus its terminator.
  char field[48];
  dtostrf(value, 4, 2, field);

  const size_t capacity = 100;
  size_t used = strlen(message);
  if (used >= capacity - 1) {
    return; // message already full
  }
  strncat(message, field, capacity - 1 - used);

  used = strlen(message);
  if (used < capacity - 1) {
    strncat(message, ", ", capacity - 1 - used);
  }
}

// Appends the latest gyroscope (x, y, z) and accelerometer (x, y, z) readings
// to `message` as "v, v, v, " groups. A sensor with no new sample available
// contributes nothing.
//
// message: NUL-terminated output line of at least 100 bytes; appended to.
// buffer:  kept only for interface compatibility. The original formatted each
//          number into this 8-byte buffer, which overflows for any value that
//          needs more than 7 characters (for example "-1000.00"), so the
//          numbers are now formatted into a larger local buffer instead.
void readIMU(char message[100], char buffer[8])
{
  (void)buffer;

  if (IMU.gyroscopeAvailable()) {
    float gx, gy, gz;
    IMU.readGyroscope(gx, gy, gz);
    appendImuField(message, gx);
    appendImuField(message, gy);
    appendImuField(message, gz);
  }

  if (IMU.accelerationAvailable()) {
    float ax, ay, az;
    IMU.readAcceleration(ax, ay, az);
    appendImuField(message, ax);
    appendImuField(message, ay);
    appendImuField(message, az);
  }
}
// Reads five flex sensors and appends their raw ADC values to `message` as
// "v1, v2, v3, v4, v5", pausing 10 ms after each conversion.
//
// message: NUL-terminated output line of at least 100 bytes; appended to.
// buffer:  scratch space for one formatted reading.
//
// Channel choice: the original read channels 1-5. On the Nano 33 IoT, A4 and
// A5 are the I2C SDA/SCL pins, and the on-board IMU is on that bus, so
// switching those pins to analog input stops the IMU from updating. The last
// two sensors are therefore read from channels 6 and 7 and must be wired to
// A6/A7. NOTE(review): bare channel numbers are presumably mapped to A<n> by
// the board core, as in the original code - confirm for the target core.
void readFlex(char message[100], char buffer[8])
{
  static const int kFlexChannels[] = {1, 2, 3, 6, 7};
  const int channelCount = sizeof(kFlexChannels) / sizeof(kFlexChannels[0]);

  for (int i = 0; i < channelCount; ++i) {
    const int value = analogRead(kFlexChannels[i]);
    ltoa(value, buffer, 10);
    strcat(message, buffer);
    if (i + 1 < channelCount) {
      strcat(message, ", "); // separator between readings, none after the last
    }
    delay(10);
  }
}
If I comment out either readIMU() or readFlex(), the other works fine. But if I enable both functions, readIMU() seems to stop reading new values and keeps printing the same values on every loop (as shown below), except for the first loop(). The only case where it works is when I read just A1 and A2; the issue occurs as soon as A3-A5 are read as well. I have only included the IMU readings in this post. Am I missing anything here?

Related

Firebase Realtime Database not showing anything with ESP32 CAM

I'm trying to send images from my ESP32 Cam to my Firebase Realtime Database, this is my .ino code
// Wi-Fi credentials passed to WiFi.begin() in setup() (placeholder values).
const char* ssid = "xxx";
const char* password = "xxx";
// Firebase host and auth token passed to Firebase.begin() in setup()
// (placeholder values).
String FIREBASE_HOST = "xxx";
String FIREBASE_AUTH = "xxx";
#include "FirebaseESP32.h"
// Shared Firebase data object handed to every Firebase call in setup().
FirebaseData firebaseData;
#include <WiFi.h>
#include "soc/soc.h"
#include "soc/rtc_cntl_reg.h"
#include "Base64.h"
#include "esp_camera.h"
// Board selector; not referenced by the code visible in this sketch.
#define CAMERA_MODEL_AI_THINKER
// Camera pin map. Each *_GPIO_NUM value below is copied into the matching
// camera_config_t field in setup().
// Power-down and reset pins (config.pin_pwdn / config.pin_reset).
#define PWDN_GPIO_NUM 32
#define RESET_GPIO_NUM -1
// Clock pin (config.pin_xclk).
#define XCLK_GPIO_NUM 0
// SCCB data/clock pins (config.pin_sscb_sda / config.pin_sscb_scl).
#define SIOD_GPIO_NUM 26
#define SIOC_GPIO_NUM 27
// Data pins: Y2..Y9 map to config.pin_d0..config.pin_d7.
#define Y9_GPIO_NUM 35
#define Y8_GPIO_NUM 34
#define Y7_GPIO_NUM 39
#define Y6_GPIO_NUM 36
#define Y5_GPIO_NUM 21
#define Y4_GPIO_NUM 19
#define Y3_GPIO_NUM 18
#define Y2_GPIO_NUM 5
// Sync and pixel-clock pins (config.pin_vsync / pin_href / pin_pclk).
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM 23
#define PCLK_GPIO_NUM 22
void setup() {
WRITE_PERI_REG(RTC_CNTL_BROWN_OUT_REG, 0);
Serial.begin(115200);
WiFi.begin(ssid, password);
long int StartTime=millis();
while (WiFi.status() != WL_CONNECTED) {
delay(500);
if ((StartTime+10000) < millis()) break;
}
if (WiFi.status() == WL_CONNECTED) {
char* apssid = "ESP32-CAM";
char* appassword = "12345678";
WiFi.softAP((WiFi.localIP().toString()+"_"+(String)apssid).c_str(), appassword);
}
else {
return;
}
camera_config_t config;
config.ledc_channel = LEDC_CHANNEL_0;
config.ledc_timer = LEDC_TIMER_0;
config.pin_d0 = Y2_GPIO_NUM;
config.pin_d1 = Y3_GPIO_NUM;
config.pin_d2 = Y4_GPIO_NUM;
config.pin_d3 = Y5_GPIO_NUM;
config.pin_d4 = Y6_GPIO_NUM;
config.pin_d5 = Y7_GPIO_NUM;
config.pin_d6 = Y8_GPIO_NUM;
config.pin_d7 = Y9_GPIO_NUM;
config.pin_xclk = XCLK_GPIO_NUM;
config.pin_pclk = PCLK_GPIO_NUM;
config.pin_vsync = VSYNC_GPIO_NUM;
config.pin_href = HREF_GPIO_NUM;
config.pin_sscb_sda = SIOD_GPIO_NUM;
config.pin_sscb_scl = SIOC_GPIO_NUM;
config.pin_pwdn = PWDN_GPIO_NUM;
config.pin_reset = RESET_GPIO_NUM;
config.xclk_freq_hz = 20000000;
config.pixel_format = PIXFORMAT_JPEG;
if(psramFound()){
config.frame_size = FRAMESIZE_UXGA;
config.jpeg_quality = 10;
config.fb_count = 2;
} else {
config.frame_size = FRAMESIZE_SVGA;
config.jpeg_quality = 12;
config.fb_count = 1;
}
esp_err_t err = esp_camera_init(&config);
if (err != ESP_OK) {
delay(1000);
ESP.restart();
}
sensor_t * s = esp_camera_sensor_get();
s->set_framesize(s, FRAMESIZE_CIF);
Firebase.begin(FIREBASE_HOST, FIREBASE_AUTH);
Firebase.reconnectWiFi(true);
Firebase.setMaxRetry(firebaseData, 3);
Firebase.setMaxErrorQueue(firebaseData, 30);
Firebase.enableClassicRequest(firebaseData, true);
String jsonData = "{\"photo\":\"" + Photo2Base64() + "\"}";
String photoPath = "/esp32-cam";
Serial.println(Photo2Base64());
Firebase.setString(firebaseData, "/esp32-cam", Photo2Base64());
}
// All the work happens in setup(); the main loop only idles in 10 s sleeps.
void loop() {
  const uint32_t idleMs = 10000;
  delay(idleMs);
}
// Captures one camera frame and returns it as a URL-encoded Base64 data URI
// ("data:image/jpeg;base64,..."). Returns an empty String when no frame could
// be captured.
//
// Fixes over the previous version:
//  - The frame is encoded in consecutive 3-byte groups. The old loop called
//    base64_encode() at every byte offset with a fixed length of 3, which read
//    up to two bytes past the end of the frame and emitted garbage instead of
//    '=' padding when the length was not a multiple of 3.
//  - `output` now has room for the terminator: 4 encoded characters plus the
//    NUL that String(output) relies on.
String Photo2Base64() {
  camera_fb_t * fb = esp_camera_fb_get();
  if (!fb) {
    return "";
  }

  String imageFile = "data:image/jpeg;base64,";
  char *input = (char *)fb->buf;
  char output[5]; // 4 Base64 characters for a 3-byte group + NUL terminator

  for (size_t offset = 0; offset < fb->len; offset += 3) {
    const size_t remaining = fb->len - offset;
    const int groupLen = remaining < 3 ? (int)remaining : 3; // short final group gets padded
    base64_encode(output, input + offset, groupLen);
    imageFile += urlencode(String(output));
  }

  esp_camera_fb_return(fb); // hand the frame buffer back to the driver
  return imageFile;
}
// Returns `str` percent-encoded for use in a URL: spaces become '+',
// ASCII letters and digits are copied unchanged, and every other byte becomes
// "%XY" with XY being its value in uppercase hexadecimal.
//
// Each character is handled as an unsigned byte, so isalnum() is never called
// with a negative value (undefined behaviour in the previous version for bytes
// >= 0x80). The unused `code2` local was removed.
String urlencode(String str)
{
  static const char kHexDigits[] = "0123456789ABCDEF";

  String encodedString = "";
  for (unsigned int i = 0; i < str.length(); i++) {
    const unsigned char c = (unsigned char)str.charAt(i);
    if (c == ' ') {
      encodedString += '+';
    } else if (isalnum(c)) {
      encodedString += (char)c;
    } else {
      encodedString += '%';
      encodedString += kHexDigits[c >> 4];   // high nibble
      encodedString += kHexDigits[c & 0xf];  // low nibble
    }
    yield(); // let background tasks run while encoding long strings
  }
  return encodedString;
}
The serial monitor shows the Base64 image, even putting into a browser, the image appears...
Serial Monitor
But does not appear in the Real Time Database...
Problema de Firebase
When I compile the code, there is no error, can you guys help me?

Pro trinket EEPROM read and write

(I am using Arduino IDE)
What I am trying to achieve is saving the "progress" so that it survives the microcontroller being powered off. I have been tweaking the code for a while, but I have yet to get it to actually save and read values from EEPROM properly. Here is the relevant part of my code (ignore the parts that drive the display; I have removed all of the irrelevant code):
#include <EEPROM.h>
// Pet statistics. Both start at 10000, are decremented in loop(), and are
// shown on the display divided by 100.
int hunger = 10000;
int happiness = 10000;
// Loop counters shared by the animation routines (testhappy, testeat).
int y = 0;
int z = 0;
// Analog inputs polled in loop(): sensorPin triggers the happy animation,
// sensorPin2 triggers the eating animation.
int sensorPin = A0;
int sensorPin2 = A1;
// EEPROM layout: each statistic is stored as a whole `int` with
// EEPROM.put()/EEPROM.get(). EEPROM.write()/EEPROM.read() transfer a single
// byte (0-255), so values such as 10000 cannot round-trip through them.
// Assumes a 2-byte int (AVR) so the ranges at addresses 5 and 8 do not overlap.
static const int kHappinessAddr = 5;
static const int kHungerAddr = 8;
static const int kInitialStat = 10000;

// Restores saved progress from EEPROM, then initialises the display and the
// sensor inputs. The previous version overwrote the stored values on every
// boot (defeating persistence), never initialised address 8, and subtracted
// '0' from the bytes it read back.
void setup() {
  Serial.begin(9600);

  EEPROM.get(kHappinessAddr, happiness);
  EEPROM.get(kHungerAddr, hunger);
  // Erased EEPROM cells read 0xFF, so a never-written int comes back as -1;
  // treat that as "nothing saved yet" and start from the defaults.
  if (happiness == -1) {
    happiness = kInitialStat;
  }
  if (hunger == -1) {
    hunger = kInitialStat;
  }

  display.begin(SSD1306_SWITCHCAPVCC, 0x3C);
  display.display();
  display.clearDisplay();
  display.display();

  pinMode(sensorPin, INPUT);
  pinMode(sensorPin2, INPUT);
}

// One game tick: persist the current statistics, decay both by one, then run
// the animation that matches the sensor inputs.
void loop() {
  // NOTE(review): this stores on every pass. put() only rewrites bytes that
  // changed, but the low byte changes each tick, so consider saving less
  // often to limit EEPROM wear.
  EEPROM.put(kHappinessAddr, happiness);
  EEPROM.put(kHungerAddr, hunger);

  hunger -= 1;
  happiness -= 1;

  if (analogRead(sensorPin) > 1020) {
    testhappy();
    happiness += 2500;
  } else {
    testsad();
  }

  // Keep feeding for as long as the second input stays high.
  while (analogRead(sensorPin2) > 1020) {
    testeat();
    hunger += 5000;
  }
}
// Clears the screen, prints "hunger/happiness" (each divided by 100) at the
// top-left, draws the happy face shifted `dx` pixels from x = 30 and pushes
// the frame to the display.
static void drawHappyFrame(int dx)
{
  display.setCursor(0, 0);
  display.clearDisplay();
  display.print(hunger / 100);
  display.print("/");
  display.print(happiness / 100);
  display.drawBitmap(30 + dx, 32, Happy32_glcd_bmp, 32, 32, 1);
  display.display();
}

// Happy animation: shows the face centred, then wiggles it twice
// right (0..4), back (5..1), left (0..-4) and back (-5..-1).
// Uses the global counters y and z as loop variables.
void testhappy(void){
  display.setTextSize(2);
  display.setTextColor(WHITE);

  // Initial, unshifted frame, refreshed once more after a short pause.
  drawHappyFrame(0);
  delay(1);
  display.display();
  delay(1);

  for (y = 0; y < 2; ++y) {
    for (z = 0; z < 5; ++z) {
      drawHappyFrame(z);
    }
    for (z = 5; z > 0; --z) {
      drawHappyFrame(z);
    }
    for (z = 0; z > -5; --z) {
      drawHappyFrame(z);
    }
    for (z = -5; z < 0; ++z) {
      drawHappyFrame(z);
    }
  }

  display.stopscroll();
}
void testsad(void){
display.setTextSize(2);
display.setTextColor(WHITE);
display.setCursor(0,0);
display.clearDisplay();
display.print(hunger/100);display.print("/");display.print(happiness/100);
display.drawBitmap(30, 32, Mild32_glcd_bmp, 32, 32, 1);
display.display();
delay(1);
}
// Prints the "hunger/happiness" header (each value divided by 100) at the
// current cursor position.
static void printEatStats()
{
  display.print(hunger / 100);
  display.print("/");
  display.print(happiness / 100);
}

// Eating animation: after an initial 200 ms pause, alternates six times
// between an "eating" frame and a "neutral" frame, 200 ms each.
// Uses the global counter y as the loop variable.
void testeat(void){
  delay(200);

  y = 0;
  while (y < 6) {
    // Frame A: eating face with the first cookie bitmap.
    display.setTextSize(2);
    display.setTextColor(WHITE);
    display.setCursor(0, 0);
    display.clearDisplay();
    printEatStats();
    display.drawBitmap(30, 32, Eat32_glcd_bmp, 32, 32, 1);
    display.drawBitmap(60, 38, Cookie18_glcd_bmp, 8, 8, 1);
    display.display();
    delay(200);

    // Frame B: neutral face with the second cookie bitmap.
    display.clearDisplay();
    display.setCursor(0, 0);
    printEatStats();
    display.drawBitmap(60, 38, Cookie28_glcd_bmp, 8, 8, 1);
    display.drawBitmap(30, 32, Mild32_glcd_bmp, 32, 32, 1);
    display.display();
    delay(200);

    y++;
  }
}
// Places one flake at a random x position on the top row and gives it a
// random fall speed of 1..5 pixels per frame.
static void respawnFlake(uint8_t flake[3])
{
  flake[XPOS] = random(display.width());
  flake[YPOS] = 0;
  flake[DELTAY] = random(5) + 1;
}

// Animates NUMFLAKES copies of `bitmap` (w x h pixels) falling down the
// screen. Never returns: the animation loop runs forever.
void testdrawbitmap(const uint8_t *bitmap, uint8_t w, uint8_t h) {
  uint8_t icons[NUMFLAKES][3];

  // Seed every flake and log its starting state.
  for (uint8_t f = 0; f < NUMFLAKES; f++) {
    respawnFlake(icons[f]);

    Serial.print("x: ");
    Serial.print(icons[f][XPOS], DEC);
    Serial.print(" y: ");
    Serial.print(icons[f][YPOS], DEC);
    Serial.print(" dy: ");
    Serial.print(icons[f][DELTAY], DEC);
  }

  for (;;) {
    // Draw all flakes, show the frame, then wait.
    for (uint8_t f = 0; f < NUMFLAKES; f++) {
      display.drawBitmap(icons[f][XPOS], icons[f][YPOS], bitmap, w, h, WHITE);
    }
    display.display();
    delay(200);

    // Erase each flake at its old position and advance it.
    for (uint8_t f = 0; f < NUMFLAKES; f++) {
      display.drawBitmap(icons[f][XPOS], icons[f][YPOS], bitmap, w, h, BLACK);
      icons[f][YPOS] += icons[f][DELTAY];

      // A flake that fell past the bottom edge restarts at the top.
      const bool offScreen = icons[f][YPOS] > display.height();
      if (offScreen) {
        respawnFlake(icons[f]);
      }
    }
  }
}
(I am getting no errors when compiling the code)
Look here at the page for EEPROM.write here. https://www.arduino.cc/en/Reference/EEPROMWrite
There it says the arguments are defined as:
EEPROM.write(address, value)
Parameters address: the location to write to, starting from 0 (int)
value: the value to write, from 0 to 255 (byte)
Now look at the number you're trying to write into EEPROM:
EEPROM.write(5, 10000);
Hopefully now you can see what the problem is. Your second argument is far too large. You should break it into two bytes or use a different method like EEPROM.put (look that up) which doesn't suffer this limitation.

Arduino multiple pin ADC read

I am trying to read the values from two analog sensors.
Can you use the for loop to read analog pins?
int i;
for (i = 0; i < 2; i = i + 1) {
x[I]=(analogRead(A[i]);
Working Code:
// Raw ADC readings for the two analog inputs, filled in loop().
int AnalogpIn[2];
// The same readings rescaled from 0-1023 to 0-255 with map() in loop().
int MapValue[2];
// Opens the serial port used to report the readings.
void setup() {
  const unsigned long baudRate = 9600;
  Serial.begin(baudRate);
}
// Samples A0 and A1 (100 ms apart), rescales each reading from 0-1023 to
// 0-255, then prints one "raw-mapped" line per input with 100 ms between lines.
void loop() {
  delay(100);
  const int rawA0 = analogRead(A0);
  AnalogpIn[0] = rawA0;
  MapValue[0] = map(rawA0, 0, 1023, 0, 255);

  delay(100);
  const int rawA1 = analogRead(A1);
  AnalogpIn[1] = rawA1;
  MapValue[1] = map(rawA1, 0, 1023, 0, 255);

  int channel = 0;
  while (channel < 2) {
    Serial.print(AnalogpIn[channel]);
    Serial.print("-");
    Serial.print(MapValue[channel]);
    Serial.println("");
    delay (100);
    channel = channel + 1;
  }
}
Try this out.
I think this is what you are looking for. I have made a few changes.
// Reads analog channels 0 and 1 in a loop, rescales each reading from 0-1023
// to 0-255 and prints it as "raw-mapped"; then waits 100 ms.
void loop(){
  for (int channel = 0; channel < 2; ++channel) {
    const int raw = analogRead(channel);
    AnalogpIn[channel] = raw;
    MapValue[channel] = map(raw, 0, 1023, 0, 255);

    Serial.print(AnalogpIn[channel]);
    Serial.print("-");
    Serial.print(MapValue[channel]);
    Serial.println("");
  }
  delay (100);
}

For loop in OpenCl kernel rolling through global memory float array

I feel I don't understand a basic parallel programming concept. The kernel below is a simple, contrived example that reproduces the problem I'm having. It attempts to use all the values in "points" to calculate a value and assign it to all of the items in "blocks". I want to push the limits for the size of these arrays. While I can make the "blocks" array quite large (>100 million floats), I get an "invalid command queue" error when "points" is filled with more than ~100 thousand floats (the error is reported by clFinish, called immediately after clEnqueueNDRangeKernel). Could any of you help me understand why?
// Each work-item counts from 0 up to pointsCount one step at a time and
// stores the resulting count in its own slot of `blocks`.
// `points` is accepted but not read.
__kernel void openClTesting (__global float *blocks, __global float *points, int pointsCount)
{
    const int workItem = get_global_id(0);

    int tally = 0;
    int step = 0;
    while (step < pointsCount)
    {
        tally++;
        step++;
    }

    blocks[workItem] = tally;
}
Some Device Info:
CL_DEVICE_LOCAL_MEM_SIZE = 49,152
CL_DEVICE_GLOBAL_MEM_SIZE = 2,147,483,648
CL_DEVICE_MAX_MEM_ALLOC_SIZE = 536,870,912
Host Code:
#include "stdafx.h"
#include "CL\opencl.h"
#include <iostream>
#include <fstream>
#include <string>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
// Element count of the "points" buffer; _tmain also uses it as the global
// work size of the kernel launch.
#define NUM_POINTS 100000
// Element count of the "blocks" buffer.
#define NUM_BLOCKS 100000000
// Bundles the OpenCL handles and bookkeeping values shared by _tmain and the
// print helpers.
struct openClData
{
    // Device / platform discovery.
    cl_device_id deviceId{nullptr};
    cl_uint numDevices;
    cl_uint numPlatforms;
    cl_int ret;                       // status of the most recent OpenCL call
    cl_platform_id *platforms{nullptr};

    // Execution objects.
    cl_context context;
    cl_command_queue commandQueue;
    cl_program program;
    cl_kernel kernel;

    // Kernel source text and its length.
    char* kernelCode;
    cl_uint kernelCodeSize;

    // Launch dimensions.
    size_t globalItemSize;
    size_t localItemSize{1};
};
// Forward declarations for the helpers defined after _tmain.
// Returns the OpenCL C source of the kernel.
char* getKernelCode();
// Prints the program build log for oclData.program / oclData.deviceId.
void printErrorLog(openClData oclData);
// Prints "<line>, <oclData.ret>".
void printRet(openClData oclData, int line);
// Returns the size of the named file; not called by the code shown here.
int countFileChars(const char *fileName);
int _tmain(int argc, _TCHAR* argv[])
{
openClData oclData;
oclData.globalItemSize = NUM_POINTS;
oclData.kernelCode = getKernelCode();
std::cout << oclData.kernelCode << std::endl;
oclData.kernelCodeSize = strlen(oclData.kernelCode);
int numPoints = NUM_POINTS;
int numBlocks = NUM_BLOCKS;
cl_long localMemSize = 0, globalMemSize = 0, maxAllocMemSize = 0;
float *blocks = new float[numBlocks]{0};
float *points = new float[numPoints]{0};
//prepare platform, device, context and command queue
oclData.ret = clGetPlatformIDs(0, NULL, &oclData.numPlatforms);
printRet(oclData, __LINE__);
oclData.platforms = (cl_platform_id *)malloc(oclData.numPlatforms * sizeof(cl_platform_id));
oclData.ret = clGetPlatformIDs(oclData.numPlatforms, oclData.platforms, NULL);
printRet(oclData, __LINE__);
oclData.ret = clGetDeviceIDs(oclData.platforms[0], CL_DEVICE_TYPE_GPU, 1, &oclData.deviceId, &oclData.numDevices);
printRet(oclData, __LINE__);
oclData.context = clCreateContext(NULL, 1, &oclData.deviceId, NULL, NULL, &oclData.ret);
printRet(oclData, __LINE__);
oclData.commandQueue = clCreateCommandQueue(oclData.context, oclData.deviceId, 0, &oclData.ret);
printRet(oclData, __LINE__);
//prepare cl_mem objects
cl_mem memObjBlocks = clCreateBuffer(oclData.context, CL_MEM_READ_WRITE, sizeof(float) * numBlocks, NULL, &oclData.ret);
printRet(oclData, __LINE__);
cl_mem memObjPoints = clCreateBuffer(oclData.context, CL_MEM_READ_WRITE, sizeof(float) * numPoints, NULL, &oclData.ret);
printRet(oclData, __LINE__);
oclData.ret = clEnqueueWriteBuffer(oclData.commandQueue, memObjBlocks, CL_TRUE, 0, sizeof(float) * numBlocks, blocks, 0, NULL, NULL);
printRet(oclData, __LINE__);
oclData.ret = clEnqueueWriteBuffer(oclData.commandQueue, memObjPoints, CL_TRUE, 0, sizeof(float) * numPoints, points, 0, NULL, NULL);
printRet(oclData, __LINE__);
//prepare program
oclData.program = clCreateProgramWithSource(oclData.context, 1, (const char**)&oclData.kernelCode, (const size_t *)&oclData.kernelCodeSize, &oclData.ret);
printRet(oclData, __LINE__);
oclData.ret = clBuildProgram(oclData.program, 1, &oclData.deviceId, NULL, NULL, NULL);
printRet(oclData, __LINE__);
if (oclData.ret == CL_BUILD_PROGRAM_FAILURE) printErrorLog(oclData);
oclData.kernel = clCreateKernel(oclData.program, "openClTesting", &oclData.ret);
printRet(oclData, __LINE__);
//set arguments
oclData.ret = clSetKernelArg(oclData.kernel, 0, sizeof(cl_mem), &memObjBlocks);
printRet(oclData, __LINE__);
oclData.ret = clSetKernelArg(oclData.kernel, 1, sizeof(cl_mem), &memObjPoints);
printRet(oclData, __LINE__);
oclData.ret = clSetKernelArg(oclData.kernel, 2, sizeof(int), &numPoints);
printRet(oclData, __LINE__);
//run
oclData.ret = clEnqueueNDRangeKernel(oclData.commandQueue, oclData.kernel, 1, NULL, &oclData.globalItemSize, &oclData.localItemSize, 0, NULL, NULL);
printRet(oclData, __LINE__);
oclData.ret = clFinish(oclData.commandQueue);
printRet(oclData, __LINE__);
oclData.ret = clEnqueueReadBuffer(oclData.commandQueue, memObjBlocks, CL_TRUE, 0, sizeof(float) * numBlocks, blocks, 0, NULL, NULL);
printRet(oclData, __LINE__);
oclData.ret = clFinish(oclData.commandQueue);
printRet(oclData, __LINE__);
//print some device info
oclData.ret = clGetDeviceInfo(oclData.deviceId, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localMemSize, 0);
std::cout << "CL_DEVICE_LOCAL_MEM_SIZE = " << localMemSize << '\n';
oclData.ret = clGetDeviceInfo(oclData.deviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_long), &globalMemSize, 0);
std::cout << "CL_DEVICE_GLOBAL_MEM_SIZE = " << globalMemSize << '\n';
oclData.ret = clGetDeviceInfo(oclData.deviceId, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_long), &maxAllocMemSize, 0);
std::cout << "CL_DEVICE_MAX_MEM_ALLOC_SIZE = " << maxAllocMemSize << '\n';
//clean up
oclData.ret = clFlush(oclData.commandQueue);
printRet(oclData, __LINE__);
oclData.ret = clFinish(oclData.commandQueue);
printRet(oclData, __LINE__);
oclData.ret = clReleaseKernel(oclData.kernel);
printRet(oclData, __LINE__);
oclData.ret = clReleaseProgram(oclData.program);
printRet(oclData, __LINE__);
oclData.ret = clReleaseMemObject(memObjBlocks);
printRet(oclData, __LINE__);
oclData.ret = clReleaseMemObject(memObjPoints);
printRet(oclData, __LINE__);
oclData.ret = clReleaseCommandQueue(oclData.commandQueue);
printRet(oclData, __LINE__);
oclData.ret = clReleaseContext(oclData.context);
printRet(oclData, __LINE__);
for (size_t i = 0; i < 10; i++)
{
std::cout << blocks[i] << std::endl;
}
delete blocks;
delete points;
return 0;
}
// Returns the OpenCL C source of the openClTesting kernel as a NUL-terminated
// string. The text lives in a static array, so the returned pointer stays
// valid for the lifetime of the program and must not be freed.
//
// A static char array replaces the former `char* p = "literal"`: binding a
// string literal to a mutable char* is not valid C++11 and any write through
// it would be undefined behaviour.
char* getKernelCode()
{
    static char kernelCode[] =
        "__kernel void openClTesting (__global float *blocks, __global float *points, int pointsCount)"
        "{"
        " int globalId = get_global_id(0);"
        " int count = 0;"
        " for (int i = 0; i < pointsCount; i++)"
        " {"
        " count++;"
        " }"
        "blocks[globalId] = count;"
        "}";
    return kernelCode;
}
// Prints the build log of oclData.program for oclData.deviceId to std::cout.
// If the log cannot be queried (or is empty) a short note is written to
// std::cerr instead.
//
// The log is held in a std::string rather than an unchecked malloc() buffer,
// so an allocation failure can no longer lead to a null-pointer write, and the
// statuses of both queries are checked.
void printErrorLog(openClData oclData)
{
    size_t log_size = 0;
    cl_int status = clGetProgramBuildInfo(oclData.program, oclData.deviceId, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
    if (status != CL_SUCCESS || log_size == 0)
    {
        std::cerr << "No build log available (status " << status << ")" << std::endl;
        return;
    }

    std::string log(log_size, '\0');
    status = clGetProgramBuildInfo(oclData.program, oclData.deviceId, CL_PROGRAM_BUILD_LOG, log_size, &log[0], NULL);
    if (status != CL_SUCCESS)
    {
        std::cerr << "Could not read build log (status " << status << ")" << std::endl;
        return;
    }

    // c_str() stops at the first NUL, matching how the raw buffer was printed.
    std::cout << log.c_str();
}
// Prints "<line>, <status>" where status is the result of the most recent
// OpenCL call stored in oclData.ret. The stream is flushed after each line.
void printRet(openClData oclData, int line)
{
    const cl_int status = oclData.ret;
    std::cout << line << ", ";
    std::cout << status << std::endl;
}
// Returns the size of the file `fileName` in bytes, or -1 when the name is
// null, the file cannot be opened, its size cannot be determined, or the size
// does not fit in an int.
//
// The previous version did not check that the file opened and converted a
// failed tellg() through size_t to int.
int countFileChars(const char *fileName)
{
    if (fileName == nullptr)
    {
        return -1;
    }

    // Open positioned at the end so tellg() reports the size directly.
    std::ifstream ifs(fileName, std::ios::binary | std::ios::ate);
    if (!ifs)
    {
        return -1;
    }

    const std::streamoff size = ifs.tellg();
    const int asInt = static_cast<int>(size);
    if (size < 0 || static_cast<std::streamoff>(asInt) != size)
    {
        return -1; // tellg failed or the size overflows int
    }
    return asInt;
}
A few things I notice:
You're launching NUM_POINTS work-items, but write the result of each to blocks[globalId] - which has NUM_BLOCKS items. So that's undefined behaviour when NUM_POINTS is greater than NUM_BLOCKS. It also explains why varying NUM_BLOCKS does nothing (outside of the above restriction): aside from the memory allocation, the value of NUM_BLOCKS has no effect. (And the memory allocation limit you found roughly matches the CL_DEVICE_MAX_MEM_ALLOC_SIZE value for your implementation.)
You might be running into a kernel timeout condition here. 100000 loop iterations in a single work-item is quite a lot. Depending on the OpenCL implementation, kernels can be killed off if they take too long to run. Consider making better use of the thread-parallelism available, and split the work more horizontally across work-items, rather than looping. Many, shortish-running work-items are typically better than few long-running ones.
On a general note, localItemSize = 1; should be avoided, because it forces every OpenCL work group to consist of a single work item, which reduces your parallelism to the number of work groups that your compute device can run in parallel; that is much less than the number of work items it can run. You can simply pass NULL for the local item size instead, to have the OpenCL implementation figure out a reasonable value on its own:
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, NULL, 0, NULL, NULL);
This may also be the source of your error because you are creating NUM_POINTS work groups but the size of the queue on the device is memory limited (CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE).

OpenCL error 48 when launching kernel

Continuing with my OpenCL adventure, this is what I have so far, ported from my CUDA implementation. I was trying to check whether at least the first kernel call was working, but I got error 48 and have no idea what I am missing. I was following the example in this page
KERNEL
// Assigns every pixel of a c_rows x c_cols image to a group: the pixel at
// linear index (row * c_cols + col) gets group index / c_numColors.
// Work-items that fall outside the image do nothing.
__kernel
void clut_distributePixels(__global int *pixelGroup, int c_rows, int c_cols, int c_numColors){
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    const bool insideImage = (col < c_cols) && (row < c_rows);
    if (insideImage) {
        const int pixel = row * c_cols + col;
        pixelGroup[pixel] = pixel / c_numColors;
    }
}
Read Kernel from file
/*
 * Reads the whole file `filename` into a newly malloc()ed, NUL-terminated
 * buffer and stores the number of bytes read in *length.
 *
 * Returns the buffer (the caller must free() it), or NULL when the file
 * cannot be opened, its size cannot be determined, or memory is exhausted.
 * A message is written to stderr in every failure case and the file handle is
 * always closed.
 *
 * The previous version used the results of ftell() and malloc() unchecked.
 */
char *file_contents(const char *filename, int *length){
    FILE *f = fopen(filename, "r");
    char *buffer = NULL;

    if (!f) {
        fprintf(stderr, "Unable to open %s for reading\n", filename);
        return NULL;
    }

    if (fseek(f, 0, SEEK_END) == 0) {
        const long size = ftell(f);
        if (size >= 0 && fseek(f, 0, SEEK_SET) == 0) {
            buffer = (char *)malloc((size_t)size + 1); /* +1 for the terminator */
            if (buffer) {
                /* fread may return fewer bytes than `size` in text mode. */
                const size_t got = fread(buffer, 1, (size_t)size, f);
                buffer[got] = '\0';
                *length = (int)got;
            }
        }
    }
    fclose(f);

    if (!buffer) {
        fprintf(stderr, "Unable to read %s\n", filename);
    }
    return buffer;
}
CODE
#include <iostream>
#include <OpenCL/OpenCL.h>
#include "Utilities.hpp"
int main(int argc, const char * argv[]){
if (argc < 3) {
std::cout << "Use: {GPU|CPU} nColors" << std::endl;
return 1;
}
/************************************************
HOST SIDE INITIALIZATION
************************************************/
int h_numColors = atoi(argv[2]);
Color *h_image;
int h_rows, h_cols;
if (readText2RGB("LenaOriginal.txt", &h_image, &h_rows, &h_cols) != SUCCESS){
return 1;
}
int *h_pixelGroup = new int[h_rows*h_cols];
Color *h_groupRep = new Color[h_numColors];
Color *h_clutImage = new Color[h_rows*h_cols];
int h_change = 0;
/************************************************
PLATFORM AND DEVICE SETUP
************************************************/
cl_int errorStatus;
//Use the first platform
cl_platform_id platform;
errorStatus = clGetPlatformIDs(1, &platform, NULL);
//Use the first device that matches the type selected
cl_device_id device;
if (strcmp(argv[1], "CPU")){
errorStatus = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
}else if (strcmp(argv[1], "GPU")){
errorStatus = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
}else{
std::cout << "Unknown device type. Choose either CPU or GPU" << std::endl;
return 1;
}
//Define context properties and create context
cl_context_properties contextProps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0};
cl_context context = clCreateContext(contextProps, 1, &device, NULL, NULL, &errorStatus);
//Create the command queue
cl_command_queue queue = clCreateCommandQueue(context, device, 0, &errorStatus);
/************************************************
DEVICE VARIABLE SETUP
************************************************/
cl_mem d_image;
cl_mem d_pixelGroup;
cl_mem d_groupRep;
cl_mem d_clutImage;
cl_mem d_change;
d_image = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(Color)*h_rows*h_cols, h_image, &errorStatus);
d_pixelGroup = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*h_rows*h_cols, NULL, &errorStatus);
d_groupRep = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(Color)*h_numColors, NULL, &errorStatus);
d_clutImage = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(Color)*h_rows*h_cols, NULL, &errorStatus);
d_change = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int), NULL, &errorStatus);
/************************************************
CREATE, COMPILE PROGRAM and CREATE KERNEL
************************************************/
int pl;
size_t sourceLength;
char * sourceCode = file_contents("vectorQuantization.cl", &pl);
sourceLength = (size_t)pl;
cl_program program = clCreateProgramWithSource(context, 1, (const char**)&sourceCode, &sourceLength, &errorStatus);
errorStatus = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
cl_kernel k_clut_distributePixels = clCreateKernel(program, "clut_distributePixels", &errorStatus);
errorStatus = clSetKernelArg(k_clut_distributePixels, 0, sizeof(cl_mem), (void*)&d_pixelGroup);
errorStatus = clSetKernelArg(k_clut_distributePixels, 1, sizeof(cl_mem), (void*)&h_rows);
errorStatus = clSetKernelArg(k_clut_distributePixels, 2, sizeof(cl_mem), (void*)&h_cols);
errorStatus = clSetKernelArg(k_clut_distributePixels, 3, sizeof(cl_mem), (void*)&h_numColors);
cl_kernel k_clut_checkDistances = clCreateKernel(program, "clut_checkDistances", &errorStatus);
errorStatus = clSetKernelArg(k_clut_checkDistances, 0, sizeof(cl_mem), (void*)&d_image);
errorStatus = clSetKernelArg(k_clut_checkDistances, 1, sizeof(cl_mem), (void*)&d_pixelGroup);
errorStatus = clSetKernelArg(k_clut_checkDistances, 2, sizeof(cl_mem), (void*)&d_groupRep);
errorStatus = clSetKernelArg(k_clut_checkDistances, 3, sizeof(cl_mem), (void*)&h_rows);
errorStatus = clSetKernelArg(k_clut_checkDistances, 4, sizeof(cl_mem), (void*)&h_cols);
errorStatus = clSetKernelArg(k_clut_checkDistances, 5, sizeof(cl_mem), (void*)&h_numColors);
errorStatus = clSetKernelArg(k_clut_checkDistances, 6, sizeof(cl_mem), (void*)&d_change);
cl_kernel k_clut_createImage = clCreateKernel(program, "clut_createImage", &errorStatus);
errorStatus = clSetKernelArg(k_clut_createImage, 0, sizeof(cl_mem), (void*)&d_clutImage);
errorStatus = clSetKernelArg(k_clut_createImage, 1, sizeof(cl_mem), (void*)&d_pixelGroup);
errorStatus = clSetKernelArg(k_clut_createImage, 2, sizeof(cl_mem), (void*)&d_groupRep);
errorStatus = clSetKernelArg(k_clut_createImage, 3, sizeof(cl_mem), (void*)&h_rows);
errorStatus = clSetKernelArg(k_clut_createImage, 4, sizeof(cl_mem), (void*)&h_cols);
/************************************************
EXECUTE PROGRAM AND GET RESULTS
************************************************/
/*STEP 1: evenly distribute pixels among the colors in the CLUT */
size_t grid[2] = {static_cast<size_t>(h_rows), static_cast<size_t>(h_cols)};
errorStatus = clEnqueueNDRangeKernel(queue, k_clut_distributePixels, 2, NULL, grid, NULL, 0, NULL, NULL);
clFinish(queue);
/*********/
/* ERROR */
/*********/
errorStatus = clEnqueueReadBuffer(queue, d_pixelGroup, CL_TRUE, 0, sizeof(int)*h_rows*h_cols, h_pixelGroup, 0, NULL, NULL);
std::cout << h_pixelGroup[7] << ", " << h_pixelGroup[8] << ", " << h_pixelGroup[9] << ", " << h_pixelGroup[10] << std::endl;
//do {
/*STEP 2: compute reprenstative */
/*STEP 3: compute distances and reassign pixel to group */
//copyFromConstantMemory
//} while (h_change != 0);
std::cout << "Done !!" << std::endl;
return 0;
}
I found my error. First of all: always check return values when you are learning new stuff. I remembered that from when I was learning CUDA, so I started checking everything with this simple macro:
// Checks an OpenCL status code: asserts that it equals CL_SUCCESS (debug
// builds) and otherwise returns it from the enclosing function.
// The argument is evaluated exactly once, and the macro expands to a single
// statement without a trailing ';', so `if (x) CL_SUCCESS_OR_RETURN(s); else ...`
// compiles as expected.
#define CL_SUCCESS_OR_RETURN(code) do { \
    const auto cl_status_ = (code); \
    assert(cl_status_ == CL_SUCCESS); \
    if (cl_status_ != CL_SUCCESS) { return cl_status_; } \
} while (0)
And the error was at the very beginning, where I check whether the device is CPU or GPU. I forgot that strcmp returns 0 when the strings are equal. After fixing this, everything worked beautifully!
Anyway, if you have any other suggestions or advice, or you see something ugly or not a best practice in the code, please comment.

Resources