How to enlarge bus width using Vitis libraries BLAS - opencl

I am trying to use the BLAS L1 implementation from Vitis libraries, I want to set bus width
to 128 bit, I am doing it using the ap_int.h header, defining an ap_int<128> structure.
I implemented an OpenCl kernel with VITIS HLS module, but the returned result is wrong (it works well for int32_t* type as input vector)
#include "ap_int.h"
#include <hls_stream.h>
#include "xf_blas.hpp"
using namespace xf::blas;
extern "C" {
void min_kernel(ap_int<128>* inVec,
int* resultIndex, // index of the minimal item in the vector
int p_n)
{
#pragma HLS INTERFACE m_axi port = inVec offset = slave bundle = gmem
#pragma HLS INTERFACE m_axi port = resultIndex offset = slave bundle = gmem
#pragma HLS INTERFACE s_axilite port = inVec bundle = control
#pragma HLS INTERFACE s_axilite port = resultIndex bundle = control
#pragma HLS INTERFACE s_axilite port = p_n bundle = control
#pragma HLS INTERFACE s_axilite port = return bundle = control
int res;
hls::stream<WideType<ap_int<128>, 1 << 2> > l_str;
#pragma HLS data_pack variable = l_str
#pragma HLS DATAFLOW
readVec2Stream<ap_int<128>, 1 << 2>(inVec, p_n, l_str);
amin<ap_int<128>, 2, int>(p_n, l_str, res);
*resultIndex = res;
}
}
Is there another way to set the bus width or define the parallely processed entries - in the amin function?

Related

Question about Config in Perf with Intel PT

Like perf wiki says(https://perf.wiki.kernel.org/index.php/Perf_tools_support_for_Intel%C2%AE_Processor_Trace#Reducing_and_handling_the_massive_amount_of_trace_data)
The Intel PT kernel driver creates a new PMU for Intel PT. PMU
events are selected by providing the PMU name followed by the
"config" separated by slashes. An enhancement has been made to
allow default "config" e.g. the option
-e intel_pt//
will use a default config value. Currently that is the same as
-e intel_pt/tsc,noretcomp=0/
which is the same as
-e intel_pt/tsc=1,noretcomp=0/
In the source code,
static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) {
char buf[256];
int mtc, mtc_periods = 0, mtc_period;
int psb_cyc, psb_periods, psb_period;
int pos = 0;
u64 config;
...
return config;
}
Which inital config for perf event.
Now I want to modify the config for my own code. How could I modify the config setting by knowing which bit means?
Like others'code
default config int perf_event_attr for Intel PT event*/
static __u64 pt_default_config() {
int mtc, mtc_periods = 0, mtc_period;
int psb_cyc, psb_periods, psb_period;
__u64 config = 0;
char c;
/* pt bit */
config |= 1;
/* tsc */
config |= (1 << 10);
/* disable ret compress */
config |= (1 << 11);
Does there has any mannual show the mapping like 1 << 10 means tsc enable?

ESP-IDF NimBLE Control the Relay and send the relay status to Client

I am so new in BLE and I found a code from github for send and receive ble data.
My question how can I convert the printf("Data from the client: %.*s\n", ctxt->om->om_len, ctxt->om->om_data);receive BLE data.
Its mean ı want to create a pointer for stored this value. Its need to be int or a thing of the hex value.
Thank you for your help.
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/event_groups.h"
#include "esp_wifi.h"
#include "esp_system.h"
#include "esp_event.h"
#include "nvs_flash.h"
#include "driver/gpio.h"
#include <stdio.h>
#include "esp_log.h"
#include "esp_nimble_hci.h"
#include "nimble/nimble_port.h"
#include "nimble/nimble_port_freertos.h"
#include "host/ble_hs.h"
#include "services/gap/ble_svc_gap.h"
#include "services/gatt/ble_svc_gatt.h"
#include "sdkconfig.h"
#include "string.h"
char *TAG = "BLE-Server";
uint8_t ble_addr_type;
void ble_app_advertise(void);
// Write data to ESP32 defined as server
static int device_write(uint16_t conn_handle, uint16_t attr_handle, struct ble_gatt_access_ctxt *ctxt, void *arg)
{
printf("Data from the client: %.*s\n", ctxt->om->om_len, ctxt->om->om_data);
return 0;
}
// Read data from ESP32 defined as server
static int device_read(uint16_t con_handle, uint16_t attr_handle, struct ble_gatt_access_ctxt *ctxt, void *arg)
{
os_mbuf_append(ctxt->om, "Data from the server", strlen("Data from the server"));
return 0;
}
// Array of pointers to other service definitions
// UUID - Universal Unique Identifier
static const struct ble_gatt_svc_def gatt_svcs[] = {
{.type = BLE_GATT_SVC_TYPE_PRIMARY,
.uuid = BLE_UUID16_DECLARE(0x180), // Define UUID for device type
.characteristics = (struct ble_gatt_chr_def[]){
{.uuid = BLE_UUID16_DECLARE(0xFEF4), // Define UUID for reading
.flags = BLE_GATT_CHR_F_READ,
.access_cb = device_read},
{.uuid = BLE_UUID16_DECLARE(0xDEAD), // Define UUID for writing
.flags = BLE_GATT_CHR_F_WRITE,
.access_cb = device_write},
{0}}},
{0}};
// BLE event handling
static int ble_gap_event(struct ble_gap_event *event, void *arg)
{
switch (event->type)
{
// Advertise if connected
case BLE_GAP_EVENT_CONNECT:
ESP_LOGI("GAP", "BLE GAP EVENT CONNECT %s", event->connect.status == 0 ? "OK!" : "FAILED!");
if (event->connect.status != 0)
{
ble_app_advertise();
}
break;
// Advertise again after completion of the event
case BLE_GAP_EVENT_ADV_COMPLETE:
ESP_LOGI("GAP", "BLE GAP EVENT");
ble_app_advertise();
break;
default:
break;
}
return 0;
}
// Define the BLE connection
void ble_app_advertise(void)
{
// GAP - device name definition
struct ble_hs_adv_fields fields;
const char *device_name;
memset(&fields, 0, sizeof(fields));
device_name = ble_svc_gap_device_name(); // Read the BLE device name
fields.name = (uint8_t *)device_name;
fields.name_len = strlen(device_name);
fields.name_is_complete = 1;
ble_gap_adv_set_fields(&fields);
// GAP - device connectivity definition
struct ble_gap_adv_params adv_params;
memset(&adv_params, 0, sizeof(adv_params));
adv_params.conn_mode = BLE_GAP_CONN_MODE_UND; // connectable or non-connectable
adv_params.disc_mode = BLE_GAP_DISC_MODE_GEN; // discoverable or non-discoverable
ble_gap_adv_start(ble_addr_type, NULL, BLE_HS_FOREVER, &adv_params, ble_gap_event, NULL);
}
// The application
void ble_app_on_sync(void)
{
ble_hs_id_infer_auto(0, &ble_addr_type); // Determines the best address type automatically
ble_app_advertise(); // Define the BLE connection
}
// The infinite task
void host_task(void *param)
{
nimble_port_run(); // This function will return only when nimble_port_stop() is executed
}
void app_main(void)
{
nvs_flash_init(); // 1 - Initialize NVS flash using
esp_nimble_hci_and_controller_init(); // 2 - Initialize ESP controller
nimble_port_init(); // 3 - Initialize the host stack
ble_svc_gap_device_name_set("BLE-Server"); // 4 - Initialize NimBLE configuration - server name
ble_svc_gap_init(); // 4 - Initialize NimBLE configuration - gap service
ble_svc_gatt_init(); // 4 - Initialize NimBLE configuration - gatt service
ble_gatts_count_cfg(gatt_svcs); // 4 - Initialize NimBLE configuration - config gatt services
ble_gatts_add_svcs(gatt_svcs); // 4 - Initialize NimBLE configuration - queues gatt services.
ble_hs_cfg.sync_cb = ble_app_on_sync; // 5 - Initialize application
nimble_port_freertos_init(host_task); // 6 - Run the thread
gpio_set_direction(GPIO_NUM_4, GPIO_MODE_OUTPUT);
int level = 0;
while (true) {
gpio_set_level(GPIO_NUM_4, level);
level = !level;
vTaskDelay(300 / portTICK_PERIOD_MS);
}
}
You can just copy the data to an array of bytes/uint8_t and use it.
I think it’s not wise to use a pointer to this data, because it’s not static.
For example:
uint8_t incoming[36];
memcpy(incoming,ctxt->om->om_data,ctxt->om->om_len);
The Results
1st=50 2nd=51 3rd=52 4nd=0
Receive Data = 1234
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/event_groups.h"
#include "esp_wifi.h"
#include "esp_system.h"
#include "esp_event.h"
#include "nvs_flash.h"
#include "driver/gpio.h"
#include <stdio.h>
#include "esp_log.h"
#include "esp_nimble_hci.h"
#include "nimble/nimble_port.h"
#include "nimble/nimble_port_freertos.h"
#include "host/ble_hs.h"
#include "services/gap/ble_svc_gap.h"
#include "services/gatt/ble_svc_gatt.h"
#include "sdkconfig.h"
#include "string.h"
char *TAG = "BLE-Server";
uint8_t ble_addr_type;
uint8_t rx_data[4];
void ble_app_advertise(void);
// Write data to ESP32 defined as server
static int device_write(uint16_t conn_handle, uint16_t attr_handle, struct ble_gatt_access_ctxt *ctxt, void *arg)
{
printf("Data from the client: %.*s\n", ctxt->om->om_len, ctxt->om->om_data);
memcpy(rx_data,ctxt->om->om_data,ctxt->om->om_len);
return 0;
}
// Read data from ESP32 defined as server
static int device_read(uint16_t con_handle, uint16_t attr_handle, struct ble_gatt_access_ctxt *ctxt, void *arg)
{
os_mbuf_append(ctxt->om, "Data from the server", strlen("Data from the server"));
return 0;
}
// Array of pointers to other service definitions
// UUID - Universal Unique Identifier
static const struct ble_gatt_svc_def gatt_svcs[] = {
{.type = BLE_GATT_SVC_TYPE_PRIMARY,
.uuid = BLE_UUID16_DECLARE(0x180), // Define UUID for device type
.characteristics = (struct ble_gatt_chr_def[]){
{.uuid = BLE_UUID16_DECLARE(0xFEF4), // Define UUID for reading
.flags = BLE_GATT_CHR_F_READ,
.access_cb = device_read},
{.uuid = BLE_UUID16_DECLARE(0xDEAD), // Define UUID for writing
.flags = BLE_GATT_CHR_F_WRITE,
.access_cb = device_write},
{0}}},
{0}};
// BLE event handling
static int ble_gap_event(struct ble_gap_event *event, void *arg)
{
switch (event->type)
{
// Advertise if connected
case BLE_GAP_EVENT_CONNECT:
ESP_LOGI("GAP", "BLE GAP EVENT CONNECT %s", event->connect.status == 0 ? "OK!" : "FAILED!");
if (event->connect.status != 0)
{
ble_app_advertise();
}
break;
// Advertise again after completion of the event
case BLE_GAP_EVENT_ADV_COMPLETE:
ESP_LOGI("GAP", "BLE GAP EVENT");
ble_app_advertise();
break;
default:
break;
}
return 0;
}
// Define the BLE connection
void ble_app_advertise(void)
{
// GAP - device name definition
struct ble_hs_adv_fields fields;
const char *device_name;
memset(&fields, 0, sizeof(fields));
device_name = ble_svc_gap_device_name(); // Read the BLE device name
fields.name = (uint8_t *)device_name;
fields.name_len = strlen(device_name);
fields.name_is_complete = 1;
ble_gap_adv_set_fields(&fields);
// GAP - device connectivity definition
struct ble_gap_adv_params adv_params;
memset(&adv_params, 0, sizeof(adv_params));
adv_params.conn_mode = BLE_GAP_CONN_MODE_UND; // connectable or non-connectable
adv_params.disc_mode = BLE_GAP_DISC_MODE_GEN; // discoverable or non-discoverable
ble_gap_adv_start(ble_addr_type, NULL, BLE_HS_FOREVER, &adv_params, ble_gap_event, NULL);
}
// The application
void ble_app_on_sync(void)
{
ble_hs_id_infer_auto(0, &ble_addr_type); // Determines the best address type automatically
ble_app_advertise(); // Define the BLE connection
}
// The infinite task
void host_task(void *param)
{
nimble_port_run(); // This function will return only when nimble_port_stop() is executed
}
void app_main(void)
{
nvs_flash_init(); // 1 - Initialize NVS flash using
esp_nimble_hci_and_controller_init(); // 2 - Initialize ESP controller
nimble_port_init(); // 3 - Initialize the host stack
ble_svc_gap_device_name_set("BLE-Server"); // 4 - Initialize NimBLE configuration - server name
ble_svc_gap_init(); // 4 - Initialize NimBLE configuration - gap service
ble_svc_gatt_init(); // 4 - Initialize NimBLE configuration - gatt service
ble_gatts_count_cfg(gatt_svcs); // 4 - Initialize NimBLE configuration - config gatt services
ble_gatts_add_svcs(gatt_svcs); // 4 - Initialize NimBLE configuration - queues gatt services.
ble_hs_cfg.sync_cb = ble_app_on_sync; // 5 - Initialize application
nimble_port_freertos_init(host_task); // 6 - Run the thread
gpio_set_direction(GPIO_NUM_4, GPIO_MODE_OUTPUT);
int level = 0;
while (true) {
uint8_t ctc_data;
ctc_data=rx_data[2];
gpio_set_level(GPIO_NUM_4, level);
level = !level;
printf("1st=%u 2nd=%u 3rd=%u 4nd=%u\n",rx_data[1],ctc_data,rx_data[3],rx_data[4]);
printf("Receive Data = %.*s\n", 4, rx_data);
vTaskDelay(1000 / portTICK_PERIOD_MS);
}
}

Get list of available WiFi connections in Linux

I am try to get list of SSID in Fedora 31 Linux, by D-Bus message, using Qt5.
I am checking many tutorials, but still cant communicate by D-Bus, and I still do not understand differences between interface, path and service. With documentation help (https://developer.gnome.org/NetworkManager/stable/spec.html) and Internet I wrote:
QDBusInterface nm("org.freedesktop.NetworkManager", "/org/freedesktop/NetworkManager", "org.freedesktop.NetworkManager.Device.Wireless", QDBusConnection::systemBus());
if(nm.isValid()) {
QDBusMessage msg = nm.call("GetAllAccessPoints");
}
But variable "msg" receiving one argument:
"No interface „org.freedesktop.NetworkManager.Device.Wireless” in object at path /org/freedesktop/NetworkManager"
How can I connect to D-Bus ?
Your confusion is justified, as the process is not really intuitive. Basically what you need to do is to first create a QDBusInterface representing NetworkManager itself. Via that object you need to get the list of the network interfaces, iterate through them, filter out the WiFi interface(s), creating a corresponding QDBusInterface, instruct the interface to scan the available networks, and then request the list of visible access points. Then you get the SSID property of each Access Point object. Here is a simple example which demonstrates the process with plain Qt:
list_ssid.pro:
QT -= gui
QT += dbus
SOURCES += list_ssid.cpp
list_ssid.cpp:
#include <QtCore/QCoreApplication>
#include <QtCore/QDebug>
#include <QtCore/QStringList>
#include <QtDBus/QtDBus>
#include <QDebug>
#include <QThread>
int main(int argc, char **argv)
{
QCoreApplication app(argc, argv);
// get the interface to nm
QDBusInterface nm("org.freedesktop.NetworkManager", "/org/freedesktop/NetworkManager",
"org.freedesktop.NetworkManager", QDBusConnection::systemBus());
if(!nm.isValid())
{
qFatal("Failed to connect to the system bus");
}
// get all devices
QDBusMessage msg = nm.call("GetDevices");
qDebug() << "GetDevices reply: " << msg << endl;
QDBusArgument arg = msg.arguments().at(0).value<QDBusArgument>();
if(arg.currentType() != QDBusArgument::ArrayType)
{
qFatal("Something went wrong with getting the device list");
}
QList<QDBusObjectPath> pathsLst = qdbus_cast<QList<QDBusObjectPath> >(arg);
foreach(QDBusObjectPath p, pathsLst)
{
qDebug() << "DEV PATH: " << p.path();
// creating an interface used to gather this devices properties
QDBusInterface device("org.freedesktop.NetworkManager", p.path(),
"org.freedesktop.NetworkManager.Device", QDBusConnection::systemBus());
// 2 is WiFi dev, see https://people.freedesktop.org/~lkundrak/nm-docs/nm-dbus-types.html#NMDeviceType
if (device.property("DeviceType").toInt() != 2)
{
continue;
}
// we got a wifi device, let's get an according dbus interface
QDBusInterface wifi_device("org.freedesktop.NetworkManager", p.path(),
"org.freedesktop.NetworkManager.Device.Wireless", QDBusConnection::systemBus());
// we need to call scan on the inteface prior to request the list of interfaces
QMap<QString, QVariant> argList;
QDBusMessage msg = wifi_device.call("RequestScan", argList);
QThread::sleep(2); // not the best solution, but here we just wait for the scan
// doing the actual call
msg = wifi_device.call("GetAllAccessPoints");
qDebug()<< "Answer for GetAllAccessPoints: " << msg << endl << endl;
// dig out the paths of the Access Point objects:
QDBusArgument ap_list_arg = msg.arguments().at(0).value<QDBusArgument>();
QList<QDBusObjectPath> ap_path_list = qdbus_cast<QList<QDBusObjectPath> >(ap_list_arg);
// and iterate through the list
foreach(QDBusObjectPath p ,ap_path_list)
{
// for each Access Point we create an interface
QDBusInterface ap_interface("org.freedesktop.NetworkManager", p.path(),
"org.freedesktop.NetworkManager.AccessPoint", QDBusConnection::systemBus());
// and getting the name of the SSID
qDebug() << "SSID: " << ap_interface.property("Ssid").toString();
}
}
return 0;
}
The same using networkmanager-qt, for the sake of comparison:
CMakeLists.txt:
project(ssid_list LANGUAGES CXX)
set(CMAKE_INCLUDE_CURRENT_DIR ON)
set(CMAKE_AUTOUIC ON)
set(CMAKE_AUTOMOC ON)
set(CMAKE_AUTORCC ON)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
find_package(Qt5 REQUIRED COMPONENTS
Core
Gui
Network
DBus
)
find_package(KF5NetworkManagerQt REQUIRED)
add_executable(ssid_list
ssid_list.cpp
)
target_link_libraries(ssid_list Qt5::Core Qt5::DBus Qt5::Network KF5::NetworkManagerQt)
ssid_list.cpp
#include <arpa/inet.h>
#include <QThread>
#include <NetworkManagerQt/Manager>
#include <NetworkManagerQt/Device>
#include <NetworkManagerQt/WirelessDevice>
#include <NetworkManagerQt/AccessPoint>
int main()
{
// getting all of the devices, and iterate through them
NetworkManager::Device::List list = NetworkManager::networkInterfaces();
Q_FOREACH (NetworkManager::Device::Ptr dev, list)
{
if(dev->type() != NM_DEVICE_TYPE_WIFI)
{
//skipping non-wifi interfaces
continue;
}
// creating a Wifi device with this object path
NetworkManager::WirelessDevice wifi_dev(dev->uni());
wifi_dev.requestScan();
QThread::sleep(2); // still not the best solution:w
//get the Object Path of all the visible access points
// and iterate through
foreach(QString ap_path, wifi_dev.accessPoints())
{
// creating an AccessPoint object with this path
NetworkManager::AccessPoint ap(ap_path);
// and finally get the SSID
qDebug() << "SSID:" << ap.ssid();
}
}
}

Sending ZeroMQ data using TCP

I am trying to make a simple server.
A language I am restricted to use is c++.
I am using ZeroMQ.
I have creatred a simple server and a client, as in documentation.
ZeroMQ uses TCP instead of HTTP.
I know that HTTP's underlying layer is TCP, so I want to know will it have any performance issues by using TCP instead of HTTP.
And for HTTP I can use curl to test the application.
What should I use for TCP ( curl command to send request to a socket with a string parameter ).
Server:
#include <zmq.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
int main (void)
{ // Socket to talk to clients
void *context = zmq_ctx_new ();
void *responder = zmq_socket (context, ZMQ_REP);
int rc = zmq_bind (responder, "tcp://*:5555");
assert (rc == 0);
while (1)
{
char buffer [10];
zmq_recv (responder, buffer, 10, 0);
printf ("Received Hello\n");
// trying to send json object
zmq_send (responder, "World", 5, 0);
// zmq_send (responder, "World", 5, 0);
sleep (1); // Do some 'work'
}
return 0;
}
client:
// Hello World client
// Connects REQ socket to tcp://localhost:5555
// Sends "Hello" to server, expects "World" back
#include <zmq.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
int main (void)
{
void *context = zmq_ctx_new ();
// Socket to talk to server
printf ("Connecting to hello world server...\n");
void *requester = zmq_socket (context, ZMQ_REQ);
zmq_connect (requester, "tcp://localhost:5555");
int request_nbr;
for (request_nbr = 0; request_nbr != 10; request_nbr++)
{
zmq_msg_t request;
zmq_msg_init_size (&request, 5);
memcpy (zmq_msg_data (&request), "Hello", 5);
printf ("Sending Hello %d...\n", request_nbr);
zmq_msg_send (&request, requester, 0);
zmq_msg_close (&request);
zmq_msg_t reply;
zmq_msg_init (&reply);
zmq_msg_recv (&reply, requester, 0);
printf ("Received World %d\n", request_nbr);
zmq_msg_close (&reply);
}
zmq_close (requester);
zmq_ctx_destroy (context);
return 0;
}
Q1: will it have any performance issues by using TCP instead of HTTP?
A1: yes, it will. Both performance and latency will benefit from avoiding HTTP-rich-re-wrapping of data
Q2: What should I use for TCP to send a request to a socket with a string parameter?
A2: No command ( curl command ) will help you. ZeroMQ uses certain line-code ( assume it as a trivial protocol between communicationg peers ), so a standalone command-line tool will not be able to match the line-code requirement off-the-shelf. Solution? Create a simple c-programme, that will consume a cmd-line arguments ( the string, as an example ) and assemble a ZeroMQ-layer compatible data-framing so as to communicate with the remote peer. Also you shall notice, that for ZeroMQ REQ/REP Formal Communication Pattern to work, this proxy-tool will have to become the sole respective REQ, resp. REP entity in the step-forward-locking diadic-communication relation, thus also providing an awaited response, the REQ-side is expecting to receive after the REP-side has received a message.

error CL_OUT_OF_RESOURCES while reading back data in host memory while using atomic function in opencl kernel

I am trying to implement atomic functions in my opencl kernel. Multiple threads I am creating are parallely trying to write a single memory location. I want them to perform serial execution on that particular line of code. I have never used an atomic function before.
I found similar problems on many blogs and forums,and I am trying one solution.,i.e. use of two different functions 'acquire' and 'release' for locking and unlocking the semaphore. I have included necessary opencl extensions, which are all surely supported by my device (NVIDIA GeForce GTX 630M).
My kernel execution configuration:
global_item_size = 8;
ret = clEnqueueNDRangeKernel(command_queue2, kernel2, 1, NULL, &global_item_size2, &local_item_size2, 0, NULL, NULL);
Here is my code: reducer.cl
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
typedef struct data
{
double dattr[10];
int d_id;
int bestCent;
}Data;
typedef struct cent
{
double cattr[5];
int c_id;
}Cent;
__global void acquire(__global int* mutex)
{
int occupied;
do {
occupied = atom_xchg(mutex, 1);
} while (occupied>0);
}
__global void release(__global int* mutex)
{
atom_xchg(mutex, 0); //the previous value, which is returned, is ignored
}
__kernel void reducer(__global int *keyMobj, __global int *valueMobj,__global Data *dataMobj,__global Cent *centMobj,__global int *countMobj,__global double *sumMobj, __global int *mutex)
{
__local double sum[2][2];
__local int cnt[2];
int i = get_global_id(0);
int n,j;
if(i<2)
cnt[i] = countMobj[i];
barrier(CLK_GLOBAL_MEM_FENCE);
n = keyMobj[i];
for(j=0; j<2; j++)
{
barrier(CLK_GLOBAL_MEM_FENCE);
acquire(mutex);
sum[n][j] += dataMobj[i].dattr[j];
release(mutex);
}
if(i<2)
{
for(j=0; j<2; j++)
{
sum[i][j] = sum[i][j]/countMobj[i];
centMobj[i].cattr[j] = sum[i][j];
}
}
}
Unfortunately the solution doesn't seem like working for me. When I am reading back the centMobj into the host memory, using
ret = clEnqueueReadBuffer(command_queue2, centMobj, CL_TRUE, 0, (sizeof(Cent) * 2), centNode, 0, NULL, NULL);
ret = clEnqueueReadBuffer(command_queue2, sumMobj, CL_TRUE, 0, (sizeof(double) * 2 * 2), sum, 0, NULL, NULL);
it is giving me error with error code = -5 (CL_OUT_OF_RESOURCES) for both centMobj and sumMobj.
I am not getting if there is any problem in my atomic function code or problem is in reading back data into the host memory. If I am using the atomic function incorrectly, please make me correct.
Thank you in advance.
In OpenCL, synchronization between work items can be done only inside a work-group. Code trying to synchronize work-items across different work-groups may work in some very specific (and implementation/device dependent) cases, but will fail in the general case.
The solution is to either use atomics to serialize accesses to the same memory location (but without blocking any work item), or redesign the code differently.

Resources