Better performance for kernel on ant colony optimisation - opencl

I'm trying to get a better performance on my ant colony optimisation problem. In order to do so, I'm using openCL to run the update pheromones part in parallel. I have just started learning openCL and this is the kernel code I have developed. Although it runs faster than the sequential version, I still think I can achieve more performance with it, but I'm not finding other things I can do. Is there a way to improve this code even more ?
PS: I have tested this code only on the CPU, since the computer I am working on doesn't have a GPU.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
int calculateLengthOfTrail(__global int*, const int, __global int*, const int );
int edgeInTrail(const int ,const int , __global int* , const int , const int );
int indexOfCity(__global int*, const int, const int, const int);
__kernel void updatePheromones(
__global double* pheromones,
__global int* ants,
__global int* distances,
__local double* pheromones_old,
const int numCities,
const int numAnts,
const double pheromoneDecreaseFactor,
const double pheromoneIncreaseFactor
)
{
int i = get_global_id(0);
int k, j;
if(i<numCities)
{
for(j = i +1; j<numCities; j++)
{
for (k = 0; k < numAnts; k++)
{
double size = calculateLengthOfTrail(ants,k, distances, numCities);
double decrease = (1.0 - pheromoneDecreaseFactor) * pheromones_old[i+numCities*j];
double increase = 0.0;
int edge = edgeInTrail(i, j, ants, k, numCities);
if (edge== 1)
increase = (pheromoneIncreaseFactor / size);
pheromones[i+numCities*j] = decrease + increase;
if (pheromones[i+numCities*j] < 0.0001)
pheromones[i +numCities*j] = 0.0001;
else if (pheromones[i + numCities*j] > 100000.0)
pheromones[i+numCities*j] = 100000.0;
pheromones[j+numCities*i] = pheromones[i+numCities*j];
}
}
}
}
int edgeInTrail(const int cityX, const int cityY, __global int* ants, const int row, const int numCities)
{
int lastIndex = numCities - 1;
int indexCity = indexOfCity(ants, row, cityX, numCities);
if (indexCity == 0 && ants[1+numCities*row] == cityY)
return 1;
else if (indexCity == 0 && ants[lastIndex+numCities*row] == cityY)
return 1;
else if (indexCity == 0)
return 0;
else if (indexCity == lastIndex && ants[(lastIndex-1)+numCities*row] == cityY)
return 1;
else if (indexCity == lastIndex && ants[row*numCities] == cityY)
return 1;
else if (indexCity == lastIndex)
return 0;
else if (ants[(indexCity-1)+numCities*row] == cityY)
return 1;
else if (ants[(indexCity+1)+numCities*row] == cityY)
return 1;
else
return 0;
}
int calculateLengthOfTrail(__global int* ants, const int row, __global int* distances, const int numCities)
{
int sumDistance = 0;
int i;
for(i =0; i<numCities-1; i++)
sumDistance += distances[ants[i+numCities*row]+numCities*ants[(i+1)+numCities*row]];
return sumDistance;
}
int indexOfCity(__global int* ants, int row, int city, int numCities)
{
int i;
for(i =0; i<numCities; i++)
{
if(ants[i+numCities*row] == city)
return i;
}
return -1;
}

Related

Freeing shows double free or corruption

typedef struct row_container
{
int size;
char *data;
}row_container;
typedef struct message_bar
{
char message[80];
time_t message_time;
}message_bar;
typedef struct brick_win_size
{
int row;
int col;
int current_row;
int current_column;
int data_row;
int row_off;
int col_off;
row_container *container;
char *filename;
message_bar *msg_bar;
}brick_window;
void container_delete_character(struct brick_win_size *win)
{
uint8_t insert_flag = 1;
int row = win->current_row;
int offset = win->current_column;
row_container *container = win->container;
char *data = container[row].data;
if(offset < container[row].size){
memmove(&data[offset], &data[offset + 1], container[row].size - 1);
data = realloc(data, container[row].size - 1);
container[row].data = data;
container[row].size--;
}
if(insert_flag){
if((container[row].size == 0) && (row < win->data_row)){
for(int index = row; index < win->data_row; index++){
if(index != win->data_row -1){
container[index] = container[index+1];
}else{
free(container[index]); //error line...............................
}
}
if(win->data_row != 0){
win->container = realloc(win->container, sizeof(row_container) * (win->data_row - 1));
win->data_row --;
}
}
}
}
Here in the above code i just want to delete one element at one point in the function. so i am just reassigning pointers with the other pointer and finally i am freeing the final element which is eventually ending up in build failure *expected ‘void ’ but argument is of type ‘row_container {aka struct row_container}’
Moreover if i give the & de reference operator it is ending up in double free or corruption error during execution

How to write Nested Loop in Kernel side OpenCL

I'm a beginner in OpenCL. I'm trying to implement an OpenCL application.I have a doubt that how to write opencl kernel code . i have given a original c code.
Question :- help me to change that given c code into opencl kernel code?.
ORIGINAL C CODE:
int i, j;
// initialization of indexes
for (i = 0; i<n; i++)
Index[i] = i;
// Bubble sort
for (i = 0; i<n - 1; i++)
{
for (j = i + 1; j<n; j++)
{
if (I[i] > I[j])
{
double z = I[i]; // exchange attractiveness
I[i] = I[j];
I[j] = z;
z = f[i]; // exchange fitness
f[i] = f[j];
f[j] = z;
int k = Index[i]; // exchange indexes
Index[i] = Index[j];
Index[j] = k;
}
}
}
Example for 4096 element arrays(alternate bubble1 and bubble2 at least 2048 times--->4096(N) kernel executions ):
index init on host side since its just assignment.
Auxiliary functions:
void swap2p(__private int * I,int i,int j)
{
int tmp=I[i];
I[i]=I[j];
I[j]=tmp;
}
void swap2g(__global int * I,int i,int j)
{
int tmp=I[i];
I[i]=I[j];
I[j]=tmp;
}
Alternating kernel-1:
__kernel void bubble1(__global int * I, __global int * f, __global int * Index){
int threadId=get_global_id(0);
__private int vals[2];
if(threadId*2+1<4096)
{
vals[0]=I[threadId*2];
vals[1]=I[threadId*2+1];
if(vals[0]>vals[1])
{
swap2p(vals,threadId*2,threadId*2+1);
swap2g(f,threadId*2,threadId*2+1);
swap2g(Index,threadId*2,threadId*2+1);
I[threadId*2]=vals[0];
I[threadId*2+1]=vals[1];
}
}
}
alternating kernel-2:
__kernel void bubble2(__global int * I){
int threadId=get_global_id(0);
__private int vals[2];
if(threadId*2+2<4096)
{
vals[0]=I[threadId*2+1];
vals[1]=I[threadId*2+2];
if(vals[0]>vals[1])
{
swap2p(vals,threadId*2+1,threadId*2+2);
swap2g(f,threadId*2+1,threadId*2+2);
swap2g(Index,threadId*2+1,threadId*2+2);
I[threadId*2+1]=vals[0];
I[threadId*2+2]=vals[1];
}
}
}
Global thread number: N/2 (2048)

Getting segmentation fault (or bad access) for some inputs and the program halts

#include <iostream>
#include <vector>
#include <string>
using namespace std;
void step_selection_sort(vector <int> &a, int size, int idx){
int i,j,min,temp;
i = idx;
min = i;
for (j=i+1;j<size;j++)
{
if (a[min]>a[j])
min=j;
}
if (min!=i)
{
temp = a[i];
a[i] = a[min];
a[min] = temp;
}
idx++;
}
void selection_sort(vector <int> &a, int size, int idx){
int i;
for(i=0;i<size;i++)
{
step_selection_sort(a,size,idx);
}
}
void step_desc_sort(vector <int>& a, int size, int idx){
int i,j,max,temp;
i = idx;
max = i;
for (j=i+1;j<size;j++)
{
if (a[max]<a[j])
max=j;
}
if (max!=i)
{
temp = a[i];
a[i] = a[max];
a[max] = temp;
}
idx++;
}
void desc_sort(vector <int>& a, int size, int idx){
int i;
for(i=0;i<size;i++)
{
step_desc_sort(a,size,idx);
}
}
void swap (int & a, int & b)
{
int t = a;
a = b;
b = t;
}
int findCeil (vector <int>& nums, int first, int begin, int end)
{
int ceilIndex = begin;
for (int i = begin+1; i <= end; i++)
if (nums[i] > first && nums[i] < nums[ceilIndex])
ceilIndex = i;
return ceilIndex;
}
int findBottom(vector <int>& nums,int first,int begin,int end)
{
int bottomIndex = begin;
for (int i = begin+1; i <= end; i++)
if (nums[i] < first && nums[i] > nums[bottomIndex])
bottomIndex = i;
return bottomIndex;
}
void sortedPermutations_ASC (vector <int> nums,int num)
{
bool isfinished=false;
if(isfinished==false)
for(int i=0;i<num;i++)
cout << nums[i]; //bad access when giving inputs bigger than 8
cout << endl;
int k;
for ( k = num - 2; k >= 0; --k )
if (nums[k] < nums[k+1])
break;
if ( k == -1 )
isfinished=true;
else
{
int ceilIndex = findCeil( nums, nums[k], k + 1, num - 1 );
swap( nums[k], nums[ceilIndex] );
selection_sort(nums,num,k+1);
sortedPermutations_ASC(nums,num);
}
}
void sortedPermutations_DESC (vector <int> nums,int num)
{
int i;
bool isfinished=false;
if(isfinished==false)
for(i=0;i<num;i++)
cout << nums[i];
cout << endl;
int k;
for ( k = num - 2; k >= 0; --k )
if (nums[k] > nums[k+1])
break;
if ( k == -1 )
isfinished=true;
else
{
int bottomIndex = findBottom( nums, nums[k], k + 1, num - 1 );
swap( nums[k], nums[bottomIndex] );
desc_sort(nums,num,k+1);
sortedPermutations_DESC(nums,num);
}
return;
}
int main(){
vector <int> nums;
string line,temp;
int num,j,k;
getline(cin,line);
while(j<line.size() && line[j]!=' ')
j++;
num=stoi(line.substr(0,j));
string kind;
j++;
kind=line.substr(j);
if(kind=="ASC"){
for(k=0;k<num;k++)
nums.push_back(k+1);
sortedPermutations_ASC(nums,num);
}
if(kind=="DESC"){
for(k=0;k<num;k++)
nums.push_back(num-k);
sortedPermutations_DESC(nums,num);
}
return 0;
}
here's is my code. it gives the permutations of a number.It works properly when inputs are between 1 and 8 .But it doesn't work with numbers bigger than 8 .
for example if I give
9 ASC (it means in Ascending order)
to the program , I get "Segmentation Fault:11" in terminal (mac) after printing some of the permutations .
I tried running it in Xcode . with the same input it says :
Thread 1:EXC_BAD_ACCESS(code=2,address=0x7ffff5f3fffc8)
for the line that I put comment in front of it .
I don't know what to do anymore ...
Any help would be appreciated - thanks in advance

rosserial arduino hello world won't verify

Edit: Solved
SOLUTION:
I'm running Arduino 1.0.5
I fixed the problem by changing /Sketchbook/library/ros_lib/ros/node_handle.h line 260 from
}else if (topic_ == TopicInfo::ID_TX_STOP){
to
}else if (topic_ == ID_TX_STOP){
However, this gave me a new error message:
/usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp: In member function 'size_t Print::print(const __FlashStringHelper*)':
/usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp:44:9: error: 'prog_char' does not name a type
/usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp:47:23: error: 'p' was not declared in this scope
So to fix this I edited usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp, line 44 from
const prog_char *p = (const prog_char *)ifsh;
to
const char PROGMEM *p = (const char PROGMEM *)ifsh;
Now it compiles!
Original Problem:
I've been using this tutorial to get everything set up: (http://wiki.ros.org/rosserial_arduino/Tutorials/Arduino%20IDE%20Setup) I can install everything without an issue I think, the ros_lib folder is placed in my sketchbook libraries. But when I do the next tutorial with the helloworld example, the code does not verify correctly. When I try to verify with the checkmark in the Arduino IDE, I get the following set of error codes:
In file included from /home/user/sketchbook/libraries/ros_lib/ros.h:38:0,
from HelloWorld.cpp:6:
/home/user/sketchbook/libraries/ros_lib/ros/node_handle.h: In member function 'virtual int ros::NodeHandle_::spinOnce()':
/home/user/sketchbook/libraries/ros_lib/ros/node_handle.h:260:45: error: expected unqualified-id before numeric constant
/home/user/sketchbook/libraries/ros_lib/ros/node_handle.h:260:45: error: expected ')' before numeric constant
I've reinstalled the ros_lib as well as rosserial and I keep getting this error, so I don't know what the problem is. I looked around line 260 of the node_handle.h file but I didn't notice anything out of place.
Here's node_handle.h: (spacing might be a little off)
/*
* Software License Agreement (BSD License)
*
* Copyright (c) 2011, Willow Garage, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of Willow Garage, Inc. nor the names of its
* contributors may be used to endorse or promote prducts derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROS_NODE_HANDLE_H_
#define ROS_NODE_HANDLE_H_
#include "std_msgs/Time.h"
#include "rosserial_msgs/TopicInfo.h"
#include "rosserial_msgs/Log.h"
#include "rosserial_msgs/RequestParam.h"
#define SYNC_SECONDS 5
#define MODE_FIRST_FF 0
/*
* The second sync byte is a protocol version. It's value is 0xff for the first
* version of the rosserial protocol (used up to hydro), 0xfe for the second version
* (introduced in hydro), 0xfd for the next, and so on. Its purpose is to enable
* detection of mismatched protocol versions (e.g. hydro rosserial_python with groovy
* rosserial_arduino. It must be changed in both this file and in
* rosserial_python/src/rosserial_python/SerialClient.py
*/
#define MODE_PROTOCOL_VER 1
#define PROTOCOL_VER1 0xff // through groovy
#define PROTOCOL_VER2 0xfe // in hydro
#define PROTOCOL_VER PROTOCOL_VER2
#define MODE_SIZE_L 2
#define MODE_SIZE_H 3
#define MODE_SIZE_CHECKSUM 4 // checksum for msg size received from size L and H
#define MODE_TOPIC_L 5 // waiting for topic id
#define MODE_TOPIC_H 6
#define MODE_MESSAGE 7
#define MODE_MSG_CHECKSUM 8 // checksum for msg and topic id
#define MSG_TIMEOUT 20 //20 milliseconds to recieve all of message data
#define ID_TX_STOP 11 //hardcode for hydro version
#include "msg.h"
namespace ros {
class NodeHandleBase_{
public:
virtual int publish(int id, const Msg* msg)=0;
virtual int spinOnce()=0;
virtual bool connected()=0;
};
}
#include "publisher.h"
#include "subscriber.h"
#include "service_server.h"
#include "service_client.h"
namespace ros {
using rosserial_msgs::TopicInfo;
/* Node Handle */
template<class Hardware,
int MAX_SUBSCRIBERS=25,
int MAX_PUBLISHERS=25,
int INPUT_SIZE=512,
int OUTPUT_SIZE=512>
class NodeHandle_ : public NodeHandleBase_
{
protected:
Hardware hardware_;
/* time used for syncing */
unsigned long rt_time;
/* used for computing current time */
unsigned long sec_offset, nsec_offset;
unsigned char message_in[INPUT_SIZE];
unsigned char message_out[OUTPUT_SIZE];
Publisher * publishers[MAX_PUBLISHERS];
Subscriber_ * subscribers[MAX_SUBSCRIBERS];
/*
* Setup Functions
*/
public:
NodeHandle_() : configured_(false) {
for(unsigned int i=0; i< MAX_PUBLISHERS; i++)
publishers[i] = 0;
for(unsigned int i=0; i< MAX_SUBSCRIBERS; i++)
subscribers[i] = 0;
for(unsigned int i=0; i< INPUT_SIZE; i++)
message_in[i] = 0;
for(unsigned int i=0; i< OUTPUT_SIZE; i++)
message_out[i] = 0;
req_param_resp.ints_length = 0;
req_param_resp.ints = NULL;
req_param_resp.floats_length = 0;
req_param_resp.floats = NULL;
req_param_resp.ints_length = 0;
req_param_resp.ints = NULL;
}
Hardware* getHardware(){
return &hardware_;
}
/* Start serial, initialize buffers */
void initNode(){
hardware_.init();
mode_ = 0;
bytes_ = 0;
index_ = 0;
topic_ = 0;
};
/* Start a named port, which may be network server IP, initialize buffers */
void initNode(char *portName){
hardware_.init(portName);
mode_ = 0;
bytes_ = 0;
index_ = 0;
topic_ = 0;
};
protected:
//State machine variables for spinOnce
int mode_;
int bytes_;
int topic_;
int index_;
int checksum_;
bool configured_;
/* used for syncing the time */
unsigned long last_sync_time;
unsigned long last_sync_receive_time;
unsigned long last_msg_timeout_time;
public:
/* This function goes in your loop() function, it handles
* serial input and callbacks for subscribers.
*/
virtual int spinOnce(){
/* restart if timed out */
unsigned long c_time = hardware_.time();
if( (c_time - last_sync_receive_time) > (SYNC_SECONDS*2200) ){
configured_ = false;
}
/* reset if message has timed out */
if ( mode_ != MODE_FIRST_FF){
if (c_time > last_msg_timeout_time){
mode_ = MODE_FIRST_FF;
}
}
/* while available buffer, read data */
while( true )
{
int data = hardware_.read();
if( data < 0 )
break;
checksum_ += data;
if( mode_ == MODE_MESSAGE ){ /* message data being recieved */
message_in[index_++] = data;
bytes_--;
if(bytes_ == 0) /* is message complete? if so, checksum */
mode_ = MODE_MSG_CHECKSUM;
}else if( mode_ == MODE_FIRST_FF ){
if(data == 0xff){
mode_++;
last_msg_timeout_time = c_time + MSG_TIMEOUT;
}
}else if( mode_ == MODE_PROTOCOL_VER ){
if(data == PROTOCOL_VER){
mode_++;
}else{
mode_ = MODE_FIRST_FF;
if (configured_ == false)
requestSyncTime(); /* send a msg back showing our protocol version */
}
}else if( mode_ == MODE_SIZE_L ){ /* bottom half of message size */
bytes_ = data;
index_ = 0;
mode_++;
checksum_ = data; /* first byte for calculating size checksum */
}else if( mode_ == MODE_SIZE_H ){ /* top half of message size */
bytes_ += data<<8;
mode_++;
}else if( mode_ == MODE_SIZE_CHECKSUM ){
if( (checksum_%256) == 255)
mode_++;
else
mode_ = MODE_FIRST_FF; /* Abandon the frame if the msg len is wrong */
}else if( mode_ == MODE_TOPIC_L ){ /* bottom half of topic id */
topic_ = data;
mode_++;
checksum_ = data; /* first byte included in checksum */
}else if( mode_ == MODE_TOPIC_H ){ /* top half of topic id */
topic_ += data<<8;
mode_ = MODE_MESSAGE;
if(bytes_ == 0)
mode_ = MODE_MSG_CHECKSUM;
}else if( mode_ == MODE_MSG_CHECKSUM ){ /* do checksum */
mode_ = MODE_FIRST_FF;
if( (checksum_%256) == 255){
if(topic_ == TopicInfo::ID_PUBLISHER){
requestSyncTime();
negotiateTopics();
last_sync_time = c_time;
last_sync_receive_time = c_time;
return -1;
}else if(topic_ == TopicInfo::ID_TIME){
syncTime(message_in);
}else if (topic_ == TopicInfo::ID_PARAMETER_REQUEST){
req_param_resp.deserialize(message_in);
param_recieved= true;
}else if(topic_ == TopicInfo::ID_TX_STOP){
configured_ = false;
}else{
if(subscribers[topic_-100])
subscribers[topic_-100]->callback( message_in );
}
}
}
}
/* occasionally sync time */
if( configured_ && ((c_time-last_sync_time) > (SYNC_SECONDS*500) )){
requestSyncTime();
last_sync_time = c_time;
}
return 0;
}
/* Are we connected to the PC? */
virtual bool connected() {
return configured_;
};
/********************************************************************
* Time functions
*/
void requestSyncTime()
{
std_msgs::Time t;
publish(TopicInfo::ID_TIME, &t);
rt_time = hardware_.time();
}
void syncTime( unsigned char * data )
{
std_msgs::Time t;
unsigned long offset = hardware_.time() - rt_time;
t.deserialize(data);
t.data.sec += offset/1000;
t.data.nsec += (offset%1000)*1000000UL;
this->setNow(t.data);
last_sync_receive_time = hardware_.time();
}
Time now(){
unsigned long ms = hardware_.time();
Time current_time;
current_time.sec = ms/1000 + sec_offset;
current_time.nsec = (ms%1000)*1000000UL + nsec_offset;
normalizeSecNSec(current_time.sec, current_time.nsec);
return current_time;
}
void setNow( Time & new_now )
{
unsigned long ms = hardware_.time();
sec_offset = new_now.sec - ms/1000 - 1;
nsec_offset = new_now.nsec - (ms%1000)*1000000UL + 1000000000UL;
normalizeSecNSec(sec_offset, nsec_offset);
}
/********************************************************************
* Topic Management
*/
/* Register a new publisher */
bool advertise(Publisher & p)
{
for(int i = 0; i < MAX_PUBLISHERS; i++){
if(publishers[i] == 0){ // empty slot
publishers[i] = &p;
p.id_ = i+100+MAX_SUBSCRIBERS;
p.nh_ = this;
return true;
}
}
return false;
}
/* Register a new subscriber */
template<typename MsgT>
bool subscribe(Subscriber< MsgT> & s){
for(int i = 0; i < MAX_SUBSCRIBERS; i++){
if(subscribers[i] == 0){ // empty slot
subscribers[i] = (Subscriber_*) &s;
s.id_ = i+100;
return true;
}
}
return false;
}
/* Register a new Service Server */
template<typename MReq, typename MRes>
bool advertiseService(ServiceServer<MReq,MRes>& srv){
bool v = advertise(srv.pub);
for(int i = 0; i < MAX_SUBSCRIBERS; i++){
if(subscribers[i] == 0){ // empty slot
subscribers[i] = (Subscriber_*) &srv;
srv.id_ = i+100;
return v;
}
}
return false;
}
/* Register a new Service Client */
template<typename MReq, typename MRes>
bool serviceClient(ServiceClient<MReq, MRes>& srv){
bool v = advertise(srv.pub);
for(int i = 0; i < MAX_SUBSCRIBERS; i++){
if(subscribers[i] == 0){ // empty slot
subscribers[i] = (Subscriber_*) &srv;
srv.id_ = i+100;
return v;
}
}
return false;
}
void negotiateTopics()
{
rosserial_msgs::TopicInfo ti;
int i;
for(i = 0; i < MAX_PUBLISHERS; i++)
{
if(publishers[i] != 0) // non-empty slot
{
ti.topic_id = publishers[i]->id_;
ti.topic_name = (char *) publishers[i]->topic_;
ti.message_type = (char *) publishers[i]->msg_->getType();
ti.md5sum = (char *) publishers[i]->msg_->getMD5();
ti.buffer_size = OUTPUT_SIZE;
publish( publishers[i]->getEndpointType(), &ti );
}
}
for(i = 0; i < MAX_SUBSCRIBERS; i++)
{
if(subscribers[i] != 0) // non-empty slot
{
ti.topic_id = subscribers[i]->id_;
ti.topic_name = (char *) subscribers[i]->topic_;
ti.message_type = (char *) subscribers[i]->getMsgType();
ti.md5sum = (char *) subscribers[i]->getMsgMD5();
ti.buffer_size = INPUT_SIZE;
publish( subscribers[i]->getEndpointType(), &ti );
}
}
configured_ = true;
}
virtual int publish(int id, const Msg * msg)
{
if(id >= 100 && !configured_)
return 0;
/* serialize message */
unsigned int l = msg->serialize(message_out+7);
/* setup the header */
message_out[0] = 0xff;
message_out[1] = PROTOCOL_VER;
message_out[2] = (unsigned char) ((unsigned int)l&255);
message_out[3] = (unsigned char) ((unsigned int)l>>8);
message_out[4] = 255 - ((message_out[2] + message_out[3])%256);
message_out[5] = (unsigned char) ((int)id&255);
message_out[6] = (unsigned char) ((int)id>>8);
/* calculate checksum */
int chk = 0;
for(int i =5; i<l+7; i++)
chk += message_out[i];
l += 7;
message_out[l++] = 255 - (chk%256);
if( l <= OUTPUT_SIZE ){
hardware_.write(message_out, l);
return l;
}else{
logerror("Message from device dropped: message larger than buffer.");
return -1;
}
}
/********************************************************************
* Logging
*/
private:
void log(char byte, const char * msg){
rosserial_msgs::Log l;
l.level= byte;
l.msg = (char*)msg;
publish(rosserial_msgs::TopicInfo::ID_LOG, &l);
}
public:
void logdebug(const char* msg){
log(rosserial_msgs::Log::ROSDEBUG, msg);
}
void loginfo(const char * msg){
log(rosserial_msgs::Log::INFO, msg);
}
void logwarn(const char *msg){
log(rosserial_msgs::Log::WARN, msg);
}
void logerror(const char*msg){
log(rosserial_msgs::Log::ERROR, msg);
}
void logfatal(const char*msg){
log(rosserial_msgs::Log::FATAL, msg);
}
/********************************************************************
* Parameters
*/
private:
bool param_recieved;
rosserial_msgs::RequestParamResponse req_param_resp;
bool requestParam(const char * name, int time_out = 1000){
param_recieved = false;
rosserial_msgs::RequestParamRequest req;
req.name = (char*)name;
publish(TopicInfo::ID_PARAMETER_REQUEST, &req);
unsigned int end_time = hardware_.time() + time_out;
while(!param_recieved ){
spinOnce();
if (hardware_.time() > end_time) return false;
}
return true;
}
public:
bool getParam(const char* name, int* param, int length =1){
if (requestParam(name) ){
if (length == req_param_resp.ints_length){
//copy it over
for(int i=0; i<length; i++)
param[i] = req_param_resp.ints[i];
return true;
}
}
return false;
}
bool getParam(const char* name, float* param, int length=1){
if (requestParam(name) ){
if (length == req_param_resp.floats_length){
//copy it over
for(int i=0; i<length; i++)
param[i] = req_param_resp.floats[i];
return true;
}
}
return false;
}
bool getParam(const char* name, char** param, int length=1){
if (requestParam(name) ){
if (length == req_param_resp.strings_length){
//copy it over
for(int i=0; i<length; i++)
strcpy(param[i],req_param_resp.strings[i]);
return true;
}
}
return false;
}
};
}
#endif
I tried commenting out line 160 and 161:
//}else if (topic_ == TopicInfo::ID_TX_STOP){
//configured_ = false;
This gave me a different error message:
/usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp: In member function 'size_t Print::print(const __FlashStringHelper*)':
/usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp:44:9: error: 'prog_char' does not name a type
/usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp:47:23: error: 'p' was not declared in this scope
So here's Print.cpp
/*
Print.cpp - Base class that provides print() and println()
Copyright (c) 2008 David A. Mellis. All right reserved.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Modified 23 November 2006 by David A. Mellis
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "Arduino.h"
#include "Print.h"
// Public Methods //////////////////////////////////////////////////////////////
/* default implementation: may be overridden */
size_t Print::write(const uint8_t *buffer, size_t size)
{
size_t n = 0;
while (size--) {
n += write(*buffer++);
}
return n;
}
size_t Print::print(const __FlashStringHelper *ifsh)
{
const prog_char *p = (const prog_char *)ifsh;
size_t n = 0;
while (1) {
unsigned char c = pgm_read_byte(p++);
if (c == 0) break;
n += write(c);
}
return n;
}
size_t Print::print(const String &s)
{
size_t n = 0;
for (uint16_t i = 0; i < s.length(); i++) {
n += write(s[i]);
}
return n;
}
size_t Print::print(const char str[])
{
return write(str);
}
size_t Print::print(char c)
{
return write(c);
}
size_t Print::print(unsigned char b, int base)
{
return print((unsigned long) b, base);
}
size_t Print::print(int n, int base)
{
return print((long) n, base);
}
size_t Print::print(unsigned int n, int base)
{
return print((unsigned long) n, base);
}
size_t Print::print(long n, int base)
{
if (base == 0) {
return write(n);
} else if (base == 10) {
if (n < 0) {
int t = print('-');
n = -n;
return printNumber(n, 10) + t;
}
return printNumber(n, 10);
} else {
return printNumber(n, base);
}
}
size_t Print::print(unsigned long n, int base)
{
if (base == 0) return write(n);
else return printNumber(n, base);
}
size_t Print::print(double n, int digits)
{
return printFloat(n, digits);
}
size_t Print::println(const __FlashStringHelper *ifsh)
{
size_t n = print(ifsh);
n += println();
return n;
}
size_t Print::print(const Printable& x)
{
return x.printTo(*this);
}
size_t Print::println(void)
{
size_t n = print('\r');
n += print('\n');
return n;
}
size_t Print::println(const String &s)
{
size_t n = print(s);
n += println();
return n;
}
size_t Print::println(const char c[])
{
size_t n = print(c);
n += println();
return n;
}
size_t Print::println(char c)
{
size_t n = print(c);
n += println();
return n;
}
size_t Print::println(unsigned char b, int base)
{
size_t n = print(b, base);
n += println();
return n;
}
size_t Print::println(int num, int base)
{
size_t n = print(num, base);
n += println();
return n;
}
size_t Print::println(unsigned int num, int base)
{
size_t n = print(num, base);
n += println();
return n;
}
size_t Print::println(long num, int base)
{
size_t n = print(num, base);
n += println();
return n;
}
size_t Print::println(unsigned long num, int base)
{
size_t n = print(num, base);
n += println();
return n;
}
size_t Print::println(double num, int digits)
{
size_t n = print(num, digits);
n += println();
return n;
}
size_t Print::println(const Printable& x)
{
size_t n = print(x);
n += println();
return n;
}
// Private Methods /////////////////////////////////////////////////////////////
size_t Print::printNumber(unsigned long n, uint8_t base) {
char buf[8 * sizeof(long) + 1]; // Assumes 8-bit chars plus zero byte.
char *str = &buf[sizeof(buf) - 1];
*str = '\0';
// prevent crash if called with base == 1
if (base < 2) base = 10;
do {
unsigned long m = n;
n /= base;
char c = m - base * n;
*--str = c < 10 ? c + '0' : c + 'A' - 10;
} while(n);
return write(str);
}
size_t Print::printFloat(double number, uint8_t digits)
{
size_t n = 0;
// Handle negative numbers
if (number < 0.0)
{
n += print('-');
number = -number;
}
// Round correctly so that print(1.999, 2) prints as "2.00"
double rounding = 0.5;
for (uint8_t i=0; i<digits; ++i)
rounding /= 10.0;
number += rounding;
// Extract the integer part of the number and print it
unsigned long int_part = (unsigned long)number;
double remainder = number - (double)int_part;
n += print(int_part);
// Print the decimal point, but only if there are digits beyond
if (digits > 0) {
n += print(".");
}
// Extract digits from the remainder one at a time
while (digits-- > 0)
{
remainder *= 10.0;
int toPrint = int(remainder);
n += print(toPrint);
remainder -= toPrint;
}
return n;
}

How accelerate MPI-calls?

I try to accelerate simple MPI-programm with OpenMP. I use MPICH2 and 4-core Intel processor. I have simple code:
int main(int argc, char** argv) {
int size, rank, provided;
const int root = 0;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int cubeCount = StrToDouble(argv[1]);
int matrixSize = *StrToDouble(argv[2]);
WorkNode node(rank, size, cubeCount, matrixSize);
time_t t0 = time(0);
node.work();
time_t t1 = time(0);
time_t total = t1 - t0;
Class WorkNode also very simple, contains only array of Cube and method work.
class Cube {
public:
Cube(int matrixSize);
double *matrix;
int matrixSize;
}
Cube::Cube(int matrixSize) {
matrix = new double[matrixSize];
this->matrixSize = matrixSize;
}
Finally method work:
// double *inBuffer = new double[cubes[0]->matrixSize];
MPI_Status status;
for (int i = 0; i < processorCount; i++) {
int nodeToSend = this->id + i;
int nodeRecv = this->id - i;
if (nodeToSend >= processorCount) {
nodeToSend -= processorCount;
}
if (nodeRecv < 0) {
nodeRecv += processorCount;
}
#pragma omp parallel for num_threads(2)
for (int i = 0; i < cubeCount; i++) {
Cube *cube = cubes[i];
if (nodeToSend != this->id) {
MPI_Bsend(cube->matrix, cube->matrixSize, MPI_DOUBLE, nodeToSend, _MY_MPI_ANY_TAG, MPI_COMM_WORLD);
}
if (nodeRecv != this->id) {
double *inBuffer = new double[cubes[0]->matrixSize];
MPI_Recv(inBuffer, cube->matrixSize, MPI_DOUBLE, nodeRecv, _MY_MPI_ANY_TAG, MPI_COMM_WORLD, &status);
delete inBuffer;
}
}
}
//delete inBuffer
Unfortunately, openMP does not accelerate the program (even if the number of MPI processes = 2), and sometimes even slows down. Can I somehow accelerate MPI calls?

Resources