Better performance for kernel on ant colony optimisation

Better performance for kernel on ant colony optimisation - opencl

I'm trying to get a better performance on my ant colony optimisation problem. In order to do so, I'm using openCL to run the update pheromones part in parallel. I have just started learning openCL and this is the kernel code I have developed. Although it runs faster than the sequential version, I still think I can achieve more performance with it, but I'm not finding other things I can do. Is there a way to improve this code even more ?
PS: I have tested this code only on the CPU, since the computer I am working on doesn't have a GPU.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
int calculateLengthOfTrail(__global int*, const int, __global int*, const int );
int edgeInTrail(const int ,const int , __global int* , const int , const int );
int indexOfCity(__global int*, const int, const int, const int);
__kernel void updatePheromones(
__global double* pheromones,
__global int* ants,
__global int* distances,
__local double* pheromones_old,
const int numCities,
const int numAnts,
const double pheromoneDecreaseFactor,
const double pheromoneIncreaseFactor
)
{
int i = get_global_id(0);
int k, j;
if(i<numCities)
{
for(j = i +1; j<numCities; j++)
{
for (k = 0; k < numAnts; k++)
{
double size = calculateLengthOfTrail(ants,k, distances, numCities);
double decrease = (1.0 - pheromoneDecreaseFactor) * pheromones_old[i+numCities*j];
double increase = 0.0;
int edge = edgeInTrail(i, j, ants, k, numCities);
if (edge== 1)
increase = (pheromoneIncreaseFactor / size);
pheromones[i+numCities*j] = decrease + increase;
if (pheromones[i+numCities*j] < 0.0001)
pheromones[i +numCities*j] = 0.0001;
else if (pheromones[i + numCities*j] > 100000.0)
pheromones[i+numCities*j] = 100000.0;
pheromones[j+numCities*i] = pheromones[i+numCities*j];
}
}
}
}
int edgeInTrail(const int cityX, const int cityY, __global int* ants, const int row, const int numCities)
{
int lastIndex = numCities - 1;
int indexCity = indexOfCity(ants, row, cityX, numCities);
if (indexCity == 0 && ants[1+numCities*row] == cityY)
return 1;
else if (indexCity == 0 && ants[lastIndex+numCities*row] == cityY)
return 1;
else if (indexCity == 0)
return 0;
else if (indexCity == lastIndex && ants[(lastIndex-1)+numCities*row] == cityY)
return 1;
else if (indexCity == lastIndex && ants[row*numCities] == cityY)
return 1;
else if (indexCity == lastIndex)
return 0;
else if (ants[(indexCity-1)+numCities*row] == cityY)
return 1;
else if (ants[(indexCity+1)+numCities*row] == cityY)
return 1;
else
return 0;
}
int calculateLengthOfTrail(__global int* ants, const int row, __global int* distances, const int numCities)
{
int sumDistance = 0;
int i;
for(i =0; i<numCities-1; i++)
sumDistance += distances[ants[i+numCities*row]+numCities*ants[(i+1)+numCities*row]];
return sumDistance;
}
int indexOfCity(__global int* ants, int row, int city, int numCities)
{
int i;
for(i =0; i<numCities; i++)
{
if(ants[i+numCities*row] == city)
return i;
}
return -1;
}

Related

Freeing shows double free or corruption

typedef struct row_container
{
int size;
char *data;
}row_container;
typedef struct message_bar
{
char message[80];
time_t message_time;
}message_bar;
typedef struct brick_win_size
{
int row;
int col;
int current_row;
int current_column;
int data_row;
int row_off;
int col_off;
row_container *container;
char *filename;
message_bar *msg_bar;
}brick_window;
void container_delete_character(struct brick_win_size *win)
{
uint8_t insert_flag = 1;
int row = win->current_row;
int offset = win->current_column;
row_container *container = win->container;
char *data = container[row].data;
if(offset < container[row].size){
memmove(&data[offset], &data[offset + 1], container[row].size - 1);
data = realloc(data, container[row].size - 1);
container[row].data = data;
container[row].size--;
}
if(insert_flag){
if((container[row].size == 0) && (row < win->data_row)){
for(int index = row; index < win->data_row; index++){
if(index != win->data_row -1){
container[index] = container[index+1];
}else{
free(container[index]); //error line...............................
}
}
if(win->data_row != 0){
win->container = realloc(win->container, sizeof(row_container) * (win->data_row - 1));
win->data_row --;
}
}
}
}
Here in the above code i just want to delete one element at one point in the function. so i am just reassigning pointers with the other pointer and finally i am freeing the final element which is eventually ending up in build failure *expected ‘void ’ but argument is of type ‘row_container {aka struct row_container}’
Moreover if i give the & de reference operator it is ending up in double free or corruption error during execution

How to write Nested Loop in Kernel side OpenCL

I'm a beginner in OpenCL. I'm trying to implement an OpenCL application.I have a doubt that how to write opencl kernel code . i have given a original c code.
Question :- help me to change that given c code into opencl kernel code?.
ORIGINAL C CODE:
int i, j;
// initialization of indexes
for (i = 0; i<n; i++)
Index[i] = i;
// Bubble sort
for (i = 0; i<n - 1; i++)
{
for (j = i + 1; j<n; j++)
{
if (I[i] > I[j])
{
double z = I[i]; // exchange attractiveness
I[i] = I[j];
I[j] = z;
z = f[i]; // exchange fitness
f[i] = f[j];
f[j] = z;
int k = Index[i]; // exchange indexes
Index[i] = Index[j];
Index[j] = k;
}
}
}

Example for 4096 element arrays(alternate bubble1 and bubble2 at least 2048 times--->4096(N) kernel executions ):
index init on host side since its just assignment.
Auxiliary functions:
void swap2p(__private int * I,int i,int j)
{
int tmp=I[i];
I[i]=I[j];
I[j]=tmp;
}
void swap2g(__global int * I,int i,int j)
{
int tmp=I[i];
I[i]=I[j];
I[j]=tmp;
}
Alternating kernel-1:
__kernel void bubble1(__global int * I, __global int * f, __global int * Index){
int threadId=get_global_id(0);
__private int vals[2];
if(threadId*2+1<4096)
{
vals[0]=I[threadId*2];
vals[1]=I[threadId*2+1];
if(vals[0]>vals[1])
{
swap2p(vals,threadId*2,threadId*2+1);
swap2g(f,threadId*2,threadId*2+1);
swap2g(Index,threadId*2,threadId*2+1);
I[threadId*2]=vals[0];
I[threadId*2+1]=vals[1];
}
}
}
alternating kernel-2:
__kernel void bubble2(__global int * I){
int threadId=get_global_id(0);
__private int vals[2];
if(threadId*2+2<4096)
{
vals[0]=I[threadId*2+1];
vals[1]=I[threadId*2+2];
if(vals[0]>vals[1])
{
swap2p(vals,threadId*2+1,threadId*2+2);
swap2g(f,threadId*2+1,threadId*2+2);
swap2g(Index,threadId*2+1,threadId*2+2);
I[threadId*2+1]=vals[0];
I[threadId*2+2]=vals[1];
}
}
}
Global thread number: N/2 (2048)

Getting segmentation fault (or bad access) for some inputs and the program halts

#include <iostream>
#include <vector>
#include <string>
using namespace std;
void step_selection_sort(vector <int> &a, int size, int idx){
int i,j,min,temp;
i = idx;
min = i;
for (j=i+1;j<size;j++)
{
if (a[min]>a[j])
min=j;
}
if (min!=i)
{
temp = a[i];
a[i] = a[min];
a[min] = temp;
}
idx++;
}
void selection_sort(vector <int> &a, int size, int idx){
int i;
for(i=0;i<size;i++)
{
step_selection_sort(a,size,idx);
}
}
void step_desc_sort(vector <int>& a, int size, int idx){
int i,j,max,temp;
i = idx;
max = i;
for (j=i+1;j<size;j++)
{
if (a[max]<a[j])
max=j;
}
if (max!=i)
{
temp = a[i];
a[i] = a[max];
a[max] = temp;
}
idx++;
}
void desc_sort(vector <int>& a, int size, int idx){
int i;
for(i=0;i<size;i++)
{
step_desc_sort(a,size,idx);
}
}
void swap (int & a, int & b)
{
int t = a;
a = b;
b = t;
}
int findCeil (vector <int>& nums, int first, int begin, int end)
{
int ceilIndex = begin;
for (int i = begin+1; i <= end; i++)
if (nums[i] > first && nums[i] < nums[ceilIndex])
ceilIndex = i;
return ceilIndex;
}
int findBottom(vector <int>& nums,int first,int begin,int end)
{
int bottomIndex = begin;
for (int i = begin+1; i <= end; i++)
if (nums[i] < first && nums[i] > nums[bottomIndex])
bottomIndex = i;
return bottomIndex;
}
void sortedPermutations_ASC (vector <int> nums,int num)
{
bool isfinished=false;
if(isfinished==false)
for(int i=0;i<num;i++)
cout << nums[i]; //bad access when giving inputs bigger than 8
cout << endl;
int k;
for ( k = num - 2; k >= 0; --k )
if (nums[k] < nums[k+1])
break;
if ( k == -1 )
isfinished=true;
else
{
int ceilIndex = findCeil( nums, nums[k], k + 1, num - 1 );
swap( nums[k], nums[ceilIndex] );
selection_sort(nums,num,k+1);
sortedPermutations_ASC(nums,num);
}
}
void sortedPermutations_DESC (vector <int> nums,int num)
{
int i;
bool isfinished=false;
if(isfinished==false)
for(i=0;i<num;i++)
cout << nums[i];
cout << endl;
int k;
for ( k = num - 2; k >= 0; --k )
if (nums[k] > nums[k+1])
break;
if ( k == -1 )
isfinished=true;
else
{
int bottomIndex = findBottom( nums, nums[k], k + 1, num - 1 );
swap( nums[k], nums[bottomIndex] );
desc_sort(nums,num,k+1);
sortedPermutations_DESC(nums,num);
}
return;
}
int main(){
vector <int> nums;
string line,temp;
int num,j,k;
getline(cin,line);
while(j<line.size() && line[j]!=' ')
j++;
num=stoi(line.substr(0,j));
string kind;
j++;
kind=line.substr(j);
if(kind=="ASC"){
for(k=0;k<num;k++)
nums.push_back(k+1);
sortedPermutations_ASC(nums,num);
}
if(kind=="DESC"){
for(k=0;k<num;k++)
nums.push_back(num-k);
sortedPermutations_DESC(nums,num);
}
return 0;
}
here's is my code. it gives the permutations of a number.It works properly when inputs are between 1 and 8 .But it doesn't work with numbers bigger than 8 .
for example if I give
9 ASC (it means in Ascending order)
to the program , I get "Segmentation Fault:11" in terminal (mac) after printing some of the permutations .
I tried running it in Xcode . with the same input it says :
Thread 1:EXC_BAD_ACCESS(code=2,address=0x7ffff5f3fffc8)
for the line that I put comment in front of it .
I don't know what to do anymore ...
Any help would be appreciated - thanks in advance

rosserial arduino hello world won't verify

Edit: Solved
SOLUTION:
I'm running Arduino 1.0.5
I fixed the problem by changing /Sketchbook/library/ros_lib/ros/node_handle.h line 260 from
}else if (topic_ == TopicInfo::ID_TX_STOP){
to
}else if (topic_ == ID_TX_STOP){
However, this gave me a new error message:
/usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp: In member function 'size_t Print::print(const __FlashStringHelper*)':
/usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp:44:9: error: 'prog_char' does not name a type
/usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp:47:23: error: 'p' was not declared in this scope
So to fix this I edited usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp, line 44 from
const prog_char *p = (const prog_char *)ifsh;
to
const char PROGMEM *p = (const char PROGMEM *)ifsh;
Now it compiles!
Original Problem:
I've been using this tutorial to get everything set up: (http://wiki.ros.org/rosserial_arduino/Tutorials/Arduino%20IDE%20Setup) I can install everything without an issue I think, the ros_lib folder is placed in my sketchbook libraries. But when I do the next tutorial with the helloworld example, the code does not verify correctly. When I try to verify with the checkmark in the Arduino IDE, I get the following set of error codes:
In file included from /home/user/sketchbook/libraries/ros_lib/ros.h:38:0,
from HelloWorld.cpp:6:
/home/user/sketchbook/libraries/ros_lib/ros/node_handle.h: In member function 'virtual int ros::NodeHandle_::spinOnce()':
/home/user/sketchbook/libraries/ros_lib/ros/node_handle.h:260:45: error: expected unqualified-id before numeric constant
/home/user/sketchbook/libraries/ros_lib/ros/node_handle.h:260:45: error: expected ')' before numeric constant
I've reinstalled the ros_lib as well as rosserial and I keep getting this error, so I don't know what the problem is. I looked around line 260 of the node_handle.h file but I didn't notice anything out of place.
Here's node_handle.h: (spacing might be a little off)
/*
* Software License Agreement (BSD License)
*
* Copyright (c) 2011, Willow Garage, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of Willow Garage, Inc. nor the names of its
* contributors may be used to endorse or promote prducts derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROS_NODE_HANDLE_H_
#define ROS_NODE_HANDLE_H_
#include "std_msgs/Time.h"
#include "rosserial_msgs/TopicInfo.h"
#include "rosserial_msgs/Log.h"
#include "rosserial_msgs/RequestParam.h"
#define SYNC_SECONDS 5
#define MODE_FIRST_FF 0
/*
* The second sync byte is a protocol version. It's value is 0xff for the first
* version of the rosserial protocol (used up to hydro), 0xfe for the second version
* (introduced in hydro), 0xfd for the next, and so on. Its purpose is to enable
* detection of mismatched protocol versions (e.g. hydro rosserial_python with groovy
* rosserial_arduino. It must be changed in both this file and in
* rosserial_python/src/rosserial_python/SerialClient.py
*/
#define MODE_PROTOCOL_VER 1
#define PROTOCOL_VER1 0xff // through groovy
#define PROTOCOL_VER2 0xfe // in hydro
#define PROTOCOL_VER PROTOCOL_VER2
#define MODE_SIZE_L 2
#define MODE_SIZE_H 3
#define MODE_SIZE_CHECKSUM 4 // checksum for msg size received from size L and H
#define MODE_TOPIC_L 5 // waiting for topic id
#define MODE_TOPIC_H 6
#define MODE_MESSAGE 7
#define MODE_MSG_CHECKSUM 8 // checksum for msg and topic id
#define MSG_TIMEOUT 20 //20 milliseconds to recieve all of message data
#define ID_TX_STOP 11 //hardcode for hydro version
#include "msg.h"
namespace ros {
class NodeHandleBase_{
public:
virtual int publish(int id, const Msg* msg)=0;
virtual int spinOnce()=0;
virtual bool connected()=0;
};
}
#include "publisher.h"
#include "subscriber.h"
#include "service_server.h"
#include "service_client.h"
namespace ros {
using rosserial_msgs::TopicInfo;
/* Node Handle */
template<class Hardware,
int MAX_SUBSCRIBERS=25,
int MAX_PUBLISHERS=25,
int INPUT_SIZE=512,
int OUTPUT_SIZE=512>
class NodeHandle_ : public NodeHandleBase_
{
protected:
Hardware hardware_;
/* time used for syncing */
unsigned long rt_time;
/* used for computing current time */
unsigned long sec_offset, nsec_offset;
unsigned char message_in[INPUT_SIZE];
unsigned char message_out[OUTPUT_SIZE];
Publisher * publishers[MAX_PUBLISHERS];
Subscriber_ * subscribers[MAX_SUBSCRIBERS];
/*
* Setup Functions
*/
public:
NodeHandle_() : configured_(false) {
for(unsigned int i=0; i< MAX_PUBLISHERS; i++)
publishers[i] = 0;
for(unsigned int i=0; i< MAX_SUBSCRIBERS; i++)
subscribers[i] = 0;
for(unsigned int i=0; i< INPUT_SIZE; i++)
message_in[i] = 0;
for(unsigned int i=0; i< OUTPUT_SIZE; i++)
message_out[i] = 0;
req_param_resp.ints_length = 0;
req_param_resp.ints = NULL;
req_param_resp.floats_length = 0;
req_param_resp.floats = NULL;
req_param_resp.ints_length = 0;
req_param_resp.ints = NULL;
}
Hardware* getHardware(){
return &hardware_;
}
/* Start serial, initialize buffers */
void initNode(){
hardware_.init();
mode_ = 0;
bytes_ = 0;
index_ = 0;
topic_ = 0;
};
/* Start a named port, which may be network server IP, initialize buffers */
void initNode(char *portName){
hardware_.init(portName);
mode_ = 0;
bytes_ = 0;
index_ = 0;
topic_ = 0;
};
protected:
//State machine variables for spinOnce
int mode_;
int bytes_;
int topic_;
int index_;
int checksum_;
bool configured_;
/* used for syncing the time */
unsigned long last_sync_time;
unsigned long last_sync_receive_time;
unsigned long last_msg_timeout_time;
public:
/* This function goes in your loop() function, it handles
* serial input and callbacks for subscribers.
*/
virtual int spinOnce(){
/* restart if timed out */
unsigned long c_time = hardware_.time();
if( (c_time - last_sync_receive_time) > (SYNC_SECONDS*2200) ){
configured_ = false;
}
/* reset if message has timed out */
if ( mode_ != MODE_FIRST_FF){
if (c_time > last_msg_timeout_time){
mode_ = MODE_FIRST_FF;
}
}
/* while available buffer, read data */
while( true )
{
int data = hardware_.read();
if( data < 0 )
break;
checksum_ += data;
if( mode_ == MODE_MESSAGE ){ /* message data being recieved */
message_in[index_++] = data;
bytes_--;
if(bytes_ == 0) /* is message complete? if so, checksum */
mode_ = MODE_MSG_CHECKSUM;
}else if( mode_ == MODE_FIRST_FF ){
if(data == 0xff){
mode_++;
last_msg_timeout_time = c_time + MSG_TIMEOUT;
}
}else if( mode_ == MODE_PROTOCOL_VER ){
if(data == PROTOCOL_VER){
mode_++;
}else{
mode_ = MODE_FIRST_FF;
if (configured_ == false)
requestSyncTime(); /* send a msg back showing our protocol version */
}
}else if( mode_ == MODE_SIZE_L ){ /* bottom half of message size */
bytes_ = data;
index_ = 0;
mode_++;
checksum_ = data; /* first byte for calculating size checksum */
}else if( mode_ == MODE_SIZE_H ){ /* top half of message size */
bytes_ += data<<8;
mode_++;
}else if( mode_ == MODE_SIZE_CHECKSUM ){
if( (checksum_%256) == 255)
mode_++;
else
mode_ = MODE_FIRST_FF; /* Abandon the frame if the msg len is wrong */
}else if( mode_ == MODE_TOPIC_L ){ /* bottom half of topic id */
topic_ = data;
mode_++;
checksum_ = data; /* first byte included in checksum */
}else if( mode_ == MODE_TOPIC_H ){ /* top half of topic id */
topic_ += data<<8;
mode_ = MODE_MESSAGE;
if(bytes_ == 0)
mode_ = MODE_MSG_CHECKSUM;
}else if( mode_ == MODE_MSG_CHECKSUM ){ /* do checksum */
mode_ = MODE_FIRST_FF;
if( (checksum_%256) == 255){
if(topic_ == TopicInfo::ID_PUBLISHER){
requestSyncTime();
negotiateTopics();
last_sync_time = c_time;
last_sync_receive_time = c_time;
return -1;
}else if(topic_ == TopicInfo::ID_TIME){
syncTime(message_in);
}else if (topic_ == TopicInfo::ID_PARAMETER_REQUEST){
req_param_resp.deserialize(message_in);
param_recieved= true;
}else if(topic_ == TopicInfo::ID_TX_STOP){
configured_ = false;
}else{
if(subscribers[topic_-100])
subscribers[topic_-100]->callback( message_in );
}
}
}
}
/* occasionally sync time */
if( configured_ && ((c_time-last_sync_time) > (SYNC_SECONDS*500) )){
requestSyncTime();
last_sync_time = c_time;
}
return 0;
}
/* Are we connected to the PC? */
virtual bool connected() {
return configured_;
};
/********************************************************************
* Time functions
*/
void requestSyncTime()
{
std_msgs::Time t;
publish(TopicInfo::ID_TIME, &t);
rt_time = hardware_.time();
}
void syncTime( unsigned char * data )
{
std_msgs::Time t;
unsigned long offset = hardware_.time() - rt_time;
t.deserialize(data);
t.data.sec += offset/1000;
t.data.nsec += (offset%1000)*1000000UL;
this->setNow(t.data);
last_sync_receive_time = hardware_.time();
}
Time now(){
unsigned long ms = hardware_.time();
Time current_time;
current_time.sec = ms/1000 + sec_offset;
current_time.nsec = (ms%1000)*1000000UL + nsec_offset;
normalizeSecNSec(current_time.sec, current_time.nsec);
return current_time;
}
void setNow( Time & new_now )
{
unsigned long ms = hardware_.time();
sec_offset = new_now.sec - ms/1000 - 1;
nsec_offset = new_now.nsec - (ms%1000)*1000000UL + 1000000000UL;
normalizeSecNSec(sec_offset, nsec_offset);
}
/********************************************************************
* Topic Management
*/
/* Register a new publisher */
bool advertise(Publisher & p)
{
for(int i = 0; i < MAX_PUBLISHERS; i++){
if(publishers[i] == 0){ // empty slot
publishers[i] = &p;
p.id_ = i+100+MAX_SUBSCRIBERS;
p.nh_ = this;
return true;
}
}
return false;
}
/* Register a new subscriber */
template<typename MsgT>
bool subscribe(Subscriber< MsgT> & s){
for(int i = 0; i < MAX_SUBSCRIBERS; i++){
if(subscribers[i] == 0){ // empty slot
subscribers[i] = (Subscriber_*) &s;
s.id_ = i+100;
return true;
}
}
return false;
}
/* Register a new Service Server */
template<typename MReq, typename MRes>
bool advertiseService(ServiceServer<MReq,MRes>& srv){
bool v = advertise(srv.pub);
for(int i = 0; i < MAX_SUBSCRIBERS; i++){
if(subscribers[i] == 0){ // empty slot
subscribers[i] = (Subscriber_*) &srv;
srv.id_ = i+100;
return v;
}
}
return false;
}
/* Register a new Service Client */
template<typename MReq, typename MRes>
bool serviceClient(ServiceClient<MReq, MRes>& srv){
bool v = advertise(srv.pub);
for(int i = 0; i < MAX_SUBSCRIBERS; i++){
if(subscribers[i] == 0){ // empty slot
subscribers[i] = (Subscriber_*) &srv;
srv.id_ = i+100;
return v;
}
}
return false;
}
void negotiateTopics()
{
rosserial_msgs::TopicInfo ti;
int i;
for(i = 0; i < MAX_PUBLISHERS; i++)
{
if(publishers[i] != 0) // non-empty slot
{
ti.topic_id = publishers[i]->id_;
ti.topic_name = (char *) publishers[i]->topic_;
ti.message_type = (char *) publishers[i]->msg_->getType();
ti.md5sum = (char *) publishers[i]->msg_->getMD5();
ti.buffer_size = OUTPUT_SIZE;
publish( publishers[i]->getEndpointType(), &ti );
}
}
for(i = 0; i < MAX_SUBSCRIBERS; i++)
{
if(subscribers[i] != 0) // non-empty slot
{
ti.topic_id = subscribers[i]->id_;
ti.topic_name = (char *) subscribers[i]->topic_;
ti.message_type = (char *) subscribers[i]->getMsgType();
ti.md5sum = (char *) subscribers[i]->getMsgMD5();
ti.buffer_size = INPUT_SIZE;
publish( subscribers[i]->getEndpointType(), &ti );
}
}
configured_ = true;
}
virtual int publish(int id, const Msg * msg)
{
if(id >= 100 && !configured_)
return 0;
/* serialize message */
unsigned int l = msg->serialize(message_out+7);
/* setup the header */
message_out[0] = 0xff;
message_out[1] = PROTOCOL_VER;
message_out[2] = (unsigned char) ((unsigned int)l&255);
message_out[3] = (unsigned char) ((unsigned int)l>>8);
message_out[4] = 255 - ((message_out[2] + message_out[3])%256);
message_out[5] = (unsigned char) ((int)id&255);
message_out[6] = (unsigned char) ((int)id>>8);
/* calculate checksum */
int chk = 0;
for(int i =5; i<l+7; i++)
chk += message_out[i];
l += 7;
message_out[l++] = 255 - (chk%256);
if( l <= OUTPUT_SIZE ){
hardware_.write(message_out, l);
return l;
}else{
logerror("Message from device dropped: message larger than buffer.");
return -1;
}
}
/********************************************************************
* Logging
*/
private:
void log(char byte, const char * msg){
rosserial_msgs::Log l;
l.level= byte;
l.msg = (char*)msg;
publish(rosserial_msgs::TopicInfo::ID_LOG, &l);
}
public:
void logdebug(const char* msg){
log(rosserial_msgs::Log::ROSDEBUG, msg);
}
void loginfo(const char * msg){
log(rosserial_msgs::Log::INFO, msg);
}
void logwarn(const char *msg){
log(rosserial_msgs::Log::WARN, msg);
}
void logerror(const char*msg){
log(rosserial_msgs::Log::ERROR, msg);
}
void logfatal(const char*msg){
log(rosserial_msgs::Log::FATAL, msg);
}
/********************************************************************
* Parameters
*/
private:
bool param_recieved;
rosserial_msgs::RequestParamResponse req_param_resp;
bool requestParam(const char * name, int time_out = 1000){
param_recieved = false;
rosserial_msgs::RequestParamRequest req;
req.name = (char*)name;
publish(TopicInfo::ID_PARAMETER_REQUEST, &req);
unsigned int end_time = hardware_.time() + time_out;
while(!param_recieved ){
spinOnce();
if (hardware_.time() > end_time) return false;
}
return true;
}
public:
bool getParam(const char* name, int* param, int length =1){
if (requestParam(name) ){
if (length == req_param_resp.ints_length){
//copy it over
for(int i=0; i<length; i++)
param[i] = req_param_resp.ints[i];
return true;
}
}
return false;
}
bool getParam(const char* name, float* param, int length=1){
if (requestParam(name) ){
if (length == req_param_resp.floats_length){
//copy it over
for(int i=0; i<length; i++)
param[i] = req_param_resp.floats[i];
return true;
}
}
return false;
}
bool getParam(const char* name, char** param, int length=1){
if (requestParam(name) ){
if (length == req_param_resp.strings_length){
//copy it over
for(int i=0; i<length; i++)
strcpy(param[i],req_param_resp.strings[i]);
return true;
}
}
return false;
}
};
}
#endif
I tried commenting out line 160 and 161:
//}else if (topic_ == TopicInfo::ID_TX_STOP){
//configured_ = false;
This gave me a different error message:
/usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp: In member function 'size_t Print::print(const __FlashStringHelper*)':
/usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp:44:9: error: 'prog_char' does not name a type
/usr/share/arduino/hardware/arduino/cores/arduino/Print.cpp:47:23: error: 'p' was not declared in this scope
So here's Print.cpp
/*
Print.cpp - Base class that provides print() and println()
Copyright (c) 2008 David A. Mellis. All right reserved.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Modified 23 November 2006 by David A. Mellis
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "Arduino.h"
#include "Print.h"
// Public Methods //////////////////////////////////////////////////////////////
/* default implementation: may be overridden */
size_t Print::write(const uint8_t *buffer, size_t size)
{
size_t n = 0;
while (size--) {
n += write(*buffer++);
}
return n;
}
size_t Print::print(const __FlashStringHelper *ifsh)
{
const prog_char *p = (const prog_char *)ifsh;
size_t n = 0;
while (1) {
unsigned char c = pgm_read_byte(p++);
if (c == 0) break;
n += write(c);
}
return n;
}
size_t Print::print(const String &s)
{
size_t n = 0;
for (uint16_t i = 0; i < s.length(); i++) {
n += write(s[i]);
}
return n;
}
size_t Print::print(const char str[])
{
return write(str);
}
size_t Print::print(char c)
{
return write(c);
}
size_t Print::print(unsigned char b, int base)
{
return print((unsigned long) b, base);
}
size_t Print::print(int n, int base)
{
return print((long) n, base);
}
size_t Print::print(unsigned int n, int base)
{
return print((unsigned long) n, base);
}
size_t Print::print(long n, int base)
{
if (base == 0) {
return write(n);
} else if (base == 10) {
if (n < 0) {
int t = print('-');
n = -n;
return printNumber(n, 10) + t;
}
return printNumber(n, 10);
} else {
return printNumber(n, base);
}
}
size_t Print::print(unsigned long n, int base)
{
if (base == 0) return write(n);
else return printNumber(n, base);
}
size_t Print::print(double n, int digits)
{
return printFloat(n, digits);
}
size_t Print::println(const __FlashStringHelper *ifsh)
{
size_t n = print(ifsh);
n += println();
return n;
}
size_t Print::print(const Printable& x)
{
return x.printTo(*this);
}
size_t Print::println(void)
{
size_t n = print('\r');
n += print('\n');
return n;
}
size_t Print::println(const String &s)
{
size_t n = print(s);
n += println();
return n;
}
size_t Print::println(const char c[])
{
size_t n = print(c);
n += println();
return n;
}
size_t Print::println(char c)
{
size_t n = print(c);
n += println();
return n;
}
size_t Print::println(unsigned char b, int base)
{
size_t n = print(b, base);
n += println();
return n;
}
size_t Print::println(int num, int base)
{
size_t n = print(num, base);
n += println();
return n;
}
size_t Print::println(unsigned int num, int base)
{
size_t n = print(num, base);
n += println();
return n;
}
size_t Print::println(long num, int base)
{
size_t n = print(num, base);
n += println();
return n;
}
size_t Print::println(unsigned long num, int base)
{
size_t n = print(num, base);
n += println();
return n;
}
size_t Print::println(double num, int digits)
{
size_t n = print(num, digits);
n += println();
return n;
}
size_t Print::println(const Printable& x)
{
size_t n = print(x);
n += println();
return n;
}
// Private Methods /////////////////////////////////////////////////////////////
size_t Print::printNumber(unsigned long n, uint8_t base) {
char buf[8 * sizeof(long) + 1]; // Assumes 8-bit chars plus zero byte.
char *str = &buf[sizeof(buf) - 1];
*str = '\0';
// prevent crash if called with base == 1
if (base < 2) base = 10;
do {
unsigned long m = n;
n /= base;
char c = m - base * n;
*--str = c < 10 ? c + '0' : c + 'A' - 10;
} while(n);
return write(str);
}
size_t Print::printFloat(double number, uint8_t digits)
{
size_t n = 0;
// Handle negative numbers
if (number < 0.0)
{
n += print('-');
number = -number;
}
// Round correctly so that print(1.999, 2) prints as "2.00"
double rounding = 0.5;
for (uint8_t i=0; i<digits; ++i)
rounding /= 10.0;
number += rounding;
// Extract the integer part of the number and print it
unsigned long int_part = (unsigned long)number;
double remainder = number - (double)int_part;
n += print(int_part);
// Print the decimal point, but only if there are digits beyond
if (digits > 0) {
n += print(".");
}
// Extract digits from the remainder one at a time
while (digits-- > 0)
{
remainder *= 10.0;
int toPrint = int(remainder);
n += print(toPrint);
remainder -= toPrint;
}
return n;
}

How accelerate MPI-calls?

I try to accelerate simple MPI-programm with OpenMP. I use MPICH2 and 4-core Intel processor. I have simple code:
int main(int argc, char** argv) {
int size, rank, provided;
const int root = 0;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int cubeCount = StrToDouble(argv[1]);
int matrixSize = *StrToDouble(argv[2]);
WorkNode node(rank, size, cubeCount, matrixSize);
time_t t0 = time(0);
node.work();
time_t t1 = time(0);
time_t total = t1 - t0;
Class WorkNode also very simple, contains only array of Cube and method work.
class Cube {
public:
Cube(int matrixSize);
double *matrix;
int matrixSize;
}
Cube::Cube(int matrixSize) {
matrix = new double[matrixSize];
this->matrixSize = matrixSize;
}
Finally method work:
// double *inBuffer = new double[cubes[0]->matrixSize];
MPI_Status status;
for (int i = 0; i < processorCount; i++) {
int nodeToSend = this->id + i;
int nodeRecv = this->id - i;
if (nodeToSend >= processorCount) {
nodeToSend -= processorCount;
}
if (nodeRecv < 0) {
nodeRecv += processorCount;
}
#pragma omp parallel for num_threads(2)
for (int i = 0; i < cubeCount; i++) {
Cube *cube = cubes[i];
if (nodeToSend != this->id) {
MPI_Bsend(cube->matrix, cube->matrixSize, MPI_DOUBLE, nodeToSend, _MY_MPI_ANY_TAG, MPI_COMM_WORLD);
}
if (nodeRecv != this->id) {
double *inBuffer = new double[cubes[0]->matrixSize];
MPI_Recv(inBuffer, cube->matrixSize, MPI_DOUBLE, nodeRecv, _MY_MPI_ANY_TAG, MPI_COMM_WORLD, &status);
delete inBuffer;
}
}
}
//delete inBuffer
Unfortunately, openMP does not accelerate the program (even if the number of MPI processes = 2), and sometimes even slows down. Can I somehow accelerate MPI calls?

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Better performance for kernel on ant colony optimisation - opencl

Related

Freeing shows double free or corruption

How to write Nested Loop in Kernel side OpenCL

Getting segmentation fault (or bad access) for some inputs and the program halts

rosserial arduino hello world won't verify

How accelerate MPI-calls?

Categories

Resources