I have written the following code as test
I am receiving from each processor an array and I am placing them in ad 2D array each row is for an array from a different processor
#include <iostream>
#include <mpi.h>
using namespace std;
int main(int argc, char* argv[])
{
int *sendBuff;
int **table;
int size, rank;
MPI_Status stat;
int pass = 1;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
sendBuff = new int[10];
printf("task %d passed %d\n", rank, pass); //1
pass++;
if (rank == 0)
{
table = new int*[size];
}
for (int i = 0; i < 10; i++)
{
sendBuff[i] = rank;
}
printf("task %d passed %d\n", rank, pass); //2
pass++;
if (rank != 0)
{
MPI_Send(&sendBuff, 10, MPI_INT, 0, rank, MPI_COMM_WORLD);
}
printf("task %d passed %d\n", rank, pass); //3
pass++;
if (rank == 0)
{
table[0] = sendBuff;
for (int i = 1; i < size; i++)
{
MPI_Recv(&table[i], 10, MPI_INT, i, i, MPI_COMM_WORLD, &stat);
}
}
printf("task %d passed %d\n", rank, pass); //4
pass++;
delete[] sendBuff;
if (rank == 0)
{
for (int i = 0; i < size; i++)
{
delete[] table[i];
}
delete[] table;
}
MPI_Finalize();
return 0;
}
but It is not runing
I run using
mpirun -np 4 a.out
and I get the following:
[arch:03429] *** Process received signal ***
[arch:03429] Signal: Aborted (6)
[arch:03429] Signal code: (-6)
[arch:03429] [ 0] /usr/lib/libpthread.so.0(+0xf870) [0x7fd2675bd870]
[arch:03429] [ 1] /usr/lib/libc.so.6(gsignal+0x39) [0x7fd2672383d9]
[arch:03429] [ 2] /usr/lib/libc.so.6(abort+0x148) [0x7fd2672397d8]
[arch:03429] [ 3] /usr/lib/libc.so.6(+0x72e64) [0x7fd267275e64]
[arch:03429] [ 4] /usr/lib/libc.so.6(+0x7862e) [0x7fd26727b62e]
[arch:03429] [ 5] /usr/lib/libc.so.6(+0x79307) [0x7fd26727c307]
[arch:03429] [ 6] a.out() [0x408704]
[arch:03429] [ 7] /usr/lib/libc.so.6(__libc_start_main+0xf5) [0x7fd267224bc5]
[arch:03429] [ 8] a.out() [0x408429]
[arch:03429] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 3429 on node arch exited on signal 6 (Aborted).
--------------------------------------------------------------------------
Any help?
As Hristo Iliev pointed out, the array sendBuf should be the argument of MPI_Send. It works the same way for table[i].
Another fact : MPI_Send and MPI_Recv do not allocate memory. These functions just copy a message from one place to another. Both sendBuff and table[i] should be allocated previously. And writting table[0]=sendBuff would therefore trigger a memory leak.
Here is a code that may help you :
#include <iostream>
#include <mpi.h>
using namespace std;
int main(int argc, char* argv[])
{
int *sendBuff;
int **table;
int size, rank;
MPI_Status stat;
int pass = 1;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
sendBuff = new int[10];
printf("firts task %d passed %d\n", rank, pass); //1
pass++;
if (rank == 0)
{
table = new int*[size];
}
for (int i = 0; i < 10; i++)
{
sendBuff[i] = rank;
}
printf("second task %d passed %d\n", rank, pass); //2
pass++;
if (rank != 0)
{
MPI_Send(sendBuff, 10, MPI_INT, 0, rank, MPI_COMM_WORLD);
}
printf("thrid task %d passed %d\n", rank, pass); //3
pass++;
if (rank == 0)
{
table[0]=new int[10];
for(int i=0;i<10;i++){
table[0][i]=sendBuff[i];
}
// table[0] = sendBuff;
for (int i = 1; i < size; i++)
{
table[i]=new int[10];
MPI_Recv(table[i], 10, MPI_INT, i, i, MPI_COMM_WORLD, &stat);
}
}
printf("fourth task %d passed %d\n", rank, pass); //4
pass++;
if (rank == 0)
{
for (int i = 0; i < size; i++)
{
delete [] table[i];
table[i]=NULL;
}
delete [] table;
}
delete [] sendBuff;
MPI_Finalize();
return 0;
}
A function that may help you : MPI_Gather(...). It seems to be what you are looking for ! Watch for memory allocation if you want to use it : all the values of table should be allocated as one contiguous chunk of memory.
http://www.mcs.anl.gov/research/projects/mpi/www/www3/MPI_Gather.html
Bye,
Francis
Related
I want to use MPI_Reduce function find largest value and its PID(rank) at the same time, but the result shows it is not true, I don't know how to fix it, result:
PID:1, loc_num:2
PID:2, loc_num:3
PID:3, loc_num:4
global data: 1
coresponding PID: 0
my program:
#include <stdio.h>
#include <string.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
//init MPI
int PID, P;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &P);
MPI_Comm_rank(MPI_COMM_WORLD, &PID);
struct{
int value;
int PID;
} in, out;
int value = 1;
in.value = value;
in.PID = PID;
for(int i = 1; i <= P; i++){
if (PID == i){
value = value + i;
printf("PID:%d, loc_num:%d \n",PID, value);
}
}
MPI_Reduce(&in, &out, 1, MPI_2INT, MPI_MAXLOC, 0, MPI_COMM_WORLD);
int max_PID = out.PID;
int max_num = out.value;
if (PID == 0){
printf("global data: %d \n", max_num);
printf("coresponding PID: %d \n",max_PID);
}
MPI_Finalize();
return 0;
}
I just follow the structure of in.value= value and in.PID=PID and then,every PID calculate value=value+PID so the answer is when PID=1, loc=2;when PID=2, loc=3 ...next compare all of them by max, and sent them to PID=0
There is no error in the MPI_Reduce of your example. As #Gilles pointed out, the issue is you are not assigning the newly calculated value to in.value.
If you put the assignment statement after calculation as below, then everything work as expected.
for(int i = 1; i <= P; i++){
if (PID == i){
value = value + i;
printf("PID:%d, loc_num:%d \n",PID, value);
}
}
in.value = value;
in.PID = PID;
MPI_Reduce(&in, &out, 1, MPI_2INT, MPI_MAXLOC, 0, MPI_COMM_WORLD);
In your example below, you are not assigning the calculated values to the in struct object.
in.value = value; // value is set as 1
in.PID = PID;
for(int i = 1; i <= P; i++){
if (PID == i){
value = value + i; // calculating the value but not assigning to in.value
printf("PID:%d, loc_num:%d \n",PID, value);
}
}
// uses the old value for in.value (i.e 1) for reduction
MPI_Reduce(&in, &out, 1, MPI_2INT, MPI_MAXLOC, 0, MPI_COMM_WORLD);
How could I read external input file for mpi? I need to read one integer from external file (zadanie4_vstup.txt), to compute simple factorial. I have tried to substitute the second argument in MPI_Init() with address of int variable (n), but it looks it is nonsense.
Thank you.
#include <stdio.h>
#include <mpi.h>
int main(int argc, char ** argv)
{
FILE *fr, *fw;
fr = fopen("zadanie4_vstup.txt", "r");
fw = fopen("zadanie4_vystup.txt", "w");
int nproc, me;
int fakt=1, i, buff, n;
MPI_Status stat;
fscanf(fr, "%d", &n);
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
#pragma omp parallel for private(i) reduction(*:fakt)
for(i=me*n/nproc+1; i<=(me+1)*n/nproc; i++) {
fakt *= i;
}
if(nproc > 1) {
if(me == 0) {
for(i=1; i<nproc; i++) {
MPI_Recv(&buff, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
fakt*=buff;
}
} else {
MPI_Send(&fakt, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
}
if(me == 0) {
fprintf(fw, "%d! = %d\n", n, fakt);
}
fclose(fr);
fclose(fw);
MPI_Finalize();
}
here is a version of your program that reads n on the command line.
note i simplified the communications by using MPI_Reduce()
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[]) {
int nproc, me;
int fakt=1, res, i, buff, n;
MPI_Status stat;
MPI_Init(&argc, &argv);
n = atoi(argv[1]);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
#pragma omp parallel for private(i) reduction(*:fakt)
for(i=me*n/nproc+1; i<=(me+1)*n/nproc; i++) {
fakt *= i;
}
MPI_Reduce(&fakt, &res, 1, MPI_INT, MPI_PROD, 0, MPI_COMM_WORLD);
if(me == 0) {
printf("%d! = %d\n", n, res);
}
MPI_Finalize();
return 0;
}
for example
$ mpirun -np 4 ./fakt 6
6! = 720
I am running MPI applications on a cluster with Sun Grid Engine, using OpenMPI.
Has anyone ever experience MPI communications which hang while the applications are running?
For example: Process on rank 0 calls MPI_Send to process on rank 1, and process on rank 1 calls MPI_Recv from process on rank 0. The ranks are correct, the tags are correct, however the communication just does not happen therefore the application never terminates.
N.B. The applications work on my laptop and other machines so it is not a case that I have some IDs wrong... Also, these applications work on the cluster the first time, but when I submit the exact same job again the MPI communication hangs as explained above.
If anyone has experienced something similar, any help would be appreciated, thanks.
EDIT:
I have implemented a very simple example of one of the applications:
#include <mpi.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <pthread.h>
#include <sys/time.h>
#define NUM_CELLS 5 //Total number of integers to sort
/* Initial process to create numbers and send to first cell */
void *start(void *data)
{
int i, num;
time_t t;
srand((unsigned) time(&t));
//Create array of random numbers and print
for(i = 0; i < NUM_CELLS; i++)
{
num = rand() % 100;
printf("0 SEND\n");
MPI_Send(&num, 1, MPI_INT, 1, 1, MPI_COMM_WORLD);
printf("0 SENT\n");
}
return NULL;
}
/* Process for individual sort cell in sort pump */
void *sort_cell(void *data)
{
int *pos = (int *)data;
int num=2, i;
for(i = 0; i < NUM_CELLS; i++)
{
//Receive num
printf("%d WAIT\n", *pos);
if(*pos == 1)
MPI_Recv(&num, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
else if(*pos == 2)
MPI_Recv(&num, 1, MPI_INT, 1, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
else if(*pos == 3)
MPI_Recv(&num, 1, MPI_INT, 2, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
else if(*pos == 4)
MPI_Recv(&num, 1, MPI_INT, 3, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
else if(*pos == 5)
MPI_Recv(&num, 1, MPI_INT, 4, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("%d RECV\n", *pos);
//Keep larger number and send smaller number to next cell
printf("%d SEND\n", *pos);
if(*pos == 1)
MPI_Send(&num, 1, MPI_INT, 2, 2, MPI_COMM_WORLD);
else if(*pos == 2)
MPI_Send(&num, 1, MPI_INT, 3, 3, MPI_COMM_WORLD);
else if(*pos == 3)
MPI_Send(&num, 1, MPI_INT, 4, 4, MPI_COMM_WORLD);
else if(*pos == 4)
MPI_Send(&num, 1, MPI_INT, 1, 5, MPI_COMM_WORLD);
printf("%d SENT\n", *pos);
}
return NULL;
}
int main(int argc, char **argv)
{
int i;
double elapsedTime;
struct timeval t1, t2;
//Start timer
gettimeofday(&t1, NULL);
int my_rank, provided;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
//Stop timer
gettimeofday(&t2, NULL);
elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0; //sec to ms
elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0; //us to ms
printf("Rank %d - Setup time: %f milliseconds\n", my_rank, elapsedTime);
//Start timer
gettimeofday(&t1, NULL);
//Execute processes in parallel according to mapping
if(my_rank == 0)
{
int num_threads = 1;
pthread_t threads[num_threads];
pthread_create(&threads[0], NULL, start, NULL);
for(i = 0; i < num_threads; i++)
(void) pthread_join(threads[i], NULL);
}
if(my_rank == 1)
{
int pos = 1;
int num_threads = 2;
pthread_t threads[num_threads];
pthread_create(&threads[0], NULL, sort_cell, (void *) &pos);
int pos1 = 5;
pthread_create(&threads[1], NULL, sort_cell, (void *) &pos1);
for(i = 0; i < num_threads; i++)
(void) pthread_join(threads[i], NULL);
}
if(my_rank == 2)
{
int pos = 2;
int num_threads = 1;
pthread_t threads[num_threads];
pthread_create(&threads[0], NULL, sort_cell, (void *) &pos);
for(i = 0; i < num_threads; i++)
(void) pthread_join(threads[i], NULL);
}
if(my_rank == 3)
{
int pos = 3;
int num_threads = 1;
pthread_t threads[num_threads];
pthread_create(&threads[0], NULL, sort_cell, (void *) &pos);
for(i = 0; i < num_threads; i++)
(void) pthread_join(threads[i], NULL);
}
if(my_rank == 4)
{
int pos = 4;
int num_threads = 1;
pthread_t threads[num_threads];
pthread_create(&threads[0], NULL, sort_cell, (void *) &pos);
for(i = 0; i < num_threads; i++)
(void) pthread_join(threads[i], NULL);
}
MPI_Barrier(MPI_COMM_WORLD);
//Stop timer
gettimeofday(&t2, NULL);
elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0; //sec to ms
elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0; //us to ms
printf("Rank %d - Execution time: %f milliseconds\n", my_rank, elapsedTime);
MPI_Finalize();
return 0;
}
Depending on the order of the sort_cells on the ranks, it works and I cannot understand why..
The application is as follows:
Sort_cell -> Sort_cell -> Sort_cell -> Sort_cell -> Sort_cell -> Sort_cell
ranks: 0 1 2 3 4 1
(where each Sort_cell is a process run on the specified ranks respectively)
If these are run on ranks which are grouped, ie: 00111, 01233, 00112, etc it works.. but once i execute on a stray rank (like the above example) ie: 00110, 01221, 01231, etc the MPI communication hangs. Any suggestions?
So I have a some code where I am using MPI_Bcast to send information from the root node to all nodes, but instead I want to get my P0 to send chunks of the array to individual processes.
How do I do this with MPI_Send and MPI_Receive?
I've never used them before and I don't know if I need to loop my MPI_Receive to effectively send everything or what.
I've put giant caps lock comments in the code where I need to replace my MPI_Bcast(), sorry in advance for the waterfall of code.
Code:
#include "mpi.h"
#include <stdio.h>
#include <math.h>
#define MAXSIZE 10000000
int add(int *A, int low, int high)
{
int res = 0, i;
for(i=low; i<=high; i++)
res += A[i];
return(res);
}
int main(argc,argv)
int argc;
char *argv[];
{
int myid, numprocs, x;
int data[MAXSIZE];
int i, low, high, myres, res;
double elapsed_time;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
if (myid == 0)
{
for(i=0; i<MAXSIZE; i++)
data[i]=1;
}
/* star the timer */
elapsed_time = -MPI_Wtime();
//THIS IS WHERE I GET CONFUSED ABOUT MPI_SEND AND MPI_RECIEVE!!!
MPI_Bcast(data, MAXSIZE, MPI_INT, 0, MPI_COMM_WORLD);
x = MAXSIZE/numprocs;
low = myid * x;
high = low + x - 1;
if (myid == numprocs - 1)
high = MAXSIZE-1;
myres = add(data, low, high);
printf("I got %d from %d\n", myres, myid);
MPI_Reduce(&myres, &res, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
/* stop the timer*/
elapsed_time += MPI_Wtime();
if (myid == 0)
printf("The sum is %d, time taken = %f.\n", res,elapsed_time);
MPI_Barrier(MPI_COMM_WORLD);
printf("The sum is %d at process %d.\n", res,myid);
MPI_Finalize();
return 0;
}
You need MPI_Scatter. A good intro is here: http://mpitutorial.com/tutorials/mpi-scatter-gather-and-allgather/
I think in your code it could look like this:
elements_per_proc = MAXSIZE/numprocs;
// Create a buffer that will hold a chunk of the global array
int *data_chunk = malloc(sizeof(int) * elements_per_proc);
MPI_Scatter(data, elements_per_proc, MPI_INT, data_chunk,
elements_per_proc, MPI_INT, 0, MPI_COMM_WORLD);
If you really want use MPI_Send and MPI_Recv, then you can use something like this:
int x = MAXSIZE / numprocs;
int *procData = new int[x];
if (rank == 0) {
for (int i = 1; i < num; i++) {
MPI_Send(data + i*x, x, MPI_INT, i, 0, MPI_COMM_WORLD);
}
} else {
MPI_Recv(procData, x, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
}
I try to accelerate simple MPI-programm with OpenMP. I use MPICH2 and 4-core Intel processor. I have simple code:
int main(int argc, char** argv) {
int size, rank, provided;
const int root = 0;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int cubeCount = StrToDouble(argv[1]);
int matrixSize = *StrToDouble(argv[2]);
WorkNode node(rank, size, cubeCount, matrixSize);
time_t t0 = time(0);
node.work();
time_t t1 = time(0);
time_t total = t1 - t0;
Class WorkNode also very simple, contains only array of Cube and method work.
class Cube {
public:
Cube(int matrixSize);
double *matrix;
int matrixSize;
}
Cube::Cube(int matrixSize) {
matrix = new double[matrixSize];
this->matrixSize = matrixSize;
}
Finally method work:
// double *inBuffer = new double[cubes[0]->matrixSize];
MPI_Status status;
for (int i = 0; i < processorCount; i++) {
int nodeToSend = this->id + i;
int nodeRecv = this->id - i;
if (nodeToSend >= processorCount) {
nodeToSend -= processorCount;
}
if (nodeRecv < 0) {
nodeRecv += processorCount;
}
#pragma omp parallel for num_threads(2)
for (int i = 0; i < cubeCount; i++) {
Cube *cube = cubes[i];
if (nodeToSend != this->id) {
MPI_Bsend(cube->matrix, cube->matrixSize, MPI_DOUBLE, nodeToSend, _MY_MPI_ANY_TAG, MPI_COMM_WORLD);
}
if (nodeRecv != this->id) {
double *inBuffer = new double[cubes[0]->matrixSize];
MPI_Recv(inBuffer, cube->matrixSize, MPI_DOUBLE, nodeRecv, _MY_MPI_ANY_TAG, MPI_COMM_WORLD, &status);
delete inBuffer;
}
}
}
//delete inBuffer
Unfortunately, openMP does not accelerate the program (even if the number of MPI processes = 2), and sometimes even slows down. Can I somehow accelerate MPI calls?