A simple MPI program - mpi

I would appreciate it if somebody could tell me why this simple MPI send and receive code doesn't run on two processors when the value of n=40 (at line 20), but works for n <= 30. In other words, if the message size goes beyond a specific number (which is not that large, roughly a 1-D array of size 8100), the MPI code deadlocks.
#include "mpi.h"
#include "stdio.h"
#include "stdlib.h"
#include "iostream"
#include "math.h"
using namespace std;
// Demonstrates (as explained in the answer below) a classic MPI deadlock:
// both ranks call a blocking MPI_Send before either posts the matching
// MPI_Recv.  It "works" for small messages only because implementations may
// buffer them eagerly.
int main(int argc, char *argv[])
{
int processor_count, processor_rank;
double *buff_H, *buff_send_H;
// N_pa_prim1 and l are set but never used in this example.
int N_pa_prim1, l, n, N_p0;
MPI_Status status;
MPI_Init (&argc, &argv);
MPI_Comm_size (MPI_COMM_WORLD, &processor_count);
MPI_Comm_rank (MPI_COMM_WORLD, &processor_rank);
N_pa_prim1=14; l=7; n=40; N_p0=7;
buff_H = new double [n*n*N_p0+1]; //Receive buffer allocation
buff_send_H = new double [n*n*N_p0+1]; //Send buffer allocation
for (int j = 0; j < n*n*N_p0+1; j++)
buff_send_H[j] = 1e-8*rand();
// BUG (the subject of this question): ranks 0 and 1 both block in MPI_Send
// here, so neither reaches the MPI_Recv below once the message is too large
// for the implementation's eager protocol.
if (processor_rank == 0)
MPI_Send(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD);
else if(processor_rank == 1)
MPI_Send(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD);
// Ranks >= 2 also hang here: no message is ever sent to them.
MPI_Recv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &status);
cout << "Received successfully by " << processor_rank << endl;
// The new[]'d buffers are never delete[]d (leak; harmless at process exit).
MPI_Finalize();
return 0;
}

The deadlocking is correct behaviour; you have a deadlock in your code.
The MPI Specification allows MPI_Send to behave as MPI_Ssend -- that is, to be blocking. A blocking communications primitive does not return until the communications "have completed" in some sense, which (in the case of a blocking send) probably means the receive has started.
Your code looks like:
If Processor 0:
Send to processor 1
If Processor 1:
Send to processor 0
Receive
That is -- the receive doesn't start until the sends have completed. You're sending, but they'll never return, because no one is receiving! (The fact that this works for small messages is an implementation artifact - most MPI implementations use a so-called "eager protocol" for "small enough" messages; but this can't be counted upon in general.)
Note that there are other logic errors here, too -- this program will also deadlock for more than 2 processors, as processors of rank >= 2 will be waiting for a message which never comes.
You can fix your program by alternating sends and receives by rank:
// Deadlock-free ordering: rank 0 sends first then receives, while rank 1
// receives first then sends, so one side's receive is always posted before
// the other side's blocking send needs it.
if (processor_rank == 0) {
MPI_Send(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD);
MPI_Recv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &status);
} else if (processor_rank == 1) {
MPI_Recv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &status);
MPI_Send(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD);
}
or by using MPI_Sendrecv (which is a blocking (send + receive), rather than a blocking send + a blocking receive):
// MPI_Sendrecv performs the send and the receive as one blocking operation,
// so the library can make progress on both at once and no manual ordering
// trick is required.
int sendto;
if (processor_rank == 0)
sendto = 1;
else if (processor_rank == 1)
sendto = 0;
if (processor_rank == 0 || processor_rank == 1) {
MPI_Sendrecv(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, sendto, 163,
buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163,
MPI_COMM_WORLD, &status);
}
Or by using non-blocking sends and receives:
// Non-blocking variant: post both operations, then wait for both to finish.
// NOTE(review): this snippet assumes exactly two ranks; with more ranks,
// ranks >= 2 would reach MPI_Waitall with reqs[] uninitialized.
MPI_Request reqs[2];
MPI_Status statuses[2];
if (processor_rank == 0) {
MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD, &reqs[0]);
} else if (processor_rank == 1) {
MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD, &reqs[0]);
}
if (processor_rank == 0 || processor_rank == 1)
MPI_Irecv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall(2, reqs, statuses);

Thank you Jonathan for your help. Here I have chosen the third solution and written a similar code to yours except adding "for" loops to send a number of messages. This time it doesn't deadlock; however processors keep on receiving only the last message. (since the messages are long, I've only printed their last elements to check the consistency)
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <math.h>
using namespace std;
int main(int argc, char *argv[])
{
int processor_count, processor_rank;
//Initialize MPI
MPI_Init (&argc, &argv);
MPI_Comm_size (MPI_COMM_WORLD, &processor_count);
MPI_Comm_rank (MPI_COMM_WORLD, &processor_rank);
double **buff_H, *buff_send_H;
int N_pa_prim1, l, n, N_p0, count, temp;
N_pa_prim1=5; l=7; n=50; N_p0=7;
MPI_Request reqs[N_pa_prim1];
MPI_Status statuses[N_pa_prim1];
buff_H = new double *[N_pa_prim1]; //Receive buffer allocation
for (int i = 0; i < N_pa_prim1; i++)
buff_H[i] = new double [n*n*N_p0+1];
buff_send_H = new double [n*n*N_p0+1]; //Send buffer allocation
if (processor_rank == 0) {
for (int i = 0; i < N_pa_prim1; i++){
for (int j = 0; j < n*n*N_p0+1; j++)
buff_send_H[j] = 2.0325e-8*rand();
cout << processor_rank << "\t" << buff_send_H[n*n*N_p0] << "\t" << "Send" << "\t" << endl;
MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD, &reqs[i]);
}
}
else if (processor_rank == 1) {
for (int i = 0; i < N_pa_prim1; i++){
for (int j = 0; j < n*n*N_p0+1; j++)
buff_send_H[j] = 3.5871e-8*rand();
cout << processor_rank << "\t" << buff_send_H[n*n*N_p0] << "\t" << "Send" << "\t" << endl;
MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD, &reqs[i]);
}
}
for (int i = 0; i < N_pa_prim1; i++)
MPI_Irecv(buff_H[i], n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &reqs[N_pa_prim1+i]);
MPI_Waitall(2*N_pa_prim1, reqs, statuses);
for (int i = 0; i < N_pa_prim1; i++)
cout << processor_rank << "\t" << buff_H[i][n*n*N_p0] << "\t" << "Receive" << endl;
MPI_Finalize();
return 0;
}

Related

MPI - One-sided communications

I'm trying to use one-sided communications in MPI.
The following example consists of an array of 4 doubles that is split between 2 processes.
The first process writes 0, 1, 2, 3 in the distributed array while the second one subsequently tries to read it. Unfortunately, it doesn't work. I must be doing something wrong somewhere.
Thanks!
#include <mpi.h>
#include <iostream>
// One-sided demo: rank 0 writes 0..n*size-1 into a distributed array with
// MPI_Put; the other ranks read it back with MPI_Get.
//
// Fixes relative to the original:
//  - MPI_Win_create was given n*sizeof(float) bytes and disp_unit
//    sizeof(float) although the memory holds doubles, so the window was half
//    the required size and the displacements were wrong.
//  - MPI_Win_fence is collective: every rank must make the same fence calls.
//    The original fenced inside the rank branches and never closed the epoch
//    containing the MPI_Get calls; get results may only be read (and
//    MPI_Win_free called) after the closing fence.
//  - The origin buffer of an MPI_Put must not be modified before the closing
//    fence; the original reused one local `x` for every put.
int main(){
    MPI_Init(nullptr, nullptr);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int n=2;
    double* data;
    MPI_Win window;
    MPI_Alloc_mem(n*sizeof(double), MPI_INFO_NULL, &data);
    MPI_Win_create(data, n*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &window);
    double* src = new double[n*size];   // one put-origin slot per element
    double* dst = new double[n*size];   // get targets, read after the fence
    int i;
    MPI_Win_fence(0, window);           // open the put epoch (all ranks)
    if(rank==0){
        for(i=0; i<n*size; ++i){
            src[i]=i;
            MPI_Put(&src[i], 1, MPI_DOUBLE, i/n, i%n, 1, MPI_DOUBLE, window);
        }
    }
    MPI_Win_fence(0, window);           // completes the puts everywhere
    if(rank!=0){
        for(i=0; i<n*size; ++i)
            MPI_Get(&dst[i], 1, MPI_DOUBLE, i/n, i%n, 1, MPI_DOUBLE, window);
    }
    MPI_Win_fence(0, window);           // completes the gets; dst now valid
    if(rank!=0){
        for(i=0; i<n*size; ++i)
            std::cout << i << " " << i/n << " " << i%n << " => " << dst[i] << "\n";
    }
    MPI_Win_free(&window);
    MPI_Free_mem(data);
    delete [] src;
    delete [] dst;
    MPI_Finalize();
    return 0;
}

Getting undesired behavior when sending-receiving messages using MPI

I'm exploring MPI in C++ and I wanted to parallelize the creation of a picture of the Mandelbrot set. I'm using the ppm format. Each processor builds its part and sends it back to the main process that receives it as MPI_CHAR. This is the code:
#include "mpi.h"
#include <complex>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
using namespace std;
// Returns the number of iterations (capped at `max`) before the Mandelbrot
// sequence z -> z^2 + c escapes for the pixel (x, y) of a width x height
// image; a return value of `max` means the point is treated as in the set.
// NOTE(review): the conventional escape radius is 2 (|z| > 2 diverges);
// abs(z) < 4 is kept here to preserve the original colouring.
int mandelbrot(int x, int y, int width, int height, int max) {
complex<float> point((float) (y - height/2.0) * 4.0/width, (float) (x - width/2.0) * 4.0/width);
complex<float> z(0, 0);
// `iteration` is int (not unsigned) so `iteration < max` is no longer a
// mixed signed/unsigned comparison against the int parameter.
int iteration = 0;
while (abs(z) < 4 && iteration < max) {
z = z * z + point;
iteration++;
}
return iteration;
}
// Rank 0 assembles a PPM image from the row bands computed by the workers.
//
// Fixes relative to the original:
//  - the receive buffer was a fixed char[404270] streamed into the ofstream
//    without a NUL terminator, so operator<< read past the received bytes
//    and mixed in stale data from earlier (larger) messages; a std::string
//    sized from MPI_Get_count is used instead.
//  - printf'd a size_t with %d (undefined behaviour); now %zu.
// NOTE(review): std::stringstream requires <sstream>; the original relied on
// a transitive include.
int main(int argc, char **argv) {
int numprocs;
int myid;
int i;
MPI_Status stat;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
int width = 200, height = 200, max_iter = 1000;
if (myid == 0) {
ofstream image("mandel.ppm");
image << "P3\n" << width << " " << height << " 255\n";
// Receive the bands in rank order so rows appear top-to-bottom.
for(i=1; i < numprocs; i++) {
MPI_Probe(i, 0, MPI_COMM_WORLD, &stat);
int length;
MPI_Get_count(&stat, MPI_CHAR, &length);
string band(length, '\0');   // exactly the message size, no terminator games
MPI_Recv(&band[0], length, MPI_CHAR, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
image << band;               // a string knows its own length
}
} else {
stringstream ss;
// proc rank: 1, 2, ..., n
int part = height/(numprocs-1), start = (myid - 1) * part, end = part * myid;
printf("%d -> %d\n", start, end);
for (int row = start; row < end; row++) {
for (int col = 0; col < width; col++) {
int iteration = mandelbrot(row, col, width, height, max_iter);
if (row == start) ss << 255 << ' ' << 255 << ' ' << 255 << "\n";
else if (iteration < max_iter) ss << iteration * 255 << ' ' << iteration * 20 << ' ' << iteration * 5 << "\n";
else ss << 0 << ' ' << 0 << ' ' << 0 << "\n";
}
}
const string band = ss.str();    // build once; c_str() stays valid for the send
printf("\n sizeof = %zu\n", band.length());
MPI_Send(band.c_str(), band.length(), MPI_CHAR, 0, 0, MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
Code compilation:
$ mpic++ -std=c++0x mpi.mandel.cpp -o mpi.mandel
Running with 3 processes (process main + process rank 1 and 2)
$ mpirun -np 3 ./mpi.mandel
Resulting ppm pictures when running with 3, 4, and 5 process:
It seems that the point-to-point communication of sending-receiving is mixing the results when more than 3 processes try to send the MPI_CHAR elements to the main process. How can avoid this behavior?
It works when creating the buffer buff with the same length as the receiving message:
.
.
for (int i=1; i < numprocs; i++) {
MPI_Probe(i, 0, MPI_COMM_WORLD, &stat);
int length;
MPI_Get_count(&stat, MPI_CHAR, &length);
printf("\nfrom %d <<-- %d (stat.source=%d) Receiving %d chars\n", myid, i, stat.MPI_SOURCE, length);
char buff[length + 1];
MPI_Recv(buff, length, MPI_CHAR, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
buff[length] = '\0';
image << buff;
}
.
.
Thus, we don't need anymore the declaration at the beginning int buff_size = 404270; neither char buff[buff_size];

Removing MPI_Bcast()

So I have a some code where I am using MPI_Bcast to send information from the root node to all nodes, but instead I want to get my P0 to send chunks of the array to individual processes.
How do I do this with MPI_Send and MPI_Receive?
I've never used them before and I don't know if I need to loop my MPI_Receive to effectively send everything or what.
I've put giant caps lock comments in the code where I need to replace my MPI_Bcast(), sorry in advance for the waterfall of code.
Code:
#include "mpi.h"
#include <stdio.h>
#include <math.h>
#define MAXSIZE 10000000
/* Sum the elements A[low] .. A[high] inclusive; returns 0 when low > high. */
int add(int *A, int low, int high)
{
    int total = 0;
    int idx;
    for (idx = low; idx <= high; idx++) {
        total += A[idx];
    }
    return total;
}
/*
 * Broadcasts an array of MAXSIZE ones to every rank, has each rank sum its
 * slice, and combines the partial sums on rank 0 with MPI_Reduce.
 *
 * Fixes relative to the original:
 *  - K&R-style parameter declarations replaced with a standard prototype.
 *  - data[] was a ~40 MB automatic (stack) array, enough to overflow the
 *    default stack limit on most systems; it is now static.
 */
int main(int argc, char *argv[])
{
    int myid, numprocs, x;
    static int data[MAXSIZE];       /* static: far too large for the stack */
    int i, low, high, myres, res;
    double elapsed_time;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    if (myid == 0)
    {
        for (i = 0; i < MAXSIZE; i++)
            data[i] = 1;
    }
    /* start the timer */
    elapsed_time = -MPI_Wtime();
    /* Distribute the whole array; see the answer below for the MPI_Scatter
     * and MPI_Send/MPI_Recv alternatives. */
    MPI_Bcast(data, MAXSIZE, MPI_INT, 0, MPI_COMM_WORLD);
    x = MAXSIZE/numprocs;
    low = myid * x;
    high = low + x - 1;
    if (myid == numprocs - 1)
        high = MAXSIZE-1;           /* last rank absorbs the remainder */
    myres = add(data, low, high);
    printf("I got %d from %d\n", myres, myid);
    MPI_Reduce(&myres, &res, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    /* stop the timer */
    elapsed_time += MPI_Wtime();
    if (myid == 0)
        printf("The sum is %d, time taken = %f.\n", res, elapsed_time);
    MPI_Barrier(MPI_COMM_WORLD);
    /* NOTE(review): res is only defined on rank 0 (the MPI_Reduce root);
     * other ranks print an indeterminate value here. */
    printf("The sum is %d at process %d.\n", res, myid);
    MPI_Finalize();
    return 0;
}
You need MPI_Scatter. A good intro is here: http://mpitutorial.com/tutorials/mpi-scatter-gather-and-allgather/
I think in your code it could look like this:
/* Each rank's share of the array.  The original left elements_per_proc
 * without a type (implicit int is invalid since C99).  Note the integer
 * division: if numprocs does not divide MAXSIZE, the last
 * MAXSIZE % numprocs elements are never scattered. */
int elements_per_proc = MAXSIZE/numprocs;
// Create a buffer that will hold a chunk of the global array
int *data_chunk = malloc(sizeof(int) * elements_per_proc);
MPI_Scatter(data, elements_per_proc, MPI_INT, data_chunk,
            elements_per_proc, MPI_INT, 0, MPI_COMM_WORLD);
If you really want use MPI_Send and MPI_Recv, then you can use something like this:
int x = MAXSIZE / numprocs;
int *procData = new int[x];
if (rank == 0) {
    // Send rank i its chunk.  The original looped `i < num`, but `num` is
    // undefined -- the bound must be numprocs.  Note rank 0 keeps
    // data[0..x-1] for itself, and any remainder when numprocs does not
    // divide MAXSIZE is never sent.
    for (int i = 1; i < numprocs; i++) {
        MPI_Send(data + i*x, x, MPI_INT, i, 0, MPI_COMM_WORLD);
    }
} else {
    MPI_Recv(procData, x, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
}

OpenMPI Master Slave

I'm trying to divide the string str_data1 and send it to the slave processors in MPI_COMM_WORLD, but I am getting an error on the slaves.
The error looks something like this:
2
2
3
�E0� �E0�� �E0O�
#include <stdio.h>
#include <string.h>
#include "mpi.h"
int main(int argc, char* argv[]) {
int rank;
int p;
MPI_Status status;
int msg_size = 0;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &p);
// NOTE(review): assigning a string literal to char* is deprecated in C++
// (see the compiler warnings quoted in the answer below).
char *str_data1 = "Samsung Galaxy Tab S 10.5 Wi-Fi, Tablet PC Android";
int len1 = strlen(str_data1), i;
// str1 and a are arrays of char POINTERS; sending them as MPI_CHAR below
// transmits raw pointer bytes, not the token text -- hence the garbage the
// slaves print.
char *str1[len1];
char *a[len1];
if (rank == 0) {
char *ds = strdup(str_data1);
int n = 0;
// Each a[n] points at a token inside the strdup'd buffer ds.
a[n] = strtok(ds, " ,");
while (a[n] && n < len1) {
a[++n] = strtok(NULL, " ,");
}
int chunk = n / p;
int str_size = chunk;
for (i = 1; i < p; i++) {
if (i == p - 1) {
str_size = n - chunk * i;
}
MPI_Send(&str_size, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
// BUG: &a has type char *(*)[len1]; this sends pointer values (meaningless
// in another process), and the count (str_size + 1) bears no relation to
// the token lengths.
MPI_Send(&a, str_size + 1, MPI_CHAR, i, 0, MPI_COMM_WORLD);
}
} else {
MPI_Recv(&msg_size, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
// NOTE(review): this line is garbled in the transcript; it was presumably
// `char message[msg_size + 1];`.
char messagsg_size];
printf(" \n %d ", msg_size);
MPI_Recv(&message, msg_size + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD,
&status);
printf(" \n %s ", message);
}
MPI_Finalize();
return 0;
}
Does anyone have any clues about what I'm doing wrong? Thanks.
Some newer compilers will give you nice warning about these sorts of things. It's been very nice ever since Clang added this. If you were to use that compiler, you'd see this:
$ mpic++ asdf.c
clang: warning: treating 'c' input as 'c++' when in C++ mode, this behavior is deprecated
asdf.c:17:23: warning: conversion from string literal to 'char *' is deprecated [-Wc++11-compat-deprecated-writable-strings]
char *str_data1 = "Samsung Galaxy Tab S 10.5 Wi-Fi, Tablet PC Android";
^
asdf.c:39:22: warning: argument type 'char *(*)[len1]' doesn't match specified 'MPI' type tag that requires 'char *' [-Wtype-safety]
MPI_Send(&a, str_size + 1, MPI_CHAR, i, 0, MPI_COMM_WORLD);
^~ ~~~~~~~~
asdf.c:47:18: warning: argument type 'char (*)[msg_size]' doesn't match specified 'MPI' type tag that requires 'char *' [-Wtype-safety]
MPI_Recv(&message, msg_size + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD,
^~~~~~~~ ~~~~~~~~
3 warnings generated.
That shows you that you're using the wrong types for your character arrays. You should be using either just character arrays:
char a[len1];
or character pointers:
char *a;
Either way, you need to do some code rework to make that work correctly. Specifically, this section:
char *ds = strdup(str_data1);
int n = 0;
a[n] = strtok(ds, " ,");
while (a[n] && n < len1) {
a[++n] = strtok(NULL, " ,");
}
I don't think that string tokenizer is doing what you think it is since it would just be overwriting itself over and over.
Thanks Wesley. I tried it with a structure, and it looks like it's working.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include<mpi.h>
/* A word is at most 29 characters plus the terminating NUL. */
struct tokan {
char buff[30];
} t[50];
/*
 * Rank 0 tokenises str_data1 and distributes the tokens (plus the whole of
 * str_data2) to the other ranks with point-to-point messages, which print
 * what they receive.
 *
 * Fixes relative to the original:
 *  - str1/str2 were malloc'd strlen() bytes -- one byte short for the NUL
 *    that strcpy writes (heap overflow).
 *  - tokens were sent and received with count 100 although tokan::buff
 *    holds only 30 bytes (out-of-bounds read on send, write on receive).
 *  - token copies are now bounds-checked and explicitly NUL-terminated,
 *    and at most 50 tokens (the capacity of t[]) are stored.
 */
int main(int argc, char* argv[]) {
    int rank;
    int p;
    MPI_Status status;
    int msg_size = 0, i = 0, j = 0;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    int n = 0, k = 0, M = 0;
    char *str1 = NULL, *str2 = NULL, *str_data2, *str_data1;
    if (rank == 0) {
        str_data1 =
                "processes in this communicator will now abort, and potentially your MPI job";
        str_data2 =
                "The behavior is undefined if lhs or rhs are not pointers to null-terminated byte strings";
        str1 = (char *) malloc((strlen(str_data1) + 1) * sizeof(char)); /* +1 for NUL */
        str2 = (char *) malloc((strlen(str_data2) + 1) * sizeof(char));
        strcpy(str1, str_data1);
        strcpy(str2, str_data2);
        M = strlen(str2);
        char *ptr;
        k = 0;
        char *ds = strdup(str_data1);
        ptr = strtok(ds, " ,");
        while (ptr != NULL && k < 50) {
            ++n;
            size_t len = strlen(ptr);
            if (len >= sizeof(t[k].buff))
                len = sizeof(t[k].buff) - 1;    /* truncate over-long tokens */
            memcpy(t[k].buff, ptr, len);
            t[k].buff[len] = '\0';
            k++;
            ptr = strtok(NULL, " ,");
        }
        free(ds);
        int chunk = n / p;
        int str_size = chunk, cnt = 0;
        j = chunk;  /* tokens 0..chunk-1 stay with rank 0 (as in the original) */
        for (i = 1; i < p; i++) {
            if (i == p - 1) {
                str_size = n - chunk * i;       /* last rank takes the remainder */
            }
            MPI_Send(&str_size, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
            for (cnt = 0; cnt < str_size; cnt++) {
                MPI_Send(t[j].buff, (int) sizeof(t[j].buff), MPI_CHAR, i, 0, MPI_COMM_WORLD);
                j++;
            }
        }
        for (i = 1; i < p; i++) {
            MPI_Send(&M, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
            MPI_Send(str2, M, MPI_CHAR, i, 0, MPI_COMM_WORLD);
        }
    } else {
        MPI_Recv(&msg_size, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        for (k = 0; k < msg_size; k++) {
            MPI_Recv(t[k].buff, (int) sizeof(t[k].buff), MPI_CHAR, 0, 0, MPI_COMM_WORLD, &status);
            printf(" \nstr1: %s %d %d %d\n", t[k].buff, rank, msg_size, k);
        }
        printf("***************");
        MPI_Recv(&M, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        str2 = (char *) malloc((M + 1) * sizeof(char));
        MPI_Recv(str2, M, MPI_CHAR, 0, 0, MPI_COMM_WORLD, &status);
        str2[M] = '\0';
        printf(" \nstr2: %s %d %d %d \n", str2, rank, k, M);
    }
    if (rank == 0)
        free(str1);
    free(str2);     /* allocated on every rank by this point */
    MPI_Finalize();
    return 0;
}

MPI Receive from many proceses

Here is my code:
// Worker ranks send their pixel count, start coordinates, and then each
// result value in its own one-int message; rank 0 mirrors the sequence.
if (rank != 0) {
// send the number of processed pixels
rc = MPI_Send(&pixeli, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
// send the coordinates where processing started
rc = MPI_Send(&first_line, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
rc = MPI_Send(&first_col, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
// NOTE(review): one MPI_Send per pixel floods the receiver's unexpected-
// message queue (see the error quoted below); send the whole array in one
// call instead.
for (i = 0; i < pixeli; i++) {
rc = MPI_Send(&results[i], 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
}
}
else {
// Rank 0 drains each worker in rank order.
for (i = 1; i < numtasks; i++) {
rc = MPI_Recv(&received_pixels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
results_recv = (int*) calloc (received_pixels, sizeof(int));
rc = MPI_Recv(&start_line_recv, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
rc = MPI_Recv(&start_col_recv, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
for (j = 0; j < received_pixels; j++) {
rc = MPI_Recv(&results_recv[j], 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
}
free(results_recv);
}
// NOTE(review): the closing brace of this else block is missing from the
// excerpt as posted.
If I run this with 2 processes it is OK because one will send and the other one will receive.
If I run this with 4 processes I receive the following error messages:
Fatal error in MPI_Recv: Other MPI error, error stack:
MPI_Recv(186)...........................: MPI_Recv(buf=0xbff05324, count=1, MPI_INT, src=1, tag=1, MPI_COMM_WORLD, status=0xbff053ec) failed
MPIDI_CH3I_Progress(461)................:
MPID_nem_handle_pkt(636)................:
MPIDI_CH3_PktHandler_EagerShortSend(308): Failed to allocate memory for an unexpected message. 261895 unexpected messages queued.
What should I do to fix this?
These lines:
for (i = 0; i < pixeli; i++) {
rc = MPI_Send(&results[i], 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
}
and the corresponding MPI_Recvs look like they're essentially reimplementing MPI_Gather. Using the MPI_Gather call with size set to pixeli instead of 1 may allow the implementation to schedule the sends and receives more efficiently, but more importantly, it will probably drastically cut down on the total number of send/receive pairs needed to complete the whole batch of communication. You could do similar by removing the for loop and doing:
rc = MPI_Send(&results[i], pixeli, MPI_INT, 0, tag, MPI_COMM_WORLD);
but again, using the builtin MPI_Gather would be the preferred way of doing it.
The shortest answer is to tell you to use synchronous communications, that is, MPI_Ssend() instead of MPI_Send().
The trouble is that you send too many messages, which are buffered (I guess... but I thought MPI_Send() was blocking...). The memory consumption goes up until failure... Synchronous messages avoid buffering, but they do not reduce the number of messages and may be slower.
You can reduce the number of messages and increase performances by sending many pixels at once : second argument of MPI_Send() or MPI_Recv()...
Sending a buffer of 3 int [pixeli,first_line,first_col] would also limit communications.
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include "mpi.h"
/*
 * Demonstrates the fix for the unexpected-message flood: each worker sends
 * its metadata with synchronous sends and its whole results array in a
 * single message, which rank 0 receives in one MPI_Recv per worker.
 *
 * Fixes relative to the original:
 *  - `int results[pixeli]` was a ~4 MB variable-length array on the stack;
 *    it is now heap-allocated.
 *  - first_line/first_col were sent while holding indeterminate values;
 *    they are now initialised.
 */
int main(int argc,char *argv[])
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int pixeli = 1000000;
    int received_pixels;
    int first_line = 0, first_col = 0, start_line_recv, start_col_recv;
    int tag = 0;
    int *results = malloc(pixeli * sizeof *results);    /* too big for the stack */
    int i;
    for (i = 0; i < pixeli; i++) {
        results[i] = rank*pixeli + i;
    }
    int* results_recv;
    int rc;
    MPI_Status Stat;
    if (rank != 0) {
        /* send the number of processed pixels */
        rc = MPI_Ssend(&pixeli, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
        /* send the coordinates where processing started */
        rc = MPI_Ssend(&first_line, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
        rc = MPI_Ssend(&first_col, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
        /* one message for all pixels instead of one message per pixel */
        MPI_Send(&results[0], pixeli, MPI_INT, 0, tag, MPI_COMM_WORLD);
    }
    else {
        for (i = 1; i < size; i++) {
            rc = MPI_Recv(&received_pixels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
            results_recv = (int*) calloc (received_pixels, sizeof(int));
            rc = MPI_Recv(&start_line_recv, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
            rc = MPI_Recv(&start_col_recv, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
            MPI_Recv(&results_recv[0], received_pixels, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
            free(results_recv);
        }
    }
    free(results);
    MPI_Finalize();
    return 0;
}
Bye,
Francis

Resources