Related
I have a number of processors, let's say 9, that are arranged in a ring. The processors communicate with each other around the ring in a non-blocking setting, using MPI_Isend() and MPI_Irecv(). The task is to receive the rank of the previous processor, add that to its own rank, and then pass the result to its neighbor. This continues until the message reaches processor '0' again. Then processor '0' prints the sum, which is n(n+1)/2 (in this case 45). I know that these non-blocking functions return immediately even if the communication is not finished, and that MPI_Wait() is needed to ensure the completion of the communication. I also know that it's better to have a buffer of size 2 to store the rank and the sum. But I don't know how and when to update the message before sending it to the next rank.
I don't want to use if statements, like: if (rank == 0) send to 1 and add; then if (rank == 1) receive from 0, add 1 and send to 2, ... since this is highly inefficient for a large number of processors.
/* Ring-neighbour exchange: each rank posts non-blocking receives from, and
 * non-blocking sends to, its previous and next rank, then waits on the requests. */
int main (int argc, char *argv[])
{
int size, rank, next, prev;
/* NOTE(review): this declaration ends with ',' rather than ';'. */
int buf[2],
MPI_Request reqs[9];
MPI_Status stats[9];
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
/* Neighbours on the ring; the two ifs below wrap around at rank 0 and rank size-1. */
prev = rank-1;
next = rank+1;
if (rank == 0) prev = size - 1;
if (rank == (size - 1)) next = 0;
/* NOTE(review): ierror, tag1 and tag2 are used below but not declared in this snippet. */
//MPI_Irecv (&buf,count,datatype,source,tag,comm,&request)
ierror = MPI_Irecv(&buf[0], 1, MPI_INT, prev, tag1, MPI_COMM_WORLD, &reqs[0]);
ierror = MPI_Irecv(&buf[1], 1, MPI_INT, next, tag2, MPI_COMM_WORLD, &reqs[1]);
/* NOTE(review): the sends below read buf[0] and buf[1], the same locations the
 * receives above are still writing into; nothing in this snippet assigns them first. */
//MPI_Isend (&buf,count,datatype,dest,tag,comm,&request)
ierror = MPI_Isend(&buf[0], 1, MPI_INT, prev, tag2, MPI_COMM_WORLD, &reqs[2]);
ierror = MPI_Isend(&buf[1], 1, MPI_INT, next, tag1, MPI_COMM_WORLD, &reqs[3]);
/* NOTE(review): only reqs[0..3] are filled in above, but 9 requests are waited on. */
ierror = MPI_Waitall(9, reqs, stats);
In this code I am trying to broadcast using non-blocking send and receive as practice. I have multiple questions and issues.
1. Should I pair Isend() and Irecv() to use the same request?
2. When the message is an array, how should it be passed? In this case, message or &message?
3. Why can't I run this code on fewer or more than 8 processors? If the rank doesn't exist, shouldn't it just go on without executing that piece of code?
4. The snippet at the bottom is there to print the total time once, but the waitall() does not work, and I do not understand why.
5. When passing arrays longer than 2^12, I get a segmentation fault, although I have checked the limits of Isend() and Irecv() and they are supposed to handle even longer messages.
6. I used long double to record the time; is this common or good practice? When I used smaller types like float or double I would get NaN.
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<mpi.h>
/*
 * Hand-written broadcast of `message` from rank 0 to ranks 1-7 along a fixed
 * tree (0 -> 1,2,4; 1 -> 3,5; 2 -> 6; 3 -> 7) using MPI_Isend/MPI_Irecv.
 * Every rank prints "Total Time" at the end. The rank numbers are hard-coded.
 */
int main(int argc, char *argv[]){
MPI_Init(&argc, &argv);
/* `ready` is only referenced by the commented-out MPI_Testall near the end. */
int i, rank, size, ready;
long int N = pow(2, 10);
/* NOTE(review): the size is computed with sizeof(float *) although the elements
 * are float, and the "+ 1" adds one byte, not one element. */
float* message = (float *)malloc(sizeof(float *) * N + 1);
/* Timestamps taken from MPI_Wtime() below. */
long double start, end;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
//MPI_Request* request = (MPI_Request *)malloc(sizeof(MPI_Request *) * size);
/* size-1 request handles; none of them is initialised here. */
MPI_Request request[size-1];
/*Stage I: -np 8*/
if(rank == 0){
/* Root fills the message with pseudo-random values. */
for(i = 0; i < N; i++){
message[i] = N*rand();
message[i] /= rand();
}
/* `start` is assigned on rank 0 only. */
start = MPI_Wtime();
/* NOTE(review): `message` is already a float*; `&message` is the address of the
 * pointer variable itself, not of the N floats it points to. The same form is
 * used in every send and receive below. */
MPI_Isend(&message, N, MPI_FLOAT, 1, 0, MPI_COMM_WORLD, &request[0]);
MPI_Isend(&message, N, MPI_FLOAT, 2, 0, MPI_COMM_WORLD, &request[1]);
MPI_Isend(&message, N, MPI_FLOAT, 4, 0, MPI_COMM_WORLD, &request[3]);
/* No wait or test on these three send requests follows in this block. */
printf("Processor root-rank %d- sent the message...\n", rank);
}
/* Ranks 1-3: receive from the parent, wait for it, then forward to the children. */
if (rank == 1){
MPI_Irecv(&message, N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &request[0]);
MPI_Wait(&request[0], MPI_STATUS_IGNORE);
printf("Processor rank 1 received the message.\n");
MPI_Isend(&message, N, MPI_FLOAT, 3, 0, MPI_COMM_WORLD, &request[2]);
MPI_Isend(&message, N, MPI_FLOAT, 5, 0, MPI_COMM_WORLD, &request[4]);
}
if(rank == 2){
MPI_Irecv(&message, N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &request[1]);
MPI_Wait(&request[1], MPI_STATUS_IGNORE);
printf("Processor rank 2 received the message.\n");
MPI_Isend(&message, N, MPI_FLOAT, 6, 0, MPI_COMM_WORLD, &request[5]);
}
if(rank == 3){
MPI_Irecv(&message, N, MPI_FLOAT, 1, 0, MPI_COMM_WORLD, &request[2]);
MPI_Wait(&request[2], MPI_STATUS_IGNORE);
printf("Processor rank 3 received the message.\n");
MPI_Isend(&message, N, MPI_FLOAT, 7, 0, MPI_COMM_WORLD, &request[6]);
}
/* Ranks 4-7 are leaves: receive and wait only. */
if(rank == 4){
MPI_Irecv(&message, N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &request[3]);
MPI_Wait(&request[3], MPI_STATUS_IGNORE);
printf("Processor rank 4 received the message.\n");
}
if(rank == 5){
MPI_Irecv(&message, N, MPI_FLOAT, 1, 0, MPI_COMM_WORLD, &request[4]);
MPI_Wait(&request[4], MPI_STATUS_IGNORE);
printf("Processor rank 5 received the message.\n");
}
if(rank == 6){
MPI_Irecv(&message, N, MPI_FLOAT, 2, 0, MPI_COMM_WORLD, &request[5]);
MPI_Wait(&request[5], MPI_STATUS_IGNORE);
printf("Processor rank 6 received the message.\n");
}
if(rank == 7){
MPI_Irecv(&message, N, MPI_FLOAT, 3, 0, MPI_COMM_WORLD, &request[6]);
MPI_Wait(&request[6], MPI_STATUS_IGNORE);
printf("Processor rank 7 received the message.\n");
}
/*MPI_Testall(size-1,request,&ready, MPI_STATUS_IGNORE);*/
/* if (ready){*/
/* NOTE(review): this runs on every rank, but `start` was assigned on rank 0 only,
 * so the other ranks subtract a value that was never set. */
end = MPI_Wtime();
printf("Total Time: %Lf\n", end - start);
/*}*/
MPI_Finalize();
}
Each MPI task runs in its own address space, so there is no correlation between request[1] on rank 0 and request[1] on rank 2. That means you do not have to "pair" the requests. That being said, if you think "pairing" the requests improves the readability of your code, you might want to do so even if this is not required.
the buffer parameter of MPI_Isend() and MPI_Irecv() is a pointer to the start of the data, this is message (and not &message) here.
if you run with, say, 2 MPI tasks, MPI_Isend(..., dest=2, ...) on rank 0 will fail because 2 is an invalid rank in the MPI_COMM_WORLD communicator.
many requests are uninitialized when MPI_Waitall() (well, MPI_Testall() here) is invoked. One option is to first initialize all of them to MPI_REQUEST_NULL.
using &message results in memory corruption and that likely explains the crash.
From the MPI standard, the prototype is double MPI_Wtime(), so you should use double here (the NaN likely comes from the memory corruption described above).
This is a MPI code for LU Decomposition.
I have used the following strategy -
There is a master(rank 0) and others are slaves. The master sends rows to each slave.
Since each slave might receive more than one row, I store all the received rows in a
buffer and then perform LU Decomposition on it. After doing that I send back the
buffer to the master. The master does not do any computation. It just sends and receives.
/*
 * Elimination loop of the LU decomposition. For each pivot row i, rank 0
 * broadcasts the row, sends every remaining row j > i to rank map[j], and
 * copies the updated rows it gets back into LU. The other ranks update the
 * rows they receive and return them.
 */
/* Row i is assigned to rank i%(numProcs-1)+1, so rank 0 is never assigned a row. */
for(i=0; i<n; i++)
map[i] = i%(numProcs-1) + 1;
for(i=0; i<n-1; i++)
{
if(rank == 0)
{
/* pivot() is defined elsewhere; its result is broadcast below and -1 aborts the loop. */
status = pivot(LU,i,n);
/* Copy row i of LU into row1, the buffer that is broadcast. */
for(j=0; j<n; j++)
row1[j] = LU[n*i+j];
}
MPI_Bcast(&status, 1, MPI_INT, 0, MPI_COMM_WORLD);
if(status == -1)
return -1;
MPI_Bcast(row1, n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
/* tag1 is not used in this block; tag2/tag3/tag4 label the three reply messages. */
int tag1 = 1, tag2 = 2, tag3 = 3, tag4 = 4;
if(rank == 0)
{
int pno, start, index, l, rowsReceived = 0;
MPI_Request req;
MPI_Status stat;
/* Send each remaining row to its owner, using the owner's rank as the tag.
 * NOTE(review): every MPI_Isend overwrites the single `req`, and no
 * MPI_Wait/MPI_Test on it appears in this block. */
for(j=i+1; j<n; j++)
MPI_Isend(&LU[n*j], n, MPI_DOUBLE, map[j], map[j], MPI_COMM_WORLD, &req);
/* cnt reduces the number of replies expected in the loop below. */
if(i>=n-(numProcs-1))
cnt++;
for(j=0; j<numProcs-1-cnt; j++)
{
/* Reply protocol: sender's rank (tag2), then its row count (tag3), then the rows (tag4). */
MPI_Recv(&pno, 1, MPI_INT, MPI_ANY_SOURCE, tag2, MPI_COMM_WORLD, &stat);
//printf("1. Recv from %d and j : %d and i : %d\n",pno,j,i);
MPI_Recv(&rowsReceived, 1, MPI_INT, pno, tag3, MPI_COMM_WORLD, &stat);
MPI_Recv(rowFinal, n*rowsReceived, MPI_DOUBLE, pno, tag4, MPI_COMM_WORLD, &stat);
/* Will not go more than numProcs anyways */
/* Find the first row after i that is mapped to pno. */
for(k=i+1; k<n; k++)
{
if(map[k] == pno)
{
start = k;
break;
}
}
/* The k-th returned row is written to LU row start + k*(numProcs-1). */
for(k=0; k<rowsReceived; k++)
{
index = start + k*(numProcs-1);
for(l=0; l<n; l++)
LU[n*index+l] = rowFinal[n*k+l];
}
}
}
else
{
int rowsReceived = 0;
MPI_Status stat, stats[3];
MPI_Request reqs[3];
/* Count the rows after i that are mapped to this rank. */
for(j=i+1; j<n; j++)
if(map[j] == rank)
rowsReceived += 1;
/* Receive them one by one from rank 0, packed into rowFinal. */
for(j=0; j<rowsReceived; j++)
{
MPI_Recv(&rowFinal[n*j], n, MPI_DOUBLE, 0, rank, MPI_COMM_WORLD, &stat);
}
/* Subtract factor * row1 from each received row for columns after i, then
 * store the factor itself in column i. */
for(j=0; j<rowsReceived; j++)
{
double factor = rowFinal[n*j+i]/row1[i];
for(k=i+1; k<n; k++)
rowFinal[n*j+k] -= (row1[k]*factor);
rowFinal[n*j+i] = factor;
}
if(rowsReceived != 0)
{
//printf("Data sent from %d iteration : %d\n",rank,i);
/* NOTE(review): these three non-blocking sends are never completed here; the
 * MPI_Waitall below is commented out, and rowFinal is reused in the next iteration. */
MPI_Isend(&rank, 1, MPI_INT, 0, tag2, MPI_COMM_WORLD, &reqs[0]);
MPI_Isend(&rowsReceived, 1, MPI_INT, 0, tag3, MPI_COMM_WORLD, &reqs[1]);
MPI_Isend(rowFinal, n*rowsReceived, MPI_DOUBLE, 0, tag4, MPI_COMM_WORLD, &reqs[2]);
}
//MPI_Waitall(3,reqs,stats);
}
}
The problem that I am facing is that sometimes the program hangs. My guess is
that the sends and receives are not being matched but I am not being able to
figure out where the problem lies.
I ran test cases on matrices of size 1000x1000, 2000x2000, 3000x3000, 5000x5000
and 7000x7000. Presently the code hangs for 7000x7000. Could someone please help
me out?
Things to note :-
map implements the mapping scheme, which row goes to which slave.
rowsReceived tells each slave the no of rows it will receive. I dont need to
calculate that each and every time, but I will fix it later.
row1 is the buffer in which the active row will be stored.
rowFinal is the buffer of the rows being received and being modified.
cnt is not important and can be ignored. For that the check for
rowReceived!=0 needs to be removed.
It looks like you are never completing your nonblocking operations. You have a bunch of calls to MPI_Isend and MPI_Irecv throughout the code, but you're never doing an MPI_Wait or MPI_Test (or one of the similar calls). Without that completion call, those nonblocking calls will never complete.
Here is my code:
/* Every non-root rank sends rank 0 its pixel count, its start line and column,
 * and then its results one int at a time. Rank 0 receives the same sequence
 * from ranks 1..numtasks-1 in order. All messages use the same tag. */
if (rank != 0) {
// send the number of processed pixels
rc = MPI_Send(&pixeli, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
// send the coordinates where processing started
rc = MPI_Send(&first_line, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
rc = MPI_Send(&first_col, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
// one separate message per result value: pixeli sends in total
for (i = 0; i < pixeli; i++) {
rc = MPI_Send(&results[i], 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
}
}
else {
// rank 0 serves the senders strictly in rank order
for (i = 1; i < numtasks; i++) {
rc = MPI_Recv(&received_pixels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
// the receive buffer is sized from the count just received
results_recv = (int*) calloc (received_pixels, sizeof(int));
rc = MPI_Recv(&start_line_recv, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
rc = MPI_Recv(&start_col_recv, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
for (j = 0; j < received_pixels; j++) {
rc = MPI_Recv(&results_recv[j], 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
}
free(results_recv);
}
If I run this with 2 processes it is OK, because one will send and the other one will receive.
If I run this with 4 processes I receive the following error messages:
Fatal error in MPI_Recv: Other MPI error, error stack:
MPI_Recv(186)...........................: MPI_Recv(buf=0xbff05324, count=1, MPI_INT, src=1, tag=1, MPI_COMM_WORLD, status=0xbff053ec) failed
MPIDI_CH3I_Progress(461)................:
MPID_nem_handle_pkt(636)................:
MPIDI_CH3_PktHandler_EagerShortSend(308): Failed to allocate memory for an unexpected message. 261895 unexpected messages queued.
What should I do to fix this?
These lines:
// one single-int message per element: pixeli separate sends to rank 0
for (i = 0; i < pixeli; i++) {
rc = MPI_Send(&results[i], 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
}
and the corresponding MPI_Recvs look like they're essentially reimplementing MPI_Gather. Using the MPI_Gather call with size set to pixeli instead of 1 may allow the implementation to schedule the sends and receives more efficiently, but more importantly, it will probably drastically cut down on the total number of send/receive pairs needed to complete the whole batch of communication. You could do similar by removing the for loop and doing:
// one message carrying all pixeli values; with the loop gone the buffer must start at results[0], not results[i]
rc = MPI_Send(&results[0], pixeli, MPI_INT, 0, tag, MPI_COMM_WORLD);
but again, using the builtin MPI_Gather would be the preferred way of doing it.
The shortest answer is to tell you to use synchronous communication, that is, MPI_Ssend() instead of MPI_Send().
The trouble is that you send too many messages, which are buffered (I guess... but I thought MPI_Send() was blocking...). The memory consumption goes up until failure. Synchronous messages avoid buffering, but they do not reduce the number of messages and may be slower.
You can reduce the number of messages and increase performance by sending many pixels at once: see the second argument of MPI_Send() or MPI_Recv().
Sending a buffer of 3 int [pixeli,first_line,first_col] would also limit communications.
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include "mpi.h"
/*
 * Gather demo: every non-root rank sends rank 0 its pixel count, its two
 * start coordinates and then all of its results in one message. Rank 0
 * receives that sequence from ranks 1..size-1 in order.
 *
 * Returns 0 on success; aborts the MPI job if a buffer cannot be allocated.
 */
int main(int argc,char *argv[])
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int pixeli = 1000000;
    int tag = 0;

    if (rank != 0) {
        /* Placeholder start coordinates, set so that no indeterminate value is sent. */
        int first_line = 0;
        int first_col = 0;

        /* Heap storage: a 1,000,000-int automatic array may not fit on the stack. */
        int *results = malloc((size_t)pixeli * sizeof *results);
        if (results == NULL) {
            fprintf(stderr, "rank %d: cannot allocate %d results\n", rank, pixeli);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            return EXIT_FAILURE;
        }
        for (int i = 0; i < pixeli; i++) {
            results[i] = rank * pixeli + i;
        }

        /* send the number of processed pixels */
        MPI_Ssend(&pixeli, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
        /* send the coordinates where processing started */
        MPI_Ssend(&first_line, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
        MPI_Ssend(&first_col, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
        /* all results in a single message instead of one message per pixel */
        MPI_Send(&results[0], pixeli, MPI_INT, 0, tag, MPI_COMM_WORLD);

        free(results);
    }
    else {
        /* Serve the senders strictly in rank order. */
        for (int src = 1; src < size; src++) {
            int received_pixels = 0;
            int start_line_recv = 0;
            int start_col_recv = 0;
            MPI_Status Stat;

            MPI_Recv(&received_pixels, 1, MPI_INT, src, tag, MPI_COMM_WORLD, &Stat);

            /* The receive buffer is sized from the count the sender announced. */
            int *results_recv = calloc((size_t)received_pixels, sizeof *results_recv);
            if (results_recv == NULL && received_pixels != 0) {
                fprintf(stderr, "rank 0: cannot allocate %d results for rank %d\n",
                        received_pixels, src);
                MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
                return EXIT_FAILURE;
            }

            MPI_Recv(&start_line_recv, 1, MPI_INT, src, tag, MPI_COMM_WORLD, &Stat);
            MPI_Recv(&start_col_recv, 1, MPI_INT, src, tag, MPI_COMM_WORLD, &Stat);
            MPI_Recv(results_recv, received_pixels, MPI_INT, src, tag, MPI_COMM_WORLD, &Stat);

            free(results_recv);
        }
    }

    MPI_Finalize();
    return 0;
}
Bye,
Francis
I'm trying to create a log file from each processor and then send that to the root as a char array. I first send the length and then I send the data. The length sends fine, but the data is always garbage! Here is my code:
// Each non-root rank sends rank 0 the length of its log string and then the
// characters; rank 0 receives both from ranks 1..num_procs-1 and appends the
// text to `out`.
MPI_Barrier (MPI_COMM_WORLD);
string out = "";
MPI_Status status[2];
// reqs is not used anywhere in this snippet
MPI_Request reqs[num_procs];
string log = "TEST";
int length = log.length();
// C-string copy of the log, including the terminating '\0'
char* temp = (char *) malloc(length+1);
strcpy(temp, log.c_str());
if (my_id != 0)
{
MPI_Send (&length, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
// NOTE(review): temp is already a char*; &temp is the address of the pointer
// variable, so the bytes sent start at the pointer itself, not at the text.
MPI_Send (&temp, length+1, MPI_CHAR, 0, 1, MPI_COMM_WORLD);
}
else {
// shadows the outer `length`; holds the length announced by the current sender
int length;
for (int i = 1; i < num_procs; i++)
{
// NOTE(review): the count is 2 although `length` is a single int and the sender sends 1.
MPI_Recv (&length, 2, MPI_INT, i, 1, MPI_COMM_WORLD, &status[0]);
char* rec_buf;
rec_buf = (char *) malloc(length+1);
MPI_Recv (rec_buf, length+1, MPI_CHAR, i, 1, MPI_COMM_WORLD, &status[1]);
out += rec_buf;
free(rec_buf);
}
}
MPI_Barrier (MPI_COMM_WORLD);
free(temp);
You are passing a char** to MPI_Send instead of a char*. This causes memory corruption or, in your case, the garbled output you are getting. Everything should be fine if you use
// pass the char* itself, so the length+1 bytes sent are the characters it points to
MPI_Send (temp, length+1, MPI_CHAR, 0, 1, MPI_COMM_WORLD);
(note the removed & in front of the first argument, temp.)