When doing the final reduction (a summation of a number of matrices in my program), as follows,
struct Tomo {
typedef Eigen::Matrix<int, HISTOGRAM_BOXES, HISTOGRAM_BOXES, Eigen::RowMajor> HistoMtx;
HistoMtx exp_val;
HistoMtx u;
[...]
struct buffer_set {
Tomo * X;
Tomo * Y;
Tomo * Z;
} buffers[2];
[...]
if(rank == 0){
/* MASTER NODE */
for(int source=1; source<size; source++){
printf("Reducing from %i\n", source);
for(int i=0;i<env_count;i++){
MPI_Recv(buffers[1].X[i].exp_val.data(), buffers[1].X[i].exp_val.size(), MPI_INT, source, 0, MPI_COMM_WORLD, &status);
MPI_Recv(buffers[1].Y[i].exp_val.data(), buffers[1].Y[i].exp_val.size(), MPI_INT, source, 0, MPI_COMM_WORLD, &status);
MPI_Recv(buffers[1].Z[i].exp_val.data(), buffers[1].Z[i].exp_val.size(), MPI_INT, source, 0, MPI_COMM_WORLD, &status);
MPI_Recv(buffers[1].X[i].u.data(), buffers[1].X[i].u.size(), MPI_INT, source, 0, MPI_COMM_WORLD, &status);
MPI_Recv(buffers[1].Y[i].u.data(), buffers[1].Y[i].u.size(), MPI_INT, source, 0, MPI_COMM_WORLD, &status);
MPI_Recv(buffers[1].Z[i].u.data(), buffers[1].Z[i].u.size(), MPI_INT, source, 0, MPI_COMM_WORLD, &status);
}
merge_buffers(0, 1);
}
WriteH5File("h5file.h5", 0);
}else{
/* SLAVE NODES */
for(int i=0;i<env_count;i++){
MPI_Send(buffers[0].X[i].exp_val.data(), buffers[0].X[i].exp_val.size(), MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(buffers[0].Y[i].exp_val.data(), buffers[0].Y[i].exp_val.size(), MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(buffers[0].Z[i].exp_val.data(), buffers[0].Z[i].exp_val.size(), MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(buffers[0].X[i].u.data(), buffers[0].X[i].u.size(), MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(buffers[0].Y[i].u.data(), buffers[0].Y[i].u.size(), MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(buffers[0].Z[i].u.data(), buffers[0].Z[i].u.size(), MPI_INT, 0, 0, MPI_COMM_WORLD);
}
}
the pbs_mom process dies. When running the program in an interactive session, I find the following in my logs
[compute-35-3.local:01139] [[33012,0],2] ORTED_CMD_PROCESSOR: STUCK IN INFINITE LOOP - ABORTING
[compute-35-3:01139] *** Process received signal ***
I don't understand what this means or what would trigger it. It seems quite internal to OpenMPI.
This could be an issue with the underlying network or something else that might require administrator attention. See, for example:
http://www.open-mpi.org/community/lists/users/2010/08/14130.php
http://lists.mcs.anl.gov/pipermail/petsc-users/2013-August/018470.html
In this code I am trying to broadcast using non-blocking send and receive as practice. I have several questions and issues:
1. Should I pair MPI_Isend() and MPI_Irecv() to use the same request?
2. When the message is an array, how should it be passed? In this case, message or &message?
3. Why can't I run this code on fewer or more than 8 processors? If the rank doesn't exist, shouldn't it just go on without executing that piece of code?
4. The snippet at the bottom is there in order to print the total time once, but the waitall() does not work, and I do not understand why.
5. When passing arrays longer than 2^12, I get a segmentation fault, although I have checked the limits of MPI_Isend() and MPI_Irecv() and they are supposed to handle even bigger messages.
6. I used long double to record the time. Is this a common or good practice? When I used smaller types like float or double I would get NaN.
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<mpi.h>
int main(int argc, char *argv[]){
MPI_Init(&argc, &argv);
int i, rank, size, ready;
long int N = pow(2, 10);
float* message = (float *)malloc(sizeof(float *) * N + 1);
long double start, end;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
//MPI_Request* request = (MPI_Request *)malloc(sizeof(MPI_Request *) * size);
MPI_Request request[size-1];
/*Stage I: -np 8*/
if(rank == 0){
for(i = 0; i < N; i++){
message[i] = N*rand();
message[i] /= rand();
}
start = MPI_Wtime();
MPI_Isend(&message, N, MPI_FLOAT, 1, 0, MPI_COMM_WORLD, &request[0]);
MPI_Isend(&message, N, MPI_FLOAT, 2, 0, MPI_COMM_WORLD, &request[1]);
MPI_Isend(&message, N, MPI_FLOAT, 4, 0, MPI_COMM_WORLD, &request[3]);
printf("Processor root-rank %d- sent the message...\n", rank);
}
if (rank == 1){
MPI_Irecv(&message, N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &request[0]);
MPI_Wait(&request[0], MPI_STATUS_IGNORE);
printf("Processor rank 1 received the message.\n");
MPI_Isend(&message, N, MPI_FLOAT, 3, 0, MPI_COMM_WORLD, &request[2]);
MPI_Isend(&message, N, MPI_FLOAT, 5, 0, MPI_COMM_WORLD, &request[4]);
}
if(rank == 2){
MPI_Irecv(&message, N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &request[1]);
MPI_Wait(&request[1], MPI_STATUS_IGNORE);
printf("Processor rank 2 received the message.\n");
MPI_Isend(&message, N, MPI_FLOAT, 6, 0, MPI_COMM_WORLD, &request[5]);
}
if(rank == 3){
MPI_Irecv(&message, N, MPI_FLOAT, 1, 0, MPI_COMM_WORLD, &request[2]);
MPI_Wait(&request[2], MPI_STATUS_IGNORE);
printf("Processor rank 3 received the message.\n");
MPI_Isend(&message, N, MPI_FLOAT, 7, 0, MPI_COMM_WORLD, &request[6]);
}
if(rank == 4){
MPI_Irecv(&message, N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &request[3]);
MPI_Wait(&request[3], MPI_STATUS_IGNORE);
printf("Processor rank 4 received the message.\n");
}
if(rank == 5){
MPI_Irecv(&message, N, MPI_FLOAT, 1, 0, MPI_COMM_WORLD, &request[4]);
MPI_Wait(&request[4], MPI_STATUS_IGNORE);
printf("Processor rank 5 received the message.\n");
}
if(rank == 6){
MPI_Irecv(&message, N, MPI_FLOAT, 2, 0, MPI_COMM_WORLD, &request[5]);
MPI_Wait(&request[5], MPI_STATUS_IGNORE);
printf("Processor rank 6 received the message.\n");
}
if(rank == 7){
MPI_Irecv(&message, N, MPI_FLOAT, 3, 0, MPI_COMM_WORLD, &request[6]);
MPI_Wait(&request[6], MPI_STATUS_IGNORE);
printf("Processor rank 7 received the message.\n");
}
/*MPI_Testall(size-1,request,&ready, MPI_STATUS_IGNORE);*/
/* if (ready){*/
end = MPI_Wtime();
printf("Total Time: %Lf\n", end - start);
/*}*/
MPI_Finalize();
}
Each MPI task runs in its own address space, so there is no correlation between request[1] on rank 0 and request[1] on rank 2. That means you do not have to "pair" the requests. That being said, if you think "pairing" the requests improves the readability of your code, you might want to do so even if this is not required.
The buffer parameter of MPI_Isend() and MPI_Irecv() is a pointer to the start of the data; here that is message (and not &message).
If you run with, say, 2 MPI tasks, MPI_Isend(..., dest=2, ...) on rank 0 will fail because 2 is an invalid rank in the MPI_COMM_WORLD communicator.
Many requests are uninitialized when MPI_Waitall() (well, MPI_Testall() here) is invoked. One option is to first initialize all of them to MPI_REQUEST_NULL.
Using &message results in memory corruption, and that likely explains the crash.
Per the MPI standard, the prototype is double MPI_Wtime(), so you should use double here (the NaN values likely come from the memory corruption described above).
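Putting these points together, here is a minimal corrected sketch. It is illustrative only: it uses a regular binary tree where rank r forwards to ranks 2r+1 and 2r+2 (slightly different from the hand-coded fan-out in the question) and only forwards to children that exist, so it runs with any number of processes.
/* Minimal sketch of the relay with the fixes applied.
 * Compile: mpicc tree_bcast.c -o tree_bcast ; run: mpirun -np 8 ./tree_bcast */
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    long int N = 1 << 10;
    float *message = malloc(sizeof(float) * N);   /* sizeof(float), not sizeof(float *) */

    MPI_Request request[2] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL };

    double start = MPI_Wtime();                   /* MPI_Wtime() returns double */

    if (rank == 0) {
        for (long int i = 0; i < N; i++)
            message[i] = (float)i;
    } else {
        /* receive from the parent in the tree */
        MPI_Recv(message, N, MPI_FLOAT, (rank - 1) / 2, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }

    /* forward to the children that actually exist */
    int nreq = 0;
    for (int c = 2 * rank + 1; c <= 2 * rank + 2; c++)
        if (c < size)
            MPI_Isend(message, N, MPI_FLOAT, c, 0,   /* message, not &message */
                      MPI_COMM_WORLD, &request[nreq++]);

    MPI_Waitall(nreq, request, MPI_STATUSES_IGNORE);

    double end = MPI_Wtime();
    printf("rank %d done in %f s\n", rank, end - start);

    free(message);
    MPI_Finalize();
    return 0;
}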
Here is my code:
if (rank != 0) {
// trimitem numarul de pixeli prelucrati
rc = MPI_Send(&pixeli, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
// trimitem coordonatele de unde am inceput prelucrarea
rc = MPI_Send(&first_line, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
rc = MPI_Send(&first_col, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
for (i = 0; i < pixeli; i++) {
rc = MPI_Send(&results[i], 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
}
}
else {
for (i = 1; i < numtasks; i++) {
rc = MPI_Recv(&received_pixels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
results_recv = (int*) calloc (received_pixels, sizeof(int));
rc = MPI_Recv(&start_line_recv, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
rc = MPI_Recv(&start_col_recv, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
for (j = 0; j < received_pixels; j++) {
rc = MPI_Recv(&results_recv[j], 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
}
free(results_recv);
}
If I run this with 2 processes it is OK, because one will send and the other one will receive.
If I run this with 4 processes I receive the following error messages:
Fatal error in MPI_Recv: Other MPI error, error stack:
MPI_Recv(186)...........................: MPI_Recv(buf=0xbff05324, count=1, MPI_INT, src=1, tag=1, MPI_COMM_WORLD, status=0xbff053ec) failed
MPIDI_CH3I_Progress(461)................:
MPID_nem_handle_pkt(636)................:
MPIDI_CH3_PktHandler_EagerShortSend(308): Failed to allocate memory for an unexpected message. 261895 unexpected messages queued.
What should I do to fix this?
These lines:
for (i = 0; i < pixeli; i++) {
rc = MPI_Send(&results[i], 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
}
and the corresponding MPI_Recvs look like they're essentially reimplementing MPI_Gather. Using a single MPI_Gather call with the count set to pixeli instead of 1 may allow the implementation to schedule the sends and receives more efficiently, but more importantly, it will probably drastically cut down on the total number of send/receive pairs needed to complete the whole batch of communication. You could do something similar by removing the for loop and doing:
rc = MPI_Send(&results[0], pixeli, MPI_INT, 0, tag, MPI_COMM_WORLD);
but again, using the builtin MPI_Gather would be the preferred way of doing it.
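For reference, here is a minimal self-contained sketch of the gather-based version. It assumes every rank contributes the same pixeli count; with per-rank counts you would reach for MPI_Gatherv instead.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int pixeli = 1000;                            /* same count on every rank */
    int *results = malloc(pixeli * sizeof(int));
    for (int i = 0; i < pixeli; i++)
        results[i] = rank * pixeli + i;

    /* only the root needs the receive buffer */
    int *all_results = NULL;
    if (rank == 0)
        all_results = malloc((size_t)size * pixeli * sizeof(int));

    /* one collective call replaces the per-pixel send/recv loops */
    MPI_Gather(results, pixeli, MPI_INT,
               all_results, pixeli, MPI_INT,
               0, MPI_COMM_WORLD);

    if (rank == 0) {
        /* all_results[r * pixeli + j] is element j from rank r */
        printf("first element from last rank: %d\n",
               all_results[(size - 1) * pixeli]);
        free(all_results);
    }

    free(results);
    MPI_Finalize();
    return 0;
}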
The shortest answer is to tell you to use synchronous communication, that is, MPI_Ssend() instead of MPI_Send().
The trouble is that you send too many messages, which get buffered (I guess... although I thought MPI_Send() was blocking...). The memory consumption goes up until failure. Synchronous messages avoid buffering, but they do not reduce the number of messages and may be slower.
You can reduce the number of messages and improve performance by sending many pixels at once: see the second (count) argument of MPI_Send() or MPI_Recv().
Sending a single buffer of 3 ints [pixeli, first_line, first_col] would also limit the number of communications; see the sketch after the code below.
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include "mpi.h"
int main(int argc,char *argv[])
{
int rank, size;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int pixeli=1000000;
int received_pixels;
int first_line,first_col,start_line_recv,start_col_recv;
int tag=0;
int results[pixeli];
int i,j;
for(i=0;i<pixeli;i++){
results[i]=rank*pixeli+i;
}
int* results_recv;
int rc;
MPI_Status Stat;
if (rank != 0) {
// trimitem numarul de pixeli prelucrati
rc = MPI_Ssend(&pixeli, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
// trimitem coordonatele de unde am inceput prelucrarea
rc = MPI_Ssend(&first_line, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
rc = MPI_Ssend(&first_col, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
MPI_Send(&results[0], pixeli, MPI_INT, 0, tag, MPI_COMM_WORLD);
//for (i = 0; i < pixeli; i++) {
// rc = MPI_Send(&results[i], 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
//}
}
else {
for (i = 1; i < size; i++) {
rc = MPI_Recv(&received_pixels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
results_recv = (int*) calloc (received_pixels, sizeof(int));
rc = MPI_Recv(&start_line_recv, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
rc = MPI_Recv(&start_col_recv, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
MPI_Recv(&results_recv[0], received_pixels, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
//for (j = 0; j < received_pixels; j++) {
// rc = MPI_Recv(&results_recv[j], 1, MPI_INT, i, tag, MPI_COMM_WORLD, &Stat);
//printf("proc %d %d\n",rank,results_recv[j]);
//}
free(results_recv);
}
}
MPI_Finalize();
return 0;
}
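For the three-int header idea mentioned above, a small sketch (variable names reused from the listing; illustrative only):
/* pack the three header values into one message instead of three */
int header[3];
if (rank != 0) {
    header[0] = pixeli;
    header[1] = first_line;
    header[2] = first_col;
    MPI_Send(header, 3, MPI_INT, 0, tag, MPI_COMM_WORLD);
} else {
    for (int i = 1; i < size; i++) {
        MPI_Recv(header, 3, MPI_INT, i, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        /* header[0] = pixel count, header[1] = first line, header[2] = first column */
    }
}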
Bye,
Francis
I am trying to write my own MPI function that computes the smallest number in a vector and broadcasts it to all processes. I treat the processes as a binary tree, and find the minimum as I move from the leaves to the root. Then I send the message from the root back to the leaves through its children. But I get a segmentation fault when I try to receive the minimum value from the left child (process rank 3) of process rank 1, in an execution with just 4 processes ranked from 0 to 3.
void Communication::ReduceMin(double &partialMin, double &totalMin)
{
MPI_Barrier(MPI_COMM_WORLD);
double *leftChild, *rightChild;
leftChild = (double *)malloc(sizeof(double));
rightChild = (double *)malloc(sizeof(double));
leftChild[0]=rightChild[0]=1e10;
cout<<"COMM REDMIN: "<<myRank<<" "<<partialMin<<" "<<nProcs<<endl;
MPI_Status *status;
//MPI_Recv from 2*i+1 amd 2*i+2
if(nProcs > 2*myRank+1)
{
cout<<myRank<<" waiting from "<<2*myRank+1<<" for "<<leftChild[0]<<endl;
MPI_Recv((void *)&leftChild[0], 1, MPI_DOUBLE, 2*myRank+1, 2*myRank+1, MPI_COMM_WORLD, status); //SEG FAULT HERE
cout<<myRank<<" got from "<<2*myRank+1<<endl;
}
if(nProcs > 2*myRank+2)
{
cout<<myRank<<" waiting from "<<2*myRank+2<<endl;
MPI_Recv((void *)rightChild, 1, MPI_DOUBLE, 2*myRank+2, 2*myRank+2, MPI_COMM_WORLD, status);
cout<<myRank<<" got from "<<2*myRank+1<<endl;
}
//sum it up
cout<<myRank<<" finding the min"<<endl;
double myMin = min(min(leftChild[0], rightChild[0]), partialMin);
//MPI_Send to (i+1)/2-1
if(myRank!=0)
{
cout<<myRank<<" sending "<<myMin<<" to "<<(myRank+1)/2 -1 <<endl;
MPI_Send((void *)&myMin, 1, MPI_DOUBLE, (myRank+1)/2 - 1, myRank, MPI_COMM_WORLD);
}
double min;
//MPI_Recv from (i+1)/2-1
if(myRank!=0)
{
cout<<myRank<<" waiting from "<<(myRank+1)/2-1<<endl;
MPI_Recv((void *)&min, 1, MPI_DOUBLE, (myRank+1)/2 - 1, (myRank+1)/2 - 1, MPI_COMM_WORLD, status);
cout<<myRank<<" got from "<<(myRank+1)/2-1<<endl;
}
totalMin = min;
//MPI_send to 2*i+1 and 2*i+2
if(nProcs > 2*myRank+1)
{
cout<<myRank<<" sending to "<<2*myRank+1<<endl;
MPI_Send((void *)&min, 1, MPI_DOUBLE, 2*myRank+1, myRank, MPI_COMM_WORLD);
}
if(nProcs > 2*myRank+2)
{
cout<<myRank<<" sending to "<<2*myRank+1<<endl;
MPI_Send((void *)&min, 1, MPI_DOUBLE, 2*myRank+2, myRank, MPI_COMM_WORLD);
}
}
PS: I know I can use
MPI_Barrier(MPI_COMM_WORLD);
MPI_Reduce((void *)&partialMin, (void *)&totalMin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
MPI_Bcast((void *)&totalMin, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
But I want to write my own code for fun.
The error is in the way you use the status argument in the receive calls. Instead of passing the address of an MPI_Status instance, you simply pass an uninitialised pointer and that leads to the crash:
MPI_Status *status; // status declared as a pointer and never initialised
...
MPI_Recv((void *)&leftChild[0], 1, MPI_DOUBLE, 2*myRank+1, 2*myRank+1,
MPI_COMM_WORLD, status); // status is an invalid pointer here
You should change your code to:
MPI_Status status;
...
MPI_Recv((void *)&leftChild[0], 1, MPI_DOUBLE, 2*myRank+1, 2*myRank+1,
MPI_COMM_WORLD, &status);
Since you do not examine the status at all in your code, you can simply pass MPI_STATUS_IGNORE in all calls:
MPI_Recv((void *)&leftChild[0], 1, MPI_DOUBLE, 2*myRank+1, 2*myRank+1,
MPI_COMM_WORLD, MPI_STATUS_IGNORE);
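As a side note on the MPI_Reduce/MPI_Bcast fallback shown in the question, that pair can be collapsed into a single collective call:
// equivalent to the MPI_Reduce + MPI_Bcast pair in the question's PS
MPI_Allreduce(&partialMin, &totalMin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);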
I'm trying to create a log file from each processor and then send that to the root as a char array. I first send the length and then I send the data. The length sends fine, but the data is always garbage! Here is my code:
MPI_Barrier (MPI_COMM_WORLD);
string out = "";
MPI_Status status[2];
MPI_Request reqs[num_procs];
string log = "TEST";
int length = log.length();
char* temp = (char *) malloc(length+1);
strcpy(temp, log.c_str());
if (my_id != 0)
{
MPI_Send (&length, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
MPI_Send (&temp, length+1, MPI_CHAR, 0, 1, MPI_COMM_WORLD);
}
else {
int length;
for (int i = 1; i < num_procs; i++)
{
MPI_Recv (&length, 2, MPI_INT, i, 1, MPI_COMM_WORLD, &status[0]);
char* rec_buf;
rec_buf = (char *) malloc(length+1);
MPI_Recv (rec_buf, length+1, MPI_CHAR, i, 1, MPI_COMM_WORLD, &status[1]);
out += rec_buf;
free(rec_buf);
}
}
MPI_Barrier (MPI_COMM_WORLD);
free(temp);
You are passing a char** to MPI_Send instead of a char*; this causes memory corruption, or in your case the garbled output you are getting. Everything should be fine if you use
MPI_Send (temp, length+1, MPI_CHAR, 0, 1, MPI_COMM_WORLD);
(note the removed & in front of the first argument, temp.)
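As an aside, and not required for the fix, the separate length message could be dropped by probing for the incoming string and asking MPI how large it is. A sketch of what the body of the receive loop might look like under that approach (i, out and the tag are as in the question):
// probe first, then size the buffer from the message itself
MPI_Status st;
MPI_Probe(i, 1, MPI_COMM_WORLD, &st);        // wait for the next message from rank i, tag 1

int count;
MPI_Get_count(&st, MPI_CHAR, &count);        // how many MPI_CHARs it carries

char* rec_buf = (char *) malloc(count);
MPI_Recv(rec_buf, count, MPI_CHAR, i, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
out += rec_buf;                              // works because the sender included the '\0'
free(rec_buf);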
Node00 can send the information to node01 (whose rank is 1) successfully, but it gets blocked on the second send to node01. Why does this happen? Thanks a lot. I think there is no deadlock in the code.
Given the MPI program below, there are 5 nodes:
.....
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Get_processor_name(processor_name, &namelen);
MPI_Status status;
int buff;
if(rank ==0)
{
buff=123;
for(int i=1;i<size;i++){
MPI_Send(&buff, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //succeed
MPI_Send(&buff, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //blocked
}
}
else
{
MPI_Recv(&buff, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);//succeed
MPI_Recv(&buff, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);//blocked
}
............
Why don't you use MPI_Isend and MPI_Irecv? These are non-blocking calls.
#include<stdio.h>
#include<math.h>
#include<mpi.h>
#define tag 777
int rank;
int size;
int main(int argc,char *argv[])
{
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
//MPI_Get_processor_name(processor_name, &namelen);
MPI_Status status;
int buff;
int i;
if(rank ==0)
{ printf("hai");
buff=123;
for(i=1;i<size;i++){
MPI_Send(&buff, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //succeed
MPI_Send(&buff, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //blocked
}
}
else
{
printf("hello");
MPI_Recv(&buff, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);//succeed
MPI_Recv(&buff, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);//blocked
}
//MPI_Finalize(); //finalize MPI operations
return 0;
}
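For reference, a minimal sketch of the non-blocking variant suggested above, with explicit request completion (illustrative only, not from the original program):
#include <stdio.h>
#include <mpi.h>
#define tag 777

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0) {
        int buff[2] = {123, 456};
        for (int i = 1; i < size; i++) {
            MPI_Request reqs[2];
            /* post both sends, then wait for both to complete */
            MPI_Isend(&buff[0], 1, MPI_INT, i, tag, MPI_COMM_WORLD, &reqs[0]);
            MPI_Isend(&buff[1], 1, MPI_INT, i, tag, MPI_COMM_WORLD, &reqs[1]);
            MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
        }
    } else {
        int recv_buff[2];
        MPI_Request reqs[2];
        /* post both receives up front; same-source, same-tag messages match in order */
        MPI_Irecv(&recv_buff[0], 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &reqs[0]);
        MPI_Irecv(&recv_buff[1], 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &reqs[1]);
        MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
        printf("rank %d got %d and %d\n", rank, recv_buff[0], recv_buff[1]);
    }

    MPI_Finalize();   /* every rank must call this */
    return 0;
}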
You should call MPI_Finalize(), like this:
int main(int argc, char *argv[])
{
int rank, size;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int buff, tag = 99;
MPI_Status status;
if (rank == 0)
{
buff = 123;
for (int i = 1; i < size; i++)
{
MPI_Send(&buff, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //succeed
MPI_Send(&buff, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //blocked
}
}
else
{
MPI_Recv(&buff, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); //succeed
printf("%d\n", buff);
MPI_Recv(&buff, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); //blocked
printf("%d\n", buff);
}
MPI_Finalize();
return 0;
}