I am encountering some strange behavior with MPICH. The following minimal example, which sends a message to the nonexistent process with rank -1, causes a deadlock:
// Program: send-err
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char** argv) {
    MPI_Init(NULL, NULL);

    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    // We are assuming at least 2 processes for this task
    if (world_size != 2) {
        fprintf(stderr, "World size must be 2 for %s\n", argv[0]);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    int number;
    if (world_rank == 0) {
        number = -1;
        MPI_Send(&number,         // data buffer
                 1,               // buffer size
                 MPI_INT,         // data type
                 -1,              // destination
                 0,               // tag
                 MPI_COMM_WORLD); // communicator
    } else if (world_rank == 1) {
        MPI_Recv(&number,
                 1,               // buffer size
                 MPI_INT,         // data type
                 0,               // source
                 0,               // tag
                 MPI_COMM_WORLD,  // communicator
                 MPI_STATUS_IGNORE);
    }

    MPI_Finalize();
}
If the call to the send function,
MPI_Send( start, count, datatype, destination_rank, tag, communicator )
uses destination_rank = -2, then the program fails with the error message:
> mpirun -np 2 send-err
Abort(402250246) on node 0 (rank 0 in comm 0): Fatal error in PMPI_Send: Invalid rank, error stack:
PMPI_Send(157): MPI_Send(buf=0x7fffeb411b44, count=1, MPI_INT, dest=MPI_ANY_SOURCE, tag=0, MPI_COMM_WORLD) failed
PMPI_Send(94).: Invalid rank has value -2 but must be nonnegative and less than 2
Based on the error message, I would expect a program that sends a message to the process with rank -1 to fail similarly to the program sending a message to the process with rank -2. What causes this difference in behavior?
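For what it's worth, it may help to check how the installed mpi.h defines the special rank constants; the error stack above already prints the literal -2 as MPI_ANY_SOURCE. A throwaway check I can run separately (not part of the example above, and the values it prints are implementation details to verify locally, not guarantees):

// Program: print-constants -- prints the implementation-specific values of
// the special rank constants. In MPICH builds MPI_PROC_NULL is typically -1
// (which would make a destination of -1 a legal no-op send, leaving rank 1's
// receive blocking forever) and MPI_ANY_SOURCE is typically -2.
#include <mpi.h>
#include <stdio.h>

int main(int argc, char** argv) {
    MPI_Init(NULL, NULL);
    printf("MPI_PROC_NULL  = %d\n", (int) MPI_PROC_NULL);
    printf("MPI_ANY_SOURCE = %d\n", (int) MPI_ANY_SOURCE);
    MPI_Finalize();
    return 0;
}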
Related
OpenMPI has the MCA parameter mpi_abort_print_stack for backtraces after an MPI_ABORT, but the backtrace is quite limited in detail. For example, if I compile (mpicxx broadcast.cxx) and run (mpiexec --mca mpi_abort_print_stack 1 -n 4 ./a.out) this simple example code:
#include <mpi.h>

const int N = 10;
int arr[N];

int main(int argc, char** argv)
{
    int rank;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Correct:
    MPI_Bcast(arr, N, MPI_INT, 0, MPI_COMM_WORLD);

    // Incorrect:
    const int my_size = (rank == 1) ? N+1 : N;
    MPI_Bcast(arr, my_size, MPI_INT, 0, MPI_COMM_WORLD);

    MPI_Finalize();
    return 0;
}
I get the following output and backtrace:
[manjaro:2223406] *** An error occurred in MPI_Bcast
[manjaro:2223406] *** reported by process [1082195969,3]
[manjaro:2223406] *** on communicator MPI_COMM_WORLD
[manjaro:2223406] *** MPI_ERR_TRUNCATE: message truncated
[manjaro:2223406] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[manjaro:2223406] *** and potentially your MPI job)
[manjaro:2223406] [0] func:/usr/lib/libopen-pal.so.40(opal_backtrace_buffer+0x3b) [0x7f3681231a9b]
[manjaro:2223406] [1] func:/usr/lib/libmpi.so.40(ompi_mpi_abort+0x160) [0x7f368183f040]
[manjaro:2223406] [2] func:/usr/lib/libmpi.so.40(ompi_mpi_errors_are_fatal_comm_handler+0xb9) [0x7f36818369b9]
[manjaro:2223406] [3] func:/usr/lib/libmpi.so.40(ompi_errhandler_invoke+0xd3) [0x7f3681836ab3]
[manjaro:2223406] [4] func:/usr/lib/libmpi.so.40(PMPI_Bcast+0x455) [0x7f36818514b5]
[manjaro:2223406] [5] func:./a.out(+0x7c48) [0x560d4d420c48]
[manjaro:2223406] [6] func:/usr/lib/libc.so.6(+0x29290) [0x7f36812d2290]
[manjaro:2223406] [7] func:/usr/lib/libc.so.6(__libc_start_main+0x8a) [0x7f36812d234a]
[manjaro:2223406] [8] func:./a.out(+0x7ac5) [0x560d4d420ac5]
So it tells me that there is a problem in some MPI_Bcast call, but not exactly which one.
Is it possible to get a more detailed backtrace, including e.g. line numbers?
One possibility, as suggested by @talonmies in the comments, is to define a macro that does the error checking after each MPI call. This is similar to the kind of error checking you often see in CUDA code: What is the canonical way to check for errors using the CUDA runtime API?
#include <mpi.h>
#include <stdio.h>

#define check_mpi_error(n) __check_mpi_error(__FILE__, __LINE__, n)

inline void __check_mpi_error(const char *file, const int line, const int n)
{
    char errbuffer[MPI_MAX_ERROR_STRING];
    int errlen;
    if (n != MPI_SUCCESS)
    {
        MPI_Error_string(n, errbuffer, &errlen);
        printf("MPI-error: %s\n", errbuffer);
        printf("Location: %s:%i\n", file, line);
        MPI_Abort(MPI_COMM_WORLD, n);
    }
}

const int N = 10;
int arr[N];

int main(int argc, char** argv)
{
    int rank;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Make MPI return error codes instead of aborting, so the macro can catch them.
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    // Correct:
    int err;
    err = MPI_Bcast(arr, N, MPI_INT, 0, MPI_COMM_WORLD);
    check_mpi_error(err);

    // Incorrect:
    const int my_size = (rank == 1) ? N+1 : N;
    err = MPI_Bcast(arr, my_size, MPI_INT, 0, MPI_COMM_WORLD);
    check_mpi_error(err);

    MPI_Finalize();
    return 0;
}
If I compile (mpicxx -o bcast broadcast_debug.cxx) and run (mpiexec -n 4 bcast) this code, I get the following output:
MPI-error: MPI_ERR_TRUNCATE: message truncated
Location: broadcast_debug.cxx:38
I am writing a sample MPI program in which one process sends an integer to another process.
This is my source code:
#include <mpi.h>
#include <stdio.h>

int main(int argc, char** argv) {
    MPI_Init(NULL, NULL);

    // Find out rank, size
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    int number;
    if (world_rank == 0) {
        number = -1;
        MPI_Send(&number, 1, MPI_INT, 1, 0, MPI_COMM_WORLD);
    } else if (world_rank == 1) {
        MPI_Recv(&number, 1, MPI_INT, 0, 0, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);
        printf("Process 1 received number %d from process 0\n",
               number);
    }

    MPI_Finalize();
}
And this is the error I get when I run mpiexec in the Windows command line:
ERROR: Error reported: failed to set work directory to 'D:\study_documents\Thesis\Nam 4\Demo\Sample Codes\MPI_HelloWorld\Debug' on DESKTOP-EKN1RD3
Error (3) The system cannot find the path specified.
I wonder whether it is possible to send data to a third-party communicator given only its integer handle value.
In other words, I would like to send an MPI_Comm (an int) to processes in an unrelated communicator in order to establish communication between them.
For this purpose, I have written test code that attempts to transfer the MPI_Comm.
parent.c
#include <mpi.h>
#include <stdio.h>

int main(int argc, char** argv) {
    MPI_Init(NULL, NULL);

    int world_size, world_rank;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

    MPI_Comm children;
    int err[world_size], msg;
    MPI_Comm_spawn("./children", NULL, 2, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &children, err);

    if (world_rank == 0) {
        MPI_Send(&children, 1, MPI_INT, 0, 0, children);
        MPI_Recv(&msg, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, children, MPI_STATUS_IGNORE);
    }

    MPI_Finalize();
    return (0);
}
children.c
#include <mpi.h>
#include <stdio.h>

int main(int argc, char** argv) {
    MPI_Init(NULL, NULL);

    int world_size, world_rank;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

    MPI_Comm parent;
    MPI_Comm_get_parent(&parent);

    int comm, msg = 123;
    if (world_rank == 0) {
        MPI_Recv(&comm, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, parent, MPI_STATUS_IGNORE);
        MPI_Comm children = (MPI_Comm) comm;
        MPI_Send(&msg, 1, MPI_INT, 0, 0, children);
    }

    MPI_Finalize();
    return (0);
}
Of course, the code does not work; it fails with:
Fatal error in MPI_Send: Invalid communicator, error stack
Is there any way to establish that connection?
PS: This is just an example, and I know that if I used the "parent" communicator in the send in children.c, it would work. But my intention is to send data to a third-party communicator given only its integer id.
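For reference, communicator handles are process-local, so their integer value has no meaning in another job; the mechanism MPI itself provides for connecting otherwise unrelated groups is a port name string (MPI_Open_port / MPI_Comm_accept / MPI_Comm_connect) rather than the handle. A rough sketch of that approach; the file name port.txt, the accept/connect roles, and the unsynchronized file exchange below are illustrative assumptions, not a complete solution:

/* connect_demo.c -- illustrative sketch only. Run one job with the argument
 * "accept" and another with "connect"; the port string is exchanged through
 * a plain text file, which is just one possible out-of-band channel (no
 * synchronization of that exchange is shown here). */
#include <mpi.h>
#include <stdio.h>
#include <string.h>

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    char port[MPI_MAX_PORT_NAME];
    MPI_Comm inter;  /* intercommunicator linking the two jobs */

    if (argc > 1 && strcmp(argv[1], "accept") == 0) {
        if (rank == 0) {
            MPI_Open_port(MPI_INFO_NULL, port);
            FILE* f = fopen("port.txt", "w");
            fprintf(f, "%s\n", port);
            fclose(f);
        }
        /* Collective over MPI_COMM_WORLD; the port name is significant only at root 0. */
        MPI_Comm_accept(port, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &inter);
    } else {
        if (rank == 0) {
            FILE* f = fopen("port.txt", "r");
            fscanf(f, "%s", port);
            fclose(f);
        }
        MPI_Comm_connect(port, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &inter);
    }

    /* From here on, inter can be used like any other intercommunicator. */
    MPI_Comm_disconnect(&inter);
    MPI_Finalize();
    return 0;
}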
I write programs using MPI and I have access to two different clusters. I am not good at system administration, so I cannot say anything about the software, OS, or compilers used there. But on one machine I get a deadlock with the following code:
#include "mpi.h"
#include <iostream>
int main(int argc, char **argv) {
int rank, numprocs;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
int x = rank;
if (rank == 0) {
for (int i=0; i<numprocs; ++i)
MPI_Send(&x, 1, MPI_INT, i, 100500, MPI_COMM_WORLD);
}
MPI_Recv(&x, 1, MPI_INT, 0, 100500, MPI_COMM_WORLD, &status);
MPI_Finalize();
return 0;
}
This is the error message:
Fatal error in MPI_Send: Other MPI error, error stack:
MPI_Send(184): MPI_Send(buf=0x7fffffffceb0, count=1, MPI_INT, dest=0, tag=100500, MPI_COMM_WORLD) failed
MPID_Send(54): DEADLOCK: attempting to send a message to the local process without a prior matching receive
Why is that so? I can't understand why it happens on one machine but not on the other.
MPI_Send is a blocking operation. It may not complete until a matching receive is posted. In your case rank 0 is trying to send a message to itself before a matching receive has been posted. If you must do something like this, you would replace MPI_Send with MPI_Isend and complete it with MPI_Wait after the receive. But you might as well just not have rank 0 send a message to itself.
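A minimal sketch of that MPI_Isend variant, kept close to the code in the question (the extra variable y is only there because receiving into a buffer that a pending send is still using is not allowed):

if (rank == 0) {
    MPI_Request self_req;
    // Non-blocking send to self; it is matched by the receive below.
    MPI_Isend(&x, 1, MPI_INT, 0, 100500, MPI_COMM_WORLD, &self_req);
    for (int i = 1; i < numprocs; ++i)
        MPI_Send(&x, 1, MPI_INT, i, 100500, MPI_COMM_WORLD);
    int y;  // separate receive buffer while x is still in use by the pending send
    MPI_Recv(&y, 1, MPI_INT, 0, 100500, MPI_COMM_WORLD, &status);
    MPI_Wait(&self_req, MPI_STATUS_IGNORE);
    x = y;
} else {
    MPI_Recv(&x, 1, MPI_INT, 0, 100500, MPI_COMM_WORLD, &status);
}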
The proper thing to use in your case is MPI_Bcast.
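In this example that boils down to the following sketch; MPI_Bcast takes a root rank instead of explicit sends and receives, and involves no tag:

int x = rank;
// Collective call on every rank; after it returns, x holds root 0's value everywhere.
MPI_Bcast(&x, 1, MPI_INT, 0, MPI_COMM_WORLD);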
Since rank 0 already has the correct value of x, you do not need to send it in a message. This means that in the loop you should skip sending to rank 0 and instead start from rank 1:
if (rank == 0) {
for (int i=1; i<numprocs; ++i)
MPI_Send(&x, 1, MPI_INT, i, 100500, MPI_COMM_WORLD);
}
MPI_Recv(&x, 1, MPI_INT, 0, 100500, MPI_COMM_WORLD, &status);
Now rank 0 won't try to talk to itself, but since the receive is outside the conditional, it will still try to receive a message from itself. The solution is to simply make the receive the alternative branch:
if (rank == 0) {
for (int i=1; i<numprocs; ++i)
MPI_Send(&x, 1, MPI_INT, i, 100500, MPI_COMM_WORLD);
}
else
MPI_Recv(&x, 1, MPI_INT, 0, 100500, MPI_COMM_WORLD, &status);
Another more involved solution is to use non-blocking operations to post the receive before the send operation:
MPI_Request req;
MPI_Irecv(&x, 1, MPI_INT, 0, 100500, MPI_COMM_WORLD, &req);
if (rank == 0) {
    int xx = x;
    for (int i = 0; i < numprocs; ++i)
        MPI_Send(&xx, 1, MPI_INT, i, 100500, MPI_COMM_WORLD);
}
MPI_Wait(&req, &status);
Now rank 0 will not block in MPI_Send as there is already a matching receive posted earlier. In all other ranks MPI_Irecv will be immediately followed by MPI_Wait, which is equivalent to a blocking receive (MPI_Recv). Note that the value of x is copied to a different variable inside the conditional as simultaneously sending from and receiving into the same memory location is forbidden by the MPI standard for obvious correctness reasons.
After making the changes mentioned in the comments, I am not getting any output.
I'm new to MPI. If I run it with more than two processes, I get two additional lines on my console saying:
1- 1 more process has sent help message help-mpi-errors.txt / mpi_errors_are_fatal
2- Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages.
What am I doing wrong?
This is the complete output on my terminal:
*** An error occurred in MPI_Bcast
*** reported by process [4248174593,1]
*** on communicator MPI_COMM_WORLD
*** MPI_ERR_TYPE: invalid datatype
*** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
*** and potentially your MPI job)
1 more process has sent help message help-mpi-errors.txt /
mpi_errors_are_fatal Set MCA parameter "orte_base_help_aggregate" to 0
to see all help / error messages
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char** argv)
{
    const int server = 0;
    const int source = server;
    float* array = (float*)NULL;
    int length;
    int num_procs, my_rank, mpi_error_code;
    int index;

    mpi_error_code = MPI_Init(&argc, &argv);
    mpi_error_code = MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    mpi_error_code = MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

    /* input, allocate, initialize on server only */
    if (my_rank == server) {
        scanf("%d", &length);
        array = (float*) malloc(sizeof(float) * length);
        for (index = 0; index < length; index++) {
            array[index] = 0.0;
        }
    }

    /* broadcast, output on all processes */
    if (num_procs > 1) {
        mpi_error_code = MPI_Bcast(&length, 1, MPI_INT, source, MPI_COMM_WORLD);
        if (my_rank != server) {
            array = (float*) malloc(sizeof(float) * length);
        }
        mpi_error_code = MPI_Bcast(array, length, MPI_INT, source, MPI_COMM_WORLD);
        printf("%d: broadcast length = %d\n", my_rank, length);
    }

    mpi_error_code = MPI_Finalize();
}