MPI_Waitall error: address not mapped - mpi

I have the following code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

static int rank, size;
char msg[] = "This is a test message";

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (size != 2) {
        fprintf(stderr, "This test requires exactly 2 tasks (has: %d).\n", size);
        MPI_Finalize();
        return -1;
    }

    int run = 1;
    if (argc > 1) {
        run = atoi(argv[1]);
    }

    int len = strlen(msg) + 1;
    if (argc > 2) {
        len = atoi(argv[2]);
    }

    char buf[len];
    strncpy(buf, msg, len);

    MPI_Status statusArray[run];
    MPI_Request reqArray[run];

    double start = MPI_Wtime();

    for (int i = 0; i < run; i++) {
        if (!rank) {
            MPI_Isend(buf, len, MPI_CHAR, 1, 0, MPI_COMM_WORLD, &reqArray[i]);
            printf("mpi_isend for run %d\n", i);
        } else {
            MPI_Irecv(buf, len, MPI_CHAR, 0, 0, MPI_COMM_WORLD, &reqArray[i]);
            printf("mpi_irecv for run %d\n", i);
        }
    }

    int buflen = 512;
    char name[buflen];
    gethostname(name, buflen);
    printf("host: %s has rank %d\n", name, rank);
    printf("Reached here! for host %s before MPI_Waitall \n", name);

    if (!rank) {
        printf("calling mpi_waitall for sending side which is %s\n", name);
        MPI_Waitall(run, &reqArray[0], &statusArray[0]);
    }
    else {
        printf("calling mpi_waitall for receiving side which is %s\n", name);
        MPI_Waitall(run, &reqArray[0], &statusArray[0]);
    }

    printf("finished waiting! for host %s\n", name);

    double end = MPI_Wtime();
    if (!rank) {
        printf("Throughput: %.4f Gbps\n", 1e-9 * len * 8 * run / (end - start));
    }

    MPI_Finalize();
}
I got a seg-fault on the sending side before MPI_Waitall. The error message is:
[host1:27679] *** Process received signal ***
[host1:27679] Signal: Segmentation fault (11)
[host1:27679] Signal code: Address not mapped (1)
[host1:27679] Failing at address: 0x8
[host1:27679] [ 0] /lib64/libpthread.so.0() [0x3ce7e0f500]
[host1:27679] [ 1] /usr/lib64/openmpi/mca_btl_openib.so(+0x21dc7) [0x7f46695c1dc7]
[host1:27679] [ 2] /usr/lib64/openmpi/mca_btl_openib.so(+0x1cbe1) [0x7f46695bcbe1]
[host1:27679] [ 3] /lib64/libpthread.so.0() [0x3ce7e07851]
[host1:27679] [ 4] /lib64/libc.so.6(clone+0x6d) [0x3ce76e811d]
[host1:27679] *** End of error message ***
I think there is something wrong with the array of MPI_Request. Could someone point it out?
Thanks!

I ran your program without a problem (other than a warning for not including unistd.h). The problem is probably related to your Open MPI setup: the backtrace points into mca_btl_openib.so, which is Open MPI's InfiniBand transport. Are you using a machine with an InfiniBand network? If not, you probably want to fall back to the default TCP transport; your problem is likely related to that.
If you want to specify that you'll only use tcp, you should run like this:
mpirun --mca btl tcp,self -n 2 <prog_name> <prog_args>
That will ensure that openib isn't accidentally detected and used when it shouldn't be.
If, on the other hand, you do mean to use InfiniBand, you might have discovered some sort of problem with Open MPI. I doubt that's the case though since you're not doing anything fancy.
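Incidentally, the compiler warning mentioned above comes from calling gethostname() without a declaration; adding the POSIX header at the top of the posted program silences it:
#include <unistd.h>   /* declares gethostname() */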

Related

Anomalous MPI behavior

I am wondering if anyone can offer an explanation.
I'll start with the code:
/*
    Barrier implemented using tournament-style coding
*/

// Constraints: Number of processes must be a power of 2, e.g.
// 2,4,8,16,32,64,128,etc.

#include <mpi.h>
#include <stdio.h>
#include <unistd.h>

void mybarrier(MPI_Comm);

// global debug bool
int verbose = 1;

int main(int argc, char * argv[]) {
    int rank;
    int size;
    int i;
    int sum = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int check = size;

    // check to make sure the number of processes is a power of 2
    if (rank == 0){
        while(check > 1){
            if (check % 2 == 0){
                check /= 2;
            } else {
                printf("ERROR: The number of processes must be a power of 2!\n");
                MPI_Abort(MPI_COMM_WORLD, 1);
                return 1;
            }
        }
    }

    // simple task, with barrier in the middle
    for (i = 0; i < 500; i++){
        sum ++;
    }

    mybarrier(MPI_COMM_WORLD);

    for (i = 0; i < 500; i++){
        sum ++;
    }

    if (verbose){
        printf("process %d arrived at finalize\n", rank);
    }

    MPI_Finalize();
    return 0;
}

void mybarrier(MPI_Comm comm){
    // MPI variables
    int rank;
    int size;
    int * data;
    MPI_Status * status;

    // Loop variables
    int i;
    int a;
    int skip;
    int complete = 0;
    int currentCycle = 1;

    // Initialize MPI vars
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    // step 1, gathering
    while (!complete){
        skip = currentCycle * 2;

        // if currentCycle divides rank evenly, then it is a target
        if ((rank % currentCycle) == 0){
            // if skip divides rank evenly, then it needs to receive
            if ((rank % skip) == 0){
                MPI_Recv(data, 0, MPI_INT, rank + currentCycle, 99, comm, status);
                if (verbose){
                    printf("1: %d from %d\n", rank, rank + currentCycle);
                }
            // otherwise, it needs to send. Once sent, the process is done
            } else {
                if (verbose){
                    printf("1: %d to %d\n", rank, rank - currentCycle);
                }
                MPI_Send(data, 0, MPI_INT, rank - currentCycle, 99, comm);
                complete = 1;
            }
        }

        currentCycle *= 2;

        // main process will never send, so this code will allow it to complete
        if (currentCycle >= size){
            complete = 1;
        }
    }

    complete = 0;
    currentCycle = size / 2;

    // step 2, scattering
    while (!complete){
        // if currentCycle is 1, then this is the last loop
        if (currentCycle == 1){
            complete = 1;
        }

        skip = currentCycle * 2;

        // if currentCycle divides rank evenly then it is a target
        if ((rank % currentCycle) == 0){
            // if skip divides rank evenly, then it needs to send
            if ((rank % skip) == 0){
                if (verbose){
                    printf("2: %d to %d\n", rank, rank + currentCycle);
                }
                MPI_Send(data, 0, MPI_INT, rank + currentCycle, 99, comm);
            // otherwise, it needs to receive
            } else {
                if (verbose){
                    printf("2: %d waiting for %d\n", rank, rank - currentCycle);
                }
                MPI_Recv(data, 0, MPI_INT, rank - currentCycle, 99, comm, status);
                if (verbose){
                    printf("2: %d from %d\n", rank, rank - currentCycle);
                }
            }
        }

        currentCycle /= 2;
    }
}
Expected behavior
The code is to increment a sum to 500, wait for all other processes to reach that point using blocking MPI_Send and MPI_Recv calls, and then increment sum to 1000.
Observed behavior on cluster
Cluster behaves as expected
Anomalous behavior observed on my machine
Every process prints its rank in the main function as 99, a value I have linked specifically to the tag used in the second while loop of mybarrier.
In addition
My first draft was written with for loops, and with that one, the program executes as expected on the cluster as well, but on my machine execution never finishes, even though all processes call MPI_Finalize (but none move beyond it).
MPI Versions
My machine is running OpenRTE 2.0.2
The cluster is running OpenRTE 1.6.3
The questions
I have observed that my machine seems to misbehave all of the time, while the cluster executes normally. This is true of other MPI code I have written as well. Were there major changes between 1.6.3 and 2.0.2 that I'm not aware of?
At any rate, I'm baffled, and I was wondering if anyone could offer some explanation as to why my machine seems to not run MPI correctly. I hope I have provided enough details, but if not, I will be happy to provide whatever additional information you require.
There is a problem with your code; maybe that's what is causing the weird behavior you are seeing.
You are passing the MPI_Recv routines a status object that hasn't been allocated. In fact, that pointer is not even initialized, so if it happens not to be NULL, MPI_Recv will end up writing to some arbitrary memory location, causing undefined behavior. The correct form is the following:
MPI_Status status;
...
MPI_Recv(..., &status);
Or if you want to use the heap:
MPI_Status *status = malloc(sizeof(MPI_Status));
...
MPI_Recv(..., status);
...
free(status);
Also, since you are not using the status returned by the receive, you should use MPI_STATUS_IGNORE instead:
MPI_Recv(..., MPI_STATUS_IGNORE);
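Applied to the receive in step 1 of mybarrier, the fix might look like this (a sketch; the dummy buffer is my addition, since with a count of 0 nothing is dereferenced, but it avoids relying on the uninitialized data pointer):
/* Zero-length receive from step 1 of mybarrier, with the uninitialized
   status pointer replaced by MPI_STATUS_IGNORE. */
int dummy;
MPI_Recv(&dummy, 0, MPI_INT, rank + currentCycle, 99, comm, MPI_STATUS_IGNORE);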

Why am I getting an "invalid data type" error in this sample mpi broadcast program?

After making the changes mentioned in the comments, I'm not getting any output.
I'm new to MPI. If I run it with more than two processes, I get two additional lines on my console:
1 more process has sent help message help-mpi-errors.txt / mpi_errors_are_fatal
Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages.
What am I doing wrong?
This is the complete output on my terminal:
*** An error occurred in MPI_Bcast
*** reported by process [4248174593,1]
*** on communicator MPI_COMM_WORLD
*** MPI_ERR_TYPE: invalid datatype
*** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
*** and potentially your MPI job)
1 more process has sent help message help-mpi-errors.txt /
mpi_errors_are_fatal Set MCA parameter "orte_base_help_aggregate" to 0
to see all help / error messages
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char** argv)
{
    const int server = 0;
    const int source = server;
    float* array = (float*)NULL;
    int length;
    int num_procs, my_rank, mpi_error_code;
    int index;

    mpi_error_code = MPI_Init(&argc, &argv);
    mpi_error_code = MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    mpi_error_code = MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

    /*input, allocate, initialize on server only*/
    if(my_rank == server){
        scanf("%d", &length);
        array = (float*) malloc(sizeof(float) * length);
        for(index = 0; index < length; index++){
            array[index] = 0.0;
        }
    }

    /*broadcast, output on all processes*/
    if(num_procs > 1){
        mpi_error_code = MPI_Bcast(&length, 1, MPI_INT, source, MPI_COMM_WORLD);
        if(my_rank != server){
            array = (float*) malloc(sizeof(float) * length);
        }
        mpi_error_code = MPI_Bcast(array, length, MPI_INT, source, MPI_COMM_WORLD);
        printf("%d: broadcast length = %d\n", my_rank, length);
    }

    mpi_error_code = MPI_Finalize();
}
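No answer was posted here, but one mismatch stands out in the code as shown: array is a buffer of float, yet the second broadcast declares it as MPI_INT. Whether or not that explains the MPI_ERR_TYPE message above, a type-consistent call would look like this (a sketch based only on the posted code):
/* array was allocated as float*, so broadcast it as MPI_FLOAT
   (the posted code uses MPI_INT here, which does not match the buffer). */
mpi_error_code = MPI_Bcast(array, length, MPI_FLOAT, source, MPI_COMM_WORLD);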

Creating multiple child processes with a single pipe

I need to create three child processes, each of which reads a string from the command line arguments and writes the string to a single pipe. The parent would then read the strings from the pipe and display all three of them on the screen. I tried doing it for two processes to test and it is printing one of the strings twice as opposed to both of them.
#include <stdio.h>
#include <unistd.h>

int main (int argc, char *argv[]) {
    char *character1 = argv[1];
    char *character2 = argv[2];
    char inbuf[100]; //creating an array with a max size of 100
    int p[2];        // Pipe descriptor array
    pid_t pid1;      // defining pid1 of type pid_t
    pid_t pid2;      // defining pid2 of type pid_t

    if (pipe(p) == -1) {
        fprintf(stderr, "Pipe Failed"); // pipe fail
    }

    pid1 = fork(); // fork
    if (pid1 < 0) {
        fprintf(stderr, "Fork Failed"); // fork fail
    }
    else if (pid1 == 0){ // if child process 1
        close(p[0]); // close the read end
        write(p[1], character1, sizeof(&inbuf[0])); // write character 1 to the pipe
    }
    else { // if parent, create a second child process, child process 2
        pid2 = fork();
        if (pid2 < 0) {
            fprintf(stderr, "Fork Failed"); // fork fail
        }
        if (pid2 = 0) { // if child process 2
            close(p[0]); // close the read end
            write(p[1], character2, sizeof(&inbuf[0])); // write character 2 to the pipe
        }
        else { // if parent process
            close(p[1]); // close the write end
            read(p[0], inbuf, sizeof(&inbuf[0])); // Read the pipe that both children write to
            printf("%s\n", inbuf); // print
            read(p[0], inbuf, sizeof(&inbuf[0])); // Read the pipe that both children write to
            printf("%s\n", inbuf); // print
        }
    }
}
Your code doesn't keep looping until there's no more data to read; it just does two fixed reads. It also doesn't check the value returned by read(), but it should.
I've abstracted the fork() and write() (and error check) code into a function. This seems to work:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static void child(int fd, const char *string)
{
    pid_t pid = fork();
    int len = strlen(string);

    if (pid < 0)
    {
        fprintf(stderr, "%.5d: failed to fork (%d: %s)\n",
                (int)getpid(), errno, strerror(errno));
        exit(1);
    }
    else if (pid > 0)
        return;
    else if (write(fd, string, len) != len)
    {
        fprintf(stderr, "%.5d: failed to write on pipe %d (%d: %s)\n",
                (int)getpid(), fd, errno, strerror(errno));
        exit(1);
    }
    else
        exit(0);
}

int main (int argc, char *argv[])
{
    char inbuf[100]; //creating an array with a max size of 100
    int p[2];        // Pipe descriptor array

    if (argc != 4)
    {
        fprintf(stderr, "Usage: %s str1 str2 str3\n", argv[0]);
        return 1;
    }
    if (pipe(p) == -1)
    {
        fprintf(stderr, "Pipe Failed"); // pipe fail
        return 1;
    }

    for (int i = 0; i < 3; i++)
        child(p[1], argv[i+1]);

    int nbytes;
    close(p[1]); // close the write end
    while ((nbytes = read(p[0], inbuf, sizeof(inbuf))) > 0)
        printf("%.*s\n", nbytes, inbuf); // print

    return 0;
}
I ran the command multiple times, each time using the command line:
./p3 'message 1' 'the second message' 'a third message for the third process'
On one run, the output was:
the second messagemessage 1
a third message for the third process
On another, I got:
the second messagemessage 1a third message for the third process
And on another, I got:
message 1
the second messagea third message for the third process
(This is on a MacBook Pro with Intel Core i7, running Mac OS X 10.8.3, and using GCC 4.7.1.)
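If you wanted each string to come out on its own line no matter how the writes interleave, one variation (mine, not part of the answer above) is to append the newline inside a single write(); POSIX guarantees that writes of at most PIPE_BUF bytes to a pipe are atomic:
#include <limits.h>   /* PIPE_BUF */
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical helper: write "string\n" to the pipe in one atomic write
   (atomic as long as the total length stays within PIPE_BUF). */
static int write_line(int fd, const char *string)
{
    char line[PIPE_BUF];
    int len = snprintf(line, sizeof(line), "%s\n", string);
    if (len < 0 || (size_t)len >= sizeof(line))
        return -1;                 /* too long to send atomically */
    return (write(fd, line, len) == len) ? 0 : -1;
}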

MPI hangs on MPI_Send for large messages

There is a simple program in C++ / MPI (MPICH2) which sends an array of type double. If the size of the array is more than 9000, then my program hangs during the call to MPI_Send. If the array is smaller than 9000 (8000, for example), the program works fine. Source code is below:
main.cpp
using namespace std;

Cube** cubes;
int cubesLen;

double* InitVector(int N) {
    double* x = new double[N];
    for (int i = 0; i < N; i++) {
        x[i] = i + 1;
    }
    return x;
}

void CreateCubes() {
    cubes = new Cube*[12];
    cubesLen = 12;
    for (int i = 0; i < 12; i++) {
        cubes[i] = new Cube(9000);
    }
}

void SendSimpleData(int size, int rank) {
    Cube* cube = cubes[0];
    int nodeDest = rank + 1;
    if (nodeDest > size - 1) {
        nodeDest = 1;
    }
    double* coefImOut = (double *) malloc(sizeof (double)*cube->coefficentsImLength);

    cout << "Before send" << endl;
    int count = cube->coefficentsImLength;
    MPI_Send(coefImOut, count, MPI_DOUBLE, nodeDest, 0, MPI_COMM_WORLD);
    cout << "After send" << endl;
    free(coefImOut);

    MPI_Status status;
    double *coefIm = (double *) malloc(sizeof(double)*count);

    int nodeFrom = rank - 1;
    if (nodeFrom < 1) {
        nodeFrom = size - 1;
    }
    MPI_Recv(coefIm, count, MPI_DOUBLE, nodeFrom, 0, MPI_COMM_WORLD, &status);
    free(coefIm);
}

int main(int argc, char *argv[]) {
    int size, rank;
    const int root = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    CreateCubes();

    if (rank != root) {
        SendSimpleData(size, rank);
    }

    MPI_Finalize();
    return 0;
}
class Cube
class Cube {
public:
    Cube(int size);
    Cube(const Cube& orig);
    virtual ~Cube();

    int Id() { return id; }
    void Id(int id) { this->id = id; }

    int coefficentsImLength;
    double* coefficentsIm;

private:
    int id;
};

Cube::Cube(int size) {
    this->coefficentsImLength = size;
    coefficentsIm = new double[size];
    for (int i = 0; i < size; i++) {
        coefficentsIm[i] = 1;
    }
}

Cube::Cube(const Cube& orig) {
}

Cube::~Cube() {
    delete[] coefficentsIm;
}
The program runs on 4 processes:
mpiexec -n 4 ./myApp1
Any ideas?
The details of the Cube class aren't relevant here: consider a simpler version
#include <mpi.h>
#include <cstdlib>
#include <iostream>   // needed for cout/endl
using namespace std;

int main(int argc, char *argv[]) {
    int size, rank;
    const int root = 0;
    int datasize = atoi(argv[1]);

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank != root) {
        int nodeDest = rank + 1;
        if (nodeDest > size - 1) {
            nodeDest = 1;
        }
        int nodeFrom = rank - 1;
        if (nodeFrom < 1) {
            nodeFrom = size - 1;
        }

        MPI_Status status;
        int *data = new int[datasize];
        for (int i=0; i<datasize; i++)
            data[i] = rank;

        cout << "Before send" << endl;
        MPI_Send(data, datasize, MPI_INT, nodeDest, 0, MPI_COMM_WORLD);
        cout << "After send" << endl;
        MPI_Recv(data, datasize, MPI_INT, nodeFrom, 0, MPI_COMM_WORLD, &status);

        delete [] data;
    }
    MPI_Finalize();
    return 0;
}
where running gives
$ mpirun -np 4 ./send 1
Before send
After send
Before send
After send
Before send
After send
$ mpirun -np 4 ./send 65000
Before send
Before send
Before send
If in DDT you looked at the message queue window, you'd see everyone is sending, and no one is receiving, and you have a classic deadlock.
MPI_Send's semantics, weirdly, aren't well defined, but it is allowed to block until "the receive has been posted". MPI_Ssend is clearer in this regard; it will always block until the receive has been posted. Details about the different send modes can be seen here.
The reason it worked for smaller messages is an accident of the implementation; for "small enough" messages (for your case, it looks to be <64kB), your MPI_Send implementation uses an "eager send" protocol and doesn't block on the receive; for larger messages, where it isn't necessarily safe just to keep buffered copies of the message kicking around in memory, the Send waits for the matching receive (which it is always allowed to do anyway).
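One way to confirm this (my suggestion, not part of the original answer): swap MPI_Send for MPI_Ssend in the small test program above, and it should block even for a one-element message, since the synchronous send never takes the eager path.
// Synchronous-mode send: completes only once the matching receive has
// started, regardless of message size, so the ring deadlocks deterministically.
MPI_Ssend(data, datasize, MPI_INT, nodeDest, 0, MPI_COMM_WORLD);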
There are a few things you could do to avoid this; all you have to do is make sure not everyone is calling a blocking MPI_Send at the same time. You could (say) have even-ranked processes send first, then receive, and odd-ranked processes receive first and then send. You could use nonblocking communications (Isend/Irecv/Waitall); a sketch of that variant appears after the output below. But the simplest solution in this case is to use MPI_Sendrecv, which is a blocking combined send-and-receive, rather than a blocking send followed by a blocking receive. The send and receive execute concurrently, and the function blocks until both are complete. So this works:
#include <mpi.h>
#include <cstdlib>
#include <iostream>   // needed for cout/endl
using namespace std;

int main(int argc, char *argv[]) {
    int size, rank;
    const int root = 0;
    int datasize = atoi(argv[1]);

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank != root) {
        int nodeDest = rank + 1;
        if (nodeDest > size - 1) {
            nodeDest = 1;
        }
        int nodeFrom = rank - 1;
        if (nodeFrom < 1) {
            nodeFrom = size - 1;
        }

        MPI_Status status;
        int *outdata = new int[datasize];
        int *indata  = new int[datasize];
        for (int i=0; i<datasize; i++)
            outdata[i] = rank;

        cout << "Before sendrecv" << endl;
        MPI_Sendrecv(outdata, datasize, MPI_INT, nodeDest, 0,
                     indata, datasize, MPI_INT, nodeFrom, 0, MPI_COMM_WORLD, &status);
        cout << "After sendrecv" << endl;

        delete [] outdata;
        delete [] indata;
    }
    MPI_Finalize();
    return 0;
}
Running gives
$ mpirun -np 4 ./send 65000
Before sendrecv
Before sendrecv
Before sendrecv
After sendrecv
After sendrecv
After sendrecv
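For completeness, the nonblocking variant mentioned above would look roughly like this (a sketch of the inner block only, reusing the same indata/outdata/nodeDest/nodeFrom setup as the programs above; untested here):
// Post both transfers first, then wait on both; neither call blocks on the
// other rank, so the ring cannot deadlock.
MPI_Request reqs[2];
MPI_Irecv(indata,  datasize, MPI_INT, nodeFrom, 0, MPI_COMM_WORLD, &reqs[0]);
MPI_Isend(outdata, datasize, MPI_INT, nodeDest, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);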

Segmentation fault while using MPI_File_open

I'm trying to read from a file in an MPI application. The cluster has 4 nodes with 12 cores in each node. I have tried running a basic program that computes each rank, and that works. When I add MPI_File_open, it fails at runtime with:
BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES = EXIT CODE: 139
The cluster has MPICH2 installed and uses a Network File System. I have tried MPI_File_open with different parameters, like read-only mode, MPI_COMM_WORLD, etc.
Can I use MPI_File_open with a Network File System?
int main(int argc, char* argv[])
{
    int myrank = 0;
    int nprocs = 0;
    int i = 0;
    MPI_Comm icomm = MPI_COMM_WORLD;
    MPI_Status status;
    MPI_Info info;
    MPI_File *fh = NULL;
    int error = 0;

    MPI_Init(&argc, &argv);
    MPI_Barrier(MPI_COMM_WORLD);            // Wait for all processor to start
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs); // Get number of processes
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank); // Get own rank

    usleep(myrank*100000);
    if ( myrank == 1 || myrank == 0 )
        printf("Hello from %d\r\n", myrank);

    if (myrank == 0)
    {
        error = MPI_File_open( MPI_COMM_SELF, "lw1.wei", MPI_MODE_UNIQUE_OPEN,
                               MPI_INFO_NULL, fh);
        if ( error )
        {
            printf("Error in opening file\r\n");
        }
        else
        {
            printf("File successfully opened\r\n");
        }
        MPI_File_close(fh);
    }

    MPI_Barrier(MPI_COMM_WORLD); //! Wait for all the processors to end
    MPI_Finalize();

    if ( myrank == 0 )
    {
        printf("Number of Processes %d\n\r", nprocs);
    }
    return 0;
}
You forgot to allocate an MPI_File object before opening the file. You may either change this line:
MPI_File *fh = NULL;
into:
MPI_File fh;
and open the file by passing fh's address to MPI_File_open(..., &fh). Or you may simply allocate the memory on the heap using malloc():
MPI_File *fh = malloc(sizeof(MPI_File));
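Put together, the relevant part of the program might look like this with a stack-allocated handle (a sketch; I have also added MPI_MODE_RDONLY, since MPI_File_open requires exactly one access mode and MPI_MODE_UNIQUE_OPEN on its own is not enough):
MPI_File fh;   /* a real handle, not an uninitialized pointer */
error = MPI_File_open(MPI_COMM_SELF, "lw1.wei",
                      MPI_MODE_RDONLY | MPI_MODE_UNIQUE_OPEN,
                      MPI_INFO_NULL, &fh);
if (error)
    printf("Error in opening file\r\n");
else
{
    printf("File successfully opened\r\n");
    MPI_File_close(&fh);   /* close only when the open succeeded */
}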
