OpenMPI doesn't kill other ranks when one rank crashes

I have some sample code:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
int main(int argc, char** argv) {
    // Initialize the MPI environment
    MPI_Init(&argc, &argv);
    // Find out rank, size
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    // We are assuming at least 2 processes for this task
    if (world_size < 2) {
        fprintf(stderr, "World size must be greater than 1 for %s\n", argv[0]);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    int number;
    if (world_rank == 1) {
        number = -1;
        MPI_Send(&number, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
        raise(SIGSEGV);
    } else if (world_rank == 0) {
        MPI_Recv(&number, 1, MPI_INT, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("Process 0 received number %d from process 1\n", number);
    }
    printf("rank %d finalize\n", world_rank);
    MPI_Finalize();
}
Rank 1 raises a signal to simulate a crash. After the raise(), rank 1 exits, but rank 0 still prints rank 0 finalize.
Is there any way for rank 0 to know that rank 1 crashed in this case? Is it possible to make mpirun kill rank 0 when rank 1 crashes?

Note that there is a race condition here: mpirun might not have enough time to notice that task 1 crashed and kill task 0 before the message is printed.
You can force Open MPI to kill all tasks as soon as a crash is detected with the following option:
mpirun -mca orte_abort_on_non_zero_status 1 ...
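As for detecting the failure inside rank 0 itself: by default MPI aborts the job on errors, but you can install MPI_ERRORS_RETURN and check the return codes of communication calls. The sketch below is only illustrative; whether (and when) a peer crash surfaces as an error code is implementation-dependent, and robust failure detection needs fault-tolerance extensions such as ULFM. In this particular example the message is sent before the crash, so the receive may well still succeed.
/* Sketch: ask MPI to return error codes instead of aborting, then
 * check communication calls. Not a guaranteed failure detector. */
MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
int rc = MPI_Recv(&number, 1, MPI_INT, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
if (rc != MPI_SUCCESS) {
    char msg[MPI_MAX_ERROR_STRING];
    int len;
    MPI_Error_string(rc, msg, &len);
    fprintf(stderr, "rank 0: communication with rank 1 failed: %s\n", msg);
    MPI_Abort(MPI_COMM_WORLD, 1); /* take the remaining ranks down */
}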

Related

MPI Send and receive a pointer in MPI_Type_struct

I want to send a set of data with MPI_Type_struct, and one of the members is a pointer to an array (the matrices I'm going to use are very large, so I need to malloc them). The problem I see is that all the data is passed correctly except the matrix. I know that it is possible to pass the matrix through its pointer, since if I send only the matrix by itself, correct results are observed.
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

void main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int size, rank;
    int m, n;
    m = n = 2;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    typedef struct estruct
    {
        float *array;
        int sizeM, sizeK, sizeN, rank_or;
    };
    struct estruct kernel, server;

    MPI_Datatype types[5] = {MPI_FLOAT, MPI_INT, MPI_INT, MPI_INT, MPI_INT};
    MPI_Datatype newtype;
    int lengths[5] = {n * m, 1, 1, 1, 1};
    MPI_Aint displacements[5];
    displacements[0] = (size_t) & (kernel.array[0]) - (size_t)&kernel;
    displacements[1] = (size_t) & (kernel.sizeM) - (size_t)&kernel;
    displacements[2] = (size_t) & (kernel.sizeK) - (size_t)&kernel;
    displacements[3] = (size_t) & (kernel.sizeN) - (size_t)&kernel;
    displacements[4] = (size_t) & (kernel.rank_or) - (size_t)&kernel;
    MPI_Type_struct(5, lengths, displacements, types, &newtype);
    MPI_Type_commit(&newtype);

    if (rank == 0)
    {
        kernel.array = (float *)malloc(m * n * sizeof(float));
        for (int i = 0; i < m * n; i++) kernel.array[i] = i;
        kernel.sizeM = 5;
        kernel.sizeK = 5;
        kernel.sizeN = 5;
        kernel.rank_or = 5;
        MPI_Send(&kernel, 1, newtype, 1, 0, MPI_COMM_WORLD);
    }
    else
    {
        server.array = (float *)malloc(m * n * sizeof(float));
        MPI_Recv(&server, 1, newtype, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("%i \n", server.sizeM);
        printf("%i \n", server.sizeK);
        printf("%i \n", server.sizeN);
        printf("%i \n", server.rank_or);
        for (int i = 0; i < m * n; i++) printf("%f\n", server.array[i]);
    }
    MPI_Finalize();
}
Assuming that only two processes are executed, I expect the process with rank = 1 to receive and display the correct elements of the matrix on the screen (the other fields are received correctly), but the actual output is:
5
5
5
5
0.065004
0.000000
0.000000
0.000000
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 26206 RUNNING AT pmul
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions
I hope someone can help me.
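For what it's worth, the root of the problem is that displacements[0] is computed from the value of an uninitialized pointer, and, more fundamentally, a malloc'd array is not at any fixed offset from the struct, so a single relative datatype cannot describe both the sender's and the receiver's layouts. The usual fix is for each process to build its own datatype from absolute addresses after its own malloc and to communicate from MPI_BOTTOM. A minimal sketch of the sender side, reusing the names from the question:
/* Sketch only: each rank builds its own datatype with absolute
 * addresses, because the heap block lives at a different address
 * in every process. */
MPI_Aint disps[5];
MPI_Get_address(kernel.array, &disps[0]);   /* the malloc'd block itself */
MPI_Get_address(&kernel.sizeM, &disps[1]);
MPI_Get_address(&kernel.sizeK, &disps[2]);
MPI_Get_address(&kernel.sizeN, &disps[3]);
MPI_Get_address(&kernel.rank_or, &disps[4]);
MPI_Type_create_struct(5, lengths, disps, types, &newtype);
MPI_Type_commit(&newtype);
/* With absolute displacements, the buffer argument is MPI_BOTTOM: */
MPI_Send(MPI_BOTTOM, 1, newtype, 1, 0, MPI_COMM_WORLD);
The receiver builds the analogous datatype from server.array and the server fields and calls MPI_Recv(MPI_BOTTOM, 1, ...).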

How to run the same MPI program multiple times

I have an MPI program that measures a sorting time. I run it with mpirun -np 2 mpiSort, which gives me the sorting time with 2 processes.
I want to measure the sorting time 5 times and average the results. How do I do that automatically?
If I put a loop in the mpiSort program, it actually executes 5 (times) x 2 (processes) = 10 times.
Edit: mpiSort does the sort in parallel. Basically, I'm trying to run mpirun -np 2 mpiSort without typing it 5 times, because I then want to do the same for 4 cores and 8 cores.
You could run on five cores using mpirun -np 5 mpiSort and add an MPI_Gather at the end. Is the sort code actually using MPI (i.e., does it call MPI_Init at the beginning)? Assuming it does, you can run on 5 cores and simply average at the end with a reduce,
#include <mpi.h>
#include <iostream>
using namespace std;

int main(int argc, char *argv[])
{
    int ierr, rank, nprocs, root = 0;
    double time, buf;
    ierr = MPI_Init(&argc, &argv);
    ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    ierr = MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    time = 0.5;
    // MPI_DOUBLE is the correct datatype for a C/C++ double
    // (MPI_DOUBLE_PRECISION is the Fortran one).
    ierr = MPI_Reduce(&time, &buf, 1, MPI_DOUBLE,
                      MPI_SUM, root, MPI_COMM_WORLD);
    if (rank == root) {
        buf = buf / nprocs;
        cout << buf << "\n";
    }
    MPI_Finalize();
}
where time is each process's sort time.
Putting it in a loop is the way to go. I was confused because I got 10 values of endTime = MPI_Wtime() and only used 5 of them from the root process. Thanks to @EdSmith for his MPI_Reduce code; the correctly calculated time is the average over the two processes, obtained with MPI_Reduce.
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
for (int run = 0; run < 5; run++) {
    ...
    endTime = MPI_Wtime();
    totalTime = endTime - startTime;
    MPI_Reduce(&totalTime, &workTime, 1, MPI_DOUBLE, MPI_SUM, root, MPI_COMM_WORLD);
    if (rank == root) {
        paraTime = workTime / nProcs;
    }
    ...
}
MPI_Finalize();
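If the goal is simply to avoid typing the command five times, the runs can also be scripted outside the program entirely, in the same spirit as the mpirun commands above. A sketch, assuming a POSIX shell and that mpiSort prints its own timing:
for np in 2 4 8; do for i in 1 2 3 4 5; do mpirun -np $np mpiSort; done; done
Each mpirun here is an independent job, so the five timings per core count can be averaged afterwards.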

MPI: How to use MPI_Win_allocate_shared properly

I would like to use a shared memory between processes. I tried MPI_Win_allocate_shared but it gives me a strange error when I execute the program:
Assertion failed in file ./src/mpid/ch3/include/mpid_rma_shm.h at line 592: local_target_rank >= 0
internal ABORT
Here's my source:
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include "mpi.h"

int main(int argc, char *argv[]);
void pt(int t[], int s);

int main(int argc, char *argv[])
{
    int rank, size, shared_elem = 0, i;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Win win;
    int *shared;
    if (rank == 0) shared_elem = size;
    MPI_Win_allocate_shared(shared_elem * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &shared, &win);
    if (rank == 0)
    {
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, MPI_MODE_NOCHECK, win);
        for (i = 0; i < size; i++)
        {
            shared[i] = -1;
        }
        MPI_Win_unlock(0, win);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    int *local = (int *)malloc(size * sizeof(int));
    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
    for (i = 0; i < 10; i++)
    {
        MPI_Get(&(local[i]), 1, MPI_INT, 0, i, 1, MPI_INT, win);
    }
    printf("processus %d (avant): ", rank);
    pt(local, size);
    MPI_Win_unlock(0, win);
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, 0, win);
    MPI_Put(&rank, 1, MPI_INT, 0, rank, 1, MPI_INT, win);
    MPI_Win_unlock(0, win);
    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
    for (i = 0; i < 10; i++)
    {
        MPI_Get(&(local[i]), 1, MPI_INT, 0, i, 1, MPI_INT, win);
    }
    printf("processus %d (apres): ", rank);
    pt(local, size);
    MPI_Win_unlock(0, win);
    MPI_Win_free(&win);
    MPI_Free_mem(shared);
    MPI_Free_mem(local);
    MPI_Finalize();
    return 0;
}

void pt(int t[], int s)
{
    int i = 0;
    while (i < s)
    {
        printf("%d ", t[i]);
        i++;
    }
    printf("\n");
}
I get the following result:
processus 0 (avant): -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
processus 0 (apres): 0 -1 -1 -1 -1 -1 -1 -1 -1 -1
processus 4 (avant): 0 -1 -1 -1 -1 -1 -1 -1 -1 -1
processus 4 (apres): 0 -1 -1 -1 4 -1 -1 -1 -1 -1
Assertion failed in file ./src/mpid/ch3/include/mpid_rma_shm.h at line 592: local_target_rank >= 0
internal ABORT - process 5
Assertion failed in file ./src/mpid/ch3/include/mpid_rma_shm.h at line 592: local_target_rank >= 0
internal ABORT - process 6
Assertion failed in file ./src/mpid/ch3/include/mpid_rma_shm.h at line 592: local_target_rank >= 0
internal ABORT - process 9
Can someone please help me figure out what's going wrong and what that error means? Thanks a lot.
MPI_Win_allocate_shared is a departure from the very abstract nature of MPI. It exposes the underlying memory organisation and allows the programs to bypass the expensive (and often confusing) MPI RMA operations and utilise the shared memory directly on systems that have such. While MPI typically deals with distributed-memory environments where ranks do not share the physical memory address space, a typical HPC system nowadays consists of many interconnected shared-memory nodes. Thus, it is possible for ranks that execute on the same node to attach to shared memory segments and communicate by sharing data instead of message passing.
MPI provides a communicator split operation that allows one to create subgroups of ranks such that the ranks in each subgroup are able to share memory:
MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, key, info, &newcomm);
On a typical cluster, this essentially groups the ranks by the nodes they execute on. Once the split is done, a shared-memory window allocation can be executed over the ranks in each newcomm. Note that for a multi-node cluster job this will result in several independent newcomm communicators and thus several shared memory windows. Ranks on one node won't (and shouldn't) be able to see the shared memory windows on other nodes.
In that regard, MPI_Win_allocate_shared is a platform-independent wrapper around the OS-specific mechanisms for shared memory allocation.
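To make the pattern concrete, here is a minimal sketch (mine, not part of the original answer) of splitting the communicator, letting rank 0 of each node allocate the segment, and accessing it with direct loads and stores; the lock_all/sync/barrier sequence is the standard way to order such accesses:
#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    /* Group the ranks that can actually share memory (one group per node). */
    MPI_Comm nodecomm;
    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
                        MPI_INFO_NULL, &nodecomm);
    int noderank, nodesize;
    MPI_Comm_rank(nodecomm, &noderank);
    MPI_Comm_size(nodecomm, &nodesize);

    /* Rank 0 of each node allocates the whole segment; others allocate 0 bytes. */
    MPI_Win win;
    int *shared;
    MPI_Win_allocate_shared((noderank == 0 ? nodesize : 0) * sizeof(int),
                            sizeof(int), MPI_INFO_NULL, nodecomm, &shared, &win);

    /* Everyone resolves a pointer to the start of rank 0's segment. */
    MPI_Aint ssize;
    int disp_unit;
    MPI_Win_shared_query(win, 0, &ssize, &disp_unit, &shared);

    /* Direct load/store access; sync + barrier order the memory accesses. */
    MPI_Win_lock_all(MPI_MODE_NOCHECK, win);
    shared[noderank] = noderank;
    MPI_Win_sync(win);
    MPI_Barrier(nodecomm);
    MPI_Win_sync(win);
    if (noderank == 0) {
        for (int i = 0; i < nodesize; i++)
            printf("%d ", shared[i]);
        printf("\n");
    }
    MPI_Win_unlock_all(win);

    MPI_Win_free(&win); /* also releases the shared segment */
    MPI_Comm_free(&nodecomm);
    MPI_Finalize();
    return 0;
}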
There are several problems with this code and its usage. Some of these are mentioned in @Hristo Iliev's answer:
First, you have to run all the processes on the same node so that MPI_COMM_WORLD is an intranode communicator, or use the MPI_Comm_split_type "communicator split shared" approach shown above.
Second, you need to run this code with at least 10 processes, since the loops read 10 elements.
Third, local is allocated with malloc(), so it should be deallocated with free().
Fourth, you should get the shared pointer from an MPI_Win_shared_query call.
Fifth, you should not deallocate shared yourself (I think this is taken care of by MPI_Win_free).
This is the resulting code:
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include "mpi.h"

int main(int argc, char *argv[]);
void pt(int t[], int s);

int main(int argc, char *argv[])
{
    int rank, size, shared_elem = 0, i;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Win win;
    int *shared;
    // if (rank == 0) shared_elem = size;
    // MPI_Win_allocate_shared(shared_elem*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &shared, &win);
    if (rank == 0)
    {
        /* the window size is given in bytes, hence size * sizeof(int) */
        MPI_Win_allocate_shared(size * sizeof(int), sizeof(int), MPI_INFO_NULL,
                                MPI_COMM_WORLD, &shared, &win);
    }
    else
    {
        int disp_unit;
        MPI_Aint ssize;
        /* non-root ranks allocate zero bytes and query rank 0's segment */
        MPI_Win_allocate_shared(0, sizeof(int), MPI_INFO_NULL,
                                MPI_COMM_WORLD, &shared, &win);
        MPI_Win_shared_query(win, 0, &ssize, &disp_unit, &shared);
    }
    if (rank == 0)
    {
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, MPI_MODE_NOCHECK, win);
        for (i = 0; i < size; i++)
        {
            shared[i] = -1;
        }
        MPI_Win_unlock(0, win);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    int *local = (int *)malloc(size * sizeof(int));
    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
    for (i = 0; i < 10; i++)
    {
        MPI_Get(&(local[i]), 1, MPI_INT, 0, i, 1, MPI_INT, win);
    }
    printf("processus %d (avant): ", rank);
    pt(local, size);
    MPI_Win_unlock(0, win);
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, 0, win);
    MPI_Put(&rank, 1, MPI_INT, 0, rank, 1, MPI_INT, win);
    MPI_Win_unlock(0, win);
    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
    for (i = 0; i < 10; i++)
    {
        MPI_Get(&(local[i]), 1, MPI_INT, 0, i, 1, MPI_INT, win);
    }
    printf("processus %d (apres): ", rank);
    pt(local, size);
    MPI_Win_unlock(0, win);
    MPI_Win_free(&win);
    // MPI_Free_mem(shared);  /* window memory is released by MPI_Win_free */
    free(local);
    // MPI_Free_mem(local);
    MPI_Finalize();
    return 0;
}

void pt(int t[], int s)
{
    int i = 0;
    while (i < s)
    {
        printf("%d ", t[i]);
        i++;
    }
    printf("\n");
}

Open MPI's MPI_reduce not combining array sums

I am very new to Open MPI. I have made a small program that computes the sum of an array by splitting the array into pieces equal to the number of processes. The problem in my program is that each process computes the right sum for its share of the array, but the individually computed sums are not combined by the MPI_Reduce function. I tried my best to solve this and also consulted the Open MPI manual, but there is still something I must be missing. I would be grateful for any kind of guidance. Below is the program I made:
#include "mpi.h"
#include <stdio.h>
int main(int argc, char *argv[])
{
int n, rank, nrofProcs, i;
int sum, ans;
// 0,1,2, 3,4,5, 6,7,8, 9
int myarr[] = {1,5,9, 2,8,3, 7,4,6, 10};
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nrofProcs);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
n = 10;
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
sum = 0.0;
int remaining = n % nrofProcs;
int lower =rank*(n/nrofProcs);
int upper = (lower+(n/nrofProcs))-1;
for (i = lower; i <= upper; i++)
{
sum = sum + myarr[i];
}
if(rank==nrofProcs-1)
{
while(i<=remaining)
{
sum = sum + myarr[i];
i++;
}
}
/* (PROBLEM IS HERE, IT IS NOT COMBINING "sums") */
MPI_Reduce(&sum, &ans, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
// if (rank == 0)
printf( "rank: %d, Sum ans: %d\n", rank, sum);
/* shut down MPI */
MPI_Finalize();
return 0;
}
Output:
rank: 2, Sum ans: 17
rank: 1, Sum ans: 13
rank: 0, Sum ans: 15
(Output should be rank: 0, Sum ans: 55)
Sorry, I made some mistakes, which I corrected after debugging my program in parallel: the root now prints the reduced result ans instead of the local sum, and the last rank's remainder loop correctly counts the n % nrofProcs leftover elements. Here I am sharing code to split an array of length N over M processes, where N and M can have any value:
/*
  An MPI program that splits an array of length N over M processes,
  where N and M can have any value
*/
#include <math.h>
#include "mpi.h"
#include <iostream>
#include <vector>
using namespace std;

int main(int argc, char *argv[])
{
    int n, rank, nrofProcs, i;
    int sum, ans;
    //             0,1,2, 3,4,5, 6,7,8, 9, 10
    int myarr[] = {1,5,9, 2,8,3, 7,4,6,11,10};
    vector<int> myvec(myarr, myarr + sizeof(myarr) / sizeof(int));
    n = myvec.size(); // number of items in array
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nrofProcs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    sum = 0.0;
    int remaining = n % nrofProcs;
    int lower = rank * (n / nrofProcs);
    int upper = (lower + (n / nrofProcs)) - 1;
    for (i = lower; i <= upper; i++)
    {
        sum = sum + myvec[i];
    }
    if (rank == nrofProcs - 1)
    {
        // the last rank also sums the n % nrofProcs leftover elements
        int ctr = 0;
        while (ctr < remaining)
        {
            sum = sum + myvec[i];
            ctr++;
            i++;
        }
    }
    /* combine everyone's calculations */
    MPI_Reduce(&sum, &ans, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0)
        cout << "rank: " << rank << " Sum ans: " << ans << endl;
    /* shut down MPI */
    MPI_Finalize();
    return 0;
}
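As a quick sanity check (mine, not from the original post): with mpirun -np 3, the eleven elements are split three per rank, with the last rank also taking the two leftover elements, giving partial sums 15, 13, and 38; rank 0 then prints Sum ans: 66.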

difficulty with MPI_Gather function

I have a local array (named lvotes) on each of the processors (assume 3 processors), and the first element of each stores a value, i.e.:
P0 : 4
P1 : 6
p2 : 7
Now, using MPI_Gather, I want gather them all in P0, so It will look like :
P0 : 4, 6, 7
I used gather this way:
MPI_Gather(lvotes, P, MPI_INT, lvotes, 1, MPI_INT, 0, MPI_COMM_WORLD);
But I get problems. It's my first time coding in MPI, and I could use any suggestions.
Thanks
This is a common issue with people using the gather/scatter collectives for the first time; in both the send and receive counts you specify the count of items to send to or receive from each process. So although it's true that you'll be, in total, getting (say) P items, if P is the number of processors, that's not what you specify to the gather operation; you specify you are sending a count of 1, and receiving a count of 1 (from each process). Like so:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

int main(int argc, char **argv) {
    int rank;
    int size;
    int lvotes;
    int *gvotes;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (rank == 0)
        gvotes = malloc(size * sizeof(int));

    /* everyone sets their first lvotes element */
    lvotes = rank + 4;

    /* Gather to process 0 */
    MPI_Gather(&lvotes, 1, MPI_INT,  /* send 1 int from lvotes.. */
               gvotes, 1, MPI_INT,   /* gather 1 int from each process into gvotes */
               0, MPI_COMM_WORLD);   /* ... at root process 0 */

    printf("P%d: %d\n", rank, lvotes);

    if (rank == 0) {
        printf("P%d: Gathered ", rank);
        for (int i = 0; i < size; i++)
            printf("%d ", gvotes[i]);
        printf("\n");
    }

    if (rank == 0)
        free(gvotes);

    MPI_Finalize();
    return 0;
}
Running gives
$ mpirun -np 3 ./gather
P1: 5
P2: 6
P0: 4
P0: Gathered 4 5 6
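One more detail worth noting: the original call passed lvotes as both the send and the receive buffer, and MPI forbids aliasing the two. If the root really must gather into the buffer that already holds its own contribution, the standard mechanism is MPI_IN_PLACE at the root. A sketch reusing the names from the example above:
if (rank == 0) {
    gvotes[0] = lvotes; /* root's contribution is already in place */
    MPI_Gather(MPI_IN_PLACE, 1, MPI_INT,
               gvotes, 1, MPI_INT, 0, MPI_COMM_WORLD);
} else {
    MPI_Gather(&lvotes, 1, MPI_INT,
               NULL, 0, MPI_INT, 0, MPI_COMM_WORLD); /* recv args ignored on non-roots */
}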
