mpi scatterv and gatherv - mpi

When I run my code using command " mpirun -hosts o251-12,o251-13 ./matrixmult " and I type ijk R 4 , it gives error. Worse thing is if I run again the error is changed sometimes.
And if I use 1 or 2 instead of 4, it works well, but if I use big number, it gives error. I tried change my code, but doesn't work.
Here is my code
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
#include <string.h>
void putValue(int *A, int *B, char *flag, int n);
void ijk(int *A, int *B, int *result, int n, int tmpAn);
void ikj(int *A, int *B, int *result, int n, int tmpAn);
void kij(int *A, int *B, int *result, int n, int tmpAn);
int main(){
int *A;
int *B;
int *result;
int n;
char flag[2];
char form[4];
int my_rank;
int comm_sz;
double time1;
double time2;
double time;
int i;
int j;
int *tmpA;
int *tmpResult;
int *sc;
int *displs;
int alpha;
int d;
MPI_Init(NULL,NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD,&comm_sz);
MPI_Barrier(MPI_COMM_WORLD);
if(my_rank==0){
time1=MPI_Wtime();
}
if(my_rank==0){
scanf("%s", form);
scanf("%s", flag);
scanf("%d", &n);
A = (int*)malloc(n * n * sizeof(int));
B = (int*)malloc(n * n * sizeof(int));
result = (int*)malloc(n * n * sizeof(int));
putValue(A,B,flag,n);
printf("running on %d processors\n", comm_sz);
}
if(my_rank){
B = (int*)malloc(n * n * sizeof(int));
}
MPI_Bcast(&n, 1, MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(B,n*n,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(&form, 4, MPI_CHAR,0,MPI_COMM_WORLD);
sc = (int*)malloc(comm_sz * sizeof(int));
displs = (int*)malloc(comm_sz * sizeof(int));
alpha = n%comm_sz;
d=0;
for(i=0;i<comm_sz;i++){
sc[i] = n/comm_sz;
if(i<alpha){
sc[i] = sc[i]+1;
}
sc[i]=sc[i]*n;
displs[i] = d;
d+=sc[i];
}
tmpA = (int*)malloc(n * sc[my_rank] * sizeof(int));
tmpResult = (int*)malloc(n * sc[my_rank] * sizeof(int));
for(i=0; i<sc[my_rank]; i++){
tmpResult[i]=0;
}
MPI_Scatterv(A,sc,displs,MPI_INT,tmpA,sc[my_rank],MPI_INT,0,MPI_COMM_WORLD);
if(strcmp(form,"ijk")==0){
ijk(tmpA,B,tmpResult,n,sc[my_rank]);
}
else if(strcmp(form,"ikj")==0){
ikj(tmpA,B,tmpResult,n,sc[my_rank]);
}
else if(strcmp(form,"kij")==0){
kij(tmpA,B,tmpResult,n,sc[my_rank]);
}
MPI_Gatherv(tmpResult,sc[my_rank],MPI_INT,result,sc,displs,MPI_INT,0,MPI_COMM_WORLD);
if(my_rank==0){
time2=MPI_Wtime();
time=time2-time1;
printf("elapsed time = %.6e secondes\n",time);
if(strcmp(flag,"I")==0){
for(i=0;i<n;i++){
for(j=0;j<n;j++){
printf("%d ",result[i*n+j]);
}
printf("\n");
}
}
}
MPI_Finalize();
return 0;
}
void putValue(int *A, int *B, char *flag, int n){
int i,j;
srand((unsigned)time(NULL));
if(strcmp(flag,"R")==0){
for(i=0; i<n; i++){
for(j=0; j<n; j++){
A[i*n+j] = (int)rand()%101;
B[i*n+j] = (int)rand()%101;
printf("%d\n",A[i*n+j]);
printf("%d\n",B[i*n+j]);
}
}
}
else if(strcmp(flag,"I")==0){
for(i=0; i<n; i++){
for(j=0; j<n; j++){
int x;
scanf("%d", &x);
A[i*n+j]=x;
}
}
for(i=0; i<n; i++){
for(j=0; j<n; j++){
int x;
scanf("%d", &x);
B[i*n+j]=x;
}
}
}
}
void ijk(int *A, int *B, int *result, int n, int tmpAn){
int i,j,k;
for(i=0;i<(tmpAn/n);i++){
for(j=0;j<n;j++){
for(k=0;k<n;k++){
result[i*n+j] += (A[i*n+k] * B[k*n+j]);
}
}
}
}
void ikj(int *A, int *B, int *result, int n, int tmpAn){
int i,j,k;
for(i=0;i<(tmpAn/n);i++){
for(k=0;k<n;k++){
for(j=0;j<n;j++){
result[i*n+j] += (A[i*n+k] * B[k*n+j]);
}
}
}
}
void kij(int *A, int *B, int *result, int n, int tmpAn){
int i,j,k;
for(k=0;k<n;k++){
for(i=0;i<(tmpAn/n);i++){
for(j=0;j<n;j++){
result[i*n+j] += (A[i*n+k] * B[k*n+j]);
}
}
}
}
Sometimes it the error is
rank = 3, revents = 25, state = 8
Assertion failed in file ../../src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c at line 2988: (it_plfd->revents & POLLERR) == 0
internal ABORT - process 1
and sometimes the error is
Fatal error in PMPI_Gatherv: Unknown error class, error stack:
PMPI_Gatherv(1001)....................: MPI_Gatherv failed(sbuf=0x231f140,
scount=4, MPI_INT, rbuf=0x231f060, rcnts=0x231f0b0, displs=0x231f0d0, MPI_INT, root=0, MPI_COMM_WORLD) failed
MPIR_Gatherv_impl(545)................: fail failed
I_MPIR_Gatherv_intra(617).............: Failure during collective
I_MPIR_Gatherv_intra(590).............: fail failed
MPIR_Gatherv_advanced(720)............: fail failed
MPIDU_Complete_posted_with_error(1710): Process failed
====================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 17870 RUNNING AT o251-13
= EXIT CODE: 134
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES

This line is executed by the root process
scanf("%d", &n);
This line is executed by the rest before n is broadcast:
B = (int*)malloc(n * n * sizeof(int));
Move that alloc statement after the broadcast.

Related

Why does code not work after using fopen() command

I created FILE *ptr to read a file, and after I declare ptr =fopen("file_name.txt", "r");
none of my code written afterwards will work.
BELOW IS SOME OF THE ACTUAL CODE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define MAXFILE 30
#define MAX 50
typedef struct Student
{
int stdid;
char *fname;
char *lname;
int day;
int month;
int year;
}Student;
typedef struct Course
{
char *cid;
char *cname;
float credit;
}Course;
typedef struct Enrollment
{
int stdid;
int cid;
char *semester;
float score;
}Enrollment;
int main()
{
char stud[MAXFILE], courses[MAXFILE], enroll[MAXFILE];
printf("TESTING1");
scanf("%s %s %s",stud,courses,enroll);
int studnum,coursenum,enrollnum;
FILE *st, *cou, *enr;
st = fopen("stud","r");
cou = fopen("courses","r");
enr = fopen("enroll","r");
printf("TESTING2");
if(st==NULL || cou == NULL || enr== NULL)
{
printf("unable to open file\n");
exit(1);
}
fscanf(st,"%d",&studnum);
Student *students = (Student*)(malloc(sizeof(Student) * studnum));
for(int i =0; i<studnum;i++)
{
char ftemp[MAX],ltemp[MAX];
int fir, las;
fscanf(st,"%d %s %s %d %d %d",&students[i].stdid, ftemp, ltemp, &students[i].day, &students[i].month, &students[i].year);
fir = strlen(ftemp)+1;
las = strlen(ltemp)+1;
strcpy(students[i].fname,ftemp);
strcpy(students[i].lname,ltemp);
students[i].fname = malloc(fir * sizeof(char));
students[i].lname = malloc(las * sizeof(char));
strcpy(students[i].fname,ftemp);
strcpy(students[i].lname,ltemp);
}
//reading the students.txt file and filling out Student type array with data;
printf("TESTING3");
fscanf(cou,"%d",&coursenum);
Course *course = (Course*)(malloc(sizeof(Course)*coursenum));
for(int i =0; i<coursenum;i++)
{
char cidtemp[MAX], cntemp[MAX];
int tempcid, tempcn;
fscanf(cou,"%s %s %f",cidtemp, cntemp, &course[i].credit);
tempcid = strlen(cidtemp)+1;
tempcn = strlen(cntemp)+1;
course[i].cid = malloc(tempcid*sizeof(char));
course[i].cname = malloc(tempcn * sizeof(char));
strcpy(course[i].cid,cidtemp);
strcpy(course[i].cname, cntemp);
}
//reading the couses.txt file and filling out an array of type Course with the data
printf("TESTING4");
fscanf(enr,"%d",&enrollnum);
Enrollment *enrollment = (Enrollment*)(malloc(sizeof(Enrollment)*enrollnum));
for(int i =0; i<enrollnum; i++)
{
int stdid;
int cid;
char *semester;
float score;
char semest[MAX];
int semtemp;
fscanf(enr,"%d %d %s %f",&enrollment[i].stdid, &enrollment[i].cid, semest, &enrollment[i].score);
semtemp = strlen(semest)+1;
enrollment[i].semester = malloc(semtemp * sizeof(char));
strcpy(enrollment[i].semester, semest);
printf("%d\t%d\t%s\t%.0f \n", enrollment[i].stdid, enrollment[i].cid, enrollment[i].semester, enrollment[i].score);
}
//reading the enrollment files and fill out at array of type Enrollment
//testing new arrays
printf("%d", enrollnum);
for(int i = 0; i<enrollnum; i ++)
{
printf("%d\t%d\t%s\t%.0f \n", enrollment[i].stdid, enrollment[i].cid, enrollment[i].semester, enrollment[i].score);
}
fclose(st);
fclose(cou);
fclose(enr);
for(int i =0; i < studnum;i++)
{
free(students[i].fname);
free(students[i].lname);
}
free(students);
for(int i =0; i < coursenum;i++)
{
free(course[i].cid);
free(course[i].cname);
}
free(course);
for(int i =0; i < enrollnum;i++)
{
free(enrollment[i].semester);
}
free(enrollment);
return 0;
}

Freeing shows double free or corruption

typedef struct row_container
{
int size;
char *data;
}row_container;
typedef struct message_bar
{
char message[80];
time_t message_time;
}message_bar;
typedef struct brick_win_size
{
int row;
int col;
int current_row;
int current_column;
int data_row;
int row_off;
int col_off;
row_container *container;
char *filename;
message_bar *msg_bar;
}brick_window;
void container_delete_character(struct brick_win_size *win)
{
uint8_t insert_flag = 1;
int row = win->current_row;
int offset = win->current_column;
row_container *container = win->container;
char *data = container[row].data;
if(offset < container[row].size){
memmove(&data[offset], &data[offset + 1], container[row].size - 1);
data = realloc(data, container[row].size - 1);
container[row].data = data;
container[row].size--;
}
if(insert_flag){
if((container[row].size == 0) && (row < win->data_row)){
for(int index = row; index < win->data_row; index++){
if(index != win->data_row -1){
container[index] = container[index+1];
}else{
free(container[index]); //error line...............................
}
}
if(win->data_row != 0){
win->container = realloc(win->container, sizeof(row_container) * (win->data_row - 1));
win->data_row --;
}
}
}
}
Here in the above code i just want to delete one element at one point in the function. so i am just reassigning pointers with the other pointer and finally i am freeing the final element which is eventually ending up in build failure *expected ‘void ’ but argument is of type ‘row_container {aka struct row_container}’
Moreover if i give the & de reference operator it is ending up in double free or corruption error during execution

Read input integer from external file for MPI

How could I read external input file for mpi? I need to read one integer from external file (zadanie4_vstup.txt), to compute simple factorial. I have tried to substitute the second argument in MPI_Init() with address of int variable (n), but it looks it is nonsense.
Thank you.
#include <stdio.h>
#include <mpi.h>
int main(int argc, char ** argv)
{
FILE *fr, *fw;
fr = fopen("zadanie4_vstup.txt", "r");
fw = fopen("zadanie4_vystup.txt", "w");
int nproc, me;
int fakt=1, i, buff, n;
MPI_Status stat;
fscanf(fr, "%d", &n);
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
#pragma omp parallel for private(i) reduction(*:fakt)
for(i=me*n/nproc+1; i<=(me+1)*n/nproc; i++) {
fakt *= i;
}
if(nproc > 1) {
if(me == 0) {
for(i=1; i<nproc; i++) {
MPI_Recv(&buff, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
fakt*=buff;
}
} else {
MPI_Send(&fakt, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
}
if(me == 0) {
fprintf(fw, "%d! = %d\n", n, fakt);
}
fclose(fr);
fclose(fw);
MPI_Finalize();
}
here is a version of your program that reads n on the command line.
note i simplified the communications by using MPI_Reduce()
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[]) {
int nproc, me;
int fakt=1, res, i, buff, n;
MPI_Status stat;
MPI_Init(&argc, &argv);
n = atoi(argv[1]);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
#pragma omp parallel for private(i) reduction(*:fakt)
for(i=me*n/nproc+1; i<=(me+1)*n/nproc; i++) {
fakt *= i;
}
MPI_Reduce(&fakt, &res, 1, MPI_INT, MPI_PROD, 0, MPI_COMM_WORLD);
if(me == 0) {
printf("%d! = %d\n", n, res);
}
MPI_Finalize();
return 0;
}
for example
$ mpirun -np 4 ./fakt 6
6! = 720

How to write Nested Loop in Kernel side OpenCL

I'm a beginner in OpenCL. I'm trying to implement an OpenCL application.I have a doubt that how to write opencl kernel code . i have given a original c code.
Question :- help me to change that given c code into opencl kernel code?.
ORIGINAL C CODE:
int i, j;
// initialization of indexes
for (i = 0; i<n; i++)
Index[i] = i;
// Bubble sort
for (i = 0; i<n - 1; i++)
{
for (j = i + 1; j<n; j++)
{
if (I[i] > I[j])
{
double z = I[i]; // exchange attractiveness
I[i] = I[j];
I[j] = z;
z = f[i]; // exchange fitness
f[i] = f[j];
f[j] = z;
int k = Index[i]; // exchange indexes
Index[i] = Index[j];
Index[j] = k;
}
}
}
Example for 4096 element arrays(alternate bubble1 and bubble2 at least 2048 times--->4096(N) kernel executions ):
index init on host side since its just assignment.
Auxiliary functions:
void swap2p(__private int * I,int i,int j)
{
int tmp=I[i];
I[i]=I[j];
I[j]=tmp;
}
void swap2g(__global int * I,int i,int j)
{
int tmp=I[i];
I[i]=I[j];
I[j]=tmp;
}
Alternating kernel-1:
__kernel void bubble1(__global int * I, __global int * f, __global int * Index){
int threadId=get_global_id(0);
__private int vals[2];
if(threadId*2+1<4096)
{
vals[0]=I[threadId*2];
vals[1]=I[threadId*2+1];
if(vals[0]>vals[1])
{
swap2p(vals,threadId*2,threadId*2+1);
swap2g(f,threadId*2,threadId*2+1);
swap2g(Index,threadId*2,threadId*2+1);
I[threadId*2]=vals[0];
I[threadId*2+1]=vals[1];
}
}
}
alternating kernel-2:
__kernel void bubble2(__global int * I){
int threadId=get_global_id(0);
__private int vals[2];
if(threadId*2+2<4096)
{
vals[0]=I[threadId*2+1];
vals[1]=I[threadId*2+2];
if(vals[0]>vals[1])
{
swap2p(vals,threadId*2+1,threadId*2+2);
swap2g(f,threadId*2+1,threadId*2+2);
swap2g(Index,threadId*2+1,threadId*2+2);
I[threadId*2+1]=vals[0];
I[threadId*2+2]=vals[1];
}
}
}
Global thread number: N/2 (2048)

How accelerate MPI-calls?

I try to accelerate simple MPI-programm with OpenMP. I use MPICH2 and 4-core Intel processor. I have simple code:
int main(int argc, char** argv) {
int size, rank, provided;
const int root = 0;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int cubeCount = StrToDouble(argv[1]);
int matrixSize = *StrToDouble(argv[2]);
WorkNode node(rank, size, cubeCount, matrixSize);
time_t t0 = time(0);
node.work();
time_t t1 = time(0);
time_t total = t1 - t0;
Class WorkNode also very simple, contains only array of Cube and method work.
class Cube {
public:
Cube(int matrixSize);
double *matrix;
int matrixSize;
}
Cube::Cube(int matrixSize) {
matrix = new double[matrixSize];
this->matrixSize = matrixSize;
}
Finally method work:
// double *inBuffer = new double[cubes[0]->matrixSize];
MPI_Status status;
for (int i = 0; i < processorCount; i++) {
int nodeToSend = this->id + i;
int nodeRecv = this->id - i;
if (nodeToSend >= processorCount) {
nodeToSend -= processorCount;
}
if (nodeRecv < 0) {
nodeRecv += processorCount;
}
#pragma omp parallel for num_threads(2)
for (int i = 0; i < cubeCount; i++) {
Cube *cube = cubes[i];
if (nodeToSend != this->id) {
MPI_Bsend(cube->matrix, cube->matrixSize, MPI_DOUBLE, nodeToSend, _MY_MPI_ANY_TAG, MPI_COMM_WORLD);
}
if (nodeRecv != this->id) {
double *inBuffer = new double[cubes[0]->matrixSize];
MPI_Recv(inBuffer, cube->matrixSize, MPI_DOUBLE, nodeRecv, _MY_MPI_ANY_TAG, MPI_COMM_WORLD, &status);
delete inBuffer;
}
}
}
//delete inBuffer
Unfortunately, openMP does not accelerate the program (even if the number of MPI processes = 2), and sometimes even slows down. Can I somehow accelerate MPI calls?

Resources