mpi scatterv and gatherv

mpi scatterv and gatherv - mpi

When I run my code using command " mpirun -hosts o251-12,o251-13 ./matrixmult " and I type ijk R 4 , it gives error. Worse thing is if I run again the error is changed sometimes.
And if I use 1 or 2 instead of 4, it works well, but if I use big number, it gives error. I tried change my code, but doesn't work.
Here is my code
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
#include <string.h>
void putValue(int *A, int *B, char *flag, int n);
void ijk(int *A, int *B, int *result, int n, int tmpAn);
void ikj(int *A, int *B, int *result, int n, int tmpAn);
void kij(int *A, int *B, int *result, int n, int tmpAn);
int main(){
int *A;
int *B;
int *result;
int n;
char flag[2];
char form[4];
int my_rank;
int comm_sz;
double time1;
double time2;
double time;
int i;
int j;
int *tmpA;
int *tmpResult;
int *sc;
int *displs;
int alpha;
int d;
MPI_Init(NULL,NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD,&comm_sz);
MPI_Barrier(MPI_COMM_WORLD);
if(my_rank==0){
time1=MPI_Wtime();
}
if(my_rank==0){
scanf("%s", form);
scanf("%s", flag);
scanf("%d", &n);
A = (int*)malloc(n * n * sizeof(int));
B = (int*)malloc(n * n * sizeof(int));
result = (int*)malloc(n * n * sizeof(int));
putValue(A,B,flag,n);
printf("running on %d processors\n", comm_sz);
}
if(my_rank){
B = (int*)malloc(n * n * sizeof(int));
}
MPI_Bcast(&n, 1, MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(B,n*n,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(&form, 4, MPI_CHAR,0,MPI_COMM_WORLD);
sc = (int*)malloc(comm_sz * sizeof(int));
displs = (int*)malloc(comm_sz * sizeof(int));
alpha = n%comm_sz;
d=0;
for(i=0;i<comm_sz;i++){
sc[i] = n/comm_sz;
if(i<alpha){
sc[i] = sc[i]+1;
}
sc[i]=sc[i]*n;
displs[i] = d;
d+=sc[i];
}
tmpA = (int*)malloc(n * sc[my_rank] * sizeof(int));
tmpResult = (int*)malloc(n * sc[my_rank] * sizeof(int));
for(i=0; i<sc[my_rank]; i++){
tmpResult[i]=0;
}
MPI_Scatterv(A,sc,displs,MPI_INT,tmpA,sc[my_rank],MPI_INT,0,MPI_COMM_WORLD);
if(strcmp(form,"ijk")==0){
ijk(tmpA,B,tmpResult,n,sc[my_rank]);
}
else if(strcmp(form,"ikj")==0){
ikj(tmpA,B,tmpResult,n,sc[my_rank]);
}
else if(strcmp(form,"kij")==0){
kij(tmpA,B,tmpResult,n,sc[my_rank]);
}
MPI_Gatherv(tmpResult,sc[my_rank],MPI_INT,result,sc,displs,MPI_INT,0,MPI_COMM_WORLD);
if(my_rank==0){
time2=MPI_Wtime();
time=time2-time1;
printf("elapsed time = %.6e secondes\n",time);
if(strcmp(flag,"I")==0){
for(i=0;i<n;i++){
for(j=0;j<n;j++){
printf("%d ",result[i*n+j]);
}
printf("\n");
}
}
}
MPI_Finalize();
return 0;
}
void putValue(int *A, int *B, char *flag, int n){
int i,j;
srand((unsigned)time(NULL));
if(strcmp(flag,"R")==0){
for(i=0; i<n; i++){
for(j=0; j<n; j++){
A[i*n+j] = (int)rand()%101;
B[i*n+j] = (int)rand()%101;
printf("%d\n",A[i*n+j]);
printf("%d\n",B[i*n+j]);
}
}
}
else if(strcmp(flag,"I")==0){
for(i=0; i<n; i++){
for(j=0; j<n; j++){
int x;
scanf("%d", &x);
A[i*n+j]=x;
}
}
for(i=0; i<n; i++){
for(j=0; j<n; j++){
int x;
scanf("%d", &x);
B[i*n+j]=x;
}
}
}
}
void ijk(int *A, int *B, int *result, int n, int tmpAn){
int i,j,k;
for(i=0;i<(tmpAn/n);i++){
for(j=0;j<n;j++){
for(k=0;k<n;k++){
result[i*n+j] += (A[i*n+k] * B[k*n+j]);
}
}
}
}
void ikj(int *A, int *B, int *result, int n, int tmpAn){
int i,j,k;
for(i=0;i<(tmpAn/n);i++){
for(k=0;k<n;k++){
for(j=0;j<n;j++){
result[i*n+j] += (A[i*n+k] * B[k*n+j]);
}
}
}
}
void kij(int *A, int *B, int *result, int n, int tmpAn){
int i,j,k;
for(k=0;k<n;k++){
for(i=0;i<(tmpAn/n);i++){
for(j=0;j<n;j++){
result[i*n+j] += (A[i*n+k] * B[k*n+j]);
}
}
}
}
Sometimes it the error is
rank = 3, revents = 25, state = 8
Assertion failed in file ../../src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c at line 2988: (it_plfd->revents & POLLERR) == 0
internal ABORT - process 1
and sometimes the error is
Fatal error in PMPI_Gatherv: Unknown error class, error stack:
PMPI_Gatherv(1001)....................: MPI_Gatherv failed(sbuf=0x231f140,
scount=4, MPI_INT, rbuf=0x231f060, rcnts=0x231f0b0, displs=0x231f0d0, MPI_INT, root=0, MPI_COMM_WORLD) failed
MPIR_Gatherv_impl(545)................: fail failed
I_MPIR_Gatherv_intra(617).............: Failure during collective
I_MPIR_Gatherv_intra(590).............: fail failed
MPIR_Gatherv_advanced(720)............: fail failed
MPIDU_Complete_posted_with_error(1710): Process failed
====================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 17870 RUNNING AT o251-13
= EXIT CODE: 134
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES

This line is executed by the root process
scanf("%d", &n);
This line is executed by the rest before n is broadcast:
B = (int*)malloc(n * n * sizeof(int));
Move that alloc statement after the broadcast.

Related

Why does code not work after using fopen() command

I created FILE *ptr to read a file, and after I declare ptr =fopen("file_name.txt", "r");
none of my code written afterwards will work.
BELOW IS SOME OF THE ACTUAL CODE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define MAXFILE 30
#define MAX 50
typedef struct Student
{
int stdid;
char *fname;
char *lname;
int day;
int month;
int year;
}Student;
typedef struct Course
{
char *cid;
char *cname;
float credit;
}Course;
typedef struct Enrollment
{
int stdid;
int cid;
char *semester;
float score;
}Enrollment;
int main()
{
char stud[MAXFILE], courses[MAXFILE], enroll[MAXFILE];
printf("TESTING1");
scanf("%s %s %s",stud,courses,enroll);
int studnum,coursenum,enrollnum;
FILE *st, *cou, *enr;
st = fopen("stud","r");
cou = fopen("courses","r");
enr = fopen("enroll","r");
printf("TESTING2");
if(st==NULL || cou == NULL || enr== NULL)
{
printf("unable to open file\n");
exit(1);
}
fscanf(st,"%d",&studnum);
Student *students = (Student*)(malloc(sizeof(Student) * studnum));
for(int i =0; i<studnum;i++)
{
char ftemp[MAX],ltemp[MAX];
int fir, las;
fscanf(st,"%d %s %s %d %d %d",&students[i].stdid, ftemp, ltemp, &students[i].day, &students[i].month, &students[i].year);
fir = strlen(ftemp)+1;
las = strlen(ltemp)+1;
strcpy(students[i].fname,ftemp);
strcpy(students[i].lname,ltemp);
students[i].fname = malloc(fir * sizeof(char));
students[i].lname = malloc(las * sizeof(char));
strcpy(students[i].fname,ftemp);
strcpy(students[i].lname,ltemp);
}
//reading the students.txt file and filling out Student type array with data;
printf("TESTING3");
fscanf(cou,"%d",&coursenum);
Course *course = (Course*)(malloc(sizeof(Course)*coursenum));
for(int i =0; i<coursenum;i++)
{
char cidtemp[MAX], cntemp[MAX];
int tempcid, tempcn;
fscanf(cou,"%s %s %f",cidtemp, cntemp, &course[i].credit);
tempcid = strlen(cidtemp)+1;
tempcn = strlen(cntemp)+1;
course[i].cid = malloc(tempcid*sizeof(char));
course[i].cname = malloc(tempcn * sizeof(char));
strcpy(course[i].cid,cidtemp);
strcpy(course[i].cname, cntemp);
}
//reading the couses.txt file and filling out an array of type Course with the data
printf("TESTING4");
fscanf(enr,"%d",&enrollnum);
Enrollment *enrollment = (Enrollment*)(malloc(sizeof(Enrollment)*enrollnum));
for(int i =0; i<enrollnum; i++)
{
int stdid;
int cid;
char *semester;
float score;
char semest[MAX];
int semtemp;
fscanf(enr,"%d %d %s %f",&enrollment[i].stdid, &enrollment[i].cid, semest, &enrollment[i].score);
semtemp = strlen(semest)+1;
enrollment[i].semester = malloc(semtemp * sizeof(char));
strcpy(enrollment[i].semester, semest);
printf("%d\t%d\t%s\t%.0f \n", enrollment[i].stdid, enrollment[i].cid, enrollment[i].semester, enrollment[i].score);
}
//reading the enrollment files and fill out at array of type Enrollment
//testing new arrays
printf("%d", enrollnum);
for(int i = 0; i<enrollnum; i ++)
{
printf("%d\t%d\t%s\t%.0f \n", enrollment[i].stdid, enrollment[i].cid, enrollment[i].semester, enrollment[i].score);
}
fclose(st);
fclose(cou);
fclose(enr);
for(int i =0; i < studnum;i++)
{
free(students[i].fname);
free(students[i].lname);
}
free(students);
for(int i =0; i < coursenum;i++)
{
free(course[i].cid);
free(course[i].cname);
}
free(course);
for(int i =0; i < enrollnum;i++)
{
free(enrollment[i].semester);
}
free(enrollment);
return 0;
}

Freeing shows double free or corruption

typedef struct row_container
{
int size;
char *data;
}row_container;
typedef struct message_bar
{
char message[80];
time_t message_time;
}message_bar;
typedef struct brick_win_size
{
int row;
int col;
int current_row;
int current_column;
int data_row;
int row_off;
int col_off;
row_container *container;
char *filename;
message_bar *msg_bar;
}brick_window;
void container_delete_character(struct brick_win_size *win)
{
uint8_t insert_flag = 1;
int row = win->current_row;
int offset = win->current_column;
row_container *container = win->container;
char *data = container[row].data;
if(offset < container[row].size){
memmove(&data[offset], &data[offset + 1], container[row].size - 1);
data = realloc(data, container[row].size - 1);
container[row].data = data;
container[row].size--;
}
if(insert_flag){
if((container[row].size == 0) && (row < win->data_row)){
for(int index = row; index < win->data_row; index++){
if(index != win->data_row -1){
container[index] = container[index+1];
}else{
free(container[index]); //error line...............................
}
}
if(win->data_row != 0){
win->container = realloc(win->container, sizeof(row_container) * (win->data_row - 1));
win->data_row --;
}
}
}
}
Here in the above code i just want to delete one element at one point in the function. so i am just reassigning pointers with the other pointer and finally i am freeing the final element which is eventually ending up in build failure *expected ‘void ’ but argument is of type ‘row_container {aka struct row_container}’
Moreover if i give the & de reference operator it is ending up in double free or corruption error during execution

Read input integer from external file for MPI

How could I read external input file for mpi? I need to read one integer from external file (zadanie4_vstup.txt), to compute simple factorial. I have tried to substitute the second argument in MPI_Init() with address of int variable (n), but it looks it is nonsense.
Thank you.
#include <stdio.h>
#include <mpi.h>
int main(int argc, char ** argv)
{
FILE *fr, *fw;
fr = fopen("zadanie4_vstup.txt", "r");
fw = fopen("zadanie4_vystup.txt", "w");
int nproc, me;
int fakt=1, i, buff, n;
MPI_Status stat;
fscanf(fr, "%d", &n);
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
#pragma omp parallel for private(i) reduction(*:fakt)
for(i=me*n/nproc+1; i<=(me+1)*n/nproc; i++) {
fakt *= i;
}
if(nproc > 1) {
if(me == 0) {
for(i=1; i<nproc; i++) {
MPI_Recv(&buff, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
fakt*=buff;
}
} else {
MPI_Send(&fakt, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
}
if(me == 0) {
fprintf(fw, "%d! = %d\n", n, fakt);
}
fclose(fr);
fclose(fw);
MPI_Finalize();
}

here is a version of your program that reads n on the command line.
note i simplified the communications by using MPI_Reduce()
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[]) {
int nproc, me;
int fakt=1, res, i, buff, n;
MPI_Status stat;
MPI_Init(&argc, &argv);
n = atoi(argv[1]);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
#pragma omp parallel for private(i) reduction(*:fakt)
for(i=me*n/nproc+1; i<=(me+1)*n/nproc; i++) {
fakt *= i;
}
MPI_Reduce(&fakt, &res, 1, MPI_INT, MPI_PROD, 0, MPI_COMM_WORLD);
if(me == 0) {
printf("%d! = %d\n", n, res);
}
MPI_Finalize();
return 0;
}
for example
$ mpirun -np 4 ./fakt 6
6! = 720

How to write Nested Loop in Kernel side OpenCL

I'm a beginner in OpenCL. I'm trying to implement an OpenCL application.I have a doubt that how to write opencl kernel code . i have given a original c code.
Question :- help me to change that given c code into opencl kernel code?.
ORIGINAL C CODE:
int i, j;
// initialization of indexes
for (i = 0; i<n; i++)
Index[i] = i;
// Bubble sort
for (i = 0; i<n - 1; i++)
{
for (j = i + 1; j<n; j++)
{
if (I[i] > I[j])
{
double z = I[i]; // exchange attractiveness
I[i] = I[j];
I[j] = z;
z = f[i]; // exchange fitness
f[i] = f[j];
f[j] = z;
int k = Index[i]; // exchange indexes
Index[i] = Index[j];
Index[j] = k;
}
}
}

Example for 4096 element arrays(alternate bubble1 and bubble2 at least 2048 times--->4096(N) kernel executions ):
index init on host side since its just assignment.
Auxiliary functions:
void swap2p(__private int * I,int i,int j)
{
int tmp=I[i];
I[i]=I[j];
I[j]=tmp;
}
void swap2g(__global int * I,int i,int j)
{
int tmp=I[i];
I[i]=I[j];
I[j]=tmp;
}
Alternating kernel-1:
__kernel void bubble1(__global int * I, __global int * f, __global int * Index){
int threadId=get_global_id(0);
__private int vals[2];
if(threadId*2+1<4096)
{
vals[0]=I[threadId*2];
vals[1]=I[threadId*2+1];
if(vals[0]>vals[1])
{
swap2p(vals,threadId*2,threadId*2+1);
swap2g(f,threadId*2,threadId*2+1);
swap2g(Index,threadId*2,threadId*2+1);
I[threadId*2]=vals[0];
I[threadId*2+1]=vals[1];
}
}
}
alternating kernel-2:
__kernel void bubble2(__global int * I){
int threadId=get_global_id(0);
__private int vals[2];
if(threadId*2+2<4096)
{
vals[0]=I[threadId*2+1];
vals[1]=I[threadId*2+2];
if(vals[0]>vals[1])
{
swap2p(vals,threadId*2+1,threadId*2+2);
swap2g(f,threadId*2+1,threadId*2+2);
swap2g(Index,threadId*2+1,threadId*2+2);
I[threadId*2+1]=vals[0];
I[threadId*2+2]=vals[1];
}
}
}
Global thread number: N/2 (2048)

How accelerate MPI-calls?

I try to accelerate simple MPI-programm with OpenMP. I use MPICH2 and 4-core Intel processor. I have simple code:
int main(int argc, char** argv) {
int size, rank, provided;
const int root = 0;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int cubeCount = StrToDouble(argv[1]);
int matrixSize = *StrToDouble(argv[2]);
WorkNode node(rank, size, cubeCount, matrixSize);
time_t t0 = time(0);
node.work();
time_t t1 = time(0);
time_t total = t1 - t0;
Class WorkNode also very simple, contains only array of Cube and method work.
class Cube {
public:
Cube(int matrixSize);
double *matrix;
int matrixSize;
}
Cube::Cube(int matrixSize) {
matrix = new double[matrixSize];
this->matrixSize = matrixSize;
}
Finally method work:
// double *inBuffer = new double[cubes[0]->matrixSize];
MPI_Status status;
for (int i = 0; i < processorCount; i++) {
int nodeToSend = this->id + i;
int nodeRecv = this->id - i;
if (nodeToSend >= processorCount) {
nodeToSend -= processorCount;
}
if (nodeRecv < 0) {
nodeRecv += processorCount;
}
#pragma omp parallel for num_threads(2)
for (int i = 0; i < cubeCount; i++) {
Cube *cube = cubes[i];
if (nodeToSend != this->id) {
MPI_Bsend(cube->matrix, cube->matrixSize, MPI_DOUBLE, nodeToSend, _MY_MPI_ANY_TAG, MPI_COMM_WORLD);
}
if (nodeRecv != this->id) {
double *inBuffer = new double[cubes[0]->matrixSize];
MPI_Recv(inBuffer, cube->matrixSize, MPI_DOUBLE, nodeRecv, _MY_MPI_ANY_TAG, MPI_COMM_WORLD, &status);
delete inBuffer;
}
}
}
//delete inBuffer
Unfortunately, openMP does not accelerate the program (even if the number of MPI processes = 2), and sometimes even slows down. Can I somehow accelerate MPI calls?

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

mpi scatterv and gatherv - mpi

This line is executed by the root process scanf("%d", &n); This line is executed by the rest before n is broadcast: B = (int)malloc(n n * sizeof(int)); Move that alloc statement after the broadcast.

Related

Why does code not work after using fopen() command

Freeing shows double free or corruption

Read input integer from external file for MPI

How to write Nested Loop in Kernel side OpenCL

How accelerate MPI-calls?

Categories

Resources

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

mpi scatterv and gatherv - mpi

This line is executed by the root process scanf("%d", &n); This line is executed by the rest before n is broadcast: B = (int*)malloc(n * n * sizeof(int)); Move that alloc statement after the broadcast.

Related

Why does code not work after using fopen() command

Freeing shows double free or corruption

Read input integer from external file for MPI

How to write Nested Loop in Kernel side OpenCL

How accelerate MPI-calls?

Categories

Resources

This line is executed by the root process scanf("%d", &n); This line is executed by the rest before n is broadcast: B = (int)malloc(n n * sizeof(int)); Move that alloc statement after the broadcast.