MPI sending 2D array - multidimensional-array

I am new to MPI. I am trying to assign an adjacency matrix to the processes so that I am able to implement the 1-D BFS algorithm given by https://ieeexplore.ieee.org/document/1559977. Assume that I have a 6*6 matrix like that with 4 processes:
0 0 0 0 0 0
1 1 1 1 1 1
0 1 0 1 0 1
1 0 1 0 1 0
1 1 1 1 1 1
0 1 0 1 0 1
I want it to be processed like:
A A A A A A
B B B B B B
C C C C C C
D D D D D D
D D D D D D
D D D D D D
Every process is assigned (int)(no_of_vertices/size) rows, and the last process is assigned the remaining rows, i.e. (no_of_vertices-(size-1)*local_vertices). This is needed because the graph size will rarely be evenly divisible by the number of processes p, so the processes cannot all be given exactly the same workload.
Therefore, I read through the answer by Jonathan Dursi to "sending blocks of 2D array in C using MPI" and wrote my own code based on it. I post it below:
#include "mpi.h"
#include<stdio.h>
#include<stdlib.h>
#include<iostream>
#include<iostream>
#include <fstream>
#include <algorithm>
#include <vector>
#include <string>
#include <sstream>
#include <chrono>
#include <cmath>
using namespace std;
#define MAX_QUEUE_SIZE 5
/* Returns the largest of the three integers a, b and c. */
int Max(int a, int b, int c)
{
    // Settle the first pair, then compare the winner with the third value.
    const int larger_ab = (a >= b) ? a : b;
    return (larger_ab >= c) ? larger_ab : c;
}
/*
 * Returns 1 when none of the first `size` entries of `visited` is 0,
 * and 0 as soon as a zero entry is found. An empty range yields 1.
 */
int areAllVisited(int visited[], int size)
{
    int idx = 0;
    // Advance past every non-zero entry; stop at the first zero.
    while (idx < size && visited[idx] != 0)
        ++idx;
    return (idx < size) ? 0 : 1;
}
/*
 * Allocates an n-by-m matrix of int as ONE contiguous block of cells plus a
 * table of n row pointers into that block, so the matrix can be indexed as
 * (*array)[i][j] while &(*array)[0][0] is still a flat n*m buffer.
 *
 * array: out-parameter; receives the row-pointer table on success and is set
 *        to NULL on every failure path.
 * n, m:  number of rows and columns; negative values are rejected.
 *
 * Returns 0 on success, -1 on invalid dimensions or allocation failure.
 */
int malloc2dint(int*** array, int n, int m) {
    *array = NULL;
    if (n < 0 || m < 0) return -1;

    /* do the size arithmetic in size_t: n * m overflows int for large matrices */
    const size_t rows = static_cast<size_t>(n);
    const size_t cols = static_cast<size_t>(m);
    const size_t max_cells = static_cast<size_t>(-1) / sizeof(int);
    if (cols != 0 && rows > max_cells / cols) return -1;

    /* allocate the n*m contiguous items */
    int* cells = static_cast<int*>(malloc(rows * cols * sizeof(int)));
    if (!cells) return -1;

    /* allocate the row pointers into the memory */
    int** row_table = static_cast<int**>(malloc(rows * sizeof(int*)));
    if (!row_table) {
        free(cells);
        return -1;
    }

    /* set up the pointers into the contiguous memory */
    for (size_t i = 0; i < rows; i++)
        row_table[i] = cells + i * cols;

    *array = row_table;
    return 0;
}
/*
 * Releases a matrix stored as a row-pointer table over one contiguous data
 * block (the layout malloc2dint builds).
 *
 * array: address of the row-pointer table. A NULL `array` or a NULL `*array`
 *        is treated as "nothing to free". On return `*array` is NULL so the
 *        caller is not left holding a dangling pointer.
 *
 * Always returns 0.
 */
int free2dint(int*** array) {
    if (!array || !*array) return 0;
    /* free the memory - the first row pointer is the start of the data block */
    free((*array)[0]);
    /* free the pointers into the memory */
    free(*array);
    *array = NULL;
    return 0;
}
int main(int argc, char* argv[])
{
//Variables and Initializations
int size, rank;
int local_vertices;
int** adj_matrix=NULL;
int *adjacency_matrix;
int adjacency_queue[MAX_QUEUE_SIZE];
int source_vertex;
int no_of_vertices;
int ** local_adj_matrix;
int *visited;
int *distance;
int* frontier_vertex;
//MPI Code
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size); // MPI_COMM -> communicator, size=number of processes
// Get my rank in the communicator
MPI_Comm_rank(MPI_COMM_WORLD, &rank);//rank= current processes
//read input file, transform into adjacency matrix
if (rank == 0)
{
string filename="out.txt";
std::fstream in(filename.c_str());
cout << "reading file:" << filename << endl;
string s;
size_t n = 0, m = 0;
string data1, data2;
while (true)
{
std::getline(in, s);
istringstream is(s);
is >> data1 >> data2;
int d1 = stoi(data1);
int d2 = stoi(data2);
n = Max(n, d2, d1);
m += 1;
if (in.eof()) { break; }
}
//this block will count the number of lines and calculate the maximun number appeared in the file, which are the parameters n, m(vertice, edge)
in.clear();
in.seekg(0, ios::beg);
n += 1;
m -= 1;
//initialize the 2D array
malloc2dint(&adj_matrix, n, n);
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++)
{
adj_matrix[i][j] = 0;
}
}
for (size_t i = 0; i < m; i++)
{
int x, y;
std::getline(in, s);
istringstream is(s);
is >> data1 >> data2;
x = stoi(data1);
y = stoi(data2);
adj_matrix[x][y] = 1;
}
in.close();
//print the matrix
cout << "the adjacency matrix is:" << endl;
for (int i = 0; i < n; i++) {
cout << endl;
for (int j = 0; j < n; j++) {
cout << adj_matrix[i][j] << " ";
}
}
source_vertex = 0;
no_of_vertices = n;
local_vertices = (int)(no_of_vertices/size);
}
MPI_Bcast(&local_vertices,1,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(&no_of_vertices, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&source_vertex, 1, MPI_INT, 0, MPI_COMM_WORLD);
/* create a datatype to describe the subarrays of the global array */
int sizes[2] = { no_of_vertices, no_of_vertices }; /* global size */
int subsizes[2] = { 1, no_of_vertices }; /* local size */
int starts[2] = { 0,0 }; /* where this one starts */
MPI_Datatype type, subarrtype;
MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &type);
MPI_Type_create_resized(type, 0, no_of_vertices * sizeof(int), &subarrtype);// /local_vertices
MPI_Type_commit(&subarrtype);
if (rank == size - 1) {
local_vertices = no_of_vertices - (size - 1) * local_vertices;
malloc2dint(&local_adj_matrix, local_vertices, no_of_vertices); }
malloc2dint(&local_adj_matrix,local_vertices,no_of_vertices);
//MPI_Barrier(MPI_COMM_WORLD);
int* adjptr = NULL;
if (rank == 0) adjptr = &(adj_matrix[0][0]);
int *sendcounts=new int[size];
int *displs = new int[size];
if (rank == 0) {
for (int i = 0; i < size; i++)
if (i == size - 1) {
sendcounts[i] = no_of_vertices - (size - 1) * local_vertices;
}
else
sendcounts[i] = local_vertices;
int disp = 0;
for (int i = 0; i < size; i++) {
displs[i] = disp;
disp += no_of_vertices*local_vertices;
}
}
//Scattering each row of the adjacency matrix to each of the processes
//int MPI_Scatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype,void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)
MPI_Scatterv(adjptr, sendcounts,displs,subarrtype, &local_adj_matrix[0][0], local_vertices*no_of_vertices, MPI_INT, 0, MPI_COMM_WORLD);
cout << "rank=" << rank << endl;
for(int i=0;i<local_vertices;i++){
cout << endl;
for (int j = 0; j < no_of_vertices; j++) {
cout << local_adj_matrix[i][j] << " ";
}
}
MPI_Barrier(MPI_COMM_WORLD);
//End of BFS code
MPI_Finalize();
return 0;
}
However, the situation in the linked answer is not the same as mine and, naturally, I got unexpected output:
reading file:out.txt
the adjacency matrix is:
0 0 1 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 1 0 0 0 0 0 0 0 0
0 1 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 1 0 0 1 0 0 0 0 0 0 1
0 0 0 0 0 0 0 0 0 0 1 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0 0 0 0 0
0 0 0 0 1 0 0 0 0 0 0 0
0 0 0 0 1 0 0 0 0 0 0 0
rank=1
0 0 0 0 0 0 0 1 0 0 0 0 rank=0
rank=2
rank=4
rank=3
rank=5
rank=6
rank=7
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
-410582944 556 -410582944 556 -410353664 556 815104 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 -912406441 -1879044864
0 0 0 0 0 0 -906966900 -1879046144 -410511728 556 -410398928 556
0 0 0 0 0 0 0 0 0 0 0 0
0 0 1 1 0 0 0 0 0 0 0 0
-33686019 2949222 32575338 37898 -410536560 556 -410396256 556 3473509 6357044 8192057 0
0 0 0 0 0 0 0 0 0 0 0 0
C:\Program Files\Microsoft MPI\Bin\mpiexec.exe (process 3628) exited with code 0.
Press any key to close this window . . .
I am sure that there are serious mistakes in how I initialize the two arrays sendcounts and displs and in how I create the new type subarrtype, but I have no idea how to fix them. Could anyone give me a hand?
possible input file:
0 3
0 2
2 3
3 1
5 11
5 4
5 1
6 10
8 5
9 4
10 4
11 7

Since you have defined a subarray that maps on to all the local data you want to send, all the sendcounts should be 1. The receive counts need to be the number of integers in the message (which is what you seem to have set them as) but the send count is one subarray.

Just as sendcounts is in terms of sizeof(datatype), i.e. counted in units of the send datatype, so are the displacements. If you edit line 186:
disp += no_of_vertices*local_vertices;
to be:
disp += local_vertices;
then I think it works.
You actually don't need all the complications of resized datatypes, as you are scattering complete, contiguous rows.

Related

julia jump: declare array with variable upper and lower limit

I am trying to solve a sensitivity analysis using Julia (JUMP). I am stuck at the following problem:
I am trying to declare a variable that has both an upper and a lower limit to feed it to the lp_objective_perturbation_range function. That function requires the reference of an array as input.
I have tried the following syntax:
# Solution to exercise 1
println("Lösung Aufgabe 1")
# Production plan based on the initial values
println("Produktionsplan auf Grundlage der Ausgangswerte")
produktionsplanModell = Model(with_optimizer(GLPK.Optimizer))
# Fixed costs already incurred
# Fashion show - 8,100,000 USD
# Designer - 860,000 USD
fixkosten = 8100000 + 860000
# One entry per decision variable x[1..11]
c = [33.75; 66.25; 26.625; 210; 22; 136; 60.5; 53.5; 143.25; 110; 155.25]
# 7x11 coefficient matrix; used further down in the constraints
# sum(A[j,i]*x[i] for i=1:11) <= b[j]
A = [ 0 0 0 0 0 2 0 0 1.5 2 1.5;
0.5 1.5 0 0 0 0 0 0 0 0 0;
0 0 0 1.5 0 0 0 0 0 0 0;
0 0 0 0 1.5 3 0 0 0 0 0;
0 0 0 0 0 0 1.5 0.5 0 0 0;
0 0 1.5 0 0 0 0 0 2 0 0;
0 0 0 0 0 0 0 0 0 3 2.5;]
# Right-hand sides of the 7 constraints
b = [28000.0; 30000; 9000; 20000; 18000; 30000; 45000]
# Upper limit of each variable x[i]
w = [60000.0;15000;20000; 4000; 6000; 5500; 9000;15000;15000; 7000; 5000]
# Lower limit of each variable x[i]
y = [0.0; 0; 0; 0; 0; 0; 0; 0; 2800; 4200; 3000]
# Definition of the variables
#variable(produktionsplanModell, w[i] >= x[i] >= y[i] for i=1:11)
Unfortunately, this isn't working. So I need an array that has the following definition and can be assigned to the model:
#variable(produktionsplanModell, 60000 >= x1 >= 0)
#variable(produktionsplanModell, 15000 >= x2 >= 0)
#variable(produktionsplanModell, 20000 >= x3 >= 0)
#variable(produktionsplanModell, 4000 >= x4 >= 0)
#variable(produktionsplanModell, 6000 >= x5 >= 0)
#variable(produktionsplanModell, 5500 >= x6 >= 0)
#variable(produktionsplanModell, 9000 >= x7 >= 0)
#variable(produktionsplanModell, 15000 >= x8 >= 0)
#variable(produktionsplanModell, 15000 >= x9 >= 2800)
#variable(produktionsplanModell, 7000 >= x10 >= 4200)
#variable(produktionsplanModell, 5000 >= x11 >= 3000)
Is it possible to do that? Rest of the program is working fine. Thanks in advance!
The correct syntax is:
#variable(produktionsplanModell , y[i] <= x[i=1:11] <= w[i] )
Hence you need to define the loop inside variable declaration.
Of course another option is:
# Declare the 11 variables first, then bound each one in a loop:
# y[i] is the lower limit and w[i] the upper limit of x[i].
@variable(produktionsplanModell, x[1:11] )
for i in 1:11
    @constraint(produktionsplanModell , y[i] <= x[i] <= w[i])
end
THX.
I do this and it works fine...
#variable(produktionsplanModell, x[1:11])
for i=1:11
set_lower_bound(x[i], y[i])
set_upper_bound(x[i], w[i])
end
because of my constraints look like that.
#constraint(produktionsplanModell, constraint[j=1:7], sum( A[j,i]*x[i] for i=1:11 ) <= b[j])

Writing a Partial Sum GPU Kernel

I have the following array with sparse 1's every now and then. It's a massive vector, megabytes in size.
[0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 ..]
I need to store those 1's at an index for processing, so I need a kernel that produces this:
[0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 ..]
How can I parallelize such an operation?
You're looking for a 'parallel inclusive scan', which the thrust library (ships with the cuda toolkit), includes out of the box:
#include <thrust/scan.h>
#include <thrust/device_vector.h>
#include <iostream>
// Copies a small 0/1 array to the device, computes its inclusive prefix sum
// with thrust::inclusive_scan and prints the result on one line.
int main( int argc, char * argv[] )
{
    int data[17] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0 };
    thrust::device_vector< int > in( data, data + 17 );
    thrust::device_vector< int > out( in.size() );
    thrust::inclusive_scan( in.begin(), in.end(), out.begin() );
    // size() is unsigned, so the index is unsigned as well
    for ( std::size_t i = 0; i < out.size(); ++i )
        std::cout << out[i] << " ";
    // the original unqualified `endl` does not compile: there is no
    // using-directive for namespace std in this snippet
    std::cout << '\n';
}
outputs:
0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2
Or you could explicitly write a kernel - which will just be a variation on the parallel prefix sum algorithm, which thrust generalizes nicely.

Convert R code to Rcpp - for loop, vector input, vector output

I am beginner to Rcpp and C++. If I could see a working example relative to my context it might help me in my journey.
Lets take the following R code:
value <-c(0.2,0.3,0.4,0.5,0.6,-2.0,0.7,0.4,-10,0.1,0.2,0.4,3.0,0.6,0.7,0.8,-1.2,0.6,0.7,0.8,0.3,0.5,2,0.1,0.2)
res <- NULL
while (length(res) < length(value)) {
if (value[length(res)+1] < -1) {
res <- c(res, rep(1,5))
} else {
res <- c(res, 0)
}
}
with numerical output:
> str(res)
num [1:25] 0 0 0 0 0 1 1 1 1 1 ...
The code is a while loop: whenever the value at the next position is below -1 it appends rep(1,5) (five 1s) to the result vector; otherwise it appends a single 0.
Next I wish to send this off to Rcpp:
I am following some examples here:
Hadley Wickham and http://dirk.eddelbuettel.com
My conversion attempt is below:
cppFunction('double resC(NumericVector x) {
int n = x.size();
double res = 0;
for(int i = ; i < n; ++i) {
if (value[i] < -1) {
res += c(res, rep(1,5));
} else {
res += c(res, 0);
}
return res;
}')
resC(value)
Can C++ append to vectors the same way R can? It does not look like a straightforward one-to-one translation.
I opted to code it in Julia
Logic same just put the curly R braces in:
signal = [0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 ]
n_day = zeros(signal)
i = 1
j = 1
for i in 1:length(signal)
if signal[i] == 1
n_day[i] = 1
j = 1
print("i =", i)
while(j<=4)
# Carry over index position
n = i + j # carry over index position
n_day[n] = 1
j = j + 1
end
j=1
n=1
end
end
for output:
julia> print(n_day)
[0 1 1 1 1 1 1 0 1 1 1 1 1 0 0 0 1 1 1 1 1 0]

For loop storage of output data

I am trying to store the output data from the forloop in the n.I matrix at the end of the code, but I am certain that something is wrong with my output matrix. It is giving me all the same values, either 0 or 1. I know that print(SS) is outputting the correct values and can see that the forloop is working properly.
Does anyone have any advice on how to fix the matrix, or any way that I am able to store the data from the forloop? Thanks in advance!
# Parameters from which the two switching probabilities are derived
c=0.2
As=1
d=1
d0=0.5
s=0.5
e=0.1
ERs=e/As                  # probability of switching from state 1 back to 0
C2 = c*As*exp(-d*s/d0)    # probability of switching from state 0 to 1
#Island States (Initial Probability)
SS=0
# One row per iteration, filled inside the loop. Building the matrix after the
# loop from SS alone only recycles the final state into every row.
n.I=matrix(0, nrow=5, ncol=1)
for(i in 1:5) {
  if (SS > 0) {
    if (runif(1, min = 0, max = 1) < ERs) {
      SS = 0
    }
  } else {
    if (runif(1, min = 0, max = 1) < C2) {
      SS = 1
    }
  }
  print(SS)
  n.I[i, 1] = SS   # keep the state reached in this iteration
}
The efficient solution here is not to use a loop. It's unnecessary since the whole task can be easily vectorized.
Z =runif(100,0,1)
as.integer(x <= Z)
#[1] 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
#[70] 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
you can save them in a list. Not very efficient but gets the job done.
list[[1]] indicates the first element saved in a list if you want to retrieve it.
list_pos <- list() # create the list out of the for loop
for(i in 1:100) {
c=0.10 #colonization rate
A=10 #Area of all islands(km^2)
d=250 #Distance from host to target (A-T)
s=0.1 #magnitude of distance
d0=100 #Specific "half distance" for dispersal(km)
C1 = c*A*exp(-d/d0) #Mainland to Target colonization
Z =runif(1,0,1)
x <- C1*A
if(x <= Z) {
list_pos[[i]] <- print("1") # Here you can store the 1 results.print is actually not necessary.
}
if(x >= Z){
list_pos[[i]] <- print("0") # Here you can store the 0 results.print is actually not necessary.
}
}

Tune SVM in R - Dependent variable has wrong type

I'm using svm from e1071 for a dataset like this:
sdewey <- svm(x = as.matrix(trainS),
y = trainingSmall$DEWEY,
type="C-classification")
That works just fine, but when I try to tune the cost and gamma like this:
svm_tune <- tune(svm, train.x=as.matrix(trainS), train.y=trainingSmall$DEWEY, type="C-classification", ranges=list(cost=10^(-1:6), gamma=1^(-1:1)))
I get this error:
Error in tune(svm, train.x = as.matrix(trainS), train.y =
trainingSmall$DEWEY, : Dependent variable has wrong type!
The structure of my training data is this, but with many more lines:
'data.frame': 1000 obs. of 1542 variables:
$ women.prisoners : int 1 0 0 0 0 0 0 0 0 0 ...
$ reformatories.for.women : int 1 0 0 0 0 0 0 0 0 0 ...
$ women : int 1 0 0 0 0 0 0 0 0 0 ...
$ criminal.justice : int 1 0 0 0 0 0 0 0 0 0 ...
$ soccer : int 0 1 0 0 0 0 0 0 0 0 ...
$ coal.mines.and.mining : int 0 0 1 0 0 0 0 0 0 0 ...
$ coal : int 0 0 1 0 0 0 0 0 0 0 ...
$ engineering.geology : int 0 0 1 0 0 0 0 0 0 0 ...
$ family.violence : int 0 0 0 1 0 0 0 0 0 0 ...
It is a multi-class problem.
I'm not sure of how I could solve this or if there are other ways of finding out the optimal value for the cost and gamma parameters.
Here is an example of my data, and trainS is that data without the first 4 columns (DEWEY, D1, D2 and D3)
Thanks
require(e1071)
trainingSmall<-read.csv("trainingSmallExtra.csv")
# Predictors are every column after the first four (DEWEY, D1, D2, D3).
# Columns must be selected up to ncol(), not nrow(): indexing with the row
# count picks the wrong set of columns whenever the two differ.
sdewey <- svm(x = as.matrix(trainingSmall[,5:ncol(trainingSmall)]),
              y = trainingSmall$DEWEY,
              type = "C-classification",
              kernel = "linear" # same as no kernel
)
This works because svm has automatically converted DEWEY to a factor.
The tune model failed because, being that it is made for user customization, it relies on you to supply the correct data type. Since DEWEY was integer instead of factor it failed. We can fix this:
# tune() does not convert the response itself, so make it a factor first.
trainingSmall$DEWEY <- as.factor(trainingSmall$DEWEY)
# Predictors are every column after the first four (DEWEY, D1, D2, D3);
# select them by column count (ncol), not by row count.
svm_tune <- tune(svm, train.x = as.matrix(trainingSmall[,5:ncol(trainingSmall)]),
                 train.y = trainingSmall$DEWEY, # the way I'm formatting your
                 kernel = "linear", # code is Google's R style
                 type = "C-classification",
                 ranges = list(
                   cost = 10^(-1:6),
                   gamma = 1^(-1:1)
                 )
)

Resources