I am getting same result while running the matrix multiplication in both GPU and CPU.
here is my code:.
viennacl::ocl::set_context_platform_index(1, 1);
viennacl::ocl::set_context_platform_index(0, 0);
viennacl::ocl::switch_context(0);
std::cout << "--- Computing matrix-matrix product using viennacl in GPU ---" << std::endl;
timer.start();
vcl_C = viennacl::linalg::prod(vcl_A, vcl_B);
exec_time = timer.get();
std::cout << " - Execution time: " << exec_time << std::endl;
std::cout << "result on GPU: "<<viennacl::ocl::current_device().name() << std::endl;
//same operation on CPU
std::cout << "coming here" << std::endl;
viennacl::ocl::switch_context(1);
std::cout << "--- Computing matrix-matrix product using viennacl in CPU ---" << std::endl;
timer.start();
vcl_C = viennacl::linalg::prod(vcl_A, vcl_B);
exec_time = timer.get();
std::cout << " - Execution time: " << exec_time << std::endl;
std::cout << "result on CPU: " << viennacl::ocl::current_device().name() << std::endl << std::endl;
Here is my result:
--- Computing matrix-matrix product using viennacl in GPU ---
- Execution time: 24.4675
result on GPU: GeForce GTX 1080
coming here
--- Computing matrix-matrix product using viennacl in CPU ---
- Execution time: 24.4654
result on CPU: Intel(R) Xeon(R) CPU E3-1225 v5 # 3.30GHz
please help me to sort out this issue.
Thanks in advance
and finally i got the correct results in CPU and GPU:
code:
int main()
{
typedef float ScalarType;
viennacl::tools::timer timer;
double exec_timecpu;
double exec_timegpu;
viennacl::tools::uniform_random_numbers<ScalarType> randomNumber;
viennacl::ocl::set_context_platform_index(1, 1);
viennacl::ocl::set_context_platform_index(0, 0);
viennacl::ocl::switch_context(1);
viennacl::matrix<ScalarType> vcl_A(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
viennacl::matrix<ScalarType, viennacl::column_major> vcl_B(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
viennacl::matrix<ScalarType> vcl_C(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
for (unsigned int i = 0; i < vcl_A.size1(); ++i)
for (unsigned int j = 0; j < vcl_A.size2(); ++j)
vcl_A(i,j) = randomNumber();
for (unsigned int i = 0; i < vcl_B.size1(); ++i)
for (unsigned int j = 0; j < vcl_B.size2(); ++j)
vcl_B(i,j) = randomNumber();
std::cout << std::endl;
std::cout << "--- Computing matrix-matrix product using viennacl in CPU ---" << std::endl;
timer.start();
vcl_C = viennacl::linalg::prod(vcl_A, vcl_B);
viennacl::backend::finish();
exec_timecpu = timer.get();
std::cout << " - Execution time: " << exec_timecpu << std::endl;
std::cout << "result on CPU: " << viennacl::ocl::current_device().name() << std::endl << std::endl;
//same operation on GPU
viennacl::ocl::switch_context(0);
viennacl::matrix<ScalarType > vcl_GA(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
viennacl::matrix<ScalarType > vcl_GB(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
viennacl::matrix<ScalarType > vcl_GC(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
for (unsigned int i = 0; i < vcl_GA.size1(); ++i)
for (unsigned int j = 0; j < vcl_GA.size2(); ++j)
vcl_GA(i,j) = randomNumber();
for (unsigned int i = 0; i < vcl_GB.size1(); ++i)
for (unsigned int j = 0; j < vcl_GB.size2(); ++j)
vcl_GB(i,j) = randomNumber();
std::cout << "--- Computing matrix-matrix product using viennacl in GPU ---" << std::endl;
vcl_GC = viennacl::linalg::prod(vcl_GA, vcl_GB);
timer.start();
vcl_GC = viennacl::linalg::prod(vcl_GA, vcl_GB);
viennacl::backend::finish();
exec_timegpu = timer.get();
std::cout << " - Execution time: " << exec_timegpu << std::endl;
std::cout << "result on GPU: "<<viennacl::ocl::current_device().name() << std::endl;
return 0;
}
output:
--- Computing matrix-matrix product using viennacl in CPU ---
- Execution time: 0.559754
result on CPU: Intel(R) Xeon(R) CPU E3-1225 v5 # 3.30GHz
--- Computing matrix-matrix product using viennacl in GPU ---
- Execution time: 0.004177
result on GPU: GeForce GTX 1080
Things to note:
*Make sure to define VIENNACL_WITH_OPENCL in header.
*Create different buffers for different devices because in opencl the buffers are interconnected with the computing devices so that we can't use the same buffer in two different devices.
**Make sure to add viennacl::backend::finish() to wait for the kernels to finish the execution.
Related
in an R script I source a cpp file to make some calculations. In that R script, a function defined in the cpp file is called and a matrix and an integer is provided. After a few rounds through the loop it gives the error "Not a matrix" (in line of code resid = (x(_,j) - x(_,i))*(x(_,j) - x(_,i));), even though for the rounds before it worked.
R script:
## all together
# rm(list=ls())
library(RcppArmadillo)
library(Rcpp)
sourceCpp("~/test.cpp",verbose = FALSE)
cat("start loop")
for(n in c(45:46)){
cat("\n", n, "\n")
p_m <- matrix(data=rnorm(n^2,1,1),nrow = n, ncol=n)
print(class(p_m))
print(some_function(p_m,nosamples=10))
}
cat("finished")
I start this R script via the command line. R version R-4.1.0. In R-Studio it crashes with a fatal error.
The cpp file:
// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadillo.h>
using namespace Rcpp;
// [[Rcpp::export]]
NumericVector some_function(NumericMatrix x,int nosamples) {
int ncol = x.ncol();
NumericVector out2(nosamples);
int loops;
int loops2;
double result=0;
NumericVector::iterator it;
double acc = 0;
NumericVector resid(ncol);
NumericVector out(ncol*(ncol-1)/2);
loops2=0;
std::cout << nosamples << std::endl;
std::cout << (ncol-1) << std::endl;
std::cout << ncol*(ncol-1)/2 << std::endl;
while(loops2 < (nosamples)){
std::cout << "loops2:" << std::endl;
std::cout << loops2 << std::endl;
loops=0;
int i;
int j;
for(j=0;j<(ncol-1);++j){
std::cout << " j: " << j << std::endl;
for (i = (j+1); i < (ncol); ++i) {
std::cout << " i: " << i << std::endl;
resid = (x(_,j) - x(_,i))*(x(_,j) - x(_,i)); //here it stops
std::cout << " i: " << i << std::endl;
for(int ii=0; ii<ncol;++ii){
acc += resid[i];
}
result=sqrt(acc);
loops += 1;
out[loops] = result;
std::cout << " i: " << i << std::endl;
}
}
std::cout << "loops:" << std::endl;
std::cout << loops << std::endl;
out = out[out > 0];
it = std::min_element(out.begin(), out.end());
out= *it;
std::cout << out << std::endl;
loops2 += 1;
out2[loops2]=out[0];
}
std::cout << "cpp finished" << std::endl;
return(out2);
}
Can someone explain what the problem is about?
Thanks and kind regards
Edit
I adapted some things in the cpp file (shown below) and the error disappeared. First I thought, everything is fine. But when I increase the number of loops, another problem occurs: the function breaks, but no error is shown. It breaks after loop number 543 ("loop2: 543"). At least it does the same in each round of the while loop with the same data.
I adapted the R-script and the ccp file to make this problem (at least on my machine) reproducible.
I know this code seems to be somehow meaningless, but it is part of a bigger program and I wanted to give here a minimum example.
The R script:
## all together
# rm(list=ls())
library(RcppArmadillo)
library(Rcpp)
sourceCpp("~/test.cpp",verbose = FALSE)
cat("start loop")
for(n in c(100:101)){
cat("\n", n, "\n")
p_m <- matrix(data=rnorm(n^2,1,1),nrow = n, ncol=n)
print(class(p_m))
print(some_function(p_m,nosamples=800))
}
cat("finished")
The cpp file:
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppEigen)]]
#include <RcppArmadillo.h>
#include <RcppEigen.h>
using namespace Rcpp;
using Eigen::Map;
using Eigen::VectorXd;
typedef Map<VectorXd> MapVecd;
// [[Rcpp::export]]
NumericVector some_function(NumericMatrix x,int nosamples) {
int ncol = x.ncol();
NumericVector out(ncol*(ncol-1)/2);
NumericVector out2(nosamples);
NumericVector out3(ncol*(ncol-1)/2);
NumericVector resid(ncol);
int loops;
int loops2;
double result=0;
double acc = 0;
int show_cout=0;
loops2=0;
std::cout << nosamples << std::endl;
std::cout << (ncol-1) << std::endl;
std::cout << ncol*(ncol-1)/2 << std::endl;
while(loops2 < (nosamples)){
std::cout << "loops2:" << loops2 << std::endl;
loops=0;
int i;
int j;
for(j=0;j<(ncol-1);++j){
// std::cout << " j: " << j << std::endl;
for (i = (j+1); i < (ncol); ++i) {
if(show_cout==1){
std::cout << " i: " << i << std::endl;
}
resid = (x(_,j) - x(_,i))*(x(_,j) - x(_,i));
if(show_cout==1){
std::cout << " i: " << i << std::endl;
}
for(int ii=0; ii<ncol;++ii){
acc += resid[ii];
}
result=sqrt(acc);
loops += 1;
out[loops] = result;
if(show_cout==1){
std::cout << " i: " << i << std::endl;
}
}
}
// std::cout << "loops:" << loops << std::endl;
//
out = out[out > 0];
const MapVecd xy(as<MapVecd>(out));
out3=xy.minCoeff();
out2[loops2]=out3[0];
loops2 += 1;
}
std::cout << "cpp finished" << std::endl;
return(out2);
}
Two things here:
Use out[loops++] = result; instead of loops += 1; out[loops] = result; because you were starting at 1, and probably accessing the last element outside of the range of this vector.
Use
for(int ii=0; ii<ncol;++ii){ double eps = x(ii, j) - x(ii, i); acc += eps * eps; }
instead of relying on this resid vector.
I need to send array pieces to all processes using MPI_Scatter then to get sum of all elements. Where should I initialize array then to scatter it? In root rank?
If I initialize array on root rank then other ranks dont get their data. Otherway I can initialize array for everyone (out of if(rank == root)...else), but it means, that I create array several times.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <iostream>
#include <time.h>
using namespace std;
int main(int argc, char* argv[])
{
int size;
int rank;
srand(time(NULL));
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int arr_size = size * 2;
int block = arr_size / (size);
int* B = new int[block];
if (rank == 0)
{
int* A = new int[arr_size];
cout << "generated array: " << endl;
for (int i = 0; i < arr_size; i++)
{
A[i] = rand() % 100;
cout << A[i] << " ";
}
cout << endl;
MPI_Scatter(A, block, MPI_INT, B, block, MPI_INT, 0, MPI_COMM_WORLD);
}
cout << "process " << rank << " received: " << endl;
for (int i = 0; i < block; i++)
{
cout << B[i] << " ";
}
cout << endl;
int local_sum = 0;
for (int i = 0; i < block; i++)
{
local_sum += B[i];
}
cout << "sum in process " << rank << " = " << local_sum << endl;
cout << endl;
int global_sum;
MPI_Reduce(&local_sum, &global_sum, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0)
{
cout << "sum = " << global_sum << endl;
}
MPI_Finalize();
return 0;
}
I get something like this (only root rank got its data):
process 1 received:
process 3 received:
-842150451 -842150451
-842150451 -842150451
sum in process 1 = -1684300902
sum in process 3 = -1684300902
process 2 received:
-842150451 -842150451
sum in process 2 = -1684300902
process 0 received:
4 9
sum in process 0 = 13
sum = -757935397
MPI_Scatter() is a collective operation and must hence be invoked by all the ranks.
Declare int *A = NULL; on all ranks and only allocate and populate on rank zero.
int* A = NULL;
int* B = new int[block];
if (rank == 0)
{
A = new int[arr_size];
cout << "generated array: " << endl;
for (int i = 0; i < arr_size; i++)
{
A[i] = rand() % 100;
cout << A[i] << " ";
}
cout << endl;
}
MPI_Scatter(A, block, MPI_INT, B, block, MPI_INT, 0, MPI_COMM_WORLD);
Memory address shows two different values.
We have const variable (a) and we put the address of the variable into two pointers (b and c). After changing the value at the address in one of the pointers (c), we had a situation where the same memory address has two different values.
Is there any explanation for this behavior?
#include <iostream>
int main(void)
{
const int a = 99;
const int *b = &a;
int *c = (int *)b;
std::cout << &a << " - " << a << '\n';
std::cout << b << " - " << *b << '\n';
std::cout << c << " - " << *c << "\n\n";
*c = 61;
std::cout << &a << " - " << a << '\n';
std::cout << b << " - " << *b << '\n';
std::cout << c << " - " << *c << '\n';
return 0;
}
//here are the result(output)
003CFAA4 - 99
003CFAA4 - 99
003CFAA4 - 99
003CFAA4 - 99
003CFAA4 - 61
003CFAA4 - 61
Okay so I need help with getting the bank to NOT reset to 100 after each time the loop runs. I have tried many ways but can't seem to get it to work. Could you please help me with a few explanations and examples?
#include <iostream>
#include <stdio.h>
#include <cstdlib>
#include <ctime>
using namespace std;
int displaystats(int gamesplayed, int wins, int losses, int bank);
int main()
{
int bank = 100;//intital bank value
int bet = 0;//desired wager
int wins = 0;//games won
int losses = 0;//games lost
int gamesplayed = 0;//how many rounds you played
int compdice1 = 0;//first rolled dice for computer
int compdice2 = 0;//second rolled dice for computer
int playdice1 = 0;//first rolled dice for player
int playdice2 = 0;//seconds rolled dice for player
int newdice = 0;//the dice to risk your wager
int comproll = 0;//the sum of the computers roll
int playroll = 0;//the sum of the players roll
do
{
if (bank < 0)
{
cout << "You have " << bank << " coins in your bank." << endl;
cout << "I am sorry you are out of money." << endl;
displaystats(gamesplayed, wins, losses, bank);
break;
}
else if (bank > 0)
{
cout << "You have " << bank << " coins in your bank." << endl;
cout << "How many coins would you like to bet? ";
cin >> bet;
compdice1 = (rand() + time(0)) % 6 + 1;//computer dice
compdice2 = (rand() + time(0)) % 6 + 1;//computer second dice
playdice1 = (rand() + time(0)) % 6 + 1;//player dice
playdice2 = (rand() + time(0)) % 6 + 1;//player second dice
comproll = compdice1 + compdice2;//computer sum
playroll = playdice1 + playdice2;//player sume
cout << "Your roll was " << playdice1 << " and " << playdice2 << " with a sume of " << playroll << endl;
if (playroll < comproll)
{
char option;//option to roll another dice
cout << "You win!" << endl;
cout << "Would you like to roll a third dice to earn 1.5 times your bet, yes or no? ";
cin >> option;
if (option == 'yes')
{
int newroll;//the new sum of the three dice
int newdice;//the extra roll
newdice = (rand() + time(0)) % 6 + 1;
newroll = playroll + newdice;//the value of players roll
if (newroll > comproll)
{
cout << "The computer rolled " << comproll << endl;
cout << "You now rolled higher than the computer therefore, I am sorry you lose this round." << endl;
cout << "Your bank now equals " << bank - bet << endl;
losses++;
gamesplayed++;
}
else if (newroll < comproll)
{
cout << "You win!" << endl;
cout << "Your bank now equals " << bank + (1.5 * bet) << endl;
wins++;
gamesplayed++;
}
}
else if (option == 'no')
{
cout << "Your bank now equals " << bank + bet << endl;
wins++;
gamesplayed++;
}
}
else if (playroll > comproll)
{
cout << "The computer rolled " << comproll << endl;
cout << "You rolled higher than the computer therefore, I am sorry you lose this round." << endl;
cout << "Your bank now equals " << bank - bet << endl;
losses++;
gamesplayed++;
}
else if (playroll = comproll)
{
cout << "The computer also rolled " << comproll << endl;
cout << "I am sorry you now lose double your bet!" << endl;
cout << "Your bank now equals " << bank - (2 * bet) << endl;
losses++;
gamesplayed++;
}
}
} while (bank > 0);
int stats = displaystats(gamesplayed, wins, losses, bank);
cout << "Your stats are " << stats << endl;
return 0;
}
int displaystats(int gamesplayed, int wins, int losses, int bank)
{
cout << "Games Played: " << gamesplayed << endl;
cout << "Wins: " << wins << endl;
cout << "Losses: " << losses << endl;
cout << "Bank Total: " << bank << endl;
return (gamesplayed, wins, losses, bank);
}
Your problem with the bank "resetting" constantly was you never actually subtracted the bet from the bank. See the following code, I hope this helps.
cout << "You have " << bank << " coins in your bank." << endl;
cout << "How many coins would you like to bet? ";
cin >> bet;
//This is the line that you forgot.
bank = bank - bet
If you then win, then you may want to later on add some money back into the bank. (But this is up to you.) I hope this helps.
EDIT:
Here is a paste-bin for the full code as requested: http://pastebin.com/HzvRxjXL
Also, if this solves your problem, I would appreciate it if you would mark it as the answer so others don't spend time answering a problem that has been solved.
EDIT 2:
This has a (As far as I can tell) fixed and commentated version of the code. I hope this helps: http://pastebin.com/miQjy4B5
What is wrong with my code ? I have the error like this.
Unhandled exception at 0x00d21673 in mnozenie_macierzy.exe : 0xC0000005: Access violation writing location 0xcdcdcdcd.
It create the first array and the half to the second. The program multiplies arrays.
Sorry for my English if It isn't correct. I hope you understand me.
#include <iostream>
#include <time.h>
using namespace std;
void losowa_tablica(int **tab1, int **tab2, int a, int b, int c, int d)
{
int i, j;
for(i=0; i<a; i++)
{
cout << endl;
for(j=0; j<b; j++)
{
tab1[i][j]=rand();
cout << "tab1[" << i << "][" << j << "] : \t" << tab1[i][j] << "\t";
}
}
cout << endl;
for(i=0; i<c; i++)
{
cout << endl;
for(j=0; j<d; j++)
{
tab2[i][j]=rand();
cout << "tab2[" << i << "][" << j << "] : \t" << tab2[i][j] << "\t";
}
}
cout << endl << endl;
}
int **mnozenie(int **tab1, int **tab2, int a, int b, int c, int d)
{
int g, suma, i, j;
int **mac=new int*[a];
for(int i=0; i<d; i++)
mac[i]=new int[d];
for(i=0; i<a; i++)
for(j=0; j<d; j++)
{
g=b-1, suma=0;
do
{
suma+=tab1[i][g]*tab2[g][j];
g--;
}while(g!=0);
mac[i][j]=suma;
}
return mac;
}
int main()
{
int a,b,c,d;
cout << "Podaj liczbe wierszy pierwszej macierzy: " << endl;
cin >> a;
cout << "Podaj liczbe kolumn pierwszej macierzy: " << endl;
cin >> b;
cout << "Podaj liczbe wierszy drugiej macierzy: " << endl;
cin >> c;
cout << "Podaj liczbe kolumn drugiej macierzy: " << endl;
cin >> d;
int **tab1=new int*[a];
for(int i=0; i<b; i++)
tab1[i]=new int[b];
int **tab2=new int*[c];
for(int i=0; i<d; i++)
tab2[i]=new int[d];
losowa_tablica(tab1, tab2, a, b, c, d);
if ( b==c )
{
cout << "Mnozenie wykonalne" << endl;
int **mno=mnozenie(tab1, tab2, a, b, c, d);
}
else cout << "Mnozenie niewykonalne" << endl;
system("pause");
}
Your code yields undefined behavior:
int **tab1=new int*[a]; // allocating an array of 'a' elements
for(int i=0; i<b; i++) // if b > a then the next line will eventually yield UB
tab1[i]=new int[b];
int **tab2=new int*[c]; // allocating an array of 'c' elements
for(int i=0; i<d; i++) // if d > c then the next line will eventually yield UB
tab2[i]=new int[d];
int **mac=new int*[a]; // allocating an array of 'a' elements
for(int i=0; i<d; i++) // if d > a then the next line will eventually yield UB
mac[i]=new int[d];
In practice, the above code will most likely perform a memory access violation at some point.