What is the meaning of using MPI for single server? - mpi

I am the novice in the field of distributed-computation and I know the most popular standard is the Message Passing Interface. However, if I only have one server, I can also run my program under the MPI framework as the following demo example.
# include <cmath>
# include <cstdlib>
# include <ctime>
# include <iomanip>
# include <iostream>
# include <mpi.h>
using namespace std;
int main ( int argc, char *argv[] );
double f ( double x );
void timestamp ( );
int main ( int argc, char *argv[] )
{
double end_time;
int i;
int id;
int ierr;
int m;
int p;
double r8_pi = 3.141592653589793238462643;
int process;
double q_global;
double q_local;
int received;
int source;
double start_time;
MPI_Status status;
int tag;
int target;
double x;
double xb[2];
double x_max = 1.0;
double x_min = 0.0;
//
// Establish the MPI environment.
//
ierr = MPI_Init ( &argc, &argv );
if ( ierr != 0 )
{
cout << "\n";
cout << "INTERVALS_MPI - Fatal error!";
cout << " MPI_Init returned ierr = " << ierr << "\n";
exit ( 1 );
}
//
// Determine this processes's rank.
//
ierr = MPI_Comm_rank ( MPI_COMM_WORLD, &id );
//
// Get the number of processes.
//
ierr = MPI_Comm_size ( MPI_COMM_WORLD, &p );
//
// Say hello (once), and shut down right away unless we
// have at least 2 processes available.
//
if ( id == 0 )
{
timestamp ( );
cout << "\n";
cout << "INTERVALS - Master process:\n";
cout << " C++ version\n";
cout << "\n";
cout << " An MPI example program,\n";
cout << " A quadrature over an interval is done by\n";
cout << " assigning subintervals to processes.\n";
cout << "\n";
cout << " The number of processes is " << p << "\n";
start_time = MPI_Wtime ( );
if ( p <= 1 )
{
cout << "\n";
cout << "INTERVALS - Master process:\n";
cout << " Need at least 2 processes!\n";
MPI_Finalize ( );
cout << "\n";
cout << "INTERVALS - Master process:\n";
cout << " Abnormal end of execution.\n";
exit ( 1 );
}
}
cout << "\n";
cout << "Process " << id << ": Active!\n";
//
// Every process could figure out the endpoints of its interval
// on its own. But we want to demonstrate communication. So we
// assume that the assignment of processes to intervals is done
// only by the master process, which then tells each process
// what job it is to do.
//
if ( id == 0 )
{
for ( process = 1; process <= p-1; process++ )
{
xb[0] = ( ( double ) ( p - process ) * x_min
+ ( double ) ( process - 1 ) * x_max )
/ ( double ) ( p - 1 );
xb[1] = ( ( double ) ( p - process - 1 ) * x_min
+ ( double ) ( process ) * x_max )
/ ( double ) ( p - 1 );
target = process;
tag = 1;
ierr = MPI_Send ( xb, 2, MPI_DOUBLE, target, tag, MPI_COMM_WORLD );
}
}
else
{
source = 0;
tag = 1;
ierr = MPI_Recv ( xb, 2, MPI_DOUBLE, source, tag, MPI_COMM_WORLD, &status );
}
//
// Wait here until everyone has gotten their assignment.
//
ierr = MPI_Barrier ( MPI_COMM_WORLD );
if ( id == 0 )
{
cout << "\n";
cout << "INTERVALS - Master process:\n";
cout << " Subintervals have been assigned.\n";
}
//
// Every process needs to be told the number of points to use.
// Since this is the same value for everybody, we use a broadcast.
// Again, we are doing it in this roundabout way to emphasize that
// the choice for M could really be made at runtime, by processor 0,
// and then sent out to the others.
//
m = 100;
source = 0;
ierr = MPI_Bcast ( &m, 1, MPI_INT, source, MPI_COMM_WORLD );
//
// Now, every process EXCEPT 0 computes its estimate of the
// integral over its subinterval, and sends the result back
// to process 0.
//
if ( id != 0 )
{
q_local = 0.0;
for ( i = 1; i <= m; i++ )
{
x = ( ( double ) ( 2 * m - 2 * i + 1 ) * xb[0]
+ ( double ) ( 2 * i - 1 ) * xb[1] )
/ ( double ) ( 2 * m );
q_local = q_local + f ( x );
}
q_local = q_local * ( xb[1] - xb[0] ) / ( double ) ( m );
target = 0;
tag = 2;
ierr = MPI_Send ( &q_local, 1, MPI_DOUBLE, target, tag, MPI_COMM_WORLD );
}
//
// Process 0 expects to receive N-1 partial results.
//
else
{
received = 0;
q_global = 0.0;
while ( received < p - 1 )
{
source = MPI_ANY_SOURCE;
tag = 2;
ierr = MPI_Recv ( &q_local, 1, MPI_DOUBLE, source, tag, MPI_COMM_WORLD,
&status );
q_global = q_global + q_local;
received = received + 1;
}
}
//
// The master process prints the answer.
//
if ( id == 0 )
{
cout << "\n";
cout << "INTERVALS - Master process:\n";
cout << " Estimate for PI is " << q_global << "\n";
cout << " Error is " << q_global - r8_pi << "\n";
end_time = MPI_Wtime ( );
cout << "\n";
cout << " Elapsed wall clock seconds = "
<< end_time - start_time << "\n";
}
//
// Terminate MPI.
//
MPI_Finalize ( );
//
// Terminate.
//
if ( id == 0 )
{
cout << "\n";
cout << "INTERVALS - Master process:\n";
cout << " Normal end of execution.\n";
cout << "\n";
timestamp ( );
}
return 0;
}
//****************************************************************************80
double f ( double x )
{
double value;
value = 4.0 / ( 1.0 + x * x );
return value;
}
//****************************************************************************80
void timestamp ( )
{
# define TIME_SIZE 40
static char time_buffer[TIME_SIZE];
const struct std::tm *tm_ptr;
std::time_t now;
now = std::time ( NULL );
tm_ptr = std::localtime ( &now );
std::strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm_ptr );
std::cout << time_buffer << "\n";
return;
# undef TIME_SIZE
}
Actually, this is the simple case that we use the MPI to compute the integral of specific function. I run this program by using 4 processes. I am confused that we can also use the OpenMP to do the share memory programming instead of MPI to reduce the communication cost. I do not know the meaning of MPI on single machine.

Related

Where to initialize array then to scatter it. MPI_Scatter

I need to send array pieces to all processes using MPI_Scatter then to get sum of all elements. Where should I initialize array then to scatter it? In root rank?
If I initialize array on root rank then other ranks dont get their data. Otherway I can initialize array for everyone (out of if(rank == root)...else), but it means, that I create array several times.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <iostream>
#include <time.h>
using namespace std;
int main(int argc, char* argv[])
{
int size;
int rank;
srand(time(NULL));
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int arr_size = size * 2;
int block = arr_size / (size);
int* B = new int[block];
if (rank == 0)
{
int* A = new int[arr_size];
cout << "generated array: " << endl;
for (int i = 0; i < arr_size; i++)
{
A[i] = rand() % 100;
cout << A[i] << " ";
}
cout << endl;
MPI_Scatter(A, block, MPI_INT, B, block, MPI_INT, 0, MPI_COMM_WORLD);
}
cout << "process " << rank << " received: " << endl;
for (int i = 0; i < block; i++)
{
cout << B[i] << " ";
}
cout << endl;
int local_sum = 0;
for (int i = 0; i < block; i++)
{
local_sum += B[i];
}
cout << "sum in process " << rank << " = " << local_sum << endl;
cout << endl;
int global_sum;
MPI_Reduce(&local_sum, &global_sum, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0)
{
cout << "sum = " << global_sum << endl;
}
MPI_Finalize();
return 0;
}
I get something like this (only root rank got its data):
process 1 received:
process 3 received:
-842150451 -842150451
-842150451 -842150451
sum in process 1 = -1684300902
sum in process 3 = -1684300902
process 2 received:
-842150451 -842150451
sum in process 2 = -1684300902
process 0 received:
4 9
sum in process 0 = 13
sum = -757935397
MPI_Scatter() is a collective operation and must hence be invoked by all the ranks.
Declare int *A = NULL; on all ranks and only allocate and populate on rank zero.
int* A = NULL;
int* B = new int[block];
if (rank == 0)
{
A = new int[arr_size];
cout << "generated array: " << endl;
for (int i = 0; i < arr_size; i++)
{
A[i] = rand() % 100;
cout << A[i] << " ";
}
cout << endl;
}
MPI_Scatter(A, block, MPI_INT, B, block, MPI_INT, 0, MPI_COMM_WORLD);

Getting undesired behavior when sending-receiving messages using MPI

I'm exploring MPI in C++ and I wanted to parallelize the creation of a picture of the Mandelbrot set. I'm using the ppm format. Each processor builds its part and sends it back to the main process that receives it as MPI_CHAR. This is the code:
#include "mpi.h"
#include <iostream>
#include <string>
#include <fstream>
#include <complex>
using namespace std;
int mandelbrot(int x, int y, int width, int height, int max) {
complex<float> point((float) (y - height/2.0) * 4.0/width, (float) (x - width/2.0) * 4.0/width);
complex<float> z(0, 0);
unsigned int iteration = 0;
while (abs(z) < 4 && iteration < max) {
z = z * z + point;
iteration++;
}
return iteration;
}
int main(int argc, char **argv) {
int numprocs;
int myid;
int buff_size = 404270; // 200x200
char buff[buff_size];
int i;
MPI_Status stat;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
int width = 200, height = 200, max_iter = 1000;
if (myid == 0) {
ofstream image("mandel.ppm");
image << "P3\n" << width << " " << height << " 255\n";
for(i=1; i < numprocs; i++) {
MPI_Probe(i, 0, MPI_COMM_WORLD, &stat);
int length;
MPI_Get_count(&stat, MPI_CHAR, &length);
MPI_Recv(buff, length, MPI_CHAR, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
image << buff;
}
} else {
stringstream ss;
// proc rank: 1, 2, ..., n
int part = height/(numprocs-1), start = (myid - 1) * part, end = part * myid;
printf("%d -> %d\n", start, end);
for (int row = start; row < end; row++) {
for (int col = 0; col < width; col++) {
int iteration = mandelbrot(row, col, width, height, max_iter);
if (row == start) ss << 255 << ' ' << 255 << ' ' << 255 << "\n";
else if (iteration < max_iter) ss << iteration * 255 << ' ' << iteration * 20 << ' ' << iteration * 5 << "\n";
else ss << 0 << ' ' << 0 << ' ' << 0 << "\n";
}
}
printf("\n sizeof = %d\n", ss.str().length());
MPI_Send(ss.str().c_str(), ss.str().length(), MPI_CHAR, 0, 0, MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
Code compilation:
$ mpic++ -std=c++0x mpi.mandel.cpp -o mpi.mandel
Running with 3 processes (process main + process rank 1 and 2)
$ mpirun -np 3 ./mpi.mandel
Resulting ppm pictures when running with 3, 4, and 5 process:
It seems that the point-to-point communication of sending-receiving is mixing the results when more than 3 processes try to send the MPI_CHAR elements to the main process. How can avoid this behavior?
It works when creating the buffer buff with the same length as the receiving message:
.
.
for (int i=1; i < numprocs; i++) {
MPI_Probe(i, 0, MPI_COMM_WORLD, &stat);
int length;
MPI_Get_count(&stat, MPI_CHAR, &length);
printf("\nfrom %d <<-- %d (stat.source=%d) Receiving %d chars\n", myid, i, stat.MPI_SOURCE, length);
char buff[length + 1];
MPI_Recv(buff, length, MPI_CHAR, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
buff[length] = '\0';
image << buff;
}
.
.
Thus, we don't need anymore the declaration at the beginning int buff_size = 404270; neither char buff[buff_size];

TLE using DP for Timus Online Judge

I was solving problem timusOJ Metro. I solved it using DP but I am getting TLE. I don't know how to do it better? Solution in Ideone. Help!
#include <bits/stdc++.h>
using namespace std;
double dis[1005][1005];
map< pair< int, int >, int > m;
int N, M, K;
double min( double a, double b ){
if( a < b )
return a;
return b;
}
int main(){
cin >> M >> N;
cin >> K;
dis[0][0]= 0;
for( int i= 1; i<= N; ++i )
dis[i][0]= 100*i;
for( int j= 1; j<= M; ++j )
dis[0][j]= 100*j;
for( int i= 0, x,y; i< K; ++i ){
scanf( "%d%d", &x, &y );
m[{y,x}]++;
}
double shortcut= sqrt(20000);
for( int i= 1; i<= N; ++i )
for( int j= 1; j<= M; ++j ){
//cout << i << " " << j << "\n\t";
if( m[{i,j}] > 0 ){
dis[i][j]= min( dis[i-1][j-1] + shortcut , min(
dis[i-1][j] , dis[i][j-1] ) + 100 ) ;
m[{i,j}]--;
}else{
dis[i][j]= min( dis[i-1][j], dis[i][j-1] ) + 100;
}
}
cout << ceil(dis[N][M]) << endl;
}
P.S. I have problem in formatting code in this platform therefore ideone is used.
UPD: Accepted :)
Read the question wrong , I claim that by triangular inequality The distance to go to a block diagonally is lesser than travelling right and up.!
I will submit and explain soon..

OpenGL Texturing Garbage

I have a problem with texturing – it loads the image correctly, but renders garbage onto the geometry. The geometry itself draws fine (a simple triangle), but no matter which texture I load it just spits random patterns onto the triangle.
I'm using g++ 4.2.1 on Mac OS X with Qt 4.7 and OpenGL
First of all, here's the console output:
BallGLWidget::initializeGL called
Image format is GL_RGB
Checking textures...
glGetError enum value: GL_NO_ERROR
Also, my logging code for the shader initialization doesn't register any error.
The OpenGL initialization function:
void BallGLWidget::initializeGL()
{
cout << "BallGLWidget::initializeGL called" << endl;
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
initializeShaders();
checkOpenGLError();
glEnableVertexAttribArray(VERTEX_POS_NUM);
glEnableVertexAttribArray(TEX_POS_NUM);
glBindAttribLocation(programHandle, VERTEX_POS_NUM, VERTEX_POS_ATTRIB_NAME);
glBindAttribLocation(programHandle, TEX_POS_NUM, TEX_COORD_ATTRIB_NAME);
//this MUST be called AFTER glBindAttribLocation
glLinkProgram(programHandle);
//FIXME:-----------DEBUG-----------
printProgramInfoLog(programHandle);
//-----------END-DEBUG-----------
glUseProgram(programHandle);
//FIXME:-----------DEBUG-----------
printProgramInfoLog(programHandle);
//-----------END-DEBUG-----------
checkOpenGLError();
samplerUniformLocation =
glGetUniformLocation(programHandle, BALL_SAMPLER_NAME);
glUniform1f(samplerUniformLocation, 0);
glActiveTexture(GL_TEXTURE0);
ball_texture_handle = loadTexture(BALL_IMAGE_PATH);
//bind it in initialization because we're only using
//1 texture in the program
glBindTexture(GL_TEXTURE_2D, ball_texture_handle);
}
Here's the loadTexture function:
GLuint BallGLWidget::loadTexture(const char* filenamePtr)
{
//create & prepare a temporary texture handle that will be copied to
//DesktopMain::ball_texture_handle after this function returns
GLuint texHandle;
glGenTextures(1, &texHandle);
glBindTexture(GL_TEXTURE_2D, texHandle);
QImage* img = new QImage();
if(!img->load(filenamePtr))
{
//error loading image, handle error
cerr << "ERROR LOADING TEXTURE" << endl;
}
//This is the Qt way- its commented out for conventional OpenGL code
//bind the texture to the current context
//GLuint texHandle = bindTexture(*img);
GLenum openglImageFormat;
QImage::Format imgFormat = img->format();
switch(imgFormat)
{
case QImage::Format_RGB32:
openglImageFormat = GL_RGB;
cout << "Image format is GL_RGB" << endl;
break;
case QImage::Format_ARGB32:
openglImageFormat = GL_RGBA;
cout << "Image format is GL_RGBA" << endl;
break;
//handle this case the same as ARGB32
case QImage::Format_ARGB32_Premultiplied:
openglImageFormat = GL_RGBA;
cout << "Image format is GL_RGBA (premultiplied)" << endl;
break;
case QImage::Format_Invalid:
cerr << "ERROR: INVALID IMAGE FORMAT" << endl;
return -1;
break;
default:
cerr << "ERROR: UNRECOGNIZED IMAGE FORMT" << endl;
return -1;
break;
}
//use tightly packed pixel values
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
//use linear filtering
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB,
img->width(), img->height(), 0, openglImageFormat,
GL_UNSIGNED_BYTE, img->bits());
cerr << "Checking textures..." << endl;
checkOpenGLError();
delete img;
return texHandle;
}
The vertex shader:
attribute vec2 a_v_position;
attribute vec2 a_tex_position;
varying vec2 tex_coord_output;
void main()
{
//copy attributes to varyings for use in the frag shader
tex_coord_output = a_tex_position;
gl_Position = vec4(a_v_position, 0.0, 1.0);
}
The fragment shader:
varying vec2 tex_coord_output;
uniform sampler2D ballsampler;
void main()
{
gl_FragColor = texture2D(ballsampler, tex_coord_output);
}
EDIT:
A screenshot of the program, as requested.
https://docs.google.com/open?id=0B8xCefwW3X4TY2Y3N2M0MGYtMDQ0NS00MDk4LWEzODgtNDc3OWFkODI3ZWE3
EDIT:
The attribute locations were off because apparently glBindAttribLocation only works if called BEFORE the program object is linked (http://www.opengl.org/sdk/docs/man/xhtml/glBindAttribLocation.xml). I changed the code above accordingly, but the program still looks like below (there is still a problem with the texturing...):
I get the following result:
https://docs.google.com/open?id=0B8xCefwW3X4TNWE0YTQ5MTktZTA2Yy00YmI4LWJmMjMtYTlhOTYxMGNkMTk0
Break it down, try something simple:
vert.glsl
#version 120
uniform mat4 projection;
uniform mat4 modelview;
attribute vec2 position;
attribute vec2 texcoord;
varying vec2 fragTexCoord;
void main(void)
{
fragTexCoord = texcoord;
gl_Position = projection * modelview * vec4( position, 0.0, 1.0 );
}
frag.glsl
#version 120
uniform sampler2D texture;
varying vec2 fragTexCoord;
void main(void)
{
gl_FragColor = texture2D( texture, fragTexCoord );
}
main.cpp
#include <GL/glew.h>
#include <GL/glut.h>
#include <cstdlib>
#include <stdexcept>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
using namespace std;
GLuint CreateShader( const GLenum& aShaderType, const string& aShaderSource )
{
GLuint shader = glCreateShader( aShaderType );
const GLchar* shaderString = aShaderSource.c_str();
GLint shaderLength = aShaderSource.size();
glShaderSource( shader, 1, &shaderString, &shaderLength );
glCompileShader( shader );
GLint compiled;
glGetShaderiv( shader, GL_COMPILE_STATUS, &compiled );
if( GL_FALSE == compiled )
{
// compile failure, dump log
GLint loglen;
glGetShaderiv( shader, GL_INFO_LOG_LENGTH , &loglen);
vector< char > log( loglen );
glGetShaderInfoLog( shader, loglen, NULL, &log[0] );
string type;
switch( aShaderType )
{
case GL_VERTEX_SHADER: type = "GL_VERTEX_SHADER"; break;
case GL_FRAGMENT_SHADER: type = "GL_FRAGMENT_SHADER"; break;
default: type = "UNKNOWN SHADER"; break;
}
stringstream err;
err << "*** " << type << " ***" << endl;
err << aShaderSource;
err << "*** Compilation Log ***" << endl;
err << string( log.begin(), log.end() );
throw std::logic_error( err.str() );
}
return shader;
}
GLuint CreateProgram( const string& aVertexShader, const string& aFragmentShader )
{
GLuint vert = CreateShader( GL_VERTEX_SHADER, aVertexShader );
GLuint frag = CreateShader( GL_FRAGMENT_SHADER, aFragmentShader );
GLuint program = glCreateProgram();
glAttachShader( program, vert );
glAttachShader( program, frag );
glLinkProgram( program );
glDeleteShader( vert );
glDeleteShader( frag );
GLint linked;
glGetProgramiv( program, GL_LINK_STATUS, &linked );
if( GL_FALSE == linked )
{
// link failure, dump log
GLint loglen;
glGetProgramiv( program, GL_INFO_LOG_LENGTH , &loglen);
vector< char > log( loglen );
glGetProgramInfoLog( program, loglen, NULL, &log[0] );
stringstream err;
err << "*** Link log ***" << endl;
err << string( log.begin(), log.end() );
throw std::logic_error( err.str() );
}
return program;
}
string LoadFile( const string& filename )
{
ifstream infile(filename.c_str(), ios::binary);
istreambuf_iterator<char> begin(infile), end;
return string(begin, end);
}
GLuint prog = 0;
GLuint tex = 0;
void init()
{
GLenum glewError = glewInit();
if( GLEW_OK != glewError )
{
stringstream err;
err << "GLEW error: " << glewGetErrorString(glewError) << endl;
throw std::logic_error( err.str() );
}
cout << "GLEW_VERSION : " << glewGetString(GLEW_VERSION) << endl;
cout << "GL_VERSION : " << glGetString(GL_VERSION) << endl;
cout << "GLSL VERSION : " << glGetString(GL_SHADING_LANGUAGE_VERSION) << endl;
cout << "GL_VENDOR : " << glGetString(GL_VENDOR) << endl;
cout << "GL_RENDERER : " << glGetString(GL_RENDERER) << endl;
if( !GLEW_VERSION_2_1 )
{
stringstream err;
err << "OpenGL 2.1 or better required for GLSL support." << endl;
throw std::logic_error( err.str() );
}
// load shaders
string vert = LoadFile( "vert.glsl" );
string frag = LoadFile( "frag.glsl" );
prog = CreateProgram( vert, frag );
// create random texture
const unsigned int width = 32;
const unsigned int height = 32;
const unsigned int channels = 3;
unsigned char buffer[ width * height * channels ];
for( unsigned int i = 0; i < width * height; ++i )
{
buffer[i*channels + 0] = rand()%255;
buffer[i*channels + 1] = rand()%255;
buffer[i*channels + 2] = rand()%255;
}
// upload texture data
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glGenTextures(1, &tex);
glBindTexture(GL_TEXTURE_2D, tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, channels, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, buffer);
}
struct Vertex
{
Vertex() : x(0), y(0), s(0), t(0) {}
Vertex( float _x, float _y, float _s, float _t ) : x(_x), y(_y), s(_s), t(_t) {}
float x, y;
float s, t;
};
void display()
{
static float currentTime = glutGet(GLUT_ELAPSED_TIME) / 1000.0f;
float newTime = glutGet(GLUT_ELAPSED_TIME) / 1000.0f;
float frameTime = newTime - currentTime;
currentTime = newTime;
vector< Vertex > verts;
verts.push_back( Vertex( -1, -1, 0, 0 ) );
verts.push_back( Vertex( 1, -1, 1, 0 ) );
verts.push_back( Vertex( 1, 1, 1, 1 ) );
verts.push_back( Vertex( -1, 1, 0, 1 ) );
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
static float angle = 0;
angle += 60 * frameTime;
glRotatef( angle, 0, 0, 1 );
glScalef( 5, 5, 5 );
glUseProgram( prog );
// load uniforms
GLfloat projection[16];
glGetFloatv( GL_PROJECTION_MATRIX, projection );
GLint projection_loc = glGetUniformLocation( prog, "projection" );
glUniformMatrix4fv( projection_loc, 1, GL_FALSE, projection );
GLfloat modelview[16];
glGetFloatv( GL_MODELVIEW_MATRIX, modelview );
GLint modelview_loc = glGetUniformLocation( prog, "modelview" );
glUniformMatrix4fv( modelview_loc, 1, GL_FALSE, modelview );
GLint texture_loc = glGetUniformLocation( prog, "texture" );
glUniform1i( texture_loc, 0 );
glActiveTexture( GL_TEXTURE0 );
glBindTexture( GL_TEXTURE_2D, tex );
// load attributes
GLint position_loc = glGetAttribLocation( prog, "position" );
glVertexAttribPointer( position_loc, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), &verts[0].x );
glEnableVertexAttribArray( position_loc );
GLint texcoord_loc = glGetAttribLocation( prog, "texcoord" );
glVertexAttribPointer( texcoord_loc, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), &verts[0].s );
glEnableVertexAttribArray( texcoord_loc );
// render
glDrawArrays( GL_QUADS, 0, verts.size() );
glDisableVertexAttribArray( position_loc );
glDisableVertexAttribArray( texcoord_loc );
glutSwapBuffers();
}
void reshape(int w, int h)
{
glViewport(0, 0, w, h);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
double aspect = (double)w / (double)h;
glOrtho(-10*aspect, 10*aspect, -10, 10, -1, 1);
}
int main(int argc, char **argv)
{
glutInit(&argc, argv);
glutInitWindowSize(800,600);
glutInitDisplayMode(GLUT_RGBA | GLUT_DEPTH | GLUT_DOUBLE);
glutCreateWindow("GLSL");
try
{
init();
}
catch( std::exception& e )
{
cout << "Init failure: " << endl << e.what() << endl;
return EXIT_FAILURE;
}
glutDisplayFunc(display);
glutIdleFunc(display);
glutReshapeFunc(reshape);
glutMainLoop();
return EXIT_SUCCESS;
}

Floating point exponentiation without power-function

Currently I have to work in an environment where the power-operator is bugged. Can anyone think of a method temporarily work around this bug and compute a^b (both floating point) without a power function or operator?
if you have sqrt() available:
double sqr( double x ) { return x * x; }
// meaning of 'precision': the returned answer should be base^x, where
// x is in [power-precision/2,power+precision/2]
double mypow( double base, double power, double precision )
{
if ( power < 0 ) return 1 / mypow( base, -power, precision );
if ( power >= 10 ) return sqr( mypow( base, power/2, precision/2 ) );
if ( power >= 1 ) return base * mypow( base, power-1, precision );
if ( precision >= 1 ) return sqrt( base );
return sqrt( mypow( base, power*2, precision*2 ) );
}
double mypow( double base, double power ) { return mypow( base, power, .000001 ); }
test code:
void main()
{
cout.precision( 12 );
cout << mypow( 2.7, 1.23456 ) << endl;
cout << pow ( 2.7, 1.23456 ) << endl;
cout << mypow( 1.001, 1000.7 ) << endl;
cout << pow ( 1.001, 1000.7 ) << endl;
cout << mypow( .3, -10.7 ) << endl;
cout << pow ( .3, -10.7 ) << endl;
cout << mypow( 100000, .00001 ) << endl;
cout << pow ( 100000, .00001 ) << endl;
cout << mypow( 100000, .0000001 ) << endl;
cout << pow ( 100000, .0000001 ) << endl;
}
outputs:
3.40835049344
3.40835206431
2.71882549461
2.71882549383
393371.348073
393371.212573
1.00011529225
1.00011513588
1.00000548981
1.00000115129
You can use the identity ab = e(b log a), then all the calculations are relative to the same base e = 2.71828...
Now you have to implement f(x) = ln(x), and g(x) = e^x. The fast, low precision method would be to use lookup tables for f(x) and g(x). Maybe that's good enough for your purposes. If not, you can use the Taylor series expansions to express ln(x) and e^x in terms
of multiplication and addition.
given that you can use sqrt, this simple recursive algorithm works:
Suppose that we're calculating aˆb. The way the algorithm works is by doing Fast Exponentiation on the exponent until we hit the fractional part, once in the fractional part, do a modified binary search, until we're close enough to the fractional part.
double EPS = 0.0001;
double exponentiation(double base, double exp){
if(exp >= 1){
double temp = exponentiation(base, exp / 2);
return temp * temp;
} else{
double low = 0;
double high = 1.0;
double sqr = sqrt(base);
double acc = sqr;
double mid = high / 2;
while(abs(mid - exp) > EPS){
sqr = sqrt(sqr);
if (mid <= exp) {
low = mid;
acc *= sqr;
} else{
high = mid;
acc *= (1/sqr);
}
mid = (low + high) / 2;
}
return acc;
}
}

Resources