I'm trying to use one-sided communications in MPI.
The following example consists of an array of 4 doubles that is split between 2 processes.
The first process writes 0, 1, 2, 3 in the distributed array while the second one subsequently tries to read it. Unfortunately, it doesn't work. I must be doing something wrong somewhere.
#include <mpi.h>
#include <iostream>
int main(){
MPI_Init(0, nullptr);
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int n=2;
double* data, x;
MPI_Win window;
MPI_Alloc_mem(n*sizeof(double), MPI_INFO_NULL, &data);
MPI_Win_create(data, n*sizeof(float), sizeof(float), MPI_INFO_NULL, MPI_COMM_WORLD, &window);
int i;
MPI_Win_fence(0, window);
for(i=0; i<n*size; ++i){
MPI_Put(&x, 1, MPI_DOUBLE, i/n, i%n, 1, MPI_DOUBLE, window);
MPI_Win_fence(0, window);
MPI_Win_fence(0, window);
for(i=0; i<n*size; ++i){
MPI_Get(&x, 1, MPI_DOUBLE, i/n, i%n, 1, MPI_DOUBLE, window);
std::cout << i << " " << i/n << " " << i%n << " => " << x << "\n";
return 0;
I am a novice in the field of distributed computing, and I know that the most popular standard is the Message Passing Interface (MPI). However, even if I only have a single server, I can still run my program under the MPI framework, as in the following demo example.
# include <cmath>
# include <cstdlib>
# include <ctime>
# include <iomanip>
# include <iostream>
# include <mpi.h>
using namespace std;
int main ( int argc, char *argv[] );
double f ( double x );
void timestamp ( );
int main ( int argc, char *argv[] )
double end_time;
int i;
int id;
int ierr;
int m;
int p;
double r8_pi = 3.141592653589793238462643;
int process;
double q_global;
double q_local;
int received;
int source;
double start_time;
MPI_Status status;
int tag;
int target;
double x;
double xb[2];
double x_max = 1.0;
double x_min = 0.0;
// Establish the MPI environment.
ierr = MPI_Init ( &argc, &argv );
if ( ierr != 0 )
cout << "\n";
cout << "INTERVALS_MPI - Fatal error!";
cout << " MPI_Init returned ierr = " << ierr << "\n";
exit ( 1 );
// Determine this processes's rank.
ierr = MPI_Comm_rank ( MPI_COMM_WORLD, &id );
// Get the number of processes.
ierr = MPI_Comm_size ( MPI_COMM_WORLD, &p );
// Say hello (once), and shut down right away unless we
// have at least 2 processes available.
if ( id == 0 )
timestamp ( );
cout << "\n";
cout << "INTERVALS - Master process:\n";
cout << " C++ version\n";
cout << "\n";
cout << " An MPI example program,\n";
cout << " A quadrature over an interval is done by\n";
cout << " assigning subintervals to processes.\n";
cout << "\n";
cout << " The number of processes is " << p << "\n";
start_time = MPI_Wtime ( );
if ( p <= 1 )
cout << "\n";
cout << "INTERVALS - Master process:\n";
cout << " Need at least 2 processes!\n";
MPI_Finalize ( );
cout << "\n";
cout << "INTERVALS - Master process:\n";
cout << " Abnormal end of execution.\n";
exit ( 1 );
cout << "\n";
cout << "Process " << id << ": Active!\n";
// Every process could figure out the endpoints of its interval
// on its own. But we want to demonstrate communication. So we
// assume that the assignment of processes to intervals is done
// only by the master process, which then tells each process
// what job it is to do.
if ( id == 0 )
for ( process = 1; process <= p-1; process++ )
xb[0] = ( ( double ) ( p - process ) * x_min
+ ( double ) ( process - 1 ) * x_max )
/ ( double ) ( p - 1 );
xb[1] = ( ( double ) ( p - process - 1 ) * x_min
+ ( double ) ( process ) * x_max )
/ ( double ) ( p - 1 );
target = process;
tag = 1;
ierr = MPI_Send ( xb, 2, MPI_DOUBLE, target, tag, MPI_COMM_WORLD );
source = 0;
tag = 1;
ierr = MPI_Recv ( xb, 2, MPI_DOUBLE, source, tag, MPI_COMM_WORLD, &status );
// Wait here until everyone has gotten their assignment.
ierr = MPI_Barrier ( MPI_COMM_WORLD );
if ( id == 0 )
cout << "\n";
cout << "INTERVALS - Master process:\n";
cout << " Subintervals have been assigned.\n";
// Every process needs to be told the number of points to use.
// Since this is the same value for everybody, we use a broadcast.
// Again, we are doing it in this roundabout way to emphasize that
// the choice for M could really be made at runtime, by processor 0,
// and then sent out to the others.
m = 100;
source = 0;
ierr = MPI_Bcast ( &m, 1, MPI_INT, source, MPI_COMM_WORLD );
// Now, every process EXCEPT 0 computes its estimate of the
// integral over its subinterval, and sends the result back
// to process 0.
if ( id != 0 )
q_local = 0.0;
for ( i = 1; i <= m; i++ )
x = ( ( double ) ( 2 * m - 2 * i + 1 ) * xb[0]
+ ( double ) ( 2 * i - 1 ) * xb[1] )
/ ( double ) ( 2 * m );
q_local = q_local + f ( x );
q_local = q_local * ( xb[1] - xb[0] ) / ( double ) ( m );
target = 0;
tag = 2;
ierr = MPI_Send ( &q_local, 1, MPI_DOUBLE, target, tag, MPI_COMM_WORLD );
// Process 0 expects to receive N-1 partial results.
received = 0;
q_global = 0.0;
while ( received < p - 1 )
source = MPI_ANY_SOURCE;
tag = 2;
ierr = MPI_Recv ( &q_local, 1, MPI_DOUBLE, source, tag, MPI_COMM_WORLD,
&status );
q_global = q_global + q_local;
received = received + 1;
// The master process prints the answer.
if ( id == 0 )
cout << "\n";
cout << "INTERVALS - Master process:\n";
cout << " Estimate for PI is " << q_global << "\n";
cout << " Error is " << q_global - r8_pi << "\n";
end_time = MPI_Wtime ( );
cout << "\n";
cout << " Elapsed wall clock seconds = "
<< end_time - start_time << "\n";
// Terminate MPI.
MPI_Finalize ( );
// Terminate.
if ( id == 0 )
cout << "\n";
cout << "INTERVALS - Master process:\n";
cout << " Normal end of execution.\n";
cout << "\n";
timestamp ( );
return 0;
//  F evaluates the integrand 4 / ( 1 + x^2 ).
//
//  Parameters:
//
//    Input, double X, the evaluation point.
//
//    Output, double F, the value 4 / ( 1 + X * X ).
//
double f ( double x )
{
  return 4.0 / ( 1.0 + x * x );
}
//  TIMESTAMP prints the current local date and time to standard output,
//  formatted with "%d %B %Y %I:%M:%S %p" and followed by a newline.
//
//  Nothing is printed if the local time cannot be determined or formatted.
//
void timestamp ( )
{
  const std::size_t time_size = 40;
  char time_buffer[time_size];

  const std::time_t now = std::time ( nullptr );
  const std::tm *tm_ptr = std::localtime ( &now );
//
//  localtime returns a null pointer on failure; strftime must not see it.
//
  if ( tm_ptr == nullptr )
  {
    return;
  }
//
//  strftime returns 0 when the result does not fit; the buffer contents are
//  then unspecified and must not be printed.
//
  if ( std::strftime ( time_buffer, time_size, "%d %B %Y %I:%M:%S %p", tm_ptr ) == 0 )
  {
    return;
  }

  std::cout << time_buffer << "\n";
}
This is a simple case in which we use MPI to compute the integral of a specific function. I ran this program using 4 processes. What confuses me is that, on a single machine, we could instead use OpenMP for shared-memory programming and avoid MPI's communication cost. I do not understand the point of using MPI on a single machine.
I'm exploring MPI in C++ and I wanted to parallelize the creation of a picture of the Mandelbrot set. I'm using the ppm format. Each processor builds its part and sends it back to the main process that receives it as MPI_CHAR. This is the code:
#include "mpi.h"
#include <complex>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
using namespace std;
// Iterates z -> z*z + c, starting from z = 0, for the point c that the pixel
// (x, y) maps to, and returns how many iterations ran before |z| reached 4.
//
// x, y          pixel coordinates; y feeds the real part of c and x the
//               imaginary part, both scaled by 4/width and centred on
//               (height/2, width/2).
// width, height image dimensions in pixels.
// max           iteration cap.
//
// Returns a value in [0, max]; max means the point did not escape within the
// cap (0 is returned when max <= 0).
int mandelbrot(int x, int y, int width, int height, int max) {
    const std::complex<float> point(static_cast<float>(y - height/2.0) * 4.0/width,
                                    static_cast<float>(x - width/2.0) * 4.0/width);
    std::complex<float> z(0, 0);
    int iteration = 0;
    // The counter must advance on every pass; otherwise a point that never
    // escapes would keep the loop running forever.
    while (iteration < max && std::abs(z) < 4) {
        z = z * z + point;
        ++iteration;
    }
    return iteration;
}
int main(int argc, char **argv) {
int numprocs;
int myid;
int buff_size = 404270; // 200x200
char buff[buff_size];
int i;
MPI_Status stat;
int width = 200, height = 200, max_iter = 1000;
if (myid == 0) {
ofstream image("mandel.ppm");
image << "P3\n" << width << " " << height << " 255\n";
for(i=1; i < numprocs; i++) {
MPI_Probe(i, 0, MPI_COMM_WORLD, &stat);
int length;
MPI_Get_count(&stat, MPI_CHAR, &length);
image << buff;
} else {
stringstream ss;
// proc rank: 1, 2, ..., n
int part = height/(numprocs-1), start = (myid - 1) * part, end = part * myid;
printf("%d -> %d\n", start, end);
for (int row = start; row < end; row++) {
for (int col = 0; col < width; col++) {
int iteration = mandelbrot(row, col, width, height, max_iter);
if (row == start) ss << 255 << ' ' << 255 << ' ' << 255 << "\n";
else if (iteration < max_iter) ss << iteration * 255 << ' ' << iteration * 20 << ' ' << iteration * 5 << "\n";
else ss << 0 << ' ' << 0 << ' ' << 0 << "\n";
printf("\n sizeof = %d\n", ss.str().length());
MPI_Send(ss.str().c_str(), ss.str().length(), MPI_CHAR, 0, 0, MPI_COMM_WORLD);
return 0;
Code compilation:
$ mpic++ -std=c++0x mpi.mandel.cpp -o mpi.mandel
Running with 3 processes (process main + process rank 1 and 2)
$ mpirun -np 3 ./mpi.mandel
Resulting ppm pictures when running with 3, 4, and 5 process:
It seems that the point-to-point send/receive communication mixes up the results when more than 3 processes try to send their MPI_CHAR elements to the main process. How can I avoid this behavior?
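It works when the buffer buff is created with the same length as the incoming message (plus a terminating '\0'), so that image << buff cannot pick up stale characters left over from a previous, longer message: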
// Collect one text chunk from every worker rank, in rank order, and append it
// to the image file.
for (int i=1; i < numprocs; i++) {
    // Probe first so the buffer can be sized to exactly this message.
    MPI_Probe(i, 0, MPI_COMM_WORLD, &stat);
    int length;
    MPI_Get_count(&stat, MPI_CHAR, &length);
    printf("\nfrom %d <<-- %d (stat.source=%d) Receiving %d chars\n", myid, i, stat.MPI_SOURCE, length);
    // std::string instead of a variable-length char array, which is not
    // standard C++; the string knows its length, so no terminator is needed.
    std::string buff(length, '\0');
    // The probe only inspects the pending message; it still has to be received.
    MPI_Recv(&buff[0], length, MPI_CHAR, i, 0, MPI_COMM_WORLD, &stat);
    image << buff;
}
Thus, we no longer need the declarations at the beginning: neither int buff_size = 404270; nor char buff[buff_size];
I'm trying to wrap my head around how to use VAOs appropriately for instanced rendering (specifically in Qt 5.2, using OpenGL 3.3). My understanding is that VAOs save the state of the VBOs and associated attributes so that you don't need to worry about binding and enabling everything at drawing time, you just bind the VAO. But with instancing, you often have multiple VBOs. How do you get around needing to bind them all? Or do I just need to use a single VBO for both my per vertex data and my per instance data?
I've been looking at a couple tutorials, for example: http://ogldev.atspace.co.uk/www/tutorial33/tutorial33.html
It looks to me like what he does is use a VAO for his per vertex data and NOT for his per instance data. I've tried doing the same thing with my Qt-based code, and it's not working for me (probably because I don't entirely understand how that works... shouldn't his instance data still need to be bound when drawing happens?)
Some dummy code... this is a bit silly, I'm just drawing a single instance of two triangles, with a perspective matrix as a per instance attribute.
#include "glwindow.h"
#include <QColor>
#include <QMatrix4x4>
#include <QVector>
#include <QVector3D>
#include <QVector4D>
#include <QDebug>
// Constructs the window with the given parent. Both buffer objects are set up
// as vertex buffers, and the context pointer starts out null; paintGL treats a
// null context as "not yet initialized".
// NOTE(review): the constructor body is not present in this listing — confirm
// against the original file.
GLWindow::GLWindow(QWindow *parent)
: QWindow(parent)
, _vbo(QOpenGLBuffer::VertexBuffer)
, _matbo(QOpenGLBuffer::VertexBuffer)
, _context(0)
// Looks up the shader attribute locations, uploads the geometry of two
// triangles (six positions followed by six colours) into _vbo, reserves room
// for one 4x4 matrix in _matbo, and describes the attribute layouts to the
// shader program.
// NOTE(review): the function braces and any create()/bind() calls for _vao,
// _vbo and _matbo are not present in this listing — presumably lost in
// extraction; confirm against the original file.
void GLWindow::initGL()
_positionAttr = _program->attributeLocation("position");
_colourAttr = _program->attributeLocation("colour");
_matrixAttr = _program->attributeLocation("matrix");
// Two triangles, three vertices each.
QVector<QVector3D> triangles;
triangles << QVector3D(-0.5, 0.5, 1) << QVector3D(-0.5, -0.5, 1) << QVector3D(0.5, -0.5, 1);
triangles << QVector3D(0.5, 0.5, 0.5) << QVector3D(-0.5, -0.5, 0.5) << QVector3D(0.5, -0.5, 0.5);
// One colour per vertex: red/green/blue corners, then an all-white triangle.
QVector<QVector3D> colours;
colours << QVector3D(1, 0, 0) << QVector3D(0, 1, 0) << QVector3D(0, 0, 1);
colours << QVector3D(1, 1, 1) << QVector3D(1, 1, 1) << QVector3D(1, 1, 1);
size_t positionSize = triangles.size() * sizeof(QVector3D);
size_t colourSize = colours.size() * sizeof(QVector3D);
// Positions occupy the start of the buffer, colours follow directly after.
_vbo.allocate(positionSize + colourSize);
_vbo.write(0, triangles.constData(), positionSize);
_vbo.write(positionSize, colours.constData(), colourSize);
_colourOffset = positionSize;
_program->setAttributeBuffer(_positionAttr, GL_FLOAT, 0, 3, 0);
_program->setAttributeBuffer(_colourAttr, GL_FLOAT, _colourOffset, 3, 0);
_matbo.allocate(4 * sizeof(QVector4D));
// NOTE(review): this configures the mat4 attribute as if it were a single
// vec4. The author's follow-up in this same document identifies this call as
// the main problem: each of the four columns needs its own
// setAttributeBuffer/enableAttributeArray/glVertexAttribDivisor call.
_program->setAttributeBuffer(_matrixAttr, GL_FLOAT, 0, 4, 4 * sizeof(QVector4D));
_func330->glVertexAttribDivisor(_matrixAttr, 1);
resizeGL(width(), height());
// Sets the GL viewport to the rectangle from (0, 0) with width w and height h.
// NOTE(review): the function braces are not present in this listing.
void GLWindow::resizeGL(int w, int h)
glViewport(0, 0, w, h);
// Draws one instance of the six vertices, using a perspective matrix that is
// written into _matbo on every call.
// NOTE(review): the statement guarded by the _context check (presumably an
// early return), the clear call, and any bind/release calls are not present
// in this listing — confirm against the original file.
void GLWindow::paintGL()
if (! _context) // not yet initialized
QColor background(Qt::black);
glClearColor(background.redF(), background.greenF(), background.blueF(), 1.0f);
// Perspective projection followed by a translation of -2 along z.
QMatrix4x4 matrix;
matrix.perspective(60, 4.0/3.0, 0.1, 100.0);
matrix.translate(0, 0, -2);
_matbo.write(0, matrix.constData(), 4 * sizeof(QVector4D));
// 6 vertices, 1 instance.
_func330->glDrawArraysInstanced(GL_TRIANGLES, 0, 6, 1);
// Creates the shader program from GLSL 3.30 sources embedded as strings: a
// vertex shader that multiplies the position by the "matrix" attribute and
// passes the colour on, and a fragment shader that outputs that colour.
// NOTE(review): the ends of both string literals (the closing shader brace and
// the closing parenthesis) and any link() call are not present in this
// listing — confirm against the original file.
void GLWindow::setupShaders()
QString vShaderSrc("#version 330\n"
"layout(location = 0) in vec4 position;\n"
"layout(location = 1) in vec4 colour;\n"
"layout(location = 2) in mat4 matrix;\n"
"smooth out vec4 col;\n"
"void main() {\n"
" col = colour;\n"
" gl_Position = matrix * position;\n"
QString fShaderSrc("#version 330\n"
"smooth in vec4 col;\n"
"void main() {\n"
" gl_FragColor = col;\n"
_program = new QOpenGLShaderProgram(this);
_program->addShaderFromSourceCode(QOpenGLShader::Vertex, vShaderSrc);
_program->addShaderFromSourceCode(QOpenGLShader::Fragment, fShaderSrc);
// Reacts to the window being exposed: on the first exposure (no context yet)
// it creates the QOpenGLContext and fetches the OpenGL 3.3 core function
// table from it.
// NOTE(review): as listed, the warning sits directly under `if (_func330)`,
// i.e. on the success path. Presumably the success branch, an `else`, the
// block braces, and the context format/create/makeCurrent calls were lost in
// extraction — confirm against the original file.
void GLWindow::exposeEvent(QExposeEvent *event)
if (isExposed())
if (! _context)
_context = new QOpenGLContext(this);
QSurfaceFormat format(requestedFormat());
_func330 = _context->versionFunctions<QOpenGLFunctions_3_3_Core>();
if (_func330)
qWarning() << "Could not obtain required OpenGL context version";
#ifndef GL_WINDOW_H
#define GL_WINDOW_H
#include <QExposeEvent>
#include <QSurfaceFormat>
#include <QWindow>
#include <QOpenGLBuffer>
#include <QOpenGLContext>
#include <QOpenGLFunctions>
#include <QOpenGLFunctions_3_3_Core>
#include <QOpenGLShaderProgram>
#include <QOpenGLVertexArrayObject>
// A QWindow that owns its own QOpenGLContext and draws instanced geometry.
// NOTE(review): the opening brace and the access specifiers
// (public/protected/private) are not present in this listing — confirm
// against the original header.
class GLWindow : public QWindow, protected QOpenGLFunctions
GLWindow(QWindow * = 0);
virtual ~GLWindow();
// Set-up, drawing and viewport handling.
void initGL();
void paintGL();
void resizeGL(int, int);
virtual void exposeEvent(QExposeEvent *);
void setupShaders();
// Buffer holding the per-vertex positions and colours.
QOpenGLBuffer _vbo;
// Buffer holding the per-instance matrix.
QOpenGLBuffer _matbo;
// Null until created in exposeEvent.
QOpenGLContext *_context;
QOpenGLShaderProgram *_program;
QOpenGLVertexArrayObject _vao;
// OpenGL 3.3 core entry points (glVertexAttribDivisor, glDrawArraysInstanced).
QOpenGLFunctions_3_3_Core *_func330;
// Attribute locations queried from _program in initGL.
GLuint _positionAttr;
GLuint _colourAttr;
GLuint _matrixAttr;
// Byte offset of the colour data inside _vbo.
size_t _colourOffset;
} ;
#include <QGuiApplication>
#include <QSurfaceFormat>
#include "glwindow.h"
// Application entry point: creates the Qt GUI application and a 400x400
// GLWindow, then runs the event loop and returns its exit code.
// NOTE(review): the function braces and a call that shows the window are not
// present in this listing — confirm against the original file.
int main(int argc, char **argv)
QGuiApplication app(argc, argv);
GLWindow window;
window.resize(400, 400);
return app.exec();
# Automatically generated by qmake (3.0) Fri May 16 09:49:41 2014
TARGET = glbuffertest
CONFIG += qt debug
# Input
SOURCES += glbuffertest.cpp glwindow.cpp
HEADERS += glwindow.h
I've tried getting rid of my _matbo buffer and instead putting the matrix data into the same VBO as the position and colour attributes, but it's not working for me. My initGL function now looks like:
// Single-buffer variant of initGL: positions, colours and the per-instance
// matrix all live in _vbo, laid out one after another.
// NOTE(review): the function braces and any create()/bind() calls are not
// present in this listing; _matrixOffset is used here but is not declared in
// the class listing shown earlier in this document.
void GLWindow::initGL()
_positionAttr = _program->attributeLocation("position");
_colourAttr = _program->attributeLocation("colour");
_matrixAttr = _program->attributeLocation("matrix");
// Two triangles, three vertices each.
QVector<QVector3D> triangles;
triangles << QVector3D(-0.5, 0.5, 1) << QVector3D(-0.5, -0.5, 1) << QVector3D(0.5, -0.5, 1);
triangles << QVector3D(0.5, 0.5, 0.5) << QVector3D(-0.5, -0.5, 0.5) << QVector3D(0.5, -0.5, 0.5);
// One colour per vertex.
QVector<QVector3D> colours;
colours << QVector3D(1, 0, 0) << QVector3D(0, 1, 0) << QVector3D(0, 0, 1);
colours << QVector3D(1, 1, 1) << QVector3D(1, 1, 1) << QVector3D(1, 1, 1);
size_t positionSize = triangles.size() * sizeof(QVector3D);
size_t colourSize = colours.size() * sizeof(QVector3D);
size_t matrixSize = 4 * sizeof(QVector4D);
// Buffer layout: [positions][colours][matrix]. Only the first two regions are
// filled here; the matrix region is written in paintGL.
_vbo.allocate(positionSize + colourSize + matrixSize);
_vbo.write(0, triangles.constData(), positionSize);
_vbo.write(positionSize, colours.constData(), colourSize);
_colourOffset = positionSize;
_matrixOffset = positionSize + colourSize;
_program->setAttributeBuffer(_positionAttr, GL_FLOAT, 0, 3, 0);
_program->setAttributeBuffer(_colourAttr, GL_FLOAT, _colourOffset, 3, 0);
// NOTE(review): as in the two-buffer attempt, the mat4 attribute is configured
// with a single call; the author's follow-up in this document states that each
// of its four columns must be set up separately.
_program->setAttributeBuffer(_matrixAttr, GL_FLOAT, _matrixOffset, 4, 4 * sizeof(QVector4D));
_func330->glVertexAttribDivisor(_matrixAttr, 1);
resizeGL(width(), height());
and paintGL:
// Single-buffer variant of paintGL: writes the perspective matrix into the
// matrix region of _vbo and draws one instance of the six vertices.
// NOTE(review): the statement guarded by the _context check, the bind calls
// and the function braces are not present in this listing. The trailing `*/`
// further down has no matching opener here, so the alternative the author
// describes was presumably wrapped in a block comment whose start was lost.
void GLWindow::paintGL()
if (! _context) // not yet initialized
QColor background(Qt::black);
glClearColor(background.redF(), background.greenF(), background.blueF(), 1.0f);
QMatrix4x4 matrix;
matrix.perspective(60, 4.0/3.0, 0.1, 100.0);
matrix.translate(0, 0, -2);
_vbo.write(_matrixOffset, matrix.constData(), 4 * sizeof(QVector4D));
/* I tried replacing the three preceding lines with the following, without success: */
_vbo.write(_matrixOffset, matrix.constData(), 4 * sizeof(QVector4D));
_func330->glVertexAttribDivisor(_matrixAttr, 1); */
_func330->glDrawArraysInstanced(GL_TRIANGLES, 0, 6, 1);
So it seems my instancing problems are bigger than just having the wrong buffer bound at the wrong time. What else am I doing wrong?
I think you must create one VBO for positions and one VBO for colors (or use interleaved data with a stride). A VAO allows you to use multiple VBOs, one per attribute.
// prepare your shader program
// ...
// prepare your VBOs : one VBO for pos, one VBO for colors, one for normals,...
// example for position
// if you store the points using QVector<QVector3D>
vertexPositionBuffer.allocate(vertices.constData(), vertices.size() * sizeof(QVector3D));
// do the same for colors or other attributes
// ...
// after all buffers are created
// Bind the position buffer
shaderProgram.setAttributeBuffer("vertexPosition", GL_FLOAT, 0, 3);
// do the same for all other buffers
// ...
// release vao
and in your paintGL function:
// update your matrices
// bind your shader program
// set your uniform variables
// then
glDrawArrays(GL_TRIANGLES, 0, vertices.size());
// release your shader program
I've got it. The main problems were that:
I had to loop through all four columns of my mat4 attribute, setting and enabling each of them, and calling glVertexAttribDivisor() on each.
I had completely messed up the call to QOpenGLShaderProgram::setAttributeBuffer() for my mat4 attribute.
Essentially, you have to treat a mat4 as four separate vec4 attributes (one for each column). This doesn't affect how you copy QMatrix4x4 data to a QOpenGLBuffer object in the slightest, just how you tell the shader program to deal with the data. This is well described in both the tutorial I linked to in my original question and in The OpenGL Programming Guide's instancing tutorial, I just didn't get it. So, going back to the first attempt at glwindow.cpp above, I've changed very little and things now work:
#include "glwindow.h"
#include <QColor>
#include <QMatrix4x4>
#include <QVector>
#include <QVector3D>
#include <QVector4D>
#include <QDebug>
// Constructs the window with the given parent. Both buffer objects are set up
// as vertex buffers, and the context pointer starts out null; paintGL treats a
// null context as "not yet initialized".
// NOTE(review): the constructor body is not present in this listing — confirm
// against the original file.
GLWindow::GLWindow(QWindow *parent)
: QWindow(parent)
, _vbo(QOpenGLBuffer::VertexBuffer)
, _matbo(QOpenGLBuffer::VertexBuffer)
, _context(0)
void GLWindow::initGL()
_positionAttr = _program->attributeLocation("position");
_colourAttr = _program->attributeLocation("colour");
_matrixAttr = _program->attributeLocation("matrix");
QVector<QVector3D> triangles;
triangles << QVector3D(-0.5, 0.5, 1) << QVector3D(-0.5, -0.5, 1) << QVector3D(0.5, -0.5, 1);
triangles << QVector3D(0.5, 0.5, 0.5) << QVector3D(-0.5, -0.5, 0.5) << QVector3D(0.5, -0.5, 0.5);
QVector<QVector3D> colours;
colours << QVector3D(1, 0, 0) << QVector3D(0, 1, 0) << QVector3D(0, 0, 1);
colours << QVector3D(1, 1, 1) << QVector3D(1, 1, 1) << QVector3D(1, 1, 1);
size_t positionSize = triangles.size() * sizeof(QVector3D);
size_t colourSize = colours.size() * sizeof(QVector3D);
_vbo.allocate(positionSize + colourSize);
_vbo.write(0, triangles.constData(), positionSize);
_vbo.write(positionSize, colours.constData(), colourSize);
_colourOffset = positionSize;
_program->setAttributeBuffer(_positionAttr, GL_FLOAT, 0, 3, 0);
_program->setAttributeBuffer(_colourAttr, GL_FLOAT, _colourOffset, 3, 0);
_matbo.allocate(4 * sizeof(QVector4D));
// This is completely wrong
/*_program->setAttributeBuffer(_matrixAttr, GL_FLOAT, 0, 4, 4 * sizeof(QVector4D));
_func330->glVertexAttribDivisor(_matrixAttr, 1);
// The right way to set up a mat4 attribute for instancing
for (unsigned i = 0; i < 4; i++)
_program->setAttributeBuffer(_matrixAttr + i, GL_FLOAT, i * sizeof(QVector4D), 4, 4 * sizeof(QVector4D));
_program->enableAttributeArray(_matrixAttr + i);
_func330->glVertexAttribDivisor(_matrixAttr + i, 1);
resizeGL(width(), height());
// Sets the GL viewport to the rectangle from (0, 0) with width w and height h.
// NOTE(review): the function braces are not present in this listing.
void GLWindow::resizeGL(int w, int h)
glViewport(0, 0, w, h);
// Draws one instance of the six vertices, using a perspective matrix that is
// written into _matbo on every call.
// NOTE(review): the statement guarded by the _context check (presumably an
// early return), the clear call, and the bind/release calls are not present
// in this listing — confirm against the original file.
void GLWindow::paintGL()
if (! _context) // not yet initialized
QColor background(Qt::black);
glClearColor(background.redF(), background.greenF(), background.blueF(), 1.0f);
// Perspective projection followed by a translation of -2 along z.
QMatrix4x4 matrix;
matrix.perspective(60, 4.0/3.0, 0.1, 100.0);
matrix.translate(0, 0, -2);
// _matbo must be the bound buffer when it is written to.
_matbo.write(0, matrix.constData(), 4 * sizeof(QVector4D));
// 6 vertices, 1 instance.
_func330->glDrawArraysInstanced(GL_TRIANGLES, 0, 6, 1);
// Creates the shader program from GLSL 3.30 sources embedded as strings: a
// vertex shader that multiplies the position by the per-instance "matrix"
// attribute and passes the colour on, and a fragment shader that outputs that
// colour.
// NOTE(review): the ends of both string literals (the closing shader brace and
// the closing parenthesis) and any link() call are not present in this
// listing — confirm against the original file.
void GLWindow::setupShaders()
QString vShaderSrc("#version 330\n"
"layout(location = 0) in vec4 position;\n"
"layout(location = 1) in vec4 colour;\n"
"layout(location = 2) in mat4 matrix;\n"
"smooth out vec4 col;\n"
"void main() {\n"
" col = colour;\n"
" gl_Position = matrix * position;\n"
QString fShaderSrc("#version 330\n"
"smooth in vec4 col;\n"
"void main() {\n"
" gl_FragColor = col;\n"
_program = new QOpenGLShaderProgram(this);
_program->addShaderFromSourceCode(QOpenGLShader::Vertex, vShaderSrc);
_program->addShaderFromSourceCode(QOpenGLShader::Fragment, fShaderSrc);
// Reacts to the window being exposed: on the first exposure (no context yet)
// it creates the QOpenGLContext and fetches the OpenGL 3.3 core function
// table from it.
// NOTE(review): as listed, the warning sits directly under `if (_func330)`,
// i.e. on the success path. Presumably the success branch, an `else`, the
// block braces, and the context format/create/makeCurrent calls were lost in
// extraction — confirm against the original file.
void GLWindow::exposeEvent(QExposeEvent *event)
if (isExposed())
if (! _context)
_context = new QOpenGLContext(this);
QSurfaceFormat format(requestedFormat());
_func330 = _context->versionFunctions<QOpenGLFunctions_3_3_Core>();
if (_func330)
qWarning() << "Could not obtain required OpenGL context version";
Note that I also moved the binding of _matbo and setting up of the mat4 attribute so that it's all done before releasing the VAO. I was initially very confused over how many VBOs were allowed and when they needed to be bound. There's no problem having multiple VBOs inside a single VAO, it's just that the right one needs to be bound to be written to, and the right one needs to be bound before calling QOpenGLShaderProgram::setAttributeBuffer(). It doesn't matter which buffer is bound when glDraw*() is called (I trust someone will comment if I'm wrong about that).