memory calculation is going negative while using malloc_hook - unix

I want to track memory usage through the the program execution. I have used malloc_hook to capture all the memory related calls. below is the program
long int memory_bytes_hook = 0;
long int peak_memory_hook = 0;
std::mutex memory_mutex_hook;
static void my_init_hook (void);
static void *my_malloc_hook (size_t, const void *);
static void my_free_hook (void*, const void *);
static void* my_realloc_hook (void *ptr, size_t size, const void *caller);
static void *(*old_malloc_hook)(size_t, const void *);
static void (*old_free_hook)(void*, const void *);
static void *(*old_realloc_hook)(void *ptr, size_t size, const void *caller);
static void
my_init_hook (void)
{
__malloc_hook = my_malloc_hook;
__free_hook = my_free_hook;
__realloc_hook = my_realloc_hook ;
}
static void *
my_malloc_hook (size_t size, const void *caller)
{
const std::lock_guard<std::mutex> lock(memory_mutex_hook);
void *result;
__malloc_hook = nullptr;
__free_hook = nullptr ;
__realloc_hook = nullptr ;
result = malloc (size);
memory_bytes_hook += malloc_usable_size(result);
if(peak_memory_hook < memory_bytes_hook )
peak_memory_hook = memory_bytes_hook ;
my_init_hook();
return result;
}
static void* my_realloc_hook (void *ptr, size_t size, const void *caller)
{
const std::lock_guard<std::mutex> lock(memory_mutex_hook);
__malloc_hook = nullptr;
__free_hook = nullptr;//old_free_hook ;
__realloc_hook = nullptr;//old_realloc_hook ;
long int oldSize = malloc_usable_size(ptr);
void *result = realloc(ptr, size);
long int updated_size = malloc_usable_size(result);
memory_bytes_hook += (updated_size - oldSize);
if(peak_memory_hook < memory_bytes_hook )
peak_memory_hook = memory_bytes_hook ;
my_init_hook();
return result;
}
static void
my_free_hook (void *ptr, const void *caller)
{
const std::lock_guard<std::mutex> lock(memory_mutex_hook);
__malloc_hook = nullptr;// old_malloc_hook;
__free_hook = nullptr;// old_free_hook ;
__realloc_hook = nullptr;//old_realloc_hook ;
/* Call recursively */
long int oldSize = malloc_usable_size(ptr);
free (ptr);
memory_bytes_hook -= oldSize;
if(peak_memory_hook < memory_bytes_hook )
peak_memory_hook = memory_bytes_hook ;
my_init_hook();
}
int main(int argc, char** argv)
{
my_init_hook();
... calling other libraries and do lot of operation for couple of hours...
std::cout <<"At last: peak_memory hook: "<< peak_memory_hook << "\t" << " Bytes
allocated: " << memory_bytes_hook << "\n";
return 0;
}
At the end of program execution I am getting negative value of memory_bytes_hook . I did put some logs at intermediate intervals. It was observed that memory_bytes_hook value started reducing after some time and eventually went to negative. That's why It is not because of overflow. I was wondering , What could be other reason of it going negative value. I was hoping, malloc hook would catch all the memory related calls.

Related

Segmentation fault inside range

#include <iostream>
#include <vector>
#include <algorithm>
#include <queue> // std::priority_queue
using std::vector;
using std::cin;
using std::cout;
struct fj{
int indexI=0;
int freeT=0;
};
struct DereferenceCompareNode : public std::binary_function<fj, fj, bool>
{
bool operator()(const fj lhs, const fj rhs) const
{
return lhs.freeT > rhs.freeT;
}
};
class JobQueue {
private:
int num_workers_;
vector<int> jobs_;
vector<int> assigned_workers_;
vector<long long> start_times_;
void WriteResponse() const {
for (int i = 0; i < jobs_.size(); ++i) {
cout << assigned_workers_[i] << " " << start_times_[i] << "\n";
}
}
void ReadData() {
int m;
cin >> num_workers_ >> m;
jobs_.resize(m);
std::cout<<"Read fault"<<"\n";
for(int i = 0; i < m; i++)
cin >> jobs_[i];
std::cout<<"Read fault ends"<<"\n";
}
void AssignJobs() {
// TODO: replace this code with a faster algorithm.
std::cout<<"Fault point 1"<<"\n";
assigned_workers_.resize(jobs_.size());
start_times_.resize(jobs_.size());
vector<long long> next_free_time(num_workers_, 0);
std::priority_queue<int, vector<int>, std::greater<int> > thread;
std::priority_queue<fj, vector<fj>, DereferenceCompareNode > freeJob;
/*
for (int i = 0; i < jobs_.size(); ++i) {
int duration = jobs_[i];
int next_worker = 0;
for (int j = 0; j < num_workers_; ++j) {
if (next_free_time[j] < next_free_time[next_worker])
next_worker = j;
}
assigned_workers_[i] = next_worker;
start_times_[i] = next_free_time[next_worker];
next_free_time[next_worker] += duration;
}
*/
std::cout<<"dump point 2"<<"\n";
for(int i=0;i<num_workers_;i++){
thread.push(i);
}
std::cout<<"dump point 1"<<"\n";
int counter = 0;
while(jobs_.size()!=0){
std::cout<<"jobs_.size:"<<jobs_.size()<<"\n";
std::cout<<"freeJob.size:"<<freeJob.size()<<"\n";
//check logic
do{
if(freeJob.top().freeT == counter){
std::cout<<"freeJob.top().freeT:"<<freeJob.top().freeT<<"\n";
std::cout<<"counter:"<<counter<<"\n";
thread.push(freeJob.top().indexI);
freeJob.pop();
}else{
break;
}
}
while(freeJob.size()!=0);
std::cout<<"Thread:"<<thread.size()<<"\n";
while(thread.size()!=0){
if(jobs_.size()!=0){
fj currA;
currA.indexI = thread.top();
currA.freeT = jobs_.at(0)+counter;
std::cout<<"currA.indexI:"<<currA.indexI<<"\n";
std::cout<<"currA.freeT:"<<currA.freeT<<"\n";
thread.pop();
jobs_.erase(jobs_.begin());
assigned_workers_.push_back(currA.indexI);
start_times_.push_back(currA.freeT);
}else{
break;
}
}
counter++;
}
}
public:
void Solve() {
ReadData();
AssignJobs();
WriteResponse();
}
};
int main() {
std::ios_base::sync_with_stdio(false);
JobQueue job_queue;
job_queue.Solve();
return 0;
}
I am getting segmentation fault in function ReadData while taking inputs for vector jobs.
I am getting fault even when I am inside bounds of defined size.
Everything was fine when have not written AssignJob function.
Am I doing something wrong with some bounds or taking illegal inputs format or messing with some other stuff?
Am I doing something wrong
Yes, you are: freeJob starts out empty, so this is undefined behavior:
if(freeJob.top().freeT == counter){
In fact, you never push anything into freeJob, you only pop() things from it.

QAudioOutput buffer underflow

Getting a message "Got a buffer underflow!" after each write in this simple program.
Beep.hpp:
#pragma once
#include <QTimer>
#include <QAudioOutput>
class Beep: public QObject
{
Q_OBJECT
public:
explicit Beep();
virtual ~Beep();
void onTimer();
private:
QAudioOutput m_out;
QIODevice *m_outDev;
QTimer m_timer;
};
Beep.cpp:
#include "Beep.hpp"
int ms = 100;
const QAudioFormat defaultAudioFormat = []()
{
QAudioFormat format;
format.setSampleRate(8000);
format.setChannelCount(1);
format.setSampleSize(16);
format.setCodec("audio/pcm");
format.setByteOrder(QAudioFormat::LittleEndian);
format.setSampleType(QAudioFormat::SignedInt);
return format;
}();
Beep::Beep() :
m_out(defaultAudioFormat),
m_outDev()
{
m_out.setBufferSize(16 * ms);
m_outDev = m_out.start();
QObject::connect(&m_timer, &QTimer::timeout, this, &Beep::onTimer);
m_timer.setSingleShot(false);
m_timer.start(ms);
}
Beep::~Beep()
{
}
void Beep::onTimer()
{
std::vector<uint8_t> samples(16 * ms);
m_outDev->write((char*) &samples.front(), samples.size());
}
main.cpp:
#include <QCoreApplication>
#include "Beep.hpp"
int main(int argc, char *argv[])
{
QCoreApplication app(argc, argv);
Beep beep;
return app.exec();
}
This test program is just writing buffers with zeros. With real data there are cracking sounds.
Writing more data or changing timings makes it worse. What's wrong with this code?
Using a Timer is the wrong way to do it.
Use the notify() signal
void AudioManager::init_audio(AudioManager *mgr) {
if (mgr->stream_id == -1) return;
mgr->audio_format.setSampleRate(mgr->context->time_base.den);
mgr->audio_format.setSampleSize(16);
mgr->audio_format.setChannelCount(2);
mgr->audio_format.setCodec("audio/pcm");
mgr->audio_format.setSampleType(QAudioFormat::SignedInt);
QAudioDeviceInfo info(QAudioDeviceInfo::defaultOutputDevice());
if (!info.isFormatSupported(mgr->audio_format)) {
mgr->audio_format = info.nearestFormat(mgr->audio_format);
}
mgr->audio_out = new QAudioOutput(mgr->audio_format, nullptr);
mgr->audio_out->setNotifyInterval(15);
mgr->audio_out->setBufferSize(mgr->context->time_base.den * 4); // 1 second worth of stereo data
connect(mgr->audio_out, SIGNAL(notify()), mgr, SLOT(audio_out_notify()));
connect(mgr->audio_out, SIGNAL(stateChanged(QAudio::State)), mgr, SLOT(audio_out_state_changed(QAudio::State)));
qreal volume_out = (qreal)parent->volume / 100.0f;
mgr->audio_out->setVolume(volume_out);
mgr->audio_out_device = mgr->audio_out->start();
}
This will be called when the audio playback requires more data
void AudioManager::audio_out_notify() {
qDebug() << "Audio notify";
check_audio_playback();
}
Most of the below code will be irrelevant but it is also called is audio has stopped playing.
void AudioManager::check_audio_playback() {
if (stream_id == -1) return;
pthread_mutex_lock(&audio_mutex);
if (!audio_out->state() == QAudio::State::IdleState) {
pthread_mutex_unlock(&audio_mutex);
return;
}
if (parent->pts_start_time < 0.0) {
if (parent->Video.stream_id == -1 && decode_pos > 65) { // start playback
parent->pts_start_time = buffers[0].frame_time;
parent->sys_start_time = (double)parent->timer.elapsed() / 1000.0;
qDebug() << "Audio playback started";
} else {
pthread_mutex_unlock(&audio_mutex);
return;
}
}
if (playback_pos == decode_pos) {
pthread_mutex_unlock(&audio_mutex);
return;
}
AudioBuffer *buffer = nullptr;
double current_sys_time = ((double)parent->timer.elapsed() / 1000.0) - parent->sys_start_time;
bool bounds = false;
int skipped = 0;
while (!bounds) {
if (playback_pos == decode_pos) bounds = true;
else {
AudioBuffer *temp_buffer = &buffers[playback_pos];
double temp_time = temp_buffer->frame_time - parent->pts_start_time;
if (temp_time < current_sys_time ) {
if (buffer) {
buffer->used = false;
skipped++;
}
buffer = temp_buffer;
playback_pos++; playback_pos %= MAX_AUD_BUFFERS;
} else {
bounds = true;
}
}
}
if (skipped > 0) qDebug("Skipped %d audio buffers on playback", skipped);
if (buffer) {
audio_out_device->write((const char *)buffer->data, buffer->buffer_size);
buffer->used = false;
}
pthread_mutex_unlock(&audio_mutex);
}
The example on the Qt website wasn't that obvious http://qt.apidoc.info/5.1.1/qtmultimedia/audiooutput.html at first but when I put it in to test it wasn't too bad.
The reason was that the source of audio data wasn't a "production-quality module" (it's a dummy testing class): the timer was drifting because its real interval was 10ms plus the processing time.
Other observations:
make QAudioOutput::setBufferSize() bigger
do QAudioInput::read() and QAudioOutput::write() in chunks with size that matches QAudioInput::periodSize() and QAudioOutput::periodSize()

Constant container (map) - eliminate heap allocation

If I create a static const std::map, it will allocate memory on heap. Following code throws bad_alloc:
#include <iostream>
#include <map>
class A {
public:
static const std::map<int, int> a;
};
const std::map<int, int> A::a = { { 1, 3} , { 2, 5} };
void* operator new ( std::size_t count )
{
throw std::bad_alloc();
}
int
main (void)
{
for(auto &ai: A::a) {
std::cout << ai.first << " " << ai.second << "\n";
}
return 0;
}
Is it possible to create this constant map somehow without having memory allocation?
As Igor Tandetnik suggested, a custom allocator would do the trick. The following is a quick'n'dirty example of a simple linear allocator, which returns memory slots from a static buffer:
#include <iostream>
#include <map>
#include <cassert>
template <typename T>
class LinearAllocator {
static constexpr size_t _maxAlloc = 1<<20;
using Buffer = std::array<T, _maxAlloc>;
using FreeList = std::array<bool, _maxAlloc>;
static Buffer _buffer;
static FreeList _allocated;
public:
typedef T* pointer;
typedef T value_type;
template<typename U>
struct rebind { typedef LinearAllocator<U> other; };
pointer allocate(size_t /*n*/, const void *hint=0) {
for(size_t i = 0; i < _maxAlloc; ++i) {
if(!_allocated[i]) {
_allocated[i] = true;
return &_buffer[i];
}
}
throw std::bad_alloc();
}
void deallocate(pointer p, size_t /*n*/) {
assert(p >= &_buffer[0] && p < &_buffer[_maxAlloc]);
_allocated[p-&_buffer[0]] = false;
}
LinearAllocator() throw() { }
LinearAllocator(const LinearAllocator &a) throw() { }
template <class U>
LinearAllocator(const LinearAllocator<U> &a) throw() { }
~LinearAllocator() throw() { }
};
template <typename T>
typename LinearAllocator<T>::Buffer LinearAllocator<T>::_buffer;
template <typename T>
typename LinearAllocator<T>::FreeList LinearAllocator<T>::_allocated;
using MyMap = std::map<int, int, std::less<int>,
LinearAllocator<std::pair<int,int> > >;
// make sure we notice if new gets called
void* operator new(size_t size) {
std::cout << "new called" << std::endl;
}
int main() {
MyMap m;
m[0] = 1; m[1] = 3; m[2] = 8;
for(auto & p : m)
std::cout << p.first << ": " << p.second << std::endl;
return 0;
}
Output:
0: 1
1: 3
2: 8
Note that this allocator will only handle requests for single slots at a time. I'm sure you will figure out how to extend it according to your requirements.

How to create a Queue of unsigned char array in Qt?

I am new in Queue (FIFO) and Qt. I want to create a Queue of unsigned char array in Qt. How to do it? Please help
unsigned char buffer[1024];
If you want to use the Qt API, then you can use the QQueue class -
QQueue<unsigned char> queue;
queue.enqueue(65);
queue.enqueue(66);
queue.enqueue(67);
while (!queue.isEmpty())
cout << queue.dequeue() << endl;
If you want to build the queue on your own, then I guess you can declare a Queue class like this -
class Queue
{
private:
enum{SIZE=1024, EMPTY=0};
unsigned char buffer[SIZE];
int readHead, writeHead;
public:
Queue()
{
readHead = writeHead = EMPTY;
}
void push(unsigned char data);
unsigned char pop();
unsigned char peek();
bool isEmpty();
};
void Queue::push(unsigned char data)
{
if((readHead - writeHead) >= SIZE)
{
// You should handle Queue overflow the way you want here.
return;
}
buffer[writeHead++ % SIZE] = data;
}
unsigned char Queue::pop()
{
unsigned char item = peek();
readHead++;
return item;
}
unsigned char Queue::peek()
{
if(isEmpty())
{
// You should handle Queue underflow the way you want here.
return;
}
return buffer[readHead % SIZE];
}
bool Queue::isEmpty()
{
return (readHead == writeHead);
}
If you want to maintain a Queue of unsigned char array, then you will have to maintain a queue of unsigned char pointers -
QQueue<unsigned char *> queue;
unsigned char *array1 = new unsigned char[10]; // array of 10 items
array1[0] = 65;
array1[1] = 66;
queue.enqueue(array1);
unsigned char *array2 = new unsigned char[20]; // an array of 20 items
queue.enqueue(array2);
unsigned char *arr = queue.dequeue();
qDebug() << arr[0] << ", " << arr[1];
Note: You should take care of the memory cleanup after you are done with this queue. IMHO, you better avoid this type of design though.

QSemaphore producer consumer problem

This is more or less Qt's example with some small changes.
The output is PcPcPcPc...etc. I don't understand why.
Namely, I am confused about how sProducer.acquire(256); works. I believe I understand how sProducer.acquire(1); works. It doesn't make sense to me to acquire anything more than 1 because I don't see how acquiring more than 1 makes any difference logically. Could someone explain this? On the surface, writing 1 byte and reading 1 byte doesn't seem very efficient due to semaphore overhead...but acquiring more resources doesn't seem to make a performance difference nor does the code make sense.
Logically I think both the acquire and release have to have the same number (whatever that number is). But how can I modify this code so I can acquire more (say 256) and thus reduce semaphore overhead? The code bellow just doesn't make sense to me when acquire and release is not 1.
#include <QtCore>
#include <iostream>
#include <QTextStream>
//Global variables.
QTextStream out(stdout);
QTextStream in(stdin);
const int DataSize = 1024;
const int BufferSize = 512;
char buffer[BufferSize];
QSemaphore sProducer(BufferSize);
QSemaphore sConsumer(0);
//-----------------------------
class Producer : public QThread
{
public:
void run();
};
void Producer::run()
{
for (int i = 0; i < DataSize; ++i) {
sProducer.acquire(256);
buffer[i % BufferSize] = 'P';
sConsumer.release(256);
}
}
class Consumer : public QThread
{
public:
void run();
};
void Consumer::run()
{
for (int i = 0; i < DataSize; ++i) {
sConsumer.acquire(256);
std::cerr << buffer[i % BufferSize];
out << "c";
out.flush();
sProducer.release(256);
}
std::cerr << std::endl;
}
int main()
{
Producer producer;
Consumer consumer;
producer.start();
consumer.start();
producer.wait();
consumer.wait();
in.readLine(); //so i can read console text.
return 0;
}
Since there is only one producer and one consumer, they can move freely their own private cursor, their i variable, of the amount of bytes they want, as long as there is enough room to do that (something higher that 256 on both sides with a 512 buffer would cause a deadlock).
Basically, when a thread successfully acquire 256 bytes, it means it can safely read or write these 256 bytes in one single operation, so you just have to put another loop inside the acquire/release block to handle that number of bytes.
For the producer:
void Producer::run()
{
for (int i = 0; i < DataSize; ++i) {
const int blockSize = 256;
sProducer.acquire(blockSize);
for(int j = 0; j < blockSize; ++i, ++j) {
buffer[i % BufferSize] = 'P';
}
sConsumer.release(blockSize);
}
}
And for the consumer
void Consumer::run()
{
for (int i = 0; i < DataSize; ++i) {
const int blockSize = 128;
sConsumer.acquire(blockSize);
for(int j = 0; j < blockSize; ++i, ++j) {
std::cerr << buffer[i % BufferSize];
out << "c";
out.flush();
}
sProducer.release(blockSize);
}
std::cerr << std::endl;
}

Resources