Bdb crashes on concurrent write, c++ linux/osx - berkeley-db

I am having issues with bdb and its locking mechanisms.
The following code results in either a seg fault, or what looks like a deadlock/endless loop
#include <iostream>
#include "db_cxx.h"
#include <boost/thread.hpp>
using namespace std;
// Writer-thread body: stores 5000 consecutive records starting at `start`
// into `db`, using the same double as both key and data.
//
// db    - open database handle; not owned by this function.
// start - first key to write; keys start, start+1, ... up to (but not
//         including) start + 5000 are written.
//
// The value handed to Dbt must really be sizeof(double) bytes wide, because
// that is the size declared to Berkeley DB (and the size compare_double()
// copies out). Iterating with an int while declaring sizeof(double) would
// make the library read past the end of the variable.
void thread_instance(Db* db, double start){
    const double finish = start + 5000;
    try {
        for (double value = start; value < finish; value += 1.0) {
            Dbt key(&value, sizeof(value));
            Dbt ddata(&value, sizeof(value));
            db->put(NULL, &key, &ddata, 0);
        }
    } catch (DbException &e) {
        // Report the failure instead of letting the exception leave the
        // thread entry point.
        cerr << e.what() << endl;
    }
}
// B-tree / duplicate comparison callback: interprets the first
// sizeof(double) bytes of each DBT payload as a double and orders them
// numerically. Returns 1, -1 or 0 for greater, less, or neither.
int
compare_double(DB *dbp, const DBT *a,const DBT *b){
    double lhs;
    double rhs;
    // memcpy rather than a pointer cast: the payload is copied byte-wise
    // into properly typed locals.
    memcpy(&lhs, a->data, sizeof(double));
    memcpy(&rhs, b->data, sizeof(double));
    if (lhs > rhs)
        return 1;
    if (lhs < rhs)
        return -1;
    return 0;
}
// Driver: clears data/, opens a Berkeley DB environment and a B-tree
// database inside it, then runs three writer threads that all use the same
// Db handle and waits for them to finish.
int main(){
// Remove files left by a previous run; the shell's exit status is ignored.
system("rm data/*");
// NOTE(review): DB_THREAD is not part of these flags although the handle is
// shared between threads below - confirm against the Berkeley DB docs.
u_int32_t env_flags = DB_CREATE | DB_INIT_MPOOL | DB_INIT_CDB;
DbEnv* env = new DbEnv(0);
env->set_cachesize(0, 2000000, 1);
// `m` is not referenced anywhere below.
u_int32_t m = 0;
env->open("data/", env_flags, 0);
Db* db = new Db(env, 0);
// The same comparator is installed for keys and for sorted duplicates.
db->set_bt_compare(compare_double);
db->set_flags(DB_DUPSORT);
db->set_pagesize(32768);
db->set_dup_compare(compare_double);
u_int32_t oFlags = DB_CREATE;
try {
db->open(NULL, "db", NULL, DB_BTREE, oFlags, 0);
// Both handlers are empty, so a failed open is silently ignored and the
// threads below would run against a handle that never opened.
} catch (DbException &e) {
} catch (std::exception &e) {
}
vector<boost::thread*> threads;
// Thread x writes the key range starting at x * 5000.
for(int x=0; x < 3; x++){
threads.push_back(new boost::thread(boost::bind(&thread_instance, db, (x *5000))));
}
for(int x=0; x < threads.size(); x++){
threads[x]->join();
}
// The thread objects, db and env are never deleted or closed in this function.
};
I have tried DB_INIT_LOCK as well, but with the same results.
The stack trace:
Program received signal EXC_BAD_ACCESS, Could not access memory.
Reason: KERN_INVALID_ADDRESS at address: 0x0000000000000019
[Switching to process 34816]
0x00000001002e36a7 in __bamc_put ()
(gdb) ba
#0 0x00000001002e36a7 in __bamc_put ()
#1 0x0000000100386689 in __dbc_iput ()
#2 0x0000000100387a6c in __dbc_put ()
#3 0x0000000100383092 in __db_put ()
#4 0x0000000100397888 in __db_put_pp ()
#5 0x00000001002cee59 in Db::put ()
#6 0x0000000100001f88 in thread_instance (db=0x1007006c0, start=5000) at src/main.cpp:16
#7 0x0000000100698254 in thread_proxy ()
#8 0x00007fff80cb9456 in _pthread_start ()
#9 0x00007fff80cb9309 in thread_start ()
Does anyone know what could be going on here ?

I'd recommend checking for errors after db->open and printing the message, if any, instead of silently discarding it:
catch (DbException &e) {
cerr << e.what() << endl;
}
Also, it looks like the flags are not set correctly: at least DB_THREAD is required, because the handle is used from multiple threads.

Related

On running the code in Intel Devcloud, it is throwing runtime error

I was trying to run this code but while compiling in intel devcloud using these commands:
icpx -qopenmp -fopenmp-targets=spir64 openmp_target_offload_clause_ordering.cpp
export OMP_TARGET_OFFLOAD=MANDATORY
it is showing runtime error.
#include <stdio.h>
// Exercises two orderings of the `is_device_ptr` and `if` clauses on an
// OpenMP `target parallel for simd` construct, printing the pointer on the
// host and inside each target region.
int main() {
// V is a made-up address (0xdeadbeef), not memory obtained from an
// allocation routine; it is only ever printed, never dereferenced.
double *V = reinterpret_cast<double*>(0xdeadbeef);
printf("pointer=%p\n", V);
// Variant 1: is_device_ptr before if.
#pragma omp target parallel for simd is_device_ptr(V) if(true)
for(int i = 0; i < 1; ++i) {
printf("pointer=%p\n", V);
}
// Variant 2: if before is_device_ptr.
#pragma omp target parallel for simd if(true) is_device_ptr(V)
for(int i = 0; i < 1; ++i) {
printf("pointer=%p\n", V);
}
// The process exit status is 100 rather than 0.
return 100;
}
The device pointer you are entering is invalid.
Replace (0xdeadbeef) with (omp_target_alloc(size, 0)) in the first line of your main function, as follows:
double *ptr = reinterpret_cast<double*>(omp_target_alloc(size, 0));
Hope this helps!

SYCL kernel cannot call an undefined function without SYCL_EXTERNAL attribute

I am trying to calculate the Euclidean distance for KNN in parallel using DPC++. The training dataset contains 5 features and 1600 rows, and I want to calculate the distance between the current test point and each training point in parallel, but I keep getting an error regarding the SYCL kernel.
code for the function:
code
// Submits one work-item per dataset row that accumulates the squared
// difference between that row and curr_test into res, prints res, and
// returns it.
//
// q         - queue the kernel is submitted to.
// dataset   - one inner vector per training row.
// curr_test - the point every row is compared against.
std::vector<double> distance_calculation_FPGA(queue& q,const std::vector<std::vector<double>>& dataset,const std::vector<double>& curr_test) {
range<1> num_items{ dataset.size()};
std::vector<double>res;
res.resize(dataset.size());
// The element type of dataset_buf is std::vector<double>, so the kernel
// below works on std::vector objects directly.
buffer dataset_buf(dataset);
buffer curr_test_buf(curr_test);
buffer res_buf(res.data(), num_items);
q.submit([&](handler& h) {
accessor a(dataset_buf, h, read_only);
accessor b(curr_test_buf, h, read_only);
// dif is write_only + no_init, yet the loop below updates it with +=.
accessor dif(res_buf, h, write_only, no_init);
h.parallel_for(num_items, [=](auto i) {
// a[i].size() and a[i][j] are std::vector member calls made inside the kernel.
// NOTE(review): presumably the source of the reported "cannot call" errors - confirm.
for (int j = 0; j <(const int) a[i].size(); ++j) {
dif[i] += (a[i][j] - b[j]) * (a[i][j] - b[j]) ;
}
});
});
// res is read here while res_buf is still alive; no q.wait() or host
// accessor appears between the submit and this loop.
for (int i = 0; i < res.size(); ++i) {
std::cout << res[i] << std::endl;
}
//old distance calculation (serial)
//for (int i = 0; i < dataset.size(); ++i) {
// double dis = 0;
// for (int j = 0; j < dataset[i].size(); ++j) {
// dis += (curr_test[j] - dataset[i][j]) * (curr_test[j] - dataset[i][j]);
//}
//res.push_back(dis);
//}
return res;
}
the error I am receiving:
SYCL kernel cannot call a variadic function
SYCL kernel cannot call an undefined function without SYCL_EXTERNAL attribute
Would be extremely grateful for any help!
Thanks
We tried running your code by creating dummy 'dataset' and 'curr_test' variables. We were able to run the program successfully. Please refer this thread
Please refer to the complete code attached below.
#include <CL/sycl.hpp>
#include <iostream>
using namespace sycl;
// Computes, for every row of `dataset`, the squared Euclidean distance to
// `curr_test` on the device behind `q`, prints each distance on its own
// line, and returns them.
//
// q         - queue the kernel is submitted to.
// dataset   - one inner vector per row. Only the first curr_test.size()
//             entries of a row are used; a row with fewer entries is
//             treated as equal to curr_test in the missing positions, so
//             those positions add nothing to the distance.
// curr_test - the point every row is compared against.
//
// Returns a vector with dataset.size() entries; entry i is the squared
// distance between dataset[i] and curr_test.
std::vector<double> distance_calculation_FPGA(queue& q,const std::vector<std::vector<double>>& dataset,const std::vector<double>& curr_test)
{
    const size_t num_rows = dataset.size();
    const size_t num_features = curr_test.size();
    std::vector<double> res(num_rows, 0.0);

    if (num_rows != 0 && num_features != 0) {
        // Device code cannot call std::vector members, so the rows are
        // copied into one contiguous row-major array of plain doubles.
        std::vector<double> flat(num_rows * num_features);
        for (size_t r = 0; r < num_rows; ++r) {
            const std::vector<double>& row = dataset[r];
            for (size_t c = 0; c < num_features; ++c) {
                flat[r * num_features + c] = (c < row.size()) ? row[c] : curr_test[c];
            }
        }

        {
            // The buffers live only inside this scope: their destructors
            // wait for the kernel and copy the result back into `res`, so
            // `res` is safe to read on the host afterwards.
            buffer<double, 1> dataset_buf(flat.data(), range<1>(flat.size()));
            buffer<double, 1> curr_test_buf(curr_test.data(), range<1>(num_features));
            buffer<double, 1> res_buf(res.data(), range<1>(num_rows));

            q.submit([&](handler& h) {
                accessor a(dataset_buf, h, read_only);
                accessor b(curr_test_buf, h, read_only);
                accessor dif(res_buf, h, write_only, no_init);
                h.parallel_for(range<1>(num_rows), [=](id<1> i) {
                    const size_t base = i[0] * num_features;
                    // Accumulate in a private variable and store once: the
                    // output accessor is write-only and uninitialised, so
                    // it must not be read with +=.
                    double sum = 0.0;
                    for (size_t j = 0; j < num_features; ++j) {
                        const double diff = a[base + j] - b[j];
                        sum += diff * diff;
                    }
                    dif[i] = sum;
                });
            });
        }
    }

    // Print as double; iterating through an int would truncate the values.
    for (const double value : res) {
        std::cout << value << std::endl;
    }
    return res;
}
// Builds a dummy 5 x 1600 dataset (every row holds 0, 1, ..., 1599) and a
// matching 1600-element test point, prints the selected device name, and
// runs the distance routine on them.
int main(){
    const int row_count = 5;
    const int column_count = 1600;

    std::vector<std::vector<double>> dataset;
    for (int row = 0; row < row_count; ++row) {
        std::vector<double> values;
        for (int col = 0; col < column_count; ++col) {
            values.push_back(static_cast<double>(col));
        }
        dataset.push_back(values);
    }

    std::vector<double> curr_test;
    for (int col = 0; col < column_count; ++col) {
        curr_test.push_back(static_cast<double>(col));
    }

    queue q;
    // Print the device name as a quick check of where the work will run.
    const auto device_name = q.get_device().get_info<sycl::info::device::name>();
    std::cout << "Running on " << device_name << std::endl;

    distance_calculation_FPGA(q, dataset, curr_test);
    return 0;
}

C string capitalize problem. I encounter bus error

I am writing a capitalize function, but when I execute this code a bus error occurs. Can anyone help me debug this? I am a programming novice with no C knowledge, so any help is appreciated.
#include <stdio.h>
/* Converts the character at c to upper case in place by subtracting 32.
 * Both comparisons are strict, so the letters a and z themselves are left
 * unchanged. The character constants below are written with typographic
 * quote marks rather than plain apostrophes. */
void up(char *c)
{
if((*c < ‘z’) && (*c > ‘a’))
{
*c -= 32;
}
}
/* Applies up() to every character of str, in order, stopping at the
 * terminating NUL. The string is modified in place and str is returned. */
char *ft_strcapitalize(char *str)
{
    char *cursor;

    for (cursor = str; *cursor; cursor++)
        up(cursor);
    return str;
}
/* Runs ft_strcapitalize on a sample sentence and prints the result. */
int main()
{
/* str points at a string literal, and ft_strcapitalize writes through that
 * pointer. The literal is delimited with typographic quote marks. */
char *str = “salut, comment tu vas ? 42mots quarante-deux cinquante”;
ft_strcapitalize(str);
printf(“%s”, str);
}
This solution should work for you.
#include <stdio.h>
/* Upper-cases the character c points to, in place, when it is an ASCII
 * lowercase letter ('a'..'z' inclusive). Any other character, and a null
 * pointer, are left alone. */
void up(char *c)
{
    /* 32 is the distance between the lowercase and uppercase ASCII ranges. */
    if (c && *c >= 'a' && *c <= 'z')
        *c = (char)(*c - 32);
}
/* Applies up() to every character of str, in order, stopping at the
 * terminating NUL. The string is modified in place, so str must point to
 * writable storage (an array, not a string literal).
 *
 * Returns str. A null str is returned unchanged, matching the null check
 * up() performs on its own argument. */
char *ft_strcapitalize(char *str)
{
    char *cursor;

    if (!str)
        return str;
    for (cursor = str; *cursor; cursor++)
        up(cursor);
    return str;
}
/* Upper-cases a sample sentence held in a writable array and prints it
 * followed by a newline. */
int main()
{
    /* An array, not a pointer to a literal, so it may be modified in place. */
    char sentence[] = "salut, comment tu vas ? 42mots quarante-deux cinquante";

    /* ft_strcapitalize returns the pointer it was given. */
    printf("%s\n", ft_strcapitalize(sentence));
    return 0;
}

Segmentation fault inside range

#include <iostream>
#include <vector>
#include <algorithm>
#include <queue> // std::priority_queue
using std::vector;
using std::cin;
using std::cout;
// Pairs a worker index (indexI) with a time value (freeT).
// Both members default to zero.
struct fj {
    int indexI{0};
    int freeT{0};
};
struct DereferenceCompareNode : public std::binary_function<fj, fj, bool>
{
bool operator()(const fj lhs, const fj rhs) const
{
return lhs.freeT > rhs.freeT;
}
};
// Reads a worker count and a list of job durations from stdin, assigns jobs
// to workers in AssignJobs(), and prints one "worker start_time" line per
// job. Contains debug prints ("Read fault", "dump point", ...) on stdout.
class JobQueue {
private:
int num_workers_;
vector<int> jobs_;
vector<int> assigned_workers_;
vector<long long> start_times_;
// Prints assigned_workers_[i] and start_times_[i] for i in [0, jobs_.size()).
// NOTE(review): AssignJobs() only returns once jobs_ is empty, so this loop
// would have nothing to iterate over afterwards - confirm intent.
void WriteResponse() const {
for (int i = 0; i < jobs_.size(); ++i) {
cout << assigned_workers_[i] << " " << start_times_[i] << "\n";
}
}
// Reads num_workers_ and the job count m, then m job durations into jobs_.
void ReadData() {
int m;
cin >> num_workers_ >> m;
jobs_.resize(m);
std::cout<<"Read fault"<<"\n";
for(int i = 0; i < m; i++)
cin >> jobs_[i];
std::cout<<"Read fault ends"<<"\n";
}
// Simulates time with `counter`: each tick, workers whose freeT equals the
// tick are moved from freeJob back to `thread`, then every idle worker in
// `thread` takes the job at the front of jobs_.
void AssignJobs() {
// TODO: replace this code with a faster algorithm.
std::cout<<"Fault point 1"<<"\n";
// Both result vectors are resized to jobs_.size() here, and the loop below
// also push_back()s onto them, so the pushed entries land after these
// initial zero-valued ones.
assigned_workers_.resize(jobs_.size());
start_times_.resize(jobs_.size());
// next_free_time is only referenced by the commented-out block below.
vector<long long> next_free_time(num_workers_, 0);
// `thread`: min-heap of idle worker indices.
std::priority_queue<int, vector<int>, std::greater<int> > thread;
// `freeJob`: heap ordered by DereferenceCompareNode. Nothing in this class
// ever pushes into it.
std::priority_queue<fj, vector<fj>, DereferenceCompareNode > freeJob;
/*
for (int i = 0; i < jobs_.size(); ++i) {
int duration = jobs_[i];
int next_worker = 0;
for (int j = 0; j < num_workers_; ++j) {
if (next_free_time[j] < next_free_time[next_worker])
next_worker = j;
}
assigned_workers_[i] = next_worker;
start_times_[i] = next_free_time[next_worker];
next_free_time[next_worker] += duration;
}
*/
std::cout<<"dump point 2"<<"\n";
// Initially every worker is idle.
for(int i=0;i<num_workers_;i++){
thread.push(i);
}
std::cout<<"dump point 1"<<"\n";
int counter = 0;
while(jobs_.size()!=0){
std::cout<<"jobs_.size:"<<jobs_.size()<<"\n";
std::cout<<"freeJob.size:"<<freeJob.size()<<"\n";
//check logic
// do/while: the body runs, and calls freeJob.top(), before the size check
// at the bottom, so top() is reached even when freeJob is empty.
do{
if(freeJob.top().freeT == counter){
std::cout<<"freeJob.top().freeT:"<<freeJob.top().freeT<<"\n";
std::cout<<"counter:"<<counter<<"\n";
thread.push(freeJob.top().indexI);
freeJob.pop();
}else{
break;
}
}
while(freeJob.size()!=0);
std::cout<<"Thread:"<<thread.size()<<"\n";
// Hand the front job to each idle worker until workers or jobs run out.
while(thread.size()!=0){
if(jobs_.size()!=0){
fj currA;
currA.indexI = thread.top();
// freeT = job duration + current tick.
currA.freeT = jobs_.at(0)+counter;
std::cout<<"currA.indexI:"<<currA.indexI<<"\n";
std::cout<<"currA.freeT:"<<currA.freeT<<"\n";
thread.pop();
// Erasing the first element shifts every remaining job down by one.
jobs_.erase(jobs_.begin());
assigned_workers_.push_back(currA.indexI);
// The value recorded as the start time is currA.freeT (duration + tick).
start_times_.push_back(currA.freeT);
// currA itself is not pushed into freeJob.
}else{
break;
}
}
counter++;
}
}
public:
// Runs the three phases in order: read input, assign jobs, print results.
void Solve() {
ReadData();
AssignJobs();
WriteResponse();
}
};
int main() {
std::ios_base::sync_with_stdio(false);
JobQueue job_queue;
job_queue.Solve();
return 0;
}
I am getting segmentation fault in function ReadData while taking inputs for vector jobs.
I am getting fault even when I am inside bounds of defined size.
Everything was fine when have not written AssignJob function.
Am I doing something wrong with some bounds or taking illegal inputs format or messing with some other stuff?
Am I doing something wrong
Yes, you are: freeJob starts out empty, so this is undefined behavior:
if(freeJob.top().freeT == counter){
In fact, you never push anything into freeJob, you only pop() things from it.

cudaErrorIllegalInstruction on recursive function call

I am writing a path tracer for the GPU using CUDA 10.2. The entire program ran fine until I added a recursive call to the trace function. nvcc still compiles it, although with the following warning (as listed in the Visual Studio error list): "Stack size for entry function '' cannot be statically determined".
When the GPU reaches that point it stops, and the next time the CPU makes an API call it gets CUDA error 715, which is cudaErrorIllegalInstruction. I tried recreating the issue by writing another recursive kernel/function pair; the compiler gave the same warning, but it executed as expected. Unfortunately this means I have to dump my entire function here (if there are any questions about the functions and types used, I will happily answer them):
// Recursively traces ray `r` through the scene and returns the colour it
// gathers.
//
// settings  - supplies maxBounces, background and bias.
// r         - ray to trace (origin r.ori, direction r.dir).
// shapes    - array of nshapes shapes to test against.
// nshapes   - number of entries in shapes.
// bounces   - recursion depth so far; Vec3(0) is returned once it reaches
//             settings.maxBounces.
// randState - random state passed to cosineSample for the bounce direction.
//
// Returns settings.background when nothing is hit; otherwise the hit
// surface's emittance plus its colour times the colour traced along one
// sampled bounce direction.
//
// The function calls itself once per bounce.
// NOTE(review): intersect() is assumed to report the hit distance and the
// normal-flip flag through its 2nd and 3rd arguments - confirm against
// Shape's declaration.
__device__ Vec3 trace(
    const Settings& settings,
    const Ray& r,
    const Shape* shapes,
    const size_t nshapes,
    uint8_t bounces,
    curandState& randState) {
    if (bounces >= settings.maxBounces) {
        return Vec3(0.0f);
    }

    const Shape* shape = nullptr;
    float t = inf;
    bool flipNormal = false;
    for (size_t i = 0; i < nshapes; i++) {
        // Per-candidate outputs: the flip flag is only kept when this
        // candidate becomes the nearest hit, so a later, farther (or
        // missed) shape cannot overwrite the flag of the chosen one.
        float dist = inf;
        bool candidateFlip = false;
        if (shapes[i].intersect(r, dist, candidateFlip) && dist < t) {
            shape = shapes + i;
            t = dist;
            flipNormal = candidateFlip;
        }
    }

    if (shape == nullptr)
        return settings.background;

    const Vec3 hitPos = r.ori + t * r.dir;
    const Vec3 normal = flipNormal ? -shape->normal(hitPos) : shape->normal(hitPos);
    const Vec3 hemiDir = cosineSample(normal, randState);

    // The new ray starts slightly off the surface (settings.bias along the
    // normal).
    const Vec3 traceCol = trace(
        settings,
        Ray(hitPos + normal * settings.bias, hemiDir),
        shapes,
        nshapes,
        bounces + 1,
        randState
    );

    return shape->surface.emittance + shape->surface.color * traceCol;
}
Has anyone else had this issue and in that case, how was it fixed? I could probably redesign to a non-recursive design, although it wouldn't be an optimal solution.
I don't even know where to start with debugging this issue, so any ideas are greatly appreciated.
The problem is that CUDA usually selects a fitting maximum stack size for a kernel call, but here it cannot, because nvcc is unable to predict the stack size a recursive function needs.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <stdint.h>
// Calls itself with n + 1 until n is no longer below max, then returns n
// (converted to int). Written recursively on purpose: each level adds a
// frame to the device stack.
__device__ int recurse(uint64_t n, uint64_t max) {
    return (n < max) ? recurse(n + 1, max) : n;
}
// Kernel: every thread computes its global index; only the thread with
// index 256 * 256 - 1 runs recurse(0, max) and prints "<index>: <result>".
//
// max - recursion bound forwarded to recurse().
__global__ void start(uint64_t max) {
    const uint32_t idx = threadIdx.x + (blockIdx.x * blockDim.x);
    if (idx == 256 * 256 - 1)
        // idx is unsigned, so it is printed with %u; recurse() returns int.
        printf("%u: %i\n", idx, recurse(0, max));
}
// Selects device 0, sets the per-thread device stack size to 2048 bytes,
// launches start<<<256, 256>>>(126) and waits for it to finish.
//
// Returns 0 on success. On any CUDA failure, prints "failed: <reason>" to
// stderr and returns the CUDA error code.
int main() {
    cudaError_t status = cudaSetDevice(0);
    if (status != cudaSuccess) {
        std::cerr << "failed: " << cudaGetErrorString(status) << std::endl;
        return status;
    }

    // cudaDeviceSetLimit replaces the deprecated cudaThreadSetLimit. The
    // limit is set explicitly because the compiler cannot determine the
    // stack size a recursive device function needs.
    status = cudaDeviceSetLimit(cudaLimitStackSize, 2048);
    if (status != cudaSuccess) {
        std::cerr << "failed: " << cudaGetErrorString(status) << std::endl;
        return status;
    }

    start<<<256, 256>>>(126);

    // A kernel launch returns no status itself; launch-time errors are
    // picked up here, execution-time errors by the synchronize below.
    status = cudaGetLastError();
    if (status != cudaSuccess) {
        std::cerr << "failed: " << cudaGetErrorString(status) << std::endl;
        return status;
    }

    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        std::cerr << "failed: " << cudaGetErrorString(status) << std::endl;
        return status;
    }
    return 0;
}
This program will run, but if 2048 is replaced with 1024, it will output the cudaErrorIllegalInstruction.

Resources