How to dynamically fill the structure which is a pointer to pointer of arrays in C++ implementing xfs

How to dynamically fill the structure which is a pointer to pointer of arrays in C++ implementing xfs - pointers

Structure 1:
typedef struct _wfs_cdm_cu_info
{
USHORT usTellerID;
USHORT usCount;
LPWFSCDMCASHUNIT * lppList;
} WFSCDMCUINFO, * LPWFSCDMCUINFO;
Structure 2:
typedef struct _wfs_cdm_cashunit
{
USHORT usNumber;
USHORT usType;
LPSTR lpszCashUnitName;
CHAR cUnitID[5];
CHAR cCurrencyID[3];
ULONG ulValues;
ULONG ulInitialCount;
ULONG ulCount;
ULONG ulRejectCount;
ULONG ulMinimum;
ULONG ulMaximum;
BOOL bAppLock;
USHORT usStatus;
USHORT usNumPhysicalCUs;
LPWFSCDMPHCU * lppPhysical;
} WFSCDMCASHUNIT, * LPWFSCDMCASHUNIT;
Structure 3:
typedef struct _wfs_cdm_physicalcu
{
LPSTR lpPhysicalPositionName;
CHAR cUnitID[5];
ULONG ulInitialCount;
ULONG ulCount;
ULONG ulRejectCount;
ULONG ulMaximum;
USHORT usPStatus;
BOOL bHardwareSensor;
} WFSCDMPHCU, * LPWFSCDMPHCU;
The code:
LPWFSCDMCUINFO lpWFSCDMCuinf = NULL;
LPWFSCDMCASHUNIT lpWFSCDMCashUnit = NULL;
LPWFSCDMPHCU lpWFSCDMPhcu = NULL;
int i=0;
try
{
hResult = WFMAllocateBuffer(sizeof(WFSCDMCUINFO),WFS_MEM_ZEROINIT|WFS_MEM_SHARE,(void**)&lpWFSCDMCuinf);
lpWFSCDMCuinf->usCount =7;
lpWFSCDMCuinf->usTellerID = 0;
hResult = WFMAllocateMore(7*sizeof(LPWFSCDMCASHUNIT),lpWFSCDMCuinf,(void**)&lpWFSCDMCuinf->lppList);
for(i=0;i<7;i++)
{
LPWFSCDMCASHUNIT lpWFSCDMCashUnit = NULL;
hResult = WFMAllocateMore(sizeof(WFSCDMCASHUNIT), lpWFSCDMCuinf, (void**)&lpWFSCDMCashUnit);
lpWFSCDMCuinf->lppList[i] = lpWFSCDMCashUnit;//store the pointer
//FILLING CASH UNIT
-----------------------------
lpWFSCDMCashUnit->ulValues =50;
-----------------------------
WFMAllocateMore(1* sizeof(LPWFSCDMPHCU), lpWFSCDMCuinf, (void**)&lpWFSCDMCashUnit->lppPhysical);// Allocate Physical Unit structure
for(int j=0;j<1;j++)
{
LPWFSCDMPHCU lpWFSCDMPhcu = NULL;
hResult = WFMAllocateMore(sizeof(WFSCDMPHCU), lpWFSCDMCuinf, (void**)&lpWFSCDMPhcu);
lpWFSCDMCashUnit->lppPhysical[j] = lpWFSCDMPhcu;
//FILLING Phy CASHUNIT
-------------------------------------------------------
lpWFSCDMPhcu->ulMaximum = 2000;
-----------------------------
}
}
//lpWFSCDMCuinf->lppList=&lpWFSCDMCashUnit;
hResult =WFSExecute (hService,WFS_CMD_CDM_END_EXCHANGE,(LPVOID)&lpWFSCDMCuinf,60000,&lppResult);
return (int)hResult;
I'm getting stuck while I retrieve all the values in structure 1.
I need to dynamically add the values into these structure and display Structure1 as output.An allocation of memory needs to be done for this.I have tried using the above code for allocating the memory but in spite of allocating the values are not properly stored in structure.
The value of usCount changes as per the denomination set. Based on this usNumPhysicalCUs is set.
Also when I send &lpWFSCDMCuinf within the WFSExecutemethod the lppPhysical seems to be empty.
I cant exactly figure out where I'm getting wrong.

First of all your must allocate memory for each block.
For pointers array you will allocate memory to store count of pointers, than for each pointer in allocated memory you must allocate memory for structure itself.
I rewrite your code in more short form. There is no error checking and this code is sample only.
LPWFSCDMCUINFO lpWFSCDMCuinf = NULL;
HRESULT hr = WFMAllocateBuffer(sizeof(WFSCDMCUINFO), WFS_MEM_ZEROINIT|WFS_MEM_SHARE, (void**)&lpWFSCDMCuinf);
// Allocate 7 times of WFSCDMCASHUNIT
const int cuCount = 7;
lpWFSCDMCuinf->usCount = cuCount;
hr = WFMAllocateMore(cuCount * sizeof(LPWFSCDMCASHUNIT), lpWFSCDMCuinf, (void**)&lpWFSCDMCuinf->lppList);
for (int i=0; i < cuCount; i++)
{
// for one entry
LPWFSCDMCASHUNIT currentCU = NULL;
hr = WFMAllocateMore(sizeof(WFSCDMCASHUNIT), lpWFSCDMCuinf, (void**)&currentCU);
// Store pinter
lpWFSCDMCuinf->lppList[i] = currentCU;
// Fill current CU data here
// ....
// Allocate Phisical Unit Pointers
const int phuCount = 1;
currentCU->usNumPhysicalCUs = phuCount;
WFMAllocateMore(phuCount * sizeof(LPWFSCDMPHCU), lpWFSCDMCuinf, (void**)&currentCU->lppPhysical);
// Allocate Phisical Unit structure
for (int j=0; j < phuCount; j++)
{
LPWFSCDMPHCU phuCurrent = NULL;
// Allocate Phisical Unit structure
WFMAllocateMore(sizeof(WFSCDMPHCU), lpWFSCDMCuinf, (void**)&phuCurrent);
currentCU->lppPhysical[j] = phuCurrent;
// Fill Phisical Unit here
// ..
// ..
}
}
In additional to this sample I recommend you to write some helper function to allocate XFS structures like WFSCDMCUINFO. In my own project I've used some macro to serialize XFS structure in memory with WFMAllocate and WFMAllocateMore functions.
XFS structures is so complex and different from class to class. I wrote some macros to serialize and deserialize structures in memory stream and XFS memory buffers. In application I use heap alloc to store XFS structures in memory, but when I need to return structures in another XFS message I need to transfer memory buffers to XFS memory with WFMAllocate and WFMAllocateMore.

Related

Instantiating a hash map in C

I am trying to instantiate a hash map in c,
Here's the code:
//hash Struct
typedef struct hashMap{
int** Map;
int val;
} hashMap;
typedef hashMap* mapPtr;
// Function to create a hashMap object pointer
void createHash(mapPtr aHash){
// Create a hash object
if(!aHash){
aHash = calloc(1, sizeof(hashMap));
int size = (int) pow(10,9);
// Allocate Memory for 2d array
aHash->Map = calloc(2, sizeof(int*));
// Allocate internal memory for the array -> 10^9 blocks
for(int i = 2; i < 2; i++){
aHash->Map[i] = calloc(size, sizeof(int));
}
for(int i = 0; i < size; i++){
aHash->Map[0][i] = -1;
aHash->Map[1][i] = -1;
}
// memset(aHash->Map[0], -1, size*sizeof(int));
}
}
but I keep getting a segmentation fault, indicating that I'm trying to access memory outside the buffer.
I know that either the memset of or the for loop method would work, but neither seem to be working.

How can I write the memory pointer in CUDA [duplicate]

This question already has an answer here:
Summing the rows of a matrix (stored in either row-major or column-major order) in CUDA
(1 answer)
Closed 5 years ago.
I declared two GPU memory pointers, and allocated the GPU memory, transfer data and launch the kernel in the main:
// declare GPU memory pointers
char * gpuIn;
char * gpuOut;
// allocate GPU memory
cudaMalloc(&gpuIn, ARRAY_BYTES);
cudaMalloc(&gpuOut, ARRAY_BYTES);
// transfer the array to the GPU
cudaMemcpy(gpuIn, currIn, ARRAY_BYTES, cudaMemcpyHostToDevice);
// launch the kernel
role<<<dim3(1),dim3(40,20)>>>(gpuOut, gpuIn);
// copy back the result array to the CPU
cudaMemcpy(currOut, gpuOut, ARRAY_BYTES, cudaMemcpyDeviceToHost);
cudaFree(gpuIn);
cudaFree(gpuOut);
And this is my code inside the kernel:
__global__ void role(char * gpuOut, char * gpuIn){
int idx = threadIdx.x;
int idy = threadIdx.y;
char live = '0';
char dead = '.';
char f = gpuIn[idx][idy];
if(f==live){
gpuOut[idx][idy]=dead;
}
else{
gpuOut[idx][idy]=live;
}
}
But here are some errors, I think here are some errors on the pointers. Any body can give a help?

The key concept is the storage order of multidimensional arrays in memory -- this is well described here. A useful abstraction is to define a simple class which encapsulates a pointer to a multidimensional array stored in linear memory and provides an operator which gives something like the usual a[i][j] style access. Your code could be modified something like this:
template<typename T>
struct array2d
{
T* p;
size_t lda;
__device__ __host__
array2d(T* _p, size_t _lda) : p(_p), lda(_lda) {};
__device__ __host__
T& operator()(size_t i, size_t j) {
return p[j + i * lda];
}
__device__ __host__
const T& operator()(size_t i, size_t j) const {
return p[j + i * lda];
}
};
__global__ void role(array2d<char> gpuOut, array2d<char> gpuIn){
int idx = threadIdx.x;
int idy = threadIdx.y;
char live = '0';
char dead = '.';
char f = gpuIn(idx,idy);
if(f==live){
gpuOut(idx,idy)=dead;
}
else{
gpuOut(idx,idy)=live;
}
}
int main()
{
const int rows = 5, cols = 6;
const size_t ARRAY_BYTES = sizeof(char) * size_t(rows * cols);
// declare GPU memory pointers
char * gpuIn;
char * gpuOut;
char currIn[rows][cols], currOut[rows][cols];
// allocate GPU memory
cudaMalloc(&gpuIn, ARRAY_BYTES);
cudaMalloc(&gpuOut, ARRAY_BYTES);
// transfer the array to the GPU
cudaMemcpy(gpuIn, currIn, ARRAY_BYTES, cudaMemcpyHostToDevice);
// launch the kernel
role<<<dim3(1),dim3(rows,cols)>>>(array2d<char>(gpuOut, cols), array2d<char>(gpuIn, cols));
// copy back the result array to the CPU
cudaMemcpy(currOut, gpuOut, ARRAY_BYTES, cudaMemcpyDeviceToHost);
cudaFree(gpuIn);
cudaFree(gpuOut);
return 0;
}
The important point here is that a two dimensional C or C++ array stored in linear memory can be addressed as col + row * number of cols. The class in the code above is just a convenient way of expressing this.

Arduino Dynamic Two-dimensional array

I'm working on an Arduino project where I need to build (and work with) a two-dimensional array at runtime. I've been poking around looking for a solution, but I've had no luck. I found an example of a dynamic one-dimentional array helper here: http://playground.arduino.cc/Code/DynamicArrayHelper, so i've been trying to adopt that code for my use. I created a library using the following code:
My Header file:
#ifndef Dynamic2DArray_h
#define Dynamic2DArray_h
#include "Arduino.h"
class Dynamic2DArray
{
public:
Dynamic2DArray( bool sorted );
//Add an integer pair to the array
bool add( int v1, int v2);
//Clear out (empty) the array
bool clear();
//Get the array item in the specified row, column
int getValue(int row, int col);
//Get the number of rows in the array
int length();
private:
int _rows;
void * _slots;
bool _sorted;
void _sort();
};
#endif
The library's code:
#include "Arduino.h"
#include "Dynamic2DArray.h"
#define ARRAY_COLUMNS 2
int _rows;
void * _slots;
bool _sorted;
Dynamic2DArray::Dynamic2DArray(bool sorted) {
//Set our local value indicating where we're supposed to
//sort or not
_sorted = sorted;
//Initialize the row count so it starts at zero
_rows = 0;
}
bool Dynamic2DArray::add( int v1, int v2) {
//Add the values to the array
//implementation adapted from http://playground.arduino.cc/Code/DynamicArrayHelper
//Allocate memory based on the size of the current array rows plus one (the new row)
int elementSize = sizeof(int) * ARRAY_COLUMNS;
//calculate how much memory the current array is using
int currentBufferSize = elementSize * _rows;
//calculate how much memory the new array will use
int newBufferSize = elementSize * (_rows + 1);
//allocate memory for the new array (which should be bigger than the old one)
void * newArray = malloc ( newBufferSize );
//Does newArray not point to something (a memory address)?
if (newArray == 0) {
//Then malloc failed, so return false
return false;
}
// copy the data from the old array, to the new array
for (int idx = 0; idx < currentBufferSize ; idx++)
{
((byte*)newArray)[idx] = ((byte *)_slots)[idx];
}
// free the original array
if (_slots != NULL)
{
free(_slots);
}
// clear the newly allocated memory space (the new row)
for (int idx = currentBufferSize; idx < newBufferSize; idx++)
{
((byte *)newArray)[idx] = 0;
}
// Store the number of rows the memory is allocated for
_rows = ++_rows;
// set the array to the newly created array
_slots = newArray;
//Free up the memory used by the new array
free(newArray);
//If the array's supposed to be sorted,
//then sort it
if (_sorted) {
_sort();
}
// success
return true;
};
int Dynamic2DArray::length() {
return _rows;
};
bool Dynamic2DArray::clear() {
//Free up the memory allocated to the _slots array
free(_slots);
//And zero out the row count
_rows = 0;
};
int Dynamic2DArray::getValue(int row, int col) {
//do we have a valid row/col?
if ((row < _rows) && (col < ARRAY_COLUMNS)) {
//Return the array value at that row/col
return _slots[row][col];
} else {
//No? Then there's nothing we can do here
return -1;
}
};
//Sorted probably doesn't matter, I can probably ignore this one
void _sort() {
}
The initial assignment of the _slots value is giving me problems, I don't know how to define it so this code builds. The _slots variable is supposed to point to the dynamic array, but I've got it wrong.
When I try to compile the code into my project's code, I get the following:
Arduino: 1.8.0 (Windows 10), Board: "Pro Trinket 3V/12MHz (USB)"
sketch\Dynamic2DArray.cpp: In member function 'int Dynamic2DArray::getValue(int, int)':
sketch\Dynamic2DArray.cpp:83:22: warning: pointer of type 'void *' used in arithmetic [-Wpointer-arith]
return _slots[row][col];
^
Dynamic2DArray.cpp:83: error: 'void*' is not a pointer-to-object type
Can someone please help me fix this code? I've posted the files to https://github.com/johnwargo/Arduino-Dynamic-2D-Array-Lib.

The code you took was for a 1D dynamic array; the modifications for a 2D array are too tricky. Give up these horrors.
I think there is no reason you use dynamic array. You can assume that size max is ROW_MAX * COL_MAX, so you can define a static array int array[ROW_MAX][COL_MAX].
on one hand if you defined a dynamic array, you could free space when you dont use it anymore and take advantage of it for other work. I dont know if this is your case.
on the other hand if you define a static array (on UNO), you have 32kB available on program space, instead of 2kB available on RAM.
Because of the difference 32kB / 2kB, there are very few chances you can get bigger array with dynamic allocation.

Multiple read-write synchronization issues in opencl local and global memories

I have an opencl kernel that finds the maximum ASCII character in a string.
The problem is I cannot synchronize the multiple read-writes to global and local memories.
I am trying to update a local_maximum character in shared memory, and at the end of the workgroup (last thread), the global_maximum character, by comparing it with the local_maximum. The threads are writing one over another, I guess.
eg: Input string: "pirates of the carribean".
Output String: 'r' (but it should be 's').
Please have a look at the code and give a solution as to what I can do to get everything synchronized. I am sure people having sound knowledge can understand the code. Optimization tips are welcome.
The code is below:
__kernel void find_highest_ascii( __global const char* data, __global char* result, unsigned int size, __local char* localMaxC )
{
//creating variables and initialising..
unsigned int i, localSize, globalSize, j;
char privateMaxC,temp,temp1;
i = get_global_id(0);
localSize = get_local_size(0);
globalSize = get_global_size(0);
privateMaxC = '\0';
if(i<size){
if(i == 0)
read_mem_fence( CLK_LOCAL_MEM_FENCE );
*localMaxC = '\0';
mem_fence( CLK_LOCAL_MEM_FENCE);
////////////////////////////////////////////////////
/////UPDATING PRIVATE MAX CHARACTER/////////////////
////////////////////////////////////////////////////
for( j = i; j<size; j+=globalSize )
{
if( data[j] > privateMaxC )
{
privateMaxC = data[j];
}
}
///////////////////////////////////////////////////
///////////////////////////////////////////////////
////UPDATING SHARED MAX CHARACTER//////////////////
///////////////////////////////////////////////////
temp = *localMaxC;
read_mem_fence( CLK_LOCAL_MEM_FENCE );
if(privateMaxC>temp)
{
*localMaxC = privateMaxC;
write_mem_fence( CLK_LOCAL_MEM_FENCE );
temp = privateMaxC;
}
//////////////////////////////////////////////////
//UPDATING GLOBAL MAX CHARACTER.
temp1 = *result;
if(( (i+1)%localSize == 0 || i==size-1) && (temp > temp1 ))
{
read_mem_fence( CLK_GLOBAL_MEM_FENCE );
*result = temp;
write_mem_fence( CLK_GLOBAL_MEM_FENCE );
}
}
}

You are correct that threads will be overwriting each other's values, since your code is riddled with race conditions. In OpenCL, there is no way to synchronise between work-items that are in different work-groups. Instead of trying to achieve this kind of synchronisation with explicit fences, you can make your code much simpler by using the built-in atomic functions instead. In particular, there is an atomic_max built-in which solves your problem perfectly.
So, instead of the code you currently have to update both your local and global memory maximum values, just do something like this:
kernel void ascii_max(global int *input, global int *output, int size,
local int *localMax)
{
int i = get_global_id(0);
int l = get_local_id(0);
// Private reduction
int privateMax = '\0';
for (int idx = i; idx < size; idx+=get_global_size(0))
{
privateMax = max(privateMax, input[idx]);
}
// Local reduction
atomic_max(localMax, privateMax);
barrier(CLK_LOCAL_MEM_FENCE);
// Global reduction
if (l == 0)
{
atomic_max(output, *localMax);
}
}
This will require you to update your local memory scratch space and final result to use 32-bit integer values, but on the whole is a significantly cleaner approach to solving this problem (not to mention it actually works).
NON-ATOMIC SOLUTION
If you really don't want to use atomics, then you can implement a bog-standard reduction using local memory and work-group barriers. Here's an example:
kernel void ascii_max(global int *input, global int *output, int size,
local int *localMax)
{
int i = get_global_id(0);
int l = get_local_id(0);
// Private reduction
int privateMax = '\0';
for (int idx = i; idx < size; idx+=get_global_size(0))
{
privateMax = max(privateMax, input[idx]);
}
// Local reduction
localMax[l] = privateMax;
for (int offset = get_local_size(0)/2; offset > 1; offset>>=1)
{
barrier(CLK_LOCAL_MEM_FENCE);
if (l < offset)
{
localMax[l] = max(localMax[l], localMax[l+offset]);
}
}
// Store work-group result in global memory
if (l == 0)
{
output[get_group_id(0)] = max(localMax[0], localMax[1]);
}
}
This compares pairs of elements at a time using local memory as a scratch space. Each work-group will produce a single result, which is stored in global memory. If your data-set is small, you could run this with a single work-group (i.e. make global and local sizes the same), and this will work just fine. If it is larger, you could run a two-stage reduction by running this kernel twice, e.g.:
size_t N = ...; // something big
size_t local = 128;
size_t global = local*local; // Must result in at most 'local' number of work-groups
// First pass - run many work-groups using temporary buffer as output
clSetKernelArg(kernel, 1, sizeof(cl_mem), d_temp);
clEnqueueNDRangeKernel(..., &global, &local, ...);
// Second pass - run one work-group with temporary buffer as input
global = local;
clSetKernelArg(kernel, 0, sizeof(cl_mem), d_temp);
clSetKernelArg(kernel, 1, sizeof(cl_mem), d_output);
clEnqueueNDRangeKernel(..., &global, &local, ...);
I'll leave it to you to run them and decide which approach would be best for your own data-set.

How do I read ext2 root directory from mapped memory?

I'm making a Remote Filesystem Server for my university and I'm having some trouble with reading the root directory... Here's the thing:
I've read the root inode (inode 2) and it has consistent data, I mean that, for example, owner user Id field is set at '1000'. Then I proceed to read the contents of the inode data blocks, but when I try to access to the data block in question (the only one that is addressed in the inode i_block array, 240 on my debugging) all bytes are set to '0'. Can anyone help me with this? It's really important. Note: I cannot make it another way than with mapped memory and I'm not opening a real disk, but rather opening a .disk linux file. It has been created with the command-line
mkfs.ext2 -F -r 0 -b 1024 ext2.disk 30000
Here's my code:
#include <linux/ext2_fs.h>
typedef struct s_inode *pinode; /* Pointer to inode struct */
typedef struct s_direct *pdir; /* Pointer to direct struct */
int main(int argv, char *argc[]){
int *data;
pdir root = malloc(sizeof(struct s_direct));
/* Code for mpping .disk file, fetching supernode, and other ext2 data */
/* fsys is a global variable that holds general ext2 system data */
fsys->root = get_inode(2);
data = get_cont(fsys->root);
root = (pdir)getblock(data[0]);
}
pinode get_inode(int idx){
pinode inod;
int grp, offs;
grp = (idx-1)/fsys->superblock->s_inodes_per_group;
offs = (idx-1)%fsys->superblock->s_inodes_per_group;
inod = (pinode)&fsys->diskmap[(fsys->group[grp]->itab)+offs*sizeof(struct s_inode)];
return inod;
}
int *get_cont(pinode inod){
int *cont;
int *idx;
int i=0;
int *block;
idx = malloc(sizeof(int));
cont = malloc(sizeof(int));
while(i < inod->i_blocks && i<13) {
realloc(cont, i*sizeof(int));
cont[i]=inod->i_block[i];
i++;
}
if(i < inod->i_blocks){
*idx=13;
block=(int*)getblock(inod->i_block[*idx]);
fetchcont(block, idx, cont, inod->i_blocks, 0);
}
if(i < inod->i_blocks){
block=(int*)getblock(inod->i_block[*idx]);
fetchcont(block, idx, cont, inod->i_blocks, 1);
}
if(i < inod->i_blocks){
block=(int*)getblock(inod->i_block[*idx]);
fetchcont(block, idx, cont, inod->i_blocks, 2);
}
return cont;
}
int fetchcont(int *block, int *idx, int *cont, int lim, int lvl){
int i=0;
if(lvl == 0){
while((*idx) < lim && i<fsys->bsize){
realloc(cont, (*idx)*sizeof(int));
cont[*idx]=block[i];
(*idx)++;
i++;
}
if(i>=fsys->bsize){
return 1;
}else{
return 0;
}
}else{
lvl--;
while(i<fsys->bsize){
if(!fetchcont((int*)getblock(block[i]), idx, cont, lim, lvl)){
return 0;
}
i++;
}
}
}
void *getblock(int idx){
char *block;
int grp, offs;
grp = (idx-1)/fsys->superblock->s_blocks_per_group;
offs = (idx-1)%fsys->superblock->s_blocks_per_group;
block = &fsys->diskmap[fsys->group[grp]->blocks+offs*fsys->bsize];
return block;
}

Solved the problem. I assumed that block n was the n data block, but the offset included ALL the blocks. I've changed my getblock function to
void *getblock(int idx){
return &fsys->diskmap[fsys->bsize*idx];
}
and worked!