Illegal memory accessed while using double pointer in CUDA

Illegal memory accessed while using double pointer in CUDA - pointers

In the code given below, d_slot is a double pointer initialized to NULL in the main.
It's value is changed in the kernel 'test'.
The code which i'm going to implement requires the value of d_slot to be carried and not be reverted back to NULL as it happens after 'test' is completed.
(This is perhaps because the double pointer is passed by value and not by reference)
#include <stdio.h>
#include <cuda_runtime.h>
#include <cuda_profiler_api.h>
#include <helper_cuda.h>
#include <unistd.h>
#include <stdlib.h>
struct radix_tree_root {
unsigned int height;
struct radix_tree_node *rnode;
};
struct radix_tree_node {
unsigned int count;
void *slots[64];
};
__global__ void test1(struct radix_tree_node **d_slot,struct radix_tree_root *d_root)
{
(d_slot) = &d_root->rnode;
printf("From test1: d_slot = %p\t*d_slot = %p\n",d_slot,*d_slot);
}
__global__ void test2(struct radix_tree_node **d_slot)
{
printf("From test2: d_slot = %p\n",d_slot);
}
__global__ void test3(struct radix_tree_node ***d_slot,struct radix_tree_root *d_root)
{
(*d_slot) = &d_root->rnode;
}
int
main(void)
{
struct radix_tree_root *root,*d_root;
struct radix_tree_node **d_slot=NULL;
cudaError_t err = cudaSuccess;
root = (struct radix_tree_root *) malloc(sizeof(struct radix_tree_root));
root->height = 0;
root->rnode =NULL;
//allocate memory to d_root in the GPU//
err = cudaMalloc((void **)&d_root, sizeof(struct radix_tree_root));
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to allocate device d_root (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
//copy root to d_root
err = cudaMemcpy(d_root, root, (sizeof(struct radix_tree_root)), cudaMemcpyHostToDevice);
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to copy root from host to device (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
printf("\nFrom the main: d_root = %p\n",d_root);
test1<<<1,1>>>(d_slot,d_root);
err = cudaGetLastError();//brief Returns the last error from a runtime call
cudaDeviceSynchronize();
test2<<<1,1>>>(d_slot);
err = cudaGetLastError();//brief Returns the last error from a runtime call
cudaDeviceSynchronize();
//test3<<<1,1>>>(&d_slot,d_root);
err = cudaGetLastError();//brief Returns the last error from a runtime call
cudaDeviceSynchronize();
//test2<<<1,1>>>(d_slot);
err = cudaGetLastError();//brief Returns the last error from a runtime call
cudaDeviceSynchronize();
err = cudaFree(d_root);
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to free device d_root (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
free(root);
printf("successful execution of entire program\n");
return 0;
}
The output of this code is:
From the main: d_root = 0x900ca0000
From test1: d_slot = 0x900ca0008 *d_slot = (nil)
From test2: d_slot = (nil)
successful execution of entire program
This was all fine. But when i uncommented the 'test3' and the 'test2' kernels given in the above code,
I expected the value of d_slot to be carried forward...
However, there was an error encountered...
The output of the code with 'test3' and 'test2' uncommented is:
From the main: d_root = 0x900ca0000
From test1: d_slot = 0x900ca0008 *d_slot = (nil)
From test2: d_slot = (nil)
Failed to free device d_root (error code an illegal memory access was encountered)!
So my question is,
"How do I successfully assign value to d_slot (a double pointer)
in the kernel without losing it's value after the the completion of kernel-execution?"

There needs to be some location in graphics memory that test1 can write to and test2 and test3 can read from. You could use cudaMalloc a second time to allocate space for a struct radix_tree_node * like so:
cudaMalloc((void **)&d_slot, sizeof(struct radix_tree_root *));
Then test1 can write a pointer value to *d_slot and test2 and test3 can read the value that test1 wrote from *d_slot.

Related

OpenCL: Basic example not working. clSetKernelArg -38 Error

I am attempting a very simple OpenCL example. I have developed the following code below. It compiles a simple kernel, and then I create a simple float* buffer and set it to a cl::Buffer. However, when I attempt to call the kernel.setArg() function, it crashes, with an error -38. This error states that my cl::Buffer is invalid. I have no idea why this is happening:
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>
#define MULTI_LINE_STRING(ARG) #ARG
namespace op
{
const char *resizeAndMergeKernel = MULTI_LINE_STRING(
__kernel void testKernel(__global float* image)
{
}
);
}
void testCL(){
cl::Device device;
cl::Context context;
cl::CommandQueue queue;
int deviceId = 0;
// Load Device
std::vector<cl::Platform> platforms;
std::vector<cl::Device> devices;
std::string deviceName;
cl_uint i, type;
cl::Platform::get(&platforms);
type = platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
if( type == CL_SUCCESS)
{
// Get only relavent device
cl::Context allContext(devices);
std::vector<cl::Device> gpuDevices;
gpuDevices = allContext.getInfo<CL_CONTEXT_DEVICES>();
bool deviceFound = false;
for(int i=0; i<gpuDevices.size(); i++){
if(i == deviceId){
device = gpuDevices[i];
context = cl::Context(device);
queue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE);
deviceFound = true;
cout << "Made new GPU Instance: " << deviceId << endl;
break;
}
}
if(!deviceFound)
{
throw std::runtime_error("Error: Invalid GPU ID");
}
}
// Create Kernel
cl::Program program = cl::Program(context, op::resizeAndMergeKernel, true);
cl::Kernel kernel = cl::Kernel(program, "testKernel");
// Simple Buffer
cl_int err;
float* test = new float[3*224*224];
cl::Buffer x = cl::Buffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * 3 * 224 * 224, (void*)test, &err);
cout << err << endl;
kernel.setArg(0,x); // CRASHES WITH cl::Error -38
}
As you can see the last line kernel.setArg(0,x) crashes with error -38.

It's not a "crash", it's an error code. OpenCL error -38 is CL_INVALID_MEM_OBJECT. It means the cl_mem_obj is not valid. It is because you are passing a cl::Buffer object to setArg, but you need to instead pass the cl_mem handle which represents that buffer. The cl::Buffer operator() method returns that. So use kernel.setArg(0,x()). Note the () are the added part (yes, it's subtle).

dlopen/dlsym: error getting function pointer

I am attempting to get the function pointer by using dlopen and dlsym, however I have been unable to get it working correctly. It fails when trying to doing the dlsym call. Following is my code.
Any help please?
#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
int test() {
printf("%s", "test()");
return 123;
}
int main() {
char * functionname = "test";
void* handle = dlopen(NULL,RTLD_LAZY|RTLD_GLOBAL);
if (!handle) {
fprintf(stderr, "Couldn't open handle: %s\n",
dlerror());
exit(1);
}
int (*fun)() = (int (*)())dlsym(handle, functionname);
if (fun == NULL) {
fprintf(stderr, "Couldn't find function: %s\n",functionname);
exit(1);
}
int a = fun();
printf("result: %d \n", a);
}

Probably you need to specify to the linker to export the symbols as dynamic. With gcc you have to use -rdynamic.
You can check the exported dynamic symbols with objdump -T.

Creating multiple child processes with a single pipe

I need to create three child processes, each of which reads a string from the command line arguments and writes the string to a single pipe. The parent would then read the strings from the pipe and display all three of them on the screen. I tried doing it for two processes to test and it is printing one of the strings twice as opposed to both of them.
#include <stdio.h>
#include <unistd.h>
int main (int argc, char *argv[]) {
char *character1 = argv[1];
char *character2 = argv[2];
char inbuf[100]; //creating an array with a max size of 100
int p[2]; // Pipe descriptor array
pid_t pid1; // defining pid1 of type pid_t
pid_t pid2; // defining pid2 of type pid_t
if (pipe(p) == -1) {
fprintf(stderr, "Pipe Failed"); // pipe fail
}
pid1 = fork(); // fork
if (pid1 < 0) {
fprintf(stderr, "Fork Failed"); // fork fail
}
else if (pid1 == 0){ // if child process 1
close(p[0]); // close the read end
write(p[1], character1, sizeof(&inbuf[0])); // write character 1 to the pipe
}
else { // if parent, create a second child process, child process 2
pid2 = fork();
if (pid2 < 0) {
fprintf(stderr, "Fork Failed"); // fork fail
}
if (pid2 = 0) { // if child process 2
close(p[0]); // close the read end
write(p[1], character2, sizeof(&inbuf[0])); // write character 2 to the pipe
}
else { // if parent process
close(p[1]); // close the write end
read(p[0], inbuf, sizeof(&inbuf[0])); // Read the pipe that both children write to
printf("%s\n", inbuf); // print
read(p[0], inbuf, sizeof(&inbuf[0])); // Read the pipe that both children write to
printf("%s\n", inbuf); // print
}
}
}

Your code doesn't keep looping until there's no more data to read. It does a single read. It also doesn't check the value returned by read(), but it should.
I've abstracted the fork() and write() (and error check) code into a function. This seems to work:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
static void child(int fd, const char *string)
{
pid_t pid = fork();
int len = strlen(string);
if (pid < 0)
{
fprintf(stderr, "%.5d: failed to fork (%d: %s)\n",
(int)getpid(), errno, strerror(errno));
exit(1);
}
else if (pid > 0)
return;
else if (write(fd, string, len) != len)
{
fprintf(stderr, "%.5d: failed to write on pipe %d (%d: %s)\n",
(int)getpid(), fd, errno, strerror(errno));
exit(1);
}
else
exit(0);
}
int main (int argc, char *argv[])
{
char inbuf[100]; //creating an array with a max size of 100
int p[2]; // Pipe descriptor array
if (argc != 4)
{
fprintf(stderr, "Usage: %s str1 str2 str3\n", argv[0]);
return 1;
}
if (pipe(p) == -1)
{
fprintf(stderr, "Pipe Failed"); // pipe fail
return 1;
}
for (int i = 0; i < 3; i++)
child(p[1], argv[i+1]);
int nbytes;
close(p[1]); // close the write end
while ((nbytes = read(p[0], inbuf, sizeof(inbuf))) > 0)
printf("%.*s\n", nbytes, inbuf); // print
return 0;
}
I ran the command multiple times, each time using the command line:
./p3 'message 1' 'the second message' 'a third message for the third process'
On one run, the output was:
the second messagemessage 1
a third message for the third process
On another, I got:
the second messagemessage 1a third message for the third process
And on another, I got:
message 1
the second messagea third message for the third process
(This is on a MacBook Pro with Intel Core i7, running Mac OS X 10.8.3, and using GCC 4.7.1.)

Error writing and reading a structure from PIPE

I have a client server program where client writes a command on PIPE for server. While reading the command from Server it reads only first char of command and throws error. Can anyone help me with this?
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <string.h>
#include <sys/wait.h>
#include <mqueue.h>
#include <sys/stat.h>
#include "Functions.h"
#define MSGBUFFER_SIZE 50000
pid_t serverPid;
pid_t clientPid;
typedef struct msgbuf {
int messageLength;
int messageType;
char messageText[MSGBUFFER_SIZE];
} Message_buf;
int writePIPE(int fd, Message_buf *inputMessage){
printf("\n In write pipe message length :%d",inputMessage->messageLength);
printf("\n In write pipe message Data :%s",inputMessage->messageText);
ssize_t n=write(fd,inputMessage,inputMessage->messageLength);
printf("\n Size :%d", n);
return n;
}
ssize_t readPIPE(int fd, Message_buf *outputMessage)
{
ssize_t len;
ssize_t n;
if((n=read(fd,outputMessage,sizeof(outputMessage)))==0)
{
printf("\n Error");
return 0;
}
if((len=outputMessage->messageLength)>0)
{
printf("\n Length ---->:%d",len);
if((n=read(fd,outputMessage->messageText,strlen(outputMessage->messageText)))!=len)
printf("\n ERRRRROR expected %d got %d",len,n);
}
//printf("\n In Read PIPE: %s",outputMessage->messageText);
return len;
}
void Server(int readfd,int writefd)
{
Message_buf server_MessageBuf;
ssize_t length;
if((length=readPIPE(readfd,&server_MessageBuf))==0)
{
printf("\n End of file while reading pathname");
}
//server_MessageBuf.messageText[length]='\0';
printf("\n LENGTH :%d",server_MessageBuf.messageLength);
printf("\n Printing in server: %s\n",server_MessageBuf.messageText);
}
void Client(int readfd,int writefd)
{
char inputFileName[MAX_SIZE];
char inputOperation[MAX_SIZE];
char *cCommandInput = NULL;
char *fileOperation = NULL;
char *operation = (char *) malloc(MAX_SIZE);
int commandValidateStatus = 0;
int commandInterpretationStatus=0;
Message_buf client_MessageBuf;
for(;;)
{
while(1)
{
cCommandInput = acceptInput();
fileOperation = (char *) malloc(sizeof(cCommandInput));
strcpy(fileOperation,cCommandInput);
/**Function call to determine operation read/delete/exit/invalid choice and filename*****/
commandInterpretationStatus = interpretCommand(cCommandInput,
inputOperation, inputFileName);
operation = inputOperation;
/**Function call to validate the input command******/
commandValidateStatus = validateCommand(
commandInterpretationStatus, inputOperation, inputFileName);
if(commandValidateStatus==-1)
{
printf("\n Invalid Operation");
}
/*Exit command entered***/
if (commandValidateStatus == 1)
{
/*Code to clear resources */
kill(serverPid,SIGKILL);
kill(clientPid,SIGKILL);
exit(0);
}
/***Read or Delete****/
if (commandValidateStatus == 2 || commandValidateStatus == 3)
{
printf("\n Read or Delete\n");
strcpy(client_MessageBuf.messageText,fileOperation);
client_MessageBuf.messageLength=strlen(fileOperation);
client_MessageBuf.messageType=1;
if((writePIPE(writefd,&client_MessageBuf))<0)
{
printf("\n Error writing on client side ");
}
//read(readfd,*client_MessageBuf,sizeof(client_MessageBuf));
//printf("\n Reding server responsed");
//printf("%s",client_MessageBuf.messageText);
}
}
}
}
int main()
{
int pipe1[2],pipe2[2];
pipe(pipe1);
pipe(pipe2);
pid_t pid;
pid=fork();
serverPid=pid;
if(pid==0)
{
/*Call Server*/
close(pipe1[1]);
close(pipe2[0]);
Server(pipe1[0], pipe2[1]);
}
else
{
close(pipe1[0]);
close(pipe2[1]);
Client(pipe2[0],pipe1[1]);
}
return 0;
}

It looks like the code writes and reads struct msgbuf incorrectly:
typedef struct msgbuf {
int messageLength;
int messageType;
char messageText[MSGBUFFER_SIZE];
} Message_buf;
// ...
strcpy(client_MessageBuf.messageText,fileOperation);
client_MessageBuf.messageLength=strlen(fileOperation);
client_MessageBuf.messageType=1;
if((writePIPE(writefd,&client_MessageBuf))<0)
// ....
int writePIPE(int fd, Message_buf *inputMessage){
printf("\n In write pipe message length :%d",inputMessage->messageLength);
printf("\n In write pipe message Data :%s",inputMessage->messageText);
ssize_t n=write(fd,inputMessage,inputMessage->messageLength);
printf("\n Size :%d", n);
return n;
}
The above pieces that write struct msgbuf only write the first messageLength bytes of the structure which doesn't include the length of messageLength and messageType members, i.e. it truncates the object.
When reading:
ssize_t readPIPE(int fd, Message_buf *outputMessage)
{
// ...
if((n=read(fd,outputMessage,sizeof(outputMessage)))==0)
It reads only sizeof(outputMessage) bytes, which is the size of the pointer, not the object. Even if you fix it by changing it to sizeof(*outputMessage) that won't do enough, since that would expect to read the complete object whereas the writing piece truncates the object.
A good start to fix it would be to split the message into two parts: header and payload. The header is a structure of a fixed size, e.g.:
typedef struct {
int messageType;
int payloadLength;
} MessageHeader;
The payload is a variable-length part following the header. This way it would first write the entire header object into the pipe followed by payloadLength bytes of payload. When reading it would first read again the entire header and then read exactly payloadLength bytes.
Also note, that read() and write() calls may read or write less then asked, so that case needs to be explicitly handled by keeping reading or writing until the exact number of bytes has been processed.

GMainContext have ref_count > 0 after unref

I am not getting ref_count to decrease properly for my GMainContext. The example program here is a small version of a large program (which uses threads, hence the need to create a context and push it on the thread).
GMainLoop *loop;
GMainContext *ctx;
struct conn
{
GSocketClient *client;
GSocketConnection *conn;
GInputStream *in;
GOutputStream *out;
gchar data[8192];
unsigned int count;
};
static void
read_done_cb(GObject *source_object, GAsyncResult *res, gpointer user_data)
{
struct conn *c = (struct conn *)user_data;
gssize len = g_input_stream_read_finish(c->in, res, NULL);
g_input_stream_read_async(c->in, c->data, sizeof c->data / sizeof *c->data, G_PRIORITY_DEFAULT, NULL, read_done_cb, c);
if (c->count++ == 1) {
printf("End of life as I know it...\n");
g_main_loop_quit(loop);
}
}
static void
write_done_cb(GObject *source_object, GAsyncResult *res, gpointer user_data)
{
}
static void
connect_done_cb(GObject *source_object, GAsyncResult *res, gpointer user_data)
{
printf("## %s\n", __FUNCTION__);
struct conn *c = (struct conn *)user_data;
c->conn = g_socket_client_connect_to_host_finish(c->client, res, NULL);
c->in = g_io_stream_get_input_stream(G_IO_STREAM(c->conn));
c->out = g_io_stream_get_output_stream(G_IO_STREAM(c->conn));
char *data = "GET /axis-cgi/mjpg/video.cgi HTTP/1.0\r\n\r\n";
g_output_stream_write_async(c->out, data, strlen(data), G_PRIORITY_DEFAULT, NULL, write_done_cb, c);
g_input_stream_read_async(c->in, c->data, sizeof c->data / sizeof *c->data, G_PRIORITY_DEFAULT, NULL, read_done_cb, c);
}
int
main(int argc, char **argv)
{
g_type_init();
struct conn *c = g_malloc0(sizeof *c);
ctx = g_main_context_new();
loop = g_main_loop_new(ctx, FALSE);
g_main_context_push_thread_default(ctx);
c->client = g_socket_client_new();
g_socket_client_connect_to_host_async(c->client, "10.85.25.20", 80, NULL, connect_done_cb, c);
g_main_loop_run(loop);
g_io_stream_close(G_IO_STREAM(c->conn), NULL, NULL);
g_object_unref(c->client);
g_object_unref(c->conn);
g_main_context_pop_thread_default(ctx);
g_main_loop_unref(loop);
g_main_context_unref(ctx);
return 0;
}
Using gdb, inserting breakpoint just before return I can see that ctx still have one ref count:
(gdb) p ctx->ref_count
$2 = 1
If I do another g_main_context_unref(ctx); everything shuts down as expected. I do not understand where I get this ownership though.
Thanks in advance for your help

I found the error. I read_done_cb I issued another g_input_stream_read_async and immediately after quitting the main loop. g_input_stream_read_async upped the ref_count but GMainLoop never got a chance to return to my callback (and decreasing the ref_count on my GMainContext).
Moving the call to g_input_stream_read_async in my callback to below the if statement
static void
read_done_cb(GObject *source_object, GAsyncResult *res, gpointer user_data)
{
struct conn *c = (struct conn *)user_data;
gssize len = g_input_stream_read_finish(c->in, res, NULL);
if (c->count++ == 1) {
printf("End of life as I know it...\n");
g_main_loop_quit(loop);
}
g_input_stream_read_async(c->in, c->data, sizeof c->data / sizeof *c->data, G_PRIORITY_DEFAULT, NULL, read_done_cb, c);
}
correctly resolved the number of ref counts on my main context.
Silly mistake. Hopefully someone will find some use of my post at least.

g_main_context_new(), g_main_loop_new(), and g_main_context_push_thread_default() all ref the context. g_main_context_pop_thread_default(), g_main_loop_unref(), and g_main_context_unref() all unref it. So your intuition is sound.
I would use a watchpoint in gdb: watch ctx->ref_count to find out where the extra reference is being added.

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Illegal memory accessed while using double pointer in CUDA - pointers

Related

OpenCL: Basic example not working. clSetKernelArg -38 Error

dlopen/dlsym: error getting function pointer

Creating multiple child processes with a single pipe

Error writing and reading a structure from PIPE

GMainContext have ref_count > 0 after unref

Categories

Resources