CUDA - int from device get not updated while copying it to host - pointers

I'm pretty new to CUDA (and to C as well). I'm trying to use the shared int as a flag to stop all device threads once finish is set, but when I copy finish back to the host it never gets updated. I can do this with a char *, but it doesn't work with a plain int.
minimal code example:
// Kernel from the question: thread 5 is meant to publish its index through
// `finish` and through the block-shared flag `shared`; the other threads then
// print a message and return once they see the flag set.
// NOTE(review): the kernel has several visible defects, marked inline below.
__global__ void bingo(int * finish){
// Block-shared flag. Only thread 5 ever writes it, so any thread that reads it
// before that write sees an uninitialized value.
__shared__ int shared;
if(threadIdx.x == 5){
printf("\nassign to finish %d",threadIdx.x);
shared = threadIdx.x;
// NOTE(review): this overwrites the local pointer parameter with the thread
// index cast to an address. The int that `finish` pointed to is never written,
// so the host-side copy cannot observe the value. Writing through the pointer
// (*finish = threadIdx.x;) is what was intended.
finish = (int*) threadIdx.x;
// NOTE(review): dereferences the pointer value assigned just above, not the
// buffer the caller passed in.
printf("GPU says: %d\n",*finish);
// NOTE(review): thread 5 returns without reaching the __syncthreads() below,
// so not every thread of the block arrives at the barrier.
return;
}
__syncthreads();
// NOTE(review): compares an int with NULL; in effect this is a non-zero test.
if(shared != NULL){
printf("\nreturn from thread: %d", threadIdx.x);
return;
}
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool checkCuda(cudaError_t status, const char* what) {
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Host driver: allocates one int on the device, launches `bingo` with one
// block of 10 threads, copies the int back to the host and prints it.
// Returns 0 on success and 1 if any CUDA call (or the kernel) reports an error.
int main() {
    int* threadBingo = NULL;
    if (!checkCuda(cudaMalloc((void**)&threadBingo, sizeof(int)), "cudaMalloc"))
        return 1;

    // A plain stack int is enough for a one-element copy target.
    int threadWhoMadeBingo = 0;

    // cudaMalloc does not clear the allocation; zero it so the printed value
    // is well defined even when no thread writes to it.
    bool ok = checkCuda(cudaMemset(threadBingo, 0, sizeof(int)), "cudaMemset");
    if (ok) {
        bingo<<<1,10>>>(threadBingo);
        // Launch errors surface via cudaGetLastError(); faults raised while
        // the kernel runs surface at the following synchronisation.
        ok = checkCuda(cudaGetLastError(), "bingo launch")
          && checkCuda(cudaDeviceSynchronize(), "bingo execution")
          && checkCuda(cudaMemcpy(&threadWhoMadeBingo, threadBingo, sizeof(int),
                                  cudaMemcpyDeviceToHost), "cudaMemcpy");
    }
    if (ok)
        printf("\n thread who made bingo %d\n", threadWhoMadeBingo);

    // Release the device buffer on every path, then tear the context down.
    cudaFree(threadBingo);
    cudaDeviceReset();
    return ok ? 0 : 1;
}
And the output:
assign to finish 5
GPU says: 5
return from thread: 0
return from thread: 1
return from thread: 2
return from thread: 3
return from thread: 4
return from thread: 6
return from thread: 7
return from thread: 8
return from thread: 9
thread who made bingo 0
As you can see, the last line should be 5 not 0

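OK, I found it:
The line finish = (int*) threadIdx.x; should be *finish = threadIdx.x;. The original overwrites the local pointer instead of writing through it, so the device memory that the host copies back is never modified.
I'll accept this answer in two days.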

Related

undirected Graph adjacency list implementation using C

Code :
#include <stdio.h>
#include <stdlib.h>
#define N 5
/* Singly linked list node: one integer payload plus a link to the next node. */
typedef struct list slist;
struct list
{
    int data;     /* value stored in this node */
    slist * next; /* following node (self-referential link) */
};
/* Prints every value of the list on one line, each followed by a space.
 * When the list has no nodes, prints "Empty Linked List" instead. */
void displayList(slist * start)
{
    slist * cursor = start;

    if(!cursor)
    {
        printf("Empty Linked List");
        return;
    }
    while(cursor)
    {
        printf("%d ",cursor->data);
        cursor=cursor->next;
    }
}
/* Appends `node` to the end of the list whose head pointer is `*start`.
 * `start` is the address of the head pointer, so the head itself can be set
 * when the list is still empty. */
void insertLast(slist * * start,slist * node)
{
slist * temp;
/* NOTE(review): this tests the address of the head pointer, not the head
 * itself. main() passes &start[i], which is never NULL, so this branch is
 * skipped even when *start is NULL. The test was presumably meant to be
 * *start==NULL. */
if(start==NULL){
(* start)=node;
return;
}
temp=(* start);
/* Walk to the last node. With an empty list temp is NULL here, so
 * temp->next is an invalid access. */
while((temp->next)!= NULL)
temp=temp->next;
temp->next=node;
return;
}
int main()
{
int i,j;
//slist * node;
char Ans;
/*printf("Write the number of vertices\n");
scanf("%d",&N);*/
slist * start[N];
for(i=0;i<N;i++)
start[i]=NULL;
for(i=0;i<N;i++)
{
for(j=i+1;j<N;j++)
{
printf("Is there a connection between V[%d] and V[%d]\n",(i+1),(j+1));
scanf(" %c",&Ans);
if(Ans=='y'||Ans=='Y')
{
slist * node1=(slist *)malloc(sizeof(slist));
node1->data=(j+1); node1->next=NULL;
insertLast(&start[i],node1);enter code here
slist * node2=(slist *)malloc(sizeof(slist));
node2->data=(i+1); node2->next=NULL;
insertLast(&start[j],node2);
}
}
}
for(i=0;i<N;i++)
{
displayList(start[i]);
printf("\n");
}
return 0;
}
The above code is showing segmentation fault at the line where while((temp->next)!=NULL) is written whereas while creation of linked lists, the same insertLast worked just fine. What is the fault in the code?
Your program crashes because you check whether start is NULL, but that does not guarantee that *start is non-NULL. When the list is empty, *start is NULL, so temp becomes NULL and temp->next in the while loop dereferences a NULL pointer, which causes the crash.
Changing this line -
if(start==NULL)
to
if(*start==NULL)
in insertLast() will fix the crash.
I also recommend to use a debugger like gdb to debug such issues.

Free up memory in C creating issue

I have a structure
/* A team: a name plus a fixed-size roster of players. */
typedef struct
{
/* Team name. initializeTeam stores the caller's pointer as-is (no copy). */
char * name;
/* Roster storage with room for 10 Player entries. */
Player players[10];
}Team;
Memory is leaking because I am using malloc in initializeTeam.
/* Allocates a Team, records its name and gives the first `players` roster
 * slots random ratings (defensive in 1..7, offensive in 1..10).
 *
 * players  number of roster slots to fill; values larger than the roster
 *          array are clamped to its capacity, values <= 0 fill nothing
 * name     stored by pointer, not copied; it must outlive the Team
 *
 * Returns a heap-allocated Team, or NULL when allocation fails. Returning the
 * pointer is not a leak: ownership passes to the caller, who releases it with
 * free() once the Team is no longer needed. */
Team * initializeTeam(int players, char *name)
{
    int i;
    int capacity;
    /* calloc zero-fills, so slots that are not filled below hold zeros
     * rather than indeterminate bytes. */
    Team *teama=calloc(1, sizeof(*teama));
    if(teama==NULL)
        return NULL;

    /* Never write past the end of the fixed-size roster array. */
    capacity=(int)(sizeof(teama->players)/sizeof(teama->players[0]));
    if(players>capacity)
        players=capacity;

    teama->name=name;
    for(i=0 ; i < players ; i++)
    {
        teama->players[i].defensive=((rand() % 7) + 1);
        teama->players[i].offensive=((rand() % 10) + 1);
    }
    return teama;
}
I don't want the memory to leak.
I could call free(teama); inside the function, but then the value is lost; since I am returning teama, I want the value to remain valid.
How can I go about it?

Creating multiple child processes with a single pipe

I need to create three child processes, each of which reads a string from the command line arguments and writes the string to a single pipe. The parent would then read the strings from the pipe and display all three of them on the screen. I tried doing it for two processes to test and it is printing one of the strings twice as opposed to both of them.
#include <stdio.h>
#include <unistd.h>
/* Question code: forks two children that should each write one command-line
 * string into a shared pipe, while the parent reads the pipe twice and prints
 * what it received.
 * NOTE(review): the defects marked inline explain the reported behaviour. */
int main (int argc, char *argv[]) {
/* NOTE(review): argv[1] and argv[2] are read without checking argc. */
char *character1 = argv[1];
char *character2 = argv[2];
char inbuf[100]; //creating an array with a max size of 100
int p[2]; // Pipe descriptor array
pid_t pid1; // defining pid1 of type pid_t
pid_t pid2; // defining pid2 of type pid_t
/* NOTE(review): a pipe() failure is reported but execution continues. */
if (pipe(p) == -1) {
fprintf(stderr, "Pipe Failed"); // pipe fail
}
pid1 = fork(); // fork
if (pid1 < 0) {
fprintf(stderr, "Fork Failed"); // fork fail
}
else if (pid1 == 0){ // if child process 1
close(p[0]); // close the read end
/* NOTE(review): sizeof(&inbuf[0]) is the size of a char pointer, not the
 * length of the string and not the size of inbuf. */
write(p[1], character1, sizeof(&inbuf[0])); // write character 1 to the pipe
}
else { // if parent, create a second child process, child process 2
pid2 = fork();
if (pid2 < 0) {
fprintf(stderr, "Fork Failed"); // fork fail
}
/* NOTE(review): `=` assigns 0 to pid2, so this condition is always false.
 * Child 2 never writes; both the parent and child 2 run the else branch. */
if (pid2 = 0) { // if child process 2
close(p[0]); // close the read end
write(p[1], character2, sizeof(&inbuf[0])); // write character 2 to the pipe
}
else { // if parent process
close(p[1]); // close the write end
/* NOTE(review): the return value of read() is ignored and inbuf is not
 * NUL-terminated before it is printed with %s. */
read(p[0], inbuf, sizeof(&inbuf[0])); // Read the pipe that both children write to
printf("%s\n", inbuf); // print
read(p[0], inbuf, sizeof(&inbuf[0])); // Read the pipe that both children write to
printf("%s\n", inbuf); // print
}
}
}
Your code doesn't keep looping until there's no more data to read. It does a single read. It also doesn't check the value returned by read(), but it should.
I've abstracted the fork() and write() (and error check) code into a function. This seems to work:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Forks once. The parent returns to the caller immediately; the new child
 * writes `string` (without its terminating NUL) to `fd` and then exits with
 * status 0, or with status 1 after reporting a short or failed write.
 * A failed fork is reported and terminates the calling process with status 1. */
static void child(int fd, const char *string)
{
    const pid_t forked = fork();
    const int len = strlen(string);

    /* Parent: nothing more to do here. */
    if (forked > 0)
        return;

    if (forked < 0)
    {
        fprintf(stderr, "%.5d: failed to fork (%d: %s)\n",
                (int)getpid(), errno, strerror(errno));
        exit(1);
    }

    /* Child: one write, then terminate so it never returns into main(). */
    if (write(fd, string, len) == len)
        exit(0);

    fprintf(stderr, "%.5d: failed to write on pipe %d (%d: %s)\n",
            (int)getpid(), fd, errno, strerror(errno));
    exit(1);
}
/* Expects exactly three string arguments. Creates one pipe, starts one child
 * per argument (each writes its string to the pipe), then prints whatever the
 * pipe delivers, one line per read(), until read() returns 0 or an error.
 * Returns 1 on a usage error or pipe() failure, 0 otherwise. */
int main (int argc, char *argv[])
{
    enum { READ_END = 0, WRITE_END = 1 };
    int fds[2];      /* pipe descriptors */
    char chunk[100]; /* receives up to 100 bytes per read */
    char **arg;

    if (argc != 4)
    {
        fprintf(stderr, "Usage: %s str1 str2 str3\n", argv[0]);
        return 1;
    }
    if (pipe(fds) == -1)
    {
        fprintf(stderr, "Pipe Failed");
        return 1;
    }

    /* argv[1] .. argv[3]: one writer process each. */
    for (arg = &argv[1]; arg != &argv[4]; ++arg)
        child(fds[WRITE_END], *arg);

    /* The parent only reads; closing its write end lets read() reach EOF. */
    close(fds[WRITE_END]);

    for (;;)
    {
        int got = read(fds[READ_END], chunk, sizeof(chunk));
        if (got <= 0)
            break;
        printf("%.*s\n", got, chunk);
    }
    return 0;
}
I ran the command multiple times, each time using the command line:
./p3 'message 1' 'the second message' 'a third message for the third process'
On one run, the output was:
the second messagemessage 1
a third message for the third process
On another, I got:
the second messagemessage 1a third message for the third process
And on another, I got:
message 1
the second messagea third message for the third process
(This is on a MacBook Pro with Intel Core i7, running Mac OS X 10.8.3, and using GCC 4.7.1.)

Segmentation fault while using MPI_File_open

I'm trying to read from a file for an MPI application. The cluster has 4 nodes with 12 cores in each node. I have tried running a basic program to compute rank and that works. When I added MPI_File_open it throws an exception at runtime
BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES = EXIT CODE: 139
The cluster has MPICH2 installed and has a Network File System. I check MPI_File_open with different parameters like ReadOnly mode, MPI_COMM_WORLD etc.
Can I use MPI_File_open with Network File System?
/* Question code: initialises MPI, has ranks 0 and 1 print a greeting, and has
 * rank 0 open and close the file "lw1.wei" through MPI-IO before finalising.
 * NOTE(review): see the inline notes for the cause of the crash. */
int main(int argc, char* argv[])
{
int myrank = 0;
int nprocs = 0;
/* NOTE(review): i, icomm, status and info are never used below. */
int i = 0;
MPI_Comm icomm = MPI_COMM_WORLD;
MPI_Status status;
MPI_Info info;
/* NOTE(review): this is a NULL pointer to a file handle, not a handle.
 * It is passed to MPI_File_open below as the place to store the handle, so
 * there is no MPI_File object behind it. */
MPI_File *fh = NULL;
int error = 0;
MPI_Init(&argc, &argv);
MPI_Barrier(MPI_COMM_WORLD); // Wait for all processor to start
MPI_Comm_size(MPI_COMM_WORLD, &nprocs); // Get number of processes
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); // Get own rank
/* Stagger the ranks by 0.1 s per rank. */
usleep(myrank*100000);
if ( myrank == 1 || myrank == 0 )
printf("Hello from %d\r\n", myrank);
if (myrank == 0)
{
/* NOTE(review): only MPI_MODE_UNIQUE_OPEN is passed; no access mode such as
 * MPI_MODE_RDONLY is combined with it -- confirm against the MPI_File_open
 * documentation. */
error = MPI_File_open( MPI_COMM_SELF, "lw1.wei", MPI_MODE_UNIQUE_OPEN,
MPI_INFO_NULL, fh);
if ( error )
{
printf("Error in opening file\r\n");
}
else
{
printf("File successfully opened\r\n");
}
/* NOTE(review): the close is issued whether or not the open succeeded. */
MPI_File_close(fh);
}
MPI_Barrier(MPI_COMM_WORLD); //! Wait for all the processors to end
MPI_Finalize();
if ( myrank == 0 )
{
printf("Number of Processes %d\n\r", nprocs);
}
return 0;
}
You forgot to allocate an MPI_File object before opening the file. You may either change this line:
MPI_File *fh = NULL;
into:
MPI_File fh;
and open file by giving fh's address to MPI_File_open(..., &fh). Or you may simply allocate memory from heap using malloc().
MPI_File *fh = malloc(sizeof(MPI_File));

Bdb crashes on concurrent write, c++ linux/osx

I am having issues with bdb and its locking mechanisms.
The following code results in either a seg fault, or what looks like a deadlock/endless loop
#include <iostream>
#include "db_cxx.h"
#include <boost/thread.hpp>
using namespace std;
// Worker body: inserts 5000 records into `db`, using each integer value from
// int(start) up to (but excluding) start + 5000 as both key and data.
//
// db     database handle to write to
// start  first value to insert; truncated to an integer, as before
//
// Each value is stored as a double. The original built the Dbt objects over an
// int while passing sizeof(double), which reads past the int and stores bytes
// that compare_double then interprets as a double.
void thread_instance(Db* db, double start){
    const double finish = start + 5000;
    for(int x = static_cast<int>(start); x < finish; x++){
        // Give the Dbt objects real double storage whose size matches the
        // length passed alongside it.
        double value = x;
        Dbt key(&value, sizeof(value));
        Dbt ddata(&value, sizeof(value));
        db->put(NULL, &key, &ddata, 0);
    }
}
// Comparison callback: copies one double out of each DBT's data pointer and
// orders them numerically. Returns 1 when a > b, -1 when a < b, 0 otherwise.
// `dbp` is not used.
int
compare_double(DB *dbp, const DBT *a,const DBT *b){
    double lhs, rhs;
    // Copy rather than cast the data pointers.
    memcpy(&lhs, a->data, sizeof lhs);
    memcpy(&rhs, b->data, sizeof rhs);

    if (lhs > rhs)
        return 1;
    if (lhs < rhs)
        return -1;
    return 0;
}
// Question code: empties data/, opens a Berkeley DB environment and a B-tree
// database with a double comparator, then runs three threads that each insert
// 5000 records through the same Db handle and waits for them to finish.
int main(){
// Removes everything in data/ so each run starts from scratch.
system("rm data/*");
// NOTE(review): DB_THREAD is not among these flags although the handle is
// shared by three threads below -- confirm against the Berkeley DB docs.
u_int32_t env_flags = DB_CREATE | DB_INIT_MPOOL | DB_INIT_CDB;
DbEnv* env = new DbEnv(0);
env->set_cachesize(0, 2000000, 1);
// NOTE(review): m is never used.
u_int32_t m = 0;
env->open("data/", env_flags, 0);
Db* db = new Db(env, 0);
db->set_bt_compare(compare_double);
db->set_flags(DB_DUPSORT);
db->set_pagesize(32768);
db->set_dup_compare(compare_double);
u_int32_t oFlags = DB_CREATE;
// NOTE(review): both handlers are empty, so a failure in db->open is silently
// discarded and the threads below run against whatever state the handle is in.
try {
db->open(NULL, "db", NULL, DB_BTREE, oFlags, 0);
} catch (DbException &e) {
} catch (std::exception &e) {
}
vector<boost::thread*> threads;
// Thread x inserts starting at x * 5000.
for(int x=0; x < 3; x++){
threads.push_back(new boost::thread(boost::bind(&thread_instance, db, (x *5000))));
}
for(int x=0; x < threads.size(); x++){
threads[x]->join();
}
// NOTE(review): the thread objects, db and env are never deleted or closed.
};
I have tried DB_INIT_LOCK as well, but with the same results.
The stack trace:
Program received signal EXC_BAD_ACCESS, Could not access memory.
Reason: KERN_INVALID_ADDRESS at address: 0x0000000000000019
[Switching to process 34816]
0x00000001002e36a7 in __bamc_put ()
(gdb) ba
#0 0x00000001002e36a7 in __bamc_put ()
#1 0x0000000100386689 in __dbc_iput ()
#2 0x0000000100387a6c in __dbc_put ()
#3 0x0000000100383092 in __db_put ()
#4 0x0000000100397888 in __db_put_pp ()
#5 0x00000001002cee59 in Db::put ()
#6 0x0000000100001f88 in thread_instance (db=0x1007006c0, start=5000) at src/main.cpp:16
#7 0x0000000100698254 in thread_proxy ()
#8 0x00007fff80cb9456 in _pthread_start ()
#9 0x00007fff80cb9309 in thread_start ()
Does anyone know what could be going on here ?
I'd recommend checking for errors after db->open and printing the message, if any, instead of silently ignoring it:
catch (DbException &e) {
cerr << e.what() << endl;
}
Also, it looks like you have not set the flags correctly: at least DB_THREAD is required because you share the handle between threads.

Resources