*a=10
*b=20
How to swap them without using the third variable? Output should be like
*a=20
*b=10
Not sure if the interviewer was looking for XOR over something else but it seems you can simply use +, -, and x. Should work if a is bigger or negative as well.
*a+=*b
*b-=*a
*b=*b x -1
*a-=*b
In your example that would give us:
*a+=*b --> *a = 30
*b-=*a --> *b = -10
*b=*b x -1 --> *b = 10
*a-=*b --> *a = 20
Here is a simple code to do so:
#include <stdio.h>
#include <stdlib.h>
void usingXOR(int** x, int** y){
unsigned long long a = (unsigned long long)*x;
unsigned long long b = (unsigned long long)*y;
a = a^b;
b = a^b;
a = a^b;
*x = (int*)a;
*y = (int*)b;
}
void main(){
int x=5;
int y=10;
int* a = &x;
int* b = &y;
//If you only want to swap the values the pointers are pointing to
//Here the addresses the pointers are holding dont get swapped
(*a) = (*a)+(*b);
(*b) = (*a)-(*b);
(*a) = (*a)-(*b);
//If you want to swap addresses in the pointers
//printf("Before swap address a: %p\n", a);
//printf("Before swap address b: %p\n", b);
//usingXOR(&a,&b);
printf("a: %d\n", *a);
printf("b: %d\n", *b);
//printf("After swap address a: %p\n", a);
//printf("After swap address b: %p\n", b);
}
Related
int main ()
{
int num_1 = 111;
int *p = &num_1;
int &ref1 = *p;
int *(&ref2) = p;
printf("&ref2 : %d\n", &ref2);
printf("*ref2 : %d\n", *ref2);
printf("ref2 : %d\n", ref2);
return 0;
}
I get pointer literally points address.
So in int &ref1 = *p; ref1's address is equal to num_1's address, hence have the same value as num1 which is 111.
However, what I don't understand is ref2 part.
If int *(&ref2) is the value of ref2 variable, shouldn't it have the same address of the num_1?
If int *(&ref2) is the value of ref2 variable, shouldn't it have the same address of the num_1?
The declaration int *(&ref2) = p; defines ref2 to be a reference to p.
Part of your misunderstanding may come from wrong output you get from using inappropriate printf conversion specifiers; it's wrong to print addresses with %d - in your first and third printf use %p instead. It might become clearer if you add
printf("p : %p\n", p);
I am trying to include a local atomic similar to that described by DarkZeros here within a working reduction kernel. The kernel finds a largest value within a set of points; the aim of the local atomic is to allow me to filter selected point_ids into an output array without any gaps.
At present when I use the local atomic to increment the addition to a local array the kernel runs but produces a wrong overall highest point. If the atomic line is commented out then a correct result returns.
What is going on here and how do I fix it?
Simplified kernel code:
__kernel void reduce(__global const float4* dataSet, __global const int* input, const unsigned int items, //points and index
__global int* output, __local float4* shared, const unsigned int n, //finding highest
__global int* filtered, __global const float2* tri_input, const unsigned int pass, //finding filtered
__global int* global_count //global count
){
//set everything up
const unsigned int group_id = get_global_id(0) / get_local_size(0);
const unsigned int local_id = get_local_id(0);
const unsigned int group_size = items;
const unsigned int group_stride = 2 * group_size;
const int local_stride = group_stride * group_size;
__local float4 *zeroIt = &shared[local_id];
zeroIt->x = 0; zeroIt->y = 0; zeroIt->z = 0; zeroIt->w = 0;
volatile __local int local_count_set_1;
volatile __local int global_val_set_1;
volatile __local int filter_local[64];
if(local_id==0){
local_count_set_1 = 0;
global_val_set_1 = -1;
}
barrier(CLK_LOCAL_MEM_FENCE);
int i = group_id * group_stride + local_id;
while (i < n){
//load up a pair of points using the index to locate them within a massive dataSet
int ia = input[i];
float4 a = dataSet[ia-1];
int ib = input[i + group_size];
float4 b = dataSet[ib-1];
//on the first pass kernel increment a local count
if(pass == 0){
filter_local[atomic_inc(&local_count_set_1)] = 1; //including this line causes an erroneous highest point result
//filter_local[local_id] = 1; //but including this line does not
//atomic_inc(&local_count_set_1); //and neither does this one
}
//find the highest of the pair
float4 result;
if(a.z>b.z) result = a;
else result = b;
//load up the previous highest result locally
float4 s = shared[local_id];
//if the previous highest beat this, stick, else twist
if(s.z>result.z){ result = s; }
shared[local_id] = result;
i += local_stride;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (group_size >= 512){
if (local_id < 256) {
__local float4 *a = &shared[local_id];
__local float4 *b = &shared[local_id+256];
if(b->z>a->z){ shared[local_id] = shared[local_id+256]; }
}}
//repeat barrier ops in increments down to group_size>=2 - this filters the highest result in shared
//finally, return the filtered highest result of shared to the global level
barrier(CLK_LOCAL_MEM_FENCE);
if(local_id == 0){
__local float4 *v = &shared[0];
int send = v->w ;
output[group_id] = send+1;
}}
[UPDATE]: When the atomic_inc line is included the 'wrong' highest point result is always a point near the end of the test dataset. I'm guessing that this means that the atomic_inc is affecting a latter comparison, but I'm not sure exactly what or where yet.
[UPDATE]: Edited code to simplify/clarify/update with debugging tweaks. Still not working and it is driving me loopy.
Total face-palm moment. In the setup phase of the kernel there are the lines:
if(local_id==0){
local_count_set_1 = 0;
global_val_set_1 = -1;
}
barrier(CLK_LOCAL_MEM_FENCE);
When these are split and the local_count_set_1 is included within the while loop, the error does not occur. i.e:
if(local_id==0) global_val_set_1 = -1;
barrier(CLK_LOCAL_MEM_FENCE);
while (i < n){
if(local_id==0) local_count_set_1 = 0;
barrier(CLK_LOCAL_MEM_FENCE);
....
if(pass = 0){
filter_local[atomic_inc(&local_count_set_1)] = 1;
}
....
I'm hoping this fixes the issue // will update if not.
Aaaand that's a weekend I'll never get back.
I do not want to mess up my RAM or make problem / bug that related to memory.
So.. what do I need to do before i switch / change a variable pointer pointed-to?
Or.. what i've doing is just fine?
Here is my source code:
#include <stdio.h>
int main(int argc, char *argv[])
{
int x = 10;
int y = 87;
int arr[5] = {1,2,3,4,5};
int *ptr;
ptr = &x;
printf("Now ptr pointed to x --> *ptr = %d ~ ptr address: %p \n", *ptr, ptr);
ptr = &y;
printf("Now ptr pointed to y --> *ptr = %d ~ ptr address: %p \n", *ptr, ptr);
ptr = arr;
printf("1st 2 byte: %d \n", *ptr);
*ptr++;
printf("2nd next 2 byte: %d \n", *ptr);
*ptr++;
printf("3rd next 2 byte: %d \n", *ptr);
*ptr++;
printf("4th next 2 byte: %d \n", *ptr);
// Now i want to switch to x again :D
ptr = &x;
printf("Now ptr pointed to x AGAIN --> *ptr = %d ~ ptr address: %p \n", *ptr, ptr);
return 0;
}
Please enlightenment.
Thank You
There doesn't seem to be a problem with what you have. The pointer is simply changing where it points to, but those background variables are not being changed at all. The variables x, y and your array will be alive for as long as the main function is running, as they are within the scope of main. If you want them to be alive for even less time, you could restrict them to other functions that are called from main.
void xVariable()
{
int x = 7;
}
int main()
{
int y = 8;
xVariable();
for(int i = 0; i < 9; i++)
{
int z = 9;
}
return 0;
}
In this example, y will be alive the entire run of the program. x will only be alive while the function xVariable is running. i and z are only alive for the duration of the loop. This is all a basic example of how variable scope works, but I would recommend looking it further if memory is going to be important.
I have int A, B, C. And A is in range 0-9999, B is 0-99, C is 0-99.
Because the function must return only one double, I think of putting them all into one number. Otherwise I need to call function three times.
But I cannot write an efficient code to do this. This will be called millions times, so it should be quite effective, but no ASM.
I need a function double pack3int_to_double(int A, int B, int C) {}
Couldn't you just store A + 1000B + 100000C?
For example, if you wanted to store A = 1234, B = 6, and C = 89, you'd just store
89061234
CCBAAAA
You can then extract the numbers by casting the double to an int and using standard integer division and modulus tricks to recover the individual values.
Hope this helps!
If A<10,000 and B & C <100, A can be expressed with 14 bits, and B & C with 8 bits. Thus you need 30 bits in total.
You could therefore pack/unpack the integers by shifting it to the right place:
int packed = A + B<<14 + C<<22;
A = packed & 0x3FFF; B = (packed >> 14) & 0xFF; C = (packed >> 22) & 0xFF;
Bit shifting is of course MUCH faster than multiply/divide, and you can cast the int to a double and vice versa.
This is technically not legal C code, so you would use this at your own risk:
typedef union {
double x;
struct {
unsigned a : 14;
unsigned b : 7;
unsigned c : 7;
} y;
} result_t;
The C standard doesn't allow using a union member to write a value and a different one to read it out, but I am not aware of a compiler that does the static analysis to diagnose such a problem (it doesn't mean one won't do so in the future). Also, using certain int values may result in a trap representation for a double. But, if you know your system will not generate any trap representations, you can consider using this.
double pack3int_to_double(int A, int B, int C) {
result_t r;
r.y.a = A;
r.y.b = B;
r.y.c = C;
return r.x;
}
void unpack3int_from_double (double X, int *A, int *B, int *C) {
result_t r = { X };
*A = r.y.a;
*B = r.y.b;
*C = r.y.c;
}
You can use out parameters in function call and retrieve all 3 int variables.
You could return a NaN double with the data stored in the mantissa. That gives you 53 bits to utilize. Should be plenty.
http://en.m.wikipedia.org/wiki/NaN
Inspired by your answers, this is what I come up so far. This should be quite efficient, and only 32 bits are used, so the exponent of the double is not touched.
struct pack_abc {
unsigned short a;
unsigned char b, c;
int safety;
};
double pack3int_to_double(int A, int B, int C) {
struct pack_abc R = {A, B, C, 0}; // or 0 could be replaced with something smater, like NaN?
return *(double*)&R;
}
void main() {
int w = 1234, a = 56, d = 78;
int W, A, D, i;
double p = pack3int_to_double(w, a, d);
// we got the data packed into 'p', now let's unpack it
struct pack_abc *R = (struct pack_abc*) & p;
printf("%i %i %i\n", (int)R->a, (int)R->b, (int)R->c);
}
I'm using the rainbowduino and it has some methods that take individual r g b values as unsigned chars, and some that take a 24bit rgb colour code.
I want to convert r g b values into this 24bit colour code of type uint32_t (so that all my code only has to use r g b values.
Any ideas?
I have already tried uint32_t result = r << 16 + g << 8 + b;
r = 100 g =200 b=0 gave green, but r=0 g=200 b=0 gave nothing
Rb.setPixelXY(unsigned char x, unsigned char y, unsigned char colorR, unsigned char colorG, unsigned char colorB)
This sets the pixel(x,y)by specifying each channel(color) with 8bit number.
Rb.setPixelXY(unsigned char x, unsigned char y, unit32_t colorRGB)
This sets the pixel(x,y)by specifying a 24bit RGB color code.
The drivers code is:
void Rainbowduino::setPixelXY(unsigned char x, unsigned char y, uint32_t colorRGB /*24-bit RGB Color*/)
{
if(x > 7 || y > 7)
{
// Do nothing.
// This check is used to avoid writing to out-of-bound pixels by graphics function.
// But this might slow down setting pixels (remove this check if fast disply is desired)
}
else
{
colorRGB = (colorRGB & 0x00FFFFFF);
frameBuffer[0][x][y]=(colorRGB & 0x0000FF); //channel Blue
colorRGB = (colorRGB >> 8);
frameBuffer[1][x][y]=(colorRGB & 0x0000FF); //channel Green
colorRGB = (colorRGB >> 8);
frameBuffer[2][x][y]=(colorRGB & 0x0000FF); //channel Red
}
}
So I would think similar to the above :
uint8_t x,y,r,b,g;
uint32_t result = (r << 16) | (g << 8) | b;
Rb.setPixelXY(x, y, result);
should work. It I think the above likely needs the parenthesis, to ensure proper ordering, as "+" is higher than "<<". Also likely won't hurt but the "|" is better, as not to prevent undesired carry's.
P.S. Remember when shifting to be unsigned, unless you want arithmetic shift versus logical.
and on that note I don't like shifts as they are often messed up and inefficient. Rather a union is simple and efficient.
union rgb {
uint32_t word;
uint8_t byte[3];
struct {
uint8_t blue;
uint8_t green;
uint8_t red;
} color ;
}rgb ;
// one way to assign by discrete names.
rbg.color.blue = b;
rbg.color.green = g;
rbg.color.red = r;
//or assign using array
rgb.byte[0] = b;
rgb.byte[1] = g;
rgb.byte[2] = r;
// then interchangeably use the whole integer word when desired.
Rb.setPixelXY(x, y, rgb.word);
no messing with keeping track of shifts.
One way to approach this would be to shift the bits to the left...
uint32_t result = r << 16 + g << 8 + b;