I have a message m that I encrypt by the code
c = m xor [m<<6] xor [m<<10]
(m<<x means that I shift it by x bits - eg 1001<<2 = 0100)
I need to decrypt this message.
I have tried code like:
c xor [c<<6] xor [c<<10]
OR
c xor [c>>6] xor [c>>10]
but none of them worked.
The function c = m xor [m<<6] xor [m<<10] is a bijection (i.e. reversible on a given modulo 2n). So you're good to go...
Just piece-wise de-XOR the value by 6 and 10 bits, starting from the lowest bits.
int main() {
uint32_t n = 0xfffffff;
for (uint32_t m = 0; m < n; ++m) {
uint32_t c = m ^ (m << 6) ^ (m << 10); // c = encrypted value
uint32_t x = c;
x ^= ((x & 0x3F) << 6);
x ^= ((x & 0x3FF) << 10);
x ^= ((x & 0xFC0) << 6);
x ^= ((x & 0x3F000) << 6);
x ^= ((x & 0xFFC00) << 10);
x ^= ((x & 0xFC0000) << 6);
x ^= ((x & 0x3F000000) << 6);
x ^= ((x & 0x3FF00000) << 10);
if (m != x) {
printf("Mismatch: %X > %X > %X\n", m, c, x); // should never happen
break;
}
}
}
Related
I have a message m that I encrypt by the code
c = m xor [m>>1]
(m>>x means that I shift it by x bits - eg 1000>>1 = 0100)
I need to decrypt this message.
I have tried code like for 6 but its Not working for 1 bit shift:
int main() {
uint32_t n = 0xff;
for (uint32_t m = 0; m < n; ++m) {
printf("input: %X \n", m);
uint32_t c = m ^ (m >> 6);// c = encrypted value
printf("encrypted value: %X \n", m);
uint32_t x = c;
x ^= ((x & 0x3F) >> 6);
x ^= ((x & 0xFC0) >> 6);
x ^= ((x & 0x3F000) >> 6);
x ^= ((x & 0xFC0000) >> 6);
x ^= ((x & 0x3F000000) >> 6);
if (m != x) {
printf("Mismatch: %X > %X > %X\n", m, c, x); // should never happen
break;
}
}
}
A xor-shift with a right-shift by 1 bit can be undone like this:
x ^= x >> 1;
x ^= x >> 2;
x ^= x >> 4;
x ^= x >> 8;
x ^= x >> 16;
A xor-shift with a right-shift by 6 can be undone like you showed, but also like this, which is shorter:
x ^= (x >> 6) ^ (x >> 12);
x ^= x >> 18;
The program needs to measure the maximum number of modulus operations it took to calculate the GCD of 2 integers at each value of "i" from 8 to n.
For example, if i = 8, I have to find the combination of (a,b) that took the most modulus operations to calculate the GCD.
I have to calculate GCD for all values of 'a' (1-i) for every value of 'b' between (1-i) and do this at every value of "i" until it reaches n.
For example, if n = 15, the output must look like this:
At i = 8; GCD (5, 8) = 1 took 4 modulus operations
At i = 9; GCD (5, 8) = 1 took 4 modulus operations
At i = 10; GCD (5, 8) = 1 took 4 modulus operations
At i = 11; GCD (5, 8) = 1 took 4 modulus operations
At i = 12; GCD (5, 8) = 1 took 4 modulus operations
At i = 13; GCD (8, 13) = 1 took 5 modulus operations
At i = 14; GCD (8, 13) = 1 took 5 modulus operations
At i = 15; GCD (8, 13) = 1 took 5 modulus operations
Here's my code so far because I'm so confused on the implementation:
class Euclidean
{
public:
//Finds the GCD using modulus operations until the remainder is 0
int calcGCD(int x , int y)
{
int remainder;
while(y != 0)
{
remainder = x % y;
x = y;
y = remainder;
}
return x;
}
//Calculates the maximum amount of oprations required to calculate GCD
//at (a,b) for every i from 8 to n
int CalculateMaxOperations(int n)
{
int GCD;
int a;
int b;
for(i = 8; i <= n; i++)
{
for(b = 1; b <= i; b++)
{
if( b <= i)
{
for(a = 1; a <= i; a++)
{
if(a <= i)
{
GCD = calcGCD(a, b);
}
}
}
}
}
}
//Print statement reveals the set (a,b) that took the most operations to calculate
//the GCD at every single value 'i' until it eventually reaches 'n.'
void PrintResults(int c, int d, int operations)
{
cout << "At i = " << i << ";" << " GCD (" << c << "," << d << ")" << " = " <<
calcGCD (c, d) << " took " << operations << " operations " << endl;
}
private:
int i;
};
int main()
{
Euclidean e;
int x = e.CalculateMaxOperations(15);
return 0;
}
I have several similar kernels to generate random data and store it in global memory. I'm always using the same algorithm to randomize, but due to variable scope issues (I need to keep track of data) I fail to avoid severe code duplications.
Are there any ways to avoid this? Generating random data in OpenCL seems a fairly standard task, but it goes against any good coding standards to have this level of code duplication. For example, here are two of my kernels:
////////////////////////////////////////////////////////////////////////////////
// OpenCL Kernel for Mersenne Twister RNG -- applied to AWGN channel
////////////////////////////////////////////////////////////////////////////////
__kernel void MersenneTwisterAWGN(__global double* d_Rand,
__global int* seeds,
__global long* inputcw,
int nPerRng, float sigma)
{
int globalID = get_global_id(0);
double c = 2.0/(sigma*sigma);
int iState, iState1, iStateM, iOut;
unsigned int mti, mti1, mtiM, x;
unsigned int mt[MT_NN];
//Initialize current state
mt[0] = seeds[globalID];
for (iState = 1; iState < MT_NN; iState++)
mt[iState] = (1812433253U*(mt[iState-1]^(mt[iState-1]>>30))+iState) & MT_WMASK;
iState = 0;
mti1 = mt[0];
for (iOut = 0; iOut < nPerRng; iOut=iOut+2) {
iState1 = iState + 1;
iStateM = iState + MT_MM;
if(iState1 >= MT_NN) iState1 -= MT_NN;
if(iStateM >= MT_NN) iStateM -= MT_NN;
mti = mti1;
mti1 = mt[iState1];
mtiM = mt[iStateM];
// MT recurrence
x = (mti & MT_UMASK) | (mti1 & MT_LMASK);
x = mtiM ^ (x >> 1) ^ ((x & 1) ? matrix_a : 0);
mt[iState] = x;
iState = iState1;
//Tempering transformation
x ^= (x >> MT_SHIFT0);
x ^= (x << MT_SHIFTB) & mask_b;
x ^= (x << MT_SHIFTC) & mask_c;
x ^= (x >> MT_SHIFT1);
double u1 = ((double)x + 1.0f) / 4294967296.0f;
iState1 = iState + 1;
iStateM = iState + MT_MM;
if(iState1 >= MT_NN) iState1 -= MT_NN;
if(iStateM >= MT_NN) iStateM -= MT_NN;
mti = mti1;
mti1 = mt[iState1];
mtiM = mt[iStateM];
// MT recurrence
x = (mti & MT_UMASK) | (mti1 & MT_LMASK);
x = mtiM ^ (x >> 1) ^ ((x & 1) ? matrix_a : 0);
mt[iState] = x;
iState = iState1;
//Tempering transformation
x ^= (x >> MT_SHIFT0);
x ^= (x << MT_SHIFTB) & mask_b;
x ^= (x << MT_SHIFTC) & mask_c;
x ^= (x >> MT_SHIFT1);
double u2 = ((double)x + 1.0f) / 4294967296.0f;
double r = sqrt(-2.0f * log(u1));
double phi = 2 * PI * u2;
u1 = r * cos(phi);
u1 = inputcw[iOut]+sigma*u1;
u1=1/(1+exp(-c*u1));
d_Rand[globalID * nPerRng + iOut]=log((1-u1)/u1);
if (iOut!=nPerRng-1) {
u2 = r * sin(phi);
u2 = inputcw[iOut+1]+sigma*u2;
u2=1/(1+exp(-c*u2));
u2=log((1-u2)/u2);
d_Rand[globalID * nPerRng + iOut+1]=u2;
}
}
}
and
////////////////////////////////////////////////////////////////////////////////
// OpenCL Kernel for Mersenne Twister RNG -- applied to BSC channel
////////////////////////////////////////////////////////////////////////////////
__kernel void MersenneTwisterBSC(__global double* d_Rand,
__global int* seeds,
__global long* inputcw,
int nPerRng, float flipProb)
{
int globalID = get_global_id(0);
int iState, iState1, iStateM, iOut;
unsigned int mti, mti1, mtiM, x;
unsigned int mt[MT_NN];
//Initialize current state
mt[0] = seeds[globalID];
for (iState = 1; iState < MT_NN; iState++)
mt[iState] = (1812433253U*(mt[iState-1]^(mt[iState-1]>>30))+iState) & MT_WMASK;
iState = 0;
mti1 = mt[0];
for (iOut = 0; iOut < nPerRng; iOut=iOut+1) {
iState1 = iState + 1;
iStateM = iState + MT_MM;
if(iState1 >= MT_NN) iState1 -= MT_NN;
if(iStateM >= MT_NN) iStateM -= MT_NN;
mti = mti1;
mti1 = mt[iState1];
mtiM = mt[iStateM];
// MT recurrence
x = (mti & MT_UMASK) | (mti1 & MT_LMASK);
x = mtiM ^ (x >> 1) ^ ((x & 1) ? matrix_a : 0);
mt[iState] = x;
iState = iState1;
//Tempering transformation
x ^= (x >> MT_SHIFT0);
x ^= (x << MT_SHIFTB) & mask_b;
x ^= (x << MT_SHIFTC) & mask_c;
x ^= (x >> MT_SHIFT1);
double c = log((1-flipProb)/flipProb);
double u = ((double)x + 1.0f) / 4294967296.0f;
u = (2*isless(u,flipProb)-1)*inputcw[iOut]*c;
d_Rand[globalID * nPerRng + iOut]=u;
}
}
Are there any ways, tricks or methods to avoid this? Subroutines seem unable to make proper use of the variables (especially mt), so I didn't manage to cut it down in the way other languages would allow to.
Or should I just accept this as a necessary evil in OpenCL and keep managing 10 different kernels this way?
At Khronos' site, it says
OpenCL programs may also contain auxiliary functions and constant data that can be used by __kernel functions.
An example to generate random number between 0.0f and 1.0f per thread:
Core function to iterate a seed:
uint wang_hash(uint seed)
{
seed = (seed ^ 61) ^ (seed >> 16);
seed *= 9;
seed = seed ^ (seed >> 4);
seed *= 0x27d4eb2d;
seed = seed ^ (seed >> 15);
return seed;
}
Initialization and iteration of each threads seed:
// id=thread id, rnd=seed array
void wang_rnd_init(__global unsigned int * rnd,int id)
{
uint maxint=0;
maxint--; // could be a 0xFFFFFFFF
uint rndint=wang_hash(id);
rnd[id]=rndint;
}
// id=thread id, rnd=seed array
float wang_rnd(__global unsigned int * rnd,int id)
{
uint maxint=0;
maxint--; // could be a 0xFFFFFFFF
uint rndint=wang_hash(rnd[id]);
rnd[id]=rndint;
return ((float)rndint)/(float)maxint;
}
Usage in a random grayscale color pixel generator kernel:
__kernel void rnd_1(__global unsigned int * rnd, __global int *rgba)
{
int id=get_global_id(0);
float rgba_register=wang_rnd(rnd,id);
rgba[id] = ((int)(rgba_register * 255) << 24) | ((int)(rgba_register * 255) << 16) | ((int)(rgba_register * 255) << 8) | ((int)(rgba_register * 255));
}
and wang_rnd() can be used in other kernels without defining it twice if they are in same compiled context, same as putting all relevant kernels and functions in the same file to be compiled.
Auxilliary functions are not limited to registers and global memory. They can take local and constant memory parameters too. Since they are working with device side memory mainly, they can take and return structs too.
Please see the following Question recently posted on HackerRank
Adam is standing at point (a,b) in an infinite 2D grid. He wants to know if he can reach point (x,y) or not. The only operation he can do is to move to point (a+b,b), (a,a+b), (a-b,b), or (a,a-b) from some point (a,b). It is given that he can move to any point on this 2D grid,i.e., the points having positive or negative X(or Y) co-ordinates.Tell Adam whether he can reach (x,y) or not.
https://www.hackerrank.com/contests/infinitum-jun14/challenges/possible-path
I realized that both x and y must be a sum of some multiple of a and b...
So x%(a+b) OR x%(a-b) should be divisible by either a or b
and similarly for y...
But the following does not work ...
long long int xb,yb,xa,ya;
xb = x % b;
xa = x % a;
yb = y % b;
ya = y % a;
// for x
bool cxbaplusb = a+b==0 ? xb == 0: (xb%(a+b))==0;
bool cxbaminb = a-b==0 ? xb == 0: (xb%(a-b))==0;
// for y
bool cybaplusb = a+b==0 ? yb == 0: (yb%(a+b))==0;
bool cybaminb = a-b==0 ? yb == 0: (yb%(a-b))==0;
// for x
bool cxaaplusb = a+b==0 ? xa == 0: (xa%(a+b))==0;
bool cxaaminb = a-b==0 ? xa == 0: (xa%(a-b))==0;
// for y
bool cyaaplusb = a+b==0 ? ya == 0: (ya%(a+b))==0;
bool cyaaminb = a-b==0 ? ya == 0: (ya%(a-b))==0;
if ( (cxbaplusb || cxbaminb || cxaaplusb || cxaaminb) && (cybaplusb || cybaminb || cyaaplusb || cyaaminb) )
std::cout << "YES" << std::endl;
else
std::cout << "NO" << std::endl;
But this is not working ... Am I missing any conditions ? Any suggestions ??
The following mathematical explanation may help you achieve your goal.
Source: https://hr-filepicker.s3.amazonaws.com/infinitum-jun14/editorials/2372-possible-path.pdf
Please check the input size
1 ≤ a,b,x,y ≤ 10^18
https://www.hackerrank.com/challenges/possible-path
CPP won't support this much size, it will throw garbage value resulting in wrong answer
def gcd(a, b):
if(b == 0):
return a
return gcd(b, a%b)
t=input()
for i in range(t):
a = map(int, raw_input().split())
if(gcd(a[0],a[1]) == gcd(a[2],a[3])):
print "YES"
else:
print "NO"
#include<iostream>
using namespace std;
int gcd(int a, int b){
return b ? gcd(b, a%b) : a;
}
int main(){
int t;
cin >> t;
while (t--){
int a, b, x, y;
cin >> a >> b >> x >> y;
if (gcd(abs(a), abs(b)) == gcd(abs(x), abs(y)))
cout << "YES" << endl;
else
cout << "NO" << endl;
}
return 0;
}
Initially I had same doubt as you , but its not " x and y must be a sum of some multiple of a and b " because we can move from (a,b) to any point in (a+b,b), (a-b,b),(a,b+a),(a,b-a) in case if you move (a+b,b) now a=a+b,b=b so for this value of a,b ( not the given one here a is updated to a+b) only you can do the above operation so its not that x and y always must be sum of some multiple of a,b . so that you have to go with gcd method
public class Solution {
public static void main(String[] args) {
int a,b,x,y;
if(gcd(x,y)=gcd(a,b))
System.out.println("Adam can reach")
else
System.out.println("Adam cannot reach")
}
}
Currently I have to work in an environment where the power-operator is bugged. Can anyone think of a method temporarily work around this bug and compute a^b (both floating point) without a power function or operator?
if you have sqrt() available:
double sqr( double x ) { return x * x; }
// meaning of 'precision': the returned answer should be base^x, where
// x is in [power-precision/2,power+precision/2]
double mypow( double base, double power, double precision )
{
if ( power < 0 ) return 1 / mypow( base, -power, precision );
if ( power >= 10 ) return sqr( mypow( base, power/2, precision/2 ) );
if ( power >= 1 ) return base * mypow( base, power-1, precision );
if ( precision >= 1 ) return sqrt( base );
return sqrt( mypow( base, power*2, precision*2 ) );
}
double mypow( double base, double power ) { return mypow( base, power, .000001 ); }
test code:
void main()
{
cout.precision( 12 );
cout << mypow( 2.7, 1.23456 ) << endl;
cout << pow ( 2.7, 1.23456 ) << endl;
cout << mypow( 1.001, 1000.7 ) << endl;
cout << pow ( 1.001, 1000.7 ) << endl;
cout << mypow( .3, -10.7 ) << endl;
cout << pow ( .3, -10.7 ) << endl;
cout << mypow( 100000, .00001 ) << endl;
cout << pow ( 100000, .00001 ) << endl;
cout << mypow( 100000, .0000001 ) << endl;
cout << pow ( 100000, .0000001 ) << endl;
}
outputs:
3.40835049344
3.40835206431
2.71882549461
2.71882549383
393371.348073
393371.212573
1.00011529225
1.00011513588
1.00000548981
1.00000115129
You can use the identity ab = e(b log a), then all the calculations are relative to the same base e = 2.71828...
Now you have to implement f(x) = ln(x), and g(x) = e^x. The fast, low precision method would be to use lookup tables for f(x) and g(x). Maybe that's good enough for your purposes. If not, you can use the Taylor series expansions to express ln(x) and e^x in terms
of multiplication and addition.
given that you can use sqrt, this simple recursive algorithm works:
Suppose that we're calculating aˆb. The way the algorithm works is by doing Fast Exponentiation on the exponent until we hit the fractional part, once in the fractional part, do a modified binary search, until we're close enough to the fractional part.
double EPS = 0.0001;
double exponentiation(double base, double exp){
if(exp >= 1){
double temp = exponentiation(base, exp / 2);
return temp * temp;
} else{
double low = 0;
double high = 1.0;
double sqr = sqrt(base);
double acc = sqr;
double mid = high / 2;
while(abs(mid - exp) > EPS){
sqr = sqrt(sqr);
if (mid <= exp) {
low = mid;
acc *= sqr;
} else{
high = mid;
acc *= (1/sqr);
}
mid = (low + high) / 2;
}
return acc;
}
}