Midori128 incorrect ciphertext - encryption
I have a project in which I must implement a variety of security protocols for my Arduino Uno R3, such as Present, Misty, Prince, and so on. At the moment I am attempting to implement Midori128. I managed to get Midori64 to work properly, but for Midori128 the encryption is incorrect, while the decryption works properly.
This is the code at the moment. Bear in mind that I pulled this code from GitHub and then changed it around to get it to work on the Arduino as efficiently as possible; but, as I said, the encryption is incorrect, and I don't know why.
#include <stdio.h>
#include <stdint.h>
// 4-bit substitution table. Midori128_S_Box looks up the high and the low
// nibble of a bit-permuted byte in it. (A reply in the post calls it "Sb1".)
static uint16_t s_box[16] = { 0x1,0x0,0x5,0x3,0xe,0x2,0xf,0x7,0xd,0xa,0x9,0xb,0xc,0x8,0x4,0x6 };
// Round constants: 19 rows, one row per round index r, each holding a 0/1
// value per state cell. The round key-add functions XOR row r with the key.
static uint16_t const_key[19][16] = { { 0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1 },{ 0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0 },
{ 1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,1 },{ 0,1,1,0,0,0,1,0,0,0,0,1,0,0,1,1 },
{ 0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1 },{ 1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0 },
{ 0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,0 },{ 0,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0 },
{ 1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1 },{ 0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0 },
{ 0,1,1,1,0,0,0,1,1,0,0,1,0,1,1,1 },{ 0,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0 },
{ 0,1,0,1,0,0,0,1,0,0,1,1,0,0,0,0 },{ 1,1,1,1,1,0,0,0,1,1,0,0,1,0,1,0 },
{ 1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0 },{ 0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1 },
{ 0,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0 },{ 0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0 },
{0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0} };
/* Bit shuffle 0 on the low byte of x: swaps bit 7 with bit 3 and bit 5 with
 * bit 1, leaving bits 6, 4, 2 and 0 in place. Applying it twice restores the
 * byte. Bits above bit 7 are discarded. */
uint16_t bit_permutation_0(uint16_t x)
{
    const uint16_t up4   = (x & 0x0A) << 4;   /* bits 3,1 -> bits 7,5 */
    const uint16_t down4 = (x & 0xA0) >> 4;   /* bits 7,5 -> bits 3,1 */
    const uint16_t fixed = x & 0x55;          /* bits 6,4,2,0 stay    */
    return up4 | down4 | fixed;
}
/* Bit shuffle 1 on the low byte of x. Source bit -> destination bit:
 * 6->7, 1->6, 0->5, 7->4, 2->3, 5->2, 4->1, 3->0.
 * Bits above bit 7 are discarded. */
uint16_t bit_permutation_1(uint16_t x)
{
    const uint16_t up1   = (x & 0x44) << 1;   /* bits 6,2     -> 7,3     */
    const uint16_t up5   = (x & 0x03) << 5;   /* bits 1,0     -> 6,5     */
    const uint16_t down3 = (x & 0xB8) >> 3;   /* bits 7,5,4,3 -> 4,2,1,0 */
    return up1 | up5 | down3;
}
/* Undoes bit_permutation_1 on the low byte of x. Source bit -> destination:
 * 4->7, 7->6, 2->5, 1->4, 0->3, 3->2, 6->1, 5->0.
 * Bits above bit 7 are discarded. */
uint16_t inv_bit_permutation_1(uint16_t x)
{
    const uint16_t up3   = (x & 0x17) << 3;   /* bits 4,2,1,0 -> 7,5,4,3 */
    const uint16_t down1 = (x & 0x88) >> 1;   /* bits 7,3     -> 6,2     */
    const uint16_t down5 = (x & 0x60) >> 5;   /* bits 6,5     -> 1,0     */
    return up3 | down1 | down5;
}
/* Bit shuffle 2 on the low byte of x. Source bit -> destination bit:
 * 5->7, 4->6, 3->5, 6->4, 1->3, 0->2, 7->1, 2->0.
 * Bits above bit 7 are discarded. */
uint16_t bit_permutation_2(uint16_t x)
{
    const uint16_t up2   = (x & 0x3B) << 2;   /* bits 5,4,3,1,0 -> 7,6,5,3,2 */
    const uint16_t down2 = (x & 0x44) >> 2;   /* bits 6,2       -> 4,0       */
    const uint16_t down6 = (x & 0x80) >> 6;   /* bit 7          -> 1         */
    return up2 | down2 | down6;
}
/* Undoes bit_permutation_2 on the low byte of x. Source bit -> destination:
 * 1->7, 4->6, 7->5, 6->4, 5->3, 0->2, 3->1, 2->0.
 * Bits above bit 7 are discarded. */
uint16_t inv_bit_permutation_2(uint16_t x)
{
    const uint16_t up6   = (x & 0x02) << 6;   /* bit 1          -> 7         */
    const uint16_t up2   = (x & 0x11) << 2;   /* bits 4,0       -> 6,2       */
    const uint16_t down2 = (x & 0xEC) >> 2;   /* bits 7,6,5,3,2 -> 5,4,3,1,0 */
    return up6 | up2 | down2;
}
/* Bit shuffle 3 on the low byte of x. Source bit -> destination bit:
 * 0->7, 3->6, 6->5, 5->4, 4->3, 7->2, 2->1, 1->0.
 * Bits above bit 7 are discarded. */
uint16_t bit_permutation_3(uint16_t x)
{
    const uint16_t up7   = (x & 0x01) << 7;   /* bit 0          -> 7         */
    const uint16_t up3   = (x & 0x08) << 3;   /* bit 3          -> 6         */
    const uint16_t down1 = (x & 0x76) >> 1;   /* bits 6,5,4,2,1 -> 5,4,3,1,0 */
    const uint16_t down5 = (x & 0x80) >> 5;   /* bit 7          -> 2         */
    return up7 | up3 | down1 | down5;
}
/* Undoes bit_permutation_3 on the low byte of x. Source bit -> destination:
 * 2->7, 5->6, 4->5, 3->4, 6->3, 1->2, 0->1, 7->0.
 * Bits above bit 7 are discarded. */
uint16_t inv_bit_permutation_3(uint16_t x)
{
    const uint16_t up5   = (x & 0x04) << 5;   /* bit 2          -> 7         */
    const uint16_t up1   = (x & 0x3B) << 1;   /* bits 5,4,3,1,0 -> 6,5,4,2,1 */
    const uint16_t down3 = (x & 0x40) >> 3;   /* bit 6          -> 3         */
    const uint16_t down7 = (x & 0x80) >> 7;   /* bit 7          -> 0         */
    return up5 | up1 | down3 | down7;
}
/* Substitute both nibbles of the low byte of y through s_box. */
static uint16_t midori128_sub_nibbles(uint16_t y)
{
    return (s_box[(y & 0xf0) >> 4] << 4) ^ s_box[y & 0x0f];
}

/*
 * Apply one of the four 8-bit S-box variants to the byte x.
 *
 * r  variant selector: r % 4 equal to 0, 1 or 2 picks bit-permutation pair
 *    0, 1 or 2; any other remainder (3, or a negative one) picks pair 3.
 * x  input byte; only bits 7..0 are used.
 *
 * Every variant permutes the bits of x, substitutes both nibbles through
 * s_box, then applies the matching inverse permutation.
 * Returns the substituted byte.
 */
uint16_t Midori128_S_Box(int r, uint16_t x)
{
    uint16_t y;
    /* Exactly one variant must run. With separate `if` statements and an
     * `else` attached only to the last of them, selectors 0 and 1 would fall
     * through into variant 3 and have their result overwritten. */
    switch (r % 4)
    {
    case 0:
        y = bit_permutation_0(x);
        y = midori128_sub_nibbles(y);
        y = bit_permutation_0(y);      /* permutation 0 is its own inverse */
        break;
    case 1:
        y = bit_permutation_1(x);
        y = midori128_sub_nibbles(y);
        y = inv_bit_permutation_1(y);
        break;
    case 2:
        y = bit_permutation_2(x);
        y = midori128_sub_nibbles(y);
        y = inv_bit_permutation_2(y);
        break;
    default:
        y = bit_permutation_3(x);
        y = midori128_sub_nibbles(y);
        y = inv_bit_permutation_3(y);
        break;
    }
    return y;
}

/*
 * SubCell layer of Midori128: cell i of the 16-cell state is replaced by
 * S-box variant (i mod 4) of itself.
 *
 * r      round index. Kept so existing callers compile unchanged, but not
 *        used: the Midori128 specification selects the S-box by the cell
 *        position, not by the round number.
 * state  16 cells, substituted in place.
 */
void SubCell(int r,uint16_t *state)
{
    int i;
    (void)r;
    for (i = 0; i <= 15; i++)
    {
        state[i] = Midori128_S_Box(i, state[i]);
    }
}
/* Reorder the 16 state cells in place: new cell k receives the old cell at
 * position from[k]. */
void ShuffleCell(uint16_t *state)
{
    static const unsigned char from[16] = {
        0, 10, 5, 15, 14, 4, 11, 1, 9, 3, 12, 6, 7, 13, 2, 8
    };
    uint16_t before[16];
    int k;

    /* Snapshot first so every read sees the pre-shuffle value. */
    for (k = 0; k < 16; ++k)
        before[k] = state[k];
    for (k = 0; k < 16; ++k)
        state[k] = before[from[k]];
}
/* Reorder the 16 state cells in place with the index map that undoes
 * ShuffleCell: new cell k receives the old cell at position from[k]. */
void Inv_ShuffleCell(uint16_t *state)
{
    static const unsigned char from[16] = {
        0, 7, 14, 9, 5, 2, 11, 12, 15, 8, 1, 6, 10, 13, 4, 3
    };
    uint16_t before[16];
    int k;

    /* Snapshot first so every read sees the pre-shuffle value. */
    for (k = 0; k < 16; ++k)
        before[k] = state[k];
    for (k = 0; k < 16; ++k)
        state[k] = before[from[k]];
}
/* Mix each group of four consecutive cells in place: every cell becomes the
 * XOR of the other three cells of its group. */
void MixColumn(uint16_t *state)
{
    int base;
    for (base = 0; base < 16; base += 4)
    {
        uint16_t *col = state + base;
        /* XOR of all four cells; XORing a cell with it cancels that cell
         * and leaves the XOR of the remaining three. */
        const uint16_t all = col[0] ^ col[1] ^ col[2] ^ col[3];
        col[0] ^= all;
        col[1] ^= all;
        col[2] ^= all;
        col[3] ^= all;
    }
}
/* Encryption key addition for round r: XOR every state cell with the matching
 * key cell and the matching entry of const_key[r].
 *   r      row of const_key to use
 *   state  16 cells, updated in place
 *   K      16 key cells */
void rth_Round_Encrypt_KeyAdd(int r, uint16_t *state, uint16_t *K)
{
    const uint16_t *round_const = const_key[r];
    int cell = 0;
    while (cell < 16)
    {
        state[cell] ^= K[cell] ^ round_const[cell];
        ++cell;
    }
}
/* Decryption key addition for round r: build K XOR const_key[r], run it
 * through MixColumn and Inv_ShuffleCell, and XOR the result into the state.
 *   r      row of const_key to use
 *   state  16 cells, updated in place
 *   K      16 key cells (not modified) */
void rth_Round_Decrypt_KeyAdd(int r, uint16_t *state, uint16_t *K)
{
    uint16_t round_key[16];
    int cell;

    for (cell = 0; cell < 16; ++cell)
        round_key[cell] = K[cell] ^ const_key[r][cell];

    /* Transform the round key the same way the decryption round transforms
     * the state, so it can be added after those layers. */
    MixColumn(round_key);
    Inv_ShuffleCell(round_key);

    for (cell = 0; cell < 16; ++cell)
        state[cell] ^= round_key[cell];
}
/*
 * Encrypt one 16-cell block.
 *   r           total number of rounds (the sketch calls this with 20)
 *   plaintext   16 input cells, read only
 *   K           16 key cells
 *   ciphertext  16 output cells
 */
void Encrypt(int r, uint16_t *plaintext, uint16_t *K, uint16_t *ciphertext)
{
    int cell;
    int round = 0;

    /* Load the block into the output buffer, then add the key to it. */
    for (cell = 0; cell < 16; ++cell)
        ciphertext[cell] = plaintext[cell];
    for (cell = 0; cell < 16; ++cell)
        ciphertext[cell] ^= K[cell];

    /* Rounds 0 .. r-2: substitute, shuffle, mix, add round key. */
    while (round <= r - 2)
    {
        SubCell(round, ciphertext);
        ShuffleCell(ciphertext);
        MixColumn(ciphertext);
        rth_Round_Encrypt_KeyAdd(round, ciphertext, K);
        ++round;
    }

    /* Final round: substitution only, followed by one more key addition. */
    SubCell(round, ciphertext);
    for (cell = 0; cell < 16; ++cell)
        ciphertext[cell] ^= K[cell];
}
/*
 * Decrypt one 16-cell block.
 *   r           total number of rounds (must match the value used to encrypt)
 *   ciphertext  16 input cells, read only
 *   K           16 key cells
 *   plaintext   16 output cells
 */
void Decrypt(int r, uint16_t *ciphertext, uint16_t *K,uint16_t *plaintext)
{
    int cell;
    int round = r - 2;

    /* Load the block into the output buffer, then add the key to it. */
    for (cell = 0; cell < 16; ++cell)
        plaintext[cell] = ciphertext[cell];
    for (cell = 0; cell < 16; ++cell)
        plaintext[cell] ^= K[cell];

    /* Walk the rounds backwards: r-2 down to 0. */
    while (round >= 0)
    {
        SubCell(round + 1, plaintext);
        MixColumn(plaintext);
        Inv_ShuffleCell(plaintext);
        rth_Round_Decrypt_KeyAdd(round, plaintext, K);
        --round;
    }

    /* Last substitution (the counter has run past 0 here), then the closing
     * key addition. */
    SubCell(round, plaintext);
    for (cell = 0; cell < 16; ++cell)
        plaintext[cell] ^= K[cell];
}
// Arduino start-up hook: opens the serial port before loop() starts printing.
void setup (){
Serial.begin(9600); // 9600 baud
// NOTE(review): `printf` is the stdio function here, so `printf.begin()` does
// not look like valid C++. Presumably a printf-to-Serial helper was intended
// (e.g. a library's printf_begin()) — confirm and include the right header.
printf.begin();
}
/* Arduino main loop: with an all-zero key, steps the first three plaintext
 * cells through every value 0x00..0xff and prints the ciphertext of each
 * resulting block. */
void loop ()
{
    int i;
    uint16_t plaintext[16] = { 0 };
    uint16_t Key[16] = { 0 };
    uint16_t plaintext_temp[16];   /* only used by the disabled decryption check below */
    uint16_t ciphertext_temp[16];

    for (int b0 = 0; b0 < 0x100; ++b0)
    {
        plaintext[0] = b0;
        for (int b1 = 0; b1 < 0x100; ++b1)
        {
            plaintext[1] = b1;
            for (int b2 = 0; b2 < 0x100; ++b2)
            {
                plaintext[2] = b2;
                /*printf("Before Encryption: ");
                for (i = 0; i < 16; i++)
                {
                printf("%x ", plaintext[i]);
                }
                printf("\n");*/
                Encrypt(20, plaintext, Key, ciphertext_temp);
                printf("After Encryption: ");
                i = 0;
                while (i < 16)
                {
                    printf("%x ", ciphertext_temp[i]);
                    ++i;
                }
                printf("\n");
                /*Decrypt(20, ciphertext_temp, Key, plaintext_temp);
                printf("After Decryption: ");
                for (i = 0; i < 16; i++)
                {
                printf("%x ", plaintext_temp[i]);
                }
                printf("\n");*/
                printf("\n");
            }
        }
    }
}
I reviewed the original paper numerous times, and understand the method pretty well, yet I can't pinpoint the exact reason why it's not outputting the correct cipher text, which is c055cbb95996d14902b60574d5e728d6. Any help or hints would be greatly appreciated. Thank you.
Late Edit:
I managed to find the 4 S-boxes used in Midori128; however, I'm still at a loss as to exactly how to incorporate them into my code. If anyone has any ideas, I'd appreciate it greatly.
Here are the sboxes:
// Precomputed 8-bit S-box table quoted in the post as "ssb0".
// NOTE(review): spot checks against the bit_permutation_0 -> s_box ->
// bit_permutation_0 construction suggest ssb0[hi][lo] is the image of the
// byte (hi << 4) | lo — verify the whole table before relying on it.
static uint16_t ssb0[16][16] = {
{0x11, 0x10, 0x51, 0x50, 0xb4, 0x30, 0xf4, 0x70, 0x59, 0x58, 0x19, 0x18, 0xfc, 0x78, 0xbc, 0x38},
{0x01, 0x00, 0x13, 0x12, 0xa4, 0x20, 0xb6, 0x32, 0x0b, 0x0a, 0x1b, 0x1a, 0xae, 0x2a, 0xbe, 0x3a},
{0x15, 0x31, 0x55, 0x71, 0xb5, 0x35, 0xf5, 0x75, 0x5d, 0x79, 0x1d, 0x39, 0xfd, 0x7d, 0xbd, 0x3d},
{0x05, 0x21, 0x17, 0x33, 0xa5, 0x25, 0xb7, 0x37, 0x0f, 0x2b, 0x1f, 0x3b, 0xaf, 0x2f, 0xbf, 0x3f},
{0x4b, 0x4a, 0x5b, 0x5a, 0xee, 0x6a, 0xfe, 0x7a, 0x49, 0x48, 0x41, 0x40, 0xec, 0x68, 0xe4, 0x60},
{0x03, 0x02, 0x53, 0x52, 0xa6, 0x22, 0xf6, 0x72, 0x09, 0x08, 0x43, 0x42, 0xac, 0x28, 0xe6, 0x62},
{0x4f, 0x6b, 0x5f, 0x7b, 0xef, 0x6f, 0xff, 0x7f, 0x4d, 0x69, 0x45, 0x61, 0xed, 0x6d, 0xe5, 0x65},
{0x07, 0x23, 0x57, 0x73, 0xa7, 0x27, 0xf7, 0x77, 0x0d, 0x29, 0x47, 0x63, 0xad, 0x2d, 0xe7, 0x67},
{0x95, 0xb0, 0xd5, 0xf0, 0x94, 0x90, 0xd4, 0xd0, 0xdd, 0xf8, 0x9d, 0xb8, 0xdc, 0xd8, 0x9c, 0x98},
{0x85, 0xa0, 0x97, 0xb2, 0x84, 0x80, 0x96, 0x92, 0x8f, 0xaa, 0x9f, 0xba, 0x8e, 0x8a, 0x9e, 0x9a},
{0x91, 0xb1, 0xd1, 0xf1, 0x14, 0x34, 0x54, 0x74, 0xd9, 0xf9, 0x99, 0xb9, 0x5c, 0x7c, 0x1c, 0x3c},
{0x81, 0xa1, 0x93, 0xb3, 0x04, 0x24, 0x16, 0x36, 0x8b, 0xab, 0x9b, 0xbb, 0x0e, 0x2e, 0x1e, 0x3e},
{0xcf, 0xea, 0xdf, 0xfa, 0xce, 0xca, 0xde, 0xda, 0xcd, 0xe8, 0xc5, 0xe0, 0xcc, 0xc8, 0xc4, 0xc0},
{0x87, 0xa2, 0xd7, 0xf2, 0x86, 0x82, 0xd6, 0xd2, 0x8d, 0xa8, 0xc7, 0xe2, 0x8c, 0x88, 0xc6, 0xc2},
{0xcb, 0xeb, 0xdb, 0xfb, 0x4e, 0x6e, 0x5e, 0x7e, 0xc9, 0xe9, 0xc1, 0xe1, 0x4c, 0x6c, 0x44, 0x64},
{0x83, 0xa3, 0xd3, 0xf3, 0x06, 0x26, 0x56, 0x76, 0x89, 0xa9, 0xc3, 0xe3, 0x0c, 0x2c, 0x46, 0x66}
};
// Precomputed 8-bit S-box table quoted in the post as "ssb1".
// NOTE(review): spot checks against the bit_permutation_1 -> s_box ->
// inv_bit_permutation_1 construction suggest ssb1[hi][lo] is the image of
// the byte (hi << 4) | lo — verify the whole table before relying on it.
static uint16_t ssb1[16][16] = {
{0x88, 0x8a, 0x4b, 0xcb, 0xac, 0xae, 0x6f, 0xef, 0x80, 0x82, 0x43, 0xc3, 0x94, 0x96, 0x57, 0xd7},
{0xa8, 0xaa, 0x6b, 0xeb, 0x8c, 0x8e, 0x4f, 0xcf, 0x98, 0x9a, 0x5b, 0xdb, 0x9c, 0x9e, 0x5f, 0xdf},
{0xb4, 0xb6, 0x77, 0xf7, 0xa4, 0xa6, 0x67, 0xe7, 0x90, 0x92, 0x53, 0xd3, 0x84, 0x86, 0x47, 0xc7},
{0xbc, 0xbe, 0x7f, 0xff, 0xa0, 0xa2, 0x63, 0xe3, 0xb8, 0xba, 0x7b, 0xfb, 0xb0, 0xb2, 0x73, 0xf3},
{0xca, 0xc8, 0x4a, 0x0a, 0xee, 0xec, 0x6e, 0x2e, 0xc2, 0xc0, 0x42, 0x02, 0xd6, 0xd4, 0x56, 0x16},
{0xea, 0xe8, 0x6a, 0x2a, 0xce, 0xcc, 0x4e, 0x0e, 0xda, 0xd8, 0x5a, 0x1a, 0xde, 0xdc, 0x5e, 0x1e},
{0xf6, 0xf4, 0x76, 0x36, 0xe6, 0xe4, 0x66, 0x26, 0xd2, 0xd0, 0x52, 0x12, 0xc6, 0xc4, 0x46, 0x06},
{0xfe, 0xfc, 0x7e, 0x3e, 0xe2, 0xe0, 0x62, 0x22, 0xfa, 0xf8, 0x7a, 0x3a, 0xf2, 0xf0, 0x72, 0x32},
{0x08, 0x89, 0x09, 0x8b, 0x2c, 0xad, 0x2d, 0xaf, 0x00, 0x81, 0x01, 0x83, 0x14, 0x95, 0x15, 0x97},
{0x28, 0xa9, 0x29, 0xab, 0x0c, 0x8d, 0x0d, 0x8f, 0x18, 0x99, 0x19, 0x9b, 0x1c, 0x9d, 0x1d, 0x9f},
{0x34, 0xb5, 0x35, 0xb7, 0x24, 0xa5, 0x25, 0xa7, 0x10, 0x91, 0x11, 0x93, 0x04, 0x85, 0x05, 0x87},
{0x3c, 0xbd, 0x3d, 0xbf, 0x20, 0xa1, 0x21, 0xa3, 0x38, 0xb9, 0x39, 0xbb, 0x30, 0xb1, 0x31, 0xb3},
{0x49, 0xc9, 0x48, 0x0b, 0x6d, 0xed, 0x6c, 0x2f, 0x41, 0xc1, 0x40, 0x03, 0x55, 0xd5, 0x54, 0x17},
{0x69, 0xe9, 0x68, 0x2b, 0x4d, 0xcd, 0x4c, 0x0f, 0x59, 0xd9, 0x58, 0x1b, 0x5d, 0xdd, 0x5c, 0x1f},
{0x75, 0xf5, 0x74, 0x37, 0x65, 0xe5, 0x64, 0x27, 0x51, 0xd1, 0x50, 0x13, 0x45, 0xc5, 0x44, 0x07},
{0x7d, 0xfd, 0x7c, 0x3f, 0x61, 0xe1, 0x60, 0x23, 0x79, 0xf9, 0x78, 0x3b, 0x71, 0xf1, 0x70, 0x33}
};
// Precomputed 8-bit S-box table quoted in the post as "ssb2".
// NOTE(review): spot checks against the bit_permutation_2 -> s_box ->
// inv_bit_permutation_2 construction suggest ssb2[hi][lo] is the image of
// the byte (hi << 4) | lo — verify the whole table before relying on it.
static uint16_t ssb2[16][16] = {
{0x44, 0xc3, 0x47, 0x43, 0x40, 0xc0, 0xc2, 0x42, 0x54, 0xd3, 0x57, 0x53, 0x50, 0xd0, 0xd2, 0x52},
{0x3c, 0xbb, 0x3f, 0x3b, 0x38, 0xb8, 0xba, 0x3a, 0x7c, 0xfb, 0x7f, 0x7b, 0x78, 0xf8, 0xfa, 0x7a},
{0x74, 0xf3, 0x77, 0x73, 0x70, 0xf0, 0xf2, 0x72, 0x64, 0xe3, 0x67, 0x63, 0x60, 0xe0, 0xe2, 0x62},
{0x34, 0xb3, 0x37, 0x33, 0x30, 0xb0, 0xb2, 0x32, 0x14, 0x93, 0x17, 0x13, 0x10, 0x90, 0x92, 0x12},
{0x04, 0x83, 0x07, 0x03, 0x00, 0x80, 0x82, 0x02, 0x4c, 0xcb, 0x4f, 0x4b, 0x48, 0xc8, 0xca, 0x4a},
{0x0c, 0x8b, 0x0f, 0x0b, 0x08, 0x88, 0x8a, 0x0a, 0x5c, 0xdb, 0x5f, 0x5b, 0x58, 0xd8, 0xda, 0x5a},
{0x2c, 0xab, 0x2f, 0x2b, 0x28, 0xa8, 0xaa, 0x2a, 0x6c, 0xeb, 0x6f, 0x6b, 0x68, 0xe8, 0xea, 0x6a},
{0x24, 0xa3, 0x27, 0x23, 0x20, 0xa0, 0xa2, 0x22, 0x1c, 0x9b, 0x1f, 0x1b, 0x18, 0x98, 0x9a, 0x1a},
{0x45, 0xc7, 0x46, 0x41, 0xc4, 0xc5, 0xc6, 0xc1, 0x55, 0xd7, 0x56, 0x51, 0xd4, 0xd5, 0xd6, 0xd1},
{0x3d, 0xbf, 0x3e, 0x39, 0xbc, 0xbd, 0xbe, 0xb9, 0x7d, 0xff, 0x7e, 0x79, 0xfc, 0xfd, 0xfe, 0xf9},
{0x75, 0xf7, 0x76, 0x71, 0xf4, 0xf5, 0xf6, 0xf1, 0x65, 0xe7, 0x66, 0x61, 0xe4, 0xe5, 0xe6, 0xe1},
{0x35, 0xb7, 0x36, 0x31, 0xb4, 0xb5, 0xb6, 0xb1, 0x15, 0x97, 0x16, 0x11, 0x94, 0x95, 0x96, 0x91},
{0x05, 0x87, 0x06, 0x01, 0x84, 0x85, 0x86, 0x81, 0x4d, 0xcf, 0x4e, 0x49, 0xcc, 0xcd, 0xce, 0xc9},
{0x0d, 0x8f, 0x0e, 0x09, 0x8c, 0x8d, 0x8e, 0x89, 0x5d, 0xdf, 0x5e, 0x59, 0xdc, 0xdd, 0xde, 0xd9},
{0x2d, 0xaf, 0x2e, 0x29, 0xac, 0xad, 0xae, 0xa9, 0x6d, 0xef, 0x6e, 0x69, 0xec, 0xed, 0xee, 0xe9},
{0x25, 0xa7, 0x26, 0x21, 0xa4, 0xa5, 0xa6, 0xa1, 0x1d, 0x9f, 0x1e, 0x19, 0x9c, 0x9d, 0x9e, 0x99}
};
// Precomputed 8-bit S-box table quoted in the post as "ssb3".
// NOTE(review): spot checks against the bit_permutation_3 -> s_box ->
// inv_bit_permutation_3 construction suggest ssb3[hi][lo] is the image of
// the byte (hi << 4) | lo — verify the whole table before relying on it.
static uint16_t ssb3[16][16] = {
{0x22, 0x2b, 0x20, 0x29, 0xa2, 0xab, 0x26, 0x2f, 0x4b, 0x0b, 0x49, 0x09, 0xcb, 0x8b, 0x4f, 0x0f},
{0xb2, 0xbb, 0x34, 0x3d, 0x32, 0x3b, 0x36, 0x3f, 0xdb, 0x9b, 0x5d, 0x1d, 0x5b, 0x1b, 0x5f, 0x1f},
{0x02, 0x43, 0x00, 0x41, 0x82, 0xc3, 0x06, 0x47, 0x42, 0x03, 0x40, 0x01, 0xc2, 0x83, 0x46, 0x07},
{0x92, 0xd3, 0x14, 0x55, 0x12, 0x53, 0x16, 0x57, 0xd2, 0x93, 0x54, 0x15, 0x52, 0x13, 0x56, 0x17},
{0x2a, 0x23, 0x28, 0x21, 0xaa, 0xa3, 0x2e, 0x27, 0x6b, 0x0a, 0x69, 0x08, 0xeb, 0x8a, 0x6f, 0x0e},
{0xba, 0xb3, 0x3c, 0x35, 0x3a, 0x33, 0x3e, 0x37, 0xfb, 0x9a, 0x7d, 0x1c, 0x7b, 0x1a, 0x7f, 0x1e},
{0x62, 0x63, 0x60, 0x61, 0xe2, 0xe3, 0x66, 0x67, 0x6a, 0x4a, 0x68, 0x48, 0xea, 0xca, 0x6e, 0x4e},
{0xf2, 0xf3, 0x74, 0x75, 0x72, 0x73, 0x76, 0x77, 0xfa, 0xda, 0x7c, 0x5c, 0x7a, 0x5a, 0x7e, 0x5e},
{0xb4, 0xbd, 0x24, 0x2d, 0xb6, 0xbf, 0xa6, 0xaf, 0xdd, 0x9d, 0x4d, 0x0d, 0xdf, 0x9f, 0xcf, 0x8f},
{0xb0, 0xb9, 0x30, 0x39, 0xa0, 0xa9, 0xa4, 0xad, 0xd9, 0x99, 0x59, 0x19, 0xc9, 0x89, 0xcd, 0x8d},
{0x94, 0xd5, 0x04, 0x45, 0x96, 0xd7, 0x86, 0xc7, 0xd4, 0x95, 0x44, 0x05, 0xd6, 0x97, 0xc6, 0x87},
{0x90, 0xd1, 0x10, 0x51, 0x80, 0xc1, 0x84, 0xc5, 0xd0, 0x91, 0x50, 0x11, 0xc0, 0x81, 0xc4, 0x85},
{0xbc, 0xb5, 0x2c, 0x25, 0xbe, 0xb7, 0xae, 0xa7, 0xfd, 0x9c, 0x6d, 0x0c, 0xff, 0x9e, 0xef, 0x8e},
{0xb8, 0xb1, 0x38, 0x31, 0xa8, 0xa1, 0xac, 0xa5, 0xf9, 0x98, 0x79, 0x18, 0xe9, 0x88, 0xed, 0x8c},
{0xf4, 0xf5, 0x64, 0x65, 0xf6, 0xf7, 0xe6, 0xe7, 0xfc, 0xdc, 0x6c, 0x4c, 0xfe, 0xde, 0xee, 0xce},
{0xf0, 0xf1, 0x70, 0x71, 0xe0, 0xe1, 0xe4, 0xe5, 0xf8, 0xd8, 0x78, 0x58, 0xe8, 0xc8, 0xec, 0xcc}
};
Your code contains Sb1, but where is Sb0? According to the original paper, two smaller 4-bit S-boxes are used to construct each 8-bit S-box.
In void Encrypt(int r, uint16_t *plaintext, uint16_t *K, uint16_t *ciphertext), you only need to XOR the key with the plaintext to get the ciphertext; there is no need to copy first and then XOR. A single for loop is enough.
Use this link as a benchmark for your development code.
Related
NRF24L01 is not sending valid data
I have a problem with NRF24L01 I'm building a weather station and I ran into a problem, I can't send the correct value from each sensor to the receiver. The values in the char are sending correctly but the float value is not, I keep getting the value 656677.37 Here is the transmitter code //DallasTemperature #include <OneWire.h> #include <DallasTemperature.h> //DHT11 #include <DHT.h> //BMP280 #include <Wire.h> #include "i2c.h" #include "i2c_BMP280.h" #include <MQ135.h> //NRF24l01 pn ln #include <SPI.h> #include <nRF24L01.h> #include <RF24.h> //============================================[Pin Definitions]============================================= #define MQ135_PIN A1 #define DHT_PIN 2 #define DHTTYPE DHT11 #define ONE_WIRE_BUS 3 #define LDR_PIN A0 #define RAIN_PIN A2 //============================================[Global defines]============================================= OneWire oneWire(ONE_WIRE_BUS); DallasTemperature sensors(&oneWire); DHT dht(DHT_PIN, DHTTYPE); BMP280 bmp280; MQ135 mq135_sensor(MQ135_PIN); RF24 radio(9, 10); // CE, CSN //============================================[Global variables]============================================= float temp_DALLAS; float humidity_DHT; float temperature_DHT; float hic; float temperature_BMP; float pascal_BMP; float rzero_MQ135; float correctedRZero_MQ135; float resistance_MQ135; float ppm_MQ135; float correctedPPM_MQ135; #define MAX_ADC_READING 1023 #define ADC_REF_VOLTAGE 5.0 #define REF_RESISTANCE 5030 // measure this for best results #define LUX_CALC_SCALAR 12518931 #define LUX_CALC_EXPONENT -1.405 int ldrRawData; float resistorVoltage, ldrVoltage; float ldrResistance; float ldrLux; int rainRawData; const byte address[6] = "00001"; #define nodechar 20 #define typechar 20 //#define valuechar 20 //#define unitchar 20 struct Template { char node[nodechar]; char type[typechar]; //char value[valuechar] = "\0"; //char unit[unitchar]; float value; };Template myStruct; unsigned int h_old = 0; unsigned int h_new = 0; 
unsigned int t_old = 0; unsigned int t_new = 0; unsigned int hic_old = 0; unsigned int hic_new = 0; unsigned int pbmp_old = 0; unsigned int pbmp_new = 0; unsigned int tbmp_old = 0; unsigned int tbmp_new = 0; unsigned int rzMQ135_old = 0; unsigned int rzMQ135_new = 0; unsigned int crzMQ135_old = 0; unsigned int crzMQ135_new = 0; unsigned int rMQ135_old = 0; unsigned int rMQ135_new = 0; unsigned int ppmMQ135_old = 0; unsigned int ppmMQ135_new = 0; unsigned int cppmMQ135_old = 0; unsigned int cppmMQ135_new = 0; unsigned int tdallas_old = 0; unsigned int tdallas_new = 0; unsigned int ldr_old = 0; unsigned int ldr_new = 0; unsigned int rain_old = 0; unsigned int rain_new = 0; void setup(){ Serial.begin(115200); dht.begin(); Serial.print("Probe BMP280: "); if(bmp280.initialize()) Serial.println("Sensor found"); else Serial.println("Sensor missing"); bmp280.setEnabled(0); bmp280.triggerMeasurement(); Serial.print("Locating devices..."); sensors.begin(); Serial.print("Found "); Serial.print(sensors.getDeviceCount(), DEC); Serial.println(" devices."); if (!radio.begin()) Serial.println(F("radio hardware is not responding!!")); else Serial.println(F("radio hardware is responding!!")); radio.setPALevel(RF24_PA_MAX); //radio.setDataRate(RF24_250KBPS); radio.openWritingPipe(address); radio.setPALevel(RF24_PA_MIN); radio.stopListening(); String stringOne2 = "1;Meteo"; stringOne2.toCharArray(myStruct.node, nodechar); } void loop(){ humidity_DHT = dht.readHumidity(); temperature_DHT = dht.readTemperature(); hic = dht.computeHeatIndex(temperature_DHT, humidity_DHT, false); bmp280.awaitMeasurement(); bmp280.getTemperature(temperature_BMP); bmp280.getPressure(pascal_BMP); static float meters, metersold; bmp280.getAltitude(meters); metersold = (metersold * 10 + meters)/11; bmp280.triggerMeasurement(); rzero_MQ135 = mq135_sensor.getRZero(); correctedRZero_MQ135 = mq135_sensor.getCorrectedRZero(temperature_DHT, humidity_DHT); resistance_MQ135 = mq135_sensor.getResistance(); ppm_MQ135 = 
mq135_sensor.getPPM(); correctedPPM_MQ135 = mq135_sensor.getCorrectedPPM(temperature_DHT, humidity_DHT); sensors.requestTemperatures(); // method 2 - faster temp_DALLAS = sensors.getTempCByIndex(0); ldrRawData = analogRead(LDR_PIN); resistorVoltage = (float)ldrRawData / MAX_ADC_READING * ADC_REF_VOLTAGE; ldrVoltage = ADC_REF_VOLTAGE - resistorVoltage; ldrResistance = ldrVoltage/resistorVoltage * REF_RESISTANCE; ldrLux = LUX_CALC_SCALAR * pow(ldrResistance, LUX_CALC_EXPONENT); rainRawData = analogRead(RAIN_PIN); rainRawData = 1024 - rainRawData; /*Serial.print(F("Humidity: ")); Serial.print(humidity_DHT); Serial.print(F("% Temperature: ")); Serial.print(temperature_DHT); Serial.print(F("°C ")); Serial.print(F(" Heat index: ")); Serial.print(hic); Serial.print(F("°C ")); Serial.print(" HeightPT1: "); Serial.print(metersold); Serial.print(" m; Height: "); Serial.print(meters); Serial.print(" Pressure: "); Serial.print(pascal_BMP); Serial.print(" Pa; T: "); Serial.print(temperature_BMP); Serial.print(" C "); Serial.print("MQ135 RZero: "); Serial.print(rzero_MQ135); Serial.print(" Corrected RZero: "); Serial.print(correctedRZero_MQ135); Serial.print(" Resistance: "); Serial.print(resistance_MQ135); Serial.print(" PPM: "); Serial.print(ppm_MQ135); Serial.print("ppm"); Serial.print(" Corrected PPM: "); Serial.print(correctedPPM_MQ135); Serial.print("ppm "); Serial.print("Temp C: "); Serial.print(temp_DALLAS); Serial.print(" LDR Raw Data: "); Serial.print(ldrRawData); Serial.print(" LDR Voltage: "); Serial.print(ldrVoltage); Serial.print(" volts LDR Resistance: "); Serial.print(ldrResistance); Serial.print(" Ohms LDR Illuminance: "); Serial.print(ldrLux); Serial.print(" lux Rain:"); Serial.println(rainRawData);*/ h_new = humidity_DHT; if(h_new != h_old){ //myStruct.type = 1; String stringOne = "HUMIDITY_DHT"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "%"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = h_new; 
radio.write(&myStruct, sizeof(myStruct)); h_old = h_new; } t_new = temperature_DHT; if(t_new != t_old){ // myStruct.type = 2; String stringOne = "TEMP_DHT"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "°C"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = t_new; radio.write(&myStruct, sizeof(myStruct)); t_old = t_new; } hic_new = hic; if(hic_new != hic_old){ //myStruct.type = 3; String stringOne = "HIC_DHT"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "°C"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = hic_new; radio.write(&myStruct, sizeof(myStruct)); hic_old = hic_new; } pbmp_new = pascal_BMP; if(pbmp_new != pbmp_old){ //myStruct.type = 4; String stringOne = "PASCAL_BMP"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "Pa"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = pbmp_new; radio.write(&myStruct, sizeof(myStruct)); pbmp_old = pbmp_new; } tbmp_new = temperature_BMP; if(tbmp_new != tbmp_old){ //myStruct.type = 5; String stringOne = "TEMP_BMP"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "°C"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = tbmp_new; radio.write(&myStruct, sizeof(myStruct)); tbmp_old = tbmp_new; } rzMQ135_new = rzero_MQ135; if(rzMQ135_new != rzMQ135_old){ // myStruct.type = 6; String stringOne = "RZ_MQ135"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "raw"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = rzMQ135_new; radio.write(&myStruct, sizeof(myStruct)); rzMQ135_old = rzMQ135_new; } crzMQ135_new = correctedRZero_MQ135; if(crzMQ135_new != crzMQ135_old){ //myStruct.type = 7; String stringOne = "CRZ_MQ135"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "raw"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = crzMQ135_new; radio.write(&myStruct, sizeof(myStruct)); crzMQ135_old 
= crzMQ135_new; } rMQ135_new = resistance_MQ135; if(rMQ135_new != rMQ135_old){ //myStruct.type = 8; String stringOne = "R_MQ135"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "R"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = rMQ135_new; radio.write(&myStruct, sizeof(myStruct)); rMQ135_old = rMQ135_new; } ppmMQ135_new = ppm_MQ135; if(ppmMQ135_new != ppmMQ135_old){ //myStruct.type = 9; String stringOne = "PPM_MQ135"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "ppm"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = ppmMQ135_new; radio.write(&myStruct, sizeof(myStruct)); ppmMQ135_old = ppmMQ135_new; } cppmMQ135_new = correctedPPM_MQ135; if(cppmMQ135_new != cppmMQ135_old){ //myStruct.type = 10; String stringOne = "CPPM_MQ135"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "ppm"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = cppmMQ135_new; radio.write(&myStruct, sizeof(myStruct)); cppmMQ135_old = cppmMQ135_new; } tdallas_new = temp_DALLAS; if(tdallas_new != tdallas_old){ //myStruct.type = 10; String stringOne = "Temp_Dallas"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "°C"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = tdallas_new; radio.write(&myStruct, sizeof(myStruct)); tdallas_old = tdallas_new; } ldr_new = ldrLux; if(ldr_new != ldr_old){ //myStruct.type = 11; String stringOne = "LDR"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "lux"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = ldr_new; radio.write(&myStruct, sizeof(myStruct)); ldr_old = ldr_new; } rain_new = rainRawData; if(rain_new != rain_old){ // myStruct.type = 12; String stringOne = "RAIN"; stringOne.toCharArray(myStruct.type, typechar); /*String stringOne2 = "%"; stringOne2.toCharArray(myStruct.unit, unitchar);*/ myStruct.value = rain_new; radio.write(&myStruct, 
sizeof(myStruct)); rain_old = rain_new; } delay(500); } Here is the receiver code #include <SPI.h> #include <nRF24L01.h> #include <RF24.h> #include <Wire.h> #include <Adafruit_GFX.h> #include <Adafruit_SSD1306.h> #define WIRE Wire Adafruit_SSD1306 display = Adafruit_SSD1306(128, 32, &WIRE); RF24 radio(9,10); // CE, CSN const byte address[6] = "00001"; #define nodechar 20 #define typechar 20 //#define unitchar 20 //#define valuechar 20 struct MyStruct{ char node[nodechar]; char type[typechar]; //char unit[unitchar]; float value; }; void setup(){ Serial.begin(115200); Serial.println("OLED FeatherWing test"); // SSD1306_SWITCHCAPVCC = generate display voltage from 3.3V internally display.begin(SSD1306_SWITCHCAPVCC, 0x3C); if(!radio.begin()) Serial.println(F("radio hardware is not responding!!")); else Serial.println(F("radio hardware is responding!!")); radio.openReadingPipe(0, address); radio.setPALevel(RF24_PA_MAX); radio.startListening(); display.display(); // Clear the buffer. display.clearDisplay(); display.display(); } void loop(){ display.clearDisplay(); display.setTextSize(1); display.setTextColor(SSD1306_WHITE); display.setCursor(0,0); display.print("IP: 10.0.0.243\n"); if(radio.available()){ MyStruct myStruct; radio.read(&myStruct, sizeof(myStruct)); //char text[32] = ""; //radio.read(&text, sizeof(text)); Serial.print(myStruct.node); Serial.print(" - "); Serial.print(myStruct.type); Serial.print(" - "); Serial.println(myStruct.value); //Serial.print(" - "); //Serial.println(myStruct.unit); //display.clearDisplay(); //display.setTextSize(1); //display.setTextColor(SSD1306_WHITE); display.setCursor(1,1); display.print("\nID: "); display.print(myStruct.node); display.print("\nType:"); display.print(myStruct.type); display.print("\nValue:"); display.println(myStruct.value); //display.println("Sending val #0"); //display.setCursor(0,0); display.display(); // actually display all of the above } } I can't send the correct float value, without sending data I get 
the correct values from the sensors
This may be a struct alignment (padding) problem. Try declaring the struct as packed on both the transmitter and the receiver: struct __attribute__((__packed__)) MyStruct{ char node[nodechar]; char type[typechar]; //char unit[unitchar]; float value; };
Convert RDS into R file
I have a .rds file which I'm told contains R code. I tried to access it using readRDS and am able to view the output. How can I convert the .rds file into a .R file, or any other editable format, so that I can make some edits and view the code? I have looked for solutions but could not find any relevant ones. Link to the .rds file: https://drive.google.com/file/d/1SGgKA1ejkF7_uq_27E6Qpaq_fdAcDL8O/view?usp=sharing
Assuming the rds file contains R code, rather than data (which it can, because rds format can save any object including functions and expressions)... then, we can use dput to export the contents into a human readable format. Here's a simple example: hello = function() { print("Hello world") } saveRDS(hello, 'hello.rds') dput(readRDS('hello.rds'), file='hello.r') The file hello.r now contains the following: function () { print("Hello world") } If we do the same thing with your original file (i.e., dput(readRDS('Fundamental_Model.rds'), file='hello.r')), we get: list(Fundamental_Model = structure(list(handle = <pointer: (nil)>, raw = as.raw(c(0x00, 0x00, 0x00, 0x3f, 0x0b, 0x00, 0x00, # ... many lines omitted 0x00, 0x00, 0x00, 0x6e, 0x69, 0x74, 0x65, 0x72, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x31, 0x32, 0x38)), niter = 129, call = xgb.train(data = df_train, nrounds = best.nrounds, paras = xgb.params), params = list(paras = list(colsample_bytree = 0.85, subsample = 0.85, booster = "gbtree", max_depth = 6, eta = 0.03, eval_metric = "rmse", objective = "reg:linear", gamma = 0), silent = 1), callbacks = list(cb.print.evaluation = structure(function (env = parent.frame()) { if (length(env$bst_evaluation) == 0 || period == 0 || NVL(env$rank, 0) != 0) return() i <- env$iteration if ((i - 1)%%period == 0 || i == env$begin_iteration || i == env$end_iteration) { stdev <- if (showsd) env$bst_evaluation_err else NULL msg <- format.eval.string(i, env$bst_evaluation, stdev) cat(msg, "\n") } }, call = cb.print.evaluation(period = print_every_n), name = "cb.print.evaluation")), feature_names = c("X6", "X7", "X8", "X9", "X10", "X11", "X12", "X13", "X14", "X15", "X16"), nfeatures = 11L), class = "xgb.Booster"))
Ascon-128 on Arduino
I have tried implementing the Ascon-128 encryption algorithm in Arduino https://ascon.iaik.tugraz.at. The compiler doesn't find any mistakes, but when I run the code the monitor doesn't return anything. Not even the Serial.println(ciphertext[0]); gets printed. Do you see any mistakes in my code? Thank you #include <stdio.h> typedef unsigned long long bit64; bit64 constants[16] = {0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87, 0x78, 0x69 ,0x5a, 0x4b, 0x3c, 0x2d, 0x1e, 0x0f }; bit64 state[5] = {0}, t[5] = { 0 }; void setup() { Serial.begin(9600); } void loop() { // put your main code here, to run repeatedly: main(); delay(500); } bit64 rotate(bit64 x, int l){ bit64 temp; temp = (x >> l) ^ (x << (64 - l)); return temp; } void print_state(bit64 state[5]){ for (int i = 0; i < 5; i++) printf("%llx\n", state[i]); } void sbox (bit64 x[5]) { x[0] ^= x[4]; x[4] ^= x[3]; x[2] ^= x[1]; t[0] = x[0]; t[1] = x[1]; t[2] = x[2]; t[3] = x[3]; t[4] = x[4]; t[0] = ~t[0]; t[1] = ~t[1]; t[2] = ~t[2]; t[3] = ~t[3]; t[4] = ~t[4]; t[0] &= x[1]; t[1] &= x[2]; t[2] &= x[3]; t[3] &= x[4]; t[4] &= x[0]; x[0] ^= t[1]; x[1] ^= t[2]; x[2] ^= t[3]; x[3] ^= t[4]; x[4] ^= t[0]; x[1] ^= x[0]; x[0] ^= x[4]; x[3] ^= x[2]; x[2] = ~x[2]; } void linear(bit64 state[5]) { bit64 temp0, temp1; temp0 = rotate(state[0], 19); temp1 = rotate(state[0], 28); state[0] ^= temp0 ^temp1; temp0 = rotate(state[1], 61); temp1 = rotate(state[1], 39); state[1] ^= temp0 ^temp1; temp0 = rotate(state[2], 1); temp1 = rotate(state[2], 6); state[2] ^= temp0 ^temp1; temp0 = rotate(state[3], 10); temp1 = rotate(state[3], 17); state[3] ^= temp0 ^temp1; temp0 = rotate(state[4], 7); temp1 = rotate(state[4], 41); state[4] ^= temp0 ^temp1; } void add_constant(bit64 state[5], int i, int a) { state[2] = state[2] ^ constants[12- a + i]; } void p(bit64 state[5], int a) { for (int i = 0; i < a; i++){ add_constant(state, i, a); sbox(state); linear(state); } } void initialization(bit64 state[5], bit64 key[2]) { p(state, 12); state[3] ^= 
key[0]; state[4] ^= key[1]; } void encrypt(bit64 state[5], int length, bit64 plaintext[], bit64 ciphertext[]) { ciphertext[0] = plaintext[0] ^ state[0]; for (int i = 1; i < length; i++){ p(state, 6); ciphertext[i] = plaintext[i] ^ state[0]; state[0] = ciphertext[i]; } } void finalization(bit64 state[5], bit64 key[2]){ state[3] ^= key[0]; state[4] ^= key[1]; p(state, 12); // bijgevoegd state[3] ^= key[0]; state[4] ^= key[1]; } int main() { bit64 nonce[2] = { 2000 }; bit64 key[2] = { 3000 }; bit64 IV = 0x80400c060000000; bit64 to_encode = 0x82187; bit64 plaintext [] = { 0x1234567890abcdef, to_encode }, ciphertext[10] = { 0 }; state[0] = IV; state[1] = key[0]; state[2] = key[1]; state[3] = nonce[0]; state[4] = key[1]; initialization(state, key); print_state(state); encrypt(state, 2, plaintext, ciphertext); Serial.println(ciphertext[0]); Serial.println(ciphertext[1]); finalization(state, key); Serial.println(state[3]); Serial.println(state[4]); return 0; }
How can I use OpenCL in a loop of the host program?
I want to write the input image into the buffer and execute the kernel file in the while loop of the host program (not in the kernel file). I did set OpenCL stuff (context, device, buffer, command queue, etc) out of the while loop, write the image to the buffer I created in the loop, and run the kernel. It works on my desktop, but when i executed it on i.mx6 board, it returns segmentation fault error in second while loop. The code is below. while (count < 10) { unsigned char* input = (unsigned char*)inImg.data; start = clock(); unsigned char* output = (unsigned char*)malloc(width * height * sizeof(unsigned char)); size_t localWorkSize[2] = { WGX, WGY }; size_t globalWorkSize[2] = { RoundUp(WGX, width), RoundUp(WGY, height) }; cl_mem cl_inImg = clCreateBuffer(context, CL_MEM_READ_ONLY, width * height * sizeof(unsigned char) * inImg.channels(), NULL, &ret); /*error check*/ cl_mem cl_outImg = clCreateBuffer(context, CL_MEM_WRITE_ONLY, width * height * sizeof(unsigned char) * inImg.channels(), NULL, &ret); /*error check*/ int filterSize = 5; int paddingSize = (int)(filterSize / 2) * 2; int localWidth = WGX + paddingSize; int localHeight = WGY + paddingSize; size_t localMemSize = (localWidth * localHeight * sizeof(unsigned char) * inImg.channels()); clEnqueueWriteBuffer(queue, cl_inImg, CL_TRUE, 0, width * height * sizeof(unsigned char) * inImg.channels(), input, 0, NULL, NULL); cl_int errNum = 0; errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_inImg); errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &cl_outImg); errNum |= clSetKernelArg(kernel, 2, localMemSize, NULL); errNum |= clSetKernelArg(kernel, 3, sizeof(int), &filterSize); errNum |= clSetKernelArg(kernel, 4, sizeof(int), &localWidth); errNum |= clSetKernelArg(kernel, 5, sizeof(int), &localHeight); errNum |= clSetKernelArg(kernel, 6, sizeof(int), &width); errNum |= clSetKernelArg(kernel, 7, sizeof(int), &height); /*error check*/ errNum = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalWorkSize, 
localWorkSize, 0, NULL, NULL); /*error check*/ clFinish(queue); errNum = clEnqueueReadBuffer(queue, cl_outImg, CL_TRUE, 0, width * height * sizeof(unsigned char) * inImg.channels(), output, 0, NULL, NULL); /*error check*/ } I can't find what I did wrong, because it works normally on my PC. How should I fix this?
For loop in OpenCL kernel rolling through global memory float array
I feel I don't understand a basic parallel programming concept. The kernel below is a simple/contrived example that reproduces the problem I'm having. It attempts to use all the values in "points" to calculate a value and assign it to all of the items in "blocks." I want to push the limits for the size of these arrays. While I can make the "blocks" array quit large (>100 million floats), I get an "invalid command queue" error when "points" is filled with more than ~100 thousand floats (after calling clFinish immediately after clEnqueueNDRangeKernel). Could any of you help me understand why? __kernel void openClTesting (__global float *blocks, __global float *points, int pointsCount) { int globalId = get_global_id(0); int count = 0; for (int i = 0; i < pointsCount; i++) { count++; } blocks[globalId] = count; }; Some Device Info: CL_DEVICE_LOCAL_MEM_SIZE = 49,152 CL_DEVICE_GLOBAL_MEM_SIZE = 2,147,483,648 CL_DEVICE_MAX_MEM_ALLOC_SIZE = 536,870,912 Host Code: #include "stdafx.h" #include "CL\opencl.h" #include <iostream> #include <fstream> #include <string> #include <stddef.h> #include <stdlib.h> #include <stdio.h> #define NUM_POINTS 100000 #define NUM_BLOCKS 100000000 struct openClData { cl_device_id deviceId = NULL; cl_uint numDevices; cl_uint numPlatforms; cl_int ret; cl_platform_id *platforms = NULL; cl_context context; cl_command_queue commandQueue; cl_program program; cl_kernel kernel; char* kernelCode; cl_uint kernelCodeSize; size_t globalItemSize; size_t localItemSize = 1; }; char* getKernelCode(); void printErrorLog(openClData oclData); void printRet(openClData oclData, int line); int countFileChars(const char *fileName); int _tmain(int argc, _TCHAR* argv[]) { openClData oclData; oclData.globalItemSize = NUM_POINTS; oclData.kernelCode = getKernelCode(); std::cout << oclData.kernelCode << std::endl; oclData.kernelCodeSize = strlen(oclData.kernelCode); int numPoints = NUM_POINTS; int numBlocks = NUM_BLOCKS; cl_long localMemSize = 0, globalMemSize = 0, 
maxAllocMemSize = 0; float *blocks = new float[numBlocks]{0}; float *points = new float[numPoints]{0}; //prepare platform, device, context and command queue oclData.ret = clGetPlatformIDs(0, NULL, &oclData.numPlatforms); printRet(oclData, __LINE__); oclData.platforms = (cl_platform_id *)malloc(oclData.numPlatforms * sizeof(cl_platform_id)); oclData.ret = clGetPlatformIDs(oclData.numPlatforms, oclData.platforms, NULL); printRet(oclData, __LINE__); oclData.ret = clGetDeviceIDs(oclData.platforms[0], CL_DEVICE_TYPE_GPU, 1, &oclData.deviceId, &oclData.numDevices); printRet(oclData, __LINE__); oclData.context = clCreateContext(NULL, 1, &oclData.deviceId, NULL, NULL, &oclData.ret); printRet(oclData, __LINE__); oclData.commandQueue = clCreateCommandQueue(oclData.context, oclData.deviceId, 0, &oclData.ret); printRet(oclData, __LINE__); //prepare cl_mem objects cl_mem memObjBlocks = clCreateBuffer(oclData.context, CL_MEM_READ_WRITE, sizeof(float) * numBlocks, NULL, &oclData.ret); printRet(oclData, __LINE__); cl_mem memObjPoints = clCreateBuffer(oclData.context, CL_MEM_READ_WRITE, sizeof(float) * numPoints, NULL, &oclData.ret); printRet(oclData, __LINE__); oclData.ret = clEnqueueWriteBuffer(oclData.commandQueue, memObjBlocks, CL_TRUE, 0, sizeof(float) * numBlocks, blocks, 0, NULL, NULL); printRet(oclData, __LINE__); oclData.ret = clEnqueueWriteBuffer(oclData.commandQueue, memObjPoints, CL_TRUE, 0, sizeof(float) * numPoints, points, 0, NULL, NULL); printRet(oclData, __LINE__); //prepare program oclData.program = clCreateProgramWithSource(oclData.context, 1, (const char**)&oclData.kernelCode, (const size_t *)&oclData.kernelCodeSize, &oclData.ret); printRet(oclData, __LINE__); oclData.ret = clBuildProgram(oclData.program, 1, &oclData.deviceId, NULL, NULL, NULL); printRet(oclData, __LINE__); if (oclData.ret == CL_BUILD_PROGRAM_FAILURE) printErrorLog(oclData); oclData.kernel = clCreateKernel(oclData.program, "openClTesting", &oclData.ret); printRet(oclData, __LINE__); //set 
arguments oclData.ret = clSetKernelArg(oclData.kernel, 0, sizeof(cl_mem), &memObjBlocks); printRet(oclData, __LINE__); oclData.ret = clSetKernelArg(oclData.kernel, 1, sizeof(cl_mem), &memObjPoints); printRet(oclData, __LINE__); oclData.ret = clSetKernelArg(oclData.kernel, 2, sizeof(int), &numPoints); printRet(oclData, __LINE__); //run oclData.ret = clEnqueueNDRangeKernel(oclData.commandQueue, oclData.kernel, 1, NULL, &oclData.globalItemSize, &oclData.localItemSize, 0, NULL, NULL); printRet(oclData, __LINE__); oclData.ret = clFinish(oclData.commandQueue); printRet(oclData, __LINE__); oclData.ret = clEnqueueReadBuffer(oclData.commandQueue, memObjBlocks, CL_TRUE, 0, sizeof(float) * numBlocks, blocks, 0, NULL, NULL); printRet(oclData, __LINE__); oclData.ret = clFinish(oclData.commandQueue); printRet(oclData, __LINE__); //print some device info oclData.ret = clGetDeviceInfo(oclData.deviceId, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localMemSize, 0); std::cout << "CL_DEVICE_LOCAL_MEM_SIZE = " << localMemSize << '\n'; oclData.ret = clGetDeviceInfo(oclData.deviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_long), &globalMemSize, 0); std::cout << "CL_DEVICE_GLOBAL_MEM_SIZE = " << globalMemSize << '\n'; oclData.ret = clGetDeviceInfo(oclData.deviceId, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_long), &maxAllocMemSize, 0); std::cout << "CL_DEVICE_MAX_MEM_ALLOC_SIZE = " << maxAllocMemSize << '\n'; //clean up oclData.ret = clFlush(oclData.commandQueue); printRet(oclData, __LINE__); oclData.ret = clFinish(oclData.commandQueue); printRet(oclData, __LINE__); oclData.ret = clReleaseKernel(oclData.kernel); printRet(oclData, __LINE__); oclData.ret = clReleaseProgram(oclData.program); printRet(oclData, __LINE__); oclData.ret = clReleaseMemObject(memObjBlocks); printRet(oclData, __LINE__); oclData.ret = clReleaseMemObject(memObjPoints); printRet(oclData, __LINE__); oclData.ret = clReleaseCommandQueue(oclData.commandQueue); printRet(oclData, __LINE__); oclData.ret = 
clReleaseContext(oclData.context); printRet(oclData, __LINE__); for (size_t i = 0; i < 10; i++) { std::cout << blocks[i] << std::endl; } delete blocks; delete points; return 0; } char* getKernelCode() { char* kernelCode = "__kernel void openClTesting (__global float *blocks, __global float *points, int pointsCount)" "{" " int globalId = get_global_id(0);" " int count = 0;" " for (int i = 0; i < pointsCount; i++)" " {" " count++;" " }" "blocks[globalId] = count;" "}"; return kernelCode; } void printErrorLog(openClData oclData) { size_t log_size; clGetProgramBuildInfo(oclData.program, oclData.deviceId, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); char *log = (char *)malloc(log_size); clGetProgramBuildInfo(oclData.program, oclData.deviceId, CL_PROGRAM_BUILD_LOG, log_size, log, NULL); std::cout << log; free(log); } void printRet(openClData oclData, int line) { std::cout << line << ", " << oclData.ret << std::endl; } int countFileChars(const char *fileName) { std::ifstream ifs(fileName); ifs.seekg(0, std::ios_base::end); size_t count = ifs.tellg(); ifs.seekg(0, std::ios_base::beg); return count; }
A few things I notice: You're launching NUM_POINTS work-items, but write the result of each to blocks[globalId] - which has NUM_BLOCKS items. So that's undefined behaviour when NUM_POINTS is greater than NUM_BLOCKS. It also explains why varying NUM_BLOCKS does nothing (outside of the above restriction): aside from the memory allocation, the value of NUM_BLOCKS has no effect. (And the memory allocation limit you found roughly matches the CL_DEVICE_MAX_MEM_ALLOC_SIZE value for your implementation.) You might be running into a kernel timeout condition here. 100000 loop iterations in a single work-item is quite a lot. Depending on the OpenCL implementation, kernels can be killed off if they take too long to run. Consider making better use of the thread-parallelism available, and split the work more horizontally across work-items, rather than looping. Many, shortish-running work-items are typically better than few long-running ones.
On a general note, localItemSize = 1; should be avoided, because it forces every OpenCL work group to consist of a single work item, which will reduce your parallelism to the number of work groups that your compute device can run in parallel, which will be much less than the number of work items it can run. You can simply pass NULL for the local item size instead to have the OpenCL implementation figure out a reasonable value on its own: clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, NULL, 0, NULL, NULL); This may also be the source of your error because you are creating NUM_POINTS work groups but the size of the queue on the device is memory limited (CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE).