Convert RDS into R file - r

I have a .rds file which, I am told, contains R code. I tried to access it using readRDS and I am able to view the output. How can I convert the .rds file into a .r file, or any other editable format, so that I can view and edit the code? I have looked for solutions but could not find any related ones.
Link to the .rds file https://drive.google.com/file/d/1SGgKA1ejkF7_uq_27E6Qpaq_fdAcDL8O/view?usp=sharing

Assuming the rds file contains R code, rather than data (which it can, because rds format can save any object including functions and expressions)... then, we can use dput to export the contents into a human readable format.
Here's a simple example:
# Example object to serialise: a function that prints a fixed greeting.
hello = function() {
print("Hello world")
}
saveRDS(hello, 'hello.rds')
dput(readRDS('hello.rds'), file='hello.r')
The file hello.r now contains the following:
function ()
{
print("Hello world")
}
If we do the same thing with your original file (i.e., dput(readRDS('Fundamental_Model.rds'), file='hello.r')), we get:
list(Fundamental_Model = structure(list(handle = <pointer: (nil)>,
raw = as.raw(c(0x00, 0x00, 0x00, 0x3f, 0x0b, 0x00, 0x00,
# ... many lines omitted
0x00, 0x00, 0x00, 0x6e, 0x69, 0x74, 0x65, 0x72, 0x03, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x31, 0x32, 0x38)), niter = 129,
call = xgb.train(data = df_train, nrounds = best.nrounds,
paras = xgb.params), params = list(paras = list(colsample_bytree = 0.85,
subsample = 0.85, booster = "gbtree", max_depth = 6,
eta = 0.03, eval_metric = "rmse", objective = "reg:linear",
gamma = 0), silent = 1), callbacks = list(cb.print.evaluation = structure(function (env = parent.frame())
{
if (length(env$bst_evaluation) == 0 || period == 0 ||
NVL(env$rank, 0) != 0)
return()
i <- env$iteration
if ((i - 1)%%period == 0 || i == env$begin_iteration ||
i == env$end_iteration) {
stdev <- if (showsd)
env$bst_evaluation_err
else NULL
msg <- format.eval.string(i, env$bst_evaluation,
stdev)
cat(msg, "\n")
}
}, call = cb.print.evaluation(period = print_every_n), name = "cb.print.evaluation")),
feature_names = c("X6", "X7", "X8", "X9", "X10", "X11", "X12",
"X13", "X14", "X15", "X16"), nfeatures = 11L), class = "xgb.Booster"))

Related

Special characters on a non-English keyboard

I need some special characters to be typed into cmd with Keyboard.h, but I don't have an English-layout keyboard; I have an Estonian layout because I'm from Estonia. Let's get to the point!
I've tried every special key, but I haven't managed to get the \ key yet. Does anyone know how I can get it?
Board used: Arduino Pro Micro
Special characters obtained so far:
! = !
~ = ~~
" = Ä
# = #
¤ = +15708
% = %
& = /
/ = -
( = )
) = =
= = ´´
? = _
Õ = +15467
Ü = +15460
Ä = +15484
Ö = +15466
* = (
_ = ?
: = Ö
; = ö
> = :
# = "
£ = +15709
$ = ¤
€ = +32084
{ = Ü
[ = ü
] = õ
} = Õ
\ = '
§ = +15705
½ = +15683
ž = +14914
| = *
< = ;
š = +14943
ˇ = +13433
´ = +15692
+ = `
` = ˇ
ˇ = +13433
' = ä
ü = +15428
õ = +15435
ö = +15434
ä = +15452
- = +
. = .
, = ,
// Hold Left Alt and Left Ctrl, then press the '_' key code.
// NOTE(review): Alt+Ctrl presumably acts as AltGr on the host layout — confirm.
Keyboard.press(KEY_LEFT_ALT);
Keyboard.press(KEY_LEFT_CTRL);
Keyboard.press('_');
// press() keeps a key held until it is released; without this the
// modifiers stay down and affect every later keystroke.
Keyboard.releaseAll();
Pressing these three keys together produces the backslash (\).

Midori128 incorrect ciphertext

I have a project in which I must implement a variety of security protocols for my Arduino Uno R3, such as Present, Misty, Prince, and so on. At the moment I am attempting to implement Midori128. I managed to get Midori64 to work properly, but for Midori128 the encryption is incorrect, while the decryption works properly.
This is the code at the moment. Bear in mind that I pulled this code from GitHub and then changed it around to get it to work on the Arduino as efficiently as possible; but, as I said, the encryption is incorrect, and I don't know why.
#include <stdio.h>
#include <stdint.h>
/* 16-entry substitution table; Midori128_S_Box indexes it with each 4-bit half of a cell. */
static uint16_t s_box[16] = { 0x1,0x0,0x5,0x3,0xe,0x2,0xf,0x7,0xd,0xa,0x9,0xb,0xc,0x8,0x4,0x6 };
/* Per-round constants: row r holds one 0/1 value per state cell, XORed into
 * the state together with the key by the round key-add functions. */
static uint16_t const_key[19][16] = { { 0,0,0,1,0,1,0,1,1,0,1,1,0,0,1,1 },{ 0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0 },
{ 1,0,1,0,0,1,0,0,0,0,1,1,0,1,0,1 },{ 0,1,1,0,0,0,1,0,0,0,0,1,0,0,1,1 },
{ 0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1 },{ 1,1,0,1,0,0,0,1,0,1,1,1,0,0,0,0 },
{ 0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,0 },{ 0,0,0,0,1,0,1,1,1,1,0,0,1,1,0,0 },
{ 1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1 },{ 0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,0 },
{ 0,1,1,1,0,0,0,1,1,0,0,1,0,1,1,1 },{ 0,0,1,0,0,0,1,0,1,0,0,0,1,1,1,0 },
{ 0,1,0,1,0,0,0,1,0,0,1,1,0,0,0,0 },{ 1,1,1,1,1,0,0,0,1,1,0,0,1,0,1,0 },
{ 1,1,0,1,1,1,1,1,1,0,0,1,0,0,0,0 },{ 0,1,1,1,1,1,0,0,1,0,0,0,0,0,0,1 },
{ 0,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0 },{ 0,0,1,0,0,0,1,1,1,0,1,1,0,1,0,0 },
{0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,0} };
/*
 * Bit permutation 0 on the low eight bits of x: bit 7 swaps with bit 3 and
 * bit 5 swaps with bit 1; bits 6, 4, 2 and 0 stay in place. Bits above bit 7
 * are discarded. The mapping is its own inverse.
 */
uint16_t bit_permutation_0(uint16_t x)
{
    uint16_t moved_up = (uint16_t)((x & 0x0a) << 4);   /* bits 3,1 -> 7,5 */
    uint16_t moved_down = (uint16_t)((x & 0xa0) >> 4); /* bits 7,5 -> 3,1 */
    uint16_t unmoved = (uint16_t)(x & 0x55);           /* bits 6,4,2,0    */

    return (uint16_t)(moved_up | moved_down | unmoved);
}
/*
 * Bit permutation 1 on the low eight bits of x. Source bit -> destination bit:
 * 6->7, 1->6, 0->5, 7->4, 2->3, 5->2, 4->1, 3->0. Bits above bit 7 are
 * discarded.
 */
uint16_t bit_permutation_1(uint16_t x)
{
    uint16_t up_by_1 = (uint16_t)((x & 0x44) << 1);   /* bits 6,2     */
    uint16_t up_by_5 = (uint16_t)((x & 0x03) << 5);   /* bits 1,0     */
    uint16_t down_by_3 = (uint16_t)((x & 0xb8) >> 3); /* bits 7,5,4,3 */

    return (uint16_t)(up_by_1 | up_by_5 | down_by_3);
}
/*
 * Undoes bit_permutation_1 on the low eight bits of x. Source bit ->
 * destination bit: 4->7, 7->6, 2->5, 1->4, 0->3, 3->2, 6->1, 5->0.
 * Bits above bit 7 are discarded.
 */
uint16_t inv_bit_permutation_1(uint16_t x)
{
    uint16_t up_by_3 = (uint16_t)((x & 0x17) << 3);   /* bits 4,2,1,0 */
    uint16_t down_by_1 = (uint16_t)((x & 0x88) >> 1); /* bits 7,3     */
    uint16_t down_by_5 = (uint16_t)((x & 0x60) >> 5); /* bits 6,5     */

    return (uint16_t)(up_by_3 | down_by_1 | down_by_5);
}
/*
 * Bit permutation 2 on the low eight bits of x. Source bit -> destination bit:
 * 5->7, 4->6, 3->5, 6->4, 1->3, 0->2, 7->1, 2->0. Bits above bit 7 are
 * discarded.
 */
uint16_t bit_permutation_2(uint16_t x)
{
    uint16_t up_by_2 = (uint16_t)((x & 0x3b) << 2);   /* bits 5,4,3,1,0 */
    uint16_t down_by_2 = (uint16_t)((x & 0x44) >> 2); /* bits 6,2       */
    uint16_t down_by_6 = (uint16_t)((x & 0x80) >> 6); /* bit 7          */

    return (uint16_t)(up_by_2 | down_by_2 | down_by_6);
}
/*
 * Undoes bit_permutation_2 on the low eight bits of x. Source bit ->
 * destination bit: 1->7, 4->6, 7->5, 6->4, 5->3, 0->2, 3->1, 2->0.
 * Bits above bit 7 are discarded.
 */
uint16_t inv_bit_permutation_2(uint16_t x)
{
    uint16_t up_by_6 = (uint16_t)((x & 0x02) << 6);   /* bit 1            */
    uint16_t up_by_2 = (uint16_t)((x & 0x11) << 2);   /* bits 4,0         */
    uint16_t down_by_2 = (uint16_t)((x & 0xec) >> 2); /* bits 7,6,5,3,2   */

    return (uint16_t)(up_by_6 | up_by_2 | down_by_2);
}
/*
 * Bit permutation 3 on the low eight bits of x. Source bit -> destination bit:
 * 0->7, 3->6, 6->5, 5->4, 4->3, 7->2, 2->1, 1->0. Bits above bit 7 are
 * discarded.
 */
uint16_t bit_permutation_3(uint16_t x)
{
    uint16_t up_by_7 = (uint16_t)((x & 0x01) << 7);   /* bit 0          */
    uint16_t up_by_3 = (uint16_t)((x & 0x08) << 3);   /* bit 3          */
    uint16_t down_by_1 = (uint16_t)((x & 0x76) >> 1); /* bits 6,5,4,2,1 */
    uint16_t down_by_5 = (uint16_t)((x & 0x80) >> 5); /* bit 7          */

    return (uint16_t)(up_by_7 | up_by_3 | down_by_1 | down_by_5);
}
/*
 * Undoes bit_permutation_3 on the low eight bits of x. Source bit ->
 * destination bit: 2->7, 5->6, 4->5, 3->4, 6->3, 1->2, 0->1, 7->0.
 * Bits above bit 7 are discarded.
 */
uint16_t inv_bit_permutation_3(uint16_t x)
{
    uint16_t up_by_5 = (uint16_t)((x & 0x04) << 5);   /* bit 2          */
    uint16_t up_by_1 = (uint16_t)((x & 0x3b) << 1);   /* bits 5,4,3,1,0 */
    uint16_t down_by_3 = (uint16_t)((x & 0x40) >> 3); /* bit 6          */
    uint16_t down_by_7 = (uint16_t)((x & 0x80) >> 7); /* bit 7          */

    return (uint16_t)(up_by_5 | up_by_1 | down_by_3 | down_by_7);
}
/*
 * Substitutes one 8-bit cell with S-box variant (r % 4).
 *
 * Each variant permutes the bits of x, runs both 4-bit halves through s_box,
 * and then undoes the permutation (permutation 0 is its own inverse).
 *
 * r: variant selector; only r % 4 is used. Any value whose remainder is not
 *    0, 1 or 2 (including negative remainders) selects variant 3.
 * x: cell value; only its low eight bits contribute.
 * Returns the substituted 8-bit value.
 *
 * The branches form a single else-if chain. With independent `if`s the final
 * `else` pairs only with the `r % 4 == 2` test, so variants 0 and 1 would be
 * recomputed as variant 3.
 */
uint16_t Midori128_S_Box(int r, uint16_t x)
{
    uint16_t y;
    int variant = r % 4;

    if (variant == 0)
    {
        y = bit_permutation_0(x);
        y = (s_box[(y & 0xf0) >> 4] << 4) ^ (s_box[(y & 0x0f)]);
        y = bit_permutation_0(y);
    }
    else if (variant == 1)
    {
        y = bit_permutation_1(x);
        y = (s_box[(y & 0xf0) >> 4] << 4) ^ s_box[(y & 0x0f)];
        y = inv_bit_permutation_1(y);
    }
    else if (variant == 2)
    {
        y = bit_permutation_2(x);
        y = (s_box[(y & 0xf0) >> 4] << 4) ^ s_box[(y & 0x0f)];
        y = inv_bit_permutation_2(y);
    }
    else
    {
        y = bit_permutation_3(x);
        y = (s_box[(y & 0xf0) >> 4] << 4) ^ s_box[(y & 0x0f)];
        y = inv_bit_permutation_3(y);
    }
    return y;
}
/*
 * Applies the 8-bit S-box layer to all 16 state cells in place.
 *
 * The S-box variant is chosen by the cell's position (i % 4), as the Midori128
 * specification defines SubCell, not by the round number.
 *
 * r:     round number; kept so existing callers compile unchanged, but no
 *        longer used to pick the S-box.
 * state: array of 16 cells, overwritten with their substituted values.
 */
void SubCell(int r,uint16_t *state)
{
    int i;

    (void)r; /* intentionally unused, see above */
    for (i = 0; i <= 15; i++)
    {
        state[i] = Midori128_S_Box(i % 4, state[i]);
    }
}
/*
 * Reorders the 16 state cells in place: position k receives the cell that
 * was previously at position from[k].
 */
void ShuffleCell(uint16_t *state)
{
    static const unsigned char from[16] = {
        0, 10, 5, 15,
        14, 4, 11, 1,
        9, 3, 12, 6,
        7, 13, 2, 8
    };
    uint16_t reordered[16];
    int k;

    for (k = 0; k < 16; k++)
    {
        reordered[k] = state[from[k]];
    }
    for (k = 0; k < 16; k++)
    {
        state[k] = reordered[k];
    }
}
/*
 * Reverses ShuffleCell in place: position k receives the cell that was
 * previously at position from[k].
 */
void Inv_ShuffleCell(uint16_t *state)
{
    static const unsigned char from[16] = {
        0, 7, 14, 9,
        5, 2, 11, 12,
        15, 8, 1, 6,
        10, 13, 4, 3
    };
    uint16_t reordered[16];
    int k;

    for (k = 0; k < 16; k++)
    {
        reordered[k] = state[from[k]];
    }
    for (k = 0; k < 16; k++)
    {
        state[k] = reordered[k];
    }
}
/*
 * Mixes each group of four consecutive cells in place: every cell becomes the
 * XOR of the other three cells in its group.
 */
void MixColumn(uint16_t *state)
{
    int group;
    int member;

    for (group = 0; group < 4; group++)
    {
        uint16_t *cell = state + 4 * group;
        /* XOR of all four; removing a cell's own value leaves the other three. */
        uint16_t all_four = (uint16_t)(cell[0] ^ cell[1] ^ cell[2] ^ cell[3]);

        for (member = 0; member < 4; member++)
        {
            cell[member] = (uint16_t)(all_four ^ cell[member]);
        }
    }
}
/*
 * Encryption-side key addition for round r: XORs every state cell with the
 * matching key cell and the matching entry of const_key[r].
 *
 * r:     row of const_key to use.
 * state: 16 cells, updated in place.
 * K:     16 key cells, read only.
 */
void rth_Round_Encrypt_KeyAdd(int r, uint16_t *state, uint16_t *K)
{
    const uint16_t *round_const = const_key[r];
    int cell;

    for (cell = 0; cell < 16; cell++)
    {
        state[cell] = state[cell] ^ K[cell] ^ round_const[cell];
    }
}
/*
 * Decryption-side key addition for round r. Builds the round key
 * (K XOR const_key[r]), passes it through MixColumn and then Inv_ShuffleCell,
 * and XORs the result into the state.
 *
 * r:     row of const_key to use.
 * state: 16 cells, updated in place.
 * K:     16 key cells, read only.
 */
void rth_Round_Decrypt_KeyAdd(int r, uint16_t *state, uint16_t *K)
{
    uint16_t round_key[16];
    int cell;

    for (cell = 0; cell < 16; cell++)
    {
        round_key[cell] = K[cell] ^ const_key[r][cell];
    }

    /* Apply the same linear steps the decryption round applies to the state. */
    MixColumn(round_key);
    Inv_ShuffleCell(round_key);

    for (cell = 0; cell < 16; cell++)
    {
        state[cell] = state[cell] ^ round_key[cell];
    }
}
/*
 * Encrypts one 16-cell block.
 *
 * r:          total number of rounds; r - 1 full rounds run, then a final
 *             SubCell and key XOR.
 * plaintext:  16 input cells, not modified.
 * K:          16 key cells, used for whitening and for every round.
 * ciphertext: 16 output cells.
 */
void Encrypt(int r, uint16_t *plaintext, uint16_t *K, uint16_t *ciphertext)
{
    int cell;
    int round = 0;

    /* Start from a copy of the plaintext, then whiten it with the key. */
    for (cell = 0; cell < 16; cell++)
    {
        ciphertext[cell] = plaintext[cell];
    }
    for (cell = 0; cell < 16; cell++)
    {
        ciphertext[cell] = ciphertext[cell] ^ K[cell];
    }

    /* Full rounds 0 .. r-2. */
    while (round <= (r - 2))
    {
        SubCell(round, ciphertext);
        ShuffleCell(ciphertext);
        MixColumn(ciphertext);
        rth_Round_Encrypt_KeyAdd(round, ciphertext, K);
        round++;
    }

    /* Last round: substitution only, using the round index the loop ended on. */
    SubCell(round, ciphertext);
    for (cell = 0; cell < 16; cell++)
    {
        ciphertext[cell] = ciphertext[cell] ^ K[cell];
    }
}
/*
 * Decrypts one 16-cell block.
 *
 * r:          total number of rounds, the same value that was given to Encrypt.
 * ciphertext: 16 input cells, not modified.
 * K:          16 key cells.
 * plaintext:  16 output cells.
 */
void Decrypt(int r, uint16_t *ciphertext, uint16_t *K,uint16_t *plaintext)
{
    int cell;
    int round = r - 2;

    /* Start from a copy of the ciphertext, then strip the final key XOR. */
    for (cell = 0; cell < 16; cell++)
    {
        plaintext[cell] = ciphertext[cell];
    }
    for (cell = 0; cell < 16; cell++)
    {
        plaintext[cell] = plaintext[cell] ^ K[cell];
    }

    /* Walk the rounds backwards, r-2 down to 0. */
    while (round >= 0)
    {
        SubCell(round + 1, plaintext);
        MixColumn(plaintext);
        Inv_ShuffleCell(plaintext);
        rth_Round_Decrypt_KeyAdd(round, plaintext, K);
        round--;
    }

    /* round is whatever the loop ended on (-1 once at least one round ran). */
    SubCell(round, plaintext);
    for (cell = 0; cell < 16; cell++)
    {
        plaintext[cell] = plaintext[cell] ^ K[cell];
    }
}
/* Arduino start-up hook: opens the serial port at 9600 baud. */
void setup (){
Serial.begin(9600);
/* NOTE(review): printf.begin() is not defined anywhere in this listing;
 * presumably it comes from a printf-to-serial helper library — confirm. */
printf.begin();
}
/*
 * Arduino main loop: encrypts with an all-zero key while sweeping the first
 * three plaintext cells through every value 0x00..0xff, printing each
 * resulting ciphertext as 16 hex values.
 */
void loop ()
{
int i,j1,j2,j3;
uint16_t plaintext[16] = { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 };
uint16_t Key[16] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
/* Only referenced by the disabled decryption check further down. */
uint16_t plaintext_temp[16];
uint16_t ciphertext_temp[16];
/* Three nested sweeps: cell 0 outermost, cell 2 innermost. */
for (j1 = 0; j1 <= 0xff; j1++)
{
plaintext[0] = j1;
for (j2 = 0; j2 <= 0xff; j2++)
{
plaintext[1] = j2;
for (j3 = 0; j3 <= 0xff; j3++)
{
plaintext[2] = j3;
/*printf("Before Encryption: ");
for (i = 0; i < 16; i++)
{
printf("%x ", plaintext[i]);
}
printf("\n");*/
/* 20 is the round count handed to Encrypt. */
Encrypt(20, plaintext, Key, ciphertext_temp);
printf("After Encryption: ");
for (i = 0; i < 16; i++)
{
printf("%x ", ciphertext_temp[i]);
}
printf("\n");
/*Decrypt(20, ciphertext_temp, Key, plaintext_temp);
printf("After Decryption: ");
for (i = 0; i < 16; i++)
{
printf("%x ", plaintext_temp[i]);
}
printf("\n");*/
printf("\n");
}
}
}
}
I reviewed the original paper numerous times, and understand the method pretty well, yet I can't pinpoint the exact reason why it's not outputting the correct cipher text, which is c055cbb95996d14902b60574d5e728d6. Any help or hints would be greatly appreciated. Thank you.
Late Edit:
I managed to find the 4 sboxes utilized in Midori128, however, I'm still at a loss at how to exactly incorporate them into my code, if anyone has any ideas I'd appreciate it greatly.
Here are the sboxes:
static uint16_t ssb0[16][16] = {
{0x11, 0x10, 0x51, 0x50, 0xb4, 0x30, 0xf4, 0x70, 0x59, 0x58, 0x19, 0x18, 0xfc, 0x78, 0xbc, 0x38},
{0x01, 0x00, 0x13, 0x12, 0xa4, 0x20, 0xb6, 0x32, 0x0b, 0x0a, 0x1b, 0x1a, 0xae, 0x2a, 0xbe, 0x3a},
{0x15, 0x31, 0x55, 0x71, 0xb5, 0x35, 0xf5, 0x75, 0x5d, 0x79, 0x1d, 0x39, 0xfd, 0x7d, 0xbd, 0x3d},
{0x05, 0x21, 0x17, 0x33, 0xa5, 0x25, 0xb7, 0x37, 0x0f, 0x2b, 0x1f, 0x3b, 0xaf, 0x2f, 0xbf, 0x3f},
{0x4b, 0x4a, 0x5b, 0x5a, 0xee, 0x6a, 0xfe, 0x7a, 0x49, 0x48, 0x41, 0x40, 0xec, 0x68, 0xe4, 0x60},
{0x03, 0x02, 0x53, 0x52, 0xa6, 0x22, 0xf6, 0x72, 0x09, 0x08, 0x43, 0x42, 0xac, 0x28, 0xe6, 0x62},
{0x4f, 0x6b, 0x5f, 0x7b, 0xef, 0x6f, 0xff, 0x7f, 0x4d, 0x69, 0x45, 0x61, 0xed, 0x6d, 0xe5, 0x65},
{0x07, 0x23, 0x57, 0x73, 0xa7, 0x27, 0xf7, 0x77, 0x0d, 0x29, 0x47, 0x63, 0xad, 0x2d, 0xe7, 0x67},
{0x95, 0xb0, 0xd5, 0xf0, 0x94, 0x90, 0xd4, 0xd0, 0xdd, 0xf8, 0x9d, 0xb8, 0xdc, 0xd8, 0x9c, 0x98},
{0x85, 0xa0, 0x97, 0xb2, 0x84, 0x80, 0x96, 0x92, 0x8f, 0xaa, 0x9f, 0xba, 0x8e, 0x8a, 0x9e, 0x9a},
{0x91, 0xb1, 0xd1, 0xf1, 0x14, 0x34, 0x54, 0x74, 0xd9, 0xf9, 0x99, 0xb9, 0x5c, 0x7c, 0x1c, 0x3c},
{0x81, 0xa1, 0x93, 0xb3, 0x04, 0x24, 0x16, 0x36, 0x8b, 0xab, 0x9b, 0xbb, 0x0e, 0x2e, 0x1e, 0x3e},
{0xcf, 0xea, 0xdf, 0xfa, 0xce, 0xca, 0xde, 0xda, 0xcd, 0xe8, 0xc5, 0xe0, 0xcc, 0xc8, 0xc4, 0xc0},
{0x87, 0xa2, 0xd7, 0xf2, 0x86, 0x82, 0xd6, 0xd2, 0x8d, 0xa8, 0xc7, 0xe2, 0x8c, 0x88, 0xc6, 0xc2},
{0xcb, 0xeb, 0xdb, 0xfb, 0x4e, 0x6e, 0x5e, 0x7e, 0xc9, 0xe9, 0xc1, 0xe1, 0x4c, 0x6c, 0x44, 0x64},
{0x83, 0xa3, 0xd3, 0xf3, 0x06, 0x26, 0x56, 0x76, 0x89, 0xa9, 0xc3, 0xe3, 0x0c, 0x2c, 0x46, 0x66}
};
static uint16_t ssb1[16][16] = {
{0x88, 0x8a, 0x4b, 0xcb, 0xac, 0xae, 0x6f, 0xef, 0x80, 0x82, 0x43, 0xc3, 0x94, 0x96, 0x57, 0xd7},
{0xa8, 0xaa, 0x6b, 0xeb, 0x8c, 0x8e, 0x4f, 0xcf, 0x98, 0x9a, 0x5b, 0xdb, 0x9c, 0x9e, 0x5f, 0xdf},
{0xb4, 0xb6, 0x77, 0xf7, 0xa4, 0xa6, 0x67, 0xe7, 0x90, 0x92, 0x53, 0xd3, 0x84, 0x86, 0x47, 0xc7},
{0xbc, 0xbe, 0x7f, 0xff, 0xa0, 0xa2, 0x63, 0xe3, 0xb8, 0xba, 0x7b, 0xfb, 0xb0, 0xb2, 0x73, 0xf3},
{0xca, 0xc8, 0x4a, 0x0a, 0xee, 0xec, 0x6e, 0x2e, 0xc2, 0xc0, 0x42, 0x02, 0xd6, 0xd4, 0x56, 0x16},
{0xea, 0xe8, 0x6a, 0x2a, 0xce, 0xcc, 0x4e, 0x0e, 0xda, 0xd8, 0x5a, 0x1a, 0xde, 0xdc, 0x5e, 0x1e},
{0xf6, 0xf4, 0x76, 0x36, 0xe6, 0xe4, 0x66, 0x26, 0xd2, 0xd0, 0x52, 0x12, 0xc6, 0xc4, 0x46, 0x06},
{0xfe, 0xfc, 0x7e, 0x3e, 0xe2, 0xe0, 0x62, 0x22, 0xfa, 0xf8, 0x7a, 0x3a, 0xf2, 0xf0, 0x72, 0x32},
{0x08, 0x89, 0x09, 0x8b, 0x2c, 0xad, 0x2d, 0xaf, 0x00, 0x81, 0x01, 0x83, 0x14, 0x95, 0x15, 0x97},
{0x28, 0xa9, 0x29, 0xab, 0x0c, 0x8d, 0x0d, 0x8f, 0x18, 0x99, 0x19, 0x9b, 0x1c, 0x9d, 0x1d, 0x9f},
{0x34, 0xb5, 0x35, 0xb7, 0x24, 0xa5, 0x25, 0xa7, 0x10, 0x91, 0x11, 0x93, 0x04, 0x85, 0x05, 0x87},
{0x3c, 0xbd, 0x3d, 0xbf, 0x20, 0xa1, 0x21, 0xa3, 0x38, 0xb9, 0x39, 0xbb, 0x30, 0xb1, 0x31, 0xb3},
{0x49, 0xc9, 0x48, 0x0b, 0x6d, 0xed, 0x6c, 0x2f, 0x41, 0xc1, 0x40, 0x03, 0x55, 0xd5, 0x54, 0x17},
{0x69, 0xe9, 0x68, 0x2b, 0x4d, 0xcd, 0x4c, 0x0f, 0x59, 0xd9, 0x58, 0x1b, 0x5d, 0xdd, 0x5c, 0x1f},
{0x75, 0xf5, 0x74, 0x37, 0x65, 0xe5, 0x64, 0x27, 0x51, 0xd1, 0x50, 0x13, 0x45, 0xc5, 0x44, 0x07},
{0x7d, 0xfd, 0x7c, 0x3f, 0x61, 0xe1, 0x60, 0x23, 0x79, 0xf9, 0x78, 0x3b, 0x71, 0xf1, 0x70, 0x33}
};
static uint16_t ssb2[16][16] = {
{0x44, 0xc3, 0x47, 0x43, 0x40, 0xc0, 0xc2, 0x42, 0x54, 0xd3, 0x57, 0x53, 0x50, 0xd0, 0xd2, 0x52},
{0x3c, 0xbb, 0x3f, 0x3b, 0x38, 0xb8, 0xba, 0x3a, 0x7c, 0xfb, 0x7f, 0x7b, 0x78, 0xf8, 0xfa, 0x7a},
{0x74, 0xf3, 0x77, 0x73, 0x70, 0xf0, 0xf2, 0x72, 0x64, 0xe3, 0x67, 0x63, 0x60, 0xe0, 0xe2, 0x62},
{0x34, 0xb3, 0x37, 0x33, 0x30, 0xb0, 0xb2, 0x32, 0x14, 0x93, 0x17, 0x13, 0x10, 0x90, 0x92, 0x12},
{0x04, 0x83, 0x07, 0x03, 0x00, 0x80, 0x82, 0x02, 0x4c, 0xcb, 0x4f, 0x4b, 0x48, 0xc8, 0xca, 0x4a},
{0x0c, 0x8b, 0x0f, 0x0b, 0x08, 0x88, 0x8a, 0x0a, 0x5c, 0xdb, 0x5f, 0x5b, 0x58, 0xd8, 0xda, 0x5a},
{0x2c, 0xab, 0x2f, 0x2b, 0x28, 0xa8, 0xaa, 0x2a, 0x6c, 0xeb, 0x6f, 0x6b, 0x68, 0xe8, 0xea, 0x6a},
{0x24, 0xa3, 0x27, 0x23, 0x20, 0xa0, 0xa2, 0x22, 0x1c, 0x9b, 0x1f, 0x1b, 0x18, 0x98, 0x9a, 0x1a},
{0x45, 0xc7, 0x46, 0x41, 0xc4, 0xc5, 0xc6, 0xc1, 0x55, 0xd7, 0x56, 0x51, 0xd4, 0xd5, 0xd6, 0xd1},
{0x3d, 0xbf, 0x3e, 0x39, 0xbc, 0xbd, 0xbe, 0xb9, 0x7d, 0xff, 0x7e, 0x79, 0xfc, 0xfd, 0xfe, 0xf9},
{0x75, 0xf7, 0x76, 0x71, 0xf4, 0xf5, 0xf6, 0xf1, 0x65, 0xe7, 0x66, 0x61, 0xe4, 0xe5, 0xe6, 0xe1},
{0x35, 0xb7, 0x36, 0x31, 0xb4, 0xb5, 0xb6, 0xb1, 0x15, 0x97, 0x16, 0x11, 0x94, 0x95, 0x96, 0x91},
{0x05, 0x87, 0x06, 0x01, 0x84, 0x85, 0x86, 0x81, 0x4d, 0xcf, 0x4e, 0x49, 0xcc, 0xcd, 0xce, 0xc9},
{0x0d, 0x8f, 0x0e, 0x09, 0x8c, 0x8d, 0x8e, 0x89, 0x5d, 0xdf, 0x5e, 0x59, 0xdc, 0xdd, 0xde, 0xd9},
{0x2d, 0xaf, 0x2e, 0x29, 0xac, 0xad, 0xae, 0xa9, 0x6d, 0xef, 0x6e, 0x69, 0xec, 0xed, 0xee, 0xe9},
{0x25, 0xa7, 0x26, 0x21, 0xa4, 0xa5, 0xa6, 0xa1, 0x1d, 0x9f, 0x1e, 0x19, 0x9c, 0x9d, 0x9e, 0x99}
};
static uint16_t ssb3[16][16] = {
{0x22, 0x2b, 0x20, 0x29, 0xa2, 0xab, 0x26, 0x2f, 0x4b, 0x0b, 0x49, 0x09, 0xcb, 0x8b, 0x4f, 0x0f},
{0xb2, 0xbb, 0x34, 0x3d, 0x32, 0x3b, 0x36, 0x3f, 0xdb, 0x9b, 0x5d, 0x1d, 0x5b, 0x1b, 0x5f, 0x1f},
{0x02, 0x43, 0x00, 0x41, 0x82, 0xc3, 0x06, 0x47, 0x42, 0x03, 0x40, 0x01, 0xc2, 0x83, 0x46, 0x07},
{0x92, 0xd3, 0x14, 0x55, 0x12, 0x53, 0x16, 0x57, 0xd2, 0x93, 0x54, 0x15, 0x52, 0x13, 0x56, 0x17},
{0x2a, 0x23, 0x28, 0x21, 0xaa, 0xa3, 0x2e, 0x27, 0x6b, 0x0a, 0x69, 0x08, 0xeb, 0x8a, 0x6f, 0x0e},
{0xba, 0xb3, 0x3c, 0x35, 0x3a, 0x33, 0x3e, 0x37, 0xfb, 0x9a, 0x7d, 0x1c, 0x7b, 0x1a, 0x7f, 0x1e},
{0x62, 0x63, 0x60, 0x61, 0xe2, 0xe3, 0x66, 0x67, 0x6a, 0x4a, 0x68, 0x48, 0xea, 0xca, 0x6e, 0x4e},
{0xf2, 0xf3, 0x74, 0x75, 0x72, 0x73, 0x76, 0x77, 0xfa, 0xda, 0x7c, 0x5c, 0x7a, 0x5a, 0x7e, 0x5e},
{0xb4, 0xbd, 0x24, 0x2d, 0xb6, 0xbf, 0xa6, 0xaf, 0xdd, 0x9d, 0x4d, 0x0d, 0xdf, 0x9f, 0xcf, 0x8f},
{0xb0, 0xb9, 0x30, 0x39, 0xa0, 0xa9, 0xa4, 0xad, 0xd9, 0x99, 0x59, 0x19, 0xc9, 0x89, 0xcd, 0x8d},
{0x94, 0xd5, 0x04, 0x45, 0x96, 0xd7, 0x86, 0xc7, 0xd4, 0x95, 0x44, 0x05, 0xd6, 0x97, 0xc6, 0x87},
{0x90, 0xd1, 0x10, 0x51, 0x80, 0xc1, 0x84, 0xc5, 0xd0, 0x91, 0x50, 0x11, 0xc0, 0x81, 0xc4, 0x85},
{0xbc, 0xb5, 0x2c, 0x25, 0xbe, 0xb7, 0xae, 0xa7, 0xfd, 0x9c, 0x6d, 0x0c, 0xff, 0x9e, 0xef, 0x8e},
{0xb8, 0xb1, 0x38, 0x31, 0xa8, 0xa1, 0xac, 0xa5, 0xf9, 0x98, 0x79, 0x18, 0xe9, 0x88, 0xed, 0x8c},
{0xf4, 0xf5, 0x64, 0x65, 0xf6, 0xf7, 0xe6, 0xe7, 0xfc, 0xdc, 0x6c, 0x4c, 0xfe, 0xde, 0xee, 0xce},
{0xf0, 0xf1, 0x70, 0x71, 0xe0, 0xe1, 0xe4, 0xe5, 0xf8, 0xd8, 0x78, 0x58, 0xe8, 0xc8, 0xec, 0xcc}
};
Your code contains Sb1, but where is Sb0? According to the original paper, two smaller 4-bit S-boxes are used to construct each 8-bit S-box.
In void Encrypt(int r, uint16_t *plaintext, uint16_t *K, uint16_t *ciphertext), you only need to XOR the key with the plaintext to get the initial ciphertext state; there is no need to copy first and then XOR, so a single for loop is enough.
use this link as benchmark for your development code

error between clEnqueueMapImage and clEnqueueUnmapMemObject

Why can't I memcpy into a mapped buffer?
void* map_ptr = clEnqueueMapImage(d->GetQueue(), DeviceMem, CL_TRUE, CL_MAP_WRITE, HostOrigin, Region, &HostRowPitch, &HostSlicePitch, 0, NULL, NULL, NULL);
memcpy(map_ptr, Data, HostWidth * HostHeight * sizeof(unsigned int)); ///ERROR ????????????
*Error = clEnqueueUnmapMemObject(d->GetQueue(), DeviceMem, map_ptr, 0, NULL, NULL);
CreateBuffer is:
cl_mem temp = clCreateBuffer(d->GetContext(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, Size, NULL, Error);
HostSlicePitch != 0
size_t HostOrigin[3] = {0,0,0};
size_t Region[3] = {sizeof(unsigned int)*HostWidth, HostHeight, HostDepth};
Solved!
Need clFlush between Map and UnMap.
https://github.com/CyberRSR/OpenCLV
void* map_ptr = clEnqueueMapBuffer(d->GetQueue(), DeviceMem, CL_TRUE, CL_MAP_WRITE, 0, HostWidth*HostHeight*HostDepth, NULL, NULL, d->GetEvent(), Error);
clFlush(d->GetQueue());
d->ThrowMemEvent(DeviceMem);
memcpy(map_ptr, Data, HostWidth*HostHeight*HostDepth*sizeof(unsigned int));
*Error = clEnqueueUnmapMemObject(d->GetQueue(), DeviceMem, map_ptr, 0, NULL, d->GetEvent());

Using clEnqueueNDRangeKernel in OpenCL

I need help with one function in OpenCL. When I use clEnqueueNDRangeKernel instead of clEnqueueTask, the program takes much longer to finish. Why is that? As I understand it, the program should then use the data-parallel model and run faster; am I wrong? And if I am, how can I change the code to see the data-parallel model actually at work?
// Converts 4-byte pixels to one grey byte each: the output byte is the integer
// mean of the first three bytes of the pixel. The kernel never reads a
// work-item id, so every work-item that runs it walks the whole input.
__kernel void black_white_img(__global unsigned char *pDataIn, __global unsigned char *pDataOut, unsigned int InSize, unsigned int OutSize)
{
    int src = 0;   // byte offset of the current input pixel
    int dst = 0;   // index of the current output byte

    while (src < InSize)
    {
        pDataOut[dst] = (pDataIn[src] + pDataIn[src + 1] + pDataIn[src + 2]) / 3;
        src += 4;
        dst++;
    }
}
int iWidth, iHeight, iBpp;
vector<unsigned char> pDataIn;
vector<unsigned char> pDataOut;
int err = LoadBmpFile(L"3840x2160.bmp", iWidth, iHeight, iBpp, pDataIn);
if (err != 0 || pDataIn.size() == 0 || iBpp != 32)
{
std::cout << "error load input file!\n";
}
pDataOut.resize(pDataIn.size()/4);
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem memobj = NULL;
cl_mem memobj1 = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_platform_id platform_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
unsigned int SizeIn, SizeOut;
SizeIn = pDataIn.size();
SizeOut = pDataOut.size();
FILE *fp;
char fileName[] = "./kernel.cl";
char *source_str;
size_t source_size;
//Loading kernel
fp = fopen(fileName, "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
system("PAUSE");
exit(1);
}
source_str = (char*)malloc(MAX_SOURCE_SIZE);
source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);
//Getting Platform and Device
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
//Create context
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
//create kernel program
program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
(const size_t *)&source_size, &ret);
//build it
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
//create queue
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
//create bufer
memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, pDataIn.size(), NULL, &ret);
memobj1 = clCreateBuffer(context, CL_MEM_READ_WRITE,pDataOut.size(), NULL, &ret);
//copy buffer to kernel
ret = clEnqueueWriteBuffer(command_queue, memobj, CL_TRUE, 0, pDataIn.size(), pDataIn.data(), 0, NULL, NULL);
//create opencl kernel
kernel = clCreateKernel(program, "red_to_green", &ret);
//set kernel args
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memobj1);
ret = clSetKernelArg(kernel, 2, sizeof(unsigned int), (void *)&SizeIn);
ret = clSetKernelArg(kernel, 3, sizeof(unsigned int), (void *)&SizeOut);
const size_t cycles_max = 10;
clock_t t0 = clock();
for (int i = 0; i<cycles_max; i++){
float start_time = clock();
float search_time = 0;
//float last_time = 0;
//execute opencl kernel
//ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
size_t global_item_size = 8;
size_t local_item_size = 4;
ret = clEnqueueNDRangeKernel(command_queue,kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
//copy from buffer
ret = clEnqueueReadBuffer(command_queue, memobj1, CL_TRUE, 0, pDataOut.size(), pDataOut.data(), 0, NULL, NULL);
ret = clFinish(command_queue);
float end_time = clock();
search_time = end_time - start_time;
//float last_time = last_time + search_time;
cout << search_time << endl;
}
clock_t t1 = clock();
double time_seconds = (t1-t0)*CLOCKS_PER_SEC/cycles_max;
cout << time_seconds/1000 <<endl;
WriteBmpFile(L"3840x2160_wb.bmp", iWidth, iHeight, 8, pDataOut.size(), pDataOut.data(), false);
system("PAUSE");
from the docs page:
The kernel is executed using a single work-item.
clEnqueueTask is equivalent to calling clEnqueueNDRangeKernel with
work_dim = 1, global_work_offset = NULL, global_work_size[0] set to 1,
and local_work_size[0] set to 1.
When you use clEnqueueNDRangeKernel, you are using 2 work groups of 4 work items, but they are all doing the same work. They all read from the same global memory, but more importantly, they all try to write to the same locations in global memory.
You need to take into account the worker's global id when doing your computations.
// Converts 4-byte pixels to one grey byte each. Each work-item starts at the
// pixel matching its global id and then advances by the global size, so the
// work-items share the pixels between them.
__kernel void black_white_img(__global unsigned char *pDataIn, __global unsigned char *pDataOut, unsigned int InSize, unsigned int OutSize)
{
    int first = get_global_id(0);
    int step = get_global_size(0);
    unsigned int pixel_count = InSize >> 2;   // four input bytes per pixel

    for (int px = first; px < pixel_count; px += step)
    {
        int base = px * 4;
        pDataOut[px] = (pDataIn[base] + pDataIn[base + 1] + pDataIn[base + 2]) / 3;
    }
}
It looks like you are iterating over all pixels of an input image in your kernel. This will cause all threads to calculate the image intensity for all pixels. Try to launch a single thread for each pixel instead. To do so, change your kernel source code to only calculate the output value for one pixel:
// Converts one 4-byte input pixel to one grey output byte; intended to be
// launched with one work-item per pixel.
__kernel void black_white_img(__global unsigned char *pDataIn, __global unsigned char *pDataOut) {
    int j = get_global_id(0);   // pixel index, also the output byte index
    int i = j * 4;              // offset of that pixel's first input byte
    // Read from the 4-byte-strided input and write to the 1-byte-strided output.
    // The indices were swapped, which wrote past the end of the output buffer.
    pDataOut[j] = (pDataIn[i] + pDataIn[i + 1] + pDataIn[i + 2]) / 3;
}
This code will now perform the averaging over the RGB values of your RGBA input image for the single pixel at location i. Now all you need to do is launch as many threads as your image has pixels. Relevant changes:
//create opencl kernel
kernel = clCreateKernel(program, "black_white_img", &ret);
//set kernel args
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memobj1);
const size_t cycles_max = 10;
clock_t t0 = clock();
for (int i = 0; i<cycles_max; i++){
float start_time = clock();
float search_time = 0;
//float last_time = 0;
//execute opencl kernel
//ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
size_t global_item_size = iWidth * iHeight;
ret = clEnqueueNDRangeKernel(command_queue,kernel, 1, NULL, &global_item_size, NULL, 0, NULL, NULL);
This should give a considerable speedup comparing to your code.

Running an OpenCl program

I'm new to OpenCL, and I'm having trouble running my first ever program. I have an ATI graphics card and I've done the necessary installation following http://www.thebigblob.com/getting-started-with-opencl-and-gpu-computing/. The following is the code I picked off that website to test my setup.
#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define MAX_SOURCE_SIZE (0x100000)
/*
 * Host side of the vector-add example: fills two 1024-element vectors, loads
 * the kernel source from "vector_add_kernel.cl" in the current working
 * directory, runs the "vector_add" kernel on the first available device and
 * prints A[i] + B[i] = C[i] for every element.
 *
 * Returns 0 on completion; exits with status 1 if the kernel file cannot be
 * opened or a host allocation fails. OpenCL status codes are stored in `ret`
 * but not inspected.
 */
int main(void) {
    // Create the two input vectors
    int i;
    const int LIST_SIZE = 1024;
    int *A = (int*)malloc(sizeof(int)*LIST_SIZE);
    int *B = (int*)malloc(sizeof(int)*LIST_SIZE);
    if (!A || !B) {
        fprintf(stderr, "Failed to allocate input vectors.\n");
        free(A);
        free(B);
        exit(1);
    }
    for(i = 0; i < LIST_SIZE; i++) {
        A[i] = i;
        B[i] = LIST_SIZE - i;
    }

    // Load the kernel source code into the array source_str
    FILE *fp;
    char *source_str;
    size_t source_size;
    fp = fopen("vector_add_kernel.cl", "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    source_str = (char*)malloc(MAX_SOURCE_SIZE);
    if (!source_str) {
        fprintf(stderr, "Failed to allocate kernel source buffer.\n");
        fclose(fp);
        exit(1);
    }
    source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose( fp );

    // Get platform and device information
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_ALL, 1,
            &device_id, &ret_num_devices);

    // Create an OpenCL context
    cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);

    // Create a command queue
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

    // Create memory buffers on the device for each vector
    cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
            LIST_SIZE * sizeof(int), NULL, &ret);
    cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
            LIST_SIZE * sizeof(int), NULL, &ret);
    cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
            LIST_SIZE * sizeof(int), NULL, &ret);

    // Copy the lists A and B to their respective memory buffers
    ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
            LIST_SIZE * sizeof(int), A, 0, NULL, NULL);
    ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0,
            LIST_SIZE * sizeof(int), B, 0, NULL, NULL);

    // Create a program from the kernel source
    cl_program program = clCreateProgramWithSource(context, 1,
            (const char **)&source_str, (const size_t *)&source_size, &ret);

    // Build the program
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

    // Create the OpenCL kernel
    cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);

    // Set the arguments of the kernel
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
    ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);

    // Execute the OpenCL kernel on the list
    size_t global_item_size = LIST_SIZE; // Process the entire lists
    size_t local_item_size = 64; // Process in groups of 64
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
            &global_item_size, &local_item_size, 0, NULL, NULL);

    // Read the memory buffer C on the device to the local variable C
    int *C = (int*)malloc(sizeof(int)*LIST_SIZE);
    if (!C) {
        fprintf(stderr, "Failed to allocate result vector.\n");
        exit(1);
    }
    ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0,
            LIST_SIZE * sizeof(int), C, 0, NULL, NULL);

    // Display the result to the screen
    for(i = 0; i < LIST_SIZE; i++)
        printf("%d + %d = %d\n", A[i], B[i], C[i]);

    // Clean up
    ret = clFlush(command_queue);
    ret = clFinish(command_queue);
    ret = clReleaseKernel(kernel);
    ret = clReleaseProgram(program);
    ret = clReleaseMemObject(a_mem_obj);
    ret = clReleaseMemObject(b_mem_obj);
    ret = clReleaseMemObject(c_mem_obj);
    ret = clReleaseCommandQueue(command_queue);
    ret = clReleaseContext(context);
    free(source_str); // the source buffer was previously never released
    free(A);
    free(B);
    free(C);
    return 0;
}
...as per the instructions there. I keep getting "Failed to load kernel.", which means the program could not open the kernel file. Where do I get the kernel source from? Could someone please tell me how to get it to run? Thanks in advance. These are the commands I used to build it:
gcc -c -I /home/suraj/Desktop/Intern/softwares/AMD-APP-SDK-v2.7-lnx32
/AMD-APP-SDK-v2.7-RC-lnx32/include opencl.c -o oopencl.o
gcc opencl.o -o host -L /home/suraj/Desktop/Intern/softwares/AMD-APP-SDK
-v2.7-lnx32/AMD-APP-SDK-v2.7-RC-lnx32/lib/x86 -l OpenCL
I just went through this exercise with this source code as well, and everything worked fine for me. Did you grab the kernel program from github? It is the third program down in the list called "vector_add_kernel.cl". The C program needs the kernel to actually run.

Resources