Syntax error in Frama-C due to custom machdep

I am using the MPLAB XC16 C Compiler for my application. If I use machdep x86_16, Frama-C works normally. For example, I can launch Frama-C in this way:
$ frama-c-gui -machdep x86_16 -cpp-command 'C:\\"Program Files (x86)"\\Microchip\\xc16\\v1.26\\bin\\xc16-gcc.exe -E' -no-cpp-gnu-like D:\\project\\*.c
But machdep x86_16 does not fully match XC16, so I want to customize the machdep.
Following the instructions, I created a file machdep_xc16.ml that contains:
open Cil_types

let xc16 =
  {
    version = "dsPIC33F";
    compiler = "XC16";            (* Compiler being used. *)
    sizeof_short = 2;             (* Size of "short" *)
    sizeof_int = 2;               (* Size of "int" *)
    sizeof_long = 4;              (* Size of "long" *)
    sizeof_longlong = 8;          (* Size of "long long" *)
    sizeof_ptr = 2;               (* Size of pointers *)
    sizeof_float = 4;             (* Size of "float" *)
    sizeof_double = 4;            (* Size of "double" *)
    sizeof_longdouble = 8;        (* Size of "long double" *)
    sizeof_void = 0;              (* Size of "void" *)
    sizeof_fun = 0;               (* Size of function *)
    size_t = "unsigned int";      (* Type of "sizeof(T)" *)
    wchar_t = "unsigned short";   (* Type of "wchar_t" *)
    ptrdiff_t = "int";            (* Type of "ptrdiff_t" *)
    alignof_short = 2;            (* Alignment of "short" *)
    alignof_int = 2;              (* Alignment of "int" *)
    alignof_long = 2;             (* Alignment of "long" *)
    alignof_longlong = 2;         (* Alignment of "long long" *)
    alignof_ptr = 2;              (* Alignment of pointers *)
    alignof_float = 2;            (* Alignment of "float" *)
    alignof_double = 2;           (* Alignment of "double" *)
    alignof_longdouble = 2;       (* Alignment of "long double" *)
    alignof_str = 1;              (* Alignment of strings *)
    alignof_fun = 1;              (* Alignment of function *)
    alignof_aligned = 16;         (* Alignment of a type with aligned attribute *)
    char_is_unsigned = false;     (* Whether "char" is unsigned *)
    const_string_literals = true; (* Whether string literals have const chars *)
    little_endian = true;         (* whether the machine is little endian *)
    underscore_name = true;       (* If assembly names have leading underscore *)
    has__builtin_va_list = false; (* Whether [__builtin_va_list] is a known type *)
    __thread_is_keyword = false;  (* Whether [__thread] is a keyword *)
  }

let mach2 = { xc16 with compiler = "baz" }

let () =
  let ran = ref false in
  Cmdline.run_after_loading_stage
    (fun () ->
       Kernel.result "Registering machdep 'xc16' as 'XC16'";
       File.new_machdep "XC16" xc16;
       if !ran then begin
         Kernel.result "Trying to register machdep 'mach2' as 'XC16'";
         File.new_machdep "XC16" mach2
       end
       else ran := true)
I inserted the following lines in the file __fc_machdep.h, just before the line "#error Must define ...":
#ifdef __FC_MACHDEP_XC16
#define __FC_BYTE_ORDER __LITTLE_ENDIAN
/* min and max values as specified in limits.h */
#define __FC_SCHAR_MAX 0x7f
#define __FC_SCHAR_MIN (-__FC_SCHAR_MAX -1)
#define __FC_UCHAR_MAX 0xff
#define __FC_CHAR_MIN __FC_SCHAR_MIN
#define __FC_CHAR_MAX __FC_SCHAR_MAX
#define __FC_SHRT_MAX 0x7fff
#define __FC_SHRT_MIN (-__FC_SHRT_MAX -1)
#define __FC_USHRT_MAX 0xffff
#define __FC_INT_MAX __FC_SHRT_MAX
#define __FC_INT_MIN __FC_SHRT_MIN
#define __FC_UINT_MAX __FC_USHRT_MAX
#define __FC_LONG_MAX 0x7fffffff
#define __FC_LONG_MIN (-__FC_LONG_MAX -1)
#define __FC_ULONG_MAX 0xffffffffU
#define __FC_LLONG_MAX 0x7fffffffffffffffLL
#define __FC_LLONG_MIN (-__FC_LLONG_MAX -1)
#define __FC_ULLONG_MAX 0xffffffffffffffffUL
/* Required */
#undef __CHAR_UNSIGNED__
#define __WORDSIZE 16
#define __SIZEOF_SHORT 2
#define __SIZEOF_INT 2
#define __SIZEOF_LONG 4
#define __SIZEOF_LONGLONG 8
#define __CHAR_BIT 8
#define __PTRDIFF_T int
#define __SIZE_T unsigned int
#define __FC_SIZE_MAX __FC_INT_MAX
/* stdio.h */
#define __FC_EOF (-1)
#define __FC_FOPEN_MAX 8
#define __FC_RAND_MAX 32767
#define __FC_PATH_MAX 260
#define __WCHAR_T unsigned short
/* Optional */
#define __INT8_T signed char
#define __UINT8_T unsigned char
#define __INT16_T signed int
#define __UINT16_T unsigned int
#define __INTPTR_T signed int
#define __UINTPTR_T unsigned int
#define __INT32_T signed long
#define __UINT32_T unsigned long
#define __INT64_T signed long long
#define __UINT64_T unsigned long long
/* Required */
#define __INT_LEAST8_T signed char
#define __UINT_LEAST8_T unsigned char
#define __INT_LEAST16_T signed int
#define __UINT_LEAST16_T unsigned int
#define __INT_LEAST32_T signed long
#define __UINT_LEAST32_T unsigned long
#define __INT_LEAST64_T signed long long
#define __UINT_LEAST64_T unsigned long long
#define __INT_FAST8_T signed char
#define __UINT_FAST8_T unsigned char
#define __INT_FAST16_T signed int
#define __UINT_FAST16_T unsigned int
#define __INT_FAST32_T signed long
#define __UINT_FAST32_T unsigned long
#define __INT_FAST64_T signed long long
#define __UINT_FAST64_T unsigned long long
/* POSIX */
#define __SSIZE_T signed long
#define __FC_PTRDIFF_MIN __FC_INT_MIN
#define __FC_PTRDIFF_MAX __FC_INT_MAX
#define __FC_VA_LIST_T char*
/* Required */
#define __INT_MAX_T signed long long
#define __UINT_MAX_T unsigned long long
#else
Now if I launch Frama-C in this way:
$ frama-c-gui -load-script machdep_xc16 -machdep XC16 -cpp-command 'C:\\"Program Files (x86)"\\Microchip\\xc16\\v1.26\\bin\\xc16-gcc.exe -E' -no-cpp-gnu-like D:\\project\\*.c
I get output like this:
[kernel] Registering machdep 'xc16' as 'XC16'
[kernel] Parsing .opam/4.02.3+mingw64c/share/frama-c/libc/__fc_builtin_for_normalization.i (no preprocessing)
[kernel] warning: machdep XC16 has no registered macro. Using __FC_MACHDEP_XC16 for pre-processing
[kernel] Parsing D:/project/main.c (with preprocessing)
. . .
[kernel] Parsing D:/project/get_data.c (with preprocessing)
[kernel] syntax error at .opam/4.02.3+mingw64c/share/frama-c/libc/__fc_define_wchar_t.h:28:
26 #if !defined(__cplusplus)
27 /* wchar_t is a keyword in C++ and shall not be a typedef. */
28 typedef __WCHAR_T wchar_t;
^^^^^^^^^^^^^^^^^^^^^^^^^^
29 #else
30 typedef __WCHAR_T fc_wchar_t;
The syntax error occurs when the file containing #include <stdio.h> is processed.
What am I doing wrong?

The instructions on how to add a new machdep have been revised in the manual and will be available with the next Frama-C release (Phosphorus).
The main issue with a new machdep is that there are two (seemingly redundant) parts to it: the OCaml-level definitions, used by Frama-C, and the C-level definitions, used by the C preprocessor while parsing the Frama-C standard library. Realizing that both are necessary and complementary helps to understand why the whole process is cumbersome (although it will be simplified in the future).
Here's an extract of the upcoming instructions:
A custom machine description may be implemented as follows:
let my_machine = {
  version = "generic C compiler for my machine";
  compiler = "generic"; (* may also be "gcc" or "msvc" *)
  cpp_arch_flags = ["-m64"];
  sizeof_short = 2;
  sizeof_int = 4;
  sizeof_long = 8;
  (* ... *)
}
let () = File.new_machdep "my_machine" my_machine
Note that your machdep_xc16.ml can be simplified: the code you used is part of a test that tries to register the same machdep twice, just to ensure that this fails. In practice, when you use -load-script, you can simply create the machdep record as above and call File.new_machdep directly.
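A minimal sketch of that simplification (keeping your xc16 record exactly as you wrote it) would be:
(* machdep_xc16.ml, simplified: register the record when the script is loaded. *)
let () = File.new_machdep "XC16" xc16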
After this code is loaded, Frama-C can be instructed to use the new machine
model using the -machdep command line option.
If you intend to use Frama-C's standard library headers, you must also do the following:
- define the constant __FC_MACHDEP_<CUSTOM>, replacing <CUSTOM> with the name (in uppercase letters) of your created machdep; this can be done via -cpp-extra-args="-D__FC_MACHDEP_<CUSTOM>";
- provide a header file with macro definitions corresponding to your OCaml definitions. For the most part, these are macros prefixed by __FC_, corresponding to standard C macro definitions, e.g., __FC_UCHAR_MAX. These definitions are used by Frama-C's <limits.h> and other headers to provide the standard C definitions. The test file tests/misc/custom_machdep/__fc_machdep_custom.h contains a complete example of the required definitions. Other examples can be found in share/libc/__fc_machdep.h. Make sure that your custom header defines the __FC_MACHDEP include guard, and that the program you are analyzing includes this header before all other headers. One way to ensure this without having to modify any source files is to use an option such as -include in GCC.
An example of the complete command-line is presented below, for a custom
machdep called myarch, defined in file my_machdep.ml and
with stdlib constants defined in machdep_myarch.h:
frama-c -load-script my_machdep.ml -machdep myarch \
-cpp-extra-args="-D__FC_MACHDEP_MYARCH -include machdep_myarch.h"
Note that the __fc_machdep_custom.h shipped with Silicon is incomplete, but the version you posted seems complete, so use it instead: put it in a file called e.g. machdep_xc16.h, add #define __FC_MACHDEP to it, and include it before the other files, e.g. by passing -include machdep_xc16.h to the preprocessor. This ensures that your version of the machdep is used instead of Frama-C's, which in turn lets you use Frama-C's standard library with the constants defined according to your architecture.
Also, because your command line contains -cpp-command and -no-cpp-gnu-like, you'll have to adapt the -cpp-extra-args above, putting -D__FC_MACHDEP_MYARCH and -include machdep_myarch.h directly in your -cpp-command.
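For instance, adapting your original XC16 command line (a sketch; adjust the header file name and its location so that the preprocessor can find it), the whole invocation might look like:
$ frama-c-gui -load-script machdep_xc16 -machdep XC16 -cpp-command 'C:\\"Program Files (x86)"\\Microchip\\xc16\\v1.26\\bin\\xc16-gcc.exe -E -D__FC_MACHDEP_XC16 -include machdep_xc16.h' -no-cpp-gnu-like D:\\project\\*.c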

Related

Can I skip eva's assertion on signed overflow?

Sample code:
void main() {
    unsigned int x;
    x = 1U << 31; // OK
    x = 1 << 31;  // Sign overflowed
    return;
}
frama-c-gui -eva main.c:
void main(void)
{
    unsigned int x;
    x = 1U << 31;
    /*@ assert Eva: signed_overflow: 1 << 31 ≤ 2147483647; */
    x = (unsigned int)(1 << 31);
    return;
}
I get a red alarm because of the signed overflow on line 4. I have existing code with tons of hardware registers defined with mask bits and shift amounts like this, and it's unreasonable to modify the code to add "U" to all the mask bits. Is there an option in the Eva plugin to treat these constants as unsigned integers?
There are some options in the kernel to control which kinds of alarms should be emitted (see frama-c -kernel-h or the manual, especially its section 6.3, for more information).
In your particular case, you are probably interested in -no-warn-signed-overflow, which disables alarms related to overflows in signed arithmetic. Eva will then assume 2's-complement arithmetic and emit a warning about that if the situation occurs, but only once for the whole analysis.
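For example, for the snippet above (a sketch; since it is a kernel option, it simply goes on the same command line as -eva):
frama-c-gui -eva -no-warn-signed-overflow main.c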

yylval undefined with lex and yacc

I was trying a simple program to create an abstract syntax tree using lex and yacc.
My yacc_file.y is
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct node
{
    struct node *left;
    struct node *right;
    char *token;
} node;

node *mknode(node *left, node *right, char *token);
void printtree(node *tree);

#define YYSTYPE struct node *
%}

%start lines
%token NUMBER
%token PLUS MINUS TIMES
%token LEFT_PARENTHESIS RIGHT_PARENTHESIS
%token END
%left PLUS MINUS
%left TIMES

%%

lines  : /* empty */
       | lines line        /* do nothing */
       ;
line   : exp END           { printtree($1); printf("\n"); }
       ;
exp    : term              { $$ = $1; }
       | exp PLUS term     { $$ = mknode($1, $3, "+"); }
       | exp MINUS term    { $$ = mknode($1, $3, "-"); }
       ;
term   : factor            { $$ = $1; }
       | term TIMES factor { $$ = mknode($1, $3, "*"); }
       ;
factor : NUMBER                                 { $$ = mknode(0, 0, (char *)yylval); }
       | LEFT_PARENTHESIS exp RIGHT_PARENTHESIS { $$ = $2; }
       ;

%%
int main(void) { return yyparse(); }

node *mknode(node *left, node *right, char *token)
{
    /* malloc the node */
    node *newnode = (node *)malloc(sizeof(node));
    char *newstr = (char *)malloc(strlen(token) + 1);
    strcpy(newstr, token);
    newnode->left = left;
    newnode->right = right;
    newnode->token = newstr;
    return newnode;
}

void printtree(node *tree)
{
    int i;
    if (tree->left || tree->right)
        printf("(");
    printf(" %s ", tree->token);
    if (tree->left)
        printtree(tree->left);
    if (tree->right)
        printtree(tree->right);
    if (tree->left || tree->right)
        printf(")");
}

int yyerror(char *s) { fprintf(stderr, "%s\n", s); }
My lex_file.l file is
%{
#include "yacc_file.tab.h"
%}
%%
[0-9]+ {yylval = (int)yytext; return NUMBER;}
/* cast pointer to int for compiler warning */
[ \t\n] ;
"+" return(PLUS);
"-" return(MINUS);
"*" return(TIMES);
"(" return(LEFT_PARENTHESIS);
")" return(RIGHT_PARENTHESIS);
";" return(END);
%%
int yywrap (void) {return 1;}
To run, I have done the following:
yacc -d yacc_file.y
lex lex_file.l
cc y.tab.c lex.yy.c -o a.exe
I got the following error:
lex_file.l: In function 'yylex':
lex_file.l:10:2: error: 'yylval' undeclared (first used in this function)
[0-9]+ {yylval=(int)yytext; return NUMBER;}
I have searched on Google, and %union seems to solve the problem, but I am not sure how to use it.
The command
yacc -d yacc_file.y
produces a header file called y.tab.h and a C file called y.tab.c. That's the yacc-compatible default naming, and it does not agree with your flex file, which is expecting the header to be called yacc_file.tab.h.
You could just change the #include statement in your flex file, but that wouldn't be compatible with the build system at your college. So I suggest you change to the command bison -d yacc_file.y instead of your yacc command. That will produce a header file called yacc_file.tab.h and a C file called yacc_file.tab.c. (Of course, you will then have to change the cc command to compile yacc_file.tab.c instead of y.tab.c.)
Presumably there is some incorrect yacc_file.tab.h on your machine, which doesn't include a declaration of yylval. Hence the compilation error.
To avoid confusing yourself further, when you fix your build procedure I'd recommend deleting all the intermediate files -- y.tab.h and y.tab.c as well as yacc_file.tab.c and yacc_file.tab.h, and lex.yy.c. Then you can do a clean build without having to worry about picking up some outdated intermediate file.
Also, in yacc_file.y, you #define YYSTYPE as struct node *. That's fine, but the #define will not be copied into the generated header file; in the header file, YYSTYPE will be #defined as int if there is no other #define before the header file is #included.
Moreover, in lex_file.l you use yylval as though it were an int (yylval = (int)yytext;), but I think that statement does not do what you think it does. What it does is reinterpret the address of yytext as an integer. That's legal but meaningless. What you wanted to do, I think, is to convert the string in yytext to an integer. To do that, you need to use strtol or some similar function from the standard C library.
Regardless, it is vital that the scanner and the parser agree on the type of yylval. Otherwise, things will go desperately wrong.
As you mention, it is possible to use a %union declaration to declare YYSTYPE as a union type. You should make sure you understand C union types, and also read the bison manual section on semantics.
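As a rough sketch of that approach (the member names nptr and str are placeholders I chose; adjust as you like), the grammar can carry both node pointers and lexeme strings, and the scanner fills the string member:
/* In yacc_file.y: remove the "#define YYSTYPE" line and declare a union instead. */
%union {
    struct node *nptr;   /* parse-tree nodes */
    char *str;           /* text of a NUMBER token */
}
%token <str>  NUMBER
%type  <nptr> exp term factor

/* ... */
factor : NUMBER { $$ = mknode(0, 0, $1); }

/* In lex_file.l: copy the matched text instead of casting the pointer
   (strdup needs <string.h> in the %{ ... %} block). */
[0-9]+   { yylval.str = strdup(yytext); return NUMBER; }
Whether you keep the token as a string (as mknode expects) or convert it with strtol is up to you; the important thing is that both files see the same YYSTYPE.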

C++: OpenSSL, aes cfb encryption [duplicate]

I tried to implement a "very" simple encryption/decryption example. I need it for a project where I would like to encrypt some user information. I can't encrypt the whole database but only some fields in a table.
The database and most of the rest of the project work; only the encryption does not.
Here is a simplified version of it:
#include <openssl/aes.h>
#include <openssl/evp.h>
#include <iostream>
#include <string.h>
using namespace std;
int main()
{
    /* ckey and ivec are the two 128-bit keys necessary to
       en- and decrypt your data. Note that ckey can be
       192 or 256 bits as well.
    */
    unsigned char ckey[] = "helloworldkey";
    unsigned char ivec[] = "goodbyworldkey";
    int bytes_read;
    unsigned char indata[AES_BLOCK_SIZE];
    unsigned char outdata[AES_BLOCK_SIZE];
    unsigned char decryptdata[AES_BLOCK_SIZE];

    /* data structure that contains the key itself */
    AES_KEY keyEn;

    /* set the encryption key */
    AES_set_encrypt_key(ckey, 128, &keyEn);

    /* set where on the 128-bit encrypted block to begin encryption */
    int num = 0;

    strcpy((char *)indata, "Hello World");
    bytes_read = sizeof(indata);

    AES_cfb128_encrypt(indata, outdata, bytes_read, &keyEn, ivec, &num, AES_ENCRYPT);
    cout << "original data:\t" << indata << endl;
    cout << "encrypted data:\t" << outdata << endl;

    AES_cfb128_encrypt(outdata, decryptdata, bytes_read, &keyEn, ivec, &num, AES_DECRYPT);
    cout << "input data was:\t" << decryptdata << endl;

    return 0;
}
But the "decrypted" data comes out as random characters, although they are the same after every execution of the code; outdata, on the other hand, changes with every execution...
I tried to debug and search for a solution, but I couldn't find one for my problem.
Now my question: what is going wrong here? Or do I completely misunderstand the provided functions?
The problem is that AES_cfb128_encrypt modifies the ivec (it has to in order to allow for chaining). Your solution is to create a copy of the ivec and initialize it before each call to AES_cfb128_encrypt as follows:
const char ivecstr[AES_BLOCK_SIZE] = "goodbyworldkey\0";
unsigned char ivec[AES_BLOCK_SIZE];
memcpy( ivec , ivecstr, AES_BLOCK_SIZE);
Then repeat the memcpy before your second call to AES_cfb128_encrypt.
Note 1: Your initial vector was a byte too short, so I put an explicit additional \0 at the end of it. You should make sure all of your strings are of the correct length when copying or passing them.
Note 2: Any code which uses encryption should REALLY avoid using strcpy or any other copy of unchecked length. It's a hazard.
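Putting that together with the code from the question, a minimal sketch of the two calls (reusing the question's variable names, and also resetting num, since the function updates that as well) might look like this:
unsigned char iv_copy[AES_BLOCK_SIZE];
int num = 0;

/* encrypt with a fresh copy of the IV */
memcpy(iv_copy, ivecstr, AES_BLOCK_SIZE);
AES_cfb128_encrypt(indata, outdata, bytes_read, &keyEn, iv_copy, &num, AES_ENCRYPT);

/* decrypt: restore the same IV (and offset) before the second call */
memcpy(iv_copy, ivecstr, AES_BLOCK_SIZE);
num = 0;
AES_cfb128_encrypt(outdata, decryptdata, bytes_read, &keyEn, iv_copy, &num, AES_DECRYPT);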

Struct Stuffing Incorrectly

I have the following struct:
typedef union
{
    struct
    {
        unsigned char  ID;
        unsigned short Vdd;
        unsigned char  B1State;
        unsigned short B1FloatV;
        unsigned short B1ChargeV;
        unsigned short B1Current;
        unsigned short B1TempC;
        unsigned short B1StateTimer;
        unsigned short B1DutyMod;
        unsigned char  B2State;
        unsigned short B2FloatV;
        unsigned short B2ChargeV;
        unsigned short B2Current;
        unsigned short B2TempC;
        unsigned short B2StateTimer;
        unsigned short B2DutyMod;
    } bat_values;
    unsigned char buf[64];
} BATTERY_CHARGE_STATUS;
and I am stuffing it from an array as follows:
for(unsigned char ii = 0; ii < 64; ii++) usb_debug_data.buf[ii]=inBuffer[ii];
I can see that the array has the following (arbitrary) values:
inBuffer[0] = 80;
inBuffer[1] = 128;
inBuffer[2] = 12;
inBuffer[3] = 0;
inBuffer[4] = 23;
...
Now I want to display these values by changing the text of a QLineEdit:
str = QString::number((int)usb_debug_data.bat_values.ID);
ui->batID->setText(str);
str = QString::number((int)usb_debug_data.bat_values.Vdd);
ui->Vdd->setText(str);
str = QString::number((int)usb_debug_data.bat_values.B1State);
ui->B1State->setText(str);
...
However, the QLineEdit text values are not turning up as expected. I see the following:
usb_debug_data.bat_values.ID = 80 (correct)
usb_debug_data.bat_values.Vdd = 12 (incorrect)
usb_debug_data.bat_values.B1State = 23 (incorrect)
seems like 'usb_debug_data.bat_values.Vdd', which is a short, is not taking its value from inBuffer[1] and inBuffer[2]. Likewise, 'usb_debug_data.bat_values.B1State' should get its value from inBuffer[3] but for some reason is picking up its value from inBuffer[4].
Any idea why this is happening?
C and C++ are free to insert padding between the elements of a structure, and beyond the last element, for whatever purpose they desire (usually efficiency, but sometimes because the underlying architecture does not allow unaligned access at all).
So you'll probably find that two-byte items are aligned to two-byte boundaries, and you'll end up with something like:
unsigned char ID; // 1 byte
// 1 byte filler, aligns following short
unsigned short Vdd; // 2 bytes
unsigned char B1State; // 1 byte
// 3 bytes filler, aligns following int
unsigned int myVar; // 4 bytes
Many compilers will allow you to specify how to pack structures, such as with:
#pragma pack(1)
or the gcc attribute:
__attribute__((packed))
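For example, a sketch of the gcc form applied to the union from the question (only the first fields shown; the rest stay as they were):
typedef union
{
    /* __attribute__((packed)) tells gcc not to insert padding in this struct */
    struct __attribute__((packed))
    {
        unsigned char  ID;
        unsigned short Vdd;
        unsigned char  B1State;
        /* ... remaining fields as before ... */
    } bat_values;
    unsigned char buf[64];
} BATTERY_CHARGE_STATUS;
With packing in place, the buf[ii] copy from the question lines the bytes up with the fields as intended, at the cost of possibly slower (or, on some targets, disallowed) unaligned access.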
If you don't want to (or can't) pack your structures, you can revert to field-by-field copying (probably best done in a function):
void copyData(BATTERY_CHARGE_STATUS *bsc, unsigned char *debugData)
{
    memcpy(&(bsc->bat_values.ID), debugData, sizeof(bsc->bat_values.ID));
    debugData += sizeof(bsc->bat_values.ID);
    memcpy(&(bsc->bat_values.Vdd), debugData, sizeof(bsc->bat_values.Vdd));
    debugData += sizeof(bsc->bat_values.Vdd);
    /* ... one pair of lines per field ... */
    memcpy(&(bsc->bat_values.B2DutyMod), debugData, sizeof(bsc->bat_values.B2DutyMod));
    debugData += sizeof(bsc->bat_values.B2DutyMod); /* not really needed */
}
It's a pain that you have to keep the structure and function synchronised but hopefully it won't be changing that much.
Structs are not packed by default, so the compiler is free to insert padding between members. The most common reason is to ensure some machine-dependent alignment. The Wikipedia entry on data structure alignment is a pretty good place to start. You essentially have two choices:
- insert compiler-specific pragmas to force packing (e.g., #pragma pack(1) or __attribute__((packed)));
- write explicit serialization and deserialization functions to transform your structures into and from byte arrays.
I usually prefer the latter since it doesn't make my code ugly with little compiler-specific adornments everywhere.
The next thing you are likely to discover is that the byte order for multi-byte integers is also platform-specific. Look up endianness for more details.
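As a sketch of the second option, tied to the values shown in the question (this assumes the device sends the fields packed, in little-endian byte order):
/* read a 16-bit little-endian value from two consecutive bytes */
unsigned short read_u16_le(const unsigned char *p)
{
    return (unsigned short)(p[0] | (p[1] << 8));
}

/* ... */
usb_debug_data.bat_values.ID      = inBuffer[0];
usb_debug_data.bat_values.Vdd     = read_u16_le(&inBuffer[1]); /* bytes 1..2 */
usb_debug_data.bat_values.B1State = inBuffer[3];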

printf byte to hex string strange output

The following simple code produces strange output:
#include <stdio.h>
#include <string.h>
#include "/tmp/sha.h"

#define DIGEST 64

// taken from coreutils 8.5 - produces 64-byte sha digest and puts it into resblock
extern int sha512_stream(FILE *stream, void *resblock);

int main(int argc, char **argv)
{
    char sha[DIGEST];
    memset(sha, 0, DIGEST);

    FILE *stream;
    stream = fopen("/bin/less", "r");
    sha512_stream(stream, (void *)sha);
    fclose(stream);

    char buf[2] = {10, 32};
    printf("%02x\n", sha[0]);
    printf("%02x\n", buf[0]);
    return 0;
}
Gives the output:
ffffffa9
0a
The first byte of sha is A9, but where are the padding F's coming from?
On Ubuntu Linux 10.10 with gcc 4.4.5.
(char) defaults to (signed char) on Linux x86, and because printf() uses stdarg, the (signed char) is implicitly promoted to (int), resulting in sign extension. You'll need to declare the buffer as (unsigned char) to get the expected behavior. (There is no way to pass type information through stdarg, so default promotions are performed on the arguments.)
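A minimal sketch of the two usual fixes (either change the declaration or cast at the call site):
unsigned char sha[DIGEST];               /* declare the buffer unsigned ... */
printf("%02x\n", sha[0]);                /* now prints a9, no sign extension */

/* ... or keep char and cast when printing: */
printf("%02x\n", (unsigned char)sha[0]);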
