I'm trying to understand the following inline assembly code, taken from https://elixir.bootlin.com/linux/v3.16.82/source/arch/x86/include/asm/checksum_32.h at line 114.
Could someone please explain how it works?
/*
 * Adds daddr, saddr and ((len + proto) << 8) into the running checksum
 * `sum` using 32-bit add / add-with-carry instructions, and returns the
 * updated sum. No further reduction of the result is done in this function.
 * NOTE(review): presumably these are the TCP/UDP pseudo-header fields, and
 * the << 8 accounts for little-endian byte order -- confirm against callers.
 */
static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
unsigned short len, unsigned short proto, __wsum sum)
{
/*
 * Operand map: %0 = sum (output, also input via the "0" matching
 * constraint), %1 = daddr, %2 = saddr, %3 = (len + proto) << 8.
 */
asm("addl %1, %0 ;\n"   /* sum += daddr; sets the carry flag on overflow */
"adcl %2, %0 ;\n"       /* sum += saddr + carry from the previous add */
"adcl %3, %0 ;\n"       /* sum += ((len + proto) << 8) + carry */
"adcl $0, %0 ;\n"       /* add back the carry left by the last adcl */
: "=r" (sum)
: "g" (daddr), "g"(saddr),
"g" ((len + proto) << 8), "0" (sum));
return sum;
}
Related
My goal is to instrument my initial IR with proper calls to TSan runtime library functions using LLVM opt tool and TSan passes. In other words, I want to end up with similar TSan instrumentation as when using clang -fsanitize=thread -S but by directly using opt and TSan passes instead.
As far as I know, LLVM has two passes for TSan instrumentation: tsan-module (a module pass) and tsan (a function pass). Both passes are available by default in opt, i.e. are included in opt -print-passes report.
I chose tiny_race.c as my sample program, where the main thread and the thread it spawns (Thread1) form a data race while accessing a global variable Global.
Here are the two steps I take to instrument the code my way:
Generating the initial LLVM IR for tiny_race.c:
clang -S -emit-llvm tiny_race.c -o tiny_race.ll
Using LLVM opt to instrument tiny_race.ll with the two TSan passes:
opt -passes='tsan-module,tsan' tiny_race.ll -S -o myInstrumented.ll
The above pass pipeline executes fine but the resulting myInstrumented.ll lacks some TSan instrumentations. More specifically:
Thread1 (child thread) is left completely un-instrumented.
main thread only has #__tsan_func_entry and #__tsan_func_exit instrumentations and its accesses to Global are not instrumented.
Could anyone please explain why my approach produces a partially-instrumented output? Any suggestion is greatly appreciated.
To better display the difference between the IR resulting from my approach and the expected one, below you can find the definitions of main and Thread1 in each of them.
Here is myInstrumented.ll:
; Function Attrs: noinline nounwind optnone uwtable
define dso_local ptr #Thread1(ptr noundef %x) #0 {
entry:
%x.addr = alloca ptr, align 8
store ptr %x, ptr %x.addr, align 8
store i32 42, ptr #Global, align 4
%0 = load ptr, ptr %x.addr, align 8
ret ptr %0
}
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 #main() #0 {
entry:
%0 = call ptr #llvm.returnaddress(i32 0)
call void #__tsan_func_entry(ptr %0) *****TSAN INSTRUMENTATION*****
%retval = alloca i32, align 4
%t = alloca i64, align 8
store i32 0, ptr %retval, align 4
%call = call i32 #pthread_create(ptr noundef %t, ptr noundef null, ptr noundef #Thread1, ptr noundef null) #4
store i32 43, ptr #Global, align 4
%1 = load i64, ptr %t, align 8
%call1 = call i32 #pthread_join(i64 noundef %1, ptr noundef null)
%2 = load i32, ptr #Global, align 4
call void #__tsan_func_exit() *****TSAN INSTRUMENTATION*****
ret i32 %2
}
And here is the resulting IR when using clang -fsanitize=thread -S -emit-llvm tiny_race.c which is my expected result:
; Function Attrs: noinline nounwind optnone sanitize_thread uwtable
define dso_local ptr #Thread1(ptr noundef %x) #0 {
entry:
%0 = call ptr #llvm.returnaddress(i32 0)
call void #__tsan_func_entry(ptr %0) *****TSAN INSTRUMENTATION*****
%x.addr = alloca ptr, align 8
store ptr %x, ptr %x.addr, align 8
call void #__tsan_write4(ptr #Global) *****TSAN INSTRUMENTATION*****
store i32 42, ptr #Global, align 4
%1 = load ptr, ptr %x.addr, align 8
call void #__tsan_func_exit() *****TSAN INSTRUMENTATION*****
ret ptr %1
}
; Function Attrs: noinline nounwind optnone sanitize_thread uwtable
define dso_local i32 #main() #0 {
entry:
%0 = call ptr #llvm.returnaddress(i32 0)
call void #__tsan_func_entry(ptr %0) *****TSAN INSTRUMENTATION*****
%retval = alloca i32, align 4
%t = alloca i64, align 8
store i32 0, ptr %retval, align 4
%call = call i32 #pthread_create(ptr noundef %t, ptr noundef null, ptr noundef #Thread1, ptr noundef null) #4
call void #__tsan_write4(ptr #Global) *****TSAN INSTRUMENTATION*****
store i32 43, ptr #Global, align 4
call void #__tsan_read8(ptr %t) *****TSAN INSTRUMENTATION*****
%1 = load i64, ptr %t, align 8
%call1 = call i32 #pthread_join(i64 noundef %1, ptr noundef null)
call void #__tsan_read4(ptr #Global) *****TSAN INSTRUMENTATION*****
%2 = load i32, ptr #Global, align 4
call void #__tsan_func_exit() *****TSAN INSTRUMENTATION*****
ret i32 %2
}
I have compiled a cpp code and downloaded it to Arduino Uno for blinking an LED. The code works fine.
However, when I convert it to .ll and from .ll to an object file then hex and upload, the code stops working. No LED blinks by the Arduino.
If I address the ports directly:
typedef unsigned char uint8_t;
// NOTE(review): `volatile` placed after the `*` qualifies the pointer object
// itself, not the byte it points to; accesses through the pointer are
// therefore not volatile. Confirm this is what is intended.
typedef uint8_t * volatile port_type;
// Register addresses given as integer constants cast to pointers
// (presumably the ATmega328P PORTB / DDRB addresses -- confirm).
const port_type portB = (port_type) 0x25;
const port_type ddrB = (port_type) 0x24;
it works fine, but if I initialize the port addresses via a global constructor, it does not work:
// Helpers that return the register addresses as plain ints.
int getPortB() {return 0x25;}
int getDdrB() {return 0x24;}
// These initializers are function calls, so the two globals are filled in at
// start-up by a global constructor (global_var_init in the IR listing below)
// rather than being constant-initialized.
const port_type portB = (port_type) getPortB();
const port_type ddrB = (port_type) getDdrB();
This is because that global constructor is not called at all. If I call it from main function via
call addrspace(1) void #global_var_init()
it will work.
I use the following commands to compile and download the ll file to the Arduino uno:
llvm-as-9 blink1.ll -o blink1.bc
llc-9 -filetype=obj blink1.bc
avr-g++ -mmcu=atmega328p blink1.o -o blink1
avr-objcopy -O ihex -R .eeprom blink1 blink1.hex
avrdude -F -V -c arduino -p ATMEGA328P -P /dev/ttyUSB0 -b 115200 -U flash:w:blink1.hex
blink1.ll
; ModuleID = 'blink1.cpp'
source_filename = "blink1.cpp"
target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
target triple = "avr"
#portB = dso_local global i8* null, align 1
#ddrB = dso_local global i8* null, align 1
#llvm.global_ctors = appending global [1 x { i32, void () addrspace(1)*, i8* }] [{ i32, void () addrspace(1)*, i8* } { i32 65535, void () addrspace(1)* #global_var_init, i8* null }]
; Function Attrs: noinline
define internal void #global_var_init() addrspace(1) {
%1 = inttoptr i16 37 to i8*
store volatile i8* %1, i8** #portB, align 1
%2 = inttoptr i16 36 to i8*
store volatile i8* %2, i8** #ddrB, align 1
ret void
}
; Function Attrs: noinline nounwind optnone
define dso_local void #delay_500ms() addrspace(1) {
call addrspace(0) void asm sideeffect "ldi r19, 150 \0A\09ldi r20, 128 \0A\09ldi r23, 41 \0A\09L1: \0A\09dec r20 \0A\09brne L1 \0A\09dec r19 \0A\09brne L1 \0A\09dec r23 \0A\09brne L1 \0A\09", ""() #3, !srcloc !2
ret void
}
; Function Attrs: noinline norecurse nounwind optnone
define dso_local i16 #main() addrspace(1) {
; call addrspace(1) void #global_var_init()
%1 = alloca i16, align 1
store i16 0, i16* %1, align 1
%2 = load volatile i8*, i8** #ddrB, align 1
store i8 32, i8* %2, align 1
br label %3
3: ; preds = %0, %3
%4 = load volatile i8*, i8** #portB, align 1
store i8 32, i8* %4, align 1
call addrspace(1) void #delay_500ms()
%5 = load volatile i8*, i8** #portB, align 1
store i8 0, i8* %5, align 1
call addrspace(1) void #delay_500ms()
br label %3
}
!0 = !{i32 1, !"wchar_size", i32 2}
!1 = !{!"clang version 9.0.1-+20210314105943+c1a0a213378a-1~exp1~20210314220516.107 "}
!2 = !{i32 1296, i32 1313, i32 1338, i32 1362, i32 1377, i32 1397, i32 1416, i32 1436, i32 1455, i32 1475, i32 1494}
Is this an LLVM bug or am I making a mistake?
I have a TCPSocket* object which holds a connection to a client. This object is passed to another object to send data back to the client:
uint32_t count = 10;
char* message = new char[4];
// Serialize count into four bytes, least-significant byte first.
message[0] = count & 0xff;
message[1] = (count >> 8) & 0xff;
message[2] = (count >> 16) & 0xff;
message[3] = (count >> 24) & 0xff;
// Pass the buffer itself: `&message` would hand send() the address of the
// pointer variable instead of the four payload bytes.
client->send(message, 4);
When this part of the program is called, the following appears on the serial line, and no data is received by the client:
++ MbedOS Fault Handler ++
FaultType: HardFault
Context:
R0 : 00000000
R1 : 10008000
R2 : 00000004
R3 : 2007C000
R4 : 10000914
R5 : 00000000
R6 : 00000000
R7 : 10004330
R8 : 10004320
R9 : FFFFF435
R10 : 00000000
R11 : 00000000
R12 : 00012AC1
SP : 10002AF0
LR : 0000D1A1
PC : 00005938
xPSR : 21000000
PSP : 10002AD0
MSP : 10007FD8
CPUID: 412FC230
HFSR : 40000000
MMFSR: 00000000
BFSR : 00000082
UFSR : 00000000
DFSR : 0000000A
AFSR : 00000000
BFAR : 10008010
Mode : Thread
Priv : Privileged
Stack: PSP
-- MbedOS Fault Handler --
++ MbedOS Error Info ++
Error Status: 0x80FF013D Code: 317 Module: 255
Error Message: Fault exception
Location: 0xD337
Error Value: 0x5938
Current Thread: main Id: 0x10002B48 Entry: 0xD7D7 StackSize: 0x1000 StackMem: 0x10001B48 SP: 0x10007F88
For more info, visit: https://armmbed.github.io/mbedos-error/?error=0x80FF013D
-- MbedOS Error Info --
Everything is in one thread, so I can't see what could be causing this.
These are the relevant parts of the program:
main:
// Network interface
EthernetInterface net;
TCPSocket listener; //listens for incoming connection requests
TCPSocket* client;
CommandProcessor commandProcessor(client);
int main() {
int remaining;
int rcount;
char *p;
char *buffer = new char[16];
nsapi_size_or_error_t result;
int n = net.set_network("192.168.1.103","255.255.255.0","192.168.1.2");
pc.printf("\n Success? %d\n", n);
net.connect();
listener.open(&net);
listener.bind(3045);
listener.listen(1);
client = listener.accept(NULL);
client->set_timeout(1000);
led1 = 1;
while(1) {
int remaining = 16;
int rcount = 0;
p = buffer;
while (remaining > 0 && 0 < (result = client->recv(p, remaining))) {
p += result;
rcount += result;
remaining -= result;
}
if (remaining == 0) //full message received
{
commandProcessor.process(buffer);
}
}
}
CommandProcessor:
// Stores a copy of the given socket pointer. Only the pointer value at the
// time of construction is captured; later changes to the caller's variable
// are not seen by this object.
CommandProcessor::CommandProcessor(TCPSocket* client)
{
this->client = client;
}
// Handles one received command message. The first byte selects the command;
// command 0x3 replies to the client with a 4-byte count. Any other command
// byte is ignored.
void CommandProcessor::process(char* message)
{
    switch(message[0]) { //Command is first byte of message
        case 0x3: {
            const uint32_t count = 10;
            // The reply is built on the stack. The original allocated it
            // with new[] on every call and never freed it, and named it
            // `message`, shadowing the parameter.
            char reply[4];
            // Serialize count least-significant byte first.
            reply[0] = count & 0xff;
            reply[1] = (count >> 8) & 0xff;
            reply[2] = (count >> 16) & 0xff;
            reply[3] = (count >> 24) & 0xff;
            client->send(reply, sizeof reply);
            break;
        }
        default:
            // Unknown command: nothing to do.
            break;
    }
}
commandProcessor's client is NULL when you call commandProcessor.process(buffer).
Why don’t you create an instance of CommandProcessor after you get a pointer to a socket from accept()?
CommandProcessor* commandProcessor;
client = listener.accept(NULL);
commandProcessor = new CommandProcessor(client);
commandProcessor->process(buffer);
Alternatively, you can set client with a function like this.
// Replaces the stored socket pointer, so the object can be given a client
// after it has been constructed.
void CommandProcessor::setClient(TCPSocket* client) {
this->client = client;
}
usage:
client = listener.accept(NULL);
commandProcessor.setClient(client);
I'm somewhat new to LLVM and compilers.
I've decided to generate a DAG using the following command
llc -view-sched-dags hello_world.ll
I got a really big graph with different dependency types. "Getting Started with LLVM Core Libraries" book explained that:
Black arrows mean data flow dependency
Red arrows mean glue dependency
Blue dashed arrows mean chain dependency
I clearly remember talking about data flow dependency in my compiler class at school, but I don't remember talking about the other two. Can someone explain the meaning of the other dependencies? Any help is appreciated.
hello_world.cpp
#include <stdio.h>
#include <assert.h>
// Returns a + b.
int sum(int a, int b) {
return a + b;
}
// Prints "Hello World!" followed by sum(argc, 1), then returns 0.
// argv is not used.
int main(int argc, char** argv) {
printf("Hello World! %d\n", sum(argc, 1));
return 0;
}
hello_world.ll
; ModuleID = 'hello_world.cpp'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
#.str = private unnamed_addr constant [17 x i8] c"Hello World! %d\0A\00", align 1
; Function Attrs: nounwind uwtable
define i32 #_Z3sumii(i32 %a, i32 %b) #0 {
entry:
%a.addr = alloca i32, align 4
%b.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
store i32 %b, i32* %b.addr, align 4
%0 = load i32* %a.addr, align 4
%1 = load i32* %b.addr, align 4
%add = add nsw i32 %0, %1
ret i32 %add
}
; Function Attrs: uwtable
define i32 #main(i32 %argc, i8** %argv) #1 {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
store i32 0, i32* %retval
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
%0 = load i32* %argc.addr, align 4
%call = call i32 #_Z3sumii(i32 %0, i32 1)
%call1 = call i32 (i8*, ...)* #printf(i8* getelementptr inbounds ([17 x i8]* #.str, i32 0, i32 0), i32 %call)
ret i32 0
}
declare i32 #printf(i8*, ...) #2
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = metadata !{metadata !"clang version 3.5.0 "}
hello_world.main.jpg
hello_world.sum.jpg
Chain dependencies prevent nodes with side effects (including memory operations and explicit register operations) from being scheduled out of order relative to each other.
Glue prevents the two nodes from being broken up during scheduling. It's actually more subtle than that [1], but most of the time you don't need to worry about it. (If you're implementing your own backend that requires two instructions to be adjacent to each other, you really want to be using a pseudoinstruction instead, and expand that after scheduling happens.)
[1]: See http://lists.llvm.org/pipermail/llvm-dev/2014-June/074046.html for example
I do not want to corrupt memory or introduce a memory-related bug.
So, what do I need to do before I change what a pointer variable points to?
Or is what I'm doing just fine?
Here is my source code:
#include <stdio.h>
/*
 * Demonstrates re-pointing one int pointer at several objects in turn:
 * x, then y, then the elements of arr, then x again. The objects pointed to
 * are only read, never modified. Always returns 0.
 */
int main(int argc, char *argv[])
{
    /* The command-line arguments are not used. */
    (void)argc;
    (void)argv;

    int x = 10;
    int y = 87;
    int arr[5] = {1,2,3,4,5};
    int *ptr;

    ptr = &x;
    /* %p expects a void * argument, so the int * is cast explicitly. */
    printf("Now ptr pointed to x --> *ptr = %d ~ ptr address: %p \n", *ptr, (void *)ptr);

    ptr = &y;
    printf("Now ptr pointed to y --> *ptr = %d ~ ptr address: %p \n", *ptr, (void *)ptr);

    /* An array name decays to a pointer to its first element. */
    ptr = arr;
    printf("1st 2 byte: %d \n", *ptr);
    /*
     * Advance by one element. The original wrote `*ptr++;`, which also
     * dereferenced the old position and threw the value away.
     */
    ptr++;
    printf("2nd next 2 byte: %d \n", *ptr);
    ptr++;
    printf("3rd next 2 byte: %d \n", *ptr);
    ptr++;
    printf("4th next 2 byte: %d \n", *ptr);

    // Now i want to switch to x again :D
    ptr = &x;
    printf("Now ptr pointed to x AGAIN --> *ptr = %d ~ ptr address: %p \n", *ptr, (void *)ptr);

    return 0;
}
Please enlighten me.
Thank You
There doesn't seem to be a problem with what you have. The pointer is simply changing where it points to, but those background variables are not being changed at all. The variables x, y and your array will be alive for as long as the main function is running, as they are within the scope of main. If you want them to be alive for even less time, you could restrict them to other functions that are called from main.
// x is declared inside this function, so it exists only while a call to
// xVariable is running. The value is never read.
void xVariable()
{
int x = 7;
}
// Scope illustration: y is declared at the top of main, while i and z are
// declared inside the for loop.
int main()
{
int y = 8;
xVariable();
// i belongs to the for statement; z is declared inside the loop body.
for(int i = 0; i < 9; i++)
{
int z = 9;
}
return 0;
}
In this example, y will be alive for the entire run of the program. x will only be alive while the function xVariable is running. i and z are only alive for the duration of the loop. This is a basic example of how variable scope works, but I would recommend looking into it further if memory is going to be important.