gcc cdecl calling convention - gcc

test a cdecl calling convention,but it's a little confusion about this:
original C code:
int __attribute__((cdecl)) add(int a,int b)
{
int i;
i = a+b;
return i;
}
void __attribute__((cdecl)) print(int i, ...)
{
int j,a,b;
a = 2;
b = 3;
j = add(a,b);
}
void __attribute__((cdecl)) main(void)
{
print(2,3);
}
void __attribute__((cdecl)) main(void)
{
print(2,3);
}
compile code with this:
gcc test3.c -o test3 -g -mrtd
the corresponding asm like that:
(gdb) disassemble print
Dump of assembler code for function print:
0x080483cb <+0>: push ebp
0x080483cc <+1>: mov ebp,esp
0x080483ce <+3>: sub esp,0x18
0x080483d1 <+6>: mov DWORD PTR [ebp-0x8],0x2
0x080483d8 <+13>:mov DWORD PTR [ebp-0xc],0x3
0x080483df <+20>:mov eax,DWORD PTR [ebp-0xc]
0x080483e2 <+23>:mov DWORD PTR [esp+0x4],eax
0x080483e6 <+27>:mov eax,DWORD PTR [ebp-0x8]
0x080483e9 <+30>:mov DWORD PTR [esp],eax
0x080483ec <+33>:call 0x80483b4 <add>
0x080483f1 <+38>:mov DWORD PTR [ebp-0x4],eax
0x080483f4 <+41>:leave
0x080483f5 <+42>:ret
(gdb) disassemble add
Dump of assembler code for function add:
0x080483b4 <+0>: push ebp
0x080483b5 <+1>: mov ebp,esp
0x080483b7 <+3>: sub esp,0x10
0x080483ba <+6>: mov eax,DWORD PTR [ebp+0xc]
0x080483bd <+9>: mov edx,DWORD PTR [ebp+0x8]
0x080483c0 <+12>:lea eax,[edx+eax*1]
0x080483c3 <+15>:mov DWORD PTR [ebp-0x4],eax
0x080483c6 <+18>:mov eax,DWORD PTR [ebp-0x4]
0x080483c9 <+21>:leave
0x080483ca <+22>:ret
(gdb) disassemble main
Dump of assembler code for function main:
0x080483f6 <+0>: push ebp
0x080483f7 <+1>: mov ebp,esp
0x080483f9 <+3>: sub esp,0x8
0x080483fc <+6>: mov DWORD PTR [esp+0x4],0x3
0x08048404 <+14>:mov DWORD PTR [esp],0x2
0x0804840b <+21>:call 0x80483cb <print>
0x08048410 <+26>:leave
0x08048411 <+27>:ret
the cdecl convention don't add some instruction like:
add esp 8 or other similar instructions after the calling function.
why this? Thank you.

The code uses the leave instruction, which restores the stack frame. Therefore there is no need to adjust the stack pointer separately.

Related

Performance difference Rust and C++

I am currently learning Rust, and as a first exercise I wanted to implement a function that computes the nth fibonacci number:
fn main() {
for i in 0..48 {
println!("{}: {}", i, fibonacci(i));
}
}
fn fibonacci(n: u32) -> u32 {
match n {
0 => 0,
1 => 1,
_ => fibonacci(n - 1) + fibonacci(n - 2),
}
}
I run it as:
$ time cargo run --release
real 0m15.380s
user 0m15.362s
sys 0m0.014s
As an exercise, I also implemented the same algorithm in C++. I was expecting a similar performance, but the C++ code runs in 80% of the time:
#include<iostream>
unsigned int fibonacci(unsigned int n);
int main (int argc, char* argv[]) {
for(unsigned int i = 0; i < 48; ++i) {
std::cout << i << ": " << fibonacci(i) << '\n';
}
return 0;
}
unsigned int fibonacci(unsigned int n) {
if(n == 0) {
return 0;
} else if (n == 1) {
return 1;
} else {
return fibonacci(n - 1) + fibonacci(n - 2);
}
}
Compiled as:
$ g++ test.cpp -o test.exe -O2
And running:
$ time ./test.exe
real 0m12.127s
user 0m12.124s
sys 0m0.000s
Why do I see such a difference in performance? I am not interested in calculating the fibonacci faster in Rust (with a different algorithm); I am only interested on where the difference comes from. This is just an exercise in my progress as I learn Rust.
TL;DR: It's not Rust vs C++, it's LLVM (Clang) vs GCC.
Different optimizers optimize the code differently, and in this case GCC produces larger but faster code.
This can be verified using godbolt.
Here is Rust, compiled with both GCC (via rustgcc-master):
example::fibonacci:
push r15
push r14
push r13
push r12
push rbp
xor ebp, ebp
push rbx
mov ebx, edi
sub rsp, 24
.L2:
test ebx, ebx
je .L1
cmp ebx, 1
je .L4
lea r12d, -1[rbx]
xor r13d, r13d
.L19:
cmp r12d, 1
je .L6
lea r14d, -1[r12]
xor r15d, r15d
.L16:
cmp r14d, 1
je .L8
lea edx, -1[r14]
xor ecx, ecx
.L13:
cmp edx, 1
je .L10
lea edi, -1[rdx]
mov DWORD PTR 12[rsp], ecx
mov DWORD PTR 8[rsp], edx
call example::fibonacci.localalias
mov ecx, DWORD PTR 12[rsp]
mov edx, DWORD PTR 8[rsp]
add ecx, eax
sub edx, 2
jne .L13
.L14:
add r15d, ecx
sub r14d, 2
je .L17
jmp .L16
.L4:
add ebp, 1
.L1:
add rsp, 24
mov eax, ebp
pop rbx
pop rbp
pop r12
pop r13
pop r14
pop r15
ret
.L6:
add r13d, 1
.L20:
sub ebx, 2
add ebp, r13d
jmp .L2
.L8:
add r15d, 1
.L17:
add r13d, r15d
sub r12d, 2
je .L20
jmp .L19
.L10:
add ecx, 1
jmp .L14
And with LLVM (via rustc):
example::fibonacci:
push rbp
push r14
push rbx
mov ebx, edi
xor ebp, ebp
mov r14, qword ptr [rip + example::fibonacci#GOTPCREL]
cmp ebx, 2
jb .LBB0_3
.LBB0_2:
lea edi, [rbx - 1]
call r14
add ebp, eax
add ebx, -2
cmp ebx, 2
jae .LBB0_2
.LBB0_3:
add ebx, ebp
mov eax, ebx
pop rbx
pop r14
pop rbp
ret
We can see that LLVM produces a naive version -- calling the function in each iteration of the loop -- while GCC partially unrolls the recursion by inlining some calls. This results in a smaller number of calls in the case of GCC, and at about 5ns of overhead per function call, it's significant enough.
We can do the same exercise with the C++ version using LLVM via Clang and GCC and note that the result is pretty much similar.
So, as announced, it's a LLVM vs GCC difference, not a language one.
Incidentally, the fact that optimizers may produce such widely different results is a reason why I am quite excited at the progress of the rustc_codegen_gcc initiative (dubbed rustgcc-master on godbolt) which aims at pluging a GCC backend into the rustc frontend: once complete anyone will be able to switch to the better optimizer for their own workload.

Binary create a shell as another user

I have a specific question,
I have a binary that launch a shell with execv but the shell change the user and with gdb i can't seem to find where the user change is happening.
level0#RainFall:~$ whoami
level0
level0#RainFall:~$ ls -la
-rwsr-x---+ 1 level1 users 747441 Mar 6 2016 level0
level0#RainFall:~$ gdb
(gdb) file level0
Reading symbols from /home/user/level0/level0...(no debugging symbols found)...done.
(gdb) run 423
Starting program: /home/user/level0/level0 423
process 3718 is executing new program: /bin/dash
$ whoami
level0
But when I don't use gdb:
level0#RainFall:~$ ./level0 423
$ whoami
level1
$
Here is the disas of main
0x08048ec0 <+0>: push ebp
0x08048ec1 <+1>: mov ebp,esp
0x08048ec3 <+3>: and esp,0xfffffff0
0x08048ec6 <+6>: sub esp,0x20
0x08048ec9 <+9>: mov eax,DWORD PTR [ebp+0xc]
0x08048ecc <+12>: add eax,0x4
0x08048ecf <+15>: mov eax,DWORD PTR [eax]
0x08048ed1 <+17>: mov DWORD PTR [esp],eax
0x08048ed4 <+20>: call 0x8049710 <atoi>
0x08048ed9 <+25>: cmp eax,0x1a7
// it compare argv[1] with 423 if it is unequal it print No!
0x08048ede <+30>: jne 0x8048f58 <main+152>
0x08048ee0 <+32>: mov DWORD PTR [esp],0x80c5348
0x08048ee7 <+39>: call 0x8050bf0 <strdup>
0x08048eec <+44>: mov DWORD PTR [esp+0x10],eax
0x08048ef0 <+48>: mov DWORD PTR [esp+0x14],0x0
0x08048ef8 <+56>: call 0x8054680 <getegid>
0x08048efd <+61>: mov DWORD PTR [esp+0x1c],eax
0x08048f01 <+65>: call 0x8054670 <geteuid>
0x08048f06 <+70>: mov DWORD PTR [esp+0x18],eax
0x08048f0a <+74>: mov eax,DWORD PTR [esp+0x1c]
0x08048f0e <+78>: mov DWORD PTR [esp+0x8],eax
0x08048f12 <+82>: mov eax,DWORD PTR [esp+0x1c]
0x08048f16 <+86>: mov DWORD PTR [esp+0x4],eax
0x08048f1a <+90>: mov eax,DWORD PTR [esp+0x1c]
0x08048f1e <+94>: mov DWORD PTR [esp],eax
0x08048f21 <+97>: call 0x8054700 <setresgid>
0x08048f26 <+102>: mov eax,DWORD PTR [esp+0x18]
0x08048f2a <+106>: mov DWORD PTR [esp+0x8],eax
0x08048f2e <+110>: mov eax,DWORD PTR [esp+0x18]
0x08048f32 <+114>: mov DWORD PTR [esp+0x4],eax
0x08048f36 <+118>: mov eax,DWORD PTR [esp+0x18]
0x08048f3a <+122>: mov DWORD PTR [esp],eax
0x08048f3d <+125>: call 0x8054690 <setresuid>
0x08048f42 <+130>: lea eax,[esp+0x10]
0x08048f46 <+134>: mov DWORD PTR [esp+0x4],eax
0x08048f4a <+138>: mov DWORD PTR [esp],0x80c5348
//at this point euid and egid are the one of the user that launched gdb`
0x08048f51 <+145>: call 0x8054640 <execv>
// we never go there since execv opens a shell
0x08048f56 <+150>: jmp 0x8048f80 <main+192>
0x08048f58 <+152>: mov eax,ds:0x80ee170
0x08048f5d <+157>: mov edx,eax
0x08048f5f <+159>: mov eax,0x80c5350
0x08048f64 <+164>: mov DWORD PTR [esp+0xc],edx
0x08048f68 <+168>: mov DWORD PTR [esp+0x8],0x5
0x08048f70 <+176>: mov DWORD PTR [esp+0x4],0x1
0x08048f78 <+184>: mov DWORD PTR [esp],eax
0x08048f7b <+187>: call 0x804a230 <fwrite>
0x08048f80 <+192>: mov eax,0x0
0x08048f85 <+197>: leave
0x08048f86 <+198>: ret
End of assembler dump.
I don't understand how the binary changes behaviour if I execute it by gdb or in the shell, maybe its because the binary's proprietary is level1?
If someone has the time to explain to me how it works I'll be really greatful
Thanks a lot
I don't understand how the binary changes behaviour
The binary doesn't -- the kernel creates a new process with different UID when presented a set-uid binary (that's what s in -rwsr-x---+ means).
For obvious security reasons the kernel doesn't do that when the binary is being debugged.

What is the reason for this x86 calling convention?

I was trying to reduce the clutter in my original question (below), but I am afraid that made it harder to follow. So here is the original source along with IDA's disassembly.
My question still is this: why does getStruct() pop the return argument and only the return argument off the stack? (It's calling ret 4 instead of ret for no arguments or ret 12 for all three arguments).
#include <iostream>
struct SomeStruct {
char m_buff[0x1000];
};
SomeStruct getStruct(uint32_t someArg1, uint32_t someArg2)
{
return SomeStruct();
}
int main(int argc, const char * argv[])
{
SomeStruct myLocalStruct = getStruct(0x20,0x30);
return 0;
}
; _DWORD __stdcall getStruct(unsigned int, unsigned int)
public getStruct(unsigned int, unsigned int)
getStruct(unsigned int, unsigned int) proc near ; CODE XREF: _main+4Dp
var_8 = dword ptr -8
var_4 = dword ptr -4
arg_0 = dword ptr 8
arg_4 = dword ptr 0Ch
arg_8 = dword ptr 10h
push ebp
mov ebp, esp
sub esp, 18h
mov eax, [ebp+arg_8]
mov ecx, [ebp+arg_4]
mov edx, [ebp+arg_0]
mov [ebp+var_4], ecx
mov [ebp+var_8], eax
mov eax, esp
mov [eax], edx
mov dword ptr [eax+4], 1000h
call ___bzero
add esp, 18h
pop ebp
retn 4
getStruct(unsigned int, unsigned int) endp
; ---------------------------------------------------------------------------
align 10h
; =============== S U B R O U T I N E =======================================
; Attributes: bp-based frame
; int __cdecl main(int argc, const char **argv, const char **envp)
public _main
_main proc near
var_1020 = dword ptr -1020h
var_101C = dword ptr -101Ch
var_1018 = dword ptr -1018h
var_14 = dword ptr -14h
var_10 = dword ptr -10h
var_C = dword ptr -0Ch
argc = dword ptr 8
argv = dword ptr 0Ch
envp = dword ptr 10h
push ebp
mov ebp, esp
push edi
push esi
sub esp, 1030h
mov eax, [ebp+argv]
mov ecx, [ebp+argc]
lea edx, [ebp+var_1018]
mov esi, 20h
mov edi, 30h
mov [ebp+var_C], 0
mov [ebp+var_10], ecx
mov [ebp+var_14], eax
mov [esp], edx ; ptr to destination
mov dword ptr [esp+4], 20h ; unsigned int
mov dword ptr [esp+8], 30h
mov [ebp+var_101C], esi
mov [ebp+var_1020], edi
call getStruct(uint,uint)
sub esp, 4
mov eax, 0
add esp, 1030h
pop esi
pop edi
pop ebp
retn
_main endp
Original question below:
I have some function with the following declaration:
SomeStruct getStruct(uint32_t someArg1, uint32_t someArg2);
getStruct is being called like this:
myLocalStruct = getStruct(someArg1,someArg2);
When compiling this using clang on x86 the calling code looks roughly like this:
lea esi, [ebp-myLocalStructOffset]
mov [esp], esi
mov [esp+4], someArg1
mov [esp+8], someArg2
call getStruct;
sub esp, 4;
So the caller is restoring its stack pointer after the call. Sure enough, the implementation of getStruct ends with a ret 4, effectively popping the structs pointer.
This looks like it is partially cdecl with the caller being responsible for the stack cleanup, and partially stdcall with the callee removing the arguments. I just cannot figure out what the reason is for this approach. Why not leave all the cleanup to the caller? Is there any benefit to this ?
It looks as if you forgot to quote the few lines of assembler above the part you quoted. I assume there is something like:
sub esp,12
somewhere above what you quoted. The calling convention looks like pure stdcall, and the return value is in reality passed as a hidden pointer argument, i.e. the code is in fact compiled as if you had declared:
void __stdcall getStruct(SomeStruct *returnValue, uint32_t someArg1, uint32_t someArg2);

frame pointer register 'ebx' modified by inline assembly code

Unfortunately, I had to re-image my laptop to install Visual Studio 2012. My project build but with above warning. Previously I had Visual Studio 2010 and I never got the above warning. The code is as follows:
__asm
{
//Initialize pointers on matrices
mov eax, dword ptr [this]
mov ebx, dword ptr [eax+UPkk]
mov dword ptr [UPkk_ptr],ebx
mov ebx, dword ptr [eax+UPk1k]
mov dword ptr [UPk1k_ptr],ebx
mov ebx, dword ptr [eax+DPk1k]
mov dword ptr [DPk1k_ptr],ebx
mov ebx, dword ptr [eax+DPkk]
mov dword ptr [DPkk_ptr],ebx
mov ebx, dword ptr [eax+mat_A]
mov dword ptr [mat_A_ptr],ebx
mov ebx, dword ptr [eax+vec_a]
mov dword ptr [vec_a_ptr],ebx
mov ebx, dword ptr [eax+vec_b]
mov dword ptr [vec_b_ptr],ebx
}
Do I need to change any settings in the project?
Best Regards
Chintan
Edit: In the above code when I replace ebx with ecx, the warnings go away and the code works fine. However, there is another piece of code where I have used ebx and ecx and in that case my program crashes. Here is the code:
__asm
{
//Initialize UPk1k[idx_4] pointer
mov eax, dword ptr [UPk1k_ptr]
mov ebx, dword ptr [idx_4]
imul ebx,8
add eax,ebx
mov dword ptr [UPk1k_id4_ptr],eax
//Initialize UPkk[idx_4] pointer
mov eax, dword ptr [UPkk_ptr]
mov ebx, dword ptr [idx_4]
imul ebx,8
add eax,ebx
mov dword ptr [UPkk_id4_ptr],eax
//Initialize UPk1k[idx_4] pointer
mov eax, dword ptr [vec_b_ptr]
mov ebx, dword ptr [idx_1]
imul ebx,8
add eax,ebx
mov dword ptr [vec_b_id1_ptr],eax
mov edi, dword ptr [idx_1] //Load idx_1 in edi
mov esi, 0 //initialize loop counter
jmp start_proc11
start_for11:inc esi //idx_2++
start_proc11:cmp esi, edi //idx_2<idx_1 ?
jge end_for11 //If yes so end of the loop
mov eax, UPk1k_id4_ptr //load UPk1k[idx_4] adress
mov ebx, vec_b_ptr //load vec_b adress
mov ecx, esi
imul ecx,8
add eax, ecx //UPk1k[idx_4+idx_2] in eax
add ebx, ecx //vec_b[idx_2] in eax
fld qword ptr [eax]//push UPk1k[idx_4+idx_2]
fld qword ptr [ebx] //push vec_b[idx_2]
mov edx,dword ptr [Sd_ptr]
fmul qword ptr [edx] //vec_b[idx_2]*Sd
fadd //pop UPk1k[idx_4+idx_2]+vec_b[idx_2]*Sd
mov edx,dword ptr [UPkk_id4_ptr]
fstp qword ptr [edx+esi*8] //pop UPkk[idx_4+idx_2]=UPk1k[idx_4+idx_2]+vec_b[idx_2]*Sd
fld qword ptr [ebx] //push vec_b[idx_2]
mov edx,dword ptr [vec_b_id1_ptr]
fld qword ptr [edx] //push vec_b[idx_2]
fmul qword ptr [eax]
fadd
fstp qword ptr [ebx]
jmp start_for11 //end of the loop
end_for11:
}
Many Thanks
Best Regards
CS
See MSDN about registers and that warning. They explain why the warning is produced: it forces the compiler to preserve value of EBX, which might be counter-productive to performance, the usual reason inline asm is used. Relevant quote:
In addition, by using EBX, ESI or EDI in inline assembly code, you
force the compiler to save and restore those registers in the function
prologue and epilogue.
To disable the warning, I think the syntax is
#pragma warning( disable : 4731 )
However, I'd try to use some other register instead, because the warning is there for a good reason, really, like most warnings.
In fact, Looking at your asm code, simply replace ebx With ecx, that should solve the problem.

Inline assembler get address of pointer Visual Studio

I have a function in VS where I pass a pointer to the function. I then want to store the pointer in a register to further manipulate. How do you do that?
I have tried
float __declspec(align(16)) x[16] =
{
0.125000, 0.125000, 0.125000, 0,
-0.125000, 0.125000, -0.125000, 0,
0.125000, -0.125000, -0.125000, 0,
-0.125000, -0.125000, 0.125000, 0
};
void e()
{
__asm mov eax, x // doesn't work
__asm mov ebx, [eax]
}
void f(float *p)
{
__asm mov eax, p // does work
__asm mov ebx, [eax]
}
int main()
{
f(x);
e();
}
Option 1 actually seems to work fine. Consider the following program:
#include <stdio.h>
void f(int *p) {
__asm mov eax, p
__asm mov ebx, [eax]
// break here
}
void main()
{
int i = 0x12345678;
f(&i);
}
With Visual Studio 2008 SP1, a single-file C++ program and debug build, I'm getting the following in the registers window when stepping into the end of f():
EAX = 004DF960
EBX = 12345678
ECX = 00000000
EDX = 00000001
ESI = 00000000
EDI = 004DF884
EIP = 003013C3
ESP = 004DF7B8
EBP = 004DF884
EFL = 00000202
Looking at the values in EAX, EBX and ESP, that looks like a pretty good evidence that you actually have the pointer you wanted in EAX. The address in EAX is just a tad higher than the one in ESP, suggesting it's one frame higher up the stack. The dereferenced value loaded into EBX suggests we got the right address.
Loading the address of a global is subtly different. The following example uses the LEA instruction to accomplish the task.
#include <stdio.h>
int a[] = { 0x1234, 0x4567 };
void main()
{
// __asm mov eax, a ; interpreted as eax <- a[0]
__asm lea eax, a ; interpreted as eax <- &a[0]
__asm mov ebx, [eax]
__asm mov ecx, [eax+4]
// break here
}
Stepping to the end of main() gets you the following register values. EAX gets the address of the first element in the array, while EBX an ECX get the values of its members.
EAX = 00157038
EBX = 00001234
ECX = 00004567
EDX = 00000001
ESI = 00000000
EDI = 0047F800
EIP = 001513C9
ESP = 0047F734
EBP = 0047F800
EFL = 00000202
The magic isn't in the LEA instruction itself. Rather, it appears that the __asm directive treats C/C++ identifiers differently depending on whether a MOV or an LEA instruction is used. Here is the ASM dump of the same program, when the MOV instruction is uncommented. Notice how the MOV instruction gets the content of a[] as its argument (DWORD PTR), while the LEA instruction gets its offset.
; ...
PUBLIC ?a##3PAHA ; a
_DATA SEGMENT
?a##3PAHA DD 01234H ; a
DD 04567H
_DATA ENDS
; ...
mov eax, DWORD PTR ?a##3PAHA
lea eax, OFFSET ?a##3PAHA
mov ebx, DWORD PTR [eax]
mov ecx, DWORD PTR [eax+4]
; ...
I'm not sure if this is correct, but have you tried casting *p to an int first and then loading that value?
void f(*p)
{
int tmp = (int)p;
// asm stuff...
}

Resources