Disassemble exe file after compilation with gcc - gcc

This made One week i search for a solution, please help me.
This is my source code: myexe.c
#include <stdlib.h>
#include <stdio.h>
/* gcc -m32 -o myexe myexe.c */
int main(void)
{
system("ls /home/myexe");
return 0;
}
After complilation, I want to disassemble myexe and change ls /home/myexe to cat /home/myexe in the assembly code.
It is a exercice I received. Please help me!
myexe output:
ch11: format de fichier elf64-x86-64
Déassemblage de la section .init :
0000000000000530 <_init>:
530: 48 83 ec 08 sub $0x8,%rsp
534: 48 8b 05 a5 0a 20 00 mov 0x200aa5(%rip),%rax # 200fe0 <__gmon_start__>
53b: 48 85 c0 test %rax,%rax
53e: 74 02 je 542 <_init+0x12>
540: ff d0 callq *%rax
542: 48 83 c4 08 add $0x8,%rsp
546: c3 retq
Déassemblage de la section .plt :
0000000000000550 <.plt>:
550: ff 35 b2 0a 20 00 pushq 0x200ab2(%rip) # 201008 <_GLOBAL_OFFSET_TABLE_+0x8>
556: ff 25 b4 0a 20 00 jmpq *0x200ab4(%rip) # 201010 <_GLOBAL_OFFSET_TABLE_+0x10>
55c: 0f 1f 40 00 nopl 0x0(%rax)
0000000000000560 <system#plt>:
560: ff 25 b2 0a 20 00 jmpq *0x200ab2(%rip) # 201018 <system#GLIBC_2.2.5>
566: 68 00 00 00 00 pushq $0x0
56b: e9 e0 ff ff ff jmpq 550 <.plt>
Déassemblage de la section .plt.got :
0000000000000570 <.plt.got>:
570: ff 25 82 0a 20 00 jmpq *0x200a82(%rip) # 200ff8 <__cxa_finalize#GLIBC_2.2.5>
576: 66 90 xchg %ax,%ax
Déassemblage de la section .text :
0000000000000580 <_start>:
580: 31 ed xor %ebp,%ebp
582: 49 89 d1 mov %rdx,%r9
585: 5e pop %rsi
586: 48 89 e2 mov %rsp,%rdx
589: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
58d: 50 push %rax
58e: 54 push %rsp
58f: 4c 8d 05 aa 01 00 00 lea 0x1aa(%rip),%r8 # 740 <__libc_csu_fini>
596: 48 8d 0d 33 01 00 00 lea 0x133(%rip),%rcx # 6d0 <__libc_csu_init>
59d: 48 8d 3d 0c 01 00 00 lea 0x10c(%rip),%rdi # 6b0 <main>
5a4: ff 15 2e 0a 20 00 callq *0x200a2e(%rip) # 200fd8 <__libc_start_main#GLIBC_2.2.5>
5aa: f4 hlt
5ab: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
00000000000005b0 <deregister_tm_clones>:
5b0: 48 8d 3d 79 0a 20 00 lea 0x200a79(%rip),%rdi # 201030 <__TMC_END__>
5b7: 48 8d 05 79 0a 20 00 lea 0x200a79(%rip),%rax # 201037 <__TMC_END__+0x7>
5be: 55 push %rbp
5bf: 48 29 f8 sub %rdi,%rax
5c2: 48 89 e5 mov %rsp,%rbp
5c5: 48 83 f8 0e cmp $0xe,%rax
5c9: 76 15 jbe 5e0 <deregister_tm_clones+0x30>
5cb: 48 8b 05 fe 09 20 00 mov 0x2009fe(%rip),%rax # 200fd0 <_ITM_deregisterTMCloneTable>
5d2: 48 85 c0 test %rax,%rax
5d5: 74 09 je 5e0 <deregister_tm_clones+0x30>
5d7: 5d pop %rbp
5d8: ff e0 jmpq *%rax
5da: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
5e0: 5d pop %rbp
5e1: c3 retq
5e2: 0f 1f 40 00 nopl 0x0(%rax)
5e6: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
5ed: 00 00 00
00000000000005f0 <register_tm_clones>:
5f0: 48 8d 3d 39 0a 20 00 lea 0x200a39(%rip),%rdi # 201030 <__TMC_END__>
5f7: 48 8d 35 32 0a 20 00 lea 0x200a32(%rip),%rsi # 201030 <__TMC_END__>
5fe: 55 push %rbp
5ff: 48 29 fe sub %rdi,%rsi
602: 48 89 e5 mov %rsp,%rbp
605: 48 c1 fe 03 sar $0x3,%rsi
609: 48 89 f0 mov %rsi,%rax
60c: 48 c1 e8 3f shr $0x3f,%rax
610: 48 01 c6 add %rax,%rsi
613: 48 d1 fe sar %rsi
616: 74 18 je 630 <register_tm_clones+0x40>
618: 48 8b 05 d1 09 20 00 mov 0x2009d1(%rip),%rax # 200ff0 <_ITM_registerTMCloneTable>
61f: 48 85 c0 test %rax,%rax
622: 74 0c je 630 <register_tm_clones+0x40>
624: 5d pop %rbp
625: ff e0 jmpq *%rax
627: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
62e: 00 00
630: 5d pop %rbp
631: c3 retq
632: 0f 1f 40 00 nopl 0x0(%rax)
636: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
63d: 00 00 00
0000000000000640 <__do_global_dtors_aux>:
640: 80 3d e9 09 20 00 00 cmpb $0x0,0x2009e9(%rip) # 201030 <__TMC_END__>
647: 75 27 jne 670 <__do_global_dtors_aux+0x30>
649: 48 83 3d a7 09 20 00 cmpq $0x0,0x2009a7(%rip) # 200ff8 <__cxa_finalize#GLIBC_2.2.5>
650: 00
651: 55 push %rbp
652: 48 89 e5 mov %rsp,%rbp
655: 74 0c je 663 <__do_global_dtors_aux+0x23>
657: 48 8b 3d ca 09 20 00 mov 0x2009ca(%rip),%rdi # 201028 <__dso_handle>
65e: e8 0d ff ff ff callq 570 <.plt.got>
663: e8 48 ff ff ff callq 5b0 <deregister_tm_clones>
668: 5d pop %rbp
669: c6 05 c0 09 20 00 01 movb $0x1,0x2009c0(%rip) # 201030 <__TMC_END__>
670: f3 c3 repz retq
672: 0f 1f 40 00 nopl 0x0(%rax)
676: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
67d: 00 00 00
0000000000000680 <frame_dummy>:
680: 48 8d 3d 61 07 20 00 lea 0x200761(%rip),%rdi # 200de8 <__JCR_END__>
687: 48 83 3f 00 cmpq $0x0,(%rdi)
68b: 75 0b jne 698 <frame_dummy+0x18>
68d: e9 5e ff ff ff jmpq 5f0 <register_tm_clones>
692: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
698: 48 8b 05 49 09 20 00 mov 0x200949(%rip),%rax # 200fe8 <_Jv_RegisterClasses>
69f: 48 85 c0 test %rax,%rax
6a2: 74 e9 je 68d <frame_dummy+0xd>
6a4: 55 push %rbp
6a5: 48 89 e5 mov %rsp,%rbp
6a8: ff d0 callq *%rax
6aa: 5d pop %rbp
6ab: e9 40 ff ff ff jmpq 5f0 <register_tm_clones>
00000000000006b0 <main>:
6b0: 55 push %rbp
6b1: 48 89 e5 mov %rsp,%rbp
6b4: 48 8d 3d 99 00 00 00 lea 0x99(%rip),%rdi # 754 <_IO_stdin_used+0x4>
6bb: e8 a0 fe ff ff callq 560 <system#plt>
6c0: b8 00 00 00 00 mov $0x0,%eax
6c5: 5d pop %rbp
6c6: c3 retq
6c7: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
6ce: 00 00
00000000000006d0 <__libc_csu_init>:
6d0: 41 57 push %r15
6d2: 41 56 push %r14
6d4: 41 89 ff mov %edi,%r15d
6d7: 41 55 push %r13
6d9: 41 54 push %r12
6db: 4c 8d 25 f6 06 20 00 lea 0x2006f6(%rip),%r12 # 200dd8 <__frame_dummy_init_array_entry>
6e2: 55 push %rbp
6e3: 48 8d 2d f6 06 20 00 lea 0x2006f6(%rip),%rbp # 200de0 <__init_array_end>
6ea: 53 push %rbx
6eb: 49 89 f6 mov %rsi,%r14
6ee: 49 89 d5 mov %rdx,%r13
6f1: 4c 29 e5 sub %r12,%rbp
6f4: 48 83 ec 08 sub $0x8,%rsp
6f8: 48 c1 fd 03 sar $0x3,%rbp
6fc: e8 2f fe ff ff callq 530 <_init>
701: 48 85 ed test %rbp,%rbp
704: 74 20 je 726 <__libc_csu_init+0x56>
706: 31 db xor %ebx,%ebx
708: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
70f: 00
710: 4c 89 ea mov %r13,%rdx
713: 4c 89 f6 mov %r14,%rsi
716: 44 89 ff mov %r15d,%edi
719: 41 ff 14 dc callq *(%r12,%rbx,8)
71d: 48 83 c3 01 add $0x1,%rbx
721: 48 39 dd cmp %rbx,%rbp
724: 75 ea jne 710 <__libc_csu_init+0x40>
726: 48 83 c4 08 add $0x8,%rsp
72a: 5b pop %rbx
72b: 5d pop %rbp
72c: 41 5c pop %r12
72e: 41 5d pop %r13
730: 41 5e pop %r14
732: 41 5f pop %r15
734: c3 retq
735: 90 nop
736: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
73d: 00 00 00
0000000000000740 <__libc_csu_fini>:
740: f3 c3 repz retq
Déassemblage de la section .fini :
0000000000000744 <_fini>:
744: 48 83 ec 08 sub $0x8,%rsp
748: 48 83 c4 08 add $0x8,%rsp
74c: c3 retq

If you want to disassemble your program you can use any of the large number of free and paid disassemblers that are out there.
If you simply want to change the command run by system, you do not need to disassemble the program. You can simply use a hex editor to modify the binary.

Related

Locate jump tables in x86 assembly

Where are jump tables located in x86 elf code?
progname: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <main>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 83 ec 10 sub $0x10,%rsp
8: c7 45 fc 02 00 00 00 movl $0x2,-0x4(%rbp)
f: 83 7d fc 09 cmpl $0x9,-0x4(%rbp)
13: 0f 87 ac 00 00 00 ja c5 <main+0xc5>
19: 8b 45 fc mov -0x4(%rbp),%eax
1c: 48 8b 04 c5 00 00 00 mov 0x0(,%rax,8),%rax
23: 00
20: R_X86_64_32S .rodata+0x48
24: ff e0 jmpq *%rax
26: bf 00 00 00 00 mov $0x0,%edi
27: R_X86_64_32 .rodata
2b: b8 00 00 00 00 mov $0x0,%eax
30: e8 00 00 00 00 callq 35 <main+0x35>
31: R_X86_64_PLT32 printf-0x4
35: e9 9b 00 00 00 jmpq d5 <main+0xd5>
3a: bf 00 00 00 00 mov $0x0,%edi
3b: R_X86_64_32 .rodata+0xc
3f: b8 00 00 00 00 mov $0x0,%eax
44: e8 00 00 00 00 callq 49 <main+0x49>
45: R_X86_64_PLT32 printf-0x4
49: e9 87 00 00 00 jmpq d5 <main+0xd5>
4e: bf 00 00 00 00 mov $0x0,%edi
4f: R_X86_64_32 .rodata+0x18
53: b8 00 00 00 00 mov $0x0,%eax
58: e8 00 00 00 00 callq 5d <main+0x5d>
59: R_X86_64_PLT32 printf-0x4
5d: eb 76 jmp d5 <main+0xd5>
5f: bf 00 00 00 00 mov $0x0,%edi
60: R_X86_64_32 .rodata
64: b8 00 00 00 00 mov $0x0,%eax
69: e8 00 00 00 00 callq 6e <main+0x6e>
6a: R_X86_64_PLT32 printf-0x4
6e: eb 65 jmp d5 <main+0xd5>
70: bf 00 00 00 00 mov $0x0,%edi
71: R_X86_64_32 .rodata+0xc
75: b8 00 00 00 00 mov $0x0,%eax
7a: e8 00 00 00 00 callq 7f <main+0x7f>
7b: R_X86_64_PLT32 printf-0x4
7f: eb 54 jmp d5 <main+0xd5>
81: bf 00 00 00 00 mov $0x0,%edi
82: R_X86_64_32 .rodata+0x18
86: b8 00 00 00 00 mov $0x0,%eax
8b: e8 00 00 00 00 callq 90 <main+0x90>
8c: R_X86_64_PLT32 printf-0x4
90: eb 43 jmp d5 <main+0xd5>
92: bf 00 00 00 00 mov $0x0,%edi
93: R_X86_64_32 .rodata
97: b8 00 00 00 00 mov $0x0,%eax
9c: e8 00 00 00 00 callq a1 <main+0xa1>
9d: R_X86_64_PLT32 printf-0x4
a1: eb 32 jmp d5 <main+0xd5>
a3: bf 00 00 00 00 mov $0x0,%edi
a4: R_X86_64_32 .rodata+0xc
a8: b8 00 00 00 00 mov $0x0,%eax
ad: e8 00 00 00 00 callq b2 <main+0xb2>
ae: R_X86_64_PLT32 printf-0x4
b2: eb 21 jmp d5 <main+0xd5>
b4: bf 00 00 00 00 mov $0x0,%edi
b5: R_X86_64_32 .rodata+0x18
b9: b8 00 00 00 00 mov $0x0,%eax
be: e8 00 00 00 00 callq c3 <main+0xc3>
bf: R_X86_64_PLT32 printf-0x4
c3: eb 10 jmp d5 <main+0xd5>
c5: bf 00 00 00 00 mov $0x0,%edi
c6: R_X86_64_32 .rodata+0x24
ca: b8 00 00 00 00 mov $0x0,%eax
cf: e8 00 00 00 00 callq d4 <main+0xd4>
d0: R_X86_64_PLT32 printf-0x4
d4: 90 nop
d5: b8 00 00 00 00 mov $0x0,%eax
da: c9 leaveq
db: c3 retq
I have a basic switch case with 10 cases (including default case). If x=2, then it goes to 2nd block in switch case code.
Need help in understanding where exactly gcc generated jump tables are located in elf files and in which section. I pasted output of objdump -dr progname above.

What are the exceptions for __always_inline and __never_inline in gcc?

Andrei Alexandrescu mentioned in a talk that he presented at cppcon that gcc doesnt always inline when then __always inline macro is defined and used, and vice-versa for __never_inline. I couldn't find much documentation about which scenarios that occurs in, so someone tell me?
I couldn't find much documentation about which scenarios that occurs in, so someone tell me?
Could you give a code example?
Passing the function as a function pointer and calling it from a different translation unit (even from the same translation unit assuming optimization is -O0 or -Os and it's better for size or like really just... "lucky") will effectively disallow the possibility of inlining the function:
cat << EOF > main.c
__attribute__((__always_inline__))
static inline
void f(void) {
printf("Hey buddy!\n");
}
extern void call_fp(void (*fp)(void));
int main() {
call_fp(f);
}
EOF
cat << EOF > g.c
void call_fp(void (*fp)(void)) {
printf("Hey pal!");
fp();
}
EOF
Compile and inspect:
$ gcc -g -Ofast main.c g.c && objdump -S ./a.out | grep '<main>:\|<call_fp>:'
0000000000001050 <main>:
int main() {
1050: 48 83 ec 08 sub $0x8,%rsp
call_fp(f);
1054: 48 8d 3d 35 01 00 00 lea 0x135(%rip),%rdi # 1190 <f>
################################### vvvvv NOT INLINED ###############
105b: e8 40 01 00 00 callq 11a0 <call_fp>
}
1060: 31 c0 xor %eax,%eax
1062: 48 83 c4 08 add $0x8,%rsp
1066: c3 retq
1067: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
106e: 00 00
--
00000000000011a0 <call_fp>:
void call_fp(void (*fp)(void)) {
11a0: 53 push %rbx
printf("Hey pal!");
11a1: 31 c0 xor %eax,%eax
void call_fp(void (*fp)(void)) {
11a3: 48 89 fb mov %rdi,%rbx
printf("Hey pal!");
11a6: 48 8d 3d 62 0e 00 00 lea 0xe62(%rip),%rdi # 200f <_IO_stdin_used+0xf>
11ad: e8 8e fe ff ff callq 1040 <printf#plt>
fp();
11b2: 48 89 d8 mov %rbx,%rax
}
11b5: 5b pop %rbx
fp();
################################### vvvv NOT INLINED ####################
11b6: ff e0 jmpq *%rax
11b8: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
11bf: 00
You could still optimize across translation units with LTO:
$ gcc -g -flto -Ofast main.c g.c && objdump -S ./a.out | grep '<main>:' -A30
int main() {
1050: 48 83 ec 08 sub $0x8,%rsp
void call_fp(void (*fp)(void)) {
printf("Hey pal!");
1054: 48 8d 3d a9 0f 00 00 lea 0xfa9(%rip),%rdi # 2004 <_IO_stdin_used+0x4>
105b: 31 c0 xor %eax,%eax
105d: e8 de ff ff ff callq 1040 <printf#plt>
printf("Hey buddy!\n");
1062: 48 8d 3d a4 0f 00 00 lea 0xfa4(%rip),%rdi # 200d <_IO_stdin_used+0xd>
####################### Both call_fp() and f() were inlined!!!!!!!!!!!!!
1069: e8 c2 ff ff ff callq 1030 <puts#plt>
call_fp(f);
}
106e: 31 c0 xor %eax,%eax
1070: 48 83 c4 08 add $0x8,%rsp
1074: c3 retq
1075: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
107c: 00 00 00
107f: 90 nop
As for __attribute__((__noinline__)) I did not encounter a possible case where gcc would rather choose to inline a __noinline__ function.

gcc 5 does not detect stack smashing for inline functions but gcc 7 does

This is the code I used to test the stack protection feature of gcc.
static inline void charcpy(char* temp)
{
temp[0]='a';
temp[1]='b';
temp[2]='c';
temp[3]='d';
temp[4]='\0';
}
int main()
{
char temp[3];
charcpy(temp);
return 0;
}
When I compile with gcc 7.3 (without specifying any flags), I got the following runtime error on my desktop
*** stack smashing detected ***: <unknown> terminated
Aborted (core dumped)
The uname -a command gives the following for my desktop if it matters
Linux lixun-Desktop 4.15.0-36-generic #39-Ubuntu SMP Mon Sep 24 16:19:09 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
However, when I tried the same thing on a server machine using gcc 5.4, no error shows up. The uname -a command for the server machine is
Linux aggravation 4.4.0-137-generic #163-Ubuntu SMP Mon Sep 24 13:14:43 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
Then I use objdump -D a.out to check their assembly codes but still cannot figure out why the stack protection is not working on the server machine.
Here is the output on my desktop (I only paste the section I think might matter)
Disassembly of section .init:
0000000000000510 <_init>:
510: 48 83 ec 08 sub $0x8,%rsp
514: 48 8b 05 cd 0a 20 00 mov 0x200acd(%rip),%rax # 200fe8 <__gmon_start__>
51b: 48 85 c0 test %rax,%rax
51e: 74 02 je 522 <_init+0x12>
520: ff d0 callq *%rax
522: 48 83 c4 08 add $0x8,%rsp
526: c3 retq
Disassembly of section .plt:
0000000000000530 <.plt>:
530: ff 35 8a 0a 20 00 pushq 0x200a8a(%rip) # 200fc0 <_GLOBAL_OFFSET_TABLE_+0x8>
536: ff 25 8c 0a 20 00 jmpq *0x200a8c(%rip) # 200fc8 <_GLOBAL_OFFSET_TABLE_+0x10>
53c: 0f 1f 40 00 nopl 0x0(%rax)
0000000000000540 <__stack_chk_fail#plt>:
540: ff 25 8a 0a 20 00 jmpq *0x200a8a(%rip) # 200fd0 <__stack_chk_fail#GLIBC_2.4>
546: 68 00 00 00 00 pushq $0x0
54b: e9 e0 ff ff ff jmpq 530 <.plt>
Disassembly of section .plt.got:
0000000000000550 <__cxa_finalize#plt>:
550: ff 25 a2 0a 20 00 jmpq *0x200aa2(%rip) # 200ff8 <__cxa_finalize#GLIBC_2.2.5>
556: 66 90 xchg %ax,%ax
...
Disassembly of section .text:
0000000000000560 <_start>:
560: 31 ed xor %ebp,%ebp
562: 49 89 d1 mov %rdx,%r9
565: 5e pop %rsi
566: 48 89 e2 mov %rsp,%rdx
569: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
56d: 50 push %rax
56e: 54 push %rsp
56f: 4c 8d 05 ea 01 00 00 lea 0x1ea(%rip),%r8 # 760 <__libc_csu_fini>
576: 48 8d 0d 73 01 00 00 lea 0x173(%rip),%rcx # 6f0 <__libc_csu_init>
57d: 48 8d 3d 24 01 00 00 lea 0x124(%rip),%rdi # 6a8 <main>
584: ff 15 56 0a 20 00 callq *0x200a56(%rip) # 200fe0 <__libc_start_main#GLIBC_2.2.5>
58a: f4 hlt
58b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
...
000000000000066a <charcpy>:
66a: 55 push %rbp
66b: 48 89 e5 mov %rsp,%rbp
66e: 48 89 7d f8 mov %rdi,-0x8(%rbp)
672: 48 8b 45 f8 mov -0x8(%rbp),%rax
676: c6 00 61 movb $0x61,(%rax)
679: 48 8b 45 f8 mov -0x8(%rbp),%rax
67d: 48 83 c0 01 add $0x1,%rax
681: c6 00 62 movb $0x62,(%rax)
684: 48 8b 45 f8 mov -0x8(%rbp),%rax
688: 48 83 c0 02 add $0x2,%rax
68c: c6 00 63 movb $0x63,(%rax)
68f: 48 8b 45 f8 mov -0x8(%rbp),%rax
693: 48 83 c0 03 add $0x3,%rax
697: c6 00 64 movb $0x64,(%rax)
69a: 48 8b 45 f8 mov -0x8(%rbp),%rax
69e: 48 83 c0 04 add $0x4,%rax
6a2: c6 00 00 movb $0x0,(%rax)
6a5: 90 nop
6a6: 5d pop %rbp
6a7: c3 retq
00000000000006a8 <main>:
6a8: 55 push %rbp
6a9: 48 89 e5 mov %rsp,%rbp
6ac: 48 83 ec 10 sub $0x10,%rsp
6b0: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
6b7: 00 00
6b9: 48 89 45 f8 mov %rax,-0x8(%rbp)
6bd: 31 c0 xor %eax,%eax
6bf: 48 8d 45 f5 lea -0xb(%rbp),%rax
6c3: 48 89 c7 mov %rax,%rdi
6c6: e8 9f ff ff ff callq 66a <charcpy>
6cb: b8 00 00 00 00 mov $0x0,%eax
6d0: 48 8b 55 f8 mov -0x8(%rbp),%rdx
6d4: 64 48 33 14 25 28 00 xor %fs:0x28,%rdx
6db: 00 00
6dd: 74 05 je 6e4 <main+0x3c>
6df: e8 5c fe ff ff callq 540 <__stack_chk_fail#plt>
6e4: c9 leaveq
6e5: c3 retq
6e6: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
6ed: 00 00 00
And this is the output on the server machine
Disassembly of section .init:
00000000004003f0 <_init>:
4003f0: 48 83 ec 08 sub $0x8,%rsp
4003f4: 48 8b 05 fd 0b 20 00 mov 0x200bfd(%rip),%rax # 600ff8 <_DYNAMIC+0x1d0>
4003fb: 48 85 c0 test %rax,%rax
4003fe: 74 05 je 400405 <_init+0x15>
400400: e8 3b 00 00 00 callq 400440 <__libc_start_main#plt+0x10>
400405: 48 83 c4 08 add $0x8,%rsp
400409: c3 retq
Disassembly of section .plt:
0000000000400410 <__stack_chk_fail#plt-0x10>:
400410: ff 35 f2 0b 20 00 pushq 0x200bf2(%rip) # 601008 <_GLOBAL_OFFSET_TABLE_+0x8>
400416: ff 25 f4 0b 20 00 jmpq *0x200bf4(%rip) # 601010 <_GLOBAL_OFFSET_TABLE_+0x10>
40041c: 0f 1f 40 00 nopl 0x0(%rax)
0000000000400420 <__stack_chk_fail#plt>:
400420: ff 25 f2 0b 20 00 jmpq *0x200bf2(%rip) # 601018 <_GLOBAL_OFFSET_TABLE_+0x18>
400426: 68 00 00 00 00 pushq $0x0
40042b: e9 e0 ff ff ff jmpq 400410 <_init+0x20>
0000000000400430 <__libc_start_main#plt>:
400430: ff 25 ea 0b 20 00 jmpq *0x200bea(%rip) # 601020 <_GLOBAL_OFFSET_TABLE_+0x20>
400436: 68 01 00 00 00 pushq $0x1
40043b: e9 d0 ff ff ff jmpq 400410 <_init+0x20>
Disassembly of section .plt.got:
0000000000400440 <.plt.got>:
400440: ff 25 b2 0b 20 00 jmpq *0x200bb2(%rip) # 600ff8 <_DYNAMIC+0x1d0>
400446: 66 90 xchg %ax,%ax
...
Disassembly of section .text:
0000000000400450 <_start>:
400450: 31 ed xor %ebp,%ebp
400452: 49 89 d1 mov %rdx,%r9
400455: 5e pop %rsi
400456: 48 89 e2 mov %rsp,%rdx
400459: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
40045d: 50 push %rax
40045e: 54 push %rsp
40045f: 49 c7 c0 40 06 40 00 mov $0x400640,%r8
400466: 48 c7 c1 d0 05 40 00 mov $0x4005d0,%rcx
40046d: 48 c7 c7 84 05 40 00 mov $0x400584,%rdi
400474: e8 b7 ff ff ff callq 400430 <__libc_start_main#plt>
400479: f4 hlt
40047a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
0000000000400546 <charcpy>:
400546: 55 push %rbp
400547: 48 89 e5 mov %rsp,%rbp
40054a: 48 89 7d f8 mov %rdi,-0x8(%rbp)
40054e: 48 8b 45 f8 mov -0x8(%rbp),%rax
400552: c6 00 61 movb $0x61,(%rax)
400555: 48 8b 45 f8 mov -0x8(%rbp),%rax
400559: 48 83 c0 01 add $0x1,%rax
40055d: c6 00 62 movb $0x62,(%rax)
400560: 48 8b 45 f8 mov -0x8(%rbp),%rax
400564: 48 83 c0 02 add $0x2,%rax
400568: c6 00 63 movb $0x63,(%rax)
40056b: 48 8b 45 f8 mov -0x8(%rbp),%rax
40056f: 48 83 c0 03 add $0x3,%rax
400573: c6 00 64 movb $0x64,(%rax)
400576: 48 8b 45 f8 mov -0x8(%rbp),%rax
40057a: 48 83 c0 04 add $0x4,%rax
40057e: c6 00 00 movb $0x0,(%rax)
400581: 90 nop
400582: 5d pop %rbp
400583: c3 retq
0000000000400584 <main>:
400584: 55 push %rbp
400585: 48 89 e5 mov %rsp,%rbp
400588: 48 83 ec 10 sub $0x10,%rsp
40058c: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
400593: 00 00
400595: 48 89 45 f8 mov %rax,-0x8(%rbp)
400599: 31 c0 xor %eax,%eax
40059b: 48 8d 45 f0 lea -0x10(%rbp),%rax
40059f: 48 89 c7 mov %rax,%rdi
4005a2: e8 9f ff ff ff callq 400546 <charcpy>
4005a7: b8 00 00 00 00 mov $0x0,%eax
4005ac: 48 8b 55 f8 mov -0x8(%rbp),%rdx
4005b0: 64 48 33 14 25 28 00 xor %fs:0x28,%rdx
4005b7: 00 00
4005b9: 74 05 je 4005c0 <main+0x3c>
4005bb: e8 60 fe ff ff callq 400420 <__stack_chk_fail#plt>
4005c0: c9 leaveq
4005c1: c3 retq
4005c2: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
4005c9: 00 00 00
4005cc: 0f 1f 40 00 nopl 0x0(%rax)
...
I also tried to specify -fstack-protector(-all or -strong) on the server machine, it still shows no error.
Anyone knows why there is the difference?
The server machine places the array temp into a different place in the main stack frame. It uses this operation to compute the address, with a 16-byte offset from the frame pointer:
40059b: 48 8d 45 f0 lea -0x10(%rbp),%rax
The other machine uses this instead:
6bf: 48 8d 45 f5 lea -0xb(%rbp),%rax
There is only an 11-byte offset from the frame pointer. The canary is stored at offset 8 in both cases.
As a result, on the server machine, there are 5 unused bytes after the array, and the overflow spills into that. The canary is not overwritten, which is why the overflow is not detected. But neither is the return address, so it is not possible to redirect execution here.
In real-world software, this would be a source-level stack-based buffer overflow which is not exploitable by accident in the compiled binary. Such things happen occasionally.

The difference between mov and movl instruction in X86? and I meet some trouble when reading assembly [duplicate]

This question already has an answer here:
Assembly do we need the endings? [duplicate]
(1 answer)
Closed 1 year ago.
Recently, I read some books about computer science. I wrote some C code, and disassembled them, using gcc and objdump.
The following C code:
#include <stdio.h>
#include <stdbool.h>
int dojob()
{
static short num[ ][4] = { {2, 9, -1, 5}, {3, 8, 2, -6}};
static short *pn[ ] = {num[0], num[1]};
static short s[2] = {0, 0};
int i, j;
for (i=0; i<2; i++) {
for (j=0; j<4; j++){
s[i] += *pn[i]++;
}
printf ("sum of line %d: %d\n", i+1, s[i]);
}
return 0;
}
int main ( )
{
dojob();
}
got the following assembly code (AT&T syntex; only assembly of function dojob and some data is list):
00401350 <_dojob>:
401350: 55 push %ebp
401351: 89 e5 mov %esp,%ebp
401353: 83 ec 28 sub $0x28,%esp
401356: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%ebp)
40135d: eb 75 jmp 4013d4 <_dojob+0x84>
40135f: c7 45 f0 00 00 00 00 movl $0x0,-0x10(%ebp)
401366: eb 3c jmp 4013a4 <_dojob+0x54>
401368: 8b 45 f4 mov -0xc(%ebp),%eax
40136b: 8b 04 85 00 20 40 00 mov 0x402000(,%eax,4),%eax
401372: 8d 48 02 lea 0x2(%eax),%ecx
401375: 8b 55 f4 mov -0xc(%ebp),%edx
401378: 89 0c 95 00 20 40 00 mov %ecx,0x402000(,%edx,4)
40137f: 0f b7 10 movzwl (%eax),%edx
401382: 8b 45 f4 mov -0xc(%ebp),%eax
401385: 0f b7 84 00 08 50 40 movzwl 0x405008(%eax,%eax,1),%eax
40138c: 00
40138d: 89 c1 mov %eax,%ecx
40138f: 89 d0 mov %edx,%eax
401391: 01 c8 add %ecx,%eax
401393: 89 c2 mov %eax,%edx
401395: 8b 45 f4 mov -0xc(%ebp),%eax
401398: 66 89 94 00 08 50 40 mov %dx,0x405008(%eax,%eax,1)
40139f: 00
4013a0: 83 45 f0 01 addl $0x1,-0x10(%ebp)
4013a4: 83 7d f0 03 cmpl $0x3,-0x10(%ebp)
4013a8: 7e be jle 401368 <_dojob+0x18>
4013aa: 8b 45 f4 mov -0xc(%ebp),%eax
4013ad: 0f b7 84 00 08 50 40 movzwl 0x405008(%eax,%eax,1),%eax
4013b4: 00
4013b5: 98 cwtl
4013b6: 8b 55 f4 mov -0xc(%ebp),%edx
4013b9: 83 c2 01 add $0x1,%edx
4013bc: 89 44 24 08 mov %eax,0x8(%esp)
4013c0: 89 54 24 04 mov %edx,0x4(%esp)
4013c4: c7 04 24 24 30 40 00 movl $0x403024,(%esp)
4013cb: e8 50 08 00 00 call 401c20 <_printf>
4013d0: 83 45 f4 01 addl $0x1,-0xc(%ebp)
4013d4: 83 7d f4 01 cmpl $0x1,-0xc(%ebp)
4013d8: 7e 85 jle 40135f <_dojob+0xf>
4013da: b8 00 00 00 00 mov $0x0,%eax
4013df: c9 leave
4013e0: c3 ret
Disassembly of section .data:
00402000 <__data_start__>:
402000: 08 20 or %ah,(%eax)
402002: 40 inc %eax
402003: 00 10 add %dl,(%eax)
402005: 20 40 00 and %al,0x0(%eax)
Disassembly of section .bss:
...
00405008 <_s.1927>:
405008: 00 00 add %al,(%eax)
...
I have two questions:
I don't understand the difference between mov and movl instruction? Why the compiler generate mov for some code, and movl for others?
I completely understand the meaning of the C code, but not the assembly that the compiler generated. Who can make some comments for it for me to understand? I will thank a lot.
The MOVL instruction was generated because you put two int (i and j variables), MOVL will perform a MOV of 32 bits, and integer' size is 32 bits.
a non exhaustive list of all MOV* exist (like MOVD for doubleword or MOVQ for quadword) to allow to optimize your code and use the better expression to gain most time as possible.
PS: may be the -M intel objdump's argument can help you to have a better comprehension of the disassembly, a lot of man on the Intel syntax can may be find easily.

Usage of instruction pxor before SSE instruction cvtsi2ss

I am currently writing various implementations of a color to black/white image converter. I would like to do a :
Simple C++ implementation
Self made ASM implementation
Self made ASM implementation with AVX vector instructions.
The goal is to benchmark each one of these and analyse the performance improvement I get.
The following snippet of code is the C++ implementation. It only treats a single portion of the image, because I also want to do multithreaded computing.
void CBwConverter::run(const CImg<uint8_t> &src, CImg<uint8_t> &dst, uint32_t pixel, size_t size) const {
const uint8_t *rC = src.data(0,pixel,0,0);
const uint8_t *gC = src.data(0,pixel,0,1);
const uint8_t *bC = src.data(0,pixel,0,2);
uint8_t *mC = dst.data(0,pixel,0,0);
for(size_t c = 0; c < size; c++, rC++, gC++, bC++, mC++) {
*mC = (uint8_t)(0.299f*(*rC) + 0.587f*(*gC) + 0.114f*(*bC));
}
}
Now, before starting the ASM version, I had my C++ code compiled and disassembled just to see how it looks like. After compiling with gcc -std=c++11 -g -O2 -c CBwConverter.cc, I obtained the following output with objdump -d CBwConvert.o :
0000000000000000 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm>:
0: 53 push %rbx
1: 8b 3e mov (%rsi),%edi
3: 89 c8 mov %ecx,%eax
5: 44 8b 56 04 mov 0x4(%rsi),%r10d
9: 44 8b 5e 08 mov 0x8(%rsi),%r11d
d: 89 c9 mov %ecx,%ecx
f: 48 8b 5e 18 mov 0x18(%rsi),%rbx
13: 0f af c7 imul %edi,%eax
16: 4c 0f af d7 imul %rdi,%r10
1a: 4b 8d 34 1b lea (%r11,%r11,1),%rsi
1e: 4c 8d 0c 03 lea (%rbx,%rax,1),%r9
22: 4c 89 d7 mov %r10,%rdi
25: 49 0f af fb imul %r11,%rdi
29: 4c 0f af d6 imul %rsi,%r10
2d: 48 01 c7 add %rax,%rdi
30: 4c 01 d0 add %r10,%rax
33: 48 01 df add %rbx,%rdi
36: 48 8d 34 03 lea (%rbx,%rax,1),%rsi
3a: 8b 02 mov (%rdx),%eax
3c: 48 0f af c8 imul %rax,%rcx
40: 48 03 4a 18 add 0x18(%rdx),%rcx
44: 4d 85 c0 test %r8,%r8
47: 74 6b je b4 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0xb4>
49: 31 d2 xor %edx,%edx
4b: f3 0f 10 25 00 00 00 movss 0x0(%rip),%xmm4 # 53 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0x53>
52: 00
53: f3 0f 10 1d 00 00 00 movss 0x0(%rip),%xmm3 # 5b <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0x5b>
5a: 00
5b: f3 0f 10 15 00 00 00 movss 0x0(%rip),%xmm2 # 63 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0x63>
62: 00
63: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
68: 41 0f b6 04 11 movzbl (%r9,%rdx,1),%eax
6d: 66 0f ef c0 pxor %xmm0,%xmm0
71: f3 0f 2a c0 cvtsi2ss %eax,%xmm0
75: 0f b6 04 17 movzbl (%rdi,%rdx,1),%eax
79: 0f 28 c8 movaps %xmm0,%xmm1
7c: 66 0f ef c0 pxor %xmm0,%xmm0
80: f3 0f 59 cc mulss %xmm4,%xmm1
84: f3 0f 2a c0 cvtsi2ss %eax,%xmm0
88: 0f b6 04 16 movzbl (%rsi,%rdx,1),%eax
8c: f3 0f 59 c3 mulss %xmm3,%xmm0
90: f3 0f 58 c1 addss %xmm1,%xmm0
94: 66 0f ef c9 pxor %xmm1,%xmm1
98: f3 0f 2a c8 cvtsi2ss %eax,%xmm1
9c: f3 0f 59 ca mulss %xmm2,%xmm1
a0: f3 0f 58 c1 addss %xmm1,%xmm0
a4: f3 0f 2c c0 cvttss2si %xmm0,%eax
a8: 88 04 11 mov %al,(%rcx,%rdx,1)
ab: 48 83 c2 01 add $0x1,%rdx
af: 49 39 d0 cmp %rdx,%r8
b2: 75 b4 jne 68 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0x68>
b4: 5b pop %rbx
b5: c3 retq
I can already tell that the for-loop start at 68 and ends at b2.
Something bothers me in the disassembled program. Why does the compiler decide to set registers %xmm0 and %xmm1 to 0, typically at 6d with instruction pxor ? These registers are overwritten just after with instruction cvtsi2ss which loads an integer and converts it to a single-precision number and then finally stores it into them. Why set them to 0 when they are overwritten just after ? If the compiler does it, am I supposed to do the same when writing my own asm version ?

Resources