Basic OS X Assembly and the Mach-O format - macos

I am interested in programming in x86-64 assembly on the Mac OS X platform. I came across this page about creating a 248B Mach-O program, which led me to Apple's own Mach-O format reference. After that I thought I'd make that same simple C program in Xcode and check out the generated assembly.
This was the code:
int main(int argc, const char * argv[])
return 42;
But the assembly generated was 334 lines, containing (based on the 248B model) a lot of excess content.
Firstly, why is so much DWARF debug info included in the Release build of a C executable? Secondly, I notice the Mach-O header data is included 4 times (in different DWARF-related sections). Why is this necessary? Finally, the Xcode assembly includes:
.private_extern _main
.globl _main
But in the 248B program, these are all nowhere to be seen - the program instead begins at _start. How is that possible if all programs by definition begin in main?
Full Xcode Assembly:
# Assembly output for main.c
# Generated at 4:04:08 PM on Sunday, January 20, 2013
# Using Release configuration, x86_64 architecture for Tiny target of Tiny project
.section __TEXT,__text,regular,pure_instructions
.file 1 "/Users/####/Desktop/Tiny/Tiny/main.c"
.section __DWARF,__debug_info,regular,debug
.section __DWARF,__debug_abbrev,regular,debug
.section __DWARF,__debug_aranges,regular,debug
.section __DWARF,__debug_macinfo,regular,debug
.section __DWARF,__debug_line,regular,debug
.section __DWARF,__debug_loc,regular,debug
.section __DWARF,__debug_pubtypes,regular,debug
.section __DWARF,__debug_str,regular,debug
.section __DWARF,__debug_ranges,regular,debug
.section __DWARF,__debug_loc,regular,debug
.section __TEXT,__text,regular,pure_instructions
.section __DATA,__data
.section __TEXT,__text,regular,pure_instructions
.private_extern _main
.globl _main
_main: ## #main
.loc 1 12 0 ## /Users/####/Desktop/Tiny/Tiny/main.c:12:0
## BB#0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
##DEBUG_VALUE: main:argc <- EDI+0
##DEBUG_VALUE: main:argv <- RSI+0
movl $42, %eax
.loc 1 15 5 prologue_end ## /Users/####/Desktop/Tiny/Tiny/main.c:15:5
popq %rbp
.section __DATA,__data
.section __TEXT,__text,regular,pure_instructions
.section __DWARF,__debug_info,regular,debug
.long 127 ## Length of Compilation Unit Info
.short 2 ## DWARF version number
Lset0 = Labbrev_begin-Lsection_abbrev ## Offset Into Abbrev. Section
.long Lset0
.byte 8 ## Address Size (in bytes)
.byte 1 ## Abbrev [1] 0xb:0x78 DW_TAG_compile_unit
Lset1 = Lstring0-Lsection_str ## DW_AT_producer
.long Lset1
.short 12 ## DW_AT_language
Lset2 = Lstring1-Lsection_str ## DW_AT_name
.long Lset2
.quad 0 ## DW_AT_entry_pc
.long 0 ## DW_AT_stmt_list
Lset3 = Lstring2-Lsection_str ## DW_AT_comp_dir
.long Lset3
.byte 1 ## DW_AT_APPLE_optimized
.byte 2 ## Abbrev [2] 0x27:0x3e DW_TAG_subprogram
Lset4 = Lstring3-Lsection_str ## DW_AT_name
.long Lset4
.byte 1 ## DW_AT_decl_file
.byte 11 ## DW_AT_decl_line
.byte 1 ## DW_AT_prototyped
.long 101 ## DW_AT_type
.byte 1 ## DW_AT_external
.quad Lfunc_begin0 ## DW_AT_low_pc
.quad Lfunc_end0 ## DW_AT_high_pc
.byte 1 ## DW_AT_frame_base
.byte 86
.byte 3 ## Abbrev [3] 0x46:0xf DW_TAG_formal_parameter
Lset5 = Lstring5-Lsection_str ## DW_AT_name
.long Lset5
.byte 1 ## DW_AT_decl_file
.byte 11 ## DW_AT_decl_line
.long 101 ## DW_AT_type
Lset6 = Ldebug_loc0-Lsection_debug_loc ## DW_AT_location
.long Lset6
.byte 3 ## Abbrev [3] 0x55:0xf DW_TAG_formal_parameter
Lset7 = Lstring6-Lsection_str ## DW_AT_name
.long Lset7
.byte 1 ## DW_AT_decl_file
.byte 11 ## DW_AT_decl_line
.long 125 ## DW_AT_type
Lset8 = Ldebug_loc2-Lsection_debug_loc ## DW_AT_location
.long Lset8
.byte 0 ## End Of Children Mark
.byte 4 ## Abbrev [4] 0x65:0x7 DW_TAG_base_type
Lset9 = Lstring4-Lsection_str ## DW_AT_name
.long Lset9
.byte 5 ## DW_AT_encoding
.byte 4 ## DW_AT_byte_size
.byte 4 ## Abbrev [4] 0x6c:0x7 DW_TAG_base_type
Lset10 = Lstring7-Lsection_str ## DW_AT_name
.long Lset10
.byte 6 ## DW_AT_encoding
.byte 1 ## DW_AT_byte_size
.byte 5 ## Abbrev [5] 0x73:0x5 DW_TAG_const_type
.long 108 ## DW_AT_type
.byte 6 ## Abbrev [6] 0x78:0x5 DW_TAG_pointer_type
.long 115 ## DW_AT_type
.byte 6 ## Abbrev [6] 0x7d:0x5 DW_TAG_pointer_type
.long 120 ## DW_AT_type
.byte 0 ## End Of Children Mark
.section __DWARF,__debug_abbrev,regular,debug
.byte 1 ## Abbreviation Code
.byte 17 ## DW_TAG_compile_unit
.byte 1 ## DW_CHILDREN_yes
.byte 37 ## DW_AT_producer
.byte 14 ## DW_FORM_strp
.byte 19 ## DW_AT_language
.byte 5 ## DW_FORM_data2
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 82 ## DW_AT_entry_pc
.byte 1 ## DW_FORM_addr
.byte 16 ## DW_AT_stmt_list
.byte 6 ## DW_FORM_data4
.byte 27 ## DW_AT_comp_dir
.byte 14 ## DW_FORM_strp
.ascii "\341\177" ## DW_AT_APPLE_optimized
.byte 12 ## DW_FORM_flag
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 2 ## Abbreviation Code
.byte 46 ## DW_TAG_subprogram
.byte 1 ## DW_CHILDREN_yes
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 58 ## DW_AT_decl_file
.byte 11 ## DW_FORM_data1
.byte 59 ## DW_AT_decl_line
.byte 11 ## DW_FORM_data1
.byte 39 ## DW_AT_prototyped
.byte 12 ## DW_FORM_flag
.byte 73 ## DW_AT_type
.byte 19 ## DW_FORM_ref4
.byte 63 ## DW_AT_external
.byte 12 ## DW_FORM_flag
.byte 17 ## DW_AT_low_pc
.byte 1 ## DW_FORM_addr
.byte 18 ## DW_AT_high_pc
.byte 1 ## DW_FORM_addr
.byte 64 ## DW_AT_frame_base
.byte 10 ## DW_FORM_block1
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 3 ## Abbreviation Code
.byte 5 ## DW_TAG_formal_parameter
.byte 0 ## DW_CHILDREN_no
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 58 ## DW_AT_decl_file
.byte 11 ## DW_FORM_data1
.byte 59 ## DW_AT_decl_line
.byte 11 ## DW_FORM_data1
.byte 73 ## DW_AT_type
.byte 19 ## DW_FORM_ref4
.byte 2 ## DW_AT_location
.byte 6 ## DW_FORM_data4
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 4 ## Abbreviation Code
.byte 36 ## DW_TAG_base_type
.byte 0 ## DW_CHILDREN_no
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 62 ## DW_AT_encoding
.byte 11 ## DW_FORM_data1
.byte 11 ## DW_AT_byte_size
.byte 11 ## DW_FORM_data1
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 5 ## Abbreviation Code
.byte 38 ## DW_TAG_const_type
.byte 0 ## DW_CHILDREN_no
.byte 73 ## DW_AT_type
.byte 19 ## DW_FORM_ref4
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 6 ## Abbreviation Code
.byte 15 ## DW_TAG_pointer_type
.byte 0 ## DW_CHILDREN_no
.byte 73 ## DW_AT_type
.byte 19 ## DW_FORM_ref4
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 0 ## EOM(3)
.section __DWARF,__apple_names,regular,debug
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 1 ## Header Hash Count
.long 12 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 1 ## HeaderData Atom Count
.short 1 ## eAtomTypeDIEOffset
.short 6 ## DW_FORM_data4
.long 0 ## Bucket 0
.long 2090499946 ## Hash in Bucket 0
.long LNames0-Lnames_begin ## Offset in Bucket 0
Lset11 = Lstring3-Lsection_str ## main
.long Lset11
.long 1 ## Num DIEs
.long 39
.long 0
.section __DWARF,__apple_objc,regular,debug
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 0 ## Header Hash Count
.long 12 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 1 ## HeaderData Atom Count
.short 1 ## eAtomTypeDIEOffset
.short 6 ## DW_FORM_data4
.long -1 ## Bucket 0
.section __DWARF,__apple_namespac,regular,debug
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 0 ## Header Hash Count
.long 12 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 1 ## HeaderData Atom Count
.short 1 ## eAtomTypeDIEOffset
.short 6 ## DW_FORM_data4
.long -1 ## Bucket 0
.section __DWARF,__apple_types,regular,debug
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 2 ## Header Bucket Count
.long 2 ## Header Hash Count
.long 20 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 3 ## HeaderData Atom Count
.short 1 ## eAtomTypeDIEOffset
.short 6 ## DW_FORM_data4
.short 3 ## eAtomTypeTag
.short 5 ## DW_FORM_data2
.short 5 ## eAtomTypeTypeFlags
.short 11 ## DW_FORM_data1
.long 0 ## Bucket 0
.long 1 ## Bucket 1
.long 193495088 ## Hash in Bucket 0
.long 2090147939 ## Hash in Bucket 1
.long Ltypes0-Ltypes_begin ## Offset in Bucket 0
.long Ltypes1-Ltypes_begin ## Offset in Bucket 1
Lset12 = Lstring4-Lsection_str ## int
.long Lset12
.long 1 ## Num DIEs
.long 101
.short 36
.byte 0
.long 0
Lset13 = Lstring7-Lsection_str ## char
.long Lset13
.long 1 ## Num DIEs
.long 108
.short 36
.byte 0
.long 0
.section __DWARF,__debug_pubtypes,regular,debug
Lset14 = Lpubtypes_end1-Lpubtypes_begin1 ## Length of Public Types Info
.long Lset14
.short 2 ## DWARF Version
Lset15 = Linfo_begin1-Lsection_info ## Offset of Compilation Unit Info
.long Lset15
Lset16 = Linfo_end1-Linfo_begin1 ## Compilation Unit Length
.long Lset16
.long 0 ## End Mark
.section __DWARF,__debug_loc,regular,debug
.quad Lfunc_begin0
.quad Ltmp6
Lset17 = Ltmp8-Ltmp7 ## Loc expr size
.short Lset17
.byte 85 ## DW_OP_reg5
.quad 0
.quad 0
.quad Lfunc_begin0
.quad Ltmp6
Lset18 = Ltmp10-Ltmp9 ## Loc expr size
.short Lset18
.byte 84 ## DW_OP_reg4
.quad 0
.quad 0
.section __DWARF,__debug_aranges,regular,debug
.section __DWARF,__debug_ranges,regular,debug
.section __DWARF,__debug_macinfo,regular,debug
.section __DWARF,__debug_inlined,regular,debug
Lset19 = Ldebug_inlined_end1-Ldebug_inlined_begin1 ## Length of Debug Inlined Information Entry
.long Lset19
.short 2 ## Dwarf Version
.byte 8 ## Address Size (in bytes)
.section __DWARF,__debug_str,regular,debug
.asciz "Apple clang version 4.1 (tags/Apple/clang-421.11.66) (based on LLVM 3.1svn)"
.asciz "/Users/####/Desktop/Tiny/Tiny/main.c"
.asciz "/Users/####/Desktop/Tiny"
.asciz "main"
.asciz "int"
.asciz "argc"
.asciz "argv"
.asciz "char"

Firstly, why is so much DWARF debug info included in the Release build of a C executable?
Being able to debug optimized code is incredibly useful. Cases in which bugs are only visible in optimized builds are not rare. If you're hand writing assembly you're unlikely to care about DWARF information though, so I'd suggest building your comparison code without the -g argument.
Secondly, I notice the Mach-O header data is included 4 times (in different DWARF-related sections). Why is this necessary?
These aren't Mach-O headers that you're seeing. They're the headers for DWARF accelerator tables, an LLVM extension to DWARF that optimizes the test for whether a symbol is defined within a given compilation unit.
But in the 248B program, these are all nowhere to be seen - the program instead begins at _start. How is that possible if all programs by definition begin in main?
Historically on OS X all programs begin at start. However, this symbol typically comes from a system library rather than being defined by the program itself. The system implementation of start will perform some initialization and then jump to your program's "real" entry point.
The entry point of a Mach-O binary is defined by either the LC_UNIXTHREAD or the LC_MAIN load command. When LC_UNIXTHREAD, the convention for pre-10.8 versions of OS X, is used with a regular C or C++ program, the linker uses start as the entry point. This symbol typically comes from /usr/lib/crt1.o, and its address is written into the instruction pointer field of the LC_UNIXTHREAD load command. The 248B binary you link to includes an LC_UNIXTHREAD command with eip set to 0x000010e8. That's the address of the symbol _start. Since this small program is a static executable and the binary is generated directly, it can write whatever address it wishes into the instruction pointer field of the load command.
If you're building your executable targeting OS X 10.8+ the linker will generate an LC_MAIN load command instead of LC_UNIXTHREAD. The kernel knows that binaries using the LC_MAIN command should be executed by loading the dynamic linker and jumping to its entry point. The dynamic linker, dyld, initializes itself and then jumps to the address specified in the LC_MAIN command. In this brave new world no symbol named start is used at all.


Ada listing files... what are the right compiler switches in GNAT to get them to come out

I am used to getting nice listing files from C code where I can see lovely source code intertwined with opcodes and hex offsets for debugging, as seen here: List File In C (.LST)
And the -S directive gets me the assembler code only from g++ for Ada.... but I can't seem to get it to give up the good stuff so I can debug a nasty elaboration crash.
Any thoughts on the GNAT compiler switches to send in?
Maybe this helps. The next command generates something similar to what you refer to:
$ gnatmake -g main.adb -cargs -Wa,-adhln > main.lst
The -cargs (a so-called mode switch) causes gnatmake to pass the subsequent arguments to the compiler. The compiler subsequently passes the -adhln switches to the assembler (see here). But you might as well use objdump -d -S main.o to see the assembly/source code after build.
with Ada.Text_IO; use Ada.Text_IO;
procedure Main is
Put_Line ("Hello, world!");
end Main;
output (main.lst)
1 .file "main.adb"
2 .text
3 .Ltext0:
4 .section .rodata
5 .LC1:
6 0000 48656C6C .ascii "Hello, world!"
6 6F2C2077
6 6F726C64
6 21
7 000d 000000 .align 8
8 .LC0:
9 0010 01000000 .long 1
10 0014 0D000000 .long 13
11 .text
12 .align 2
13 .globl _ada_main
15 _ada_main:
16 .LFB1:
17 .file 1 "main.adb"
1:main.adb **** with Ada.Text_IO; use Ada.Text_IO;
2:main.adb ****
3:main.adb **** procedure Main is
18 .loc 1 3 1
19 .cfi_startproc
20 0000 55 pushq %rbp
21 .cfi_def_cfa_offset 16
22 .cfi_offset 6, -16
23 0001 4889E5 movq %rsp, %rbp
24 .cfi_def_cfa_register 6
25 0004 53 pushq %rbx
26 0005 4883EC08 subq $8, %rsp
27 .cfi_offset 3, -24
28 .LBB2:
4:main.adb **** begin
5:main.adb **** Put_Line ("Hello, world!");
29 .loc 1 5 4
30 0009 B8000000 movl $.LC1, %eax
30 00
31 000e BA000000 movl $.LC0, %edx
31 00
32 0013 4889C1 movq %rax, %rcx
33 0016 4889D3 movq %rdx, %rbx
34 0019 4889D0 movq %rdx, %rax
35 001c 4889CF movq %rcx, %rdi
36 001f 4889C6 movq %rax, %rsi
37 0022 E8000000 call ada__text_io__put_line__2
37 00
38 .LBE2:
6:main.adb **** end Main;
39 .loc 1 6 5
40 0027 4883C408 addq $8, %rsp
41 002b 5B popq %rbx
42 002c 5D popq %rbp
43 .cfi_def_cfa 7, 8
44 002d C3 ret
45 .cfi_endproc
46 .LFE1:
48 .Letext0:
You might want to look at the section on debugging control in the top-secret GNAT documentation, especially the -gnatG switch.

Strange Linaro mips compiler assembly output

To create assembly code I'm calling the mipsel-openwrt-linux-uclibc-gcc compiler (Linaro GCC 4.8-2014.04 r49389) with flag -S.
This produces assembly code looking like this:
.section .mdebug.abi32
.gnu_attribute 4, 3
.option pic0
.cfi_sections .debug_frame
.globl Version
.section .rodata.str1.4,"aMS",#progbits,1
.align 2
.ascii "3.3\000"
.align 2
.type Version, #object
.size Version, 4
.word $LC0
.file 1 "version.c"
.section .debug_info,"",#progbits
.4byte 0x3d
.2byte 0x4
.4byte $Ldebug_abbrev0
.byte 0x4
.uleb128 0x1
.4byte $LASF0
.byte 0x1
.4byte $LASF1
.4byte $LASF2
.4byte $Ldebug_line0
.uleb128 0x2
.4byte $LASF3
.byte 0x1
.byte 0x2
.4byte 0x2e
.uleb128 0x5
.byte 0x3
.4byte Version
.uleb128 0x3
.byte 0x4
.4byte 0x34
.uleb128 0x4
.4byte 0x39
.uleb128 0x5
.byte 0x1
.byte 0x6
.4byte $LASF4
.byte 0
.section .debug_abbrev,"",#progbits
.uleb128 0x1
.uleb128 0x11
.byte 0x1
---- I cutted here ----
.uleb128 0xe
.byte 0
.byte 0
.byte 0
.section .debug_aranges,"",#progbits
.4byte 0x14
.2byte 0x2
.4byte $Ldebug_info0
.byte 0x4
.byte 0
.2byte 0
.2byte 0
.4byte 0
.4byte 0
.section .debug_line,"",#progbits
.section .debug_str,"MS",#progbits,1
.ascii "/home/lvr/Src/openwrt/build_dir/target-mipsel_mips32_uCl"
.ascii "ibc-\000"
.ascii "GNU C 4.8.3 -mno-branch-likely -mips32 -mtune=mips32 -ms"
.ascii "oft-float -mllsc -mplt -mno-shared -g -Os -std=c99 -fno-"
.ascii "caller-saves -fhonour-copts\000"
.ascii "Version\000"
.ascii "char\000"
.ascii "version.c\000"
.ident "GCC: (OpenWrt/Linaro GCC 4.8-2014.04 r49389) 4.8.3"
.section .note.GNU-stack,"",#progbits
This looks like neither AT&T nor Intel syntax to me. To produce a more familiar syntax I've tried the -masm=intel flag, but with the same effect.
My first thought was that the produced ASM code is dedicated to the fixed word width of MIPS,
but any ideas how to generate AT&T or Intel syntax via mipsel-openwrt-linux-uclibc-gcc?
Thanks for any hints!
"What AT&T and Intel syntax have to do with MIPS?" - Nothing directly, I've tagged it that way as tag 'mipsel-openwrt-linux-uclibc-gcc' was not available (sry, maybe bad style).
But thanks, Margaret: your comment led me to compile some more .c files, which produced the expected AT&T syntax :) - e.g.:
.section .mdebug.abi32
.gnu_attribute 4, 3
.option pic0
.cfi_sections .debug_frame
.align 2
$LFB16 = .
.file 1 "io.c"
.loc 1 474 0
.set nomips16
.ent prepare_text
.type prepare_text, #function
.frame $sp,48,$31 # vars= 0, regs= 5/0, args= 16, gp= 8
.mask 0x800f0000,-4
.fmask 0x00000000,0
.set noreorder
.set nomacro
$LVL0 = .
addiu $sp,$sp,-48
.cfi_def_cfa_offset 48
sw $17,32($sp)
sw $31,44($sp)
sw $19,40($sp)
sw $18,36($sp)
sw $16,28($sp)
.cfi_offset 17, -16
.cfi_offset 31, -4
.cfi_offset 19, -8
.cfi_offset 18, -12
--- CUT ---
.ascii "__ino64_t\000"
.ascii "changes\000"
.ascii "__ctype_touplow_t\000"
.ascii "memcmp\000"
.ascii "equivs\000"
.ascii "file_size\000"
.ident "GCC: (OpenWrt/Linaro GCC 4.8-2014.04 r49389) 4.8.3"
.section .note.GNU-stack,"",#progbits

Where does C function's TAN return its value in 64-bit GCC?

I am linking my assembly function with GCC on linux 64-bit. The library I use is TAN from math.h. I link it with;
gcc -s prog.o -o prog -lm
The program works but the return value is 0.0000000 (for 3.4 radian). I use extrn in my assembly code;
extrn tan
extrn printf
I use xmm0 to pass the argument (in radian) to the TAN function. Now I am not sure which register is used to return the value from TAN. Is it xmm0, st0 or in RAX? I can't find a decent reference on this.
For my gcc, it's xmm0.
Here's a C program:
#include <stdio.h>
#include <math.h>
int main () {
double x = tan(M_PI/4.0);
// RESULT: x=1.000000
printf ("x=%f\n", x);
return 0;
And here's the corresponding "gcc -S":
.section .rodata
.string "x=%f\n"
.globl main
.type main, #function
.file 1 "x.cpp"
.loc 1 4 0
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
.loc 1 6 0
movabsq $4607182418800017407, %rax
movq %rax, -8(%rbp)
.loc 1 8 0
movq -8(%rbp), %rax
movq %rax, -24(%rbp)
movsd -24(%rbp), %xmm0
movl $.LC1, %edi
movl $1, %eax
call printf
.loc 1 9 0
movl $0, %eax
.loc 1 10 0
.cfi_def_cfa 7, 8

Clang's ASM output vs GCC's

(I don't know almost anything about assembly language yet).
I'm trying to follow this tutorial.
The problem is that his compiler and my test setup (gcc on Linux 32 bit) produce completely different, and significantly less, output than my main setup (clang on OSX 64 bit).
Here are my outputs for int main() {}
gcc on Linux 32 bit
$ cat blank.c
int main() {}
$ gcc -S blank.c
$ cat blank.s
.file "blank.c"
.globl main
.type main, #function
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
popl %ebp
.cfi_def_cfa 4, 4
.cfi_restore 5
.size main, .-main
.ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
.section .note.GNU-stack,"",#progbits
clang on Mac OSX 64 bit
$ cat blank.c
int main() {}
$ clang -S blank.c
$ cat blank.s
.section __TEXT,__text,regular,pure_instructions
.globl _main
.align 4, 0x90
_main: ## #main
## BB#0:
pushq %rbp
movq %rsp, %rbp
movl $0, %eax
popq %rbp
.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
Lset0 = Leh_frame_common_end0-Leh_frame_common_begin0 ## Length of Common Information Entry
.long Lset0
.long 0 ## CIE Identifier Tag
.byte 1 ## DW_CIE_VERSION
.asciz "zR" ## CIE Augmentation
.byte 1 ## CIE Code Alignment Factor
.byte 120 ## CIE Data Alignment Factor
.byte 16 ## CIE Return Address Column
.byte 1 ## Augmentation Size
.byte 16 ## FDE Encoding = pcrel
.byte 12 ## DW_CFA_def_cfa
.byte 7 ## Register
.byte 8 ## Offset
.byte 144 ## DW_CFA_offset + Reg (16)
.byte 1 ## Offset
.align 3
Lset1 = Leh_frame_end0-Leh_frame_begin0 ## Length of Frame Information Entry
.long Lset1
Lset2 = Leh_frame_begin0-Leh_frame_common0 ## FDE CIE offset
.long Lset2
Ltmp2: ## FDE initial location
Ltmp3 = Leh_func_begin0-Ltmp2
.quad Ltmp3
Lset3 = Leh_func_end0-Leh_func_begin0 ## FDE address range
.quad Lset3
.byte 0 ## Augmentation size
.byte 4 ## DW_CFA_advance_loc4
Lset4 = Ltmp0-Leh_func_begin0
.long Lset4
.byte 14 ## DW_CFA_def_cfa_offset
.byte 16 ## Offset
.byte 134 ## DW_CFA_offset + Reg (6)
.byte 2 ## Offset
.byte 4 ## DW_CFA_advance_loc4
Lset5 = Ltmp1-Ltmp0
.long Lset5
.byte 13 ## DW_CFA_def_cfa_register
.byte 6 ## Register
.align 3
Is it possible to generate similar assembly output on my Mac, so I can follow the tutorial? or is this assembly code platform-specific? And if it is, what flags on clang can I use to generate less verbose/boilerplate(?) code?
Make sure you instruct clang to generate 32 bit code with clang -m32 on Mac OSX 64 bit and you basically don't have to worry about the other differences.
Both the .cfi_XXX directives in the gcc output and the lines after .section __TEXT,__eh_frame in the clang output are used to generate the .eh_frame section for stack unwinding. For details, see:
Compile your program with gcc -fno-asynchronous-unwind-tables. Or just ignore various .cfi_XYZ directives. For the clang case, just don't pay attention to the __eh_frame section. Bear in mind that it's rather uncommon for two different compilers to generate identical code, even from identical source.

Clean x86_64 assembly output with gcc? [duplicate]

This question already has answers here:
How to remove "noise" from GCC/clang assembly output?
(3 answers)
Closed 6 years ago.
I've been teaching myself GNU Assembly for a while now by writing statements in C, compiling them with "gcc -S" and studying the output. This works alright on x86 (and compiling with -m32) but on my AMD64 box, for this code (just as an example):
int main()
return 0;
GCC gives me:
.file "test.c"
.globl main
.type main, #function
pushq %rbp
movq %rsp, %rbp
movl $0, %eax
.size main, .-main
.section .eh_frame,"a",#progbits
.long .LECIE1-.LSCIE1
.long 0x0
.byte 0x1
.string "zR"
.uleb128 0x1
.sleb128 -8
.byte 0x10
.uleb128 0x1
.byte 0x3
.byte 0xc
.uleb128 0x7
.uleb128 0x8
.byte 0x90
.uleb128 0x1
.align 8
.long .LEFDE1-.LASFDE1
.long .LASFDE1-.Lframe1
.long .LFB2
.long .LFE2-.LFB2
.uleb128 0x0
.byte 0x4
.long .LCFI0-.LFB2
.byte 0xe
.uleb128 0x10
.byte 0x86
.uleb128 0x2
.byte 0x4
.long .LCFI1-.LCFI0
.byte 0xd
.uleb128 0x6
.align 8
.ident "GCC: (Ubuntu 4.3.3-5ubuntu4) 4.3.3"
.section .note.GNU-stack,"",#progbits
Compared with:
.file "test.c"
.globl main
.type main, #function
leal 4(%esp), %ecx
andl $-16, %esp
pushl -4(%ecx)
pushl %ebp
movl %esp, %ebp
pushl %ecx
movl $0, %eax
popl %ecx
popl %ebp
leal -4(%ecx), %esp
.size main, .-main
.ident "GCC: (Ubuntu 4.3.3-5ubuntu4) 4.3.3"
.section .note.GNU-stack,"",#progbits
on x86.
Is there a way to make GCC -S on x86_64 output Assembly without the fluff?
The stuff that goes into .eh_frame section is unwind descriptors, which you only need to unwind stack (e.g. with GDB). While learning assembly, you could simply ignore it. Here is a way to do the "clean up" you want:
gcc -S -o - test.c | sed -e '/^\.L/d' -e '/\.eh_frame/Q'
.file "test.c"
.globl main
.type main,#function
pushq %rbp
movq %rsp, %rbp
movl $0, %eax
.size main,.Lfe1-main
You can try placing the code you want to study in a function.
int ftest(void)
return 0;
int main(void)
return ftest();
If you look at the assembly-source for test it will be as clean as you need.
pushq %rbp
movq %rsp, %rbp
movl $0, %eax
I've found that using the -Os flag makes things clearer. I tried it on your tiny example, but it made very little difference.
That said, I remember it being helpful when I was learning assembly (on a Sparc).
