I am writing a simple compiler for gas assembly on macos which generated this assembly:
.section __TEXT, __text
.globl _print
_print:
pushq %rbp
movq %rsp, %rbp
movb $0, %al
call _printf
xorl %eax, %eax
popq %rbp
retq
str0:
.asciz ""
.globl _main
_main:
pushq %rbp
movq %rsp, %rbp
leaq str0(%rip), %rax
movq %rax, 0(%rbp)
movq ___stdinp#GOTPCREL(%rip), %rax
movq $0, 1(%rbp)
movq (%rax), %rdx
leaq 2(%rbp), %rdi
leaq 1(%rbp), %rsi
callq _getline
movq 0(%rbp), %rsi
movq 0(%rbp), %rdi
callq _print
xorl %eax, %eax
popq %rbp
retq
.subsections_via_symbols
And run it using:
gcc -c main.s -o main.o
gcc main.o -o main
./main
But whenever I use ./main I get: main(13366,0x1132f8e00) malloc: *** error for object 0x5f00000000000000: pointer being realloc'd was not allocated main(13366,0x1132f8e00) malloc: *** set a breakpoint in malloc_error_break to debug
I don't know what I could do to fix this and have no idea what is going wrong so any help would be appreciated!
A simple 3-line Hello, World program in Rust emits a 280 line assembly file in order to print two words. Meanwhile, a pure assembly version of this program only takes ~15 lines. Why is the Rust version so much longer?
This is on Mac OS.
hello.rs:
fn main() {
println!("hello world")
}
hello.s (generated by rustc --emit=asm hello.rs):
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 7
.private_extern __ZN3std2rt10lang_start17hb4e01c1e588bf694E
.globl __ZN3std2rt10lang_start17hb4e01c1e588bf694E
.p2align 4, 0x90
__ZN3std2rt10lang_start17hb4e01c1e588bf694E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $32, %rsp
leaq l___unnamed_1(%rip), %rax
movq %rdi, -8(%rbp)
leaq -8(%rbp), %rcx
movq %rcx, %rdi
movq %rsi, -16(%rbp)
movq %rax, %rsi
movq -16(%rbp), %rax
movq %rdx, -24(%rbp)
movq %rax, %rdx
movq -24(%rbp), %rcx
callq __ZN3std2rt19lang_start_internal17hcf96e32a124891dcE
movq %rax, -32(%rbp)
movq -32(%rbp), %rax
addq $32, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h275cd8632ff3ab7dE:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
callq *(%rdi)
callq __ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hf5e270b394827df3E
movl %eax, -4(%rbp)
movl -4(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN3std3sys4unix7process14process_common8ExitCode6as_i3217h7e671b2505e0c229E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movzbl (%rdi), %eax
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN4core3fmt9Arguments6new_v117h39ef65f250941772E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movq %rdi, %rax
movq $0, -16(%rbp)
movq %rsi, (%rdi)
movq %rdx, 8(%rdi)
movq -16(%rbp), %rdx
movq -8(%rbp), %rsi
movq %rdx, 16(%rdi)
movq %rsi, 24(%rdi)
movq %rcx, 32(%rdi)
movq %r8, 40(%rdi)
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h61f8ee8d3fead017E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movq (%rdi), %rdi
callq __ZN4core3ops8function6FnOnce9call_once17h47f538be1b10688dE
movl %eax, -12(%rbp)
movl -12(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN4core3ops8function6FnOnce9call_once17h47f538be1b10688dE:
Lfunc_begin0:
.cfi_startproc
.cfi_personality 155, _rust_eh_personality
.cfi_lsda 16, Lexception0
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $48, %rsp
movq %rdi, -32(%rbp)
Ltmp0:
leaq -32(%rbp), %rdi
callq __ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h275cd8632ff3ab7dE
Ltmp1:
movl %eax, -36(%rbp)
jmp LBB5_1
LBB5_1:
jmp LBB5_2
LBB5_2:
movl -36(%rbp), %eax
addq $48, %rsp
popq %rbp
retq
LBB5_3:
jmp LBB5_4
LBB5_4:
movq -16(%rbp), %rdi
callq __Unwind_Resume
ud2
LBB5_5:
Ltmp2:
movq %rax, -16(%rbp)
movl %edx, -8(%rbp)
jmp LBB5_3
Lfunc_end0:
.cfi_endproc
.section __TEXT,__gcc_except_tab
.p2align 2
GCC_except_table5:
Lexception0:
.byte 255
.byte 255
.byte 1
.uleb128 Lcst_end0-Lcst_begin0
Lcst_begin0:
.uleb128 Ltmp0-Lfunc_begin0
.uleb128 Ltmp1-Ltmp0
.uleb128 Ltmp2-Lfunc_begin0
.byte 0
.uleb128 Ltmp1-Lfunc_begin0
.uleb128 Lfunc_end0-Ltmp1
.byte 0
.byte 0
Lcst_end0:
.p2align 2
.section __TEXT,__text,regular,pure_instructions
.p2align 4, 0x90
__ZN4core3ptr18real_drop_in_place17h0ab16307507408dbE:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hf5e270b394827df3E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
xorl %edi, %edi
callq __ZN68_$LT$std..process..ExitCode$u20$as$u20$std..process..Termination$GT$6report17h03f2ed18f1614f97E
movl %eax, -4(%rbp)
movl -4(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN68_$LT$std..process..ExitCode$u20$as$u20$std..process..Termination$GT$6report17h03f2ed18f1614f97E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movb %dil, -1(%rbp)
leaq -1(%rbp), %rdi
callq __ZN3std3sys4unix7process14process_common8ExitCode6as_i3217h7e671b2505e0c229E
movl %eax, -8(%rbp)
movl -8(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN5hello4main17hef70db39c48df377E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $48, %rsp
leaq l___unnamed_2(%rip), %rax
xorl %ecx, %ecx
movl %ecx, %r8d
leaq -48(%rbp), %rdi
movq %rax, %rsi
movl $1, %edx
movl $8, %ecx
callq __ZN4core3fmt9Arguments6new_v117h39ef65f250941772E
leaq -48(%rbp), %rdi
callq __ZN3std2io5stdio6_print17hd8f597a6d310dad5E
addq $48, %rsp
popq %rbp
retq
.cfi_endproc
.globl _main
.p2align 4, 0x90
_main:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movslq %edi, %rax
leaq __ZN5hello4main17hef70db39c48df377E(%rip), %rdi
movq %rsi, -8(%rbp)
movq %rax, %rsi
movq -8(%rbp), %rdx
callq __ZN3std2rt10lang_start17hb4e01c1e588bf694E
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.section __DATA,__const
.p2align 3
l___unnamed_1:
.quad __ZN4core3ptr18real_drop_in_place17h0ab16307507408dbE
.quad 8
.quad 8
.quad __ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h275cd8632ff3ab7dE
.quad __ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h275cd8632ff3ab7dE
.quad __ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h61f8ee8d3fead017E
.section __TEXT,__const
l___unnamed_3:
.ascii "hello world\n"
.section __DATA,__const
.p2align 3
l___unnamed_2:
.quad l___unnamed_3
.asciz "\f\000\000\000\000\000\000"
.subsections_via_symbols
When compiling code there are a lot of potential trade-offs that a compiler could make between different aspects, such as:
Code size
Run time performance
Memory used
Ease of debugging
Compilation time
Most compilers will offer you some way to specify how you want these trade-offs to be considered. rustc is no exception - you can use the opt-level code generation option to specify what you want:
opt-level This flag lets you control the optimization level.
0: no optimizations, also turn on cfg(debug_assertions).
1: basic optimizations
2: some optimizations
3: all optimizations
s: optimize for binary size
z: optimize for binary size, but also turn off loop vectorization.
For example you could specify -C opt-level=3 for maximum performance optimizations (which would also increase compilation time in most cases).
The rustc option -O is shorthand for -C opt-level=3.
As per the godbolt link supplied by #jbm, with -O you get 19 lines of assembly.
Also note that most people do not call rustc directly, rather they use cargo as a build control tool. The cargo build subcommand allows you to specify a build profile which determines things such as the optimization level applied. The --release flag invokes an optimized profile suitable for release builds.
I've been really stuck with a challenge at our "hacking" class. A string (password) needs to be found, and the only file provided is a .s assembly file.
Based on my extremely small knowledge of assembly, I compiled the file with gcc -S Program.s, with an a.outexecutable file being output. I tried to run the file with ./a.out in Mac Terminal, to no success — after pressing enter at the end of the command, cursor just jumps to a new line with nothing in stdout, and echo $? returns 0.
At the end of the assembly file, there's a .asciz "zja4heit5k7", so I figured it might be the correct string, but to no success.
I would be eternally thankful if anyone can provide any steps to help me solve the problem.
Program.s
.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.globl _r
.p2align 4, 0x90
_r: ## #r
.cfi_startproc
## BB#0:
pushq %rbp
Lcfi0:
.cfi_def_cfa_offset 16
Lcfi1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi2:
.cfi_def_cfa_register %rbp
movq %rdi, -8(%rbp)
movl $0, -12(%rbp)
movl $0, -12(%rbp)
LBB0_1: ## =>This Inner Loop Header: Depth=1
xorl %eax, %eax
movb %al, %cl
cmpq $0, -8(%rbp)
movb %cl, -13(%rbp) ## 1-byte Spill
je LBB0_3
## BB#2: ## in Loop: Header=BB0_1 Depth=1
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movsbl (%rax,%rcx), %edx
cmpl $0, %edx
setne %sil
movb %sil, -13(%rbp) ## 1-byte Spill
LBB0_3: ## in Loop: Header=BB0_1 Depth=1
movb -13(%rbp), %al ## 1-byte Reload
testb $1, %al
jne LBB0_4
jmp LBB0_9
LBB0_4: ## in Loop: Header=BB0_1 Depth=1
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movsbl (%rax,%rcx), %edx
cmpl $97, %edx
jl LBB0_7
## BB#5: ## in Loop: Header=BB0_1 Depth=1
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movsbl (%rax,%rcx), %edx
addl $13, %edx
cmpl $122, %edx
jg LBB0_7
## BB#6: ## in Loop: Header=BB0_1 Depth=1
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movsbl (%rax,%rcx), %edx
addl $13, %edx
movb %dl, %sil
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movb %sil, (%rax,%rcx)
LBB0_7: ## in Loop: Header=BB0_1 Depth=1
jmp LBB0_8
LBB0_8: ## in Loop: Header=BB0_1 Depth=1
movl -12(%rbp), %eax
addl $1, %eax
movl %eax, -12(%rbp)
jmp LBB0_1
LBB0_9:
popq %rbp
retq
.cfi_endproc
.globl _main
.p2align 4, 0x90
_main: ## #main
.cfi_startproc
## BB#0:
pushq %rbp
Lcfi3:
.cfi_def_cfa_offset 16
Lcfi4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi5:
.cfi_def_cfa_register %rbp
subq $16, %rsp
leaq _an(%rip), %rdi
movl $0, -4(%rbp)
callq _r
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.section __DATA,__data
.globl _an ## #an
_an:
.asciz "zja4heit5k7"
.subsections_via_symbols
Hi there Im trying to understand more abt the compilers;
when using gcc -S
it generates a .s file like
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 12
.globl _main
.p2align 4, 0x90
_main: ## #main
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
subq $16, %rsp
leaq L_.str(%rip), %rdi
movb $0, %al
callq _printf
xorl %ecx, %ecx
movl %eax, -4(%rbp) ## 4-byte Spill
movl %ecx, %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.section __TEXT,__cstring,cstring_literals
L_.str: ## #.str
.asciz "Hello World!\n"
.subsections_via_symbols
my question is : is this .s assemble file generated based on specific cpu arch like x86 or its cpu arch irrelevant?
if its irrelevant, will we add cpu config like ARMv7s in command "gcc -O " ?
Question 2:
llvm-gcc -S generated code is pretty much different from a assemble language; is that a cpu arch irrelevant LLVM IR language? and the LLVM backend handle the rest of jobs to convert it to specific cpu arches?
many thanks
I am trying to mimic Security Tubes execve tutorial (http://hackoftheday.securitytube.net/2013/04/demystifying-execve-shellcode-stack.html) in 64bit Asm. I am not sure where the bus error is coming from. I stepped through through the app in GDB but the error didn't occur until after I was out of frame. If anyone knows what is happening, I would love to hear from you.
.section __DATA,__data
.section __TEXT,__text
.globl _start
_start:
xor %rax, %rax
push %rax
movabsq $0x68732f6e69622f2f, %rdi
push %rax
mov %rsp, %rsi
push %rdi
mov %rsp, %rdx
mov $0x2000059, %rax
syscall
The following should work better but I don't have osx to test:
.globl _start
_start:
xor %edx, %edx ; NULL for env
movabsq $0x0068732f6e69622f, %rax ; /bin/sh<0>
push %rax
mov %rsp, %rdi ; filename
push %rdx ; NULL for argv terminator
push %rdi ; argv[0] = filename
mov %rsp, %rsi ; argv
mov $0x200003b, %eax ; I think 59 is decimal
syscall