Hi there Im trying to understand more abt the compilers;
when using gcc -S
it generates a .s file like
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 12
.globl _main
.p2align 4, 0x90
_main: ## #main
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
subq $16, %rsp
leaq L_.str(%rip), %rdi
movb $0, %al
callq _printf
xorl %ecx, %ecx
movl %eax, -4(%rbp) ## 4-byte Spill
movl %ecx, %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.section __TEXT,__cstring,cstring_literals
L_.str: ## #.str
.asciz "Hello World!\n"
.subsections_via_symbols
my question is : is this .s assemble file generated based on specific cpu arch like x86 or its cpu arch irrelevant?
if its irrelevant, will we add cpu config like ARMv7s in command "gcc -O " ?
Question 2:
llvm-gcc -S generated code is pretty much different from a assemble language; is that a cpu arch irrelevant LLVM IR language? and the LLVM backend handle the rest of jobs to convert it to specific cpu arches?
many thanks
Related
I am writing a simple compiler for gas assembly on macos which generated this assembly:
.section __TEXT, __text
.globl _print
_print:
pushq %rbp
movq %rsp, %rbp
movb $0, %al
call _printf
xorl %eax, %eax
popq %rbp
retq
str0:
.asciz ""
.globl _main
_main:
pushq %rbp
movq %rsp, %rbp
leaq str0(%rip), %rax
movq %rax, 0(%rbp)
movq ___stdinp#GOTPCREL(%rip), %rax
movq $0, 1(%rbp)
movq (%rax), %rdx
leaq 2(%rbp), %rdi
leaq 1(%rbp), %rsi
callq _getline
movq 0(%rbp), %rsi
movq 0(%rbp), %rdi
callq _print
xorl %eax, %eax
popq %rbp
retq
.subsections_via_symbols
And run it using:
gcc -c main.s -o main.o
gcc main.o -o main
./main
But whenever I use ./main I get: main(13366,0x1132f8e00) malloc: *** error for object 0x5f00000000000000: pointer being realloc'd was not allocated main(13366,0x1132f8e00) malloc: *** set a breakpoint in malloc_error_break to debug
I don't know what I could do to fix this and have no idea what is going wrong so any help would be appreciated!
This question already has answers here:
OSX GNU assembler problem with 'call foo#plt'
(2 answers)
Closed 3 years ago.
I'm following a C compiler writing journey to generate assembly code for evaluating a binary expression value.
Here's the out assembly codes (saved in file "out.s"):
.text
LC0:
.string "%d\n"
_printint:
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
movl %edi, -4(%rbp)
movl -4(%rbp), %eax
movl %eax, %esi
leaq LC0(%rip), %rdi
movl $0, %eax
call printf#PLT
nop
leave
ret
.globl _main
_main:
pushq %rbp
movq %rsp, %rbp
movq $2, %r8
movq $3, %r9
movq $5, %r10
imulq %r9, %r10
addq %r8, %r10
movq $8, %r8
movq $3, %r9
movq %r8,%rax
cqo
idivq %r9
movq %rax,%r8
subq %r8, %r10
movq %r10, %rdi
call _printint
movl $0, %eax
popq %rbp
ret
When running it with
cc -o out out.s
, it complained with
out.s:13:2: error: unsupported symbol modifier in branch relocation
call printf#PLT
^
How is the error happened and how to fix it?
Thanks in advance!
PS:
I am using macOS Catalina 10.15.2
cc version is
Apple clang version 11.0.0 (clang-1100.0.33.17)
Target: x86_64-apple-darwin19.2.0
Thread model: posix
InstalledDir: /Library/Developer/CommandLineTools/usr/bin
The souce codes are compiled with
cc -o comp1 -g cg.c expr.c gen.c main.c scan.c tree.c
call _printf works. Thanks #PeterCordes.
Wondering the error prompt by cc though.
A simple 3-line Hello, World program in Rust emits a 280 line assembly file in order to print two words. Meanwhile, a pure assembly version of this program only takes ~15 lines. Why is the Rust version so much longer?
This is on Mac OS.
hello.rs:
fn main() {
println!("hello world")
}
hello.s (generated by rustc --emit=asm hello.rs):
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 7
.private_extern __ZN3std2rt10lang_start17hb4e01c1e588bf694E
.globl __ZN3std2rt10lang_start17hb4e01c1e588bf694E
.p2align 4, 0x90
__ZN3std2rt10lang_start17hb4e01c1e588bf694E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $32, %rsp
leaq l___unnamed_1(%rip), %rax
movq %rdi, -8(%rbp)
leaq -8(%rbp), %rcx
movq %rcx, %rdi
movq %rsi, -16(%rbp)
movq %rax, %rsi
movq -16(%rbp), %rax
movq %rdx, -24(%rbp)
movq %rax, %rdx
movq -24(%rbp), %rcx
callq __ZN3std2rt19lang_start_internal17hcf96e32a124891dcE
movq %rax, -32(%rbp)
movq -32(%rbp), %rax
addq $32, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h275cd8632ff3ab7dE:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
callq *(%rdi)
callq __ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hf5e270b394827df3E
movl %eax, -4(%rbp)
movl -4(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN3std3sys4unix7process14process_common8ExitCode6as_i3217h7e671b2505e0c229E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movzbl (%rdi), %eax
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN4core3fmt9Arguments6new_v117h39ef65f250941772E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movq %rdi, %rax
movq $0, -16(%rbp)
movq %rsi, (%rdi)
movq %rdx, 8(%rdi)
movq -16(%rbp), %rdx
movq -8(%rbp), %rsi
movq %rdx, 16(%rdi)
movq %rsi, 24(%rdi)
movq %rcx, 32(%rdi)
movq %r8, 40(%rdi)
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h61f8ee8d3fead017E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movq (%rdi), %rdi
callq __ZN4core3ops8function6FnOnce9call_once17h47f538be1b10688dE
movl %eax, -12(%rbp)
movl -12(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN4core3ops8function6FnOnce9call_once17h47f538be1b10688dE:
Lfunc_begin0:
.cfi_startproc
.cfi_personality 155, _rust_eh_personality
.cfi_lsda 16, Lexception0
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $48, %rsp
movq %rdi, -32(%rbp)
Ltmp0:
leaq -32(%rbp), %rdi
callq __ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h275cd8632ff3ab7dE
Ltmp1:
movl %eax, -36(%rbp)
jmp LBB5_1
LBB5_1:
jmp LBB5_2
LBB5_2:
movl -36(%rbp), %eax
addq $48, %rsp
popq %rbp
retq
LBB5_3:
jmp LBB5_4
LBB5_4:
movq -16(%rbp), %rdi
callq __Unwind_Resume
ud2
LBB5_5:
Ltmp2:
movq %rax, -16(%rbp)
movl %edx, -8(%rbp)
jmp LBB5_3
Lfunc_end0:
.cfi_endproc
.section __TEXT,__gcc_except_tab
.p2align 2
GCC_except_table5:
Lexception0:
.byte 255
.byte 255
.byte 1
.uleb128 Lcst_end0-Lcst_begin0
Lcst_begin0:
.uleb128 Ltmp0-Lfunc_begin0
.uleb128 Ltmp1-Ltmp0
.uleb128 Ltmp2-Lfunc_begin0
.byte 0
.uleb128 Ltmp1-Lfunc_begin0
.uleb128 Lfunc_end0-Ltmp1
.byte 0
.byte 0
Lcst_end0:
.p2align 2
.section __TEXT,__text,regular,pure_instructions
.p2align 4, 0x90
__ZN4core3ptr18real_drop_in_place17h0ab16307507408dbE:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hf5e270b394827df3E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
xorl %edi, %edi
callq __ZN68_$LT$std..process..ExitCode$u20$as$u20$std..process..Termination$GT$6report17h03f2ed18f1614f97E
movl %eax, -4(%rbp)
movl -4(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN68_$LT$std..process..ExitCode$u20$as$u20$std..process..Termination$GT$6report17h03f2ed18f1614f97E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movb %dil, -1(%rbp)
leaq -1(%rbp), %rdi
callq __ZN3std3sys4unix7process14process_common8ExitCode6as_i3217h7e671b2505e0c229E
movl %eax, -8(%rbp)
movl -8(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN5hello4main17hef70db39c48df377E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $48, %rsp
leaq l___unnamed_2(%rip), %rax
xorl %ecx, %ecx
movl %ecx, %r8d
leaq -48(%rbp), %rdi
movq %rax, %rsi
movl $1, %edx
movl $8, %ecx
callq __ZN4core3fmt9Arguments6new_v117h39ef65f250941772E
leaq -48(%rbp), %rdi
callq __ZN3std2io5stdio6_print17hd8f597a6d310dad5E
addq $48, %rsp
popq %rbp
retq
.cfi_endproc
.globl _main
.p2align 4, 0x90
_main:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movslq %edi, %rax
leaq __ZN5hello4main17hef70db39c48df377E(%rip), %rdi
movq %rsi, -8(%rbp)
movq %rax, %rsi
movq -8(%rbp), %rdx
callq __ZN3std2rt10lang_start17hb4e01c1e588bf694E
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.section __DATA,__const
.p2align 3
l___unnamed_1:
.quad __ZN4core3ptr18real_drop_in_place17h0ab16307507408dbE
.quad 8
.quad 8
.quad __ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h275cd8632ff3ab7dE
.quad __ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h275cd8632ff3ab7dE
.quad __ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h61f8ee8d3fead017E
.section __TEXT,__const
l___unnamed_3:
.ascii "hello world\n"
.section __DATA,__const
.p2align 3
l___unnamed_2:
.quad l___unnamed_3
.asciz "\f\000\000\000\000\000\000"
.subsections_via_symbols
When compiling code there are a lot of potential trade-offs that a compiler could make between different aspects, such as:
Code size
Run time performance
Memory used
Ease of debugging
Compilation time
Most compilers will offer you some way to specify how you want these trade-offs to be considered. rustc is no exception - you can use the opt-level code generation option to specify what you want:
opt-level This flag lets you control the optimization level.
0: no optimizations, also turn on cfg(debug_assertions).
1: basic optimizations
2: some optimizations
3: all optimizations
s: optimize for binary size
z: optimize for binary size, but also turn off loop vectorization.
For example you could specify -C opt-level=3 for maximum performance optimizations (which would also increase compilation time in most cases).
The rustc option -O is shorthand for -C opt-level=3.
As per the godbolt link supplied by #jbm, with -O you get 19 lines of assembly.
Also note that most people do not call rustc directly, rather they use cargo as a build control tool. The cargo build subcommand allows you to specify a build profile which determines things such as the optimization level applied. The --release flag invokes an optimized profile suitable for release builds.
I've been really stuck with a challenge at our "hacking" class. A string (password) needs to be found, and the only file provided is a .s assembly file.
Based on my extremely small knowledge of assembly, I compiled the file with gcc -S Program.s, with an a.outexecutable file being output. I tried to run the file with ./a.out in Mac Terminal, to no success — after pressing enter at the end of the command, cursor just jumps to a new line with nothing in stdout, and echo $? returns 0.
At the end of the assembly file, there's a .asciz "zja4heit5k7", so I figured it might be the correct string, but to no success.
I would be eternally thankful if anyone can provide any steps to help me solve the problem.
Program.s
.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.globl _r
.p2align 4, 0x90
_r: ## #r
.cfi_startproc
## BB#0:
pushq %rbp
Lcfi0:
.cfi_def_cfa_offset 16
Lcfi1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi2:
.cfi_def_cfa_register %rbp
movq %rdi, -8(%rbp)
movl $0, -12(%rbp)
movl $0, -12(%rbp)
LBB0_1: ## =>This Inner Loop Header: Depth=1
xorl %eax, %eax
movb %al, %cl
cmpq $0, -8(%rbp)
movb %cl, -13(%rbp) ## 1-byte Spill
je LBB0_3
## BB#2: ## in Loop: Header=BB0_1 Depth=1
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movsbl (%rax,%rcx), %edx
cmpl $0, %edx
setne %sil
movb %sil, -13(%rbp) ## 1-byte Spill
LBB0_3: ## in Loop: Header=BB0_1 Depth=1
movb -13(%rbp), %al ## 1-byte Reload
testb $1, %al
jne LBB0_4
jmp LBB0_9
LBB0_4: ## in Loop: Header=BB0_1 Depth=1
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movsbl (%rax,%rcx), %edx
cmpl $97, %edx
jl LBB0_7
## BB#5: ## in Loop: Header=BB0_1 Depth=1
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movsbl (%rax,%rcx), %edx
addl $13, %edx
cmpl $122, %edx
jg LBB0_7
## BB#6: ## in Loop: Header=BB0_1 Depth=1
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movsbl (%rax,%rcx), %edx
addl $13, %edx
movb %dl, %sil
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movb %sil, (%rax,%rcx)
LBB0_7: ## in Loop: Header=BB0_1 Depth=1
jmp LBB0_8
LBB0_8: ## in Loop: Header=BB0_1 Depth=1
movl -12(%rbp), %eax
addl $1, %eax
movl %eax, -12(%rbp)
jmp LBB0_1
LBB0_9:
popq %rbp
retq
.cfi_endproc
.globl _main
.p2align 4, 0x90
_main: ## #main
.cfi_startproc
## BB#0:
pushq %rbp
Lcfi3:
.cfi_def_cfa_offset 16
Lcfi4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi5:
.cfi_def_cfa_register %rbp
subq $16, %rsp
leaq _an(%rip), %rdi
movl $0, -4(%rbp)
callq _r
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.section __DATA,__data
.globl _an ## #an
_an:
.asciz "zja4heit5k7"
.subsections_via_symbols
I'm trying to write an x86-64 hello world in assembly on OSX, but whenever I make a syscall to write, it's segfaulting. I've tried the equivalent syscall via Gnu C inline assembly and it works, so I'm thoroughly confused:
.section __TEXT,__text,regular,pure_instructions
.globl _main
.align 4, 0x90
_main:
.cfi_startproc
movq 0x2000004, %rax
movq 1, %rdi
leaq _hi(%rip), %rsi
movq 12, %rdx
syscall
xor %rax, %rax
ret
.cfi_endproc
.section __DATA,__data
.globl _hi
_hi:
.asciz "Hello world\n"
This is based off of the following Gnu C, which works:
#include <string.h>
int main() {
char *hw = "Hello World\n";
unsigned long long result;
asm volatile ("movq %1, %%rax\n"
"movq %2, %%rdi\n"
"movq %3, %%rsi\n"
"movq %4, %%rdx\n"
"syscall\n"
: "=rax" (result)
: "Z" (0x2000004),
"Z" (1),
"r" (hw),
"Z" (12)
: "rax", "rdi", "rsi", "rdx");
}
The C block when compiled generates the following asm:
.section __TEXT,__text,regular,pure_instructions
.globl _main
.align 4, 0x90
_main: ## #main
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp2:
.cfi_def_cfa_offset 16
Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp4:
.cfi_def_cfa_register %rbp
leaq L_.str(%rip), %rcx
movq %rcx, -8(%rbp)
## InlineAsm Start
movq $33554436, %rax
movq $1, %rdi
movq %rcx, %rsi
movq $12, %rdx
syscall
## InlineAsm End
movq %rcx, -16(%rbp)
xorl %eax, %eax
popq %rbp
ret
.cfi_endproc
.section __TEXT,__cstring,cstring_literals
L_.str: ## #.str
.asciz "Hello World\n"
Your problem is on these few lines:
movq 0x2000004, %rax
movq 1, %rdi
leaq _hi(%rip), %rsi
movq 12, %rdx
Be aware that with at&t syntax that if you want to use constants you MUST prefix them with a $ (dollar sign) otherwise you are referencing memory addresses. Without a $ sign your value is an immediate indirect address.
For instance:
movq 0x2000004, %rax
attempts to move the quadword from memory address 0x2000004 and place it in %rax.
You probably just have to modify your code to look like:
movq $0x2000004, %rax
movq $1, %rdi
leaq _hi(%rip), %rsi
movq $12, %rdx
Notice that I have added a dollar sign to the beginning of each constant.
Here is a simple 64-bit "Hello World" (or Hello StackOverflow) in this case. It should build on OSX. Give it a try:
section .data
string1 db 0xa, " Hello StackOverflow!!!", 0xa, 0xa, 0
len equ $ - string1
section .text
global _start
_start:
; write string to stdout
mov rax, 1 ; set write to command
mov rsi, string1 ; string1 to source index
mov rdi, rax ; set destination index to 1 (stdout) already in rax
mov rdx, len ; set length in rdx
syscall ; call kernel
; exit
xor rdi,rdi ; zero rdi (rdi hold return value)
mov rax, 0x3c ; set syscall number to 60 (0x3c hex)
syscall ; call kernel
; **Compile/Output**
;
; $ nasm -felf64 -o hello-stack_64.o hello-stack_64.asm
; $ ld -o hello-stack_64 hello-stack_64.o
; $ ./hello-stack_64
;
; Hello StackOverflow!!!