GCC LLVM assemble on different CPU arches - gcc

Hi there, I'm trying to understand more about compilers.
when using gcc -S
it generates a .s file like
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 12
.globl _main
.p2align 4, 0x90
_main: ## @main
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
subq $16, %rsp
leaq L_.str(%rip), %rdi
movb $0, %al
callq _printf
xorl %ecx, %ecx
movl %eax, -4(%rbp) ## 4-byte Spill
movl %ecx, %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.section __TEXT,__cstring,cstring_literals
L_.str: ## @.str
.asciz "Hello World!\n"
.subsections_via_symbols
My question is: is this .s assembly file generated for a specific CPU architecture such as x86, or is it independent of the CPU architecture?
If it is independent, do we then add a CPU configuration such as ARMv7s to the "gcc -O " command?
Question 2:
The code generated by llvm-gcc -S looks quite different from assembly language; is it the CPU-architecture-independent LLVM IR? And does the LLVM backend then handle the rest of the job, converting it to a specific CPU architecture?
many thanks

Related

Macos get pointer being realloc'd was not allocated error in gas assembly

I am writing a simple compiler for gas assembly on macos which generated this assembly:
.section __TEXT, __text
.globl _print
_print:
pushq %rbp
movq %rsp, %rbp
movb $0, %al
call _printf
xorl %eax, %eax
popq %rbp
retq
str0:
.asciz ""
.globl _main
_main:
pushq %rbp
movq %rsp, %rbp
leaq str0(%rip), %rax
movq %rax, 0(%rbp)
movq ___stdinp@GOTPCREL(%rip), %rax
movq $0, 1(%rbp)
movq (%rax), %rdx
leaq 2(%rbp), %rdi
leaq 1(%rbp), %rsi
callq _getline
movq 0(%rbp), %rsi
movq 0(%rbp), %rdi
callq _print
xorl %eax, %eax
popq %rbp
retq
.subsections_via_symbols
And run it using:
gcc -c main.s -o main.o
gcc main.o -o main
./main
But whenever I run ./main I get:
main(13366,0x1132f8e00) malloc: *** error for object 0x5f00000000000000: pointer being realloc'd was not allocated
main(13366,0x1132f8e00) malloc: *** set a breakpoint in malloc_error_break to debug
I don't know what I could do to fix this and have no idea what is going wrong so any help would be appreciated!

unsupported symbol modifier in branch relocation: "call printf@PLT" [duplicate]

This question already has answers here:
OSX GNU assembler problem with 'call foo#plt'
(2 answers)
Closed 3 years ago.
I'm following a C compiler writing journey to generate assembly code for evaluating a binary expression value.
Here's the output assembly code (saved in file "out.s"):
.text
LC0:
.string "%d\n"
_printint:
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
movl %edi, -4(%rbp)
movl -4(%rbp), %eax
movl %eax, %esi
leaq LC0(%rip), %rdi
movl $0, %eax
call printf@PLT
nop
leave
ret
.globl _main
_main:
pushq %rbp
movq %rsp, %rbp
movq $2, %r8
movq $3, %r9
movq $5, %r10
imulq %r9, %r10
addq %r8, %r10
movq $8, %r8
movq $3, %r9
movq %r8,%rax
cqo
idivq %r9
movq %rax,%r8
subq %r8, %r10
movq %r10, %rdi
call _printint
movl $0, %eax
popq %rbp
ret
When running it with
cc -o out out.s
, it complained with
out.s:13:2: error: unsupported symbol modifier in branch relocation
call printf@PLT
^
Why does this error happen, and how can I fix it?
Thanks in advance!
PS:
I am using macOS Catalina 10.15.2
cc version is
Apple clang version 11.0.0 (clang-1100.0.33.17)
Target: x86_64-apple-darwin19.2.0
Thread model: posix
InstalledDir: /Library/Developer/CommandLineTools/usr/bin
The source code is compiled with
cc -o comp1 -g cg.c expr.c gen.c main.c scan.c tree.c
call _printf works. Thanks @PeterCordes.
I am still wondering about the error reported by cc, though.

Why does Rust emit 280 lines of assembly code to print "Hello, world"?

A simple 3-line Hello, World program in Rust emits a 280 line assembly file in order to print two words. Meanwhile, a pure assembly version of this program only takes ~15 lines. Why is the Rust version so much longer?
This is on Mac OS.
hello.rs:
fn main() {
println!("hello world")
}
hello.s (generated by rustc --emit=asm hello.rs):
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 7
.private_extern __ZN3std2rt10lang_start17hb4e01c1e588bf694E
.globl __ZN3std2rt10lang_start17hb4e01c1e588bf694E
.p2align 4, 0x90
__ZN3std2rt10lang_start17hb4e01c1e588bf694E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $32, %rsp
leaq l___unnamed_1(%rip), %rax
movq %rdi, -8(%rbp)
leaq -8(%rbp), %rcx
movq %rcx, %rdi
movq %rsi, -16(%rbp)
movq %rax, %rsi
movq -16(%rbp), %rax
movq %rdx, -24(%rbp)
movq %rax, %rdx
movq -24(%rbp), %rcx
callq __ZN3std2rt19lang_start_internal17hcf96e32a124891dcE
movq %rax, -32(%rbp)
movq -32(%rbp), %rax
addq $32, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h275cd8632ff3ab7dE:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
callq *(%rdi)
callq __ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hf5e270b394827df3E
movl %eax, -4(%rbp)
movl -4(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN3std3sys4unix7process14process_common8ExitCode6as_i3217h7e671b2505e0c229E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movzbl (%rdi), %eax
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN4core3fmt9Arguments6new_v117h39ef65f250941772E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movq %rdi, %rax
movq $0, -16(%rbp)
movq %rsi, (%rdi)
movq %rdx, 8(%rdi)
movq -16(%rbp), %rdx
movq -8(%rbp), %rsi
movq %rdx, 16(%rdi)
movq %rsi, 24(%rdi)
movq %rcx, 32(%rdi)
movq %r8, 40(%rdi)
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h61f8ee8d3fead017E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movq (%rdi), %rdi
callq __ZN4core3ops8function6FnOnce9call_once17h47f538be1b10688dE
movl %eax, -12(%rbp)
movl -12(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN4core3ops8function6FnOnce9call_once17h47f538be1b10688dE:
Lfunc_begin0:
.cfi_startproc
.cfi_personality 155, _rust_eh_personality
.cfi_lsda 16, Lexception0
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $48, %rsp
movq %rdi, -32(%rbp)
Ltmp0:
leaq -32(%rbp), %rdi
callq __ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h275cd8632ff3ab7dE
Ltmp1:
movl %eax, -36(%rbp)
jmp LBB5_1
LBB5_1:
jmp LBB5_2
LBB5_2:
movl -36(%rbp), %eax
addq $48, %rsp
popq %rbp
retq
LBB5_3:
jmp LBB5_4
LBB5_4:
movq -16(%rbp), %rdi
callq __Unwind_Resume
ud2
LBB5_5:
Ltmp2:
movq %rax, -16(%rbp)
movl %edx, -8(%rbp)
jmp LBB5_3
Lfunc_end0:
.cfi_endproc
.section __TEXT,__gcc_except_tab
.p2align 2
GCC_except_table5:
Lexception0:
.byte 255
.byte 255
.byte 1
.uleb128 Lcst_end0-Lcst_begin0
Lcst_begin0:
.uleb128 Ltmp0-Lfunc_begin0
.uleb128 Ltmp1-Ltmp0
.uleb128 Ltmp2-Lfunc_begin0
.byte 0
.uleb128 Ltmp1-Lfunc_begin0
.uleb128 Lfunc_end0-Ltmp1
.byte 0
.byte 0
Lcst_end0:
.p2align 2
.section __TEXT,__text,regular,pure_instructions
.p2align 4, 0x90
__ZN4core3ptr18real_drop_in_place17h0ab16307507408dbE:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hf5e270b394827df3E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
xorl %edi, %edi
callq __ZN68_$LT$std..process..ExitCode$u20$as$u20$std..process..Termination$GT$6report17h03f2ed18f1614f97E
movl %eax, -4(%rbp)
movl -4(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN68_$LT$std..process..ExitCode$u20$as$u20$std..process..Termination$GT$6report17h03f2ed18f1614f97E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movb %dil, -1(%rbp)
leaq -1(%rbp), %rdi
callq __ZN3std3sys4unix7process14process_common8ExitCode6as_i3217h7e671b2505e0c229E
movl %eax, -8(%rbp)
movl -8(%rbp), %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN5hello4main17hef70db39c48df377E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $48, %rsp
leaq l___unnamed_2(%rip), %rax
xorl %ecx, %ecx
movl %ecx, %r8d
leaq -48(%rbp), %rdi
movq %rax, %rsi
movl $1, %edx
movl $8, %ecx
callq __ZN4core3fmt9Arguments6new_v117h39ef65f250941772E
leaq -48(%rbp), %rdi
callq __ZN3std2io5stdio6_print17hd8f597a6d310dad5E
addq $48, %rsp
popq %rbp
retq
.cfi_endproc
.globl _main
.p2align 4, 0x90
_main:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movslq %edi, %rax
leaq __ZN5hello4main17hef70db39c48df377E(%rip), %rdi
movq %rsi, -8(%rbp)
movq %rax, %rsi
movq -8(%rbp), %rdx
callq __ZN3std2rt10lang_start17hb4e01c1e588bf694E
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.section __DATA,__const
.p2align 3
l___unnamed_1:
.quad __ZN4core3ptr18real_drop_in_place17h0ab16307507408dbE
.quad 8
.quad 8
.quad __ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h275cd8632ff3ab7dE
.quad __ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h275cd8632ff3ab7dE
.quad __ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h61f8ee8d3fead017E
.section __TEXT,__const
l___unnamed_3:
.ascii "hello world\n"
.section __DATA,__const
.p2align 3
l___unnamed_2:
.quad l___unnamed_3
.asciz "\f\000\000\000\000\000\000"
.subsections_via_symbols
When compiling code there are a lot of potential trade-offs that a compiler could make between different aspects, such as:
Code size
Run time performance
Memory used
Ease of debugging
Compilation time
Most compilers will offer you some way to specify how you want these trade-offs to be considered. rustc is no exception - you can use the opt-level code generation option to specify what you want:
opt-level This flag lets you control the optimization level.
0: no optimizations, also turn on cfg(debug_assertions).
1: basic optimizations
2: some optimizations
3: all optimizations
s: optimize for binary size
z: optimize for binary size, but also turn off loop vectorization.
For example you could specify -C opt-level=3 for maximum performance optimizations (which would also increase compilation time in most cases).
The rustc option -O is shorthand for -C opt-level=3.
As per the godbolt link supplied by @jbm, with -O you get 19 lines of assembly.
Also note that most people do not call rustc directly, rather they use cargo as a build control tool. The cargo build subcommand allows you to specify a build profile which determines things such as the optimization level applied. The --release flag invokes an optimized profile suitable for release builds.

String hidden inside .s assembly source code

I've been really stuck with a challenge at our "hacking" class. A string (password) needs to be found, and the only file provided is a .s assembly file.
Based on my extremely small knowledge of assembly, I compiled the file with gcc -S Program.s, with an a.out executable file being output. I tried to run the file with ./a.out in Mac Terminal, to no success — after pressing enter at the end of the command, the cursor just jumps to a new line with nothing in stdout, and echo $? returns 0.
At the end of the assembly file, there's a .asciz "zja4heit5k7", so I figured it might be the correct string, but to no success.
I would be eternally thankful if anyone can provide any steps to help me solve the problem.
Program.s
.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.globl _r
.p2align 4, 0x90
_r: ## @r
.cfi_startproc
## BB#0:
pushq %rbp
Lcfi0:
.cfi_def_cfa_offset 16
Lcfi1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi2:
.cfi_def_cfa_register %rbp
movq %rdi, -8(%rbp)
movl $0, -12(%rbp)
movl $0, -12(%rbp)
LBB0_1: ## =>This Inner Loop Header: Depth=1
xorl %eax, %eax
movb %al, %cl
cmpq $0, -8(%rbp)
movb %cl, -13(%rbp) ## 1-byte Spill
je LBB0_3
## BB#2: ## in Loop: Header=BB0_1 Depth=1
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movsbl (%rax,%rcx), %edx
cmpl $0, %edx
setne %sil
movb %sil, -13(%rbp) ## 1-byte Spill
LBB0_3: ## in Loop: Header=BB0_1 Depth=1
movb -13(%rbp), %al ## 1-byte Reload
testb $1, %al
jne LBB0_4
jmp LBB0_9
LBB0_4: ## in Loop: Header=BB0_1 Depth=1
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movsbl (%rax,%rcx), %edx
cmpl $97, %edx
jl LBB0_7
## BB#5: ## in Loop: Header=BB0_1 Depth=1
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movsbl (%rax,%rcx), %edx
addl $13, %edx
cmpl $122, %edx
jg LBB0_7
## BB#6: ## in Loop: Header=BB0_1 Depth=1
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movsbl (%rax,%rcx), %edx
addl $13, %edx
movb %dl, %sil
movq -8(%rbp), %rax
movslq -12(%rbp), %rcx
movb %sil, (%rax,%rcx)
LBB0_7: ## in Loop: Header=BB0_1 Depth=1
jmp LBB0_8
LBB0_8: ## in Loop: Header=BB0_1 Depth=1
movl -12(%rbp), %eax
addl $1, %eax
movl %eax, -12(%rbp)
jmp LBB0_1
LBB0_9:
popq %rbp
retq
.cfi_endproc
.globl _main
.p2align 4, 0x90
_main: ## @main
.cfi_startproc
## BB#0:
pushq %rbp
Lcfi3:
.cfi_def_cfa_offset 16
Lcfi4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi5:
.cfi_def_cfa_register %rbp
subq $16, %rsp
leaq _an(%rip), %rdi
movl $0, -4(%rbp)
callq _r
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.section __DATA,__data
.globl _an ## @an
_an:
.asciz "zja4heit5k7"
.subsections_via_symbols

Why does osx 64-bit asm syscall segfault

I'm trying to write an x86-64 hello world in assembly on OSX, but whenever I make a syscall to write, it's segfaulting. I've tried the equivalent syscall via Gnu C inline assembly and it works, so I'm thoroughly confused:
.section __TEXT,__text,regular,pure_instructions
.globl _main
.align 4, 0x90
_main:
.cfi_startproc
movq 0x2000004, %rax
movq 1, %rdi
leaq _hi(%rip), %rsi
movq 12, %rdx
syscall
xor %rax, %rax
ret
.cfi_endproc
.section __DATA,__data
.globl _hi
_hi:
.asciz "Hello world\n"
This is based off of the following Gnu C, which works:
#include <string.h>
int main() {
char *hw = "Hello World\n";
unsigned long long result;
asm volatile ("movq %1, %%rax\n"
"movq %2, %%rdi\n"
"movq %3, %%rsi\n"
"movq %4, %%rdx\n"
"syscall\n"
: "=rax" (result)
: "Z" (0x2000004),
"Z" (1),
"r" (hw),
"Z" (12)
: "rax", "rdi", "rsi", "rdx");
}
The C block when compiled generates the following asm:
.section __TEXT,__text,regular,pure_instructions
.globl _main
.align 4, 0x90
_main: ## @main
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp2:
.cfi_def_cfa_offset 16
Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp4:
.cfi_def_cfa_register %rbp
leaq L_.str(%rip), %rcx
movq %rcx, -8(%rbp)
## InlineAsm Start
movq $33554436, %rax
movq $1, %rdi
movq %rcx, %rsi
movq $12, %rdx
syscall
## InlineAsm End
movq %rcx, -16(%rbp)
xorl %eax, %eax
popq %rbp
ret
.cfi_endproc
.section __TEXT,__cstring,cstring_literals
L_.str: ## @.str
.asciz "Hello World\n"
Your problem is on these few lines:
movq 0x2000004, %rax
movq 1, %rdi
leaq _hi(%rip), %rsi
movq 12, %rdx
Be aware that in AT&T syntax, if you want to use a constant you MUST prefix it with a $ (dollar sign); otherwise you are referencing a memory address. Without the $ sign, the value is treated as an absolute memory address to load from, not as an immediate.
For instance:
movq 0x2000004, %rax
attempts to move the quadword from memory address 0x2000004 and place it in %rax.
You probably just have to modify your code to look like:
movq $0x2000004, %rax
movq $1, %rdi
leaq _hi(%rip), %rsi
movq $12, %rdx
Notice that I have added a dollar sign to the beginning of each constant.
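Here is a simple 64-bit "Hello World" (or "Hello StackOverflow" in this case), written in NASM syntax. As written it targets Linux: it is assembled as ELF64 and uses the Linux syscall numbers (1 for write, 60 for exit). To build it on OSX you would need to assemble with -f macho64 and use the OSX syscall numbers instead (0x2000004 for write, 0x2000001 for exit). Give it a try: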
section .data
string1 db 0xa, " Hello StackOverflow!!!", 0xa, 0xa, 0
len equ $ - string1
section .text
global _start
_start:
; write string to stdout
mov rax, 1 ; set write to command
mov rsi, string1 ; string1 to source index
mov rdi, rax ; set destination index to 1 (stdout) already in rax
mov rdx, len ; set length in rdx
syscall ; call kernel
; exit
xor rdi,rdi ; zero rdi (rdi hold return value)
mov rax, 0x3c ; set syscall number to 60 (0x3c hex)
syscall ; call kernel
; **Compile/Output**
;
; $ nasm -felf64 -o hello-stack_64.o hello-stack_64.asm
; $ ld -o hello-stack_64 hello-stack_64.o
; $ ./hello-stack_64
;
; Hello StackOverflow!!!

Resources