understanding assembler code for function syscall - linux-kernel

dump of assembler code for function syscall:
0x00007f2db202a3b0 <syscall+0>: mov %rdi,%rax
0x00007f2db202a3b3 <syscall+3>: mov %rsi,%rdi
0x00007f2db202a3b6 <syscall+6>: mov %rdx,%rsi
0x00007f2db202a3b9 <syscall+9>: mov %rcx,%rdx
0x00007f2db202a3bc <syscall+12>: mov %r8,%r10
0x00007f2db202a3bf <syscall+15>: mov %r9,%r8
0x00007f2db202a3c2 <syscall+18>: mov 0x8(%rsp),%r9
0x00007f2db202a3c7 <syscall+23>: syscall
0x00007f2db202a3c9 <syscall+25>: cmp $0xfffffffffffff001,%rax
0x00007f2db202a3cf <syscall+31>: jae 0x7f2db202a3d2 <syscall+34>
0x00007f2db202a3d1 <syscall+33>: retq
0x00007f2db202a3d2 <syscall+34>: mov 0x28fbbf(%rip),%rcx #0x7f2db22b9f98
0x00007f2db202a3d9 <syscall+41>: xor %edx,%edx
0x00007f2db202a3db <syscall+43>: sub %rax,%rdx
0x00007f2db202a3de <syscall+46>: mov %edx,%fs:(%rcx)
0x00007f2db202a3e1 <syscall+49>: or $0xffffffffffffffff,%rax
0x00007f2db202a3e5 <syscall+53>: jmp 0x7f2db202a3d1 <syscall+33>
End of assembler dump.
Can someone tell me what is happening above the line `0x00007f2db202a3c7 <syscall+23>: syscall`?

This is conversion from "System V x86_64 calling conventions" to "linux syscall calling conventions".
When you call syscall function, compiler treats it as usual function and uses System V x86_64 calling conventions for passing arguments. Arguments are passed in the following order: rdi, rsi, rdx, rcx, r8, r9, stack. But calling conventions for system calls are different (see syscall(2)): system call number goes into rax and arguments go into rdi, rsi, rdx, r10, r8, r9. So, syscall converts one calling convention to another one:
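rdi -> rax (syscall number)
rsi -> rdi (first argument)
rdx -> rsi (second argument)
rcx -> rdx (third argument)
r8 -> r10 (fourth argument)
r9 -> r8 (fifth argument)
8(%rsp) -> r9 (sixth argument, which the caller passed on the stack)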

Related

Reversing an array and printing it in x86-64

I am trying to print an array, reverse it, and then print it again. I manage to print it once. I can also make 2 consecutive calls to _printy and it works. But the code breaks with the _reverse function. It does not segfault, it exits with code 24 (I looked online but this seems to mean that the maximum number of file descriptors has been exceeded, and I cannot get what this means in this context). I stepped with a debugger and the loop logic seems to make sense.
I am not passing the array in RDI, because _printy restores the content of that register when it exits. I also tried to load it directly into RDI before calling _reverse but that does not solve the problem.
I cannot figure out what the problem is. Any idea?
BITS 64
DEFAULT REL
; -------------------------------------
; -------------------------------------
; PRINT LIST
; -------------------------------------
; -------------------------------------
%define SYS_WRITE 0x02000004
%define SYS_EXIT 0x02000001
%define SYS_OPEN 0x02000005
%define SYS_CLOSE 0x02000006
%define SYS_READ 0x02000003
%define EXIT_SUCCESS 0
%define STDOUT 1
%define LF 10
%define INT_OFFSET 48
section .text
extern _printf
extern _puts
extern _exit
global _main
; _main: program entry. Prints `array`, reverses it in place, prints it again,
; then calls _exit.
_main:
push rbp
lea rdi, [rel array] ; rdi = &array, the argument _printy expects
call _printy
call _reverse ; no reload of rdi: relies on _printy returning with rdi = &array
call _printy ; likewise relies on _reverse returning with rdi = &array
pop rbp
; NOTE(review): rdi still holds &array at this point (EXIT_SUCCESS is defined but
; never loaded), so _exit receives the array address as its status argument.
; This looks like the source of the unexpected exit code -- confirm.
call _exit
; _reverse: reverse the array in place using two pointers that move towards
; each other.
; In:    rdi = address of the first element
; Out:   rax = 0, rdi = &array
; Uses:  rsi, r8, r9 as scratch
_reverse:
push rbp
lea rsi, [rdi + 4 * (length - 1) ] ; rsi = address of the last 4-byte element
.LOOP2:
cmp rdi, rsi
jge .DONE2 ; stop once the two pointers meet or cross
; NOTE(review): `array` is declared with dd (4-byte elements), but the four moves
; below use 64-bit registers, so each load/store covers 8 bytes: the element plus
; the 4 bytes that follow it.
mov r8, [rdi]
mov r9, [rsi]
mov [rdi], r9
mov [rsi], r8
add rdi,4 ; step the front pointer forward by one element
sub rsi,4 ; step the back pointer backward by one element
jmp .LOOP2
.DONE2:
xor rax, rax
lea rdi, [rel array] ; leave rdi = &array for whatever the caller does next
pop rbp
ret
; _printy: call printf(msg, element) once for each of the `length` elements.
; In:    rdi = address of the first element
; Out:   rax = 0, rdi = &array
_printy:
push rbp
xor rcx, rcx ; rcx = element index, starting at 0
mov r8, rdi ; keep the array base in r8; rdi is reused for the format string
.loop:
cmp rcx, length
jge .done
push rcx ; rcx and r8 are caller-saved, so preserve them across _printf
push r8
lea rdi, [rel msg] ; printf argument 1: format string
mov rsi, [r8 + rcx * 4] ; printf argument 2: element rcx (64-bit load of a 4-byte element)
xor rax, rax ; al = 0: no vector registers used by this variadic call
call _printf
pop r8
pop rcx
add rcx, 1
jmp .loop
.done:
xor rax, rax
lea rdi, [rel array] ; leave rdi = &array for the caller
pop rbp
ret
section .data
array: dd 78, 2, 3, 4, 5, 6
length: equ ($ - array) / 4
msg: db "%d => ", 0
Edit with some info from the debugger
Stepping into the _printy function gives the following msg, once reaching the call to _printf.
* thread #1, queue = 'com.apple.main-thread', stop reason = step over failed (Could not create return address breakpoint.)
frame #0: 0x0000000100003f8e a.out`printf
a.out`printf:
-> 0x100003f8e <+0>: jmp qword ptr [rip + 0x4074] ; (void *)0x00007ff80258ef0b: printf
0x100003f94: lea r11, [rip + 0x4075] ; _dyld_private
0x100003f9b: push r11
0x100003f9d: jmp qword ptr [rip + 0x5d] ; (void *)0x00007ff843eeb520: dyld_stub_binder
I am not an expert, but a quick research online led to the following
During the 'thread step-out' command, check that the memory we are about to place a breakpoint in is executable. Previously, if the current function had a nonstandard stack layout/ABI, and had a valid data pointer in the location where the return address is usually located, data corruption would occur when the breakpoint was written. This could lead to an incorrectly reported crash or silent corruption of the program's state. Now, if the above check fails, the command safely aborts.
So after all this might not be a problem (I am also able to track the execution of the printf call). But this is really the only understandable piece of information I am able to extract from the debugger. Deep in some quite obscure (to me) function calls I reach this
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
frame #0: 0x00007ff80256db7f libsystem_c.dylib`flockfile + 10
libsystem_c.dylib`flockfile:
-> 0x7ff80256db7f <+10>: call 0x7ff8025dd480 ; symbol stub for: __error
0x7ff80256db84 <+15>: mov r14d, dword ptr [rax]
0x7ff80256db87 <+18>: mov rdi, qword ptr [rbx + 0x68]
0x7ff80256db8b <+22>: add rdi, 0x8
Target 0: (a.out) stopped.
(lldb)
Process 61913 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
frame #0: 0x00007ff8025dd480 libsystem_c.dylib`__error
This is one of the function calls happening in _printf.
Ask further questions if there is something more I can do.
Your array consists of int32 numbers aka dd in nasm terminology, but your swap operates on 64 bit numbers:
mov r8, [rdi]
mov r9, [rsi]
mov [rdi], r9
mov [rsi], r8
Assuming you were not after some crazy optimizations where you swap a pair of elements simultaneously you want this to remain in 32 bits:
mov r8d, [rdi]
mov r9d, [rsi]
mov [rdi], r9d
mov [rsi], r8d

Segmentation fault after function call in x86 assembly

I'm currently writing a compiler and I have created some tests and only one of them fails with Segmentation fault (core dumped) as error message.
This is the code that gets compiled
function main(): int {
var f: int = 10;
return func(f) - func(f / 2);
}
function func(a: int): int {
return a;
}
And this is the generated assembly code (it's not really optimized as you can see)
section .text
; _start: process entry point. Calls function_main and passes its return value
; to the exit system call.
global _start
_start:
call function_main
mov rdi, rax ; exit status = value returned by function_main
mov rax, 60 ; system call number 60 (exit on Linux x86-64)
syscall
; function_main: generated code for
;     var f: int = 10; return func(f) - func(f / 2);
; Out: rax = func(f) - func(f / 2)
global function_main
function_main:
push rbp
mov rbp, rsp
sub rsp, 4 ; reserve 4 bytes below rbp for the local f
mov rax, 10
; NOTE(review): [rbp-0] is [rbp] itself, i.e. the slot where `push rbp` stored the
; caller's rbp, not the 4 bytes reserved just below it.
mov DWORD[rbp-0], eax
mov eax, DWORD[rbp-0]
mov rdi, rax ; argument for func(f)
call function_func
push rax ; keep func(f) while the second call is evaluated
mov eax, DWORD[rbp-0]
mov rbx, 2
; NOTE(review): idiv divides rdx:rax by rbx, and rdx is not set up (no cqo or
; xor edx, edx) before this instruction.
idiv rbx
mov rdi, rax ; argument for func(f / 2)
call function_func
mov rbx, rax
pop rax ; rax = func(f) saved above
sub rax, rbx
mov rsp, rbp
pop rbp
ret
; function_func: generated code for `function func(a: int): int { return a; }`.
; In:  edi = a
; Out: eax = a
global function_func
function_func:
push rbp
mov rbp, rsp
sub rsp, 4
; NOTE(review): [rbp-0] is the saved-rbp slot, so this store replaces the low
; 4 bytes of the caller's saved rbp with `a`.
mov DWORD[rbp-0], edi
mov eax, DWORD[rbp-0]
mov rsp, rbp
pop rbp ; reloads rbp from the slot overwritten above, so the caller gets back a modified rbp
ret
The assembly file is compiled with nasm -f elf64 ./test9.lv.asm -o ./test9.lv.asm.o and ld -g ./test9.lv.asm.o a.out
I've used gdb to debug the binary file and it seems like the program receives the SIGSEGV signal right after func(f) returns the first time.
But now I don't know why this is happening in this case.

Comparing input to character not working in x86_64 Mac assembly nasm

In NASM assembly on a Mac (x86_64 architecture), I am struggling to compare input to a string or character. When I compare the input (stdin) to a character, the comparison does not succeed when it should. I am new to assembly.
Here is my code.
; Reads up to 10 bytes from file descriptor 0 into `input`, compares against `r`,
; writes right_way to file descriptor 1 if the comparison is equal, then exits
; with status 0.
global start
section .bss
input resb 10
section .text
start:
;getting the input
mov rax, 0x2000003 ;meaning read
mov rdi, 0
mov rsi, input
mov rdx, 10
syscall ;special
;here is where I do the comparing
; NOTE(review): `mov rax, r` and `mov rbx, input` load the addresses of the two
; labels, not the bytes stored there, so the cmp below compares two different
; addresses rather than the typed character with "r".
mov rax, r
mov rbx, input
cmp rax, rbx
je right
;jumping to the return function
jmp ret
right:
mov rax, 0x2000004 ;meaning write
mov rdi, 1
mov rsi, right_way
mov rdx, right_len
syscall ;special
ret:
mov rax, 0x2000001 ;return 0
xor rdi, rdi ;which means to make rdi = 0 could be replaced with mov rdi, 0 but xor is faster
syscall
section .data
right_way: db "You are correct!", 10, 0
right_len: equ $-right_way ; length includes the trailing 0 byte declared above
r: db "r", 10
At the "je right" line, it should jump to the right function but it does not. Do I need to convert the input to something else?
Help would be appreciated. Thanks!

What is wrong with my implementation of the Lisp "cons" function?

I am trying to implement the Lisp function cons in x86_64 assembly on macOS. Below I am trying to make a pair of 2 and 3, but it is not working; I am getting a segmentation fault.
.global _main
.extern _malloc
.text
# make_node: request 8 bytes from _malloc, store the given register into the
# returned cell, then load the cell back into the same register.
# Overwrites rdi and rax; the pointer returned in rax is not saved anywhere.
# NOTE(review): the macro is used with r8 and r9, which are caller-saved under the
# System V ABI, so _malloc is free to change them before the store happens.
.macro make_node register
mov rdi, 8 # 64-bit number
call _malloc # failed on malloc
mov [rax], \register # contents of register in address of rax
mov \register, [rax]
.endm
# cons: loads the two 8-byte values the caller pushed on the stack and runs
# make_node on each of them.
# In: [rbp + 16] = value pushed last, [rbp + 24] = value pushed first
cons:
push rbp
mov rbp, rsp
# NOTE(review): assuming _main was entered with the usual System V alignment
# (rsp = 8 mod 16), the two pushes in _main, the call to cons and the push rbp
# above leave rsp at 8 mod 16 here, so both _malloc calls inside make_node are
# made with a stack that is not 16-byte aligned.
mov r8, [rbp + 16]
make_node r8
mov r9, [rbp + 24]
make_node r9
mov rsp, rbp
pop rbp
ret
# _main: pushes the pair (2, 3) on the stack, calls cons, then exits with status 0.
_main:
push 3 # read by cons at [rbp + 24]
push 2 # read by cons at [rbp + 16]
call cons
add rsp, 16 # discard the two pushed arguments
# I should now be able to do whatever I want with r8 (2) and r9 (3)
mov rdi, 0
mov rax, 0x2000001
syscall
I stepped through it with GDB and I see that it failed on calling malloc, but to me, there doesn't seem to be a problem since malloc only takes one argument (the number of bytes to allocate) in the rdi register.
Dump of assembler code for function cons:
0x0000000100003f48 <+0>: push %rbp
0x0000000100003f49 <+1>: mov %rsp,%rbp
0x0000000100003f4c <+4>: mov 0x10(%rbp),%r8
0x0000000100003f50 <+8>: mov $0x8,%rdi
=> 0x0000000100003f57 <+15>: callq 0x100003f96
0x0000000100003f5c <+20>: mov %r8,(%rax)
0x0000000100003f5f <+23>: mov (%rax),%r8
0x0000000100003f62 <+26>: mov 0x18(%rbp),%r9
0x0000000100003f66 <+30>: mov $0x8,%rdi
0x0000000100003f6d <+37>: callq 0x100003f96
0x0000000100003f72 <+42>: mov %r9,(%rax)
0x0000000100003f75 <+45>: mov (%rax),%r9
0x0000000100003f78 <+48>: mov %rbp,%rsp
0x0000000100003f7b <+51>: pop %rbp
0x0000000100003f7c <+52>: retq
End of assembler dump.
(gdb) ni
Thread 2 received signal SIGSEGV, Segmentation fault.
I am assembling on a Mac like this: clang -masm=intel cell.asm.
Does anyone familiar with x86 assembly know the source of my error?
(Also, in case anyone asks, I know that it's important to call free after malloc but this code is the only code necessary to demonstrate my problem.)

Loading "/bin/sh" into a register

I'm trying to write ASM code to spawn a shell.
I've figured out that the syscall number for __execve is 0x3b or 59.
I need to send "/bin/sh" as the first parameter, a pointer to {"/bin/sh", NULL} as the second parameter and NULL as the third parameter.
By the conventions of x86_64 ASM on the Intel architecture, the first parameter is written into the RDI register, the second parameter into the RSI register and the final parameter into the RDX register.
This is my code:
global _start
section .text
; Layout: _start jumps to `message`, whose `call mystart` pushes the address of
; the bytes that follow the call (the "/bin/sh" string) as the return address.
; mystart then sets up registers and issues system call 0x3b, followed by
; system call 0x3c if the first one returns.
_start:
jmp message
mystart:
xor rax, rax
push rax
push rax
pop rdx ; third parameter - NULL
; NOTE(review): this pop takes the second zero pushed above, not the string
; address left on the stack by `call mystart`, so rdi ends up 0.
pop rdi ; first parameter - "/bin/sh"
mov rax, rdi
push rax
push rsp
pop rsi ; second parameter - pointer to {"/bin/sh", NULL}
xor rax, rax
mov al, 0x3b ; rax = 0x3b (59)
syscall
xor rax, rax ; reached only if the system call above returns
mov al, 0x3c ; rax = 0x3c (60)
xor rdi, rdi
mov dil, 0x0a ; rdi = 10
syscall
message:
call mystart
db "/bin/sh" ; NOTE(review): no terminating 0 byte is emitted after these 7 characters
section .data
I use the following instructions to compile and link the code.
yasm -f elf64 shell.asm -o shell.o
ld -o shell.out shell.o
The GDB dump of the _start function is as follows:
Dump of assembler code for function _start:
0x0000000000400080 <+0>: jmp 0x4000a3 <_start+35>
0x0000000000400082 <+2>: xor rax,rax
0x0000000000400085 <+5>: push rax
0x0000000000400086 <+6>: push rax
0x0000000000400087 <+7>: pop rdx
0x0000000000400088 <+8>: pop rdi
0x0000000000400089 <+9>: mov rax,rdi
0x000000000040008c <+12>: push rax
0x000000000040008d <+13>: push rsp
0x000000000040008e <+14>: pop rsi
0x000000000040008f <+15>: xor rax,rax
0x0000000000400092 <+18>: mov al,0x3b
0x0000000000400094 <+20>: syscall
0x0000000000400096 <+22>: xor rax,rax
0x0000000000400099 <+25>: mov al,0x3c
0x000000000040009b <+27>: xor rdi,rdi
0x000000000040009e <+30>: mov dil,0xa
---Type <return> to continue, or q <return> to quit---
0x00000000004000a1 <+33>: syscall
0x00000000004000a3 <+35>: call 0x400082 <_start+2>
0x00000000004000a8 <+40>: pop rsp
0x00000000004000a9 <+41>: (bad)
0x00000000004000aa <+42>: (bad)
0x00000000004000ab <+43>: .byte 0x69
0x00000000004000ac <+44>: outs dx,BYTE PTR ds:[rsi]
0x00000000004000ad <+45>: pop rsp
0x00000000004000ae <+46>: (bad)
0x00000000004000af <+47>: jae 0x400119
As you can see the (bad) instructions are caused by db "/bin/sh", what is wrong with this string? What is a (bad) instruction? How do I debug such problems in the future?
① You cannot load a string into a register, only a pointer to a string.
② Your stack manipulation is simply wrong. Move one of the two consecutive push rax instructions to just below pop rdi, and the program works for me.

Resources