Recursion in Assembly - gcc

I need help with Assembly code which I just started learning.
.intel_syntax noprefix;
.text;
.globl main;
# main: calls f with two dword stack arguments (3 and 0) and prints the value
# f leaves in eax using printf with the format string at `message`.
main:
mov eax, 3; # n, the value f is evaluated for
mov ebx, 0; # second value handed to f
push eax; # pushed first, so f finds it at [esp+8]
push ebx; # pushed last, so f finds it at [esp+4]
call f;
add esp, 8; # drop the two arguments
push eax; # f's result becomes the printf value argument
mov eax, offset message;
push eax; # address of the format string
call printf
add esp,8; # drop the two printf arguments
mov eax,0; # return 0
ret;
# f: recursive routine taking two dword stack arguments:
#   [esp+8] = n (loaded into eax), [esp+4] = a second value (loaded into ebx).
# The result is returned in eax.
# NOTE(review): ebx is written on entry and in ety2 without being saved, so it
# is not preserved across the nested calls to f although ety2 keeps its running
# total in ebx.
f:
mov eax, [esp+8]; # eax = n
mov ebx, [esp+4]; # ebx = second argument
cmp eax,3;
jge ety2; # n >= 3: recursive case
cmp eax,2;
je ety1; # n == 2: return 1
cmp eax,0;
je ety1; # n == 0: return 1
cmp eax,1;
je ety3; # n == 1: return 0
ety3: # also reached by fall-through for every other n below 3 (negative n)
mov eax,0;
ret;
ety1:
mov eax,1;
ret;
ety2:
xor ebx,ebx; # running total = 0
dec eax; # eax = n-1
push eax;
push ebx;
call f; # eax = f(n-1); ebx is not preserved by the callee
add esp,8;
add ebx,[esp+4]; # adds this invocation's own second argument to ebx
add ebx,eax; # ebx += f(n-1)
mov eax,[esp+8]; # reload n
dec eax;
dec eax; # eax = n-2
push eax;
push ebx; # the running total is handed to the callee as its second argument
call f; # eax = f(n-2); ebx is not preserved by the callee
add esp,8;
add ebx,[esp+4]; # adds this invocation's own second argument again
add ebx,eax;
add ebx,eax; # ebx += 2*f(n-2)
mov eax,[esp+8]; # reload n
dec eax;
dec eax;
dec eax; # eax = n-3
push eax;
push ebx; # the running total is handed to the callee as its second argument
call f; # eax = f(n-3); ebx is not preserved by the callee
add esp,8;
add ebx,[esp+4]; # adds this invocation's own second argument again
sub ebx,eax; # ebx -= f(n-3)
mov eax,[esp+8]; # this load is overwritten by the next instruction
mov eax,ebx; # return value = ebx
ret;
.data;
message:
.asciz "Result=%i\n";
.att_syntax prefix;
In main function 'eax' register is used as a 'n' parameter for function that:
for n=0 or n=2 returns 1;
for n=1 returns 0;
for n>=3 returns f(n-1)+(2*f(n-2))-f(n-3);
So for n=3 the function returns 0, n=4 returns 2, n=5 returns 1, n=6 returns 5, etc.
The recursion is pretty problematic: for values < 5 the function works fine, but for 6, 7, etc. the function returns tremendously high or low (negative) values.
I've been working on it for 10+ hours, and I can't manage to make it work
properly. What am I doing wrong?
It is required to use "PUSH" and "[esp+4]", "add esp,4;" and other simple instructions that are already in the code.
Program is compiled under -m32 command parameter(gcc -Wall funcas.s -m32 -o test).
I wrote down the same code in C to show what i'm trying to achieve
#include <stdio.h>
#include <stdlib.h>
/*
 * funkcja(n):
 *   n == 0 or n == 2  -> 1
 *   n == 1            -> 0
 *   n >= 3            -> funkcja(n-1) + 2*funkcja(n-2) - funkcja(n-3)
 *   n < 0             -> -1
 */
int funkcja(int n)
{
    switch (n) {
    case 0:
    case 2:
        return 1;
    case 1:
        return 0;
    default:
        break;
    }

    /* Only negative values and values >= 3 get here. */
    if (n < 0)
        return -1;

    return funkcja(n - 1) + 2 * funkcja(n - 2) - funkcja(n - 3);
}
/* Prints funkcja(6) followed by a newline and exits with status 0. */
int main()
{
    const int a = 6;
    const int wynik = funkcja(a);

    printf("%d\n", wynik);
    return 0;
}

The problem is that the code keeps accumulating all of the results. Change f to only use one parameter. Example Microsoft type assembler code. In both f() and main(), n is stored on the stack.
.model flat,c
; ...
.data
fmt db '%d',00ah,000h
.code
extern printf:proc
public main
;-----------------------------------------------------------------------
; int f(int n)     n is read from [esp+4], the result is returned in eax
;   n == 0 or n == 2 : 1
;   n == 1           : 0
;   n >= 3           : f(n-1) + 2*f(n-2) - f(n-3)
;   any other n      : -1
; ebx is saved and restored around the recursive case.
;-----------------------------------------------------------------------
f       proc
        mov     eax, [esp+4]            ; eax = n
        cmp     eax, 3
        jge     f_recurse
        cmp     eax, 1
        je      f_ret0
        cmp     eax, 2
        je      f_ret1
        test    eax, eax
        jz      f_ret1
        mov     eax, -1                 ; n is negative
        ret
f_ret0: xor     eax, eax
        ret
f_ret1: mov     eax, 1
        ret
f_recurse:
        push    ebx                     ; ebx will hold the partial sum
        sub     eax, 1
        push    eax                     ; argument slot [esp] = n-1
        call    f
        mov     ebx, eax                ; ebx = f(n-1)
        sub     dword ptr [esp], 1      ; argument slot = n-2
        call    f
        lea     ebx, [ebx+eax*2]        ; ebx = f(n-1) + 2*f(n-2)
        sub     dword ptr [esp], 1      ; argument slot = n-3
        call    f
        add     esp, 4                  ; discard the argument slot
        neg     eax
        add     eax, ebx                ; eax = f(n-1) + 2*f(n-2) - f(n-3)
        pop     ebx                     ; restore the caller's ebx
        ret
f       endp
; main: prints f(n) for n = 0..19 using the '%d' + line-feed format in fmt,
; then returns 0. The dword on top of the stack serves both as the loop
; counter and as the argument passed to f.
main proc near
push dword ptr 0 ;[esp] = n
main0: call f ;eax = f(n)
push eax ;printf value argument
push offset fmt ;printf format string
call printf
add esp,8 ;drop the printf arguments, [esp] = n again
inc dword ptr [esp] ;n = n + 1
cmp dword ptr [esp],20
jl main0 ;repeat while n < 20
add esp,4 ;drop n
xor eax,eax ;return 0
ret
main endp

I don't understand your action with EBX and the second argument on the stack.
Let's start from scratch. A recursive function is a function like any other. When you call it, you have to preserve the registers that the function may alter and whose values you still need unaltered after the function returns. The function calls itself three times with different n and combines the different results. While you've got n on the stack and can retrieve it at any time, you have to preserve the intermediate results yourself. It becomes clearer when you split return (funkcja(n-1)+(2*funkcja(n-2))-funkcja(n-3)); into
int result = 0;
result += funkcja(n-1);
result += ( 2 * funkcja(n-2) );
result -= funkcja(n-3);
return result;
result is a so-called local variable. It's only needed for this run of the function and is lost when the function returns. A local variable is usually stored on the stack. You don't need to build a stack frame with prolog and epilog; a simple push/pop combination will do as well.
# f(n) = f(n-1) + (2*f(n-2)) - f(n-3)
# 0 1
# 1 0
# 2 1
# 3 0 1 + 0 - 1
# 4 2 0 + 2 - 0
# 5 1 2 + 0 - 1
# 6 5 1 + 4 - 0
# 7 5 5 + 2 - 2
# 8 14 5 + 10 - 1
# 9 19 14 + 10 - 5
.intel_syntax noprefix
.text
.globl main
# main: evaluates funkcja(9), prints the result with printf using the format
# string at `message`, and returns 0.
main:
mov eax, 9 # n
push eax # single stack argument for funkcja
call funkcja
add esp, 4 # drop the argument
push eax # funkcja's result becomes the printf value argument
mov eax, offset message
push eax # address of the format string
call printf
add esp,8 # drop the two printf arguments
mov eax,0 # return 0
ret
# int funkcja(int n)      n is read from [esp+4], the result is returned in eax
#   n == 0 or n == 2 : 1
#   n == 1           : 0
#   n >= 3           : funkcja(n-1) + 2*funkcja(n-2) - funkcja(n-3)
#   n < 0            : -1  (same fallback as the C reference implementation)
# The running result is kept in a dword pushed on the stack, so no register
# other than eax has to survive the recursive calls.
funkcja:
    mov eax, [esp+4]            # eax = n
    cmp eax, 3
    jge 3f                      # n >= 3: recursive case
    cmp eax, 2
    je 1f                       # n == 2: return 1
    cmp eax, 1
    je 0f                       # n == 1: return 0
    test eax, eax
    je 1f                       # n == 0: return 1
    mov eax, -1                 # negative n
    ret
0:
    xor eax, eax
    ret
1:
    mov eax, 1
    ret
3:
    push 0                      # Result = 0 (local variable at [esp])

    # 1. Call
    mov eax, [esp+8]            # +8: n sits behind the local and the return address
    sub eax, 1
    push eax
    call funkcja
    add esp, 4                  # drop the argument, [esp] is Result again
    add [esp], eax              # Result += funkcja(n-1)

    # 2. Call
    mov eax, [esp+8]
    sub eax, 2
    push eax
    call funkcja
    add esp, 4
    add eax, eax                # eax = 2*funkcja(n-2)
    add [esp], eax              # Result += 2*funkcja(n-2)

    # 3. Call
    mov eax, [esp+8]
    sub eax, 3
    push eax
    call funkcja
    add esp, 4
    sub [esp], eax              # Result -= funkcja(n-3)

    pop eax                     # return Result and release the local
    ret
.data;
message: .asciz "Result=%i\n"
.att_syntax prefix

Related

I'm unsure what the problem with my assembly code is; it works until eax is popped and replaced by a register

; Input a value, search the array for it and output its position
.586
.MODEL FLAT
INCLUDE io.h
.STACK 4096
.DATA
number DWORD ?
array DWORD 20, 15, 62, 40, 18
nbrElts DWORD 5
prompt BYTE "Enter value:", 0
string BYTE 80 DUP (?)
resultLbl BYTE "Position", 0
result BYTE 11 DUP (?), 0
.CODE
; _MainProc: reads a number from the user, calls searchArray(number, array, nbrElts)
; and outputs the value returned in eax under the "Position" label.
; input / atod / dtoa / output are macros that come from io.h (not shown here).
_MainProc PROC
input prompt, string, 20 ; read ASCII characters
atod string ; convert to integer
mov number, eax ; store in memory
push nbrElts ; 3rd parameter (# of elements in array)
lea eax, array ; 2nd parameter (address of array)
push eax
push number ; 1st parameter (value from user)
call searchArray ; searchArray(number, array, 5)
add esp, 12 ; remove the three dword parameters
dtoa result, eax ; convert to ASCII characters
output resultLbl, result ; output label and result
mov eax, 0 ; exit with return code 0
ret
_MainProc ENDP
; searchArray(int x, array, int y)
; Parameters are read from the stack frame: [ebp+8] = x (value to look for),
; [ebp+12] = address of a dword array, [ebp+16] = y (number of elements).
; The value left in eax on exit is the position counter edx (see exitCode).
; NOTE(review): six registers are pushed on entry (ebp, eax, ebx, esi, ecx, edx)
; but only five are popped before ret, so the pops do not mirror the pushes.
searchArray PROC
push ebp ; save base pointer
mov ebp, esp ; establish stack frame
push eax ; save registers (NOTE(review): eax has no matching pop below)
push ebx
push esi
push ecx
push edx
mov ebx, [ebp+8] ; x, value from user
mov esi, [ebp+12] ; address of array
mov ecx, [ebp+16] ; y, number of elements
mov edx, 1 ; edx = 1-based position of the element being examined
mov ecx, 5 ; loop count; overwrites the y value loaded just above
forLoop:
mov eax, [esi] ; a[i]
cmp eax, ebx ; a[i] == x ?
je isEqual
;cmp eax, ebx
add esi, 4 ; advance to the next dword element
inc edx ; next position
loop forLoop ; decrement ecx and repeat while it is non-zero
;mov eax, 0
cmp edx, 6 ; edx is 6 once all 5 elements were compared without a match
je notEqual
isEqual:
mov eax, edx ; result = position of the match
jmp exitCode
notEqual:
mov eax, 0 ; result = 0 when nothing matched
jmp exitCode
exitCode:
mov eax, edx ; NOTE(review): overwrites the eax set by isEqual/notEqual with edx
pop edx ; restore edx
pop ecx ; restore ecx
pop esi ; restore esi
pop ebx ; restore ebx
pop ebp ; NOTE(review): pops the slot written by "push eax", not the saved ebp
ret ; return
searchArray ENDP
END ; end of source code
The pops at the end of the function need to match the pushes at the beginning of the function. If they don't match, the stack pointer ends up in the wrong place and the ret returns to the wrong place.
In your case, you have an extra push without a corresponding pop.
The reason to push registers at the beginning and pop them at the end is to preserve their values. But you don't want to preserve the value of eax. You want to return a different value, the result of the function. So there is absolutely no reason to push eax.

Performance difference Rust and C++

I am currently learning Rust, and as a first exercise I wanted to implement a function that computes the nth fibonacci number:
// Prints "i: fibonacci(i)" for every i in 0..48 (0 through 47), one per line.
fn main() {
for i in 0..48 {
println!("{}: {}", i, fibonacci(i));
}
}
/// Returns the n-th Fibonacci number (fibonacci(0) = 0, fibonacci(1) = 1),
/// computed by plain double recursion without caching intermediate results.
fn fibonacci(n: u32) -> u32 {
match n {
0 => 0,
1 => 1,
// Every other n recurses twice: once on n - 1 and once on n - 2.
_ => fibonacci(n - 1) + fibonacci(n - 2),
}
}
I run it as:
$ time cargo run --release
real 0m15.380s
user 0m15.362s
sys 0m0.014s
As an exercise, I also implemented the same algorithm in C++. I was expecting a similar performance, but the C++ code runs in 80% of the time:
#include<iostream>
unsigned int fibonacci(unsigned int n);
// Prints "i: fibonacci(i)" for i = 0 through 47, one per line, then returns 0.
// argc and argv are not used.
int main (int argc, char* argv[]) {
for(unsigned int i = 0; i < 48; ++i) {
std::cout << i << ": " << fibonacci(i) << '\n';
}
return 0;
}
// Returns the n-th Fibonacci number (fibonacci(0) == 0, fibonacci(1) == 1),
// computed by plain double recursion without caching intermediate results.
unsigned int fibonacci(unsigned int n) {
if(n == 0) {
return 0;
} else if (n == 1) {
return 1;
} else {
// Every other n recurses twice: once on n - 1 and once on n - 2.
return fibonacci(n - 1) + fibonacci(n - 2);
}
}
Compiled as:
$ g++ test.cpp -o test.exe -O2
And running:
$ time ./test.exe
real 0m12.127s
user 0m12.124s
sys 0m0.000s
Why do I see such a difference in performance? I am not interested in calculating the fibonacci faster in Rust (with a different algorithm); I am only interested on where the difference comes from. This is just an exercise in my progress as I learn Rust.
TL;DR: It's not Rust vs C++, it's LLVM (Clang) vs GCC.
Different optimizers optimize the code differently, and in this case GCC produces larger but faster code.
This can be verified using godbolt.
Here is the Rust code compiled with GCC (via rustgcc-master):
# Listing for fibonacci: the argument arrives in edi and the result is
# returned in eax (copied from the accumulator ebp at .L1).
# The listing contains four nested countdown loops (.L2, .L19, .L16, .L13),
# each stepping its counter down by 2, and a single call instruction that is
# reached only from the innermost loop.
example::fibonacci:
push r15
push r14
push r13
push r12
push rbp
xor ebp, ebp # ebp = result accumulator of the outermost loop
push rbx
mov ebx, edi # ebx = n, counter of the outermost loop
sub rsp, 24 # room for the two spill slots used around the call
.L2:
test ebx, ebx
je .L1 # counter == 0: finished
cmp ebx, 1
je .L4 # counter == 1: add 1 and finish
lea r12d, -1[rbx] # second-level counter = ebx - 1
xor r13d, r13d # second-level accumulator
.L19:
cmp r12d, 1
je .L6
lea r14d, -1[r12] # third-level counter = r12d - 1
xor r15d, r15d # third-level accumulator
.L16:
cmp r14d, 1
je .L8
lea edx, -1[r14] # innermost counter = r14d - 1
xor ecx, ecx # innermost accumulator
.L13:
cmp edx, 1
je .L10
lea edi, -1[rdx] # argument for the call = edx - 1
mov DWORD PTR 12[rsp], ecx # spill ecx and edx across the call
mov DWORD PTR 8[rsp], edx
call example::fibonacci.localalias
mov ecx, DWORD PTR 12[rsp]
mov edx, DWORD PTR 8[rsp]
add ecx, eax # innermost accumulator += returned value
sub edx, 2
jne .L13
.L14:
add r15d, ecx # fold the innermost sum into the third level
sub r14d, 2
je .L17
jmp .L16
.L4:
add ebp, 1
.L1:
add rsp, 24
mov eax, ebp # return value
pop rbx
pop rbp
pop r12
pop r13
pop r14
pop r15
ret
.L6:
add r13d, 1
.L20:
sub ebx, 2
add ebp, r13d # fold the second-level sum into the result
jmp .L2
.L8:
add r15d, 1
.L17:
add r13d, r15d # fold the third-level sum into the second level
sub r12d, 2
je .L20
jmp .L19
.L10:
add ecx, 1
jmp .L14
And with LLVM (via rustc):
# Listing for fibonacci: the argument arrives in edi and the result is
# returned in eax. There is one loop; every iteration performs one indirect
# call with argument ebx - 1 and then lowers ebx by 2.
example::fibonacci:
push rbp
push r14
push rbx
mov ebx, edi # ebx = n
xor ebp, ebp # ebp = sum of the values returned by the calls
# r14 is loaded once and used as the call target inside the loop.
# NOTE(review): the '#' in front of GOTPCREL is presumably '@' in the original listing -- confirm.
mov r14, qword ptr [rip + example::fibonacci#GOTPCREL]
cmp ebx, 2
jb .LBB0_3 # n < 2: no call needed
.LBB0_2:
lea edi, [rbx - 1] # argument = ebx - 1
call r14
add ebp, eax # accumulate the returned value
add ebx, -2
cmp ebx, 2
jae .LBB0_2 # keep looping while ebx >= 2 (unsigned)
.LBB0_3:
add ebx, ebp # leftover ebx (0 or 1) + accumulated sum
mov eax, ebx # return value
pop rbx
pop r14
pop rbp
ret
We can see that LLVM produces a naive version -- calling the function in each iteration of the loop -- while GCC partially unrolls the recursion by inlining some calls. This results in a smaller number of calls in the case of GCC, and at about 5ns of overhead per function call, it's significant enough.
We can do the same exercise with the C++ version using LLVM via Clang and GCC and note that the result is pretty much similar.
So, as announced, it's a LLVM vs GCC difference, not a language one.
Incidentally, the fact that optimizers may produce such widely different results is a reason why I am quite excited at the progress of the rustc_codegen_gcc initiative (dubbed rustgcc-master on godbolt), which aims at plugging a GCC backend into the rustc frontend: once complete, anyone will be able to switch to the better optimizer for their own workload.

80x86 assembly code not returning from function and displaying output

The goal of the program is to calculate a GCD of two numbers using a recursive function defined by this pseudo code
/* Subtraction-based recursive GCD: subtracts the smaller argument from the
   larger one until both are equal, then returns that common value.
   NOTE(review): when exactly one argument is 0 the recursive call receives
   the same pair of values again, so the recursion does not terminate. */
unsigned int gcd ( unsigned int a, unsigned int b ) {
if ( a > b )
return gcd ( a - b, b ) ;
if ( a < b )
return gcd ( a, b - a ) ;
return a ;
}
Here is the (undoubtedly poor) assembly code I'm having issues with.
.586
.MODEL FLAT
INCLUDE io.h
.STACK 4096
.DATA
a DWORD ?
b DWORD ?
prompt1 BYTE "a: ", 0
prompt2 BYTE "b:", 0
string BYTE 20 DUP (?)
resultLbl BYTE "gcd:", 0
result BYTE 11 DUP (?), 0
.CODE
; _MainProc: reads a and b from the user, calls _gcd(a, b) and outputs the
; value left in eax under the "gcd:" label.
; input / atod / dtoa / output are macros that come from io.h (not shown here).
_MainProc PROC
input prompt1, string, 20 ; prompt for a
atod string ; presumably converts the text in string to an integer in eax -- confirm against io.h
mov a, eax
input prompt2, string, 20 ; prompt for b
atod string
mov b, eax
push b ; 2nd parameter, found at [ebp+12] inside _gcd
push a ; 1st parameter, found at [ebp+8] inside _gcd
call _gcd
add esp, 8 ; remove the two dword parameters
dtoa result, eax ; format the returned value
output resultLbl, result
mov eax, 0 ; return code 0
ret
_MainProc ENDP
; _gcd: intended to implement the recursive gcd(a, b) pseudo code.
; Parameters: [ebp+8] = a (loaded into eax), [ebp+12] = b (loaded into ebx).
; NOTE(review): three registers are pushed on entry (ebp, ebx, eax) but the
; a == b path pops only two before ret, so ret does not find the return address.
; NOTE(review): neither recursive path removes its arguments or executes ret
; after the call; loop1 falls through into loop2 and loop2 runs off the end
; of the procedure.
_gcd PROC
push ebp
mov ebp, esp
push ebx
push eax ; NOTE(review): no matching pop on any path
mov eax, [ebp+8] ; eax = a
mov ebx, [ebp+12] ; ebx = b
cmp eax, ebx
jg loop1 ; a > b (signed compare; the pseudo code uses unsigned values)
cmp eax, ebx
jl loop2 ; a < b (signed compare)
pop ebx ; a == b: receives the slot written by "push eax"
pop ebp ; receives the slot written by "push ebx"
ret
loop1:
push ebx ; 2nd parameter = b
sub eax, ebx
push eax ; 1st parameter = a - b
call _gcd
loop2:
sub ebx, eax
push ebx ; 2nd parameter = b - a
push eax ; 1st parameter = a
output did2, result ; debugging output; did2 is not declared in the .DATA section shown
call _gcd
_gcd ENDP
END
By creating some outputs in the loops to display a message when they occur I can tell that the program is calculating the GCD correctly however as soon as both values are equal and "ret" is hit inside of _gcd the program terminates. What do I need to change so the GCD value is returned and displayed correctly?

How to echo memory location use NASM [duplicate]

I am looking for a way to print an integer in assembler (the compiler I am using is NASM on Linux), however, after doing some research, I have not been able to find a truly viable solution. I was able to find a description for a basic algorithm to serve this purpose, and based on that I developed this code:
global _start
section .bss
digit: resb 16
count: resb 16
i: resb 16
section .data
section .text
; _start: writes the decimal digits of i (4238) one at a time, starting with
; the least significant digit, then writes what is left in i and exits.
_start:
mov dword[i], 108eh ; i = 4238
mov dword[count], 1
L01:
mov eax, dword[i]
cdq ; edx = sign extension of eax
mov ecx, 0Ah
div ecx ; eax = i / 10, edx = i % 10
mov dword[digit], edx
add dword[digit], 30h ; add 48 to digit to make it an ASCII char
call write_digit ; the digit is written as soon as it is computed, lowest digit first
inc dword[count]
mov eax, dword[i] ; the same division again, this time to keep the quotient
cdq
mov ecx, 0Ah
div ecx
mov dword[i], eax ; i = i / 10
cmp dword[i], 0Ah
jg L01 ; repeat while i > 10 (signed compare)
add dword[i], 48 ; add 48 to i to make it an ASCII char
mov eax, 4 ; system call #4 = sys_write
mov ebx, 1 ; file descriptor 1 = stdout
mov ecx, i ; store *address* of i into ecx
mov edx, 16 ; byte size of 16
int 80h
jmp exit ; exit is the very next label
exit:
mov eax, 01h ; exit()
xor ebx, ebx ; errno
int 80h
; write_digit: writes the 16 bytes starting at `digit` to stdout.
; Takes no register arguments; eax, ebx, ecx and edx are overwritten.
write_digit:
mov eax, 4 ; system call #4 = sys_write
mov ebx, 1 ; file descriptor 1 = stdout
mov ecx, digit ; store *address* of digit into ecx
mov edx, 16 ; byte size of 16
int 80h
ret
C# version of what I want to achieve (for clarity):
// Converts an integer to its decimal string representation.
// Digits are produced least-significant first and pushed on a stack, so that
// reading the stack back yields them most-significant first.
// Works for 0, for single-digit values and for negative values (including
// int.MinValue, which is why the value is widened to long before negating).
static string int2string(int i)
{
    long remaining = i;
    bool negative = remaining < 0;
    if (negative)
    {
        remaining = -remaining;
    }

    Stack<char> stack = new Stack<char>();
    do
    {
        stack.Push((char)('0' + (int)(remaining % 10)));
        remaining /= 10;
    } while (remaining != 0); // stop only once every digit has been consumed

    if (negative)
    {
        stack.Push('-');
    }

    // Stack<T>.ToArray returns the elements in pop order (last pushed first).
    return new string(stack.ToArray());
}
The issue is that it outputs the characters in reverse, so for 4238, the output is 8324. At first, I thought that I could use the x86 stack to solve this problem, push the digits in, and pop them out and print them at the end, however when I tried implementing that feature, it flopped and I could no longer get an output.
As a result, I am a little bit perplexed about how I can implement a stack in to this algorithm in order to accomplish my goal, aka printing an integer. I would also be interested in a simpler/better solution if one is available (as it's one of my first assembler programs).
One approach is to use recursion. In this case you divide the number by 10 (getting a quotient and a remainder) and then call yourself with the quotient as the number to display; and then display the digit corresponding to the remainder.
An example of this would be:
;Input
; eax = number to display (divided as an unsigned value)
;
;Displays the decimal digits of eax, most significant digit first: the routine
;first calls itself for the quotient (when it is non-zero) and only then
;displays the digit for the remainder. eax and edx are restored on return.
; NOTE(review): printCharacter is not shown here; presumably it displays the character in eax -- confirm.
section .data
const10: dd 10
section .text
printNumber:
push eax
push edx
xor edx,edx ;edx:eax = number
div dword [const10] ;eax = quotient, edx = remainder
test eax,eax ;Is quotient zero?
je .l1 ; yes, don't display it
call printNumber ;Display the quotient
.l1:
lea eax,[edx+'0'] ;eax = ASCII digit for the remainder
call printCharacter ;Display the remainder
pop edx
pop eax
ret
Another approach is to avoid recursion by changing the divisor. An example of this would be:
;Input
; eax = number to display (divided as an unsigned value)
;
;Displays eax as ten decimal digits by dividing by each entry of divisorTable
;in turn (1000000000 down to 1); the table ends with a 0 entry.
;eax, ebx and edx are restored on return.
; NOTE(review): printCharacter is not shown here; this code relies on it leaving ebx and edx unchanged -- confirm.
section .data
divisorTable:
dd 1000000000
dd 100000000
dd 10000000
dd 1000000
dd 100000
dd 10000
dd 1000
dd 100
dd 10
dd 1
dd 0
section .text
printNumber:
push eax
push ebx
push edx
mov ebx,divisorTable ;ebx = address of current divisor
.nextDigit:
xor edx,edx ;edx:eax = number
div dword [ebx] ;eax = quotient, edx = remainder
add eax,'0' ;quotient is a single digit, convert it to ASCII
call printCharacter ;Display the quotient
mov eax,edx ;eax = remainder
add ebx,4 ;ebx = address of next divisor
cmp dword [ebx],0 ;Have all divisors been done?
jne .nextDigit
pop edx
pop ebx
pop eax
ret
This example doesn't suppress leading zeros, but that would be easy to add.
I think that maybe implementing a stack is not the best way to do this (and I really think you could figure out how to do that, seeing as pop is just a mov and an adjustment of the stack pointer, so you can really set up a stack anywhere you like by just allocating memory for it and setting one of your registers as your new 'stack pointer').
I think this code could be made clearer and more modular if you actually allocated memory for a c-style null delimited string, then create a function to convert the int to string, by the same algorithm you use, then pass the result to another function capable of printing those strings. It will avoid some of the spaghetti code syndrome you are suffering from, and fix your problem to boot. If you want me to demonstrate, just ask, but if you wrote the thing above, I think you can figure out how with the more split up process.
; int_to_string
; Input
;   EAX = unsigned integer value to convert (the value itself, not a pointer)
;   EDI = address of the result buffer; it must have room for up to
;         10 digits plus 1 line feed
; Output:
;   The decimal digits of EAX followed by a line feed (0x0a) are stored at the
;   original EDI. No terminating NUL is written.
;   EDI = address just past the line feed
; Clobbers: EAX, ECX, EDX, flags (EBX is saved and restored)
; NOTE(review): stosb advances EDI upwards only while the direction flag is
; clear; this routine assumes it is clear on entry.
int_to_string:
    push ebx                ; keep the caller's ebx, it is used as a counter below
    xor ebx, ebx            ; ebx = number of digits pushed so far
    mov ecx, 10             ; divisor, constant for the whole loop
.push_chars:
    xor edx, edx            ; edx:eax = remaining value, zero-extended
    div ecx                 ; eax = quotient, edx = remainder (0..9)
    add edx, 0x30           ; convert the digit to ASCII
    push edx                ; digits are produced lowest first, so stack them
    inc ebx
    test eax, eax           ; anything left to convert?
    jnz .push_chars
.pop_chars:
    pop eax                 ; highest remaining digit
    stosb                   ; store its low byte at [edi], edi = edi + 1
    dec ebx
    jnz .pop_chars          ; at least one digit was pushed, so ebx starts >= 1
    mov eax, 0x0a
    stosb                   ; add line feed
    pop ebx
    ret
; eax = number to stringify/output
; edi = location of buffer
;
; Writes the decimal digits of eax (divided as an unsigned value) to the
; buffer, followed by a terminating nul.
; Returns eax = number of bytes stored, counting the terminating nul.
; edx, ecx, edi and ebp are saved on entry and restored before returning.
intToString:
push edx
push ecx
push edi
push ebp
mov ebp, esp ; remember the stack level before any digit is pushed
mov ecx, 10
.pushDigits:
xor edx, edx ; zero-extend eax
div ecx ; divide by 10; now edx = next digit
add edx, 30h ; decimal value + 30h => ascii digit
push edx ; push the whole dword, cause that's how x86 rolls
test eax, eax ; leading zeros suck
jnz .pushDigits
.popDigits:
pop eax
stosb ; don't write the whole dword, just the low byte
cmp esp, ebp ; if esp==ebp, we've popped all the digits
jne .popDigits
xor eax, eax ; add trailing nul
stosb
mov eax, edi ; eax = address just past the nul
pop ebp
pop edi ; edi = original buffer address again
pop ecx
pop edx
sub eax, edi ; return number of bytes written
ret

Bubble Sort in NASM Ubuntu

I was asked to create a bubble sort program in NASM Ubuntu. Here's the code:
section .data
i db 0 ; Value to be incremented
question db 'Enter a number: ' ; Prompt
questionLen equ $-question
newLine db 10, 10, 0 ; New blank line
newLineLen equ $-newLine
section .bss
num resb 5 ; Array of size 5
counter resb 1 ; Value to be incremented
counter2 resb 1 ; Value to be incremented
temp resb 1
temp2 resb 1
section .text
global _start
; _start / getInput: prints the prompt five times and reads each answer into
; num, one element per pass, then resets esi, i and edi for the code below.
_start:
mov esi, 0 ; esi = index of the array element being read
getInput:
mov eax, 4 ; sys_write
mov ebx, 1 ; stdout
mov ecx, question ; Prints the question
mov edx, questionLen
int 80h
add byte[i], 30h ; I'll retain this expression, since the program experienced an error
; when this expression is deleted
sub byte[i], 30h ; undoes the add above, leaving i unchanged
mov eax, 3 ; sys_read
mov ebx, 0 ; stdin
lea ecx, [num + esi] ; Element of the array
mov edx, 2 ; reads up to 2 bytes although esi advances by only 1 per pass
int 80h
inc esi
inc byte[i]
cmp byte[i], 5 ; As long as the array hasn't reached the size of 5,
jl getInput ; the program continues to ask input from the user
mov esi, 0
mov byte[i], 0
mov edi, 0 ; Index of the array
; bubble_sort: nested loops driven by the byte variables counter (outer) and
; counter2 (inner) that compare num[edi] with num[edi + 1] and swap the two
; bytes when the first is greater (signed byte compare).
; NOTE(review): counter2 is never written after being zeroed here, and edi is
; reloaded from it at begin_for_2 on every pass, so the "inc edi" at `return`
; does not carry over to the next pass.
bubble_sort:
mov byte[counter], 0
mov byte[counter2], 0
begin_for_1:
mov al, 0
mov al, [counter] ; Acts as the outer for loop
cmp al, 5
jg printArray ; leaves the sort once counter is greater than 5
begin_for_2:
mov edi, [counter2] ; Acts as the inner for loop; NOTE(review): 4-byte load from a 1-byte variable
cmp edi, 4
jg end_for_2 ; NOTE(review): edi == 4 is accepted, so [num + edi + 1] reads one byte past the 5-byte array
mov bl, 0 ; Acts as the if statement
mov cl, 0
mov bl, [num + edi] ; bl = a[j]
mov cl, [num + edi + 1] ; cl = a[j + 1]
mov byte[temp], cl ; This is the same as if(a[j] > a[j + 1]){...}
cmp bl, [temp]
jg bubbleSortSwap
return:
inc edi ; Same as j++
jmp begin_for_2 ; back to the top of the inner loop
end_for_2:
inc byte[counter] ; Same as i++
jmp begin_for_1 ; back to the top of the outer loop
bubbleSortSwap:
mov [num + edi + 1], bl
mov [num + edi], cl ; The set of statements is the same as swap(&a[j], &a[j + 1]);
jmp return
; printArray: loops five times (i = 0..4) issuing a 1-byte write to stdout per
; array element, then writes newLine and exits with status 0.
printArray:
mov eax, 4 ; sys_write
mov ebx, 1 ; stdout
mov ecx, [num + esi] ; Prints one element at a time; NOTE(review): this loads the bytes stored at num + esi, not their address
mov edx, 1
int 80h
inc esi
inc byte[i]
cmp byte[i], 5
jl printArray ; As long as the array size hasn't reached 5, printing continues
mov eax, 4
mov ebx, 1
mov ecx, newLine ; Displays a new blank line after the array
mov edx, newLineLen
int 80h
mov eax, 1 ; Exits the program
mov ebx, 0
int 80h
But the only problem is, it cannot print the rest of the iterations, because it only prints the 1st iteration like this:
Enter a number: 7
Enter a number: 1
Enter a number: 4
Enter a number: 3
Enter a number: 5
17435
What I want to output is the array input and the final output, from the 1st iteration up to the last.
Naw... he just needs some stuff sorted! :)
Doesn't print any output at all for me, as posted. Problem is you're putting "[contents]" in ecx - you want address - you do it right in the input routine.
You can get by with fewer variables - use esi and/or edi as both the "count" and the "index". If you use variables, make sure the size of the variable matches the size of the register you're moving it in/out of! ("mov edi, [counter2]" isn't doing what you want) Courage! If it wuz easy, everybody'd be doing it.
Best,
Frank

Resources