Related
I'm having a problem implementing quicksort in assembly, when I'm trying to perform the second recursive call to qsort(arr,pi+1,high) I don't know why, but it exits the recursion too early.
I've tried using the stack and writing helper functions, but everything I've tied created new problems. When debbuging, I see that I exit the function when %ebx = 4 and %ecx = 5 and the array is almost sorted array ={10,30,40,50,70,90,80} - I need only to perform the last swap when %ebx = 5 and %ecx = 6, but it exits before, and I don't understand why it won't go back to when %ecx was 6.
I thought perhaps the problem is when I'm making the comparisons between high and low, but If I change those, I'm having different problems (it exists too early the first recursive call). perhaps anyone has an idea what's wrong with my code?
here's my code:
.data
array: .int 10, 80, 30, 90, 40, 50, 70
size: .int 7
.text
.global main
main:
movl $0, %ebx # low index
movl (size), %ecx # high index
dec %ecx # ecx = size -1
qsort:
cmp %ecx, %ebx # high > low
jge end
movsx %ecx, %r13 # push high
movsx %ebx, %r12 # push low
call partition # rax now is the pivot
movq %r12, %rbx # pop low
movl %eax, %ecx # prepare new high = pi
subl $1, %ecx # high = pi-1
call qsort # qsort(arr,low,pi-1)
movl %eax,%ebx # push new low = pi+1
inc %ebx
call qsort # qsort(arr,pi+1,high)
ret
partition:
movq %r12, %rax # get low
movq %r13, %rbx # get high
movl %eax, %ecx # j = low
subl $1, %eax # i = low-1
movl array(,%ebx,4), %edx # pivot = array[high]
swap_loop:
cmp %ebx, %ecx # high > j
jge swap_greater
movl array(,%ecx,4), %esi # esi = array[j]
cmp %edx, %esi # pivot > array[j] ?
jge loops # jmp if array[j] > pivot
inc %eax # i++
# swap
movsx %esi, %rsi
pushq %rsi
movl array(,%eax,4), %edi # edi = array[i]
movl %edi, array(,%ecx,4) # array[j]=array[i]
pop %rsi
movl %esi, array(,%eax,4) # array[i]= previous array[j]
loops:
inc %ecx # j++
jmp swap_loop
swap_greater:
inc %eax
movsx %edx, %rdx
pushq %rdx
movl array(,%eax,4), %edi # edi = array[i+1]
movl %edi, array(,%ebx,4) # array[high]=array[i+1]
pop %rdx
movl %edx, array(,%eax,4) # array[i+1]= previous array[high]
dec %eax
end_p:
inc %eax
ret
end:
inc %ecx
movsx %ecx, %r13 # update high
ret
edit: as Peter Cordes has pointed out - I need to work with the stack in order to retrieve the parameters when I go back from the recursive calls. after re-writing my code using the stack, I have a new problem, I always pop out the same thing on the second recursive call. I don't understand why it doesn't do the "recursive folding" like in C, when I look at the stack there's something weird, since I'm not seeing the values as I excpect ( I excpect to see layers or rax-high-low-rax-high-low...etc) but instead there are those numbers:
native process 42032 In: second_rec L?? PC: 0x4005ea
0x7fffffffdeb0: 0x0000000000000000 0x0000000000000001
0x7fffffffdec0: 0x0000000000000002 0x00000000004005ef
0x7fffffffded0: 0x0000000000000000 0x0000000000000002
0x7fffffffdee0: 0x0000000000000001 0x00000000004005df
0x7fffffffdef0: 0x0000000000000000 0x0000000000000001
0x7fffffffdf00: 0x0000000000000002 0x00000000004005df
0x7fffffffdf10: 0x0000000000000000 0x0000000000000002
0x7fffffffdf20: 0x0000000000000003 0x00000000004005df
0x7fffffffdf30: 0x0000000000000000 0x0000000000000003
0x7fffffffdf40: 0x0000000000000004 0x00007ffff7a05b97
0x7fffffffdf50: 0x0000000000000001 0x00007fffffffe028
0x7fffffffdf60: 0x0000000100008000 0x00000000004005a7
perhaps I'm not pushing\popping in the right timing, but from what I've understood, I'm pushing parameters before call, and popping them when I return from the call... does anyone have an idea what am I doing wrong? the new code:
.data
array: .int 10, 80, 30, 90, 40, 50, 70
size: .int 7
.text
.global main
main:
movl $0, %ebx # low index
movl (size), %ecx # high index
dec %ecx # ecx = size -1
qsort:
cmp %ecx, %ebx # high > low
jge end
movsx %ecx, %rcx # prepare high
movsx %ebx, %rbx # prepare low
pushq %rcx # push high
pushq %rbx # push low
call partition # rax now is the pivot
popq %rbx # pop low
popq %rcx # pop high
movsx %eax, %rax
pushq %rax # push pivot
movl %eax, %ecx # prepare new high = pi
subl $1, %ecx # pi-=1
movsx %ecx, %rcx
pushq %rcx # push high
pushq %rbx # push low
call qsort # qsort(arr,low,pi-1)
second_rec:
popq %rbx # pop low
popq %rcx # pop high
popq %rax # pop pivot
movl %eax, %ecx # high = pi
addl $1, %ecx # high = pi+1
pushq %rax # push pivot
pushq %rcx # push high
pushq %rbx # push low
call qsort # qsort(arr,pi+1,high)
addq $24, %rsp
ret
partition:
movq 8(%rsp), %rax # get low
movq 16(%rsp), %rbx # get high
movl %eax, %ecx # j = low
subl $1, %eax # i = low-1
movl array(,%ebx,4), %edx # pivot = array[high]
swap_loop:
cmp %ebx, %ecx # high > j
jge swap_greater
movl array(,%ecx,4), %esi # esi = array[j]
cmp %edx, %esi # pivot > array[j] ?
jge loops # jmp if array[j] > pivot
inc %eax # i++
# swap
movsx %esi, %rsi
pushq %rsi
movl array(,%eax,4), %edi # edi = array[i]
movl %edi, array(,%ecx,4) # array[j]=array[i]
pop %rsi
movl %esi, array(,%eax,4) # array[i]= previous array[j]
loops:
inc %ecx # j++
jmp swap_loop
swap_greater:
inc %eax
movsx %edx, %rdx
pushq %rdx
movl array(,%eax,4), %edi # edi = array[i+1]
movl %edi, array(,%ebx,4) # array[high]=array[i+1]
pop %rdx
movl %edx, array(,%eax,4) # array[i+1]= previous array[high]
dec %eax
end_p:
inc %eax
ret
end:
ret
Thank you very much everyone for your help!
I have a C program here that invokes CreateProcess...
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
int main(int argc, char *argv[])
{
STARTUPINFO st;
ZeroMemory(%st, sizeof(STARTUPINFO));
st.cb = sizeof(STARTUPINFO);
PROCESS_INFORMATION pi;
CreateProcessA("C:\\WINDOWS\\system32\\cmd.exe",0,0,0,0,0,0,0,&st,&pi);
return 0;
}
Which runs fine, creating a shell within a shell.
I also have this code, written in GAS assembly via the MinGW compiler suite for Windows...
.extern _CreateProcessA#40
.def _CreateProcessA#40; .scl 2; .type 32; .endef
.extern _ExitProcess#4
.def _ExitProcess#4; .scl 2; .type 32; .endef
.text
.globl _main
.def _main; .scl 2; .type 32; .endef
_main:
push %ebp
movl %esp, %ebp
#PROCESS_INFORMATION...
subl $16, %esp
movl %esp, %eax
#STARTUPINFO...
subl $68, %esp
movl $68, (%esp)
movl %esp, %ebx
#Application name with path : C:\WINDOWS\system32\cmd.exe...
subl $29, %esp
xor %edx, %edx
movb %dx, 27(%esp)
movb $0x65, 26(%esp)
movw $0x7856, 24(%esp)
movl $0x2e646d63, 20(%esp)
movl $0x5c32336d, 16(%esp)
movl $0x65747379, 12(%esp)
movl $0x735c5357, 8(%esp)
movl $0x4f444e49, 4(%esp)
movl $0x575c3a43, (%esp)
movl %esp, %ecx
push %eax
push %ebx
push %edx
push %edx
push %edx
push %edx
push %edx
push %edx
push %edx
push %ecx
call _CreateProcessA#40
movl %ebp, %esp
pop %ebp
push %edx
call _ExitProcess#4
It compiles and links fine with;
as createProc.s -o createProc.o
ld createProc.o -o createProc.exe -lkernel32
When it runs though, and it does run, it executes without starting a second shell within a shell on the command line. What could be wrong?
Note : I'm inputting the string with the movl instructions for a reason, so please no suggestions saying that I should be using .data, .bss, or lables. Also note I have already tried using escaped slashes (\\) within the string in the assembly program to no avail, it actually crashes if escaped slashes are used.
About programming style
You should forget about mucking around with ESP.
The way to do it is to set up a stack frame at the start of your routine and use EBP to address the space thus created.
You have a typo in your path
You pass "c1\win...." as the path. That's not going to work.
You should double check the code against the ascii-table, or review the parameters in a debugger making the API call..
Also I have no idea why you need 29 bytes to store the string. It fits in 28 chars as far as I can tell.
Working code using a stack frame
Here's code that works using a stack frame the way it is supposed to be done.
//Set up stack frame.
00418200 55 push ebp
00418201 8BEC mov ebp,esp
00418203 83C490 add esp,-$70
//Zero StartupInfoA
00418206 57 push edi
00418207 8D45A0 lea eax,[ebp-$60]
0041820A 8BF8 mov edi,eax
0041820C 33C0 xor eax,eax
0041820E B911000000 mov ecx,$00000011
00418213 F3AB rep stosd
//st.cb = SizeOf(st)
00418215 C745A044000000 mov [ebp-$60],$00000044
//Set the string: path = 'c:\windows\system32\cmd.exe'; 28 chars including trailing 0.
0041821C C745E4433A5C77 mov [ebp-$1c],$775c3a43 //c:\w
00418223 C745E8696E646F mov [ebp-$18],$6f646e69 //indo
0041822A C745EC77735C73 mov [ebp-$14],$735c7377 //ws\s
00418231 C745F079737465 mov [ebp-$10],$65747379 //yste
00418238 C745F46D33325C mov [ebp-$0c],$5c32336d //m32\
0041823F C745F8636D642E mov [ebp-$08],$2e646d63 //cmd.
00418246 C745FC65786500 mov [ebp-$04],$00657865 //exe-
//Set up parameters for call
0041824D 8D4590 lea eax,[ebp-$70] //ProcessInfo
00418250 50 push eax
00418251 8D45A0 lea eax,[ebp-$60] //StartupInfoA
00418254 50 push eax
00418255 6A00 push $00
00418257 6A00 push $00
00418259 6A00 push $00
0041825B 6A00 push $00
0041825D 6A00 push $00
0041825F 6A00 push $00
00418261 6A00 push $00
00418263 8D45E4 lea eax,[ebp-$1c] //Path
00418266 50 push eax
//Call
00418267 E80823FFFF call CreateProcessA
//Clean up the stackframe
0041826C 5F pop edi
0041826D 8BE5 mov esp,ebp
0041826F 5D pop ebp
About messing with ESP
If you set ESP to an unaligned address it will seriously degrade performance.
#HarryJohnson figured it out, all that was needed was to zero out the STARTUPINFO structure,
.extern _CreateProcessA#40
.def _CreateProcessA#40; .scl 2; type 32; .endef
.extern _ExitProcess#4
.def _ExitProcess#4; .scl 2; type 32; .endef
.text
.globl _main
.def _main; .scl 2; .type 32; .endef
_main:
push %ebp
movl %ebp, %esp
xor %edx, %edx
#PROCESS_INFORMATION...
subl $16, %esp
movl %esp, %eax
#STARTUPINFO...
subl $68, %esp
movl %edx, 64(%esp)
movl %edx, 60(%esp)
movl %edx, 56(%esp)
movl %edx, 52(%esp)
movl %edx, 48(%esp)
movl %edx, 44(%esp)
movl %edx, 40(%esp)
movl %edx, 36(%esp)
movl %edx, 32(%esp)
movl %edx, 28(%esp)
movl %edx, 24(%esp)
movl %edx, 20(%esp)
movl %edx, 16(%esp)
movl %edx, 12(%esp)
movl %edx, 8(%esp)
movl %edx, 4(%esp)
movl %edx, (%esp)
movb $68, (%esp)
movl %esp, %ebx
#Application name (C:\WINDOWS\system32\cmd.exe)...
subl $28, %esp
movb %dl, 27(%esp)
movb $0x65, 26(%esp)
movw $0x7865, 24(%esp)
movl $0x2e646d63, 20(%esp)
movl $0x5c32336d, 16(%esp)
movl $0x65747379, 12(%esp)
movl $0x735c5357, 8(%esp)
movl $0x4f444e49, 4(%esp)
movl $0x575c3a43, (%esp)
movl %esp, %ecx
push %eax
push %ebx
push %edx
push %edx
push %edx
push %edx
push %edx
push %edx
push %edx
push %ecx
call _CreateProcessA#40
mov %ebp, %esp
pop %ebp
push %edx
call _ExitProcess#4
I'm checking the assembly code of the following simple program on https://godbolt.org/ with parameters -Wall -m32 using gcc 6.3 and to my surprise the generated assembly code takes three multiplications to perform one multiplication in C as shown below.
#include<stdio.h>
#include<inttypes.h>
int main(void){
int32_t ax=-854763;
int32_t bx=586478;
int64_t cx= (int64_t)ax*bx;
printf("%lld\n", cx);
}
The generated assembly code for the statement int64_t cx= (int64_t)ax*bx; is given below. There are two imull and one mull instructions.
movl -28(%ebp), %eax
movl %eax, %ecx
movl %eax, %ebx
sarl $31, %ebx
movl -32(%ebp), %eax
cltd
movl %ebx, %edi
imull %eax, %edi
movl %edx, %esi
imull %ecx, %esi
addl %edi, %esi
mull %ecx
leal (%esi,%edx), %ecx
movl %ecx, %edx
movl %eax, -40(%ebp)
movl %edx, -36(%ebp)
movl %eax, -40(%ebp)
movl %edx, -36(%ebp)`
Is it possible to enforce the gcc compiler/assembler to do it with just one multiplication without using inline assembly? Because in the original code I have to perform many such multiplications within a statement and in different statements.
Why the compiler does not perform it with one imul instruction? I have also checked the different versions of the gcc on https://godbolt.org/ but the result is same three multiplication instruction.
This is a homework task. I've got a C program that calls a function calc(int, float*, float*, float*, float*) implemented with NASM. I want to do floating-point division with the data passed from C, but first I wanted to check if I access the data correctly.
This is an excerpt from the C program:
printf("read.c: F data1[0]=%f\n", data1[0]);
printf("read.c: X data1[0]=%X\n", *(int*)(&data1[0]));
calc(nlines, data1, data2, result1, result2);
For testing, I wanted to print out exactly the same from the assembler code, but whatever I tried, it wouldn't give me the right results. To be precise, outputting the %X format gives the same result, but the %f format gives some incredibly huge number.
global calc
extern printf
; -----------------------------------------------------------------------
; extern void calc(int nlines, float* data1, float* data2,
; float* result1, float* result2)
; -----------------------------------------------------------------------
calc:
section .data
.strf db "calc.asm: F data1[0]=%f", 10, 0
.strx db "calc.asm: X data1[0]=%X", 10, 0
section .text
enter 0, 0
; Move the value of float* data1 into ecx.
mov ecx, [esp + 12]
; Move the contents of data1[0] into esi.
mov esi, [ecx]
push esi
push .strf
call printf
add esp, 8
push esi
push .strx
call printf
add esp, 8
leave
ret
Outputs
read.c: F data1[0]=20.961977
read.c: X data1[0]=41A7B221
calc.asm: F data1[0]=-8796958457989122902187458235483374032941932827208012972482327255932202912296419757153331437662235555722313731094096197990916443553479942683040096290755684437514827018615169352974748429901549205109479495668937369584705401541113350145698235773041651907978442730240007381959397006695721667307435228446926569472.000000
calc.asm: X data1[0]=41A7B221
I've also looked into fld, but I couldn't find out how I can push the loaded value on stack. This didnt work:
; Move float* data1 into ecx
mov ecx, [esp + 12]
; Load the floating point number into esi.
fld dword [ecx]
fst esi
How to do it right?
I've stripped down read.c to this code
#include <stdio.h>
#include <stdlib.h>
#define MAXLINES 1024
extern void calc(int, float*, float*, float*, float*);
int main(int argc, char** argv)
{
int nlines;
float* data1 = malloc(sizeof(float)*MAXLINES);
float*data2, *results1, *results2;
printf("read.c: F data1[0]=%f\n", data1[0]);
printf("read.c: X data1[0]=%X\n", *(int*)(&data1[0]));
calc(nlines, data1, data2, results1, results2);
return 0;
}
and this is the assembler output:
.file "test.c"
.section .rodata
.LC0:
.string "read.c: F data1[0]=%f\n"
.LC1:
.string "read.c: X data1[0]=%X\n"
.text
.globl main
.type main, #function
main:
.LFB2:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
andl $-16, %esp
subl $64, %esp
movl $4096, (%esp)
call malloc
movl %eax, 44(%esp)
movl 44(%esp), %eax
flds (%eax)
fstpl 4(%esp)
movl $.LC0, (%esp)
call printf
movl 44(%esp), %eax
movl (%eax), %eax
movl %eax, 4(%esp)
movl $.LC1, (%esp)
call printf
movl 60(%esp), %eax
movl %eax, 16(%esp)
movl 56(%esp), %eax
movl %eax, 12(%esp)
movl 52(%esp), %eax
movl %eax, 8(%esp)
movl 44(%esp), %eax
movl %eax, 4(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call calc
movl $0, %eax
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE2:
.size main, .-main
.ident "GCC: (Ubuntu 4.8.4-2ubuntu1~14.04) 4.8.4"
.section .note.GNU-stack,"",#progbits
.LC1:
.string "read.c: F data1[0]=%f\n"
.LC2:
.string "read.c: X data1[0]=%X\n"
.text
.globl main
.type main, #function
main:
.LFB4:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
andl $-16, %esp
subl $64, %esp
movl 44(%esp), %eax
flds (%eax)
fstpl 4(%esp)
movl $.LC1, (%esp)
call printf
movl 44(%esp), %eax
movl (%eax), %eax
movl %eax, 4(%esp)
movl $.LC2, (%esp)
call printf
movl 60(%esp), %eax
movl %eax, 16(%esp)
movl 56(%esp), %eax
movl %eax, 12(%esp)
movl 52(%esp), %eax
movl %eax, 8(%esp)
movl 44(%esp), %eax
movl %eax, 4(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call calc
movl $0, %eax
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE4:
.size main, .-main
.ident "GCC: (Ubuntu 4.8.4-2ubuntu1~14.04) 4.8.4"
.section .note.GNU-stack,"",#progbits
Ok, I've now had a chance to test this and verify that what I suggested in my comment works. Here's my modified version of the assembly code, with some comments to explain the things I've added/changed:
global _calc
extern _printf
; -----------------------------------------------------------------------
; extern void calc(int nlines, float* data1, float* data2,
; float* result1, float* result2)
; -----------------------------------------------------------------------
_calc:
section .data
.strf db "calc.asm: F data1[0]=%f", 10, 0
.strx db "calc.asm: X data1[0]=%X", 10, 0
section .text
enter 0, 0
; Move the value of float* data1 into ecx.
mov ecx, [esp + 12]
; Move the contents of data1[0] into esi.
mov esi, [ecx]
fld dword [ecx] ; Load a single-precision float onto the FP stack.
sub esp,8 ; Make room for a double on the stack.
fstp qword [esp] ; Store the top of the FP stack on the regular stack as
; a double, and pop it off the FP stack.
push .strf
call _printf
add esp, 12 ; 12 == sizeof(char*) + sizeof(double)
push esi
push .strx
call _printf
add esp, 8
leave
ret
I came across this inline asm. I am not sure how it should look without this syntax... Could someone show it to me?
__asm__ volatile ("lock\n\tincl %0"
:"=m"(llvm_cbe_tmp__29)
:"m"(*(llvm_cbe_tmp__29))"cc");
lock
incl llvm_cbe_tmp__29
However, because the operand is specified abstractly, the compiler will generate the code needed to reference it, even if that means a load and store. As a result it is possible that more than two instructions or an addressing mode will be added.
Using gcc -S on this:
int main()
{
int *p;
asm volatile ("lock\n\tincl %0":"=m"(p):"m"(*(p)):"cc");
}
gives
.type main, #function
main:
leal 4(%esp), %ecx
andl $-16, %esp
pushl -4(%ecx)
pushl %ebp
movl %esp, %ebp
pushl %ecx
subl $20, %esp
movl -8(%ebp), %eax
#APP
# 4 "asm.c" 1
lock
incl -8(%ebp)
# 0 "" 2
#NO_APP
addl $20, %esp
popl %ecx
popl %ebp
leal -4(%ecx), %esp
ret