Signed 64-Bit multiply and 128-Bit Divide on x86 in assembly - algorithm

I have 2 functions written in assembly (masm) in visual studio that i use in my C++ project. They are an Unsigned 64-Bit multiply function that produces a 128-Bit result, and a Unsigned 128-Bit divide function that produces a 128-Bit Quotient and returns a 32-Bit Remainder.
What i need is a signed version of the functions but I'm not sure how to do it.
Below is the code of the .asm file with the Unsigned functions:
.MODEL flat, stdcall
.CODE
MUL64 PROC, A:QWORD, B:QWORD, pu128:DWORD
push EAX
push EDX
push EBX
push ECX
push EDI
mov EDI,pu128
; LO(A) * LO(B)
mov EAX,DWORD PTR A
mov EDX,DWORD PTR B
MUL EDX
mov [EDI],EAX ; Save the partial product.
mov ECX,EDX
; LO(A) * HI(B)
mov EAX,DWORD PTR A
mov EDX,DWORD PTR B+4
MUL EDX
ADD EAX,ECX
ADC EDX,0
mov EBX,EAX
mov ECX,EDX
; HI(A) * LO(B)
mov EAX,DWORD PTR A+4
mov EDX,DWORD PTR B
MUL EDX
ADD EAX,EBX
ADC ECX,EDX
PUSHFD ; Save carry.
mov [EDI+4],EAX ; Save the partial product.
; HI(A) * HI(B)
mov EAX,DWORD PTR A+4
mov EDX,DWORD PTR B+4
MUL EDX
POPFD ; Retrieve carry from above.
ADC EAX,ECX
ADC EDX,0
mov [EDI+8],EAX ; Save the partial product.
mov [EDI+12],EDX ; Save the partial product.
pop EDI
pop ECX
pop EBX
pop EDX
pop EAX
ret 20
MUL64 ENDP
IMUL64 PROC, A:SQWORD, B:SQWORD, pi128:DWORD
; How to make this work?
ret 20
IMUL64 ENDP
DIV128 PROC, pDividend128:DWORD, Divisor:DWORD, pQuotient128:DWORD
push EDX
push EBX
push ESI
push EDI
MOV ESI,pDividend128
MOV EDI,pQuotient128
MOV EBX,Divisor
XOR EDX,EDX
MOV EAX,[ESI+12]
DIV EBX
MOV [EDI+12],EAX
MOV EAX,[ESI+8]
DIV EBX
MOV [EDI+8],EAX
MOV EAX,[ESI+4]
DIV EBX
MOV [EDI+4],EAX
MOV EAX,[ESI]
DIV EBX
MOV [EDI],EAX
MOV EAX,EDX
pop EDI
pop ESI
pop EBX
pop EDX
ret 12
DIV128 ENDP
IDIV128 PROC, pDividend128:DWORD, Divisor:DWORD, pQuotient128:DWORD
; How to make this work?
ret 12
IDIV128 ENDP
END
If you found this helpful in anyway please help the project by helping code the Signed version of the functions.

First, the MUL64 function does not work 100%
If you try to do 0xFFFFFFFFFFFFFFFF x 0xFFFFFFFFFFFFFFFF, the Hi 64-bit result is 0xFFFFFFFeFFFFFFFF, it should be 0xFFFFFFFFFFFFFFFe
To fix this, the carry flag after the POPFD instruction should be added to EDX, the highest 32-bit part of the result. Now following Peter Cordes advice, remove the push and pops of EAX/ECX/EDX. Finally use setc BL and movzx EBX,BL to save the flag. Note: you cannot easily use xor EBX,EBX to zero it because xor effects the flags. We use movzx because its faster than add BL,0xFF and add is faster than adc based on Skylake specs.
The Result:
MUL64 PROC, A:QWORD, B:QWORD, pu128:DWORD
push EBX
push EDI
mov EDI,pu128
; LO(A) * LO(B)
mov EAX,DWORD PTR A
mov EDX,DWORD PTR B
mul EDX
mov [EDI],EAX ; Save the partial product.
mov ECX,EDX
; LO(A) * HI(B)
mov EAX,DWORD PTR A
mov EDX,DWORD PTR B+4
mul EDX
add EAX,ECX
adc EDX,0
mov EBX,EAX
mov ECX,EDX
; HI(A) * LO(B)
mov EAX,DWORD PTR A+4
mov EDX,DWORD PTR B
mul EDX
add EAX,EBX
adc ECX,EDX
setc BL ; Save carry.
movzx EBX,BL ; Zero-Extend carry.
mov [EDI+4],EAX ; Save the partial product.
; HI(A) * HI(B)
mov EAX,DWORD PTR A+4
mov EDX,DWORD PTR B+4
mul EDX
add EDX,EBX ; Add carry from above.
add EAX,ECX
adc EDX,0
mov [EDI+8],EAX ; Save the partial product.
mov [EDI+12],EDX ; Save the partial product.
pop EDI
pop EBX
ret 20
MUL64 ENDP
Now, to make a signed version of the function use this formula:
my128.Hi -= (((A < 0) ? B : 0) + ((B < 0) ? A : 0));
The Result:
IMUL64 PROC, A:SQWORD, B:SQWORD, pi128:DWORD
push EBX
push EDI
mov EDI,pi128
; LO(A) * LO(B)
mov EAX,DWORD PTR A
mov EDX,DWORD PTR B
mul EDX
mov [EDI],EAX ; Save the partial product.
mov ECX,EDX
; LO(A) * HI(B)
mov EAX,DWORD PTR A
mov EDX,DWORD PTR B+4
mul EDX
add EAX,ECX
adc EDX,0
mov EBX,EAX
mov ECX,EDX
; HI(A) * LO(B)
mov EAX,DWORD PTR A+4
mov EDX,DWORD PTR B
mul EDX
add EAX,EBX
adc ECX,EDX
setc BL ; Save carry.
movzx EBX,BL ; Zero-Extend carry.
mov [EDI+4],EAX ; Save the partial product.
; HI(A) * HI(B)
mov EAX,DWORD PTR A+4
mov EDX,DWORD PTR B+4
mul EDX
add EDX,EBX ; Add carry from above.
add EAX,ECX
adc EDX,0
mov [EDI+8],EAX ; Save the partial product.
mov [EDI+12],EDX ; Save the partial product.
; Signed version only:
cmp DWORD PTR A+4,0
jg zero_b
jl use_b
cmp DWORD PTR A,0
jae zero_b
use_b:
mov ECX,DWORD PTR B
mov EBX,DWORD PTR B+4
jmp test_b
zero_b:
xor ECX,ECX
mov EBX,ECX
test_b:
cmp DWORD PTR B+4,0
jg zero_a
jl use_a
cmp DWORD PTR B,0
jae zero_a
use_a:
mov EAX,DWORD PTR A
mov EDX,DWORD PTR A+4
jmp do_last_op
zero_a:
xor EAX,EAX
mov EDX,EAX
do_last_op:
add EAX,ECX
adc EDX,EBX
sub [EDI+8],EAX
sbb [EDI+12],EDX
; End of signed version!
pop EDI
pop EBX
ret 20
IMUL64 ENDP
The DIV128 function should be fine (also probably the fastest) for getting a 128-bit quotient from a 32-bit divisor, but if you need to use a 128-bit divisor then look at this code https://www.codeproject.com/Tips/785014/UInt-Division-Modulus that has an example of using the Binary Shift Algorithm for 128-bit division. It could probably be 3x faster if written in assembly.
To make a signed version of DIV128, first determine if the sign of the divisor and dividend are the same or different. If they are the same, then the result should be positive. If they are different, then the result should be negative. So... Make the dividend and divisor positive if they are negative and call DIV128, after that, negate the results if the signs were different.
Here is some example code written in C++
VOID IDIV128(PSDQWORD Dividend, PSDQWORD Divisor, PSDQWORD Quotient, PSDQWORD Remainder)
{
BOOL Negate;
DQWORD DD, DV;
Negate = TRUE;
// Use local DD and DV so Dividend and Divisor dont get currupted.
DD.Lo = Dividend->Lo;
DD.Hi = Dividend->Hi;
DV.Lo = Divisor->Lo;
DV.Hi = Divisor->Hi;
// if the signs are the same then: Negate = FALSE;
if ((DD.Hi & 0x8000000000000000) == (DV.Hi & 0x8000000000000000)) Negate = FALSE;
// Covert Dividend and Divisor to possitive if negative: (negate)
if (DD.Hi & 0x8000000000000000) NEG128((PSDQWORD)&DD);
if (DV.Hi & 0x8000000000000000) NEG128((PSDQWORD)&DV);
DIV128(&DD, &DV, (PDQWORD)Quotient, (PDQWORD)Remainder);
if (Negate == TRUE)
{
NEG128(Quotient);
NEG128(Remainder);
}
}
EDIT:
Following Peter Cordes advice, we can optimize MUL64/IMUL64 even more. Look at the comments for specific changes being made. I have also replaced MUL64 PROC, A:QWORD, B:QWORD, pu128:DWORD with MUL64#20: and IMUL64#20: to eliminate unnecessary use of EBP that masm adds. I also optimized the sign-fixing work for IMUL64.
The current .asm file for MUL64/IMUL64
.MODEL flat, stdcall
EXTERNDEF MUL64#20 :PROC
EXTERNDEF IMUL64#20 :PROC
.CODE
MUL64#20:
push EBX
push EDI
; -----------------
; | pu128 |
; |---------------|
; | B |
; |---------------|
; | A |
; |---------------|
; | ret address |
; |---------------|
; | EBX |
; |---------------|
; ESP---->| EDI |
; -----------------
A TEXTEQU <[ESP+12]>
B TEXTEQU <[ESP+20]>
pu128 TEXTEQU <[ESP+28]>
mov EDI,pu128
; LO(A) * LO(B)
mov EAX,DWORD PTR A
mul DWORD PTR B
mov [EDI],EAX ; Save the partial product.
mov ECX,EDX
; LO(A) * HI(B)
mov EAX,DWORD PTR A
mul DWORD PTR B+4
add EAX,ECX
adc EDX,0
mov EBX,EAX
mov ECX,EDX
; HI(A) * LO(B)
mov EAX,DWORD PTR A+4
mul DWORD PTR B
add EAX,EBX
adc ECX,EDX
setc BL ; Save carry.
mov [EDI+4],EAX ; Save the partial product.
; HI(A) * HI(B)
mov EAX,DWORD PTR A+4
mul DWORD PTR B+4
add EAX,ECX
movzx ECX,BL ; Zero-Extend saved carry from above.
adc EDX,ECX
mov [EDI+8],EAX ; Save the partial product.
mov [EDI+12],EDX ; Save the partial product.
pop EDI
pop EBX
ret 20
IMUL64#20:
push EBX
push EDI
; -----------------
; | pi128 |
; |---------------|
; | B |
; |---------------|
; | A |
; |---------------|
; | ret address |
; |---------------|
; | EBX |
; |---------------|
; ESP---->| EDI |
; -----------------
A TEXTEQU <[ESP+12]>
B TEXTEQU <[ESP+20]>
pi128 TEXTEQU <[ESP+28]>
mov EDI,pi128
; LO(A) * LO(B)
mov EAX,DWORD PTR A
mul DWORD PTR B
mov [EDI],EAX ; Save the partial product.
mov ECX,EDX
; LO(A) * HI(B)
mov EAX,DWORD PTR A
mul DWORD PTR B+4
add EAX,ECX
adc EDX,0
mov EBX,EAX
mov ECX,EDX
; HI(A) * LO(B)
mov EAX,DWORD PTR A+4
mul DWORD PTR B
add EAX,EBX
adc ECX,EDX
setc BL ; Save carry.
mov [EDI+4],EAX ; Save the partial product.
; HI(A) * HI(B)
mov EAX,DWORD PTR A+4
mul DWORD PTR B+4
add EAX,ECX
movzx ECX,BL ; Zero-Extend saved carry from above.
adc EDX,ECX
mov [EDI+8],EAX ; Save the partial product.
mov [EDI+12],EDX ; Save the partial product.
; Signed version only:
mov BL,BYTE PTR B+7
and BL,80H
jz zero_a
mov EAX,DWORD PTR A
mov EDX,DWORD PTR A+4
jmp test_a
zero_a:
xor EAX,EAX
mov EDX,EAX
test_a:
mov BL,BYTE PTR A+7
and BL,80H
jz do_last_op
add EAX,DWORD PTR B
adc EDX,DWORD PTR B+4
do_last_op:
sub [EDI+8],EAX
sbb [EDI+12],EDX
; End of signed version!
pop EDI
pop EBX
ret 20
END

Related

Assembly Procedures not running Intel 8086

The code below, is supposed to get 20 user-entered numbers (6 digit numbers or less) and compute the average as well as sorting them, When I set it to get 6 or less numbers, it works fine. But when it is set to get 7-20 numbers, after getting the numbers, it skips the next procedures and someimes runs the GetNum procedure ( The one that gets the numbers from user) again and when it gets 11 numbers, I get this message "PROGRAM HAS RETURNED CONTROL TO THE OPERATING SYSTEM".
ShowMsg macro msg
mov ah, 09h
mov dx, offset msg
int 21h
endm
NewLine macro
mov ah, 02h
mov dl, 0ah
int 21h
mov dl, 0dh
int 21h
endm
data segment
sum dd 0
num dd 0
ave dd 0
array dd 20 dup(0)
msg1 db 'Enter 20 numbers:', '$'
msg2 db 0dh,0ah,'Average: ', '$'
temp dd ?
data ends
stack segment
dd 100 dup(?)
stack ends
code segment
assume cs:code, ds:data, ss:stack
Main Proc Far
mov ax, data
mov ds, ax
mov ax, stack
mov ss, ax
;Printing first message.
ShowMsg msg1
call GetNum
call Average
call Sort
call Print
mov ah, 4ch
int 21h
Main endp
proc GetNum
mov bp, 0
mov ch, 20
NextNumber:
NewLine
mov cl, 6
mov word ptr num, 0
mov word ptr num+2, 0
GetChar:
mov ah, 07h
int 21h
cmp al, 0dh
jz Flag
cmp al, 30h
jb GetChar
cmp al, 39h
ja GetChar
mov ah, 02h
mov dl, al
int 21h
sub al, 30h
mov bl, al
mov di, 10
mov ax, num
mul di
mov num, ax
push dx
mov ax, num+2
mul di
mov num+2, ax
pop dx
add num+2, dx
mov bh, 0
add num, bx
adc word ptr num+2, 0
dec cl
jnz GetChar
Flag:
mov ax, num
mov dx, num+2
mov array[bp], ax
mov array[bp+2], dx
add bp, 4
add sum, ax
adc sum+2, dx
dec ch
jnz NextNumber
ret
GetNum endp
proc Average
mov bx, 20
mov dx, 0
mov ax, word ptr sum+2
div bx
mov word ptr ave+2, ax
mov ax, word ptr sum
div bx
mov word ptr ave, ax
ShowMsg msg2
mov cl, 0
Next1:
mov bx, 10
mov dx, 0
mov ax, word ptr ave+2
div bx
mov word ptr ave+2, ax
mov ax, word ptr ave
div bx
mov word ptr ave, ax
push dx
inc cl
cmp ave, 0
jnz Next1
Next2:
pop dx
add dl, 30h
mov ah, 02h
int 21h
dec cl
jnz Next2
NewLine
ret
Average endp
proc Sort
mov ch, 20
OuterFor:
mov bp, 0
Cmp1:
mov ax, array[bp+2]
mov bx, array[bp+6]
cmp ax,bx
ja Xchange
cmp ax,bx
jz Cmp2
jmp Next
Cmp2:
mov ax, array[bp]
mov bx, array[bp+4]
cmp ax, bx
ja Xchange
jmp Next
Xchange:
mov ax, array[bp]
mov dx, array[bp+2]
mov temp, ax
mov temp+2, dx
mov ax, array[bp+4]
mov dx, array[bp+6]
mov array[bp], ax
mov array[bp+2], dx
mov ax, temp
mov dx, temp+2
mov array[bp+4], ax
mov array[bp+6], dx
Next:
add bp, 4
cmp bp, 76
jnz Cmp1
dec ch
jnz OuterFor
ret
Sort endp
proc Print
mov bp, 0
C:
mov cl, 0
A:
mov bx, 10
mov dx, 0
mov ax, array[bp+2]
div bx
mov array[bp+2], ax
mov ax, array[bp]
div bx
mov array[bp], ax
push dx
inc cl
mov ax, array[bp]
mov dx, array[bp+2]
or ax, dx
jnz A
B:
pop dx
add dl, 30h
mov ah, 02h
int 21h
dec cl
jnz B
add bp, 4
NewLine
cmp bp, 80
jnz C
ret
Print endp
code ends
end main
The problem lies with these two lines (and possibly similar elsewhere):
mov array[bp], ax
mov array[bp+2], dx
By default, the bp register addresses the stack segment, not the data segment where array is. You must either use another index register, or over ride the segment with
mov ds:array[bp], ax
mov ds:array[bp+2], dx
If it worked with a small number of elements, that was by luck that nothing was corrupted to make a crash or spoil the data.
UPDATE
I would suggest modifying the GetNum proc so you can use bx to index array, instead of bp.
proc GetNum
mov bx, 0
mov ch, 20
NextNumber:
push bx
NewLine
...
pop bx
mov array[bx], ax
mov array[bx+2], dx
add bx, 4
...
Similarly with your sorting function - swap the roles of bx and bp. It it better to use bp as a general purpose register and bx as an indexing register.

sorting 32 bit numbers in 8086

i wrote this code and think for it about 5 hours;
However i stuck in sorting part:
its begin to loop and never stop !
where did i wrong ???
IT SEEMS that the array didnt get the numbers correctly.
can any one help please ?
page 110,100
Title 'decimal sorting'
Data_here segment
A DD Dup 20(?)
msg1 DB 'Enter a maximum 6 digits number : ',0DH,0AH,'$'
msg2 DB 0DH,0AH,'The sorted results are : ',0DH,0AH,'$'
Data_here Ends
Stack_here segment
DW 10 DUP(?)
Stack_here Ends
Code_here segment
Assume CS:code_here , DS:Data_here ,SS:Stack_here
Main Proc near
mov AX,Data_here
mov DS,AX
mov AX,Stack_here
mov SS,AX
Call decimal_input
call sorting
call decimal_output
mov AH,4CH
int 21H
main Endp
decimal_input proc near
PUSH A
mov AH,09H
Lea DX,msg1
int 21H
mov CH,20
next_number:
mov BH,6
mov DX,0
mov si,4
next_digit:
mov AH,07H
int 21H
CMP AL,0DH
JNE check_digit
CMP BH,6
JE next_digit
DEC CH
JZ end
mov DX,0DH
mov AH,02H
int 21H
mov DX,0AH
Int 21H
jmp next_number
check_digit:
cmp AL,30H
JB next_digit
cmp AL,39H
JA next_digit
mov AH,02H
mov DL,AL
int 21H
SUB AL,30H
SHL DX,4
ADD DL,AL
DEC SI
JZ save
DEC BH
JNZ next_digit
jmp remain
save:
mov A[DI],DX
SHL A[DI],8
add si,4
DEC BH
jmp next_digit
remain:
ADD byte ptr A[DI],DL
mov DX,0DH
mov AH,02H
int 21H
mov DX,0AH
INT 21H
INC DI
DEC CH
JNZ next_number
end:
pop A
RET
decimal_input ENDp
sorting proc near
Push A
mov SI,DI
check:
mov AX,word ptr A[SI]
mov BX,word ptr A[DI+2]
CMP AX,BX
JA change
JE extra_check
add SI,2
continue:
add DI,2
CMP DI,38
JA finish
JB check
extra_check:
mov CX,word ptr A[DI+1]
mov DX,word ptr A[DI+3]
cmp CX,DX
JNA continue
mov word ptr A[DI+1],DX
mov word ptr A[DI+3],CX
jmp continue
change:
xchg AX,BX
mov CX,word ptr A[DI+1]
mov DX,word ptr A[DI+3]
xchg CX,DX
mov word ptr A[DI],AX
mov word ptr A[DI+1],CX
mov word ptr A[DI+2],BX
mov word ptr A[DI+3],DX
jmp continue
finish:
Add sp,2
cmp Sp,40
JE ending
mov DI,SP
jmp check
ending:
POP A
RET
sorting ENDP
decimal_output proc near
PUSH A
mov AH,09H
lea DX,msg2
INT 21H
next_no:
mov DL,0
mov AL,0
mov AH,0
next:
CMP AL,0
JE next1
mov DL,byte ptr A[DI]
ADD DL,30H
mov AL,1
int 21H
next1:
inc DI
inc AH
cmp AH,8
JB next
CMP DI,80
JA THE_END
mov DX,0DH
mov AH,02H
int 21H
mov DX,0AH
int 21H
jmp next_no
THE_END:
pop A
RET
decimal_output ENDp
code_here Ends
END MAIN
I think your problem might be here
Add sp,2
cmp Sp,40
You are using sp as a loop counter, but you never initialize it. Additionaly I also don't understand why you are using sp in the first place. Of course it is theoretically possible, but then you would have to restore it before you return. When your proc returns it will screw up, because the stack pointer is corrupted. So you shouild use some other register or a memory variable (bp is not used in your code).
And I would strongly recommend to reformat your code, to make it more readable (when I looked at it I formatted it like this which makes it better readable):
sorting proc near
Push A
mov SI,DI
check:
mov AX,word ptr A[SI]
mov BX,word ptr A[DI+2]
CMP AX,BX
JA change
JE extra_check
add SI,2
continue:
add DI,2
CMP DI,38
JA finish
JB check
...

What syntax does a disassembled .s file is not acceptable in nasm?

This code is a C program (bubble sort) disassembled into assembly. How can I make the following code run if I put it in a .asm file and use nasm to assemble? If you know what needs changing, please say what to change it to. For instance I understand that nasm won't accept DWORD PTR, but I haven't found out what to do instead. Thanks
.file "sort.c" .intel_syntax noprefix .text .globl
sort .type sort, #function
sort: .LFB0:
.cfi_startproc
push rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
mov rbp, rsp
.cfi_def_cfa_register 6
mov QWORD PTR [rbp-24], rdi
mov DWORD PTR [rbp-28], esi
mov DWORD PTR [rbp-12], 0
jmp .L2
.L6:
mov DWORD PTR [rbp-8], 0
jmp .L3
.L5:
mov eax, DWORD PTR [rbp-8]
cdqe
sal rax, 2
add rax, QWORD PTR [rbp-24]
mov edx, DWORD PTR [rax]
mov eax, DWORD PTR [rbp-8]
cdqe
add rax, 1
sal rax, 2
add rax, QWORD PTR [rbp-24]
mov eax, DWORD PTR [rax]
cmp edx, eax
jle .L4
mov eax, DWORD PTR [rbp-8]
cdqe
sal rax, 2
add rax, QWORD PTR [rbp-24]
mov eax, DWORD PTR [rax]
mov DWORD PTR [rbp-4], eax
mov eax, DWORD PTR [rbp-8]
cdqe
sal rax, 2
add rax, QWORD PTR [rbp-24]
mov edx, DWORD PTR [rbp-8]
movsx rdx, edx
add rdx, 1
sal rdx, 2
add rdx, QWORD PTR [rbp-24]
mov edx, DWORD PTR [rdx]
mov DWORD PTR [rax], edx
mov eax, DWORD PTR [rbp-8]
cdqe
add rax, 1
sal rax, 2
add rax, QWORD PTR [rbp-24]
mov edx, DWORD PTR [rbp-4]
mov DWORD PTR [rax], edx
.L4:
add DWORD PTR [rbp-8], 1
.L3:
mov eax, DWORD PTR [rbp-28]
sub eax, 1
sub eax, DWORD PTR [rbp-12]
cmp eax, DWORD PTR [rbp-8]
jg .L5
add DWORD PTR [rbp-12], 1
.L2:
mov eax, DWORD PTR [rbp-28]
sub eax, 1
cmp eax, DWORD PTR [rbp-12]
jg .L6
pop rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size sort, .-sort
.ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
.section .note.GNU-stack,"",#progbits
Just remove PTR and all nonsensical .somethings.
This assembles just fine:
; file: gas-nasm-sort.asm
bits 64
sort:
push rbp
mov rbp, rsp
mov QWORD [rbp-24], rdi
mov DWORD [rbp-28], esi
mov DWORD [rbp-12], 0
jmp .L2
.L6:
mov DWORD [rbp-8], 0
jmp .L3
.L5:
mov eax, DWORD [rbp-8]
cdqe
sal rax, 2
add rax, QWORD [rbp-24]
mov edx, DWORD [rax]
mov eax, DWORD [rbp-8]
cdqe
add rax, 1
sal rax, 2
add rax, QWORD [rbp-24]
mov eax, DWORD [rax]
cmp edx, eax
jle .L4
mov eax, DWORD [rbp-8]
cdqe
sal rax, 2
add rax, QWORD [rbp-24]
mov eax, DWORD [rax]
mov DWORD [rbp-4], eax
mov eax, DWORD [rbp-8]
cdqe
sal rax, 2
add rax, QWORD [rbp-24]
mov edx, DWORD [rbp-8]
movsx rdx, edx
add rdx, 1
sal rdx, 2
add rdx, QWORD [rbp-24]
mov edx, DWORD [rdx]
mov DWORD [rax], edx
mov eax, DWORD [rbp-8]
cdqe
add rax, 1
sal rax, 2
add rax, QWORD [rbp-24]
mov edx, DWORD [rbp-4]
mov DWORD [rax], edx
.L4:
add DWORD [rbp-8], 1
.L3:
mov eax, DWORD [rbp-28]
sub eax, 1
sub eax, DWORD [rbp-12]
cmp eax, DWORD [rbp-8]
jg .L5
add DWORD [rbp-12], 1
.L2:
mov eax, DWORD [rbp-28]
sub eax, 1
cmp eax, DWORD [rbp-12]
jg .L6
pop rbp
ret
Command:
nasm gas-nasm-sort.asm -f bin -o gas-nasm-sort.bin
But again, there's NASM documentation. Read it. In particular these sections:
2.2.2 NASM Requires Square Brackets For Memory References
2.2.3 NASM Doesn't Store Variable Types

Disassembling CGContextReplacePathWithShapePath()?

I am beginner at dis-assembly. Below is the disassembly of CGContextReplacePathWithShapePath from IDA pro. I am trying to figure out the parameters to the routine.
Any help is highly appreciated. Also please post some pointers/hints on how to attack the dis-assembly.
__text:00024D1F ; =============== S U B R O U T I N E =======================================
__text:00024D1F
__text:00024D1F ; Attributes: bp-based frame
__text:00024D1F
__text:00024D1F public _CGContextReplacePathWithShapePath
__text:00024D1F _CGContextReplacePathWithShapePath proc near
__text:00024D1F
__text:00024D1F var_2C = dword ptr -2Ch
__text:00024D1F var_20 = dword ptr -20h
__text:00024D1F var_1C = dword ptr -1Ch
__text:00024D1F arg_0 = dword ptr 8
__text:00024D1F arg_4 = dword ptr 0Ch
__text:00024D1F
__text:00024D1F push ebp
__text:00024D20 mov ebp, esp
__text:00024D22 push edi
__text:00024D23 push esi
__text:00024D24 push ebx
__text:00024D25 sub esp, 3Ch
__text:00024D28 call $+5
__text:00024D2D pop ebx
__text:00024D2E mov esi, [ebp+arg_0]
__text:00024D31 test esi, esi
__text:00024D33 jz short loc_24D3E
__text:00024D35 cmp dword ptr [esi+8], 43545854h
__text:00024D3C jz short loc_24D5F
__text:00024D3E
__text:00024D3E loc_24D3E: ; CODE XREF: _CGContextReplacePathWithShapePath+14j
__text:00024D3E mov [esp+8], esi
__text:00024D42 lea eax, (___func___26084 - 24D2Dh)[ebx] ; "CGContextReplacePathWithShapePath"
__text:00024D48 mov [esp+4], eax
__text:00024D4C lea eax, (aSInvalidContex - 24D2Dh)[ebx] ; "%s: invalid context %p"
__text:00024D52 mov [esp], eax
__text:00024D55 call _CGPostError
__text:00024D5A jmp loc_24DF2
__text:00024D5F ; ---------------------------------------------------------------------------
__text:00024D5F
__text:00024D5F loc_24D5F: ; CODE XREF: _CGContextReplacePathWithShapePath+1Dj
__text:00024D5F mov eax, [esi+58h]
__text:00024D62 test eax, eax
__text:00024D64 jz short loc_24D75
__text:00024D66 mov [esp], eax
__text:00024D69 call _CGPathRelease
__text:00024D6E mov dword ptr [esi+58h], 0
__text:00024D75
__text:00024D75 loc_24D75: ; CODE XREF: _CGContextReplacePathWithShapePath+45j
__text:00024D75 mov eax, [ebp+arg_4]
__text:00024D78 mov [esp], eax
__text:00024D7B call _CGSRegionPathEnumerator
__text:00024D80 mov edi, eax
__text:00024D82 xor ebx, ebx
__text:00024D84 lea eax, [ebp+var_20]
__text:00024D87 mov [ebp+var_2C], eax
__text:00024D8A jmp short loc_24DCA
__text:00024D8C ; ---------------------------------------------------------------------------
__text:00024D8C
__text:00024D8C loc_24D8C: ; CODE XREF: _CGContextReplacePathWithShapePath+BDj
__text:00024D8C jge short loc_24DB4
__text:00024D8E inc ebx
__text:00024D8F cmp ebx, 1
__text:00024D92 jz short loc_24D9C
__text:00024D94 mov [esp], esi
__text:00024D97 call _CGContextClosePath
__text:00024D9C
__text:00024D9C loc_24D9C: ; CODE XREF: _CGContextReplacePathWithShapePath+73j
__text:00024D9C mov eax, [ebp+var_1C]
__text:00024D9F mov [esp+8], eax
__text:00024DA3 mov eax, [ebp+var_20]
__text:00024DA6 mov [esp+4], eax
__text:00024DAA mov [esp], esi
__text:00024DAD call _CGContextMoveToPoint
__text:00024DB2 jmp short loc_24DCA
__text:00024DB4 ; ---------------------------------------------------------------------------
__text:00024DB4
__text:00024DB4 loc_24DB4: ; CODE XREF: _CGContextReplacePathWithShapePath:loc_24D8Cj
__text:00024DB4 mov eax, [ebp+var_1C]
__text:00024DB7 mov [esp+8], eax
__text:00024DBB mov eax, [ebp+var_20]
__text:00024DBE mov [esp+4], eax
__text:00024DC2 mov [esp], esi
__text:00024DC5 call _CGContextAddLineToPoint
__text:00024DCA
__text:00024DCA loc_24DCA: ; CODE XREF: _CGContextReplacePathWithShapePath+6Bj
__text:00024DCA ; _CGContextReplacePathWithShapePath+93j
__text:00024DCA mov eax, [ebp+var_2C]
__text:00024DCD mov [esp+4], eax
__text:00024DD1 mov [esp], edi
__text:00024DD4 call _CGSNextPoint
__text:00024DD9 cmp eax, 0
__text:00024DDC jnz short loc_24D8C
__text:00024DDE test ebx, ebx
__text:00024DE0 jz short loc_24DEA
__text:00024DE2 mov [esp], esi
__text:00024DE5 call _CGContextClosePath
__text:00024DEA
__text:00024DEA loc_24DEA: ; CODE XREF: _CGContextReplacePathWithShapePath+C1j
__text:00024DEA mov [esp], edi
__text:00024DED call _CGSReleaseRegionEnumerator
__text:00024DF2
__text:00024DF2 loc_24DF2: ; CODE XREF: _CGContextReplacePathWithShapePath+3Bj
__text:00024DF2 add esp, 3Ch
__text:00024DF5 pop ebx
__text:00024DF6 pop esi
__text:00024DF7 pop edi
__text:00024DF8 leave
__text:00024DF9 retn
__text:00024DF9 _CGContextReplacePathWithShapePath endp
The first parameter is a context parameter of some sort.
The function checks to see if it's NULL and in that case jumps to loc_24D3E (where you can see it calls an error function _CGPostError with the format string "%s: invalid context %p"). BTW: Right after it checks a magic value in the context (at context+8) to see if it is valid otherwise it exits through the same error path. The magic value43545854h is CTXT as chars btw.
__text:00024D2E mov esi, [ebp+arg_0]
__text:00024D31 test esi, esi
__text:00024D33 jz short loc_24D3E
As for the second parameter, it looks to be some kind of path as it is used as the sole parameter to CGSRegionPathEnumerator.
__text:00024D75 mov eax, [ebp+arg_4]
__text:00024D78 mov [esp], eax
__text:00024D7B call _CGSRegionPathEnumerator
In C the function would look something like this:
void CGContextReplacePathWithShapePath(Context* context, Path* path) {
if(context == NULL || context->magic != 0x43545854) {
CGPostError("%s: invalid context %p", "CGContextReplacePathWithShapePath", context);
return;
}
// loc_24D5F
if (context->path != NULL) {
CGPathRelease(context->path);
context->path = NULL;
}
// loc_24D75
RegionPathEnumerator* rpe = CGSRegionPathEnumerator(path);
// ....
}

Reversing of _PrepareMenuWindow() subroutine

Can some one help me with reversing of _PrepareMenuWindow() subroutine?
I am trying to find the signature of the method.
__text:000639A7 _PrepareMenuWindow proc near ; CODE XREF: DrawTheMenu(MenuSelectData *,__CFArray **,uchar,uchar *)+274p
__text:000639A7 ; PopUpMenuSelectCore(MenuData *,Point,double,Point,ushort,uint,Rect const*,ushort,ulong,Rect const*,Rect const*,__CFString const*,OpaqueMenuRef **,ushort *)+528p
__text:000639A7
__text:000639A7 var_44 = dword ptr -44h
__text:000639A7 var_40 = dword ptr -40h
__text:000639A7 var_3C = dword ptr -3Ch
__text:000639A7 var_34 = dword ptr -34h
__text:000639A7 var_30 = dword ptr -30h
__text:000639A7 var_2C = dword ptr -2Ch
__text:000639A7 var_28 = dword ptr -28h
__text:000639A7 var_24 = word ptr -24h
__text:000639A7 var_20 = dword ptr -20h
__text:000639A7 var_1A = word ptr -1Ah
__text:000639A7 arg_0 = dword ptr 8
__text:000639A7 arg_4 = dword ptr 0Ch
__text:000639A7 arg_8 = dword ptr 10h
__text:000639A7
__text:000639A7 push ebp
__text:000639A8 mov ebp, esp
__text:000639AA push edi
__text:000639AB push esi
__text:000639AC push ebx
__text:000639AD sub esp, 5Ch
__text:000639B0 xor edi, edi
__text:000639B2 mov eax, [ebp+arg_0]
__text:000639B5 test eax, eax
__text:000639B7 jz short loc_639C6
__text:000639B9 mov eax, [ebp+arg_0]
__text:000639BC mov [esp], eax
__text:000639BF call __ZNK8HIObject13GetEncodedRefEv ; HIObject::GetEncodedRef(void)
__text:000639C4 mov edi, eax
__text:000639C6
__text:000639C6 loc_639C6: ; CODE XREF: _PrepareMenuWindow+10j
__text:000639C6 mov ecx, [ebp+arg_4]
__text:000639C9 mov eax, [ecx]
__text:000639CB mov edx, [ecx+4]
__text:000639CE mov [ebp+var_2C], eax
__text:000639D1 mov [ebp+var_28], edx
__text:000639D4 lea eax, [ebp+var_1A]
__text:000639D7 mov [ebp+var_40], eax
__text:000639DA mov [esp+4], eax
__text:000639DE mov [esp], edi
__text:000639E1 call _GetMenuType
__text:000639E6 mov dword ptr [esp+4], 0
__text:000639EE mov [esp], edi
__text:000639F1 call _IsMenuItemEnabled
__text:000639F6 movzx edx, [ebp+var_1A]
__text:000639FA or dh, 1
__text:000639FD test al, al
__text:000639FF movzx ebx, [ebp+var_1A]
__text:00063A03 cmovz ebx, edx
__text:00063A06 mov [ebp+var_1A], bx
__text:00063A0A mov eax, [ebp+arg_8]
__text:00063A0D mov [esp+0Ch], eax
__text:00063A11 lea ecx, [ebp+var_2C]
__text:00063A14 mov [ebp+var_44], ecx
__text:00063A17 mov [esp+8], ecx
__text:00063A1B mov eax, [ebp+arg_4]
__text:00063A1E mov [esp+4], eax
__text:00063A22 mov [esp], edi
__text:00063A25 call __AddOpenMenu
__text:00063A2A mov ecx, [ebp+var_44]
__text:00063A2D mov [esp], ecx
__text:00063A30 call _EmptyRect
__text:00063A35 test al, al
__text:00063A37 jnz loc_63B94
__text:00063A3D mov [esp], edi
__text:00063A40 call __Z11GetMenuDataP13OpaqueMenuRef ; GetMenuData(OpaqueMenuRef *)
__text:00063A45 mov [ebp+var_3C], eax
__text:00063A48 call _NewRgn
__text:00063A4D mov esi, eax
__text:00063A4F test eax, eax
__text:00063A51 jz loc_63BDD
__text:00063A57 movzx ebx, bx
__text:00063A5A mov eax, [ebp+var_3C]
__text:00063A5D mov eax, [eax+40h]
__text:00063A60 test eax, eax
__text:00063A62 jnz loc_63B23
__text:00063A68 mov [ebp+var_1A], 0
__text:00063A6E mov eax, [ebp+var_2C]
__text:00063A71 mov edx, [ebp+var_28]
__text:00063A74 mov [ebp+var_34], eax
__text:00063A77 mov [ebp+var_30], edx
__text:00063A7A mov ecx, [ebp+var_40]
__text:00063A7D mov [esp+10h], ecx
__text:00063A81 mov dword ptr [esp+0Ch], 0
__text:00063A89 lea eax, [ebp+var_34]
__text:00063A8C mov [esp+8], eax
__text:00063A90 mov dword ptr [esp+4], 7
__text:00063A98 mov eax, [ebp+var_3C]
__text:00063A9B mov [esp], eax
__text:00063A9E call __Z12_CallMenuDefP8MenuDatasP4Rect5PointPs ; _CallMenuDef(MenuData *,short,Rect *,Point,short *)
__text:00063AA3 cmp [ebp+var_1A], 7473h
__text:00063AA9 jz short loc_63ADC
__text:00063AAB add word ptr [ebp+var_2C], 3
__text:00063AB0 mov dword ptr [esp+8], 0FFFFFFFCh
__text:00063AB8 mov dword ptr [esp+4], 0FFFFFFFCh
__text:00063AC0 mov ecx, [ebp+var_44]
__text:00063AC3 mov [esp], ecx
__text:00063AC6 call _InsetRect
__text:00063ACB mov eax, [ebp+var_44]
__text:00063ACE mov [esp+4], eax
__text:00063AD2 mov [esp], esi
__text:00063AD5 call _RectRgn
__text:00063ADA jmp short loc_63B23
__text:00063ADC ; ---------------------------------------------------------------------------
__text:00063ADC
__text:00063ADC loc_63ADC: ; CODE XREF: _PrepareMenuWindow+102j
__text:00063ADC lea eax, [ebp+var_24]
__text:00063ADF mov [esp+8], eax
__text:00063AE3 lea eax, [ebp+var_20]
__text:00063AE6 mov [esp+4], eax
__text:00063AEA mov [esp], edi
__text:00063AED call __GetMenuCallout
__text:00063AF2 movsx eax, [ebp+var_24]
__text:00063AF6 mov [esp+10h], eax
__text:00063AFA mov eax, [ebp+var_20]
__text:00063AFD mov [esp+0Ch], eax
__text:00063B01 mov [esp+8], esi
__text:00063B05 mov [esp+4], ebx
__text:00063B09 mov ecx, [ebp+var_44]
__text:00063B0C mov [esp], ecx
__text:00063B0F call __GetThemeMenuBackgroundRegionWithCallout
__text:00063B14 mov eax, [ebp+var_44]
__text:00063B17 mov [esp+4], eax
__text:00063B1B mov [esp], esi
__text:00063B1E call _GetRegionBounds
__text:00063B23
__text:00063B23 loc_63B23: ; CODE XREF: _PrepareMenuWindow+BBj
__text:00063B23 ; _PrepareMenuWindow+133j
__text:00063B23 mov [esp+0Ch], esi
__text:00063B27 mov ecx, [ebp+var_44]
__text:00063B2A mov [esp+8], ecx
__text:00063B2E mov [esp+4], ebx
__text:00063B32 mov [esp], edi
__text:00063B35 call __ZL13GetMenuWindowP13OpaqueMenuReftPK4RectP15OpaqueRgnHandle ; GetMenuWindow(OpaqueMenuRef *,ushort,Rect const*,OpaqueRgnHandle *)
__text:00063B3A test eax, eax
__text:00063B3C jz short loc_63BA1
__text:00063B3E mov [esp], eax
__text:00063B41 call _GetWindowPort
__text:00063B46 mov [esp], eax
__text:00063B49 call _SetPortWrapper
__text:00063B4E mov [esp], esi
__text:00063B51 call _SetClipWrapper
__text:00063B56 mov [esp], esi
__text:00063B59 call _DisposeRgn
__text:00063B5E mov eax, [ebp+var_3C]
__text:00063B61 mov eax, [eax+40h]
__text:00063B64 test eax, eax
__text:00063B66 jnz short loc_63BDD
__text:00063B68 mov dword ptr [esp+14h], 0
__text:00063B70 mov dword ptr [esp+10h], 0
__text:00063B78 mov [esp+0Ch], ebx
__text:00063B7C mov ecx, [ebp+arg_4]
__text:00063B7F mov [esp+8], ecx
__text:00063B83 mov eax, [ebp+var_44]
__text:00063B86 mov [esp+4], eax
__text:00063B8A mov [esp], edi
__text:00063B8D call __Z18DrawMenuBackgroundP13OpaqueMenuRefRK4RectS3_thPv ; DrawMenuBackground(OpaqueMenuRef *,Rect const&,Rect const&,ushort,uchar,void *)
__text:00063B92 jmp short loc_63BDD
__text:00063B94 ; ---------------------------------------------------------------------------
__text:00063B94
__text:00063B94 loc_63B94: ; CODE XREF: _PrepareMenuWindow+90j
__text:00063B94 mov ecx, [ebp+arg_0]
__text:00063B97 mov [esp], ecx
__text:00063B9A call _DisposeMenuWindow
__text:00063B9F jmp short loc_63BDD
__text:00063BA1 ; ---------------------------------------------------------------------------
__text:00063BA1
__text:00063BA1 loc_63BA1: ; CODE XREF: _PrepareMenuWindow+195j
__text:00063BA1 mov eax, [ebp+arg_0]
__text:00063BA4 mov [esp], eax
__text:00063BA7 call __Z11FindMBEntryP8MenuData ; FindMBEntry(MenuData *)
__text:00063BAC mov ecx, eax
__text:00063BAE test eax, eax
__text:00063BB0 jz short loc_63BD5
__text:00063BB2 mov word ptr [eax+1Eh], 0
__text:00063BB8 mov word ptr [eax+1Ch], 0
__text:00063BBE mov word ptr [eax+1Ah], 0
__text:00063BC4 mov word ptr [eax+18h], 0
__text:00063BCA mov eax, [eax+18h]
__text:00063BCD mov edx, [ecx+1Ch]
__text:00063BD0 mov [ecx], eax
__text:00063BD2 mov [ecx+4], edx
__text:00063BD5
__text:00063BD5 loc_63BD5: ; CODE XREF: _PrepareMenuWindow+209j
__text:00063BD5 mov [esp], esi
__text:00063BD8 call _DisposeRgn
__text:00063BDD
__text:00063BDD loc_63BDD: ; CODE XREF: _PrepareMenuWindow+AAj
__text:00063BDD ; _PrepareMenuWindow+1BFj ...
__text:00063BDD xor eax, eax
__text:00063BDF add esp, 5Ch
__text:00063BE2 pop ebx
__text:00063BE3 pop esi
__text:00063BE4 pop edi
__text:00063BE5 leave
__text:00063BE6 retn
__text:00063BE6 _PrepareMenuWindow endp
What have you got so far that isn't generated by IDA? (ie: your analysis of the function).
From the looks of it its a __cdecl function that always returns NULL/false/0. It also seems to take 3 arguments(which can be confirmed by looking at what cleanup is by the caller, if there is any).
Arg 0 is a MenuData*, arg 4 seems to be a Rect&(which is secretly just Rect*), arg 8 would be whatever type __AddOpenMenu takes as its fourth argument.
So i'd assume something along the lines of typedef BOOL(__cdecl*)(MenuData*,Rect&,void*)

Resources