is there a difference between al and dl? [duplicate] - macos

This question already has answers here:
How do AX, AH, AL map onto EAX?
(6 answers)
Closed 2 years ago.
First, it is my first assembly. And I use it with "NASM" and "64bit ASM" and "Intel" code and Mac OS.
When I try to write strdup function, it didn't work with al.
In using my strdup function, the string was broken.
So, my code is here.
global _strdup
extern _malloc
section .text
_strdup:
xor rcx, rcx
jmp strlen
strlen:
cmp [rdi + rcx], byte 0
je malloc
inc rcx
jmp strlen
malloc:
inc rcx
push rdi
mov rdi, rcx
call _malloc
pop rdi
cmp rax, 0
je error
xor rcx, rcx
jmp copy
copy:
mov al, byte [rdi + rcx]
mov byte [rax + rcx], al
cmp al, byte 0
je return
jmp increment
increment:
inc rcx
jmp copy
error:
mov rax, 0
ret
return:
ret
But, when I write same code with dl not al, it works!
global _strdup
extern _malloc
section .text
_strdup:
xor rcx, rcx
jmp strlen
strlen:
cmp [rdi + rcx], byte 0
je malloc
inc rcx
jmp strlen
malloc:
inc rcx
push rdi
mov rdi, rcx
call _malloc
pop rdi
cmp rax, 0
je error
xor rcx, rcx
jmp copy
copy:
mov dl, byte [rdi + rcx]
mov byte [rax + rcx], dl
cmp dl, byte 0
je return
jmp increment
increment:
inc rcx
jmp copy
error:
mov rax, 0
ret
return:
ret
I don't know why dl works and the difference between al and dl.
Anyone know about it? Thanks.

al is the smallest part of rax while dl is the smallest part of rdx. Changing al will change the value of rax as well.
Why your code is not working with al?
copy:
mov al, byte [rdi + rcx] ; you move "byte [rdi + rcx]" in "al"
; thus changing the value of "rax" as well.
; the result of the following statement becomes unexpected
mov byte [rax + rcx], al
But when you do it with dl, rax is not affected and hence your program works as expected.
Try the following code and run it through debugger to see the effect of changing the value of al on `rax:
section .text
global main
main:
nop
mov rax, 1000
mov al, byte 10
nop
Result:
After mov rax, 1000, value of rax: 1000
00000000 00000000 00000000 00000000 00000000 00000000 00000011 11101000
After mov al, 10, value of rax: 778
00000000 00000000 00000000 00000000 00000000 00000000 00000011 00001010
^^^^^^^^
So, changing al will affect rax.

Related

When I use global variables it crashes my kernel

I have had this happen before and worked around it for a while but now it slowly becomes more and more unavoidable, because now I need them.
For some weird reason, my kernel crashes when I try to use a global variable in my code.
This works:
int global;
void kmain()
{
//do some stuff...
}
This does not work:
int global;
void kmain()
{
global = 1;
//do some stuff...
}
I have no idea why this is happening.
As some additional resources here is my linker script:
OUTPUT_FORMAT(binary)
phys = 0x0500;
SECTIONS
{
.text phys : AT(phys) {
code = .;
*(.text)
*(.rodata)
. = ALIGN(4096);
}
.data : AT(phys + (data - code))
{
data = .;
*(.data)
. = ALIGN(4096);
}
.bss : AT(phys + (bss - code)) {
bss = .;
*(.bss)
. = ALIGN(4096);
}
end = .;
/DISCARD/
: {
*(.comment)
*(.eh_frame)
*(.note.gnu.build-id)
}
}
and my makefile:
bin/UmbrellaOS.img: bin/boot.bin bin/kernel.bin bin/zeros.bin
cat $^ > $#
bin/kernel.bin: tmp/kernel_entry.o tmp/kernel.o
x86_64-elf-ld -o $# -T link.ld $^
tmp/kernel.o: src/kernel/main.c
x86_64-elf-gcc -ffreestanding -m64 -g -c $^ -o $#
Edit:
To be more specific I use QEMU to test my OS upon starting QEMU it instantly closes. It should also be noted that if I try something like this:
int global;
void kmain()
{
return;
global = 0;
}
it works for some reason.
I can see a green L printed to the screen which is the last thing my bootloader does before passing control to the kernel after long mode has been entered.
btw here is my bootloader:
[bits 16]
[org 0x7C00]
KERNEL_LOC equ 0x0500
_start:
mov [_BootDisk], dl
xor ax, ax
mov ds, ax
mov es, ax
mov ss, ax
mov bp, 0x7BFF
mov sp, bp
push 0x7E00 ; buffer
push 1 ; sectors to read
push 2 ; sector num
call DiskRead
jc .error
push ebx
pushfd
pop eax
mov ebx, eax
xor eax, 0x200000
push eax
popfd
pushfd
pop eax
cmp eax, ebx
jnz .supported
push _CpuErrorString
call Print
jmp .error
.supported:
mov eax, 0x80000000
cpuid
cmp eax, 0x80000001
jb .no64
mov eax, 0x80000001
cpuid
test edx, 1 << 29
jnz .is64
.no64:
push _64ErrorString
call Print
jmp .error
.is64:
push 0x8000
call MapMem
push KERNEL_LOC ; buffer
push 8 ; sectors to read
push 3 ; sector num
call DiskRead
jc .error
cli
lgdt [GDT_descriptor]
mov eax, cr0
or eax, 1
mov cr0, eax
jmp CODE_SEG:protected_mode
.error:
jmp $
Print:
push bp
mov bp, sp
mov bx, [bp+4]
mov ah, 0x0E
.loop:
mov al, [bx]
cmp al, 0
je .end
int 0x10
inc bx
jmp .loop
.end:
mov sp, bp
pop bp
ret 2
DiskRead:
push bp
mov bp, sp
mov ah, 0x02
mov al, [bp+6]
mov ch, 0
mov cl, [bp+4]
mov dh, 0
mov dl, [_BootDisk]
mov bx, [bp+8]
int 0x13
cmp al, [bp+6]
je .end
jnc .end
push _DiskErrorString
call Print
.end:
mov sp, bp
pop bp
ret 6
MapMem:
push bp
mov bp, sp
mov si, [bp+4]
mov di, [bp+4]
add di, 4
xor ebx, ebx
mov edx, 0x0534D4150
mov eax, 0xE820
mov [di+20], dword 1
mov ecx, 24
int 0x15
jc .failed
mov edx, 0x0534D4150
cmp eax, edx
jne .failed
test ebx, ebx
je .failed
.loop:
mov eax, 0xE820
mov [di+20], dword 1
mov ecx, 24
int 0x15
jc .finish
mov edx, 0x0534D4150
.jmpin:
jcxz .skip
cmp cl, 20
jbe .notext
test byte [di+20], 1
je .skip
.notext:
mov ecx, [di+8]
or ecx, [di+12]
jz .skip
inc dword [si]
add di, 24
.skip:
test ebx, ebx
jne .loop
.finish:
clc
jmp .end
.failed:
push _MemErrorString
call Print
stc
jmp .end
.end:
mov sp, bp
pop bp
ret 2
_BootDisk: db 0
_DiskErrorString: db "Disk read error!", 13, 10, 0
_MemErrorString: db "Memory mapping failed!", 13, 10, 0
_CpuErrorString: db "CPUID not supported!", 13, 10, 0
_64ErrorString: db "x64 bits not supported!", 13, 10, 0
CODE_SEG equ GDT_code - GDT_start
DATA_SEG equ GDT_data - GDT_start
GDT_start:
GDT_null:
dd 0x0
dd 0x0
GDT_code:
dw 0xffff
dw 0x0
db 0x0
db 0b10011010
db 0b11001111
db 0x0
GDT_data:
dw 0xffff
dw 0x0
db 0x0
db 0b10010010
db 0b11001111
db 0x0
GDT_end:
GDT_descriptor:
dw GDT_end - GDT_start - 1
dd GDT_start
times 510-($-$$) db 0
dw 0xAA55
[bits 32]
protected_mode:
mov ax, DATA_SEG
mov ds, ax
mov ss, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ebp, 0x90000
mov esp, ebp
call Clear
mov ebx, VGA_MEM
mov byte [ebx], 'P'
inc ebx
mov byte [ebx], 14
mov eax, cr0
and eax, ~(1 << 31)
mov cr0, eax
mov edi, 0x1000
mov cr3, edi
xor eax, eax
mov ecx, 4096
rep stosd
mov edi, cr3
mov dword [edi], 0x2003
add edi, 0x1000
mov dword [edi], 0x3003
add edi, 0x1000
mov dword [edi], 0x4003
add edi, 0x1000
mov ebx, 0x00000003
mov ecx, 512
.set_entry:
mov dword [edi], ebx
add ebx, 0x1000
add edi, 8
loop .set_entry
mov eax, cr4
or eax, 1 << 5
mov cr4, eax
mov ecx, 0xC0000080
rdmsr
or eax, 1 << 8
wrmsr
mov eax, cr0
or eax, 1 << 31
mov cr0, eax
lgdt [GDT.Pointer]
jmp GDT.Code:long_mode
jmp $
Clear:
push ebp
mov ebp, esp
mov ecx, VGA_SIZE
mov eax, VGA_MEM
.loop:
mov byte [eax], 0
inc eax
loop .loop
mov esp, ebp
pop ebp
ret
PRESENT equ 1 << 7
NOT_SYS equ 1 << 4
EXEC equ 1 << 3
RW equ 1 << 1
ACCESSED equ 1 << 0
GRAN_4K equ 1 << 7
SZ_32 equ 1 << 6
LONG_MODE equ 1 << 5
GDT:
.Null: equ $ - GDT
dq 0
.Code: equ $ - GDT
dd 0xFFFF
db 0
db PRESENT | NOT_SYS | EXEC | RW
db GRAN_4K | LONG_MODE | 0xF
db 0
.Data: equ $ - GDT
dd 0xFFFF
db 0
db PRESENT | NOT_SYS | RW
db GRAN_4K | SZ_32 | 0xF
db 0
.TSS: equ $ - GDT
dd 0x00000068
dd 0x00CF8900
.Pointer:
dw $ - GDT - 1
dq GDT
[bits 64]
long_mode:
cli
mov ax, GDT.Data
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ss, ax
mov rbp, 0x0007FFFF
mov rsp, rbp
mov rbx, VGA_MEM
mov byte [rbx], 'L'
inc rbx
mov byte [rbx], 2
jmp KERNEL_LOC
VGA_MEM equ 0xB8000
VGA_WIDTH equ 80
VGA_HEIGHT equ 25
VGA_STRIDE equ 2
VGA_SIZE equ VGA_WIDTH * VGA_STRIDE * VGA_HEIGHT
VGA_LENGTH equ VGA_WIDTH * VGA_HEIGHT
times 1024-($-$$) db 0
And for anyone wanting to see the big picture here's the Github repository I made.
The problem was that I simply forgot that I put my page table structures at 0x1000 and accidentally overrode them when loading my kernel at 0x0500.
I ended up leaving the structures at 0x1000 and moved my kernel to 0x5000.
This was a rather simple problem but I would recommend that you still take a look at the comments because there's still a lot of useful information and things to consider.
First: ALWAYS Initialize variables especially global ones.
Second: The problem is surely from your bootloader, can you edit the post to show us how you load your kernel?
Try objdump to see if the variable is declared, use -monitor stdio with QEMU and check the value of CR2 Register, it may be a page fault due to the second problem.
Here is a solution to check if the variable really has a valid pointer:
You can remove these edits after everything is ok.
instead of :
jmp KERNEL_LOC
do:
call KERNEL_LOC ; RAX Has the pointer of the global variable
jmp $
In kmain just type:
return &global
Then run it on QEMU and type in the console info registers, RAX should contain the pointer of the variable named global.

Reversing an array and printing it in x86-64

I am trying to print an array, reverse it, and then print it again. I manage to print it once. I can also make 2 consecutive calls to _printy and it works. But the code breaks with the _reverse function. It does not segfault, it exits with code 24 (I looked online but this seems to mean that the maximum number of file descriptors has been exceeded, and I cannot get what this means in this context). I stepped with a debugger and the loop logic seems to make sense.
I am not passing the array in RDI, because _printy restores the content of that register when it exits. I also tried to load it directly into RDI before calling _reverse but that does not solve the problem.
I cannot figure out what the problem is. Any idea?
BITS 64
DEFAULT REL
; -------------------------------------
; -------------------------------------
; PRINT LIST
; -------------------------------------
; -------------------------------------
%define SYS_WRITE 0x02000004
%define SYS_EXIT 0x02000001
%define SYS_OPEN 0x02000005
%define SYS_CLOSE 0x02000006
%define SYS_READ 0x02000003
%define EXIT_SUCCESS 0
%define STDOUT 1
%define LF 10
%define INT_OFFSET 48
section .text
extern _printf
extern _puts
extern _exit
global _main
_main:
push rbp
lea rdi, [rel array]
call _printy
call _reverse
call _printy
pop rbp
call _exit
_reverse:
push rbp
lea rsi, [rdi + 4 * (length - 1) ]
.LOOP2:
cmp rdi, rsi
jge .DONE2
mov r8, [rdi]
mov r9, [rsi]
mov [rdi], r9
mov [rsi], r8
add rdi,4
sub rsi,4
jmp .LOOP2
.DONE2:
xor rax, rax
lea rdi, [rel array]
pop rbp
ret
_printy:
push rbp
xor rcx, rcx
mov r8, rdi
.loop:
cmp rcx, length
jge .done
push rcx
push r8
lea rdi, [rel msg]
mov rsi, [r8 + rcx * 4]
xor rax, rax
call _printf
pop r8
pop rcx
add rcx, 1
jmp .loop
.done:
xor rax, rax
lea rdi, [rel array]
pop rbp
ret
section .data
array: dd 78, 2, 3, 4, 5, 6
length: equ ($ - array) / 4
msg: db "%d => ", 0
Edit with some info from the debugger
Stepping into the _printy function gives the following msg, once reaching the call to _printf.
* thread #1, queue = 'com.apple.main-thread', stop reason = step over failed (Could not create return address breakpoint.)
frame #0: 0x0000000100003f8e a.out`printf
a.out`printf:
-> 0x100003f8e <+0>: jmp qword ptr [rip + 0x4074] ; (void *)0x00007ff80258ef0b: printf
0x100003f94: lea r11, [rip + 0x4075] ; _dyld_private
0x100003f9b: push r11
0x100003f9d: jmp qword ptr [rip + 0x5d] ; (void *)0x00007ff843eeb520: dyld_stub_binder
I am not an expert, but a quick research online led to the following
During the 'thread step-out' command, check that the memory we are about to place a breakpoint in is executable. Previously, if the current function had a nonstandard stack layout/ABI, and had a valid data pointer in the location where the return address is usually located, data corruption would occur when the breakpoint was written. This could lead to an incorrectly reported crash or silent corruption of the program's state. Now, if the above check fails, the command safely aborts.
So after all this might not be a problem (I am also able to track the execution of the printf call). But this is really the only understandable piece of information I am able to extract from the debugger. Deep in some quite obscure (to me) function calls I reach this
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
frame #0: 0x00007ff80256db7f libsystem_c.dylib`flockfile + 10
libsystem_c.dylib`flockfile:
-> 0x7ff80256db7f <+10>: call 0x7ff8025dd480 ; symbol stub for: __error
0x7ff80256db84 <+15>: mov r14d, dword ptr [rax]
0x7ff80256db87 <+18>: mov rdi, qword ptr [rbx + 0x68]
0x7ff80256db8b <+22>: add rdi, 0x8
Target 0: (a.out) stopped.
(lldb)
Process 61913 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
frame #0: 0x00007ff8025dd480 libsystem_c.dylib`__error
This is one of the function calls happening in _printf.
Ask further questions if there is something more I can do.
Your array consists of int32 numbers aka dd in nasm terminology, but your swap operates on 64 bit numbers:
mov r8, [rdi]
mov r9, [rsi]
mov [rdi], r9
mov [rsi], r8
Assuming you were not after some crazy optimizations where you swap a pair of elements simultaneously you want this to remain in 32 bits:
mov r8d, [rdi]
mov r9d, [rsi]
mov [rdi], r9d
mov [rsi], r8d

Comparing input to character not working in x86_64 Mac assembly nasm

In nasm assembly on mac with the processor architecture x86_64, I am struggling to compare input to a string or character. When comparing input (stdin) to a character, it's not being true when it should be. I am new to assembly.
Here is my code.
global start
section .bss
input resb 10
section .text
start:
;getting the input
mov rax, 0x2000003 ;meaning read
mov rdi, 0
mov rsi, input
mov rdx, 10
syscall ;special
;here is where I do the comparing
mov rax, r
mov rbx, input
cmp rax, rbx
je right
;jumping to the return function
jmp ret
right:
mov rax, 0x2000004 ;meaning write
mov rdi, 1
mov rsi, right_way
mov rdx, right_len
syscall ;special
ret:
mov rax, 0x2000001 ;return 0
xor rdi, rdi ;which means to make rdi = 0 could be replaced with mov rdi, 0 but xor is faster
syscall
section .data
right_way: db "You are correct!", 10, 0
right_len: equ $-right_way
r: db "r", 10
At the "je right" line, it should jump to the right function but it does not. Do I need to convert the input to something else?
Help would be appreciated. Thanks!

Declaring variables in Yasm

Here's is a simple program:
%include 'utils/system.inc'
section .data
first: db 'First is bigger', 0xA,0
second: db 'Second is bigger', 0xA,0
a: db 18
b: db 20
section .text
global start
start:
mov rax, [a wrt rip]
mov rbx, [b wrt rip]
cmp rax, rbx
jle else
mov rsi, qword first
mov rdx, 0x10
jmp end
else:
mov rsi, qword second
mov rdx, 0x11
end:
xor rax, rax
xor rbx, rbx
mov rax, 0x2000004
mov rdi, stdout
syscall
xor rdi, rdi
mov rax, 0x2000001
syscall
The problem is that variable a contains a different value than 18.
Here's what lldb shows me:
(lldb) p a
(void *) $0 = 0x0000000000001412
(lldb) p b
(void *) $1 = 0x0000000000000014
(lldb) p --format decimal a
Any ideas what's going on? I know that if I declare a as dq, it will be alright, but I want to understand why it's happening.

x64 Assembly Optimization

I am trying to optimize several assembly procedures for size, I am not concerned about the speed.
The optimizations I am familiar with are situations as follows:
;the following two lines
mov rbp, rsp
add rbp, 50h
;can be changed to
lea rbp, [rsp+50h]
What other optimizations I can use to reduce the number of bytes in the following procedure?
I am not asking anyone to fully optimize this procedure, just point out where I can improve.
;get procedure address
asmGetProc proc
push rcx ;pointer to function name
push rdx ;DllBase address (IMAGE_DOS_HEADER pointer)
push r8 ;pointer to IMAGE_EXPORT_DIRECTORY
push r9 ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals[r9]
push rbx ;saved pointer to function name
push r10 ;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfNames
;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals
;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfFunctions
mov rbx, rcx ;save the function name pointer to rax
mov r8d, [rdx+3ch] ;IMAGE_DOS_HEADER->e_lfanew (DWORD) (Offset to IMAGE_NT_HEADERS64)
add r8, rdx ;add DllBase to the e_lfanew offset
add r8, 88h ;18h - IMAGE_NT_HEADERS64->OptionalHeader (IMAGE_OPTIONAL_HEADER64) 18h bytes
;70h - skip entire IMAGE_OPTIONAL_HEADER64 structure
;r8 points to the IMAGE_DATA_DIRECTORY structure
mov r8d, [r8] ;IMAGE_DATA_DIRECTORY->VirtualAddress (DWORD)
add r8, rdx ;add DllBase to VirtualAddress (IMAGE_EXPORT_DIRECTORY)
mov r9d, [r8+18h] ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
mov r10d, [r8+20h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNames (DWORD)
add r10, rdx ;add DllBase to AddressOfNames (DWORD)
for_each_function:
;decrement function name counter
dec r9
;load current index of AddressOfNames into r11
lea rcx, [r10 + 4 * r9] ;AddressOfNames[i] - function string RVA (relative virtual address)
mov ecx, [rcx] ;r11d is the AddressOfName[r9] RVA (DWORD)
add rcx, rdx ;add DllBase to string RVA DWORD
call asmHsh ;hash the function name
cmp rax, rbx ;compare the function name hash with the passed hash
jnz for_each_function ;jump to top of loop is not a match
;r8 - export directory
;r9 - function name counter
;r10 - AddressOfNameOrdinals / AddressOfFunctions array
;rax - final point to function
mov r10d, [r8+24h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals (DWORD)
add r10, rdx ;add DllBase to AddressOfNameOrdinals DWORD
mov r9w, [r10+2*r9] ;AddressOfNameOrdinals[2*r9] - (2*r9 = 2 bytes * function name counter)
mov r10d, [r8+1ch] ;IMAGE_EXPORT_DIRECTORY->AddressOfFunctions (DWORD)
add r10, rdx ;add DllBase to AddressOfFunctions DWORD
mov eax, [r10+r9*4] ;AddressOfFunctions[4*r9] - (4*r9 = 4 bytes * function ordinal)
add rax, rdx ;add DllBase to function ordinal RVA DWORD
pop r10
pop rbx
pop r9
pop r8
pop rdx
pop rcx
ret ;return from procedure
asmGetProc endp
EDIT: Added asmHsh (my bad)
;hash function (djb2)
asmHsh proc
;rcx - null terminated function name
push rcx
push rdx
mov rax, 5381d
hl:
mov rdx, rax
shl rax, 5
add rax, rdx
xor al, [rcx]
inc rcx
;check for null termination
mov dl, [rcx]
cmp dl, 00h
jne short hl
pop rdx
pop rcx
ret
asmHsh endp
Optimizing assembly for space in 64-bit mode one should: (1) use DWORD width when that suffices (less prefixes); (2) stick to the old X86 registers eax-edx / esi / edi / ebp (tighter encoding).
Hopefully what's done below illustrates the idea. ML64 assembled the original routines to 135 bytes and the modified version to 103 bytes.
Examples of changes: (1) used rbp / rsi / rdi instead of r8 / r9 / r10; (2) shrunk instruction sequences that could be accomplished via multi-component address modes; (3) used DWORD dec where the data is known to be 32-bits; (4) used IMUL in place of shift/add.
" ;- " is in front of removed lines " ;## delta " is appended to added lines, where delta is the byte difference the new code produced. No attempt was made to adjust the comments.
;hash function (djb2)
asmHsh proc
;rcx - null terminated function name
push rcx
;-push rdx ;## -1
mov rax, 5381d
hl:
;- mov rdx, rax
;- shl rax, 5
;- add rax, rdx
imul rax,rax,33 ;## -6
xor al, [rcx]
inc rcx
;check for null termination
;-mov dl, [rcx]
;-cmp dl, 00h
cmp byte ptr [rcx], 00h ;## -2
jne short hl
;-pop rdx ;## -1
pop rcx
ret
asmHsh endp
;get procedure address
asmGetProc proc
push rcx ;pointer to function name
push rdx ;DllBase address (IMAGE_DOS_HEADER pointer)
;-push r8 ;pointer to IMAGE_EXPORT_DIRECTORY
push rbp ;## -1
;-push r9 ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
push rsi ;## -1
;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals[r9]
push rbx ;saved pointer to function name
;-push r10 ;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfNames
push rdi ;## -1
;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals
;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfFunctions
mov rbx, rcx ;save the function name pointer to rax
;-mov r8d, [rdx+3ch] ;IMAGE_DOS_HEADER->e_lfanew (DWORD) (Offset to IMAGE_NT_HEADERS64)
mov ebp, [rdx+3ch] ;## -1
;-add r8, rdx ;add DllBase to the e_lfanew offset
;-add r8, 88h ;18h - IMAGE_NT_HEADERS64->OptionalHeader (IMAGE_OPTIONAL_HEADER64) 18h bytes
;- ;70h - skip entire IMAGE_OPTIONAL_HEADER64 structure
;- ;r8 points to the IMAGE_DATA_DIRECTORY structure
;-mov r8d, [r8] ;IMAGE_DATA_DIRECTORY->VirtualAddress (DWORD)
mov ebp, [rbp+rdx+88h] ;## -5
;-add r8, rdx ;add DllBase to VirtualAddress (IMAGE_EXPORT_DIRECTORY)
add rbp, rdx ;## 0
;-mov r9d, [r8+18h] ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
mov esi, [rbp+18h] ;## -1
;-mov r10d, [r8+20h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNames (DWORD)
mov edi, [rbp+20h] ;## -1
;-add r10, rdx ;add DllBase to AddressOfNames (DWORD)
add rdi, rdx ;## 0
for_each_function:
;decrement function name counter
;- dec r9
dec esi ;## -1
;load current index of AddressOfNames into r11
;- lea rcx, [r10 + 4 * r9] ;AddressOfNames[i] - function string RVA (relative virtual address)
;- mov ecx, [rcx] ;r11d is the AddressOfName[r9] RVA (DWORD)
mov ecx, [rdi + 4 * rsi] ;## -3
add rcx, rdx ;add DllBase to string RVA DWORD
call asmHsh ;hash the function name
cmp rax, rbx ;compare the function name hash with the passed hash
jnz for_each_function ;jump to top of loop is not a match
;r8 - export directory
;r9 - function name counter
;r10 - AddressOfNameOrdinals / AddressOfFunctions array
;rax - final point to function
;-mov r10d, [r8+24h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals (DWORD)
mov edi, [rbp+24h];## -1
;-add r10, rdx ;add DllBase to AddressOfNameOrdinals DWORD
add rdi, rdx; ## 0
;-mov r9w, [r10+2*r9] ;AddressOfNameOrdinals[2*r9] - (2*r9 = 2 bytes * function name counter)
mov si, [rdi+2*rsi] ;## -1
;-mov r10d, [r8+1ch] ;IMAGE_EXPORT_DIRECTORY->AddressOfFunctions (DWORD)
mov edi, [rbp+1ch] ;## -1
;-add r10, rdx ;add DllBase to AddressOfFunctions DWORD
add rdi, rdx ;## 0
;-mov eax, [r10+r9*4] ;AddressOfFunctions[4*r9] - (4*r9 = 4 bytes * function ordinal)
mov eax, [rdi+rsi*4] ; ## -1
add rax, rdx ;add DllBase to function ordinal RVA DWORD
;-pop r10
pop rdi ; ## -1
pop rbx
;-pop r9
pop rsi
;-pop r8
pop rbp ;## -1
pop rdx
pop rcx
ret ;return from procedure
asmGetProc endp

Resources