Calling FormatMessageA from a 64 bit assembler program - winapi

I've been working for days on an assembler subroutine to call Windows API function FormatMessageA, and I think I must have some systematic misunderstanding. My routine is shown below. I have traced it in debug mode, and I know that at the time the function is called: rcx has hex 1B00, which is the flag values I specified (dwFlags); rdx has hex 33, which is the bogus initial handle value that I plugged in to provoke the error (lpSource); r8 has 6, which is the error message number that was returned by GetLastError, which equates to ERROR_INVALID_HANDLE (dwMessageId); r9 has 0, for default language (dwLanguageId); rsp+32 has the address of msgptrp, which is my area to receive the address of the error message as allocated by the called function (lpBuffer); rsp+40 has hex 28, or decimal 40, which is the minimum number of characters I arbitrarily specified for the error message (nSize); and rsp+48 has the address of argmntsp, which according to the documentation should be ignored with the flags I specified (*Arguments).
If there's a problem, and obviously there is since rax is returned as zero, I suspect that it has to do with lpBuffer, which the documentation says has data type LPTSTR, whatever that is. Sometimes I want to shout, "Okay, that's what it is in terms of C++, but what is it really? I hope someone can easily spot where I ran off the rails, because I'm at my wits' end with this, and it's something I need to get working in order to do effective error checking for my future endeavors.
goterr PROC
;
.data
; flag values used:
; hex 00000100 FORMAT_MESSAGE_ALLOCATE_BUFFER
; hex 00000200 FORMAT_MESSAGE_IGNORE_INSERTS
; hex 00000800 FORMAT_MESSAGE_FROM_HMODULE
; hex 00001000 FORMAT_MESSAGE_FROM_SYSTEM
flagsp dd 00001B00h ; those flags combined
saveinitp dq ?
bmaskp dq 0fffffffffffffff0h
savshadp dq ?
msgptrp dq ?
handlep dd ?
argmntsp dd ?
;
.code
mov saveinitp, rsp ; save initial contents of stack pointer in this proc
sub rsp, 56 ; shadow space (max of 7 parameters * 8)
and rsp, bmaskp ; make sure it's 16 byte aligned
mov savshadp, rsp ; save address of aligned shadow area for this proc
;
mov handlep, ecx ; save passed I/O handle value locally
;
call GetLastError ; get the specific error
;
mov rsp, savshadp ; shadow area for this proc
mov ecx, flagsp ; flags for Windows error routine
mov edx, handlep ; handle passed from caller
mov r8d, eax ; msg id from GetLastError in low order dword
mov r9, 0 ; default language id
lea rax, msgptrp ; pointer to receive address of msg buffer
mov [rsp+32], rax ; put it on the stack
mov rax, 40 ; set lower doubleword to minimum size for msg buffer
mov [rsp+40], rax ; put it on the stack
lea rax, argmntsp ; variable arguments parameter (not used)
mov [rsp+48], rax ; put it on the stack
call FormatMessageA ; if rax eq 0 the called failed, otherwise rax is no chars.
mov rsp, saveinitp ; restore initial SP for this proc
ret
goterr ENDP

Related

Segmentation fault when adding 2 digits - nasm MacOS x86_64

I am trying to write a program that accepts 2 digits as user input, and then outputs their sum. I keep getting segmentation error when trying to run program(I am able to input 2 digits, but then the program crashes). I already check answers to similar questions and many of them pointed out to clear the registers, which I did, but I am still getting a segmentation fault.
section .text
global _main ;must be declared for linker (ld)
default rel
_main: ;tells linker entry point
call _readData
call _readData1
call _addData
call _displayData
mov RAX, 0x02000001 ;system call number (sys_exit)
syscall
_addData:
mov byte [sum], 0 ; init sum with 0
lea EAX, [buffer] ; load value from buffer to register
lea EBX, [buffer1] ; load value from buffer1 to register
sub byte [EAX], '0' ; transfrom to digit
sub byte [EBX], '0' ; transform to digit
add [sum], EAX ; increment value of sum by value from register
add [sum], EBX ; increment value of sum by value from 2nd register
add byte [sum], '0' ; convert to ASCI
xor EAX, EAX ; clear registers
xor EBX, EBX ; clear registers
ret
_readData:
mov RAX, 0x02000003
mov RDI, 2
mov RSI, buffer
mov RDX, SIZE
syscall
ret
_readData1:
mov RAX, 0x02000003
mov RDI, 2
mov RSI, buffer1
mov RDX, SIZE
syscall
ret
_displayData:
mov RAX, 0x02000004
mov RDI, 1
mov RSI, sum
mov RDX, SIZE
syscall
ret
section .bss
SIZE equ 4
buffer: resb SIZE
buffer1: resb SIZE
sum: resb SIZE
I see that, unlike other languages I learned, it is quite difficult to find a good source /tutorial about programming assembly using nasm on x86_64 architecture. Is there any kind of walkthrough for beginners(so I do not need to ask on SO everytime I am stuck :D)

How to print signed integer in x86 assembly (NASM) on Mac

I found an implementation of unsigned integer conversion in x86 assembly, and I tried plugging it in but being new to assembly and not having a debugging env there yet, it's difficult to understand why it's not working. I would also like it to work with signed integers so it can capture error messages from syscalls.
Wondering if one could show how to fix this code to get the signed integer to print, without using printf but using strprn provided by this answer.
%define a rdi
%define b rsi
%define c rdx
%define d r10
%define e r8
%define f r9
%define i rax
%define EXIT 0x2000001
%define EXIT_STATUS 0
%define READ 0x2000003 ; read
%define WRITE 0x2000004 ; write
%define OPEN 0x2000005 ; open(path, oflag)
%define CLOSE 0x2000006 ; CLOSE
%define MMAP 0x2000197 ; mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t offset)
; szstr computes the lenght of a string.
; rdi - string address
; rdx - contains string length (returned)
strsz:
xor rcx, rcx ; zero rcx
not rcx ; set rcx = -1 (uses bitwise id: ~x = -x-1)
xor al,al ; zero the al register (initialize to NUL)
cld ; clear the direction flag
repnz scasb ; get the string length (dec rcx through NUL)
not rcx ; rev all bits of negative -> absolute value
dec rcx ; -1 to skip the null-term, rcx contains length
mov rdx, rcx ; size returned in rdx, ready to call write
ret
; strprn writes a string to the file descriptor.
; rdi - string address
; rdx - contains string length
strprn:
push rdi ; push string address onto stack
call strsz ; call strsz to get length
pop rsi ; pop string to rsi (source index)
mov rax, WRITE ; put write/stdout number in rax (both 1)
mov rdi, 1 ; set destination index to rax (stdout)
syscall ; call kernel
ret
; mov ebx, 0xCCCCCCCD
itoa:
xor rdi, rdi
call itoal
ret
; itoa loop
itoal:
mov ecx, eax ; save original number
mul ebx ; divide by 10 using agner fog's 'magic number'
shr edx, 3 ;
mov eax, edx ; store quotient for next loop
lea edx, [edx*4 + edx] ; multiply by 10
shl rdi, 8 ; make room for byte
lea edx, [edx*2 - '0'] ; finish *10 and convert to ascii
sub ecx, edx ; subtract from original number to get remainder
lea rdi, [rdi + rcx] ; store next byte
test eax, eax
jnz itoal
exit:
mov a, EXIT_STATUS ; exit status
mov i, EXIT ; exit
syscall
_main:
mov rdi, msg
call strprn
mov ebx, -0xCCCCCCCD
call itoa
call strprn
jmp exit
section .text
msg: db 0xa, " Hello StackOverflow!!!", 0xa, 0xa, 0
With this working it will be possible to properly print signed integers to STDOUT, so you can log the registers values.
https://codereview.stackexchange.com/questions/142842/integer-to-ascii-algorithm-x86-assembly
How to print a string to the terminal in x86-64 assembly (NASM) without syscall?
How do I print an integer in Assembly Level Programming without printf from the c library?
https://baptiste-wicht.com/posts/2011/11/print-strings-integers-intel-assembly.html
How to get length of long strings in x86 assembly to print on assertion
My answer on How do I print an integer in Assembly Level Programming without printf from the c library? which you already linked shows that serializing an integer into memory as ASCII decimal gives you a length, so you have no use for (a custom version of) strlen here.
(Your msg has an assemble-time constant length, so it's silly not to use that.)
To print a signed integer, implement this logic:
if (x < 0) {
print('-'); // or just was_negative = 1
x = -x;
}
unsigned_intprint(x);
Unsigned covers the abs(most_negative_integer) case, e.g. in 8-bit - (-128) overflows to -128 signed. But if you treat the result of that conditional neg as unsigned, it's correct with no overflow for all inputs.
Instead of actually printing a - by itself, just save the fact that the starting number was negative and stick the - in front of the other digits after generating the last one. For bases that aren't powers of 2, the normal algorithm can only generate digits in reverse order of printing,
My x86-64 print integer with syscall answer treats the input as unsigned, so you should simply use that with some sign-handling code around it. It was written for Linux, but replacing the write system call number will make it work on Mac. They have the same calling convention and ABI.
And BTW, xor al,al is strictly worse than xor eax,eax unless you specifically want to preserve the upper 7 bytes of RAX. Only xor-zeroing of full registers is handled efficiently as a zeroing idiom.
Also, repnz scasb is not fast; about 1 compare per clock for large strings.
For strings up to 16 bytes, you can use a single XMM vector with pcmpeqb / pmovmskb / bsf to find the first zero byte, with no loop. (SSE2 is baseline for x86-64).

Multicore in NASM Windows: lpParameter data are wrong on entry

I have code in NASM (64 bit) in Windows to run four simultaneous threads (each assigned to a separate core) on a four-core Windows x86-64 machine.
The lpParameter is passed in r9 (the data variables for each thread to pass to the function). I am passing a pointer to an 8-element internal array (ThreadInfo) which contains variables to put into registers on entry to the function (variables are stored in registers for optimization purposes). All four threads call the same function.
The problem is that on entry to the function, the data passed in the ThreadInfo array are all large (e.g. 128-bit) negative numbers. The array contains 64-bit integers. The array is read-only for each of the threads, so no thread can alter the memory. I don't understand why the values should be wrong. I tested them in the thread creation code and they are correct, but they return incorrect values when read from the called function.
Here is the test code showing the relevant parts. Showing a reproducible example may not be possible due to the register values, but if this question can't be answered without that, then I can see if it's feasible to post a completely reproducible program.
Init_Cores_fn:
; EACH OF THE CORES CALLS Test_Function AND EXECUTES THE WHOLE PROGRAM.
; WE PASS THE STARTING BYTE (0, 8, 16, 24) AND THE "STRIDE" = NUMBER OF CORES.
; ON RETURN, WE SYNCHRONIZE ANY DATA. ON ENTRY TO EACH CORE, SET THE REGISTERS
; Populate the ThreadInfo array with vars to pass
; ThreadInfo: length, startbyte, stride, vars into registers on entry to each core
mov rdi,ThreadInfo
mov rax,ThreadInfoLength
mov [rdi],rax
mov rax,[stride]
mov [rdi+16],rax ; 8 x number of cores (32 in this example)
; Register Vars
mov [rdi+24],r15
mov [rdi+32],r14
mov [rdi+40],r13
mov [rdi+48],r12
mov [rdi+56],r10
mov rbp,rsp ; preserve caller's stack frame
sub rsp,56 ; Shadow space
; _____
label_0:
mov rdi,ThreadInfo
mov rax,[FirstByte]
mov [rdi+8],rax ; 0, 8, 16, or 24
; _____
; Create Threads
mov rcx,0 ; lpThreadAttributes (Security Attributes)
mov rdx,0 ; dwStackSize
mov r8,Test_Function ; lpStartAddress (function pointer)
mov r9,ThreadInfo ; lpParameter (array of data passed to each core)
mov rax,0
mov [rsp+40],rax ; use default creation flags
mov rax,[ThreadCount]
mov [rsp+32],rax ; ThreadID
call CreateThread
; Move the handle into ThreadHandles array (returned in rax)
mov rdi,ThreadHandles
mov rcx,[FirstByte]
mov [rdi+rcx],rax
mov rax,[FirstByte]
add rax,8
mov [FirstByte],rax
mov rax,[ThreadCount]
add rax,1
mov [ThreadCount],rax
mov rbx,4
cmp rax,rbx
jl label_0
; _____
; Wait
mov rcx,rax ; number of handles
mov rdx,ThreadHandles ; pointer to handles array
mov r8,1 ; wait for all threads to complete
mov r9,1000 ; milliseconds to wait
call WaitForMultipleObjects
; _____
;[ Code HERE to do cleanup if needed after the four threads finish ]
mov rsp,rbp
jmp label_900
; __________________
; The function for all threads to call
Test_Function:
; Populate registers
mov rdi,r9
mov rax,[rdi]
mov r15,[rdi+24]
mov rax,[rdi+8] ; start byte
mov r13,[rdi+40]
mov r12,[rdi+48]
mov r10,[rdi+56]
xor r11,r11
xor r9,r9
pxor xmm15,xmm15
pxor xmm15,xmm14
pxor xmm15,xmm13
The pointer is passed in r9, but on entry to Test_Function, the values are all large (e.g., 128-bit) negative values. Why?
Thanks for any help on this.

Open file, delete zeros, sort it - NASM

I am currently working on some problems and this is the one I am having trouble with. To make it all clear, I am a beginner, so any help is more than welcome.
Problem:
Sort the content of a binary file in descending order. The name of the file is passed as a command line argument. File content is interpreted as four-byte positive integers, where value 0, when found, is not written into the file. The result must be written in the same file that has been read.
The way I understand is that I have to have a binary file. Open it. Get its content. Find all characters while keeping in mind those are positive, four-byte integers, find zeros, get rid of zeros, sort the rest of the numbers.
We are allowed to use glibc, so this was my attempt:
section .data
warning db 'File does not exist!', 10, 0
argument db 'Enter your argument.', 10, 0
mode dd 'r+'
opened db 'File is open. Time to read.', 10, 0
section .bss
content resd 10
counter resb 1
section .text
extern printf, fopen, fgets, fputc
global main
main:
push rbp
mov rbp, rsp
push rsi
push rdi
push rbx
;location of argument's address
push rsi
cmp rdi, 2
je .openfile
mov rdi, argument
mov rax, 0
call printf
jmp .end
.openfile:
pop rbx
;First real argument of command line
mov rdi, [rbx + 8]
mov rsi, mode
mov rax, 0
call fopen
cmp al, 0
je .end
push rax
mov rdi, opened
mov rax, 0
call printf
.readfromfile:
mov rdi, content
mov rsi, 12 ;I wrote 10 numbers in my file
pop rdx
mov rax, 0
call fgets
cmp al, 0
je .end
push rax
mov rsi, tekst
pop rdi
.loop:
lodsd
inc byte[counter]
cmp eax, '0'
jne .loop
;this is the part where I am not sure what to do.
;I am trying to delete the zero with backspace, then use space and
;backspace again - I saw it here somewhere as a solution
mov esi, 0x08
call fputc
mov esi, 0x20
call fputc
mov esi, 0x08
call fputc
cmp eax, 0
je .end
jmp .loop
.end:
pop rdi
pop rsi
pop rbx
mov rsp, rbp
pop rbp
ret
So, my idea was to open the file, find zero, delete it by using backspace and space, then backspace again; Continue until I get to the end of the file, then sort it. As it can be seen I did not attempt to sort the content because I cannot get program to do the first part for me. I have been trying this for couple of days now and everything is getting foggy.
If someone can help me out, I would be very grateful. If there is something similar to this problem, feel free to link it to me. Anything that could help, I am ready to read and learn.
I am also unsure about how much information do I have to give. If something is unclear, please point it out to me.
Thank you
For my own selfish fun, an example of memory area being "collapsed" when dword zero value is detected:
to build in linux with NASM for target ELF64 executable:
nasm -f elf64 so_64b_collapseZeroDword.asm -l so_64b_collapseZeroDword.lst -w+all
ld -b elf64-x86-64 -o so_64b_collapseZeroDword so_64b_collapseZeroDword.o
And for debugger I'm using edb (built from sources) (the executable doesn't do anything observable by user, when it works correctly, it's supposed to be run in debugger single-stepping over instructions and having memory view over the .data segment to see how the values are moved around in memory).
source file so_64b_collapseZeroDword.asm
segment .text
collapseZeroDwords:
; input (custom calling convention, suitable only for calls from assembly):
; rsi - address of first element
; rdx - address beyond last element ("vector::end()" pointer)
; return: rdi - new "beyond last element" address
; modifies: rax, rsi, rdi
; the memory after new end() is not cleared (the zeroes are just thrown away)!
; search for first zero (up till that point the memory content will remain same)
cmp rsi, rdx
jae .noZeroFound ; if the (rsi >= end()), no zero was in the memory
lodsd ; eax = [rsi], rsi += 4
test eax, eax ; check for zero
jne collapseZeroDwords
; first zero found, from here on, the non-zero values will be copied to earlier area
lea rdi, [rsi-4] ; address where the non-zero values should be written
.moveNonZeroValues:
cmp rsi, rdx
jae .wholeArrayCollapsed ; if (rsi >= end()), whole array is collapsed
lodsd ; eax = [rsi], rsi += 4
test eax, eax ; check for zero
jz .moveNonZeroValues ; zero detected, skip the "store" value part
stosd ; [rdi] = eax, rdi += 4 (pointing beyond last element)
jmp .moveNonZeroValues
.noZeroFound:
mov rdi, rdx ; just return the original "end()" pointer
.wholeArrayCollapsed: ; or just return when rdi is already set as new end()
ret
global _start
_start: ; run some hardcoded simple tests, verify in debugger
lea rsi, [test1]
lea rdx, [test1+4*4]
call collapseZeroDwords
cmp rdi, test1+4*4 ; no zero collapsed
lea rsi, [test2]
lea rdx, [test2+4*4]
call collapseZeroDwords
cmp rdi, test2+3*4 ; one zero
lea rsi, [test3]
lea rdx, [test3+4*4]
call collapseZeroDwords
cmp rdi, test3+3*4 ; one zero
lea rsi, [test4]
lea rdx, [test4+4*4]
call collapseZeroDwords
cmp rdi, test4+2*4 ; two zeros
lea rsi, [test5]
lea rdx, [test5+4*4]
call collapseZeroDwords
cmp rdi, test5+2*4 ; two zeros
lea rsi, [test6]
lea rdx, [test6+4*4]
call collapseZeroDwords
cmp rdi, test6+0*4 ; four zeros
; exit back to linux
mov eax, 60
xor edi, edi
syscall
segment .data
; all test arrays are 4 elements long for simplicity
dd 0xCCCCCCCC ; debug canary value to detect any over-read or over-write
test1 dd 71, 72, 73, 74, 0xCCCCCCCC
test2 dd 71, 72, 73, 0, 0xCCCCCCCC
test3 dd 0, 71, 72, 73, 0xCCCCCCCC
test4 dd 0, 71, 0, 72, 0xCCCCCCCC
test5 dd 71, 0, 72, 0, 0xCCCCCCCC
test6 dd 0, 0, 0, 0, 0xCCCCCCCC
I tried to comment it extensively to show what/why/how it is doing, but feel free to ask about any particular part. The code was written with simplicity on mind, so it doesn't use any aggressive performance optimizations (like vectorized search for first zero value, etc).

NASM 64-bit OS X Inputted String Overwriting Bytes of Existing Value

I am trying to write a simple assembly program to add two numbers together. I want the user to be able to enter the values. The problem I am encountering is that when I display a string message and then read a value in, the next time the string is required the first x characters of the string have been overwritten by the data that was entered by the user.
My assumption is that this is related to the use of LEA to load the string into the register. I have been doing this because Macho64 complains if a regular MOV instruction is used in this situation (something to do with addressing space in 64-bits on the Mac).
My code is as follows:
section .data ;this is where constants go
input_message db 'Please enter your next number: '
length equ $-input_message
section .text ;declaring our .text segment
global _main ;telling where program execution should start
_main: ;this is where code starts getting executed
mov r8, 0
_loop_values:
call _get_value
call _write
inc r8 ;increment the loop counter
cmp r8, 2 ;compare loop counter to zero
jne _loop_values
call _exit
_get_value:
lea rcx, [rel input_message] ;move the input message into rcx for function call
mov rdx, length ;load the length of the message for function call
call _write
call _read
ret
_read:
mov rdx, 255 ;set buffer size for input
mov rdi, 0 ;stdout
mov rax, SYSCALL_READ
syscall
mov rdx, rax ;move the length from rax to rdx
dec rdx ;remove new line character from input length
mov rcx, rsi ;move the value input from rsi to rcx
ret
_write:
mov rsi, rcx ;load the output message
;mov rdx, rax
mov rax, SYSCALL_WRITE
syscall
ret
_exit:
mov rax, SYSCALL_EXIT
mov rdi, 0
syscall
The program loops twice as it should. The first time I get the following prompt:
Please enter your next number:
I would the enter something like 5 (followed by the return key)
The next prompt would be:
5
ease enter your next number:
Any assistance would be much appreciated.
I think all 64-bit code on Mac is required to be rip relative.
Absolute addresses are not supported. in this type of addressing you address your symbol relative to rip.
NASM documentation says:
default abs
mov eax,[foo] ; 32−bit absolute disp, sign−extended
mov eax,[a32 foo] ; 32−bit absolute disp, zero−extended
mov eax,[qword foo] ; 64−bit absolute disp
default rel
mov eax,[foo] ; 32−bit relative disp
mov eax,[a32 foo] ; d:o, address truncated to 32 bits(!)
mov eax,[qword foo] ; error
mov eax,[abs qword foo] ; 64−bit absolute disp
and you can also see this question.

Resources