What does DebugView's "Enable Verbose Kernel Output" do? - windows

I've followed Microsoft's "Write a Hello World Windows Driver (KMDF)" and wasn't sure how to see the output on WinDbg. After trying to set the Debug Print Filter registry to 0xFFFFFFFF, rebooting and other rain dance solutions, the one thing that worked was enabling DebugView's "Enable Verbose Kernel Output" option. Now, WinDbg shows debug outputs. Its too verbose but at least it's there.
So what did DebugView modify for WinDbg to show more verbose debug output?
I'm running WinDbg attached to a VM from my Windows host with a bridged connection.

TL;DR: it calls a driver to repeatedly call NtSetDebugFilterState on all kernel components, so that they are all able to print something on the debug output.
Program
Let start with the program itself; there's only one occurrence of the sentence "Enable Verbose Kernel Output":
mov [rsp+78h+mi.wID], 9C7Ch
lea rax, aEnableVerboseK ; "Enable &Verbose Kernel Output"
sbb ecx, ecx
mov [rsp+78h+mi.dwTypeData], rax
and ecx, 8
mov [rsp+78h+mi.fState], ecx
mov rcx, cs:hMenu ; hMenu
call cs:GetSubMenu
mov rcx, rax ; hmenu
lea r9, [rsp+78h+mi] ; lpmi
lea edx, [rdi+3] ; item
lea r8d, [rdi+1] ; fByPosition
call cs:InsertMenuItemA
The above code insert the sub-menu into the main menu. What's important here is the the menu ID, namely 0x9C7C.
This menu ID is used only once more here:
movzx edx, al ; al can either be 0 or 1
xor edi, edi
mov qword ptr [rsp+830h+iNumButtons], rdi ; lpOverlapped
lea rax, [rsp+830h+BytesReturned]
mov [rsp+830h+lpButtons], rax ; lpBytesReturned
xor edx, 1
mov dword ptr [rsp+830h+wBMID], edi ; nOutBufferSize
xor r9d, r9d ; nInBufferSize
xor r8d, r8d ; lpInBuffer
mov [rsp+830h+dwInitParam], rdi ; lpOutBuffer
lea edx, ds:0FFFFFFFF8305003Ch[rdx*4] ; dwIoControlCode
call cs:DeviceIoControl
movzx eax, cs:byte_1400935A3
mov edx, 9C7Ch ; uIDCheckItem
mov rcx, cs:hMenu ; hMenu
mov cs:byte_1400A2776, al
neg al
sbb r8d, r8d
and r8d, 8 ; uCheck
call cs:CheckMenuItem
The above code calls DeviceIoControl and then checks the menu item. The former means the program is actually talking with a device driver.
If we remove a bit of code we can see which IOCTL can be sent to the driver:
movzx edx, al ; al can either be 0 or 1
; snip
xor edx, 1 ; invert AL
; snip
lea edx, ds:0FFFFFFFF8305003Ch[rdx*4] ; dwIoControlCode
call cs:DeviceIoControl
Since RDX can be either 0 or 1 we end up with (base 10):
[rdx*4-2096824260]
Thus:
4 - 2096824260 = -2096824256
0 - 2096824260 = -2096824260
Looking at the handles opened by dbgview64.exe we can see a \Device\dbgv is currently opened.
0: kd> !devobj \Device\dbgv
Device object (ffffd58a97007630) is for:
Dbgv \Driver\DBGV DriverObject ffffd58a8688aaa0
Current Irp 00000000 RefCount 0 Type 00008305 Flags 00000048
SecurityDescriptor ffffe58fb8bdeea0 DevExt 00000000 DevObjExt ffffd58a97007780
ExtensionFlags (0x00000800) DOE_DEFAULT_SD_PRESENT
Characteristics (0000000000)
Device queue is not busy.
0: kd> dt _driver_object ffffd58a8688aaa0
nt!_DRIVER_OBJECT
+0x000 Type : 0n4
+0x002 Size : 0n336
+0x008 DeviceObject : 0xffffd58a`97007630 _DEVICE_OBJECT
+0x010 Flags : 0x12
+0x018 DriverStart : 0xfffff800`dcf90000 Void
+0x020 DriverSize : 0x9000
+0x028 DriverSection : 0xffffd58a`a3ba9be0 Void
+0x030 DriverExtension : 0xffffd58a`8688abf0 _DRIVER_EXTENSION
+0x038 DriverName : _UNICODE_STRING "\Driver\DBGV"
+0x048 HardwareDatabase : 0xfffff800`8372e990 _UNICODE_STRING "\REGISTRY\MACHINE\HARDWARE\DESCRIPTION\SYSTEM"
+0x050 FastIoDispatch : (null)
+0x058 DriverInit : 0xfffff800`dcf97058 long +0
+0x060 DriverStartIo : (null)
+0x068 DriverUnload : (null)
+0x070 MajorFunction : [28] 0xfffff800`dcf91b80 long +0
0: kd> dt nt!_LDR_DATA_TABLE_ENTRY 0xffffd58a`a3ba9be0 Full*
+0x048 FullDllName : _UNICODE_STRING "\??\C:\WINDOWS\system32\Drivers\Dbgv.sys"
So the driver is currently loaded from C:\WINDOWS\system32\Drivers\Dbgv.sys (or you can extract it from the .rsrc section...).
Driver
Looking at the driver, in the driver entry we spot the function used for IRP_MJ_DEVICE_CONTROL:
lea rax, sub_180001B80
mov [rdi+0E0h], rax ; IRP_MJ_DEVICE_CONTROL
mov [rdi+80h], rax
mov [rdi+70h], rax
Inside that function we have the usual setup before calling the right IOCTL:
movzx eax, [rcx+_IO_STACK_LOCATION.MajorFunction]
mov r9d, [rcx+_IO_STACK_LOCATION.Parameters.DeviceIoControl.OutputBufferLength]
mov r10d, [rcx+_IO_STACK_LOCATION.Parameters.DeviceIoControl.IoControlCode]
test al, al ; IRP_MJ_CREATE
jz loc_180001C6C
cmp al, 2 ; IRP_MJ_CLOSE
jz short loc_180001C0C
cmp al, 0Eh ; IRP_MJ_DEVICE_CONTROL
jnz ##CompleteRequest
mov eax, r10d
and eax, 3
cmp al, METHOD_NEITHER
jnz short loc_180001BDF
mov rdx, [rdi+_IRP.UserBuffer]
loc_180001BDF:
mov [rsp+98h+do], r11 ; _DEVICE_OBJECT*
mov [rsp+98h+IoStatus], rbx ; IoStatus
mov [rsp+98h+ioctl], r10d ; IoCtl
mov [rsp+98h+OutputBufferLength], r9d ; OuputBufferLength
mov r9d, [rcx+_IO_STACK_LOCATION.Parameters.DeviceIoControl.InputBufferLength] ; int
mov rcx, [rcx+_IO_STACK_LOCATION.FileObject]
mov qword ptr [rsp+98h+Buffer], rdx ; Buffer
mov dl, 1 ; int
call sub_1800017E0
jmp ##CompleteRequest
Inside the call (sub_1800017E0) we have a big switch for the IOCTL, here's the case -2096824260 (case -2096824256 is slightly different):
loc_1800018B9:
call sub_180002470 ; jumptable 000000018000182F case -2096824260
jmp loc_180001AEB
This function is mostly comprised of two loops:
loc_1800024A0:
xor ebx, ebx
##LoopQuerySetDebugFilter:
mov edx, ebx
mov ecx, esi
call cs:qword_180005438 ; DbgQueryDebugFilterState
mov r8b, 1 ; State
mov edx, ebx ; Level (keeps incrementing up to 0x1E)
mov ecx, esi ; ComponentId (keeps incrementing up to 0x82)
mov [rdi], al ; save current state.
call cs:qword_180005440 ; DbgSetDebugFilterState
inc ebx
inc rdi
cmp ebx, 1Eh
jb short ##LoopQuerySetDebugFilter
inc esi
cmp esi, 82h ; '‚'
jb short loc_1800024A0
Both calls are on DbgQueryDebugFilterState and DbgSetDebugFilterState (reactos source)
which is just a minimal wrapper around NtSetDebugFilterState (reactos source).
As far as we can see the debug filter state is queried, saved, and then set for all kernel components (following is the component tables from the kernel, there are a lot of them):
.rdata:00000001400073E0 KdComponentTable dq offset Kd_SYSTEM_Mask
.rdata:00000001400073E0 ; DATA XREF: NtQueryDebugFilterState+36↓o
.rdata:00000001400073E0 ; NtSetDebugFilterState+43↓o ...
.rdata:00000001400073E8 dq offset Kd_SMSS_Mask
.rdata:00000001400073F0 dq offset Kd_SETUP_Mask
.rdata:00000001400073F8 dq offset Kd_NTFS_Mask
.rdata:0000000140007400 dq offset Kd_FSTUB_Mask
.rdata:0000000140007408 dq offset Kd_CRASHDUMP_Mask
.rdata:0000000140007410 dq offset Kd_CDAUDIO_Mask
.rdata:0000000140007418 dq offset Kd_CDROM_Mask
.rdata:0000000140007420 dq offset Kd_CLASSPNP_Mask
....
Which finally means that all kernel components are able to print something to the debug output.
Note that the other IOCTL just reset the components masks to what they were before checking the menu in the main program.

Related

When I use global variables it crashes my kernel

I have had this happen before and worked around it for a while but now it slowly becomes more and more unavoidable, because now I need them.
For some weird reason, my kernel crashes when I try to use a global variable in my code.
This works:
int global;
void kmain()
{
//do some stuff...
}
This does not work:
int global;
void kmain()
{
global = 1;
//do some stuff...
}
I have no idea why this is happening.
As some additional resources here is my linker script:
OUTPUT_FORMAT(binary)
phys = 0x0500;
SECTIONS
{
.text phys : AT(phys) {
code = .;
*(.text)
*(.rodata)
. = ALIGN(4096);
}
.data : AT(phys + (data - code))
{
data = .;
*(.data)
. = ALIGN(4096);
}
.bss : AT(phys + (bss - code)) {
bss = .;
*(.bss)
. = ALIGN(4096);
}
end = .;
/DISCARD/
: {
*(.comment)
*(.eh_frame)
*(.note.gnu.build-id)
}
}
and my makefile:
bin/UmbrellaOS.img: bin/boot.bin bin/kernel.bin bin/zeros.bin
cat $^ > $#
bin/kernel.bin: tmp/kernel_entry.o tmp/kernel.o
x86_64-elf-ld -o $# -T link.ld $^
tmp/kernel.o: src/kernel/main.c
x86_64-elf-gcc -ffreestanding -m64 -g -c $^ -o $#
Edit:
To be more specific I use QEMU to test my OS upon starting QEMU it instantly closes. It should also be noted that if I try something like this:
int global;
void kmain()
{
return;
global = 0;
}
it works for some reason.
I can see a green L printed to the screen which is the last thing my bootloader does before passing control to the kernel after long mode has been entered.
btw here is my bootloader:
[bits 16]
[org 0x7C00]
KERNEL_LOC equ 0x0500
_start:
mov [_BootDisk], dl
xor ax, ax
mov ds, ax
mov es, ax
mov ss, ax
mov bp, 0x7BFF
mov sp, bp
push 0x7E00 ; buffer
push 1 ; sectors to read
push 2 ; sector num
call DiskRead
jc .error
push ebx
pushfd
pop eax
mov ebx, eax
xor eax, 0x200000
push eax
popfd
pushfd
pop eax
cmp eax, ebx
jnz .supported
push _CpuErrorString
call Print
jmp .error
.supported:
mov eax, 0x80000000
cpuid
cmp eax, 0x80000001
jb .no64
mov eax, 0x80000001
cpuid
test edx, 1 << 29
jnz .is64
.no64:
push _64ErrorString
call Print
jmp .error
.is64:
push 0x8000
call MapMem
push KERNEL_LOC ; buffer
push 8 ; sectors to read
push 3 ; sector num
call DiskRead
jc .error
cli
lgdt [GDT_descriptor]
mov eax, cr0
or eax, 1
mov cr0, eax
jmp CODE_SEG:protected_mode
.error:
jmp $
Print:
push bp
mov bp, sp
mov bx, [bp+4]
mov ah, 0x0E
.loop:
mov al, [bx]
cmp al, 0
je .end
int 0x10
inc bx
jmp .loop
.end:
mov sp, bp
pop bp
ret 2
DiskRead:
push bp
mov bp, sp
mov ah, 0x02
mov al, [bp+6]
mov ch, 0
mov cl, [bp+4]
mov dh, 0
mov dl, [_BootDisk]
mov bx, [bp+8]
int 0x13
cmp al, [bp+6]
je .end
jnc .end
push _DiskErrorString
call Print
.end:
mov sp, bp
pop bp
ret 6
MapMem:
push bp
mov bp, sp
mov si, [bp+4]
mov di, [bp+4]
add di, 4
xor ebx, ebx
mov edx, 0x0534D4150
mov eax, 0xE820
mov [di+20], dword 1
mov ecx, 24
int 0x15
jc .failed
mov edx, 0x0534D4150
cmp eax, edx
jne .failed
test ebx, ebx
je .failed
.loop:
mov eax, 0xE820
mov [di+20], dword 1
mov ecx, 24
int 0x15
jc .finish
mov edx, 0x0534D4150
.jmpin:
jcxz .skip
cmp cl, 20
jbe .notext
test byte [di+20], 1
je .skip
.notext:
mov ecx, [di+8]
or ecx, [di+12]
jz .skip
inc dword [si]
add di, 24
.skip:
test ebx, ebx
jne .loop
.finish:
clc
jmp .end
.failed:
push _MemErrorString
call Print
stc
jmp .end
.end:
mov sp, bp
pop bp
ret 2
_BootDisk: db 0
_DiskErrorString: db "Disk read error!", 13, 10, 0
_MemErrorString: db "Memory mapping failed!", 13, 10, 0
_CpuErrorString: db "CPUID not supported!", 13, 10, 0
_64ErrorString: db "x64 bits not supported!", 13, 10, 0
CODE_SEG equ GDT_code - GDT_start
DATA_SEG equ GDT_data - GDT_start
GDT_start:
GDT_null:
dd 0x0
dd 0x0
GDT_code:
dw 0xffff
dw 0x0
db 0x0
db 0b10011010
db 0b11001111
db 0x0
GDT_data:
dw 0xffff
dw 0x0
db 0x0
db 0b10010010
db 0b11001111
db 0x0
GDT_end:
GDT_descriptor:
dw GDT_end - GDT_start - 1
dd GDT_start
times 510-($-$$) db 0
dw 0xAA55
[bits 32]
protected_mode:
mov ax, DATA_SEG
mov ds, ax
mov ss, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ebp, 0x90000
mov esp, ebp
call Clear
mov ebx, VGA_MEM
mov byte [ebx], 'P'
inc ebx
mov byte [ebx], 14
mov eax, cr0
and eax, ~(1 << 31)
mov cr0, eax
mov edi, 0x1000
mov cr3, edi
xor eax, eax
mov ecx, 4096
rep stosd
mov edi, cr3
mov dword [edi], 0x2003
add edi, 0x1000
mov dword [edi], 0x3003
add edi, 0x1000
mov dword [edi], 0x4003
add edi, 0x1000
mov ebx, 0x00000003
mov ecx, 512
.set_entry:
mov dword [edi], ebx
add ebx, 0x1000
add edi, 8
loop .set_entry
mov eax, cr4
or eax, 1 << 5
mov cr4, eax
mov ecx, 0xC0000080
rdmsr
or eax, 1 << 8
wrmsr
mov eax, cr0
or eax, 1 << 31
mov cr0, eax
lgdt [GDT.Pointer]
jmp GDT.Code:long_mode
jmp $
Clear:
push ebp
mov ebp, esp
mov ecx, VGA_SIZE
mov eax, VGA_MEM
.loop:
mov byte [eax], 0
inc eax
loop .loop
mov esp, ebp
pop ebp
ret
PRESENT equ 1 << 7
NOT_SYS equ 1 << 4
EXEC equ 1 << 3
RW equ 1 << 1
ACCESSED equ 1 << 0
GRAN_4K equ 1 << 7
SZ_32 equ 1 << 6
LONG_MODE equ 1 << 5
GDT:
.Null: equ $ - GDT
dq 0
.Code: equ $ - GDT
dd 0xFFFF
db 0
db PRESENT | NOT_SYS | EXEC | RW
db GRAN_4K | LONG_MODE | 0xF
db 0
.Data: equ $ - GDT
dd 0xFFFF
db 0
db PRESENT | NOT_SYS | RW
db GRAN_4K | SZ_32 | 0xF
db 0
.TSS: equ $ - GDT
dd 0x00000068
dd 0x00CF8900
.Pointer:
dw $ - GDT - 1
dq GDT
[bits 64]
long_mode:
cli
mov ax, GDT.Data
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ss, ax
mov rbp, 0x0007FFFF
mov rsp, rbp
mov rbx, VGA_MEM
mov byte [rbx], 'L'
inc rbx
mov byte [rbx], 2
jmp KERNEL_LOC
VGA_MEM equ 0xB8000
VGA_WIDTH equ 80
VGA_HEIGHT equ 25
VGA_STRIDE equ 2
VGA_SIZE equ VGA_WIDTH * VGA_STRIDE * VGA_HEIGHT
VGA_LENGTH equ VGA_WIDTH * VGA_HEIGHT
times 1024-($-$$) db 0
And for anyone wanting to see the big picture here's the Github repository I made.
The problem was that I simply forgot that I put my page table structures at 0x1000 and accidentally overrode them when loading my kernel at 0x0500.
I ended up leaving the structures at 0x1000 and moved my kernel to 0x5000.
This was a rather simple problem but I would recommend that you still take a look at the comments because there's still a lot of useful information and things to consider.
First: ALWAYS Initialize variables especially global ones.
Second: The problem is surely from your bootloader, can you edit the post to show us how you load your kernel?
Try objdump to see if the variable is declared, use -monitor stdio with QEMU and check the value of CR2 Register, it may be a page fault due to the second problem.
Here is a solution to check if the variable really has a valid pointer:
You can remove these edits after everything is ok.
instead of :
jmp KERNEL_LOC
do:
call KERNEL_LOC ; RAX Has the pointer of the global variable
jmp $
In kmain just type:
return &global
Then run it on QEMU and type in the console info registers, RAX should contain the pointer of the variable named global.

when writing 64bit reverse shell in assembly got stuck at createrprocessA api

hello i am writing windows 64bit reverse shell in assembly and after gett connected to the targetmachine ip, i want to create process to spwan a shell, fistly i try to write startinfo struct for createprocess api, but after then i pass all the parameters to the function but it doesn't work, and here is full code https://pastebin.com/6Ft2jCMX
;STARTUPINFOA+PROCESS_INFORMATION
;----------------------------------
push byte 0x12 ; We want to place (18 * 4) = 72 null bytes onto the stack
pop rcx ; Set ECX for the loop
xor r11,r11
push_loop:
push r11 ; push a null dword
loop push_loop ; keep looping untill we have pushed enough nulls
lea r12,[rsp]
mov dl,104
xor rcx,rcx
mov [r12],dword edx
mov [r12+4],rcx
mov [r12+12],rcx
mov [r12+20],rcx
mov [r12+24],rcx
xor rdx,rdx
mov dl,255
inc rdx
mov [r12+0x3c],edx
mov [r12+0x50],r14 ; HANDLE hStdInput;
mov [r12+0x58],r14 ; HANDLE hStdOutput;
mov [r12+0x60],r14 ;HANDLE hStdError;
;createprocessA_calling
sub rsp, 0x70
push 'cmdA'
mov [rsp+3],byte dl
lea rdx,[rsp]
inc rcx
mov [rsp+32],rcx
xor rcx,rcx
xor r8,r8
mov [rsp+40],r8
mov [rsp+48],r8
mov [rsp+56],r8
lea r9,[r12]
mov [rsp+64],r9
lea r9,[r12+104]
mov [rsp+72],r9
xor r9,r9
call rbx ;createprocessA
so at last when i call the createprocessA it got stuck

InitializeCriticalSection fails in NASM

UPDATE: based on comments below, I revised the code below to add a struc and a pointer (new or revised code has "THIS IS NEW" or "THIS IS UPDATED" beside the code). Now the program does not crash, so the pointer is initialized, but the programs hangs at EnterCriticalSection. I suspect that in translating the sample MASM code below into NASM syntax, I did not declare the struc correctly. Any ideas? Thanks very much.
ORIGINAL QUESTION:
Below is a simple test program in 64-bit NASM, to test a critical section in
Windows. This is a dll and the entry point is Main_Entry_fn, which calls Init_Cores_fn, where we initialize four threads (cores) to call Test_fn.
I suspect that the problem is the pointer to the critical section. None of the online resources specifies what that pointer is. The doc "Using Critical Section Objects" at https://learn.microsoft.com/en-us/windows/desktop/sync/using-critical-section-objects shows a C++ example where the pointer appears to be relevant only to EnterCriticalSection and LeaveCriticalSection, but it's not a pointer to an independent object.
For those not familiar with NASM, the first parameter in a C++ signature goes into rcx and the second parameter goes into rds, but otherwise it should function the same as in C or C++. It's the same thing as InitializeCriticalSectionAndSpinCount(&CriticalSection,0x00000400) in C++.
Here's the entire program:
; Header Section
[BITS 64]
[default rel]
extern malloc, calloc, realloc, free
global Main_Entry_fn
export Main_Entry_fn
extern CreateThread, CloseHandle, ExitThread
extern WaitForMultipleObjects, GetCurrentThreadId
extern InitializeCriticalSectionAndSpinCount, EnterCriticalSection
extern LeaveCriticalSection, DeleteCriticalSection, InitializeCriticalSection
struc CRITICAL_SECTION ; THIS IS NEW
.cs_quad: resq 5
endstruc
section .data align=16
const_1000000000: dq 1000000000
ThreadID: dq 0
TestInfo: times 20 dq 0
ThreadInfo: times 3 dq 0
ThreadInfo2: times 3 dq 0
ThreadInfo3: times 3 dq 0
ThreadInfo4: times 3 dq 0
ThreadHandles: times 4 dq 0
Division_Size: dq 0
Start_Byte: dq 0
End_Byte: dq 0
Return_Data_Array: times 4 dq 0
Core_Number: dq 0
const_inf: dq 0xFFFFFFFF
SpinCount: dq 0x00000400
CriticalSection: ; THIS IS NEW
istruc CRITICAL_SECTION
iend
section .text
; ______________________________________
Init_Cores_fn:
; Calculate the data divisions
mov rax,[const_1000000000]
mov rbx,4 ;cores
xor rdx,rdx
div rbx
mov [End_Byte],rax
mov [Division_Size],rax
mov rax,0
mov [Start_Byte],rax
; Populate the ThreadInfo arrays to pass for each core
; ThreadInfo: (1) startbyte; (2) endbyte; (3) Core_Number
mov rdi,ThreadInfo
mov rax,[Start_Byte]
mov [rdi],rax
mov rax,[End_Byte]
mov [rdi+8],rax
mov rax,[Core_Number]
mov [rdi+16],rax
call DupThreadInfo ; Create ThreadInfo arrays for cores 2-4
mov rbp,rsp ; preserve caller's stack frame
sub rsp,56 ; Shadow space (was 32)
; _____
; Create four threads
label_0:
mov rax,[Core_Number]
cmp rax,0
jne sb2
mov rdi,ThreadInfo
jmp sb5
sb2:cmp rax,8
jne sb3
mov rdi,ThreadInfo2
jmp sb5
sb3:cmp rax,16
jne sb4
mov rdi,ThreadInfo3
jmp sb5
sb4:cmp rax,24
jne sb5
mov rdi,ThreadInfo4
sb5:
; _____
; Create Threads
mov rcx,0 ; lpThreadAttributes (Security Attributes)
mov rdx,0 ; dwStackSize
mov r8,Test_fn ; lpStartAddress (function pointer)
mov r9,rdi ; lpParameter (array of data passed to each core)
mov rax,0
mov [rsp+32],rax ; use default creation flags
mov rdi,ThreadID
mov [rsp+40],rdi ; ThreadID
call CreateThread
; Move the handle into ThreadHandles array (returned in rax)
mov rdi,ThreadHandles
mov rcx,[Core_Number]
mov [rdi+rcx],rax
mov rdi,TestInfo
mov [rdi+rcx],rax
mov rax,[Core_Number]
add rax,8
mov [Core_Number],rax
mov rbx,32 ; Four cores
cmp rax,rbx
jl label_0
mov rcx,CriticalSection ; THIS IS REVISED
mov rdx,[SpinCount]
call InitializeCriticalSectionAndSpinCount
; _____
; Wait
mov rcx,4 ;rax ; number of handles
mov rdx,ThreadHandles ; pointer to handles array
mov r8,1 ; wait for all threads to complete
mov r9,[const_inf] ;4294967295 ;0xFFFFFFFF
call WaitForMultipleObjects
; _____
mov rsp,rbp ; can we push rbp so we can use it internally?
jmp label_900
; ______________________________________
Test_fn:
mov rdi,rcx
mov r14,[rdi] ; Start_Byte
mov r15,[rdi+8] ; End_Byte
mov r13,[rdi+16] ; Core_Number
;______
; while(n < 1000000000)
label_401:
cmp r14,r15
jge label_899
mov rcx,CriticalSection
call EnterCriticalSection
; n += 1
add r14,1
mov rcx,CriticalSection
call LeaveCriticalSection
jmp label_401
;______
label_899:
mov rdi,Return_Data_Array
mov [rdi+r13],r14
mov rbp,ThreadHandles
mov rax,[rbp+r13]
call ExitThread
ret
; __________
label_900:
mov rcx,CriticalSection
call DeleteCriticalSection
mov rdi,Return_Data_Array
mov rax,rdi
ret
; __________
; Main Entry
Main_Entry_fn:
push rdi
push rbp
call Init_Cores_fn
pop rbp
pop rdi
ret
DupThreadInfo:
mov rdi,ThreadInfo2
mov rax,8
mov [rdi+16],rax ; Core Number
mov rax,[Start_Byte]
add rax,[Division_Size]
mov [rdi],rax
mov rax,[End_Byte]
add rax,[Division_Size]
mov [rdi+8],rax
mov [Start_Byte],rax
mov rdi,ThreadInfo3
mov rax,16
mov [rdi+16],rax ; Core Number
mov rax,[Start_Byte]
mov [rdi],rax
add rax,[Division_Size]
mov [rdi+8],rax
mov [Start_Byte],rax
mov rdi,ThreadInfo4
mov rax,24
mov [rdi+16],rax ; Core Number
mov rax,[Start_Byte]
mov [rdi],rax
add rax,[Division_Size]
mov [rdi+8],rax
mov [Start_Byte],rax
ret
The code above shows the functions in three separate places, but of course we test them one at a time (but they all fail).
To summarize, my question is why do InitializeCriticalSection and InitializeCriticalSectionAndSpinCount both fail in the code above? The inputs are dead simple, so I don't understand why it should not work.
InitializeCriticalSection take pointer to critical section object
The process is responsible for allocating the memory used by a
critical section object, which it can do by declaring a variable of
type CRITICAL_SECTION.
so code can be something like (i use masm syntax)
CRITICAL_SECTION STRUCT
DQ 5 DUP(?)
CRITICAL_SECTION ends
extern __imp_InitializeCriticalSection:QWORD
extern __imp_InitializeCriticalSectionAndSpinCount:QWORD
.DATA?
CriticalSection CRITICAL_SECTION {}
.CODE
lea rcx,CriticalSection
;mov edx,400h
;call __imp_InitializeCriticalSectionAndSpinCount
call __imp_InitializeCriticalSection
also you need declare all imported functions as
extern __imp_funcname:QWORD
instead
extern funcname

assembly code to scroll the screen one line down clearing the first line in the screen and then scrolls one line up if a key is pressed

I'm trying to scroll 1 line down then up but
a) I don't know how to test this code
b) I'm not sure which interrupt to use for "when a key is pressed"
I'd be much grateful for your help
Here's my code :
Data_segment_name segment para
firstline db 160 dup(0)
Data_segment_name ends
Stack_segment_name segment para stack
Stack_segment_name ends
Code_segment_name segment
Main_prog proc far
assume SS:Stack_segment_name,CS:Code_segment_name,DS:Data_segment_name
mov AX,Data_segment_name ; load the starting address of the data
mov DS,AX ; segment into DS reg.
;code scroll down (clear first line) then scroll back up(restore cleared line)
mov es,ax ;save first line
lea di,firstline
mov ax,0b800h
mov ds,ax
mov ax,0
mov si,ax
cld
mov cx,80
rep movsw ;save ends
;now let's scroll down :)
mov ax,0b800h
mov es,ax
mov ax,0
mov di,ax
mov ax,160
mov si,ax
cld
mov cx,24*80
rep movsw
;now let's scroll up :)
int 21h ;check
mov ax,160*24
mov si,ax
mov ax,160*25
mov di,ax
std
mov cx,24*80
rep movsw
;restore first line
mov AX,Data_segment_name ; load the starting address of the data
mov DS,AX ; segment into DS reg.
lea si,firstline
mov ax,0
mov di,ax
cld
mov cx,80
rep movsw
mov ax,4c00h ; exit program
int 21h
Main_prog endp
Code_segment_name ends
end Main_prog
Ad a):
The test tool is called "debugger". I recommend Turbo Debugger (google for it).
Ad b):
Ralf Brown's interrupt list and TechHelp are good references. At a glance: Int 10h is for video, Int 16h is for keyboard, Int 21h is for MS-DOS.
You should switch to the simplified segment directives .CODE, .DATA, .STACK and to procedural programming PROC, ENDP. When your project grows, it will help to keep track of it.
Example:
.MODEL small
.STACK 1000h
.DATA
firstline db 160 dup(0)
.CODE
save_firstline PROC
push ds
mov ax, ds
mov es, ax
lea di, firstline
mov ax, 0b800h
mov ds, ax
mov ax, 0
mov si, ax
mov cx, 80
rep movsw
pop ds
ret
save_firstline ENDP
restore_firstline PROC
lea si, firstline
mov ax, 0b800h
mov es, ax
mov ax, 0
mov di, ax
mov cx, 80
rep movsw
ret
restore_firstline ENDP
scroll_up PROC
call save_firstline
mov ah, 6 ; http://www.ctyme.com/intr/rb-0096.htm
mov al, 1 ; number of lines to scroll
mov bh, 0 ; attribute
mov ch, 0 ; row top
mov cl, 0 ; col left
mov dh, 25 ; row bottom
mov dl, 80 ; col right
int 10h
ret
scroll_up ENDP
scroll_down PROC
mov ah, 7 ; http://www.ctyme.com/intr/rb-0097.htm
mov al, 1 ; number of lines to scroll
mov bh, 0 ; attribute
mov ch, 0 ; row top
mov cl, 0 ; col left
mov dh, 25 ; row bottom
mov dl, 80 ; col right
int 10h
call restore_firstline
ret
scroll_down ENDP
main PROC
mov ax, #data
mov ds, ax
waitForKey: ; http://webpages.charter.net/danrollins/techhelp/0229.HTM
mov ah, 1
int 16h
jnz gotKey ; jmp if key is ready
jmp waitForKey ; loop back and check for a key
gotKey:
mov ah, 0 ; key is ready, get it
int 16h ; now process the key
cmp ah, 48h ; <UP>
jne #F
call scroll_up
jmp waitforKey
##:
cmp ah, 50h ; <DOWN>
jne #F
call scroll_down
jmp waitForKey
##:
cmp al, 1Bh ; <ESC>
jne waitForKey
mov ax, 4C00h
int 21h
main ENDP
END main

Help with Assembly. Segmentation fault when compiling samples on Mac OS X

I'm trying to learn assembly using Dr Paul Carter's pcasm book: http://www.drpaulcarter.com/pcasm/
The author doesn't packaged Mac OS X samples, then I've started using from linux sources. Here is the first sample, that uses his library asm_io.
I'm getting Segmentation Fault when running it. Why? What need to be changed to run in mac?
I think if you know asm, maybe you can tell me what's happening.
Here's the sources.
asm_io.asm:
;
; file: asm_io.asm
; Assembly I/O routines
; To assemble for DJGPP
; nasm -f coff -d COFF_TYPE asm_io.asm
; To assemble for Borland C++ 5.x
; nasm -f obj -d OBJ_TYPE asm_io.asm
; To assemble for Microsoft Visual Studio
; nasm -f win32 -d COFF_TYPE asm_io.asm
; To assemble for Linux
; nasm -f elf -d ELF_TYPE asm_io.asm
; To assemble for Watcom
; nasm -f obj -d OBJ_TYPE -d WATCOM asm_io.asm
; IMPORTANT NOTES FOR WATCOM
; The Watcom compiler's C library does not use the
; standard C calling convention. For example, the
; putchar() function gets its argument from the
; the value of EAX, not the stack.
%define NL 10
%define CF_MASK 00000001h
%define PF_MASK 00000004h
%define AF_MASK 00000010h
%define ZF_MASK 00000040h
%define SF_MASK 00000080h
%define DF_MASK 00000400h
%define OF_MASK 00000800h
;
; Linux C doesn't put underscores on labels
;
%ifdef ELF_TYPE
%define _scanf scanf
%define _printf printf
%define _getchar getchar
%define _putchar putchar
%endif
;
; Watcom puts underscores at end of label
;
%ifdef WATCOM
%define _scanf scanf_
%define _printf printf_
%define _getchar getchar_
%define _putchar putchar_
%endif
%ifdef OBJ_TYPE
segment .data public align=4 class=data use32
%else
segment .data
%endif
int_format db "%i", 0
string_format db "%s", 0
reg_format db "Register Dump # %d", NL
db "EAX = %.8X EBX = %.8X ECX = %.8X EDX = %.8X", NL
db "ESI = %.8X EDI = %.8X EBP = %.8X ESP = %.8X", NL
db "EIP = %.8X FLAGS = %.4X %s %s %s %s %s %s %s", NL
db 0
carry_flag db "CF", 0
zero_flag db "ZF", 0
sign_flag db "SF", 0
parity_flag db "PF", 0
overflow_flag db "OF", 0
dir_flag db "DF", 0
aux_carry_flag db "AF", 0
unset_flag db " ", 0
mem_format1 db "Memory Dump # %d Address = %.8X", NL, 0
mem_format2 db "%.8X ", 0
mem_format3 db "%.2X ", 0
stack_format db "Stack Dump # %d", NL
db "EBP = %.8X ESP = %.8X", NL, 0
stack_line_format db "%+4d %.8X %.8X", NL, 0
math_format1 db "Math Coprocessor Dump # %d Control Word = %.4X"
db " Status Word = %.4X", NL, 0
valid_st_format db "ST%d: %.10g", NL, 0
invalid_st_format db "ST%d: Invalid ST", NL, 0
empty_st_format db "ST%d: Empty", NL, 0
;
; code is put in the _TEXT segment
;
%ifdef OBJ_TYPE
segment text public align=1 class=code use32
%else
segment .text
%endif
global read_int, print_int, print_string, read_char
global print_char, print_nl, sub_dump_regs, sub_dump_mem
global sub_dump_math, sub_dump_stack
extern _scanf, _printf, _getchar, _putchar
read_int:
enter 4,0
pusha
pushf
lea eax, [ebp-4]
push eax
push dword int_format
call _scanf
pop ecx
pop ecx
popf
popa
mov eax, [ebp-4]
leave
ret
print_int:
enter 0,0
pusha
pushf
push eax
push dword int_format
call _printf
pop ecx
pop ecx
popf
popa
leave
ret
print_string:
enter 0,0
pusha
pushf
push eax
push dword string_format
call _printf
pop ecx
pop ecx
popf
popa
leave
ret
read_char:
enter 4,0
pusha
pushf
call _getchar
mov [ebp-4], eax
popf
popa
mov eax, [ebp-4]
leave
ret
print_char:
enter 0,0
pusha
pushf
%ifndef WATCOM
push eax
%endif
call _putchar
%ifndef WATCOM
pop ecx
%endif
popf
popa
leave
ret
print_nl:
enter 0,0
pusha
pushf
%ifdef WATCOM
mov eax, 10 ; WATCOM doesn't use the stack here
%else
push dword 10 ; 10 == ASCII code for \n
%endif
call _putchar
%ifndef WATCOM
pop ecx
%endif
popf
popa
leave
ret
sub_dump_regs:
enter 4,0
pusha
pushf
mov eax, [esp] ; read FLAGS back off stack
mov [ebp-4], eax ; save flags
;
; show which FLAGS are set
;
test eax, CF_MASK
jz cf_off
mov eax, carry_flag
jmp short push_cf
cf_off:
mov eax, unset_flag
push_cf:
push eax
test dword [ebp-4], PF_MASK
jz pf_off
mov eax, parity_flag
jmp short push_pf
pf_off:
mov eax, unset_flag
push_pf:
push eax
test dword [ebp-4], AF_MASK
jz af_off
mov eax, aux_carry_flag
jmp short push_af
af_off:
mov eax, unset_flag
push_af:
push eax
test dword [ebp-4], ZF_MASK
jz zf_off
mov eax, zero_flag
jmp short push_zf
zf_off:
mov eax, unset_flag
push_zf:
push eax
test dword [ebp-4], SF_MASK
jz sf_off
mov eax, sign_flag
jmp short push_sf
sf_off:
mov eax, unset_flag
push_sf:
push eax
test dword [ebp-4], DF_MASK
jz df_off
mov eax, dir_flag
jmp short push_df
df_off:
mov eax, unset_flag
push_df:
push eax
test dword [ebp-4], OF_MASK
jz of_off
mov eax, overflow_flag
jmp short push_of
of_off:
mov eax, unset_flag
push_of:
push eax
push dword [ebp-4] ; FLAGS
mov eax, [ebp+4]
sub eax, 10 ; EIP on stack is 10 bytes ahead of orig
push eax ; EIP
lea eax, [ebp+12]
push eax ; original ESP
push dword [ebp] ; original EBP
push edi
push esi
push edx
push ecx
push ebx
push dword [ebp-8] ; original EAX
push dword [ebp+8] ; # of dump
push dword reg_format
call _printf
add esp, 76
popf
popa
leave
ret 4
sub_dump_stack:
enter 0,0
pusha
pushf
lea eax, [ebp+20]
push eax ; original ESP
push dword [ebp] ; original EBP
push dword [ebp+8] ; # of dump
push dword stack_format
call _printf
add esp, 16
mov ebx, [ebp] ; ebx = original ebp
mov eax, [ebp+16] ; eax = # dwords above ebp
shl eax, 2 ; eax *= 4
add ebx, eax ; ebx = & highest dword in stack to display
mov edx, [ebp+16]
mov ecx, edx
add ecx, [ebp+12]
inc ecx ; ecx = # of dwords to display
stack_line_loop:
push edx
push ecx ; save ecx & edx
push dword [ebx] ; value on stack
push ebx ; address of value on stack
mov eax, edx
sal eax, 2 ; eax = 4*edx
push eax ; offset from ebp
push dword stack_line_format
call _printf
add esp, 16
pop ecx
pop edx
sub ebx, 4
dec edx
loop stack_line_loop
popf
popa
leave
ret 12
sub_dump_mem:
enter 0,0
pusha
pushf
push dword [ebp+12]
push dword [ebp+16]
push dword mem_format1
call _printf
add esp, 12
mov esi, [ebp+12] ; address
and esi, 0FFFFFFF0h ; move to start of paragraph
mov ecx, [ebp+8]
inc ecx
mem_outer_loop:
push ecx
push esi
push dword mem_format2
call _printf
add esp, 8
xor ebx, ebx
mem_hex_loop:
xor eax, eax
mov al, [esi + ebx]
push eax
push dword mem_format3
call _printf
add esp, 8
inc ebx
cmp ebx, 16
jl mem_hex_loop
mov eax, '"'
call print_char
xor ebx, ebx
mem_char_loop:
xor eax, eax
mov al, [esi+ebx]
cmp al, 32
jl non_printable
cmp al, 126
jg non_printable
jmp short mem_char_loop_continue
non_printable:
mov eax, '?'
mem_char_loop_continue:
call print_char
inc ebx
cmp ebx, 16
jl mem_char_loop
mov eax, '"'
call print_char
call print_nl
add esi, 16
pop ecx
loop mem_outer_loop
popf
popa
leave
ret 12
; function sub_dump_math
; prints out state of math coprocessor without modifying the coprocessor
; or regular processor state
; Parameters:
; dump number - dword at [ebp+8]
; Local variables:
; ebp-108 start of fsave buffer
; ebp-116 temp double
; Notes: This procedure uses the Pascal convention.
; fsave buffer structure:
; ebp-108 control word
; ebp-104 status word
; ebp-100 tag word
; ebp-80 ST0
; ebp-70 ST1
; ebp-60 ST2 ...
; ebp-10 ST7
;
sub_dump_math:
enter 116,0
pusha
pushf
fsave [ebp-108] ; save coprocessor state to memory
mov eax, [ebp-104] ; status word
and eax, 0FFFFh
push eax
mov eax, [ebp-108] ; control word
and eax, 0FFFFh
push eax
push dword [ebp+8]
push dword math_format1
call _printf
add esp, 16
;
; rotate tag word so that tags in same order as numbers are
; in the stack
;
mov cx, [ebp-104] ; ax = status word
shr cx, 11
and cx, 7 ; cl = physical state of number on stack top
mov bx, [ebp-100] ; bx = tag word
shl cl,1 ; cl *= 2
ror bx, cl ; move top of stack tag to lowest bits
mov edi, 0 ; edi = stack number of number
lea esi, [ebp-80] ; esi = address of ST0
mov ecx, 8 ; ecx = loop counter
tag_loop:
push ecx
mov ax, 3
and ax, bx ; ax = current tag
or ax, ax ; 00 -> valid number
je valid_st
cmp ax, 1 ; 01 -> zero
je zero_st
cmp ax, 2 ; 10 -> invalid number
je invalid_st
push edi ; 11 -> empty
push dword empty_st_format
call _printf
add esp, 8
jmp short cont_tag_loop
zero_st:
fldz
jmp short print_real
valid_st:
fld tword [esi]
print_real:
fstp qword [ebp-116]
push dword [ebp-112]
push dword [ebp-116]
push edi
push dword valid_st_format
call _printf
add esp, 16
jmp short cont_tag_loop
invalid_st:
push edi
push dword invalid_st_format
call _printf
add esp, 8
cont_tag_loop:
ror bx, 2 ; mov next tag into lowest bits
inc edi
add esi, 10 ; mov to next number on stack
pop ecx
loop tag_loop
frstor [ebp-108] ; restore coprocessor state
popf
popa
leave
ret 4
asm_io.inc:
extern read_int, print_int, print_string
extern read_char, print_char, print_nl
extern sub_dump_regs, sub_dump_mem, sub_dump_math, sub_dump_stack
%macro dump_regs 1
push dword %1
call sub_dump_regs
%endmacro
;
; usage: dump_mem label, start-address, # paragraphs
%macro dump_mem 3
push dword %1
push dword %2
push dword %3
call sub_dump_mem
%endmacro
%macro dump_math 1
push dword %1
call sub_dump_math
%endmacro
%macro dump_stack 3
push dword %3
push dword %2
push dword %1
call sub_dump_stack
%endmacro
first.asm
;
; file: first.asm
; First assembly program. This program asks for two integers as
; input and prints out their sum.
;
; To create executable:
; Using djgpp:
; nasm -f coff first.asm
; gcc -o first first.o driver.c asm_io.o
;
; Using Linux and gcc:
; nasm -f elf first.asm
; gcc -o first first.o driver.c asm_io.o
;
; Using Borland C/C++
; nasm -f obj first.asm
; bcc32 first.obj driver.c asm_io.obj
;
; Using MS C/C++
; nasm -f win32 first.asm
; cl first.obj driver.c asm_io.obj
;
; Using Open Watcom
; nasm -f obj first.asm
; wcl386 first.obj driver.c asm_io.obj
%include "asm_io.inc"
;
; initialized data is put in the .data segment
;
segment .data
;
; These labels refer to strings used for output
;
prompt1 db "Enter a number: ", 0 ; don't forget nul terminator
prompt2 db "Enter another number: ", 0
outmsg1 db "You entered ", 0
outmsg2 db " and ", 0
outmsg3 db ", the sum of these is ", 0
;
; uninitialized data is put in the .bss segment
;
segment .bss
;
; These labels refer to double words used to store the inputs
;
input1 resd 1
input2 resd 1
;
; code is put in the .text segment
;
segment .text
global _asm_main
_asm_main:
enter 0,0 ; setup routine
pusha
mov eax, prompt1 ; print out prompt
call print_string
call read_int ; read integer
mov [input1], eax ; store into input1
mov eax, prompt2 ; print out prompt
call print_string
call read_int ; read integer
mov [input2], eax ; store into input2
mov eax, [input1] ; eax = dword at input1
add eax, [input2] ; eax += dword at input2
mov ebx, eax ; ebx = eax
dump_regs 1 ; dump out register values
dump_mem 2, outmsg1, 1 ; dump out memory
;
; next print out result message as series of steps
;
mov eax, outmsg1
call print_string ; print out first message
mov eax, [input1]
call print_int ; print out input1
mov eax, outmsg2
call print_string ; print out second message
mov eax, [input2]
call print_int ; print out input2
mov eax, outmsg3
call print_string ; print out third message
mov eax, ebx
call print_int ; print out sum (ebx)
call print_nl ; print new-line
popa
mov eax, 0 ; return back to C
leave
ret
drive.c:
#include "cdecl.h"
int PRE_CDECL asm_main( void ) POST_CDECL;
int main()
{
int ret_status;
ret_status = asm_main();
return ret_status;
}
Now I compile it using:
nasm -f macho first.asm
nasm -f macho asm_io.asm
gcc first.o asm_io.o driver.c -o first -arch i386
Then run:
./first
Segmentation fault
It happens only when I'm using asm_io lib.
Thank you,
Daniel Koch
You seem to be using 32-bit assembly code here. One big difference among 32-bit Mac OS X and 32-bit Windows or Linux is that Mac requires the stack to be 16-byte aligned whenever you CALL a function. In other words, at the point in your code where you have a CALL instruction, it is required that ESP = #######0h.
The following may be interesting reads:
http://blogs.embarcadero.com/eboling/2009/05/20/5607
www.agner.org/optimize/calling_conventions.pdf

Resources