I am using Linux 2.6.26 kernel version and I am trying to change the interrupt descriptor table using a kernel module. I am only trying to change the page fault table entry here. So I make a copy of the original IDT and make changes to the page fault table entry only. The objective of the ISR is to print out information of the page fault before calling the original Page fault handler. But the kernel just crashes once I load it with insmod i.e it specifically crashed with the "loadIDTR" function. With further debugging, I found out that by not changing any entry if I load the IDTR it works fine. I am out of ideas.
I have pasted the code below
#include <linux/module.h> // for init_module()
#include <linux/init.h>
#include <linux/mm.h> // for get_free_page()
#include <linux/sched.h>
#include <linux/spinlock.h>
#define SUCCESS 0
#define PGFAULT_INT 0x0E
static char modname[] = "pgfaults";
static unsigned short oldidtr[3], newidtr[3];
static unsigned long long *oldidt, *newidt;
static unsigned long isr_orig, kpage;
static char *why[]={ "sra", "srp", "swa", "swp", "ura", "urp", "uwa", "uwp" };
unsigned long long gate_desc_orig,gate_desc_orig1;
static void my_intrept( unsigned long *tos )
{
// stack-layout:
// es,ds,edi,esi,ebp,esp,ebx,edx,ecx,eax,err,eip,cs,efl
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13
volatile unsigned long vaddr;
struct task_struct *task = current;
unsigned long err = tos[ 10 ];
unsigned long eip = tos[ 11 ];
static int count = 0;
int exe, len = 0;
char msg[80]="";
// get the faulting virtual address from register CR2
asm(" mov %%cr2, %%eax ; movl %%eax, %0 " : "=m" (vaddr) );
// construct the diagnostic message
len += sprintf( msg+len, "#%-6d ", ++count );
len += sprintf( msg+len, "%16s ", task->comm );
len += sprintf( msg+len, "pid=%-5d ", task->pid );
len += sprintf( msg+len, "CR2=%08X ", (unsigned int) vaddr );
len += sprintf( msg+len, "EIP=%08X ", (unsigned int) eip );
len += sprintf( msg+len, "%s ", why[ err ] );
// note if an instruction-fetch caused the page-fault
if ( vaddr == eip ) exe = 'x'; else exe = ' ';
len += sprintf( msg+len, "%c ", exe );
// print this diagnostic message to the kernel log
printk( "<1> %s \n", msg );
}
//---------- NEW PAGE-FAULT EXCEPTION-HANDLER ---------//
asmlinkage void isr0x0E( void );
asm(" .text ");
asm(" .type isr0x0E, #function ");
asm("isr0x0E: ");
asm(" pushal ");
asm(" pushl %ds ");
asm(" pushl %es ");
//
asm(" movl %ss, %eax ");
asm(" movl %eax, %ds ");
asm(" movl %eax, %es ");
//
asm(" pushl %esp ");
asm(" call my_intrept ");
asm(" addl $4, %esp ");
//
asm(" popl %es ");
asm(" popl %ds ");
asm(" popal ");
asm(" jmp *isr_orig ");
//-------------------------------------------------------//
static void load_IDTR( void *regimage )
{
asm(" lidt %0 " : : "m" (*(unsigned short*)regimage) );
}
int pgfault_init( void )
{
int i;
unsigned long long gate_desc,gate_desc1,gate_desc2;
spinlock_t lock =SPIN_LOCK_UNLOCKED;
unsigned long flags;
unsigned short selector1;
// allocate a mapped kernel page for our new IDT
kpage =__get_free_page( GFP_KERNEL);
if ( !kpage ) return -ENOMEM;
// initialize our other global variables
asm(" sidt oldidtr ; sidt newidtr ");
memcpy( newidtr+1, &kpage, sizeof( kpage ) );
oldidt = (unsigned long long *)(*(unsigned long*)(oldidtr+1));
newidt = (unsigned long long *)(*(unsigned long*)(newidtr+1));
// extract and save entry-point to original page-pault handler
gate_desc_orig = oldidt[ PGFAULT_INT ];
gate_desc =gate_desc_orig & 0xFFFF00000000FFFF;
gate_desc |= ( gate_desc >> 32 );
isr_orig = (unsigned long)gate_desc;
// initialize our new Interrupt Descriptor Table
memcpy( newidt, oldidt, 256*sizeof( unsigned long long ) );
gate_desc_orig1 = (unsigned long)isr0x0E;
gate_desc = gate_desc_orig1 & 0x00000000FFFFFFFF;
gate_desc = gate_desc | ( gate_desc << 32 );
gate_desc1= 0xFFFF0000;
gate_desc1= gate_desc1 << 32;
gate_desc1= gate_desc1 | 0x0000FFFF;
gate_desc = gate_desc & gate_desc1;
gate_desc2= 0x0000EF00;
gate_desc2= gate_desc2 <<32;
gate_desc2= gate_desc2 | 0x00100000;
gate_desc = gate_desc | gate_desc2; // trap-gate
//Part which is most likely creating a fault when loading the idtr
newidt[ PGFAULT_INT ] = gate_desc;
//**********************************************
// activate the new IDT
spin_lock_irqsave(&lock,flags);
load_IDTR( newidtr );
spin_unlock_irqrestore(&lock,flags);
// smp_call_function( load_IDTR, oldidtr, 1, 1 );
return SUCCESS;
}
void pgfault_exit( void )
{
// reactivate the old IDT
unsigned long flags;
spinlock_t lock =SPIN_LOCK_UNLOCKED;
spin_lock_irqsave(&lock,flags);
load_IDTR( oldidtr );
spin_unlock_irqrestore(&lock,flags);
// smp_call_function( load_IDTR, oldidtr, 1, 1 );
// release allocated kernel page
if ( kpage ) free_page( kpage );
}
EXPORT_SYMBOL_GPL(my_intrept);
MODULE_LICENSE("GPL");
module_init( pgfault_init);
module_exit( pgfault_exit);
Why don't you use kernel function instead of fiddling with bits manually!
check it (it is initialization module func):
struct desc_ptr newidtr;
gate_desc *oldidt, *newidt;
store_idt(&__IDT_register);
oldidt = (gate_desc *)__IDT_register.address;
__IDT_page =__get_free_page(GFP_KERNEL);
if(!__IDT_page)
return -1;
newidtr.address = __IDT_page;
newidtr.size = __IDT_register.size;
newidt = (gate_desc *)newidtr.address;
memcpy(newidt, oldidt, __IDT_register.size);
pack_gate(&newidt[PGFAULT_NR], GATE_INTERRUPT, (unsigned long)isr0x0E, 0, 0, __KERNEL_CS);
__load_idt((void *)&newidtr);
smp_call_function(__load_idt, &newidtr, 0, 1);
return 0;
I have tested it it works!
Your segment selector in your trap gate descriptor appears to be hardcoded to 0x0010, when it should be __KERNEL_CS (which is 0x0060 in the 2.6.26 kernel sources I have).
By the way, this is pretty baroque:
gate_desc_orig1 = (unsigned long)isr0x0E;
gate_desc = gate_desc_orig1 & 0x00000000FFFFFFFF;
gate_desc = gate_desc | ( gate_desc << 32 );
gate_desc1= 0xFFFF0000;
gate_desc1= gate_desc1 << 32;
gate_desc1= gate_desc1 | 0x0000FFFF;
gate_desc = gate_desc & gate_desc1;
gate_desc2= 0x0000EF00;
gate_desc2= gate_desc2 <<32;
gate_desc2= gate_desc2 | 0x00100000;
gate_desc = gate_desc | gate_desc2; // trap-gate
You could simplify that down to (with the __KERNEL_CS fix):
gate_desc = (unsigned long long)isr0x0E * 0x100000001ULL;
gate_desc &= 0xFFFF00000000FFFFULL;
gate_desc |= 0x0000EF0000000000ULL; // trap-gate
gate_desc |= (unsigned long long)__KERNEL_CS << 16;
For reference, here is a working implementation of a custom page fault handler for the Linux x86_64 architecture. I just tested this module myself with kernel 3.2, it works perfectly.
https://github.com/RichardUSTC/intercept-page-fault-handler
Related
I'm trying to get the build number from a 64-bit PE file (from the resource directory), namely the Windows bootmanager (bootmgfw.efi), using an UEFI bootloader. Currently I'm doing the following: Load the PE file from disk using BootServices->LoadImage and get the image base address using BootServices->OpenProtocol. Then I'm using the base address for the function below (HbeGetBuildNumber). Note that this exact function works flawless on bootmgfw.efi if I load the file using MapViewOfFile from a Windows executable, but not when using it inside my bootloader.
Code to obtain the build number by image base address:
#define IMAGE_DOS_SIGNATURE 0x5A4D
#define IMAGE_NT_SIGNATURE 0x00004550
#define TRUE 1
#define FALSE 0
#define NULL 0
#define FIELD_OFFSET(type, field) ((long)(long long)&(((type *)0)->field))
#define MAKEINTRESOURCE(i) ((PWSTR)((DWORD_PTR)((WORD)(i))))
#define HIWORD(l) ((WORD)((((DWORD_PTR)(l)) >> 16) & 0xffff))
#define RT_VERSION MAKEINTRESOURCE(16)
#define DUMMYSTRUCTNAME
#define DUMMYUNIONNAME
#define DUMMYSTRUCTNAME2
#define DUMMYUNIONNAME2
typedef int BOOL, * PBOOL;
typedef unsigned short WCHAR_T, *PWSTR;
typedef unsigned long DWORD, *PDWORD;
typedef unsigned long long DWORD_PTR, * PDWORD_PTR;
typedef unsigned char BYTE, * PBYTE;
typedef BYTE UCHAR, * PUCHAR;
typedef void* PVOID;
typedef unsigned short WORD, *PWORD;
typedef struct _IMAGE_RUNTIME_FUNCTION_ENTRY {
DWORD BeginAddress;
DWORD EndAddress;
union {
DWORD UnwindInfoAddress;
DWORD UnwindData;
} DUMMYUNIONNAME;
} RUNTIME_FUNCTION, * PRUNTIME_FUNCTION, _IMAGE_RUNTIME_FUNCTION_ENTRY, * _PIMAGE_RUNTIME_FUNCTION_ENTRY;
typedef struct _IMAGE_SECTION_HEADER {
BYTE Name[8];
union {
DWORD PhysicalAddress;
DWORD VirtualSize;
} Misc;
DWORD VirtualAddress;
DWORD SizeOfRawData;
DWORD PointerToRawData;
DWORD PointerToRelocations;
DWORD PointerToLinenumbers;
WORD NumberOfRelocations;
WORD NumberOfLinenumbers;
DWORD Characteristics;
} IMAGE_SECTION_HEADER, * PIMAGE_SECTION_HEADER;
typedef struct _IMAGE_DATA_DIRECTORY {
DWORD VirtualAddress;
DWORD Size;
} IMAGE_DATA_DIRECTORY, * PIMAGE_DATA_DIRECTORY;
typedef struct _IMAGE_OPTIONAL_HEADER64 {
WORD Magic;
BYTE MajorLinkerVersion;
BYTE MinorLinkerVersion;
DWORD SizeOfCode;
DWORD SizeOfInitializedData;
DWORD SizeOfUninitializedData;
DWORD AddressOfEntryPoint;
DWORD BaseOfCode;
unsigned long long ImageBase;
DWORD SectionAlignment;
DWORD FileAlignment;
WORD MajorOperatingSystemVersion;
WORD MinorOperatingSystemVersion;
WORD MajorImageVersion;
WORD MinorImageVersion;
WORD MajorSubsystemVersion;
WORD MinorSubsystemVersion;
DWORD Win32VersionValue;
DWORD SizeOfImage;
DWORD SizeOfHeaders;
DWORD CheckSum;
WORD Subsystem;
WORD DllCharacteristics;
unsigned long long SizeOfStackReserve;
unsigned long long SizeOfStackCommit;
unsigned long long SizeOfHeapReserve;
unsigned long long SizeOfHeapCommit;
DWORD LoaderFlags;
DWORD NumberOfRvaAndSizes;
IMAGE_DATA_DIRECTORY DataDirectory[16];
} IMAGE_OPTIONAL_HEADER64, * PIMAGE_OPTIONAL_HEADER64;
typedef struct _IMAGE_FILE_HEADER {
WORD Machine;
WORD NumberOfSections;
DWORD TimeDateStamp;
DWORD PointerToSymbolTable;
DWORD NumberOfSymbols;
WORD SizeOfOptionalHeader;
WORD Characteristics;
} IMAGE_FILE_HEADER, * PIMAGE_FILE_HEADER;
typedef struct _IMAGE_NT_HEADERS64 {
DWORD Signature;
IMAGE_FILE_HEADER FileHeader;
IMAGE_OPTIONAL_HEADER64 OptionalHeader;
} IMAGE_NT_HEADERS64, * PIMAGE_NT_HEADERS64;
typedef struct _IMAGE_DOS_HEADER {
WORD e_magic;
WORD e_cblp;
WORD e_cp;
WORD e_crlc;
WORD e_cparhdr;
WORD e_minalloc;
WORD e_maxalloc;
WORD e_ss;
WORD e_sp;
WORD e_csum;
WORD e_ip;
WORD e_cs;
WORD e_lfarlc;
WORD e_ovno;
WORD e_res[4];
WORD e_oemid;
WORD e_oeminfo;
WORD e_res2[10];
long e_lfanew;
} IMAGE_DOS_HEADER, * PIMAGE_DOS_HEADER;
typedef struct _IMAGE_RESOURCE_DIRECTORY {
DWORD Characteristics;
DWORD TimeDateStamp;
WORD MajorVersion;
WORD MinorVersion;
WORD NumberOfNamedEntries;
WORD NumberOfIdEntries;
} IMAGE_RESOURCE_DIRECTORY, * PIMAGE_RESOURCE_DIRECTORY;
typedef struct _IMAGE_RESOURCE_DIRECTORY_ENTRY {
union {
struct {
DWORD NameOffset : 31;
DWORD NameIsString : 1;
} DUMMYSTRUCTNAME;
DWORD Name;
WORD Id;
} DUMMYUNIONNAME;
union {
DWORD OffsetToData;
struct {
DWORD OffsetToDirectory : 31;
DWORD DataIsDirectory : 1;
} DUMMYSTRUCTNAME2;
} DUMMYUNIONNAME2;
} IMAGE_RESOURCE_DIRECTORY_ENTRY, * PIMAGE_RESOURCE_DIRECTORY_ENTRY;
typedef struct _IMAGE_RESOURCE_DATA_ENTRY {
DWORD OffsetToData;
DWORD Size;
DWORD CodePage;
DWORD Reserved;
} IMAGE_RESOURCE_DATA_ENTRY, * PIMAGE_RESOURCE_DATA_ENTRY;
typedef struct tagVS_FIXEDFILEINFO
{
DWORD dwSignature;
DWORD dwStrucVersion;
DWORD dwFileVersionMS;
DWORD dwFileVersionLS;
DWORD dwProductVersionMS;
DWORD dwProductVersionLS;
DWORD dwFileFlagsMask;
DWORD dwFileFlags;
DWORD dwFileOS;
DWORD dwFileType;
DWORD dwFileSubtype;
DWORD dwFileDateMS;
DWORD dwFileDateLS;
} VS_FIXEDFILEINFO;
typedef struct tag_VS_VERSIONINFO
{
unsigned short wLength;
unsigned short wValueLength;
unsigned short wType;
WCHAR_T szKey[17];
VS_FIXEDFILEINFO Value;
}VS_VERSIONINFO, * PVS_VERSIONINFO;
PVOID HbGetPtr2(DWORD_PTR relativeAddress, PIMAGE_NT_HEADERS64 ntHeaders, DWORD_PTR imageBase)
{
PVOID retVal = NULL;
BOOL found = FALSE;
PIMAGE_SECTION_HEADER secHeader = (PIMAGE_SECTION_HEADER)((DWORD_PTR)ntHeaders + FIELD_OFFSET(IMAGE_NT_HEADERS64, OptionalHeader) + ntHeaders->FileHeader.SizeOfOptionalHeader);
for (DWORD i = 0; i < ntHeaders->FileHeader.NumberOfSections; i++, secHeader++)
{
if ((relativeAddress >= secHeader->VirtualAddress)
&& (relativeAddress < (secHeader->VirtualAddress + secHeader->Misc.VirtualSize)))
{
found = TRUE;
break;
}
}
if (found)
retVal = (PVOID)(imageBase + relativeAddress - (int)(secHeader->VirtualAddress - secHeader->PointerToRawData));
return retVal;
}
//Get build number for PE file
unsigned long HbeGetBuildNumber(void* imageBase)
{
DWORD retVal = 0;
PIMAGE_DOS_HEADER dosHeader = (PIMAGE_DOS_HEADER)((DWORD_PTR)imageBase);
PIMAGE_NT_HEADERS64 ntHeaders = (PIMAGE_NT_HEADERS64)((DWORD_PTR)imageBase + dosHeader->e_lfanew);
PIMAGE_DATA_DIRECTORY dataDir = NULL;
PIMAGE_RESOURCE_DIRECTORY resDir = NULL, subDir = NULL;
PIMAGE_RESOURCE_DIRECTORY_ENTRY dirEntry = NULL, versionDir = NULL;
PIMAGE_RESOURCE_DATA_ENTRY dataEntry = NULL;
PVS_VERSIONINFO versionInfo;
if (dosHeader->e_magic != IMAGE_DOS_SIGNATURE)
goto Done;
if (ntHeaders->Signature != IMAGE_NT_SIGNATURE)
goto Done;
dataDir = &ntHeaders->OptionalHeader.DataDirectory[2];
resDir = (PIMAGE_RESOURCE_DIRECTORY)HbGetPtr2(dataDir->VirtualAddress, ntHeaders, (DWORD_PTR)imageBase);
dirEntry = (PIMAGE_RESOURCE_DIRECTORY_ENTRY)(resDir + 1);
if (!resDir)
goto Done;
for (DWORD i = 0; i < resDir->NumberOfIdEntries; i++, dirEntry++)
{
if (dirEntry->Id == (WORD)RT_VERSION
&& dirEntry->DataIsDirectory)
{
subDir = (PIMAGE_RESOURCE_DIRECTORY)((dirEntry->OffsetToData & 0x7FFFFFFF) + (DWORD_PTR)resDir);
versionDir = (PIMAGE_RESOURCE_DIRECTORY_ENTRY)(subDir + 1);
if (versionDir->DataIsDirectory)
{
subDir = (PIMAGE_RESOURCE_DIRECTORY)((versionDir->OffsetToData & 0x7FFFFFFF) + (DWORD_PTR)resDir);
versionDir = (PIMAGE_RESOURCE_DIRECTORY_ENTRY)(subDir + 1);
dataEntry = (PIMAGE_RESOURCE_DATA_ENTRY)((versionDir->OffsetToData & 0x7FFFFFFF) + (DWORD_PTR)resDir);
versionInfo = (PVS_VERSIONINFO)HbGetPtr2(dataEntry->OffsetToData, ntHeaders, (DWORD_PTR)imageBase);
if (versionDir)
retVal = HIWORD(versionInfo->Value.dwFileVersionLS);
}
}
}
Done:
return retVal;
}
Through debugging I figured out that the error occurs when trying to locate PIMAGE_RESOURCE_DIRECTORY using HbGetPtr2, since resDir->NumberOfIDEntries should have the value 4, but in the error case it contains multiple thousand (obviously wrong). Where is my error in this code? Why does the code work on the same file if I run it from a Windows exe but not from my bootloader? Is there a difference between BootServices->LoadImage (Bootloader) and MapViewOfFile (Windows executable)?
Edit:
This is how I use the code above in my Windows executable (working):
HANDLE hFile;
HANDLE hFileMapping;
LPVOID lpFileBase;
hFile = CreateFileW(L"C:\\Users\\XY\Reverse Engineering\\EFI\\bootmgfw.efi", GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
if (hFile == INVALID_HANDLE_VALUE)
return 1;
hFileMapping = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
if (hFileMapping == 0)
{
CloseHandle(hFile);
return 1;
}
lpFileBase = MapViewOfFile(hFileMapping, FILE_MAP_READ, 0, 0, 0);
if (lpFileBase == 0)
{
CloseHandle(hFileMapping);
CloseHandle(hFile);
return 1;
}
std::cout << HbeGetBuildNumber(lpFileBase) << std::endl;
This is how I use the code above in my bootloader (fails):
BOOL retVal = FALSE;
PEFI_BOOT_SERVICES services = systemTable->BootServices;
PEFI_LOADED_IMAGE loadedImage;
PEFI_LOADED_IMAGE_PROTOCOL winImageInfo;
PEFI_DEVICE_PATH winPath;
EFI_HANDLE winHandle = NULL;
EFI_STATUS loadSuccess = -1;
DWORD bootmgfwBuildNumber = 0;
if (EFI_ERROR(services->HandleProtocol(curImageHandle, &gEfiLoadedImageProtocolGuid, &loadedImage)))
goto Done;
if (!(winPath = FileDevicePath(loadedImage->DeviceHandle, L"EFI\\Microsoft\\Boot\\bootmgfw.efi")))
goto Done;
if (EFI_ERROR(loadSuccess = services->LoadImage(TRUE, curImageHandle, winPath, NULL, 0, &winHandle)))
goto Done;
DBGMSG(L"Loaded bootmgfw.efi\n");
FreePool(winPath);
if (EFI_ERROR(services->OpenProtocol(winHandle, &gEfiLoadedImageProtocolGuid, (PVOID *)&winImageInfo, curImageHandle, NULL, EFI_OPEN_PROTOCOL_GET_PROTOCOL)))
goto Done;
DBGMSG(L"Got file information\n");
if (!(bootmgfwBuildNumber = HbGetBuildNumber(winImageInfo->ImageBase))) //ERROR: Returns 0
goto Done
//...
Done:
DBGMSG(L"Start image\n");
if (!EFI_ERROR(loadSuccess) && !EFI_ERROR(services->StartImage(winHandle, 0, NULL)))
retVal = TRUE;
return retVal;
The aim is to enable 16 bits segments for 16 bits addressing like on 64 bits Linux Kernel with the modify_ldt() system call.
I was unable to find if Cygwin provides a wrapper and I have only barely an idea that it’s about something like NtSetLdtEntries with :
typedef struct
{
ULONG Start;
ULONG Length;
LDT_ENTRY LdtEntries[1];
} PROCESS_LDT_INFORMATION, *PPROCESS_LDT_INFORMATION;
Please note this is not related to the vm86 mode (which is a different method of doing it used by Microsoft on 32 bits systems). And As stated above, this way is used on Linux for running 16 bits code in protected mode without any emulation. See CONFIG_X86_16BIT for more informations.
Of course, if it’s not supported by default it’s Ok to modify system files.
on x86-based windos possible (tested on xp and win 8.1 x86) set several descriptors in LDT table and use this. this can be done via NtSetInformationProcess with ProcessLdtInformation (undocumented) or, if we need set only 1 or 2 selectors - more easy use undocumented api:
EXTERN_C
__declspec(dllimport)
NTSTATUS
NTAPI
NtSetLdtEntries
(
__in_opt ULONG Selector1,
__in SEGMENT_ENTRY LdtEntry1,
__in_opt ULONG Selector2,
__in SEGMENT_ENTRY LdtEntry2
);
so we need allocate 1 or more SEGMENT_ENTRY (or LDT_ENTRY - declared in winnt.h), allocate memory for segment, and call api. I did not pay much attention for 16 bit code and fill actual descriptors, check only memory fill via LDT selector (assigned to ES) and then read it via "plain" selector.
typedef struct SEGMENT_ENTRY
{
ULONG LimitLow : 16;
ULONG BaseLow : 16;
ULONG BaseMid : 8;
ULONG Type : 4;
ULONG IsGegment : 1;// = 1
ULONG DPL : 2;
ULONG P : 1;// Present
ULONG LimitHi : 4;
ULONG AVL : 1;// Available For software use
ULONG L : 1;// Long-mode segment
ULONG D : 1;// Default operand size
ULONG G : 1;// Granularity
ULONG BaseHi : 8;
}*PSEGMENT_ENTRY;
typedef struct PROCESS_LDT_INFORMATION
{
ULONG StartSelector;
ULONG Length;
SEGMENT_ENTRY LdtEntries[];
} *PPROCESS_LDT_INFORMATION;
EXTERN_C
__declspec(dllimport)
NTSTATUS
NTAPI
NtSetLdtEntries
(
__in_opt ULONG Selector1,
IN SEGMENT_ENTRY LdtEntry1,
__in_opt ULONG Selector2,
IN SEGMENT_ENTRY LdtEntry2
);
NTSTATUS TestLdt()
{
PVOID BaseAddress = 0;
SIZE_T RegionSize = 0x100000;//1mb
NTSTATUS status = NtAllocateVirtualMemory(NtCurrentProcess(), &BaseAddress, 0, &RegionSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
if (0 <= status)
{
#if 1
SEGMENT_ENTRY LdtEntry = {};
LdtEntry.LimitLow = 0xffff;
LdtEntry.BaseLow = ((ULONG_PTR)BaseAddress) & 0xFFFF;
LdtEntry.BaseMid = ((ULONG_PTR)BaseAddress >> 16) & 0xff;
LdtEntry.BaseHi = ((ULONG_PTR)BaseAddress >> 24) & 0xff;
LdtEntry.P = 1;
LdtEntry.DPL = 3;
LdtEntry.IsGegment = 1;
LdtEntry.Type = 2;//ldt
status = NtSetLdtEntries(8, LdtEntry, 0, LdtEntry);
#else
const ULONG cb = sizeof(PROCESS_LDT_INFORMATION) + 1 * sizeof(LDT_ENTRY);
PPROCESS_LDT_INFORMATION LdtInfo = (PPROCESS_LDT_INFORMATION)alloca(cb);
LdtInfo->Length = 1 * sizeof(LDT_ENTRY);
LdtInfo->StartSelector = 8;
SEGMENT_ENTRY* LdtEntry = LdtInfo->LdtEntries;
LdtEntry->LimitLow = 0xffff;
LdtEntry->BaseLow = ((ULONG_PTR)BaseAddress) & 0xFFFF;
LdtEntry->BaseMid = ((ULONG_PTR)BaseAddress >> 16) & 0xff;
LdtEntry->BaseHi = ((ULONG_PTR)BaseAddress >> 24) & 0xff;
LdtEntry->L = 0;
LdtEntry->D = 0;
LdtEntry->G = 0;
LdtEntry->AVL = 0;
LdtEntry->P = 1;
LdtEntry->DPL = 3;
LdtEntry->IsGegment = 1;
LdtEntry->Type = 2;//ldt
status = NtSetInformationProcess(NtCurrentProcess(), ProcessLdtInformation, LdtInfo, cb);
#endif
if (0 <= status)
{
DbgPrint("%s\n", BaseAddress); // print empty string
#ifdef _X86_
__asm {
push edi
mov ax,0xf
mov dx,es
mov es,ax
mov ecx,32
mov al,0x33
xor edi,edi
rep stosb
mov es,dx
pop edi
}
#endif
DbgPrint("%s\n", BaseAddress);// print 33333333...
}
NtFreeVirtualMemory(NtCurrentProcess(), &BaseAddress, &RegionSize, MEM_RELEASE);
}
return status;
}
however This is valid only on x86-based windows systems.
if you call this on any x64 windows you got error STATUS_NOT_IMPLEMENTED. here windows not support LDT at all. and this can not be changed (even by modify system files. ?!)
more info - Local Descriptor Table on x64
i am trying to Read an Voltage Level via a ADC0 of my ATmega8, because of querying a 1 Pin 4x4 Matrix Keypad. The Problem is everytime I apply a Voltage to the ADC higher than GND the Atmega is stopping to do his work. The PWM outputs are still working, but communication via i2c is impossible and the LCD is clear.
My wiring is simple, AREF & AVCC are set to 5V, GND is set to GND and PC0 is my Input. Is there anything I fail to Notice? Thank You for your help.
Here is my Code:
void Initialisierung(void)
{
char text [2];
lcd_init();
cli();
//### TWI
init_twi_slave(SLAVE_ADRESSE); //TWI als Slave mit Adresse slaveadr starten
sei();
lcd_setcursor( 0, 1 );
lcd_string(">Booting...");
lcd_setcursor( 0, 2 );
itoa (SLAVE_ADRESSE,text,16);
lcd_string("I2C Adress=0x");
lcd_string(text);
for (int Index=0; Index<85; ++Index) {
rxbuffer[Index] = 0x20;
}
rxbuffer[81]=0xFF;
rxbuffer[82]=0xFF;
rxbuffer[83]=0xFF;
rxbuffer[84]=0xFF;
}
//update LCD
void lcd_update(void){
for (int o=1;o<=4; o++)
for (int i=1; i<=20; i++){
lcd_setcursor( i-1, o );
lcd_data(rxbuffer[i+((o-1)*20)]);
}
}
An here is the main function:
int main(void)
{
DDRC &= ~(1 << PC0);
PORTC &= ~(1 << PC0);
Initialisierung();
DDRB = (1 << DDB1) | (1 << DDB2);
OCR1A = eeprom_read_word(&brightness); // PWM einstellen,
OCR1B = eeprom_read_word(&contrast);
ICR1 = 1000; // TOP-wert
TCCR1A = (1<<COM1A1) | (1<<COM1B1) | (1<<WGM11); // 2-Kanal "non-inverting"
TCCR1B = (1<<WGM13)|(1<<WGM12) | (1<<CS11);
//Initialize ADC
ADCSRA = (1<<ADEN) | (1<<ADPS2) | (1<<ADPS0);
ADMUX=0x00;
unsigned int adc_value=0; // Variable to hold ADC result
char text[2];
while(1)
{
ADCSRA |= (1<<ADSC); // Start conversion
while (ADCSRA & (1<<ADSC)); // wait for conversion to complete
adc_value = ADCW; //Store ADC value
itoa (adc_value,text,16);
lcd_setcursor( 0,4 );
lcd_string(text);
for (int Index=0; Index<85; ++Index) {
txbuffer[Index] = rxbuffer[Index];
}
uint16_t brightness_i2c=0;
uint16_t contrast_i2c=0;
brightness_i2c=(rxbuffer[81]<<8)|(rxbuffer[82]);
contrast_i2c=(rxbuffer[83]<<8)|(rxbuffer[84]);
if (rxbuffer[0]==1){
lcd_update();
rxbuffer[0]=4;
}else if(brightness_i2c!=eeprom_read_word(&brightness) && brightness_i2c!=0xFFFF){
eeprom_write_word(&brightness,brightness_i2c);
OCR1A = eeprom_read_word(&brightness);
}else if (contrast_i2c!=eeprom_read_word(&contrast) && contrast_i2c!=0xFFFF){
eeprom_write_word(&contrast,contrast_i2c);
OCR1B = eeprom_read_word(&contrast);
}else{
for (uint8_t i=0; i<50; i++) _delay_ms(10);
lcd_setcursor( 19, 4 );
lcd_data(0xFF);
for (uint8_t i=0; i<50; i++) _delay_ms(10);
lcd_setcursor( 19, 4 );
lcd_data(0x20);
}
}
}
I finally got it:
I setup the ADC with interrupts but not freerunning:
ADCSRA =(1<<ADEN)|(1<<ADPS2)|(1<<ADPS1)|(1<<ADIE)| (1<<ADPS0);
And call the ADC everytime at the end of my While loop:
ADCSRA |= (1<<ADSC);
Here is the Code for the ISR:
ISR(ADC_vect)
{
char text[5];
itoa (ADC,text,16);
lcd_setcursor( 0,4 );
lcd_string(text);
}
Thanks for your time ;)
Problem
I am trying to find out whether mmx or xmm registers are faster for copying elements of an array to another array (I know about memcpy() but I need this function for a very specific purpose).
My souce code is below. The relevant function is copyarray(). I can use either mmx or xmm registers with movq or movsd respectively, and the result is correct. However, when I use mmx registers, any timer I use (either clock() or QueryPerformanceCounter) to time the operations returns NaN.
Compiled with: gcc -std=c99 -O2 -m32 -msse3 -mincoming-stack-boundary=2 -mfpmath=sse,387 -masm=intel copyasm.c -o copyasm.exe
This is a very strange bug and I cannot figure out why using mmx registers would cause a timer to return NaN seconds, while using xmm registers in exactly the same code returns a valid time value
EDIT
Results using xmm registers:
Elapsed time: 0.000000 seconds, Gigabytes copied per second: inf GB
Residual = 0.000000
0.937437 0.330424 0.883267 0.118717 0.962493 0.584826 0.344371 0.423719
0.937437 0.330424 0.883267 0.118717 0.962493 0.584826 0.344371 0.423719
Results using mmx register:
Elapsed time: nan seconds, Gigabytes copied per second: inf GB
Residual = 0.000000
0.000000 0.754173 0.615345 0.634724 0.611286 0.547655 0.729637 0.942381
0.935759 0.754173 0.615345 0.634724 0.611286 0.547655 0.729637 0.942381
Source Code
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <x86intrin.h>
#include <windows.h>
inline double
__attribute__ ((gnu_inline))
__attribute__ ((aligned(64))) copyarray(
double* restrict dst,
const double* restrict src,
const int n)
{
// int i = n;
// do {
// *dst++ = *src++;
// i--;
// } while(i);
__asm__ __volatile__
(
"mov ecx, %[n] \n\t"
"mov edi, %[dst] \n\t"
"mov esi, %[src] \n\t"
"xor eax, eax \n\t"
"sub ecx,1 \n\t"
"L%=: \n\t"
"movq mm0, QWORD PTR [esi+ecx*8] \n\t"
"movq QWORD PTR [edi+ecx*8], mm0 \n\t"
"sub ecx, 1 \n\t"
"jge L%= \n\t"
: // no outputs
: // inputs
[dst] "m" (dst),
[src] "m" (src),
[n] "g" (n)
: // register clobber
"eax","ecx","edi","esi",
"mm0"
);
}
void printarray(double* restrict a, int n)
{
for(int i = 0; i < n; ++i) {
printf(" %f ", *(a++));
}
printf("\n");
}
double residual(const double* restrict dst,
const double* restrict src,
const int n)
{
double residual = 0.0;
for(int i = 0; i < n; ++i)
residual += *(dst++) - *(src++);
return(residual);
}
int main()
{
double *A = NULL;
double *B = NULL;
int n = 8;
double memops;
double time3;
clock_t time1;
// LARGE_INTEGER frequency, time1, time2;
// QueryPerformanceFrequency(&frequency);
int trials = 1 << 0;
A = _mm_malloc(n*sizeof(*A), 64);
B = _mm_malloc(n*sizeof(*B), 64);
srand(time(NULL));
for(int i = 0; i < n; ++i)
*(A+i) = (double) rand()/RAND_MAX;
// QueryPerformanceCounter(&time1);
time1 = clock();
for(int i = 0; i < trials; ++i)
copyarray(B,A,n);
// QueryPerformanceCounter(&time2);
// time3 = (double)(time2.QuadPart - time1.QuadPart) / frequency.QuadPart;
time3 = (double) (clock() - time1)/CLOCKS_PER_SEC;
memops = (double) trials*n/time3*sizeof(*A)/1.0e9;
printf("Elapsed time: %f seconds, Gigabytes copied per second: %f GB\n",time3, memops);
printf("Residual = %f\n",residual(B,A,n));
printarray(A,n);
printarray(B,n);
_mm_free(A);
_mm_free(B);
}
You have to be careful when mixing MMX with floating point - use SSE instead if possible. If you must use MMX then read the section titled "MMX - State Management" on this page - note the requirement for the emms instruction after any MMX instructions before you next perform any floating point operations.
Based on the Wikipedia entry as well as the Intel manual, rdpmc should be available to user-mode processes as long as bit 8 of CR4 is set. However, I am still running into general protection error when trying to run rdpmc from userspace even with that bit set.
I am running on an 8-core Intel X3470 on kernel 2.6.32-279.el6.x86_64.
Here is the user-mode program I am trying to execute:
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <sched.h>
#include <assert.h>
uint64_t
read_pmc(int ecx)
{
unsigned int a, d;
__asm __volatile("rdpmc" : "=a"(a), "=d"(d) : "c"(ecx));
return ((uint64_t)a) | (((uint64_t)d) << 32);
}
int main(int ac, char **av)
{
uint64_t start, end;
cpu_set_t cpuset;
unsigned int c;
int i;
if (ac != 3) {
fprintf(stderr, "usage: %s cpu-id pmc-num\n", av[0]);
exit(EXIT_FAILURE);
}
i = atoi(av[1]);
c = atoi(av[2]);
CPU_ZERO(&cpuset);
CPU_SET(i, &cpuset);
assert(sched_setaffinity(0, sizeof(cpuset), &cpuset) == 0);
printf("%lu\n", read_pmc(c));
return 0;
}
Here is the kernel module which sets the bit and reads out CR4 so I can manually verify that the bit has been set.
/*
* Enable PMC in user mode.
*/
#include <linux/module.h>
#include <linux/kernel.h>
int init_module(void)
{
typedef long unsigned int uint64_t;
uint64_t output;
// Set CR4, Bit 8 to enable PMC
__asm__("push %rax\n\t"
"mov %cr4,%rax;\n\t"
"or $(1 << 7),%rax;\n\t"
"mov %rax,%cr4;\n\t"
"wbinvd\n\t"
"pop %rax"
);
// Read back CR4 to check the bit.
__asm__("\t mov %%cr4,%0" : "=r"(output));
printk(KERN_INFO "%lu", output);
return 0;
}
void cleanup_module(void)
{
__asm__("push %rax\n\t"
"push %rbx\n\t"
"mov %cr4,%rax;\n\t"
"mov $(1 << 7), %rbx\n\t"
"not %rbx\n\t"
"and %rbx, %rax;\n\t"
"mov %rax,%cr4;\n\t"
"wbinvd\n\t"
"pop %rbx\n\t"
"pop %rax\n\t"
);
}
Apparently, when Intel says Bit 8, they are referring to the 9th bit from the right, since their indexing begins at 0. Replacing $(1 << 7) with $(1 << 8) globally resolves the issue, and allows rdpmc to be called from user mode.
Here is the updated kernel module, also using on_each_cpu to make sure that it is set on every core.
/*
* Read PMC in kernel mode.
*/
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
static void printc4(void) {
typedef long unsigned int uint64_t;
uint64_t output;
// Read back CR4 to check the bit.
__asm__("\t mov %%cr4,%0" : "=r"(output));
printk(KERN_INFO "%lu", output);
}
static void setc4b8(void * info) {
// Set CR4, Bit 8 (9th bit from the right) to enable
__asm__("push %rax\n\t"
"mov %cr4,%rax;\n\t"
"or $(1 << 8),%rax;\n\t"
"mov %rax,%cr4;\n\t"
"wbinvd\n\t"
"pop %rax"
);
// Check which CPU we are on:
printk(KERN_INFO "Ran on Processor %d", smp_processor_id());
printc4();
}
static void clearc4b8(void * info) {
printc4();
__asm__("push %rax\n\t"
"push %rbx\n\t"
"mov %cr4,%rax;\n\t"
"mov $(1 << 8), %rbx\n\t"
"not %rbx\n\t"
"and %rbx, %rax;\n\t"
"mov %rax,%cr4;\n\t"
"wbinvd\n\t"
"pop %rbx\n\t"
"pop %rax\n\t"
);
printk(KERN_INFO "Ran on Processor %d", smp_processor_id());
}
int init_module(void)
{
on_each_cpu(setc4b8, NULL, 0);
return 0;
}
void cleanup_module(void)
{
on_each_cpu(clearc4b8, NULL, 0);
}
Echoing "2" to /sys/bus/event_source/devices/cpu/rdpmc allows user processes
to access performance counters via the rdpmc instruction.
Note that behaviour has changed. Prior to 4.0 "1" meant "enabled"
while meant "0" disable. Now "1" means allow only for processes that have active perf events. More details: http://man7.org/linux/man-pages/man2/perf_event_open.2.html