No-overflow cast on x64 - windows

I have an existing C codebase that works on x86.
I'm now compiling it for x64.
What I'd like to do is cast a size_t to a DWORD, and throw an exception if there's a loss of data.
Q: Is there an idiom for this?
Here's why I'm doing this:
A bunch of Windows APIs accept DWORDs as arguments, and the code currently assumes sizeof(DWORD)==sizeof(size_t). That assumption holds for x86, but not for x64. So when compiling for x64, passing size_t in place of a DWORD argument, generates a compile-time warning.
In virtually all of these cases the actual size is not going to exceed 2^32. But I want to code it defensively and explicitly.
This is my first x64 project, so... be gentle.

see boost::numeric_cast
http://www.boost.org/doc/libs/1_33_1/libs/numeric/conversion/doc/numeric_cast.html

I just defined a function to perform the cast.
I included an assert-like behavior to insure I'm not silently rubbishing pointers.
DWORD ConvertSizeTo32bits(size_t sz, char *file, int line)
{
if (!(0 <= sz && sz <= INT32_MAX)) {
EmitLogMessage("Invalid Pointer size: %d file(%s) line(%d)",
sz, file, line);
ExitProcess( 0 );
}
return (DWORD) sz;
}

#define size_t_to_DWORD(st,dw) if ((DWORD)(st) != st) RaiseException(exLossOfData, 0, 0, NULL); else dw = (DWORD)(st)
size_t st;
DWORD dw;
st = 0xffffffff;
size_t_to_DWORD(st,dw); // this succeeds
st = 0xffffffff1;
size_t_to_DWORD(st,dw); // this throws
EDIT:
Or better yet, do this so you can use it in an expression:
DWORD MyRaiseException()
{
RaiseException(1, 0, 0, NULL);
return 0;
}
#define size_t_to_DWORD(st) (DWORD)(st) != (st) ? MyRaiseException() : (DWORD)(st)
void main(void)
{
size_t st;
DWORD dw;
st = 0xffffffff1;
dw = size_t_to_DWORD(st);
printf("%u %u\n", st, dw);
}

Related

How to exactly modify the Local Descriptor table on 64 bits Windows?

The aim is to enable 16 bits segments for 16 bits addressing like on 64 bits Linux Kernel with the modify_ldt() system call.
I was unable to find if Cygwin provides a wrapper and I have only barely an idea that it’s about something like NtSetLdtEntries with :
typedef struct
{
ULONG Start;
ULONG Length;
LDT_ENTRY LdtEntries[1];
} PROCESS_LDT_INFORMATION, *PPROCESS_LDT_INFORMATION;
Please note this is not related to the vm86 mode (which is a different method of doing it used by Microsoft on 32 bits systems). And As stated above, this way is used on Linux for running 16 bits code in protected mode without any emulation. See CONFIG_X86_16BIT for more informations.
Of course, if it’s not supported by default it’s Ok to modify system files.
on x86-based windos possible (tested on xp and win 8.1 x86) set several descriptors in LDT table and use this. this can be done via NtSetInformationProcess with ProcessLdtInformation (undocumented) or, if we need set only 1 or 2 selectors - more easy use undocumented api:
EXTERN_C
__declspec(dllimport)
NTSTATUS
NTAPI
NtSetLdtEntries
(
__in_opt ULONG Selector1,
__in SEGMENT_ENTRY LdtEntry1,
__in_opt ULONG Selector2,
__in SEGMENT_ENTRY LdtEntry2
);
so we need allocate 1 or more SEGMENT_ENTRY (or LDT_ENTRY - declared in winnt.h), allocate memory for segment, and call api. I did not pay much attention for 16 bit code and fill actual descriptors, check only memory fill via LDT selector (assigned to ES) and then read it via "plain" selector.
typedef struct SEGMENT_ENTRY
{
ULONG LimitLow : 16;
ULONG BaseLow : 16;
ULONG BaseMid : 8;
ULONG Type : 4;
ULONG IsGegment : 1;// = 1
ULONG DPL : 2;
ULONG P : 1;// Present
ULONG LimitHi : 4;
ULONG AVL : 1;// Available For software use
ULONG L : 1;// Long-mode segment
ULONG D : 1;// Default operand size
ULONG G : 1;// Granularity
ULONG BaseHi : 8;
}*PSEGMENT_ENTRY;
typedef struct PROCESS_LDT_INFORMATION
{
ULONG StartSelector;
ULONG Length;
SEGMENT_ENTRY LdtEntries[];
} *PPROCESS_LDT_INFORMATION;
EXTERN_C
__declspec(dllimport)
NTSTATUS
NTAPI
NtSetLdtEntries
(
__in_opt ULONG Selector1,
IN SEGMENT_ENTRY LdtEntry1,
__in_opt ULONG Selector2,
IN SEGMENT_ENTRY LdtEntry2
);
NTSTATUS TestLdt()
{
PVOID BaseAddress = 0;
SIZE_T RegionSize = 0x100000;//1mb
NTSTATUS status = NtAllocateVirtualMemory(NtCurrentProcess(), &BaseAddress, 0, &RegionSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
if (0 <= status)
{
#if 1
SEGMENT_ENTRY LdtEntry = {};
LdtEntry.LimitLow = 0xffff;
LdtEntry.BaseLow = ((ULONG_PTR)BaseAddress) & 0xFFFF;
LdtEntry.BaseMid = ((ULONG_PTR)BaseAddress >> 16) & 0xff;
LdtEntry.BaseHi = ((ULONG_PTR)BaseAddress >> 24) & 0xff;
LdtEntry.P = 1;
LdtEntry.DPL = 3;
LdtEntry.IsGegment = 1;
LdtEntry.Type = 2;//ldt
status = NtSetLdtEntries(8, LdtEntry, 0, LdtEntry);
#else
const ULONG cb = sizeof(PROCESS_LDT_INFORMATION) + 1 * sizeof(LDT_ENTRY);
PPROCESS_LDT_INFORMATION LdtInfo = (PPROCESS_LDT_INFORMATION)alloca(cb);
LdtInfo->Length = 1 * sizeof(LDT_ENTRY);
LdtInfo->StartSelector = 8;
SEGMENT_ENTRY* LdtEntry = LdtInfo->LdtEntries;
LdtEntry->LimitLow = 0xffff;
LdtEntry->BaseLow = ((ULONG_PTR)BaseAddress) & 0xFFFF;
LdtEntry->BaseMid = ((ULONG_PTR)BaseAddress >> 16) & 0xff;
LdtEntry->BaseHi = ((ULONG_PTR)BaseAddress >> 24) & 0xff;
LdtEntry->L = 0;
LdtEntry->D = 0;
LdtEntry->G = 0;
LdtEntry->AVL = 0;
LdtEntry->P = 1;
LdtEntry->DPL = 3;
LdtEntry->IsGegment = 1;
LdtEntry->Type = 2;//ldt
status = NtSetInformationProcess(NtCurrentProcess(), ProcessLdtInformation, LdtInfo, cb);
#endif
if (0 <= status)
{
DbgPrint("%s\n", BaseAddress); // print empty string
#ifdef _X86_
__asm {
push edi
mov ax,0xf
mov dx,es
mov es,ax
mov ecx,32
mov al,0x33
xor edi,edi
rep stosb
mov es,dx
pop edi
}
#endif
DbgPrint("%s\n", BaseAddress);// print 33333333...
}
NtFreeVirtualMemory(NtCurrentProcess(), &BaseAddress, &RegionSize, MEM_RELEASE);
}
return status;
}
however This is valid only on x86-based windows systems.
if you call this on any x64 windows you got error STATUS_NOT_IMPLEMENTED. here windows not support LDT at all. and this can not be changed (even by modify system files. ?!)
more info - Local Descriptor Table on x64

MapViewOfFile chunked loading of file

I want to map file into memory with chunk size equal system granularity. First chunk read without error and all others fails with error 5 (ERROR_ACCESS_DENIED). I tried run program with administrator privileges.
My code:
#include <windows.h>
#include <stdio.h>
int main() {
HANDLE hFile = CreateFile( TEXT("db.txt"),
GENERIC_READ,
FILE_SHARE_READ,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL);
if (hFile == INVALID_HANDLE_VALUE) {
printf("[ERROR] File opening error %d\n", GetLastError());
return 1;
}
printf("[DONE] File opened successfully.\n");
HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
if (hMap == NULL) {
printf("[ERROR] Create mapping error %d\n", GetLastError());
return 2;
}
printf("[DONE] Create mapping successfully.\n");
LARGE_INTEGER file_size = { };
if (!GetFileSizeEx(hFile, &file_size)) {
printf("[ERROR] Getiing filesize error %d\n", GetLastError());
return 3;
}
printf("[DONE] Getting file size.\n");
SYSTEM_INFO info = { };
GetSystemInfo(&info);
printf("[DONE] Getting system memory granularity %d.\n", info.dwAllocationGranularity);
DWORD offset = 0;
int size = 0;
do {
char* ENTRY = (char*)MapViewOfFile(hMap, FILE_MAP_READ, HIWORD(offset), LOWORD(offset), info.dwAllocationGranularity);
if (ENTRY == NULL) {
printf("[ERROR] Map entry error %d\n", GetLastError());
} else {
printf("[DONE] MAPPING PART WITH OFFSET %d\n", offset);
//printf("%s\n", ENTRY);
}
if (offset + info.dwAllocationGranularity < file_size.QuadPart) {
offset += info.dwAllocationGranularity;
} else {
offset = file_size.QuadPart;
}
//offset += size;
UnmapViewOfFile(ENTRY);
} while (offset < file_size.QuadPart);
CloseHandle(hMap);
CloseHandle(hFile);
system("pause");
return 0;
}
How I fix it?
You're using HIWORD and LOWORD for the offset in the call to MapViewOfFile, but these only take a 32-bit value and split it into two 16-bit halves - what you want is a 64-bit value split into two 32-bit halves.
Instead you need HIDWORD and LODWORD, which are defined in <intsafe.h>:
#define LODWORD(_qw) ((DWORD)(_qw))
#define HIDWORD(_qw) ((DWORD)(((_qw) >> 32) & 0xffffffff))
Like so:
char* ENTRY = (char*)MapViewOfFile(hMap, FILE_MAP_READ, HIDWORD(offset), LODWORD(offset), info.dwAllocationGranularity);
You need this even though your offset variable is 32 bit (in which case, HIDWORD will just return 0 and the full value of offset is passed as the low-order DWORD).

Porting C++ project from VS 6.0 to VS 2010 brought to slower code

I ported one project from Visual C++ 6.0 to VS 2010 and found that a critical part of the code (scripting engine) now runs in about three times slower than in was before.
After some research I managed to extract code fragment which seems to cause the slowdown. I minimized it as much as possible, so it ill be easier to reproduce the problem.
The problem is reproduced when assigning a complex class (Variant) which contains another class (String), and the union of several other fields of simple types.
Playing with the example I discovered more "magic":
1. If I comment one of unused (!) class members, the speed increases, and the code finally runs faster than those complied with VS 6.2
2. The same is true if I remove the "union" wrapper"
3. The same is true event if change the value of the filed from 1 to 0
I have no idea what the hell is going on.
I have checked all code generation and optimization switches, but without any success.
The code sample is below:
On my Intel 2.53 GHz CPU this test, compiled under VS 6.2 runs 1.0 second.
Compiled under VS 2010 - 40 seconds
Compiled under VS 2010 with "magic" lines commented - 0.3 seconds.
The problem is reproduces with any optimization switch, but the "Whole program optimization" (/GL) should be disabled. Otherwise this too smart optimizer will know that out test actually does nothing, and the test will run 0 seconds.
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
class String
{
public:
char *ptr;
int size;
String() : ptr(NULL), size( 0 ) {};
~String() {if ( ptr != NULL ) free( ptr );};
String& operator=( const String& str2 );
};
String& String::operator=( const String& string2 )
{
if ( string2.ptr != NULL )
{
// This part is never called in our test:
ptr = (char *)realloc( ptr, string2.size + 1 );
size = string2.size;
memcpy( ptr, string2.ptr, size + 1 );
}
else if ( ptr != NULL )
{
// This part is never called in our test:
free( ptr );
ptr = NULL;
size = 0;
}
return *this;
}
struct Date
{
unsigned short year;
unsigned char month;
unsigned char day;
unsigned char hour;
unsigned char minute;
unsigned char second;
unsigned char dayOfWeek;
};
class Variant
{
public:
int dataType;
String valStr; // If we comment this string, the speed is OK!
// if we drop the 'union' wrapper, the speed is OK!
union
{
__int64 valInteger;
// if we comment any of these fields, unused in out test, the speed is OK!
double valReal;
bool valBool;
Date valDate;
void *valObject;
};
Variant() : dataType( 0 ) {};
};
void TestSpeed()
{
__int64 index;
Variant tempVal, tempVal2;
tempVal.dataType = 3;
tempVal.valInteger = 1; // If we comment this string, the speed is OK!
for ( index = 0; index < 200000000; index++ )
{
tempVal2 = tempVal;
}
}
int main(int argc, char* argv[])
{
int ticks;
char str[64];
ticks = GetTickCount();
TestSpeed();
sprintf( str, "%.*f", 1, (double)( GetTickCount() - ticks ) / 1000 );
MessageBox( NULL, str, "", 0 );
return 0;
}
This was rather interesting. First I was unable to reproduce the slow down in release build, only in debug build. Then I turned off SSE2 optimizations and got the same ~40s run time.
The problem seems to be in the compiler generated copy assignment for Variant. Without SSE2 it actually does a floating point copy with fld/fstp instructions because the union contains a double. And with some specific values this apparently is a really expensive operation. The 64-bit integer value 1 maps to 4.940656458412e-324#DEN which is a denormalized number and I believe this causes problems. When you leave tempVal.valInteger uninitialized it may contain a value that works faster.
I did a small test to confirm this:
union {
uint64_t i;
volatile double d1;
};
i = 0xcccccccccccccccc; //with this value the test takes 0.07 seconds
//i = 1; //change to 1 and now the test takes 36 seconds
volatile double d2;
for(int i=0; i<200000000; ++i)
d2 = d1;
So what you could do is define your own copy assignment for Variant that just does a simple memcpy of the union.
Variant& operator=(const Variant& rhs)
{
dataType = rhs.dataType;
union UnionType
{
__int64 valInteger;
double valReal;
bool valBool;
Date valDate;
void *valObject;
};
memcpy(&valInteger, &rhs.valInteger, sizeof(UnionType));
valStr = rhs.valStr;
return *this;
}

How to transfer UINT64 to 2 DWORDS?

Is there efficient way to do this?
That's something you could be using a union for:
union {
UINT64 ui64;
struct {
DWORD d0;
DWORD d1;
} y;
} un;
un.ui64 = 27;
// Use un.y.d0 and un.y.d1
An example (under Linix so using different types):
#include <stdio.h>
union {
long ui64;
struct {
int d0;
int d1;
} y;
} un;
int main (void) {
un.ui64 = 27;
printf ("%d %d\n", un.y.d0, un.y.d1);
return 0;
}
This produces:
27 0
Thought I would provide an example using LARGE_INTEGER FOR the windows platform.
If I have a variable called "value" that is 64 bit, then I do:
LARGE_INTEGER li;
li.QuadPart = value;
DWORD low = li.LowPart;
DWORD high = li.HighPart;
Yes, this copies it, but I like the readability of it.
Keep in mind that 64-bit integers have alignment restrictions at least as great as 32-bit integers on all platforms. Therefore, it's perfectly safe to cast a pointer to a 64-bit integer as a pointer to a 32-bit.
ULONGLONG largeInt;
printf( "%u %u\n", ((DWORD *)&largeInt)[ 0 ], ((DWORD *)&largeInt)[ 1 ] );
Obviously, Pax's solution is a lot cleaner, but this is technically more efficient since it doesn't require any data copying.

How to enumerate process' handles?

Is there any way how to enumerate process with given PID in windows, and get list of all his opened handles(locked files, etc.)?
EDIT: I dont care about language. If it is in .NET, I'd be glad, if in WinApi (C), it won't hurt. If in something else, I think I can rewrite it :-)
I did a deep googling and found this article.
This article gave a link to download source code:
I tried method in NtSystemInfoTest.cpp ( downloaded source code ) and it worked superbly.
void ListHandles( DWORD processID, LPCTSTR lpFilter )
The code has following declaimer:
// Written by Zoltan Csizmadia, zoltan_csizmadia#yahoo.com
// For companies(Austin,TX): If you would like to get my resume, send an email.
//
// The source is free, but if you want to use it, mention my name and e-mail address
//
//////////////////////////////////////////////////////////////////////////////////////
//
I hope this helps you.
The command-line 'Handle' tool from Sysinternals does this, if you just want a tool. This won't help you if you're looking for a code solution, though.
Here is an example using ZwQueryProcessInformation from the DDK. The DDK is now known as the "WDK" and is available with MSDN. If you don't have MSDN, apparantly, you can also get it from here.
I haven't tried it, I just googled your question.
#include "ntdll.h"
#include <stdlib.h>
#include <stdio.h>
#include "ntddk.h"
#define DUPLICATE_SAME_ATTRIBUTES 0x00000004
#pragma comment(lib,"ntdll.lib")
BOOL EnablePrivilege(PCSTR name)
{
TOKEN_PRIVILEGES priv = {1, {0, 0, SE_PRIVILEGE_ENABLED}};
LookupPrivilegeValue(0, name, &priv.Privileges[0].Luid);
HANDLE hToken;
OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &hToken);
AdjustTokenPrivileges(hToken, FALSE, &priv, sizeof priv, 0, 0);
BOOL rv = GetLastError() == ERROR_SUCCESS;
CloseHandle(hToken);
return rv;
}
int main(int argc, char *argv[])
{
if (argc == 1) return 0;
ULONG pid = strtoul(argv[1], 0, 0);
EnablePrivilege(SE_DEBUG_NAME);
HANDLE hProcess = OpenProcess(PROCESS_DUP_HANDLE, FALSE, pid);
ULONG n = 0x1000;
PULONG p = new ULONG[n];
while (NT::ZwQuerySystemInformation(NT::SystemHandleInformation, p, n * sizeof *p, 0)
== STATUS_INFO_LENGTH_MISMATCH)
delete [] p, p = new ULONG[n *= 2];
NT::PSYSTEM_HANDLE_INFORMATION h = NT::PSYSTEM_HANDLE_INFORMATION(p + 1);
for (ULONG i = 0; i < *p; i++) {
if (h[i].ProcessId == pid) {
HANDLE hObject;
if (NT::ZwDuplicateObject(hProcess, HANDLE(h[i].Handle), NtCurrentProcess(), &hObject,
0, 0, DUPLICATE_SAME_ATTRIBUTES)
!= STATUS_SUCCESS) continue;
NT::OBJECT_BASIC_INFORMATION obi;
NT::ZwQueryObject(hObject, NT::ObjectBasicInformation, &obi, sizeof obi, &n);
printf("%p %04hx %6lx %2x %3lx %3ld %4ld ",
h[i].Object, h[i].Handle, h[i].GrantedAccess,
int(h[i].Flags), obi.Attributes,
obi.HandleCount - 1, obi.PointerCount - 2);
n = obi.TypeInformationLength + 2;
NT::POBJECT_TYPE_INFORMATION oti = NT::POBJECT_TYPE_INFORMATION(new CHAR[n]);
NT::ZwQueryObject(hObject, NT::ObjectTypeInformation, oti, n, &n);
printf("%-14.*ws ", oti[0].Name.Length / 2, oti[0].Name.Buffer);
n = obi.NameInformationLength == 0
? MAX_PATH * sizeof (WCHAR) : obi.NameInformationLength;
NT::POBJECT_NAME_INFORMATION oni = NT::POBJECT_NAME_INFORMATION(new CHAR[n]);
NTSTATUS rv = NT::ZwQueryObject(hObject, NT::ObjectNameInformation, oni, n, &n);
if (NT_SUCCESS(rv))
printf("%.*ws", oni[0].Name.Length / 2, oni[0].Name.Buffer);
printf("\n");
CloseHandle(hObject);
}
}
delete [] p;
CloseHandle(hProcess);
return 0;
}

Resources