I'm trying to implement an injection of my 64-bit DLL into a 64-bit process. My host process calls CreateRemoteThread with a thread subroutine pointing to LoadLibrary. The DLL later unloads itself "from within" by calling FreeLibraryAndExitThread.
My goal is to know if the injected LoadLibrary call succeeded. For that unfortunately I can't use GetExitCodeThread from within my (host) process since returned 64-bit HMODULE handle is truncated to by a remote thread to DWORD. And I don't want to use Tool Help APIs as they will introduce a race condition.
So thus I was wondering about the lower 32-bits of the HMODULE returned by LoadLibrary in a 64-bit process -- can I reliably assume that its lower 32-bits will not be 0's for a valid handle?
PS. I don't need the HMODULE handle itself, all I need to know if LoadLibrary succeeded or not.
Edit. The call from my host process is done as such (in a very concise pseudo-code -- no error checking):
CreateRemoteThread(hProcess, 0, 0,
GetProcAddress(GetModuleHandle(L"kernel32.dll"), "LoadLibraryW"),
pVmAddressOfMyDllsPathWrittenWith_WriteProcessMemory, 0, 0);
Can I reliably assume that its lower 32-bits will not be 0's for a valid handle?
No you cannot. An HMODULE is just the same in 64 bit as it is in 32 bit. It is the base address of the loaded module. So there is no reason why a valid HMODULE would have to have non-zero low order bits.
It's very simple for you to confirm this. Create a 64 bit DLL with an IMAGEBASE set to, for instance, 0x0000000100000000. Load that DLL, and inspect the value of the returned HMODULE.
instead CreateRemoteThread with a thread subroutine pointing to LoadLibraryW we can inject tiny shell code to remote process which first call LoadLibraryW and than, if it fail, GetLastError - as result remote thread return error code (0 if no error) - and you will be know exactly - are LoadLibrary ok and if not - have error code. the 64 asm code can be:
CONST segment
SHELLDATA struct
LoadLibrary DQ ?
GetLastError DQ ?
SHELLDATA ends
public RemoteThreadProc_begin
public RemoteThreadProc_end
RemoteThreadProc_begin:
RemoteThreadProc proc
nop
nop
nop
call ##0
___ SHELLDATA <>
##0:
xchg [rsp],rbp
sub rsp,20h
call SHELLDATA.LoadLibrary[rbp]
test rax,rax
jz ##1
xor eax,eax
##2:
add rsp,20h
pop rbp
ret
##1:
call SHELLDATA.GetLastError[rbp]
jmp ##2
RemoteThreadProc endp
RemoteThreadProc_end:
CONST ends
and c++ code:
extern "C"
{
extern UCHAR RemoteThreadProc_begin[], RemoteThreadProc_end[];
}
enum INJECT_PHASE {
fOpenProcess, fVirtualAlloc, fWriteProcessMemory, fCreateRemoteThread, fMax
};
ULONG injectDll(ULONG dwprocessId, PCWSTR dllFilePath, INJECT_PHASE& phase)
{
ULONG err = 0;
struct SHELLDATA
{
__int64 code;
PVOID LoadLibrary, GetLastError;
};
if (HANDLE hProcess = OpenProcess(PROCESS_CREATE_THREAD|PROCESS_VM_OPERATION|PROCESS_VM_WRITE, FALSE, dwprocessId))
{
SIZE_T cbStr = (wcslen(dllFilePath) + 1) * sizeof(WCHAR);
SIZE_T cbCode = ((RemoteThreadProc_end - RemoteThreadProc_begin) + sizeof(WCHAR) - 1) & ~(sizeof(WCHAR) - 1);
union {
PVOID RemoteAddress;
PBYTE pbRemote;
PTHREAD_START_ROUTINE lpStartAddress;
};
if (RemoteAddress = VirtualAllocEx(hProcess, 0, cbStr + cbCode, MEM_COMMIT, PAGE_EXECUTE_READWRITE))
{
union {
PVOID pv;
PBYTE pb;
SHELLDATA* ps;
};
pv = alloca(cbStr + cbCode);
memcpy(pv, RemoteThreadProc_begin, cbCode);
memcpy(pb + cbCode, dllFilePath, cbStr);
HMODULE hmod = GetModuleHandle(L"kernel32");
ps->GetLastError = GetProcAddress(hmod, "GetLastError");
ps->LoadLibrary = GetProcAddress(hmod, "LoadLibraryW");
if (WriteProcessMemory(hProcess, RemoteAddress, pv, cbStr + cbCode, 0))
{
if (HANDLE hThread = CreateRemoteThread(hProcess, 0, 0, lpStartAddress, pbRemote + cbCode, 0, 0))
{
phase = fMax;
WaitForSingleObject(hThread, INFINITE);
GetExitCodeThread(hThread, &err);
CloseHandle(hThread);
}
else
{
phase = fCreateRemoteThread;
err = GetLastError();
}
}
else
{
phase = fWriteProcessMemory;
err = GetLastError();
}
VirtualFreeEx(hProcess, RemoteAddress, 0, MEM_RELEASE);
}
else
{
phase = fVirtualAlloc;
err = GetLastError();
}
CloseHandle(hProcess);
}
else
{
phase = fOpenProcess;
err = GetLastError();
}
return err;
}
Related
I am writing a utility to get the memory used by the process while getting into the function and while getting out of the function for each function. I plan to do it in the _penter and _pexit functions. Is there a simple way to get the name of the function in _penter and _pexit? I have the pdb file of my executable. Can somebody help me to get it using SymGetSymFromAddr64 and StackWalk64?
I was also looking for a solution and I couldn't find it but managed to solve it myself here is what I used
Configuration Properties > C/C++ > Command Line
Add compiler option to Additional Options box
Like so
Add flag /Gh for _penter hook
Add flag /GH for _pexit hook
Code I use for tracing / logging
#include <intrin.h>
extern "C" void __declspec(naked) __cdecl _penter(void) {
__asm {
push ebp; // standard prolog
mov ebp, esp;
sub esp, __LOCAL_SIZE
pushad; // save registers
}
// _ReturnAddress always returns the address directly after the call, but that is not the start of the function!
PBYTE addr;
addr = (PBYTE)_ReturnAddress() - 5;
SYMBOL_INFO* mysymbol;
HANDLE process;
process = GetCurrentProcess();
SymInitialize(process, NULL, TRUE);
mysymbol = (SYMBOL_INFO*)calloc(sizeof(SYMBOL_INFO) + 256 * sizeof(char), 1);
mysymbol->MaxNameLen = 255;
mysymbol->SizeOfStruct = sizeof(SYMBOL_INFO);
SymFromAddr(process, (DWORD64)((void*)addr), 0, mysymbol);
myprintf("Entered Function: %s [0x%X]\n", mysymbol->Name, addr);
_asm {
popad; // restore regs
mov esp, ebp; // standard epilog
pop ebp;
ret;
}
}
extern "C" void __declspec(naked) __cdecl _pexit(void) {
__asm {
push ebp; // standard prolog
mov ebp, esp;
sub esp, __LOCAL_SIZE
pushad; // save registers
}
// _ReturnAddress always returns the address directly after the call, but that is not the start of the function!
PBYTE addr;
addr = (PBYTE)_ReturnAddress() - 5;
SYMBOL_INFO* mysymbol;
HANDLE process;
process = GetCurrentProcess();
SymInitialize(process, NULL, TRUE);
mysymbol = (SYMBOL_INFO*)calloc(sizeof(SYMBOL_INFO) + 256 * sizeof(char), 1);
mysymbol->MaxNameLen = 255;
mysymbol->SizeOfStruct = sizeof(SYMBOL_INFO);
SymFromAddr(process, (DWORD64)((void*)addr), 0, mysymbol);
myprintf("Exit Function: %s [0x%X]\n", mysymbol->Name, addr);
_asm {
popad; // restore regs
mov esp, ebp; // standard epilog
pop ebp;
ret;
}
}
I've been trying to setup SEH on x64 windows using gcc by calling the RtlAddFunctionTable. Unfortunately, the API call returns success but my handler doesn't seem to ever be called. And I can't find out what's wrong. My small example is:
EXCEPTION_DISPOSITION catchDivZero( struct _EXCEPTION_RECORD* rec
, void* arg1 __attribute__((unused))
, struct _CONTEXT* ctxt __attribute__((unused))
, void* arg2 __attribute__((unused))
)
{
printf("Exception will be handled!\n");
return ExceptionContinueSearch;
}
HMODULE GetCurrentModule()
{ // NB: XP+ solution!
HMODULE hModule = NULL;
GetModuleHandleEx(
GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
(LPCTSTR)GetCurrentModule,
&hModule);
return hModule;
}
typedef struct {
UINT8 Version : 3;
UINT8 Flags : 5;
UINT8 SizeOfProlog;
UINT8 CountOfUnwindCodes;
UINT8 FrameRegister : 4;
UINT8 FrameRegisterOffset : 4;
ULONG ExceptionHandler;
} UNWIND_INFO;
/* Hack, for bug in ld. Will be removed soon. */
#if defined(__GNUC__)
#define __ImageBase __MINGW_LSYMBOL(_image_base__)
#endif
/* Get the end of the text section. */
extern char etext[] asm("etext");
/* Get the base of the module. */
/* This symbol is defined by ld. */
extern IMAGE_DOS_HEADER __ImageBase;
static UNWIND_INFO info[1];
static RUNTIME_FUNCTION handlers[1];
#define base (ULONG)((HINSTANCE)&__ImageBase)
int main()
{
HANDLE hProcess = GetCurrentProcess();
HMODULE hModule = GetCurrentModule();
MODULEINFO mi;
GetModuleInformation(hProcess, hModule, &mi, sizeof(mi));
printf( "Module: 0x%.8X (0x%.8X) 0x%.8X |0x%.8X| [0x%.8X] {0x%.8X}\n\n"
, mi.lpBaseOfDll
, base
, (char*)etext
, mi.SizeOfImage
, &catchDivZero
, (ULONG)(&catchDivZero - base)
);
printf("Building UNWIND_INFO..\n");
info[0].Version = 1;
info[0].Flags = UNW_FLAG_EHANDLER;
info[0].SizeOfProlog = 0;
info[0].CountOfUnwindCodes = 0;
info[0].FrameRegister = 0;
info[0].FrameRegisterOffset = 0;
info[0].ExceptionHandler = (ULONG)(&catchDivZero - base);
printf("Created UNWIND_INFO at {0x%.8X}\n", info[0].ExceptionHandler);
printf("Building SEH handlers...\n");
handlers[0].BeginAddress = 0;
handlers[0].EndAddress = (ULONG)(etext - base);
handlers[0].UnwindData = (ULONG)((char*)info - base);
printf("Adding SEH handlers to .pdata..\n");
printf("Handler Unwind: 0x%.8X\n", &info);
printf( "Handler Info:: s: 0x%.8X, e: 0x%.8X, u: 0x%.8X\n"
, handlers[0].BeginAddress
, handlers[0].EndAddress
, handlers[0].UnwindData
);
if (RtlAddFunctionTable(handlers, 1, (DWORD64)base))
{
printf("Hook succeeded.\nTesting..\n");
printf("Things to do: %i\n", 12 / 0);
}
else
{
printf("Hook failed\n");
DWORD result = GetLastError();
printf("Error code: 0x%.8X\n", result);
}
}
However when called the output I get is:
> .\a.exe
Module: 0x00400000 (0x00400000) 0x00402FF0 |0x00022000| [0x00401530] {0x00001530}
Building UNWIND_INFO..
Created UNWIND_INFO at {0x00001530}
Building SEH handlers...
Adding SEH handlers to .pdata..
Handler Unwind: 0x00407030
Handler Info:: s: 0x00000000, e: 0x00002FF0, u: 0x00007030
Hook succeeded.
Testing..
The message in my handler is never printed.
Any help/pointers would be greatly appreciated.
RtlAddFunctionTable() adds a dynamic function table; if there already is a static function table (.pdata section) for the base address, the RtlAddFunctionTable() calls succeeds, but the static function table still takes precedence.
You need to allocate memory outside the image range, e.g. using VirtualAlloc(), and have your code and runtime table and unwind info there. The address of allocated memory is the base address for all the RVAs in the tables, and needs to be passed to RtlAddFunctionTable().
You can experiment with RtlLookupFunctionEntry() to see if the function table entry is found for a given address.
Sample code showing RtlAddFunctionTable() in action is at https://pmeerw.net/blog/programming/RtlAddFunctionTable.html.
Didn't you forget to register your handler with call to SetUnhandledExceptionFilter (if you use SEH as stated in your post) or AddVectoredExceptionHandler (if you decide to switch to VEH)? In your code you add information about the handler but do not register it.
I have tested your sample with the change of the handler itself:
LONG WINAPI catchDivZero(EXCEPTION_POINTERS * ExceptionInfo)
{
printf("Exception will be handled!\n");
return ExceptionContinueSearch;
}
and adding the code:
if (::AddVectoredExceptionHandler(TRUE, catchDivZero))
{
printf("Set exception handler.\nContinuing..\n");
}
else
{
printf("Setting exception handler failed\n");
DWORD result = GetLastError();
printf("Error code: 0x%.8X\n", result);
return 1;
}
just before the call to RtlAddFunctionTable.
Now the message from the handler is printed.
To remove the handler use:
::RemoveVectoredExceptionHandler(catchDivZero);
Hope it helps.
Note: as an alternative you may use SetUnhandledExceptionFilter(catchDivZero)). Keep in mind that it's not that useful for debugging:
After calling this function, if an exception occurs in a process that
is not being debugged, and the exception makes it to the unhandled
exception filter, that filter will call the exception filter function
specified by the lpTopLevelExceptionFilter parameter.
With VEH way we can debug the handler function right in IDE, with SEH we can not (there is probably a solution to this but I do not know about it) so I've proposed VEH as the main solution.
I wrote a simple application on Qt4 that modifier network adapter parameters, for that I have a slot called setInterfaceParams, implemented as so:
DWORD WinNetInterface::setInterfaceParams(QString index, QString ip, QString netmask, QString gateway)
{
DWORD res = NULL;
HINSTANCE lib = (HINSTANCE) LoadLibrary((WCHAR *)"iphlpapi.dll");
_SetAdapterIpAddress SetAdapterIpAddress = (_SetAdapterIpAddress) GetProcAddress(lib, "SetAdapterIpAddress");
PWSTR pszGUID = NULL;
//char *szGUID = (char *)index.toStdString().c_str();
QByteArray a = index.toLocal8Bit();
char *szGUID = a.data();
WideCharToMultiByte(CP_ACP, 0, pszGUID, -1, szGUID, sizeof(szGUID), NULL, NULL);
// Method 01
res = SetAdapterIpAddress(szGUID,
0,
inet_addr(ip.toStdString().c_str()),
inet_addr(netmask.toStdString().c_str()),
inet_addr(gateway.toStdString().c_str()));
// End of method 01
// Method 02
/*res = SetAdapterIpAddress("{422C5689-A17B-402D-A6A2-22CE13E857B5}",
0,
inet_addr("192.168.1.10"),
inet_addr("255.255.255.0"),
inet_addr("192.168.1.1"));*/
// End of method 02
return res;
}
When I click on button that connected to slot setInterfaceParams, I get segmentation fault. If I comment method01, nothing happen, the some thing happen when I use method02.
I tried this function on a simple c++ application and it is work fine, test on Windows XP SP3.
#include <windows.h>
#include <winsock2.h>
#include <iphlpapi.h>
#include <stdio.h>
#include <iostream>
typedef DWORD (WINAPI *_SetAdapterIpAddress )(char *szAdapterGUID,
DWORD dwDHCP,
DWORD dwIP,
DWORD dwMask,
DWORD dwGateway);
int main()
{
HINSTANCE lib = (HINSTANCE) LoadLibrary("iphlpapi.dll");
_SetAdapterIpAddress SetAdapterIpAddress = (_SetAdapterIpAddress) GetProcAddress(lib, "SetAdapterIpAddress");
PWSTR pszGUID = NULL;
char szGUID[] = "{422C5689-A17B-402D-A6A2-22CE13E857B5}";
DWORD dwSize = 0;
WideCharToMultiByte(CP_ACP, 0, pszGUID, -1, szGUID, sizeof(szGUID), NULL, NULL);
DWORD res = SetAdapterIpAddress(szGUID,
0,
inet_addr("192.168.1.10"),
inet_addr("255.255.255.0"),
inet_addr("192.168.1.1"));
std::cout << res;
return 0;
}
LoadLibrary((WCHAR *)"iphlpapi.dll");
That can't work, the literal string is in 8-bits, casting it without real conversion doesn't make it wide, so the dll loading probably failed.
You should use the TEXT or _T macro around most of the literal strings passed to WinAPI functions to make them regular or wide depending on the compilation options:
LoadLibrary(_T("iphlpapi.dll"));
which will translate to either LoadLibrary("iphlpapi.dll"); or LoadLibrary(L"iphlpapi.dll");.
Also you should always check the value returned by the LoadLibrary and GetProcAddress functions, which return NULL if the call is unsuccessful.
I've been trying to write a program in C++ that will monitor running processes in the background and terminate a certain one if it's detected to be running. I have written a program that will do so, however the only way I can think of to do this is to use an infinite WHILE loop that keeps checking for the program. This, as you can imagine, constantly uses CPU power and resources to be constantly looping. In task manager, you can see that most processes that are running are always using 0% of the CPU. My question is: How can I write or modify this program to run in the background, utilizing 0% of the CPU until it detects the process it's supposed to terminate?
My entire program is below. In this example, I have used "Notepad.exe" in WinMain as the process the program should be terminating.
#include <Windows.h>
#include <iostream>
#include <tlhelp32.h>
#include <string>
#define TA_FAILED 0
#define TA_SUCCESS_CLEAN 1
#define TA_SUCCESS_KILL 2
DWORD WINAPI TerminateApp(DWORD dwPID, DWORD dwTimeout);
DWORD WINAPI Terminate16App(DWORD dwPID, DWORD dwThread, WORD w16Task, DWORD dwTimeout);
typedef struct {
DWORD dwID;
DWORD dwThread;
} TERMINFO;
BOOL CALLBACK TerminateAppEnum( HWND hwnd, LPARAM lParam ) ;
DWORD WINAPI TerminateApp( DWORD dwPID, DWORD dwTimeout ) {
HANDLE hProc ;
DWORD dwRet ;
// If we can't open the process with PROCESS_TERMINATE rights,
// then we give up immediately.
hProc = OpenProcess(SYNCHRONIZE|PROCESS_TERMINATE, FALSE,
dwPID);
if(hProc == NULL) {
return TA_FAILED ;
}
// TerminateAppEnum() posts WM_CLOSE to all windows whose PID
// matches your process's.
EnumWindows((WNDENUMPROC)TerminateAppEnum, (LPARAM) dwPID) ;
// Wait on the handle. If it signals, great. If it times out,
// then you kill it.
if(WaitForSingleObject(hProc, dwTimeout)!=WAIT_OBJECT_0)
dwRet=(TerminateProcess(hProc,0)?TA_SUCCESS_KILL:TA_FAILED);
else
dwRet = TA_SUCCESS_CLEAN ;
CloseHandle(hProc) ;
return dwRet ;
}
BOOL CALLBACK TerminateAppEnum( HWND hwnd, LPARAM lParam ) {
DWORD dwID ;
GetWindowThreadProcessId(hwnd, &dwID) ;
if(dwID == (DWORD)lParam) {
PostMessage(hwnd, WM_CLOSE, 0, 0) ;
}
return TRUE ;
}
DWORD FindProcessId(const std::string& processName);
int WINAPI WinMain (HINSTANCE hInstance, HINSTANCE hPrevInstance, PSTR szCmdLine, int iCmdShow) {
std::string process1 = "Notepad.exe";
while (1) {
TerminateApp(FindProcessId(process1),0);
}
return 0;
}
DWORD FindProcessId(const std::string& processName) {
PROCESSENTRY32 processInfo;
processInfo.dwSize = sizeof(processInfo);
HANDLE processesSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, NULL);
if (processesSnapshot == INVALID_HANDLE_VALUE) {
return 0;
}
Process32First(processesSnapshot, &processInfo);
if (!processName.compare(processInfo.szExeFile)) {
CloseHandle(processesSnapshot);
return processInfo.th32ProcessID;
}
while (Process32Next(processesSnapshot, &processInfo)) {
if (!processName.compare(processInfo.szExeFile)) {
CloseHandle(processesSnapshot);
return processInfo.th32ProcessID;
}
}
CloseHandle(processesSnapshot);
return 0;
}
You can use WMI and event notification to find when processes are created and destroyed. __InstanceCreationEvent is what you need to look for.
Creation of a resource: __InstanceCreationEvent
Suppose you are interested in receiving a notification if Notepad is run on a certain computer. When Notepad runs, a corresponding process is created. Processes can be managed by using WMI and are represented by the Win32_Process class. When Notepad starts running, a corresponding instance of the Win32_Process class becomes available through WMI. If you have registered your interest in this event (by issuing the appropriate event notification query), the availability of this instance results in the creation of an instance of the __InstanceCreationEvent class.
I have looked at the various topics relating to this, but couldn't find this specific issue I am having.
Things I looked at:
Injecting code into executable at runtime
C SIGSEGV Handler & Mprotect
Can I write-protect every page in the address space of a Linux process?
How to write a signal handler to catch SIGSEGV?
I am able to handle SIGSEGV gracefully when the protection needs to be set to either PROT_READ or PROT_WRITE in the handler. However, when I try to inject instructions with mmap, and then use mprotect to set it to PROT_READ only, and then I execute the instructions via inline assembly, it causes a SIGSEGV as intended, but the handler is unable to get the originating address causing the signal, so I am unable to mprotect it to PROT_READ | PROT_EXEC.
Example:
void sigHandler(int signum, siginfo_t *info, void *ptr) {
printf("Received signal number: %d\n", signum);
printf("Signal originates from process %lu\n",
(unsigned long)info->si_pid);
printf("SIGSEGV caused by this address: ? %p\n", info->si_addr);
char * alignedbaseAddr = (((unsigned int)(info->si_addr)) >> 12) * getPageSize();
printf("Aligning to %p\n", alignedbaseAddr);
//flip this page to be r+x
mprotect(alignedbaseAddr, getPageSize(), PROT_READ | PROT_EXEC);
}
void setupSignalHandler() {
action.sa_sigaction = sigHandler;
action.sa_flags = SA_SIGINFO;
sigemptyset(&action.sa_mask);
sigaction(SIGSEGV, &action, NULL);
}
int main(int argc, char *argv[]) {
char * baseAddr = (char*)mmap(NULL, getDiskSize(), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if(baseAddr == MAP_FAILED) {
perror("Unable to mmap.");
}
printf("Process address space is %d\n", getDiskSize());
//no-op filler
for(int i = 0; i < (getDiskSize()) - 1; i++) {
baseAddr[i] = 0x90;
}
//ret instruction
baseAddr[i] = 0xc3;
if( mprotect(baseAddr, getDiskSize(), PROT_READ) == -1) {
perror("mprotect");
exit(1);
}
printf("Protecting addresses: %p to %p for READ_ONLY\n", baseAddr, baseAddr + getDiskSize() - 1);
setupSignalHandler();
__asm__
(
"call %%eax;"
: "=a" (output)
: "a" (baseAddr)
);
printf("Will this ever print?");
//close fd, and unmap memory
cleanUp();
return EXIT_SUCCESS;
}
Here is the resulting output:
Received signal number: 11
Signal originates from process 0
SIGSEGV caused by this address: ? (nil)
//the above output repeatedly loops, since it fails to "re mprotect" that page.
Architecture:
x86 32 bit
OS:
Ubuntu 11.04 - Linux version 2.6.38-12-generic (buildd#vernadsky) (gcc version 4.5.2 (Ubuntu/Linaro 4.5.2-8ubuntu4) )
Any ideas? The above logic works fine for simply read and writing into memory. Is there
a better way to execute instructions at runtime as opposed to inline assembly?
Thanks in advance!
In that case, the faulting address is the instruction pointer. Cast your third argument ptr (of your signal handler installed with SA_SIGINFO) to a ucontext_t, and retrieve the appropriate register, perhaps as (untested code!)
ucontext_t *uc = ptr;
void* faultyip = uc->uc_mcontext.gregs[REG_IP];
Read carefully /usr/include/sys/ucontext.h for more.
I'm interested to know why you are asking!!