I'm trying to convert assembly written in AT&T syntax from a DevC++ project to inline assembly in Visual Studio.
This is the AT&T I'm trying to convert:
// Draws a rectangle by calling into code located at fixed addresses, using
// GCC inline assembly (AT&T syntax, 32-bit x86).
// NOTE(review): presumably 0x004EAA90 is a function returning an object
// pointer and [object] is its table of function pointers -- confirm against
// the target binary.
// NOTE(review): every asm() below is a separate statement with no clobber
// list, so the sequence relies on the compiler leaving eax/ecx untouched
// between statements -- confirm this holds for the build settings in use.
void Painter::drawRectangle(int surface, int x, int y, int width, int height, int red, int green, int blue) {
// eax <- the constant 0x004EAA90
asm("mov %0, %%eax":: "r" (0x004EAA90));
// Register-indirect call: calls the address held in eax.
asm("call *%eax");
// Keep the returned value in ecx ...
asm("mov %eax, %ecx");
// ... and load the dword it points to into eax.
asm("mov (%ecx), %eax");
// Push the eight parameters, last declared first.
asm("push %0":: "m" (blue));
asm("push %0":: "m" (green));
asm("push %0":: "m" (red));
asm("push %0":: "m" (height));
asm("push %0":: "m" (width));
asm("push %0":: "m" (y));
asm("push %0":: "m" (x));
asm("push %0":: "m" (surface));
// Call through the pointer stored 0x14 bytes past the address in eax.
asm("call *0x14(%eax)");
}
What I've done so far:
// Draws a rectangle by calling into code at fixed addresses inside the host
// process (MSVC inline assembly, 32-bit x86 only).
//
// Sequence: call the function at 0x004eaa90, treat its return value as an
// object pointer (kept in ecx), load the dword that object starts with, and
// call the function pointer stored 0x14 bytes into that table with the eight
// parameters pushed last-to-first.
// NOTE(review): esp is not adjusted after the final call, so the callee is
// presumably expected to pop its own arguments -- confirm.
void _drawrectangle(int surface, int x, int y, int width, int height, int red, int green, int blue)
{
    __asm
    {
        mov eax, 0x004eaa90
        // Register-indirect call to the address itself. The previous
        // "call dword ptr [eax]" instead called through the dword stored
        // AT 0x004eaa90, i.e. it jumped to the function's first code bytes
        // interpreted as an address.
        call eax
        mov ecx, eax                    // returned pointer -> ecx
        mov eax, [ecx]                  // first dword of the object
        push blue
        push green
        push red
        push height
        push width
        push y
        push x
        push surface
        call dword ptr [eax + 0x14]
    }
}
I'm writing this in my DLL, which I've already injected into the game. The game crashes on opening. And I've already hooked another drawing function in C++, which worked.
Hopefully you can help me/guide me in the right direction. Thank you.
Here's how you could write your function in C++ without the use of inline assembly:
#ifndef _MSC_VER
/* For GCC and clang */
#undef __thiscall
#define __thiscall __attribute__((thiscall))
#endif
// Minimal stand-in for a class that lives in the target binary. Only the
// order of the virtual functions matters here: the five placeholders come
// first so that drawRectangle is the sixth virtual function declared.
// NOTE(review): assumes one 4-byte table slot per virtual function in
// declaration order (so drawRectangle sits at offset 0x14) -- confirm for
// the compiler/ABI in use.
struct some_interface {
// Placeholders; their real signatures are unknown and they are never called.
virtual void _unknown_0() = 0;
virtual void _unknown_4() = 0;
virtual void _unknown_8() = 0;
virtual void _unknown_C() = 0;
virtual void _unknown_10() = 0;
// The one method actually used; explicitly declared __thiscall.
virtual void __thiscall drawRectangle(int surface, int x, int y,
int width, int height,
int red, int green, int blue) = 0;
};
const auto get_interface = (some_interface *(*)()) 0x4EAA90;
void
drawRectangle(int surface, int x, int y, int width, int height,
int red, int green, int blue) {
get_interface()->drawRectangle(surface, x, y, width, height,
red, green, blue);
}
The code you're trying to translate first calls a function that returns a pointer to some class object with at least 6 virtual methods defined. It then calls the 6th virtual method of that object. The some_interface struct minimally recreates that class so the 6th virtual method can be called. The get_interface constant is a function pointer that points to the function located at 0x4EAA90 and in C++ function pointers can be used just like a function.
The above code generates the following assembly in GCC 8.2:
drawRectangle(int, int, int, int, int, int, int, int):
subl $12, %esp
movl $5155472, %eax
call *%eax
movl (%eax), %edx
movl %eax, %ecx
pushl 44(%esp)
pushl 44(%esp)
pushl 44(%esp)
pushl 44(%esp)
pushl 44(%esp)
pushl 44(%esp)
pushl 44(%esp)
pushl 44(%esp)
call *20(%edx)
addl $12, %esp
ret
and the following assembly with Visual C++ 2017:
void drawRectangle(int,int,int,int,int,int,int,int) PROC ; drawRectangle, COMDAT
mov eax, 5155472 ; 004eaa90H
call eax
push DWORD PTR _blue$[esp-4]
mov ecx, eax
push DWORD PTR _green$[esp]
mov edx, DWORD PTR [eax]
push DWORD PTR _red$[esp+4]
push DWORD PTR _height$[esp+8]
push DWORD PTR _width$[esp+12]
push DWORD PTR _y$[esp+16]
push DWORD PTR _x$[esp+20]
push DWORD PTR _surface$[esp+24]
call DWORD PTR [edx+20]
ret 0
void drawRectangle(int,int,int,int,int,int,int,int) ENDP ; drawRectangle
Related
I've developed a little program that tests the performance of 32-bit Windows structured exception handling. To keep the overhead minimal in contrast to the rest, I wrote the code generating and filtering the exception in assembly.
This is the C++-code:
#include <Windows.h>
#include <iostream>
using namespace std;
bool __fastcall getPointerFaultSafe( void *volatile *from, void **to );
int main()
{
auto getThreadTimes = []( LONGLONG &kt, LONGLONG &ut )
{
union
{
FILETIME ft;
LONGLONG ll;
} creationTime, exitTime, kernelTime, userTime;
GetThreadTimes( GetCurrentThread(), &creationTime.ft, &exitTime.ft, &kernelTime.ft, &userTime.ft );
kt = kernelTime.ll;
ut = userTime.ll;
};
LONGLONG ktStart, utStart;
getThreadTimes( ktStart, utStart );
size_t const COUNT = 100'000;
void *pv;
for( size_t c = COUNT; c; --c )
getPointerFaultSafe( nullptr, &pv );
LONGLONG ktEnd, utEnd;
getThreadTimes( ktEnd, utEnd );
double ktNsPerException = (ktEnd - ktStart) * 100.0 / COUNT,
utNsPerException = (utEnd - utStart) * 100.0 / COUNT;
cout << "kernel-time per exception: " << ktNsPerException << "ns" << endl;
cout << "user-time per exception: " << utNsPerException << "ns" << endl;
return 0;
}
This is the assembly-code:
; bool __fastcall getPointerFaultSafe( void *volatile *from, void **to )
;
; Copies *from to *to under a hand-installed SEH frame. On entry ecx = from
; and edx = to. If either access faults, sehHandler zeroes EAX in the
; exception context and resumes at "byebye", so the function returns with
; al = 0; otherwise it returns with al = 1.
;
; The decorated names below use '@' (?name@@...@Z); the previous '#'
; characters did not match the name the C++ compiler emits for the
; declaration above.
.686P
PUBLIC ?getPointerFaultSafe@@YI_NPCRAXPAPAX@Z
PUBLIC sehHandler
; Registers sehHandler as a safe exception handler.
; NOTE(review): this presumably only takes effect when the module is
; assembled with ml /safeseh -- confirm in the build settings.
.SAFESEH sehHandler
sehHandler PROTO

_DATA SEGMENT
; Distance in bytes from "mightfail" to "byebye"; written by
; getPointerFaultSafe, read by sehHandler.
byebyeOffset dd 0
_DATA ENDS

; Byte offsets used to reach the saved Eax and Eip inside the context record
; passed to the handler.
exc_ctx_eax = 0b0h
exc_ctx_eip = 0b8h

_TEXT SEGMENT
?getPointerFaultSafe@@YI_NPCRAXPAPAX@Z PROC
    ASSUME ds:_DATA
    ; Build the registration record on the stack (handler, previous head)
    ; and make it the new head of the chain at fs:0.
    push OFFSET sehHandler
    push dword ptr fs:0
    mov dword ptr fs:0, esp
    mov byebyeOffset, OFFSET byebye - OFFSET mightfail
    mov al, 1                       ; result if nothing faults
mightfail:
    mov ecx, dword ptr [ecx]        ; may fault (read through from)
    mov dword ptr [edx], ecx
byebye:
    ; Unlink the registration record and drop its two dwords.
    mov edx, dword ptr [esp]
    mov dword ptr fs:0, edx
    add esp, 8
    ret 0
?getPointerFaultSafe@@YI_NPCRAXPAPAX@Z ENDP

; Exception handler: [esp + 12] is taken as the pointer to the context
; record. Sets the saved Eax to 0, advances the saved Eip by byebyeOffset,
; and returns 0 in eax.
; NOTE(review): the handler does not inspect the exception code or the
; faulting address, and adds byebyeOffset to whatever Eip faulted -- this is
; only correct when the fault is at "mightfail"; confirm that is acceptable.
sehHandler PROC
    mov eax, dword ptr [esp + 12]
    mov dword ptr [eax + exc_ctx_eax], 0
    mov edx, byebyeOffset
    add [eax + exc_ctx_eip], edx
    mov eax, 0
    ret 0
sehHandler ENDP
_TEXT ENDS
END
How do I get the asm-module of my program /SAFESEH-compatible?
Why does this program consume so much userland CPU time? The library code called by the operating system once exception handling has begun should only have to save all the registers in the CONTEXT structure, fill the EXCEPTION_RECORD structure, and call the topmost exception filter, which - in this case - shifts the execution two instructions further; when the filter returns, the registers are restored and execution continues according to what I returned in EAX. All of that should not take so long that almost 1/3 of the CPU time is spent in userland. That's about 2,3ms, i.e. when my old Ryzen 1800X is boosting on one core with 4GHz, about 5.200 clock-cycles.
I'm using the byebyeOffset-variable in my code to carry the distance between the unsafe instruction that might generate an access-violation and the safe code afterwards. I'm initializing this variable before the unsafe instruction. But it would be nice to have this offset statically as an immediate at the point where I add it on EIP in the exception-filter function sehHandler; but the offsets are scoped to getPointerFaultSafe. Of course storing the offset and fetching it from the variable take a negligible part of the overall computation time, but it would be nicer to have a clean solution.
I was trying to reduce the clutter in my original question (below), but I am afraid that made it harder to follow. So here is the original source along with IDA's disassembly.
My question still is this: why does getStruct() pop the return argument and only the return argument off the stack? (It's calling ret 4 instead of ret for no arguments or ret 12 for all three arguments).
#include <iostream>
// A 0x1000-byte aggregate, large enough that it is returned through memory
// rather than in registers.
struct SomeStruct {
    char m_buff[0x1000];
};

// Returns a value-initialized (all-zero) SomeStruct. Both arguments are
// accepted but not used.
SomeStruct getStruct(uint32_t someArg1, uint32_t someArg2)
{
    SomeStruct zeroed = SomeStruct();
    return zeroed;
}
// Calls getStruct once with the constants 0x20 and 0x30 so the struct-return
// call sequence shows up in the disassembly; the result is not used.
int main(int argc, const char * argv[])
{
    SomeStruct myLocalStruct(getStruct(0x20, 0x30));
    return 0;
}
; _DWORD __stdcall getStruct(unsigned int, unsigned int)
public getStruct(unsigned int, unsigned int)
getStruct(unsigned int, unsigned int) proc near ; CODE XREF: _main+4Dp
var_8 = dword ptr -8
var_4 = dword ptr -4
arg_0 = dword ptr 8
arg_4 = dword ptr 0Ch
arg_8 = dword ptr 10h
push ebp
mov ebp, esp
sub esp, 18h
mov eax, [ebp+arg_8]
mov ecx, [ebp+arg_4]
mov edx, [ebp+arg_0]
mov [ebp+var_4], ecx
mov [ebp+var_8], eax
mov eax, esp
mov [eax], edx
mov dword ptr [eax+4], 1000h
call ___bzero
add esp, 18h
pop ebp
retn 4
getStruct(unsigned int, unsigned int) endp
; ---------------------------------------------------------------------------
align 10h
; =============== S U B R O U T I N E =======================================
; Attributes: bp-based frame
; int __cdecl main(int argc, const char **argv, const char **envp)
public _main
_main proc near
var_1020 = dword ptr -1020h
var_101C = dword ptr -101Ch
var_1018 = dword ptr -1018h
var_14 = dword ptr -14h
var_10 = dword ptr -10h
var_C = dword ptr -0Ch
argc = dword ptr 8
argv = dword ptr 0Ch
envp = dword ptr 10h
push ebp
mov ebp, esp
push edi
push esi
sub esp, 1030h
mov eax, [ebp+argv]
mov ecx, [ebp+argc]
lea edx, [ebp+var_1018]
mov esi, 20h
mov edi, 30h
mov [ebp+var_C], 0
mov [ebp+var_10], ecx
mov [ebp+var_14], eax
mov [esp], edx ; ptr to destination
mov dword ptr [esp+4], 20h ; unsigned int
mov dword ptr [esp+8], 30h
mov [ebp+var_101C], esi
mov [ebp+var_1020], edi
call getStruct(uint,uint)
sub esp, 4
mov eax, 0
add esp, 1030h
pop esi
pop edi
pop ebp
retn
_main endp
Original question below:
I have some function with the following declaration:
SomeStruct getStruct(uint32_t someArg1, uint32_t someArg2);
getStruct is being called like this:
myLocalStruct = getStruct(someArg1,someArg2);
When compiling this using clang on x86 the calling code looks roughly like this:
lea esi, [ebp-myLocalStructOffset]
mov [esp], esi
mov [esp+4], someArg1
mov [esp+8], someArg2
call getStruct;
sub esp, 4;
So the caller is restoring its stack pointer after the call. Sure enough, the implementation of getStruct ends with a ret 4, effectively popping the structs pointer.
This looks like it is partially cdecl with the caller being responsible for the stack cleanup, and partially stdcall with the callee removing the arguments. I just cannot figure out what the reason is for this approach. Why not leave all the cleanup to the caller? Is there any benefit to this ?
It looks as if you forgot to quote the few lines of assembler above the part you quoted. I assume there is something like:
sub esp,12
somewhere above what you quoted. The calling convention looks like pure stdcall, and the return value is in reality passed as a hidden pointer argument, i.e. the code is in fact compiled as if you had declared:
void __stdcall getStruct(SomeStruct *returnValue, uint32_t someArg1, uint32_t someArg2);
I have hooked an exported MFC DLL function using a naked function.
The definition of the naked function is as follows:
// Hook entry point, declared naked so no prologue/epilogue is generated;
// the stack frame is built by hand below. When flg is false it loads
// C:\Sample.dll, calls the routine at offset 0x1032 inside it with __this
// and objParam2, restores the registers and jumps to Trampoline.
// NOTE(review): only one closing brace follows the two opening ones, so the
// end of the function body is not visible here, and no epilogue is shown for
// the case where flg is not false.
__declspec(naked)
static void __cdecl GenericHook(void* __this,class CScrollViewAccess* objParam1, class CRect& objParam2,unsigned int iParam1, unsigned long iParam2, char* szParam1,
void* vParam1, class CFont* objParam3,class CFont* objParam4,
class CBrush* objParam5)
{ /*function body start*/
__asm pushad; /* first "argument", which is also used to store registers */
__asm push ecx; /* padding so that ebp+8 refers to the first "argument" */
/* set up standard prologue */
__asm push ebp;
__asm mov ebp, esp;
// Stack at this point, relative to ebp:
//   [ebp+0]          saved ebp
//   [ebp+4]          the ecx pushed as padding
//   [ebp+8..ebp+39]  the eight registers stored by pushad
//   [ebp+40]         return address of the hooked call
//   [ebp+44]...      whatever the caller pushed
// NOTE(review): if the compiler addresses the declared parameters from
// ebp+8 upward (__this at +8, objParam1 at +12, objParam2 at +16, ...), they
// resolve to pushad's register slots rather than to the caller's arguments,
// which would explain the wrong values reported for objParam2 -- confirm.
__asm sub esp, __LOCAL_SIZE;
if(flg == false)
{
//RECT* rct = reinterpret_cast(&objParam2);
hInst = LoadLibrary("C:\\Sample.dll"); /// MFC Dll
// Target routine = module base + 0x1032.
funcPTR = (CMYCLASS_)(((int)hInst)+((int)0x00001032));
funcPTR(__this,objParam2);
/* standard epilogue */
__asm mov esp, ebp;
__asm pop ebp;
__asm pop ecx; /* clear padding */
__asm popad; /* clear first "argument" */
// Continue in the original function via the trampoline.
__asm jmp [Trampoline];
}
/*function body end*/
The Mfc dll has following function:
// Appends the width, height and top-left corner of objParam to
// c:\LogFolder\log.txt. If the log file cannot be opened the call does
// nothing (previously fprintf/fclose were called on a null FILE*).
void CMyClass::returnRect(class CRect& objParam)
{
    int width = objParam.Width();
    int height = objParam.Height();
    CPoint pt = objParam.TopLeft();

    // Printed with %s below, so only the bytes before the first 0 are
    // written -- i.e. the single byte 13.
    char szEnter[6] = {13,0,10,0,0,0};

    FILE* fp = fopen("c:\\LogFolder\\log.txt","ab+");
    if (fp == NULL)
        return;
    fprintf(fp,"Width: %d Height: %d X co-ord: %d Y co-ord: %d\n%s",width,height,pt.x,pt.y,szEnter);
    fclose(fp);
}
After passing the CRect& parameter to the MFC DLL, the logged values are wrong.
How should the reference object be processed?
I have solved this hooking problem as follows:
// Naked hook entry point: builds an ebp frame by hand, saves all general
// registers, copies the return address and the first four stack arguments
// into the globals objParam1..objParam5, appends them to
// c:\LogFolder\log.txt, restores the registers and jumps to Trampoline.
// Logging is skipped when the file cannot be opened (previously
// fprintf/fclose were called on a null FILE*).
extern "C" __declspec(naked) __declspec(dllexport) void __stdcall GenericHook()
{ /*function body start*/
    /* set up standard prologue */
    __asm push ebp;
    __asm mov ebp, esp;
    __asm pushad;
    // __asm sub esp, __LOCAL_SIZE; // Grow stack size

    /* [ebp+4] is the return address; [ebp+8] onward are the caller's
       stack arguments. Note that objParam1 receives the return address,
       so the value logged as "arg1" is not an argument. */
    __asm mov eax,[ebp+4]; //Return Address
    __asm mov objParam1,eax;
    __asm mov eax,DWORD ptr[ebp+8]; //arg1
    __asm mov objParam2,eax;
    __asm mov eax,DWORD ptr[ebp+12]; //arg2
    __asm mov objParam3,eax;
    __asm mov eax,DWORD ptr[ebp+16]; //arg3
    __asm mov objParam4,eax;
    __asm mov eax,DWORD ptr[ebp+20]; //arg4
    __asm mov objParam5,eax;

    /*-------------PROCESSING START---------------------*/
    fp = fopen("c:\\LogFolder\\log.txt","ab+");
    if (fp != NULL)
    {
        fprintf(fp,"arg1: %lu~arg2: %lu~arg3: %lu~arg4: %lu~ar5: %lu\n",objParam1,objParam2,objParam3,objParam4,objParam5);
        fprintf(fp,"==========================================================================\n\n");
        fclose(fp);
    }
    /*-------------PROCESSING END-----------------------*/

    /* standard epilogue
    __asm add esp, __LOCAL_SIZE;*/
    __asm popad;
    __asm mov esp, ebp;
    __asm pop ebp;
    /* Continue in the original function. */
    __asm jmp [Trampoline];
}
I am testing the cdecl calling convention, but I am a little confused by the result.
Original C code:
/* Returns a + b. Declared cdecl explicitly. */
int __attribute__((cdecl)) add(int a,int b)
{
    int sum = a + b;
    return sum;
}
/* Variadic cdecl function: ignores its arguments, calls add(2, 3) and
   stores the result in a local that is never read. */
void __attribute__((cdecl)) print(int i, ...)
{
    int a = 2;
    int b = 3;
    int j = add(a, b);
}
/* Program entry: calls print(2, 3).
   The definition previously appeared twice, verbatim; a second definition of
   the same function is a redefinition error, so only one is kept. */
void __attribute__((cdecl)) main(void)
{
    print(2,3);
}
compile code with this:
gcc test3.c -o test3 -g -mrtd
the corresponding asm like that:
(gdb) disassemble print
Dump of assembler code for function print:
0x080483cb <+0>: push ebp
0x080483cc <+1>: mov ebp,esp
0x080483ce <+3>: sub esp,0x18
0x080483d1 <+6>: mov DWORD PTR [ebp-0x8],0x2
0x080483d8 <+13>:mov DWORD PTR [ebp-0xc],0x3
0x080483df <+20>:mov eax,DWORD PTR [ebp-0xc]
0x080483e2 <+23>:mov DWORD PTR [esp+0x4],eax
0x080483e6 <+27>:mov eax,DWORD PTR [ebp-0x8]
0x080483e9 <+30>:mov DWORD PTR [esp],eax
0x080483ec <+33>:call 0x80483b4 <add>
0x080483f1 <+38>:mov DWORD PTR [ebp-0x4],eax
0x080483f4 <+41>:leave
0x080483f5 <+42>:ret
(gdb) disassemble add
Dump of assembler code for function add:
0x080483b4 <+0>: push ebp
0x080483b5 <+1>: mov ebp,esp
0x080483b7 <+3>: sub esp,0x10
0x080483ba <+6>: mov eax,DWORD PTR [ebp+0xc]
0x080483bd <+9>: mov edx,DWORD PTR [ebp+0x8]
0x080483c0 <+12>:lea eax,[edx+eax*1]
0x080483c3 <+15>:mov DWORD PTR [ebp-0x4],eax
0x080483c6 <+18>:mov eax,DWORD PTR [ebp-0x4]
0x080483c9 <+21>:leave
0x080483ca <+22>:ret
(gdb) disassemble main
Dump of assembler code for function main:
0x080483f6 <+0>: push ebp
0x080483f7 <+1>: mov ebp,esp
0x080483f9 <+3>: sub esp,0x8
0x080483fc <+6>: mov DWORD PTR [esp+0x4],0x3
0x08048404 <+14>:mov DWORD PTR [esp],0x2
0x0804840b <+21>:call 0x80483cb <print>
0x08048410 <+26>:leave
0x08048411 <+27>:ret
With the cdecl convention I expected an instruction such as
add esp, 8 (or something similar) after each call, but the compiler did not emit one.
Why is that? Thank you.
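The arguments are not pushed: each caller reserves space for them up front with sub esp and writes them with mov [esp], ... and mov [esp+4], ..., so esp does not change around the call and there is nothing to pop afterwards. The code then uses the leave instruction, which restores the stack frame. Therefore there is no need to adjust the stack pointer separately.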
I have a function in VS where I pass a pointer to the function. I then want to store the pointer in a register to further manipulate. How do you do that?
I have tried
// Global array of 16 floats, declared with 16-byte alignment; used as the
// operand of the inline-assembly experiments below.
float __declspec(align(16)) x[16] =
{
0.125000, 0.125000, 0.125000, 0,
-0.125000, 0.125000, -0.125000, 0,
0.125000, -0.125000, -0.125000, 0,
-0.125000, -0.125000, 0.125000, 0
};
void e()
{
__asm mov eax, x // doesn't work
__asm mov ebx, [eax]
}
void f(float *p)
{
__asm mov eax, p // does work
__asm mov ebx, [eax]
}
// Runs both variants: first the pointer-parameter version, then the
// global-array version.
int main()
{
    f(x);
    e();
    return 0;
}
Option 1 actually seems to work fine. Consider the following program:
#include <stdio.h>
void f(int *p) {
__asm mov eax, p
__asm mov ebx, [eax]
// break here
}
// Passes the address of a local holding 0x12345678 to f(), so the value f()
// loads into ebx is easy to recognise in the debugger.
// Declared as int main(): a void return type for main is a compiler
// extension, not standard C++.
int main()
{
    int i = 0x12345678;
    f(&i);
}
With Visual Studio 2008 SP1, a single-file C++ program and debug build, I'm getting the following in the registers window when stepping into the end of f():
EAX = 004DF960
EBX = 12345678
ECX = 00000000
EDX = 00000001
ESI = 00000000
EDI = 004DF884
EIP = 003013C3
ESP = 004DF7B8
EBP = 004DF884
EFL = 00000202
Looking at the values in EAX, EBX and ESP, that looks like a pretty good evidence that you actually have the pointer you wanted in EAX. The address in EAX is just a tad higher than the one in ESP, suggesting it's one frame higher up the stack. The dereferenced value loaded into EBX suggests we got the right address.
Loading the address of a global is subtly different. The following example uses the LEA instruction to accomplish the task.
#include <stdio.h>
// Two recognisable values so the loads below can be checked in the debugger.
int a[] = { 0x1234, 0x4567 };

// Loads the address of the global array a into eax, then its two elements
// into ebx and ecx.
// Declared as int main(): a void return type for main is a compiler
// extension, not standard C++.
int main()
{
    // __asm mov eax, a ; interpreted as eax <- a[0]
    __asm lea eax, a ; interpreted as eax <- &a[0]
    __asm mov ebx, [eax]
    __asm mov ecx, [eax+4]
    // break here
}
Stepping to the end of main() gets you the following register values. EAX gets the address of the first element in the array, while EBX and ECX get the values of its members.
EAX = 00157038
EBX = 00001234
ECX = 00004567
EDX = 00000001
ESI = 00000000
EDI = 0047F800
EIP = 001513C9
ESP = 0047F734
EBP = 0047F800
EFL = 00000202
The magic isn't in the LEA instruction itself. Rather, it appears that the __asm directive treats C/C++ identifiers differently depending on whether a MOV or an LEA instruction is used. Here is the ASM dump of the same program, when the MOV instruction is uncommented. Notice how the MOV instruction gets the content of a[] as its argument (DWORD PTR), while the LEA instruction gets its offset.
; ...
PUBLIC ?a@@3PAHA ; a
_DATA SEGMENT
?a@@3PAHA DD 01234H ; a
DD 04567H
_DATA ENDS
; ...
mov eax, DWORD PTR ?a@@3PAHA
lea eax, OFFSET ?a@@3PAHA
mov ebx, DWORD PTR [eax]
mov ecx, DWORD PTR [eax+4]
; ...
I'm not sure if this is correct, but have you tried casting *p to an int first and then loading that value?
void f(*p)
{
int tmp = (int)p;
// asm stuff...
}