Trying to convert this Assembly code to C code - gcc

# x at %ebp+8, n at %ebp+12
movl 8(%ebp), %esi
movl 12(%ebp), %ebx
movl $-1, %edi
movl $1, %edx
.L2:
movl %edx, %eax
andl %esi, %eax
xorl %eax, %edi
movl %ebx, %ecx
sall %cl, %edx
testl %edx, %edx
jne .L2
movl %edi, %eax
I converted the above code to the below code, but i am not completely sure if it is correct.
int loop(int x, int n){
int result = -1;
for (mask = 1; mask >= result; mask = x&1) {
result ^= n;
}
return result;
}
x and n are two integers stored in %ebp memory and are moved to registry %esi and %ebx. Result and Mask have a value of -1 and 1 and that is from the first part of the code
I think after .L2: the loop starts and that is where i get confused.
At the end result is returned movl %edi, %eax

Your code is completely wrong. You should have done some test by yourself before posting quesetions.
First of all, mask in your code is not declared in your function.
Then, after declareing mask as int, the function loop will fall into an infinite loop when result won't become positive via result ^= n;. On the other hand, the assembly code won't fall into an infinite loop unless n is multiple of 32 (including zero).
To convert the code to assembly:
1. I did direct conversion from assembly to C.
Note that I used unsigned type uint32_t because
Use unsigned type because left shift operation to signed integer will cause undefined behavior when overflow occures or the value to be shifted is negative.
Use uint32_t because size of unsigned int is dependent to environments and it may be less than 32-bit long while registers used here (except for %cl) are 32-bit long.
Quote from N1570 6.5.7 Bitwise shift operators:
4 The result of E1 << E2 is E1 left-shifted E2 bit positions; vacated bits are filled with zeros. If E1 has an unsigned type, the value of the result is E1 × 2E2, reduced modulo one more than the maximum value representable in the result type. If E1 has a signed type and nonnegative value, and E1 × 2E2 is representable in the result type, then that is the resulting value; otherwise, the behavior is undefined.
Also note that stdint.h or inttypes.h has to be included to use uint32_t.
The width to shift is masked to 5-bit long in x86 CPUs that is 80286 or later.
uint32_t loop(uint32_t x, uint32_t n) {
uint32_t esi = x; /* movl 8(%ebp), %esi */
uint32_t ebx = n; /* movl 12(%ebp), %ebx */
uint32_t edi = -1; /* movl $-1, %edi */
uint32_t edx = 1; /* movl $1, %edx */
uint32_t eax, ecx;
do { /* .L2: */
eax = edx; /* movl %edx, %eax */
eax &= esi; /* andl %esi, %eax */
edi ^= eax; /* xorl %eax, %edi */
ecx = ebx; /* movl %ebx, %ecx */
edx <<= (ecx & 0xff) & 31; /* sall %cl, %edx */
} while (edx != 0); /* testl %edx, %edx ; jne .L2 */
eax = edi; /* movl %edi, %eax */
return eax;
}
2. I introduced variable names to make their roles clear.
uint32_t loop(uint32_t x, uint32_t n) {
uint32_t result = -1;
uint32_t mask = 1;
uint32_t eax, ecx;
do {
eax = mask;
eax &= x;
result ^= eax;
ecx = n;
mask <<= (ecx & 0xff) & 31;
} while (mask != 0);
return result;
}
3. I merged some expressions.
uint32_t loop(uint32_t x, uint32_t n) {
uint32_t result = -1;
uint32_t mask = 1;
do {
result ^= mask & x;
mask <<= n & 31;
} while (mask != 0);
return result;
}
4. I changed do loop to for loop because your attempt uses it.
uint32_t loop(uint32_t x, uint32_t n) {
uint32_t result = -1;
uint32_t mask;
for (mask = 1; mask != 0; mask <<= n & 31) {
result ^= mask & x;
}
return result;
}
Full code for testing and demo:
#include <stdio.h>
#include <inttypes.h>
#include <limits.h>
__asm__ (
/* support both environments that does and doesn't add underscore before function name */
"loop_asm:\n"
"_loop_asm:\n"
"push %ebp\n"
"mov %esp, %ebp\n"
"push %esi\n"
"push %edi\n"
"push %ebx\n"
"# x at %ebp+8, n at %ebp+12\n"
"movl 8(%ebp), %esi\n"
"movl 12(%ebp), %ebx\n"
"movl $-1, %edi\n"
"movl $1, %edx\n"
".L2_test:\n" /* rename .L2 to .L2_test to avoid collision */
"movl %edx, %eax\n"
"andl %esi, %eax\n"
"xorl %eax, %edi\n"
"movl %ebx, %ecx\n"
"sall %cl, %edx\n"
"testl %edx, %edx\n"
"jne .L2_test\n"
"movl %edi, %eax\n"
"pop %ebx\n"
"pop %edi\n"
"pop %esi\n"
"leave\n"
"ret\n"
);
uint32_t loop_asm(uint32_t, uint32_t);
uint32_t loop_convert(uint32_t x, uint32_t n) {
uint32_t result = -1;
uint32_t mask;
for (mask = 1; mask != 0; mask <<= n & 31) {
result ^= mask & x;
}
return result;
}
int mask;
int loop(int x, int n){
int result = -1;
for (mask = 1; mask >= result; mask = x&1) {
result ^= n;
}
return result;
}
int main(void) {
int x, n;
uint32_t raw, test, conv;
int miss_count = 0;
/* search for mismatch in some range */
for (n = 1; n < 32; n++) {
uint32_t x_test;
for (x_test = 0; x_test < UINT32_C(100000); x_test++) {
if (loop_asm(x, n) != loop_convert(x, n)) {
printf("mismatch at x=%"PRIu32", n=%d\n", x_test, n);
if (miss_count < INT_MAX) miss_count++;
}
}
}
printf("%d mismatch(es) found.\n", miss_count);
/* print some examples */
x = 100;
n = 5;
raw = loop_asm(x, n);
conv = loop_convert(x, n);
printf("loop_asm(%d, %d) = %"PRIu32"\n", x, n, raw);
printf("loop_convert(%d, %d) = %"PRIu32"\n", x, n, conv);
fflush(stdout);
test = loop(x, n);
printf("loop(%d, %d) = %"PRIu32"\n", x, n, test);
return 0;
}

Related

How do I enable /SAFESEH with assemly-code / SEH-performance

I've developed a little program that tests the performance of 32 bit Windows structured exception handling. To keep the overhead minimal in contrast to the rest, I wrote the code generating an filtering the exception in assembly.
This is the C++-code:
#include <Windows.h>
#include <iostream>
using namespace std;
bool __fastcall getPointerFaultSafe( void *volatile *from, void **to );
int main()
{
auto getThreadTimes = []( LONGLONG &kt, LONGLONG &ut )
{
union
{
FILETIME ft;
LONGLONG ll;
} creationTime, exitTime, kernelTime, userTime;
GetThreadTimes( GetCurrentThread(), &creationTime.ft, &exitTime.ft, &kernelTime.ft, &userTime.ft );
kt = kernelTime.ll;
ut = userTime.ll;
};
LONGLONG ktStart, utStart;
getThreadTimes( ktStart, utStart );
size_t const COUNT = 100'000;
void *pv;
for( size_t c = COUNT; c; --c )
getPointerFaultSafe( nullptr, &pv );
LONGLONG ktEnd, utEnd;
getThreadTimes( ktEnd, utEnd );
double ktNsPerException = (ktEnd - ktStart) * 100.0 / COUNT,
utNsPerException = (utEnd - utStart) * 100.0 / COUNT;
cout << "kernel-time per exception: " << ktNsPerException << "ns" << endl;
cout << "user-time per exception: " << utNsPerException << "ns" << endl;
return 0;
}
This is the assembly-code:
.686P
PUBLIC ?getPointerFaultSafe##YI_NPCRAXPAPAX#Z
PUBLIC sehHandler
.SAFESEH sehHandler
sehHandler PROTO
_DATA SEGMENT
byebyeOffset dd 0
_DATA ENDS
exc_ctx_eax = 0b0h
exc_ctx_eip = 0b8h
_TEXT SEGMENT
?getPointerFaultSafe##YI_NPCRAXPAPAX#Z PROC
ASSUME ds:_DATA
push OFFSET sehHandler
push dword ptr fs:0
mov dword ptr fs:0, esp
mov byebyeOffset, OFFSET byebye - OFFSET mightfail
mov al, 1
mightfail:
mov ecx, dword ptr [ecx]
mov dword ptr [edx], ecx
byebye:
mov edx, dword ptr [esp]
mov dword ptr fs:0, edx
add esp, 8
ret 0
?getPointerFaultSafe##YI_NPCRAXPAPAX#Z ENDP
sehHandler PROC
mov eax, dword ptr [esp + 12]
mov dword ptr [eax + exc_ctx_eax], 0
mov edx, byebyeOffset
add [eax + exc_ctx_eip], edx
mov eax, 0
ret 0
sehHandler ENDP
_TEXT ENDS
END
How do I get the asm-module of my program /SAFESEH-compatible?
Why does this program consume so much userland CPU-time? The library-code being called by the operating-system after the exception has begun to be handled should have only to save all the registers in the CONTEXT-structure, fill the EXCEPTION_RECORD-structure, call the topmost exception-filter which - in this case - shifts the execution two instructions further, and when it returns it will in my case restore all the registers an continue execution according to what I returned in EAX. That's should all not be so much time that almost 1/3 of the CPU-time will be spent in userland. That's about 2,3ms, i.e. when my old Ryzen 1800X is boosting on one core with 4GHz, about 5.200 clock-cycles.
I'm using the byebyeOffset-variable in my code to carry the distance between the unsafe instruction that might generate an access-violation and the safe code afterwards. I'm initializing this variable before the unsafe instruction. But it would be nice to have this offset statically as an immediate at the point where I add it on EIP in the exception-filter function sehHandler; but the offsets are scoped to getPointerFaultSafe. Of course storing the offset and fetching it from the variable take a negligible part of the overall computation time, but it would be nicer to have a clean solution.

AT&T to inline asm in Visual Studio 2017

I'm trying to convert assembly written in AT&T syntax from a DevC++ project to inline assembly in Visual Studio.
This is the AT&T I'm trying to convert:
void Painter::drawRectangle(int surface, int x, int y, int width, int height, int red, int green, int blue) {
asm("mov %0, %%eax":: "r" (0x004EAA90));
asm("call *%eax");
asm("mov %eax, %ecx");
asm("mov (%ecx), %eax");
asm("push %0":: "m" (blue));
asm("push %0":: "m" (green));
asm("push %0":: "m" (red));
asm("push %0":: "m" (height));
asm("push %0":: "m" (width));
asm("push %0":: "m" (y));
asm("push %0":: "m" (x));
asm("push %0":: "m" (surface));
asm("call *0x14(%eax)");
}
what i've done so far:
void _drawrectangle(int surface, int x, int y, int width, int height, int red, int green, int blue)
{
__asm
{
mov eax, 0x004eaa90
call dword ptr [eax]
mov ecx, eax
mov eax, [ecx]
push blue
push green
push red
push height
push width
push y
push x
push surface
call dword ptr [eax + 0x14]
}
}
I'm writing this in my DLL, which I've already injected into the game. The game crashes on opening. And I've already hooked another drawing function in C++, which worked.
Hopefully you can help me/guide me in the right direction. Thank you.
Here's how you could write your function in C++ without the use of inline assembly:
#ifndef _MSC_VER
/* For GCC and clang */
#undef __thiscall
#define __thiscall __attribute__((thiscall))
#endif
struct some_interface {
virtual void _unknown_0() = 0;
virtual void _unknown_4() = 0;
virtual void _unknown_8() = 0;
virtual void _unknown_C() = 0;
virtual void _unknown_10() = 0;
virtual void __thiscall drawRectangle(int surface, int x, int y,
int width, int height,
int red, int green, int blue) = 0;
};
const auto get_interface = (some_interface *(*)()) 0x4EAA90;
void
drawRectangle(int surface, int x, int y, int width, int height,
int red, int green, int blue) {
get_interface()->drawRectangle(surface, x, y, width, height,
red, green, blue);
}
The code you're trying to translate first calls a function that returns a pointer to some class object with at least 6 virtual methods defined. It then calls the 6th virtual method of that object. The some_interface struct minimally recreates that class so the 6th virtual method can be called. The get_interface constant is a function pointer that points to the function located at 0x4EAA90 and in C++ function pointers can be used just like a function.
The above code generates the following assembly in GCC 8.2:
drawRectangle(int, int, int, int, int, int, int, int):
subl $12, %esp
movl $5155472, %eax
call *%eax
movl (%eax), %edx
movl %eax, %ecx
pushl 44(%esp)
pushl 44(%esp)
pushl 44(%esp)
pushl 44(%esp)
pushl 44(%esp)
pushl 44(%esp)
pushl 44(%esp)
pushl 44(%esp)
call *20(%edx)
addl $12, %esp
ret
and the following assembly with Visual C++ 2017:
void drawRectangle(int,int,int,int,int,int,int,int) PROC ; drawRectangle, COMDAT
mov eax, 5155472 ; 004eaa90H
call eax
push DWORD PTR _blue$[esp-4]
mov ecx, eax
push DWORD PTR _green$[esp]
mov edx, DWORD PTR [eax]
push DWORD PTR _red$[esp+4]
push DWORD PTR _height$[esp+8]
push DWORD PTR _width$[esp+12]
push DWORD PTR _y$[esp+16]
push DWORD PTR _x$[esp+20]
push DWORD PTR _surface$[esp+24]
call DWORD PTR [edx+20]
ret 0
void drawRectangle(int,int,int,int,int,int,int,int) ENDP ; drawRectangle

What is wrong with mmap system-call on Mac OS X?

I am trying to write a simple application on Mac OS X using only syscalls, no standard library.
main.c
#define PROT_READ 0x1
#define PROT_WRITE 0x2
#define MAP_ANONYMOUS 0x20
#define MAP_PRIVATE 0x02
#define PAGE_SIZE 4096
#define NULL 0
#define STDOUT 1
#define SYSCALL_BASE 0x2000000
#define SYSCALL_GET(num) SYSCALL_BASE + num
long long syscall(long long arg1, long long arg2, long long arg3, long long arg4, long long arg5, long long arg6, long long cn);
void exit(long long status) {
syscall(status, 0, 0, 0, 0, 0, SYSCALL_GET(1));
}
long long write(long long fd, char *buf, long long len) {
return syscall(fd, buf, len, 0, 0, 0, SYSCALL_GET(4));
}
void *mmap(void *addr, long long length, long long prot, long long flags, long long fd, long long offset) {
return syscall(addr, length, prot, flags, fd, offset, SYSCALL_GET(197));
}
long long munmap(void *addr, long long length) {
return syscall(addr, length, 0, 0, 0, 0, SYSCALL_GET(73));
}
int strlen(char *s) {
int len = 0;
while (*(s++) != '\0') {
len++;
}
return len;
}
int putchar(char c) {
return write(STDOUT, &c, 1);
}
int main(int argc, char *argv[]) {
if (argc <= 1) {
return 0;
}
int *lengths = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
for (int i = 1; i < argc; i++) {
lengths[i] = strlen(argv[i]);
}
for (int i = 1; i < argc; i++) {
write(STDOUT, argv[i], lengths[i]);
putchar(' ');
}
putchar('\n');
munmap(lengths, PAGE_SIZE);
return 0;
}
start.s
.global start
.global _syscall
.text
start:
popq %rdi
movq %rsp, %rsi
andq $0xfffffffffffffff0, %rsp
call _main
movq %rax, %rdi
call _exit
_syscall:
movq %rcx, %r10
movq 8(%rsp), %rax
pushq %rbx # alignment
syscall
popq %rbx
retq
As you can see, the application basically reflects simple echo. When I run the program with no arguments, it successfully finishes, so I assume exit call works. But when I run it with any argument, it crashes with Segmentation fault: 11. As far as I understand it now, when mmap is called, kernel returns strange value: 9. I assume that 9 is not a proper address, but I cannot understand my mistake, because according to documentations, all the values passed to syscall are correct. Syscall numbers are taken from here.
I would like to know too.
I think apple uses MAP_ANON, which is a different number. Try
#define MAP_ANON 0x1000
https://github.com/nneonneo/osx-10.9-opensource/blob/master/xnu-2422.1.72/bsd/sys/mman.h#L150

How to view disassembly for optimized speed builds in Visual Studio

I have a C program in Visual Studio where I would like to compare the disassembly between the standard debug build and a build where speed is optimized.
For standard debug things work perfectly, I set a breakpoint and then right-click "Goto Dissassembly" to view the equivalent assembly code.
However, when I try to do that when the optimized for speed switch is enabled, I only see part of the assembly.
My procedure for creating an optimized speed build is (staying in the debug build configuration):
From the project properties -> C/C++->Optimization change Optimization from disabled /Od to Optimize for maximize speed /O2 and changing favor Size or Speed to 'Favor Fast Code /Ot)
Under Code Generation ->Basic Runtime checks change to 'Default'
Note: I decided to modify my debug configuration and not do a full optimization (which includes size) because I wanted to preserve my code symbols.
As part of my investigation, I created a trivial program for testing. Here is the source:
#include<stdio.h>
int main()
{
int x = 0;
int y = 0;
printf("The value of x is %d\n", x);
while (1)
{
++x;
if (x > 1000000)
break;
}
y = x;
printf("The value of y is %d\n", y);
return 0;
}
In full debug mode here is my disassembly: Note: I can easily see what is happening in the WHILE LOOP
int x = 0;
00CF1779 mov dword ptr [x],0
int y = 0;
00CF1780 mov dword ptr [y],0
printf("The value of x is %d\n", x);
00CF1787 mov eax,dword ptr [x]
00CF178A push eax
00CF178B push offset string "The value of x is %d\n" (0CF6B30h)
00CF1790 call _printf (0CF1320h)
00CF1795 add esp,8
while (1)
00CF1798 mov eax,1
00CF179D test eax,eax
00CF179F je main+47h (0CF17B7h)
{
++x;
00CF17A1 mov eax,dword ptr [x]
00CF17A4 add eax,1
00CF17A7 mov dword ptr [x],eax
if (x > 1000000)
00CF17AA cmp dword ptr [x],0F4240h
00CF17B1 jle main+45h (0CF17B5h)
break;
00CF17B3 jmp main+47h (0CF17B7h)
}
00CF17B5 jmp main+28h (0CF1798h)
y = x;
00CF17B7 mov eax,dword ptr [x]
00CF17BA mov dword ptr [y],eax
printf("The value of y is %d\n", y);
00CF17BD mov eax,dword ptr [y]
00CF17C0 push eax
00CF17C1 push offset string "The value of y is %d\n" (0CF6BE8h)
00CF17C6 call _printf (0CF1320h)
00CF17CB add esp,8
return 0;
Here is the disassembly from the optimized speed build - note there is no code shown for the while loop routine
int x = 0;
int y = 0;
printf("The value of x is %d\n", x);
00EE16F0 push 0
00EE16F2 push offset string "The value of x is %d\n" (0EE6B30h)
00EE16F7 call _printf (0EE1320h)
while (1)
{
++x;
if (x > 1000000)
break;
}
y = x;
printf("The value of y is %d\n", y);
00EE16FC push 0F4241h
00EE1701 push offset string "The value of y is %d\n" (0EE6BE8h)
00EE1706 call _printf (0EE1320h)
00EE170B add esp,10h
return 0;
00EE170E xor eax,eax
}
00EE1710 ret
Both builds output the same values for x and y in the printf statements.
Does anyone know how to solve my problem?

Assembler + WinApi MapViewOfFile

I ve got a little problem with using MapViewOfFile. This function returns the starting address of the mapped view so as I think it's a sequence of bytes. And this is where I ve stacked:
INVOKE MapViewOfFile, hMapFile, FILE_MAP_READ, 0, 0, 0
mov pMemory, eax
mov edx, DWORD PTR [pMemory]
The pointer is correct cause during saving as a whole block of memory to file, everything is fine. So my question is: how to refer to every single elements(bytes).
Thanks in advance
Cast pMemory to the correct type and move it around from pMemory to pMemory + size of the mapped memory - size of the type to which you refer...
In other words, you have effectively allocated memory and associated the menory with a file that is changed as you change the memory.
In C assuming pMemory is the pointer returned by MapViewOfFile:
int x = (*(int *)pMemory); // Read the first int
char c = (*(char *)pMemory); // Read the first char
typedef struct oddball { int x, int y, int z, char str[256] } oddball; // assume the typedef syntax is right...
oddball *p = (oddball *)pMemory; // point to the base of the mapped memory
p += 14; // p now points to the 15th instance of oddball in the file.
// Or... p = &p[14];
p->x = 0;
p->y = 0;
p->z = 0;
strcpy( p->str( "This is the 0, 0, 0 position" ) );
// You've now changed the memory to which p[14] refers.
// To read every byte... (Again in C, use the compiler to generate asm
// Assumes:
// fileSize is the size of the mapped memory in bytes
// pMemory is the pointer returned by MapViewOfFile
// buffer is a block of memory that will hold n bytes
// pos is the position from which you want to read
// n is the number of bytes to read from position pos and the smallest size in bytes to which buffer can point
void readBytes( unsigned int fileSize, char *pMemory, char *buffer, unsigned int n, unsigned int pos )
{
char *endP = pMemory + fileSize;
char *start = pMemory + pos;
char *end = start + n;
int i = 0;
// Code to stay within your memory boundaries
if( end > endP )
{
n -= (end - endP); // This might be wrong...
end = endP;
}
if( end < start )
return;
// end boundary check
for( ; start < end; start++, i++ )
{
buffer[i] = *start;
}
}
Here's the asm code generated from the code above by the compiler with -O2
.686P
.XMM
.model flat
PUBLIC _readBytes
_TEXT SEGMENT
_fileSize$ = 8 ; size = 4
_pMemory$ = 12 ; size = 4
_buffer$ = 16 ; size = 4
_n$ = 20 ; size = 4
_pos$ = 24 ; size = 4
_readBytes PROC ; COMDAT
mov eax, DWORD PTR _pMemory$[esp-4]
mov edx, DWORD PTR _fileSize$[esp-4]
mov ecx, DWORD PTR _n$[esp-4]
add edx, eax
add eax, DWORD PTR _pos$[esp-4]
add ecx, eax
cmp ecx, edx
jbe SHORT $LN5#readBytes
mov ecx, edx
$LN5#readBytes:
cmp eax, ecx
jae SHORT $LN1#readBytes
push esi
mov esi, DWORD PTR _buffer$[esp]
sub esi, eax
$LL3#readBytes:
mov dl, BYTE PTR [eax]
mov BYTE PTR [esi+eax], dl
inc eax
cmp eax, ecx
jb SHORT $LL3#readBytes
pop esi
$LN1#readBytes:
ret 0
_readBytes ENDP
_TEXT ENDS
END

Resources