The code should be pretty self-explanatory. Why doesn't it work?
#include <stdio.h>
/* Question code: tries to declare `number` inside an asm string and then use it from C. */
int main() {
/* NOTE(review): the asm string is handed to the assembler as text; it does not declare a
 * C identifier, which matches the "'number' undeclared" compiler error quoted below. */
__asm__("number dw 0"); // declare number?
printf("%d",number);
/* NOTE(review): adjacent string literals are concatenated with no separator, so these
 * three instructions reach the assembler as one line of text. */
__asm__("mov %eax,number"
"inc %eax"
"mov number,%eax");
printf("%d",number);
return 0;
}
cc ex1.c -o ex1
ex1.c: In function ‘main’:
ex1.c:22:17: error: ‘number’ undeclared (first use in this function)
ex1.c:22:17: note: each undeclared identifier is reported only once for each function it appears in
make: *** [ex1] Error 1
Thanks.
I have a lot of knowledge gaps to fill... the gcc manual was confusing me with regards to inline assembly as was google results for tutorials...
working on an intel i7 processor
Use this syntax, you can access variables declared in C from the inline assembly
#include <stdio.h>
/*
 * Increment a C variable from inline assembly by round-tripping it through eax.
 * Prints the value before and after the increment.
 */
int main() {
    int number = 0;

    printf("%d\n", number);
    /*
     * The asm both reads and writes `number`, so it is declared once as a
     * read-write memory operand ("+m") instead of a write-only output plus a
     * separate input.  eax is used as scratch and inc changes the flags, so
     * both are listed as clobbers.
     */
    __asm__(
        "mov %[number],%%eax\n"
        "inc %%eax\n"
        "mov %%eax,%[number]\n"
        : [number] "+m" (number)
        :
        : "eax", "cc");
    printf("%d\n", number);
    return 0;
}
You can let the compiler load number into the eax register for you by specifying the "a" constraint on the input
#include <stdio.h>
/*
 * Same increment, but the compiler loads `number` into eax for us via the
 * "a" constraint on the input.
 */
int main() {
    int number = 0;

    printf("%d\n", number);
    /*
     * The instruction modifies eax, so eax must also be declared as an
     * output: the compiler assumes input-only operands are left unchanged.
     * Binding the result to "=a" lets the compiler store it back into
     * `number`, which removes the explicit mov to memory.
     */
    __asm__(
        "inc %%eax\n"
        : [number] "=a" (number)
        : "a" (number)
        : "cc");
    printf("%d\n", number);
    return 0;
}
And since x86 inc instruction can operate on memory directly you could reduce it to this
#include <stdio.h>
/*
 * Shortest form: inc operates directly on the memory operand.
 */
int main() {
    int number = 0;

    printf("%d\n", number);
    /* "+m": the operand is read and written in place; inc updates the flags. */
    __asm__(
        "incl %[number]\n" /* incl -> "long" (32-bits) */
        : [number] "+m" (number)
        :
        : "cc");
    printf("%d\n", number);
    return 0;
}
For more information see gcc documentation:
6.41 Assembler Instructions with C Expression Operands
Related
I tried to compile Csmith on my "SunOS sun4v 5.10" system, but I got errors like these:
platform.cpp: In function 'long unsigned int platform_gen_seed()':
platform.cpp:78: error: impossible constraint in 'asm'
Could anyone tell where is the mistake?
#if (TARGET_CPU_powerpc == 1 || TARGET_CPU_powerpc64 == 1)
/*For PPC, got from:
http://lists.ozlabs.org/pipermail/linuxppc-dev/1999-October/003889.html
*/
/*
 * PowerPC variant: returns a 64-bit time-base reading.
 *
 * The asm reads the upper time-base word into `junk` (mftbu %1), the lower
 * word into the low register of `retval` (mftb %L0), then the upper word again
 * into %0, and branches back to label 1 while the two upper-word reads differ
 * (cmpw / bne 1b).
 * NOTE(review): presumably this retry guards against the upper word changing
 * between the two reads; assumes a target where `retval` occupies a register
 * pair so that %L0 names its low half -- confirm.
 */
static unsigned long long read_time(void) {
unsigned long long retval;
unsigned long junk;
__asm__ __volatile__ ("\n\
1: mftbu %1\n\
mftb %L0\n\
mftbu %0\n\
cmpw %0,%1\n\
bne 1b"
: "=r" (retval), "=r" (junk));
return retval;
}
#else
#ifdef WIN32
/*
 * WIN32 variant: reads the time-stamp counter with RDTSC and returns it as a
 * 64-bit value (edx holds the high 32 bits, eax the low 32 bits).
 */
static unsigned __int64 read_time(void) {
    unsigned l, h;

    _asm {
        rdtsc
        mov l, eax
        mov h, edx
    }
    /*
     * Widen before shifting: shifting a 32-bit unsigned by 32 is undefined
     * and in practice loses the high half.
     */
    return ((unsigned __int64)h << 32) + l;
}
#else
/*
 * x86 / x86-64 variant: reads the time-stamp counter with RDTSC.
 *
 * RDTSC returns the low 32 bits in eax and the high 32 bits in edx.  The "A"
 * constraint only names the edx:eax pair on 32-bit x86; on x86-64 it lets the
 * compiler pick either rax or rdx, which drops half of the counter.  Binding
 * the two halves explicitly is correct on both.
 */
static long long read_time(void) {
    unsigned int lo, hi;

    __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
    return (long long)(((unsigned long long)hi << 32) | lo);
}
#endif
#endif
/* Derive a seed from the current timestamp-counter reading. */
unsigned long platform_gen_seed()
{
    long ticks = (long) read_time();

    return ticks;
}
The problem is that the code you're trying to compile assumes that any target CPU that's not a PowerPC must be an x86 processor. The code simply doesn't support your SPARC CPU.
Fortunately the code doesn't seem to be critical, it's apparently only used to seed a random number generator, which is then used to create random C programs. The goal being to prevent multiple instances of the program that are started at the same time from generating the same random programs. I'd replace the code with something more portable that's not dependent on the CPU. Something like this:
#ifdef WIN32
/* Windows: seed from the low 32 bits of the performance counter. */
unsigned long platform_gen_seed()
{
    LARGE_INTEGER counter;

    QueryPerformanceCounter(&counter);
    return counter.LowPart;
}
#else /* assume something Unix-like */
/*
 * Portable fallback seed: the current time in seconds XOR-ed with the process
 * id, whose 16-bit halves are swapped so that the fast-changing low pid bits
 * do not line up with the fast-changing low time bits.
 *
 * All mixing is done in unsigned arithmetic: left-shifting a signed pid_t by
 * 16 overflows (undefined behaviour) once the pid reaches 32768.
 */
static unsigned long generic_gen_seed(void) {
    unsigned long pid = (unsigned long)getpid();
    unsigned long now = (unsigned long)time(NULL);
    unsigned long pid_mix = (pid << 16) | ((pid >> 16) & 0xFFFF);

    return now ^ pid_mix;
}
#ifdef CLOCK_REALTIME
/*
 * Seed from the realtime clock when it is usable, otherwise fall back to
 * generic_gen_seed().
 *
 * The fallback is taken when either clock call fails, or when the reported
 * resolution is coarser than 1 ms or not positive (a zero resolution would
 * otherwise divide by zero below).
 */
unsigned long platform_gen_seed()
{
    struct timespec now, resolution;
    unsigned long res;

    if (clock_gettime(CLOCK_REALTIME, &now) == -1
        || clock_getres(CLOCK_REALTIME, &resolution) == -1
        || resolution.tv_sec > 0
        || resolution.tv_nsec <= 0
        || resolution.tv_nsec > 1000000) {
        return generic_gen_seed();
    }

    /*
     * Unsigned arithmetic: the product may exceed a 32-bit long, and signed
     * overflow is undefined whereas unsigned wrap-around is fine for a seed.
     * NOTE(review): seconds are multiplied by the resolution rather than by
     * ticks-per-second; kept as in the original since only variability of
     * the seed matters here.
     */
    res = (unsigned long)resolution.tv_nsec;
    return (unsigned long)now.tv_nsec / res
           + (unsigned long)now.tv_sec * res;
}
#else
/* No realtime clock available at compile time: always use the generic seed. */
unsigned long platform_gen_seed()
{
    return generic_gen_seed();
}
#endif /* CLOCK_REALTIME */
#endif /* WIN32 */
The code has been tested in isolation on Linux and Windows. It should also work in isolation on Solaris SPARC, but I don't know how well it will work in the context of the actual program.
Based on the Wikipedia entry as well as the Intel manual, rdpmc should be available to user-mode processes as long as bit 8 of CR4 is set. However, I am still running into general protection error when trying to run rdpmc from userspace even with that bit set.
I am running on an 8-core Intel X3470 on kernel 2.6.32-279.el6.x86_64.
Here is the user-mode program I am trying to execute:
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <sched.h>
#include <assert.h>
/*
 * Execute RDPMC for the counter index passed in ecx and return the 64-bit
 * result assembled from edx (high half) and eax (low half).
 */
uint64_t
read_pmc(int ecx)
{
    unsigned int low_half;
    unsigned int high_half;
    uint64_t counter;

    __asm __volatile("rdpmc" : "=a"(low_half), "=d"(high_half) : "c"(ecx));

    counter = ((uint64_t)high_half) << 32;
    counter |= (uint64_t)low_half;
    return counter;
}
/*
 * Usage: prog cpu-id pmc-num
 *
 * Pins the process to the given CPU, reads the requested performance counter
 * with RDPMC and prints its value.
 */
int main(int ac, char **av)
{
    cpu_set_t cpuset;
    unsigned int c;
    int i;

    if (ac != 3) {
        fprintf(stderr, "usage: %s cpu-id pmc-num\n", av[0]);
        exit(EXIT_FAILURE);
    }
    i = atoi(av[1]);
    c = atoi(av[2]);

    CPU_ZERO(&cpuset);
    CPU_SET(i, &cpuset);
    /*
     * Do the call outside assert(): with NDEBUG defined the whole assert
     * expression, including the system call, would be compiled out.
     */
    if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
        perror("sched_setaffinity");
        exit(EXIT_FAILURE);
    }

    /* PRIu64 matches uint64_t on every ABI; "%lu" only does on LP64. */
    printf("%" PRIu64 "\n", read_pmc(c));
    return 0;
}
Here is the kernel module which sets the bit and reads out CR4 so I can manually verify that the bit has been set.
/*
* Enable PMC in user mode.
*/
#include <linux/module.h>
#include <linux/kernel.h>
/*
 * Module entry point: ORs a mask into CR4, then reads CR4 back and logs it.
 * NOTE(review): the mask is (1 << 7), i.e. bit index 7 when counting from 0,
 * while the comment below says "Bit 8" -- confirm which bit is intended.
 */
int init_module(void)
{
typedef long unsigned int uint64_t;
uint64_t output;
// Set CR4, Bit 8 to enable PMC
__asm__("push %rax\n\t"          /* save rax: this asm declares no clobbers */
"mov %cr4,%rax;\n\t"             /* rax = CR4 */
"or $(1 << 7),%rax;\n\t"         /* set the mask bit */
"mov %rax,%cr4;\n\t"             /* write CR4 back */
"wbinvd\n\t"
"pop %rax"                       /* restore rax */
);
// Read back CR4 to check the bit.
__asm__("\t mov %%cr4,%0" : "=r"(output));
printk(KERN_INFO "%lu", output);
return 0;
}
/*
 * Module exit point: clears in CR4 the same mask that init_module set.
 * NOTE(review): uses (1 << 7) like init_module -- confirm the intended bit.
 */
void cleanup_module(void)
{
__asm__("push %rax\n\t"          /* save the two scratch registers */
"push %rbx\n\t"
"mov %cr4,%rax;\n\t"             /* rax = CR4 */
"mov $(1 << 7), %rbx\n\t"        /* rbx = mask */
"not %rbx\n\t"                   /* rbx = ~mask */
"and %rbx, %rax;\n\t"            /* clear the mask bit */
"mov %rax,%cr4;\n\t"             /* write CR4 back */
"wbinvd\n\t"
"pop %rbx\n\t"                   /* restore in reverse order */
"pop %rax\n\t"
);
}
Apparently, when Intel says Bit 8, they are referring to the 9th bit from the right, since their indexing begins at 0. Replacing $(1 << 7) with $(1 << 8) globally resolves the issue, and allows rdpmc to be called from user mode.
Here is the updated kernel module, also using on_each_cpu to make sure that it is set on every core.
/*
* Read PMC in kernel mode.
*/
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
/*
 * Read CR4 on the current CPU and log its value.
 *
 * The read is marked volatile: CR4 is modified by other asm statements in
 * this file, and a non-volatile asm with outputs may be moved or merged by
 * the compiler, which could report a stale value.
 */
static void printc4(void) {
    unsigned long cr4;

    __asm__ __volatile__("mov %%cr4,%0" : "=r"(cr4));
    printk(KERN_INFO "%lu", cr4);
}
/*
 * Callback passed to on_each_cpu(): sets bit 8 of CR4 (mask 1 << 8), then
 * logs the CPU id and the resulting CR4 value.  `info` is unused.
 */
static void setc4b8(void * info) {
// Set CR4, Bit 8 (9th bit from the right) to enable
__asm__("push %rax\n\t"          /* save rax: this asm declares no clobbers */
"mov %cr4,%rax;\n\t"             /* rax = CR4 */
"or $(1 << 8),%rax;\n\t"         /* set bit 8 */
"mov %rax,%cr4;\n\t"             /* write CR4 back */
"wbinvd\n\t"
"pop %rax"                       /* restore rax */
);
// Check which CPU we are on:
printk(KERN_INFO "Ran on Processor %d", smp_processor_id());
printc4();
}
/*
 * Callback passed to on_each_cpu(): logs CR4, clears bit 8 of CR4
 * (mask 1 << 8), then logs the CPU id.  `info` is unused.
 */
static void clearc4b8(void * info) {
printc4();
__asm__("push %rax\n\t"          /* save the two scratch registers */
"push %rbx\n\t"
"mov %cr4,%rax;\n\t"             /* rax = CR4 */
"mov $(1 << 8), %rbx\n\t"        /* rbx = mask */
"not %rbx\n\t"                   /* rbx = ~mask */
"and %rbx, %rax;\n\t"            /* clear bit 8 */
"mov %rax,%cr4;\n\t"             /* write CR4 back */
"wbinvd\n\t"
"pop %rbx\n\t"                   /* restore in reverse order */
"pop %rax\n\t"
);
printk(KERN_INFO "Ran on Processor %d", smp_processor_id());
}
/*
 * Module entry point: set CR4 bit 8 on every online CPU.
 *
 * wait = 1 makes on_each_cpu() return only after every CPU has run the
 * callback, so the bit is guaranteed to be set once module loading finishes.
 */
int init_module(void)
{
    on_each_cpu(setc4b8, NULL, 1);
    return 0;
}
/*
 * Module exit point: clear CR4 bit 8 on every online CPU.
 *
 * wait = 1 is required here: with wait = 0 this function can return, and the
 * module text can be freed, while another CPU is still about to execute
 * clearc4b8().
 */
void cleanup_module(void)
{
    on_each_cpu(clearc4b8, NULL, 1);
}
Echoing "2" to /sys/bus/event_source/devices/cpu/rdpmc allows user processes
to access performance counters via the rdpmc instruction.
Note that the behaviour has changed. Prior to Linux 4.0, "1" meant "enabled"
while "0" meant "disabled". Now "1" means access is allowed only for processes that have active perf events. More details: http://man7.org/linux/man-pages/man2/perf_event_open.2.html
The following code calculates the sine and cosine of a given angle. To compute these functions as fast as possible, they are implemented in inline assembly.
#include <stdio.h>
/*
 * Sine of an angle given in degrees, computed on the x87 stack.
 *
 * The sequence loads 180, the angle and pi, then multiplies and divides
 * (intended as degree * pi / 180) before applying fsin and storing the result.
 * NOTE(review): every operand uses the "g" constraint, which also permits
 * general registers and immediates; fld/fstp take memory or x87-stack
 * operands, which matches the "operand type mismatch for `fstp'" error
 * quoted below.
 */
float sinx( float degree ) {
float result, two_right_angles = 180.0f ;
__asm__ __volatile__ ( "fld %1;"   /* push 180 */
"fld %2;"                          /* push degree */
"fldpi;"                           /* push pi */
"fmul;"
"fdiv;"
"fsin;"
"fstp %0;"                         /* store and pop the result */
: "=g" (result)
: "g"(two_right_angles), "g" (degree)
) ;
return result ;
}
/*
 * Cosine of an angle given in degrees, computed on the x87 stack in two
 * steps: the first asm stores the converted angle (intended as
 * degree * pi / 180) in `radians`, the second applies fcos to it.
 * NOTE(review): same "g" constraints as sinx(), with the same fld/fstp
 * operand restriction.
 */
float cosx( float degree ) {
float result, two_right_angles = 180.0f, radians ;
__asm__ __volatile__ ( "fld %1;"   /* push 180 */
"fld %2;"                          /* push degree */
"fldpi;"                           /* push pi */
"fmul;"
"fdiv;"
"fstp %0;"                         /* store and pop into radians */
: "=g" (radians)
: "g"(two_right_angles), "g" (degree)
) ;
__asm__ __volatile__ ( "fld %1;"   /* push radians */
"fcos;"
"fstp %0;" : "=g" (result) : "g" (radians)
) ;
return result ;
}
/*
 * Square root of `val` via the x87 fsqrt instruction.
 * NOTE(review): same "g" constraints as sinx(), with the same fld/fstp
 * operand restriction.
 */
float square_root( float val ) {
float result ;
__asm__ __volatile__ ( "fld %1;"   /* push val */
"fsqrt;"
"fstp %0;"                         /* store and pop the result */
: "=g" (result)
: "g" (val)
) ;
return result ;
}
/*
 * Prompt for an angle in degrees and print its sine, cosine and square root.
 * Returns 1 if the input cannot be parsed as a number.
 */
int main() {
    float theta ;

    printf( "Enter theta in degrees : " ) ;
    /* Without this check a failed conversion leaves theta uninitialized. */
    if ( scanf( "%f", &theta ) != 1 ) {
        fprintf( stderr, "invalid input: expected a number\n" ) ;
        return 1 ;
    }
    printf( "sinx(%f) = %f\n", theta, sinx( theta ) );
    printf( "cosx(%f) = %f\n", theta, cosx( theta ) );
    printf( "square_root(%f) = %f\n", theta, square_root( theta ) ) ;
    return 0 ;
}
The above code comes from here, and I am trying to compile it with g++:
g++ -Wall -fexceptions -g -c /filename.cpp
However, it fails, and the following error messages are given:
Error: operand type mismatch for `fstp'|
Error: operand type mismatch for `fstp'|
I was wondering why the compilation would fail and how I can compile them successfully. Thanks!
The manual says about g constraint: Any register, memory or immediate integer operand is allowed, except for registers that are not general registers. The compiler probably picked a register which fstp doesn't accept but fits the constraints.
By the way, this is quite horrible inline asm.
Also note just because something is in asm, it will not necessarily be faster. The compiler is quite capable of optimizing things and it does a better job, too. You might be interested in the -ffast-math switch. From return sin(degree * M_PI / 180); the compiler produces this small piece of code:
fldl .LC0
fmuls 4(%esp)
fsin
ret
I am a newbie at kernel programming and I wish to run this kernel module (posted below). I ran the makefile (posted below) for it, but I am getting the following errors. Can someone please help me understand how to overcome this?
The kernel program should run error free as it is taken from Intel's implementation:
obj-m += hello-1.o
all:
make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
clean:
make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
This is the error:
snehil#ubuntu:~/Desktop/measure$ make
make -C /lib/modules/3.0.0-12-generic/build M=/home/snehil/Desktop/measure modules
make[1]: Entering directory `/usr/src/linux-headers-3.0.0-12-generic'
CC [M] /home/snehil/Desktop/measure/measure1.o
/home/snehil/Desktop/measure/measure1.c: In function ‘hello_start’:
/home/snehil/Desktop/measure/measure1.c:108:2: error: implicit declaration of function
‘kmalloc’ [-Werror=implicit-function-declaration]
/home/snehil/Desktop/measure/measure1.c:108:8: warning: assignment makes pointer from
integer without a cast [enabled by default]
/home/snehil/Desktop/measure/measure1.c:115:11: warning: assignment makes pointer from
integer without a cast [enabled by default]
/home/snehil/Desktop/measure/measure1.c:124:12: warning: assignment makes pointer from
integer without a cast [enabled by default]
/home/snehil/Desktop/measure/measure1.c:130:13: warning: assignment makes pointer from
integer without a cast [enabled by default]
cc1: some warnings being treated as errors
make[2]: *** [/home/snehil/Desktop/measure/measure1.o] Error 1
make[1]: *** [_module_/home/snehil/Desktop/measure] Error 2
make[1]: Leaving directory `/usr/src/linux-headers-3.0.0-12-generic'
make: *** [all] Error 2
snehil#ubuntu:~/Desktop/measure$ gcc measure1
gcc: error: measure1: No such file or directory
gcc: fatal error: no input files
compilation terminated.
This is the kernel module code:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#define SIZE_OF_STAT 100000
#define BOUND_OF_LOOP 1000
#define UINT64_MAX (18446744073709551615ULL)
/*
 * Fill times[j][i] (BOUND_OF_LOOP rows of SIZE_OF_STAT entries) with the
 * number of cycles between two CPUID+RDTSC timestamp reads taken back to back
 * with preemption and local interrupts disabled.
 *
 * If the second timestamp is smaller than the first, an error is logged and
 * the entry is set to 0.
 */
void inline Filltimes(uint64_t **times) {
    unsigned long flags;
    int i, j;
    uint64_t start, end;
    unsigned cycles_low, cycles_high, cycles_low1, cycles_high1;
    volatile int variable = 0;

    /*
     * Three timestamp sequences whose results are overwritten before use
     * (presumably to warm up the instruction path before measuring).
     */
    asm volatile ("CPUID\n\t"
                  "RDTSC\n\t"
                  "mov %%edx, %0\n\t"
                  "mov %%eax, %1\n\t"
                  : "=r" (cycles_high), "=r" (cycles_low)
                  :
                  : "%rax", "%rbx", "%rcx", "%rdx");
    asm volatile ("CPUID\n\t"
                  "RDTSC\n\t"
                  "CPUID\n\t"
                  "RDTSC\n\t"
                  "mov %%edx, %0\n\t"
                  "mov %%eax, %1\n\t"
                  : "=r" (cycles_high), "=r" (cycles_low)
                  :
                  : "%rax", "%rbx", "%rcx", "%rdx");
    asm volatile ("CPUID\n\t"
                  "RDTSC\n\t"
                  :
                  :
                  : "%rax", "%rbx", "%rcx", "%rdx");

    for (j = 0; j < BOUND_OF_LOOP; j++) {
        for (i = 0; i < SIZE_OF_STAT; i++) {
            variable = 0;

            preempt_disable();
            raw_local_irq_save(flags);

            asm volatile ("CPUID\n\t"
                          "RDTSC\n\t"
                          "mov %%edx, %0\n\t"
                          "mov %%eax, %1\n\t"
                          : "=r" (cycles_high), "=r" (cycles_low)
                          :
                          : "%rax", "%rbx", "%rcx", "%rdx");
            /*call the function to measure here*/
            asm volatile ("CPUID\n\t"
                          "RDTSC\n\t"
                          "mov %%edx, %0\n\t"
                          "mov %%eax, %1\n\t"
                          : "=r" (cycles_high1), "=r" (cycles_low1)
                          :
                          : "%rax", "%rbx", "%rcx", "%rdx");

            raw_local_irq_restore(flags);
            preempt_enable();

            start = ( ((uint64_t)cycles_high << 32) | cycles_low );
            end = ( ((uint64_t)cycles_high1 << 32) | cycles_low1 );

            /*
             * Compare the operands directly: both are unsigned, so
             * (end - start) < 0 can never be true and the error branch
             * would be unreachable.
             */
            if (end < start) {
                printk(KERN_ERR "\n\n>>>>>>>>>>>>>> CRITICAL ERROR IN TAKING THE TIME!!!!!!\n loop(%d) stat(%d) start = %llu, end = %llu, variable = %u\n", j, i, start, end, variable);
                times[j][i] = 0;
            } else {
                times[j][i] = end - start;
            }
        }
    }
    return;
}
/*
 * Integer population variance of inputs[0..size-1], computed as
 *
 *     (size * sum(x^2) - sum(x)^2) / size^2
 *
 * Returns the variance (truncated towards zero), or -EINVAL converted to
 * uint64_t when size is not positive or an intermediate value would not fit
 * in 64 bits.  The overflow case also logs an error.
 */
uint64_t var_calc(uint64_t *inputs, int size)
{
    const uint64_t max_square_root = 0xFFFFFFFFULL; /* largest x with x*x < 2^64 */
    uint64_t count, sum = 0, sum_of_squares = 0, square;
    int i;

    /* size == 0 would divide by zero below. */
    if (size <= 0)
        return -EINVAL;
    count = (uint64_t)size;

    for (i = 0; i < size; i++) {
        /* Check every step before performing it; a wrapped value cannot be
         * recognised reliably after the fact. */
        if (sum > UINT64_MAX - inputs[i])
            goto overflow;
        sum += inputs[i];

        if (inputs[i] > max_square_root)
            goto overflow;
        square = inputs[i] * inputs[i];
        if (sum_of_squares > UINT64_MAX - square)
            goto overflow;
        sum_of_squares += square;
    }

    if (sum > max_square_root)
        goto overflow;
    if (sum_of_squares > UINT64_MAX / count)
        goto overflow;

    /* count * sum(x^2) >= sum(x)^2 always holds, so this cannot underflow. */
    return (count * sum_of_squares - sum * sum) / (count * count);

overflow:
    printk(KERN_ERR "\n\n>>>>>>>>>>>>>> CRITICAL OVERFLOW ERROR IN var_calc!!!!!!\n\n");
    return -EINVAL;
}
/*
 * Module init: allocate the sample buffers, run Filltimes(), then log
 * per-row variance / max deviation / minimum and the overall statistics.
 *
 * Returns 0 on success and -ENOMEM when an allocation fails.  Every buffer
 * allocated so far is released on all paths.
 */
static int __init hello_start(void)
{
    int i = 0, j = 0, spurious = 0;
    int rows_allocated = 0;
    int ret = 0;
    uint64_t **times;
    uint64_t *variances = NULL;
    uint64_t *min_values = NULL;
    uint64_t max_dev = 0, min_time = 0, max_time = 0, prev_min = 0, tot_var = 0,
             max_dev_all = 0, var_of_vars = 0, var_of_mins = 0;

    printk(KERN_INFO "Loading hello module...\n");

    times = kmalloc(BOUND_OF_LOOP*sizeof(uint64_t*), GFP_KERNEL);
    if (!times) {
        printk(KERN_ERR "unable to allocate memory for times\n");
        return -ENOMEM;
    }
    for (j = 0; j < BOUND_OF_LOOP; j++) {
        times[j] = kmalloc(SIZE_OF_STAT*sizeof(uint64_t), GFP_KERNEL);
        if (!times[j]) {
            printk(KERN_ERR "unable to allocate memory for times[%d]\n", j);
            ret = -ENOMEM;
            goto out_free;
        }
        rows_allocated++;
    }
    variances = kmalloc(BOUND_OF_LOOP*sizeof(uint64_t), GFP_KERNEL);
    if (!variances) {
        printk(KERN_ERR "unable to allocate memory for variances\n");
        ret = -ENOMEM;
        goto out_free;
    }
    min_values = kmalloc(BOUND_OF_LOOP*sizeof(uint64_t), GFP_KERNEL);
    if (!min_values) {
        printk(KERN_ERR "unable to allocate memory for min_values\n");
        ret = -ENOMEM;
        goto out_free;
    }

    Filltimes(times);

    for (j = 0; j < BOUND_OF_LOOP; j++) {
        max_dev = 0;
        min_time = 0;
        max_time = 0;
        for (i = 0; i < SIZE_OF_STAT; i++) {
            /* min_time == 0 means "no sample seen yet" for this row. */
            if ((min_time == 0) || (min_time > times[j][i]))
                min_time = times[j][i];
            if (max_time < times[j][i])
                max_time = times[j][i];
        }
        max_dev = max_time - min_time;
        min_values[j] = min_time;
        if ((prev_min != 0) && (prev_min > min_time))
            spurious++;
        if (max_dev > max_dev_all)
            max_dev_all = max_dev;
        variances[j] = var_calc(times[j], SIZE_OF_STAT);
        tot_var += variances[j];
        printk(KERN_ERR "loop_size:%d >>>> variance(cycles): %llu; max_deviation: %llu ;min time: %llu", j, variances[j], max_dev, min_time);
        prev_min = min_time;
    }

    var_of_vars = var_calc(variances, BOUND_OF_LOOP);
    var_of_mins = var_calc(min_values, BOUND_OF_LOOP);
    printk(KERN_ERR "\n total number of spurious min values = %d", spurious);
    printk(KERN_ERR "\n total variance = %llu", (tot_var/BOUND_OF_LOOP));
    printk(KERN_ERR "\n absolute max deviation = %llu", max_dev_all);
    printk(KERN_ERR "\n variance of variances = %llu", var_of_vars);
    printk(KERN_ERR "\n variance of minimum values = %llu", var_of_mins);

out_free:
    /* Only rows that were actually allocated hold valid pointers. */
    for (j = 0; j < rows_allocated; j++)
        kfree(times[j]);
    kfree(times);
    kfree(variances);   /* kfree(NULL) is a no-op */
    kfree(min_values);
    return ret;
}
/* Module exit hook: only logs a farewell message. */
static void __exit hello_end(void)
{
printk(KERN_INFO "Goodbye Mr.\n");
}
module_init(hello_start);
module_exit(hello_end);
If you use kmalloc() or kzalloc() for memory allocation,
you have to add #include <linux/slab.h>.
These functions belong to the slab allocator. Slabs are chunks of memory (a "cache")
that reside in RAM and are physically contiguous. The slab allocator is built on top of the
buddy allocator ("Buddy System Algorithm") in order to provide more fine-grained allocation.
For more information, see the links below:
http://en.wikipedia.org/wiki/Slab_allocation
http://en.wikipedia.org/wiki/Buddy_algorithm
Hope this answers your question!!!!!.
You haven't included the header for kmalloc. Add #include <linux/slab.h> to your code.
good afternoon.
I got the code below from a book. I'm trying to run it, but I don't know what the "first" and "last" parameters of the makeCodeWritable function should be, or where I can find them. Can someone help? This code is about a C obfuscation method. I'm using Xcode with the LLVM GCC 4.2 compiler.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>
typedef unsigned int uint32;
typedef char* caddr_t;
typedef uint32* waddr_t;
#define Tam_celula 64
#define ALIGN __attribute__((aligned(Tam_celula)))
/*
 * Make every page touched by the byte range [first, last] readable, writable
 * and executable.
 *
 * first, last: addresses inside the first and last page of the range.
 * Failures are reported with perror(); nothing is returned.
 */
void makeCodeWritable(char* first, char* last) {
    long page = sysconf(_SC_PAGESIZE);
    uintptr_t pagesize, firstpage, lastpage;
    size_t length;

    if (page <= 0) {
        perror("sysconf");
        return;
    }
    pagesize = (uintptr_t)page;

    /*
     * Round both addresses down to a page boundary.  The arithmetic is done
     * in uintptr_t: casting a pointer to int truncates it on 64-bit targets
     * and can yield a negative remainder.
     */
    firstpage = (uintptr_t)first - (uintptr_t)first % pagesize;
    lastpage = (uintptr_t)last - (uintptr_t)last % pagesize;

    /* Include the last page itself. */
    length = (size_t)(lastpage - firstpage) + pagesize;

    if (mprotect((void *)firstpage, length, PROT_READ|PROT_EXEC|PROT_WRITE) == -1)
        perror("mprotect");
}
/* XOR the first len bytes of `from` into `to`, in place; `from` is not modified. */
void xor(caddr_t from, caddr_t to, int len){
    for (int offset = 0; offset < len; ++offset) {
        to[offset] ^= from[offset];
    }
}
/* Exchange the first len bytes of `from` and `to`, byte by byte. */
void swap(caddr_t from, caddr_t to, int len){
    for (int offset = 0; offset < len; ++offset) {
        char saved = from[offset];

        from[offset] = to[offset];
        to[offset] = saved;
    }
}
#define CELLSIZE 64
#define ALIGN asm volatile (".align 64\n");
/*
 * Self-modifying demo: six labelled "cells" of code, each preceded by a
 * 64-byte alignment directive, are executed in the order
 * cell0 -> cell3 -> cell1 -> cell4 -> cell2 -> cell5 via computed gotos.
 * Each cell prints a marker and then XORs bytes of one group of cells into
 * another through xor().
 *
 * On the first call only, two cells are XOR-ed and two are swapped before
 * execution starts.
 * NOTE(review): the xor()/swap() calls write into this function's own code,
 * so the pages holding it presumably must be writable first (see
 * makeCodeWritable); the scheme also appears to assume each cell occupies
 * exactly CELLSIZE bytes -- confirm against the generated code.
 */
void P() {
static int firsttime=1; if (firsttime) {
xor(&&cell5,&&cell2,CELLSIZE);
xor(&&cell0,&&cell3,CELLSIZE);
swap(&&cell1,&&cell4,CELLSIZE);
firsttime = 0; }
/* `a` is never read in this function; it only takes the addresses of the align labels. */
char* a[] = {&&align0,&&align1,&&align2,&&align3,&&align4,&&align5};
/* Jump table indexed by cell number. */
char*next[] ={&&cell0,&&cell1,&&cell2,&&cell3, &&cell4,&&cell5};
goto *next[0];
align0: ALIGN
cell0: printf("SPGM0\n");
xor(&&cell0,&&cell3,3*CELLSIZE);
goto *next[3];
align1: ALIGN
cell1: printf("SPGM2\n"); xor(&&cell0,&&cell3,3*CELLSIZE);
goto *next[4];
align2: ALIGN
cell2: printf("SPGM4\n"); xor(&&cell0,&&cell3,3*CELLSIZE);
goto *next[5];
align3: ALIGN
cell3: printf("SPGM1\n"); xor(&&cell3,&&cell0,3*CELLSIZE);
goto *next[1];
align4: ALIGN
cell4: printf("SPGM3\n"); xor(&&cell3,&&cell0,3*CELLSIZE);
goto *next[2];
align5: ALIGN
cell5: printf("SPGM5\n");
xor(&&cell3,&&cell0,3*CELLSIZE);
}
/* Question code: makes the code of P writable, then runs P twice. */
int main (int argc, char *argv[]) {
/* NOTE(review): `...` is the placeholder the question asks about, not valid C;
 * the call needs the first and last address of the code to unprotect. */
makeCodeWritable(...);
P(); P();
}
The first argument should be (char *)P, because it looks like you want to modify code inside function P. The second argument is the ending address of function P. You can first compile the code and use objdump -d to see the addresses of the beginning and end of P, then calculate the size of the function, SIZE, and pass it manually: makeCodeWritable((char *)P, ((char *)P) + SIZE).
The second way is to let the assembler (as) compute the size of function P, but this depends on the assembly language of your platform. This is a code snippet I modified from your code; it should compile and run on x86 and x86_64 with GCC 4.x on Linux.
align5: ALIGN
cell5: printf("SPGM5\n");
xor(&&cell3,&&cell0,3*CELLSIZE);
// adding an label to the end of function P to assembly code
asm ("END_P: \n");
;
}
extern char __sizeof__myfunc[];
/*
 * Computes the size of P at assembly time and makes exactly that range
 * writable before running P twice.
 *
 * The asm statement defines the assembler symbol __sizeof__myfunc as
 * END_P - P, so the "address" of the extern array declared above is the size
 * of P in bytes.
 * NOTE(review): relies on the END_P label emitted at the end of P and on the
 * symbol P being spelled without a prefix by the assembler -- platform
 * dependent, as the surrounding text says.
 */
int main (int argc, char *argv[]) {
// calculate the code size, ending - starting address of P
asm (" __sizeof__myfunc = END_P-P \n");
// you can see the code size of P
printf("code size is %d\n", (unsigned)__sizeof__myfunc);
makeCodeWritable( (char*)P, ((char *)P) + (unsigned)__sizeof__myfunc);
P(); P();
}
With some modification to support LLVM GCC and as in Mac OS X
int main (int argc, char *argv[]) {
size_t sizeof__myfunc = 0;
asm volatile ("movq $(_END_P - _P),%0;"
: "=r" (sizeof__myfunc)
: );
printf("%d\n", sizeof__myfunc);