i need to override the interrupt vector generated automatically from
Atmel Studio. can anyone help me with this ?
Update with the interrupt code generated by the Atmel Studio.
00000000 <__vectors>:
0: 0c 94 34 00 jmp 0x68 ; 0x68 <__ctors_end>
4: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
8: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
c: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
10: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
14: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
18: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
1c: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
20: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
24: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
28: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
2c: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
30: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
34: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
38: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
3c: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
40: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
44: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
48: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
4c: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
50: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
54: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
58: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
5c: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
60: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
64: 0c 94 3e 00 jmp 0x7c ; 0x7c <__bad_interrupt>
00000068 <__ctors_end>:
68: 11 24 eor r1, r1
6a: 1f be out 0x3f, r1 ; 63
6c: cf ef ldi r28, 0xFF ; 255
6e: d8 e0 ldi r29, 0x08 ; 8
70: de bf out 0x3e, r29 ; 62
72: cd bf out 0x3d, r28 ; 61
74: 0e 94 44 00 call 0x88 ; 0x88 <main>
78: 0c 94 47 00 jmp 0x8e ; 0x8e <_exit>
0000007c <__bad_interrupt>:
7c: 0c 94 00 00 jmp 0 ; 0x0 <__vectors>
Update register interrupt
void (*fnVectors[NUM_INTERRUPTS])(void);
void IntRegister(unsigned int intrNum, void (*fnHandler)(void))
{
/* Assign ISR */
fnVectors[intrNum] = fnHandler;
}
These are linked using weak symbols in avr-libc; what you need is to declare your own ISRs using the ISR macro. See for instance the avr-libc interrupt documentation.
The actual source code for gcrt1.S uses a macro, which expands to a jump to symbols like __vector_4 and weakly defines that name equal to __bad_interrupt. If you define such routines (which the ISR macro helps you do), the weak definition is ignored. This same gcrt1.S file is assembled for each different MCU creating files such as crtatmega168.o. If you inspect one of those with avr-objdump -xd you'll find the weak references:
SYMBOL TABLE:
...
00000000 w .init0 00000000 __init
00000000 w .text 00000000 __vector_1
00000000 g .text 00000000 __bad_interrupt
...
00000000 <__vectors>:
0: 0c 94 00 00 jmp 0 ; 0x0 <__vectors>
0: R_AVR_CALL __init
4: 0c 94 00 00 jmp 0 ; 0x0 <__vectors>
4: R_AVR_CALL __vector_1
Related
Where are jump tables located in x86 elf code?
progname: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <main>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 83 ec 10 sub $0x10,%rsp
8: c7 45 fc 02 00 00 00 movl $0x2,-0x4(%rbp)
f: 83 7d fc 09 cmpl $0x9,-0x4(%rbp)
13: 0f 87 ac 00 00 00 ja c5 <main+0xc5>
19: 8b 45 fc mov -0x4(%rbp),%eax
1c: 48 8b 04 c5 00 00 00 mov 0x0(,%rax,8),%rax
23: 00
20: R_X86_64_32S .rodata+0x48
24: ff e0 jmpq *%rax
26: bf 00 00 00 00 mov $0x0,%edi
27: R_X86_64_32 .rodata
2b: b8 00 00 00 00 mov $0x0,%eax
30: e8 00 00 00 00 callq 35 <main+0x35>
31: R_X86_64_PLT32 printf-0x4
35: e9 9b 00 00 00 jmpq d5 <main+0xd5>
3a: bf 00 00 00 00 mov $0x0,%edi
3b: R_X86_64_32 .rodata+0xc
3f: b8 00 00 00 00 mov $0x0,%eax
44: e8 00 00 00 00 callq 49 <main+0x49>
45: R_X86_64_PLT32 printf-0x4
49: e9 87 00 00 00 jmpq d5 <main+0xd5>
4e: bf 00 00 00 00 mov $0x0,%edi
4f: R_X86_64_32 .rodata+0x18
53: b8 00 00 00 00 mov $0x0,%eax
58: e8 00 00 00 00 callq 5d <main+0x5d>
59: R_X86_64_PLT32 printf-0x4
5d: eb 76 jmp d5 <main+0xd5>
5f: bf 00 00 00 00 mov $0x0,%edi
60: R_X86_64_32 .rodata
64: b8 00 00 00 00 mov $0x0,%eax
69: e8 00 00 00 00 callq 6e <main+0x6e>
6a: R_X86_64_PLT32 printf-0x4
6e: eb 65 jmp d5 <main+0xd5>
70: bf 00 00 00 00 mov $0x0,%edi
71: R_X86_64_32 .rodata+0xc
75: b8 00 00 00 00 mov $0x0,%eax
7a: e8 00 00 00 00 callq 7f <main+0x7f>
7b: R_X86_64_PLT32 printf-0x4
7f: eb 54 jmp d5 <main+0xd5>
81: bf 00 00 00 00 mov $0x0,%edi
82: R_X86_64_32 .rodata+0x18
86: b8 00 00 00 00 mov $0x0,%eax
8b: e8 00 00 00 00 callq 90 <main+0x90>
8c: R_X86_64_PLT32 printf-0x4
90: eb 43 jmp d5 <main+0xd5>
92: bf 00 00 00 00 mov $0x0,%edi
93: R_X86_64_32 .rodata
97: b8 00 00 00 00 mov $0x0,%eax
9c: e8 00 00 00 00 callq a1 <main+0xa1>
9d: R_X86_64_PLT32 printf-0x4
a1: eb 32 jmp d5 <main+0xd5>
a3: bf 00 00 00 00 mov $0x0,%edi
a4: R_X86_64_32 .rodata+0xc
a8: b8 00 00 00 00 mov $0x0,%eax
ad: e8 00 00 00 00 callq b2 <main+0xb2>
ae: R_X86_64_PLT32 printf-0x4
b2: eb 21 jmp d5 <main+0xd5>
b4: bf 00 00 00 00 mov $0x0,%edi
b5: R_X86_64_32 .rodata+0x18
b9: b8 00 00 00 00 mov $0x0,%eax
be: e8 00 00 00 00 callq c3 <main+0xc3>
bf: R_X86_64_PLT32 printf-0x4
c3: eb 10 jmp d5 <main+0xd5>
c5: bf 00 00 00 00 mov $0x0,%edi
c6: R_X86_64_32 .rodata+0x24
ca: b8 00 00 00 00 mov $0x0,%eax
cf: e8 00 00 00 00 callq d4 <main+0xd4>
d0: R_X86_64_PLT32 printf-0x4
d4: 90 nop
d5: b8 00 00 00 00 mov $0x0,%eax
da: c9 leaveq
db: c3 retq
I have a basic switch case with 10 cases (including default case). If x=2, then it goes to 2nd block in switch case code.
Need help in understanding where exactly gcc generated jump tables are located in elf files and in which section. I pasted output of objdump -dr progname above.
This question already has an answer here:
Assembly do we need the endings? [duplicate]
(1 answer)
Closed 1 year ago.
Recently, I read some books about computer science. I wrote some C code, and disassembled them, using gcc and objdump.
The following C code:
#include <stdio.h>
#include <stdbool.h>
int dojob()
{
static short num[ ][4] = { {2, 9, -1, 5}, {3, 8, 2, -6}};
static short *pn[ ] = {num[0], num[1]};
static short s[2] = {0, 0};
int i, j;
for (i=0; i<2; i++) {
for (j=0; j<4; j++){
s[i] += *pn[i]++;
}
printf ("sum of line %d: %d\n", i+1, s[i]);
}
return 0;
}
int main ( )
{
dojob();
}
got the following assembly code (AT&T syntex; only assembly of function dojob and some data is list):
00401350 <_dojob>:
401350: 55 push %ebp
401351: 89 e5 mov %esp,%ebp
401353: 83 ec 28 sub $0x28,%esp
401356: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%ebp)
40135d: eb 75 jmp 4013d4 <_dojob+0x84>
40135f: c7 45 f0 00 00 00 00 movl $0x0,-0x10(%ebp)
401366: eb 3c jmp 4013a4 <_dojob+0x54>
401368: 8b 45 f4 mov -0xc(%ebp),%eax
40136b: 8b 04 85 00 20 40 00 mov 0x402000(,%eax,4),%eax
401372: 8d 48 02 lea 0x2(%eax),%ecx
401375: 8b 55 f4 mov -0xc(%ebp),%edx
401378: 89 0c 95 00 20 40 00 mov %ecx,0x402000(,%edx,4)
40137f: 0f b7 10 movzwl (%eax),%edx
401382: 8b 45 f4 mov -0xc(%ebp),%eax
401385: 0f b7 84 00 08 50 40 movzwl 0x405008(%eax,%eax,1),%eax
40138c: 00
40138d: 89 c1 mov %eax,%ecx
40138f: 89 d0 mov %edx,%eax
401391: 01 c8 add %ecx,%eax
401393: 89 c2 mov %eax,%edx
401395: 8b 45 f4 mov -0xc(%ebp),%eax
401398: 66 89 94 00 08 50 40 mov %dx,0x405008(%eax,%eax,1)
40139f: 00
4013a0: 83 45 f0 01 addl $0x1,-0x10(%ebp)
4013a4: 83 7d f0 03 cmpl $0x3,-0x10(%ebp)
4013a8: 7e be jle 401368 <_dojob+0x18>
4013aa: 8b 45 f4 mov -0xc(%ebp),%eax
4013ad: 0f b7 84 00 08 50 40 movzwl 0x405008(%eax,%eax,1),%eax
4013b4: 00
4013b5: 98 cwtl
4013b6: 8b 55 f4 mov -0xc(%ebp),%edx
4013b9: 83 c2 01 add $0x1,%edx
4013bc: 89 44 24 08 mov %eax,0x8(%esp)
4013c0: 89 54 24 04 mov %edx,0x4(%esp)
4013c4: c7 04 24 24 30 40 00 movl $0x403024,(%esp)
4013cb: e8 50 08 00 00 call 401c20 <_printf>
4013d0: 83 45 f4 01 addl $0x1,-0xc(%ebp)
4013d4: 83 7d f4 01 cmpl $0x1,-0xc(%ebp)
4013d8: 7e 85 jle 40135f <_dojob+0xf>
4013da: b8 00 00 00 00 mov $0x0,%eax
4013df: c9 leave
4013e0: c3 ret
Disassembly of section .data:
00402000 <__data_start__>:
402000: 08 20 or %ah,(%eax)
402002: 40 inc %eax
402003: 00 10 add %dl,(%eax)
402005: 20 40 00 and %al,0x0(%eax)
Disassembly of section .bss:
...
00405008 <_s.1927>:
405008: 00 00 add %al,(%eax)
...
I have two questions:
I don't understand the difference between mov and movl instruction? Why the compiler generate mov for some code, and movl for others?
I completely understand the meaning of the C code, but not the assembly that the compiler generated. Who can make some comments for it for me to understand? I will thank a lot.
The MOVL instruction was generated because you put two int (i and j variables), MOVL will perform a MOV of 32 bits, and integer' size is 32 bits.
a non exhaustive list of all MOV* exist (like MOVD for doubleword or MOVQ for quadword) to allow to optimize your code and use the better expression to gain most time as possible.
PS: may be the -M intel objdump's argument can help you to have a better comprehension of the disassembly, a lot of man on the Intel syntax can may be find easily.
I know when using objdump -dr in my file call shows up in machine code as e8 00 00 00 00 because it has not yet been linked. But I need to find out what the 00 00 00 00 will turn into after the linker has done it's job. I know it should calculate the offset, but I'm a little confused about that.
As an example with the code below, after the linker part is done, how should the e8 00 00 00 00 be? And how do I get to that answer?
I'm testing out with this sample code: (I'm trying to call moo)
Disassembly of section .text:
0000000000000000 <foo>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 89 7d fc mov %edi,-0x4(%rbp)
7: 8b 45 fc mov -0x4(%rbp),%eax
a: 83 e8 0a sub $0xa,%eax
d: 5d pop %rbp
e: c3 retq
000000000000000f <moo>:
f: 55 push %rbp
10: 48 89 e5 mov %rsp,%rbp
13: 89 7d fc mov %edi,-0x4(%rbp)
16: b8 01 00 00 00 mov $0x1,%eax
1b: 5d pop %rbp
1c: c3 retq
000000000000001d <main>:
1d: 55 push %rbp
1e: 48 89 e5 mov %rsp,%rbp
21: 48 83 ec 10 sub $0x10,%rsp
25: c7 45 fc 8e 0c 00 00 movl $0xc8e,-0x4(%rbp)
2c: 8b 45 fc mov -0x4(%rbp),%eax
2f: 89 c7 mov %eax,%edi
31: e8 00 00 00 00 callq 36 <main+0x19>
32: R_X86_64_PC32 moo-0x4
36: 89 45 fc mov %eax,-0x4(%rbp)
39: b8 00 00 00 00 mov $0x0,%eax
3e: c9 leaveq
3f: c3 retq
With objdump -r you have Relocations printed with your disassembly -d:
31: e8 00 00 00 00 callq 36 <main+0x19>
32: R_X86_64_PC32 moo-0x4
ld-linux.so.2 loader will relocate objects (in modern world it will relocate even executable to random address) and fill the relocations with correct address.
Check with gdb by adding breakpoint at main and starting program (linker works before main function is started):
gdb ./program
(gdb) start
(gdb) disassemble main
If you want to compile the code without relocations, show source code and compilation options.
Object files and executable files on several architectures that I know of do not necessarily fix jump destinations at link time.
This is a feature which provides flexibility.
Jump target addresses do not have to be fixed until just before the instruction executes. They do not need to be fixed up at link time—nor even at program start time!
Most systems (Windows, Linux, Unix, VAX/VMS) tag such locations in the object code as an address which needs adjustment. There is additional information about what the target address is, what type of reference it is (such as absolute or relative; 16-bit, 24-bit, 32-bit, 64-bit, etc.).
The zero value there is not necessarily a placeholder, but the base value upon which to evaluate the result. For example, if the instruction were—for whatever reason—call 5+external_address, then there might be 5 (e8 05 00 00 00) in the object code.
If you want to see what the address is at execution time, run the program under a debugger, place a breakpoint at that instruction and then view the instruction just before it executes.
A common anti-virus, security-enhancing feature known as ASLR (address space layout randomization) intentionally loads programs sections at inconsistent addresses to thwart malicious code which alters programs or data. Programs operating in this environment may not have some target addresses assigned until after the program runs a bit.
(Of related interest, VAX/VMS in particular has a complex fixup mode in which an equation describes the operations needed to compute a value. Operations include addition, subtraction, multiplication, division, shifting, rotating, and probably others. I never saw it actually used, but it was interesting to contemplate how one might apply the capability.)
but you clearly know how to do all of this. you know how to disassemble before linking just disassemble after to see how the linker modifies those instructions.
asm(".globl _start; _start: nop\n");
unsigned int foo ( unsigned int x )
{
return(x+5);
}
unsigned int moo ( unsigned int x )
{
return(foo(x)+3);
}
int main ( void )
{
return(moo(3)+2);
}
0000000000000000 <_start>:
0: 90 nop
0000000000000001 <foo>:
1: 55 push %rbp
2: 48 89 e5 mov %rsp,%rbp
5: 89 7d fc mov %edi,-0x4(%rbp)
8: 8b 45 fc mov -0x4(%rbp),%eax
b: 83 c0 05 add $0x5,%eax
e: 5d pop %rbp
f: c3 retq
0000000000000010 <moo>:
10: 55 push %rbp
11: 48 89 e5 mov %rsp,%rbp
14: 48 83 ec 08 sub $0x8,%rsp
18: 89 7d fc mov %edi,-0x4(%rbp)
1b: 8b 45 fc mov -0x4(%rbp),%eax
1e: 89 c7 mov %eax,%edi
20: e8 00 00 00 00 callq 25 <moo+0x15>
25: 83 c0 03 add $0x3,%eax
28: c9 leaveq
29: c3 retq
000000000000002a <main>:
2a: 55 push %rbp
2b: 48 89 e5 mov %rsp,%rbp
2e: bf 03 00 00 00 mov $0x3,%edi
33: e8 00 00 00 00 callq 38 <main+0xe>
38: 83 c0 02 add $0x2,%eax
3b: 5d pop %rbp
3c: c3 retq
0000000000001000 <_start>:
1000: 90 nop
0000000000001001 <foo>:
1001: 55 push %rbp
1002: 48 89 e5 mov %rsp,%rbp
1005: 89 7d fc mov %edi,-0x4(%rbp)
1008: 8b 45 fc mov -0x4(%rbp),%eax
100b: 83 c0 05 add $0x5,%eax
100e: 5d pop %rbp
100f: c3 retq
0000000000001010 <moo>:
1010: 55 push %rbp
1011: 48 89 e5 mov %rsp,%rbp
1014: 48 83 ec 08 sub $0x8,%rsp
1018: 89 7d fc mov %edi,-0x4(%rbp)
101b: 8b 45 fc mov -0x4(%rbp),%eax
101e: 89 c7 mov %eax,%edi
1020: e8 dc ff ff ff callq 1001 <foo>
1025: 83 c0 03 add $0x3,%eax
1028: c9 leaveq
1029: c3 retq
000000000000102a <main>:
102a: 55 push %rbp
102b: 48 89 e5 mov %rsp,%rbp
102e: bf 03 00 00 00 mov $0x3,%edi
1033: e8 d8 ff ff ff callq 1010 <moo>
1038: 83 c0 02 add $0x2,%eax
103b: 5d pop %rbp
103c: c3 retq
for example
20: e8 00 00 00 00 callq 25 <moo+0x15>
1033: e8 d8 ff ff ff callq 1010 <moo>
I am currently writing various implementations of a color to black/white image converter. I would like to do a :
Simple C++ implementation
Self made ASM implementation
Self made ASM implementation with AVX vector instructions.
The goal is to benchmark each one of these and analyse the performance improvement I get.
The following snippet of code is the C++ implementation. It only treats a single portion of the image, because I also want to do multithreaded computing.
void CBwConverter::run(const CImg<uint8_t> &src, CImg<uint8_t> &dst, uint32_t pixel, size_t size) const {
const uint8_t *rC = src.data(0,pixel,0,0);
const uint8_t *gC = src.data(0,pixel,0,1);
const uint8_t *bC = src.data(0,pixel,0,2);
uint8_t *mC = dst.data(0,pixel,0,0);
for(size_t c = 0; c < size; c++, rC++, gC++, bC++, mC++) {
*mC = (uint8_t)(0.299f*(*rC) + 0.587f*(*gC) + 0.114f*(*bC));
}
}
Now, before starting the ASM version, I had my C++ code compiled and disassembled just to see how it looks like. After compiling with gcc -std=c++11 -g -O2 -c CBwConverter.cc, I obtained the following output with objdump -d CBwConvert.o :
0000000000000000 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm>:
0: 53 push %rbx
1: 8b 3e mov (%rsi),%edi
3: 89 c8 mov %ecx,%eax
5: 44 8b 56 04 mov 0x4(%rsi),%r10d
9: 44 8b 5e 08 mov 0x8(%rsi),%r11d
d: 89 c9 mov %ecx,%ecx
f: 48 8b 5e 18 mov 0x18(%rsi),%rbx
13: 0f af c7 imul %edi,%eax
16: 4c 0f af d7 imul %rdi,%r10
1a: 4b 8d 34 1b lea (%r11,%r11,1),%rsi
1e: 4c 8d 0c 03 lea (%rbx,%rax,1),%r9
22: 4c 89 d7 mov %r10,%rdi
25: 49 0f af fb imul %r11,%rdi
29: 4c 0f af d6 imul %rsi,%r10
2d: 48 01 c7 add %rax,%rdi
30: 4c 01 d0 add %r10,%rax
33: 48 01 df add %rbx,%rdi
36: 48 8d 34 03 lea (%rbx,%rax,1),%rsi
3a: 8b 02 mov (%rdx),%eax
3c: 48 0f af c8 imul %rax,%rcx
40: 48 03 4a 18 add 0x18(%rdx),%rcx
44: 4d 85 c0 test %r8,%r8
47: 74 6b je b4 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0xb4>
49: 31 d2 xor %edx,%edx
4b: f3 0f 10 25 00 00 00 movss 0x0(%rip),%xmm4 # 53 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0x53>
52: 00
53: f3 0f 10 1d 00 00 00 movss 0x0(%rip),%xmm3 # 5b <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0x5b>
5a: 00
5b: f3 0f 10 15 00 00 00 movss 0x0(%rip),%xmm2 # 63 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0x63>
62: 00
63: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
68: 41 0f b6 04 11 movzbl (%r9,%rdx,1),%eax
6d: 66 0f ef c0 pxor %xmm0,%xmm0
71: f3 0f 2a c0 cvtsi2ss %eax,%xmm0
75: 0f b6 04 17 movzbl (%rdi,%rdx,1),%eax
79: 0f 28 c8 movaps %xmm0,%xmm1
7c: 66 0f ef c0 pxor %xmm0,%xmm0
80: f3 0f 59 cc mulss %xmm4,%xmm1
84: f3 0f 2a c0 cvtsi2ss %eax,%xmm0
88: 0f b6 04 16 movzbl (%rsi,%rdx,1),%eax
8c: f3 0f 59 c3 mulss %xmm3,%xmm0
90: f3 0f 58 c1 addss %xmm1,%xmm0
94: 66 0f ef c9 pxor %xmm1,%xmm1
98: f3 0f 2a c8 cvtsi2ss %eax,%xmm1
9c: f3 0f 59 ca mulss %xmm2,%xmm1
a0: f3 0f 58 c1 addss %xmm1,%xmm0
a4: f3 0f 2c c0 cvttss2si %xmm0,%eax
a8: 88 04 11 mov %al,(%rcx,%rdx,1)
ab: 48 83 c2 01 add $0x1,%rdx
af: 49 39 d0 cmp %rdx,%r8
b2: 75 b4 jne 68 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0x68>
b4: 5b pop %rbx
b5: c3 retq
I can already tell that the for-loop start at 68 and ends at b2.
Something bothers me in the disassembled program. Why does the compiler decide to set registers %xmm0 and %xmm1 to 0, typically at 6d with instruction pxor ? These registers are overwritten just after with instruction cvtsi2ss which loads an integer and converts it to a single-precision number and then finally stores it into them. Why set them to 0 when they are overwritten just after ? If the compiler does it, am I supposed to do the same when writing my own asm version ?
I made a simple linux kernel module which has a static function. When I use objdump or nm on the .ko file, I cannot see the entry for my static function. Where did it go?
Thanks.
Edit: Adding code
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
int p;
int q;
module_param(p, int, S_IRUGO);
module_param(q, int, S_IRUGO);
static int add(int x, int y)
{
return x + y;
}
static int __init hello_init(void)
{
int res;
res = add(p, q);
return res;
}
static void __exit hello_cleanup(void)
{
}
module_init(hello_init);
module_exit(hello_cleanup);
MODULE_VERSION("dev");
MODULE_LICENSE("Proprietary");
objdump output with non-static function:
Disassembly of section .text:
0000000000000000 <add>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: e8 00 00 00 00 callq 9 <add+0x9>
9: c9 leaveq
a: 8d 04 3e lea (%rsi,%rdi,1),%eax
d: c3 retq
e: 90 nop
f: 90 nop
Disassembly of section .init.text:
0000000000000000 <init_module>:
0: 55 push %rbp
1: 8b 05 00 00 00 00 mov 0x0(%rip),%eax # 7 <init_module+0x7>
7: 03 05 00 00 00 00 add 0x0(%rip),%eax # d <init_module+0xd>
d: 48 89 e5 mov %rsp,%rbp
10: c9 leaveq
11: c3 retq
Disassembly of section .exit.text:
0000000000000000 <cleanup_module>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: e8 00 00 00 00 callq 9 <cleanup_module+0x9>
9: c9 leaveq
a: c3 retq
objdump output with static function:
Disassembly of section .init.text:
0000000000000000 <init_module>:
0: 55 push %rbp
1: 8b 05 00 00 00 00 mov 0x0(%rip),%eax # 7 <init_module+0x7>
7: 03 05 00 00 00 00 add 0x0(%rip),%eax # d <init_module+0xd>
d: 48 89 e5 mov %rsp,%rbp
10: c9 leaveq
11: c3 retq
Disassembly of section .exit.text:
0000000000000000 <cleanup_module>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: e8 00 00 00 00 callq 9 <cleanup_module+0x9>
9: c9 leaveq
a: c3 retq