Related
I'm using floating-point operations (software implementation) on an STM32F0 and found something weird in the listing. As soon as I use sqrtf, the linker adds __aeabi_ddiv, which takes ~1.6 kB of memory.
This code for example links to ddiv:
float value = 42.0f;
float root = sqrtf(value);
Removing sqrtf also removes ddiv. So my question:
Is this intended behavior?
If not, how can I fix it?
Is it possible to do sqrt without double?
Compiler: arm-atollic-eabi-gcc
Listing of sqrtf (ddiv at 0x800543e):
080053bc <sqrtf>:
80053bc: b5f0 push {r4, r5, r6, r7, lr}
80053be: 2500 movs r5, #0
80053c0: b08d sub sp, #52 ; 0x34
80053c2: 1c04 adds r4, r0, #0
80053c4: f000 f84a bl 800545c <__ieee754_sqrtf>
80053c8: 4b22 ldr r3, [pc, #136] ; (8005454 <sqrtf+0x98>)
80053ca: 1c06 adds r6, r0, #0
80053cc: 575d ldrsb r5, [r3, r5]
80053ce: 1c6b adds r3, r5, #1
80053d0: d030 beq.n 8005434 <sqrtf+0x78>
80053d2: 1c21 adds r1, r4, #0
80053d4: 1c20 adds r0, r4, #0
80053d6: f7fb febb bl 8001150 <__aeabi_fcmpun>
80053da: 1e07 subs r7, r0, #0
80053dc: d12a bne.n 8005434 <sqrtf+0x78>
80053de: 2100 movs r1, #0
80053e0: 1c20 adds r0, r4, #0
80053e2: f7fb f837 bl 8000454 <__aeabi_fcmplt>
80053e6: 2800 cmp r0, #0
80053e8: d024 beq.n 8005434 <sqrtf+0x78>
80053ea: 2301 movs r3, #1
80053ec: 9302 str r3, [sp, #8]
80053ee: 4b1a ldr r3, [pc, #104] ; (8005458 <sqrtf+0x9c>)
80053f0: 1c20 adds r0, r4, #0
80053f2: 9303 str r3, [sp, #12]
80053f4: 970a str r7, [sp, #40] ; 0x28
80053f6: f7fc faad bl 8001954 <__aeabi_f2d>
80053fa: 2200 movs r2, #0
80053fc: 9006 str r0, [sp, #24]
80053fe: 9107 str r1, [sp, #28]
8005400: 9004 str r0, [sp, #16]
8005402: 9105 str r1, [sp, #20]
8005404: 2300 movs r3, #0
8005406: 2d00 cmp r5, #0
8005408: d117 bne.n 800543a <sqrtf+0x7e>
800540a: 9208 str r2, [sp, #32]
800540c: 9309 str r3, [sp, #36] ; 0x24
800540e: a802 add r0, sp, #8
8005410: f000 f87a bl 8005508 <matherr>
8005414: 2800 cmp r0, #0
8005416: d018 beq.n 800544a <sqrtf+0x8e>
8005418: 9b0a ldr r3, [sp, #40] ; 0x28
800541a: 9301 str r3, [sp, #4]
800541c: 2b00 cmp r3, #0
800541e: d004 beq.n 800542a <sqrtf+0x6e>
8005420: f000 f874 bl 800550c <__errno>
8005424: 9b0a ldr r3, [sp, #40] ; 0x28
8005426: 9301 str r3, [sp, #4]
8005428: 6003 str r3, [r0, #0]
800542a: 9808 ldr r0, [sp, #32]
800542c: 9909 ldr r1, [sp, #36] ; 0x24
800542e: f7fc fae3 bl 80019f8 <__aeabi_d2f>
8005432: 1c06 adds r6, r0, #0
8005434: 1c30 adds r0, r6, #0
8005436: b00d add sp, #52 ; 0x34
8005438: bdf0 pop {r4, r5, r6, r7, pc}
800543a: 0010 movs r0, r2
800543c: 0019 movs r1, r3
800543e: f7fb ff55 bl 80012ec <__aeabi_ddiv>
8005442: 9008 str r0, [sp, #32]
8005444: 9109 str r1, [sp, #36] ; 0x24
8005446: 2d02 cmp r5, #2
8005448: d1e1 bne.n 800540e <sqrtf+0x52>
800544a: f000 f85f bl 800550c <__errno>
800544e: 2321 movs r3, #33 ; 0x21
8005450: 6003 str r3, [r0, #0]
8005452: e7e1 b.n 8005418 <sqrtf+0x5c>
8005454: 2000000c .word 0x2000000c
8005458: 08006096 .word 0x08006096
UPDATE I think I found the reason but still don't quite understand it.
Source of sqrtf
The double division is part of the exception handling, although 0.0/0.0 should be done at compile time, right? If I call __ieee754_sqrtf directly, ddiv is not linked. This solves my problem, but I would like to know how to do this using sqrtf.
I'm trying to understand why some Cortex-M0 code behaves differently when it is linked versus unlinked. In both cases it is loaded to 0x20000000. It looks like despite my best efforts to generate position independent code by passing -fPIC to the compiler, the bl instruction appears to differ after the code has passed through the linker. Am I reading this correctly, is that just a part of the linker's job in ARM Thumb, and is there a better way to generate a position independent function call?
Linked:
20000000:
20000000: 0003 movs r3, r0
20000002: 4852 ldr r0, [pc, #328]
20000004: 4685 mov sp, r0
20000006: 0018 movs r0, r3
20000008: f000 f802 bl 20000010
2000000c: 46c0 nop ; (mov r8, r8)
2000000e: 46c0 nop ; (mov r8, r8)
Unlinked:
00000000:
0: 0003 movs r3, r0
2: 4852 ldr r0, [pc, #328]
4: 4685 mov sp, r0
6: 0018 movs r0, r3
8: f7ff fffe bl 10
c: 46c0 nop ; (mov r8, r8)
e: 46c0 nop ; (mov r8, r8)
start.s
/*
 * start.s - minimal startup used for the position-independence experiment.
 * Lays down a four-word vector table at the start of the image, followed by
 * a reset handler that calls the C function notmain and a catch-all hang loop.
 * GNU as syntax, Thumb code.
 */
.globl _start
_start:
.word 0x20001000 /* word 0: initial stack pointer (end of the 0x1000-byte ram region at 0x20000000) */
.word reset /* word 1: reset vector; links as an odd (Thumb) address */
.word hang /* word 2: next exception vector, parked in the hang loop */
.word hang /* word 3: next exception vector, parked in the hang loop */
.thumb
/* .thumb_func marks the following label as a Thumb function so that */
/* references to it (the .word entries above) get bit 0 set. */
.thumb_func
reset:
bl notmain /* PC-relative call; the linker fills in the branch offset */
/* If notmain returns, execution falls through into hang. */
.thumb_func
hang:
b . /* branch to self: spin forever */
notmain.c
/* Zero-initialised global; the linked listing places it in .bss. */
unsigned int x;
/* Defined in fun.c; resolved by the linker (R_ARM_THM_CALL relocation). */
unsigned int fun ( unsigned int );
/*
 * Entry point called from the reset handler in start.s.
 * Reads the global x, passes x+5 to fun, and stores the result back in x.
 * When built with -fPIC the access to x goes through the global offset table.
 */
void notmain ( void )
{
x=fun(x+5);
}
fun.c
/* Zero-initialised global; the linked listing places it in .bss. */
unsigned int y;
/*
 * Returns y + z + 1.
 * The read of the global y is what forces a GOT lookup when this file is
 * compiled with -fPIC; the arithmetic itself is position independent.
 */
unsigned int fun ( unsigned int z )
{
return(y+z+1);
}
memmap
/*
 * memmap - linker script for the experiment.
 * Declares one memory region and places all code and zero-initialised data
 * in it. Sections not named here (for example .got / .got.plt in the PIC
 * build) are placed by the linker on its own.
 */
MEMORY
{
/* Single region: 0x1000 bytes starting at 0x20000000. */
ram : ORIGIN = 0x20000000, LENGTH = 0x1000
}
SECTIONS
{
/* All .text* input sections, in command-line object order. */
.text : { *(.text*) } > ram
/* All .bss* input sections (the globals x and y). */
.bss : { *(.bss*) } > ram
}
build
arm-none-eabi-as start.s -o start.o
arm-none-eabi-gcc -fPIC -O2 -c -mthumb fun.c -o fun.o
arm-none-eabi-gcc -fPIC -O2 -c -mthumb notmain.c -o notmain.o
arm-none-eabi-ld -T memmap start.o notmain.o fun.o -o so.elf
produces
20000000 <_start>:
20000000: 20001000 andcs r1, r0, r0
20000004: 20000011 andcs r0, r0, r1, lsl r0
20000008: 20000015 andcs r0, r0, r5, lsl r0
2000000c: 20000015 andcs r0, r0, r5, lsl r0
20000010 <reset>:
20000010: f000 f802 bl 20000018 <notmain>
20000014 <hang>:
20000014: e7fe b.n 20000014 <hang>
...
20000018 <notmain>:
20000018: b510 push {r4, lr}
2000001a: 4b06 ldr r3, [pc, #24] ; (20000034 <notmain+0x1c>)
2000001c: 4a06 ldr r2, [pc, #24] ; (20000038 <notmain+0x20>)
2000001e: 447b add r3, pc
20000020: 589c ldr r4, [r3, r2]
20000022: 6823 ldr r3, [r4, #0]
20000024: 1d58 adds r0, r3, #5
20000026: f000 f809 bl 2000003c <fun>
2000002a: 6020 str r0, [r4, #0]
2000002c: bc10 pop {r4}
2000002e: bc01 pop {r0}
20000030: 4700 bx r0
20000032: 46c0 nop ; (mov r8, r8)
20000034: 00000032 andeq r0, r0, r2, lsr r0
20000038: 00000000 andeq r0, r0, r0
2000003c <fun>:
2000003c: 4b03 ldr r3, [pc, #12] ; (2000004c <fun+0x10>)
2000003e: 4a04 ldr r2, [pc, #16] ; (20000050 <fun+0x14>)
20000040: 447b add r3, pc
20000042: 589b ldr r3, [r3, r2]
20000044: 681b ldr r3, [r3, #0]
20000046: 3301 adds r3, #1
20000048: 1818 adds r0, r3, r0
2000004a: 4770 bx lr
2000004c: 00000010 andeq r0, r0, r0, lsl r0
20000050: 00000004 andeq r0, r0, r4
Disassembly of section .got:
20000054 <.got>:
20000054: 20000068 andcs r0, r0, r8, rrx
20000058: 2000006c andcs r0, r0, ip, rrx
Disassembly of section .got.plt:
2000005c <_GLOBAL_OFFSET_TABLE_>:
...
Disassembly of section .bss:
20000068 <x>:
20000068: 00000000 andeq r0, r0, r0
2000006c <y>:
2000006c: 00000000 andeq r0, r0, r0
When it wants to find the global variable x, it takes the program counter plus a linker-supplied/modified offset (0x32) to find the global offset table, then uses a second offset to pick the table entry that holds the address of x. The same is done for y. So it appears that when you relocate, you will need to modify the global offset table at run time or load time, as appropriate.
If I get rid of those global variables, then apart from the vector table, which is hard-coded and not PIC (and wasn't compiled anyway), this is all position independent.
20000000 <_start>:
20000000: 20001000 andcs r1, r0, r0
20000004: 20000011 andcs r0, r0, r1, lsl r0
20000008: 20000015 andcs r0, r0, r5, lsl r0
2000000c: 20000015 andcs r0, r0, r5, lsl r0
20000010 <reset>:
20000010: f000 f802 bl 20000018 <notmain>
20000014 <hang>:
20000014: e7fe b.n 20000014 <hang>
...
20000018 <notmain>:
20000018: b508 push {r3, lr}
2000001a: 2005 movs r0, #5
2000001c: f000 f804 bl 20000028 <fun>
20000020: 3006 adds r0, #6
20000022: bc08 pop {r3}
20000024: bc02 pop {r1}
20000026: 4708 bx r1
20000028 <fun>:
20000028: 3001 adds r0, #1
2000002a: 4770 bx lr
back to this version
/* Global read by fun; the access to it is what differs between the PIC and non-PIC builds below. */
unsigned int y;
/* Returns y + z + 1. */
unsigned int fun ( unsigned int z )
{
return(y+z+1);
}
position independent
00000000 <fun>:
0: 4b03 ldr r3, [pc, #12] ; (10 <fun+0x10>)
2: 4a04 ldr r2, [pc, #16] ; (14 <fun+0x14>)
4: 447b add r3, pc
6: 589b ldr r3, [r3, r2]
8: 681b ldr r3, [r3, #0]
a: 3301 adds r3, #1
c: 1818 adds r0, r3, r0
e: 4770 bx lr
10: 00000008 andeq r0, r0, r8
14: 00000000 andeq r0, r0, r0
not position independent
00000000 <fun>:
0: 4b02 ldr r3, [pc, #8] ; (c <fun+0xc>)
2: 681b ldr r3, [r3, #0]
4: 3301 adds r3, #1
6: 1818 adds r0, r3, r0
8: 4770 bx lr
a: 46c0 nop ; (mov r8, r8)
c: 00000000 andeq r0, r0, r0
The PIC code has to do a bit more work to access the external variable. The position-dependent code still does some work because the variable is external, but not as much. The linker fills in the required items to make it work, i.e. to link it.
the elf file contains information for the linker to know to do this.
Relocation section '.rel.text' at offset 0x1a4 contains 2 entries:
Offset Info Type Sym.Value Sym. Name
00000010 00000a19 R_ARM_BASE_PREL 00000000 _GLOBAL_OFFSET_TABLE_
00000014 00000b1a R_ARM_GOT_BREL 00000004 y
or
Relocation section '.rel.text' at offset 0x174 contains 1 entries:
Offset Info Type Sym.Value Sym. Name
0000000c 00000a02 R_ARM_ABS32 00000004 y
notmain had these PIC
Relocation section '.rel.text' at offset 0x1cc contains 3 entries:
Offset Info Type Sym.Value Sym. Name
0000000e 00000a0a R_ARM_THM_CALL 00000000 fun
0000001c 00000b19 R_ARM_BASE_PREL 00000000 _GLOBAL_OFFSET_TABLE_
00000020 00000c1a R_ARM_GOT_BREL 00000004 x
and without.
Relocation section '.rel.text' at offset 0x198 contains 2 entries:
Offset Info Type Sym.Value Sym. Name
00000008 00000a0a R_ARM_THM_CALL 00000000 fun
00000014 00000b02 R_ARM_ABS32 00000004 x
So in short, the toolchain is doing its job; you don't need to redo its job. And note this has nothing to do with ARM or Thumb. Any time you use the object-and-linker model and allow for external items from an object, the linker has to patch things up to glue the code together. That's just how it works.
When I try to call a Java/SOAP web service from iOS (Objective-C) using AFNetworking, after many attempts I get this error: "EXC_BAD_ACCESS"
on 0x71d678: ldr r0, [r4, #0x68].
But I noticed that if I disable Wi-Fi on the iPhone (particularly an iPhone 6, or generally on iOS 8), the error disappears.
I post the assembly code
libdispatch.dylib`_dispatch_timers_run:
0x71d5f0: push {r4, r5, r6, r7, lr}
0x71d5f2: add r7, sp, #0xc
0x71d5f4: push.w {r8, r10, r11}
0x71d5f8: sub sp, #0x18
0x71d5fa: movs r6, #0x0
0x71d5fc: str r0, [sp]
0x71d5fe: movw r0, #0xa83e
0x71d602: movt r0, #0x1
0x71d606: add r0, pc
0x71d608: add.w r1, r0, r6, lsl #6
0x71d60c: ldr r0, [r1, #8]!
0x71d610: str r1, [sp, #0x14]
0x71d612: cmp r0, #0x0
0x71d614: beq.w 0x71d776 ; _dispatch_timers_run + 390
0x71d618: ubfx r4, r6, #0x2, #0x1
0x71d61c: ldr r5, [sp]
0x71d61e: cbz r5, 0x71d632 ; _dispatch_timers_run + 66
0x71d620: ldr.w r0, [r5, r4, lsl #3]
0x71d624: str r0, [sp, #0x10]
0x71d626: cbz r0, 0x71d632 ; _dispatch_timers_run + 66
0x71d628: add.w r0, r5, r4, lsl #3
0x71d62c: ldr r0, [r0, #0x4]
0x71d62e: str r0, [sp, #0xc]
0x71d630: b 0x71d664 ; _dispatch_timers_run + 116
0x71d632: cmp r4, #0x0
0x71d634: bne 0x71d63c ; _dispatch_timers_run + 76
0x71d636: bl 0x70df20 ; _dispatch_get_nanoseconds
0x71d63a: b 0x71d648 ; _dispatch_timers_run + 88
0x71d63c: cmp r4, #0x1
0x71d63e: str r0, [sp, #0x10]
0x71d640: str r0, [sp, #0xc]
0x71d642: bne 0x71d64c ; _dispatch_timers_run + 92
0x71d644: blx 0x72b380 ; symbol stub for: mach_absolute_time
0x71d648: str r0, [sp, #0x10]
0x71d64a: str r1, [sp, #0xc]
0x71d64c: cbz r5, 0x71d664 ; _dispatch_timers_run + 116
0x71d64e: ldr r0, [sp, #0x10]
0x71d650: ldr r1, [sp, #0xc]
0x71d652: str.w r0, [r5, r4, lsl #3]
0x71d656: add.w r0, r5, r4, lsl #3
0x71d65a: str r1, [r0, #0x4]
0x71d65c: b 0x71d664 ; _dispatch_timers_run + 116
0x71d65e: mov r0, r4
0x71d660: bl 0x71c5bc ; _dispatch_timers_update
0x71d664: ldr r0, [sp, #0x14]
0x71d666: ldr.w r10, [r0]
0x71d66a: cmp.w r10, #0x0
0x71d66e: beq.w 0x71d776 ; _dispatch_timers_run + 390
0x71d672: ldr.w r0, [r10, #8]
0x71d676: mvns r4, r0
0x71d678: ldr r0, [r4, #0x68]
0x71d67a: cmp r0, r6
0x71d67c: bne 0x71d65e ; _dispatch_timers_run + 110
0x71d67e: mov r8, r10
0x71d680: ldr r5, [r8, #24]!
0x71d684: ldr r2, [sp, #0x10]
0x71d686: ldr.w r11, [r8, #4]
0x71d68a: subs r0, r5, #0x1
0x71d68c: sbc r1, r11, #0x0
0x71d690: cmp r0, r2
0x71d692: mov.w r0, #0x0
0x71d696: it hs
0x71d698: movhs r0, #0x1
0x71d69a: ldr r2, [sp, #0xc]
0x71d69c: cmp r1, r2
0x71d69e: mov.w r1, #0x0
0x71d6a2: it hs
0x71d6a4: movhs r1, #0x1
0x71d6a6: it eq
0x71d6a8: moveq r1, r0
0x71d6aa: cmp r1, #0x0
0x71d6ac: bne 0x71d776 ; _dispatch_timers_run + 390
0x71d6ae: ldr r0, [r4, #0x1c]
0x71d6b0: cmp r0, #0x1
0x71d6b2: bhi 0x71d65e ; _dispatch_timers_run + 110
0x71d6b4: ldr r0, [r4, #0x70]
0x71d6b6: cmp r0, #0x0
0x71d6b8: bne 0x71d65e ; _dispatch_timers_run + 110
0x71d6ba: str r6, [sp, #0x4]
0x71d6bc: ldr.w r6, [r10, #48]
0x71d6c0: ldr.w r3, [r10, #52]
0x71d6c4: ldr r0, [sp, #0x10]
0x71d6c6: ldr r1, [sp, #0xc]
0x71d6c8: mov r2, r6
0x71d6ca: str r3, [sp, #0x8]
0x71d6cc: subs r0, r0, r5
0x71d6ce: sbc.w r1, r1, r11
0x71d6d2: blx 0x72b240 ; symbol stub for: __udivdi3
0x71d6d6: mov r9, r6
0x71d6d8: adds r6, r0, #0x1
0x71d6da: adc r12, r1, #0x0
0x71d6de: lsrs r1, r6, #0x1f
0x71d6e0: orr.w r1, r1, r12, lsl #1
0x71d6e4: movs r2, #0x0
0x71d6e6: mvn r3, #0x80000000
0x71d6ea: orr.w r1, r1, r12, lsr #31
0x71d6ee: cmp r1, #0x0
0x71d6f0: mov.w r1, #0x0
0x71d6f4: itt ne
0x71d6f6: movne.w r12, #0x0
0x71d6fa: mvnne r6, #0x80000000
0x71d6fe: cmp.w r9, #0xffffffff
0x71d702: it eq
0x71d704: moveq r1, #0x1
0x71d706: ldr r0, [sp, #0x8]
0x71d708: cmp r0, #0x0
0x71d70a: it lt
0x71d70c: movlt r2, #0x1
0x71d70e: cmp r0, r3
0x71d710: it eq
0x71d712: moveq r2, r1
0x71d714: mov r3, r0
0x71d716: cbnz r2, 0x71d73c ; _dispatch_timers_run + 332
0x71d718: umull r1, r2, r6, r9
0x71d71c: mla r2, r6, r3, r2
0x71d720: mla r0, r12, r9, r2
0x71d724: adds r2, r1, r5
0x71d726: adc.w r3, r0, r11
0x71d72a: strd r2, r3, [r8]
0x71d72e: ldrd r0, r1, [r10, #56]
0x71d732: adds r0, r0, r2
0x71d734: adcs r1, r3
0x71d736: strd r0, r1, [r10, #32]
0x71d73a: b 0x71d750 ; _dispatch_timers_run + 352
0x71d73c: mov.w r0, #0xffffffff
0x71d740: str.w r0, [r8]
0x71d744: str.w r0, [r8, #4]
0x71d748: str.w r0, [r8, #8]
0x71d74c: str.w r0, [r8, #12]
0x71d750: mov r0, r4
0x71d752: bl 0x71c5bc ; _dispatch_timers_update
0x71d756: ldr r0, [sp, #0x10]
0x71d758: ldr r1, [sp, #0xc]
0x71d75a: strd r0, r1, [r10, #40]
0x71d75e: ldrex r0, [r4, #0x70]
0x71d762: add r0, r6
0x71d764: strex r1, r0, [r4, #0x70]
0x71d768: cmp r1, #0x0
0x71d76a: bne 0x71d75e ; _dispatch_timers_run + 366
0x71d76c: mov r0, r4
0x71d76e: bl 0x715934 ; _dispatch_wakeup
0x71d772: ldr r6, [sp, #0x4]
0x71d774: b 0x71d664 ; _dispatch_timers_run + 116
0x71d776: adds r6, #0x1
0x71d778: cmp r6, #0x7
0x71d77a: bne.w 0x71d5fe ; _dispatch_timers_run + 14
0x71d77e: add sp, #0x18
0x71d780: pop.w {r8, r10, r11}
0x71d784: pop {r4, r5, r6, r7, pc}
0x71d786: nop
Can someone help me, please?
On iphone 4s it's ok (iOS 7)
Thank you in advance
Claudio
I'm trying to get a STM32Cube project compiled using arm-none-eabi-gcc and a Makefile.
I have specified:
# Compile-only flags: Thumb code for a Cortex-M0 (ARMv6-M, little endian),
# one section per function and per data object (so the linker's --gc-sections
# can discard unused ones), -MMD dependency files, C99, all warnings, debug
# info, and the part-selection define taken from $(PART). -c stops before linking.
CFLAGS = -mthumb\
-march=armv6-m\
-mlittle-endian\
-mcpu=cortex-m0\
-ffunction-sections\
-fdata-sections\
-MMD\
-std=c99\
-Wall\
-g\
-D$(PART)\
-c
and:
# Link flags, handed to the linker through the gcc driver (-Wl,...):
# discard unreferenced sections, use the linker script named by $(LDFILE),
# and have the linker print its version information.
# NOTE(review): no -mthumb / -mcpu=cortex-m0 appears here. If these are the
# only options given at link time, the driver may pick a non-Thumb multilib
# C library - confirm with `arm-none-eabi-gcc --print-multi-lib`.
LDFLAGS = -Wl,--gc-sections\
-Wl,-T$(LDFILE)\
-Wl,-v
The firmware builds without problems, but when I boot the MCU I get stuck in the Hard Fault handler.
Stack trace is:
#0 HardFault_Handler () at ./Src/main.c:156
#1 <signal handler called>
#2 0x0800221c in ____libc_init_array_from_thumb ()
#3 0x080021be in LoopFillZerobss () at Src/startup_stm32f030x8.s:103
#4 0x080021be in LoopFillZerobss () at Src/startup_stm32f030x8.s:103
Backtrace stopped: previous frame identical to this frame (corrupt stack?)
and I go straight to Hard Fault when stepping to bl __libc_init_array in the startup file.
/* Zero fill the bss segment. */
/* r2 is presumably the current write position inside .bss, set up before */
/* this excerpt - TODO confirm against the full startup file. */
FillZerobss:
movs r3, #0
str r3, [r2] /* store one zero word at r2 */
adds r2, r2, #4 /* advance to the next word */
LoopFillZerobss:
ldr r3, = _ebss /* r3 = end-of-bss symbol (from the linker script) */
cmp r2, r3
bcc FillZerobss /* unsigned r2 < _ebss: keep clearing */
/* Call the clock system initialization function.*/
bl SystemInit
/* Call static constructors */
/* In the failing build shown in this post, this call is routed through a */
/* ____libc_init_array_from_thumb veneer because the linked library routine */
/* is ARM code, not Thumb. */
bl __libc_init_array
/* Call the application's entry point.*/
bl main
Any ideas what could be wrong?
My arm-none-eabi-gcc version is 4.8.4 20140725 (release)
[edit]
The disassembly of the calls
08002218 <____libc_init_array_from_thumb>:
8002218: 4778 bx pc
800221a: 46c0 nop ; (mov r8, r8)
800221c: eafff812 b 800026c <__libc_init_array>
0800026c <__libc_init_array>:
800026c: e92d4070 push {r4, r5, r6, lr}
8000270: e59f506c ldr r5, [pc, #108] ; 80002e4 <__libc_init_array+0x78>
8000274: e59f606c ldr r6, [pc, #108] ; 80002e8 <__libc_init_array+0x7c>
8000278: e0656006 rsb r6, r5, r6
800027c: e1b06146 asrs r6, r6, #2
8000280: 12455004 subne r5, r5, #4
8000284: 13a04000 movne r4, #0
8000288: 0a000005 beq 80002a4 <__libc_init_array+0x38>
800028c: e2844001 add r4, r4, #1
8000290: e5b53004 ldr r3, [r5, #4]!
8000294: e1a0e00f mov lr, pc
8000298: e12fff13 bx r3
800029c: e1560004 cmp r6, r4
80002a0: 1afffff9 bne 800028c <__libc_init_array+0x20>
80002a4: e59f5040 ldr r5, [pc, #64] ; 80002ec <__libc_init_array+0x80>
80002a8: e59f6040 ldr r6, [pc, #64] ; 80002f0 <__libc_init_array+0x84>
80002ac: e0656006 rsb r6, r5, r6
80002b0: eb0007ca bl 80021e0 <_init>
80002b4: e1b06146 asrs r6, r6, #2
80002b8: 12455004 subne r5, r5, #4
80002bc: 13a04000 movne r4, #0
80002c0: 0a000005 beq 80002dc <__libc_init_array+0x70>
80002c4: e2844001 add r4, r4, #1
80002c8: e5b53004 ldr r3, [r5, #4]!
80002cc: e1a0e00f mov lr, pc
80002d0: e12fff13 bx r3
80002d4: e1560004 cmp r6, r4
80002d8: 1afffff9 bne 80002c4 <__libc_init_array+0x58>
80002dc: e8bd4070 pop {r4, r5, r6, lr}
80002e0: e12fff1e bx lr
80002e4: 08002258 .word 0x08002258
80002e8: 08002258 .word 0x08002258
80002ec: 08002258 .word 0x08002258
80002f0: 08002260 .word 0x08002260
[edit 2]
The register values from gdb:
(gdb) info reg
r0 0x20000000 536870912
r1 0x1 1
r2 0x0 0
r3 0x40021000 1073876992
r4 0xffffffff -1
r5 0xffffffff -1
r6 0xffffffff -1
r7 0x20001fd0 536879056
r8 0xffffffff -1
r9 0xffffffff -1
r10 0xffffffff -1
r11 0xffffffff -1
r12 0xffffffff -1
sp 0x20001fd0 0x20001fd0
lr 0xfffffff9 -7
pc 0x800067c 0x800067c <HardFault_Handler+4>
xPSR 0x61000003 1627389955
That __libc_init_array is ARM code, not Thumb, hence the M0 will fall over trying to execute some nonsense it doesn't understand (actually, it never quite gets there since it faults on the attempt to switch to ARM state in the bx, but hey, same difference...)
You'll need to make sure you use pure-Thumb versions of any libraries - a Cortex-M-specific toolchain might be a better bet than a generic ARM one. If you have a multilib toolchain, I'd suggest checking the output of arm-none-eabi-gcc --print-multi-lib to make sure you've specified all the relevant options to get proper Cortex-M libraries, and if you're using a separate link step, make sure you invoke it with LD=arm-none-eabi-gcc (plus the relevant multilib options), rather than LD=arm-none-eabi-ld.
When the GCC 4.7.3 (20121207) for ARM Cortex-M3 takes the address of a function it doesn't get the exact address of the function. I can see an off-by-one in that pointer.
// assume at address 0x00001204;
// Thumb function: its code starts at the even address above.
int foo() {
return 42;
}
// Calls foo twice: once through a function pointer, once directly.
// The pointer value carries bit 0 set (Thumb state marker used by
// interworking branches); the direct bl encodes a halfword offset and
// therefore shows the even address.
void bar() {
int(*p)() = &foo; // p = 0x1205;
p(); // executed successfully
foo(); // assembly: "bl 0x00001204;"
}
Although the pointer points to an odd address, the execution is successful. I would expect an exception at this point. Why does it take that strange address, and why doesn't it hurt?
Edit
The SO article describes a difference between thumb and ARM mode. Why is that offset not visible when the function is called directly although the CPU is in the same mode?
Should the odd address be kept, or would resetting bit 0 cause harm? (I could not see any so far.)
I cobbled up something from one of my examples to quickly demonstrate what is going on.
vectors.s:
/* vectors.s */
/*
 * Cortex-M3 vector table plus a tiny startup, written to demonstrate how
 * the GNU tools treat Thumb function addresses:
 *   - labels declared with .thumb_func are referenced with bit 0 set;
 *   - labels without it (hello) are referenced with the plain even address;
 *   - bl encodes a PC-relative offset, while ldr =label / bx uses the
 *     full address including bit 0.
 */
.cpu cortex-m3
.thumb
.word 0x20002000 /* stack top address */
.word _start /* 1 Reset */
.word hang /* 2 NMI */
.word hello /* 3 HardFault */
.word hang /* 4 MemManage */
.word hang /* 5 BusFault */
.word hang /* 6 UsageFault */
.word hang /* 7 RESERVED */
.word hang /* 8 RESERVED */
.word hang /* 9 RESERVED*/
.word hang /* 10 RESERVED */
.word hang /* 11 SVCall */
.word hang /* 12 Debug Monitor */
.word hang /* 13 RESERVED */
.word hang /* 14 PendSV */
.word hang /* 15 SysTick */
.word hang /* 16 External Interrupt(0) */
.word hang /* 17 External Interrupt(1) */
.word hang /* 18 External Interrupt(2) */
.word hang /* 19 ... */
.thumb_func
.global _start
_start:
/*ldr r0,stacktop */
/*mov sp,r0*/
bl notmain /* direct call: offset-encoded, target shown as even address */
ldr r0,=notmain /* literal-pool load of notmain's address (odd: bit 0 set) */
/* NOTE(review): the value copied from pc below has bit 0 clear; returning */
/* through it with an interworking return (bx lr / pop {pc}) may fault on */
/* Cortex-M - verify before reusing this call sequence. */
mov lr,pc
bx r0 /* indirect call through the odd address */
b hang
.thumb_func
hang: b .
/* hello is deliberately NOT preceded by .thumb_func: its vector-table entry */
/* therefore gets the even address, illustrating the missing declaration. */
hello: b .
.thumb_func
.globl PUT32
/* void PUT32(unsigned int addr, unsigned int value): store r1 at [r0]. */
PUT32:
str r1,[r0]
bx lr
.end
blinker01.c:
/* Implemented in vectors.s: stores the second argument at the address given by the first. */
extern void PUT32 ( unsigned int, unsigned int );
/*
 * Demo entry point called from _start. Makes one PUT32 call with
 * recognisable constants so the call and its literals are easy to spot in
 * the disassembly, then returns 0.
 */
int notmain ( void )
{
PUT32(0x12345678,0xAABBCCDD);
return(0);
}
Makefile:
# Build script for the vectors.s + blinker01.c demo.
# ARMGNU selects the toolchain prefix; the commented-out line is the
# bare-metal alternative to the one in use.
#ARMGNU = arm-none-eabi
ARMGNU = arm-none-linux-gnueabi
# Assembler options: enable warnings and treat them as errors.
AOPS = --warn --fatal-warnings
# Compiler options: warnings as errors, -O2, and no standard library or
# startup files (freestanding build).
COPS = -Wall -Werror -O2 -nostdlib -nostartfiles -ffreestanding
# Default target: the raw binary image.
all : blinker01.gcc.thumb.bin
# Assemble the vector table / startup code.
vectors.o : vectors.s
$(ARMGNU)-as vectors.s -o vectors.o
# Compile the C file as generic Thumb code.
blinker01.gcc.thumb.o : blinker01.c
$(ARMGNU)-gcc $(COPS) -mthumb -c blinker01.c -o blinker01.gcc.thumb.o
# Cortex-M3 / ARMv7-M variant of the same object; not used by the 'all' target.
blinker01.gcc.thumb2.o : blinker01.c
$(ARMGNU)-gcc $(COPS) -mthumb -mcpu=cortex-m3 -march=armv7-m -c blinker01.c -o blinker01.gcc.thumb2.o
# Link with the memmap linker script, dump a full disassembly listing, and
# extract the raw binary from the ELF.
blinker01.gcc.thumb.bin : memmap vectors.o blinker01.gcc.thumb.o
$(ARMGNU)-ld -o blinker01.gcc.thumb.elf -T memmap vectors.o blinker01.gcc.thumb.o
$(ARMGNU)-objdump -D blinker01.gcc.thumb.elf > blinker01.gcc.thumb.list
$(ARMGNU)-objcopy blinker01.gcc.thumb.elf blinker01.gcc.thumb.bin -O binary
Disassembly:
Disassembly of section .text:
08000000 <_start-0x50>:
8000000: 20002000 andcs r2, r0, r0
8000004: 08000051 stmdaeq r0, {r0, r4, r6}
8000008: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
800000c: 0800005e stmdaeq r0, {r1, r2, r3, r4, r6}
8000010: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
8000014: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
8000018: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
800001c: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
8000020: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
8000024: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
8000028: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
800002c: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
8000030: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
8000034: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
8000038: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
800003c: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
8000040: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
8000044: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
8000048: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
800004c: 0800005d stmdaeq r0, {r0, r2, r3, r4, r6}
08000050 <_start>:
8000050: f000 f80a bl 8000068 <notmain>
8000054: 4803 ldr r0, [pc, #12] ; (8000064 <PUT32+0x4>)
8000056: 46fe mov lr, pc
8000058: 4700 bx r0
800005a: e7ff b.n 800005c <hang>
0800005c <hang>:
800005c: e7fe b.n 800005c <hang>
0800005e <hello>:
800005e: e7fe b.n 800005e <hello>
08000060 <PUT32>:
8000060: 6001 str r1, [r0, #0]
8000062: 4770 bx lr
8000064: 08000069 stmdaeq r0, {r0, r3, r5, r6}
08000068 <notmain>:
8000068: b508 push {r3, lr}
800006a: 4803 ldr r0, [pc, #12] ; (8000078 <notmain+0x10>)
800006c: 4903 ldr r1, [pc, #12] ; (800007c <notmain+0x14>)
800006e: f7ff fff7 bl 8000060 <PUT32>
8000072: 2000 movs r0, #0
8000074: bd08 pop {r3, pc}
8000076: 46c0 nop ; (mov r8, r8)
8000078: 12345678 eorsne r5, r4, #120, 12 ; 0x7800000
800007c: aabbccdd bge 6ef33f8 <_start-0x110cc58>
First off, note hang vs. hello. This is a GNU-ism: in assembly you need to declare a label to be a Thumb function (.thumb_func) in order for it to actually work for this kind of thing. hang is properly declared and the vector table properly uses the odd address; hello is not properly declared and the even address is put in there. C-compiled code does this properly automatically.
Here is a prime example of what you are asking, though: bl to the C function notmain does not, and cannot, use an odd address. But to use bx you ask for the address of the function notmain, and that address is provided to the code as 0x8000069 for a function at address 0x8000068. If you did a bx to 0x8000068 on an ARMv-something-T core, it would switch to ARM mode and eventually crash because it would be running Thumb code as ARM instructions (hopefully crash and not stumble along); on a Cortex-M, a bx to an even address should fault immediately.
08000050 <_start>:
8000050: f000 f80a bl 8000068 <notmain>
8000054: 4803 ldr r0, [pc, #12] ; (8000064 <PUT32+0x4>)
8000056: 46fe mov lr, pc
8000058: 4700 bx r0
800005a: e7ff b.n 800005c <hang>
8000064: 08000069 stmdaeq r0, {r0, r3, r5, r6}
Why can't bl be odd? Look at the encoding above of the bl from 0x8000050 to 0x8000068. The pc is two instructions ahead, so 4 bytes, so take 0x8000068 - 0x8000054 = 0x14, divide that by 2 and you get 0x00A. That is the offset from the pc, and that is what is encoded in the instruction (the 0A in the second half of the instruction). The divide by two is based on the knowledge that Thumb instructions are always 2 bytes (well, at the time), so branches can reach twice as far if the offset is counted in 2-byte units rather than in bytes. So the lsbit of the delta between the two is lost; it is controlled by the hardware.
What your code did was, in one place, ask for the address of a Thumb function, which gives the odd address; the other case was looking at the disassembly of a branch-link, whose target is always shown as even.