Actual default linker script and settings gcc uses - gcc

Where can I find the actual linker script and settings gcc uses?
Things I've tried:
For concreteness, let's consider a small program:
empty.c
int main(void)
{
return 0;
}
build it statically, and look at the result:
$ gcc -static -o empty empty.c
$ readelf -W -l empty
Elf file type is EXEC (Executable file)
Entry point 0x400f4e
There are 6 program headers, starting at offset 64
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
LOAD 0x000000 0x0000000000400000 0x0000000000400000 0x0bf581 0x0bf581 R E 0x200000
LOAD 0x0bfeb0 0x00000000006bfeb0 0x00000000006bfeb0 0x001d80 0x0042d8 RW 0x200000
NOTE 0x000190 0x0000000000400190 0x0000000000400190 0x000044 0x000044 R 0x4
TLS 0x0bfeb0 0x00000000006bfeb0 0x00000000006bfeb0 0x000020 0x000058 R 0x10
GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0x10
GNU_RELRO 0x0bfeb0 0x00000000006bfeb0 0x00000000006bfeb0 0x000150 0x000150 R 0x1
Section to Segment mapping:
Segment Sections...
00 .note.ABI-tag .note.gnu.build-id .rela.plt .init .plt .text __libc_freeres_fn __libc_thread_freeres_fn .fini .rodata __libc_subfreeres __libc_atexit __libc_thread_subfreeres .eh_frame .gcc_except_table
01 .tdata .init_array .fini_array .jcr .data.rel.ro .got .got.plt .data .bss __libc_freeres_ptrs
02 .note.ABI-tag .note.gnu.build-id
03 .tdata .tbss
04
05 .tdata .init_array .fini_array .jcr .data.rel.ro .got
Note the various sections, grouped into segments, and placed into memory regions of various permissions.
Now let's attempt to get as much information as possible on how it did this linking.
$ gcc -static -o empty empty.c -Wl,--verbose
GNU ld (GNU Binutils for Ubuntu) 2.24
Supported emulations:
elf_x86_64
elf32_x86_64
elf_i386
i386linux
elf_l1om
elf_k1om
i386pep
i386pe
using internal linker script:
==================================================
/* Script for -z combreloc: combine and sort reloc sections */
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
"elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
ENTRY(_start)
SEARCH_DIR("/usr/x86_64-linux-gnu/lib64"); SEARCH_DIR("=/usr/local/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib/x86_64-linux-gnu"); SEARCH_DIR("=/lib64"); SEARCH_DIR("=/usr/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib");
SECTIONS
{
/* Read-only sections, merged into text segment: */
PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS;
.interp : { *(.interp) }
.note.gnu.build-id : { *(.note.gnu.build-id) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.rela.dyn :
{
*(.rela.init)
*(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
*(.rela.fini)
*(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
*(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
*(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
*(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
*(.rela.ctors)
*(.rela.dtors)
*(.rela.got)
*(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
*(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
*(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
*(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
*(.rela.ifunc)
}
.rela.plt :
{
*(.rela.plt)
PROVIDE_HIDDEN (__rela_iplt_start = .);
*(.rela.iplt)
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.init :
{
KEEP (*(SORT_NONE(.init)))
}
.plt : { *(.plt) *(.iplt) }
.text :
{
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
*(.text.exit .text.exit.*)
*(.text.startup .text.startup.*)
*(.text.hot .text.hot.*)
*(.text .stub .text.* .gnu.linkonce.t.*)
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
}
.fini :
{
KEEP (*(SORT_NONE(.fini)))
}
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
.eh_frame_hdr : { *(.eh_frame_hdr) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
.gcc_except_table.*) }
/* These sections are generated by the Sun/Oracle C++ compiler. */
.exception_ranges : ONLY_IF_RO { *(.exception_ranges
.exception_ranges*) }
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
. = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
/* Exception handling */
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
/* Thread Local Storage sections */
.tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);
KEEP (*(.preinit_array))
PROVIDE_HIDDEN (__preinit_array_end = .);
}
.init_array :
{
PROVIDE_HIDDEN (__init_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
PROVIDE_HIDDEN (__init_array_end = .);
}
.fini_array :
{
PROVIDE_HIDDEN (__fini_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
PROVIDE_HIDDEN (__fini_array_end = .);
}
.ctors :
{
/* gcc uses crtbegin.o to find the start of
the constructors, so we make sure it is
first. Because this is a wildcard, it
doesn't matter if the user does not
actually link against crtbegin.o; the
linker won't look for a file to match a
wildcard. The wildcard also means that it
doesn't matter which directory crtbegin.o
is in. */
KEEP (*crtbegin.o(.ctors))
KEEP (*crtbegin?.o(.ctors))
/* We don't want to include the .ctor section from
the crtend.o file until after the sorted ctors.
The .ctor section from the crtend file contains the
end of ctors marker and it must be last */
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
KEEP (*(SORT(.ctors.*)))
KEEP (*(.ctors))
}
.dtors :
{
KEEP (*crtbegin.o(.dtors))
KEEP (*crtbegin?.o(.dtors))
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
KEEP (*(SORT(.dtors.*)))
KEEP (*(.dtors))
}
.jcr : { KEEP (*(.jcr)) }
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
.dynamic : { *(.dynamic) }
.got : { *(.got) *(.igot) }
. = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
.got.plt : { *(.got.plt) *(.igot.plt) }
.data :
{
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
}
.data1 : { *(.data1) }
_edata = .; PROVIDE (edata = .);
. = .;
__bss_start = .;
.bss :
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(COMMON)
/* Align here to ensure that the .bss section occupies space up to
_end. Align after .bss to ensure correct alignment even if the
.bss section disappears because there are no input sections.
FIXME: Why do we need it? When there is no .bss section, we don't
pad the .data section. */
. = ALIGN(. != 0 ? 64 / 8 : 1);
}
.lbss :
{
*(.dynlbss)
*(.lbss .lbss.* .gnu.linkonce.lb.*)
*(LARGE_COMMON)
}
. = ALIGN(64 / 8);
. = SEGMENT_START("ldata-segment", .);
.lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
{
*(.lrodata .lrodata.* .gnu.linkonce.lr.*)
}
.ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
{
*(.ldata .ldata.* .gnu.linkonce.l.*)
. = ALIGN(. != 0 ? 64 / 8 : 1);
}
. = ALIGN(64 / 8);
_end = .; PROVIDE (end = .);
. = DATA_SEGMENT_END (.);
/* Stabs debugging sections. */
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
.stab.excl 0 : { *(.stab.excl) }
.stab.exclstr 0 : { *(.stab.exclstr) }
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0. */
/* DWARF 1 */
.debug 0 : { *(.debug) }
.line 0 : { *(.line) }
/* GNU DWARF 1 extensions */
.debug_srcinfo 0 : { *(.debug_srcinfo) }
.debug_sfnames 0 : { *(.debug_sfnames) }
/* DWARF 1.1 and DWARF 2 */
.debug_aranges 0 : { *(.debug_aranges) }
.debug_pubnames 0 : { *(.debug_pubnames) }
/* DWARF 2 */
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
.debug_abbrev 0 : { *(.debug_abbrev) }
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
.debug_frame 0 : { *(.debug_frame) }
.debug_str 0 : { *(.debug_str) }
.debug_loc 0 : { *(.debug_loc) }
.debug_macinfo 0 : { *(.debug_macinfo) }
/* SGI/MIPS DWARF 2 extensions */
.debug_weaknames 0 : { *(.debug_weaknames) }
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) }
/* DWARF 3 */
.debug_pubtypes 0 : { *(.debug_pubtypes) }
.debug_ranges 0 : { *(.debug_ranges) }
/* DWARF Extension. */
.debug_macro 0 : { *(.debug_macro) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
}
==================================================
... <snip searching and linking actual object files>
But the script, while long, is missing most of the important information previously listed.
How does it know which sections to gather into different load segments?
There is no PHDRS command, and while the use of SEGMENT_START suggests there are some standard segments for this system defined somewhere else, none of the sections are listed with an associated segment.
Furthermore, how does it know where to load these segments, or what permissions these memory regions have?
There is no MEMORY command. And again, if there are some standard memory regions for this system defined somewhere else, none of the sections list which memory region to use.
When I've seen default linker scripts before for microcontrollers, they were incredibly detailed. This output however suggests there are more scripts and settings somewhere.
Where are these other linker script definitions and settings stored?

Well, I know that this is an old question, but I also found it frustrating that there is no precise info about options that are used during the linking process. This answer shows my journey to find them.
First of all, I was looking into official docs https://gcc.gnu.org/onlinedocs/ - I searched the GCC Manual and GCC Internals Manual. The only meaningful information that I found is that gcc uses an internal tool called collect2 to invoke the linker. According to https://gcc.gnu.org/onlinedocs/gccint/Collect2.html "The program collect2 works by linking the program once and looking through the linker output file for symbols with particular names indicating they are constructor functions". So it's used to make linking possible.
The next thing I tried was going through the source code. You can browse the code here https://code.woboq.org/gcc/gcc/collect2.c.html . The problem is that it wasn't really helpful. But I noticed that collect2 uses the fork_execute function to invoke ld. You can dive deep into fork_execute to find out that it will fork, exec a new program in the forked process, and wait for it to finish. Because both fork and exec are system calls (to put it quickly - system calls are the functions an application uses to communicate with the operating system), I decided to try tracing them.
So I made a simple program that doesn't require any compilation (hello.c.s is already compiled to assembly - so all that gcc has to do is assemble and link it).
[Alex#Normandy tmp]$ gcc hello.c.s -o hello_gcc
[Alex#Normandy tmp]$ ./hello_gcc
Hello, World!
Then I use strace with following options:
-o forked.log save the output to forked.log
-s 1024 strings up to 1024 chars are not truncated (the default of 32 was not enough)
-f - enables strace on forked processes
-e trace=/exec - filter system calls so only ones starting with exec are shown
The final output was following.
[Alex#Normandy tmp]$ strace -o forked.log -s 1024 -f -e trace=/exec gcc hello.c.s -o hello_gcc
[Alex#Normandy tmp]$ grep 'ld' forked.log
2153 execve("/usr/libexec/gcc/x86_64-redhat-linux/4.8.5/collect2", ["/usr/libexec/gcc/x86_64-redhat-linux/4.8.5/collect2", "--build-id", "--no-add-needed", "--eh-frame-hdr", "--hash-style=gnu", "-m", "elf_x86_64", "-dynamic-linker", "/lib64/ld-linux-x86-64.so.2", "-o", "hello_gcc", "/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crt1.o", "/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crti.o", "/usr/lib/gcc/x86_64-redhat-linux/4.8.5/crtbegin.o", "-L/usr/lib/gcc/x86_64-redhat-linux/4.8.5", "-L/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64", "-L/lib/../lib64", "-L/usr/lib/../lib64", "-L/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../..", "/tmp/ccyl36jf.o", "-lgcc", "--as-needed", "-lgcc_s", "--no-as-needed", "-lc", "-lgcc", "--as-needed", "-lgcc_s", "--no-as-needed", "/usr/lib/gcc/x86_64-redhat-linux/4.8.5/crtend.o", "/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crtn.o"], 0x17b9da0 /* 61 vars */) = 0
2154 execve("/usr/bin/ld", ["/usr/bin/ld", "--build-id", "--no-add-needed", "--eh-frame-hdr", "--hash-style=gnu", "-m", "elf_x86_64", "-dynamic-linker", "/lib64/ld-linux-x86-64.so.2", "-o", "hello_gcc", "/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crt1.o", "/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crti.o", "/usr/lib/gcc/x86_64-redhat-linux/4.8.5/crtbegin.o", "-L/usr/lib/gcc/x86_64-redhat-linux/4.8.5", "-L/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64", "-L/lib/../lib64", "-L/usr/lib/../lib64", "-L/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../..", "/tmp/ccyl36jf.o", "-lgcc", "--as-needed", "-lgcc_s", "--no-as-needed", "-lc", "-lgcc", "--as-needed", "-lgcc_s", "--no-as-needed", "/usr/lib/gcc/x86_64-redhat-linux/4.8.5/crtend.o", "/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crtn.o"], 0x7fff14226a98 /* 61 vars */) = 0
So the ld command that was used is:
/usr/bin/ld --build-id --no-add-needed --eh-frame-hdr --hash-style=gnu -m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o hello_gcc /usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crt1.o /usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crti.o /usr/lib/gcc/x86_64-redhat-linux/4.8.5/crtbegin.o -L/usr/lib/gcc/x86_64-redhat-linux/4.8.5 -L/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64 -L/lib/../lib64 -L/usr/lib/../lib64 -L/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../.. /tmp/ccyl36jf.o -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed /usr/lib/gcc/x86_64-redhat-linux/4.8.5/crtend.o /usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crtn.o
So what the f*** was made? Well, all options can be found in the manual. Here is decomposed output.
/usr/bin/ld - linker program
--build-id - add build-id to binary. In my system default it is sha1.
--no-add-needed - it is a deprecated name for --no-copy-dt-needed-entries - it is connected with DT_NEEDED tags inside ELF; if I understand that correctly, it means that DT_NEEDED tags won't be copied from input libraries.
--eh-frame-hdr - "Request creation of ".eh_frame_hdr" section and ELF "PT_GNU_EH_FRAME" segment header." Whatever that means.
--hash-style=gnu - "Set the type of linker's hash table(s)." Default is sysv, but there is a newer format gnu. Binary can also have a hash table(s) in both formats.
-m elf_x86_64 - selects the linker emulation (produces an ELF binary for x86_64)
-dynamic-linker /lib64/ld-linux-x86-64.so.2 - set name of expected dynamic linker
-o hello_gcc - set output binary
/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crt1.o - code that is run before main of actual program
/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crti.o- code that is run before main of actual program
/usr/lib/gcc/x86_64-redhat-linux/4.8.5/crtbegin.o - code that is run before main of actual program
-L/usr/lib/gcc/x86_64-redhat-linux/4.8.5 - additional library search path
-L/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64 - additional library search path
-L/lib/../lib64 - additional library search path
-L/usr/lib/../lib64 - additional library search path
-L/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../.. - additional library search path
/tmp/ccyl36jf.o - this is the actual program (binary object) with its main function
-lgcc - -l option - "Add the archive or object file specified by namespec to the list of files to link." In that case it is gcc.
--as-needed - enable "as-needed" mode that checks if on particular point following library (namespace?) is needed
-lgcc_s - add gcc_s; note that it is added only if it's really needed at this point.
--no-as-needed - disable "as-needed" mode that checks if on particular point following library (namespace?) is needed
-lc- standard C namespace/library
-lgcc - this lib should be already set. There might be something between this and previous usage of this option.
--as-needed - set "as-needed" mode. There might be something between this and previous usage of this option.
-lgcc_s - already described. There might be something between this and previous usage of this option.
--no-as-needed - disable "as-needed" mode.
/usr/lib/gcc/x86_64-redhat-linux/4.8.5/crtend.o - additional code that runs when the program finishes
/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crtn.o - additional code that runs when the program finishes.
More about: crt1.o, crti.o, crtbegin.o, crtend.o, crtn.o - they are startup, initialization, constructor, destructor and finalization files (according to Building Embedded Linux Systems By Karim Yaghmour).
Probably simpler way
While writing this answer I also "discovered" that you can invoke gcc with the -v option and it will print COLLECT_GCC_OPTIONS followed by the collect2 command line, whose arguments are identical to those of the ld invocation:
COLLECT_GCC_OPTIONS='-v' '-o' 'hello_gcc' '-mtune=generic' '-march=x86-64'
/usr/libexec/gcc/x86_64-redhat-linux/4.8.5/collect2 --build-id --no-add-needed --eh-frame-hdr --hash-style=gnu -m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o hello_gcc /usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crt1.o /usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crti.o /usr/lib/gcc/x86_64-redhat-linux/4.8.5/crtbegin.o -L/usr/lib/gcc/x86_64-redhat-linux/4.8.5 -L/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64 -L/lib/../lib64 -L/usr/lib/../lib64 -L/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../.. hello_gcc.o -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed /usr/lib/gcc/x86_64-redhat-linux/4.8.5/crtend.o /usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crtn.o
Still, if you want to be sure for 100% how ld was invoked - the strace is your best bet.
Lastly, note that I used Enterprise Linux v7 and v8 systems to check if I'm right. Both of them use the x86_64 arch, and the results might be different on different architectures.

On my Ubuntu system, the linker scripts are located at:
/lib/x86_64-linux-gnu/ldscripts
The base script appears to be chosen based on the target architecture, such as elf_x86_64, and then for each base architecture there are several variants.
I am not sure, but the variant appears to be chosen based on certain linker options.

Related

How do i set up data memory address when using "riscv32/64-unknown-elf-gcc"?

I designed a RISCV32IM processor, and I used "riscv32/64-unknown-elf-gcc" to generate code for testing.
The instruction memory setting has been solved with the options below (the -Ttext option), but the data memory setting has not been solved yet.
riscv64-unknown-elf-gcc -v -march=rv32im -mabi=ilp32 -nostartfiles -x c -Ttext 40000000 -o main.o main.c
Can I know if I can set the data memory address I want?
It looks like you need to link with a linker script, something like:
OUTPUT_ARCH( "riscv" )
ENTRY(_start)
SECTIONS
{
. = 0x40000000;
.text.init : { *(.text.init) }
. = ALIGN(0x1000);
.text : { *(.text) }
. = ALIGN(0x1000);
.data : { *(.data) }
.bss : { *(.bss) }
_end = .;
}
_start is the entry symbol and 0x40000000 is the memory start address,
followed by the output sections, aligned to 0x1000.
text is the program itself
data holds the statically initialized variables
bss holds the statically allocated (uninitialized) variables

GCC -T link option issue

I have compiled the project with arm-none-eabi-gcc to object with command
arm-none-eabi-gcc --specs=nosys.specs -mcpu=cortex-m7 -mtune=cortex-m7 -Os -g -gdwarf-2 -c $< -o $(@)
Now that I have the object files, I try to link them with this command in the object folder:
arm-none-eabi-gcc --specs=nosys.specs -mcpu=cortex-m7 -mtune=cortex-m7 -g -gdwarf-2 -nostartfiles -T ..\project.ld -o target *
However I got some strange error
arm-none-eabi/bin/ld.exe: error: no memory region specified for loadable section `.text.memcmp'
I understand that when I use the -T option, the given linker script file will be used instead of the default linker script. It looks like the section definitions for some builtin functions are missing. I tried to fix that by putting
.text.memcmp : {*(.text.memcmp)}
in my ld file, it looks like this section is fixed however I got another error:
arm-none-eabi/bin/ld.exe: error: no memory region specified for loadable section `.text.memset'
so I don't think put .text.memset in the LD file is correct fix, because after I put `.text.memset' in ld file , I got another error :
arm-none-eabi/bin/ld.exe: error: no memory region specified for loadable section `.text._snprintf_r'
I think I missed some options in GCC to create these default sections for builtin functions
What is the root cause of this issue and how to fix that? Thank you so much!
Update: Add the LD file below :
MEMORY {
INIT_SRAM : ORIGIN = 0x34400000, LENGTH = 0x1FB000
INIT_SRAM_NO_CACHEABLE : ORIGIN = 0x3460A000, LENGTH = 0x1DF00
INIT_SRAM_STACK : ORIGIN = 0x34628000, LENGTH = 0x15000
RAM_RSVD : ORIGIN = ., LENGTH = 0
BOOT_TEST : ORIGIN = 0x43840000 LENGTH = 0x50
}
SECTIONS
{
.boot_test : {*(.boot_test)}> BOOT_TEST
.exception_table ALIGN(4) : > {*(.exception_table)}>INIT_SRAM
.startup ALIGN(4) : {*(.startup)}
.ramcode ALIGN(4) : > {*(.ramcode)}
.text ALIGN(4) : { *(.text) }
.ramcode ALIGN(4) : { *(.ramcode) }
.rodata ALIGN(4) : { *(.rodata) }
.data ALIGN(4) : { *(.data) }
.bss ALIGN(16) : { *(.bss) }
_TEST_SESSION_START = .;
.TEST_SESSION :{*(.TEST_SESSION)}
_TEST_SESSION_END = (. - 1);
_Stack_start = .;
__STACK_SIZE = SIZEOF(INIT_SRAM_STACK);
__RAM_NO_CACHEABLE_START = ADDR(INIT_SRAM_NO_CACHEABLE);
}
Changing
.text ALIGN(4) : { *(.text) }
to
.text ALIGN(4) : { *(.text) *(.text.*) }
can fix this .text.xxx missing issue.

undefined reference to _GLOBAL_OFFSET_TABLE_ (only when generating binaries)

this is the problem:
When I link my C code using ld and generate elf32-i386 files as the output format (set via OUTPUT_FORMAT() in the ld script), I don't get any errors. But if I put "binary" in OUTPUT_FORMAT() instead, or try to output a file with a .bin extension, I get a mixture of errors like:
kernel.o: In function `k_main':
kernel.c:(.text+0xe): undefined reference to `_GLOBAL_OFFSET_TABLE_'
kernelutils.o: In function `k_clear_screen':
kernelutils.c:(.text+0xc): undefined reference to `_GLOBAL_OFFSET_TABLE_'
kernelutils.o: In function `k_clear_screen_front':
kernelutils.c:(.text+0x56): undefined reference to `_GLOBAL_OFFSET_TABLE_'
kernelutils.o: In function `k_printf':
kernelutils.c:(.text+0xa0): undefined reference to `_GLOBAL_OFFSET_TABLE_'
kernelutils.o: In function `k_sleep_3sec':
kernelutils.c:(.text+0x152): undefined reference to `_GLOBAL_OFFSET_TABLE_'
kernelmalloc.o:kernelmalloc.c:(.text+0xc): more undefined references to `_GLOBAL_OFFSET_TABLE_' follow
This not only happens when compiling specific scripts, all scripts that try to use ld to link, or gcc since this calls ld, die in the attempt of get a binary with .bin extension.
When showing the symbols of one of the executables (kernel.o in the output of above) I see that the symbol _GLOBAL_OFFSET_TABLE_ isnt defined, and the most scary part, all the functions that returned error in the error output of above have their symbols removed, this is the nm output:
cristian#mymethodman:~/Desktop/kernel/0.0.3/Archivos$ nm kernel.o
U _GLOBAL_OFFSET_TABLE_
U k_clear_screen
U k_clear_screen_front
00000000 T k_main
U k_malloc
U k_printf
U k_sleep_3sec
00000000 T __x86.get_pc_thunk.bx
How can I solve this? I will leave the linker script below to ensure it isn't a problem with the .ld file, with both "to get elf" and "to get binary" versions. Thanks in advance!
Ld scripts:
To get binary:
ENTRY(loader)
OUTPUT_FORMAT(binary)
SECTIONS {
/* The kernel will live at 3GB + 1MB in the virtual
address space, which will be mapped to 1MB in the
physical address space. */
. = 0xC0100000;
.text : AT(ADDR(.text) - 0xC0000000) {
*(.text)
*(.rodata*)
}
.data ALIGN (0x1000) : AT(ADDR(.data) - 0xC0000000) {
*(.data)
}
.bss : AT(ADDR(.bss) - 0xC0000000) {
_sbss = .;
*(COMMON)
*(.bss)
_ebss = .;
}
}
To get ELF:
ENTRY(loader)
OUTPUT_FORMAT(elf32-i386)
SECTIONS {
/* The kernel will live at 3GB + 1MB in the virtual
address space, which will be mapped to 1MB in the
physical address space. */
. = 0xC0100000;
.text : AT(ADDR(.text) - 0xC0000000) {
*(.text)
*(.rodata*)
}
.data ALIGN (0x1000) : AT(ADDR(.data) - 0xC0000000) {
*(.data)
}
.bss : AT(ADDR(.bss) - 0xC0000000) {
_sbss = .;
*(COMMON)
*(.bss)
_ebss = .;
}
}
As you can see, only the OUTPUT_FORMAT() line changes between the two.
Your toolchain probably defaults to generating position-independent executables (PIE). Try compiling with gcc -fno-pie.
If you want to keep PIE for security reasons, you'll need a more complicated linker script and something that performs the initial relocation (such as a dynamic linker, but simpler constructions are possible as well).

Executing bare metal qemu execution with -pflash command

I have a website Hello world for bare metal ARM using QEMU that teaches how to run qemu for versatilePB.
The website example uses -kernel option to load the binary image into 0x10000; I just assume that the binary is loaded into 0x10000 internally with the -kernel.
This is the command qemu-system-arm -M versatilepb -m 128M -kernel test.bin -serial stdio, and the source can be found at - https://dl.dropboxusercontent.com/u/10773282/2014/b1.zip
The ld setup is as follows:
ENTRY(_Reset)
SECTIONS
{
. = 0x10000;
.startup . : { startup.o(.text) }
...
}
Start up assembly is simple as follows:
.global _Reset
_Reset:
LDR sp, =stack_top
BL c_entry
B .
The main c code (c_entry) is as follows:
/* Memory-mapped register at 0x101f1000 that characters are written to
   (UART0 data register, going by its name). */
volatile unsigned int * const UART0DR = (unsigned int *)0x101f1000;

/*
 * Write the NUL-terminated string s to UART0DR, one register write per
 * character, in order. The terminating '\0' itself is not written.
 */
void print_uart0(const char *s) {
    const char *cursor;

    for (cursor = s; *cursor != '\0'; cursor++) {
        *UART0DR = (unsigned int)(*cursor);
    }
}

/* C entry point called from the startup assembly: prints a greeting. */
void c_entry() {
    print_uart0("Hello world!\n");
}
I need to modify the code to boot without -kernel, but with -pflash to emulate as if the binary is read from the flash drive. This is my approach in trying to make it work:
Change the startup assembly and test.ld
I just used the other example from the same author of my example: http://balau82.wordpress.com/2010/02/14/simplest-bare-metal-program-for-arm/
This is the startup code:
/* Table of eight branch instructions, one per entry named in the comments
below, placed in its own executable ("x") section so that the linker script
can position it explicitly. Only the first entry goes anywhere useful; all
others branch to themselves. */
.section INTERRUPT_VECTOR, "x"
.global _Reset
_Reset:
B Reset_Handler /* Reset */
B . /* Undefined */
B . /* SWI */
B . /* Prefetch Abort */
B . /* Data Abort */
B . /* reserved */
B . /* IRQ */
B . /* FIQ */
/* Reset path: set up the stack, call into C, then spin. */
Reset_Handler:
/* Load the address of stack_top (provided at link time) into sp. */
LDR sp, =stack_top
/* Call the C routine c_entry. */
BL c_entry
/* Branch to self: spin here forever if c_entry returns. */
B .
This is the test.ld
/* test.ld: link the image at address 0 with the INTERRUPT_VECTOR section
of startup.o first, followed by code, data, bss and a 4kB stack. */
ENTRY(_Reset)
SECTIONS
{
/* Everything below is laid out starting at address 0. */
. = 0x0;
.text : {
/* The vector table must come first so that it ends up at address 0. */
startup.o (INTERRUPT_VECTOR)
*(.text)
}
/* NOTE(review): there is no *(.rodata*) pattern in this script, so
read-only data (e.g. string literals) is left to the linker's
orphan-section placement -- confirm that is intended. */
.data : { *(.data) }
.bss : { *(.bss COMMON) }
/* Round the location counter up to an 8-byte boundary before reserving
the stack. */
. = ALIGN(8);
. = . + 0x1000; /* 4kB of stack memory */
/* stack_top marks the end of the reserved area; the startup code loads
it into sp. */
stack_top = .;
}
Update the build code
After the build to get the test.bin, I used the dd command to create a flash binary.
# Assemble the startup code and compile the C source for the ARM926EJ-S,
# with debug information.
arm-none-eabi-as -mcpu=arm926ej-s -g startup.s -o startup.o
arm-none-eabi-gcc -c -mcpu=arm926ej-s -g test.c -o test.o
# Link both objects using the test.ld linker script.
arm-none-eabi-ld -T test.ld test.o startup.o -o test.elf
# Strip the ELF container, leaving a raw binary image.
arm-none-eabi-objcopy -O binary test.elf test.bin
# Create a zero-filled flash image of 4096 blocks x 4096 bytes (16 MiB).
# NOTE(review): the image size presumably has to match the flash size that
# the selected QEMU machine model expects -- verify for versatilepb.
dd if=/dev/zero of=flash.bin bs=4096 count=4096
# Write test.bin over the start of the image; conv=notrunc keeps the rest
# of flash.bin intact.
dd if=test.bin of=flash.bin bs=4096 conv=notrunc
qemu execution
Executed qemu to get this error message.
qemu-system-arm -M versatilepb -m 128M -pflash flash.bin -nographic
>> failed to read the initial flash content
>> Initialization of device cfi.pflash01 failed
What might be wrong? I uploaded the examples and sample code.
not working with -pflash: https://dl.dropboxusercontent.com/u/10773282/2014/b2.zip
It seems that the -M option affects how the other options behave.
I tried with -M connex to use gumstix board, and it works fine.
The other thing I noticed was that with -M versatilepb, I had to use -kernel to load and run the program.

Advice on linker script creation and verification

Long story short. I wish to learn how to create a good linker script so that should I change platforms/architectures/vendors, I'm not stuck at ground zero again with not knowing what to do. I'm not concerned with the difficulty of the task, so much as understanding it.
I've started a sort of project, as it were, to create a base or skeleton for programming and developing on STM's 32-bit Cortex-M3 chips, beginning with the STM32F103RB (I also have a TI Stellaris LM3S828, but that's another issue), with the help of jsiei97 and without the need for a licensed IDE, since I am a student and most students can't afford such things.
I understand that there's the ODev, and Eclipse Plugins and what not, and I have read various blogs, wikis, and docs/man pages, but most projects provide you with a linker script with little to no explanation as to why and where things have been defined.
I've compiled an arm-none-eabi toolchain for the STM32 but where I get hung up is in the linker script. CodeSourcery also requires one as well. I have a basic concept of how to create them and their syntax after reading the gnu man pages, but I simply haven't a clue where to start with putting in the various extra sections apart from the obvious .text, .bss and .data.
I created a rudimentary version but I get linking errors asking for section definitions and that's where I get stuck. I know how to define them, but knowing if what I'm doing is even close to right is the problem.
I have a simple linker script I reuse regularly across platforms, just change some addresses as needed.
http://github.com/dwelch67/
There are a number of samples many with gcc samples and most of those have linker scripts.
/* Minimal two-region linker script: code in rom, zero-initialised data in
ram. Only the ORIGIN/LENGTH values need changing per platform. */
MEMORY
{
rom : ORIGIN = 0x00000000, LENGTH = 0x40000
ram : ORIGIN = 0x10000000, LENGTH = 30K
}
SECTIONS
{
/* .text* also collects per-function sections such as .text.foo. */
.text : { *(.text*) } > rom
/* .bss* likewise collects per-variable .bss.* sections. */
.bss : { *(.bss*) } > ram
}
Here is a working linker script for an STM32F105RB (there are also versions for R8 and RC):
https://github.com/anrope/stm-arp/blob/github/arp.rb.ld (text below)
My top-of-the-head guess is that you won't have to change anything, except maybe the origin of the regions defined in the MEMORY{} statement. Hopefully the comments will be helpful to you.
I used this with a GNU/GCC cross-compiler I rolled myself. After compiling, it's helpful to run nm on your code to make sure sections are being placed at the correct addresses.
Edit:
I pieced this linker script together by using the GNU ld documentation:
http://sourceware.org/binutils/docs/ld/
and by examining the output of a GCC cross-compile with the standard linker script, using nm. I basically identified all the sections that were being output and figured out which ones were actually useful, and where in memory they should go for the STM32F105.
I made notes in the linker script of the purpose of each section.
/*
arp.{r8,rb,rc}.ld :
These linker scripts (one for each memory density of the stm32f105) are used by
the linker to arrange program symbols and sections in memory. This is especially
important for sections like the interrupt vector, which must be placed where the
processor is hard-coded to look for it.
*/
/*stm32f105 dev board linker script*/
/*
OUTPUT_FORMAT() defines the BFD (binary file descriptor) format
OUTPUT_FORMAT(default, big, little)
The first name is used by default; the second is used when the linker is
run with -EB and the third when it is run with -EL.
*/
OUTPUT_FORMAT ("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
/* ENTRY() defines the symbol at which to begin executing code.
_start is not defined in this script, so it has to come from one of the
input objects. */
ENTRY(_start)
/* tell ld where to look for archive libraries
(the commented-out lines are earlier toolchain locations) */
/*SEARCH_DIR("/home/arp/stm/ctc/arm-eabi/lib")*/
/*SEARCH_DIR("/home/arp/stm/ccbuild/method2/install/arm-eabi/lib")*/
SEARCH_DIR("/home/arp/stm32dev-root/usrlol/arm-eabi/lib")
/*
MEMORY{} defines the memory regions of the target device,
and gives them an alias for use later in the linker script.
The letters in parentheses are region attributes: r = read-only sections,
w = read/write sections, x = executable sections.
*/
/* stm32f105rb */
MEMORY
{
ram (rwx) : ORIGIN = 0x20000000, LENGTH = 32k
flash (rx) : ORIGIN = 0x08000000, LENGTH = 128k
option_bytes_rom (rx) : ORIGIN = 0x1FFFF800, LENGTH = 16
}
/* Only the ram and flash regions are referenced by the output sections of
this script; option_bytes_rom is declared but not assigned to. */
/* Heap and stack bounds exported as symbols. Both start 4 bytes past the
end of .bss (_ebss is assigned inside the .bss output section) and both end
at the last byte of the ram region, i.e. heap and stack share one range. */
_sheap = _ebss + 4;
_sstack = _ebss + 4;
/*placed __stack_base__ trying to figure out
global variable overwrite issue
__stack_base__ = _ebss + 4;*/
/* With the ram region declared as ORIGIN 0x20000000, LENGTH 32k, both
expressions below evaluate to 0x20007FFF, an odd address.
NOTE(review): if _estack is used directly as the initial stack pointer it
is not word-aligned -- confirm how the startup code consumes it. */
_eheap = ORIGIN(ram) + LENGTH(ram) - 1;
_estack = ORIGIN(ram) + LENGTH(ram) - 1;
/* SECTIONS{} defines all the ELF sections we want to create */
SECTIONS
{
    /*
    set . to an initial value (0 here, unless overridden on the command
    line through the "text-segment" segment start).
    . (dot) is the location counter. New sections are placed at the
    location pointed to by the location counter, and the location counter
    is automatically moved ahead the length of the new section. It is important
    to maintain alignment (not handled automatically by the location counter).
    */
    . = SEGMENT_START("text-segment", 0);

    /*isr_vector contains the interrupt vector.
    isr_vector is read only (could be write too?).
    isr_vector must appear at start of flash (USR),
    address 0x0800 0000.
    KEEP() stops --gc-sections from dropping the table: nothing in the
    program references it, the hardware reads it directly.*/
    .isr_vector :
    {
        . = ALIGN(4);
        _sisr_vector = .;
        KEEP(*(.isr_vector))
        _eisr_vector = .;
    } >flash

    /*text contains executable code.
    text is read and execute.*/
    .text :
    {
        . = ALIGN(4);
        *(.text)
        . = ALIGN(4);
        *(.text.*)
    } >flash

    /*init contains constructor functions
    called before entering main. used by crt (?).*/
    .init :
    {
        . = ALIGN(4);
        KEEP(*(.init))
    } >flash

    /*fini contains destructor functions
    called after leaving main. used by crt (?).*/
    .fini :
    {
        . = ALIGN(4);
        KEEP(*(.fini))
    } >flash

    /* rodata contains read only data.
    .rodata.* is collected as well: the compiler emits string literals and
    other mergeable constants into sub-sections such as .rodata.str1.4.
    Left unmatched, those become orphan sections placed after .rodata,
    where they collide with the load address of .data (_sidata).*/
    .rodata :
    {
        . = ALIGN(4);
        *(.rodata)
        *(.rodata.*)
        /* sidata contains the initial values
        for variables in the data section.
        sidata is read only.*/
        . = ALIGN(4);
        _sidata = .;
    } >flash

    /*data contains all initalized variables.
    data is read and write.
    It runs from ram but is loaded at _sidata in flash; startup code is
    expected to copy [_sidata ...) to [_sdata, _edata).
    .data.* is included so that variables compiled with -fdata-sections
    fall inside the copied range.
    .data (NOLOAD) : AT(_sidata)*/
    .data : AT(_sidata)
    {
        . = ALIGN(4);
        _sdata = .;
        *(.data)
        *(.data.*)
        _edata = .;
    } >ram

    /*bss contains unintialized variables.
    bss is read and write.
    .bss.* is included so that variables compiled with -fdata-sections
    fall inside the [_sbss, _ebss) range.
    .bss (NOLOAD) :*/
    .bss :
    {
        . = ALIGN(4);
        _sbss = .;
        __bss_start__ = .;
        *(.bss)
        *(.bss.*)
        . = ALIGN(4);
        /*COMMON is a special section containing
        uninitialized data.
        Example: (declared globally)
        int temp; //this will appear in COMMON */
        *(COMMON)
        _ebss = .;
        __bss_end__ = .;
    } >ram AT>flash

    . = ALIGN(4);
    end = .;

    /* remove the debugging information from the standard libraries */
    /* NOTE(review): ld's special discard section is spelled /DISCARD/.
    As written, this declares an ordinary output section named DISCARD
    that collects whatever input sections of these archives were not
    matched above -- confirm which behaviour is intended. */
    DISCARD :
    {
        libc.a ( * )
        libm.a ( * )
        libgcc.a ( * )
    }

    /* Stabs debugging sections. */
    .stab 0 : { *(.stab) }
    .stabstr 0 : { *(.stabstr) }
    .stab.excl 0 : { *(.stab.excl) }
    .stab.exclstr 0 : { *(.stab.exclstr) }
    .stab.index 0 : { *(.stab.index) }
    .stab.indexstr 0 : { *(.stab.indexstr) }
    .comment 0 : { *(.comment) }

    /* DWARF debug sections.
    Symbols in the DWARF debugging sections are relative to the beginning
    of the section so we begin them at 0. */
    /* DWARF 1 */
    .debug 0 : { *(.debug) }
    .line 0 : { *(.line) }
    /* GNU DWARF 1 extensions */
    .debug_srcinfo 0 : { *(.debug_srcinfo) }
    .debug_sfnames 0 : { *(.debug_sfnames) }
    /* DWARF 1.1 and DWARF 2 */
    .debug_aranges 0 : { *(.debug_aranges) }
    .debug_pubnames 0 : { *(.debug_pubnames) }
    /* DWARF 2 */
    .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
    .debug_abbrev 0 : { *(.debug_abbrev) }
    .debug_line 0 : { *(.debug_line) }
    .debug_frame 0 : { *(.debug_frame) }
    .debug_str 0 : { *(.debug_str) }
    .debug_loc 0 : { *(.debug_loc) }
    .debug_macinfo 0 : { *(.debug_macinfo) }
    /* SGI/MIPS DWARF 2 extensions */
    .debug_weaknames 0 : { *(.debug_weaknames) }
    .debug_funcnames 0 : { *(.debug_funcnames) }
    .debug_typenames 0 : { *(.debug_typenames) }
    .debug_varnames 0 : { *(.debug_varnames) }
}

Resources