I am using GCC on a Cortex M0 from NXP.
I have a non-initialized buffer which needs to be placed at 512 byte boundary due to DMA access restrictions:
DMA_CH_DESCRIPTOR_T __attribute__ ((aligned (512))) Chip_DMA_Table[MAX_DMA_CHANNEL];
This will end up in .bss section, but of course, due to alignment, there will be some lost space before. I know that .bss starts (in my MCU) at 0x10000000 which is already 512 aligned.
So the big question is how can I force my buffer to be the first symbol in .bss ?
I already tried like this but it doesn't work
.bss : ALIGN(4)
{
_bss = .;
PROVIDE(__start_bss_RAM = .) ;
PROVIDE(__start_bss_SRAM = .) ;
drv_dma.o (.bss)
*(.bss*)
*(COMMON)
. = ALIGN(4) ;
_ebss = .;
PROVIDE(__end_bss_RAM = .) ;
PROVIDE(__end_bss_SRAM = .) ;
PROVIDE(end = .);
} > SRAM AT> SRAM
Note: I can see several potential resolves:
defining my own .bss_top for example, and modify my startup script to consider it as a separate .bss and initialize it.
defining a separate section BEFORE actual .bss and initialize my buffer from code somewhere
memset(...)
But I said it's worth to ask, maybe there is a simple linker catch on this one.
Thank you,
code is here at commit #489ee1c
I am writing a unix-like kernel following this tutorial for personal learning. Global variable symbols is incorrect when I debug a unix-like kernel wrote by myself.
I start the kernel using
qemu-system-i386 -d cpu_reset -s -S -D ./run.log -drive format=raw,file=os_image -m 8G
there is also a problem that physical memory is only 3GB in code while I set -m 4G.
and start a gdb stoping at init_global_mm_vars() functions
.gdbinit
set arch i386
symbol-file /root/os/2-kernel/kernel.elf
b init_global_mm_vars
target remote localhost:1234
You can see that the address of symbol Kernel_Vmm_End is 0x58d4 ,but used in asm is 0x68d4. all above global variable symbols is incorrect.
Why all the global variable symbols go wrong ?
I found that if I don't use link.ld script and just use -Ttext=0,when link and all problems seem gone.
ENTRY(kernel_main) /* Kernel entry label */
OUTPUT_FORMAT("elf32-i386")
OUTPUT_ARCH(i386)
SECTIONS {
. = 0x0; /* Kernel code is located at 0x0 */
Kernel_Text_Vmm_Start_p = .; /* Export labels */
.text : /* Align at 4KB and load at 4KB */
{
*(.text) /* All text sections from all files */
}
. = ALIGN(0x1000);
Kernel_Rodata_Vmm_Start_p =.;
.rodata ALIGN (0x1000) : AT(ADDR(.rodata)) /* Align at 4KB and load at 4KB */
{
*(.rodata) /* All read-only data sections from all files */
}
. = ALIGN(0x1000);
Kernel_Data_Vmm_Start_p =.;
.data ALIGN (0x1000) : AT(ADDR(.data)) /* Align at 4KB and load at 4KB */
{
*(.data) /* All data sections from all files */
}
. = ALIGN(0x1000);
Kernel_Bss_Vmm_Start_p =.;
.bss ALIGN (0x1000) : AT(ADDR(.bss)) /* Align at 4KB and load at 4KB */
{
*(COMMON) /* All COMMON sections from all files */
*(.bss) /* All bss sections from all files */
}
. = ALIGN(0x1000);
Kernel_Vmm_End_p = .;
}
Still have no idea why this ld script goes wrong?
I'm building an embeeded software for STM32 microcontroller with the toolchain GNU Tools for STM32 and I need the binary output without gaps.
The linker produces a gap between sections .text and .rodata. The problem is the alignment of the section .rodata. The issue appears by using of the GNU Tools for STM32 version 9-2020-q2-update. The previous version I had used (7-2018-q2-update) did not produced that issue.
Excerpt from the linker script (it's the same for both GNU Tools versions):
SECTIONS
{
.text :
{
. = ALIGN(4);
*(.text) /* .text sections (code) */
*(.text*) /* .text* sections (code) */
*(.glue_7) /* glue arm to thumb code */
*(.glue_7t) /* glue thumb to arm code */
*(.eh_frame)
KEEP (*(.init))
KEEP (*(.fini))
. = ALIGN(4); /* PaulV: change that to ALIGN(8) eliminates the gap */
} >FLASH
/* Constant data into "FLASH" Rom type memory */
.rodata :
{
. = ALIGN(4);
*(.rodata) /* .rodata sections (constants, strings, etc.) */
*(.rodata*) /* .rodata* sections (constants, strings, etc.) */
. = ALIGN(4);
} >FLASH
}
More details:
The version 7-2018-q2-update produces the output without gap.
The .lst file (note that section .rodata is aligned to bound 4):
K4_G1.elf: file format elf32-littlearm
Sections:
Idx Name Size VMA LMA File off Algn
....
3 .text 0001a20c 08100800 08100800 00010800 2**4
CONTENTS, ALLOC, LOAD, READONLY, CODE
4 .rodata 00009b54 0811aa0c 0811aa0c 0002aa0c 2**2
CONTENTS, ALLOC, LOAD, READONLY, DATA
and .map file (no gap between the non-empty sections .fini and .rodata):
.fini 0x000000000811aa04 0x8 c:/st/stm32cubeide_1.4.0/stm32cubeide/plugins/com.st.stm32cube.ide.mcu.externaltools.gnu-tools-for-stm32.7-2018-q2-update.win32_1.5.0.202011040924/tools/bin/../lib/gcc/arm-none-eabi/7.3.1/thumb/v7e-m/fpv5/hard/crtn.o
0x000000000811aa0c . = ALIGN (0x4)
0x000000000811aa0c _etext = .
.vfp11_veneer 0x000000000811aa0c 0x0
.vfp11_veneer 0x000000000811aa0c 0x0 linker stubs
.v4_bx 0x000000000811aa0c 0x0
.v4_bx 0x000000000811aa0c 0x0 linker stubs
.iplt 0x000000000811aa0c 0x0
.iplt 0x000000000811aa0c 0x0 c:/st/stm32cubeide_1.4.0/stm32cubeide/plugins/com.st.stm32cube.ide.mcu.externaltools.gnu-tools-for-stm32.7-2018-q2-update.win32_1.5.0.202011040924/tools/bin/../lib/gcc/arm-none-eabi/7.3.1/thumb/v7e-m/fpv5/hard/crtbegin.o
.rodata 0x000000000811aa0c 0x9b54
0x000000000811aa0c . = ALIGN (0x4)
*(.rodata)
.rodata 0x000000000811aa0c 0x8c Src/app_composer/init.o
The version 9-2020-q2-update produces the output with gap.
The .lst file (note that section .rodata is aligned to bound 8, but why?):
K4_G1.elf: file format elf32-littlearm
Sections:
Idx Name Size VMA LMA File off Algn
...
3 .text 0001923c 08100800 08100800 00010800 2**4
CONTENTS, ALLOC, LOAD, READONLY, CODE
4 .rodata 000061f0 08119a40 08119a40 00029a40 2**3
CONTENTS, ALLOC, LOAD, READONLY, DATA
and .map file (there is a gap between the non-empty sections .fini and .rodata):
.fini 0x0000000008119a34 0x8 c:/st/stm32cubeide_1.4.0/stm32cubeide/plugins/com.st.stm32cube.ide.mcu.externaltools.gnu-tools-for-stm32.9-2020-q2-update.win32_1.5.0.202011040924/tools/bin/../lib/gcc/arm-none-eabi/9.3.1/thumb/v7e-m+dp/hard/crtn.o
.vfp11_veneer 0x0000000008119a3c 0x0
.vfp11_veneer 0x0000000008119a3c 0x0 linker stubs
.v4_bx 0x0000000008119a3c 0x0
.v4_bx 0x0000000008119a3c 0x0 linker stubs
.iplt 0x0000000008119a3c 0x0
.iplt 0x0000000008119a3c 0x0 c:/st/stm32cubeide_1.4.0/stm32cubeide/plugins/com.st.stm32cube.ide.mcu.externaltools.gnu-tools-for-stm32.9-2020-q2-update.win32_1.5.0.202011040924/tools/bin/../lib/gcc/arm-none-eabi/9.3.1/thumb/v7e-m+dp/hard/crtbegin.o
.rodata 0x0000000008119a40 0x61f0
0x0000000008119a40 . = ALIGN (0x4)
*(.rodata)
.rodata 0x0000000008119a40 0x96 Src/app_composer/init.o
Edit 03/16/2021
There are no sections *(.rodata) in the input object files having
alignment on the boundary 8 or greater.
Changing the section name .rodata to the name .text eliminates the
gap (The same result if I join the sections .text and .rodata content to a
single .text section):
SECTIONS
{
.text :
{
. = ALIGN(4);
*(.text) /* .text sections (code) */
/* ... */
. = ALIGN(4);
} >FLASH
/* Constant data into "FLASH" Rom type memory */
.text : /* <-- the same name as the previous section instead of .rodata */
{
. = ALIGN(4);
*(.rodata) /* .rodata sections (constants, strings, etc.) */
*(.rodata*) /* .rodata* sections (constants, strings, etc.) */
. = ALIGN(4);
} >FLASH
}
The source code and build settings are also the same for both variants.
What could be the reason for the problem and how could it be solved? Do I missing something?
P.S. Of cause I can change the alignment at the end of .text section to 8. That would be treating the symptoms, but I want to understand the cause.
Thanks in advance for your help!
Run objdump -h on the input object files. I suspect that you will find that the compiler is putting a minimum alignment of 8 on one of the input .rodata sections. The linker then sets the output alignment to the maximum of the input sections.
I'm confused about the location counter, especially when setting it to
a new value in the output section.
I write a simple program and a linker script to make my question more
clear. The code is as follows:
test.s
section .text
.globl _start
_start:
movq $1, %rax
movq $0, %rbx
int $0x80
test.lds
SECTIONS
{
. = 0x10;
label_1 = .;
custom_section : {
. = 0x20;
label_2 = . ;
label_3 = ABSOLUTE(.) ;
*(.text) ;
}
}
After linking, use nm command to print symbol addresses:
0000000000000010 T label_1
0000000000000030 T label_2
0000000000000030 A label_3
0000000000000030 T _start
I can't understand why lable_2 is 0x30. As the LD documentation
says, if . is used inside a section description, it refers to the
byte offset from the start of that section, not an absolute address.
In the custom_section, . is set to 0x20 which is a relative offset,
so I think label_2 should also be 0x20. The value of label_3 is
reasonable because it's an absolute address.
Could someone please explain why label_2 is 0x30? Thanks!
I have one file-level static C variable that isn't getting initialized.
const size_t VGA_WIDTH = 80;
const size_t VGA_HEIGHT = 25;
static uint16_t* vgat_buffer = (uint16_t*)0x62414756; // VGAb
static char vgat_initialized= '\0';
In particular, vgat_initialized isn't always 0 the first time it is accessed. (Of course, the problem only appears on certain machines.)
I'm playing around with writing my own OS, so I'm pretty sure this is a problem with my linker script; but, I'm not clear how exactly the variables are supposed to be organized in the image produced by the linker (i.e., I'm not sure if this variable is supposed to go in .data, .bss, some other section, etc.)
VGA_WIDTH and VGA_HEIGHT get placed in the .rodata section as expected.
vgat_buffer is placed in the .data section, as expected (By initializing this variable to 0x62417656, I can clearly see where the linker places it in the resulting image file.)
I can't figure out where vgat_initialized is supposed to go. I've included the relevant parts of the assembly file below. From what I understand, the .comm directive is supposed to allocate space for the variable in the data section; but, I can't tell where. Looking in the linker's map file didn't provide any clues either.
Interestingly enough, if I change the initialization to
static char vgat_initialized= 'x';
everything works as expected: I can clearly see where the variable is placed in the resulting image file (i.e., I can see the x in the hexdump of the image file).
Assembly code generated from the C file:
.text
.LHOTE15:
.local buffer.1138
.comm buffer.1138,100,64
.local buffer.1125
.comm buffer.1125,100,64
.local vgat_initialized
.comm vgat_initialized,1,1
.data
.align 4
.type vgat_buffer, #object
.size vgat_buffer, 4
vgat_buffer:
.long 1648445270
.globl VGA_HEIGHT
.section .rodata
.align 4
.type VGA_HEIGHT, #object
.size VGA_HEIGHT, 4
VGA_HEIGHT:
.long 25
.globl VGA_WIDTH
.align 4
.type VGA_WIDTH, #object
.size VGA_WIDTH, 4
VGA_WIDTH:
.long 80
.ident "GCC: (GNU) 4.9.2"
compilers can conform to their own names for sections certainly but using the common .data, .text, .rodata, .bss that we know from specific compilers, this should land in .bss.
But that doesnt in any way automatically zero it out. There needs to be a mechanism, sometimes depending on your toolchain the toolchain takes care of it and creates a binary that in addition to .data, .rodata (and naturally .text) being filled in will fill in .bss in the binary. But depends on a few things, primarily is this a simple ram only image, is everything living under one memory space definition in the linker script.
you could for example put .data after .bss in the linker script and depending the binary format you use and/or tools that convert that you could end up with zeroed memory in the binary without any other work.
Normally though you should expect to using toolchain specific (linker scripts are linker specific not to be assumed to be universal to all tools) mechanism for defining where .bss is from your perspective, then some form of communication from the linker as to where it starts and what size, that information is used by the bootstrap whose job it is to zero it in that case, and one can assume it is always the bootstrap's job to zero .bss with naturally some exceptions. Likewise if the binary is meant to be on a read only media (rom, flash, etc) but .data, and .bss are read/write you need to have .data in its entirety on this media then someone has to copy it to its runtime position in ram, and .bss is either part of that depending on the toolchain and how you used it or the start address and size are on the read only media and someone has to zero that space at some point pre-main(). Here again this is the job of the bootstrap. Set the stack pointer, move .data if needed, zero .bss are the typical minimal jobs of the bootstrap, you can shortcut them in special cases or avoid using .data or .bss.
Since it is the linkers job to take all the little .data and .bss (and other) definitions from the objects being linked and combine them per the directions from the user (linker script, command line, whatever that tool uses), the linker ultimately knows.
In the case of gcc you use what I would call variables that are defined in the linker script, the linker script can fill in these values with matching variable/label names for the assembler such that a generic bootstrap can be used and you dont have to do any more work than that.
Like this but possibly more complicated
MEMORY
{
bob : ORIGIN = 0x8000, LENGTH = 0x1000
ted : ORIGIN = 0xA000, LENGTH = 0x1000
}
SECTIONS
{
.text : { *(.text*) } > bob
__data_rom_start__ = .;
.data : {
__data_start__ = .;
*(.data*)
} > ted AT > bob
__data_end__ = .;
__data_size__ = __data_end__ - __data_start__;
.bss : {
__bss_start__ = .;
*(.bss*)
} > bob
__bss_end__ = .;
__bss_size__ = __bss_end__ - __bss_start__;
}
then you can pull these into the assembly language bootstrap
.globl bss_start
bss_start: .word __bss_start__
.globl bss_end
bss_end: .word __bss_end__
.word __bss_size__
.globl data_rom_start
data_rom_start:
.word __data_rom_start__
.globl data_start
data_start:
.word __data_start__
.globl data_end
data_end:
.word __data_end__
.word __data_size__
and then write some code to operate on those as needed for your design.
you can simply put things like that in a linked in assembly language file without other code using them and assemble, compile other code and link and then the disassembly or other tools you prefer will show you what the linker generated, tweak that until you are satisfied then you can write or borrow or steal bootstrap code to use them.
for bare metal I prefer to not completely conform to the standard with my code, dont have any .data and dont expect .bss to be zero, so my bootstrap sets the stack pointer and calls main, done. For an operating system, you should conform. the toolchains already have this solved for the native platform, but if you are taking over that with your own linker script and boostrap then you need to deal with it, if you want to use an existing toolchains solution for an existing operating system then...done...just do that.
This answer is simply an extension of the others. As has been mentioned C standard has rules about initialization:
10) If an object that has automatic storage duration is not initialized explicitly, its value is indeterminate. If an object that has static storage duration is not initialized explicitly, then:
if it has pointer type, it is initialized to a null pointer;
if it has arithmetic type, it is initialized to (positive or unsigned) zero;
if it is an aggregate, every member is initialized (recursively) according to these rules;
if it is a union, the first named member is initialized (recursively) according to these rules.
The problem in your code is that a computers memory may not always be initialized to zero. It is up to you to make sure the BSS section is initialized to zero in a free standing environment (like your OS and bootloader).
The BSS sections usually don't (by default) take up space in a binary file and usually occupy memory in the area beyond the limits of the code and data that appears in the binary. This is done to reduce the size of the binary that has to be read into memory.
I know you are writing an OS for x86 booting with legacy BIOS. I know that you are using GCC from your other recent questions. I know you are using GNU assembler for part of your bootloader. I know that you have a linker script, but I don't know what it looks like. The usual mechanism to do this is via a linker script that places the BSS data at the end, and creates start and end symbols to define the address extents of the section. Once these symbols are defined by the linker they can be used by C code (or assembly code) to loop through the region and set it to zero.
I present a reasonably simple MCVE that does this. The code reads an extra sector with the kernel with Int 13h/AH=2h; enables the A20 line (using fast A20 method); loads a GDT with 32-bit descriptors; enables protected mode; completes the transition into 32-bit protected mode; and then calls a kernel entry point in C called kmain. kmain calls a C function called zero_bss that initializes the BSS section based on the starting and ending symbols (__bss_start and __bss_end) generated by a custom linker script.
boot.S:
.extern kmain
.globl mbrentry
.code16
.section .text
mbrentry:
# If trying to create USB media, a BPB here may be needed
# At entry DL contains boot drive number
# Segment registers to zero
xor %ax, %ax
mov %ax, %ds
mov %ax, %es
# Set stack to grow down from area under the place the bootloader was loaded
mov %ax, %ss
mov $0x7c00, %sp
cld # Ensure forward direction of MOVS/SCAS/LODS instructions
# which is required by generated C code
# Load kernel into memory
mov $0x02, %ah # Disk read
mov $1, %al # Read 1 sector
xor %ch, %ch # Cylinder 0
xor %dh, %dh # Head 0
mov $2, %cl # Start reading from second sector
mov $0x7e00, %bx # Load kernel at 0x7e00
int $0x13
# Quick and dirty A20 enabling. May not work on all hardware
a20fast:
in $0x92, %al
or $2, %al
out %al, $0x92
loadgdt:
cli # Turn off interrupts until a Interrupt Vector
# Table (IVT) is set
lgdt (gdtr)
mov %cr0, %eax
or $1, %al
mov %eax, %cr0 # Enable protected mode
jmp $0x08,$init_pm # FAR JMP to next instruction to set
# CS selector with a 32-bit code descriptor and to
# flush the instruction prefetch queue
.code32
init_pm:
# Set remaining 32-bit selectors
mov $DATA_SEG, %ax
mov %ax, %ds
mov %ax, %es
mov %ax, %fs
mov %ax, %gs
mov %ax, %ss
# Start executing kernel
call kmain
cli
loopend: # Infinite loop when finished
hlt
jmp loopend
.align 8
gdt_start:
.long 0 # null descriptor
.long 0
gdt_code:
.word 0xFFFF # limit low
.word 0 # base low
.byte 0 # base middle
.byte 0b10011010 # access
.byte 0b11001111 # granularity/limit high
.byte 0 # base high
gdt_data:
.word 0xFFFF # limit low (Same as code)
.word 0 # base low
.byte 0 # base middle
.byte 0b10010010 # access
.byte 0b11001111 # granularity/limit high
.byte 0 # base high
end_of_gdt:
gdtr:
.word end_of_gdt - gdt_start - 1
# limit (Size of GDT)
.long gdt_start # base of GDT
CODE_SEG = gdt_code - gdt_start
DATA_SEG = gdt_data - gdt_start
kernel.c:
#include <stdint.h>
extern uintptr_t __bss_start[];
extern uintptr_t __bss_end[];
/* Zero the BSS section 4-bytes at a time */
static void zero_bss(void)
{
uint32_t *memloc = __bss_start;
while (memloc < __bss_end)
*memloc++ = 0;
}
int kmain(){
zero_bss();
return 0;
}
link.ld
ENTRY(mbrentry)
SECTIONS
{
. = 0x7C00;
.mbr : {
boot.o(.text);
boot.o(.*);
}
. = 0x7dfe;
.bootsig : {
SHORT(0xaa55);
}
. = 0x7e00;
.kernel : {
*(.text*);
*(.data*);
*(.rodata*);
}
.bss : SUBALIGN(4) {
__bss_start = .;
*(COMMON);
*(.bss*);
}
. = ALIGN(4);
__bss_end = .;
/DISCARD/ : {
*(.eh_frame);
*(.comment);
}
}
To compile, link and generate a binary file that can be used in a disk image from this code, you could use commands like:
as --32 boot.S -o boot.o
gcc -c -m32 -ffreestanding -O3 kernel.c
gcc -ffreestanding -nostdlib -Wl,--build-id=none -m32 -Tlink.ld \
-o boot.elf -lgcc boot.o kernel.o
objcopy -O binary boot.elf boot.bin
The C standard says that static variables must be zero-initialized, even in absence of explicit initializer, so static char vgat_initialized= '\0'; is equivalent to static char vgat_initialized;.
In ELF and other similar formats, the zero-initialized data, such as this vgat_initialized goes to the .bss section. If you load such an executable yourself into memory, you need to explicitly zero the .bss part of the data segment.
The other answers are very complete and very helpful. In turns out that, in my specific case, I just needed to know that static variables initialized to 0 were put in .bss and not .data. Adding a .bss section to the linker script placed a zeroed-out section of memory in the image which solved the problem.