I use a linker script to describe the memory map of my os kernel. In the linker script I defined many symbols to record location of section start/end. The linker script is as follow:
OUTPUT_FORMAT(binary)
SECTIONS {
/DISCARD/ : {
*(.comment)
*(.note.GNU-stack)
*(.eh_frame)
}
. = 0x100000;
kernel_load_addr = .;
.text : {
kernel_text_start = .;
*(.boot)
*(.text*)
kernel_text_end = .;
} = 0x90
.data : {
kernel_data_start = .;
*(.rodata*)
*(.data*)
kernel_data_end = .;
} = 0
.bss : {
kernel_bss_start = .;
*(COMMON)
*(.bss)
kernel_bss_end = .;
} = 0
kernel_end = .;
}
But if I print the address of these symbols out, the *_start symbol becomes same as *_end symbol.
I'm printing its value with following code:
extern char kernel_text_start;
extern char kernel_text_end;
extern char kernel_data_start;
extern char kernel_data_end;
extern char kernel_bss_start;
extern char kernel_bss_end;
...
raw_write(u64_to_str((uint64_t) &kernel_text_start, buf, 16), 0x0f, 0);
raw_write(u64_to_str((uint64_t) &kernel_text_end, buf, 16), 0x0f, 40);
raw_write(u64_to_str((uint64_t) &kernel_data_start, buf, 16), 0x0f, 80);
raw_write(u64_to_str((uint64_t) &kernel_data_end, buf, 16), 0x0f, 120);
raw_write(u64_to_str((uint64_t) &kernel_bss_start, buf, 16), 0x0f, 160);
raw_write(u64_to_str((uint64_t) &kernel_bss_end, buf, 16), 0x0f, 200);
and result is
The map file is:
Allocating common symbols
Common symbol size file
buddy_num 0x40 build/memory/page_alloc.c.o
buddy_map 0x40 build/memory/page_alloc.c.o
Discarded input sections
.comment 0x0000000000000000 0x4e build/kernel/main.c.o
.note.GNU-stack
0x0000000000000000 0x0 build/kernel/main.c.o
.eh_frame 0x0000000000000000 0x70 build/kernel/main.c.o
.comment 0x0000000000000000 0x4e build/memory/page_alloc.c.o
.note.GNU-stack
0x0000000000000000 0x0 build/memory/page_alloc.c.o
.eh_frame 0x0000000000000000 0xa8 build/memory/page_alloc.c.o
.comment 0x0000000000000000 0x4e build/memory/virt_alloc.c.o
.note.GNU-stack
0x0000000000000000 0x0 build/memory/virt_alloc.c.o
.comment 0x0000000000000000 0x4e build/library/bitmap.c.o
.note.GNU-stack
0x0000000000000000 0x0 build/library/bitmap.c.o
.eh_frame 0x0000000000000000 0x50 build/library/bitmap.c.o
.comment 0x0000000000000000 0x4e build/library/string.c.o
.note.GNU-stack
0x0000000000000000 0x0 build/library/string.c.o
.eh_frame 0x0000000000000000 0x150 build/library/string.c.o
Memory Configuration
Name Origin Length Attributes
*default* 0x0000000000000000 0xffffffffffffffff
Linker script and memory map
/DISCARD/
*(.comment)
*(.note.GNU-stack)
*(.eh_frame)
*(.comment)
*(.note.GNU-stack)
*(.eh_frame)
0x0000000000100000 . = 0x100000
0x0000000000100000 kernel_load_addr = .
0x0000000000100000 text_start_addr = .
.text 0x0000000000100000 0x262f
0x0000000000100000 kernel_text_start = .
*(.boot)
.boot 0x0000000000100000 0x29 build/boot/boot.asm.o
*(.text*)
*fill* 0x0000000000100029 0x7 90
.text 0x0000000000100030 0x0 build/boot/boot.asm.o
.text 0x0000000000100030 0x14f build/boot/init.asm.o
0x0000000000100030 wheel_init
*fill* 0x000000000010017f 0x1 90
.text 0x0000000000100180 0x73c build/kernel/main.c.o
0x0000000000100180 raw_write
0x0000000000100210 read_info
0x00000000001005e0 wheel_main
*fill* 0x00000000001008bc 0x4 90
.text 0x00000000001008c0 0x974 build/memory/page_alloc.c.o
0x00000000001008c0 page_alloc_init
0x0000000000100cb0 find_free_pages
0x0000000000100ef0 alloc_pages
0x00000000001010c0 free_pages
.text 0x0000000000101234 0x0 build/memory/virt_alloc.c.o
*fill* 0x0000000000101234 0xc 90
.text 0x0000000000101240 0x2ff build/library/bitmap.c.o
0x0000000000101240 bitmap_set
0x00000000001013c0 bitmap_clear
*fill* 0x000000000010153f 0x1 90
.text 0x0000000000101540 0x10ef build/library/string.c.o
0x0000000000101540 strlen
0x0000000000101650 strcpy
0x00000000001016c0 strncpy
0x0000000000101770 memcpy
0x0000000000101960 memset
0x0000000000101b60 u32_to_str
0x0000000000101c90 u64_to_str
0x0000000000101dd0 i32_to_str
0x0000000000101e60 i64_to_str
0x0000000000101ef0 vsprintf
0x00000000001025d0 sprintf
0x000000000010262f kernel_text_end = .
0x000000000010262f kernel_text_start = .
*(.boot)
*(.text*)
0x000000000010262f kernel_text_end = .
.data 0x0000000000102630 0x174
0x0000000000102630 kernel_data_start = .
*(.rodata*)
.rodata 0x0000000000102630 0x8 build/kernel/main.c.o
.rodata.str1.1
0x0000000000102638 0x82 build/kernel/main.c.o
*fill* 0x00000000001026ba 0x6 00000000
.rodata 0x00000000001026c0 0xa5 build/library/string.c.o
*(.data*)
*fill* 0x0000000000102765 0x3 00000000
.data 0x0000000000102768 0x3a build/boot/init.asm.o
*fill* 0x00000000001027a2 0x2 00000000
.data 0x00000000001027a4 0x0 build/kernel/main.c.o
.data 0x00000000001027a4 0x0 build/memory/page_alloc.c.o
.data 0x00000000001027a4 0x0 build/memory/virt_alloc.c.o
.data 0x00000000001027a4 0x0 build/library/bitmap.c.o
.data 0x00000000001027a4 0x0 build/library/string.c.o
0x00000000001027a4 kernel_data_end = .
0x00000000001027a4 kernel_data_start = .
*(.rodata*)
*(.data*)
0x00000000001027a4 kernel_data_end = .
0x00000000001027a4 data_end_addr = .
.bss 0x0000000000103000 0x6000
0x0000000000103000 kernel_bss_start = .
*(COMMON)
COMMON 0x0000000000103000 0x80 build/memory/page_alloc.c.o
0x0000000000103000 buddy_num
0x0000000000103040 buddy_map
*(.bss)
*fill* 0x0000000000103080 0xf80 00000000
.bss 0x0000000000104000 0x5000 build/boot/init.asm.o
0x0000000000104000 kernel_stack
0x0000000000105000 pml4t
.bss 0x0000000000109000 0x0 build/kernel/main.c.o
.bss 0x0000000000109000 0x0 build/memory/page_alloc.c.o
.bss 0x0000000000109000 0x0 build/memory/virt_alloc.c.o
.bss 0x0000000000109000 0x0 build/library/bitmap.c.o
.bss 0x0000000000109000 0x0 build/library/string.c.o
0x0000000000109000 kernel_bss_end = .
0x0000000000109000 kernel_bss_start = .
*(COMMON)
*(.bss)
0x0000000000109000 kernel_bss_end = .
0x0000000000109000 bss_end_addr = .
0x0000000000109000 kernel_end = .
LOAD build/boot/boot.asm.o
LOAD build/boot/init.asm.o
LOAD build/kernel/main.c.o
LOAD build/memory/page_alloc.c.o
LOAD build/memory/virt_alloc.c.o
LOAD build/library/bitmap.c.o
LOAD build/library/string.c.o
LOAD link.lds
0x0000000000100000 . = 0x100000
0x0000000000100000 kernel_load_addr = .
0x0000000000100000 text_start_addr = .
0x0000000000100000 data_end_addr = .
0x0000000000100000 bss_end_addr = .
0x0000000000100000 kernel_end = .
OUTPUT(build/kernel.bin binary)
You can see the value of these symbols got reassigned.
So how can I avoid this and get correct address in my program?
Related
I made this simple C program and compiled it without ASLR
#include <stdio.h>
#include <stdlib.h>
int a = 10;
int b = 20;
int main(int argc, char *argv[])
{
printf("%lx\n",&a);
printf("%lx\n",&b);
return 0;
}
Every time I execute it, the result is the same:
555555558018
55555555801c
Because of that, I am thinking that the data section should start somewhere near to 0x555555558018.
However, when I list the segments of my binary I see the following:
Elf file type is DYN (Position-Independent Executable file)
Entry point 0x1050
There are 13 program headers, starting at offset 64
Program Headers:
Type Offset VirtAddr PhysAddr
FileSiz MemSiz Flags Align
PHDR 0x0000000000000040 0x0000000000000040 0x0000000000000040
0x00000000000002d8 0x00000000000002d8 R 0x8
INTERP 0x0000000000000318 0x0000000000000318 0x0000000000000318
0x000000000000001c 0x000000000000001c R 0x1
[Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
LOAD 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000618 0x0000000000000618 R 0x1000
LOAD 0x0000000000001000 0x0000000000001000 0x0000000000001000
0x0000000000000195 0x0000000000000195 R E 0x1000
LOAD 0x0000000000002000 0x0000000000002000 0x0000000000002000
0x00000000000000e4 0x00000000000000e4 R 0x1000
LOAD 0x0000000000002dd0 0x0000000000003dd0 0x0000000000003dd0
0x0000000000000250 0x0000000000000258 RW 0x1000
DYNAMIC 0x0000000000002de0 0x0000000000003de0 0x0000000000003de0
0x00000000000001e0 0x00000000000001e0 RW 0x8
NOTE 0x0000000000000338 0x0000000000000338 0x0000000000000338
0x0000000000000020 0x0000000000000020 R 0x8
NOTE 0x0000000000000358 0x0000000000000358 0x0000000000000358
0x0000000000000044 0x0000000000000044 R 0x4
GNU_PROPERTY 0x0000000000000338 0x0000000000000338 0x0000000000000338
0x0000000000000020 0x0000000000000020 R 0x8
GNU_EH_FRAME 0x000000000000200c 0x000000000000200c 0x000000000000200c
0x000000000000002c 0x000000000000002c R 0x4
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 RW 0x10
GNU_RELRO 0x0000000000002dd0 0x0000000000003dd0 0x0000000000003dd0
0x0000000000000230 0x0000000000000230 R 0x1
Section to Segment mapping:
Segment Sections...
00
01 .interp
02 .interp .note.gnu.property .note.gnu.build-id .note.ABI-tag .gnu.hash .dynsym .dynstr .gnu.version .gnu.version_r .rela.dyn .rela.plt
03 .init .plt .plt.got .text .fini
04 .rodata .eh_frame_hdr .eh_frame
05 .init_array .fini_array .dynamic .got .got.plt .data .bss
06 .dynamic
07 .note.gnu.property
08 .note.gnu.build-id .note.ABI-tag
09 .note.gnu.property
10 .eh_frame_hdr
11
12 .init_array .fini_array .dynamic .got
There is not such an address. I think that maybe there is a difference between C pointers (I observe that they consist of 48 bits), and the Virtual Addresses of the segments (that consist of 64 bits). Where are the C pointers actually pointing to?
I have different hypothesis. Once I read that C pointers are actually offsets of their segments (not sure if this is true). The other thing I can think about, is that C pointers are logical addresses, while the segment's virtual addresses refer to the Linear Address Space. See the difference below:
Memory in x86
I am trying to write my own linker script.
The current version is here:
MEMORY
{
ROM (rx) : ORIGIN = 0x00000000, LENGTH = 0x00004000
RAM (rwx) : ORIGIN = 0x00004000, LENGTH = 0x00004000
}
STACK_SIZE = 0x3000;
BOOT_PC = 0x1000;
/* Section Definitions */
SECTIONS
{
/* Code and constants */
.text :
{
*(.rodata*);
KEEP(*(.vectors .vectors.*));
. = BOOT_PC;
KEEP(*start.o(.text*));
*(.text*);
_etext = . ;
_idata = . ;
} > ROM
/* Unitialized data */
.bss (NOLOAD) :
{
_sbss = . ;
*(.bss*);
*(COMMON);
_ebss = . ;
} > RAM
/* Initialized data */
.data : AT(_idata)
{
_sdata = . ;
*(.data*);
_edata = . ;
} > RAM
/* Stack */
.stack (NOLOAD):
{
. = ALIGN(8);
. = . + STACK_SIZE;
. = ALIGN(8);
_stack = . ;
} > RAM
}
In my C program, I have global variables which are supposed to go in .bss (foo) and .data(init, a1 and a2) sections:
int foo;
int init = 4;
int a1 = 4;
int a2 = 4;
When I use objdump, I have the following result:
elf/noste.elf: file format elf32-littleriscv
SYMBOL TABLE:
00000000 l d .text 00000000 .text
00004000 l d .sbss 00000000 .sbss
00004004 l d .sdata 00000000 .sdata
00004010 l d .stack 00000000 .stack
00000000 l d .comment 00000000 .comment
00000000 l d .riscv.attributes 00000000 .riscv.attributes
00000000 l df *ABS* 00000000 start.o
0000103c l .text 00000000 _end_trigger
00001050 l .text 00000000 _end_loop
00000000 l df *ABS* 00000000 main.c
00000000 l df *ABS* 00000000 reset.c
0000105c g F .text 0000008c reset_handler
00003000 g *ABS* 00000000 STACK_SIZE
000010e8 g .text 00000000 _etext
00004000 g .text 00000000 _sbss
00004008 g O .sdata 00000004 a1
00004004 g .sdata 00000000 _sdata
00004000 g .text 00000000 _ebss
000010e8 g .text 00000000 _idata
00001000 g .text 00000000 _start
0000400c g O .sdata 00000004 init
00001054 g F .text 00000008 main
00004004 g O .sdata 00000004 a2
00004000 g O .sbss 00000004 foo
00004004 g .sdata 00000000 _edata
0000103c g .text 00000000 _end
00007010 g .stack 00000000 _stack
00001000 g *ABS* 00000000 BOOT_PC
As expected, the different C variables are placed in the .sbss and .sdata sections.
However, _ebss and _edata are not incremented and have the same value than _sbss and _sdata.
Instead of _ebss = 00004000 and _edata = 00004004, I expected _ebss = 00004004 and _edata = 00004010.
An idea about my issue here ?
Thanks for the help.
There is a remote 64-bit *nix server that can compile a user-provided code (which should be written in Rust, but I don't think it matters since it uses LLVM). I don't know which compiler/linker flags it uses, but the compiled ELF executable looks weird - it has 4 LOAD segments:
$ readelf -e executable
...
Program Headers:
Type Offset VirtAddr PhysAddr
FileSiz MemSiz Flags Align
...
LOAD 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000004138 0x0000000000004138 R 0x1000
LOAD 0x0000000000005000 0x0000000000005000 0x0000000000005000
0x00000000000305e9 0x00000000000305e9 R E 0x1000
LOAD 0x0000000000036000 0x0000000000036000 0x0000000000036000
0x000000000000d808 0x000000000000d808 R 0x1000
LOAD 0x0000000000043da0 0x0000000000044da0 0x0000000000044da0
0x0000000000002290 0x00000000000024a0 RW 0x1000
...
On my own system all executables that I was looking at only have 2 LOAD segments:
Program Headers:
Type Offset VirtAddr PhysAddr
FileSiz MemSiz Flags Align
...
LOAD 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x00000000003000c0 0x00000000003000c0 R E 0x200000
LOAD 0x00000000003002b0 0x00000000005002b0 0x00000000005002b0
0x00000000000776c8 0x000000000009b200 RW 0x200000
...
What are the circumstances (compiler/linker versions, flags etc) under which a compiler might build an ELF with 4 LOAD segments?
What is the point of having 4 LOAD segments? I imagine that having a segment with read but not execute permission might help against certain exploits, but why have two such segments?
A typical BFD-ld or Gold linked Linux executable has 2 loadable segments, with the ELF header merged with .text and .rodata into the first RE segment, and .data, .bss and other writable sections merged into the second RW segment.
Here is the typical section to segment mapping:
$ echo "int foo; int main() { return 0;}" | clang -xc - -o a.out-gold -fuse-ld=gold
$ readelf -Wl a.out-gold
Elf file type is EXEC (Executable file)
Entry point 0x400420
There are 9 program headers, starting at offset 64
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
PHDR 0x000040 0x0000000000400040 0x0000000000400040 0x0001f8 0x0001f8 R 0x8
INTERP 0x000238 0x0000000000400238 0x0000000000400238 0x00001c 0x00001c R 0x1
[Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
LOAD 0x000000 0x0000000000400000 0x0000000000400000 0x0006b0 0x0006b0 R E 0x1000
LOAD 0x000e18 0x0000000000401e18 0x0000000000401e18 0x0001f8 0x000200 RW 0x1000
DYNAMIC 0x000e28 0x0000000000401e28 0x0000000000401e28 0x0001b0 0x0001b0 RW 0x8
NOTE 0x000254 0x0000000000400254 0x0000000000400254 0x000020 0x000020 R 0x4
GNU_EH_FRAME 0x00067c 0x000000000040067c 0x000000000040067c 0x000034 0x000034 R 0x4
GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0x10
GNU_RELRO 0x000e18 0x0000000000401e18 0x0000000000401e18 0x0001e8 0x0001e8 RW 0x8
Section to Segment mapping:
Segment Sections...
00
01 .interp
02 .interp .note.ABI-tag .dynsym .dynstr .gnu.hash .hash .gnu.version .gnu.version_r .rela.dyn .init .text .fini .rodata .eh_frame .eh_frame_hdr
03 .fini_array .init_array .dynamic .got .got.plt .data .bss
04 .dynamic
05 .note.ABI-tag
06 .eh_frame_hdr
07
08 .fini_array .init_array .dynamic .got .got.plt
This optimizes the number of mmaps that the kernel must perform to load such executable, but at a security cost: the data in .rodata shouldn't be executable, but is (because it's merged with .text, which must be executable). This may significantly increase the attack surface for someone trying to hijack a process.
Newer Linux systems, in particular using LLD to link binaries, prioritize security over speed, and put ELF header and .rodata into the first R-only segment, resulting in 3 load segments and improved security. Here is a typical mapping:
$ echo "int foo; int main() { return 0;}" | clang -xc - -o a.out-lld -fuse-ld=lld
$ readelf -Wl a.out-lld
Elf file type is EXEC (Executable file)
Entry point 0x201000
There are 10 program headers, starting at offset 64
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
PHDR 0x000040 0x0000000000200040 0x0000000000200040 0x000230 0x000230 R 0x8
INTERP 0x000270 0x0000000000200270 0x0000000000200270 0x00001c 0x00001c R 0x1
[Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
LOAD 0x000000 0x0000000000200000 0x0000000000200000 0x000558 0x000558 R 0x1000
LOAD 0x001000 0x0000000000201000 0x0000000000201000 0x000185 0x000185 R E 0x1000
LOAD 0x002000 0x0000000000202000 0x0000000000202000 0x001170 0x002005 RW 0x1000
DYNAMIC 0x003010 0x0000000000203010 0x0000000000203010 0x000150 0x000150 RW 0x8
GNU_RELRO 0x003000 0x0000000000203000 0x0000000000203000 0x000170 0x001000 R 0x1
GNU_EH_FRAME 0x000440 0x0000000000200440 0x0000000000200440 0x000034 0x000034 R 0x1
GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0
NOTE 0x00028c 0x000000000020028c 0x000000000020028c 0x000020 0x000020 R 0x4
Section to Segment mapping:
Segment Sections...
00
01 .interp
02 .interp .note.ABI-tag .rodata .dynsym .gnu.version .gnu.version_r .gnu.hash .hash .dynstr .rela.dyn .eh_frame_hdr .eh_frame
03 .text .init .fini
04 .data .tm_clone_table .fini_array .init_array .dynamic .got .bss
05 .dynamic
06 .fini_array .init_array .dynamic .got
07 .eh_frame_hdr
08
09 .note.ABI-tag
Not to be left behind, the newer BFD-ld (my version is 2.31.1) also makes ELF header and .rodata read-only, but fails to merge two R-only segments into one, resulting in 4 loadable segments:
$ echo "int foo; int main() { return 0;}" | clang -xc - -o a.out-bfd -fuse-ld=bfd
$ readelf -Wl a.out-bfd
Elf file type is EXEC (Executable file)
Entry point 0x401020
There are 11 program headers, starting at offset 64
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
PHDR 0x000040 0x0000000000400040 0x0000000000400040 0x000268 0x000268 R 0x8
INTERP 0x0002a8 0x00000000004002a8 0x00000000004002a8 0x00001c 0x00001c R 0x1
[Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
LOAD 0x000000 0x0000000000400000 0x0000000000400000 0x0003f8 0x0003f8 R 0x1000
LOAD 0x001000 0x0000000000401000 0x0000000000401000 0x00018d 0x00018d R E 0x1000
LOAD 0x002000 0x0000000000402000 0x0000000000402000 0x000110 0x000110 R 0x1000
LOAD 0x002e40 0x0000000000403e40 0x0000000000403e40 0x0001e8 0x0001f0 RW 0x1000
DYNAMIC 0x002e50 0x0000000000403e50 0x0000000000403e50 0x0001a0 0x0001a0 RW 0x8
NOTE 0x0002c4 0x00000000004002c4 0x00000000004002c4 0x000020 0x000020 R 0x4
GNU_EH_FRAME 0x002004 0x0000000000402004 0x0000000000402004 0x000034 0x000034 R 0x4
GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0x10
GNU_RELRO 0x002e40 0x0000000000403e40 0x0000000000403e40 0x0001c0 0x0001c0 R 0x1
Section to Segment mapping:
Segment Sections...
00
01 .interp
02 .interp .note.ABI-tag .hash .gnu.hash .dynsym .dynstr .gnu.version .gnu.version_r .rela.dyn
03 .init .text .fini
04 .rodata .eh_frame_hdr .eh_frame
05 .init_array .fini_array .dynamic .got .got.plt .data .bss
06 .dynamic
07 .note.ABI-tag
08 .eh_frame_hdr
09
10 .init_array .fini_array .dynamic .got
Finally, some of these choices are affected by the --(no)rosegment (or -Wl,z,noseparate-code for BFD ld) linker option.
I am currently trying to learn the ways of C and am using GCC to translate C to ARM instructions. To do so, I am attempting to draw a simple shape to the window, but am having a hard time working out how to do it without using a graphics package like . Here is what I have managed to work out so far:
My make file is pretty self-explanatory I would say.
CC=/cygdrive/c/Users/Ribbyon/Desktop/gcc-arm-none-eabi-5_3-2016q1-20160330-win32/bin/arm-none-eabi-gcc
LD=/cygdrive/c/Users/Ribbyon/Desktop/gcc-arm-none-eabi-5_3-2016q1-20160330-win32/bin/arm-none-eabi-ld.exe
AS=$(CC)
OBJCOPY=/cygdrive/c/Users/Ribbyon/Desktop/gcc-arm-none-eabi-5_3-2016q1-20160330-win32/bin/arm-none-eabi-objcopy.exe
QEMU=/cygdrive/c/Users/Ribbyon/Desktop/qemu/qemu-system-arm.exe
#these should be cross-platform...
CC+= -Wall -c -mcpu=arm926ej-s -marm -Werror
LD+=-Map kernelmap.txt -T linkerscript.txt
AS+= -c -x assembler-with-cpp -mcpu=arm926ej-s
QEMUARGS=-machine integratorcp -kernel kernel.bin -serial stdio
DISPLAY?=:0
export DISPLAY
SDL_STDIO_REDIRECT=no
export SDL_STDIO_REDIRECT
all:
$(AS) kernelasm.s
$(CC) kernelc.c
$(CC) console.c
$(LD) -o kernel.tmp kernelasm.o kernelc.o console.o
$(OBJCOPY) -Obinary kernel.tmp kernel.bin
$(QEMU) $(QEMUARGS) kernel.bin
clean:
-/bin/rm *.o *.exe *.bin *.img *.tmp
Linkerscript to put it together:
ENTRY (_start)
SECTIONS {
/* The kernel will be loaded at this address in RAM.
The dot (.) means "the current location" */
. = 0x10000 ;
.text : {
/* stext = start of text (read-only) section */
stext = .;
/* .text = program code. rodata and rdata = read-only data */
*(.text)
*(.rodata)
*(.rdata)
*(.rdata$zzz)
/* etext = end of text section */
etext = .;
/* pad to a 4K boundary */
. = ALIGN( ABSOLUTE(.) , 0x1000 );
/* start of data (writable) section */
_sdata = .;
sdata = .;
*(.data)
_edata = .;
edata = .;
/* end of data section */
/* bss: Block Started by Symbol: Uninitialized data */
_sbss = . ;
sbss = . ;
*(COMMON)
*(.bss)
_ebss = . ;
ebss = . ;
}
/DISCARD/ : {
*(.eh_frame)
*(.comment)
}
}
Kernel Map (for completeness):
Discarded input sections
.comment 0x0000000000000000 0x33 kernelc.o
.comment 0x0000000000000000 0x33 console.o
Memory Configuration
Name Origin Length Attributes
*default* 0x0000000000000000 0xffffffffffffffff
Linker script and memory map
0x0000000000010000 . = 0x10000
.text 0x0000000000010000 0x2000
0x0000000000010000 stext = .
*(.text)
.text 0x0000000000010000 0x10 kernelasm.o
.text 0x0000000000010010 0x14 kernelc.o
0x0000000000010010 kmain
.text 0x0000000000010024 0x0 console.o
*(.rodata)
*(.rdata)
*(.rdata$zzz)
0x0000000000010024 etext = .
0x0000000000011000 . = ALIGN (ABSOLUTE (.), 0x1000)
*fill* 0x0000000000010024 0xfdc
0x0000000000011000 _sdata = .
0x0000000000011000 sdata = .
*(.data)
.data 0x0000000000011000 0x1000 kernelasm.o
0x0000000000012000 stack
.data 0x0000000000012000 0x0 kernelc.o
.data 0x0000000000012000 0x0 console.o
0x0000000000012000 _edata = .
0x0000000000012000 edata = .
0x0000000000012000 _sbss = .
0x0000000000012000 sbss = .
*(COMMON)
*(.bss)
.bss 0x0000000000012000 0x0 kernelasm.o
.bss 0x0000000000012000 0x0 kernelc.o
.bss 0x0000000000012000 0x0 console.o
0x0000000000012000 _ebss = .
0x0000000000012000 ebss = .
.glue_7 0x0000000000012000 0x0
.glue_7 0x0000000000012000 0x0 linker stubs
.glue_7t 0x0000000000012000 0x0
.glue_7t 0x0000000000012000 0x0 linker stubs
.vfp11_veneer 0x0000000000012000 0x0
.vfp11_veneer 0x0000000000012000 0x0 linker stubs
.v4_bx 0x0000000000012000 0x0
.v4_bx 0x0000000000012000 0x0 linker stubs
.iplt 0x0000000000012000 0x0
.iplt 0x0000000000012000 0x0 kernelasm.o
.igot.plt 0x0000000000012000 0x0
.igot.plt 0x0000000000012000 0x0 kernelasm.o
.rel.dyn 0x0000000000012000 0x0
.rel.iplt 0x0000000000012000 0x0 kernelasm.o
/DISCARD/
*(.eh_frame)
*(.comment)
LOAD kernelasm.o
LOAD kernelc.o
LOAD console.o
OUTPUT(kernel.tmp elf32-littlearm)
.ARM.attributes
0x0000000000000000 0x32
.ARM.attributes
0x0000000000000000 0x24 kernelasm.o
.ARM.attributes
0x0000000000000024 0x36 kernelc.o
.ARM.attributes
0x000000000000005a 0x36 console.o
.note.GNU-stack
0x0000000000000000 0x0
.note.GNU-stack
0x0000000000000000 0x0 kernelc.o
.note.GNU-stack
0x0000000000000000 0x0 console.o
Kernelasm:
ldr sp,=stack
b kmain
forever:
b forever
.section .data
.global stack
.rept 1024
.word 0
.end
stack:
Now, I know in C, I could do something like:
#include<graphics.h>
#include<conio.h>
main()
{
int gd = DETECT, gm;
initgraph(&gd, &gm, "C:\\TC\\BGI");
setcolor(BLUE);
rectangle(50,50,100,100);
getch();
closegraph();
return 0;
}
But since I am working with ARM, it complicates matters. As such, I am thinking I need a kernel.
The actual file that does the work:
#define blue COLOR16(0,0,255)
void console_init(){
}
void setpixel(x, y, blue){
}
The kernel I am using it in:
#include "console.h"
void kmain(){
console_init();
//draw using setpixel
while(1){
}
}
I wrote up the skeleton of it, but I am not sure where to go from here. I believe I need to isolate blue from the RBG band and display it, but I am having a hard time figuring out how to go about utilizing the logic I used in the C example for the ARM.
I think I might need to define width and height of the screen for the placement of the rectangle:
#define WIDTH 800
#define HEIGHT 600
As well as a framebuffer to do the actual communication of where to put the rectangle on the screen:
#define framebuffer ((volatile unsigned short*) (((0x07ffffff - WIDTH*HEIGHT*2))&~0xf))
And a way to assign the blue to the screen:
framebuffer[ HEIGHT/2 * WIDTH + WIDTH/2 ] = #0000FF
Could I use something like a script to grab the colors?
((b >> 3) | (r << 8) | (g << 3))
Any insight would be very helpful in getting the hang of this.
When working on a small 32-bit kernel for the x86 architecture I discovered something strange with how ld handles nobits sections.
In my kernel I define a .bootstrap_stack section which holds a temporary stack for the initialisation part of the system. I also hold symbols for the beginning and end of the stack. This input section is redirected to the .bss output section. Each output section of my kernel has a symbol for the beginning and end of the section.
The problem is that in the final executable the symbol for the end of the stack is after the end of the .bss section. In the below examples the symbols stack_top and _kernel_ebss (and _kernel_end) have the same value, which isn't what I wanted.
I expected _kernel_ebss to equal stack_bottom.
However once I rename .bootstrap_stack to .bss this does not happen. Removing nobits also works, but the resulting binary is considerably larger.
Here are the stripped files that reproduce my issue:
boot.s
section .bootstrap_stack, nobits ; this does not work
;section .bootstrap_stack ; this works
;section .bss ; this also works
stack_top:
resb 8096
stack_bottom:
section .text
global _start
_start:
hlt
jmp _start
linker.ld
ENTRY(_start)
SECTIONS
{
. = 0xC0100000;
_kernel_start = .;
.text ALIGN(4K) : AT(ADDR(.text) - 0xC0000000)
{
_kernel_text = .;
*(.multiboot)
*(.text)
_kernel_etext = .;
}
.bss ALIGN(4K) : AT(ADDR(.bss) - 0xC0000000)
{
_kernel_bss = .;
*(COMMON)
*(.bss)
*(.bootstrap_stack)
_kernel_ebss = .;
}
_kernel_end = .;
}
Here are the symbols:
$ objdump -t kernel | sort
00000000 l df *ABS* 00000000 boot.s
c0100000 g .text 00000000 _kernel_start
c0100000 g .text 00000000 _kernel_text
c0100000 g .text 00000000 _start
c0100000 l d .text 00000000 .text
c0100003 g .text 00000000 _kernel_etext
c0101000 g .text 00000000 _kernel_bss
c0101000 g .text 00000000 _kernel_ebss
c0101000 g .text 00000000 _kernel_end
c0101000 l .bootstrap_stack, 00000000 stack_top
c0101000 l d .bootstrap_stack, 00000000 .bootstrap_stack,
c0102fa0 l .bootstrap_stack, 00000000 stack_bottom
By renaming .bootstrap_stack to .bss I get what I expected.
00000000 l df *ABS* 00000000 boot.s
c0100000 g .text 00000000 _kernel_start
c0100000 g .text 00000000 _kernel_text
c0100000 g .text 00000000 _start
c0100000 l d .text 00000000 .text
c0100003 g .text 00000000 _kernel_etext
c0101000 g .bss 00000000 _kernel_bss
c0101000 l .bss 00000000 stack_top
c0101000 l d .bss 00000000 .bss
c0102fa0 g .bss 00000000 _kernel_ebss
c0102fa0 g .bss 00000000 _kernel_end
c0102fa0 l .bss 00000000 stack_bottom
My question is whether this is expected behaviour of ld. If yes, what is the problem with my example, because as far as I understand .bss is also a nobits section, but it produces the expected result?
Okay I figured it out.
Apparently you're not supposed to have a comma right after the name of the section. objdump includes the comma in the name of the section so that clearly shows that that is the mistake.
So
section .bootstrap_stack, nobits
should be
section .bootstrap_stack nobits