freeRTOS linking process: multiple definition of `_start' - c

I am trying to compile FreeRTOS for the Raspberry Pi 2. These are the commands I have tried so far:
arm-none-eabi-gcc -march=armv7-a -mcpu=cortex-a7 -mfpu=neon-vfpv4
-mfloat-abi=hard test.c -o test.o
arm-none-eabi-as -march=armv7-a -mcpu=cortex-a7 -mfpu=neon-vfpv4
-mfloat-abi=hard startup.s -o startup.o
arm-none-eabi-ld test.o startup.o -static -Map kernel7.map -o
target.elf -T raspberrypi.ld
The first two commands work fine. However, the last one fails with the following error:
startup.o: In function `_start':
(.init+0x0): multiple definition of `_start'
test.o::(.text+0x6c): first defined here
startup.o: In function `swi_handler':
(.init+0x28): undefined reference to `vPortYieldProcessor'
startup.o: In function `irq_handler':
(.init+0x38): undefined reference to `vFreeRTOS_ISR'
startup.o: In function `zero_loop':
(.init+0xcc): undefined reference to `rpi_cpu_irq_disable'
This is the corresponding code:
test.c:
#include <stdio.h>
void exit(int code)
{
    /* Bare-metal stub: there is nothing to return to, so spin forever. */
    for (;;) {
    }
}
int main(void)
{
return 0;
}
startup.s:
;# Symbols defined outside this file.
.extern system_init
.extern __bss_start
.extern __bss_end
.extern vFreeRTOS_ISR
.extern vPortYieldProcessor
.extern rpi_cpu_irq_disable
.extern main
.section .init
.globl _start
;;
_start:
;# Exception table: eight "ldr pc" slots, one per exception type.
;# All the following instructions should be read as:
;# Load the address stored at the symbol into the program counter.
ldr pc,reset_handler ;# Processor Reset handler -- we will have to force this on the raspi!
;# Because this is the first instruction executed, of course it causes an immediate branch into reset!
ldr pc,undefined_handler ;# Undefined instruction handler -- processors that don't have thumb can emulate thumb!
ldr pc,swi_handler ;# Software interrupt / TRAP (SVC) -- system SVC handler for switching to kernel mode.
ldr pc,prefetch_handler ;# Prefetch/abort handler.
ldr pc,data_handler ;# Data abort handler/
ldr pc,unused_handler ;# -- Historical from 26-bit addressing ARMs -- was invalid address handler.
ldr pc,irq_handler ;# IRQ handler
ldr pc,fiq_handler ;# Fast interrupt handler.
;# Here we create an exception address table! This means that reset/hang/irq can be absolute addresses
;# These eight words must directly follow the eight instructions above: each
;# "ldr pc,<label>" is a PC-relative load, and continueBoot copies both groups together.
reset_handler: .word reset
undefined_handler: .word undefined_instruction
swi_handler: .word vPortYieldProcessor
prefetch_handler: .word prefetch_abort
data_handler: .word data_abort
unused_handler: .word unused
irq_handler: .word vFreeRTOS_ISR
fiq_handler: .word fiq
;# reset: first code run after the exception table's reset slot.
;# Masks interrupts, and if the CPU was started in HYP mode drops to
;# Supervisor mode before continuing at continueBoot.
;# Clobbers r0, r1, r8.
reset:
/* Disable IRQ & FIQ */
cpsid if
/* Check for HYP mode (CPSR mode field == 0x1A) */
mrs r0, cpsr_all
and r0, r0, #0x1F
mov r8, #0x1A
cmp r0, r8
beq overHyped
b continueBoot
overHyped: /* Get out of HYP mode */
ldr r1, =continueBoot
msr ELR_hyp, r1
mrs r1, cpsr_all
;# Clear the mode field before inserting the new mode. Using AND here would keep
;# the HYP mode bits (0x1A), and 0x1A | 0x13 = 0x1B selects Undefined mode; it
;# would also throw away the IRQ/FIQ mask bits that cpsid just set.
bic r1, r1, #0x1f ;# CPSR_MODE_MASK
orr r1, r1, #0x13 ;# CPSR_MODE_SUPERVISOR
msr SPSR_hyp, r1
;# eret loads pc from ELR_hyp and CPSR from SPSR_hyp.
eret
continueBoot:
;# The exception table is linked at 0x8000, but exceptions are vectored through
;# 0x0000, so the reset handler copies the table down to 0x0000 first.
mov r0,#0x8000 ;# Store the source pointer
mov r1,#0x0000 ;# Store the destination pointer.
;# First 8 words: the "ldr pc" branching instructions.
ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9} ;# Load multiple values from indexed address. ; Auto-increment R0
stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9} ;# Store multiple values from the indexed address. ; Auto-increment R1
;# Next 8 words: the handler addresses those instructions load, so the
;# PC-relative loads still find their targets at the new location.
ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9} ;# Load from 4*n of regs (8) as R0 is now incremented.
stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9} ;# Store this extra set of data.
;# Set up the stack pointers for the different CPU modes: each mode is entered
;# in turn by writing CPSR, and sp is then set for that mode.
;# (PSR_IRQ_MODE|PSR_FIQ_DIS|PSR_IRQ_DIS)
mov r0,#0xD2
msr cpsr_c,r0
mov sp,#0x8000
;# (PSR_FIQ_MODE|PSR_FIQ_DIS|PSR_IRQ_DIS)
mov r0,#0xD1
msr cpsr_c,r0
mov sp,#0x4000
;# (PSR_SVC_MODE|PSR_FIQ_DIS|PSR_IRQ_DIS)
;# This is the last mode switch, so the code that follows runs in this mode.
mov r0,#0xD3
msr cpsr_c,r0
mov sp,#0x8000000
;# Zero the region [__bss_start, __bss_end) one word at a time, then call
;# rpi_cpu_irq_disable and jump to main.
;# r0 = current address, r1 = end address, r2 = 0.
ldr r0, =__bss_start
ldr r1, =__bss_end
mov r2, #0
zero_loop:
cmp r0,r1
;# Addresses are unsigned quantities, so use the unsigned "lower" condition (LO)
;# rather than signed LT, which misbehaves once an address has bit 31 set.
it lo
strlo r2,[r0], #4
blo zero_loop
bl rpi_cpu_irq_disable
;# mov sp,#0x1000000
b main ;# We're ready?? Lets start main execution!
;# Default exception handlers and small helper routines, placed in .text.
.section .text
;# Each default handler below simply branches to itself.
undefined_instruction:
b undefined_instruction
prefetch_abort:
b prefetch_abort
data_abort:
b data_abort
unused:
b unused
fiq:
b fiq
hang:
b hang
;# PUT32: store the word in r1 to the address in r0.
.globl PUT32
PUT32:
str r1,[r0]
bx lr
;# GET32: load the word at the address in r0 and return it in r0.
.globl GET32
GET32:
ldr r0,[r0]
bx lr
;# dummy: returns immediately without doing anything.
.globl dummy
dummy:
bx lr
raspberrypi.ld:
/**
* BlueThunder Linker Script for the raspberry Pi!
*
*
*
**/
MEMORY
{
RESERVED (r) : ORIGIN = 0x00000000, LENGTH = 32K
INIT_RAM (rwx) : ORIGIN = 0x00008000, LENGTH = 32K
RAM (rwx) : ORIGIN = 0x00010000, LENGTH = 128M
}
ENTRY(_start)
SECTIONS {
/*
* Our init section allows us to place the bootstrap code at address 0x8000
*
* This is where the Graphics processor forces the ARM to start execution.
* However the interrupt vector code remains at 0x0000, and so we must copy the correct
* branch instructions to 0x0000 - 0x001C in order to get the processor to handle interrupts.
*
*/
.init : {
KEEP(*(.init))
} > INIT_RAM = 0
.module_entries : {
__module_entries_start = .;
KEEP(*(.module_entries))
KEEP(*(.module_entries.*))
__module_entries_end = .;
__module_entries_size = SIZEOF(.module_entries);
} > INIT_RAM
/**
* This is the main code section, it is essentially of unlimited size. (128Mb).
* .text.* is included so that per-function sections (-ffunction-sections) are
* placed here instead of being left as orphan sections.
**/
.text : {
*(.text)
*(.text.*)
} > RAM
/*
* Read-only data (string literals, const tables) gets an explicit home
* instead of relying on the linker's orphan-section placement.
*/
.rodata : {
*(.rodata)
*(.rodata.*)
} > RAM
/*
* Next we put the data.
*/
.data : {
*(.data)
*(.data.*)
} > RAM
/*
* Zero-initialised data. The startup code clears __bss_start..__bss_end one
* word at a time, so both bounds are word-aligned, and COMMON symbols
* (tentative definitions) are collected here so that they get cleared too.
*/
.bss :
{
. = ALIGN(4);
__bss_start = .;
*(.bss)
*(.bss.*)
*(COMMON)
. = ALIGN(4);
__bss_end = .;
} > RAM
/*
__exidx_start = .;
.ARM.exidx :
{
*(.ARM.exidx* .gnu.linkonce.armexidx.*)
} > RAM
__exidx_end = .;
*/
/**
* Place HEAP here???
**/
PROVIDE(__HEAP_START = __bss_end );
/**
* Stack starts at the top of the RAM, and moves down!
**/
_estack = ORIGIN(RAM) + LENGTH(RAM);
}
As you can see, test.c doesn't contain an entry point called _start, and neither does its compiled assembly output. Only startup.s does.
Any ideas about how I could solve this issue?
EDIT: all the code if needed used can be found here:https://github.com/jameswalmsley/RaspberryPi-FreeRTOS

Related

Can't run a no-op function in qemu

I am using xpack qemu arm which is a fork of qemu with support for STM32 boards.
I am trying to run a simple program to get myself started.
I have my linker script
/* Minimal image layout: two vector words (initial SP, initial PC) followed by all code. */
ENTRY(Reset_Handler)
MEMORY
{
/* NOTE(review): the FLASH origin used here is 0; the flash may really live at another address and only be aliased to 0 at boot — confirm against the chip's reference manual. */
FLASH (rx) : ORIGIN = 0x00000000, LENGTH = 0x08000000
RAM (rwx) : ORIGIN = 0x20000000, LENGTH = 0x20000000
}
SECTIONS
{
. = ORIGIN(FLASH);
.text :
{
LONG(ORIGIN(RAM) + LENGTH(RAM)) /* set the SP initial value */
LONG(Reset_Handler) /* set the PC initial value */
*(.text)
}
}
my assembly file
/* Reset entry point: call main, then spin. */
.section .text
.global Reset_Handler
Reset_Handler:
BL main
/* "." is the address of this instruction, so this branches to itself forever. */
BL .
and a c function, main
void main () {
return;
}
When I assemble, compile, and link, the generated memory contents are
00000000 <main-0x8>:
0: 40000000 .word 0x40000000
4: 00000020 .word 0x00000020
00000008 <main>:
void main () {
8: e52db004 push {fp} ; (str fp, [sp, #-4]!)
c: e28db000 add fp, sp, #0
return;
10: e1a00000 nop ; (mov r0, r0)
14: e24bd000 sub sp, fp, #0
18: e49db004 pop {fp} ; (ldr fp, [sp], #4)
1c: e12fff1e bx lr
00000020 <Reset_Handler>:
.section .text
.global Reset_Handler
Reset_Handler:
BL main
20: ebfffff8 bl 8 <main>
BL .
24: ebfffffe bl 24 <Reset_Handler+0x4>
I am using a STM32F407VG MCU, the docs state that
After this startup delay is over, the CPU fetches the top-of-stack value from address
0x0000 0000, then starts code execution from the boot memory starting from 0x0000 0004.
Thus, I store the initial value of the stack pointer 0x40000000 in memory location 0x00000000 and the initial value of the program counter in memory location 0x00000004
I start qemu like so
qemu-system-gnuarmeclipse -mcu STM32F407VG -machine STM32F4-Discovery -image myfile.elf -nographic --verbose --verbose -no-reboot -S
And I can see that the SP and PC registers (R13 and R15, respectively) are set to the expected values:
R00=00000000 R01=00000000 R02=00000000 R03=00000000
R04=00000000 R05=00000000 R06=00000000 R07=00000000
R08=00000000 R09=00000000 R10=00000000 R11=00000000
R12=00000000 R13=40000000 R14=00000000 R15=00000020
PSR=40000153 -Z-- A svc32
FPSCR: 00000000
So, following the memory mapping output, the program should flow like so:
PC is set to 0x20, which runs BL 8 <main>
This branches to memory location 0x8, which is the start of the main function, it also saves the return address in LR
This function should perform a no-op, with pushing and popping FP to/from the stack
the function should return to the address of LR (which was previously saved)
The next instruction should loop forever (24: ebfffffe bl 24 <Reset_Handler+0x4>)
However, I run this, and I get the following error:
(qemu) Bad ram pointer 0x4
I am a little lost on what this error means. Am I missing something in my setup?
ORIGIN = 0x00000000
The flash memory is aliased to address 0 by the hardware, but its real address is not zero.
Your linker script has to use the correct FLASH address, not the boot-time alias:
0x8000000
I would suggest using the ST-provided linker scripts, since you do not yet fully understand the chip's documentation.

How to implement SVC handler on ARM926EJ-S?

I'm writing an amateur operating system for ARM-based devices and currently trying to make it working in QEMU's versatilepb (ARM926EJ-S).
The problem arises when I try to implement syscalls in my kernel. The idea is pretty simple: implement system calls via the SVC (SWI) instruction. Applications run in user mode, and to call a kernel function they execute an SVC <code> instruction, so the ARM processor switches to supervisor mode and calls the appropriate SVC handler.
But the problem is that when I call __asm__("SVC #0x08");, the device just resets and calls RESET_HANDLER, so it looks like the emulator just reboots.
I spent a few hours already to figure out what is the problem, but still got no idea.
Here is the code of ivt.s (the initial code with handlers):
/* Exception vector table, followed by the reset and SWI handlers. */
.global __RESET
__RESET:
B RESET_HANDLER /* Reset */
B . /* Undefined */
B SWI_HANDLER /* SWI */
B . /* Prefetch Abort */
B . /* Data Abort */
B . /* reserved */
B . /* IRQ */
B . /* FIQ */
/* Reset: give supervisor and user mode their own stacks, then run user code. */
RESET_HANDLER:
MSR CPSR_c, 0x13 /* Supervisor mode */
LDR SP, =stack_top
MSR CPSR_c, 0x10 /* User mode */
LDR SP, =usr_stack_top
BL usermode_function
B .
/* SWI entry: call the C routine syscall(), then return from the exception. */
SWI_HANDLER:
PUSH {LR}
BL syscall
POP {LR}
/* MOVS with PC as destination returns to LR and restores CPSR from SPSR. */
MOVS PC, LR
This is how I make the syscall:
void usermode_function() {
__asm__("SVC #0x00"); // Make syscall
}
And syscall implementation:
void syscall() {
// NEVER CALLED
__asm__("PUSH {r0-r7}");
__asm__("POP {r0-r7}");
}
But the code under SWI_HANDLER even never invoked.
I really even don't know how to ask the question, since it looks like I'm missing some very basic information in my mind.
So what could be the problem? Which information I should provide to make you able to help me?
Here is also the linker script:
ENTRY(__RESET)
SECTIONS
{
. = 0x10000;
.ivt . : { ivt.o(.text) }
.text : { *(.text) }
.data : { *(.data) }
.bss : { *(.bss COMMON) }
. = ALIGN(8);
. = . + 0x1000; /* 4KB of stack memory */
stack_top = .;
. = . + 0x100;
usr_stack_top = .;
}
Many thanks to @Jester and @old_timer, the problem is solved.
The problem was not with code, but with linker script. I have put my vector table at 0x10000, as you can see in the linker script, but it should be placed at 0x0. So SVC was not handled properly because the handler was placed in a wrong place.
When I changed the base address in my ld script and tried to load the firmware as ELF, everything starts to work perfectly.
You solved it one way but I'll still write my answer.
Very bare bare metal example...
strap.s
/* Minimal bare-metal bootstrap: exception table, reset code, SWI stub and helpers. */
.globl _start
_start:
/* First four exception table slots; the SWI slot is the third one. */
b reset
b hang
b swi_handler
b hang
/* Set a stack for supervisor mode, then switch to user mode and call the C code. */
reset:
msr cpsr_c, 0x13 /* Supervisor mode */
mov sp,#0x10000
msr cpsr_c, 0x10 /* User mode */
mov sp,#0x9000
bl notmain
hang:
b hang
/* SWI stub: saves and restores registers, then returns from the exception. */
swi_handler:
push {r0,r1,r2,r3,r4,lr}
pop {r0,r1,r2,r3,r4,lr}
movs pc,lr
/* GETPC: return the value read from pc in r0. */
.globl GETPC
GETPC:
mov r0,pc
bx lr
/* PUT32: store the word in r1 to the address in r0. */
.globl PUT32
PUT32:
str r1,[r0]
bx lr
/* GET32: load the word at the address in r0 and return it in r0. */
.globl GET32
GET32:
ldr r0,[r0]
bx lr
notmain.c
void PUT32 ( unsigned int, unsigned int );
unsigned int GET32 ( unsigned int );
unsigned int GETPC ( void );
#define UART_BASE 0x101F1000
#define UARTDR (UART_BASE+0x000)
static void uart_send ( unsigned int x )
{
PUT32(UARTDR,x);
}
static void hexstrings ( unsigned int d )
{
    /* Emit d as eight uppercase hex digits, most significant nibble first,
       followed by a single space. */
    int shift;

    for(shift=28;shift>=0;shift-=4)
    {
        unsigned int nibble=(d>>shift)&0xF;
        /* 0x30 is '0'; 0x37 + 10 is 'A'. */
        uart_send(nibble>9 ? nibble+0x37 : nibble+0x30);
    }
    uart_send(0x20);
}
static void hexstring ( unsigned int d )
{
hexstrings(d);
uart_send(0x0D);
uart_send(0x0A);
}
int notmain ( void )
{
unsigned int ra;
hexstring(0x12345678);
hexstring(GETPC());
for(ra=0;ra<0x20;ra+=4)
{
hexstrings(ra);
hexstring(GET32(ra));
}
return(0);
}
memmap
/* Single RAM region; code first, then zero-initialised data. */
MEMORY
{
ram : ORIGIN = 0x00010000, LENGTH = 32K
}
SECTIONS
{
.text : { *(.text*) } > ram
/* The .bss output section must collect .bss input sections, not .text again. */
.bss : { *(.bss*) } > ram
}
Build
arm-linux-gnueabi-as --warn --fatal-warnings -march=armv5t strap.s -o strap.o
arm-linux-gnueabi-gcc -c -Wall -O2 -nostdlib -nostartfiles -ffreestanding -march=armv5t notmain.c -o notmain.o
arm-linux-gnueabi-ld strap.o notmain.o -T memmap -o notmain.elf
arm-linux-gnueabi-objdump -D notmain.elf > notmain.list
arm-linux-gnueabi-objcopy notmain.elf -O binary notmain.bin
Execute
qemu-system-arm -M versatilepb -m 128M -nographic -kernel notmain.bin
Output
12345678
0001003C
00000000 E3A00000
00000004 E59F1004
00000008 E59F2004
0000000C E59FF004
00000010 00000183
00000014 00000100
00000018 00010000
0000001C 00000000
Examine, assemble disassemble
.word 0xE3A00000
.word 0xE59F1004
.word 0xE59F2004
.word 0xE59FF004
.word 0x00000183
.word 0x00000100
.word 0x00010000
.word 0x00000000
0: e3a00000 mov r0, #0
4: e59f1004 ldr r1, [pc, #4] ; 10 <.text+0x10>
8: e59f2004 ldr r2, [pc, #4] ; 14 <.text+0x14>
c: e59ff004 ldr pc, [pc, #4] ; 18 <.text+0x18>
10: 00000183 andeq r0, r0, r3, lsl #3
14: 00000100 andeq r0, r0, r0, lsl #2
18: 00010000 andeq r0, r1, r0
1c: 00000000 andeq r0, r0, r0
So you can see that they are basically set up to launch a Linux kernel; the ATAGS/dtb is in RAM at 0x100, perhaps, and they jump to 0x10000. The pc shown by the program, 0001003C, confirms that the -O binary image loaded with that command line was placed at 0x10000 and executed there. If you were to have an SWI event, you would start executing at the ldr r2 instruction and land on the reset handler in your code.
(Note incidentally that qemu doesn't properly model uarts, at least so far as I have found so you don't have to initialize them you don't have to wait for the tx buffer to be empty you just jam bytes into the tx buffer and they come out).
If you run the elf without changing the linker script
qemu-system-arm -M versatilepb -m 128M -nographic -kernel notmain.elf
12345678
0001003C
00000000 00000000
00000004 00000000
00000008 00000000
0000000C 00000000
00000010 00000000
00000014 00000000
00000018 00000000
0000001C 00000000
Interesting: it loads and runs at 0x10000, which is what it was linked for, but it doesn't bother to set anything up for coming out of reset at 0x00000000. And/or this is the linker issue that makes for bad ELF files, and the image was padded with zeros, each of which decodes as
1c: 00000000 andeq r0, r0, r0
So it could have executed from 0x00000000 to 0x10000 and run into our code.
If we change the linker script
ram : ORIGIN = 0x00000000, LENGTH = 32K
Run the elf not the bin
qemu-system-arm -M versatilepb -m 128M -nographic -kernel notmain.elf
12345678
0000003C
00000000 EA000002
00000004 EA000006
00000008 EA000006
0000000C EA000004
00000010 E321F013
00000014 E3A0D801
00000018 E321F010
0000001C E3A0DA09
as expected.
Now for the swi.
strap.s
.globl _start
_start:
b reset
b hang
b swi_handler
b hang
reset:
msr cpsr_c, 0x13 /* Supervisor mode */
mov sp,#0x10000
msr cpsr_c, 0x10 /* User mode */
mov sp,#0x9000
bl notmain
hang:
b hang
/* SWI/SVC entry: call the C routine handler(), then return from the exception.
   A C function may clobber r0-r3 and r12 under the ARM procedure call standard,
   so those (plus lr) are what must be preserved around the call; r4 is
   callee-saved and needs no saving here. Six registers keep sp 8-byte aligned. */
swi_handler:
push {r0,r1,r2,r3,r12,lr}
bl handler
pop {r0,r1,r2,r3,r12,lr}
/* movs with pc as destination also restores CPSR from SPSR. */
movs pc,lr
.globl GETPC
GETPC:
mov r0,pc
bx lr
.globl PUT32
PUT32:
str r1,[r0]
bx lr
.globl GET32
GET32:
ldr r0,[r0]
bx lr
.globl do_swi
do_swi:
svc #0x08
bx lr
notmain.c
void PUT32 ( unsigned int, unsigned int );
unsigned int GET32 ( unsigned int );
unsigned int GETPC ( void );
void do_swi ( void );
#define UART_BASE 0x101F1000
#define UARTDR (UART_BASE+0x000)
static void uart_send ( unsigned int x )
{
PUT32(UARTDR,x);
}
static void hexstring ( unsigned int d )
{
    /* Emit d as eight uppercase hex digits, most significant nibble first,
       followed by CR LF. */
    int shift;

    for(shift=28;shift>=0;shift-=4)
    {
        unsigned int nibble=(d>>shift)&0xF;
        /* 0x30 is '0'; 0x37 + 10 is 'A'. */
        uart_send(nibble>9 ? nibble+0x37 : nibble+0x30);
    }
    uart_send(0x0D);
    uart_send(0x0A);
}
void handler ( void )
{
hexstring(0x11223344);
}
int notmain ( void )
{
hexstring(0x12345678);
do_swi();
hexstring(0x12345678);
return(0);
}
memmap
/* Single RAM region at address 0; code first, then zero-initialised data. */
MEMORY
{
ram : ORIGIN = 0x00000000, LENGTH = 32K
}
SECTIONS
{
.text : { *(.text*) } > ram
/* The .bss output section must collect .bss input sections, not .text again. */
.bss : { *(.bss*) } > ram
}
Run the elf, output is
12345678
11223344
12345678
as desired. But you could have also done this
strap.s
.globl _start
_start:
ldr pc,reset_addr
ldr pc,hang_addr
ldr pc,swi_handler_addr
ldr pc,hang_addr
reset_addr: .word reset
hang_addr: .word hang
swi_handler_addr: .word swi_handler
reset:
mov r0,#0x10000
mov r1,#0x00000
ldmia r0!,{r2,r3,r4,r5}
stmia r1!,{r2,r3,r4,r5}
ldmia r0!,{r2,r3,r4,r5}
stmia r1!,{r2,r3,r4,r5}
msr cpsr_c, 0x13 /* Supervisor mode */
mov sp,#0x10000
msr cpsr_c, 0x10 /* User mode */
mov sp,#0x9000
bl notmain
hang:
b hang
/* SWI/SVC entry: call the C routine handler(), then return from the exception.
   A C function may clobber r0-r3 and r12 under the ARM procedure call standard,
   so those (plus lr) are what must be preserved around the call; r4 is
   callee-saved and needs no saving here. Six registers keep sp 8-byte aligned. */
swi_handler:
push {r0,r1,r2,r3,r12,lr}
bl handler
pop {r0,r1,r2,r3,r12,lr}
/* movs with pc as destination also restores CPSR from SPSR. */
movs pc,lr
.globl GETPC
GETPC:
mov r0,pc
bx lr
.globl PUT32
PUT32:
str r1,[r0]
bx lr
.globl GET32
GET32:
ldr r0,[r0]
bx lr
.globl do_swi
do_swi:
svc #0x08
bx lr
notmain.c
void PUT32 ( unsigned int, unsigned int );
unsigned int GET32 ( unsigned int );
unsigned int GETPC ( void );
void do_swi ( void );
#define UART_BASE 0x101F1000
#define UARTDR (UART_BASE+0x000)
static void uart_send ( unsigned int x )
{
PUT32(UARTDR,x);
}
static void hexstring ( unsigned int d )
{
    /* Emit d as eight uppercase hex digits, most significant nibble first,
       followed by CR LF. */
    int shift;

    for(shift=28;shift>=0;shift-=4)
    {
        unsigned int nibble=(d>>shift)&0xF;
        /* 0x30 is '0'; 0x37 + 10 is 'A'. */
        uart_send(nibble>9 ? nibble+0x37 : nibble+0x30);
    }
    uart_send(0x0D);
    uart_send(0x0A);
}
void handler ( void )
{
hexstring(0x11223344);
}
int notmain ( void )
{
unsigned int ra;
hexstring(0x12345678);
for(ra=0x10000;ra<0x10020;ra+=4) hexstring(GET32(ra));
for(ra=0x00000;ra<0x00020;ra+=4) hexstring(GET32(ra));
do_swi();
hexstring(0x12345678);
return(0);
}
memmap
/* Single RAM region; code first, then zero-initialised data. */
MEMORY
{
ram : ORIGIN = 0x00010000, LENGTH = 32K
}
SECTIONS
{
.text : { *(.text*) } > ram
/* The .bss output section must collect .bss input sections, not .text again. */
.bss : { *(.bss*) } > ram
}
And now both the elf and the binary image versions work. I let the toolchain do the work for me:
00010010 <reset_addr>:
10010: 0001001c
00010014 <hang_addr>:
10014: 00010048
00010018 <swi_handler_addr>:
10018: 0001004c
The ldr pc, is position independent. I copy the four entries plus the four (well three) addresses so that 0x00000 matches 0x10000 and now the exception table (it is not a vector table btw) works.
With newer arm processors you could instead set VTOR to 0x10000 and it would use the one built into the binary, no copying necessary. Or as you solved just build and run your program from 0x00000 and there you go. I wanted to show the alternatives as well as how to figure out (by cheating, you have to love uarts in qemu) what qemu is doing and where it is loading without having to use a debugger.

Keil stm32, using assembly, scatter file and c. How to export c code entry point to assembly?

In order to combine .c and assembly, I want to pass the start address of my .c code and program the microcontroller to know that its program starts at that address. As I am writing my startup file in assembly, I need to pass the .c code's starting address to the assembly, and then write this address to the specific memory region of the microcontroller (so that the microcontroller can start execution at this address after RESET).
Trying to create a project for stm32f103 in Keil with this structure:
Some .c file, for example main.c (for the main part of the program).
Startup file in assembly language. Which gets the adress of entry to the function written in some .c file, to be passed to Reset_Handler
Scatter file, written in this way:
LR_IROM1 0x08000000 0x00010000 { ; load region size_region
ER_IROM1 0x08000000 0x00010000 { ; load address = execution address
*.o (RESET, +First) ; RESET is code section with I.V.T.
* (InRoot$$Sections)
.ANY (+RO)
.ANY (+XO)
}
RW_IRAM1 0x20000000 0x00005000 { ; RW data
.ANY (+RW +ZI)
}
}
The problem is passing the entry point of the .c function. Reset_Handler, which needs the .c entry point (starting address) provided by __main, looks like this:
Reset_Handler PROC
EXPORT Reset_Handler [WEAK]
IMPORT __main
LDR R0, =__main
BX R0
ENDP
About the entry point __main, an answer to a related assembly question says:
__main() is the compiler supplied entry point for your C code. It is not the main() function you write, but performs initialisation for the
standard library, static data, the heap before calling your `main()'
function.
So, how to get this entry point in my assembly file?
Edit>> If somebody is interested in solution for KEIL, here it is, its all that simple!
Simple assembly startup.s file:
; Stack area: 0x400 bytes of uninitialised read/write memory; Stack_top labels its end.
AREA STACK, NOINIT, READWRITE
SPACE 0x400
Stack_top
; RESET area: two words, the stack top followed by the reset handler address.
AREA RESET, DATA, READONLY
dcd Stack_top
dcd Reset_Handler
EXPORT _InitMC
IMPORT notmain
AREA PROGRAM, CODE, READONLY
; Reset handler: calls the C function notmain.
Reset_Handler PROC
bl notmain
ENDP
_InitMC PROC ;start of the assembly procedure
Loop
b Loop ;infinite loop
ENDP
END
Simple c file:
extern int _InitMC();
int notmain(void) {
_InitMC();
return 0;
}
Linker is the same as the one mentioned above.
Project build was successful.
Using the gnu toolchain for example:
Bootstrap:
/* Cortex-M0 bootstrap: vector table, reset handler, and a trivial helper. */
.cpu cortex-m0
.thumb
.thumb_func
.global _start
_start:
/* Vector table: word 0 is the initial stack pointer, word 1 the reset handler address. */
stacktop: .word 0x20001000
.word reset
/* The remaining entries all point at the infinite loop below. */
.word loop
.word loop
.word loop
.thumb_func
reset:
bl notmain
b loop
.thumb_func
loop: b .
/* fun: does nothing and returns. */
.align
.thumb_func
.globl fun
fun:
bx lr
.end
C entry point (function name is not relevant, sometimes using main() adds garbage, depends on the compiler/toolchain)
void fun ( unsigned int );
int notmain ( void )
{
unsigned int ra;
for(ra=0;ra<1000;ra++) fun(ra);
return(0);
}
Linker script
MEMORY
{
rom : ORIGIN = 0x08000000, LENGTH = 0x1000
ram : ORIGIN = 0x20000000, LENGTH = 0x1000
}
SECTIONS
{
.text : { *(.text*) } > rom
.rodata : { *(.rodata*) } > rom
.bss : { *(.bss*) } > ram
}
Build
# Assemble the bootstrap first (saved as flash.s; the linker script is flash.ld):
# both link steps below consume flash.o, which nothing else produces.
arm-none-eabi-as --warn --fatal-warnings flash.s -o flash.o
arm-none-eabi-gcc -Wall -Werror -O2 -nostdlib -nostartfiles -ffreestanding -mthumb -mcpu=cortex-m0 -march=armv6-m -c so.c -o so.thumb.o
arm-none-eabi-ld -o so.thumb.elf -T flash.ld flash.o so.thumb.o
arm-none-eabi-objdump -D so.thumb.elf > so.thumb.list
arm-none-eabi-objcopy so.thumb.elf so.thumb.bin -O binary
arm-none-eabi-gcc -Wall -Werror -O2 -nostdlib -nostartfiles -ffreestanding -mthumb -mcpu=cortex-m3 -march=armv7-m -c so.c -o so.thumb2.o
arm-none-eabi-ld -o so.thumb2.elf -T flash.ld flash.o so.thumb2.o
arm-none-eabi-objdump -D so.thumb2.elf > so.thumb2.list
arm-none-eabi-objcopy so.thumb2.elf so.thumb2.bin -O binary
Result (all thumb versions)
Disassembly of section .text:
08000000 <_start>:
8000000: 20001000
8000004: 08000015
8000008: 0800001b
800000c: 0800001b
8000010: 0800001b
08000014 <reset>:
8000014: f000 f804 bl 8000020 <notmain>
8000018: e7ff b.n 800001a <loop>
0800001a <loop>:
800001a: e7fe b.n 800001a <loop>
0800001c <fun>:
800001c: 4770 bx lr
800001e: 46c0 nop ; (mov r8, r8)
08000020 <notmain>:
8000020: b570 push {r4, r5, r6, lr}
8000022: 25fa movs r5, #250 ; 0xfa
8000024: 2400 movs r4, #0
8000026: 00ad lsls r5, r5, #2
8000028: 0020 movs r0, r4
800002a: 3401 adds r4, #1
800002c: f7ff fff6 bl 800001c <fun>
8000030: 42ac cmp r4, r5
8000032: d1f9 bne.n 8000028 <notmain+0x8>
8000034: 2000 movs r0, #0
8000036: bd70 pop {r4, r5, r6, pc}
Of course this has to be placed in flash at the right place with some tool.
The vector table is mapped by logic to 0x00000000 in the stm32 family.
08000000 <_start>:
8000000: 20001000
8000004: 08000015 <---- reset ORR 1
And in this minimal code the reset handler calls the C code; the C code messes around and returns. Technically this is a fully functional program for most STM32s (change the stack init to a smaller value, say 0x20000400, for those with less RAM), and it should work anywhere when built with -mthumb by itself (armv4t) or with the cortex-m0 option added. Well, okay, not necessarily on the ARMv8-M parts: they are technically allowed not to support all of ARMv6-M, but the one in the field that I know about does.
I don't have Keil, so I don't know how to translate this to it, but it shouldn't be much of a stretch — just syntax.

ARM Cortex A9 Startup Code and Interrupt Setup

I try to program Cortex-A9 in a bare metal fashion. I use the 'hello world' code from:
https://github.com/tukl-msd/gem5.bare-metal which works. However, I'm not able to get interrupts working. When I raise an interrupt (e.g. interrupt #47), my software doesn't jump into the ISR function. What am I missing? Do I have to do some more initialization?
Startup Code:
.section INTERRUPT_VECTOR, "x"
.global _Reset
_Reset:
B Reset_Handler /* Reset */
B . /* Undefined */
B . /* SWI */
B . /* Prefetch Abort */
B . /* Data Abort */
B . /* reserved */
B irq_handler /* IRQ */
B irq_handler /* FIQ */
// Some Definitions for GIC:
.equ GIC_DIST, 0x10041000
.equ GIC_CPU , 0x10040000
// GIC Definitions for CPU interface
.equ ICCICR , 0x00
.equ ICCPMR , 0x04
.equ ICCEOIR , 0x10
.equ ICCIAR , 0x0C
// GIC Definitions for Distributor interface
.equ ICDDCR , 0x00
.equ ICDISER , 0x100
.equ ICDIPTR , 0x800
// Other Definitions
.equ USR_MODE , 0x10
GIC_dist_base : .word 0 // address of GIC distributor
GIC_cpu_base : .word 0 // address of GIC CPU interface
// Reset handler: set up the IRQ-mode and Supervisor-mode stacks, configure the
// GIC, unmask IRQs on the CPU, then call main.
// sp is banked per CPU mode and irq_handler pushes onto the IRQ-mode sp, so
// that stack must be initialised before any interrupt can be taken.
.equ IRQ_STACK_SIZE, 0x400 // top 1 KiB of the stack region is given to IRQ mode
Reset_Handler:
MSR cpsr_c, #0xD2 // IRQ mode (0x12) with IRQ and FIQ masked
LDR sp, =stack_top
MSR cpsr_c, #0xD3 // Supervisor mode (0x13) with IRQ and FIQ masked
LDR sp, =stack_top - IRQ_STACK_SIZE // main stack starts below the IRQ stack
// Configure GIC:
BL IC_init
// Enable Interrupts on CPU Side, only after the stacks and the GIC are ready:
MRS r1, cpsr // get the cpsr.
BIC r1, r1, #0x80 // enable IRQ (ORR to disable).
MSR cpsr_c, r1 // copy it back, control field bit update.
// Branch to C code
BL main
B .
// Initialize GIC
// Records the distributor and CPU-interface base addresses in GIC_dist_base /
// GIC_cpu_base, unmasks all interrupt priorities, then enables the CPU
// interface and the distributor.
// Clobbers r1, r2. Exported as GIC_init; IC_init is the name used by Reset_Handler.
.global GIC_init
.global IC_init
GIC_init:
IC_init:
stmfd sp!,{lr}
// Read GIC base from Configuration Base Address Register
// And use it to initialize GIC_dist_base and GIC_cpu_base
//mrc p15, 4, r0, c15, c0, 0
//add r2, r0, #GIC_DIST // Calculate address
ldr r2, =GIC_DIST
ldr r1, =GIC_dist_base
str r2,[r1] // Store address of GIC distributor
//add r2, r0, #GIC_CPU // Calculate address
ldr r2, =GIC_CPU
ldr r1, =GIC_cpu_base
str r2,[r1] // Store address of GIC CPU interface
// ICCPMR and ICCICR are CPU-interface registers, so they must be addressed
// relative to the CPU interface base stored in GIC_cpu_base, not relative to
// the address of the GIC_dist_base variable itself.
ldr r2,=GIC_cpu_base
ldr r2,[r2] // Base address of CPU interface
// Register (ICCPMR) to enable interrupts of all priorities
ldr r1,=0xFFFF
str r1,[r2,#ICCPMR]
// Set the enable bit in the CPU interface control register
// ICCICR, allowing CPU(s) to receive interrupts
mov r1,#1
str r1,[r2,#ICCICR]
// Set the enable bit in the distributor control register
// ICDDCR, allowing interrupts to be generated
ldr r2,=GIC_dist_base
ldr r2,[r2] // Base address of distributor
mov r1, #1
str r1,[r2,#ICDDCR]
ldmfd sp!,{pc}
//config_interrupt (int ID , int CPU);
// Enables interrupt ID in the distributor and writes its target byte.
// In: r0 = interrupt ID (N), r1 = value stored in the ICDIPTR byte for N.
// NOTE(review): the GIC treats each ICDIPTR byte as a bit mask of target CPU
// interfaces, so a value of 0 would route the interrupt to no CPU — confirm
// what callers pass for the second argument.
// Clobbers r2, r3; r4 and r5 are saved and restored.
.global config_interrupt
config_interrupt:
stmfd sp!,{r4-r5, lr}
// Configure the distributor interrupt set-enable registers (ICDISERn)
// enable the interrupt
// reg_offset = (N/32)*4 (shift and clear some bits)
// value = 1 << (N mod 32);
ldr r2,=GIC_dist_base
ldr r2,[r2] // Read GIC distributor base address
add r2,r2,#ICDISER // r2 <- base address of ICDISER regs
lsr r4,r0,#3 // calculate reg_offset
bic r4,r4,#3 // r4 <- reg_offset
add r4,r2,r4 // r4 <- address of ICDISERn
// Create a bit mask
and r2,r0,#0x1F // r2 <- N mod 32
mov r5,#1 // need to set one bit
lsl r2,r5,r2 // r2 <- value
// Using address in r4 and value in r2 to set the correct bit in the GIC register
ldr r3,[r4] // read ICDISERn
orr r3, r3, r2 // set the enable bit
str r3,[r4] // store the new register value
// Configure the distributor interrupt processor targets register (ICDIPTRn)
// select target CPU(s)
// reg_offset = (N/4)*4 (clear 2 bottom bits)
// index = N mod 4;
ldr r2,=GIC_dist_base
ldr r2,[r2] // Read GIC distributor base address
add r2,r2, #ICDIPTR // base address of ICDIPTR regs
bic r4,r0,#3 // r4 <- reg_offset
add r4,r2,r4 // r4 <- address of ICDIPTRn
// Get the address of the byte within ICDIPTRn
and r2,r0,#0x3 // r2 <- index
add r4,r2,r4 // r4 <- byte address to be set
strb r1,[r4]
// Restore r4-r5 and return by loading the saved lr straight into pc; restoring
// into lr alone would fall through into whatever code follows this routine.
ldmfd sp!, {r4-r5, pc}
// int get_interrupt_number();
// Get the interrupt ID for the current interrupt. This should be called at the
// beginning of ISR. It also changes the state of the interrupt from pending to
// active, which helps to prevent other CPUs from trying to handle it.
// Out: r0 = value read from ICCIAR.
.global get_interrupt_number
get_interrupt_number:
get_intterrupt_number: // original (misspelled) label kept as an alias
// Read the ICCIAR from the CPU Interface
ldr r0,=GIC_cpu_base
ldr r0,[r0]
ldr r0,[r0,#ICCIAR]
mov pc,lr
// void end_of_interrupt (int ID);
// Notify the GIC that the interrupt has been processed. The state goes from
// active to inactive, or it goes from active and pending to pending.
// In: r0 = interrupt ID, written to ICCEOIR of the CPU interface.
// Clobbers r1.
.global end_of_interrupt
end_of_interrupt:
// r1 <- address of the variable holding the CPU interface base
ldr r1,=GIC_cpu_base
// r1 <- CPU interface base address
ldr r1,[r1]
// write the ID to the end-of-interrupt register
str r0,[r1,#ICCEOIR]
mov pc, lr
// IRQ Handler that calls the ISR function in C
// A C function may clobber r0-r3 and r12 under the ARM procedure call standard,
// so those are saved together with lr; r4-r11 are preserved by the callee.
// Saving six registers also keeps sp 8-byte aligned for the C call, provided
// the IRQ-mode sp was 8-byte aligned on entry.
.global irq_handler
irq_handler:
stmfd sp!,{r0-r3, r12, lr}
// Call Interrupt Service Routine in C:
bl ISR
ldmfd sp!, {r0-r3, r12, lr}
// Must subtract 4 from lr
subs pc, lr, #4
Linker Script:
ENTRY(_Reset)
SECTIONS
{
. = 0x0;
.text : {
boot.o (INTERRUPT_VECTOR)
*(.text)
}
.data : { *(.data) }
.bss : { *(.bss COMMON) }
. = ALIGN(8);
. = . + 0x1000; /* 4kB of stack memory */
stack_top = .;
PROVIDE (end = .) ;
}
Main C Program:
#include <stdio.h>
extern "C" void config_interrupt(int, int);
volatile unsigned int * const SHADOW = (unsigned int *)0x1000a000;
void sendShadow(unsigned int s)
{
*SHADOW = s;
}
int main(void)
{
config_interrupt(47,0);
unsigned int r = 1337;
while (1)
{
printf("Hello World! %d\n", r);
sendShadow(1);
}
}
void ISR(void)
{
printf("ISR");
}

How can I initialize the Raspberry properly?

I wrote a motor controller and tested it on a Raspberry Pi running the Arch Linux ARM distro; calculating the control signal took ~0.4 ms. I thought I could do better with a real-time OS, so I started with ChibiOS, but there the runtime was ~2.5 ms. First I used the Crossfire cross compiler, then I switched to Linaro; with Linaro the runtime was a bit worse, ~2.7 ms. What can be the problem? Is it possible that I'm not initializing the hardware in an optimal way?
/*
* Stack pointers initialization.
*/
ldr r0, =__ram_end__
/* Undefined */
msr CPSR_c, #MODE_UND | I_BIT | F_BIT
mov sp, r0
ldr r1, =__und_stack_size__
sub r0, r0, r1
/* Abort */
msr CPSR_c, #MODE_ABT | I_BIT | F_BIT
mov sp, r0
ldr r1, =__abt_stack_size__
sub r0, r0, r1
/* FIQ */
msr CPSR_c, #MODE_FIQ | I_BIT | F_BIT
mov sp, r0
ldr r1, =__fiq_stack_size__
sub r0, r0, r1
/* IRQ */
msr CPSR_c, #MODE_IRQ | I_BIT | F_BIT
mov sp, r0
ldr r1, =__irq_stack_size__
sub r0, r0, r1
/* Supervisor */
msr CPSR_c, #MODE_SVC | I_BIT | F_BIT
mov sp, r0
ldr r1, =__svc_stack_size__
sub r0, r0, r1
/* System */
msr CPSR_c, #MODE_SYS | I_BIT | F_BIT
mov sp, r0
mov r0,#0x8000
mov r1,#0x0000
ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9}
stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9}
ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9}
stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9}
;# enable fpu
mrc p15, 0, r0, c1, c0, 2
orr r0,r0,#0x300000 ;# single precision
orr r0,r0,#0xC00000 ;# double precision
mcr p15, 0, r0, c1, c0, 2
mov r0,#0x40000000
fmxr fpexc,r0
mov r0, #0
ldr r1, =_bss_start
ldr r2, =_bss_end
And the memory setup:
__und_stack_size__ = 0x0004;
__abt_stack_size__ = 0x0004;
__fiq_stack_size__ = 0x0010;
__irq_stack_size__ = 0x0080;
__svc_stack_size__ = 0x0004;
__sys_stack_size__ = 0x0400;
__stacks_total_size__ = __und_stack_size__ + __abt_stack_size__ + __fiq_stack_size__ + __irq_stack_size__ + __svc_stack_size__ + __sys_stack_size__;
MEMORY
{
ram : org = 0x8000, len = 0x06000000 - 0x20
}
__ram_start__ = ORIGIN(ram);
__ram_size__ = LENGTH(ram);
__ram_end__ = __ram_start__ + __ram_size__;
SECTIONS
{
. = 0;
.text : ALIGN(16) SUBALIGN(16)
{
_text = .;
KEEP(*(vectors))
*(.text)
*(.text.*)
*(.rodata)
*(.rodata.*)
*(.glue_7t)
*(.glue_7)
*(.gcc*)
*(.ctors)
*(.dtors)
} > ram
.ARM.extab : {*(.ARM.extab* .gnu.linkonce.armextab.*)} > ram
__exidx_start = .;
.ARM.exidx : {*(.ARM.exidx* .gnu.linkonce.armexidx.*)} > ram
__exidx_end = .;
.eh_frame_hdr : {*(.eh_frame_hdr)}
.eh_frame : ONLY_IF_RO {*(.eh_frame)}
. = ALIGN(4);
_etext = .;
_textdata = _etext;
.data :
{
_data = .;
*(.data)
. = ALIGN(4);
*(.data.*)
. = ALIGN(4);
*(.ramtext)
. = ALIGN(4);
_edata = .;
} > ram
.bss :
{
_bss_start = .;
*(.bss)
. = ALIGN(4);
*(.bss.*)
. = ALIGN(4);
*(COMMON)
. = ALIGN(4);
_bss_end = .;
} > ram
}
PROVIDE(end = .);
_end = .;
__heap_base__ = _end;
__heap_end__ = __ram_end__ - __stacks_total_size__;
__main_thread_stack_base__ = __ram_end__ - __stacks_total_size__;
Where do I make the mistake(s)?
A long time ago (yes, that means sometime in the previous millennium), I used the old PC Speaker pcsp device driver (a little more current patch here) to control stepper motors via a relay attached to the data lines of the parallel port.
Note that's not the same driver as the current pcspkr driver (which only writes to the actual speaker, not to the parallel port); the parallel-output-capable parts of pcsp were never ported to the 2.6 audio architecture.
The trick there is that the driver can register a (high-priority, if needed) interrupt routine that does the actual device register / IO port writes to change the line state. As a result, you simply ioctl() the sample rate to the driver, and then just asynchronously write "ramps" (of data signals to step up/down to/from a certain speed or to perform a number of steps) created in-memory - the driver will then spool them for you, without the need for additional timing-/scheduling-sensitive code.
In the end you got an 8bit digital signal on the parallel port data pins, with timing precision as high as your timer interrupt allows.
There were sufficient lines to drive a stepper; if you wanted to make it turn a given number of steps, you had to:
create a "ramp up" to speed it up from still to fastest
create a "rect wave" to keep it turning
create a "ramp down" to slow it down to still again
If the number of steps was small, you'd write the whole thing in one go; otherwise, you'd write the ramp-up, then as many of the rect-wave blocks as needed, then the ramp-down. Although you'd program possibly thousands of steps in one go, you'd only write three blocks of memory of a few kB each, and the driver's interrupt handler does the rest.
It sounded rather funny if you attached a resistor-array DAC convertor ;-)
The approach can be generalized to the RaspPI; from the interrupt routine, simply write a GPIO control register (on ARM, device regs are always memory mapped, so it's simply a memory access).
Decoupling the "ramp" / "control signal" generation from the timing-sensitive state change (the "control signal application", in effect) and delegating the latter to the interrupt part of a device driver allows to do such tasks with "normal" Linux.
Your timing precision, again, is limited by rate and jitter of your timer interrupt. The RaspPI is capable of running higher timer interrupt rates than an i386 was. I'm pretty sure 1ms isn't a challenge with this approach (it wasn't in 1995). The methodology depends, as said, on the ability to precreate the signal.

Resources