How can I initialize the Raspberry properly? - c

I wrote a motor controller and tested it on a Raspberry Pi running the Arch Linux ARM distro; calculating the control signal took ~0.4 ms. I thought I could do better with a real-time OS, so I started with ChibiOS, but there the runtime was ~2.5 ms. At first I used the Crossfire cross compiler, then I switched to Linaro; with Linaro the runtime was a bit worse, ~2.7 ms. What can be the problem? Is it possible that I'm not initializing the HW in an optimal way?
/*
* Early boot fragment. In order it: gives every ARM processor mode its own
* stack, copies 16 words from 0x8000 to address 0, turns on the VFP and
* loads the .bss bounds (the clear loop itself is not part of this excerpt).
*
* Stack pointers initialization: r0 starts at __ram_end__ and walks
* downwards; each mode takes the current value as its sp and r0 is then
* lowered by that mode's __*_stack_size__ before the next mode is entered.
* Every mode switch keeps IRQ and FIQ masked (I_BIT | F_BIT).
*
* NOTE(review): nothing in this fragment enables the caches, the MMU or
* branch prediction -- confirm whether that happens elsewhere, since running
* uncached would fit the slow timings reported for this setup.
*/
ldr r0, =__ram_end__
/* Undefined */
msr CPSR_c, #MODE_UND | I_BIT | F_BIT
mov sp, r0
ldr r1, =__und_stack_size__
sub r0, r0, r1
/* Abort */
msr CPSR_c, #MODE_ABT | I_BIT | F_BIT
mov sp, r0
ldr r1, =__abt_stack_size__
sub r0, r0, r1
/* FIQ */
msr CPSR_c, #MODE_FIQ | I_BIT | F_BIT
mov sp, r0
ldr r1, =__fiq_stack_size__
sub r0, r0, r1
/* IRQ */
msr CPSR_c, #MODE_IRQ | I_BIT | F_BIT
mov sp, r0
ldr r1, =__irq_stack_size__
sub r0, r0, r1
/* Supervisor */
msr CPSR_c, #MODE_SVC | I_BIT | F_BIT
mov sp, r0
ldr r1, =__svc_stack_size__
sub r0, r0, r1
/* System: the last mode entered, so the code below runs in System mode. */
msr CPSR_c, #MODE_SYS | I_BIT | F_BIT
mov sp, r0
/* Copy 16 words (two bursts of 8 registers) from 0x8000 to address 0. */
mov r0,#0x8000
mov r1,#0x0000
ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9}
stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9}
ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9}
stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9}
;# enable fpu
/* Read-modify-write of CP15 c1,c0,2: sets bits 20-23 (two ORRs). */
mrc p15, 0, r0, c1, c0, 2
orr r0,r0,#0x300000 ;# single precision
orr r0,r0,#0xC00000 ;# double precision
mcr p15, 0, r0, c1, c0, 2
/* Write 0x40000000 (bit 30 only) to FPEXC. */
mov r0,#0x40000000
fmxr fpexc,r0
/* r0 = 0, r1/r2 = .bss bounds; presumably inputs to a zero-fill loop that follows this excerpt. */
mov r0, #0
ldr r1, =_bss_start
ldr r2, =_bss_end
And the memory setup:
/*
 * Linker script: one RAM region starting at 0x8000 that holds code, data,
 * .bss, the heap and (at the very top) the per-mode stacks.
 *
 * Per-mode stack sizes in bytes. The startup code subtracts these from
 * __ram_end__ one after another to place each mode's stack.
 * NOTE(review): und/abt/svc get 4 bytes each and irq gets 128 bytes --
 * confirm that no handler running in those modes pushes more than that.
 */
__und_stack_size__ = 0x0004;
__abt_stack_size__ = 0x0004;
__fiq_stack_size__ = 0x0010;
__irq_stack_size__ = 0x0080;
__svc_stack_size__ = 0x0004;
__sys_stack_size__ = 0x0400;
__stacks_total_size__ = __und_stack_size__ + __abt_stack_size__ + __fiq_stack_size__ + __irq_stack_size__ + __svc_stack_size__ + __sys_stack_size__;
MEMORY
{
ram : org = 0x8000, len = 0x06000000 - 0x20
}
/* Region bounds exported as symbols for the startup code. */
__ram_start__ = ORIGIN(ram);
__ram_size__ = LENGTH(ram);
__ram_end__ = __ram_start__ + __ram_size__;
SECTIONS
{
/* NOTE(review): the location counter is set to 0 here, but every section below is assigned to `ram` (origin 0x8000). */
. = 0;
/* Code and read-only data; the `vectors` input section is kept and placed first. */
.text : ALIGN(16) SUBALIGN(16)
{
_text = .;
KEEP(*(vectors))
*(.text)
*(.text.*)
*(.rodata)
*(.rodata.*)
*(.glue_7t)
*(.glue_7)
*(.gcc*)
*(.ctors)
*(.dtors)
} > ram
/* Exception-unwinding tables. */
.ARM.extab : {*(.ARM.extab* .gnu.linkonce.armextab.*)} > ram
__exidx_start = .;
.ARM.exidx : {*(.ARM.exidx* .gnu.linkonce.armexidx.*)} > ram
__exidx_end = .;
/* These two output sections name no memory region explicitly. */
.eh_frame_hdr : {*(.eh_frame_hdr)}
.eh_frame : ONLY_IF_RO {*(.eh_frame)}
. = ALIGN(4);
_etext = .;
_textdata = _etext;
/* Initialised data; it is linked to run where it is loaded (no separate load address is given). */
.data :
{
_data = .;
*(.data)
. = ALIGN(4);
*(.data.*)
. = ALIGN(4);
*(.ramtext)
. = ALIGN(4);
_edata = .;
} > ram
/* Zero-initialised data; _bss_start/_bss_end are the bounds read by the startup code. */
.bss :
{
_bss_start = .;
*(.bss)
. = ALIGN(4);
*(.bss.*)
. = ALIGN(4);
*(COMMON)
. = ALIGN(4);
_bss_end = .;
} > ram
}
/* The heap spans from the end of the image up to the bottom of the stack area. */
PROVIDE(end = .);
_end = .;
__heap_base__ = _end;
__heap_end__ = __ram_end__ - __stacks_total_size__;
__main_thread_stack_base__ = __ram_end__ - __stacks_total_size__;
Where do I make the mistake(s)?

A long time ago (yes, that means sometime in the previous millennium), I used the old PC Speaker pcsp device driver (a little more current patch here) to control stepper motors via a relay attached to the data lines of the parallel port.
Note that's not the same driver as the current pcspkr driver (which only writes to the actual speaker, not to the parallel port); the parallel-output-capable parts of pcsp were never ported to the 2.6 audio architecture.
The trick there is that the driver can register a (high-priority, if needed) interrupt routine that does the actual device register / IO port writes to change the line state. As a result, you simply ioctl() the sample rate to the driver, and then just asynchronously write "ramps" (of data signals to step up/down to/from a certain speed or to perform a number of steps) created in-memory - the driver will then spool them for you, without the need for additional timing-/scheduling-sensitive code.
In the end you got an 8bit digital signal on the parallel port data pins, with timing precision as high as your timer interrupt allows.
There were sufficient lines to drive a stepper; if you wanted to make it turn a given number of steps, you had to:
create a "ramp up" to speed it up from still to fastest
create a "rect wave" to keep it turning
create a "ramp down" to slow it down to still again
If the number of steps was small, write the whole thing in one go, other wise, write the ramp-up, then write as many of the rect-wave blocks as needed, then the ramp down. Although you'd program possibly thousands of steps in one go, you'd only write three blocks of mem a few kB each, and the driver's interrupt handler does the rest.
It sounded rather funny if you attached a resistor-array DAC convertor ;-)
The approach can be generalized to the RaspPI; from the interrupt routine, simply write a GPIO control register (on ARM, device regs are always memory mapped, so it's simply a memory access).
Decoupling the "ramp" / "control signal" generation from the timing-sensitive state change (the "control signal application", in effect) and delegating the latter to the interrupt part of a device driver allows to do such tasks with "normal" Linux.
Your timing precision, again, is limited by rate and jitter of your timer interrupt. The RaspPI is capable of running higher timer interrupt rates than an i386 was. I'm pretty sure 1ms isn't a challenge with this approach (it wasn't in 1995). The methodology depends, as said, on the ability to precreate the signal.

Related

LINKER script GCC how to avoid veneer call

I work on the project where I copy some functions to the RAM from FLASH and call them. Everything is OK except one small problem I have - if I call function directly the compiler adds the veneer call instead (which calls the funtion in the RAM correctly).
IF I call it via the pointer all is OK. The debugger shows that resolved address of the function is correct.
/*
 * Call `func` through a volatile function pointer so the compiler emits an
 * indirect (register) call instead of a direct `bl`. Any return value is
 * discarded.
 *
 * The body is wrapped in do { ... } while (0) so that `RAMFCALL(...);`
 * behaves as exactly one statement, e.g. between `if` and `else`.
 */
#define RAMFCALL(func, ...) \
    do { \
        unsigned (* volatile fptr)() = (unsigned (* volatile)())func; \
        fptr(__VA_ARGS__); \
    } while (0)
RAMFCALL(FLASH_EraseSector, 0, 0);
FLASH_EraseSector(0,0);
and the corresponding calls:
311 RAMFCALL(FLASH_EraseSector, 0, 0);
0801738e: ldr r3, [pc, #88] ; (0x80173e8 <flashSTMInit+140>)
08017390: str r3, [sp, #12]
08017392: ldr r3, [sp, #12]
08017394: movs r1, #0
08017396: mov r0, r1
08017398: blx r3
312 FLASH_EraseSector(0,0);
0801739a: movs r1, #0
0801739c: mov r0, r1
0801739e: bl 0x801e9f0 <__FLASH_EraseSector_veneer>
Debugger shows the correct addresses.
and the corresponding part of the linker script
/*
 * Sections that run from RAM but are stored in FLASH (`>RAM AT> FLASH`).
 * The *_start / *_end / *_load symbols defined below mark the bounds of
 * each part for code that copies them into RAM.
 *
 * NOTE(review): members of an OVERLAY are all linked to run at the same
 * address, so .RAM_functions and .data would share RAM here, and
 * NOCROSSREFS forbids references between them -- confirm this is intended.
 */
OVERLAY : NOCROSSREFS
{
.RAM_functions
{
. = ALIGN(512);
/* Load (FLASH) address of this section. */
RAM_functions_load = LOADADDR(.RAM_functions);
/* Vector table that is relocated to RAM; placed first, on the 512-byte boundary. */
PROVIDE(RAM_VectorTable_start = .);
KEEP(*(.RAM_VectorTable))
KEEP(*(.RAM_VectorTable*))
PROVIDE(RAM_VectorTable_end = .);
. = ALIGN(4);
/* Functions placed in the .RAM_functions* input sections. */
RAM_functions_start = .;
KEEP(*(.RAM_functions))
KEEP(*(.RAM_functions*))
RAM_functions_end = .;
. = ALIGN(4);
/* Data placed in the .RAM_functionsDATA* input sections. */
RAM_functionsDATA_start = .;
KEEP(*(.RAM_functionsDATA))
KEEP(*(.RAM_functionsDATA*))
RAM_functionsDATA_end = .;
. = ALIGN(4);
/* Marks the first address after the section contents. */
RAM_functionsBUFFER_start = .;
}
/* used by the startup to initialize data */
/* Initialized data sections goes into RAM, load LMA copy after code */
.data
{
. = ALIGN(4);
_sdata = .; /* create a global symbol at data start */
*(.data) /* .data sections */
*(.data*) /* .data* sections */
. = ALIGN(4);
_edata = .; /* define a global symbol at data end */
}
}>RAM AT> FLASH
And again the question: how to remove the veneer call
I will answer myself as I have found the reason :)
The bl instruction can only reach a limited PC-relative range (±32 MB in ARM state, ±16 MB for the Thumb-2 encoding). I was calling the function in RAM from FLASH, and the actual distance was much greater than that, so the linker had to insert a veneer.
Veneers can be eliminated by passing the -mlong-calls option to the compiler. Each call site becomes a bit longer, losing some performance; however, that might still be better than losing performance in the veneers.
Individual functions can also be marked to be called through registers by applying the long_call attribute ( ARM assumed based on the assembly, decribed at https://gcc.gnu.org/onlinedocs/gcc/ARM-Function-Attributes.html#ARM-Function-Attributes )

freeRTOS linking process: multiple definition of `_start'

I am trying to compile freeRTOS for raspberry pi 2. Those are the commands I tried so far:
arm-none-eabi-gcc -march=armv7-a -mcpu=cortex-a7 -mfpu=neon-vfpv4
-mfloat-abi=hard test.c -o test.o
arm-none-eabi-as -march=armv7-a -mcpu=cortex-a7 -mfpu=neon-vfpv4
-mfloat-abi=hard startup.s -o startup.o
arm-none-eabi-ld test.o startup.o -static -Map kernel7.map -o
target.elf -T raspberrypi.ld
The two upper ones do work fine. However the last one doesn't, it gives me the following error:
startup.o: In function `_start':
(.init+0x0): multiple definition of `_start'
test.o::(.text+0x6c): first defined here
startup.o: In function `swi_handler':
(.init+0x28): undefined reference to `vPortYieldProcessor'
startup.o: In function `irq_handler':
(.init+0x38): undefined reference to `vFreeRTOS_ISR'
startup.o: In function `zero_loop':
(.init+0xcc): undefined reference to `rpi_cpu_irq_disable'
This is the corresponding code:
test.c:
#include <stdio.h>
/* Replacement for the C library exit(): ignores `code` and spins forever. */
void exit(int code)
{
    for (;;) {
        /* never returns */
    }
}

/* Program entry point reached from the startup code; returns 0 immediately. */
int main(void)
{
    return 0;
}
startup.s:
/*
 * Exception vector block, placed in the .init section.
 * Layout: 8 `ldr pc, <slot>` instructions followed directly by the 8 address
 * words they load from (16 words in total). The loads are PC-relative, so
 * the instructions and their address words must stay together and in this
 * order; the reset code copies the block to address 0 as two bursts of
 * 8 words.
 */
.extern system_init
.extern __bss_start
.extern __bss_end
.extern vFreeRTOS_ISR
.extern vPortYieldProcessor
.extern rpi_cpu_irq_disable
.extern main
.section .init
.globl _start
;;
_start:
;# All the following instruction should be read as:
;# Load the address at symbol into the program counter.
ldr pc,reset_handler ;# Processor Reset handler -- we will have to force this on the raspi!
;# Because this is the first instruction executed, of cause it causes an immediate branch into reset!
ldr pc,undefined_handler ;# Undefined instruction handler -- processors that don't have thumb can emulate thumb!
ldr pc,swi_handler ;# Software interrupt / TRAP (SVC) -- system SVC handler for switching to kernel mode.
ldr pc,prefetch_handler ;# Prefetch/abort handler.
ldr pc,data_handler ;# Data abort handler/
ldr pc,unused_handler ;# -- Historical from 26-bit addressing ARMs -- was invalid address handler.
ldr pc,irq_handler ;# IRQ handler
ldr pc,fiq_handler ;# Fast interrupt handler.
;# Here we create an exception address table! This means that reset/hang/irq can be absolute addresses
/* One word per vector above, in the same order; SWI and IRQ point at the external FreeRTOS entry points. */
reset_handler: .word reset
undefined_handler: .word undefined_instruction
swi_handler: .word vPortYieldProcessor
prefetch_handler: .word prefetch_abort
data_handler: .word data_abort
unused_handler: .word unused
irq_handler: .word vFreeRTOS_ISR
fiq_handler: .word fiq
/*
 * reset: entry point reached through the first vector.
 *   1. Masks IRQ and FIQ.
 *   2. If the core is in HYP mode (mode field 0x1A), drops to Supervisor mode.
 *   3. Copies the 16-word vector block from 0x8000 to address 0.
 *   4. Sets the IRQ, FIQ and SVC stack pointers.
 *   5. Zero-fills [__bss_start, __bss_end), calls rpi_cpu_irq_disable and
 *      branches to main.
 * Clobbers r0-r9. Does not return.
 */
reset:
    /* Disable IRQ & FIQ */
    cpsid if
    /* Check for HYP mode */
    mrs r0, cpsr_all
    and r0, r0, #0x1F           /* r0 = current mode field */
    cmp r0, #0x1A
    bne continueBoot
overHyped: /* Get out of HYP mode */
    ldr r1, =continueBoot
    msr ELR_hyp, r1             /* address that eret resumes at */
    mrs r1, cpsr_all
    /*
     * Clear only the mode field and then select Supervisor. Masking with AND
     * here would keep the old mode bits (0x1A | 0x13 = 0x1B, Undefined mode)
     * and would also clear the interrupt mask bits set by cpsid above.
     */
    bic r1, r1, #0x1F           /* CPSR_MODE_MASK */
    orr r1, r1, #0x13           /* CPSR_MODE_SUPERVISOR */
    msr SPSR_hyp, r1
    eret
continueBoot:
    /* In the reset handler, we need to copy our interrupt vector table to 0x0000, its currently at 0x8000 */
    mov r0,#0x8000              /* source pointer */
    mov r1,#0x0000              /* destination pointer */
    /* First 8 words: the branching instructions */
    ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9}
    stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9}
    /* Next 8 words: the address table those instructions load from */
    ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9}
    stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9}
    /* Set up the various STACK pointers for different CPU modes */
    /* (PSR_IRQ_MODE|PSR_FIQ_DIS|PSR_IRQ_DIS) */
    mov r0,#0xD2
    msr cpsr_c,r0
    mov sp,#0x8000
    /* (PSR_FIQ_MODE|PSR_FIQ_DIS|PSR_IRQ_DIS) */
    mov r0,#0xD1
    msr cpsr_c,r0
    mov sp,#0x4000
    /* (PSR_SVC_MODE|PSR_FIQ_DIS|PSR_IRQ_DIS) */
    mov r0,#0xD3
    msr cpsr_c,r0
    mov sp,#0x8000000
    /* Zero-fill .bss one word at a time. Addresses are unsigned, so the
       loop uses the unsigned "lower" condition rather than signed "lt". */
    ldr r0, =__bss_start
    ldr r1, =__bss_end
    mov r2, #0
zero_loop:
    cmp r0,r1
    it lo
    strlo r2,[r0], #4
    blo zero_loop
    bl rpi_cpu_irq_disable
    /* mov sp,#0x1000000 */
    b main                      /* We're ready?? Lets start main execution! */
.section .text
/*
 * Default exception handlers. Each one traps the CPU in a branch-to-self
 * loop so that the faulting mode can be inspected with a debugger.
 */
undefined_instruction:
    b .
prefetch_abort:
    b .
data_abort:
    b .
unused:
    b .
fiq:
    b .
hang:
    b .
/* PUT32: store the word in r1 at the address in r0. */
    .global PUT32
PUT32:
    str     r1, [r0]
    bx      lr

/* GET32: load the word at the address in r0 and return it in r0. */
    .global GET32
GET32:
    ldr     r0, [r0]
    bx      lr

/* dummy: returns immediately without touching any register. */
    .global dummy
dummy:
    bx      lr
raspberrypi.ld:
/**
* BlueThunder Linker Script for the raspberry Pi!
*
*
*
**/
/*
 * Memory map: the first 32K are reserved, the bootstrap (.init) lives in the
 * 32K starting at 0x8000, and everything else goes into RAM from 0x10000.
 */
MEMORY
{
    RESERVED (r)   : ORIGIN = 0x00000000, LENGTH = 32K
    INIT_RAM (rwx) : ORIGIN = 0x00008000, LENGTH = 32K
    RAM (rwx)      : ORIGIN = 0x00010000, LENGTH = 128M
}
ENTRY(_start)
SECTIONS {
    /*
    * Our init section allows us to place the bootstrap code at address 0x8000
    *
    * This is where the Graphics processor forces the ARM to start execution.
    * However the interrupt vector code remains at 0x0000, and so we must copy the correct
    * branch instructions to 0x0000 - 0x001C in order to get the processor to handle interrupts.
    *
    */
    .init : {
        KEEP(*(.init))
    } > INIT_RAM = 0
    .module_entries : {
        __module_entries_start = .;
        KEEP(*(.module_entries))
        KEEP(*(.module_entries.*))
        __module_entries_end = .;
        __module_entries_size = SIZEOF(.module_entries);
    } > INIT_RAM
    /**
    * This is the main code section, it is essentially of unlimited size. (128Mb).
    *
    * .text.* and the read-only data sections are listed explicitly so that
    * they are placed here rather than left as orphan sections.
    **/
    .text : {
        *(.text)
        *(.text.*)
        *(.rodata)
        *(.rodata.*)
    } > RAM
    /*
    * Next we put the data.
    */
    .data : {
        *(.data)
        *(.data.*)
    } > RAM
    /*
    * Zero-initialised data. The startup code clears this range one word at
    * a time, so both bounds are word aligned, and COMMON symbols are
    * collected here so that they are cleared as well.
    */
    .bss :
    {
        . = ALIGN(4);
        __bss_start = .;
        *(.bss)
        *(.bss.*)
        *(COMMON)
        . = ALIGN(4);
        __bss_end = .;
    } > RAM
    /*
    __exidx_start = .;
    .ARM.exidx :
    {
    *(.ARM.exidx* .gnu.linkonce.armexidx.*)
    } > RAM
    __exidx_end = .;
    */
    /**
    * Place HEAP here???
    **/
    PROVIDE(__HEAP_START = __bss_end );
    /**
    * Stack starts at the top of the RAM, and moves down!
    **/
    _estack = ORIGIN(RAM) + LENGTH(RAM);
}
As you can see test.c doesn't contain an entry point called _start, neither does it have one in its assembly compiled form. Only startup.s does.
Any idea's about how I could solve my current issue?
EDIT: all the code if needed used can be found here:https://github.com/jameswalmsley/RaspberryPi-FreeRTOS

ARM Cortex A9 Startup Code and Interrupt Setup

I try to program Cortex-A9 in a bare metal fashion. I use the 'hello world' code from:
https://github.com/tukl-msd/gem5.bare-metal which works. However, I'm not able to get interrupts working. When I create an Interrupt with Interrupt e.g. #47 my software doesn't jump in the ISR function. What I am missing? Do I have to do some more initialization?
Startup Code:
/*
 * Exception vector table: eight branch instructions, one per exception, in
 * the fixed architectural order. Only Reset, IRQ and FIQ are routed to real
 * code; every other entry is a branch-to-self. Note that the FIQ entry
 * branches to the same irq_handler as the IRQ entry.
 */
.section INTERRUPT_VECTOR, "x"
.global _Reset
_Reset:
B Reset_Handler /* Reset */
B . /* Undefined */
B . /* SWI */
B . /* Prefetch Abort */
B . /* Data Abort */
B . /* reserved */
B irq_handler /* IRQ */
B irq_handler /* FIQ */
// Base addresses of the two GIC register blocks
    .equ    GIC_DIST,   0x10041000      // distributor
    .equ    GIC_CPU,    0x10040000      // CPU interface

// Register offsets within the CPU interface
    .equ    ICCICR,     0x00
    .equ    ICCPMR,     0x04
    .equ    ICCEOIR,    0x10
    .equ    ICCIAR,     0x0C

// Register offsets within the distributor
    .equ    ICDDCR,     0x00
    .equ    ICDISER,    0x100
    .equ    ICDIPTR,    0x800

// Processor mode value
    .equ    USR_MODE,   0x10

// Run-time copies of the GIC base addresses; both start out as 0
GIC_dist_base:  .word 0                 // address of GIC distributor
GIC_cpu_base:   .word 0                 // address of GIC CPU interface
// Reset_Handler: first code run after reset.
//   - Gives IRQ mode and Supervisor mode their own stacks. sp is banked per
//     mode, so the IRQ handler cannot push anything until the IRQ-mode sp
//     has been set.
//   - Initialises the GIC, then unmasks IRQs, then calls main.
// The region ending at stack_top is split in two: IRQ mode gets the upper
// IRQ_STACK_BYTES, Supervisor mode the remainder below it.
// NOTE(review): the split is an even half of the 4kB the linker script
// reserves; adjust it if the C interrupt routine needs more stack.
    .equ    IRQ_STACK_BYTES, 0x800
Reset_Handler:
    LDR r0, =stack_top
    MSR cpsr_c, #0xD2           // IRQ mode, IRQ and FIQ masked
    MOV sp, r0
    SUB r0, r0, #IRQ_STACK_BYTES
    MSR cpsr_c, #0xD3           // Supervisor mode, IRQ and FIQ masked
    MOV sp, r0
    // Configure GIC while interrupts are still masked:
    BL IC_init
    // Enable Interrupts on CPU Side:
    MRS r1, cpsr                // get the cpsr.
    BIC r1, r1, #0x80           // enable IRQ (ORR to disable).
    MSR cpsr_c, r1              // copy it back, control field bit update.
    // Branch to C code
    BL main
    B .
// Initialize GIC
// Stores the GIC base addresses in GIC_dist_base / GIC_cpu_base, then
// enables the CPU interface (priority mask + enable bit) and the
// distributor. Clobbers r1 and r2.
// The routine is exported under both names: IC_init is the name used by
// Reset_Handler, GIC_init the name the original .global directive announced.
    .global GIC_init
    .global IC_init
GIC_init:
IC_init:
    stmfd sp!,{lr}
    // Read GIC base from Configuration Base Address Register
    // And use it to initialize GIC_dist_base and GIC_cpu_base
    //mrc p15, 4, r0, c15, c0, 0
    //add r2, r0, #GIC_DIST // Calculate address
    ldr r2, =GIC_DIST
    ldr r1, =GIC_dist_base
    str r2,[r1]                 // Store address of GIC distributor
    //add r2, r0, #GIC_CPU // Calculate address
    ldr r2, =GIC_CPU
    ldr r1, =GIC_cpu_base
    str r2,[r1]                 // Store address of GIC CPU interface
    // From here on r2 holds the CPU interface base itself. ICCPMR and ICCICR
    // are CPU-interface registers, so they must be written relative to that
    // base and not relative to the address of a variable.
    // Register (ICCPMR) to enable interrupts of all priorities
    ldr r1,=0xFFFF
    str r1,[r2,#ICCPMR]
    // Set the enable bit in the CPU interface control register
    // ICCICR, allowing CPU(s) to receive interrupts
    mov r1,#1
    str r1,[r2,#ICCICR]
    // Set the enable bit in the distributor control register
    // ICDDCR, allowing interrupts to be generated
    ldr r2,=GIC_dist_base
    ldr r2,[r2]                 // Base address of distributor
    mov r1, #1
    str r1,[r2,#ICDDCR]
    ldmfd sp!,{pc}
// void config_interrupt (int ID , int CPU);
// Enables interrupt ID in the distributor and writes the low byte of CPU
// into that interrupt's target byte.
// In:    r0 = interrupt ID (N), r1 = value for the target byte
// Clobb: r2, r3 (r4, r5 are saved and restored)
// NOTE(review): the GIC treats each target byte as a bit mask of CPU
// interfaces (bit 0 = CPU0), so a value of 0 selects no CPU -- confirm what
// callers pass.
    .global config_interrupt
config_interrupt:
    stmfd sp!,{r4-r5, lr}
    // Configure the distributor interrupt set-enable registers (ICDISERn)
    // enable the interrupt
    // reg_offset = (N/32)*4 (shift and clear some bits)
    // value = 1 << (N mod 32);
    ldr r2,=GIC_dist_base
    ldr r2,[r2]                 // Read GIC distributor base address
    add r2,r2,#ICDISER          // r2 <- base address of ICDISER regs
    lsr r4,r0,#3                // calculate reg_offset
    bic r4,r4,#3                // r4 <- reg_offset
    add r4,r2,r4                // r4 <- address of ICDISERn
    // Create a bit mask
    and r2,r0,#0x1F             // r2 <- N mod 32
    mov r5,#1                   // need to set one bit
    lsl r2,r5,r2                // r2 <- value
    // Using address in r4 and value in r2 to set the correct bit in the GIC register
    ldr r3,[r4]                 // read ICDISERn
    orr r3, r3, r2              // set the enable bit
    str r3,[r4]                 // store the new register value
    // Configure the distributor interrupt processor targets register (ICDIPTRn)
    // select target CPU(s)
    // reg_offset = (N/4)*4 (clear 2 bottom bits)
    // index = N mod 4;
    ldr r2,=GIC_dist_base
    ldr r2,[r2]                 // Read GIC distributor base address
    add r2,r2, #ICDIPTR         // base address of ICDIPTR regs
    bic r4,r0,#3                // r4 <- reg_offset
    add r4,r2,r4                // r4 <- address of ICDIPTRn
    // Get the address of the byte within ICDIPTRn
    and r2,r0,#0x3              // r2 <- index
    add r4,r2,r4                // r4 <- byte address to be set
    strb r1,[r4]
    // Pop the saved return address straight into pc so the routine actually
    // returns instead of running on into whatever code follows it.
    ldmfd sp!, {r4-r5, pc}
// int get_interrupt_number();
// Get the interrupt ID for the current interrupt. This should be called at the
// beginning of ISR. It also changes the state of the interrupt from pending to
// active, which helps to prevent other CPUs from trying to handle it.
// Out: r0 = value read from ICCIAR
// The label is spelled exactly like the .global directive so that the
// exported symbol is actually defined.
    .global get_interrupt_number
get_interrupt_number:
    // Read the ICCIAR from the CPU Interface
    ldr r0,=GIC_cpu_base
    ldr r0,[r0]
    ldr r0,[r0,#ICCIAR]
    mov pc,lr
// void end_of_interrupt (int ID);
// Tells the GIC that handling of interrupt ID is finished by writing ID to
// the end-of-interrupt register. The state goes from active to inactive, or
// from active-and-pending to pending.
// In:    r0 = interrupt ID
// Clobb: r1
    .global end_of_interrupt
end_of_interrupt:
    ldr     r1, =GIC_cpu_base
    ldr     r1, [r1]                // r1 = CPU interface base address
    str     r0, [r1, #ICCEOIR]
    bx      lr
// IRQ Handler that calls the ISR function in C
// Saves every register a C function is allowed to clobber (r0-r3, r12, lr;
// r4-r7 are kept in the list as before), calls ISR, then returns to the
// interrupted code. Ten words are pushed, so sp stays 8-byte aligned for
// the C call provided it was aligned on entry.
// This handler does not touch the GIC itself.
    .global irq_handler
irq_handler:
    sub lr, lr, #4                  // lr_irq points one instruction too far
    stmfd sp!,{r0-r7, r12, lr}
    // Call Interrupt Service Routine in C:
    bl ISR
    // Restore registers and return; ^ also restores CPSR from SPSR
    ldmfd sp!, {r0-r7, r12, pc}^
Linker Script:
/*
 * Image layout: everything is linked contiguously starting at address 0,
 * with the INTERRUPT_VECTOR section of boot.o placed first so that the
 * vector table sits at 0x0. A 4kB stack area follows .bss; stack_top marks
 * its upper end and is what the startup code loads into sp.
 */
ENTRY(_Reset)
SECTIONS
{
. = 0x0;
.text : {
boot.o (INTERRUPT_VECTOR)
*(.text)
}
.data : { *(.data) }
.bss : { *(.bss COMMON) }
. = ALIGN(8);
. = . + 0x1000; /* 4kB of stack memory */
stack_top = .;
PROVIDE (end = .) ;
}
Main C Program:
#include <stdio.h>
extern "C" void config_interrupt(int, int);
// Fixed address of the memory-mapped word that sendShadow() writes to.
volatile unsigned int * const SHADOW = (unsigned int *)0x1000a000;

// Stores `s` to the word at SHADOW.
void sendShadow(unsigned int s)
{
    SHADOW[0] = s;
}
// Enables interrupt 47 for CPU0, then prints a greeting and writes 1 to the
// shadow register forever.
int main(void)
{
    // The second argument is stored unchanged in the interrupt's GIC target
    // byte, which is a bit mask of CPU interfaces: bit 0 (value 1) selects
    // CPU0, whereas 0 would route the interrupt to no CPU at all.
    config_interrupt(47, 1);
    unsigned int r = 1337;
    while (1)
    {
        printf("Hello World! %d\n", r);
        sendShadow(1);
    }
}
// Interrupt service routine called from the assembly IRQ handler (`bl ISR`).
// It needs C linkage: this file is compiled as C++ (it uses extern "C"
// itself), and without it the function would get a mangled symbol name that
// the assembly reference to `ISR` cannot resolve.
// NOTE(review): this routine neither acknowledges the interrupt nor signals
// end-of-interrupt to the GIC; confirm where that is meant to happen.
extern "C" void ISR(void)
{
    printf("ISR");
}

Link function to specific address

I am a little bit confused with ENTRY and STARTUP commands in GCC linker script.
As stated here: http://wiki.osdev.org/Linker_Scripts
ENTRY() makes any symbol to be linked as first item in .text section.
STARTUP() on the other hand makes whole compiled file to be placed as first item in .text section.
In my project however it behaves strange.
I am using gnu-arm-none-eabi toolchain and in my linker script command ENTRY(asm_start) makes no effect. Linker script:
ENTRY(asm_start)
MEMORY
{
RAM : ORIGIN = 0x10000, LENGTH = 0x1000000
PIC_BUFF : ORIGIN = 0x10000 + LENGTH(RAM), LENGTH = 200M
}
SECTIONS
{
.text : {*(.text)} > RAM
.data : {*(.data)} > PIC_BUFF
// etc.
assembly function:
.text
.global asm_start
.global exc_stack
.global supervisor_sp
asm_start:
# initialize Stack pointer for exception modes
mrs r4, cpsr
bic r4, r4, #0x1f
#FIQ Mode
orr r3, r4, #0x11
msr cpsr_c, r3
ldr sp, =exc_stack
#IRQ Mode
orr r3, r4, #0x12
msr cpsr_c, r3
// etc.
and asm_start finishes in some random place in memory.
On the other hand STARTUP() function works fine and desired file ends in proper place in .text section.
Could some please explain what exactly is happening in this case?
ENTRY() only records the program's entry point (the address at which execution should start) in the output file; it does not influence where that symbol is placed. STARTUP() makes the named file the first object linked, so its code ends up first in .text.
BTW, for bare-metal Cortex ARM with gnu-arm-none-eabi, ENTRY() is usually set to the Reset_Handler vector, and .text must start with the interrupt vector table.
I've always seen that placement done manually, but it might be possible to use STARTUP() for it.

AM335x FreeRTOS port, unable to handle IRQs and SWI

I'm currently trying to port FreeRTOS to the TI AM335x processor, best known for being used on the BeagleBones. I am able to boot, run GPIOs and set up a compare-match timer for running the system ticks. If I disable interrupts, I can see how the interrupt gets set the correct amount of time after the timer was started. And if I enable interrupts, my application dies after that same given time. The application also dies if I try to yield a task, i.e. call the SWI handler. This makes me believe that the vector table is unavailable or incorrectly set up. The ROM exception vectors for SWI and IRQ have the contents 4030CE08h and 4030CE18h, which in turn execute some branching in RAM; the TRM says:
User code can redirect any exception to a custom handler either by writing its address to the appropriate location from 4030CE24h to 4030CE3Ch or by overriding the branch (load into PC) instruction between addresses from 4030CE04h to 4030CE1Ch.
My vIRQHandler function address is therefore written to 4030CE38h. One would hope this was enough, but sadly no. I suspect that there is something wrong in my boot.s file, however my assembly has never been that great and i'm struggling to understand the code. The boot.s and the rest of the project was started from a OMAP3 port.
Boot.s:
/*
 * Exception vector table for the .startup section: one entry per exception
 * in architectural order. Reset branches directly to _start; the other
 * entries load pc from the literal words that follow the table.
 *
 * NOTE(review): each `ldr pc, _xxx` jumps to the value stored in the word,
 * i.e. to 0x4030CE24..0x4030CE3C. According to the TRM text quoted in the
 * question those locations hold handler *addresses*, so the CPU would end
 * up executing an address as an instruction -- confirm whether these words
 * should hold the handler addresses themselves.
 */
.section .startup,"ax"
.code 32
.align 0
b _start /* reset - _start */
ldr pc, _undf /* undefined - _undf */
ldr pc, _swi /* SWI - _swi */
ldr pc, _pabt /* program abort - _pabt */
ldr pc, _dabt /* data abort - _dabt */
nop /* reserved */
ldr pc, _irq /* IRQ - read the VIC */
ldr pc, _fiq /* FIQ - _fiq */
_undf: .word 0x4030CE24 /* undefined */
_swi: .word 0x4030CE28 /* SWI */
_pabt: .word 0x4030CE2C /* program abort */
_dabt: .word 0x4030CE30 /* data abort */
_irq: .word 0x4030CE38
_fiq: .word 0x4030CE3C /* FIQ */
The branch to start sets up a stack for each mode and clears the bss, not sure if that is relevant. This is the code which seems relevant to me, and I have updated the words to fit the AM335 instead of the OMAP3.
The setting IRQ handler:
#define E_IRQ (*(REG32 (0x4030CE38)))
....
/* Setup interrupt handler */
E_IRQ = ( long ) vIRQHandler;
If anyone have any pointers to an assembly newbie it would be much appriciated, because i'm completely stuck :)
U-boot had moved the exception vector table. However, instead of recompiling u-boot I just reset the exception vector table in my own start script.
Added this right before branching to main:
/*
 * Point the CPU at our own exception vector table before entering main.
 * Trailing comments use C-style delimiters: in GNU as for ARM a '#' only
 * starts a comment at the beginning of a line, so a '#' after an
 * instruction is rejected as garbage.
 */
/* Set V=0 in CP15 SCTRL register - for VBAR to point to vector */
mrc p15, 0, r0, c1, c0, 0 /* Read CP15 SCTRL Register */
bic r0, r0, #(1 << 13) /* V = 0 */
mcr p15, 0, r0, c1, c0, 0 /* Write CP15 SCTRL Register */
/* Set vector address in CP15 VBAR register */
ldr r0, =_vector_table
mcr p15, 0, r0, c12, c0, 0 /* Set VBAR */
bl main
And put in the _vector_table label at the start of my exception vector table:
/*
 * The same exception vector table, now with the _vector_table label on its
 * first entry so that its address can be loaded by the startup code.
 * NOTE(review): VBAR ignores the low five address bits, so the table has to
 * start on a 32-byte boundary; `.align 0` does not request that -- confirm
 * the linker places this section suitably.
 */
.section .startup,"ax"
.code 32
.align 0
_vector_table: b _start /* reset - _start */
ldr pc, _undf /* undefined - _undf */
ldr pc, _swi /* SWI - _swi */
ldr pc, _pabt /* program abort - _pabt */
ldr pc, _dabt /* data abort - _dabt */
nop /* reserved */
ldr pc, _irq /* IRQ - read the VIC */
ldr pc, _fiq /* FIQ - _fiq */
Now all the exceptions gets redirected to my code. Hopefully this will help anyone in the same situation that I was in:)

Resources