I try to program Cortex-A9 in a bare metal fashion. I use the 'hello world' code from:
https://github.com/tukl-msd/gem5.bare-metal which works. However, I'm not able to get interrupts working. When I create an Interrupt with Interrupt e.g. #47 my software doesn't jump in the ISR function. What I am missing? Do I have to do some more initialization?
Startup Code:
.section INTERRUPT_VECTOR, "x"
.global _Reset
_Reset:
B Reset_Handler /* Reset */
B . /* Undefined */
B . /* SWI */
B . /* Prefetch Abort */
B . /* Data Abort */
B . /* reserved */
B irq_handler /* IRQ */
B irq_handler /* FIQ */
// Some Definitions for GIC:
.equ GIC_DIST, 0x10041000
.equ GIC_CPU , 0x10040000
// GIC Definitions for CPU interface
.equ ICCICR , 0x00
.equ ICCPMR , 0x04
.equ ICCEOIR , 0x10
.equ ICCIAR , 0x0C
// GIC Definitions for Distributor interface
.equ ICDDCR , 0x00
.equ ICDISER , 0x100
.equ ICDIPTR , 0x800
// Other Definitions
.equ USR_MODE , 0x10
GIC_dist_base : .word 0 // address of GIC distributor
GIC_cpu_base : .word 0 // address of GIC CPU interface
Reset_Handler:
LDR sp, =stack_top
// Enable Interrupts on CPU Side:
MRS r1, cpsr // get the cpsr.
BIC r1, r1, #0x80 // enable IRQ (ORR to disable).
MSR cpsr_c, r1 // copy it back, control field bit update.
// Configure GIC:
BL IC_init
// Branch to C code
BL main
B .
// Initialize GIC
.global GIC_init
IC_init:
stmfd sp!,{lr}
// Read GIC base from Configuration Base Address Register
// And use it to initialize GIC_dist_base and GIC_cpu_base
//mrc p15, 4, r0, c15, c0, 0
//add r2, r0, #GIC_DIST // Calculate address
ldr r2, =GIC_DIST
ldr r1, =GIC_dist_base
str r2,[r1] // Store address of GIC distributor
//add r2, r0, #GIC_CPU // Calculate address
ldr r2, =GIC_CPU
ldr r1, =GIC_cpu_base
str r2,[r1] // Store address of GIC CPU interface
// Register (ICCPMR) to enable interrutps of all priorities
ldr r1,=0xFFFF
ldr r2,=GIC_dist_base
str r1,[r2,#ICCPMR]
// Set the enable bit in the CPU interface control register
// ICCICR, allowing CPU(s) to receive interrupts
mov r1,#1
str r1,[r2,#ICCICR]
// Set the enable bit in the distributor control register
// ICDDCR, allowing interrpupts to be generated
ldr r2,=GIC_dist_base
ldr r2,[r2] // Nase address of distributor
mov r1, #1
str r1,[r2,#ICDDCR]
ldmfd sp!,{pc}
//config_interrupt (int ID , int CPU);
.global config_interrupt
config_interrupt:
stmfd sp!,{r4-r5, lr}
// Cinfigure the distributor interrupt set-enable registers (ICDISERn)
// enable the intterupt
// reg_offset = (M/32)*4 (shift and clear some bits)
// value = 1 << (N mod 32);
ldr r2,=GIC_dist_base
ldr r2,[r2] // Read GIC distributor base address
add r2,r2,#ICDISER // r2 <- base address of ICDSER regs
lsr r4,r0,#3 // clculate reg_offset
bic r4,r4,#3 // r4 <- reg_offset
add r4,r2,r4 // r4 <- address of ICDISERn
// Create a bit mask
and r2,r0,#0x1F // r2 <- N mod 32
mov r5,#1 // need to set one bit
lsl r2,r5,r2 // r2 <- value
// Using address in r4 and value in r2 to set the correct bit in the GIC register
ldr r3,[r4] // read ICDISERn
orr r3, r3, r2 // set the enable bit
str r3,[r4] // store the new register value
// Configure the distributor interrupt processor targets register (ICDIPTRn)
// select target CPU(s)
// reg_offset = (N/4)*4 (clear 2 bottom bits)
// index = N mod 4;
ldr r2,=GIC_dist_base
ldr r2,[r2] // Read GIC distributor base address
add r2,r2, #ICDIPTR // base address of ICDIPTR regs
bic r4,r0,#3 // r4 <- reg_offset
add r4,r2,r4 // r4 <- address of ICDIPTRn
// Get the address of th ebyte wihtih ICDIPTRn
and r2,r0,#0x3 // r2 <- index
add r4,r2,r4 // r4 <- byte address to be set
strb r1,[r4]
ldmfd sp!, {r4-r5, lr}
// int get_inLerrupt_number();
// Get the interrupt ID for the current interrupt. This should be called al the
// beginning of ISR. It also changes the state of the interrupt from pending to
// active, which helps to prevent other CPUs from trying to handle it.
.global get_interrupt_number
get_intterrupt_number:
// Read the JCCIAR from the CPU Interface
ldr r0,=GIC_cpu_base
ldr r0,[r0]
ldr r0,[r0,#ICCIAR]
mov pc,lr
// void end_of_interrupt (int ID);
// Notify the GIC that the interrupt has been processed. The state goes from
// active to inactive, or it goes from active and pending to pending.
.global end_of_interrupt
end_of_interrupt:
ldr r1,=GIC_cpu_base
ldr r1,[r1]
str r0,[r1,#ICCEOIR]
mov pc, lr
// IRQ Handler that calls the ISR function in C
.global irq_handler
irq_handler:
stmfd sp!,{r0-r7, lr}
// Call Interrupt Service Routine in C:
bl ISR
ldmfd sp!, {r0-r7, lr}
// Must substract 4 from lr
subs pc, lr, #4
Linker Script:
ENTRY(_Reset)
SECTIONS
{
. = 0x0;
.text : {
boot.o (INTERRUPT_VECTOR)
*(.text)
}
.data : { *(.data) }
.bss : { *(.bss COMMON) }
. = ALIGN(8);
. = . + 0x1000; /* 4kB of stack memory */
stack_top = .;
PROVIDE (end = .) ;
}
Main C Program:
#include <stdio.h>
extern "C" void config_interrupt(int, int);
volatile unsigned int * const SHADOW = (unsigned int *)0x1000a000;
void sendShadow(unsigned int s)
{
*SHADOW = s;
}
int main(void)
{
config_interrupt(47,0);
unsigned int r = 1337;
while (1)
{
printf("Hello World! %d\n", r);
sendShadow(1);
}
}
void ISR(void)
{
printf("ISR");
}
Related
I am trying to apply a type of side channel attack I read about in this paper that tries to infer execution state from differences in IRQ latencies on a MCU with a cortex M4 processor. The attack carefully interrupts instructions that occur right after a branch and measures the interrupt latency. When different branches have instructions of different lengths, you can look at the interrupt latency to determine in which of these branches the interrupt occurred and leak some of the program state.
I wrote a simple function that I want to attack in the way described above. I am using the SysTick timer to generate the interrupt at the correct point in time. To get an initial good value for the interrupt timer I used GDB to stop the program at the target line to see the SysTick value at that time.
I implemented a very simple interrupt handler that
loads the SysTick timer value from memory
subtracts this value from the reload value to get the elapsed time since interrupt (i.e. the IRQ latency)
clears the interrupt and
void __attribute__((interrupt("IRQ"))) SysTick_Handler(void)
{
/* USER CODE BEGIN SysTick_IRQn 0 */
SysTick->CTRL &= 0xfffffffe; // disable SysTick (~SysTick_CTRL_ENABLE_Msk)
*timer_value = SysTick->VAL; // capture counter value (as quickly as possible)
*timer_value = SysTick->LOAD - *timer_value; // subtract it from reload value to get IRQ latency
SysTick->VAL = 0; // reset initial value
}
However I find that I always get the same IRQ latency, regardless of the instruction that was interrupted. I expect the interrupt latency to be longer when a longer instruction is interrupted.
This is the function I wrote to test the attack
extern uint32_t *timer_value;
int sample_function(int *a, int *b){
/*
* function description -- store the smallest of the two value in a, if MEASURE_CYCLESS defined return the number
* of clock cycles that have been elapsed since the timer has been started
* r0 contains pointer to a
* r1 contains pointer to b
*/
__asm volatile(
/* push working registers */
"PUSH {r4-r8} \n"
/* move counter into r8 */
"MOV r8, #10 \n"
/* begin loop */
"begin_loop: \n"
/* decrement counter variable*/
"SUB r8, r8, #1 \n"
/* if counter variable not equal to 0, jump back to start of loop */
"CMP r8, #0 \n"
/* if r8 not equal to 0, jump back to begin of loop*/
"BNE begin_loop \n"
/* load a into r2 */
"LDR r2, [r0] \n"
/* load b into r3 */
"LDR r3, [r1] \n"
/* store a-b in r4, setting status flags -- if result is 0 Z flag is set */
"SUBS r4, r2, r3 \n"
/* if a-b positive, a is larger otherwise, b is larger (assuming a not equal to b) */
"BPL a_larger \n"
#ifdef SPY
/* load address of (*timer_value) into r4 -- use of LDR pseudo-instruction places constant in a literal pool*/
"LDR r4, =timer_value \n"
/* Load (*timer_value) into r4 */
"LDR r4, [r4] \n"
/* load address of Systick VAL into r5 */
"LDR r5, =0xe000e018 \n"
/* Load value at address stored in R5 (= Systick Val) */
"LDR r5, [r5] \n"
/* Move Systick Val into adress stored at r4 (= *timer_value = address of timer_value)*/
"STR r5, [r4] \n"
#endif
"NOP \n"
/*instruction that gets interrupted -- swap value*/
"STR r2, [r1] \n"
/* load value at this address into r0 (return value) */
"STR r3, [r0] \n"
"B end \n"
"a_larger: \n"
"MOV r0, #0 \n" // instruction that gets interrupted
"end: POP {r4-r8}"
); // pop working registers
}
Note, the section of code in the #define block is used to automatically determine a good timer reload value (instead of using GDB), but I'm currently not using the value I obtained this way.
I also have an empty loop in there to delay the instruction that is meant to be interrupted a bit.
The instruction that gets interrupted is the instruction right after the #define block. When I remove the NOP instruction I still get the same interrupt latency. When I increase or decrease the timer value (to interrupt some cycles earlier or later) I also still get the same IRQ latency.
Am I missing something here? Is there some behavior I do not know about?
Also, is it important to use the attribute __attribute__((interrupt("IRQ")) for an interrupt handler?
This is what I was thinking and commenting on.
bootstrap
.thumb_func
reset:
bl notmain
ldr r4,=0xE000E018
ldr r0,=0xE000E010
mov r1,#7
str r1,[r0]
b hang
.thumb_func
hang:
nop
nop
nop
nop
nop
nop
nop
b hang
setup uart and systick
void notmain ( void )
{
uart_init();
hexstring(0x12345678);
PUT32(STK_CSR,4);
PUT32(STK_RVR,0xF40000);
PUT32(STK_CVR,0x00000000);
//PUT32(STK_CSR,7);
}
event handler
.thumb_func
.globl systick_handler
systick_handler:
ldr r0,[r4]
ldr r5,[sp,#0x18]
push {r0,lr}
bl hexstrings
mov r0,r5
bl hexstring
pop {r0,pc}
grab the timer and address of interrupted instruction and print them out.
00F3FFF4 08000054
00F3FFF4 08000056
00F3FFF4 08000058
00F3FFF4 0800005A
00F3FFF4 0800005C
00F3FFF4 0800005E
00F3FFF4 08000054
00F3FFF4 08000056
00F3FFF4 08000058
00F3FFF4 0800005A
00F3FFF4 08000050
08000050 <hang>:
8000050: bf00 nop
8000052: bf00 nop
8000054: bf00 nop
8000056: bf00 nop
8000058: bf00 nop
800005a: bf00 nop
800005c: bf00 nop
800005e: e7f7 b.n 8000050 <hang>
From ARM's documentation.
Interrupt Latency
There is a maximum of a twelve cycle latency from asserting the interrupt to execution of the first instruction of the ISR when the memory being accessed has no wait states being applied. When the FPU option is implemented and a floating point context is active and the lazy stacking is not enabled, this maximum latency is increased to twenty nine cycles. The first instructions to be executed are fetched in parallel to the stack push.
And that last line we can perhaps see happening here. You can try various instructions, but this architecture has the ability to restart the long duration instructions (reads and push/pop, multiply, and such). I think to see much of a latency difference you may need to create bus or shared resource contention (vs instructions)
Also systick is an exception not an interrupt, so there may be some differences with respect to latency.
This is code for my wdt_handler.s
.file "_wdt_handler.c"
.text
.balign 2
.global WDT
.section __interrupt_vector_11,"ax",#progbits
.word WDT
.text
.extern redrawScreen
.extern wdt_c_handler
WDT:
; start of function
; attributes: interrupt
; framesize_regs: 24
; framesize_locals: 0
; framesize_outgoing: 0
; framesize: 24
; elim ap -> fp 26
; elim fp -> sp 0
; saved regs: R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
; start of prologue
PUSH R15
PUSH R14
PUSH R13
PUSH R12
PUSH R11
PUSH R10
PUSH R9
PUSH R8
PUSH R7
PUSH R6
PUSH R5
PUSH R4
; end of prologue
CALL #wdt_c_handler
; start of epilogue
POP R4
POP R5
POP R6
POP R7
POP R8
POP R9
POP R10
POP R11
POP R12
POP R13
POP R14
POP R15
cmp #0, &redrawScreen
jz ball_no_move
and #0xffef, 0(r1) ; clear CPU off in saved SR
ball_no_move:
RETI
.size WDT, .-WDT
.local count
.comm count,1,1
.ident "GCC: (GNU) 4.9.1 20140707 (prerelease (msp430-14r1-364)) (GNUPro 14r1) (Based on: GCC 4.8 GDB 7.7 Binutils 2.24 Newlib 2.1)"
This is code for my main.c
void main()
{
P1DIR |= GREEN_LED; /**< Green led on when CPU on */
P1OUT |= GREEN_LED;
configureClocks();
lcd_init();
shapeInit();
p2sw_init(15);
buzzer_init();
shapeInit();
clearScreen(COLOR_WHITE);
enableWDTInterrupts(); /**< enable periodic interrupt */
or_sr(0x8); /**< GIE (enable interrupts) */
for (;;)
{
while (!redrawScreen)
{ /**< Pause CPU if screen doesn't need updating */
P1OUT &= ~GREEN_LED; /**< Green led off witHo CPU */
or_sr(0x10); /**< CPU OFF */
}
P1OUT |= GREEN_LED; /**< Green led on when CPU on */
redrawScreen = 0;
drawString8x12(0, 70, "Welcome to Lab", COLOR_RED);
drawString8x12(65, 75, "3", COLOR_RED);
randomDraw(50, 20, COLOR_BLACK);
drawRand(100, 150, COLOR_BLACK);
}
}
void wdt_c_handler()
{
u_int switches = p2sw_read();
static short count = 0;
P1OUT |= GREEN_LED; /**< Green LED on when cpu on */
count++;
if (count == 15)
{
stateAdvance_(); // assembly function not related to cpu
count = 0;
}
P1OUT &= ~GREEN_LED; /**< Green LED off when cpu off */
}
I understand in the wdt_handler.s we do a comparison for redraw screen if they're both zero then what line is executed next? what is the and #0xffef, 0(r1) doing?
Im struggling to understand how the cpu wakes up and sleeps, I know this is very vague but anything helps.
I am trying to compile freeRTOS for raspberry pi 2. Those are the commands I tried so far:
arm-none-eabi-gcc -march=armv7-a -mcpu=cortex-a7 -mfpu=neon-vfpv4
-mfloat-abi=hard test.c -o test.o
arm-none-eabi-as -march=armv7-a -mcpu=cortex-a7 -mfpu=neon-vfpv4
-mfloat-abi=hard startup.s -o startup.o
arm-none-eabi-ld test.o startup.o -static -Map kernel7.map -o
target.elf -T raspberrypi.ld
The two upper ones do work fine. However the last one doesn't, it gives me the following error:
startup.o: In function _start':
(.init+0x0): multiple definition of_start'
test.o::(.text+0x6c): first defined here
startup.o: In function swi_handler':
(.init+0x28): undefined reference tovPortYieldProcessor'
startup.o: In function irq_handler':
(.init+0x38): undefined reference tovFreeRTOS_ISR'
startup.o: In function zero_loop':
(.init+0xcc): undefined reference torpi_cpu_irq_disable'
This is the corresponding code:
test.c:
#include <stdio.h>
void exit(int code)
{
while(1)
;
}
int main(void)
{
return 0;
}
startup.s:
.extern system_init
.extern __bss_start
.extern __bss_end
.extern vFreeRTOS_ISR
.extern vPortYieldProcessor
.extern rpi_cpu_irq_disable
.extern main
.section .init
.globl _start
;;
_start:
;# All the following instruction should be read as:
;# Load the address at symbol into the program counter.
ldr pc,reset_handler ;# Processor Reset handler -- we will have to force this on the raspi!
;# Because this is the first instruction executed, of cause it causes an immediate branch into reset!
ldr pc,undefined_handler ;# Undefined instruction handler -- processors that don't have thumb can emulate thumb!
ldr pc,swi_handler ;# Software interrupt / TRAP (SVC) -- system SVC handler for switching to kernel mode.
ldr pc,prefetch_handler ;# Prefetch/abort handler.
ldr pc,data_handler ;# Data abort handler/
ldr pc,unused_handler ;# -- Historical from 26-bit addressing ARMs -- was invalid address handler.
ldr pc,irq_handler ;# IRQ handler
ldr pc,fiq_handler ;# Fast interrupt handler.
;# Here we create an exception address table! This means that reset/hang/irq can be absolute addresses
reset_handler: .word reset
undefined_handler: .word undefined_instruction
swi_handler: .word vPortYieldProcessor
prefetch_handler: .word prefetch_abort
data_handler: .word data_abort
unused_handler: .word unused
irq_handler: .word vFreeRTOS_ISR
fiq_handler: .word fiq
reset:
/* Disable IRQ & FIQ */
cpsid if
/* Check for HYP mode */
mrs r0, cpsr_all
and r0, r0, #0x1F
mov r8, #0x1A
cmp r0, r8
beq overHyped
b continueBoot
overHyped: /* Get out of HYP mode */
ldr r1, =continueBoot
msr ELR_hyp, r1
mrs r1, cpsr_all
and r1, r1, #0x1f ;# CPSR_MODE_MASK
orr r1, r1, #0x13 ;# CPSR_MODE_SUPERVISOR
msr SPSR_hyp, r1
eret
continueBoot:
;# In the reset handler, we need to copy our interrupt vector table to 0x0000, its currently at 0x8000
mov r0,#0x8000 ;# Store the source pointer
mov r1,#0x0000 ;# Store the destination pointer.
;# Here we copy the branching instructions
ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9} ;# Load multiple values from indexed address. ; Auto-increment R0
stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9} ;# Store multiple values from the indexed address. ; Auto-increment R1
;# So the branches get the correct address we also need to copy our vector table!
ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9} ;# Load from 4*n of regs (8) as R0 is now incremented.
stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9} ;# Store this extra set of data.
;# Set up the various STACK pointers for different CPU modes
;# (PSR_IRQ_MODE|PSR_FIQ_DIS|PSR_IRQ_DIS)
mov r0,#0xD2
msr cpsr_c,r0
mov sp,#0x8000
;# (PSR_FIQ_MODE|PSR_FIQ_DIS|PSR_IRQ_DIS)
mov r0,#0xD1
msr cpsr_c,r0
mov sp,#0x4000
;# (PSR_SVC_MODE|PSR_FIQ_DIS|PSR_IRQ_DIS)
mov r0,#0xD3
msr cpsr_c,r0
mov sp,#0x8000000
ldr r0, =__bss_start
ldr r1, =__bss_end
mov r2, #0
zero_loop:
cmp r0,r1
it lt
strlt r2,[r0], #4
blt zero_loop
bl rpi_cpu_irq_disable
;# mov sp,#0x1000000
b main ;# We're ready?? Lets start main execution!
.section .text
undefined_instruction:
b undefined_instruction
prefetch_abort:
b prefetch_abort
data_abort:
b data_abort
unused:
b unused
fiq:
b fiq
hang:
b hang
.globl PUT32
PUT32:
str r1,[r0]
bx lr
.globl GET32
GET32:
ldr r0,[r0]
bx lr
.globl dummy
dummy:
bx lr
raspberrypi.ld:
/**
* BlueThunder Linker Script for the raspberry Pi!
*
*
*
**/
MEMORY
{
RESERVED (r) : ORIGIN = 0x00000000, LENGTH = 32K
INIT_RAM (rwx) : ORIGIN = 0x00008000, LENGTH = 32K
RAM (rwx) : ORIGIN = 0x00010000, LENGTH = 128M
}
ENTRY(_start)
SECTIONS {
/*
* Our init section allows us to place the bootstrap code at address 0x8000
*
* This is where the Graphics processor forces the ARM to start execution.
* However the interrupt vector code remains at 0x0000, and so we must copy the correct
* branch instructions to 0x0000 - 0x001C in order to get the processor to handle interrupts.
*
*/
.init : {
KEEP(*(.init))
} > INIT_RAM = 0
.module_entries : {
__module_entries_start = .;
KEEP(*(.module_entries))
KEEP(*(.module_entries.*))
__module_entries_end = .;
__module_entries_size = SIZEOF(.module_entries);
} > INIT_RAM
/**
* This is the main code section, it is essentially of unlimited size. (128Mb).
*
**/
.text : {
*(.text)
} > RAM
/*
* Next we put the data.
*/
.data : {
*(.data)
} > RAM
.bss :
{
__bss_start = .;
*(.bss)
*(.bss.*)
__bss_end = .;
} > RAM
/*
__exidx_start = .;
.ARM.exidx :
{
*(.ARM.exidx* .gnu.linkonce.armexidx.*)
} > RAM
__exidx_end = .;
*/
/**
* Place HEAP here???
**/
PROVIDE(__HEAP_START = __bss_end );
/**
* Stack starts at the top of the RAM, and moves down!
**/
_estack = ORIGIN(RAM) + LENGTH(RAM);
}
As you can see test.c doesn't contain an entry point called _start, neither does it have one in its assembly compiled form. Only startup.s does.
Any idea's about how I could solve my current issue?
EDIT: all the code if needed used can be found here:https://github.com/jameswalmsley/RaspberryPi-FreeRTOS
I'm currently trying to port FreeRTOS to the TI AM335x processor, best known for being used on the BeagleBones. I am able to boot, run GPIOs and setup a compare match timer for running the system ticks. If I disable interrupts, i can see how the interrupt get set after a correct amount of time after the timer was started. And if I enable interrupts, my application dies after that same given time. The application also dies if I try to yield a task, aka calling the SWI handler. This makes me belive that the vector table is unavailable or incorrectly setup. The ROM Exception Vectors for SWI and IRQ has the contenct 4030CE08h and 4030CE18h. Which again in RAM executes some branching, the TRM says:
User code can redirect any exception to a custom handler either by writing its address to the appropriate location from 4030CE24h to 4030CE3Ch or by overriding the branch (load into PC) instruction between addresses from 4030CE04h to 4030CE1Ch.
My vIRQHandler function address is therefore written to 4030CE38h. One would hope this was enough, but sadly no. I suspect that there is something wrong in my boot.s file, however my assembly has never been that great and i'm struggling to understand the code. The boot.s and the rest of the project was started from a OMAP3 port.
Boot.s:
.section .startup,"ax"
.code 32
.align 0
b _start /* reset - _start */
ldr pc, _undf /* undefined - _undf */
ldr pc, _swi /* SWI - _swi */
ldr pc, _pabt /* program abort - _pabt */
ldr pc, _dabt /* data abort - _dabt */
nop /* reserved */
ldr pc, _irq /* IRQ - read the VIC */
ldr pc, _fiq /* FIQ - _fiq */
_undf: .word 0x4030CE24 /* undefined */
_swi: .word 0x4030CE28 /* SWI */
_pabt: .word 0x4030CE2C /* program abort */
_dabt: .word 0x4030CE30 /* data abort */
_irq: .word 0x4030CE38
_fiq: .word 0x4030CE3C /* FIQ */
The branch to start sets up a stack for each mode and clears the bss, not sure if that is relevant. This is the code which seems relevant to me, and I have updated the words to fit the AM335 instead of the OMAP3.
The setting IRQ handler:
#define E_IRQ (*(REG32 (0x4030CE38)))
....
/* Setup interrupt handler */
E_IRQ = ( long ) vIRQHandler;
If anyone have any pointers to an assembly newbie it would be much appriciated, because i'm completely stuck :)
U-boot had moved the exception vector table. However, instead of recompiling u-boot I just reset the exception vector table in my own start script.
Added this right before branching to main:
/* Set V=0 in CP15 SCTRL register - for VBAR to point to vector */
mrc p15, 0, r0, c1, c0, 0 # Read CP15 SCTRL Register
bic r0, #(1 << 13) # V = 0
mcr p15, 0, r0, c1, c0, 0 # Write CP15 SCTRL Register
/* Set vector address in CP15 VBAR register */
ldr r0, =_vector_table
mcr p15, 0, r0, c12, c0, 0 #Set VBAR
bl main
And put in the _vector_table label at the start of my exception vector table:
.section .startup,"ax"
.code 32
.align 0
_vector_table: b _start /* reset - _start */
ldr pc, _undf /* undefined - _undf */
ldr pc, _swi /* SWI - _swi */
ldr pc, _pabt /* program abort - _pabt */
ldr pc, _dabt /* data abort - _dabt */
nop /* reserved */
ldr pc, _irq /* IRQ - read the VIC */
ldr pc, _fiq /* FIQ - _fiq */
Now all the exceptions gets redirected to my code. Hopefully this will help anyone in the same situation that I was in:)
I'm working with 2 memories on my device, DDR and SRAM. The device is running a pre-OS code written in C and ARM.
I would like to conduct a DDR calibration, for that I need to copy a few functions to the SRAM, jump to it, run the calibration code, and go back to DDR when done.
In order to do so, I've modify my scatter file (.lds) so the relevant functions will be mapped to SRAM (instructions, data etc.).
After compiling the image, he is copied into the DDR and start running from there.
My problem is as follows:
How can I locate the starting address and size of these functions on DDR, so I'll be able to copy them to the SRAM and jump there?
Thanks you all in advance!
I am assuming you are talking about ARM architecture:
compile the code with __attribute__((always_inline)); on all related functions and compile with -fpic -fPIC read here for more info.
disassembling it and put it as-is on SRAM, e.g at adress 0xd1001000
reserve {r4-r15} on SRAM.
set pc to 0xd1001000 and sp properly to point the stack.
restore {r4-r15}
jump back to DDR.
You can look at here for a good resource of how to use the right gcc flags.
here is a refernce from uboot - it doesn't jump back to the initial place:
/*
* void relocate_code (addr_sp, gd, addr_moni)
*
* This "function" does not return, instead it continues in RAM
* after relocating the monitor code.
*
*/
.globl relocate_code
relocate_code:
mov r4, r0 /* save addr_sp */
mov r5, r1 /* save addr of gd */
mov r6, r2 /* save addr of destination */
/* Set up the stack */
stack_setup:
mov sp, r4
adr r0, _start
cmp r0, r6
moveq r9, #0 /* no relocation. relocation offset(r9) = 0 */
beq clear_bss /* skip relocation */
mov r1, r6 /* r1 <- scratch for copy_loop */
ldr r3, _image_copy_end_ofs
add r2, r0, r3 /* r2 <- source end address */
copy_loop:
ldmia r0!, {r9-r10} /* copy from source address [r0] */
stmia r1!, {r9-r10} /* copy to target address [r1] */
cmp r0, r2 /* until source end address [r2] */
blo copy_loop
#ifndef CONFIG_SPL_BUILD
/*
* fix .rel.dyn relocations
*/
ldr r0, _TEXT_BASE /* r0 <- Text base */
sub r9, r6, r0 /* r9 <- relocation offset */
ldr r10, _dynsym_start_ofs /* r10 <- sym table ofs */
add r10, r10, r0 /* r10 <- sym table in FLASH */
ldr r2, _rel_dyn_start_ofs /* r2 <- rel dyn start ofs */
add r2, r2, r0 /* r2 <- rel dyn start in FLASH */
ldr r3, _rel_dyn_end_ofs /* r3 <- rel dyn end ofs */
add r3, r3, r0 /* r3 <- rel dyn end in FLASH */
fixloop:
ldr r0, [r2] /* r0 <- location to fix up, IN FLASH! */
add r0, r0, r9 /* r0 <- location to fix up in RAM */
ldr r1, [r2, #4]
and r7, r1, #0xff
cmp r7, #23 /* relative fixup? */
beq fixrel
cmp r7, #2 /* absolute fixup? */
beq fixabs
/* ignore unknown type of fixup */
b fixnext
fixabs:
/* absolute fix: set location to (offset) symbol value */
mov r1, r1, LSR #4 /* r1 <- symbol index in .dynsym */
add r1, r10, r1 /* r1 <- address of symbol in table */
ldr r1, [r1, #4] /* r1 <- symbol value */
add r1, r1, r9 /* r1 <- relocated sym addr */
b fixnext
fixrel:
/* relative fix: increase location by offset */
ldr r1, [r0]
add r1, r1, r9
fixnext:
str r1, [r0]
add r2, r2, #8 /* each rel.dyn entry is 8 bytes */
cmp r2, r3
blo fixloop
b clear_bss
_rel_dyn_start_ofs:
.word __rel_dyn_start - _start
_rel_dyn_end_ofs:
.word __rel_dyn_end - _start
_dynsym_start_ofs:
.word __dynsym_start - _start
#endif /* #ifndef CONFIG_SPL_BUILD */
clear_bss:
#ifdef CONFIG_SPL_BUILD
/* No relocation for SPL */
ldr r0, =__bss_start
ldr r1, =__bss_end__
#else
ldr r0, _bss_start_ofs
ldr r1, _bss_end_ofs
mov r4, r6 /* reloc addr */
add r0, r0, r4
add r1, r1, r4
#endif
mov r2, #0x00000000 /* clear */
clbss_l:str r2, [r0] /* clear loop... */
add r0, r0, #4
cmp r0, r1
bne clbss_l
/*
* We are done. Do not return, instead branch to second part of board
* initialization, now running from RAM.
*/
jump_2_ram:
/*
* If I-cache is enabled invalidate it
*/
#ifndef CONFIG_SYS_ICACHE_OFF
mcr p15, 0, r0, c7, c5, 0 # invalidate icache
mcr p15, 0, r0, c7, c10, 4 # DSB
mcr p15, 0, r0, c7, c5, 4 # ISB
#endif
ldr r0, _board_init_r_ofs
adr r1, _start
add lr, r0, r1
add lr, lr, r9
/* setup parameters for board_init_r */
mov r0, r5 /* gd_t */
mov r1, r6 /* dest_addr */
/* jump to it ... */
mov pc, lr
_board_init_r_ofs:
.word board_init_r - _start