I was working on a project when I came across this behavior, which I can't quite understand.
Context
I have a function like:
float SCL_calculate(AVG_struct_type* data)
The inner workings of this function are not relevant, the function output a float number correctly (it has been fully debugged already)
I have a global 9-byte array (declared as char) defined like this:
char output_buff[9] = {0x0};
I'm trying to write a float number from the [1] index of this array
All this in the context of embedded systems
I'm using a STM32F411CEU6
The problem
Originally I had this code:
*( (float *) (&output_buff[1]) ) = SCL_calculate(&Voltage);
but when I used this, the MCU jumped into the HardFault_Handler while trying to write into the array. If instead I do:
float data;
data = SCL_calculate(&Voltage);
*( (float *) (&output_buff[1]) ) = data;
it works just fine.
My question
Why does one version jump into the HardFault_Handler while the other does not?
minimal reproducible example
Here is a minimal reproducible example, I left all stm32 device configurations by default, I deleted all the compiler comments and functions to make it easier to read.
float SCL_calculate(void);
/* Minimal reproduction: stores the same float at output_buff[1] in two ways. */
int main(void)
{
HAL_Init();
/* The value goes through a local variable first; this store does not fault. */
float data = SCL_calculate( );
*( (float *) (&output_buff[1]) ) = data; //NO ERROR
/* The return value is stored directly; this is the store that ends in HardFault_Handler. */
*( (float *) (&output_buff[1]) ) = SCL_calculate( ); //ERROR
while (1)
{
}
}
/* Stand-in for the real calculation: always yields the constant 12.34. */
float SCL_calculate(void)
{
    return 12.34f;
}
Here is the full main.c file
/* USER CODE BEGIN Header */
/**
******************************************************************************
* @file : main.c
* @brief : Main program body
******************************************************************************
* @attention
*
* Copyright (c) 2023 STMicroelectronics.
* All rights reserved.
*
* This software is licensed under terms that can be found in the LICENSE file
* in the root directory of this software component.
* If no LICENSE file comes with this software, it is provided AS-IS.
*
******************************************************************************
*/
/* USER CODE END Header */
/* Includes ------------------------------------------------------------------*/
#include "main.h"
/* Private includes ----------------------------------------------------------*/
/* USER CODE BEGIN Includes */
/* USER CODE END Includes */
/* Private typedef -----------------------------------------------------------*/
/* USER CODE BEGIN PTD */
/* USER CODE END PTD */
/* Private define ------------------------------------------------------------*/
/* USER CODE BEGIN PD */
/* USER CODE END PD */
/* Private macro -------------------------------------------------------------*/
/* USER CODE BEGIN PM */
/* USER CODE END PM */
/* Private variables ---------------------------------------------------------*/
/* USER CODE BEGIN PV */
/* USER CODE END PV */
/* Private function prototypes -----------------------------------------------*/
void SystemClock_Config(void);
/* USER CODE BEGIN PFP */
/* Zero-initialised 9-byte buffer; main() stores a float starting at index 1. */
char output_buff[9] = {0x0};
/* USER CODE END PFP */
/* Private user code ---------------------------------------------------------*/
/* USER CODE BEGIN 0 */
/* Defined in the USER CODE 4 section further down in this file. */
float SCL_calculate( void );
/* USER CODE END 0 */
/* USER CODE END 0 */
/**
* @brief The application entry point.
*        Initialises the HAL and the system clock, performs the two float
*        stores into output_buff that the question is about, then idles.
* @retval int
*/
int main(void)
{
/* USER CODE BEGIN 1 */
/* USER CODE END 1 */
/* MCU Configuration--------------------------------------------------------*/
/* Reset of all peripherals, Initializes the Flash interface and the Systick. */
HAL_Init();
/* USER CODE BEGIN Init */
/* USER CODE END Init */
/* Configure the system clock */
SystemClock_Config();
/* USER CODE BEGIN SysInit */
/* USER CODE END SysInit */
/* Initialize all configured peripherals */
/* USER CODE BEGIN 2 */
/* Both stores target &output_buff[1], reinterpreted as a float pointer. */
/* Going through the local variable does not fault. */
float data = SCL_calculate( );
*( (float *) (&output_buff[1]) ) = data; //NO ERROR
/* Storing the call result directly is the statement that reaches HardFault_Handler. */
*( (float *) (&output_buff[1]) ) = SCL_calculate( ); //ERROR
/* USER CODE END 2 */
/* Infinite loop */
/* USER CODE BEGIN WHILE */
while (1)
{
/* USER CODE END WHILE */
/* USER CODE BEGIN 3 */
}
/* USER CODE END 3 */
}
/**
* @brief System Clock Configuration
*        Runs the system from the internal HSI oscillator with the PLL off and
*        all bus dividers set to 1. Calls Error_Handler() if a HAL call fails.
* @retval None
*/
void SystemClock_Config(void)
{
RCC_OscInitTypeDef RCC_OscInitStruct = {0};
RCC_ClkInitTypeDef RCC_ClkInitStruct = {0};
/** Configure the main internal regulator output voltage
*/
__HAL_RCC_PWR_CLK_ENABLE();
__HAL_PWR_VOLTAGESCALING_CONFIG(PWR_REGULATOR_VOLTAGE_SCALE1);
/** Initializes the RCC Oscillators according to the specified parameters
* in the RCC_OscInitTypeDef structure.
*/
/* HSI switched on with its default calibration value; PLL not used. */
RCC_OscInitStruct.OscillatorType = RCC_OSCILLATORTYPE_HSI;
RCC_OscInitStruct.HSIState = RCC_HSI_ON;
RCC_OscInitStruct.HSICalibrationValue = RCC_HSICALIBRATION_DEFAULT;
RCC_OscInitStruct.PLL.PLLState = RCC_PLL_NONE;
if (HAL_RCC_OscConfig(&RCC_OscInitStruct) != HAL_OK)
{
Error_Handler();
}
/** Initializes the CPU, AHB and APB buses clocks
*/
/* SYSCLK is taken from HSI; AHB, APB1 and APB2 are left undivided. */
RCC_ClkInitStruct.ClockType = RCC_CLOCKTYPE_HCLK|RCC_CLOCKTYPE_SYSCLK
|RCC_CLOCKTYPE_PCLK1|RCC_CLOCKTYPE_PCLK2;
RCC_ClkInitStruct.SYSCLKSource = RCC_SYSCLKSOURCE_HSI;
RCC_ClkInitStruct.AHBCLKDivider = RCC_SYSCLK_DIV1;
RCC_ClkInitStruct.APB1CLKDivider = RCC_HCLK_DIV1;
RCC_ClkInitStruct.APB2CLKDivider = RCC_HCLK_DIV1;
if (HAL_RCC_ClockConfig(&RCC_ClkInitStruct, FLASH_LATENCY_0) != HAL_OK)
{
Error_Handler();
}
}
/* USER CODE BEGIN 4 */
/* Placeholder for the real calculation: returns an arbitrary constant. */
float SCL_calculate(void)
{
    return 12.34f;
}
/* USER CODE END 4 */
/**
* @brief Called when a HAL call reports an error; never returns.
* @retval None
*/
void Error_Handler(void)
{
/* USER CODE BEGIN Error_Handler_Debug */
/* Mask interrupts, then park the CPU here so the failure can be inspected. */
__disable_irq();
for (;;)
{
}
/* USER CODE END Error_Handler_Debug */
}
#ifdef USE_FULL_ASSERT
/**
* @brief Hook invoked when an assert_param check fails.
*        Intentionally does nothing; add reporting inside the USER CODE section.
* @param file: pointer to the source file name
* @param line: source line number of the failing assert_param
* @retval None
*/
void assert_failed(uint8_t *file, uint32_t line)
{
/* USER CODE BEGIN 6 */
/* Reporting could go here,
ex: printf("Wrong parameters value: file %s on line %d\r\n", file, line) */
(void)file;
(void)line;
/* USER CODE END 6 */
}
#endif /* USE_FULL_ASSERT */
Why one way it jumps into the HardFault_Handler and the other way not?
Let's compile the code:
output_buff:
main:
push {r4, r7, lr}
sub sp, sp, #12
add r7, sp, #0
bl SCL_calculate
vstr.32 s0, [r7, #4]
ldr r2, .L3
ldr r3, [r7, #4] @ float
str r3, [r2] @ float
ldr r4, .L3
bl SCL_calculate
vmov.f32 s15, s0
vstr.32 s15, [r4]
.L2:
b .L2
.L3:
.word output_buff+1
SCL_calculate:
push {r7}
add r7, sp, #0
ldr r3, .L7
vmov s15, r3
vmov.f32 s0, s15
mov sp, r7
ldr r7, [sp], #4
bx lr
.L7:
.word 1095069860
First store is using str instruction which does not require aligned access.
str r3, [r2] @ float
The latter is using FPU instruction vstr.32 (I assume standard Cube settings) and FPU instructions require aligned access.
vstr.32 s15, [r4]
That is the reason why the first one works, and the second does not. It can only happen if you do not enable the optimizations (-O3 version below):
main:
ldr r3, .L4
ldr r2, .L4+4
str r2, [r3, #1] @ unaligned
.L2:
b .L2
.L4:
.word .LANCHOR0
.word 1095069860
SCL_calculate:
vldr.32 s0, .L7
bx lr
.L7:
.word 1095069860
output_buff:
How to prevent problems? Simply do not use pointer punning.
/*
 * STORE(dest, src, type): convert `src` to `type` and copy its bytes to the
 * storage starting at `dest` with memcpy, so `dest` may have any alignment.
 * `src` is evaluated exactly once. The temporary uses a name that is unlikely
 * to clash with the caller's; with the previous name `temp`, a call such as
 * STORE(buf[1], temp, float) read the macro's own uninitialised variable.
 */
#define STORE(dest, src, type)                                        \
    do {                                                              \
        type store_tmp_ = (src);                                      \
        memcpy(&(dest), &store_tmp_, sizeof(store_tmp_));             \
    } while (0)
float SCL_calculate(void);
/* Byte buffer that receives the float at an odd offset. */
uint8_t output_buff[100];
/* The same two stores as in the faulting example, now done through STORE (memcpy). */
int main(void)
{
float data = SCL_calculate( );
STORE(output_buff[1], data, float); //NO ERROR
/* The call result is first placed in the macro's temporary, then copied byte-wise. */
STORE(output_buff[1], SCL_calculate(), float);
while (1)
{
}
}
/* Dummy producer used by the example: always returns 12.34. */
float SCL_calculate(void)
{
    return 12.34f;
}
Calls to memcpy will be optimized out even if optimizations are not enabled.
output_buff:
main:
push {r7, lr}
sub sp, sp, #16
add r7, sp, #0
bl SCL_calculate
vstr.32 s0, [r7, #12]
ldr r3, [r7, #12] @ float
str r3, [r7, #8] @ float
ldr r3, [r7, #8]
ldr r2, .L3
str r3, [r2, #1] @ unaligned
bl SCL_calculate
vmov.f32 s15, s0
vstr.32 s15, [r7, #4]
ldr r3, [r7, #4]
ldr r2, .L3
str r3, [r2, #1] @ unaligned
.L2:
b .L2
.L3:
.word output_buff
SCL_calculate:
push {r7}
add r7, sp, #0
ldr r3, .L7
vmov s15, r3
vmov.f32 s0, s15
mov sp, r7
ldr r7, [sp], #4
bx lr
.L7:
.word 1095069860
https://godbolt.org/z/37nY8Wbe9
Using memcpy also prevents another problem. If you port the code to, for example, a Cortex-M0, the compiler will actually call memcpy or use byte-sized instructions, because that core requires aligned access.
https://godbolt.org/z/9eo89anqa
You're writing a float (32-bit) into the second ([1]) element of char (8-bit) array. So if your array started at address 0x20000000, then you're writing a 32-bit (4-byte) value into 0x20000001. This is an alignment issue. You can write a 32-bit data block only to 32-bit aligned memory address, 0x20000000 or 0x20000004 or 0x20000008 and so on. Similarly, a 16-bit value should be 2-byte aligned and should not be partially in one 32-bit group and partially in another one.
Related
I'm trying to add some logic at boundaries between userspace and kernelspace particularly on the ARM architecture.
One such boundary appears to be the vector_swi routine implemented in arch/arm/kernel/entry-common.S. Right now, I have most of my code written in a C function which I would like to call somewhere at the start of vector_swi.
Thus, I did the following:
ENTRY(vector_swi)
sub sp, sp, #S_FRAME_SIZE
stmia sp, {r0 - r12} @ Calling r0 - r12
ARM( add r8, sp, #S_PC )
ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr
THUMB( mov r8, sp )
THUMB( store_user_sp_lr r8, r10, S_SP ) @ calling sp, lr
mrs r8, spsr @ called from non-FIQ mode, so ok.
str lr, [sp, #S_PC] @ Save calling PC
str r8, [sp, #S_PSR] @ Save CPSR
str r0, [sp, #S_OLD_R0] @ Save OLD_R0
zero_fp
#ifdef CONFIG_BTM_BOUNDARIES
bl btm_entering_kernelspace @ <--- My function
#endif
When the contents of my function are as follows everything works fine:
/* Gate for the hook below; initialised to 0 and not modified in this snippet. */
static int btm_enabled = 0;
/*
 * Hook called from vector_swi. Returns immediately while btm_enabled is 0;
 * otherwise reads the current CPU number and its clock rate and discards both.
 */
asmlinkage inline void btm_entering_kernelspace(void)
{
int cpu;
int freq;
struct acpu_level *level;
if(!btm_enabled) {
return;
}
cpu = smp_processor_id();
freq = acpuclk_krait_get_rate(cpu);
/* The casts below only mark the locals as used; nothing is done with them. */
(void) cpu;
(void) freq;
(void) level;
}
However, when I add some additional code, the kernel enters into a crash-reboot loop.
/* Gate for the hook below; initialised to 0 and not modified in this snippet. */
static int btm_enabled = 0;
/*
 * Hook called from vector_swi. Returns immediately while btm_enabled is 0;
 * otherwise reads the current CPU's clock rate and scans drv.acpu_freq_tbl
 * for the entry whose speed.khz matches it.
 */
asmlinkage inline void btm_entering_kernelspace(void)
{
int cpu;
int freq;
struct acpu_level *level;
if(!btm_enabled) {
return;
}
cpu = smp_processor_id();
freq = acpuclk_krait_get_rate(cpu);
(void) cpu;
(void) freq;
(void) level;
// --------- Added code ----------
/* Walk the table until the matching rate or the terminating entry (khz == 0). */
for (level = drv.acpu_freq_tbl; level->speed.khz != 0; level++) {
if(level->speed.khz == freq) {
break;
}
}
}
Although the first instinct is to blame the logic of the added code, please note that none of it should ever execute since btm_enabled is 0.
I have double-checked and triple-checked to make sure btm_enabled is 0 by adding a sysfs entry to print out the value of the variable (with the added code removed).
Could someone explain what is going on here or what I'm doing wrong?
The first version will probably compile to just a return instruction as it has no side effect. The second needs to load btm_enabled and in the process overwrites one or two system call arguments.
When calling a C function from assembly language you need to ensure that registers that may be modified do not contain needed information.
To solve your specific problem, you could update your code to read:
#ifdef CONFIG_BTM_BOUNDARIES
stmdb sp!, {r0-r3, r12, lr} @ <--- New instruction
bl btm_entering_kernelspace @ <--- My function
ldmia sp!, {r0-r3, r12, lr} @ <--- New instruction
#endif
The new instructions store registers r0-r3, r12 and lr onto the stack and restore them after your function call. These are the only registers a C function is allowed to modify. Saving r12 is unnecessary here, as its value is not used, but doing so keeps the stack 8-byte aligned as required by the ABI.
I have created a simple example program with the Xilinx SDK that has FreeRTOS and I am running into an issue which seems quite unexpected. I want to fire an software interrupt and so I have set up the code this way.
void software_test( void ) __attribute__((interrupt_handler));
/* Interrupt routine: clears the interrupt at its source, then counts it. */
void software_test( void )
{
    volatile uint32_t *const clear_reg = (volatile uint32_t *) 0x4120000C;

    /* clear the interrupt */
    *clear_reg = 0x80;
    interrupt_occurred++;
}
When I try to compile it complains about:
\interrupt_example_bsp\microblaze_0\libsrc\freertos823_xilinx_v1_1\src/portasm.S:288: multiple definition of `_interrupt_handler'
./src/freertos_hello_world.o:\Debug/../src/freertos_hello_world.c:130: first defined here
I checked portasm.S and it has the following code in it:
.global _interrupt_handler
... bunch more unreleated code here
.text
.align 4
_interrupt_handler:
portSAVE_CONTEXT
/* Stack the return address. */
swi r14, r1, portR14_OFFSET
/* Switch to the ISR stack. */
lwi r1, r0, pulISRStack
/* The parameter to the interrupt handler. */
ori r5, r0, configINTERRUPT_CONTROLLER_TO_USE
/* Execute any pending interrupts. */
bralid r15, XIntc_DeviceInterruptHandler
or r0, r0, r0
/* See if a new task should be selected to execute. */
lwi r18, r0, ulTaskSwitchRequested
or r18, r18, r0
/* If ulTaskSwitchRequested is already zero, then jump straight to
restoring the task that is already in the Running state. */
beqi r18, task_switch_not_requested
/* Set ulTaskSwitchRequested back to zero as a task switch is about to be
performed. */
swi r0, r0, ulTaskSwitchRequested
/* ulTaskSwitchRequested was not 0 when tested. Select the next task to
execute. */
bralid r15, vTaskSwitchContext
or r0, r0, r0
... bunch more code here
I am unclear how to fix this; has anyone else encountered this?
Any help is greatly appreciated. Thanks in advance.
Here is some information on implementing a Microblaze ISR using FreeRTOS: http://www.freertos.org/RTOS-Xilinx-Microblaze-KC705.html#implementing_an_ISR
I try to program Cortex-A9 in a bare metal fashion. I use the 'hello world' code from:
https://github.com/tukl-msd/gem5.bare-metal which works. However, I'm not able to get interrupts working. When I raise an interrupt, e.g. interrupt #47, my software doesn't jump into the ISR function. What am I missing? Do I have to do some more initialization?
Startup Code:
// Exception vector table, one branch per entry. Only reset, IRQ and FIQ
// lead anywhere; every other entry spins in place ("B .").
.section INTERRUPT_VECTOR, "x"
.global _Reset
_Reset:
B Reset_Handler /* Reset */
B . /* Undefined */
B . /* SWI */
B . /* Prefetch Abort */
B . /* Data Abort */
B . /* reserved */
B irq_handler /* IRQ */
B irq_handler /* FIQ */
// Some Definitions for GIC:
// Hard-coded base addresses of the two GIC register blocks.
.equ GIC_DIST, 0x10041000
.equ GIC_CPU , 0x10040000
// GIC Definitions for CPU interface
// (register offsets, used relative to the CPU interface base)
.equ ICCICR , 0x00
.equ ICCPMR , 0x04
.equ ICCEOIR , 0x10
.equ ICCIAR , 0x0C
// GIC Definitions for Distributor interface
// (register offsets, used relative to the distributor base)
.equ ICDDCR , 0x00
.equ ICDISER , 0x100
.equ ICDIPTR , 0x800
// Other Definitions
.equ USR_MODE , 0x10
// Base-address variables: zero here, written by IC_init and read by the
// other GIC routines.
GIC_dist_base : .word 0 // address of GIC distributor
GIC_cpu_base : .word 0 // address of GIC CPU interface
// Reset entry: set up the stacks, configure the GIC, unmask IRQs, run main().
Reset_Handler:
// sp is banked per processor mode, so the IRQ handler needs a stack of its
// own; previously only the reset-mode sp was set and the first push in
// irq_handler used an undefined sp. The 4kB region below stack_top is
// split: the top 1kB for IRQ mode, the remainder for the mode main() runs in.
MSR cpsr_c, #0xD2 // IRQ mode, IRQ and FIQ masked
LDR sp, =stack_top
MSR cpsr_c, #0xD3 // back to Supervisor mode, IRQ and FIQ still masked
LDR sp, =stack_top
SUB sp, sp, #0x400
// Configure GIC (before interrupts are unmasked):
BL IC_init
// Enable Interrupts on CPU Side:
MRS r1, cpsr // get the cpsr.
BIC r1, r1, #0x80 // enable IRQ (ORR to disable).
MSR cpsr_c, r1 // copy it back, control field bit update.
// Branch to C code
BL main
B .
// Initialize GIC
// Records the distributor and CPU interface base addresses, opens the
// priority mask, then enables the CPU interface and the distributor.
// Clobbers r1 and r2.
.global IC_init
IC_init:
stmfd sp!,{lr}
// Read GIC base from Configuration Base Address Register
// And use it to initialize GIC_dist_base and GIC_cpu_base
//mrc p15, 4, r0, c15, c0, 0
//add r2, r0, #GIC_DIST // Calculate address
ldr r2, =GIC_DIST
ldr r1, =GIC_dist_base
str r2,[r1] // Store address of GIC distributor
//add r2, r0, #GIC_CPU // Calculate address
ldr r2, =GIC_CPU
ldr r1, =GIC_cpu_base
str r2,[r1] // Store address of GIC CPU interface
// r2 still holds the CPU interface base. The ICC* registers must be
// addressed from there; using the address of the GIC_dist_base variable
// instead overwrote both base variables and never reached the GIC.
// Register (ICCPMR) to enable interrupts of all priorities
ldr r1,=0xFFFF
str r1,[r2,#ICCPMR]
// Set the enable bit in the CPU interface control register
// ICCICR, allowing CPU(s) to receive interrupts
mov r1,#1
str r1,[r2,#ICCICR]
// Set the enable bit in the distributor control register
// ICDDCR, allowing interrupts to be generated
ldr r2,=GIC_dist_base
ldr r2,[r2] // Base address of distributor
mov r1, #1
str r1,[r2,#ICDDCR]
ldmfd sp!,{pc}
//config_interrupt (int ID , int CPU);
// Enables interrupt ID in the distributor and sets its CPU targets byte.
// In:  r0 = interrupt ID (N)
//      r1 = value stored in the interrupt's ICDIPTR byte
// Clobbers r2 and r3; r4 and r5 are preserved.
.global config_interrupt
config_interrupt:
stmfd sp!,{r4-r5, lr}
// Configure the distributor interrupt set-enable registers (ICDISERn)
// enable the interrupt
// reg_offset = (N/32)*4 (shift and clear some bits)
// value = 1 << (N mod 32);
ldr r2,=GIC_dist_base
ldr r2,[r2] // Read GIC distributor base address
add r2,r2,#ICDISER // r2 <- base address of ICDISER regs
lsr r4,r0,#3 // calculate reg_offset
bic r4,r4,#3 // r4 <- reg_offset
add r4,r2,r4 // r4 <- address of ICDISERn
// Create a bit mask
and r2,r0,#0x1F // r2 <- N mod 32
mov r5,#1 // need to set one bit
lsl r2,r5,r2 // r2 <- value
// Using address in r4 and value in r2 to set the correct bit in the GIC register
ldr r3,[r4] // read ICDISERn
orr r3, r3, r2 // set the enable bit
str r3,[r4] // store the new register value
// Configure the distributor interrupt processor targets register (ICDIPTRn)
// select target CPU(s)
// reg_offset = (N/4)*4 (clear 2 bottom bits)
// index = N mod 4;
ldr r2,=GIC_dist_base
ldr r2,[r2] // Read GIC distributor base address
add r2,r2, #ICDIPTR // base address of ICDIPTR regs
bic r4,r0,#3 // r4 <- reg_offset
add r4,r2,r4 // r4 <- address of ICDIPTRn
// Get the address of the byte within ICDIPTRn
and r2,r0,#0x3 // r2 <- index
add r4,r2,r4 // r4 <- byte address to be set
strb r1,[r4]
// Pop the saved lr straight into pc so the routine actually returns;
// restoring lr only let execution run on into whatever code follows.
ldmfd sp!, {r4-r5, pc}
// int get_interrupt_number();
// Get the interrupt ID for the current interrupt. This should be called at the
// beginning of ISR. It also changes the state of the interrupt from pending to
// active, which helps to prevent other CPUs from trying to handle it.
// Returns the value read from ICCIAR in r0.
.global get_interrupt_number
get_interrupt_number:
// The label used to be misspelled, so the exported name was never defined.
// The old spelling is kept as an alias for any existing local reference.
get_intterrupt_number:
// Read the ICCIAR from the CPU Interface
ldr r0,=GIC_cpu_base
ldr r0,[r0]
ldr r0,[r0,#ICCIAR]
mov pc,lr
// void end_of_interrupt (int ID);
// Notify the GIC that the interrupt has been processed. The state goes from
// active to inactive, or it goes from active and pending to pending.
// In: r0 = interrupt ID, written to ICCEOIR. Clobbers r1.
.global end_of_interrupt
end_of_interrupt:
ldr r1,=GIC_cpu_base
ldr r1,[r1] // r1 <- CPU interface base address
str r0,[r1,#ICCEOIR]
mov pc, lr
// IRQ Handler that calls the ISR function in C
// NOTE(review): relies on the IRQ-mode sp pointing at valid stack memory.
.global irq_handler
irq_handler:
// Save the registers a C function is allowed to clobber: r0-r3, r12 and lr.
// The previous list {r0-r7, lr} left r12 unprotected and pushed nine words,
// which breaks the 8-byte stack alignment the ABI expects at a call.
stmfd sp!,{r0-r3, r12, lr}
// Call Interrupt Service Routine in C:
bl ISR
ldmfd sp!, {r0-r3, r12, lr}
// Must subtract 4 from lr; the S suffix also restores CPSR from SPSR
subs pc, lr, #4
Linker Script:
/* Execution starts at _Reset, the first entry of the vector table. */
ENTRY(_Reset)
SECTIONS
{
/* The image is linked to start at address 0x0. */
. = 0x0;
.text : {
/* The vector table from boot.o goes first, ahead of all other code. */
boot.o (INTERRUPT_VECTOR)
*(.text)
}
.data : { *(.data) }
.bss : { *(.bss COMMON) }
. = ALIGN(8);
. = . + 0x1000; /* 4kB of stack memory */
/* End of the stack region; Reset_Handler loads sp with this address. */
stack_top = .;
PROVIDE (end = .) ;
}
Main C Program:
#include <stdio.h>
/* Implemented in the assembly startup file; C linkage so the name is not mangled. */
extern "C" void config_interrupt(int, int);
/* Fixed-address word that sendShadow() writes to. */
volatile unsigned int * const SHADOW = (unsigned int *)0x1000a000;
/* Writes s to the word that SHADOW points at. */
void sendShadow(unsigned int s)
{
    SHADOW[0] = s;
}
/* Enables interrupt 47, then prints a greeting and writes the SHADOW word forever. */
int main(void)
{
    /*
     * config_interrupt stores its second argument into the interrupt's
     * ICDIPTR byte, which the GIC reads as a CPU target mask: bit 0 selects
     * CPU 0. Passing 0 selects no CPU, so the interrupt is never delivered.
     */
    config_interrupt(47, 1);
    unsigned int r = 1337;
    while (1)
    {
        /* r is unsigned, so it is printed with %u rather than %d. */
        printf("Hello World! %u\n", r);
        sendShadow(1);
    }
}
/*
 * Interrupt service routine, reached from irq_handler via "bl ISR".
 * It needs C linkage when this file is built as C++; otherwise the symbol
 * name is mangled and the assembly reference to ISR cannot be resolved.
 */
#ifdef __cplusplus
extern "C"
#endif
void ISR(void)
{
    printf("ISR");
}
I am not sure what I am doing something in wrong way, essentially I would like to get readable assembly, intermixed with C calls.
Here is some example code:
example.cu:
#include <stdio.h>
/* Trivial kernel: adds two constants in local variables; the result is not stored anywhere. */
__global__ void kernel()
{
unsigned long a, b, c;
a = 255;
b = 10;
c = a + b;
}
/* Launches kernel() with a single block of a single thread and waits for it to finish. */
int main(void)
{
/* NOTE(review): presumably here to force CUDA initialisation before the launch - confirm. */
cudaFree(0);
kernel<<<1,1>>>();
cudaDeviceSynchronize();
return 0;
}
As I looked into cuobjdump -h (emphasis mine):
--dump-sass (-sass)
Dump assembly for all listed device functions. Cuda source is
intermixed with the listed assembly in case option -G was specified to
nvcc during compilation, and if the source files can still be found.
I compile it with (thus example.cubin file is created):
nvcc -G -cubin -arch=sm_30 --ptxas-options=-v example.cu
Then I run:
cuobjdump -sass --function _Z6kernelv example.cubin
The output contains assembly instruction, but I see no C code anywhere:
code for sm_30
Function : _Z6kernelv
.headerflags @"EF_CUDA_SM30 EF_CUDA_PTX_SM(EF_CUDA_SM30)"
/*0000*/ MOV R1, c[0x0][0x44]; /* 0x2800400110005de4 */
/*0008*/ ISUB R1, R1, 0x8; /* 0x4800c00020105d03 */
/*0010*/ S2R R0, SR_LMEMHIOFF; /* 0x2c000000dc001c04 */
/*0018*/ ISETP.GE.AND P0, PT, R1, R0, PT; /* 0x1b0e00000011dc23 */
/*0020*/ @P0 BRA 0x30; /* 0x40000000200001e7 */
/*0028*/ BPT.TRAP 0x1; /* 0xd00000000400c007 */
/*0030*/ IADD R0, R1, RZ; /* 0x48000000fc101c03 */
/*0038*/ MOV R2, R0; /* 0x2800000000009de4 */
/*0040*/ MOV R3, RZ; /* 0x28000000fc00dde4 */
/*0048*/ MOV R2, R2; /* 0x2800000008009de4 */
/*0050*/ MOV R3, R3; /* 0x280000000c00dde4 */
/*0058*/ MOV R4, c[0x0][0x24]; /* 0x2800400090011de4 */
/*0060*/ MOV R5, RZ; /* 0x28000000fc015de4 */
/*0068*/ IADD R2.CC, R2, R4; /* 0x4801000010209c03 */
/*0070*/ IADD.X R3, R3, R5; /* 0x480000001430dc43 */
/*0078*/ MOV32I R4, 0xff; /* 0x18000003fc011de2 */
/*0080*/ MOV R5, RZ; /* 0x28000000fc015de4 */
/*0088*/ MOV R4, R4; /* 0x2800000010011de4 */
I haven't found any option to tell it explicitly where example.cu is located (it is in the same directory, though). OTOH, Nsight Eclipse Edition is clearly able to display SASS intermixed with C code for the same program (within a debugging session, in the Disassembly window):
It's not possible, currently, using cuobjdump. The referenced cuobjdump documentation/command line help is in error.
I'm working with 2 memories on my device, DDR and SRAM. The device is running a pre-OS code written in C and ARM.
I would like to conduct a DDR calibration, for that I need to copy a few functions to the SRAM, jump to it, run the calibration code, and go back to DDR when done.
In order to do so, I've modify my scatter file (.lds) so the relevant functions will be mapped to SRAM (instructions, data etc.).
After compiling the image, it is copied into the DDR and starts running from there.
My problem is as follows:
How can I locate the starting address and size of these functions on DDR, so I'll be able to copy them to the SRAM and jump there?
Thanks you all in advance!
I am assuming you are talking about ARM architecture:
compile the code with __attribute__((always_inline)); on all related functions and compile with -fpic -fPIC read here for more info.
disassemble it and place it as-is in SRAM, e.g. at address 0xd1001000
reserve {r4-r15} on SRAM.
set pc to 0xd1001000 and sp properly to point the stack.
restore {r4-r15}
jump back to DDR.
You can look at here for a good resource of how to use the right gcc flags.
Here is a reference from U-Boot - it doesn't jump back to the initial place:
/*
* void relocate_code (addr_sp, gd, addr_moni)
*
* This "function" does not return, instead it continues in RAM
* after relocating the monitor code.
*
* In: r0 = addr_sp, r1 = address of gd, r2 = destination address
*/
.globl relocate_code
relocate_code:
mov r4, r0 /* save addr_sp */
mov r5, r1 /* save addr of gd */
mov r6, r2 /* save addr of destination */
/* Set up the stack */
stack_setup:
mov sp, r4
adr r0, _start
cmp r0, r6 /* compare the address of _start with the destination */
moveq r9, #0 /* no relocation. relocation offset(r9) = 0 */
beq clear_bss /* skip relocation */
mov r1, r6 /* r1 <- scratch for copy_loop */
ldr r3, _image_copy_end_ofs
add r2, r0, r3 /* r2 <- source end address */
/* Copy the image two words per iteration. */
copy_loop:
ldmia r0!, {r9-r10} /* copy from source address [r0] */
stmia r1!, {r9-r10} /* copy to target address [r1] */
cmp r0, r2 /* until source end address [r2] */
blo copy_loop
#ifndef CONFIG_SPL_BUILD
/*
* fix .rel.dyn relocations
*
* Walks the .rel.dyn entries from r2 up to r3 and patches each target
* location in the relocated copy. r9 holds the relocation offset.
*/
ldr r0, _TEXT_BASE /* r0 <- Text base */
sub r9, r6, r0 /* r9 <- relocation offset */
ldr r10, _dynsym_start_ofs /* r10 <- sym table ofs */
add r10, r10, r0 /* r10 <- sym table in FLASH */
ldr r2, _rel_dyn_start_ofs /* r2 <- rel dyn start ofs */
add r2, r2, r0 /* r2 <- rel dyn start in FLASH */
ldr r3, _rel_dyn_end_ofs /* r3 <- rel dyn end ofs */
add r3, r3, r0 /* r3 <- rel dyn end in FLASH */
fixloop:
ldr r0, [r2] /* r0 <- location to fix up, IN FLASH! */
add r0, r0, r9 /* r0 <- location to fix up in RAM */
ldr r1, [r2, #4] /* r1 <- second word of the entry */
and r7, r1, #0xff /* r7 <- its low byte, the fixup type */
cmp r7, #23 /* relative fixup? */
beq fixrel
cmp r7, #2 /* absolute fixup? */
beq fixabs
/* ignore unknown type of fixup */
b fixnext
fixabs:
/* absolute fix: set location to (offset) symbol value */
mov r1, r1, LSR #4 /* r1 <- symbol index in .dynsym */
add r1, r10, r1 /* r1 <- address of symbol in table */
ldr r1, [r1, #4] /* r1 <- symbol value */
add r1, r1, r9 /* r1 <- relocated sym addr */
b fixnext
fixrel:
/* relative fix: increase location by offset */
ldr r1, [r0]
add r1, r1, r9
fixnext:
str r1, [r0]
add r2, r2, #8 /* each rel.dyn entry is 8 bytes */
cmp r2, r3
blo fixloop
b clear_bss
/* Offsets of the linker-provided table boundaries, relative to _start. */
_rel_dyn_start_ofs:
.word __rel_dyn_start - _start
_rel_dyn_end_ofs:
.word __rel_dyn_end - _start
_dynsym_start_ofs:
.word __dynsym_start - _start
#endif /* #ifndef CONFIG_SPL_BUILD */
/* Zero the BSS one word at a time, from r0 up to (not including) r1. */
clear_bss:
#ifdef CONFIG_SPL_BUILD
/* No relocation for SPL */
ldr r0, =__bss_start
ldr r1, =__bss_end__
#else
/* BSS bounds are stored as offsets; add the relocation address to them. */
ldr r0, _bss_start_ofs
ldr r1, _bss_end_ofs
mov r4, r6 /* reloc addr */
add r0, r0, r4
add r1, r1, r4
#endif
mov r2, #0x00000000 /* clear */
clbss_l:str r2, [r0] /* clear loop... */
add r0, r0, #4
cmp r0, r1
bne clbss_l
/*
* We are done. Do not return, instead branch to second part of board
* initialization, now running from RAM.
*/
jump_2_ram:
/*
* If I-cache is enabled invalidate it
*/
#ifndef CONFIG_SYS_ICACHE_OFF
mcr p15, 0, r0, c7, c5, 0 @ invalidate icache
mcr p15, 0, r0, c7, c10, 4 @ DSB
mcr p15, 0, r0, c7, c5, 4 @ ISB
#endif
/* lr <- address of _start + offset of board_init_r + relocation offset (r9) */
ldr r0, _board_init_r_ofs
adr r1, _start
add lr, r0, r1
add lr, lr, r9
/* setup parameters for board_init_r */
mov r0, r5 /* gd_t */
mov r1, r6 /* dest_addr */
/* jump to it ... */
mov pc, lr
_board_init_r_ofs:
.word board_init_r - _start