I have bootloader, when good jobed in lpc2xxx. But, when I copy this to lpc4078, bootloader not jump to main programm.
I tried:
1) Use #define USER_FLASH_START 0x8000
__asm void boot_jump(uint32_t address)
{
LDR SP, [R0]
LDR PC, [R0, #4]
}
main()
{
bool t = true;
while(t)
{
...WORKING CODE...
uart << "Hello, World!!!";
}
uart << "END";
__disable_irq();
__set_CONTROL(0);
__set_MSP(stack_adr);
SCB->VTOR = (USER_FLASH_START & 0x1FFFFF80);
boot_jump(USER_FLASH_START);
}
2) Use #define USER_FLASH_START 0x8000
void JumpToAppAt(unsigned int * vtbp)
{
__disable_irq();
__set_MSP(vtbp[0]); // load SP
((void (*)(void)) vtbp[1])(); // go...
}
main()
{
bool t = true;
while(t)
{
...WORKING CODE...
uart << "Hello, World!!!";
}
uart << "END";
__disable_irq();
__set_CONTROL(0);
__set_MSP(stack_adr);
SCB->VTOR = (USER_FLASH_START & 0x1FFFFF80);
JumpToAppAt((unsigned int *) USER_FLASH_START);
}
Main programm not start(.
I thought what not work MAIN_PROGRAMM and tried:
3) Use #define USER_FLASH_START 0x0, but nothing has changed.
LPC2xxx are based on ARM7 while LPC4078 is based on Cortex-M4. Regarding the reset vector, I see that you have adjusted to using the 2nd entry of the vector table. Please make sure that the addresses stored in the vector table have their LSB set to 1, i.e. bit[0] = 1, because this bit indicates Thumb instructions, and the Cortex-M4 processor only supports Thumb instructions. (reference)
Related
I'm a beginner to embedded software. I try to build my simple real time operating system kernel using C code with the ARM Cortex-M4F Based MCU Tiva C LaunchPad and run in the IAR Embedded Workbench IDE.
The system can support 3 tasks, task A blinks the red LED, task B blinks the blue LED and task C blinks the green LED. The tasks are scheduled in a round-robin way. It uses SysTick to trigger PendSV once per second for context switching.
The following code works fine to blink the LEDs as expected.
#include "include/tm4c_cmsis.h"
#include <intrinsics.h>
#define SYS_CLOCK_HZ 16000000U
#define LED_RED (1U << 1)
#define LED_BLUE (1U << 2)
#define LED_GREEN (1U << 3)
#define MAX_TASK_NUM 3
#define MAX_TASK_SIZE 0x40
int OSStack[MAX_TASK_NUM][MAX_TASK_SIZE] __attribute__ ((aligned (4)));
void task_A();
void task_B();
void task_C();
/* Task Control Block (TCB) */
typedef struct {
int *sp; /* stack pointer */
int status; // 0: does not exists, 1: created, 2: running
} OSTask;
OSTask OSTask_List[MAX_TASK_NUM];
int OS_curr; /* index of the current task */
int OS_next;
int OS_tn; // total task number
int *sp_curr;
int *sp_next;
void OSInit(){
// configure GPIOF for LED blinking
SYSCTL->RCGC2 |= (1U << 5);
GPIOF->DIR |= (1<<3)|(1<<2)|(1<<1);
GPIOF->DEN |= (1<<3)|(1<<2)|(1<<1);
SysTick->LOAD = SYS_CLOCK_HZ - 1;
SysTick->VAL = 0;
SysTick->CTRL = (1U << 2) | (1U << 1) | 1;
OS_tn = 0;
}
void OSCreateTask(void* taskH){
int n=OS_tn;
OS_tn++;
int* p = (int *) OSStack;
OSTask_List[n].sp = p + ((n+1) * MAX_TASK_SIZE);
// init the stack for each task
*(--OSTask_List[n].sp) = (1U << 24); /* xPSR */
*(--OSTask_List[n].sp) = (uint32_t)taskH; /* PC */
*(--OSTask_List[n].sp) = 0x0000000EU + n*16; /* LR */
*(--OSTask_List[n].sp) = 0x0000000CU + n*16; /* R12 */
*(--OSTask_List[n].sp) = 0x00000003U + n*16; /* R3 */
*(--OSTask_List[n].sp) = 0x00000002U + n*16; /* R2 */
*(--OSTask_List[n].sp) = 0x00000001U + n*16; /* R1 */
*(--OSTask_List[n].sp) = 0x00000000U + n*16; /* R0 */
*(--OSTask_List[n].sp) = 0x0000000BU + n*16; /* R11 */
*(--OSTask_List[n].sp) = 0x0000000AU + n*16; /* R10 */
*(--OSTask_List[n].sp) = 0x00000009U + n*16; /* R9 */
*(--OSTask_List[n].sp) = 0x00000008U + n*16; /* R8 */
*(--OSTask_List[n].sp) = 0x00000007U + n*16; /* R7 */
*(--OSTask_List[n].sp) = 0x00000006U + n*16; /* R6 */
*(--OSTask_List[n].sp) = 0x00000005U + n*16; /* R5 */
*(--OSTask_List[n].sp) = 0x00000004U + n*16; /* R4 */
}
// cannot create tasks out of order
void OSSchd(){
if (OS_curr == OS_tn-1)
OS_next = 0;
else
OS_next = OS_curr + 1;
sp_curr = OSTask_List[OS_curr].sp;
sp_next = OSTask_List[OS_next].sp;
*(volatile uint32_t *)0xE000ED04 = (1U << 28);
}
void PendSV_Handler(void) {
asm("PUSH {r4-r11}");
asm("LDR r3, =sp_curr");
asm("STR sp, [r3,#0x00]");
asm("LDR r3, =sp_next");
asm("LDR sp, [r3,#0x00]");
OS_curr = OS_next;
asm("POP {r4-r11}");
}
void SysTick_Handler(void) {
GPIOF->DATA = 0;
OSSchd();
}
void lightRed(void){
GPIOF->DATA_Bits[LED_RED] ^= LED_RED;
}
int main() {
OSInit();
OSCreateTask((void *)task_A);
OSCreateTask((void *)task_B);
OSCreateTask((void *)task_C);
__enable_interrupt();
while (1) {
}
}
void task_A() {
while (1) {
//lightRed();
GPIOF->DATA_Bits[LED_RED] ^= LED_RED;
}
}
void task_B() {
while (1) {
GPIOF->DATA_Bits[LED_BLUE] ^= LED_BLUE;
}
}
void task_C() {
while (1) {
GPIOF->DATA_Bits[LED_GREEN] ^= LED_GREEN;
}
}
However, when I try to change the code inside task_A by calling the function lightRed() as follow:
void lightRed(void){
GPIOF->DATA_Bits[LED_RED] ^= LED_RED;
}
...
void task_A() {
while (1) {
lightRed();
}
}
The three LEDs only blink for 2 cycles and no further response. I stop executing the code and the debugger shows the following problems:
: HardFault exception.
: The processor has escalated a configurable-priority exception to HardFault.
: An integrity check error has occurred on EXC_RETURN (CFSR.INVPC).
: Exception occured at PC = 0x7, LR = 0x1000000
: See the call stack for more information.
: The stack pointer for stack 'CSTACK' (currently 0x200000E0) is outside the stack range (0x20000330 to 0x20000B30)
Also, the call stack is as follow:
-> [__iar_zero_init3 + 0x39]
<Exception frame>
[__vector_table + 0x7]
How can I solve this problem?
This is, fittingly enough, definitely a case of stack overflow. The debug message you're getting is somewhat bogus; the valid stack range given (0x20000330 to 0x20000B30) is probably the configured range for the main stack, which you're not using (all of your tasks are using stacks from your OSStack array).
You've allocated 64 bytes (0x40) for each task's stack. The stack frame you've defined for a task that's suspended is already 64 bytes in size, and that's before it's done anything. If a task is running, and has used any stack space for anything whatsoever, then when it is suspended it will use an additional 64 bytes on top of whatever it is using at the time. This pretty much guarantees you a stack overflow on any nontrivial task, and in this case as soon as you introduce the function call into task_A() you'll be forcing it to use the stack.
The immediate fix is simple: just increase the value of the MAX_TASK_SIZE constant.
Incidentally, the Cortex-M4 has dual stack capability, so you can configure thread-mode code to use a different stack from handler-mode code. This can simplify analysis of stack usage and reduce the required size of task stacks, because interrupt service routines will not use the task stacks for their local storage. Your context switch can remain almost the same, but must read and write PSP to obtain and modify the task stack pointers rather than just using sp.
I'm learning how to program STM32 Nucleo F446RE board using registers.
To know the position of a register, I take from datasheets the boundary address and the offset.
However, I cannot calculate the sum of them. I show an exmaple:
volatile uint32_t *GPIOA = 0x0; // Initialization of the boundary adress
GPIOA = (uint32_t*)0x40020000; // Boundary adress from datasheet
volatile uint32_t *GPIOA_ODR = 0x0; // Initialization of GPIOA_ODR register
GPIOA_ODR = GPIOA + (uint32_t*)0x14; // Calculation of GPIOA_ODR as the sum of the boundary adress and the offset (i.e. 0x14.
Line 5 gives me an error, do you know how to calculate it correctly?
Thank you very much in advance.
It is wrong. If you want to use this extremely inconvenient way:
#define GPIOA 0x4002000
#define ODR_OFFSET 0x14
#define GPIO_ODR (*(volatile uint32_t *)(GPIOA + ODR_OFFSET))
why #define not the pointer? It is just more compiler friendly and saves one memory read.
https://godbolt.org/z/LdLLVN
#define GPIOA 0x4002000
#define ODR_OFFSET 0x14
#define GPIO_ODR (*(volatile uint32_t *)(GPIOA + ODR_OFFSET))
volatile uint32_t *pGPIO_ODR = (volatile uint32_t *)(GPIOA + ODR_OFFSET);
void foo(uint32_t x)
{
GPIO_ODR = x;
}
void bar(uint32_t x)
{
*pGPIO_ODR = x;
}
and resulting code
foo:
ldr r3, .L3
str r0, [r3, #20]
bx lr
.L3:
.word 67117056
bar:
ldr r3, .L6
ldr r3, [r3]
str r0, [r3]
bx lr
.L6:
.word .LANCHOR0
pGPIO_ODR:
.word 67117076
The cast should be outside the constant value, in other words, you are adding GPIOA address + 14 to generate a new address. So the cast must be outside them:
GPIOA_ODR = (uint32_t*)(GPIOA + 0x14);
I tried but nothing. If I insert the GPIOA_ODR = (uint32_t*)(0x40020000 + 0x14); it works, instead if I insert GPIOA_ODR = (uint32_t*)(GPIOA + 0x14); it doesn't work.
Some other ideas?
Thank you very much for the answer. The complete code I'm using is the following:
int main(int argc, char* argv[])
{
/** RCC **/
/* RCC */
volatile uint32_t *RCC = 0x0;
RCC = (uint32_t*)0x40023800;
/* RCC_AHB1ENR */
volatile uint32_t *RCC_AHB1ENR = 0x0;
RCC_AHB1ENR = (uint32_t*)(0x40023800 + 0x30);
*RCC_AHB1ENR |= 0x1;
/** GPIOA **/
/* GPIOA */
volatile uint32_t *GPIOA = 0x0;
GPIOA = (uint32_t*)0x40020000;
/* GPIOA_MODER */
volatile uint32_t *GPIOA_MODER = 0x0;
GPIOA_MODER = (uint32_t*)(0x40020000 + 0x00);
*GPIOA_MODER |= 1 << 16;
*GPIOA_MODER &= ~(0 << 17);
/* GPIOA_ODR */
volatile uint32_t *GPIOA_ODR = 0x0;
GPIOA_ODR = (uint32_t*)(GPIOA + 0x14);
*GPIOA_ODR |= 1 << 8;
}
This code doens't work correctly because of the line GPIOA_ODR = (uint32_t*)(GPIOA + 0x14);. If I insert GPIOA_ODR = (uint32_t*)(0x40020000 + 0x14) it works correctly.
I am trying to run a simple blink program on a STM32 board based on STM32F413RG. The led lights up and only toggles when stepping through, not when continuing without breakpoint or running freely on Release mode.
I have setup the eclipse(4.11) to debug the program using a J-link hardware debugger. The code uploads and the LED programmed lights up, but I can see it toggle only when manually stepping through. It does not run without breakpoints.
Sharing my code below where I have setup the clock to be sourced from PLL running on 32 MHz and trying to blink the LED connected to Port B pin 1 every 0.5 second.
One more interesting thing is, Even if I am able to set a breakpoint to see inside the delay() method, the debugger never stops at it / seems to jump across that line of code when single-stepping. Why does it so?
void setupClocks()
{
// we want to use the 24000000 HSE clock (xtal) as the base
RCC->CR |= RCC_CR_HSEON;
// so wait for it to be ready
while ((RCC->CR & RCC_CR_HSERDY) == 0) {}
enter code here
// now configure the PLL (HSE / 12 * 96 /6) gives 32 MHz
RCC->PLLCFGR |= RCC_PLLCFGR_PLLSRC_HSE;
RCC->PLLCFGR &= ~RCC_PLLCFGR_PLLM_Msk;
RCC->PLLCFGR |= (12)<< RCC_PLLCFGR_PLLM_Pos;
RCC->PLLCFGR &= ~RCC_PLLCFGR_PLLN_Msk;
RCC->PLLCFGR |= (96)<< RCC_PLLCFGR_PLLN_Pos; // 32 MHz
RCC->PLLCFGR &= ~RCC_PLLCFGR_PLLP_Msk;
RCC->PLLCFGR |= RCC_PLLCFGR_PLLP_1; // 6
RCC->CR |= RCC_CR_PLLON;
// wait for PLL to be ready
while ((RCC->CR & RCC_CR_PLLRDY) == 0) {}
// now setup the AHB1
// 1/2 of system clock (48 MHz)
RCC->CFGR |= RCC_CFGR_PPRE1_2;
// select PLL (clocked from HSE)
RCC->CFGR |= RCC_CFGR_SW_1;
//reset the ones we use
RCC->AHB1RSTR = RCC_AHB1RSTR_GPIOARST;
RCC->AHB1RSTR = RCC_AHB1RSTR_GPIOBRST;
RCC->AHB1RSTR = RCC_AHB1RSTR_GPIOCRST;
RCC->AHB1RSTR = RCC_AHB1RSTR_GPIODRST;
RCC->AHB1RSTR = 0;
SystemCoreClockUpdate();
}
void initLED()
{
// enable port B clock
RCC->AHB1ENR |= RCC_AHB1ENR_GPIOBEN;
// set as output for Port B pin 1
GPIOB->MODER |= GPIO_MODER_MODER1_0;
// start with MP10 LED on
GPIOB->ODR = GPIO_ODR_ODR_1;
}
void delay(uint32_t microsec)
{
// wait a bit
uint32_t counter = (microsec * (SystemCoreClock / 1000000U));
while(counter != 0U)
{
counter--;
}
}
void blinkCount(int count)
{
for (int i = 0; i < count; ++i)
{
GPIOB->ODR = ~GPIO_ODR_ODR_1 ;
delay(500000);
GPIOB->ODR = GPIO_ODR_ODR_1;
delay(500000);
}
delay(1000000);
}
int main()
{
setupClocks();
initLED();
while(1)
{
blinkCount(1);
delay(1000000);
}
return 0;
}
Expecting to blink the led as per desired frequency when the program is run without breakpoints or release mode, but the led activity is only visible when stepping through during debug mode.
Be consistent. If you set the clock using directly registers, do not call HAL cube generated functions like SystemCoreClockUpdate(); it is very likely the SystemCoreClock will not have the value you think it has
When doing blocking delays I advice using volatile variables as they will not be removed by the compiler. There is no need of using the 64 bits variables unless you want many minutes delays. Try to do not block. use SysTick (or any other timer) interrupt to implement delays.
example
void delay(volatile uint32_t delay)
{
while(delay--);
}
or for more precise control inline assembly:
void delay1(uint32_t delay)
{
while(delay--)
{
asm volatile("" : : "r"(delay) : "memory");
}
}
which leads to the code:
delay:
sub sp, sp, #8
str r0, [sp, #4]
.L2:
ldr r3, [sp, #4]
sub r2, r3, #1
cmp r3, #0
str r2, [sp, #4]
bne .L2
add sp, sp, #8
bx lr
delay1:
.L6:
subs r0, r0, #1
bxcc lr
b .L6
You need a small delay after enabling the GPIO clock. Put a __DSB() call between enabling the clock and accessing the GPIO registers.
See the product errata document for details.
I have been measuring clock cycle count on the cortex m4 and would now like to do it on the cortex m7. The board I use is STM32F746ZG.
For the m4 everything worked with:
volatile unsigned int *DWT_CYCCNT;
volatile unsigned int *DWT_CONTROL;
volatile unsigned int *SCB_DEMCR;
void reset_cnt(){
DWT_CYCCNT = (volatile unsigned int *)0xE0001004; //address of the register
DWT_CONTROL = (volatile unsigned int *)0xE0001000; //address of the register
SCB_DEMCR = (volatile unsigned int *)0xE000EDFC; //address of the register
*SCB_DEMCR = *SCB_DEMCR | 0x01000000;
*DWT_CYCCNT = 0; // reset the counter
*DWT_CONTROL = 0;
}
void start_cnt(){
*DWT_CONTROL = *DWT_CONTROL | 0x00000001 ; // enable the counter
}
void stop_cnt(){
*DWT_CONTROL = *DWT_CONTROL & 0xFFFFFFFE ; // disable the counter
}
unsigned int getCycles(){
return *DWT_CYCCNT;
}
The problem is that the DWT_CTRL register isn't changed when I run on the m7 and remains 0x40000000 instead of changing to 0x40000001 so the cycle count is always zero. From what I have read in other posts it seems like you need to set the FP_LAR register to 0xC5ACCE55 to be able to change DWT_CTRL.
I added these defines (have tried both FP_LAR_PTR addresses below):
#define FP_LAR_PTR ((volatile unsigned int *) 0xe0000fb0) //according to reference
//#define FP_LAR_PTR ((volatile unsigned int *) 0xe0002fb0) //according to guy on the internet
// Lock Status Register lock status bit
#define DWT_LSR_SLK_Pos 1
#define DWT_LSR_SLK_Msk (1UL << DWT_LSR_SLK_Pos)
// Lock Status Register lock availability bit
#define DWT_LSR_SLI_Pos 0
#define DWT_LSR_SLI_Msk (1UL << DWT_LSR_SLI_Pos)
// Lock Access key, common for all
#define DWT_LAR_KEY 0xC5ACCE55
and this function:
void dwt_access_enable(unsigned int ena){
volatile unsigned int *LSR;
LSR = (volatile unsigned int *) 0xe0000fb4;
uint32_t lsr = *LSR;;
//printf("LSR: %.8X - SLI MASK: %.8X\n", lsr, DWT_LSR_SLI_Msk);
if ((lsr & DWT_LSR_SLI_Msk) != 0) {
if (ena) {
//printf("LSR: %.8X - SLKMASK: %.8X\n", lsr, DWT_LSR_SLK_Msk);
if ((lsr & DWT_LSR_SLK_Msk) != 0) { //locked: access need unlock
*FP_LAR_PTR = DWT_LAR_KEY;
printf("FP_LAR directly after change: 0x%.8X\n", *FP_LAR_PTR);
}
} else {
if ((lsr & DWT_LSR_SLK_Msk) == 0) { //unlocked
*FP_LAR_PTR = 0;
//printf("FP_LAR directly after change: 0x%.8X\n", *FP_LAR_PTR);
}
}
}
}
When I call the uncommented print I get 0xC5ACCE55 but when I printed it after the return of the function I get 0x00000000 and I have no idea why. Am I on the right track or is this completely wrong?
Edit: I think it also would be good to mention that I have tried without all the extra code in the function and only tried to change the LAR register.
BR
Gustav
Looking at the docs again, I'm now incredibly suspicious of a typo or copy-paste error in the ARM TRM. 0xe0000fb0 is given as the address of ITM_LAR, DWT_LAR and FP_LSR (and equivalently for *_LSR). Since all the other ITM registers are in page 0xe0000000, it looks an awful lot like whoever was responsible for that part of the Cortex-M7 documentation took the Cortex-M4 register definitions, added the new LAR and LSR to the ITM page, then copied them to the DWT and FPB pages updating the names but overlooking to update the addresses.
I'd bet my dinner that you're unwittingly unlocking ITM_LAR (or the real FP_LAR), and DWT_LAR is actually at 0xe0001fb0.
EDIT by dwelch
Somebody owes somebody a dinner.
hexstring(GET32(0xE0001FB4));
hexstring(GET32(0xE0001000));
hexstring(GET32(0xE0001004));
hexstring(GET32(0xE0001004));
PUT32(0xE000EDFC,0x01000000);
hexstring(GET32(0xE0001FB4));
hexstring(GET32(0xE0001000));
hexstring(GET32(0xE0001004));
hexstring(GET32(0xE0001004));
PUT32(0xE0001000,0x40000001);
hexstring(GET32(0xE0001FB4));
hexstring(GET32(0xE0001000));
hexstring(GET32(0xE0001004));
hexstring(GET32(0xE0001004));
PUT32(0xE0001FB0,0xC5ACCE55);
PUT32(0xE0001000,0x40000001);
hexstring(GET32(0xE0001FB4));
hexstring(GET32(0xE0001000));
hexstring(GET32(0xE0001004));
hexstring(GET32(0xE0001004));
output
00000000
00000000
00000000
00000000
00000003
40000000
00000000
00000000
00000003
40000000
00000000
00000000
00000001
40000001
0000774F
0000B311
The table in the TRM is funny looking and as the other documentation shows you add the 0xFB0 and 0xFB4 to the base, the rest of the DWT for the Cortex-M7 is 0xE0001xxx and indeed it appears that the LAR and LSR are ate 0xE0001FB0 and 0xE0001FB4.
I would advise against creating your own register definitions when they are defined as part of the CMSIS - to do so requires that both the documentation and your interpretation of it are correct. In this case it appears that the documentation is indeed incorrect, but that the CMSIS headers are correct. It is a lot easier to validate the CMSIS headers automatically than it is to verify the documentation is correct, so I would trust the CMSIS every time.
I am not sure what register FP_LAR might refer to, but your address assignment refers to ITM_LAR, but it seems more likely that you intended DWT_LAR which Cortex-M4 lacks.
Despite my advice to trust it, CMSIS 4.00 omits to define masks for DWT_LSR/SWT_LAR, but I believe they are identical to the corresponding ITM masks.
Note also that the LAR is a write-only register - any attempt to read it is meaningless.
Your code using CMSIS would be:
#include "core_cm7.h" // Applies to all Cortex-M7
void reset_cnt()
{
CoreDebug->DEMCR |= 0x01000000;
DWT->CYCCNT = 0; // reset the counter
DWT->CTRL = 0;
}
void start_cnt()
{
DWT->CTRL |= 0x00000001 ; // enable the counter
}
void stop_cnt()
{
DWT->CTRL &= 0xFFFFFFFE ; // disable the counter
}
unsigned int getCycles()
{
return DWT->CYCCNT ;
}
// Not defined in CMSIS 4.00 headers - check if defined
// to allow for possible correction in later versions
#if !defined DWT_LSR_Present_Msk
#define DWT_LSR_Present_Msk ITM_LSR_Present_Msk
#endif
#if !defined DWT_LSR_Access_Msk
#define DWT_LSR_Access_Msk ITM_LSR_Access_Msk
#endif
#define DWT_LAR_KEY 0xC5ACCE55
void dwt_access_enable( unsigned ena )
{
uint32_t lsr = DWT->LSR;;
if( (lsr & DWT_LSR_Present_Msk) != 0 )
{
if( ena )
{
if ((lsr & DWT_LSR_Access_Msk) != 0) //locked: access need unlock
{
DWT->LAR = DWT_LAR_KEY;
}
}
else
{
if ((lsr & DWT_LSR_Access_Msk) == 0) //unlocked
{
DWT->LAR = 0;
}
}
}
}
I have some assembly codes encapsulated in a static function of my driver code. My codes is like
static int _ARMVAtoPA(void *pvAddr)
{
__asm__ __volatile__(
/* ; INTERRUPTS_OFF" */
" mrs r2, CPSR;\n" /* r2 saves current status */
"CPSID iaf;\n" /* Disable interrupts */
/*In order to handle PAGE OUT scenario, we need do the same operation
twice. In the first time, if PAGE OUT happens for the input address,
translation abort will happen and OS will do PAGE IN operation
Then the second time will succeed.
*/
"mcr p15, 0, r0, c7, c8, 0;\n "
/* ; get VA = <Rn> and run nonsecure translation
; with nonsecure privileged read permission.
; if the selected translation table has privileged
; read permission, the PA is loaded in the PA
; Register, otherwise abort information is loaded
; in the PA Register.
*/
/* read in <Rd> the PA value */
"mrc p15, 0, r1, c7, c4, 0;\n"
/* get VA = <Rn> and run nonsecure translation */
" mcr p15, 0, r0, c7, c8, 0;\n"
/* ; with nonsecure privileged read permission.
; if the selected translation table has privileged
; read permission, the PA is loaded in the PA
; Register, otherwise abort information is loaded
; in the PA Register.
*/
"mrc p15, 0, r0, c7, c4, 0;\n" /* read in <Rd> the PA value */
/* restore INTERRUPTS_ON/OFF status*/
"msr cpsr, r2;\n" /* re-enable interrupts */
"tst r0, #0x1;\n"
"ldr r2, =0xffffffff;\n"
/* if error happens,return INVALID_PHYSICAL_ADDRESS */
"movne r0, r2;\n"
"biceq r0, r0, #0xff;\n"
"biceq r0, r0, #0xf00;" /* if ok, clear the flag bits */
);
}
static unsigned long CpuUmAddrToCpuPAddr(void *pvCpuUmAddr)
{
int phyAdrs;
int mask = 0xFFF; /* low 12bit */
int offset = (int)pvCpuUmAddr & mask;
int phyAdrsReg = _ARMVAtoPA((void *)pvCpuUmAddr);
if (INVALID_PHYSICAL_ADDRESS != phyAdrsReg)
phyAdrs = (phyAdrsReg & (~mask)) + offset;
else
phyAdrs = INVALID_PHYSICAL_ADDRESS;
return phyAdrs;
}
As you can see, I tried to convert a virtual address which from user space to physical address. I'm porting this codes from another project, except I modify the _ARMVAtoPA function to static function.
When I'm using static int _ARMVAtoPA(void *pvAddr):
this convert function (which with bunch of assembly codes in it) is always return fffffff, error case for sure.
When I'm using int _ARMVAtoPA(void *pvAddr):
this convert function would working fine.
Can anyone explain to me, why results are vary when I use static and non-static function.
Thanks
The ASM code doesn't define which register holds the function argument pvAddr and which register holds the return value. It just assumes the compiler follows mips ABI.
But if the function is inlined (where probably static does), the register allocation may change, so the asm code can be totally wrong.
In order to fix the problem, you should use gcc extension to assign registers for function arguments and return value. And also declare which registers it will use w/o restore, so the compiler can restore registers after the call in case the function is inlined.