everyone, I am a beginner of embedded system programming, my first led test program for my board, which is mini2440, was written in assembly language as .S file:
.text
.global _start
_start:
bl disable_watch_dog
ldr r0, =0x56000010
mov r1, #0x15400
str r1, [r0]
ldr r0, =0x56000018
mov r1, #0x0
str r1, [r0]
ldr r0, =0x56000014
MAIN_LOOP:
mov r1, #0x0
str r1, [r0]
mov r2, #0x50000
bl delay
mov r1, #0x1e0
str r1, [r0]
mov r2, #0x50000
bl delay
b MAIN_LOOP
disable_watch_dog:
ldr r0, =0x53000000
mov r1, #0x0
str r1, [r0]
mov pc, r14
delay:
sub r2, r2, #0x1
cmp r2, #0x0
bne delay
mov pc, lr
After the compiling I got .bin file from this program and I wrote it to the band flash of mini2440 with opened:
telnet localhost 4444
halt
init_2440
nand erase 0 0x0 0x100000
nand write 0 /home/led/led.bin 0
reset
I did not see any error or warning from the terminal but the board just has no reaction, all the leds were dark. I appreciate it if anyone could explain me about this.
Thanks
Related
This is supposed to output the contents of each line in arm assembly. Though line 18 add r4, r5, r4, lsl #1 isn't being outputted correctly and I am not sure why.
.data
str1: .asciz "%d and %d are the results \n"
n: .word word 1
.text
.global main
main: stmfd sp!, {lr}
ldr r4,=n
ldr r4, [r4]
add r4,r4, #1
mov r1, r4
ldr r0, =str1
bl printf
mov r5, r4
mov r1, r4
mov r2, r5
ldr r0, =str1
bl printf
add r4, r5, r4, lsl #1
mov r1, r4
ldr r0, = str1
bl printf
ldmfd sp!, {lr}
mov r0, #0
mov PC, or
.end
I am using IAR to compile routines, but run error on ARM A7; then i got the question below when i open the .lst file generated by IAR.
It is a ISR, first push {r3, r4, r5, lr}, but POP {r0, r4, r5, lr} when return, the R0 value is changed to the value of R3 before push. So R0 is wrong when returned from irqHandler which lead to error in follow routines.
why ?
void irqHandler(void)
{
878: e92d4038 push {r3, r4, r5, lr}
volatile u32 *pt = (u32 *)AM_INTC_BASE;
87c: e3a044b0 mov r4, #176, 8 ; 0xb0000000
u32 id_spin;
id_spin = *(pt+0x200c/4) & 0x3ff;
880: e302000c movw r0, #8204 ; 0x200c
884: e7900004 ldr r0, [r0, r4]
888: e1b00b00 lsls r0, r0, #22
88c: e1b00b20 lsrs r0, r0, #22
890: e1b05000 movs r5, r0
if(id_spin<32)
894: e3550020 cmp r5, #32
898: 2a000000 bcs 8a0 <irqHandler+0x28>
{
#ifdef WHOLECHIPSIM
print("id_spid<32 error...\r\n",0);
#endif
while(1);
89c: eafffffe b 89c <irqHandler+0x24>
}
else
{
(pFuncIrq[id_spin-32])();
8a0: e59f0010 ldr r0, [pc, #16] ; 8b8 <.text_8>
8a4: e1b01105 lsls r1, r5, #2
8a8: e0910000 adds r0, r1, r0
8ac: e5100080 ldr r0, [r0, #-128] ; 0x80
8b0: e12fff30 blx r0
}
}
8b4: e8bd8031 pop {r0, r4, r5, pc}
The abi requires a 64 bit aligned stack, so the push of r3 simply facilitates that. Could have chosen any register not already specified. Likewise on the pop they need to clean up the stack the function is prototyped as void so the return (r0) is a dont care and r0-r3 are not expected to be preserved so no reason to match the r3 on each end nor match an r0 on each end.
had they chose a register numbered above r3 (r6 for example) on the push then that would have needed to be matched on the pop. Otherwise the pop would have to be one of r0-r3 to not trash a non-volatile register. (couldnt push r3 then pop r6 that would trash r6)
It does not matter as R0-R3, R12, LR, PC, xPSR are saved on the stack automaticly when the hardware invokes the interrupt vector routine. When bx, ldm, pop, or ldr with PC is invoked hardware executes interrupt routine exit poping those registers.
Do not check your compiler. It knows what it does. Check tour wrong logic - especially printing strings in the interrupt handler.
assemble code with the keyword __irq __arm is below:
__irq __arm void irqHandler(void)
{
878: e24ee004 sub lr, lr, #4
87c: e92d503f push {r0, r1, r2, r3, r4, r5, ip, lr}
volatile u32 *pt = (u32 *)AM_INTC_BASE;
880: e3a044b0 mov r4, #176, 8 ; 0xb0000000
u32 id_spin;
id_spin = *(pt+0x200c/4) & 0x3ff;
884: e302000c movw r0, #8204 ; 0x200c
888: e7900004 ldr r0, [r0, r4]
88c: e1b00b00 lsls r0, r0, #22
890: e1b00b20 lsrs r0, r0, #22
894: e1b05000 movs r5, r0
if(id_spin<32)
898: e3550020 cmp r5, #32
89c: 2a000000 bcs 8a4 <irqHandler+0x2c>
{
#ifdef WHOLECHIPSIM
print("id_spid<32 error...\r\n",0);
#endif
while(1);
8a0: eafffffe b 8a0 <irqHandler+0x28>
}
else
{
(pFuncIrq[id_spin-32])();
8a4: e59f0010 ldr r0, [pc, #16] ; 8bc <.text_8>
8a8: e1b01105 lsls r1, r5, #2
8ac: e0910000 adds r0, r1, r0
8b0: e5100080 ldr r0, [r0, #-128] ; 0x80
8b4: e12fff30 blx r0
}
}
8b8: e8fd903f ldm sp!, {r0, r1, r2, r3, r4, r5, ip, pc}^
Cortex A7 PUSH log ,it just push 7 register, so 32bit aligned is ok
follow link is the log info:
http://img.blog.csdn.net/20170819120758443?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvcmFpbmJvd2JpcmRzX2Flcw==/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/Center
I am trying to build a binary translator for arm bear metal compiled code and I try to verify proper execution flow by comparing it to that of qemu-arm. I use the following command to dump the program flow:
qemu-arm -d in_asm,cpu -singlestep -D a.flow a.out
I noticed something strange, where the program seems to jump to an irrelevant instruction, since 0x000080b4 is not the branch nor the next instruction following 0x000093ec.
0x000093ec: 1afffff9 bne 0x93d8
R00=00000000 R01=00009c44 R02=00000002 R03=00000000
R04=00000001 R05=0001d028 R06=00000002 R07=00000000
R08=00000000 R09=00000000 R10=0001d024 R11=00000000
R12=f6ffed88 R13=f6ffed88 R14=000093e8 R15=000093ec
PSR=20000010 --C- A usr32
R00=00000000 R01=00009c44 R02=00000002 R03=00000000
R04=00000001 R05=0001d028 R06=00000002 R07=00000000
R08=00000000 R09=00000000 R10=0001d024 R11=00000000
R12=f6ffed88 R13=f6ffed88 R14=000093e8 R15=000093d8
PSR=20000010 --C- A usr32
----------------
IN:
0x000080b4: e59f3060 ldr r3, [pc, #96] ; 0x811c
The instruction that actually executes corresponds to the beggining of the <frame_dummy> tag in the disassembly. Can someone explain what actually happens within the emulator and is this behavior normal in the ARM architecture? The program was compiled with: arm-none-eabi-gcc --specs=rdimon.specs a.c
Here is the same segment of the program flow without the CPU state:
0x0000804c: e59f3018 ldr r3, [pc, #24] ; 0x806c
0x00008050: e3530000 cmp r3, #0 ; 0x0
0x00008054: 01a0f00e moveq pc, lr
----------------
IN: __libc_init_array
0x000093e8: e1560004 cmp r6, r4
0x000093ec: 1afffff9 bne 0x93d8
----------------
IN:
0x000080b4: e59f3060 ldr r3, [pc, #96] ; 0x811c
0x000080b8: e3530000 cmp r3, #0 ; 0x0
0x000080bc: 0a000009 beq 0x80e8
This is the disassembly of this part:
93d4: 0a000005 beq 93f0 <__libc_init_array+0x68>
93d8: e2844001 add r4, r4, #1
93dc: e4953004 ldr r3, [r5], #4
93e0: e1a0e00f mov lr, pc
93e4: e1a0f003 mov pc, r3
93e8: e1560004 cmp r6, r4
93ec: 1afffff9 bne 93d8 <__libc_init_array+0x50>
93f0: e8bd4070 pop {r4, r5, r6, lr}
It is a reverse jump to previously emitted TB, you don't even have to read that much back:
IN: __libc_init_array
0x000093d8: e2844001 add r4, r4, #1 ; 0x1
0x000093dc: e4953004 ldr r3, [r5], #4
0x000093e0: e1a0e00f mov lr, pc
0x000093e4: e1a0f003 mov pc, r3
----------------
IN: register_fini
0x0000804c: e59f3018 ldr r3, [pc, #24] ; 0x806c
0x00008050: e3530000 cmp r3, #0 ; 0x0
0x00008054: 01a0f00e moveq pc, lr
----------------
IN: __libc_init_array
0x000093e8: e1560004 cmp r6, r4
0x000093ec: 1afffff9 bne 0x93d8
So, qemu is not showing it again. Notice this loop is iterating function pointers, the first one points to register_fini and the second one to the magical 0x000080b4 address in question (no symbol for it). When this unnamed function conditionally returns with moveq pc, lr control is transferred back to __libc_init_array address 0x000093e8 which then determines that the array end has been reached and again just returns to its caller at 0x000093f0.
I'm still learning ARM and I couldn't understand what this function is supposed to do.
Can you guys help me out explaining how it works?
.text:0006379C EXPORT _nativeD2AB
.text:0006379C _nativeD2AB
.text:0006379C var_28 = -0x28
.text:0006379C
.text:0006379C STMFD SP!, {R4-R11,LR}
.text:000637A0 SUB SP, SP, #0x3A4
.text:000637A4 STMFA SP, {R0-R3}
.text:000637A8 LDR R0, =(_GLOBAL_OFFSET_ - 0x637B8)
.text:000637AC LDR R1, =(__stack_chk - 0x134EAC)
.text:000637B0 ADD R0, PC, R0 ; _GLOBAL_OFFSET_
.text:000637B4 LDR R0, [R1,R0] ; __stack_chk
.text:000637B8 LDR R0, [R0]
.text:000637BC STR R0, [SP,#0x3C8+var_28]
.text:000637C0 MOV R0, #1
.text:000637C4 ADR R1, sub_637D0
.text:000637C8 MUL R0, R1, R0
.text:000637CC MOV PC, R0
.text:000637CC ; End of function _nativeD2AB
.
.got:00134EAC _GLOBAL_OFFSET_TABLE_ DCD 0
.
.got:00134B0C AREA .got, DATA
.got:00134B0C __stack_chk DCD __stack_chkA
.
Found the rest of the function. If I understood some of it correctly, it seems to be scrambling the data, though that may be just a wild guess:
.text:000637D0 sub_637D0
.text:000637D0 MOV R0, #1
.text:000637D4 ADR R1, sub_637E0
.text:000637D8 MUL R0, R1, R0
.text:000637DC MOV PC, R0
.text:000637DC ; End of function sub_637D0
.text:000637E0 sub_637E0
.text:000637E0
.text:000637E0 arg_14 = 0x14
.text:000637E0
.text:000637E0 STR R2, [SP,#arg_14]
.text:000637E4 MOV R0, #1
.text:000637E8 ADR R1, loc_637F4
.text:000637EC MUL R0, R1, R0
.text:000637F0 MOV PC, R0
.text:000637F0 ; End of function sub_637E0
.text:000637F4 loc_637F4
.text:000637F4 STR R2, [SP,#0x28]
.text:000637F8 STR R0, [SP,#0x18]
.text:000637FC MOV R1, #2
.text:00063800 STR R2, [SP,#0x1C]
.text:00063804 STR R0, [SP,#0x20]
.text:00063808 STR R0, [SP,#0x24]
The function has several parts:
Store registers to the stacj and reserve space (Strangely, not restored)
Load to R0 the address of GLOBAL_OFFSET (Once added with PC), to actually access __stack_chk (When added to GLOBAL_OFFSET). This is done in a very strange way.
Load the data at __stack_chk and store it in the stack
Load to R0 the value of sub_637D0, by doing a multiplication by 1. This is the value returned by the function.
So in my opinion, this does not seem to do anything useful...
This question already has an answer here:
Rustc/LLVM generates faulty code for aarch64 with opt-level=0
(1 answer)
Closed 7 years ago.
I'm trying to get a small ARM kernel up and running on QEMU (Versatile Express for Cortex-A15). Currently it simply sets sp to the top of a small stack and sends a single character to UART0.
_start.arm:
.set stack_size, 0x10000
.comm stack, stack_size
.global _start
_start:
ldr sp, =stack+stack_size
bl start
1:
b 1b
.size _start, . - _start
start.c:
/* UART_0 is a struct overlaid on 0x1c090000 */
void printChar(char c)
{
while (UART_0->flags & TRANSMIT_FULL);
UART_0->data = c;
}
void start()
{
while (UART_0->flags & TRANSMIT_FULL);
UART_0->data = 'A';
printChar('a');
}
From GDB, I know that execution progresses through _start into start and successfully sends 'A' to UART_0. printChar gets called and completes, but doesn't seem to print anything to the serial port . When running without GDB, the kernel repeatedly prints 'A', though I'm not sure if this is the processor resetting or jumping incorrectly.
From objdump:
Disassembly of section .stub:
00010000 <_start>:
10000: e59fd004 ldr sp, [pc, #4] ; 1000c <__STACK_SIZE+0xc>
10004: eb000016 bl 10064 <start>
10008: eafffffe b 10008 <_start+0x8>
1000c: 000200d0 .word 0x000200d0
Disassembly of section .text:
00010010 <printChar>:
10010: e52db004 push {fp} ; (str fp, [sp, #-4]!)
10014: e28db000 add fp, sp, #0
10018: e24dd00c sub sp, sp, #12
1001c: e1a03000 mov r3, r0
10020: e54b3005 strb r3, [fp, #-5]
10024: e1a00000 nop ; (mov r0, r0)
10028: e3a03000 mov r3, #0
1002c: e3413c09 movt r3, #7177 ; 0x1c09
10030: e1d331ba ldrh r3, [r3, #26]
10034: e6ff3073 uxth r3, r3
10038: e2033020 and r3, r3, #32
1003c: e3530000 cmp r3, #0
10040: 1afffff8 bne 10028 <printChar+0x18>
10044: e3a03000 mov r3, #0
10048: e3413c09 movt r3, #7177 ; 0x1c09
1004c: e55b2005 ldrb r2, [fp, #-5]
10050: e6ff2072 uxth r2, r2
10054: e1c320b2 strh r2, [r3, #2]
10058: e24bd000 sub sp, fp, #0
1005c: e49db004 pop {fp} ; (ldr fp, [sp], #4)
10060: e12fff1e bx lr
00010064 <start>:
10064: e52db008 str fp, [sp, #-8]!
10068: e58de004 str lr, [sp, #4]
1006c: e28db004 add fp, sp, #4
10070: e1a00000 nop ; (mov r0, r0)
10074: e3a03000 mov r3, #0
10078: e3413c09 movt r3, #7177 ; 0x1c09
1007c: e1d331ba ldrh r3, [r3, #26]
10080: e6ff3073 uxth r3, r3
10084: e2033020 and r3, r3, #32
10088: e3530000 cmp r3, #0
1008c: 1afffff8 bne 10074 <start+0x10>
10090: e3a03000 mov r3, #0
10094: e3413c09 movt r3, #7177 ; 0x1c09
10098: e5d32002 ldrb r2, [r3, #2]
1009c: e3a02000 mov r2, #0
100a0: e3822041 orr r2, r2, #65 ; 0x41
100a4: e5c32002 strb r2, [r3, #2]
100a8: e5d32003 ldrb r2, [r3, #3]
100ac: e3a02000 mov r2, #0
100b0: e5c32003 strb r2, [r3, #3]
100b4: e3a00061 mov r0, #97 ; 0x61
100b8: ebffffd4 bl 10010 <printChar>
100bc: e24bd004 sub sp, fp, #4
100c0: e59db000 ldr fp, [sp]
100c4: e28dd004 add sp, sp, #4
100c8: e49df004 pop {pc} ; (ldr pc, [sp], #4)
000100cc <UART_0>:
100cc: 1c090000 ....
I may have missed something, but I am not seeing where you have enabled interrupts, or poll to see if you can send the next character. If you have enabled the interrupts and set up the UART hardware correctly, your driver my have a bug. If you have not setup the UART hardware correctly, it may not be generating interrupts, or it may not be doing the FIFO correctly, or any number of other problems.