Related
I am currently trying to get backtrace based on stack pointer and link register on ARM64 device using C program.
Below is example of objdump
bar() calls foo() with 240444: ebfffd68 bl 23f9ec <foo##Base>
I can get link register (lr) and from that getting 23f9ec, save it to backtrace list as last routine.
My question: From below assembly code with current lr 0023f9ec <foo##Base>:, how to calculate to get previous routine with lr is 0023fe14 <bar##Base> using C language?
here is my implementation, but getting wrong previous lr
int bt(void** backtrace, int max_size) {
unsigned long* sp = __get_SP();
unsigned long* ra = __get_LR();
int* funcbase = (int*)(int)&bt;
int spofft = (short)((*funcbase));
sp = (char*)sp-spofft;
unsigned long* wra = (unsigned long*)ra;
int spofft;
int depth = 0;
while(ra) {
wra = ra;
while((*wra >> 16) != 0xe92d) {
wra--;
}
if(wra == 0)
return 0;
spofft = (short)(*wra & 0xffff);
if(depth < max_size)
backtrace[depth] = ra;
else
break;
ra =(unsigned long *)((unsigned long)ra + spofft);
sp =(unsigned long *)((unsigned long)sp + spofft);
depth++;
}
return 1;
}
0023f9ec <foo##Base>:
23f9ec: e92d42f3 push {r0, r1, r4, r5, r6, r7, r9, lr}
23f9f0: e1a09001 mov r9, r1
23f9f4: e1a07000 mov r7, r0
23f9f8: ebfffff9 bl 23f9e4 <__get_SP##Base>
23f9fc: e59f4060 ldr r4, [pc, #96] ; 23fa64 <foo##Base+0x78>
23fa00: e08f4004 add r4, pc, r4
23fa04: e1a05000 mov r5, r0
23fa08: ebfffff3 bl 23f9dc <__get_LR##Base>
23fa0c: e59f3054 ldr r3, [pc, #84] ; 23fa68 <foo##Base+0x7c>
23fa10: e3002256 movw r2, #598 ; 0x256
23fa14: e59f1050 ldr r1, [pc, #80] ; 23fa6c <foo##Base+0x80>
23fa18: e7943003 ldr r3, [r4, r3]
23fa1c: e08f1001 add r1, pc, r1
23fa20: e5934000 ldr r4, [r3]
23fa24: e1a03005 mov r3, r5
23fa28: e6bf4074 sxth r4, r4
23fa2c: e58d4004 str r4, [sp, #4]
23fa30: e1a06000 mov r6, r0
23fa34: e58d0000 str r0, [sp]
23fa38: e59f0030 ldr r0, [pc, #48] ; 23fa70 <foo##Base+0x84>
23fa3c: e08f0000 add r0, pc, r0
23fa40: ebfd456d bl 190ffc <printf#plt>
23fa44: e1a03009 mov r3, r9
23fa48: e1a02007 mov r2, r7
23fa4c: e1a01006 mov r1, r6
23fa50: e0640005 rsb r0, r4, r5
23fa54: ebffff70 bl 23f81c <get_prev_sp_ra2##Base>
23fa58: e3a00000 mov r0, #0
23fa5c: e28dd008 add sp, sp, #8
23fa60: e8bd82f0 pop {r4, r5, r6, r7, r9, pc}
23fa64: 003d5be0 eorseq r5, sp, r0, ror #23
23fa68: 000026c8 andeq r2, r0, r8, asr #13
23fa6c: 002b7ba6 eoreq r7, fp, r6, lsr #23
23fa70: 002b73e5 eoreq r7, fp, r5, ror #7
0023fe14 <bar##Base>:
23fe14: e92d4ef0 push {r4, r5, r6, r7, r9, sl, fp, lr}
23fe18: e24dde16 sub sp, sp, #352 ; 0x160
23fe1c: e59f76a8 ldr r7, [pc, #1704] ; 2404cc <bar##Base+0x6b8>
23fe20: e1a04000 mov r4, r0
23fe24: e59f66a4 ldr r6, [pc, #1700] ; 2404d0 <bar##Base+0x6bc>
23fe28: e1a03000 mov r3, r0
23fe2c: e59f26a0 ldr r2, [pc, #1696] ; 2404d4 <bar##Base+0x6c0>
23fe30: e08f7007 add r7, pc, r7
23fe34: e08f6006 add r6, pc, r6
23fe38: e3a00000 mov r0, #0
23fe3c: e08f2002 add r2, pc, r2
23fe40: e1a05001 mov r5, r1
23fe44: e3a01003 mov r1, #3
23fe48: e59f9688 ldr r9, [pc, #1672] ; 2404d8 <bar##Base+0x6c4>
.....................................................................
24043c: e3a0100f mov r1, #15
240440: e1a0000a mov r0, sl
240444: ebfffd68 bl 23f9ec <foo##Base>
240448: e59f2108 ldr r2, [pc, #264] ; 240558 <bar##Base+0x744>
24044c: e3a01003 mov r1, #3
240450: e08f2002 add r2, pc, r2
240454: e1a05000 mov r5, r0
240458: e1a03000 mov r3, r0
24045c: e3a00000 mov r0, #0
I don't think there's an easy way to do this.
Normally the register ABI of any operating system contains a "frame pointer" register. For example, on Apple's armv7 ABI, this is r7:
0x10006fc0 b0b5 push {r4, r5, r7, lr}
0x10006fc2 02af add r7, sp, 8
0x10006fc4 0448 ldr r0, [0x10006fd8]
0x10006fc6 d0e90c45 ldrd r4, r5, [r0, 0x30]
0x10006fca 0020 movs r0, 0
0x10006fcc fff7a6ff bl 0x10006f1c
0x10006fd0 0019 adds r0, r0, r4
0x10006fd2 6941 adcs r1, r5
0x10006fd4 b0bd pop {r4, r5, r7, pc}
If you dereference r7 there, you get to a pair of pointers, the second of which is lr, and the first of which is the r7 of the calling function, allowing you to repeat this process until you reach the bottom of the stack.
Judging by the assembly you posted, the codebase you're looking at doesn't have that. This means that the only way to obtain the return address is the same way that the code itself does: step forward through each instruction and parse/interpret them until you reach something that loads into pc. This is of course imperfect, since there may be functions in your call stack that do not ever return, but there's not much you can do about that.
It may be tempting to search backwards instead, and while you can do a heuristic approach and probably reach quite reasonable results with it, that is even less reliable than searching forward, since you have absolutely no way of telling whether you arrived at address X by stepping forward from the previous instruction or by explicitly jumping there from somewhere else.
This question already has an answer here:
Rustc/LLVM generates faulty code for aarch64 with opt-level=0
(1 answer)
Closed 7 years ago.
I'm trying to get a small ARM kernel up and running on QEMU (Versatile Express for Cortex-A15). Currently it simply sets sp to the top of a small stack and sends a single character to UART0.
_start.arm:
.set stack_size, 0x10000
.comm stack, stack_size
.global _start
_start:
ldr sp, =stack+stack_size
bl start
1:
b 1b
.size _start, . - _start
start.c:
/* UART_0 is a struct overlaid on 0x1c090000 */
void printChar(char c)
{
while (UART_0->flags & TRANSMIT_FULL);
UART_0->data = c;
}
void start()
{
while (UART_0->flags & TRANSMIT_FULL);
UART_0->data = 'A';
printChar('a');
}
From GDB, I know that execution progresses through _start into start and successfully sends 'A' to UART_0. printChar gets called and completes, but doesn't seem to print anything to the serial port . When running without GDB, the kernel repeatedly prints 'A', though I'm not sure if this is the processor resetting or jumping incorrectly.
From objdump:
Disassembly of section .stub:
00010000 <_start>:
10000: e59fd004 ldr sp, [pc, #4] ; 1000c <__STACK_SIZE+0xc>
10004: eb000016 bl 10064 <start>
10008: eafffffe b 10008 <_start+0x8>
1000c: 000200d0 .word 0x000200d0
Disassembly of section .text:
00010010 <printChar>:
10010: e52db004 push {fp} ; (str fp, [sp, #-4]!)
10014: e28db000 add fp, sp, #0
10018: e24dd00c sub sp, sp, #12
1001c: e1a03000 mov r3, r0
10020: e54b3005 strb r3, [fp, #-5]
10024: e1a00000 nop ; (mov r0, r0)
10028: e3a03000 mov r3, #0
1002c: e3413c09 movt r3, #7177 ; 0x1c09
10030: e1d331ba ldrh r3, [r3, #26]
10034: e6ff3073 uxth r3, r3
10038: e2033020 and r3, r3, #32
1003c: e3530000 cmp r3, #0
10040: 1afffff8 bne 10028 <printChar+0x18>
10044: e3a03000 mov r3, #0
10048: e3413c09 movt r3, #7177 ; 0x1c09
1004c: e55b2005 ldrb r2, [fp, #-5]
10050: e6ff2072 uxth r2, r2
10054: e1c320b2 strh r2, [r3, #2]
10058: e24bd000 sub sp, fp, #0
1005c: e49db004 pop {fp} ; (ldr fp, [sp], #4)
10060: e12fff1e bx lr
00010064 <start>:
10064: e52db008 str fp, [sp, #-8]!
10068: e58de004 str lr, [sp, #4]
1006c: e28db004 add fp, sp, #4
10070: e1a00000 nop ; (mov r0, r0)
10074: e3a03000 mov r3, #0
10078: e3413c09 movt r3, #7177 ; 0x1c09
1007c: e1d331ba ldrh r3, [r3, #26]
10080: e6ff3073 uxth r3, r3
10084: e2033020 and r3, r3, #32
10088: e3530000 cmp r3, #0
1008c: 1afffff8 bne 10074 <start+0x10>
10090: e3a03000 mov r3, #0
10094: e3413c09 movt r3, #7177 ; 0x1c09
10098: e5d32002 ldrb r2, [r3, #2]
1009c: e3a02000 mov r2, #0
100a0: e3822041 orr r2, r2, #65 ; 0x41
100a4: e5c32002 strb r2, [r3, #2]
100a8: e5d32003 ldrb r2, [r3, #3]
100ac: e3a02000 mov r2, #0
100b0: e5c32003 strb r2, [r3, #3]
100b4: e3a00061 mov r0, #97 ; 0x61
100b8: ebffffd4 bl 10010 <printChar>
100bc: e24bd004 sub sp, fp, #4
100c0: e59db000 ldr fp, [sp]
100c4: e28dd004 add sp, sp, #4
100c8: e49df004 pop {pc} ; (ldr pc, [sp], #4)
000100cc <UART_0>:
100cc: 1c090000 ....
I may have missed something, but I am not seeing where you have enabled interrupts, or poll to see if you can send the next character. If you have enabled the interrupts and set up the UART hardware correctly, your driver my have a bug. If you have not setup the UART hardware correctly, it may not be generating interrupts, or it may not be doing the FIFO correctly, or any number of other problems.
Recently I ran into this strange issue(Thumb2 instr-set). Say I have following code:
000a8948 <some_func>:
a8948: e92d 4ff0 stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
a894c: 460f mov r7, r1
a894e: f8df 8260 ldr.w r8, [pc, #608]
a8952: 4606 mov r6, r0 --> r0 shows b89a8408 in backtrace
a8954: 4b97 ldr r3, [pc, #604]
a8956: f100 0410 add.w r4, r0, #16 --> only place r4 gets assigned, but it can't be 0.
a895a: 44f8 add r8, pc
a895c: f04f 0900 mov.w r9, #0
a8960: ed2d 8b04 vpush {d8-d9}
a8964: f858 1003 ldr.w r1, [r8, r3]
a8968: b095 sub sp, #84 ; 0x54
a896a: f8df b24c ldr.w fp, [pc, #588]
a896e: ad0c add r5, sp, #48
a8970: f8df c248 ldr.w ip, [pc, #584]
a8974: f8df a248 ldr.w sl, [pc, #584]
a8978: ed9f 8b8b vldr d8, [pc, #556]
a897c: 680a ldr r2, [r1, #0]
a897e: 44fb add fp, pc
a8980: 44fc add ip, pc
a8982: 44fa add sl, pc
a8984: 9107 str r1, [sp, #28]
a8986: 9213 str r2, [sp, #76]
a8988: 6823 ldr r3, [r4, #0] // ERR: SIGSEGV happens, code 1 (SEGV_MAPPER), fault addr 0x0
So when this issue happens, r4 turned to value 0, what I cannot figure out is that how r4 gets polluted, given it only be assigned to r0+16 at line a8956? Could it be that Neon instructions write r4 somehow?
I have the following C and ASM version of the (supposedly) same code. What it does is load 2 128bit ints represented by 2 64bit ints each to registers (first 4*lower 32bit, then 4*higher 32bit) and ADD/ADC to them. It is simple enough code and the ARM/ST manuals actually give the same example with 96bit (3 ADD/ADCs).
For simple calls both versions work (repeatedly adding (1 << x++) or 1..x). But for the longer testsuite the ARM assembly fails (board hangs). ATM I have no ability to trap/debug that and cannot use any printf() or the likes to find the test failing, which is irrelevant anyways, because there must be some basic fault in the ASM version, as the C version works as expected.
I don't get it, it's simple enough and very close to the C assembly output (sans branching). I tried the "memory" constraint (shouldn't be needed), I tried saving the carry between lower and upper 64bit in a register and adding that later, using ADD(C).W, alignment, using two LDR/STR instead of LDRD/STRD, etc.. I assume the board faults because some addition goes wrong and results in a divide by 0 or something like that.
The GCC ASM is below and uses similar basic technique, so I don't see the problem.
I'm really just looking for the fastest way to do the add, not to fix that code specifically. It's a shame you have to use constant register names because there is no constraint for specifying rX and rX+1. Also it's impossible to use as many registers as GCC as it will run out of them during compilation.
typedef struct I128 {
int64_t high;
uint64_t low;
} I128;
I128 I128add(I128 a, const I128 b) {
#if defined(USEASM) && defined(ARMx)
__asm(
"LDRD %%r2, %%r3, %[alo]\n"
"LDRD %%r4, %%r5, %[blo]\n"
"ADDS %%r2, %%r2, %%r4\n"
"ADCS %%r3, %%r3, %%r5\n"
"STRD %%r2, %%r3, %[alo]\n"
"LDRD %%r2, %%r3, %[ahi]\n"
"LDRD %%r4, %%r5, %[bhi]\n"
"ADCS %%r2, %%r2, %%r4\n"
"ADC %%r3, %%r3, %%r5\n"
"STRD %%r2, %%r3, %[ahi]\n"
: [alo] "+m" (a.low), [ahi] "+m" (a.high)
: [blo] "m" (b.low), [bhi] "m" (b.high)
: "r2", "r3", "r4", "r5", "cc"
);
return a;
#else
// faster to use temp than saving low and adding to a directly
I128 r = {a.high + b.high, a.low + b.low};
// check for overflow of low 64 bits, add carry to high
// avoid conditionals
//r.high += r.low < a.low || r.low < b.low;
// actually gcc produces faster code with conditionals
if(r.low < a.low || r.low < b.low) ++r.high;
return r;
}
GCC C version using " armv7m-none-eabi-gcc-4.7.2 -O3 -ggdb -fomit-frame-pointer -falign-functions=16 -std=gnu99 -march=armv7e-m":
b082 sub sp, #8
e92d 0ff0 stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
a908 add r1, sp, #32
e881 000c stmia.w r1, {r2, r3}
e9dd 890e ldrd r8, r9, [sp, #56] ; 0x38
e9dd 670a ldrd r6, r7, [sp, #40] ; 0x28
e9dd 2308 ldrd r2, r3, [sp, #32]
e9dd 450c ldrd r4, r5, [sp, #48] ; 0x30
eb16 0a08 adds.w sl, r6, r8
eb47 0b09 adc.w fp, r7, r9
1912 adds r2, r2, r4
eb43 0305 adc.w r3, r3, r5
45bb cmp fp, r7
bf08 it eq
45b2 cmpeq sl, r6
d303 bcc.n 8012c9a <I128add+0x3a>
45cb cmp fp, r9
bf08 it eq
45c2 cmpeq sl, r8
d204 bcs.n 8012ca4 <I128add+0x44>
2401 movs r4, #1
2500 movs r5, #0
1912 adds r2, r2, r4
eb43 0305 adc.w r3, r3, r5
e9c0 2300 strd r2, r3, [r0]
e9c0 ab02 strd sl, fp, [r0, #8]
e8bd 0ff0 ldmia.w sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
b002 add sp, #8
4770 bx lr
My ASM version that fails:
b082 sub sp, #8
b430 push {r4, r5}
a902 add r1, sp, #8
e881 000c stmia.w r1, {r2, r3}
e9dd 2304 ldrd r2, r3, [sp, #16]
e9dd 4508 ldrd r4, r5, [sp, #32]
1912 adds r2, r2, r4
416b adcs r3, r5
e9cd 2304 strd r2, r3, [sp, #16]
e9dd 2302 ldrd r2, r3, [sp, #8]
e9dd 4506 ldrd r4, r5, [sp, #24]
4162 adcs r2, r4
eb43 0305 adc.w r3, r3, r5
e9cd 2302 strd r2, r3, [sp, #8]
4604 mov r4, r0
c90f ldmia r1, {r0, r1, r2, r3}
e884 000f stmia.w r4, {r0, r1, r2, r3}
4620 mov r0, r4
bc30 pop {r4, r5}
b002 add sp, #8
4770 bx lr
I am not getting a hang from your code, but it isnt working either, not sure why. But it was very easy to patch the compiler generated code to handle the carry:
I128 I128add(I128 a, const I128 b) {
I128 r = {a.high + b.high, a.low + b.low};
return r;
}
becomes
000001e4 <I128add>:
1e4: b082 sub sp, #8
1e6: b4f0 push {r4, r5, r6, r7}
1e8: e9dd 4506 ldrd r4, r5, [sp, #24]
1ec: a904 add r1, sp, #16
1ee: e881 000c stmia.w r1, {r2, r3}
1f2: e9dd 230a ldrd r2, r3, [sp, #40] ; 0x28
1f6: 1912 adds r2, r2, r4
1f8: eb43 0305 adc.w r3, r3, r5
1fc: e9dd 6704 ldrd r6, r7, [sp, #16]
200: e9dd 4508 ldrd r4, r5, [sp, #32]
204: 1936 adds r6, r6, r4
206: eb47 0705 adc.w r7, r7, r5
20a: e9c0 6700 strd r6, r7, [r0]
20e: e9c0 2302 strd r2, r3, [r0, #8]
212: bcf0 pop {r4, r5, r6, r7}
214: b002 add sp, #8
216: 4770 bx lr
fixed up the adds
.thumb_func
.globl test2
test2:
sub sp, #8
push {r4, r5, r6, r7}
ldrd r4, r5, [sp, #24]
add r1, sp, #16
stmia r1, {r2, r3}
ldrd r2, r3, [sp, #40]
add r2, r4
adc r3, r5
ldrd r6, r7, [sp, #16]
ldrd r4, r5, [sp, #32]
adc r6, r4
adc r7, r5
strd r6, r7, [r0]
strd r2, r3, [r0, #8]
pop {r4, r5, r6, r7}
add sp, #8
bx lr
final result
00000024 <test2>:
24: b082 sub sp, #8
26: b4f0 push {r4, r5, r6, r7}
28: e9dd 4506 ldrd r4, r5, [sp, #24]
2c: a904 add r1, sp, #16
2e: c10c stmia r1!, {r2, r3}
30: e9dd 230a ldrd r2, r3, [sp, #40] ; 0x28
34: 1912 adds r2, r2, r4
36: 416b adcs r3, r5
38: e9dd 6704 ldrd r6, r7, [sp, #16]
3c: e9dd 4508 ldrd r4, r5, [sp, #32]
40: 4166 adcs r6, r4
42: 416f adcs r7, r5
44: e9c0 6700 strd r6, r7, [r0]
48: e9c0 2302 strd r2, r3, [r0, #8]
4c: bcf0 pop {r4, r5, r6, r7}
4e: b002 add sp, #8
50: 4770 bx lr
Notice the fewer number of thumb2 instructions, unless you are on a cortex-A that has thumb2 support, those fetches from flash (cortex-m) are (can be) slow. I see you are trying to save the push and pop of two more registers but you cost yourself more fetches. You could take the above and still re-arrange the loads and stores and save those two registers.
minimal testing so far. printfs show the upper words adding, where I didnt see that with your code. I am stilll trying to unwind the calling convention (please document your code more for us), looks like r0 is prepped by the caller to place the result, rest is on the stack. I am using a stellaris launchpad (cortex-m4).
I have some given code that uses variable length arrays on the stack in C. I cannot easily change that code to use malloced buffers on the heap, since I am working on some stuff where I have no operating system support for dynamic memory. However when I test my method the stack actually get's smashed instead (i.e. the SP get's set to a completely bogus address). To figure out what is going on I had a look at the assembly and I was completely confused by the compiler output.
I am running the code on a pandaboard ES (OMAP4460 ARM processor).
In order to test how variable size arrays on the stack are actually compiled, I produced a simple working test program. However again the code was much too confusing for me to understand.
Here is what I do not understand:
When I compile a simple function:
int test_method(size_t i)
{
unsigned char buffer[10];
return -1;
}
I get some very simple assembler code:
80003a68 <test_method>:
80003a68: b480 push {r7} ; store frame pointer
80003a6a: b087 sub sp, #28 ; make stack frame (size 28)
80003a6c: af00 add r7, sp, #0 ; set frame pointer
80003a6e: 6078 str r0, [r7, #4]; store parameter on stack
80003a70: f04f 0300 mov.w r3, #0 ; load return value
80003a74: 4618 mov r0, r3 ; put return value in return register
80003a76: f107 071c add.w r7, r7, #28 ; destroy stack frame
80003a7a: 46bd mov sp, r7 ; ...
80003a7c: bc80 pop {r7} ; pop stored frame pointer
80003a7e: 4770 bx lr ; return
But when I try this with a variable size array using the following code:
int test_method(size_t i)
{
unsigned char buffer[i];
return 0;
}
I get this assembly:
80003a68 <test_method>:
80003a68: e92d 03f0 stmdb sp!, {r4, r5, r6, r7, r8, r9}
80003a6c: b084 sub sp, #16
80003a6e: af00 add r7, sp, #0
80003a70: 6078 str r0, [r7, #4]
80003a72: 4669 mov r1, sp
80003a74: 460e mov r6, r1
80003a76: f8d7 c004 ldr.w ip, [r7, #4]
80003a7a: 4661 mov r1, ip
80003a7c: f101 31ff add.w r1, r1, #4294967295 ; 0xffffffff
80003a80: 60b9 str r1, [r7, #8]
80003a82: 4660 mov r0, ip
80003a84: f04f 0100 mov.w r1, #0
80003a88: f04f 38ff mov.w r8, #4294967295 ; 0xffffffff
80003a8c: f04f 090f mov.w r9, #15
80003a90: ea00 0008 and.w r0, r0, r8
80003a94: ea01 0109 and.w r1, r1, r9
80003a98: ea4f 7850 mov.w r8, r0, lsr #29
80003a9c: ea4f 05c1 mov.w r5, r1, lsl #3
80003aa0: ea48 0505 orr.w r5, r8, r5
80003aa4: ea4f 04c0 mov.w r4, r0, lsl #3
80003aa8: f04f 30ff mov.w r0, #4294967295 ; 0xffffffff
80003aac: f04f 010f mov.w r1, #15
80003ab0: ea04 0400 and.w r4, r4, r0
80003ab4: ea05 0501 and.w r5, r5, r1
80003ab8: 4660 mov r0, ip
80003aba: f04f 0100 mov.w r1, #0
80003abe: f04f 34ff mov.w r4, #4294967295 ; 0xffffffff
80003ac2: f04f 050f mov.w r5, #15
80003ac6: ea00 0004 and.w r0, r0, r4
80003aca: ea01 0105 and.w r1, r1, r5
80003ace: ea4f 7450 mov.w r4, r0, lsr #29
80003ad2: ea4f 03c1 mov.w r3, r1, lsl #3
80003ad6: ea44 0303 orr.w r3, r4, r3
80003ada: ea4f 02c0 mov.w r2, r0, lsl #3
80003ade: f04f 30ff mov.w r0, #4294967295 ; 0xffffffff
80003ae2: f04f 010f mov.w r1, #15
80003ae6: ea02 0200 and.w r2, r2, r0
80003aea: ea03 0301 and.w r3, r3, r1
80003aea: ea03 0301 and.w r3, r3, r1
80003aee: 4663 mov r3, ip
80003af0: f103 0307 add.w r3, r3, #7
80003af4: f103 0307 add.w r3, r3, #7
80003af8: ea4f 03d3 mov.w r3, r3, lsr #3
80003afc: ea4f 03c3 mov.w r3, r3, lsl #3
80003b00: ebad 0d03 sub.w sp, sp, r3
80003b04: 466b mov r3, sp
80003b06: f103 0307 add.w r3, r3, #7
80003b0a: ea4f 03d3 mov.w r3, r3, lsr #3
80003b0e: ea4f 03c3 mov.w r3, r3, lsl #3
80003b12: 60fb str r3, [r7, #12]
80003b14: f04f 0300 mov.w r3, #0
80003b18: 46b5 mov sp, r6
80003b1a: 4618 mov r0, r3
80003b1c: f107 0710 add.w r7, r7, #16
80003b20: 46bd mov sp, r7
80003b22: e8bd 03f0 ldmia.w sp!, {r4, r5, r6, r7, r8, r9}
80003b26: 4770 bx lr
Where does all this additional logic come frome? I would have thought, that it would have been enough to just enlarge the stack frame by the size i (passed in r0), maybe with an addition of some extra code to keep the alignment. But why are all these additional registers written with 0xffffffff and #15. I cannot really make any sense of this assembly the compiler is giving me.
This is not really an answer just collecting more info that doesnt fit in a comment
typedef unsigned int size_t;
int test_method1(size_t i)
{
unsigned char buffer[10];
return -1;
}
int test_method2(size_t i)
{
unsigned char buffer[i];
return 0;
}
arm-none-eabi-gcc -O2 -c tm1.c -o tm1.o
arm-none-eabi-objdump -D tm1.o
basically optimizes everything away
00000000 <test_method1>:
0: e3e00000 mvn r0, #0
4: e12fff1e bx lr
00000008 <test_method2>:
8: e3a00000 mov r0, #0
c: e12fff1e bx lr
although bad code this should get the compiler to not optimize everything away
typedef unsigned int size_t;
unsigned char *test_method1(size_t i, size_t j)
{
unsigned char buffer[10];
return(&buffer[j]);
}
unsigned char *test_method2(size_t i, size_t j)
{
unsigned char buffer[i];
return(&buffer[j]);
}
00000000 <test_method1>:
0: e24dd010 sub sp, sp, #16
4: e28d3004 add r3, sp, #4
8: e0830001 add r0, r3, r1
c: e28dd010 add sp, sp, #16
10: e12fff1e bx lr
00000014 <test_method2>:
14: e92d0808 push {r3, fp}
18: e2800007 add r0, r0, #7
1c: e3c00007 bic r0, r0, #7
20: e28db004 add fp, sp, #4
24: e04dd000 sub sp, sp, r0
28: e08d0001 add r0, sp, r1
2c: e24bd004 sub sp, fp, #4
30: e8bd0808 pop {r3, fp}
34: e12fff1e bx lr
And it did and I think it perhaps answered your question. How the compiler handles arrays that are variable length is that in this case it did math on the stack pointer in the amount of the size of the array, basically allocating the dynamic array on the stack as you would expect. For the static sized array the math done on the stack was a static number not a passed in parameter.