ARM assembly recursive power function - c

Need to convert the following C code into ARM assembly subroutine:
int power(int x, unsigned int n)
{
int y;
if (n == 0)
return 1;
if (n & 1)
return x * power(x, n - 1);
else
{ y = power(x, n >> 1);
return y * y;
}
}
Here's what I have so far but cant figure out how to get the link register to increment after each return (keeps looping back to the same point)
pow CMP r0, #0
MOVEQ r0, #1
BXEQ lr
TST r0, #1
BEQ skip
SUB r0, r0, #1
BL pow
MUL r0, r1, r0
BX lr
skip LSR r0, #1
BL pow
MUL r3, r0, r3
BX lr

The BL instruction does not automatically push or pop anything from the stack. This saves a memory access. It's the way it works with RISC processors (in part because they offer 30 general purpose registers.)
STR lr, [sp, #-4]! ; "PUSH lr"
BL pow
LDR lr, [sp], #4 ; "POP lr"
If you repeat a BL call, then you want to STR/LDR on the stack outside of the loop.

Related

How to use if-else in inline assembly?

I'm working on STM32CubeIDE.
To mix C and arm assembly, we initially used EXPORT.
Below is main.c:
#include <stdio.h>
extern int calc(int num, int* cnt);
int main()
{
int cnt = 5;
calc(4, &cnt);
return 0;
}
And then calc.s:
AREA calculator, CODE
EXPORT calc
ALIGN
calc:PROC
PUSH {r4,r5,lr}
MOV r5, #13
UDIV r4, r0, r5
MUL r5, r4, r5
SUB r4, r0, r5
CMP r4,#0
BEQ ace
CMP r4,#8
BGT jqk
then: ADD r4, r4, #1
B exit
ace:ADD [r1],#1
MOV r4, #11
B exit
jqk:MOV r4, #10
exit:MOV r0, r4
POP {r4, r5, pc}
ENDP
I put the two files in the same place and built main.c, but I get an error that says it's an unknown external reference.
So after giving up, I tried to put the ASM sentence in the c file using inline assembly.
calc PROC
PUSH {r4, lr}
AND r4, r0, #12
CMP r4, #0
BEQ ace
CMP r4, #8
BGT jqk
then ADD r4, r4, #1
B exit
ace ADD r1, r1, #1
MOV r4, #11
B exit
jqk MOV r4, #10
exit MOV r0, r4
ENDP
I've written these assembly codes, and I've adapted them to inline grammar.
reference : Labels in GCC inline assembly
int calc(int num, int *cnt)
{
int rst=0;
int c = *cnt;
__asm volatile("MOV R0, %0": :"r"(num));
__asm volatile("AND R0, R0, %0": :"r"(0x12));
__asm volatile("MOV R1, %0": :"r"(c));
__asm volatile("CMP R0, #0");
__asm volatile("BEQ ace%=");
__asm volatile("CMP R0,#8");
__asm volatile("BGT jqk%=");
__asm volatile("ADD R2, R0, #1");
__asm volatile("B exit%=");
__asm volatile("ace%=: ADD R1, R1, #1");
__asm volatile("MOV R2, #11");
__asm volatile("B exit%=");
__asm volatile("jqk%=: MOV R2, #10");
__asm volatile("exit%=: LDR %0, R2":"=r"(rst):);
__asm volatile("LDR %0, R1":"=r"(c):);
__asm volatile("POP {R0, R1, R2}");
*cnt = c;
return rst;
}
But even in this case, an error appears.
What should I change in my code?
'''
int calc(int num, int *cnt)
{
int tmp = num % 13;
if (tmp == 0)
{
tmp = *cnt;
*cnt = tmp+1;
return 11;
}
else if (tmp > 8)
return 10;
else
return tmp + 1;
}
'''
You actually did the right thing initially, putting the asm code in a separate .s file and not using inline asm at all. The only thing is that you need to explicitly compile and link the calc.s file along with main.c
cc -o program main.c calc.s
should compile and assemble both files and link them. If you're using an IDE, you need to specify both main.c and calc.s as source files of the project.

Execution does not go out of the loop in ARM

I want to print in ARM assembly language a given number in decimal in hexadecimal. I'm doing the function that does the conversion and the printing. So far the conversion works but the printing not at all.
It does only print a char at a time and it's not at all what I want, I want a special format of output such that I have 0x and 8 digits.
I wrote a function printf using the given function I had, called _writec that is working but only printing a char at a time. So I wrote a loop until I get the end of string function but here it seems that it doesn't care.
I've followed the execution step-by-step using gdb and it suddenly crash for no appearing reason. When r0 contain 0 it should go to .end according to my beq but it does not.
ARM Code:
.global _print_hex
_print_hex:
push {lr}
#According to .c algorithm : r0 = dec; r1 = quotient;
# r2 = temp; r3 = i ; r4 = j
mov fp, sp
sub sp, sp, #100 # 100 times size of char
mov r1, r0
mov r3, #0
_while:
cmp r1, #0
bne _computing
ldr r0, =.hex_0x
bl _printf
mov r4, #8
_for:
cmp r4, #0
bge _printing
ldr r0, =.endline
bl _printf
mov sp, fp
pop {pc}
_computing:
and r2, r1, #0xF
cmp r2, #10
blt .temp_less_10
add r2, #7
.temp_less_10:
add r2, #48
strb r2, [sp, r3]
add r3, #1
lsr r1, #4
b _while
_printing:
ldrb r0, [sp,r4]
bl _writec
sub r4, #1
b _for
_printf:
push {r0, r1, r2, r3, lr}
mov r1, r0
mov r2, #0
.loop:
ldrb r0, [r1,r2]
cmp r0, #0
beq .end
bl _writec
add r2, #1
b .loop
.end:
pop {r0, r1, r2, r3, lr}
bx lr
.hex_0x:
.asciz "0x"
.align 4
.endline:
.asciz "\n"
.align 4
C code (that I tried to translate):
void dec_to_hex(int dec){
int quotient, i, temp;
char hex[100];
quotient = dec;
i = 0;
while (quotient != 0){
temp = quotient % 16;
if (temp < 10){
temp += 48; // it goes in the ascii table between 48 and 57 that correspond to [0..9]
} else {
temp += 55; //it goes in the first cap letters from 65 to 70 [A..F]
}
hex[i]=(char)temp;
i++;
quotient /= 16;
}
printf("0x");
for(int j=i; j>=0; j--){
printf("%c", hex[j]);
}
printf("\n");
}
Here is the code of _writec :
/*
* Sends a character to the terminal through UART0
* The character is given in r0.
* IF the TX FIFO is full, this function awaits
* until there is room to send the given character.
*/
.align 2
.global _writec
.type _writec,%function
.func _writec,_writec
_writec:
push {r0,r1,r2,r3,lr}
mov r1, r0
mov r3, #1
lsl r3, #5 // TXFF = (1<<5)
ldr r0,[pc]
b .TXWAIT
.word UART0
.TXWAIT:
ldr r2, [r0,#0x18] // flags at offset 0x18
and r2, r2, r3 // TX FIFO Full set, so wait
cmp r2,#0
bne .TXWAIT
strb r1, [r0,#0x00] // TX at offset 0x00
pop {r0,r1,r2,r3,pc}
.size _writec, .-_writec
.endfunc
So in ARM when debugging it crashed at my first call of _printf and when I comment all the call to _printf it does print the result but not as the desired format. I only got the hex value.

I need help converting C code into Arm Assembly

I am trying to solve the N queens problem in arm assembly. I have the code in C and need to convert it into arm assembly. I have most of it, but am a little confused on the recursion portion. I know I need to push to the stack but am lost on when to pop the stack.
I have the following code:
__main
PROC
mov r9,#8
mov r8, #0
mov r7, #1
lsl r7,r7,#8
sub r7, #1
mov r3, #0
mov r4, #0
mov r5, #0
mov r6, #0
BL construct
construct
mov r10,#0
mov r1,#0
cmp r3, r9
beq addone
b constructelse
constructelse
orr r11,r4,r5
orr r11,r11,r6
mvn r11,r11
and r10,r11,r7
b loop
loop
cmp r10, #0
beq exit
neg r12, r10
and r1, r12, r10
eor r10, r10, r1
add r3, #1
orr r4, r4, r1
lsl r4, r4, #1
orr r5, r1
orr r6, r6, r1
lsr r6, r6, #1
push {r1}
push {r2}
push {r3}
push {r4}
push {r5}
push {r6}
push {r10}
push {r11}
push {r12}
b construct
addone
add r8, #1
b exit
exit
ENDP
END
I should be able to see how many solutions there are for the n queens problem and save the amount to a reference. Sorry about code formatting it is my first time posting here.
#include <stdio.h>
int SIZE, MASK, COUNT;
void Backtrack(int y, int left, int down, int right)
{
int bitmap, bit;
if (y == SIZE) {
COUNT++;
} else {
bitmap = MASK & ~(left | down | right);
while (bitmap != 0) {
bit = -bitmap & bitmap;
bitmap ^= bit;
Backtrack(y+1, (left | bit)<<1, down | bit, (right | bit)>>1);
}
}
}
int main(void)
{
SIZE = 8; /* <- N */
COUNT = 0; /* result */
MASK = (1 << SIZE) - 1;
Backtrack(0, 0, 0, 0);
return 0;
}

Convert the C function into ARM assembly language

How exactly do I convert this C program into assembly code? I am having a hard time understanding the unsigned int manipulation.
unsigned int sum(unsigned int n){
if(n==0) return 0;
else return n+sum(n-1);
}
I have done this if I consider int.How to think for unsigned int?
sum:
SUB sp, sp, #8
STR lr, [sp,#4]
STR r0, [sp,#0]
CMP r0,#0
BGE L1
MOV r0, #0
ADD sp, sp, #8
MOV pc, lr
L1: SUB r0, r0, #1
BL sum
MOV r12, r0
LDR r0, [sp,#0]
LDR lr, [sp,#4]
ADD sp, sp, #8
ADD r0, r0, r12
MOV pc, lr
It won't matter for unsigned int, instructions as ADD and SUB behave correctly in both.
Some ISAs provide unsigned ADD and SUB (ADDU and SUBU) as MIPS, which only differ in overflow behavior.

LPC810 - why won't machine code execute from an array of uint8_t in flash?

I'm writing embedded C/assembler code for the NXP LPC810 microcontroller (just a hobby project).
I have a function fn. I also have an exact copy of that function's machine code in an array of uint8_t. (I have checked the hex file.)
I create a function pointer fnptr, with the same type as fn and point it at the array, using a cast.
It all cross-compiles without warnings.
When the MCU executes fn it works correctly.
When the MCU executes fnptr it crashes (I can't see any debug, as there are only 8 pins, all in use).
The code is position independent.
The array has the correct 4 byte alignment.
fn is in the .text section of the elf file.
The array is forced into the .text section of the elf file (still in flash, not RAM).
I have assumed that there is no NX-like functionality on such a basic Coretex M0+ MCU. (Cortex M3 and M4 do have some form of read-only memory protection for code.)
Are there other reasons why the machine code in the array does not work?
Update:
Here is the code:
#include "stdio.h"
#include "serial.h"
extern "C" void SysTick_Handler() {
// generate an interrupt for delay
}
void delay(int millis) {
while (--millis >= 0) {
__WFI(); // wait for SysTick interrupt
}
}
extern "C" int fn(int a, int b) {
return a + b;
}
/* arm-none-eabi-objdump -d firmware.elf
00000162 <fn>:
162: 1840 adds r0, r0, r1
164: 4770 bx lr
166: 46c0 nop ; (mov r8, r8)
*/
extern "C" const uint8_t machine_code[6] __attribute__((aligned (4))) __attribute__((section (".text"))) = {
0x40,0x18,
0x70,0x47,
0xc0,0x46
};
int main() {
LPC_SWM->PINASSIGN0 = 0xFFFFFF04UL;
serial.init(LPC_USART0, 115200);
SysTick_Config(12000000/1000); // 1ms ticks
int(*fnptr)(int a, int b) = (int(*)(int, int))machine_code;
for (int a = 0; ; a++) {
int c = fnptr(a, 1000000);
printf("Hello world2 %d.\n", c);
delay(1000);
}
}
And here is the disassembled output from arm-none-eabi-objdump -D -Mforce-thumb firmware.elf:
00000162 <fn>:
162: 1840 adds r0, r0, r1
164: 4770 bx lr
166: 46c0 nop ; (mov r8, r8)
00000168 <machine_code>:
168: 1840 adds r0, r0, r1
16a: 4770 bx lr
16c: 46c0 nop ; (mov r8, r8)
16e: 46c0 nop ; (mov r8, r8)
00000170 <main>:
...
I amended the code to call the original fn though a function pointer too, in order to be able to generate working and non-working assembly code that was hopefully near-identical.
machine_code has become much longer, as I am now using no optimisation (-O0).
#include "stdio.h"
#include "serial.h"
extern "C" void SysTick_Handler() {
// generate an interrupt for delay
}
void delay(int millis) {
while (--millis >= 0) {
__WFI(); // wait for SysTick interrupt
}
}
extern "C" int fn(int a, int b) {
return a + b;
}
/*
000002bc <fn>:
2bc: b580 push {r7, lr}
2be: b082 sub sp, #8
2c0: af00 add r7, sp, #0
2c2: 6078 str r0, [r7, #4]
2c4: 6039 str r1, [r7, #0]
2c6: 687a ldr r2, [r7, #4]
2c8: 683b ldr r3, [r7, #0]
2ca: 18d3 adds r3, r2, r3
2cc: 1c18 adds r0, r3, #0
2ce: 46bd mov sp, r7
2d0: b002 add sp, #8
2d2: bd80 pop {r7, pc}
*/
extern "C" const uint8_t machine_code[24] __attribute__((aligned (4))) __attribute__((section (".text"))) = {
0x80,0xb5,
0x82,0xb0,
0x00,0xaf,
0x78,0x60,
0x39,0x60,
0x7a,0x68,
0x3b,0x68,
0xd3,0x18,
0x18,0x1c,
0xbd,0x46,
0x02,0xb0,
0x80,0xbd
};
int main() {
LPC_SWM->PINASSIGN0 = 0xFFFFFF04UL;
serial.init(LPC_USART0, 115200);
SysTick_Config(12000000/1000); // 1ms ticks
int(*fnptr)(int a, int b) = (int(*)(int, int))fn;
//int(*fnptr)(int a, int b) = (int(*)(int, int))machine_code;
for (int a = 0; ; a++) {
int c = fnptr(a, 1000000);
printf("Hello world2 %d.\n", c);
delay(1000);
}
}
I compiled the code above, generating firmware.fn.elf and firmware.machinecode.elf by uncommenting //int(*fnptr)(int a, int b) = (int(*)(int, int))machine_code; (and commenting-out the line above).
The first code (fn) worked, the second code (machine_code) crashed.
fn's text and the code at machine_code are identical:
000002bc <fn>:
2bc: b580 push {r7, lr}
2be: b082 sub sp, #8
2c0: af00 add r7, sp, #0
2c2: 6078 str r0, [r7, #4]
2c4: 6039 str r1, [r7, #0]
2c6: 687a ldr r2, [r7, #4]
2c8: 683b ldr r3, [r7, #0]
2ca: 18d3 adds r3, r2, r3
2cc: 1c18 adds r0, r3, #0
2ce: 46bd mov sp, r7
2d0: b002 add sp, #8
2d2: bd80 pop {r7, pc}
000002d4 <machine_code>:
2d4: b580 push {r7, lr}
2d6: b082 sub sp, #8
2d8: af00 add r7, sp, #0
2da: 6078 str r0, [r7, #4]
2dc: 6039 str r1, [r7, #0]
2de: 687a ldr r2, [r7, #4]
2e0: 683b ldr r3, [r7, #0]
2e2: 18d3 adds r3, r2, r3
2e4: 1c18 adds r0, r3, #0
2e6: 46bd mov sp, r7
2e8: b002 add sp, #8
2ea: bd80 pop {r7, pc}
000002ec <main>:
...
The only difference in the calling code is the location of the code called:
$ diff firmware.fn.bin.xxd firmware.machine_code.bin.xxd
54c54
< 0000350: 0040 0640 e02e 0000 bd02 0000 4042 0f00 .#.#........#B..
---
> 0000350: 0040 0640 e02e 0000 d402 0000 4042 0f00 .#.#........#B..
The second address d402 is the address of the machine_code array.
Curiously, the first address bd02 is a little-endian odd number (d is odd in hex).
The address of fn is 02bc (bc02 in big endian), so the pointer to fn is not the address of fn, but the address of fn plus one (or with the low bit set).
Changing the code to:
...
int main() {
LPC_SWM->PINASSIGN0 = 0xFFFFFF04UL;
serial.init(LPC_USART0, 115200);
SysTick_Config(12000000/1000); // 1ms ticks
//int(*fnptr)(int a, int b) = (int(*)(int, int))fn;
int machine_code_addr_low_bit_set = (int)machine_code | 1;
int(*fnptr)(int a, int b) = (int(*)(int, int))machine_code_addr_low_bit_set;
for (int a = 0; ; a++) {
int c = fnptr(a, 1000000);
printf("Hello world2 %d.\n", c);
delay(1000);
}
}
Makes it work.
Googling, I found:
The mechanism for switching makes use of the fact that all instructions must be (at least) halfword-aligned, which means that bit[0] of the branch target address is redundant. Therefore this bit can be re-used to indicate the target instruction set at that address. Bit[0] cleared to 0 means ARM and bit[0] set to 1 means Thumb.
on http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka12545.html
tl;dr
You need to set the low bit on function pointers when executing data as code on ARM Thumb.

Resources