Trying to translate C program to reverse a number into x86 assembly - c

I'm trying to translate the following program to x86 assembly ( AT&T ).
#include <stdio.h>
/* Reverses the decimal digits of n and prints the result (321 for n = 123). */
int main()
{
int n = 123; /* number whose digits are reversed; consumed by the loop */
int reverse = 0; /* digits taken from n so far, in reverse order */
while (n != 0)
{
reverse = reverse * 10; /* shift the collected digits one decimal place left */
reverse = reverse + n%10; /* append the current last digit of n */
n = n/10; /* drop that digit from n */
}
printf("%d\n", reverse);
return 0;
}
It's supposed to print 321.
However, with the code below, I'm getting a 0 instead.
Can anybody give me a clue about what I'm doing wrong here?
( I pasted just the relevant section below. I'm sure that initialization and printing are working fine. You can see the whole thing here)
# Register roles: %esi = n, %edi = reverse, %ebx = constant divisor 10.
movl $123, %esi # int n
movl $0, %edi # int reverse
movl $10, %ebx # divisor
L1: # while n != 0
cmpl $0, %esi
je L2 # n == 0: leave the loop
# reverse = reverse * 10
imul $10, %edi
# reverse = reverse + n % 10
movl $0, %edx # clear the high half of the %edx:%eax dividend
movl %edi, %eax # dividend = %edi (reverse) -- NOTE(review): the C statement takes n % 10, and n lives in %esi
idivl %ebx # %eax = quotient, %edx = remainder
addl %edx, %edi # reverse += remainder
# n = n / 10
movl %esi, %eax # dividend = n
movl $0, %edx # clear the high half of the dividend
idivl %ebx # %eax = n / 10, %edx = n % 10
movl %eax, %esi # n = quotient
jmp L1
L2: # end while
movl %edi, %eax # copy reverse into %eax
Maybe I'm not yet perfectly understanding what the idivl command is supposed to do. I understand that it divides %edx:%eax by %ebx and stores the quotient in %eax and the remainder in %edx.

# reverse = reverse + n % 10
movl $0, %edx # clear the high half of the %edx:%eax dividend
movl %edi, %eax ; <--- here
%edi is not n, according to the comments above:
movl $123, %esi # int n
So, it should be using %esi, i.e. movl %esi, %eax.

sometimes it is good to see what the compiler generates
/* Returns x with its decimal digits in reverse order; the sign of x is kept
 * because both / and % truncate toward zero. */
int reverse(int x)
{
    int flipped = 0;

    /* Each pass moves the lowest digit of x onto the low end of flipped. */
    for (; x != 0; x /= 10)
        flipped = flipped * 10 + x % 10;

    return flipped;
}
and shortest version:
# int reverse(int x): x arrives in edi, r is built in eax and returned there.
reverse:
xor eax, eax # r = 0
mov esi, 10 # esi = divisor 10
.L2:
test edi, edi # x == 0 ?
je .L5 # yes: done, r is already in eax
imul ecx, eax, 10 # ecx = r * 10
mov eax, edi # eax = x, low half of the dividend
cdq # sign-extend eax into edx:eax for the signed divide
idiv esi # eax = x / 10, edx = x % 10
mov edi, eax # x = x / 10
lea eax, [rdx+rcx] # r = x % 10 + r * 10
jmp .L2
.L5:
ret # return r in eax
or the fastest:
# int reverse(int x): x arrives in edi, r is built in eax and returned there.
# The division by 10 is done with a multiply by a fixed constant plus shifts.
reverse:
xor eax, eax # r = 0
test edi, edi # x == 0 ?
je .L4 # yes: return 0
mov esi, 1717986919 # 0x66666667, the multiplier used in place of a divide by 10
.L3:
lea ecx, [rax+rax*4] # ecx = r * 5
mov eax, edi # eax = x
imul esi # edx:eax = x * 0x66666667 (signed 64-bit product)
mov eax, edi
sar eax, 31 # eax = -1 if x is negative, else 0
sar edx, 2 # edx = high half of the product >> 2
sub edx, eax # edx = x / 10 (adds 1 for negative x so the quotient truncates toward zero)
lea eax, [rdx+rdx*4] # eax = (x / 10) * 5
add eax, eax # eax = (x / 10) * 10
sub edi, eax # edi = x - (x / 10) * 10 = x % 10
test edx, edx # is the quotient zero? lea and mov below leave the flags alone
lea eax, [rdi+rcx*2] # r = x % 10 + r * 10
mov edi, edx # x = x / 10
jne .L3 # loop while the new x is not zero
rep ret
.L4:
rep ret
As you can see, compilers are as good as or better than 99.99% of coders.

Related

Explanation of array accessing in X86 assembly

I have the following C function:
/* Returns the sum of b[0] .. b[size-1], adding the elements from the last index down to 0. */
int sum_arr(int b[], int size){
int counter = size-1; /* index of the next element to add, starting at the last one */
int res = 0; /* running sum */
while(counter >= 0){
res = res + b[counter];
counter = counter - 1;
}
return res;
}
From which I generated the following assembly code using:
gcc -Og -S file.c
Out came the following assembly code (I have included the parts of interest only):
# int sum_arr(int b[], int size): %rdi = b, %esi = size, result returned in %eax.
sum_arr:
.LFB41:
.cfi_startproc
subl $1, %esi # counter = size - 1 (kept in %esi)
movl $0, %eax # res = 0 (kept in %eax)
jmp .L2 # test the loop condition before the first iteration
.L3:
movslq %esi, %rdx # sign-extend the 32-bit counter to 64 bits so it can be used as an index
addl (%rdi,%rdx,4), %eax # res += b[counter]: adds the int stored at address %rdi + %rdx*4
subl $1, %esi # counter = counter - 1
.L2:
testl %esi, %esi # set the flags from counter
jns .L3 # loop while counter >= 0 (sign flag clear)
rep ret
.cfi_endproc
I am having some trouble with .L3. The way I understand it is that it starts off by moving the int counter from a 32 bit register %esi into a 64 bit register %rdx. Then I don't understand the following line:
addl (%rdi,%rdx,4), %eax
in particular the (%rdi,%rdx,4) part, which gets added to the value in the %eax register.
And on the last line it decrements the counter with 1.
Could someone help me out with that part?
.L3:
movslq %esi, %rdx /* sign extend counter<%esi> to 64bit %rdx */
addl (%rdi,%rdx,4), %eax /* res<%eax> += b<%rdi>[counter<%rdx>]; */
subl $1, %esi /* counter<%esi> -= 1 */
.L2:
testl %esi, %esi /* counter<%esi> & counter<%esi>: only sets the flags, the result is discarded */
jns .L3 /* if the result is not negative (sign flag clear, i.e. counter >= 0), jump to .L3 */
Basically, addl (%rdi,%rdx,4), %eax is where you access the array (%rdi) at index counter (%rdx) and add the value of that element to res (%eax). The 4 is the scale factor applied to counter (%rdx) when the address is formed, because each element of the int array occupies 4 bytes in memory on your system.
The line basically says res += MEMORY[addrssOf(b) + counter*4]
BTW, I believe you want to check that size > 0 before the line int counter = size-1;. Also, as P__J__ mentioned in his answer, res can overflow because it has the same type as the elements of the array you are summing.
in this form it is easier to understand:
# int sum_arr(int b[], int size): rdi = b, esi = size, result returned in eax.
sum_arr:
sub esi, 1 # counter = size - 1
js .L4 # counter is negative: nothing to add, return 0
movsx rsi, esi # sign-extend counter to 64 bits so it can index from rdi
mov eax, 0 # res = 0
.L3:
add eax, DWORD PTR [rdi+rsi*4] # res += b[counter]
sub rsi, 1 # counter = counter - 1
test esi, esi # set the flags from counter
jns .L3 # loop while counter >= 0
ret # return res in eax
.L4:
mov eax, 0 # return 0
ret
Two remarks:
your integer is very likely to overflow so you should use long long as temporary & return value. It can be shortened as well
/* Adds up the first `size` ints stored at b. The sum is accumulated and
 * returned as long long so it is not limited to the range of int. */
long long sum_arr(const int *b, size_t size){
    long long total = 0;
    size_t i;

    for (i = 0; i < size; ++i) {
        total += b[i];
    }
    return total;
}

Converting my function to assembly from C

Hi I have a function in C that returns the max of a set of numbers in an array. I need to convert it into assembly and make it callable from C. nums is the array in which all the numbers are stored. len is the length of the array that was passed. The other variables that I made are local variables
#-----------------------------------------------------------------------
# short max(short *nums, int len)
# Syntax: GAS, AT&T.  ABI: i386 cdecl (arguments on the stack).
# In:     nums(%ebp) = pointer to the array, len(%ebp) = element count
# Out:    %eax = largest of nums[0] .. nums[len-1], sign-extended
#         (nums[0] is read unconditionally, exactly like the C version)
# Clobb:  %ecx, %edx, flags (all caller-saved); no callee-saved register
#         is touched and no stack locals are used.
#-----------------------------------------------------------------------
.global max
.text
.equ word_size, 4               # size of one stack slot
.equ elem_size, 2               # sizeof(short): scale factor for indexing nums
.equ nums, 2*word_size          # first argument, above saved %ebp and return address
.equ len, 3*word_size           # second argument
max:
    #prologue
    pushl   %ebp
    movl    %esp, %ebp

    #short cur_max = nums[0]
    movl    nums(%ebp), %ecx            # ecx = nums
    movswl  (%ecx), %eax                # eax = cur_max: load the 16-bit value itself, not its address

    #for(index = 1; index < len; index++)
    movl    $1, %edx                    # edx = index
for_loop_begin:
    cmpl    len(%ebp), %edx             # index - len
    jge     for_loop_end                # index >= len: loop finished

    #if(nums[index] > cur_max)
    cmpw    %ax, (%ecx,%edx,elem_size)  # nums[index] - cur_max, signed 16-bit compare
    jle     in_if_loop_end              # not greater: keep cur_max
    #cur_max = nums[index]
    movswl  (%ecx,%edx,elem_size), %eax
in_if_loop_end:
    incl    %edx                        # index++
    jmp     for_loop_begin

for_loop_end:
    #epilogue: %esp was never moved, so only %ebp has to be restored
    popl    %ebp
    ret                                 # cur_max is already in %eax
This is the C code
/* Returns the largest value among nums[0] .. nums[len-1]; nums[0] is read unconditionally. */
short max(short* nums, int len){
int index; /* position of the element being examined */
short cur_max = nums[0]; /* largest value seen so far */
for( index = 1; index < len; index++)
if( nums[index] > cur_max)
cur_max = nums[index];
return cur_max;
}

Assembly movsbl to C code

I'm trying to convert the following optimised assembly code to C code but keeping as close to the assembly code as possible. I have no idea how to change the movsbl call to C code. My understanding is that it moves a byte with a zero extension into a 32bit register. I have included comments as to what I believe to be happening in the assembly code.
file "my_sieve.c"
.text
.p2align 4,,15
.globl my_sieve
.type my_sieve, #function
# my_sieve(composite, max): composite is used as an array of bits, one bit per
# number (byte index = number >> 3, bit index = number & 7). For each i starting
# at 2 whose bit is clear, the bits of i*i, i*i + i, ... up to max are set.
# Register roles: esi = i, eax = current multiple, edi = max, ebp = constant 1.
my_sieve:
pushl %ebp
movl $4, %eax #eax = 4, the first multiple to mark (2*2)
pushl %edi
movl $1, %ebp #ebp = 1, the constant that is shifted left to build bit masks
pushl %esi
movl $2, %esi #esi = 2, the first i
pushl %ebx
movl 24(%esp), %edi #edi = max (second argument; four pushes plus the return address lie below it)
cmpl $3, %edi #compare max with 3
jg .L9 #max > 3: go to L9
jmp .L1 #otherwise go to L1
.p2align 4,,7
.p2align 3
.L6:
addl $1, %esi #i = i + 1
movl %esi, %eax #eax = i
imull %esi, %eax #eax = i*i
cmpl %edi, %eax #compare i*i with max
jg .L1 #i*i > max: go to L1
.L9:
movl 20(%esp), %ebx #ebx = composite (first argument)
movl %esi, %edx #edx = i
sarl $3, %edx #edx = i >> 3, the byte index
movsbl (%ebx,%edx), %ecx #ecx = composite[i >> 3], one byte loaded and sign-extended to 32 bits
movl %esi, %edx #edx = i
andl $7, %edx #edx = i & 7, the bit index inside that byte
btl %edx, %ecx #carry flag = bit number edx of ecx; neither register is changed
jc .L6 #bit set: i is already marked, go to L6 for the next i
.p2align 4,,7
.p2align 3
.L7:
movl %eax, %ecx #ecx = current multiple
movl %ebp, %ebx #ebx = 1
andl $7, %ecx #ecx = multiple & 7, the bit index
movl %eax, %edx #edx = current multiple
sall %cl, %ebx #ebx = 1 << (multiple & 7), the bit mask
addl %esi, %eax #eax = next multiple (current + i)
movl %ebx, %ecx #ecx = bit mask
movl 20(%esp), %ebx #ebx = composite
sarl $3, %edx #edx = current multiple >> 3, the byte index
orb %cl, (%ebx,%edx) #composite[byte index] |= bit mask
cmpl %eax, %edi #compare max with the next multiple
jge .L7 #max >= next multiple: mark it too
jmp .L6 #otherwise jump to L6
.p2align 4,,7
.p2align 3
.L1:
popl %ebx #restore the four registers pushed on entry, in reverse order
popl %esi
popl %edi
popl %ebp
ret #return
.size my_sieve, .-my_sieve
.ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
.section .note.GNU-stack,"",#progbits
And my incomplete attempt at creating C code.
I need help to fill in the blanks or someone to tell me what I'm attempting is completely wrong.
/*
 * Instruction-by-instruction C rendering of the my_sieve assembly listing.
 *
 * composite is treated as an array of bits, one per number: the bit for k is
 * bit (k & 7) of composite[k >> 3]. For every i >= 2 with i*i <= max whose bit
 * is still clear, the bits of i*i, i*i + i, ... up to max are set.
 *
 * composite must provide at least (max >> 3) + 1 bytes; nothing is written
 * when max <= 3.
 *
 * The variables are named after the registers they stand for. EBX holds both
 * an integer and a pointer in the assembly, so it is split into ebx (integer
 * use) and ebx_ptr (pointer use) here.
 */
void my_sieve(char *composite, int max){
    int eax, ebp, ecx, edx, edi, esi, ebx;
    char *ebx_ptr;
    int carry;

    eax = 4;
    ebp = 1;
    esi = 2;
    edi = max;
    if (edi > 3)
        goto L9;
    else
        goto L1;
L6:
    esi += 1;
    eax = esi;
    eax = eax * esi;
    if (eax > edi)
        goto L1;
L9:
    ebx_ptr = composite;
    edx = esi;
    /* sarl $3: byte index */
    edx = edx >> 3;
    /* movsbl: load one byte and sign-extend it to 32 bits */
    ecx = (int)(signed char)ebx_ptr[edx];
    edx = esi;
    /* andl $7: bit index inside the byte */
    edx = edx & 7;
    /* btl: copies bit number edx of ecx into the carry flag */
    carry = (int)(((unsigned)ecx >> edx) & 1u);
    if (carry)
        goto L6;
L7:
    ecx = eax;
    ebx = ebp;
    /* andl $7: bit index */
    ecx = ecx & 7;
    edx = eax;
    /* sall %cl: build the bit mask */
    ebx = ebx << ecx;
    eax += esi;
    ecx = ebx;
    ebx_ptr = composite;
    /* sarl $3: byte index */
    edx = edx >> 3;
    /* orb %cl: 8-bit OR of the low byte of ecx into memory */
    ebx_ptr[edx] = (char)(ebx_ptr[edx] | ecx);
    if (edi >= eax)
        goto L7;
    else
        goto L6;
L1:
    return;
}
I think (int)((int8_t)value) would do it. Casting between unsigned and signed types of the same size has no effect on the bit pattern. Casting a smaller signed type to a larger one causes a sign extension.

How can I figure the definitions of C macro expressions in the following assembly?

I'm having so much difficulty with a question I was assigned for homework. I have the following C code and the subsequent assembly:
/* Sums column j of the two-dimensional array A over its first X(n) rows. */
int foo(int n, int A[X(n)][Y(n)], int j){
int i; /* row index */
int result = 0; /* running sum of A[i][j] */
for (i = 0; i < X(n); i++)
result += A[i][j];
return result;
}
movl 8(%ebp), %eax # eax = n (first argument)
leal (%eax,%eax), %edx # edx = 2n
leal (%edx,%eax), %ecx # ecx = 3n
movl %edx, %ebx # ebx = 2n
leal 1(%edx), %eax # eax = 2n + 1
movl $0, %edx # result = 0, used if the loop is skipped
testl %eax, %eax
jle .L3 # 2n + 1 <= 0: skip the loop
leal 0(,%ecx,4), %esi # esi = 3n * 4, the number of bytes added to the pointer per iteration
movl 16(%ebp), %edx # edx = j (third argument)
movl 12(%ebp), %ecx # ecx = A (second argument)
leal (%ecx,%edx,4), %eax # eax = A + j*4, the address of A[0][j]
movl $0, %edx # result = 0
movl $1, %ecx # loop counter starts at 1
addl $2, %ebx # ebx = 2n + 2, the value at which the loop stops
.L4:
addl (%eax), %edx # result += the int at the current address
addl $1, %ecx # counter++
addl %esi, %eax # advance the address by esi bytes
cmpl %ebx, %ecx
jne .L4 # repeat until counter == 2n + 2, i.e. 2n + 1 iterations in total
.L3:
movl %edx, %eax # return value = result
I need to find out the definitions of X and Y. I believe that n is initially stored in eax, and then 2n is stored in edx and 3n in ecx. So I think esi would equal 3n * 4. Also, because result is initially stored as movl $0, %edx and the following lines are incremented by one, I'm thinking that X would be defined as #define X(n) (n + 1). Also, I believe addl %esi, %eax would be Y. So since esi = %ecx * 4, does Y = 4n? However, this is where I begin to get severely confused. Thanks, all.
Cute exercise.
The declaration seems to define A as a C99 variable-length-array. Incidentally these have exceedingly poor compiler support and are optional in C11.
The inner Y(n) dimension may then be inferred from the array stride across loop iterations, where EAX is the pointer and ESI the pitch, and appears to be defined as n*3. As for X(n) we may infer it from the loop entry condition when i = 0, and it appears to expand as N*2+1.
/* Number of rows: the loop-entry test skips the loop when 2n + 1 <= 0. */
#define X(n) ((n)*2+1)
/* Row length in ints: the pointer advances by 3n * 4 bytes per row. */
#define Y(n) ((n)*3)
Annotated assembly:
;int foo(int n, int A[X(n)][Y(n)], int j) -- annotated listing
;Arguments are read from the stack: [ebp+8] = n, [ebp+12] = A, [ebp+16] = j.
;The sum is returned in EAX.
;NOTE: EBX and ESI are modified here but never saved or restored; the
;listing this was taken from shows neither the prologue nor the epilogue.
_foo:
        ;Prologue (assumed)
        push    ebp
        mov     ebp,esp
        ;Pre-scale N
        mov     eax,[ebp+8]
        lea     edx,[eax+eax]
        lea     ecx,[edx+eax]   ;ECX = N*3
        mov     ebx,edx         ;EBX = N*2
        ;Bail out early if X(n) <= 0
        lea     eax,[edx+1]     ;EAX = N*2+1
        mov     edx,0           ;EDX = 0, the result if the loop is skipped
        test    eax,eax         ;(OF=0)
        jle     @@end           ;Skip the loop unless N*2+1 > 0
        ;Prepare loop counters
        lea     esi,[ecx*4]     ;ESI = N*3*sizeof int, array stride
        mov     edx,[ebp+16]    ;EDX = j
        mov     ecx,[ebp+12]    ;ECX = A
        lea     eax,[ecx+edx*4] ;EAX = &A[0][j]
        mov     edx,0           ;EDX = 0, accumulator
        mov     ecx,1           ;ECX = 1, loop counter
        add     ebx,2           ;EBX = N*2+2
        ;Step through the loop
@@loop:
        add     edx,[eax]       ;EDX += A[i][j]
        add     ecx,1           ;Increment loop counter
        add     eax,esi         ;Advance the pointer by one row (&A[i+1][j])
        cmp     ecx,ebx
        jne     @@loop          ;[1..N*2+2) <=> [0..N*2+1)
@@end:
        ;Epilogue
        mov     eax,edx         ;Return the sum
        pop     ebp
        ret

How do i use arrays in x86 assembly code to replace letters of a word?

Hey everyone, so I am working on an assignment involving arrays in assembly. I need to have the user enter a word, then clear the screen. After that a second player tries to guess the word. I did all that, but I also have to display a hint every time the second player tries to guess. For example, if I entered the word hello, the program displays h!l!o when the second player tries to guess. I have tried it but can't get it to work. Any help would be much appreciated, thank you.
# Messages and buffers for the guessing game (GAS, AT&T syntax).
# Every *_length word is computed by the assembler as "current location minus
# start of the string", so it always matches the text it describes. The clear
# screen sequence is 7 bytes long, not 11.
.data
chose:
    .ascii "Enter the Secret Word\n"
chose_length:
    .int . - chose
lets_play_response:
    .ascii "Try to Guess the Word Entered\n"
l_p_response_length:
    .int . - lets_play_response
wrong_guess:
    .ascii "Incorrect Guess, Try Again\n"
wrong_guess_length:
    .int . - wrong_guess
correct:
    .ascii "Correct Guess, Good Job\n"
correct_length:
    .int . - correct
Screen_Clearer:
    .ascii "\x1B[H\x1B[2J"          # ESC [ H (cursor home), ESC [ 2 J (erase display)
Screen_Clearer_length:
    .int . - Screen_Clearer
letter:
    .space 15                       # the secret word as typed, up to 15 bytes
guess:
    .space 15                       # the most recent guess, up to 15 bytes
# Scratch buffer for the hint, so that the secret word in `letter` is never
# overwritten and can still be compared against the guesses.
.lcomm hint, 15

.text
.global _start
#-----------------------------------------------------------------------
# Program entry (32-bit Linux, int $0x80 system calls: 3 = read, 4 = write,
# 1 = exit; file descriptor 0 = stdin, 1 = stdout).
# Flow: prompt for the secret word, read it, clear the screen, announce the
# game, print the hint, run the guessing loop, exit with status 0.
# %esi holds the number of bytes read for the secret word; Screen_Clear and
# the system calls used here do not modify it.
#-----------------------------------------------------------------------
_start:
    # Ask for the secret word
    mov     $chose, %ecx
    mov     chose_length, %edx
    mov     $4, %eax
    mov     $1, %ebx
    int     $0x80

    # Read the secret word into `letter`
    mov     $letter, %ecx
    mov     $15, %edx
    mov     $3, %eax
    mov     $0, %ebx
    int     $0x80
    mov     %eax, %esi              # esi = bytes read (newline included), <= 0 on EOF/error

    call    Screen_Clear

    mov     $lets_play_response, %ecx
    mov     l_p_response_length, %edx
    mov     $4, %eax
    mov     $1, %ebx
    int     $0x80

    # Print the word with every second letter replaced with !
    test    %esi, %esi
    jle     hint_printed            # nothing was read: no hint to show
    xor     %edi, %edi              # edi = index into letter/hint
hint_loop:
    cmp     %esi, %edi
    jge     hint_ready              # index >= length: every byte handled
    movb    letter(%edi), %al
    cmpb    $10, %al                # keep the newline so the hint ends its line
    je      hint_store
    test    $1, %edi
    jz      hint_store              # even index: keep the real letter
    movb    $33, %al                # odd index: show '!' instead
hint_store:
    movb    %al, hint(%edi)         # byte-sized store into the scratch buffer
    inc     %edi
    jmp     hint_loop
hint_ready:
    mov     $hint, %ecx
    mov     %esi, %edx              # same length as the word that was read
    mov     $4, %eax
    mov     $1, %ebx
    int     $0x80
hint_printed:

    call    GuessLoop

    mov     $1, %eax                # exit(0)
    xor     %ebx, %ebx
    int     $0x80
#-----------------------------------------------------------------------
# GuessLoop: read guesses from stdin until one equals the secret word in
# `letter`, printing the "incorrect" message after each wrong guess and the
# "correct" message before returning.
# The comparison runs byte by byte over the bytes actually read and stops at
# the newline, so the whole word is compared and bytes left in `guess` by an
# earlier, longer guess are ignored.
# Returns without a message if stdin reaches end of file or read fails.
# Clobbers: %eax, %ebx, %ecx, %edx, flags.
#-----------------------------------------------------------------------
GuessLoop:
    mov     $guess, %ecx
    mov     $15, %edx
    mov     $3, %eax
    mov     $0, %ebx
    int     $0x80                   # eax = bytes read, 0 at EOF, negative on error
    test    %eax, %eax
    jle     GuessDone               # no more input: stop instead of looping forever

    xor     %ecx, %ecx              # ecx = index of the byte being compared
CompareLoop:
    movb    guess(%ecx), %dl
    cmpb    letter(%ecx), %dl
    jne     Incorrect               # first difference decides
    cmpb    $10, %dl
    je      Correct                 # both words end here and everything before matched
    inc     %ecx
    cmp     %eax, %ecx
    jl      CompareLoop             # more bytes of this guess to check

    # Every byte read matched, but the guess contained no newline.
    cmp     $15, %ecx
    je      Correct                 # all 15 buffer bytes are equal
    movb    letter(%ecx), %dl
    cmpb    $10, %dl
    je      Correct                 # the secret word ends at this position too
    testb   %dl, %dl
    jz      Correct                 # the secret word ends here (unused buffer bytes are zero)
Incorrect:
    mov     $wrong_guess, %ecx
    mov     wrong_guess_length, %edx
    mov     $4, %eax
    mov     $1, %ebx
    int     $0x80
    jmp     GuessLoop
Correct:
    mov     $correct, %ecx
    mov     correct_length, %edx
    mov     $4, %eax
    mov     $1, %ebx
    int     $0x80
GuessDone:
    ret
# Screen_Clear: write the Screen_Clearer escape sequence to stdout.
# The byte count is loaded from the Screen_Clearer_length word in memory.
# Clobbers: %eax, %ebx, %ecx, %edx.
Screen_Clear:
    movl    $4, %eax                        # system call 4 = write
    movl    $1, %ebx                        # file descriptor 1 = stdout
    movl    $Screen_Clearer, %ecx           # address of the bytes to write
    movl    Screen_Clearer_length, %edx     # number of bytes to write
    int     $0x80
    ret
# End of Screen_Clear
If you are going to use assembly, you will need to learn about addressing modes (search for "x86 addressing modes" on Google).
In this sample, I use the [Base + Index] mode. You will need one more variable to hold your hint string. It is not AT&T syntax, but it will give you the idea
; Hint demo (NASM syntax, 32-bit Linux, int 80h system calls).
; Reads one word from stdin and prints it with every second character
; replaced by '!', followed by a linefeed (hello -> h!l!o).
%define sys_exit 1
%define sys_write 4
%define sys_read 3
%define stdin 0
%define stdout 1
%define LINEFEED 10
%define HINT_MARK 33                    ; '!'

SECTION .bss
hint    resb 15                         ; hint text built from the word
letter  resb 15                         ; the word as typed
leter_len equ $ - letter                ; size of the letter buffer (15)

SECTION .text
global _start
_start:
        mov     ecx, letter
        mov     edx, leter_len
        mov     ebx, stdin
        mov     eax, sys_read
        int     80H                     ; eax = bytes read, 0 at EOF, negative on error

        mov     esi, hint
        mov     edi, letter
        xor     ecx, ecx                ; ecx = index, and the hint length once the loop ends
        dec     eax                     ; eax = bytes to process, without the last one (normally the linefeed)
        jle     .ShowIt                 ; nothing to process (empty line, EOF or error)
.MakeHint:
        mov     dl, byte [edi + ecx]    ; get byte from pointer + index
        cmp     dl, LINEFEED            ; is it linefeed
        je      .ShowIt
        mov     byte [esi + ecx], dl    ; move byte into hint buffer
        inc     ecx                     ; increase index
        cmp     ecx, eax                ; at the end?
        jge     .ShowIt
        mov     byte [esi + ecx], HINT_MARK ; move ! to next index
        inc     ecx                     ; increase index
        cmp     ecx, eax                ; at end?
        jl      .MakeHint
.ShowIt:
        ; ecx <= 14 here (at most 15 bytes are read and one is dropped), so
        ; the linefeed still fits inside the 15-byte hint buffer.
        mov     byte [esi + ecx], LINEFEED
        inc     ecx
        mov     edx, ecx                ; write only the bytes that were built
        mov     ecx, hint
        mov     ebx, stdout
        mov     eax, sys_write
        int     80H

        mov     eax, sys_exit
        xor     ebx, ebx
        int     80h

Resources