I have the following C function:
/*
 * Sum the ints b[0] .. b[size-1], walking from the last element down to
 * index 0. When size is 0 the loop body never runs and 0 is returned.
 * The total is accumulated in an int, the same type as the elements.
 */
int sum_arr(int b[], int size){
int counter = size-1; /* index of the last element */
int res = 0; /* running total */
while(counter >= 0){
res = res + b[counter];
counter = counter - 1;
}
return res;
}
From which I generated the following assembly code using:
gcc -Og -S file.c
Out came the following assembly code (I have included the parts of interest only):
# sum_arr as emitted by gcc -Og (AT&T syntax).
# %rdi = b, %esi = size on entry and then counter, %eax = res.
sum_arr:
.LFB41:
.cfi_startproc
subl $1, %esi # counter = size - 1
movl $0, %eax # res = 0
jmp .L2 # check the loop condition before the first iteration
.L3:
movslq %esi, %rdx # sign-extend the 32-bit counter to 64 bits for use as an index
addl (%rdi,%rdx,4), %eax # res += b[counter]; address = %rdi + %rdx*4
subl $1, %esi # counter -= 1
.L2:
testl %esi, %esi # set the flags from counter
jns .L3 # loop while counter >= 0 (sign flag clear)
rep ret
.cfi_endproc
I am having some trouble with .L3. The way I understand it is that it starts off by moving the int counter from a 32 bit register %esi into a 64 bit register %rdx. Then I don't understand the following line:
addl (%rdi,%rdx,4), %eax
in particular the (%rdi,%rdx,4) part, which gets added to the value in the %eax register.
And on the last line it decrements the counter with 1.
Could someone help me out with that part?
.L3:
movslq %esi, %rdx /* sign extend counter<%esi> to 64bit %rdx */
addl (%rdi,%rdx,4), %eax /* res<%eax> += b<%rdi>[counter<%rdx>]; */
subl $1, %esi /* counter<%esi> -= 1 */
.L2:
testl %esi, %esi /* set the flags from counter<%esi> & counter<%esi>; the register is not modified */
jns .L3 /* if the sign flag is clear (counter >= 0), jump to .L3 */
Basically, addl (%rdi,%rdx,4), %eax is where the array is accessed: %rdi holds the base address of b, %rdx holds the index (counter), and the value of that element is added to res (%eax). The 4 is the scale factor applied to the index (%rdx) when the address is formed, because each element of the int array occupies 4 bytes in memory on your system.
The line basically says res += MEMORY[addressOf(b) + counter*4]
BTW, I believe you want to check that size > 0 before the line int counter = size-1;, and also, as P__J__ mentioned in his answer, your res can overflow because it has the same type as the elements of the array you are summing.
in this form it is easier to understand:
# sum_arr in Intel syntax: rdi = b, esi = size on entry and then counter, eax = res.
sum_arr:
sub esi, 1 # counter = size - 1
js .L4 # counter < 0: nothing to add
movsx rsi, esi # sign-extend counter to 64 bits once, before the loop
mov eax, 0 # res = 0
.L3:
add eax, DWORD PTR [rdi+rsi*4] # res += b[counter]
sub rsi, 1 # counter -= 1
test esi, esi # set the flags from the low 32 bits of counter
jns .L3 # loop while counter >= 0
ret
.L4:
mov eax, 0 # nothing was summed: return 0
ret
Two remarks:
Your int sum is very likely to overflow, so you should use long long for the temporary and the return value. The function can be shortened as well:
/*
 * Add up the `size` ints starting at b.
 * The total is kept in a long long so that a sum of int values does not
 * wrap around the way an int accumulator would.
 */
long long sum_arr(const int *b, size_t size){
    long long total = 0;
    size_t i;

    for (i = 0; i < size; ++i) {
        total += b[i];
    }
    return total;
}
Related
I'm trying to translate the following program to x86 assembly ( AT&T ).
#include <stdio.h>
/* Reverse the decimal digits of n (123 becomes 321) and print the result. */
int main()
{
int n = 123;
int reverse = 0;
while (n != 0)
{
reverse = reverse * 10; /* make room for the next digit */
reverse = reverse + n%10; /* append the lowest digit of n */
n = n/10; /* drop that digit from n */
}
printf("%d\n", reverse);
return 0;
}
It's supposed to print 321.
However, with the code below, I'm getting a 0 instead.
Can anybody give me a clue about what I'm doing wrong here?
( I pasted just the relevant section below. I'm sure that initialization and printing are working fine. You can see the whole thing here)
movl $123, %esi # int n
movl $0, %edi # int reverse
movl $10, %ebx # divisor
L1: # while n != 0
cmpl $0, %esi
je L2
# reverse = reverse * 10
imul $10, %edi
# reverse = reverse + n % 10
movl $0, %edx # clear the high half of the %edx:%eax dividend (only correct for non-negative values)
movl %edi, %eax # NOTE(review): the dividend loaded here is reverse (%edi), not n (%esi)
idivl %ebx # %eax = quotient, %edx = remainder
addl %edx, %edi # reverse += remainder
# n = n / 10
movl %esi, %eax
movl $0, %edx
idivl %ebx
movl %eax, %esi # n = quotient
jmp L1
L2: # end while
movl %edi, %eax # copy reverse into %eax
Maybe I'm not yet perfectly understanding what the idivl command is supposed to do. I understand that it divides %edx:%eax by %ebx and stores the quotient in %eax and the remainder in %edx.
# reverse = reverse + n % 10
movl $0, %edx
movl %edi, %eax ; <--- here
%edi is not n, according to the comments above:
movl $123, %esi # int n
So, it should be using %esi, i.e. movl %esi, %eax.
sometimes it is good to see what the compiler generates
/*
 * Return x with its decimal digits in reverse order (123 -> 321).
 * Each pass moves the lowest digit of x onto the end of the result;
 * for a negative x every digit taken by % is negative, so the result
 * is negative as well.
 */
int reverse(int x)
{
    int r;

    for (r = 0; x != 0; x /= 10)
        r = r * 10 + x % 10;
    return r;
}
and shortest version:
# reverse(x), size-optimised: edi = x, eax = r, esi = the constant divisor 10.
reverse:
xor eax, eax # r = 0
mov esi, 10 # divisor
.L2:
test edi, edi
je .L5 # x == 0: done, r is already in eax
imul ecx, eax, 10 # ecx = r * 10
mov eax, edi
cdq # sign-extend eax into edx:eax, the dividend for idiv
idiv esi # eax = x / 10, edx = x % 10
mov edi, eax # x = x / 10
lea eax, [rdx+rcx] # r = r * 10 + x % 10
jmp .L2
.L5:
ret
or the fastest:
# reverse(x) without a divide instruction: x / 10 is obtained by multiplying
# by 1717986919 (0x66666667) and shifting the high half of the product.
# edi = x, eax = r.
reverse:
xor eax, eax # r = 0
test edi, edi
je .L4 # x == 0: return 0
mov esi, 1717986919 # 0x66666667, the multiplier that replaces the divide by 10
.L3:
lea ecx, [rax+rax*4] # ecx = r * 5
mov eax, edi
imul esi # edx:eax = x * 0x66666667
mov eax, edi
sar eax, 31 # eax = -1 if x is negative, else 0
sar edx, 2 # edx = high half of the product >> 2
sub edx, eax # edx = x / 10, rounded toward zero
lea eax, [rdx+rdx*4]
add eax, eax # eax = (x / 10) * 10
sub edi, eax # edi = x % 10
test edx, edx # flags for the loop test; the lea and mov below do not change them
lea eax, [rdi+rcx*2] # r = x % 10 + r * 10
mov edi, edx # x = x / 10
jne .L3 # loop while x / 10 != 0
rep ret
.L4:
rep ret
As you can see, compilers are as good as or better than 99.99% of coders.
Hi I have a function in C that returns the max of a set of numbers in an array. I need to convert it into assembly and make it callable from C. nums is the array in which all the numbers are stored. len is the length of the array that was passed. The other variables that I made are local variables
#-----------------------------------------------------------------------
# short max(short *nums, int len)
#
# Returns the largest element of nums[0 .. len-1].
#
# Syntax: GAS, AT&T.
# ABI:    32-bit cdecl (arguments on the stack, result in %eax).
# In:     nums(%ebp) = pointer to the array
#         len(%ebp)  = number of elements
#         nums[0] is read unconditionally, exactly like the C version,
#         so the array must hold at least one element.
# Out:    %eax = maximum, sign-extended from 16 bits (%ax is the short).
# Clobb:  %ecx, %edx, flags. %ebx is saved and restored.
#
# Register roles: %eax = index, %ebx = len, %ecx = nums, %edx = cur_max.
# The C locals live in registers, so no stack slots are needed for them.
#-----------------------------------------------------------------------
.global max
.text
.equ word_size, 4
.equ short_size, 2              # sizeof(short): stride between elements of nums
.equ nums, 2*word_size          # first argument, above the saved %ebp and the return address
.equ len, 3*word_size           # second argument
max:
    # prologue
    push    %ebp
    movl    %esp, %ebp
    push    %ebx                # callee-saved register, used for len

    # short cur_max = nums[0]
    movl    nums(%ebp), %ecx
    movl    len(%ebp), %ebx
    movswl  (%ecx), %edx        # load the 16-bit value itself (not its address), sign-extended

    # for(index = 1; index < len; index++)
    movl    $1, %eax
for_loop_begin:
    cmpl    %ebx, %eax
    jge     for_loop_end        # index >= len: done

    # if(nums[index] > cur_max)
    cmpw    %dx, (%ecx,%eax,short_size)
    jle     in_if_loop_end      # nums[index] <= cur_max: keep the current maximum
    movswl  (%ecx,%eax,short_size), %edx   # cur_max = nums[index]
in_if_loop_end:
    incl    %eax
    jmp     for_loop_begin

for_loop_end:
    movl    %edx, %eax          # return cur_max

    # epilogue: must run on the return path, before ret
    pop     %ebx
    pop     %ebp
    ret
This is the C code
/*
 * Return the largest value in nums[0 .. len-1].
 * nums[0] is read before len is looked at, so nums must hold at least
 * one element.
 */
short max(short* nums, int len){
int index;
short cur_max = nums[0]; /* start with the first element as the maximum */
for( index = 1; index < len; index++)
if( nums[index] > cur_max)
cur_max = nums[index];
return cur_max;
}
I'm trying to convert the following optimised assembly code to C code but keeping as close to the assembly code as possible. I have no idea how to change the movsbl call to C code. My understanding is that it moves a byte with a zero extension into a 32bit register. I have included comments as to what I believe to be happening in the assembly code.
file "my_sieve.c"
.text
.p2align 4,,15
.globl my_sieve
.type my_sieve, #function
# my_sieve(composite, max): 32-bit code, arguments passed on the stack.
# After the four pushes below, composite is at 20(%esp) and max at 24(%esp).
# Register roles: %esi = i (the candidate), %eax = the multiple of i being
# marked (starts at i*i), %edi = max, %ebp = the constant 1 used to build
# bit masks. Bit (n & 7) of byte composite[n >> 3] is the flag for number n.
my_sieve:
pushl %ebp #save ebp (restored before ret)
movl $4, %eax #eax = 4 (i*i for i = 2)
pushl %edi #save edi
movl $1, %ebp #ebp = 1
pushl %esi #save esi
movl $2, %esi #esi = 2 (i)
pushl %ebx #save ebx
movl 24(%esp), %edi #edi = max
cmpl $3, %edi #compare max with 3
jg .L9 #if max > 3 go to L9
jmp .L1 #otherwise go to L1 (return)
.p2align 4,,7
.p2align 3
.L6:
addl $1, %esi #i = i + 1
movl %esi, %eax #eax = i
imull %esi, %eax #eax = i*i
cmpl %edi, %eax #compare i*i with max
jg .L1 #if i*i > max go to L1 (return)
.L9:
movl 20(%esp), %ebx #ebx = composite
movl %esi, %edx #edx = i
sarl $3, %edx #edx = i >> 3 (byte index)
movsbl (%ebx,%edx), %ecx #ecx = byte at composite[i >> 3], sign-extended to 32 bits
movl %esi, %edx #edx = i
andl $7, %edx #edx = i & 7 (bit index within the byte)
btl %edx, %ecx #bit test: carry flag = bit number edx of ecx; neither register changes
jc .L6 #bit set (i is already flagged): go to L6 for the next i
.p2align 4,,7
.p2align 3
.L7:
movl %eax, %ecx #ecx = eax
movl %ebp, %ebx #ebx = 1
andl $7, %ecx #ecx = eax & 7 (bit index)
movl %eax, %edx #edx = eax
sall %cl, %ebx #ebx = 1 << (eax & 7), the bit mask
addl %esi, %eax #eax = eax + i (the next multiple)
movl %ebx, %ecx #ecx = bit mask
movl 20(%esp), %ebx #ebx = composite
sarl $3, %edx #edx = (eax before the add) >> 3, the byte index
orb %cl, (%ebx,%edx) #composite[edx] |= low byte of the mask
cmpl %eax, %edi #compare max with the next multiple
jge .L7 #if max >= eax flag that multiple too
jmp .L6 #otherwise go to L6 for the next i
.p2align 4,,7
.p2align 3
.L1:
popl %ebx #restore the saved registers in reverse order
popl %esi
popl %edi
popl %ebp
ret #return
.size my_sieve, .-my_sieve
.ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
.section .note.GNU-stack,"",#progbits
And my incomplete attempt at creating C code.
I need help to fill in the blanks or someone to tell me what I'm attempting is completely wrong.
/*
 * Hand translation of the my_sieve assembly, using one C variable per register.
 * NOTE(review): incomplete and not compilable as written: edi is declared
 * twice, ebx is used but never declared, and `ecs` and `carry` are undefined.
 * The comments below record what the matching assembly instruction does
 * wherever the C statement is missing or differs.
 */
void my_sieve(char *composite, int max){
long ebp, eax, edi, esi, edi, ecx, edx; /* NOTE(review): edi listed twice, ebx missing */
eax = 4;
ebp = 1;
esi = 2;
edi = max;
if(edi > 3)
goto L9;
else
goto L1;
L6:
esi += 1;
eax = esi;
eax = eax*esi;
if(eax > edi)
goto L1;
L9:
ebx = composite;
edx = esi;
//right shift
edx = edx >> 3;
//movsbl: the assembly loads the byte at composite[edx] into ecx, sign-extended; no statement for it yet
edx = esi;
//bit-wise
edx = edx & 7;
edx = ecx; /* NOTE(review): stands in for btl, which only tests bit edx of ecx and sets the carry flag */
if(carry) /* carry = the bit tested by btl */
goto L6;
L7:
ecx = eax;
ebx = ebp;
//bit-wise
ecs = ecx & 7; /* NOTE(review): `ecs` looks like a typo for ecx */
edx = eax;
//left shift
ebx = ebx & 0xFF; /* NOTE(review): the assembly is sall %cl, %ebx, i.e. ebx shifted left by cl bits */
eax += esi;
ecx = ebx;
ebx = composite;
//right shift
edx = edx >> 3;
//8 bit logical OR: the assembly ORs cl into the byte at composite[edx]; no statement for it yet
if(edi >= eax)
goto L7;
else
goto L6;
L1:
return;
}
I think (int)((int8_t)value) would do it. Casting between unsigned and signed types of the same size has no effect on the bit pattern. Casting a smaller signed type to a larger one causes a sign extension.
I'm having so much difficulty with a question I was assigned for homework. I have the following C code and the subsequent assembly:
/*
 * Sum column j of the two-dimensional array A over rows 0 .. X(n)-1.
 * A has X(n) rows of Y(n) ints each; X and Y are macros whose definitions
 * are not shown with this function.
 */
int foo(int n, int A[X(n)][Y(n)], int j){
int i;
int result = 0;
for (i = 0; i < X(n); i++)
result += A[i][j]; /* consecutive rows are Y(n) ints apart */
return result;
}
# Body of foo (prologue and epilogue not shown).
# 8(%ebp) = n, 12(%ebp) = A, 16(%ebp) = j.
movl 8(%ebp), %eax # eax = n
leal (%eax,%eax), %edx # edx = 2n
leal (%edx,%eax), %ecx # ecx = 3n
movl %edx, %ebx # ebx = 2n
leal 1(%edx), %eax # eax = 2n + 1
movl $0, %edx # result = 0, in case the loop is skipped
testl %eax, %eax
jle .L3 # skip the loop when 2n + 1 <= 0
leal 0(,%ecx,4), %esi # esi = 3n * 4, the byte distance between consecutive rows
movl 16(%ebp), %edx # edx = j
movl 12(%ebp), %ecx # ecx = A
leal (%ecx,%edx,4), %eax # eax = &A[0][j]
movl $0, %edx # result = 0
movl $1, %ecx # loop counter starts at 1
addl $2, %ebx # ebx = 2n + 2, the counter value that ends the loop
.L4:
addl (%eax), %edx # result += the int eax points at
addl $1, %ecx # counter += 1
addl %esi, %eax # move to the same column of the next row
cmpl %ebx, %ecx
jne .L4 # counter runs 1 .. 2n+1, so 2n + 1 iterations
.L3:
movl %edx, %eax # return result
I need to find out the definitions of X and Y. I believe that n is initially stored in eax, and then 2n is stored in edx and 3n in ecx. So I think esi would equal 3n * 4. Also, because result is initially stored as movl $0, %edx and the following lines are incremented by one I'm thinking that X would be equal to #define X(n + 1). Also, I believe addl %esi, %eax would be Y. So since esi = %ecx * 4 does Y = 4n? However, this is where I begin to get severely confused. Thank's all.
Cute exercise.
The declaration seems to define A as a C99 variable-length-array. Incidentally these have exceedingly poor compiler support and are optional in C11.
The inner Y(n) dimension may then be inferred from the array stride across loop iterations, where EAX is the pointer and ESI the pitch, and appears to be defined as n*3. As for X(n) we may infer it from the loop entry condition when i = 0, and it appears to expand as N*2+1.
/* Number of rows: the loop in the assembly runs n*2+1 times. */
#define X(n) ((n)*2+1)
/* Ints per row: the assembly steps the pointer by n*3*4 bytes per row. */
#define Y(n) ((n)*3)
Annotated assembly:
;foo(n, A, j): [ebp+8] = n, [ebp+12] = A, [ebp+16] = j; the sum is returned in EAX.
;NOTE: EBX and ESI are modified without being saved anywhere in this listing.
_foo:
;Prologue (assumed)
push ebp
mov ebp,esp
;Pre-scale N
mov eax,[ebp+8] ;EAX = N
lea edx,[eax+eax] ;EDX = N*2
lea ecx,[edx+eax] ;ECX = N*3
mov ebx,edx ;EBX = N*2
;Bail out early if X(n) <= 0
lea eax,[edx+1] ;EAX = N*2+1
mov edx,0 ;EDX = 0, the sum returned if the loop is skipped
test eax,eax ;(OF=0)
jle ##end ;Skip the loop unless N*2+1 > 0
;Prepare loop counters
lea esi,[ecx*4] ;ESI = N*3*sizeof int, array stride in bytes
mov edx,[ebp+16] ;EDX = j
mov ecx,[ebp+12] ;ECX = A
lea eax,[ecx+edx*4] ;EAX = &A[0][j]
mov edx,0 ;EDX = 0, accumulator
mov ecx,1 ;ECX = 1, loop counter
add ebx,2 ;EBX = N*2+2, loop end value
;Step through the loop
##loop:
add edx,[eax] ;EDX += A[i][j]
add ecx,1 ;Increment loop counter
add eax,esi ;Advance EAX by one row: &A[i+1][j]
cmp ecx,ebx
jne ##loop ;[1..N*2+2) <=> [0..N*2+1)
##end:
;Epilogue
mov eax,edx ;Return the sum
pop ebp
ret
So I'm trying to create a factorial function in assembler
In c:
#include<stdio.h>
/*
 * Recursive factorial: fat(0) is 1, otherwise n * fat(n-1).
 * Only n == 0 stops the recursion; there is no check for a negative n.
 */
int fat (int n)
{
if (n==0) return 1;
else return n*fat(n-1);
}
/* Print fat(4) followed by a newline. */
int main (void){
printf("%d\n", fat(4));
return 0;
}
In Assembly:
.text
.global fat
# fat: hand-written attempt at the recursive factorial (32-bit, AT&T syntax).
# The argument for the recursive call is pushed on the stack; the result is built in %eax.
fat:push %ebp # save the caller's frame pointer
mov %esp, %ebp # after this, 0(%ebp) is the saved ebp and 4(%ebp) the return address
movl $1,%eax # eax = 1, the value returned when edx is 0
movl 4(%ebp),%edx # edx = the word at 4(%ebp)
LOOP:cmp $0,%edx
je FIM # edx == 0: return eax
sub $1,%edx # edx = edx - 1
push %edx # push the argument for the recursive call
call fat
imul %edx,%eax # eax = eax * edx
FIM:mov %ebp, %esp # reset esp to the frame base, discarding anything pushed in this frame
pop %ebp
ret
I keep getting the segmentation fault error and I don't know why...can someone help me?
The offset is probably wrong in this line:
movl 4(%ebp),%edx
The stack has the previous value of %ebp and the return address already, so your offset is going to have to be more than 4.
I recommend stepping through the assembly code with the debugger, and make sure that all the register values are exactly what you expect them to be. You will also have problems with the %edx register across calls unless you save and restore its value, too.
fat:push %ebp
mov %esp, %ebp
movl $1,%eax
movl 4(%ebp),%edx /* Must be 8(%ebp) because of the return address! */
LOOP:cmp $0,%edx
je FIM
sub $1,%edx
push %edx
call fat /* The call to fat() just trashed edx, oops. Gotta save/restore it! */
imul %edx,%eax /* AT&T operand order: eax = eax * edx, so the product is already in eax. edx must hold n here, not whatever the call left in it. */
/* The pushed argument is never popped; "mov %ebp, %esp" below discards it, but an explicit "pop" or "addl $4,%esp" would be clearer. */
FIM:mov %ebp, %esp
pop %ebp
ret
Rewriting your C function, assemblyish style, may be helpful:
/*
 * Factorial written one statement per assembly instruction.
 * eax and edx stand for the registers of the same name; savedEdx stands
 * for the copy of edx kept on the stack across the recursive call.
 */
int fat (int n)
{
int eax, edx, savedEdx;
eax = 1;
edx = n; /* n = 8(%ebp) */
if (edx == 0)
goto done;
savedEdx = edx; /* can do this with pushl %edx */
--edx;
eax = fat(edx); /* pushl %edx; call fat; then addl $4, %esp or popl %edx to drop the argument */
edx = savedEdx; /* popl %edx */
eax *= edx; /* can do this with imul %edx */
done:
return eax;
}