I have the following recursive function to count all the nodes having value 20, in a circular doubly linked list. I need to convert this to tail recursive function to prevent safety issues. Please help me with the same. Thanks
int count(node *start)
{
return count_helper(start, start);
}
int count_helper(node *current, node *start)
{
int c;
c = 0;
if(current == NULL)
return 0;
if((current->roll_no) == 20)
c = 1;
if(current->next == start) return c;
return (c + count_helper(current->next, start));
}
In order to take advantage of tail recursion, the recursive call simply has to be the last thing performed. Currently, the only thing standing in the way of this goal is an addition. So, to transform the function, that addition has to be moved around. A common way to accomplish this is by passing the variable c as a parameter to the recursive helper function, as so:
int count(node *start)
{
return count_helper(start,start,0);
}
int count_helper(node *current, node *start, int c)
{
if(current == NULL)
return c;
if((current->roll_no) == 20)
c+=1;
if(current->next == start)
return c;
return count_helper(current->next, start,c);
}
This unrolls as follows (using gcc 4.6.1, as produced by gcc -S -O2):
count_helper:
.LFB23:
.cfi_startproc
pushl %ebx
.cfi_def_cfa_offset 8
.cfi_offset 3, -8
movl 8(%esp), %edx
movl 12(%esp), %ebx
movl 16(%esp), %eax
testl %edx, %edx
jne .L15
jmp .L10
.p2align 4,,7
.p2align 3
.L14:
testl %edx, %edx
je .L10
.L15:
xorl %ecx, %ecx
cmpl $20, 4(%edx)
movl (%edx), %edx
sete %cl
addl %ecx, %eax
cmpl %ebx, %edx
jne .L14 # <-- this is the key line right here
.L10:
popl %ebx
.cfi_def_cfa_offset 4
.cfi_restore 3
ret
.cfi_endproc
Compare this to your original (done without -O2, as apparently the compiler finds a way to make your original tail recursive as well, although in the process it mucks it up so much that I can barely read it):
count_helper:
.LFB1:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
subl $40, %esp
movl $0, -12(%ebp)
cmpl $0, 8(%ebp)
jne .L3
movl $0, %eax
jmp .L4
.L3:
movl 8(%ebp), %eax
movl 4(%eax), %eax
cmpl $20, %eax
jne .L5
movl $1, -12(%ebp)
.L5:
movl 8(%ebp), %eax
movl (%eax), %eax
cmpl 12(%ebp), %eax
jne .L6
movl -12(%ebp), %eax
jmp .L4
.L6:
movl 8(%ebp), %eax
movl (%eax), %eax
movl 12(%ebp), %edx
movl %edx, 4(%esp)
movl %eax, (%esp)
call count_helper # <-- this is the key line right here
addl -12(%ebp), %eax
.L4:
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
Related
So I'm pretty much a noob in IA32 assembly language. I tried compiling this C function into IA32 assembly (-mpreferred-stack-boundary=2):
__attribute__((cdecl))
int odd_sum(int a[], int n, int sum) {
if (n == 0) return sum;
else if ((a[n-1] % 2) == 0)
return odd_sum(a, n-1, sum);
else return odd_sum(a, n-1, sum + a[n-1]);
}
and the GCC outputs this:
.file "test.c"
.text
.globl _odd_sum
.def _odd_sum; .scl 2; .type 32; .endef
_odd_sum:
LFB0:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
subl $12, %esp
cmpl $0, 12(%ebp)
jne L2
movl 16(%ebp), %eax
jmp L3
L2:
movl 12(%ebp), %eax
addl $1073741823, %eax
leal 0(,%eax,4), %edx
movl 8(%ebp), %eax
addl %edx, %eax
movl (%eax), %eax
andl $1, %eax
testl %eax, %eax
jne L4
movl 12(%ebp), %eax
leal -1(%eax), %edx
movl 16(%ebp), %eax
movl %eax, 8(%esp)
movl %edx, 4(%esp)
movl 8(%ebp), %eax
movl %eax, (%esp)
call _odd_sum
jmp L3
L4:
movl 12(%ebp), %eax
addl $1073741823, %eax
leal 0(,%eax,4), %edx
movl 8(%ebp), %eax
addl %edx, %eax
movl (%eax), %edx
movl 16(%ebp), %eax
addl %eax, %edx
movl 12(%ebp), %eax
subl $1, %eax
movl %edx, 8(%esp)
movl %eax, 4(%esp)
movl 8(%ebp), %eax
movl %eax, (%esp)
call _odd_sum
L3:
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
LFE0:
.ident "GCC: (MinGW.org GCC-8.2.0-3) 8.2.0"
What I am not able to comprehend are these 2 lines:
addl $1073741823, %eax
leal 0(,%eax,4), %edx
I understand those 2 lines should have something to do with the a[n-1], but I can't seem to be able to understand what exactly they do in the process. Can someone help me with this problem please?
It is just a fancy way of computing the offset into the array a[n-1].
1073741823 is 0x3fffffff. If n is 3, for example, it will add them and get 0x40000002. Then it multiplies by 4 with the second instruction, which results in 0x00000008, discarding the top bits.
So we are left with an offset of 8 bytes, which is exactly the offset (in bytes) that you need for a[n-1], i.e. a[2] (when the size of an int is 4 bytes).
To get a more understandable output with the -S flag:
create assembler code:
c++ -S -fverbose-asm -g -O2 (other optimizaton flags) test.cc -o test.s
create asm interlaced with source lines:
as -alhnd test.s > test.lst
I'm trying to translate from C to assembly two functions, one that sort an array of index and one that find the minimum number starting from an index. When I run the program it works , the new array is sorted but at the end it gives me this error
1 2 3 3 4 6 7
*** stack smashing detected ***: ./invidia terminated
Annullato (core dump creato)
this is my .c file
#include <stdio.h>
#include <stdlib.h>
int* sort(int* v, int dim);
int min(int v[], int dim, int t);
int main(){
int v[] = {1,3,2,4,6,7,3};
int dim = 7,i;
int *c = sort(v,dim);
for (i = 0; i<dim; i++)
printf("%d ", c[i]);
return 0;
}
/*int* sort(int* v, int dim){
int i,j=0,t;
int mino,temp;
for (t = 0; t<dim; t++){
for(i=t;i<dim;i++){
mino = min (v,dim,t); <- min number from index t
if (v[i]==mino){
temp = v[j];
v[j] = v[i];
v[i] = temp;
j++;
break;
}
}
}
return v;
}
int min(int v[], int dim, int t){
int i, min = 1000;
for (i=t; i<dim;i++)
if (v[i]<min)
min = v[i];
return min;
}
*/
and my .s file
.globl sort
sort: #v->ebx dim-> esi i->edi, j->ecx t->edx, mino->eax, temp ->ebp
pushl %ebx
pushl %esi
pushl %edi
pushl %ebp
subl $32, %esp
movl 52(%esp), %ebx
movl 56(%esp), %esi
movl $0, %edi
movl $0, %ecx
movl $0, %eax
movl $0, %edx
movl $0, %ebp
FOR1:
cmpl %esi, %edx
jge EXIT
movl %edx, %edi
FOR2:
cmpl %esi, %edi
jge EXIT2
movl %ebx, (%esp)
movl %esi, 4(%esp)
movl %edx, 8(%esp)
movl %ebx, 12(%esp)
movl %eax, 16(%esp)
movl %ecx, 20(%esp)
movl %edx, 24(%esp)
call min
movl 12(%esp), %ebx
movl 20(%esp), %ecx
movl 24(%esp), %edx
cmpl (%ebx, %edi, 4), %eax
jne PSEUDOEND
movl (%ebx, %ecx, 4), %ebp
movl %esi, 12(%esp)
movl (%ebx, %edi, 4), %esi
movl %esi, (%ebx, %ecx, 4)
movl %ebp, (%ebx, %edi, 4)
movl 12(%esp), %esi
incl %ecx
incl %edx
jmp FOR1
PSEUDOEND:
incl %edi
jmp FOR2
EXIT:
movl %ebx, %eax
popl %ebp
popl %edi
popl %esi
popl %ebx
addl $32, %esp
ret
EXIT2:
incl %edx
jmp FOR1
.globl min
min:
movl 4(%esp), %ecx
movl 8(%esp), %edx
movl 12(%esp), %ebx
movl $1000, %eax
FOR:
cmpl %edx, %ebx
jge END
cmpl %eax, (%ecx, %ebx, 4)
cmovl (%ecx, %ebx, 4), %eax
incl %ebx
jmp FOR
END:
ret
to compile I use
gcc -m32 filename.c filename.s -o filename
I don't think that the problem is in the assembly translation because it prints correctly the new array. What could be the problem?
I have generated two assembly files - one that is optimized, and one that is not. The assembly-language code generated with optimization on should be more efficient than the other assembly-language code. I am more interested in how the efficiency is achieved. To my understanding, in the non-optimized version there will always have to be an offset call to the register %rbp to find the address. In the optimized version, the addresses are being stored in the registers, so you don't have to rely and call on %rbp to find them.
Am I correct? And if so, would there ever be a time when the optimized version will not be advantageous? Thank you for your time.
Here is a function that converts from 42 GIF to CYMK.
void rgb2cmyk(int r, int g, int b, int ret[]) {
int c = 255 - r;
int m = 255 - g;
int y = 255 - b;
int k = (c < m) ? (c < y ? c : y) : (m < y ? m : y);
c -= k; m -= k; y -= k;
ret[0] = c; ret[1] = m; ret[2] = y; ret[3] = k;
}
Here is the assembly-language code that has not been optimized. Note I have made notes using ;; in the code.
No Opt:
.section __TEXT,__text,regular,pure_instructions
.globl _rgb2cmyk
.align 4, 0x90
_rgb2cmyk: ## #rgb2cmyk
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp2:
.cfi_def_cfa_offset 16
Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp4:
.cfi_def_cfa_register %rbp
;;initializing variable c, m, y
movl $255, %eax
movl %edi, -4(%rbp)
movl %esi, -8(%rbp)
movl %edx, -12(%rbp)
movq %rcx, -24(%rbp)
movl %eax, %edx
subl -4(%rbp), %edx
movl %edx, -28(%rbp)
movl %eax, %edx
subl -8(%rbp), %edx
movl %edx, -32(%rbp)
subl -12(%rbp), %eax
movl %eax, -36(%rbp)
movl -28(%rbp), %eax
;;compare
cmpl -32(%rbp), %eax
jge LBB0_5
## BB#1:
movl -28(%rbp), %eax
cmpl -36(%rbp), %eax
jge LBB0_3
## BB#2:
movl -28(%rbp), %eax
movl %eax, -44(%rbp) ## 4-byte Spill
jmp LBB0_4
LBB0_3:
movl -36(%rbp), %eax
movl %eax, -44(%rbp) ## 4-byte Spill
LBB0_4:
movl -44(%rbp), %eax ## 4-byte Reload
movl %eax, -48(%rbp) ## 4-byte Spill
jmp LBB0_9
LBB0_5:
movl -32(%rbp), %eax
cmpl -36(%rbp), %eax
jge LBB0_7
## BB#6:
movl -32(%rbp), %eax
movl %eax, -52(%rbp) ## 4-byte Spill
jmp LBB0_8
LBB0_7:
movl -36(%rbp), %eax
movl %eax, -52(%rbp) ## 4-byte Spill
LBB0_8:
movl -52(%rbp), %eax ## 4-byte Reload
movl %eax, -48(%rbp) ## 4-byte Spill
LBB0_9:
movl -48(%rbp), %eax ## 4-byte Reload
movl %eax, -40(%rbp)
movl -40(%rbp), %eax
movl -28(%rbp), %ecx
subl %eax, %ecx
movl %ecx, -28(%rbp)
movl -40(%rbp), %eax
movl -32(%rbp), %ecx
subl %eax, %ecx
movl %ecx, -32(%rbp)
movl -40(%rbp), %eax
movl -36(%rbp), %ecx
subl %eax, %ecx
movl %ecx, -36(%rbp)
movl -28(%rbp), %eax
movq -24(%rbp), %rdx
movl %eax, (%rdx)
movl -32(%rbp), %eax
movq -24(%rbp), %rdx
movl %eax, 4(%rdx)
movl -36(%rbp), %eax
movq -24(%rbp), %rdx
movl %eax, 8(%rdx)
movl -40(%rbp), %eax
movq -24(%rbp), %rdx
movl %eax, 12(%rdx)
popq %rbp
retq
.cfi_endproc
.subsections_via_symbols
Optimization:
.section __TEXT,__text,regular,pure_instructions
.globl _rgb2cmyk
.align 4, 0x90
_rgb2cmyk: ## #rgb2cmyk
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp2:
.cfi_def_cfa_offset 16
Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp4:
.cfi_def_cfa_register %rbp
movl $255, %r8d
movl $255, %eax
subl %edi, %eax
movl $255, %edi
subl %esi, %edi
subl %edx, %r8d
cmpl %edi, %eax ##;; compare m and c
jge LBB0_2
## BB#1: ;; c < m
cmpl %r8d, %eax ## compare y and c
movl %r8d, %edx
cmovlel %eax, %edx
jmp LBB0_3
LBB0_2: ##;; c >= m
cmpl %r8d, %edi ## compare y and m
movl %r8d, %edx
cmovlel %edi, %edx
LBB0_3:
subl %edx, %eax
subl %edx, %edi
subl %edx, %r8d
movl %eax, (%rcx)
movl %edi, 4(%rcx)
movl %r8d, 8(%rcx)
movl %edx, 12(%rcx)
popq %rbp
retq
.cfi_endproc
.subsections_via_symbols
Yes. The optimized version performs many fewer memory read operations by storing intermediate values in registers and not reloading them over and over.
You are using call wrong. It is a technical term that means to push a return address on the stack and branch to a new location for instructions. The term you mean is simply to use the register.
Can you think of a reason that longer, slower code is "better"?
I have this IA32 assembly language code I'm trying to convert into regular C code.
.globl fn
.type fn, #function
fn:
pushl %ebp #setup
movl $1, %eax #setup 1 is in A
movl %esp, %ebp #setup
movl 8(%ebp), %edx # pointer X is in D
cmpl $1, %edx # (*x > 1)
jle .L4
.L5:
imull %edx, %eax
subl $1, %edx
cmpl $1, %edx
jne .L5
.L4:
popl %ebp
ret
The trouble I'm having is deciding what type of comparison is going on. I don't get how the program gets to the L5 cache. L5 seems to be a loop since there's a comparison within it. I'm also unsure of what is being returned because it seems like most of the work is done is the %edx register, but doesn't go back to %eax for returning.
What I have so far:
int fn(int x)
{
}
It looks to me like it's computing a factorial. Ignoring the stack frame manipulation and such, we're left with:
movl $1, %eax #setup 1 is in A
Puts 1 into eax.
movl 8(%ebp), %edx # pointer X is in D
Retrieves a parameter into edx
imull %edx, %eax
Multiplies eax by edx, putting the result into eax.
subl $1, %edx
cmpl $1, %edx
jne .L5
Decrements edx and repeats if edx != 1.
In other words, this is roughly equivalent to:
unsigned fact(unsigned input) {
unsigned retval = 1;
for ( ; input != 1; --input)
retval *= input;
return retval;
}
I'm trying to understand the assembly code during a recursive function call.
#include<stdio.h>
int recursive(int no){
if(no > 1){
no--;
recursive(no);
printf("\n %d \n",no);
}
else if(no == 1){
return 1;
}
}
int main(){
int a = 10;
recursive(a);
return 0;
}
disassembly :
.file "sample2.c"
.section .rodata
.LC0:
.string "\n %d \n"
.text
.globl recursive
.type recursive, #function
recursive:
pushl %ebp
movl %esp, %ebp
subl $24, %esp
cmpl $1, 8(%ebp)
jle .L2
subl $1, 8(%ebp)
movl 8(%ebp), %eax
movl %eax, (%esp)
call recursive
movl $.LC0, %eax
movl 8(%ebp), %edx
movl %edx, 4(%esp)
movl %eax, (%esp)
call printf
jmp .L5
.L2:
cmpl $1, 8(%ebp)
jne .L5
movl $1, %eax
movl %eax, %edx
movl %edx, %eax
jmp .L4
.L5:
.L4:
leave
ret
.size recursive, .-recursive
.globl main
.type main, #function
main:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp
subl $32, %esp
movl $10, 28(%esp)
movl 28(%esp), %eax
movl %eax, (%esp)
call recursive
movl $0, %eax
leave
ret
.size main, .-main
.ident "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
.section .note.GNU-stack,"",#progbits
I could understand .LC0 always holds the string literals. But I dont know what it really means. Would like to understand the code during the function call recursion was made.
I could not understand what this piece of assembly code does,
subl $24, %esp
cmpl $1, 8(%ebp)
jle .L2
subl $1, 8(%ebp)
movl 8(%ebp), %eax
movl %eax, (%esp)
call recursive
movl $.LC0, %eax
movl 8(%ebp), %edx
movl %edx, 4(%esp)
movl %eax, (%esp)
call printf
jmp .L5
.L2:
cmpl $1, 8(%ebp)
jne .L5
movl $1, %eax
movl %eax, %edx
movl %edx, %eax
jmp .L4
Q1:
The recursive function contains 1 parameter. so after the padding alignment, it has to be 8. why is it 24.
Also in .L2 ,
movl $1, %eax
movl %eax, %edx
movl %edx, %eax
jmp .L4
Q2:
we have moved '1' to the accumulater, why are we moving again to data register and then back to the accumulator.
Q3:
Are we popping out of stack. If leave is used for popping out of stack, are we not popping the rest of the 8 stack frames ?
To answer the only thing in your post that matches your title:
Why are we not popping out from the stack and only push instruction in the assembly.
Because leave is equivalent to:
movl %ebp, %esp
popl %ebp