x86 assembly code confusion - c

We've just begun the topic on assembly and I've been stuck on this problem for the longest time. I have to convert assembly to C code given the following:
C Code:
int foo(int *a, int n, int val) {
int i;
for (i = _________; ____________________________ ; i =___________) {
;
}
return i;
}
Assembly:
// what I've gathered so far
foo()
:
foo:
pushl %ebp
movl %esp,%ebp
movl 8(%ebp),%ecx // ecx: a
movl 16(%ebp),%edx // edx: val
movl 12(%ebp),%eax // eax: n
decl %eax // n = n--
js .L3 // if n < 0 goto done
.L7: // loop
cmpl %edx,(%ecx,%eax,4) // I don't understand how you would compute the
// address for (%ecx,%eax,4) I know it would be %ecx + %eax*4 = %ecx + eax << 2
jne .L3 // if (%ecx, %eax, 4) != val goto done (?)
decl %eax // n = n--
jns .L7 // if (n >= 0) jump to loop
.L3: // done
movl %ebp,%esp
popl %ebp
ret
I don't know how to figure out what i is being initialized to and what the body of the loop is. I'm assuming i = n since n serves as the update. It seems as if there are two conditions one being n > 0 and the other being the cmpl line. Please correct me if my understanding of the code is incorrect, and any clues to this problem is much appreciated.

I may have made an off-by-one error somewhere, but basically it is this:
/*
 * Scans a[] backwards, starting at index n - 1, for the first element that
 * differs from val.
 *
 * Returns the largest index i < n with a[i] != val, or -1 if all n elements
 * equal val. When n <= 0 the loop never runs and n - 1 is returned.
 */
int foo(int *a, int n, int val) {
int i;
/* i corresponds to %eax in the assembly: it starts at n - 1 and is decremented. */
for (i = n - 1; i >= 0 && a[i] == val; i = i - 1) {
; /* empty body: all the work happens in the loop header */
}
return i;
}
The i is the %eax register; it loops from n - 1 to 0. The cmpl indexed access (%ecx,%eax,4) is addressed in bytes - this is equivalent to a[i], as size of int on ia32 is 4 bytes. The 4 bytes addressed thus is compared against val.
The %eax is implicitly returned.
Notice also, that js means < 0, and jns >= 0.
Another way to write it:
i = n;
i --; // decl %eax
if (i < 0) {
goto L3; // js .L3
}
L7:
if (a[i] != val) // cmpl %edx,(%ecx,%eax,4)
goto L3; // jne .L3
i --; // decl %eax
if (i >= 0)
goto L7; // jns .L7
L3:
return i;

An alternative using the preprocessor:
/*
 * Joke solution: define the blanks of the exercise template as macros so the
 * template itself compiles unchanged.
 *
 * The three macros supply, in order, the loop's initial value, its
 * continuation condition and its update expression. The loop counts DOWN
 * from n - 1 (the assembly uses decl %eax), so the update must be i - 1;
 * using i + 1 would step past the end of a[] as soon as a[n - 1] == val.
 *
 * foo() returns the largest index i < n with a[i] != val, or -1 if all n
 * elements equal val (n - 1 when n <= 0, because the loop never runs).
 *
 * Identifiers that begin with two underscores are reserved for the
 * implementation, so this is for amusement only.
 */
#define _________ n - 1
#define ____________________________ i >= 0 && a[i] == val
#define ___________ i - 1
int foo(int *a, int n, int val) {
    int i;
    for (i = _________; ____________________________ ; i =___________) {
        ; /* empty body: everything happens in the loop header */
    }
    return i;
}
Of course you can only use this for fun or to tease new programmers ;-)

Related

How to convert recursion to tail recursion in this example?

I have this recursive function that adds the cubes of the even numbers up to n, and I want to turn it into a tail-recursive one.
/*
 * Returns the sum of k*k*k over every even k in [2, n]; 0 when n < 2.
 *
 * Plain (non-tail) recursion: the addition is performed after the
 * recursive call for n - 1 has returned.
 */
int sum_even_cubes_rec(int n) {
    if (n < 2)
        return 0;

    int rest = sum_even_cubes_rec(n - 1);

    /* Odd n contributes nothing to the sum. */
    if ((n % 2) != 0)
        return rest;

    return n*n*n + rest;
}
This is what I wrote but it is wrong and I don't know how to fix it.
Can you please help me.
int sum_even_cubes_rec2(int n, int acc) {
if ((n % 2) == 0) {
return sum_even_cubes_rec2 (n-1, acc + n*n*n);
} return acc;
}
int sum_even_cubes_helperFunktion(int n) {
return sum_even_cubes_rec2(n, 0);
}
Your approach is correct. You have already added acc argument, so that's what you need to return for the base case.
The rest of your code is almost right - you need to adjust what you add to acc for the next invocation:
/*
 * Tail-recursive sum of the cubes of the even numbers in [2, n].
 *
 * acc carries the partial sum accumulated so far; callers start with 0.
 * When n drops below 2 the accumulated value is the result.
 */
int sum_even_cubes_rec2(int n, int acc) {
    if (n >= 2) {
        /* Only even n adds its cube; odd n passes acc through unchanged. */
        if ((n % 2) == 0) {
            acc += n*n*n;
        }
        return sum_even_cubes_rec2(n - 1, acc);
    }
    return acc;
}
Simply it can be written as this
/*
 * Tail-recursive sum of the cubes of the even numbers in [2, n], using a
 * function-local static as the accumulator instead of an extra parameter.
 *
 * The static is reset to 0 when the base case (n < 2) is reached, so each
 * top-level call starts from a clean total. The running total is shared by
 * every call of this function.
 */
int sum_even_cubes_rec2(int n) {
    static int running_total = 0;

    if (n >= 2) {
        if (n % 2 == 0) {
            running_total += n*n*n;
        }
        return sum_even_cubes_rec2(n - 1);
    }

    /* Base case: hand back the total and clear it for the next caller. */
    int result = running_total;
    running_total = 0;
    return result;
}
/* Returns the sum of k*k*k over every even k in [2, n]; 0 when n < 2. */
int sum_even_cubes(int n) {
int ret =0;
if (n < 2) return 0; /* base case: nothing left to add */
ret = (n % 2) ? 0: n*n*n; /* odd n contributes 0, even n contributes its cube */
/* The addition happens after the recursive call returns, so this is not a
   tail call as written in the source. */
return ret + sum_even_cubes(n-1);
}
Gcc -O2 -S will compile this into (function argument is %edi; return value is in %eax; target for recursion-loop is .L4) :
sum_even_cubes:
.LFB0:
.cfi_startproc
xorl %eax, %eax
cmpl $1, %edi
jle .L5
.p2align 4,,10
.p2align 3
.L4:
xorl %edx, %edx
testb $1, %dil
jne .L3
movl %edi, %edx
imull %edi, %edx
imull %edi, %edx
.L3:
subl $1, %edi
addl %edx, %eax
cmpl $1, %edi
jne .L4
rep ret
.L5:
rep ret
.cfi_endproc
.LFE0:

Can someone convert this assembly to C

I have this class assignment that i can't seem to figure out.
the point is to convert this assembly to C (Code assembled with GNU assembler AT&T syntax):
.section .rdata,"dr"
LC0:
.ascii "%d\12\0"
.text
.globl _main
_main:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp
subl $32, %esp
movl 12(%ebp), %eax
addl $4, %eax
movl (%eax), %eax
movl %eax, (%esp)
call _atoi
movl %eax, 24(%esp)
cmpl $4, 24(%esp)
je L2
cmpl $6, 24(%esp)
jle L3
cmpl $9, 24(%esp)
jg L3
L2:
movl 24(%esp), %eax
addl $20, %eax
movl %eax, 28(%esp)
jmp L4
L3:
cmpl $0, 24(%esp)
jne L5
movl $44, 28(%esp)
jmp L4
L5:
cmpl $-1, 24(%esp)
jne L6
movl $-44, 28(%esp)
jmp L4
L6:
movl $99, 28(%esp)
L4:
movl 28(%esp), %eax
movl %eax, 4(%esp)
movl $LC0, (%esp)
call _printf
movl $0, %eax
leave
ret
however I always get lost on L2. it seems that no matter if we get into the first if statement or not we will always execute the label L2, which doesn't make any sense. I tried to make sense of it all and the closest I got was this C code:
#include <stdio.h>
int main(int argc, char *argv[]){
int y = 0;
int x = atoi(*++argv);
if (x != 4){
if (x > 6 && x <= 9){
y = 20 + x;
}
else if (x == 0){
y = 44;
}
else if (x == -1){
y = -44;
}
else {
y = 99;
}
}
else{
y = 20 + x;
}
printf("%d %d", y, x);
}
can someone please help with this confusing issue i'm having,
thanks
In decoding compiler output it often helps to rewrite the assembly code in a denser, but still very low-level form (pseudo code), using only elementary, low-level transformations. The point is to group/combine small numbers of instructions in a way that is difficult to get wrong but that exposes the inner logic of the code fragment better. The next step is to eliminate redundant stores and temporaries (like EAX used for storing x + 20 to y). I'm skipping the first step here but it can be dangerous to do that in more complicated code.
The sequence of conditionals then becomes:
x equ [esp + 24]
y equ [esp + 28]
if (x == 4) goto L2;
if (x <= 6) goto L3;
if (x > 9) goto L3;
L2: y = x + 20; goto L4;
L3: if (x != 0) goto L5;
y = 44; goto L4;
L5: if (x != -1) goto L6;
y = -44; goto L4;
L6: y = 99;
L4: printf("%d\n", y);
The first three conditionals form a conspicuous pattern employed by compilers to evaluate complex conditions. The compiler inverted the second and third terms of that conditional to use its 'jump around' solution fragment; inverting again allows you to code the original condition (jumps to L2/'then' are ORs, jumps to L3/'else' are AND NOTs):
if (x == 4 || !(x <= 6) && !(x > 9))
->
if (x == 4 || (x > 6) && (x <= 9))
and Bob's your uncle. The other conditionals could have been the result of a chained if or of a switch statement, it's difficult to tell. But that hardly matters. Hence your decompilation was almost perfect already, you just missed a tiny beat. In an intermediate step the C-ified conditionals look like this:
if (x == 4 || x > 6 && x <= 9)
{
y = x + 20;
}
else // L3
{
if (x == 0)
{
y = 44;
}
else // L5
{
if (x == -1)
{
y = -44;
}
else // L6
{
y = 99;
}
}
}
This can then be tightened to:
if (x == 4 || x > 6 && x <= 9)
{
y = x + 20;
}
else if (x == 0)
{
y = 44;
}
else if (x == -1)
{
y = -44;
}
else
{
y = 99;
}
P.S.: the value of (argv + 1) is not stored back to argv, just dereferenced. Hence it's atoi(*(argv + 1)) or atoi(argv[1]).

Assembly Code to C

I was practicing some assembly code to C and need some help with two questions. Based on the GCC objdump it seems okay but I want to make sure I can do this WITHOUT a computer (still kind of new to assembly code)
Question 1 :
q1:
pushl %ebp
movl %esp, %ebp
subl $4, %esp
cmpl $0, 8(%ebp)\\ compare variable1 to zero
jle .L2 \\jump if less than or equal to zero
movl $1, -4(%ebp)\\ ?? variable2 = 1??
jmp .L4\\else
.L2:
movl $0, -4(%ebp)\\ variable2 = 0
.L4:
movl -4(%ebp), %eax\\ variable2 = variable1
leave
ret
what I got was
int main(int x, int z)
{
if (x < 0)
z = 0;
else
z = x;
}
But I was not sure what the purpose of movl $1, -4(%ebp) was.
Question 2 :
fn:
pushl %ebp
movl $1, %eax
movl %esp, %ebp
movl 8(%ebp), %edx
cmpl $1, %edx\\ compare variable1 to 1
jle .L4\\ less than or equal jump.
.L5:
imull %edx, %eax\\ multiply variable1 by variable 2
subl $1, %edx\\ variable1 -1
cmpl $1, %edx\\ compare variable1 with 1
jne .L5 Loop if not equal
.L4:
popl %ebp\\ return value
ret
How I interpreted the information
int main(int x)
{
int result;
if (x <= 1){
for (result=1; x != 1; x = x-1)
result *= x;}
else{return result;}
}
Not sure if my logic is correct on either of those.
Q1 you have one argument 8(%ebp) and one local variable at -4(%ebp). Return value will be in %eax. Knowing this, the function looks more like:
/*
 * C equivalent of q1: returns 1 when arg is greater than zero, otherwise 0.
 *
 * arg is the stack argument at 8(%ebp); local is the stack slot at
 * -4(%ebp), whose value is loaded into %eax as the return value.
 */
int foo(int arg)
{
    int local;

    /* Same branch order as the assembly: the fall-through path stores 1,
       the jle target (.L2) stores 0. */
    if (arg > 0) {
        local = 1;
    } else {
        local = 0;
    }
    return local;
}
Q2: your comment "popl %ebp // return value" is wrong: that instruction is not the return value, it restores the saved %ebp of the caller (the one that was pushed at the beginning). Also, if you keep a plain for loop, its condition should use > rather than != (the assembly gets away with jne only because the jle guard has already ensured x > 1); you are missing that if (x > 1) conditional around the for loop. (Thanks to Mooing Duck for pointing this out.) Also, technically it's a do-while loop. Otherwise you got this function right.
int factorial(int x)
{
int result = 1;
if (x > 1) {
do {
result *= x;
x -= 1;
} while(x != 1);
}
return result;
}

understanding testl in assembly language

Trying to understand some assembly language, but I am not sure if I am understanding it correctly
movl 8(%ebp),%eax // assign %eax to a variable, say var
testl %eax,%eax // test if var is > 0 or not. if var is > 0, jump to .L3
jge .L3
addl $15,%eax // add 15 to var
.L3:
sarl $4,%eax // shift var 4 to the right , which is the same as multiplying var by 16
given by above understanding, I wrote the following code
int function(int x){
int var = x;
if(var>0) {
ret = ret * 16;
}
ret = ret + 15;
return ret;
}
however, my assembly code looks like the following
movl 8(%ebp), %eax
movl %eax, %edx
sall $4, %edx
testl %eax, %eax
cmovg %edx, %eax
addl $15, %eax
am I misunderstanding the original assembly code somewhere?
Edit: is there perhaps a loop involved?
Notice that the code continues with the shift even after the addition, and that jge also includes the equal case. Thus the code could look more like this:
/*
 * Literal C rendering of the assembly, keeping its jump structure.
 *
 * Negative inputs get a bias of 15 before the arithmetic right shift by 4.
 * The bias makes the shift round toward zero, so the overall result equals
 * x / 16 in C.
 *
 * The last step must be a shift, not "/ 16": applying C's truncating
 * division to the already-biased value would round the wrong way
 * (e.g. x = -17 gives (-2) / 16 == 0 instead of -1).
 *
 * NOTE: >> on a negative int is implementation-defined in ISO C; GCC (which
 * emits sarl here) defines it as an arithmetic, sign-extending shift.
 */
int function(int x) {
    int ret = x;
    if (ret >= 0) goto skip_add;    /* jge .L3 */
    ret = ret + 15;                 /* addl $15,%eax */
skip_add:
    ret = ret >> 4;                 /* sarl $4,%eax */
    return ret;
}
Or, to avoid the goto, reverse the condition:
/*
 * Structured C rendering of the assembly (same logic as a goto version,
 * with the branch condition reversed).
 *
 * Negative inputs get a bias of 15 before the arithmetic right shift by 4.
 * The bias makes the shift round toward zero, so the overall result equals
 * x / 16 in C.
 *
 * The last step must be a shift, not "/ 16": applying C's truncating
 * division to the already-biased value would round the wrong way
 * (e.g. x = -17 gives (-2) / 16 == 0 instead of -1).
 *
 * NOTE: >> on a negative int is implementation-defined in ISO C; GCC (which
 * emits sarl here) defines it as an arithmetic, sign-extending shift.
 */
int function(int x) {
    int ret = x;
    if (ret < 0) {
        ret = ret + 15;     /* addl $15,%eax, skipped by jge for ret >= 0 */
    }
    ret = ret >> 4;         /* sarl $4,%eax */
    return ret;
}
PS: shifting right is division, shifting left would be multiplication.

Assembly to C matrix assistance

I'm really confused by my homework assignment. We are given C code and then assembly which are listed below. This is x86 assembly. Any assistance would be greatly appreciated. I've made an attempt to solve it based on what I'm understanding.
C code:
/*
 * Transposes A in place: for every pair (i, j) with j < i, swaps A[i][j]
 * with A[j][i].
 *
 * NOTE(review): Marray_t and M are declared elsewhere; presumably Marray_t
 * is an M x M array of int. Confirm against its definition.
 */
void transpose(Marray_t A) {
int i, j;
for (i = 0; i < M; i++)
for (j = 0; j < i; j++) { /* j < i: each off-diagonal pair is visited exactly once */
int t = A[i][j];
A[i][j] = A[j][i];
A[j][i] = t;
}
}
Assembly code for ONLY inner loop:
1 .L3:
2 movl (%ebx), %eax //is this getting the mem location of %ebx and setting to %eax?
3 movl (%esi,%ecx,4), %edx //ecx * 4 + esi into edx
4 movl %eax, (%esi,%ecx,4) //
5 addl $1, %ecx //add 1 to ecx
6 movl %edx, (%ebx) //move edx to mem location of ebx???
7 addl $52, %ebx //I think this is M but I could be wrong
8 cmpl %edi, %ecx //compare edi & ecx
9 jl .L3
here is what I have to answer:
A. What is the value of M? ...I think this is 52...?
B. What registers hold program values i and j? ... I think edx and eax?
C. Write a C code version of transpose that makes use of the optimizations
that occur in this loop. Use the parameter M in your code rather than numeric
constants.
Attempt at (C):
void tranpose(Marray_t A) {
int i, j;
for(i = 0; i < M; i++) {
for(j = 0; j < i; j++) {
int *row = &A[i][0];
int *col = &A[0][j];
int value = (*row * 4) + *col;
}
}
}
1 .L3:
2 movl (%ebx), %eax // eax := read memory word at ebx
3 movl (%esi,%ecx,4), %edx // edx := read memory word at esi + 4*ecx
4 movl %eax, (%esi,%ecx,4) // store eax into that location
5 addl $1, %ecx // add 1 to ecx
6 movl %edx, (%ebx) // store edx into memory at ebx
7 addl $52, %ebx // add 52 to ebx
8 cmpl %edi, %ecx // compare edi & ecx
9 jl .L3
So in this code, %ebx holds the address of A[j][i], %esi is the address of A[i], %ecx is j and %edi is i (it is the bound that j is compared against). 52 is sizeof(A[j]), i.e. the size of one row, so M is probably 13 (52 / 4, as the array element size is 4 bytes).

Resources