I was practicing translating assembly code to C and need some help with two questions. Based on the GCC objdump my answers seem okay, but I want to make sure I can do this WITHOUT a computer (I'm still fairly new to assembly code).
Question 1 :
q1:
pushl %ebp
movl %esp, %ebp
subl $4, %esp
cmpl $0, 8(%ebp)\\ compare variable1 to zero
jle .L2 \\jump if less than or equal to zero
movl $1, -4(%ebp)\\ ?? variable2 = 1??
jmp .L4\\else
.L2:
movl $0, -4(%ebp)\\ variable2 = 0
.L4:
movl -4(%ebp), %eax\\ variable2 = variable1
leave
ret
what I got was
int main(int x, int z)
{
if (x < 0)
z = 0;
else
z = x;
}
But I was not sure what the purpose of movl $1, -4(%ebp) was.
Question 2 :
fn:
pushl %ebp
movl $1, %eax
movl %esp, %ebp
movl 8(%ebp), %edx
cmpl $1, %edx\\ compare variable1 to 1
jle .L4\\ less than or equal jump.
.L5:
imull %edx, %eax\\ multiply variable1 by variable 2
subl $1, %edx\\ variable1 -1
cmpl $1, %edx\\ compare variable1 with 1
jne .L5 Loop if not equal
.L4:
popl %ebp\\ return value
ret
How I interpreted the information
int main(int x)
{
int result;
if (x <= 1){
for (result=1; x != 1; x = x-1)
result *= x;}
else{return result;}
}
Not sure if my logic is correct on either of those.
Q1: You have one argument at 8(%ebp) and one local variable at -4(%ebp). The return value will be in %eax. Knowing this, the function looks more like:
/* C equivalent of q1: yields 1 for a positive argument and 0 otherwise. */
int foo(int arg)
{
    int local = 0;      /* the stack slot at -4(%ebp) */

    if (arg > 0)        /* jle .L2 takes the "0" path when arg <= 0 */
        local = 1;

    return local;       /* loaded into %eax just before leave/ret */
}
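Q2: The popl %ebp that you annotated as "return value" is not the return value; it restores the caller's saved %ebp (the one that was pushed at the beginning). The return value is whatever is in %eax when ret executes. The loop condition x != 1 is right (it matches the jne .L5), but the guard around the loop is inverted: the jle .L4 skips the loop when x <= 1, so the loop has to be wrapped in if (x > 1), not if (x <= 1). (Thanks to Mooing Duck for pointing this out.) Also, technically it's a do-while loop, because the condition is tested after the body. Otherwise you got this function right.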
/*
 * C equivalent of fn: multiplies result by x, x-1, ..., 2.
 * Any x <= 1 returns 1 without entering the loop.
 */
int factorial(int x)
{
    int result = 1;             /* movl $1, %eax */

    if (x <= 1)                 /* cmpl $1, %edx ; jle .L4 */
        return result;

    do {                        /* .L5 */
        result *= x;            /* imull %edx, %eax */
        --x;                    /* subl $1, %edx */
    } while (x != 1);           /* cmpl $1, %edx ; jne .L5 */

    return result;
}
Related
This question was proposed to me by a friend and I have no idea how to solve it.
loop:
leal (%rdi, %rdi, 4), %eax
leal (%rsi, %rax, 2), %eax
leal 0(, %rax, 4), %edx
cmpl %edx, %esi
jge .L1
leal (%rdi, %rdi, 2), %edx
.L3:
addl %edx, %eax
cmpl $-2, %esi
jl .L3
.L1:
rep ret
And is supposed to map to this loop in C,
int loop(int a, int b){
int x, y;
y = ____;
for (____; ____; ____){
____;
}
return ____;
}
My attempt at converting the assembly to C,
y = 5a;
y = b + 2y;
x = 4y;
if (x < b){
x = 3a;
do{
y += x;
} while (b <= -2);
}
return y;
I assumed %eax = y, since 'y' in the code to fill is the first variable being assigned.
'x' follows as %edx since it's another assignment, and so should be at least part of the "Initialisation" of the for loop.
However, this doesn't seem to fit into the blanks provided, so I am really stuck.
I think I've got a really close, if not perfect, solution:
/* rdi = a, rsi = b */
/* rax = y, rdx = x */
/*
loop:
leal (%rdi, %rdi, 4), %eax
leal (%rsi, %rax, 2), %eax
leal 0(, %rax, 4), %edx
cmpl %edx, %esi
jge .L1
leal (%rdi, %rdi, 2), %edx
.L3:
addl %edx, %eax
cmpl $-2, %esi
jl .L3
.L1:
rep ret
*/
/*
 * Candidate reconstruction of the `loop` listing in the shape of the
 * fill-in-the-blanks template: y follows %eax, x follows %edx.
 * b is never modified, so when the body is entered with b < -2 the inner
 * do-while does not terminate (the same is true of the .L3 loop in the listing).
 */
int loop(int a, int b){
int x, y;
y = b + (a * 5) * 2; /* first two leal's: %eax = b + 2 * (5 * a) */
for (x = y * 4; x > b;){ /* third leal: %edx = 4 * y; jge .L1 skips the body when b >= x */
do y += (x = a * 3); while(b < -2); /* .L3: addl %edx, %eax with %edx = 3 * a, repeated while b < -2 (jl .L3) */
break; /* the listing never branches back to the x > b test, so leave after one pass */
}
return y; /* %eax holds y when .L1 is reached */
}
Not sure if break; is an issue but I can't find a better way.
I am rephrasing this question based on the comments received.
I have a loop that runs 30 Billion times and assigns values to a chunk of memory assigned using malloc();
When the loop contains a condition it runs much slower than when the condition is not present. Review the scenarios below:
Scenario A: Condition is present and program is slow (43 sec)
Scenario B: Condition is not present and program is much faster (4 sec)
// gcc -O3 -c block.c && gcc -o block block.o
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define LEN 3000000000
/*
 * Fills a LEN-byte heap buffer ten times and prints the elapsed wall-clock
 * time in whole seconds.  Scenario B (unconditional stores) is active;
 * scenario A (store selected by i % 2) is kept commented out for comparison.
 *
 * Returns 0 on success, 1 if the buffer cannot be allocated.
 */
int main (int argc, char** argv){
    long i, j;
    unsigned char *n = NULL;
    unsigned char *m = NULL;
    time_t start;

    (void) argc;    /* the command line is not used */
    (void) argv;

    m = (unsigned char *) malloc (sizeof(char) * LEN);
    if (m == NULL){
        /* A multi-gigabyte request can fail; do not write through NULL. */
        perror("malloc");
        return 1;
    }
    n = m;
    srand ((unsigned) time(NULL));
    start = time(NULL);
    for (j = 0; j < 10; j++){
        n = m;      /* restart at the beginning of the buffer on every pass */
        for (i = 0; i < LEN; i++){
            //////////// A: THIS IS SLOW
            /*
            if (i % 2){
                *n = 1;
            } else {
                *n = 0;
            }
            */
            /////////// END OF A
            /////////// B: THIS IS FAST
            *n = 0;
            i % 2;  /* result is discarded; kept so B mirrors the operations in A */
            *n = 1;
            /////////// END OF B
            n += 1;
        }
    }
    /* Cast the time_t difference so the argument matches the %d conversion. */
    printf("Done. %d sec \n", (int) (time(NULL) - start));
    free(m);
    return 0;
}
Regards,
KD
You can use gcc -S -O3 to have a look at the resulting assembler.
Here is an example on an Intel box:
Fast version:
movl %eax, %r12d
.p2align 4,,10
.p2align 3
.L2:
movl $3000000000, %edx
movl $1, %esi
movq %rbp, %rdi
call memset
subq $1, %rbx
jne .L2
Slow version:
movl $10, %edi
movl %eax, %ebp
movl $3000000000, %esi
.p2align 4,,10
.p2align 3
.L2:
xorl %edx, %edx
.p2align 4,,10
.p2align 3
.L5:
movq %rdx, %rcx
andl $1, %ecx
movb %cl, (%rbx,%rdx)
addq $1, %rdx
cmpq %rsi, %rdx
jne .L5
subq $1, %rdi
jne .L2
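Conclusion: the compiler is smarter than you think. In scenario B the store of 0 is dead (it is immediately overwritten by the store of 1) and the statement i % 2; has no effect, so the inner loop merely fills the buffer with a constant and the compiler replaces it with a call to memset (which is faster because it uses SSE/AVX or REP instructions on Intel). This optimization cannot kick in if the condition is kept, because the stored value then depends on i and the result is different.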
I have this class assignment that I can't seem to figure out.
The point is to convert this assembly to C (code assembled with the GNU assembler, AT&T syntax):
.section .rdata,"dr"
LC0:
.ascii "%d\12\0"
.text
.globl _main
_main:
/* Parses the string at argv[1] with atoi, maps the result x to a value y, and prints y using the format at LC0 ("%d" followed by a newline). x lives at 24(%esp), y at 28(%esp). */
pushl %ebp
movl %esp, %ebp
andl $-16, %esp /* round %esp down to a multiple of 16 */
subl $32, %esp /* room for the outgoing call arguments and the two locals */
movl 12(%ebp), %eax /* second argument of main (argv) */
addl $4, %eax /* address of argv[1]; the result is not stored back to argv */
movl (%eax), %eax /* argv[1] */
movl %eax, (%esp)
call _atoi
movl %eax, 24(%esp) /* x = atoi(argv[1]) */
cmpl $4, 24(%esp)
je L2 /* x == 4: take the L2 branch */
cmpl $6, 24(%esp)
jle L3 /* x <= 6: go to L3 */
cmpl $9, 24(%esp)
jg L3 /* x > 9: go to L3; otherwise 6 < x <= 9 falls through into L2 */
L2:
movl 24(%esp), %eax
addl $20, %eax
movl %eax, 28(%esp) /* y = x + 20 */
jmp L4
L3:
cmpl $0, 24(%esp)
jne L5 /* x != 0: try the next case */
movl $44, 28(%esp) /* y = 44 */
jmp L4
L5:
cmpl $-1, 24(%esp)
jne L6 /* x != -1: fall back to the default */
movl $-44, 28(%esp) /* y = -44 */
jmp L4
L6:
movl $99, 28(%esp) /* y = 99 */
L4:
movl 28(%esp), %eax
movl %eax, 4(%esp) /* second printf argument: y */
movl $LC0, (%esp) /* first printf argument: the format string */
call _printf
movl $0, %eax /* return 0 */
leave
ret
However, I always get lost on L2. It seems that no matter whether we get into the first if statement or not, we will always execute the code at label L2, which doesn't make any sense. I tried to make sense of it all, and the closest I got was this C code:
#include <stdio.h>
int main(int argc, char *argv[]){
int y = 0;
int x = atoi(*++argv);
if (x != 4){
if (x > 6 && x <= 9){
y = 20 + x;
}
else if (x == 0){
y = 44;
}
else if (x == -1){
y = -44;
}
else {
y = 99;
}
}
else{
y = 20 + x;
}
printf("%d %d", y, x);
}
can someone please help with this confusing issue i'm having,
thanks
In decoding compiler output it often helps to rewrite the assembly code in a denser, but still very low-level form (pseudo code), using only elementary, low-level transformations. The point is to group/combine small numbers of instructions in a way that is difficult to get wrong but that exposes the inner logic of the code fragment better. The next step is to eliminate redundant stores and temporaries (like EAX used for storing x + 20 to y). I'm skipping the first step here but it can be dangerous to do that in more complicated code.
The sequence of conditionals then becomes:
x equ [esp + 24]
y equ [esp + 28]
if (x == 4) goto L2;
if (x <= 6) goto L3;
if (x > 9) goto L3;
L2: y = x + 20; goto L4;
L3: if (x != 0) goto L5;
y = 44; goto L4;
L5: if (x != -1) goto L6;
y = -44; goto L4;
L6: y = 99;
L4: printf("%d\n", y);
The first three conditionals form a conspicuous pattern employed by compilers to evaluate complex conditions. The compiler inverted the second and third terms of that conditional to use its 'jump around' solution fragment; inverting again allows you to code the original condition (jumps to L2/'then' are ORs, jumps to L3/'else' are AND NOTs):
if (x == 4 || !(x <= 6) && !(x > 9))
->
if (x == 4 || (x > 6) && (x <= 9))
and Bob's your uncle. The other conditionals could have been the result of a chained if or of a switch statement, it's difficult to tell. But that hardly matters. Hence your decompilation was almost perfect already, you just missed a tiny beat. In an intermediate step the C-ified conditionals look like this:
if (x == 4 || x > 6 && x <= 9)
{
y = x + 20;
}
else // L3
{
if (x == 0)
{
y = 44;
}
else // L5
{
if (x == -1)
{
y = -44;
}
else // L6
{
y = 99;
}
}
}
This can then be tightened to:
if (x == 4 || x > 6 && x <= 9)
{
y = x + 20;
}
else if (x == 0)
{
y = 44;
}
else if (x == -1)
{
y = -44;
}
else
{
y = 99;
}
P.S.: the value of (argv + 1) is not stored back to argv, just dereferenced. Hence it's atoi(*(argv + 1)) or atoi(argv[1]).
The assembly function with commented c version:
/*
struct X
{
int c; // 4 bytes
struct X *next; // 4 bytes
};
int add2 (struct X *x)
{
if (x == NULL) return 0;
else return x->c + add2(x->next);
}
*/
.text
.globl add2
add2:
/* Recursive list sum: the node pointer x is read from 8(%ebp) and the result is left in %eax. */
/* Node layout used below: c at offset 0, next at offset 4. */
/********************************** prologue *************************************/
pushl %ebp
movl %esp, %ebp
pushl %ebx /* %ebx and %esi are overwritten below; they are restored at end */
pushl %esi
/********************************************************************************/
movl 8(%ebp), %ebx /* %ebx = x */
cmpl $0, %ebx
jne out /* x != NULL: handle the node */
movl $0, %eax /* x == NULL: return 0 */
jmp end
out:
/***************************** recursive call on x->next ***************************/
pushl %ecx
pushl %edx
pushl %eax /* saves whatever %eax held on entry */
movl 4(%ebx), %esi /* %esi = x->next */
pushl %esi
call add2 /* returns add2(x->next) in %eax */
addl $4, %esp /* drop the pushed argument */
popl %eax /* NOTE(review): reloads the %eax saved before the call, discarding the value add2 just returned */
popl %edx
popl %ecx
/********************************************************************************/
cmpl $0, (%ebx) /* sign flag is set when x->c is negative */
js neg /* negative c: go to the neg path */
addl (%ebx), %eax /* %eax += x->c. NOTE(review): no jump to end follows, so execution continues into neg */
neg:negl (%ebx) /* x->c = -(x->c); this writes to the list node itself */
subl (%ebx), %eax /* %eax -= the negated value now stored in x->c */
end:
/**************************************** epilogue *********************************/
popl %esi
popl %ebx
movl %ebp, %esp
popl %ebp
ret
/*********************************************************************************/
The main c code:
#include <stdio.h>
#include <stdlib.h>
struct X
{
int c;
struct X * next;
};
typedef struct X Xlist;
Xlist * lst_create (void)
{
return NULL;
}
Xlist * lst_insert (Xlist * l, int c)
{
Xlist * new = (Xlist*) malloc(sizeof(Xlist));
new->c = c;
new->next = l;
return new;
}
int add2 (struct X * x);
int main (void)
{
// int i;
Xlist * l;
l = lst_create();
//for (i=-9;i<10;i++)
l = lst_insert(l, -1);
printf("%d\n", add2(l));
return 0;
}
The intention is to print the sum of the elements of a linked list.
I'm getting memory garbage when using negative values. I believe the error is somehow here:
neg:negl (%ebx) /* c = |c| */
subl (%ebx), %eax /* return x->(-)c + add2(x->next); */
But why?
Already used the same algorithm in other add function and it was ok.
It seems to me that a big problem is that your recursive call to add2() ignores the return value:
pushl %eax
movl 4(%ebx), %esi
pushl %esi
call add2
addl $4, %esp
popl %eax ; <-- overwrites what the add2 call returned
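Also, your C equivalent code doesn't really seem to be equivalent. The assembly version modifies the negative values in the list to be positive; that isn't reflected in your C code version. Note too that there is no jump to end after the addl (%ebx), %eax, so for non-negative values execution falls through into neg: the value is added, then negated in the list, then subtracted again. The special case is unnecessary anyway, because addl (%ebx), %eax already handles negative values correctly.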
Trying to understand some assembly language, but I am not sure if I am understanding it correctly
movl 8(%ebp),%eax // assign %eax to a variable, say var
testl %eax,%eax // test if var is > 0 or not. if var is > 0, jump to .L3
jge .L3
addl $15,%eax // add 15 to var
.L3:
sarl $4,%eax // shift var 4 to the right , which is the same as multiplying var by 16
given by above understanding, I wrote the following code
int function(int x){
int var = x;
if(var>0) {
ret = ret * 16;
}
ret = ret + 15;
return ret;
}
however, my assembly code looks like the following
movl 8(%ebp), %ebp
movl %eax. %edx
sall $4, %edx
test1 %eax, %eax
cmovg %edx, %eax
addl $15, %eax
am I misunderstanding the original assembly code somewhere?
Edit: is there perhaps a loop involved?
Notice that the code continues with the shift even after the addition, and that jge also includes the equal case. Thus the code could look more like this:
/*
 * Literal translation of the listing: negative inputs are biased by 15 and
 * the value is then shifted right arithmetically by 4.  Taken together this
 * yields x / 16 rounded toward zero.
 *
 * The final step must be a shift, not "/ 16": dividing the already biased
 * value would round a second time (for example -16 would give 0 instead of -1).
 * Right-shifting a negative int is implementation-defined in C; this relies
 * on an arithmetic shift, which is what sarl performs.
 */
int function(int x) {
    int ret = x;
    if (ret >= 0) goto skip_add;    /* testl %eax, %eax ; jge .L3 */
    ret = ret + 15;                 /* addl $15, %eax */
skip_add:
    ret = ret >> 4;                 /* sarl $4, %eax */
    return ret;
}
Or, to avoid the goto, reverse the condition:
/*
 * Structured translation of the listing: negative inputs are biased by 15
 * and the value is then shifted right arithmetically by 4, which yields
 * x / 16 rounded toward zero.
 *
 * The final step must be a shift, not "/ 16": dividing the already biased
 * value would round a second time (for example -16 would give 0 instead of -1).
 * Right-shifting a negative int is implementation-defined in C; this relies
 * on an arithmetic shift, which is what sarl performs.
 */
int function(int x) {
    int ret = x;
    if (ret < 0) {
        ret = ret + 15;     /* addl $15, %eax (skipped by jge when ret >= 0) */
    }
    ret = ret >> 4;         /* sarl $4, %eax */
    return ret;
}
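PS: Shifting right is division, shifting left would be multiplication. An arithmetic right shift rounds toward negative infinity, whereas C's / rounds toward zero; adding 15 to negative values before the shift compensates for that, so the whole sequence is what a compiler emits for x / 16.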