I'm reading a book Computer Systems: A Programmer's Perspective (2nd Edition)
and Practice Problem 3.23 are little confused me:
A function fun_b has the following overall structure:
int fun_b(unsigned x) {
int val = 0;
int i;
for ( ____;_____;_____) {
}
return val;
}
The gcc C compiler generates the following assembly code:
x at %ebp+8
1 movl 8(%ebp), %ebx
2 movl $0, %eax
3 movl $0, %ecx
.L13:
5 leal (%eax,%eax), %edx
6 movl %ebx, %eax
7 andl $1, %eax
8 orl %edx, %eax
9 shrl %ebx Shift right by 1
10 addl $1, %ecx
11 cmpl $32, %ecx
12 jne .L13
Reverse engineer the operation of this code and then do the following:
A. Use the assembly-code version to fill in the missing parts of the C code.
My solution.
int fun_b(unsigned x) {
int val = 0;
int i;
for ( i = 0 ;i < 32;i++) {
val += val; //because leal (%eax,%eax), edx --> %edx = %eax + %eax
val = val | x & 0x1;
x >>= 1;
}
return val;
}
Book's solution.
int fun_b(unsigned x) {
int val = 0;
int i;
for (i = 0; i < 32; i++) {
val = (val << 1) | (x & 0x1);
x >>= 1;
}
return val;
}
Please, explain to me why leal function has non typical behavior in this function.
And I dont understand how this assembly code is yielding this statement val = (val << 1) | (x & 0x1)
In your code:
val += val;
val = val | x & 0x1;
Here, val += val which is equivalent to (val*2) which is effectively equal to val left shifted by 1.
But I think your solution is correct only if the assembly code was something like:
x at %ebp+8
1 movl 8(%ebp), %ebx
2 movl $0, %eax
3 movl $0, %ecx
.L13:
5 addl %eax, %eax
6 movl %ebx, %edx
7 andl $1, %edx
8 orl %edx, %eax
9 shrl %ebx # shift right by 1
10 addl $1, %ecx
11 cmpl $32, %ecx
12 jne .L13
Because if val + val was a separate statement, compiler usually places it in eax register rather than in edx (i'm not sure this is the case always). So, for the code you have given, the possible solutions are:
val = (val << 1) | (x & 0x1);
or
val = (val + val) | (x & 0x1);
or
val = (val * 2) | (x & 0x1);
x >>= 1; means multiplying x by 2 which in binary is shifting to the left or adding 0 at the right side
x >>= 1; == x * 2; == x +=x;
Related
This question was proposed to me by a friend and I have no idea how to solve it.
loop:
leal (%rdi, %rdi, 4), %eax
leal (%rsi, %rax, 2), %eax
leal 0(, %rax, 4), %edx
cmpl %edx, %esi
jge .L1
leal (%rdi, %rdi, 2), %edx
.L3:
addl %edx, %eax
cmpl $-2, %esi
jl .L3
.L1:
rep ret
And is supposed to map to this loop in C,
int loop(int a, int b){
int x, y;
y = ____;
for (____; ____; ____){
____;
}
return ____;
}
My attempt at converting the assembly to C,
y = 5a;
y = b + 2y;
x = 4y;
if (x < b){
x = 3a;
do{
y += x;
} while (b <= -2);
}
return y;
I assumed %eax = y, since 'y' in the code to fill is the first variable being assigned.
'x' follows as %edx since it's another assignment, and so should be at least part of the "Initialisation" of the for loop.
However this doesn't seem to fix into the blanks provided, so I am really stuck.
I think I've got a really close, if not perfect solution:
/* rdi = a, rsi = b */
/* rax = y, rdx = x */
/*
loop:
leal (%rdi, %rdi, 4), %eax
leal (%rsi, %rax, 2), %eax
leal 0(, %rax, 4), %edx
cmpl %edx, %esi
jge .L1
leal (%rdi, %rdi, 2), %edx
.L3:
addl %edx, %eax
cmpl $-2, %esi
jl .L3
.L1:
rep ret
*/
int loop(int a, int b){
int x, y;
y = b + (a * 5) * 2;
for (x = y * 4; x > b;){
do y += (x = a * 3); while(b < -2);
break;
}
return y;
}
Not sure if break; is an issue but I can't find a better way.
I have this class assignment that i can't seem to figure out.
the point is to convert this assembly to C (Code assembled with GNU assembler AT&T syntax):
.section .rdata,"dr"
LC0:
.ascii "%d\12\0"
.text
.globl _main
_main:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp
subl $32, %esp
movl 12(%ebp), %eax
addl $4, %eax
movl (%eax), %eax
movl %eax, (%esp)
call _atoi
movl %eax, 24(%esp)
cmpl $4, 24(%esp)
je L2
cmpl $6, 24(%esp)
jle L3
cmpl $9, 24(%esp)
jg L3
L2:
movl 24(%esp), %eax
addl $20, %eax
movl %eax, 28(%esp)
jmp L4
L3:
cmpl $0, 24(%esp)
jne L5
movl $44, 28(%esp)
jmp L4
L5:
cmpl $-1, 24(%esp)
jne L6
movl $-44, 28(%esp)
jmp L4
L6:
movl $99, 28(%esp)
L4:
movl 28(%esp), %eax
movl %eax, 4(%esp)
movl $LC0, (%esp)
call _printf
movl $0, %eax
leave
ret
however I always get lost on L2. it seems that no matter if we get into the first if statement or not we will always execute the label L2, which doesn't make any sense. I tried to make sense of it all and the closest I got was this C code:
#include <stdio.h>
int main(int argc, char *argv[]){
int y = 0;
int x = atoi(*++argv);
if (x != 4){
if (x > 6 && x <= 9){
y = 20 + x;
}
else if (x == 0){
y = 44;
}
else if (x == -1){
y = -44;
}
else {
y = 99;
}
}
else{
y = 20 + x;
}
printf("%d %d", y, x);
}
can someone please help with this confusing issue i'm having,
thanks
In decoding compiler output it often helps to rewrite the assembly code in a denser, but still very low-level form (pseudo code), using only elementary, low-level transformations. The point is to group/combine small numbers of instructions in a way that is difficult to get wrong but that exposes the inner logic of the code fragment better. The next step is to eliminate redundant stores and temporaries (like EAX used for storing x + 20 to y). I'm skipping the first step here but it can be dangerous to do that in more complicated code.
The sequence of conditionals then becomes:
x equ [esp + 24]
y equ [esp + 28]
if (x == 4) goto L2;
if (x <= 6) goto L3;
if (x > 9) goto L3;
L2: y = x + 20; goto L4;
L3: if (x != 0) goto L5;
y = 44; goto L4;
L5: if (x != -1) goto L6;
y = -44; goto L4;
L6: y = 99;
L4: printf("%d\f", y);
The first three conditionals form a conspicuous pattern employed by compilers to evaluate complex conditions. The compiler inverted the second and third terms of that conditional to use its 'jump around' solution fragment; inverting again allows you to code the original condition (jumps to L2/'then' are ORs, jumps to L3/'else' are AND NOTs):
if (x == 4 || !(x <= 6) && !(x > 9))
->
if (x == 4 || (x > 6) && (x <= 9))
and Bob's your uncle. The other conditionals could have been the result of a chained if or of a switch statement, it's difficult to tell. But that hardly matters. Hence your decompilation was almost perfect already, you just missed a tiny beat. In an intermediate step the C-ified conditionals look like this:
if (x == 4 || x > 6 && x <= 9)
{
y = x + 20;
}
else // L3
{
if (x == 0)
{
y = 44;
}
else // L5
{
if (x == -1)
{
y = -44;
}
else // L6
{
y = 99;
}
}
}
This can then be tightened to:
if (x == 4 || x > 6 && x <= 9)
{
y = x + 20;
}
else if (x == 0)
{
y = 44;
}
else if (x == -1)
{
y = -44;
}
else
{
y = 99;
}
P.S.: the value of (argv + 1) is not stored back to argv, just dereferenced. Hence it's atoi(*(argv + 1)) or atoi(argv[1]).
I was practicing some assembly code to C and need some help with two questions. Based on the GCC objdump it seems okay but I want to make sure I can do this WITHOUT a computer (still kind of new to assembly code)
Question 1 :
q1:
pushl %ebp
movl %esp, %ebp
subl $4, %esp
cmpl $0, 8(%ebp)\\ compare variable1 to zero
jle .L2 \\jump if less than or equal to zero
movl $1, -4(%ebp)\\ ?? variable2 = 1??
jmp .L4\\else
.L2:
movl $0, -4(%ebp)\\ variable2 = 0
.L4:
movl -4(%ebp), %eax\\ variable2 = variable1
leave
ret
what I got was
int main(int x, int z)
{
if (x < 0)
z = 0;
else
z = x;
}
But I was not sure what the purpose of movl $1, -4(%ebp) was.
Question 2 :
fn:
pushl %ebp
movl $1, %eax
movl %esp, %ebp
movl 8(%ebp), %edx
cmpl $1, %edx\\ compare variable1 to 1
jle .L4\\ less than or equal jump.
.L5:
imull %edx, %eax\\ multiply variable1 by variable 2
subl $1, %edx\\ variable1 -1
cmpl $1, %edx\\ compare variable1 with 1
jne .L5 Loop if not equal
.L4:
popl %ebp\\ return value
ret
How I interpreted the information
int main(int x)
{
int result;
if (x <= 1){
for (result=1; x != 1; x = x-1)
result *= x;}
else{return result;}
}
Not sure if my logic is correct on either of those.
Q1 you have one argument 8(%ebp) and one local variable at -4(%ebp). Return value will be in %eax. Knowing this, the function looks more like:
int foo(int arg)
{
int local;
if (arg <= 0) {
local = 0;
} else {
local = 1;
}
return local;
}
Q2 popl %ebp // return value that's not the return value, that's restoring the saved %ebp of the caller (that was pushed in the beginning). Also, the condition in the loop should use > not !=. You are missing an if (x > 1) conditional around the for loop. (Thanks to Mooing Duck for pointing this out.) Also, technically it's a do-while loop. Otherwise you got this function right.
int factorial(int x)
{
int result = 1;
if (x > 1) {
do {
result *= x;
x -= 1;
} while(x != 1);
}
return result;
}
Trying to understand some assembly language, but I am not sure if I am understanding it correctly
movl 8(%ebp),%eax // assign %eax to a variable, say var
testl %eax,%eax // test if var is > 0 or not. if var is > 0, jump to .L3
jge .L3
addl $15,%eax // add 15 to var
.L3:
sarl $4,%eax // shift var 4 to the right , which is the same as multiplying var by 16
given by above understanding, I wrote the following code
int function(int x){
int var = x;
if(var>0) {
ret = ret * 16;
}
ret = ret + 15;
return ret;
}
however, my assembly code looks like the following
movl 8(%ebp), %ebp
movl %eax. %edx
sall $4, %edx
test1 %eax, %eax
cmovg %edx, %eax
addl $15, %eax
am I misunderstanding the original assembly code somewhere?
Edit: is there perhaps a loop involved?
Notice that the code continues with the shift even after the addition, and that jge also includes the equal case. Thus the code could look more like this:
int function(int x) {
int ret = x;
if (ret >= 0) goto skip_add;
ret = ret + 15;
skip_add:
ret = ret / 16;
return ret;
}
Or, to avoid the goto, reverse the condition:
int function(int x) {
int ret = x;
if(ret < 0) {
ret = ret + 15;
}
ret = ret / 16;
return ret;
}
PS: shifting right is division, shifting left would be multiplication.
The requirement is like following:
/* length must be >= 18 */
int calcActualLength(int length) {
int remainder = (length - 18) % 8;
if (remainder == 0)
return length;
return length + 8 - remainder;
}
using bit-wise operator, I could refactor the 1st line
int remainder = (length - 2) & 7;
Can it be further optimized?
((length+5)&~7)+2
int calcActualLength(int length) {
int remainder = (length - 18) % 8;
if (remainder == 0)
return length;
return length + 8 - remainder;
}
==>
int HELPER_calcActualLength(int length) {
int remainder = length % 8;
if (remainder == 0)
return length;
return length + 8 - remainder;
}
int calcActualLength(int length) {
return 18 + HELPER_calcActualLength(length - 18);
}
And HELPER_calcActualLength() equals to ROUNDUP_8() in the semantics when the argument >= 0
And more simpler ROUNDUP_8() can be:
#define ROUNDUP_8(x) (((x)+7)&~7)
int calcActualLength(int length) {
return 18 + ROUNDUP_8(length - 18);
}
==> 2 + ROUNDUP_8(length - 18 + 16);
==> 2 + ROUNDUP_8(length - 2);
==> 2 + (((length - 2)+7)&~7)
==> ((length+5)&~7)+2
Original code produces the following 64-bit assembly when compiling with gcc -O3:
movl %edi, %eax
leal -18(%rax), %ecx
movl %ecx, %edx
sarl $31, %edx
shrl $29, %edx
addl %edx, %ecx
andl $7, %ecx
subl %edx, %ecx
je .L2
addl $8, %eax
subl %ecx, %eax
.L2:
rep
As suggested in the comments to your question, changing the argument to unsigned int allows for greater optimisations and results in the following assembly:
leal -18(%rdi), %edx
movl %edi, %eax
andl $7, %edx
je .L3
leal 8(%rdi), %eax
subl %edx, %eax
.L3:
rep
Rounding up to a multiple of 8 can be performed by adding 7 and masking with ~7. It works like this: if the last three bits are not all zero, then adding 7 carries into the 4-th bit, otherwise no carry occurs. So your function could be simplified to:
return (((length - 18) + 7) & ~7) + 18;
or simpler:
return ((length - 11) & ~7) + 18;
GCC compiles the last line to simply:
leal -11(%rdi), %eax
andl $-8, %eax
addl $18, %eax
Note that the lea (Load Effective Address) instruciton is often "abused" for its ability to compute simple linear combinations like reg1 + size*reg2 + offset