Efficiency using bitwise operators - c

The requirement is like following:
/* length must be >= 18 */
int calcActualLength(int length) {
    /* How far (length - 18) lies past the previous multiple of 8. */
    int remainder = (length - 18) % 8;

    /* Already on a boundary: keep length. Otherwise pad up to the next one. */
    return (remainder == 0) ? length : length + 8 - remainder;
}
Using a bitwise operator, I could refactor the first line to:
int remainder = (length - 2) & 7;
Can it be further optimized?

((length+5)&~7)+2
/* Original implementation, repeated as the starting point of the derivation.
 * For length >= 18 it returns the smallest value >= length such that
 * (value - 18) is a multiple of 8. */
int calcActualLength(int length) {
/* distance of (length - 18) past the previous multiple of 8 */
int remainder = (length - 18) % 8;
if (remainder == 0)
return length;
/* pad up to the next multiple of 8 */
return length + 8 - remainder;
}
==>
/* Rounds a non-negative length up to the next multiple of 8. */
int HELPER_calcActualLength(int length) {
    int remainder = length % 8;
    if (remainder != 0)
        length = length + 8 - remainder;
    return length;
}

/* Strips the fixed 18, rounds what is left up to a multiple of 8,
 * then adds the 18 back. */
int calcActualLength(int length) {
    int beyond_base = length - 18;
    return 18 + HELPER_calcActualLength(beyond_base);
}
HELPER_calcActualLength() is semantically equivalent to ROUNDUP_8() when the argument is >= 0.
A simpler ROUNDUP_8() can be written as:
/* Rounds x up to a multiple of 8: adding 7 carries past the low three bits
 * unless they are all zero, and & ~7 then clears those three bits. */
#define ROUNDUP_8(x) (((x)+7)&~7)
/* 18 plus the part of length beyond 18, rounded up to a multiple of 8. */
int calcActualLength(int length) {
return 18 + ROUNDUP_8(length - 18);
}
==> 2 + ROUNDUP_8(length - 18 + 16);
==> 2 + ROUNDUP_8(length - 2);
==> 2 + (((length - 2)+7)&~7)
==> ((length+5)&~7)+2

Original code produces the following 64-bit assembly when compiling with gcc -O3:
movl %edi, %eax
leal -18(%rax), %ecx
movl %ecx, %edx
sarl $31, %edx
shrl $29, %edx
addl %edx, %ecx
andl $7, %ecx
subl %edx, %ecx
je .L2
addl $8, %eax
subl %ecx, %eax
.L2:
rep
As suggested in the comments to your question, changing the argument to unsigned int allows for greater optimisations and results in the following assembly:
leal -18(%rdi), %edx
movl %edi, %eax
andl $7, %edx
je .L3
leal 8(%rdi), %eax
subl %edx, %eax
.L3:
rep
Rounding up to a multiple of 8 can be performed by adding 7 and masking with ~7. It works like this: if the last three bits are not all zero, then adding 7 carries into the 4-th bit, otherwise no carry occurs. So your function could be simplified to:
return (((length - 18) + 7) & ~7) + 18;
or simpler:
return ((length - 11) & ~7) + 18;
GCC compiles the last line to simply:
leal -11(%rdi), %eax
andl $-8, %eax
addl $18, %eax
Note that the lea (Load Effective Address) instruction is often "abused" for its ability to compute simple linear combinations like reg1 + size*reg2 + offset.

Related

x86-64 Assembly Loop

This question was proposed to me by a friend and I have no idea how to solve it.
loop:
leal (%rdi, %rdi, 4), %eax
leal (%rsi, %rax, 2), %eax
leal 0(, %rax, 4), %edx
cmpl %edx, %esi
jge .L1
leal (%rdi, %rdi, 2), %edx
.L3:
addl %edx, %eax
cmpl $-2, %esi
jl .L3
.L1:
rep ret
And is supposed to map to this loop in C,
int loop(int a, int b){
int x, y;
y = ____;
for (____; ____; ____){
____;
}
return ____;
}
My attempt at converting the assembly to C,
y = 5a;
y = b + 2y;
x = 4y;
if (x < b){
x = 3a;
do{
y += x;
} while (b <= -2);
}
return y;
I assumed %eax = y, since 'y' in the code to fill is the first variable being assigned.
'x' follows as %edx since it's another assignment, and so should be at least part of the "Initialisation" of the for loop.
However, this doesn't seem to fit into the blanks provided, so I am really stuck.
I think I've got a really close, if not perfect solution:
/* rdi = a, rsi = b */
/* rax = y, rdx = x */
/*
loop:
leal (%rdi, %rdi, 4), %eax
leal (%rsi, %rax, 2), %eax
leal 0(, %rax, 4), %edx
cmpl %edx, %esi
jge .L1
leal (%rdi, %rdi, 2), %edx
.L3:
addl %edx, %eax
cmpl $-2, %esi
jl .L3
.L1:
rep ret
*/
/* C reconstruction of the assembly listing: a is in %edi, b in %esi,
 * y in %eax and x in %edx. */
int loop(int a, int b){
int x, y;
/* y = b + 10*a, matching the first two leal instructions */
y = b + (a * 5) * 2;
/* x = 4*y; the body is skipped when b >= x (cmpl %edx,%esi / jge .L1) */
for (x = y * 4; x > b;){
/* x becomes 3*a and is added to y, repeating while b < -2.
 * b is never modified, so the condition cannot change between iterations. */
do y += (x = a * 3); while(b < -2);
/* leave the for after a single pass so x > b is not tested again */
break;
}
return y;
}
Not sure if break; is an issue but I can't find a better way.

How to convert recursion to tail recursion in this example?

I have this recursive function that adds the cubes of the even numbers up to n, and I want to turn it into a tail-recursive one.
/* Sum of n*n*n over every even value from 2 up to n; 0 when n < 2.
 * Plain (non-tail) recursion: the addition happens after the recursive call. */
int sum_even_cubes_rec(int n) {
    if (n < 2)
        return 0;

    int rest = sum_even_cubes_rec(n - 1);

    /* Odd n contributes nothing. */
    if (n % 2 != 0)
        return rest;
    return n * n * n + rest;
}
This is what I wrote but it is wrong and I don't know how to fix it.
Can you please help me.
/* Asker's attempt at an accumulator-passing version (reported as wrong).
 * As written, the recursion only continues while n is even: the first odd n
 * returns acc immediately, and there is no n < 2 check. */
int sum_even_cubes_rec2(int n, int acc) {
if ((n % 2) == 0) {
return sum_even_cubes_rec2 (n-1, acc + n*n*n);
} return acc;
}
/* Entry point: starts the accumulator-based recursion with acc = 0. */
int sum_even_cubes_helperFunktion(int n) {
return sum_even_cubes_rec2(n, 0);
}
Your approach is correct. You have already added acc argument, so that's what you need to return for the base case.
The rest of your code is almost right - you need to adjust what you add to acc for the next invocation:
/* Tail-recursive sum of cubes of the even values from 2 up to n.
 * acc carries the running total and is returned once n drops below 2. */
int sum_even_cubes_rec2(int n, int acc) {
    if (n < 2)
        return acc;

    /* Only even n adds its cube to the running total. */
    if (n % 2 == 0)
        acc = acc + n * n * n;

    return sum_even_cubes_rec2(n - 1, acc);
}
Simply it can be written as this
/* Sums the cubes of the even values from 2 up to n, keeping the running
 * total in a function-local static instead of a parameter. The total is
 * reset to 0 when the recursion bottoms out, ready for the next call. */
int sum_even_cubes_rec2(int n) {
    static int running_sum = 0;

    if (n >= 2) {
        if (n % 2 == 0)
            running_sum += n * n * n;
        return sum_even_cubes_rec2(n - 1);
    }

    /* Base case: hand back the total and clear the static. */
    int total = running_sum;
    running_sum = 0;
    return total;
}
/* Sum of the cubes of the even values from 2 up to n; 0 when n < 2. */
int sum_even_cubes(int n) {
    if (n < 2)
        return 0;

    /* This level's contribution: n cubed for even n, nothing for odd n. */
    int cube = 0;
    if (n % 2 == 0)
        cube = n * n * n;

    return cube + sum_even_cubes(n - 1);
}
Gcc -O2 -S will compile this into (function argument is %edi; return value is in %eax; target for recursion-loop is .L4) :
sum_even_cubes:
.LFB0:
.cfi_startproc
xorl %eax, %eax
cmpl $1, %edi
jle .L5
.p2align 4,,10
.p2align 3
.L4:
xorl %edx, %edx
testb $1, %dil
jne .L3
movl %edi, %edx
imull %edi, %edx
imull %edi, %edx
.L3:
subl $1, %edi
addl %edx, %eax
cmpl $1, %edi
jne .L4
rep ret
.L5:
rep ret
.cfi_endproc
.LFE0:

Assembly Code to C

I was practicing some assembly code to C and need some help with two questions. Based on the GCC objdump it seems okay but I want to make sure I can do this WITHOUT a computer (still kind of new to assembly code)
Question 1 :
q1:
pushl %ebp
movl %esp, %ebp
subl $4, %esp
cmpl $0, 8(%ebp)\\ compare variable1 to zero
jle .L2 \\jump if less than or equal to zero
movl $1, -4(%ebp)\\ ?? variable2 = 1??
jmp .L4\\else
.L2:
movl $0, -4(%ebp)\\ variable2 = 0
.L4:
movl -4(%ebp), %eax\\ variable2 = variable1
leave
ret
what I got was
/* Asker's reconstruction of q1: stores 0 in z when x < 0, otherwise stores x.
 * No value is returned. */
int main(int x, int z)
{
if (x < 0)
z = 0;
else
z = x;
}
But I was not sure what the purpose of movl $1, -4(%ebp) was.
Question 2 :
fn:
pushl %ebp
movl $1, %eax
movl %esp, %ebp
movl 8(%ebp), %edx
cmpl $1, %edx\\ compare variable1 to 1
jle .L4\\ less than or equal jump.
.L5:
imull %edx, %eax\\ multiply variable1 by variable 2
subl $1, %edx\\ variable1 -1
cmpl $1, %edx\\ compare variable1 with 1
jne .L5 Loop if not equal
.L4:
popl %ebp\\ return value
ret
How I interpreted the information
/* Asker's reconstruction of fn. */
int main(int x)
{
int result;
/* the multiplication loop is entered only when x <= 1 */
if (x <= 1){
for (result=1; x != 1; x = x-1)
result *= x;}
/* result has not been assigned on this path; the if path returns nothing */
else{return result;}
}
Not sure if my logic is correct on either of those.
Q1 you have one argument 8(%ebp) and one local variable at -4(%ebp). Return value will be in %eax. Knowing this, the function looks more like:
/* Returns 1 when arg is positive and 0 otherwise. */
int foo(int arg)
{
    /* local corresponds to the stack slot at -4(%ebp), which is loaded
     * into %eax just before returning. */
    int local = (arg > 0) ? 1 : 0;
    return local;
}
Q2: the comment on `popl %ebp` ("return value") is wrong. That is not the return value; it restores the caller's saved %ebp (which was pushed at the beginning). Also, the condition in the loop should use > not !=. You are missing an if (x > 1) conditional around the for loop. (Thanks to Mooing Duck for pointing this out.) Also, technically it's a do-while loop. Otherwise you got this function right.
/* Multiplies x, x-1, ..., 2 together; returns 1 for any x <= 1. */
int factorial(int x)
{
    int product = 1;

    /* Values of 1 or less skip the loop entirely (the jle .L4 in the listing). */
    if (x <= 1)
        return product;

    /* Bottom-tested loop, as in the assembly: multiply, decrement, compare with 1. */
    do {
        product *= x;
        --x;
    } while (x != 1);

    return product;
}

understanding testl in assembly language

Trying to understand some assembly language, but I am not sure if I am understanding it correctly
movl 8(%ebp),%eax // assign %eax to a variable, say var
testl %eax,%eax // test if var is > 0 or not. if var is > 0, jump to .L3
jge .L3
addl $15,%eax // add 15 to var
.L3:
sarl $4,%eax // shift var 4 to the right , which is the same as multiplying var by 16
given by above understanding, I wrote the following code
/* Asker's attempt at translating the assembly.
 * Note: `ret` is used below but never declared; only `var` is. */
int function(int x){
int var = x;
/* multiplies by 16 when var > 0 */
if(var>0) {
ret = ret * 16;
}
/* the +15 is applied unconditionally here */
ret = ret + 15;
return ret;
}
however, my assembly code looks like the following
movl 8(%ebp), %ebp
movl %eax. %edx
sall $4, %edx
test1 %eax, %eax
cmovg %edx, %eax
addl $15, %eax
am I misunderstanding the original assembly code somewhere?
Edit: is there perhaps a loop involved?
Notice that the code continues with the shift even after the addition, and that jge also includes the equal case. Thus the code could look more like this:
/*
 * C rendering of the assembly that keeps its branch shape.
 * Returns x divided by 16, rounded toward zero.
 */
int function(int x) {
    int ret = x;
    /* testl / jge .L3: non-negative values skip the adjustment */
    if (ret >= 0) goto skip_add;
    /* addl $15: bias negative values so the shift below rounds toward zero */
    ret = ret + 15;
skip_add:
    /*
     * sarl $4: arithmetic right shift. Writing "/ 16" here would apply the
     * round-toward-zero correction a second time (-16 would give 0, not -1).
     * Right-shifting a negative int is implementation-defined in C; this
     * assumes the usual sign-extending shift.
     */
    ret = ret >> 4;
    return ret;
}
Or, to avoid the goto, reverse the condition:
/*
 * Goto-free C rendering of the assembly.
 * Returns x divided by 16, rounded toward zero.
 */
int function(int x) {
    int ret = x;
    /* Only negative values are biased (the jge in the listing skips the add). */
    if (ret < 0) {
        ret = ret + 15;
    }
    /*
     * sarl $4: arithmetic right shift. Writing "/ 16" here would apply the
     * round-toward-zero correction a second time (-16 would give 0, not -1).
     * Right-shifting a negative int is implementation-defined in C; this
     * assumes the usual sign-extending shift.
     */
    ret = ret >> 4;
    return ret;
}
PS: shifting right is division, shifting left would be multiplication.

Leal instruction in for loop

I'm reading a book Computer Systems: A Programmer's Perspective (2nd Edition)
and Practice Problem 3.23 confused me a little:
A function fun_b has the following overall structure:
int fun_b(unsigned x) {
int val = 0;
int i;
for ( ____;_____;_____) {
}
return val;
}
The gcc C compiler generates the following assembly code:
x at %ebp+8
1 movl 8(%ebp), %ebx
2 movl $0, %eax
3 movl $0, %ecx
.L13:
5 leal (%eax,%eax), %edx
6 movl %ebx, %eax
7 andl $1, %eax
8 orl %edx, %eax
9 shrl %ebx Shift right by 1
10 addl $1, %ecx
11 cmpl $32, %ecx
12 jne .L13
Reverse engineer the operation of this code and then do the following:
A. Use the assembly-code version to fill in the missing parts of the C code.
My solution.
/* Asker's reconstruction: over 32 iterations, doubles val and ORs in the
 * current lowest bit of x, then shifts x right by one. */
int fun_b(unsigned x) {
int val = 0;
int i;
for ( i = 0 ;i < 32;i++) {
val += val; //because leal (%eax,%eax), edx --> %edx = %eax + %eax
/* & binds tighter than |, so this is val | (x & 0x1) */
val = val | x & 0x1;
x >>= 1;
}
return val;
}
Book's solution.
/* Returns the low 32 bits of x in reversed order: each pass shifts val left
 * by one and appends the current lowest bit of x, then drops that bit from x. */
int fun_b(unsigned x) {
int val = 0;
int i;
for (i = 0; i < 32; i++) {
/* val << 1 corresponds to leal (%eax,%eax); x & 0x1 to andl $1; | to orl */
/* NOTE(review): val is a signed int, so a set bit can be shifted into the sign position; confirm that is acceptable */
val = (val << 1) | (x & 0x1);
x >>= 1;
}
return val;
}
Please explain why the leal instruction is used in this atypical way in this function.
I also don't understand how this assembly code yields the statement val = (val << 1) | (x & 0x1)
In your code:
val += val;
val = val | x & 0x1;
Here, val += val which is equivalent to (val*2) which is effectively equal to val left shifted by 1.
But I think your solution is correct only if the assembly code was something like:
x at %ebp+8
1 movl 8(%ebp), %ebx
2 movl $0, %eax
3 movl $0, %ecx
.L13:
5 addl %eax, %eax
6 movl %ebx, %edx
7 andl $1, %edx
8 orl %edx, %eax
9 shrl %ebx # shift right by 1
10 addl $1, %ecx
11 cmpl $32, %ecx
12 jne .L13
Because if val + val was a separate statement, compiler usually places it in eax register rather than in edx (i'm not sure this is the case always). So, for the code you have given, the possible solutions are:
val = (val << 1) | (x & 0x1);
or
val = (val + val) | (x & 0x1);
or
val = (val * 2) | (x & 0x1);
val <<= 1; means multiplying val by 2, which in binary is shifting to the left, i.e. adding a 0 at the right side
val <<= 1; == val *= 2; == val += val; (by contrast, x >>= 1; shifts right, which halves the unsigned x)

Resources