For example,
/* Demo program: the initializer of dd is one statement deliberately split
   across three source lines so the debugger's line stepping can be observed. */
int main()
{
int aa = 1, bb =2, cc = 3;
/* Evaluated as ((aa + 3 - 1) / bb) << cc. */
int dd = ( (aa + 3 - 1)
/ bb)
<< cc;
printf("%d\n", dd);
return 1;
}
I split the int dd initializer across multiple lines just to demonstrate the problem.
Then I use gdb to debug.
Breakpoint 1, main () at a.c:25
25 int aa = 1, bb =2, cc = 3;
(gdb) n
26 int dd = ( (aa + 3 - 1)
(gdb) n
27 / bb)
(gdb) n
26 int dd = ( (aa + 3 - 1)
(gdb) n
29 printf("%d\n", dd);
(gdb) n
8
30 return 1;
As you can see, the int dd line is shown multiple times and the << cc line is never shown.
How can I avoid this? For example, when I type n and press Enter, gdb would show the complete int dd statement once, and when I type n again, gdb would move on to the next statement.
There is nothing you can do from within GDB because it merely follows the debugging information produced by the compiler. You can see the assembly output with gcc -g -S -fverbose-asm -fno-dwarf2-cfi-asm -o - main.c. Thanks to the annotations you should be able to get the gist of it even if you don't know assembly.
Here is an excerpt:
# main.c:4: int dd = ( (aa + 3 - 1)
movl -4(%rbp), %eax # aa, tmp91
addl $2, %eax #, _1
# main.c:5: / bb)
cltd
idivl -8(%rbp) # bb
movl %eax, %edx # tmp92, _2
# main.c:4: int dd = ( (aa + 3 - 1)
movl -12(%rbp), %eax # cc, tmp94
movl %eax, %ecx # tmp94, tmp99
sall %cl, %edx # tmp99, _2
movl %edx, %eax # _2, tmp95
movl %eax, -16(%rbp) # tmp95, dd
The last block is equivalent to
int dd = _2 << cc;
Since the result of an operation has to be stored somewhere the shift and assignment are not split. The reference is to the line containing the assignment.
You should rewrite the code to have one operation per statement:
int t1 = (aa + 3 - 1);
int t2 = t1 / bb;
int dd = t2 << cc;
Here is the corresponding assembly:
# main.c:4: int t1 = (aa + 3 - 1);
movl -4(%rbp), %eax # aa, tmp92
addl $2, %eax #, tmp91
movl %eax, -16(%rbp) # tmp91, t1
# main.c:5: int t2 = t1 / bb;
movl -16(%rbp), %eax # t1, tmp96
cltd
idivl -8(%rbp) # bb
movl %eax, -20(%rbp) # tmp94, t2
# main.c:6: int dd = t2 << cc;
movl -12(%rbp), %eax # cc, tmp100
movl -20(%rbp), %edx # t2, tmp102
movl %eax, %ecx # tmp100, tmp106
sall %cl, %edx # tmp106, tmp102
movl %edx, %eax # tmp102, tmp101
movl %eax, -24(%rbp) # tmp101, dd
It is essentially the same as before but now the blocks match the statements perfectly. You can't control the compiler's output but it will try to preserve the boundaries between statements, so that is your best option.
Related
I'm new to assembly and I'm using IA32 architecture.
I'm trying code a .s function that produces the following operation: C + A - D + B
A is an 8-bit variable
B is a 16-bit variable
C and D are both 32-bit variables
The function should return a 64-bit value that must be printed in C and I can't figure this out.
I am trying this and it works for the tests of positive numbers, but when I'm working with negative numbers it doesn't work and I can't figure out the reason.
My function sum_and_subtract.s:
.section .data
# Only symbol visibility is declared here; this file defines no storage for A, B, C or D.
.global A
.global B
.global C
.global D
.section .text
.global sum_and_subtract
# sum_and_subtract: computes C + A - D + B in %eax with 32-bit arithmetic and
# finishes with cdq, which leaves the sign-extended result in %edx:%eax.
# short sum_and_subtract(void)
# NOTE(review): the prototype comment above says `short`, yet %edx:%eax is set up - confirm the intended return type.
sum_and_subtract:
#prologue
pushl %ebp
movl %esp, %ebp
pushl %ebx # %ebx is used as scratch below and restored in the epilogue
#body of the function
movl $0, %eax # clear eax
movl C, %eax # eax = C (overwrites the zero just written)
movl $0, %ecx
movb A, %cl # ecx = A zero-extended: the upper 24 bits stay 0
# NOTE(review): if A is a signed 8-bit value, a negative A is added as 128..255 here; movsbl would sign-extend - confirm A's type.
addl %ecx, %eax # eax = C + A
movl $0, %edx
movl D, %edx # edx = D (overwrites the zero just written)
subl %edx, %eax # eax = C + A - D
movl $0, %ebx
movw B, %bx # ebx = B zero-extended: the upper 16 bits stay 0
# NOTE(review): same concern as for A - movswl would sign-extend a signed 16-bit B.
addl %ebx, %eax # eax = C + A - D + B
movl $0, %edx
adcl $0, %edx # edx = carry out of the last addl
cdq # edx = sign bits of eax; this replaces the carry stored just above
#epilogue
fim:
popl %ebx
movl %ebp, %esp
popl %ebp
ret
A correct example is:
A = 0, B = 1, C = 0, D = 0; Expected = 1 -> Result = 1 and It works for this example
The error appears when:
A = 0, B = 0, C = 0, D = 1; Expected = -1 -> Result = 256
After seeing your comments, I realized I forgot to include my main code, where I print my long long result.
main.c :
#include <stdio.h>
#include "sum_and_subtract.h"
char A = 0;
short B = 0;
long C = 0;
long D = 1;
/* Prints the four global operands, then the long long value returned by
   sum_and_subtract(). */
int main(void) {
printf("A = %d\n", A);
printf("B = %hd\n", B);
printf("C = %ld\n", C);
printf("D = %ld\n", D);
/* The assembly routine takes no arguments; it reads A, B, C and D as globals. */
long long result = sum_and_subtract();
printf("Result = %lld\n", result);
return 0;
}
Here it is.
I have this other file sum_and_subtract.h
long long sum_and_subtract(void);
I would follow the C compiler:
# doit: computes A + C - D + B with 32-bit arithmetic, then sign-extends the
# 32-bit result to 64 bits. A and B are loaded with sign extension.
doit:
movsbl A(%rip), %eax # eax = A, sign-extended from 8 bits
movswl B(%rip), %edx # edx = B, sign-extended from 16 bits
addl C(%rip), %eax # eax += C (32-bit load)
subl D(%rip), %eax # eax -= D (32-bit load)
addl %edx, %eax # eax += B
cltq # rax = eax sign-extended to 64 bits
ret
# The same instruction sequence in Intel syntax:
movsx eax, BYTE PTR A[rip]
movsx edx, WORD PTR B[rip]
add eax, DWORD PTR C[rip]
sub eax, DWORD PTR D[rip]
add eax, edx
cdqe
ret
It "prints" -1
This is really more of a comment than an answer, so please do not upvote it; feel free to downvote.
The complete code with print: https://godbolt.org/z/3a9YMo
And with the printing code: https://godbolt.org/z/KT8YWT
I'm to convert the following AT&T x86 assembly into C:
movl 8(%ebp), %edx # edx = x (loaded from the stack slot at 8(%ebp))
movl $0, %eax # eax = val = 0
movl $0, %ecx # ecx = i = 0
jmp .L2 # test the loop condition before the first iteration
.L1: # loop body
shll $1, %eax
movl %edx, %ebx
andl $1, %ebx
orl %ebx, %eax
shrl $1, %edx
addl $1, %ecx
.L2: # loop condition
cmpl $32, %ecx
jl .L1 # repeat while ecx < 32 (signed compare)
leave
But must adhere to the following skeleton code:
/* Skeleton to be completed from the assembly: the blanks stand for the loop
   condition and the two per-iteration updates of val and x. */
int f(unsigned int x) {
int val = 0, i = 0;
while(________) {
val = ________________;
x = ________________;
i++;
}
return val;
}
I can tell that the snippet
.L2
cmpl $32, %ecx
jl .L1
can be interpreted as while(i<32). I also know that x is stored in %edx, val in %eax, and i in %ecx. However, I'm having a hard time converting the assembly within the while/.L1 loop into condensed high-level language that fits into the provided skeleton code. For example, can shll, shrl, orl, and andl simply be written using their direct C equivalents (<<,>>,|,&), or is there some more nuance to it?
Is there a standardized guide/"cheat sheet" for Assembly-to-C conversions?
I understand assembly to high-level conversion is not always clear-cut, but there are certainly patterns in assembly code that can be consistently interpreted as certain C operations.
For example, can shll, shrl, orl, and andl simply be written using
their direct C equivalents (<<,>>,|,&), or is there some more nuance
to it?
they can. Let's examine the loop body step-by-step:
shll $1, %eax // shift left eax by 1, same as "eax<<1" or even "eax*=2"
movl %edx, %ebx
andl $1, %ebx // ebx &= 1
orl %ebx, %eax // eax |= ebx
shrl $1, %edx // shift right edx by 1, same as "edx>>1" = "edx/=2"
gets us to
%eax *=2
%ebx = %edx
%ebx = %ebx & 1
%eax |= %ebx
%edx /= 2
ABI tells us (8(%ebp), %edx) that %edx is x, and %eax (return value) is val:
val *=2
%ebx = x // a
%ebx = %ebx & 1 // b
val |= %ebx // c
x /= 2
combine a,b,c: #1 insert a into b:
val *=2
%ebx = (x & 1) // b
val |= %ebx // c
x /= 2
combine a,b,c: #2 insert b into c:
val *=2
val |= (x & 1)
x /= 2
final step: combine both 'val =' into one
val = 2*val | (x & 1)
x /= 2
Putting it all together: while (i < 32) { val = (val << 1) | (x & 1); x = x >> 1; i++; } Note, however, that val and the return value should be unsigned, and they aren't in your template. The function returns the bits of x in reversed order.
The actual answer to your question is more complicated and is pretty much: no there is no such guide and it can't exist because compilation loses information and you can't recreate that lost information from assembler. But you can often make a good educated guess.
I've written some code (main in c, subprogram in assembly x86) to calculate all the binomial coefficients recursively and print out all the binomial coefficients with n=10, restricted by m<=n.
So basically I'm trying to output a pascals triangle for n=10. (without the whole format of a triangle)
My problem is that I'm getting a segfault when I run the compiled program, and I'm having trouble figuring out how to print the individual values generated by the recursive function.
Segmentation fault (core dumped)
Here's the main program:
#include <stdio.h>
unsigned int result,m,n,i;
unsigned int binom(int,int);
/* Calls binom(n, i) for i = 0..n with n = 10 and prints each result. */
int main(){
n=10;
for (i=0; i<n+1;i++){
/* NOTE(review): i and the value returned by binom() are unsigned int but are printed with %d. */
printf("i=%d | %d \n", i, binom(n,i) );
}
/* NOTE(review): main is declared int but this return carries no value. */
return;
}
And the recursive sub program:
.text
.globl binom
# binom: recursive binomial-coefficient routine. The comments below treat
# %edi as n and %esi as m; the result is built up in %eax.
binom:
mov $0x00, %edx #for difference calculation
cmp %edi, %esi #m=n?
je equalorzero #jump to equalorzero for returning of value 1
cmp $0x00, %esi #m=0?
je equalorzero
cmp $0x01, %esi #m=1?
# NOTE(review): no conditional jump follows the m=1 compare; the sub below overwrites its flags.
mov %esi,%edx
sub %edi, %edx # edx = m - n (AT&T order: the destination is the second operand)
cmp $0x01, %edx # n-m = 1 ?
je oneoronedifference
jmp otherwise
equalorzero:
# NOTE(review): add accumulates into whatever %eax already holds; nothing in this routine initialises %eax.
add $1, %eax #return 1
ret
oneoronedifference:
# NOTE(review): same concern - this adds n to the current %eax instead of setting it.
add %edi, %eax #return n
ret
otherwise:
sub $1, %edi #binom(n-1,m)
call binom
# NOTE(review): the callee can itself subtract from %edi and %esi, and the first result in %eax is not saved before the next call.
sub $1, %esi #binom(n-1,m-1)
call binom
# NOTE(review): the two results are never added and there is no ret here, so execution runs past the end of this listing.
This is what gcc is giving me
./runtimes
i=0 | 12
Segmentation fault (core dumped)
The two major issues with your assembly code are: 1) you neither add nor return the sum of the two recursive calls; 2) you don't save your locals on the stack, so they are wiped out by the recursive calls -- you're using the wrong values once you return from the calls. Here's my rework of your code; some of the changes are due to my writing this under OSX:
The recursive sub program:
.text
.globl _binom
#-----------------------------------------------------------------------
# unsigned int binom(int n, int m)
# ABI:   System V AMD64 (the leading underscore is the Mach-O symbol name)
# In:    %edi = n, %esi = m
# Out:   %eax = C(n, m), computed recursively as
#        binom(n - 1, m) + binom(n - 1, m - 1);
#        0 when n < 0 or m is outside 0..n
# Clobb: %edx, %edi, %esi, flags
# Stack: 16 bytes of locals below %rbp:
#        -4(%rbp) = n - 1, -8(%rbp) = m, -12(%rbp) = first recursive result
#-----------------------------------------------------------------------
_binom:
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp # 16 bytes keeps %rsp 16-byte aligned at each callq below
testl %edi, %edi
js outofrange # n < 0: no valid coefficient
cmpl %edi, %esi # m == n ?
je equalorzero # C(n, n) = 1
ja outofrange # m > n as unsigned, which also catches a negative m
cmpl $0, %esi # m == 0 ?
je equalorzero # C(n, 0) = 1
movl %edi, %edx
subl %esi, %edx # edx = n - m (AT&T order: the destination is the second operand)
cmpl $1, %edx # n - m == 1 ?
je oneoronedifference # C(n, n - 1) = n
subl $1, %edi # binom(n - 1, m)
movl %edi, -4(%rbp) # the recursive call clobbers %edi/%esi, so keep copies
movl %esi, -8(%rbp)
callq _binom
movl %eax, -12(%rbp) # save first result across the second call
movl -4(%rbp), %edi
movl -8(%rbp), %esi
subl $1, %esi # binom(n - 1, m - 1)
callq _binom
addl -12(%rbp), %eax # add results of the two recursive calls
addq $16, %rsp # release locals space on stack
popq %rbp
retq
equalorzero:
movl $1, %eax # return 1
addq $16, %rsp # release locals space on stack
popq %rbp
retq
oneoronedifference:
movl %edi, %eax # return n
addq $16, %rsp # release locals space on stack
popq %rbp
retq
outofrange:
movl $0, %eax # return 0
addq $16, %rsp # release locals space on stack
popq %rbp
retq
The main program:
#include <stdio.h>
extern unsigned int binom(int, int);
/* Prints the binomial coefficients C(10, i) for i = 0..10, one per line. */
int main(void) {
int n = 10;
for (int i = 0; i <= n; i++) {
/* binom() returns unsigned int, so it is printed with %u rather than %d. */
printf("i=%d | %u\n", i, binom(n, i));
}
return 0;
}
And the results:
i=0 | 1
i=1 | 10
i=2 | 45
i=3 | 120
i=4 | 210
i=5 | 252
i=6 | 210
i=7 | 120
i=8 | 45
i=9 | 10
i=10 | 1
I have C code and Assembly code. I do not understand line 3 and 6 of assembled code.
C code:
/* Returns (z * 48) multiplied by the low 16 bits of (x + y). */
int arith(int x, int y, int z)
{
int t1 = x+y;
int t2 = z*48;
int t3 = t1 & 0xFFFF; /* keep only bits 0..15 of the sum */
int t4 = t2 * t3;
return t4;
}
Assembly code:
x at %ebp+8, y at %ebp+12, z at %ebp+16
movl 16(%ebp), %eax # eax = z
leal (%eax, %eax, 2), %eax # eax = eax + 2*eax
sall $4, %eax # eax <<= 4
movl 12(%ebp), %edx # edx = y
addl 8(%ebp), %edx # edx += x
andl $65535, %edx # edx &= 65535
imull %edx, %eax # eax *= edx
on line 6, I do not understand how 65535 becomes 0xFFFF so that we have t3 = t1 & 0xFFFF.
Different question:
Consider the following C function prototype, where num_t is a data type declared using typedef:
/* Stores the product x * y through dest; num_t is a typedef not shown here. */
void store_prod(num_t *dest, unsigned x, num_t y)
{*dest=x*y;}
Gcc generates the following assembly code implementing the body of the computation:
dest at %ebp+8, x at %ebp+12, y at %ebp+16
movl 12(%ebp), %eax # eax = x
movl 20(%ebp), %ecx # ecx = dword at 20(%ebp), 4 bytes above the start of y - presumably y's upper half if num_t is 64 bits wide; confirm
imull %eax, %ecx # ecx = x * that dword (low 32 bits)
mull 16(%ebp) # edx:eax = eax * dword at 16(%ebp), unsigned 64-bit product
leal (%ecx,%edx), %edx # edx = ecx + edx
movl 8(%ebp), %ecx # ecx = dest
movl %eax, (%ecx) # store eax at dest
movl %edx, 4(%ecx) # store edx at dest + 4
line movl 20(%ebp), %ecx there is value in 20(%ebp), how is it grabbing y_t?
line leal (%ecx, %edx), %edx; there is nothing in edx? so what is being added to %ecx to be stored in %edx?
We want to calculate
t2=z*48
To do that, we first compute z*3 and then shift the result left by 4 (a left shift by 4 is a multiplication by 16).
line 3 calculates
z=z*3 (z+2z)
And line 4 does left shift by 4.
The compiler often generates a combination of add and shift instructions to perform a multiplication, because a multiply instruction is more costly.
As for line 6 65535 is decimal for 0xFFFF.
C version:
/* Multiplies z * 48 by the low 16 bits of x + y and returns the product. */
int arith(int x, int y, int z)
{
/* t2 is the scaled third argument. */
int t2 = 48 * z;
/* Only bits 0..15 of the sum take part in the product. */
int low16 = (x + y) & 0xFFFF;
return t2 * low16;
}
ATT Assembly version of the same program:
x at %ebp+8, y at %ebp+12, z at %ebp+16
movl 16(%ebp), %eax # eax = z
leal (%eax, %eax, 2), %eax # eax = eax + 2*eax
sall $4, %eax # t2 = z* 48... This is where I get confused
movl 12(%ebp), %edx # edx = y
addl 8(%ebp), %edx # edx += x
andl $65535, %edx # edx &= 65535
imull %edx, %eax # eax *= edx
I understand everything it is doing at all points of the program besides the shift left.
I assume it is going to shift left 4 times. Why is that?
Thank you!
Edit: I also understand that the part I'm confused on is equivalent to the z*48 part of the C version.
What I'm not understanding is how does shifting left 4 times equate to z*48.
You missed the leal (%eax, %eax, 2), %eax line. Applying some maths the assembly code reads:
a := z
a := a + 2*a // a = 3*z
a := a * 2^4 // a = z * 3*16 = z * 48