My question is: does it matter whether I declare a variable outside the loop and reinitialize it every time inside the loop, or declare and initialize it inside the loop? So basically, is there any difference between these two forms (performance, standard conformance, etc.)?
Method 1
int a,count=0;
while(count<10)
a=0;
Method 2
int count=0;
while(count<10)
int a=0;
Please assume that this is only a part of a bigger program and that the body inside loop requires that the variable a have a value of 0 every time. So, will there be any difference in the execution times in both the methods?
Yes, it does matter. In the second case
int count=0;
while(count<10)
int a=0;
a can't be referenced outside of the while loop. It has block scope: the block is the only portion of the program text in which the variable can be referenced.
Another thing, which Jonathan Leffler pointed out in his answer, is that both of these loops are infinite loops. And, most importantly, the second snippet would not compile without {} (in C), because a variable definition/declaration is not a statement and cannot appear as the body of a loop.
int count =0;
while(count++ < 10)
{
int a=0;
}
This
void f1(void)
{
int a, count = 10;
while (count--)
a = 0;
}
void f2(void)
{
int count = 10;
while (count--)
{
int a = 0;
}
}
results in this (using non optimising gcc (Debian 4.4.5-8) 4.4.5):
# f1 -- non-optimised gcc output (x86-64, AT&T syntax) for:
#     void f1(void) { int a, count = 10; while (count--) a = 0; }
# Stack slots: count at -4(%rbp), a at -8(%rbp).
    .globl  f1
    .type   f1, @function           # '@', not '#': '#' starts a comment in x86 gas
f1:
.LFB0:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    movq    %rsp, %rbp
    .cfi_offset 6, -16
    .cfi_def_cfa_register 6
    movl    $10, -4(%rbp)           # count = 10
    jmp     .L2                     # evaluate the loop condition first
.L3:
    movl    $0, -8(%rbp)            # loop body: a = 0
.L2:
    cmpl    $0, -4(%rbp)
    setne   %al                     # al = (count != 0), using the pre-decrement value
    subl    $1, -4(%rbp)            # count--
    testb   %al, %al
    jne     .L3                     # repeat while the old count was non-zero
    leave
    ret
    .cfi_endproc
.LFE0:
    .size   f1, .-f1
# f2 -- non-optimised gcc output (x86-64, AT&T syntax) for:
#     void f2(void) { int count = 10; while (count--) { int a = 0; } }
# Stack slots: count at -8(%rbp), a at -4(%rbp).
    .globl  f2
    .type   f2, @function           # '@', not '#': '#' starts a comment in x86 gas
f2:
.LFB1:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    movq    %rsp, %rbp
    .cfi_offset 6, -16
    .cfi_def_cfa_register 6
    movl    $10, -8(%rbp)           # count = 10
    jmp     .L6                     # evaluate the loop condition first
.L7:
    movl    $0, -4(%rbp)            # loop body: int a = 0
.L6:
    cmpl    $0, -8(%rbp)
    setne   %al                     # al = (count != 0), using the pre-decrement value
    subl    $1, -8(%rbp)            # count--
    testb   %al, %al
    jne     .L7                     # repeat while the old count was non-zero
    leave
    ret
    .cfi_endproc
.LFE1:
    .size   f2, .-f2
The assembly code looks quite the same.
The first compiles; the second doesn't. The first runs for a very long time.
If the second was:
int count = 0;
while (count++ < 10)
{
int a = 0;
...do something with a...
}
and you also made similar changes and did something with a in the first loop, then the difference is that a is set to zero on each iteration of the loop in the second case, but it holds whatever value it is set to in the loop in the first case. Also, in the second case, the variable a does not exist outside the loop and cannot therefore be referenced after the loop.
Yes: in your first method, a remains available (with the value zero) for the rest of your bigger program.
In your method 2, on the other hand, the same 'a' cannot be accessed in later statements; trying to do so produces an error about an undeclared variable 'a'. It is zero for the lifetime of the loop body but does not exist after the loop. You can define a new variable 'a' after that while loop.
Related
Taking the following C code
#include <stdio.h>
void test(unsigned char buffer[], int size) {
for (int i = 0; i < size; i++) {
unsigned char data = buffer[i];
printf("%c", data);
}
}
void main() {
unsigned char buffer[5] = "Hello";
test(buffer, 5);
return;
}
and compiling it with the flags -fno-stack-protector -fno-asynchronous-unwind-tables -fno-unroll-loops for clarity produces the following assembly for the test() function:
# test -- optimised gcc output (x86-64, AT&T syntax) for
#     void test(unsigned char buffer[], int size)
# In:   %rdi = buffer, %esi = size
# Uses: %rbx = pointer to the current byte, %rbp = buffer + size (one past the last byte)
test:
    testl   %esi, %esi
    jle     .L6                     # size <= 0: nothing to print
    pushq   %rbp
    leal    -1(%rsi), %eax          # eax = size - 1 (upper half of %rax zeroed)
    pushq   %rbx
    leaq    1(%rdi,%rax), %rbp      # rbp = buffer + (size - 1) + 1 = end pointer
    movq    %rdi, %rbx              # rbx = buffer
    subq    $8, %rsp                # pad so %rsp is 16-byte aligned at the call below
    .p2align 4,,10
    .p2align 3
.L3:                                # loop top: entered by fall-through, repeated via jne
    movzbl  (%rbx), %edi            # data = *rbx, zero-extended
    addq    $1, %rbx
    call    putchar@PLT             # printf("%c", data) was compiled to putchar(data)
    cmpq    %rbp, %rbx
    jne     .L3                     # loop until rbx reaches the end pointer
    addq    $8, %rsp
    popq    %rbx
    popq    %rbp
    ret
    .p2align 4,,10
    .p2align 3
.L6:
    ret
    .size   test, .-test
    .section    .text.startup,"ax",@progbits
    .p2align 4
It seems to me like the L3 label here is completely useless since it is never jumped to or entered. (Except by jne .L3, but that instruction is inside of the L3 label already).
Can anyone explain how and why this assembly still produces the expected effect?
If you read the assembler code from the top you will see that it reaches .L3, plus it also jumps to it with jne .L3, which is your for loop in C.
Take the following C program that has two functions:
// main.c
int times_two(int num) {
int b = num + num;
return b;
}
int main(void) {
int a=2;
int num = times_two(a) + a;
return num;
}
Is the following a more-or-less accurate way to represent that in x86 assembly? I know this is verbose and I have a bunch of extra push/pop and such on the stacks that I don't need, but does the following more or less faithfully follow the C code?
# Program entry point: calls main, then passes main's return value to the
# system call numbered SYS_EXIT (presumably Linux x86-64 exit, going by the name).
SYS_EXIT = 60
.globl _start
_start:
call main
mov %eax, %edi # first syscall argument = value main returned in %eax
mov $SYS_EXIT, %eax # system call number
syscall
# int main(void) { int a = 2; int num = times_two(a) + a; return num; }
# ABI: System V AMD64. Out: %eax = times_two(a) + a.
# Stack: a lives at -4(%rbp).
main:
    # function() {...
    push    %rbp
    mov     %rsp, %rbp
    # int a = 2
    sub     $16, %rsp               # 16 rather than 8 keeps %rsp 16-byte aligned at the call
    movl    $2, -4(%rbp)
    # times_two(a)
    movl    -4(%rbp), %edi          # a is a 4-byte int: a movq here would also read
                                    # the low half of the saved %rbp
    call    times_two
    # ... + a
    addl    -4(%rbp), %eax
    # ...} // (return value already in eax)
    add     $16, %rsp
    pop     %rbp
    ret
# int times_two(int num): takes num in %edi and returns num + num in %eax.
times_two:
push %rbp
mov %rsp, %rbp
# return num + num
xor %eax, %eax # redundant: the mov on the next line overwrites %eax anyway
mov %edi, %eax # eax = num (an alternative for the doubling is: lea (%rdi,%rdi), %eax)
add %edi, %eax
pop %rbp
ret
If not, what may I be screwing up or missing/doing wrong?
I have main function in C that runs code in assembly. I just want to make simple sum:
main.c
#include <stdio.h>
extern int addByAssembly(int first_number, int second_number);
int main (int argc, char **argv)
{
int sum=0;
sum = addByAssembly(5,4);
printf ("%d\n",sum);
return 0;
}
addByAssembly.s
.data
SYSREAD = 0
SYSWRITE = 1
SYSEXIT = 60
STDOUT = 1
STDIN = 0
EXIT_SUCCESS = 0
.text
#.global main
#main:
#call write
#movq $SYSEXIT, %rax
#movq $EXIT_SUCCESS, %rdi
#syscall
#********
.globl addByAssembly
addByAssembly:
pushq %rbp
movq %rsp, %rbp
movq 16(%rsp), %rax
addq 24(%rsp), %rax
movq %rbp, %rsp
popq %rbp
But I get garbage in my sum. It looks like I am passing the arguments incorrectly, because if I do this:
movq $123, %rax
the return value is 123. I've tried many ways, but cannot find out how to compute the sum properly.
Thanks 'Jester' for so much effort and time to get me this explained!
To sum up: passing parameters from C to assembly (and likewise from assembly to C) follows the platform's ABI calling convention.
As you can see there, the parameters are passed in this order:
1) rdi
2) rsi
3) rdx
... and so on...
If you have more parameters than the convention provides registers for, the remaining ones are pushed onto the stack.
So in my case:
# int addByAssembly(int first_number, int second_number)
# ABI: System V AMD64 -- first argument in %rdi, second in %rsi, integer result in %rax.
    .globl  addByAssembly
addByAssembly:
    pushq   %rbp
    movq    %rsp, %rbp
    # Removed (this was wrong, as the parameters are not on the stack:
    # the first is in %rdi, the second in %rsi):
    #     movq 16(%rsp), %rax
    #     addq 24(%rsp), %rax
    # Added (in my case this one line will do):
    lea     (%rdi, %rsi), %rax      # %rdi+%rsi -> %rax (learn lea, useful instruction)
    # REMEMBER: the return value is always in %rax!
    movq    %rbp, %rsp
    popq    %rbp
    ret                             # without this, execution runs past the end of the function
Why is it that the code:
for( i = 0, j = 0; i < 4 , j < 3; i++, j++)
is slower than
for( i = 0, j = 0; i < 4 && j < 3; i++, j++)
Elaborating on that: some users proposed that two if statements take more time than a single if statement with an && operator. I tested it without for loops and it is not true; two if statements are faster than a single one with an && operator.
The first code is not slower; at least in gcc without optimization. In fact, it should be faster.
When you compile both codes and disassemble them, you will find this for the first code:
cmpl $0x2,-0x8(%rbp)
jle 26 <main+0x26>
And this for the second one:
cmpl $0x3,-0x4(%rbp)
jg 44 <main+0x44>
cmpl $0x2,-0x8(%rbp)
jle 26 <main+0x26>
In the first example, gcc evaluates just the second part, because the first one has no effect and is not used in the comparison. In the second one, it has to check for the first one, and then, if true, check the second one.
So, in the general case, the first example should be faster than the second one. If you find the first slower, maybe your way of measuring it was not 100% correct.
There may be no change in execution time, but the number of iterations may vary, since:
If we put a comma-separated condition in a for loop, it evaluates to the value of the last one. So basically, whichever condition you write first is disregarded, and only the second one is checked. So i < 4 , j < 3 will always check only j < 3, whereas i < 4 && j < 3 will evaluate to true if and only if both conditions are true.
Reference
If we do check the assembly of the code you have you may see the difference
program
int main()
{
int x,y;
for(x=0,y=0;x<4,y<5;x++,y++);
printf("New one");
for(x=0,y=0;x<4 && y<5;x++,y++);
}
command to get assembly : gcc -S <program name>
Assembly
# gcc -S output (32-bit x86, AT&T syntax) for for1.c.
# Stack slots in main: x at 24(%esp), y at 28(%esp).
    .file   "for1.c"
    .section    .rodata
.LC0:
    .string "New one"
    .text
    .globl  main
    .type   main, @function         # '@', not '#': '#' starts a comment in x86 gas
main:
.LFB0:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register 5
    andl    $-16, %esp
    subl    $32, %esp
    # first loop: for(x=0,y=0;x<4,y<5;x++,y++);
    movl    $0, 24(%esp)
    movl    $0, 28(%esp)
    jmp     .L2
.L3:
    addl    $1, 24(%esp)
    addl    $1, 28(%esp)
.L2:
    cmpl    $4, 28(%esp)            # Here only one condition (y < 5)
    jle     .L3
    movl    $.LC0, (%esp)
    call    printf
    # second loop: for(x=0,y=0;x<4 && y<5;x++,y++);
    movl    $0, 24(%esp)
    movl    $0, 28(%esp)
    jmp     .L4
.L6:
    addl    $1, 24(%esp)
    addl    $1, 28(%esp)
.L4:
    cmpl    $3, 24(%esp)            # First Condition (x < 4)
    jg      .L7
    cmpl    $4, 28(%esp)            # Second Condition (y < 5)
    jle     .L6
.L7:
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE0:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
    .section    .note.GNU-stack,"",@progbits
So it is clear that if we have two conditions, it will take more time.
the first option one is two ifs,
the second option is a mathematical equation and one if, which is usually faster,
here you save one if by doing a calculation, that costs less process time.
first option -> if() && if(),
second option-> if(() && ())
For my homework assignment I am supposed to convert this C code
#define UPPER 15
const int lower = 12;
int sum = 0;
int main(void) {
int i;
for (i = lower; i < UPPER; i++) {
sum += i;
}
return sum;
}
into gcc assembly. I already compiled it to first study the code before doing it per hand (obviously translating by hand is going to look much differently). This is the assembler code I received:
# gcc output (x86-64, AT&T syntax) for upper.c.
# Stack slot in main: i at -4(%rbp).
    .file   "upper.c"
    .globl  lower
    .section    .rodata
    .align 4
    .type   lower, @object          # '@', not '#': '#' starts a comment in x86 gas
    .size   lower, 4
lower:
    .long   12                      # const int lower = 12
    .globl  sum
    .bss
    .align 4
    .type   sum, @object
    .size   sum, 4
sum:
    .zero   4                       # int sum = 0
    .text
    .globl  main
    .type   main, @function
main:
.LFB0:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movl    $12, -4(%rbp)           # i = lower (the constant 12 is used directly)
    jmp     .L2                     # evaluate the loop condition first
.L3:
    movl    sum(%rip), %edx         # sum += i
    movl    -4(%rbp), %eax
    addl    %edx, %eax
    movl    %eax, sum(%rip)
    addl    $1, -4(%rbp)            # i++
.L2:
    cmpl    $14, -4(%rbp)           # i < UPPER, emitted as i <= 14
    jle     .L3
    movl    sum(%rip), %eax         # return sum
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   main, .-main
    .ident  "GCC: (SUSE Linux) 4.8.1 20130909 [gcc-4_8-branch revision 202388]"
    .section    .note.GNU-stack,"",@progbits
Now I was wondering if someone could give me a few examples like
where the constructors i, lower, upper and sum are located it in code
where some of the expressions i = lower or i < UPPER are located
where the for-loop starts
and such things so I can then get an idea of how the assembler code is constructed. Thank you!
If I understood your question correctly, here are the answers:
Q: where the constructors i, lower, upper and sum are located it in code?
lower is located inside the .rodata section (read-only data section). Its value is initialized by the Linux loader during the program loading stage to the value .long 12. The "constructor" of lower is the Linux loader: it just loads the value of lower from the binary image.
# Definition of the read-only global `lower`: a 4-byte object in .rodata holding 12.
    .globl  lower
    .section    .rodata
    .align 4
    .type   lower, @object          # '@', not '#': '#' starts a comment in x86 gas
    .size   lower, 4
lower:
    .long   12
sum is located inside the .bss section (the data segment containing statically allocated variables). Its value is initialized by the _init function, which gets called when program execution begins. Its value is zero (.zero 4). Every variable located inside the .bss section has zero as its initial value (link to wiki's article for .bss).
# Definition of the global `sum`: a 4-byte zero-filled object in .bss.
    .globl  sum
    .bss
    .align 4
    .type   sum, @object            # '@', not '#': '#' starts a comment in x86 gas
    .size   sum, 4
sum:
    .zero   4
upper is a constant. The compiler did not put its declaration into the assembly. There is a reference to upper-1 (as $14) here:
.L2:
cmpl $14, -4(%rbp)
i is a temporary variable on the stack. Its value is accessed using addresses relative to %rbp (%rbp is a pointer to the current function's stack frame). There is no explicit declaration of i in the assembly. There is no explicit stack reservation for i (no instruction like sub $0x8,%rsp in the main preamble), I think because main doesn't call any other functions. Here is the code for the initialization of i (note that the compiler knows that the initial value of lower is $12 and removes the access to lower during the initialization of i):
movl $12, -4(%rbp)
Q: where some of the expressions i = lower or i < UPPER are located
i = lower:
movl $12, -4(%rbp)
jmp .L2
i < UPPER:
.L2:
cmpl $14, -4(%rbp)
jle .L3
i++:
addl $1, -4(%rbp)
sum += i;:
movl sum(%rip), %edx
movl -4(%rbp), %eax
addl %edx, %eax
movl %eax, sum(%rip)
return sum; (%eax register is used to hold function return value - more about this: X86 calling conventions):
jle .L3
movl sum(%rip), %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
Q: where the for-loop starts
It starts here:
movl $12, -4(%rbp)
jmp .L2