Skipping an instruction using stack smashing - c

I have been trying to skip an instruction by changing the return address through stack smashing. The following code skips a++ in main and prints an output of "1 3". I have executed this code on a 32-bit intel machine.
#include<stdio.h>
/* Demonstration only: patches its own saved return address so that the caller
 * resumes 5 bytes later than normal. The offset 24 is specific to the build
 * disassembled below, where buf is at -0x14(%ebp) and the return address is
 * at 4(%ebp). */
void fun(int a,int b) {
// buffer: its address is the reference point for locating the return address
char buf[8];
char *p;
// buf + 24 is the lowest byte of the saved return address in this build
p = (char *)buf+24;
// +5 skips the 5-byte `addl $0x1,0x18(%esp)` (a++) that follows the call in main
*p=*p+5;
return;
}
/* Calls fun() and then increments and prints a and b. Because fun() moves its
 * return address past the a++ instruction, the reported output is "1 3". */
int main() {
int a=1,b=2;
fun(a,b);
// skipped at run time: fun() returns to the instruction after this increment
a++;
b++;
printf("%d %d",a,b);
}
I am unable to understand why return address is stored at a displacement of 24 bytes from starting address of buf. I have tried executing the same code on a different 32-bit intel machine and I had to use a displacement of 20 bytes instead of 24 bytes. I have put my understanding in the following figure. I am not sure about what fills the gap represented by "?" in the figure. Does gcc put any canary value there or am I missing something ?
Link to figure: http://www.cse.iitb.ac.in/~shashankr/stack.png
The question "Smashing the stack example3.c confusion" asks the same thing, but its answers do not explain how the displacement is determined in general.
The following figure gives a view of the stack obtained by placing a breakpoint in function.
(source: shashankr at www.cse.iitb.ac.in)
The following is the assembly code for main and fun:
Dump of assembler (fun):
0x08048434 <+0>: push %ebp
0x08048435 <+1>: mov %esp,%ebp
0x08048437 <+3>: sub $0x18,%esp
0x0804843a <+6>: mov %gs:0x14,%eax
0x08048440 <+12>: mov %eax,-0xc(%ebp)
0x08048443 <+15>: xor %eax,%eax
0x08048445 <+17>: lea -0x14(%ebp),%eax
0x08048448 <+20>: add $0x18,%eax
0x0804844b <+23>: mov %eax,-0x18(%ebp)
0x0804844e <+26>: mov -0x18(%ebp),%eax
0x08048451 <+29>: movzbl (%eax),%eax
0x08048454 <+32>: add $0x5,%eax
0x08048457 <+35>: mov %eax,%edx
0x08048459 <+37>: mov -0x18(%ebp),%eax
0x0804845c <+40>: mov %dl,(%eax)
0x0804845e <+42>: mov -0xc(%ebp),%eax
0x08048461 <+45>: xor %gs:0x14,%eax
0x08048468 <+52>: je 0x804846f <fun+59>
0x0804846a <+54>: call 0x8048350 <__stack_chk_fail#plt>
0x0804846f <+59>: leave
0x08048470 <+60>: ret
Dump of assembler (main)
0x08048471 <+0>: push %ebp
0x08048472 <+1>: mov %esp,%ebp
0x08048474 <+3>: and $0xfffffff0,%esp
0x08048477 <+6>: sub $0x20,%esp
0x0804847a <+9>: movl $0x1,0x18(%esp)
0x08048482 <+17>: movl $0x2,0x1c(%esp)
0x0804848a <+25>: mov 0x1c(%esp),%eax
0x0804848e <+29>: mov %eax,0x4(%esp)
0x08048492 <+33>: mov 0x18(%esp),%eax
0x08048496 <+37>: mov %eax,(%esp)
0x08048499 <+40>: call 0x8048434 <fun>
0x0804849e <+45>: addl $0x1,0x18(%esp)
0x080484a3 <+50>: addl $0x1,0x1c(%esp)
0x080484a8 <+55>: mov $0x80485a0,%eax
0x080484ad <+60>: mov 0x1c(%esp),%edx
0x080484b1 <+64>: mov %edx,0x8(%esp)
0x080484b5 <+68>: mov 0x18(%esp),%edx
0x080484b9 <+72>: mov %edx,0x4(%esp)
0x080484bd <+76>: mov %eax,(%esp)
0x080484c0 <+79>: call 0x8048340 <printf#plt>
0x080484c5 <+84>: leave
0x080484c6 <+85>: ret

Apart from the stack canary, which your disassembly shows being loaded from %gs:0x14 and stored at -0xc(%ebp) directly above buf, I believe the answer is nothing. Are you using different gcc versions on the two machines? In any case, a compiler is allowed to allocate a bit more stack than necessary. Perhaps the size is an initial "guess" based on the number of variables that is not reduced by later optimization stages, which are allowed to move any variable into a register. Or it is a reserve for saving ecx, ebp or other registers in case the subroutine needs to.
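In any case, there is one variable at a fixed position relative to the return address that sidesteps the problem: the parameter a.
With this calling convention the return address is stored immediately below it, so return address = ((int *)&a)[-1].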

Related

Buffer overflow, call a function

I have to overwrite a function's return address using a buffer overflow.
The function itself looks like this (I am passing the name argument):
/* Target of the overflow exercise: fills a table with fib() values, then copies
 * the caller-supplied string into a fixed 1024-byte stack buffer with no length
 * check. The remainder of the body is elided in the original post. */
void vuln(char *name)
{
int n = 20;
char buf[1024];
// variable-length array of n ints
int f[n];
int i;
// fill f with fib(0) .. fib(n-1)
for (i=0; i<n; i++) {
f[i] = fib(i);
}
// unbounded copy: a name longer than 1023 characters writes past the end of buf
strcpy(buf, name);
...
}
I am disassembling it using gdb which gives me the following
0x080485ae <+0>: push %ebp
0x080485af <+1>: mov %esp,%ebp
0x080485b1 <+3>: push %ebx
0x080485b2 <+4>: sub $0x414,%esp
0x080485b8 <+10>: mov %esp,%eax
0x080485ba <+12>: mov %eax,%ebx
0x080485bc <+14>: movl $0x14,-0x10(%ebp)
0x080485c3 <+21>: mov -0x10(%ebp),%eax
0x080485c6 <+24>: lea -0x1(%eax),%edx
0x080485c9 <+27>: mov %edx,-0x14(%ebp)
0x080485cc <+30>: shl $0x2,%eax
0x080485cf <+33>: lea 0x3(%eax),%edx
0x080485d2 <+36>: mov $0x10,%eax
0x080485d7 <+41>: sub $0x1,%eax
0x080485da <+44>: add %edx,%eax
0x080485dc <+46>: mov $0x10,%ecx
0x080485e1 <+51>: mov $0x0,%edx
0x080485e6 <+56>: div %ecx
0x080485e8 <+58>: imul $0x10,%eax,%eax
0x080485eb <+61>: sub %eax,%esp
0x080485ed <+63>: mov %esp,%eax
0x080485ef <+65>: add $0x3,%eax
0x080485f2 <+68>: shr $0x2,%eax
0x080485f5 <+71>: shl $0x2,%eax
0x080485f8 <+74>: mov %eax,-0x18(%ebp)
0x080485fb <+77>: movl $0x0,-0xc(%ebp)
0x08048602 <+84>: jmp 0x8048621 <vuln+115>
0x08048604 <+86>: sub $0xc,%esp
0x08048607 <+89>: pushl -0xc(%ebp)
0x0804860a <+92>: call 0x8048560 <fib>
0x0804860f <+97>: add $0x10,%esp
0x08048612 <+100>: mov %eax,%ecx
0x08048614 <+102>: mov -0x18(%ebp),%eax
0x08048617 <+105>: mov -0xc(%ebp),%edx
0x0804861a <+108>: mov %ecx,(%eax,%edx,4)
0x0804861d <+111>: addl $0x1,-0xc(%ebp)
0x08048621 <+115>: mov -0xc(%ebp),%eax
0x08048624 <+118>: cmp -0x10(%ebp),%eax
0x08048627 <+121>: jl 0x8048604 <vuln+86>
0x08048629 <+123>: sub $0x8,%esp
0x0804862c <+126>: pushl 0x8(%ebp)
0x0804862f <+129>: lea -0x418(%ebp),%eax
0x08048635 <+135>: push %eax
0x08048636 <+136>: call 0x80483c0 <strcpy#plt>
0x0804863b <+141>: add $0x10,%esp
0x0804863e <+144>: sub $0x8,%esp
0x08048641 <+147>: lea -0x418(%ebp),%eax
0x08048647 <+153>: push %eax
0x08048648 <+154>: push $0x80487b7
0x0804864d <+159>: call 0x80483a0 <printf#plt>
0x08048652 <+164>: add $0x10,%esp
0x08048655 <+167>: movl $0x0,-0xc(%ebp)
0x0804865c <+174>: jmp 0x804867f <vuln+209>
0x0804865e <+176>: mov -0x18(%ebp),%eax
0x08048661 <+179>: mov -0xc(%ebp),%edx
=> 0x08048664 <+182>: mov (%eax,%edx,4),%eax
0x08048667 <+185>: sub $0x4,%esp
0x0804866a <+188>: push %eax
0x0804866b <+189>: pushl -0xc(%ebp)
0x0804866e <+192>: push $0x80487c4
0x08048673 <+197>: call 0x80483a0 <printf#plt>
0x08048678 <+202>: add $0x10,%esp
0x0804867b <+205>: addl $0x1,-0xc(%ebp)
0x0804867f <+209>: cmpl $0x13,-0xc(%ebp)
0x08048683 <+213>: jle 0x804865e <vuln+176>
0x08048685 <+215>: mov %ebx,%esp
0x08048687 <+217>: nop
0x08048688 <+218>: mov -0x4(%ebp),%ebx
0x0804868b <+221>: leave
0x0804868c <+222>: ret
The address of the function which should be called with the return of vuln() is 0x804850b.
How am I supposed to know the amount of fillers until I reach the return address to be overwritten?
I guess the name argument should be in the form "a"*n + "\x0b\x85\x04\x08", where n is some number I am trying to guess. I suppose this should be basic stuff but I am still a beginner so please don't judge me...
How am I supposed to know ...
Your code is:
0x080485ae <+0>: push %ebp
0x080485af <+1>: mov %esp,%ebp
...
0x0804862f <+129>: lea -0x418(%ebp),%eax
0x08048635 <+135>: push %eax
0x08048636 <+136>: call 0x80483c0 <strcpy#plt>
Before you enter the function, the return address is at offset 0(%esp).
After the first push, it's at 4(%esp). Since %esp is next copied to %ebp, it's also at 4(%ebp).
Next you see that the location you start copying into is at -0x418(%ebp).
Conclusion: the delta between &buf[0] and &return_address is 0x418 + 4 == 0x41C.
Alternative solution: fill name input with invalid addresses: 0x01010101, 0x01010102, ... 0x010102FF. Execute the code and observe on which address it crashed.
If my calculations are correct, it would crash when vuln tries to return to "slot" 0x41C / 4 == 0x107, which should contain 0x01010208.

instructions related to parameter passing of variable argument function "printf"

In the program binary, how to determine the instructions related to parameter passing of variable argument function "printf"? For example:
#include <stdio.h>
#include <string.h>
/* Returns the sum of its two arguments. */
int fun(int a, int b){
return a+b;
}
/* Passes a local, the result of a nested call and another local to printf so
 * that the argument setup can be inspected in the disassembly. */
void main (int argc, char* argv[]){
int a = 0;
int b = 1;
int c = 2;
int d = 3;
// fun(b,c) is evaluated first; its result is then stored as printf's third argument
printf("a:fun(b,c):d: %d:%d:%d\n", a, fun(b,c), d);
}
is assembled as follows:
(gdb) disas main
Dump of assembler code for function main:
0x080483f1 <+0>: push %ebp
0x080483f2 <+1>: mov %esp,%ebp
0x080483f4 <+3>: and $0xfffffff0,%esp
0x080483f7 <+6>: sub $0x20,%esp
0x080483fa <+9>: movl $0x0,0x10(%esp)
0x08048402 <+17>: movl $0x1,0x14(%esp)
0x0804840a <+25>: movl $0x2,0x18(%esp)
0x08048412 <+33>: movl $0x3,0x1c(%esp)
0x0804841a <+41>: mov 0x18(%esp),%eax
0x0804841e <+45>: mov %eax,0x4(%esp)
0x08048422 <+49>: mov 0x14(%esp),%eax
0x08048426 <+53>: mov %eax,(%esp)
0x08048429 <+56>: call 0x80483e4 <fun>
=> 0x0804842e <+61>: mov $0x8048530,%edx
0x08048433 <+66>: mov 0x1c(%esp),%ecx
0x08048437 <+70>: mov %ecx,0xc(%esp)
0x0804843b <+74>: mov %eax,0x8(%esp)
0x0804843f <+78>: mov 0x10(%esp),%eax
0x08048443 <+82>: mov %eax,0x4(%esp)
=> 0x08048447 <+86>: mov %edx,(%esp)
0x0804844a <+89>: call 0x8048300 <printf#plt>
0x0804844f <+94>: leave
0x08048450 <+95>: ret
Are the instructions related to parameter passing for the variable-argument function "printf" exactly the instructions between "0x0804842e <+61>: mov $0x8048530,%edx" and "0x08048447 <+86>: mov %edx,(%esp)"?
I have tested many cases of function "printf". In all my tested cases, the instructions related to parameter passing are between these two instructions.
You can work this out from the C calling convention: the arguments are placed on the stack in reverse order, so the first argument ends up at the lowest address, (%esp).
0x0804842e <+61>: mov $0x8048530,%edx //Probably the address of the format string literal
0x08048433 <+66>: mov 0x1c(%esp),%ecx //Loading d (value 3) into %ecx
0x08048437 <+70>: mov %ecx,0xc(%esp) //Storing d in the fourth argument slot (%esp is the stack pointer)
0x0804843b <+74>: mov %eax,0x8(%esp) //Storing the return value of fun in the third argument slot; %eax holds a function's return value
0x0804843f <+78>: mov 0x10(%esp),%eax //Loading a (value 0) into %eax
0x08048443 <+82>: mov %eax,0x4(%esp) //Storing a in the second argument slot
0x08048447 <+86>: mov %edx,(%esp) //Storing the format string address in the first argument slot
0x0804844a <+89>: call 0x8048300 <printf#plt> //calling printf

Why it shows puts when I disassemble no matter whether I'm using printf or puts? [duplicate]

This question already has answers here:
Compiler changes printf to puts
(2 answers)
Closed 3 years ago.
I'm pretty new to programming and wanted to ask why I get the same result from different code. I'm reading a book whose example uses printf, and the assembly listing in the book also shows <printf#plt>. The assembly code in the book differs from mine, but the C code is the same. Is my processor just computing it differently?
(Problem is at call <+34> <puts#plt>)
Code 1:
#include <stdio.h>
int main()
{
int i;
for(i=0; i<10; i++)
{
printf("Hello, world!\n");
}
return 0;
}
Code 2:
#include <stdio.h>
/* Same ten-iteration loop, but calling puts directly. */
int main()
{
int i;
for(i=0; i<10; i++)
{
// NOTE(review): puts appends its own newline, so the "\n" in this string should yield an extra blank line — confirm
puts("Hello, world!\n");
}
return 0;
}
Code 1 disassembled:
Dump of assembler code for function main:
0x080483eb <+0>: lea ecx,[esp+0x4]
0x080483ef <+4>: and esp,0xfffffff0
0x080483f2 <+7>: push DWORD PTR [ecx-0x4]
0x080483f5 <+10>: push ebp
0x080483f6 <+11>: mov ebp,esp
0x080483f8 <+13>: push ecx
=> 0x080483f9 <+14>: sub esp,0x14
0x080483fc <+17>: mov DWORD PTR [ebp-0xc],0x0
0x08048403 <+24>: jmp 0x8048419 <main+46>
0x08048405 <+26>: sub esp,0xc
0x08048408 <+29>: push 0x80484b0
0x0804840d <+34>: call 0x80482c0 <puts#plt>
0x08048412 <+39>: add esp,0x10
0x08048415 <+42>: add DWORD PTR [ebp-0xc],0x1
0x08048419 <+46>: cmp DWORD PTR [ebp-0xc],0x9
0x0804841d <+50>: jle 0x8048405 <main+26>
0x0804841f <+52>: mov eax,0x0
0x08048424 <+57>: mov ecx,DWORD PTR [ebp-0x4]
0x08048427 <+60>: leave
0x08048428 <+61>: lea esp,[ecx-0x4]
0x0804842b <+64>: ret
End of assembler dump.
Code 2 disassembled:
Dump of assembler code for function main:
0x080483eb <+0>: lea ecx,[esp+0x4]
0x080483ef <+4>: and esp,0xfffffff0
0x080483f2 <+7>: push DWORD PTR [ecx-0x4]
0x080483f5 <+10>: push ebp
0x080483f6 <+11>: mov ebp,esp
0x080483f8 <+13>: push ecx
0x080483f9 <+14>: sub esp,0x14
0x080483fc <+17>: mov DWORD PTR [ebp-0xc],0x0
0x08048403 <+24>: jmp 0x8048419 <main+46>
=> 0x08048405 <+26>: sub esp,0xc
0x08048408 <+29>: push 0x80484b0
0x0804840d <+34>: call 0x80482c0 <puts#plt>
0x08048412 <+39>: add esp,0x10
0x08048415 <+42>: add DWORD PTR [ebp-0xc],0x1
0x08048419 <+46>: cmp DWORD PTR [ebp-0xc],0x9
0x0804841d <+50>: jle 0x8048405 <main+26>
0x0804841f <+52>: mov eax,0x0
0x08048424 <+57>: mov ecx,DWORD PTR [ebp-0x4]
0x08048427 <+60>: leave
0x08048428 <+61>: lea esp,[ecx-0x4]
0x0804842b <+64>: ret
End of assembler dump.
The puts function is preferred because it is simpler in both functionality (no format string decoding) and argument passing.
For instance, the x86-64 System V calling convention requires the caller of a variadic function such as printf to set AL (the low byte of RAX) to the number of vector (XMM/YMM) registers used for arguments. puts is easier, as there is only a single argument, passed in RDI.

Assembly Code to C Code (Add Instruction Issues)

So for my class project I am given a binary and what I have to do is bypass the security authentication. Now, once you change the binary to bypass the authentication, you have to create a C program that will replicate the "same" binary. Now, I have been struggling to understand what this area of the assembly code dump that I obtained does.
0x08048a59 <function8+54>: mov 0x8049e50,%eax
0x08048a5e <function8+59>: add $0x4,%eax
0x08048a61 <function8+62>: mov (%eax),%eax
0x08048a63 <function8+64>: movl $0x8048cd4,0x4(%esp)
I'm not very familiar with assembly, but I got most of it figured out. This is the original/entire assembly dump that I got using GDB.
0x08048a23 <function8+0>: push %ebp
0x08048a24 <function8+1>: mov %esp,%ebp
0x08048a26 <function8+3>: sub $0x28,%esp
0x08048a29 <function8+6>: movl $0xd6a1a,-0x18(%ebp)
0x08048a30 <function8+13>: mov 0x8(%ebp),%eax
0x08048a33 <function8+16>: mov %eax,-0x14(%ebp)
0x08048a36 <function8+19>: mov 0xc(%ebp),%eax
0x08048a39 <function8+22>: mov %eax,-0x10(%ebp)
0x08048a3c <function8+25>: movl $0x0,-0xc(%ebp)
0x08048a43 <function8+32>: mov -0xc(%ebp),%eax
0x08048a46 <function8+35>: mov %eax,%edx
0x08048a48 <function8+37>: sar $0x1f,%edx
0x08048a4b <function8+40>: idivl -0x18(%ebp)
0x08048a4e <function8+43>: imul -0x14(%ebp),%eax
0x08048a52 <function8+47>: imul -0x10(%ebp),%eax
0x08048a56 <function8+51>: mov %eax,-0xc(%ebp)
0x08048a59 <function8+54>: mov 0x8049e50,%eax
0x08048a5e <function8+59>: add $0x4,%eax
0x08048a61 <function8+62>: mov (%eax),%eax
0x08048a63 <function8+64>: movl $0x8048cd4,0x4(%esp)
0x08048a6b <function8+72>: mov %eax,(%esp)
0x08048a6e <function8+75>: call 0x8048434 <strcmp#plt>
0x08048a73 <function8+80>: test %eax,%eax
0x08048a75 <function8+82>: jne 0x8048a8d <function8+106>
0x08048a77 <function8+84>: mov $0x8048cdc,%eax
0x08048a7c <function8+89>: mov -0xc(%ebp),%edx
0x08048a7f <function8+92>: mov %edx,0x4(%esp)
0x08048a83 <function8+96>: mov %eax,(%esp)
0x08048a86 <function8+99>: call 0x8048414 <printf#plt>
0x08048a8b <function8+104>: jmp 0x8048a99 <function8+118>
0x08048a8d <function8+106>: movl $0x8048cfa,(%esp)
0x08048a94 <function8+113>: call 0x8048424 <puts#plt>
0x08048a99 <function8+118>: mov -0xc(%ebp),%eax
0x08048a9c <function8+121>: leave
0x08048a9d <function8+122>: ret
And so far I have managed to convert it to this in C:
/* Attempted C reconstruction of function8 from the original disassembly.
 * Computes d from the two arguments, compares via strcmp, prints a message and
 * returns d. */
int function8(one, two){
int a = 879130; // 0xd6a1a in the disassembly
int b = one;
int c = two;
int d = 0;
d = (d / a * b * c);
// NOTE(review): the original binary passes to strcmp the word loaded from
// (value stored at 0x8049e50) + 4 as the first argument and the constant
// address 0x8048cd4 as the second, not b and d + 4 — this call does not match yet
if(strcmp(b, (d + 4)) == 0){
printf("You may enter using token %d", d);
}
else{
puts("You may not enter.");
}
return d;
}
/* Driver that calls function8 once. */
int main(){
// only two is initialized by this declaration; one is passed uninitialized
int one, two = 0;
function8(one, two);
}
I know that
0x08048a59 <function8+54>: mov 0x8049e50,%eax
and
0x08048a63 <function8+64>: movl $0x8048cd4,0x4(%esp)
are pointing to a particular address and value respectively (correct me if I'm wrong), but don't know if I have to call it or not. If so, how do I call that particular address?
Any help would be appreciated!
Just in case you guys need it, this is my current output when I create a binary using GCC and then run GDB to get the dump:
0x08048434 <function8+0>: push %ebp
0x08048435 <function8+1>: mov %esp,%ebp
0x08048437 <function8+3>: sub $0x28,%esp
0x0804843a <function8+6>: movl $0xd6a1a,-0x18(%ebp)
0x08048441 <function8+13>: mov 0x8(%ebp),%eax
0x08048444 <function8+16>: mov %eax,-0x14(%ebp)
0x08048447 <function8+19>: mov 0xc(%ebp),%eax
0x0804844a <function8+22>: mov %eax,-0x10(%ebp)
0x0804844d <function8+25>: movl $0x0,-0xc(%ebp)
0x08048454 <function8+32>: mov -0xc(%ebp),%eax
0x08048457 <function8+35>: mov %eax,%edx
0x08048459 <function8+37>: sar $0x1f,%edx
0x0804845c <function8+40>: idivl -0x18(%ebp)
0x0804845f <function8+43>: imul -0x14(%ebp),%eax
0x08048463 <function8+47>: imul -0x10(%ebp),%eax
0x08048467 <function8+51>: mov %eax,-0xc(%ebp)
0x0804846a <function8+54>: mov -0xc(%ebp),%eax
0x0804846d <function8+57>: add $0x4,%eax
0x08048470 <function8+60>: mov %eax,0x4(%esp)
0x08048474 <function8+64>: mov -0x14(%ebp),%eax
0x08048477 <function8+67>: mov %eax,(%esp)
0x0804847a <function8+70>: call 0x8048364 <strcmp#plt>
0x0804847f <function8+75>: test %eax,%eax
0x08048481 <function8+77>: jne 0x8048499 <function8+101>
0x08048483 <function8+79>: mov $0x80485a0,%eax
0x08048488 <function8+84>: mov -0xc(%ebp),%edx
0x0804848b <function8+87>: mov %edx,0x4(%esp)
0x0804848f <function8+91>: mov %eax,(%esp)
0x08048492 <function8+94>: call 0x8048344 <printf#plt>
0x08048497 <function8+99>: jmp 0x80484a5 <function8+113>
0x08048499 <function8+101>: movl $0x80485bd,(%esp)
0x080484a0 <function8+108>: call 0x8048354 <puts#plt>
0x080484a5 <function8+113>: mov -0xc(%ebp),%eax
0x080484a8 <function8+116>: leave
0x080484a9 <function8+117>: ret
strcmp compares two strings that are passed in as pointers. The code here :
0x08048a59 <function8+54>: mov 0x8049e50,%eax
0x08048a5e <function8+59>: add $0x4,%eax
0x08048a61 <function8+62>: mov (%eax),%eax
0x08048a63 <function8+64>: movl $0x8048cd4,0x4(%esp)
0x08048a6b <function8+72>: mov %eax,(%esp)
0x08048a6e <function8+75>: call 0x8048434 <strcmp#plt>
is passing two pointers to strcmp, both of which come from static/global data (not locals, like you have in your C code). One is the constant address 0x8048cd4 (that's the address of a string; it's probably a literal written in quotes in the source, such as "example"). The other is obtained by loading a pointer from the global at 0x8049e50, adding 4 to it, and dereferencing the result. I'd recommend: (1) look at those addresses to see what's stored in them, and (2) if you're confused by the assembly pointer chasing, try writing simple C programs that call strcmp and look at the resulting assembly.
good luck.
This is the part that calls strcmp, as drivingon9 pointed out.
0x08048a59 <function8+54>: mov 0x8049e50,%eax
0x08048a5e <function8+59>: add $0x4,%eax
0x08048a61 <function8+62>: mov (%eax),%eax
0x08048a63 <function8+64>: movl $0x8048cd4,0x4(%esp)
0x08048a6b <function8+72>: mov %eax,(%esp)
0x08048a6e <function8+75>: call 0x8048434 <strcmp#plt>
The return value of strcmp is stored in the EAX register.
That's why there is a test %eax,%eax in the code below (quoted here from your recompiled binary; the original has the same instruction at <function8+80>):
0x0804847f <function8+75>: test %eax,%eax
0x08048481 <function8+77>: jne 0x8048499 <function8+101>
0x08048483 <function8+79>: mov $0x80485a0,%eax
0x08048488 <function8+84>: mov -0xc(%ebp),%edx
The test %eax,%eax instruction sets the zero flag when the EAX register is equal to 0.
I'm not sure which part of the code will let you do what you want, but you can try changing the line
0x08048481 <function8+77>: jne 0x8048499 <function8+101>
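and change the instruction to a je (jump if equal), or overwrite it with nops so that execution always falls through to the printf branch (an unconditional jmp would always take the "You may not enter." branch).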

Why are there 12 bytes for parameters and locals on the stack in C on Ubuntu?

I'm looking at how the memory is laid out on the stack, but I don't understand why it looks like there are 12 bytes worth of space for storing each of the variables. Here is a simple C program that prints out the location of various variables:
#include <stdio.h>
#include <stdlib.h>
/* Prints the addresses of its two parameters and its two locals. */
int test (long p1, long p2){
int l1 = 9999;
int l2 = 99993333;
printf("%p p1\n", &p1);
printf("%p p2\n", &p2);
printf("%p l1\n", &l1);
// prints the address of l2, although the label text says "l1"
printf("%p l1\n", &l2);
}
/* Prints the stack pointer and the addresses of main's parameters and locals,
 * then calls test(). */
int main(int argc, const char** argv)
{
// binds the variable to the named register so the stack pointer value can be printed
register void* stack asm("esp");
int x = 22;
int y = 1000;
printf("%p stack\n", stack);
printf("%p argv\n", &argv);
printf("%p argc\n", &argc);
// the labels "l1" and "l2" below refer to x and y
printf("%p l1\n", &x);
printf("%p l2\n", &y);
test(1, 888);
return 0;
}
When run, here is the output:
~/gc$ ./a.out
0x7fff5496b200 stack
0x7fff5496b200 argv
0x7fff5496b20c argc
0x7fff5496b218 l1
0x7fff5496b21c l2
0x7fff5496b1d8 p1
0x7fff5496b1d0 p2
0x7fff5496b1e8 l1
0x7fff5496b1ec l1
Why are there 12 bytes between the addresses of argv and argc, and 12 bytes between argc and l1? I expected 8 for the longs and pointers, and for the int parameter of main I would understand either 4 or 8 bytes, but I don't see any reason it should be 12.
Someone mentioned that the assembly code would be useful, so I got that as well:
Dump of assembler code for function main:
0x0000000000400614 <+0>: push %rbp
0x0000000000400615 <+1>: mov %rsp,%rbp
0x0000000000400618 <+4>: sub $0x20,%rsp
0x000000000040061c <+8>: mov %edi,-0x14(%rbp)
0x000000000040061f <+11>: mov %rsi,-0x20(%rbp)
0x0000000000400623 <+15>: movl $0x16,-0x8(%rbp)
0x000000000040062a <+22>: movl $0x3e8,-0x4(%rbp)
0x0000000000400631 <+29>: mov %rsp,%rax
0x0000000000400634 <+32>: mov %rax,%rsi
0x0000000000400637 <+35>: mov $0x40079c,%edi
0x000000000040063c <+40>: mov $0x0,%eax
0x0000000000400641 <+45>: callq 0x400410 <printf#plt>
0x0000000000400646 <+50>: lea -0x20(%rbp),%rax
0x000000000040064a <+54>: mov %rax,%rsi
0x000000000040064d <+57>: mov $0x4007a6,%edi
0x0000000000400652 <+62>: mov $0x0,%eax
0x0000000000400657 <+67>: callq 0x400410 <printf#plt>
0x000000000040065c <+72>: lea -0x14(%rbp),%rax
0x0000000000400660 <+76>: mov %rax,%rsi
0x0000000000400663 <+79>: mov $0x4007af,%edi
0x0000000000400668 <+84>: mov $0x0,%eax
0x000000000040066d <+89>: callq 0x400410 <printf#plt>
0x0000000000400672 <+94>: lea -0x8(%rbp),%rax
0x0000000000400676 <+98>: mov %rax,%rsi
0x0000000000400679 <+101>: mov $0x400780,%edi
0x000000000040067e <+106>: mov $0x0,%eax
0x0000000000400683 <+111>: callq 0x400410 <printf#plt>
0x0000000000400688 <+116>: lea -0x4(%rbp),%rax
0x000000000040068c <+120>: mov %rax,%rsi
0x000000000040068f <+123>: mov $0x400787,%edi
0x0000000000400694 <+128>: mov $0x0,%eax
0x0000000000400699 <+133>: callq 0x400410 <printf#plt>
0x000000000040069e <+138>: mov $0x14d,%ecx
0x00000000004006a3 <+143>: mov $0x1589e,%edx
0x00000000004006a8 <+148>: mov $0x378,%esi
0x00000000004006ad <+153>: mov $0x1,%edi
0x00000000004006b2 <+158>: callq 0x40052c <test>
0x00000000004006b7 <+163>: mov $0x0,%eax
0x00000000004006bc <+168>: leaveq
0x00000000004006bd <+169>: retq
End of assembler dump.
(gdb) disassemble test
Dump of assembler code for function test:
0x000000000040052c <+0>: push %rbp
0x000000000040052d <+1>: mov %rsp,%rbp
0x0000000000400530 <+4>: sub $0x40,%rsp
0x0000000000400534 <+8>: mov %rdi,-0x28(%rbp)
0x0000000000400538 <+12>: mov %rsi,-0x30(%rbp)
0x000000000040053c <+16>: mov %rdx,-0x38(%rbp)
0x0000000000400540 <+20>: mov %rcx,-0x40(%rbp)
0x0000000000400544 <+24>: movl $0x270f,-0x18(%rbp)
0x000000000040054b <+31>: movq $0x5f5c6f5,-0x10(%rbp)
0x0000000000400553 <+39>: movl $0x63,-0x14(%rbp)
0x000000000040055a <+46>: movq $0x371,-0x8(%rbp)
0x0000000000400562 <+54>: lea -0x28(%rbp),%rax
0x0000000000400566 <+58>: mov %rax,%rsi
0x0000000000400569 <+61>: mov $0x400764,%edi
0x000000000040056e <+66>: mov $0x0,%eax
0x0000000000400573 <+71>: callq 0x400410 <printf#plt>
0x0000000000400578 <+76>: lea -0x30(%rbp),%rax
0x000000000040057c <+80>: mov %rax,%rsi
0x000000000040057f <+83>: mov $0x40076b,%edi
0x0000000000400584 <+88>: mov $0x0,%eax
0x0000000000400589 <+93>: callq 0x400410 <printf#plt>
0x000000000040058e <+98>: lea -0x38(%rbp),%rax
0x0000000000400592 <+102>: mov %rax,%rsi
0x0000000000400595 <+105>: mov $0x400772,%edi
0x000000000040059a <+110>: mov $0x0,%eax
0x000000000040059f <+115>: callq 0x400410 <printf#plt>
0x00000000004005a4 <+120>: lea -0x40(%rbp),%rax
0x00000000004005a8 <+124>: mov %rax,%rsi
0x00000000004005ab <+127>: mov $0x400779,%edi
0x00000000004005b0 <+132>: mov $0x0,%eax
0x00000000004005b5 <+137>: callq 0x400410 <printf#plt>
0x00000000004005ba <+142>: lea -0x18(%rbp),%rax
0x00000000004005be <+146>: mov %rax,%rsi
0x00000000004005c1 <+149>: mov $0x400780,%edi
0x00000000004005c6 <+154>: mov $0x0,%eax
0x00000000004005cb <+159>: callq 0x400410 <printf#plt>
0x00000000004005d0 <+164>: lea -0x10(%rbp),%rax
0x00000000004005d4 <+168>: mov %rax,%rsi
0x00000000004005d7 <+171>: mov $0x400787,%edi
0x00000000004005dc <+176>: mov $0x0,%eax
0x00000000004005e1 <+181>: callq 0x400410 <printf#plt>
0x00000000004005e6 <+186>: lea -0x14(%rbp),%rax
0x00000000004005ea <+190>: mov %rax,%rsi
0x00000000004005ed <+193>: mov $0x40078e,%edi
0x00000000004005f2 <+198>: mov $0x0,%eax
0x00000000004005f7 <+203>: callq 0x400410 <printf#plt>
0x00000000004005fc <+208>: lea -0x8(%rbp),%rax
0x0000000000400600 <+212>: mov %rax,%rsi
0x0000000000400603 <+215>: mov $0x400795,%edi
0x0000000000400608 <+220>: mov $0x0,%eax
0x000000000040060d <+225>: callq 0x400410 <printf#plt>
0x0000000000400612 <+230>: leaveq
0x0000000000400613 <+231>: retq
End of assembler dump.
You are using a 64-bit system (based on the size of the pointers printed), which means you're probably using x86-64.
Certain arguments to functions in the x86-64 ABI do not have addresses because they are passed in registers. However, according to the C standard, you are allowed to take their addresses. So, when you write &argc, the compiler reserves space for it on the stack and returns that address.
So it is just another local variable. The compiler is free to put argc anywhere on the stack. This behavior is not mandatory, it's just the way that your compiler works.
As for the reason why 12 happens coincidentally on this particular occasion to be the spacing, remember that the stack grows downwards on x86-64. So if you push argc onto the stack the stack pointer will go down 4 bytes, and if you push argv onto the stack it will first go down another 4 bytes to be aligned correctly, then it will go down 8 bytes after argv is pushed. Of course the compiler is free to do other things, like put argv and argc in arbitrary other locations on the stack.
Demonstration
C code:
void otherfunc(int *ptr);
/* Passes the address of its by-value parameter to otherfunc, which forces the
 * compiler to give 'value' a location in memory. Always returns 0. */
int func(int value)
{
otherfunc(&value);
return 0;
}
Assembly code:
# int func(int value) -- x86-64, AT&T syntax
# In:  %edi = value    Out: %eax = 0
# Comments use '#': in GNU as for x86, ';' separates statements and does not start a comment.
func:
subq $24, %rsp # Allocate 24 bytes on the stack (keeps %rsp 16-byte aligned at the call)
movl %edi, 12(%rsp) # Store 'value' on the stack so that it has an address
leaq 12(%rsp), %rdi # First argument = address of 'value'
call otherfunc # Call 'otherfunc'
xorl %eax, %eax # Return value 0
addq $24, %rsp # Deallocate stack
ret # Return
Remember that %rsp is the stack pointer, %edi / %rdi is the first parameter to a function, and %eax is the return value of a function.

Resources