I have to override a function's return address using buffer oveflow.
The function itself looks like this(I am passing the name argument):
void vuln(char *name)
{
int n = 20;
char buf[1024];
int f[n];
int i;
for (i=0; i<n; i++) {
f[i] = fib(i);
}
strcpy(buf, name);
...
}
I am disassembling it using gdb which gives me the following
0x080485ae <+0>: push %ebp
0x080485af <+1>: mov %esp,%ebp
0x080485b1 <+3>: push %ebx
0x080485b2 <+4>: sub $0x414,%esp
0x080485b8 <+10>: mov %esp,%eax
0x080485ba <+12>: mov %eax,%ebx
0x080485bc <+14>: movl $0x14,-0x10(%ebp)
0x080485c3 <+21>: mov -0x10(%ebp),%eax
0x080485c6 <+24>: lea -0x1(%eax),%edx
0x080485c9 <+27>: mov %edx,-0x14(%ebp)
0x080485cc <+30>: shl $0x2,%eax
0x080485cf <+33>: lea 0x3(%eax),%edx
0x080485d2 <+36>: mov $0x10,%eax
0x080485d7 <+41>: sub $0x1,%eax
0x080485da <+44>: add %edx,%eax
0x080485dc <+46>: mov $0x10,%ecx
0x080485e1 <+51>: mov $0x0,%edx
0x080485e6 <+56>: div %ecx
0x080485e8 <+58>: imul $0x10,%eax,%eax
0x080485eb <+61>: sub %eax,%esp
0x080485ed <+63>: mov %esp,%eax
0x080485ef <+65>: add $0x3,%eax
0x080485f2 <+68>: shr $0x2,%eax
0x080485f5 <+71>: shl $0x2,%eax
0x080485f8 <+74>: mov %eax,-0x18(%ebp)
0x080485fb <+77>: movl $0x0,-0xc(%ebp)
0x08048602 <+84>: jmp 0x8048621 <vuln+115>
0x08048604 <+86>: sub $0xc,%esp
0x08048607 <+89>: pushl -0xc(%ebp)
0x0804860a <+92>: call 0x8048560 <fib>
0x0804860f <+97>: add $0x10,%esp
0x08048612 <+100>: mov %eax,%ecx
0x08048614 <+102>: mov -0x18(%ebp),%eax
0x08048617 <+105>: mov -0xc(%ebp),%edx
0x0804861a <+108>: mov %ecx,(%eax,%edx,4)
0x0804861d <+111>: addl $0x1,-0xc(%ebp)
0x08048621 <+115>: mov -0xc(%ebp),%eax
0x08048624 <+118>: cmp -0x10(%ebp),%eax
0x08048627 <+121>: jl 0x8048604 <vuln+86>
0x08048629 <+123>: sub $0x8,%esp
0x0804862c <+126>: pushl 0x8(%ebp)
0x0804862f <+129>: lea -0x418(%ebp),%eax
0x08048635 <+135>: push %eax
0x08048636 <+136>: call 0x80483c0 <strcpy#plt>
0x0804863b <+141>: add $0x10,%esp
0x0804863e <+144>: sub $0x8,%esp
0x08048641 <+147>: lea -0x418(%ebp),%eax
0x08048647 <+153>: push %eax
0x08048648 <+154>: push $0x80487b7
0x0804864d <+159>: call 0x80483a0 <printf#plt>
0x08048652 <+164>: add $0x10,%esp
0x08048655 <+167>: movl $0x0,-0xc(%ebp)
0x0804865c <+174>: jmp 0x804867f <vuln+209>
0x0804865e <+176>: mov -0x18(%ebp),%eax
0x08048661 <+179>: mov -0xc(%ebp),%edx
=> 0x08048664 <+182>: mov (%eax,%edx,4),%eax
0x08048667 <+185>: sub $0x4,%esp
0x0804866a <+188>: push %eax
0x0804866b <+189>: pushl -0xc(%ebp)
0x0804866e <+192>: push $0x80487c4
0x08048673 <+197>: call 0x80483a0 <printf#plt>
0x08048678 <+202>: add $0x10,%esp
0x0804867b <+205>: addl $0x1,-0xc(%ebp)
0x0804867f <+209>: cmpl $0x13,-0xc(%ebp)
0x08048683 <+213>: jle 0x804865e <vuln+176>
0x08048685 <+215>: mov %ebx,%esp
0x08048687 <+217>: nop
0x08048688 <+218>: mov -0x4(%ebp),%ebx
0x0804868b <+221>: leave
0x0804868c <+222>: ret
The address of the function which should be called with the return of vuln() is 0x804850b.
How am I supposed to know the amount of fillers until I reach the return address to be overwritten?
I guess the name argument should be in the form "a"*n + "\x0b\x85\x04\x08", where n is some number I am trying to guess. I suppose this should be basic stuff but I am still a beginner so please don't judge me...
How am I supposed to know ...
Your code is:
0x080485ae <+0>: push %ebp
0x080485af <+1>: mov %esp,%ebp
...
0x0804862f <+129>: lea -0x418(%ebp),%eax
0x08048635 <+135>: push %eax
0x08048636 <+136>: call 0x80483c0 <strcpy#plt>
Before you enter the function, the return address is at offset 0(%esp).
After the first push, it's at 4(%esp). Since %esp is next copied to %ebp, it's also at 4(%ebp).
Next you see that the location you start copying into is at -0x418(%ebp).
Conclusion: the delta between &buf[0] and &return_address is 0x418 + 4 == 0x41C.
Alternative solution: fill name input with invalid addresses: 0x01010101, 0x01010102, ... 0x010102FF. Execute the code and observe on which address it crashed.
If my calculations are correct, it would crash when vuln tries to return to "slot" 0x41C / 4 == 0x107, which should contain 0x01010208.
Related
I am having trouble this piece of code in assembly language.
Essentially I have to input 2 numbers that matches 2 numbers the code is comparing with.
On line 0x08048c47 in phase_4, it compares the first input with 2, so I know the first input has to be 2. It then moves 4 spaces from the first input to next input, which then gets 2 subtracted from it. Now the (input-2) is compared with 2. It will continue the instruction if the inputs are below than or equal to 2. I've tested this with numbers 2,3,4 which pass the comparison. Other numbers greater than 4 and less than 2 do not pass the comparison and will cause the bomb to explode.
I'm stuck on this part because the value being returned from func4 is not the same was the value represented at 0x08048c6e in phase_4, which is 8(%esp). On my computer when I debug it, it shows that it is 8, and the answers to my inputs 2,3,4 are 40, 60, 80 respectively.
disas func4
0x08048bda <+0>: push %edi
0x08048bdb <+1>: push %esi
0x08048bdc <+2>: push %ebx
0x08048bdd <+3>: mov 0x10(%esp),%ebx
0x08048be1 <+7>: mov 0x14(%esp),%edi
0x08048be5 <+11>: test %ebx,%ebx
0x08048be7 <+13>: jle 0x8048c14 <func4+58>
0x08048be9 <+15>: mov %edi,%eax
0x08048beb <+17>: cmp $0x1,%ebx
0x08048bee <+20>: je 0x8048c19 <func4+63>
0x08048bf0 <+22>: sub $0x8,%esp
0x08048bf3 <+25>: push %edi
0x08048bf4 <+26>: lea -0x1(%ebx),%eax
0x08048bf7 <+29>: push %eax
0x08048bf8 <+30>: call 0x8048bda <func4>
0x08048bfd <+35>: add $0x8,%esp
0x08048c00 <+38>: lea (%edi,%eax,1),%esi
0x08048c03 <+41>: push %edi
0x08048c04 <+42>: sub $0x2,%ebx
0x08048c07 <+45>: push %ebx
0x08048c08 <+46>: call 0x8048bda <func4>
0x08048c0d <+51>: add $0x10,%esp
0x08048c10 <+54>: add %esi,%eax
0x08048c12 <+56>: jmp 0x8048c19 <func4+63>
0x08048c14 <+58>: mov $0x0,%eax
0x08048c19 <+63>: pop %ebx
0x08048c1a <+64>: pop %esi
0x08048c1b <+65>: pop %edi
0x08048c1c <+66>: ret
disas phase_4
0x08048c1d <+0>: sub $0x1c,%esp
0x08048c20 <+3>: mov %gs:0x14,%eax
0x08048c26 <+9>: mov %eax,0xc(%esp)
0x08048c2a <+13>: xor %eax,%eax
0x08048c2c <+15>: lea 0x4(%esp),%eax
0x08048c30 <+19>: push %eax
0x08048c31 <+20>: lea 0xc(%esp),%eax
0x08048c35 <+24>: push %eax
0x08048c36 <+25>: push $0x804a25f
0x08048c3b <+30>: pushl 0x2c(%esp)
0x08048c3f <+34>: call 0x8048810 <__isoc99_sscanf#plt>
0x08048c44 <+39>: add $0x10,%esp
0x08048c47 <+42>: cmp $0x2,%eax
0x08048c4a <+45>: jne 0x8048c58 <phase_4+59>
0x08048c4c <+47>: mov 0x4(%esp),%eax
0x08048c50 <+51>: sub $0x2,%eax
0x08048c53 <+54>: cmp $0x2,%eax
0x08048c56 <+57>: jbe 0x8048c5d <phase_4+64>
0x08048c58 <+59>: call 0x8049123 <explode_bomb>
0x08048c5d <+64>: sub $0x8,%esp
0x08048c60 <+67>: pushl 0xc(%esp)
0x08048c64 <+71>: push $0x6
0x08048c66 <+73>: call 0x8048bda <func4>
0x08048c6b <+78>: add $0x10,%esp
0x08048c6e <+81>: cmp 0x8(%esp),%eax
0x08048c72 <+85>: je 0x8048c79 <phase_4+92>
0x08048c74 <+87>: call 0x8049123 <explode_bomb>
0x08048c79 <+92>: mov 0xc(%esp),%eax
0x08048c7d <+96>: xor %gs:0x14,%eax
0x08048c84 <+103>: je 0x8048c8b <phase_4+110>
0x08048c86 <+105>: call 0x8048790 <__stack_chk_fail#plt>
0x08048c8b <+110>: add $0x1c,%esp
0x08048c8e <+113>: ret
8(%esp) is the first number, under the framework of x86.
enter 40 2 or 60 3 or 80 4 should work.
Equivalent to the following logic
#include <stdio.h>
#include <stdlib.h>
void explode_bomb()
{
printf("explode bomb.\n");
exit(1);
}
unsigned func4(int val, unsigned num)
{
int ret;
if (val <= 0)
return 0;
if (num == 1)
return 1;
ret = func4(val - 1, num);
ret += num;
val -= 2;
ret += func4(val, num);
return ret;
}
void phase_4(const char *input)
{
unsigned num1, num2;
if (sscanf(input, "%u %u", &num1, &num2) != 2)
explode_bomb();
if (num2 - 2 > 2)
explode_bomb();
if (func4(6, num2) != num1)
explode_bomb();
}
int main()
{
phase_4("40 2");
phase_4("60 3");
phase_4("80 4");
printf("success.\n");
return 0;
}
In the program binary, how to determine the instructions related to parameter passing of variable argument function "printf"? For example:
#include <stdio.h>
#include <string.h>
int fun(int a, int b){
return a+b;
}
void main (int argc, char* argv[]){
int a = 0;
int b = 1;
int c = 2;
int d = 3;
printf("a:fun(b,c):d: %d:%d:%d\n", a, fun(b,c), d);
}
is assembled as follows:
(gdb) disas main
Dump of assembler code for function main:
0x080483f1 <+0>: push %ebp
0x080483f2 <+1>: mov %esp,%ebp
0x080483f4 <+3>: and $0xfffffff0,%esp
0x080483f7 <+6>: sub $0x20,%esp
0x080483fa <+9>: movl $0x0,0x10(%esp)
0x08048402 <+17>: movl $0x1,0x14(%esp)
0x0804840a <+25>: movl $0x2,0x18(%esp)
0x08048412 <+33>: movl $0x3,0x1c(%esp)
0x0804841a <+41>: mov 0x18(%esp),%eax
0x0804841e <+45>: mov %eax,0x4(%esp)
0x08048422 <+49>: mov 0x14(%esp),%eax
0x08048426 <+53>: mov %eax,(%esp)
0x08048429 <+56>: call 0x80483e4 <fun>
=> 0x0804842e <+61>: mov $0x8048530,%edx
0x08048433 <+66>: mov 0x1c(%esp),%ecx
0x08048437 <+70>: mov %ecx,0xc(%esp)
0x0804843b <+74>: mov %eax,0x8(%esp)
0x0804843f <+78>: mov 0x10(%esp),%eax
0x08048443 <+82>: mov %eax,0x4(%esp)
=> 0x08048447 <+86>: mov %edx,(%esp)
0x0804844a <+89>: call 0x8048300 <printf#plt>
0x0804844f <+94>: leave
0x08048450 <+95>: ret
Whether the instructions related to parameter passing of variable argument function "print" are instructions between two instructions "0x0804842e <+61>: mov $0x8048530,%edx" and " 0x08048447 <+86>: mov %edx,(%esp)".
I have tested many cases of function "printf". In all my tested cases, the instructions related to parameter passing are between these two instructions.
You can find this out by knowing the C calling convention. That is arguments are pushed onto the stack in reverse order.
`0x0804842e <+61>: mov $0x8048530,%edx //Probably the string literal
0x08048433 <+66>: mov 0x1c(%esp),%ecx //Moving 3 literal into %ecx
0x08048437 <+70>: mov %ecx,0xc(%esp) // moving 3 onto top of the arguments on the stack (%esp is the stack pointer)
0x0804843b <+74>: mov %eax,0x8(%esp) //Moving return value from fun onto next slot in the stack, %eax store the return value from a function.
0x0804843f <+78>: mov 0x10(%esp),%eax //Moving 0 literal into %eax
0x08048443 <+82>: mov %eax,0x4(%esp) //Moving %eax into next slot in the stack
0x08048447 <+86>: mov %edx,(%esp) //moving string literal onto the stack
0x0804844a <+89>: call 0x8048300 `//calling printf
I'm working on the irq_lock() / irq_unlock() implementation for an RTOS and found an issue. We want to absolutely minimize how much time the CPU spends with interrupts locked. Right now, our irq_lock() inline function for x86 uses "memory" clobber:
static ALWAYS_INLINE unsigned int _do_irq_lock(void)
{
unsigned int key;
__asm__ volatile (
"pushfl;\n\t"
"cli;\n\t"
"popl %0;\n\t"
: "=g" (key)
:
: "memory"
);
return key;
}
The problem is that the compiler will still reorder potentially expensive operations into the critical section if they just touch registers and not memory. A specific example is here in our kernel's sleep function:
void k_sleep(int32_t duration)
{
__ASSERT(!_is_in_isr(), "");
__ASSERT(duration != K_FOREVER, "");
K_DEBUG("thread %p for %d ns\n", _current, duration);
/* wait of 0 ns is treated as a 'yield' */
if (duration == 0) {
k_yield();
return;
}
int32_t ticks = _TICK_ALIGN + _ms_to_ticks(duration);
int key = irq_lock();
_remove_thread_from_ready_q(_current);
_add_thread_timeout(_current, NULL, ticks);
_Swap(key);
}
The 'ticks' calculation, which does expensive math, gets reordered inside where we lock interrupts, so we are calling __divdi3 with interrupts locked which is not what we want:
Dump of assembler code for function k_sleep:
0x0010197a <+0>: push %ebp
0x0010197b <+1>: mov %esp,%ebp
0x0010197d <+3>: push %edi
0x0010197e <+4>: push %esi
0x0010197f <+5>: push %ebx
0x00101980 <+6>: mov 0x8(%ebp),%edi
0x00101983 <+9>: test %edi,%edi
0x00101985 <+11>: jne 0x101993 <k_sleep+25>
0x00101987 <+13>: lea -0xc(%ebp),%esp
0x0010198a <+16>: pop %ebx
0x0010198b <+17>: pop %esi
0x0010198c <+18>: pop %edi
0x0010198d <+19>: pop %ebp
0x0010198e <+20>: jmp 0x101944 <k_yield>
0x00101993 <+25>: pushf
0x00101994 <+26>: cli
0x00101995 <+27>: pop %esi
0x00101996 <+28>: pushl 0x104608
0x0010199c <+34>: call 0x101726 <_remove_thread_from_ready_q>
0x001019a1 <+39>: mov $0x64,%eax
0x001019a6 <+44>: imul %edi
0x001019a8 <+46>: mov 0x104608,%ebx
0x001019ae <+52>: add $0x3e7,%eax
0x001019b3 <+57>: adc $0x0,%edx
0x001019b6 <+60>: mov %ebx,0x20(%ebx)
0x001019b9 <+63>: movl $0x0,(%esp)
0x001019c0 <+70>: push $0x3e8
0x001019c5 <+75>: push %edx
0x001019c6 <+76>: push %eax
0x001019c7 <+77>: call 0x1000a0 <__divdi3>
0x001019cc <+82>: add $0x10,%esp
0x001019cf <+85>: inc %eax
0x001019d0 <+86>: mov %eax,0x28(%ebx)
0x001019d3 <+89>: movl $0x0,0x24(%ebx)
0x001019da <+96>: lea 0x18(%ebx),%edx
0x001019dd <+99>: mov $0x10460c,%eax
0x001019e2 <+104>: add $0x28,%ebx
0x001019e5 <+107>: mov $0x10162b,%ecx
0x001019ea <+112>: push %ebx
0x001019eb <+113>: call 0x101667 <sys_dlist_insert_at>
0x001019f0 <+118>: mov %esi,0x8(%ebp)
0x001019f3 <+121>: pop %eax
0x001019f4 <+122>: lea -0xc(%ebp),%esp
0x001019f7 <+125>: pop %ebx
0x001019f8 <+126>: pop %esi
0x001019f9 <+127>: pop %edi
0x001019fa <+128>: pop %ebp
0x001019fb <+129>: jmp 0x100f77 <_Swap>
End of assembler dump.
We discovered that we can get the ordering we want by declaring 'ticks' volatile:
Dump of assembler code for function k_sleep:
0x0010197a <+0>: push %ebp
0x0010197b <+1>: mov %esp,%ebp
0x0010197d <+3>: push %ebx
0x0010197e <+4>: push %edx
0x0010197f <+5>: mov 0x8(%ebp),%edx
0x00101982 <+8>: test %edx,%edx
0x00101984 <+10>: jne 0x10198d <k_sleep+19>
0x00101986 <+12>: call 0x101944 <k_yield>
0x0010198b <+17>: jmp 0x1019f5 <k_sleep+123>
0x0010198d <+19>: mov $0x64,%eax
0x00101992 <+24>: push $0x0
0x00101994 <+26>: imul %edx
0x00101996 <+28>: add $0x3e7,%eax
0x0010199b <+33>: push $0x3e8
0x001019a0 <+38>: adc $0x0,%edx
0x001019a3 <+41>: push %edx
0x001019a4 <+42>: push %eax
0x001019a5 <+43>: call 0x1000a0 <__divdi3>
0x001019aa <+48>: add $0x10,%esp
0x001019ad <+51>: inc %eax
0x001019ae <+52>: mov %eax,-0x8(%ebp)
0x001019b1 <+55>: pushf
0x001019b2 <+56>: cli
0x001019b3 <+57>: pop %ebx
0x001019b4 <+58>: pushl 0x1045e8
0x001019ba <+64>: call 0x101726 <_remove_thread_from_ready_q>
0x001019bf <+69>: mov 0x1045e8,%eax
0x001019c4 <+74>: mov -0x8(%ebp),%edx
0x001019c7 <+77>: movl $0x0,0x24(%eax)
0x001019ce <+84>: mov %edx,0x28(%eax)
0x001019d1 <+87>: mov %eax,0x20(%eax)
0x001019d4 <+90>: lea 0x18(%eax),%edx
0x001019d7 <+93>: add $0x28,%eax
0x001019da <+96>: mov %eax,(%esp)
0x001019dd <+99>: mov $0x10162b,%ecx
0x001019e2 <+104>: mov $0x1045ec,%eax
0x001019e7 <+109>: call 0x101667 <sys_dlist_insert_at>
0x001019ec <+114>: mov %ebx,(%esp)
0x001019ef <+117>: call 0x100f77 <_Swap>
0x001019f4 <+122>: pop %eax
0x001019f5 <+123>: mov -0x4(%ebp),%ebx
0x001019f8 <+126>: leave
0x001019f9 <+127>: ret
End of assembler dump.
However this just fixes it in one spot. We really need a way to modify the irq_lock() implementation such that it does the right thing everywhere, and right now the "memory" clobber is not sufficient.
Since your architecture is x86 anyway, try using __sync_synchronize instead of the memory clobber. It's a full hardware memory barrier supported by x86.
I am a bit over halfway through a Binary Bomb assignment and I am a little stumped. This is homework. Here is the dump of phase 4:
Dump of assembler code for function phase_4:
0x08048cdd <+0>: push %ebp
0x08048cde <+1>: mov %esp,%ebp
0x08048ce0 <+3>: sub $0x28,%esp
0x08048ce3 <+6>: lea -0x10(%ebp),%eax
0x08048ce6 <+9>: mov %eax,0xc(%esp)
0x08048cea <+13>: lea -0xc(%ebp),%eax
0x08048ced <+16>: mov %eax,0x8(%esp)
0x08048cf1 <+20>: movl $0x804a537,0x4(%esp)
0x08048cf9 <+28>: mov 0x8(%ebp),%eax
0x08048cfc <+31>: mov %eax,(%esp)
0x08048cff <+34>: call 0x8048860 <__isoc99_sscanf#plt> <=== calls for "%d %d"
0x08048d04 <+39>: cmp $0x2,%eax
0x08048d07 <+42>: jne 0x8048d14 <phase_4+55>
0x08048d09 <+44>: mov -0x10(%ebp),%eax
0x08048d0c <+47>: sub $0x2,%eax
0x08048d0f <+50>: cmp $0x2,%eax
0x08048d12 <+53>: jbe 0x8048d19 <phase_4+60>
0x08048d14 <+55>: call 0x804921d <explode_bomb>
0x08048d19 <+60>: mov -0x10(%ebp),%eax
0x08048d1c <+63>: mov %eax,0x4(%esp)
0x08048d20 <+67>: movl $0x5,(%esp)
0x08048d27 <+74>: call 0x8048c91 <func4>
0x08048d2c <+79>: cmp -0xc(%ebp),%eax <=== this is where I am stumped
0x08048d2f <+82>: je 0x8048d36 <phase_4+89>
0x08048d31 <+84>: call 0x804921d <explode_bomb>
0x08048d36 <+89>: leave
0x08048d37 <+90>: ret
End of assembler dump.
and this is the dump of func4
0x08048c91 <+0>: push %ebp
0x08048c92 <+1>: mov %esp,%ebp
0x08048c94 <+3>: push %edi
0x08048c95 <+4>: push %esi
0x08048c96 <+5>: push %ebx
0x08048c97 <+6>: sub $0x1c,%esp
0x08048c9a <+9>: mov 0x8(%ebp),%ebx
0x08048c9d <+12>: mov 0xc(%ebp),%esi
0x08048ca0 <+15>: test %ebx,%ebx
0x08048ca2 <+17>: jle 0x8048cd0 <func4+63>
0x08048ca4 <+19>: mov %esi,%eax
0x08048ca6 <+21>: cmp $0x1,%ebx
0x08048ca9 <+24>: je 0x8048cd5 <func4+68>
0x08048cab <+26>: mov %esi,0x4(%esp)
0x08048caf <+30>: lea -0x1(%ebx),%eax
0x08048cb2 <+33>: mov %eax,(%esp)
0x08048cb5 <+36>: call 0x8048c91 <func4>
0x08048cba <+41>: lea (%eax,%esi,1),%edi
0x08048cbd <+44>: mov %esi,0x4(%esp)
0x08048cc1 <+48>: sub $0x2,%ebx
0x08048cc4 <+51>: mov %ebx,(%esp)
0x08048cc7 <+54>: call 0x8048c91 <func4>
0x08048ccc <+59>: add %edi,%eax
0x08048cce <+61>: jmp 0x8048cd5 <func4+68>
0x08048cd0 <+63>: mov $0x0,%eax
0x08048cd5 <+68>: add $0x1c,%esp
0x08048cd8 <+71>: pop %ebx
0x08048cd9 <+72>: pop %esi
0x08048cda <+73>: pop %edi
0x08048cdb <+74>: pop %ebp
0x08048cdc <+75>: ret
I am getting through to +79 with the guesses: "2 3" but I am lost at +79. How do I access the value at -0xc(%ebp) to find what is being comapred to %eax? I know I need to set the guesses such that func4 returns a value equal to that. Any help is appreciated.
See comment above. "That's quite simple, it's the first number you entered. Since scanf got that address passed as argument. See line +13."
With 3 as the second number, func4 output 36, so entering guesses, "36 3" defused the bomb.
I'm looking at how the memory is laid out on the stack, but I don't understand why it looks like there are 12 bytes worth of space for storing each of the variables. Here is a simple C program that prints out the location of various variables:
#include <stdio.h>
#include <stdlib.h>
int test (long p1, long p2){
int l1 = 9999;
int l2 = 99993333;
printf("%p p1\n", &p1);
printf("%p p2\n", &p2);
printf("%p l1\n", &l1);
printf("%p l1\n", &l2);
}
int main(int argc, const char** argv)
{
register void* stack asm("esp");
int x = 22;
int y = 1000;
printf("%p stack\n", stack);
printf("%p argv\n", &argv);
printf("%p argc\n", &argc);
printf("%p l1\n", &x);
printf("%p l2\n", &y);
test(1, 888);
return 0;
}
When run, here is the output:
~/gc$ ./a.out
0x7fff5496b200 stack
0x7fff5496b200 argv
0x7fff5496b20c argc
0x7fff5496b218 l1
0x7fff5496b21c l2
0x7fff5496b1d8 p1
0x7fff5496b1d0 p2
0x7fff5496b1e8 l1
0x7fff5496b1ec l1
Why is there 12 space between the address of argv and argc, and 12 space between l1 and l2? I expected 8 for the longs and pointers, and for the int parameter of main, I would understand either 4 or 8 characters, but I don't see any reason it should be 12.
Someone mentioned that the assembly code would be useful, so I got that as well:
Dump of assembler code for function main:
0x0000000000400614 <+0>: push %rbp
0x0000000000400615 <+1>: mov %rsp,%rbp
0x0000000000400618 <+4>: sub $0x20,%rsp
0x000000000040061c <+8>: mov %edi,-0x14(%rbp)
0x000000000040061f <+11>: mov %rsi,-0x20(%rbp)
0x0000000000400623 <+15>: movl $0x16,-0x8(%rbp)
0x000000000040062a <+22>: movl $0x3e8,-0x4(%rbp)
0x0000000000400631 <+29>: mov %rsp,%rax
0x0000000000400634 <+32>: mov %rax,%rsi
0x0000000000400637 <+35>: mov $0x40079c,%edi
0x000000000040063c <+40>: mov $0x0,%eax
0x0000000000400641 <+45>: callq 0x400410 <printf#plt>
0x0000000000400646 <+50>: lea -0x20(%rbp),%rax
0x000000000040064a <+54>: mov %rax,%rsi
0x000000000040064d <+57>: mov $0x4007a6,%edi
0x0000000000400652 <+62>: mov $0x0,%eax
0x0000000000400657 <+67>: callq 0x400410 <printf#plt>
0x000000000040065c <+72>: lea -0x14(%rbp),%rax
0x0000000000400660 <+76>: mov %rax,%rsi
0x0000000000400663 <+79>: mov $0x4007af,%edi
0x0000000000400668 <+84>: mov $0x0,%eax
0x000000000040066d <+89>: callq 0x400410 <printf#plt>
0x0000000000400672 <+94>: lea -0x8(%rbp),%rax
0x0000000000400676 <+98>: mov %rax,%rsi
0x0000000000400679 <+101>: mov $0x400780,%edi
0x000000000040067e <+106>: mov $0x0,%eax
0x0000000000400683 <+111>: callq 0x400410 <printf#plt>
0x0000000000400688 <+116>: lea -0x4(%rbp),%rax
0x000000000040068c <+120>: mov %rax,%rsi
0x000000000040068f <+123>: mov $0x400787,%edi
0x0000000000400694 <+128>: mov $0x0,%eax
0x0000000000400699 <+133>: callq 0x400410 <printf#plt>
0x000000000040069e <+138>: mov $0x14d,%ecx
0x00000000004006a3 <+143>: mov $0x1589e,%edx
0x00000000004006a8 <+148>: mov $0x378,%esi
0x00000000004006ad <+153>: mov $0x1,%edi
0x00000000004006b2 <+158>: callq 0x40052c <test>
0x00000000004006b7 <+163>: mov $0x0,%eax
0x00000000004006bc <+168>: leaveq
0x00000000004006bd <+169>: retq
End of assembler dump.
(gdb) disassemble test
Dump of assembler code for function test:
0x000000000040052c <+0>: push %rbp
0x000000000040052d <+1>: mov %rsp,%rbp
0x0000000000400530 <+4>: sub $0x40,%rsp
0x0000000000400534 <+8>: mov %rdi,-0x28(%rbp)
0x0000000000400538 <+12>: mov %rsi,-0x30(%rbp)
0x000000000040053c <+16>: mov %rdx,-0x38(%rbp)
0x0000000000400540 <+20>: mov %rcx,-0x40(%rbp)
0x0000000000400544 <+24>: movl $0x270f,-0x18(%rbp)
0x000000000040054b <+31>: movq $0x5f5c6f5,-0x10(%rbp)
0x0000000000400553 <+39>: movl $0x63,-0x14(%rbp)
0x000000000040055a <+46>: movq $0x371,-0x8(%rbp)
0x0000000000400562 <+54>: lea -0x28(%rbp),%rax
0x0000000000400566 <+58>: mov %rax,%rsi
0x0000000000400569 <+61>: mov $0x400764,%edi
0x000000000040056e <+66>: mov $0x0,%eax
0x0000000000400573 <+71>: callq 0x400410 <printf#plt>
0x0000000000400578 <+76>: lea -0x30(%rbp),%rax
0x000000000040057c <+80>: mov %rax,%rsi
0x000000000040057f <+83>: mov $0x40076b,%edi
0x0000000000400584 <+88>: mov $0x0,%eax
0x0000000000400589 <+93>: callq 0x400410 <printf#plt>
0x000000000040058e <+98>: lea -0x38(%rbp),%rax
0x0000000000400592 <+102>: mov %rax,%rsi
0x0000000000400595 <+105>: mov $0x400772,%edi
0x000000000040059a <+110>: mov $0x0,%eax
0x000000000040059f <+115>: callq 0x400410 <printf#plt>
0x00000000004005a4 <+120>: lea -0x40(%rbp),%rax
0x00000000004005a8 <+124>: mov %rax,%rsi
0x00000000004005ab <+127>: mov $0x400779,%edi
0x00000000004005b0 <+132>: mov $0x0,%eax
0x00000000004005b5 <+137>: callq 0x400410 <printf#plt>
0x00000000004005ba <+142>: lea -0x18(%rbp),%rax
0x00000000004005be <+146>: mov %rax,%rsi
0x00000000004005c1 <+149>: mov $0x400780,%edi
0x00000000004005c6 <+154>: mov $0x0,%eax
0x00000000004005cb <+159>: callq 0x400410 <printf#plt>
0x00000000004005d0 <+164>: lea -0x10(%rbp),%rax
0x00000000004005d4 <+168>: mov %rax,%rsi
0x00000000004005d7 <+171>: mov $0x400787,%edi
0x00000000004005dc <+176>: mov $0x0,%eax
0x00000000004005e1 <+181>: callq 0x400410 <printf#plt>
0x00000000004005e6 <+186>: lea -0x14(%rbp),%rax
0x00000000004005ea <+190>: mov %rax,%rsi
0x00000000004005ed <+193>: mov $0x40078e,%edi
0x00000000004005f2 <+198>: mov $0x0,%eax
0x00000000004005f7 <+203>: callq 0x400410 <printf#plt>
0x00000000004005fc <+208>: lea -0x8(%rbp),%rax
0x0000000000400600 <+212>: mov %rax,%rsi
0x0000000000400603 <+215>: mov $0x400795,%edi
0x0000000000400608 <+220>: mov $0x0,%eax
0x000000000040060d <+225>: callq 0x400410 <printf#plt>
0x0000000000400612 <+230>: leaveq
0x0000000000400613 <+231>: retq
End of assembler dump.
You are using a 64-bit system (based on the size of the pointers printed), which means you're probably using x86-64.
Certain arguments to functions in the x86-64 ABI do not have addresses because they are passed in registers. However, according to the C standard, you are allowed to take their addresses. So, when you write &argc, the compiler reserves space for it on the stack and returns that address.
So it is just another local variable. The compiler is free to put argc anywhere on the stack. This behavior is not mandatory, it's just the way that your compiler works.
As for the reason why 12 happens coincidentally on this particular occasion to be the spacing, remember that the stack grows downwards on x86-64. So if you push argc onto the stack the stack pointer will go down 4 bytes, and if you push argv onto the stack it will first go down another 4 bytes to be aligned correctly, then it will go down 8 bytes after argv is pushed. Of course the compiler is free to do other things, like put argv and argc in arbitrary other locations on the stack.
Demonstration
C code:
void otherfunc(int *ptr);
int func(int value)
{
otherfunc(&value);
return 0;
}
Assembly code:
func:
subq $24, %rsp ; Allocate 24 bytes on the stack
movl %edi, 12(%rsp) ; Store 'value' on the stack
leaq 12(%rsp), %rdi ; Calculate the address of 'value'
call otherfunc ; Call 'otherfunc'
xorl %eax, %eax ; Return value 0
addq $24, %rsp ; Deallocate stack
ret ; Return
Remember that %rsp is the stack pointer, %edi / %rdi is the first parameter to a function, and %eax is the return value of a function.