Question about address values and pointer increment? [duplicate] - c

This question already has answers here:
Why a pointer + 1 add 4 actually
(6 answers)
Closed 3 years ago.
Let's say that I have a code like below
#include <stdio.h>
int main(void)
{
char char_array[5] = {'a', 'b', 'c', 'd', 'e'};
int int_array[5] = {1, 2, 3, 4, 5};
int i;
char *char_ptr;
int *int_ptr;
char_ptr = char_array;
int_ptr = int_array;
for(i = 0; i < 5; i++){
printf("[char_ptr]For %p address pointing to %c value\n", char_ptr, *char_ptr);
char_ptr +=1;
}
for(i = 0; i < 5; i++){
printf("[int_ptr]For %p address pointing to %d value\n", int_ptr, *int_ptr);
int_ptr += 1;
}
return 0;
}
Then the output will be
[char_ptr]For 0xbf7fc37f address pointing to a value
[char_ptr]For 0xbf7fc380 address pointing to b value
[char_ptr]For 0xbf7fc381 address pointing to c value
[char_ptr]For 0xbf7fc382 address pointing to d value
[char_ptr]For 0xbf7fc383 address pointing to e value
[int_ptr]For 0xbf7fc368 address pointing to 1 value
[int_ptr]For 0xbf7fc36c address pointing to 2 value
[int_ptr]For 0xbf7fc370 address pointing to 3 value
[int_ptr]For 0xbf7fc374 address pointing to 4 value
[int_ptr]For 0xbf7fc378 address pointing to 5 value
My question here is why is the increasement of the address different in int_ptr even though I only added only 1 to the address? I know that is a int variable which is a size of 4 bytes but I'm wondering how the program added 4 to the address even though I only added one to it.
Thank you
+addition
sorry for the confusion that I made from tagging reverse engineering, I was using gdb to look at some asm codes in order to answer my question.
0x00001199 <+0>: lea ecx,[esp+0x4]
0x0000119d <+4>: and esp,0xfffffff0
0x000011a0 <+7>: push DWORD PTR [ecx-0x4]
0x000011a3 <+10>: push ebp
0x000011a4 <+11>: mov ebp,esp
0x000011a6 <+13>: push ebx
0x000011a7 <+14>: push ecx
0x000011a8 <+15>: sub esp,0x30
0x000011ab <+18>: call 0x10a0 <__x86.get_pc_thunk.bx>
0x000011b0 <+23>: add ebx,0x2e50
0x000011b6 <+29>: mov DWORD PTR [ebp-0x19],0x64636261
0x000011bd <+36>: mov BYTE PTR [ebp-0x15],0x65
0x000011c1 <+40>: mov DWORD PTR [ebp-0x30],0x1
0x000011c8 <+47>: mov DWORD PTR [ebp-0x2c],0x2
0x000011cf <+54>: mov DWORD PTR [ebp-0x28],0x3
0x000011d6 <+61>: mov DWORD PTR [ebp-0x24],0x4
0x000011dd <+68>: mov DWORD PTR [ebp-0x20],0x5
0x000011e4 <+75>: lea eax,[ebp-0x19]
0x000011e7 <+78>: mov DWORD PTR [ebp-0x10],eax
0x000011ea <+81>: lea eax,[ebp-0x30]
0x000011ed <+84>: mov DWORD PTR [ebp-0x14],eax
0x000011f0 <+87>: mov DWORD PTR [ebp-0xc],0x0
0x000011f7 <+94>: jmp 0x1220 <main+135>
0x000011f9 <+96>: mov eax,DWORD PTR [ebp-0x10]
0x000011fc <+99>: movzx eax,BYTE PTR [eax]
0x000011ff <+102>: movsx eax,al
0x00001202 <+105>: sub esp,0x4
0x00001205 <+108>: push eax
0x00001206 <+109>: push DWORD PTR [ebp-0x10]
0x00001209 <+112>: lea eax,[ebx-0x1ff8]
0x0000120f <+118>: push eax
0x00001210 <+119>: call 0x1030 <printf#plt>
0x00001215 <+124>: add esp,0x10
0x00001218 <+127>: add DWORD PTR [ebp-0x10],0x1
0x0000121c <+131>: add DWORD PTR [ebp-0xc],0x1
0x00001220 <+135>: cmp DWORD PTR [ebp-0xc],0x4
0x00001224 <+139>: jle 0x11f9 <main+96>
0x00001226 <+141>: mov DWORD PTR [ebp-0xc],0x0
0x0000122d <+148>: jmp 0x1252 <main+185>
0x0000122f <+150>: mov eax,DWORD PTR [ebp-0x14]
0x00001232 <+153>: mov eax,DWORD PTR [eax]
0x00001234 <+155>: sub esp,0x4
0x00001237 <+158>: push eax
0x00001238 <+159>: push DWORD PTR [ebp-0x14]
0x0000123b <+162>: lea eax,[ebx-0x1fc8]
0x00001241 <+168>: push eax
0x00001242 <+169>: call 0x1030 <printf#plt>
0x00001247 <+174>: add esp,0x10
0x0000124a <+177>: add DWORD PTR [ebp-0x14],0x4
0x0000124e <+181>: add DWORD PTR [ebp-0xc],0x1
0x00001252 <+185>: cmp DWORD PTR [ebp-0xc],0x4
0x00001256 <+189>: jle 0x122f <main+150>
0x00001258 <+191>: mov eax,0x0
0x0000125d <+196>: lea esp,[ebp-0x8]
0x00001260 <+199>: pop ecx
0x00001261 <+200>: pop ebx
0x00001262 <+201>: pop ebp
0x00001263 <+202>: lea esp,[ecx-0x4]
0x00001266 <+205>: ret

Most modern CPUs are byte addressable. That means that each memory address refers to a single byte. Thus, for every char (which occupies 1 byte) in memory, you only need one address.
If you're working with a contiguous array of ints (which take up 4 bytes on most machines), however, your addresses will have to "hop" 4 by 4, because there are 4 bytes that memory needs to address and thus 4 memory addresses are taken.
In your example:
0xbf7fc368 refers to the first byte of the first `int`.
0xbf7fc369 refers to the second byte of the first `int`.
0xbf7fc36a refers to the third byte of the first `int`.
0xbf7fc36b refers to the fourth byte of the first `int`.
0xbf7fc36c refers to the first byte of the second `int`.
... and so on.

Related

how to draw the stack after analysing the assembly code

pleasse i need someone to assit me with my assighnment.
q1. Examine the code below and Draw the stack frame after analysing the assembly code when function1, function2 and function3 are called by the main program for a 32-bit system. Figure 2 shows a sample stack frame of a function.
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
int function1(int x, int y, int z)
{
int result_func1;
result_func1 = x + y + z;
return result_func1;
}
int function2(int x, int y, char* input_string)
{
int result_func2;
char buffer[20];
strcpy(buffer, input_string);
printf("your input string %s is copied in the buffer \n", input_string);
result_func2= x - y;
return result_func2;
}
void function3(int result1, int result2)
{
printf("The result of function 1 is %d\n", result1);
printf("The result of function 1 is %d\n", result1);
}
void function4(void)
{
printf("The function never gets called is \n");
exit(-1);
}
int main(int argc, char* argv[])
{
int result1;
int result2;
result1 = function1(5, 10, 15);
result2 = function2(20, 8, argv[1]);
function3(result1, result1);
}
assembly code of the after the analysis:
0x00005555555552c0 <+0>: endbr64
0x00005555555552c4 <+4>: push %rbp
0x00005555555552c5 <+5>: mov %rsp,%rbp
0x00005555555552c8 <+8>: sub $0x20,%rsp
0x00005555555552cc <+12>: mov %edi,-0x14(%rbp)
0x00005555555552cf <+15>: mov %rsi,-0x20(%rbp)
0x00005555555552d3 <+19>: mov $0xf,%edx
0x00005555555552d8 <+24>: mov $0xa,%esi
0x00005555555552dd <+29>: mov $0x5,%edi
0x00005555555552e2 <+34>: callq 0x5555555551c9 <function1>
0x00005555555552e7 <+39>: mov %eax,-0x8(%rbp)
0x00005555555552ea <+42>: mov -0x20(%rbp),%rax
0x00005555555552ee <+46>: add $0x8,%rax
0x00005555555552f2 <+50>: mov (%rax),%rax
0x00005555555552f5 <+53>: mov %rax,%rdx
0x00005555555552f8 <+56>: mov $0x8,%esi
0x00005555555552fd <+61>: mov $0x14,%edi
0x0000555555555302 <+66>: callq 0x5555555551ef <function2>
0x0000555555555307 <+71>: mov %eax,-0x4(%rbp)
0x000055555555530a <+74>: mov -0x8(%rbp),%edx
0x000055555555530d <+77>: mov -0x8(%rbp),%eax
0x0000555555555310 <+80>: mov %edx,%esi
0x0000555555555312 <+82>: mov %eax,%edi
0x0000555555555314 <+84>: callq 0x555555555261 <function3>
0x0000555555555319 <+89>: mov $0x0,%eax
0x000055555555531e <+94>: leaveq
0x000055555555531f <+95>: retq
modidied(actual 32bit assembly code)
function1:
push ebp
mov ebp, esp
mov eax, DWORD PTR [ebp+12]
add eax, DWORD PTR [ebp+8]
add eax, DWORD PTR [ebp+16]
pop ebp
ret
.LC0:
.string "your input string %s is copied in the buffer \n"
function2:
push ebp
mov ebp, esp
push ebx
lea eax, [ebp-28]
sub esp, 44
mov ebx, DWORD PTR [ebp+16]
push ebx
push eax
call strcpy
pop eax
pop edx
push ebx
push OFFSET FLAT:.LC0
call printf
mov eax, DWORD PTR [ebp+8]
mov ebx, DWORD PTR [ebp-4]
sub eax, DWORD PTR [ebp+12]
leave
ret
.LC1:
.string "The result of function 1 is %d\n"
function3:
push ebp
mov ebp, esp
push ebx
sub esp, 12
mov ebx, DWORD PTR [ebp+8]
push ebx
push OFFSET FLAT:.LC1
call printf
mov DWORD PTR [ebp+12], ebx
add esp, 16
mov ebx, DWORD PTR [ebp-4]
mov DWORD PTR [ebp+8], OFFSET FLAT:.LC1
leave
jmp printf
.LC2:
.string "The function never gets called is "
function4:
push ebp
mov ebp, esp
sub esp, 20
push OFFSET FLAT:.LC2
call puts
mov DWORD PTR [esp], -1
call exit
main:
lea ecx, [esp+4]
and esp, -16
push DWORD PTR [ecx-4]
push ebp
mov ebp, esp
push ecx
sub esp, 8
mov eax, DWORD PTR [ecx+4]
push DWORD PTR [eax+4]
push 8
push 20
call function2
pop edx
pop ecx
push 30
push 30
call function3
mov ecx, DWORD PTR [ebp-4]
xor eax, eax
leave
lea esp, [ecx-4]
ret
please i need someone who can assist me with who to start

gdb watchpoint not activated

Consider the code:
#include <stdio.h>
#include <stdlib.h>
int update (int *arr, int size);
#define SIZE 10
int main() { // <---------------------- Breakpoint 1
int x[SIZE];
// Initialize array
for (int c = 0 ; c < SIZE ; c++) {
x[c] = c * 2;
}
// Do some random updates to an array
update((int*) &x, SIZE);
// Print the elements
for (int c = 0 ; c < SIZE ; c++) {
printf("%d\n", x[c]);
}
return EXIT_SUCCESS;
} // <----------------------Breakpoint 2
int update (int *arr, int size) {
for (int i = 0 ; i < size ; i++) {
arr[i] += i;
update(arr+i, size-1);
}
return 1;
}
Result of running info frame at breakpoint 1:
Stack level 0, frame at 0x7ffc176b2610:
rip = 0x56434b0d76b8 in main (array.c:12); saved rip = 0x7f8190fb92b1
source language c.
Arglist at 0x7ffc176b2600, args:
Locals at 0x7ffc176b2600, Previous frame's sp is 0x7ffc176b2610
Saved registers:
rbp at 0x7ffc176b2600, rip at 0x7ffc176b2608
Result of running info frame at breakpoint 2:
Stack level 0, frame at 0x7ffc176b2610:
rip = 0x56434b0d771a in main (array.c:24); saved rip = 0x2d28490fd6501
source language c.
Arglist at 0x7ffc176b2600, args:
Locals at 0x7ffc176b2600, Previous frame's sp is 0x7ffc176b2610
Saved registers:
rbp at 0x7ffc176b2600, rip at 0x7ffc176b2608
We see that main()'s saved return address rip at 0x7ffc176b2608 is mutated from 0x7f8190fb92b1 to 0x2d28490fd6501 between the two breakpoints.
However, setting a watchpoint on the address of rip with watch * 0x7ffc176b2608 and running the executable anew does not pause execution between the breakpoints as expected.
How can this be?
-----------EDIT-----------
Output of disassemble /s main:
Dump of assembler code for function main:
array.c:
8 int main() {
0x000056434b0d76b0 <+0>: push rbp
0x000056434b0d76b1 <+1>: mov rbp,rsp
0x000056434b0d76b4 <+4>: sub rsp,0x30
9 int x[SIZE];
10
11 // Initialize array
12 for (int c = 0 ; c < SIZE ; c++) {
0x000056434b0d76b8 <+8>: mov DWORD PTR [rbp-0x4],0x0
0x000056434b0d76bf <+15>: jmp 0x56434b0d76d4 <main+36>
13 x[c] = c * 2;
0x000056434b0d76c1 <+17>: mov eax,DWORD PTR [rbp-0x4]
0x000056434b0d76c4 <+20>: lea edx,[rax+rax*1]
0x000056434b0d76c7 <+23>: mov eax,DWORD PTR [rbp-0x4]
0x000056434b0d76ca <+26>: cdqe
0x000056434b0d76cc <+28>: mov DWORD PTR [rbp+rax*4-0x30],edx
12 for (int c = 0 ; c < SIZE ; c++) {
0x000056434b0d76d0 <+32>: add DWORD PTR [rbp-0x4],0x1
0x000056434b0d76d4 <+36>: cmp DWORD PTR [rbp-0x4],0x9
0x000056434b0d76d8 <+40>: jle 0x56434b0d76c1 <main+17>
14 }
15
16 // Do some random updates to an array
17 update((int*) &x, SIZE);
0x000056434b0d76da <+42>: lea rax,[rbp-0x30]
0x000056434b0d76de <+46>: mov esi,0xa
0x000056434b0d76e3 <+51>: mov rdi,rax
0x000056434b0d76e6 <+54>: call 0x56434b0d7721 <update>
18
19 // Print the elements
20 for (int c = 0 ; c < SIZE ; c++) {
0x000056434b0d76eb <+59>: mov DWORD PTR [rbp-0x8],0x0
0x000056434b0d76f2 <+66>: jmp 0x56434b0d7714 <main+100>
21 printf("%d\n", x[c]);
0x000056434b0d76f4 <+68>: mov eax,DWORD PTR [rbp-0x8]
0x000056434b0d76f7 <+71>: cdqe
0x000056434b0d76f9 <+73>: mov eax,DWORD PTR [rbp+rax*4-0x30]
0x000056434b0d76fd <+77>: mov esi,eax
0x000056434b0d76ff <+79>: lea rdi,[rip+0x12e] # 0x56434b0d7834
0x000056434b0d7706 <+86>: mov eax,0x0
0x000056434b0d770b <+91>: call 0x56434b0d7560 <printf#plt>
20 for (int c = 0 ; c < SIZE ; c++) {
0x000056434b0d7710 <+96>: add DWORD PTR [rbp-0x8],0x1
0x000056434b0d7714 <+100>: cmp DWORD PTR [rbp-0x8],0x9
0x000056434b0d7718 <+104>: jle 0x56434b0d76f4 <main+68>
22 }
23
24 return EXIT_SUCCESS;
=> 0x000056434b0d771a <+106>: mov eax,0x0
25 }
0x000056434b0d771f <+111>: leave
0x000056434b0d7720 <+112>: ret
End of assembler dump.
Output of disassemble /s update:
Dump of assembler code for function update:
array.c:
27 int update (int *arr, int size) {
0x000056434b0d7721 <+0>: push rbp
0x000056434b0d7722 <+1>: mov rbp,rsp
0x000056434b0d7725 <+4>: sub rsp,0x20
0x000056434b0d7729 <+8>: mov QWORD PTR [rbp-0x18],rdi
0x000056434b0d772d <+12>: mov DWORD PTR [rbp-0x1c],esi
28 for (int i = 0 ; i < size ; i++) {
0x000056434b0d7730 <+15>: mov DWORD PTR [rbp-0x4],0x0
0x000056434b0d7737 <+22>: jmp 0x56434b0d7793 <update+114>
29 arr[i] += i;
0x000056434b0d7739 <+24>: mov eax,DWORD PTR [rbp-0x4]
0x000056434b0d773c <+27>: cdqe
0x000056434b0d773e <+29>: lea rdx,[rax*4+0x0]
0x000056434b0d7746 <+37>: mov rax,QWORD PTR [rbp-0x18]
0x000056434b0d774a <+41>: add rax,rdx
0x000056434b0d774d <+44>: mov edx,DWORD PTR [rbp-0x4]
0x000056434b0d7750 <+47>: movsxd rdx,edx
0x000056434b0d7753 <+50>: lea rcx,[rdx*4+0x0]
0x000056434b0d775b <+58>: mov rdx,QWORD PTR [rbp-0x18]
0x000056434b0d775f <+62>: add rdx,rcx
0x000056434b0d7762 <+65>: mov ecx,DWORD PTR [rdx]
0x000056434b0d7764 <+67>: mov edx,DWORD PTR [rbp-0x4]
0x000056434b0d7767 <+70>: add edx,ecx
0x000056434b0d7769 <+72>: mov DWORD PTR [rax],edx
30 update(arr+i, size-1);
0x000056434b0d776b <+74>: mov eax,DWORD PTR [rbp-0x1c]
0x000056434b0d776e <+77>: lea edx,[rax-0x1]
0x000056434b0d7771 <+80>: mov eax,DWORD PTR [rbp-0x4]
0x000056434b0d7774 <+83>: cdqe
0x000056434b0d7776 <+85>: lea rcx,[rax*4+0x0]
0x000056434b0d777e <+93>: mov rax,QWORD PTR [rbp-0x18]
0x000056434b0d7782 <+97>: add rax,rcx
0x000056434b0d7785 <+100>: mov esi,edx
0x000056434b0d7787 <+102>: mov rdi,rax
0x000056434b0d778a <+105>: call 0x56434b0d7721 <update>
28 for (int i = 0 ; i < size ; i++) {
0x000056434b0d778f <+110>: add DWORD PTR [rbp-0x4],0x1
0x000056434b0d7793 <+114>: mov eax,DWORD PTR [rbp-0x4]
0x000056434b0d7796 <+117>: cmp eax,DWORD PTR [rbp-0x1c]
0x000056434b0d7799 <+120>: jl 0x56434b0d7739 <update+24>
31 }
32 return 1;
0x000056434b0d779b <+122>: mov eax,0x1
33 }
0x000056434b0d77a0 <+127>: leave
0x000056434b0d77a1 <+128>: ret
End of assembler dump.
Contents of ~/.gdbinit
# Security
set auto-load safe-path /
# Misc
set disassembly-flavor intel
set disable-randomization off
set pagination off
set follow-fork-mode child
# History
set history filename ~/.gdbhistory
set history save
set history expansion
disp/10i $pc
handle SIGXCPU SIG33 SIG35 SIGPWR nostop noprint
set tui enable
It is likely that this line in your .gdbinit is the source of your troubles:
set disable-randomization off
By default, GDB disables address space layout randomization (ASLR). That means that the binary under GDB starts at exactly the same address, with exactly the same stack pointer every time it runs. This is on by default precisely so you can set a watchpoints and breakpoints on a given address, and have it fire on each run.
By setting disable-randomization off, you are asking GDB to run your binary the same way it would run outside of GDB, i.e. with ASLR enabled. Now the location of stack variables (and globals for a PIE binary that you have) will change from run to run, and setting a watchpoint on a given stack address will only work randomly and rarely.
You can confirm that that is the cause by issuing info frame and run several times. You'll observe that the location where registers are saved changes between runs.
TL;DR: Don't put settings you don't completely understand into your .gdbinit.
the problem with the code is the clobbering of the stack.
The comment by #Mark Plotnick clarifies the problem and includes a suggestion on how to fix it.

Wrong Visual Studio assembly output?

I wrote this classic function : (in 32-bit mode)
void ex(size_t a, size_t b)
{
size_t c;
c = a;
a = b;
b = c;
}
I call it inside the main as follows :
size_t a = 4;
size_t b = 5;
ex(a,b);
What I was expecting from the assembly code generated when entering the function is something like this :
1-Push the values of b and a in the stack : (which was done)
mov eax,dword ptr [b]
push eax
mov ecx,dword ptr [a]
push ecx
2-Use the values of a and b in the stack :
push ebp
mov ebp, esp
sub esp, 4
c = a;
mov eax, dword ptr [ebp+8]
mov dword ptr [ebp-4], eax
and so on for the other variables.
However, this is what I find when debugging :
push ebp
mov ebp,esp
sub esp,0CCh // normal since it's in debug with ZI option
push ebx
push esi
push edi
lea edi,[ebp-0CCh]
mov ecx,33h
mov eax,0CCCCCCCCh
rep stos dword ptr es:[edi]
size_t c;
c = a;
mov eax,dword ptr [a]
mov dword ptr [c],eax
Why is it using the variable a directly instead of calling the value stored in the stack? I don't understand...
The debugger doesn't show the instruction using ebp to access a. The same syntax is permitted when you write inline assembly. Otherwise the reason that dword ptr still appears.
It is easy to get it your preferred way, right click > untick "Show Symbol Names".
Using the assembly output option (right click on file name, properties, ...), I get what you expect from debug assembly output. This could depend on which version of VS you use. For this example, I used VS2005. I have VS2015 on a different system, but didn't try it yet.
_c$ = -8 ; size = 4
_a$ = 8 ; size = 4
_b$ = 12 ; size = 4
_ex PROC ; COMDAT
push ebp
mov ebp, esp
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd ;fill with 0cccccccch
mov eax, DWORD PTR _a$[ebp]
mov DWORD PTR _c$[ebp], eax
mov eax, DWORD PTR _b$[ebp]
mov DWORD PTR _a$[ebp], eax
mov eax, DWORD PTR _c$[ebp]
mov DWORD PTR _b$[ebp], eax
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 0
_ex ENDP
Note this doesn't work, you need to use pointers for the swap to work.
void ex(size_t *pa, size_t *pb)
{
size_t c;
c = *pa;
*pa = *pb;
*pb = c;
}
which gets translated into:
_c$ = -8 ; size = 4
_pa$ = 8 ; size = 4
_pb$ = 12 ; size = 4
_ex PROC ; COMDAT
push ebp
mov ebp, esp
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd
mov eax, DWORD PTR _pa$[ebp]
mov ecx, DWORD PTR [eax]
mov DWORD PTR _c$[ebp], ecx
mov eax, DWORD PTR _pa$[ebp]
mov ecx, DWORD PTR _pb$[ebp]
mov edx, DWORD PTR [ecx]
mov DWORD PTR [eax], edx
mov eax, DWORD PTR _pb$[ebp]
mov ecx, DWORD PTR _c$[ebp]
mov DWORD PTR [eax], ecx
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 0
_ex ENDP

Why does function's argument get written to main stack frame and not function stack frame?

So I have this piece of C code disassembled in gdb:
0x08048474 <main+0>: push ebp
0x08048475 <main+1>: mov ebp,esp
0x08048477 <main+3>: sub esp,0x8
0x0804847a <main+6>: and esp,0xfffffff0
0x0804847d <main+9>: mov eax,0x0
0x08048482 <main+14>: sub esp,eax
0x08048484 <main+16>: cmp DWORD PTR [ebp+8],0x1
0x08048488 <main+20>: jg 0x80484ab <main+55>
0x0804848a <main+22>: mov eax,DWORD PTR [ebp+12]
0x0804848d <main+25>: mov eax,DWORD PTR [eax]
0x0804848f <main+27>: mov DWORD PTR [esp+4],eax
0x08048493 <main+31>: mov DWORD PTR [esp],0x80485e5
0x0804849a <main+38>: call 0x804831c <printf#plt>
0x0804849f <main+43>: mov DWORD PTR [esp],0x0
0x080484a6 <main+50>: call 0x804833c <exit#plt>
0x080484ab <main+55>: mov eax,DWORD PTR [ebp+12]
0x080484ae <main+58>: add eax,0x4
0x080484b1 <main+61>: mov eax,DWORD PTR [eax]
0x080484b3 <main+63>: mov DWORD PTR [esp],eax
0x080484b6 <main+66>: call 0x8048414 <check_authentication>
0x080484bb <main+71>: test eax,eax
0x080484bd <main+73>: je 0x80484e5 <main+113>
0x080484bf <main+75>: mov DWORD PTR [esp],0x80485fb
0x080484c6 <main+82>: call 0x804831c <printf#plt>
0x080484cb <main+87>: mov DWORD PTR [esp],0x8048619
0x080484d2 <main+94>: call 0x804831c <printf#plt>
0x080484d7 <main+99>: mov DWORD PTR [esp],0x8048630
0x080484de <main+106>: call 0x804831c <printf#plt>
0x080484e3 <main+111>: jmp 0x80484f1 <main+125>
0x080484e5 <main+113>: mov DWORD PTR [esp],0x804864d
0x080484ec <main+120>: call 0x804831c <printf#plt>
0x080484f1 <main+125>: leave
0x080484f2 <main+126>: ret
End of assembler dump.
My main question revolves around these lines:
0x080484b3 <main+63>: mov DWORD PTR [esp],eax
0x080484b6 <main+66>: call 0x8048414 <check_authentication>
When I step through esp = 0xbffff7e0 at this point in time. When I step into the check_authentication function esp = 0xbffff7a0. These lines are writing the address for a (char *) that is the argument for check_authentication but they are writing it at 0xbffff7e0 and not within the stack frame of 0xbffff7a0 - 0xbffff7e0. The only rationale I can think of is that stacks are allocated padding when they are created and because this is probably padding the compiler is doing this to save space? Does anyone know if that's the case? Why doesn't it write the address at esp-4 which is actually inside the function's stack frame?
EDIT: Adding memory outputs to help with my poor explanation
Inside main():
(gdb) x/4xw $esp
0xbffff7e0: 0xb8000ce0 0x08048510 0xbffff848 0xb7eafebc
Inside of check_authentication():
(gdb) x/32xw $esp
0xbffff7a0: 0x00000000 0x08049744 0xbffff7b8 0x080482d9
0xbffff7b0: 0xb7f9f729 0xb7fd6ff4 0xbffff7e8 0x00000000
0xbffff7c0: 0xb7fd6ff4 0xbffff880 0xbffff7e8 0xb7fd6ff4
0xbffff7d0: 0xb7ff47b0 0x08048510 0xbffff7e8 0x080484bb
0xbffff7e0: 0xbffff9b7 0x08048510 0xbffff848 0xb7eafebc
In main 0xbffff7e0 = 0xe00c00b8
In check_authentication 0xbffff7e0 = 0xb7f9ffbf
The change occurs right before jumping into check_authentication but the change is meant for check_authentication so why is it at 0xbffff7e0 which is where mains() esp points to and not within check_function's stack frame. I'm racking my brain trying to figure this out.
There are no "allocated stack frames in x86". Stack is a continuous memory area, with SP pointing to the "top of stack" (its lowest address).
The assembly is not storing at some random address, but in [ESP], so it is within the function's stack frame - between the EBP and the current ESP inclusive.
PUSH opcode is defined not unlike the following C statement:
*--SP = value; // decrease pointer first, then store
whereas POP is
value = *SP++; // take the value pointed to, then increase pointer
Now in this line just before CALL check_authentication the compiler, instead of generating a PUSH, POP pair, just does the equivalent of
*SP = value;

How to understand the assembly result of the tiny c function?

Function in c:
PHPAPI char *php_pcre_replace(char *regex, int regex_len,
char *subject, int subject_len,
zval *replace_val, int is_callable_replace,
int *result_len, int limit, int *replace_count TSRMLS_DC)
{
pcre_cache_entry *pce; /* Compiled regular expression */
/* Compile regex or get it from cache. */
if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
return NULL;
}
return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
}
Its assembly:
php5ts!php_pcre_replace:
1015db70 8b442408 mov eax,dword ptr [esp+8]
1015db74 8b4c2404 mov ecx,dword ptr [esp+4]
1015db78 56 push esi
1015db79 8b74242c mov esi,dword ptr [esp+2Ch]
1015db7d 56 push esi
1015db7e 50 push eax
1015db7f 51 push ecx
1015db80 e8cbeaffff call php5ts!pcre_get_compiled_regex_cache (1015c650)
1015db85 83c40c add esp,0Ch
1015db88 85c0 test eax,eax
1015db8a 7502 jne php5ts!php_pcre_replace+0x1e (1015db8e)
php5ts!php_pcre_replace+0x1c:
1015db8c 5e pop esi
1015db8d c3 ret
php5ts!php_pcre_replace+0x1e:
1015db8e 8b542428 mov edx,dword ptr [esp+28h]
1015db92 8b4c2424 mov ecx,dword ptr [esp+24h]
1015db96 56 push esi
1015db97 52 push edx
1015db98 8b542428 mov edx,dword ptr [esp+28h]
1015db9c 51 push ecx
1015db9d 8b4c2428 mov ecx,dword ptr [esp+28h]
1015dba1 52 push edx
1015dba2 8b542428 mov edx,dword ptr [esp+28h]
1015dba6 51 push ecx
1015dba7 8b4c2428 mov ecx,dword ptr [esp+28h]
1015dbab 52 push edx
1015dbac 8b542428 mov edx,dword ptr [esp+28h]
1015dbb0 51 push ecx
1015dbb1 52 push edx
1015dbb2 50 push eax
1015dbb3 e808000000 call php5ts!php_pcre_replace_impl (1015dbc0)
1015dbb8 83c424 add esp,24h
1015dbbb 5e pop esi
1015dbbc c3 ret
As we can see that pcre_get_compiled_regex_cache takes 2 parameters,but why 3 parameters are pushed into the stack?
1015db7d 56 push esi
1015db7e 50 push eax
1015db7f 51 push ecx
1015db80 e8cbeaffff call php5ts!pcre_get_compiled_regex_cache (1015c650)
I guess the TSRMLS_DC and TSRMLS_CC macros contain some hidden extra parameters. A quick google showed up these macros in PHP programming as global state data. It makes sense, the macro in the function declaration must have a parameter which is at [esp+02ch] on the stack - the tenth parameter - you have nine already, and is passed as the first value on the stack (values are pushed right to left), followed by regex_len and then regex.

Resources