How to understand the assembly result of the tiny c function? - c

Function in c:
/*
 * Thin wrapper around php_pcre_replace_impl(): looks up (or compiles) the
 * cache entry for `regex` and forwards all remaining arguments unchanged.
 *
 * Returns NULL when pcre_get_compiled_regex_cache() returns NULL; otherwise
 * returns whatever php_pcre_replace_impl() returns.
 *
 * NOTE(review): TSRMLS_DC / TSRMLS_CC are macros defined outside this snippet.
 * The disassembly of this function pushes one more argument than is written
 * here at each call site, which suggests they expand to an extra trailing
 * parameter/argument -- confirm against the PHP headers.
 */
PHPAPI char *php_pcre_replace(char *regex, int regex_len,
char *subject, int subject_len,
zval *replace_val, int is_callable_replace,
int *result_len, int limit, int *replace_count TSRMLS_DC)
{
pcre_cache_entry *pce; /* Compiled regular expression */
/* Compile regex or get it from cache. */
if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
/* No compiled entry available: report failure to the caller. */
return NULL;
}
/* Delegate the actual replacement, passing the compiled entry in place of
   the (regex, regex_len) pair. */
return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
}
Its assembly:
php5ts!php_pcre_replace:
1015db70 8b442408 mov eax,dword ptr [esp+8]
1015db74 8b4c2404 mov ecx,dword ptr [esp+4]
1015db78 56 push esi
1015db79 8b74242c mov esi,dword ptr [esp+2Ch]
1015db7d 56 push esi
1015db7e 50 push eax
1015db7f 51 push ecx
1015db80 e8cbeaffff call php5ts!pcre_get_compiled_regex_cache (1015c650)
1015db85 83c40c add esp,0Ch
1015db88 85c0 test eax,eax
1015db8a 7502 jne php5ts!php_pcre_replace+0x1e (1015db8e)
php5ts!php_pcre_replace+0x1c:
1015db8c 5e pop esi
1015db8d c3 ret
php5ts!php_pcre_replace+0x1e:
1015db8e 8b542428 mov edx,dword ptr [esp+28h]
1015db92 8b4c2424 mov ecx,dword ptr [esp+24h]
1015db96 56 push esi
1015db97 52 push edx
1015db98 8b542428 mov edx,dword ptr [esp+28h]
1015db9c 51 push ecx
1015db9d 8b4c2428 mov ecx,dword ptr [esp+28h]
1015dba1 52 push edx
1015dba2 8b542428 mov edx,dword ptr [esp+28h]
1015dba6 51 push ecx
1015dba7 8b4c2428 mov ecx,dword ptr [esp+28h]
1015dbab 52 push edx
1015dbac 8b542428 mov edx,dword ptr [esp+28h]
1015dbb0 51 push ecx
1015dbb1 52 push edx
1015dbb2 50 push eax
1015dbb3 e808000000 call php5ts!php_pcre_replace_impl (1015dbc0)
1015dbb8 83c424 add esp,24h
1015dbbb 5e pop esi
1015dbbc c3 ret
As we can see, pcre_get_compiled_regex_cache takes 2 parameters, so why are 3 parameters pushed onto the stack?
1015db7d 56 push esi
1015db7e 50 push eax
1015db7f 51 push ecx
1015db80 e8cbeaffff call php5ts!pcre_get_compiled_regex_cache (1015c650)

I guess the TSRMLS_DC and TSRMLS_CC macros contain a hidden extra parameter. A quick Google search shows that these macros are used in PHP programming to pass global state data. That makes sense: the macro in the function declaration must add a parameter, which is found at [esp+2Ch] on the stack (after the push esi). That is the tenth parameter, since you already have nine. Because arguments are pushed right to left, it is pushed first, followed by regex_len and then regex.

Related

how to draw the stack after analysing the assembly code

Please, I need someone to assist me with my assignment.
q1. Examine the code below and Draw the stack frame after analysing the assembly code when function1, function2 and function3 are called by the main program for a 32-bit system. Figure 2 shows a sample stack frame of a function.
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/* Returns the sum x + y + z. */
int function1(int x, int y, int z)
{
int result_func1;
result_func1 = x + y + z;
return result_func1;
}
/*
 * Copies input_string into a 20-byte local buffer, prints input_string,
 * and returns x - y.
 *
 * NOTE(review): strcpy() does no bounds checking, so an input_string of 20 or
 * more characters (plus its terminating NUL) writes past the end of `buffer`
 * into the rest of this function's stack frame.
 */
int function2(int x, int y, char* input_string)
{
int result_func2;
char buffer[20];
/* Unbounded copy into the fixed-size stack buffer (see note above). */
strcpy(buffer, input_string);
printf("your input string %s is copied in the buffer \n", input_string);
result_func2= x - y;
return result_func2;
}
/*
 * Prints result1 twice, using the same message both times.
 *
 * NOTE(review): result2 is never used, and both lines say "function 1";
 * presumably the second printf was meant to report result2 -- confirm with
 * the assignment author.
 */
void function3(int result1, int result2)
{
printf("The result of function 1 is %d\n", result1);
printf("The result of function 1 is %d\n", result1);
}
/*
 * Prints a message and terminates the process with exit(-1).
 * No call to this function appears anywhere in this program.
 */
void function4(void)
{
printf("The function never gets called is \n");
exit(-1);
}
/*
 * Calls function1, function2 and function3 in that order.
 *
 * NOTE(review): argv[1] is passed to function2 without checking argc.
 * result2 is assigned but never read, and function3 receives result1 for
 * both of its arguments.
 */
int main(int argc, char* argv[])
{
int result1;
int result2;
result1 = function1(5, 10, 15);
/* argv[1] becomes function2's input_string. */
result2 = function2(20, 8, argv[1]);
/* Both arguments are result1; result2 is not passed. */
function3(result1, result1);
}
Assembly code obtained from the analysis (64-bit disassembly of main):
0x00005555555552c0 <+0>: endbr64
0x00005555555552c4 <+4>: push %rbp
0x00005555555552c5 <+5>: mov %rsp,%rbp
0x00005555555552c8 <+8>: sub $0x20,%rsp
0x00005555555552cc <+12>: mov %edi,-0x14(%rbp)
0x00005555555552cf <+15>: mov %rsi,-0x20(%rbp)
0x00005555555552d3 <+19>: mov $0xf,%edx
0x00005555555552d8 <+24>: mov $0xa,%esi
0x00005555555552dd <+29>: mov $0x5,%edi
0x00005555555552e2 <+34>: callq 0x5555555551c9 <function1>
0x00005555555552e7 <+39>: mov %eax,-0x8(%rbp)
0x00005555555552ea <+42>: mov -0x20(%rbp),%rax
0x00005555555552ee <+46>: add $0x8,%rax
0x00005555555552f2 <+50>: mov (%rax),%rax
0x00005555555552f5 <+53>: mov %rax,%rdx
0x00005555555552f8 <+56>: mov $0x8,%esi
0x00005555555552fd <+61>: mov $0x14,%edi
0x0000555555555302 <+66>: callq 0x5555555551ef <function2>
0x0000555555555307 <+71>: mov %eax,-0x4(%rbp)
0x000055555555530a <+74>: mov -0x8(%rbp),%edx
0x000055555555530d <+77>: mov -0x8(%rbp),%eax
0x0000555555555310 <+80>: mov %edx,%esi
0x0000555555555312 <+82>: mov %eax,%edi
0x0000555555555314 <+84>: callq 0x555555555261 <function3>
0x0000555555555319 <+89>: mov $0x0,%eax
0x000055555555531e <+94>: leaveq
0x000055555555531f <+95>: retq
Modified (actual 32-bit assembly code):
# function1(x, y, z): returns x + y + z in eax.
# Stack frame after the prologue (offsets from ebp):
#   [ebp+16] z
#   [ebp+12] y
#   [ebp+8]  x
#   [ebp+4]  return address
#   [ebp]    saved ebp
# No stack slot is allocated for result_func1; the sum is built in eax.
function1:
push ebp
mov ebp, esp
# eax = y
mov eax, DWORD PTR [ebp+12]
# eax += x
add eax, DWORD PTR [ebp+8]
# eax += z
add eax, DWORD PTR [ebp+16]
pop ebp
ret
# Format string for function2's printf call.
.LC0:
.string "your input string %s is copied in the buffer \n"
# function2(x, y, input_string): strcpy into a local buffer, printf, return x - y.
# Stack frame after `sub esp, 44` (offsets from ebp):
#   [ebp+16] input_string
#   [ebp+12] y
#   [ebp+8]  x
#   [ebp+4]  return address
#   [ebp]    saved ebp
#   [ebp-4]  saved ebx
#   [ebp-28] start of buffer (the address handed to strcpy as destination)
#   esp = ebp-48 before the call arguments are pushed
function2:
push ebp
mov ebp, esp
# Preserve ebx; it lands at [ebp-4].
push ebx
# eax = address of buffer.
lea eax, [ebp-28]
sub esp, 44
# ebx = input_string, kept in ebx so it survives the strcpy call.
mov ebx, DWORD PTR [ebp+16]
# strcpy arguments, pushed right to left: source, then destination.
push ebx
push eax
call strcpy
# Discard the two strcpy arguments.
pop eax
pop edx
# printf arguments: input_string, then the format string.
push ebx
push OFFSET FLAT:.LC0
call printf
# eax = x
mov eax, DWORD PTR [ebp+8]
# Restore the caller's ebx.
mov ebx, DWORD PTR [ebp-4]
# eax = x - y (return value)
sub eax, DWORD PTR [ebp+12]
leave
ret
# Format string shared by both printf calls in function3.
.LC1:
.string "The result of function 1 is %d\n"
# function3(result1, result2): prints result1 twice.
# Stack frame after `sub esp, 12` (offsets from ebp):
#   [ebp+12] result2 (never read; overwritten below)
#   [ebp+8]  result1
#   [ebp+4]  return address
#   [ebp]    saved ebp
#   [ebp-4]  saved ebx
function3:
push ebp
mov ebp, esp
push ebx
sub esp, 12
# ebx = result1
mov ebx, DWORD PTR [ebp+8]
# First printf(.LC1, result1).
push ebx
push OFFSET FLAT:.LC1
call printf
# Prepare the second printf as a tail call by rewriting function3's own
# incoming argument slots: slot 2 = result1, slot 1 = format string.
mov DWORD PTR [ebp+12], ebx
add esp, 16
mov ebx, DWORD PTR [ebp-4]
mov DWORD PTR [ebp+8], OFFSET FLAT:.LC1
leave
# Jump instead of call: printf returns directly to function3's caller.
jmp printf
# Message for function4. Unlike the literal in the C source, it has no
# trailing "\n", and it is printed with puts rather than printf.
.LC2:
.string "The function never gets called is "
# function4(): prints .LC2 and calls exit(-1). There is no ret after the
# call to exit.
function4:
push ebp
mov ebp, esp
sub esp, 20
push OFFSET FLAT:.LC2
call puts
# Reuse the top-of-stack slot (previously the puts argument) for exit's argument.
mov DWORD PTR [esp], -1
call exit
# main(argc, argv): calls function2(20, 8, argv[1]) and function3(30, 30).
# There is no call to function1 in this listing; the constant 30
# (= 5 + 10 + 15) is pushed directly as both arguments of function3.
# Stack frame once set up (offsets from ebp):
#   [ebp+4]  copy of the return address (re-pushed after aligning esp)
#   [ebp]    saved ebp
#   [ebp-4]  saved ecx (address of the original argc slot)
main:
# ecx = address of argc in the caller's frame ([esp] is the return address).
lea ecx, [esp+4]
# Round esp down to a 16-byte boundary.
and esp, -16
# Push a copy of the return address above the new frame.
push DWORD PTR [ecx-4]
push ebp
mov ebp, esp
push ecx
sub esp, 8
# eax = argv (stored one slot above argc).
mov eax, DWORD PTR [ecx+4]
# function2 arguments, pushed right to left: argv[1], 8, 20.
push DWORD PTR [eax+4]
push 8
push 20
call function2
# Pop two of the three function2 arguments; this overwrites ecx, which is
# reloaded from [ebp-4] below.
pop edx
pop ecx
# function3 arguments: both are the constant 30.
push 30
push 30
call function3
# Recover the address of the original argc slot.
mov ecx, DWORD PTR [ebp-4]
# Return value 0.
xor eax, eax
leave
# Restore the pre-alignment esp so that ret uses the original return address.
lea esp, [ecx-4]
ret
Please, I need someone who can assist me with how to start.

Question about address values and pointer increment? [duplicate]

This question already has answers here:
Why a pointer + 1 add 4 actually
(6 answers)
Closed 3 years ago.
Let's say that I have a code like below
#include <stdio.h>
/*
 * Walks a char array and an int array with pointers, printing each element's
 * address and value. Each pointer is advanced with `+= 1`, which moves it to
 * the next element of its own type.
 */
int main(void)
{
char char_array[5] = {'a', 'b', 'c', 'd', 'e'};
int int_array[5] = {1, 2, 3, 4, 5};
int i;
char *char_ptr;
int *int_ptr;
/* Point each pointer at the first element of its array. */
char_ptr = char_array;
int_ptr = int_array;
for(i = 0; i < 5; i++){
printf("[char_ptr]For %p address pointing to %c value\n", char_ptr, *char_ptr);
/* Advance by one char element. */
char_ptr +=1;
}
for(i = 0; i < 5; i++){
printf("[int_ptr]For %p address pointing to %d value\n", int_ptr, *int_ptr);
/* Advance by one int element; the printed addresses step by 4. */
int_ptr += 1;
}
return 0;
}
Then the output will be
[char_ptr]For 0xbf7fc37f address pointing to a value
[char_ptr]For 0xbf7fc380 address pointing to b value
[char_ptr]For 0xbf7fc381 address pointing to c value
[char_ptr]For 0xbf7fc382 address pointing to d value
[char_ptr]For 0xbf7fc383 address pointing to e value
[int_ptr]For 0xbf7fc368 address pointing to 1 value
[int_ptr]For 0xbf7fc36c address pointing to 2 value
[int_ptr]For 0xbf7fc370 address pointing to 3 value
[int_ptr]For 0xbf7fc374 address pointing to 4 value
[int_ptr]For 0xbf7fc378 address pointing to 5 value
My question here is: why is the address increment different for int_ptr even though I only added 1 to the pointer? I know that it points to an int, which has a size of 4 bytes, but I'm wondering how the program added 4 to the address even though I only added one to it.
Thank you
+addition
Sorry for the confusion that I caused by tagging this as reverse engineering; I was using gdb to look at some asm code in order to answer my question.
0x00001199 <+0>: lea ecx,[esp+0x4]
0x0000119d <+4>: and esp,0xfffffff0
0x000011a0 <+7>: push DWORD PTR [ecx-0x4]
0x000011a3 <+10>: push ebp
0x000011a4 <+11>: mov ebp,esp
0x000011a6 <+13>: push ebx
0x000011a7 <+14>: push ecx
0x000011a8 <+15>: sub esp,0x30
0x000011ab <+18>: call 0x10a0 <__x86.get_pc_thunk.bx>
0x000011b0 <+23>: add ebx,0x2e50
0x000011b6 <+29>: mov DWORD PTR [ebp-0x19],0x64636261
0x000011bd <+36>: mov BYTE PTR [ebp-0x15],0x65
0x000011c1 <+40>: mov DWORD PTR [ebp-0x30],0x1
0x000011c8 <+47>: mov DWORD PTR [ebp-0x2c],0x2
0x000011cf <+54>: mov DWORD PTR [ebp-0x28],0x3
0x000011d6 <+61>: mov DWORD PTR [ebp-0x24],0x4
0x000011dd <+68>: mov DWORD PTR [ebp-0x20],0x5
0x000011e4 <+75>: lea eax,[ebp-0x19]
0x000011e7 <+78>: mov DWORD PTR [ebp-0x10],eax
0x000011ea <+81>: lea eax,[ebp-0x30]
0x000011ed <+84>: mov DWORD PTR [ebp-0x14],eax
0x000011f0 <+87>: mov DWORD PTR [ebp-0xc],0x0
0x000011f7 <+94>: jmp 0x1220 <main+135>
0x000011f9 <+96>: mov eax,DWORD PTR [ebp-0x10]
0x000011fc <+99>: movzx eax,BYTE PTR [eax]
0x000011ff <+102>: movsx eax,al
0x00001202 <+105>: sub esp,0x4
0x00001205 <+108>: push eax
0x00001206 <+109>: push DWORD PTR [ebp-0x10]
0x00001209 <+112>: lea eax,[ebx-0x1ff8]
0x0000120f <+118>: push eax
0x00001210 <+119>: call 0x1030 <printf@plt>
0x00001215 <+124>: add esp,0x10
0x00001218 <+127>: add DWORD PTR [ebp-0x10],0x1
0x0000121c <+131>: add DWORD PTR [ebp-0xc],0x1
0x00001220 <+135>: cmp DWORD PTR [ebp-0xc],0x4
0x00001224 <+139>: jle 0x11f9 <main+96>
0x00001226 <+141>: mov DWORD PTR [ebp-0xc],0x0
0x0000122d <+148>: jmp 0x1252 <main+185>
0x0000122f <+150>: mov eax,DWORD PTR [ebp-0x14]
0x00001232 <+153>: mov eax,DWORD PTR [eax]
0x00001234 <+155>: sub esp,0x4
0x00001237 <+158>: push eax
0x00001238 <+159>: push DWORD PTR [ebp-0x14]
0x0000123b <+162>: lea eax,[ebx-0x1fc8]
0x00001241 <+168>: push eax
0x00001242 <+169>: call 0x1030 <printf@plt>
0x00001247 <+174>: add esp,0x10
0x0000124a <+177>: add DWORD PTR [ebp-0x14],0x4
0x0000124e <+181>: add DWORD PTR [ebp-0xc],0x1
0x00001252 <+185>: cmp DWORD PTR [ebp-0xc],0x4
0x00001256 <+189>: jle 0x122f <main+150>
0x00001258 <+191>: mov eax,0x0
0x0000125d <+196>: lea esp,[ebp-0x8]
0x00001260 <+199>: pop ecx
0x00001261 <+200>: pop ebx
0x00001262 <+201>: pop ebp
0x00001263 <+202>: lea esp,[ecx-0x4]
0x00001266 <+205>: ret
Most modern CPUs are byte addressable. That means that each memory address refers to a single byte. Thus, for every char (which occupies 1 byte) in memory, you only need one address.
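If you're working with a contiguous array of ints (which take up 4 bytes on most machines), however, your addresses will have to "hop" 4 at a time, because each int occupies 4 bytes and therefore 4 consecutive memory addresses. In C, pointer arithmetic is scaled by the size of the pointed-to type, so the compiler turns int_ptr += 1 into an addition of sizeof(int) bytes. You can see this in your disassembly: char_ptr += 1 became add DWORD PTR [ebp-0x10],0x1, while int_ptr += 1 became add DWORD PTR [ebp-0x14],0x4.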
In your example:
0xbf7fc368 refers to the first byte of the first `int`.
0xbf7fc369 refers to the second byte of the first `int`.
0xbf7fc36a refers to the third byte of the first `int`.
0xbf7fc36b refers to the fourth byte of the first `int`.
0xbf7fc36c refers to the first byte of the second `int`.
... and so on.

Why does gcc use a relative address to the function pointer in assembly? [duplicate]

This question already has answers here:
Why is the address of static variables relative to the Instruction Pointer?
(1 answer)
32-bit absolute addresses no longer allowed in x86-64 Linux?
(1 answer)
Closed 4 years ago.
The C source:
/* Returns a + b. */
int sum(int a, int b) {
return a + b;
}
/*
 * Stores the address of sum in three function pointers, then calls sum once
 * through a pointer and once directly. ptr_sum_2, ptr_sum_3, a and b are not
 * used after being initialised; the function always returns 0.
 */
int main() {
int (*ptr_sum_1)(int,int) = sum; // assign the address of the "sum"
int (*ptr_sum_2)(int,int) = sum; // to the function pointer
int (*ptr_sum_3)(int,int) = sum;
int a = (*ptr_sum_1)(2,4); // call the "sum" through the pointer
int b = sum(2,4); // call the "sum" by usual way
return 0;
}
The crucial part of the assembly code:
lea rax, sum[rip]
mov QWORD PTR -24[rbp], rax
lea rax, sum[rip]
mov QWORD PTR -16[rbp], rax
lea rax, sum[rip]
mov QWORD PTR -8[rbp], rax
The executing program instructions from GDB:
0x5fa <sum>: push rbp
0x5fb <sum+1>: mov rbp,rsp
0x5fe <sum+4>: mov DWORD PTR [rbp-0x4],edi
0x601 <sum+7>: mov DWORD PTR [rbp-0x8],esi
0x604 <sum+10>: mov edx,DWORD PTR [rbp-0x4]
0x607 <sum+13>: mov eax,DWORD PTR [rbp-0x8]
0x60a <sum+16>: add eax,edx
0x60c <sum+18>: pop rbp
0x60d <sum+19>: ret
0x60e <main>: push rbp
0x60f <main+1>: mov rbp,rsp
0x612 <main+4>: sub rsp,0x20
0x616 <main+8>: lea rax,[rip+0xffffffffffffffdd] # 0x5fa <sum>
0x61d <main+15>: mov QWORD PTR [rbp-0x18],rax
0x621 <main+19>: lea rax,[rip+0xffffffffffffffd2] # 0x5fa <sum>
0x628 <main+26>: mov QWORD PTR [rbp-0x10],rax
0x62c <main+30>: lea rax,[rip+0xffffffffffffffc7] # 0x5fa <sum>
0x633 <main+37>: mov QWORD PTR [rbp-0x8],rax
0x637 <main+41>: mov rax,QWORD PTR [rbp-0x18]
0x63b <main+45>: mov esi,0x4
0x640 <main+50>: mov edi,0x2
0x645 <main+55>: call rax
0x647 <main+57>: mov DWORD PTR [rbp-0x20],eax
0x64a <main+60>: mov esi,0x4
0x64f <main+65>: mov edi,0x2
0x654 <main+70>: call 0x5fa <sum>
0x659 <main+75>: mov DWORD PTR [rbp-0x1c],eax
0x65c <main+78>: mov eax,0x0
0x661 <main+83>: leave
0x662 <main+84>: ret
I think that the sum label is just the starting address of the sum procedure - 0x5fa, so I don't understand why gcc can't use it directly, but uses the calculation sum[rip] for this.
Question:
Why is sum[rip] used in the lea rax, sum[rip] instruction in assembly, instead of the simple sum label, e.g. lea rax, sum?
Will the mov rax, 0x5fa instruction do the same? Because we know the sum address after linking: the call 0x5fa <sum> instruction just uses it directly.
I believe that it might depend on your build of GCC, but on the Linux distributions that I use everything is set up to default to PIC builds. That's Position Independent Code. It's better for both shared libraries and executables, because the result can be mapped into memory anywhere without needing a fixup pass. It's better for security because ASLR can be applied.
With x86-64 there's no significant penalty for using PIC so why wouldn't it be used everywhere?

gdb watchpoint not activated

Consider the code:
#include <stdio.h>
#include <stdlib.h>
int update (int *arr, int size);
#define SIZE 10
/*
 * Fills x with 0, 2, 4, ..., passes it to update(), then prints every
 * element. The two marked lines are where the question's breakpoints are set.
 */
int main() { // <---------------------- Breakpoint 1
int x[SIZE];
// Initialize array
for (int c = 0 ; c < SIZE ; c++) {
x[c] = c * 2;
}
// Do some random updates to an array
// &x is a pointer to the whole array; the cast converts it to a pointer
// to its first int.
update((int*) &x, SIZE);
// Print the elements
for (int c = 0 ; c < SIZE ; c++) {
printf("%d\n", x[c]);
}
return EXIT_SUCCESS;
} // <----------------------Breakpoint 2
/*
 * For each i in [0, size): adds i to arr[i], then recurses on the sub-range
 * starting at arr + i with length size - 1. Always returns 1. The recursion
 * ends because size shrinks by one per level and the loop body does not run
 * once size <= 0.
 *
 * NOTE(review): the recursive call does not shrink the range to match the
 * new start. update(arr + i, size - 1) touches elements up to index
 * i + size - 2 of the original arr, which is past the last valid index
 * (size - 1) whenever i >= 2. With the 10-element array passed by main this
 * writes beyond the array, into the rest of main's stack frame.
 */
int update (int *arr, int size) {
for (int i = 0 ; i < size ; i++) {
arr[i] += i;
/* Start moves forward by i, but the length only drops by 1 (see note). */
update(arr+i, size-1);
}
return 1;
}
Result of running info frame at breakpoint 1:
Stack level 0, frame at 0x7ffc176b2610:
rip = 0x56434b0d76b8 in main (array.c:12); saved rip = 0x7f8190fb92b1
source language c.
Arglist at 0x7ffc176b2600, args:
Locals at 0x7ffc176b2600, Previous frame's sp is 0x7ffc176b2610
Saved registers:
rbp at 0x7ffc176b2600, rip at 0x7ffc176b2608
Result of running info frame at breakpoint 2:
Stack level 0, frame at 0x7ffc176b2610:
rip = 0x56434b0d771a in main (array.c:24); saved rip = 0x2d28490fd6501
source language c.
Arglist at 0x7ffc176b2600, args:
Locals at 0x7ffc176b2600, Previous frame's sp is 0x7ffc176b2610
Saved registers:
rbp at 0x7ffc176b2600, rip at 0x7ffc176b2608
We see that main()'s saved return address rip at 0x7ffc176b2608 is mutated from 0x7f8190fb92b1 to 0x2d28490fd6501 between the two breakpoints.
However, setting a watchpoint on the address of rip with watch * 0x7ffc176b2608 and running the executable anew does not pause execution between the breakpoints as expected.
How can this be?
-----------EDIT-----------
Output of disassemble /s main:
Dump of assembler code for function main:
array.c:
8 int main() {
0x000056434b0d76b0 <+0>: push rbp
0x000056434b0d76b1 <+1>: mov rbp,rsp
0x000056434b0d76b4 <+4>: sub rsp,0x30
9 int x[SIZE];
10
11 // Initialize array
12 for (int c = 0 ; c < SIZE ; c++) {
0x000056434b0d76b8 <+8>: mov DWORD PTR [rbp-0x4],0x0
0x000056434b0d76bf <+15>: jmp 0x56434b0d76d4 <main+36>
13 x[c] = c * 2;
0x000056434b0d76c1 <+17>: mov eax,DWORD PTR [rbp-0x4]
0x000056434b0d76c4 <+20>: lea edx,[rax+rax*1]
0x000056434b0d76c7 <+23>: mov eax,DWORD PTR [rbp-0x4]
0x000056434b0d76ca <+26>: cdqe
0x000056434b0d76cc <+28>: mov DWORD PTR [rbp+rax*4-0x30],edx
12 for (int c = 0 ; c < SIZE ; c++) {
0x000056434b0d76d0 <+32>: add DWORD PTR [rbp-0x4],0x1
0x000056434b0d76d4 <+36>: cmp DWORD PTR [rbp-0x4],0x9
0x000056434b0d76d8 <+40>: jle 0x56434b0d76c1 <main+17>
14 }
15
16 // Do some random updates to an array
17 update((int*) &x, SIZE);
0x000056434b0d76da <+42>: lea rax,[rbp-0x30]
0x000056434b0d76de <+46>: mov esi,0xa
0x000056434b0d76e3 <+51>: mov rdi,rax
0x000056434b0d76e6 <+54>: call 0x56434b0d7721 <update>
18
19 // Print the elements
20 for (int c = 0 ; c < SIZE ; c++) {
0x000056434b0d76eb <+59>: mov DWORD PTR [rbp-0x8],0x0
0x000056434b0d76f2 <+66>: jmp 0x56434b0d7714 <main+100>
21 printf("%d\n", x[c]);
0x000056434b0d76f4 <+68>: mov eax,DWORD PTR [rbp-0x8]
0x000056434b0d76f7 <+71>: cdqe
0x000056434b0d76f9 <+73>: mov eax,DWORD PTR [rbp+rax*4-0x30]
0x000056434b0d76fd <+77>: mov esi,eax
0x000056434b0d76ff <+79>: lea rdi,[rip+0x12e] # 0x56434b0d7834
0x000056434b0d7706 <+86>: mov eax,0x0
0x000056434b0d770b <+91>: call 0x56434b0d7560 <printf@plt>
20 for (int c = 0 ; c < SIZE ; c++) {
0x000056434b0d7710 <+96>: add DWORD PTR [rbp-0x8],0x1
0x000056434b0d7714 <+100>: cmp DWORD PTR [rbp-0x8],0x9
0x000056434b0d7718 <+104>: jle 0x56434b0d76f4 <main+68>
22 }
23
24 return EXIT_SUCCESS;
=> 0x000056434b0d771a <+106>: mov eax,0x0
25 }
0x000056434b0d771f <+111>: leave
0x000056434b0d7720 <+112>: ret
End of assembler dump.
Output of disassemble /s update:
Dump of assembler code for function update:
array.c:
27 int update (int *arr, int size) {
0x000056434b0d7721 <+0>: push rbp
0x000056434b0d7722 <+1>: mov rbp,rsp
0x000056434b0d7725 <+4>: sub rsp,0x20
0x000056434b0d7729 <+8>: mov QWORD PTR [rbp-0x18],rdi
0x000056434b0d772d <+12>: mov DWORD PTR [rbp-0x1c],esi
28 for (int i = 0 ; i < size ; i++) {
0x000056434b0d7730 <+15>: mov DWORD PTR [rbp-0x4],0x0
0x000056434b0d7737 <+22>: jmp 0x56434b0d7793 <update+114>
29 arr[i] += i;
0x000056434b0d7739 <+24>: mov eax,DWORD PTR [rbp-0x4]
0x000056434b0d773c <+27>: cdqe
0x000056434b0d773e <+29>: lea rdx,[rax*4+0x0]
0x000056434b0d7746 <+37>: mov rax,QWORD PTR [rbp-0x18]
0x000056434b0d774a <+41>: add rax,rdx
0x000056434b0d774d <+44>: mov edx,DWORD PTR [rbp-0x4]
0x000056434b0d7750 <+47>: movsxd rdx,edx
0x000056434b0d7753 <+50>: lea rcx,[rdx*4+0x0]
0x000056434b0d775b <+58>: mov rdx,QWORD PTR [rbp-0x18]
0x000056434b0d775f <+62>: add rdx,rcx
0x000056434b0d7762 <+65>: mov ecx,DWORD PTR [rdx]
0x000056434b0d7764 <+67>: mov edx,DWORD PTR [rbp-0x4]
0x000056434b0d7767 <+70>: add edx,ecx
0x000056434b0d7769 <+72>: mov DWORD PTR [rax],edx
30 update(arr+i, size-1);
0x000056434b0d776b <+74>: mov eax,DWORD PTR [rbp-0x1c]
0x000056434b0d776e <+77>: lea edx,[rax-0x1]
0x000056434b0d7771 <+80>: mov eax,DWORD PTR [rbp-0x4]
0x000056434b0d7774 <+83>: cdqe
0x000056434b0d7776 <+85>: lea rcx,[rax*4+0x0]
0x000056434b0d777e <+93>: mov rax,QWORD PTR [rbp-0x18]
0x000056434b0d7782 <+97>: add rax,rcx
0x000056434b0d7785 <+100>: mov esi,edx
0x000056434b0d7787 <+102>: mov rdi,rax
0x000056434b0d778a <+105>: call 0x56434b0d7721 <update>
28 for (int i = 0 ; i < size ; i++) {
0x000056434b0d778f <+110>: add DWORD PTR [rbp-0x4],0x1
0x000056434b0d7793 <+114>: mov eax,DWORD PTR [rbp-0x4]
0x000056434b0d7796 <+117>: cmp eax,DWORD PTR [rbp-0x1c]
0x000056434b0d7799 <+120>: jl 0x56434b0d7739 <update+24>
31 }
32 return 1;
0x000056434b0d779b <+122>: mov eax,0x1
33 }
0x000056434b0d77a0 <+127>: leave
0x000056434b0d77a1 <+128>: ret
End of assembler dump.
Contents of ~/.gdbinit
# Security
set auto-load safe-path /
# Misc
set disassembly-flavor intel
set disable-randomization off
set pagination off
set follow-fork-mode child
# History
set history filename ~/.gdbhistory
set history save
set history expansion
disp/10i $pc
handle SIGXCPU SIG33 SIG35 SIGPWR nostop noprint
set tui enable
It is likely that this line in your .gdbinit is the source of your troubles:
set disable-randomization off
By default, GDB disables address space layout randomization (ASLR). That means that the binary under GDB starts at exactly the same address, with exactly the same stack pointer, every time it runs. This is on by default precisely so that you can set watchpoints and breakpoints on a given address and have them fire on each run.
By setting disable-randomization off, you are asking GDB to run your binary the same way it would run outside of GDB, i.e. with ASLR enabled. Now the location of stack variables (and globals for a PIE binary that you have) will change from run to run, and setting a watchpoint on a given stack address will only work randomly and rarely.
You can confirm that that is the cause by issuing info frame and run several times. You'll observe that the location where registers are saved changes between runs.
TL;DR: Don't put settings you don't completely understand into your .gdbinit.
The problem with the code is the clobbering of the stack.
The comment by @Mark Plotnick clarifies the problem and includes a suggestion on how to fix it.

Wrong Visual Studio assembly output?

I wrote this classic function : (in 32-bit mode)
/*
 * Exchanges the values of its two parameters through the temporary c.
 * a and b are passed by value, so only this function's local copies are
 * exchanged; nothing is returned or written through a pointer.
 */
void ex(size_t a, size_t b)
{
size_t c;
c = a;
a = b;
b = c;
}
I call it inside the main as follows :
size_t a = 4;
size_t b = 5;
ex(a,b);
What I was expecting from the assembly code generated when entering the function is something like this :
1-Push the values of b and a in the stack : (which was done)
mov eax,dword ptr [b]
push eax
mov ecx,dword ptr [a]
push ecx
2-Use the values of a and b in the stack :
push ebp
mov ebp, esp
sub esp, 4
c = a;
mov eax, dword ptr [ebp+8]
mov dword ptr [ebp-4], eax
and so on for the other variables.
However, this is what I find when debugging :
push ebp
mov ebp,esp
sub esp,0CCh // normal since it's in debug with ZI option
push ebx
push esi
push edi
lea edi,[ebp-0CCh]
mov ecx,33h
mov eax,0CCCCCCCCh
rep stos dword ptr es:[edi]
size_t c;
c = a;
mov eax,dword ptr [a]
mov dword ptr [c],eax
Why is it using the variable a directly instead of reading the value stored on the stack? I don't understand...
The debugger doesn't show that the instruction uses ebp to access a; it displays the symbol name in place of the ebp-relative operand. The same syntax is permitted when you write inline assembly. That is also the reason that dword ptr still appears.
It is easy to get it your preferred way, right click > untick "Show Symbol Names".
Using the assembly output option (right click on file name, properties, ...), I get what you expect from debug assembly output. This could depend on which version of VS you use. For this example, I used VS2005. I have VS2015 on a different system, but didn't try it yet.
; ex(a, b), debug build: exchanges the two by-value parameters in place in
; their own stack slots, using the local c as the temporary.
; The three symbols below are the ebp-relative offsets used as _name$[ebp].
_c$ = -8 ; size = 4
_a$ = 8 ; size = 4
_b$ = 12 ; size = 4
_ex PROC ; COMDAT
push ebp
mov ebp, esp
; Reserve 204 bytes for locals.
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
; Fill the 204-byte local area (51 dwords starting at ebp-204) with 0CCCCCCCCh.
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd ;fill with 0cccccccch
; c = a
mov eax, DWORD PTR _a$[ebp]
mov DWORD PTR _c$[ebp], eax
; a = b
mov eax, DWORD PTR _b$[ebp]
mov DWORD PTR _a$[ebp], eax
; b = c
mov eax, DWORD PTR _c$[ebp]
mov DWORD PTR _b$[ebp], eax
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
; Return without removing any argument bytes from the stack.
ret 0
_ex ENDP
Note that this doesn't work: the function only exchanges its own copies of a and b, so you need to use pointers for the swap to work.
/*
 * Exchanges the two size_t values that pa and pb point to, using c as the
 * temporary. Both pointers are dereferenced without any null check.
 */
void ex(size_t *pa, size_t *pb)
{
size_t c;
c = *pa;
*pa = *pb;
*pb = c;
}
which gets translated into:
; ex(pa, pb), debug build: exchanges the values that pa and pb point to,
; using the local c as the temporary.
; The three symbols below are the ebp-relative offsets used as _name$[ebp].
_c$ = -8 ; size = 4
_pa$ = 8 ; size = 4
_pb$ = 12 ; size = 4
_ex PROC ; COMDAT
push ebp
mov ebp, esp
; Reserve 204 bytes for locals.
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
; Fill the 204-byte local area (51 dwords starting at ebp-204) with 0CCCCCCCCh.
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd
; c = *pa
mov eax, DWORD PTR _pa$[ebp]
mov ecx, DWORD PTR [eax]
mov DWORD PTR _c$[ebp], ecx
; *pa = *pb
mov eax, DWORD PTR _pa$[ebp]
mov ecx, DWORD PTR _pb$[ebp]
mov edx, DWORD PTR [ecx]
mov DWORD PTR [eax], edx
; *pb = c
mov eax, DWORD PTR _pb$[ebp]
mov ecx, DWORD PTR _c$[ebp]
mov DWORD PTR [eax], ecx
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
; Return without removing any argument bytes from the stack.
ret 0
_ex ENDP

Resources