Disassembly -- buffer overflow attack (homework) - c

I am working in a buffer overflow attack program for a class assignment. I have provided the C code, as well as the disassembled code, and one of my jobs is to annotate the disassembly code. I don't need anyone to annotate the whole thing, but am I on the right track with my comments? If not, maybe annotate a couple lines to get me on the right track. Thanks!
#include <alloca.h>
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
/* Hex-pair variant of gets(): reads characters from stdin up to a newline
   or EOF and stores one byte into dest for every two hex digits seen.
   Characters that are not hex digits are skipped. A trailing unpaired digit
   is discarded. The output is NUL-terminated and dest is returned.
   Like gets(), no limit is placed on how many bytes are written to dest. */
char *getxs(char *dest)
{
    char *out = dest;       /* where the next decoded byte goes */
    int high_digit = 0;     /* value of the first digit of the current pair */
    int have_high = 0;      /* nonzero while a first digit awaits its partner */
    int ch;

    for (;;) {
        ch = getchar();
        if (ch == EOF || ch == '\n')
            break;
        if (!isxdigit(ch))
            continue;

        /* Convert the hex digit character to its numeric value 0..15. */
        int digit;
        if (ch >= '0' && ch <= '9')
            digit = ch - '0';
        else if (ch >= 'A' && ch <= 'F')
            digit = ch - 'A' + 10;
        else
            digit = ch - 'a' + 10;

        if (have_high) {
            /* Second digit of the pair: emit the combined byte. */
            *out++ = high_digit * 16 + digit;
            have_high = 0;
        } else {
            /* First digit of the pair: remember it. */
            high_digit = digit;
            have_high = 1;
        }
    }

    *out = '\0';
    return dest;
}
/* Reads one hex-encoded line from stdin into a 12-byte stack buffer via
   getxs() and always returns 1. getxs() takes no length argument, so input
   that decodes to more than 11 bytes (plus the terminator) is written past
   the end of buf. */
int getbuf()
{
char buf[12];
getxs(buf);
return 1;
}
/* Prompts for a hex string, calls getbuf() to read it, and prints the value
   getbuf() returned in hexadecimal. */
void test()
{
    printf("Type hex string: ");

    /* The prompt is printed before getbuf() starts reading stdin. */
    int result = getbuf();

    printf("getbuf returned 0x%x\n", result);
}
/* Entry point: pads the stack by a data-dependent amount, then runs test().
   Always returns 0. */
int main()
{
    int buf[16];
    /* This little hack is an attempt to get the stack to be in a
       stable position: the low 12 bits of buf's address are used as the
       number of bytes to reserve with alloca() before calling test().
       The address is masked as a uintptr_t so the pointer is never
       converted directly to int (which truncates where pointers are
       wider than int); the resulting offset is the same value 0..0xFFF. */
    int offset = (int)((uintptr_t)buf & 0xFFF);
    int *space = alloca((size_t)offset);
    *space = 0; /* So that we don't get complaint of unused variable */
    test();
    return 0;
}
The annotated disassembly is:
buffer.o: file format elf32-i386
disassembly of section .text:
0000000 <getxs>:
0: 55 push %ebp // pushes stack pointer to top
1: 89 e5 mov %esp,%ebp // stack pointer = c
3: 83 ec 28 sub $0x28,%esp // allocates space for c
6: c7 45 e8 01 00 00 00 movl $0x1,-0x18(%ebp) // even = 1
d: c7 45 ec 00 00 00 00 movl $0x0,-0x14(%ebp) // otherd = 0
14: 8b 45 08 mov 0x8(%ebp),%eax // sp = dest
17: 89 45 f0 mov %eax,-0x10(%ebp) // conditional setup
1a: e9 89 00 00 00 jmp a8 <getxs+0xa8>
1f: e8 fc ff ff ff call 20 <getxs+0x20>
24: 8b 00 mov (%eax),%eax
26: 8b 55 e4 mov -0x1c(%ebp),%edx
29: 01 d2 add %edx,%edx
2b: 01 d0 add %edx,%eax
2d: 0f b7 00 movzwl (%eax),%eax
30: 0f b7 c0 movzwl %ax,%eax
33: 25 00 10 00 00 and $0x1000,%eax
38: 85 c0 test %eax,%eax
3a: 74 6c je a8 <getxs+0xa8>
3c: 83 7d e4 2f cmpl $0x2f,-0x1c(%ebp)
40: 7e 11 jle 53 <getxs+0x53>
42: 83 7d e4 39 cmpl $0x39,-0x1c(%ebp)
46: 7f 0b jg 53 <getxs+0x53>
48: 8b 45 e4 mov -0x1c(%ebp),%eax
4b: 83 e8 30 sub $0x30,%eax
4e: 89 45 f4 mov %eax,-0xc(%ebp)
51: eb 20 jmp 73 <getxs+0x73>
53: 83 7d e4 40 cmpl $0x40,-0x1c(%ebp)
57: 7e 11 jle 6a <getxs+0x6a>
59: 83 7d e4 46 cmpl $0x46,-0x1c(%ebp)
5d: 7f 0b jg 6a <getxs+0x6a>
5f: 8b 45 e4 mov -0x1c(%ebp),%eax
62: 83 e8 37 sub $0x37,%eax
65: 89 45 f4 mov %eax,-0xc(%ebp)
68: eb 09 jmp 73 <getxs+0x73>
6a: 8b 45 e4 mov -0x1c(%ebp),%eax
6d: 83 e8 57 sub $0x57,%eax
70: 89 45 f4 mov %eax,-0xc(%ebp)
73: 83 7d e8 00 cmpl $0x0,-0x18(%ebp)
77: 74 0f je 88 <getxs+0x88>
79: 8b 45 f4 mov -0xc(%ebp),%eax
7c: 89 45 ec mov %eax,-0x14(%ebp)
7f: c7 45 e8 00 00 00 00 movl $0x0,-0x18(%ebp)
86: eb 20 jmp a8 <getxs+0xa8>
88: 8b 45 ec mov -0x14(%ebp),%eax
8b: 89 c2 mov %eax,%edx
8d: c1 e2 04 shl $0x4,%edx
90: 8b 45 f4 mov -0xc(%ebp),%eax
93: 8d 04 02 lea (%edx,%eax,1),%eax
96: 89 c2 mov %eax,%edx
98: 8b 45 f0 mov -0x10(%ebp),%eax
9b: 88 10 mov %dl,(%eax)
9d: 83 45 f0 01 addl $0x1,-0x10(%ebp)
a1: c7 45 e8 01 00 00 00 movl $0x1,-0x18(%ebp)
a8: e8 fc ff ff ff call a9 <getxs+0xa9>
ad: 89 45 e4 mov %eax,-0x1c(%ebp)
b0: 83 7d e4 ff cmpl $0xffffffff,-0x1c(%ebp)
b4: 74 0a je c0 <getxs+0xc0>
b6: 83 7d e4 0a cmpl $0xa,-0x1c(%ebp)
ba: 0f 85 5f ff ff ff jne 1f <getxs+0x1f>
c0: 8b 45 f0 mov -0x10(%ebp),%eax
c3: c6 00 00 movb $0x0,(%eax)
c6: 83 45 f0 01 addl $0x1,-0x10(%ebp)
ca: 8b 45 08 mov 0x8(%ebp),%eax
cd: c9 leave
ce: c3 ret
00000cf <getbuf>:
cf: 55 push %ebp // pushes stack pointer to the top
d0: 89 e5 mov %esp,%ebp // stack pointer = buf[12]
d2: 83 ec 28 sub $0x28,%esp // allocates space (40 bits)
d5: 8d 45 ec lea -0x14(%ebp),%eax // rv = stack pointer - 20
d8: 89 04 24 mov %eax,(%esp)
db: e8 fc ff ff ff call dc <getbuf+0xd>
e0: b8 01 00 00 00 mov $0x1,%eax // return 1 -- want to return ef be ad de
e5: c9 leave
e6: c3 ret
00000e7 <test>:
e7: 55 push %ebp
e8: 89 e5 mov %esp,%ebp
ea: 83 ec 28 sub $0x28,%esp
ed: b8 00 00 00 00 mov $0x0,%eax
f2: 89 04 24 mov %eax,(%esp)
f5: e8 fc ff ff ff call f6 <test+0xf>
fa: e8 fc ff ff ff call fb <test+0x14>
ff: 89 45 f4 mov %eax,-0xc(%ebp)
102: b8 13 00 00 00 mov $0x13,%eax
107: 8b 55 f4 mov -0xc(%ebp),%edx
10a: 89 54 24 04 mov %edx,0x4(%esp)
10e: 89 04 24 mov %eax,(%esp)
111: e8 fc ff ff ff call 112 <test+0x2b>
116: c9 leave
117: c3 ret
0000118 <main>:
118: 8d 4c 24 04 lea 0x4(%esp),%ecx
11c: 83 e4 f0 and $0xfffffff0,%esp
11f: ff 71 fc pushl -0x4(%ecx)
122: 55 push %ebp
123: 89 e5 mov %esp,%ebp
125: 51 push %ecx
126: 83 ec 54 sub $0x54,%esp
129: 8d 45 b0 lea -0x50(%ebp),%eax
12c: 25 ff 0f 00 00 and $0xfff,%eax
131: 89 45 f0 mov %eax,-0x10(%ebp)
134: 8b 45 f0 mov -0x10(%ebp),%eax
137: 83 c0 0f add $0xf,%eax
13a: 83 c0 0f add $0xf,%eax
13d: c1 e8 04 shr $0x4,%eax
140: c1 e0 04 shl $0x4,%eax
143: 29 c4 sub %eax,%esp
145: 89 e0 mov %esp,%eax
147: 83 c0 0f add $0xf,%eax
14a: c1 e8 04 shr $0x4,%eax
14d: c1 e0 04 shl $0x4,%eax
150: 89 45 f4 mov %eax,-0xc(%ebp)
153: 8b 45 f4 mov -0xc(%ebp),%eax
156: c7 00 00 00 00 00 movl $0x0,(%eax)
15c: e8 fc ff ff ff call 15d <main+0x45>
161: b8 00 00 00 00 mov $0x0,%eax
166: 8b 4d fc mov -0x4(%ebp),%ecx
169: c9 leave
16a: 8d 61 fc lea -0x4(%ecx),%esp
16d: c3 ret

The annotations should describe the intent of the instruction or block of instructions. It shouldn't just parrot what the instruction does (incorrectly).
In the first line:
0: 55 push %ebp // pushes stack pointer to top
We can see that the instruction pushes the base pointer onto the stack, but the annotation incorrectly states that we're pushing the stack pointer on the stack.
Rather, the sequence of instructions:
0: 55 push %ebp // pushes stack pointer to top
1: 89 e5 mov %esp,%ebp // stack pointer = c
3: 83 ec 28 sub $0x28,%esp // allocates space for c
Is a standard function entry preamble that establishes the stack frame and allocates 0x28 bytes of local storage. It is useful to document the layout of the stack frame, including the location of the function arguments:
0x08(%ebp): dest
0x04(%ebp): return-address
0x00(%ebp): prev %ebp
-0x04(%ebp): ?
-0x08(%ebp): ?
-0x0c(%ebp): ?
-0x10(%ebp): sp
-0x14(%ebp): otherd
-0x18(%ebp): even
-0x1c(%ebp): ?
-0x20(%ebp): ?
-0x24(%ebp): ?
-0x28(%ebp): ?
In the following:
14: 8b 45 08 mov 0x8(%ebp),%eax // sp = dest
17: 89 45 f0 mov %eax,-0x10(%ebp) // conditional setup
%eax is not really sp, it holds dest temporarily while it is moved from the function argument at 0x8(%ebp) to the local variable sp at -0x10(%ebp). There is no "conditional setup".

Related

Creating a print function in C 32-bit protected mode

I've been trying to develop a small OS and managed to switch into protected mode, in order to write C code instead of assembly, but since this means I can't use interrupt 10h anymore, I have to write chars to the video memory address. So I tried creating a new print function to easily print out whole strings instead of printing each char separately. That's where the problems came in, for some reason, while printing single chars with the printchar function works, this new print function doesn't work, no matter what I try.
Here's my C Code:
/* Forward declarations: start() calls both functions before their
   definitions appear further down. */
void print(char* message, int offset);
void printChar(char character, int offset);
/* Entry routine: outputs 'M' through printChar at offset 2, outputs
   "Test String" through print starting at offset 4, then spins forever
   (it never returns). */
void start() {
    printChar('M', 2);
    print("Test String", 4);

    for (;;) {
        /* nothing to return to: idle here */
    }
}
/* Outputs the NUL-terminated string msg one character at a time through
   printChar. The offset handed to printChar starts at `offset` and grows
   by 2 for each successive character. */
void print(char* msg, int offset) {
    const char *cursor = msg;

    while (*cursor != '\0') {
        int index = (int)(cursor - msg);    /* position of this character */
        printChar(*cursor, (index * 2) + offset);
        cursor++;
    }
}
/* Writes one character cell into the text-mode video buffer at 0xB8000.
 *
 * character: the character code to display.
 * offset:    byte offset of the cell from the start of the buffer; each cell
 *            occupies two bytes, so cell n lives at offset n * 2.
 *
 * A text cell is a (character, attribute) byte pair: the character code goes
 * in the first byte and the attribute in the second. The previous version
 * stored them at offset + 1 and offset + 2, which put the character into the
 * attribute byte of one cell and the attribute into the character byte of
 * the next.
 */
void printChar(char character, int offset) {
    /* volatile: these are stores to memory-mapped hardware, so the compiler
       must not drop or reorder them. */
    volatile unsigned char* vidmem = (volatile unsigned char*)0xB8000;

    vidmem[offset] = (unsigned char)character;  /* character byte */
    vidmem[offset + 1] = 0x0f;                  /* attribute byte */
}
I then use these commands to convert my code to binary and put it onto the second sector of a floppy disk with sectedit.
gcc -c test.c
objcopy -O binary -j .text test.o test.bin
Also here's the assembly code generated, when using objdump -d test.o
0000000000000000 <start>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 83 ec 20 sub $0x20,%rsp
8: ba 02 00 00 00 mov $0x2,%edx
d: b9 4d 00 00 00 mov $0x4d,%ecx
12: e8 73 00 00 00 call 8a <printChar>
17: ba 04 00 00 00 mov $0x4,%edx
1c: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 23 <start+0x23>
23: 48 89 c1 mov %rax,%rcx
26: e8 02 00 00 00 call 2d <print>
2b: eb fe jmp 2b <start+0x2b>
000000000000002d <print>:
2d: 55 push %rbp
2e: 48 89 e5 mov %rsp,%rbp
31: 48 83 ec 30 sub $0x30,%rsp
35: 48 89 4d 10 mov %rcx,0x10(%rbp)
39: 89 55 18 mov %edx,0x18(%rbp)
3c: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%rbp)
43: eb 29 jmp 6e <print+0x41>
45: 8b 45 fc mov -0x4(%rbp),%eax
48: 8d 14 00 lea (%rax,%rax,1),%edx
4b: 8b 45 18 mov 0x18(%rbp),%eax
4e: 01 c2 add %eax,%edx
50: 8b 45 fc mov -0x4(%rbp),%eax
53: 48 63 c8 movslq %eax,%rcx
56: 48 8b 45 10 mov 0x10(%rbp),%rax
5a: 48 01 c8 add %rcx,%rax
5d: 0f b6 00 movzbl (%rax),%eax
60: 0f be c0 movsbl %al,%eax
63: 89 c1 mov %eax,%ecx
65: e8 20 00 00 00 call 8a <printChar>
6a: 83 45 fc 01 addl $0x1,-0x4(%rbp)
6e: 8b 45 fc mov -0x4(%rbp),%eax
71: 48 63 d0 movslq %eax,%rdx
74: 48 8b 45 10 mov 0x10(%rbp),%rax
78: 48 01 d0 add %rdx,%rax
7b: 0f b6 00 movzbl (%rax),%eax
7e: 84 c0 test %al,%al
80: 75 c3 jne 45 <print+0x18>
82: 90 nop
83: 90 nop
84: 48 83 c4 30 add $0x30,%rsp
88: 5d pop %rbp
89: c3 ret
000000000000008a <printChar>:
8a: 55 push %rbp
8b: 48 89 e5 mov %rsp,%rbp
8e: 48 83 ec 10 sub $0x10,%rsp
92: 89 c8 mov %ecx,%eax
94: 89 55 18 mov %edx,0x18(%rbp)
97: 88 45 10 mov %al,0x10(%rbp)
9a: 48 c7 45 f8 00 80 0b movq $0xb8000,-0x8(%rbp)
a1: 00
a2: 8b 45 18 mov 0x18(%rbp),%eax
a5: 48 98 cltq
a7: 48 8d 50 01 lea 0x1(%rax),%rdx
ab: 48 8b 45 f8 mov -0x8(%rbp),%rax
af: 48 01 c2 add %rax,%rdx
b2: 0f b6 45 10 movzbl 0x10(%rbp),%eax
b6: 88 02 mov %al,(%rdx)
b8: 8b 45 18 mov 0x18(%rbp),%eax
bb: 48 98 cltq
bd: 48 8d 50 02 lea 0x2(%rax),%rdx
c1: 48 8b 45 f8 mov -0x8(%rbp),%rax
c5: 48 01 d0 add %rdx,%rax
c8: c6 00 0f movb $0xf,(%rax)
cb: 90 nop
cc: 48 83 c4 10 add $0x10,%rsp
d0: 5d pop %rbp
d1: c3 ret
d2: 90 nop
d3: 90 nop
d4: 90 nop
d5: 90 nop
d6: 90 nop
d7: 90 nop
d8: 90 nop
d9: 90 nop
da: 90 nop
db: 90 nop
dc: 90 nop
dd: 90 nop
de: 90 nop
df: 90 nop
edit: The problem basically lay in my not doing this on a Linux distribution; the things I would have needed to make it work on Windows were not properly set up. Huge thanks to MichaelPetch, who explained the problems to me. I've now switched to a Linux VM, and after slightly correcting the code it works (as the comments pointed out, my offset was weird; I used that offset because it worked in the broken setup I had, but normally it shouldn't).

Parse number of bytes reserved for local variables on the stack from GNU objdump output?

Consider the code snippet below.
The entry point of the program is main as defined in C-source code. Now, normally a function starts by decreasing %rsp to reserve space for local variables. But here, the GCC compiler reserves this space in some of the added (initial) functions.
My question is, where do I look for the number of bytes of reserved variables in these GCC-specific initialization functions? In this case, the number of reserved bytes is 0x08.
Also, in what order are these initial functions called?
00000000004003c0 <_start>:
4003c0: 31 ed xor ebp,ebp
4003c2: 49 89 d1 mov r9,rdx
4003c5: 5e pop rsi
4003c6: 48 89 e2 mov rdx,rsp
4003c9: 48 83 e4 f0 and rsp,0xfffffffffffffff0
4003cd: 50 push rax
4003ce: 54 push rsp
4003cf: 49 c7 c0 a0 05 40 00 mov r8,0x4005a0
4003d6: 48 c7 c1 30 05 40 00 mov rcx,0x400530
4003dd: 48 c7 c7 c0 04 40 00 mov rdi,0x4004c0
4003e4: e8 b7 ff ff ff call 4003a0 <__libc_start_main@plt>
4003e9: f4 hlt
4003ea: 66 0f 1f 44 00 00 nop WORD PTR [rax+rax*1+0x0]
00000000004003f0 <deregister_tm_clones>:
4003f0: b8 37 10 60 00 mov eax,0x601037
4003f5: 55 push rbp
4003f6: 48 2d 30 10 60 00 sub rax,0x601030
4003fc: 48 83 f8 0e cmp rax,0xe
400400: 48 89 e5 mov rbp,rsp
400403: 76 1b jbe 400420 <deregister_tm_clones+0x30>
400405: b8 00 00 00 00 mov eax,0x0
40040a: 48 85 c0 test rax,rax
40040d: 74 11 je 400420 <deregister_tm_clones+0x30>
40040f: 5d pop rbp
400410: bf 30 10 60 00 mov edi,0x601030
400415: ff e0 jmp rax
400417: 66 0f 1f 84 00 00 00 nop WORD PTR [rax+rax*1+0x0]
40041e: 00 00
400420: 5d pop rbp
400421: c3 ret
400422: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
400426: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
40042d: 00 00 00
0000000000400430 <register_tm_clones>:
400430: be 30 10 60 00 mov esi,0x601030
400435: 55 push rbp
400436: 48 81 ee 30 10 60 00 sub rsi,0x601030
40043d: 48 c1 fe 03 sar rsi,0x3
400441: 48 89 e5 mov rbp,rsp
400444: 48 89 f0 mov rax,rsi
400447: 48 c1 e8 3f shr rax,0x3f
40044b: 48 01 c6 add rsi,rax
40044e: 48 d1 fe sar rsi,1
400451: 74 15 je 400468 <register_tm_clones+0x38>
400453: b8 00 00 00 00 mov eax,0x0
400458: 48 85 c0 test rax,rax
40045b: 74 0b je 400468 <register_tm_clones+0x38>
40045d: 5d pop rbp
40045e: bf 30 10 60 00 mov edi,0x601030
400463: ff e0 jmp rax
400465: 0f 1f 00 nop DWORD PTR [rax]
400468: 5d pop rbp
400469: c3 ret
40046a: 66 0f 1f 44 00 00 nop WORD PTR [rax+rax*1+0x0]
0000000000400470 <__do_global_dtors_aux>:
400470: 80 3d b9 0b 20 00 00 cmp BYTE PTR [rip+0x200bb9],0x0 # 601030 <__TMC_END__>
400477: 75 11 jne 40048a <__do_global_dtors_aux+0x1a>
400479: 55 push rbp
40047a: 48 89 e5 mov rbp,rsp
40047d: e8 6e ff ff ff call 4003f0 <deregister_tm_clones>
400482: 5d pop rbp
400483: c6 05 a6 0b 20 00 01 mov BYTE PTR [rip+0x200ba6],0x1 # 601030 <__TMC_END__>
40048a: f3 c3 repz ret
40048c: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
0000000000400490 <frame_dummy>:
400490: bf 20 0e 60 00 mov edi,0x600e20
400495: 48 83 3f 00 cmp QWORD PTR [rdi],0x0
400499: 75 05 jne 4004a0 <frame_dummy+0x10>
40049b: eb 93 jmp 400430 <register_tm_clones>
40049d: 0f 1f 00 nop DWORD PTR [rax]
4004a0: b8 00 00 00 00 mov eax,0x0
4004a5: 48 85 c0 test rax,rax
4004a8: 74 f1 je 40049b <frame_dummy+0xb>
4004aa: 55 push rbp
4004ab: 48 89 e5 mov rbp,rsp
4004ae: ff d0 call rax
4004b0: 5d pop rbp
4004b1: e9 7a ff ff ff jmp 400430 <register_tm_clones>
4004b6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
4004bd: 00 00 00
00000000004004c0 <main>:
4004c0: 55 push rbp
4004c1: 48 89 e5 mov rbp,rsp
4004c4: c7 45 f8 00 00 00 00 mov DWORD PTR [rbp-0x8],0x0
4004cb: c7 45 fc 01 00 00 00 mov DWORD PTR [rbp-0x4],0x1
4004d2: eb 46 jmp 40051a <.cend>
4004d4: 66 66 66 2e 0f 1f 84 data16 data16 nop WORD PTR cs:[rax+rax*1+0x0]
4004db: 00 00 00 00 00
4004e0: ff 05 4e 0b 20 00 inc DWORD PTR [rip+0x200b4e] # 601034 <sum>
4004e6: 50 push rax
4004e7: 53 push rbx
4004e8: 56 push rsi
4004e9: 48 31 c0 xor rax,rax
4004ec: 48 c7 c6 14 05 40 00 mov rsi,0x400514
00000000004004f3 <.cloop>:
4004f3: 48 0f b6 1e movzx rbx,BYTE PTR [rsi]
4004f7: 48 31 d8 xor rax,rbx
4004fa: 48 ff c6 inc rsi
4004fd: 48 81 fe 1a 05 40 00 cmp rsi,0x40051a
400504: 75 ed jne 4004f3 <.cloop>
400506: 48 83 f8 00 cmp rax,0x0
40050a: 74 05 je 400511 <.restore>
40050c: 48 31 c0 xor rax,rax
40050f: ff d0 call rax
0000000000400511 <.restore>:
400511: 5e pop rsi
400512: 5b pop rbx
400513: 58 pop rax
0000000000400514 <.cstart>:
400514: eb 01 jmp 400517 <.end>
0000000000400516 <.cslot>:
400516: ac lods al,BYTE PTR ds:[rsi]
0000000000400517 <.end>:
400517: ff 45 fc inc DWORD PTR [rbp-0x4]
000000000040051a <.cend>:
40051a: 83 7d fc 1e cmp DWORD PTR [rbp-0x4],0x1e
40051e: 7e c0 jle 4004e0 <main+0x20>
400520: 8b 05 0e 0b 20 00 mov eax,DWORD PTR [rip+0x200b0e] # 601034 <sum>
400526: 5d pop rbp
400527: c3 ret
400528: 0f 1f 84 00 00 00 00 nop DWORD PTR [rax+rax*1+0x0]
40052f: 00
0000000000400530 <__libc_csu_init>:
400530: 41 57 push r15
400532: 41 56 push r14
400534: 41 89 ff mov r15d,edi
400537: 41 55 push r13
400539: 41 54 push r12
40053b: 4c 8d 25 ce 08 20 00 lea r12,[rip+0x2008ce] # 600e10 <__frame_dummy_init_array_entry>
400542: 55 push rbp
400543: 48 8d 2d ce 08 20 00 lea rbp,[rip+0x2008ce] # 600e18 <__init_array_end>
40054a: 53 push rbx
40054b: 49 89 f6 mov r14,rsi
40054e: 49 89 d5 mov r13,rdx
400551: 4c 29 e5 sub rbp,r12
400554: 48 83 ec 08 sub rsp,0x8
400558: 48 c1 fd 03 sar rbp,0x3
40055c: e8 0f fe ff ff call 400370 <_init>
400561: 48 85 ed test rbp,rbp
400564: 74 20 je 400586 <__libc_csu_init+0x56>
400566: 31 db xor ebx,ebx
400568: 0f 1f 84 00 00 00 00 nop DWORD PTR [rax+rax*1+0x0]
40056f: 00
400570: 4c 89 ea mov rdx,r13
400573: 4c 89 f6 mov rsi,r14
400576: 44 89 ff mov edi,r15d
400579: 41 ff 14 dc call QWORD PTR [r12+rbx*8]
40057d: 48 83 c3 01 add rbx,0x1
400581: 48 39 eb cmp rbx,rbp
400584: 75 ea jne 400570 <__libc_csu_init+0x40>
400586: 48 83 c4 08 add rsp,0x8
40058a: 5b pop rbx
40058b: 5d pop rbp
40058c: 41 5c pop r12
40058e: 41 5d pop r13
400590: 41 5e pop r14
400592: 41 5f pop r15
400594: c3 ret
400595: 90 nop
400596: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
40059d: 00 00 00
00000000004005a0 <__libc_csu_fini>:
4005a0: f3 c3 repz ret
Disassembly of section .fini:
00000000004005a4 <_fini>:
4005a4: 48 83 ec 08 sub rsp,0x8
4005a8: 48 83 c4 08 add rsp,0x8
4005ac: c3 ret

Write is faster than read on x86? [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 6 years ago.
Improve this question
I observe some very weird performance for read and write access on Intel machine.
I wrote a C program that allocate an array first. The code of the program is at [1] ; You can compile it by running Make. (I don't use any compiling optimization.)
The sequence of the operations of the program is as follows:
allocate a char array;
init each element of array to be 1;
use clflush to flush the whole array from cache;
read each cache line of the array by using tmp = array[i];
(Do simple calculation after reading each cache line)
use clflush to flush the whole array from cache;
write each cache line of the array by using array[i] = tmp;
(Do the same simple calculation after reading each cache line)
I run the program on Intel(R) Xeon(R) CPU E5-1650 v2 @ 3.50GHz (Haswell arch.) with turbo boost disabled.
The command I used to run the program is:
sudo ./rw-latency-test-compute 5210 10 1
I got the read latency for the whole array is 6670us, while the write latency for the whole array is 3518us.
The interesting part is
If I don't do any computation after I read/write a cache line, the read latency for the whole array is 2175us, while the write latency for the whole array is 3687us.
So doing computation seems speed up the execution... :-(
Do you have any suggestion/explanation on this weird performance?
The whole assembly code of the program can be found at [2].
The assembly code of the inner loop is as follows:
0000000000400898 <read_array>:
400898: 55 push %rbp
400899: 48 89 e5 mov %rsp,%rbp
40089c: 53 push %rbx
40089d: 48 83 ec 28 sub $0x28,%rsp
4008a1: 48 89 7d d8 mov %rdi,-0x28(%rbp)
4008a5: 48 89 75 d0 mov %rsi,-0x30(%rbp)
4008a9: c7 45 e8 00 00 00 00 movl $0x0,-0x18(%rbp)
4008b0: c7 45 e4 00 00 00 00 movl $0x0,-0x1c(%rbp)
4008b7: eb 58 jmp 400911 <read_array+0x79>
4008b9: b8 00 00 00 00 mov $0x0,%eax
4008be: e8 38 ff ff ff callq 4007fb <sw_barrier>
4008c3: 8b 45 e4 mov -0x1c(%rbp),%eax
4008c6: 48 98 cltq
4008c8: 48 03 45 d8 add -0x28(%rbp),%rax
4008cc: 0f b6 00 movzbl (%rax),%eax
4008cf: 88 45 ef mov %al,-0x11(%rbp)
4008d2: 0f be 45 ef movsbl -0x11(%rbp),%eax
4008d6: 89 c1 mov %eax,%ecx
4008d8: 03 4d e8 add -0x18(%rbp),%ecx
4008db: ba 01 80 00 80 mov $0x80008001,%edx
4008e0: 89 c8 mov %ecx,%eax
4008e2: f7 ea imul %edx
4008e4: 8d 04 0a lea (%rdx,%rcx,1),%eax
4008e7: 89 c2 mov %eax,%edx
4008e9: c1 fa 0f sar $0xf,%edx
4008ec: 89 c8 mov %ecx,%eax
4008ee: c1 f8 1f sar $0x1f,%eax
4008f1: 89 d3 mov %edx,%ebx
4008f3: 29 c3 sub %eax,%ebx
4008f5: 89 d8 mov %ebx,%eax
4008f7: 89 45 e8 mov %eax,-0x18(%rbp)
4008fa: 8b 55 e8 mov -0x18(%rbp),%edx
4008fd: 89 d0 mov %edx,%eax
4008ff: c1 e0 10 shl $0x10,%eax
400902: 29 d0 sub %edx,%eax
400904: 89 ca mov %ecx,%edx
400906: 29 c2 sub %eax,%edx
400908: 89 d0 mov %edx,%eax
40090a: 89 45 e8 mov %eax,-0x18(%rbp)
40090d: 83 45 e4 40 addl $0x40,-0x1c(%rbp)
400911: 8b 45 e4 mov -0x1c(%rbp),%eax
400914: 48 98 cltq
400916: 48 3b 45 d0 cmp -0x30(%rbp),%rax
40091a: 7c 9d jl 4008b9 <read_array+0x21>
40091c: b8 e1 0f 40 00 mov $0x400fe1,%eax
400921: 8b 55 e8 mov -0x18(%rbp),%edx
400924: 89 d6 mov %edx,%esi
400926: 48 89 c7 mov %rax,%rdi
400929: b8 00 00 00 00 mov $0x0,%eax
40092e: e8 3d fd ff ff callq 400670 <printf@plt>
400933: 48 83 c4 28 add $0x28,%rsp
400937: 5b pop %rbx
400938: 5d pop %rbp
400939: c3 retq
000000000040093a <write_array>:
40093a: 55 push %rbp
40093b: 48 89 e5 mov %rsp,%rbp
40093e: 53 push %rbx
40093f: 48 83 ec 28 sub $0x28,%rsp
400943: 48 89 7d d8 mov %rdi,-0x28(%rbp)
400947: 48 89 75 d0 mov %rsi,-0x30(%rbp)
40094b: c6 45 ef 01 movb $0x1,-0x11(%rbp)
40094f: c7 45 e8 00 00 00 00 movl $0x0,-0x18(%rbp)
400956: c7 45 e4 00 00 00 00 movl $0x0,-0x1c(%rbp)
40095d: eb 63 jmp 4009c2 <write_array+0x88>
40095f: b8 00 00 00 00 mov $0x0,%eax
400964: e8 92 fe ff ff callq 4007fb <sw_barrier>
400969: 8b 45 e4 mov -0x1c(%rbp),%eax
40096c: 48 98 cltq
40096e: 48 03 45 d8 add -0x28(%rbp),%rax
400972: 0f b6 55 ef movzbl -0x11(%rbp),%edx
400976: 88 10 mov %dl,(%rax)
400978: 8b 45 e4 mov -0x1c(%rbp),%eax
40097b: 48 98 cltq
40097d: 48 03 45 d8 add -0x28(%rbp),%rax
400981: 0f b6 00 movzbl (%rax),%eax
400984: 0f be c0 movsbl %al,%eax
400987: 89 c1 mov %eax,%ecx
400989: 03 4d e8 add -0x18(%rbp),%ecx
40098c: ba 01 80 00 80 mov $0x80008001,%edx
400991: 89 c8 mov %ecx,%eax
400993: f7 ea imul %edx
400995: 8d 04 0a lea (%rdx,%rcx,1),%eax
400998: 89 c2 mov %eax,%edx
40099a: c1 fa 0f sar $0xf,%edx
40099d: 89 c8 mov %ecx,%eax
40099f: c1 f8 1f sar $0x1f,%eax
4009a2: 89 d3 mov %edx,%ebx
4009a4: 29 c3 sub %eax,%ebx
4009a6: 89 d8 mov %ebx,%eax
4009a8: 89 45 e8 mov %eax,-0x18(%rbp)
4009ab: 8b 55 e8 mov -0x18(%rbp),%edx
4009ae: 89 d0 mov %edx,%eax
4009b0: c1 e0 10 shl $0x10,%eax
4009b3: 29 d0 sub %edx,%eax
4009b5: 89 ca mov %ecx,%edx
4009b7: 29 c2 sub %eax,%edx
4009b9: 89 d0 mov %edx,%eax
4009bb: 89 45 e8 mov %eax,-0x18(%rbp)
4009be: 83 45 e4 40 addl $0x40,-0x1c(%rbp)
4009c2: 8b 45 e4 mov -0x1c(%rbp),%eax
4009c5: 48 98 cltq
4009c7: 48 3b 45 d0 cmp -0x30(%rbp),%rax
4009cb: 7c 92 jl 40095f <write_array+0x25>
4009cd: b8 ee 0f 40 00 mov $0x400fee,%eax
4009d2: 8b 55 e8 mov -0x18(%rbp),%edx
4009d5: 89 d6 mov %edx,%esi
4009d7: 48 89 c7 mov %rax,%rdi
4009da: b8 00 00 00 00 mov $0x0,%eax
4009df: e8 8c fc ff ff callq 400670 <printf@plt>
4009e4: 48 83 c4 28 add $0x28,%rsp
4009e8: 5b pop %rbx
4009e9: 5d pop %rbp
4009ea: c3 retq
[1]https://github.com/PennPanda/rw-latency-test/blob/master/rw-latency-test-compute.c
[2] https://github.com/PennPanda/rw-latency-test/blob/2da88f1cccba40aba155317567199028b28bd250/rw-latency-test-compute.asm
Write is faster than read because if you read from RAM and use the value (that is, you don't just read and discard), the processor has to stall for the read at the point the value is used. However, write proceeds asynchronously and never stalls.

achieve stack smashing with executable file

I try to achieve stack smashing when I have only the executable file .
I use the objdump to get the assembly code for this source code :
#include<stdio.h>
#include<string.h>
/* Copies the NUL-terminated string str into a 24-byte stack buffer with
   strcpy(). No length is checked before the copy, so a string longer than
   23 characters writes past the end of buffer. */
void func(char *str) {
char buffer[24];
int *ret; /* declared but never used in this function */
strcpy(buffer,str);
}
/* Passes the first command-line argument to func(), then prints x.
   x is set to 0 before the call and to 1 after it, so the program prints 1
   when func() returns normally. argv[1] is handed to func() without
   checking argc. */
int main(int argc, char **argv) {
    int x;
    x = 0;
    func(argv[1]);
    x = 1;
    /* The format string must be closed with a plain ASCII double quote;
       the typographic quote that was here does not terminate the literal. */
    printf("%d\n", x);
}
At run time I invoke ./a.out (value). I need to craft (value) in such a way that it places NOPs on the stack and its last part is the address of my next instruction.
I have 40 byte before reaching the location that contain the return address of the fun() .
08048444 <func>:
8048444: 55 push %ebp
8048445: 89 e5 mov %esp,%ebp
8048447: 83 ec 48 sub $0x48,%esp
804844a: 8b 45 08 mov 0x8(%ebp),%eax
804844d: 89 45 d4 mov %eax,-0x2c(%ebp)
8048450: 65 a1 14 00 00 00 mov %gs:0x14,%eax
8048456: 89 45 f4 mov %eax,-0xc(%ebp)
8048459: 31 c0 xor %eax,%eax
804845b: 8b 45 d4 mov -0x2c(%ebp),%eax
804845e: 89 44 24 04 mov %eax,0x4(%esp)
8048462: 8d 45 dc lea -0x24(%ebp),%eax
8048465: 89 04 24 mov %eax,(%esp)
8048468: e8 eb fe ff ff call 8048358 <strcpy@plt>
804846d: 8b 45 f4 mov -0xc(%ebp),%eax
8048470: 65 33 05 14 00 00 00 xor %gs:0x14,%eax
8048477: 74 05 je 804847e <func+0x3a>
8048479: e8 fa fe ff ff call 8048378 <__stack_chk_fail@plt>
804847e: c9 leave
804847f: c3 ret
08048480 <main>:
8048480: 55 push %ebp
8048481: 89 e5 mov %esp,%ebp
8048483: 83 e4 f0 and $0xfffffff0,%esp
8048486: 83 ec 20 sub $0x20,%esp
8048489: c7 44 24 1c 00 00 00 movl $0x0,0x1c(%esp)
8048490: 00
8048491: 8b 45 0c mov 0xc(%ebp),%eax
8048494: 83 c0 04 add $0x4,%eax
8048497: 8b 00 mov (%eax),%eax
8048499: 89 04 24 mov %eax,(%esp)
804849c: e8 a3 ff ff ff call 8048444 <func>
80484a1: c7 44 24 1c 01 00 00 movl $0x1,0x1c(%esp)
80484a8: 00
80484a9: b8 90 85 04 08 mov $0x8048590,%eax
80484ae: 8b 54 24 1c mov 0x1c(%esp),%edx
80484b2: 89 54 24 04 mov %edx,0x4(%esp)
80484b6: 89 04 24 mov %eax,(%esp)
80484b9: e8 aa fe ff ff call 8048368 <printf@plt>
80484be: b8 00 00 00 00 mov $0x0,%eax
80484c3: c9 leave
80484c4: c3 ret
80484c5: 90 nop
80484c6: 90 nop
problem if I insert 00 its consider as (31) ASCII .How I can insert hex values.
... I hope the Que is clear
objdump -w -Mintel :
08048444 <func>:
8048444: 55 push ebp
8048445: 89 e5 mov ebp,esp
8048447: 83 ec 48 sub esp,0x48
804844a: 8b 45 08 mov eax,DWORD PTR [ebp+0x8]
804844d: 89 45 d4 mov DWORD PTR [ebp-0x2c],eax
8048450: 65 a1 14 00 00 00 mov eax,gs:0x14
8048456: 89 45 f4 mov DWORD PTR [ebp-0xc],eax
8048459: 31 c0 xor eax,eax
804845b: 8b 45 d4 mov eax,DWORD PTR [ebp-0x2c]
804845e: 89 44 24 04 mov DWORD PTR [esp+0x4],eax
8048462: 8d 45 dc lea eax,[ebp-0x24]
8048465: 89 04 24 mov DWORD PTR [esp],eax
8048468: e8 eb fe ff ff call 8048358 <strcpy@plt>
804846d: 8b 45 f4 mov eax,DWORD PTR [ebp-0xc]
8048470: 65 33 05 14 00 00 00 xor eax,DWORD PTR gs:0x14
8048477: 74 05 je 804847e <func+0x3a>
8048479: e8 fa fe ff ff call 8048378 <__stack_chk_fail@plt>
804847e: c9 leave
804847f: c3 ret
08048480 <main>:
8048480: 55 push ebp
8048481: 89 e5 mov ebp,esp
8048483: 83 e4 f0 and esp,0xfffffff0
8048486: 83 ec 20 sub esp,0x20
8048489: c7 44 24 1c 00 00 00 00 mov DWORD PTR [esp+0x1c],0x0
8048491: 8b 45 0c mov eax,DWORD PTR [ebp+0xc]
8048494: 83 c0 04 add eax,0x4
8048497: 8b 00 mov eax,DWORD PTR [eax]
8048499: 89 04 24 mov DWORD PTR [esp],eax
804849c: e8 a3 ff ff ff call 8048444 <func>
80484a1: c7 44 24 1c 01 00 00 00 mov DWORD PTR [esp+0x1c],0x1
80484a9: b8 90 85 04 08 mov eax,0x8048590
80484ae: 8b 54 24 1c mov edx,DWORD PTR [esp+0x1c]
80484b2: 89 54 24 04 mov DWORD PTR [esp+0x4],edx
80484b6: 89 04 24 mov DWORD PTR [esp],eax
80484b9: e8 aa fe ff ff call 8048368 <printf@plt>
80484be: b8 00 00 00 00 mov eax,0x0
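You could use ./a.out $(perl -e 'print "\x97"') and replace \x97 with the hex byte you want to use. (The Perl string has to be double-quoted: inside single quotes Perl does not expand \x escapes and would print the four characters \x97 literally.)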
In C, the end of string character is 0x00 (or '\0' if you prefer). So if you make your string exactly 39 characters long, then the 40th character will be the zero - and it will be in exactly the right place. There is no way to copy more than one zero in a C string - unless you use a function other than strcpy (for example, memcpy). But if you are relying on the argv[1] to be the source of your zero, then this is the only way. You could of course subtract something from the string before processing it - if you want, you could do
/* Replace every ASCII '0' character in argv[1] with a NUL byte, in place.
   L is captured before the loop, so the scan still covers the original
   length after NUL bytes have been written into the string. */
L = strlen(argv[1]);
for(int ii = 0; ii < L; ii++) if(argv[1][ii] == '0') argv[1][ii] = '\0';
This would turn every '0' into '\0'. But then you can't do a simple strcpy, you would have to do memcpy.
And you have to hope that you don't get a segfault for writing to memory you don't own…

Assembly cmp how to examine the values it compares?

I have the following from an objdump. This was C code compiled by gcc for an IA32.
08048e9a <my_func>:
8048e9a: 55 push %ebp
8048e9b: 89 e5 mov %esp,%ebp
8048e9d: 83 ec 48 sub $0x48,%esp
8048ea0: 89 5d f4 mov %ebx,-0xc(%ebp)
8048ea3: 89 75 f8 mov %esi,-0x8(%ebp)
8048ea6: 89 7d fc mov %edi,-0x4(%ebp)
8048ea9: 8d 5d d0 lea -0x30(%ebp),%ebx
8048eac: 89 5c 24 04 mov %ebx,0x4(%esp)
8048eb0: 8b 45 08 mov 0x8(%ebp),%eax
8048eb3: 89 04 24 mov %eax,(%esp)
8048eb6: e8 52 04 00 00 call 804930d <read_num>
8048ebb: 8d 7d dc lea -0x24(%ebp),%edi
8048ebe: be 00 00 00 00 mov $0x0,%esi
8048ec3: 8b 03 mov (%ebx),%eax
8048ec5: 3b 43 0c cmp 0xc(%ebx),%eax
8048ec8: 74 05 je 8048ecf <my_func+0x35>
8048eca: e8 fc 03 00 00 call 80492cb <other_func>
8048ecf: 03 33 add (%ebx),%esi
I am interested in finding out the values being compared on line 8048ec5 In gdb I can step to this line and I can read %eax just fine from info registers but how can I read 0xc(%ebx)? This means 0xc offset from %ebx or 0xc + %ebx?
It refers to the 32-bit value at the address %ebx + 0xc in memory. In gdb, once execution has stopped at that instruction, you can examine it with x/wx $ebx+0xc.

Resources