Grabbing 128bit key from shellcode - c

I'm trying to extract a 128-bit key from shellcode. I have embedded the shellcode in a C program as a char array, like this:
#include <stdio.h>
#include <stdlib.h>
/*shellcode.c*/
char code[] = "\x31\xC0\x50\x68\x75\x70\x25\x75\x68\x23\x78\x27\x78\x68\x25\x74\x72\x20\x68\x79\x24\x73\x77\x68\x71\x72\x77\x76\x68\x20\x25\x22\x70\x68\x23\x78\x75\x27\x68\x75\x20\x23\x75\x68\x76\x72\x79\x79\x68\x20\x70\x72\x73\x68\x71\x71\x24\x25\x68\x79\x27\x76\x77\x68\x24\x77\x71\x72\x68\x27\x79\x70\x70\x68\x74\x24\x24\x75\x68\x79\x73\x23\x23\x68\x74\x22\x75\x79\x68\x23\x74\x70\x27\x68\x20\x74\x24\x79\x68\x74\x77\x24\x78\x68\x25\x27\x70\x75\x68\x74\x77\x74\x78\x68\x23\x23\x71\x76\x68\x77\x70\x73\x71\x68\x27\x20\x77\x24\x68\x22\x72\x78\x75\x68\x25\x72\x79\x77\x68\x23\x75\x79\x76\x68\x72\x71\x72\x24\x68\x71\x23\x23\x79\x68\x79\x23\x79\x70\x68\x20\x20\x76\x77\x54\x5E\x8B\xFE\x8B\xD7\xFC\xB9\x80\x00\x00\x00\xBB\x41\x00\x00\x00\x31\xC0\x50\xAC\x33\xC3\xAA\xE2\xFA\x54\x5E\xCC";
int main(int argc, char **argv)
{
int (*func)();
func = (int (*)()) code;
(int)(*func)();
}
I compiled it using GCC under Linux. Disassembling the code array, where the shellcode is located, with GDB gives:
(gdb) disas &code
Dump of assembler code for function code:
0x0000000000600840 <+0>: xor %eax,%eax
0x0000000000600842 <+2>: push %rax
0x0000000000600843 <+3>: pushq $0x75257075
0x0000000000600848 <+8>: pushq $0x78277823
0x000000000060084d <+13>: pushq $0x20727425
0x0000000000600852 <+18>: pushq $0x77732479
0x0000000000600857 <+23>: pushq $0x76777271
0x000000000060085c <+28>: pushq $0x70222520
0x0000000000600861 <+33>: pushq $0x27757823
0x0000000000600866 <+38>: pushq $0x75232075
0x000000000060086b <+43>: pushq $0x79797276
0x0000000000600870 <+48>: pushq $0x73727020
0x0000000000600875 <+53>: pushq $0x25247171
0x000000000060087a <+58>: pushq $0x77762779
0x000000000060087f <+63>: pushq $0x72717724
0x0000000000600884 <+68>: pushq $0x70707927
0x0000000000600889 <+73>: pushq $0x75242474
0x000000000060088e <+78>: pushq $0x23237379
0x0000000000600893 <+83>: pushq $0x79752274
0x0000000000600898 <+88>: pushq $0x27707423
0x000000000060089d <+93>: pushq $0x79247420
0x00000000006008a2 <+98>: pushq $0x78247774
0x00000000006008a7 <+103>: pushq $0x75702725
0x00000000006008ac <+108>: pushq $0x78747774
0x00000000006008b1 <+113>: pushq $0x76712323
0x00000000006008b6 <+118>: pushq $0x71737077
0x00000000006008bb <+123>: pushq $0x24772027
0x00000000006008c0 <+128>: pushq $0x75787222
0x00000000006008c5 <+133>: pushq $0x77797225
0x00000000006008ca <+138>: pushq $0x76797523
0x00000000006008cf <+143>: pushq $0x24727172
0x00000000006008d4 <+148>: pushq $0x79232371
0x00000000006008d9 <+153>: pushq $0x70792379
0x00000000006008de <+158>: pushq $0x77762020
0x00000000006008e3 <+163>: push %rsp
0x00000000006008e4 <+164>: pop %rsi
0x00000000006008e5 <+165>: mov %esi,%edi
0x00000000006008e7 <+167>: mov %edi,%edx
0x00000000006008e9 <+169>: cld
0x00000000006008ea <+170>: mov $0x80,%ecx
0x00000000006008ef <+175>: mov $0x41,%ebx
0x00000000006008f4 <+180>: xor %eax,%eax
0x00000000006008f6 <+182>: push %rax
0x00000000006008f7 <+183>: lods %ds:(%rsi),%al
0x00000000006008f8 <+184>: xor %ebx,%eax
0x00000000006008fa <+186>: stos %al,%es:(%rdi)
0x00000000006008fb <+187>: loop 0x6008f7 <code+183>
0x00000000006008fd <+189>: push %rsp
0x00000000006008fe <+190>: pop %rsi
0x00000000006008ff <+191>: int3
0x0000000000600900 <+192>: add %al,(%rax)
End of assembler dump.
Looking at the disassembly, the 128-bit key will have been calculated once the loop instruction at 0x6008fb has finished. I'm not very comfortable with GDB. How can I obtain the 128-bit key from this shellcode? I suspect I need to set a breakpoint right after the loop and view the memory contents, but I don't know how to do it.
Thank you very much in advance ...

If you can successfully run this code, it will stop at the int3 instruction at the end. At that point you'll have the decrypted string in memory starting at rsp+8 (which is the same as rsi+8). You can print it from gdb by x/s $rsp+8, for example. The +8 comes from the push %rax at 0x6008f6.
Note that this code appears to be incorrectly ported 32-bit code. In 64-bit mode each pushq instruction sign-extends its 32-bit immediate to 64 bits, and since all of these constants are positive, every push stores the constant followed by 4 zero bytes. Decrypting that will result in every other group of 4 letters coming out as AAAA (0x00 XOR 0x41 is 'A'), and in only half of the key being decrypted. The mov %esi,%edi at 0x6008e5 will zero the top bits of rdi, so the code will only work if the stack is in the first 4GB of the address space (which it normally isn't on a 64-bit system).
Furthermore the code assumes it's on the stack, but your C wrapper puts it in the data section. In any case, you will need execute permissions too.
Alternatively, since this code does nothing beyond XOR-ing the values by 0x41 you can easily do that by hand, remembering that x86 is little endian so the constants printed in the disassembly have to be byte-swapped.

Related

Why gdb shows different addresses in RIP-relative mode from absolute address?

Having this in c:
#include <stdio.h>
#include <stdlib.h>
int x;
int main(){
printf("eneter x\n");
scanf("%i",&x);
printf("you enetered: %i\n", x);
return 0;
}
in gdb:
starti
disas main
0x0000555555555155 <+0>: push %rbp
0x0000555555555156 <+1>: mov %rsp,%rbp
0x0000555555555159 <+4>: lea 0xea4(%rip),%rdi # 0x555555556004
0x0000555555555160 <+11>: callq 0x555555555030 <puts@plt>
0x0000555555555165 <+16>: lea 0x2ed8(%rip),%rsi # 0x555555558044 <x>
0x000055555555516c <+23>: lea 0xe9a(%rip),%rdi # 0x55555555600d
0x0000555555555173 <+30>: mov $0x0,%eax
0x0000555555555178 <+35>: callq 0x555555555050 <__isoc99_scanf@plt>
0x000055555555517d <+40>: mov 0x2ec1(%rip),%eax # 0x555555558044 <x>
0x0000555555555183 <+46>: mov %eax,%esi
0x0000555555555185 <+48>: lea 0xe84(%rip),%rdi # 0x555555556010
0x000055555555518c <+55>: mov $0x0,%eax
0x0000555555555191 <+60>: callq 0x555555555040 <printf@plt>
0x0000555555555196 <+65>: mov $0x0,%eax
0x000055555555519b <+70>: pop %rbp
0x000055555555519c <+71>: retq
Here the relative address of the variable x is $rip+0x2ed8 (from the instruction lea 0x2ed8(%rip),%rsi # 0x555555558044). But as you can see in the comment after the #, the absolute address is 0x555555558044. OK, will I get that address when I try to read from the relative one? Let's see:
x $rip+0x2ed8
0x555555558055: 0x00000000
Nope - the relative address did not resolve to the absolute address where the variable x is really stored (0x555555558055 != 0x555555558044); the difference is 17 bytes. Is it the number of bytes of the instruction itself (lea + operands)? I do not know, but I do not think so. So why do relative and absolute addressing differ in gdb?
PS, generated assembly:
.file "a.c"
.comm x,4,4
.section .rodata
.LC0:
.string "eneter x"
.LC1:
.string "%i"
.LC2:
.string "you enetered: %i\n"
.text
.globl main
.type main, @function
main:
pushq %rbp #
movq %rsp, %rbp #,
# a.c:5: printf("eneter x\n");
leaq .LC0(%rip), %rdi #,
call puts@PLT #
# a.c:6: scanf("%i",&x);
leaq x(%rip), %rsi #,
leaq .LC1(%rip), %rdi #,
movl $0, %eax #,
call __isoc99_scanf@PLT #
# a.c:7: printf("you enetered: %i\n", x);
movl x(%rip), %eax # x, x.0_1
movl %eax, %esi # x.0_1,
leaq .LC2(%rip), %rdi #,
movl $0, %eax #,
call printf@PLT #
# a.c:8: return 0;
movl $0, %eax #, _6
# a.c:9: }
popq %rbp #
ret
.size main, .-main
.ident "GCC: (Debian 8.3.0-6) 8.3.0"
.section .note.GNU-stack,"",@progbits
Here, the RIP-relative mode is used:
# a.c:6: scanf("%i",&x);
leaq x(%rip), %rsi #,
where x is the position of the x symbol. But in the comments, someone said that $rip+0x2ed8 is not the same, and that the offset 0x2ed8 does not lead to the address of x. Why do those two differ? This should be RIP-relative addressing, and both should yield the same offset (and thus the same address).
0x0000555555555165 <+16>: lea 0x2ed8(%rip),%rsi # 0x555555558044 <x>
0x000055555555516c <+23>: lea 0xe9a(%rip),%rdi # 0x55555555600d
A RIP-relative address in an instruction is relative to the address just after the current instruction (i.e. the address of the instruction plus the size of the instruction, or the address of the following instruction). This is because when the instruction has been loaded into the processor, the RIP register is advanced by the size of the current instruction just before it is executed. (At least that is the model that is followed even though modern processors use all sorts of tricks behind the scenes to speed up execution.) (Note: The above is true for several CPU architectures, including x86 variants, but some other CPU architectures differ in the point from which PC-relative addresses are measured [1].)
The first instruction above is at address 0x555555555165 and the following instruction is at address 0x55555555516c (the instruction is 7 bytes long). In the first instruction, the RIP relative address 0x2ed8(%rip) refers to 0x2ed8 + 0x000055555555516c = 0x555555558044.
Note that if you set a breakpoint on an instruction in a debugger and show the registers when the breakpoint is reached, RIP will point to the current instruction, not the next one, because the current instruction is not being executed yet.
[1] Thanks to Peter Cordes for details about PC-relative addressing for ARM and RISC-V CPU architectures.

Tracing program in assembly.

I'm trying to understand what a C program looks like at the assembly level, so I ran gdb and used disassemble on main and get_input. The program is short so that I can follow it better.
There are 2 lines that I don't understand. The first one, in main(), is:
0x00000000004005a3 <+4>: mov $0x0,%eax
We save the old value of rbp and save current value of rsp to rbp. What is the purpose of that instruction?
The other in get_input() is:
0x0000000000400581 <+4>: sub $0x10,%rsp
Here too we start by saving the old value of rbp, by pushing it to the stack. Then rbp is given the current value of rsp. Then 16 bytes are subtracted from rsp. I understand this is space being allocated, but why is it 16 bytes and not 8 bytes? I made the buffer 8 bytes only, so what is the purpose of the other 8 bytes?
#include <stdio.h>
void get_input()
{
char buffer[8];
gets(buffer);
puts(buffer);
}
int main()
{
get_input();
return 0;
}
Dump of assembler code for function main:
0x000000000040059f <+0>: push %rbp
0x00000000004005a0 <+1>: mov %rsp,%rbp
0x00000000004005a3 <+4>: mov $0x0,%eax
0x00000000004005a8 <+9>: callq 0x40057d <get_input>
0x00000000004005ad <+14>: mov $0x0,%eax
0x00000000004005b2 <+19>: pop %rbp
0x00000000004005b3 <+20>: retq
End of assembler dump.
Dump of assembler code for function get_input:
0x000000000040057d <+0>: push %rbp
0x000000000040057e <+1>: mov %rsp,%rbp
0x0000000000400581 <+4>: sub $0x10,%rsp
0x0000000000400585 <+8>: lea -0x10(%rbp),%rax
0x0000000000400589 <+12>: mov %rax,%rdi
0x000000000040058c <+15>: callq 0x400480 <gets@plt>
0x0000000000400591 <+20>: lea -0x10(%rbp),%rax
0x0000000000400595 <+24>: mov %rax,%rdi
0x0000000000400598 <+27>: callq 0x400450 <puts@plt>
0x000000000040059d <+32>: leaveq
0x000000000040059e <+33>: retq
For main()...
0x000000000040059f <+0>: push %rbp
Push %RBP's value onto the stack.
0x00000000004005a0 <+1>: mov %rsp,%rbp
Copy %RSP's value into %RBP (create a new stack frame).
0x00000000004005a3 <+4>: mov $0x0,%eax
Move the immediate value 0x0 into %EAX. That is, it zeroes %EAX. As you're in 64-bit mode, this also clears all of %RAX.
0x00000000004005a8 <+9>: callq 0x40057d <get_input>
Push %RIP's value, i.e. the return address (something that cannot be done directly with a PUSH instruction), then jump to label/function get_input().
0x00000000004005ad <+14>: mov $0x0,%eax
According to the AMD64 System V ABI, a function's return value is stored in %RAX (not taking into account floating point and large structures). It also says that there are two groups of registers: caller-saved and callee-saved. When you call a function, you can't expect caller-saved registers to remain the same; you must save them yourself on the stack if necessary. Likewise, a function that gets called must preserve callee-saved registers if it uses them. The caller-saved registers are %RAX, %RDI, %RSI, %RDX, %RCX, %R8, %R9, %R10, and %R11. The callee-saved registers are %RBX, %RSP, %RBP, %R12, %R13, %R14, and %R15.
Now, as main() apparently performs return 0, it must return that 0 in %RAX, right? However, two things should be taken into account. Firstly, in the AMD64 System V ABI, sizeof(int) == 4. %RAX is 8 bytes wide, but %EAX is 4 bytes wide, so %EAX should be used for manipulating int-wide stuff, such as main()'s return value. Secondly, %EAX is part of %RAX, and %RAX is caller-saved, thus we can't rely on its value after a call. So, we perform MOV $0x0, %EAX in order to set the function's return value to zero.
0x00000000004005b2 <+19>: pop %rbp
Restore main()'s caller's %RBP, that is, destroy main()'s stack frame.
0x00000000004005b3 <+20>: retq
Return from main() with a return value of 0.
Then, we have get_input()...
0x000000000040057d <+0>: push %rbp
Push %RBP's value onto the stack.
0x000000000040057e <+1>: mov %rsp,%rbp
Copy %RSP's value into %RBP (create a new stack frame).
0x0000000000400581 <+4>: sub $0x10,%rsp
Subtract 16 from %RSP (reserve 16 bytes of temporary storage for the current frame).
0x0000000000400585 <+8>: lea -0x10(%rbp),%rax
Load the effective address -0x10(%RBP) into %RAX. That is, it loads into %RAX the result of subtracting 16 from %RBP's value. This means that %RAX now points to the first byte of local temporary storage.
0x0000000000400589 <+12>: mov %rax,%rdi
According to the ABI, a function's first argument is given on %RDI, the second on %RSI, etc... In this case, %RAX's value is given as the first argument to the to-be-called function.
0x000000000040058c <+15>: callq 0x400480 <gets@plt>
Call function gets().
0x0000000000400591 <+20>: lea -0x10(%rbp),%rax
The same as above.
0x0000000000400595 <+24>: mov %rax,%rdi
Pass %RAX as the first argument.
0x0000000000400598 <+27>: callq 0x400450 <puts@plt>
Call function puts().
0x000000000040059d <+32>: leaveq
Equivalent to MOV %RBP, %RSP then POP %RBP, that is, destroys the stack frame.
0x000000000040059e <+33>: retq
Return from function get_input() without a proper return value.
Now...
MOV $0x0, %EAX
What is the purpose of that instruction?
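The second instance of that instruction is quite important, as it sets the return value of main(). However, the first one does no useful work in this program. It is most likely emitted because get_input() is declared with an empty parameter list (no prototype), so the compiler treats the call as possibly variadic and sets %AL to the number of vector registers used, as the AMD64 System V ABI requires for such calls. You probably have optimizations disabled on your compiler.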
Then 16 bytes are subtracted from rsp. I understand this is space allocated but why is it 16 bytes and not 8 bytes? I made the buffer 8 bytes only, what are the purpose of the other 8 bytes?
The ABI requires that %RSP shall be positioned on a 16-byte boundary before each function call. BTW, you should get away from statically-sized buffers and gets().
The first instruction, mov $0x0, %eax, moves a zero into EAX in order to set the return code.
The second instruction, sub $0x10,%rsp, allocates stack space for the buffer while keeping the stack aligned for function calls. The calling convention requires 16-byte alignment, not 8.

Why does a C compiler generate NOPs after a RET instruction? [duplicate]

This question already has answers here:
Why does GCC pad functions with NOPs?
(3 answers)
Closed 7 years ago.
On OSX 64bit, compiling a dummy C program like that:
#include <stdio.h>
void foo1() {
}
void foo2() {
}
int main() {
printf("Helloooo!\n");
foo1();
foo2();
return 0;
}
Produces the following ASM code (obtained disassembling the binary with otool):
(__TEXT,__text) section
_foo1:
0000000100000f10 55 pushq %rbp
0000000100000f11 4889e5 movq %rsp, %rbp
0000000100000f14 897dfc movl %edi, -0x4(%rbp)
0000000100000f17 5d popq %rbp
0000000100000f18 c3 retq
0000000100000f19 0f1f8000000000 nopl (%rax)
_foo2:
0000000100000f20 55 pushq %rbp
0000000100000f21 4889e5 movq %rsp, %rbp
0000000100000f24 5d popq %rbp
0000000100000f25 c3 retq
0000000100000f26 662e0f1f840000000000 nopw %cs:(%rax,%rax)
_main:
0000000100000f30 55 pushq %rbp
0000000100000f31 4889e5 movq %rsp, %rbp
0000000100000f34 4883ec10 subq $0x10, %rsp
0000000100000f38 488d3d4b000000 leaq 0x4b(%rip), %rdi ## literal pool for: "Helloooo!\n"
0000000100000f3f c745fc00000000 movl $0x0, -0x4(%rbp)
0000000100000f46 b000 movb $0x0, %al
0000000100000f48 e81b000000 callq 0x100000f68 ## symbol stub for: _printf
0000000100000f4d bf06000000 movl $0x6, %edi
0000000100000f52 8945f8 movl %eax, -0x8(%rbp)
0000000100000f55 e8b6ffffff callq _foo1
0000000100000f5a e8c1ffffff callq _foo2
0000000100000f5f 31c0 xorl %eax, %eax
0000000100000f61 4883c410 addq $0x10, %rsp
0000000100000f65 5d popq %rbp
0000000100000f66 c3 retq
What are the "nop" instructions found right after the "ret" on functions foo1() and foo2()? They are, of course, never executed since the "ret" instructions return from the function call. Is that any kind of padding or it has a different meaning?
From the Assembly language for x86 processors, Kip R. Irvine
The safest (and the most useless) instruction you can write is called NOP (no operation). It takes up 1 byte of program storage and doesn’t do any work. It is sometimes used by compilers and assemblers to align code to even-address boundaries
00000000 66 8B C3 mov ax,bx
00000003 90 nop ; align next instruction
00000004 8B D1 mov edx,ecx
What are the "nop" instructions found right after the "ret" on functions foo1() and foo2()?
The nop is a no-operation instruction (do nothing), from the linked Wikipedia page (emphasis mine)
A NOP is most commonly used for timing purposes, to force memory alignment, to prevent hazards, to occupy a branch delay slot, to render void an existing instruction such as a jump, or as a place-holder to be replaced by active instructions later on in program development (or to replace removed instructions when refactoring would be problematic or time-consuming).
nop is short for No Operation. The nop instructions in this case are providing execution code alignment. Notice that labels are on 16 byte boundaries. On OSX, the linker (ld) should have a -segalign option that will affect this behavior.

What do the instructions mov %edi and mov %rsi do?

I've written a basic C program that defines an integer variable x, sets it to zero and returns the value of that variable:
#include <stdio.h>
int main(int argc, char **argv) {
int x;
x = 0;
return x;
}
When I dump the object code using objdump (compiled on Linux X86-64 with gcc):
0x0000000000400474 <main+0>: push %rbp
0x0000000000400475 <main+1>: mov %rsp,%rbp
0x0000000000400478 <main+4>: mov %edi,-0x14(%rbp)
0x000000000040047b <main+7>: mov %rsi,-0x20(%rbp)
0x000000000040047f <main+11>: movl $0x0,-0x4(%rbp)
0x0000000000400486 <main+18>: mov -0x4(%rbp),%eax
0x0000000000400489 <main+21>: leaveq
0x000000000040048a <main+22>: retq
I can see the function prologue, but before we set x to 0 at address 0x000000000040047f there are two instructions that move %edi and %rsi onto the stack. What are these for?
In addition, unlike where we set x to 0, the mov instruction as shown in GAS syntax does not have a suffix.
If the suffix is not specified, and there are no memory operands for the instruction, GAS infers the operand size from the size of the destination register operand.
In this case, are -0x14(%rbp) and -0x20(%rbp) both memory operands, and what are their sizes? Since %edi is a 32-bit register, are 32 bits moved to -0x14(%rbp), whereas since %rsi is a 64-bit register, 64 bits are moved to -0x20(%rbp)?
In this simple case, why don't you ask your compiler directly? For GCC, clang and ICC there's the -fverbose-asm option.
main:
pushq %rbp #
movq %rsp, %rbp #,
movl %edi, -20(%rbp) # argc, argc
movq %rsi, -32(%rbp) # argv, argv
movl $0, -4(%rbp) #, x
movl -4(%rbp), %eax # x, D.2607
popq %rbp #
ret
So, yes, they save argc and argv onto the stack using the "old" frame pointer method; newer architectures allow subtracting/adding from/to the stack pointer directly, which makes it possible to omit the frame pointer (-fomit-frame-pointer).
Purpose of ESI & EDI registers?
Based on this and the context, I'm not an expert, but my guess is these are capturing the main() input parameters. EDI takes a standard width, which would match the int argc, whereas RSI takes a long, which would match the char **argv pointer.

Why does LLVM add two extra instructions for the same program?

I am compiling this C program and comparing the generated assembly code:
int main(){ return 0; }
GCC gives this main function (cc hello.c -S):
_main:
LFB2:
pushq %rbp
LCFI0:
movq %rsp, %rbp
LCFI1:
movl $0, %eax
leave
ret
LLVM gives this main function (clang hello.c -S):
_main:
Leh_func_begin0:
pushq %rbp
Ltmp0:
movq %rsp, %rbp
Ltmp1:
movl $0, %eax
movl $0, -4(%rbp)
popq %rbp
ret
Leh_func_end0:
What are movl $0, -4(%rbp) and popq %rbp needed for? Moving something on the stack and popping it directly afterwards seems useless to me.
The movl $0, -4(%rbp) instruction is dead, because this is unoptimized code. Try passing in -O to both compilers to see what changes.
Actually, they're comparable. Leave is a high level instruction:
From the Intel manual:
16-bit: C9 LEAVE A Valid Valid Set SP to BP, then pop BP.
32-bit: C9 LEAVE A N.E. Valid Set ESP to EBP, then pop EBP.
64-bit: C9 LEAVE A Valid N.E. Set RSP to RBP, then pop RBP.
basically, leave is equivalent to
movq %rbp, %rsp
popq %rbp
It looks like LLVM is using a traditional function prolog/epilog, whereas GCC is taking advantage of the fact that the entry point doesn't need to clean up

Resources