debugging with timer/signal always ends up in <timer_settime+16> - c

I am using gdb to debug a code that starts a timer. When the timer rings in gdb I always end up at instruction timer_settime+16.
Is this expected behavior?
As an example I slightly modified the code of timer_settime man page. The idea is to pass two arguments: a string of integers and a nsec value. The code launches the timer to ring after nsec, then copies the string.
I expected that by incrementing the nsec value, gdb stopped at different code lines, to end up inside the copy loop. However it always stops at .
So is this expected behavior?
Is it documented somewhere?
Is there a way to achieve what I expected (i.e.: launch a timer that when ring makes gdb stops where the program was just before (or after) the signal)? (always with nsec granularity).
Code:
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <time.h>
#define CLOCKID CLOCK_REALTIME
#define SIG SIGUSR1
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
unsigned char OUT[32];
unsigned char IN[32];
unsigned char ascii2hex(char in){
unsigned char out;
if( ('0' <= in) && (in <= '9') )
out = in - '0';
if( ('A' <= in) && (in <= 'F') )
out = in - 'A' + 10;
if( ('a' <= in) && (in <= 'f') )
out = in - 'a' + 10;
return out;
}
void asciiStr2hex(char * in, unsigned char * out, unsigned int len){
int i = 0;
int j = 0;
for( i = 0; i < len; i+=2){
out[j++] = (ascii2hex(in[i ]) << 4) + ascii2hex(in[i+1]);
}
}
void testcode(unsigned char *out, unsigned char *in, unsigned int len){
unsigned int i;
for (i=0;i<len;i++)
out[i] = in[i];
}
static void print_siginfo(siginfo_t *si)
{
timer_t *tidp;
int or;
tidp = si->si_value.sival_ptr;
printf(" sival_ptr = %p; ", si->si_value.sival_ptr);
printf(" *sival_ptr = 0x%lx\n", (long) *tidp);
or = timer_getoverrun(*tidp);
if (or == -1)
errExit("timer_getoverrun");
else
printf(" overrun count = %d\n", or);
}
static void handler(int sig, siginfo_t *si, void *uc)
{
/* Note: calling printf() from a signal handler is not
strictly correct, since printf() is not async-signal-safe;
see signal(7) */
printf("Caught signal %d\n", sig);
print_siginfo(si);
signal(sig, SIG_IGN);
}
int main(int argc, char *argv[])
{
timer_t timerid;
struct sigevent sev;
struct itimerspec its;
long long freq_nanosecs;
//sigset_t mask;
struct sigaction sa;
if (argc != 3) {
fprintf(stderr, "Usage: %s <16byte> <time-nanosecs>\n",
argv[0]);
exit(EXIT_FAILURE);
}
asciiStr2hex(argv[1], IN, 32);
/* Establish handler for timer signal */
printf("Establishing handler for signal %d\n", SIG);
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = handler;
sigemptyset(&sa.sa_mask);
if (sigaction(SIG, &sa, NULL) == -1)
errExit("sigaction");
/* Block timer signal temporarily */
/* printf("Blocking signal %d\n", SIG);
sigemptyset(&mask);
sigaddset(&mask, SIG);
if (sigprocmask(SIG_SETMASK, &mask, NULL) == -1)
errExit("sigprocmask");
*/
/* Create the timer */
sev.sigev_notify = SIGEV_SIGNAL;
sev.sigev_signo = SIG;
sev.sigev_value.sival_ptr = &timerid;
if (timer_create(CLOCKID, &sev, &timerid) == -1)
errExit("timer_create");
printf("timer ID is 0x%lx\n", (long) timerid);
/* Start the timer */
freq_nanosecs = atoll(argv[2]);
its.it_value.tv_sec = freq_nanosecs / 1000000000;
its.it_value.tv_nsec = freq_nanosecs % 1000000000;
its.it_interval.tv_sec = its.it_value.tv_sec;
its.it_interval.tv_nsec = its.it_value.tv_nsec;
if (timer_settime(timerid, 0, &its, NULL) == -1)
errExit("timer_settime");
/* Sleep for a while; meanwhile, the timer may expire
multiple times */
printf("Sleeping for %d seconds\n", atoi(argv[1]));
testcode(OUT, IN, 16);
/* Unlock the timer signal, so that timer notification
can be delivered */
/* printf("Unblocking signal %d\n", SIG);
if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
errExit("sigprocmask");
*/
exit(EXIT_SUCCESS);
}
When debug into gdb with r 00112233445566778899001122334455 2
I obtain:
Program received signal SIGUSR1, User defined signal 1.
0x76fc7c38 in timer_settime () from /lib/arm-linux-gnueabihf/librt.so.1
(gdb) x/30i $pc
=> 0x76fc7c38 <timer_settime+16>: cmn r0, #4096 ; 0x1000
0x76fc7c3c <timer_settime+20>: mov r4, r0
0x76fc7c40 <timer_settime+24>: bhi 0x76fc7c4c <timer_settime+36>
0x76fc7c44 <timer_settime+28>: mov r0, r4
0x76fc7c48 <timer_settime+32>: pop {r3, r4, r7, pc}
0x76fc7c4c <timer_settime+36>: bl 0x76fc55b4
0x76fc7c50 <timer_settime+40>: rsb r3, r4, #0
0x76fc7c54 <timer_settime+44>: mvn r4, #0
0x76fc7c58 <timer_settime+48>: str r3, [r0]
0x76fc7c5c <timer_settime+52>: b 0x76fc7c44 <timer_settime+28>
0x76fc7c60 <timer_settime+56>: andeq r0, r0, r2, lsl #2
0x76fc7c64: push {r4, r5, r6, r7, r8, r9, r10, lr}
0x76fc7c68: sub sp, sp, #600 ; 0x258
0x76fc7c6c: ldr r4, [pc, #340] ; 0x76fc7dc8
0x76fc7c70: add r1, sp, #512 ; 0x200
0x76fc7c74: add r4, pc, r4
0x76fc7c78: mov r0, r4
0x76fc7c7c: bl 0x76fc56b0
0x76fc7c80: cmp r0, #0
0x76fc7c84: bne 0x76fc7c98
0x76fc7c88: ldr r2, [sp, #512] ; 0x200
0x76fc7c8c: ldr r3, [pc, #312] ; 0x76fc7dcc
0x76fc7c90: cmp r2, r3
0x76fc7c94: beq 0x76fc7d94
0x76fc7c98: ldr r5, [pc, #304] ; 0x76fc7dd0
0x76fc7c9c: ldr r0, [pc, #304] ; 0x76fc7dd4
0x76fc7ca0: add r5, pc, r5
0x76fc7ca4: add r0, pc, r0
0x76fc7ca8: mov r1, r5
0x76fc7cac: bl 0x76fc5524
I am running such code on a raspberry pi, but I'am pretty sure I had the same behavior on another linux machine x86_64.
I have tested with "handle stop SIGUSR1".

I finally found that the problem was that I have to set unwindonsignal off in gdb to obtain the behavior I expected.

Related

Cannot access the arguments pushed by interrupt handlers correctly. Osdev

I am trying to write my own operating system. Recently I wanted to turn my operating system into a 64 bit one (used to be 32 bit). But that screwed quite a bit, unfortunately. I set up the IDT and all but when an ISR or an IRQ is called the stack frame that is passed onto the handler is corrupt/wrong. By corrupt/wrong I mean for example the int_no is 0 for all cases, error code is always zero, all the registers are zero. I just get some garbage value in ss.
Here is the struct for the stack frame:
typedef struct {
uint64_t rdi, rsi, rbp, useless, rbx, rdx, rcx, rax;
uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
uint64_t int_no, err_code;
uint64_t rip, cs, rflags, rsp, ss;
}__attribute__((packed)) interrupt_frame_t;
And here is my main IDT file:
#include "IDT.h"
#define PUSHALL \
".intel_syntax noprefix\n\t" \
"push r15\n\t"\
"push r14\n\t"\
"push r13\n\t"\
"push r12\n\t"\
"push r11\n\t"\
"push r10\n\t"\
"push r9\n\t"\
"push r8\n\t"\
"push rax\n\t"\
"push rcx\n\t"\
"push rdx\n\t"\
"push rbx\n\t"\
"push rsp\n\t"\
"push rbp\n\t"\
"push rsi\n\t"\
"push rdi\n\t"\
".att_syntax prefix\n\t" \
#define POPALL\
".intel_syntax noprefix\n\t" \
"pop rdi\n\t"\
"pop rsi\n\t"\
"pop rbp\n\t"\
"add rsp, 8\n\t"\
"pop rbx\n\t"\
"pop rdx\n\t"\
"pop rcx\n\t"\
"pop rax\n\t"\
"pop r8\n\t"\
"pop r9\n\t"\
"pop r10\n\t"\
"pop r11\n\t"\
"pop r12\n\t"\
"pop r13\n\t"\
"pop r14\n\t"\
"pop r15\n\t"\
"add rsp, 0x10\n\t"\
".att_syntax prefix\n\t" \
/* Common body for interrupt handler */
#define MAKE_INTERRUPT_COMMON_STUB(intno, intfunc) \
"push $"#intno"\n\t" \
PUSHALL \
"mov %rsp, %rdi\n\t"\
"sub $0x28, %rsp\n\t"\
"cld\n\t" \
"call " #intfunc "\n\t" \
"add $0x28, %rsp\n\t" /* Skip int_num and err_code */ \
POPALL \
"iretq \n\t"
#define MAKE_IRQ_COMMON_STUB(intno, intfunc) \
"push $"#intno"\n\t" \
PUSHALL \
"mov %rsp, %rdi\n\t"\
"cld\n\t" \
"call " #intfunc "\n\t" \
POPALL \
"iretq \n\t"
/* Make interrupt for exception without error code. Push a dummy value for the
* error code in it place. Push all the segment registers and the segment registers
* so that they are available to interrupt function (intfun). Pushes a unique
* interrupt number (intno) after the error code so that a handler can be multiplexed
* if needed. Restore all the registers upon exit.
*
* intentry: Is the interrupt entry point that can be used in an Interrupt
* Descriptor Table (IDT) entry.
* intfunc: Is the C interrupt function that the stub calls to do processing
* intno: Interrupt number. Can be used to multiplex multiple interrupts to one
* intfunc handler.
*/
#define MAKE_INTERRUPT(intentry, intfunc, intno) \
extern void intentry (void); \
__asm__(".global " #intentry "\n\t" \
".align 16\n\t" \
#intentry ":\n\t" \
"push $0\n\t" /* Push dummy error code */ \
MAKE_INTERRUPT_COMMON_STUB(intno, intfunc));
#define MAKE_IRQ(intentry, intfunc, intno) \
extern void intentry (void); \
__asm__(".global " #intentry "\n\t" \
".align 16\n\t" \
#intentry ":\n\t" \
"push $0\n\t" /* Push dummy error code */ \
MAKE_IRQ_COMMON_STUB(intno, intfunc));
/* Make interrupt for exception with error code. The processor pushes the error code
* after the return address automatically. Push all the segment registers and the
* segment registers so that they are available to interrupt function (intfun).
* Pushes a unique interrupt number (into) after the error code so that a handler
* can be multiplexed if needed. Restore all the registers upon exit.
*
* inventory: This is the interrupt entry point that can be used in an Interrupt
* Descriptor Table (IDT) entry.
* intfunc: Is the C interrupt function that the stub calls to do processing
* into: Interrupt number. Can be used to multiplex multiple interrupts to one
* intfunc handler.
*/
#define MAKE_INTERRUPT_ERRCODE(intentry, intfunc, intno) \
extern void intentry (void); \
__asm__(".global " #intentry "\n" \
".align 16\n\t" \
#intentry ":\n\t" \
MAKE_INTERRUPT_COMMON_STUB(intno, intfunc));
MAKE_INTERRUPT (isr0, isr0_handler, 0x00)
MAKE_INTERRUPT (isr1, isr1_handler, 0x01)
MAKE_INTERRUPT (isr2, isr2_handler, 0x02)
MAKE_INTERRUPT (isr3, isr3_handler, 0x03)
MAKE_INTERRUPT (isr4, isr4_handler, 0x04)
MAKE_INTERRUPT (isr5, isr5_handler, 0x05)
MAKE_INTERRUPT (isr6, isr6_handler, 0x06)
MAKE_INTERRUPT (isr7, isr7_handler, 0x07)
MAKE_INTERRUPT_ERRCODE(isr8, isr8_handler, 0x08)
MAKE_INTERRUPT (isr9, isr9_handler, 0x09)
MAKE_INTERRUPT_ERRCODE(isr10, isr10_handler, 0x0a)
MAKE_INTERRUPT_ERRCODE(isr11, isr11_handler, 0x0b)
MAKE_INTERRUPT_ERRCODE(isr12, isr12_handler, 0x0c)
MAKE_INTERRUPT_ERRCODE(isr13, isr13_handler, 0x0d)
MAKE_INTERRUPT_ERRCODE(isr14, isr14_handler, 0x0e)
/* Reserved 0x0f */
MAKE_INTERRUPT (isr16, isr0_handler, 0x10)
MAKE_INTERRUPT_ERRCODE(isr17, isr0_handler, 0x11)
MAKE_INTERRUPT (isr18, isr0_handler, 0x12)
MAKE_INTERRUPT (isr19, isr0_handler, 0x13)
MAKE_INTERRUPT (isr20, isr0_handler, 0x14)
/* Reserved 0x15 to 0x1d */
MAKE_INTERRUPT_ERRCODE(isr30, isr0_handler, 0x1e)
/* Reserved 0x1f */
/* IRQ handlers */
MAKE_IRQ (irq0, irq0_handler, 0x0)
MAKE_IRQ (irq1, irq1_handler, 0x1)
MAKE_IRQ (irq2, irq2_handler, 0x2)
MAKE_IRQ (irq3, irq3_handler, 0x3)
MAKE_IRQ (irq4, irq4_handler, 0x4)
MAKE_IRQ (irq5, irq5_handler, 0x5)
MAKE_IRQ (irq6, irq6_handler, 0x6)
MAKE_IRQ (irq7, irq7_handler, 0x7)
MAKE_IRQ (irq8, irq8_handler, 0x8)
MAKE_IRQ (irq9, irq9_handler, 0x9)
MAKE_IRQ (irq10, irq10_handler, 0xA)
MAKE_IRQ (irq11, irq11_handler, 0xB)
MAKE_IRQ (irq12, irq12_handler, 0xC)
MAKE_IRQ (irq13, irq13_handler, 0xD)
MAKE_IRQ (irq14, irq14_handler, 0xE)
MAKE_IRQ (irq15, irq15_handler, 0xF)
void init_idt_entry(int num, unsigned int offset, unsigned short select,
unsigned short flags)
{
_idt_entries[num].offset0 = (unsigned short)(offset & 0x000000000000FFFF);
_idt_entries[num].offset1 = (unsigned short)((offset & 0x00000000FFFF0000) >> 16);
_idt_entries[num].offset2 = (unsigned int)((offset & 0xFFFFFFFF00000000) >> 32);
_idt_entries[num].selector = select;
_idt_entries[num].flags = flags;
return;
}
///////////////////////////////////////
void idt_flush(struct idt *idtr)
{
asm volatile("lidt %0" :: "m"(*idtr));
}
#define PIC1_COMMAND 0x20
#define PIC1_DATA 0x21
#define PIC2_COMMAND 0xA0
#define PIC2_DATA 0xA1
#define PIC_EOI 0x20
#define ICW1_INIT 0x10
#define ICW1_ICW4 0x01
#define ICW4_8086 0x01
void init_pic()
{
/*
outb(0x20,0x11);
outb(0xA0,0x11);
outb(0x21, 0x20);
outb(0xA1, 40);
outb(0x21, 0x04);
outb(0xA1, 0x02);
outb(0x21, 0x01);
outb(0xA1, 0x01);
outb(0x21, 0x0);
outb(0xA1, 0x0);
*/
unsigned char a1, a2;
a1 = inb(PIC1_DATA);
io_wait();
a2 = inb(PIC2_DATA);
io_wait();
outb(PIC1_COMMAND, ICW1_INIT | ICW1_ICW4);
io_wait();
outb(PIC2_COMMAND, ICW1_INIT | ICW1_ICW4);
io_wait();
outb(PIC1_DATA, 0x20);
io_wait();
outb(PIC2_DATA, 0x28);
io_wait();
outb(PIC1_DATA, 4);
io_wait();
outb(PIC2_DATA, 2);
io_wait();
outb(PIC1_DATA, ICW4_8086);
io_wait();
outb(PIC2_DATA, ICW4_8086);
io_wait();
outb(PIC1_DATA, a1);
io_wait();
outb(PIC2_DATA, a2);
}
void init_idt()
{
_idt_entries = RequestPage();
Tidt.limit = 16 * 256;
Tidt.base = _idt_entries;
memset(_idt_entries, 0, 16*256);
for(int i = 0; i < 256 ; i++){
init_idt_entry(i,(int)&isr0,0x08, 0x8E);
}
init_idt_entry(0,(int)&isr0,0x08, 0x8E);
init_idt_entry(1,(int)&isr1,0x08, 0x8E);
init_idt_entry(2,(int)&isr2,0x08, 0x8E);
init_idt_entry(3,(int)&isr3,0x08, 0x8E);
init_idt_entry(4,(int)&isr4,0x08, 0x8E);
init_idt_entry(5,(int)&isr5,0x08, 0x8E);
init_idt_entry(6,(int)&isr6,0x08, 0x8E);
init_idt_entry(7,(int)&isr7,0x08, 0x8E);
init_idt_entry(8,(int)&isr8,0x08, 0x8E);
init_idt_entry(9,(int)&isr9,0x08, 0x8E);
init_idt_entry(10,(int)&isr10,0x08, 0x8E);
init_idt_entry(11,(int)&isr11,0x08, 0x8E);
init_idt_entry(12,(int)&isr12,0x08, 0x8E);
init_idt_entry(13,(int)&isr13,0x08, 0x8E);
init_idt_entry(14,(int)&isr14,0x08, 0x8E);
/* ISR15 is reserved */
init_idt_entry(16,(int)&isr16,0x08, 0x8E);
init_idt_entry(17,(int)&isr17,0x08, 0x8E);
init_idt_entry(18,(int)&isr18,0x08, 0x8E);
init_idt_entry(19,(int)&isr19,0x08, 0x8E);
init_idt_entry(20,(int)&isr20,0x08, 0x8E);
/* ISR21 to ISR2F are reserved */
init_idt_entry(30,(int)&isr30,0x08, 0x8E);
/* IRQ handlers */
init_idt_entry(32,(int)&irq0,0x08, 0x8E);
init_idt_entry(33,(int)&irq1,0x08, 0x8E);
init_idt_entry(34,(int)&irq2,0x08, 0x8E);
init_idt_entry(35,(int)&irq3,0x08, 0x8E);
init_idt_entry(36,(int)&irq4,0x08, 0x8E);
init_idt_entry(37,(int)&irq5,0x08, 0x8E);
init_idt_entry(38,(int)&irq6,0x08, 0x8E);
init_idt_entry(39,(int)&irq7,0x08, 0x8E);
init_idt_entry(40,(int)&irq8,0x08, 0x8E);
init_idt_entry(41,(int)&irq9,0x08, 0x8E);
init_idt_entry(42,(int)&irq10,0x08, 0x8E);
init_idt_entry(43,(int)&irq11,0x08, 0x8E);
init_idt_entry(44,(int)&irq12,0x08, 0x8E);
init_idt_entry(45,(int)&irq13,0x08, 0x8E);
init_idt_entry(46,(int)&irq14,0x08, 0x8E);
init_idt_entry(47,(int)&irq15,0x08, 0x8E);
init_pic();
SetPITSpeed(100);
drawStringToCursor("Set PIT speed\n",0xffffff, 0x000000);
idt_flush(&Tidt);
int s = initPS2Controller();
if(s == 0) drawStringToCursor("Successfully initialized PS2 Controller\n",0xffffff, 0x000000);
else drawStringToCursor("A problem occured while initializing PS2 Controller\n",0xffffff, 0x000000);
s = 0;
s = initKeyboard();
if(s == 0xFA) drawStringToCursor("Successfully initialized Keyboard Controller\n",0xffffff, 0x000000);
else drawStringToCursor("A problem occured while initializing Keyboard Controller\n",0xffffff, 0x000000);
s = 0;
s = initMouse();
if(s == 0) drawStringToCursor("Successfully initialized Mouse Controller\n",0xffffff, 0x000000);
else drawStringToCursor("A problem occured while initializing Mouse Controller\n",0xffffff, 0x000000);
outb(PIC1_DATA, 0);
outb(PIC2_DATA, 0);
//int d = 5 / 0;
asm volatile("sti;");
//while(1);
}
I think that there may be 3 problems: Something is wrong with the way I push them to the stack (which seems to me to most possible one), something wrong with the way I receive them or Something gets overwritten or lost in paging. Yet I couldn't spot the issue. I would be more than happy to post more code here, but here is the entire code if you want to check it out:
https://github.com/Danyy427/OSDEV.git

bpf how to inspect syscall arguments

trace_output_kern.c traces sys_write syscall and prints the pid in userland:
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
struct bpf_map_def SEC("maps") my_map = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(u32),
.max_entries = 2,
};
SEC("kprobe/sys_write")
int bpf_prog1(struct pt_regs *ctx)
{
struct S {
u64 pid;
u64 cookie;
} data;
data.pid = bpf_get_current_pid_tgid();
data.cookie = 0x12345678;
bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data));
return 0;
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
sys_read has a signature of sys_read(unsigned int fd, char __user *buf, size_t count);, and currently we only see the PID. The premise of tracing is that we get to intercept, and inspect the arguments. I was trying to see the arguments that gets passed on as well.
If I change that struct S to hold a char array to hold char *buf as
struct S {
u64 pid;
u64 cookie;
char bleh[128]; //<-- added this
} data;
it is throwing a fit:
/usr/src/linux-5.4/samples/bpf# ./trace_output
bpf_load_program() err=13
0: (bf) r6 = r1
1: (85) call bpf_get_current_pid_tgid#14
2: (b7) r1 = 305419896
3: (7b) *(u64 *)(r10 -136) = r1
4: (7b) *(u64 *)(r10 -144) = r0
5: (bf) r4 = r10
6: (07) r4 += -144
7: (bf) r1 = r6
8: (18) r2 = 0xffff8975bd44aa00
10: (b7) r3 = 0
11: (b7) r5 = 144
12: (85) call bpf_perf_event_output#25
invalid indirect read from stack off -144+16 size 144
processed 12 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
0: (bf) r6 = r1
1: (85) call bpf_get_current_pid_tgid#14
2: (b7) r1 = 305419896
3: (7b) *(u64 *)(r10 -136) = r1
4: (7b) *(u64 *)(r10 -144) = r0
5: (bf) r4 = r10
6: (07) r4 += -144
7: (bf) r1 = r6
8: (18) r2 = 0xffff8975bd44aa00
10: (b7) r3 = 0
11: (b7) r5 = 144
12: (85) call bpf_perf_event_output#25
invalid indirect read from stack off -144+16 size 144
processed 12 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
if sys_write is a bad (question) example, I've been also trying to trace sys_execve, which has an arg list of
asmlinkage long sys_execve(const char __user *filename,
const char __user *const __user *argv,
const char __user *const __user *envp);
Please point me the correct direction, thanks!
Edit 1
How do I intercept the arguments that was used for __x64_sys_execve?
When I try this below,
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
struct bpf_map_def SEC("maps") my_map = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(u32),
.max_entries = 2,
};
//SEC("kprobe/sys_write")
SEC("kprobe/__x64_sys_execve")
/* Signature of sys_execve:
asmlinkage long sys_execve(const char __user *filename,
const char __user *const __user *argv,
const char __user *const __user *envp);
*/
int bpf_prog1(struct pt_regs *ctx, const char *filename)
{
struct S {
u64 pid;
u64 cookie;
char bleh[128];
} data;
data.pid = bpf_get_current_pid_tgid();
data.cookie = 0x12345678;
//bpf_get_current_comm(&data.bleh, 128);
bpf_probe_read(&data.bleh, 128, (void *)filename);
bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data));
return 0;
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
It blows up thusly:
/usr/src/linux-5.4/samples/bpf# ./borky
bpf_load_program() err=13
0: (bf) r6 = r2
R2 !read_ok
processed 1 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
0: (bf) r6 = r2
R2 !read_ok
processed 1 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
The first part of your question was answered by pchaigno: if you extend your struct S and try to read it (bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data));) without having initialised it, the verifier complains, because reading uninitialised memory from the kernel introduces a security risk. What you could do is, for example, zero the whole struct when declaring it:
struct S {
u64 pid;
u64 cookie;
char bleh[128];
} data = {0};
Regarding the second part of your question with sys_execve, it turns out you cannot pass the syscall arguments to your function bpf_prog1() as you try to do. Your function should only take the struct pt_regs *ctx.
The confusion likely comes from the syntax used in bcc, where arguments are passed this way, but it is important to understand that bcc rewrites some parts under the hood, in particular this thing about accessing the arguments.
What you could use instead is the set of PT_REGS_PARM*(ctx) macros that are specifically defined to access the arguments of the probed function, from the relevant computer registers (example, definition). I think bcc also uses them when doing its rewriting job, but you wouldn't see it.

Shellcode in the signal handler

Why won't my shellcode (int3) be hit with the signal handler?
Apart from not like to have printf() in the handler, I care
for how to deliver the shellcode (as not inline assembler)
within the signal handler, being executed at runtime.
However, I present here a longer gdb session, that shows register
state and backtrace.
<code> Pid 19750 waiting for SIGUSR1 Program received signal SIGUSR1,
User defined signal 1. 0x0e5f9f89 in nanosleep () at <stdin>:2 2
<stdin>: No such file or directory.
in <stdin> Current language: auto; currently asm (gdb) bt
#0 0x0e5f9f89 in nanosleep () at <stdin>:2
#1 0x0e650348 in sleep (seconds=10) at /usr/src/lib/libc/gen/sleep.c:45
#2 0x18cb3d5b in main () at sig5.c:37 (gdb) i r eax 0x5b 91 ecx 0x0 0 edx 0xa 10 ebx
0x2e5df594 777909652 esp 0xcfbf73fc 0xcfbf73fc
ebp 0xcfbf7438 0xcfbf7438 esi 0x38cb62df
952853215 edi 0x38cb61e0 952852960 eip
0xe5f9f89 0xe5f9f89 eflags 0x206 518 cs
0x2b 43 ss 0x33 51 ds 0x33 51 es
0x33 51 fs 0x5b 91 gs 0x63 99
(gdb) c Continuing.
Program received signal SIGUSR1, User defined signal 1. 0x0e5f9f89 in
nanosleep () at <stdin>:2 2 in <stdin> (gdb) c Continuing.
Signal 30 from pid 0, should int3
Program received signal SIGSEGV, Segmentation fault. 0x18cb3c7a in
sigusr1 (signo=30, si=0xcfbf737c, data=0xcfbf7328) at sig5.c:23 23
ret(); Current language: auto; currently c (gdb) bt
#0 0x18cb3c7a in sigusr1 (signo=30, si=0xcfbf737c, data=0xcfbf7328) at sig5.c:23
#1 <signal handler called>
#2 0x0e5f9f89 in nanosleep () at <stdin>:2
#3 0x0e650348 in sleep (seconds=10) at /usr/src/lib/libc/gen/sleep.c:45
#4 0x18cb3d5b in main () at sig5.c:37 (gdb) i r eax 0xcfbf7305 -809536763 ecx 0x0 0 edx
0x0 0 ebx 0x38cb5124 952848676 esp
0xcfbf72e8 0xcfbf72e8 ebp 0xcfbf7310 0xcfbf7310
esi 0x38cb62df 952853215 edi 0x38cb61e0
952852960 eip 0x18cb3c7a 0x18cb3c7a eflags
0x10282 66178 cs 0x2b 43 ss 0x33 51
ds 0x33 51 es 0x33 51 fs
0x5b 91 gs 0x63 99 (gdb) bt full
#0 0x18cb3c7a in sigusr1 (signo=30, si=0xcfbf737c, data=0xcfbf7328) at sig5.c:23
code = "ëÌ"
ret = (int (*)()) 0xcfbf7305
#1 <signal handler called> No symbol table info available.
#2 0x0e5f9f89 in nanosleep () at <stdin>:2 No locals.
#3 0x0e650348 in sleep (seconds=10) at /usr/src/lib/libc/gen/sleep.c:45
rqt = {tv_sec = 10, tv_nsec = 0}
rmt = {tv_sec = 0, tv_nsec = 0}
#4 0x18cb3d5b in main () at sig5.c:37
sa = {__sigaction_u = {__sa_handler = 0x18cb3c04 <sigusr1>,
__sa_sigaction = 0x18cb3c04 <sigusr1>}, sa_mask = 0, sa_flags = 64}
-bash-4.3$ cat sig5.c
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
void sigusr1(int signo, siginfo_t *si, void *data) {
(void)signo;
(void)data;
unsigned char code[] = \
"\xeb\xcc";
int (*ret)() = (int(*)())code;
printf("Signal %d from pid %lu, should int3\n", (int)si->si_signo,
(unsigned long)si->si_pid);
sleep (1);
ret();
exit(0); }
int main(void) {
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = sigusr1;
if (sigaction(SIGUSR1, &sa, 0) == -1) {
fprintf(stderr, "%s: %s\n", "sigaction", strerror(errno));
}
printf("Pid %lu waiting for SIGUSR1\n", (unsigned long)getpid());
for (;;) {
sleep(10);
}
return 0; } </code>
Any Ideas?
So this uses mprotect(), but still does not lead to the shell. We really need to update
radare2 shellcode sources :)
<code>
#include "errno.h"
#include "signal.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "unistd.h"
#include "sys/mman.h"
void sigusr1(int signo, siginfo_t *si, void *data) {
(void)signo;
(void)data;
unsigned char sc[] = \
"\xcc";
mprotect(sc,strlen(sc),PROT_EXEC|PROT_READ|PROT_WRITE);
int (*r)() = (int(*)())sc; /* Thanks, maybe change to define? */
r();
}
int main(void) {
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_flags = 5; /* SIGTRAP via Sil... */
sa.sa_sigaction = sigusr1;
if (sigaction(SIGUSR1, &sa, 0) == -1) {
fprintf(stderr, "%s: %s\n", "sigaction", strerror(errno));
}
printf("Pid %lu waiting for SIGUSR1\n", (unsigned long)getpid());
for (;;) {
sleep(10);
}
return 0;
}
</code>

How to measure program execution time in ARM Cortex-A53 processor?

I was using following method to read clock in cortex-a15:
static void readticks(unsigned int *result)
{
struct timeval t;
unsigned int cc;
if (!enabled) {
// program the performance-counter control-register:
asm volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(17));
//enable all counters
asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
//Clear overflow.
asm volatile("mcr p15, 0, %0, c9, c12, 3" :: "r"(0x8000000f));
enabled = 1;
}
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(cc));
gettimeofday(&t,(struct timezone *) 0);
result[0] = cc;
result[1] = t.tv_usec;
result[2] = t.tv_sec;
}
And final performance profilinglooks like:
before = readticks();
foo();
after = readticks();
clock_cycles = after - before.
I want to use same logic in cortex-A53, ARM64 (not aarch32).
I have tried this after following online portals:
/* All counters, including PMCCNTR_EL0, are disabled/enabled */
#define QUADD_ARMV8_PMCR_E (1 << 0)
/* Reset all event counters, not including PMCCNTR_EL0, to 0
*/
#define QUADD_ARMV8_PMCR_P (1 << 1)
/* Reset PMCCNTR_EL0 to 0 */
#define QUADD_ARMV8_PMCR_C (1 << 2)
/* Clock divider: PMCCNTR_EL0 counts every clock cycle/every 64 clock cycles */
#define QUADD_ARMV8_PMCR_D (1 << 3)
/* Export of events is disabled/enabled */
#define QUADD_ARMV8_PMCR_X (1 << 4)
/* Disable cycle counter, PMCCNTR_EL0 when event counting is prohibited */
#define QUADD_ARMV8_PMCR_DP (1 << 5)
/* Long cycle count enable */
#define QUADD_ARMV8_PMCR_LC (1 << 6)
static inline unsigned int armv8_pmu_pmcr_read(void)
{
unsigned int val;
/* Read Performance Monitors Control Register */
asm volatile("mrs %0, pmcr_el0" : "=r" (val));
return val;
}
static inline void armv8_pmu_pmcr_write(unsigned int val)
{
asm volatile("msr pmcr_el0, %0" : :"r" (val & QUADD_ARMV8_PMCR_WR_MASK));
}
static void enable_all_counters(void)
{
unsigned int val;
/* Enable all counters */
val = armv8_pmu_pmcr_read();
val |= QUADD_ARMV8_PMCR_E | QUADD_ARMV8_PMCR_X;
armv8_pmu_pmcr_write(val);
}
static void reset_all_counters(void)
{
unsigned int val;
val = armv8_pmu_pmcr_read();
val |= QUADD_ARMV8_PMCR_P | QUADD_ARMV8_PMCR_C;
armv8_pmu_pmcr_write(val);
}
static void readticks(unsigned int *result)
{
struct timeval t;
unsigned int cc;
unsigned int val;
if (!enabled) {
reset_all_counters();
enable_all_counters();
enabled = 1;
}
cc = armv8_pmu_pmcr_read();
gettimeofday(&t,(struct timezone *) 0);
result[0] = cc;
result[1] = t.tv_usec;
result[2] = t.tv_sec;
}
But it gives "Illegal instruction" as error while I am trying profiling. Can anyone help me to change the above code for cortex-a53?
You need to enable the PMU for user mode. Here is the kernel module I wrote for it(For ARM V7 in Raspberry Pi 2):
/* Module source file 'module.c'. */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
arm_write(unsigned long val)
{
//Enabling both read and write - note difference between mcr and mrc
asm volatile("mrc p15, 0, %0, c9, c14, 0" :: "r"(1));
asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(1));
}
static int enabler(void)
{
unsigned long value = 1;
printk(KERN_INFO "Enabling PMU usermode.\n");
arm_write(value);
return 0;
}
static void end(void)
{
printk(KERN_INFO "module unloaded.\n");
}
module_init(enabler);
module_exit(end);
MODULE_AUTHOR("Sama");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Blahblah");
This will enable user mode access to the PMU. once you compiled it , you need to enable the PMU counters as follow:
int main(int argc, char **argv){
int enable_divider =1;
int do_reset=1;
int value = 1;
// peform reset:
if (do_reset) {
value |= 2; // reset all counters to zero.
value |= 4; // reset cycle counter to zero.
}
if (enable_divider)
value |= 8; // enable "by 64" divider for CCNT. You really do not want to get all cycle count. This will increment the counter by 1 for every 64 cpu cycle.
value |= 16;
// program the performance-counter control-register with mask constructed above
asm volatile ("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(value));
// enable all counters:
asm volatile ("MCR p15, 0, %0, c9, c12, 1\t\n" :: "r"(0x8000000f));
// clear overflows:
asm volatile ("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x80000001));
// Select individual counter (0)
asm volatile ("MCR p15, 0, %0, c9 , c12 , 5\t\n":: "r"(0x00));
// Write event (0x11 = Cycle count)
asm volatile ("MCR p15, 0, %0, c9 , c13 , 1\t\n":: "r"(0xD));
printf("Hi");
unsigned int output;
// Read current event counter
asm volatile ("MRC p15, 0, %0, c9 , c13 , 2\t\n": "=r"(output));
printf("Event count 0: %ul\n", output);
printf("Normal Execution, No Buffer Overflow Occurred.\n");
return 0;
}
However unfortunately what you get is not only your program cpu cycle, but entire system cpu cycle!. So what I recommend is to use perf.
Write your asm code in an inline assembly code in C and then put it like this:
int dummya(int z, int b){
//This is my function you need to change it for yourself
struct perf_event_attr pe;
long long count;
int fd;
memset(&pe, 0, sizeof(struct perf_event_attr));
pe.type = PERF_TYPE_HARDWARE;
pe.size = sizeof(struct perf_event_attr);
pe.config = PERF_COUNT_HW_CPU_CYCLES;
pe.disabled = 1;
pe.exclude_kernel = 1;
pe.exclude_hv = 1;
fd = perf_event_open(&pe, 0, -1, -1, 0);
if (fd == -1) {
fprintf(stderr, "Error opening leader %llx\n", pe.config);
exit(EXIT_FAILURE);
}
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
//From here the counter starts.
asm("Your ASM Codes");
asm("Your ASM Codes");
asm("Your ASM Codes");
asm("Your ASM Codes");
asm("Your ASM Codes");
asm("Your ASM Codes");
asm("Your ASM Codes");
asm("Your ASM Codes");
//Disabling Counter
ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
read(fd, &count, sizeof(long long));
printf("%lld\n", count);
close(fd);
return 5;
}
And be advised you need a new kernels to access the Perf driver.

Linux module: performance counter does not work

I want to monitor the cache request number in the last level cache. I wrote a Linux module to get that information based on the tutorial here.
It can compile and run, but the output result is always 0. In other words, when I use rdmsr, it always give me edx=0, eax=0. I even tried the demo code in the tutorial, the output is still 0.
I'm stuck at this problem for a whole week. Could anyone help me point out the mistake I made in the program?
I knew there are some existing programs doing the same thing, but I have to know how to write the code by myself, because I want to monitor the cache request in Xen hypervisor. I cannot use those tools in Xen unless I incorporate the tools into Xen's hypervisor, which seems more work.
/*
* Record the cache miss rate of Intel Sandybridge cpu
* To confirm the event is correctly set!
*/
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
/*4 Performance Counters Selector for %ecx in insn wrmsr*/
#define PERFEVTSEL0 0x186
#define PERFEVTSEL1 0x187
#define PERFEVTSEL2 0x188
#define PERFEVTSEL3 0x189
/*4 MSR Performance Counter for the above selector*/
#define PMC0 0xc1
#define PMC1 0xc2
#define PMC2 0xc2
#define PMC3 0xc3
/*Intel Software Developer Manual Page 2549*/ /*L1I L1D cache events has not been confirmed!*/
/*L1 Instruction Cache Performance Tuning Events*/
#define L1I_ALLHIT_EVENT 0x80
#define L1I_ALLHIT_MASK 0x01
#define L1I_ALLMISS_EVENT 0x80 /*confirmed*/
#define L1I_ALLMISS_MASK 0x02 /*confirmed*/
/*L1 Data Cache Performance Tuning Events*/
/*Intel does not have the ALLREQ Miss mask; have to add LD_miss and ST_miss*/
#define L1D_ALLREQ_EVENT 0x43
#define L1D_ALLREQ_MASK 0x01
#define L1D_LDMISS_EVENT 0x40
#define L1D_LDMISS_MASK 0x01
#define L1D_STMISS_EVENT 0x28
#define L1D_STMISS_MASK 0x01
/*L2 private cache for each core*/ /*confirmed*/
#define L2_ALLREQ_EVENT 0x24
#define L2_ALLREQ_MASK L2_ALLCODEREQ_MASK /*0xFF*/
#define L2_ALLMISS_EVENT 0x24
#define L2_ALLMISS_MASK L2_ALLCODEMISS_MASK /*0xAA*/
#define L2_ALLCODEREQ_MASK 0x30
#define L2_ALLCODEMISS_MASK 0x20
/*L3 shared cache*/ /*confirmed*/
/*Use the last level cache event and mask*/
#define L3_ALLREQ_EVENT 0x2E
#define L3_ALLREQ_MASK 0x4F
#define L3_ALLMISS_EVENT 0x2E
#define L3_ALLMISS_MASK 0x41
#define USR_BIT (0x01UL << 16)
#define OS_BIT (0x01UL << 17)
#define SET_MSR_USR_BIT(eax) eax |= USR_BIT
#define CLEAR_MSR_USR_BIT(exa) eax &= (~USR_BIT)
#define SET_MSR_OS_BIT(eax) eax |= OS_BIT
#define CLEAR_MSR_OS_BIT(eax) eax &= (~OS_BIT)
#define SET_EVENT_MASK(eax, event, umask) eax |= (event | (umask << 8))
/*MSR EN flag: when set start the counter!*/
//#define MSR_ENFLAG (0x1<<22)
#define MSR_ENFLAG (0x1<<22)
/* 32bit insn v3*/
static inline void rtxen_write_msr(uint32_t eax, uint32_t ecx)
{
/*clear counter first*/
__asm__ __volatile__ ("movl %0, %%ecx\n\t"
"xorl %%edx, %%edx\n\t"
"xorl %%eax, %%eax\n\t"
"wrmsr\n\t"
: /* no outputs */
: "m" (ecx)
: "eax", "ecx", "edx" /* all clobbered */);
eax |= MSR_ENFLAG;
__asm__("movl %0, %%ecx\n\t" /* ecx contains the number of the MSR to set */
"xorl %%edx, %%edx\n\t"/* edx contains the high bits to set the MSR to */
"movl %1, %%eax\n\t" /* eax contains the log bits to set the MSR to */
"wrmsr\n\t"
: /* no outputs */
: "m" (ecx), "m" (eax)
: "eax", "ecx", "edx" /* clobbered */);
}
static inline void rtxen_read_msr(uint32_t* ecx, uint32_t *eax, uint32_t* edx)
{ __asm__ __volatile__(\
"rdmsr"\
:"=d" (*edx), "=a" (*eax)\
:"c"(*ecx)
);
}
static inline void delay(void )
{
char tmp[1000];
int i;
for( i = 0; i < 1000; i++ )
{
tmp[i] = i * 2;
}
}
enum cache_level
{
UOPS,
L1I,
L1D,
L2,
L3
};
int init_module(void)
{
enum cache_level op;
uint32_t eax, edx, ecx;
uint64_t l3_all;
op = UOPS;
switch(op)
{
case UOPS:
eax = 0x0001010E;
eax |= MSR_ENFLAG;
ecx = 0x187;
printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
ecx = 0xc2;
eax = 1;
edx = 2;
rtxen_read_msr(&ecx, &eax, &edx);
printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax);
break;
case L3:
eax = 0;
SET_MSR_USR_BIT(eax);
SET_MSR_OS_BIT(eax);
SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK);
eax |= MSR_ENFLAG;
ecx = PERFEVTSEL2;
printk(KERN_INFO "before wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
printk(KERN_INFO "after wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
printk(KERN_INFO "L3 all request set MSR PMC2\n");
printk(KERN_INFO "delay by access an array\n");
delay();
ecx = PMC2;
eax = 1;
edx = 2;
printk(KERN_INFO "rdmsr: ecx=%#010x\n", ecx);
rtxen_read_msr(&ecx, &eax, &edx); /*need to pass into address!*/
l3_all = ( ((uint64_t) edx << 32) | eax );
printk(KERN_INFO "rdmsr: L3 all request is %llu (%#010lx)\n", l3_all, (unsigned long)l3_all);
break;
default:
printk(KERN_INFO "operation not implemented yet\n");
}
/*
* A non 0 return means init_module failed; module can't be loaded.
*/
return 0;
}
void cleanup_module(void)
{
printk(KERN_INFO "Goodbye world 1.\n");
}
The result I have is:
[ 1780.946584] UOPS Demo: write_msr: eax=0x0001010e, ecx=0x00000187
[ 1780.946590] UOPS Demo: read_msr: edx=0x00000000, eax=0x00000000
[ 1818.595055] Goodbye world 1.
[ 1821.153947] UOPS Demo: write_msr: eax=0x0041010e, ecx=0x00000187
[ 1821.153950] UOPS Demo: read_msr: edx=0x00000000, eax=0x00000000
I finally solve it with the help of #Manuel Selva!
The correct flow of setting a perf. counter is:
Step 1: set msr and enable the counter by setting the EN bit in eax;
Step 2: stop the counter by writing to msr
Step 3: read the counter
I missed the step 2, that's why it always gives me 0. It makes sense to report 0 if I want to read the counter before stopping it.
The correct code of the switch statement is as follows:
switch(op)
{
case UOPS:
eax = 0x0051010E;
eax |= MSR_ENFLAG;
ecx = 0x187;
printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
//stop counting
eax = 0x0011010E;
rtxen_write_msr(eax,ecx);
ecx = 0xc2;
eax = 1;
edx = 2;
rtxen_read_msr(&ecx, &eax, &edx);
printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax);
break;
case L3:
eax = 0;
SET_MSR_USR_BIT(eax);
SET_MSR_OS_BIT(eax);
SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK);
eax |= MSR_ENFLAG;
eax |= (1<<20); //INT bit: counter overflow
ecx = PERFEVTSEL2;
printk(KERN_INFO "before wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
rtxen_write_msr(eax, ecx);
printk(KERN_INFO "after wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
printk(KERN_INFO "L3 all request set MSR PMC2\n");
printk(KERN_INFO "delay by access an array\n");
delay();
eax &= (~MSR_ENFLAG);
rtxen_write_msr(eax, ecx);
printk(KERN_INFO "stop the counter, eax=%#010x\n", eax);
ecx = PMC2;
eax = 1;
edx = 2;
printk(KERN_INFO "rdmsr: ecx=%#010x\n", ecx);
rtxen_read_msr(&ecx, &eax, &edx); /*need to pass into address!*/
l3_all = ( ((uint64_t) edx << 32) | eax );
printk(KERN_INFO "rdmsr: L3 all request is %llu (%#010lx)\n", l3_all, (unsigned long)l3_all);
break;
default:
printk(KERN_INFO "operation not implemented yet\n");
}

Resources