System call hooking example arguments are incorrect - c

I wrote an example of system call hooking from our Linux Kernel module.
Updated open system call in system call table to use my entry point instead of the default.
#include <linux/module.h>
#include <linux/kallsyms.h>
MODULE_LICENSE("GPL");
char *sym_name = "sys_call_table";
typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *);
static sys_call_ptr_t *sys_call_table;
typedef asmlinkage long (*custom_open) (const char __user *filename, int flags, umode_t mode);
custom_open old_open;
static asmlinkage long my_open(const char __user *filename, int flags, umode_t mode)
{
char user_msg[256];
pr_info("%s\n",__func__);
memset(user_msg, 0, sizeof(user_msg));
long copied = strncpy_from_user(user_msg, filename, sizeof(user_msg));
pr_info("copied:%ld\n", copied);
pr_info("%s\n",user_msg);
return old_open(filename, flags, mode);
}
static int __init hello_init(void)
{
sys_call_table = (sys_call_ptr_t *)kallsyms_lookup_name(sym_name);
old_open = (custom_open)sys_call_table[__NR_open];
// Temporarily disable write protection
write_cr0(read_cr0() & (~0x10000));
sys_call_table[__NR_open] = (sys_call_ptr_t)my_open;
// Re-enable write protection
write_cr0(read_cr0() | 0x10000);
return 0;
}
static void __exit hello_exit(void)
{
// Temporarily disable write protection
write_cr0(read_cr0() & (~0x10000));
sys_call_table[__NR_open] = (sys_call_ptr_t)old_open;
// Re-enable write protection
write_cr0(read_cr0() | 0x10000);
}
module_init(hello_init);
module_exit(hello_exit);
I wrote a simple user program to verify.
#define _GNU_SOURCE
#include <sys/syscall.h>
#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <fcntl.h>
int main(int argc, char *argv[])
{
int fd = syscall(__NR_open, "hello.txt", O_RDWR|O_CREAT, 0777);
exit(EXIT_SUCCESS);
}
File gets created in my folder, but strncpy_user fails with bad address
[ 927.415905] my_open
[ 927.415906] copied:-14
What is the mistake in the above code?

OP is probably using a kernel/architecture that uses "syscall wrappers" where the system call table contains a wrapper function that calls the real syscall function (possibly as an inline function call). The x86_64 architecture has used syscall wrappers since kernel version 4.17.
For x86_64 on kernel 4.17 or later, sys_call_table[__NR_open] points to __x64_sys_open (with prototype asmlinkage long __x64_sys_open(const struct pt_regs *regs)), which calls static function __se_sys_open (with prototype static long __se_sys_open(const __user *filename, int flags, umode_t mode)), which calls inline function __do_sys_open (with prototype static inline long __do_sys_open(const __user *filename, int flags, umode_t mode). Those will all be defined by the SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) macro call in "fs/open.c" and the function body that follows the macro call.
SYSCALL_DEFINE3 is defined in "include/linux/syscalls.h" and uses the SYSCALL_DEFINEx macro in the same file, which uses the __SYSCALL_DEFINEx macro. Since x86_64 defines CONFIG_ARCH_HAS_SYSCALL_WRAPPER, the __SYSCALL_DEFINEx macro is defined by #include <asm/syscall_wrapper.h>, which maps to "arch/x86/include/asm/syscall_wrapper.h".
For background on this change, see
LWN: use struct pt_regs based syscall calling for x86-64
LKML: [PATCH 000/109] remove in-kernel calls to syscalls https://lkml.org/lkml/2018/3/29/409
It seems the motivation is to only pass a pointer to pt_regs, instead of having a bunch of user-space values in registers down the call chain. (Perhaps to increase resistance to Spectre attacks by making gadgets less useful?)
Why open still worked, even though the wrapper didn't:
If OP is indeed using x86_64 kernel 4.17 or later, and replacing the sys_call_table[__NR_open] entry with a pointer to a function that uses a different prototype and calls the original function (pointed to by old_open) with the same parameters, that explains why the call to strncpy_from_user(user_msg, filename, sizeof(user_msg)) failed. Although declared as const char * __user filename, the filename pointer is actually pointing to the original struct pt_regs in kernel space.
In the subsequent call to old_open(filename, flags, mode), the first parameter filename is still pointing to the original struct pt_regs so the old function (which expects a single parameter of type struct pt_regs *) still works as expected.
i.e. the function passed on its first pointer arg unchanged, despite calling it a different type.

Update: Below is working code, Thanks everyone for providing inputs
#include <linux/module.h>
#include <linux/kallsyms.h>
MODULE_LICENSE("GPL");
char *sym_name = "sys_call_table";
typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *);
static sys_call_ptr_t *sys_call_table;
sys_call_ptr_t old_open;
static asmlinkage long my_open(const struct pt_regs *regs)
{
char __user *filename = (char *)regs->di;
char user_filename[256] = {0};
long copied = strncpy_from_user(user_filename, filename, sizeof(user_filename));
if (copied > 0)
pr_info("%s filename:%s\n",__func__, user_filename);
return old_open(regs);
}
static int __init hello_init(void)
{
sys_call_table = (sys_call_ptr_t *)kallsyms_lookup_name(sym_name);
old_open = sys_call_table[__NR_open];
// Temporarily disable write protection
write_cr0(read_cr0() & (~0x10000));
sys_call_table[__NR_open] = my_open;
// Re-enable write protection
write_cr0(read_cr0() | 0x10000);
return 0;
}
static void __exit hello_exit(void)
{
// Temporarily disable write protection
write_cr0(read_cr0() & (~0x10000));
sys_call_table[__NR_open] = old_open;
// Re-enable write protection
write_cr0(read_cr0() | 0x10000);
}
module_init(hello_init);
module_exit(hello_exit);

Related

How to Override A C System Call?

So the problem is the following. The project needs to intercept all file IO
operations, like open() and close(). I am trying to add printf() before calling the corresponding open() or close(). I am not supposed to rewrite the source code by changing open() or close() to myOpen() or myClose() for example. I have been trying to use LD_PRELOAD environment variable. But the indefinite loop problem came up. My problem is like this one.
int open(char * path,int flags,int mode)
{
// print file name
printf("open :%s\n",path);
return __open(path,flags,mode);
}
Yes, you want LD_PRELOAD.
You need to create a shared library (.so) that has code for all functions that you want to intercept. And, you want to set LD_PRELOAD to use that shared library
Here is some sample code for the open function. You'll need to do something similar for each function you want to intercept:
#define _GNU_SOURCE
#include <dlfcn.h>
int
open(const char *file,int flags,int mode)
{
static int (*real_open)(const char *file,int flags,int mode) = NULL;
int fd;
if (real_open == NULL)
real_open = dlsym(RTLD_NEXT,"open");
// do whatever special stuff ...
fd = real_open(file,flags,mode);
// do whatever special stuff ...
return fd;
}
I believe RTLD_NEXT is easiest and may be sufficient. Otherwise, you could add a constructor that does dlopen once on libc
UPDATE:
I am not familiar with C and I got the following problems with gcc. "error: 'NULL' undeclared (first use in this function)",
This is defined by several #include files, so try #include <stdio.h>. You'll need that if you want to call printf.
"error: 'RTLD_NEXT' undeclared (first use in this function)",
That is defined by doing #include <dlfcn.h> [as shown in my example]
and "symbol lookup error: ./hack_stackoverflow.so: undefined symbol: dlsym".
From man dlsym, it says: Link with -ldl So, add -ldl to the line that builds your .so.
Also, you have to be careful to prevent infinite recursion if the "special stuff" does something that loops back on your intercept function.
Notably, you want to call printf. If you intercept the write syscall, bad things may happen.
So, you need to keep track of when you're already in one of your intercept functions and not do anything special if already there. See the in_self variable.
#define _GNU_SOURCE
#include <stdio.h>
#include <dlfcn.h>
ssize_t
write(int fd,const void *buf,size_t len)
{
static ssize_t (*real_write)(int fd,const void *buf,size_t len) = NULL;
static int in_self = 0;
ssize_t err;
if (real_write == NULL)
real_write = dlsym(RTLD_NEXT,"write");
++in_self;
if (in_self == 1)
printf("mywrite: fd=%d buf=%p len=%ld\n",fd,buf,len);
err = real_write(fd,buf,len);
if (in_self == 1)
printf("mywrite: fd=%d buf=%p err=%ld\n",fd,buf,err);
--in_self;
return err;
}
The above works okay for single threaded programs/environments, but if you're intercepting an arbitrary one, it could be multithreaded.
So, we'd have to initialize all the real_* pointers in a constructor. This is a function with a special attribute that tells the dynamic loader to call the function ASAP automatically.
And, we have to put in_self into thread local storage. We do this by adding the __thread attribute.
You may need to link with -lpthread as well as -ldl for the multithreaded version.
Edit: We also have to preserve the correct errno value
Putting it all together:
#define _GNU_SOURCE
#include <stdio.h>
#include <dlfcn.h>
#include <errno.h>
static int (*real_open)(const char *file,int flags,int mode) = NULL;
static ssize_t (*real_write)(int fd,const void *buf,size_t len) = NULL;
__attribute__((constructor))
void
my_lib_init(void)
{
real_open = dlsym(RTLD_NEXT,"open");
real_write = dlsym(RTLD_NEXT,"write");
}
int
open(const char *file,int flags,int mode)
{
int fd;
// do whatever special stuff ...
fd = real_open(file,flags,mode);
// do whatever special stuff ...
return fd;
}
ssize_t
write(int fd,const void *buf,size_t len)
{
static int __thread in_self = 0;
int sverr;
ssize_t ret;
++in_self;
if (in_self == 1)
printf("mywrite: fd=%d buf=%p len=%ld\n",fd,buf,len);
ret = real_write(fd,buf,len);
// preserve errno value for actual syscall -- otherwise, errno may
// be set by the following printf and _caller_ will get the _wrong_
// errno value
sverr = errno;
if (in_self == 1)
printf("mywrite: fd=%d buf=%p ret=%ld\n",fd,buf,ret);
--in_self;
// restore correct errno value for write syscall
errno = sverr;
return ret;
}

How to use statx syscall?

Ubuntu 18.04
I'm trying to use statx syscall introduced in the Linux Kernel 4.11. There is a manual entry:
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h> /* Definition of AT_* constants */
int statx(int dirfd, const char *pathname, int flags,
unsigned int mask, struct statx *statxbuf);
So I tried to write an example by myself:
const char *dir_path = NULL;
const char *file_path = NULL;
//read from command line arguments
int dir_fd = open(dir_path, O_DIRECTORY);
struct statx st; //<--------------------------- compile error
statx(dir_fd, file_path, 0, &statx);
But it simply does not compile. The error is the sizeof(statx) is unknown. And actually it is not defined in sys/stat.h, but in linux/stat.h which is not included by sys/stat.h. But after including linux/stat.h the problem is there is no definition for
int statx(int dirfd, const char *pathname, int flags,
unsigned int mask, struct statx *statxbuf);
I expected that since
$ uname -r
4.15.0-39-generic
and 4.15.0-39-generic newer than 4.11 I can use it.
What's wrong?
Currently as the glibc does not provide a wrapper for the statx call, you have to use your kernels definitions. So either copy the statx structure definition from your kernel or just use it from the API the linux kernel provides. The struct statx is currently defined in linux/stat.h.
linux provides a example call to statx available here.
#update library support was added in glibc 2.28

C - Linux Kernel - Assistance with current_uid()

I have been working on part of an assignment which I am having trouble fixing. The requirement was to intercept the system call open and to replace it with a new system open call only for regular users and would print out the user id and filename in the system log. Otherwise it would just execute the standard system open call. Here is part of what I have that is troubling me:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/syscalls.h>
unsigned long **sys_call_table;
asmlinkage long (*ref_sys_open)(const char *filename, int flags, umode_t mode);
asmlinkage long (*ref_sys_close)(unsigned int fd);
asmlinkage long new_sys_open(const char *filename, int flags, umode_t mode) {
if (current_uid() >= 1000) {
printk(KERN_INFO "User %d is opening file: %s\n", current_uid(), filename);
} else {
(*ref_sys_open)(filename, flags, mode);
}
return 0;
}
The problem I am getting is that the returning value of current_uid() is a struct with type kuid_t. I looked into it further and found that the struct looks like this:
typedef struct {
uid_t val;
} kuid_t;
I was wondering how do I compare int 1000 to type uid_t val?
On a side note, did I call the old versions of the system call correctly?
from here, uid_t is just a typedef of __kernel_uid32_t, which is unsigned int according to here

linux kernel module assignment makes integer from pointer without a cast

this is simple sys_call_table hooking code
#include <asm/unistd.h>
#include <linux/autoconf.h>
#include <linux/in.h>
#include <linux/init_task.h>
#include <linux/ip.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/skbuff.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/syscalls.h>
#include <linux/tcp.h>
#include <linux/types.h>
#include <linux/unistd.h>
#include <linux/version.h>
#include <linux/workqueue.h>
ssize_t *sys_call_table = (ssize_t *)0xc0026e04;
asmlinkage ssize_t (*orig_open)(const char *pathname, int flags);
asmlinkage ssize_t hacked_open(const char *pathname, int flags)
{
printk(KERN_INFO "SYS_OPEN called : %s\n", pathname);
return orig_open(pathname, flags);
}
int init_module(void)
{
orig_open = sys_call_table[__NR_open]; /* line 33 */
sys_call_table[__NR_open] = hacked_open; /* line 34 */
return 0;
}
void cleanup_module(void)
{
sys_call_table[__NR_open] = orig_open; /* line 40 */
}
MODULE_LICENSE("GPL");
i got an warning like below
this code works fine but i want to delete warnings. how can i do?
/home/tester/tools/lkm/a.c: In function 'init_module':
/home/tester/tools/lkm/a.c:33: warning: assignment makes integer from pointer without a cast
/home/tester/tools/lkm/a.c:34: warning: assignment makes integer from pointer without a cast
/home/tester/tools/lkm/a.c: In function 'cleanup_module':
/home/tester/tools/lkm/a.c:40: warning: assignment makes integer from pointer without a cast
If you want to silence your compiler, you have to add typecasts (even if it is often a bad idea, this is how your compiler turns it).
ssize_t *sys_call_table = (ssize_t *)0xc0026e04;
typedef ssize_t (*ftype)(const char *, int);
ftype orig_open;
ssize_t hacked_open(const char *pathname, int flags)
{
printf("SYS_OPEN called : %s\n", pathname);
return orig_open(pathname, flags);
}
int init_module(void)
{
orig_open = (ftype)sys_call_table[__NR_open];
sys_call_table[__NR_open] = (ssize_t)hacked_open;
return 0;
}
When you look at line 33 you will see the problem:
orig_open = sys_call_table[__NR_open];
You have defined sys_call_table to be a pointer to integer. That's the reason for the warning.
Same problem with the other lines. If you define sys_call_table properly, the warnings will go away.
You should at least define it as an array of pointers or pointer to pointers, because if ssize_t is only 32 bit on 64 bit system, you might truncate the 64 bit addresses to a 32 bit integer.

Linux Kernel: System call hooking example

I'm trying to write some simple test code as a demonstration of hooking the system call table.
"sys_call_table" is no longer exported in 2.6, so I'm just grabbing the address from the System.map file, and I can see it is correct (Looking through the memory at the address I found, I can see the pointers to the system calls).
However, when I try to modify this table, the kernel gives an "Oops" with "unable to handle kernel paging request at virtual address c061e4f4" and the machine reboots.
This is CentOS 5.4 running 2.6.18-164.10.1.el5. Is there some sort of protection or do I just have a bug? I know it comes with SELinux, and I've tried putting it in to permissive mode, but it doesn't make a difference
Here's my code:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/unistd.h>
void **sys_call_table;
asmlinkage int (*original_call) (const char*, int, int);
asmlinkage int our_sys_open(const char* file, int flags, int mode)
{
printk("A file was opened\n");
return original_call(file, flags, mode);
}
int init_module()
{
// sys_call_table address in System.map
sys_call_table = (void*)0xc061e4e0;
original_call = sys_call_table[__NR_open];
// Hook: Crashes here
sys_call_table[__NR_open] = our_sys_open;
}
void cleanup_module()
{
// Restore the original call
sys_call_table[__NR_open] = original_call;
}
I finally found the answer myself.
http://www.linuxforums.org/forum/linux-kernel/133982-cannot-modify-sys_call_table.html
The kernel was changed at some point so that the system call table is read only.
cypherpunk:
Even if it is late but the Solution
may interest others too: In the
entry.S file you will find: Code:
.section .rodata,"a"
#include "syscall_table_32.S"
sys_call_table -> ReadOnly You have to
compile the Kernel new if you want to
"hack" around with sys_call_table...
The link also has an example of changing the memory to be writable.
nasekomoe:
Hi everybody. Thanks for replies. I
solved the problem long ago by
modifying access to memory pages. I
have implemented two functions that do
it for my upper level code:
#include <asm/cacheflush.h>
#ifdef KERN_2_6_24
#include <asm/semaphore.h>
int set_page_rw(long unsigned int _addr)
{
struct page *pg;
pgprot_t prot;
pg = virt_to_page(_addr);
prot.pgprot = VM_READ | VM_WRITE;
return change_page_attr(pg, 1, prot);
}
int set_page_ro(long unsigned int _addr)
{
struct page *pg;
pgprot_t prot;
pg = virt_to_page(_addr);
prot.pgprot = VM_READ;
return change_page_attr(pg, 1, prot);
}
#else
#include <linux/semaphore.h>
int set_page_rw(long unsigned int _addr)
{
return set_memory_rw(_addr, 1);
}
int set_page_ro(long unsigned int _addr)
{
return set_memory_ro(_addr, 1);
}
#endif // KERN_2_6_24
Here's a modified version of the original code that works for me.
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/unistd.h>
#include <asm/semaphore.h>
#include <asm/cacheflush.h>
void **sys_call_table;
asmlinkage int (*original_call) (const char*, int, int);
asmlinkage int our_sys_open(const char* file, int flags, int mode)
{
printk("A file was opened\n");
return original_call(file, flags, mode);
}
int set_page_rw(long unsigned int _addr)
{
struct page *pg;
pgprot_t prot;
pg = virt_to_page(_addr);
prot.pgprot = VM_READ | VM_WRITE;
return change_page_attr(pg, 1, prot);
}
int init_module()
{
// sys_call_table address in System.map
sys_call_table = (void*)0xc061e4e0;
original_call = sys_call_table[__NR_open];
set_page_rw(sys_call_table);
sys_call_table[__NR_open] = our_sys_open;
}
void cleanup_module()
{
// Restore the original call
sys_call_table[__NR_open] = original_call;
}
Thanks Stephen, your research here was helpful to me. I had a few problems, though, as I was trying this on a 2.6.32 kernel, and getting WARNING: at arch/x86/mm/pageattr.c:877 change_page_attr_set_clr+0x343/0x530() (Not tainted) followed by a kernel OOPS about not being able to write to the memory address.
The comment above the mentioned line states:
// People should not be passing in unaligned addresses
The following modified code works:
int set_page_rw(long unsigned int _addr)
{
return set_memory_rw(PAGE_ALIGN(_addr) - PAGE_SIZE, 1);
}
int set_page_ro(long unsigned int _addr)
{
return set_memory_ro(PAGE_ALIGN(_addr) - PAGE_SIZE, 1);
}
Note that this still doesn't actually set the page as read/write in some situations. The static_protections() function, which is called inside of set_memory_rw(), removes the _PAGE_RW flag if:
It's in the BIOS area
The address is inside .rodata
CONFIG_DEBUG_RODATA is set and the kernel is set to read-only
I found this out after debugging why I still got "unable to handle kernel paging request" when trying to modify the address of kernel functions. I was eventually able to solve that problem by finding the page table entry for the address myself and manually setting it to writable. Thankfully, the lookup_address() function is exported in version 2.6.26+. Here is the code I wrote to do that:
void set_addr_rw(unsigned long addr) {
unsigned int level;
pte_t *pte = lookup_address(addr, &level);
if (pte->pte &~ _PAGE_RW) pte->pte |= _PAGE_RW;
}
void set_addr_ro(unsigned long addr) {
unsigned int level;
pte_t *pte = lookup_address(addr, &level);
pte->pte = pte->pte &~_PAGE_RW;
}
Finally, while Mark's answer is technically correct, it'll case problem when ran inside Xen. If you want to disable write-protect, use the read/write cr0 functions. I macro them like this:
#define GPF_DISABLE write_cr0(read_cr0() & (~ 0x10000))
#define GPF_ENABLE write_cr0(read_cr0() | 0x10000)
Hope this helps anyone else who stumbles upon this question.
Note that the following will also work instead of using change_page_attr and cannot be depreciated:
static void disable_page_protection(void) {
unsigned long value;
asm volatile("mov %%cr0,%0" : "=r" (value));
if (value & 0x00010000) {
value &= ~0x00010000;
asm volatile("mov %0,%%cr0": : "r" (value));
}
}
static void enable_page_protection(void) {
unsigned long value;
asm volatile("mov %%cr0,%0" : "=r" (value));
if (!(value & 0x00010000)) {
value |= 0x00010000;
asm volatile("mov %0,%%cr0": : "r" (value));
}
}
If you are dealing with kernel 3.4 and later (it can also work with earlier kernels, I didn't test it) I would recommend a smarter way to acquire the system callы table location.
For example
#include <linux/module.h>
#include <linux/kallsyms.h>
static unsigned long **p_sys_call_table;
/* Aquire system calls table address */
p_sys_call_table = (void *) kallsyms_lookup_name("sys_call_table");
That's it. No addresses, it works fine with every kernel I've tested.
The same way you can use a not exported Kernel function from your module:
static int (*ref_access_remote_vm)(struct mm_struct *mm, unsigned long addr,
void *buf, int len, int write);
ref_access_remote_vm = (void *)kallsyms_lookup_name("access_remote_vm");
Enjoy!
As others have hinted, the whole story is a bit different now on modern kernels. I'll be covering x86-64 here, for syscall hijacking on modern arm64 refer to this other answer of mine. Also NOTE: this is plain and simple syscall hijacking. Non-invasive hooking can be done in a much nicer way using kprobes.
Since Linux v4.17, x86 (both 64 and 32 bit) now uses syscall wrappers that take a struct pt_regs * as the only argument (see commit 1, commit 2). You can see arch/x86/include/asm/syscall.h for the definitions.
Additionally, as others have described already in different answers, the simplest way to modify sys_call_table is to temporarily disable CR0 WP (Write-Protect) bit, which could be done using read_cr0() and write_cr0(). However, since Linux v5.3, [native_]write_cr0 will check sensitive bits that should never change (like WP) and refuse to change them (commit). In order to work around this, we need to write CR0 manually using inline assembly.
Here is a working kernel module (tested on Linux 5.10 and 5.18) that does syscall hijacking on modern Linux x86-64 considering the above caveats and assuming that you already know the address of sys_call_table (if you also want to find that in the module, see Proper way of getting the address of non-exported kernel symbols in a Linux kernel module):
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/**
* Test syscall table hijacking on x86-64. This module will replace the `read`
* syscall with a simple wrapper which logs every invocation of `read` using
* printk().
*
* Tested on Linux x86-64 v5.10, v5.18.
*
* Usage:
*
* sudo cat /proc/kallsyms | grep sys_call_table # grab address
* sudo insmod syscall_hijack.ko sys_call_table_addr=0x<address_here>
*/
#include <linux/init.h> // module_{init,exit}()
#include <linux/module.h> // THIS_MODULE, MODULE_VERSION, ...
#include <linux/kernel.h> // printk(), pr_*()
#include <asm/special_insns.h> // {read,write}_cr0()
#include <asm/processor-flags.h> // X86_CR0_WP
#include <asm/unistd.h> // __NR_*
#ifdef pr_fmt
#undef pr_fmt
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
static sys_call_ptr_t *real_sys_call_table;
static sys_call_ptr_t original_read;
static unsigned long sys_call_table_addr;
module_param(sys_call_table_addr, ulong, 0);
MODULE_PARM_DESC(sys_call_table_addr, "Address of sys_call_table");
// Since Linux v5.3 [native_]write_cr0 won't change "sensitive" CR0 bits, need
// to re-implement this ourselves.
static void write_cr0_unsafe(unsigned long val)
{
asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
}
static long myread(const struct pt_regs *regs)
{
pr_info("read(%ld, 0x%lx, %lx)\n", regs->di, regs->si, regs->dx);
return original_read(regs);
}
static int __init modinit(void)
{
unsigned long old_cr0;
real_sys_call_table = (typeof(real_sys_call_table))sys_call_table_addr;
pr_info("init\n");
// Temporarily disable CR0 WP to be able to write to read-only pages
old_cr0 = read_cr0();
write_cr0_unsafe(old_cr0 & ~(X86_CR0_WP));
// Overwrite syscall and save original to be restored later
original_read = real_sys_call_table[__NR_read];
real_sys_call_table[__NR_read] = myread;
// Restore CR0 WP
write_cr0_unsafe(old_cr0);
pr_info("init done\n");
return 0;
}
static void __exit modexit(void)
{
unsigned long old_cr0;
pr_info("exit\n");
old_cr0 = read_cr0();
write_cr0_unsafe(old_cr0 & ~(X86_CR0_WP));
// Restore original syscall
real_sys_call_table[__NR_read] = original_read;
write_cr0_unsafe(old_cr0);
pr_info("goodbye\n");
}
module_init(modinit);
module_exit(modexit);
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("Test syscall table hijacking on x86-64.");
MODULE_AUTHOR("Marco Bonelli");
MODULE_LICENSE("Dual MIT/GPL");

Resources