dma_alloc_coherent crashes kernel module - c

I want to use DMA in a driver that I'm working on. The ultimate goal is to ensure that the data is in physical RAM and not hidden in a cache. For that I was trying to implement a simple test driver before merging it with my current project.
From what I understand I can setup a DMA mask and allocate a coherent buffer and then simply write to the virtual address. My other device can then read from the physical address. It would be nice if someone could confirm if that is indeed how it works.
Unfortunately the allocation fails and I can't decipher the syslogs for why that could be. I'm probably doing something wrong with the struct device. What is the device reference in dma_alloc_coherent used for?
Here's my current attempt. It's a modified dummy character device (note that I now know that it is complete garbage and not at all how you should do dma):
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/dma-mapping.h>
static char hello_world[]="Hello World\n";
static dev_t hello_dev_number;
static struct cdev *driver_object;
static struct class *hello_class;
static struct device *hello_dev;
int errorType;
void *virtAddr;
size_t size = 32;
dma_addr_t *physAddr;
int flag = GFP_KERNEL;
static ssize_t driver_read( struct file *instance, char __user *user, size_t count, loff_t *offset )
{
unsigned long not_copied, to_copy;
to_copy = min( count, strlen(hello_world)+1 );
not_copied=copy_to_user(user,hello_world,to_copy);
*offset += to_copy-not_copied;
return to_copy-not_copied;
}
static struct file_operations fops = {
.owner= THIS_MODULE,
.read= driver_read,
};
static int __init mod_init( void )
{
int debug;
printk("starting insertion");
if (alloc_chrdev_region(&hello_dev_number,0,1,"Hello")<0)
return -EIO;
driver_object = cdev_alloc();
if (driver_object==NULL){
errorType = EIO;
goto free_device_number;
}
driver_object->owner = THIS_MODULE;
driver_object->ops = &fops;
if (cdev_add(driver_object,hello_dev_number,1)){
errorType=EIO;
goto free_cdev;
}
hello_class = class_create( THIS_MODULE, "Hello" );
if (IS_ERR( hello_class )) {
pr_err( "hello: no udev support\n");
errorType=EIO;
goto free_cdev;
}
hello_dev = device_create( hello_class, NULL, hello_dev_number, NULL, "%s", "hello" );
if (IS_ERR( hello_dev )) {
pr_err( "hello: device_create failed\n");
errorType=EIO;
goto free_class;
}
printk("dma is happening");
debug = dma_set_mask_and_coherent(hello_dev, DMA_BIT_MASK(32));
printk("dma mask returns: %d", debug);
if(debug==0){
printk("setting mask failed");
errorType=EIO;
goto free_dev;
}
printk("goodbye crule world!");
virtAddr = dma_alloc_coherent(hello_dev, size, physAddr, flag);
printk("I'm actually alive");
if(virtAddr==NULL){
printk("virtual address null, dma failed!");
errorType=ENOMEM;
goto free_dev;
}
printk("success");
return 0;
free_dev:
device_destroy( hello_class, hello_dev_number );
free_class:
class_destroy( hello_class );
free_cdev:
kobject_put( &driver_object->kobj );
free_device_number:
unregister_chrdev_region( hello_dev_number, 1 );
return -errorType;
}
static void __exit mod_exit( void )
{
dma_free_coherent(hello_dev, size, virtAddr, *physAddr);
device_destroy( hello_class, hello_dev_number );
class_destroy( hello_class );
cdev_del( driver_object );
unregister_chrdev_region( hello_dev_number, 1 );
return;
}
module_init( mod_init );
module_exit( mod_exit );
MODULE_AUTHOR("ME");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Testing DMA.");
Here's the Makefile that I use:
obj-m+=dma_test.o
all:
make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
clean:
make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
Running this on an ARMv7 AM335x Sitara CPU (Beaglebone Black), Linux Kernel 5.4.106-ti-r27
Here's the kernel log:
Mar 14 02:38:23 beaglebone kernel: [ 100.662640] starting insertion
Mar 14 02:38:23 beaglebone kernel: [ 100.667068] dma is happening
Mar 14 02:38:23 beaglebone kernel: [ 100.667087] dma mask returns: -5
Mar 14 02:38:23 beaglebone kernel: [ 100.667092] goodbye crule world!
Mar 14 02:38:23 beaglebone kernel: [ 100.667100] ------------[ cut here ]------------
Mar 14 02:38:23 beaglebone kernel: [ 100.667128] WARNING: CPU: 0 PID: 2360 at kernel/dma/mapping.c:272 dma_alloc_attrs+0x118/0x128
Mar 14 02:38:23 beaglebone kernel: [ 100.667134] Modules linked in: dma_test(O+) c_can_platform c_can can_dev evdev usb_f_acm u_serial usb_f_ecm usb_f_mass_storage usb_f_rndis u_ether libcomposite uio_pdrv_genirq(O) uio iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_mangle iptable_filter dmatest(O) ip_tables x_tables icss_iep prueth_ecap spidev
Mar 14 02:38:23 beaglebone kernel: [ 100.667216] CPU: 0 PID: 2360 Comm: insmod Tainted: G O 5.4.106-ti-r27 #1buster
Mar 14 02:38:23 beaglebone kernel: [ 100.667222] Hardware name: Generic AM33XX (Flattened Device Tree)
Mar 14 02:38:23 beaglebone kernel: [ 100.667228] Backtrace:
Mar 14 02:38:23 beaglebone kernel: [ 100.667248] [<c0e37fd8>] (dump_backtrace) from [<c0e38390>] (show_stack+0x20/0x24)
Mar 14 02:38:23 beaglebone kernel: [ 100.667259] r7:600f0113 r6:c14e3154 r5:00000000 r4:c14e3154
Mar 14 02:38:23 beaglebone kernel: [ 100.667274] [<c0e38370>] (show_stack) from [<c0e49af0>] (dump_stack+0xb8/0xcc)
Mar 14 02:38:23 beaglebone kernel: [ 100.667289] [<c0e49a38>] (dump_stack) from [<c013c6cc>] (__warn+0xe0/0x108)
Mar 14 02:38:23 beaglebone kernel: [ 100.667299] r7:00000110 r6:00000009 r5:c01c7090 r4:c11194a8
Mar 14 02:38:23 beaglebone kernel: [ 100.667309] [<c013c5ec>] (__warn) from [<c0e38c00>] (warn_slowpath_fmt+0x70/0xd8)
Mar 14 02:38:23 beaglebone kernel: [ 100.667318] r7:00000110 r6:c11194a8 r5:c1405fc8 r4:00000000
Mar 14 02:38:23 beaglebone kernel: [ 100.667328] [<c0e38b94>] (warn_slowpath_fmt) from [<c01c7090>] (dma_alloc_attrs+0x118/0x128)
Mar 14 02:38:23 beaglebone kernel: [ 100.667339] r9:00000cc0 r8:00000000 r7:00000020 r6:dc78d600 r5:c1405fc8 r4:c0f01734
Mar 14 02:38:23 beaglebone kernel: [ 100.667368] [<c01c6f78>] (dma_alloc_attrs) from [<bf00c1b4>] (mod_init+0x1b4/0x1000 [dma_test])
Mar 14 02:38:23 beaglebone kernel: [ 100.667379] r9:bf0e00cc r8:d29cff30 r7:dc78d600 r6:fffffffb r5:00000000 r4:bf0e0300
Mar 14 02:38:23 beaglebone kernel: [ 100.667399] [<bf00c000>] (mod_init [dma_test]) from [<c0103268>] (do_one_initcall+0x50/0x2d0)
Mar 14 02:38:23 beaglebone kernel: [ 100.667408] r7:00000000 r6:bf0e01f0 r5:bf00c000 r4:c1405fc8
Mar 14 02:38:23 beaglebone kernel: [ 100.667423] [<c0103218>] (do_one_initcall) from [<c01f0878>] (do_init_module+0x70/0x274)
Mar 14 02:38:23 beaglebone kernel: [ 100.667433] r8:d29cff30 r7:bf0e00c0 r6:bf0e01f0 r5:db20cf40 r4:bf0e00c0
Mar 14 02:38:23 beaglebone kernel: [ 100.667444] [<c01f0808>] (do_init_module) from [<c01f2adc>] (load_module+0x1f64/0x2370)
Mar 14 02:38:23 beaglebone kernel: [ 100.667452] r6:bf0e01f0 r5:00000000 r4:bf0e01c0
Mar 14 02:38:23 beaglebone kernel: [ 100.667463] [<c01f0b78>] (load_module) from [<c01f318c>] (sys_finit_module+0xc0/0x110)
Mar 14 02:38:23 beaglebone kernel: [ 100.667474] r10:0000017b r9:d29ce000 r8:c0101204 r7:0043c7e0 r6:00000003 r5:00000000
Mar 14 02:38:23 beaglebone kernel: [ 100.667480] r4:c1405fc8
Mar 14 02:38:23 beaglebone kernel: [ 100.667491] [<c01f30cc>] (sys_finit_module) from [<c0101000>] (ret_fast_syscall+0x0/0x54)
Mar 14 02:38:23 beaglebone kernel: [ 100.667499] Exception stack(0xd29cffa8 to 0xd29cfff0)
Mar 14 02:38:23 beaglebone kernel: [ 100.667511] ffa0: 3d6d5800 00000000 00000003 0043c7e0 00000000 bece2578
Mar 14 02:38:23 beaglebone kernel: [ 100.667522] ffc0: 3d6d5800 00000000 00000000 0000017b 015347e0 00000000 bece26f8 00000000
Mar 14 02:38:23 beaglebone kernel: [ 100.667531] ffe0: bece2528 bece2518 00434e41 b6cbbd92
Mar 14 02:38:23 beaglebone kernel: [ 100.667540] r7:0000017b r6:00000000 r5:00000000 r4:3d6d5800
Mar 14 02:38:23 beaglebone kernel: [ 100.667547] ---[ end trace 870c2d2ad09e80fa ]---
Mar 14 02:38:23 beaglebone kernel: [ 100.667565] Hello hello: coherent DMA mask is unset
Mar 14 02:38:23 beaglebone kernel: [ 100.667571] I'm actually alive

You need to allocate space for the dma_addr_t value.
- dma_addr_t *physAddr;
+ dma_addr_t physAddr;
- virtAddr = dma_alloc_coherent(hello_dev, size, physAddr, flag);
+ virtAddr = dma_alloc_coherent(hello_dev, size, &physAddr, flag);
- dma_free_coherent(hello_dev, size, virtAddr, *physAddr);
+ dma_free_coherent(hello_dev, size, virtAddr, physAddr);
Here is an online example and looking through the source for callers of dma_alloc_coherent() should confirm this.
The API is this way as 'C' does not allow multiple return values. In C++, it might have been a reference. To most kernel developers that have a breadth of experience with 'C', this would seem second nature.
How did I deduce this? This helps,
[<c013c5ec>] (__warn) from [<c0e38c00>] (warn_slowpath_fmt+0x70/0xd8)
[<c01c7090>] (dma_alloc_attrs+0x118/0x128)
[<bf00c1b4>] (mod_init+0x1b4/0x1000 [dma_test])
A call to warn was issued so some parameter seemed to be wrong.

Besides the issue pointed by artless noise, your codes still had 2 bugs.
dev->dma_mask is NULL, so dma_set_mask_and_coherent() returns -5 (-EIO). So you need to add something as follows into between device_create() and dma_set_mask_and_coherent().
u64 dma_mask = DMA_BIT_MASK(32);
hello_dev->dma_mask = &dma_mask;
dma_set_mask_and_coherent() returns non-zero on errors. So you need to change the code to if (debug)

Related

register_reboot_notifier doesn't notify

Playing with LKMs (out-of-tree) I'm trying to perform some work before system reboot using register_reboot_notifier, but cant make even this simple code to work:
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/reboot.h>
static int notify_me(struct notifier_block *nb, unsigned long action, void *data) {
printk(KERN_INFO "Hey! I'm your reboot notification!\n");
return NOTIFY_OK;
}
static struct notifier_block reboot_notifier = {
.notifier_call = notify_me,
.next = NULL,
.priority = 0,
};
static int __init notifier_init(void) {
register_reboot_notifier(&reboot_notifier);
printk(KERN_INFO "Reboot notifier registered\n");
return 0;
}
static void __exit notifier_exit(void) {
unregister_reboot_notifier(&reboot_notifier);
}
MODULE_LICENSE("GPL");
module_init(notifier_init);
module_exit(notifier_exit);
The module successfully compiles and loads indicating that the notifier is registered
# insmod reboot_notifier.ko
# dmesg
...
Jun 09 13:26:09 ubuntu kernel: Reboot notifier registered
# reboot
, but the "Hey! I'm your reboot notification!\n" message never appears in logs after reboot:
# journalctl -n 10000 | grep kernel | grep notif
Jun 09 13:26:09 ubuntu kernel: reboot_notifier: loading out-of-tree module taints kernel.
Jun 09 13:26:09 ubuntu kernel: reboot_notifier: module verification failed: signature and/or required key missing - tainting kernel
Jun 09 13:26:09 ubuntu kernel: Reboot notifier registered
Tested it on different systems/kernels:
Linux centos 3.10.0-1160.59.1.el7.x86_64 #1 SMP
Linux ubuntu 5.15.0-37-generic #39-Ubuntu SMP
Linux desktop 5.15.41-1-MANJARO #1 SMP PREEMPT
Still no luck. Google doesn't help me either.
Here is a full journalctl -k -b -1 output after reboot.

lkm func hijacking BUG

I've written a little linux kernel module, to see, how nowadays implement kernel function hijacking.
https://pastebin.com/99YJFnaq
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/syscalls.h>
#include <linux/version.h>
#include <linux/unistd.h>
#include <linux/time.h>
#include <linux/preempt.h>
#include <asm/uaccess.h>
#include <asm/paravirt.h>
#include <asm-generic/bug.h>
#include <asm/segment.h>
#define BUFFER_SIZE 512
#define MODULE_NAME "hacked_read"
#define dbg( format, arg... ) do { if ( debug ) pr_info( MODULE_NAME ": %s: " format , __FUNCTION__ , ## arg ); } while ( 0 )
#define err( format, arg... ) pr_err( MODULE_NAME ": " format, ## arg )
#define info( format, arg... ) pr_info( MODULE_NAME ": " format, ## arg )
#define warn( format, arg... ) pr_warn( MODULE_NAME ": " format, ## arg )
MODULE_DESCRIPTION( MODULE_NAME );
MODULE_VERSION( "0.1" );
MODULE_LICENSE( "GPL" );
MODULE_AUTHOR( "module author <mail#domain.com>" );
static char debug_buffer[ BUFFER_SIZE ];
unsigned long ( *original_read ) ( unsigned int, char *, size_t );
void **sct;
unsigned long icounter = 0;
static inline void rw_enable( void ) {
asm volatile ( "cli \n"
"pushq %rax \n"
"movq %cr0, %rax \n"
"andq $0xfffffffffffeffff, %rax \n"
"movq %rax, %cr0 \n"
"popq %rax " );
}
static inline uint64_t getcr0(void) {
register uint64_t ret = 0;
asm volatile (
"movq %%cr0, %0\n"
:"=r"(ret)
);
return ret;
}
static inline void rw_disable( register uint64_t val ) {
asm volatile(
"movq %0, %%cr0\n"
"sti "
:
:"r"(val)
);
}
static void* find_sym( const char *sym ) {
static unsigned long faddr = 0; // static !!!
// ----------- nested functions are a GCC extension ---------
int symb_fn( void* data, const char* sym, struct module* mod, unsigned long addr ) {
if( 0 == strcmp( (char*)data, sym ) ) {
faddr = addr;
return 1;
} else return 0;
};// --------------------------------------------------------
kallsyms_on_each_symbol( symb_fn, (void*)sym );
return (void*)faddr;
}
unsigned long hacked_read_test( unsigned int fd, char *buf, size_t count ) {
unsigned long r = 1;
if ( fd != 0 ) { // fd == 0 --> stdin (sh, sshd)
return original_read( fd, buf, count );
} else {
icounter++;
if ( icounter % 1000 == 0 ) {
info( "test2 icounter = %ld\n", icounter );
info( "strlen( debug_buffer ) = %ld\n", strlen( debug_buffer ) );
}
r = original_read( fd, buf, count );
strncat( debug_buffer, buf, 1 );
if ( strlen( debug_buffer ) > BUFFER_SIZE - 100 )
debug_buffer[0] = '\0';
return r;
}
}
int hacked_read_init( void ) {
register uint64_t cr0;
info( "Module was loaded\n" );
sct = find_sym( "sys_call_table" );
original_read = (void *)sct[ __NR_read ];
cr0 = getcr0();
rw_enable();
sct[ __NR_read ] = hacked_read_test;
rw_disable( cr0 );
return 0;
}
void hacked_read_exit( void ) {
register uint64_t cr0;
info( "Module was unloaded\n" );
cr0 = getcr0();
rw_enable();
sct[ __NR_read ] = original_read;
rw_disable( cr0 );
}
module_init( hacked_read_init );
module_exit( hacked_read_exit );
Makefile:
CURRENT = $(shell uname -r)
KDIR = /lib/modules/$(CURRENT)/build
PWD = $(shell pwd)
TARGET = hacked_read
obj-m := $(TARGET).o
default:
$(MAKE) -C $(KDIR) M=$(PWD) modules
clean:
#rm -f *.o .*.cmd .*.flags *.mod.c *.order
#rm -f .*.*.cmd *.symvers *~ *.*~ TODO.*
#rm -fR .tmp*
#rm -rf .tmp_versions
Thereafter, I'm making the module and inserting it.
Of-course, the better way is to do it - inside qemu machine. I'm using default Kali 2018.1 installed on image hdd.qcow2 [30Gb]. Kernel 4.14.13 is a default kernel built by me with DEBUG flags:
# diff /boot/config-4.14.13 /boot/config-4.14.0-kali3-amd64
3c3
< # Linux/x86_64 4.14.13 Kernel Configuration
---
> # Linux/x86 4.14.12 Kernel Configuration
7620c7620
< CONFIG_GDB_SCRIPTS=y
---
> # CONFIG_GDB_SCRIPTS is not set
7652,7655c7652
< CONFIG_DEBUG_KMEMLEAK=y
< CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=400
< CONFIG_DEBUG_KMEMLEAK_TEST=m
< # CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF is not set
---
> # CONFIG_DEBUG_KMEMLEAK is not set
CONFIG_DEBUG_KMEMLEAK - is useless on amd64, so there is only CONFIG_GDB_SCRIPTS plays a role.
Back to the game:
# make
# cp hacked_read.ko /lib/modules/4.14.13/hacked_read.ko
# depmod
# modprobe hacked_read
Thereafter, I'm typing different symbols, mostly a and left arrow and delete, as you can see from syslog: icounter = 44000, so it is 44k symbols was typed by me, before bug appears, sometimes more, sometimes less... To get this number faster I'm using /usr/bin/xset r rate 20 60,
or even insert false in if/else statement like this if ( fd != 0 && false ) { // fd == 0 --> stdin (sh, sshd) - this will automate the process.
The Bug
/var/log/syslog/
Aug 30 10:20:37 kali kernel: [ 1540.483650] hacked_read: test2 icounter = 44000
Aug 30 10:20:37 kali kernel: [ 1540.483654] hacked_read: strlen( debug_buffer ) = 202
Aug 30 10:20:42 kali kernel: [ 1546.187954] hacked_read: test2 icounter = 45000
Aug 30 10:20:42 kali kernel: [ 1546.187958] hacked_read: strlen( debug_buffer ) = 376
Aug 30 10:20:58 kali kernel: [ 1561.366421] BUG: unable to handle kernel paging request at ffffffffc071909b
Aug 30 10:20:58 kali kernel: [ 1561.366434] IP: 0xffffffffc071909b
Aug 30 10:20:58 kali kernel: [ 1561.366436] PGD b3a0e067 P4D b3a0e067 PUD b3a10067 PMD 2346c4067 PTE 0
Aug 30 10:20:58 kali kernel: [ 1561.366441] Oops: 0010 [#1] SMP PTI
Aug 30 10:20:58 kali kernel: [ 1561.366443] Modules linked in: hacked_read(O) 9p fscache fuse ppdev bochs_drm sg ttm 9pnet_virtio evdev joydev drm_kms_helper pcspkr serio_raw 9pnet drm parport_pc parport button binfmt_misc ip_tables x_tables autofs4 ext4 crc16 mbcache jbd2 crc32c_generic fscrypto ecb sr_mod cdrom sd_mod ata_generic crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel pcbc ata_piix libata scsi_mod aesni_intel aes_x86_64 crypto_simd glue_helper cryptd psmouse floppy virtio_pci virtio_ring virtio e1000 i2c_piix4 [last unloaded: hacked_read]
Aug 30 10:20:58 kali kernel: [ 1561.366488] CPU: 0 PID: 1788 Comm: tee Tainted: G O 4.14.13 #1
Aug 30 10:20:58 kali kernel: [ 1561.366490] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
Aug 30 10:20:58 kali kernel: [ 1561.366491] task: ffff9939ac178000 task.stack: ffffb2570359c000
Aug 30 10:20:58 kali kernel: [ 1561.366493] RIP: 0010:0xffffffffc071909b
Aug 30 10:20:58 kali kernel: [ 1561.366494] RSP: 0018:ffffb2570359ff38 EFLAGS: 00010292
Aug 30 10:20:58 kali kernel: [ 1561.366496] RAX: 000000000000005e RBX: 00007ffe554f8940 RCX: 0000000000000000
Aug 30 10:20:58 kali kernel: [ 1561.366497] RDX: 0000000000000000 RSI: ffff9939a0af7c10 RDI: ffff9939c0a20bb8
Aug 30 10:20:58 kali kernel: [ 1561.366498] RBP: 0000000000002000 R08: 0000000000000000 R09: 0000000000000000
Aug 30 10:20:58 kali kernel: [ 1561.366499] R10: 000000000000005e R11: 00000000000003f1 R12: ffffffffc071b360
Aug 30 10:20:58 kali kernel: [ 1561.366501] R13: 000055ae361bb4a0 R14: 0000000000000010 R15: 00007ffe554faa98
Aug 30 10:20:58 kali kernel: [ 1561.366502] FS: 00007f60491184c0(0000) GS:ffff9939ffc00000(0000) knlGS:0000000000000000
Aug 30 10:20:58 kali kernel: [ 1561.366504] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Aug 30 10:20:58 kali kernel: [ 1561.366505] CR2: ffffffffc071909b CR3: 00000001d9018005 CR4: 00000000000606f0
Aug 30 10:20:58 kali kernel: [ 1561.366514] Call Trace:
Aug 30 10:20:58 kali kernel: [ 1561.366524] ? system_call_fast_compare_end+0xc/0x6f
Aug 30 10:20:58 kali kernel: [ 1561.366526] Code: Bad RIP value.
Aug 30 10:20:58 kali kernel: [ 1561.366532] RIP: 0xffffffffc071909b RSP: ffffb2570359ff38
Aug 30 10:20:58 kali kernel: [ 1561.366532] CR2: ffffffffc071909b
Aug 30 10:20:58 kali kernel: [ 1561.366535] ---[ end trace ca74de96d373ac0b ]---
Could somebody, please, tell me which way to dig?
There is no overflows inside debug_buffer array - it is completely true.
There is no conflicts in asm code, while hijacking is carried out.
It is tiny, light script... Where is the BUG?
Update1:
Looks like I've found a reason why it starts crashing. The BUG appears right after command rmmod hacked_read. So module_exit() is wrong, probably asm's cli & sti not enough.
As the module is removed from the Linux kernel, all memory used by the module (data and code) is released. The exit() function of the module restores the pointer to the original function. However, the kernel may be executing the code of the substitute function at the time the module is removed. Suddenly, right in the middle of that the function disappears as the memory taken by the module's code is released. Hence the bug.
Obviously you can't remove the module after you restore the pointer to the original function until you're sure that there are no kernel threads that (may) execute the code of the substitute function. After the pointer is restored, all new kernel threads will execute the original function, so you need to wait until any current threads finish the execution of the substitute function. How to do that is another issue. You may need to employ some tricks like reference counters, etc.
As #Aleksey mentioned, the issue was outside of the module.
The tee command used read() in its sleeping manner. While I've removed module nothing happened, but there was my little bash script:
#!/bin/bash
logfile="micro-test.log"
while sleep 0;do
echo -n "$(date): $(uptime): "
echo "1 2" | awk '{print $1}'
sleep 60;
done | tee -a $logfile
How I've found the piece of BUG:
As I said, my guest's kernel was compiled with CONFIG_GDB_SCRIPTS=y. Now I'm attaching guest from the host's gdb:
# gdb
(gdb) set logging file gdbcmd2.out
(gdb) set logging on
Copying output to gdbcmd2.out.
(gdb)
Already logging to gdbcmd2.out.
(gdb) target remote :1234
Remote debugging using :1234
warning: No executable has been specified and target does not support
determining executable automatically. Try using the "file" command.
0xffffffff99082e42 in ?? ()
(gdb) add-auto-load-safe-path /usr/src/linux-source-4.14/scripts/gdb/vmlinux-gdb.py
(gdb) file /usr/src/linux-source-4.14/vmlinux
A program is being debugged already.
Are you sure you want to change the file? (y or n) y
Reading symbols from /usr/src/linux-source-4.14/vmlinux...done
On the guest side, I'm extracting addresses:
root#kali:~# cat /sys/module/hacked_read/sections/.text
0xffffffffc06e9000
root#kali:~# cat /sys/module/hacked_read/sections/.bss
0xffffffffc06eb34
On the host side, adding module to debugging:
(gdb) add-symbol-file /usr/src/hacked_read/hacked_read.ko 0xffffffffc06e9000 -s .bss 0xffffffffc06eb34
(gdb) p hacked_read_test
$1 = {unsigned long (unsigned int, char *, size_t)} 0xffffffffc06e9030 <hacked_read_test>
(gdb) maintenance info line-table
... BIG-BIG-OUT-PUT ...
Thereafter, I can see in logfile: gdbcmd2.out - listing of my code with addresses. For example, 0xffffffffc06e9030 - the address of hacked_read_test function:
# grep 0xffffffffc06e9030 gdbcmd2.out
$1 = {unsigned long (unsigned int, char *, size_t)} 0xffffffffc06e9030 <hacked_read_test>
6 77 0xffffffffc06e9030
77 - line of code
$ head -n 77 hacked_read.c | tail -n 1
unsigned long hacked_read_test( unsigned int fd, char *buf, size_t count ) {
Bingo!
Now, on the guest side, I'm doing rmmod hacked_read. After 60+- seconds appears BUG:
Sep 9 06:35:28 kali kernel: [281996.592759] hacked_read: Module was unloaded
Sep 9 06:36:11 kali kernel: [282040.218523] BUG: unable to handle kernel paging request at ffffffffc06e909b
Sep 9 06:36:11 kali kernel: [282040.218530] IP: 0xffffffffc06e909b
Sep 9 06:36:11 kali kernel: [282040.218531] PGD 22980e067 P4D 22980e067 PUD 229810067 PMD 2356e3067 PTE 0
Sep 9 06:36:11 kali kernel: [282040.218534] Oops: 0010 [#9] SMP PTI
Sep 9 06:36:11 kali kernel: [282040.218536] Modules linked in: sctp_diag sctp libcrc32c tcp_diag udp_diag dccp_diag dccp inet_diag unix_diag 9p fscache fuse bochs_drm ttm ppdev drm_kms_helper joydev evdev serio_raw pcspkr sg 9pnet_virtio 9pnet parport_pc parport button drm binfmt_misc ip_tables x_tables autofs4 ext4 crc16 mbcache jbd2 crc32c_generic fscrypto ecb sr_mod cdrom sd_mod ata_generic crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd ata_piix psmouse floppy virtio_pci virtio_ring virtio e1000 i2c_piix4 libata scsi_mod [last unloaded: hacked_read]
Sep 9 06:36:11 kali kernel: [282040.218567] CPU: 0 PID: 32196 Comm: tee Tainted: G D O 4.14.13 #1
Comm: tee & BUG: unable to handle kernel paging request at ffffffffc06e909b
Host:
# grep ffffffffc06e909b gdbcmd2.out
18 88 0xffffffffc06e909b
88 - line of code:
$ head -n 88 hacked_read.c | tail -n 1
strncat( debug_buffer, buf, 1 );
It is easy to see, that kernel unable to give tee the address of the next line after original_read():
77:unsigned long hacked_read_test( unsigned int fd, char *buf, size_t count ) {
78. unsigned long r = 1;
79. if ( fd != 0 ) { // fd == 0 --> stdin (sh, sshd)
80. return original_read( fd, buf, count );
81. } else {
82. icounter++;
83. if ( icounter % 1000 == 0 ) {
84. info( "test2 icounter = %ld\n", icounter );
85. info( "strlen( debug_buffer ) = %ld\n", strlen( debug_buffer ) );
86. }
87. r = original_read( fd, buf, count );
88. strncat( debug_buffer, buf, 1 );
if ( strlen( debug_buffer ) > BUFFER_SIZE - 100 )
debug_buffer[0] = '\0';
return r;
}
}

Values of array of structs suddenly change (?)

So I've been looking to this code for hours and I can't seem to understand what is happening, despite my efforts to debug this with printf's everywhere. The following function is where my main problem is:
void drawFolders() {
printf("Going to call getSubFolder\n");
getSubFolders(".");
printf("DEBUG 1 - Return from getFolderName is: %s\n", getFolderName(3));
printf("DEBUG 2 - Return from getFolderName is: %s\n", getFolderName(5));
int k;
for(k = 0; k < getNumberFolders(); k++) {
printf("DEBUG INLOOP %d - Return from getFolderName is: %s\n",k, getFolderName(k));
draw_sprite(k*60 + 30, 50, folder);
//draw_string(temp[i].name,i*60 + 30, 50);
}
}
Basically, in DEBUG1 and DEBUG2 my function getFolderName() seems to work just fine, ie, a string is returned with no problems. However, when I try to call this function inside my for loop, all I get from getFolderName is garbage (except the first one) , even when k is 3 or 5 (cases which I've tested before the for loop and worked fine). k is not wrong, as I print it too. getFolderName does nothing to the array , here is the function :
char* getFolderName(int index) {
printf("Values inside getFolderName() function : %s\n", currentFolders[index].name);
return currentFolders[index].name ;
}
Also it might be important to show what currentFolders is:
typedef struct Directories {
char* name;
int active;
} Directory;
Directory currentFolders[30];
OUTPUT
Dec 20 22:18:48 192 kernel: Vai chamar o getSubFolder
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: .
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: ..
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: usr
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: lab4
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: proj
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: bin
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: boot
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: dev
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: etc
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: home
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: mnt
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: root
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: sbin
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: tmp
Dec 20 22:18:48 192 kernel: Values inside getSubFolders function: var
Dec 20 22:18:48 192 kernel: DEBUG 0Values inside getFolderName() function : lab4
Dec 20 22:18:48 192 kernel: DEBUG 1 - Return from getFolderName is: lab4
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function : bin
Dec 20 22:18:48 192 kernel: DEBUG 2 - Return from getFolderName is: bin
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function : .
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 0 - Return from getFolderName is: .
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 1 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 2 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 3 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 4 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 5 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 6 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 7 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 8 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 9 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 10 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 11 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 12 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 13 - Return from getFolderName is:
Dec 20 22:18:48 192 kernel: Values inside getFolderName() function :
Dec 20 22:18:48 192 kernel: DEBUG INLOOP 14 - Return from getFolderName is:
Other function called that isn't related to the problem, I think, but you might want to take a look so here it is anyway:
int getSubFolders(char* foldername) {
num_folders = 0;
struct dirent *de=NULL;
DIR *d=NULL;
d=opendir(foldername);
if(d == NULL) {
perror("Couldn't open directory");
return(2);
}
int i = 0;
// Loop while not NULL
while(de = readdir(d)) {
num_folders++;
Directory temp;
currentFolders[i].name = de->d_name;
strcpy( currentFolders[i].name, de->d_name);
// printf("nome %s\n",de->d_name);
//printf("temp.name %s\n",temp.name);
// currentFolders[i] = temp;
printf("Values inside getSubFolders function: %s\n",currentFolders[i].name);
i++;
}
printCurrentFolders();
closedir(d);
return(0);
}
Fix it this way, the struct declare it like this
typedef struct Directories {
char name[256];
int active;
} Directory;
and remove this line
currentFolders[i].name = de->d_name;
The problem is caused by closedir(d); cleaning up resources used by opendir, the struct dirent * being part of those resources.

Watchdog Kernel Module Unknown symbol watchdog_register_device

currently I am trying to develop a Kernel Module to control the hardware Watchdog and when it is not fed turn off the system, as watchdog should. I know there is a user space daemon already in place that can be "controlled" from the /dev/watchdog file but I need a kernel module. To expand on that I already have a kernel module to receive GPIO input, have registered an IRQ for that, and would like that IRQ to feed the watchdog. Thus if the GPIO interrupt doesn't fire in X amount of time the system shuts down.
However, I am having trouble getting the watchdog header file to cooperate with me. I read the little documentation there is on watchdog kernel api from here https://www.kernel.org/doc/Documentation/watchdog/watchdog-kernel-api.txt but this seems to say it will just work.
Below I have pasted my test code for getting a watchdog to register followed by the error I am running into.
testWatchdog.c
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/fcntl.h>
#include <linux/watchdog.h>
static int __init watchdog_init(void)
{
struct watchdog_device test;
printk(KERN_INFO "WATCHDOG\n");
watchdog_register_device(&test);
test.ops->start(&test);
test.ops->ping(&test);
return 0;
}
static void __exit watchdog_exit(void)
{
printk(KERN_INFO "GG!!!\n");
}
MODULE_LICENSE("GPL");
module_init(watchdog_init);
module_exit(watchdog_exit);
Makefile
obj-m += testWatchdog.o
export EXTRA_CFLAGS := -std=gnu99
all:
make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
clean:
make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
The code I do have can be made but when running insmod I get an error
insmod: error inserting './testWatchdog.ko': -1 Invalid parameters
After running dmesg I see the following error
Unknown symbol watchdog_register_device (err 0)
If anyone could please at least point me in the correct direction I would appreciate it a lot. I'm fairly new to kernel modules but am hopeful for this.
UPDATE:
For some reason I decided to rerun this insmod command and my output has changed. As far as I know I didn't change anything but maybe I updated a file. Anyway when I try to insmod now I don't get an error but my Kernel says I'm doing a null pointer dereference. Here is the output of my error.
[68964.787385] WATCHDOG
[68964.787405] BUG: unable to handle kernel NULL pointer dereference at 00000005
[68964.787409] IP: [<f84c7020>] watchdog_init+0x20/0x1000 [testWatchdog]
[68964.787420] *pdpt = 000000003676a001 *pde = 0000000000000000
[68964.787423] Oops: 0000 [#1] SMP
[68964.787427] Modules linked in: testWatchdog(OF+) vboxsf(OF) vesafb(F) microcode(F) vboxguest(OF) psmouse(F) serio_raw(F) i2c_piix4(F) mac_hid(F) vboxvideo(OF) rfcomm(F) bnep(F) parport_pc(F) bluetooth(F) ppdev(F) drm(F) binfmt_misc(F) lp(F) parport(F) pcnet32(F) ahci(F) libahci(F)
[68964.787446] Pid: 3180, comm: insmod Tainted: GF O 3.8.0-34-generic #49~precise1-Ubuntu innotek GmbH VirtualBox/VirtualBox
[68964.787448] EIP: 0060:[<f84c7020>] EFLAGS: 00010296 CPU: 0
[68964.787451] EIP is at watchdog_init+0x20/0x1000 [testWatchdog]
[68964.787453] EAX: e1989e3c EBX: f84c4000 ECX: 00000000 EDX: 00000001
[68964.787455] ESI: 00001299 EDI: e1989f60 EBP: e1989eb8 ESP: e1989e38
[68964.787456] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
[68964.787461] CR0: 8005003b CR2: 00000005 CR3: 34865000 CR4: 000006f0
[68964.787477] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[68964.787482] DR6: ffff0ff0 DR7: 00000400
[68964.787484] Process insmod (pid: 3180, ti=e1988000 task=f3f5e680 task.ti=e1988000)
[68964.787485] Stack:
[68964.787487] f84c3024 c103fb63 00000000 e1989ed0 00000000 00000000 e1989eac c1040885
[68964.787492] 00000000 e1989e6c 00000000 f84c8000 00000001 00000000 80000000 e1989ed0
[68964.787496] 00000000 80000000 00000000 00000000 00000001 00000001 0003d5dc c10decd6
[68964.787501] Call Trace:
[68964.787528] [<c103fb63>] ? cpa_flush_range+0x43/0xc0
[68964.787532] [<c1040885>] ? change_page_attr_set_clr+0x225/0x250
[68964.787543] [<c10decd6>] ? tracepoint_module_notify+0x26/0xc0
[68964.787546] [<f84c7000>] ? 0xf84c6fff
[68964.787549] [<c1040a7f>] ? set_memory_nx+0x5f/0x70
[68964.787552] [<c1003054>] do_one_initcall+0x34/0x170
[68964.787555] [<f84c7000>] ? 0xf84c6fff
[68964.787567] [<c161b039>] do_init_module+0x80/0x1c6
[68964.787583] [<c10af03b>] load_module+0x41b/0x5f0
[68964.787586] [<c10acee0>] ? add_kallsyms+0x1e0/0x1e0
[68964.787611] [<c12f92f2>] ? _copy_from_user+0x42/0x60
[68964.787615] [<c10af296>] sys_init_module+0x86/0xa0
[68964.787620] [<c163518d>] sysenter_do_call+0x12/0x28
[68964.787622] Code: <ff> 52 04 8b 55 d0 8d 45 84 ff 52 0c 31 c0 c9 c3 00 00 00 00 00 00
[68964.787642] EIP: [<f84c7020>] watchdog_init+0x20/0x1000 [testWatchdog] SS:ESP 0068:e1989e38
[68964.787646] CR2: 0000000000000005
[68964.787650] ---[ end trace fd9234fd8f14c123 ]---

there are errors to replace Linux kernel function

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/types.h>
#define CODESIZE 7
static unsigned char original_code[CODESIZE];
static unsigned char jump_code[CODESIZE] =
"\xb8\x00\x00\x00\x00" /* movq $0, %rax */
"\xff\xe0" /* jump *%rax */
;
void (*sync_readahead)( struct address_space *mapping, struct file_ra_state *ra, struct file *filp, pgoff_t offset, unsigned long req_size ) = (void (*)(struct address_space *, struct file_ra_state *, struct file *, pgoff_t , unsigned long ) )0xc0197100;
int hijack_start(void);
void hijack_stop(void);
void intercept_init(void);
void intercept_start(void);
void intercept_stop(void);
void fake_printk(struct address_space *mapping, struct file_ra_state *ra, struct file *filp, pgoff_t offset, unsigned long req_size);
int hijack_start()
{
printk(KERN_INFO "I can haz hijack?\n" );
intercept_init();
return 0;
}
void hijack_stop()
{
intercept_stop();
return;
}
void intercept_init()
{
printk(KERN_INFO "in the intercept_init\n" );
memcpy( original_code, sync_readahead, 7 );
*(long *)&jump_code[1] = (long)fake_printk;
memcpy( sync_readahead, jump_code, 7 );
printk(KERN_INFO "in the hijack?\n" );
//real_printk=NULL;
printk(KERN_INFO "begin the hijack?\n" );
memcpy( sync_readahead, jump_code, CODESIZE );
printk(KERN_INFO "begin the hijack?\n" );
return;
}
void intercept_stop()
{
memcpy( sync_readahead, original_code, CODESIZE );
}
void fake_printk(struct address_space *map, struct file_ra_state *a, struct file *fil, pgoff_t offse, unsigned long req_siz)
{
printk(KERN_INFO "in the fake printk\n");
// return ret;
}
MODULE_LICENSE("GPL");
module_init( hijack_start );
module_exit( hijack_stop );
I want to replace Linux kernel function by address (/proc/kallsyms), but when I memcpy the new function to the address (Linux kernel):
memcpy( sync_readahead, jump_code, CODESIZE );
there are errors (segmentation fault). I have seen some examples to replace Linux kernel function in the same way. Would you please help me to solve the problem? Thank you very much.
Information as follows:
ubuntu kernel: [ 574.826458] *pde = 0087d067 *pte = 00197161
ubuntu kernel: [ 574.826468] Modules linked in: hijack(+) test(+) binfmt_misc bridge stp bnep input_polldev video output vmblock vsock vmmemctl vmhgfs pvscsi acpiphp lp ppdev pcspkr psmouse serio_raw snd_ens1371 gameport snd_ac97_codec ac97_bus snd_pcm_oss snd_mixer_oss snd_pcm snd_seq_dummy snd_seq_oss snd_seq_midi snd_rawmidi snd_seq_midi_event snd_seq snd_timer snd_seq_device snd soundcore snd_page_alloc vmci i2c_piix4 parport_pc parport intel_agp agpgart shpchp mptspi mptscsih mptbase scsi_transport_spi floppy fbcon tileblit font bitblit softcursor vmxnet
ubuntu kernel: [ 574.826491]
ubuntu kernel: [ 574.826493] Pid: 4694, comm: insmod Tainted: G D (2.6.28-11-generic #42-Ubuntu) VMware Virtual Platform
ubuntu kernel: [ 574.826496] EIP: 0060:[<f7c92101>] EFLAGS: 00010246 CPU: 0
ubuntu kernel: [ 574.826498] EIP is at intercept_init+0x41/0x70 [hijack]
ubuntu kernel: [ 574.826499] EAX: f5ec4b60 EBX: 00000000 ECX: ffffffff EDX: 00004c4c
ubuntu kernel: [ 574.826501] ESI: f7c9252c EDI: c0197100 EBP: f5edbe18 ESP: f5edbe0c
ubuntu kernel: [ 574.826502] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
ubuntu kernel: [ 574.826506] f7c921a6 f7c92130 00000000 f5edbe24 f7c92147 f7c921d5 f5edbf8c c010111e
ubuntu kernel: [ 574.826618] ---[ end trace ccc07e4b4d814976 ]---
Kernel function hijacking is very tricky business, and it needs to be exactly right in order to not run into all kinds of issues.
I am currently working on a module that does this, and it (at the time of this writing) works for 2.6.18+ kernels:
https://github.com/cormander/tpe-lkm
You'll be most interested in the hijacks.c file.
Many portions of this process are architecture, kernel version dependent, and CPU feature dependent as well.
UPDATE
The module now uses the 0XE9 jump opcode and should work for you. The nitty gritty details are in hijacks.c, and the "high level" logic you'll be most interested in is in the hijack_syscalls() function in security.c

Resources