Understanding spinlocks in netfilter hook - c

I am writing a small kernel module for measuring the time that a network packet takes to exit a node.
This module is a hook in the netfilter library.
For each packet it receives it calculates an hash, gets the tstamp from skbuff and the actual timestamp, and saves all this data in a linked list.
To pass this data to userspace I've created a proc device, and when the user reads from the device I send one of the entries of the linked list.
To make changes to the list (read and write) I have a spinlock. The problem is that sometimes when I read from the proc device while I am processing packets the system crash.
I think that the problem is in the function "dump_data_to_proc", more specifically when try to acquire the spinlock. I've made some tests and it only crashes(softlockup) when running in a tplink router. When I run the module in a "normal" pc(single core) it don't crash,
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
#include <linux/init.h> /* Needed for the macros */
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
#include <linux/spinlock.h>
#include <net/ipv6.h>
#include <linux/proc_fs.h> /* Necessary because of proc fs */
#include <asm/uaccess.h> /* for copy_from_user */
#include "kmodule_measure_process_time.h"
#include "hash.c"
//DEBUG >=5 is very slow in the tplink
#define DEBUG 2
#define PROCFS_MAX_SIZE 64
#define PROCFS_NAME "measures"
#define MAXIMUM_SAMPLES 10000
static struct nf_hook_ops nfho;
unsigned int total_packets_processed= 0;
unsigned int total_packets_discarded=0;
int temp_counter=0;
struct values_list *HEAD;
spinlock_t list_lock ;
static int hello_proc(struct seq_file *m, void *v) {
seq_printf(m, " stats Mod initialized.\n");
return 0;
}
static int proc_open(struct inode *inode, struct file *file) {
return single_open(file, hello_proc, NULL);
}
ssize_t dump_data_to_proc(struct file *filp, char *buffer, size_t length, loff_t *offset){
int bytesRead = 0;
struct values_list *temp=NULL;
int bytesError=0;
char buff[PROCFS_MAX_SIZE];
spin_lock(&list_lock);
temp=HEAD;
if(temp!=NULL){
HEAD = temp->next;
}
spin_unlock(&list_lock);
if(temp!=NULL){
bytesRead = snprintf(buff, PROCFS_MAX_SIZE ,"%u|%llu|%llu\n", temp->hash,temp->arrival_timestap, temp->exit_timestap);
length = length - bytesRead+1;
kfree(temp);
temp_counter--;
}
bytesError= copy_to_user(buffer, buff, bytesRead);
if(bytesError!=0){
#if DEBUG >0
printk(KERN_INFO "Error: failed to copy to user");
#endif
}
return bytesRead;
}
static const struct file_operations proc_fops = {
.owner = THIS_MODULE,
.open = proc_open,
.read = dump_data_to_proc,
.llseek = seq_lseek,
.release = single_release,
};
static unsigned int hook_func(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
{
uint32_t hash=0;
ktime_t now_timeval;
struct timespec now;
u64 timestamp_arrival_time=0;
u64 timestamp_now=0;
struct ipv6hdr * ipheader;
struct values_list *node;
int number_of_samples=0;
spin_lock(&list_lock);
number_of_samples=temp_counter;
spin_unlock(&list_lock);
if(number_of_samples > MAXIMUM_SAMPLES){
#if DEBUG > 5
printk(KERN_INFO "Discarded one sample because the list is full.\n");
#endif
total_packets_discarded++; // probably this should be inside a spinlock
return NF_ACCEPT;
}
//calculate arrival time and actual time in ns
timestamp_arrival_time = ktime_to_ns(skb->tstamp);
getnstimeofday(&now);
now_timeval = timespec_to_ktime(now);
timestamp_now = ktime_to_ns(now_timeval);
//get Ipv6 addresses
ipheader = (struct ipv6hdr *)skb_network_header(skb);
hash=simple_hash((char *)&ipheader->saddr,sizeof(struct in6_addr)*2,hash);
total_packets_processed++;
node = (struct values_list *) kmalloc(sizeof(struct values_list),GFP_ATOMIC);
if(!node){
#if DEBUG >0
printk(KERN_INFO "Error cannot malloc\n");
#endif
return NF_ACCEPT;
}
node->hash=hash;
node->arrival_timestap=timestamp_arrival_time;
node->exit_timestap=timestamp_now;
spin_lock(&list_lock);
node->next=HEAD;
HEAD=node;
temp_counter++;
spin_unlock(&list_lock);
return NF_ACCEPT;
}
static int __init init_main(void)
{
nfho.hook = hook_func;
nfho.hooknum = NF_INET_POST_ROUTING;
nfho.pf = PF_INET6;
nfho.priority = NF_IP_PRI_FIRST;
nf_register_hook(&nfho);
#if DEBUG >0
printk(KERN_INFO " kernel module: Successfully inserted protocol module into kernel.\n");
#endif
proc_create(PROCFS_NAME, 0, NULL, &proc_fops);
spin_lock_init(&list_lock);
//Some distros/devices disable timestamping of packets
net_enable_timestamp();
return 0;
}
static void __exit cleanup_main(void)
{
struct values_list *temp;
nf_unregister_hook(&nfho);
#if DEBUG >0
printk(KERN_INFO " kernel module: Successfully unloaded protocol module.\n");
printk(KERN_INFO "Number of packets processed:%d\n",total_packets_processed);
printk(KERN_INFO "Number of packets discarded:%d\n",total_packets_discarded);
#endif
remove_proc_entry(PROCFS_NAME, NULL);
while(HEAD!=NULL){
temp=HEAD;
HEAD= HEAD->next;
kfree(temp);
}
}
module_init(init_main);
module_exit(cleanup_main);
/* * Declaring code as GPL. */
MODULE_LICENSE("GPLv3");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);

There are 2 problems with your code:
Use Linux kernel macro for your code. http://makelinux.com/ldd3/chp-11-sect-5 . Just add struct list_head as element to your struct values_list and use list_entry, list_add and other
Netfilter hools are run in softirq context, so you must use spin_lock_irqsave and spin_unlock_irqrestore. This is most likely reason why your system crashes with softlockup. Read carefully http://makelinux.com/ldd3/chp-5-sect-5

Related

insmod: ERROR: could not insert module kernel.ko: Invalid parameters - Error related to naming scheme of kernel module

I'm using C to create a custom kernel module to hook into the netfilter operation on my Ubuntu box. However, I'm running into a problem revolving around the module_param argument. When inserting the module, I'm attempting to add a custom field, specifically this will drop ICMP traffic when specified. The code compiles fine using a standard make file but when using insmod to insert it, I get the error
insmod: ERROR: could not insert module kernel.ko: Invalid parameters
I'm using the command
insmod kernel.ko dropicmp=1
From what I've read, this should work with the module params argument, but nothing I've tried has fixed this.
Please find my code below.
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
#include <linux/tcp.h>
static struct nf_hook_ops nfho;
struct iphdr *iph;
struct tcphdr *tcp_header;
struct sk_buff *sock_buff;
unsigned int sport, dport;
// command line argument | called using insmod kernel_firewall.ko drop_icmp=1
static int dropicmp = 1;
module_param(dropicmp, int , 0); // takes in an int from command line | (name, variable, permissions)
unsigned int hook_func(unsigned int hooknum,
struct sk_buff **skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *)){
sock_buff = skb;
if (!sock_buff) { // if there is no socket buffer, accept
return NF_ACCEPT;
}
iph = (struct iphdr *)skb_network_header(sock_buff); // using the socket buffer, create our ip header structure out of packets in it
if (!iph) {
printk(KERN_INFO "no ip header, dropping\n"); // self explanatory
return NF_DROP;
}
if(iph->protocol==IPPROTO_TCP) {
if(iph->saddr | 0x11000000){ // if the first prefix is in the 192 range | might need to change the if statement up | considering sprintf
printk(KERN_INFO "192 subnet detected, dropping\n");
return NF_DROP;
}
else{
return NF_ACCEPT;
}
}
if(iph->protocol==IPPROTO_ICMP) { // if ICMP
if(dropicmp == 1){
return NF_DROP; // drop our ICMP traffic if required
}
else{
return NF_ACCEPT;
}
}
return NF_ACCEPT; // default to accept
}
// initialize
static int __init initialize(void) {
nfho.hook = hook_func;
nfho.hooknum = NF_INET_POST_ROUTING;
nfho.pf = PF_INET;
nfho.priority = NF_IP_PRI_FIRST;
nf_register_hook(&nfho);
return 0;
}
// rmmod
static void __exit teardown(void) {
nf_unregister_hook(&nfho);
}
module_init(initialize);
module_exit(teardown);
This was all due to my dumb naming scheme... I named the module kernel... Which is obviously already in use by the kernel...... So don't do that...

char driver node is not opening

I wrote a simple char driver for my beaglebone kernel
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <asm/uaccess.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/fs.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("mrigendra.chaubey#gmail.com");
#define DEVICE_NAME "experm"
#define CLASS_NAME "exp"
static struct class* myclass = NULL; ///< The device-driver class struct pointer
static struct device* mychardevice = NULL; ///< The device-driver device struct pointer
static int myopen(struct inode *, struct file *);
static int release(struct inode *, struct file *);
static int myioctl(struct inode *, struct file *, unsigned int cmd, unsigned long arg);
static size_t myread(struct file *,char * , size_t, loff_t *);
static size_t mywrite(struct file *,char * , size_t, loff_t *);
static dev_t mydev;
static int myopen(struct inode *nd, struct file *fp)
{
printk(KERN_INFO "myopen\n");
return 0;
}
static int myrelease(struct inode *nd, struct file *fp)
{
printk(KERN_INFO "myrelease\n");
return 0;
}
static int myioctl(struct inode *nd, struct file *fp, unsigned int cmd, unsigned long arg)
{
printk(KERN_INFO "myioctl\n");
return 0;
}
static size_t myread(struct file *fp, char *buf, size_t len, loff_t *ofs)
{
printk(KERN_INFO "myread\n");
return 0;
}
static size_t mywrite(struct file *fp, char *buf, size_t len, loff_t *ofs)
{
printk(KERN_INFO "mywrite\n");
return 0;
}
static struct file_operations fops = {
.open = myopen,
.release = myrelease,
.read = myread,
.write = mywrite,
.unlocked_ioctl = myioctl,
};
static int __init myinit(void)
{
int err;
//extern int alloc_chrdev_region(dev_t *, unsigned minor number, unsigned total, const char *);
err = alloc_chrdev_region(&mydev, 0, 1, "expermdev");
if(err<0)
{
printk(KERN_INFO "major and minor can't be created, err = %d\n", err);
return err;
}
//struct class * class_create ( struct module *owner, const char *name);
myclass = class_create(THIS_MODULE, CLASS_NAME);
if(IS_ERR(myclass))
{
unregister_chrdev(MAJOR(mydev), "expermdev");
printk(KERN_ALERT "Failed to register device class\n");
return PTR_ERR(myclass);
}
//struct device *device_create(struct class *cls, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...);
//This function can be used by char device classes. A struct device will be created in sysfs, registered to the specified class.
mychardevice = device_create(myclass, NULL, mydev, NULL, "expermdev");
if(IS_ERR(mychardevice))
{
class_destroy(myclass);
unregister_chrdev(MAJOR(mydev), "expermdev");
printk(KERN_ALERT "Failed to create the device\n");
return PTR_ERR(mychardevice);
}
printk(KERN_INFO "my device created correctly\n");
return 0;
}
static void __exit myexit(void)
{
device_destroy(mychardevice, mydev);
class_unregister(myclass);
class_destroy(myclass);
unregister_chrdev(MAJOR(mydev), "expermdev");
printk(KERN_INFO "exited\n");
}
module_init(myinit);
module_exit(myexit);
and app.c file
#include <stdio.h>
#include<fcntl.h>
int main()
{
int fp;
fp = open ("/dev/expermdev", O_RDWR);
if(fp < 0)
printf("file can't be opened\n");
else
printf("file opened\n");
return 0;
}
I am compiling the driver as a module and insmod it, also compiled app.c with the same cross compiler, and put this binary in bin directory. I ran this bin file, but it says
file can't be opened
What am I doing wrong?
Got it. I did not used cdev struture and added fops to it. After that I have to tell kernel about it.
cdev = cdev_alloc();//allocate memory to Char Device structure
cdev->ops = &fops;//link our file operations to the char device
result=cdev_add(cdev,mydev,1);//Notify the kernel abt the new device
also delete this cdev in exit function before unregistering the driver.
In register_chrdev_region we do both things at the same time, but in dynamic allocation of major and minor numbers we have to use cdev to do the rest.

Protecting shared memory segment between kernel and user space

I have shared memory segment created in kernel using mmap. I need to access this mapped memory from both kernel and user space. What mechanism should I use to protect the memory from concurrent access ?
I want to have something like:
Kernel module:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/mm.h>
#ifndef VM_RESERVED
# define VM_RESERVED (VM_DONTEXPAND | VM_DONTDUMP)
#endif
struct dentry *file;
struct mmap_info
{
char *data;
int reference;
};
void mmap_open(struct vm_area_struct *vma)
{
struct mmap_info *info = (struct mmap_info *)vma->vm_private_data;
info->reference++;
}
void mmap_close(struct vm_area_struct *vma)
{
struct mmap_info *info = (struct mmap_info *)vma->vm_private_data;
info->reference--;
}
static int mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page;
struct mmap_info *info;
info = (struct mmap_info *)vma->vm_private_data;
if (!info->data)
{
printk("No data\n");
return 0;
}
page = virt_to_page(info->data);
get_page(page);
vmf->page = page;
return 0;
}
struct vm_operations_struct mmap_vm_ops =
{
.open = mmap_open,
.close = mmap_close,
.fault = mmap_fault,
};
int op_mmap(struct file *filp, struct vm_area_struct *vma)
{
vma->vm_ops = &mmap_vm_ops;
vma->vm_flags |= VM_RESERVED;
vma->vm_private_data = filp->private_data;
mmap_open(vma);
return 0;
}
int mmapfop_close(struct inode *inode, struct file *filp)
{
struct mmap_info *info = filp->private_data;
free_page((unsigned long)info->data);
kfree(info);
filp->private_data = NULL;
return 0;
}
int mmapfop_open(struct inode *inode, struct file *filp)
{
struct mmap_info *info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL);
info->data = (char *)get_zeroed_page(GFP_KERNEL);
memcpy(info->data, "hello from kernel this is file: ", 32);
memcpy(info->data + 32, filp->f_dentry->d_name.name, strlen(filp->f_dentry->d_name.name));
/* assign this info struct to the file */
filp->private_data = info;
return 0;
}
static const struct file_operations mmap_fops = {
.open = mmapfop_open,
.release = mmapfop_close,
.mmap = op_mmap,
};
static int __init mmapexample_module_init(void)
{
file = debugfs_create_file("mmap_example", 0644, NULL, NULL, &mmap_fops);
return 0;
}
static void __exit mmapexample_module_exit(void)
{
debugfs_remove(file);
}
module_init(mmapexample_module_init);
module_exit(mmapexample_module_exit);
MODULE_LICENSE("GPL");
User space:
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <sys/mman.h>
#define PAGE_SIZE 4096
int main ( int argc, char **argv )
{
int configfd;
char * address = NULL;
configfd = open("/sys/kernel/debug/mmap_example", O_RDWR);
if(configfd < 0)
{
perror("Open call failed");
return -1;
}
address = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, configfd, 0);
if (address == MAP_FAILED)
{
perror("mmap operation failed");
return -1;
}
printf("Initial message: %s\n", address);
memcpy(address + 11 , "*user*", 6);
printf("Changed message: %s\n", address);
close(configfd);
return 0;
}
but with locks.
Kernel space and user space have no shared mechanisms for concurrent access protection. If you want them, you need to implement them by yourself.
It can be some sort of mutex, implemented within you kernel module, and accessed from user space via special ioctl requests:
Kernel:
DECLARE_WAIT_QUEUE_HEAD(wq);
int my_mutex_val = 0;
/*
* Lock mutex.
*
* May be used directly by the kernel or via 'ioctl(MY_CMD_LOCK)' by user.
*/
void my_mutex_lock(void)
{
spin_lock(&wq.lock);
wait_event_interruptible_locked(&wq, my_mutex_val == 0);
my_mutex_val = 1;
spin_unlock(&wq.lock);
}
/*
* Unlock mutex.
*
* May be used directly by the kernel or via 'ioctl(MY_CMD_UNLOCK)' by user.
*/
void my_mutex_unlock(void)
{
spin_lock(&wq.lock);
my_mutex_val = 0;
wake_up(&wq);
spin_unlock(&wq.lock);
}
long unlocked_ioctl (struct file * filp, unsigned int cmd, unsigned long val)
{
switch(cmd) {
case MY_CMD_LOCK:
my_mutex_lock();
break;
case MY_CMD_UNLOCK:
my_mutex_unlock();
break;
}
}
User:
int main()
{
...
ioctl(MY_CMD_LOCK);
<read data>
ioctl(MY_CMD_UNLOCK);
...
}
It can be some sort of spinlock, which value is stored in mmap-ed area (so visible both for kernel space and user space).
In any case, kernel module should be prepared for the case, when user space application doesn't follow locking conventions. This, probably, would cancel any expectation about mmap-ed area content, generated by the kernel, but kernel module shouldn't crash in that case. [This is why standard kernel's struct mutex is not used in the code above: user space may use it incorrectly].
The problem with the ioctl is you need a kernel switch every time you want to access the share info->data. If that is okay then the ioctl is good - but then why not just do a standard character read/write file operation instead?
You can also try a lock-less mechanism. In the shared info->data area add a barrier variable. When the user needs access, it will do an atomic_compare_and_xchg on the barrier variable until it is set to 0 (unused) and then set it to 1. When the kernel needs access it will do the same but set it to 2. See the gcc atomic builtin documentation.

Content wrong inside mmap'ed memory (Kernelspace<>Userspace)

I implement a memory mapping via mmap. My Kernel module writes something into this memory and a userspace application read this. In short I allocate 0x10000 memory (with kcalloc on kernel side and with mmap on userspace side). Then I write something to the address offsets 0x0, 0xf00 and 0xf000 using memcpy. On kernelside I can read back the memory correctly. But on userspace side the content of the first 0x1000 Bytes are repetitive through the whole memory (16 times). But why?
Her comes the code of the kernel module:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/mm.h>
#define DEV_MODULENAME "expdev"
#define DEV_CLASSNAME "expdevclass"
static int majorNumber;
static struct class *devClass = NULL;
static struct device *devDevice = NULL;
#ifndef VM_RESERVED
# define VM_RESERVED (VM_DONTEXPAND | VM_DONTDUMP)
#endif
struct mmap_info
{
char *data;
int reference;
};
static void
dev_vm_ops_open( struct vm_area_struct *vma )
{
struct mmap_info *info;
// counting how many applications mapping on this dataset
info = (struct mmap_info *)vma->vm_private_data;
info->reference++;
}
static void
dev_vm_ops_close( struct vm_area_struct *vma )
{
struct mmap_info *info;
info = (struct mmap_info *)vma->vm_private_data;
info->reference--;
}
static int
dev_vm_ops_fault( struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct page *page;
struct mmap_info *info;
info = (struct mmap_info *)vma->vm_private_data;
if (!info->data)
{
printk("No data\n");
return 0;
}
page = virt_to_page(info->data);
get_page(page);
vmf->page = page;
return 0;
}
static const struct vm_operations_struct dev_vm_ops =
{
.open = dev_vm_ops_open,
.close = dev_vm_ops_close,
.fault = dev_vm_ops_fault,
};
int
fops_mmap( struct file *filp,
struct vm_area_struct *vma)
{
vma->vm_ops = &dev_vm_ops;
vma->vm_flags |= VM_RESERVED;
vma->vm_private_data = filp->private_data;
dev_vm_ops_open(vma);
return 0;
}
int
fops_close( struct inode *inode,
struct file *filp)
{
struct mmap_info *info;
info = filp->private_data;
free_page((unsigned long)info->data);
kfree(info);
filp->private_data = NULL;
return 0;
}
int
fops_open( struct inode *inode,
struct file *p_file)
{
struct mmap_info *info;
char *data;
info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL);
// allocating memory on the heap for the data
data = kcalloc(0x10000,sizeof(char),GFP_KERNEL);
if( data==NULL )
{
printk(KERN_ERR "insufficient memory\n");
/* insufficient memory: you must handle this error! */
return ENOMEM;
}
info->data = data;
printk(KERN_INFO " > ->data: 0x%16p\n",info->data);
memcpy(info->data, "Initial entry on mapped memory by the kernel module", 52);
memcpy((info->data)+0xf00, "Somewhere", 9);
memcpy((info->data)+0xf000, "Somehow", 7);
printk(KERN_INFO " > ->data: %c%c%c\n", // the output here is correct
*(info->data+0xf000+0),
*(info->data+0xf000+1),
*(info->data+0xf000+2));
/* assign this info struct to the file */
p_file->private_data = info;
return 0;
}
static const struct file_operations dev_fops =
{
.open = fops_open,
.release = fops_close,
.mmap = fops_mmap,
};
static int __init
_module_init(void)
{
int ret = 0;
// Try to dynamically allocate a major number for the device
majorNumber = register_chrdev(0, DEV_MODULENAME, &dev_fops);
if (majorNumber<0)
{
printk(KERN_ALERT "Failed to register a major number.\n");
return -EIO; // I/O error
}
// Register the device class
devClass = class_create(THIS_MODULE, DEV_CLASSNAME);
// Check for error and clean up if there is
if (IS_ERR(devClass))
{
printk(KERN_ALERT "Failed to register device class.\n");
ret = PTR_ERR(devClass);
goto goto_unregister_chrdev;
}
// Create and register the device
devDevice = device_create(devClass,
NULL,
MKDEV(majorNumber, 0),
NULL,
DEV_MODULENAME
);
// Clean up if there is an error
if( IS_ERR(devDevice) )
{
printk(KERN_ALERT "Failed to create the device.\n");
ret = PTR_ERR(devDevice);
goto goto_class_destroy;
}
printk(KERN_INFO "Module registered.\n");
return ret;
// Error handling - using goto
goto_class_destroy:
class_destroy(devClass);
goto_unregister_chrdev:
unregister_chrdev(majorNumber, DEV_MODULENAME);
return ret;
}
static void __exit
_module_exit(void)
{
device_destroy(devClass, MKDEV(majorNumber, 0));
class_unregister(devClass);
class_destroy(devClass);
unregister_chrdev(majorNumber, DEV_MODULENAME);
printk(KERN_INFO "Module unregistered.\n");
}
module_init(_module_init);
module_exit(_module_exit);
MODULE_LICENSE("GPL");
here comes the code of the application
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#define PAGE_SIZE (0x10000)
int main ( int argc, char **argv )
{
int fd;
char *address = NULL;
time_t t = time(NULL);
char *sbuff;
int i;
sbuff = (char*) calloc(PAGE_SIZE,sizeof(char));
fd = open("/dev/expdev", O_RDWR);
if(fd < 0)
{
perror("Open call failed");
return -1;
}
address = mmap( NULL,
PAGE_SIZE,
PROT_READ|PROT_WRITE,
MAP_SHARED,
fd,
0);
if (address == MAP_FAILED)
{
perror("mmap operation failed");
return -1;
}
printf("%s: first userspace read\n",tbuff);
memcpy(sbuff, address,80);
printf("Initial message: %s\n", sbuff);
memcpy(sbuff, address+0xf00,80);
printf("Initial message: %s\n", sbuff);
memcpy(sbuff, address+0xf000,80);
printf("Initial message: %s\n", sbuff);
for(i=0; i<PAGE_SIZE; i++)
{
printf("%16p: %c\n",address+i, (char)*(address+i));
}
if (munmap(address, PAGE_SIZE) == -1)
{
perror("Error un-mmapping the file");
}
close(fd);
return 0;
}
and this is the output of the application:
0x7fe61b522000: I
0x7fe61b522001: n
0x7fe61b522002: i
0x7fe61b522003: t
0x7fe61b522004: i
0x7fe61b522005: a
0x7fe61b522006: l
...
0x7fe61b522f00: S
0x7fe61b522f01: o
0x7fe61b522f02: m
0x7fe61b522f03: e
0x7fe61b522f04: w
0x7fe61b522f05: h
0x7fe61b522f06: e
0x7fe61b522f07: r
0x7fe61b522f08: e
...
0x7fe61b523000: I
0x7fe61b523001: n
0x7fe61b523002: i
0x7fe61b523003: t
0x7fe61b523004: i
0x7fe61b523005: a
0x7fe61b523006: l
...
0x7fe61b523f00: S
0x7fe61b523f01: o
0x7fe61b523f02: m
0x7fe61b523f03: e
0x7fe61b523f04: w
0x7fe61b523f05: h
0x7fe61b523f06: e
0x7fe61b523f07: r
0x7fe61b523f08: e
...
0x7fe61b524000: I
0x7fe61b524001: n
0x7fe61b524002: i
0x7fe61b524003: t
0x7fe61b524004: i
0x7fe61b524005: a
0x7fe61b524006: l
...
It seems to me, that the repetition comes with the size of one page. But this makes no sense to me.
EDIT 1:
Add Somewhere to the output. Note: Only Somehow never occurs!
EDIT 2:
Corrected fault handler. This now considered the offset of the calling vmf. Now it runs like a charm. Thanks to Tsyvarev!
static int
dev_vm_ops_fault( struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct page *page;
struct mmap_info *info;
info = (struct mmap_info *)vma->vm_private_data;
if (!info->data)
{
printk("No data\n");
return 0;
}
page = virt_to_page((info->data)+(vmf->pgoff*PAGE_SIZE));
get_page(page);
vmf->page = page;
return 0;
}
But on userspace side the content of the first 0x1000`
0x1000 is a size of the page mapped with
page = virt_to_page(info->data);
get_page(page);
vmf->page = page;
Callback .fault of structure vm_operations_struct is called for every page (4096 bytes), which is accessed by the user but not mapped yet.
So your code just map first 4096 bytes (0x1000) of data to every page which user space accesses.

How to append data on a packet from kernel space?

I am trying to append some data on a packet from kernel space. I have an echo client and server. I type in the command line like: ./client "message" and the server just echoes it back. The server was run with ./server .
Now, the client and server are on two different machines (may be VMs). I am writing a kernel module which runs on the client machine. Its work is to append "12345" after "message" while the packet goes out of the machine. I am presenting the code below.
/*
* This is ibss_obsf_cat.c
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/udp.h>
#include <linux/ip.h>
#undef __KERNEL__
#include <linux/netfilter_ipv4.h>
#define __KERNEL__
/*
* Function prototypes ...
*/
static unsigned int cat_obsf_begin (unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *));
static void hex_dump (char str[], int len)
{
}
/*
* struct nf_hook_ops instance initialization
*/
static struct nf_hook_ops cat_obsf_ops __read_mostly = {
.pf = NFPROTO_IPV4,
.priority = 1,
.hooknum = NF_IP_POST_ROUTING,
.hook = cat_obsf_begin,
};
/*
* Module init and exit functions.
* No need to worry about that.
*/
static int __init cat_obsf_init (void)
{
printk(KERN_ALERT "cat_obsf module started...\n");
return nf_register_hook(&cat_obsf_ops);
}
static void __exit cat_obsf_exit (void)
{
nf_unregister_hook(&cat_obsf_ops);
printk(KERN_ALERT "cat_obsf module stopped...\n");
}
/*
* Modification of the code begins here.
* Here are all the functions and other things.
*/
static unsigned int cat_obsf_begin (unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct iphdr *iph;
struct udphdr *udph;
unsigned char *data;
unsigned char dt[] = "12345";
unsigned char *tmp;
unsigned char *ptr;
int i, j, len;
if (skb){
iph = ip_hdr(skb);
if (iph && iph->protocol && (iph->protocol == IPPROTO_UDP)){
udph = (struct udphdr *) ((__u32 *)iph + iph->ihl);
data = (char *)udph + 8;
if(ntohs(udph->dest) == 6000){
for (i=0; data[i]; i++);
len = i;
//printk(KERN_ALERT "\nData length without skb: %d", len);
//printk(KERN_ALERT "Data is: %s", data);
//printk(KERN_ALERT "dt size: %lu", sizeof(dt));
//printk(KERN_ALERT "skb->len: %d", skb->len);
tmp = kmalloc(200*sizeof(char), GFP_KERNEL);
memcpy(tmp, data, len);
ptr = tmp + len;
memcpy(ptr, dt, sizeof(dt));
printk(KERN_ALERT "tmp: %s", tmp);
printk(KERN_ALERT "skb->tail: %d", skb->tail);
//skb_put(skb, sizeof(dt));
printk(KERN_ALERT "skb->end: %d", skb->end);
printk(KERN_ALERT "skb->tail: %d", skb->tail);
printk(KERN_ALERT "skb->tail(int): %d", (unsigned int)skb->tail);
//memset(data, 0, len + sizeof(dt));
//memcpy(data, tmp, len + sizeof(dt));
//skb_add_data(skb, tmp, len+sizeof(dt));
printk(KERN_ALERT "Now data is: %s", data);
for(i=0; data[i]; i++);
printk(KERN_ALERT "data length: %d", i);
kfree(tmp);
}
}
}
return NF_ACCEPT;
}
/*
* Nothing to be touched hereafter
*/
module_init(cat_obsf_init);
module_exit(cat_obsf_exit);
MODULE_AUTHOR("Rifat");
MODULE_DESCRIPTION("Module for packet mangling");
MODULE_LICENSE("GPL");
I want to get the "message" to be "message12345" while sending out of the client machine from kernel space. So that the server will get "message12345" and echo it back, and the client will the read just "message12345" But I am having trouble with skb_put() and skb_add_data() functions. I do not understand what error was made by me. If anyone can help me with the code, I will be highly grateful. Thanks in advance. I am also giving the Makefile for convenience. This is for the distribution kernel, not for a built kernel.
#If KERNELRELEASE is defined, we've been invoked from the
#kernel build system and use its language
ifneq ($(KERNELRELEASE),)
obj-m := ibss_obsf_cat.o
#Otherwise we were called directly from the command
#line; invoke the kernel build system.
else
KERNELDIR ?= /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)
default:
$(MAKE) -C $(KERNELDIR) M=$(PWD) modules
endif
Now I am quite convinced that
skb->end - skb->tail
is so small that I will have to create new packets in kernel space. I have used
alloc_skb()
skb_reserve()
skb_header_pointer()
and other useful skb functions for creating a new skb, but the thing I am running out of idea is that how to route the newly created packet in the packet flowing path. How to use
ip_route_me_harder()
I looked in the xtables-addons package for suggestion, but the function they used is different from the one in linux kernel. Any suggestion is welcomed.
About one year ago for kernel 2.6.26 I did it like this:
// Do we need extra space?
if(len - skb_tailroom(skb) > 0){
// Expand skb tail until we have enough room for the extra data
if (pskb_expand_head(skb, 0, extra_data_len - skb_tailroom(skb), GFP_ATOMIC)) {
// allocation failed. Do whatever you need to do
}
// Allocation succeeded
// Reserve space in skb and return the starting point
your_favourite_structure* ptr = (your_favourite_structure*)
skb_push(skb, sizeof(*ptr));
// Now either set each field of your structure or memcpy into it.
// Remember you can use a char*
}
Don't forget:
Recalculate UDP checksum, because you changed data in the transported data.
Change the field tot_len(total length) in the ip header, because you added data to the packet.
Recalculate the IP header checksum, because you changed the tot_len field.
Extra note:
This is just a simple thing. I see in your code you're allocating tmp as a 200 byte array and using that to store the data of your message. If you send a bigger packet you'll have a hard time debugging this as kernel crashes due to memory overflows are too painful.
I have solved the problem. It was trivial. And all I am going to do is to post my code for future references and discussion.
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netdevice.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <net/ip.h>
#include <net/udp.h>
#include <net/route.h>
#undef __KERNEL__
#include <linux/netfilter_ipv4.h>
#define __KERNEL__
#define IP_HDR_LEN 20
#define UDP_HDR_LEN 8
#define TOT_HDR_LEN 28
static unsigned int pkt_mangle_begin(unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *));
static struct nf_hook_ops pkt_mangle_ops __read_mostly = {
.pf = NFPROTO_IPV4,
.priority = 1,
.hooknum = NF_IP_LOCAL_OUT,
.hook = pkt_mangle_begin,
};
static int __init pkt_mangle_init(void)
{
printk(KERN_ALERT "\npkt_mangle module started ...");
return nf_register_hook(&pkt_mangle_ops);
}
static void __exit pkt_mangle_exit(void)
{
nf_unregister_hook(&pkt_mangle_ops);
printk(KERN_ALERT "pkt_mangle module stopped ...");
}
static unsigned int pkt_mangle_begin (unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct iphdr *iph;
struct udphdr *udph;
unsigned char *data;
unsigned int data_len;
unsigned char extra_data[] = "12345";
unsigned char *temp;
unsigned int extra_data_len;
unsigned int tot_data_len;
unsigned int i;
__u16 dst_port, src_port;
if (skb) {
iph = (struct iphdr *) skb_header_pointer (skb, 0, 0, NULL);
if (iph && iph->protocol &&(iph->protocol == IPPROTO_UDP)) {
udph = (struct udphdr *) skb_header_pointer (skb, IP_HDR_LEN, 0, NULL);
src_port = ntohs (udph->source);
dst_port = ntohs (udph->dest);
if (src_port == 6000) {
printk(KERN_ALERT "UDP packet goes out");
data = (unsigned char *) skb_header_pointer (skb, IP_HDR_LEN+UDP_HDR_LEN, 0, NULL);
data_len = skb->len - TOT_HDR_LEN;
temp = kmalloc(512 * sizeof(char), GFP_ATOMIC);
memcpy(temp, data, data_len);
unsigned char *ptr = temp + data_len - 1;
extra_data_len = sizeof(extra_data);
memcpy(ptr, extra_data, extra_data_len);
tot_data_len = data_len + extra_data_len - 1;
skb_put(skb, extra_data_len - 1);
memcpy(data, temp, tot_data_len);
/* Manipulating necessary header fields */
iph->tot_len = htons(tot_data_len + TOT_HDR_LEN);
udph->len = htons(tot_data_len + UDP_HDR_LEN);
/* Calculation of IP header checksum */
iph->check = 0;
ip_send_check (iph);
/* Calculation of UDP checksum */
udph->check = 0;
int offset = skb_transport_offset(skb);
int len = skb->len - offset;
udph->check = ~csum_tcpudp_magic((iph->saddr), (iph->daddr), len, IPPROTO_UDP, 0);
}
}
}
return NF_ACCEPT;
}
module_init(pkt_mangle_init);
module_exit(pkt_mangle_exit);
MODULE_AUTHOR("Rifat Rahman Ovi: <rifatrahmanovi#gmail.com>");
MODULE_DESCRIPTION("Outward Packet Mangling and Decryption in Kernel Space");
MODULE_LICENSE("GPL");
Here the thing is that, I forgot to update the length fields and forgot to update the checksum. Now, if I present the code correctly here, all should go well. There are some
other helper functions which are not included here.

Resources