How to append data on a packet from kernel space? - c

I am trying to append some data on a packet from kernel space. I have an echo client and server. I type in the command line like: ./client "message" and the server just echoes it back. The server was run with ./server .
Now, the client and server are on two different machines (may be VMs). I am writing a kernel module which runs on the client machine. Its work is to append "12345" after "message" while the packet goes out of the machine. I am presenting the code below.
/*
* This is ibss_obsf_cat.c
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/udp.h>
#include <linux/ip.h>
#undef __KERNEL__
#include <linux/netfilter_ipv4.h>
#define __KERNEL__
/*
* Function prototypes ...
*/
/* Forward declaration of the netfilter hook so cat_obsf_ops can reference it before its definition. */
static unsigned int cat_obsf_begin (unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *));
/* Placeholder for a hex-dump helper: the body is empty, so calling it has no effect. */
static void hex_dump (char str[], int len)
{
}
/*
* struct nf_hook_ops instance initialization
*/
/*
 * Hook descriptor handed to nf_register_hook(): attaches cat_obsf_begin to the
 * NF_IP_POST_ROUTING hook point of the IPv4 family with priority 1.
 */
static struct nf_hook_ops cat_obsf_ops __read_mostly = {
.pf = NFPROTO_IPV4,
.priority = 1,
.hooknum = NF_IP_POST_ROUTING,
.hook = cat_obsf_begin,
};
/*
* Module init and exit functions.
* No need to worry about that.
*/
/*
 * Module entry point: logs a start-up banner and registers the hook
 * descriptor. The result of nf_register_hook() becomes the module's
 * load status.
 */
static int __init cat_obsf_init (void)
{
	int status;

	printk(KERN_ALERT "cat_obsf module started...\n");
	status = nf_register_hook(&cat_obsf_ops);

	return status;
}
/* Module exit point: removes the netfilter hook first, then logs that the module stopped. */
static void __exit cat_obsf_exit (void)
{
nf_unregister_hook(&cat_obsf_ops);
printk(KERN_ALERT "cat_obsf module stopped...\n");
}
/*
* Modification of the code begins here.
* Here are all the functions and other things.
*/
static unsigned int cat_obsf_begin (unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct iphdr *iph;
struct udphdr *udph;
unsigned char *data;
unsigned char dt[] = "12345";
unsigned char *tmp;
unsigned char *ptr;
int i, j, len;
if (skb){
iph = ip_hdr(skb);
if (iph && iph->protocol && (iph->protocol == IPPROTO_UDP)){
udph = (struct udphdr *) ((__u32 *)iph + iph->ihl);
data = (char *)udph + 8;
if(ntohs(udph->dest) == 6000){
for (i=0; data[i]; i++);
len = i;
//printk(KERN_ALERT "\nData length without skb: %d", len);
//printk(KERN_ALERT "Data is: %s", data);
//printk(KERN_ALERT "dt size: %lu", sizeof(dt));
//printk(KERN_ALERT "skb->len: %d", skb->len);
tmp = kmalloc(200*sizeof(char), GFP_KERNEL);
memcpy(tmp, data, len);
ptr = tmp + len;
memcpy(ptr, dt, sizeof(dt));
printk(KERN_ALERT "tmp: %s", tmp);
printk(KERN_ALERT "skb->tail: %d", skb->tail);
//skb_put(skb, sizeof(dt));
printk(KERN_ALERT "skb->end: %d", skb->end);
printk(KERN_ALERT "skb->tail: %d", skb->tail);
printk(KERN_ALERT "skb->tail(int): %d", (unsigned int)skb->tail);
//memset(data, 0, len + sizeof(dt));
//memcpy(data, tmp, len + sizeof(dt));
//skb_add_data(skb, tmp, len+sizeof(dt));
printk(KERN_ALERT "Now data is: %s", data);
for(i=0; data[i]; i++);
printk(KERN_ALERT "data length: %d", i);
kfree(tmp);
}
}
}
return NF_ACCEPT;
}
/*
* Nothing to be touched hereafter
*/
module_init(cat_obsf_init);
module_exit(cat_obsf_exit);
MODULE_AUTHOR("Rifat");
MODULE_DESCRIPTION("Module for packet mangling");
MODULE_LICENSE("GPL");
I want the "message" to become "message12345" as it leaves the client machine, with the change made in kernel space, so that the server receives "message12345" and echoes it back, and the client then reads just "message12345". But I am having trouble with the skb_put() and skb_add_data() functions, and I do not understand what mistake I made. If anyone can help me with the code, I will be highly grateful. Thanks in advance. I am also giving the Makefile for convenience. This is for the distribution kernel, not for a custom-built kernel.
# If KERNELRELEASE is defined, we've been invoked from the
# kernel build system and use its language.
ifneq ($(KERNELRELEASE),)
obj-m := ibss_obsf_cat.o
# Otherwise we were called directly from the command
# line; invoke the kernel build system.
else
KERNELDIR ?= /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)

.PHONY: default clean

# Recipe lines below must begin with a TAB character, not spaces.
default:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules

# Remove the objects produced by the out-of-tree module build.
clean:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
endif
Now I am quite convinced that
skb->end - skb->tail
is so small that I will have to create new packets in kernel space. I have used
alloc_skb()
skb_reserve()
skb_header_pointer()
and other useful skb functions for creating a new skb, but the thing I am running out of idea is that how to route the newly created packet in the packet flowing path. How to use
ip_route_me_harder()
I looked in the xtables-addons package for suggestion, but the function they used is different from the one in linux kernel. Any suggestion is welcomed.

About one year ago for kernel 2.6.26 I did it like this:
// Do we need extra space?
if(len - skb_tailroom(skb) > 0){
// Expand skb tail until we have enough room for the extra data
if (pskb_expand_head(skb, 0, extra_data_len - skb_tailroom(skb), GFP_ATOMIC)) {
// allocation failed. Do whatever you need to do
}
// Allocation succeeded
// Reserve space in skb and return the starting point
your_favourite_structure* ptr = (your_favourite_structure*)
skb_push(skb, sizeof(*ptr));
// Now either set each field of your structure or memcpy into it.
// Remember you can use a char*
}
Don't forget:
Recalculate UDP checksum, because you changed data in the transported data.
Change the field tot_len(total length) in the ip header, because you added data to the packet.
Recalculate the IP header checksum, because you changed the tot_len field.
Extra note:
This is just a simple thing. I see in your code you're allocating tmp as a 200 byte array and using that to store the data of your message. If you send a bigger packet you'll have a hard time debugging this as kernel crashes due to memory overflows are too painful.

I have solved the problem. It was trivial. And all I am going to do is to post my code for future references and discussion.
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netdevice.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <net/ip.h>
#include <net/udp.h>
#include <net/route.h>
#undef __KERNEL__
#include <linux/netfilter_ipv4.h>
#define __KERNEL__
#define IP_HDR_LEN 20
#define UDP_HDR_LEN 8
#define TOT_HDR_LEN 28
/* Forward declaration of the netfilter hook so pkt_mangle_ops can reference it before its definition. */
static unsigned int pkt_mangle_begin(unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *));
/*
 * Hook descriptor handed to nf_register_hook(): attaches pkt_mangle_begin to
 * the NF_IP_LOCAL_OUT hook point of the IPv4 family with priority 1.
 */
static struct nf_hook_ops pkt_mangle_ops __read_mostly = {
.pf = NFPROTO_IPV4,
.priority = 1,
.hooknum = NF_IP_LOCAL_OUT,
.hook = pkt_mangle_begin,
};
/*
 * Module entry point: logs a start-up banner and registers the hook
 * descriptor. The result of nf_register_hook() becomes the module's
 * load status.
 */
static int __init pkt_mangle_init(void)
{
	int status;

	printk(KERN_ALERT "\npkt_mangle module started ...");
	status = nf_register_hook(&pkt_mangle_ops);

	return status;
}
/* Module exit point: removes the netfilter hook first, then logs that the module stopped. */
static void __exit pkt_mangle_exit(void)
{
nf_unregister_hook(&pkt_mangle_ops);
printk(KERN_ALERT "pkt_mangle module stopped ...");
}
static unsigned int pkt_mangle_begin (unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct iphdr *iph;
struct udphdr *udph;
unsigned char *data;
unsigned int data_len;
unsigned char extra_data[] = "12345";
unsigned char *temp;
unsigned int extra_data_len;
unsigned int tot_data_len;
unsigned int i;
__u16 dst_port, src_port;
if (skb) {
iph = (struct iphdr *) skb_header_pointer (skb, 0, 0, NULL);
if (iph && iph->protocol &&(iph->protocol == IPPROTO_UDP)) {
udph = (struct udphdr *) skb_header_pointer (skb, IP_HDR_LEN, 0, NULL);
src_port = ntohs (udph->source);
dst_port = ntohs (udph->dest);
if (src_port == 6000) {
printk(KERN_ALERT "UDP packet goes out");
data = (unsigned char *) skb_header_pointer (skb, IP_HDR_LEN+UDP_HDR_LEN, 0, NULL);
data_len = skb->len - TOT_HDR_LEN;
temp = kmalloc(512 * sizeof(char), GFP_ATOMIC);
memcpy(temp, data, data_len);
unsigned char *ptr = temp + data_len - 1;
extra_data_len = sizeof(extra_data);
memcpy(ptr, extra_data, extra_data_len);
tot_data_len = data_len + extra_data_len - 1;
skb_put(skb, extra_data_len - 1);
memcpy(data, temp, tot_data_len);
/* Manipulating necessary header fields */
iph->tot_len = htons(tot_data_len + TOT_HDR_LEN);
udph->len = htons(tot_data_len + UDP_HDR_LEN);
/* Calculation of IP header checksum */
iph->check = 0;
ip_send_check (iph);
/* Calculation of UDP checksum */
udph->check = 0;
int offset = skb_transport_offset(skb);
int len = skb->len - offset;
udph->check = ~csum_tcpudp_magic((iph->saddr), (iph->daddr), len, IPPROTO_UDP, 0);
}
}
}
return NF_ACCEPT;
}
module_init(pkt_mangle_init);
module_exit(pkt_mangle_exit);
MODULE_AUTHOR("Rifat Rahman Ovi: <rifatrahmanovi#gmail.com>");
MODULE_DESCRIPTION("Outward Packet Mangling and Decryption in Kernel Space");
MODULE_LICENSE("GPL");
The problem was that I had forgotten to update the length fields and to recalculate the checksums. With the code presented as above, everything should work. There are some
other helper functions which are not included here.

Related

Multiple connections in Contiki-NG: stack error

I'm trying to implement my version of clustering in Contiki-ng. I took some inspiration from this code: Clustring example
What I did basically was that I created two connection:
static struct simple_udp_connection broad_conn;
static struct simple_udp_connection uni_conn;
However, though the code compiles and starts running in cooja, it stops showing me an error in the stack. I traced the error message and it looked like it came from this part of the code in stack-check.c:
if(p >= (uint8_t*)GET_STACK_ORIGIN()) {
/* This means the stack is screwed. */
return -1;
}
actual = stack_check_get_usage();
allowed = stack_check_get_reserved_size();
if(actual < 0 || allowed < 0) {
LOG_ERR("Check in inconsistent state: %" PRId32 " vs. %" PRId32 "\n", actual, allowed);
There is no memory overflow, and besides the callback functions, there is nothing in the code. The only thing that I think off is that it's because of the connections, but I just don't see why.
This is the code of nodes.c:
#include "contiki.h"
#include "simple-udp.h"
#include "sys/log.h"
#define LOG_MODULE "SensorNode"
#define LOG_LEVEL LOG_LEVEL_INFO
#define UDP_PORT_BROADCAST 1234
#define UDP_PORT_UNICAST 4321
static struct simple_udp_connection broadcast_conn;
static struct simple_udp_connection unicast_conn;
static uip_ipaddr_t CH_address;
PROCESS(nodes_process, "Nodes");
AUTOSTART_PROCESSES(&nodes_process);
/*---------------------------------Not Me------------------------------------------*/
/*
 * Receive callback registered for broadcast_conn.
 * Logs the received payload (datalen bytes) and the sender's address, then
 * stores the sender's address in CH_address, which the process later uses
 * as the destination of its unicast messages.
 */
static void
broadcast_receiver(struct simple_udp_connection *c,
const uip_ipaddr_t *sender_addr,
uint16_t sender_port,
const uip_ipaddr_t *receiver_addr,
uint16_t receiver_port,
const uint8_t *data,
uint16_t datalen)
{
LOG_INFO("Received broadcast '%.*s' from ", datalen, (char *) data);
LOG_INFO_6ADDR(sender_addr);
LOG_INFO_("\n");
uip_ipaddr_copy(&CH_address, sender_addr);
}
/*---------------------------------------------------------------------------*/
/*
 * Receive callback registered for unicast_conn.
 * Only logs the received payload (datalen bytes) and the sender's address.
 */
static void
unicast_receiver(struct simple_udp_connection *c,
const uip_ipaddr_t *sender_addr,
uint16_t sender_port,
const uip_ipaddr_t *receiver_addr,
uint16_t receiver_port,
const uint8_t *data,
uint16_t datalen)
{
LOG_INFO("Received reply '%.*s' from ", datalen, (char *) data);
LOG_INFO_6ADDR(sender_addr);
LOG_INFO_("\n");
}
/*---------------------------------------------------------------------------*/
/*
 * Main process of a sensor node.
 *
 * Registers the broadcast and the unicast UDP connections, then every
 * 60 seconds sends "hello <count>" to the cluster head whose address was
 * stored in CH_address by broadcast_receiver().
 *
 * CH_address is a zero-initialised static, i.e. the unspecified address,
 * until the first broadcast arrives; in that state the send is skipped
 * instead of transmitting to an invalid destination.
 *
 * Locals are static because protothread locals do not survive a blocking
 * wait.
 */
PROCESS_THREAD(nodes_process, ev, data)
{
  static struct etimer periodic_timer;
  static unsigned count;
  static char str[32];

  PROCESS_BEGIN();

  /* Initialize UDP broadcast connection */
  simple_udp_register(&broadcast_conn, UDP_PORT_BROADCAST, NULL,
                      UDP_PORT_BROADCAST, broadcast_receiver);

  /* Initialize UDP unicast connection */
  simple_udp_register(&unicast_conn, UDP_PORT_UNICAST, NULL,
                      UDP_PORT_UNICAST, unicast_receiver);

  /* Send messages to the clusterhead every 60 seconds */
  etimer_set(&periodic_timer, 60*CLOCK_SECOND);

  while(1) {
    PROCESS_WAIT_EVENT_UNTIL(etimer_expired(&periodic_timer));

    if(uip_is_addr_unspecified(&CH_address)) {
      LOG_INFO("No cluster head known yet, not sending message %u\n", count);
    } else {
      LOG_INFO("Sending message %u to ", count);
      LOG_INFO_6ADDR(&CH_address);
      LOG_INFO_("\n");

      /* count is unsigned, so it is formatted with %u. */
      snprintf(str, sizeof(str), "hello %u", count);
      simple_udp_sendto(&unicast_conn, str, strlen(str), &CH_address);
      count++;
    }

    etimer_reset(&periodic_timer);
  }

  PROCESS_END();
}
Any help in explaining this would be great.
Thank you.

Protecting shared memory segment between kernel and user space

I have shared memory segment created in kernel using mmap. I need to access this mapped memory from both kernel and user space. What mechanism should I use to protect the memory from concurrent access ?
I want to have something like:
Kernel module:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/mm.h>
#ifndef VM_RESERVED
# define VM_RESERVED (VM_DONTEXPAND | VM_DONTDUMP)
#endif
struct dentry *file;
/* Per-open-file state, stored in filp->private_data and in vma->vm_private_data. */
struct mmap_info
{
/* Kernel address of the page exposed to user space (allocated in mmapfop_open). */
char *data;
/* Counter incremented in mmap_open() and decremented in mmap_close(). */
int reference;
};
/* VMA open callback: increments the reference counter of the mmap_info attached to the VMA. */
void mmap_open(struct vm_area_struct *vma)
{
struct mmap_info *info = (struct mmap_info *)vma->vm_private_data;
info->reference++;
}
/* VMA close callback: decrements the reference counter of the mmap_info attached to the VMA. */
void mmap_close(struct vm_area_struct *vma)
{
struct mmap_info *info = (struct mmap_info *)vma->vm_private_data;
info->reference--;
}
static int mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page;
struct mmap_info *info;
info = (struct mmap_info *)vma->vm_private_data;
if (!info->data)
{
printk("No data\n");
return 0;
}
page = virt_to_page(info->data);
get_page(page);
vmf->page = page;
return 0;
}
/* VMA operations installed by op_mmap(): reference counting on open/close, page supply on fault. */
struct vm_operations_struct mmap_vm_ops =
{
.open = mmap_open,
.close = mmap_close,
.fault = mmap_fault,
};
/*
 * mmap file operation: installs mmap_vm_ops on the new VMA, sets the
 * VM_RESERVED flag, hands the file's mmap_info to the VMA and calls
 * mmap_open() once by hand for this initial mapping.
 * Always returns 0.
 */
int op_mmap(struct file *filp, struct vm_area_struct *vma)
{
vma->vm_ops = &mmap_vm_ops;
vma->vm_flags |= VM_RESERVED;
vma->vm_private_data = filp->private_data;
mmap_open(vma);
return 0;
}
/*
 * release file operation: frees the data page and the mmap_info allocated in
 * mmapfop_open() and clears filp->private_data.
 * NOTE(review): info->reference is not consulted before freeing -- confirm
 * that no mapping can still reference the page at this point.
 */
int mmapfop_close(struct inode *inode, struct file *filp)
{
struct mmap_info *info = filp->private_data;
free_page((unsigned long)info->data);
kfree(info);
filp->private_data = NULL;
return 0;
}
int mmapfop_open(struct inode *inode, struct file *filp)
{
struct mmap_info *info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL);
info->data = (char *)get_zeroed_page(GFP_KERNEL);
memcpy(info->data, "hello from kernel this is file: ", 32);
memcpy(info->data + 32, filp->f_dentry->d_name.name, strlen(filp->f_dentry->d_name.name));
/* assign this info struct to the file */
filp->private_data = info;
return 0;
}
/* File operations of the debugfs entry: per-open allocation, release, and mmap support. */
static const struct file_operations mmap_fops = {
.open = mmapfop_open,
.release = mmapfop_close,
.mmap = op_mmap,
};
/*
 * Module entry point: creates the debugfs file "mmap_example" (mode 0644,
 * no parent directory given) backed by mmap_fops and keeps its dentry in
 * the global `file`. The result of the creation is not checked; always
 * returns 0.
 */
static int __init mmapexample_module_init(void)
{
file = debugfs_create_file("mmap_example", 0644, NULL, NULL, &mmap_fops);
return 0;
}
/* Module exit point: removes the debugfs file created at load time. */
static void __exit mmapexample_module_exit(void)
{
debugfs_remove(file);
}
module_init(mmapexample_module_init);
module_exit(mmapexample_module_exit);
MODULE_LICENSE("GPL");
User space:
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#define PAGE_SIZE 4096
/*
 * Maps one page of the debugfs file exported by the kernel module, prints
 * the message found there, overwrites six bytes at offset 11 with "*user*"
 * and prints the message again.
 *
 * Returns 0 on success and -1 when open, mmap or munmap fails.  Unlike the
 * original, the descriptor is closed on the mmap error path and the
 * mapping is released before exit.
 */
int main ( int argc, char **argv )
{
	int configfd;
	int status = 0;
	char *address = NULL;

	(void)argc;
	(void)argv;

	configfd = open("/sys/kernel/debug/mmap_example", O_RDWR);
	if (configfd < 0) {
		perror("Open call failed");
		return -1;
	}

	address = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, configfd, 0);
	if (address == MAP_FAILED) {
		perror("mmap operation failed");
		close(configfd);
		return -1;
	}

	printf("Initial message: %s\n", address);
	memcpy(address + 11 , "*user*", 6);
	printf("Changed message: %s\n", address);

	if (munmap(address, PAGE_SIZE) != 0) {
		perror("munmap operation failed");
		status = -1;
	}
	close(configfd);
	return status;
}
but with locks.
Kernel space and user space have no shared mechanisms for concurrent access protection. If you want them, you need to implement them by yourself.
One option is some sort of mutex, implemented within your kernel module and accessed from user space via dedicated ioctl requests:
Kernel:
DECLARE_WAIT_QUEUE_HEAD(wq);
int my_mutex_val = 0;
/*
* Lock mutex.
*
* May be used directly by the kernel or via 'ioctl(MY_CMD_LOCK)' by user.
*/
void my_mutex_lock(void)
{
spin_lock(&wq.lock);
wait_event_interruptible_locked(&wq, my_mutex_val == 0);
my_mutex_val = 1;
spin_unlock(&wq.lock);
}
/*
* Unlock mutex.
*
* May be used directly by the kernel or via 'ioctl(MY_CMD_UNLOCK)' by user.
*/
void my_mutex_unlock(void)
{
spin_lock(&wq.lock);
my_mutex_val = 0;
wake_up(&wq);
spin_unlock(&wq.lock);
}
long unlocked_ioctl (struct file * filp, unsigned int cmd, unsigned long val)
{
switch(cmd) {
case MY_CMD_LOCK:
my_mutex_lock();
break;
case MY_CMD_UNLOCK:
my_mutex_unlock();
break;
}
}
User:
int main()
{
...
ioctl(MY_CMD_LOCK);
<read data>
ioctl(MY_CMD_UNLOCK);
...
}
It can be some sort of spinlock, which value is stored in mmap-ed area (so visible both for kernel space and user space).
In any case, kernel module should be prepared for the case, when user space application doesn't follow locking conventions. This, probably, would cancel any expectation about mmap-ed area content, generated by the kernel, but kernel module shouldn't crash in that case. [This is why standard kernel's struct mutex is not used in the code above: user space may use it incorrectly].
The problem with the ioctl is you need a kernel switch every time you want to access the share info->data. If that is okay then the ioctl is good - but then why not just do a standard character read/write file operation instead?
You can also try a lock-less mechanism. In the shared info->data area add a barrier variable. When the user needs access, it will do an atomic_compare_and_xchg on the barrier variable until it is set to 0 (unused) and then set it to 1. When the kernel needs access it will do the same but set it to 2. See the gcc atomic builtin documentation.

How to send and receive messages from function other than registered callback function in Netlink socket?

In following kernel module, I hooked syscall sys_open, and now trying to send filename to process in userspace using Netlink socket, in response process will return a msg, and then according to msg, the kernel module will proceed further.
source code: foo.c
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <linux/syscalls.h>
#include <linux/delay.h> // loops_per_jiffy
//===============netlink=================
#include <linux/module.h>
#include <net/sock.h>
#include <linux/netlink.h>
#include <linux/skbuff.h>
#define NETLINK_USER 31
struct sock *nl_sk = NULL;
//===============netlink=================
#define CR0_WP 0x00010000 // Write Protect Bit (CR0:16)
/* Just so we do not taint the kernel */
MODULE_LICENSE("GPL");
void **syscall_table;
unsigned long **find_sys_call_table(void);
long (*orig_sys_open)(const char __user *filename, int flags, int mode);
//===============netlink=================
static void hello_nl_recv_msg(struct sk_buff *skb)
{
struct nlmsghdr *nlh;
int pid;
struct sk_buff *skb_out;
int msg_size;
char *msg = "Hello from kernel";
int res;
printk(KERN_INFO "Entering: %s\n", __FUNCTION__);
msg_size = strlen(msg);
nlh = (struct nlmsghdr *)skb->data;
printk(KERN_INFO "Netlink received msg payload: %s\n", (char *)nlmsg_data(nlh));
pid = nlh->nlmsg_pid; /*pid of sending process */
skb_out = nlmsg_new(msg_size, 0);
if (!skb_out)
{
printk(KERN_ERR "Failed to allocate new skb\n");
return;
}
nlh = nlmsg_put(skb_out, 0, 0, NLMSG_DONE, msg_size, 0);
NETLINK_CB(skb_out).dst_group = 0; /* not in mcast group */
strncpy(nlmsg_data(nlh), msg, msg_size);
res = nlmsg_unicast(nl_sk, skb_out, pid);
if (res < 0)
printk(KERN_INFO "Error while sending bak to user\n");
}
//===============netlink=================
/*
 * Locates the system call table by brute force.
 *
 * Walks memory in pointer-sized steps from the address of sys_close up to
 * the address of loops_per_jiffy, treating every position as a candidate
 * table and accepting the first one whose __NR_close slot holds the address
 * of sys_close.
 *
 * Returns the candidate address, or NULL when the range is exhausted.
 * NOTE(review): this relies on the table lying between those two symbols
 * and on both symbols being visible to the module -- confirm for the target
 * kernel.
 */
unsigned long **find_sys_call_table()
{
unsigned long ptr;
unsigned long *p;
for (ptr = (unsigned long)sys_close;
ptr < (unsigned long)&loops_per_jiffy;
ptr += sizeof(void *))
{
p = (unsigned long *)ptr;
/* A real table has sys_close stored at index __NR_close. */
if (p[__NR_close] == (unsigned long)sys_close)
{
printk(KERN_DEBUG "Found the sys_call_table!!!\n");
return (unsigned long **)p;
}
}
return NULL;
}
/*
 * Replacement for sys_open: forwards to the original handler and logs the
 * file name.
 *
 * `filename` is a user-space pointer, so it must not be dereferenced
 * directly (the original handed it to printk's %s).  The name is first
 * copied into a kernel buffer with strncpy_from_user(); names longer than
 * the buffer are truncated in the log only.
 *
 * Returns whatever the original sys_open returned.
 */
long my_sys_open(const char __user *filename, int flags, int mode)
{
	char kname[256];
	long copied;
	long ret;

	//Send filename & get response from user space app
	if(/*user_space_response ==*/ 0)
	{
		/*Other processing*/
	}

	ret = orig_sys_open(filename, flags, mode);

	/* Returns the copied length (without NUL) or a negative error. */
	copied = strncpy_from_user(kname, filename, sizeof(kname) - 1);
	if (copied < 0)
		strcpy(kname, "<unreadable>");
	else
		kname[copied] = '\0';

	printk(KERN_DEBUG "file %s has been opened with mode %d\n", kname, mode);
	return ret;
}
/*
 * Module entry point: finds the system call table, creates the netlink
 * socket and replaces the __NR_open entry with my_sys_open.
 *
 * Returns 0 on success and -1 when the table cannot be found or the netlink
 * socket cannot be created.
 */
static int __init syscall_init(void)
{
int ret;
unsigned long addr;
unsigned long cr0;
syscall_table = (void **)find_sys_call_table();
if (!syscall_table)
{
printk(KERN_DEBUG "Cannot find the system call address\n");
return -1;
}
//===============netlink=================
/* Kernel-side netlink socket; hello_nl_recv_msg handles incoming messages. */
nl_sk = netlink_kernel_create(&init_net, NETLINK_USER, 0, hello_nl_recv_msg, NULL, THIS_MODULE);
if (!nl_sk)
{
printk(KERN_DEBUG "Error creating socket.\n");
return -1;
}
//===============netlink=================
/* Clear the CR0 write-protect bit while the table is patched; restored below. */
cr0 = read_cr0();
write_cr0(cr0 & ~CR0_WP);
addr = (unsigned long)syscall_table;
/* Request read-write access for 3 pages starting at the page boundary computed below. */
ret = set_memory_rw(PAGE_ALIGN(addr) - PAGE_SIZE, 3);
if(ret)
{
/* The failure is only logged; the table is still written afterwards. */
printk(KERN_DEBUG "Cannot set the memory to rw (%d) at addr %16lX\n", ret, PAGE_ALIGN(addr) - PAGE_SIZE);
}
else
{
printk(KERN_DEBUG "3 pages set to rw");
}
/* Remember the original handler so it can be called and later restored. */
orig_sys_open = syscall_table[__NR_open];
syscall_table[__NR_open] = my_sys_open;
write_cr0(cr0);
return 0;
}
/*
 * Module exit point: restores the original __NR_open entry (with the CR0
 * write-protect bit cleared for the duration of the write) and releases the
 * netlink socket.
 */
static void __exit syscall_release(void)
{
unsigned long cr0;
cr0 = read_cr0();
write_cr0(cr0 & ~CR0_WP);
syscall_table[__NR_open] = orig_sys_open;
write_cr0(cr0);
netlink_kernel_release(nl_sk);
}
module_init(syscall_init);
module_exit(syscall_release);
The function 'hello_nl_recv_msg' which is a callback function sends and receives msgs to the process but How can I send msg (i.e. filename) from function 'my_sys_open' to process in user space? and how to wait for response?
Makefile :
obj-m += foo.o

# Kernel build tree. Override on the command line if needed, e.g.
#   make KDIR=/lib/modules/$(shell uname -r)/build
KDIR ?= /usr/src/linux-headers-3.2.0-23-generic/

.PHONY: all clean

# Recipe lines below must begin with a TAB character, not spaces.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean
Thanks for your time ;)
How can I send msg (i.e. filename) from function 'my_sys_open' to process in user space?
User-space program should create socket AF_NETLINK, address of this socket will be used to send message to it. For detailed info read man netlink.
and how to wait for response?
You can use any standard mechanism to make my_sys_open wait for the response event raised in hello_nl_recv_msg, e.g. wait_event. Simplified code:
/*
* Whether responce is recieved.
*
* For process concurrent open's this should be map,
* e.g., struct task_struct -> bool.
*/
/* Set to 1 by hello_nl_recv_msg when the response arrives; cleared by my_sys_open before sending. */
int have_responce = 0;
DECLARE_WAIT_QUEUE_HEAD(responce_waitqueue); // Wait queue used to wait for the response.
/* Netlink input callback (sketch): flags the response and wakes the waiter. */
static void hello_nl_recv_msg(struct sk_buff *skb)
{
...
if(<detect responce from user program>)
{
have_responce = 1;
/* wake_up_all() takes a pointer to the wait-queue head. */
wake_up_all(&responce_waitqueue);
}
...
}
/* Hooked open (sketch): sends the request to user space and blocks until the response is flagged. */
long my_sys_open(const char __user *filename, int flags, int mode)
{
struct sk_buff *skb_out;
...
have_responce = 0; // clear the response flag
nlmsg_unicast(nl_sk, skb_out, <stored_user_pid>);// send message
/* wait_event() takes the wait-queue head itself, not a pointer. */
wait_event(responce_waitqueue, have_responce); // wait until the response is received
....
}

Understanding spinlocks in netfilter hook

I am writing a small kernel module for measuring the time that a network packet takes to exit a node.
This module is a hook in the netfilter library.
For each packet it receives it calculates an hash, gets the tstamp from skbuff and the actual timestamp, and saves all this data in a linked list.
To pass this data to userspace I've created a proc device, and when the user reads from the device I send one of the entries of the linked list.
To make changes to the list (read and write) I have a spinlock. The problem is that sometimes when I read from the proc device while I am processing packets the system crash.
I think that the problem is in the function "dump_data_to_proc", more specifically when try to acquire the spinlock. I've made some tests and it only crashes(softlockup) when running in a tplink router. When I run the module in a "normal" pc(single core) it don't crash,
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
#include <linux/init.h> /* Needed for the macros */
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
#include <linux/spinlock.h>
#include <net/ipv6.h>
#include <linux/proc_fs.h> /* Necessary because of proc fs */
#include <asm/uaccess.h> /* for copy_from_user */
#include "kmodule_measure_process_time.h"
#include "hash.c"
//DEBUG >=5 is very slow in the tplink
#define DEBUG 2
#define PROCFS_MAX_SIZE 64
#define PROCFS_NAME "measures"
#define MAXIMUM_SAMPLES 10000
static struct nf_hook_ops nfho;
unsigned int total_packets_processed= 0;
unsigned int total_packets_discarded=0;
int temp_counter=0;
struct values_list *HEAD;
spinlock_t list_lock ;
/* seq_file show callback passed to single_open(): prints a fixed banner. Always returns 0. */
static int hello_proc(struct seq_file *m, void *v) {
seq_printf(m, " stats Mod initialized.\n");
return 0;
}
/* open callback of the proc entry: sets up a single-record seq_file using hello_proc. */
static int proc_open(struct inode *inode, struct file *file) {
return single_open(file, hello_proc, NULL);
}
ssize_t dump_data_to_proc(struct file *filp, char *buffer, size_t length, loff_t *offset){
int bytesRead = 0;
struct values_list *temp=NULL;
int bytesError=0;
char buff[PROCFS_MAX_SIZE];
spin_lock(&list_lock);
temp=HEAD;
if(temp!=NULL){
HEAD = temp->next;
}
spin_unlock(&list_lock);
if(temp!=NULL){
bytesRead = snprintf(buff, PROCFS_MAX_SIZE ,"%u|%llu|%llu\n", temp->hash,temp->arrival_timestap, temp->exit_timestap);
length = length - bytesRead+1;
kfree(temp);
temp_counter--;
}
bytesError= copy_to_user(buffer, buff, bytesRead);
if(bytesError!=0){
#if DEBUG >0
printk(KERN_INFO "Error: failed to copy to user");
#endif
}
return bytesRead;
}
/*
 * File operations of the proc entry. Open/llseek/release use the seq_file
 * helpers, while reads are served by dump_data_to_proc, which hands out one
 * list entry per call.
 */
static const struct file_operations proc_fops = {
.owner = THIS_MODULE,
.open = proc_open,
.read = dump_data_to_proc,
.llseek = seq_lseek,
.release = single_release,
};
/*
 * Netfilter hook: records one sample (address hash, arrival timestamp, exit
 * timestamp) per packet at the head of the global list, unless the list
 * already holds MAXIMUM_SAMPLES entries.
 *
 * Always returns NF_ACCEPT.
 *
 * Fixes relative to the original:
 *  - the list lock is taken with spin_lock_bh(), matching the proc reader,
 *    so a softirq invocation cannot spin on a lock held by the code it
 *    interrupted on the same CPU;
 *  - both statistics counters are updated under the lock;
 *  - the cap uses >=, so the list never exceeds MAXIMUM_SAMPLES entries
 *    (the original allowed one extra).
 */
static unsigned int hook_func(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
{
	uint32_t hash = 0;
	ktime_t now_timeval;
	struct timespec now;
	u64 timestamp_arrival_time;
	u64 timestamp_now;
	struct ipv6hdr *ipheader;
	struct values_list *node;
	int list_full;

	spin_lock_bh(&list_lock);
	list_full = (temp_counter >= MAXIMUM_SAMPLES);
	if (list_full)
		total_packets_discarded++;
	else
		total_packets_processed++;
	spin_unlock_bh(&list_lock);

	if (list_full) {
#if DEBUG > 5
		printk(KERN_INFO "Discarded one sample because the list is full.\n");
#endif
		return NF_ACCEPT;
	}

	//calculate arrival time and actual time in ns
	timestamp_arrival_time = ktime_to_ns(skb->tstamp);
	getnstimeofday(&now);
	now_timeval = timespec_to_ktime(now);
	timestamp_now = ktime_to_ns(now_timeval);

	//get Ipv6 addresses: saddr and daddr are hashed as one contiguous region
	ipheader = (struct ipv6hdr *)skb_network_header(skb);
	hash = simple_hash((char *)&ipheader->saddr, sizeof(struct in6_addr) * 2, hash);

	/* GFP_ATOMIC: the hook must not sleep. */
	node = (struct values_list *) kmalloc(sizeof(struct values_list), GFP_ATOMIC);
	if (!node) {
#if DEBUG >0
		printk(KERN_INFO "Error cannot malloc\n");
#endif
		return NF_ACCEPT;
	}
	node->hash = hash;
	node->arrival_timestap = timestamp_arrival_time;
	node->exit_timestap = timestamp_now;

	spin_lock_bh(&list_lock);
	node->next = HEAD;
	HEAD = node;
	temp_counter++;
	spin_unlock_bh(&list_lock);

	return NF_ACCEPT;
}
/*
 * Module entry point: prepares the list lock, enables packet timestamping,
 * creates the proc entry and registers the IPv6 POST_ROUTING hook.
 *
 * Returns 0 on success, -ENOMEM when the proc entry cannot be created, or
 * the error from nf_register_hook().
 *
 * The lock is initialised BEFORE the hook and the proc entry exist; the
 * original initialised it last, so an early packet or read could use an
 * uninitialised spinlock.  Failures are now checked and earlier steps
 * undone.
 */
static int __init init_main(void)
{
	int err;

	spin_lock_init(&list_lock);

	//Some distros/devices disable timestamping of packets
	net_enable_timestamp();

	if (!proc_create(PROCFS_NAME, 0, NULL, &proc_fops)) {
		net_disable_timestamp();
		return -ENOMEM;
	}

	nfho.hook = hook_func;
	nfho.hooknum = NF_INET_POST_ROUTING;
	nfho.pf = PF_INET6;
	nfho.priority = NF_IP_PRI_FIRST;
	err = nf_register_hook(&nfho);
	if (err) {
		remove_proc_entry(PROCFS_NAME, NULL);
		net_disable_timestamp();
		return err;
	}

#if DEBUG >0
	printk(KERN_INFO " kernel module: Successfully inserted protocol module into kernel.\n");
#endif
	return 0;
}
/*
 * Module exit point: unregisters the hook, removes the proc entry, drops
 * the timestamping request made at load time and frees every sample still
 * in the list.
 *
 * The list is walked without the lock because both producers (the hook)
 * and consumers (the proc entry) are gone by then.  The counters are
 * unsigned and are printed with %u.
 */
static void __exit cleanup_main(void)
{
	struct values_list *temp;

	nf_unregister_hook(&nfho);
#if DEBUG >0
	printk(KERN_INFO " kernel module: Successfully unloaded protocol module.\n");
	printk(KERN_INFO "Number of packets processed:%u\n", total_packets_processed);
	printk(KERN_INFO "Number of packets discarded:%u\n", total_packets_discarded);
#endif
	remove_proc_entry(PROCFS_NAME, NULL);

	/* Balance the net_enable_timestamp() call made at module load. */
	net_disable_timestamp();

	while (HEAD != NULL) {
		temp = HEAD;
		HEAD = HEAD->next;
		kfree(temp);
	}
}
module_init(init_main);
module_exit(cleanup_main);
/* * Declaring code as GPL. */
MODULE_LICENSE("GPLv3");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
There are 2 problems with your code:
Use the Linux kernel's linked-list macros instead of a hand-rolled list (see http://makelinux.com/ldd3/chp-11-sect-5): add a struct list_head member to your struct values_list and use list_entry, list_add and the other helpers.
Netfilter hooks can run in softirq context, so you must use spin_lock_irqsave and spin_unlock_irqrestore (or at least spin_lock_bh/spin_unlock_bh) wherever the lock is taken. This is the most likely reason why your system crashes with a soft lockup. Read http://makelinux.com/ldd3/chp-5-sect-5 carefully.

Netfilter string module example usage

Can anybody point me to some examples in using the xt_string module with netfilter or provide a example.
What I am trying to do is to write netfilter module that will drop packets that contain a certain string in the skb->data field.
I initially tried simply strnstr(skb->data, "mystring", strlen("mystring")), but this seems to be the wrong approach to the problem (and it does not seem to work, as I don't see any packets being dropped).
Thanks in advance
If you mean using iptables string match in user-space, here is one example:
iptables -I INPUT 1 -p tcp --dport 80 -m string --string "domain.com" --algo kmp -j DROP
Or if you mean in kernel space, you can use textsearch API which provides KMP/BM/FSM algorithms, the following example is from kernel source lib/textsearch.c:
/* Usage sketch of the textsearch API; `err` and the `errout` label are not declared in this excerpt. */
int pos;
struct ts_config *conf;
struct ts_state state;
const char *pattern = "chicken";
const char *example = "We dance the funky chicken";
/* Build a search configuration for the "kmp" algorithm and the given pattern. */
conf = textsearch_prepare("kmp", pattern, strlen(pattern),
GFP_KERNEL, TS_AUTOLOAD);
/* Failure is reported as an error pointer, not NULL. */
if (IS_ERR(conf)) {
err = PTR_ERR(conf);
goto errout;
}
/* Search the contiguous buffer; the result is compared with UINT_MAX to detect "not found". */
pos = textsearch_find_continuous(conf, &state, example, strlen(example));
if (pos != UINT_MAX)
panic("Oh my god, dancing chickens at %d\n", pos);
/* Release the configuration created by textsearch_prepare(). */
textsearch_destroy(conf);
what you are looking for may be this one, "skb_find_text".
It uses the infra in linux mentioned by #Cong Wang.
You can also find some examples in the kernel codes.
Below is the source code of a netfilter module that drops received ICMP ECHO packets.
You can use this code as a starting point for your own module: you just have to get the data from the skb and then check it.
#define __KERNEL__
#define MODULE
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/inet.h>
MODULE_LICENSE("GPL");
static struct nf_hook_ops netfilter_ops_in;/* IP PRE ROUTING */
static struct nf_hook_ops netfilter_ops_out; /* NF_IP_POST_ROUTING */
struct sk_buff *sock_buff;
struct iphdr *ip_header;
struct net_device *dev;
char *in_face = "eth0";
char *out_face = "eth1";
/*
 * Logs a source and a destination IPv4 address in dotted-quad form.
 *
 * sadd/dadd hold the addresses exactly as stored in the IP header (network
 * byte order).  The original extracted bytes with shifts, which prints the
 * octets in the right order only on little-endian machines; %pI4 formats
 * the four bytes in memory order and is correct on every architecture.
 */
void log_ip(int sadd,int dadd)
{
	printk("SrcIP: %pI4", &sadd);
	printk(" DstIP: %pI4", &dadd);
}
unsigned int main_hook(unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int(*okfn)(struct sk_buff*))
{
struct icmphdr* icmp;
sock_buff = skb_copy(skb,GFP_ATOMIC);
ip_header = (struct iphdr*)(sock_buff->network_header);
//ip_header = ip_hdr(sock_buff);
icmp = (struct icmphdr*) ((char*)ip_header + sizeof(struct iphdr));
//icmp = icmp_hdr(skb); /* do not return a good value in all cases*/
log_ip(ip_header->saddr,ip_header->daddr);
printk(" Dev:%s\n",sock_buff->dev);
if (icmp->type == ICMP_ECHO)
{
printk("ICMP ECHO received and droped\n");
return NF_DROP;
}
return NF_ACCEPT;
}
int init_module(void)
{
netfilter_ops_in.hook = main_hook;
netfilter_ops_in.pf = PF_INET;
netfilter_ops_in.hooknum = NF_INET_PRE_ROUTING; /*NF_INET_PRE_ROUTING;*/
netfilter_ops_in.priority = NF_IP_PRI_FIRST;
nf_register_hook(&netfilter_ops_in);
printk(KERN_INFO "sw: init_module() called\n");
return 0;
}
/*
 * Module exit point: unregisters the incoming hook and logs before and
 * after doing so. The outgoing hook is not unregistered; that call is
 * commented out, and init_module() registers only netfilter_ops_in.
 */
void cleanup_module(void)
{
printk(KERN_INFO "sw: cleanup_module() called\n");
nf_unregister_hook(&netfilter_ops_in);
//nf_unregister_hook(&netfilter_ops_out);
printk(KERN_INFO "sw: hook unregisted, quit called\n");
}

Resources