Protecting shared memory segment between kernel and user space - c

I have shared memory segment created in kernel using mmap. I need to access this mapped memory from both kernel and user space. What mechanism should I use to protect the memory from concurrent access ?
I want to have something like:
Kernel module:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/mm.h>
#ifndef VM_RESERVED
# define VM_RESERVED (VM_DONTEXPAND | VM_DONTDUMP)
#endif
struct dentry *file;
struct mmap_info
{
char *data;
int reference;
};
void mmap_open(struct vm_area_struct *vma)
{
struct mmap_info *info = (struct mmap_info *)vma->vm_private_data;
info->reference++;
}
void mmap_close(struct vm_area_struct *vma)
{
struct mmap_info *info = (struct mmap_info *)vma->vm_private_data;
info->reference--;
}
static int mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page;
struct mmap_info *info;
info = (struct mmap_info *)vma->vm_private_data;
if (!info->data)
{
printk("No data\n");
return 0;
}
page = virt_to_page(info->data);
get_page(page);
vmf->page = page;
return 0;
}
struct vm_operations_struct mmap_vm_ops =
{
.open = mmap_open,
.close = mmap_close,
.fault = mmap_fault,
};
int op_mmap(struct file *filp, struct vm_area_struct *vma)
{
vma->vm_ops = &mmap_vm_ops;
vma->vm_flags |= VM_RESERVED;
vma->vm_private_data = filp->private_data;
mmap_open(vma);
return 0;
}
int mmapfop_close(struct inode *inode, struct file *filp)
{
struct mmap_info *info = filp->private_data;
free_page((unsigned long)info->data);
kfree(info);
filp->private_data = NULL;
return 0;
}
int mmapfop_open(struct inode *inode, struct file *filp)
{
struct mmap_info *info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL);
info->data = (char *)get_zeroed_page(GFP_KERNEL);
memcpy(info->data, "hello from kernel this is file: ", 32);
memcpy(info->data + 32, filp->f_dentry->d_name.name, strlen(filp->f_dentry->d_name.name));
/* assign this info struct to the file */
filp->private_data = info;
return 0;
}
static const struct file_operations mmap_fops = {
.open = mmapfop_open,
.release = mmapfop_close,
.mmap = op_mmap,
};
static int __init mmapexample_module_init(void)
{
file = debugfs_create_file("mmap_example", 0644, NULL, NULL, &mmap_fops);
return 0;
}
static void __exit mmapexample_module_exit(void)
{
debugfs_remove(file);
}
module_init(mmapexample_module_init);
module_exit(mmapexample_module_exit);
MODULE_LICENSE("GPL");
User space:
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <sys/mman.h>
#define PAGE_SIZE 4096
int main ( int argc, char **argv )
{
int configfd;
char * address = NULL;
configfd = open("/sys/kernel/debug/mmap_example", O_RDWR);
if(configfd < 0)
{
perror("Open call failed");
return -1;
}
address = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, configfd, 0);
if (address == MAP_FAILED)
{
perror("mmap operation failed");
return -1;
}
printf("Initial message: %s\n", address);
memcpy(address + 11 , "*user*", 6);
printf("Changed message: %s\n", address);
close(configfd);
return 0;
}
but with locks.

Kernel space and user space have no shared mechanisms for concurrent access protection. If you want them, you need to implement them by yourself.
It can be some sort of mutex, implemented within you kernel module, and accessed from user space via special ioctl requests:
Kernel:
DECLARE_WAIT_QUEUE_HEAD(wq);
int my_mutex_val = 0;
/*
* Lock mutex.
*
* May be used directly by the kernel or via 'ioctl(MY_CMD_LOCK)' by user.
*/
void my_mutex_lock(void)
{
spin_lock(&wq.lock);
wait_event_interruptible_locked(&wq, my_mutex_val == 0);
my_mutex_val = 1;
spin_unlock(&wq.lock);
}
/*
* Unlock mutex.
*
* May be used directly by the kernel or via 'ioctl(MY_CMD_UNLOCK)' by user.
*/
void my_mutex_unlock(void)
{
spin_lock(&wq.lock);
my_mutex_val = 0;
wake_up(&wq);
spin_unlock(&wq.lock);
}
long unlocked_ioctl (struct file * filp, unsigned int cmd, unsigned long val)
{
switch(cmd) {
case MY_CMD_LOCK:
my_mutex_lock();
break;
case MY_CMD_UNLOCK:
my_mutex_unlock();
break;
}
}
User:
int main()
{
...
ioctl(MY_CMD_LOCK);
<read data>
ioctl(MY_CMD_UNLOCK);
...
}
It can be some sort of spinlock, which value is stored in mmap-ed area (so visible both for kernel space and user space).
In any case, kernel module should be prepared for the case, when user space application doesn't follow locking conventions. This, probably, would cancel any expectation about mmap-ed area content, generated by the kernel, but kernel module shouldn't crash in that case. [This is why standard kernel's struct mutex is not used in the code above: user space may use it incorrectly].

The problem with the ioctl is you need a kernel switch every time you want to access the share info->data. If that is okay then the ioctl is good - but then why not just do a standard character read/write file operation instead?
You can also try a lock-less mechanism. In the shared info->data area add a barrier variable. When the user needs access, it will do an atomic_compare_and_xchg on the barrier variable until it is set to 0 (unused) and then set it to 1. When the kernel needs access it will do the same but set it to 2. See the gcc atomic builtin documentation.

Related

Character device I/O across users

I'm learning Linux and I have the task that I can't handle. I have to create character device and make possible to read and write across users. For instance, I open first terminal and I use echo 'test' > /tmp/ringdev (where ringdev is my character device). I also open second terminal and I use cat /tmp/ringdev and I expect that output will be test. I have already read some example snippets, but all of them are just about the basics which I already have.
The question is how to read from / write to character device across users?
#define pr_fmt(fmt) "ringdev: " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/wait.h>
#include <linux/sched.h>
DECLARE_WAIT_QUEUE_HEAD(head);
/*
* mutex used for access synchronization to buffer (ringdev_buf and ringdev_len)
*/
static struct mutex ringdev_lock;
/*
* buffer and number of written bytes in the buffer
*/
static char ringdev_buf[4096];
static size_t ringdev_len;
static int ringdev_open(struct inode *inode, struct file *filp)
{
printk("device_open called \n");
return 0;
}
static ssize_t ringdev_read(struct file *filp, char __user *buf, size_t count,
loff_t *off)
{
ssize_t ret = 0;
printk("device_read called \n");
/*
* access to ringdev_buf i ringdev_len is protected by ringdev_lock,
* take that lock
*/
//wait_event_interruptible(head,ringdev_len!=0);
mutex_lock(&ringdev_lock);
if (*off > ringdev_len)
count = 0;
else if (count >= ringdev_len - *off)
count = ringdev_len - *off;
/*
* for access to user memory special functions must be used,
* to copy to user memory copy_to_user must be used.
*/
ret = -EFAULT;
if (copy_to_user(buf, ringdev_buf + *off, count))
goto out_unlock;
ret = count;
*off += ret;
out_unlock:
mutex_unlock(&ringdev_lock);
ringdev_len=0;
return ret;
}
static ssize_t ringdev_write(struct file *filp, const char __user *buf,
size_t count, loff_t *off)
{
ssize_t ret=0;
ringdev_len=0;
printk("device_write called \n");
mutex_lock(&ringdev_lock);
ret=-EFAULT;
if(copy_from_user(ringdev_buf+ringdev_len ,buf, count)==0){
ringdev_len= ringdev_len + count;
ret=count;
wake_up_interruptible(&head);
goto out_unlock;
}
else{
ret=-ENOSPC;
}
/*
* not supported yet. Do not forget about copy_from_user().
*/
out_unlock:
mutex_unlock(&ringdev_lock);
return ret;
}
static int ringdev_release(struct inode *inode, struct file *filp)
{
mutex_unlock(&ringdev_lock);
printk("device_release called \n");
return 0;
}
static const struct file_operations ringdev_fops = {
.owner = THIS_MODULE,
.open = ringdev_open,
.read = ringdev_read,
.write = ringdev_write,
.release = ringdev_release,
};
static struct miscdevice ringdev_miscdevice = {
.minor = MISC_DYNAMIC_MINOR,
.name = "ringdev",
.fops = &ringdev_fops
};
static int __init ringdev_init(void)
{
int ret;
mutex_init(&ringdev_lock);
ret = misc_register(&ringdev_miscdevice);
if (ret < 0) {
pr_err("can't register miscdevice.\n");
return ret;
}
pr_info("minor %d\n", ringdev_miscdevice.minor);
return 0;
}
static void __exit ringdev_exit(void)
{
misc_deregister(&ringdev_miscdevice);
mutex_destroy(&ringdev_lock);
}
module_init(ringdev_init);
module_exit(ringdev_exit);
MODULE_DESCRIPTION("Device");
MODULE_AUTHOR("xxx");
MODULE_LICENSE("GPL");

How to send signal from Linux kernel space to user space in order to notify about an input hardware event

My kernel module code needs to send signal to a user land program, to transfer its execution to registered signal handler.
In fact, I have developed a C program for my embedded board which make LED turns on and off when I push BUTTON ( the input event ). On the other hand, I have just developed a simple Linux module with its basic functions ( OPEN, CLOSE, READ, WRITE ).
I just don't have any idea how to modify my principal program and my kernel module in order to arrive to my objective.
I share with you my user space program :
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <sys/select.h>
#include <sys/time.h>
#include <errno.h>
#include <linux/input.h>
#define BTN_FILE_PATH "/dev/input/event0"
#define LED_PATH "/sys/class/leds"
#define green "green"
void change_led_state(char *led_path, int led_value)
{
char lpath[64];
FILE *led_fd;
strncpy(lpath, led_path, sizeof(lpath) - 1);
lpath[sizeof(lpath) - 1] = '\0';
led_fd = fopen(lpath, "w");
if (led_fd == NULL) {
fprintf(stderr, "simplekey: unable to access led\n");
return;
}
fprintf(led_fd, "%d\n", led_value);
fclose(led_fd);
}
void reset_leds(void)
{
change_led_state(LED_PATH "/" green "/brightness", 0);
}
int configure_leds(void)
{
FILE *r_fd;
char *none_str = "none";
/* Configure leds for hand control */
r_fd = fopen(LED_PATH "/" green "/trigger", "w");
fprintf(r_fd, "%s\n", none_str);
fclose(r_fd);
/* Switch off leds */
reset_leds();
return 0;
}
void eval_keycode(int code)
{
static int green_state = 0;
switch (code) {
case 260:
printf("BTN left pressed\n");
/* figure out green state */
green_state = green_state ? 0 : 1;
change_led_state(LED_PATH "/" green "/brightness", green_state);
break;
}
}
int main(void)
{
int file;
/* how many bytes were read */
size_t rb;
int ret;
int yalv;
/* the events (up to 64 at once) */
struct input_event ev[64];
char *str = BTN_FILE_PATH;
printf("Starting simplekey app\n");
ret = configure_leds();
if (ret < 0)
exit(1);
printf("File Path: %s\n", str);
if((file = open(str, O_RDONLY)) < 0) {
perror("simplekey: File can not open");
exit(1);
}
for (;;) {
/* Blocking read */
rb= read(file, &ev, sizeof(ev));
if (rb < (int) sizeof(struct input_event)) {
perror("simplekey: short read");
exit(1);
}
for (yalv = 0;
yalv < (int) (rb / sizeof(struct input_event));
yalv++) {
if (ev[yalv].type == EV_KEY) {
printf("%ld.%06ld ",
ev[yalv].time.tv_sec,
ev[yalv].time.tv_usec);
printf("type %d code %d value %d\n",
ev[yalv].type,
ev[yalv].code, ev[yalv].value);
/* Change state on button pressed */
if (ev[yalv].value == 0)
eval_keycode(ev[yalv].code);
}
}
}
close(file);
And this is the basic kernel module :
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/input.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Gaston");
MODULE_DESCRIPTION("A simple Linux char driver");
MODULE_VERSION("0.1");
ssize_t exer_open(struct inode *pinode, struct file *pfile) {
printk(KERN_INFO "Device has been opened\n");
return 0;
}
ssize_t exer_read(struct file *pfile, char __user *buffer, size_t length, loff_t *offset) {
return 0;
}
ssize_t exer_write(struct file *pfile, const char __user *buffer, size_t length, loff_t *offset) {
return 0;
}
ssize_t exer_close(struct inode *pinode, struct file *pfile) {
printk(KERN_INFO "Device successfully closed\n");
return 0;
}
struct file_operations exer_file_operations = {
.owner = THIS_MODULE,
.open = exer_open,
.read = exer_read,
.write = exer_write,
.release = exer_close,
};
int exer_simple_module_init(void) {
printk(KERN_INFO "Initializing the LKM\n");
register_chrdev(240, "Simple Char Drv", &exer_file_operations);
return 0;
}
void exer_simple_module_exit(void) {
unregister_chrdev(240, "Simple Char Drv");
}
module_init(exer_simple_module_init);
module_exit(exer_simple_module_exit);
I hope you will help me. Thank you!
I will concentrate on sending a signal, since that is what you asked for, although sending signals to a process is quite brutal. It would be better to implement poll and read file operations so the user code can wait for events from the device and read them.
Anyway, for sending a signal to the processes that opened the device, the things you need are:
You need a struct fasync_struct * in the private data of your device:
struct fasync_struct *pasync_queue;
It needs to be initialized to NULL by some means during initialization of your device private data. How you do that is up to you.
You need a fasync file operation handler pointed to by the fasync member of your struct file_operations. The implementation of the fasync handler is very simple as it just needs to call fasync_helper() using supplied parameters and a pointer to your device's private struct fasync_struct *:
static int exer_fasync(int fd, struct file *pfile, int mode)
{
// N.B. Change this code to use the pasync_queue member from your device private data.
struct fasync_struct **fapp = &pasync_queue;
return fasync_helper(fd, pfile, mode, fapp);
}
struct file_operations exer_file_operations = {
.owner = THIS_MODULE,
.open = exer_open,
.read = exer_read,
.write = exer_write,
.release = exer_close,
.fasync = exer_fasync,
};
Your device driver can send a SIGIO signal by calling kill_fasync() as follows:
// N.B. Change this code to use the pasync_queue member from your device private data.
struct fasync_struct **fapp = &pasync_queue;
kill_fasync(fapp, SIGIO, POLL_IN);
N.B. The last parameter (value POLL_IN in this case) affects the value of the si_band member of the siginfo_t that your application sees in its signal handler.
Your application needs to set a signal handler for the SIGIO signal. I recommend usingsigaction() to set this up.
Your application needs to set the O_ASYNC flag when it opens the device file, or set it by calling fcntl(fd, F_SETFL, O_ASYNC); after opening the device file.

Correct way to initialize semaphore in linux driver

I wrote this linux char driver just to control open call,
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/semaphore.h>
#include <linux/device.h>
#include <linux/cdev.h>
MODULE_LICENSE("GPL");
#define CLASS_NAME "myclass"
#define MINOR_NUM 0
#define MINOR_CNT 1
static struct class *myclass=NULL;
static struct device *mydevice=NULL;
static dev_t mycdevt;
static struct cdev *mycdev;
static struct semaphore *sem;
static int myopen(struct inode *inod, struct file *fp)
{
down(sem);
printk(KERN_INFO "critical section\n");
return 0;
}
static int myclose(struct inode *inod, struct file *fp )
{
up(sem);
printk(KERN_INFO "critical section freed\n");
return 0;
}
static ssize_t myread(struct file *fp, char *buf, size_t len, loff_t *off)
{
return 0;
}
static ssize_t mywrite(struct file *fp, char *buf, size_t len, loff_t *off)
{
return 0;
}
static struct file_operations fops =
{
.open = myopen,
.release = myclose,
.read = myread,
.write = mywrite,
};
static int __init myinit(void)
{
int ret;
ret = alloc_chrdev_region ( &mycdevt, MINOR_NUM, MINOR_CNT, "mycdevt");
if(ret<0)
{
printk(KERN_INFO "chardev can't be allocated\n");
// goto label;//todo
}
mycdev = cdev_alloc();//instead of cdev_alloc, we can use cdev_init(&mycdev, &fops);
if(mycdev == NULL)
{
printk(KERN_INFO"cdev_alloc failed\n");
// goto label;//todo
}
mycdev->ops = &fops;
ret = cdev_add(mycdev, mycdevt, 1);
if(ret < 0)
{
printk(KERN_INFO"cdev_add failed\n");
// goto label;//todo
}
myclass = class_create(THIS_MODULE, CLASS_NAME);
if(myclass == NULL)
{
printk(KERN_INFO"class create failed\n");
// goto label;//todo
}
mydevice = device_create(myclass, NULL, mycdevt, NULL, "mydevice");
if(mydevice == NULL)
{
printk(KERN_INFO"device create failed\n");
// goto label;//todo
}
sema_init(sem, 1);//here is the problem
printk(KERN_INFO"myinit done\n");
return 0;
}
static void __exit myexit(void)
{
device_destroy(myclass, mycdevt);
class_unregister(myclass);
class_destroy(myclass);
cdev_del(mycdev);
unregister_chrdev(MAJOR(mycdevt), "mycdevt");
printk(KERN_INFO "exited\n");
}
module_init(myinit);
module_exit(myexit);
I am following ldd3 book, and tried to use simple APIs to use semahore for an app.
What happened is my kernel crashed when it called sema_init function. I read that semaphore is used in mutex mode
http://www.makelinux.net/ldd3/chp-5-sect-3
I also read that semaphore is not a mutex as there is no ownership. I have yet to explore that 'ownership' thing, but for now I am not able to make a simple semaphore.
What wrong I am doing here?
There's no actual semaphore.
This line:
static struct semaphore *sem;
creates a pointer to a semaphore, not a semaphore itself. It's almost certain initialized to NULL.
You probably want something like
static struct semaphore sem;
and therefore
sema_init( &sem, 1);
along with
up( &sem );
and
down( &sem );
Note that these calls are taking the address of an actual semaphore.

Content wrong inside mmap'ed memory (Kernelspace<>Userspace)

I implement a memory mapping via mmap. My Kernel module writes something into this memory and a userspace application read this. In short I allocate 0x10000 memory (with kcalloc on kernel side and with mmap on userspace side). Then I write something to the address offsets 0x0, 0xf00 and 0xf000 using memcpy. On kernelside I can read back the memory correctly. But on userspace side the content of the first 0x1000 Bytes are repetitive through the whole memory (16 times). But why?
Her comes the code of the kernel module:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/mm.h>
#define DEV_MODULENAME "expdev"
#define DEV_CLASSNAME "expdevclass"
static int majorNumber;
static struct class *devClass = NULL;
static struct device *devDevice = NULL;
#ifndef VM_RESERVED
# define VM_RESERVED (VM_DONTEXPAND | VM_DONTDUMP)
#endif
struct mmap_info
{
char *data;
int reference;
};
static void
dev_vm_ops_open( struct vm_area_struct *vma )
{
struct mmap_info *info;
// counting how many applications mapping on this dataset
info = (struct mmap_info *)vma->vm_private_data;
info->reference++;
}
static void
dev_vm_ops_close( struct vm_area_struct *vma )
{
struct mmap_info *info;
info = (struct mmap_info *)vma->vm_private_data;
info->reference--;
}
static int
dev_vm_ops_fault( struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct page *page;
struct mmap_info *info;
info = (struct mmap_info *)vma->vm_private_data;
if (!info->data)
{
printk("No data\n");
return 0;
}
page = virt_to_page(info->data);
get_page(page);
vmf->page = page;
return 0;
}
static const struct vm_operations_struct dev_vm_ops =
{
.open = dev_vm_ops_open,
.close = dev_vm_ops_close,
.fault = dev_vm_ops_fault,
};
int
fops_mmap( struct file *filp,
struct vm_area_struct *vma)
{
vma->vm_ops = &dev_vm_ops;
vma->vm_flags |= VM_RESERVED;
vma->vm_private_data = filp->private_data;
dev_vm_ops_open(vma);
return 0;
}
int
fops_close( struct inode *inode,
struct file *filp)
{
struct mmap_info *info;
info = filp->private_data;
free_page((unsigned long)info->data);
kfree(info);
filp->private_data = NULL;
return 0;
}
int
fops_open( struct inode *inode,
struct file *p_file)
{
struct mmap_info *info;
char *data;
info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL);
// allocating memory on the heap for the data
data = kcalloc(0x10000,sizeof(char),GFP_KERNEL);
if( data==NULL )
{
printk(KERN_ERR "insufficient memory\n");
/* insufficient memory: you must handle this error! */
return ENOMEM;
}
info->data = data;
printk(KERN_INFO " > ->data: 0x%16p\n",info->data);
memcpy(info->data, "Initial entry on mapped memory by the kernel module", 52);
memcpy((info->data)+0xf00, "Somewhere", 9);
memcpy((info->data)+0xf000, "Somehow", 7);
printk(KERN_INFO " > ->data: %c%c%c\n", // the output here is correct
*(info->data+0xf000+0),
*(info->data+0xf000+1),
*(info->data+0xf000+2));
/* assign this info struct to the file */
p_file->private_data = info;
return 0;
}
static const struct file_operations dev_fops =
{
.open = fops_open,
.release = fops_close,
.mmap = fops_mmap,
};
static int __init
_module_init(void)
{
int ret = 0;
// Try to dynamically allocate a major number for the device
majorNumber = register_chrdev(0, DEV_MODULENAME, &dev_fops);
if (majorNumber<0)
{
printk(KERN_ALERT "Failed to register a major number.\n");
return -EIO; // I/O error
}
// Register the device class
devClass = class_create(THIS_MODULE, DEV_CLASSNAME);
// Check for error and clean up if there is
if (IS_ERR(devClass))
{
printk(KERN_ALERT "Failed to register device class.\n");
ret = PTR_ERR(devClass);
goto goto_unregister_chrdev;
}
// Create and register the device
devDevice = device_create(devClass,
NULL,
MKDEV(majorNumber, 0),
NULL,
DEV_MODULENAME
);
// Clean up if there is an error
if( IS_ERR(devDevice) )
{
printk(KERN_ALERT "Failed to create the device.\n");
ret = PTR_ERR(devDevice);
goto goto_class_destroy;
}
printk(KERN_INFO "Module registered.\n");
return ret;
// Error handling - using goto
goto_class_destroy:
class_destroy(devClass);
goto_unregister_chrdev:
unregister_chrdev(majorNumber, DEV_MODULENAME);
return ret;
}
static void __exit
_module_exit(void)
{
device_destroy(devClass, MKDEV(majorNumber, 0));
class_unregister(devClass);
class_destroy(devClass);
unregister_chrdev(majorNumber, DEV_MODULENAME);
printk(KERN_INFO "Module unregistered.\n");
}
module_init(_module_init);
module_exit(_module_exit);
MODULE_LICENSE("GPL");
here comes the code of the application
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#define PAGE_SIZE (0x10000)
int main ( int argc, char **argv )
{
int fd;
char *address = NULL;
time_t t = time(NULL);
char *sbuff;
int i;
sbuff = (char*) calloc(PAGE_SIZE,sizeof(char));
fd = open("/dev/expdev", O_RDWR);
if(fd < 0)
{
perror("Open call failed");
return -1;
}
address = mmap( NULL,
PAGE_SIZE,
PROT_READ|PROT_WRITE,
MAP_SHARED,
fd,
0);
if (address == MAP_FAILED)
{
perror("mmap operation failed");
return -1;
}
printf("%s: first userspace read\n",tbuff);
memcpy(sbuff, address,80);
printf("Initial message: %s\n", sbuff);
memcpy(sbuff, address+0xf00,80);
printf("Initial message: %s\n", sbuff);
memcpy(sbuff, address+0xf000,80);
printf("Initial message: %s\n", sbuff);
for(i=0; i<PAGE_SIZE; i++)
{
printf("%16p: %c\n",address+i, (char)*(address+i));
}
if (munmap(address, PAGE_SIZE) == -1)
{
perror("Error un-mmapping the file");
}
close(fd);
return 0;
}
and this is the output of the application:
0x7fe61b522000: I
0x7fe61b522001: n
0x7fe61b522002: i
0x7fe61b522003: t
0x7fe61b522004: i
0x7fe61b522005: a
0x7fe61b522006: l
...
0x7fe61b522f00: S
0x7fe61b522f01: o
0x7fe61b522f02: m
0x7fe61b522f03: e
0x7fe61b522f04: w
0x7fe61b522f05: h
0x7fe61b522f06: e
0x7fe61b522f07: r
0x7fe61b522f08: e
...
0x7fe61b523000: I
0x7fe61b523001: n
0x7fe61b523002: i
0x7fe61b523003: t
0x7fe61b523004: i
0x7fe61b523005: a
0x7fe61b523006: l
...
0x7fe61b523f00: S
0x7fe61b523f01: o
0x7fe61b523f02: m
0x7fe61b523f03: e
0x7fe61b523f04: w
0x7fe61b523f05: h
0x7fe61b523f06: e
0x7fe61b523f07: r
0x7fe61b523f08: e
...
0x7fe61b524000: I
0x7fe61b524001: n
0x7fe61b524002: i
0x7fe61b524003: t
0x7fe61b524004: i
0x7fe61b524005: a
0x7fe61b524006: l
...
It seems to me, that the repetition comes with the size of one page. But this makes no sense to me.
EDIT 1:
Add Somewhere to the output. Note: Only Somehow never occurs!
EDIT 2:
Corrected fault handler. This now considered the offset of the calling vmf. Now it runs like a charm. Thanks to Tsyvarev!
static int
dev_vm_ops_fault( struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct page *page;
struct mmap_info *info;
info = (struct mmap_info *)vma->vm_private_data;
if (!info->data)
{
printk("No data\n");
return 0;
}
page = virt_to_page((info->data)+(vmf->pgoff*PAGE_SIZE));
get_page(page);
vmf->page = page;
return 0;
}
But on userspace side the content of the first 0x1000`
0x1000 is a size of the page mapped with
page = virt_to_page(info->data);
get_page(page);
vmf->page = page;
Callback .fault of structure vm_operations_struct is called for every page (4096 bytes), which is accessed by the user but not mapped yet.
So your code just map first 4096 bytes (0x1000) of data to every page which user space accesses.

Understanding spinlocks in netfilter hook

I am writing a small kernel module for measuring the time that a network packet takes to exit a node.
This module is a hook in the netfilter library.
For each packet it receives it calculates an hash, gets the tstamp from skbuff and the actual timestamp, and saves all this data in a linked list.
To pass this data to userspace I've created a proc device, and when the user reads from the device I send one of the entries of the linked list.
To make changes to the list (read and write) I have a spinlock. The problem is that sometimes when I read from the proc device while I am processing packets the system crash.
I think that the problem is in the function "dump_data_to_proc", more specifically when try to acquire the spinlock. I've made some tests and it only crashes(softlockup) when running in a tplink router. When I run the module in a "normal" pc(single core) it don't crash,
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
#include <linux/init.h> /* Needed for the macros */
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
#include <linux/spinlock.h>
#include <net/ipv6.h>
#include <linux/proc_fs.h> /* Necessary because of proc fs */
#include <asm/uaccess.h> /* for copy_from_user */
#include "kmodule_measure_process_time.h"
#include "hash.c"
//DEBUG >=5 is very slow in the tplink
#define DEBUG 2
#define PROCFS_MAX_SIZE 64
#define PROCFS_NAME "measures"
#define MAXIMUM_SAMPLES 10000
static struct nf_hook_ops nfho;
unsigned int total_packets_processed= 0;
unsigned int total_packets_discarded=0;
int temp_counter=0;
struct values_list *HEAD;
spinlock_t list_lock ;
static int hello_proc(struct seq_file *m, void *v) {
seq_printf(m, " stats Mod initialized.\n");
return 0;
}
static int proc_open(struct inode *inode, struct file *file) {
return single_open(file, hello_proc, NULL);
}
ssize_t dump_data_to_proc(struct file *filp, char *buffer, size_t length, loff_t *offset){
int bytesRead = 0;
struct values_list *temp=NULL;
int bytesError=0;
char buff[PROCFS_MAX_SIZE];
spin_lock(&list_lock);
temp=HEAD;
if(temp!=NULL){
HEAD = temp->next;
}
spin_unlock(&list_lock);
if(temp!=NULL){
bytesRead = snprintf(buff, PROCFS_MAX_SIZE ,"%u|%llu|%llu\n", temp->hash,temp->arrival_timestap, temp->exit_timestap);
length = length - bytesRead+1;
kfree(temp);
temp_counter--;
}
bytesError= copy_to_user(buffer, buff, bytesRead);
if(bytesError!=0){
#if DEBUG >0
printk(KERN_INFO "Error: failed to copy to user");
#endif
}
return bytesRead;
}
static const struct file_operations proc_fops = {
.owner = THIS_MODULE,
.open = proc_open,
.read = dump_data_to_proc,
.llseek = seq_lseek,
.release = single_release,
};
static unsigned int hook_func(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
{
uint32_t hash=0;
ktime_t now_timeval;
struct timespec now;
u64 timestamp_arrival_time=0;
u64 timestamp_now=0;
struct ipv6hdr * ipheader;
struct values_list *node;
int number_of_samples=0;
spin_lock(&list_lock);
number_of_samples=temp_counter;
spin_unlock(&list_lock);
if(number_of_samples > MAXIMUM_SAMPLES){
#if DEBUG > 5
printk(KERN_INFO "Discarded one sample because the list is full.\n");
#endif
total_packets_discarded++; // probably this should be inside a spinlock
return NF_ACCEPT;
}
//calculate arrival time and actual time in ns
timestamp_arrival_time = ktime_to_ns(skb->tstamp);
getnstimeofday(&now);
now_timeval = timespec_to_ktime(now);
timestamp_now = ktime_to_ns(now_timeval);
//get Ipv6 addresses
ipheader = (struct ipv6hdr *)skb_network_header(skb);
hash=simple_hash((char *)&ipheader->saddr,sizeof(struct in6_addr)*2,hash);
total_packets_processed++;
node = (struct values_list *) kmalloc(sizeof(struct values_list),GFP_ATOMIC);
if(!node){
#if DEBUG >0
printk(KERN_INFO "Error cannot malloc\n");
#endif
return NF_ACCEPT;
}
node->hash=hash;
node->arrival_timestap=timestamp_arrival_time;
node->exit_timestap=timestamp_now;
spin_lock(&list_lock);
node->next=HEAD;
HEAD=node;
temp_counter++;
spin_unlock(&list_lock);
return NF_ACCEPT;
}
static int __init init_main(void)
{
nfho.hook = hook_func;
nfho.hooknum = NF_INET_POST_ROUTING;
nfho.pf = PF_INET6;
nfho.priority = NF_IP_PRI_FIRST;
nf_register_hook(&nfho);
#if DEBUG >0
printk(KERN_INFO " kernel module: Successfully inserted protocol module into kernel.\n");
#endif
proc_create(PROCFS_NAME, 0, NULL, &proc_fops);
spin_lock_init(&list_lock);
//Some distros/devices disable timestamping of packets
net_enable_timestamp();
return 0;
}
static void __exit cleanup_main(void)
{
struct values_list *temp;
nf_unregister_hook(&nfho);
#if DEBUG >0
printk(KERN_INFO " kernel module: Successfully unloaded protocol module.\n");
printk(KERN_INFO "Number of packets processed:%d\n",total_packets_processed);
printk(KERN_INFO "Number of packets discarded:%d\n",total_packets_discarded);
#endif
remove_proc_entry(PROCFS_NAME, NULL);
while(HEAD!=NULL){
temp=HEAD;
HEAD= HEAD->next;
kfree(temp);
}
}
module_init(init_main);
module_exit(cleanup_main);
/* * Declaring code as GPL. */
MODULE_LICENSE("GPLv3");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
There are 2 problems with your code:
Use Linux kernel macro for your code. http://makelinux.com/ldd3/chp-11-sect-5 . Just add struct list_head as element to your struct values_list and use list_entry, list_add and other
Netfilter hools are run in softirq context, so you must use spin_lock_irqsave and spin_unlock_irqrestore. This is most likely reason why your system crashes with softlockup. Read carefully http://makelinux.com/ldd3/chp-5-sect-5

Resources