I am developing a Linux kernel module that communicates with my user-space C application. In this module, I create a thread. This is the module in which I am facing the problem:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <asm/siginfo.h> //siginfo
#include <linux/rcupdate.h> //rcu_read_lock
#include <linux/sched/signal.h> //find_task_by_pid_type
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include<linux/slab.h>
#include <linux/input.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/random.h>
#include <linux/kthread.h>
#define SIG_TEST 44 // we choose 44 as our signal number (real-time signals are in the range of 33 to 64)
#define BTN_FILE_PATH "/dev/input/event0"
char *str = BTN_FILE_PATH;
int file;
struct file *f; // keyboard driver
// prototypage des fonctions read_in_thread & read_pid
int read_in_thread(void *data);
static ssize_t read_pid(struct file *pfile, char __user *buffer, size_t length, loff_t *offset);
/*
 * write_pid - .write handler; a stub that ignores all of its arguments.
 *
 * Always returns 0, i.e. it reports that no bytes were consumed.
 */
static ssize_t write_pid(struct file *pfile, const char __user *buffer,
size_t length, loff_t *offset)
{
return 0;
}
/*
 * Bundle of the four arguments received by the .read handler, so that they
 * can be passed to a thread function through a single void pointer.
 */
struct read_args {
struct file *pfile;
const char __user *buffer;
size_t length;
loff_t *offset;
};
/*
 * read_pid - .read handler: packs its arguments into a read_args structure
 * and tries to start a kernel thread running read_in_thread() on them.
 *
 * The thread name is "thread<N>" with N a random value in 0..249.
 * Always returns 0.
 */
static ssize_t read_pid(struct file *pfile, char __user *buffer, size_t length, loff_t *offset)
{
// build the argument structure
// NOTE(review): args is a local of this function, yet its address is handed
// to the new thread and this function returns right after waking it; the
// thread may therefore read args after this stack frame is gone.
struct read_args args ;
args.pfile = pfile;
args.buffer = buffer;
args.length = length;
args.offset = offset;
// NOTE(review): thread1 is a local with no initializer and is compared with
// NULL below before anything is assigned to it, so which branch runs depends
// on whatever value happens to be on the stack.
struct task_struct *thread1;
char our_thread[20];
unsigned int rand;
get_random_bytes(&rand, sizeof(rand));
rand = rand % 250;
sprintf(our_thread, "thread%u", rand);
if(thread1==NULL)
{
thread1 = kthread_create(read_in_thread,&args,our_thread);
// NOTE(review): kthread_create() presumably reports failure with an ERR_PTR
// value rather than NULL -- confirm; if so this test cannot detect failure.
if((thread1))
{
printk(KERN_INFO "Thread is created\n");
printk("thread name %s\n", our_thread);
// start the thread
wake_up_process(thread1);
printk(KERN_INFO "Thread is awake\n");
}
}
else
printk("\nTHREAD1 IS NOT NULL!!! CAN NOT CREATE THREAD!!!\n");
return 0;
}
/*
 * read_in_thread - thread body: parse a PID from the caller's buffer, read
 * input events from the global file `f`, and send signal SIG_TEST to that PID.
 *
 * data: pointer to a struct read_args.
 *
 * Returns 0 on the normal path, -EINVAL when args->length exceeds 10,
 * -ENODEV when no task matches the PID, or the negative value returned by
 * send_sig_info().
 */
int read_in_thread(void *data) {
/************************** fetch the arguments *******************/
struct read_args *const args = data;
/*************************** function body ***********************/
// variable initialisation
// NOTE(review): MAX_BUF_SIZE, buf_size and total are never used below, and
// buf stays NULL for the whole function, so every kfree(buf) is kfree(NULL).
char mybuf[10];
enum { MAX_BUF_SIZE = 4096 };
size_t buf_size = 0;
char *buf = NULL;
ssize_t total = 0;
ssize_t rc = 0;
struct task_struct *t;
struct input_event ev[64];
int yalv;
int ret;
struct siginfo info;
int pid =0;
size_t amount = sizeof(ev);
// fetch the ID of the calling process
/* read the value from user space */
if(args->length > 10)
return -EINVAL;
// NOTE(review): the result of copy_from_user() is ignored, and up to 10 bytes
// are copied into the 10-byte mybuf with no terminator written before
// sscanf() parses it as a string.
// NOTE(review): this runs in the created thread, not in the process that
// issued the read -- confirm that the user pointer is still usable here.
copy_from_user(mybuf, args->buffer, args->length);
sscanf(mybuf, "%d", &pid);
printk("pid = %d\n", pid);
// the signal
memset(&info, 0, sizeof(struct siginfo));
info.si_signo = SIG_TEST;
info.si_code = SI_QUEUE; // this is bit of a trickery: SI_QUEUE is normally used by sigqueue from user space,
// and kernel space should use SI_KERNEL. But if SI_KERNEL is used the real_time data
// is not delivered to the user space signal handler function.
info.si_int = 260; //real time signals may have 32 bits of data.
rcu_read_lock();
t = pid_task(find_vpid(pid), PIDTYPE_PID); //find the task_struct associated with this pid
if(t == NULL){
printk("no such pid\n");
rcu_read_unlock();
return -ENODEV;
}
rcu_read_unlock();
// NOTE(review): t is still used below, after rcu_read_unlock(), and no
// reference is taken on it here -- confirm the task cannot go away meanwhile.
// blocking read
rc = kernel_read(f, ev, amount, &f->f_pos);
// walk the events that were read
if (rc > 0) {
for (yalv = 0; yalv < (int) (rc / sizeof(struct input_event)); yalv++) {
if (ev[yalv].type == EV_KEY) {
// The brace-less `if` below guards only the si_int assignment; the
// send_sig_info() call that follows runs for every EV_KEY event.
if (ev[yalv].value == 0)
//eval_keycode(ev[yalv].code);
info.si_int = ev[yalv].code;
// send the signal, carrying the event code, to the calling process
ret = send_sig_info(SIG_TEST, &info, t); //send the signal
printk("signal was send\n");
if (ret < 0) {
printk("error sending signal\n");
kfree(buf);
return ret;
}
}
}
if (rc < amount) {
/* Didn't read the full amount, so terminate early. */
rc = 0;
}
}
/* Free temporary buffer. */
kfree(buf);
return 0;
}
static const struct file_operations my_fops = {
.owner = THIS_MODULE,
.write = write_pid,
.read = read_pid,
//.open = open_pid,
};
/*
 * signalexample_module_init - module entry point.
 *
 * Registers the character device (major 240, name "mod"), creates the
 * debugfs file "signalconfpid", and opens the input device named by `str`
 * into the global `f`.
 *
 * Returns 0 on success or a negative errno. On failure everything that was
 * set up before the failing step is torn down again, so a failed load leaves
 * nothing registered.
 */
static int __init signalexample_module_init(void)
{
	int err;

	printk(KERN_INFO "Initializing LKM");

	err = register_chrdev(240, "mod", &my_fops);
	if (err < 0) {
		printk(KERN_ERR "register_chrdev failed: %d\n", err);
		return err;
	}

	/* we need to know the pid of the user space process
	 * -> we use debugfs for this. As soon as a pid is written to
	 * this file, a signal is sent to that pid
	 */
	/* only root can write to this file (no read) */
	/* NOTE(review): `file` is declared as int at file scope while this call
	 * yields a pointer; the declaration lives outside this function. */
	file = debugfs_create_file("signalconfpid", 0200, NULL, NULL, &my_fops);

	/* filp_open() reports failure through an ERR_PTR value; passing that
	 * on to kernel_read() later would dereference an invalid pointer. */
	f = filp_open(str, O_RDONLY, 0);
	if (IS_ERR(f)) {
		err = PTR_ERR(f);
		printk(KERN_ERR "cannot open %s: %d\n", str, err);
		debugfs_remove(file);
		unregister_chrdev(240, "mod");
		return err;
	}

	return 0;
}
/*
 * signalexample_module_exit - module exit point.
 *
 * Closes the input device held in the global `f`, then removes the
 * character device registration and the debugfs file.
 */
static void __exit signalexample_module_exit(void)
{
	/* `f` may be NULL or an ERR_PTR if the open in init did not succeed. */
	if (!IS_ERR_OR_NULL(f))
		filp_close(f, NULL);

	unregister_chrdev(240, "mod");
	debugfs_remove(file);
}
module_init(signalexample_module_init);
module_exit(signalexample_module_exit);
MODULE_LICENSE("GPL");
When I execute my user-space program for the first time, everything is working well and it prints for me on the console :
Thread is created
thread name thread91
Thread is awake
But when I try to execute it again, it prints :
THREAD1 IS NOT NULL!!! CAN NOT CREATE THREAD!!!
I think that the problem is in task_struct *thread1, which is still holding information about the first thread that was created during the first execution of the program.
Could anyone help me solve the problem? How can I edit my code so that it runs correctly more than once?
Thank you.
As you mentioned, the thread is correctly marked as not null since it is already running. So the behaviour is intended.
So, in order to solve your issue, you have to describe what your actual intended behavior is.
If you want to make it run multiple times, you might want to use a local variable to run the thread (for instance, storing it in an array for later communication).
You could also wait for the previous thread to complete before running the next one.
Your kernel code lives before your user-space program starts and after it finishes. So when you execute your command for the first time, the kernel thread is created, because that is the first time read_pid is called. But after read_pid is done, the kernel does not stop, so the thread is still there. It looks like you should create your thread in the driver's probe function, and write the thread's logic so that it only prepares data for the read_pid call. The signal usage looks absolutely unnecessary in your case.
Related
In the case of linux kernel device drivers there is the file_operations struct, or fops struct, which allows the driver to define handlers for various file operations.
My question is about the .release fop handler.
I know the release handler will only be called when the last file descriptor (fd) for the file object is closed (or munmapped). This is done when fput is called on the file and the file->f_count reaches 0.
However, I am unclear on whether other file operations can be running simultaneously in another thread when release is entered.
For example:
could 1 thread of a process be inside the ioctl handler for the file (or fd), while another thread of the same process is inside of the release handler?
Can release be a factor in race conditions for the file object?
could 1 thread of a process be inside the ioctl handler for the file (or fd), while another thread of the same process is inside of the release handler?
No. The release entry point is called when the reference counter on the
file entry is 0. ioctl() increments the reference counter on the file. So, the release entry point will not be called while an ioctl() is in progress.
Foreword
The source code discussed below is:
GLIBC 2.31
Linux 5.4
GLIBC's pthread management
The GLIBC's pthread_create() actually involves a clone() system call with
the following flags:
CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID
According to the manual of clone(), the CLONE_FILES flag makes the threads of a process
share the same file descriptor table. Any file descriptor created by
one thread is also valid in the other threads. Similarly, if one thread closes a file descriptor, or changes its associated flags (using the fcntl() F_SETFD operation), the other threads are also affected.
clone() on the kernel side
When clone() is passed CLONE_FILES, the files_struct is not duplicated but a reference counter is incremented. As a consequence, the task structures of both threads point on the same files_struct (files field):
. The task structure is defined in include/linux/sched.h:
struct task_struct {
[...]
/* Open file information: */
struct files_struct *files; /// <==== Table of open files shared between thread
[...]
. In kernel/fork.c, the clone() service calls copy_files() to increment the reference counter on the files_struct
/*
 * copy_files - give the task `tsk` its open-file table.
 *
 * With CLONE_FILES set in clone_flags the current task's files_struct is
 * shared: only its reference counter is incremented. Otherwise a duplicate
 * made by dup_fd() is installed in tsk->files.
 *
 * Returns 0, or the error reported by dup_fd() through `error`.
 */
static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
{
struct files_struct *oldf, *newf;
int error = 0;
/*
* A background process may not have any files ...
*/
oldf = current->files;
if (!oldf)
goto out;
if (clone_flags & CLONE_FILES) {
atomic_inc(&oldf->count); // <==== Ref counter incremented: files_struct is shared
goto out;
}
newf = dup_fd(oldf, &error);
if (!newf)
goto out;
tsk->files = newf;
error = 0;
out:
return error;
}
. The files_struct is defined in include/linux/fdtable.h:
/*
* Open file table structure
*/
struct files_struct {
/*
* read mostly part
*/
atomic_t count; // <==== Reference counter
bool resize_in_progress;
wait_queue_head_t resize_wait;
struct fdtable __rcu *fdt;
struct fdtable fdtab;
/*
* written part on a separate cache line in SMP
*/
spinlock_t file_lock ____cacheline_aligned_in_smp;
unsigned int next_fd;
unsigned long close_on_exec_init[1];
unsigned long open_fds_init[1];
unsigned long full_fds_bits_init[1];
struct file __rcu * fd_array[NR_OPEN_DEFAULT];
ioctl() operation
ioctl() system call is defined fs/ioctl.c. It calls fdget() first to increment the reference counter on the file entry, do the requested operation and then call fdput()
/*
 * ksys_ioctl - body of the ioctl() system call.
 *
 * Brackets the operation between fdget() and fdput() on the descriptor.
 * Returns -EBADF when fd has no file behind it, otherwise the result of the
 * security check or of do_vfs_ioctl().
 */
int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
int error;
struct fd f = fdget(fd);
if (!f.file)
return -EBADF;
error = security_file_ioctl(f.file, cmd, arg);
if (!error)
error = do_vfs_ioctl(f.file, fd, cmd, arg);
fdput(f);
return error;
}
/* System call entry point: forwards its three arguments to ksys_ioctl(). */
SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
return ksys_ioctl(fd, cmd, arg);
}
The file entry is defined in include/linux/fs.h. Its reference counter is the f_count field:
struct file {
union {
struct llist_node fu_llist;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
struct inode *f_inode; /* cached value */
const struct file_operations *f_op;
/*
* Protects f_ep_links, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
enum rw_hint f_write_hint;
atomic_long_t f_count; // <===== Reference counter
unsigned int f_flags;
[...]
} __randomize_layout
__attribute__((aligned(4)));
Example
Here is a simple device driver into which the file operations merely display a message when they are triggered. The ioctl() entry makes the caller sleep 5 seconds:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/delay.h>
MODULE_LICENSE("GPL");
#define DEVICE_NAME "device"
static int device_open(struct inode *, struct file *);
static int device_release(struct inode *, struct file *);
static ssize_t device_read(struct file *, char *, size_t, loff_t *);
static ssize_t device_write(struct file *, const char *, size_t, loff_t *);
static long int device_ioctl(struct file *, unsigned int, unsigned long);
static int device_flush(struct file *, fl_owner_t);
static const struct file_operations fops = {
.owner = THIS_MODULE,
.read = device_read,
.write = device_write,
.unlocked_ioctl = device_ioctl,
.open = device_open,
.flush = device_flush,
.release = device_release
};
struct cdev *device_cdev;
dev_t deviceNumbers;
/*
 * init - module entry point: reserve a dynamically chosen device number
 * and register the character device behind it.
 *
 * Returns 0 on success or a negative errno; on failure everything acquired
 * so far is released again.
 */
static int __init init(void)
{
	int ret;

	/* This returns the major number chosen dynamically in deviceNumbers */
	ret = alloc_chrdev_region(&deviceNumbers, 0, 1, DEVICE_NAME);
	if (ret < 0) {
		printk(KERN_ALERT "Error registering: %d\n", ret);
		return ret;
	}

	device_cdev = cdev_alloc();
	if (!device_cdev) {
		ret = -ENOMEM;
		goto err_region;
	}
	/* A cdev obtained from cdev_alloc() is set up by assigning its fields;
	 * cdev_init() is meant for statically embedded cdevs and would replace
	 * the release handler that frees this allocation. */
	device_cdev->owner = THIS_MODULE;
	device_cdev->ops = &fops;

	ret = cdev_add(device_cdev, deviceNumbers, 1);
	if (ret < 0) {
		printk(KERN_ALERT "Error adding cdev: %d\n", ret);
		goto err_cdev;
	}

	printk(KERN_INFO "Device initialized (major number is %d)\n", MAJOR(deviceNumbers));
	return 0;

err_cdev:
	cdev_del(device_cdev);
err_region:
	unregister_chrdev_region(deviceNumbers, 1);
	return ret;
}
/*
 * cleanup - module exit point: remove the character device, then give back
 * its device number region.
 */
static void __exit cleanup(void)
{
	/* Delete the cdev first so that no device is still reachable through
	 * numbers that have already been released. */
	cdev_del(device_cdev);
	unregister_chrdev_region(deviceNumbers, 1);
	printk(KERN_INFO "Device unloaded\n");
}
/* .open handler: logs "Device open" and reports success. */
static int device_open(struct inode *inode, struct file *file)
{
printk(KERN_INFO "Device open\n");
return 0;
}
/* .flush handler: logs "Device flush" and reports success. */
static int device_flush(struct file *file, fl_owner_t id)
{
printk(KERN_INFO "Device flush\n");
return 0;
}
/* .release handler: logs "Device released" and reports success. */
static int device_release(struct inode *inode, struct file *file)
{
printk(KERN_INFO "Device released\n");
return 0;
}
/*
 * .write handler: logs "Device write" and returns len, i.e. it claims the
 * whole buffer was consumed without ever reading it.
 */
static ssize_t device_write(struct file *filp, const char *buff, size_t len, loff_t * off)
{
printk(KERN_INFO "Device write\n");
return len;
}
/* .read handler: logs "Device read" and returns 0 (no data produced). */
static ssize_t device_read(struct file *filp, char *buff, size_t len, loff_t * off)
{
printk(KERN_INFO "Device read\n");
return 0;
}
/*
 * .unlocked_ioctl handler: logs on entry, calls msleep_interruptible(5000),
 * logs on exit and returns 0. Both arguments after `file` are ignored.
 */
static long int device_ioctl(struct file *file, unsigned int ioctl_num, unsigned long ioctl_param)
{
printk(KERN_INFO "Device ioctl enter\n");
msleep_interruptible(5000);
printk(KERN_INFO "Device ioctl out\n");
return 0;
}
module_init(init);
module_exit(cleanup);
Here is a user space program which involves the main thread and a secondary one. The main thread opens the above device and waits for the secondary thread to start (barrier) before closing the device after 1 second. Meanwhile, the secondary thread calls ioctl() on the above device which makes it sleep 5 seconds. Then it calls ioctl() a second time before exiting.
The expected behavior is to make the main thread close the device file while the secondary thread is running the ioctl().
#include <stdio.h>
#include <pthread.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <errno.h>
static int dev_fd;
static pthread_barrier_t barrier;
/*
 * entry - body of the secondary thread.
 *
 * Announces itself, meets the main thread at the barrier, then issues
 * ioctl(dev_fd, 0) twice in a row, printing the return code and errno
 * after each call. Always returns NULL; `arg` is unused.
 */
void *entry(void *arg)
{
    int attempt;

    printf("Thread running...\n");

    /* Rendezvous with the main thread */
    pthread_barrier_wait(&barrier);

    for (attempt = 0; attempt < 2; attempt++) {
        int status = ioctl(dev_fd, 0);

        printf("rc = %d, errno = %d\n", status, errno);
    }

    return NULL;
}
/*
 * main - open the device, start the secondary thread, and close the device
 * one second after both threads have met at the barrier.
 *
 * Returns 0 on success, 1 when the device cannot be opened or the thread
 * cannot be created.
 */
int main(void)
{
    pthread_t tid;

    dev_fd = open("/dev/device", O_RDWR);
    if (dev_fd < 0) {
        perror("open /dev/device");
        return 1;
    }

    pthread_barrier_init(&barrier, NULL, 2);

    /* Without a second thread the barrier below would never be released. */
    if (pthread_create(&tid, NULL, entry, NULL) != 0) {
        fprintf(stderr, "pthread_create failed\n");
        close(dev_fd);
        return 1;
    }

    pthread_barrier_wait(&barrier);
    sleep(1);
    /* Close while the secondary thread is still inside its first ioctl(). */
    close(dev_fd);
    pthread_join(tid, NULL);
    return 0;
}
Installation of the kernel module:
$ sudo insmod ./device.ko
$ dmesg
[13270.589766] Device initialized (major number is 237)
$ sudo mknod /dev/device c 237 0
$ sudo chmod 666 /dev/device
$ ls -l /dev/device
crw-rw-rw- 1 root root 237, 0 janv. 27 10:55 /dev/device
The execution of the program shows that the first ioctl() makes the thread wait 5 seconds. But the second returns in error with EBADF (9) because meanwhile the device file has been closed by the main thread:
$ gcc p1.c -lpthread
$ ./a.out
Thread running...
rc = 0, errno = 0
rc = -1, errno = 9
In the kernel log, we can see that the close() in the main thread merely triggered a flush() operation on the device while the first ioctl() was still in progress in the secondary thread. Then, once the first ioctl() returned, the kernel internals freed the file entry (reference counter dropped to 0), so the second ioctl() did not reach the device because the file descriptor no longer referenced an open file. Hence the EBADF error on the second call:
[13270.589766] Device initialized (major number is 237)
[13656.862951] Device open <==== Open() in the main thread
[13656.863315] Device ioctl enter <==== 1st ioctl() in secondary thread
[13657.863523] Device flush <==== 1 s later, flush() = close() in the main thread
[13661.941238] Device ioctl out <==== 5 s later, the 1st ioctl() returns
[13661.941244] Device released <==== The file is released because the reference counter reached 0
I'm trying to trigger system call in kernel space and it works fine if the system call does not take arguments such as getpid().
The method how I do it:
get the address of system table
static void **syscall_table;
use it with system call number you want and as a function pointer:
typedef long (*sys_call_ptr_t)(const struct __user pt_regs *);
// call system call
((sys_call_ptr_t *)syscall_table)[system_call_number](reg);
if system call have argument, store them into regs before calling it:
struct __user pt_regs *reg = kmalloc....;
reg->di = ...
reg->si = ...
Currently, I'm trying to use write but it fails.
write(int fd, const void *buf, size_t count);
For buf, I've tried both user space address and kernel space address. count may not be a problem. So, I guess problem maybe occur in file descriptor (maybe fd is different between in lower level's and user space's). For basic testing, I only want to write text into terminal, so fd should be 1 (at least in user space).
There're two questions here:
In some reason, I need to stick to the method calling syscall described above. Is it reasonable or any step I miss and cause failure of using write?
If something wrong when I called write? Does the problem come from fd? If so, how do I get the corresponding fd with 1 in user space?
Foreword
By definition, a system call is a service offered by the system to user space applications. Code running inside the kernel should not call
a service intended for user space. Hence, doing so is not advised.
First try with a kernel space buffer
The write() system call is defined in fs/read_write.c. It calls ksys_write() which calls vfs_write():
/*
 * vfs_write - write `count` bytes from the user buffer `buf` to `file`.
 *
 * Returns -EBADF or -EINVAL when the file's mode flags do not allow
 * writing, -EFAULT when access_ok() rejects the buffer, the error from
 * rw_verify_area(), or otherwise the result of __vfs_write().
 */
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
ssize_t ret;
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
/* The buffer is validated before anything else touches it. */
if (unlikely(!access_ok(buf, count)))
return -EFAULT;
ret = rw_verify_area(WRITE, file, pos, count);
if (!ret) {
/* Requests larger than MAX_RW_COUNT are capped to it. */
if (count > MAX_RW_COUNT)
count = MAX_RW_COUNT;
file_start_write(file);
ret = __vfs_write(file, buf, count, pos);
if (ret > 0) {
fsnotify_modify(file);
add_wchar(current, ret);
}
inc_syscw(current);
file_end_write(file);
}
return ret;
}
[...]
/*
 * ksys_write - body of the write() system call.
 *
 * Looks the file up from `fd`, calls vfs_write() on a local copy of the
 * file position, and stores the position back only when the write did not
 * fail. Returns -EBADF when fd has no file behind it, otherwise the
 * vfs_write() result.
 */
ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
loff_t pos, *ppos = file_ppos(f.file);
if (ppos) {
pos = *ppos;
ppos = &pos;
}
ret = vfs_write(f.file, buf, count, ppos);
if (ret >= 0 && ppos)
f.file->f_pos = pos;
fdput_pos(f);
}
return ret;
}
/* System call entry point: forwards its three arguments to ksys_write(). */
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
size_t, count)
{
return ksys_write(fd, buf, count);
}
The file descriptor passed as first parameter is not a problem. The value passed from user space is used to retrieve the file structure of the output file (in ksys_write()). But the second parameter must reference a user space memory area.
In vfs_write(), a check is done on the second parameter:
if (unlikely(!access_ok(buf, count)))
return -EFAULT;
access_ok() checks whether the buffer is in user space. Hence, if you
pass an address referencing kernel space, the code returned from write() will be -EFAULT (-14).
The example below is a simple module calling the write() system call with a kernel space buffer. On x86_64, the convention for the parameters of the system calls are:
RDI = arg#0
RSI = arg#1
RDX = arg#2
R10 = arg#3
R8 = arg#4
R9 = arg#5
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/ptrace.h>
#include <linux/socket.h>
#include <linux/kallsyms.h>
MODULE_LICENSE("GPL");
typedef int (* syscall_wrapper)(struct pt_regs *);
unsigned long sys_call_table_addr;
#define DEV_NAME "[DEVICE2]"
#define DEV_STR DEV_NAME "String from driver"
static char buf[1024];
/*
 * device2_init - module entry point: look up the system call table and
 * invoke the write() system call with a kernel space buffer.
 *
 * The arguments are passed in a struct pt_regs: di = file descriptor,
 * si = buffer address, dx = byte count. The return code of the call is
 * only logged.
 *
 * Returns 0, or -ENOENT when the system call table cannot be located.
 */
static int __init device2_init(void) {
	syscall_wrapper write_syscall;
	int rc;
	struct pt_regs param;

	printk(KERN_INFO DEV_NAME "module has been loaded\n");

	sys_call_table_addr = kallsyms_lookup_name("sys_call_table");
	printk(KERN_INFO DEV_NAME "sys_call_table#%lx\n", sys_call_table_addr);
	/* Indexing a table located at address 0 would be a NULL dereference. */
	if (!sys_call_table_addr)
		return -ENOENT;

	write_syscall = ((syscall_wrapper *)sys_call_table_addr)[__NR_write];

	/*
	   Call to write() system call with a kernel space buffer
	*/
	snprintf(buf, sizeof(buf), "%s\n", DEV_STR);
	param.di = 1;
	param.si = (unsigned long)buf;
	param.dx = strlen(buf);
	rc = (* write_syscall)(&param);
	printk(KERN_INFO DEV_NAME "write() with a kernel space buffer = %d\n", rc);

	return 0;
}
/* Module exit point: only logs that the module has been unloaded. */
static void __exit device2_exit(void) {
printk(KERN_INFO DEV_NAME "module has been unloaded\n");
}
module_init(device2_init);
module_exit(device2_exit);
At module insertion time, we can verify that the system call returns -EFAULT:
$ sudo insmod ./device2.ko
$ dmesg
[15716.262977] [DEVICE2]module has been loaded
[15716.270566] [DEVICE2]sys_call_table#ffffffff926013a0
[15716.270568] [DEVICE2]write() with a kernel space buffer = -14
But the same module with a system call like dup() which involves a file descriptor but no user space buffers, this works. Let's change the previous code with:
/*
 * device2_init - module entry point: look up the system call table and
 * invoke write(), dup(), close() and dup() again through it.
 *
 * Arguments are passed in a struct pt_regs (di = first argument,
 * si = second, dx = third). Each return code is only logged.
 *
 * Returns 0, or -ENOENT when the system call table cannot be located.
 */
static int __init device2_init(void) {
	syscall_wrapper write_syscall;
	syscall_wrapper dup_syscall;
	syscall_wrapper close_syscall;
	int rc;
	struct pt_regs param;

	printk(KERN_INFO DEV_NAME "module has been loaded\n");

	sys_call_table_addr = kallsyms_lookup_name("sys_call_table");
	printk(KERN_INFO DEV_NAME "sys_call_table#%lx\n", sys_call_table_addr);
	/* Indexing a table located at address 0 would be a NULL dereference. */
	if (!sys_call_table_addr)
		return -ENOENT;

	write_syscall = ((syscall_wrapper *)sys_call_table_addr)[__NR_write];
	dup_syscall = ((syscall_wrapper *)sys_call_table_addr)[__NR_dup];
	close_syscall = ((syscall_wrapper *)sys_call_table_addr)[__NR_close];

	/*
	   Call to write() system call with a kernel space buffer
	*/
	snprintf(buf, sizeof(buf), "%s\n", DEV_STR);
	param.di = 1;
	param.si = (unsigned long)buf;
	param.dx = strlen(buf);
	rc = (* write_syscall)(&param);
	printk(KERN_INFO DEV_NAME "write() with a kernel space buffer = %d\n", rc);

	/*
	   Call to dup() system call
	*/
	param.di = 1;
	rc = (* dup_syscall)(&param);
	printk(KERN_INFO DEV_NAME "dup() = %d\n", rc);

	/*
	   Call to close() system call
	*/
	param.di = 0;
	rc = (* close_syscall)(&param);
	printk(KERN_INFO DEV_NAME "close() = %d\n", rc);

	/*
	   Call to dup() system call ==> Must return 0 as it is available
	*/
	param.di = 1;
	rc = (* dup_syscall)(&param);
	printk(KERN_INFO DEV_NAME "dup() = %d\n", rc);

	return 0;
}
The result of dup() is OK:
$ sudo insmod ./device2.ko
$ dmesg
[17444.098469] [DEVICE2]module has been loaded
[17444.106935] [DEVICE2]sys_call_table#ffffffff926013a0
[17444.106937] [DEVICE2]write() with a kernel space buffer = -14
[17444.106939] [DEVICE2]dup() = 4
[17444.106940] [DEVICE2]close() = 0
[17444.106940] [DEVICE2]dup() = 0
The first call to dup() returns 4 because the current process is insmod. The latter opened the module file and got file descriptor 3. Hence, the first available file descriptor is 4. The second call to dup() returns 0 because we closed the file descriptor 0.
Second try with a user space buffer
To use a user space buffer, let's add some file operations to the kernel module (open(), release() and write()). In the write() entry point we echo back what is passed from user space into stderr (file descriptor 2) using the user space buffer passed to the write() entry point:
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/ptrace.h>
#include <linux/socket.h>
#include <linux/kallsyms.h>
#include <linux/cdev.h>
MODULE_LICENSE("GPL");
typedef int (* syscall_wrapper)(struct pt_regs *);
static unsigned long sys_call_table_addr;
#define DEV_NAME "[DEVICE2]"
static syscall_wrapper write_syscall;
/*
 * device2_write - .write handler: echo the caller's buffer to file
 * descriptor 2 by invoking the write() system call through write_syscall.
 *
 * The user space pointer `buff` is passed on untouched, with di = 2,
 * si = buffer address and dx = length. The return code of the inner call
 * is only logged.
 *
 * Returns len unconditionally, so the caller sees the write as complete.
 */
static ssize_t device2_write(struct file *filp, const char *buff, size_t len, loff_t * off)
{
	struct pt_regs param;
	int rc;

	printk(KERN_INFO DEV_NAME "write %p, %zu\n", buff, len);

	/*
	   Call to write() system call to echo the write to stderr
	*/
	param.di = 2;
	param.si = (unsigned long)buff;
	param.dx = len;
	rc = (* write_syscall)(&param);
	printk(KERN_INFO DEV_NAME "write() = %d\n", rc);

	return len; // <-------------- To stop the write
}
/* .open handler: logs "open" with the device prefix and reports success. */
static int device2_open(struct inode *inode, struct file *file)
{
printk(KERN_INFO DEV_NAME "open\n");
return 0;
}
/* .release handler: logs "released" with the device prefix and reports success. */
static int device2_release(struct inode *inode, struct file *file)
{
printk(KERN_INFO DEV_NAME "released\n");
return 0;
}
static const struct file_operations fops =
{
.owner= THIS_MODULE,
.write=device2_write,
.open= device2_open,
.release= device2_release
};
struct cdev *device_cdev;
dev_t deviceNumbers;
/*
 * device2_init - module entry point: resolve the write() system call
 * through the system call table and register the character device.
 *
 * The system call pointer is resolved before the device is added, so the
 * .write handler can never run with write_syscall still unset. The log
 * lines are emitted in the same order as before.
 *
 * Returns 0 on success or a negative errno; on failure everything acquired
 * so far is released again.
 */
static int __init device2_init(void) {
	int rc;

	printk(KERN_INFO DEV_NAME "module has been loaded\n");

	sys_call_table_addr = kallsyms_lookup_name("sys_call_table");
	if (!sys_call_table_addr) {
		printk(KERN_ALERT DEV_NAME "sys_call_table not found\n");
		return -ENOENT;
	}
	write_syscall = ((syscall_wrapper *)sys_call_table_addr)[__NR_write];

	// This returns the major number chosen dynamically in deviceNumbers
	rc = alloc_chrdev_region(&deviceNumbers, 0, 1, DEV_NAME);
	if (rc < 0) {
		printk(KERN_ALERT DEV_NAME "Error registering: %d\n", rc);
		return rc;
	}

	device_cdev = cdev_alloc();
	if (!device_cdev) {
		rc = -ENOMEM;
		goto err_region;
	}
	/* A cdev obtained from cdev_alloc() is set up by assigning its fields;
	 * cdev_init() is meant for statically embedded cdevs and would replace
	 * the release handler that frees this allocation. */
	device_cdev->owner = THIS_MODULE;
	device_cdev->ops = &fops;

	rc = cdev_add(device_cdev, deviceNumbers, 1);
	if (rc < 0) {
		printk(KERN_ALERT DEV_NAME "Error adding cdev: %d\n", rc);
		goto err_cdev;
	}

	printk(KERN_INFO DEV_NAME "initialized (major number is %d)\n", MAJOR(deviceNumbers));
	printk(KERN_INFO DEV_NAME "sys_call_table#%lx\n", sys_call_table_addr);
	printk(KERN_INFO DEV_NAME "write_syscall#%p\n", write_syscall);
	return 0;

err_cdev:
	cdev_del(device_cdev);
err_region:
	unregister_chrdev_region(deviceNumbers, 1);
	return rc;
}
/*
 * device2_exit - module exit point: remove the character device and give
 * back its device number region, so that nothing still points at this
 * module's file operations once it is unloaded.
 */
static void __exit device2_exit(void) {
	/* device_cdev is NULL if cdev_alloc() failed during init. */
	if (device_cdev)
		cdev_del(device_cdev);
	unregister_chrdev_region(deviceNumbers, 1);
	printk(KERN_INFO DEV_NAME "module has been unloaded\n");
}
module_init(device2_init);
module_exit(device2_exit);
The loading of the module:
$ sudo insmod device2.ko
$ dmesg
[ 2255.183196] [DEVICE2]module has been loaded
[ 2255.183202] [DEVICE2]initialized (major number is 508)
[ 2255.193255] [DEVICE2]sys_call_table#ffffffffbcc013a0
[ 2255.193256] [DEVICE2]write_syscall#0000000030394929
Make the device entry in the file system to be able to write into it:
$ sudo mknod /dev/device2 c 508 0
$ sudo chmod 666 /dev/device2
$ sudo ls -l /dev/device2
crw-rw-rw- 1 root root 508, 0 janv. 24 16:55 /dev/device2
The writing into the device triggers the expected echo on stderr:
$ echo "qwerty for test purposes" > /dev/device2
qwerty for test purposes
$ echo "another string" > /dev/device2
another string
$ dmesg
[ 2255.183196] [DEVICE2]module has been loaded
[ 2255.183202] [DEVICE2]initialized (major number is 508)
[ 2255.193255] [DEVICE2]sys_call_table#ffffffffbcc013a0
[ 2255.193256] [DEVICE2]write_syscall#0000000030394929
[ 2441.674250] [DEVICE2]open
[ 2441.674268] [DEVICE2]write 0000000032fb5249, 25
[ 2441.674281] [DEVICE2]write() = 25
[ 2441.674286] [DEVICE2]released
[ 2475.538140] [DEVICE2]open
[ 2475.538159] [DEVICE2]write 0000000032fb5249, 15
[ 2475.538171] [DEVICE2]write() = 15
[ 2475.538175] [DEVICE2]released
Obviously, this is an unsurprising newbie question after a lot of trouble with kernel programming. I am trying to run a program that makes a driver file in the /dev folder available for reading and writing (indeed, I realize it's a rather unsafe idea, but I really need to go ahead with this experiment). Let's look at the module source code:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <asm/uaccess.h>
MODULE_LICENSE("GPL");
int init_module(void); // driver file initialization as opening it
void cleanup_module(void); // exec files removal ahead of shutting driver file
static int device_open(struct inode *, struct file *); // driver file opening
static int device_release(struct inode *, struct file *); // return of system resource control
static ssize_t device_read(struct file *, char *, size_t, loff_t *); // reading from driver file
static ssize_t device_write(struct file *, const char *, size_t, loff_t *); // writing into driver file
/*
 * Value returned by init_module(), device_open() and device_release() on
 * success. These callbacks follow the 0 / negative-errno convention, so
 * success must be 0; any non-zero value is taken as a failure by the caller.
 */
#define SUCCESS 0
#define DEVICE_NAME "sample device"
#define BUF_LEN 80
static int Major; // device's major number
static int Device_Open = 0; // device access counter
static char message[BUF_LEN]; // buffer for both read and write operations
static char *message_ptr;
// list of basic operations executable by driver
static struct file_operations ops = {
.read = device_read,
.write = device_write,
.open = device_open,
.release = device_release
};
/*
 * init_module - module entry point: registers the character device with a
 * dynamically assigned major number, which is stored in Major.
 *
 * Returns the negative value from register_chrdev() on failure, otherwise
 * SUCCESS.
 */
int init_module(void)
{
Major = register_chrdev(0, DEVICE_NAME, &ops); // major number assignment
// a negative result means the registration failed
if(Major < 0) {
printk(KERN_ALERT "Device registration attempt failed\n");
return Major;
}
return SUCCESS;
}
/* cleanup_module - module exit point: unregisters the character device and logs it. */
void cleanup_module(void)
{
unregister_chrdev(Major, DEVICE_NAME); // cancel the driver registration before exit
printk(KERN_ALERT "Driver file of /dev/%s c %d 0 has been destroyed\n", DEVICE_NAME, Major);
return;
}
/*
 * device_open - .open handler: allows one opener at a time and prepares
 * the message that device_read() hands out.
 *
 * Returns -EBUSY when Device_Open is already non-zero, otherwise SUCCESS.
 */
static int device_open(struct inode * node, struct file * file)
{
printk(KERN_INFO "Trying access /dev/%s c %d 0\n", DEVICE_NAME, Major);
static int counter = 0; // number of successful opens so far; kept across calls
// refuse the open while the device is already held
if(Device_Open)
return -EBUSY;
// NOTE(review): the test above and the increment below are separate,
// unprotected steps -- confirm whether concurrent opens need to be excluded.
Device_Open++; // mark the device as in use
sprintf(message, "This sentence displayed %d times\n", counter++);
message_ptr = message;
// NOTE(review): the result of try_module_get() is not checked.
try_module_get(THIS_MODULE);
return SUCCESS;
}
/*
 * device_release - .release handler: marks the device as free again and
 * drops the module reference taken in device_open(). Returns SUCCESS.
 */
static int device_release(struct inode * node, struct file * file)
{
printk(KERN_INFO "Trying closure of /dev/%s c %d 0\n", DEVICE_NAME, Major);
Device_Open--; // the device may be opened again from now on
module_put(THIS_MODULE);
return SUCCESS;
}
/*
 * device_read - .read handler: copy the remainder of the message prepared
 * by device_open() to the user buffer, one byte at a time.
 *
 * file: unused.
 * ch:   destination buffer in user space.
 * num:  maximum number of bytes to copy.
 * off:  unused; the read position is kept in message_ptr.
 *
 * Returns the number of bytes copied, 0 once the message is exhausted, or
 * -EFAULT when the very first byte cannot be stored in the user buffer.
 */
static ssize_t device_read(struct file * file, char * ch, size_t num, loff_t * off)
{
	ssize_t read_bytes = 0; // output size

	printk(KERN_INFO "Trying read from /dev/%s c %d 0\n", DEVICE_NAME, Major);

	if (*message_ptr == 0)
		return 0;

	// copy until the caller's buffer is full or the message ends
	while (num && *message_ptr) {
		// put_user() fails on a bad user address; stop instead of
		// counting bytes that were never delivered
		if (put_user(*message_ptr, ch))
			return read_bytes ? read_bytes : -EFAULT;
		message_ptr++;
		ch++;
		num--;
		read_bytes++;
	}

	// read_bytes is ssize_t and num is size_t: use the matching conversions
	printk(KERN_INFO "%zd bytes read, %zu bytes to be handled\n", read_bytes, num);
	return read_bytes;
}
// updated stuff
/*
 * device_write - .write handler: copy at most BUF_LEN - 1 bytes from the
 * user buffer into a local buffer and log them as a string.
 *
 * filp: unused.
 * buff: source buffer in user space.
 * len:  number of bytes offered by the caller.
 * off:  unused.
 *
 * Returns the number of bytes actually consumed (which is less than len
 * when len exceeds BUF_LEN - 1), or -EFAULT when the user buffer cannot
 * be read.
 */
static ssize_t device_write(struct file *filp, const char *buff, size_t len, loff_t * off)
{
	char message_from_user[BUF_LEN];
	size_t copied = len;

	// never copy more than the local buffer holds, keeping one byte
	// for the terminator
	if (copied > BUF_LEN - 1)
		copied = BUF_LEN - 1;

	if (copy_from_user(message_from_user, buff, copied))
		return -EFAULT;

	// the data is printed with %s below, so it has to be terminated
	message_from_user[copied] = '\0';

	printk(KERN_INFO "length of message:%d message:'%s'", (int)copied, message_from_user);
	return copied;
}
To test reading/writing, I use this code:
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <linux/unistd.h>
extern int errno;
/*
 * main - open /dev/dev, write a fixed message to it, read a reply back
 * and print the reply one character per line.
 *
 * Returns 1 when the device cannot be opened, 0 otherwise. Failed write()
 * or read() calls are reported as -1 bytes and do not abort the program.
 */
int main()
{
    int fd; // file descriptor id
    ssize_t cnt = 0; // signed, so that the -1 error return stays -1
    char inputBuffer[30] = "Device file is open"; // write operation buffer
    char outputBuffer[50]; // read operation buffer

    printf("Continuing with basics of Linux drivers...\n");

    // evaluate accessibility of driver file
    fd = open("/dev/dev", O_RDWR);
    if (fd == -1) {
        // nothing to close here: open() did not return a descriptor
        printf("File opening isn't completed\n");
        return 1;
    }
    printf("Driver file is open now\n");

    // write to the driver file
    cnt = write(fd, inputBuffer, sizeof(inputBuffer));
    printf("Driver got written %zd bytes\n", cnt);

    // read from the driver file
    cnt = read(fd, outputBuffer, sizeof(outputBuffer));
    printf("Driver received %zd bytes\n", cnt);

    // display the received message; a failed read (cnt == -1) prints nothing
    for (ssize_t i = 0; i < cnt; i++) {
        printf("%c", outputBuffer[i]);
        printf("%s", "\n");
    }

    close(fd); // wrap up driver connection
    printf("Driver file is close\n");
    return 0;
}
Although the module was built and the dev file was created with mknod (I run it on Ubuntu 18.04), I'm stuck at the write operation because I have misunderstood something about driver calls between user and kernel space. Once I start my program, the output is as follows:
Continuing with basics of Linux drivers...
Driver file is open now
Driver got written -1 bytes
After the last line of output, the system becomes inoperable (no response until I switch off the PC). I think it is a matter of memory handling or, most probably, of some driver file properties. However, read / write / execute permissions have been granted, so no access restriction should apply. Hopefully, someone can point out what is wrong in the code posted here.
Seeing your code you don't handle the writing part.
/*
 * Write handler of the device: no data is accepted. Every call logs an alert
 * and fails with -EINVAL, so user-space write() returns -1.
 */
static ssize_t device_write(struct file *file, const char *ch, size_t num, loff_t *off)
{
	const ssize_t status = -EINVAL;	/* writes are always rejected */

	printk(KERN_ALERT "Operation denied\n");
	return status;
}
Thus there is no way your module can possibly work.
But your crash comes from memory accesses in your read function (check this with strace). I will let you work out the exact issue yourself; dmesg should help (or, if your system panics, you can make the log persistent so that you can debug it after rebooting your system).
My kernel module code needs to send signal to a user land program, to transfer its execution to registered signal handler.
In fact, I have developed a C program for my embedded board which turns an LED on and off when I push a button (the input event). On the other hand, I have just developed a simple Linux module with its basic functions (OPEN, CLOSE, READ, WRITE).
I just don't have any idea how to modify my main program and my kernel module in order to achieve my objective.
I share with you my user space program :
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <sys/select.h>
#include <sys/time.h>
#include <errno.h>
#include <linux/input.h>
#define BTN_FILE_PATH "/dev/input/event0"
#define LED_PATH "/sys/class/leds"
#define green "green"
/*
 * Write an integer value, followed by a newline, to a LED control file.
 *
 * led_path:  path of the file to write (e.g. a sysfs "brightness" file).
 * led_value: value written in decimal.
 *
 * The path is opened as given. It is no longer copied into a fixed 64-byte
 * buffer, which silently truncated longer paths and opened the wrong file.
 * Failures are reported on stderr; the function returns nothing.
 */
void change_led_state(char *led_path, int led_value)
{
    FILE *led_fd;

    if (led_path == NULL) {
        fprintf(stderr, "simplekey: unable to access led\n");
        return;
    }

    led_fd = fopen(led_path, "w");
    if (led_fd == NULL) {
        fprintf(stderr, "simplekey: unable to access led\n");
        return;
    }

    if (fprintf(led_fd, "%d\n", led_value) < 0)
        fprintf(stderr, "simplekey: unable to write led state\n");

    /* The data is flushed on close, so this is where a write error shows up. */
    if (fclose(led_fd) != 0)
        fprintf(stderr, "simplekey: unable to write led state\n");
}
/* Switch the green LED off by writing 0 to its brightness file. */
void reset_leds(void)
{
    const int led_off = 0;

    change_led_state(LED_PATH "/" green "/brightness", led_off);
}
/*
 * Put the green LED under manual control and switch it off.
 *
 * Writes "none" to the LED's trigger file, then calls reset_leds().
 *
 * Returns 0 on success, or -1 when the trigger file cannot be opened or
 * written. Previously a failed fopen() led to fprintf()/fclose() on a NULL
 * stream.
 */
int configure_leds(void)
{
    const char *none_str = "none";
    FILE *r_fd;
    int ret = 0;

    /* Configure leds for hand control */
    r_fd = fopen(LED_PATH "/" green "/trigger", "w");
    if (r_fd == NULL) {
        fprintf(stderr, "simplekey: unable to access led trigger\n");
        return -1;
    }
    if (fprintf(r_fd, "%s\n", none_str) < 0)
        ret = -1;
    /* The data is flushed on close, so a write error may only show up here. */
    if (fclose(r_fd) != 0)
        ret = -1;
    if (ret < 0) {
        fprintf(stderr, "simplekey: unable to configure led trigger\n");
        return ret;
    }

    /* Switch off leds */
    reset_leds();
    return 0;
}
/*
 * React to a key code: code 260 toggles the green LED, every other code is
 * ignored. The LED state is remembered across calls in a static variable.
 */
void eval_keycode(int code)
{
    static int green_state = 0;

    if (code != 260)
        return;

    printf("BTN left pressed\n");
    /* flip the remembered state between 0 and 1 */
    green_state = !green_state;
    change_led_state(LED_PATH "/" green "/brightness", green_state);
}
/*
 * simplekey entry point: configure the LED, open the input event device and
 * process key events forever. For every EV_KEY event the timestamp and fields
 * are printed; events with value 0 are passed to eval_keycode().
 *
 * The loop only ends through exit(1) on an open/read failure.
 */
int main(void)
{
    int file;
    /* how many bytes were read; signed so that read()'s -1 is detectable */
    ssize_t rb;
    int ret;
    size_t yalv;
    size_t count;
    /* the events (up to 64 at once) */
    struct input_event ev[64];
    char *str = BTN_FILE_PATH;

    printf("Starting simplekey app\n");
    ret = configure_leds();
    if (ret < 0)
        exit(1);
    printf("File Path: %s\n", str);
    if ((file = open(str, O_RDONLY)) < 0) {
        perror("simplekey: File can not open");
        exit(1);
    }
    for (;;) {
        /* Blocking read */
        rb = read(file, ev, sizeof(ev));
        if (rb < 0) {
            /* genuine error: errno is meaningful here */
            perror("simplekey: short read");
            close(file);
            exit(1);
        }
        if ((size_t) rb < sizeof(struct input_event)) {
            /* not an errno condition, so do not use perror() */
            fprintf(stderr, "simplekey: short read\n");
            close(file);
            exit(1);
        }
        count = (size_t) rb / sizeof(struct input_event);
        for (yalv = 0; yalv < count; yalv++) {
            if (ev[yalv].type != EV_KEY)
                continue;
            printf("%ld.%06ld ",
                   (long) ev[yalv].time.tv_sec,
                   (long) ev[yalv].time.tv_usec);
            printf("type %d code %d value %d\n",
                   ev[yalv].type,
                   ev[yalv].code, ev[yalv].value);
            /* Change state on button pressed */
            if (ev[yalv].value == 0)
                eval_keycode(ev[yalv].code);
        }
    }
}
And this is the basic kernel module :
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/input.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Gaston");
MODULE_DESCRIPTION("A simple Linux char driver");
MODULE_VERSION("0.1");
/*
 * open() handler: only logs that the device was opened.
 *
 * Declared to return int, which is the type of the .open member of
 * struct file_operations; ssize_t made the pointer assignment incompatible.
 * Always returns 0.
 */
int exer_open(struct inode *pinode, struct file *pfile) {
	printk(KERN_INFO "Device has been opened\n");
	return 0;
}
/*
 * read() handler stub: copies nothing to the user buffer and reports 0 bytes,
 * which user space sees as end of file.
 */
ssize_t exer_read(struct file *pfile, char __user *buffer, size_t length, loff_t *offset) {
	const ssize_t bytes_read = 0;	/* this stub never produces data */

	return bytes_read;
}
/*
 * write() handler stub: discards the data and reports the whole request as
 * consumed.
 *
 * Returning 0 for a non-empty write tells user space that nothing was
 * written; callers such as the C library or a shell then retry the same
 * write forever. Returning length makes the device behave like a sink.
 */
ssize_t exer_write(struct file *pfile, const char __user *buffer, size_t length, loff_t *offset) {
	return length;
}
/*
 * release() handler: only logs that the device was closed.
 *
 * Declared to return int, which is the type of the .release member of
 * struct file_operations; ssize_t made the pointer assignment incompatible.
 * Always returns 0.
 */
int exer_close(struct inode *pinode, struct file *pfile) {
	printk(KERN_INFO "Device successfully closed\n");
	return 0;
}
/*
 * Operation table handed to register_chrdev(): maps the file operations of
 * the character device to the exer_* handlers of this module.
 */
struct file_operations exer_file_operations = {
	.owner   = THIS_MODULE,
	/* lifetime of an open file */
	.open    = exer_open,
	.release = exer_close,
	/* data transfer */
	.read    = exer_read,
	.write   = exer_write,
};
int exer_simple_module_init(void) {
printk(KERN_INFO "Initializing the LKM\n");
register_chrdev(240, "Simple Char Drv", &exer_file_operations);
return 0;
}
/* Module exit point: unregisters the character device registered under major 240. */
void exer_simple_module_exit(void) {
	const unsigned int major = 240;	/* same major number as used at init */

	unregister_chrdev(major, "Simple Char Drv");
}
module_init(exer_simple_module_init);
module_exit(exer_simple_module_exit);
I hope you will help me. Thank you!
I will concentrate on sending a signal, since that is what you asked for, although sending signals to a process is quite brutal. It would be better to implement poll and read file operations so the user code can wait for events from the device and read them.
Anyway, for sending a signal to the processes that opened the device, the things you need are:
You need a struct fasync_struct * in the private data of your device:
struct fasync_struct *pasync_queue;
It needs to be initialized to NULL by some means during initialization of your device private data. How you do that is up to you.
You need a fasync file operation handler pointed to by the fasync member of your struct file_operations. The implementation of the fasync handler is very simple as it just needs to call fasync_helper() using supplied parameters and a pointer to your device's private struct fasync_struct *:
/*
 * fasync handler: forwards its arguments, together with the address of the
 * device's fasync queue pointer, to fasync_helper() and returns its result.
 */
static int exer_fasync(int fd, struct file *pfile, int mode)
{
	// N.B. Change this code to use the pasync_queue member from your device private data.
	return fasync_helper(fd, pfile, mode, &pasync_queue);
}
/*
 * Operation table of the character device, extended with the fasync handler
 * used for asynchronous notification.
 */
struct file_operations exer_file_operations = {
	.owner   = THIS_MODULE,
	/* lifetime of an open file */
	.open    = exer_open,
	.release = exer_close,
	/* data transfer */
	.read    = exer_read,
	.write   = exer_write,
	/* asynchronous notification */
	.fasync  = exer_fasync,
};
Your device driver can send a SIGIO signal by calling kill_fasync() as follows:
// N.B. Change this code to use the pasync_queue member from your device private data.
struct fasync_struct **fapp = &pasync_queue;
kill_fasync(fapp, SIGIO, POLL_IN);
N.B. The last parameter (value POLL_IN in this case) affects the value of the si_band member of the siginfo_t that your application sees in its signal handler.
Your application needs to set a signal handler for the SIGIO signal. I recommend using sigaction() to set this up.
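Your application needs to set the O_ASYNC flag on the file descriptor by calling fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC); after opening the device file (on Linux, passing O_ASYNC to open() does not enable signal-driven I/O, so fcntl() must be used). It must also call fcntl(fd, F_SETOWN, getpid()); so that the kernel knows which process should receive the signal.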
I am testing kernel asynchronous io functions (not posix aio) and am trying to figure out how it works. The code below is a complete program where I simply write an array repeatedly to a file opened using O_DIRECT. I get an error in the callback function "write missed bytes expect 1024 got 0" (see the fprintf statement in work_done()).
For those not familiar with kernel aio, the code below does the following:
Init some structs
Prepare aio (io_prep_pwrite)
Submit io requests (io_submit)
Check for event completion (io_getevents)
Call a callback function to see if everything went ok.
I get an error at step 5. If I do not open the file using O_DIRECT, things work fine, but it beats the purpose of having async writes.
Can someone tell me what I am doing wrong? Is this the correct usage of kernel aio, for example, is my use of callbacks correct? Are there any restrictions on the usage of O_DIRECT?
I compile using 'gcc -Wall test.c -laio'
Thanks in advance.
/*
* File: myaiocp.c
* Author: kmehta
*
* Created on July 11, 2011, 12:50 PM
*
*
* Testing kernel aio.
* Program creates a 2D matrix and writes it multiple times to create a file of desired size.
* Writes are performed using kernel aio functions (io_prep_pwrite, io_submit, etc.)
*/
#define _GNU_SOURCE
#define _XOPEN_SOURCE 600
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <pthread.h>
#include <fcntl.h>
#include <string.h>
#include <sys/uio.h>
#include <sys/time.h>
#include <omp.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <libaio.h>
/* Row pointers of the 2D matrix; filled in by allocate_2D_matrix(), freed by cleanup(). */
char ** buf;
/* Bytes per matrix row; also the length of every write request (set in main()). */
long seg_size;
/* Number of rows in the matrix (set in main()). */
int seg_rows;
/* Desired size of the output file in bytes (set in main()). */
double total_size;
/* Path of the output file (set in main()). */
char * filename;
/* Requests still outstanding: set in io_task(), decremented in work_done(). */
static int wait_count = 0;
/* Prepares, submits and waits for all write requests. */
void io_task();
/* Releases the matrix memory. */
void cleanup();
/* Allocates the matrix; the argument holds {rows, columns}. */
void allocate_2D_matrix(int[]);
/* Opens the output file and returns its descriptor. */
int file_open(char *);
/* NOTE(review): no definition of wr_done is visible in this part of the file; the callback used is work_done(). */
void wr_done(io_context_t ctx, struct iocb* iocb, long res, long res2);
/*
 * Program entry point: fixes the run parameters (1 MB file written in 1 kB
 * segments from a 1024-row matrix), builds the matrix, performs the writes
 * and releases the matrix. Command-line arguments are not used.
 */
int main(int argc, char **argv) {
    filename = "aio.out";
    seg_rows = 1024;
    seg_size = 1024;        //1kB
    total_size = 1048576;   //1MB

    int dims[] = {seg_rows, seg_size};
    allocate_2D_matrix(dims); //Creates 2D matrix

    io_task();
    cleanup();
    return 0;
}
/*
 * Create a zero-filled 2D matrix of dims[0] rows by dims[1] columns and
 * publish its row pointers through the global `buf`.
 *
 * The rows live in one contiguous allocation whose start is aligned to 4096
 * bytes. O_DIRECT transfers require the user buffer to be aligned to the
 * block size; the previous plain calloc() gave no such guarantee. Row i
 * starts at offset i * dims[1], so each row is block-aligned only when
 * dims[1] is itself a multiple of the block size.
 *
 * Exits with status 1 when the dimensions are not positive or memory cannot
 * be allocated.
 */
void allocate_2D_matrix(int dims[2]) {
    enum { DIRECT_IO_ALIGNMENT = 4096 };
    size_t rows, cols, bytes, padded;
    size_t i;
    char *data;

    if (dims[0] <= 0 || dims[1] <= 0) {
        printf("\nCould not allocate memory for matrix.\n");
        exit(1);
    }
    rows = (size_t) dims[0];
    cols = (size_t) dims[1];

    /* Compute the size in size_t and reject products that would overflow. */
    if (rows > ((size_t) -1 - DIRECT_IO_ALIGNMENT) / cols) {
        printf("\nCould not allocate memory for matrix.\n");
        exit(1);
    }
    bytes = rows * cols;
    /* aligned_alloc() wants a size that is a multiple of the alignment. */
    padded = (bytes + DIRECT_IO_ALIGNMENT - 1) / DIRECT_IO_ALIGNMENT * DIRECT_IO_ALIGNMENT;

    //create the matrix
    data = aligned_alloc(DIRECT_IO_ALIGNMENT, padded);
    if (data == NULL) {
        printf("\nCould not allocate memory for matrix.\n");
        exit(1);
    }
    memset(data, 0, padded);

    buf = malloc(rows * sizeof *buf);
    if (buf == NULL) {
        free(data);
        printf("\nCould not allocate memory for matrix.\n");
        exit(1);
    }
    for (i = 0; i < rows; i++) {
        buf[i] = &data[i * cols];
    }
}
/*
 * Report a failed AIO call on stderr and terminate with exit status 1.
 *
 * func: label printed in front of the message.
 * rc:   result code; negative values are treated as negated errno values,
 *       non-negative values are printed as plain numbers.
 */
static void io_error(const char *func, int rc)
{
    if (rc >= 0) {
        fprintf(stderr, "%s: error %d\n", func, rc);
    } else if (rc == -ENOSYS) {
        fprintf(stderr, "AIO not in this kernel\n");
    } else {
        fprintf(stderr, "%s: %s\n", func, strerror(-rc));
    }
    exit(1);
}
/*
 * Callback function, invoked once per completed write request.
 *
 * ctx:  AIO context the request belonged to (unused).
 * iocb: the completed request.
 * res:  bytes written, or a negative errno value when the request failed.
 * res2: secondary status; non-zero is treated as an error.
 *
 * On success decrements the outstanding-request counter and prints it.
 * On any failure prints a diagnostic and terminates the program.
 */
static void work_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
{
    (void) ctx;

    if (res2 != 0) {
        io_error("aio write", res2);
    }
    if (res < 0) {
        /* A failed request carries -errno in res: decode it rather than
         * reporting a misleading byte count. */
        io_error("aio write", res);
    }
    if ((unsigned long) res != iocb->u.c.nbytes) {
        /* Report res, the actual byte count; res2 is always 0 at this point. */
        fprintf(stderr, "write missed bytes expect %lu got %ld\n",
                iocb->u.c.nbytes, res);
        exit(1);
    }
    wait_count --;
    printf("%d ", wait_count);
}
/*
 * Wait routine. Collects completion events and runs the callback stored in
 * each of them.
 *
 * io_getevents() is called with iter as both the minimum and the maximum
 * number of events. Its return value (event count, or a negative error) is
 * printed and returned to the caller unchanged.
 */
int io_wait_run(io_context_t ctx, long iter)
{
    struct io_event events[iter];
    int completed = io_getevents(ctx, iter, iter, events, NULL);

    printf("got %d events\n", completed);

    /* Dispatch every event to the callback attached to its request. */
    for (int idx = 0; idx < completed; idx++) {
        struct io_event *ev = &events[idx];
        io_callback_t cb = (io_callback_t) ev->data;

        cb(ctx, ev->obj, ev->res, ev->res2);
    }
    return completed;
}
void io_task() {
long offset = 0;
int bufIndex = 0;
//Open file
int fd = file_open(filename);
//Initialize structures
long i;
long iter = total_size / seg_size; //No. of iterations to reach desired file size (total_size)
io_context_t myctx;
if(0 != io_queue_init(iter, &myctx))
{
perror("Could not initialize io queue");
exit(EXIT_FAILURE);
}
struct iocb * ioq[iter];
//loop through iter times to reach desired file size
for (i = 0; i < iter; i++) {
struct iocb *io = (struct iocb*) malloc(sizeof (struct iocb));
io_prep_pwrite(io, fd, buf[bufIndex], seg_size, offset);
io_set_callback(io, work_done);
ioq[i] = io;
offset += seg_size;
bufIndex ++;
if (bufIndex > seg_rows - 1) //If entire matrix written, start again from index 0
bufIndex = 0;
}
printf("done preparing. Now submitting..\n");
if(iter != io_submit(myctx, iter, ioq))
{
perror("Failure on submit");
exit(EXIT_FAILURE);
}
printf("now awaiting completion..\n");
wait_count = iter;
int res;
while (wait_count) {
res = io_wait_run(myctx, iter);
if (res < 0)
io_error("io_wait_run", res);
}
close(fd);
}
/*
 * Release the matrix: first the data area, whose start is the first row
 * pointer, then the array of row pointers itself.
 */
void cleanup() {
    char *matrix_data = buf[0];

    free(matrix_data);
    free(buf);
}
/*
 * Open (create or truncate) the output file for direct, write-only access
 * with mode 0666.
 *
 * Returns the file descriptor. When open() fails, prints a message and
 * terminates the program with exit(-1).
 */
int file_open(char *filename) {
    const int flags = O_DIRECT | O_CREAT | O_WRONLY | O_TRUNC;
    int fd = open(filename, flags, 0666);

    if (fd == -1) {
        printf("\nError opening file. \n");
        exit(-1);
    }
    return fd;
}
First of all, good job using libaio instead of POSIX aio.
Are there any restrictions on the usage of O_DIRECT ?
I'm not 100% sure this is the real problem, but O_DIRECT has some requirements (quoting mostly from TLPI):
The data buffer being transferred must be aligned on a memory boundary that is a multiple of the block size (use posix_memalign)
The file or device offset at which data transfer commences must be a multiple of the block size
The length of the data to be transferred must be a multiple of the block size
At a glance, I can see you are not taking any precautions to align memory in allocate_2D_matrix.
If I do not open the file using O_DIRECT, things work fine, but it
beats the purpose of having async writes.
This happens not to be the case. Asynchronous I/O works well without O_DIRECT (for instance think of the number of system calls slashed).