Hi, I was hoping someone could help me understand the read section of the kernel file read_write.c. When I look at it, I don't really understand any of it.
I can't tell which part actually reads the file, since read functions are called in several places. I ask because, for an assignment, I have to modify the output of a read without modifying the file itself, so I need to know where and how to change the code.
I am using the latest Linux kernel from kernel.org, version 4.9. Any and all help is appreciated, thank you. Below is where I believe the read is happening.
/* Signature shared by the ->read_iter and ->write_iter callbacks used below. */
typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
/*
 * File operations table that provides only seek, iterator-based read,
 * read-only mmap and splice-read; no write-side callback is set.
 */
const struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
.mmap = generic_file_readonly_mmap,
.splice_read = generic_file_splice_read,
};
/*
 * Run an iterator-based callback (@fn) synchronously on @filp.
 *
 * @flags may contain only RWF_HIPRI, RWF_DSYNC and RWF_SYNC; anything else
 * yields -EOPNOTSUPP.  Each accepted flag is translated to its IOCB_*
 * counterpart on a stack kiocb.  The callback starts at *@ppos and the
 * position it leaves in the kiocb is written back to *@ppos.
 *
 * Returns whatever @fn returns; @fn must not return -EIOCBQUEUED.
 */
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
		loff_t *ppos, iter_fn_t fn, int flags)
{
	const int supported = RWF_HIPRI | RWF_DSYNC | RWF_SYNC;
	struct kiocb req;
	ssize_t result;

	if ((flags & supported) != flags)
		return -EOPNOTSUPP;

	init_sync_kiocb(&req, filp);

	/* RWF_SYNC implies DSYNC as well, hence both IOCB bits. */
	if (flags & RWF_SYNC)
		req.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
	if (flags & RWF_DSYNC)
		req.ki_flags |= IOCB_DSYNC;
	if (flags & RWF_HIPRI)
		req.ki_flags |= IOCB_HIPRI;

	req.ki_pos = *ppos;
	result = fn(&req, iter);
	BUG_ON(result == -EIOCBQUEUED);
	*ppos = req.ki_pos;

	return result;
}
/*
 * Copy an array of @nr_segs iovecs from user space and validate it.
 *
 * @type:         passed through vrfy_dir() for access_ok(); a negative
 *                value skips the access_ok() check entirely.
 * @uvector:      user-space iovec array.
 * @nr_segs:      number of entries in @uvector.
 * @fast_segs:    capacity of @fast_pointer.
 * @fast_pointer: caller-provided array used when @nr_segs fits in it.
 * @ret_pointer:  always set to the array that was used (either
 *                @fast_pointer or a kmalloc'd one), on success and on error.
 *
 * Returns the total length of all segments, capped at MAX_RW_COUNT (the
 * segment that crosses the cap has its iov_len shortened), 0 for zero
 * segments, or a negative errno (-EINVAL, -ENOMEM, -EFAULT).
 *
 * NOTE(review): since *ret_pointer may be a kmalloc'd array even on error,
 * the caller presumably has to free it when it differs from @fast_pointer -
 * confirm against the callers.
 */
ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
unsigned long nr_segs, unsigned long fast_segs,
struct iovec *fast_pointer,
struct iovec **ret_pointer)
{
unsigned long seg;
ssize_t ret;
struct iovec *iov = fast_pointer;
/*
* SuS says "The readv() function *may* fail if the iovcnt argument
* was less than or equal to 0, or greater than {IOV_MAX}. Linux has
* traditionally returned zero for zero segments, so...
*/
if (nr_segs == 0) {
ret = 0;
goto out;
}
/*
* First get the "struct iovec" from user memory and
* verify all the pointers
*/
if (nr_segs > UIO_MAXIOV) {
ret = -EINVAL;
goto out;
}
/* Too many segments for the caller's array: allocate one on the heap. */
if (nr_segs > fast_segs) {
iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
/*
 * NOTE(review): these two debug messages run before the allocation is
 * checked and look like temporary instrumentation; they only fire on
 * this heap-allocation path, not on every call.
 */
printk(KERN_DEBUG "Hello from read_write.c\n");
printk(KERN_DEBUG "Inside the copy check uvector method\n");
if (iov == NULL) {
ret = -ENOMEM;
goto out;
}
}
if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
ret = -EFAULT;
goto out;
}
/*
* According to the Single Unix Specification we should return EINVAL
* if an element length is < 0 when cast to ssize_t or if the
* total length would overflow the ssize_t return value of the
* system call.
*
* Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
* overflow case.
*/
ret = 0;
for (seg = 0; seg < nr_segs; seg++) {
void __user *buf = iov[seg].iov_base;
ssize_t len = (ssize_t)iov[seg].iov_len;
/* see if we're about to use an invalid len or if
* it's about to overflow ssize_t */
if (len < 0) {
ret = -EINVAL;
goto out;
}
/* A negative type means the caller does not want the range checked. */
if (type >= 0
&& unlikely(!access_ok(vrfy_dir(type), buf, len))) {
ret = -EFAULT;
goto out;
}
/* Truncate this segment so the running total never exceeds MAX_RW_COUNT. */
if (len > MAX_RW_COUNT - ret) {
len = MAX_RW_COUNT - ret;
iov[seg].iov_len = len;
}
ret += len;
}
out:
/* Hand the array back on every path, including errors. */
*ret_pointer = iov;
return ret;
}
/*
 * Fallback for files without an iterator callback: walk the iovec one
 * segment at a time and hand each segment to the plain read/write
 * callback @fn.
 *
 * Only RWF_HIPRI is tolerated in @flags; anything else is -EOPNOTSUPP.
 * Returns the number of bytes transferred so far.  An error from @fn is
 * returned only if nothing was transferred before it; a short transfer
 * ends the loop.
 */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
		loff_t *ppos, io_fn_t fn, int flags)
{
	ssize_t done = 0;

	if (flags & ~RWF_HIPRI)
		return -EOPNOTSUPP;

	while (iov_iter_count(iter) != 0) {
		struct iovec seg = iov_iter_iovec(iter);
		ssize_t got = fn(filp, seg.iov_base, seg.iov_len, ppos);

		/* Report the error only when no data moved before it. */
		if (got < 0)
			return done ? done : got;

		done += got;

		/* Short transfer: stop without advancing the iterator. */
		if (got != seg.iov_len)
			return done;

		iov_iter_advance(iter, got);
	}

	return done;
}
/*
 * Common body of vectored read and write.
 *
 * Imports the user iovec array into an iov_iter, checks the target range
 * with rw_verify_area(), then dispatches to the file's iterator callback
 * (->read_iter / ->write_iter) when it exists, or else loops over the
 * plain ->read / ->write callback.
 *
 * @type: READ selects the read callbacks; any other value selects the
 *        write callbacks and brackets the call with
 *        file_start_write()/file_end_write().
 *
 * Returns the byte count from the selected helper or a negative errno.
 */
static ssize_t do_readv_writev(int type, struct file *file,
const struct iovec __user * uvector,
unsigned long nr_segs, loff_t *pos,
int flags)
{
size_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;
io_fn_t fn;
iter_fn_t iter_fn;
ret = import_iovec(type, uvector, nr_segs,
ARRAY_SIZE(iovstack), &iov, &iter);
if (ret < 0)
return ret;
tot_len = iov_iter_count(&iter);
/* Nothing to transfer: skip the I/O but still run the common exit path. */
if (!tot_len)
goto out;
ret = rw_verify_area(type, file, pos, tot_len);
if (ret < 0)
goto out;
if (type == READ) {
fn = file->f_op->read;
iter_fn = file->f_op->read_iter;
} else {
fn = (io_fn_t)file->f_op->write;
iter_fn = file->f_op->write_iter;
file_start_write(file);
}
/* This is where the filesystem/driver callback actually gets invoked. */
if (iter_fn)
ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags);
else
ret = do_loop_readv_writev(file, &iter, pos, fn, flags);
if (type != READ)
file_end_write(file);
out:
/*
 * NOTE(review): iov is freed unconditionally, so import_iovec() presumably
 * leaves it NULL when the on-stack array was used - confirm.
 */
kfree(iov);
/* Notify on reads with ret >= 0 and on writes with ret > 0. */
if ((ret + (type == READ)) > 0) {
if (type == READ)
fsnotify_access(file);
else
fsnotify_modify(file);
}
return ret;
}
/*
 * Vectored read entry point.
 *
 * Rejects files not opened for reading (-EBADF) and files that lack
 * FMODE_CAN_READ (-EINVAL); otherwise forwards everything to
 * do_readv_writev() with type READ and returns its result.
 */
ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
		  unsigned long vlen, loff_t *pos, int flags)
{
	if ((file->f_mode & FMODE_READ) == 0)
		return -EBADF;

	if ((file->f_mode & FMODE_CAN_READ) == 0)
		return -EINVAL;

	return do_readv_writev(READ, file, vec, vlen, pos, flags);
}
EXPORT_SYMBOL(vfs_readv);
/*
 * readv() on a file descriptor using the file's own position.
 *
 * Looks up @fd, reads at the current file position through vfs_readv(),
 * and stores the new position back when the read did not fail.  Read
 * accounting (bytes on success, syscall count always) is updated whether
 * or not the descriptor was valid.
 *
 * Returns the vfs_readv() result, or -EBADF for an invalid descriptor.
 */
static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
			unsigned long vlen, int flags)
{
	struct fd desc = fdget_pos(fd);
	ssize_t nread = -EBADF;
	loff_t offset;

	if (!desc.file)
		goto account;

	offset = file_pos_read(desc.file);
	nread = vfs_readv(desc.file, vec, vlen, &offset, flags);
	if (nread >= 0)
		file_pos_write(desc.file, offset);
	fdput_pos(desc);

account:
	if (nread > 0)
		add_rchar(current, nread);
	inc_syscr(current);
	return nread;
}
/*
 * preadv() on a file descriptor at an explicit offset @pos.
 *
 * A negative @pos is rejected with -EINVAL before the descriptor is looked
 * up (no accounting happens in that case).  Files without FMODE_PREAD
 * yield -ESPIPE.  The file's own position is never touched: the read works
 * on the local copy of @pos.
 *
 * Returns the vfs_readv() result, -ESPIPE, or -EBADF for a bad descriptor.
 */
static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
			 unsigned long vlen, loff_t pos, int flags)
{
	struct fd desc;
	ssize_t nread = -EBADF;

	if (pos < 0)
		return -EINVAL;

	desc = fdget(fd);
	if (!desc.file)
		goto account;

	if (desc.file->f_mode & FMODE_PREAD)
		nread = vfs_readv(desc.file, vec, vlen, &pos, flags);
	else
		nread = -ESPIPE;
	fdput(desc);

account:
	if (nread > 0)
		add_rchar(current, nread);
	inc_syscr(current);
	return nread;
}
/*
 * Compat counterpart of do_readv_writev(): identical flow, but the user
 * array holds struct compat_iovec entries and is imported with
 * compat_import_iovec().
 *
 * @type: READ selects the read callbacks; any other value selects the
 *        write callbacks and brackets the call with
 *        file_start_write()/file_end_write().
 *
 * Returns the byte count from the selected helper or a negative errno.
 */
static ssize_t compat_do_readv_writev(int type, struct file *file,
const struct compat_iovec __user *uvector,
unsigned long nr_segs, loff_t *pos,
int flags)
{
compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;
io_fn_t fn;
iter_fn_t iter_fn;
ret = compat_import_iovec(type, uvector, nr_segs,
UIO_FASTIOV, &iov, &iter);
if (ret < 0)
return ret;
tot_len = iov_iter_count(&iter);
/* Nothing to transfer: skip the I/O but still run the common exit path. */
if (!tot_len)
goto out;
ret = rw_verify_area(type, file, pos, tot_len);
if (ret < 0)
goto out;
if (type == READ) {
fn = file->f_op->read;
iter_fn = file->f_op->read_iter;
} else {
fn = (io_fn_t)file->f_op->write;
iter_fn = file->f_op->write_iter;
file_start_write(file);
}
/* Prefer the iterator callback; fall back to looping over ->read/->write. */
if (iter_fn)
ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags);
else
ret = do_loop_readv_writev(file, &iter, pos, fn, flags);
if (type != READ)
file_end_write(file);
out:
/*
 * NOTE(review): iov is freed unconditionally, so compat_import_iovec()
 * presumably leaves it NULL when the on-stack array was used - confirm.
 */
kfree(iov);
/* Notify on reads with ret >= 0 and on writes with ret > 0. */
if ((ret + (type == READ)) > 0) {
if (type == READ)
fsnotify_access(file);
else
fsnotify_modify(file);
}
return ret;
}
static size_t compat_readv(struct file *file,
const struct compat_iovec __user *vec,
unsigned long vlen, loff_t *pos, int flags)
{
ssize_t ret = -EBADF;
if (!(file->f_mode & FMODE_READ))
goto out;
ret = -EINVAL;
if (!(file->f_mode & FMODE_CAN_READ))
goto out;
ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags);
out:
if (ret > 0)
add_rchar(current, ret);
inc_syscr(current);
return ret;
}
static size_t do_compat_readv(compat_ulong_t fd,
const struct compat_iovec __user *vec,
compat_ulong_t vlen, int flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret;
loff_t pos;
if (!f.file)
return -EBADF;
pos = f.file->f_pos;
ret = compat_readv(f.file, vec, vlen, &pos, flags);
if (ret >= 0)
f.file->f_pos = pos;
fdput_pos(f);
return ret;
}
/* Compat readv system call: forwards to do_compat_readv() with no RWF_* flags. */
COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
compat_ulong_t, vlen)
{
return do_compat_readv(fd, vec, vlen, 0);
}
/*
 * Compat preadv() at an explicit 64-bit offset @pos.
 *
 * Returns -EINVAL for a negative @pos, -EBADF for an invalid descriptor,
 * -ESPIPE when the file lacks FMODE_PREAD, otherwise the result of
 * compat_readv().  The file's own position is not modified.
 */
static long do_compat_preadv64(unsigned long fd,
			       const struct compat_iovec __user *vec,
			       unsigned long vlen, loff_t pos, int flags)
{
	struct fd desc;
	ssize_t nread;

	if (pos < 0)
		return -EINVAL;

	desc = fdget(fd);
	if (!desc.file)
		return -EBADF;

	if (desc.file->f_mode & FMODE_PREAD)
		nread = compat_readv(desc.file, vec, vlen, &pos, flags);
	else
		nread = -ESPIPE;

	fdput(desc);
	return nread;
}
Apart from the bad formatting, you can easily see that do_readv and do_preadv both call vfs_readv. Nothing in those functions suggests that they do any reading on their own.
You can also see that vfs_readv doesn't do any reading either; it only calls do_readv_writev.
The actual reading is done here:
if (type == READ) {
fn = file->f_op->read;
iter_fn = file->f_op->read_iter;
}
...
if (iter_fn)
ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags);
else
ret = do_loop_readv_writev(file, &iter, pos, fn, flags);
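Well, it is not the actual reading, but it is the closest to reading that you can get from your code snippet.
What happens inside these functions and, more importantly, what was stored in iter_fn and fn is not visible from your code: those are function pointers taken from the file's f_op table, so the real read is implemented by whichever filesystem or driver owns the file.
I am no Linux expert, so I cannot give you more details.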
Related
I wrote a kernel module demonstrating on how ioctl works.
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kdev_t.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
/* First minor number requested from alloc_chrdev_region(). */
int base_minor = 0;
/* Name used for both the chrdev region and the created device node. */
char *device_name = "msg";
/* Number of minors/devices registered. */
int count = 1;
/* Device number filled in by alloc_chrdev_region(). */
dev_t devicenumber;
static struct class *class = NULL;
static struct device *device = NULL;
static struct cdev mycdev;
/* Size of the single in-kernel message buffer shared by all openers. */
#define MAX_SIZE 1024
char kernel_buffer[MAX_SIZE];
/* Count of valid bytes in kernel_buffer; reset on open, raised by write and the fill ioctl. */
int buffer_index;
MODULE_LICENSE("GPL");
/*
 * open() handler: logs the call, rewinds the file position and resets the
 * global count of valid bytes in kernel_buffer.  Always succeeds.
 */
static int device_open(struct inode *inode, struct file *file)
{
	pr_info("%s\n", __func__);

	/* Every open starts from an empty, rewound view of the buffer. */
	buffer_index = 0;
	file->f_pos = 0;

	return 0;
}
/* release() handler: nothing to tear down, only logs the call. */
static int device_release(struct inode *inode, struct file *file)
{
	pr_info("%s\n", __func__);

	return 0;
}
/*
 * read() handler: copy up to @read_count bytes of valid buffer content,
 * starting at *@offset, to user space.
 *
 * The readable region ends at buffer_index (clamped to MAX_SIZE).
 *
 * Fixes over the previous version:
 *  - the copy length is min(@read_count, bytes available); before, it was
 *    overwritten with "buffer_index - *offset" and could exceed what the
 *    caller asked for, overrunning the user buffer;
 *  - reading at or past the end returns 0 (EOF) instead of -ENOSPC;
 *  - a copy that transfers nothing returns -EFAULT instead of 0;
 *  - the log line prints the copy_to_user() result it claims to print.
 *
 * Returns the number of bytes copied, 0 at EOF, -EINVAL for a negative
 * offset, or -EFAULT when no byte could be copied.
 */
static ssize_t device_read(struct file *file, char __user *user_buffer,
			   size_t read_count, loff_t *offset)
{
	loff_t data_end = min_t(loff_t, buffer_index, MAX_SIZE);
	unsigned long not_copied;
	size_t bytes_to_read;
	size_t bytes_read;

	pr_info("%s read offset:%lld\n", __func__, *offset);

	if (*offset < 0)
		return -EINVAL;

	/* Nothing valid at or beyond the end of the written data. */
	if (*offset >= data_end)
		return 0;

	bytes_to_read = min_t(size_t, read_count, data_end - *offset);
	pr_info("bytes_to_read:%zu\n", bytes_to_read);
	if (bytes_to_read == 0)
		return 0;

	not_copied = copy_to_user(user_buffer, kernel_buffer + *offset,
				  bytes_to_read);
	pr_info("%s: Copy to user returned:%lu\n", __func__, not_copied);
	if (not_copied == bytes_to_read)
		return -EFAULT;

	bytes_read = bytes_to_read - not_copied;
	/* update file offset */
	*offset += bytes_read;
	return bytes_read;
}
/*
 * write() handler: copy up to @write_count bytes from user space into
 * kernel_buffer at *@offset.
 *
 * Fixes over the previous version:
 *  - offsets outside [0, MAX_SIZE) are rejected up front; before, an
 *    offset above MAX_SIZE produced a negative "available space" that was
 *    then compared against an unsigned count;
 *  - a zero-length write returns 0 rather than -ENOSPC;
 *  - a copy that transfers nothing returns -EFAULT instead of 0;
 *  - the buffer is logged with a bounded "%.*s" because it is not
 *    guaranteed to be NUL-terminated;
 *  - buffer_index becomes the high-water mark (offset + bytes written)
 *    instead of a running sum that could exceed MAX_SIZE.
 *
 * Returns the number of bytes stored, -EINVAL for a negative offset,
 * -ENOSPC when the buffer is full at *@offset, or -EFAULT.
 */
static ssize_t device_write(struct file *file, const char __user *user_buffer,
			    size_t write_count, loff_t *offset)
{
	unsigned long not_copied;
	size_t bytes_to_write;
	size_t bytes_written;

	pr_info("%s write offset:%lld\n", __func__, *offset);

	if (*offset < 0)
		return -EINVAL;
	if (write_count == 0)
		return 0;
	if (*offset >= MAX_SIZE) {
		pr_err("%s: No available space in the buffer for writing\n",
		       __func__);
		return -ENOSPC;
	}

	bytes_to_write = min_t(size_t, write_count, MAX_SIZE - *offset);
	not_copied = copy_from_user(kernel_buffer + *offset, user_buffer,
				    bytes_to_write);
	if (not_copied == bytes_to_write)
		return -EFAULT;

	bytes_written = bytes_to_write - not_copied;
	pr_info("%s: Bytes written:%zu\n", __func__, bytes_written);
	pr_info("%s: kernel_buffer:%.*s\n", __func__, MAX_SIZE, kernel_buffer);

	/* update file offset */
	*offset += bytes_written;
	/* Valid data now reaches at least the end of this write. */
	if (*offset > buffer_index)
		buffer_index = *offset;
	return bytes_written;
}
/*
 * llseek() handler over the fixed MAX_SIZE-byte buffer.
 *
 * @orig is the whence value: 0 (SEEK_SET), 1 (SEEK_CUR) or 2 (SEEK_END).
 * The resulting position is clamped to [0, MAX_SIZE], as before.
 *
 * Fixes over the previous version:
 *  - SEEK_END computes "end + offset" (it subtracted the offset, so a
 *    negative offset moved past the end instead of back from it);
 *  - an unknown whence returns -EINVAL instead of silently seeking to 0.
 *
 * Returns the new position or -EINVAL.
 */
static loff_t device_lseek(struct file *file, loff_t offset, int orig)
{
	loff_t new_pos;

	switch (orig) {
	case 0: /* seek set */
		new_pos = offset;
		break;
	case 1: /* seek cur */
		new_pos = file->f_pos + offset;
		break;
	case 2: /* seek end */
		new_pos = MAX_SIZE + offset;
		break;
	default:
		return -EINVAL;
	}

	if (new_pos > MAX_SIZE)
		new_pos = MAX_SIZE;
	if (new_pos < 0)
		new_pos = 0;

	file->f_pos = new_pos;
	return new_pos;
}
long device_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
unsigned char ch;
pr_info("%s: Cmd:%u\t Arg:%lu\n", __func__, cmd, arg);
switch(cmd)
{
//Get Length of buffer
case 0x01:
pr_info("Get Buffer Length\n");
put_user(MAX_SIZE, (unsigned int *)arg);
break;
//clear buffer
case 0x02:
pr_info("Clear buffer\n");
memset(kernel_buffer, 0, sizeof(kernel_buffer));
break;
//fill character
case 0x03:
get_user(ch, (unsigned char *)arg);
pr_info("Fill Character:%c\n", ch);
memset(kernel_buffer, ch, sizeof(kernel_buffer));
buffer_index = sizeof(kernel_buffer);
break;
default:
pr_info("Unknown Command:%u\n", cmd);
return -EINVAL;
}
return 0;
}
/*
 * Callback table handed to cdev_init() in test_hello_init().
 * NOTE(review): .owner is not set here; the init code assigns
 * mycdev.owner instead - confirm that this is sufficient.
 */
struct file_operations device_fops = {
.read = device_read,
.write = device_write,
.open = device_open,
.release = device_release,
.llseek = device_lseek,
.unlocked_ioctl = device_ioctl
};
/*
 * Module init: reserve a device number, create the class, register the
 * cdev and finally create the device node.
 *
 * Fixes over the previous version: every step is checked, the function
 * returns the real error code instead of 0 when something fails, and each
 * failure path releases what earlier steps acquired.  The cdev is added
 * before the device node is created so the node never exists without a
 * working driver behind it.
 *
 * Returns 0 on success or a negative errno.
 */
static int test_hello_init(void)
{
	int ret;

	ret = alloc_chrdev_region(&devicenumber, base_minor, count, device_name);
	if (ret) {
		printk("Device number registration Failed\n");
		return ret;
	}
	printk("Device number registered\n");
	printk("Major number received:%d\n", MAJOR(devicenumber));

	class = class_create(THIS_MODULE, "myclass");
	if (IS_ERR(class)) {
		ret = PTR_ERR(class);
		goto err_region;
	}

	cdev_init(&mycdev, &device_fops);
	mycdev.owner = THIS_MODULE;
	ret = cdev_add(&mycdev, devicenumber, count);
	if (ret)
		goto err_class;

	device = device_create(class, NULL, devicenumber, NULL, device_name);
	if (IS_ERR(device)) {
		ret = PTR_ERR(device);
		goto err_cdev;
	}

	return 0;

err_cdev:
	cdev_del(&mycdev);
err_class:
	class_destroy(class);
err_region:
	unregister_chrdev_region(devicenumber, count);
	return ret;
}
/*
 * Module exit: remove the device node and its class, delete the cdev and
 * give the device number back.
 */
static void test_hello_exit(void)
{
device_destroy(class, devicenumber);
class_destroy(class);
cdev_del(&mycdev);
unregister_chrdev_region(devicenumber, count);
}
/* Register the entry points run on module load and unload. */
module_init(test_hello_init);
module_exit(test_hello_exit);
Then I wrote a user-space program to exercise it:
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
/*
 * Exercise the /dev/msg driver: query the buffer length (ioctl 0x01),
 * clear the buffer (0x02), fill it with 'A' (0x03), rewind and read it
 * back.
 *
 * Fixes over the previous version: perror() is called only when a call
 * actually failed (errno is meaningless otherwise), the read() result is
 * kept in a signed ssize_t so -1 is not printed as a huge number, and the
 * terminator is placed right after the bytes actually read instead of at
 * a fixed index over uninitialised data.
 *
 * Exits with status 2 when the device cannot be opened or read.
 */
int main(int argc, char *argv[])
{
	char buffer[1024];
	unsigned int length = 0;
	unsigned char ch = 'A';
	ssize_t nread;
	int fd;

	(void)argc;
	(void)argv;

	fd = open("/dev/msg", O_RDWR);
	if (fd < 0) {
		perror("fd failed");
		exit(2);
	}

	/* Get Length - 0x01 */
	if (ioctl(fd, 0x01, &length) < 0)
		perror("ioctl 0x01");
	else
		printf("Length:%u\n", length);

	/* Clear buffer - 0x02 */
	if (ioctl(fd, 0x02) < 0)
		perror("ioctl 0x02");

	/* Set Character - 0x03 */
	if (ioctl(fd, 0x03, &ch) < 0)
		perror("ioctl");

	if (lseek(fd, 0, SEEK_SET) < 0)
		perror("lseek");

	/* Leave one byte for the terminator added below. */
	nread = read(fd, buffer, sizeof(buffer) - 1);
	if (nread < 0) {
		perror("Read");
		close(fd);
		exit(2);
	}
	printf("length:%zd\n", nread);

	buffer[nread] = '\0';
	printf("Buffer:%s\n", buffer);

	close(fd);
	return 0;
}
ioctl commands 1 and 3 work, but command 2 does not. Can you please point out the mistake in the code?
You should review the requirements for ioctl on the man page:
DESCRIPTION
The ioctl() system call manipulates the underlying device parameters of
special files. In particular, many operating characteristics of char‐
acter special files (e.g., terminals) may be controlled with ioctl()
requests. The argument fd must be an open file descriptor.
The second argument is a device-dependent request code. The third
argument is an untyped pointer to memory. It's traditionally char
*argp (from the days before void * was valid C), and will be so named
for this discussion.
An ioctl() request has encoded in it whether the argument is an in
parameter or out parameter, and the size of the argument argp in bytes.
Macros and defines used in specifying an ioctl() request are located in
the file <sys/ioctl.h>.
Maybe this question makes no sense, but I was wondering whether there is a "recommended practice" for opening a file descriptor for a device from inside the open function of a kernel module.
I have developed a simple Linux kernel module with its basic functions:
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/input.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Gaston");
MODULE_DESCRIPTION("A simple Linux char driver");
MODULE_VERSION("0.1");
/*
 * open() handler: logs the call and succeeds.
 *
 * Returns int (it was ssize_t): the .open member of struct
 * file_operations is int (*)(struct inode *, struct file *), so the old
 * return type made the initializer an incompatible pointer assignment.
 */
int exer_open(struct inode *pinode, struct file *pfile)
{
	printk(KERN_INFO "Device has been opened\n");
	return 0;
}
/* read() stub: transfers nothing and returns 0, which callers see as EOF. */
ssize_t exer_read(struct file *pfile, char __user *buffer,
		  size_t length, loff_t *offset)
{
	const ssize_t bytes_read = 0;

	return bytes_read;
}
/* write() stub: consumes nothing and returns 0. */
ssize_t exer_write(struct file *pfile, const char __user *buffer,
		   size_t length, loff_t *offset)
{
	const ssize_t bytes_written = 0;

	return bytes_written;
}
/*
 * release() handler: logs the call and succeeds.
 *
 * Returns int (it was ssize_t) to match the .release member of struct
 * file_operations, which is int (*)(struct inode *, struct file *).
 */
int exer_close(struct inode *pinode, struct file *pfile)
{
	printk(KERN_INFO "Device successfully closed\n");
	return 0;
}
/* Callback table registered with register_chrdev() in the init function. */
struct file_operations exer_file_operations = {
.owner = THIS_MODULE,
.open = exer_open,
.read = exer_read,
.write = exer_write,
.release = exer_close,
};
int exer_simple_module_init(void) {
printk(KERN_INFO "Initializing the LKM\n");
register_chrdev(240, "Simple Char Drv", &exer_file_operations);
return 0;
}
/* Module exit: release major 240 under the same name used at registration. */
void exer_simple_module_exit(void) {
unregister_chrdev(240, "Simple Char Drv");
}
/* Register the entry points run on module load and unload. */
module_init(exer_simple_module_init);
module_exit(exer_simple_module_exit);
I compiled it and no errors occurred.
Now I want to open the file descriptor of my device (BUTTON) so that I can manipulate it later from a user-space program, so I added the BUTTON device path and changed the open function like this:
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/input.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Gaston");
MODULE_DESCRIPTION("A simple Linux char driver");
MODULE_VERSION("0.1");
#define BTN_FILE_PATH "/dev/input/event0"
int file;
char *str = BTN_FILE_PATH;
ssize_t exer_open(struct inode *pinode, struct file *pfile) {
printk(KERN_INFO "Device has been opened\n");
if((file = open(str, O_RDONLY)) < 0) {
printk("simplekey: File can not open");
return(-1);
}
return 0;
}
ssize_t exer_read(struct file *pfile, char __user *buffer, size_t length, loff_t *offset) {
return 0;
}
ssize_t exer_write(struct file *pfile, const char __user *buffer, size_t length, loff_t *offset) {
return 0;
}
ssize_t exer_close(struct inode *pinode, struct file *pfile) {
printk(KERN_INFO "Device successfully closed\n");
return 0;
}
/* Callback table registered with register_chrdev() in the init function. */
struct file_operations exer_file_operations = {
.owner = THIS_MODULE,
.open = exer_open,
.read = exer_read,
.write = exer_write,
.release = exer_close,
};
int exer_simple_module_init(void) {
printk(KERN_INFO "Initializing the LKM\n");
register_chrdev(240, "Simple Char Drv", &exer_file_operations);
return 0;
}
/* Module exit: release major 240 under the same name used at registration. */
void exer_simple_module_exit(void) {
unregister_chrdev(240, "Simple Char Drv");
}
/* Register the entry points run on module load and unload. */
module_init(exer_simple_module_init);
module_exit(exer_simple_module_exit);
The problem is that when I try to compile the module now, the following errors are printed:
/home/gaston/ledshared/exer_simple_char_drv.c: In function
‘exer_open’: /home/gaston/ledshared/exer_simple_char_drv.c:32:13:
error: implicit declaration of function ‘open’
[-Werror=implicit-function-declaration]
if((file = open(str,O_RDONLY)) < 0) {
How can I fix the problem please ?
open() is a user-space function. The equivalent kernel-space function is filp_open(), but it returns a struct file * instead of an int file descriptor. The returned struct file * may encode an error instead of being a valid pointer. Use the IS_ERR(ptr) macro to check for that, and the PTR_ERR(ptr) macro to extract the error code (which will be a negated errno value).
Use of the filp_open function is discouraged, but here are some modifications to your code to use this function:
int exer_open(struct inode *pinode, struct file *pfile) {
struct file *f;
f = filp_open(str, O_RDONLY);
if (IS_ERR(f)) {
printk("simplekey: File can not open");
return(PTR_ERR(f));
}
pfile->private_data = f;
printk(KERN_INFO "Device has been opened\n");
return 0;
}
The close function should look something like this:
/*
 * release() handler: close the struct file stored in pfile->private_data.
 * Logs success only when filp_close() reports 0, and returns its result.
 */
int exer_close(struct inode *pinode, struct file *pfile)
{
	int status = filp_close(pfile->private_data, NULL);

	if (status != 0)
		return status;

	printk(KERN_INFO "Device successfully closed\n");
	return 0;
}
There is no legitimate way for a module to read from a struct file * directly into a user-space buffer or write from a user-space buffer to a struct file *, so an intermediate buffer in kernel memory is needed, so that kernel_read() or kernel_write() can be used to read or write the file:
/*
 * read() handler: read from the underlying file (pfile->private_data) into
 * a temporary kernel buffer and copy the data on to user space, in chunks
 * of at most MAX_BUF_SIZE bytes.
 *
 * Fix over the previous version: kernel_read() advances *offset itself
 * (the position pointer is handed down to the file's read path), so the
 * extra "*offset += rc" here moved the position twice for every chunk and
 * skipped data on position-aware files.  The manual increment is removed.
 *
 * Returns the total number of bytes delivered if any were; otherwise the
 * last status: 0 at end of file, -ENOMEM, -EFAULT, or the error from
 * kernel_read().
 */
ssize_t exer_read(struct file *pfile, char __user *buffer, size_t length, loff_t *offset)
{
	struct file *f = pfile->private_data;
	enum { MAX_BUF_SIZE = 4096 };
	size_t buf_size = 0;
	char *buf = NULL;
	ssize_t total = 0;
	ssize_t rc = 0;

	/* Allocate temporary buffer. */
	if (length) {
		buf_size = min_t(size_t, MAX_BUF_SIZE, length);
		buf = kmalloc(buf_size, GFP_KERNEL);
		if (buf == NULL)
			return -ENOMEM;
	}

	/* Read file to buffer in chunks. */
	do {
		size_t amount = min_t(size_t, length, buf_size);

		/* kernel_read() updates *offset by the amount it read. */
		rc = kernel_read(f, buf, amount, offset);
		if (rc > 0) {
			/* Have read some data from file. */
			if (copy_to_user(buffer, buf, rc) != 0) {
				/* Bad user memory! */
				rc = -EFAULT;
			} else {
				/* Update totals. */
				total += rc;
				buffer += rc;
				length -= rc;
				if (rc < amount) {
					/* Didn't read the full amount, so terminate early. */
					rc = 0;
				}
			}
		}
	} while (rc > 0 && length > 0);

	/* Free temporary buffer. */
	kfree(buf);

	if (total > 0)
		return total;
	return rc;
}
/*
 * write() handler: copy user data into a temporary kernel buffer and write
 * it to the underlying file (pfile->private_data), in chunks of at most
 * MAX_BUF_SIZE bytes.
 *
 * Fix over the previous version: kernel_write() advances *offset itself,
 * so the extra "*offset += rc" here moved the position twice for every
 * chunk and left holes on position-aware files.  The manual increment is
 * removed.
 *
 * Returns the total number of bytes written if any were; otherwise the
 * last status: 0, -ENOMEM, -EFAULT, or the error from kernel_write().
 */
ssize_t exer_write(struct file *pfile, const char __user *buffer, size_t length, loff_t *offset)
{
	struct file *f = pfile->private_data;
	enum { MAX_BUF_SIZE = 4096 };
	size_t buf_size = 0;
	char *buf = NULL;
	ssize_t total = 0;
	ssize_t rc = 0;

	/* Allocate temporary buffer. */
	if (length) {
		buf_size = min_t(size_t, MAX_BUF_SIZE, length);
		buf = kmalloc(buf_size, GFP_KERNEL);
		if (buf == NULL)
			return -ENOMEM;
	}

	/* Write file from buffer in chunks. */
	do {
		size_t amount = min_t(size_t, length, buf_size);

		if (copy_from_user(buf, buffer, amount) != 0) {
			/* Bad user memory! */
			rc = -EFAULT;
		} else {
			/* kernel_write() updates *offset by the amount it wrote. */
			rc = kernel_write(f, buf, amount, offset);
			if (rc > 0) {
				/* Have written some data to file: update totals. */
				total += rc;
				buffer += rc;
				length -= rc;
				if (rc < amount) {
					/* Didn't write the full amount, so terminate early. */
					rc = 0;
				}
			}
		}
	} while (rc > 0 && length > 0);

	/* Free temporary buffer. */
	kfree(buf);

	if (total > 0)
		return total;
	return rc;
}
I was looking at the old Linux kernel code (3.10.1), particularly the IO path.
So when the IO enters the VFS layer, the function vfs_write() is called.
Here I can see a call to file->f_op->write(), which is a blocking call as the man page of the system call write() says.
The other option in the code is when file->f_op->write pointer is not defined, in that case vfs_write() calls do_sync_write().
do_sync_write() goes ahead and calls filp->f_op->aio_write(), which is an async call as the man page of aio_write() explains.
Now, my question is, why was the function do_sync_write() named "sync", when it clearly goes on to call an async IO function?
I might be missing something probably, or there was a blunder made here back in those times?
Function definitions for reference,
/*
 * Write @count bytes from the user buffer @buf to @file at *@pos.
 *
 * Fails with -EBADF when the file is not open for writing, -EINVAL when
 * it has neither a ->write nor an ->aio_write callback, -EFAULT when the
 * user range is not accessible, or with the error from rw_verify_area().
 * Otherwise the (possibly reduced) count returned by rw_verify_area() is
 * written through ->write when present, else through do_sync_write().
 *
 * A positive result triggers the modify notification and write-byte
 * accounting; the write-syscall counter is bumped whenever the write was
 * attempted.
 */
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
		return -EINVAL;
	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
		return -EFAULT;

	ret = rw_verify_area(WRITE, file, pos, count);
	if (ret < 0)
		return ret;

	/* rw_verify_area() hands back the byte count to actually use. */
	count = ret;

	file_start_write(file);
	ret = file->f_op->write
		? file->f_op->write(file, buf, count, pos)
		: do_sync_write(file, buf, count, pos);
	if (ret > 0) {
		fsnotify_modify(file);
		add_wchar(current, ret);
	}
	inc_syscw(current);
	file_end_write(file);

	return ret;
}
/*
 * Synchronous write built on the file's ->aio_write callback.
 *
 * Wraps the single user buffer in a one-element iovec, submits it through
 * ->aio_write on a stack kiocb and, if the callback reports that the
 * request was queued (-EIOCBQUEUED), waits for it to complete.  From the
 * caller's point of view the call therefore only returns once the write
 * is done, which is what makes it "sync".
 *
 * *@ppos is updated from the kiocb position; returns the write result.
 */
ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
	struct kiocb req;
	struct iovec vec;
	ssize_t written;

	vec.iov_base = (void __user *)buf;
	vec.iov_len = len;

	init_sync_kiocb(&req, filp);
	req.ki_pos = *ppos;
	req.ki_left = len;
	req.ki_nbytes = len;

	written = filp->f_op->aio_write(&req, &vec, 1, req.ki_pos);
	/* Queued asynchronously: block here until it has finished. */
	if (written == -EIOCBQUEUED)
		written = wait_on_sync_kiocb(&req);

	*ppos = req.ki_pos;
	return written;
}
why was the function do_sync_write() named "sync", when it clearly goes on to call an async IO function?
It calls async function and then waits for its completion with
ret = wait_on_sync_kiocb(&kiocb);
So from the view of the caller of do_sync_write function, the whole function behavior is synced.
I have written this code, but I have a problem.
First, I have a function that creates a file descriptor (fd):
int fd;//global
/*
 * Create a fresh, empty "file descriptor.txt" opened write-only and return
 * its descriptor.
 *
 * The @fd argument is ignored: it is a by-value copy that is simply
 * overwritten, and because it has the same name as the global fd it
 * shadows it, so this function never updates the global.  Callers must
 * use the return value.
 *
 * Fixes over the previous version: failure is reported on stderr together
 * with the errno reason, and the process exits with a failure status
 * (exit(0) reported success to the parent).  O_TRUNC guarantees an empty
 * file even if the preceding remove() did not succeed.
 */
static int init_fd(int fd)
{
	/* Best effort: the file may simply not exist yet. */
	(void)remove("file descriptor.txt");

	fd = open("file descriptor.txt", O_WRONLY | O_CREAT | O_TRUNC, 0666);
	if (fd == -1) {
		perror("Error in opening the file descriptor!");
		exit(EXIT_FAILURE);
	}
	return fd;
}
the second function is a handler function
static int handler(struct connection *conn, enum event ev) {
...
int i;
for (i = 0; i < array_size; i++) {
if (!strncmp(conn->uri, uri_array[i], strlen(uri_array[i]))) {
func_array[i](conn->request_method, conn->uri, NULL, init_fd(fd));
close(fd);
fd = open("file descriptor.txt", O_RDONLY);
ret = read(fd, &buf, BUFSIZ);
if (ret == -1) {
printf("Error in reading!\n");
exit(0);
}
...
}
where func_array is an array of function pointers
httpCallback_t func_array[MAXARRAY];
and the function is
/*
 * Callback that answers a request by sending the "200 OK" header set to
 * @fd and then closing @fd.
 *
 * @method, @path and @options are accepted for the callback signature but
 * not used.  The dead locals of the previous version (an unused string
 * and a result that was stored but never read) are removed.
 *
 * NOTE(review): the result of sendHeaders() is discarded, so a failed
 * send goes unnoticed - decide how errors should be reported.
 */
void http_serve1(const char *method, const char *path, const httpOptions_t *options, int fd)
{
	(void)method;
	(void)path;
	(void)options;

	/* send header: 200 OK */
	(void)sendHeaders(fd, TIMEOUT_SEC, NETHTTP_HTTP_HEADER_200,
			  NETHTTP_Content_Type_text_html_utf8, NETHTTP_CRLF, NULL);

	/* close the file descriptor */
	close(fd);
}
and the function sendHeaders is
/*
 * Write a NULL-terminated list of header strings to @fd.
 *
 * @fd:      descriptor to write to.
 * @seconds: accepted for interface compatibility; not used here.
 * @header1: first header; the variadic list that follows must end with a
 *           NULL pointer.
 *
 * Fixes over the previous version:
 *  - it called itself for every header with no base case, recursing until
 *    the stack overflowed (the reported segmentation fault);
 *  - the loop returned on its first iteration, so later headers were never
 *    sent;
 *  - "result < 0" could never be true for a size_t;
 *  - write() was handed an undeclared pointer array and a fixed length of
 *    1024 instead of the header text and its real length.
 *
 * Returns the total number of bytes written and records SUCCESS through
 * setErrorCode(); on a write failure records and returns ERROR.
 * NOTE(review): the old code returned SUCCESS at the end but a byte total
 * inside the loop; the byte total is assumed to be the intended contract -
 * confirm.
 */
size_t sendHeaders(int fd, int seconds, const char* header1, ...)
{
	va_list args;
	size_t totalSize = 0;
	const char *hdr;

	(void)seconds;

	va_start(args, header1);
	for (hdr = header1; hdr != NULL; hdr = va_arg(args, const char *)) {
		size_t len = strlen(hdr);
		size_t sent = 0;

		/* write() may be partial: keep going until the header is out. */
		while (sent < len) {
			ssize_t n = write(fd, hdr + sent, len - sent);

			if (n <= 0) {
				va_end(args);
				setErrorCode(ERROR);
				return ERROR;
			}
			sent += (size_t)n;
		}
		totalSize += len;
	}
	va_end(args);

	setErrorCode(SUCCESS);
	return totalSize;
}
My problem is that my code creates the file, but nothing is written into it, and at run time I get a segmentation fault. Does anyone have a suggestion?
I'm trying to allocate some memory for a char* as follows.
/*
 * write() handler: replace the stored memo with the @count bytes at @buf.
 *
 * Fixes over the previous version:
 *  - strlen() was called directly on @buf, a user-space pointer that the
 *    kernel must not dereference and that need not be NUL-terminated; the
 *    length is now taken from @count;
 *  - a failed kmalloc() only logged and then went on to use the NULL
 *    pointer; it now returns -ENOMEM;
 *  - the previous buffer was leaked on every write; it is now freed once
 *    the new one is complete.
 *
 * One extra byte is allocated and set to '\0' because the read side of
 * this module treats data as a C string.
 *
 * Returns @count on success, -ENOMEM or -EFAULT on failure (the old memo
 * is kept in that case).
 */
static ssize_t memo_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *f_pos)
{
	char *newdata;

	printk("write function\n");

	newdata = kmalloc(count + 1, GFP_KERNEL);
	if (newdata == NULL) {
		printk("kmalloc fail\n");
		return -ENOMEM;
	}

	if (copy_from_user(newdata, buf, count)) {
		kfree(newdata);
		return -EFAULT;
	}
	newdata[count] = '\0';

	/* Swap in the new memo only after it has been fully copied. */
	kfree(data);
	data = newdata;

	*f_pos += count;
	return count;
}
'data' is declared in a header file as
char *data;
When I call the write function, the 'kmalloc fail' line isn't reached, which leads me to believe the kmalloc succeeded, however the data isn't displayed when I try to read from the 'data' variable again.
More confusingly, if I get rid of the kmalloc bit altogether, the data can be read from the driver. Although the problem then is it is followed by a load of other data because i don't have the opportunity to memset() it.
Am I using kmalloc correctly? Presumably not. How should I be doing this?
Additionally, my read function is as follows.
/*
 * read() handler: copy the stored memo to user space, starting at *@f_pos.
 *
 * data is treated as a NUL-terminated string, as before.
 *
 * Fixes over the previous version:
 *  - at most @count bytes are copied; before, the whole string was copied
 *    regardless of the size of the caller's buffer;
 *  - the copy starts at *@f_pos instead of always at the beginning, so a
 *    read that resumes part-way through does not repeat data;
 *  - a NULL data pointer (nothing written yet) reads as EOF instead of
 *    being passed to strlen().
 *
 * Returns the number of bytes copied, 0 at end of data, or -EFAULT.
 */
static ssize_t memo_read(struct file *f, char __user *buf,
			 size_t count, loff_t *f_pos)
{
	size_t len;
	size_t remaining;

	printk("read function\n");

	if (data == NULL)
		return 0;

	printk("data = %s\n", data);
	len = strlen(data);

	if (*f_pos < 0 || *f_pos >= (loff_t)len) {
		printk("EOF\n");
		return 0;
	}

	remaining = len - *f_pos;
	if (count > remaining)
		count = remaining;

	if (copy_to_user(buf, data + *f_pos, count))
		return -EFAULT;

	printk("copy_to_user success\n");
	*f_pos += count;
	return count;
}
Thanks.
You should be using strlen_user() on the userspace pointer, instead of strlen() - and you should only call it once, and keep the result around (otherwise, you have a potential kernel exploit, because a second userspace thread could change the buffer while you're working on it).
Alternatively, you could use strncpy_from_user().
Apart from that, the kmalloc looks OK.
(But really, as ephemient says, you should rethink your whole approach and use the count argument instead of treating the input as a string).
Since you can't rely on the data written to a file being nul-terminated strings, you'll need to keep a data_len length parameter around alongside the data. Then your read/write implementations would be along these lines:
/* Current memo contents; NULL until the first successful write. */
static char *data = NULL;
/* Number of valid bytes in data (the contents are not NUL-terminated). */
static size_t data_len;
/* Taken by both the read and write handlers around accesses to data and data_len. */
static DEFINE_MUTEX(data_mutex);
/*
 * read() handler: copy up to @count bytes of the stored memo to user
 * space, starting at *@f_pos, under data_mutex.
 *
 * Fixes over the previous version: the parameter list was missing its
 * closing parenthesis (the function did not compile), and the position
 * and length comparisons no longer mix signed and unsigned operands.
 *
 * Returns the number of bytes copied, 0 at end of data, -EINVAL when
 * nothing has been written yet or the position is negative, or -EFAULT.
 */
static ssize_t memo_read(struct file *f, char __user *buf, size_t count, loff_t *f_pos)
{
	ssize_t retval = 0;
	size_t remaining;

	mutex_lock(&data_mutex);

	if (!data || *f_pos < 0) {
		retval = -EINVAL; /* Or whatever you want to do here... */
		goto out;
	}

	if (*f_pos >= (loff_t)data_len)
		goto out; /* EOF */

	remaining = data_len - *f_pos;
	if (remaining > count)
		remaining = count;

	if (copy_to_user(buf, data + *f_pos, remaining)) {
		retval = -EFAULT;
		goto out;
	}

	*f_pos += remaining;
	retval = remaining;

out:
	mutex_unlock(&data_mutex);
	return retval;
}
/*
 * write() handler: replace the stored memo with the @count bytes at @buf.
 *
 * Fixes over the previous version: when copy_from_user() failed, data was
 * freed but left pointing at the freed block with a stale data_len, so the
 * next read used freed memory and the next write freed it a second time.
 * The new contents are now built in a private buffer and swapped in under
 * the mutex only once complete; on any failure the old memo stays intact.
 * This also keeps the user copy, which may sleep on a page fault, outside
 * the lock.
 *
 * Returns @count on success, -ENOMEM or -EFAULT on failure.
 */
static ssize_t memo_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos)
{
	char *newdata;

	newdata = kmalloc(count, GFP_KERNEL);
	if (!newdata)
		return -ENOMEM;

	if (copy_from_user(newdata, buf, count)) {
		kfree(newdata);
		return -EFAULT;
	}

	mutex_lock(&data_mutex);
	kfree(data);
	data = newdata;
	data_len = count;
	mutex_unlock(&data_mutex);

	*f_pos = count;
	return count;
}
Don't forget to kfree(data) in your error cases...
In any case, buf is a pointer to user memory, so DON'T call strlen(buf). You must copy_from_user first. Why not
data = kmalloc(count);
copy_from_user(data, buf, count);
?
Your read handler assumes that data is a NUL-terminated string. When you were using an array, this may have been true by accident, but you never actually ensure this in your write handler. My guess is that copy_to_user fails.
Here's a working example of a "memo" module that I wrote up just now, using kmalloc:
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/uaccess.h>
/* Current memo contents (kmalloc'd by memo_write); NULL until the first write. */
static char *data;
/* Number of valid bytes in data. */
static size_t len;
/*
 * read() handler: copy up to @count bytes of the memo to user space,
 * starting at *@ppos.
 *
 * Fix over the previous version: the length was computed with
 * min(len, *ppos), which mixes size_t and loff_t; the kernel's min()
 * rejects operands of different types, and a negative position would have
 * been compared as a huge unsigned value.  The bounds are now checked
 * explicitly and min() is only applied to two size_t values.
 *
 * Returns the number of bytes copied, 0 at or past the end of the memo,
 * -EINVAL for a negative position, or -EFAULT.
 */
static ssize_t
memo_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	size_t avail;
	size_t copy_len;

	if (*ppos < 0)
		return -EINVAL;
	if (*ppos >= (loff_t)len)
		return 0;

	avail = len - *ppos;
	copy_len = min(avail, count);

	if (copy_to_user(buf, data + *ppos, copy_len))
		return -EFAULT;

	*ppos += copy_len;
	return copy_len;
}
/*
 * write() handler: replace the memo with the @count bytes at @buf.
 *
 * The new contents are assembled in a private buffer first, so a failed
 * allocation or user copy leaves the existing memo untouched.
 *
 * Returns @count on success, -ENOMEM or -EFAULT on failure.
 */
static ssize_t
memo_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
	char *fresh = kmalloc(count, GFP_KERNEL);

	if (fresh == NULL)
		return -ENOMEM;

	if (copy_from_user(fresh, buf, count) != 0) {
		kfree(fresh);
		return -EFAULT;
	}

	/* Commit: drop the old memo and publish the new one. */
	kfree(data);
	data = fresh;
	len = count;

	return count;
}
/* Callbacks for the memo device: read and write only, seeking routed to no_llseek. */
static const struct file_operations memo_fops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.read = memo_read,
.write = memo_write,
};
/* Misc device named "memo" with a dynamically assigned minor number. */
static struct miscdevice memo_misc = { MISC_DYNAMIC_MINOR, "memo", &memo_fops };
/*
 * Module init: register the "memo" misc device.
 * Any registration failure is reported to the loader as -ENODEV.
 */
static int __init memo_init(void)
{
	if (misc_register(&memo_misc) < 0)
		return -ENODEV;

	return 0;
}
/* Module exit: unregister the misc device, then free the stored memo. */
static void __exit memo_exit(void)
{
	misc_deregister(&memo_misc);

	/* kfree() accepts NULL, so this is safe even if nothing was written. */
	kfree(data);
}
module_init(memo_init);
module_exit(memo_exit);
MODULE_AUTHOR("ephemient");
MODULE_LICENSE("GPL");
Of course this is missing locking and other safety precautions, but I hope this helps.