I writing a kernel module which accepts data(form of a structure) and passes back some data(same structure format) to the user-space. I can successfully receive the message from user but i get segmentation fault while i try tp dereference some of the members of the structure of the data received from kernel. I am using netlink socket API.
Sample code is as below:
user-space.c
#define NETLINK_USER 27
#define MAX_PAYLOAD 10000 /* maximum payload size*/
struct sockaddr_nl src_addr, dest_addr;
struct nlmsghdr *nlh = NULL;
struct iovec iov;
int sock_fd;
struct msghdr msg;
Response *p;
Response *req;
test r1;
test r2;
int main()
{
char *data;
data = malloc(4096 * sizeof(char));;
data = "data from user";
sock_fd=socket(PF_NETLINK, SOCK_RAW, NETLINK_USER);
if(sock_fd<0)
return -1;
memset(&src_addr, 0, sizeof(src_addr));
src_addr.nl_family = AF_NETLINK;
src_addr.nl_pid = getpid();
bind(sock_fd, (struct sockaddr*)&src_addr, sizeof(src_addr));
memset(&dest_addr, 0, sizeof(dest_addr));
memset(&dest_addr, 0, sizeof(dest_addr));
dest_addr.nl_family = AF_NETLINK;
dest_addr.nl_pid = 0;
dest_addr.nl_groups = 0;
nlh = (struct nlmsghdr *)malloc(NLMSG_SPACE(MAX_PAYLOAD));
memset(nlh, 0, NLMSG_SPACE(MAX_PAYLOAD));
nlh->nlmsg_len = NLMSG_LENGTH(MAX_PAYLOAD);
nlh->nlmsg_pid = getpid();
nlh->nlmsg_flags = 0;
p = malloc(2*sizeof(Response));
p[0].index = 1;
p[0].dataSize = 2;
p[0].data = data;
p[0].test2 = 3;
p[0].test3 = 4;
p[0].test4 = 5;
r1.t = 10;
r1.ip_addr = malloc(50*sizeof(char));
r1.ip_addr = "192.168.10.2";
p[0].test = &r1;
/* Extra for testing */
p[1].index = 2;
p[1].dataSize = 3;
//strcpy(p[1].data , "Data2 from User");
p[1].data = data;
p[1].test2 = 4;
p[1].test3 = 5;
p[1].test4 = 6;
r2.t = 20;
r2.ip_addr = malloc(50*sizeof(char));
r2.ip_addr = "192.168.20.2";
p[1].test = &r2;
memcpy(NLMSG_DATA(nlh), (void *)p, 2 * sizeof(*p));
iov.iov_base = (void *)nlh;
iov.iov_len = NLMSG_SPACE(MAX_PAYLOAD);
msg.msg_name = (void *)&dest_addr;
msg.msg_namelen = sizeof(dest_addr);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
printf("Sending message to kernel\n");
sendmsg(sock_fd,&msg,0);
printf("Waiting for message from kernel\n");
/* Read message from kernel */
//recvmsg(sock_fd, &msg, 0);
req = (Response*)NLMSG_DATA(nlh);
printf("Recieved from Kernel:\n");
printf("index %d\n", req[0].index);
printf("dataSize %d\n", req[0].dataSize);
printf("data: %s\n", req[0].data); <---Segmentation fault from here
printf("test2 %d\n", req[0].test2);
printf("test3 %d\n", req[0].test3);
printf("test4 %d\n", req[0].test4);
printf("contents of test structure are %d,%s\n",req[0].test->t,req[0].test->ip_addr); <-----Segmentation fault
close(sock_fd);
Kernel_module.c
#include <linux/module.h>
#include <net/sock.h>
#include <linux/netlink.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#define NETLINK_USER 27
struct sock *nl_sk = NULL;
typedef struct _Response Response;
typedef struct _test test;
struct _test{
int t;
char *ip_addr;
};
struct _Response
{
int index;
int dataSize;
char *data;
int test2;
int test3;
int test4;
test *test;
};
static void hello_nl_recv_msg(struct sk_buff *skb) {
struct nlmsghdr *nlh;
int pid;
struct sk_buff *skb_out;
int msg_size;
int res;
Response *req;
Response *req1;
test t;
t.t = 1;
t.ip_addr = kmalloc(50*sizeof(char), GFP_KERNEL);
t.ip_addr = "129.63.45.1";
req1 = (Response *)kmalloc_array(2, sizeof(Response), GFP_KERNEL);
char *data;
data = kmalloc(4096 * sizeof(char), GFP_KERNEL);
data = "data from kernel";
printk(KERN_INFO "Entering: %s\n", __FUNCTION__);
msg_size= 2 * sizeof(*req1);
req1[0].index = 100;
req1[0].dataSize = 100;
req1[0].data = data;
req1[0].test2 = 100;
req1[0].test3 = 100;
req1[0].test4 = 100;
req1[0].test = &t;
printk("Sending to Userspace:\n");
/*Second set of message*/
req1[1].index = 102;
req1[1].dataSize = 103;
//strcpy(req1[1].data , "Data from Kernel");
req1[1].data = data;
req1[1].test2 = 100;
req1[1].test3 = 100;
req1[1].test4 = 100;
req1[1].test = &t;
nlh=(struct nlmsghdr*)skb->data;
req = (Response *)NLMSG_DATA(nlh); <--message received from user-space
printk("Recieved from Userspace:\n");
printk("index %d\n", req[0].index);
printk("dataSize %d\n", req[0].dataSize);
printk("data: %s\n", req[0].data);
printk("test2 %d\n", req[0].test2);
printk("test3 %d\n", req[0].test3);
printk("test4 %d\n", req[0].test4);
printk("contents of test are %d, %s\n",req[0].test->t,req[0].test->ip_addr);
printk("Next set of data\n");
printk("Recieved from Userspace:\n");
printk("index %d\n", req[1].index);
printk("dataSize %d\n", req[1].dataSize);
printk("data: %s\n", req[1].data);
printk("test2 %d\n", req[1].test2);
printk("test3 %d\n", req[1].test3);
printk("test4 %d\n", req[1].test4);
printk("contents of test are %d, %s\n",req[1].test->t,req[1].test->ip_addr);
pid = nlh->nlmsg_pid; /*pid of sending process */
skb_out = nlmsg_new(10000,0);
if(!skb_out)
{
printk(KERN_ERR "Failed to allocate new skb\n");
return;
}
nlh=nlmsg_put(skb_out,0,0,NLMSG_DONE,10000,0);
NETLINK_CB(skb_out).dst_group = 0; /* not in mcast group */
/*End of second set*/
memcpy(NLMSG_DATA(nlh), req1, 2 * sizeof(*req1));
res=nlmsg_unicast(nl_sk,skb_out,pid);
if(res<0)
printk(KERN_INFO "Error while sending bak to user\n");
}
static int __init hello_init(void) {
printk("Entering: %s\n",__FUNCTION__);
struct netlink_kernel_cfg cfg = {
.input = hello_nl_recv_msg,
};
nl_sk = netlink_kernel_create(&init_net, NETLINK_USER, &cfg);
if(!nl_sk)
{
printk(KERN_ALERT "Error creating socket.\n");
return -10;
}
return 0;
}
static void __exit hello_exit(void) {
printk(KERN_INFO "exiting hello module\n");
netlink_kernel_release(nl_sk);
}
module_init(hello_init); module_exit(hello_exit);
MODULE_LICENSE("GPL");
}
global.h
#ifndef __GLOBAL_H
#define __GLOBAL_H
typedef struct _Response Response;
typedef struct _test test;
struct _test{
int t;
char *ip_addr;
};
struct _Response
{
int index;
int dataSize;
char *data;
int test2;
int test3;
int test4;
test *test;
};
#endif
Basically the problem is with the pointers. If i use char data[4096] instead of character pointer, i receive the message. But i get segmentation fault while dereferencing the test structure. How to resolve this issue?
Thanks
This code is very wrong and full critique is likely unnecessary, so I'll just point out 2 most obvious types of violations.
char *data;
data = kmalloc(4096 * sizeof(char), GFP_KERNEL);
data = "data from kernel";
First a nit: sizeof(char) is guaranteed to be 1. The real problem though is that this instantly overwrites the value of data, losing the pointer returned by malloc. This is present in multiple places in the code.
req = (Response *)NLMSG_DATA(nlh); <--message received from user-space
printk("Recieved from Userspace:\n");
printk("index %d\n", req[0].index);
printk("dataSize %d\n", req[0].dataSize);
printk("data: %s\n", req[0].data);
This code, while likely "works", is completely wrong. The userspace buffer (.data) cannot be safely accessed in this manner and on some architectures it cannot be accessed at all. It may "work" because the complete address space is divided into both the kernel and userspace and the kernel in principle has access there. Problems arise if the pointer is to something totally bogus or within the kernel, or maybe to something unmapped.
The userspace portion of the code tries to do a reverse trick and unsurprisingly that fails -- you can't access kernel memory or there would be no security whatsoever.
The problem only arises because the protocol is wrong. You should have a buffer allocated in userspace and a variable holding its size. Then you can tell the kernel where to put data (and how much tops).
Either way, I think you are not ready for kernel work at this point and as such strongly suggest sticking to userspace for the time being.
Related
I tried to use perf_event_open() to track all the store instructions to get their access address. I found only when I set attr.precise_ip > 0, I can get the non-zero address. But when I ran the same process on vm instead of host, the error massage was "Operation not supported", I can fix this problem by setting precise_ip = 0 on vm, but now I only get bunch of addresses equal to zero. I don't understand why precise_ip is related to the sample addrress which is not pointed out on document, and I also don't understand why I can't set precise_ip = 1 on vm while I can do it on host. Is there anybody can help me??
FYI: I use - cpu host option when I start vm using qemu-system-x86_64
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/fcntl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#define PERF_PAGES (1 + (1 << 16))
struct perf_sample {
struct perf_event_header header;
__u64 ip;
__u32 pid, tid; /* if PERF_SAMPLE_TID */
__u64 addr; /* if PERF_SAMPLE_ADDR */
__u64 weight; /* if PERF_SAMPLE_WEIGHT */
/* __u64 data_src; /\* if PERF_SAMPLE_DATA_SRC *\/ */
__u64 phy_addr;
};
int perf_event_open(struct perf_event_attr *attr,pid_t pid,int cpu,int group_fd,unsigned long flags)
{
return syscall(__NR_perf_event_open,attr,pid,cpu,group_fd,flags);
}
void workload()
{
int i,c=0;
for(i=0;i<100000000;i++)
{
c+=i*i;
c-=i*100;
c+=i*i*i/100;
}
}
int startup()
{
struct perf_event_attr attr;
memset(&attr,0,sizeof(struct perf_event_attr));
attr.type = PERF_TYPE_RAW;
attr.size = sizeof(struct perf_event_attr);
attr.config = 0x82d0;
attr.config1 = 0;
attr.sample_period = 1000;
attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR ;
attr.disabled = 0;
//attr.inherit = 1;
attr.exclude_kernel = 1;
attr.exclude_hv = 1;
attr.exclude_callchain_kernel = 1;
attr.exclude_callchain_user = 1;
attr.precise_ip = 1; // when i set attr.precise_ip = 0 , all the addr = 0;
int fd=perf_event_open(&attr,0,-1,-1,0);
if(fd<0)
{
perror("Cannot open perf fd!");
return -1;
}
return fd;
}
void scan_thread(struct perf_event_mmap_page *p)
{
char *pbuf = (char *)p + p->data_offset;
__sync_synchronize();
printf("%d,\n", p->data_size);
if(p->data_head == p->data_tail) {
return;
}
struct perf_event_header *ph = (void *)(pbuf + (p->data_tail % p->data_size));
struct perf_sample* ps;
switch(ph->type) {
case PERF_RECORD_SAMPLE:
ps = (struct perf_sample*)ph;
// assert(ps != NULL);
if(ps == NULL)
{
printf("null\n");
}
if(ps!= NULL && ps->addr != 0) {
printf("ip %lx\n", ps->ip);
printf("tid %d\n", ps->tid);
printf("addr: %lx \n", ps->addr);
}
//printf("addr, %lx\n", ps->addr);
//printf("phy addr, %lx\n", ps->phy_addr);
break;
default:
printf("type %d\n", ph->type);
break;
}
}
int main()
{
int fd = startup();
size_t mmap_size = sysconf(_SC_PAGESIZE) * PERF_PAGES;
struct perf_event_mmap_page *p = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
// start to perf
ioctl(fd,PERF_EVENT_IOC_ENABLE,0);
int a= 0;
while(1)
{
// uint64_t instructions;
// read(fd,&instructions,sizeof(instructions));
// printf("instructions=%ld\n",instructions);
// sleep(1);
workload();
scan_thread(p);
sleep(1);
}
}
I am new to DPDK and trying to create a packet to send it from one DPDK enabled machine to another connected directly via an ethernet. I modified an example/rxtx_callbacks/main.c provided with DPDK at both side. However, I am not receiving anything at the receiver. What wrong am I doing?
Modified function at transmitter: lcore_main is modified:
static __attribute__((noreturn)) void lcore_main()
{
uint16_t port;
struct ether_hdr *eth_hdr;
struct ether_addr daddr;
daddr.addr_bytes[0] = 116;
daddr.addr_bytes[1] = 225;
daddr.addr_bytes[2] = 228;
daddr.addr_bytes[3] = 204;
daddr.addr_bytes[4] = 106;
daddr.addr_bytes[5] = 82;
//rte_eth_macaddr_get(portid, &addr);
struct ipv4_hdr *ipv4_hdr;
int32_t i;
int ret;
RTE_ETH_FOREACH_DEV(port)
if (rte_eth_dev_socket_id(port) > 0 &&
rte_eth_dev_socket_id(port) !=
(int)rte_socket_id())
printf("WARNING, port %u is on remote NUMA node to "
"polling thread.\n\tPerformance will "
"not be optimal.\n", port);
printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
rte_lcore_id());
//struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
struct rte_mbuf *m_head[BURST_SIZE];
for (;;) {
RTE_ETH_FOREACH_DEV(port) {
if(rte_pktmbuf_alloc_bulk(mbuf_pool, m_head, BURST_SIZE)!=0)
{
printf("Allocation problem\n");
}
for(i = 0; i < BURST_SIZE; i++) {
eth_hdr = rte_pktmbuf_mtod(m_head[i], struct ether_hdr *);
//eth_hdr = (struct ether_hdr *)rte_pktmbuf_append(m_head[i],
// sizeof(struct ether_hdr));
eth_hdr->ether_type = htons(ETHER_TYPE_IPv4);
rte_memcpy(&(eth_hdr->s_addr), &addr, sizeof(struct ether_addr));
rte_memcpy(&(eth_hdr->d_addr), &daddr, sizeof(struct ether_addr));
}
const uint16_t nb_tx = rte_eth_tx_burst(port, 0, m_head, BURST_SIZE);
if (unlikely(nb_tx < BURST_SIZE)) {
uint16_t buf;
for (buf = nb_tx; buf < BURST_SIZE; buf++)
rte_pktmbuf_free(m_head[buf]);
}
}
}
}
receiver side RTE_ETH_FOREACH_DEV of tx part is modified to:
RTE_ETH_FOREACH_DEV(port) {
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, bufs, BURST_SIZE);
//printf("Number of Packets received %d\n", nb_rx);
for(i = 0; i < nb_rx; i++) {
//ipv4_hdr = rte_pktmbuf_mtod_offset(bufs[i], struct ipv4_hdr *,
// sizeof(struct ether_hdr));
//printf("Packet ip received %d\n", ipv4_hdr->src_addr);
eth_hdr = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
printf("Packet ip received %d\n", eth_hdr->ether_type);
}
if (unlikely(nb_rx == 0))
continue;
const uint16_t nb_tx = 0; // = rte_eth_tx_burst(port ^ 1, 0, bufs, nb_rx);
if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}
Please let me know if I missed something.
There are few issues with the code:
eth_hdr = rte_pktmbuf_mtod(m_head[i], struct ether_hdr *);
Unlike rte_pktmbuf_append(), the rte_pktmbuf_mtod() does not change the packet length, so it should be set manually before the tx.
eth_hdr->ether_type = htons(ETHER_TYPE_IPv4);
If we set ETHER_TYPE_IPv4, a correct IPv4 header must follow. So we need either to add the header or to change the ether_type.
rte_memcpy(&(eth_hdr->s_addr), &addr, sizeof(struct ether_addr));
Where is the source address comes from?
const uint16_t nb_tx = rte_eth_tx_burst(port, 0, m_head, BURST_SIZE);
Looks like we transmit a burst of zero-sized packets with invalid IPv4 headers. Please also make sure the source/destination addresses are correct.
As suggested by #andriy-berestovsky, I used rte_eth_stats_get() and it shows packets are present in ethernet ring via the field ipackets but rte_eth_rx_burst is not returning any packets. Full code is included here, please let me know what I am doing wrong. (I am using testpmd at transmitter side)
#include <stdint.h>
#include <inttypes.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_ip.h>
#include <rte_mbuf.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <signal.h>
#define MAX_SOURCE_SIZE (0x100000)
#define RX_RING_SIZE 1024
#define TX_RING_SIZE 1024
#define NUM_MBUFS 8191
#define MBUF_CACHE_SIZE 250
#define BURST_SIZE 32
static const struct rte_eth_conf port_conf_default = {
.rxmode = {
.max_rx_pkt_len = ETHER_MAX_LEN,
},
};
static struct {
uint64_t total_cycles;
uint64_t total_pkts;
} latency_numbers;
static volatile bool force_quit;
struct rte_mempool *mbuf_pool;
static void
signal_handler(int signum)
{
struct rte_eth_stats eth_stats;
int i;
if (signum == SIGINT || signum == SIGTERM) {
printf("\n\nSignal %d received, preparing to exit...\n",
signum);
RTE_ETH_FOREACH_DEV(i) {
rte_eth_stats_get(i, ð_stats);
printf("Total number of packets received %llu, dropped rx full %llu and rest= %llu, %llu, %llu\n", eth_stats.ipackets, eth_stats.imissed, eth_stats.ierrors, eth_stats.rx_nombuf, eth_stats.q_ipackets[0]);
}
force_quit = true;
}
}
struct ether_addr addr;
/*
* Initialises a given port using global settings and with the rx buffers
* coming from the mbuf_pool passed as parameter
*/
static inline int
port_init(uint16_t port, struct rte_mempool *mbuf_pool)
{
struct rte_eth_conf port_conf = port_conf_default;
const uint16_t rx_rings = 1, tx_rings = 1;
uint16_t nb_rxd = RX_RING_SIZE;
uint16_t nb_txd = TX_RING_SIZE;
int retval;
uint16_t q;
struct rte_eth_dev_info dev_info;
struct rte_eth_txconf txconf;
if (!rte_eth_dev_is_valid_port(port))
return -1;
rte_eth_dev_info_get(port, &dev_info);
if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
port_conf.txmode.offloads |=
DEV_TX_OFFLOAD_MBUF_FAST_FREE;
retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
if (retval != 0)
return retval;
retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
if (retval != 0) {
printf("Error in adjustment\n");
return retval;
}
for (q = 0; q < rx_rings; q++) {
retval = rte_eth_rx_queue_setup(port, q, nb_rxd,
rte_eth_dev_socket_id(port), NULL, mbuf_pool);
if (retval < 0) {
printf("RX queue setup prob\n");
return retval;
}
}
txconf = dev_info.default_txconf;
txconf.offloads = port_conf.txmode.offloads;
for (q = 0; q < tx_rings; q++) {
retval = rte_eth_tx_queue_setup(port, q, nb_txd,
rte_eth_dev_socket_id(port), &txconf);
if (retval < 0)
return retval;
}
retval = rte_eth_dev_start(port);
if (retval < 0) {
printf("Error in start\n");
return retval;
}
rte_eth_macaddr_get(port, &addr);
printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
(unsigned)port,
addr.addr_bytes[0], addr.addr_bytes[1],
addr.addr_bytes[2], addr.addr_bytes[3],
addr.addr_bytes[4], addr.addr_bytes[5]);
rte_eth_promiscuous_enable(port);
return 0;
}
/*
* Main thread that does the work, reading from INPUT_PORT
* and writing to OUTPUT_PORT
*/
static __attribute__((noreturn)) void
lcore_main(void)
{
uint16_t port;
struct ether_hdr *eth_hdr;
//struct ether_addr addr;
//rte_eth_macaddr_get(portid, &addr);
struct ipv4_hdr *ipv4_hdr;
int32_t i;
RTE_ETH_FOREACH_DEV(port)
{
if (rte_eth_dev_socket_id(port) > 0 &&
rte_eth_dev_socket_id(port) !=
(int)rte_socket_id())
printf("WARNING, port %u is on remote NUMA node to "
"polling thread.\n\tPerformance will "
"not be optimal.\n", port);
}
printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
rte_lcore_id());
for (;;) {
RTE_ETH_FOREACH_DEV(port) {
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, 0,bufs, BURST_SIZE);
for(i = 0; i < nb_rx; i++) {
ipv4_hdr = rte_pktmbuf_mtod_offset(bufs[i], struct ipv4_hdr *, sizeof(struct ether_hdr));
printf("Packet ip received %d\n", ipv4_hdr->src_addr);
}
if (unlikely(nb_rx == 0))
continue;
const uint16_t nb_tx = 0; // = rte_eth_tx_burst(port ^ 1, 0, bufs, nb_rx);
if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}
if(force_quit)
break;
}
}
/* Main function, does initialisation and calls the per-lcore functions */
int
main(int argc, char *argv[])
{
uint16_t nb_ports;
uint16_t portid, port;
/* init EAL */
int ret = rte_eal_init(argc, argv);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
argc -= ret;
argv += ret;
force_quit = false;
signal(SIGINT, signal_handler);
signal(SIGTERM, signal_handler);
nb_ports = rte_eth_dev_count_avail();
printf("size ordered %lld\n", NUM_MBUFS *nb_ports);
mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0,
RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
if (nb_ports < 1)
rte_exit(EXIT_FAILURE, "Error: number of ports must be greater than %d\n", nb_ports);
if (mbuf_pool == NULL)
rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
// initialize all ports
RTE_ETH_FOREACH_DEV(portid)
if (port_init(portid, mbuf_pool) != 0)
rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n",
portid);
if (rte_lcore_count() > 1)
printf("\nWARNING: Too much enabled lcores - "
"App uses only 1 lcore\n");
// call lcore_main on master core only
lcore_main();
return 0;
}
It seems to be a problem of ethernet card with ubuntu 14.04. With ubuntu 16.04 it is working fine.
I implement a memory mapping via mmap. My Kernel module writes something into this memory and a userspace application read this. In short I allocate 0x10000 memory (with kcalloc on kernel side and with mmap on userspace side). Then I write something to the address offsets 0x0, 0xf00 and 0xf000 using memcpy. On kernelside I can read back the memory correctly. But on userspace side the content of the first 0x1000 Bytes are repetitive through the whole memory (16 times). But why?
Her comes the code of the kernel module:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/mm.h>
#define DEV_MODULENAME "expdev"
#define DEV_CLASSNAME "expdevclass"
static int majorNumber;
static struct class *devClass = NULL;
static struct device *devDevice = NULL;
#ifndef VM_RESERVED
# define VM_RESERVED (VM_DONTEXPAND | VM_DONTDUMP)
#endif
struct mmap_info
{
char *data;
int reference;
};
static void
dev_vm_ops_open( struct vm_area_struct *vma )
{
struct mmap_info *info;
// counting how many applications mapping on this dataset
info = (struct mmap_info *)vma->vm_private_data;
info->reference++;
}
static void
dev_vm_ops_close( struct vm_area_struct *vma )
{
struct mmap_info *info;
info = (struct mmap_info *)vma->vm_private_data;
info->reference--;
}
static int
dev_vm_ops_fault( struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct page *page;
struct mmap_info *info;
info = (struct mmap_info *)vma->vm_private_data;
if (!info->data)
{
printk("No data\n");
return 0;
}
page = virt_to_page(info->data);
get_page(page);
vmf->page = page;
return 0;
}
static const struct vm_operations_struct dev_vm_ops =
{
.open = dev_vm_ops_open,
.close = dev_vm_ops_close,
.fault = dev_vm_ops_fault,
};
int
fops_mmap( struct file *filp,
struct vm_area_struct *vma)
{
vma->vm_ops = &dev_vm_ops;
vma->vm_flags |= VM_RESERVED;
vma->vm_private_data = filp->private_data;
dev_vm_ops_open(vma);
return 0;
}
int
fops_close( struct inode *inode,
struct file *filp)
{
struct mmap_info *info;
info = filp->private_data;
free_page((unsigned long)info->data);
kfree(info);
filp->private_data = NULL;
return 0;
}
int
fops_open( struct inode *inode,
struct file *p_file)
{
struct mmap_info *info;
char *data;
info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL);
// allocating memory on the heap for the data
data = kcalloc(0x10000,sizeof(char),GFP_KERNEL);
if( data==NULL )
{
printk(KERN_ERR "insufficient memory\n");
/* insufficient memory: you must handle this error! */
return ENOMEM;
}
info->data = data;
printk(KERN_INFO " > ->data: 0x%16p\n",info->data);
memcpy(info->data, "Initial entry on mapped memory by the kernel module", 52);
memcpy((info->data)+0xf00, "Somewhere", 9);
memcpy((info->data)+0xf000, "Somehow", 7);
printk(KERN_INFO " > ->data: %c%c%c\n", // the output here is correct
*(info->data+0xf000+0),
*(info->data+0xf000+1),
*(info->data+0xf000+2));
/* assign this info struct to the file */
p_file->private_data = info;
return 0;
}
static const struct file_operations dev_fops =
{
.open = fops_open,
.release = fops_close,
.mmap = fops_mmap,
};
static int __init
_module_init(void)
{
int ret = 0;
// Try to dynamically allocate a major number for the device
majorNumber = register_chrdev(0, DEV_MODULENAME, &dev_fops);
if (majorNumber<0)
{
printk(KERN_ALERT "Failed to register a major number.\n");
return -EIO; // I/O error
}
// Register the device class
devClass = class_create(THIS_MODULE, DEV_CLASSNAME);
// Check for error and clean up if there is
if (IS_ERR(devClass))
{
printk(KERN_ALERT "Failed to register device class.\n");
ret = PTR_ERR(devClass);
goto goto_unregister_chrdev;
}
// Create and register the device
devDevice = device_create(devClass,
NULL,
MKDEV(majorNumber, 0),
NULL,
DEV_MODULENAME
);
// Clean up if there is an error
if( IS_ERR(devDevice) )
{
printk(KERN_ALERT "Failed to create the device.\n");
ret = PTR_ERR(devDevice);
goto goto_class_destroy;
}
printk(KERN_INFO "Module registered.\n");
return ret;
// Error handling - using goto
goto_class_destroy:
class_destroy(devClass);
goto_unregister_chrdev:
unregister_chrdev(majorNumber, DEV_MODULENAME);
return ret;
}
static void __exit
_module_exit(void)
{
device_destroy(devClass, MKDEV(majorNumber, 0));
class_unregister(devClass);
class_destroy(devClass);
unregister_chrdev(majorNumber, DEV_MODULENAME);
printk(KERN_INFO "Module unregistered.\n");
}
module_init(_module_init);
module_exit(_module_exit);
MODULE_LICENSE("GPL");
here comes the code of the application
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#define PAGE_SIZE (0x10000)
int main ( int argc, char **argv )
{
int fd;
char *address = NULL;
time_t t = time(NULL);
char *sbuff;
int i;
sbuff = (char*) calloc(PAGE_SIZE,sizeof(char));
fd = open("/dev/expdev", O_RDWR);
if(fd < 0)
{
perror("Open call failed");
return -1;
}
address = mmap( NULL,
PAGE_SIZE,
PROT_READ|PROT_WRITE,
MAP_SHARED,
fd,
0);
if (address == MAP_FAILED)
{
perror("mmap operation failed");
return -1;
}
printf("%s: first userspace read\n",tbuff);
memcpy(sbuff, address,80);
printf("Initial message: %s\n", sbuff);
memcpy(sbuff, address+0xf00,80);
printf("Initial message: %s\n", sbuff);
memcpy(sbuff, address+0xf000,80);
printf("Initial message: %s\n", sbuff);
for(i=0; i<PAGE_SIZE; i++)
{
printf("%16p: %c\n",address+i, (char)*(address+i));
}
if (munmap(address, PAGE_SIZE) == -1)
{
perror("Error un-mmapping the file");
}
close(fd);
return 0;
}
and this is the output of the application:
0x7fe61b522000: I
0x7fe61b522001: n
0x7fe61b522002: i
0x7fe61b522003: t
0x7fe61b522004: i
0x7fe61b522005: a
0x7fe61b522006: l
...
0x7fe61b522f00: S
0x7fe61b522f01: o
0x7fe61b522f02: m
0x7fe61b522f03: e
0x7fe61b522f04: w
0x7fe61b522f05: h
0x7fe61b522f06: e
0x7fe61b522f07: r
0x7fe61b522f08: e
...
0x7fe61b523000: I
0x7fe61b523001: n
0x7fe61b523002: i
0x7fe61b523003: t
0x7fe61b523004: i
0x7fe61b523005: a
0x7fe61b523006: l
...
0x7fe61b523f00: S
0x7fe61b523f01: o
0x7fe61b523f02: m
0x7fe61b523f03: e
0x7fe61b523f04: w
0x7fe61b523f05: h
0x7fe61b523f06: e
0x7fe61b523f07: r
0x7fe61b523f08: e
...
0x7fe61b524000: I
0x7fe61b524001: n
0x7fe61b524002: i
0x7fe61b524003: t
0x7fe61b524004: i
0x7fe61b524005: a
0x7fe61b524006: l
...
It seems to me, that the repetition comes with the size of one page. But this makes no sense to me.
EDIT 1:
Add Somewhere to the output. Note: Only Somehow never occurs!
EDIT 2:
Corrected fault handler. This now considered the offset of the calling vmf. Now it runs like a charm. Thanks to Tsyvarev!
static int
dev_vm_ops_fault( struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct page *page;
struct mmap_info *info;
info = (struct mmap_info *)vma->vm_private_data;
if (!info->data)
{
printk("No data\n");
return 0;
}
page = virt_to_page((info->data)+(vmf->pgoff*PAGE_SIZE));
get_page(page);
vmf->page = page;
return 0;
}
But on userspace side the content of the first 0x1000`
0x1000 is a size of the page mapped with
page = virt_to_page(info->data);
get_page(page);
vmf->page = page;
Callback .fault of structure vm_operations_struct is called for every page (4096 bytes), which is accessed by the user but not mapped yet.
So your code just map first 4096 bytes (0x1000) of data to every page which user space accesses.
I am trying to get some message from kernel space to userspace, when a condition fails!
Here's my kernel code:
#define MESSAGTOUSER 1
int ret_val;
struct siginfo sinfo;
pid_t id;
struct task_struct *task;
unsigned char msgBuffer[20];
unsigned char buf1[20]= "HI";
static int major_no;
static struct class *safe_class;
static long device_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
static int device_open(struct inode *inode, struct file *file);
static int device_write(struct file *file, const char *gdata, size_t len, loff_t *off);
static int device_read(struct file *file, char *buf, size_t len, loff_t *off);
static int device_release(struct inode *inode, struct file *file);
int failureDetection (char* faultMsg) {
strcpy (msgBuffer, faultMsg);
printk(KERN_ALERT"\nMessage from HBM %s\n", msgBuffer);
printk(KERN_ALERT".......... RETURN VALUE ...... : %d", ret_val);
int Reg_Dev(void);
memset (&sinfo, 0, sizeof(struct siginfo));
sinfo.si_signo = SIGUSR1;
sinfo.si_code = SI_USER;
if (id == 0) {
printk("\ncan't find User PID: %d\n", id);
}else {
//task = pid_task(find_vpid(pid), PIDTYPE_PID);
task = find_task_by_vpid(id);
send_sig_info(SIGUSR1, &sinfo, task);
}
return 0;
}
static int device_open(struct inode *inode, struct file *file){
/*sucess*/
return 0;
}
void strPrint(void) {
printk("value of msgBuffer: %s", msgBuffer);
}
static int device_write(struct file *file, const char *gdata, size_t len, loff_t *off){
get_user (id,(int *)gdata);
if(id <0)
printk(KERN_ALERT"Cann't find PID from userspace its : %i", id);
else
printk(KERN_ALERT"Successfully received the PID of userspace %i", id);
return len;
}
static int
device_read(struct file *file, char *buf, size_t len, loff_t *off){
/*success*/
return 0;
}
static int device_release(struct inode *inode, struct file *file){
/*success*/
return 0;
}
static long device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) {
switch (cmd) {
case MESSAGTOUSER:
ret_val = copy_to_user((char *)arg, msgBuffer, sizeof(arg));
printk("Msg of Kernel %s", msgBuffer);
break;
default:
break;
}
return 0;
}
static struct file_operations fops = {
.open = device_open,
.write = device_write,
.read = device_read,
.release = device_release,
.unlocked_ioctl = device_ioctl
};
int Reg_Dev(void) {
major_no = register_chrdev(0, "safe_dev", &fops);
safe_class = class_create(THIS_MODULE, "safe_dev");
device_create(safe_class,NULL, MKDEV(major_no, 0), "safe_dev");
printk("\n Device Registered and Created \n");
return 0;
}
void UnReg_dev (void) {
printk("\nUser PID : %d\n", id);
unregister_chrdev(major_no, "safe_dev");
device_destroy(safe_class, MKDEV(major_no,0));
class_unregister(safe_class);
class_destroy(safe_class);
printk("\n Device Un-Registered and Destroyed \n");
}
extern int Reg_Dev(void);
for he userspace i have this code:
#define PORT 9930
#define G_IP "192.168.10.71"
#define BUFLEN 512
#define MESSAGTOUSER 0
unsigned char *str[20];
char b1[BUFLEN], b2[BUFLEN];
struct sockaddr_in me,client;
int s, i, n=sizeof(me);
int fd;
void error_handler(char *s) {
perror(s);
exit(1);
}
void signal_handler (int signum) {
if(signum == SIGUSR1)
{
printf("\n%s\n",str);
if((s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) == -1)
error_handler("\nERROR: in Socket\n");
memset((char *) &me, 0, sizeof(me));
me.sin_family = AF_INET;
me.sin_port = PORT;
if (inet_aton(G_IP, &me.sin_addr)==0)
{
fprintf(stderr, "inet_aton() failed\n");
exit(1);
}
printf("Message from Kernel : %s", &str);
//strcpy (str, newStr);
int cntr =0; sprintf(b2, "\nFailure Message: %s\n",str);
printf("\nsending Fault to PMN Group : Tick - %d\n", cntr++);
if(sendto(s, str, sizeof(str),0,(struct sockaddr *) &me,n)==-1)
error_handler("\nERROR: in sendto()\n");
close (s);
// counter ++;
// sendAndReceiveOverUDP();
return;
}
}
int main() {
pid_t u_id;
u_id = getpid();
int i = 1;
fd = open("/dev/safe_dev",O_RDWR);
write(fd, &u_id, 4);
ioctl (fd, MESSAGTOUSER, &str);
printf("\n PID sent to device successfully: %d \n", u_id);
close(fd);
signal(SIGUSR1, signal_handler);
printf("\nMy PID is: %d\n",u_id);
//printf("Subnet 1 working fine.. Tick - %d", tv.tv_sec);
while (1)
sleep(1);
return 0;
}
Now what I am expecting to receive on Userspace:
Message from Kernel: A<->B
Sending Fault o PMN Group : tick - 0
Message from Kernel: B<->B
Sending Fault o PMN Group : tick - 1
....
...
but what is the output:
Message from Kernel:
Sending Fault o PMN Group : tick - 0
Message from Kernel:
Sending Fault o PMN Group : tick - 1
....
...
It seems that copy_to_user is not working, while in simple program just copying a string from kernel to user is working fine, but while i am using in this scenario then its not working, its compiling without any warning,
Some other Details:
failureDetection() is getting a string like A<->B mentioned in output from rest of the programs..
the same message from failureDetection is printing on kernel level but not transferring at the user level.
I have also tried to create an own string in this and tried to transfer that, but it is also not working! suppose msgBuffer = HI, then I should receive HI on to the userspace. but its not happening! can anyone please please make me correct whats wrong with this code? how can i get updates onto the userspace!!??
Sindhu..
The copy_to_user() only happens in response to the ioctl(), which only happens once, very early on in your code. Presumably at that point the kernel buffer msgBuffer is empty, because the failureDetection() function has not yet run at that point. It doesn't matter if failureDetection() runs later and sets msgBuffer then, because your userspace program never calls the ioctl() again so it doesn't see the new contents of msgBuffer.
You also have a bug in your copy_to_user() call - instead of sizeof(args) (which is a constant 4) you should probably use sizeof msgBuffer.
#caf: Thank you so much..
void signal_handler (int signum) {
if(signum == SIGUSR1)
{
fd = open ("/dev/safe_dev",O_RDWR);
ioctl (fd, MESSAGTOUSER, &str);
close (fd);
if((s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) == -1)
error_handler("\nERROR: in Socket\n");
memset((char *) &me, 0, sizeof(me));
me.sin_family = AF_INET;
me.sin_port = PORT;
if (inet_aton(G_IP, &me.sin_addr)==0)
{
fprintf(stderr, "inet_aton() failed\n");
exit(1);
}
printf("Failure Detected on Eth Cards as : %s are non reachable.", str);
printf("\nsending Fault to PMN Group : Tick - %d\n", cntr++);
sprintf(b2, "\nFailure Message: %s\n",str);
if(sendto(s, str, sizeof(str),0,(struct sockaddr *) &me,n)==-1)
error_handler("\nERROR: in sendto()\n");
close (s);
return;
}
}
I was just making a stupid mistake.. hehehe.. i was not adding it in between file open and close block.. your suggestion resolved my issue...
Thank you so much for your response..
Rahee..
Getting seg-fault when I run this code. I commented where I'm getting the seg-fault (in handler() function). I'm not sure, may be I'm wrapping data twice that's why or what's the problem? It's printing correctly till "start_timer" method.
#include <time.h>
#include <stdio.h>
#include <signal.h>
#include <pthread.h>
#include <unistd.h>
#include <errno.h>
typedef struct _data{
char *name;
}data;
struct timer_list{
void* timer_data;
unsigned long expires;
void (*function)(sigval_t);
};
volatile long int second = 0;
void handler(sigval_t val)
{
data *data_handler = val.sival_ptr;
printf("Handler: address of data: %p\n", data_handler);
printf("Handler: address of &data_handler->name: %p\n", &data_handler->name);
printf("Handler entered with value :%s\n", data_handler->name); `**//**SEG-FAULT HERE****`
}
void timer_handler(union sigval val)
{
printf(" ----- Seconds: %ld\n", ++second);
}
/* start timer with all we got as data is timer */
void start_timer(struct timer_list *timer)
{
printf("\nStart_timer...: Timer->data address: %p\n", timer->timer_data);
data *data_handler = timer->timer_data;
printf("Start_timer...: entered with value :%s\n", data_handler->name);
int Ret;
pthread_attr_t attr;
pthread_attr_init( &attr );
//pthread_t tid;
struct sched_param parm;
parm.sched_priority = 255;
pthread_attr_setschedparam(&attr, &parm);
struct sigevent sig;
sigval_t val;
val.sival_ptr = timer->timer_data;
sig.sigev_notify = SIGEV_THREAD;
sig.sigev_notify_function = timer->function;
// sig.sigev_value.sival_int = val;
sig.sigev_value = val;
sig.sigev_notify_attributes = &attr;
data *data_handler1 = (data *)val.sival_ptr;
printf("From sigval...: handler_data address: %p\n", data_handler1);
printf("From sigval...: handler_data->name address: %p\n", &data_handler1->name);
printf("From sigval...: Handler entered with value :%s\n", data_handler1->name);
//create a new timer.
timer_t timerid;
Ret = timer_create(CLOCK_REALTIME, &sig, &timerid);
if (Ret == 0)
{
struct itimerspec in, out;
in.it_value.tv_sec = timer->expires;
in.it_value.tv_nsec = 0;
in.it_interval.tv_sec = 0;
in.it_interval.tv_nsec = 0;
timer_settime(timerid, 0, &in, &out);
}
}
/* Start_timer_on: wrapping up data into one timer structure, and starting timer */
void start_timer_on(data timer_data, unsigned long expires)
{
struct timer_list *timer = (struct timer_list *)malloc(sizeof(struct timer_list)); //Problem was here ... forgot to use malloc
timer->timer_data = &timer_data;
printf("\nTimer->data address: %p\n", &timer_data);
timer->function = handler;
timer->expires = expires;
start_timer(timer);
}
/* Main */
void main()
{
data handler_data1 = {"Handler Data 1"};
//data handler_data2 = {"Handler Data 2"};
//void *data1 = &handler_data1;
//void *data2 = &handler_data2;
pthread_attr_t attr;
pthread_attr_init( &attr );
struct sched_param parm;
parm.sched_priority = 255;
pthread_attr_setschedparam(&attr, &parm);
struct sigevent sig;
sig.sigev_notify = SIGEV_THREAD;
sig.sigev_notify_function = timer_handler;
sig.sigev_notify_attributes = &attr;
//create a new timer - clock.
timer_t timerid;
timer_create(CLOCK_REALTIME, &sig, &timerid);
struct itimerspec in, out;
in.it_value.tv_sec = 1;
in.it_value.tv_nsec = 0;
in.it_interval.tv_sec = 1;
in.it_interval.tv_nsec = 0;
printf("*** *** *** Main clock starts *** *** ***\n");
timer_settime(timerid, 0, &in, &out);
printf("***** Start timer for data1 for 2 sec *****\n");
start_timer_on(handler_data1, 2);
// printf("***** Start timer for data1 for 5 sec *****\n");
// start_timer(data2, 5);
sleep(20);
}
This might be the problem. In the code below, timer_data is local to function start_timer_on. The object is destroyed as soon as the function exits. So, when accessing the name in handler, it will segfault.
void start_timer_on(data timer_data, unsigned long expires)
{
struct timer_list *timer;
timer->timer_data = &timer_data;
printf("\nTimer->data address: %p\n", &timer_data);
timer->function = handler;
timer->expires = expires;
start_timer(timer);
}
You should use void start_timer_on(data *timer_data, unsigned long expires), so that the data is not freed until main exits.