inotify_add_watch fails on /sys/class/net/eth0/operstate - c

I have used inotify in Linux, to get the event raised when ever the network interface link changes. whenever interface link changes, /sys/class/net/eth40/operstate/ file gets modified. But in the below code snippet even though the file is getting modified, read function is still in blocked state.
#include <stdio.h>
#include <sys/inotify.h>
#include <stdlib.h>
#include <limits.h>
#include <signal.h>
#define FILE_TO_WATCH "/sys/class/net/eth40/operstate"
#define EVENT_SIZE (sizeof (struct inotify_event))
#define EVENT_BUFFER_LENGTH (1024 * EVENT_SIZE + NAME_MAX + 1)
void print_event(struct inotify_event *event) {
int ret = 0;
if (event->mask & IN_CREATE)
printf("file created in directory\n");
if (event->mask & IN_DELETE)
printf("file deleted in directory\n");
if (event->mask & IN_ACCESS)
printf("file accessed\n");
if (event->mask & IN_CLOSE)
printf("file closed after reading or writing \n");
if (event->mask & IN_OPEN)
printf("file opened\n");
if (event->len)
printf("name: %s\n", event->name);
}
int main(int argc, char** argv)
{
int notify_fd;
int watch_fd;
long input_len;
char *ptr;
char buffer[EVENT_BUFFER_LENGTH];
struct inotify_event *event;
notify_fd = inotify_init();
if (notify_fd < 0) {
perror("cannot init inotify");
exit(EXIT_FAILURE);
}
printf("done1\n");
watch_fd = inotify_add_watch(notify_fd,FILE_TO_WATCH,IN_ACCESS|IN_MODIFY);
if (watch_fd < 0) {
perror("cannot add file");
exit(EXIT_FAILURE);
}
printf("done2\n");
while (1) {
input_len = read(notify_fd, buffer, EVENT_BUFFER_LENGTH);
if (input_len <= 0) {
perror("error reading from inotify fd");
exit(EXIT_FAILURE);
}
printf("done3\n");
ptr = buffer;
while (ptr < buffer + input_len) {
event = (struct inotify_event *) ptr;
print_event(event);
ptr += sizeof (struct inotify_event) +event->len;
}
}
}
am'I missing something?

/sys is not a regular file system, but a special in-memory file system called sysfs
To quote a kernel developer:
inotify does not and will not work on sysfs. Or procfs. Or devpts.
Or any number of network filesystems. No matter how hard somebody
might wish it to work, that's simply not feasible.
For network link events you can use rtnetlink, though stuff like this is hardly documented, here's an starting point example that will show you link (and a few other) events, you'll have to figure out which events/flags and similar that is relevant for your particular case.
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <asm/types.h>
#include <asm/types.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/if.h>
#include <linux/rtnetlink.h>
#define ENTRY(x) {x, #x}
struct {
unsigned flag;
const char *name;
} ifi_flag_map[] = {
ENTRY(IFF_UP),
ENTRY(IFF_BROADCAST),
ENTRY(IFF_DEBUG),
ENTRY(IFF_LOOPBACK),
ENTRY(IFF_POINTOPOINT),
ENTRY(IFF_NOTRAILERS),
ENTRY(IFF_RUNNING),
ENTRY(IFF_NOARP),
ENTRY(IFF_PROMISC),
ENTRY(IFF_ALLMULTI),
ENTRY(IFF_MASTER),
ENTRY(IFF_SLAVE),
ENTRY(IFF_MULTICAST),
ENTRY(IFF_PORTSEL),
ENTRY(IFF_AUTOMEDIA),
ENTRY(IFF_DYNAMIC),
ENTRY(IFF_LOWER_UP),
ENTRY(IFF_DORMANT),
ENTRY(IFF_ECHO),
};
struct {
unsigned type;
const char *name;
} nlmrt_type_map[] = {
ENTRY(RTM_NEWLINK ),
ENTRY(RTM_DELLINK),
ENTRY(RTM_GETLINK),
ENTRY(RTM_SETLINK),
ENTRY(RTM_NEWADDR ),
ENTRY(RTM_DELADDR),
ENTRY(RTM_GETADDR),
ENTRY(RTM_NEWROUTE ),
ENTRY(RTM_DELROUTE),
ENTRY(RTM_GETROUTE),
ENTRY(RTM_NEWNEIGH ),
ENTRY(RTM_DELNEIGH),
ENTRY(RTM_GETNEIGH),
ENTRY(RTM_NEWRULE ),
ENTRY(RTM_DELRULE),
ENTRY(RTM_GETRULE),
ENTRY(RTM_NEWQDISC ),
ENTRY(RTM_DELQDISC),
ENTRY(RTM_GETQDISC),
ENTRY(RTM_NEWTCLASS ),
ENTRY(RTM_DELTCLASS),
ENTRY(RTM_GETTCLASS),
ENTRY(RTM_NEWTFILTER ),
ENTRY(RTM_DELTFILTER),
ENTRY(RTM_NEWACTION ),
ENTRY(RTM_DELACTION),
ENTRY(RTM_GETACTION),
ENTRY(RTM_NEWPREFIX ),
ENTRY(RTM_GETMULTICAST ),
ENTRY(RTM_GETANYCAST ),
ENTRY(RTM_NEWNEIGHTBL ),
ENTRY(RTM_GETNEIGHTBL ),
ENTRY(RTM_SETNEIGHTBL),
ENTRY(RTM_NEWNDUSEROPT ),
ENTRY(RTM_NEWADDRLABEL ),
ENTRY(RTM_DELADDRLABEL),
ENTRY(RTM_GETADDRLABEL),
ENTRY(RTM_GETDCB ),
ENTRY(RTM_SETDCB),
ENTRY(RTM_NEWNETCONF ),
ENTRY(RTM_GETNETCONF ),
ENTRY(RTM_NEWMDB ),
ENTRY(RTM_DELMDB ),
ENTRY(RTM_GETMDB ),
};
void print_type(unsigned type)
{
size_t i;
for (i = 0; i < sizeof nlmrt_type_map/sizeof nlmrt_type_map[0]; i++) {
if (type == nlmrt_type_map[i].type) {
printf("\t\tMsg Type: %s\n", nlmrt_type_map[i].name);
return;
}
}
printf("\t\tMsg Type: unknown(%d)\n", type);
}
void print_flags(unsigned flags, unsigned change)
{
size_t i;
printf("\t\tflags: ");
for (i = 0; i < sizeof ifi_flag_map/sizeof ifi_flag_map[0]; i++) {
if (flags & ifi_flag_map[i].flag) {
if (change & ifi_flag_map[i].flag) {
printf("%s(C) ", ifi_flag_map[i].name);
} else {
printf("%s ", ifi_flag_map[i].name);
}
}
}
puts("");
}
oid read_msg(int fd)
{
int len;
char buf[4096];
struct iovec iov = { buf, sizeof(buf) };
struct sockaddr_nl sa;
struct msghdr msg = { (void *)&sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
struct nlmsghdr *nh;
len = recvmsg(fd, &msg, 0);
if(len == -1) {
perror("recvmsg");
return;
}
for (nh = (struct nlmsghdr *) buf; NLMSG_OK (nh, len);
nh = NLMSG_NEXT (nh, len)) {
struct ifinfomsg *ifimsg;
/* The end of multipart message. */
printf("netlink message: len = %u, type = %u, flags = 0x%X, seq = %u, pid = %u\n",
nh->nlmsg_len,
nh->nlmsg_type,
nh->nlmsg_flags,
nh->nlmsg_seq,
nh->nlmsg_pid);
if (nh->nlmsg_type == NLMSG_DONE)
return;
if (nh->nlmsg_type == NLMSG_ERROR) {
continue;
}
ifimsg = NLMSG_DATA(nh);
printf("\tifi_family = %u, ifi_type = %u, ifi_index = %u, ifi_flags = 0x%X, ifi_change = 0x%X\n",
ifimsg->ifi_family ,
ifimsg->ifi_type ,
ifimsg->ifi_index ,
ifimsg->ifi_flags ,
ifimsg->ifi_change);
print_type(nh->nlmsg_type);
print_flags(ifimsg->ifi_flags, ifimsg->ifi_change);
}
}
int main(int argc, char *argv[])
{
struct sockaddr_nl sa;
int fd;
memset(&sa, 0, sizeof(sa));
sa.nl_family = AF_NETLINK;
sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR;
fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if(fd == -1) {
perror("socket");
return 1;
}
if(bind(fd, (struct sockaddr *) &sa, sizeof(sa)) == -1) {
perror("bind");
return 1;
}
for(;;) {
read_msg(fd);
}
return 0;
}

Related

Manage Linux routing rules using RTNETLINK

I'm trying to write a small C based user space app that provides feature of managing routing rules using RTNETLINK. Below is an example accepting 3 arguments: add/del (rule), IP address and iface.
The problem with code below that it adds routing rule for "to" direction, while it doesn't for "from" direction. So basically code below is equal to: ip rule add to <src_addr> table <table_id>, and I would like to rewrite it so it can also do ip rule add from <src_addr> table <table_id>. Any suggestions?
/*
*
*/
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <net/if.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/rtnetlink.h>
/* Open netlink socket */
int open_netlink()
{
struct sockaddr_nl saddr;
int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock < 0) {
perror("Failed to open netlink socket");
return -1;
}
memset(&saddr, 0, sizeof(saddr));
return sock;
}
/* Helper structure for ip address data and attributes */
typedef struct {
char family;
char bitlen;
unsigned char data[sizeof(struct in6_addr)];
} _inet_addr;
/* */
#define NLMSG_TAIL(nmsg) \
((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
/* Add new data to rtattr */
int rtattr_add(struct nlmsghdr *n, int maxlen, int type, const void *data, int alen)
{
int len = RTA_LENGTH(alen);
struct rtattr *rta;
if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) {
fprintf(stderr, "rtattr_add error: message exceeded bound of %d\n", maxlen);
return -1;
}
rta = NLMSG_TAIL(n);
rta->rta_type = type;
rta->rta_len = len;
if (alen) {
memcpy(RTA_DATA(rta), data, alen);
}
n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
return 0;
}
int do_rule(int sock, int cmd, int flags, _inet_addr *address, int if_idx)
{
struct {
struct nlmsghdr n;
struct rtmsg r;
char buf[4096];
} nl_request;
/* Initialize request structure */
nl_request.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
nl_request.n.nlmsg_flags = NLM_F_REQUEST | flags;
nl_request.n.nlmsg_type = cmd;
nl_request.r.rtm_family = address->family;
nl_request.r.rtm_table = 1;
nl_request.r.rtm_scope = RT_SCOPE_LINK;
/* Set additional flags if NOT deleting route */
if (cmd != RTM_DELRULE) {
nl_request.r.rtm_protocol = RTPROT_BOOT;
nl_request.r.rtm_type = RTN_UNICAST;
}
nl_request.r.rtm_family = address->family;
nl_request.r.rtm_dst_len = address->bitlen;
/* Select scope, for simplicity we supports here only IPv6 and IPv4 */
if (nl_request.r.rtm_family == AF_INET6) {
nl_request.r.rtm_scope = RT_SCOPE_UNIVERSE;
} else {
nl_request.r.rtm_scope = RT_SCOPE_LINK;
}
/* Set destination network */
rtattr_add(&nl_request.n, sizeof(nl_request), /*RTA_NEWDST*/ RTA_DST, &address->data, address->bitlen / 8);
/* Send message to the netlink */
return send(sock, &nl_request, sizeof(nl_request), 0);
}
/* Simple parser of the string IP address
*/
int read_addr(char *addr, _inet_addr *res)
{
if (strchr(addr, ':')) {
res->family = AF_INET6;
res->bitlen = 128;
} else {
res->family = AF_INET;
res->bitlen = 32;
}
return inet_pton(res->family, addr, res->data);
}
#define NEXT_CMD_ARG() do { argv++; if (--argc <= 0) exit(-1); } while(0)
int main(int argc, char **argv)
{
int default_gw = 0;
int if_idx = 0;
int nl_sock;
_inet_addr to_addr = { 0 };
_inet_addr gw_addr = { 0 };
_inet_addr address = { 0 };
int nl_cmd;
int nl_flags;
/* Parse command line arguments */
while (argc > 0) {
if (strcmp(*argv, "add") == 0) {
nl_cmd = RTM_NEWRULE;
nl_flags = NLM_F_CREATE | NLM_F_EXCL;
} else if (strcmp(*argv, "del") == 0) {
nl_cmd = RTM_DELRULE;
nl_flags = 0;
} else if (strcmp(*argv, "to") == 0) {
NEXT_CMD_ARG(); /* skip "to" and jump to the actual destination addr */
if (read_addr(*argv, &address) != 1) {
fprintf(stderr, "Failed to parse destination network %s\n", *argv);
exit(-1);
}
} else if (strcmp(*argv, "dev") == 0) {
NEXT_CMD_ARG(); /* skip "dev" */
if_idx = if_nametoindex(*argv);
}
argc--; argv++;
}
nl_sock = open_netlink();
if (nl_sock < 0) {
exit(-1);
}
// do_route(nl_sock, nl_cmd, nl_flags, &to_addr, &gw_addr, default_gw, if_idx);
do_rule(nl_sock, nl_cmd, nl_flags, &address, if_idx);
close (nl_sock);
return 0;
}
Created netlink socket and request, however parts of request structure might be configured incorrectly to achieve the goal.

Question about sharing mmapped area between 2 different processes

I'm trying to share mmapped area in 2 processes.
In my program, I create memory_update() process and memory_read() process.
This memory_update() process update mmapped area and tried to read that area in memory_read() process.
But I got when I tried to read the mmapped area.
So far, I don't have good luck to find a solution for this problem.
If you have any idea, please leave your comments.
Here is the source code.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/stat.h>
#include <sys/wait.h>
#define COLUMN 80
#define ROW 10
#define BUFSIZE 80
#define SHM_KEY 0x9998
struct _row {
int32_t flag;
unsigned char *buffer;
};
typedef enum {
DATA_READY,
DATA_RESET
} msg_type_t;
struct _ipc_message {
msg_type_t type;
int32_t value;
};
typedef struct _ipc_message ipc_message_t;
typedef struct _row row_t;
int32_t format_number_string(char *buf, int32_t num)
{
sprintf(buf, "%02d", num);
return 0;
}
int32_t update_row(char *buf, char *str)
{
strncpy(buf, str, 80);
return 0;
}
int32_t print_row(char *buf)
{
printf("print_row buf = %p\n", (void *)buf);
printf("%s\n", buf);
return 0;
}
int32_t memory_update(int32_t sk)
{
row_t *p_row;
ipc_message_t msg;
unsigned char *shared_mem;
unsigned char *ptr;
char rbuf[BUFSIZE];
char nbuf[3];
int32_t shmid;
int32_t i;
int32_t ret;
int32_t fd;
shmid = shmget(SHM_KEY, ROW * sizeof(row_t), 0644 | IPC_CREAT);
if (shmid == -1)
{
perror("Shared Memory Error");
return -1;
}
/* Attach Shared Memory */
shared_mem = shmat(shmid, NULL, 0);
if (shared_mem == (void *)-1)
{
perror("Shared Memory Attach Error");
return -1;
}
fd = open("testfile", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
if (fd < 0)
{
perror("File Open Error");
}
ptr = mmap(
NULL,
COLUMN * ROW,
PROT_READ | PROT_WRITE,
MAP_SHARED,
fd,
0);
printf("ptr = %p\n", (void *)ptr);
for (i = 0 ; i < ROW ; i++)
{
format_number_string(nbuf, i);
memset(rbuf, 0x20, BUFSIZE);
sprintf(rbuf, "LINE %s :", nbuf);
rbuf[strlen(rbuf)] = ' ';
rbuf[BUFSIZE-1] = 0x0;
update_row(&ptr[i * COLUMN], rbuf);
}
for (i = 0 ; i < ROW ; i++)
{
p_row = (row_t *)&shared_mem[i * sizeof(row_t)];
p_row->flag = 0x99;
p_row->buffer = &ptr[i * COLUMN];
// print_row(p_row->buffer);
}
msg.type = DATA_READY;
msg.value = 0;
send(sk, (void *)&msg, sizeof(ipc_message_t), 0);
i = 0;
for ( ; i < ROW ; i++)
{
p_row = (row_t *)&shared_mem[i * sizeof(row_t)];
if (p_row->flag == 0x0)
{
printf("row[%d] has processed\n", i);
}
else
{
i--;
sleep(1);
}
}
/* Detach Shared Memory */
ret = shmdt(shared_mem);
if (ret == -1)
{
perror("Shared Memory Detach Error");
return -1;
}
ret = munmap(ptr, COLUMN * ROW);
if (ret != 0)
{
printf("UnMapping Failed\n");
return -1;
}
close(fd);
return 0;
}
int32_t memory_read(int32_t sk)
{
row_t *p_row;
ipc_message_t msg;
unsigned char *shared_mem;
int32_t shmid;
int32_t ret;
int32_t i;
while (1)
{
ret = recv(sk, (void *)&msg, sizeof(ipc_message_t), 0);
if (ret < 0)
{
perror("recv error");
return -1;
}
if (msg.type != DATA_READY)
{
continue;
}
else
{
break;
}
}
shmid = shmget(SHM_KEY, ROW * sizeof(row_t), 0644 | IPC_CREAT);
if (shmid == -1)
{
perror("Shared Memory Error");
return -1;
}
/* Attach Shared Memory */
shared_mem = shmat(shmid, NULL, 0);
if (shared_mem == (void *)-1)
{
perror("Shared Memory Attach Error");
return -1;
}
for (i = 0 ; i < ROW ; i++)
{
p_row = (row_t *)&shared_mem[i * sizeof(row_t)];
printf("memory_read process [%d]\n", i);
print_row(p_row->buffer);
p_row->flag = 0x0;
sleep(1);
}
/* Detach Shared Memory */
ret = shmdt(shared_mem);
if (ret == -1)
{
perror("Shared Memory Detach Error");
return -1;
}
return 0;
}
int32_t main(void)
{
pid_t pid;
int32_t sp[2];
int32_t ret;
static const int32_t ps = 0;
static const int32_t cs = 1;
ret = socketpair(AF_UNIX, SOCK_STREAM, 0, sp);
if (ret == -1)
{
perror("socketpair");
return -1;
}
pid = fork();
if (pid == 0)
{
close(sp[ps]);
memory_update(sp[cs]);
}
else
{
close(sp[cs]);
memory_read(sp[ps]);
}
return 0;
}
And this is the output.
$ ./mmm
ptr = 0x7fdc214a8000
memory_read process [0]
print_row buf = 0x7fdc214a8000
Segmentation fault (core dumped)
Modifed code to mmap before forking.
And it is working as expected.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/stat.h>
#include <sys/wait.h>
#define COLUMNS 80
#define ROWS 60
#define BUFSIZE 80
typedef enum {
DATA_PRODUCED,
DATA_CONSUMED
} msg_type_t;
struct _ipc_message {
msg_type_t type;
int32_t value;
};
typedef struct _ipc_message ipc_message_t;
int32_t memory_update(int32_t sk, char *buf)
{
ipc_message_t msg;
int32_t ret;
char *msg2 = "updated buffer";
printf("memory_update : 1.buf = %s\n", buf);
memset(buf, 0, 80);
strncpy(buf, msg2, strlen(msg2));
buf[strlen(msg2)] = 0;
printf("memory_update : 2.buf = %s\n", buf);
printf("memory_update : send message from memory_update process\n");
msg.type = DATA_PRODUCED;
msg.value = 0;
send(sk, (void *)&msg, sizeof(ipc_message_t), 0);
while (1)
{
printf("memory_update : receive message from memory_read process\n");
ret = recv(sk, (void *)&msg, sizeof(ipc_message_t), 0);
if (ret < 0)
{
perror("recv error");
return -1;
}
if (msg.type != DATA_CONSUMED)
{
continue;
}
else
{
break;
}
}
printf("memory_update : 3.buf = %s\n", buf);
return 0;
}
int32_t memory_read(int32_t sk, char *buf)
{
ipc_message_t msg;
int32_t ret;
int32_t i;
char *msg3 = "buffer processed";
printf("memory_read : 1.buf = %s\n", buf);
while (1)
{
printf("memory_read : receive message from memory_update process\n");
ret = recv(sk, (void *)&msg, sizeof(ipc_message_t), 0);
if (ret < 0)
{
perror("recv error");
return -1;
}
if (msg.type != DATA_PRODUCED)
{
continue;
}
else
{
break;
}
}
printf("memory_read : 2.buf = %s\n", buf);
memset(buf, 0, 80);
strncpy(buf, msg3, strlen(msg3));
buf[strlen(msg3)] = 0;
printf("memory_read : 3.buf = %s\n", buf);
printf("memory_read : send message from memory_update process\n");
msg.type = DATA_CONSUMED;
msg.value = 0;
send(sk, (void *)&msg, sizeof(ipc_message_t), 0);
return 0;
}
int32_t main(void)
{
pid_t pid;
int32_t sp[2];
int32_t ret;
int32_t fd;
int32_t i;
char *ptr;
char *msg1 = "initial message";
static const int32_t ps = 0;
static const int32_t cs = 1;
fd = open("80bytes", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
if (fd < 0)
{
perror("File Open Error");
}
ptr = mmap(
NULL,
80,
PROT_READ | PROT_WRITE,
MAP_SHARED,
fd,
0);
memset(ptr, 0, 80);
strncpy(ptr, msg1, strlen(msg1));
ptr[strlen(msg1)] = 0;
printf("ptr = %s\n", ptr);
ret = socketpair(AF_UNIX, SOCK_STREAM, 0, sp);
if (ret == -1)
{
perror("socketpair");
return -1;
}
pid = fork();
if (pid == 0) /* child process */
{
close(sp[ps]);
memory_update(sp[cs], ptr);
}
else /* parent process */
{
close(sp[cs]);
memory_read(sp[ps], ptr);
ret = munmap(ptr, 80);
if (ret != 0)
{
printf("UnMapping Failed\n");
return -1;
}
close(fd);
}
return 0;
}

Using perf_event_open() to get sample address ,however the addr=0

I tried to use perf_event_open() to track all the store instructions to get their access address. I found only when I set attr.precise_ip > 0, I can get the non-zero address. But when I ran the same process on vm instead of host, the error massage was "Operation not supported", I can fix this problem by setting precise_ip = 0 on vm, but now I only get bunch of addresses equal to zero. I don't understand why precise_ip is related to the sample addrress which is not pointed out on document, and I also don't understand why I can't set precise_ip = 1 on vm while I can do it on host. Is there anybody can help me??
FYI: I use - cpu host option when I start vm using qemu-system-x86_64
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/fcntl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#define PERF_PAGES (1 + (1 << 16))
struct perf_sample {
struct perf_event_header header;
__u64 ip;
__u32 pid, tid; /* if PERF_SAMPLE_TID */
__u64 addr; /* if PERF_SAMPLE_ADDR */
__u64 weight; /* if PERF_SAMPLE_WEIGHT */
/* __u64 data_src; /\* if PERF_SAMPLE_DATA_SRC *\/ */
__u64 phy_addr;
};
int perf_event_open(struct perf_event_attr *attr,pid_t pid,int cpu,int group_fd,unsigned long flags)
{
return syscall(__NR_perf_event_open,attr,pid,cpu,group_fd,flags);
}
void workload()
{
int i,c=0;
for(i=0;i<100000000;i++)
{
c+=i*i;
c-=i*100;
c+=i*i*i/100;
}
}
int startup()
{
struct perf_event_attr attr;
memset(&attr,0,sizeof(struct perf_event_attr));
attr.type = PERF_TYPE_RAW;
attr.size = sizeof(struct perf_event_attr);
attr.config = 0x82d0;
attr.config1 = 0;
attr.sample_period = 1000;
attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR ;
attr.disabled = 0;
//attr.inherit = 1;
attr.exclude_kernel = 1;
attr.exclude_hv = 1;
attr.exclude_callchain_kernel = 1;
attr.exclude_callchain_user = 1;
attr.precise_ip = 1; // when i set attr.precise_ip = 0 , all the addr = 0;
int fd=perf_event_open(&attr,0,-1,-1,0);
if(fd<0)
{
perror("Cannot open perf fd!");
return -1;
}
return fd;
}
void scan_thread(struct perf_event_mmap_page *p)
{
char *pbuf = (char *)p + p->data_offset;
__sync_synchronize();
printf("%d,\n", p->data_size);
if(p->data_head == p->data_tail) {
return;
}
struct perf_event_header *ph = (void *)(pbuf + (p->data_tail % p->data_size));
struct perf_sample* ps;
switch(ph->type) {
case PERF_RECORD_SAMPLE:
ps = (struct perf_sample*)ph;
// assert(ps != NULL);
if(ps == NULL)
{
printf("null\n");
}
if(ps!= NULL && ps->addr != 0) {
printf("ip %lx\n", ps->ip);
printf("tid %d\n", ps->tid);
printf("addr: %lx \n", ps->addr);
}
//printf("addr, %lx\n", ps->addr);
//printf("phy addr, %lx\n", ps->phy_addr);
break;
default:
printf("type %d\n", ph->type);
break;
}
}
int main()
{
int fd = startup();
size_t mmap_size = sysconf(_SC_PAGESIZE) * PERF_PAGES;
struct perf_event_mmap_page *p = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
// start to perf
ioctl(fd,PERF_EVENT_IOC_ENABLE,0);
int a= 0;
while(1)
{
// uint64_t instructions;
// read(fd,&instructions,sizeof(instructions));
// printf("instructions=%ld\n",instructions);
// sleep(1);
workload();
scan_thread(p);
sleep(1);
}
}

How to use c-ares with epoll?

I have a working code with performs asynchronous DNS resolution with c-ares library calls. The program uses select to monitor file descriptors up to a maximum of FD_SETSIZE which is 1024 on my system. I want to use many more file descriptors so want to rewrite the code to use epoll instead of select.
Here is the select based function of my current program:
static void
wait_ares(ares_channel channel)
{
struct timeval *tvp, tv;
fd_set read_fds, write_fds;
int nfds;
FD_ZERO(&read_fds);
FD_ZERO(&write_fds);
nfds = ares_fds(channel, &read_fds, &write_fds);
if (nfds > 0) {
tvp = ares_timeout(channel, NULL, &tv);
select(nfds, &read_fds, &write_fds, NULL, tvp);
ares_process(channel, &read_fds, &write_fds);
}
}
I've done some googling before posting my question and I've found out that to implement this with epoll I can no longer use ares_fds, ares_timeout and ares_process but must use ares_getsock() and ares_process_fd() instead. But further than that I have no idea how to do this and can't find any example codes using epoll with c-ares. Can anyone modify the code provided below to use epoll instead of select? Or at least give me some pointers to get me started?
#include <ares.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#define MAXWAITING 1000 /* Max. number of parallel DNS queries */
#define MAXTRIES 3 /* Max. number of tries per domain */
#define TIMEOUT 3000 /* Max. number of ms for first try */
#define SERVERS "1.0.0.1,8.8.8.8" /* DNS server to use (Cloudflare & Google) */
static int nwaiting;
static void
state_cb(void *data, int s, int read, int write)
{
//printf("Change state fd %d read:%d write:%d\n", s, read, write);
}
static void
callback(void *arg, int status, int timeouts, struct hostent *host)
{
nwaiting--;
if(!host || status != ARES_SUCCESS){
fprintf(stderr, "Failed to lookup %s\n", ares_strerror(status));
return;
}
char ip[INET6_ADDRSTRLEN];
if (host->h_addr_list[0] != NULL){
inet_ntop(host->h_addrtype, host->h_addr_list[0], ip, sizeof(ip));
printf("%s\n%s\n", host->h_name, ip);
}
}
static void
wait_ares(ares_channel channel)
{
struct timeval *tvp, tv;
fd_set read_fds, write_fds;
int nfds;
FD_ZERO(&read_fds);
FD_ZERO(&write_fds);
nfds = ares_fds(channel, &read_fds, &write_fds);
if (nfds > 0) {
tvp = ares_timeout(channel, NULL, &tv);
select(nfds, &read_fds, &write_fds, NULL, tvp);
ares_process(channel, &read_fds, &write_fds);
}
}
int
main(int argc, char *argv[])
{
FILE * fp;
char domain[128];
size_t len = 0;
ssize_t read;
ares_channel channel;
int status, done = 0;
int optmask;
status = ares_library_init(ARES_LIB_INIT_ALL);
if (status != ARES_SUCCESS) {
printf("ares_library_init: %s\n", ares_strerror(status));
return 1;
}
struct ares_options options = {
.timeout = TIMEOUT, /* set first query timeout */
.tries = MAXTRIES /* set max. number of tries */
};
optmask = ARES_OPT_TIMEOUTMS | ARES_OPT_TRIES;
status = ares_init_options(&channel, &options, optmask);
if (status != ARES_SUCCESS) {
printf("ares_init_options: %s\n", ares_strerror(status));
return 1;
}
status = ares_set_servers_csv(channel, SERVERS);
if (status != ARES_SUCCESS) {
printf("ares_set_servers_csv: %s\n", ares_strerror(status));
return 1;
}
fp = fopen(argv[1], "r");
if (!fp)
exit(EXIT_FAILURE);
do {
if (nwaiting >= MAXWAITING || done) {
do {
wait_ares(channel);
} while (nwaiting > MAXWAITING);
}
if (!done) {
if (fscanf(fp, "%127s", domain) == 1) {
ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
nwaiting++;
} else {
fprintf(stderr, "done sending\n");
done = 1;
}
}
} while (nwaiting > 0);
ares_destroy(channel);
ares_library_cleanup();
fclose(fp);
return 0;
}
The program requires a file with a domain name on each line to work.
This is what I ended up coming up with.
#include <ares.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <errno.h>
#define MAXWAITING 1000 /* Max. number of parallel DNS queries */
#define MAXTRIES 3 /* Max. number of tries per domain */
#define TIMEOUT 3000 /* Max. number of ms for first try */
#define DNS_MAX_EVENTS 10000
#define DNS_MAX_SERVERS 2
#define SERVERS "1.0.0.1,8.8.8.8" /* DNS server to use (Cloudflare & Google) */
static int nwaiting;
ares_socket_t dns_client_fds[ARES_GETSOCK_MAXNUM] = {0};
struct epoll_event ev, events[DNS_MAX_EVENTS];
int i,bitmask,nfds, epollfd, timeout, fd_count, ret;
static void
state_cb(void *data, int s, int read, int write)
{
//printf("Change state fd %d read:%d write:%d\n", s, read, write);
}
static void
callback(void *arg, int status, int timeouts, struct hostent *host)
{
nwaiting--;
if(!host || status != ARES_SUCCESS){
fprintf(stderr, "Failed to lookup %s\n", ares_strerror(status));
return;
}
char ip[INET6_ADDRSTRLEN];
if (host->h_addr_list[0] != NULL){
inet_ntop(host->h_addrtype, host->h_addr_list[0], ip, sizeof(ip));
printf("%s\n%s\n", host->h_name, ip);
}
}
static void
wait_ares(ares_channel channel)
{
nfds=0;
bitmask=0;
for (i =0; i < DNS_MAX_SERVERS ; i++) {
if (dns_client_fds[i] > 0) {
if (epoll_ctl(epollfd, EPOLL_CTL_DEL, dns_client_fds[i], NULL) < 0) {
continue;
}
}
}
memset(dns_client_fds, 0, sizeof(dns_client_fds));
bitmask = ares_getsock(channel, dns_client_fds, DNS_MAX_SERVERS);
for (i =0; i < DNS_MAX_SERVERS ; i++) {
if (dns_client_fds[i] > 0) {
ev.events = 0;
if (ARES_GETSOCK_READABLE(bitmask, i)) {
ev.events |= EPOLLIN;
}
if (ARES_GETSOCK_WRITABLE(bitmask, i)) {
ev.events |= EPOLLOUT;
}
ev.data.fd = dns_client_fds[i];
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, dns_client_fds[i], &ev) < 0) {
if(errno == EEXIST) {
nfds++;
continue;
}
continue;
}
nfds++;
}
}
if(nfds==0)
{
return;
}
timeout = 1000;//millisecs
fd_count = epoll_wait(epollfd, events, DNS_MAX_EVENTS, timeout);
if (fd_count < 0) {
return;
}
if (fd_count > 0) {
for (i = 0; i < fd_count; ++i) {
ares_process_fd(channel, ((events[i].events) & (EPOLLIN) ? events[i].data.fd:ARES_SOCKET_BAD), ((events[i].events) & (EPOLLOUT)? events[i].data.fd:ARES_SOCKET_BAD));
}
} else {
ares_process_fd(channel, ARES_SOCKET_BAD, ARES_SOCKET_BAD);
}
}
int
main(int argc, char *argv[])
{
FILE * fp;
char domain[128];
size_t len = 0;
ssize_t read;
ares_channel channel;
int status, done = 0;
int optmask;
status = ares_library_init(ARES_LIB_INIT_ALL);
if (status != ARES_SUCCESS) {
printf("ares_library_init: %s\n", ares_strerror(status));
return 1;
}
struct ares_options options = {
.timeout = TIMEOUT, /* set first query timeout */
.tries = MAXTRIES /* set max. number of tries */
};
optmask = ARES_OPT_TIMEOUTMS | ARES_OPT_TRIES;
status = ares_init_options(&channel, &options, optmask);
if (status != ARES_SUCCESS) {
printf("ares_init_options: %s\n", ares_strerror(status));
return 1;
}
status = ares_set_servers_csv(channel, SERVERS);
if (status != ARES_SUCCESS) {
printf("ares_set_servers_csv: %s\n", ares_strerror(status));
return 1;
}
fp = fopen(argv[1], "r");
if (!fp)
exit(EXIT_FAILURE);
memset(dns_client_fds, 0, sizeof(dns_client_fds));
memset((char *)&ev, 0, sizeof(struct epoll_event));
memset((char *)&events[0], 0, sizeof(events));
epollfd = epoll_create(DNS_MAX_SERVERS);
if (epollfd < 0) {
perror("epoll_create: ");
}
do {
if (nwaiting >= MAXWAITING || done) {
do {
wait_ares(channel);
} while (nwaiting > MAXWAITING);
}
if (!done) {
if (fscanf(fp, "%127s", domain) == 1) {
ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
nwaiting++;
} else {
fprintf(stderr, "done sending\n");
done = 1;
}
}
} while (nwaiting > 0);
ares_destroy(channel);
ares_library_cleanup();
fclose(fp);
return 0;
}

DPDK create a packet for transmission

I am new to DPDK and trying to create a packet to send it from one DPDK enabled machine to another connected directly via an ethernet. I modified an example/rxtx_callbacks/main.c provided with DPDK at both side. However, I am not receiving anything at the receiver. What wrong am I doing?
Modified function at transmitter: lcore_main is modified:
static __attribute__((noreturn)) void lcore_main()
{
uint16_t port;
struct ether_hdr *eth_hdr;
struct ether_addr daddr;
daddr.addr_bytes[0] = 116;
daddr.addr_bytes[1] = 225;
daddr.addr_bytes[2] = 228;
daddr.addr_bytes[3] = 204;
daddr.addr_bytes[4] = 106;
daddr.addr_bytes[5] = 82;
//rte_eth_macaddr_get(portid, &addr);
struct ipv4_hdr *ipv4_hdr;
int32_t i;
int ret;
RTE_ETH_FOREACH_DEV(port)
if (rte_eth_dev_socket_id(port) > 0 &&
rte_eth_dev_socket_id(port) !=
(int)rte_socket_id())
printf("WARNING, port %u is on remote NUMA node to "
"polling thread.\n\tPerformance will "
"not be optimal.\n", port);
printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
rte_lcore_id());
//struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
struct rte_mbuf *m_head[BURST_SIZE];
for (;;) {
RTE_ETH_FOREACH_DEV(port) {
if(rte_pktmbuf_alloc_bulk(mbuf_pool, m_head, BURST_SIZE)!=0)
{
printf("Allocation problem\n");
}
for(i = 0; i < BURST_SIZE; i++) {
eth_hdr = rte_pktmbuf_mtod(m_head[i], struct ether_hdr *);
//eth_hdr = (struct ether_hdr *)rte_pktmbuf_append(m_head[i],
// sizeof(struct ether_hdr));
eth_hdr->ether_type = htons(ETHER_TYPE_IPv4);
rte_memcpy(&(eth_hdr->s_addr), &addr, sizeof(struct ether_addr));
rte_memcpy(&(eth_hdr->d_addr), &daddr, sizeof(struct ether_addr));
}
const uint16_t nb_tx = rte_eth_tx_burst(port, 0, m_head, BURST_SIZE);
if (unlikely(nb_tx < BURST_SIZE)) {
uint16_t buf;
for (buf = nb_tx; buf < BURST_SIZE; buf++)
rte_pktmbuf_free(m_head[buf]);
}
}
}
}
receiver side RTE_ETH_FOREACH_DEV of tx part is modified to:
RTE_ETH_FOREACH_DEV(port) {
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, bufs, BURST_SIZE);
//printf("Number of Packets received %d\n", nb_rx);
for(i = 0; i < nb_rx; i++) {
//ipv4_hdr = rte_pktmbuf_mtod_offset(bufs[i], struct ipv4_hdr *,
// sizeof(struct ether_hdr));
//printf("Packet ip received %d\n", ipv4_hdr->src_addr);
eth_hdr = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
printf("Packet ip received %d\n", eth_hdr->ether_type);
}
if (unlikely(nb_rx == 0))
continue;
const uint16_t nb_tx = 0; // = rte_eth_tx_burst(port ^ 1, 0, bufs, nb_rx);
if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}
Please let me know if I missed something.
There are few issues with the code:
eth_hdr = rte_pktmbuf_mtod(m_head[i], struct ether_hdr *);
Unlike rte_pktmbuf_append(), the rte_pktmbuf_mtod() does not change the packet length, so it should be set manually before the tx.
eth_hdr->ether_type = htons(ETHER_TYPE_IPv4);
If we set ETHER_TYPE_IPv4, a correct IPv4 header must follow. So we need either to add the header or to change the ether_type.
rte_memcpy(&(eth_hdr->s_addr), &addr, sizeof(struct ether_addr));
Where is the source address comes from?
const uint16_t nb_tx = rte_eth_tx_burst(port, 0, m_head, BURST_SIZE);
Looks like we transmit a burst of zero-sized packets with invalid IPv4 headers. Please also make sure the source/destination addresses are correct.
As suggested by #andriy-berestovsky, I used rte_eth_stats_get() and it shows packets are present in ethernet ring via the field ipackets but rte_eth_rx_burst is not returning any packets. Full code is included here, please let me know what I am doing wrong. (I am using testpmd at transmitter side)
#include <stdint.h>
#include <inttypes.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_ip.h>
#include <rte_mbuf.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <signal.h>
#define MAX_SOURCE_SIZE (0x100000)
#define RX_RING_SIZE 1024
#define TX_RING_SIZE 1024
#define NUM_MBUFS 8191
#define MBUF_CACHE_SIZE 250
#define BURST_SIZE 32
static const struct rte_eth_conf port_conf_default = {
.rxmode = {
.max_rx_pkt_len = ETHER_MAX_LEN,
},
};
static struct {
uint64_t total_cycles;
uint64_t total_pkts;
} latency_numbers;
static volatile bool force_quit;
struct rte_mempool *mbuf_pool;
static void
signal_handler(int signum)
{
struct rte_eth_stats eth_stats;
int i;
if (signum == SIGINT || signum == SIGTERM) {
printf("\n\nSignal %d received, preparing to exit...\n",
signum);
RTE_ETH_FOREACH_DEV(i) {
rte_eth_stats_get(i, &eth_stats);
printf("Total number of packets received %llu, dropped rx full %llu and rest= %llu, %llu, %llu\n", eth_stats.ipackets, eth_stats.imissed, eth_stats.ierrors, eth_stats.rx_nombuf, eth_stats.q_ipackets[0]);
}
force_quit = true;
}
}
struct ether_addr addr;
/*
* Initialises a given port using global settings and with the rx buffers
* coming from the mbuf_pool passed as parameter
*/
static inline int
port_init(uint16_t port, struct rte_mempool *mbuf_pool)
{
struct rte_eth_conf port_conf = port_conf_default;
const uint16_t rx_rings = 1, tx_rings = 1;
uint16_t nb_rxd = RX_RING_SIZE;
uint16_t nb_txd = TX_RING_SIZE;
int retval;
uint16_t q;
struct rte_eth_dev_info dev_info;
struct rte_eth_txconf txconf;
if (!rte_eth_dev_is_valid_port(port))
return -1;
rte_eth_dev_info_get(port, &dev_info);
if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
port_conf.txmode.offloads |=
DEV_TX_OFFLOAD_MBUF_FAST_FREE;
retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
if (retval != 0)
return retval;
retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
if (retval != 0) {
printf("Error in adjustment\n");
return retval;
}
for (q = 0; q < rx_rings; q++) {
retval = rte_eth_rx_queue_setup(port, q, nb_rxd,
rte_eth_dev_socket_id(port), NULL, mbuf_pool);
if (retval < 0) {
printf("RX queue setup prob\n");
return retval;
}
}
txconf = dev_info.default_txconf;
txconf.offloads = port_conf.txmode.offloads;
for (q = 0; q < tx_rings; q++) {
retval = rte_eth_tx_queue_setup(port, q, nb_txd,
rte_eth_dev_socket_id(port), &txconf);
if (retval < 0)
return retval;
}
retval = rte_eth_dev_start(port);
if (retval < 0) {
printf("Error in start\n");
return retval;
}
rte_eth_macaddr_get(port, &addr);
printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
(unsigned)port,
addr.addr_bytes[0], addr.addr_bytes[1],
addr.addr_bytes[2], addr.addr_bytes[3],
addr.addr_bytes[4], addr.addr_bytes[5]);
rte_eth_promiscuous_enable(port);
return 0;
}
/*
* Main thread that does the work, reading from INPUT_PORT
* and writing to OUTPUT_PORT
*/
static __attribute__((noreturn)) void
lcore_main(void)
{
uint16_t port;
struct ether_hdr *eth_hdr;
//struct ether_addr addr;
//rte_eth_macaddr_get(portid, &addr);
struct ipv4_hdr *ipv4_hdr;
int32_t i;
RTE_ETH_FOREACH_DEV(port)
{
if (rte_eth_dev_socket_id(port) > 0 &&
rte_eth_dev_socket_id(port) !=
(int)rte_socket_id())
printf("WARNING, port %u is on remote NUMA node to "
"polling thread.\n\tPerformance will "
"not be optimal.\n", port);
}
printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
rte_lcore_id());
for (;;) {
RTE_ETH_FOREACH_DEV(port) {
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, 0,bufs, BURST_SIZE);
for(i = 0; i < nb_rx; i++) {
ipv4_hdr = rte_pktmbuf_mtod_offset(bufs[i], struct ipv4_hdr *, sizeof(struct ether_hdr));
printf("Packet ip received %d\n", ipv4_hdr->src_addr);
}
if (unlikely(nb_rx == 0))
continue;
const uint16_t nb_tx = 0; // = rte_eth_tx_burst(port ^ 1, 0, bufs, nb_rx);
if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}
if(force_quit)
break;
}
}
/* Main function, does initialisation and calls the per-lcore functions */
int
main(int argc, char *argv[])
{
uint16_t nb_ports;
uint16_t portid, port;
/* init EAL */
int ret = rte_eal_init(argc, argv);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
argc -= ret;
argv += ret;
force_quit = false;
signal(SIGINT, signal_handler);
signal(SIGTERM, signal_handler);
nb_ports = rte_eth_dev_count_avail();
printf("size ordered %lld\n", NUM_MBUFS *nb_ports);
mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0,
RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
if (nb_ports < 1)
rte_exit(EXIT_FAILURE, "Error: number of ports must be greater than %d\n", nb_ports);
if (mbuf_pool == NULL)
rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
// initialize all ports
RTE_ETH_FOREACH_DEV(portid)
if (port_init(portid, mbuf_pool) != 0)
rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n",
portid);
if (rte_lcore_count() > 1)
printf("\nWARNING: Too much enabled lcores - "
"App uses only 1 lcore\n");
// call lcore_main on master core only
lcore_main();
return 0;
}
It seems to be a problem of ethernet card with ubuntu 14.04. With ubuntu 16.04 it is working fine.

Resources