Manage Linux routing rules using RTNETLINK - c

I'm trying to write a small C based user space app that provides feature of managing routing rules using RTNETLINK. Below is an example accepting 3 arguments: add/del (rule), IP address and iface.
The problem with the code below is that it adds a routing rule for the "to" direction but not for the "from" direction. In other words, the code below is equivalent to: ip rule add to <src_addr> table <table_id>, and I would like to rewrite it so that it can also do ip rule add from <src_addr> table <table_id>. Any suggestions?
/*
*
*/
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <net/if.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/rtnetlink.h>
/*
 * Open a raw NETLINK_ROUTE socket.
 *
 * Returns the socket descriptor on success. On failure the reason is printed
 * with perror() and -1 is returned. The socket is not bound to any address.
 */
int open_netlink(void)
{
    int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

    if (sock < 0) {
        perror("Failed to open netlink socket");
        return -1;
    }
    return sock;
}
/*
 * Helper structure for IP address data and attributes.
 *
 * family - address family (AF_INET or AF_INET6)
 * bitlen - address length in bits; unsigned so that 128 (IPv6) is
 *          representable even on platforms where plain char is signed
 * data   - raw address bytes, sized for the largest case (an IPv6 address)
 */
typedef struct {
    char family;
    unsigned char bitlen;
    unsigned char data[sizeof(struct in6_addr)];
} _inet_addr;
/*
 * Pointer to the first byte after the aligned end of netlink message nmsg,
 * typed as the rtattr that would be appended there. The byte offset is added
 * through char * because arithmetic on void * is a compiler extension.
 */
#define NLMSG_TAIL(nmsg) \
    ((struct rtattr *) (((char *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
/*
 * Append one routing attribute to the netlink message n.
 *
 * n      - message being built; nlmsg_len is advanced on success
 * maxlen - total size in bytes of the buffer that holds n
 * type   - attribute type stored in rta_type
 * data   - attribute payload (may be NULL only when alen is 0)
 * alen   - payload length in bytes
 *
 * Returns 0 on success. Returns -1 (after printing a diagnostic to stderr and
 * leaving the message untouched) when the payload description is invalid or
 * the attribute would not fit into maxlen bytes.
 */
int rtattr_add(struct nlmsghdr *n, int maxlen, int type, const void *data, int alen)
{
    size_t len;
    size_t offset;
    size_t new_len;
    struct rtattr *rta;

    /* A negative length would turn into a huge memcpy size further down. */
    if (alen < 0 || (alen > 0 && data == NULL)) {
        fprintf(stderr, "rtattr_add error: invalid attribute payload (length %d)\n", alen);
        return -1;
    }

    len = RTA_LENGTH((size_t)alen);
    offset = NLMSG_ALIGN(n->nlmsg_len);
    new_len = offset + RTA_ALIGN(len);

    /* rta_len is a 16-bit field, so the attribute itself is limited too. */
    if (maxlen < 0 || new_len > (size_t)maxlen || len > 0xFFFF) {
        fprintf(stderr, "rtattr_add error: message exceeded bound of %d\n", maxlen);
        return -1;
    }

    /* The new attribute starts at the aligned end of the current message. */
    rta = (struct rtattr *)((char *)n + offset);
    rta->rta_type = (unsigned short)type;
    rta->rta_len = (unsigned short)len;
    if (alen > 0) {
        memcpy(RTA_DATA(rta), data, (size_t)alen);
    }
    n->nlmsg_len = (unsigned int)new_len;
    return 0;
}
/*
 * Build and send one routing-rule request matching on the destination ("to")
 * address.
 *
 * sock    - netlink socket the request is written to
 * cmd     - netlink message type (the code special-cases RTM_DELRULE)
 * flags   - extra NLM_F_* flags OR-ed with NLM_F_REQUEST
 * address - address to match; family and bitlen fill the header, data goes
 *           into the RTA_DST attribute
 * if_idx  - accepted for the caller's convenience but not used here
 *
 * Returns the result of send() (bytes written, or -1 on error), or -1 when
 * the attribute could not be added to the request.
 *
 * NOTE(review): a "from" rule would presumably need the source counterpart
 * (rtm_src_len plus a source attribute) instead of rtm_dst_len/RTA_DST —
 * confirm against the rtnetlink rule documentation.
 */
int do_rule(int sock, int cmd, int flags, _inet_addr *address, int if_idx)
{
    struct {
        struct nlmsghdr n;
        struct rtmsg r;
        char buf[4096];
    } nl_request;
    /* Read the bit length as unsigned so that 128 never shows up negative. */
    const unsigned char prefix_bits = (unsigned char)address->bitlen;

    (void)if_idx;

    /* Zero everything first: fields that are not set below must not carry
     * stack garbage into the kernel. */
    memset(&nl_request, 0, sizeof(nl_request));

    /* Initialize request structure */
    nl_request.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
    nl_request.n.nlmsg_flags = NLM_F_REQUEST | flags;
    nl_request.n.nlmsg_type = cmd;
    nl_request.r.rtm_family = address->family;
    nl_request.r.rtm_table = 1;
    nl_request.r.rtm_dst_len = prefix_bits;

    /* Set additional flags if NOT deleting route */
    if (cmd != RTM_DELRULE) {
        nl_request.r.rtm_protocol = RTPROT_BOOT;
        nl_request.r.rtm_type = RTN_UNICAST;
    }

    /* Select scope, for simplicity we supports here only IPv6 and IPv4 */
    if (nl_request.r.rtm_family == AF_INET6) {
        nl_request.r.rtm_scope = RT_SCOPE_UNIVERSE;
    } else {
        nl_request.r.rtm_scope = RT_SCOPE_LINK;
    }

    /* Set destination network */
    if (rtattr_add(&nl_request.n, sizeof(nl_request), /*RTA_NEWDST*/ RTA_DST,
                   address->data, prefix_bits / 8) < 0) {
        return -1;
    }

    /* Send exactly the bytes that make up the message; sending the whole
     * buffer would hand the unused tail to the kernel as further messages. */
    return send(sock, &nl_request, nl_request.n.nlmsg_len, 0);
}
/*
 * Parse a textual IP address into *res.
 *
 * Text containing a ':' is treated as IPv6 (bit length 128); anything else is
 * treated as IPv4 (bit length 32). The return value is whatever inet_pton()
 * returns for that family.
 */
int read_addr(char *addr, _inet_addr *res)
{
    const int looks_like_v6 = (strchr(addr, ':') != NULL);

    res->family = looks_like_v6 ? AF_INET6 : AF_INET;
    res->bitlen = looks_like_v6 ? 128 : 32;

    return inet_pton(res->family, addr, res->data);
}
/* Advance to the next command-line word; terminate if there is none. */
#define NEXT_CMD_ARG() do { argv++; if (--argc <= 0) exit(-1); } while(0)
/*
 * Command-line front end: {add|del} to <address> [dev <interface>].
 *
 * Parses the arguments, opens a netlink socket and issues the rule request
 * through do_rule(). Exits with a non-zero status when the arguments are
 * incomplete, the socket cannot be opened or the request cannot be sent.
 */
int main(int argc, char **argv)
{
    int if_idx = 0;
    int nl_sock;
    _inet_addr address = { 0 };
    int nl_cmd = 0;      /* 0 = neither "add" nor "del" seen yet */
    int nl_flags = 0;
    int have_address = 0;
    int rc;

    /* Skip the program name so it is not interpreted as a keyword. */
    argc--; argv++;

    /* Parse command line arguments */
    while (argc > 0) {
        if (strcmp(*argv, "add") == 0) {
            nl_cmd = RTM_NEWRULE;
            nl_flags = NLM_F_CREATE | NLM_F_EXCL;
        } else if (strcmp(*argv, "del") == 0) {
            nl_cmd = RTM_DELRULE;
            nl_flags = 0;
        } else if (strcmp(*argv, "to") == 0) {
            NEXT_CMD_ARG(); /* skip "to" and jump to the actual destination addr */
            if (read_addr(*argv, &address) != 1) {
                fprintf(stderr, "Failed to parse destination network %s\n", *argv);
                exit(-1);
            }
            have_address = 1;
        } else if (strcmp(*argv, "dev") == 0) {
            NEXT_CMD_ARG(); /* skip "dev" */
            if_idx = if_nametoindex(*argv);
            if (if_idx == 0) {
                fprintf(stderr, "Unknown interface %s\n", *argv);
                exit(-1);
            }
        }
        argc--; argv++;
    }

    /* Without a command and an address the request would be built from
     * uninitialized values. */
    if (nl_cmd == 0 || !have_address) {
        fprintf(stderr, "Usage: {add|del} to <address> [dev <interface>]\n");
        exit(-1);
    }

    nl_sock = open_netlink();
    if (nl_sock < 0) {
        exit(-1);
    }

    rc = do_rule(nl_sock, nl_cmd, nl_flags, &address, if_idx);
    if (rc < 0) {
        fprintf(stderr, "Failed to send the rule request\n");
    }
    close (nl_sock);
    return rc < 0 ? 1 : 0;
}
I created the netlink socket and the request; however, parts of the request structure might be configured incorrectly for this goal.

Related

How to use c-ares with epoll?

I have working code which performs asynchronous DNS resolution with c-ares library calls. The program uses select to monitor file descriptors, up to a maximum of FD_SETSIZE, which is 1024 on my system. I want to use many more file descriptors, so I want to rewrite the code to use epoll instead of select.
Here is the select based function of my current program:
/*
 * One select()-based wait/process step for a c-ares channel.
 *
 * Asks ares_fds() for the channel's descriptor sets. When it reports any,
 * select() is called with the timeout obtained from ares_timeout() and the
 * resulting sets are passed to ares_process(). Otherwise nothing happens.
 */
static void
wait_ares(ares_channel channel)
{
    fd_set readers, writers;
    struct timeval tv;
    struct timeval *wait_for;
    int max_fd;

    FD_ZERO(&readers);
    FD_ZERO(&writers);

    max_fd = ares_fds(channel, &readers, &writers);
    if (max_fd <= 0)
        return;

    wait_for = ares_timeout(channel, NULL, &tv);
    select(max_fd, &readers, &writers, NULL, wait_for);
    ares_process(channel, &readers, &writers);
}
I've done some googling before posting my question and I've found out that to implement this with epoll I can no longer use ares_fds, ares_timeout and ares_process but must use ares_getsock() and ares_process_fd() instead. But further than that I have no idea how to do this and can't find any example codes using epoll with c-ares. Can anyone modify the code provided below to use epoll instead of select? Or at least give me some pointers to get me started?
#include <ares.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#define MAXWAITING 1000 /* Max. number of parallel DNS queries */
#define MAXTRIES 3 /* Max. number of tries per domain */
#define TIMEOUT 3000 /* Max. number of ms for first try */
#define SERVERS "1.0.0.1,8.8.8.8" /* DNS server to use (Cloudflare & Google) */
static int nwaiting;
/*
 * Callback placeholder that does nothing with its arguments.
 * NOTE(review): the signature looks like a c-ares socket-state callback, but
 * it is not registered anywhere in the visible code — confirm before relying
 * on it.
 */
static void
state_cb(void *data, int s, int read, int write)
{
    (void)data;
    (void)s;
    (void)read;
    (void)write;
    /* Debug aid, disabled:
     * printf("Change state fd %d read:%d write:%d\n", s, read, write); */
}
/*
 * Completion callback for one host lookup.
 *
 * Always decrements the count of outstanding queries (nwaiting). On failure,
 * or when no host entry was delivered, the c-ares error text is written to
 * stderr. Otherwise the host name and its first address are printed, one per
 * line; nothing is printed when the address list is empty.
 */
static void
callback(void *arg, int status, int timeouts, struct hostent *host)
{
    char ip[INET6_ADDRSTRLEN];

    nwaiting--;

    if (status != ARES_SUCCESS || host == NULL) {
        fprintf(stderr, "Failed to lookup %s\n", ares_strerror(status));
        return;
    }

    if (host->h_addr_list[0] == NULL)
        return;

    inet_ntop(host->h_addrtype, host->h_addr_list[0], ip, sizeof(ip));
    printf("%s\n%s\n", host->h_name, ip);
}
/*
 * Wait for activity on the channel's sockets using select() and let c-ares
 * handle it.
 *
 * The descriptor sets come from ares_fds(); if it reports no descriptors the
 * function returns immediately. The select() timeout is the value produced by
 * ares_timeout(), and ares_process() is called afterwards with the sets.
 */
static void
wait_ares(ares_channel channel)
{
    fd_set rd_set, wr_set;
    struct timeval tv_storage;
    struct timeval *tv_limit;
    int highest;

    FD_ZERO(&rd_set);
    FD_ZERO(&wr_set);

    highest = ares_fds(channel, &rd_set, &wr_set);
    if (highest <= 0)
        return;

    tv_limit = ares_timeout(channel, NULL, &tv_storage);
    select(highest, &rd_set, &wr_set, NULL, tv_limit);
    ares_process(channel, &rd_set, &wr_set);
}
/*
 * Resolve every whitespace-separated name found in the file argv[1].
 *
 * Queries are issued through c-ares with at most MAXWAITING outstanding at a
 * time; results are reported by callback(). Returns 0 when all queries have
 * completed, non-zero when the arguments or the c-ares setup are unusable.
 */
int
main(int argc, char *argv[])
{
    FILE *fp;
    char domain[128];
    ares_channel channel;
    int status, done = 0;
    int optmask;

    /* The file name is mandatory; fopen(NULL) would be undefined. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file-with-domain-names>\n",
                argc > 0 ? argv[0] : "resolver");
        return 1;
    }

    status = ares_library_init(ARES_LIB_INIT_ALL);
    if (status != ARES_SUCCESS) {
        printf("ares_library_init: %s\n", ares_strerror(status));
        return 1;
    }

    struct ares_options options = {
        .timeout = TIMEOUT, /* set first query timeout */
        .tries = MAXTRIES /* set max. number of tries */
    };
    optmask = ARES_OPT_TIMEOUTMS | ARES_OPT_TRIES;

    status = ares_init_options(&channel, &options, optmask);
    if (status != ARES_SUCCESS) {
        printf("ares_init_options: %s\n", ares_strerror(status));
        ares_library_cleanup();
        return 1;
    }

    status = ares_set_servers_csv(channel, SERVERS);
    if (status != ARES_SUCCESS) {
        printf("ares_set_servers_csv: %s\n", ares_strerror(status));
        ares_destroy(channel);
        ares_library_cleanup();
        return 1;
    }

    fp = fopen(argv[1], "r");
    if (!fp) {
        perror(argv[1]);
        ares_destroy(channel);
        ares_library_cleanup();
        exit(EXIT_FAILURE);
    }

    do {
        /* Process replies when the query window is full or the input is
         * exhausted. */
        if (nwaiting >= MAXWAITING || done) {
            do {
                wait_ares(channel);
            } while (nwaiting > MAXWAITING);
        }
        if (!done) {
            if (fscanf(fp, "%127s", domain) == 1) {
                ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
                nwaiting++;
            } else {
                fprintf(stderr, "done sending\n");
                done = 1;
            }
        }
    } while (nwaiting > 0);

    ares_destroy(channel);
    ares_library_cleanup();
    fclose(fp);
    return 0;
}
The program requires a file with a domain name on each line to work.
This is what I ended up coming up with.
#include <ares.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <errno.h>
#define MAXWAITING 1000 /* Max. number of parallel DNS queries */
#define MAXTRIES 3 /* Max. number of tries per domain */
#define TIMEOUT 3000 /* Max. number of ms for first try */
#define DNS_MAX_EVENTS 10000
#define DNS_MAX_SERVERS 2
#define SERVERS "1.0.0.1,8.8.8.8" /* DNS server to use (Cloudflare & Google) */
/* Number of lookups issued but not yet reported to callback(). */
static int nwaiting;
/* Sockets returned by the last ares_getsock() call in wait_ares(); an entry
 * of 0 is treated there as "no socket". */
ares_socket_t dns_client_fds[ARES_GETSOCK_MAXNUM] = {0};
/* ev: scratch event used when registering a socket; events: result buffer
 * for epoll_wait(). */
struct epoll_event ev, events[DNS_MAX_EVENTS];
/* Scratch variables shared by wait_ares() and main(); epollfd is the epoll
 * instance created in main(). `ret` is not referenced in the visible code. */
int i,bitmask,nfds, epollfd, timeout, fd_count, ret;
/*
 * No-op callback; every argument is ignored.
 * NOTE(review): shaped like a c-ares socket-state callback, yet nothing in
 * the visible code registers it — confirm whether it is still needed.
 */
static void
state_cb(void *data, int s, int read, int write)
{
    (void)data;
    (void)s;
    (void)read;
    (void)write;
    /* Disabled trace:
     * printf("Change state fd %d read:%d write:%d\n", s, read, write); */
}
/*
 * Called once per finished lookup.
 *
 * Decrements nwaiting in every case. Reports the c-ares error text on stderr
 * when the lookup failed or produced no host entry; otherwise prints the host
 * name followed by its first address, provided the address list is not empty.
 */
static void
callback(void *arg, int status, int timeouts, struct hostent *host)
{
    char ip[INET6_ADDRSTRLEN];

    nwaiting--;

    if (status != ARES_SUCCESS || host == NULL) {
        fprintf(stderr, "Failed to lookup %s\n", ares_strerror(status));
        return;
    }

    if (host->h_addr_list[0] == NULL)
        return;

    inet_ntop(host->h_addrtype, host->h_addr_list[0], ip, sizeof(ip));
    printf("%s\n%s\n", host->h_name, ip);
}
/*
 * One epoll-based wait/process step for a c-ares channel.
 *
 * 1. Removes the sockets registered by the previous call from the epoll set
 *    (errors are ignored).
 * 2. Fetches the current sockets with ares_getsock() and registers each one
 *    for the directions c-ares asked for.
 * 3. If at least one socket is registered, waits up to one second. Ready
 *    sockets are handed to ares_process_fd(); on timeout ares_process_fd() is
 *    called with no sockets; on epoll_wait() failure nothing is processed.
 *
 * Works on the file-scope scratch variables (i, bitmask, nfds, ev, events,
 * timeout, fd_count) and on dns_client_fds / epollfd.
 */
static void
wait_ares(ares_channel channel)
{
    nfds = 0;
    bitmask = 0;

    /* Forget whatever was registered last time; failures are ignored. */
    for (i = 0; i < DNS_MAX_SERVERS; i++) {
        if (dns_client_fds[i] > 0)
            (void)epoll_ctl(epollfd, EPOLL_CTL_DEL, dns_client_fds[i], NULL);
    }

    memset(dns_client_fds, 0, sizeof(dns_client_fds));
    bitmask = ares_getsock(channel, dns_client_fds, DNS_MAX_SERVERS);

    for (i = 0; i < DNS_MAX_SERVERS; i++) {
        if (dns_client_fds[i] <= 0)
            continue;

        ev.events = 0;
        if (ARES_GETSOCK_READABLE(bitmask, i))
            ev.events |= EPOLLIN;
        if (ARES_GETSOCK_WRITABLE(bitmask, i))
            ev.events |= EPOLLOUT;
        ev.data.fd = dns_client_fds[i];

        /* A socket that is already registered still counts as active. */
        if (epoll_ctl(epollfd, EPOLL_CTL_ADD, dns_client_fds[i], &ev) < 0 &&
            errno != EEXIST)
            continue;
        nfds++;
    }

    if (nfds == 0)
        return;

    timeout = 1000;//millisecs
    fd_count = epoll_wait(epollfd, events, DNS_MAX_EVENTS, timeout);
    if (fd_count < 0)
        return;

    if (fd_count == 0) {
        /* Timed out: call ares_process_fd() with no sockets. */
        ares_process_fd(channel, ARES_SOCKET_BAD, ARES_SOCKET_BAD);
        return;
    }

    for (i = 0; i < fd_count; ++i) {
        ares_socket_t rd = (events[i].events & EPOLLIN) ? events[i].data.fd : ARES_SOCKET_BAD;
        ares_socket_t wr = (events[i].events & EPOLLOUT) ? events[i].data.fd : ARES_SOCKET_BAD;

        ares_process_fd(channel, rd, wr);
    }
}
/*
 * Resolve every whitespace-separated name found in the file argv[1], using
 * the epoll-based wait_ares().
 *
 * At most MAXWAITING queries are outstanding at a time; results are reported
 * by callback(). Returns 0 when all queries have completed, non-zero when the
 * arguments, the c-ares setup or the epoll instance are unusable.
 */
int
main(int argc, char *argv[])
{
    FILE *fp;
    char domain[128];
    ares_channel channel;
    int status, done = 0;
    int optmask;

    /* The file name is mandatory; fopen(NULL) would be undefined. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file-with-domain-names>\n",
                argc > 0 ? argv[0] : "resolver");
        return 1;
    }

    status = ares_library_init(ARES_LIB_INIT_ALL);
    if (status != ARES_SUCCESS) {
        printf("ares_library_init: %s\n", ares_strerror(status));
        return 1;
    }

    struct ares_options options = {
        .timeout = TIMEOUT, /* set first query timeout */
        .tries = MAXTRIES /* set max. number of tries */
    };
    optmask = ARES_OPT_TIMEOUTMS | ARES_OPT_TRIES;

    status = ares_init_options(&channel, &options, optmask);
    if (status != ARES_SUCCESS) {
        printf("ares_init_options: %s\n", ares_strerror(status));
        ares_library_cleanup();
        return 1;
    }

    status = ares_set_servers_csv(channel, SERVERS);
    if (status != ARES_SUCCESS) {
        printf("ares_set_servers_csv: %s\n", ares_strerror(status));
        ares_destroy(channel);
        ares_library_cleanup();
        return 1;
    }

    fp = fopen(argv[1], "r");
    if (!fp) {
        perror(argv[1]);
        ares_destroy(channel);
        ares_library_cleanup();
        exit(EXIT_FAILURE);
    }

    memset(dns_client_fds, 0, sizeof(dns_client_fds));
    memset((char *)&ev, 0, sizeof(struct epoll_event));
    memset((char *)&events[0], 0, sizeof(events));

    epollfd = epoll_create(DNS_MAX_SERVERS);
    if (epollfd < 0) {
        /* Without an epoll instance wait_ares() can never make progress. */
        perror("epoll_create: ");
        fclose(fp);
        ares_destroy(channel);
        ares_library_cleanup();
        return 1;
    }

    do {
        /* Process replies when the query window is full or the input is
         * exhausted. */
        if (nwaiting >= MAXWAITING || done) {
            do {
                wait_ares(channel);
            } while (nwaiting > MAXWAITING);
        }
        if (!done) {
            if (fscanf(fp, "%127s", domain) == 1) {
                ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
                nwaiting++;
            } else {
                fprintf(stderr, "done sending\n");
                done = 1;
            }
        }
    } while (nwaiting > 0);

    ares_destroy(channel);
    ares_library_cleanup();
    close(epollfd);
    fclose(fp);
    return 0;
}

DPDK create a packet for transmission

I am new to DPDK and am trying to create a packet and send it from one DPDK-enabled machine to another that is connected directly via Ethernet. I modified the example examples/rxtx_callbacks/main.c provided with DPDK on both sides. However, I am not receiving anything at the receiver. What am I doing wrong?
Modified function at transmitter: lcore_main is modified:
static __attribute__((noreturn)) void lcore_main()
{
uint16_t port;
struct ether_hdr *eth_hdr;
struct ether_addr daddr;
daddr.addr_bytes[0] = 116;
daddr.addr_bytes[1] = 225;
daddr.addr_bytes[2] = 228;
daddr.addr_bytes[3] = 204;
daddr.addr_bytes[4] = 106;
daddr.addr_bytes[5] = 82;
//rte_eth_macaddr_get(portid, &addr);
struct ipv4_hdr *ipv4_hdr;
int32_t i;
int ret;
RTE_ETH_FOREACH_DEV(port)
if (rte_eth_dev_socket_id(port) > 0 &&
rte_eth_dev_socket_id(port) !=
(int)rte_socket_id())
printf("WARNING, port %u is on remote NUMA node to "
"polling thread.\n\tPerformance will "
"not be optimal.\n", port);
printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
rte_lcore_id());
//struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
struct rte_mbuf *m_head[BURST_SIZE];
for (;;) {
RTE_ETH_FOREACH_DEV(port) {
if(rte_pktmbuf_alloc_bulk(mbuf_pool, m_head, BURST_SIZE)!=0)
{
printf("Allocation problem\n");
}
for(i = 0; i < BURST_SIZE; i++) {
eth_hdr = rte_pktmbuf_mtod(m_head[i], struct ether_hdr *);
//eth_hdr = (struct ether_hdr *)rte_pktmbuf_append(m_head[i],
// sizeof(struct ether_hdr));
eth_hdr->ether_type = htons(ETHER_TYPE_IPv4);
rte_memcpy(&(eth_hdr->s_addr), &addr, sizeof(struct ether_addr));
rte_memcpy(&(eth_hdr->d_addr), &daddr, sizeof(struct ether_addr));
}
const uint16_t nb_tx = rte_eth_tx_burst(port, 0, m_head, BURST_SIZE);
if (unlikely(nb_tx < BURST_SIZE)) {
uint16_t buf;
for (buf = nb_tx; buf < BURST_SIZE; buf++)
rte_pktmbuf_free(m_head[buf]);
}
}
}
}
receiver side RTE_ETH_FOREACH_DEV of tx part is modified to:
/* Receive loop body: poll RX queue 0 of every port, print the ether_type
 * field of each received frame as stored in the header, then free the frames
 * (nothing is forwarded). */
RTE_ETH_FOREACH_DEV(port) {
    struct rte_mbuf *bufs[BURST_SIZE];
    /* The queue id was missing here; queue 0 matches the call in the full
     * listing further down. */
    const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs, BURST_SIZE);
    //printf("Number of Packets received %d\n", nb_rx);
    for(i = 0; i < nb_rx; i++) {
        eth_hdr = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
        printf("Packet ip received %d\n", eth_hdr->ether_type);
    }
    if (unlikely(nb_rx == 0))
        continue;
    /* Forwarding is disabled, so no packet counts as transmitted. */
    const uint16_t nb_tx = 0; // = rte_eth_tx_burst(port ^ 1, 0, bufs, nb_rx);
    if (unlikely(nb_tx < nb_rx)) {
        uint16_t buf;
        for (buf = nb_tx; buf < nb_rx; buf++)
            rte_pktmbuf_free(bufs[buf]);
    }
}
Please let me know if I missed something.
There are few issues with the code:
eth_hdr = rte_pktmbuf_mtod(m_head[i], struct ether_hdr *);
Unlike rte_pktmbuf_append(), the rte_pktmbuf_mtod() does not change the packet length, so it should be set manually before the tx.
eth_hdr->ether_type = htons(ETHER_TYPE_IPv4);
If we set ETHER_TYPE_IPv4, a correct IPv4 header must follow. So we need either to add the header or to change the ether_type.
rte_memcpy(&(eth_hdr->s_addr), &addr, sizeof(struct ether_addr));
Where does the source address come from?
const uint16_t nb_tx = rte_eth_tx_burst(port, 0, m_head, BURST_SIZE);
Looks like we transmit a burst of zero-sized packets with invalid IPv4 headers. Please also make sure the source/destination addresses are correct.
As suggested by #andriy-berestovsky, I used rte_eth_stats_get() and it shows packets are present in ethernet ring via the field ipackets but rte_eth_rx_burst is not returning any packets. Full code is included here, please let me know what I am doing wrong. (I am using testpmd at transmitter side)
#include <stdint.h>
#include <inttypes.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_ip.h>
#include <rte_mbuf.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <signal.h>
#define MAX_SOURCE_SIZE (0x100000)
#define RX_RING_SIZE 1024
#define TX_RING_SIZE 1024
#define NUM_MBUFS 8191
#define MBUF_CACHE_SIZE 250
#define BURST_SIZE 32
/* Default port configuration: only the maximum RX packet length is set
 * explicitly; every other field is left zero-initialised. */
static const struct rte_eth_conf port_conf_default = {
.rxmode = {
.max_rx_pkt_len = ETHER_MAX_LEN,
},
};
/* Cycle/packet counters; not referenced anywhere in the visible code. */
static struct {
uint64_t total_cycles;
uint64_t total_pkts;
} latency_numbers;
/* Set by signal_handler() and polled by lcore_main() to leave the loop. */
static volatile bool force_quit;
/* Packet buffer pool; created in main(). */
struct rte_mempool *mbuf_pool;
/*
 * SIGINT/SIGTERM handler: print the RX statistics of every port and ask the
 * main loop to stop by setting force_quit. Other signals are ignored.
 *
 * NOTE(review): printf() and the statistics query are not async-signal-safe;
 * ideally the handler would only set the flag and the report would be printed
 * by the main thread.
 */
static void
signal_handler(int signum)
{
    struct rte_eth_stats eth_stats;
    uint16_t port;

    if (signum != SIGINT && signum != SIGTERM)
        return;

    printf("\n\nSignal %d received, preparing to exit...\n",
            signum);
    RTE_ETH_FOREACH_DEV(port) {
        /* Skip ports whose counters could not be read instead of printing
         * uninitialised values. */
        if (rte_eth_stats_get(port, &eth_stats) != 0)
            continue;
        /* The counters are uint64_t, so PRIu64 is the matching conversion. */
        printf("Total number of packets received %" PRIu64
                ", dropped rx full %" PRIu64
                " and rest= %" PRIu64 ", %" PRIu64 ", %" PRIu64 "\n",
                eth_stats.ipackets, eth_stats.imissed, eth_stats.ierrors,
                eth_stats.rx_nombuf, eth_stats.q_ipackets[0]);
    }
    force_quit = true;
}
/* MAC address of the port most recently set up by port_init(). */
struct ether_addr addr;
/*
 * Configure and start one Ethernet port.
 *
 * port      - port id to set up
 * mbuf_pool - pool that supplies the RX buffers
 *
 * The port gets one RX and one TX queue with (possibly adjusted) ring sizes,
 * is started, has its MAC address stored in the global `addr` and printed,
 * and is finally switched to promiscuous mode.
 *
 * Returns 0 on success, -1 for an invalid port, otherwise the error code of
 * the DPDK call that failed.
 */
static inline int
port_init(uint16_t port, struct rte_mempool *mbuf_pool)
{
    const uint16_t rx_rings = 1, tx_rings = 1;
    struct rte_eth_conf port_conf = port_conf_default;
    struct rte_eth_dev_info dev_info;
    struct rte_eth_txconf txconf;
    uint16_t nb_rxd = RX_RING_SIZE;
    uint16_t nb_txd = TX_RING_SIZE;
    uint16_t queue;
    int rc;

    if (!rte_eth_dev_is_valid_port(port))
        return -1;

    /* Enable fast mbuf free only when the device advertises it. */
    rte_eth_dev_info_get(port, &dev_info);
    if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
        port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;

    rc = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
    if (rc != 0)
        return rc;

    rc = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
    if (rc != 0) {
        printf("Error in adjustment\n");
        return rc;
    }

    for (queue = 0; queue < rx_rings; queue++) {
        rc = rte_eth_rx_queue_setup(port, queue, nb_rxd,
                rte_eth_dev_socket_id(port), NULL, mbuf_pool);
        if (rc < 0) {
            printf("RX queue setup prob\n");
            return rc;
        }
    }

    /* TX queues use the device defaults plus the offloads chosen above. */
    txconf = dev_info.default_txconf;
    txconf.offloads = port_conf.txmode.offloads;
    for (queue = 0; queue < tx_rings; queue++) {
        rc = rte_eth_tx_queue_setup(port, queue, nb_txd,
                rte_eth_dev_socket_id(port), &txconf);
        if (rc < 0)
            return rc;
    }

    rc = rte_eth_dev_start(port);
    if (rc < 0) {
        printf("Error in start\n");
        return rc;
    }

    rte_eth_macaddr_get(port, &addr);
    printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
            " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
            (unsigned)port,
            addr.addr_bytes[0], addr.addr_bytes[1],
            addr.addr_bytes[2], addr.addr_bytes[3],
            addr.addr_bytes[4], addr.addr_bytes[5]);

    rte_eth_promiscuous_enable(port);
    return 0;
}
/*
* Main thread that does the work, reading from INPUT_PORT
* and writing to OUTPUT_PORT
*/
static __attribute__((noreturn)) void
lcore_main(void)
{
uint16_t port;
struct ether_hdr *eth_hdr;
//struct ether_addr addr;
//rte_eth_macaddr_get(portid, &addr);
struct ipv4_hdr *ipv4_hdr;
int32_t i;
RTE_ETH_FOREACH_DEV(port)
{
if (rte_eth_dev_socket_id(port) > 0 &&
rte_eth_dev_socket_id(port) !=
(int)rte_socket_id())
printf("WARNING, port %u is on remote NUMA node to "
"polling thread.\n\tPerformance will "
"not be optimal.\n", port);
}
printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
rte_lcore_id());
for (;;) {
RTE_ETH_FOREACH_DEV(port) {
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, 0,bufs, BURST_SIZE);
for(i = 0; i < nb_rx; i++) {
ipv4_hdr = rte_pktmbuf_mtod_offset(bufs[i], struct ipv4_hdr *, sizeof(struct ether_hdr));
printf("Packet ip received %d\n", ipv4_hdr->src_addr);
}
if (unlikely(nb_rx == 0))
continue;
const uint16_t nb_tx = 0; // = rte_eth_tx_burst(port ^ 1, 0, bufs, nb_rx);
if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}
if(force_quit)
break;
}
}
/*
 * Main function, does initialisation and calls the per-lcore functions.
 *
 * Initialises the EAL, installs the signal handlers, creates the mbuf pool
 * sized by the number of available ports, initialises every port and then
 * runs lcore_main() on the calling core.
 */
int
main(int argc, char *argv[])
{
    uint16_t nb_ports;
    uint16_t portid;

    /* init EAL */
    int ret = rte_eal_init(argc, argv);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
    argc -= ret;
    argv += ret;

    force_quit = false;
    signal(SIGINT, signal_handler);
    signal(SIGTERM, signal_handler);

    /* Check the port count first: with zero ports the pool size below would
     * be zero and the failure would be reported as a pool problem. */
    nb_ports = rte_eth_dev_count_avail();
    if (nb_ports < 1)
        rte_exit(EXIT_FAILURE, "Error: number of ports must be greater than %d\n", nb_ports);

    /* The product is an int-sized value; printing it with %lld was a format
     * mismatch. */
    printf("size ordered %u\n", (unsigned)(NUM_MBUFS * nb_ports));
    mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
            NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0,
            RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
    if (mbuf_pool == NULL)
        rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

    // initialize all ports
    RTE_ETH_FOREACH_DEV(portid)
        if (port_init(portid, mbuf_pool) != 0)
            rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu16"\n",
                    portid);

    if (rte_lcore_count() > 1)
        printf("\nWARNING: Too much enabled lcores - "
                "App uses only 1 lcore\n");

    // call lcore_main on master core only
    lcore_main();
    return 0;
}
It seems to be a problem of ethernet card with ubuntu 14.04. With ubuntu 16.04 it is working fine.

why the performance of packet transmission is so low

Trying to create a raw socket based program using mmap_packet to send packets at fast rate.
The following code is adapted from the example at this gist. It does send packets, but not fast: on my 1 Gbps NIC (r8169 driver) it only sends about 95,000 packets/second on my Core i7 processor (3.1 GHz). I believe it should be able to send at a much higher rate.
Not sure what is the bottleneck. Any ideas? Thanks!
Here is the code snippet:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <arpa/inet.h>
#include <netinet/if_ether.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <linux/if.h>
#include <linux/if_packet.h>
#include <sys/time.h>
// Socket option number; not referenced anywhere in the visible code.
#define PACKET_QDISC_BYPASS 20
/// The number of frames in the ring
// This number is not set in stone. Nor are block_size, block_nr or frame_size
// process_tx() wraps its ring index with "& (CONF_RING_FRAMES - 1)", so the
// value has to stay a power of two.
#define CONF_RING_FRAMES 1024
// Interface whose index is looked up for the destination address.
#define CONF_DEVICE "eth0"
/// Offset of data from start of frame
// Not referenced in the visible code; process_tx() computes its own offset.
#define PKT_OFFSET (TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + \
TPACKET_ALIGN(sizeof(struct sockaddr_ll)))
/// (unimportant) macro for loud failure: print msg to stderr and return lvl
/// from the enclosing function.
// There is deliberately no ';' after while(0): the caller's own semicolon
// completes the statement, which keeps the macro usable in if/else chains.
// msg is printed through "%s" so it is never interpreted as a format string.
#define RETURN_ERROR(lvl, msg) \
    do { \
        fprintf(stderr, "%s", msg); \
        return lvl; \
    } while(0)
static struct sockaddr_ll txring_daddr;
/* Current time from gettimeofday() as seconds, with the microsecond part
 * carried in the fraction. */
double getTS() {
    struct timeval now;
    double fraction;

    gettimeofday(&now, NULL);
    fraction = now.tv_usec/1000000.0;
    return now.tv_sec + fraction;
}
/// create a linklayer destination address
// Fills the file-scope txring_daddr with the interface index of ringdev and
// the broadcast hardware address.
// #param fd is the socket used for the interface-index lookup
// #param ringdev is a link layer device name, such as "eth0"
// #return 0 on success, -1 when the name does not fit or the lookup fails
static int
init_ring_daddr(int fd, const char *ringdev)
{
    struct ifreq ifreq;
    int written;

    // get device index
    // The name buffer has a fixed size, so copy with a bound and refuse
    // names that would be truncated.
    memset(&ifreq, 0, sizeof(ifreq));
    written = snprintf(ifreq.ifr_name, sizeof(ifreq.ifr_name), "%s", ringdev);
    if (written < 0 || (size_t)written >= sizeof(ifreq.ifr_name)) {
        fprintf(stderr, "interface name too long: %s\n", ringdev);
        return -1;
    }
    if (ioctl(fd, SIOCGIFINDEX, &ifreq)) {
        perror("ioctl");
        return -1;
    }

    txring_daddr.sll_family = AF_PACKET;
    txring_daddr.sll_protocol = htons(ETH_P_IP);
    txring_daddr.sll_ifindex = ifreq.ifr_ifindex;

    // set the linklayer destination address
    // NOTE: this should be a real address, not ff.ff....
    txring_daddr.sll_halen = ETH_ALEN;
    memset(&txring_daddr.sll_addr, 0xff, ETH_ALEN);
    return 0;
}
/// Initialize a packet socket ring buffer
// Requests one block holding CONF_RING_FRAMES page-sized frames, maps it into
// the process and prepares the destination address for CONF_DEVICE.
// #param fd is the packet socket
// #param ringtype is one of PACKET_RX_RING or PACKET_TX_RING
// #return the mapped ring, or NULL on failure (nothing stays mapped then)
static char *
init_packetsock_ring(int fd, int ringtype)
{
    struct tpacket_req tp;
    size_t ring_bytes;
    char *ring;

    // tell kernel to export data through mmap()ped ring
    tp.tp_block_size = CONF_RING_FRAMES * getpagesize();
    tp.tp_block_nr = 1;
    tp.tp_frame_size = getpagesize();
    tp.tp_frame_nr = CONF_RING_FRAMES;
    if (setsockopt(fd, SOL_PACKET, ringtype, (void*) &tp, sizeof(tp))) {
        perror("setting up ring");
        RETURN_ERROR(NULL, "setsockopt() ring\n");
    }

    // The former "#ifdef TPACKET_V2" section was dropped: it referenced an
    // undeclared variable and so could never have been compiled in.

    // open ring
    ring_bytes = (size_t)tp.tp_block_size * tp.tp_block_nr;
    ring = mmap(0, ring_bytes,
            PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    // mmap() reports failure with MAP_FAILED, not with a null pointer
    if (ring == MAP_FAILED) {
        perror("mmap");
        RETURN_ERROR(NULL, "mmap()\n");
    }

    if (init_ring_daddr(fd, CONF_DEVICE)) {
        // do not leave the mapping behind when setup fails half-way
        munmap(ring, ring_bytes);
        return NULL;
    }
    return ring;
}
/// Create a packet socket. If param ring is not NULL, the buffer is mapped
// #param ring will, if set, point to the mapped ring on return
// #param ringtype is passed through to init_packetsock_ring()
// #return the socket fd, or -1 when the socket or the ring cannot be set up
static int
init_packetsock(char **ring, int ringtype)
{
    // open packet socket
    int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

    if (fd < 0) {
        perror("open socket");
        RETURN_ERROR(-1, "Root priliveges are required\nsocket() rx. \n");
    }

    // caller did not ask for a ring: the bare socket is the result
    if (ring == NULL)
        return fd;

    *ring = init_packetsock_ring(fd, ringtype);
    if (*ring != NULL)
        return fd;

    // ring setup failed: the socket is of no use without it
    close(fd);
    return -1;
}
/// Tear down what init_packetsock() set up: unmap the ring and close the
/// socket.
// Both steps are always attempted, so a failing munmap() no longer leaves the
// socket open.
// #return 0 when both steps succeed, 1 otherwise
static int
exit_packetsock(int fd, char *ring)
{
    int failed = 0;

    if (munmap(ring, CONF_RING_FRAMES * getpagesize())) {
        perror("munmap");
        failed = 1;
    }
    if (close(fd)) {
        perror("close");
        failed = 1;
    }
    return failed;
}
/// transmit a packet using packet ring
// NOTE: for high rate processing try to batch system calls,
// by writing multiple packets to the ring before calling send()
//
// The packet is copied into the next frame of the ring and the frame is
// marked TP_STATUS_SEND_REQUEST; the caller still has to call send()/sendto()
// to make the kernel transmit it. If the frame is not yet available the
// function polls the socket until it is.
//
// #param fd is the packet socket that owns the ring
// #param ring is the mapped TX ring
// #param pkt is a packet from the network layer up (e.g., IP)
// #param pktlen is the packet length in bytes
// #return 0 on success, -1 on failure (poll error or packet too large)
static int process_tx(int fd, char *ring, const char *pkt, int pktlen)
{
    static int ring_offset = 0;
    // like in the PACKET_RX_RING case, we define frames to be a page long,
    // including their header. This explains the use of getpagesize().
    const size_t frame_size = (size_t) getpagesize();
    // position of the packet data inside a frame
    const size_t data_offset = TPACKET_HDRLEN - sizeof(struct sockaddr_ll);
    struct tpacket_hdr *header;
    struct pollfd pollset;
    int ret;

    // a packet that does not fit would overwrite the following frame
    if (pktlen < 0 || (size_t) pktlen > frame_size - data_offset) {
        fprintf(stderr, "process_tx: packet of %d bytes does not fit in a frame\n", pktlen);
        return -1;
    }

    // fetch a frame
    header = (struct tpacket_hdr *) (ring + (size_t) ring_offset * frame_size);
    assert((((unsigned long) header) & (frame_size - 1)) == 0);
    while (header->tp_status != TP_STATUS_AVAILABLE) {
        // if none available: wait on more data
        pollset.fd = fd;
        pollset.events = POLLOUT;
        pollset.revents = 0;
        ret = poll(&pollset, 1, 1000 /* don't hang */);
        if (ret < 0) {
            if (errno != EINTR) {
                perror("poll");
                return -1;
            }
        }
    }

    // fill data
    memcpy((char *) header + data_offset, pkt, (size_t) pktlen);

    // fill header
    header->tp_len = pktlen;
    header->tp_status = TP_STATUS_SEND_REQUEST;

    // increase consumer ring pointer
    ring_offset = (ring_offset + 1) & (CONF_RING_FRAMES - 1);
    return 0;
}
/// Example application that opens a packet socket with a TX ring and sends
/// the same test frame over and over, printing once per second how many
/// packets were queued and how many sendto() calls were made.
// The loop only ends on an error; the ring and the socket are released
// before the program exits.
int main(int argc, char **argv)
{
    char *ring;
    // raw bytes, hence unsigned: several values are above 127
    unsigned char pkt[125] = {0x00,0x0c,0x29,0xa4,0xff,0xbc,0x40,0x25,0xc2,0xd9,0xfb,0x8c,0x08,0x00,0x45,0x00,0x00,0x6f,0x24,0x1b,0x40,0x00,0x40,0x06,0x02,0x4b,0x0a,0x00,0x00,0x07,0x0a,0x00,0x00,0x1d,0xb8,0x64,0x01,0xbb,0x80,0x9e,0xaa,0x77,0x17,0x6d,0xa2,0x04,0x80,0x18,0x00,0x73,0x03,0xa0,0x00,0x00,0x01,0x01,0x08,0x0a,0x01,0x27,0x8e,0xaf,0x00,0x01,0xe8,0x71,0x16,0x03,0x01,0x00,0x36,0x01,0x00,0x00,0x32,0x03,0x02,0x55,0xf5,0x01,0xa9,0xc0,0xca,0xae,0xd6,0xd2,0x9b,0x6a,0x79,0x6d,0x9a,0xe8,0x9d,0x78,0xe2,0x64,0x98,0xf0,0xac,0xcb,0x2c,0x0d,0x51,0xa5,0xf8,0xc4,0x0f,0x93,0x87,0x00,0x00,0x04,0x00,0x35,0x00,0xff,0x01,0x00,0x00,0x05,0x00,0x0f,0x00,0x01,0x01};
    int fd;
    int rc = 0;

    (void) argc;
    (void) argv;

    printf("page size %x\n", getpagesize());
    fd = init_packetsock(&ring, PACKET_TX_RING);
    if (fd < 0)
        return 1;

    // TODO: make correct IP packet out of pkt
    int i;
    double startTs = getTS();
    double currentTs;
    int pktCnt = 0;
    int sendCnt = 0;

    while (rc == 0) {
        // queue a batch of frames, varying one byte per packet
        for (i=0; i<1000; i++) {
            pkt[1] ++; pktCnt++;
            if (process_tx(fd, ring, (const char *) pkt, (int) sizeof(pkt)) < 0) {
                rc = 1;
                break;
            }
        }
        if (rc != 0)
            break;

        // notify kernel: one call flushes everything queued above
        if (sendto(fd, NULL, 0, 0, (void *) &txring_daddr, sizeof(txring_daddr)) < 0) {
            perror("sendto");
            rc = -1;
            break;
        }
        sendCnt++;
        usleep(300);

        currentTs = getTS();
        if ((currentTs - startTs) >= 1.0) {
            startTs += 1.0;
            printf("%7d %6d\n", pktCnt, sendCnt);
            pktCnt = 0; sendCnt = 0;
        }
    }

    if (exit_packetsock(fd, ring))
        return 1;
    return rc;
}
UPDATE1
The current NIC is RealTek RTL8111/8168/8411 NIC. After upgrading the driver to the version as of 8.044, the rate goes up to 135K/second.
Ran the same program on Intel 82577LM Gigabit NIC, got about 430K/seconds rate.

howto: setup a bidirectional UDP connection for a message based communication

The problem I want to solve is to build a stable connection for exchanging data between a PC and my Raspberry Pi(RPi). They are connected via WLAN in a LAN by a router.
I created a simple way, by defining on every device a client(c) and a server(s). I give in short the pseudo-code for that:
#init:
s = createSocket
c = createSocket
s = bind to "localhost"
create thread for message handling
#message handling thread:
msg = recvfrom(s)
#main:
init(serverPort=10001, clientIP="raspberryPi", clientPort=10002)
sendto(c, "hello")
The problem with UDP via WLAN is, that some messages can get lost. So I decided to create a simple protocol for that data exchange. The idea is that the server acknowledges the reception of the data. The problem changes into that kind pseudo-code:
#init:
s = createSocket
c = createSocket
s = bind to "localhost"
create thread for message handling
#message handling thread:
msg = recvfrom(s)
sendto (c, "ack")
#main:
sendto(c, "hello")
wait for 100ms for res = recvfrom(s)
if res == timeout goto sendto
if res <> 'ack' wrong message
I am running into the problem that the sending path and the receiving path both use recvfrom(s). Also, the easy loopback test, using the same port for client and server, cannot be done.
Any ideas?
Some not working c code follows:
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <pthread.h>
#include <semaphore.h>
#include <errno.h>
// sockets
#ifdef WIN32
#ifndef WINVER
// set min win version to Win XP
#define WINVER 0x0501
#endif
//use lib: ws2_32
#include <winsock2.h>
#include <ws2tcpip.h>
#else
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <sys/un.h>
#include <unistd.h>
#include <arpa/inet.h>
// Names used by the rest of the code, defined here for the non-WIN32 build.
#define ADDR_ANY INADDR_ANY
#define SOCKET_ERROR (-1)
#define INVALID_SOCKET (SOCKET)(~0)
#define closesocket(x) (close(x))
// In this branch a socket handle is a plain int descriptor.
typedef int SOCKET;
typedef struct sockaddr_in SOCKADDR_IN;
typedef struct sockaddr SOCKADDR;
#endif
// Callback type taking a data pointer and a byte count and returning int.
// NOTE(review): presumably invoked for each received data block — no call is
// visible in this part of the file; confirm.
typedef int (* TfkpTCPcallback) (uint8_t * pData, size_t amount);
// size of the header
// (uint_32 is not a type; the fixed-width type from <stdint.h> is uint32_t.
// NOTE(review): the control messages used in this file consist of two
// uint32_t fields — confirm whether this constant should cover both.)
#define dStjTCPSocketControlMsg (sizeof(uint32_t))
// start data msg struct
// <uint_32> id = 's'
// <uint_32> len
// res struct
// <uint_32> id = 'r'
// <uint_32> error code (0 = no error)
// Identifier stored in the first word of a control message.
enum eStjTCPSocketControlMsgIDs {
    eStjTCPSocketControlMsgID_result = 'r', // result message
    eStjTCPSocketControlMsgID_start = 's'   // start message
};
// Error codes carried in a result message (0 = no error).
enum eStjTCPSocketControlMsgErrorIDs {
    eStjTCPSocketControlMsgErrorID_noError    = 0,
    eStjTCPSocketControlMsgErrorID_otherError = 1,
    eStjTCPSocketControlMsgErrorID_socket     = 2,
    eStjTCPSocketControlMsgErrorID_msgID      = 3,
    eStjTCPSocketControlMsgErrorID_realloc    = 4,
    eStjTCPSocketControlMsgErrorID_amount     = 5,
};
//! state of the socket based message communication (one instance:
//! gTCPsocketControl)
typedef struct SstjTCPSocketControl {
pthread_t srvThr; //!< thread handle; presumably the message pump thread (TODO confirm)
SOCKET sCli; //!< socket TCPcontrolSend() connects and sends on
SOCKET sSrv; //!< socket TCPcontrolMsgPump() accepts connections on
struct sockaddr_in sAddrCli; //!< address sCli is connected to
int cliConnectedFlag; //!< <>0 once sCli has been connected
uint8_t * pMsgBuffer; //!< message buffer; its use is not visible here
size_t msgBufferSize; //!< presumably the size of pMsgBuffer (TODO confirm)
sem_t serverSign; //!< posted by TCPcontrolMsgPump() when it starts
TfkpTCPcallback rxCB; //!< callback; no call is visible in this part of the file
} TstjTCPSocketControl;
//! a global variable to control the message based communication
// srvThr is deliberately not listed: pthread_t is an opaque type and need not
// be a pointer, so initialising it with NULL is not portable. Members that
// are left out of a designated initializer are zero-initialised anyway.
TstjTCPSocketControl gTCPsocketControl = {
    .sCli = -1,
    .sSrv = -1,
    .cliConnectedFlag = 0,
    .pMsgBuffer = NULL,
    .msgBufferSize = 0,
};
//! receives one result message (two uint32_t words: id 'r', error code)
// The sockets in this file are used with connect()/accept(), i.e. as byte
// streams, so one recv() may deliver only part of the message. The function
// therefore keeps reading until the whole message has arrived.
// Returns the error code sent by the peer, or
//   eStjTCPSocketControlMsgErrorID_socket when recv() fails or the peer
//     closes the connection (retrying would never succeed),
//   eStjTCPSocketControlMsgErrorID_msgID  when the message id is not 'r'.
int recvResult(SOCKET s) {
    uint32_t contrlMsg[2];
    char *cursor = (char *)contrlMsg;
    size_t missing = sizeof(contrlMsg);

    while (missing > 0) {
        int r = recv(s , cursor , missing , 0);
        if (r <= 0) {
            return eStjTCPSocketControlMsgErrorID_socket;
        }
        cursor += r;
        missing -= (size_t)r;
    }
    if (contrlMsg[0] != eStjTCPSocketControlMsgID_result) {
        return eStjTCPSocketControlMsgErrorID_msgID;
    }
    return contrlMsg[1];
}
//! sends one result message (id 'r' followed by errorCode) on socket s
// Returns eStjTCPSocketControlMsgErrorID_socket when send() fails, otherwise
// eStjTCPSocketControlMsgErrorID_noError.
int sendResult(SOCKET s, uint32_t errorCode) {
    uint32_t contrlMsg[2] = { eStjTCPSocketControlMsgID_result, errorCode };

    if (send(s , (char *)contrlMsg , sizeof(contrlMsg) , 0) < 0) {
        return eStjTCPSocketControlMsgErrorID_socket;
    }
    return eStjTCPSocketControlMsgErrorID_noError;
}
//! Sends a block of data to the peer: start message, result, payload, result.
//! pD is the payload, dataSize its length in bytes.
//! Returns -1 if connecting fails, -2 if a send() fails, -3 if receiving a result
//! reports a socket error; otherwise the error code of the peer's final result
//! message (0 = no error).
int TCPcontrolSend(uint8_t * pD, size_t dataSize) {
int r;
uint32_t contrlMsg[2];
// connect lazily: the connect attempted during init may have failed
if (!gTCPsocketControl.cliConnectedFlag) {
if (connect(gTCPsocketControl.sCli , (struct sockaddr *)&gTCPsocketControl.sAddrCli , sizeof(gTCPsocketControl.sAddrCli)) < 0){
gTCPsocketControl.cliConnectedFlag = 0;
return -1;
} else {
gTCPsocketControl.cliConnectedFlag = 1;
}
}
// ok we are connected - let's send the data
// NOTE(review): every "goto start" below repeats the whole transfer; there is no retry limit
start:
contrlMsg[0] = eStjTCPSocketControlMsgID_start;
// NOTE(review): dataSize is narrowed from size_t to uint32_t here
contrlMsg[1] = dataSize;
// announce that we want to transmit some data
r = send(gTCPsocketControl.sCli , (char *)contrlMsg , sizeof(contrlMsg) , 0);
if(r < 0) {
return -2;
}
// wait for the server's answer to the start message
// NOTE(review): only the socket and amount codes are checked; any other error
// code (e.g. msgID or realloc) still leads to the payload being sent
r = recvResult(gTCPsocketControl.sCli);
if (eStjTCPSocketControlMsgErrorID_socket == r) return -3;
if (eStjTCPSocketControlMsgErrorID_amount == r) goto start;
// ok let's send the payload
// NOTE(review): a positive return value smaller than dataSize is not checked for
r = send(gTCPsocketControl.sCli , pD ,dataSize , 0);
if(r < 0) {
return -2;
}
// get ack from the server
r = recvResult(gTCPsocketControl.sCli);
if (eStjTCPSocketControlMsgErrorID_socket == r) return -3;
if (eStjTCPSocketControlMsgErrorID_amount == r) goto start;
return r;
}
//! The message pump (thread function): accepts one connection on the listening
//! socket and then receives messages in a loop. For every message it reads the
//! start message, grows the rx buffer if needed, acknowledges, reads the payload,
//! acknowledges again and calls the rx callback with the payload and its size.
//! Posts serverSign once when it starts and once when it ends; always returns (void *) -1.
void * TCPcontrolMsgPump (void *pParams) {
    int r;
    uint32_t contrlMsg[2];
    struct sockaddr_in cliAddr;
    SOCKET sCli;
    uint32_t dataSize;
    uint32_t received;
    socklen_t cliAddrSize;
    (void)pParams; // unused
    // tell TCPcontrolInit that the thread is running
    sem_post(&gTCPsocketControl.serverSign);
    // accept connection from an incoming client
    cliAddrSize = sizeof(cliAddr);
    sCli = accept(gTCPsocketControl.sSrv, (struct sockaddr *)&cliAddr, &cliAddrSize);
    if (sCli < 0) goto end;
    // run the pump
    for (;;) {
        // read start message
        r = recv(sCli , (char *)contrlMsg , sizeof(contrlMsg), 0);
        // 0 means the peer closed the connection: stop instead of spinning on
        // a socket that will never deliver data again
        if (r <= 0) break;
        if (r != sizeof(contrlMsg)) {
            sendResult(sCli, eStjTCPSocketControlMsgErrorID_amount);
            continue;
        }
        if (contrlMsg[0] != eStjTCPSocketControlMsgID_start) {
            sendResult(sCli, eStjTCPSocketControlMsgErrorID_msgID);
            continue;
        }
        dataSize = contrlMsg[1];
        // check if we have to realloc the rx buffer
        if (gTCPsocketControl.msgBufferSize < dataSize) {
            uint8_t *pNB = realloc(gTCPsocketControl.pMsgBuffer, dataSize);
            if (!pNB) {
                sendResult(sCli, eStjTCPSocketControlMsgErrorID_realloc);
                continue;
            }
            gTCPsocketControl.pMsgBuffer = pNB;
            gTCPsocketControl.msgBufferSize = dataSize;
        }
        sendResult(sCli, eStjTCPSocketControlMsgErrorID_noError);
        // recv data: ask for exactly the announced number of bytes and collect
        // them, because a single recv() on a TCP stream may return only a part
        received = 0;
        while (received < dataSize) {
            r = recv(sCli , (char *)gTCPsocketControl.pMsgBuffer + received , dataSize - received, 0);
            if (r <= 0) goto closeClient;
            received += (uint32_t)r;
        }
        sendResult(sCli, eStjTCPSocketControlMsgErrorID_noError);
        // handle message - report the size of the message, not of the buffer
        gTCPsocketControl.rxCB(gTCPsocketControl.pMsgBuffer , dataSize);
    }
closeClient:
    closesocket(sCli);
end:
    sem_post(&gTCPsocketControl.serverSign);
    return (void *) -1;
}
//! init: resolves the peer, creates the listening and the connecting socket,
//! allocates the rx buffer and starts the receive thread.
//! Returns 0 on success or a negative code:
//!   -1        WSAStartup failed (WIN32 only)
//!   -11       "localhost" could not be resolved
//!   -12       the peer name could not be resolved to an IPv4 address
//!   -21 / -22 the server / client socket could not be created
//!   -31       bind failed
//!   -32       listen failed or the rx buffer could not be allocated
//!   -40       the receive thread could not be started
//!   -41       the semaphore could not be initialised
//! A failing connect() is not an error here: TCPcontrolSend tries again later.
int TCPcontrolInit (
    int serverPort, //!< port the local server socket listens on - best over 1000
    const char * szClient, //!< peer name or address, "family-PC" or "192.168.1.3"; "" selects localhost
    int clientPort, //!< port on the peer that the client socket connects to
    TfkpTCPcallback rxCB, //!< the rx data callback
    long timeOut, //!< not used by this implementation
    size_t rxBufferSize, //!< the initial size of the rx buffer
    size_t maxTCPdataSize //!< not used by this implementation
) {
#ifdef WIN32
    // local data
    WSADATA wsaData;
    // start sockets
    if ((WSAStartup(MAKEWORD(2, 2), &wsaData))) {
        perror("WSAStartup failed!");
        return -1;
    }
#endif
    struct hostent * pHostDescr;
    struct sockaddr_in sAddr;
    struct in_addr clientIP;
    (void)timeOut;
    (void)maxTCPdataSize;
    // -----------------
    // resolve addresses
    // the local host has to be resolvable
    pHostDescr = gethostbyname("localhost");
    if (!pHostDescr) {
        return -11;
    }
    // get ip of the client
    if (strcmp(szClient, "")) {
        pHostDescr = gethostbyname(szClient);
    } else {
        pHostDescr = gethostbyname("localhost");
    }
    // check if found a host
    if (!pHostDescr) {
        return -12;
    }
    // only an IPv4 address fits into the sockaddr_in used below
    if (pHostDescr->h_addrtype != AF_INET || !pHostDescr->h_addr_list[0]) {
        return -12;
    }
    // copy the binary address directly; no detour through inet_ntoa's static buffer
    memcpy(&clientIP, pHostDescr->h_addr_list[0], sizeof(clientIP));
    // -----------------
    // try to create sockets
    // try to create socket for the server
    gTCPsocketControl.sSrv = socket(PF_INET , SOCK_STREAM, IPPROTO_TCP);
    if (-1 == gTCPsocketControl.sSrv) return -21;
    // try to create socket for the client
    gTCPsocketControl.sCli = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (-1 == gTCPsocketControl.sCli) return -22;
    // -----------------
    // bind input to IP and port
    memset(&sAddr,0,sizeof(sAddr));
    sAddr.sin_family = AF_INET;
    sAddr.sin_addr.s_addr = INADDR_ANY;
    sAddr.sin_port = htons( serverPort );
    // bind server socket to address
    if (bind(gTCPsocketControl.sSrv, (SOCKADDR *)&sAddr, sizeof(SOCKADDR_IN))) {
        return -31;
    }
    // and listen for incoming connections
    if (listen(gTCPsocketControl.sSrv , 3)) {
        return -32;
    }
    // -----------------
    // connect output to IP and port
    memset(&gTCPsocketControl.sAddrCli,0,sizeof(gTCPsocketControl.sAddrCli));
    gTCPsocketControl.sAddrCli.sin_family = AF_INET;
    gTCPsocketControl.sAddrCli.sin_addr = clientIP;
    gTCPsocketControl.sAddrCli.sin_port = htons( clientPort );
    if (connect(gTCPsocketControl.sCli , (struct sockaddr *)&gTCPsocketControl.sAddrCli , sizeof(gTCPsocketControl.sAddrCli)) < 0){
        gTCPsocketControl.cliConnectedFlag = 0;
    } else {
        gTCPsocketControl.cliConnectedFlag = 1;
    }
    // create sign semaphore
    if (sem_init(&gTCPsocketControl.serverSign, 0, 0)) {
        return -41;
    }
    // create buffers
    gTCPsocketControl.pMsgBuffer = malloc(rxBufferSize);
    if (!gTCPsocketControl.pMsgBuffer) {
        return -32;
    }
    gTCPsocketControl.msgBufferSize = rxBufferSize;
    // set callback
    gTCPsocketControl.rxCB = rxCB;
    // start rx thread
    if(pthread_create(&gTCPsocketControl.srvThr , NULL, TCPcontrolMsgPump, NULL)) {
        return -40;
    }
    // wait till rx server is running
    sem_wait(&gTCPsocketControl.serverSign);
    return 0;
}
//! Closes the server and the client socket, frees the rx buffer and resets the
//! global control structure.
//! NOTE(review): the receive thread is neither stopped nor joined here.
void TCPcontrolClose () {
    // only close sockets that were actually created
    if (-1 != gTCPsocketControl.sSrv) closesocket (gTCPsocketControl.sSrv);
    if (-1 != gTCPsocketControl.sCli) closesocket (gTCPsocketControl.sCli);
    free(gTCPsocketControl.pMsgBuffer);
    memset(&gTCPsocketControl, 0, sizeof(TstjTCPSocketControl));
    // 0 is a valid descriptor, so mark both sockets as "not open" again;
    // otherwise a second call would close descriptor 0
    gTCPsocketControl.sSrv = -1;
    gTCPsocketControl.sCli = -1;
#ifdef WIN32
    WSACleanup();
#endif
}
// -----------------------------------------
// test
//! <>0 while received data is checked against the stress test pattern
int stFlag = 0;
//! size of the stress test block in bytes
#define dSTsize (1024 * 1024)
//! stress test block; main fills it with the pattern (index & 0xFF)
uint8_t STB[dSTsize];
//! rx callback of the demo.
//! Normal mode: prints the received bytes as text.
//! Stress test mode: compares the received bytes with the stress test pattern.
//! Always returns 0.
int rxCB (uint8_t * pData, size_t amount) {
    if (!stFlag) {
        // print at most amount bytes instead of writing a terminator to
        // pData[amount], which is one byte past the received data
        printf("rx: %.*s\n", (int)amount, (const char *)pData);
    } else {
        size_t i;
        // never inspect more bytes than were received
        if (amount < dSTsize) {
            fprintf(stderr, "stress test error: received only %i of %i bytes\n", (int)amount, (int)dSTsize);
            return 0;
        }
        for (i = 0; i < dSTsize; i++) {
            if (pData[i] != (uint8_t)((size_t)i & 0xFF)) {
                fprintf(stderr, "stress test error at position %i\n",(int) i);
                return 0;
            }
        }
        printf("rx: stress test successful\n");
    }
    fflush(stdout);
    return 0;
}
//! demo: reads the ports and the peer from stdin, sets up the connection and
//! runs a small command loop until 'e' is entered or stdin ends
int main(void) {
    enum { ipLen = 256, dummyStrLen = 1024 }; // true constants, so the arrays are no VLAs
    int srvPort;
    int clientPort;
    char szIP[ipLen];
    char szDummy[dummyStrLen];
    size_t i;
    int r;
    // pre init for the stress test
    for (i = 0; i < dSTsize; i++) {
        STB[i] = (uint8_t)((size_t)i & 0xFF);
    }
    printf("TCP demo\n");
    printf("enter server port: ");
    if (!fgets(szDummy, dummyStrLen, stdin)) return EXIT_FAILURE;
    szDummy[strcspn(szDummy, "\r\n")] = 0;
    srvPort = atoi(szDummy);
    printf("enter IP address of the other server: ");
    if (!fgets(szIP, ipLen, stdin)) return EXIT_FAILURE;
    szIP[strcspn(szIP, "\r\n")] = 0;
    printf("enter client port: ");
    if (!fgets(szDummy, dummyStrLen, stdin)) return EXIT_FAILURE;
    szDummy[strcspn(szDummy, "\r\n")] = 0;
    clientPort = atoi(szDummy);
    if (TCPcontrolInit (
        srvPort, //!< server port number - best over 1000
        szIP, //!< "family-PC" or "192.168.1.3"
        clientPort, //!< client port number
        rxCB, //!< the rx data callback
        100, //!< the time out of the rx operation in ms
        10,//!< the size of the rx buffer
        400 //!< maximum size of a TCP datagram (400 Bytes seems a good size)
    ) < 0 ){
        fprintf(stderr, "TCP control setup failed!");
        goto errorExit;
    }
    printf("commands:\n s - send\n t - tx stress test\n a - activate/deactivate rx for stress test\n h - help\n e - exit\n");
    for(;;) {
        printf("command: ");
        // stop at end of input instead of looping forever on a stale buffer
        if (!fgets(szDummy, dummyStrLen, stdin)) goto stdExit;
        // <ctype.h> functions need a value representable as unsigned char
        switch(tolower((unsigned char)szDummy[0])) {
        case 's':
            if (!fgets(szDummy, dummyStrLen, stdin)) goto stdExit;
            szDummy[strcspn(szDummy, "\r\n")] = 0;
            r = TCPcontrolSend((uint8_t *)szDummy, strlen(szDummy)+1);
            if(r) {
                fprintf(stderr,"sending data failed with code %i(%s)\n", r, strerror(errno));
            }
            break;
        case 't':
            r = TCPcontrolSend(STB, dSTsize);
            if (r) {
                fprintf(stderr,"stress test sending data failed with code %i\n", r);
            }
            break;
        case 'a':
            stFlag = (!stFlag) ? 1 : 0;
            if (stFlag) {
                printf("stress test RX now active\n");
            } else {
                printf("stress test RX deactivated\n");
            }
            break;
        case 'h':
            printf("commands:\n s - send\n t - tx stress test\n a - activate/deactivate rx for stress test\n h - help\n e - exit\n");
            break;
        case 'e':
            goto stdExit;
        default:
            break;
        }
    }
stdExit:
    TCPcontrolClose ();
    return EXIT_SUCCESS;
errorExit:
    TCPcontrolClose ();
    return EXIT_FAILURE;
}
If you need a UDP file transfer application, try UFTP.
I wrote it primarily for multicast, but it works just as well with unicast. Give it a try, and let me know how it goes.
The TCP approach works fine. With the code below, a full-duplex connection with asynchronous RX and TX works. It has been tested on Linux and Windows:
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <pthread.h>
#include <semaphore.h>
#include <errno.h>
// sockets
#ifdef WIN32
#ifndef WINVER
// set min win version to Win XP
#define WINVER 0x0501
#endif
//use lib: ws2_32
#include <winsock2.h>
#include <ws2tcpip.h>
#else
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <sys/un.h>
#include <unistd.h>
#include <arpa/inet.h>
#define ADDR_ANY INADDR_ANY
#define SOCKET_ERROR (-1)
#define INVALID_SOCKET (SOCKET)(~0)
#define closesocket(x) (close(x))
typedef int SOCKET;
typedef struct sockaddr_in SOCKADDR_IN;
typedef struct sockaddr SOCKADDR;
#endif
//! Callback type for received messages: pData points to the received bytes,
//! amount is the number of bytes of the message.
typedef int (* TfkpTCPcallback) (uint8_t * pData, size_t amount);
//! size in bytes of one control-message word (a control message consists of two words)
#define dStjTCPSocketControlMsg (sizeof(uint32_t))
// layout of the control messages (two uint32_t words each):
// start data msg struct
// <uint32_t> id = 's'
// <uint32_t> len (total number of payload bytes)
// res struct
// <uint32_t> id = 'r'
// <uint32_t> error code (0 = no error)
//! identifiers stored in the first word of a control message
enum eStjTCPSocketControlMsgIDs {
    eStjTCPSocketControlMsgID_start  = 's', //!< announces a data transfer
    eStjTCPSocketControlMsgID_packet = 'p', //!< announces one packet of a transfer
    eStjTCPSocketControlMsgID_result = 'r'  //!< carries an error code
};
//! error codes stored in the second word of a result message
enum eStjTCPSocketControlMsgErrorIDs {
    eStjTCPSocketControlMsgErrorID_noError     = 0, //!< success
    eStjTCPSocketControlMsgErrorID_otherError  = 1, //!< unspecified failure
    eStjTCPSocketControlMsgErrorID_socket      = 2, //!< a socket call failed
    eStjTCPSocketControlMsgErrorID_msgID       = 3, //!< unexpected message id
    eStjTCPSocketControlMsgErrorID_realloc     = 4, //!< the rx buffer could not be enlarged
    eStjTCPSocketControlMsgErrorID_amount      = 5, //!< unexpected number of bytes
    eStjTCPSocketControlMsgErrorID_wrongPacket = 6, //!< unexpected packet number
};
//! state of the TCP socket based message communication
typedef struct SstjTCPSocketControl {
pthread_t srvThr; //!< thread running TCPcontrolMsgPump
SOCKET sCli; //!< outgoing socket: connect()ed to the peer and used by TCPcontrolSend
SOCKET sSrv; //!< listening socket: bound in TCPcontrolInit, accept()ed on by TCPcontrolMsgPump
struct sockaddr_in sAddrCli; //!< address of the peer that sCli connects to
int cliConnectedFlag; //!< <>0 once connect() on sCli has succeeded
uint8_t * pMsgBuffer; //!< rx buffer handed to rxCB; enlarged with realloc when a bigger message is announced
size_t msgBufferSize; //!< current size of pMsgBuffer in bytes
sem_t serverSign; //!< posted by the receive thread when it starts and again when it ends
TfkpTCPcallback rxCB; //!< callback invoked by the receive thread for every received message
int maxTXsize; //!< maximum number of payload bytes per packet; sender and receiver both split messages by it
} TstjTCPSocketControl;
//! the global instance controlling the TCP message based communication
TstjTCPSocketControl gTCPsocketControl = {
    // srvThr is intentionally not initialised explicitly: pthread_t is an opaque
    // type, so NULL is not a portable initialiser for it. All members without an
    // initialiser are zero-initialised.
    .sCli = -1,
    .sSrv = -1,
    .cliConnectedFlag = 0,
    .pMsgBuffer = NULL,
    .msgBufferSize = 0,
};
//! Receives one result control message from s.
//! Returns the error code carried by the message (0 = no error),
//! eStjTCPSocketControlMsgErrorID_socket if recv() fails or the peer closed the
//! connection, or eStjTCPSocketControlMsgErrorID_msgID if the message is not a
//! result message.
static inline int _TCPcontrolRecvResult(SOCKET s) {
    uint32_t contrlMsg[2];
    char * pRaw = (char *)contrlMsg;
    size_t got = 0;
    // TCP is a byte stream: one recv() may deliver only a part of the message,
    // so keep reading until both words have arrived
    while (got < sizeof(contrlMsg)) {
        int r = recv(s , pRaw + got , sizeof(contrlMsg) - got , 0);
        if (r < 0) {
            return eStjTCPSocketControlMsgErrorID_socket;
        }
        if (r == 0) {
            // orderly shutdown by the peer - the message can never be completed
            return eStjTCPSocketControlMsgErrorID_socket;
        }
        got += (size_t)r;
    }
    if (contrlMsg[0] != eStjTCPSocketControlMsgID_result) {
        return eStjTCPSocketControlMsgErrorID_msgID;
    }
    return contrlMsg[1];
}
//! Sends a result control message carrying errorCode over s.
//! Returns eStjTCPSocketControlMsgErrorID_socket if send() fails,
//! eStjTCPSocketControlMsgErrorID_noError otherwise.
static inline int _TCPcontrolSendResult(SOCKET s, uint32_t errorCode) {
    uint32_t reply[2] = { eStjTCPSocketControlMsgID_result, errorCode };
    int sent = send(s , (char *)reply , sizeof(reply) , 0);
    return (sent < 0) ? eStjTCPSocketControlMsgErrorID_socket
                      : eStjTCPSocketControlMsgErrorID_noError;
}
//! Sends a block of data to the peer, split into packets of at most maxTXsize bytes.
//! Sequence: start message + result, then per packet: packet header + result, payload + result.
//! pD is the payload, dataSize its length in bytes.
//! Returns -1 if connecting fails, -2 if sending the start message fails,
//! -4 if sending a packet header fails, -5 if the result for a packet header
//! reports a socket error or sending a payload fails, -3 if any other result
//! reports a socket error; otherwise the error code of the last result
//! message received (0 = no error).
int TCPcontrolSend(uint8_t * pD, size_t dataSize) {
int r;
uint32_t contrlMsg[2];
uint32_t p;
uint32_t packets;
uint8_t * pB;
size_t am, amTotal;
// connect lazily: the connect attempted during init may have failed
if (!gTCPsocketControl.cliConnectedFlag) {
if (connect(gTCPsocketControl.sCli , (struct sockaddr *)&gTCPsocketControl.sAddrCli , sizeof(gTCPsocketControl.sAddrCli)) < 0){
gTCPsocketControl.cliConnectedFlag = 0;
return -1;
} else {
gTCPsocketControl.cliConnectedFlag = 1;
}
}
// ok we are connected - let's send the data
// NOTE(review): every "goto start" below repeats the whole transfer; there is no retry limit
start:
contrlMsg[0] = eStjTCPSocketControlMsgID_start;
// NOTE(review): dataSize is narrowed from size_t to uint32_t here
contrlMsg[1] = dataSize;
// announce that we want to transmit some data
r = send(gTCPsocketControl.sCli , (char *)contrlMsg , sizeof(contrlMsg) , 0);
if(r < 0) {
return -2;
}
// wait for the server's answer to the start message
// NOTE(review): only the socket and amount codes are checked; any other error
// code still leads to the packets being sent
r = _TCPcontrolRecvResult(gTCPsocketControl.sCli);
if (eStjTCPSocketControlMsgErrorID_socket == r) return -3;
if (eStjTCPSocketControlMsgErrorID_amount == r) goto start;
// number of packets = dataSize / maxTXsize, rounded up
// NOTE(review): divides by maxTXsize without checking that it is non-zero
packets = dataSize / gTCPsocketControl.maxTXsize;
if (dataSize % gTCPsocketControl.maxTXsize) packets++;
pB = pD;
amTotal = dataSize;
for (p = 0; p < packets; p++) {
// send packet pre header carrying the packet number
contrlMsg[0] = eStjTCPSocketControlMsgID_packet;
contrlMsg[1] = p;
r = send(gTCPsocketControl.sCli , (char *)contrlMsg , sizeof(contrlMsg) , 0);
if(r < 0) {
return -4;
}
r = _TCPcontrolRecvResult(gTCPsocketControl.sCli);
if (eStjTCPSocketControlMsgErrorID_socket == r) return -5;
if (eStjTCPSocketControlMsgErrorID_amount == r) goto start;
// size of this packet: maxTXsize, or the remainder for the last packet
am = (amTotal > gTCPsocketControl.maxTXsize) ? gTCPsocketControl.maxTXsize : amTotal;
sendPacket:
r = send(gTCPsocketControl.sCli ,(char *) pB ,am , 0);
if(r < 0) {
return -5;
}
// get ack from the server; an amount error re-sends the same packet
r = _TCPcontrolRecvResult(gTCPsocketControl.sCli);
if (eStjTCPSocketControlMsgErrorID_socket == r) return -3;
if (eStjTCPSocketControlMsgErrorID_amount == r) goto sendPacket;
pB += am;
amTotal -= am;
}
// with dataSize == 0 no packet is sent and r is the result of the start handshake
return r;
}
//! The message pump (thread function): accepts one connection on the listening
//! socket and then receives messages in a loop. A message is announced by a
//! start message and transferred in packets of at most maxTXsize bytes, each
//! preceded by a packet header; every step is answered with a result message.
//! After the last packet the rx callback is called with the buffer and the
//! announced message size.
//! Posts serverSign once when it starts and once when it ends; always returns (void *) -1.
void * TCPcontrolMsgPump (void *pParams) {
    int r;
    uint32_t contrlMsg[2];
    struct sockaddr_in cliAddr;
    SOCKET sCli;
    uint32_t dataSize;
    socklen_t cliAddrSize;
    uint32_t packets;
    uint8_t * pB;
    size_t am, amTotal, got;
    uint32_t p;
    (void)pParams; // unused
    // tell TCPcontrolInit that the thread is running
    sem_post(&gTCPsocketControl.serverSign);
    // accept connection from an incoming client
    cliAddrSize = sizeof(cliAddr);
    sCli = accept(gTCPsocketControl.sSrv, (struct sockaddr *)&cliAddr, &cliAddrSize);
    if (sCli < 0) goto end;
    // run the pump
    for (;;) {
        // read start message
        r = recv(sCli , (char *)contrlMsg , sizeof(contrlMsg), 0);
        // 0 means the peer closed the connection: stop instead of spinning on
        // a socket that will never deliver data again
        if (r <= 0) goto closeClient;
        if (r != sizeof(contrlMsg)) {
            _TCPcontrolSendResult(sCli, eStjTCPSocketControlMsgErrorID_amount);
            continue;
        }
        if (contrlMsg[0] != eStjTCPSocketControlMsgID_start) {
            _TCPcontrolSendResult(sCli, eStjTCPSocketControlMsgErrorID_msgID);
            continue;
        }
        dataSize = contrlMsg[1];
        // check if we have to realloc the rx buffer
        if (gTCPsocketControl.msgBufferSize < dataSize) {
            uint8_t *pNB = realloc(gTCPsocketControl.pMsgBuffer, dataSize);
            if (!pNB) {
                _TCPcontrolSendResult(sCli, eStjTCPSocketControlMsgErrorID_realloc);
                continue;
            }
            gTCPsocketControl.pMsgBuffer = pNB;
            gTCPsocketControl.msgBufferSize = dataSize;
        }
        _TCPcontrolSendResult(sCli, eStjTCPSocketControlMsgErrorID_noError);
        // recv data: number of packets = dataSize / maxTXsize, rounded up
        packets = dataSize / gTCPsocketControl.maxTXsize;
        if (dataSize % gTCPsocketControl.maxTXsize) packets++;
        pB = gTCPsocketControl.pMsgBuffer;
        amTotal = dataSize;
        for (p = 0; p < packets; p++) {
            // receive packet header
            r = recv(sCli , (char *)contrlMsg , sizeof(contrlMsg), 0);
            if (r <= 0) goto closeClient;
            // NOTE(review): after a rejected header the loop moves on to the next
            // packet index without storing data for this one - confirm the
            // intended recovery together with the sender
            if (r != sizeof(contrlMsg)) {
                _TCPcontrolSendResult(sCli, eStjTCPSocketControlMsgErrorID_amount);
                continue;
            }
            if (contrlMsg[0] != eStjTCPSocketControlMsgID_packet) {
                _TCPcontrolSendResult(sCli, eStjTCPSocketControlMsgErrorID_msgID);
                continue;
            }
            if (contrlMsg[1] != p) {
                _TCPcontrolSendResult(sCli, eStjTCPSocketControlMsgErrorID_wrongPacket);
                continue;
            }
            _TCPcontrolSendResult(sCli, eStjTCPSocketControlMsgErrorID_noError);
            // size of this packet: maxTXsize, or the remainder for the last packet
            am = (amTotal > gTCPsocketControl.maxTXsize) ? gTCPsocketControl.maxTXsize : amTotal;
            // collect the payload: a single recv() on a TCP stream may return
            // only a part of it, and asking the sender to repeat the packet
            // would leave the rest of the first copy in the stream
            got = 0;
            while (got < am) {
                r = recv(sCli , (char *)pB + got , am - got, 0);
                if (r <= 0) goto closeClient;
                got += (size_t)r;
            }
            _TCPcontrolSendResult(sCli, eStjTCPSocketControlMsgErrorID_noError);
            pB += am;
            amTotal -= am;
        }
        // handle message
        gTCPsocketControl.rxCB(gTCPsocketControl.pMsgBuffer , dataSize);
    }
closeClient:
    closesocket(sCli);
end:
    sem_post(&gTCPsocketControl.serverSign);
    return (void *) -1;
}
//! init: resolves the peer, creates the listening and the connecting socket,
//! allocates the rx buffer and starts the receive thread.
//! Returns 0 on success or a negative code:
//!   -1        WSAStartup failed (WIN32 only)
//!   -2        maxTCPdataSize is 0
//!   -11       "localhost" could not be resolved
//!   -12       the peer name could not be resolved to an IPv4 address
//!   -21 / -22 the server / client socket could not be created
//!   -31       bind failed
//!   -32       listen failed or the rx buffer could not be allocated
//!   -40       the receive thread could not be started
//!   -41       the semaphore could not be initialised
//! A failing connect() is not an error here: TCPcontrolSend tries again later.
int TCPcontrolInit (
    int serverPort, //!< port the local server socket listens on - best over 1000
    const char * szClient, //!< peer name or address, "family-PC" or "192.168.1.3"; "" selects localhost
    int clientPort, //!< port on the peer that the client socket connects to
    TfkpTCPcallback rxCB, //!< the rx data callback
    size_t rxBufferSize, //!< the initial size of the rx buffer
    size_t maxTCPdataSize //!< maximum payload size of one packet, must not be 0 (400 Bytes seems a good size)
) {
    // the packet size is used as a divisor when messages are split into packets
    if (!maxTCPdataSize) {
        return -2;
    }
#ifdef WIN32
    // local data
    WSADATA wsaData;
    // start sockets
    if ((WSAStartup(MAKEWORD(2, 2), &wsaData))) {
        perror("WSAStartup failed!");
        return -1;
    }
#endif
    struct hostent * pHostDescr;
    struct sockaddr_in sAddr;
    struct in_addr clientIP;
    // -----------------
    // resolve addresses
    // the local host has to be resolvable
    pHostDescr = gethostbyname("localhost");
    if (!pHostDescr) {
        return -11;
    }
    // get ip of the client
    if (strcmp(szClient, "")) {
        pHostDescr = gethostbyname(szClient);
    } else {
        pHostDescr = gethostbyname("localhost");
    }
    // check if found a host
    if (!pHostDescr) {
        return -12;
    }
    // only an IPv4 address fits into the sockaddr_in used below
    if (pHostDescr->h_addrtype != AF_INET || !pHostDescr->h_addr_list[0]) {
        return -12;
    }
    // copy the binary address directly; no detour through inet_ntoa's static buffer
    memcpy(&clientIP, pHostDescr->h_addr_list[0], sizeof(clientIP));
    // -----------------
    // try to create sockets
    // try to create socket for the server
    gTCPsocketControl.sSrv = socket(PF_INET , SOCK_STREAM, IPPROTO_TCP);
    if (-1 == gTCPsocketControl.sSrv) return -21;
    // try to create socket for the client
    gTCPsocketControl.sCli = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (-1 == gTCPsocketControl.sCli) return -22;
    // -----------------
    // bind input to IP and port
    memset(&sAddr,0,sizeof(sAddr));
    sAddr.sin_family = AF_INET;
    sAddr.sin_addr.s_addr = INADDR_ANY;
    sAddr.sin_port = htons( serverPort );
    // bind server socket to address
    if (bind(gTCPsocketControl.sSrv, (SOCKADDR *)&sAddr, sizeof(SOCKADDR_IN))) {
        return -31;
    }
    // and listen for incoming connections
    if (listen(gTCPsocketControl.sSrv , 3)) {
        return -32;
    }
    // -----------------
    // connect output to IP and port
    memset(&gTCPsocketControl.sAddrCli,0,sizeof(gTCPsocketControl.sAddrCli));
    gTCPsocketControl.sAddrCli.sin_family = AF_INET;
    gTCPsocketControl.sAddrCli.sin_addr = clientIP;
    gTCPsocketControl.sAddrCli.sin_port = htons( clientPort );
    if (connect(gTCPsocketControl.sCli , (struct sockaddr *)&gTCPsocketControl.sAddrCli , sizeof(gTCPsocketControl.sAddrCli)) < 0){
        gTCPsocketControl.cliConnectedFlag = 0;
    } else {
        gTCPsocketControl.cliConnectedFlag = 1;
    }
    // create sign semaphore
    if (sem_init(&gTCPsocketControl.serverSign, 0, 0)) {
        return -41;
    }
    // create buffers
    gTCPsocketControl.pMsgBuffer = malloc(rxBufferSize);
    if (!gTCPsocketControl.pMsgBuffer) {
        return -32;
    }
    gTCPsocketControl.msgBufferSize = rxBufferSize;
    // set callback
    gTCPsocketControl.rxCB = rxCB;
    gTCPsocketControl.maxTXsize = maxTCPdataSize;
    // start rx thread
    if(pthread_create(&gTCPsocketControl.srvThr , NULL, TCPcontrolMsgPump, NULL)) {
        return -40;
    }
    // wait till rx server is running
    sem_wait(&gTCPsocketControl.serverSign);
    return 0;
}
//! Closes the server and the client socket, frees the rx buffer and resets the
//! global control structure.
//! NOTE(review): the receive thread is neither stopped nor joined here.
void TCPcontrolClose () {
    // only close sockets that were actually created
    if (-1 != gTCPsocketControl.sSrv) closesocket (gTCPsocketControl.sSrv);
    if (-1 != gTCPsocketControl.sCli) closesocket (gTCPsocketControl.sCli);
    free(gTCPsocketControl.pMsgBuffer);
    memset(&gTCPsocketControl, 0, sizeof(TstjTCPSocketControl));
    // 0 is a valid descriptor, so mark both sockets as "not open" again;
    // otherwise a second call would close descriptor 0
    gTCPsocketControl.sSrv = -1;
    gTCPsocketControl.sCli = -1;
#ifdef WIN32
    WSACleanup();
#endif
}
//! Initialises the TCP control with values read from stdin: the server listen
//! port, the peer name or address and the peer port are prompted for and then
//! passed to TCPcontrolInit.
//! Returns the result of TCPcontrolInit, or -3 if stdin ends before all values
//! were read. The outcome is also printed to stdout.
int TCPcontrolInitFromStdIn (
    TfkpTCPcallback rxCB, //!< the rx data callback
    size_t rxBufferSize, //!< the size of the rx buffer
    size_t maxTCPdataSize //!< maximum size of a TCP datagram (400 Bytes seems a good size)
) {
    enum { ipLen = 256, dummyStrLen = 100 }; // true constants, so the arrays are no VLAs
    int srvPort;
    int clientPort;
    char szIP[ipLen];
    char szDummy[dummyStrLen];
    int r = -3; // reported when an input line is missing
    printf("====| TCP client/server setup |====\n");
    printf("server listen port: ");
    // never parse a buffer that fgets did not fill
    if (!fgets(szDummy, dummyStrLen, stdin)) goto report;
    szDummy[strcspn(szDummy, "\r\n")] = 0;
    srvPort = atoi(szDummy);
    printf("client send IP address or name: ");
    if (!fgets(szIP, ipLen, stdin)) goto report;
    szIP[strcspn(szIP, "\r\n")] = 0;
    printf("client port: ");
    if (!fgets(szDummy, dummyStrLen, stdin)) goto report;
    szDummy[strcspn(szDummy, "\r\n")] = 0;
    clientPort = atoi(szDummy);
    r = TCPcontrolInit (
        srvPort, //!< server port number - best over 1000
        szIP, //!< "family-PC" or "192.168.1.3"
        clientPort, //!< client port number
        rxCB, //!< the rx data callback
        rxBufferSize, //!< the size of the rx buffer
        maxTCPdataSize //!< maximum size of a TCP datagram (400 Bytes seems a good size)
    );
report:
    if (!r) {
        printf("setup finished successfully!\n");
        printf("===================================\n");
    } else {
        printf("setup error: %i \n", r);
        printf("===================================\n");
    }
    return r;
}
// -----------------------------------------
// test
//! modes of the demo rx callback
enum eStates {
    eState_std = 0,        //!< print received data as text
    eState_stressTest = 1, //!< compare received data with the stress test pattern
    eState_multiTX = 2     //!< print only the number of received bytes
};
//! current mode of the rx callback
int stateID = eState_std;
//! size of the stress test block in bytes
#define dSTsize (1024 * 1024)
//! stress test block; main fills it with the pattern (index & 0xFF)
uint8_t STB[dSTsize];
//! rx callback of the demo; what it does with the data depends on stateID.
//! Always returns 0.
int rxCB (uint8_t * pData, size_t amount) {
    size_t i;
    switch (stateID) {
    case eState_std:
        // print at most amount bytes instead of writing a terminator to
        // pData[amount], which is one byte past the received data
        printf("rx: %.*s\n", (int)amount, (const char *)pData);
        break;
    case eState_stressTest:
        // never inspect more bytes than were received
        if (amount < dSTsize) {
            fprintf(stderr, "stress test error: received only %i of %i bytes\n", (int)amount, (int)dSTsize);
            fflush(stdout);
            return 0;
        }
        for (i = 0; i < dSTsize; i++) {
            if (pData[i] != (uint8_t)((size_t)i & 0xFF)) {
                fprintf(stderr, "stress test error at position %i\n",(int) i);
                fflush(stdout);
                return 0;
            }
        }
        printf("rx: stress test successful\n");
        break;
    case eState_multiTX:
        printf("rx %iBytes\n", (int)amount);
        break;
    default:
        break;
    }
    fflush(stdout);
    return 0;
}
//! demo: sets up the connection from stdin input and runs a small command loop
//! until 'e' is entered or stdin ends
int main(void) {
    enum { dummyStrLen = 1024 }; // true constant, so the array is no VLA
    char szDummy[dummyStrLen];
    size_t i;
    int r, am, j;
    // pre init for the stress test
    for (i = 0; i < dSTsize; i++) {
        STB[i] = (uint8_t)((size_t)i & 0xFF);
    }
    printf("TCP demo\n");
    if (TCPcontrolInitFromStdIn(rxCB, 4096, 500)) goto errorExit;
    printf("commands:\n s - send\n t - tx stress test\n a - activate/deactivate rx for stress test\n m - multi tx test\n h - help\n e - exit\n");
    for(;;) {
        printf("command: ");
        // stop at end of input instead of looping forever on a stale buffer
        if (!fgets(szDummy, dummyStrLen, stdin)) goto stdExit;
        // <ctype.h> functions need a value representable as unsigned char
        switch(tolower((unsigned char)szDummy[0])) {
        case 's':
            stateID = eState_std;
            if (!fgets(szDummy, dummyStrLen, stdin)) goto stdExit;
            szDummy[strcspn(szDummy, "\r\n")] = 0;
            r = TCPcontrolSend((uint8_t *)szDummy, strlen(szDummy)+1);
            if(r) {
                fprintf(stderr,"sending data failed with code %i(%s)\n", r, strerror(errno));
            } else {
                printf("succeeded\n");
            }
            break;
        case 't':
            printf("sending packets...\n");
            r = TCPcontrolSend(STB, dSTsize);
            if (r) {
                fprintf(stderr,"stress test sending data failed with code %i\n", r);
            } else {
                printf("succeeded\n");
            }
            break;
        case 'a':
            stateID = eState_stressTest;
            printf("stress test RX now active\n");
            break;
        case 'm':
            stateID = eState_multiTX;
            printf("amount of transmissions: ");
            if (!fgets(szDummy, dummyStrLen, stdin)) goto stdExit;
            szDummy[strcspn(szDummy, "\r\n")] = 0;
            am = atoi(szDummy);
            for (j = 0; j < am; j++) {
                printf("tm %i...", j);
                snprintf(szDummy, sizeof szDummy, "tm %i", j);
                r = TCPcontrolSend((uint8_t *)szDummy, strlen(szDummy)+1);
                if (!r) printf("successful\n");
                else printf("failed\n");
            }
            break;
        case 'h':
            printf("commands:\n s - send\n t - tx stress test\n a - activate/deactivate rx for stress test\n m - multi tx test\n h - help\n e - exit\n");
            break;
        case 'e':
            goto stdExit;
        default:
            break;
        }
    }
stdExit:
    TCPcontrolClose ();
    return EXIT_SUCCESS;
errorExit:
    TCPcontrolClose ();
    return EXIT_FAILURE;
}
A note on the stress test and the initial connection: over a weak WLAN connection, both can take some time.

inotify_add_watch fails on /sys/class/net/eth0/operstate

I am using inotify on Linux to get an event whenever the link state of a network interface changes. Whenever the link state changes, the file /sys/class/net/eth40/operstate is modified. But in the code snippet below, read() stays blocked even though the file is modified.
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/inotify.h>
#include <unistd.h>
// path that is handed to inotify_add_watch
#define FILE_TO_WATCH "/sys/class/net/eth40/operstate"
// size of one inotify event header, without the optional trailing name
#define EVENT_SIZE (sizeof (struct inotify_event))
// size of the read buffer: 1024 event headers plus one maximum-length name
#define EVENT_BUFFER_LENGTH (1024 * EVENT_SIZE + NAME_MAX + 1)
/* Prints one line for every event bit set in event->mask that this function
 * knows about, followed by the name attached to the event, if any. */
void print_event(struct inotify_event *event) {
    if (event->mask & IN_CREATE)
        printf("file created in directory\n");
    if (event->mask & IN_DELETE)
        printf("file deleted in directory\n");
    if (event->mask & IN_ACCESS)
        printf("file accessed\n");
    /* the watch in main asks for IN_MODIFY, so report it as well */
    if (event->mask & IN_MODIFY)
        printf("file modified\n");
    if (event->mask & IN_CLOSE)
        printf("file closed after reading or writing \n");
    if (event->mask & IN_OPEN)
        printf("file opened\n");
    /* len is non-zero only when a name follows the event header */
    if (event->len)
        printf("name: %s\n", event->name);
}
/* Creates an inotify instance, watches FILE_TO_WATCH for access and
 * modification, then blocks in read() and prints every event that arrives. */
int main(int argc, char** argv)
{
    char buffer[EVENT_BUFFER_LENGTH];

    int notify_fd = inotify_init();
    if (notify_fd < 0) {
        perror("cannot init inotify");
        exit(EXIT_FAILURE);
    }
    printf("done1\n");

    int watch_fd = inotify_add_watch(notify_fd, FILE_TO_WATCH, IN_ACCESS | IN_MODIFY);
    if (watch_fd < 0) {
        perror("cannot add file");
        exit(EXIT_FAILURE);
    }
    printf("done2\n");

    for (;;) {
        long input_len = read(notify_fd, buffer, EVENT_BUFFER_LENGTH);
        if (input_len <= 0) {
            perror("error reading from inotify fd");
            exit(EXIT_FAILURE);
        }
        printf("done3\n");

        /* the buffer holds a sequence of variable-length events */
        for (char *ptr = buffer; ptr < buffer + input_len; ) {
            struct inotify_event *event = (struct inotify_event *) ptr;
            print_event(event);
            /* step over the header and the name that belongs to it */
            ptr += sizeof (struct inotify_event) + event->len;
        }
    }
}
Am I missing something?
/sys is not a regular file system, but a special in-memory file system called sysfs.
To quote a kernel developer:
inotify does not and will not work on sysfs. Or procfs. Or devpts.
Or any number of network filesystems. No matter how hard somebody
might wish it to work, that's simply not feasible.
For network link events you can use rtnetlink, though it is hardly documented. Here is a starting-point example that shows link events (and a few others); you will have to figure out which events and flags are relevant for your particular case.
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <asm/types.h>
#include <asm/types.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/if.h>
#include <linux/rtnetlink.h>
/* builds a { value, "name" } table entry from a constant */
#define ENTRY(x) {x, #x}

/* interface flags and their names, used to print ifi_flags symbolically */
struct {
    unsigned flag;
    const char *name;
} ifi_flag_map[] = {
    ENTRY(IFF_UP),
    ENTRY(IFF_BROADCAST),
    ENTRY(IFF_DEBUG),
    ENTRY(IFF_LOOPBACK),
    ENTRY(IFF_POINTOPOINT),
    ENTRY(IFF_NOTRAILERS),
    ENTRY(IFF_RUNNING),
    ENTRY(IFF_NOARP),
    ENTRY(IFF_PROMISC),
    ENTRY(IFF_ALLMULTI),
    ENTRY(IFF_MASTER),
    ENTRY(IFF_SLAVE),
    ENTRY(IFF_MULTICAST),
    ENTRY(IFF_PORTSEL),
    ENTRY(IFF_AUTOMEDIA),
    ENTRY(IFF_DYNAMIC),
    ENTRY(IFF_LOWER_UP),
    ENTRY(IFF_DORMANT),
    ENTRY(IFF_ECHO),
};
struct {
unsigned type;
const char *name;
} nlmrt_type_map[] = {
ENTRY(RTM_NEWLINK ),
ENTRY(RTM_DELLINK),
ENTRY(RTM_GETLINK),
ENTRY(RTM_SETLINK),
ENTRY(RTM_NEWADDR ),
ENTRY(RTM_DELADDR),
ENTRY(RTM_GETADDR),
ENTRY(RTM_NEWROUTE ),
ENTRY(RTM_DELROUTE),
ENTRY(RTM_GETROUTE),
ENTRY(RTM_NEWNEIGH ),
ENTRY(RTM_DELNEIGH),
ENTRY(RTM_GETNEIGH),
ENTRY(RTM_NEWRULE ),
ENTRY(RTM_DELRULE),
ENTRY(RTM_GETRULE),
ENTRY(RTM_NEWQDISC ),
ENTRY(RTM_DELQDISC),
ENTRY(RTM_GETQDISC),
ENTRY(RTM_NEWTCLASS ),
ENTRY(RTM_DELTCLASS),
ENTRY(RTM_GETTCLASS),
ENTRY(RTM_NEWTFILTER ),
ENTRY(RTM_DELTFILTER),
ENTRY(RTM_NEWACTION ),
ENTRY(RTM_DELACTION),
ENTRY(RTM_GETACTION),
ENTRY(RTM_NEWPREFIX ),
ENTRY(RTM_GETMULTICAST ),
ENTRY(RTM_GETANYCAST ),
ENTRY(RTM_NEWNEIGHTBL ),
ENTRY(RTM_GETNEIGHTBL ),
ENTRY(RTM_SETNEIGHTBL),
ENTRY(RTM_NEWNDUSEROPT ),
ENTRY(RTM_NEWADDRLABEL ),
ENTRY(RTM_DELADDRLABEL),
ENTRY(RTM_GETADDRLABEL),
ENTRY(RTM_GETDCB ),
ENTRY(RTM_SETDCB),
ENTRY(RTM_NEWNETCONF ),
ENTRY(RTM_GETNETCONF ),
ENTRY(RTM_NEWMDB ),
ENTRY(RTM_DELMDB ),
ENTRY(RTM_GETMDB ),
};
/* Prints the symbolic name of an rtnetlink message type, or its numeric value
 * if the type is not listed in nlmrt_type_map. */
void print_type(unsigned type)
{
    size_t i;
    for (i = 0; i < sizeof nlmrt_type_map/sizeof nlmrt_type_map[0]; i++) {
        if (type == nlmrt_type_map[i].type) {
            printf("\t\tMsg Type: %s\n", nlmrt_type_map[i].name);
            return;
        }
    }
    /* type is unsigned, so it has to be printed with %u */
    printf("\t\tMsg Type: unknown(%u)\n", type);
}
/* Prints the names of all flags set in `flags` on one line; a flag that is
 * also set in `change` gets the suffix "(C)". */
void print_flags(unsigned flags, unsigned change)
{
    const size_t count = sizeof ifi_flag_map / sizeof ifi_flag_map[0];

    printf("\t\tflags: ");
    for (size_t idx = 0; idx < count; idx++) {
        const unsigned bit = ifi_flag_map[idx].flag;

        /* skip flags that are not set */
        if (!(flags & bit))
            continue;

        if (change & bit)
            printf("%s(C) ", ifi_flag_map[idx].name);
        else
            printf("%s ", ifi_flag_map[idx].name);
    }
    puts("");
}
/* Receives one datagram from the netlink socket fd and prints every netlink
 * message in it: always the header fields, and for link messages also the
 * ifinfomsg fields, the message type and the interface flags. Other message
 * types carry a different payload, so only their type is printed. */
void read_msg(int fd)
{
    int len;
    /* the union aligns the receive buffer for struct nlmsghdr */
    union {
        struct nlmsghdr hdr;
        char raw[4096];
    } buf;
    struct iovec iov = { .iov_base = buf.raw, .iov_len = sizeof(buf.raw) };
    struct sockaddr_nl sa;
    /* designated initializers: do not rely on the member order of msghdr */
    struct msghdr msg = {
        .msg_name = &sa,
        .msg_namelen = sizeof(sa),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };
    struct nlmsghdr *nh;

    len = recvmsg(fd, &msg, 0);
    if (len == -1) {
        perror("recvmsg");
        return;
    }
    for (nh = &buf.hdr; NLMSG_OK (nh, len); nh = NLMSG_NEXT (nh, len)) {
        struct ifinfomsg *ifimsg;

        printf("netlink message: len = %u, type = %u, flags = 0x%X, seq = %u, pid = %u\n",
               nh->nlmsg_len,
               nh->nlmsg_type,
               nh->nlmsg_flags,
               nh->nlmsg_seq,
               nh->nlmsg_pid);
        /* The end of multipart message. */
        if (nh->nlmsg_type == NLMSG_DONE)
            return;
        if (nh->nlmsg_type == NLMSG_ERROR)
            continue;

        switch (nh->nlmsg_type) {
        case RTM_NEWLINK:
        case RTM_DELLINK:
        case RTM_GETLINK:
        case RTM_SETLINK:
            /* only link messages start with a struct ifinfomsg */
            if (nh->nlmsg_len < NLMSG_LENGTH(sizeof(*ifimsg))) {
                print_type(nh->nlmsg_type);
                break;
            }
            ifimsg = NLMSG_DATA(nh);
            printf("\tifi_family = %u, ifi_type = %u, ifi_index = %u, ifi_flags = 0x%X, ifi_change = 0x%X\n",
                   ifimsg->ifi_family,
                   ifimsg->ifi_type,
                   ifimsg->ifi_index,
                   ifimsg->ifi_flags,
                   ifimsg->ifi_change);
            print_type(nh->nlmsg_type);
            print_flags(ifimsg->ifi_flags, ifimsg->ifi_change);
            break;
        default:
            print_type(nh->nlmsg_type);
            break;
        }
    }
}
/* Opens a NETLINK_ROUTE socket, subscribes to link and IPv4 address
 * notifications and prints every message that arrives. */
int main(int argc, char *argv[])
{
    struct sockaddr_nl sa;
    int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

    if (fd == -1) {
        perror("socket");
        return 1;
    }

    /* multicast groups to listen to */
    memset(&sa, 0, sizeof(sa));
    sa.nl_family = AF_NETLINK;
    sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR;

    if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) == -1) {
        perror("bind");
        return 1;
    }

    while (1) {
        read_msg(fd);
    }
    return 0;
}

Resources