Linux recvmsg() not getting a software timestamp from socket - c

I'm receiving packets via recvmsg() and expecting 3 timestamps:
Software
Hardware (NIC) converted to software
Hardware (NIC)
I see the first and third timestamps but I get zeros for the second.
I'm basing my code on this Onload example:
https://github.com/Xilinx-CNS/onload/blob/master/src/tests/onload/hwtimestamping/rx_timestamping.c
I export the environment variable for timestamping:
export EF_RX_TIMESTAMPING=1
I preload the onload library.
I set the socket options:
int enable = SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE |
SOF_TIMESTAMPING_SYS_HARDWARE | SOF_TIMESTAMPING_SOFTWARE;
assert(setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &enable, sizeof(int)) == 0);
I create the message header and use recvmsg() to retrieve packets:
while(_running)
{
struct iovec iov;
char control[1024];
struct msghdr msg;
iov.iov_base = buffer;
iov.iov_len = 2048;
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_namelen = sizeof(struct sockaddr_in);
msg.msg_control = control;
msg.msg_controllen = 1024;
int num bytes = recvmsg(sock, &msg, 0);
handle_time(&msg);
LOG("Got packet");
}
and I read the timestamps:
static void handle_time(struct msghdr* msg)
{
struct timespec* ts = NULL;
struct cmsghdr* cmsg;
for( cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg,cmsg) ) {
if( cmsg->cmsg_level != SOL_SOCKET )
continue;
switch( cmsg->cmsg_type ) {
case SO_TIMESTAMPNS:
ts = (struct timespec*) CMSG_DATA(cmsg);
break;
case SO_TIMESTAMPING:
ts = (struct timespec*) CMSG_DATA(cmsg);
break;
default:
/* Ignore other cmsg options */
break;
}
}
print_time(ts);
}
static void print_time(struct timespec* ts)
{
if( ts != NULL ) {
/* Hardware timestamping provides three timestamps -
* system (software)
* transformed (hw converted to sw)
* raw (hardware)
* in that order - though depending on socket option, you may have 0 in
* some of them.
*/
printf("timestamps " TIME_FMT TIME_FMT TIME_FMT "\n",
(uint64_t)ts[0].tv_sec, (uint64_t)ts[0].tv_nsec,
(uint64_t)ts[1].tv_sec, (uint64_t)ts[1].tv_nsec,
(uint64_t)ts[2].tv_sec, (uint64_t)ts[2].tv_nsec );
} else
{
printf( "no timestamp\n" );
}
}
Is there anything missing to receive the second timestamp?

It's probably the network interface driver that needs to add the timestamp from the hardware. What kind of hardware are we talking about? Can you show some ethanol output?

Related

AES-GCM with the Kernel Crypto API from Userspace

I am trying to use the kernel crypto API to communicate with a hardware accelerator from userspace.
I use af_alg for the communication with the kernel. I can use the API for hashing or "normal" symmetric encryption but I can not get it to work with an aead algorithm. The drivers are registered under /proc/crypto.
I follow the kernel documentation for the userspace interface to prepare the message but I get the same error Invalig Argument every time.
#ifndef AF_ALG
#define AF_ALG 38
#endif
#ifndef SOL_ALG
#define SOL_ALG 279
#endif
int main(void)
{
int openfd;
int tfmfd;
char key [16] = "SecretKeyforAES!";
char initVector[12] = "123456789012";
//initiaze vector, in/out vector and buffer
struct af_alg_iv *iv;
struct iovec iov;
char buf[64];
//size of the aad and the auth tag
uint32_t aadSize = 8;
uint32_t taglen = 12;
//define algorithm used for cryptography
struct sockaddr_alg sa = {
.salg_family = AF_ALG,
.salg_type = "aead",
.salg_name = "gcm(aes)"
};
//create and bind socket
tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
printf("open socket\n");
int bindvalue = bind(tfmfd, (struct sockaddr *) &sa, sizeof(sa));
if(bindvalue != 0){
perror("send != 0");
}
assert(bindvalue == 0);
printf("socket bound\n");
//set socket options: key, AEAD Authentication size
setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, 16);
int setTag = setsockopt(tfmfd, SOL_ALG, ALG_SET_AEAD_AUTHSIZE, NULL, taglen);
assert(setTag >= 0);
printf("set socket options\n");
//accept connection
openfd = accept(tfmfd, NULL, 0);
//check errors with the connections
assert(tfmfd > 0);
assert(openfd > 0);
//Prepare Message
struct msghdr msg = { 0 };
struct cmsghdr *cmsg = NULL;
char cbuf[128] = {0};
msg.msg_control = cbuf;
msg.msg_controllen = sizeof(cbuf);
msg.msg_iovlen = 0;
msg.msg_iov = NULL;
//set the Headervalues for the Operation
cmsg = CMSG_FIRSTHDR(&msg);
assert(cmsg != NULL);
cmsg->cmsg_level = SOL_ALG;
cmsg->cmsg_type = ALG_SET_OP;
cmsg->cmsg_len = CMSG_LEN(CMSG_SPACE(sizeof(uint32_t)));
*(__u32 *) CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;
//set headervalues for IV
cmsg = CMSG_NXTHDR(&msg, cmsg);
assert(cmsg != NULL);
cmsg->cmsg_level = SOL_ALG;
cmsg->cmsg_type = ALG_SET_IV;
cmsg->cmsg_len = CMSG_LEN(CMSG_SPACE(sizeof(initVector))); //iv_msg_size
//set value for iv
iv = (void *)CMSG_DATA(cmsg);
iv->ivlen = 12;
memcpy(iv->iv, initVector, 12);
//set headervalues for aad
uint32_t *assoclen = NULL;
cmsg = CMSG_NXTHDR(&msg, cmsg);
assert(cmsg != NULL);
cmsg->cmsg_level = SOL_ALG;
cmsg->cmsg_type = ALG_SET_AEAD_ASSOCLEN;
cmsg->cmsg_len = CMSG_LEN(sizeof(*assoclen));
assoclen = (void *) CMSG_DATA(cmsg);
*assoclen = (uint32_t)aadSize;
printf("Header values set\n");
//send initial message
int send = sendmsg(openfd, &msg, MSG_MORE);
if(send < 0){
perror("send < 0"); //This is where the error occures
}
assert(send > 0); //This assertion fails following the error above
printf("Send initial Message...\n");
Following the kernel doc I prepared the initial message with the algorithm, iv and length of aad information.
I set the taglen and mode with setsockopt as well. I read the man pages of msghdr, cmsg and sendmsg but did not find my error.
EDIT:
Added a check for binding failure but check exits with 0 -> no error
You are not quite correct in counting cmsg_len.
For the first value that's correct though CMSG_LEN(CMSG_SPACE(sizeof(uint32_t))) reflects ALG_SET_OP.
But not CMSG_LEN(CMSG_SPACE(sizeof(initVector))); //iv_msg_size where the data is iv = (void *)CMSG_DATA(cmsg); -> struct af_alg_iv *iv;.
Thus it should beCMSG_LEN(sizeof(struct af_alg_iv) + sizeof(initVector)).
Also, char cbuf[128] = {0}; must consist the total size of ancillary data, what is char cbuf[CMSG_SPACE(sizeof(uint32_t)) + CMSG_SPACE(sizeof(struct af_alg_iv) + sizeof(initVector))] = {0}

Can socket in SCM_RIGHTS protocol type send data even file description is -1

Now I use SCM_RIGHTS protocol to send fd (file description). But sometimes my fd will get -1 for some reason. And I want tell receiver that sender got wronge even at this time.
But when I use "sendmsg" will got: Failed to send message: Bad file descriptor
My sender code below:
int32_t SendFrame(const char *sockPath, char *sendFrameInfo, uint32_t sendFrameInfoLen,
int32_t inFd) {
int SockFd = socket(AF_UNIX, SOCK_DGRAM, 0);
struct sockaddr_un peer;
/* Create name. */
peer.sun_family = AF_UNIX;
strcpy(peer.sun_path, sockPath);
int ret = connect(SockFd, (struct sockaddr *)&peer, sizeof(struct sockaddr_un));
struct msghdr msg = {0};
struct cmsghdr *cmsg;
char buf[CMSG_SPACE(sizeof(int))];
memset(buf, '\0', sizeof(buf));
struct iovec io = {.iov_base = (void *)sendFrameInfo, .iov_len = sendFrameInfoLen};
msg.msg_iov = &io;
msg.msg_iovlen = 1;
msg.msg_control = buf;
msg.msg_controllen = sizeof(buf);
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
memcpy((int *)CMSG_DATA(cmsg), &inFd, sizeof(int));
if (sendmsg(SockFd, &msg, 0) < 0) {
printf("SendFrame(%s) Failed to send message: %s", sockPath, strerror(errno));
return -1;
}
return 0;
};
My receiver code below:
int32_t recvFrame(char *buf, int32_t fd, uint32_t timeout_ms, uint32_t bufsize) {
struct msghdr msg = {0};
struct cmsghdr *cmsg;
char msg_control[CMSG_SPACE(sizeof(int))];
memset(msg_control, '\0', sizeof(msg_control));
struct iovec io = { .iov_base = &buf, .iov_len = bufsize };
msg.msg_iov = &io;
msg.msg_iovlen = 1;
msg.msg_control = msg_control;
msg.msg_controllen = sizeof(msg_control);
struct timeval timeout;
timeout.tv_sec = timeout_ms / 1000;
timeout.tv_usec = (timeout_ms % 1000) * 1000;
setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout));
ssize_t len = recvmsg(fd, &msg, 0);
if (len < 0) {
perror("Failed to receive message");
return -errno;
}
cmsg = CMSG_FIRSTHDR(&msg);
int new fd = *((int32_t *)CMSG_DATA(cmsg));
return 0;
}
Now I can use So how can I use "msg_iov" to send other message. So I want receive message event sender's fd is -1 then I can check "msg_iov" what is happened.
Thanks!
BR/Tim
No, you can only pass valid file descriptors, and -1 is never a valid file descriptor (because it is returned when an error occurs).
What I would recommend, is to include a special byte (char) in the message, describing the error – for example, as the very first char in the message. It is zero if a file descriptor is passed, and nonzero if an error occurs (the value describing the error). This way, the payload data is never of zero length, which makes it much easier to detect socket close before sending any data. (Zero-length reads/receives normally indicate end-of-input, you see.)
Because errno values do not necessarily match across machines, you should translate the special byte values to errno codes and vice versa.
Alternatively, if the message has no other data, you could send the error message as a string, including the '\0' end-of-string mark at end; with just a '\0' as the data payload when a file descriptor is passed.

Error from recvfrom

I try to make a function that sends a icmp packet to another computer and when the other computer sends something back, the function returns a 1, else a 0. But the recvfrom function returns an error: "errno: Resource temporarily unavailable". I work on an Mac OS X, so I don't include headers from the linux kernel. Can anyone help me, cause I'm stuck.
#include "info.h"
char *getip()
{
char buffer[256];
struct hostent *h;
gethostname(buffer, 256);
h = gethostbyname(buffer);
return inet_ntoa(*(struct in_addr *)h->h_addr);
}
int host_alive(char *dst_addr, char *src_addr)
{
struct ip *ippacket;
struct ip *ip_reply;
struct icmp *icmppacket;
struct sockaddr_in connection;
struct timeval tv;
char *packet;
char *buffer;
int optval;
int addrlen;
int size;
int sock = 0;
packet = malloc(sizeof(struct ip) + sizeof(struct icmp));
buffer = malloc(sizeof(struct ip) + sizeof(struct icmp));
check(getuid() == 0, "Root priviliges are needed. Try: sudo ./bin/main");
ippacket = (struct ip *) packet;
icmppacket = (struct icmp *) (packet + sizeof(struct ip));
ippacket->ip_hl = 5;
ippacket->ip_v = 4;
ippacket->ip_tos = 0;
ippacket->ip_len = sizeof(struct ip) + sizeof(struct icmp);
ippacket->ip_id = htons(random());
ippacket->ip_ttl = 255;
ippacket->ip_p = IPPROTO_ICMP;
inet_aton(src_addr, &ippacket->ip_src);
inet_aton(dst_addr, &ippacket->ip_dst);
tv.tv_sec = 5;
tv.tv_usec = 0;
check((sock = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) != -1,\
"Failed to create socket");
check(setsockopt(sock, IPPROTO_IP, IP_HDRINCL, &optval, sizeof(int)) != -1,\
"Failed to set the option to the socket.");
check(setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (const char *)&tv, sizeof(struct timeval)) != -1,\
"Failed to set the option to the socket.");
icmppacket->icmp_type = ICMP_ECHO;
icmppacket->icmp_code = 0;
icmppacket->icmp_id = 0;
icmppacket->icmp_seq = 0;
icmppacket->icmp_cksum = in_cksum((unsigned short *)icmppacket, sizeof(struct icmp));
ippacket->ip_sum = in_cksum((unsigned short *)ippacket, sizeof(struct ip));
connection.sin_family = AF_INET;
connection.sin_addr.s_addr = inet_addr(dst_addr);
sendto(sock, packet, ippacket->ip_len, 0, (struct sockaddr *)&connection,\
sizeof(struct sockaddr));
addrlen = sizeof(connection);
check((size = recvfrom(sock, buffer, sizeof(struct ip) + sizeof(struct icmp), 0,\
(struct sockaddr *)&connection, (socklen_t *)&addrlen)) != -1,\
"Failed to receive a message.");
printf("Received %d byte reply from %s:\n", size , dst_addr);
ip_reply = (struct ip*) buffer;
printf("ID: %d\n", ntohs(ip_reply->ip_id));
printf("TTL: %d\n", ip_reply->ip_ttl);
close(sock);
free(packet);
free(buffer);
return 1;
error:
if (sock)
close(sock);
free(packet);
free(buffer);
return 0;
}
unsigned short in_cksum(unsigned short *addr, int len)
{
int sum = 0;
u_short answer = 0;
u_short *w = addr;
int nleft = len;
while (nleft > 1) {
sum += *w++;
nleft -= 2;
}
if (nleft == 1) {
*(u_char *) (&answer) = *(u_char *) w;
sum += answer;
}
sum = (sum >> 16) + (sum & 0xffff);
sum += (sum >> 16);
answer = ~sum;
return (answer);
}
According to the documentation for recvfrom this is expected if you're using a non-blocking call:
If no messages are available at the socket, the receive call waits for a message to arrive, unless the socket is nonblocking (see fcntl(2)) in which case the value -1 is returned and the external vari-
able errno set to EAGAIN. The receive calls normally return any data available, up to the requested amount, rather than waiting for receipt of the full amount requested; this behavior is affected by the
socket-level options SO_RCVLOWAT and SO_RCVTIMEO described in getsockopt(2).
You can look up errno values in /usr/include/sys/errno.h if you're curious as to what values map to what errors.
If you want this function to block you may need to set the MSG_WAITALL flag which "requests that the operation block until the full request is satisfied".
Normally in low-level UNIX socket code you'd do a select on that socket to wait for a read signal, then call recvfrom to receive the data if and only if that signal triggered. You can also do a non-blocking receive and on an EAGAIN just wait a short period of time before trying again, though that is less efficient.

Linux, UDP datagrams, and kernel timestamps: Lots of examples and stackoversflow entries later, and still cannot get timestamps at all

I have been trying and failing to get Linux (kernel 4.1.4) to give me timestamps for when UDP datagrams are sent and received. I have read the original kernel docs (https://www.kernel.org/doc/Documentation/networking/timestamping.txt), along with lots of examples and a number of stackoverflow entries. I can send datagrams between sender and receiver with no problems. But I cannot get timestamps for sending or receiving datagrams, and I can't figure out what I'm doing wrong.
One bizarre thing is that when I use the MSG_ERRQUEUE channel for getting timestamp info on a sent datagram, I do get the original outgoing packet, and I do get the first ancillary message (SOL_IP, IP_RECVERR), but I do not get a second message (which should be level SOL_SOCKET, type SCM_TIMESTAMPING).
In another stackoverflow entry on getting timestamps for sent packets (Timestamp outgoing packets), someone mentioned that some drivers might not implement the call to skb_tx_timestamp, but I checked mine (Realtek), and that call is definitely in there.
Here's how I set up the UDP receiver (error handling code not shown):
inf->fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
timestampOn = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE;
r = setsockopt(inf->fd, SOL_SOCKET, SO_TIMESTAMPING, &timestampOn, sizeof(timestampOn));
r = setsockopt(inf->fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
memset(&(inf->local), 0, sizeof(struct sockaddr_in));
inf->local.sin_family = AF_INET;
inf->local.sin_port = htons(port);
inf->local.sin_addr.s_addr = htonl(INADDR_ANY);
r = bind(inf->fd, (struct sockaddr *)&(inf->local), sizeof(struct sockaddr_in));
Using SO_REUSEPORT or not doesn't seem to matter.
For receiving, my understanding is that we don't use MSG_ERRQUEUE. That's only if we want timestamps for sent messages. Besides, when I use MSG_ERRQUEUE with recvmsg, I get "resource temporarily unavailable." Here's how I receive datagrams:
int recv_len;
struct msghdr msg;
struct iovec iov;
memset(&msg, 0, sizeof(msg));
memset(&iov, 0, sizeof(iov));
// Space for control message info plus timestamp
char ctrl[2048];
memset(ctrl, 0, sizeof(ctrl));
//struct cmsghdr *cmsg = (struct cmsghdr *) &ctrl;
// Ancillary data buffer and length
msg.msg_control = (char *) ctrl;
msg.msg_controllen = sizeof(ctrl);
// Dest address info
msg.msg_name = (struct sockaddr *) &(inf->remote);
msg.msg_namelen = sizeof(struct sockaddr_in);
// Array of data buffers (scatter/gather)
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
// Data buffer pointer and length
iov.iov_base = buf;
iov.iov_len = len;
recv_len = recvmsg(inf->fd, &msg, 0);
And then I pass a pointer to msg to another function (handle_time) that does this:
struct timespec* ts = NULL;
struct cmsghdr* cmsg;
struct sock_extended_err *ext;
for( cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg,cmsg) ) {
printf("level=%d, type=%d, len=%zu\n", cmsg->cmsg_level, cmsg->cmsg_type, cmsg->cmsg_len);
}
Zero messages are received. So that's the first problem. My setup code above matches like half a dozen other examples I've found on the web, but I'm getting no ancillary data from this.
Next, let's turn to sending datagrams. Here's the setup:
inf->port = port;
inf->fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
memset(&(inf->remote), 0, sizeof(struct sockaddr_in));
inf->remote.sin_family = AF_INET;
inf->remote.sin_port = htons(port);
timestampOn = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE;
r = setsockopt(inf->fd, SOL_SOCKET, SO_TIMESTAMPING, &timestampOn, sizeof(timestampOn));
on = 1;
r = setsockopt(inf->fd, SOL_SOCKET, SO_BROADCAST, &on, sizeof(on));
r = inet_aton(address, &(inf->remote.sin_addr));
And this is how I send a datagram:
int send_len, r, i;
struct msghdr msg;
struct iovec iov;
memset(&msg, 0, sizeof(msg));
memset(&iov, 0, sizeof(iov));
// Space for control message info plus timestamp
char ctrl[2048];
memset(ctrl, 0, sizeof(ctrl));
//struct cmsghdr *cmsg = (struct cmsghdr *) &ctrl;
// Ancillary data buffer and length
//msg.msg_control = (char *) ctrl;
//msg.msg_controllen = sizeof(ctrl);
// Dest address info
msg.msg_name = (struct sockaddr *) &(inf->remote);
msg.msg_namelen = sizeof(struct sockaddr_in);
// Array of data buffers (scatter/gather)
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
// Data buffer pointer and length
iov.iov_base = buf;
iov.iov_len = len;
send_len = sendmsg(inf->fd, &msg, 0);
Examples I've seen reuse the msg and iov data structures, but in my experimentation, I added code to make sure things were cleared, just in case the send left anything behind, although it didn't make any difference. Here's the code for getting the timestamp:
memset(&msg, 0, sizeof(msg));
memset(&iov, 0, sizeof(iov));
memset(ctrl, 0, sizeof(ctrl));
msg.msg_control = (char *) ctrl;
msg.msg_controllen = sizeof(ctrl);
msg.msg_name = (struct sockaddr *) &(inf->remote);
msg.msg_namelen = sizeof(struct sockaddr_in);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
iov.iov_base = junk_buf;
iov.iov_len = sizeof(junk_buf);
for (;;) {
r = recvmsg(inf->fd, &msg, MSG_ERRQUEUE);
if (r<0) {
fprintf(stderr, "Didn't get kernel time\n");
return send_len;
}
printf("recvmsg returned %d\n", r);
handle_time(&msg);
}
The data buffer contains the original datagram as expected. The ancillary data I get back includes a single message, which handle_time prints as:
level=0, type=11, len=48
This is level SOL_IP and type IP_RECVERR, which is expected according to the docs. Looking into the payload (a struct sock_extended_err), the errno is 42 (ENOMSG, No message of desired type) and origin is 4 (SO_EE_ORIGIN_TXSTATUS). From the docs, this is supposed to happen and demonstrates that in fact I did manage to inform the kernel that I want TX status messages. But there is no second ancillary message!
I have tried to see if there is any kernel compile option that might disable this, but I haven't found any. So I'm just completely baffled here. Can anyone help me figure out what I'm doing wrong?
Thanks!
UPDATE: I tried running this same code on another Linux machine, this time CentOS 7 (kernel 3.10.0-693.2.2.el7.x86_64). I can't figure out what what kind of NIC that machine has, but when I try to send datagrams, I get some other weird behavior. For the very first datagram, when I start this program, I get back the message and a single ancillary message, just as above. For every subsequent sendmsg call, errno tells me that I get an "Invalid argument" error. This error goes away if I don't enable timestamps on the socket.
UPDATE 2: I discovered that I had not been making an ioctl necessary to enable timestamps in the driver. Unfortunately, when I do this call, I get ENODEV from errno (no such device). Here's how I'm trying to do it (which I'm imitating from https://github.com/majek/openonload/blob/master/src/tests/onload/hwtimestamping/tx_timestamping.c):
struct ifreq ifr;
struct hwtstamp_config hwc;
inf->fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
memset(&ifr, 0, sizeof(ifr));
hwc.flags = 0;
hwc.tx_type = HWTSTAMP_TX_ON;
hwc.rx_filter = HWTSTAMP_FILTER_ALL;
ifr.ifr_data = (char*)&hwc;
r = ioctl(inf->fd, SIOCSHWTSTAMP, &ifr);
That being said, I'd be relatively happy with software timestamps, which should not need this call. So I'm not sure this is helpful anyhow.
UPDATE 3: A compilable example was requested. The whole program is pretty minimal, so I put it into pastebin here: https://pastebin.com/qd0gspRc
Also, here's the output from ethtool:
Time stamping parameters for eth0:
Capabilities:
software-transmit (SOF_TIMESTAMPING_TX_SOFTWARE)
software-receive (SOF_TIMESTAMPING_RX_SOFTWARE)
software-system-clock (SOF_TIMESTAMPING_SOFTWARE)
PTP Hardware Clock: none
Hardware Transmit Timestamp Modes: none
Hardware Receive Filter Modes: none
Since this obviously doesn't support hardware timestamps, the ioctl is moot. I tried changing the SO_TIMESTAMPING setting to SOF_TIMESTAMPING_TX_SOFTWARE and SOF_TIMESTAMPING_RX_SOFTWARE for sender and receiver. That didn't help.
Then I tried adding SOF_TIMESTAMPING_SOFTWARE to both. I finally started getting something:
level=1, type=37, len=64
Level 1 is SOL_SOCKET, and type 37 is SCM_TIMESTAMPING. I'll go back to the docs and figure out how to interpret this. It says something about passing an array of three time structures. The driver's call to skb_tx_timestamp should have been sufficient so that it wouldn't require that I enable "fake" software timestamps to get something out.
Like I say in comment the use of SOF_TIMESTAMPING_SOFTWARE and SOF_TIMESTAMPING_RAW_HARDWARE is necessary because if I understand correctly the documentation, some bits are to generate the timestamp and some bits are here to report them in control message:
1.3.1 Timestamp Generation
Some bits are requests to the stack to try to generate timestamps. Any
combination of them is valid. Changes to these bits apply to newly
created packets, not to packets already in the stack. As a result, it
is possible to selectively request timestamps for a subset of packets
(e.g., for sampling) by embedding an send() call within two setsockopt
calls, one to enable timestamp generation and one to disable it.
Timestamps may also be generated for reasons other than being
requested by a particular socket, such as when receive timestamping is
enabled system wide, as explained earlier.
1.3.2 Timestamp Reporting
The other three bits control which timestamps will be reported in a
generated control message. Changes to the bits take immediate effect
at the timestamp reporting locations in the stack. Timestamps are only
reported for packets that also have the relevant timestamp generation
request set.
After, to use the data documentation say:
2.1 SCM_TIMESTAMPING records
These timestamps are returned in a control message with cmsg_level
SOL_SOCKET, cmsg_type SCM_TIMESTAMPING, and payload of type
struct scm_timestamping { struct timespec ts[3]; };
...
The structure can return up to three timestamps. This is a legacy
feature. At least one field is non-zero at any time. Most timestamps
are passed in ts[0]. Hardware timestamps are passed in ts[2].
To get transmit timestamp this require some configuration, first you need to know that software timestamp are not always available, I only achieve to get hardware transmit timestamp. But I'm not an expert in these domain, I just try to implemented timestamp with information that I found.
Secondly, I needed to activate hardware feature with linuxptp tool, I use hwstamp_cli:
hwstamp_ctl -i eth0 -r 1 -t 1
With this and some modification on your code I achieve to get hardware transmit timestamp but only with ethX interface because lo interface don't have these feature AFAIK so the final code is:
#include <arpa/inet.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#define UDP_MAX_LENGTH 1500
typedef struct {
int fd;
int port;
int err_no;
struct sockaddr_in local;
struct sockaddr_in remote;
struct timeval time_kernel;
struct timeval time_user;
int64_t prev_serialnum;
} socket_info;
static int setup_udp_receiver(socket_info *inf, int port) {
inf->port = port;
inf->fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
if (inf->fd < 0) {
inf->err_no = errno;
fprintf(stderr, "setup_udp_server: socket failed: %s\n",
strerror(inf->err_no));
return inf->fd;
}
int timestampOn =
SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_TX_SOFTWARE |
SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE |
SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE |
// SOF_TIMESTAMPING_OPT_TSONLY |
0;
int r = setsockopt(inf->fd, SOL_SOCKET, SO_TIMESTAMPING, &timestampOn,
sizeof timestampOn);
if (r < 0) {
inf->err_no = errno;
fprintf(stderr, "setup_udp_server: setsockopt failed: %s\n",
strerror(inf->err_no));
return r;
}
int on = 1;
r = setsockopt(inf->fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof on);
if (r < 0) {
inf->err_no = errno;
fprintf(stderr, "setup_udp_server: setsockopt2 failed: %s\n",
strerror(inf->err_no));
return r;
}
inf->local = (struct sockaddr_in){.sin_family = AF_INET,
.sin_port = htons((uint16_t)port),
.sin_addr.s_addr = htonl(INADDR_ANY)};
r = bind(inf->fd, (struct sockaddr *)&inf->local, sizeof inf->local);
if (r < 0) {
inf->err_no = errno;
fprintf(stderr, "setup_udp_server: bind failed: %s\n",
strerror(inf->err_no));
return r;
}
inf->prev_serialnum = -1;
return 0;
}
static int setup_udp_sender(socket_info *inf, int port, char *address) {
inf->port = port;
inf->fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
if (inf->fd < 0) {
inf->err_no = errno;
fprintf(stderr, "setup_udp_client: socket failed: %s\n",
strerror(inf->err_no));
return inf->fd;
}
int timestampOn =
SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_TX_SOFTWARE |
SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE |
SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE |
// SOF_TIMESTAMPING_OPT_TSONLY |
0;
int r = setsockopt(inf->fd, SOL_SOCKET, SO_TIMESTAMPING, &timestampOn,
sizeof timestampOn);
if (r < 0) {
inf->err_no = errno;
fprintf(stderr, "setup_udp_server: setsockopt failed: %s\n",
strerror(inf->err_no));
return r;
}
inf->remote = (struct sockaddr_in){.sin_family = AF_INET,
.sin_port = htons((uint16_t)port)};
r = inet_aton(address, &inf->remote.sin_addr);
if (r == 0) {
fprintf(stderr, "setup_udp_client: inet_aton failed\n");
inf->err_no = 0;
return -1;
}
inf->local = (struct sockaddr_in){.sin_family = AF_INET,
.sin_port = htons(0),
.sin_addr.s_addr = htonl(INADDR_ANY)};
inf->prev_serialnum = -1;
return 0;
}
static void handle_scm_timestamping(struct scm_timestamping *ts) {
for (size_t i = 0; i < sizeof ts->ts / sizeof *ts->ts; i++) {
printf("timestamp: %lld.%.9lds\n", (long long)ts->ts[i].tv_sec,
ts->ts[i].tv_nsec);
}
}
static void handle_time(struct msghdr *msg) {
for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); cmsg;
cmsg = CMSG_NXTHDR(msg, cmsg)) {
printf("level=%d, type=%d, len=%zu\n", cmsg->cmsg_level, cmsg->cmsg_type,
cmsg->cmsg_len);
if (cmsg->cmsg_level == SOL_IP && cmsg->cmsg_type == IP_RECVERR) {
struct sock_extended_err *ext =
(struct sock_extended_err *)CMSG_DATA(cmsg);
printf("errno=%d, origin=%d\n", ext->ee_errno, ext->ee_origin);
continue;
}
if (cmsg->cmsg_level != SOL_SOCKET)
continue;
switch (cmsg->cmsg_type) {
case SO_TIMESTAMPNS: {
struct scm_timestamping *ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
handle_scm_timestamping(ts);
} break;
case SO_TIMESTAMPING: {
struct scm_timestamping *ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
handle_scm_timestamping(ts);
} break;
default:
/* Ignore other cmsg options */
break;
}
}
printf("End messages\n");
}
static ssize_t udp_receive(socket_info *inf, char *buf, size_t len) {
char ctrl[2048];
struct iovec iov = (struct iovec){.iov_base = buf, .iov_len = len};
struct msghdr msg = (struct msghdr){.msg_control = ctrl,
.msg_controllen = sizeof ctrl,
.msg_name = &inf->remote,
.msg_namelen = sizeof inf->remote,
.msg_iov = &iov,
.msg_iovlen = 1};
ssize_t recv_len = recvmsg(inf->fd, &msg, 0);
gettimeofday(&inf->time_user, NULL);
if (recv_len < 0) {
inf->err_no = errno;
fprintf(stderr, "udp_receive: recvfrom failed: %s\n",
strerror(inf->err_no));
}
handle_time(&msg);
return recv_len;
}
static ssize_t udp_send(socket_info *inf, char *buf, size_t len) {
struct iovec iov = (struct iovec){.iov_base = buf, .iov_len = len};
struct msghdr msg = (struct msghdr){.msg_name = &inf->remote,
.msg_namelen = sizeof inf->remote,
.msg_iov = &iov,
.msg_iovlen = 1};
gettimeofday(&inf->time_user, NULL);
ssize_t send_len = sendmsg(inf->fd, &msg, 0);
if (send_len < 0) {
inf->err_no = errno;
fprintf(stderr, "udp_send: sendmsg failed: %s\n", strerror(inf->err_no));
}
return send_len;
}
static ssize_t meq_receive(socket_info *inf, char *buf, size_t len) {
struct iovec iov = (struct iovec){.iov_base = buf, .iov_len = len};
char ctrl[2048];
struct msghdr msg = (struct msghdr){.msg_control = ctrl,
.msg_controllen = sizeof ctrl,
.msg_name = &inf->remote,
.msg_namelen = sizeof inf->remote,
.msg_iov = &iov,
.msg_iovlen = 1};
ssize_t recv_len = recvmsg(inf->fd, &msg, MSG_ERRQUEUE);
if (recv_len < 0) {
inf->err_no = errno;
if (errno != EAGAIN) {
fprintf(stderr, "meq_receive: recvmsg failed: %s\n",
strerror(inf->err_no));
}
return recv_len;
}
handle_time(&msg);
return recv_len;
}
typedef struct {
int64_t serialnum;
int64_t user_time_serialnum;
int64_t user_time;
int64_t kernel_time_serialnum;
int64_t kernel_time;
size_t message_bytes;
} message_header;
static const size_t payload_max = UDP_MAX_LENGTH - sizeof(message_header);
static ssize_t generate_random_message(socket_info *inf, char *buf,
size_t len) {
if (len < sizeof(message_header)) {
return -1;
}
message_header *header = (message_header *)buf;
char *payload = (char *)(header + 1);
size_t payload_len = (size_t)random() % (payload_max + 1);
if (payload_len > len - sizeof(message_header)) {
payload_len = len - sizeof(message_header);
}
for (size_t i = 0; i < payload_len; i++) {
payload[i] = (char)random();
}
static int64_t serial_num = 0;
*header = (message_header){
.user_time_serialnum = inf->prev_serialnum,
.user_time = inf->time_user.tv_sec * 1000000000L + inf->time_user.tv_usec,
.kernel_time_serialnum = inf->prev_serialnum,
.kernel_time =
inf->time_kernel.tv_sec * 1000000000L + inf->time_kernel.tv_usec,
.serialnum = serial_num,
.message_bytes = payload_len};
size_t total = payload_len + sizeof *header;
printf("uts%5" PRId64 ": kt=%" PRId64 ", ut=%" PRId64 ", sn=%" PRId64
": s=%zu\n",
header->user_time_serialnum, header->kernel_time, header->user_time,
header->serialnum, total);
inf->prev_serialnum = serial_num++;
return (ssize_t)total;
}
static void sender_loop(char *host) {
socket_info inf;
int ret = setup_udp_sender(&inf, 8000, host);
if (ret < 0) {
return;
}
for (int i = 0; i < 2000; i++) {
useconds_t t = random() % 2000000;
usleep(t);
char packet_buffer[4096];
ssize_t len =
generate_random_message(&inf, packet_buffer, sizeof packet_buffer);
if (len < 0) {
return;
}
udp_send(&inf, packet_buffer, (size_t)len);
while (meq_receive(&inf, packet_buffer, sizeof packet_buffer) != -1) {
}
}
}
static void receiver_loop(void) {
socket_info inf;
int ret = setup_udp_receiver(&inf, 8000);
if (ret < 0) {
return;
}
for (int i = 0; i < 1000; i++) {
char packet_buffer[4096];
udp_receive(&inf, packet_buffer, sizeof packet_buffer);
}
}
#define USAGE "Usage: %s [-r | -s host]\n"
int main(int argc, char *argv[]) {
if (argc < 2) {
fprintf(stderr, USAGE, argv[0]);
return 0;
}
if (0 == strcmp(argv[1], "-s")) {
if (argc < 3) {
fprintf(stderr, USAGE, argv[0]);
return 0;
}
sender_loop(argv[2]);
} else if (0 == strcmp(argv[1], "-r")) {
receiver_loop();
} else {
fprintf(stderr, USAGE, argv[0]);
}
}
Exemple output:
$ ./a.out -r
level=1, type=37, len=64
timestamp: 1511196758.087209387s
timestamp: 0.000000000s
timestamp: 0.000000000s
End messages
level=1, type=37, len=64
timestamp: 1511196759.333507671s
timestamp: 0.000000000s
timestamp: 0.000000000s
End messages
$ ./a.out -s "8.8.8.8"
uts -1: kt=238059712, ut=140918979990070, sn=0: s=482
uts 0: kt=238059712, ut=1511197522000237457, sn=1: s=132
level=1, type=37, len=64
timestamp: 0.000000000s
timestamp: 0.000000000s
timestamp: 1511197359.637050597s
level=0, type=11, len=48
errno=42, origin=4
End messages
uts 1: kt=238059712, ut=1511197523000483805, sn=2: s=1454
level=1, type=37, len=64
timestamp: 0.000000000s
timestamp: 0.000000000s
timestamp: 1511197360.883295397s
level=0, type=11, len=48
errno=42, origin=4
End messages
Live test: sender, receiver

Read ICMP payload from a recvmsg with MSG_ERRQUEUE flag

I'm using BSD sockets to build an advanced traceroute program that doesn't need root privileges (like tracepath).
Using UDP and a bound socket, I call:
recvmsg(socket, header, MSG_ERRQUEUE)
I receive info about the supposed ICMP notification that a previously sent packet has triggered.
Do you know if it's possible to access the ICMP payload (which should be the previously-sent packet)?
I read from recvmsg man page:
[..] The payload of the original packet that caused the error
is passed as normal data via msg_iovec. [..]
But I can't find anything useful there, it just seems random data (I used wireshark to cross-check data).
You can use this sample code to check which ICMP Error you got and handle it (The sample contain some comments and links):
#define BUFFER_MAX_SIZE 1024
int on = 1;
/* Set the option, so we can receive errors */
setsockopt(socket, SOL_IP, IP_RECVERR,(char*)&on, sizeof(on));
/* Other code here */
/* .... */
/* Handle receving ICMP Errors */
int return_status;
char buffer[BUFFER_MAX_SIZE];
struct iovec iov; /* Data array */
struct msghdr msg; /* Message header */
struct cmsghdr *cmsg; /* Control related data */
struct sock_extended_err *sock_err; /* Struct describing the error */
struct icmphdr icmph; /* ICMP header */
struct sockaddr_in remote; /* Our socket */
for (;;)
{
iov.iov_base = &icmph;
iov.iov_len = sizeof(icmph);
msg.msg_name = (void*)&remote;
msg.msg_namelen = sizeof(remote);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_flags = 0;
msg.msg_control = buffer;
msg.msg_controllen = sizeof(buffer);
/* Receiving errors flog is set */
return_status = recvmsg(socket, &msg, MSG_ERRQUEUE);
if (return_status < 0)
continue;
/* Control messages are always accessed via some macros
* http://www.kernel.org/doc/man-pages/online/pages/man3/cmsg.3.html
*/
for (cmsg = CMSG_FIRSTHDR(&msg);cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
{
/* Ip level */
if (cmsg->cmsg_level == SOL_IP)
{
/* We received an error */
if (cmsg->cmsg_type == IP_RECVERR)
{
fprintf(stderror, "We got IP_RECVERR message\n");
sock_err = (struct sock_extended_err*)CMSG_DATA(cmsg);
if (sock_err)
{
/* We are interested in ICMP errors */
if (sock_err->ee_origin == SO_EE_ORIGIN_ICMP && sock_err->ee_type == ICMP_DEST_UNREACH)
{
/* Handle ICMP destination unreachable error codes */
switch (sock_err->ee_code)
{
case ICMP_NET_UNREACH:
/* Handle this error */
fprintf(stderror, "Network Unreachable Error\n");
break;
case ICMP_HOST_UNREACH:
/* Handle this error */
fprintf(stderror, "Host Unreachable Error\n");
break;
/* Handle all other cases. Find more errors :
* http://lxr.linux.no/linux+v3.5/include/linux/icmp.h#L39
*/
}
}
}
}
}
}
}

Resources