What is the need for "maxfd" when creating a tunnel?

In this link, https://backreference.org/2010/03/26/tuntap-interface-tutorial/, there's a code sample that uses the tun/tap interface to create a TCP tunnel, shown below.
/* net_fd is the network file descriptor (to the peer), tap_fd is the
   descriptor connected to the tun/tap interface */

/* use select() to handle two descriptors at once */
maxfd = (tap_fd > net_fd) ? tap_fd : net_fd;

while (1) {
    int ret;
    fd_set rd_set;

    FD_ZERO(&rd_set);
    FD_SET(tap_fd, &rd_set);
    FD_SET(net_fd, &rd_set);

    ret = select(maxfd + 1, &rd_set, NULL, NULL, NULL);

    if (ret < 0 && errno == EINTR) {
        continue;
    }
    if (ret < 0) {
        perror("select()");
        exit(1);
    }

    if (FD_ISSET(tap_fd, &rd_set)) {
        /* data from tun/tap: just read it and write it to the network */
        nread = cread(tap_fd, buffer, BUFSIZE);

        /* write length + packet */
        plength = htons(nread);
        nwrite = cwrite(net_fd, (char *)&plength, sizeof(plength));
        nwrite = cwrite(net_fd, buffer, nread);
    }

    if (FD_ISSET(net_fd, &rd_set)) {
        /* data from the network: read it, and write it to the tun/tap interface.
         * We need to read the length first, and then the packet */

        /* Read length */
        nread = read_n(net_fd, (char *)&plength, sizeof(plength));

        /* read packet */
        nread = read_n(net_fd, buffer, ntohs(plength));

        /* now buffer[] contains a full packet or frame, write it into the tun/tap interface */
        nwrite = cwrite(tap_fd, buffer, nread);
    }
}
What's the purpose of "maxfd" in that code excerpt? The exact lines are:
maxfd = (tap_fd > net_fd)?tap_fd:net_fd;
ret = select(maxfd + 1, &rd_set, NULL, NULL, NULL);

It's an artifact of the way the dangerous and obsolete select function works. select requires an argument that is a bound on the size (in bits) of the fd_set objects passed to it, that is, one greater than the highest-numbered descriptor in any of the sets, and it cannot work with fd numbers at or above the arbitrary limit imposed by FD_SETSIZE. If you fail to meet these requirements, Undefined Behavior results.
Wherever you see select, you should replace it with poll, which does not suffer from these limitations, has an easier-to-use interface, and has more features.
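For comparison, here is a minimal sketch of the same loop rewritten with poll(). It reuses the tutorial's cread/cwrite/read_n helpers and the buffer, plength, nread, and nwrite variables; treat it as an illustration rather than a drop-in patch:

#include <poll.h>

/* One pollfd per descriptor: no maxfd, no per-iteration FD_ZERO/FD_SET,
   and no FD_SETSIZE ceiling on descriptor values. */
struct pollfd pfds[2] = {
    { .fd = tap_fd, .events = POLLIN },
    { .fd = net_fd, .events = POLLIN },
};

while (1) {
    int ret = poll(pfds, 2, -1);   /* -1 timeout: wait indefinitely */
    if (ret < 0) {
        if (errno == EINTR)
            continue;
        perror("poll()");
        exit(1);
    }
    if (pfds[0].revents & POLLIN) {
        /* data from tun/tap: forward it to the network, as above */
        nread = cread(tap_fd, buffer, BUFSIZE);
        plength = htons(nread);
        nwrite = cwrite(net_fd, (char *)&plength, sizeof(plength));
        nwrite = cwrite(net_fd, buffer, nread);
    }
    if (pfds[1].revents & POLLIN) {
        /* data from the network: forward it to the tun/tap interface */
        nread = read_n(net_fd, (char *)&plength, sizeof(plength));
        nread = read_n(net_fd, buffer, ntohs(plength));
        nwrite = cwrite(tap_fd, buffer, nread);
    }
}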


What is the best way to determine packet size with recv()?

Extremely new to socket programming and C in general. I am trying to write a basic program to send and receive data between two machines. I understand that recv will not get all your data at once -- you essentially have to loop it until it has read the whole message.
In lieu of just setting a limit on both machines, I have created a simple Message struct on the client side:
struct Message {
    size_t length;
    char contents[1024 - sizeof(size_t)];
} message;

message.length = sizeof(struct Message);
message.contents = information_i_want_to_send;
When it arrives at the server, I have recv read into a buffer: received = recv(ioSock, &buffer, 1024, 0) (Which coincidentally is the same size as my Message struct -- but assuming it wasn't...).
I then extract Message.length from the buffer like this:
size_t messagelength;
messagelength = *((size_t *) &buffer);
Then I loop recv into the buffer while received < messagelength.
This works, but I can't help feeling it's really ugly and it feels hacky. (Especially if the first recv call reads less than sizeof(size_t) or the machines are different bit architectures, in which case the size_t cast won't work..). Is there a better way to do this?
You have a fixed-size message, so you can use something like this:
#include <errno.h>
#include <limits.h>      /* SSIZE_MAX */
#include <sys/socket.h>  /* recv(), ssize_t */

// Returns the number of bytes read.
// EOF was reached if the number of bytes read is less than requested.
// On error, returns -1 and sets errno.
ssize_t recv_fixed_amount(int sockfd, char *buf, size_t size) {
    if (size > SSIZE_MAX) {
        errno = EINVAL;
        return -1;
    }
    ssize_t bytes_read = 0;
    while (size > 0) {
        ssize_t rv = recv(sockfd, buf, size, 0);
        if (rv < 0)
            return -1;           // error; errno was set by recv()
        if (rv == 0)
            return bytes_read;   // EOF
        size -= rv;
        bytes_read += rv;
        buf += rv;
    }
    return bytes_read;
}
It would be used something like this:
/* Assumes <stdint.h>, <stdio.h>, and <stdlib.h> are also included. */
typedef struct {
    uint32_t length;
    char contents[1020];
} Message;

Message message;

ssize_t bytes_read = recv_fixed_amount(sockfd, (char *)&message.length,
                                       sizeof(message.length));
if (bytes_read == 0) {
    printf("EOF reached\n");
    exit(EXIT_SUCCESS);
}
if (bytes_read < 0) {
    perror("recv");
    exit(EXIT_FAILURE);
}
if (bytes_read != sizeof(message.length)) {
    fprintf(stderr, "recv: Premature EOF.\n");
    exit(EXIT_FAILURE);
}

bytes_read = recv_fixed_amount(sockfd, message.contents, sizeof(message.contents));
if (bytes_read < 0) {
    perror("recv");
    exit(EXIT_FAILURE);
}
if (bytes_read != sizeof(message.contents)) {
    fprintf(stderr, "recv: Premature EOF.\n");
    exit(EXIT_FAILURE);
}
Notes:
size_t is not going to be the same size everywhere, so I switched to a uint32_t.
I read the fields independently because the padding within the struct can vary between implementations. They would need to be sent that way as well.
The receiver populates message.length with the information from the stream, but doesn't actually use it.
A malicious or buggy sender could provide a value for message.length that's too large and crash the receiver (or worse) if the receiver doesn't validate it (see the sketch below). The same goes for contents: it might not be NUL-terminated even if the receiver expects it to be.
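For illustration only (this is not part of the original answer), such a validation could bound the claimed length by the size of the contents array:

/* Reject lengths that cannot be correct for this fixed-size format. */
if (message.length > sizeof(message.contents)) {
    fprintf(stderr, "recv: implausible message length\n");
    exit(EXIT_FAILURE);
}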
But what if the length wasn't fixed? Then the sender would need to somehow communicate how much the reader needs to read. A common approach is a length prefix.
typedef struct {
    uint32_t length;
    char contents[];   /* flexible array member */
} Message;

uint32_t contents_size;

ssize_t bytes_read = recv_fixed_amount(sockfd, (char *)&contents_size,
                                       sizeof(contents_size));
if (bytes_read == 0) {
    printf("EOF reached\n");
    exit(EXIT_SUCCESS);
}
if (bytes_read < 0) {
    perror("recv");
    exit(EXIT_FAILURE);
}
if (bytes_read != sizeof(contents_size)) {
    fprintf(stderr, "recv: Premature EOF.\n");
    exit(EXIT_FAILURE);
}

Message *message = malloc(sizeof(Message) + contents_size);
if (!message) {
    perror("malloc");
    exit(EXIT_FAILURE);
}
message->length = contents_size;

bytes_read = recv_fixed_amount(sockfd, message->contents, contents_size);
if (bytes_read < 0) {
    perror("recv");
    exit(EXIT_FAILURE);
}
if (bytes_read != (ssize_t)contents_size) {
    fprintf(stderr, "recv: Premature EOF.\n");
    exit(EXIT_FAILURE);
}
Notes:
message->length contains the size of message->contents instead of the size of the structure. This is far more useful.
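The sending side has to cooperate by writing the length before the contents. A hedged sketch, where send_all() is an assumed helper that loops over send() until everything is written (mirroring recv_fixed_amount() above) and payload is the assumed data to transmit:

uint32_t contents_size = (uint32_t)strlen(payload);
if (send_all(sockfd, (char *)&contents_size, sizeof(contents_size)) < 0 ||
    send_all(sockfd, payload, contents_size) < 0) {
    perror("send");
    exit(EXIT_FAILURE);
}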
Another approach is to use a sentinel value. This is a value that tells the reader the message is over. This is what the NUL that terminates C strings is. This is more complicated because you don't know how much to read in advance. Reading byte-by-byte is too expensive, so one normally uses a buffer.
while (1) {
    extend_buffer_if_necessary();
    recv_into_buffer();
    while (buffer_contains_a_sentinel()) {
        // This also shifts the remainder of the buffer's contents.
        extract_contents_of_buffer_up_to_sentinel();
        process_extracted_message();
    }
}
The advantage of using a sentinel value is that one doesn't need to know the length of the message in advance (so the sender can start sending it before it's fully created.)
The disadvantage is the same as for C strings: The message can't contain the sentinel value unless some form of escaping mechanism is used. Between this and the complexity of the reader, you can see why a length prefix is usually preferred over a sentinel value. :)
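To make the pseudocode concrete, here is a hedged sketch of that buffered-sentinel loop using '\n' as the sentinel; handle_message() is an assumed application callback, not a standard function:

#include <string.h>
#include <sys/socket.h>

void handle_message(const char *msg, size_t len);   /* assumed callback */

static char inbuf[4096];
static size_t inlen = 0;

int read_messages(int sockfd) {
    for (;;) {
        if (inlen == sizeof(inbuf))
            return -1;                       /* message longer than buffer */
        ssize_t rv = recv(sockfd, inbuf + inlen, sizeof(inbuf) - inlen, 0);
        if (rv <= 0)
            return (int)rv;                  /* error (-1) or EOF (0) */
        inlen += (size_t)rv;

        char *nl;
        while ((nl = memchr(inbuf, '\n', inlen)) != NULL) {
            size_t msglen = (size_t)(nl - inbuf);
            handle_message(inbuf, msglen);               /* one full message */
            memmove(inbuf, nl + 1, inlen - msglen - 1);  /* shift remainder */
            inlen -= msglen + 1;
        }
    }
}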
Finally, there's a better solution than sentinel values for large messages that you want to start sending before they are fully created: a sequence of length-prefixed chunks. One keeps reading chunks until a chunk of size 0 is encountered, signaling the end.
HTTP supports both length-prefixed messages (in the form of the Content-Length: <length> header) and this approach (in the form of the Transfer-Encoding: chunked header).
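A sketch of the chunk-reading loop, reusing recv_fixed_amount() from above. Assumptions (not from the original answer): the length travels as a uint32_t in network byte order (ntohl() from <arpa/inet.h>), MAX_CHUNK is an application-defined sanity cap, chunk_buf holds at least MAX_CHUNK bytes, and process_chunk() is the application's handler:

for (;;) {
    uint32_t chunk_len;
    if (recv_fixed_amount(sockfd, (char *)&chunk_len, sizeof(chunk_len))
            != sizeof(chunk_len))
        break;                        /* error or premature EOF */
    chunk_len = ntohl(chunk_len);     /* network to host byte order */
    if (chunk_len == 0)
        break;                        /* zero-length chunk: end of message */
    if (chunk_len > MAX_CHUNK)
        break;                        /* implausible length from the peer */
    if (recv_fixed_amount(sockfd, chunk_buf, chunk_len) != (ssize_t)chunk_len)
        break;                        /* error or premature EOF */
    process_chunk(chunk_buf, chunk_len);
}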
There are two ways to do that:
1.)
Use a binary synchronous protocol: frame each message with STX (Start of Text) and ETX (End of Text) control characters so the receiver can identify where the text starts and ends.
2.)
Attach the number of bytes being sent at the start of the data. The receiver first reads that count from the socket, then reads exactly that many bytes and takes the amount of data it requires.
Seems tough? Let me give you an example.
Actual data to be sent: ABCDEFGHIJ
New data format: 0010ABCDEFGHIJ
Data actually required on the server side: ABCDE
recv will first read 4 bytes to get the length of the actual data (looping until it has all 4 bytes):
int received1 = recv(ioSock, recvbuf, 4, 0);
In this case recvbuf will hold "0010", which converted to an integer gives 10; store that in an integer variable. So we have:
int toReadVal = 10;
Now all we need is to read those 10 bytes in the next recv call:
int received = recv(ioSock, recvbuf1, toReadVal, 0);
Finally, recvbuf1 holds ABCDEFGHIJ, and you can truncate the value as per your requirement (here, the first 5 bytes).
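One step the example glosses over is turning the 4 ASCII digits into an integer. A minimal sketch (assuming <string.h> and <stdlib.h> are included and the 4 header bytes have already been received in full):

char lenbuf[5];
memcpy(lenbuf, recvbuf, 4);     /* copy the "0010" prefix */
lenbuf[4] = '\0';               /* terminate before parsing */
int toReadVal = atoi(lenbuf);   /* "0010" -> 10 */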

pthreads: wait until read() returns a value > 0

I'm working on a client program that will operate as a basic instant messenger. I'm using pthread to open up a thread dedicated to waiting for a message to be received and the message to be read. Is using pthread_cond_wait the correct way to go about waiting for read(sockfd, buffer, 256) to be above 0?
void *threadRead() {
    while (1) {
        bzero(buffer, 256);
        pthread_cond_wait(&buffer_lock, read(sockfd, buffer, 255) > 0);
        n = read(sockfd, buffer, 255);
        printf("%s\n", buffer);
    }
}
You see I just need to wait until read() comes back with a value above 0 to continue and I can't find the right system to do that. If anyone could link something that would put me on the right track or give me a hint that would be great.
No. pthread_cond_wait() is for waiting on a condition that will be changed by one of your other threads.
If you just want to wait for read() to return something, just call read(). Unless you have specifically marked the socket as non-blocking, it will block the calling thread until there is something to return.
If read() ever returns 0 then it indicates end of file: it means that the socket has been closed on the remote side, so there will never be any more to read.
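Concretely, the reader thread can be built on a plain blocking read(). A minimal sketch of what that might look like, assuming sockfd is a connected, blocking socket passed in through the pthread argument rather than a global:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

void *threadRead(void *arg) {
    int sockfd = *(int *)arg;
    char buffer[256];
    for (;;) {
        ssize_t n = read(sockfd, buffer, sizeof(buffer) - 1);
        if (n < 0) {              /* read error */
            perror("read");
            break;
        }
        if (n == 0)               /* EOF: peer closed the connection */
            break;
        buffer[n] = '\0';         /* terminate before printing as a string */
        printf("%s\n", buffer);
    }
    return NULL;
}

It would be started with pthread_create(&tid, NULL, threadRead, &sockfd); and simply blocks inside read() until data arrives.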
You should use select() instead, like this
int running;

running = 1;
while (running != 0) /* Just in case you want to end the loop, you can */
{
    fd_set rdset;
    struct timeval timeout;

    timeout.tv_sec = NUMBER_OF_SECONDS_TO_WAIT;
    timeout.tv_usec = YOU_CAN_HAVE_MICRO_SECONDS_PRECISION;

    FD_ZERO(&rdset);
    FD_SET(fd, &rdset);
    if (select(fd + 1, &rdset, NULL, NULL, &timeout) == 1)
    {
        ssize_t length;
        char buffer[100];

        length = read(fd, buffer, sizeof(buffer));
        /* use buffer now */
    }
    else
    {
        /* Timed out and still nothing to read: */
        /* do something meanwhile and retry if  */
        /* you want to.                         */
    }
    running = use_a_function_to_check_this();
}
You can use it in a different thread, but you need to be careful.
Non-blocking I/O is difficult no matter how you implement it.
One more thing: this

n = read(sockfd, buffer, 255);
printf("%s\n", buffer);

is likely undefined behavior, since apparently buffer is

char buffer[256];

You could write

n = read(sockfd, buffer, sizeof(buffer) - 1);
if (n < 0)
    n = 0; /* real code should handle the error; this just avoids buffer[-1] */
buffer[n] = '\0';
printf("%s\n", buffer);

ensuring that buffer is nul terminated (and that a negative n is never used as an index).

C using select() to read from two named pipes (FIFO)

I am currently trying to write a program in C which will read from two named pipes and print any data to stdout as it becomes available.
for example: If I open two terminals and ./execute pipe1 pipe2 in one of the terminals (with pipe1 and pipe2 being valid named pipes) and then type echo "Data here." > pipe1 then the name of the pipe (here it is pipe1), the size, and the data should print to stdout-- Here it would look like pipe1 [25]: Data here.
I know I need to open the pipes with the O_RDONLY and O_NONBLOCK flags. I have looked at many examples (quite a few on this forum) of people using select() and I still don't understand what the different parameters being passed to select() are doing. If anyone can provide guidance here it would be hugely helpful. Below is the code I have so far.
int pipeRouter(char fifo1[], char fifo2[]) {
    /* note: the flags must be OR'd together, not passed as separate arguments */
    fileDescriptor1 = open(fifo1, O_RDONLY | O_NONBLOCK);
    fileDescriptor2 = open(fifo2, O_RDONLY | O_NONBLOCK);
    if (fileDescriptor1 < 0) {
        printf("%s does not exist", fifo1);
    }
    if (fileDescriptor2 < 0) {
        printf("%s does not exist", fifo2);
    }
}
select() lets you wait for an I/O event instead of wasting CPU cycles on read.
So, in your example, the main loop can look like:
for (;;)
{
    int res;
    char buf[256];

    res = read(fileDescriptor1, buf, sizeof(buf));
    if (res > 0)
    {
        printf("Read %d bytes from channel1\n", res);
    }
    res = read(fileDescriptor2, buf, sizeof(buf));
    if (res > 0)
    {
        printf("Read %d bytes from channel2\n", res);
    }
}
If you add the code and run it, you will notice that:
The program actually does what you want: it reads from both pipes.
CPU utilization is 100% on one core, i.e. the program wastes CPU even when there is no data to read.
To solve this issue, the select and poll APIs were introduced. For select we need to know the descriptors (we do), and the maximum of them.
So let's modify the code a bit:
for (;;)
{
fd_set fds;
int maxfd;
FD_ZERO(&fds); // Clear FD set for select
FD_SET(fileDescriptor1, &fds);
FD_SET(fileDescriptor2, &fds);
maxfd = fileDescriptor1 > fileDescriptor2 ? fileDescriptor1 : fileDescriptor2;
select(maxfd + 1, &fds, NULL, NULL, NULL);
// The minimum information for select: we are asking only about
// read operations, ignoring write and error ones; and not
// defining any time restrictions on wait.
// do reads as in previous example here
}
When running the improved code, the CPU is no longer wasted, but you will notice that a read is still attempted on a pipe that has no data whenever the other one does. To check which pipe actually has the data, use FD_ISSET after the select call:
if (FD_ISSET(fileDescriptor1, &fds))
{
    // We can read from fileDescriptor1
}
if (FD_ISSET(fileDescriptor2, &fds))
{
    // We can read from fileDescriptor2
}
Putting all of the above together, the code looks like:
for (;;)
{
    fd_set fds;
    int maxfd;
    int res;
    char buf[256];

    FD_ZERO(&fds); // Clear FD set for select
    FD_SET(fileDescriptor1, &fds);
    FD_SET(fileDescriptor2, &fds);
    maxfd = fileDescriptor1 > fileDescriptor2 ? fileDescriptor1 : fileDescriptor2;

    select(maxfd + 1, &fds, NULL, NULL, NULL);

    if (FD_ISSET(fileDescriptor1, &fds))
    {
        // We can read from fileDescriptor1
        res = read(fileDescriptor1, buf, sizeof(buf));
        if (res > 0)
        {
            printf("Read %d bytes from channel1\n", res);
        }
    }
    if (FD_ISSET(fileDescriptor2, &fds))
    {
        // We can read from fileDescriptor2
        res = read(fileDescriptor2, buf, sizeof(buf));
        if (res > 0)
        {
            printf("Read %d bytes from channel2\n", res);
        }
    }
}
So, add error handling (a sketch follows), and you are set.
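As a hedged sketch of that error handling, the select() call inside the loop could be wrapped like this (mirroring the EINTR handling in the tunnel example at the top of this page; requires <errno.h>):

int ret = select(maxfd + 1, &fds, NULL, NULL, NULL);
if (ret < 0)
{
    if (errno == EINTR)
        continue;      /* interrupted by a signal: just retry */
    perror("select");
    break;
}

Note also that read() returning 0 on a FIFO means the writer closed its end; real code should decide whether to reopen the FIFO or stop at that point.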

Trying to pipe data from a child-process server to its parent process

I'm working on an assignment for my Distributed Systems class. I'm a master's student in C.S., but my specialty in programming is .NET and I'm working on a project that requires some fairly involved Unix knowledge, which is tripping me up.
The assignment is implementing a flush channel protocol API. So I'm coding a small function library that other apps can implement to use flush channel communication. I've set it up so that when the init function is called, it forks a child process to act as the server for incoming messages. The child communicates with the parent process by sending incoming data to the parent through a pipe.
This works OK if messages are sent and received one at a time; e.g.,
send -> receive -> send -> receive -> etc.
However, if multiple messages are sent before doing any receives; e.g.,
send -> send -> send -> receive
then it gets messed up. Specifically, the first message is received correctly, but when I go to receive the second message, the program hangs and needs to be killed. I've done a lot of searching online and been plugging away at this for hours but haven't made much progress.
The program as a whole is far too large to show here, but here are the most relevant bits. Here's the part where I get the server going and receive messages. Note the line
write(fd[1], buffer, (strlen(buffer)+1));
-- I think that's a good candidate for being the source of the problem here, but not sure what to do differently. (Tried fwrite() and that didn't work at all.)
fd = malloc(2 * sizeof(int));
int nbytes;
if (pipe(fd) < 0) {
    perror("Could not create pipe");
    return -1;
}

pID = fork();
if (pID < 0) {
    perror("Failed to fork");
    return -1;
} else if (pID == 0) { // child
    close(fd[0]); // close input side of pipe
    int cc;
    int fsize;
    struct sockaddr_in from;
    int serials[500];
    int i;
    for (i = 0; i < 500; i++) serials[i] = 0;
    char buffer[2048];
    while (1) {
        fsize = sizeof(from);
        cc = recvfrom(socketNo, buffer, 2048, 0, (struct sockaddr *)&from, &fsize);
        if (cc < 0) perror("Receive error");
        datagram data = decodeDatagram(buffer);
        if (serials[data.serial] == 0) {
            write(fd[1], buffer, (strlen(buffer) + 1));
            serials[data.serial] = 1;
        }
    }
} else { // parent
    close(fd[1]); // close output side of pipe
    return 0;
}
(The "serials" array is for not forwarding repeated messages, as messages are sent multiple times to improve reliability. I know a fixed size for this array is not good practice, but the tests for this assignment don't send that many messages so it's OK in this context.)
The beginning of the receive function looks like this:
int fRecv(int *id, char *buf, int nbytes) {
    checkDatagramTable(*id);
    char *tbuf = malloc((nbytes + 9) * sizeof(char));
    int rbytes = read(fd[0], tbuf, nbytes + 9);
The "+9" is to accommodate additional information that gets packaged along with the message to be sent, for flush channel ordering. This is also a pretty sketchy area, but allocating more space to be extra sure has not helped the issue.
I know there's quite a bit of extraneous stuff in here, references to other functions etc. But the problem surely lies in how I'm piping the data through, so the source of my issue should lie there somewhere.
Thanks in advance for your assistance; it is truly appreciated.
This looks suspicious (what is in the packets? They could be binary). Where is the type definition for datagram?
fsize = sizeof(from);
cc = recvfrom(socketNo, buffer, 2048, 0, (struct sockaddr *)&from, &fsize);
if (cc < 0) perror("Receive error");
datagram data = decodeDatagram(buffer);
if (serials[data.serial] == 0) {
    write(fd[1], buffer, (strlen(buffer) + 1)); // <-- ????
    serials[data.serial] = 1;
}
I'd try instead:
write(fd[1], buffer, cc);
UPDATE:
If the message is not null-terminated, you'll have to terminate it explicitly:

if (cc == 2048) cc -= 1;
buffer[cc] = '\0'; // <<--
datagram data = decodeDatagram(buffer);
...

Also, it is advisable to use sizeof buffer instead of 2048.
UPDATE2:
You could test whether the strings in the packets are really null-terminated with:

unsigned pos;
cc = recvfrom(socketNo, buffer, 2048, 0, (struct sockaddr *)&from, &fsize);
if (cc < 0) perror("Receive error");
for (pos = 0; pos < cc; pos++) {
    if (buffer[pos] == 0) break;
}
switch (cc - pos) {
case 0:  fprintf(stderr, "No nul byte found in packet: I lose!\n"); break;
default: fprintf(stderr, "Spurious nul byte found in the middle of packet\n");
         /* fall through */
case 1:  break;
}
datagram data = decodeDatagram(buffer);
if (serials[data.serial] == 0) {
    write(fd[1], buffer, cc);
    serials[data.serial] = 1;
}

using select() with pipe

I am reading/writing to a pipe created by pipe(pipe_fds). So basically with following code, I am reading from that pipe:
fp = fdopen(pipe_fds[0], "r");
And when ever I get something, I print it out by:
while (fgets(buf, 200, fp)) {
printf("%s", buf);
}
What I want is: when nothing appears on the pipe to read for a certain amount of time, I want to know about it and do:
printf("dummy");
Can this be achieved by select() ? Any pointers on how to do that will be great.
Let's say you wanted to wait 5 seconds and then if nothing was written to the pipe, you print out "dummy."
fd_set set;
struct timeval timeout;

/* Initialize the file descriptor set. */
FD_ZERO(&set);
FD_SET(pipe_fds[0], &set);

/* Initialize the timeout data structure. */
timeout.tv_sec = 5;
timeout.tv_usec = 0;

/* In the interest of brevity, I'm using the constant FD_SETSIZE, but a more
   efficient implementation would use the highest fd + 1 instead. In your case,
   since you only have a single fd, you can replace FD_SETSIZE with
   pipe_fds[0] + 1, thereby limiting the number of fds the system has to
   iterate over. */
int ret = select(FD_SETSIZE, &set, NULL, NULL, &timeout);

// A return value of 0 means that the time expired
// without any activity on the file descriptor.
if (ret == 0)
{
    printf("dummy");
}
else if (ret < 0)
{
    // error occurred
}
else
{
    // there was activity on the file descriptor
}
IIRC, select() takes a timeout; when it returns, you check with FD_ISSET to tell whether it was I/O or the timeout that caused it to return.
