set connect() timeout on unix domain socket

set connect() timeout on unix domain socket - c

Is using alarm() is the only to set connect() timeout on unix domain socket? I've tried select() which is described here but seems like select() returns ok immediately on unix domain socket every time and
no error occurred by calling getsockopt(SO_ERROR), but a send() on the fd returns an error says Transport endpoint is not connected. I paste the select() code below.
I think using alarm would meet the case, but seems it's considered as an old-fashion way. So I'm here to see if there's any other solutions for this. Thanks in advance.
if ((flags = fcntl(fd, F_GETFL, 0)) == -1) {
syslog(LOG_USER|LOG_ERR, "fcntl get failed: %s", strerror(errno));
close(fd);
return -1;
}
if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
syslog(LOG_USER|LOG_ERR, "set fd nonblocking failed: %s", strerror(errno));
close(fd);
return -1;
}
if(connect(fd, (struct sockaddr *) &address, sizeof(struct sockaddr_un)) != 0) {
if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINPROGRESS) {
close(fd);
return -1;
}
FD_ZERO(&set);
FD_SET(fd, &set);
if(select(fd + 1, NULL, &set, NULL, &timeout) <= 0) {
close(fd);
return -1;
}
/*
if(connect(fd, (struct sockaddr *) &address, sizeof(struct sockaddr_un)) != 0) {
close(fd);
return -1;
}
*/
if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, (socklen_t *)&len) < 0) {
syslog(LOG_USER|LOG_ERR, "getsockopt failed: %s", strerror(errno));
close(fd);
return -1;
}
if(error != 0) {
syslog(LOG_USER|LOG_ERR, "getsockopt return error: %d", error);
close(fd);
return -1;
}
}
if (fcntl(fd, F_SETFL, flags & ~O_NONBLOCK) == -1) {
syslog(LOG_USER|LOG_ERR, "set fd blocking failed: %s", strerror(errno));
close(fd);
return -1;
}

Somewhere (I did not bookmark that page) in another post I found that the connect() only establishes a TCP connection. It only means that on the other end, there is a working TCP stack, but it does not mean, the server has actually accept()-ed!
The example there was connect() is like calling a support center, and the automatic voice tells you, you are in a queue, but you still cannot communicate. accept() is the actual operator taking your call.
My solution for the same problem will be to have the client wait for the server to actually send something, before moving on with other client-stuff. I can put this in a select-timeout loop.
listen() has a parameter, how many connections can be put in a backlog before starting to drop client connection attempts.

You can use select() or poll() after EINPROGRESS, as described in the connect man page. If you get EAGAIN or EWOULDBLOCK, the Unix domain socket has run out of backlog entries, the queue length specified by the server with the listen() call. The connect() failed.
Note that a connecting client can be able to write to Unix domain sockets until the system buffer is full, before the server even accepted the call. That works for each backlog buffer. Failures occur afterwards.
A failed connect() might need a new socket before retrying. select() might return 0 also if the connection was refused, such as if the server didn't listen(). That depends on system and libray. At any rate, after an error of EAGAIN, it is necessary to retry. For example:
int rtc, so_error, max_retry = 5;
socklen_t len = sizeof so_error;
while ((rtc = connect(fd, (struct sockaddr *)&address, sizeof address)) != 0
&& errno == EAGAIN && --max_retry >= 0) {
sleep(1);
// new socket?
}
if (rtc < 0 && errno != EINPROGRESS) {
syslog(LOG_USER|LOG_ERR, "connect returned %d: %s", rtc, strerror(errno));
close(fd);
return -1;
}
if (rtc < 0)
{
fd_set set, wset, eset;
struct timeval timeout;
timeout.tv_sec = 10;
timeout.tv_usec = 0;
FD_ZERO(&set);
FD_SET(fd, &set);
wset = set;
eset = set;
if(select(fd + 1, &set, &wset, &eset, &timeout) <= 0) {
close(fd);
return -1;
}
// [...]
}

Related

select() returns 0 with an error Operation now in progress after connect()

So here I am setting up a TCP connection to the server and is invoked from an application in a loop and sometimes I end up seeing the following error
select() timed out after 4 seconds - Operation now in progress
which means select did return 0 which means it timed out in 5 seconds without observing any activity on the file descriptor.
My understanding is nonblocking mode is set after connect() in case it doesn't connect right away with getsockopt() indicating whether connect() call did establish but for some reason, select seems to be returning 0. Does it have to with delay being too small?
int InitializeSocket(int sockType, int protocol, long timeout)
{
int socketFd = socket(AF_INET, sockType, protocol);
if (socketFd < 0)
{
perror ("Failed to create a client socket of type %d", sockType);
return -1;
}
if (timeout > 0)
{
struct timeval sockTimeout = {.tv_sec = timeout, .tv_usec = 0};
// setting the receive timeout
if (setsockopt(socketFd, SOL_SOCKET, SO_RCVTIMEO, &sockTimeout, sizeof(sockTimeout)) < 0)
{
perror ("Failed to set the RX timeout");
return -1;
}
// setting the send timeout
if (setsockopt(socketFd, SOL_SOCKET, SO_SNDTIMEO, &sockTimeout, sizeof(sockTimeout)) < 0)
{
perror ("Failed to set the TX timeout");
return -1;
}
}
return socketFd;
}
void OpenTcpConnection(int serverTimeout, int port, const char *ipAddr)
{
struct sockaddr_in *address
int socketFd = InitializeSocket(SOCK_STREAM, 0, serverTimeout);
if (socketFd == -1)
{
return -1;
}
address->sin_family = AF_INET;
address->sin_port = htons(port);
address->sin_addr.s_addr = inet_addr(ipAddr);
memset(address->sin_zero, '\0', sizeof(address->sin_zero));
// get the existing file flags
long arg = 0;
if( (arg = fcntl(socketFd, F_GETFL, NULL)) < 0)
{
perror ("Failed to get file status flags");
exit(0);
}
// set the socket to nonblocking mode
arg |= O_NONBLOCK;
if( fcntl(socketFd, F_SETFL, arg) < 0)
{
perror ("Failed to set to nonblocking mode");
return -1;
}
// connect to the server
int res = connect(socketFd, (struct sockaddr *) &address, sizeof(address));
fd_set fdset;
struct timeval tv;
long selectTimeout = 4; // connect() timeout
if (res < 0)
{
// the socket is nonblocking & the connection cannot be completed immediately
if (errno == EINPROGRESS)
{
do
{
tv.tv_sec = selectTimeout;
tv.tv_usec = 0;
FD_ZERO(&fdset);
FD_SET(socketFd, &fdset);
res = select(socketFd+1, NULL, &fdset, NULL, &tv);
if (res < 0 && errno != EINTR)
{
perror ("Failed to monitor socket FD %d", socketFd);
return -1;
}
else if (res > 0)
{
int so_error;
socklen_t len = sizeof so_error;
int valopt;
// check whether connect() completed successfully
if (getsockopt(socketFd, SOL_SOCKET, SO_ERROR, (void*)(&valopt), &len) < 0)
{
perror ("Error in getsockopt");
return -1;
}
if (valopt)
{
perror ("Error in delayed connection");
return -1;
}
break;
}
else
{
perror ("select() timed out after %ld seconds", selectTimeout); // ERROR HERE !!!
return -1;
}
} while(1);
}
}
}

Per the select() man page:
select() returns the number of ready descriptors that are contained in
the descriptor sets, or -1 if an error occurred. If the time limit
expires, select() returns 0.
... so if select() is returning 0, it's because no I/O operations were completed before your timeout was reached.
As for why no I/O operations were completed: if you were waiting for a TCP connection to complete, then the most likely explanation is that the TCP connection hadn't completed yet (perhaps because of a slow, overloaded, or broken network or server?).
Another (less likely, but possible) explanation might be that you are running your program under Windows, and under Windows, if a non-blocking connect() fails, that failure is indicated by setting a bit in the exceptions fd_set (i.e. the one that you would pass in as the fourth argument to select(), just before the timeout-argument). In the posted code you are passing in NULL for that argument, which means that under Windows you would have no way of knowing when your non-blocking TCP connection attempt has failed. (under other OS's, a failed connection would cause the socket to select as ready-for-read and ready-for-write also, making a connection-failure easier to react to)

My understanding is nonblocking mode is set after connect() in case it doesn't connect right away with getsockopt() indicating whether connect() call did establish but for some reason, select seems to be returning 0. Does it have to with delay being too small?
nonblocking had to be set after the socket(2) call, and before the connect(2) call, or the connect would be blocked (not reaching the select() call) until the connect(2) fails. This is normally over two minutes, and this trick is done to wait only 5s. in the connect call.
A 5 sec delay is normally small for a remote connection of a remote site. In a lan, if you don't get connected in 5s. then it means something is wrong.
My bet is that something is wrong, you are trying to connect to a socket that is not available (non-existent host, check that the server is listening in the address:port you are trying to connect to), you have forgot to convert into net byte order some fields in the sockaddr_in structure (this appears to be correct in your snippet) or a firewall is blocking you from connecting (this can be the thing). You are waiting for the socketFd to be available for writing, which is correct, as it wouldn't be (and block) if the connection is not connected first, so apparently you are doing things correctly, so some address has been mispelled or a firewall is cutting the access to the server.
Either way, a timeout in select is not an error, but just a timeout. The software your are using is considering a timeout of 5s. fatal in a socket, so you have to ask the developer or check your network connection.

close() does not close tcp connection if interface lost its ip address

My program has an established tcp connection when linux box loses its DHCP IP address lease. After that it tries to close the connection so when dhcp server recovers it will re-establish tcp connection again. It uses SO_REUSEADDR.
I did read this http://hea-www.harvard.edu/~fine/Tech/addrinuse.html but in this application reuse address is a requirement.
The way I reproduce this problem is by issuing ifconfig etho 0.0.0.0
However, the result of close(sockfd) is unpredictable. Sometimes it closes socket properly. Sometimes netstat -ant continuously shows
tcp 0 0 192.168.1.119:54322 192.168.1.41:54321 (STATE)
where (STATE) can one of ESTABLISHED, or FIN_WAIT1, or CLOSE_WAIT.
Originally my code had just close(). After reading multiple sources online, I tried some suggestions.
First I tried this (based on http://deepix.github.io/2016/10/21/tcprst.html)
if (sockFd != -1) {
linger lin;
lin.l_onoff = 1;
lin.l_linger = 0;
if (setsockopt(sockFd, SOL_SOCKET, SO_LINGER, (const char *)&lin, sizeof(linger)) == -1) {
std::cout << "Error setting socket opt SO_LINGER while trying to close " << std::endl;
}
close(sockFd);
}
It did not help, so I tried this (based on close() is not closing socket properly )
bool haveInput(int fd, double timeout) {
int status;
fd_set fds;
struct timeval tv;
FD_ZERO(&fds);
FD_SET(fd, &fds);
tv.tv_sec = (long)timeout; // cast needed for C++
tv.tv_usec = (long)((timeout - tv.tv_sec) * 1000000); // 'suseconds_t'
while (1) {
if (!(status = select(fd + 1, &fds, 0, 0, &tv)))
return FALSE;
else if (status > 0 && FD_ISSET(fd, &fds))
return TRUE;
else if (status > 0)
break;
else if (errno != EINTR)
break;
}
}
void myClose(int sockFd)
{
if (sockFd != -1) {
int err = 1;
socklen_t len = sizeof err;
getsockopt(sockFd, SOL_SOCKET, SO_ERROR, (char *)&err, &len);
shutdown(sockFd, SHUT_WR);
usleep(20000);
char discard[99];
while (haveInput(sockFd, 0.01))
if (!read(sockFd, discard, sizeof discard))
break;
shutdown(sockFd, SHUT_RD);
usleep(20000);
close(sockFd);
sockFd = -1;
}
}
As before, sometimes it closes connection, and sometimes it does not.
I understand that in this case the other side can send neither FIN nor ACK, so graceful close is just not possible.
Is there a reliable way to completely close tcp connection in such conditions?
Thank you

Can't seem to get a timeout working when connecting to a socket

I'm trying to supply a timeout for connect(). I've searched around and found several articles related to this. I've coded up what I believe should work but unfortunately I get no error reported from getsockopt(). But then when I come to the write() it fails with an errno of 107 - ENOTCONN.
A couple of points. I'm running on Fedora 23. The docs for connect() says it should return failure with an errno of EINPROGRESS for a connect that is not complete yet however I was experiencing EAGAIN so I added that to my check. Currently my socket server is setting the backlog to zero in the listen() call. Many of the calls succeed but the ones that fail all fail with the 107 - ENOTCONN I had mentioned in the write() call.
I'm hoping I'm just missing something but so far can't figure out what.
int domain_socket_send(const char* socket_name, unsigned char* buffer,
unsigned int length, unsigned int timeout)
{
struct sockaddr_un addr;
int fd = -1;
int result = 0;
// Create socket.
fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (fd == -1)
{
result = -1;
goto done;
}
if (timeout != 0)
{
// Enabled non-blocking.
int flags;
flags = fcntl(fd, F_GETFL);
fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
// Set socket name.
memset(&addr, 0, sizeof(addr));
addr.sun_family = AF_UNIX;
strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path) - 1);
// Connect.
result = connect(fd, (struct sockaddr*) &addr, sizeof(addr));
if (result == -1)
{
// If some error then we're done.
if ((errno != EINPROGRESS) && (errno != EAGAIN))
goto done;
fd_set write_set;
struct timeval tv;
// Set timeout.
tv.tv_sec = timeout / 1000000;
tv.tv_usec = timeout % 1000000;
unsigned int iterations = 0;
while (1)
{
FD_ZERO(&write_set);
FD_SET(fd, &write_set);
result = select(fd + 1, NULL, &write_set, NULL, &tv);
if (result == -1)
goto done;
else if (result == 0)
{
result = -1;
errno = ETIMEDOUT;
goto done;
}
else
{
if (FD_ISSET(fd, &write_set))
{
socklen_t len;
int socket_error;
len = sizeof(socket_error);
// Get the result of the connect() call.
result = getsockopt(fd, SOL_SOCKET, SO_ERROR,
&socket_error, &len);
if (result == -1)
goto done;
// I think SO_ERROR will be zero for a successful
// result and errno otherwise.
if (socket_error != 0)
{
result = -1;
errno = socket_error;
goto done;
}
// Now that the socket is writable issue another connect.
result = connect(fd, (struct sockaddr*) &addr,
sizeof(addr));
if (result == 0)
{
if (iterations > 1)
{
printf("connect() succeeded on iteration %d\n",
iterations);
}
break;
}
else
{
if ((errno != EAGAIN) && (errno != EINPROGRESS))
{
int err = errno;
printf("second connect() failed, errno = %d\n",
errno);
errno = err;
goto done;
}
iterations++;
}
}
}
}
}
// If we put the socket in non-blocking mode then put it back
// to blocking mode.
if (timeout != 0)
{
// Turn off non-blocking.
int flags;
flags = fcntl(fd, F_GETFL);
fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
}
// Write buffer.
result = write(fd, buffer, length);
if (result == -1)
{
int err = errno;
printf("write() failed, errno = %d\n", err);
errno = err;
goto done;
}
done:
if (result == -1)
result = errno;
else
result = 0;
if (fd != -1)
{
shutdown(fd, SHUT_RDWR);
close(fd);
}
return result;
}
UPDATE 04/05/2016:
It dawned on me that maybe I need to call connect() multiple times until successful, after all this is non-blocking io not async io. Just like I have to call read() again when there is data to read after encountering an EAGAIN on a read(). In addition, I found the following SO question:
Using select() for non-blocking sockets to connect always returns 1
in which EJP's answer says you need to issue multiple connect()'s. Also, from the book EJP references:
https://books.google.com/books?id=6H9AxyFd0v0C&pg=PT681&lpg=PT681&dq=stevens+and+wright+tcp/ip+illustrated+non-blocking+connect&source=bl&ots=b6kQar6SdM&sig=kt5xZubPZ2atVxs2VQU4mu7NGUI&hl=en&sa=X&ved=0ahUKEwjmp87rlfbLAhUN1mMKHeBxBi8Q6AEIIzAB#v=onepage&q=stevens%20and%20wright%20tcp%2Fip%20illustrated%20non-blocking%20connect&f=false
it seems to indicate you need to issue multiple connect()'s. I've modified the code snippet in this question to call connect() until it succeeds. I probably still need to make changes around possibly updating the timeout value passed to select(), but that's not my immediate question.
Calling connect() multiple times appears to have fixed my original problem, which was that I was getting ENOTCONN when calling write(), I guess because the socket was not connected. However, you can see from the code that I'm tracking how many times through the select loop until connect() succeeds. I've seen the number go into the thousands. This gets me worried that I'm in a busy wait loop. Why is the socket writable even though it's not in a state that connect() will succeed? Is calling connect() clearing that writable state and it's getting set again by the OS for some reason, or am I really in a busy wait loop?
Thanks,
Nick

From http://lxr.free-electrons.com/source/net/unix/af_unix.c:
441 static int unix_writable(const struct sock *sk)
442 {
443 return sk->sk_state != TCP_LISTEN &&
444 (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
445 }
I'm not sure what these buffers are that are being compared, but it looks obvious that the connected state of the socket is not being checked. So unless these buffers are modified when the socket becomes connected it would appear my unix socket will always be marked as writable and thus I can't use select() to determine when the non-blocking connect() has finished.
and based on this snippet from http://lxr.free-electrons.com/source/net/unix/af_unix.c:
1206 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1207 int addr_len, int flags)
.
.
.
1230 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
.
.
.
1271 if (unix_recvq_full(other)) {
1272 err = -EAGAIN;
1273 if (!timeo)
1274 goto out_unlock;
1275
1276 timeo = unix_wait_for_peer(other, timeo);
.
.
.
it appears setting the send timeout might be capable of timing out the connect. Which also matches the documentation for SO_SNDTIMEO at http://man7.org/linux/man-pages/man7/socket.7.html.
Thanks,
Nick

Your error handling on select() could use some cleanup. You don't really need to query SO_ERROR unless except_set is set. If select() returns > 0 then either write_set and/or except_set is set, and if except_set is not set then the connection was successful.
Try something more like this instead:
int domain_socket_send(const char* socket_name, unsigned char* buffer,
unsigned int length, unsigned int timeout)
{
struct sockaddr_un addr;
int fd;
int result;
// Create socket.
fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (fd == -1)
return errno;
if (timeout != 0)
{
// Enabled non-blocking.
int flags = fcntl(fd, F_GETFL);
fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
// Set socket name.
memset(&addr, 0, sizeof(addr));
addr.sun_family = AF_UNIX;
strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path) - 1);
// Connect.
result = connect(fd, (struct sockaddr*) &addr, sizeof(addr));
if (result == -1)
{
// If some error then we're done.
if ((errno != EINPROGRESS) && (errno != EAGAIN))
goto done;
// Now select() to find out when connect() has finished.
fd_set write_set;
fd_set except_set;
FD_ZERO(&write_set);
FD_ZERO(&write_set);
FD_SET(fd, &write_set);
FD_SET(fd, &except_set);
struct timeval tv;
// Set timeout.
tv.tv_sec = timeout / 1000000;
tv.tv_usec = timeout % 1000000;
result = select(fd + 1, NULL, &write_set, &except_set, &tv);
if (result == -1)
{
goto done;
}
else if (result == 0)
{
result = -1;
errno = ETIMEDOUT;
goto done;
}
else if (FD_ISSET(fd, &except_set))
{
int socket_error;
socklen_t len = sizeof(socket_error);
// Get the result of the connect() call.
result = getsockopt(fd, SOL_SOCKET, SO_ERROR, &socket_error, &len);
if (result != -1)
{
result = -1;
errno = socket_error;
}
goto done;
}
else
{
// connected
}
}
// If we put the socket in non-blocking mode then put it back
// to blocking mode.
if (timeout != 0)
{
int flags = fcntl(fd, F_GETFL);
fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
}
// Write buffer.
result = write(fd, buffer, length);
done:
if (result == -1)
result = errno;
else
result = 0;
if (fd != -1)
{
shutdown(fd, SHUT_RDWR);
close(fd);
}
return result;
}

Non-blocking connect() and EINTR

I am using connect_nonb() from Stevens, UNIX Network programming:
int
connect_nonb(int sockfd, const SA *saptr, socklen_t salen, int nsec)
{
int flags, n, error;
socklen_t len;
fd_set rset, wset;
struct timeval tval;
flags = Fcntl(sockfd, F_GETFL, 0);
Fcntl(sockfd, F_SETFL, flags | O_NONBLOCK);
error = 0;
if ( (n = connect(sockfd, saptr, salen)) < 0)
if (errno != EINPROGRESS)
return(-1);
/* Do whatever we want while the connect is taking place. */
if (n == 0)
goto done; /* connect completed immediately */
FD_ZERO(&rset);
FD_SET(sockfd, &rset);
wset = rset;
tval.tv_sec = nsec;
tval.tv_usec = 0;
if ( (n = Select(sockfd+1, &rset, &wset, NULL,
nsec ? &tval : NULL)) == 0) {
close(sockfd); /* timeout */
errno = ETIMEDOUT;
return(-1);
}
if (FD_ISSET(sockfd, &rset) || FD_ISSET(sockfd, &wset)) {
len = sizeof(error);
if (getsockopt(sockfd, SOL_SOCKET, SO_ERROR, &error, &len) < 0)
return(-1); /* Solaris pending error */
} else
err_quit("select error: sockfd not set");
done:
Fcntl(sockfd, F_SETFL, flags); /* restore file status flags */
if (error) {
close(sockfd); /* just in case */
errno = error;
return(-1);
}
return(0);
}
This function allows a custom timeout of connect(). If, whilst blocking in select() waiting for the connect to succeed, a signal is received, select() exits with -1 (EINTR). At this point the select() timeout has not expired, the connect has not succeeded (i.e. the target host could be disconnected) but the subsequent getsockopt() does not return an error.
Should getsockopt() return an error or should the Stevens code check the return code (and errno) of select()?
Currently when connecting to a non-existent host and a signal interrupts select() this function returns success incorrectly.

I'm not sure what Select() is. I assume it's some kind of thin wrapper around select().
In most applications, whenever select() fails with EINTR, you should silently loop and call select() again, possibly after recalculating the timeout to account for the fact that some time has elapsed in the prior call to select().
This case is no exception. select() should be in a loop.

Block Socket with Unix and C/C++ Help

I'm trying to figure out what is blocking my program. I'm running a server that uses POSIX threads. I have to for my computer programming lab. The main function listens for new connections. Once it accepts a connection, it creates a new thread by passing the FD to the thread. I'm able to successfully connect to the server using multiple telnet/client connections. I can send data to the server successfully once, but if I try sending again the server won't do anything.
Part of the main function
int active_thread = 0;
//The Running loop
while(running)
{
if(active_thread > NUMBTHREADS)
{
printf("Unable to accept client connection! Threads are all used up");
running = false;
}
else
{
if(FD_ISSET(sockfd, &readfds))
{
if((bindfd[active_thread] = accept(sockfd, (struct sockaddr *) &client_addr, &client_sock_size)) == -1)
{
fprintf(stderr, "Unable to accept client \n");
perror("What");
break;
}
activethreads[active_thread] = pthread_create( &threads[active_thread], NULL, server_handler, (void*) &bindfd[active_thread]);
//close(bindfd[active_thread]);
//pthread_join( threads[active_thread], NULL);
active_thread++;
//running = false;
}
}
}
close(sockfd);
return 0;
}
Part of the POSIX THREAD CODE
void *server_handler( void *sockfd)
{
int bindfd = *( (int *) sockfd);
char buffer[MESSAGELENGTH];
bool running = true;
printf("Thread was created successfully\n");
char intro[] = "Successfully Connected to server!\n";
struct pollfd pfd;
pfd.fd = bindfd;
pfd.events = POLLIN;
if ( (send(bindfd, intro, strlen(intro), 0)) < 0)
{
perror("Unable to send");
}
while(running){
char msg[] = "\nYou have the following options!\n1) Insert an integer: insert <integer>\n2) Remove An Integer: remove <integer>\n3) Get number of integers in list: get_count\n4) Get first integer: get_first\n5) Get last integer: get_last\n6) Quit program: quit\n ";
if ( (send(bindfd, msg, strlen(msg), 0)) < 0)
{
perror("Unable to send");
}
memset(&buffer, 0, MESSAGELENGTH);
if (recv(bindfd, buffer, MESSAGELENGTH, 0) > 0)
{
//SOme other code
}
}
I think its blocking at either the accept or recv. I've heard of select() and various other methods, but I'm having difficulty trying to implement them. Thanks!

The root cause of your issue appears to be that you are unconditionally executing close(sockfd); return 0; at the bottom of your while (running) loop, which means that the loop only ever executes once.
Additionally, you should not be using FD_ISSET() unless you are also using select(). Your main loop should look something more like:
int active_thread = 0;
while (active_thread < NUMBTHREADS)
{
if((bindfd[active_thread] = accept(sockfd, (struct sockaddr *) &client_addr, &client_sock_size)) == -1)
{
fprintf(stderr, "Unable to accept client \n");
perror("What");
break;
}
activethreads[active_thread] = pthread_create( &threads[active_thread], NULL, server_handler, (void*) &bindfd[active_thread]);
active_thread++;
}
if (active_thread >= NUMBTHREADS)
{
printf("Unable to accept client connection! Threads are all used up.\n");
}
running = false;
close(sockfd);
return 0;

By default network sockets are blocking. You need to set the O_NONBLOCK flag on the socket.
if(fcntl(fd, F_GETFL, &flags) < 0 ||
fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0)
perror("Failed to set socket as non-blocking");
Now, instead of blocking when there is no input (or buffer space to store output), the error EAGAIN (or EWOUDLBLOCK) is returned. Lastly, you will need to use select() or poll() when you have nothing else to do but wait on I/O. These calls will only wake the process when either there is input, room for output, or possibly a time-out period passes.
int maxFd;
fdset fds;
FD_ZERO(&fds);
FD_SET(listenFd, &fds);
FD_SET(sockFd1, &fds);
FD_SET(sockFd2, &fds);
maxFd = listenFd+1;
maxFd = sockFd1 > maxFd ? sockFd1+1 : maxFd;
maxFd = sockFd2 > maxFd ? sockFd2+1 : maxFd;
if(select(maxFd, &fds, &fds, &fds, NULL) < 0) {
perror("Failed on select()");
exit(1);
}
if(FD_ISSET(listenFd, &fds))
...
This example is not complete or neccessarily 100% correct, but should be a good start. Also, I tend to reserve using send*() and recv*() when dealing with SOCK_DGRAM sockets and just use read(), write() on SOCK_STREAM sockets.

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

set connect() timeout on unix domain socket - c

Related

select() returns 0 with an error Operation now in progress after connect()

close() does not close tcp connection if interface lost its ip address

Can't seem to get a timeout working when connecting to a socket

Non-blocking connect() and EINTR

Block Socket with Unix and C/C++ Help

Categories

Resources