esp32 idf multi-socket-server - c

It's my first post so ask for remotely anything if it can help and I didn't provide it.
My application requires the master to open multiple sockets at once; the slaves then connect to WiFi and, after that, to those sockets.
The problem is: I have to make it "bulletproof" against slaves constantly reconnecting, and I get an accept error:
E (23817) TCP SOCKET: accept error: -1 Too many open files in system
It appears when I reconnect client for 5th time, when Max Number of Open Sockets = 5 in menuconfig,
I disconnect a client from the server when it doesn't send anything for 1 second; I then assume it got disconnected (DC-d).
I do it with close() procedure.
/*
 * Tears down slot `slot` and starts a fresh handler task for it.
 *
 * The handler is stopped before the descriptor is closed so that both restart
 * paths release the socket in the same order, and the descriptor is set to -1
 * afterwards so it cannot be closed a second time.
 * NOTE(review): assumes stopSocketHandler() ends the task that was using
 * clientSock[slot] - confirm against its definition.
 */
static void restartSocketSlot(int slot)
{
    stopSocketHandler(slot);
    if (clientSock[slot] >= 0)
    {
        ESP_LOGI("close", "%d", close(clientSock[slot]));
        clientSock[slot] = -1;
    }
    isSocketOpened[slot] = 0;
    needsRestart[slot] = 0;
    xTaskCreate( handleNthSocket, "TCP_HANDLER", 10*1024, &(sockNums[slot]) , tskIDLE_PRIORITY, &socketHandlerHandle[slot]);
    configASSERT(socketHandlerHandle[slot]);
}

/*
 * Supervisor task: walks all `openedSockets` slots forever and restarts a
 * slot's handler when
 *   - the handler asked for it (needsRestart[i] == 1), or
 *   - the slot is connected but its last activity stamp (lastWDT[i]) is more
 *     than 2*TCPWDT behind esp_timer_get_time().
 *
 * ignore: unused task parameter.
 */
void closeOvertimedTask(void * ignore)
{
    (void)ignore;
    while(1)
    {
        /* The index must advance, otherwise only slot 0 is ever examined. */
        for(int i = 0; i < openedSockets; i++)
        {
            if(needsRestart[i] == 1)
            {
                ESP_LOGI("RESTARTING", " task#%d",i);
                restartSocketSlot(i);
            }
            else if(isSocketOpened[i])
            {
                /* 64-bit arithmetic: the timer value no longer fits in an int
                 * once it passes 2^31 (about 36 minutes if it counts microseconds). */
                int64_t idle = (int64_t)esp_timer_get_time() - (int64_t)lastWDT[i];
                if(idle > 2 * (int64_t)TCPWDT)
                {
                    ESP_LOGI("I FOUND OUT HE DC-d","");
                    restartSocketSlot(i);
                }
            }
        }
        /* Yield between scans so this loop does not monopolise the CPU. */
        vTaskDelay(pdMS_TO_TICKS(10));
    }
}
For each socket I run 1 task that is supposed to receive from that socket and act further.
For all of them I have an other task that checks last time a message arrived and restarts tasks when time has exceeded (it's 2 seconds)
I need around 16 sockets opened in the final version so there is no room to have sockets that are still closing after Slave has restarted whole connection
How do I properly stop a task that has a recv() call running in it, so that its socket is closed properly?
Is there a way to read from Server side that socket has been closed if WiFi hasn't realized STA DC-d
Is this about TIME_WAIT from tcp stack ?
Socket read code:
void handleNthSocket(void * param) // 0 <= whichSocket < openedSockets
{
int whichSocket = *((int *) param);
ESP_LOGI("TCP SOCKET", "%s #%d", getSpaces(whichSocket), whichSocket);
struct sockaddr_in clientAddress;
while (1)
{
if(needsRestart [whichSocket] == 0)
{
socklen_t clientAddressLength = sizeof(clientAddress);
clientSock[whichSocket] = accept(sock[whichSocket], (struct sockaddr *)&clientAddress, &clientAddressLength);
if (clientSock[whichSocket] < 0)
{
ESP_LOGE("TCP SOCKET", "accept error: %d %s", clientSock[whichSocket], strerror(errno)); //HERE IT FLIPS
//E (232189) TCP SOCKET: accept error: -1 Too many open files in system
isSocketOpened[whichSocket] = 0;
needsRestart[whichSocket] = 1;
continue;
}
//isSocketOpened[whichSocket] = 1;
// We now have a new client ...
int total = 1000;
char dataNP[1000];
char *data;
data = &dataNP[0];
for(int z = 0; z < total; z++)
{
dataNP[z] = 0;
}
ESP_LOGI("TCP SOCKET", "%snew client",getSpaces(whichSocket));
ESP_LOGI(" ", "%s#%d connected",getSpaces(whichSocket), whichSocket);
lastWDT[whichSocket] = (uint64_t)esp_timer_get_time() + 1000000;
isSocketOpened[whichSocket] = 1;
// Loop reading data.
while(isSocketOpened[whichSocket])
{
/*
if (sizeRead < 0)
{
ESP_LOGE(tag, "recv: %d %s", sizeRead, strerror(errno));
goto END;
}
if (sizeRead == 0)
{
break;
}
sizeUsed += sizeRead;
*/
ssize_t sizeRead = recv(clientSock[whichSocket], data, total, 0);
/*for (int k = 0; k < sizeRead; k++)
{
if(*(data+k) == '\n')
{
ESP_LOGI("TCP DATA ", "%sthere was enter", getSpaces(whichSocket));
//ESP_LOGI("TIME ", "%d", (int)esp_timer_get_time());
}
//ESP_LOGI("last wdt", "%d", (int)lastWDT[whichSocket]);
}*/
lastWDT[whichSocket] = (uint64_t)esp_timer_get_time();
int diff = ((int)((uint64_t)esp_timer_get_time()) - lastWDT[whichSocket]) - 2*TCPWDT;
ESP_LOGI("last wdt", "%d, data = %s", (int)lastWDT[whichSocket], data);
if(diff > 0)
{
ESP_LOGI("last wdt", "too long - %d", diff);
isSocketOpened[whichSocket] = 0;
}
if (sizeRead < 0)
{
isSocketOpened[whichSocket] = 0;
}
//TODO: all RX from slave routine
for(int k = 0; k < sizeRead; k++)
{
*(data+k) = 0;
}
// ESP_LOGI("lol data", "clientSock[whichSocket]=%d,
/*if(sizeRead > -1)
{
ESP_LOGI("TCP DATA: ", "%c", *(data + sizeRead-1));
}
else
{
ESP_LOGI("TCP DC ", "");
goto END;
}*/
}
if(isSocketOpened[whichSocket])
{
ESP_LOGI("closing result", "%d", close(clientSock[whichSocket]));
}
}
}
}

I don't see you closing your sockets anywhere?
Sockets, no matter the platform, are usually a limited resource, and a resource that will be reused. If you don't close the sockets then the system will think that you still use them, and can't reuse those sockets for new connections (and on POSIX systems even opening files will be affected).
So close connections immediately when they are not needed any more.
Usually this is done by checking what recv and send return: if they return a value less than zero, an error occurred, and in most cases it's a non-recoverable error, so the connection should be closed. Even if it is a recoverable error, it's easier to close the connection and let the client reconnect.
For recv there's also the special case when it returns zero. That means the other end has closed the connection, and of course you then need to close your end as well.

this post solved all my problems
https://www.esp32.com/viewtopic.php?t=911

Related

Chat room using socket programming with select() - winsock - C

I try to create a server-client application where the server provides a chat service to all clients that connect to the server. The server and client use cryptographic algorithms and protocols to secure data transmitted over the network. I can't figure out why the chat code isn't working properly.
I use the select() function to serve multiple sockets at the same time. With only the receiving part of the code, multiple clients can connect and send data, and the server receives everything correctly. But as soon as I add the part that implements the chat function, the server serves only the last connected client, even though several clients are connected. I store the necessary client information in a dynamically allocated linked list. When I list the currently connected clients without the chat-room part of the code, every client that connects is accepted; once the chat-room part is enabled, only the last connected client is served.
This is code for server:
// Server main loop as posted in the question. Each pass blocks in select() and then
// walks every descriptor from 1 to max_socket: the listening socket accepts a new
// client, any other readable socket is read and decrypted into pom_buffer, and the
// "chat room" part then encrypts pom_buffer for every other socket in `master`.
while(1) {
fd_set reads;
reads = master;
//The select function determines the status of one or more sockets, waiting if necessary, to perform synchronous I/O
if (select(max_socket+1, &reads, 0, 0, 0) < 0) {
fprintf(stderr, "select() failed. (%d)\n", GETSOCKETERRNO());
return 1;
}
SOCKET i;
//Loop through each possible socket
for(i = 1; i <= max_socket; ++i) {
if (FD_ISSET(i, &reads)) {
//If socket_listen, create TCP connection of accept() function
if (i == socket_listen) {
//
client_info = create_client();
client_info->client_len = sizeof(client_info->client_address);
client_info->sock_fd = accept(socket_listen,
(struct sockaddr*) &client_info->client_address,
&client_info->client_len);
if (!ISVALIDSOCKET(client_info->sock_fd)) {
fprintf(stderr, "accept() failed. (%d)\n",
GETSOCKETERRNO());
return 1;
}
FD_SET(client_info->sock_fd, &master);
if (client_info->sock_fd > max_socket)
max_socket = client_info->sock_fd;
//Prints the client address using the getnameinfo() function
getnameinfo((struct sockaddr*)&client_info->client_address,
client_info->client_len,
client_info->address_buffer,
100, 0, 0,
NI_NUMERICHOST);
printf("New connection %s\n", client_info->address_buffer);
printf("\nWaiting for succeses Salt handshake...\n");
//Salt handshake
salt_hndshk(client_info);
//Insert client to the list of clients
insert(p_list, client_info);
//List of clients connected to the server with a successful Salt handshake
listing_clients(p_list);
} else {
// NOTE(review): rx_buffer is cleared using the size of a different buffer (hndsk_buffer).
memset(rx_buffer, 0, sizeof(hndsk_buffer));
//Search for clients by sockets and the is in the list
//the server decrypts the data from the client
// NOTE(review): the object returned by create_client() is overwritten on the next
// line by the search_client() result and is never released.
CLIENT *client_decrypt = create_client();
client_decrypt = search_client(p_list, i);
ret_msg = salt_read_begin_pom(&client_decrypt->channel, rx_buffer,
sizeof(rx_buffer), &msg_in, pom_buffer, &decrypt_size);
//Check if SALT_ERROR from message
if(ret_msg == SALT_ERROR) {
printf("\tThe client disconnects from the server.\n");
printf("\tThe server has closed him socket\n");
realese_client(p_list, client_decrypt);
FD_CLR(i, &master);
CLOSESOCKET(i);
continue;
}
//Freeing client memory
// NOTE(review): at this point client_decrypt is the pointer search_client() returned
// from p_list, so this frees an entry that the list was searched for.
free(client_decrypt);
}
//Chat room service
// This part sits after the if/else above, so it also runs on the pass in which the
// listening socket accepted a connection (no new message was read in that case).
SOCKET j;
for(j = 1; j <= max_socket; ++j){
if(FD_ISSET(j, &master)){
if (j == socket_listen || j == i){
continue;
} else {
memset(rx_buffer, 0, sizeof(hndsk_buffer));
//Search for clients by sockets and the is in the list
// NOTE(review): same create_client()/search_client()/free() sequence as in the read path above.
CLIENT *client_encrypt = create_client();
client_encrypt = search_client(p_list, j);
//Prepare data before send
salt_write_begin(tx_buffer, sizeof(tx_buffer), &msg_out);
//Copy clear text message to be encrypted to next encrypted package
salt_write_next(&msg_out, (uint8_t * )pom_buffer, decrypt_size);
//Wrapping, creating encrpted messages
// The return values of the three salt_write_* calls are not inspected.
salt_write_execute(&client_encrypt->channel, &msg_out, false);
//Freeing client memory
free(client_encrypt);
}
} //if(FD_ISSET(j, &master)
} //for(j = 1; j <= max_socket; ++j)
//Finish chat room service
} //if FD_ISSET
} //for i to max_socket
}
There is a link to the application on this link:
tcp_salt
You have logic errors in both of your inner for loops.
When reading from/writing to a non-listening client socket, DO NOT call create_client() at all, you are creating memory leaks with it:
CLIENT *client_decrypt = create_client();
client_decrypt = search_client(...); // <-- LEAK!
CLIENT *client_encrypt = create_client();
client_encrypt = search_client(...); // <-- LEAK!
Call create_client() ONLY when you accept() a new client. And DO NOT call free() on any CLIENT you read from/write to. Call that ONLY when you are removing a CLIENT from p_list.
You are corrupting your p_list on each loop iteration, leaving it with a bunch of dangling pointers to invalid CLIENTs.
Also, your writing code is not checking for errors to disconnect and remove dead clients.
Try something more like this:
// Server main loop: wait in select(), accept new clients on the listening socket,
// decrypt a message from any other readable client and relay it, re-encrypted, to
// every other connected client. CLIENT objects are created only on accept() and
// freed only when the client is removed from p_list.
while(1) {
    fd_set reads;
    reads = master;
    //The select function determines the status of one or more sockets, waiting if necessary, to perform synchronous I/O
    if (select(max_socket+1, &reads, 0, 0, 0) < 0) {
        fprintf(stderr, "select() failed. (%d)\n", GETSOCKETERRNO());
        return 1;
    }
    //Loop through each possible socket
    for(SOCKET i = 1; i <= max_socket; ++i) {
        // `master` is tested as well as `reads`: a socket dropped earlier in this same
        // pass may still be flagged in the `reads` snapshot.
        if (!FD_ISSET(i, &master) || !FD_ISSET(i, &reads)) {
            continue;
        }
        //If socket_listen, create TCP connection of accept() function
        if (i == socket_listen) {
            CLIENT *client_info = create_client();
            client_info->client_len = sizeof(client_info->client_address);
            client_info->sock_fd = accept(socket_listen,
                    (struct sockaddr*) &client_info->client_address,
                    &client_info->client_len);
            if (!ISVALIDSOCKET(client_info->sock_fd)) {
                fprintf(stderr, "accept() failed. (%d)\n",
                        GETSOCKETERRNO());
                return 1;
            }
            FD_SET(client_info->sock_fd, &master);
            if (client_info->sock_fd > max_socket)
                max_socket = client_info->sock_fd;
            //Prints the client address using the getnameinfo() function
            getnameinfo((struct sockaddr*)&client_info->client_address,
                    client_info->client_len,
                    client_info->address_buffer,
                    100, 0, 0,
                    NI_NUMERICHOST);
            printf("New connection %s\n", client_info->address_buffer);
            printf("\nWaiting for succesful Salt handshake...\n");
            //Salt handshake
            salt_hndshk(client_info);
            //Insert client to the list of clients
            insert(p_list, client_info);
            //List of clients connected to the server with a successful Salt handshake
            listing_clients(p_list);
            continue;
        }

        memset(rx_buffer, 0, sizeof(rx_buffer));
        //Look up the client that owns socket i; the server decrypts the data from it
        CLIENT *client_decrypt = search_client(p_list, i);
        if (client_decrypt == NULL) {
            // NOTE(review): assumes search_client() yields NULL when no entry matches.
            // A socket without a list entry cannot be served, so stop tracking it.
            CLOSESOCKET(i);
            FD_CLR(i, &master);
            continue;
        }
        ret_msg = salt_read_begin_pom(&client_decrypt->channel, rx_buffer,
                sizeof(rx_buffer), &msg_in, pom_buffer, &decrypt_size);
        //Check if SALT_ERROR from message
        if (ret_msg == SALT_ERROR) {
            printf("\tThe client disconnects from the server.\n");
            printf("\tThe server has closed his socket\n");
            release_client(p_list, client_decrypt);
            free(client_decrypt);
            CLOSESOCKET(i);
            FD_CLR(i, &master);
            continue;
        }

        //Chat room service: relay the decrypted message to every other client
        for(SOCKET j = 1; j <= max_socket; ++j){
            if (!FD_ISSET(j, &master) || j == socket_listen || j == i){
                continue;
            }
            memset(rx_buffer, 0, sizeof(rx_buffer));
            //Look up the recipient that owns socket j
            CLIENT *client_encrypt = search_client(p_list, j);
            if (client_encrypt == NULL) {
                CLOSESOCKET(j);
                FD_CLR(j, &master);
                continue;
            }
            //Prepare data before send
            ret_msg = salt_write_begin(tx_buffer, sizeof(tx_buffer), &msg_out);
            //Copy clear text message to be encrypted to next encrypted package
            if (ret_msg != SALT_ERROR)
                ret_msg = salt_write_next(&msg_out, (uint8_t * )pom_buffer, decrypt_size);
            //Wrapping, creating encrpted messages
            if (ret_msg != SALT_ERROR)
                ret_msg = salt_write_execute(&client_encrypt->channel, &msg_out, false);
            //Check if SALT_ERROR from message
            if (ret_msg == SALT_ERROR) {
                // The write failed for recipient j, so it is j that is dropped;
                // the sender on socket i stays connected.
                printf("\tThe client disconnects from the server.\n");
                printf("\tThe server has closed his socket\n");
                release_client(p_list, client_encrypt);
                free(client_encrypt);
                CLOSESOCKET(j);
                FD_CLR(j, &master);
                continue;
            }
        }
    }
}

TCP Socket Multiplexing Send Large Data

Got some trouble with TCP socket multiplexing.
//socket is non-blocking
// Streams the file at `path` to `sockfd` in chunks of up to MAX bytes.
// Returns 0 on success, otherwise the errno of the failing open/read/send.
const int MAX = 4096;
char buff[MAX];                 // byte buffer (not an array of pointers)
char *p = buff;
int fd, rvalue;
rvalue = 0;
if ( (fd = open(path, O_RDONLY)) < 0 ) {
    return errno;
} else {
    int didsend, didread;
    int shouldsend;
    int read_errno = 0;
    // stop reading as soon as a send has failed
    while (rvalue == 0) {
        didread = read(fd, buff, MAX);
        if (didread < 0) {
            read_errno = errno;     // keep it: close() below may overwrite errno
            break;
        }
        if (didread == 0) {
            break;                  // end of file
        }
        p = buff;
        shouldsend = didread;
        //the sending loop is over once the whole chunk has been sent.
        while (shouldsend > 0) {
            didsend = send(sockfd, p, shouldsend, 0);
            // errors are tested first: -1 must never be applied to p / shouldsend
            if (didsend < 0) {
                //if there is no place for new data to send, then wait a brief time and try again.
                if (errno == EWOULDBLOCK || errno == EAGAIN) {
                    usleep(1000);
                    continue;
                }
                //send error
                rvalue = errno;
                break;
            }
            //if send succeeds and returns the number of bytes fewer than asked for then try to send rest part in next time.
            p += didsend;
            shouldsend -= didsend;
        }
    }
    close(fd);
    if (read_errno != 0) {
        return read_errno;
    }
    return rvalue;
}
Assume I use an I/O Multiplexing function poll() or kqueue(), and non-blocking socket, then if there are only some small data like send a short message, it works fine.
But with large data - larger than send()'s buffer size - a non-blocking send() sends only a portion of the data and returns how much it sent. The rest can only be sent by another call to send(), which takes time, and there is no telling how long. So the second while() is effectively a blocking send, even though it uses a non-blocking socket.
Equivalent to:
//socket is blocking
// Streams the file at `path` to `sockfd`. Returns 0 on success, otherwise the
// errno of the failing open/read/send. The file descriptor is closed on every path.
const int MAX = 4096;
char buff[MAX];                 // byte buffer (not an array of pointers)
int fd, n;
if ( (fd = open(path, O_RDONLY)) < 0 ) {
    return errno;
} else {
    while ((n = read(fd, buff, MAX)) > 0) {
        if (send(sockfd, buff, n, 0) < 0) {
            int send_errno = errno;     // close() may overwrite errno
            close(fd);
            return send_errno;
        }
    }
    // n < 0 here means read() failed rather than reached end of file
    int read_errno = (n < 0) ? errno : 0;
    close(fd);
    return read_errno;
}
So, what is the solution to this, multithreading might work but that's kind of wasting resource maybe.
This is the general pattern for a single-threaded server that works with multiple connections and non-blocking sockets.
It's primarily pseudo-code in C and doesn't do the necessary error checking. But it gives you an idea that for each accepted connection, you keep a struct instance that maintains the socket handle, request parsing state, response stream, and any other "state" members of that connection. Then you just loop using "select" to wait or having multiple threads doing this same thing.
Again this is only pseudo-code and uses select/poll as an example. You can get even more scalability with epoll.
// Pseudo-code: single-threaded server loop over non-blocking sockets.
// Every connection is either reading its request (watched for readability) or
// sending its response (watched for writability); txCount remembers how much of
// the response has gone out so a partial send() is simply continued on a later pass.
while (1)
{
    fd_set readset;
    fd_set writeset;
    FD_ZERO(&readset);
    FD_ZERO(&writeset);
    int max_fd = listen_socket;     // select() needs the highest descriptor + 1

    for (int i = 0; i < number_of_client_connections; i++)
    {
        int s = client_connections[i].sock;
        if (client_connections[i].reading_request)
            FD_SET(s, &readset);
        else
            FD_SET(s, &writeset);
        if (s > max_fd)
            max_fd = s;
    }

    // add the listen socket to the read set
    FD_SET(listen_socket, &readset);

    select(max_fd + 1, &readset, &writeset, NULL, &timeout); // wait for a socket to be ready (not shown - check for errors and return value)

    if (FD_ISSET(listen_socket, &readset))
    {
        int new_client_socket = accept(listen_socket, &addr, &addrlength);
        if (new_client_socket >= 0)
        {
            // create a struct that keeps track of the connection state data
            struct ConnectionData client_connection = {0};
            client_connection.sock = new_client_socket;
            client_connection.reading_request = 1; // awaiting for all the request bytes to come in
            client_connections[number_of_client_connections++] = client_connection; // pseudo code, add the client_connection to the list
        }
    }

    for (int i = 0; i < number_of_client_connections; i++)
    {
        struct ConnectionData *conn = &client_connections[i];

        if (conn->reading_request)
        {
            if (FD_ISSET(conn->sock, &readset))
            {
                char buffer[2000];
                int len = recv(conn->sock, buffer, sizeof(buffer), 0);
                // not shown - handle error case when (recv < 0)
                // not shown - handle case when (recv == 0)
                // passed by pointer so the parser can flip conn->reading_request
                ProcessIncomingData(conn, buffer, len); // do all the request parsing here. Flip the client_connections[i].reading_request to 0 if ready to respond
            }
        }
        else
        {
            if (FD_ISSET(conn->sock, &writeset))
            {
                int len = send(conn->sock, conn->response_buffer + conn->txCount, conn->response_size - conn->txCount, 0);
                // not shown - handle error case when (send < 0); only real progress is counted
                if (len > 0)
                    conn->txCount += len;
                if (conn->txCount == conn->response_size)
                {
                    // done sending response - we can close this connection or change it to back to the reading state
                }
            }
        }
    }
}

client messages don't arrive to server in tcp winsock

I have server client application.
When I send messages in a row (without the scanf in the code below), it seems the server doesn't get them (it doesn't print them).
if I wait a little bit(with the scanf in the code below) and then send the next message the server works fine and prints all messages.
what's the problem?
how can I fix it, cause I want to do more with the message(not just to print it) that arrived to the server.
in my client code(where server prints nothing)
/* Client, variant 1: sends numbers back-to-back until i reaches 3.
 * Each number goes out as its decimal digits only: strlen(message) bytes,
 * so no terminating '\0' or other separator is transmitted.
 * `i` and `mainSockfd` are defined outside this excerpt. */
char message[(100)] = {0};
int x = rand();
while(i < 3)
{
printf(" I send %d\n", x);fflush(NULL);
sprintf(message, "%d",x);
if( send(mainSockfd, message,strlen(message),0) == -1)
{
printf("ERRRRRORRRR\n");fflush(NULL);
}
i++;
x = rand() % 100;
}
in my client code(when server prints the messages)
/* Client, variant 2: same loop as variant 1, but it blocks in scanf() after every
 * send, so the sends are separated by however long the user takes to type a number.
 * The value read by scanf() overwrites x and is what the next pass sends. */
char message[(100)] = {0};
int x = rand();
while(i < 3)
{
printf(" I send %d\n", x);fflush(NULL);
sprintf(message, "%d",x);
if( send(mainSockfd, message,strlen(message),0) == -1)
{
printf("ERRRRRORRRR\n");fflush(NULL);
}
i++;
x = rand() % 100;
scanf("%d",&x); // this is the only change
}
in my server code
/* Server receive loop: prints whatever each recv() call delivers.
 * `sockfd` and `readLength` are defined outside this excerpt. */
char command[(100+1)] = {0};
while(1)
{
/* Up to 100+1 bytes are requested, which is the whole buffer: a full read leaves
 * no room for a terminating '\0' before the "%s" print below. */
readLength = recv(sockfd, command, 100+1,0);
if(readLength > 0)
{
printf("arrived = %s,\n",command);fflush(NULL);
/* Clear the buffer so the next, possibly shorter, read is still '\0'-terminated. */
ZeroMemory(command, sizeof(command));
}
else if( readLength == 0)
{
/* recv() returned 0: leave the loop. */
break;
}
else if ( readLength < 0 ){
/* 10035 is WSAEWOULDBLOCK in the Winsock error table: nothing to read yet, retry. */
if(GetLastError() == 10035)
{
continue;
}
/* 10057 is WSAENOTCONN and 10054 is WSAECONNRESET: the connection is gone, stop. */
if(GetLastError() == 10057 || GetLastError() == 10054)
{
break;
}
/* Any other error is retried as well. */
continue;
}
}
As you seem to be transferring 0-terminated "strings" without the 0 termination, you should read one char less than the read buffer provides, so that the read buffer is always 0-terminated; if you try to printf a non-0-terminated "string" you provoke undefined behaviour.
So change this
readLength = recv(sockfd, command, 100+1,0);
to become this
readLength = recv(sockfd, command, 100,0);

Why does the server enter an infinite loop while closing client side connection

I am trying to send data through a Tcp connection using C.
I am able to send data properly , but when I close the client side application (CTRL-C), the loop in the server side runs infinitely.
Can anyone explain me what I am doing wrong ? What can I do to prevent it?
//Server-Side code.
/* Server loop as posted in the question: accept one connection, then read it one
 * byte at a time until a space character arrives.
 * `buff`, `accepted_socket` and `connection_socket` are defined outside this excerpt. */
while (TRUE)
{
accepted_socket = accept(connection_socket, (struct sockaddr*)0, 0) ;
if(accepted_socket < 0 )
{
perror("accept function in main() ") ;
close(connection_socket) ;
exit(1) ;
}
do
{
int recieved_bytes = recv(accepted_socket, &buff,1, 0) ; // it will store the recieved characters inside the buff.
/* Only a negative result is reported; a result of 0 is not tested, and the loop
 * carries on after an error as well. */
if(recieved_bytes < 0 )
{
perror("Error occurred ! Recieved bytes less than zero. in mainloop.") ;
}
/* buff is printed even when this recv() call stored nothing in it. */
printf("%c", buff) ;
}
/* The only exit condition is the last stored character being a space. */
while(buff!= ' ') ; // This loop runs infinitely.
}
//Client Side-Code
/* Client loop as posted in the question: reads one character from stdin and sends
 * it as a single byte. The closing `while (...)` of this do-loop is not part of the
 * excerpt. `connection_socket` is a pointer defined outside this excerpt. */
char c = 'c' ;
do
{
c = getchar() ;
/* send() of one byte returning less than 1 is treated as a failure. */
if(send(*connection_socket, &c, 1, 0) < 1 )
{
/* A reset connection ends the program; any other failure is only reported. */
if(errno == ECONNRESET)
{
fprintf(stderr, "Your message couldn't be sent, since connection was reset by the server.\n") ;
exit(1) ;
}
perror("Not all bytes sent in send() in main()") ;
}
}
Your server code runs in 2 loops: the outer one waits for more connections, and as soon as you have a connection, it goes on running.
Currently nothing ever terminates either of them. If you want to terminate the inner one, you should additionally check whether recv() returned 0, which means the connection has been closed.
Even if you do
/* Accept connections forever; for each one, echo the received bytes to stdout
 * until the peer closes the connection (recv() == 0) or recv() fails, then close
 * the accepted socket and wait for the next client. */
while (TRUE)
{
    accepted_socket = accept(connection_socket, (struct sockaddr*)0, 0);
    if (accepted_socket < 0)
    {
        perror("accept function in main() ");
        close(connection_socket);
        exit(1);
    }
    // here starts the loop for the accepted_socket:
    int received_bytes;     // declared outside the do-body so the while condition can see it
    do
    {
        received_bytes = recv(accepted_socket, &buff, 1, 0); // stores the received character in buff
        if (received_bytes < 0)
        {
            perror("recv");
            break;          // a failed socket would otherwise be retried forever
        }
        if (received_bytes > 0)
            printf("%c", buff);
    } while (received_bytes != 0);
    // release the descriptor before accepting the next client
    close(accepted_socket);
}
your outer loop goes on running.

SIGPIPE With Running Program

I have two daemons, and A is speaking to B. B is listening on a port, and A opens a tcp connection to that port. A is able to open a socket to B, but when it attempts to actually write said socket, I get a SIGPIPE, so I'm trying to figure out where B could be closing the open socket.
However, if I attach to both daemons in gdb, the SIGPIPE happens before any of the code for handling data is called. This kind of makes sense, because the initial write is never successful, and the listeners are triggered from receiving data. My question is - what could cause daemon B to close the socket before any data is sent? The socket is closed less than a microsecond after opening it, so I'm thinking it can't be a timeout or anything of the sort. I would love a laundry list of possibilities to track down, as I've been chewing on this one for a few days and I'm pretty much out of ideas.
As requested, here is the code that accepts and handles communication:
{
  /*
   * Body of the request-wait routine (its signature is not part of this excerpt;
   * `waittime` and `SState` come from it).
   *
   * 1. Copies the global read set under its mutex and select()s on the copy for
   *    up to `waittime` seconds.
   * 2. For every descriptor select() flagged, calls the connection's handler, or
   *    closes the connection if it is marked Idle.
   * 3. Closes FromClientDIS connections whose last activity is older than
   *    PBS_NET_MAXCONNECTIDLE, unless they carry PBS_NET_CONN_NOTIMEOUT.
   *
   * Returns 0 normally (including when *SState changed during processing) and -1
   * when the descriptor set cannot be allocated or select() fails with anything
   * other than EINTR. The private copy of the descriptor set is freed on every
   * return path.
   */
  extern char *PAddrToString(pbs_net_t *);
  int i;
  int n;
  time_t now;
  fd_set *SelectSet = NULL;
  int SelectSetSize = 0;
  int MaxNumDescriptors = 0;
  char id[] = "wait_request";
  char tmpLine[1024];
  struct timeval timeout;
  long OrigState = 0;

  if (SState != NULL)
    OrigState = *SState;

  timeout.tv_usec = 0;
  timeout.tv_sec = waittime;

  SelectSetSize = sizeof(char) * get_fdset_size();
  SelectSet = (fd_set *)calloc(1,SelectSetSize);

  if (SelectSet == NULL)
    {
    log_err(errno, id, "Unable to allocate descriptor set for select");
    return(-1);
    }

  pthread_mutex_lock(global_sock_read_mutex);
  memcpy(SelectSet,GlobalSocketReadSet,SelectSetSize);
  /* selset = readset;*/ /* readset is global */
  MaxNumDescriptors = get_max_num_descriptors();
  pthread_mutex_unlock(global_sock_read_mutex);

  n = select(MaxNumDescriptors, SelectSet, (fd_set *)0, (fd_set *)0, &timeout);

  if (n == -1)
    {
    /* remember why select() failed: fstat() below may overwrite errno */
    int select_errno = errno;

    if (select_errno == EINTR)
      {
      n = 0; /* interrupted, cycle around */
      }
    else
      {
      struct stat fbuf;

      /* check all file descriptors to verify they are valid */
      /* NOTE: selset may be modified by failed select() */
      for (i = 0; i < MaxNumDescriptors; i++)
        {
        if (FD_ISSET(i, GlobalSocketReadSet) == 0)
          continue;

        if (fstat(i, &fbuf) == 0)
          continue;

        /* clean up SdList and bad sd... */
        pthread_mutex_lock(global_sock_read_mutex);
        FD_CLR(i, GlobalSocketReadSet);
        pthread_mutex_unlock(global_sock_read_mutex);
        } /* END for each socket in global read set */

      free(SelectSet);
      log_err(select_errno, id, "Unable to select sockets to read requests");
      return(-1);
      } /* END else (errno == EINTR) */
    } /* END if (n == -1) */

  for (i = 0; (i < max_connection) && (n != 0); i++)
    {
    pthread_mutex_lock(svr_conn[i].cn_mutex);

    if (FD_ISSET(i, SelectSet))
      {
      /* this socket has data */
      n--;
      svr_conn[i].cn_lasttime = time(NULL);

      if (svr_conn[i].cn_active != Idle)
        {
        void *(*func)(void *) = svr_conn[i].cn_func;

        netcounter_incr();
        /* the connection mutex is released before the handler runs */
        pthread_mutex_unlock(svr_conn[i].cn_mutex);
        func((void *)&i);

        /* NOTE: breakout if state changed (probably received shutdown request) */
        if ((SState != NULL) &&
            (OrigState != *SState))
          break;
        }
      else
        {
        pthread_mutex_lock(global_sock_read_mutex);
        FD_CLR(i, GlobalSocketReadSet);
        pthread_mutex_unlock(global_sock_read_mutex);
        close_conn(i, TRUE);
        pthread_mutex_unlock(svr_conn[i].cn_mutex);

        pthread_mutex_lock(num_connections_mutex);
        snprintf(tmpLine, sizeof(tmpLine), "closed connections to fd %d - num_connections=%d (select bad socket)",
          i,
          num_connections);
        pthread_mutex_unlock(num_connections_mutex);
        log_err(-1, id, tmpLine);
        }
      }
    else
      pthread_mutex_unlock(svr_conn[i].cn_mutex);
    } /* END for i */

  /* the private copy of the read set is not needed past this point */
  free(SelectSet);
  SelectSet = NULL;

  /* NOTE: break out if shutdown request received */
  if ((SState != NULL) && (OrigState != *SState))
    return(0);

  /* have any connections timed out ?? */
  now = time((time_t *)0);

  for (i = 0;i < max_connection;i++)
    {
    struct connection *cp;

    pthread_mutex_lock(svr_conn[i].cn_mutex);
    cp = &svr_conn[i];

    if (cp->cn_active != FromClientDIS)
      {
      pthread_mutex_unlock(svr_conn[i].cn_mutex);
      continue;
      }

    if ((now - cp->cn_lasttime) <= PBS_NET_MAXCONNECTIDLE)
      {
      pthread_mutex_unlock(svr_conn[i].cn_mutex);
      continue;
      }

    if (cp->cn_authen & PBS_NET_CONN_NOTIMEOUT)
      {
      pthread_mutex_unlock(svr_conn[i].cn_mutex);
      continue; /* do not time-out this connection */
      }

    /* NOTE: add info about node associated with connection - NYI */
    snprintf(tmpLine, sizeof(tmpLine), "connection %d to host %s has timed out after %d seconds - closing stale connection\n",
      i,
      PAddrToString(&cp->cn_addr),
      PBS_NET_MAXCONNECTIDLE);
    log_err(-1, "wait_request", tmpLine);

    /* locate node associated with interface, mark node as down until node responds */
    /* NYI */
    close_conn(i, TRUE);
    pthread_mutex_unlock(svr_conn[i].cn_mutex);
    } /* END for (i) */

  return(0);
}
NOTE: I didn't write this code.
Is it possible you messed up and somewhere else in the program you try to close the same handle twice?
That could do this to you very easily.
HINT: systrace can determine if this is happening.

Resources