This question is elaboration and continuation of my previous question - On epoll_pwait, POSIX timers and X11 events. Most of X11 events is either delayed or dropped. I work with XCB window that displays some graphical information. The application has an insteractive input and had more or less static display. Now the requirenments had changed and I need to add some pereodic computations and have a interactive input.
The problem with the new requirenments is that computations goes at high rate. With that, I get inconsistent interaction with the XCB window. Input may lag behind, or change the rate of processing.
The setup is multiplex events with epoll_pwait. The events are signals, X11 events and recently added timers/timeout.
What I understand as of now, I need to separate user interaction from the computations. The problem with my setup, as of now, is that rate of X11 events changes in a way I can't explain.
So I decide to separate waiting on X11 events with the rest of the logic. Can you suggest a proper way to do it? Will having a X11 window in a separate thread/process/epoll set help?
And the actual question, as I look upon it now, is, what is a frequency of epoll_wait wakeups? I plan to have one epoll_wait in a loop. Maybe some processes to be wait on. I understand that epoll_wait will "wakeup" at some random points int time.
Update
My setup is close to this one:
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <time.h>
#include <math.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/signalfd.h>
#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/socket.h>
#include <xcb/xcb.h>
static struct timespec tm_p, tm_c, tm_diff;
static unsigned long nevents = 0;
static inline void timespec_diff(struct timespec *a, struct timespec *b, struct timespec *res) {
res->tv_sec = a->tv_sec - b->tv_sec;
res->tv_nsec = a->tv_nsec - b->tv_nsec;
if (res->tv_nsec < 0) {
--res->tv_sec;
res->tv_nsec += 1000000000L;
}
}
static double compute(){
double t = 1.0;
double eps = 1e-7;
double eps_2 = eps / 2.0;
clock_gettime(CLOCK_MONOTONIC, &tm_p);
while(t > eps){
t -= eps;
t += eps;
t -= eps_2 + eps_2;
}
clock_gettime(CLOCK_MONOTONIC, &tm_c);
timespec_diff(&tm_c, &tm_p, &tm_diff);
printf(" compute: %ld %f\n", tm_diff.tv_sec, tm_diff.tv_nsec / 1e9);
return (int)t;
}
/* defines for epoll */
#define MAXEVENTS 64
#define SET_EV(_ev,_fd,_events) _ev.data.fd = _fd; _ev.events = _events
static int xcb_get_atom(xcb_connection_t *c,
const char *name,
xcb_atom_t *atom)
{
xcb_intern_atom_cookie_t cookie;
xcb_generic_error_t *error;
xcb_intern_atom_reply_t *reply;
cookie = xcb_intern_atom(c, 0, strlen(name), name);
reply = xcb_intern_atom_reply(c, cookie, &error);
if(NULL == reply){
free(error);
return -1;
}
*atom = reply->atom;
free(reply);
return 0;
}
static int xcb_change_prop_wm_close(xcb_connection_t *c,
xcb_window_t window,
xcb_atom_t wm_p,
xcb_atom_t atom)
{
xcb_void_cookie_t cookie;
xcb_generic_error_t *error;
xcb_atom_enum_t type = XCB_ATOM_ATOM;
uint8_t format = 32;
uint32_t data_len = 1;
cookie = xcb_change_property_checked(c, /* xcb connection */
XCB_PROP_MODE_REPLACE, /* mode */
window, /* window */
wm_p, /* the property to change */
type, /* type of the property */
format, /* format(bits) */
data_len, /* number of elements(see format) */
&atom /* property data */
);
error = xcb_request_check(c, cookie);
if (error) {
free(error);
return -1;
}
return 0;
}
int main()
{
xcb_connection_t *c;
xcb_screen_t *screen;
xcb_window_t win;
xcb_atom_t a_wm_p;
xcb_atom_t wm_p_close;
struct epoll_event ep_ev, *ep_evs = NULL;
struct signalfd_siginfo siginf;
sigset_t mask_sigs, mask_osigs;
int sig_fd = -1, x11_fd = -1, ep_fd = -1, tm_fd = -1;
/* set up signals */
if(sigemptyset(&mask_sigs) < 0){
perror(" * sigemptyset(&mask_sigs)");
goto main_terminate;
}
if(sigaddset(&mask_sigs, SIGINT)){ /* these signals will be blocked. the signals will arrive */
perror(" * sigaddset(&mask_sigs, SIGINT)");
goto main_terminate;
}
if(sigaddset(&mask_sigs, SIGQUIT)){ /* to epoll and not to a default signal handler. */
perror(" * sigaddset(&mask_sigs, SIGQUIT)");
goto main_terminate;
}
/* save old sigmask, replace it with new sigmask */
if(sigprocmask(SIG_BLOCK, &mask_sigs, &mask_osigs) < 0){
perror(" * sigprocmask(SIG_BLOCK, &mask_sigs, &mask_osigs)");
goto main_terminate;
}
/* get signal file descriptor */
if((sig_fd = signalfd(-1, &mask_sigs, 0)) < 0){
perror(" * signalfd(-1, &mask_sigs, 0)");
goto main_terminate;
}
/* set signal fd as non-blocking */
{
int on = 1;
if(ioctl(sig_fd, FIONBIO, (char *)&on) < 0){
perror(" * ioctl(sig_fd, FIONBIO)");
goto main_terminate;
}
}
/* Open the connection to the X server */
c = xcb_connect (NULL, NULL);
/* Get the first screen */
screen = xcb_setup_roots_iterator (xcb_get_setup (c)).data;
/* Ask for our window's Id */
win = xcb_generate_id(c);
/* Create the window */
{
unsigned int cw_mask = XCB_CW_BORDER_PIXEL
| XCB_CW_EVENT_MASK
;
/* values must follow in the incresing order of the cw_mask constants */
unsigned int cw_values[] = {screen->white_pixel,
XCB_EVENT_MASK_KEY_PRESS|XCB_EVENT_MASK_KEY_RELEASE
};
xcb_create_window (c, /* Connection */
XCB_COPY_FROM_PARENT, /* depth (same as root)*/
win, /* window Id */
screen->root, /* parent window */
0, 0, /* x, y */
150, 150, /* width, height */
10, /* border_width */
XCB_WINDOW_CLASS_INPUT_OUTPUT, /* class */
screen->root_visual, /* visual */
cw_mask, cw_values); /* masks */
}
/* get x11 connection file descriptor */
x11_fd = xcb_get_file_descriptor(c);
/* atom WM_PROTOCOLS */
if(xcb_get_atom(c, "WM_PROTOCOLS", &a_wm_p) < 0){
fprintf(stderr, " %s:%s:%d\n", __FILE__, __func__, __LINE__);
return -1;
}
/* atom window close */
if(xcb_get_atom(c, "WM_DELETE_WINDOW", &wm_p_close) < 0){
fprintf(stderr, " %s:%s:%d\n", __FILE__, __func__, __LINE__);
return -1;
}
{ /* wm prop: intercept close */
if(xcb_change_prop_wm_close(c, win, a_wm_p, wm_p_close) < 0){
fprintf(stderr, " %s:%s:%d\n", __FILE__, __func__, __LINE__);
goto main_terminate;
}
}
/* create epoll set file descriptor */
if((ep_fd = epoll_create(1)) < 0){
perror(" * epoll_create");
goto main_terminate;
}
/* allocate events for epoll queue */
if(NULL == (ep_evs = (struct epoll_event*)calloc(MAXEVENTS,sizeof(ep_ev)))){
perror(" * calloc(MAXEVENTS)");
goto main_terminate;
}
{ /* fd timer */
struct itimerspec ts;
if((tm_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK)) < 0){
perror(" * timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK)");
goto main_terminate;
}
ts.it_value.tv_sec = 1;
ts.it_value.tv_nsec = 0;
ts.it_interval.tv_sec = 0;
ts.it_interval.tv_nsec = (unsigned long)10e6; /* 10 msec */
if(timerfd_settime(tm_fd, 0, &ts, NULL) < 0){
perror(" * timerfd_settime(tm_fd, 0, &ts, NULL)");
goto main_terminate;
}
}
/* add X11 event */
SET_EV(ep_ev,x11_fd,EPOLLIN);
if(epoll_ctl (ep_fd, EPOLL_CTL_ADD, x11_fd, &ep_ev) < 0){
perror(" * epoll_ctl x11_fd");
goto main_terminate;
}
/* add timer event */
SET_EV(ep_ev,tm_fd,EPOLLIN);
if(epoll_ctl (ep_fd, EPOLL_CTL_ADD, tm_fd, &ep_ev) < 0){
perror(" * epoll_ctl tm_fd");
goto main_terminate;
}
/* add signal event */
SET_EV(ep_ev,sig_fd,EPOLLIN);
if(epoll_ctl (ep_fd, EPOLL_CTL_ADD, sig_fd, &ep_ev) < 0){
perror(" * epoll_ctl sig_fd");
goto main_terminate;
}
/* window title */
const char *title = "epoll_pwait";
xcb_change_property (c,
XCB_PROP_MODE_REPLACE,
win,
XCB_ATOM_WM_NAME,
XCB_ATOM_STRING,
8,
strlen (title),
title );
/* Map the window on the screen */
xcb_map_window (c, win);
/* Make sure commands are sent before we pause, so window is shown */
xcb_flush (c);
while(1){
int n, i, fd, status;
bool f_compute = false;
bool f_exit_sig = false;
bool f_win_close = false;
n = epoll_pwait (ep_fd, ep_evs, MAXEVENTS, -1, &mask_sigs); /* wait, signal safe */
if(n < 0){
fprintf(stderr, " * main(): %s:%s:%d\n", __FILE__, __func__, __LINE__);
status = 1;
goto main_terminate;
}
for(i = 0; i < n; ++i){ /* service epoll events */
fd = ep_evs[i].data.fd;
if(fd == sig_fd){ /* signal */
status = read(fd, &siginf, sizeof(siginf));
if(status != sizeof(siginf)){
fprintf(stderr,"read != sizeof(siginf)");
goto main_terminate;
}
if(siginf.ssi_signo == SIGINT){
printf("got SIGINT\n");
f_exit_sig = true;
}else if(siginf.ssi_signo == SIGQUIT){
printf("got SIGQUIT\n");
f_exit_sig = true;
goto main_terminate;
}else {
printf("got unregistered signal\n");
}
}else if(fd == x11_fd){ /* x11 event */
xcb_generic_event_t *event;
while((event = xcb_poll_for_event(c))){
if (event && (event->response_type == 0)){ /* error recieved */
free(event);
goto main_terminate;
}
switch(event->response_type & ~0x80){
case XCB_CLIENT_MESSAGE: { /* window close */
xcb_client_message_event_t *ce = (xcb_client_message_event_t*)event;
if(ce->data.data32[0] == wm_p_close){ /* window should close */
f_win_close = true;
}
} break;
case XCB_KEY_PRESS: { /* keyboard key press */
printf("XCB_KEY_PRESS\n");
nevents++;
} break;
} /* end switch */
free(event);
} /* end while event loop */
}else if(fd == tm_fd){ /* timer event */
uint64_t overrun;
status = read(fd, &overrun, sizeof(uint64_t));
if(status != EAGAIN) {
//~ printf(" ! timer overruns: %lu\n", overrun);
}
f_compute = true;
}
} /* finish service epoll events */
if(f_exit_sig){ /* exit signal */
goto main_terminate;
}
if(f_win_close){ /* window close */
goto main_terminate;
}
if(f_compute){ /* do some computations */
compute();
xcb_flush(c);
}
} /* end while(1) */
main_terminate:
if(sig_fd != -1){
close(sig_fd);
}
if(tm_fd != -1){
close(tm_fd);
}
if(ep_fd != -1){
close(ep_fd);
}
if(ep_evs){
free(ep_evs);
}
xcb_disconnect(c);
if (sigprocmask(SIG_SETMASK, &mask_osigs, NULL) < 0){
perror(" * sigprocmask(SIG_SETMASK, &mask_osigs, NULL)");
}
printf("received %lu events\n", nevents);
return 0;
}
With the above, I can not reproduce input lag I do have in more verbose program. I test the above with xdotool, send some input X11 events, and my eye can't catch any visible input delay. All events get delivered. For now I can not post a full code that I have problem with.
what is a frequency of epoll_wait wakeups?
I'll assume you are talking about the precision of wakeups. This surely depends on kernel details, but let's just make an experiment: We can write a small program that tests how long epoll_wait takes. This will test both a timeout of zero and a timeout of 1 ms since a timeout of zero might be treated specially.
Output on my system:
empty: avg 0.015636 μs, min 0.014 μs, max 0.065 μs
sleep 1 ms: avg 1108.55 μs, min 1014.72 μs, max 1577.42 μs
epoll_wait 0: avg 0.761084 μs, min 0.38 μs, max 37.787 μs
epoll_wait 1: avg 1108.97 μs, min 1017.22 μs, max 2602.4 μs
Doing nothing (empty) measures less than a microsecond, so the following results should be somewhat reliable. The minimum time is also not zero, so the clock has enough precision for what we are doing.
Sleeping 1 ms sleeps at least 1.014 ms, but there also was a case of 1.5 ms. I guess that means that wakeups are not all that precise.
Using epoll_wait() to do nothing take less than a microsecond. More than doing nothing, but this still does basically nothing, so perhaps this really just measures the syscall overhead...?
Sleeping 1 ms with epoll_wait() behaves more or less the same as sleeping for 1 ms with nanosleep().
If you want to improve this experiment, you could actually register some FDs via epoll_ctl(). It might be that the kernel handles "empty" epoll FDs specially.
#include <stdio.h>
#include <sys/epoll.h>
#include <time.h>
#include <unistd.h>
#define MEASUREMENT_COUNT 10000
#define NUM_EVENTS 42
static int epoll_fd;
double diff_micro_sec(struct timespec *a, struct timespec *b) {
double sec = a->tv_sec - b->tv_sec;
double ns = a->tv_nsec - b->tv_nsec;
return sec * 1e6 + ns / 1e3;
}
static void empty(void) {
}
static void sleep_one_ms(void) {
struct timespec spec;
spec.tv_sec = 0;
spec.tv_nsec = 1000 * 1000;
nanosleep(&spec, NULL);
}
static void epoll_wait_0(void) {
struct epoll_event events[NUM_EVENTS];
epoll_wait(epoll_fd, events, NUM_EVENTS, 0);
}
static void epoll_wait_1(void) {
struct epoll_event events[NUM_EVENTS];
epoll_wait(epoll_fd, events, NUM_EVENTS, 1);
}
static void do_it(const char *name, void (*func)(void)) {
double sum, min, max;
struct timespec a, b;
for (int i = 0; i < MEASUREMENT_COUNT; i++) {
double diff;
clock_gettime(CLOCK_MONOTONIC, &a);
func();
clock_gettime(CLOCK_MONOTONIC, &b);
diff = diff_micro_sec(&b, &a);
if (i == 0) {
sum = diff;
min = diff;
max = diff;
} else {
sum += diff;
if (diff < min)
min = diff;
if (diff > max)
max = diff;
}
}
printf("%14s: avg %g μs, min %g μs, max %g μs\n", name, sum / MEASUREMENT_COUNT, min, max);
}
int main() {
do_it("empty", empty);
do_it("sleep 1 ms", sleep_one_ms);
epoll_fd = epoll_create(1);
do_it("epoll_wait 0", epoll_wait_0);
do_it("epoll_wait 1", epoll_wait_1);
close(epoll_fd);
return 0;
}
I have written some code that tries to call a function (called worker) every x seconds (in this example, I chose 1s as the interval time). The code is a minimal working example that in reality is way more complex than this.
The code works when it is this simple, but I stumble across errors when running the more complex version for a longer period of time. Thus, I want to increase the robustness of this code an would like to get some ideas on how to do that.
Basically, the worker gets some data, processes it an writes it to a file. I open the file during every call to the worker. In the tests, after some time I get an error that the file cannot be opened anymore. In this regard I also noticed that this happens (maybe just by chance) everytime the worker execution time exceeds the interval time. Reason for this is the getter function which pulls data from remote and this can take some time, depending on the network traffic.
I've been thinking of trying a multithreaded approach, but I am not sure if this is worth the hassle. I would be grateful for any pointers on how to do this in a more robust way.
#include <unistd.h>
#include <stdbool.h>
#include <stdio.h>
#include <signal.h>
#include <stdint.h>
#include <time.h>
#define ALARM_INTERVAL_SEC 1
#define ALARM_INTERVAL_USEC 0
static bool running = true;
static struct itimerval alarm_interval;
static struct timeval previous_time;
static uint64_t loop_count = 0;
static FILE* testfile;
static void
signal_handler(int signum)
{
if (signum == SIGINT || signum == SIGTERM)
{
running = false;
}
}
static void
worker(int signum)
{
// Reset the alarm interval
if(setitimer(ITIMER_REAL, &alarm_interval, NULL) < 0)
{
perror("Error: setitimer");
raise(SIGTERM);
return;
}
struct timeval current_time;
gettimeofday(¤t_time, NULL);
printf("Loop count: %lu\n", loop_count);
printf("Loop time: %f us\n\n", (current_time.tv_sec - previous_time.tv_sec) * 1e6 +
(current_time.tv_usec - previous_time.tv_usec));
previous_time = current_time;
// convert time to human-readable format
char tmbuf[64];
char buf[64];
time_t nowtime = current_time.tv_sec;
struct tm *nowtm = localtime(&nowtime);
strftime(tmbuf, sizeof(tmbuf), "%Y-%m-%d %H:%M:%S", nowtm);
snprintf(buf, sizeof(buf), "%s.%06ld", tmbuf, current_time.tv_usec);
sleep(0.5);
// DO STH
testfile = fopen("testfile.txt", "ab+");
if(testfile == NULL)
{
printf("Error: open testfile");
raise(SIGTERM);
return;
}
fprintf(testfile, "[%s] Loop count: %lu\n", buf, loop_count);
fclose(testfile);
loop_count++;
}
int
main(int argc, char* argv[])
{
signal(SIGINT, signal_handler);
signal(SIGTERM, signal_handler);
signal(SIGALRM, worker);
// Set the alarm interval
alarm_interval.it_interval.tv_sec = 0;
alarm_interval.it_interval.tv_usec = 0;
alarm_interval.it_value.tv_sec = ALARM_INTERVAL_SEC;
alarm_interval.it_value.tv_usec = ALARM_INTERVAL_USEC;
if(setitimer(ITIMER_REAL, &alarm_interval, NULL) < 0)
{
perror("Error: setitimer");
return -1;
}
gettimeofday(&previous_time, NULL);
while(running)
{
sleep(1);
}
alarm_interval.it_value.tv_sec = 0;
alarm_interval.it_value.tv_usec = 0;
if(setitimer(ITIMER_REAL, &alarm_interval, NULL) < 0)
{
perror("Error: resetting itimer failed");
return -1;
}
return 0;
}
i have the following loop that is supposed to run at a little above 40Hz (24ms between each run). It works great for a couple of second to minutes, but then goes down to 20Hz for no apparent (at least not to me) reason.
I have tried running the code on a onion Omega 2 (that runs openwrt) and a ubuntu laptop with the same result. (the code went down to 25Hz on the omega2, and 20Hz on my laptop)
How can i force it to continue to run at the right speed? I have checked with an oscilloscope on the output, and the send_brk and send_dmx functions seem to run perfectly even when the program is throttled, so i don't think they are the problem.
double t1 = now();
double t2 = now();
while ( 1 ) {
// Attempt receive UDP data
int x = read(sock, &DMX[1], SIZE - 1);
t2 = now();
double dt = t2 - t1;
if(dt < 0.024){
usleep(500);
continue;
}
t1 = t2;
printf("Frame in %f %f/sec\n", dt, 1.0/dt);
if (x < 0) {
if(errno != EAGAIN){
fprintf(stderr, "Error reading from socket %s\n", strerror(errno));
}
}
send_brk();
send_dmx();
}
the functions called are here:
send_brk() sends a serial break that lasts 88us
void send_brk() {
ioctl(fd, TIOCSBRK);
double start = now();
while((now() - start) < 0.000088){
}
ioctl(fd, TIOCCBRK);
}
send_dmx() writes a byte buffer of lenght 512 to the serial port
void send_dmx() {
// setmode(TX);
int n = write(fd, DMX, SIZE);
if(n == -1) {
if (errno != EAGAIN) {
fprintf(stderr, "Error writing to serial %s\n", strerror(errno));
exit(1);
}
}
if (n != SIZE) {
printf("couldn't write full frame =(\n");
}
}
and now() returns the current time in second as a float.
double now() {
struct timeval tv;
gettimeofday(&tv, NULL);
double time = tv.tv_sec;
time = time + (tv.tv_usec / 1000000.0);
return time;
}
I'm trying to control a process using a timer. (Just trying to see how it works in order to implement it in another program. The purpose of that program is to call a function in a specific interval. But when I did that, it turned out that it was using too much CPU. I'm also trying to avoid using sleep.I got the timer to work, what it was implemented in a while loop, so it was hogging all of the CPU.
What I want is for the timer_handler to unlock the thread so that it can proceed with the rest of the program, and also for it to perform the same exact action at precise intervals.
This is the code that I have so far
int sem_init(sem_t *sem, int pshared, unsigned int value);
int sem_wait(sem_t *sem);
int sem_getvalue(sem_t *sem, int *sval);
int set_count = 0;
static int count = 0;
int timer_set();
sem_t sem;
struct inits{
struct sigaction sa;
struct itimerval timer;
}inits[1];
int timer_handler (int signum)
{
printf ("timer expired %d times\n", ++count);
if(sem_post(&sem)== -1){
write(STDERR_FILENO, "sem_post() failed \n", 18);
_exit(EXIT_FAILURE);
return 0;
}
else
return 0;
}
int timer_set(){
memset (&inits[0].sa, 0, sizeof (inits[0].sa));
inits[0].sa.sa_handler = timer_handler;
sigemptyset(&inits[0].sa.sa_mask);
inits[0].sa.sa_flags = 0;
printf("sigaction successfull: \n");
/* Configure the timer to expire after 1000 msec... */
inits[0].timer.it_value.tv_sec = 1;
inits[0].timer.it_value.tv_usec = 0;
/* ... and every 1000 msec after that. */
inits[0].timer.it_interval.tv_sec = 1;
inits[0].timer.it_interval.tv_usec = 0;
/* Start a virtual timer. It counts down whenever this process is
executing. */
if(setitimer (ITIMER_VIRTUAL, &inits[0].timer, NULL) == -1)
handle_error("timer set failed: ");
printf("timerset successful\n");
return 0;
}
int main ()
{
int s = 1;
int j;
int* sval;
printf("S value = %d\n", s);
// scanf("\n%d", &set_count);
if (sem_init(&sem, 1, 1) == -1)
handle_error("sem_init");
else{
printf("sem_init successful\n");
printf("sem_init has been set s = %d\n", s);
}
printf("j = %d\n", j=timer_set());
/* Do busy work. */
while (1)
{
sem_getvalue(&sem, sval);
if (errno==EINVAL)
handle_error("Error returned on sem_getvalue: ");
printf("entered while loop\n, S = %d\n", *sval);
while ((s= sem_wait (&sem))==0 && errno == EINTR)
{
printf("entered while loop, S = %d\n", *sval);
printf("entered sem while loop\n");
continue;
}
if (s == -1)
handle_error("Semaphore Wait failed\n");
else
printf("Semaphore Successful\n");
}
printf("thank you for using this timer. ADIOS!\n");
exit(1);
}
The problem is that it doesn't go through the second loop of while, and freezes.
Sorry I haven't yet figured out how to post proper code, so I apologize if it looks weird. .
Also, I've been researching this for a while now so I apologize if there's something that I have missed in my research which may or may not have answered my question.
Thanks in advance
I have two daemons, and A is speaking to B. B is listening on a port, and A opens a tcp connection to that port. A is able to open a socket to B, but when it attempts to actually write said socket, I get a SIGPIPE, so I'm trying to figure out where B could be closing the open socket.
However, if I attach to both daemons in gdb, the SIGPIPE happens before any of the code for handling data is called. This kind of makes sense, because the initial write is never successful, and the listeners are triggered from receiving data. My question is - what could cause daemon B to close the socket before any data is sent? The socket is closed less than a microsecond after opening it, so I'm thinking it can't be a timeout or anything of the sort. I would love a laundry list of possibilities to track down, as I've been chewing on this one for a few days and I'm pretty much out of ideas.
As requested, here is the code that accepts and handles communication:
{
extern char *PAddrToString(pbs_net_t *);
int i;
int n;
time_t now;
fd_set *SelectSet = NULL;
int SelectSetSize = 0;
int MaxNumDescriptors = 0;
char id[] = "wait_request";
char tmpLine[1024];
struct timeval timeout;
long OrigState = 0;
if (SState != NULL)
OrigState = *SState;
timeout.tv_usec = 0;
timeout.tv_sec = waittime;
SelectSetSize = sizeof(char) * get_fdset_size();
SelectSet = (fd_set *)calloc(1,SelectSetSize);
pthread_mutex_lock(global_sock_read_mutex);
memcpy(SelectSet,GlobalSocketReadSet,SelectSetSize);
/* selset = readset;*/ /* readset is global */
MaxNumDescriptors = get_max_num_descriptors();
pthread_mutex_unlock(global_sock_read_mutex);
n = select(MaxNumDescriptors, SelectSet, (fd_set *)0, (fd_set *)0, &timeout);
if (n == -1)
{
if (errno == EINTR)
{
n = 0; /* interrupted, cycle around */
}
else
{
int i;
struct stat fbuf;
/* check all file descriptors to verify they are valid */
/* NOTE: selset may be modified by failed select() */
for (i = 0; i < MaxNumDescriptors; i++)
{
if (FD_ISSET(i, GlobalSocketReadSet) == 0)
continue;
if (fstat(i, &fbuf) == 0)
continue;
/* clean up SdList and bad sd... */
pthread_mutex_lock(global_sock_read_mutex);
FD_CLR(i, GlobalSocketReadSet);
pthread_mutex_unlock(global_sock_read_mutex);
} /* END for each socket in global read set */
free(SelectSet);
log_err(errno, id, "Unable to select sockets to read requests");
return(-1);
} /* END else (errno == EINTR) */
} /* END if (n == -1) */
for (i = 0; (i < max_connection) && (n != 0); i++)
{
pthread_mutex_lock(svr_conn[i].cn_mutex);
if (FD_ISSET(i, SelectSet))
{
/* this socket has data */
n--;
svr_conn[i].cn_lasttime = time(NULL);
if (svr_conn[i].cn_active != Idle)
{
void *(*func)(void *) = svr_conn[i].cn_func;
netcounter_incr();
pthread_mutex_unlock(svr_conn[i].cn_mutex);
func((void *)&i);
/* NOTE: breakout if state changed (probably received shutdown request) */
if ((SState != NULL) &&
(OrigState != *SState))
break;
}
else
{
pthread_mutex_lock(global_sock_read_mutex);
FD_CLR(i, GlobalSocketReadSet);
pthread_mutex_unlock(global_sock_read_mutex);
close_conn(i, TRUE);
pthread_mutex_unlock(svr_conn[i].cn_mutex);
pthread_mutex_lock(num_connections_mutex);
sprintf(tmpLine, "closed connections to fd %d - num_connections=%d (select bad socket)",
i,
num_connections);
pthread_mutex_unlock(num_connections_mutex);
log_err(-1, id, tmpLine);
}
}
else
pthread_mutex_unlock(svr_conn[i].cn_mutex);
} /* END for i */
/* NOTE: break out if shutdown request received */
if ((SState != NULL) && (OrigState != *SState))
return(0);
/* have any connections timed out ?? */
now = time((time_t *)0);
for (i = 0;i < max_connection;i++)
{
struct connection *cp;
pthread_mutex_lock(svr_conn[i].cn_mutex);
cp = &svr_conn[i];
if (cp->cn_active != FromClientDIS)
{
pthread_mutex_unlock(svr_conn[i].cn_mutex);
continue;
}
if ((now - cp->cn_lasttime) <= PBS_NET_MAXCONNECTIDLE)
{
pthread_mutex_unlock(svr_conn[i].cn_mutex);
continue;
}
if (cp->cn_authen & PBS_NET_CONN_NOTIMEOUT)
{
pthread_mutex_unlock(svr_conn[i].cn_mutex);
continue; /* do not time-out this connection */
}
/* NOTE: add info about node associated with connection - NYI */
snprintf(tmpLine, sizeof(tmpLine), "connection %d to host %s has timed out after %d seconds - closing stale connection\n",
i,
PAddrToString(&cp->cn_addr),
PBS_NET_MAXCONNECTIDLE);
log_err(-1, "wait_request", tmpLine);
/* locate node associated with interface, mark node as down until node responds */
/* NYI */
close_conn(i, TRUE);
pthread_mutex_unlock(svr_conn[i].cn_mutex);
} /* END for (i) */
return(0);
}
NOTE: I didn't write this code.
Is it possible you messed up and somewhere else in the program you try to close the same handle twice?
That could do this to you very easily.
HINT: systrace can determine if this is happening.