How to manage two or more consumers via pthreads? - c

I have a generic problem I am looking to solve, where chunks of binary data sent from a standard input or regular file stream to an application, which in turn converts that binary data into text. Using threads, I want to process the text before piping it over to the next application, which modifies that text even further, and so on.
As a simple test case, I want to extract compressed data via gunzip. Specifically, I am looking at using gunzip -c - to extract chunks of binary data sent to it via its (reassigned) stdin file descriptor, and then pulling out chunks of text from its (reassigned) stdout file descriptor. I can then print these chunks of text to the real stdout or stderr (or do other stuff, later on).
(I realize that I can do gzip-based compression and extraction on the command line. My goal here is to use this test case to learn how to correctly pass around generic chunks of binary and text data between threads that either run that data through binaries, or process it further.)
In the case of my test program, I have set up three pthread_t threads:
produce_gzip_chunk_thread
consume_gzip_chunk_thread
consume_gunzip_chunk_thread
I pass each of these threads a shared data instance called thread_data, which contains a thread lock, two conditions, and some buffers and counter variables. I also include a set of file descriptors for a gunzip process opened with popen3():
typedef struct pthread_data pthread_data_t;
typedef struct popen3_desc popen3_desc_t;
struct pthread_data {
pthread_mutex_t in_lock;
pthread_cond_t in_cond;
pthread_cond_t out_cond;
unsigned char in_buf[BUF_LENGTH_VALUE];
size_t n_in_bytes;
size_t n_in_bytes_written_to_gunzip;
size_t n_out_bytes_read_from_gunzip;
FILE *in_file_ptr;
boolean in_eof;
char in_line[LINE_LENGTH_VALUE];
popen3_desc_t *gunzip_ptr;
};
struct popen3_desc {
int in;
int out;
int err;
};
The produce_gzip_chunk_thread reads in a 1024-byte chunk of gzip-compressed bytes from a regular file called foo.gz.
These bytes are written to an unsigned char buffer called in_buf, which is part of the shared data struct I am passing to each thread:
void * produce_gzip_chunk(void *t_data)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> produce_gzip_chunk()\n");
#endif
pthread_data_t *d = (pthread_data_t *)t_data;
unsigned char in_buf[BUF_LENGTH_VALUE];
size_t n_in_bytes = 0;
d->in_eof = kFalse;
pthread_mutex_lock(&d->in_lock);
while(kTrue) {
n_in_bytes = fread(in_buf, sizeof(in_buf[0]), sizeof(in_buf), d->in_file_ptr);
if (n_in_bytes > 0) {
while (d->n_in_bytes != 0 || d->n_out_bytes_read_from_gunzip != 0)
pthread_cond_wait(&d->in_cond, &d->in_lock);
memcpy(d->in_buf, in_buf, n_in_bytes);
d->n_in_bytes = n_in_bytes;
#ifdef DEBUG
fprintf(stderr, "Debug: ######## [%07zu] produced chunk\n", d->n_in_bytes);
#endif
pthread_cond_signal(&d->in_cond);
}
else if (feof(d->in_file_ptr) || ferror(d->in_file_ptr))
break;
}
d->in_eof = kTrue;
pthread_mutex_unlock(&d->in_lock);
pthread_cond_signal(&d->in_cond);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> produce_gzip_chunk()\n");
#endif
return NULL;
}
Once there is a positive number of bytes stored in n_bytes — that is, we have pulled data from our input gzip archive that needs to be processed with gunzip — this triggers a condition that permits the second thread consume_gzip_chunk_thread to operate:
void * consume_gzip_chunk(void *t_data)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> consume_gzip_chunk()\n");
#endif
pthread_data_t *d = (pthread_data_t *)t_data;
long n_in_bytes_written_to_gunzip;
pthread_mutex_lock(&d->in_lock);
while(kTrue) {
while (d->n_in_bytes == 0 && !d->in_eof)
pthread_cond_wait(&d->in_cond, &d->in_lock);
if (d->n_in_bytes) {
#ifdef DEBUG
fprintf(stderr, "Debug: ........ [%07zu] processing chunk\n", d->n_in_bytes);
#endif
if (!d->gunzip_ptr) {
#ifdef DEBUG
fprintf(stderr, "Debug: * setting up gunzip ptr\n");
#endif
d->gunzip_ptr = malloc(sizeof(popen3_desc_t));
if (!d->gunzip_ptr) {
fprintf(stderr, "Error: Could not create gunzip file handle struct\n");
exit(EXIT_FAILURE);
}
popen3("gunzip -c -",
&(d->gunzip_ptr->in),
&(d->gunzip_ptr->out),
&(d->gunzip_ptr->err),
kTrue,
kTrue);
memset(d->in_line, 0, LINE_LENGTH_VALUE);
}
n_in_bytes_written_to_gunzip = (long) write(d->gunzip_ptr->in, d->in_buf, d->n_in_bytes);
#ifdef DEBUG
fprintf(stderr, "Debug: ................ wrote [%07ld] bytes into the gunzip process\n", n_in_bytes_written_to_gunzip);
#endif
if (n_in_bytes_written_to_gunzip > 0)
d->n_in_bytes_written_to_gunzip = n_in_bytes_written_to_gunzip;
d->n_in_bytes = 0;
pthread_cond_signal(&d->out_cond);
}
if (d->in_eof)
break;
}
pthread_mutex_unlock(&d->in_lock);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> consume_gzip_chunk()\n");
#endif
return NULL;
}
When consuming the gzip data chunk, we use the write function to send n_bytes of in_buf to the gunzip process's input file descriptor. At the end, we send another thread signal, but this time to out_cond, so as to help reawaken consume_gunzip_chunk_thread, which reads from gunzip's output to do more work:
void * consume_gunzip_chunk(void *t_data)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> consume_gunzip_chunk()\n");
#endif
pthread_data_t *d = (pthread_data_t *)t_data;
long n_out_bytes_read_from_gunzip;
pthread_mutex_lock(&d->in_lock);
while(kTrue) {
while (d->n_in_bytes_written_to_gunzip == 0) {
pthread_cond_wait(&d->out_cond, &d->in_lock);
}
if (d->n_in_bytes_written_to_gunzip) {
sleep(1);
n_out_bytes_read_from_gunzip = read(d->gunzip_ptr->out, d->in_line, LINE_LENGTH_VALUE);
#ifdef DEBUG
fprintf(stderr, "Debug: ------------------------ read [%07ld] bytes out from the gunzip process\n", n_out_bytes_read_from_gunzip);
fprintf(stderr, "Debug: ------------------------ gunzip output chunk:\n[%s]\n", d->in_line);
#endif
memset(d->in_line, 0, strlen(d->in_line));
if (n_out_bytes_read_from_gunzip > 0)
d->n_out_bytes_read_from_gunzip = n_out_bytes_read_from_gunzip;
d->n_in_bytes_written_to_gunzip = 0;
pthread_cond_signal(&d->in_cond);
}
if (d->in_eof && (d->n_in_bytes_written_to_gunzip == 0))
break;
}
pthread_mutex_unlock(&d->in_lock);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> consume_gunzip_chunk()\n");
#endif
return NULL;
}
This attempts to read any available bytes from the gunzip process's output file descriptor. For debugging purposes, I just want to print them to stderr for now.
The problem I am facing is that I need to add a sleep(1) statement in consume_gunzip_chunk, before doing the read, in order to get things working properly.
Without this sleep(1) statement, my test program will usually output nothing — except once every 8-10 attempts, when the compressed data are extracted correctly.
Question - What am I doing wrong about my arrangement of conditions, such that the sleep(1) call is required to make the gzip-extraction work properly? In a production scenario, working with much larger input files, forcibly waiting a second every 1kB seems like a bad idea.
For reproducibility with the full source code, here are the two relevant files. Here is the header:
/*
* convert.h
*/
#ifndef CONVERT_H
#define CONVERT_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <getopt.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#define CB_VERSION "1.0"
#define LINE_LENGTH_VALUE 65536
#define BUF_LENGTH_VALUE 1024
#define POPEN3_READ 0
#define POPEN3_WRITE 1
typedef int boolean;
extern const boolean kTrue;
extern const boolean kFalse;
const boolean kTrue = 1;
const boolean kFalse = 0;
typedef enum {
kGzip,
kUnknown
} format_t;
typedef struct pthread_data pthread_data_t;
typedef struct popen3_desc popen3_desc_t;
struct pthread_data {
pthread_mutex_t in_lock;
pthread_cond_t in_cond;
pthread_cond_t out_cond;
unsigned char in_buf[BUF_LENGTH_VALUE];
size_t n_in_bytes;
size_t n_in_bytes_written_to_gunzip;
size_t n_out_bytes_read_from_gunzip;
boolean in_eof;
FILE *in_file_ptr;
popen3_desc_t *gunzip_ptr;
char in_line[LINE_LENGTH_VALUE];
};
struct popen3_desc {
int in;
int out;
int err;
};
static const char *name = "convert";
static const char *version = CB_VERSION;
static const char *authors = "Alex Reynolds";
static const char *usage = "\n" \
"Usage: convert --input-format=str <input-file>\n" \
" Process Flags:\n\n" \
" --input-format=str | -f str Input format (str = [ gzip ]; required)\n" \
" --help | -h Show this usage message\n";
static struct convert_globals_t {
char *input_format_str;
format_t input_format;
char **filenames;
int num_filenames;
} convert_globals;
static struct option convert_client_long_options[] = {
{ "input-format", required_argument, NULL, 'f' },
{ "help", no_argument, NULL, 'h' },
{ NULL, no_argument, NULL, 0 }
};
static const char *convert_client_opt_string = "f:h?";
void * consume_gunzip_chunk (void *t_data);
void * consume_gzip_chunk (void *t_data);
void * produce_gzip_chunk (void *t_data);
FILE * new_file_ptr (const char *in_fn);
void delete_file_ptr (FILE **file_ptr);
pid_t popen3 (const char *command,
int *in_desc,
int *out_desc,
int *err_desc,
boolean nonblock_in,
boolean nonblock_outerr);
off_t fsize (const char *fn);
void initialize_globals ();
void parse_command_line_options (int argc,
char **argv);
void print_usage (FILE *stream);
#endif
Here is the implementation:
/*
* convert.c
*/
#include "convert.h"
int main(int argc, char **argv)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> main()\n");
#endif
pthread_t produce_gzip_chunk_thread = NULL;
pthread_t consume_gzip_chunk_thread = NULL;
pthread_t consume_gunzip_chunk_thread = NULL;
pthread_data_t *thread_data = NULL;
parse_command_line_options(argc, argv);
/* initialize thread data */
thread_data = malloc(sizeof(pthread_data_t));
thread_data->n_in_bytes = 0;
thread_data->n_in_bytes_written_to_gunzip = 0;
thread_data->n_out_bytes_read_from_gunzip = 0;
thread_data->in_eof = kFalse;
thread_data->in_file_ptr = new_file_ptr(convert_globals.filenames[0]);
pthread_mutex_init(&(thread_data->in_lock), NULL);
pthread_cond_init(&(thread_data->in_cond), NULL);
pthread_cond_init(&(thread_data->out_cond), NULL);
/* parse input */
if (convert_globals.input_format == kGzip)
{
if (pthread_create(&produce_gzip_chunk_thread, NULL, produce_gzip_chunk, (void *) thread_data) != 0) {
fprintf(stderr, "Error: Could not create gzip chunk production thread\n");
return EXIT_FAILURE;
}
if (pthread_create(&consume_gzip_chunk_thread, NULL, consume_gzip_chunk, (void *) thread_data) != 0) {
fprintf(stderr, "Error: Could not create gzip chunk consumption thread\n");
return EXIT_FAILURE;
}
if (pthread_create(&consume_gunzip_chunk_thread, NULL, consume_gunzip_chunk, (void *) thread_data) != 0) {
fprintf(stderr, "Error: Could not create gunzip chunk consumption thread\n");
return EXIT_FAILURE;
}
if (pthread_join(produce_gzip_chunk_thread, NULL) != 0) {
fprintf(stderr, "Error: Could not join gzip chunk production thread\n");
return EXIT_FAILURE;
}
if (pthread_join(consume_gzip_chunk_thread, NULL) != 0) {
fprintf(stderr, "Error: Could not join gzip chunk consumption thread\n");
return EXIT_FAILURE;
}
if (pthread_join(consume_gunzip_chunk_thread, NULL) != 0) {
fprintf(stderr, "Error: Could not join gunzip chunk consumption thread\n");
return EXIT_FAILURE;
}
}
else
{
/*
handle text formats
*/
}
/* cleanup */
delete_file_ptr(&thread_data->in_file_ptr);
pthread_mutex_destroy(&(thread_data->in_lock));
pthread_cond_destroy(&(thread_data->in_cond));
pthread_cond_destroy(&(thread_data->out_cond));
free(thread_data);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> main()\n");
#endif
return EXIT_SUCCESS;
}
void * consume_gunzip_chunk(void *t_data)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> consume_gunzip_chunk()\n");
#endif
pthread_data_t *d = (pthread_data_t *)t_data;
long n_out_bytes_read_from_gunzip;
pthread_mutex_lock(&d->in_lock);
while(kTrue) {
while (d->n_in_bytes_written_to_gunzip == 0) {
pthread_cond_wait(&d->out_cond, &d->in_lock);
}
if (d->n_in_bytes_written_to_gunzip) {
sleep(1);
n_out_bytes_read_from_gunzip = read(d->gunzip_ptr->out, d->in_line, LINE_LENGTH_VALUE);
#ifdef DEBUG
fprintf(stderr, "Debug: ------------------------ read [%07ld] bytes out from the gunzip process\n", n_out_bytes_read_from_gunzip);
fprintf(stderr, "Debug: ------------------------ gunzip output chunk:\n[%s]\n", d->in_line);
#endif
memset(d->in_line, 0, strlen(d->in_line));
if (n_out_bytes_read_from_gunzip > 0)
d->n_out_bytes_read_from_gunzip = n_out_bytes_read_from_gunzip;
d->n_in_bytes_written_to_gunzip = 0;
pthread_cond_signal(&d->in_cond);
}
if (d->in_eof && (d->n_in_bytes_written_to_gunzip == 0))
break;
}
pthread_mutex_unlock(&d->in_lock);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> consume_gunzip_chunk()\n");
#endif
return NULL;
}
void * consume_gzip_chunk(void *t_data)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> consume_gzip_chunk()\n");
#endif
pthread_data_t *d = (pthread_data_t *)t_data;
long n_in_bytes_written_to_gunzip;
pthread_mutex_lock(&d->in_lock);
while(kTrue) {
while (d->n_in_bytes == 0 && !d->in_eof)
pthread_cond_wait(&d->in_cond, &d->in_lock);
if (d->n_in_bytes) {
#ifdef DEBUG
fprintf(stderr, "Debug: ........ [%07zu] processing chunk\n", d->n_in_bytes);
#endif
if (!d->gunzip_ptr) {
#ifdef DEBUG
fprintf(stderr, "Debug: * setting up gunzip ptr\n");
#endif
d->gunzip_ptr = malloc(sizeof(popen3_desc_t));
if (!d->gunzip_ptr) {
fprintf(stderr, "Error: Could not create gunzip file handle struct\n");
exit(EXIT_FAILURE);
}
popen3("gunzip -c -",
&(d->gunzip_ptr->in),
&(d->gunzip_ptr->out),
&(d->gunzip_ptr->err),
kTrue,
kTrue);
memset(d->in_line, 0, LINE_LENGTH_VALUE);
}
n_in_bytes_written_to_gunzip = (long) write(d->gunzip_ptr->in, d->in_buf, d->n_in_bytes);
#ifdef DEBUG
fprintf(stderr, "Debug: ................ wrote [%07ld] bytes into the gunzip process\n", n_in_bytes_written_to_gunzip);
#endif
if (n_in_bytes_written_to_gunzip > 0)
d->n_in_bytes_written_to_gunzip = n_in_bytes_written_to_gunzip;
d->n_in_bytes = 0;
/* pthread_cond_signal(&d->in_cond); */
pthread_cond_signal(&d->out_cond);
}
if (d->in_eof)
break;
}
pthread_mutex_unlock(&d->in_lock);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> consume_gzip_chunk()\n");
#endif
return NULL;
}
void * produce_gzip_chunk(void *t_data)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> produce_gzip_chunk()\n");
#endif
pthread_data_t *d = (pthread_data_t *)t_data;
unsigned char in_buf[BUF_LENGTH_VALUE];
size_t n_in_bytes = 0;
d->in_eof = kFalse;
pthread_mutex_lock(&d->in_lock);
while(kTrue) {
n_in_bytes = fread(in_buf, sizeof(in_buf[0]), sizeof(in_buf), d->in_file_ptr);
if (n_in_bytes > 0) {
while (d->n_in_bytes != 0 || d->n_out_bytes_read_from_gunzip != 0)
pthread_cond_wait(&d->in_cond, &d->in_lock);
memcpy(d->in_buf, in_buf, n_in_bytes);
d->n_in_bytes = n_in_bytes;
#ifdef DEBUG
fprintf(stderr, "Debug: ######## [%07zu] produced chunk\n", d->n_in_bytes);
#endif
pthread_cond_signal(&d->in_cond);
}
else if (feof(d->in_file_ptr) || ferror(d->in_file_ptr))
break;
}
d->in_eof = kTrue;
pthread_mutex_unlock(&d->in_lock);
pthread_cond_signal(&d->in_cond);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> produce_gzip_chunk()\n");
#endif
return NULL;
}
FILE * new_file_ptr(const char *in_fn)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> new_file_ptr()\n");
#endif
FILE *file_ptr = NULL;
boolean not_stdin = kTrue;
not_stdin = strcmp(in_fn, "-");
file_ptr = (not_stdin) ? fopen(in_fn, "r") : stdin;
if (!file_ptr) {
fprintf(stderr, "Error: Could not open input stream\n");
exit(EXIT_FAILURE);
}
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> new_file_ptr()\n");
#endif
return file_ptr;
}
void delete_file_ptr(FILE **file_ptr)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> delete_file_ptr()\n");
#endif
fclose(*file_ptr);
*file_ptr = NULL;
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> delete_file_ptr()\n");
#endif
}
pid_t popen3(const char *command, int *in_desc, int *out_desc, int *err_desc, boolean nonblock_in, boolean nonblock_outerr)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> popen3()\n");
#endif
int p_stdin[2], p_stdout[2], p_stderr[2];
pid_t pid;
if (pipe(p_stdin) != 0 || pipe(p_stdout) != 0 || pipe(p_stderr) != 0)
return -1;
if (nonblock_in) {
fcntl(p_stdin[POPEN3_WRITE], F_SETFL, fcntl(p_stdin[POPEN3_WRITE], F_GETFL) | O_NONBLOCK);
}
if (nonblock_outerr) {
fcntl(p_stdout[POPEN3_READ], F_SETFL, fcntl(p_stdout[POPEN3_READ], F_GETFL) | O_NONBLOCK);
fcntl(p_stderr[POPEN3_READ], F_SETFL, fcntl(p_stderr[POPEN3_READ], F_GETFL) | O_NONBLOCK);
}
pid = fork();
if (pid < 0)
return pid; /* error */
if (pid == 0) {
close(p_stdin[POPEN3_WRITE]);
close(p_stdout[POPEN3_READ]);
close(p_stderr[POPEN3_READ]);
dup2(p_stdin[POPEN3_READ], fileno(stdin));
dup2(p_stdout[POPEN3_WRITE], fileno(stderr));
dup2(p_stdout[POPEN3_WRITE], fileno(stdout));
execl("/bin/sh", "sh", "-c", command, NULL);
fprintf(stderr, "Error: Could not execl [%s]\n", command);
exit(EXIT_FAILURE);
}
if (in_desc == NULL)
close(p_stdin[POPEN3_WRITE]);
else
*in_desc = p_stdin[POPEN3_WRITE];
if (out_desc == NULL)
close(p_stdout[POPEN3_READ]);
else
*out_desc = p_stdout[POPEN3_READ];
if (err_desc == NULL)
close(p_stderr[POPEN3_READ]);
else
*err_desc = p_stderr[POPEN3_READ];
#ifdef DEBUG
fprintf(stderr, "Debug: New *in_desc = %d\n", *in_desc);
fprintf(stderr, "Debug: New *out_desc = %d\n", *out_desc);
fprintf(stderr, "Debug: New *err_desc = %d\n", *err_desc);
#endif
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> popen3()\n");
#endif
return pid;
}
off_t fsize(const char *fn)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> fsize()\n");
#endif
struct stat st;
if (stat(fn, &st) == 0)
return st.st_size;
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> fsize()\n");
#endif
return EXIT_FAILURE;
}
void initialize_globals()
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> initialize_globals()\n");
#endif
convert_globals.input_format = kUnknown;
convert_globals.filenames = NULL;
convert_globals.num_filenames = 0;
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> initialize_globals()\n");
#endif
}
void parse_command_line_options(int argc, char **argv)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> parse_command_line_options()\n");
#endif
int client_long_index;
int client_opt = getopt_long(argc,
argv,
convert_client_opt_string,
convert_client_long_options,
&client_long_index);
char *in_format_str = NULL;
opterr = 0; /* disable error reporting by GNU getopt */
initialize_globals();
while (client_opt != -1)
{
switch (client_opt)
{
case 'f':
in_format_str = optarg;
break;
case 'h':
print_usage(stdout);
exit(EXIT_SUCCESS);
case '?':
print_usage(stdout);
exit(EXIT_SUCCESS);
default:
break;
}
client_opt = getopt_long(argc,
argv,
convert_client_opt_string,
convert_client_long_options,
&client_long_index);
}
convert_globals.filenames = argv + optind;
convert_globals.num_filenames = argc - optind;
if (!in_format_str) {
fprintf(stderr, "Error: Specified input format was omitted; please specify one of required input formats\n");
print_usage(stderr);
exit(EXIT_FAILURE);
}
else if (convert_globals.num_filenames != 1) {
fprintf(stderr, "Error: Please specify an input file (either a regular file or '-' for stdin\n");
print_usage(stderr);
exit(EXIT_FAILURE);
}
/* map format string to setting */
if (strcmp(in_format_str, "gzip") == 0)
convert_globals.input_format = kGzip;
else {
fprintf(stderr, "Error: Specified input format is unknown; please specify one of required input formats\n");
print_usage(stderr);
exit(EXIT_FAILURE);
}
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> parse_command_line_options()\n");
#endif
}
void print_usage(FILE *stream)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> print_usage()\n");
#endif
fprintf(stream,
"%s\n" \
" version: %s\n" \
" author: %s\n" \
"%s\n",
name,
version,
authors,
usage);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> print_usage()\n");
#endif
}
Here is the build process:
$ mkdir -p objects
$ cc -Wall -Wextra -pedantic -std=c99 -D__STDC_CONSTANT_MACROS -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE=1 -DDEBUG=1 -g -O0 -fno-inline -c convert.c -o objects/convert.o -iquote${PWD}
$ cc -Wall -Wextra -pedantic -std=c99 -D__STDC_CONSTANT_MACROS -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE=1 -DDEBUG=1 -g -O0 -fno-inline objects/convert.o -o convert -lpthread
I have been able to build this test code on OS X and Linux hosts with reasonably modern compile environments.
Thanks in advance for any useful advice!

I will start by saying that I feel pthreads conditions and mutexes were not really necessary here, nor was non-blocking I/O the best reaction to the problems you describe.
In my opinion, the problems you describe with your condition- and mutex-less version are symptoms of forgetting to close() assiduously the ends of your pipes, with the result that a copy of the writing-end file descriptor of the pipe feeding the child process's stdin leaked (into that child or others) alive.
Then, given that a writing-end corresponding to stdin's reading-end still existed, the system did not give EOF but instead blocked indefinitely.
In your case, you did prevent the pipe-end file descriptors from leaking to the spawned child (with the correct close() calls on the child-side of the fork() within your popen3(), although you forgot to close() the wrong-end pipe ends on the parent-side). However, you did not prevent this leakage to all other children! If you call popen3() twice, the leakage of the set of three descriptors into the child is prevented, but as the parent still owns them, when the next call to popen3() happens, after the fork() there are now 6 file descriptors to close (The old set of three and and the new set of three you just created).
In your case, therefore, you should set the close-on-exec flag on those pipe ends, thusly:
fcntl(fdIn [PIPEWR], F_SETFD, fcntl(fdIn [PIPEWR], F_GETFD) | FD_CLOEXEC);
fcntl(fdOut[PIPERD], F_SETFD, fcntl(fdOut[PIPERD], F_GETFD) | FD_CLOEXEC);
fcntl(fdErr[PIPERD], F_SETFD, fcntl(fdErr[PIPERD], F_GETFD) | FD_CLOEXEC);
Here is code that spawns 6 threads and 3 processes, and passes its input unmodified to the output, after internally compressing then decompressing it. It effectively implements gzip -c - | XOR 0x55 | XOR 0x55 | gunzip -c - | cat, where:
Standard input is fed to to gzip by thread srcThrd.
gzip's output is read by thread a2xor0Thrd and fed to thread xor0Thrd.
Thread xor0Thrd XORs its input with 0x55 before passing it on to thread xor1Thrd.
Thread xor1Thrd XORs its input with 0x55 before passing it on to thread xor22BThrd.
Thread xor22BThrd feeds its input to process gunzip.
Process gunzip feeds its output directly (without going through a thread) to cat
Process cat's output is read by thread dstThrd and printed to standard output.
Compression is done by inter-process pipe communication, while XORing is done by intra-process pipe communication. No mutexes or condition variables are used. main() is extremely easy to understand. This code should be easy to extend to your situation.
/* Includes */
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
/* Defines */
#define PIPERD 0
#define PIPEWR 1
/* Data structures */
typedef struct PIPESET{
int Ain[2];
int Aout[2];
int Aerr[2];
int xor0[2];
int xor1[2];
int xor2[2];
int Bin[2];
int BoutCin[2];
int Berr[2];
int Cout[2];
int Cerr[2];
} PIPESET;
/* Function Implementations */
/**
* Source thread main method.
*
* Slurps from standard input and feeds process A.
*/
void* srcThrdMain(void* arg){
PIPESET* pipeset = (PIPESET*)arg;
char c;
while(read(0, &c, 1) > 0){
write(pipeset->Ain[PIPEWR], &c, 1);
}
close(pipeset->Ain[PIPEWR]);
pthread_exit(NULL);
}
/**
* A to XOR0 thread main method.
*
* Manually pipes from standard output of process A to input of thread XOR0.
*/
void* a2xor0ThrdMain(void* arg){
PIPESET* pipeset = (PIPESET*)arg;
char buf[65536];
ssize_t bytesRead;
while((bytesRead = read(pipeset->Aout[PIPERD], buf, 65536)) > 0){
write(pipeset->xor0[PIPEWR], buf, bytesRead);
}
close(pipeset->xor0[PIPEWR]);
pthread_exit(NULL);
}
/**
* XOR0 thread main method.
*
* XORs input with 0x55 and outputs to input of XOR1.
*/
void* xor0ThrdMain(void* arg){
PIPESET* pipeset = (PIPESET*)arg;
char c;
while(read(pipeset->xor0[PIPERD], &c, 1) > 0){
c ^= 0x55;
write(pipeset->xor1[PIPEWR], &c, 1);
}
close(pipeset->xor1[PIPEWR]);
pthread_exit(NULL);
}
/**
* XOR1 thread main method.
*
* XORs input with 0x55 and outputs to input of process B.
*/
void* xor1ThrdMain(void* arg){
PIPESET* pipeset = (PIPESET*)arg;
char c;
while(read(pipeset->xor1[PIPERD], &c, 1) > 0){
c ^= 0x55;
write(pipeset->xor2[PIPEWR], &c, 1);
}
close(pipeset->xor2[PIPEWR]);
pthread_exit(NULL);
}
/**
* XOR2 to B thread main method.
*
* Manually pipes from input (output of XOR1) to input of process B.
*/
void* xor22BThrdMain(void* arg){
PIPESET* pipeset = (PIPESET*)arg;
char buf[65536];
ssize_t bytesRead;
while((bytesRead = read(pipeset->xor2[PIPERD], buf, 65536)) > 0){
write(pipeset->Bin[PIPEWR], buf, bytesRead);
}
close(pipeset->Bin[PIPEWR]);
pthread_exit(NULL);
}
/**
* Destination thread main method.
*
* Manually copies the standard output of process C to the standard output.
*/
void* dstThrdMain(void* arg){
PIPESET* pipeset = (PIPESET*)arg;
char c;
while(read(pipeset->Cout[PIPERD], &c, 1) > 0){
write(1, &c, 1);
}
pthread_exit(NULL);
}
/**
* Set close on exec flag on given descriptor.
*/
void setCloExecFlag(int fd){
fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
}
/**
* Set close on exec flag on given descriptor.
*/
void unsetCloExecFlag(int fd){
fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) & ~FD_CLOEXEC);
}
/**
* Pipe4.
*
* Create a pipe with some ends possibly marked close-on-exec.
*/
#define PIPE4_FLAG_NONE (0U)
#define PIPE4_FLAG_RD_CLOEXEC (1U << 0)
#define PIPE4_FLAG_WR_CLOEXEC (1U << 1)
int pipe4(int fd[2], int flags){
int ret = pipe(fd);
if(flags&PIPE4_FLAG_RD_CLOEXEC){setCloExecFlag(fd[PIPERD]);}
if(flags&PIPE4_FLAG_WR_CLOEXEC){setCloExecFlag(fd[PIPEWR]);}
return ret;
}
/**
* Pipe4 explicit derivatives.
*/
#define pipe4_cloexec(fd) pipe4((fd), PIPE4_FLAG_RD_CLOEXEC|PIPE4_FLAG_WR_CLOEXEC)
/**
* Popen4.
*
* General-case for spawning a process and tethering it with cloexec pipes on stdin,
* stdout and stderr.
*
* #param [in] cmd The command to execute.
* #param [in/out] pin The pointer to the cloexec pipe for stdin.
* #param [in/out] pout The pointer to the cloexec pipe for stdout.
* #param [in/out] perr The pointer to the cloexec pipe for stderr.
* #param [in] flags A bitwise OR of flags to this function. Available
* flags are:
*
* POPEN4_FLAG_NONE:
* Explicitly specify no flags.
* POPEN4_FLAG_NOCLOSE_PARENT_STDIN,
* POPEN4_FLAG_NOCLOSE_PARENT_STDOUT,
* POPEN4_FLAG_NOCLOSE_PARENT_STDERR:
* Don't close pin[PIPERD], pout[PIPEWR] and perr[PIPEWR] in the parent,
* respectively.
* POPEN4_FLAG_CLOSE_CHILD_STDIN,
* POPEN4_FLAG_CLOSE_CHILD_STDOUT,
* POPEN4_FLAG_CLOSE_CHILD_STDERR:
* Close the respective streams in the child. Ignores pin, pout and perr
* entirely. Overrides a NOCLOSE_PARENT flag for the same stream.
*/
#define POPEN4_FLAG_NONE (0U)
#define POPEN4_FLAG_NOCLOSE_PARENT_STDIN (1U << 0)
#define POPEN4_FLAG_NOCLOSE_PARENT_STDOUT (1U << 1)
#define POPEN4_FLAG_NOCLOSE_PARENT_STDERR (1U << 2)
#define POPEN4_FLAG_CLOSE_CHILD_STDIN (1U << 3)
#define POPEN4_FLAG_CLOSE_CHILD_STDOUT (1U << 4)
#define POPEN4_FLAG_CLOSE_CHILD_STDERR (1U << 5)
pid_t popen4(const char* cmd, int pin[2], int pout[2], int perr[2], int flags){
/********************
** FORK PROCESS **
********************/
pid_t ret = fork();
if(ret < 0){
/**
* Error in fork(), still in parent.
*/
fprintf(stderr, "fork() failed!\n");
return ret;
}else if(ret == 0){
/**
* Child-side of fork
*/
if(flags & POPEN4_FLAG_CLOSE_CHILD_STDIN){
close(0);
}else{
unsetCloExecFlag(pin [PIPERD]);
dup2(pin [PIPERD], 0);
}
if(flags & POPEN4_FLAG_CLOSE_CHILD_STDOUT){
close(1);
}else{
unsetCloExecFlag(pout[PIPEWR]);
dup2(pout[PIPEWR], 1);
}
if(flags & POPEN4_FLAG_CLOSE_CHILD_STDERR){
close(2);
}else{
unsetCloExecFlag(perr[PIPEWR]);
dup2(perr[PIPEWR], 2);
}
execl("/bin/sh", "sh", "-c", cmd, NULL);
fprintf(stderr, "exec() failed!\n");
exit(-1);
}else{
/**
* Parent-side of fork
*/
if(~flags & POPEN4_FLAG_NOCLOSE_PARENT_STDIN &&
~flags & POPEN4_FLAG_CLOSE_CHILD_STDIN){
close(pin [PIPERD]);
}
if(~flags & POPEN4_FLAG_NOCLOSE_PARENT_STDOUT &&
~flags & POPEN4_FLAG_CLOSE_CHILD_STDOUT){
close(pout[PIPEWR]);
}
if(~flags & POPEN4_FLAG_NOCLOSE_PARENT_STDERR &&
~flags & POPEN4_FLAG_CLOSE_CHILD_STDERR){
close(perr[PIPEWR]);
}
return ret;
}
/* Unreachable */
return ret;
}
/**
* Main Function.
*
* Sets up the whole piping scheme.
*/
int main(int argc, char* argv[]){
pthread_t srcThrd, a2xor0Thrd, xor0Thrd, xor1Thrd, xor22BThrd, dstThrd;
pid_t gzip, gunzip, cat;
PIPESET pipeset;
pipe4_cloexec(pipeset.Ain);
pipe4_cloexec(pipeset.Aout);
pipe4_cloexec(pipeset.Aerr);
pipe4_cloexec(pipeset.Bin);
pipe4_cloexec(pipeset.BoutCin);
pipe4_cloexec(pipeset.Berr);
pipe4_cloexec(pipeset.Cout);
pipe4_cloexec(pipeset.Cerr);
pipe4_cloexec(pipeset.xor0);
pipe4_cloexec(pipeset.xor1);
pipe4_cloexec(pipeset.xor2);
/* Spawn processes */
gzip = popen4("gzip -c -", pipeset.Ain, pipeset.Aout, pipeset.Aerr, POPEN4_FLAG_NONE);
gunzip = popen4("gunzip -c -", pipeset.Bin, pipeset.BoutCin, pipeset.Berr, POPEN4_FLAG_NONE);
cat = popen4("cat", pipeset.BoutCin, pipeset.Cout, pipeset.Cerr, POPEN4_FLAG_NONE);
/* Spawn threads */
pthread_create(&srcThrd, NULL, srcThrdMain, &pipeset);
pthread_create(&a2xor0Thrd, NULL, a2xor0ThrdMain, &pipeset);
pthread_create(&xor0Thrd, NULL, xor0ThrdMain, &pipeset);
pthread_create(&xor1Thrd, NULL, xor1ThrdMain, &pipeset);
pthread_create(&xor22BThrd, NULL, xor22BThrdMain, &pipeset);
pthread_create(&dstThrd, NULL, dstThrdMain, &pipeset);
pthread_join(srcThrd, (void**)NULL);
pthread_join(a2xor0Thrd, (void**)NULL);
pthread_join(xor0Thrd, (void**)NULL);
pthread_join(xor1Thrd, (void**)NULL);
pthread_join(xor22BThrd, (void**)NULL);
pthread_join(dstThrd, (void**)NULL);
return 0;
}
Commentary on your own code
There are many issues with your code, most of which have nothing to do with threading.
You don't close() the file descriptor d->gunzip_ptr->in. This means that gunzip can never know that no more input is forthcoming on its stdin, so it will never exit.
Since gunzip doesn't ever exit, it will never close() its stdout, and thus a blocking read() at the other end will never unblock. A non-blocking read will instead always give -1, with errno == EAGAIN.
Your popen3() doesn't close() p_stdin[POPEN3_READ], p_stdout[POPEN3_WRITE] or p_stderr[POPEN3_WRITE] on the parent side of the fork(). Only the child should have those descriptors. Failing to close these means that when the parent itself tries to read the stdout and stderr of the child, it will never see EOF, again for the same reasons as above: Because it itself still owns a write-end pipe in which it could write, making new data appear to the read-end.
Your code implicitly relies on gunzip writing out at least one byte for every 1024 you write in. There is no guarantee that this will be the case, since gunzip may, at its leisure, buffer internally.
This is because your code reads then copies chunks of at most BUF_LENGTH_VALUE bytes into d->in_buf. You then assign the number of bytes you read through fread() to d->n_in_bytes. This same d->n_in_bytes is used in your write() call to write to gunzip's stdin. You then signal for consume_gunzip_chunk() to wake up, then pthread_cond_wait()'s for the next gzip-compressed chunk. But this gzip-compressed chunk may never come, since there is no guarantee that gunzip will be able to unpack useful output from just the first 1024 bytes of input, nor even a guarantee that it will write() it out instead of buffering it until it has, say, 4096 bytes (a full page) of output. Therefore, the read() call in consume_gunzip_chunk() may never succeed (or even return, if read() was blocking). And if read() never returns, then consume_gunzip_chunk() doesn't signal d->in_cond, and so all three threads get stuck. And even if read() is non-blocking, the last block of output from gzip may never come, since gzip's input is never closed, so it doesn't flush out its buffers by write()'ing them out, so read() on the other end will never get useful data and no amount of pleading will elicit it without a close().
POSSIBLE (LIKELY?) CAUSE OF BUG: d->n_out_bytes_read_from_gunzip, once it becomes non-0, will never become 0 again. This means that the extremely baffling
while (d->n_in_bytes != 0 || d->n_out_bytes_read_from_gunzip != 0)
pthread_cond_wait(&d->in_cond, &d->in_lock);
within produce_gzip_chunk() will, once entered with d->n_out_bytes_read_from_gunzip != 0, forever remain stuck. By calling sleep(1) within consume_gunzip_chunk(), which sets d->n_out_bytes_read_from_gunzip, you may have defused the problem by reading all input before consume_gunzip_chunk() could lock up the system by setting d->n_out_bytes_read_from_gunzip to a non-zero value.
There are two threads that call pthread_cond_wait(&d->in_cond, &d->in_lock);, these being produce_gzip_chunk() and consume_gzip_chunk(). There is absolutely no guarantee that when consume_gunzip_chunk() calls pthread_cond_signal(&d->in_cond);, that the "correct" thread (whichever it is in your design) will receive the signal. To ensure that all of them will, use pthread_cond_broadcast(), but then you expose yourself to the thundering herd problem. Needing to use pthread_cond_broadcast() in this situation is, again, a symptom of a bad design in my opinion.
Related, you call pthread_cond_signal(&d->in_cond) within a thread (indeed, a function) in which you call pthread_cond_wait(&d->in_cond, &d->in_lock). What purpose does that serve?
You use d->in_lock for too many disparate purposes, exposing yourself to the possibility of deadlock, or low performance due to excessive protection. In particular you use it as the protection for both d->in_cond and d->out_cond. This is too strong a protection – the output of gunzip into d->in_line should be able to happen simultaneously with the input of gunzip being written into and out of d->in_buf.
Within consume_gunzip_chunk(), you have
while (d->n_in_bytes_written_to_gunzip == 0) {
pthread_cond_wait(&d->out_cond, &d->in_lock);
}
if (d->n_in_bytes_written_to_gunzip) {
...
This if can never fail! Is there a case you may have in mind?
Consider making the entire struct pthread_data volatile (or at least those integer elements which are used by multiple threads), since the compiler might decide to optimize out loads and stores that should, in fact, remain.
Compliments
So as to not sound too negative, I would like to say that in general your problems were not due to misuse of the pthreads API but due to erroneous consumer-producer logic and lack of close()s. Additionally, you appear to understand that pthread_cond_wait() may wake up spuriously, and so you have wrapped it up in a loop that checks the invariant.
In the future:
I would use pipes, even between threads. This absolves you from needing to implement your own consumer-producer scheme; The kernel has solved it for you already, and provides you with the pipe(), read() and write() primitives, which are all you need to take advantage of this ready-made solution. It also makes the code cleaner and void of mutexes and condition variables. One must simply be diligent in closing the ends, and one must be supremely careful around pipes in the presence of fork(). The rules are simple:
If a pipe's write-end exists, a read() on an open read-end will not give EOF but will block or EAGAIN.
If a pipe's write-ends have all been closed, a read() on an open read-end will give EOF.
If a pipe's read-ends have all been closed, a write() to any of its write-ends will cause SIGPIPE.
fork() duplicates the entire process, including all descriptors (modulo maybe crazy stuff in pthread_atfork())!

Ah. So I think I misunderstood the question.... sorry.
I had thought you wanted to run gunzip and then one other internal filter, and wanted to do that 'N' times.
It seems what you really want to do is run many stages of filters, one after the other... some using external commands and some (perhaps ?) internal to the program. Hence the desire to manage some inter-stage buffering.
So... I've had another go at this. The objective is to run any number of stages, starting with the input stage, then extrenal command or internal function "filter" stages, and finally the output stage. Each external command stage had three pthreads -- for stdin, stdout and stderr. Internal function stages use one pthread and the initial input and final output one pthread each. Between the stages is a small pipe structure (called a "straw") to "double buffer" and decouple the stages... I hope this is closer to what you had in mind.
The "straw" is the essence of the thing:
struct straw
{
pthread_mutex_t* mutex ;
struct lump* free ;
pthread_cond_t* free_cond ;
bool free_waiting ;
struct lump* ready ;
pthread_cond_t* ready_cond ;
bool ready_waiting ;
struct lump* lumps[2] ;
} ;
where a struct lump contains a buffer and what-not. The "straw" has two such "lumps", and at any moment one pthread may be filling one lump, while another is draining the other. Or both lumps may be free (on the free list) or both ready (full) waiting on the ready list.
Then to aquire an empty lump to fill it (eg when reading from a pipe):
static struct lump*
lump_acquire(struct straw* strw)
{
struct lump* lmp ;
pthread_mutex_lock(strw->mutex) ;
while (strw->free == NULL)
{
strw->free_waiting = true ;
pthread_cond_wait(strw->free_cond, strw->mutex) ;
strw->free_waiting = false ;
} ;
lmp = strw->free ;
strw->free = lmp->next ;
pthread_mutex_unlock(strw->mutex) ;
lmp->next = NULL ; /* tidy */
lmp->ptr = lmp->end = lmp->buff ; /* empty */
lmp->done = false ;
return lmp ;
} ;
Then to blow the completed lump into (one end of) the straw.
static void
lump_blow(struct lump* lmp)
{
struct straw* strw ;
strw = lmp->strw ;
qassert((lmp == strw->lumps[0]) || (lmp == strw->lumps[1])) ;
qassert( (lmp->buff <= lmp->ptr)
&& (lmp->ptr <= lmp->end)
&& (lmp->end <= lmp->limit) ) ;
lmp->ptr = lmp->buff ;
pthread_mutex_lock(strw->mutex) ;
if (strw->ready == NULL)
strw->ready = lmp ;
else
strw->ready->next = lmp ;
lmp->next = NULL ;
if (strw->ready_waiting)
pthread_cond_signal(strw->ready_cond) ;
pthread_mutex_unlock(strw->mutex) ;
} ;
To suck a lump out of (the other end of) the straw:
static struct lump*
lump_suck(struct straw* strw)
{
struct lump* lmp ;
pthread_mutex_lock(strw->mutex) ;
while (strw->ready == NULL)
{
strw->ready_waiting = true ;
pthread_cond_wait(strw->ready_cond, strw->mutex) ;
strw->ready_waiting = false ;
} ;
lmp = strw->ready ;
strw->ready = lmp->next ;
pthread_mutex_unlock(strw->mutex) ;
qassert( (lmp->buff <= lmp->ptr)
&& (lmp->ptr <= lmp->end)
&& (lmp->end <= lmp->limit) ) ;
lmp->ptr = lmp->buff ; /* lmp->ptr..lmp->end */
lmp->next = NULL ; /* tidy */
return lmp ;
} ;
And the final piece, freeing a lump once it has been drained:
static void
lump_free(struct lump* lmp)
{
struct straw* strw ;
strw = lmp->strw ;
qassert((lmp == strw->lumps[0]) || (lmp == strw->lumps[1])) ;
qassert( (lmp->buff <= lmp->ptr)
&& (lmp->ptr <= lmp->end)
&& (lmp->end <= lmp->limit) ) ;
pthread_mutex_lock(strw->mutex) ;
if (strw->free == NULL)
strw->free = lmp ;
else
strw->free->next = lmp ;
lmp->next = NULL ; /* end of list of free */
lmp->ptr = lmp->end = lmp->buff ; /* empty */
lmp->done = false ;
if (strw->free_waiting)
pthread_cond_signal(strw->free_cond) ;
pthread_mutex_unlock(strw->mutex) ;
} ;
The entire program is too big to fit in an answer -- see: pipework.c where that starts:
/*==============================================================================
* pipework.c
*
* Copyright (c) Chris Hall (GMCH) 2014, All rights reserved.
*
* Though you may do what you like with this, provided you recognise that
* it is offered "as is", gratis, and may or may not be fit for any purpose
* whatsoever -- you are on your own.
*
*------------------------------------------------------------------------------
*
* This will read from stdin, pass the data through an arbitrary number of
* "filter" stages and finally write the result to stdout.
*
* A filter stage may be an external command taking a piped stdin and
* outputting to a piped stdout. Anything it says to stderr is collected
* and output to the program's stderr.
*
* A filter stage may also be an internal function.
*
* The input, filter and output stages are implemented as a number of pthreads,
* with internal, miniature pipes (called "straws") between them. All I/O is
* blocking. This is an experiment in the use of pthreads to simplify things.
*
* ============================
* This is v0.08 of 4-Jul-2014
* ============================
*
* The 'main' below runs eight stages: input, 4 commands, 2 internal filters
* and the output. The output should be an exact copy of the input.
*
* In order to test the stderr handling, the following small perl script is
* used as two of the command filters:
*
* chatter.pl
* --------------------------------------------------------
use strict ;
use warnings ;
my $line = 0 ;
while (<STDIN>)
{
my $len = length($_) ;
my $r = rand ;
$line += 1 ;
print STDERR "|$line:$len:$r|" ;
if (int($r * 100) == 0)
{
print STDERR "\n" ;
} ;
print $_ ;
} ;
* --------------------------------------------------------
*
*------------------------------------------------------------------------------
* Limitations
*
* * this will crash if it gets some error its not expecting or not
* designed to overcome. Clearly, to be useful this needs to be more
* robust and more informative.
*
* * some (possible/theoretical) errors are simply ignored.
*
* * no attempt is made to collect up the child processes or to discover
* their return codes. If the child process reports errors or anything
* else on stderr, then that will be visible. But otherwise, if it just
* crashes then the pipeline will run to completion, but the result may
* be nonsense.
*
* * if one of the child processes stalls, the whole thing stalls.
*
* * an I/O error in a stage will send 'end' downstream, but the program
* will continue until everything upstream has completed.
*
* * generally... not intended for production use !!
*/
And the perl script is available as: chatter.pl
HTH

Regarding the part 'How to manage two or more consumers via pthreads?' of your post let me cite these points about 'Designing Threaded Programs':
In general though, in order for a program to take advantage of
Pthreads, it must be able to be organized into discrete, independent
tasks which can execute concurrently. For example, if routine1 and
routine2 can be interchanged, interleaved and/or overlapped in real
time, they are candidates for threading.
and
Several common models for threaded programs exist:
Manager/worker: a single thread, the manager assigns work to other threads, the workers. Typically, the manager handles all input and
parcels out work to the other tasks. At least two forms of the
manager/worker model are common: static worker pool and dynamic
worker pool.
Pipeline: a task is broken into a series of suboperations, each of which is handled in series, but concurrently, by a different thread.
An automobile assembly line best describes this model.
Peer: similar to the manager/worker model, but after the main thread creates other threads, it participates in the work.
Regarding your problem...
The problem I am facing is that I need to add a sleep(1) statement in
consume_gunzip_chunk, before doing the read, in order to get things
working properly.
Eric Lippert Best Practices with Multithreading in C# might not solve it but, they should help you finding the right solution to your multi-threaded program, particularly points 5, and 8:
5.At all costs avoid shared memory. Most threading bugs are caused by a failure to understand real-world shared memory semantics. If you
must make threads, treat them as though they were processes: give them
everything they need to do their work and let them work without
modifying the memory associated with any other thread. Just like a
process doesn't get to modify the memory of any other process.
8.If you use Thread.Sleep with an argument other than zero or one in any production code, you are possibly doing something wrong. Threads
are expensive; you don't pay a worker to sleep, so don't pay a thread
to sleep either. If you are using sleeps to solve a correctness issue
by avoiding a timing problem -- as you appear to be in your code --
then you definitely have done something deeply wrong. Multithreaded
code needs to be correct irrespective of accidents of timing.

Related

How do I expose custom files similar to /procfs on Linux?

I have a writer process which outputs its status at regular intervals as a readable chunck of wchar_t.
I would need to ensure the following properties:
When there's and update, the readers shouldn't read partial/corrupted data
The file should be volatile in memory so that when the writer quits, the file is gone
The file content size is variable
Multiple readers could read the file in parallel, doesn't matter if the content is synced, as long as is non partial for each client
If using truncate and then write, clients should only read the full file and not observe such partial operations
How could I implement such /procfs-like file, outside /procfs filesystem?
I was thinking to use classic c Linux file APIs and create something under /dev/shm by default, but I find it hard to implement effectively point 1 and 5 most of all.
How could I expose such file?
Typical solution is to create a new file in the same directory, then rename (hardlink) it over the old one.
This way, processes see either an old one or a new one, never a mix; and it only depends on the moment when they open the file.
The Linux kernel takes care of the caching, so if the file is accessed often, it will be in RAM (page cache). The writer must, however, remember to delete the file when it exits.
A better approach is to use fcntl()-based advisory record locks (typically over the entire file, i.e. .l_whence = SEEK_SET, .l_start = 0, .l_len = 0).
The writer will grab a write/exclusive lock before truncating and rewriting the contents, and readers a read/shared lock before reading the contents.
This requires cooperation, however, and the writer must be prepared to not be able to lock (or grabbing the lock may take undefined amount of time).
A Linux-only scheme would be to use atomic replacement (via rename/hardlinking), and file leases.
(When the writer process has an exclusive lease on an open file, it gets a signal whenever another process wants to open that same file (inode, not file name). It has at least a few seconds to downgrade or release the lease, at which point the opener gets access to the contents.)
Basically, the writer process creates an empty status file, and obtains exclusive lease on it. Whenever the writer receives a signal that a reader wants to access the status file, it writes the current status to the file, releases the lease, creates a new empty file in the same directory (same mount suffices) as the status file, obtains an exclusive lease on that one, and renames/hardlinks it over the status file.
If the status file contents do not change all the time, only periodically, then the writer process creates an empty status file, and obtains exclusive lease on it. Whenever the writer receives a signal that a reader wants to access the (empty) status file, it writes the current status to the file, and releases the lease. Then, when the writer process' status is updated, and there is no lease yet, it creates a new empty file in the status file directory, takes an exclusive lease on it, and renames/hardlinks over the status file.
This way, the status file is always updated just before a reader opens it, and only then. If there are multiple readers at the same time, they can open the status file without interruption when the writer releases the lease.
It is important to note that the status information should be collected in a single structure or similar, so that writing it out to the status file is efficient. Leases are automatically broken if not released soon enough (but there are a few seconds at least to react), and the lease is on the inode – file contents – not the file name, so we still need the atomic replacement.
Here's a crude example implementation:
#define _POSIX_C_SOURCE 200809L
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdarg.h>
#include <inttypes.h>
#include <unistd.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#define LEASE_SIGNAL (SIGRTMIN+0)
static pthread_mutex_t status_lock = PTHREAD_MUTEX_INITIALIZER;
static int status_changed = 0;
static size_t status_len = 0;
static char *status = NULL;
static pthread_t status_thread;
static char *status_newpath = NULL;
static char *status_path = NULL;
static int status_fd = -1;
static int status_errno = 0;
char *join2(const char *src1, const char *src2)
{
const size_t len1 = (src1) ? strlen(src1) : 0;
const size_t len2 = (src2) ? strlen(src2) : 0;
char *dst;
dst = malloc(len1 + len2 + 1);
if (!dst) {
errno = ENOMEM;
return NULL;
}
if (len1 > 0)
memcpy(dst, src1, len1);
if (len2 > 0)
memcpy(dst+len1, src2, len2);
dst[len1+len2] = '\0';
return dst;
}
static void *status_worker(void *payload __attribute__((unused)))
{
siginfo_t info;
sigset_t mask;
int err, num;
/* This thread blocks all signals except LEASE_SIGNAL. */
sigfillset(&mask);
sigdelset(&mask, LEASE_SIGNAL);
err = pthread_sigmask(SIG_BLOCK, &mask, NULL);
if (err)
return (void *)(intptr_t)err;
/* Mask for LEASE_SIGNAL. */
sigemptyset(&mask);
sigaddset(&mask, LEASE_SIGNAL);
/* This thread can be canceled at any cancellation point. */
pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
while (1) {
num = sigwaitinfo(&mask, &info);
if (num == -1 && errno != EINTR)
return (void *)(intptr_t)errno;
/* Ignore all but the lease signals related to the status file. */
if (num != LEASE_SIGNAL || info.si_signo != LEASE_SIGNAL || info.si_fd != status_fd)
continue;
/* We can be canceled at this point safely. */
pthread_testcancel();
/* Block cancelability for a sec, so that we maintain the mutex correctly. */
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
pthread_mutex_lock(&status_lock);
status_changed = 0;
/* Write the new status to the file. */
if (status && status_len > 0) {
const char *ptr = status;
const char *const end = status + status_len;
ssize_t n;
while (ptr < end) {
n = write(status_fd, ptr, (size_t)(end - ptr));
if (n > 0) {
ptr += n;
} else
if (n != -1) {
if (!status_errno)
status_errno = EIO;
break;
} else
if (errno != EINTR) {
if (!status_errno)
status_errno = errno;
break;
}
}
}
/* Close and release lease. */
close(status_fd);
status_fd = -1;
/* After we release the mutex, we can be safely canceled again. */
pthread_mutex_unlock(&status_lock);
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
pthread_testcancel();
}
}
static int start_status_worker(void)
{
sigset_t mask;
int result;
pthread_attr_t attrs;
/* This thread should block LEASE_SIGNAL signals. */
sigemptyset(&mask);
sigaddset(&mask, LEASE_SIGNAL);
result = pthread_sigmask(SIG_BLOCK, &mask, NULL);
if (result)
return errno = result;
/* Create the worker thread. */
pthread_attr_init(&attrs);
pthread_attr_setstacksize(&attrs, 2*PTHREAD_STACK_MIN);
result = pthread_create(&status_thread, &attrs, status_worker, NULL);
pthread_attr_destroy(&attrs);
/* Ready. */
return 0;
}
int set_status(const char *format, ...)
{
va_list args;
char *new_status = NULL;
int len;
if (!format)
return errno = EINVAL;
va_start(args, format);
len = vasprintf(&new_status, format, args);
va_end(args);
if (len < 0)
return errno = EINVAL;
pthread_mutex_lock(&status_lock);
free(status);
status = new_status;
status_len = len;
status_changed++;
/* Do we already have a status file prepared? */
if (status_fd != -1 || !status_newpath) {
pthread_mutex_unlock(&status_lock);
return 0;
}
/* Prepare the status file. */
do {
status_fd = open(status_newpath, O_WRONLY | O_CREAT | O_CLOEXEC, 0666);
} while (status_fd == -1 && errno == EINTR);
if (status_fd == -1) {
pthread_mutex_unlock(&status_lock);
return 0;
}
/* In case of failure, do cleanup. */
do {
/* Set lease signal. */
if (fcntl(status_fd, F_SETSIG, LEASE_SIGNAL) == -1)
break;
/* Get exclusive lease on the status file. */
if (fcntl(status_fd, F_SETLEASE, F_WRLCK) == -1)
break;
/* Replace status file with the new, leased one. */
if (rename(status_newpath, status_path) == -1)
break;
/* Success. */
pthread_mutex_unlock(&status_lock);
return 0;
} while (0);
if (status_fd != -1) {
close(status_fd);
status_fd = -1;
}
unlink(status_newpath);
pthread_mutex_unlock(&status_lock);
return 0;
}
int main(int argc, char *argv[])
{
char *line = NULL;
size_t size = 0;
ssize_t len;
if (argc != 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
const char *argv0 = (argc > 0 && argv[0]) ? argv[0] : "(this)";
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv0);
fprintf(stderr, " %s STATUS-FILE\n", argv0);
fprintf(stderr, "\n");
fprintf(stderr, "This program maintains a pseudofile-like status file,\n");
fprintf(stderr, "using the contents from standard input.\n");
fprintf(stderr, "Supply an empty line to exit.\n");
fprintf(stderr, "\n");
return EXIT_FAILURE;
}
status_path = join2(argv[1], "");
status_newpath = join2(argv[1], ".new");
unlink(status_path);
unlink(status_newpath);
if (start_status_worker()) {
fprintf(stderr, "Cannot start status worker thread: %s.\n", strerror(errno));
return EXIT_FAILURE;
}
if (set_status("Empty\n")) {
fprintf(stderr, "Cannot create initial empty status: %s.\n", strerror(errno));
return EXIT_FAILURE;
}
while (1) {
len = getline(&line, &size, stdin);
if (len < 1)
break;
line[strcspn(line, "\n")] = '\0';
if (line[0] == '\0')
break;
set_status("%s\n", line);
}
pthread_cancel(status_thread);
pthread_join(status_thread, NULL);
if (status_fd != -1)
close(status_fd);
unlink(status_path);
unlink(status_newpath);
return EXIT_SUCCESS;
}
Save the above as server.c, then compile using e.g.
gcc -Wall -Wextra -O2 server.c -lpthread -o server
This implements a status server, storing each line from standard input to the status file if necessary. Supply an empty line to exit. For example, to use the file status in the current directory, just run
./server status
Then, if you use another terminal window to examine the directory, you see it has a file named status (with typically zero size). But, cat status shows you its contents; just like procfs/sysfs pseudofiles.
Note that the status file is only updated if necessary, and only for the first reader/accessor after status changes. This keeps writer/server overhead and I/O low, even if the status changes very often.
The above example program uses a worker thread to catch the lease-break signals. This is because pthread mutexes cannot be locked or released safely in a signal handler (pthread_mutex_lock() etc. are not async-signal safe). The worker thread maintains its cancelability, so that it won't be canceled when it holds the mutex; if canceled during that time, it will be canceled after it releases the mutex. It is careful that way.
Also, the temporary replacement file is not random, it is just the status file name with .new appended at end. Anywhere on the same mount would work fine.
As long as other threads also block the lease break signal, this works fine in multithreaded programs, too. (If you create other threads after the worker thread, they'll inherit the correct signal mask from the main thread; start_status_worker() sets the signal mask for the calling thread.)
I do trust the approach in the program, but there may be bugs (and perhaps even thinkos) in this implementation. If you find any, please comment or edit.

Towards understanding availability of xdg-open

I want to open an image, and in Windows I do:
#include <windows.h>
..
ShellExecute(NULL, "open", "https://gsamaras.files.wordpress.com/2018/11/chronosgod.png", NULL, NULL, SW_SHOWNORMAL);
I would like to use a Linux approach, where it's so much easier to run something on the fly. Example:
char s[100];
snprintf(s, sizeof s, "%s %s", "xdg-open", "https://gsamaras.files.wordpress.com/2018/11/chronosgod.png");
system(s);
In my Ubuntu, it works. However, when running that in Wandbox (Live Demo), or in any other online compiler, I would most likely get an error:
sh: 1: xdg-open: not found
despite the fact that these online compilers seem to live in Linux (checked). I don't expect the online compiler to open a browser for me, but I did expect the code to run without an error. Ah, and forget Mac (personal laptop, limiting my machines).
Since I have no other Linux machine to check, my question is: Can I expect that this code will work in most of the major Linux distributions?
Maybe the fact that it failed on online compilers is misleading.
PS: This is for my post on God of Time, so no worries about security.
Although Antti Haapala already completely answered the question, I thought some comments about the approach, and an example function making safe use trivial, might be useful.
xdg-open is part of desktop integration utilities from freedesktop.org, as part of the Portland project. One can expect them to be available on any computer running a desktop environment participating in freedesktop.org. This includes GNOME, KDE, and Xfce.
Simply put, this is the recommended way of opening a resource (be it a file or URL) when a desktop environment is in use, in whatever application the user prefers.
If there is no desktop environment in use, then there is no reason to expect xdg-open to be available either.
For Linux, I would suggest using a dedicated function, perhaps along the following lines. First, a couple of internal helper functions:
#define _POSIX_C_SOURCE 200809L
#define _GNU_SOURCE
//
// SPDX-License-Identifier: CC0-1.0
//
#include <stdlib.h>
#include <unistd.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <dirent.h>
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
/* Number of bits in an unsigned long. */
#define ULONG_BITS (CHAR_BIT * sizeof (unsigned long))
/* Helper function to open /dev/null to a specific descriptor.
*/
static inline int devnullfd(const int fd)
{
int tempfd;
/* Sanity check. */
if (fd == -1)
return errno = EINVAL;
do {
tempfd = open("/dev/null", O_RDWR | O_NOCTTY);
} while (tempfd == -1 && errno == EINTR);
if (tempfd == -1)
return errno;
if (tempfd != fd) {
if (dup2(tempfd, fd) == -1) {
const int saved_errno = errno;
close(tempfd);
return errno = saved_errno;
}
if (close(tempfd) == -1)
return errno;
}
return 0;
}
/* Helper function to close all except small descriptors
specified in the mask. For obvious reasons, this is not
thread safe, and is only intended to be used in recently
forked child processes. */
static void closeall(const unsigned long mask)
{
DIR *dir;
struct dirent *ent;
int dfd;
dir = opendir("/proc/self/fd/");
if (!dir) {
/* Cannot list open descriptors. Just try and close all. */
const long fd_max = sysconf(_SC_OPEN_MAX);
long fd;
for (fd = 0; fd < ULONG_BITS; fd++)
if (!(mask & (1uL << fd)))
close(fd);
for (fd = ULONG_BITS; fd <= fd_max; fd++)
close(fd);
return;
}
dfd = dirfd(dir);
while ((ent = readdir(dir)))
if (ent->d_name[0] >= '0' && ent->d_name[0] <= '9') {
const char *p = &ent->d_name[1];
int fd = ent->d_name[0] - '0';
while (*p >= '0' && *p <= '9')
fd = (10 * fd) + *(p++) - '0';
if (*p)
continue;
if (fd == dfd)
continue;
if (fd < ULONG_MAX && (mask & (1uL << fd)))
continue;
close(fd);
}
closedir(dir);
}
closeall(0) tries hard to close all open file descriptors, and devnullfd(fd) tries to open fd to /dev/null. These are used to make sure that even if the user spoofs xdg-open, no file descriptors are leaked; only the file name or URL is passed.
On non-Linux POSIXy systems, you can replace them with something more suitable. On BSDs, use closefrom(), and handle the first ULONG_MAX descriptors in a loop.
The xdg_open(file-or-url) function itself is something along the lines of
/* Launch the user-preferred application to open a file or URL.
Returns 0 if success, an errno error code otherwise.
*/
int xdg_open(const char *file_or_url)
{
pid_t child, p;
int status;
/* Sanity check. */
if (!file_or_url || !*file_or_url)
return errno = EINVAL;
/* Fork the child process. */
child = fork();
if (child == -1)
return errno;
else
if (!child) {
/* Child process. */
uid_t uid = getuid(); /* Real, not effective, user. */
gid_t gid = getgid(); /* Real, not effective, group. */
/* Close all open file descriptors. */
closeall(0);
/* Redirect standard streams, if possible. */
devnullfd(STDIN_FILENO);
devnullfd(STDOUT_FILENO);
devnullfd(STDERR_FILENO);
/* Drop elevated privileges, if any. */
if (setresgid(gid, gid, gid) == -1 ||
setresuid(uid, uid, uid) == -1)
_Exit(98);
/* Have the child process execute in a new process group. */
setsid();
/* Execute xdg-open. */
execlp("xdg-open", "xdg-open", file_or_url, (char *)0);
/* Failed. xdg-open uses 0-5, we return 99. */
_Exit(99);
}
/* Reap the child. */
do {
status = 0;
p = waitpid(child, &status, 0);
} while (p == -1 && errno == EINTR);
if (p == -1)
return errno;
if (!WIFEXITED(status)) {
/* Killed by a signal. Best we can do is I/O error, I think. */
return errno = EIO;
}
switch (WEXITSTATUS(status)) {
case 0: /* No error. */
return errno = 0; /* It is unusual, but robust to explicitly clear errno. */
case 1: /* Error in command line syntax. */
return errno = EINVAL; /* Invalid argument */
case 2: /* File does not exist. */
return errno = ENOENT; /* No such file or directory */
case 3: /* A required tool could not be found. */
return errno = ENOSYS; /* Not implemented */
case 4: /* Action failed. */
return errno = EPROTO; /* Protocol error */
case 98: /* Identity shenanigans. */
return errno = EACCES; /* Permission denied */
case 99: /* xdg-open does not exist. */
return errno = ENOPKG; /* Package not installed */
default:
/* None of the other values should occur. */
return errno = ENOSYS; /* Not implemented */
}
}
As already mentioned, it tries hard to close all open file descriptors, redirects the standard streams to /dev/null, ensures the effective and real identity matches (in case this is used in a setuid binary), and passes success/failure using the child process exit status.
The setresuid() and setresgid() calls are only available on OSes that have saved user and group ids. On others, use seteuid(uid) and setegid() instead.
This implementation tries to balance user configurability with security. Users can set the PATH so that their favourite xdg-open gets executed, but the function tries to ensure that no sensitive information or privileges are leaked to that process.
(Environment variables could be filtered, but they should not contain sensitive information in the first place, and we don't really know which ones a desktop environment uses. So better not mess with them, to keep user surprises to a minimum.)
As a minimal test main(), try the following:
int main(int argc, char *argv[])
{
int arg, status;
if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
fprintf(stderr, " %s FILE-OR-URL ...\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "This example program opens each specified file or URL\n");
fprintf(stderr, "xdg-open(1), and outputs success or failure for each.\n");
fprintf(stderr, "\n");
return EXIT_SUCCESS;
}
status = EXIT_SUCCESS;
for (arg = 1; arg < argc; arg++)
if (xdg_open(argv[arg])) {
printf("%s: %s.\n", argv[arg], strerror(errno));
status = EXIT_FAILURE;
} else
printf("%s: Opened.\n", argv[arg]);
return status;
}
As the SPDX license identifier states, this example code is licensed under Creative Commons Zero 1.0. Use it any way you wish, in any code you want.
The xdg-open is part of the xdg-utils. They're almost always installed with the GUI desktop of any Linux distribution.
A Linux distribution can be installed without any Graphical User Interface, on servers say, and most probably then they would lack xdg-open.
Instead of system, you could - and should - use fork + exec - if exec fails then xdg-open could not be executed.
The online compilers most probably don't have any Desktop GUI installed on them, thus the lack of that utility.

Segfault on Server after Multithreading in C

So I'm trying to code a multi-threading server. I've spent an enormous time on the internet figuring out the correct way to do this and the answer as always seems to be it depends. Whenever I execute my code, the client successfully connects, and executes but when the thread terminates and returns to the while loop the whole program segfaults.
I probably could use a good spanking on a few other things as well such as my usage of global variables. The entirety of code is below, sorry for the inconsistent space/tabbing.
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <signal.h>
#include <math.h>
#include <pthread.h>
#include <sys/stat.h>
#include <fcntl.h>
/* ---------------------------------------------------------------------
This is a basic whiteboard server. You can query it, append to it and
clear in it. It understands both encrypted and unencrypted data.
--------------------------------------------------------------------- */
struct whiteboard {
int line;
char type;
int bytes;
char string[1024];
} *Server;
int serverSize, threadcount, id[5];
bool debug = true;
struct whiteboard *Server;
pthread_mutex_t mutex;
pthread_t thread[5];
/* -------------------------------------------
function: sigint_handler
Opens a file "whiteboard.all" in writemode
and writes all white board information in
command mode.
------------------------------------------- */
void sigint_handler(int sig)
{
if (debug) printf("\nInduced SIGINT.\n");
FILE *fp;
fp=fopen("whiteboard.all","w");
int x=0;
for (x;x<serverSize;x++) // Loop Responsible for iterating all the whiteboard entries.
{
if (debug) printf("#%d%c%d\n%s\n",Server[x].line,Server[x].type,Server[x].bytes,Server[x].string);
fprintf(fp,"#%d%c%d\n%s\n",Server[x].line,Server[x].type,Server[x].bytes,Server[x].string);
}
if (debug) printf("All values stored.\n");
free(Server); // Free dynamically allocated memory
exit(1);
}
/* -------------------------------------------
function: processMessage
Parses '!' messages into their parts -
returns struct in response.
------------------------------------------- */
struct whiteboard processMessage(char * message)
{
int lineNumber, numBytes;
char stringType, entry[1028];
if (debug) printf("Update Statement!\n");
// Read line sent by Socket
sscanf(message,"%*c%d%c%d\n%[^\n]s",&lineNumber,&stringType,&numBytes,entry);
if (debug) printf("Processed: Line: %d, Text: %s\n",lineNumber,entry);
// Parse information into local Struct
struct whiteboard Server;
Server.line = lineNumber;
Server.type = stringType;
Server.bytes = numBytes;
strcpy(Server.string,entry);
// If there is no bytes, give nothing
if (numBytes == 0)
{
strcpy(Server.string,"");
}
return Server;
}
/* -------------------------------------------
function: handleEverything
Determines type of message recieved and
process and parses accordingly.
------------------------------------------- */
char * handleEverything(char* message, struct whiteboard *Server, char* newMessage)
{
bool updateFlag = false, queryFlag = false;
// If message is an Entry
if (message[0] == '#')
{
if (debug) printf("Triggered Entry!\n");
// Create Temporary Struct
struct whiteboard messageReturn;
messageReturn = processMessage(message);
// Store Temporary Struct in Correct Heap Struct
Server[messageReturn.line] = messageReturn;
sprintf(newMessage,"!%d%c%d\n%s\n",messageReturn.line, messageReturn.type, messageReturn.bytes, messageReturn.string);
return newMessage;
}
// If message is a query
if (message[0] == '?')
{
if (debug) printf("Triggered Query!\n");
int x;
queryFlag = true;
sscanf(message,"%*c%d",&x); // Parse Query
if (x > serverSize) // Check if Query out of Range
{
strcpy(newMessage,"ERROR: Query out of Range.\n");
return newMessage;
}
sprintf(newMessage,"!%d%c%d\n%s\n",Server[x].line,Server[x].type,Server[x].bytes,Server[x].string);
if (debug) printf("newMessage as of handleEverything:%s\n",newMessage);
return newMessage;
}
}
/* -------------------------------------------
function: readFile
If argument -f given, read file
process and parse into heap memory.
------------------------------------------- */
void readFile(char * filename)
{
FILE *fp;
fp=fopen(filename,"r");
int line, bytes, count = 0, totalSize = 0;
char type, check, string[1028], individualLine[1028];
// Loop to determine size of file. **I know this is sloppy.
while (fgets(individualLine, sizeof(individualLine), fp))
{
totalSize++;
}
// Each line shoud have totalSize - 2 (to account for 0)
// (answer) / 2 to account for string line and instruction.
totalSize = (totalSize - 2) / 2;
serverSize = totalSize+1;
if (debug) printf("Total Size is: %d\n",serverSize);
// Open and Allocate Memory
fp=fopen(filename,"r");
if (debug) printf("File Mode Calloc Initialize\n");
Server = calloc(serverSize+2, sizeof(*Server));
// Write to Heap Loop
while (fgets(individualLine, sizeof(individualLine), fp)) {
if (individualLine[0] == '#') // Case of Header Line
{
sscanf(individualLine,"%c%d%c%d",&check,&line,&type,&bytes);
if (debug) printf("Count: %d, Check:%c, Line:%d, Type: %c, Bytes:%d \n",count,check,line,type,bytes);
Server[count].line = line;
Server[count].type = type;
Server[count].bytes = bytes;
count++;
}
else
{
// For case of no data
if (individualLine[0] == '\n')
{
strcpy(string,"");
}
// Then scan data line
sscanf(individualLine,"%[^\n]s",string);
if (debug) printf("String: %s\n",string);
strcpy(Server[count-1].string,string);
}
}
return;
}
void *threadFunction(int snew)
{
char tempmessage[1024], message[2048];
// Compile and Send Server Message
strcpy(tempmessage, "CMPUT379 Whiteboard Server v0\n");
send(snew, tempmessage, sizeof(tempmessage), 0);
// Recieve Message
char n = recv(snew, message, sizeof(message), 0);
pthread_mutex_lock(&mutex);
if (debug) printf("Attempt to Malloc for newMessage\n");
char * newMessage = malloc(1024 * sizeof(char));
if (debug) printf("goto: handleEverything\n");
newMessage = handleEverything(message, Server, newMessage);
if (debug) printf("returnMessage:%s\n",newMessage);
strcpy(message,newMessage);
free(newMessage);
pthread_mutex_unlock(&mutex);
if (debug) printf("message = %s\n", message);
send(snew, message, sizeof(message), 0);
printf("End of threadFunction\n");
return;
}
/* -------------------------------------------
function: main
Function Body of Server
------------------------------------------- */
int main(int argc, char * argv[])
{
int sock, fromlength, outnum, i, socketNumber, snew;
bool cleanMode;
// Initialize Signal Handling
struct sigaction act;
act.sa_handler = sigint_handler;
sigemptyset(&act.sa_mask);
act.sa_flags = 0;
sigaction(SIGINT, &act, 0);
// For correct number of arguments.
if (argc == 4)
{
// If "-n" parameter (cleanMode)
if (strcmp(argv[2], "-n") == 0)
{
// Get size + 1
cleanMode = true;
sscanf(argv[3],"%d",&serverSize);
serverSize += 1;
if (debug) printf("== Clean Mode Properly Initiated == \n");
if (debug) printf("serverSize: %d\n",serverSize);
if (debug) printf("Clean Mode Calloc\n");
Server = calloc(serverSize, sizeof(*Server));
int i = 0;
for (i; i < serverSize; i++) // Initialize allocated Memory
{
Server[i].line = i;
Server[i].type = 'p';
Server[i].bytes = 0;
strcpy(Server[i].string,"");
}
}
// If "-f" parameter (filemode)
else if (strcmp(argv[2], "-f") == 0)
{
// Read File
cleanMode = false;
readFile(argv[3]);
if (debug) printf("== Statefile Mode Properly Initiated == \n");
if (debug) printf("serverSize: %d\n",serverSize);
}
// Otherwise incorrect parameter.
else
{
printf("Incorrect Argument. \n");
printf("Usage: wbs279 pornumber {-n number | -f statefile}\n");
exit(1);
}
sscanf(argv[1],"%d",&socketNumber);
}
// Send Error for Incorrect Number of Arguments
if (argc != 4)
{
printf("Error: Incorrect Number of Input Arguments.\n");
printf("Usage: wbs279 portnumber {-n number | -f statefile}\n");
exit(1);
}
// == Do socket stuff ==
char tempmessage[1024], message[2048];
struct sockaddr_in master, from;
if (debug) printf("Assrt Socket\n");
sock = socket (AF_INET, SOCK_STREAM, 0);
if (sock < 0)
{
perror ("Server: cannot open master socket");
exit (1);
}
master.sin_family = AF_INET;
master.sin_addr.s_addr = INADDR_ANY;
master.sin_port = htons (socketNumber);
if (bind (sock, (struct sockaddr*) &master, sizeof (master)))
{
perror ("Server: cannot bind master socket");
exit (1);
}
// == Done socket stuff ==
listen (sock, 5);
int threadNumber = 0;
while(1)
{
printf("But what about now.\n");
if (debug) printf("-- Wait for Input --\n");
printf("Enie, ");
fromlength = sizeof (from);
printf("Meanie, ");
snew = accept (sock, (struct sockaddr*) & from, & fromlength);
printf("Miney, ");
if (snew < 0)
{
perror ("Server: accept failed");
exit (1);
}
printf("Moe\n");
pthread_create(&thread[threadNumber],NULL,threadFunction(snew), &id[threadNumber]);
//printf("Can I join?!\n");
//pthread_join(thread[0],NULL);
//printf("Joined?!\n");
threadNumber++;
close (snew);
}
}
I'm also curious as to how exactly to let multiple clients use the server at once. Is how I've allocated the whiteboard structure data appropriate for this process?
I'm very sorry if these don't make any sense.
You seem to somehow expect this:
pthread_create(&thread[threadNumber],NULL,threadFunction(snew), &id[threadNumber]);
/* ... */
close (snew);
To make sense, while it clearly doesn't.
Instead of starting a thread that runs threadFunction, passing it snew, you call the thread function and pass the return value to pthread_create(), which will interpret it as a function pointer. This will break, especially considering that the thread function incorrectly ends with:
return;
This shouldn't compile, since it's declared to return void *.
Also assuming you managed to start the thread, passing it snew to use as its socket: then you immediately close that socket, causing any reference to it from the thread to be invalid!
Please note that pthread_create() does not block and wait for the thread to exit, that would be kind of ... pointless. It starts off the new thread to run in parallel with the main thread, so of course you can't yank the carpet away from under it.
This signal handler is completely unsafe:
void sigint_handler(int sig)
{
if (debug) printf("\nInduced SIGINT.\n");
FILE *fp;
fp=fopen("whiteboard.all","w");
int x=0;
for (x;x<serverSize;x++) // Loop Responsible for iterating all the whiteboard entries.
{
if (debug) printf("#%d%c%d\n%s\n",Server[x].line,Server[x].type,Server[x].bytes,Server[x].string);
fprintf(fp,"#%d%c%d\n%s\n",Server[x].line,Server[x].type,Server[x].bytes,Server[x].string);
}
if (debug) printf("All values stored.\n");
free(Server); // Free dynamically allocated memory
exit(1);
}
Per 2.4.3 Signal Actions of the POSIX standard (emphasis added):
The following table defines a set of functions that shall be
async-signal-safe. Therefore, applications can call them, without
restriction, from signal-catching functions. ...
[list of async-signal-safe functions]
Any function not in the above table may be unsafe with respect to signals. Implementations may make other interfaces
async-signal-safe. In the presence of signals, all functions defined
by this volume of POSIX.1-2008 shall behave as defined when called
from or interrupted by a signal-catching function, with the exception
that when a signal interrupts an unsafe function or equivalent
(such as the processing equivalent to exit() performed after a return
from the initial call to main()) and the signal-catching function
calls an unsafe function, the behavior is undefined. Additional
exceptions are specified in the descriptions of individual functions
such as longjmp().
Your signal handler invokes undefined behavior.

Why is this message not only displayed when a file is written to (using the poll C Linux function)?

I was reading about poll in C programming and built an application given on the poll(2) man page.
Here is the example:
#include<stdio.h>
#include <stropts.h>
#include <poll.h>
#include <fcntl.h>
int main() {
struct pollfd fds[2];
int timeout_msecs = -1;
int ret;
int i;
/* Open STREAMS device. */
fds[0].fd = open("/home/jeshwanth/mywork/poll/dev0", O_RDONLY);
fds[1].fd = open("/home/jeshwanth/mywork/poll/dev1", O_RDONLY);
fds[0].events = POLLOUT | POLLWRBAND;
fds[1].events = POLLOUT | POLLWRBAND;
while (1) {
ret = poll(fds, 2, timeout_msecs);
if (ret > 0) {
/* An event on one of the fds has occurred. */
for (i = 0; i < 2; i++) {
if (fds[i].revents != 0) {
/* Priority data may be written on device number i. */
printf(
"Priority Data may be written on device number %d POLLWRBAND\n",
i);
}
if (fds[i].revents = !0) {
/* Data may be written on device number i. */
printf("Data may be written on device number %d POLLOUT\n",
i);
}
if (fds[i].revents = !0) {
/* A hangup has occurred on device number i. */
printf("A hangup has occurred on device number %d\n", i);
}
}
}
}
return 0;
}
Note: dev0 and dev1 are normal files. When I run the program, if no event occurred in dev0 and dev1, the message is displayed. But I was expecting when some write into the file happens, only then should it display the message. Am I wrong?
Polling it for output readiness doesn't mean you will get notified when some output occurs: it means that you'll get notified when there is output buffer space available so you can output (but you should still check the return value of your output function. The buffer state may have changed between polling and outputting; always check return values).
Minimal FIFO named pipe example
You won't be able to see anything interesting with regular files, since those always give POLLIN immediately: How can select() wait on regular file descriptors (non-sockets)?
The simplest way to play around with poll is to use named pipes as shown below. This should prepare you for their major application: sockets and device files.
Source below. Usage:
sudo mknod poll0.tmp p
sudo mknod poll1.tmp p
sudo chmod 666 poll*.tmp
./poll.out
On another shell:
printf a > poll0.tmp
printf b > poll1.tmp
Output:
loop
POLLIN i=0 n=1 buf=a
loop
POLLHUP i=0
loop
POLLIN i=1 n=1 buf=b
POLLHUP i=1
loop
So notice how poll waits for the reads without looping.
Cooler example:
(while true; do date; sleep 1; done) > poll0.tmp &
(while true; do date; sleep 2; done) > poll1.tmp &
0 gets written every one second, and 1 every two seconds, which shows how poll() is dealing with both inputs concurrently, without stalling each other.
Source:
#define _XOPEN_SOURCE 700
#include <fcntl.h> /* creat, O_CREAT */
#include <poll.h> /* poll */
#include <stdio.h> /* printf, puts, snprintf */
#include <stdlib.h> /* EXIT_FAILURE, EXIT_SUCCESS */
#include <unistd.h> /* read */
int main(void) {
enum { N = 2 };
char buf[1024], path[1024];
int fd, i, n;
short revents;
struct pollfd pfds[N];
for (i = 0; i < N; ++i) {
snprintf(path, sizeof(path), "poll%d.tmp", i);
/* O_NONBLOCK is required or else the open blocks
* until the other side of the pipe opens. */
fd = open(path, O_RDONLY | O_NONBLOCK);
if (fd == -1) {
perror("open");
exit(EXIT_FAILURE);
}
pfds[i].fd = fd;
/* Only events in this mask will be listened to.
* However, there are also some events that are unmaskable,
* notably POLLHUP when pipe closes! */
pfds[i].events = POLLIN;
}
while (1) {
puts("loop");
i = poll(pfds, N, -1);
if (i == -1) {
perror("poll");
exit(EXIT_FAILURE);
}
for (i = 0; i < N; ++i) {
revents = pfds[i].revents;
if (revents & POLLIN) {
n = read(pfds[i].fd, buf, sizeof(buf));
printf("POLLIN i=%d n=%d buf=%.*s\n", i, n, n, buf);
}
if (revents & POLLHUP) {
printf("POLLHUP i=%d\n", i);
/* This happens when the other side closed.
* This event is only cleared when we close the reader. */
/* poll won't set POLLHUP anymore once all fds are closed.
* Any futher polls on this will give the POLLNVAL event instead. */
close(pfds[i].fd);
/* negative fds are ignored. So if we negate an FD,
* we can both turn if off for a while, and turn it on
* later on by re-nagating it. */
pfds[i].fd *= -1;
}
}
}
}
Compile with:
gcc -o poll.out -std=c99 poll.c
Tested in Ubuntu 14.04.
GitHub upstream.
The lines:
close(pfds[i].fd);
pfds[i].fd *= -1;
are required or else you get POLLHUP forever, see also: How to use the poll C function to watch named pipes in Linux?
For even more fun, create a Linux kernel module what implements the poll fops: How to add poll function to the kernel module code?
I'll give you a hint on how to correct it. revents is interpreted as several bit flags.
/* check for priority write readiness */
if (fds[i].revents & POLLWRBAND) {
printf("Priority Data may be written on device number %d POLLWRBAND\n", i);
}
/* check for write readiness */
if (fds[i].revents & POLLOUT) {
printf("Data may be written on device number %d POLLOUT\n", i);
}
/* check for hang-up */
if (fds[i].revents & POLLHUP) {
printf("A hangup has occurred on device number %d\n", i);
}

How to detect if the current process is being run by GDB

The standard way would be the following:
if (ptrace(PTRACE_TRACEME, 0, NULL, 0) == -1)
printf("traced!\n");
In this case, ptrace returns an error if the current process is traced (e.g., running it with GDB or attaching to it).
But there is a serious problem with this: if the call returns successfully, GDB may not attach to it later. Which is a problem since I'm not trying to implement anti-debug stuff. My purpose is to emit an 'int 3' when a condition is met (e.g., an assert fails) and GDB is running (otherwise I get a SIGTRAP which stops the application).
Disabling SIGTRAP and emitting an 'int 3' every time is not a good solution because the application I'm testing might be using SIGTRAP for some other purpose (in which case I'm still screwed, so it wouldn't matter, but it's the principle of the thing :))
On Windows there is an API, IsDebuggerPresent, to check if process is under debugging. At Linux, we can check this with another way (not so efficient).
Check "/proc/self/status" for "TracerPid" attribute.
Example code:
#include <sys/stat.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <ctype.h>
bool debuggerIsAttached()
{
char buf[4096];
const int status_fd = open("/proc/self/status", O_RDONLY);
if (status_fd == -1)
return false;
const ssize_t num_read = read(status_fd, buf, sizeof(buf) - 1);
close(status_fd);
if (num_read <= 0)
return false;
buf[num_read] = '\0';
constexpr char tracerPidString[] = "TracerPid:";
const auto tracer_pid_ptr = strstr(buf, tracerPidString);
if (!tracer_pid_ptr)
return false;
for (const char* characterPtr = tracer_pid_ptr + sizeof(tracerPidString) - 1; characterPtr <= buf + num_read; ++characterPtr)
{
if (isspace(*characterPtr))
continue;
else
return isdigit(*characterPtr) != 0 && *characterPtr != '0';
}
return false;
}
The code I ended up using was the following:
int
gdb_check()
{
int pid = fork();
int status;
int res;
if (pid == -1)
{
perror("fork");
return -1;
}
if (pid == 0)
{
int ppid = getppid();
/* Child */
if (ptrace(PTRACE_ATTACH, ppid, NULL, NULL) == 0)
{
/* Wait for the parent to stop and continue it */
waitpid(ppid, NULL, 0);
ptrace(PTRACE_CONT, NULL, NULL);
/* Detach */
ptrace(PTRACE_DETACH, getppid(), NULL, NULL);
/* We were the tracers, so gdb is not present */
res = 0;
}
else
{
/* Trace failed so GDB is present */
res = 1;
}
exit(res);
}
else
{
waitpid(pid, &status, 0);
res = WEXITSTATUS(status);
}
return res;
}
A few things:
When ptrace(PTRACE_ATTACH, ...) is successful, the traced process will stop and has to be continued.
This also works when GDB is attaching later.
A drawback is that when used frequently, it will cause a serious slowdown.
Also, this solution is only confirmed to work on Linux. As the comments mentioned, it won't work on BSD.
You could fork a child which would try to PTRACE_ATTACH its parent (and then detach if necessary) and communicates the result back. It does seem a bit inelegant though.
As you mention, this is quite costly. I guess it's not too bad if assertions fail irregularly. Perhaps it'd be worthwhile keeping a single long-running child around to do this - share two pipes between the parent and the child, child does its check when it reads a byte and then sends a byte back with the status.
I had a similar need, and came up with the following alternatives
static int _debugger_present = -1;
static void _sigtrap_handler(int signum)
{
_debugger_present = 0;
signal(SIGTRAP, SIG_DFL);
}
void debug_break(void)
{
if (-1 == _debugger_present) {
_debugger_present = 1;
signal(SIGTRAP, _sigtrap_handler);
raise(SIGTRAP);
}
}
If called, the debug_break function will only interrupt if a debugger is attached.
If you are running on x86 and want a breakpoint which interrupts in the caller (not in raise), just include the following header, and use the debug_break macro:
#ifndef BREAK_H
#define BREAK_H
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
int _debugger_present = -1;
static void _sigtrap_handler(int signum)
{
_debugger_present = 0;
signal(SIGTRAP, SIG_DFL);
}
#define debug_break() \
do { \
if (-1 == _debugger_present) { \
_debugger_present = 1; \
signal(SIGTRAP, _sigtrap_handler); \
__asm__("int3"); \
} \
} while(0)
#endif
I found that a modified version of the file descriptor "hack" described by Silviocesare and blogged by xorl worked well for me.
This is the modified code I use:
#include <stdio.h>
#include <unistd.h>
// gdb apparently opens FD(s) 3,4,5 (whereas a typical prog uses only stdin=0, stdout=1,stderr=2)
int detect_gdb(void)
{
int rc = 0;
FILE *fd = fopen("/tmp", "r");
if (fileno(fd) > 5)
{
rc = 1;
}
fclose(fd);
return rc;
}
If you just want to know whether the application is running under GDB for debugging purposes, the simplest solution on Linux is to readlink("/proc/<ppid>/exe"), and search the result for "gdb".
This is similar to terminus' answer, but uses pipes for communication:
#include <unistd.h>
#include <stdint.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#if !defined(PTRACE_ATTACH) && defined(PT_ATTACH)
# define PTRACE_ATTACH PT_ATTACH
#endif
#if !defined(PTRACE_DETACH) && defined(PT_DETACH)
# define PTRACE_DETACH PT_DETACH
#endif
#ifdef __linux__
# define _PTRACE(_x, _y) ptrace(_x, _y, NULL, NULL)
#else
# define _PTRACE(_x, _y) ptrace(_x, _y, NULL, 0)
#endif
/** Determine if we're running under a debugger by attempting to attach using pattach
*
* #return 0 if we're not, 1 if we are, -1 if we can't tell.
*/
static int debugger_attached(void)
{
int pid;
int from_child[2] = {-1, -1};
if (pipe(from_child) < 0) {
fprintf(stderr, "Debugger check failed: Error opening internal pipe: %s", syserror(errno));
return -1;
}
pid = fork();
if (pid == -1) {
fprintf(stderr, "Debugger check failed: Error forking: %s", syserror(errno));
return -1;
}
/* Child */
if (pid == 0) {
uint8_t ret = 0;
int ppid = getppid();
/* Close parent's side */
close(from_child[0]);
if (_PTRACE(PTRACE_ATTACH, ppid) == 0) {
/* Wait for the parent to stop */
waitpid(ppid, NULL, 0);
/* Tell the parent what happened */
write(from_child[1], &ret, sizeof(ret));
/* Detach */
_PTRACE(PTRACE_DETACH, ppid);
exit(0);
}
ret = 1;
/* Tell the parent what happened */
write(from_child[1], &ret, sizeof(ret));
exit(0);
/* Parent */
} else {
uint8_t ret = -1;
/*
* The child writes a 1 if pattach failed else 0.
*
* This read may be interrupted by pattach,
* which is why we need the loop.
*/
while ((read(from_child[0], &ret, sizeof(ret)) < 0) && (errno == EINTR));
/* Ret not updated */
if (ret < 0) {
fprintf(stderr, "Debugger check failed: Error getting status from child: %s", syserror(errno));
}
/* Close the pipes here, to avoid races with pattach (if we did it above) */
close(from_child[1]);
close(from_child[0]);
/* Collect the status of the child */
waitpid(pid, NULL, 0);
return ret;
}
}
Trying the original code under OS X, I found waitpid (in the parent) would always return -1 with an EINTR (System call interrupted). This was caused by pattach, attaching to the parent and interrupting the call.
It wasn't clear whether it was safe to just call waitpid again (that seemed like it might behave incorrectly in some situations), so I just used a pipe to do the communication instead. It's a bit of extra code, but will probably work reliably across more platforms.
This code has been tested on OS X v10.9.3 (Mavericks), Ubuntu 14.04 (Trusty Tahr) (3.13.0-24-generic) and FreeBSD 10.0.
For Linux, which implements process capabilities, this method will only work if the process has the CAP_SYS_PTRACE capability, which is typically set when the process is run as root.
Other utilities (gdb and lldb) also have this capability set as part of their filesystem metadata.
You can detect whether the process has effective CAP_SYS_PTRACE by linking against -lcap,
#include <sys/capability.h>
cap_flag_value_t value;
cap_t current;
/*
* If we're running under Linux, we first need to check if we have
* permission to to ptrace. We do that using the capabilities
* functions.
*/
current = cap_get_proc();
if (!current) {
fprintf(stderr, "Failed getting process capabilities: %s\n", syserror(errno));
return -1;
}
if (cap_get_flag(current, CAP_SYS_PTRACE, CAP_PERMITTED, &value) < 0) {
fprintf(stderr, "Failed getting permitted ptrace capability state: %s\n", syserror(errno));
cap_free(current);
return -1;
}
if ((value == CAP_SET) && (cap_get_flag(current, CAP_SYS_PTRACE, CAP_EFFECTIVE, &value) < 0)) {
fprintf(stderr, "Failed getting effective ptrace capability state: %s\n", syserror(errno));
cap_free(current);
return -1;
}
C++ version of Sam Liao's answer (Linux only):
// Detect if the application is running inside a debugger.
bool being_traced()
{
std::ifstream sf("/proc/self/status");
std::string s;
while (sf >> s)
{
if (s == "TracerPid:")
{
int pid;
sf >> pid;
return pid != 0;
}
std::getline(sf, s);
}
return false;
}

Resources