STDIN input size restriction to 1k in C program - c

Probably a silly question, with read and other functions you can specify the number of bytes you want to read, however when reading from stdin I find that I can only type 1024 characters in the prompt, if I type the 1025 character, it's not written and if I want the line to be read (pressing ENTER key) I need to remove the 1024 character in order to leave space for '\n' I suppose. This occurs only in my c program not the shell so what's causing this restriction?
#include <unistd.h>
#include <stdio.h>
int main() {
char buf[2048];
int c;
c = read(fileno(stdin), &buf, sizeof(buf));
printf("%s\n", buf);
return 0;
}

Transferring select comments to form an answer.
General diagnosis
This is a property of the terminal driver on your system, rather than of the program or the C library. Modern shells such as Bash don't read a single line; they read characters as they become available using non-canonical input. See also Canonical vs non-canonical terminal input.
Barmar noted:
Note that read() doesn't add a null terminator to the input that it reads, but printf() expects a null-terminated string.
Instead of adding a null terminator, you could tell printf() how many characters to print:
printf("%.*s\n", c, buf);
That is, however, tangential to the question of how to get a long line of input.
If you use an open source o/s, you can modify the terminal driver source code and recompile your kernel to allow you to type more than 1 KiB on a single line, but anything much short of that isn't going to work. The terminal driver imposes a limit; you have to change the terminal driver to change that limit. If you're on Linux, you can poke around the /proc file system to see if there's a dynamic configuration parameter you can change (so you don't have to recompile the kernel, but you do have to alter the settings of the terminal driver); I've not heard of that being possible.
The limit can be a nuisance if you copy'n'paste more than 1 KiB of text with no newlines in it from a browser and want to paste it into a file on your system. Use a program such as Vim to manage it — it puts the terminal into a non-canonical mode and therefore doesn't run into the limit.
Using POSIX termios to slurp input from a terminal
If you want a program to read from a terminal without the line lengths (but also with line editing such as erase or kill processing), then you could consider this program — slurp:
/*
#(#)File: $RCSfile: slurp.c,v $
#(#)Version: $Revision: 1.3 $
#(#)Last changed: $Date: 2018/10/28 17:14:24 $
#(#)Purpose: Put terminal into non-canonical mode to slurp input
#(#)Author: J Leffler
*/
/*TABSTOP=4*/
#include "posixver.h"
#include "stderr.h"
#include <assert.h>
#include <fcntl.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <termios.h>
#include <unistd.h>
static const char optstr[] = "a:ho:V";
static const char usestr[] = "[-hV][-a output | -o output]";
static const char hlpstr[] =
" -a output Append to named file (creating it if necessary)\n"
" -h Print this help message and exit\n"
" -o output Output to named file (truncating it if it exists)\n"
" -V Print version information and exit\n"
;
static struct termios saved = { 0 };
static bool sigint_enabled = false;
static bool sigquit_enabled = false;
static bool slurping = false;
static void reset_termios(void);
static void set_non_canonical(void);
static void sig_handler(int signum);
static void set_signal_handling(void);
static void slurp(int ofd, const char *filename);
#ifndef lint
/* Prevent over-aggressive optimizers from eliminating ID string */
extern const char jlss_id_slurp_c[];
const char jlss_id_slurp_c[] = "#(#)$Id: slurp.c,v 1.3 2018/10/28 17:14:24 jonathanleffler Exp $";
#endif /* lint */
int main(int argc, char **argv)
{
const char *filename = "standard output";
int ofd = STDOUT_FILENO;
int oflag = 0;
err_setarg0(argv[0]);
int opt;
while ((opt = getopt(argc, argv, optstr)) != -1)
{
switch (opt)
{
case 'h':
err_help(usestr, hlpstr);
/*NOTREACHED*/
case 'o':
case 'a':
if (ofd != STDOUT_FILENO)
{
err_remark("the -a and -o flags are mutually exclusive\n");
err_usage(usestr);
}
oflag = (opt == 'o') ? O_TRUNC : O_APPEND;
if ((ofd = open(optarg, O_WRONLY | O_CREAT | oflag, 0644)) < 0)
err_syserr("failed to open file %s for writing: ", optarg);
filename = optarg;
break;
case 'V':
err_version("PROG", &"#(#)$Revision: 1.3 $ ($Date: 2018/10/28 17:14:24 $)"[4]);
/*NOTREACHED*/
default:
err_usage(usestr);
/*NOTREACHED*/
}
}
if (optind != argc)
{
err_remark("unexpected file name options (first is '%s')\n", argv[optind]);
err_usage(usestr);
}
set_non_canonical();
if (slurping)
set_signal_handling();
slurp(ofd, filename);
return 0;
}
static void reset_termios(void)
{
tcsetattr(STDIN_FILENO, 0, &saved);
}
static void set_non_canonical(void)
{
if (tcgetattr(STDIN_FILENO, &saved) == 0)
{
struct termios modified = saved;
atexit(reset_termios);
/*
** On macOS 10.14 (at least), if you don't reset ISIG, the
** signal characters are not transferred to the program, so
** you can't detect those signals. With ICANON reset, they
** don't generate the signal either. The code does not try
** to handle the suspend (^Z) key specially, nor any other
** keys than EOF, INTR, QUIT.
*/
modified.c_lflag &= ~(ICANON | ISIG);
modified.c_cc[VMIN] = 1;
modified.c_cc[VTIME] = 0;
tcsetattr(STDIN_FILENO, TCSANOW, &modified);
slurping = true;
}
}
static void sig_handler(int signum)
{
reset_termios();
_exit(128 + signum);
}
/* Almost worth a data structure and a loop, but not quite */
static void set_signal_handling(void)
{
/* Simulate SIGINT and SIGQUIT */
if (signal(SIGINT, SIG_IGN) != SIG_IGN)
{
(void)signal(SIGINT, sig_handler);
sigint_enabled = true;
}
if (signal(SIGQUIT, SIG_IGN) != SIG_IGN)
{
(void)signal(SIGQUIT, sig_handler);
sigquit_enabled = true;
}
/* Have program terminate when sent normal signals */
if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
(void)signal(SIGHUP, sig_handler);
if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
(void)signal(SIGTERM, sig_handler);
if (signal(SIGPIPE, SIG_IGN) != SIG_IGN)
(void)signal(SIGPIPE, sig_handler);
}
static void slurp(int ofd, const char *filename)
{
char buffer[4096];
int nbytes;
while ((nbytes = read(STDIN_FILENO, buffer, sizeof(buffer))) > 0)
{
/* Simulate EOF and interrupt and quit signals */
if (nbytes == 1 && slurping)
{
if (buffer[0] == saved.c_cc[VEOF])
break;
if (sigint_enabled && buffer[0] == saved.c_cc[VINTR])
exit(128 + SIGINT);
if (sigquit_enabled && buffer[0] == saved.c_cc[VQUIT])
exit(128 + SIGQUIT);
}
if (write(ofd, buffer, nbytes) != nbytes)
err_syserr("failed to write %d bytes to %s: ", nbytes, filename);
}
}
The library code used is available in my SOQ (Stack Overflow Questions) repository on GitHub as files stderr.c, stderr.h and posixver.h in the libsoq sub-directory.
This deals with most of the traps for the unwary. It does its best to reset the terminal back to the initial ('known good') state when it exits. It does simulate EOF, interrupt and quit keyboard signals, but it does not simulate regular terminal processing such as erase or kill.
It doesn't make sense to use this when the standard input is not a terminal, but the code should handle that OK too (it simply does normal reads). You can send the output to standard output (default) or to a file (-o file to create or truncate a file, -a file to append or create a file).

Related

Can not read from a pipe, and another stdin issue

So, I asked here just a while ago, but half of that question was just me being dumb. And I still have issues. I hope that this will be clearer than the question before.
I'm writing POSIX cat, I nearly got it working, but I have couple of issues:
My cat can not read from a pipe and I really do not know why (redirecting (<) works fine)
I can not figure out how to make it continuously read stdin, without some issues. I had a version that worked "fine", but would create a stack-overflow. The other version wouldn't stop reading from stdin if there was only stdin i.e.: my-cat < file would read from stdin until it got terminated which it shouldn't, but it has to read from stdin and wait for termination if no files are suplied.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
int main(int argc, char *argv[])
{
char opt;
while ((opt = getopt(argc, argv, "u")) != EOF) {
switch(opt) {
case 'u':
/* Make the output un-buffered */
setbuf(stdout, NULL);
break;
default:
break;
}
}
argc -= optind;
argv += optind;
int i = 0, fildes, fs = 0;
do {
/* Check for operands, if none or operand = "-". Read from stdin */
if (argc == 0 || !strcmp(argv[i], "-")) {
fildes = STDIN_FILENO;
} else {
fildes = open(argv[i], O_RDONLY);
}
/* Check for directories */
struct stat fb;
if (!fstat(fildes, &fb) && S_ISDIR(fb.st_mode)) {
fprintf(stderr, "pcat: %s: Is a directory\n", argv[i]);
i++;
continue;
}
/* Get file size */
fs = fb.st_size;
/* If bytes are read, write them to stdout */
char *buf = malloc(fs * sizeof(char));
while ((read(fildes, buf, fs)) > 0)
write(STDOUT_FILENO, buf, fs);
free(buf);
/* Close file if it's not stdin */
if (fildes != STDIN_FILENO)
close(fildes);
i++;
} while (i < argc);
return 0;
}
Pipes don't have a size, and nor do terminals. The contents of the st_size field is undefined for such files. (On my system it seems to always contain 0, but I don't think there is any cross-platform guarantee of that.)
So your plan of reading the entire file at one go and writing it all out again is not workable for non-regular files, and is risky even for them (the read is not guaranteed to return the full number of bytes requested). It's also an unnecessary memory hog if the file is large.
A better strategy is to read into a fixed-size buffer, and write out only the number of bytes you successfully read. You repeat this until end-of-file is reached, which is indicated by read() returning 0. This is how you solve your second problem.
On a similar note, write() is not guaranteed to write out the full number of bytes you asked it to, so you need to check its return value, and if it was short, try again to write out the remaining bytes.
Here's an example:
#define BUFSIZE 65536 // arbitrary choice, can be tuned for performance
ssize_t nread;
char buf[BUFSIZE]; // or char *buf = malloc(BUFSIZE);
while ((nread = read(filedes, buf, BUFSIZE)) > 0) {
ssize_t written = 0;
while (written < nread) {
ssize_t ret = write(STDOUT_FILENO, buf + written, nread - written);
if (ret <= 0)
// handle error
written += ret;
}
}
if (nread < 0)
// handle error
As a final comment, your program lacks error checking in general; e.g. if the file cannot be opened, it will proceed anyway with filedes == -1. It is important to check the return value of every system call you issue, and handle errors accordingly. This would be essential for a program to be used in real life, and even for toy programs created just as an exercise, it will be very helpful in debugging them. (Error checking would probably have given you some clues in figuring out what was wrong with this program, for instance.)
Your cat (You can call it my-cat, but I preferred to call it felix, just permit me the pun) should be used with stdio all the time to get the benefit of the buffering done by the stdio package. Below is a simplified version of cat using exclusively stdio package (almost exactly equal as it appears in K&R) and you'll see that is completely efficient as shown (you will see that the structure is almost exactly as yours, but I simplify the processing of the data copy /like K&R book/ and the processing of arguments /yours is a bit meshy/):
felix.c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#define ERR(_code, _fmt, ...) do { \
fprintf(stderr,"%s: " _fmt, progname, \
##__VA_ARGS__); \
if (_code) exit(_code); \
} while (0)
char *progname = "cat";
void process(FILE *f);
int main(int argc, char **argv)
{
int opt;
while ((opt = getopt(argc, argv, "u")) != EOF) {
switch (opt) {
case 'u': setbuf(stdout, NULL); break;
}
}
/* for the case it has been renamed, calculate the basename
* of argv[0] (progname is used in the macro ERR above) */
progname = strrchr(argv[0], '/');
progname = progname
? progname + 1
: argv[0];
/* shift options */
argc -= optind;
argv += optind;
if (argc) {
int i;
for (i = 0; i < argc; i++) {
FILE *f = fopen(argv[i], "r");
if (!f) {
ERR(EXIT_FAILURE,
"%s: %s (errno = %d)\n",
argv[i], strerror(errno), errno);
}
process(f);
fclose(f);
}
} else {
process(stdin);
}
exit(EXIT_SUCCESS);
}
/* you don't need to complicate here, fgetc and putchar use buffering as you stated in main
* (no output buffering if you do the setbuf(NULL) and input buffering all the time). The buffer
* size is best to leave stdio to calculate it, as it queries the filesystem to get the best
* input/output size and create buffers this size. and the processing is simple with a loop like
* the one below. You'll get no appreciable difference between this and any other input/output.
* you can believe me, I've tested it. */
void process(FILE *f)
{
int c;
while ((c = fgetc(f)) != EOF) {
putchar(c);
}
}
As you see, nothing has been specially done to support redirection, as redirection is not done inside a program, but done by the program that calls it (in this case by the shell) When you start a program, you receive three already open file descriptors. These are the ones that the shell is using, or the ones that the shell just puts in the places of 0, 1, and 2 before starting your program. So your program has nothing to do to cope with redirection. Everything is done (in this case) in the shell... and this is why your program redirection works, even if you have not done anything for it to work. You have only to do redirection if you are going to call a program with its input, output or standard error redirected somewhere (and this somewhere is not the standard input, output or error you have received from your parent process)... but this is not the case of my-cat.

Exit a program using a specific keypress combination? [duplicate]

I want to capture the Ctrl+D signal in my program and write a signal handler for it.
How can I do that?
I am working on C and using a Linux system.
As others have already said, to handle Control+D, handle "end of file"s.
Control+D is a piece of communication between the user and the pseudo-file that you see as stdin. It does not mean specifically "end of file", but more generally "flush the input I typed so far". Flushing means that any read() call on stdin in your program returns with the length of the input typed since the last flush. If the line is nonempty, the input becomes available to your program although the user did not type "return" yet. If the line is empty, then read() returns with zero, and that is interpreted as "end of file".
So when using Control+D to end a program, it only works at the beginning of a line, or if you do it twice (first time to flush, second time for read() to return zero).
Try it:
$ cat
foo
(type Control-D once)
foofoo (read has returned "foo")
(type Control-D again)
$
Ctrl+D is not a signal, it's EOF (End-Of-File). It closes the stdin pipe. If read(STDIN) returns 0, it means stdin closed, which means Ctrl+D was hit (assuming there is a keyboard at the other end of the pipe).
A minimalistic example:
#include <unistd.h>
#include <stdio.h>
#include <termios.h>
#include <signal.h>
void sig_hnd(int sig){ (void)sig; printf("(VINTR)"); }
int main(){
setvbuf(stdout,NULL,_IONBF,0);
struct termios old_termios, new_termios;
tcgetattr(0,&old_termios);
signal( SIGINT, sig_hnd );
new_termios = old_termios;
new_termios.c_cc[VEOF] = 3; // ^C
new_termios.c_cc[VINTR] = 4; // ^D
tcsetattr(0,TCSANOW,&new_termios);
char line[256]; int len;
do{
len=read(0,line,256); line[len]='\0';
if( len <0 ) printf("(len: %i)",len);
if( len==0 ) printf("(VEOF)");
if( len >0 ){
if( line[len-1] == 10 ) printf("(line:'%.*s')\n",len-1,line);
if( line[len-1] != 10 ) printf("(partial line:'%s')",line);
}
}while( line[0] != 'q' );
tcsetattr(0,TCSANOW,&old_termios);
}
The program change the VEOF char (from Ctrl-D) to Ctrl-C and the VINTR char (from Ctrl-C) to Ctrl-D. If You press Ctrl-D then the terminal driver will send a SIGINT to the signal handler of the program.
Note: pressing VINTR will erase the terminal input buffer so You can not read the characters typed in the line before the VINTR key pressed.
There's no need to process signals.
You need to ensure ISIG is not set on the terminal flags, that's all.
Here's a complete contained example using select to avoid blocking on stdin:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <termios.h>
#include <time.h>
#include <sys/select.h>
#define STDIN_FILENO 0
struct termios org_opts;
/** Select to check if stdin has pending input */
int pending_input(void) {
struct timeval tv;
fd_set fds;
tv.tv_sec = 0;
tv.tv_usec = 0;
FD_ZERO(&fds);
FD_SET(STDIN_FILENO, &fds); //STDIN_FILENO is 0
select(STDIN_FILENO+1, &fds, NULL, NULL, &tv);
return FD_ISSET(STDIN_FILENO, &fds);
}
/** Input terminal mode; save old, setup new */
void setup_terminal(void) {
struct termios new_opts;
tcgetattr(STDIN_FILENO, &org_opts);
memcpy(&new_opts, &org_opts, sizeof(new_opts));
new_opts.c_lflag &= ~(ICANON | ECHO | ECHOE | ECHOK | ECHONL | ECHOPRT | ECHOKE | ISIG | ICRNL);
tcsetattr(STDIN_FILENO, TCSANOW, &new_opts);
}
/** Shutdown terminal mode */
void reset_terminal(void) {
tcsetattr(STDIN_FILENO, TCSANOW, &org_opts);
}
/** Return next input or -1 if none */
int next_input(void) {
if (!pending_input())
return -1;
int rtn = fgetc(stdin);
printf("Found: %d\n", rtn);
return(rtn);
}
int main()
{
setup_terminal();
printf("Press Q to quit...\n");
for (;;) {
int key = next_input();
if (key != -1) {
if ((key == 113) || (key == 81)) {
printf("\nNormal exit\n");
break;
}
}
}
reset_terminal();
return 0;
}
Output:
doug-2:rust-sys-sterm doug$ cc junk.c
doug-2:rust-sys-sterm doug$ ./a.out
Press Q to quit...
Found: 4
Found: 3
Found: 27
Found: 26
Found: 113
Normal exit
NB. 3 is control C and 4 is control D; 26 is control z. 113 is 'q'.
See: http://en.wikipedia.org/wiki/ASCII#ASCII_control_characters for a full table.
As far as I know Ctrl+D is translated by the system to end of standard input so your app won't get any signal.
I think that the only way to intercept Ctrl+D is to work directly with the system api (like accessing tty)
You can use poll() and watch for POLLHUP on fd #1, because the TTY layer translates ^D to EOF.
Ctrl + D value in ascci table is 4 and is a non printable characters. So your can capture it in a terminal with the following code.
When getline function get Ctrl + D an error occur and return value is -1.
Your can make a condition on the return value.
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
int main(void)
{
char *buf = malloc(sizeof(char) * 500);
size_t size = 500;
int nb = getline(&buf, &size, stdin);
if (nb == -1)
printf("CTRL + D captured\n");
free(buf);
return (0);
}
You can check if stdin is not of with the feof method like so:
if (feof(stdin))
{
// some code
exit(0);
}
See this for more details/
According to the man page, getline() will return -1 on failure to read a line (including the end-of-file condition).
This means:
When getline() encounters a failure it will return -1
When getline() reads CTRL+D it will return -1
Since getline() returns a value of type ssize_t which is defined as a signed integer value, you can construct your code in such a way to test for the -1 value which will be returned in the event of an end-of-file condition or failure to read a line.
#include <stdio.h>
#include <stdlib>
int main(void)
{
char *buffer = NULL;
size_t bufsize = 0;
ssize_t characters;
characters = getline(&buffer, &bufsize, stdin);
if (characters == -1)
{
printf("You entered CTRL+D\n");
}
free(buffer);
return (0);
}

popen: intercepting user's input

I have a code which runs bc thru popen(). I can intercept the calculator's output and prepend it with "Output=" text. But how can I intercept what user's is writing to bc?
#include <stdio.h>
#include <stdlib.h>
int main(void) {
FILE *in;
char buff[512];
if(!(in = popen("bc", "r"))){
exit(1);
}
while(fgets(buff, sizeof(buff), in)!=NULL){
printf("Output = %s", buff);
}
pclose(in);
return 0;
}
You can combine bc and echo with a pipe: echo '12*4' | bc
Example typing 12*4:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(void) {
FILE *in;
char buff[512];
char cmd[512];
while (fgets(buff, sizeof(buff), stdin)!=NULL){
strcpy(cmd, "echo '");
strcat(cmd, buff);
strcat(cmd, "' | bc");
if(!(in = popen(cmd, "r"))){
exit(1);
}
fgets(buff, sizeof(buff), in);
printf("output:%s", buff);
}
pclose(in);
return 0;
}
Output:
david#debian:~$ ./demo
12*4
output:48
You need to use pipe() and fork/exec(). However, manual piping is quite complex:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
int main(void) {
int write_pipe[2], read_pipe[2];
pipe(read_pipe); pipe(write_pipe);
#define PARENT_READ read_pipe[0]
#define CHILD_WRITE read_pipe[1]
#define CHILD_READ write_pipe[0]
#define PARENT_WRITE write_pipe[1]
int child = fork();
if (child == 0) { /* in child */
close(PARENT_WRITE);
close(PARENT_READ);
dup2(CHILD_READ, 0); close(CHILD_READ);
dup2(CHILD_WRITE, 1); close(CHILD_WRITE);
execl("/usr/bin/bc", "/usr/bin/bc");
} else { /* in parent */
close(CHILD_READ);
close(CHILD_WRITE);
write(PARENT_WRITE, "2+3\n", 4);
char buff[512];
int output_len=read(PARENT_READ, buff, sizeof(buff));
write(1, buff, output_len);
close(PARENT_READ);
}
return 0;
}
What you're looking to do is to start a subprocess, then simultaneously:
When activity occurs on standard input, execute some function on that input before passing it to the subprocess.
When activity occurs on the subprocess output, execute some function on that output before passing it to standard output.
The system call that allows you to wait for activity on two handles is called poll, but before we do that, we need to create the handles and start the subprocess:
int a[2], b[2];
if(pipe(a)==-1)abort(); // for communicating with subprocess input
if(pipe(b)==-1)abort(); // for communicating with subprocess output
switch(fork()) {
case -1: abort();
case 0: dup2(a[0],0), dup2(b[1],1), execlp("/usr/bin/bc", "bc", 0); exit(1);
};
Note how pipe works: Data written to fildes[1] appears on (i.e., can be read from) fildes[0]. This means we want to read from the standard output of our subprocess, b[0] and write to the standard input of our subprocess a[1].
Before we do that, we can use the poll instruction to wait for activity on either standard input (fd #0), or the subprocess output (b[0]):
for(;;) {
struct pollfd p[2]={0};
p[0].fd = 0; p[1].fd = b[0];
p[0].events = p[1].events = POLLIN;
while (poll(p,2,-1) <= 0);
At this point, there is activity on at least one of these file descriptors. You can see which one by examining the .revents member.
if(p[0].revents & POLLIN) {
r = read(0, buffer, sizeof(buffer));
write(a[1], buffer, r); // check for errors, or perhaps modify buffer
}
if(p[1].revents & POLLIN) {
r = read(b[0], buffer, sizeof(buffer));
write(1, buffer, r); // check for errors, or perhaps modify buffer
}
Note especially we use the opposite member a[1] and b[0] from the member we dup2'd onto the subprocesses standard input (0) and standard output (1).
At this point you can loop back up to poll again:
}
Disconnects (like EOF, program crash, etc) will be presented as read() returning 0, so watch carefully for this case, and break; out of the loop if so desired.

How to manage two or more consumers via pthreads?

I have a generic problem I am looking to solve, where chunks of binary data sent from a standard input or regular file stream to an application, which in turn converts that binary data into text. Using threads, I want to process the text before piping it over to the next application, which modifies that text even further, and so on.
As a simple test case, I want to extract compressed data via gunzip. Specifically, I am looking at using gunzip -c - to extract chunks of binary data sent to it via its (reassigned) stdin file descriptor, and then pulling out chunks of text from its (reassigned) stdout file descriptor. I can then print these chunks of text to the real stdout or stderr (or do other stuff, later on).
(I realize that I can do gzip-based compression and extraction on the command line. My goal here is to use this test case to learn how to correctly pass around generic chunks of binary and text data between threads that either run that data through binaries, or process it further.)
In the case of my test program, I have set up three pthread_t threads:
produce_gzip_chunk_thread
consume_gzip_chunk_thread
consume_gunzip_chunk_thread
I pass each of these threads a shared data instance called thread_data, which contains a thread lock, two conditions, and some buffers and counter variables. I also include a set of file descriptors for a gunzip process opened with popen3():
typedef struct pthread_data pthread_data_t;
typedef struct popen3_desc popen3_desc_t;
struct pthread_data {
pthread_mutex_t in_lock;
pthread_cond_t in_cond;
pthread_cond_t out_cond;
unsigned char in_buf[BUF_LENGTH_VALUE];
size_t n_in_bytes;
size_t n_in_bytes_written_to_gunzip;
size_t n_out_bytes_read_from_gunzip;
FILE *in_file_ptr;
boolean in_eof;
char in_line[LINE_LENGTH_VALUE];
popen3_desc_t *gunzip_ptr;
};
struct popen3_desc {
int in;
int out;
int err;
};
The produce_gzip_chunk_thread reads in a 1024-byte chunk of gzip-compressed bytes from a regular file called foo.gz.
These bytes are written to an unsigned char buffer called in_buf, which is part of the shared data struct I am passing to each thread:
void * produce_gzip_chunk(void *t_data)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> produce_gzip_chunk()\n");
#endif
pthread_data_t *d = (pthread_data_t *)t_data;
unsigned char in_buf[BUF_LENGTH_VALUE];
size_t n_in_bytes = 0;
d->in_eof = kFalse;
pthread_mutex_lock(&d->in_lock);
while(kTrue) {
n_in_bytes = fread(in_buf, sizeof(in_buf[0]), sizeof(in_buf), d->in_file_ptr);
if (n_in_bytes > 0) {
while (d->n_in_bytes != 0 || d->n_out_bytes_read_from_gunzip != 0)
pthread_cond_wait(&d->in_cond, &d->in_lock);
memcpy(d->in_buf, in_buf, n_in_bytes);
d->n_in_bytes = n_in_bytes;
#ifdef DEBUG
fprintf(stderr, "Debug: ######## [%07zu] produced chunk\n", d->n_in_bytes);
#endif
pthread_cond_signal(&d->in_cond);
}
else if (feof(d->in_file_ptr) || ferror(d->in_file_ptr))
break;
}
d->in_eof = kTrue;
pthread_mutex_unlock(&d->in_lock);
pthread_cond_signal(&d->in_cond);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> produce_gzip_chunk()\n");
#endif
return NULL;
}
Once there is a positive number of bytes stored in n_bytes — that is, we have pulled data from our input gzip archive that needs to be processed with gunzip — this triggers a condition that permits the second thread consume_gzip_chunk_thread to operate:
void * consume_gzip_chunk(void *t_data)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> consume_gzip_chunk()\n");
#endif
pthread_data_t *d = (pthread_data_t *)t_data;
long n_in_bytes_written_to_gunzip;
pthread_mutex_lock(&d->in_lock);
while(kTrue) {
while (d->n_in_bytes == 0 && !d->in_eof)
pthread_cond_wait(&d->in_cond, &d->in_lock);
if (d->n_in_bytes) {
#ifdef DEBUG
fprintf(stderr, "Debug: ........ [%07zu] processing chunk\n", d->n_in_bytes);
#endif
if (!d->gunzip_ptr) {
#ifdef DEBUG
fprintf(stderr, "Debug: * setting up gunzip ptr\n");
#endif
d->gunzip_ptr = malloc(sizeof(popen3_desc_t));
if (!d->gunzip_ptr) {
fprintf(stderr, "Error: Could not create gunzip file handle struct\n");
exit(EXIT_FAILURE);
}
popen3("gunzip -c -",
&(d->gunzip_ptr->in),
&(d->gunzip_ptr->out),
&(d->gunzip_ptr->err),
kTrue,
kTrue);
memset(d->in_line, 0, LINE_LENGTH_VALUE);
}
n_in_bytes_written_to_gunzip = (long) write(d->gunzip_ptr->in, d->in_buf, d->n_in_bytes);
#ifdef DEBUG
fprintf(stderr, "Debug: ................ wrote [%07ld] bytes into the gunzip process\n", n_in_bytes_written_to_gunzip);
#endif
if (n_in_bytes_written_to_gunzip > 0)
d->n_in_bytes_written_to_gunzip = n_in_bytes_written_to_gunzip;
d->n_in_bytes = 0;
pthread_cond_signal(&d->out_cond);
}
if (d->in_eof)
break;
}
pthread_mutex_unlock(&d->in_lock);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> consume_gzip_chunk()\n");
#endif
return NULL;
}
When consuming the gzip data chunk, we use the write function to send n_bytes of in_buf to the gunzip process's input file descriptor. At the end, we send another thread signal, but this time to out_cond, so as to help reawaken consume_gunzip_chunk_thread, which reads from gunzip's output to do more work:
void * consume_gunzip_chunk(void *t_data)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> consume_gunzip_chunk()\n");
#endif
pthread_data_t *d = (pthread_data_t *)t_data;
long n_out_bytes_read_from_gunzip;
pthread_mutex_lock(&d->in_lock);
while(kTrue) {
while (d->n_in_bytes_written_to_gunzip == 0) {
pthread_cond_wait(&d->out_cond, &d->in_lock);
}
if (d->n_in_bytes_written_to_gunzip) {
sleep(1);
n_out_bytes_read_from_gunzip = read(d->gunzip_ptr->out, d->in_line, LINE_LENGTH_VALUE);
#ifdef DEBUG
fprintf(stderr, "Debug: ------------------------ read [%07ld] bytes out from the gunzip process\n", n_out_bytes_read_from_gunzip);
fprintf(stderr, "Debug: ------------------------ gunzip output chunk:\n[%s]\n", d->in_line);
#endif
memset(d->in_line, 0, strlen(d->in_line));
if (n_out_bytes_read_from_gunzip > 0)
d->n_out_bytes_read_from_gunzip = n_out_bytes_read_from_gunzip;
d->n_in_bytes_written_to_gunzip = 0;
pthread_cond_signal(&d->in_cond);
}
if (d->in_eof && (d->n_in_bytes_written_to_gunzip == 0))
break;
}
pthread_mutex_unlock(&d->in_lock);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> consume_gunzip_chunk()\n");
#endif
return NULL;
}
This attempts to read any available bytes from the gunzip process's output file descriptor. For debugging purposes, I just want to print them to stderr for now.
The problem I am facing is that I need to add a sleep(1) statement in consume_gunzip_chunk, before doing the read, in order to get things working properly.
Without this sleep(1) statement, my test program will usually output nothing — except once every 8-10 attempts, when the compressed data are extracted correctly.
Question - What am I doing wrong about my arrangement of conditions, such that the sleep(1) call is required to make the gzip-extraction work properly? In a production scenario, working with much larger input files, forcibly waiting a second every 1kB seems like a bad idea.
For reproducibility with the full source code, here are the two relevant files. Here is the header:
/*
* convert.h
*/
#ifndef CONVERT_H
#define CONVERT_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <getopt.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#define CB_VERSION "1.0"
#define LINE_LENGTH_VALUE 65536
#define BUF_LENGTH_VALUE 1024
#define POPEN3_READ 0
#define POPEN3_WRITE 1
typedef int boolean;
extern const boolean kTrue;
extern const boolean kFalse;
const boolean kTrue = 1;
const boolean kFalse = 0;
typedef enum {
kGzip,
kUnknown
} format_t;
typedef struct pthread_data pthread_data_t;
typedef struct popen3_desc popen3_desc_t;
struct pthread_data {
pthread_mutex_t in_lock;
pthread_cond_t in_cond;
pthread_cond_t out_cond;
unsigned char in_buf[BUF_LENGTH_VALUE];
size_t n_in_bytes;
size_t n_in_bytes_written_to_gunzip;
size_t n_out_bytes_read_from_gunzip;
boolean in_eof;
FILE *in_file_ptr;
popen3_desc_t *gunzip_ptr;
char in_line[LINE_LENGTH_VALUE];
};
struct popen3_desc {
int in;
int out;
int err;
};
static const char *name = "convert";
static const char *version = CB_VERSION;
static const char *authors = "Alex Reynolds";
static const char *usage = "\n" \
"Usage: convert --input-format=str <input-file>\n" \
" Process Flags:\n\n" \
" --input-format=str | -f str Input format (str = [ gzip ]; required)\n" \
" --help | -h Show this usage message\n";
static struct convert_globals_t {
char *input_format_str;
format_t input_format;
char **filenames;
int num_filenames;
} convert_globals;
static struct option convert_client_long_options[] = {
{ "input-format", required_argument, NULL, 'f' },
{ "help", no_argument, NULL, 'h' },
{ NULL, no_argument, NULL, 0 }
};
static const char *convert_client_opt_string = "f:h?";
void * consume_gunzip_chunk (void *t_data);
void * consume_gzip_chunk (void *t_data);
void * produce_gzip_chunk (void *t_data);
FILE * new_file_ptr (const char *in_fn);
void delete_file_ptr (FILE **file_ptr);
pid_t popen3 (const char *command,
int *in_desc,
int *out_desc,
int *err_desc,
boolean nonblock_in,
boolean nonblock_outerr);
off_t fsize (const char *fn);
void initialize_globals ();
void parse_command_line_options (int argc,
char **argv);
void print_usage (FILE *stream);
#endif
Here is the implementation:
/*
* convert.c
*/
#include "convert.h"
int main(int argc, char **argv)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> main()\n");
#endif
pthread_t produce_gzip_chunk_thread = NULL;
pthread_t consume_gzip_chunk_thread = NULL;
pthread_t consume_gunzip_chunk_thread = NULL;
pthread_data_t *thread_data = NULL;
parse_command_line_options(argc, argv);
/* initialize thread data */
thread_data = malloc(sizeof(pthread_data_t));
thread_data->n_in_bytes = 0;
thread_data->n_in_bytes_written_to_gunzip = 0;
thread_data->n_out_bytes_read_from_gunzip = 0;
thread_data->in_eof = kFalse;
thread_data->in_file_ptr = new_file_ptr(convert_globals.filenames[0]);
pthread_mutex_init(&(thread_data->in_lock), NULL);
pthread_cond_init(&(thread_data->in_cond), NULL);
pthread_cond_init(&(thread_data->out_cond), NULL);
/* parse input */
if (convert_globals.input_format == kGzip)
{
if (pthread_create(&produce_gzip_chunk_thread, NULL, produce_gzip_chunk, (void *) thread_data) != 0) {
fprintf(stderr, "Error: Could not create gzip chunk production thread\n");
return EXIT_FAILURE;
}
if (pthread_create(&consume_gzip_chunk_thread, NULL, consume_gzip_chunk, (void *) thread_data) != 0) {
fprintf(stderr, "Error: Could not create gzip chunk consumption thread\n");
return EXIT_FAILURE;
}
if (pthread_create(&consume_gunzip_chunk_thread, NULL, consume_gunzip_chunk, (void *) thread_data) != 0) {
fprintf(stderr, "Error: Could not create gunzip chunk consumption thread\n");
return EXIT_FAILURE;
}
if (pthread_join(produce_gzip_chunk_thread, NULL) != 0) {
fprintf(stderr, "Error: Could not join gzip chunk production thread\n");
return EXIT_FAILURE;
}
if (pthread_join(consume_gzip_chunk_thread, NULL) != 0) {
fprintf(stderr, "Error: Could not join gzip chunk consumption thread\n");
return EXIT_FAILURE;
}
if (pthread_join(consume_gunzip_chunk_thread, NULL) != 0) {
fprintf(stderr, "Error: Could not join gunzip chunk consumption thread\n");
return EXIT_FAILURE;
}
}
else
{
/*
handle text formats
*/
}
/* cleanup */
delete_file_ptr(&thread_data->in_file_ptr);
pthread_mutex_destroy(&(thread_data->in_lock));
pthread_cond_destroy(&(thread_data->in_cond));
pthread_cond_destroy(&(thread_data->out_cond));
free(thread_data);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> main()\n");
#endif
return EXIT_SUCCESS;
}
void * consume_gunzip_chunk(void *t_data)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> consume_gunzip_chunk()\n");
#endif
pthread_data_t *d = (pthread_data_t *)t_data;
long n_out_bytes_read_from_gunzip;
pthread_mutex_lock(&d->in_lock);
while(kTrue) {
while (d->n_in_bytes_written_to_gunzip == 0) {
pthread_cond_wait(&d->out_cond, &d->in_lock);
}
if (d->n_in_bytes_written_to_gunzip) {
sleep(1);
n_out_bytes_read_from_gunzip = read(d->gunzip_ptr->out, d->in_line, LINE_LENGTH_VALUE);
#ifdef DEBUG
fprintf(stderr, "Debug: ------------------------ read [%07ld] bytes out from the gunzip process\n", n_out_bytes_read_from_gunzip);
fprintf(stderr, "Debug: ------------------------ gunzip output chunk:\n[%s]\n", d->in_line);
#endif
memset(d->in_line, 0, strlen(d->in_line));
if (n_out_bytes_read_from_gunzip > 0)
d->n_out_bytes_read_from_gunzip = n_out_bytes_read_from_gunzip;
d->n_in_bytes_written_to_gunzip = 0;
pthread_cond_signal(&d->in_cond);
}
if (d->in_eof && (d->n_in_bytes_written_to_gunzip == 0))
break;
}
pthread_mutex_unlock(&d->in_lock);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> consume_gunzip_chunk()\n");
#endif
return NULL;
}
void * consume_gzip_chunk(void *t_data)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> consume_gzip_chunk()\n");
#endif
pthread_data_t *d = (pthread_data_t *)t_data;
long n_in_bytes_written_to_gunzip;
pthread_mutex_lock(&d->in_lock);
while(kTrue) {
while (d->n_in_bytes == 0 && !d->in_eof)
pthread_cond_wait(&d->in_cond, &d->in_lock);
if (d->n_in_bytes) {
#ifdef DEBUG
fprintf(stderr, "Debug: ........ [%07zu] processing chunk\n", d->n_in_bytes);
#endif
if (!d->gunzip_ptr) {
#ifdef DEBUG
fprintf(stderr, "Debug: * setting up gunzip ptr\n");
#endif
d->gunzip_ptr = malloc(sizeof(popen3_desc_t));
if (!d->gunzip_ptr) {
fprintf(stderr, "Error: Could not create gunzip file handle struct\n");
exit(EXIT_FAILURE);
}
popen3("gunzip -c -",
&(d->gunzip_ptr->in),
&(d->gunzip_ptr->out),
&(d->gunzip_ptr->err),
kTrue,
kTrue);
memset(d->in_line, 0, LINE_LENGTH_VALUE);
}
n_in_bytes_written_to_gunzip = (long) write(d->gunzip_ptr->in, d->in_buf, d->n_in_bytes);
#ifdef DEBUG
fprintf(stderr, "Debug: ................ wrote [%07ld] bytes into the gunzip process\n", n_in_bytes_written_to_gunzip);
#endif
if (n_in_bytes_written_to_gunzip > 0)
d->n_in_bytes_written_to_gunzip = n_in_bytes_written_to_gunzip;
d->n_in_bytes = 0;
/* pthread_cond_signal(&d->in_cond); */
pthread_cond_signal(&d->out_cond);
}
if (d->in_eof)
break;
}
pthread_mutex_unlock(&d->in_lock);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> consume_gzip_chunk()\n");
#endif
return NULL;
}
void * produce_gzip_chunk(void *t_data)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> produce_gzip_chunk()\n");
#endif
pthread_data_t *d = (pthread_data_t *)t_data;
unsigned char in_buf[BUF_LENGTH_VALUE];
size_t n_in_bytes = 0;
d->in_eof = kFalse;
pthread_mutex_lock(&d->in_lock);
while(kTrue) {
n_in_bytes = fread(in_buf, sizeof(in_buf[0]), sizeof(in_buf), d->in_file_ptr);
if (n_in_bytes > 0) {
while (d->n_in_bytes != 0 || d->n_out_bytes_read_from_gunzip != 0)
pthread_cond_wait(&d->in_cond, &d->in_lock);
memcpy(d->in_buf, in_buf, n_in_bytes);
d->n_in_bytes = n_in_bytes;
#ifdef DEBUG
fprintf(stderr, "Debug: ######## [%07zu] produced chunk\n", d->n_in_bytes);
#endif
pthread_cond_signal(&d->in_cond);
}
else if (feof(d->in_file_ptr) || ferror(d->in_file_ptr))
break;
}
d->in_eof = kTrue;
pthread_mutex_unlock(&d->in_lock);
pthread_cond_signal(&d->in_cond);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> produce_gzip_chunk()\n");
#endif
return NULL;
}
FILE * new_file_ptr(const char *in_fn)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> new_file_ptr()\n");
#endif
FILE *file_ptr = NULL;
boolean not_stdin = kTrue;
not_stdin = strcmp(in_fn, "-");
file_ptr = (not_stdin) ? fopen(in_fn, "r") : stdin;
if (!file_ptr) {
fprintf(stderr, "Error: Could not open input stream\n");
exit(EXIT_FAILURE);
}
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> new_file_ptr()\n");
#endif
return file_ptr;
}
void delete_file_ptr(FILE **file_ptr)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> delete_file_ptr()\n");
#endif
fclose(*file_ptr);
*file_ptr = NULL;
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> delete_file_ptr()\n");
#endif
}
pid_t popen3(const char *command, int *in_desc, int *out_desc, int *err_desc, boolean nonblock_in, boolean nonblock_outerr)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> popen3()\n");
#endif
int p_stdin[2], p_stdout[2], p_stderr[2];
pid_t pid;
if (pipe(p_stdin) != 0 || pipe(p_stdout) != 0 || pipe(p_stderr) != 0)
return -1;
if (nonblock_in) {
fcntl(p_stdin[POPEN3_WRITE], F_SETFL, fcntl(p_stdin[POPEN3_WRITE], F_GETFL) | O_NONBLOCK);
}
if (nonblock_outerr) {
fcntl(p_stdout[POPEN3_READ], F_SETFL, fcntl(p_stdout[POPEN3_READ], F_GETFL) | O_NONBLOCK);
fcntl(p_stderr[POPEN3_READ], F_SETFL, fcntl(p_stderr[POPEN3_READ], F_GETFL) | O_NONBLOCK);
}
pid = fork();
if (pid < 0)
return pid; /* error */
if (pid == 0) {
close(p_stdin[POPEN3_WRITE]);
close(p_stdout[POPEN3_READ]);
close(p_stderr[POPEN3_READ]);
dup2(p_stdin[POPEN3_READ], fileno(stdin));
dup2(p_stdout[POPEN3_WRITE], fileno(stderr));
dup2(p_stdout[POPEN3_WRITE], fileno(stdout));
execl("/bin/sh", "sh", "-c", command, NULL);
fprintf(stderr, "Error: Could not execl [%s]\n", command);
exit(EXIT_FAILURE);
}
if (in_desc == NULL)
close(p_stdin[POPEN3_WRITE]);
else
*in_desc = p_stdin[POPEN3_WRITE];
if (out_desc == NULL)
close(p_stdout[POPEN3_READ]);
else
*out_desc = p_stdout[POPEN3_READ];
if (err_desc == NULL)
close(p_stderr[POPEN3_READ]);
else
*err_desc = p_stderr[POPEN3_READ];
#ifdef DEBUG
fprintf(stderr, "Debug: New *in_desc = %d\n", *in_desc);
fprintf(stderr, "Debug: New *out_desc = %d\n", *out_desc);
fprintf(stderr, "Debug: New *err_desc = %d\n", *err_desc);
#endif
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> popen3()\n");
#endif
return pid;
}
off_t fsize(const char *fn)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> fsize()\n");
#endif
struct stat st;
if (stat(fn, &st) == 0)
return st.st_size;
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> fsize()\n");
#endif
return EXIT_FAILURE;
}
void initialize_globals()
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> initialize_globals()\n");
#endif
convert_globals.input_format = kUnknown;
convert_globals.filenames = NULL;
convert_globals.num_filenames = 0;
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> initialize_globals()\n");
#endif
}
void parse_command_line_options(int argc, char **argv)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> parse_command_line_options()\n");
#endif
int client_long_index;
int client_opt = getopt_long(argc,
argv,
convert_client_opt_string,
convert_client_long_options,
&client_long_index);
char *in_format_str = NULL;
opterr = 0; /* disable error reporting by GNU getopt */
initialize_globals();
while (client_opt != -1)
{
switch (client_opt)
{
case 'f':
in_format_str = optarg;
break;
case 'h':
print_usage(stdout);
exit(EXIT_SUCCESS);
case '?':
print_usage(stdout);
exit(EXIT_SUCCESS);
default:
break;
}
client_opt = getopt_long(argc,
argv,
convert_client_opt_string,
convert_client_long_options,
&client_long_index);
}
convert_globals.filenames = argv + optind;
convert_globals.num_filenames = argc - optind;
if (!in_format_str) {
fprintf(stderr, "Error: Specified input format was omitted; please specify one of required input formats\n");
print_usage(stderr);
exit(EXIT_FAILURE);
}
else if (convert_globals.num_filenames != 1) {
fprintf(stderr, "Error: Please specify an input file (either a regular file or '-' for stdin\n");
print_usage(stderr);
exit(EXIT_FAILURE);
}
/* map format string to setting */
if (strcmp(in_format_str, "gzip") == 0)
convert_globals.input_format = kGzip;
else {
fprintf(stderr, "Error: Specified input format is unknown; please specify one of required input formats\n");
print_usage(stderr);
exit(EXIT_FAILURE);
}
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> parse_command_line_options()\n");
#endif
}
void print_usage(FILE *stream)
{
#ifdef DEBUG
fprintf(stderr, "Debug: Entering --> print_usage()\n");
#endif
fprintf(stream,
"%s\n" \
" version: %s\n" \
" author: %s\n" \
"%s\n",
name,
version,
authors,
usage);
#ifdef DEBUG
fprintf(stderr, "Debug: Leaving --> print_usage()\n");
#endif
}
Here is the build process:
$ mkdir -p objects
$ cc -Wall -Wextra -pedantic -std=c99 -D__STDC_CONSTANT_MACROS -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE=1 -DDEBUG=1 -g -O0 -fno-inline -c convert.c -o objects/convert.o -iquote${PWD}
$ cc -Wall -Wextra -pedantic -std=c99 -D__STDC_CONSTANT_MACROS -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE=1 -DDEBUG=1 -g -O0 -fno-inline objects/convert.o -o convert -lpthread
I have been able to build this test code on OS X and Linux hosts with reasonably modern compile environments.
Thanks in advance for any useful advice!
I will start by saying that I feel pthreads conditions and mutexes were not really necessary here, nor was non-blocking I/O the best reaction to the problems you describe.
In my opinion, the problems you describe with your condition- and mutex-less version are symptoms of forgetting to close() assiduously the ends of your pipes, with the result that a copy of the writing-end file descriptor of the pipe feeding the child process's stdin leaked (into that child or others) alive.
Then, given that a writing-end corresponding to stdin's reading-end still existed, the system did not give EOF but instead blocked indefinitely.
In your case, you did prevent the pipe-end file descriptors from leaking to the spawned child (with the correct close() calls on the child-side of the fork() within your popen3(), although you forgot to close() the wrong-end pipe ends on the parent-side). However, you did not prevent this leakage to all other children! If you call popen3() twice, the leakage of the set of three descriptors into the child is prevented, but as the parent still owns them, when the next call to popen3() happens, after the fork() there are now 6 file descriptors to close (The old set of three and and the new set of three you just created).
In your case, therefore, you should set the close-on-exec flag on those pipe ends, thusly:
fcntl(fdIn [PIPEWR], F_SETFD, fcntl(fdIn [PIPEWR], F_GETFD) | FD_CLOEXEC);
fcntl(fdOut[PIPERD], F_SETFD, fcntl(fdOut[PIPERD], F_GETFD) | FD_CLOEXEC);
fcntl(fdErr[PIPERD], F_SETFD, fcntl(fdErr[PIPERD], F_GETFD) | FD_CLOEXEC);
Here is code that spawns 6 threads and 3 processes, and passes its input unmodified to the output, after internally compressing then decompressing it. It effectively implements gzip -c - | XOR 0x55 | XOR 0x55 | gunzip -c - | cat, where:
Standard input is fed to to gzip by thread srcThrd.
gzip's output is read by thread a2xor0Thrd and fed to thread xor0Thrd.
Thread xor0Thrd XORs its input with 0x55 before passing it on to thread xor1Thrd.
Thread xor1Thrd XORs its input with 0x55 before passing it on to thread xor22BThrd.
Thread xor22BThrd feeds its input to process gunzip.
Process gunzip feeds its output directly (without going through a thread) to cat
Process cat's output is read by thread dstThrd and printed to standard output.
Compression is done by inter-process pipe communication, while XORing is done by intra-process pipe communication. No mutexes or condition variables are used. main() is extremely easy to understand. This code should be easy to extend to your situation.
/* Includes */
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
/* Defines */
#define PIPERD 0
#define PIPEWR 1
/* Data structures */
typedef struct PIPESET{
int Ain[2];
int Aout[2];
int Aerr[2];
int xor0[2];
int xor1[2];
int xor2[2];
int Bin[2];
int BoutCin[2];
int Berr[2];
int Cout[2];
int Cerr[2];
} PIPESET;
/* Function Implementations */
/**
* Source thread main method.
*
* Slurps from standard input and feeds process A.
*/
void* srcThrdMain(void* arg){
PIPESET* pipeset = (PIPESET*)arg;
char c;
while(read(0, &c, 1) > 0){
write(pipeset->Ain[PIPEWR], &c, 1);
}
close(pipeset->Ain[PIPEWR]);
pthread_exit(NULL);
}
/**
* A to XOR0 thread main method.
*
* Manually pipes from standard output of process A to input of thread XOR0.
*/
void* a2xor0ThrdMain(void* arg){
PIPESET* pipeset = (PIPESET*)arg;
char buf[65536];
ssize_t bytesRead;
while((bytesRead = read(pipeset->Aout[PIPERD], buf, 65536)) > 0){
write(pipeset->xor0[PIPEWR], buf, bytesRead);
}
close(pipeset->xor0[PIPEWR]);
pthread_exit(NULL);
}
/**
* XOR0 thread main method.
*
* XORs input with 0x55 and outputs to input of XOR1.
*/
void* xor0ThrdMain(void* arg){
PIPESET* pipeset = (PIPESET*)arg;
char c;
while(read(pipeset->xor0[PIPERD], &c, 1) > 0){
c ^= 0x55;
write(pipeset->xor1[PIPEWR], &c, 1);
}
close(pipeset->xor1[PIPEWR]);
pthread_exit(NULL);
}
/**
* XOR1 thread main method.
*
* XORs input with 0x55 and outputs to input of process B.
*/
void* xor1ThrdMain(void* arg){
PIPESET* pipeset = (PIPESET*)arg;
char c;
while(read(pipeset->xor1[PIPERD], &c, 1) > 0){
c ^= 0x55;
write(pipeset->xor2[PIPEWR], &c, 1);
}
close(pipeset->xor2[PIPEWR]);
pthread_exit(NULL);
}
/**
* XOR2 to B thread main method.
*
* Manually pipes from input (output of XOR1) to input of process B.
*/
void* xor22BThrdMain(void* arg){
PIPESET* pipeset = (PIPESET*)arg;
char buf[65536];
ssize_t bytesRead;
while((bytesRead = read(pipeset->xor2[PIPERD], buf, 65536)) > 0){
write(pipeset->Bin[PIPEWR], buf, bytesRead);
}
close(pipeset->Bin[PIPEWR]);
pthread_exit(NULL);
}
/**
* Destination thread main method.
*
* Manually copies the standard output of process C to the standard output.
*/
void* dstThrdMain(void* arg){
PIPESET* pipeset = (PIPESET*)arg;
char c;
while(read(pipeset->Cout[PIPERD], &c, 1) > 0){
write(1, &c, 1);
}
pthread_exit(NULL);
}
/**
* Set close on exec flag on given descriptor.
*/
void setCloExecFlag(int fd){
fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
}
/**
* Set close on exec flag on given descriptor.
*/
void unsetCloExecFlag(int fd){
fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) & ~FD_CLOEXEC);
}
/**
* Pipe4.
*
* Create a pipe with some ends possibly marked close-on-exec.
*/
#define PIPE4_FLAG_NONE (0U)
#define PIPE4_FLAG_RD_CLOEXEC (1U << 0)
#define PIPE4_FLAG_WR_CLOEXEC (1U << 1)
int pipe4(int fd[2], int flags){
int ret = pipe(fd);
if(flags&PIPE4_FLAG_RD_CLOEXEC){setCloExecFlag(fd[PIPERD]);}
if(flags&PIPE4_FLAG_WR_CLOEXEC){setCloExecFlag(fd[PIPEWR]);}
return ret;
}
/**
* Pipe4 explicit derivatives.
*/
#define pipe4_cloexec(fd) pipe4((fd), PIPE4_FLAG_RD_CLOEXEC|PIPE4_FLAG_WR_CLOEXEC)
/**
* Popen4.
*
* General-case for spawning a process and tethering it with cloexec pipes on stdin,
* stdout and stderr.
*
* #param [in] cmd The command to execute.
* #param [in/out] pin The pointer to the cloexec pipe for stdin.
* #param [in/out] pout The pointer to the cloexec pipe for stdout.
* #param [in/out] perr The pointer to the cloexec pipe for stderr.
* #param [in] flags A bitwise OR of flags to this function. Available
* flags are:
*
* POPEN4_FLAG_NONE:
* Explicitly specify no flags.
* POPEN4_FLAG_NOCLOSE_PARENT_STDIN,
* POPEN4_FLAG_NOCLOSE_PARENT_STDOUT,
* POPEN4_FLAG_NOCLOSE_PARENT_STDERR:
* Don't close pin[PIPERD], pout[PIPEWR] and perr[PIPEWR] in the parent,
* respectively.
* POPEN4_FLAG_CLOSE_CHILD_STDIN,
* POPEN4_FLAG_CLOSE_CHILD_STDOUT,
* POPEN4_FLAG_CLOSE_CHILD_STDERR:
* Close the respective streams in the child. Ignores pin, pout and perr
* entirely. Overrides a NOCLOSE_PARENT flag for the same stream.
*/
#define POPEN4_FLAG_NONE (0U)
#define POPEN4_FLAG_NOCLOSE_PARENT_STDIN (1U << 0)
#define POPEN4_FLAG_NOCLOSE_PARENT_STDOUT (1U << 1)
#define POPEN4_FLAG_NOCLOSE_PARENT_STDERR (1U << 2)
#define POPEN4_FLAG_CLOSE_CHILD_STDIN (1U << 3)
#define POPEN4_FLAG_CLOSE_CHILD_STDOUT (1U << 4)
#define POPEN4_FLAG_CLOSE_CHILD_STDERR (1U << 5)
pid_t popen4(const char* cmd, int pin[2], int pout[2], int perr[2], int flags){
/********************
** FORK PROCESS **
********************/
pid_t ret = fork();
if(ret < 0){
/**
* Error in fork(), still in parent.
*/
fprintf(stderr, "fork() failed!\n");
return ret;
}else if(ret == 0){
/**
* Child-side of fork
*/
if(flags & POPEN4_FLAG_CLOSE_CHILD_STDIN){
close(0);
}else{
unsetCloExecFlag(pin [PIPERD]);
dup2(pin [PIPERD], 0);
}
if(flags & POPEN4_FLAG_CLOSE_CHILD_STDOUT){
close(1);
}else{
unsetCloExecFlag(pout[PIPEWR]);
dup2(pout[PIPEWR], 1);
}
if(flags & POPEN4_FLAG_CLOSE_CHILD_STDERR){
close(2);
}else{
unsetCloExecFlag(perr[PIPEWR]);
dup2(perr[PIPEWR], 2);
}
execl("/bin/sh", "sh", "-c", cmd, NULL);
fprintf(stderr, "exec() failed!\n");
exit(-1);
}else{
/**
* Parent-side of fork
*/
if(~flags & POPEN4_FLAG_NOCLOSE_PARENT_STDIN &&
~flags & POPEN4_FLAG_CLOSE_CHILD_STDIN){
close(pin [PIPERD]);
}
if(~flags & POPEN4_FLAG_NOCLOSE_PARENT_STDOUT &&
~flags & POPEN4_FLAG_CLOSE_CHILD_STDOUT){
close(pout[PIPEWR]);
}
if(~flags & POPEN4_FLAG_NOCLOSE_PARENT_STDERR &&
~flags & POPEN4_FLAG_CLOSE_CHILD_STDERR){
close(perr[PIPEWR]);
}
return ret;
}
/* Unreachable */
return ret;
}
/**
* Main Function.
*
* Sets up the whole piping scheme.
*/
int main(int argc, char* argv[]){
pthread_t srcThrd, a2xor0Thrd, xor0Thrd, xor1Thrd, xor22BThrd, dstThrd;
pid_t gzip, gunzip, cat;
PIPESET pipeset;
pipe4_cloexec(pipeset.Ain);
pipe4_cloexec(pipeset.Aout);
pipe4_cloexec(pipeset.Aerr);
pipe4_cloexec(pipeset.Bin);
pipe4_cloexec(pipeset.BoutCin);
pipe4_cloexec(pipeset.Berr);
pipe4_cloexec(pipeset.Cout);
pipe4_cloexec(pipeset.Cerr);
pipe4_cloexec(pipeset.xor0);
pipe4_cloexec(pipeset.xor1);
pipe4_cloexec(pipeset.xor2);
/* Spawn processes */
gzip = popen4("gzip -c -", pipeset.Ain, pipeset.Aout, pipeset.Aerr, POPEN4_FLAG_NONE);
gunzip = popen4("gunzip -c -", pipeset.Bin, pipeset.BoutCin, pipeset.Berr, POPEN4_FLAG_NONE);
cat = popen4("cat", pipeset.BoutCin, pipeset.Cout, pipeset.Cerr, POPEN4_FLAG_NONE);
/* Spawn threads */
pthread_create(&srcThrd, NULL, srcThrdMain, &pipeset);
pthread_create(&a2xor0Thrd, NULL, a2xor0ThrdMain, &pipeset);
pthread_create(&xor0Thrd, NULL, xor0ThrdMain, &pipeset);
pthread_create(&xor1Thrd, NULL, xor1ThrdMain, &pipeset);
pthread_create(&xor22BThrd, NULL, xor22BThrdMain, &pipeset);
pthread_create(&dstThrd, NULL, dstThrdMain, &pipeset);
pthread_join(srcThrd, (void**)NULL);
pthread_join(a2xor0Thrd, (void**)NULL);
pthread_join(xor0Thrd, (void**)NULL);
pthread_join(xor1Thrd, (void**)NULL);
pthread_join(xor22BThrd, (void**)NULL);
pthread_join(dstThrd, (void**)NULL);
return 0;
}
Commentary on your own code
There are many issues with your code, most of which have nothing to do with threading.
You don't close() the file descriptor d->gunzip_ptr->in. This means that gunzip can never know that no more input is forthcoming on its stdin, so it will never exit.
Since gunzip doesn't ever exit, it will never close() its stdout, and thus a blocking read() at the other end will never unblock. A non-blocking read will instead always give -1, with errno == EAGAIN.
Your popen3() doesn't close() p_stdin[POPEN3_READ], p_stdout[POPEN3_WRITE] or p_stderr[POPEN3_WRITE] on the parent side of the fork(). Only the child should have those descriptors. Failing to close these means that when the parent itself tries to read the stdout and stderr of the child, it will never see EOF, again for the same reasons as above: Because it itself still owns a write-end pipe in which it could write, making new data appear to the read-end.
Your code implicitly relies on gunzip writing out at least one byte for every 1024 you write in. There is no guarantee that this will be the case, since gunzip may, at its leisure, buffer internally.
This is because your code reads then copies chunks of at most BUF_LENGTH_VALUE bytes into d->in_buf. You then assign the number of bytes you read through fread() to d->n_in_bytes. This same d->n_in_bytes is used in your write() call to write to gunzip's stdin. You then signal for consume_gunzip_chunk() to wake up, then pthread_cond_wait()'s for the next gzip-compressed chunk. But this gzip-compressed chunk may never come, since there is no guarantee that gunzip will be able to unpack useful output from just the first 1024 bytes of input, nor even a guarantee that it will write() it out instead of buffering it until it has, say, 4096 bytes (a full page) of output. Therefore, the read() call in consume_gunzip_chunk() may never succeed (or even return, if read() was blocking). And if read() never returns, then consume_gunzip_chunk() doesn't signal d->in_cond, and so all three threads get stuck. And even if read() is non-blocking, the last block of output from gzip may never come, since gzip's input is never closed, so it doesn't flush out its buffers by write()'ing them out, so read() on the other end will never get useful data and no amount of pleading will elicit it without a close().
POSSIBLE (LIKELY?) CAUSE OF BUG: d->n_out_bytes_read_from_gunzip, once it becomes non-0, will never become 0 again. This means that the extremely baffling
while (d->n_in_bytes != 0 || d->n_out_bytes_read_from_gunzip != 0)
pthread_cond_wait(&d->in_cond, &d->in_lock);
within produce_gzip_chunk() will, once entered with d->n_out_bytes_read_from_gunzip != 0, forever remain stuck. By calling sleep(1) within consume_gunzip_chunk(), which sets d->n_out_bytes_read_from_gunzip, you may have defused the problem by reading all input before consume_gunzip_chunk() could lock up the system by setting d->n_out_bytes_read_from_gunzip to a non-zero value.
There are two threads that call pthread_cond_wait(&d->in_cond, &d->in_lock);, these being produce_gzip_chunk() and consume_gzip_chunk(). There is absolutely no guarantee that when consume_gunzip_chunk() calls pthread_cond_signal(&d->in_cond);, that the "correct" thread (whichever it is in your design) will receive the signal. To ensure that all of them will, use pthread_cond_broadcast(), but then you expose yourself to the thundering herd problem. Needing to use pthread_cond_broadcast() in this situation is, again, a symptom of a bad design in my opinion.
Related, you call pthread_cond_signal(&d->in_cond) within a thread (indeed, a function) in which you call pthread_cond_wait(&d->in_cond, &d->in_lock). What purpose does that serve?
You use d->in_lock for too many disparate purposes, exposing yourself to the possibility of deadlock, or low performance due to excessive protection. In particular you use it as the protection for both d->in_cond and d->out_cond. This is too strong a protection – the output of gunzip into d->in_line should be able to happen simultaneously with the input of gunzip being written into and out of d->in_buf.
Within consume_gunzip_chunk(), you have
while (d->n_in_bytes_written_to_gunzip == 0) {
pthread_cond_wait(&d->out_cond, &d->in_lock);
}
if (d->n_in_bytes_written_to_gunzip) {
...
This if can never fail! Is there a case you may have in mind?
Consider making the entire struct pthread_data volatile (or at least those integer elements which are used by multiple threads), since the compiler might decide to optimize out loads and stores that should, in fact, remain.
Compliments
So as to not sound too negative, I would like to say that in general your problems were not due to misuse of the pthreads API but due to erroneous consumer-producer logic and lack of close()s. Additionally, you appear to understand that pthread_cond_wait() may wake up spuriously, and so you have wrapped it up in a loop that checks the invariant.
In the future:
I would use pipes, even between threads. This absolves you from needing to implement your own consumer-producer scheme; The kernel has solved it for you already, and provides you with the pipe(), read() and write() primitives, which are all you need to take advantage of this ready-made solution. It also makes the code cleaner and void of mutexes and condition variables. One must simply be diligent in closing the ends, and one must be supremely careful around pipes in the presence of fork(). The rules are simple:
If a pipe's write-end exists, a read() on an open read-end will not give EOF but will block or EAGAIN.
If a pipe's write-ends have all been closed, a read() on an open read-end will give EOF.
If a pipe's read-ends have all been closed, a write() to any of its write-ends will cause SIGPIPE.
fork() duplicates the entire process, including all descriptors (modulo maybe crazy stuff in pthread_atfork())!
Ah. So I think I misunderstood the question.... sorry.
I had thought you wanted to run gunzip and then one other internal filter, and wanted to do that 'N' times.
It seems what you really want to do is run many stages of filters, one after the other... some using external commands and some (perhaps ?) internal to the program. Hence the desire to manage some inter-stage buffering.
So... I've had another go at this. The objective is to run any number of stages, starting with the input stage, then extrenal command or internal function "filter" stages, and finally the output stage. Each external command stage had three pthreads -- for stdin, stdout and stderr. Internal function stages use one pthread and the initial input and final output one pthread each. Between the stages is a small pipe structure (called a "straw") to "double buffer" and decouple the stages... I hope this is closer to what you had in mind.
The "straw" is the essence of the thing:
struct straw
{
pthread_mutex_t* mutex ;
struct lump* free ;
pthread_cond_t* free_cond ;
bool free_waiting ;
struct lump* ready ;
pthread_cond_t* ready_cond ;
bool ready_waiting ;
struct lump* lumps[2] ;
} ;
where a struct lump contains a buffer and what-not. The "straw" has two such "lumps", and at any moment one pthread may be filling one lump, while another is draining the other. Or both lumps may be free (on the free list) or both ready (full) waiting on the ready list.
Then to aquire an empty lump to fill it (eg when reading from a pipe):
static struct lump*
lump_acquire(struct straw* strw)
{
struct lump* lmp ;
pthread_mutex_lock(strw->mutex) ;
while (strw->free == NULL)
{
strw->free_waiting = true ;
pthread_cond_wait(strw->free_cond, strw->mutex) ;
strw->free_waiting = false ;
} ;
lmp = strw->free ;
strw->free = lmp->next ;
pthread_mutex_unlock(strw->mutex) ;
lmp->next = NULL ; /* tidy */
lmp->ptr = lmp->end = lmp->buff ; /* empty */
lmp->done = false ;
return lmp ;
} ;
Then to blow the completed lump into (one end of) the straw.
static void
lump_blow(struct lump* lmp)
{
struct straw* strw ;
strw = lmp->strw ;
qassert((lmp == strw->lumps[0]) || (lmp == strw->lumps[1])) ;
qassert( (lmp->buff <= lmp->ptr)
&& (lmp->ptr <= lmp->end)
&& (lmp->end <= lmp->limit) ) ;
lmp->ptr = lmp->buff ;
pthread_mutex_lock(strw->mutex) ;
if (strw->ready == NULL)
strw->ready = lmp ;
else
strw->ready->next = lmp ;
lmp->next = NULL ;
if (strw->ready_waiting)
pthread_cond_signal(strw->ready_cond) ;
pthread_mutex_unlock(strw->mutex) ;
} ;
To suck a lump out of (the other end of) the straw:
static struct lump*
lump_suck(struct straw* strw)
{
struct lump* lmp ;
pthread_mutex_lock(strw->mutex) ;
while (strw->ready == NULL)
{
strw->ready_waiting = true ;
pthread_cond_wait(strw->ready_cond, strw->mutex) ;
strw->ready_waiting = false ;
} ;
lmp = strw->ready ;
strw->ready = lmp->next ;
pthread_mutex_unlock(strw->mutex) ;
qassert( (lmp->buff <= lmp->ptr)
&& (lmp->ptr <= lmp->end)
&& (lmp->end <= lmp->limit) ) ;
lmp->ptr = lmp->buff ; /* lmp->ptr..lmp->end */
lmp->next = NULL ; /* tidy */
return lmp ;
} ;
And the final piece, freeing a lump once it has been drained:
static void
lump_free(struct lump* lmp)
{
struct straw* strw ;
strw = lmp->strw ;
qassert((lmp == strw->lumps[0]) || (lmp == strw->lumps[1])) ;
qassert( (lmp->buff <= lmp->ptr)
&& (lmp->ptr <= lmp->end)
&& (lmp->end <= lmp->limit) ) ;
pthread_mutex_lock(strw->mutex) ;
if (strw->free == NULL)
strw->free = lmp ;
else
strw->free->next = lmp ;
lmp->next = NULL ; /* end of list of free */
lmp->ptr = lmp->end = lmp->buff ; /* empty */
lmp->done = false ;
if (strw->free_waiting)
pthread_cond_signal(strw->free_cond) ;
pthread_mutex_unlock(strw->mutex) ;
} ;
The entire program is too big to fit in an answer -- see: pipework.c where that starts:
/*==============================================================================
* pipework.c
*
* Copyright (c) Chris Hall (GMCH) 2014, All rights reserved.
*
* Though you may do what you like with this, provided you recognise that
* it is offered "as is", gratis, and may or may not be fit for any purpose
* whatsoever -- you are on your own.
*
*------------------------------------------------------------------------------
*
* This will read from stdin, pass the data through an arbitrary number of
* "filter" stages and finally write the result to stdout.
*
* A filter stage may be an external command taking a piped stdin and
* outputting to a piped stdout. Anything it says to stderr is collected
* and output to the program's stderr.
*
* A filter stage may also be an internal function.
*
* The input, filter and output stages are implemented as a number of pthreads,
* with internal, miniature pipes (called "straws") between them. All I/O is
* blocking. This is an experiment in the use of pthreads to simplify things.
*
* ============================
* This is v0.08 of 4-Jul-2014
* ============================
*
* The 'main' below runs eight stages: input, 4 commands, 2 internal filters
* and the output. The output should be an exact copy of the input.
*
* In order to test the stderr handling, the following small perl script is
* used as two of the command filters:
*
* chatter.pl
* --------------------------------------------------------
use strict ;
use warnings ;
my $line = 0 ;
while (<STDIN>)
{
my $len = length($_) ;
my $r = rand ;
$line += 1 ;
print STDERR "|$line:$len:$r|" ;
if (int($r * 100) == 0)
{
print STDERR "\n" ;
} ;
print $_ ;
} ;
* --------------------------------------------------------
*
*------------------------------------------------------------------------------
* Limitations
*
* * this will crash if it gets some error its not expecting or not
* designed to overcome. Clearly, to be useful this needs to be more
* robust and more informative.
*
* * some (possible/theoretical) errors are simply ignored.
*
* * no attempt is made to collect up the child processes or to discover
* their return codes. If the child process reports errors or anything
* else on stderr, then that will be visible. But otherwise, if it just
* crashes then the pipeline will run to completion, but the result may
* be nonsense.
*
* * if one of the child processes stalls, the whole thing stalls.
*
* * an I/O error in a stage will send 'end' downstream, but the program
* will continue until everything upstream has completed.
*
* * generally... not intended for production use !!
*/
And the perl script is available as: chatter.pl
HTH
Regarding the part 'How to manage two or more consumers via pthreads?' of your post let me cite these points about 'Designing Threaded Programs':
In general though, in order for a program to take advantage of
Pthreads, it must be able to be organized into discrete, independent
tasks which can execute concurrently. For example, if routine1 and
routine2 can be interchanged, interleaved and/or overlapped in real
time, they are candidates for threading.
and
Several common models for threaded programs exist:
Manager/worker: a single thread, the manager assigns work to other threads, the workers. Typically, the manager handles all input and
parcels out work to the other tasks. At least two forms of the
manager/worker model are common: static worker pool and dynamic
worker pool.
Pipeline: a task is broken into a series of suboperations, each of which is handled in series, but concurrently, by a different thread.
An automobile assembly line best describes this model.
Peer: similar to the manager/worker model, but after the main thread creates other threads, it participates in the work.
Regarding your problem...
The problem I am facing is that I need to add a sleep(1) statement in
consume_gunzip_chunk, before doing the read, in order to get things
working properly.
Eric Lippert Best Practices with Multithreading in C# might not solve it but, they should help you finding the right solution to your multi-threaded program, particularly points 5, and 8:
5.At all costs avoid shared memory. Most threading bugs are caused by a failure to understand real-world shared memory semantics. If you
must make threads, treat them as though they were processes: give them
everything they need to do their work and let them work without
modifying the memory associated with any other thread. Just like a
process doesn't get to modify the memory of any other process.
8.If you use Thread.Sleep with an argument other than zero or one in any production code, you are possibly doing something wrong. Threads
are expensive; you don't pay a worker to sleep, so don't pay a thread
to sleep either. If you are using sleeps to solve a correctness issue
by avoiding a timing problem -- as you appear to be in your code --
then you definitely have done something deeply wrong. Multithreaded
code needs to be correct irrespective of accidents of timing.

How to capture Control+D signal?

I want to capture the Ctrl+D signal in my program and write a signal handler for it.
How can I do that?
I am working on C and using a Linux system.
As others have already said, to handle Control+D, handle "end of file"s.
Control+D is a piece of communication between the user and the pseudo-file that you see as stdin. It does not mean specifically "end of file", but more generally "flush the input I typed so far". Flushing means that any read() call on stdin in your program returns with the length of the input typed since the last flush. If the line is nonempty, the input becomes available to your program although the user did not type "return" yet. If the line is empty, then read() returns with zero, and that is interpreted as "end of file".
So when using Control+D to end a program, it only works at the beginning of a line, or if you do it twice (first time to flush, second time for read() to return zero).
Try it:
$ cat
foo
(type Control-D once)
foofoo (read has returned "foo")
(type Control-D again)
$
Ctrl+D is not a signal, it's EOF (End-Of-File). It closes the stdin pipe. If read(STDIN) returns 0, it means stdin closed, which means Ctrl+D was hit (assuming there is a keyboard at the other end of the pipe).
A minimalistic example:
#include <unistd.h>
#include <stdio.h>
#include <termios.h>
#include <signal.h>
void sig_hnd(int sig){ (void)sig; printf("(VINTR)"); }
int main(){
setvbuf(stdout,NULL,_IONBF,0);
struct termios old_termios, new_termios;
tcgetattr(0,&old_termios);
signal( SIGINT, sig_hnd );
new_termios = old_termios;
new_termios.c_cc[VEOF] = 3; // ^C
new_termios.c_cc[VINTR] = 4; // ^D
tcsetattr(0,TCSANOW,&new_termios);
char line[256]; int len;
do{
len=read(0,line,256); line[len]='\0';
if( len <0 ) printf("(len: %i)",len);
if( len==0 ) printf("(VEOF)");
if( len >0 ){
if( line[len-1] == 10 ) printf("(line:'%.*s')\n",len-1,line);
if( line[len-1] != 10 ) printf("(partial line:'%s')",line);
}
}while( line[0] != 'q' );
tcsetattr(0,TCSANOW,&old_termios);
}
The program change the VEOF char (from Ctrl-D) to Ctrl-C and the VINTR char (from Ctrl-C) to Ctrl-D. If You press Ctrl-D then the terminal driver will send a SIGINT to the signal handler of the program.
Note: pressing VINTR will erase the terminal input buffer so You can not read the characters typed in the line before the VINTR key pressed.
There's no need to process signals.
You need to ensure ISIG is not set on the terminal flags, that's all.
Here's a complete contained example using select to avoid blocking on stdin:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <termios.h>
#include <time.h>
#include <sys/select.h>
#define STDIN_FILENO 0
struct termios org_opts;
/** Select to check if stdin has pending input */
int pending_input(void) {
struct timeval tv;
fd_set fds;
tv.tv_sec = 0;
tv.tv_usec = 0;
FD_ZERO(&fds);
FD_SET(STDIN_FILENO, &fds); //STDIN_FILENO is 0
select(STDIN_FILENO+1, &fds, NULL, NULL, &tv);
return FD_ISSET(STDIN_FILENO, &fds);
}
/** Input terminal mode; save old, setup new */
void setup_terminal(void) {
struct termios new_opts;
tcgetattr(STDIN_FILENO, &org_opts);
memcpy(&new_opts, &org_opts, sizeof(new_opts));
new_opts.c_lflag &= ~(ICANON | ECHO | ECHOE | ECHOK | ECHONL | ECHOPRT | ECHOKE | ISIG | ICRNL);
tcsetattr(STDIN_FILENO, TCSANOW, &new_opts);
}
/** Shutdown terminal mode */
void reset_terminal(void) {
tcsetattr(STDIN_FILENO, TCSANOW, &org_opts);
}
/** Return next input or -1 if none */
int next_input(void) {
if (!pending_input())
return -1;
int rtn = fgetc(stdin);
printf("Found: %d\n", rtn);
return(rtn);
}
int main()
{
setup_terminal();
printf("Press Q to quit...\n");
for (;;) {
int key = next_input();
if (key != -1) {
if ((key == 113) || (key == 81)) {
printf("\nNormal exit\n");
break;
}
}
}
reset_terminal();
return 0;
}
Output:
doug-2:rust-sys-sterm doug$ cc junk.c
doug-2:rust-sys-sterm doug$ ./a.out
Press Q to quit...
Found: 4
Found: 3
Found: 27
Found: 26
Found: 113
Normal exit
NB. 3 is control C and 4 is control D; 26 is control z. 113 is 'q'.
See: http://en.wikipedia.org/wiki/ASCII#ASCII_control_characters for a full table.
As far as I know Ctrl+D is translated by the system to end of standard input so your app won't get any signal.
I think that the only way to intercept Ctrl+D is to work directly with the system api (like accessing tty)
You can use poll() and watch for POLLHUP on fd #1, because the TTY layer translates ^D to EOF.
Ctrl + D value in ascci table is 4 and is a non printable characters. So your can capture it in a terminal with the following code.
When getline function get Ctrl + D an error occur and return value is -1.
Your can make a condition on the return value.
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
int main(void)
{
char *buf = malloc(sizeof(char) * 500);
size_t size = 500;
int nb = getline(&buf, &size, stdin);
if (nb == -1)
printf("CTRL + D captured\n");
free(buf);
return (0);
}
You can check if stdin is not of with the feof method like so:
if (feof(stdin))
{
// some code
exit(0);
}
See this for more details/
According to the man page, getline() will return -1 on failure to read a line (including the end-of-file condition).
This means:
When getline() encounters a failure it will return -1
When getline() reads CTRL+D it will return -1
Since getline() returns a value of type ssize_t which is defined as a signed integer value, you can construct your code in such a way to test for the -1 value which will be returned in the event of an end-of-file condition or failure to read a line.
#include <stdio.h>
#include <stdlib>
int main(void)
{
char *buffer = NULL;
size_t bufsize = 0;
ssize_t characters;
characters = getline(&buffer, &bufsize, stdin);
if (characters == -1)
{
printf("You entered CTRL+D\n");
}
free(buffer);
return (0);
}

Resources