File pointer goes to end after read by child - c

Parent has opened a file to read, I fork two children to read from file and write on different files.
child 1 reads the first line, and child 2,reads nothing. When I do an ftell, it reaches the end.
Can anyone please explain this behaviour?
f[0] = fopen("input", "r");
for ( i = 1; i <= 2; i++ ){
if ((pid = fork()) != 0){
waitpid(pid);
}
else
{
snprintf ( buffer, 10, "output%d", i );
printf("opening file %s \n",buffer);
f[i] = fopen( buffer, "w");
fgets(buff2, 10, f[0]);
fprintf(f[i], "%s", buff2);
fclose(f[i]);
_exit(0);
}
}
fclose(f[0]);

Your problem is buffering. stdio reads files on fully buffered mode by default, which means a call to fgets(3) will actually read a huge block of characters from the file, buffer everything, and then return the first line, while leaving the rest in the buffer, in the perspective of being called again in the future (remember that stdio strives for minimizing the number of read(2) and write(2) syscalls). Note that stdio buffering is a user-space thing; all the kernel sees is a single process reading a huge block on that file, and so the cursor is updated accordingly.
Common block sizes are 4096 and 8192; your input file is probably smaller than that and so the first process that calls fgets(3) ends up reading the whole file, leaving the cursor in the end. Buffering is tricky.
What can you do? One solution I can think of is to disable buffering (since this is an input stream we're talking about, we can't use line buffered mode, because line buffering is meaningless for input streams). So if you disable buffering on the input stream before forking, everything will work. This is done with setvbuf(3).
Here's a working example:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>
static FILE *f[3];
static char buffer[128];
static char buff2[128];
int main(void) {
pid_t pid;
int i;
if ((f[0] = fopen("input", "r")) == NULL) {
perror("Error opening input file");
exit(EXIT_FAILURE);
}
if (setvbuf(f[0], NULL, _IONBF, 0) < 0) {
perror("setvbuf(3) failed");
exit(EXIT_FAILURE);
}
for (i = 1; i <= 2; i++) {
if ((pid = fork()) < 0) {
perror("fork(2) failed");
exit(EXIT_FAILURE);
}
if (pid != 0) {
if (waitpid(pid, NULL, 0) < 0) {
perror("waitpid(2) failed");
exit(EXIT_FAILURE);
}
} else {
snprintf(buffer, sizeof(buffer), "output%d", i);
printf("opening file %s\n", buffer);
if ((f[i] = fopen(buffer, "w")) == NULL) {
perror("fopen(2) failed");
exit(EXIT_FAILURE);
}
errno = 0;
if (fgets(buff2, sizeof(buff2), f[0]) == NULL) {
if (errno != 0) {
perror("fgets(3) error");
exit(EXIT_FAILURE);
}
}
fprintf(f[i], "%s", buff2);
fclose(f[i]);
exit(EXIT_SUCCESS);
}
}
fclose(f[0]);
return 0;
}
Note that this may incur a significant performance hit. Your code will be making a lot more syscalls, and it might be too expensive for huge files, but it doesn't seem to be a problem since apparently you're dealing with relatively small input files.

Here's an extract of my fork() man page:
The child process has its own copy of the parent's descriptors. These descriptors reference the same underlying objects, so that, for instance, file pointers in file objects are shared between the child and the parent, so that an lseek(2) on a descriptor in the child process can affect a subsequent read or write by the parent. This descriptor copying is also used by the shell to establish standard input and output for newly created processes as well as to set up pipes.
So your behaviour is completely normal. If you want your child to have its own file descriptor, it should open its own file.
For example, you could do the following:
for ( i = 1; i <= 2; i++ )
{
if ((pid = fork()) != 0)
{
waitpid(pid);
}
else
{
f[0] = fopen("input", "r"); // New
snprintf ( buffer, 10, "output%d", i );
printf("opening file %s \n",buffer);
f[i] = fopen( buffer, "w");
fgets(buff2, 10, f[0]);
fprintf(f[i], "%s", buff2);
fclose(f[i]);
fclose(f[0]); //New
_exit(0);
}
}
Also, you should check for errors (almost all the functions in your else could fail with error).

Related

C program to convert to upper case using fork

I need to create a program that has a child process and a parent process. The child process has to convert lines sent by the parent proccess in to upper case, and the parent proccess has to send lines to the child proccess to convert, and show this converted lines by stdin. I already have this, but at the time when i execute on my terminal, the upper case lines doesn't show.
Any sugestion
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <string.h>
#include <ctype.h>
int main(void) {
int p1[2];
int p2[2];
pid_t pid;
char buffer[1024];
FILE *fp1;
FILE *fp2;
pipe(p1);
pipe(p2);
pid = fork();
if (pid < 0) {
fprintf(stderr, "Error fork\n");
exit(1);
}
if (pid == 0) {
// Close pipes entrances that aren't used
close(p1[1]);
close(p2[0]);
// Open file descriptors in used entrances
fp1 = fdopen(p1[0], "r");
fp2 = fdopen(p2[1], "w");
// Read from the corresponding file descriptor (pipe extreme)
while (fgets(buffer, 1024, fp1) != NULL) {
for (int i = 0; i < strlen(buffer); i++) {
buffer[i] = toupper(buffer[i]);
}
fputs(buffer, fp2);
}
// Once finished, close the reaming pipes entrances
fclose(fp1);
fclose(fp2);
exit(1);
}
// Close unused pipes entrances
close(p1[0]);
close(p2[1]);
// Open dile descriptors
fp1 = fdopen(p1[1], "w");
fp2 = fdopen(p2[0], "r");
while (fgets(buffer, 1024, stdin) != NULL) {
fputs(buffer, fp1); // Send buffer to write line pipe
fgets(buffer, 1024, fp2); // Get buffer readed from read line pipe
printf("%s", buffer); // Print in stdout the buffer
}
// Once finished, close the reaming pipes entrances
fclose(fp1);
fclose(fp2);
// Wait fork
wait(NULL);
return 0;
}
When using a FILE * stream and the C library stream API, it's important to keep in mind that I/O operations can be "buffered". In most cases, by default, when performing writes via fputs(...) the bytes will not actually be sent to the underlying file object (a pipe end in this case) until the buffer is flushed. In your code above, you could add calls to fflush(fpN) (where N matches the numbers in your code) after both calls to fputs(...). This should help fix your problem.
Note that alternatively there are ways of manually changing the buffering mode of a given file stream. This information can be found in man setbuf.

getline() is repeatedly reading the file, when fork() is used

I am developing a simple shell program, a command line interpreter and I wanted to read input from the file line by line, so I used getline() function. At the first time, the program works correctly, however, when it reaches the end of the file, instead of terminating, it starts to read a file from the start and it runs infinitely.
Here are some codes in main function that are related to getline():
int main(int argc,char *argv[]){
int const IN_SIZE = 255;
char *input = NULL;
size_t len = IN_SIZE;
// get file address
fileAdr = argv[2];
// open file
srcFile = fopen(fileAdr, "r");
if (srcFile == NULL) {
printf("No such file!\n");
exit(-1);
}
while (getline( &input, &len, srcFile) != -1) {
strtok(input, "\n");
printf("%s\n", input);
// some code that parses input, firstArgs == input
execSimpleCmd(firstArgs);
}
fclose(srcFile);
}
I am using fork() in my program and most probably it causes this problem.
void execSimpleCmd(char **cmdAndArgs) {
pid_t pid = fork();
if (pid < 0) {
// error
fprintf(stderr, "Fork Failed");
exit(-1);
} else if (pid == 0) {
// child process
if (execvp(cmdAndArgs[0], cmdAndArgs) < 0) {
printf("There is no such command!\n");
}
exit(0);
} else {
// parent process
wait(NULL);
return;
}
}
In addition, sometimes the program reads and prints a combinations of multiple lines. For example, if an input file as below:
ping
ww
ls
ls -l
pwd
it prints something like pwdg, pwdww, etc. How to fix it?
It appears that closing a FILE in some cases seeks the underlying file descriptor back to the position where the application actually read to, effectively undoing the effect of the read buffering. This matters, since the OS level file descriptors of the parent and the child point to the same file description, and the same file offset in particular.
The POSIX description of fclose() has this phrase:
[CX] [Option Start] If the file is not already at EOF, and the file is one capable of seeking, the file offset of the underlying open file description shall be set to the file position of the stream if the stream is the active handle to the underlying file description.
(Where CX means an extension to the ISO C standard, and exit() of course runs fclose() on all streams.)
I can reproduce the odd behavior with this program (on Debian 9.8):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
int main(int argc, char *argv[]){
FILE *f;
if ((f = fopen("testfile", "r")) == NULL) {
perror("fopen");
exit(1);
}
int right = 0;
if (argc > 1)
right = 1;
char *line = NULL;
size_t len = 0;
// first line
getline(&line, &len, f);
printf("%s", line);
pid_t p = fork();
if (p == -1) {
perror("fork");
} else if (p == 0) {
if (right)
_exit(0); // exit the child
else
exit(0); // wrong way to exit
} else {
wait(NULL); // parent
}
// rest of the lines
while (getline(&line, &len, f) > 0) {
printf("%s", line);
}
fclose(f);
}
Then:
$ printf 'a\nb\nc\n' > testfile
$ gcc -Wall -o getline getline.c
$ ./get
getline getline2
$ ./getline
a
b
c
b
c
Running it with strace -f ./getline clearly shows the child seeking the file descriptor back:
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f63794e0710) = 25117
strace: Process 25117 attached
[pid 25116] wait4(-1, <unfinished ...>
[pid 25117] lseek(3, -4, SEEK_CUR) = 2
[pid 25117] exit_group(1) = ?
(I didn't see the seek back with a code that didn't involve forking, but I don't know why.)
So, what happens is that the C library on the main program reads a block of data from the file, and the application prints the first line. After the fork, the child exits, and seeks the fd back to where the application level file pointer is. Then the parent continues, processes the rest of the read buffer, and when it's finished, it continues reading from the file. Because the file descriptor was seeked back, the lines starting from the second are again available.
In your case, the repeated fork() on every iteration seems to result in an infinite loop.
Using _exit() instead of exit() in the child fixes the problem in this case, since _exit() only exits the process, it doesn't do any housekeeping with the stdio buffers.
With _exit(), any output buffers are also not flushed, so you'll need to call fflush() manually on stdout and any other files you're writing to.
However, if you did this the other way around, with the child reading and buffering more than it processes, then it would be useful for the child to seek back the fd so that the parent could continue from where the child actually left.
Another solution would be not to mix stdio with fork().

C: redirecting stdin, stdout to pipes

I'm in the process of properly understanding pipes and FDs and I'm trying to program the following thing:
The program basically compresses stuff like gzip does with the options -cf.
The basic idea is:
I create two pipes in the parent process, then I fork it twice so that I'll have two children. In the first child, I redirect the first pipe's read end to stdin, and the second pipe's write end to stdout. Then I exec gzip with the -cf options so that it'll write to stdout (now the writing end of pipe2)
In the second child, I read from pipe2 and either output it directly or save it to a file.
The problem is, however, that no data arrives at the second child and I'm not really sure why. Here's the code:
int main(int argc, char **argv) {
char *file;
int out = 0;
if(argc == 2) {
file = argv[1];
out = 1;
} else if (argc > 2) {
exit(EXIT_FAILURE);
}
int c1pipe[2];
int c2pipe[2];
pipe(c1pipe);
pipe(c2pipe);
int f;
for(int i = 0; i < 2; i++) {
switch(f = fork()) {
case 0: //child
if(i == 0) { //first loop iteration, child 1
close(c1pipe[1]);
dup2(c1pipe[0], fileno(stdin));
close(c1pipe[0]);
close(c2pipe[0]);
dup2(c2pipe[1], fileno(stdout));
close(c2pipe[1]);
execlp("gzip", "gzip", "-cf", (char *) NULL);
} else if (i == 1) { //second loop iteration, child2
close(c1pipe[0]);
close(c1pipe[1]);
close(c2pipe[1]);
FILE *read = fdopen(c2pipe[0], "r");
char buffer[1024];
if(out == 0) { //output to stdout
while(fgets(buffer, 1024, read) != NULL) {
fprintf(stdout, "%s", buffer);
fflush(stdout);
}
} else { //write to specified file
FILE *writeto = fopen(file, "w");
while(fread(buffer, sizeof(char), strlen(buffer)+1, read) > 0) {
fwrite(buffer, sizeof(char), strlen(buffer)+1, writeto);
fflush(writeto);
}
fclose(writeto);
}
close(c2pipe[0]);
fclose(read);
}
break;
case -1: //err
//not implemented
break;
default: //parent
if(i == 0) {
close(c2pipe[0]);
close(c2pipe[1]);
close(c1pipe[0]);
FILE *writer;
writer = fdopen(c1pipe[1], "w");
char buffer[1024];
while(fgets(buffer, sizeof buffer, stdin) != NULL) {
fwrite(buffer, sizeof (char), strlen(buffer)+1, writer);
}
close(c1pipe[1]);
fclose(writer);
}
break;
}
}
return 0;
}
Please excuse the missing error handling as I wanted to create a quick-and-dirty version.
Any help is appreciated.
In the parent process, you are closing both ends of c2pipe before you have forked the second child.
You'd probably have figured this out already if you had put any error handling in on any of the read/write calls. In fact, if you checked for an error on the dup2 calls and then looked at errno, you probably would have found that it was EBADF (bad file descriptor).
Another issue is that your parent process exits before it knows that both child processes have finished. This means that the child processes will receive a signal and will themselves be terminated. The parent needs to call one of the variants of wait() to make sure both children have gone.

Q: Reading from pipe to screen

I'm trying to write a program that reads some text from a file and prints it to the screen. The parent will read the content of the file write it to n number of pipes and the children will read it and then print it.
So far this is what I've got:
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <string.h>
int main (void)
{
pid_t pid;
char c;
FILE *fd;
char buf[100];
int N_CHILDREN = 2;
int p[N_CHILDREN][2];
int i,j;
for(i=0; i<N_CHILDREN; i++)
{
pipe(p[i]);
}
fd=fopen("123.txt","r");
for(j=0; j < N_CHILDREN;j++)
{
pid = fork ();
if (pid == 0)
{
close (p[j][1]);
while(read(p[j][0], &fd,sizeof(buf)) > 0)
printf("\n%c",&fd);
}
if (pid < 0)
{
//Fork Failed
fprintf (stderr, "Fork failure.\n");
return EXIT_FAILURE;
}
if ( pid > 0) //Parent
{
close (p[j][0]);
write(p[j][1], fd ,sizeof(buf));
}
}
}
Problem is it's not really reading the content from the file. I've tried sending it a string of characters instead of reading from a file and it worked as intended, both children printed the message one time and the program ended.
Any thoughts about it? After reading the manuals I still can't see where the problem is.
You are confusing C Standard I/O streams (created with fopen(); written to with fprintf() et al., read with fscanf() et al.) with Unix file descriptor I/O (created with open() or pipe() et al., written to with write() et al., read with read() et al.)
Standard I/O functions take an opaque FILE * as a handle; Unix I/O functions take a file descriptor (a small int) as a handle.
Once you understand the conceptual difference, I'm sure you will realize that
FILE *fd = ...
read(..., &fd, ...);
is reading into a pointer-to-FILE -- not terribly useful :-)
Several problems here:
you make bad usage of read function by passing &fd, which is a FILE*. This function needs a pointer to the "buffer" to print, here I guess buf.
you don't check errors. For example if fopen fails.
you never read data from your file, so you have "nothing" to send to children.
you have to get returned value of read (in children) because it is the effective amount of data that you get. So it is the amount of data that you have to print after that (to stdout).
So here is an example code, see comments inside:
// put here all the needed includes (see manpages of functions)
// it is better to create a function for the child: the code
// is easier to read
// the child just get the file descriptor to read (the pipe)
void child(int fd) {
char buf[100]; // buffer to store data read
int ret; // the number of bytes that are read
// we read from 'fd', into 'buf'. It returns the number of bytes
// really read (could be smaller than size). Return <=0 when over
while((ret = read(fd, buf, sizeof(buf))) > 0) {
// write the 'ret' bytes to STDOUT (which as file descriptor 1)
write(1, buf, ret);
}
}
int main (void) {
pid_t pid;
char buf[100];
int N_CHILDREN = 2;
int p[N_CHILDREN][2];
int i,j, ret;
int fdi;
// create the pipes
for(i=0; i<N_CHILDREN; i++) {
if (pipe(p[i]) == -1) {
perror("pipe"); // ALWAYS check for errors
exit(1);
}
}
// open the file (with 'open' not 'fopen', more suitable for
// reading raw data
fdi = open("123.txt",O_RDONLY);
if (fdi < 0) {
perror("open"); // ALWAYS check for errors
exit(1);
}
// just spawn the children
for(j=0; j < N_CHILDREN;j++) {
pid = fork();
if (pid < 0) {
perror("fork"); // ALWAYS check for errors
exit(1);
}
if (pid == 0) { // child
close(p[j][1]); // close the writing part
child(p[j][0]); // call child function with corresp. FD
exit(0); // leave : the child should do nothing else
}
}
// don't need that part
for(j=0; j<N_CHILDREN; j++) {
close(p[j][0]); // close the read-part of pipes
}
// need to read file content, see comment in child() function
while ((ret = read(fdi, buf, sizeof(buf))) > 0) {
// write the data to all children
for(j=0; j<N_CHILDREN; j++) {
write(p[j][1], buf , ret); // we write the size we get
}
}
// close everithing
for(j=0; j<N_CHILDREN; j++) {
close(p[j][1]); // needed, see text after
}
close(fdi); // close read file
return(0); // main returns a int, 0 is "ok"
}
You have to close every parts of pipes when not needed or when it is over. Until a file descriptor is open a read will block the process. Only when last write counterpart is closed the read returns <=0.
Note: 1. the correct usage of read/write function 2. checking for errors 3. reading from the file and writing to the pipe(s) 4. dealing with effective amount of data read (ret variable) so that you can write (to "screen" or to an other file descriptor the right amount of data.
You're not reading anything in to buf as far as I can tell.

how to control popen stdin, stdout, stderr redirection?

I am confused about how popen() redirects stdin, stdout and stderr of the child process in unix. The man page on popen() is not very clear in this regard. The call
FILE *p = popen("/usr/bin/foo", "w");
forks a child process and executes a shell with arguments "-c", "/usr/bin/foo", and redirects stdin of this shell (which is redirected stdin of foo), stdout to p. But what happens with stderr? What is the general principle behind it?
I noticed that, if I open a file in foo (using fopen, socket, accept etc.), and the parent process has no stdout, it gets assigned the next available file number, which is 1 and so on. This delivers unexpected results from calls like fprintf(stderr, ...).
It can be avoided by writing
FILE *p = popen("/usr/bin/foo 2>/dev/null", "w");
in the parent program, but are their better ways?
popen(3) is just a library function, which relies on fork(2) and pipe(2) to do the real work.
However pipe(2) can only create unidirectional pipes. To send the child process input, and also capture the output, you need to open two pipes.
If you want to capture the stderr too, that's possible, but then you'll need three pipes, and a select loop to arbitrate reads between the stdout and stderr streams.
There's an example here for the two-pipe version.
simple idea: why not add "2>&1" to the command string to force the bash to redirect stderr to stdout (OK, writing to stdin still is not possible but at least we get stderr and stdout into our C program).
The return value from popen() is a normal standard I/O stream in all
respects save that it must be closed with pclose() rather than
fclose(3). Writing to such a stream writes to the standard input of
the command; the command's standard output is the same as that of the
process that called popen(), unless this is altered by the command
itself. Conversely, reading from a "popened" stream reads the
command's standard output, and the command's standard input is the
same as that of the process that called popen().
From its manpage, so it allows you to read the commands standard output or write into its standard input. It doesn't say anything about stderr. Thus that is not redirected.
If you provide "w", you will send your stuff to the stdin of the shell that is executed. Thus, doing
FILE * file = popen("/bin/cat", "w");
fwrite("hello", 5, file);
pclose(file);
Will make the shell execute /bin/cat, and pass it the string "hello" as its standard input stream. If you want to redirect, for example stderr to the file "foo" do this first, before you execute the code above:
FILE * error_file = fopen("foo", "w+");
if(error_file) {
dup2(fileno(error_file), 2);
fclose(error_file);
}
It will open the file, and duplicate its file-descriptor to 2, closing the original file descriptor afterwards.
Now, if you have your stdout closed in your parent, then if the child calls open it will get 1, since that's (if stdin is already opened) the next free file-descriptor. Only solution i see is to just use dup2 and duplicate something into that in the parent, like the above code. Note that if the child opens stdout, it will not make stdout open in the parent too. It stays closed there.
Check out popenRWE by Bart Trojanowski. Clean way to do all 3 pipes.
if you just want to get STDERR, try this:
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/wait.h>
#include <malloc.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
/*
* Pointer to array allocated at run-time.
*/
static pid_t *childpid = NULL;
/*
* From our open_max(), {Prog openmax}.
*/
static int maxfd;
FILE *
mypopen(const char *cmdstring, const char *type)
{
int i;
int pfd[2];
pid_t pid;
FILE *fp;
/* only allow "r" "e" or "w" */
if ((type[0] != 'r' && type[0] != 'w' && type[0] != 'e') || type[1] != 0) {
errno = EINVAL; /* required by POSIX */
return(NULL);
}
if (childpid == NULL) { /* first time through */
/* allocate zeroed out array for child pids */
maxfd = 256;
if ((childpid = calloc(maxfd, sizeof(pid_t))) == NULL)
return(NULL);
}
if (pipe(pfd) < 0)
return(NULL); /* errno set by pipe() */
if ((pid = fork()) < 0) {
return(NULL); /* errno set by fork() */
} else if (pid == 0) { /* child */
if (*type == 'e') {
close(pfd[0]);
if (pfd[1] != STDERR_FILENO) {
dup2(pfd[1], STDERR_FILENO);
close(pfd[1]);
}
} else if (*type == 'r') {
close(pfd[0]);
if (pfd[1] != STDOUT_FILENO) {
dup2(pfd[1], STDOUT_FILENO);
close(pfd[1]);
}
} else {
close(pfd[1]);
if (pfd[0] != STDIN_FILENO) {
dup2(pfd[0], STDIN_FILENO);
close(pfd[0]);
}
}
/* close all descriptors in childpid[] */
for (i = 0; i < maxfd; i++)
if (childpid[i] > 0)
close(i);
execl("/bin/sh", "sh", "-c", cmdstring, (char *)0);
_exit(127);
}
/* parent continues... */
if (*type == 'e') {
close(pfd[1]);
if ((fp = fdopen(pfd[0], "r")) == NULL)
return(NULL);
} else if (*type == 'r') {
close(pfd[1]);
if ((fp = fdopen(pfd[0], type)) == NULL)
return(NULL);
} else {
close(pfd[0]);
if ((fp = fdopen(pfd[1], type)) == NULL)
return(NULL);
}
childpid[fileno(fp)] = pid; /* remember child pid for this fd */
return(fp);
}
int
mypclose(FILE *fp)
{
int fd, stat;
pid_t pid;
if (childpid == NULL) {
errno = EINVAL;
return(-1); /* popen() has never been called */
}
fd = fileno(fp);
if ((pid = childpid[fd]) == 0) {
errno = EINVAL;
return(-1); /* fp wasn't opened by popen() */
}
childpid[fd] = 0;
if (fclose(fp) == EOF)
return(-1);
while (waitpid(pid, &stat, 0) < 0)
if (errno != EINTR)
return(-1); /* error other than EINTR from waitpid() */
return(stat); /* return child's termination status */
}
int shellcmd(char *cmd){
FILE *fp;
char buf[1024];
fp = mypopen(cmd,"e");
if (fp==NULL) return -1;
while(fgets(buf,1024,fp)!=NULL)
{
printf("shellcmd:%s", buf);
}
pclose(fp);
return 0;
}
int main()
{
shellcmd("ls kangear");
}
and you will get this:
shellcmd:ls: cannot access kangear: No such file or directory

Resources