Related
There are already multiple answers to this question but none of them have been able to help me solve my problem. I am trying to understand IPC using an anonymous pipe in C.
From my understanding of pipes, they are a one way communication channel with one read end and one write end.
Assume we have two C files, one named parent.c and the other child.c. What I am trying to achieve is to create 5 or more child processes. After this, the parent should communicate with each child process through the child's standard input and standard output, but since I want to be able to print what the parent receives from the child, I'll instead tie the pipes to the standard error output using dup2.
In summary
1. Run a parent program which spins up 5 or more child processes and runs them.
2. The child process waits for an input from the parent using scanf.
3. The parent sends a message to the child process.
4. The child process receives the message and sends a reply to the parent and exits.
5. The parent process receives the reply, prints it, and then exits.
parent.c
// Parentc
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

/*
 * Spawn 5 "./child" processes one after another.  For each child:
 *   - create two pipes (parent -> child and child -> parent),
 *   - in the child, wire the pipes to stdin/stdout and exec ./child,
 *   - in the parent, send one line, read the whole reply, print it,
 *     and reap the child before starting the next one.
 *
 * Returns 0 on success; exits with status 1 if a pipe or fork fails.
 */
int main(int argc, const char *argv[]){
    (void)argc;
    (void)argv;

    /* If a child dies before reading, write() must fail with EPIPE
       instead of killing the parent with SIGPIPE. */
    signal(SIGPIPE, SIG_IGN);

    // Try to fork 5 child processes
    for (int i = 0; i < 5; i++) {
        int read_pipe[2];  // From child to parent
        int write_pipe[2]; // From parent to child
        pid_t process_id;
        int exit_status;

        if (pipe(write_pipe) == -1 || pipe(read_pipe) == -1) {
            perror("Pipe");
            exit(1);
        }

        /* Do not let buffered output be duplicated into the child. */
        fflush(stdout);

        // Spin a child process
        process_id = fork();
        if (process_id == -1) {
            perror("Fork");
            exit(1);
        } else if (process_id == 0) {
            // The child process: stdin reads from the parent, stdout
            // writes to the parent.
            signal(SIGPIPE, SIG_DFL);
            if (dup2(write_pipe[0], STDIN_FILENO) == -1 ||
                dup2(read_pipe[1], STDOUT_FILENO) == -1) {
                perror("dup2");
                _exit(1);
            }
            /* The duplicates on fd 0/1 are all the child needs; every
               other end must be closed or EOF is never seen. */
            close(write_pipe[0]);
            close(write_pipe[1]);
            close(read_pipe[0]);
            close(read_pipe[1]);

            // Start the ./child; the argument list must end with NULL.
            execl("./child", "child", (char *)NULL);
            perror("execl");
            _exit(127);
        } else {
            // The parent process
            char recieved_data[1024];
            char message[64];
            size_t total = 0;
            ssize_t got;
            int length;

            /* Close the ends that belong to the child. */
            close(write_pipe[0]);
            close(read_pipe[1]);

            // Send one newline-terminated line containing the child's pid.
            length = snprintf(message, sizeof message,
                              "Test data to %d\n", (int)process_id);
            if (write(write_pipe[1], message, (size_t)length) != (ssize_t)length)
                perror("write");
            /* Closing the write end gives the child EOF on stdin. */
            close(write_pipe[1]);

            // Read until the child closes its stdout (EOF) or the buffer is full.
            while (total < sizeof recieved_data - 1 &&
                   (got = read(read_pipe[0], recieved_data + total,
                               sizeof recieved_data - 1 - total)) > 0)
                total += (size_t)got;
            recieved_data[total] = '\0';
            close(read_pipe[0]);

            // Print the recieved data
            printf("Parent recieved: \"%s\"\n", recieved_data);

            // Reap this particular child before creating the next one.
            if (waitpid(process_id, &exit_status, 0) == -1)
                perror("waitpid");
        }
    }
    return 0;
}
The child.c is a simple program as shown below
child.c
#include <stdio.h>
/*
 * Read one line from standard input and echo it back on standard output,
 * prefixed with "Child process: ".
 *
 * Returns 0 on success, 1 if no input could be read.
 */
int main(int argc, const char *argv[]){
    char data_buffer[1024];

    (void)argc;
    (void)argv;

    // Wait for input from parent.  "%1023[^\n]" reads the whole line
    // (spaces included) and can never overrun the 1024-byte buffer,
    // unlike a bare "%s".
    if (scanf("%1023[^\n]", data_buffer) != 1) {
        fprintf(stderr, "Child process: no input received\n");
        return 1;
    }

    // Send data back to parent
    printf("Child process: %s", data_buffer);
    return 0;
}
Expected output
$ ./parent
parent recived: "Child process: Test data to 12345"
parent recived: "Child process: Test data to 12346"
parent recived: "Child process: Test data to 12347"
parent recived: "Child process: Test data to 12348"
parent recived: "Child process: Test data to 12349"
Where 12345, 12346....12349 is the process id of the child process
Here is some code I wrote, which I will use to explain this to you:
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
/*
 * Build the pipeline   <typed line> | grep example | wc -l > result.txt
 * out of three child processes connected by two pipes:
 *   child 0 reads one line from the terminal and writes it into fds,
 *   child 1 runs grep with stdin = fds and stdout = fdss,
 *   child 2 runs wc -l with stdin = fdss and stdout = result.txt.
 * The parent only creates the pipes, forks, and reaps the children.
 *
 * Returns 0 on success, 1 if a pipe or fork could not be created.
 */
int main() {
    char buff[1024];
    int aux, i;
    size_t count;
    int fds[2], fdss[2];
    mode_t fd_mode = S_IRWXU;

    /* Here we initialize the file descriptors */
    if (pipe(fds) == -1 || pipe(fdss) == -1) {
        perror("pipe");
        return 1;
    }

    /* After this loop a child has i in 0..2 (its role); the parent has i == 3. */
    for (i = 0; i < 3; i++) {
        aux = fork();
        if (aux == -1) {
            /* Exiting closes the pipe ends, so children already
               started see EOF and terminate. */
            perror("fork");
            return 1;
        }
        if (aux == 0)
            break;
    }

    switch (i) {
    case 0:
        printf("Write something:\n");
        /* Flush now: once fd 1 is the pipe, a still-buffered prompt
           would be sent to grep instead of the terminal. */
        fflush(stdout);

        /* Read one whole line, bounded by the buffer size. */
        if (scanf("%1023[^\n]", buff) != 1)
            buff[0] = '\0';
        count = 0;
        while (buff[count] != '\0')
            count++;

        dup2(fds[1], 1);
        close(fds[1]);
        close(fds[0]);
        close(fdss[0]);
        close(fdss[1]);

        /* Send only the characters that were typed, not the whole
           (mostly uninitialized) 1024-byte buffer. */
        if (count > 0 && write(1, buff, count) == -1)
            perror("write");
        return 0;

    case 1:
        dup2(fds[0], 0);
        dup2(fdss[1], 1);
        close(fds[0]);
        close(fds[1]);
        close(fdss[0]);
        close(fdss[1]);

        /* execl() only returns on failure.  stdout is the pipe here,
           so the error must go to stderr. */
        execl("/bin/grep", "grep", "example", (char *)NULL);
        perror("execl /bin/grep");
        exit(1);

    case 2:
        /* O_TRUNC so a shorter result does not leave stale bytes
           from a previous run in the file. */
        aux = open("result.txt", O_WRONLY | O_CREAT | O_TRUNC, fd_mode);
        if (aux == -1) {
            perror("open result.txt");
            exit(1);
        }
        dup2(fdss[0], 0);
        dup2(aux, 1);
        close(fds[0]);
        close(fds[1]);
        close(fdss[0]);
        close(fdss[1]);
        close(aux);

        execl("/usr/bin/wc", "wc", "-l", (char *)NULL);
        perror("execl /usr/bin/wc");
        exit(1);

    default:
        break;
    }

    /* Parent: it must close every pipe end, otherwise grep and wc
       never see end-of-file. */
    close(fds[0]);
    close(fds[1]);
    close(fdss[0]);
    close(fdss[1]);

    for (i = 0; i < 3; i++)
        wait(NULL);
    return 0;
}
Ok, let's start:
We create and initialize pipes with pipe()
Then we write our code and before execl() we change the file descriptors, in order to pass the text we will write in the console, through processes and finally write in a file called result.txt the result of the "grep example" command applied to the text we have written.
The call dup2(oldfd, newfd) makes newfd refer to the same open file as oldfd, first closing whatever newfd referred to before. For example:
Before dup2(fds[1], 1) we have:
0 STDIN
1 STDOUT
2 STDERR
After dup2(fds[1], 1) we have:
0 STDIN
1 fds[1]
2 STDERR
NOTE: If you don't want to use the literal 1, you can simply write STDOUT_FILENO instead.
So now we are able to write through processes and in my example to a file too
I would like to ask for some help with C programming. Basically, I'm having issues with the fork() system call.
Here's my question:
We have a Manager Process which has to create POP_SIZE Student processes. Manager Process and Student Processes itself cannot do anything else until all Student Processes have been created.
Every Student Process is identified by:
1) its identification number (6-digit integer)
2) grade obtained in specific exam (integer)
Here's the code I managed to write:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/types.h>
#define POP_SIZE 10
/*
 * Question code, kept exactly as posted.  The original process forks one
 * "manager" child, and the manager then loops POP_SIZE times calling fork()
 * to create "student" processes.  The NOTE(review) comments below mark the
 * defects visible in this block.
 */
int main(int argc, char *argv[]){
pid_t firstFork;
/* NOTE(review): this pointer is never initialized, yet it is passed to
   waitpid() below as the place to store the status. */
int *status;
int numStudents = 0;
/* NOTE(review): managerParent and studentStatus are declared but not used
   anywhere in this block. */
pid_t managerChild, managerParent;
pid_t students[POP_SIZE];
int studentStatus[POP_SIZE];
switch(firstFork = fork()){
case -1:
perror("Something wrong with fork()\n");
break;
case 0:
/* Manager process: remember our own PID. */
managerChild = getpid();
printf("Manager Child Process %d started\n", managerChild);
printf("I have to create %d Student Processes\n", POP_SIZE);
for(int i = 0; i < POP_SIZE; i++){
switch(students[i] = fork()){
case -1:
perror("Something wrong with FORK in Manager Child Process\n");
break;
case 0:
/* Newly created student.  NOTE(review): the break below only leaves the
   inner switch, so the student stays in the for loop and calls fork()
   itself on the next iteration. */
printf("Created first Student Process PID: %d\n", getpid());
/* This increments the student's own copy of the counter; the process
   that called fork() does not see the change. */
numStudents++;
break;
default:
/* Process that called fork(). */
printf("Haven't created all Student Processes\n");
/* NOTE(review): managerChild was set from getpid() above, so the manager
   is asking to wait on its own PID here, which is not one of its
   children. */
waitpid(managerChild, status, WUNTRACED | WNOHANG);
printf("%d Student Processes succesfully created\n", numStudents);
break;
}
}
break;
default:
/* Original process.  NOTE(review): it calls wait() POP_SIZE times although
   it created only one child (firstFork) in this block. */
for(int i = 0; i < POP_SIZE; i++)
wait(NULL);
}
}
I need some help understanding where to put the wait(&status) or waitpid(pid, &status, options) calls in my code in order to meet the requirements specified above.
Moreover, how can I assign and keep track of variables for every single process?
Thank you very much
Since you will be creating many child processes, it is best to start by creating a function that creates the child process, and has it execute a function specified by the caller. Let's assume both the ID number and grade are ints. Then,
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
/* Run func(id, grade) in a child process.
   The child exits with the value func() returns.

   Returns the child process PID (positive) on success,
   or -1 with errno set by fork() if no child could be created.
*/
pid_t run_child(int id, int grade,
                int (*func)(int id, int grade))
{
    pid_t p;

    /* Flush pending stdio output first.  Anything still sitting in a
       stdio buffer at fork() time is copied into the child and would
       be written a second time when the child exits. */
    fflush(stdout);
    fflush(stderr);

    p = fork();
    if (p == -1) {
        /* fork() failed; it set errno to indicate the error. */
        return -1;
    }

    if (p == 0) {
        /* Child: run the function and use its return value
           as the exit status.  Never returns to the caller. */
        exit(func(id, grade));
    }

    /* Parent process. p is positive. */
    return p;
}
Note that the third parameter is a function pointer. We specify it using the function name. That function must take two int parameters (the ID and the grade, respectively), and return an int. For example:
/* Work done inside every child process: report the two
   arguments together with this process's own PID.
   Always returns EXIT_SUCCESS.
*/
int child_process(int id, int grade)
{
    const int self = (int)getpid();

    printf("Child: id = %d, grade = %d, PID = %d.\n", id, grade, self);
    return EXIT_SUCCESS;
}
We can create a child process that runs that function using child_pid = run_child(123456, 5, child_process);. Note how the name of the function can be used to specify a function pointer. The standard C qsort() function uses the exact same mechanism to allow one to quicksort anything; the caller just needs to specify a function that can compare two elements in the array to be sorted.
We will be creating several children, and reaping them at once. That means it makes sense to write a function that reaps all child processes, essentially blocking until they all exit. We are likely interested in the exit statuses of at least some of them, so let's pass the interesting child processes PIDs, ints to save the status to, and the number of processes in those arrays, as parameters:
/* Wait for every child of this process to terminate.

   child_pid[] and child_status[] hold child_count entries each.
   When a reaped PID matches an entry in child_pid[], that entry is
   replaced by its negative and the wait status is stored at the same
   index in child_status[].  Children not listed are still reaped and
   counted, but their status is dropped.

   Returns the number of child processes reaped.
*/
size_t reap_children(pid_t *child_pid, int *child_status, size_t child_count)
{
    size_t total = 0;

    for (;;) {
        int wstatus;
        size_t slot;
        const pid_t done = wait(&wstatus);

        if (done == -1) {
            /* EINTR only means a signal handler installed without
               SA_RESTART interrupted the call; simply try again.
               Anything else (typically ECHILD: no children left)
               ends the loop with errno as set by wait(). */
            if (errno != EINTR)
                break;
            continue;
        }

        total++;

        /* Find the first entry that refers to this child, if any. */
        slot = 0;
        while (slot < child_count && child_pid[slot] != done)
            slot++;

        if (slot < child_count) {
            child_pid[slot] = -done;
            child_status[slot] = wstatus;
        }
    }

    return total;
}
Note that p = wait(&status) reaps a child process. This means that if one or more child processes have already exited, it picks one of them, and returns its PID, with exit status saved to &status. If all child processes left are still running, the call will wait until at least one of them exits. If there are no more child processes, it returns -1 with errno set to ECHILD.
If signal handlers were used, wait() can also return -1 with errno set to EINTR, if a signal was delivered to a signal handler that was installed without the SA_RESTART flag with sigaction(). Many programmers forgo this check (because "it'll never happen"), but I do like to include that check because it is easy, and makes sure adding signal handling to my code won't bite me in the butt later on. I very often do, too. (Add signal handling, I mean.)
The reason we negate the pids when the respective child process is reaped, is simple: it allows us to easily detect which child processes were reaped. (POSIX says all process IDs are positive, and pid_t is a signed type. Negating a PID is a commonly used technique, too; just see e.g. waitpid().)
If we wanted to reap a specific child process, we'd use waitpid(). For example,
pid_t child, p; /* wait for 'child'. */
int status;
do {
p = waitpid(child, &status, 0);
if (p == -1) {
if (errno == EINTR)
continue;
break;
}
} while (p != child);
if (p == child) {
/* Reaped 'child', status in 'status'. */
} else {
/* Error: failed to reap 'child'. See 'strerror(errno)'. */
}
Do note that in POSIX/Unix terminology 'child process' refers to processes created by this process only; not "grandchildren", processes created by child processes.
I prefer to write my processes to take in parameters from the command line. If no parameters are specified, or -h or --help is specified, a short help ("usage") is displayed; this is extremely common in POSIX and Unix command-line tools, and therefore very intuitive.
The following main() takes one or more ID:grade as command-line parameters. For each one, it creates a child process, and has it run the child_process() function with the specified ID and grade. The main program will then reap them all, and describe the exit status of each child process.
/* Entry point: each command-line argument is an ID:GRADE pair.
   One child process is started per valid pair, running
   child_process(ID, GRADE).  All children are then reaped and the
   way each one ended is reported.

   Returns EXIT_SUCCESS if every argument was valid and every fork
   succeeded (or if only the usage text was requested), EXIT_FAILURE
   otherwise.
*/
int main(int argc, char *argv[])
{
    pid_t child_pid[argc];
    int child_status[argc];
    int count, i, n, arg, id, grade, status;
    char dummy;

    if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, " %s ID:GRADE [ ID:GRADE ]*\n", argv[0]);
        fprintf(stderr, "\n");
        return EXIT_SUCCESS;
    }

    status = EXIT_SUCCESS;
    count = 0;

    for (arg = 1; arg < argc; arg++) {
        /* The trailing " %c" only converts if something other than
           whitespace follows the grade, so exactly 2 conversions
           means the argument is a clean ID:GRADE. */
        if (sscanf(argv[arg], "%d:%d %c", &id, &grade, &dummy) == 2) {
            child_pid[count] = run_child(id, grade, child_process);
            if (child_pid[count] == -1) {
                fprintf(stderr, "Cannot fork a child process: %s.\n", strerror(errno));
                status = EXIT_FAILURE;
            } else
                count++;
        } else {
            fprintf(stderr, "%s: Not a valid ID:GRADE specification.\n", argv[arg]);
            status = EXIT_FAILURE;
        }
    }

    /* count is never negative, so the test has to be "< 1" to detect
       the case where not a single child could be started. */
    if (count < 1) {
        fprintf(stderr, "No running child processes.\n");
        return EXIT_FAILURE;
    }

    n = (int)reap_children(child_pid, child_status, count);
    printf("Reaped %d child processes.\n", n);

    for (i = 0; i < count; i++) {
        /* reap_children() negates the PID of every child it reaped. */
        if (child_pid[i] < 0) {
            printf("Child process %d (%d of %d)", (int)(-child_pid[i]), i + 1, count);
            if (WIFEXITED(child_status[i])) {
                if (WEXITSTATUS(child_status[i]) == EXIT_SUCCESS)
                    printf(" exited with success (EXIT_SUCCESS), %d.\n", EXIT_SUCCESS);
                else
                if (WEXITSTATUS(child_status[i]) == EXIT_FAILURE)
                    printf(" exited with failure (EXIT_FAILURE), %d.\n", EXIT_FAILURE);
                else
                    printf(" exited with status %d.\n", WEXITSTATUS(child_status[i]));
            } else
            if (WIFSIGNALED(child_status[i])) {
                printf(" died from signal %d.\n", WTERMSIG(child_status[i]));
            } else {
                printf(" died from unknown causes.\n");
            }
        } else {
            /* PID still positive: wait() never returned this child. */
            printf("Child process %d (%d of %d) was lost!\n", (int)child_pid[i], i + 1, count);
        }
    }

    return status;
}
If you save the above as example.c, you can compile it to example using e.g.
gcc -Wall -O2 example.c -o example
If you then run say
./example 100001:1 100002:5 100003:3 21532:4
the output will be something like
Child: id = 100002, grade = 5, PID = 1260.
Child: id = 100001, grade = 1, PID = 1259.
Child: id = 100003, grade = 3, PID = 1261.
Child: id = 21532, grade = 4, PID = 1262.
Reaped 4 child processes.
Child process 1259 (1 of 4) exited with success (EXIT_SUCCESS), 0.
Child process 1260 (2 of 4) exited with success (EXIT_SUCCESS), 0.
Child process 1261 (3 of 4) exited with success (EXIT_SUCCESS), 0.
Child process 1262 (4 of 4) exited with success (EXIT_SUCCESS), 0.
Note that the initial Child: lines can be in any order, because the child processes run essentially in parallel. Each child process runs as soon as it is started, so this example is not a copy-and-paste answer to OP's requirements.
If you want to experiment with complex process hierarchies, I recommend using Graphviz to visualize them. For example, dot-kids.c:
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
/* Reap every child of the calling process.  For each one, print a red
   Graphviz edge from the child to this process, plus a node label that
   says how the child ended.  Returns once wait() reports that no
   children are left, or on any other wait() error (which is reported
   on standard error). */
static void reap_all(void)
{
    const int self = (int)getpid();

    for (;;) {
        int status;
        int kid;
        const pid_t p = wait(&status);

        if (p == -1) {
            if (errno == EINTR)
                continue;   /* interrupted by a signal: retry */
            if (errno != ECHILD)
                fprintf(stderr, "Process %d: reap_all(): %s.\n", self, strerror(errno));
            return;
        }

        kid = (int)p;
        printf(" \"%d\" -> \"%d\" [ color=\"#ff0000\" ];\n", kid, self);

        if (WIFEXITED(status) && WEXITSTATUS(status) == EXIT_SUCCESS)
            printf(" \"%d\" [ label=\"%d\" ];\n", kid, kid);
        else if (WIFEXITED(status))
            printf(" \"%d\" [ label=\"%d (exit %d)\" ];\n", kid, kid, WEXITSTATUS(status));
        else if (WIFSIGNALED(status))
            printf(" \"%d\" [ label=\"%d (signal %d)\" ];\n", kid, kid, WTERMSIG(status));
        else
            printf(" \"%d\" [ label=\"%d (lost)\" ];\n", kid, kid);

        fflush(stdout);
    }
}
/* Fork a process that runs child(depth, width) and exits with its
   return value.  In the parent, print a blue Graphviz edge from this
   process to the new one.  Returns the new PID, or -1 if fork() failed
   (the failure is reported on standard error). */
static pid_t run_child(int (*child)(int depth, int width), int depth, int width)
{
    pid_t kid;

    /* Empty the stdio buffers so the new process does not inherit
       (and later re-print) pending output. */
    fflush(stdout);
    fflush(stderr);

    kid = fork();
    switch (kid) {
    case -1:
        fprintf(stderr, "Process %d: Cannot fork: %s.\n", (int)getpid(), strerror(errno));
        return -1;
    case 0:
        /* New process: never returns. */
        exit(child(depth, width));
    default:
        break;
    }

    printf(" \"%d\" -> \"%d\" [ color=\"#0000ff\" ];\n", (int)getpid(), (int)kid);
    fflush(stdout);
    return kid;
}
/* Body of every forked process.  While depth is positive it starts
   sub-processes with depth - 1, passing width, width - 1, ..., 1 in
   turn, and then reaps them all.  Always returns EXIT_SUCCESS. */
int child(int depth, int width)
{
    if (depth <= 0)
        return EXIT_SUCCESS;

    for (; width > 0; width--)
        run_child(child, depth - 1, width);

    reap_all();
    return EXIT_SUCCESS;
}
/* Entry point of dot-kids: parse "depth width", print a Graphviz
   digraph header and a box node for this process, start `width`
   children, reap them, and close the digraph.

   Returns EXIT_SUCCESS normally (including when only the usage text is
   shown) and EXIT_FAILURE when depth or width is not a valid number. */
int main(int argc, char *argv[])
{
    int depth, width, i;
    char dummy;

    /* Both help spellings are looked for in argv[1]; the original
       compared "--help" against argv[2], so "--help" as the first of
       two arguments was reported as an invalid depth instead. */
    if (argc != 3 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, " %s depth width | dot -Tx11\n", argv[0]);
        fprintf(stderr, "\n");
        return EXIT_SUCCESS;
    }

    /* The trailing " %c" converts only if junk follows the number,
       so a result other than 1 means the argument is malformed. */
    if (sscanf(argv[1], " %d %c", &depth, &dummy) != 1 || depth < 0) {
        fprintf(stderr, "%s: Invalid depth.\n", argv[1]);
        return EXIT_FAILURE;
    }
    if (sscanf(argv[2], " %d %c", &width, &dummy) != 1 || width < 1) {
        fprintf(stderr, "%s: Invalid width.\n", argv[2]);
        return EXIT_FAILURE;
    }

    printf("digraph {\n");
    printf(" \"%d\" [ shape=\"box\", label=\"%d\" ];\n", (int)getpid(), (int)getpid());
    fflush(stdout);

    for (i = 0; i < width; i++)
        run_child(child, depth, width - 1);

    reap_all();
    printf("}\n");
    return EXIT_SUCCESS;
}
Compile it using e.g.
gcc -Wall -O2 dot-kids.c -o dot-kids
and run using e.g.
./dot-kids 1 3 | dot -Tx11
to see a process graph similar to
where the numbers are process IDs, blue arrows show which process created which, and red arrows show which process reaped which.
I think there are some mistakes in your code. The output I get is something like:
5 Student Processes succesfully created
Haven't created all Student Processes
Haven't created all Student Processes
3 Student Processes succesfully created
4 Student Processes succesfully created
Created first Student Process PID: 11436
Created first Student Process PID: 11438
Created first Student Process PID: 11437
Haven't created all Student Processes
4 Student Processes succesfully created
Haven't created all Student Processes
3 Student Processes succesfully created
Created first Student Process PID: 11439
Haven't created all Student Processes
3 Student Processes succesfully created
Created first Student Process PID: 11440
Haven't created all Student Processes
3 Student Processes succesfully created
Created first Student Process PID: 11441
Haven't created all Student Processes
2 Student Processes succesfully created
Created first Student Process PID: 11442
Created first Student Process PID: 11443
You can see there are too many children executing, which should make you suspicious (note in particular that the number of student processes sometimes seems to decrease from one print to the next). The parent continues executing the for loop. However, the child also continues executing from the point where fork was called and, because that point is inside a loop, it forks too, creating another child, and so on. To avoid that, the child processes need to break out of the for loop.
You can try something like the following. I added a variable jj that if <0 means it is a child process executing. Before next loop iteration the variable is checked and if <0 it breaks from the for loop.
It is not the most elegant solution but seems ok.
#include<stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/types.h>
#define POP_SIZE 10
/*
 * The original process forks one manager; the manager forks up to
 * POP_SIZE student processes and then reaps them; the original process
 * reaps the manager.
 *
 * jj tells each process what it is once it leaves the fork switch:
 *    0  manager, keep creating students
 *   -1  this process is a student and must leave the loop at once,
 *       otherwise it would go on forking students of its own
 *    1  fork() failed in the manager, stop creating students
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the manager could not be
 * created.
 */
int main(int argc, char *argv[]){
    pid_t firstFork;
    pid_t managerChild;
    pid_t students[POP_SIZE];
    int numStudents = 0;
    int jj = 0;

    (void)argc;
    (void)argv;

    switch (firstFork = fork()) {
    case -1:
        printf("Something wrong with fork()\n");
        return EXIT_FAILURE;

    case 0:
        managerChild = getpid();
        printf("Manager Child Process %d started\n", (int)managerChild);
        printf("I have to create %d Student Processes\n", POP_SIZE);

        for (int i = 0; i < POP_SIZE; i++) {
            /* Keep buffered output out of the new process. */
            fflush(stdout);

            switch (students[i] = fork()) {
            case -1:
                printf("Something wrong with FORK in Manager Child Process\n");
                jj = 1;
                break;
            case 0:
                printf("Created first Student Process PID: %d\n", (int)getpid());
                jj = -1;
                break;
            default:
                /* Count in the manager: a counter incremented in the
                   student is invisible to every other process. */
                numStudents++;
                if (numStudents < POP_SIZE)
                    printf("Haven't created all Student Processes\n");
                break;
            }
            if (jj != 0) break;
        }

        if (jj < 0)
            break;  /* student: nothing more to do, fall out and exit */

        printf("%d Student Processes succesfully created\n", numStudents);

        /* students[0 .. numStudents-1] are the PIDs that were created;
           wait for each of them with a real status location (NULL
           means "status not wanted"). */
        for (int i = 0; i < numStudents; i++) {
            if (waitpid(students[i], NULL, 0) == -1)
                perror("waitpid");
        }
        break;

    default:
        /* The original process has exactly one child: the manager. */
        if (waitpid(firstFork, NULL, 0) == -1)
            perror("waitpid");
        break;
    }

    return EXIT_SUCCESS;
}
I am trying to walk through folders and read files using fork(). I use the file tree walk function (nftw) to go through the folders recursively. The basic idea is that there will be as many children as there are files and directories in a directory. The children read each file separately and concurrently. But if there are directories, the children will in turn be parents of the children that read the files inside them.
/*
 * Question code, kept exactly as posted.  Callback taking the arguments of
 * a file-tree-walk visit (path, stat info, type flag, struct FTW).  It
 * forks on every invocation; the parent waits only when the entry is a
 * directory, and the child handles either a file or a directory.
 * Returns 0, or -1 if fork() fails.
 */
static int soner_each_time(const char *filepath, const struct stat *info,
int typeflag, struct FTW *ftwinfo)
{
pid_t pid = 0;
char buf[BUFSIZE];
int status;
/* NOTE(review): i is not used in this block. */
int i = 0;
/* The variables are related to functions of reading file */
int totalLines;
char arr[TOTALNUMBEROFLINES][BUFSIZE];
int retval;
/* Name part of the path: filepath advanced by ftwinfo->base. */
const char *const filename = filepath + ftwinfo->base;
/* A new process is created for every entry, whatever its type. */
if (( pid = fork()) < 0) {
/* Keep errno across fprintf() so the caller still sees fork()'s error. */
const int cause = errno;
fprintf(stderr, "Fork error: %s\n", strerror(cause));
errno = cause;
return -1;
}
else if( pid > 0 ) // parent
{
/* The parent waits only for directory entries; for a file entry it
   falls straight through to "return 0" without reaping the child. */
if (typeflag == FTW_DP || typeflag == FTW_D)
{
sprintf(buf, "%*s%s\n\n", ftwinfo->level * 4, "", filepath);
write(1, buf, strlen(buf));
pid = wait(&status);
if (pid == -1)
perror("Failed to wait for child");
else if (WIFEXITED(status) && !WEXITSTATUS(status))
printf("parent [%d] reaped child [%d]\n", getpid(), pid);
else if (WIFEXITED(status))
printf("Child %ld terminated with return status %d\n",
(long)pid, WEXITSTATUS(status));
else if (WIFSIGNALED(status))
printf("Child %ld terminated due to uncaught signal %d\n",
(long)pid, WTERMSIG(status));
else if (WIFSTOPPED(status))
printf("Child %ld stopped due to signal %d\n",
(long)pid, WSTOPSIG(status));
}
}
/* NOTE(review): neither child branch calls exit(), so the child also
   reaches "return 0" below and returns to whatever invoked this
   callback, exactly like the parent does. */
if (pid == 0) // child
{
if (typeflag == FTW_F)
{
sprintf(buf, "||| Child [%d] of parent [%d]: %s |||\n", getpid(), getppid(), filename);
write(1, buf, strlen(buf));
/* Both of them are about reading function */
totalLines = storeLinesInArray(filename, arr);
/* NOTE(review): key is not declared anywhere in this block. */
retval = for_each_file(filename, totalLines, key, arr);
sprintf(buf, "||| Child [%d] of parent [%d] is about to exit |||\n", getpid(), getppid());
write(1, buf, strlen(buf));
}
else if (typeflag == FTW_DP || typeflag == FTW_D)
{
sprintf(buf, "%*s%s\n\n", ftwinfo->level * 4, "", filepath);
write(1, buf, strlen(buf));
}
}
return 0;
}
FTW_DP and FTW_D indicate folders; FTW_F indicates files. Basically, the code calls fork() every time the callback runs. If it is the parent, it waits for its children to read the files. Because the function is called recursively, it forks on every call. But there is something I can't work out: it forks more than once for a single file. For example, there should be one child for 1a.txt, but with this scheme there are 8. Forking is a really difficult subject. I do exercises every day and try to understand it. Your explanations and help will improve my skills in this area.
#edit: mcve code
#define _POSIX_C_SOURCE 200809L
#define _XOPEN_SOURCE 700
#include <unistd.h>
#include <dirent.h>
#include <stdlib.h>
#include <locale.h>
#include <string.h>
#include <ftw.h>
#include <stdio.h>
#define TOTALNUMBEROFLINES 1000
#define BUFSIZE 1000
/* Fatal-error helper: print str followed by the text for the current
   errno on standard error, flush standard output, and terminate the
   process with exit status 1.  Does not return. */
void err_sys(const char *const str)
{
    perror(str);

    fflush(stdout);

    exit(1);
}
/* Placeholder for the real line reader: both arguments are ignored and
   the reported line count is always 0. */
int storeLinesInArray(const char *file, char lines[][BUFSIZE])
{
    (void)file;
    (void)lines;

    return 0;
}
/* Placeholder per-file worker: print the file name on standard output
   and flush it immediately.  totalLines, key and arr are not used.
   Always returns 0. */
static int for_each_file(const char *filepath, int totalLines, const char *key, const char arr[][BUFSIZE])
{
    (void)totalLines;
    (void)key;
    (void)arr;

    printf("File name is = %s\n", filepath);
    fflush(stdout);

    return 0;
}
/*
 * Question code (MCVE), kept exactly as posted.  This is the callback
 * that main() hands to nftw().  It forks on every invocation; the parent
 * waits only when the entry is a directory, and the child handles either
 * a file or a directory.  Returns 0; exits the process if fork() fails.
 */
static int soner_each_time(const char *filepath, const struct stat *info,
int typeflag, struct FTW *ftwinfo)
{
pid_t pid = 0;
char buf[BUFSIZE];
int status;
/* The variables are related to functions of reading file */
int totalLines;
char arr[TOTALNUMBEROFLINES][BUFSIZE];
int retval;
/* Name part of the path: filepath advanced by ftwinfo->base. */
const char *const filename = filepath + ftwinfo->base;
/* A new process is created for every entry, whatever its type. */
if (( pid = fork()) < 0) {
perror("failed fork");
exit(-1);
}
else if( pid > 0 ) // parent
{
/* The parent waits only for directory entries; for a file entry it
   falls straight through to "return 0" without reaping the child. */
if (typeflag == FTW_DP || typeflag == FTW_D)
{
sprintf(buf, "%*s%s\n\n", ftwinfo->level * 4, "", filepath);
write(1, buf, strlen(buf));
pid = wait(&status);
if (pid == -1)
perror("Failed to wait for child");
else if (WIFEXITED(status) && !WEXITSTATUS(status))
printf("parent [%d] reaped child [%d]\n", getpid(), pid);
else if (WIFEXITED(status))
printf("Child %ld terminated with return status %d\n",
(long)pid, WEXITSTATUS(status));
else if (WIFSIGNALED(status))
printf("Child %ld terminated due to uncaught signal %d\n",
(long)pid, WTERMSIG(status));
else if (WIFSTOPPED(status))
printf("Child %ld stopped due to signal %d\n",
(long)pid, WSTOPSIG(status));
}
}
/* NOTE(review): neither child branch calls exit(), so the child also
   reaches "return 0" below and returns into nftw(), continuing the
   walk as a second walker alongside the parent. */
if (pid == 0) // child
{
if (typeflag == FTW_F)
{
sprintf(buf, "||| Child [%d] of parent [%d]: %s |||\n", getpid(), getppid(), filename);
write(1, buf, strlen(buf));
/* Both of them are about reading function */
totalLines = storeLinesInArray(filename, arr);
/* retval is stored but not examined afterwards in this block. */
retval = for_each_file(filename, totalLines, "not needed now", arr);
sprintf(buf, "||| Child [%d] of parent [%d] is about to exit |||\n", getpid(), getppid());
write(1, buf, strlen(buf));
}
else if (typeflag == FTW_DP || typeflag == FTW_D)
{
sprintf(buf, "%*s%s\n\n", ftwinfo->level * 4, "", filepath);
write(1, buf, strlen(buf));
}
}
return 0;
}
/* Walk the (placeholder) directory path with nftw(), calling
   soner_each_time() for every entry, with at most 15 descriptors and
   FTW_CHDIR set.  Prints a message and exits with -1 if the walk
   reports failure; returns 0 otherwise.  The command line is unused. */
int main(int argc, char *argv[])
{
    const int walk_result = nftw("here is directory path", soner_each_time, 15, FTW_CHDIR);

    (void)argc;
    (void)argv;

    if (walk_result != 0) {
        fprintf(stderr, "Failed directory.\n");
        exit(-1);
    }

    return 0;
}
You had a few bugs. The corrected code is below.
The child did not call exit, so it would continue with its own nftw walk, and many files were being processed redundantly. I added the exit(0).
forks were being done so fast that the system would run out of free pids.
I've added three things to fix this:
A "reap" routine that loops on waitpid(0,&status,WNOHANG) to catch done children
Added a loop around the fork to catch the "out of slots" problem
Added a throttling mechanism to limit the number of active children to a sane/useful value
I've annotated the source to point out the places where the bugs were.
While not hard bugs, doing a fork for each file adds significant overhead. The disk bandwidth will saturate with about four active child threads, so using more just slows things down. Forking a child for the directory doesn't do much since the "meaty" processing is going to be for the file.
Anyway, here's the corrected code [please pardon the gratuitous style cleanup]:
#define _POSIX_C_SOURCE 200809L
#define _XOPEN_SOURCE 700
#include <unistd.h>
#include <dirent.h>
#include <stdlib.h>
#include <locale.h>
#include <string.h>
#include <errno.h>
#include <ftw.h>
#include <stdio.h>
#include <sys/wait.h>
#define TOTALNUMBEROFLINES 1000
#define BUFSIZE 1000
// output verbose/debug messages
int opt_v;
// limit of number of children that can be used at one time (if non-zero)
int opt_T;
int pendcnt; // number of active children
/* Fatal-error helper: report str plus the current errno text on
   standard error, flush standard output, then end the process with
   exit status 1.  Does not return. */
void
err_sys(const char *const str)
{
    perror(str);

    fflush(stdout);

    exit(1);
}
/* Stub standing in for the real line reader: ignores both arguments
   and always reports 0 lines. */
int
storeLinesInArray(const char *file, char lines[][BUFSIZE])
{
    (void)file;
    (void)lines;

    return 0;
}
/* Stub per-file worker: writes the file name to standard output and
   flushes it right away.  totalLines, key and arr are unused.
   Always returns 0. */
static int
for_each_file(const char *filepath, int totalLines, const char *key, const char arr[][BUFSIZE])
{
    (void)totalLines;
    (void)key;
    (void)arr;

    printf("File name is = %s\n", filepath);
    fflush(stdout);

    return 0;
}
// reap_some -- reap children that have finished
//
// final == 0: poll.  Collect whatever has already terminated and return
//             at once (WNOHANG).
// final != 0: drain.  Block in waitpid() until no children are left, so
//             the caller's final loop sleeps instead of spinning.
//
// Subtracts the number of reaped children from the global pendcnt and
// returns that number.  If waitpid() reports ECHILD there are no
// children at all, so pendcnt is forced to 0; otherwise a caller looping
// "while (pendcnt > 0)" could never finish.
int
reap_some(int final)
{
    pid_t pid;
    int status;
    int reapcnt;
    int nochild;
    const int options = final ? 0 : WNOHANG;

    reapcnt = 0;
    nochild = 0;

    while (1) {
        pid = waitpid(0, &status, options);

        // only possible with WNOHANG: nothing has finished yet
        if (pid == 0)
            break;

        if (pid == -1) {
            // interrupted by a signal handler: not an error, retry
            if (errno == EINTR)
                continue;
            if (errno == ECHILD)
                nochild = 1;
            else
                perror("Failed to wait for child");
            break;
        }

        if (WIFSIGNALED(status)) {
            printf("Child %ld terminated due to uncaught signal %d\n",
                (long) pid, WTERMSIG(status));
            ++reapcnt;
            continue;
        }

        // a stopped child is still alive, so it is not counted
        if (WIFSTOPPED(status)) {
            printf("Child %ld stopped due to signal %d\n",
                (long) pid, WSTOPSIG(status));
            continue;
        }

        if (WIFEXITED(status)) {
            ++reapcnt;
            if (WEXITSTATUS(status) == 0) {
                if (opt_v)
                    printf("parent [%d] reaped child [%d]\n", getpid(), pid);
            }
            else
                printf("Child %ld terminated with return status %d\n",
                    (long) pid, WEXITSTATUS(status));
            continue;
        }
    }

    // bump down the number of children that are "in-flight"
    pendcnt -= reapcnt;
    if (nochild || pendcnt < 0)
        pendcnt = 0;

    return reapcnt;
}
// soner_each_time -- nftw() callback
//
// Directories are handled in the walking process itself: the path is
// printed, indented by one space per nesting level.
// For each regular file a child process is forked; the child runs the
// reading functions and exits, while the parent only counts it in
// pendcnt and carries on with the walk.
//
// Returns 0 to continue the walk, or -1 (which makes nftw() stop) when
// fork() fails for good.
static int
soner_each_time(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftwinfo)
{
    pid_t pid = 0;
    int len;
    char buf[BUFSIZE];

    /* The variables are related to functions of reading file */
    int totalLines;
    char arr[TOTALNUMBEROFLINES][BUFSIZE];
    int retval;
    const char *const filename = filepath + ftwinfo->base;

    (void) info;

    switch (typeflag) {
    case FTW_DP:
    case FTW_D:
        // snprintf() instead of sprintf(): a path can be longer than
        // BUFSIZE; an over-long line is truncated rather than written
        // past the end of buf
        len = snprintf(buf, sizeof buf, "%*s%s\n\n", ftwinfo->level, "", filepath);
        if (len < 0)
            break;
        if ((size_t) len >= sizeof buf)
            len = (int) sizeof buf - 1;
        if (write(1, buf, (size_t) len) == -1)
            perror("write");
        break;

    case FTW_F:
        // limit the number of in-flight children
        // too many children serves no purpose -- they saturate the system
        // resources and performance actually goes _down_
        // ">=" (not ">"): wait while the limit is already reached, so
        // that after the fork below at most opt_T children are active
        if (opt_T > 0) {
            while (pendcnt >= opt_T)
                reap_some(0);
        }

        // keep output buffered by the parent out of the child, whose
        // exit() would otherwise print it a second time
        fflush(stdout);

        // retry only when fork() failed for lack of resources _and_
        // there is a child whose exit can free some; any other failure
        // would never clear up, so stop the walk
        while ((pid = fork()) < 0) {
            if (errno != EAGAIN || pendcnt <= 0) {
                perror("fork");
                return -1;
            }
            reap_some(0);
        }

        // parent
        // keep track of the child count
        if (pid > 0) {
            ++pendcnt;
            break;
        }

        // child
        if (opt_v) {
            len = snprintf(buf, sizeof buf, "||| Child [%d] of parent [%d]: %s |||\n",
                getpid(), getppid(), filename);
            if (len > 0) {
                if ((size_t) len >= sizeof buf)
                    len = (int) sizeof buf - 1;
                if (write(1, buf, (size_t) len) == -1)
                    perror("write");
            }
        }

        /* Both of them are about reading function */
        totalLines = storeLinesInArray(filename, arr);
        retval = for_each_file(filename, totalLines, "not needed now", arr);

        if (opt_v) {
            len = snprintf(buf, sizeof buf,
                "||| Child [%d] of parent [%d] is about to exit (RETVAL: %d) |||\n",
                getpid(), getppid(), retval);
            if (len > 0) {
                if ((size_t) len >= sizeof buf)
                    len = (int) sizeof buf - 1;
                if (write(1, buf, (size_t) len) == -1)
                    perror("write");
            }
        }

        // child won't exit without this -- causing multiple children to redo
        // the same files (i.e. they would continue the nftw -- only parent
        // should do that)
        exit(0);

    default:
        // other entry types are ignored
        break;
    }

    return 0;
}
// main -- parse options, walk the directory, wait for the workers
//
// usage: prog [-T<n>] [-v] directory
//   -T<n>  allow at most n children at a time (-T alone or -T0: no limit;
//          default 10)
//   -v     verbose messages
//
// Returns 0 on success; exits with status 1 on a bad command line or if
// nftw() reports failure.
int
main(int argc, char **argv)
{
    char *cp;
    char *end;
    long limit;

    --argc;
    ++argv;

    opt_T = 10;

    for (; argc > 0; --argc, ++argv) {
        cp = *argv;
        if (*cp != '-')
            break;

        switch (cp[1]) {
        case 'T': // throttle
            cp += 2;
            if (*cp == 0) {
                opt_T = 0;
                break;
            }
            // strtol() instead of atoi() so that "-Tabc" or a negative
            // value is rejected instead of silently becoming a number
            errno = 0;
            limit = strtol(cp, &end, 10);
            if (end == cp || *end != 0 || errno == ERANGE ||
                limit < 0 || limit != (int) limit) {
                fprintf(stderr, "invalid throttle value '%s'\n", cp);
                exit(1);
            }
            opt_T = (int) limit;
            break;

        case 'v': // verbose messages
            opt_v = 1;
            break;

        default:
            fprintf(stderr, "unknown option '%s'\n", cp);
            exit(1);
        }
    }

    // without this check a missing directory argument would hand a NULL
    // pointer to printf("%s") and to nftw()
    if (argc < 1) {
        fprintf(stderr, "usage: [-T<n>] [-v] directory\n");
        exit(1);
    }

    cp = *argv;
    printf("opt_T=%d opt_v=%d -- %s\n",opt_T,opt_v,cp);
    // start-up pause kept from the original program
    sleep(3);
    printf("away we go ...\n");

    if (nftw(cp, soner_each_time, 15, FTW_CHDIR)) {
        fprintf(stderr, "Failed directory.\n");
        exit(1);
    }

    // wait for all children to complete
    while (pendcnt > 0)
        reap_some(1);

    return 0;
}
UPDATE:
Changed the code to do directory processing in the parent only (i.e. child is forked only for files). Fixed a bug. So, now, the -T throttle parameter works with a much lower value and can be the equivalent of "number of workers". Changed program to use a default throttle value.
UPDATE #2:
> I said parent because there is only one parent. I wonder whether I may trace wrong.
No, you are correct. There is only one parent. That was by design.
> I would like to make parent for each directory like explained in the first scheme.
Actually, you wouldn't/won't with a proper understanding of what's truly involved. Obi Wan Kenobi: "These are not the droids you're looking for"
There are a number of technical, performance, and system saturation issues with doing a recursive fork on each directory. The example I coded avoids all these with the best compromise for design and performance. It also allowed the master to "run ahead" of the children and keep children as busy as possible, regardless of the number of files/subdirs in a given directory.
Side note: I've got 40+ years experience and I've written a number of nftw equivalent programs. So, the following comes from all that.
What's the desired end result?
You've only got skeleton code, but what you actually do [intend to do] influences the architecture. Your ultimate program may be:
CPU bound [constantly waiting for CPU operations like multiplies, etc]
Memory bound [constantly waiting for reads from or writes to DRAM to complete]
I/O bound [constantly waiting for I/O operations to complete]
Also, do you want pre-order or post-order [like FTW_DEPTH] traversal? I presume pre-order
You can no longer use nftw.
You will need to do your equivalent using opendir/readdir/closedir [which is what nftw does].
What you need is a process that does a single level in the hierarchy. It's torture to get nftw to abort and start a new one to achieve that.
Below is some pseudo code for this.
But ... The implementation becomes more complex and will not provide better performance and may actually degrade performance. It may also cause unrelated programs to crash, such as Firefox, vlc, window managers, etc.
You'll now need interprocess communication and shared memory
With my example above, there was only one control process. To maintain throttling, only a simple increment/decrement of pendcnt was required.
When you add recursive forks for directories, now any subprocess forked for a directory has to increment/decrement the global copy of pendcnt in shared memory. It must use an interprocess semaphore to control access to that variable. Or, perhaps, some atomic increment/decrement primitives [ala C11 atomics].
Now, contention for that semaphore becomes a [delay] factor.
Performance:
Having more than a few active processes actually degrades performance. In other words, forking for the directory will actually run slower than a single process.
Beyond a few "worker" processes that do something with a file, the disk I/O bandwidth will be used up. You'll get no further benefit by adding more processes.
With many processes, they may actually interfere with one another. Consider that process A requests a disk read. But, so does process B. A's read completes inside the kernel, but before it can be returned to A, the kernel buffer for A's read has to be repurposed to fulfill B's read. A's read will have to be repeated.
This is what's known as [virtual memory] page "thrashing".
Locking up and crashing the system
As more and more disk I/O is done, more and more kernel buffers have to be used to contain the data. The kernel may have to evict page buffers to make room. Some of them may be for the unrelated programs mentioned above.
In other words, your program's many processes may monopolize the CPU, disk, and memory usage. Some programs like Firefox will timeout [and crash] because they see long delays that they wouldn't see otherwise and assume that something internal to them caused the delay.
I've run such an nftw program and seen Firefox say: "Killing locked up javascript script".
Worse yet, I've had vlc fall behind in timing and start skipping frames. This caused the window manager to get confused because it thought this was due to some logic error instead of just a very slowly responding system. The end result was that the window manager aborted and had to be manually restarted.
This can also slow down more critical programs and kernel daemons.
In certain cases, this could only be cleaned up by a system reboot.
Also, running many processes on a system you share with others will turn you into a "bad citizen", so be careful about consuming too many resources.
Anyway, here's the pseudo code:
// pseudo -- loose pseudo-code for non-nftw method
//
// NOTES:
// (1) pendcnt must now be a _shared_ memory variable (e.g. shmget, etc)
// (2) access must be locked by a shared memory semaphore
// (3) we must now have a list of our outstanding children
// (4) we can no longer do a blind waitpid(0,&status,WNOHANG) as we need to
// keep track of when our direct children complete
// entfile -- one saved directory entry: the entry itself plus its stat data
struct entfile {
struct dirent ent; // the directory entry (d_name and d_type are read from it)
struct stat st; // stat information for the same entry
};
// dodir -- enter/exit directory and perform all actions
//
// subdir is handed to chdir, so it is taken relative to the current
// directory. All entries are saved first, then plain files are processed,
// then subdirectories; after each pass we wait for the children on the
// pending list.
void
dodir(const char *subdir)
{
    // NOTE: you can create a wrapper struct for this that also has stat
    struct entfile dirlist[1000];

    // add subdir to directory stack ...
    dirstack_push(subdir);

    // enter directory
    chdir(subdir);

    // do whatever you'd like ...
    process_directory(subdir);

    // open directory
    dirctl = opendir(".");

    // pre-save all entries [skipping "." and ".."]
    // this prevents too many open directory descriptors
    // NOTE: we should stat(2) the file if d_type not supported
    while (1) {
        dirent = readdir(dirctl);

        // BUGFIX: readdir returns a null pointer at end of directory --
        // without this test the loop never ends and dereferences it
        if (! dirent)
            break;

        // BUGFIX: actually skip "." and ".." as promised above -- descending
        // into either of them would recurse forever
        if (dirent->d_name[0] == '.' &&
            (dirent->d_name[1] == 0 ||
             (dirent->d_name[1] == '.' && dirent->d_name[2] == 0)))
            continue;

        stat(dirent->d_name,&st);
        add_to_dirent_list(dirlist,dirent,&st);
    }

    // close directory _before_ we process any entries
    closedir(dirctl);

    // process all file entries -- pre-order
    for (ALL_IN_DIRLIST(ent,dirlist)) {
        if (ent->ent.d_type == ISFILE)
            doentry(ent);
    }
    wait_for_all_on_pendlist();

    // process all directory entries -- pre-order
    // BUGFIX: iterate with "ent" -- the loop body uses ent, not dirent
    for (ALL_IN_DIRLIST(ent,dirlist)) {
        if (ent->ent.d_type == ISDIR)
            doentry(ent);
    }
    wait_for_all_on_pendlist();

    // remove directory from stack
    dirstack_pop();

    // exit directory
    chdir("..");
}
// doentry -- process a directory entry
//
// Forks (through forkme, which also does the throttling). The parent records
// the child on the pending list and returns; the child processes the entry --
// a plain file or, recursively, a directory -- and exits.
void
doentry(struct entfile *ent)
{
    char *tail;

    tail = ent->ent.d_name;

    // does throttle, etc.
    pid = forkme();

    // parent
    // see notes above
    if (pid) {
        // NOTE: these semaphore waits can be costly
        sem_wait();
        ++pendcnt;
        sem_post();

        add_pid_to_pendlist(pid,tail,...);
        return;
    }

    // child
    // BUGFIX: the stat data is ent->st, so the mode is ent->st.st_mode
    // (was ent->st.st.st_mode)
    switch (ent->st.st_mode & ...) {
    case ISFILE:
        process_file(tail);
        break;

    case ISDIR:
        dodir(tail);
        break;
    }

    // the child must never return into the caller's loop
    exit(0);
}
// wait for immediate children
//
// Keeps sweeping the pending list until MORE_IN_PENDLIST is false. Each
// child is polled with WNOHANG, so one unfinished child does not block the
// sweep over the others.
// NOTE(review): as written this spins without sleeping while children are
// still running -- confirm that is acceptable or add a short delay
void
wait_for_all_on_pendlist(void)
{
while (MORE_IN_PENDLIST) {
for (FORALL_IN_PENDLIST(tsk)) {
// poll this child only; its status is stored in the list entry
pid = waitpid(tsk->pid,&tsk->status,WNOHANG);
// check status like reap_some
// a positive return is treated as "this child is done"
if (pid > 0)
remove_pid_from_pendlist(tsk);
}
}
}
I am writing a program in which I need to create two child processes, a producer and a consumer. The producer writes on a file what is read from stdin, the consumer reads the same file after the producer has written the line. I need to synchronize the two processes and I wanted to do so by using signals, but I have now a problem in that I cannot send (using the kill() function) the signals from the consumer to the producer.
This is my program:
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
/* catcherp -- handler installed for SIGUSR2 in the producer. It does nothing:
 * it exists only so that the signal interrupts pause() instead of taking its
 * default action. Declared with the (int) prototype signal() expects. */
void catcherp(int signo) { (void) signo; }

/* catcherc -- same do-nothing handler, installed for SIGUSR1 in the consumer. */
void catcherc(int signo) { (void) signo; }
pid_t producer, consumer;
/*
 * main -- forks a producer child and then a consumer child that are meant to
 * take turns on test.txt: the producer writes a word and sends SIGUSR1, the
 * consumer prints it and answers with SIGUSR2.
 *
 * NOTE(review): this is the version that does not work. The producer is
 * forked before the consumer exists, so its copy of `consumer` never holds
 * the consumer's pid (see the comment at the kill call).
 */
int main () {
int status_consumer, status_producer;
char string[128], reading[128];
FILE * fout, *finn;
producer = fork();
if (producer == 0){
signal(SIGUSR2, catcherp);
// producer process, child
while(1){
// opening with "w" truncates the file before the prompt is shown
fout = fopen ("test.txt", "w");
printf ("?: ");
// NOTE(review): %s has no field width, so a long word overruns string[128]
scanf ("%s", string);
fputs(string, fout);
fclose(fout);
// `consumer` is a file-scope variable that is only assigned by the fork()
// further down, in the parent, after this child was created -- here it is
// still 0. Per POSIX, kill() with pid 0 signals the caller's whole process
// group rather than a single process.
kill(consumer, SIGUSR1);
// sleep until any handled signal arrives
pause();
}
// not reached: the loop above has no break yet
exit(0);
} else {
// parent process
consumer = fork ();
if (consumer == 0) {
signal(SIGUSR1, catcherc);
// consumer process, child
// `producer` was set by the first fork() before this child was created, so
// this process does hold the producer's pid
while(1) {
pause();
finn = fopen ("test.txt", "r");
fgets(reading, 128, finn);
printf("%s\n", reading);
fclose(finn);
kill (producer, SIGUSR2);
}
// not reached: the loop above has no break yet
exit(0);
} else {
printf("This is the parent process\n");
// the parent only waits for both children
waitpid(producer, &status_producer, 0);
waitpid(consumer, &status_consumer, 0);
printf("The children exited\n");
}
}
return 0;
}
The exit(0) commands in both child processes are there because I still have to implement the exit condition for the loop. I am pretty sure that my problem lies in how I create the consumer process after creating the producer process. That means that the producer sees the "consumer" pid to be 0, which terminates the program.
Now, I would like to understand how I'm supposed to create two concurrent processes using the fork() function (if it's possible), can someone enlighten me?
In the end I managed to solve the problem, but I had to use the temporary file in order to get the pid of the producer process when inside the consumer process.
I was hoping to find a smarter way but the solution given by the course was basically the same.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
/* catcher -- do-nothing SIGUSR1 handler. It exists only so that the signal
 * interrupts pause() instead of terminating the process. Declared with the
 * (int) prototype signal() expects. */
void catcher(int signo) { (void) signo; }
/*
 * main -- fork a consumer and a producer that hand words to each other
 * through tmp.txt, synchronised with SIGUSR1.
 *
 * The producer writes "<its pid>\t<word>" to tmp.txt and signals the
 * consumer; the consumer reads the record (which is how it learns the
 * producer's pid), prints the word and signals back. Both stop at the word
 * "end". The parent waits for both children.
 *
 * Returns 0 on success; a process that hits an I/O or fork error exits 1.
 *
 * NOTE: pause() can still miss a signal that arrives between the preceding
 * kill() and the pause() call itself; closing that window needs
 * sigprocmask()/sigsuspend().
 */
int main () {
    int status_consumer, status_producer; // needed for the waitpid functions
    pid_t producer, consumer;             // pids of the child processes
    char string[128];                     // input string
    FILE * f;                             // fp

    // installed before forking so that both children inherit the handler
    signal (SIGUSR1, catcher);

    consumer = fork();
    if (consumer < 0) {
        perror("fork");
        exit(1);
    }

    if (consumer == 0) {
        // consumer child
        int sender = 0; // pid as read from the file; %d needs an int

        while (1) {
            pause(); // wait for the ready signal from the sender

            f = fopen ("tmp.txt", "r");
            if (f == NULL) {
                perror("tmp.txt");
                exit(1);
            }

            // read the pid of the sender and the string; the field width
            // keeps the word inside string[128]
            if (fscanf (f, "%d %127s", &sender, string) != 2 || sender <= 0) {
                fprintf(stderr, "tmp.txt: malformed record\n");
                fclose(f);
                exit(1);
            }
            fclose(f);
            producer = (pid_t) sender;

            printf("%s\n", string);

            if (strcmp("end", string) == 0) {
                break; // exit the loop when the word "end" is read
            }
            kill (producer, SIGUSR1);
        }
        exit(0);
    }

    producer = fork ();
    if (producer < 0) {
        perror("fork");
        // the consumer would otherwise sit in pause() forever
        kill(consumer, SIGTERM);
        waitpid(consumer, &status_consumer, 0);
        exit(1);
    }

    if (producer == 0) {
        // producer child
        while(1) {
            f = fopen ("tmp.txt", "w");
            if (f == NULL) {
                perror("tmp.txt");
                kill(consumer, SIGTERM); // do not leave the consumer hanging
                exit(1);
            }

            printf("?: ");
            fflush(stdout); // make sure the prompt is visible before reading

            // read the string from stdin, bounded to the buffer; treat
            // end-of-input like the word "end" so the loop cannot spin
            if (scanf("%127s", string) != 1) {
                strcpy(string, "end");
            }

            fprintf(f, "%d\t%s\n", (int) getpid(), string); // write on tmp.txt
            fclose(f);

            kill(consumer, SIGUSR1);
            if (strcmp("end", string) == 0){
                break; // exit the loop when the word "end" is read
            }
            pause();
        }
    } else {
        // parent process
        waitpid(producer, &status_producer, 0);
        waitpid(consumer, &status_consumer, 0);
    }

    return 0; // end of program
}
Thanks for the comments
I know I'm going to need to use fork(), but this just creates a single child process. Do I simply call fork again from within the child process? Also, I need them to communicate through a signal or a pipe; which is easier to implement, and what do I need to know for doing that (functions, etc.)?
To create a second process, call fork() again - either within the parent or the child (but not both!). Which you choose depends on whether you want this process to be a child of the original parent or a child of the first child process (it is usual for it to be a child of the original parent).
Communicating through a pipe is much simpler and more reliable than using signals. pipe(), close(), read(), write() and select() are the key functions here.
For example, to have the parent create two child processes, you would do something like:
/* Skeleton: one parent creating two children.
 * fork() returns 0 in the new child, the child's pid (> 0) in the parent,
 * and -1 when no child could be created -- that case must not fall into the
 * parent branch. */
pid_t child_a, child_b;

child_a = fork();
if (child_a < 0) {
    /* fork failed: no child A exists -- report the error */
} else if (child_a == 0) {
    /* Child A code */
} else {
    child_b = fork();
    if (child_b < 0) {
        /* fork failed: child A is already running -- report the error */
    } else if (child_b == 0) {
        /* Child B code */
    } else {
        /* Parent Code */
    }
}
Another fancy code using && operator:
pid_t c1_pid, c2_pid;
/* && short-circuits: in child 1 the first fork() returned 0, so the second
 * fork() is never evaluated there and only the original process runs it. */
(c1_pid = fork()) && (c2_pid = fork()); // Creates two children
/* c2_pid is never assigned in child 1, but it is only read when c1_pid != 0.
 * NOTE(review): a failed fork() returns -1, which is non-zero, so failures
 * end up in the parent branch below. */
if (c1_pid == 0) {
/* Child 1 code goes here */
} else if (c2_pid == 0) {
/* Child 2 code goes here */
} else {
/* Parent code goes here */
}
#include <stdio.h>
#include <unistd.h>
void main(){
int pi_d ;
int pid ;
pi_d = fork();
if(pi_d == 0){
printf("Child Process B:\npid :%d\nppid:%d\n",getpid(),getppid());
}
if(pi_d > 0){
pid = fork();
if(pid > 0){
printf("\nParent Process:\npid:%d\nppid :%d\n",getpid(),getppid());
}
else if(pid == 0){
printf("Child Process A:\npid :%d\nppid:%d\n",getpid(),getppid());
}
}
}
output :
Parent Process:
pid:3648
ppid :2379
Child Process B:
pid :3649
ppid:3648
Child Process A:
pid :3650
ppid:3648
You can put the fork in a loop and generate as many child processes as you need.
I did that on a project recently.
/* Fork nSonsAsked children; the parent remembers each child's pid by index. */
for(nSon=0; nSon < nSonsAsked; nSon++) {
    Log_Print("Setup son #%.2u ", nSon+1);
    if((pid = fork()) == 0) {
        /* Do child stuff init, like connect the pipes, close shared handles */
        return iTMInChild(...); /* A specific function of the child work */
        /* The life of the child must not go beyond that point, i.e. it must
           never continue this loop, or else it will spawn even more processes. */
    }
    else if(pid > 0) {
        /* Father process stuff. Here I fill an array with the pids of the forked */
        /* processes, so that they can be indexed by child number. */
        /* BUGFIX: the array needs its own name -- "pid[nSon] = pid" used the */
        /* same identifier as both the scalar fork() result and the array. */
        sonPids[nSon] = pid;
    }
    else
        return Err_Print(ERR_FORK_FAILED, "fork failed. errno=%d \"%s\"\n", errno, strerror(errno));
}
Log_Print() and Err_Print() are internal functions but quite obvious so I let them like they are.
There is one aspect with the variables that has to be explained. nSon and nSonsAsked should be declared as globals not as stack variables. This way, their value persists in the forked process. This means that the nSon variable will have a different value in each of the children. This allows it to have a simpler numbering scheme than the ownpid() number.
To get it completely right, there are a lot of details to get right. You will have to set signal handlers in the father process to detect the death of a child, likewise the other way round (only possible on Linux, other Unix (at least Solaris) do not support parent death signals).
You have to be aware that open file descriptors in the father process will be also open in the child after fork and it will be the same one. This opens a lot of concurrency problems if you're not aware of it (the solution is using dup() and close() in the right places).
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
/*
 * main -- the parent announces itself, then forks child A and child B; each
 * child prints its own pid and its parent's pid.
 *
 * Returns 0 on success, 1 if a fork() fails.
 */
int main()
{
    pid_t childa, childb;

    system ("clear");
    printf("\n \t \t I am the parent process with ID %d \n",(int)getpid());

    /* flush before forking: when stdout is not a terminal the line above is
       still sitting in the stdio buffer, and every child would inherit a copy
       and print it again */
    fflush(stdout);

    childa=fork();
    if (childa < 0)
    {
        perror("fork");
        return 1;
    }
    if (childa == 0 )
    {
        printf("\nI am a child A with PID %d and my parent ID is %d\n",(int)getpid(),(int)getppid());
    }
    else
    {
        childb = fork();
        if (childb < 0)
        {
            perror("fork");
            return 1;
        }
        if (childb == 0)
        {
            printf("\nI am Child B with ID %d and my parent ID is %d\n",(int)getpid(),(int)getppid());
        }
        else
        {
            /* parent: linger briefly so the children can still report this
               process as their parent */
            sleep(1);
        }
    }
    return 0;
}
In this example the children just sleep for a few random seconds. The parent also keeps all the pids, so we could send signals to communicate... Most of the #includes are commented out because they were not needed where I compiled.
#include <stdlib.h> // exit() ...
#include <stdio.h> // printf() ...
// Compile with -lrt -> cc file_name.c -lrt
//#include <fcntl.h>
//#include <sys/stat.h>
//#include <sys/types.h>
//#include <sys/wait.h> // may need this for wait()
//#include <time.h>
//#include <unistd.h> // and this one for fork()
// Body of every child process -- replace it with whatever work is needed.
// Sleeps for a pseudo-random 0..4 seconds, reports that child number `azon`
// has finished, and terminates the calling process with status 0 (so it
// never returns to the caller).
void start (const int azon) {
    unsigned nap_seconds;

    // seed from the wall clock, then pick the delay
    srand((unsigned) time(NULL));
    nap_seconds = (unsigned) (rand() % 5);

    sleep(nap_seconds);
    printf("%d. process reached the end.\n", azon);
    exit(0);
}
// main -- fork N children that each run start(), then wait for all of them.
// Returns 0.
int main() {
    enum { N = 5 };   // a true constant, so pids is a plain array, not a VLA
    pid_t pids[N];
    int i;
    int started = 0;  // number of children that were actually created

    // The 'for' loop makes 'N' processes with 'fork()'.
    // The children call the start function.
    // After fork() there are two processes: one parent and one child.
    // The value returned by fork() is saved in "pids":
    //   (<0)  something went wrong, no child was created;
    //   (>0)  'we are' in the parent; the number is the child's pid;
    //   (==0) 'we are' in the child.
    for (i = 0; i < N; i++) {
        pids[i] = fork();
        if (pids[i] < 0) {
            // report the failure and stop creating children
            perror("fork");
            break;
        }
        sleep(1);
        if (pids[i] == 0) start(i+1); // start() never returns
        started++;
    }

    // Each wait(NULL) returns only when one child has ended, so this loop
    // finishes once every child that was started has ended.
    for (i = 0; i < started; i++)
        wait(NULL);

    printf("Parent process reached the end\n");
    return 0;
}
Just a little contribution: if you want to create 2 children from the same parent, you could use the code below, in which one father creates 2 child processes (lazy and active).
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
/* Report a failed fork() and terminate with status 1. */
static void fork_failed(void){
    printf("fork error\n");
    exit(1);
}

/*
 * main -- the father creates two children, "lazy" and "active"; each of the
 * three processes prints a label followed by its own pid.
 * Returns 0; exits with status 1 if a fork() fails.
 */
int main (){
    pid_t lazy_pid;
    pid_t active_pid;

    lazy_pid = fork();
    if(lazy_pid < 0){
        fork_failed();
    }
    if(lazy_pid == 0){
        /* the lazy child only reports itself */
        printf("LAZY CHILD:%d\n", getpid());
        return 0;
    }

    /* from here on only the father is running */
    active_pid = fork();
    if(active_pid < 0){
        fork_failed();
    }
    if(active_pid == 0){
        printf("ACTIVE CHILD:%d\n", getpid());
    }
    else{
        printf("FATHER:%d\n", getpid());
    }
    return 0;
}
If you run this code, you should get a similar output:
$ ./a.out
FATHER:14501
ACTIVE CHILD:14503
LAZY CHILD:14502
#include <sys/wait.h>
#include <stdio.h>
#include <unistd.h>
/*
 * main -- the parent forks two children. Alice sums 1..10 and prints the sum
 * and the average; Bob prints one line. The parent waits for BOTH children
 * and then prints their pids and its own.
 *
 * Returns 0 on success, 1 if a fork() fails.
 */
int main()
{
    pid_t AliceID, BobID;
    double n=0;   /* running sum */
    int i1 =0;    /* number of terms added */

    /* fork a child process */
    AliceID = fork();
    if (AliceID < 0) { /* error occurred */
        fprintf(stderr, "Fork Failed\n");
        return 1;
    }
    else if (AliceID == 0) { /* child Alice code */
        for(int i=1; i<11; i++) {
            n = n+i;
            i1++;
        }
        double avg1 = n/i1;
        printf("From Alice: the average of 1,2, …, 10 is the-average-she-calculated");
        printf(" sum = %.2f and avg = %.2f \n",n, avg1);
    }
    else {
        BobID = fork();
        if (BobID < 0) { /* error occurred */
            fprintf(stderr, "Fork Failed\n");
            wait(NULL); /* Alice is already running: reap her before leaving */
            return 1;
        }
        if (BobID == 0) { /* Child Bob code */
            printf("From Bob: I am born to print this and then die.\n");
        } else { /* Parent Code */
            /* there are two children, so wait until none is left -- a single
               wait() returns as soon as the first one ends */
            while (wait(NULL) > 0)
                ;
            printf("From parent: AliceID is %d \n", (int) AliceID);
            printf("From parent: Bob is %d \n", (int) BobID);
            printf("Parent ID %d \n", (int) getpid());
        }
    }
    return 0;
}