How not to open a file twice in linux? - c

I have a linked list with an fd and a string I used to open this file in each entry. I want to open and add files to this list only if this file is not already opened, because I open and parse this files and do not want to do it twice. My idea was to compare the filename with every single name in this list, but my program do it multiple times and one file in Linux can have multiple names (soft/hard links). I think it should not be so complicated, because its easy for the OS to check, whether I already used a inode or not, r?
I already tried to open the same file with and without flock, but I always get a new fd.

When you successfully open a file use fstat on the file. Check to see if the st_ino and st_dev of the struct stat filed in by fstat have already been recorded in your linked list. If so then close the file descriptor and move on to the next file. Otherwise add the file descriptor, the file name and st_ino and st_dev values to the list.
You can instead use stat to check before opening the file, but using fstat after will be slightly faster if the usual case is that file hasn't already been opened.

In situations like this, it's often useful to consider your data structures. Change to a data structure which does not allow duplicates, such as a hash table.
Maintain a set of which data you've seen before. I've used a hash table for this set. As per #RossRidge's answer, use the inode and device as the key. This allows duplicates to be discovered in O(1) time.
Here is an example implementation.
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <glib.h>
#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
static int get_fd(GHashTable *fds, const char *filename, int mode) {
int fd;
struct stat stat;
int keysize = 33;
char key[keysize]; /* Two 64 bit numbers as hex and a separator */
/* Resolve any symlinks */
char *real_filename = realpath(filename, NULL);
if( real_filename == NULL ) {
printf("%s could not be resolved.\n", filename);
return -1;
}
/* Open and stat */
fd = open( real_filename, mode );
if( fd < 0 ) {
printf("Could not open %s: %s.\n", real_filename, strerror(errno));
return -1;
}
if( fstat(fd, &stat) != 0 ) {
printf("Could not stat %s: %s.\n", real_filename, strerror(errno));
return -1;
}
/* Make a key for tracking which data we've processed.
This uses both the inode and the device it's on.
It could be done more efficiently as a bit field.
*/
snprintf(key, keysize, "%lx|%lx", (long int)stat.st_ino, (long int)stat.st_dev);
/* See if we've already processed that */
if( g_hash_table_contains(fds, key) ) {
return 0;
}
else {
/* Note that we've processed it */
g_hash_table_add(fds, key);
return fd;
}
}
int main(int argc, char** argv) {
int mode = O_RDONLY;
int fd;
GHashTable *fds = g_hash_table_new(&g_str_hash, &g_str_equal);
for(int i = 1; i < argc; i++) {
char *filename = argv[i];
fd = get_fd(fds, filename, mode);
if( fd == 0 ) {
printf("%s has already been processed.\n", filename);
}
else if( fd < 0 ) {
printf("%s could not be processed.\n", filename);
}
else {
printf("%s: %d\n", filename, fd);
}
}
}
And here's a sample result.
$ touch one two three
$ ln one one_link
$ ln -s two two_sym
$ ./test one* two* three*
one: 3
one_link has already been processed.
two: 5
two_sym has already been processed.
three: 7

As long as you don't close the successfully and intentionally opened files, you can use nonblocking flock to prevent another lock on the same file:
#include <unistd.h>
#include <sys/file.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <assert.h>
int openAndLock(const char* fn){
int fd = -1;
if(((fd = open(fn, O_RDONLY)) >= 0) && (flock(fd, LOCK_EX|LOCK_NB) == 0)){
fprintf(stderr, "Successfully opened and locked %s\n", fn);
return fd;
}else{
fprintf(stderr, "Failed to open or lock %s\n", fn);
close(fd);
return -1;
}
}
int main(int argc, char** argv){
for(int i=1; i<argc; i++){
openAndLock(argv[i]);
}
return 0;
}
Example:
$ touch foo
$ ln foo bar
$ ./a.out foo foo
Successfully opened and locked foo
Failed to open or lock foo
$ ./a.out foo bar
Successfully opened and locked foo
Failed to open or lock bar

Related

program that checks if the files match

My program check if two files (with a different path / name) match or not. The hard and symbolic links between the files are followed to determine it.
How can I modify the program to see what happens if the files I want to compare are device files? Or directories? Both seem valid uses. Also checking the device
#include <unistd.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
void printUsage()
{
printf("Use: ./leg <name_file1> <name_file2>.\n");
}
int createStat(const char *file, struct stat *fileStat)
{
if (stat(file, fileStat) < 0)
{
fprintf(stderr,"Error! File %s does not exist.\n", file);
return 0;
}
return 1;
}
int main(int argc, char **argv)
{
if (argc < 3)
{
printf("Insufficient number of parameters.\n");
printUsage();
return -1;
}
struct stat fileStat1;
struct stat fileStat2;
if (createStat(argv[1], &fileStat1) == 0)
{
return -1;
}
if (createStat(argv[2], &fileStat2) == 0)
{
return -1;
}
if (S_ISREG(fileStat1.st_mode) && S_ISREG(fileStat2.st_mode)) {
if ((fileStat1.st_dev == fileStat2.st_dev) && (fileStat1.st_ino == fileStat2.st_ino)) {
printf("Files '%s' and '%s' coincide.\n", argv[1], argv[2]);
} else
printf("Files '%s' and '%s' do not match.\n", argv[1], argv[2]);
} else
fprintf(stderr,"'%s' and '%s' there are no files.\n", argv[1], argv[2]);
return 0;
}
For regular files and directories, comparing the st_dev and st_ino fields is sufficient.
For paths representing character or block devices, comparing the st_dev and st_ino fields will tell you if they are the same file, ie: different paths to the same directory entry, including symbolic link indirections. Comparing the st_rdev fields will tell if they represent the same device, which is also useful but a different thing.
Also note that fprintf(stderr,"Error! File %s does not exist.\n", file); may produce a misleading message: access failure may be caused by other reasons. It is simple and efficient to produce the correct message this way:
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
int createStat(const char *file, struct stat *fileStat) {
if (stat(file, fileStat) < 0) {
fprintf(stderr, "Cannot stat file %s: %s\n", file, strerror(errno));
return 0;
}
return 1;
}

Is there a way to read a text file byte by byte asynchronously with POSIX threads?

I am trying to read and copy its contents to another file asynchronously with POSIX threads in C. Assuming a file contains "aabbcc" and i have 4 threads, how can i copy "aabbcc" to another file with threads asynchronously in C. This part has been stuck in my head for the entire day. What i have done so far is shown below.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <fcntl.h>
#include <pthread.h>
#include <aio.h>
#include <math.h> //for ceil() and floor()
#include <sys/types.h>
#include <unistd.h>
#define FILE_SIZE 1024 //in bytes
//>cc code.c -o code.out -lrt -lpthread
//>./code.out
char alphabets[52] = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
'p','q','r','s','t','u','v','w','x','y','z',
'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
'P','Q','R','S','T','U','V','W','X','Y','Z'};
long prepareInputFile(char* filename)
{
FILE *fp;
fp = fopen(filename, "w");
if(fp == NULL)
{
printf("Cannot write to input file\n");
return;
}
int index;
char str[FILE_SIZE];
int rand_size = (rand() % 1024)+1;
for(index = 0;index < rand_size;index++) /*Generate the file with random sizes in bytes*/
{
int num2 = (rand() % 52); /*Get a random char in char array*/
putc(alphabets[num2],fp); /*Write that char to the file pointed to by fp*/
}
putc('\n',fp);
fseek(fp, 0L, SEEK_END);
long size = ftell(fp);
fseek(fp, 0L, SEEK_SET);
return size;
}
//Perform main operation inside this function
void *writeToFileAsync(void *src_file, void *dest_file,
void *thread, void *t_count, void *filesize)
{
int readfd, writefd;
struct aiocb aio_write, aio_read;
memset(&aio_read, 0, sizeof(aio_read));
aio_read.aio_fildes = readfd;
aio_read.aio_nbytes = (int)filesize/(int)t_count;
readfd = open((char *)src_file, O_RDONLY);
if(readfd < 0)
{
printf("Cannot open the file for reading\n");
}
memset(&aio_write, 0, sizeof(aio_write));
aio_read.aio_fildes = writefd;
aio_read.aio_nbytes = (int)filesize/(int)t_count;
writefd = open((void *)dest_file, O_CREAT | O_WRONLY);
if(writefd < 0)
{
printf("Cannot open the file for writing\n");
}
return;
}
int main(int argc, char *argv[])
{
int i,threadCount;
char sourcePath[100], destPath[100];
strcpy(sourcePath,argv[1]);
if(strcmp(sourcePath, "-") == 0)
{
getcwd(sourcePath, sizeof(sourcePath));
strcpy(sourcePath, strcat(sourcePath, "/source.txt"));
}
else
{
strcpy(sourcePath, strcat(sourcePath, "source.txt"));
}
printf("Source path is: %s\n", sourcePath);
strcpy(destPath,argv[2]);
if(strcmp(destPath, "-") == 0)
{
getcwd(destPath, sizeof(destPath));
strcpy(destPath, strcat(destPath, "/destination.txt"));
}
else
{
strcpy(destPath, strcat(destPath, "destination.txt"));
}
printf("Dest path is: %s\n", destPath);
threadCount = strtol(argv[3],NULL,10);
long file_size = prepareInputFile(sourcePath);
pthread_t threads[threadCount];
for(i=0;i<threadCount;i++)
{
pthread_create(&threads[i],NULL,(void *)writeToFileAsync, NULL);
}
return 0;
}
Any help would be appreciated.
It is unlikely that parallelizing this operation would help, as it is probably bound by I/O rather than CPU time, and copying this way will certainly not be faster than simply copying via system call.
However, if you wanted to do this, one method would be: map the input file into memory (with mmap() or the equivalent), create the destination buffer or memory-mapped file, divide the source and destination files into equal slices, and have each thread copy its slice of the file. You might use memcpy(), but a modern compiler can see what your loop is doing and optimize it.
Even this is not going to be as fast as reading or mapping the source file into a buffer, then writing it back out from the same buffer with write(). If all you need to do is copy the file to disk, you don’t need to copy the bytes at all. In fact, you might even be able to make a second link to the file on disk.
This would probably work best if the slices are aligned to page boundaries. Be very careful about having two threads write to the same cache line, as this creates a race condition.

Permission Denied When Trying to Append to File C

I am attempting to open files and append to them using C. I am dynamically naming the directory based on process Id and creating the filenames based on the "room" that has been randomly selected in the loop. My intention is to open the file, append the room name into the file, and then close the file and move to the next room and perform the same function. The issue I am having is with "open". It seems to only be returning -1, which indicates an error. The error message is stating "Permission denied". I am confused by this because I appear to be setting the proper permissions in the open function. I tried using fopen(), but that kept producing a segmentation fault 11. Is there an issue with my roomFilePath declaration and usage or is my usage of open incorrect? Here is the portion of the code that contains the issue. The makeRooms() function is where I check to see if the file was opened correctly. Thanks!
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <sys/stat.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#define NUM_ROOMS 10
#define NUM_USED_ROOMS 7
#define MAX_CONNECTIONS 6
time_t t;
char* usedRooms[NUM_USED_ROOMS];
int i;
char directoryName[100];
char* baseDirectory = "walterer.rooms.";
int processId;
char roomFilePath[75];
int adjacencyMatrix[7][7] = {0};
int useableConnections;
int e;
int totConnections = 0;
int openRoom;
int file_descriptor;
char* roomNames[] = {
"cleveland",
"columbus",
"dallas",
"toledo",
"miami",
"sarasota",
"boston",
"chicago",
"denver",
"phoenix"
};
int connections[10] = {
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
};
void makeDirectory() {
processId = getpid();
sprintf(directoryName, "%s%d", baseDirectory,processId);
//printf("%s\n", directoryName);
mkdir(directoryName, 777);
}
void makeRooms() {
/* Initializes random number generator */
srand((unsigned) time(&t));
/* Create 7 rooms */
for(i = 0; i < NUM_USED_ROOMS; ){
/* Generate random number between 0 to 10 */
int randomNumber = rand() % NUM_ROOMS;
/* Loop as long the array does not contain any connections at the index */
while(connections[randomNumber] == 0) {
/* Append the room path to the end of my ONID path */
sprintf(roomFilePath,"%s/%s", directoryName, roomNames[randomNumber]);
printf("%s\n",roomFilePath);
/* Create file */
FILE *filePointer;
/* Open file to append*/
//filePointer = open(roomFilePath, O_WRONLY | O_CREAT, 0600);
//!!!Returning -1
file_descriptor = open(roomFilePath, O_APPEND, 0600);
printf("%d\n",file_descriptor);
if (file_descriptor == -1)
{
printf("open() failed on \"%s\"\n", roomFilePath);
perror("In createRooms()");
exit(1);
}
/*if (filePointer == NULL)
{
fprintf(stderr, "Error Creating File\n");
printf("something went wrong with read()! %s\n", strerror(errno));
}*/
/* Print the room name in the file */
/* SEG FAULT HERE!!!! */
fprintf(filePointer, "ROOM NAME: %s\n", roomNames[randomNumber]);
/* Close the file */
//fclose(filePointer);
usedRooms[i] = roomNames[randomNumber];
connections[randomNumber] = 1;
//printf("Room %d is %s \n", i+1, roomNames[randomNumber]);
i++;
}
}
}
First, you're missing part of the required flags argument to open() in this line:
file_descriptor = open(roomFilePath, O_APPEND, 0600);
The only open() flag you're passing is O_APPEND, but open() also requires at least one of the O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, or O_SEARCH flags. (the last two are rarely used.)
Your code should be something like
file_descriptor = open(roomFilePath, O_RDONLY | O_APPEND, 0600);
Per the POSIX standard for open():
SYNOPSIS
#include <sys/stat.h> #include <fcntl.h>
int open(const char *path, int oflag, ...);
...
Values for oflag are constructed by a bitwise-inclusive OR of flags
from the following list, defined in <fcntl.h>. Applications shall
specify exactly one of the first five values (file access modes) below
in the value of oflag:
O_EXEC
Open for execute only (non-directory files). The result is
unspecified if this flag is applied to a directory.
O_RDONLY
Open for reading only.
O_RDWR
Open for reading and writing. The result is undefined if this flag
is applied to a FIFO.
O_SEARCH
Open directory for search only. The result is unspecified if this
flag is applied to a non-directory file.
O_WRONLY
Open for writing only.
In your posted code, of course you get a segmentation fault at this line:
/* SEG FAULT HERE!!!! */
fprintf(filePointer, "ROOM NAME: %s\n", roomNames[randomNumber]);
The filePointer variable has not been initialized in your posted code, so using the value causes a segmentation fault.

How does ls sort filenames?

I'm trying to write a function that mimics the output of the ls command in Unix. I was originally trying to perform this using scandir and alphasort, and this did indeed print the files in the directory, and it did sort them, but for some reason, this sorted list does not seem to match the same "sorted list" of filenames that ls gives.
For example, if I have a directory that contains file.c, FILE.c, and ls.c.
ls displays them in the order: file.c FILE.c ls.c
But when I sort it using alphasort/scandir, it sorts them as: FILE.c file.c ls.c
How does ls sort the files in the directory such that it gives such a differently ordered result?
To emulate default ls -1 behaviour, make your program locale-aware by calling
setlocale(LC_ALL, "");
near the beginning of your main(), and use
count = scandir(dir, &array, my_filter, alphasort);
where my_filter() is a function that returns 0 for names that begin with a dot ., and 1 for all others. alphasort() is a POSIX function that uses the locale collation order, same order as strcoll().
The basic implementation is something along the lines of
#define _POSIX_C_SOURCE 200809L
#define _ATFILE_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <locale.h>
#include <string.h>
#include <dirent.h>
#include <stdio.h>
#include <errno.h>
static void my_print(const char *name, const struct stat *info)
{
/* TODO: Better output; use info too, for 'ls -l' -style output? */
printf("%s\n", name);
}
static int my_filter(const struct dirent *ent)
{
/* Skip entries that begin with '.' */
if (ent->d_name[0] == '.')
return 0;
/* Include all others */
return 1;
}
static int my_ls(const char *dir)
{
struct dirent **list = NULL;
struct stat info;
DIR *dirhandle;
int size, i, fd;
size = scandir(dir, &list, my_filter, alphasort);
if (size == -1) {
const int cause = errno;
/* Is dir not a directory, but a single entry perhaps? */
if (cause == ENOTDIR && lstat(dir, &info) == 0) {
my_print(dir, &info);
return 0;
}
/* Print out the original error and fail. */
fprintf(stderr, "%s: %s.\n", dir, strerror(cause));
return -1;
}
/* We need the directory handle for fstatat(). */
dirhandle = opendir(dir);
if (!dirhandle) {
/* Print a warning, but continue. */
fprintf(stderr, "%s: %s\n", dir, strerror(errno));
fd = AT_FDCWD;
} else {
fd = dirfd(dirhandle);
}
for (i = 0; i < size; i++) {
struct dirent *ent = list[i];
/* Try to get information on ent. If fails, clear the structure. */
if (fstatat(fd, ent->d_name, &info, AT_SYMLINK_NOFOLLOW) == -1) {
/* Print a warning about it. */
fprintf(stderr, "%s: %s.\n", ent->d_name, strerror(errno));
memset(&info, 0, sizeof info);
}
/* Describe 'ent'. */
my_print(ent->d_name, &info);
}
/* Release the directory handle. */
if (dirhandle)
closedir(dirhandle);
/* Discard list. */
for (i = 0; i < size; i++)
free(list[i]);
free(list);
return 0;
}
int main(int argc, char *argv[])
{
int arg;
setlocale(LC_ALL, "");
if (argc > 1) {
for (arg = 1; arg < argc; arg++) {
if (my_ls(argv[arg])) {
return EXIT_FAILURE;
}
}
} else {
if (my_ls(".")) {
return EXIT_FAILURE;
}
}
return EXIT_SUCCESS;
}
Note that I deliberately made this more complex than strictly needed for your purposes, because I did not want you to just copy and paste the code. It will be easier for you to compile, run, and investigate this program, then port the needed changes -- possibly just the one setlocale("", LC_ALL); line! -- to your own program, than try and explain to your teacher/lecturer/TA why the code looks like it was copied verbatim from somewhere else.
The above code works even for files specified on the command line (the cause == ENOTDIR part). It also uses a single function, my_print(const char *name, const struct stat *info) to print each directory entry; and to do that, it does call stat for each entry.
Instead of constructing a path to the directory entry and calling lstat(), my_ls() opens a directory handle, and uses fstatat(descriptor, name, struct stat *, AT_SYMLINK_NOFOLLOW) to gather the information in basically the same manner as lstat() would, but name being a relative path starting at the directory specified by descriptor (dirfd(handle), if handle is an open DIR *).
It is true that calling one of the stat functions for each directory entry is "slow" (especially if you do /bin/ls -1 style output). However, the output of ls is intended for human consumption; and very often piped through more or less to let the human view it at leisure. This is why I would personally do not think the "extra" stat() call (even when not really needed) is a problem here. Most human users I know of tend to use ls -l or (my favourite) ls -laF --color=auto anyway. (auto meaning ANSI colors are used only if standard output is a terminal; i.e. when isatty(fileno(stdout)) == 1.)
In other words, now that you have the ls -1 order, I would suggest you modify the output to be similar to ls -l (dash ell, not dash one). You only need to modify my_print() for that.
In alphanumeric (dictionary) order.
That changes with language, of course. Try:
$ LANG=C ls -1
FILE.c
file.c
ls.c
And:
$ LANG=en_US.utf8 ls -1
file.c
FILE.c
ls.c
That is related to the "collating order". Not a simple issue by any measure.

Applying fork() and pipe() (or fifo()) on counting words code

I've completed writing of counting words code finally. It counts total number of words in files. (i.e. txt). Now, I want to use multiple fork() to access and read every file. I studied in the last week. Besides, I use global variable to hold number of counted words. As far as I know, If I apply fork(), used global variables are assigned as 0. To avoid it, I tried to use mmap() and similar functions this is okey. But, I also want to use pipe() also (fifo() if it is possible) to communicate (hold values of numbers).
I use nftw() function to go in folders and files. My logic is on the below picture. How can use fork() and pipe() (fifo()) on this code ? fork() is really complicated for me because of my inexperience. I'm new using of pipe() and fork(). According to my idea logic of the code is that if I can use fork() and pipe(), there will be fork() every file(i.e. txt) and access them by using fork. If there is another folder and there are files, again creates fork() from one of created forks , then access file. I try to explain also drawing below. Thank you. I want to learn using of them.
int countInEveryFolder(const char *dir)
is used because I don't know how to count files until the next folder in nftw() function. Number of files is necessary because it is number of fork.
Every folder should be parent of files. The files are included by the folder.
Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <dirent.h>
#include <errno.h>
#include <ftw.h>
#include <ctype.h>
#include <sys/mman.h>
#include <locale.h>
#include <errno.h>
#define MAX_PATH_LEN 2048
unsigned long total_words = 0UL;
unsigned long total_dirs = 0UL;
unsigned long total_files = 0UL;
// Just proves counting number of file in a folder
int countInEveryFolder(const char *dir) {
struct stat stDirInfo;
struct dirent * stFiles;
DIR * stDirIn;
char szFullName[MAX_PATH_LEN];
char szDirectory[MAX_PATH_LEN];
struct stat stFileInfo;
int numOfFile = 0;
strncpy( szDirectory, dir, MAX_PATH_LEN - 1 );
if (lstat( szDirectory, &stDirInfo) < 0)
{
perror (szDirectory);
return 0;
}
if (!S_ISDIR(stDirInfo.st_mode))
return 0;
if ((stDirIn = opendir( szDirectory)) == NULL)
{
perror( szDirectory );
return 0;
}
while (( stFiles = readdir(stDirIn)) != NULL)
{
if (!strcmp(stFiles->d_name, ".") || !strcmp(stFiles->d_name, ".."))
continue;
sprintf(szFullName, "%s/%s", szDirectory, stFiles -> d_name );
if (lstat(szFullName, &stFileInfo) < 0)
perror ( szFullName );
/* is the file a directory? */
if (S_ISREG(stFileInfo.st_mode))
{
printf( "Filename: %s\n", szFullName );
numOfFile++;
}
} // end while
closedir(stDirIn);
return numOfFile;
}
// Count words in files.
unsigned long count_words_in_file(const char *const filename)
{
unsigned long count = 0UL;
int errnum = 0;
int c;
FILE *in;
in = fopen(filename, "rt");
if (in == NULL) {
errnum = errno;
fprintf(stderr, "%s: %s.\n", filename, strerror(errnum));
errno = errnum;
return 0UL;
}
/* Skip leading whitespace. */
do {
c = getc(in);
} while (isspace(c));
/* Token loop. */
while (c != EOF) {
/* This token is a word, if it starts with a letter. */
if (isalpha(c))
count++;
/* Skip the rest of this token. */
while (!isspace(c) && c != EOF)
c = getc(in);
/* Skip the trailing whitespace. */
while (isspace(c))
c = getc(in);
}
/* Paranoid checking for I/O errors. */
if (!feof(in) || ferror(in)) {
fclose(in);
fprintf(stderr, "Warning: %s: %s.\n", filename, strerror(EIO));
errnum = EIO;
} else
if (fclose(in)) {
fprintf(stderr, "Warning: %s: %s.\n", filename, strerror(EIO));
errnum = EIO;
}
errno = errnum;
return count;
}
// Recursively go in folders
int nftw_callback(const char *filepath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
{
// Directory
if (typeflag == FTW_DP || typeflag == FTW_D)
{
total_dirs++;
printf("%*s%s\n", ftwbuf->level * 4, "", filepath);
//countInEveryFolder(filepath);
}
// Folder
else if (typeflag == FTW_F)
{
total_files++;
total_words += count_words_in_file(filepath);
printf("%*s%s\n", ftwbuf->level * 4, "", filepath);
}
return 0;
}
/* Error message */
void err_sys(const char *msg)
{
perror(msg);
fflush(stdout);
exit(EXIT_FAILURE);
}
int main(int argc, char *argv[])
{
total_files = total_dirs = total_words = 0UL;
if (nftw(argv[1], nftw_callback, 15, FTW_PHYS) == 0) {
/* Success! */
printf("%s: %lu files, %lu directories, %lu words total.\n",
argv[1], total_files, total_dirs, total_words);
} else {
/* Failed... */
err_sys("ntfw");
}
putchar('\n');
//printf( "\nTotal words = %d\n\n", *wordCount);
//printf( "\nTotal folders = %d\n\n", *folderCount);
//printf( "\nTotal childs = %d\n\n", *childCount); //fork()
return 0;
}
To start I would write the program with two phases. A single-process phase in which all the file-paths are queued up (into a linked-list or dequeue), and a multi-process phase in which the worker processes receive work via their pipe() and send counts back to the main process via their pipe(). The main process would use select() to multiplex the input from its children.
Once you understand how to use select() with pipe()s, then work on having the filepath discovery be concurrent.
This design would be much easier to implement in Go, node.js, or greenlet with Python, but learning how to do it in C gives you a level of understanding for the underlying operations that you don't get with newer languages.

Resources