C: reading a file line by line - c

Input files have one entry per line and be of the form T S where T is the arrival time and S is the sector to be read.
I have a file.txt where it looks like
1 6
2 7
3 8
Would indicate three disk accesses for sectors 6, 7, and 8 arriving at time 1 2 and 3 respectively.
how do i parse it so that the first number goes to T and the second goes to S?

There are many ways to accomplish this task.
The scanf() (or perhaps fscanf()) method mentioned by pmg is perhaps most commonly taught to new programmers by academia.
The readLine() method, referred to by doniyor is an alternate approach which reads one character at a time to assemble a complete line, which then can be parsed as a string.
You might even try using fgets() to read an entire line, and then parse the numbers out of the line buffer.
The above list of methods is by no means complete. There are many other ways to accomplish the task.
Of course, you have probably have a full working understanding of these common methods, and you are looking for something with some zing! Perhaps the following will help you on your way:
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
int main(
int I__argC,
char *I__argV[]
)
{
int rCode=0;
struct stat statBuf;
FILE *fp = NULL;
char *fileBuf = NULL;
size_t fileBufLength;
char *cp;
/* Verify that caller has supplied I__argV[1], which should be the path
* to the datafile.
*/
if(I__argC != 2)
{
rCode=EINVAL;
fprintf(stderr, "Usage: %s {data file path}\n", I__argV[0]);
goto CLEANUP;
}
/* Get the size of the file (in bytes);
* (assuming that we are not dealing with a sparse file, etc...)
*/
errno=0;
if((-1) == stat(I__argV[1], &statBuf))
{
rCode=errno;
fprintf(stderr, "stat(\"%s\", ...) failed. errno[%d]\n", I__argV[1], errno);
goto CLEANUP;
}
/* Open the caller-specified file in read mode. */
errno = 0;
fp=fopen(I__argV[1], "r");
if(NULL == fp)
{
rCode=errno;
fprintf(stderr, "fopen(\"%s\", \"r\") failed. errno[%d]\n", I__argV[1], errno);
goto CLEANUP;
}
/* Allocate a buffer large enough to hold the entire file content. */
fileBuf=malloc(statBuf.st_size);
if(NULL == fileBuf)
{
rCode=ENOMEM;
fprintf(stderr, "malloc(%zd) failed.\n", statBuf.st_size);
goto CLEANUP;
}
/* Read the file into the fileBuf. */
errno=0;
fileBufLength=fread(fileBuf, 1, statBuf.st_size, fp);
if(fileBufLength != statBuf.st_size)
{
rCode=errno;
fprintf(stderr, "fread() failed to read %zd file bytes. errno[%d]\n",
statBuf.st_size - fileBufLength, errno);
goto CLEANUP;
}
/* Parse the fileBuf for specific data. */
for(cp=fileBuf; cp < fileBuf + fileBufLength; ++cp)
{
long arrivalTime;
long sector;
/* Skip leading white-space (if any) */
if(isspace(*cp))
continue;
/* Parse the 'arrival time'. */
arrivalTime=strtol(cp, &cp, 10);
/* Skip leading white-space (if any) */
while(isspace(*cp))
++cp;
/* Parse the 'sector'. */
sector=strtol(cp, &cp, 10);
printf("%ld, %ld\n", arrivalTime, sector);
}
CLEANUP:
/* Free the fileBuf (if it was successfully allocated) */
if(fileBuf)
free(fileBuf);
/* Close the file (if it was successfully opened) */
if(fp)
fclose(fp);
return(rCode);
}

Related

Segfault in getline

The following code is supposed to read the file "rules.txt" and write it to a device line by line.
The flow should be:
Read line from rules.txt
Echo it to device
The following code always end in segfault because of readline and I have no idea why:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <fcntl.h>
#include <ctype.h>
#include <termios.h>
#include <sys/types.h>
#include <sys/mman.h>
#define BUFFER_LENGTH 256
int main()
{
char *line;
size_t len = BUFFER_LENGTH;
int fd = open("./rules.txt", O_RDONLY);
if(fd == -1)
{ perror("open failed"); return 0; }
FILE* fout = fopen("/sys/class/Rule_Table_Class/Rule_Table_Class_Rule_Table_Device/sysfs_att", "w+");
if(fout == NULL)
{ close(fd); perror("fopen failed, log.txt is busy!"); return 0; }
while (1)
{
line = (char*) malloc(len*sizeof(char));
if(line==NULL){
perror("malloc failed!"); return 0;
}
int bytesRead = getline(&line, &len, fd);
if (bytesRead == -1)
{
perror("Failed to read the message from the device.");
return errno;
}
sprintf(line,"%s","lala");
printf("line = %s", line);
}
fclose(fout);
close(fd);
return 0;
}
EDIT:
I corrected the code and still get a segfault. Here is the corrected code:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <fcntl.h>
#include <ctype.h>
#include <termios.h>
#include <sys/types.h>
#include <sys/mman.h>
#define BUFFER_LENGTH 256
int main()
{
FILE * fp;
char * line = NULL;
size_t len = 0;
ssize_t read;
fp = fopen("./rules.txt", "r");
if(fp == NULL)
{ perror("open failed"); return 0; }
FILE* fout = fopen("/sys/class/Rule_Table_Class/Rule_Table_Class_Rule_Table_Device/sysfs_att", "w+");
if(fout == NULL)
{ perror("fopen failed!"); return 0; }
while (1)
{
ssize_t bytesRead = getline(&line, &len, fp);
if (bytesRead == -1)
{
return 0;
}
printf("line = %s", line);
fprintf(line,"%s",fout);
}
fclose(fout);
fclose(fp);
return 0;
}
First of all, the proper core getline() loop is
/* FILE *in = fopen(..., "r"); */
char *line = NULL;
size_t size = 0;
while (1) {
ssize_t len = getline(&line, &size, in);
if (len < 0)
break;
/* You have 'len' chars at 'line', with line[len] == '\0'. */
}
so, it is not getline() that causes the segfault, it is your fprintf(line, "%s", fout); that should be either fprintf(fout, "%s", line); or just fputs(line, fout);, or fwrite(line, 1, bytesRead, fout); since the line can contain embedded NUL bytes that fprintf() and fputs() consider an end of string mark.
If we modify the code so that the source and target file names are taken as command-line arguments (with - denoting standard input or standard output), this is what I'd personally like to see:
/* SPDX-License-Identifier: CC0-1.0 */
/* This tells the GNU C library to expose POSIX features, including getline(). */
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
int main(int argc, char *argv[])
{
if (argc != 3 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
const char *self = (argc > 0 && argv && argv[0] && argv[0][0]) ? argv[0] : "(this)";
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", self);
fprintf(stderr, " %s SOURCE TARGET\n", self);
fprintf(stderr, "\n");
fprintf(stderr, "This copies all content from SOURCE to TARGET, line by line.\n");
fprintf(stderr, "Use '-' for standard input source or standard output target.\n");
fprintf(stderr, "\n");
return EXIT_SUCCESS;
}
const char *srcpath = argv[1];
const char *dstpath = argv[2];
/* If the path is "-", set it to NULL. */
if (srcpath && !strcmp(srcpath, "-"))
srcpath = NULL;
if (dstpath && !strcmp(dstpath, "-"))
dstpath = NULL;
FILE *src, *dst;
/* Open source for reading. If srcpath is NULL, use stdin. */
if (srcpath) {
src = fopen(srcpath, "r");
if (!src) {
fprintf(stderr, "%s: %s.\n", srcpath, strerror(errno));
return EXIT_FAILURE;
}
} else {
src = stdin;
}
/* Open target for writing. If dstpath is NULL, use stdout. */
if (dstpath) {
dst = fopen(dstpath, "w");
if (!dst) {
fprintf(stderr, "%s: %s.\n", dstpath, strerror(errno));
fclose(src);
return EXIT_FAILURE;
}
} else {
dst = stdout;
}
char *line = NULL;
size_t size = 0;
unsigned long linenum = 0;
while (1) {
ssize_t len = getline(&line, &size, src);
if (len < 0)
break;
linenum++;
if (fwrite(line, 1, len, dst) != (size_t)len) {
fprintf(stderr, "%s: Write error on line %lu.\n", dstpath ? dstpath : "(standard output)", linenum);
fclose(dst);
fclose(src);
return EXIT_FAILURE;
}
}
/* Technically, we don't need to release dynamically allocated (non-shared) memory,
because we're just about to exit, and the OS will automagically do that for us.
We can do this at any point we want during the loop, too. */
free(line);
line = NULL;
size = 0;
/* We do not know why getline() returned -1. Check if an error occurred, and whether we're at end of input. */
if (ferror(src) || !feof(src)) {
fprintf(stderr, "%s: Read error on line %lu.\n", srcpath ? srcpath : "(standard input)", linenum);
fclose(dst);
fclose(src);
return EXIT_FAILURE;
}
/* Check if any write errors have occurred. */
if (ferror(dst)) {
fprintf(stderr, "%s: Write error on line %lu.\n", dstpath ? dstpath : "(standard output)", linenum);
fclose(dst);
fclose(src);
}
/* Read errors should not occur at close time, but it costs very little for us to test anyway. */
if (fclose(src)) {
fprintf(stderr, "%s: Read error on line %lu.\n", srcpath ? srcpath : "(standard input)", linenum);
fclose(dst);
return EXIT_FAILURE;
}
/* Write errors can occur at close time, if the output has been buffered, or the target is on
a remote filesystem. Again, it costs us very little to check. */
if (fclose(dst)) {
fprintf(stderr, "%s: Write error on line %lu.\n", dstpath ? dstpath : "(standard output)", linenum);
return EXIT_FAILURE;
}
/* No errors; target is an identical copy of the source file. */
fprintf(stderr, "%lu lines copied successfully.\n", linenum);
return EXIT_SUCCESS;
}
The SPDX-License-Identifier is a common way to indicate the license of the code. I use CC0-1.0, which basically means "use as you wish, just don't blame the author for any issues: no guarantees, no warranties."
The #define _POSIX_C_SOURCE 200809L tells the GNU C library that we want it to expose POSIX.1-2008 features, which include getline(). If you look at man 3 getline, you'll see in the Synopsis section that glibc 2.10 and later require _POSIX_C_SOURCE to be defined to at least 200809L.
Most of the code is to print the usage, and getting the source and target file names from the command line, and handling - as a special name, "standard stream", so that it can be used even in pipes by specifying - as the input and/or output file name.
The getline() loop uses fwrite() to write the line to the target file. This way, if the input contains embedded NUL bytes (\0), the target will still be identical to the source file.
After the loop, we discard the line buffer, although since the program is just about to exit, we could omit that (since the OS will release all dynamically allocated (non-shared) memory when we exit anyway).
I like code that checks with ferror(src) || !feof(src) if an error occurred in src or src did not reach end of input; and that checks the return value of fclose(), in case a delayed (write) error is reported. Granted, fclose() should never fail for a read-only file, and fclose() should only fail for files we've written to in specific circumstances, but it costs very little to check, and this way the user running the program will be told if the program detected loss of data.
I believe it is morally reprehensible to ignore checking for such errors (especially "because they occur so rarely"), since such tests are the only way the human user can know whether the operation was successful, or if some odd problem occurred. It is up to the human to investigate any problems, and up to our programs to report detectable issues.
The getline function takes a FILE * as a parameter, not an int. Replace the following line:
int fd = open("./rules.txt", O_RDONLY);
By;
FILE *fin = fopen("./rules.txt", "r");
Fix the error checking in the following lines accordingly, like you did with fout.
Then replace the line:
int bytesRead = getline(&line, &len, fd);
Instead it should now use fin:
ssize_t bytesRead = getline(&line, &len, fin);
Note that getline returns ssize_t, not int.
You are also never writing to fout, but I guess you're still working on this code.
Make sure to enable compiler warnings, because your compiler would surely have warned you for using an int parameter where a FILE * was expected.

Error message while reading input file - ‘feof’ makes pointer from integer without a cast

I am writing a C program in Ubuntu operating system. I provide an input text file containing string of characters.The program is required to read and manipulate ONE CHARACTER at time and print the whole content of input file in reverse.
When I compile program, I get following error:
reverseFileContent.c: In function ‘main’:
reverseFileContent.c:51:16: warning: passing argument 1 of ‘feof’ makes pointer from integer without a cast [-Wint-conversion]
while(!feof(inputFile))
Can you please explain the error message to me.
Below is the program:
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define BUFFER_SIZE 256
int main (int argc, char *argv[]){
FILE *inputFile, *outputFile;
int fileSize;
int pointer;
char buffer[BUFFER_SIZE];
/* Check for correct user's inputs. */
if( argc !=3 ) {
fprintf(stderr, "USAGE: %s inputFile outputFile.\n", argv[0]);
exit(-1);
}
/* Make sure input file exists. */
if( (inputFile = fopen(argv[1], O_RDONLY))) {
fprintf(stderr, "Input file doesn't exist.\n");
exit(-1);
}
/* Create output file, if it doesn't exist. Empty the file, if it exists. */
if((outputFile = fopen(argv[2], "a+"))) {
fclose(inputFile);
exit(-1);
}
/* Find the size of the input file. */
fileSize = fseek(inputFile, 0, SEEK_END);
/* Read input file and write to output file in reversed order.
* Use lseek() to move the file pointer to the ith position.
* To set the file pointer to a position use the SEEK_SET flag in lseek().
*/
for(pointer=fileSize-1; pointer>=0; pointer--) {
/* INSERT YOUR CODE HERE */
/*
* In a loop:
* - Read input file into a buffer
* Don't read one byte at a time! Try to read BUFFER_SIZE (i.e., 256) bytes at a time.
* - Write content in the buffer to the output file
*/
while(!feof(inputFile))
{
fgets(buffer, BUFFER_SIZE, inputFile); //reads 256 bytes at a time
fputs (buffer , outputFile );
}
}
fclose(inputFile);
fclose(outputFile);
return(0);
}

How to change STDIN stream to binary

I'm making an upload form via a CGI interface. I'm writing it in C and don't want to use any outside libraries (ie. cgic).
I thought the program was complete, as the first test files uploaded correctly. But they were ASCII files. When I tested with a binary file (JPG). It seems that STDIN is trying to read the binary data as ASCII which creates a problem for characters like \0 which is present at the end of an ASCII file, but is a common character in binary files. The results of uploading a 1.9MB file end up with a 38kB file.
When searching how to change the STDIN stream to binary, I was referred to the command freopen and told to use NULL as the argument for the file. example 1
It says:
If filename is a null pointer, the freopen() function shall attempt to
change the mode of the stream to that specified by mode, as if the
name of the file currently associated with the stream had been used.
In this case, the file descriptor associated with the stream need not
be closed if the call to freopen() succeeds. It is
implementation-defined which changes of mode are permitted (if any),
and under what circumstances.
But when I check the man page on my system with man 3 freopen, it doesn't say any of
this at all. Furthermore, reading the man page, I find out the the
option for binary (adding 'b' to the mode) is no longer recognized and
only exists for archaic compliancy:
The mode string can also include
the letter 'b' either as a last character or as a character between
the characters in any of the two-character strings described above.
This is strictly for compatibility with C89 and has no effect; the 'b'
is ignored on all POSIX conforming systems, including Linux.
So right now I'm completely lost. How can I change the STDIN stream to read binary input?
Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <libgen.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
// Declare constants.
#define BUF_SIZE 4096
#define FILENAME_SIZE 500
#define MARKER_SIZE 100
#define RETURN_FAILURE 0
#define RETURN_SUCCESS 1
#define SEARCH_STRING_1 "filename=\""
#define SEARCH_STRING_2 "\r\n\r\n"
// Declare global variables.
char filename[FILENAME_SIZE + 1];
char *program_name;
// Declare function prototype.
void print_footer (void);
void print_header (void);
void process_input (char *data);
int main (int argc, char *argv[])
{
// Declare variables.
long long ret;
char buf[BUF_SIZE + 1];
// Get program name for error reporting.
program_name = basename(argv[0]);
// Prepare output for browser.
print_header();
// Protect variable against buffer overflow.
buf[BUF_SIZE] = '\0';
// Loop through all the file data.
while(1)
{
// Read in the next block of data.
if((ret = (long long) fread(buf, 1, BUF_SIZE, stdin)) != BUF_SIZE)
{
// Check for error.
if(ferror(stdin) != 0)
{
printf("%s: An error occurred while reading the input file.<br>\n", program_name);
process_input(NULL);
exit(EXIT_FAILURE);
}
// Check for EOF.
else if(feof(stdin) != 0)
break;
}
// Terminate and process uploaded data.
buf[ret] = '\0';
process_input(buf);
}
// Terminate and process uploaded data.
buf[ret] = '\0';
process_input(buf);
// Finish user output, close output file and exit.
print_footer();
process_input(NULL);
exit(EXIT_SUCCESS);
}
void process_input (char *data)
{
// Declare variables.
char *ptr1= NULL;
char *ptr2;
int x = 0;
static FILE *fp;
static int flag = 0;
static char marker[MARKER_SIZE + 1];
// If data is NULL, close output file.
if(data == NULL)
{
if(fclose(fp) == EOF)
{
printf("%s: process_input: close failed (%s)<br>\n", program_name, strerror(errno));
exit(EXIT_FAILURE);
}
return;
}
// Check if this is the first time through.
if(flag == 0)
{
// Get marker.
if((ptr1 = strchr(data, '\n')) == NULL)
{
printf("%s: process_input: strchr(1) failed (\n)<br>\n", program_name);
exit(EXIT_FAILURE);
}
ptr1[0] = '\0';
strcpy(marker, data);
ptr1[0] = '\n';
// Get filename.
if((ptr1 = strstr(data, SEARCH_STRING_1)) == NULL)
{
printf("%s: process_input: strstr(1) failed (%s)<br>\n", program_name, SEARCH_STRING_1);
exit(EXIT_FAILURE);
}
// Advance pointer to start of filename.
ptr1 += 10;
// Find end of filename.
if((ptr2 = strchr(ptr1, '"')) == NULL)
{
printf("%s: process_input: strchr(2) failed (\")<br>\n", program_name);
exit(EXIT_FAILURE);
}
// Terminate and store filename.
ptr2[0] = '\0';
strcpy(filename, ptr1);
ptr2[0] = '"';
// Remove spaces from filename.
while(filename[x] != '\0')
{
if(filename[x] == ' ')
filename[x] = '.';
x++;
}
// Open output file.
if((fp = fopen(filename, "wb")) == NULL)
{
printf("%s: process_input: fopen failed (%s) (%s)<br>\n", program_name, strerror(errno), filename);
exit(EXIT_FAILURE);
}
// Find start of file data.
if((ptr1 = strstr(data, SEARCH_STRING_2)) == NULL)
{
printf("%s: process_input: strstr(2) failed (%s)<br>\n", program_name, SEARCH_STRING_2);
fclose(fp);
exit(EXIT_FAILURE);
}
// Set flag.
flag++;
// Advance pointer to start of file data.
ptr1 += 4;
// Change STDIN stream to binary.
if(freopen(NULL, "rb", stdin) == NULL)
{
printf("%s: process_input: freopen failed (%s)<br>\n", program_name, strerror(errno));
fclose(fp);
exit(EXIT_FAILURE);
}
}
// Catch everything else.
else
{
ptr1 = data;
if((ptr2 = strstr(ptr1, marker)) != NULL)
ptr2[0 - 2] = '\0';
}
// Write file data.
if(fwrite(ptr1, 1, strlen(ptr1), fp) != strlen(ptr1))
{
printf("%s: process_input: write failed (%s)<br>\n", program_name, strerror(errno));
fclose(fp);
exit(EXIT_FAILURE);
}
}
void print_footer (void)
{
printf("\nMade it!\n");
}
void print_header (void)
{
printf("Content-type: text/plain\r\n\r\n");
}
Ok, it appears what #NominalAnimal said was correct. You can store binary data in a string, but the moment you use any function in the string.h library, it almost always changes what is stored in that string (if the data is binary).
The easy solution is to make a separate function that takes a pointer to the binary data and do your string searches in that function, returning what pertinent information is needed. That way, the original data is never changed.
'stdin' is a macro of STDIN_FILENO, which is egal to 0. See also 'unistd.h'.
You are not showing your code, but I think you stop when you encounter a '\0' or a non-ascii char, since you said you were using 'fread()'.
You have to stop when fread() function returns 0, which means it stopped to read : it encountered EOF.

Segmentation fault when I fopen

I'm new to C file management . My teacher wanted as a homework to create a functon that copy from a source file to destination file . I created but it gives me errors all the time : Segmentation Fault .
void source_to_destination(FILE *source , FILE *destination)
{
char name_source[10], name_destination[10],line[100];
memset(line,0,sizeof(line));
memset(name_source,0,sizeof(name_source));
memset(name_destination,0,sizeof(name_destination));
read_name_file(name_source);
read_name_file(name_destination);
source = fopen(name_source,"r");
destination = fopen(name_destination,"w");
while(fgets(line,sizeof(line),source) != NULL)
{
fputs(line,destination);
}
}
When copying data from one file to another, reading and writing in binary is preferred. There are a number of reasons that reading with line-oriented input functions such as fgets or getline will fail to properly read all characters in a file. Text output functions suffer similar shortcomings (e.g. attempting to write characters outside the printable range or characters that have alternate meaning as ASCII)
Reading and writing from a file in binary mode using fread and fwrite is not any more difficult than using fgets and fputs. However, using fread and fwrite you are guaranteed a correct and accurate copy of your data by avoiding the pitfalls inherent in attempting a general file copy in text mode.
If you know there is nothing but text contained in your source file, then there is nothing wrong with copying it in text mode. That just means you will have to write another function to handle files that are not text. (and generally you don't see different copy routines based on file contents). Reading and writing in binary eliminates all of these considerations.
The following is a short example of a filecopy function that will read all bytes in a file into a buffer and then write the contents of the buffer to your destination file. (a buffered read/write is generally much more efficient and you can easily adjust the buffer size by adjusting MAXS) The function returns the number of bytes copied on success, -1 otherwise. Look it over and let me know if you have any questions:
#include <stdio.h>
#include <stdlib.h>
#define MAXS 256
int filecopy (char *source, char *dest);
int main (int argc, char **argv) {
if (argc < 3) { /* validate 2 arguments given */
fprintf (stderr, "usage: %s file1 file2\n", argv[0]);
return 1;
}
int filesize = 0;
if ((filesize = filecopy (argv[1], argv[2])) == -1) {
fprintf (stderr, "error: filecopy failed.\n");
return 1;
}
printf ("\n copied '%s' -> '%s' ('%d' bytes)\n\n",
argv[1], argv[2], filesize);
return 0;
}
int filecopy (char *source, char *dest)
{
char *buf = NULL; /* buffer used to read MAXS bytes from file */
size_t nbytes = 0; /* number of bytes read from file */
size_t idx = 0; /* file index (length) */
FILE *fp = fopen (source, "r"); /* stream pointer */
if (!fp) { /* open source for reading */
fprintf (stderr, "error: file open failed '%s'.\n", source);
return -1;
}
/* allocate MAXS size read buf initially */
if (!(buf = calloc (MAXS, sizeof *buf))) {
fprintf (stderr, "error: virtual memory exhausted.\n");
return -1;
}
/* while data read MAXS *buf from file - realloc for next read */
while ((nbytes = fread (buf+idx, sizeof *buf, MAXS, fp)))
{
idx += nbytes; /* update total bytes read */
if (nbytes < MAXS) break; /* end-of-file reached */
/* full read - realloc for next */
void *tmp;
if (!(tmp = realloc (buf, (idx + nbytes) * sizeof *buf))) {
fprintf (stderr, "error: virtual memory exhausted.\n");
exit (EXIT_FAILURE);
}
buf = tmp;
}
fclose (fp); /* close input stream */
if (!(fp = fopen (dest, "w+b"))) { /* open output stream */
fprintf (stderr, "error: file open failed '%s'.\n", dest);
exit (EXIT_FAILURE);
}
fwrite (buf, sizeof *buf, idx, fp);
fclose (fp); /* close output stream */
free (buf);
return (int)idx;
}
Compile
gcc -Wall -Wextra -O3 -o bin/filecopy_simple filecopy_simple.c
Input File (binary)
-rw-r--r-- 1 david david 66672 Nov 19 13:17 acarsout2.bin
Use/Output
$ ./bin/filecopy_simple dat/acarsout2.bin dat/acarsout3.bin
copied 'dat/acarsout2.bin' -> 'dat/acarsout3.bin' ('66672' bytes)
Verification
$ ls -al acarsout[23]*
-rw-r--r-- 1 david david 66672 Nov 19 13:17 acarsout2.bin
-rw-r--r-- 1 david david 66672 Dec 13 14:51 acarsout3.bin
$ diff dat/acarsout2.bin dat/acarsout3.bin
$
The following code
compiles cleanly
performs the desire operation (copy a file)
perform appropriate error checking
is getting the file names from the command line
You will need to modify this to get the file names via your existing functions
always cleans up after itself including closing open files
demonstrates how to pass the FILE* variables --into-- the function
the sub function prototype, etc will need to be modified
if you want to open the files in the sub function
then have main() close them.
Here is a suggested method of performing the desired operation
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#define MAX_LINE_LENGTH (256)
// prototypes
void source_to_destination(FILE *source , FILE *destination);
int main( int argc, char * argv[] )
{
if( 3 != argc )
{ // not correct number of command line parameters
fprintf( stderr, "USAGE: %s <sourceFili> <destinationFile>\n", argv[0]);
exit( EXIT_FAILURE );
}
// implied else, correct number of arguments
FILE *fp_in = NULL;
if( NULL == (fp_in = fopen( argv[1], "r") ) )
{ // then fopen failed
fprintf( stderr, "fopen for input file: %s failed due to %s\n", argv[1], strerror(errno) );
exit( EXIT_FAILURE );
}
// implied else, fopen input file successful
FILE *fp_out = NULL;
if( NULL == (fp_out = fopen( argv[2], "w") ) )
{ // then fopen failed
fprintf( stderr, "fopen for output file: %s failed due to %s\n", argv[2], strerror(errno) );
fclose( fp_in ); // cleanup
exit( EXIT_FAILURE );
}
// implied else, fopen output file successful
source_to_destination( fp_in, fp_out );
fclose( fp_in );
fclose( fp_out );
return 0;
} // end function: main
void source_to_destination(FILE *source , FILE *destination)
{
char line[ MAX_LINE_LENGTH ];
while(fgets(line,sizeof(line),source) )
{
if( EOF == fputs(line,destination) )
{ // then fputs failed
fprintf( stderr, "fputs to output file failed due to %s\n", strerror(errno) );
fclose( source );
fclose( destination );
exit( EXIT_FAILURE );
}
}
} // end function: source_to_destination

Applying fork() and pipe() (or fifo()) on counting words code

I've completed writing of counting words code finally. It counts total number of words in files. (i.e. txt). Now, I want to use multiple fork() to access and read every file. I studied in the last week. Besides, I use global variable to hold number of counted words. As far as I know, If I apply fork(), used global variables are assigned as 0. To avoid it, I tried to use mmap() and similar functions this is okey. But, I also want to use pipe() also (fifo() if it is possible) to communicate (hold values of numbers).
I use nftw() function to go in folders and files. My logic is on the below picture. How can use fork() and pipe() (fifo()) on this code ? fork() is really complicated for me because of my inexperience. I'm new using of pipe() and fork(). According to my idea logic of the code is that if I can use fork() and pipe(), there will be fork() every file(i.e. txt) and access them by using fork. If there is another folder and there are files, again creates fork() from one of created forks , then access file. I try to explain also drawing below. Thank you. I want to learn using of them.
int countInEveryFolder(const char *dir)
is used because I don't know how to count files until the next folder in nftw() function. Number of files is necessary because it is number of fork.
Every folder should be parent of files. The files are included by the folder.
Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <dirent.h>
#include <errno.h>
#include <ftw.h>
#include <ctype.h>
#include <sys/mman.h>
#include <locale.h>
#include <errno.h>
#define MAX_PATH_LEN 2048
unsigned long total_words = 0UL;
unsigned long total_dirs = 0UL;
unsigned long total_files = 0UL;
// Just proves counting number of file in a folder
int countInEveryFolder(const char *dir) {
struct stat stDirInfo;
struct dirent * stFiles;
DIR * stDirIn;
char szFullName[MAX_PATH_LEN];
char szDirectory[MAX_PATH_LEN];
struct stat stFileInfo;
int numOfFile = 0;
strncpy( szDirectory, dir, MAX_PATH_LEN - 1 );
if (lstat( szDirectory, &stDirInfo) < 0)
{
perror (szDirectory);
return 0;
}
if (!S_ISDIR(stDirInfo.st_mode))
return 0;
if ((stDirIn = opendir( szDirectory)) == NULL)
{
perror( szDirectory );
return 0;
}
while (( stFiles = readdir(stDirIn)) != NULL)
{
if (!strcmp(stFiles->d_name, ".") || !strcmp(stFiles->d_name, ".."))
continue;
sprintf(szFullName, "%s/%s", szDirectory, stFiles -> d_name );
if (lstat(szFullName, &stFileInfo) < 0)
perror ( szFullName );
/* is the file a directory? */
if (S_ISREG(stFileInfo.st_mode))
{
printf( "Filename: %s\n", szFullName );
numOfFile++;
}
} // end while
closedir(stDirIn);
return numOfFile;
}
// Count words in files.
unsigned long count_words_in_file(const char *const filename)
{
unsigned long count = 0UL;
int errnum = 0;
int c;
FILE *in;
in = fopen(filename, "rt");
if (in == NULL) {
errnum = errno;
fprintf(stderr, "%s: %s.\n", filename, strerror(errnum));
errno = errnum;
return 0UL;
}
/* Skip leading whitespace. */
do {
c = getc(in);
} while (isspace(c));
/* Token loop. */
while (c != EOF) {
/* This token is a word, if it starts with a letter. */
if (isalpha(c))
count++;
/* Skip the rest of this token. */
while (!isspace(c) && c != EOF)
c = getc(in);
/* Skip the trailing whitespace. */
while (isspace(c))
c = getc(in);
}
/* Paranoid checking for I/O errors. */
if (!feof(in) || ferror(in)) {
fclose(in);
fprintf(stderr, "Warning: %s: %s.\n", filename, strerror(EIO));
errnum = EIO;
} else
if (fclose(in)) {
fprintf(stderr, "Warning: %s: %s.\n", filename, strerror(EIO));
errnum = EIO;
}
errno = errnum;
return count;
}
// Recursively go in folders
int nftw_callback(const char *filepath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
{
// Directory
if (typeflag == FTW_DP || typeflag == FTW_D)
{
total_dirs++;
printf("%*s%s\n", ftwbuf->level * 4, "", filepath);
//countInEveryFolder(filepath);
}
// Folder
else if (typeflag == FTW_F)
{
total_files++;
total_words += count_words_in_file(filepath);
printf("%*s%s\n", ftwbuf->level * 4, "", filepath);
}
return 0;
}
/* Error message */
void err_sys(const char *msg)
{
perror(msg);
fflush(stdout);
exit(EXIT_FAILURE);
}
int main(int argc, char *argv[])
{
total_files = total_dirs = total_words = 0UL;
if (nftw(argv[1], nftw_callback, 15, FTW_PHYS) == 0) {
/* Success! */
printf("%s: %lu files, %lu directories, %lu words total.\n",
argv[1], total_files, total_dirs, total_words);
} else {
/* Failed... */
err_sys("ntfw");
}
putchar('\n');
//printf( "\nTotal words = %d\n\n", *wordCount);
//printf( "\nTotal folders = %d\n\n", *folderCount);
//printf( "\nTotal childs = %d\n\n", *childCount); //fork()
return 0;
}
To start I would write the program with two phases. A single-process phase in which all the file-paths are queued up (into a linked-list or dequeue), and a multi-process phase in which the worker processes receive work via their pipe() and send counts back to the main process via their pipe(). The main process would use select() to multiplex the input from its children.
Once you understand how to use select() with pipe()s, then work on having the filepath discovery be concurrent.
This design would be much easier to implement in Go, node.js, or greenlet with Python, but learning how to do it in C gives you a level of understanding for the underlying operations that you don't get with newer languages.

Resources