Fastest way to get char* from file in C - arrays

I have function which gets arguments: char* filedata (stores whole file) and FILE *fp (the opened file).
void read_file(char *filedata, FILE *fp){
char buffer[1000];
while(fgets(buffer, sizeof(buffer), fp))
{
char *new_str;
if((new_str = malloc(strlen(filedata) + strlen(buffer)+1)) != NULL)
{
new_str[0] = '\0'; // ensures the memory is an empty string
strcat(new_str, filedata);
strcat(new_str, buffer);
}
else
{
printf("malloc failed!\n");
}
strcpy(filedata, new_str);
}
fclose(fp);
}
But this isn't too fast... is there faster way to read the whole file?

Below's my function illustrating how I usually do it. Not sure how fast it is compared to all other possible C implementations. But I'd imagine they're all pretty similar unless poorly programmed in one way or another, which might lead to slower, less efficient execution.
/* ==========================================================================
* Function: readfile ( FILE *ptr, int *nbytes )
* Purpose: read open file ptr into internal buffer
* --------------------------------------------------------------------------
* Arguments: ptr (I) FILE * to already open (via fopen)
* file, whose contents are to be read
* nbytes (O) int * returning #bytes in returned buffer
* --------------------------------------------------------------------------
* Returns: ( unsigned char * ) buffer with ptr's contents
* --------------------------------------------------------------------------
* Notes: o caller should free() returned output buffer ptr when finished
* ======================================================================= */
/* --- entry point --- */
unsigned char *readfile ( FILE *ptr, int *nbytes ) {
/* ---
* allocations and declarations
* ------------------------------- */
unsigned char *outbuff = NULL; /* malloc'ed and realloc'ed below */
int allocsz=0, reallocsz=500000, /*total #bytes allocated, #realloc */
blksz=9900, nread=0, /* #bytes to read, #actually read */
buffsz = 0; /* total #bytes in buffer */
/* ---
* collect all bytes from ptr
* ----------------------------- */
if ( ptr != NULL ) { /* return NULL error if no input */
while ( 1 ) { /* read all input from file */
if ( buffsz+blksz + 99 >= allocsz ) { /* first realloc more memory */
allocsz += reallocsz; /*add reallocsz to current allocation*/
if ( (outbuff=realloc(outbuff,allocsz)) == NULL ) /* reallocate */
goto end_of_job; } /* quit with NULL ptr if failed */
nread = fread(outbuff+buffsz,1,blksz,ptr); /* read next block */
if ( nread < 1 ) break; /* all done, nothing left to read */
buffsz += nread; /* add #bytes from current block */
} /* --- end-of-while(1) --- */
fclose(ptr); /* close fopen()'ed file ptr */
} /* --- end-of-if(ptr!=NULL) --- */
end_of_job:
if ( nbytes != NULL ) *nbytes = buffsz; /* #bytes in outbuff */
return ( outbuff ); /* back to caller with output or NULL*/
} /* --- end-of-function readfile() --- */

With some caveats, you can read the entire file into an appropriately-sized buffer in one fell swoop using the fread() function.
The following code outlines how to open the file, determine its size, allocate a buffer of that size, then read the file's data (all of it) into that buffer. But note the caveats about the fseek and ftell functions (discussed afterwards):
#include <stdio.h>
#include <stdlib.h>
int main(void)
{
char* filename = "MyFile.txt"; // Or whatever
FILE* fp = fopen(filename, "rb"); // Open in binary mode
int seek = fseek(fp, 0, SEEK_END); // CAVEAT: Files in BINARY mode may not support SEEK_END ...
if (seek != 0) {
printf("Cannot fseek on binary file!\n");
fclose(fp);
return 1;
}
size_t filesize = (size_t)ftell(fp); // ... but this is not reliable if opened in TEXT mode!
char* filedata = calloc(filesize + 1, 1); // Add 1 for the terminating "nul" character
rewind(fp);
fread(filedata, 1, filesize, fp); // Read whole file
// Clean up ...
fclose(fp);
free(filedata);
return 0;
}
Caveats:
Note that files opened in BINARY mode (as in the "rb" mode argument I gave in the fopen() call) are not required to support the SEEK_END origin in calls to fseek(); if this is the case on your platform, then this answer offers some alternatives to determine the file's size. From cppreference:
… Binary streams are not required to support SEEK_END, in
particular if additional null bytes are output.
However, on the other hand, opening the file in TEXT mode (using "rt") will make the call to ftell effectively meaningless, in terms of the required size for your input buffer and the value specified to fread; from cppreference:
If the stream is open in text mode, the value returned by this
function is unspecified and is only meaningful as the input to
fseek().
Also note that, as pointed out in the comments, the fseek() and ftell() functions will fail if the size of the file is larger than the maximum value that can be stored in a long int variable; to handle such cases, you can use the (platform-dependent) 64-bit equivalents, as I described in an answer I posted some time ago.

Annotating your function (not mentioning the leaks, etc) and counting the operations on the character buffers:
void read_file(char *filedata, FILE *fp){
char buffer[1000];
while(fgets(buffer, sizeof(buffer), fp)) // <<-- NEW_SIZE
{
char *new_str;
if((new_str = malloc(strlen(filedata) // <<-- OLD_SIZE
+ strlen(buffer) // <<-- NEW_SIZE
+1)) != NULL)
{
new_str[0] = '\0'; // ensures the memory is an empty string
strcat(new_str, filedata); // <<-- OLD_SIZE
strcat(new_str, buffer); // <<-- OLD_SIZE + NEW_SIZE
}
else
{
printf("malloc failed!\n");
}
strcpy(filedata, new_str); // <<-- OLD_SIZE + NEW_SIZE
}
fclose(fp);
}
fgets(). strlen(), strcat() and strcpy() all need to loop over a character buffer.
Only the fgets() is actually needed, the rest of the copying can be avoided.
Adding the number of passes over the buffers:
sum of operations per loop: 4 * OLD_SIZE + 4 * NEW_SIZE
and: keep in mind that OLD_SIZE is actually SUM(NEW_SIZE), recursively, so your function has
QUADRATIC behavior wrt the number of times the loop iterates.(basically the number of lines read)
So you end up with:
Number of times a character is inspected
= 4 * N_LINE * LINE_SIZE
+ 8 * (NLINE * (NLINE-1) ) * LINE_SIZE
;
, which implies that for a 100 line file you need about 40K passes over the string(s).
[this is the "Schlemiel, the painter" story]

Related

How can I write a function That returns all the content of a text file using fgets in C programming?

I want to write a function char* lire(FILE* f) that it supposed to read a file of text and return its content, I want to return it and not just read it & display it.
I would like to use fgets.
This code works but it's not what I want
char *lire(FILE *f)
{
char *content;
content = (char*)malloc((strlen(content) + 1) * sizeof(char));
while (fgets(content,120000, f) )
{
printf("%s", content);
}
return 0;
}
Instead, I tried this to return the text file but it just shows the first line of my text file
char *lire(FILE *f)
{
char *content;
content = (char*)malloc((strlen(content) + 1) * sizeof(char));
while (fgets(content,120000, f) )
{
return content;
}
}
There are several ways to read an entire text file into a single string using fgets(), but all of them will use fgets() iteratively. You can try to determine the size of the file upfront and then allocate sufficient space and read into that space. This works for disk files where you can seek on the file; it doesn't work if the input comes from a pipe, terminal, socket or most other things that are not disk files. There's also a TOCTOU (time of check, time of use) problem; file sizes can change while your code is reading them.
Alternatively, you can simply read into allocated space, reallocating more space when it is needed. This works with any type of input file.
Using fgets() means that the file cannot usefully contain null bytes. That's because fgets() does not report how much data it read, so you have to use strlen() to find out how long the data was, and strlen() stops at the first null byte it encounters.
Using incremental allocation, you might end up with code like this:
/* SO 7028-9928 */
/* Slurp a file using fgets() */
/* #include "slurp.h" */
/* SOF - slurp.h */
#include <stdio.h>
extern char *slurp_file(FILE *fp);
/* EOF - slurp.h */
#include <stdlib.h>
#include <string.h>
#define INIT_ALLOC 1024
#define MIN_SPACE 256
#define MAX_EXCESS 256
char *slurp_file(FILE *fp)
{
size_t offset = 0;
size_t bufsiz = INIT_ALLOC;
char *buffer = malloc(bufsiz);
if (buffer == NULL)
return NULL;
while (fgets(buffer + offset, bufsiz - offset, fp) != NULL)
{
/* Assumes data does not contain null bytes */
/* Generic problem using fgets() */
size_t newlen = strlen(buffer + offset);
offset += newlen;
if (bufsiz - offset < MIN_SPACE)
{
size_t new_size = bufsiz * 2;
char *new_data = realloc(buffer, new_size);
if (new_data == NULL)
{
free(buffer);
return NULL;
}
bufsiz = new_size;
buffer = new_data;
}
}
if (bufsiz - offset > MAX_EXCESS)
buffer = realloc(buffer, offset + 1);
return buffer;
}
int main(void)
{
char *data;
if ((data = slurp_file(stdin)) != NULL)
{
printf("Size: %zu\n", strlen(data));
printf("Data: [[%s]]\n", data);
free(data);
}
return 0;
}
When the program is run on its own source code, it produces output like:
Size: 1362
Data: [[/* SO 7028-9928 */
/* Slurp a file using fgets() */
…
return 0;
}
]]
One alternative involves using fread() instead of fgets() — this can handle binary data, but you need to revise the function interface to report both the length of the data and the pointer to the start of the data.

A more elegant way to parse

I'm kind of new to C.
I need to write a small function that opens a configuration file that has 3 lines, each line contains a path to files/directories that I need to extract.
I wrote this program and it seem to work:
void readCMDFile(char* cmdFile,char directoryPath[INPUT_SIZE], char inputFilePath[INPUT_SIZE],char outputFilePath [INPUT_SIZE]) {
//open files
int file = open(cmdFile, O_RDONLY);
if (file < 0) {
handleFailure();
}
char buffer[BUFF_SIZE];
int status;
int count;
while((count=read(file,buffer,sizeof(buffer)))>0)
{
int updateParam = UPDATE1;
int i,j;
i=0;
j=0;
for (;i<count;i++) {
if (buffer[i]!='\n'&&buffer[i]!=SPACE&&buffer[i]!='\0') {
switch (updateParam){
case UPDATE1:
directoryPath[j] = buffer[i];
break;
case UPDATE2:
inputFilePath[j] = buffer[i];
break;
case UPDATE3:
outputFilePath[j] = buffer[i];
break;
}
j++;
} else{
switch (updateParam){
case UPDATE1:
updateParam = UPDATE2;
j=0;
break;
case UPDATE2:
updateParam = UPDATE3;
j=0;
break;
}
}
}
}
if (count < 0) {
handleFailure();
}
}
but it is incredibly unintuitive and pretty ugly, so I thought there must be a more elegant way to do it. are there any suggestions?
Thanks!
Update: a config file content will look like that:
/home/bla/dirname
/home/bla/bla/file1.txt
/home/bla/bla/file2.txt
Your question isn't one about parsing the contents of the file, it is simply one about reading the lines of the file into adequate storage within a function in a manner that the object containing the stored lines can be return to the calling function. This is fairly standard, but you have a number of ways to approach it.
The biggest consideration is not knowing the length of the lines to be read. You say there are currently 3-lines to be read, but there isn't any need to know beforehand how many lines there are (by knowing -- you can avoid realloc, but that is about the only savings)
You want to create as robust and flexible method you can for reading the lines and storing them in a way that allocates just enough memory to hold what is read. A good approach is to declare a fixed-size temporary buffer to hold each line read from the file with fgets and then to call strlen on the buffer to determine the number of characters required (as well as trimming the trailing newline included by fgets) Since you are reading path information the predefined macro PATH_MAX can be used to adequately size your temporary buffer to insure it can hold the maximum size path usable by the system. You could also use POSIX geline instead of fgets, but we will stick to the C-standard library for now.
The basic type that will allow you to allocate storage for multiple lines in your function and return a single pointer you can use in the calling function is char ** (a pointer to pointer to char -- or loosely an dynamic array of pointers). The scheme is simple, you allocate for some initial number of pointers (3 in your case) and then loop over the file, reading a line at a time, getting the length of the line, and then allocating length + 1 characters of storage to hold the line. For example, if you allocate 3 pointers with:
#define NPATHS 3
...
char **readcmdfile (FILE *fp, size_t *n)
{
...
char buf[PATH_MAX] = ""; /* temp buffer to hold line */
char **paths = NULL; /* pointer to pointer to char to return */
size_t idx = 0; /* index counter (avoids dereferencing) */
...
paths = calloc (NPATHS, sizeof *paths); /* allocate NPATHS pointers */
if (!paths) { /* validate allocation/handle error */
perror ("calloc-paths");
return NULL;
}
...
while (idx < NPATHS && fgets (buf, sizeof buf, fp)) {
size_t len = strlen (buf); /* get length of string in buf */
...
paths[idx] = malloc (len + 1); /* allocate storage for line */
if (!paths[idx]) { /* validate allocation */
perror ("malloc-paths[idx]"); /* handle error */
return NULL;
}
strcpy (paths[idx++], buf); /* copy buffer to paths[idx] */
...
return paths; /* return paths */
}
(note: you can eliminate the limit of idx < NPATHS, if you include the check before allocating for each string and realloc more pointers, as required)
The remainder is just the handling of opening the file and passing the open file-stream to your function. A basic approach is to either provide the filename on the command line and then opening the filename provided with fopen (or read from stdin by default if no filename is given). As with every step in your program, you need to validate the return and handle any error to avoid processing garbage (and invoking Undefined Behavior)
A simple example would be:
int main (int argc, char **argv) {
char **paths; /* pointer to pointer to char for paths */
size_t i, n = 0; /* counter and n - number of paths read */
/* open file given by 1st argument (or read stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("fopen-failed");
return 1;
}
paths = readcmdfile (fp, &n); /* call function to read file */
/* passing open file pointer */
if (!paths) { /* validate return from function */
fprintf (stderr, "error: readcmdfile failed.\n");
return 1;
}
for (i = 0; i < n; i++) { /* output lines read from file */
printf ("path[%lu]: %s\n", i + 1, paths[i]);
free (paths[i]); /* free memory holding line */
}
free (paths); /* free pointers */
return 0;
}
Putting all the pieces together, adding the code the trim the '\n' read and included in buf by fgets, and adding an additional test to make sure the line you read actually fit in buf, you could do something like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h> /* for PATH_MAX */
#define NPATHS 3
/* read lines from file, return pointer to pointer to char on success
* otherwise return NULL. 'n' will contain number of paths read from file.
*/
char **readcmdfile (FILE *fp, size_t *n)
{
char buf[PATH_MAX] = ""; /* temp buffer to hold line */
char **paths = NULL; /* pointer to pointer to char to return */
size_t idx = 0; /* index counter (avoids dereferencing) */
*n = 0; /* zero the pointer passed as 'n' */
paths = calloc (NPATHS, sizeof *paths); /* allocate NPATHS pointers */
if (!paths) { /* validate allocation/handle error */
perror ("calloc-paths");
return NULL;
}
/* read while index < NPATHS & good read into buf
* (note: instead of limiting to NPATHS - you can simply realloc paths
* when idx == NPATHS -- but that is for later)
*/
while (idx < NPATHS && fgets (buf, sizeof buf, fp)) {
size_t len = strlen (buf); /* get length of string in buf */
if (len && buf[len - 1] == '\n') /* validate last char is '\n' */
buf[--len] = 0; /* overwrite '\n' with '\0' */
else if (len == PATH_MAX - 1) { /* check buffer full - line to long */
fprintf (stderr, "error: path '%lu' exceeds PATH_MAX.\n", idx);
return NULL;
}
paths[idx] = malloc (len + 1); /* allocate storage for line */
if (!paths[idx]) { /* validate allocation */
perror ("malloc-paths[idx]"); /* handle error */
return NULL;
}
strcpy (paths[idx++], buf); /* copy buffer to paths[idx] */
}
*n = idx; /* update 'n' to contain index - no. of lines read */
return paths; /* return paths */
}
int main (int argc, char **argv) {
char **paths; /* pointer to pointer to char for paths */
size_t i, n = 0; /* counter and n - number of paths read */
/* open file given by 1st argument (or read stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("fopen-failed");
return 1;
}
paths = readcmdfile (fp, &n); /* call function to read file */
/* passing open file pointer */
if (!paths) { /* validate return from function */
fprintf (stderr, "error: readcmdfile failed.\n");
return 1;
}
for (i = 0; i < n; i++) { /* output lines read from file */
printf ("path[%lu]: %s\n", i + 1, paths[i]);
free (paths[i]); /* free memory holding line */
}
free (paths); /* free pointers */
return 0;
}
(note: if you allocate memory -- it is up to you to preserve a pointer to the beginning of each block -- so it can be freed when it is no longer needed)
Example Input File
$ cat paths.txt
/home/bla/dirname
/home/bla/bla/file1.txt
/home/bla/bla/file2.txt
Example Use/Output
$ ./bin/readpaths <paths.txt
path[1]: /home/bla/dirname
path[2]: /home/bla/bla/file1.txt
path[3]: /home/bla/bla/file2.txt
As you can see the function has simply read each line of the input file, allocated 3 pointers, allocated for each line and assigned the address for each block to the corresponding pointer and then returns a pointer to the collection to main() where it is assigned to paths there. Look things over and let me know if you have further questions.
I recommend looking into regular expressions. That way you read everything, then match with regular expressions and handle your matches.
Regular expressions exist for this purpose: to make parsing elegant.
If I were you, I will create a method for if/else blocks. I feel like they're redundant.
switch(updateParam) {
case UPDATE1:
method(); /*do if/else here*/
break;
...............
...............
}
However, you can still put them there if you do not need the method for other times and you concern about performance issues as function call costs more than just collective instructions.
In your program, you are passing 3 array of char to store the 3 lines read from the file. But this is very inefficient as the input file may contain more lines and in future, you may have the requirement to read more than 3 lines from the file. Instead, you can pass the array of char pointers and allocate memory to them and copy the content of lines read from the file. As pointed by Jonathan (in comment), if you use standard I/O then you can use function like fgets() to read lines
from input file.
Read a line from the file and allocate memory to the pointer and copy the line, read from the file to it. If the line is too long, you can read remaining part in consecutive calls to fgets() and use realloc to expand the existing memory, the pointer is pointing to, large enough to accommodate the remaining part of the line read.
Putting these all together, you can do:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUF_SZ 100
#define MAX_LINES 3 /* Maximum number of lines to be read from file */
int readCMDFile(const char* cmdFile, char *paths[MAX_LINES]) {
int count, next_line, line_cnt, new_line_found;
char tmpbuf[BUF_SZ];
FILE *fp;
fp = fopen(cmdFile, "r");
if (fp == NULL) {
perror ("Failed to open file");
return -1;
}
next_line = 1; /* Keep track of next line */
count = 1; /* Used to calculate the size of memory, if need to reallocte
* in case when a line in the file is too long to read in one go */
line_cnt = 0; /* Keep track of index of array of char pointer */
new_line_found = 0;
while ((line_cnt < MAX_LINES) && (fgets (tmpbuf, BUF_SZ, fp) != NULL)) {
if (tmpbuf[strlen(tmpbuf) - 1] == '\n') {
tmpbuf[strlen(tmpbuf) - 1] = '\0';
new_line_found = 1;
} else {
new_line_found = 0;
}
if (next_line) {
paths[line_cnt] = calloc (sizeof (tmpbuf), sizeof (char));
if (paths[line_cnt] == NULL) {
perror ("Failed to allocate memory");
return -1;
}
next_line = 0;
count = 1;
} else {
char *ptr = realloc (paths[line_cnt], sizeof (tmpbuf) * (++count));
if (ptr == NULL) {
free (paths[line_cnt]);
perror ("Failed to reallocate memory");
return -1;
} else {
paths[line_cnt] = ptr;
}
}
/* Using strcat to copy the buffer to allocated memory because
* calloc initialize the block of memory with zero, so it will
* be same as strcpy when first time copying the content of buffer
* to the allocated memory and fgets add terminating null-character
* to the buffer so, it will concatenate the content of buffer to
* allocated memory in case when the pointer is reallocated */
strcat (paths[line_cnt], tmpbuf);
if (new_line_found) {
line_cnt++;
next_line = 1;
}
}
fclose(fp);
return line_cnt;
}
int main(void) {
int lines_read, index;
const char *file_name = "cmdfile.txt";
char *paths[MAX_LINES] = {NULL};
lines_read = readCMDFile(file_name, paths);
if (lines_read < 0) {
printf ("Failed to read file %s\n", file_name);
}
/* Check the output */
for (index = 0; index < lines_read; index++) {
printf ("Line %d: %s\n", index, paths[index]);
}
/* Free the allocated memory */
for (index = 0; index < lines_read; index++) {
free (paths[index]);
paths[index] = NULL;
}
return 0;
}
Output:
$ cat cmdfile.txt
/home/bla/dirname
/home/bla/bla/file1.txt
/home/bla/bla/file2.txt
$ ./a.out
Line 0: /home/bla/dirname
Line 1: /home/bla/bla/file1.txt
Line 2: /home/bla/bla/file2.txt
Note that the above program is not taking care of empty lines in the file as it has not been mentioned in the question. But if you want, you can add that check just after removing the trailing newline character from the line read from the file.

Copy a file with buffers of different sizes for read and write

I have been doing some practice problems for job interviews and I came across a function that I can't wrap my mind on how to tackle it. The idea is to create a function that takes the name of two files, and the allowed buffer size to read from file1 and allowed buffer size for write to file2. if the buffer size is the same, I know how to go trough the question, but I am having problems figuring how to move data between the buffers when the sizes are of different. Part of the constraints is that we have to always fill the write buffer before writing it to file. if file1 is not a multiple of file2, we pad the last buffer transfer with zeros.
// input: name of two files made for copy, and their limited buffer sizes
// output: number of bytes copied
int fileCopy(char* file1,char* file2, int bufferSize1, int bufferSize2){
int bytesTransfered=0;
int bytesMoved=o;
char* buffer1, *buffer2;
FILE *fp1, *fp2;
fp1 = fopen(file1, "r");
if (fp1 == NULL) {
printf ("Not able to open this file");
return -1;
}
fp2 = fopen(file2, "w");
if (fp2 == NULL) {
printf ("Not able to open this file");
fclose(fp1);
return -1;
}
buffer1 = (char*) malloc (sizeof(char)*bufferSize1);
if (buffer1 == NULL) {
printf ("Memory error");
return -1;
}
buffer2 = (char*) malloc (sizeof(char)*bufferSize2);
if (buffer2 == NULL) {
printf ("Memory error");
return -1;
}
bytesMoved=fread(buffer1, sizeof(buffer1),1,fp1);
//TODO: Fill buffer2 with maximum amount, either when buffer1 <= buffer2 or buffer1 > buffer2
//How do I iterate trough file1 and ensuring to always fill buffer 2 before writing?
bytesTransfered+=fwrite(buffer2, sizeof(buffer2),1,fp2);
fclose(fp1);
fclose(fp2);
return bytesTransfered;
}
How should I write the while loop for the buffer transfers before the fwrites?
I am having problems figuring how to move data between the buffers when the sizes are of different
Layout a plan. For "some practice problems for job interviews", a good plan and ability to justify it is important. Coding, although important, is secondary.
given valid: 2 FILE *, 2 buffers and their sizes
while write active && read active
while write buffer not full && reading active
if read buffer empty
read
update read active
append min(read buffer length, write buffer available space) of read to write buffer
if write buffer not empty
pad write buffer
write
update write active
return file status
Now code it. A more robust solution would use a struct to group the FILE*, buffer, size, offset, length, active variables.
// Return true on problem
static bool rw(FILE *in_s, void *in_buf, size_t in_sz, FILE *out_s,
void *out_buf, size_t out_sz) {
size_t in_offset = 0;
size_t in_length = 0;
bool in_active = true;
size_t out_length = 0;
bool out_active = true;
while (in_active && out_active) {
// While room for more data
while (out_length < out_sz && in_active) {
if (in_length == 0) {
in_offset = 0;
in_length = fread(in_buf, in_sz, 1, in_s);
in_active = in_length > 0;
}
// Append a portion of `in` to `out`
size_t chunk = min(in_length, out_sz - out_length);
memcpy((char*) out_buf + out_length, (char*) in_buf + in_offset, chunk);
out_length += chunk;
in_length -= chunk;
in_offset += chunk;
}
if (out_length > 0) {
// Padding only occurs, maybe, on last write
memset((char*) out_buf + out_length, 0, out_sz - out_length);
out_active = fwrite(out_buf, out_sz, 1, out_s) == out_sz;
out_length = 0;
}
}
return ferror(in_s) || ferror(out_s);
}
Other notes;
Casting malloc() results not needed. #Gerhardh
// buffer1 = (char*) malloc (sizeof(char)*bufferSize1);
buffer1 = malloc (sizeof *buffer1 * bufferSize1);
Use stderr for error messages. #Jonathan Leffler
Open the file in binary.
size_t is more robust for array/buffer sizes than int.
Consider sizeof buffer1 vs. sizeof (buffer1) as parens not needed with sizeof object
while(bytesMoved > 0) {
for(i=0; i<bytesMoved && i<bufferSize2; i++)
buffer2[i]=buffer1[i];
bytesTransfered+=fwrite(buffer2, i,1,fp2);
bytesMoved-=i;
}
If bufferSize1 is smaller than the filesize you need an outer loop.
As the comments to your question have indicated, this solution is not the best way to transfer data from 1 file to another file. However, your case has certain restrictions, which this solution accounts for.
(1) Since you are using a buffer, you do not need to read and write 1 char at a time, but instead you can make as few calls to those functions possible.
size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream);
:from the man page for fread, nmemb can = bufferSize1
(2) You will need to check the return from fread() (i.e. bytesMoved) and compare it with both of the bufferSize 1 and 2. If (a) bytesMoved (i.e. return from fread()) is equal to bufferSize1 or if (b) bufferSize2 is less than bufferSize1 or the return from fread(), then you know that there is still data that needs to be read (or written). So, therefore you should begin the next transfer of data, and when completed return to the previous step you left off on.
Note: The pointer to the File Stream in fread() and fwrite() will begin where it left off in the event that the data is larger than the bufferSizes.
PseudoCode:
/* in while() loop continue reading from file 1 until nothing is left to read */
while (bytesMoved = fread(buffer1, sizeof(buffer1), bufferSize1, fp1))
{
/* transfer from buffer1 to buffer2 */
for(i = 0; i < bytesMoved && i < bufferSize2; i++)
buffer2[i] = buffer1[i];
buffer2[i] = '\0';
iterations = 1; /* this is just in case your buffer2 is super tiny and cannot store all from buffer1 */
/* in while() loop continue writing to file 2 until nothing is left to write
to upgrade use strlen(buffer2) instead of bufferSize2 */
while (bytesTransfered = fwrite(buffer2, sizeof(buffer2), bufferSize2, fp2))
{
/* reset buffer2 & write again from buffer1 to buffer2 */
for(i = bufferSize2 * iterations, j = 0; i < bytesMoved && j < bufferSize2; i++, j++)
buffer2[j] = buffer1[i];
buffer2[j] = '\0';
iterations++;
}
/* mem reset buffer1 to prepare for next data transfer*/
}

Reading content from multiple files in C

Example:
Three files
hi.txt
Inside of txt: "May we be"
again.txt
Inside of txt: "The ones who once"
final.txt
Inside of txt: "knew C"
And then, another file called "order"
order.txt
Inside of txt:
"hi.txt;6"
"again.txt;7"
"final.txt;3"
What I want: read the first file name, open it, list the content, wait 6 seconds, read the second name, open it, list the content, wait 7 seconds, read the third name, open it, list the content, wait 3 seconds.
If I do it without opening the content (you'll see a second while on my code) and list the names, it works, yet for some reason it doesn't when it's about the content.
orderFile = fopen("order.txt","r");
while(fscanf(orderFile,"%49[^;];%d",fileName,&seconds) == 2)
{
contentFile = fopen(fileName,"r");
while(fscanf(contentFile,"%[^\t]",textContent) == 1)
{
printf("%s\n", textContent);
}
sleep(seconds);
fclose(contentFile);
}
fclose(orderFile);
Output:
May we be
(Waits 7 seconds)
Program closes with "RUN SUCCESSFUL"
EDIT#
It works now, as you guys said, this was the problem:
Old:
while(fscanf(orderFile,"%49[^;];%d",fileName,&seconds) == 2)
New:
while(fscanf(orderFile," %49[^;];%d",fileName,&seconds) == 2)
I'm having a "hard" time to completely understand it, what does the space does? doesn't accept enters? spaces? What exactly is it?
Don't use fscanf for that
int
main()
{
FILE *orderFile = fopen("order.txt", "r");
if (orderFile != NULL)
{
int seconds;
char line[128];
/*
* fgets, read sizeof line characters or unitl '\n' is encountered
* this will read one line if it has less than sizeof line characters
*/
while (fgets(line, sizeof line, orderFile) != NULL)
{
/*
* size_t is usually unsigned long int, and is a type used
* by some standard functions.
*/
size_t fileSize;
char *fileContent;
FILE *contentFile;
char fileName[50];
/* parse the readline with scanf, extract fileName and seconds */
if (sscanf(line, "%49[^;];%d", fileName, &seconds) != 2)
continue;
/* try opening the file */
contentFile = fopen(fileName,"r");
if (contentFile == NULL)
continue;
/* seek to the end of the file */
fseek(contentFile, 0, SEEK_END);
/*
* get current position in the stream,
* it's the file size, since we are at the end of it
*/
fileSize = ftell(contentFile);
/* seek back to the begining of the stream */
rewind(contentFile);
/*
* request space in memory to store the file's content
* if the file turns out to be too large, this call will
* fail, and you will need a different approach.
*
* Like reading smaller portions of the file in a loop.
*/
fileContent = malloc(1 + fileSize);
/* check if the system gave us space */
if (fileContent != NULL)
{
size_t readSize;
/* read the whole content from the file */
readSize = fread(fileContent, 1, fileSize, contentFile);
/* add a null terminator to the string */
fileContent[readSize] = '\0';
/* show the contents */
printf("%s\n", fileContent);
/* release the memory back to the system */
free(fileContent);
}
sleep(seconds);
fclose(contentFile);
}
fclose(orderFile);
}
return 0;
}
Everything is barely explained in the code, read the manuals if you need more information.

Reading text file into an array of lines in C

Using C I would like to read in the contents of a text file in such a way as to have when all is said and done an array of strings with the nth string representing the nth line of the text file. The lines of the file can be arbitrarily long.
What's an elegant way of accomplishing this? I know of some neat tricks to read a text file directly into a single appropriately sized buffer, but breaking it down into lines makes it trickier (at least as far as I can tell).
Thanks very much!
Breaking it down into lines means parsing the text and replacing all the EOL (by EOL I mean \n and \r) characters with 0.
In this way you can actually reuse your buffer and store just the beginning of each line into a separate char * array (all by doing only 2 passes).
In this way you could do one read for the whole file size+2 parses which probably would improve performance.
It's possible to read the number of lines in the file (loop fgets), then create a 2-dimensional array with the first dimension being the number of lines+1. Then, just re-read the file into the array.
You'll need to define the length of the elements, though. Or, do a count for the longest line size.
Example code:
inFile = fopen(FILENAME, "r");
lineCount = 0;
while(inputError != EOF) {
inputError = fscanf(inFile, "%s\n", word);
lineCount++;
}
fclose(inFile);
// Above iterates lineCount++ after the EOF to allow for an array
// that matches the line numbers
char names[lineCount][MAX_LINE];
fopen(FILENAME, "r");
for(i = 1; i < lineCount; i++)
fscanf(inFile, "%s", names[i]);
fclose(inFile);
For C (as opposed to C++), you'd probably wind up using fgets(). However, you might run into issues due to your arbitrary length lines.
Perhaps a Linked List would be the best way to do this?
The compiler won't like having an array with no idea how big to make it. With a Linked List you can have a really large text file, and not worry about allocating enough memory to the array.
Unfortunately, I haven't learned how to do linked lists, but maybe somebody else could help you.
If you have a good way to read the whole file into memory, you are almost there. After you've done that you could scan the file twice. Once to count the lines, and once to set the line pointers and replace '\n' and (and maybe '\r' if the file is read in Windows binary mode) with '\0'. In between scans allocate an array of pointers, now that you know how many you need.
you can use this way
#include <stdlib.h> /* exit, malloc, realloc, free */
#include <stdio.h> /* fopen, fgetc, fputs, fwrite */
struct line_reader {
/* All members are private. */
FILE *f;
char *buf;
size_t siz;
};
/*
* Initializes a line reader _lr_ for the stream _f_.
*/
void
lr_init(struct line_reader *lr, FILE *f)
{
lr->f = f;
lr->buf = NULL;
lr->siz = 0;
}
/*
* Reads the next line. If successful, returns a pointer to the line,
* and sets *len to the number of characters, at least 1. The result is
* _not_ a C string; it has no terminating '\0'. The returned pointer
* remains valid until the next call to next_line() or lr_free() with
* the same _lr_.
*
* next_line() returns NULL at end of file, or if there is an error (on
* the stream, or with memory allocation).
*/
char *
next_line(struct line_reader *lr, size_t *len)
{
size_t newsiz;
int c;
char *newbuf;
*len = 0; /* Start with empty line. */
for (;;) {
c = fgetc(lr->f); /* Read next character. */
if (ferror(lr->f))
return NULL;
if (c == EOF) {
/*
* End of file is also end of last line,
` * unless this last line would be empty.
*/
if (*len == 0)
return NULL;
else
return lr->buf;
} else {
/* Append c to the buffer. */
if (*len == lr->siz) {
/* Need a bigger buffer! */
newsiz = lr->siz + 4096;
newbuf = realloc(lr->buf, newsiz);
if (newbuf == NULL)
return NULL;
lr->buf = newbuf;
lr->siz = newsiz;
}
lr->buf[(*len)++] = c;
/* '\n' is end of line. */
if (c == '\n')
return lr->buf;
}
}
}
/*
* Frees internal memory used by _lr_.
*/
void
lr_free(struct line_reader *lr)
{
free(lr->buf);
lr->buf = NULL;
lr->siz = 0;
}
/*
* Read a file line by line.
* http://rosettacode.org/wiki/Read_a_file_line_by_line
*/
int
main()
{
struct line_reader lr;
FILE *f;
size_t len;
char *line;
f = fopen("foobar.txt", "r");
if (f == NULL) {
perror("foobar.txt");
exit(1);
}
/*
* This loop reads each line.
* Remember that line is not a C string.
* There is no terminating '\0'.
*/
lr_init(&lr, f);
while (line = next_line(&lr, &len)) {
/*
* Do something with line.
*/
fputs("LINE: ", stdout);
fwrite(line, len, 1, stdout);
}
if (!feof(f)) {
perror("next_line");
exit(1);
}
lr_free(&lr);
return 0;
}

Resources