Getting abort trap 6 after malloc() call - c

I'm new to C from java and having a bit of trouble with memory management. I have this function whose purpose is to read x through y lines of a file. I am trying to write the function so that it can handle any size line in a file. Here is the function:
char** getRangeOfLinesFromFile(char* fileName, int startingLineNumber, int endingLineNumber, int BUFFER_SIZE) {
//set up an array to hold the range of lines we're going to return
//+1 because the range is inclusive
char** lineArray = malloc((endingLineNumber - startingLineNumber + 1) * sizeof(char*));
//get the file
FILE* file_ptr = fopen(fileName,"r");
//our buffer array will hold the lines as we read them
char *buffer = (char*) malloc(sizeof(char) * BUFFER_SIZE);
//this array will be used just in case a line is bigger than our buffer and we need to take multiple passes at it
char *temp = (char*) malloc(sizeof(char) * BUFFER_SIZE);
int lineCounter = 1;
while (fgets(buffer, BUFFER_SIZE, file_ptr) != NULL) {
strcat(temp, buffer);
size_t len = strlen(temp);
if (temp[len - 1] != '\n') {
realloc(temp,BUFFER_SIZE);
} else {
if (shouldKeepLine(lineCounter,startingLineNumber,endingLineNumber)) {
int index = lineCounter - startingLineNumber;
lineArray[index] = malloc(len * sizeof(char));
//ERROR HERE
strcpy(lineArray[index],temp);
}
*temp = *(char*) malloc(sizeof(char) * BUFFER_SIZE);
lineCounter++;
// we don't need to keep reading the file if we're not going to print anything else
if (lineCounter > endingLineNumber) {
break;
}
}
}
//clean up
free(buffer);
free(temp);
fclose(file_ptr);
return lineArray;
}
This is what the shouldKeepLine() function looks like:
bool shouldKeepLine(int lineNumber, int lowerBound, int upperBound) {
if (lineNumber >= lowerBound && lineNumber <= upperBound) {
return true;
}
return false;
}
During testing I'm using a Buffer Size of 10, so I can test that it 'grows' properly. This size will increase when the program is complete. The test file I'm using to read from currently has 2 lines. The first line of the file has like 15 characters. The second line has around 90 or so.
When I run this program, I get an Abort trap 6 error. After putting some debugging print statements in, I see that it throws that error on the strcpy call right below the ERROR HERE comment in my code, but only for line 2, not line 1. Line 1 is also larger than the buffer but a tenth or so the size of line 2. If I change this line:
lineArray[index] = malloc(len * sizeof(char));
to:
lineArray[index] = malloc(len * sizeof(char) * 1000);
It works fine. This just doesn't seem dynamic.
Not sure what I'm doing wrong. Any help is appreciated.

the following code should (not compiled/tested)
perform the activity you want to implement
char** getRangeOfLinesFromFile(
char* fileName,
int startingLineNumber, // note: first line in file is considered line 1, not 0
int endingLineNumber,
int BUFFER_SIZE )
{
//set up an array to hold the range of lines we're going to return
//+1 because the range is inclusive
char** lineArray = NULL;
// what about if endingLineNumber is < startingLine Number???
int lineCount = endingLineNumber - startingLineNumber + 1;
int i; // loop counter/index
// get array of pointers to point to each line that is kept
if( NULL == (lineArray = malloc( (lineCount) * sizeof(char*)) ) )
{ // then, malloc failed
perror( "malloc for array of pointers failed" );
exit( EXIT_FAILURE );
}
// implied else, malloc successful
// *I* would use the getline()) function, so the length of the
// input line would not matter.
// however, the following code uses the passed in value BUFFER_SIZE
// (which I would have made a #define and not passed it in)
// initialize the array of pointers to char
for( i=0; i<lineCount; i++)
{
if( NULL == (lineArray[i] = malloc(BUFFER_SIZE) ) )
{ // then malloc failed
perror( "malloc for each line storage area failed" );
// free all allocated memory
// suggest using the cleanup() function, below
exit( EXIT_FAILURE );
}
// implied else, malloc successful
// (optional, but good practice)
// clear each memory allocated area to NUL bytes
memset( lineArray[i], 0x00, BUFFER_SIZE );
} // end for
// if got here, then all memory allocations are completed
//get the file
FILE* file_ptr = NULL;
if( NULL == (file_ptr = fopen(fileName,"r") ) )
{ // then, fopen failed
perror( "fopen for input file failed" );
// free all allocated memory
// suggest using the cleanup() function, below
exit( EXIT_FAILURE );
}
// implied else, fopen successful
for(i=0; i < (startingLineNumber-1); i++)
{
unsigned int inputChar;
while( (inputChar = fgetc( file_ptr )) != EOF
{
if( EOF == inputChar )
{ // then, file does not contain nearly enough lines
// free all allocated memory
// suggest using cleanup() function, below
fclose( file_ptr );
return (NULL);
}
if( '\n' == inputChar ) { break;} // end of current line
} // end while
} // end for
// if got here, then ready to read first line of interest
// NOTE: be sure there is a newline at end of input file
int bufLength = BUFFER_SIZE;
for( i=0; i<lineCount; i++ )
{
// get a line, allowing for max input buffer length
if( fgets( &lineArray[i][strlen(lineArray[i])], BUFFER_SIZE, file_ptr ) )
{ // then got (more) line, up to BUFFER_SIZE -1
if( '\n' != lineArray[i][(strlen( lineArray[i] )-1) )
{ // then more char available for this line
char *tempPtr = NULL;
if( NULL == (tempPtr = realloc( lineArray[i], (bufLength*2) ) ) )
{ // then, realloc failed
perror( "realloc to increase buffer size failed" );
// add --free all the allocations--
// suggest sub function with passed parameters
fclose( file_ptr );
exit( EXIT_FAILURE );
}
// implied else, realloc successful
// update the pointer in the array
lineArray[i] = tempPtr;
// update buffer length indication
bufLength *= 2;
i--; // so will append to same entry in array for rest of line
}
else
{ // else, got all of line
bufLength = BUFFER_SIZE; // reset
} // end if
}
else
{ // else, fgets failed
perror( "fgets for line failed" );
// add --free all allocations--
// suggest sub function with passed parameters
fclose( file_ptr );
exit( EXIT_FAILURE );
} // end if
} // end for
// if got here, then all desired lines read successfully
fclose( file_ptr );
return lineArray;
} // end function: getRangeOfLinesFromFile
remember that the caller has to free all those memory allocations
first the allocation for each line
then the allocation for the lineArray
perhaps like this:
void cleanup( char**pArray, int entryCount )
{
int i; // loop counter/index
for( i=0; i<entryCount; i++ )
{
free(pArray[i]);
}
free(pArray);
} // end function: cleanup

This line of code is likely to be troublesome:
strcat (temp, buffer);
At the point it executes, temp has been malloced, but not initialized.
There really ought to be checks after every malloc() to make sure they succeeded.

Related

Windows 10: fopen, fread, fgets and mixed EOL characters in large text file

I am developing a program for computational material scientists:
https://atomes.ipcms.fr/
Atomes can import large text files that contains atomic coordinates,
to do that I read the file as a whole using fread,
then split the text buffer over the CPU cores using OpenMP.
Works wonder on Linux, and Windows, until someone came along with an issue I wasn't expecting. A file with mixed, and not regularly placed, EOL symbols (\n and \r).
I found a way to overcome the problem on Windows, and Windows only,
and I would really appreciate your advise/comment on what I did to know if I used a proper correction.
Before that solution I tried to add option(s) to the fopen function like -t or -b but it had no effect.
Finally I noticed that there was no problem if I change compilation options and use the fgets function to read the data from the file, only in that case and for large files the treatment of the data is more complicated, so far I no way // using OpenMP, and takes more time.
Here is the code I wrote to read the file:
int open_coord_file (gchar * filename)
{
int res;
#ifdef OPENMP
// In that case I read the file in a single buffer, then work on that buffer
struct stat status;
res = stat (filename, & status);
if (res == -1)
{
// Basic function to store information on the reading process
add_reader_info ("Error - cannot get file statistics !");
return 1;
}
int fsize = status.st_size;
#endif
coordf = fopen (filename, "r");
if (! coordf)
{
add_reader_info ("Error - cannot open coordinates file !");
return 1;
}
int i, j, k;
#ifdef OPENMP
gchar * coord_content = g_malloc0(fsize*sizeof*coord_content);
// Using fread to read the entire file
fread (coord_content, fsize, 1, coordf);
fclose (coordf);
int linecount = 0;
// Evaluating the number of lines in the file:
for (j=0; j<fsize; j++) if (coord_content[j] == '\n') linecount ++;
#ifdef G_OS_WIN32
// What happen in Windows is that some '\r' symbols were found
// and not on all lines, so I decided to check for \r symbols:
int neolr = 0;
for (j=0; j<fsize; j++) if (coord_content[j] == '\r') neolr ++;
// And mofidy the number of lines accordingly
linecount -= neolr;
#endif
coord_line = g_malloc0 (linecount*sizeof*coord_line);
coord_line[0] = & coord_content[0];
i = 1;
int nfsize = fsize;
#ifdef G_OS_WIN32
// Now deleting the corresponding EOL symbols in the text buffer
// This is only required for Windows, and I am not sure that it is
// the proper way to do thing, any though on the matter would be appreciated.
for (j=0; j<fsize; j++)
{
if (coord_content[j] == '\n')
{
coord_content[j] = '\0';
}
else if (coord_content[j] == '\r')
{
for (k=j; k<fsize-1; k++)
{
coord_content[k] = coord_content[k+1];
}
nfsize --;
}
}
#endif
// And referencing properly the lines to work on the buffer:
for (j=0; j<nfsize; j++)
{
if (coord_content[j] == '\0')
{
if (i < linecount)
{
coord_line[i] = & coord_content[j+1];
i++;
}
}
}
#else
// On the other side if turn down OpenMP, then I use the fgets function
// to read the data from the text file, then there no problem what so ever
// with the EOL symbols and everything work smoothly.
// The fopen options being the same I am somewhat confused by this result.
gchar * buf = g_malloc0(LINE_SIZE*sizeof*buf);
struct line_node
{
gchar * line;//[LINE_SIZE];
struct line_node * next;
struct line_node * prev;
};
struct line_node * head = NULL;
struct line_node * tail = NULL;
i = 0;
while (fgets(buf, LINE_SIZE, coordf))
{
if (head == NULL)
{
head = g_malloc0 (sizeof*head);
tail = g_malloc0 (sizeof*tail);
tail = head;
}
else
{
tail -> next = g_malloc0 (sizeof*tail -> next);
tail = tail -> next;
}
tail -> line = g_strdup_printf ("%s", buf);
tail -> line = substitute_string (tail -> line, "\n", "\0");
i ++;
}
g_free (buf);
fclose (coordf);
#endif
// And then latter in the code I process the data
// providing i the number of lines as an input value.
return read_xyz_file (i);
Any advise would be really appreciated.
[EDIT]
I found a way arround my freadissue using a temporary buffer with fgets so I could get my data // with OpenMP easily again:
coord_line = g_malloc0 (i*sizeof*coord_line);
tail = head;
j = 0;
while (tail)
{
coord_line[j] = & tail -> line[0];
j ++;
tail = tail -> next;
}
Now everything is fine, though I still have no clues why I am having issues with fread
[/EDIT]
SO is not a free code writing service. However, you've shown real effort trying to work this out for yourself. Point-by-point correction would take forever, so here is a "code dump" that should be easy to follow and does (I hope) what you've tried hard to achieve.
This is in "plain, ordinary" C, without the "gXXXX" functions shown in your code. This opens & loads an entire (presumed-to-be) text file, squishes out CR if present, then slices the lines assigning pointers into a growing array of pointers to each line. (Empty lines will be assigned a pointer, too) Some printf lines report on some statistics of the process.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Load file contents to alloc'd memory, return pointer to buffer (to be free'd!)
char *loadFile( char *fname ) {
FILE *fp;
if( ( fp = fopen( fname, "rb" ) ) == NULL )
fprintf( stderr, "Cannot open '%s'\n", fname ), exit(1);
fseek( fp, 0, SEEK_END );
size_t size = ftell( fp );
fseek( fp, 0, SEEK_SET );
char *buf;
if( ( buf = (char*)malloc( size + 1) ) == NULL )
fprintf( stderr, "Malloc() failed\n" ), exit(1);
if( fread( buf, sizeof *buf, size, fp ) != size )
fprintf( stderr, "Read incomplete\n" ), exit(1);
fclose( fp );
*(buf + size) = '\0'; // set xtra byte allocated to NULL (allows str functions to work)
return buf;
}
int main() {
char *fname = "FOO.BAR"; // To be defined...
char *fCont = loadFile( fname ), *d, *s;
// crush out '\r', if any
for( d = fCont, s = fCont; (*d = *s) != '\0'; s++ )
d += *d != '\r';
fprintf( stderr, "Orig %ld. Without CR %ld\n", s - fCont, d - fCont );
char **arr = NULL;
int lcnt = 0;
for( char *t = fCont; ( t = strtok( t, "\n" ) ) != NULL; t = NULL ) {
char **tmp = (char**)realloc( arr, (lcnt+1) * sizeof *tmp );
if( tmp == NULL )
fprintf( stderr, "realloc() failed\n" ), exit(1);
arr = tmp;
arr[lcnt++] = t;
}
fprintf( stderr, "%ld lines loaded\n", lcnt );
// "demo" the first 5 lines
for( int i = 0; i < 5 && i < lcnt; i++ )
fprintf( stderr, "%d - '%s'\n", i+1, arr[i] );
/* process from arr[0] to arr[lcnt-1] */
free( arr );
free( fCont );
return 0;
}
Hope this helps. Ball's in your court now...
This is solved using the b option with the fopen function:
coordf = fopen (filename, "rb");
After that fread behaves properly.
Note that in my first attempts I likely used the following and wrong order of parameters:
coordf = fopen (filename, "br");
That does not work.

Read sysfs file into buffer for string comparison, without having to open it twice

I have written the following code, modified it a bit for simplicity:
FILE *sysfs_file = fopen("/sys/file", "rb");
if (sysfs_file != NULL){
/* Loop over file handler until EOF to get filesize in bytes */
FILE *sysfs_file_get_size = fopen("/sys/file", "rb");
char d = fgetc(sysfs_file_get_size);
int filesize = 0;
while (d != EOF){
d = fgetc(sysfs_file_get_size);
filesize++;
}
fclose(sysfs_file_get_size);
/* Allocate buffer and copy file into it */
char *buf = malloc(filesize);
char c = fgetc(sysfs_file);
for (int i = 0; i < filesize; i++)
{
buf[i] = c;
c = fgetc(sysfs_file);
}
fclose(sysfs_file);
if(strstr(buf, "foo")) {
printf("bar.\n");
}
}
For security reasons, it seemed better to not assume what size the file will be, and first loop through the file to check of how many bytes it consists.
Regular methods of checking the filesize like fseek() or stat() do not work, as the kernel generates the file at the moment that it is being read. What I would like to know: is there a way of reading the file into a buffer in a secure manner, without having to open a file handler twice?
First of all, in the line
FILE *sysfs_file = fopen("/sys/file", "rb");
the "rb" mode does not make sense. If, as you write, you are looking for a "string", then the file is probably a text file, not a binary file. In that case, you should use "r" instead.
If you are using a POSIX-compliant platform (e.g. Linux), then there is no difference between text mode and binary mode. In that case, it makes even less sense to specifically ask for binary mode, when the file is a text file (even though it is not wrong).
For security reasons, it seemed better to not assume what size the file will be and first loop through the file to check of how many bytes it consists.
It is not a security issue if you limit the number of bytes read to the size of the allocated memory buffer, i.e. to the number of bytes the file originally had. That way, the file will only be truncated (which is generally not a security issue).
However, if you want to ensure that the file is not truncated, then it would probably be best to ignore the initial size of the file and to simply attempt to read as much from the file as possible, until you encounter end-of-file. If the initial buffer it not large enough to store the entire file, then you can use the function realloc to resize the buffer.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//This function will return a pointer to a dynamically
//allocated memory buffer which contains the file data as
//a string (i.e. that is terminated by a null character).
//The function "free" should be called on this data when it
//is no longer required.
char *create_buffer_with_file_data_as_string( FILE *fp )
{
char *buffer = NULL;
size_t buffer_size = 16384;
size_t valid_bytes_in_buffer = 0;
for (;;) //infinite loop, equivalent to while(1)
{
size_t bytes_to_read, bytes_read;
char *temp;
//(re)allocate buffer to desired size
temp = realloc( buffer, buffer_size );
if ( temp == NULL )
{
fprintf( stderr, "Realloc error!\n" );
free( buffer );
return NULL;
}
//(re)allocation was successful, so we can overwrite the
//pointer "buffer"
buffer = temp;
//calculate number of bytes to read from input file
//note that we must leave room for adding the terminating
//null character
bytes_to_read = buffer_size - valid_bytes_in_buffer - 1;
//attempt to fill buffer as much as possible with data from
//the input file
bytes_read = fread(
buffer + valid_bytes_in_buffer,
1,
bytes_to_read,
fp
);
//break out of loop if there is no data to process
if ( bytes_read == 0 )
break;
//update number of valid bytes in the buffer
valid_bytes_in_buffer += bytes_read;
//double the size of the buffer (will take effect in
//the next loop iteration
buffer_size *= 2;
}
//verify that no error occurred
if ( ferror( fp ) )
{
fprintf( stderr, "File I/O error occurred!" );
free( buffer );
return NULL;
}
//add terminating null character to data, so that it is a
//valid string that can be passed to the functon "strstr"
buffer[valid_bytes_in_buffer++] = '\0';
//shrink buffer to required size
{
char *temp;
temp = realloc( buffer, valid_bytes_in_buffer );
if ( temp == NULL )
{
fprintf( stderr, "Warning: Shrinking failed!\n" );
}
else
{
buffer = temp;
}
}
//the function was successful, so return a pointer to
//the data
return buffer;
}
int main( void )
{
FILE *fp;
char *data;
//attempt to open file
fp = fopen( "filename", "r" );
if ( fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//call the function
data = create_buffer_with_file_data_as_string( fp );
if ( data == NULL )
{
fprintf(
stderr,
"An error occured in the function:\n"
" create_buffer_with_file_data_as_string\n"
);
fclose( fp );
exit( EXIT_FAILURE );
}
//the file is no longer needed, so close it
fclose( fp );
//search data for target string
if( strstr( data, "target" ) != NULL )
{
printf("Found \"target\".\n" );
}
else
{
printf("Did not find \"target\".\n" );
}
//cleanup
free( data );
}
For the input
This is a test file with a target.
this program has the following output:
Found "target".
Note that every time I am calling realloc, I am doubling the size of the buffer. I am not adding a constant amount to the size of the buffer. This is important, for the following reason:
Let's say that the file has a size of 160 MB (megabytes). In my program, I have an initial buffer size of about 16 KB (kilobytes). If I didn't double the size of the buffer every time I call realloc, but instead added a constant amount of bytes, for example added another 16 KB, then I would need to call realloc 10,000 times. Every time I call realloc, the content of the entire buffer may have to be copied by realloc, which means that on average, 80 MB may have to be copied every time, which is 800 GB (nearly a terabyte) in total. This would be highly inefficient.
However, if I instead double the size of the memory buffer (i.e. let the buffer grow exponentially), then it is guaranteed that the amount of data that must be copied will never be more than double the amount of the actual data. So, in my example above, it is guaranteed that never more than 320 MB will have to be copied by realloc.
You could just estimate what you need in blocks and grow the input buffer as needed...
This is untested, but gives the flavour of what should work.
This version attempts to load the entire file before investigating its content.
FILE *fp = fopen( "/sys/file", "rb" );
if( fp == NULL )
return -1;
#define BLK_SIZE 1024
char *buf = malloc( BLK_SIZE );
if( buf == NULL )
return -1;
char *readTo = buf;
size_t bufCnt = 0;
for( ;; ) {
size_t inCnt = fread( readTo, sizeof *readTo, BLK_SIZE, fp );
bufCnt += inCnt;
if( inCnt < BLK_SIZE )
break;
// possibly test for EOF here
char *tmp = realloc( buf, bufCnt + BLK_SIZE );
if( tmp == NULL )
return -1;
buf = tmp;
readTo = buf + bufCnt;
}
fclose( fp );
printf( "Got %ld valid bytes in buffer\n", bufCnt );
/* do stuff with *buf */
free( buf );
Hopefully the final EDIT of version 2:
I am grateful to #Andreas Wenzel for his cheerful and meticulous testing and comments that turned earlier (incorrect!) versions of my attempts into this prototype.
The objective is to find a string of bytes in a file.
In this prototype, single "buffer loads" are examined sequentially until the first instance of the target is found or EOF reached. This seems to cope with cases when the target bytes are split across two buffer loads. This uses a ridiculously small 'file' and small buffer that would, of course, be scaled up in the real world.
Making this more efficient is left as an exercise for the reader.
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
// Simulated file with text
char inBytes[] = "The cute brown fox jumps over the dogs and bababanana and stuff.";
char *pFrom = NULL;
size_t nLeft = sizeof inBytes - 1;
// Simulated 'fopen()'.
bool myOpen( void ) { nLeft = strlen( pFrom = inBytes ); return true; }
// Simulated 'fread()'. (only 1 "file pointer in use")
size_t myRead( char *buf, size_t cnt ) {
if( nLeft == 0 ) return 0; // EOF
size_t give = nLeft <= cnt ? nLeft : cnt;
memcpy( buf, pFrom, give );
pFrom += give;
nLeft -= give;
return give;
}
// Look for string using different buffer sizes to prove target split functions
bool foobar( char srchfor[], int bufSize ) {
bool found = false;
int matched = 0;
int lenWant = strlen( srchfor ); // # of chars to match
// RAM buffer includes room for "wrapping"
char *iblk = (char*)malloc( lenWant + bufSize );
if( iblk == NULL ) {
fprintf( stderr, "Malloc failed!!!\n" );
exit( 1 );
}
// simulate loading sequential blocks into a fixed size buffer.
myOpen();
size_t inBuf = 0;
char *pTo = iblk; // Read to location not always start of buffer
while( ( inBuf += myRead( pTo, bufSize ) ) != 0 ) {
printf( "'%.*s' ", (int)inBuf, iblk ); // Show what's in buffer
// The mill where matching is carried out
for( size_t i = 0; i < inBuf && matched < lenWant; )
if( srchfor[ matched ] == iblk[i] )
matched++, i++;
else if( matched )
i -= matched - 1, matched = 0; // rewind a bit and try again
else i++;
// Lucky?
if( matched == lenWant ) { printf( "Ahha!\n" ); found = true; break; }
if( matched == 0 ) {
pTo = iblk, inBuf = 0; // reset things
printf( "nothing\n" );
} else {
// preserve what did match, and read location is offset
printf( "got something\n" );
memmove( iblk, iblk + inBuf - matched, matched );
pTo += matched;
inBuf = matched;
matched = 0;
}
}
free( iblk );
return found;
}
int main() {
char *target = "babanana";
// Test with different buffer sizes (to split target across successive reads )
for( int sz = 20; sz < 27; sz += 2 )
printf( "bufSize = %d ... %s\n\n",
sz, foobar( target, sz ) ? "Found!": "Not Found." );
return 0;
}
Output:
'The cute brown fox j' nothing
'umps over the dogs a' nothing
'nd bababanana and st' Ahha!
bufSize = 20 ... Found!
'The cute brown fox jum' nothing
'ps over the dogs and b' got something
'bababanana and stuff.' Ahha!
bufSize = 22 ... Found!
'The cute brown fox jumps' nothing
' over the dogs and babab' got something
'babanana and stuff.' Ahha!
bufSize = 24 ... Found!
'The cute brown fox jumps o' nothing
'ver the dogs and bababanan' got something
'babanana and stuff.' Ahha!
bufSize = 26 ... Found!
EDIT3: That memmove() and the buffer size has been an annoyance for some time now.
Here's a version that takes one character of input at a time (fgetc() compatible), uses a heap buffer that is the same size as the target, uint8_t allows a search for binary targets, implements a circular buffer and has a lot of fiddley index manipulation. It's not Knuth, but neither am I...
size_t srch( uint8_t srch[], size_t nS, uint8_t targ[], size_t nT ) {
uint8_t c, skip = 0, *q = (uint8_t*)malloc( nT );
if( q == NULL ) {
fprintf( stderr, "Malloc failed!!!\n" );
exit( 1 );
}
size_t head = 0, tail = 0, ti = 0, tiS = 0, i = 0;
while( ti < nT && i < nS ) {
c = skip ? c : srch[i++]; // getc()
skip = 0;
if( c == targ[ti] ) {
q[tail++] = c;
tail %= nT;
ti++;
} else if( ti ) {
skip = 1;
do{
while( --ti && q[ head = ++head%nT ] != targ[ 0 ] );
for( tiS = 0; q[ (head+tiS)%nT ] == targ[ tiS ]; tiS++ );
} while( tiS < ti );
}
}
free( q );
return ti == nT ? i - nT : nS; // found ? offset : impossible offset
}
int main() {
char *in =
"The cute brown fox jumps "
"over the dogs babababananana stuff";
size_t inSize = strlen( in );
char *targets[] = {
"The", "the", "ff",
"babanana", "banana",
"jumps", " cute",
"orange",
};
int nTargs = sizeof targets/sizeof targets[0];
for( int i = 0; i < nTargs; i++ ) {
size_t val = strlen( targets[i] );
val = srch( (uint8_t*)in, inSize, (uint8_t*)targets[i], val );
if( val == inSize )
printf( "%s ... not found\n", targets[i] );
else
printf( "%s ... %.15s\n", targets[i], in + val );
}
return 0;
}
Output
The ... The cute brown
the ... the dogs and ba
ff ... ff
babanana ... babananana and
banana ... bananana and st
jumps ... jumps over the
cute ... cute brown fox
orange ... not found
In my other answer, I have answered your question on how to read the entire file into a memory buffer, in order to search it. However, in this answer, I will present an alternative solution to searching a file for a string, in which the file is searched directly, so that it is not necessary to read the entire file into memory.
In this program, I read a file character by character using getc and whenever I encounter the first character of the target string, I continue reading characters in order to compare these characters with the remaining characters of the target string. If any of these characters does not match, I push back all characters except the first one onto the input stream using ungetc, and then continue searching for the first character of the target string.
#include <stdio.h>
#include <stdlib.h>
int main( void )
{
FILE *fp;
int c;
//define target string
const char target[] = "banana";
const size_t target_length = sizeof target - 1;
//make sure that length of target string is at least 1
_Static_assert(
sizeof target >= 2,
"target string must have at least one character"
);
//attempt to open file
fp = fopen( "filename", "r" );
if ( fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//read one character per loop iteration
while ( ( c = getc(fp) ) != EOF )
{
//compare first character
if ( c == (unsigned char)target[0] )
{
//compare remaining characters
for ( size_t i = 1; i < target_length; i++ )
{
if ( ( c = getc(fp) ) != (unsigned char)target[i] )
{
//strings are not identical, so push back all
//characters
//push back last character
if ( ungetc( c, fp ) == EOF && c != EOF )
{
fprintf( stderr, "Unexpected error in ungetc!\n" );
goto cleanup;
}
//push back all other characters, except for
//the first character
for ( const char *p = target + i - 1; p != target; p-- )
{
if ( ungetc( *p, fp ) == EOF )
{
fprintf(
stderr,
"Error with function \"ungetc\"!\n"
"This error is probably due to this function\n"
"not supporting a sufficiently large\n"
"pushback buffer."
);
goto cleanup;
}
}
//go to next outer loop iteration
goto continue_outer_loop;
}
}
//found target string
printf( "Found!\n" );
goto cleanup;
}
continue_outer_loop:
continue;
}
//did not find target string
printf( "Not found!\n" );
cleanup:
fclose( fp );
}
However, this solution has one big problem. The size of the pushback buffer is only guaranteed to be a single character by ISO C. Although some platforms have pushback buffers up to 4 KiB, some platforms actually only support a single character.
Therefore, in order for this solution to be portable, it would be necessary to implement a sufficiently large pushback buffer yourself using your own version of ungetc and fgetc (which I call my_ungetc and my_fgetc):
#include <stdio.h>
#include <stdlib.h>
struct pushback_buffer
{
char data[16384];
char *end;
char *p;
FILE *fp;
};
int my_ungetc( int c, struct pushback_buffer *p )
{
//verify that buffer is not full
if ( p->p == p->data )
{
//buffer is full
return EOF;
}
*--p->p = c;
return 0;
}
int my_fgetc( struct pushback_buffer *p )
{
//determine whether buffer is empty
if ( p->p == p->end )
{
//pass on request to getc
return getc( p->fp );
}
return *p->p++;
}
int main( void )
{
static struct pushback_buffer pbb;
int c;
//define target string
const char target[] = "banana";
const size_t target_length = sizeof target - 1;
//make sure that length of target string is at least 1
_Static_assert(
sizeof target >= 2,
"target string must have at least one character"
);
//initialize pushback buffer except for "fp"
pbb.end = pbb.data + sizeof pbb.data;
pbb.p = pbb.end;
//open file and write FILE * to pushback buffer
pbb.fp = fopen( "filename", "r" );
if ( pbb.fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//read one character per loop iteration
while ( ( c = my_fgetc(&pbb) ) != EOF )
{
//compare first character
if ( c == (unsigned char)target[0] )
{
//compare remaining characters
for ( size_t i = 1; i < target_length; i++ )
{
if ( ( c = my_fgetc(&pbb) ) != (unsigned char)target[i] )
{
//strings are not identical, so push back all
//characters
//push back last character
if ( my_ungetc( c, &pbb ) == EOF && c != EOF )
{
fprintf( stderr, "Unexpected error in ungetc!\n" );
goto cleanup;
}
//push back all other characters, except for
//the first character
for ( const char *p = target + i - 1; p != target; p-- )
{
if ( my_ungetc( *p, &pbb ) == EOF )
{
fprintf(
stderr,
"Error with function \"ungetc\"!\n"
"This error is probably due to this function\n"
"not supporting a sufficiently large\n"
"pushback buffer."
);
goto cleanup;
}
}
//go to next outer loop iteration
goto continue_outer_loop;
}
}
//found target string
printf( "Found!\n" );
goto cleanup;
}
continue_outer_loop:
continue;
}
//did not find target string
printf( "Not found!\n" );
cleanup:
fclose( pbb.fp );
}
However, reading a file a single character at a time is not very efficient, especially on platforms which support multithreading, because this requires getc to acquire a lock every time. Some platforms offer platform-specific alternatives, such as getc_unlocked on POSIX-compliant platforms (e.g. Linux) and _getc_no_lock on Windows. But even when using these functions, reading one character at a time from the input stream will be rather slow. It would be more efficient to read a whole block of several kilobytes at once.
Here is a completely different solution of mine which reads a whole block at once, instead of one character at a time. However, this solution is rather complex, because it must handle two buffers at once and requires 4 levels of nested loops and multiple gotos to break out of these nested loops.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_SIZE 8192
struct buffer
{
char data[BUFFER_SIZE];
size_t valid_chars;
};
size_t read_next_block( char buffer[static BUFFER_SIZE], FILE *fp );
int main( void )
{
//define target string
const char target[] = "banana";
const size_t target_length = sizeof target - 1;
//verify that length of target string is at least 1
_Static_assert(
sizeof target >= 2,
"target string must have at least one character"
);
//verify that target string is not so long that
//more than two buffers would be required
_Static_assert(
BUFFER_SIZE > sizeof target,
"target string too long"
);
//other declarations
FILE *fp;
struct buffer buffers[2];
struct buffer *current = NULL, *next = NULL;
//attempt to open file
fp = fopen( "filename", "r" );
if ( fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//read one block per loop iteration
do
{
char *p, *q;
size_t chars_left;
if ( next == NULL )
{
//use the first buffer
current = &buffers[0];
//load the next block
current->valid_chars = read_next_block( current->data, fp );
}
else
{
current = next;
next = NULL;
}
p = current->data;
chars_left = current->valid_chars;
//search for next occurance of starting character
while (
chars_left != 0
&&
( q = memchr( p, target[0], chars_left ) ) != NULL
)
{
chars_left -= q - p;
p = q;
for ( size_t i = 1; i < target_length; i++ )
{
//swap to next block, if necessary
if ( i == chars_left )
{
//check whether we have reached end-of-file
if ( current->valid_chars != BUFFER_SIZE )
{
goto no_match;
}
//load next block, if necessary
if ( next == NULL )
{
//make "next" point to the other buffer
next = current == &buffers[0] ? &buffers[1] : &buffers[0];
//load the next block
next->valid_chars = read_next_block( next->data, fp );
}
for ( size_t j = 0; i < target_length; i++, j++ )
{
//check whether we have reached end-of-file
if ( j == next->valid_chars )
{
//the strings don't match
goto no_match;
}
if ( next->data[j] != target[i] )
{
//the strings don't match
goto no_match;
}
}
//the strings match
goto match;
}
//go to next outer loop iteration if the
//strings do not match
if ( p[i] != target[i] )
{
//the strings don't match
goto no_match;
}
}
//the strings match
goto match;
no_match:
p++;
chars_left--;
}
} while ( current->valid_chars == BUFFER_SIZE );
//no match was found
printf( "Not found!\n" );
goto cleanup;
match:
//the strings match
printf( "Found!\n" );
goto cleanup;
cleanup:
fclose( fp );
}
size_t read_next_block( char buffer[static BUFFER_SIZE], FILE *fp )
{
size_t bytes_read;
bytes_read = fread( buffer, 1, BUFFER_SIZE, fp );
if ( bytes_read == 0 && ferror( fp ) )
{
fprintf( stderr, "Input error!\n" );
exit( EXIT_FAILURE );
}
return bytes_read;
}
If the kernel is creating the file as you read it and there is a risk that the size of it will be different the next time you read it, then your only real bet is to read it into a buffer before you know how large the file is. Start by allocating a LARGE buffer - big enough that it SHOULD accept the entire file - then call read() to get (at most) that many bytes. If there's still more to be read, you can realloc() the buffer you were writing into. Repeat the realloc() as often as necessary.

Read character by character from a file and put each line in a String

I've to read character by character from a file and put each line in a String.
The problem is that i don't know the size of each line so eventually I've to reallocate the memory. So If I try a reallocation my program return error. Am I doing something wrong?
FILE * file = fopen(input,"r");
if(file != NULL){
char temp;
char * line;
line = (char *) malloc(sizeof(char) * 10);
int i = 0;
while((temp = fgetc(file)) != EOF){
if(temp == '\n'){
i = 0;
}
else{
if(i > strlen(line) - 2){
line = (char *) realloc(line,sizeof(line) * 10);
}
line[i] = (char) temp;
i++;
}
}
free(line);
fclose(file);
}
else{
}
the following proposed code:
cleanly compiles
performs the desired functionality
properly checks for errors
outputs user error messages to stderr
outputs the text reason the system thinks an error occurred to stderr
documents why each header file is included
shows an example of how to handle the case where the user failed to enter a command line parameter (in this case a input file name)
makes use of size_t rather than int when passing parameters to malloc() and realloc()
and now, the proposed code:
#include <stdio.h> // fopen(), perror(), fclose() fprintf()
#include <stdlib.h> // exit(), EXIT_FAILURE, malloc(), realloc(). free()
int main( int argc, char *argv[] )
{
if( argc != 2 )
{
fprintf( stderr, "USAGE: %s <fileName>\n", argv[0] );
exit( EXIT_FAILURE );
}
FILE * file = fopen( argv[1], "r" );
if( !file )
{
perror( "fopen failed" );
exit( EXIT_FAILURE );
}
// implied else, fopen successful
int ch;
char * line = malloc( 10 );
if( !line )
{
perror( "malloc failed" );
fclose( file ); // cleanup
exit( EXIT_FAILURE );
}
// implied else, malloc successful
size_t lineLen = 10;
size_t i = 0;
while( (ch = fgetc(file)) != EOF )
{
if( ch == '\n' )
{
line[i] = '\0';
// do something with contents of line
i = 0;
}
else
{
if(i >= lineLen )
{
lineLen *= 2;
char * temp = realloc( line, lineLen );
if( !temp )
{
perror( "realloc failed" );
// cleanup
fclose( file );
free( line );
exit( EXIT_FAILURE );
}
line = temp;
}
line[i] = (char)ch;
i++;
}
}
free(line);
fclose(file);
}

runtime error with malloc

#define MAXL 256
I think the problem with my code is that eventho numInput = 3, somehow, output[2] did not exist so that when I try to assign it, the program crash (just guessing).
Is there a way to check if ouput[2] exist? or maybe someone will be able to find out the real problem of my code, that would be awesome!
Any help would be greatly appreciated!
NOTE: The reason that I cast malloc is that it is expected by my lecturer.
Input strings are: 25 7 * 14 - 6 +
1 24 3 + * 41 -
2 37 4 + * 15 +
void processPostfixExp(const char * fileName)
{
char ** input = NULL;
double ** output = NULL;
int i = 0, numInput = 0;
char tempInput[MAXL] = {0};
FILE * pFile = NULL;
/* Get number of strings, check if file is readable and open file */
numInput = checkFile(fileName);
pFile = fopen(fileName, "r");
/* Allocate memory for the string storages and alert if fail */
input = (char**)malloc(numInput * sizeof(char*));
output = (double**)malloc(numInput * sizeof(double*));
if(!input || !output)
{
printf("Memory allocation failed.\n");
system("PAUSE");
exit(1);
}
/* Scan the file by lines and duplicate the string to input storage */
for(i = 0; i < numInput; ++i)
{
fgets(tempInput, MAXL, pFile);
tempInput[strlen(tempInput)-1] = '\0';
input[i] = strdup(tempInput);
//printf("\n%s", input[i]);
}
/* Close file and clear screen */
fclose(pFile);
system("CLS");
/* Call converter and display result */
printf("-------------------------------------------------------\n");
printf("\nPostfix expression evaluation:\n");
for(i = 0; i < numInput; ++i)
{
printf("input = %s", input[i]); /* i = 2 Printf SUCCESS */
*output[i] = evaluatePost(input[i]); /* i = 2 CRASH HERE */
/* I added a check at the top most of the evaluatePost(), program did not get to there */
//printf("\nCase %d: %s\nResult:%.2f\n", i + 1, input[i], *output[i]);
}
printf("\n");
printf("-------------------------------------------------------\n");
}
UPDATE:
so I added these lines and can confirm that output[2] does not exist... how is that possible? Please help, Thank you!
for(i = 0; i < numInput; ++i)
{
*output[i] = (double)i;
printf("output[%d] = %.1f\n", i, *output[i]);
}
The problem is that you have:
*output[i]
You have allocated numInput pointers to double, but the pointers themselves don't exist.
It looks like you want to allocate space not for pointers, but for doubles:
double *output;
…
output = (double*)malloc(numInput * sizeof(double));
I'm not sure what is wrong with your call to evaluatePost(), especially as you have not provided a prototype for that function.
However, overall, your code should look similar to the following:
in the future, please post code that (standalone) actually cleanly compiles
when you want help with a run time problem.
strongly suggest compiling with all warnings enabled.
For gcc, at a minimum, use '-Wall -Wextra -pedantic'
When handling a error, always cleanup allocated memory, open files, etc.
#define _POSIX_C_SOURCE (200809L)
#include <stdio.h>
#include <stdlib.h> // exit() and EXIT_FAILURE
#include <string.h> // memset() and strdup()
#define MAXL (256)
// prototypes
int checkFile( const char * );
double evaluatePost( char * );
void processPostfixExp(const char * fileName)
{
char **input = NULL;
double **output = NULL;
int i = 0;
int numInput = 0;
char tempInput[MAXL] = {0};
FILE *pFile = NULL;
/* Get number of strings, check if file is readable and open file */
numInput = checkFile(fileName);
if( NULL == (pFile = fopen(fileName, "r") ) )
{ // then fopen failed
perror( "fopen for input file failed" );
exit( EXIT_FAILURE );
}
/* Allocate memory for the string storages and alert if fail */
if( NULL == (input = malloc(numInput * sizeof(char*)) ) )
{ // then malloc failed
perror( "malloc for input failed" );
fclose( pFile );
exit( EXIT_FAILURE );
}
// implied else, malloc successful
memset( input, 0x00, numInput*sizeof(char*) ); // to make later free() operation easy
if( NULL == (output = malloc(numInput * sizeof(double*)) ) )
{ // then malloc failed
perror( "malloc for output failed" );
fclose( pFile );
free( input );
exit( EXIT_FAILURE );
}
// implied else, malloc successful
memset( output, 0x00, numInput * sizeof(double*) ); // to make later free() operation easy
/* Scan the file by lines and duplicate the string to input storage */
for(i = 0; i < numInput; ++i)
{
if( NULL == fgets(tempInput, MAXL, pFile) )
{ // then fgets failed
perror( "fgets for input file failed" );
fclose( pFile );
for( int j=0; j<numInput; j++ )
{
free( input[j] );
free( output[j] );
}
free( input );
free( output );
exit( EXIT_FAILURE );
}
// implied else, fgets successful
char * offset = NULL;
if( NULL != (offset = strstr( tempInput, "\n" )) )
{ // then newline found
*offset = '\0';
}
if( NULL == (input[i] = strdup(tempInput) ) )
{ // then strdup failed
perror( "strdup for input line failed" );
fclose( pFile );
for( int j=0; j<numInput; j++ )
{
free( input[j] );
free( output[j] );
}
free( input );
free( output );
exit( EXIT_FAILURE );
}
//printf("\n%s", input[i]);
} // end for
/* Close file and clear screen */
fclose(pFile);
system("CLS");
/* Call converter and display result */
printf("-------------------------------------------------------\n");
printf("\nPostfix expression evaluation:\n");
for(i = 0; i < numInput; ++i)
{
printf("input = %s", input[i]); /* i = 2 Printf SUCCESS */
*output[i] = evaluatePost(input[i]); /* i = 2 CRASH HERE */
/* I added a check at the top most of the evaluatePost(), program did not get to there */
//printf("\nCase %d: %s\nResult:%.2f\n", i + 1, input[i], *output[i]);
}
printf("\n");
printf("-------------------------------------------------------\n");
for( int j=0; j<numInput; j++ )
{
free( input[j] );
free( output[j] );
}
free( input );
free( output );
} // end function: processPostfixExp

copy content of file in reverse order

I need to write a program that is copying the content of a file to another file and reverses it.
I found an example and read it through to understand what is going on.
The problem is that my program has to use two functions:
void reverse(char line[]){
int i;
int length;
char tmp;
..
..
..
return;
}
(no further paramters or local variables)
The second function does the rest of the work(opens files, copies files, closes files)
The main program only reads the name of the files and calls the copy function.
#include<stdio.h>
#include<string.h>
void reverse(char line[])
{
int i;
int length;
char temp;
if (line == NULL)
return;
length = strlen(line);
for (i = 0 ; i < length / 2 + length % 2 ; ++i)
{
if (line[i] == line[length - i - 1])
continue;
temp = line[i];
line[i] = line[length - i - 1];
line[length - i - 1] = temp;
}
return;
}
int main()
{
FILE *src_fh, *dst_fh;
char src_fn[256+1], dst_fn[256+1];
printf("Enter Source File Name:\n");
fgets(src_fn, sizeof(src_fn), stdin); reverse(src_fn);
if( (src_fh = fopen(src_fn, "r")) == NULL )
{
printf("ERROR: Source File %s Failed To Open...\n",src_fn);
return(-1);
}
printf("Enter Destination File Name:\n");
fgets(dst_fn, sizeof(dst_fn), stdin); reverse(dst_fn);
if( (dst_fh = fopen(dst_fn, "w+")) == NULL )
{
fclose(src_fh);
printf("ERROR: Destination File %s Failed To Open...\n",dst_fn);
return(-2);
}
int ch;
while( (ch = fgetc(src_fh)) != EOF )
{
fputc(ch, dst_fh);
}
fclose(src_fh);
fclose(dst_fh);
return 0;
}
You only need to swap the first character with the last, the second with the pre-last, and so on.
You actually don't need the int temp variable, but since it seems to be required, here it is
void reverse(char line[])
{
int i;
int length;
char temp;
if (line == NULL)
return;
length = strlen(line);
for (i = 0 ; i < length / 2 + length % 2 ; ++i)
{
if (line[i] == line[length - i - 1])
continue;
temp = line[i];
line[i] = line[length - i - 1];
line[length - i - 1] = temp;
}
return;
}
This is an improved version, without int temp, instead we store the result of length / 2 + length % 2 so it's not recalculated on each iteration
void reverse(char line[])
{
int i;
int length;
int half;
if (line == NULL)
return;
length = strlen(line);
half = length / 2 + length % 2;
for (i = 0 ; i < half ; ++i)
{
if (line[i] == line[length - i - 1])
continue;
line[length] = line[i];
line[i] = line[length - i - 1];
line[length - i - 1] = line[length];
}
line[length] = '\0';
return;
}
just use the location of the terminating '\0' byte as the temp when swapping.
For the second function, read each line using fgets and write it to the file with fprintf, just remember to remove the newline character from the read strings, you can use the chomp function y posted for that, if you don't remove the newline, the reversed lines will have the newline at the beginning of the line.
The prameter name line in the prototype void reverse(char line[]) seems to give a hint, how the given exercise might be intended to be solved.
split the file in lines
reverse every line
reverse the order of the lines
Nevertheless you should watch out following this strategy, as there is still a really nasty gotcha involved, if your file may contain any data.
In this case you'll get in big trouble finding the end of line[] as '\0' termination might get confused with a literal '\0' in the line.
As a workaround you might try to replace any literal occurance of '/0' by the sequence '\0' 'x' and mark the end of your line by the sequence '\0' '-' or whatever before passing it to reverse() and reaversing the substitution after writing the reversed line to the file.
Unfortunately this attempt doesn't look too elegant, but maybe reversing a file the way it is meant to be done in the exercise isn't really elegant anyays.
the following code
1) incorporates proper error checking
2) outputs each input line, reversed, to the output file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char* chomp(char* p)
{
int len;
if(!p) return(p);
if( (len=strlen(p))<=0 ) return(p);
if( p[len-1] == '\n' ) { p[--len] = '\0'; }
if( p[len-1] == '\r' ) { p[--len] = '\0'; }
return(p);
} // end function: chomp
int main()
{
/* Create Usable Variables */
FILE *src_fh = NULL;
FILE *dst_fh = NULL;
char src_fn[256+1] = {'\0'};
char dst_fn[256+1] = {'\0'};
char line[2048] = {'\0'};
/* Retrieve Source File Name From User */
printf("Enter Source File Name:\n");
if( NULL == (fgets(src_fn, sizeof(src_fn), stdin) ) )
{ // fgets failed
perror("fgets for input file name failed" );
exit(EXIT_FAILURE);
}
// implied else, fgets successful
chomp(src_fn); // remove trailing newline characters
/* Attempt Opening Source File For Reading */
if( (src_fh = fopen(src_fn, "r")) == NULL )
{
perror( "fopen failed" );
printf("ERROR: Source File %s Failed To Open...\n",src_fn);
return(-1);
}
// implied else, fopen source file successful
/* Retrieve Destination File Name From User */
printf("Enter Destination File Name:\n");
if( NULL == (fgets(dst_fn, sizeof(dst_fn), stdin) ) )
{ // then fgets failed
perror( "fgets for output file name failed" );
fclose(src_fh); // cleanup
exit( EXIT_FAILURE );
}
// implied else, fgets for output file name successful
chomp(dst_fn); // remove trailing newline characters
/* Attempt Opening Destination File For Writing */
if( NULL == (dst_fh = fopen(dst_fn, "w")) )
{
perror( "fopen for output file failed" );
fclose(src_fh); // cleanup
printf("ERROR: Destination File %s Failed To Open...\n",dst_fn);
return(-2);
}
// implied else, fopen for output file successful
int index;
/* Copy Source File Contents (reversed, line by line) to destination file */
while( NULL != (fgets(line, sizeof(line), src_fh) ) )
{
chomp(line); // remove trailing newline characters
index = strlen(line) - 1; // -1 because arrays start with offset 0
// and strlen returns offset to '\0'
// output reversed line to file
while( index >= 0 )
{
fputc( line[index], dst_fh );
index--;
} // end while
fputc( '\n', dst_fh );
} // end while
/* Close Files On Success */
fclose(src_fh);
fclose(dst_fh);
return 0;
} // end function: main

Resources