How to detect a repeated " user input String " in a file? - c

The idea is like compare line by line of strings and detecting the duplicated ones to evade putting theme in another file
after I fill my file with names and created a new file to put all strings without the duplicated ones, I used this loop, but I don't know if it's right or nah. It didn't work
FILE *Tr , *temp;
char test[50] , test1[50];
Tr = fopen("test.txt","w");
temp = fopen("temp1.txt" , "r");
while( !feof(temp) )
{
fgets(test , 50 , temp);
while( !feof(temp) ){
if ( fgets(test , 50 , temp) == fgets(test1 , 50 , temp) ){
printf("a string exist in the file");
}
else{ fprintf(Tr, "%s" , test1);
}
}
}

The following line is wrong:
if ( fgets(test , 50 , temp) == fgets(test1 , 50 , temp) ){
Using == on pointers will compare the actual pointer values, i.e. the memory addresses. If you want to compare the actual string contents (i.e. what the pointers are pointing to), then you must use strcmp instead.
Also, you should only read from the input file, not the output file.
You should also remember all strings that you have read. Otherwise, you will have no way of determining whether the current line is a duplicate or not.
Additionally, it does not make sense having both an outer loop and an inner loop with the same loop condition:
while( !feof(temp) )
Also, using !feof(temp) as a loop condition is generally wrong. See this question for further information:
Why is “while ( !feof (file) )” always wrong?
The following program will remember up to 100 strings, each up to 100 chars in length (including the terminating null character).
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define MAX_STRINGS 100
#define MAX_STRING_LEN 100
int main( void )
{
FILE *input, *output;
char strings[MAX_STRINGS][MAX_STRING_LEN];
int num_strings = 0;
char line[MAX_STRING_LEN];
//open input file
input = fopen( "input.txt", "r" );
if ( input == NULL )
{
fprintf( stderr, "Error opening input file!\n" );
exit( EXIT_FAILURE );
}
//open output file
output = fopen( "output.txt", "w" );
if ( output == NULL )
{
fprintf( stderr, "Error opening output file!\n" );
exit( EXIT_FAILURE );
}
//read one line of input per loop iteration
while ( fgets( line, sizeof line, input ) != NULL )
{
bool is_duplicate = false;
char *p;
//find newline character
p = strchr( line, '\n' );
//make sure that input buffer was large enough to
//read entire line, and remove newline character
//if it exists
if ( p == NULL )
{
if ( !feof( input ) )
{
fprintf( stderr, "Line was too long for input buffer!\n" );
exit( EXIT_FAILURE );
}
}
else
{
//remove newline character
*p = '\0';
}
//determine whether line is duplicate
for ( int i = 0; i < num_strings; i++ )
{
if ( strcmp( line, strings[i] ) == 0 )
{
is_duplicate = true;
break;
}
}
if ( !is_duplicate )
{
//remember string
strcpy( strings[num_strings++], line );
//write string to output file
fprintf( output, "%s\n", line );
}
}
//cleanup
fclose( output );
fclose( input );
}
Given the input
String1
String2
String3
String4
String5
String1
String6
String2
String1
String7
String8
String1
String2
this program has the following output:
String1
String2
String3
String4
String5
String6
String7
String8
As you can see, all duplicate strings were properly filtered out of the output.
However, using a statically sized array is a bit of a waste of space, and it also imposes a hard limit. Therefore, it may be better to use dynamic memory allocation instead:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define INITIAL_CAPACITY 100
#define MAX_LINE_LEN 200
int main( void )
{
FILE *input, *output;
char **strings;
size_t strings_capacity;
int num_strings = 0;
char line[MAX_LINE_LEN];
//open input file
input = fopen( "input.txt", "r" );
if ( input == NULL )
{
fprintf( stderr, "Error opening input file!\n" );
exit( EXIT_FAILURE );
}
//open output file
output = fopen( "output.txt", "w" );
if ( output == NULL )
{
fprintf( stderr, "Error opening output file!\n" );
exit( EXIT_FAILURE );
}
//set capacity of "strings" array to INITIAL_CAPACITY
strings_capacity = INITIAL_CAPACITY;
strings = malloc( strings_capacity * sizeof *strings );
if ( strings == NULL )
{
fprintf( stderr, "Memory allocation failure!\n" );
exit( EXIT_FAILURE );
}
//read one line of input per loop iteration
while ( fgets( line, sizeof line, input ) != NULL )
{
bool is_duplicate = false;
char *p;
//find newline character
p = strchr( line, '\n' );
//make sure that input buffer was large enough to
//read entire line, and remove newline character
//if it exists
if ( p == NULL )
{
if ( !feof( input ) )
{
fprintf( stderr, "Line was too long for input buffer!\n" );
exit( EXIT_FAILURE );
}
}
else
{
//remove newline character
*p = '\0';
}
//determine whether line is duplicate
for ( int i = 0; i < num_strings; i++ )
{
if ( strcmp( line, strings[i] ) == 0 )
{
is_duplicate = true;
break;
}
}
if ( !is_duplicate )
{
//expand capacity of "strings" array if necessary
if ( num_strings == strings_capacity )
{
strings_capacity *= 2;
strings = realloc( strings, strings_capacity * sizeof *strings );
if ( strings == NULL )
{
fprintf( stderr, "Memory allocation failure!\n" );
exit( EXIT_FAILURE );
}
}
//remember string
strings[num_strings] = malloc( strlen( line ) + 1 );
if ( strings[num_strings] == NULL )
{
fprintf( stderr, "Memory allocation failure!\n" );
exit( EXIT_FAILURE );
}
strcpy( strings[num_strings], line );
num_strings++;
//write string to output file
fprintf( output, "%s\n", line );
}
}
//cleanup
//free all dynamically allocated memory
for ( int i = 0; i < num_strings; i++ )
free( strings[i] );
free( strings );
//close file handles
fclose( output );
fclose( input );
}

Related

C language reading from file and placing in variables

In a text file I have the form Type:Username Password, how do I place it in three different variables, so that the variable Type is in the variable type, username is in username, and password is in password in C ?
Example:
Admin:Username Password
How to make?
Type:Admin
User:Username
Pw:Password
Here's my code:
int ch;
int i = 0;
while ((ch = fgetc(fp)) != EOF) {
// Check for the colon character
if (ch == ':') {
// We have reached the end of the type string
// Move to the next variable
i = 0;
continue;
}
// Check for the space character
if (ch == ' ')
{
// We have reached the end of the username string
// Move to the next variable
i = 0;
continue;
}
// Store the character in the appropriate variable
if (i < 50) {
if (type[0] == 0) {
type[i] = ch;
} else if (username[0] == 0) {
username[i] = ch;
} else {
password[i] = ch;
}
i++;
}
}
Considering your initial requirement that you posted in your Question, that a text file consists of following line:
Admin:Username Password
To store Admin in variable type, Username in variable username and similarly Password in variable Password.
You can declare a structure something similar to:
typedef struct user {
char type[512];
char username[512];
char password[512];
} user;
And as #IngoLeonhardt commented, that strtok() or strchr() can be used to parse the line read from the text file, you can refer the below simple example code snippet to understand and further improve the logic.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct user {
char type[512];
char username[512];
char password[512];
} user;
int main(void)
{
FILE *file = fopen("test.txt","r");
char *line = NULL;
size_t linesize = 0;
char *token;
const char colon[2] = ":";
const char space[2] = " ";
user user_1;
if (file)
{
while (getline(&line, &linesize, file) != -1)
{
printf("%s\n", line);
token = strtok(line, colon);
strcpy(user_1.type, token);
token = strtok(NULL, space);
strcpy(user_1.username, token);
token = strtok(NULL, space);
strcpy(user_1.password, token);
printf("%s\n", user_1.type);
printf("%s\n", user_1.username);
printf("%s", user_1.password);
free(line);
}
fclose(file);
}
return 0;
}
PS: There may be some flaws/bugs in above implementation, please consider the above code just as an example.
Here is a solution which reads one character at a time from the file:
#include <stdio.h>
#include <stdlib.h>
#define MAX_STRING_SIZE 200
int main( void )
{
char type[MAX_STRING_SIZE];
char user[MAX_STRING_SIZE];
char pw[MAX_STRING_SIZE];
FILE *fp;
int ch;
int i;
//open the input file
fp = fopen( "input.txt", "r" );
if ( fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//read first token
i = 0;
while ( ( ch = fgetc( fp ) ) != ':' )
{
if ( ch == EOF )
{
fprintf( stderr, "Input error!\n" );
exit( EXIT_FAILURE );
}
if ( i == MAX_STRING_SIZE )
{
fprintf( stderr, "Sub-string is too long!\n" );
exit( EXIT_FAILURE );
}
type[i++] = ch;
}
type[i] = '\0';
//read second token
i = 0;
while ( ( ch = fgetc( fp ) ) != ' ' )
{
if ( ch == EOF )
{
fprintf( stderr, "Input error!\n" );
exit( EXIT_FAILURE );
}
if ( i == MAX_STRING_SIZE )
{
fprintf( stderr, "Sub-string is too long!\n" );
exit( EXIT_FAILURE );
}
user[i++] = ch;
}
user[i] = '\0';
//read third token
i = 0;
while ( ( ch = fgetc( fp ) ) != EOF )
{
if ( i == MAX_STRING_SIZE )
{
fprintf( stderr, "Sub-string is too long!\n" );
exit( EXIT_FAILURE );
}
pw[i++] = ch;
}
pw[i] = '\0';
//close the file
fclose( fp );
//print the three strings
printf( "Type:%s\n", type );
printf( "User:%s\n", user );
printf( "Pw:%s\n", pw );
}
For the input stated in the question, this program has the desired output:
Type:Admin
User:Username
Pw:Password
Here is an alternative solution, which reads a whole line at once:
This solution reads a line from the file as a string, by using the function fgets. After doing that, it uses the function strchr to find the ':' and ' ' delimiter characters in the string. Once they are found, the characters between the tokens can be copied so that they form a string.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define MAX_STRING_SIZE 200
bool read_exactly_one_line( char buffer[], int buffer_size, FILE *fp );
int main( void )
{
char type[MAX_STRING_SIZE];
char user[MAX_STRING_SIZE];
char pw[MAX_STRING_SIZE];
FILE *fp;
char line[500];
char *p, *q;
int token_length;
//open the input file
fp = fopen( "input.txt", "r" );
if ( fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//read one line of input
if ( ! read_exactly_one_line( line, sizeof line, fp ) )
{
fprintf( stderr, "File is empty!\n" );
exit( EXIT_FAILURE );
}
//find the delimiter between the first and second token
p = strchr( line, ':' );
if ( p == NULL )
{
fprintf( stderr, "Unable to find first delimiter!\n" );
exit( EXIT_FAILURE );
}
//verify that first token is not too large
token_length = p - line;
if ( token_length >= MAX_STRING_SIZE )
{
fprintf( stderr, "Token too large!\n" );
exit( EXIT_FAILURE );
}
//copy the first token
memcpy( type, line, token_length );
type[token_length] = '\0';
//move pointer to start of second token
p++;
//find the delimiter between the second and third token
q = strchr( p, ' ' );
if ( q == NULL )
{
fprintf( stderr, "Unable to find second delimiter!\n" );
exit( EXIT_FAILURE );
}
//verify that second token is not too large
token_length = q - p;
if ( token_length >= MAX_STRING_SIZE )
{
fprintf( stderr, "Token too large!\n" );
exit( EXIT_FAILURE );
}
//copy the second token
memcpy( user, p, token_length );
user[token_length] = '\0';
//move pointer to start of third token
q++;
//verify that third token is not too large
token_length = strlen( q );
if ( token_length >= MAX_STRING_SIZE )
{
fprintf( stderr, "Token too large!\n" );
exit( EXIT_FAILURE );
}
//copy the third token
memcpy( pw, q, token_length );
pw[token_length] = '\0';
//close the file
fclose( fp );
//print the three strings
printf( "Type:%s\n", type );
printf( "User:%s\n", user );
printf( "Pw:%s\n", pw );
}
//This function will read exactly one line and remove the newline
//character, if it exists. On success, it will return true. If this
//function is unable to read any further lines due to end-of-file,
//it returns false. If it fails for any other reason, it will not
//return, but will print an error message and call "exit" instead.
bool read_exactly_one_line( char buffer[], int buffer_size, FILE *fp )
{
char *p;
//attempt to read one line from the stream
if ( fgets( buffer, buffer_size, fp ) == NULL )
{
if ( ferror( fp ) )
{
fprintf( stderr, "Input error!\n" );
exit( EXIT_FAILURE );
}
return false;
}
//make sure that line was not too long for input buffer
p = strchr( buffer, '\n' );
if ( p == NULL )
{
//a missing newline character is ok if the next
//character is a newline character or if we have
//reached end-of-file (for example if the input is
//being piped from a file or if the user enters
//end-of-file in the terminal itself)
if ( getc(fp) != '\n' && !feof(stdin) )
{
printf( "Line input was too long!\n" );
exit( EXIT_FAILURE );
}
}
else
{
//remove newline character by overwriting it with a null
//character
*p = '\0';
}
return true;
}
This program has the same output as the first one.

Read sysfs file into buffer for string comparison, without having to open it twice

I have written the following code, modified it a bit for simplicity:
FILE *sysfs_file = fopen("/sys/file", "rb");
if (sysfs_file != NULL){
/* Loop over file handler until EOF to get filesize in bytes */
FILE *sysfs_file_get_size = fopen("/sys/file", "rb");
char d = fgetc(sysfs_file_get_size);
int filesize = 0;
while (d != EOF){
d = fgetc(sysfs_file_get_size);
filesize++;
}
fclose(sysfs_file_get_size);
/* Allocate buffer and copy file into it */
char *buf = malloc(filesize);
char c = fgetc(sysfs_file);
for (int i = 0; i < filesize; i++)
{
buf[i] = c;
c = fgetc(sysfs_file);
}
fclose(sysfs_file);
if(strstr(buf, "foo")) {
printf("bar.\n");
}
}
For security reasons, it seemed better to not assume what size the file will be, and first loop through the file to check of how many bytes it consists.
Regular methods of checking the filesize like fseek() or stat() do not work, as the kernel generates the file at the moment that it is being read. What I would like to know: is there a way of reading the file into a buffer in a secure manner, without having to open a file handler twice?
First of all, in the line
FILE *sysfs_file = fopen("/sys/file", "rb");
the "rb" mode does not make sense. If, as you write, you are looking for a "string", then the file is probably a text file, not a binary file. In that case, you should use "r" instead.
If you are using a POSIX-compliant platform (e.g. Linux), then there is no difference between text mode and binary mode. In that case, it makes even less sense to specifically ask for binary mode, when the file is a text file (even though it is not wrong).
For security reasons, it seemed better to not assume what size the file will be and first loop through the file to check of how many bytes it consists.
It is not a security issue if you limit the number of bytes read to the size of the allocated memory buffer, i.e. to the number of bytes the file originally had. That way, the file will only be truncated (which is generally not a security issue).
However, if you want to ensure that the file is not truncated, then it would probably be best to ignore the initial size of the file and to simply attempt to read as much from the file as possible, until you encounter end-of-file. If the initial buffer it not large enough to store the entire file, then you can use the function realloc to resize the buffer.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//This function will return a pointer to a dynamically
//allocated memory buffer which contains the file data as
//a string (i.e. that is terminated by a null character).
//The function "free" should be called on this data when it
//is no longer required.
char *create_buffer_with_file_data_as_string( FILE *fp )
{
char *buffer = NULL;
size_t buffer_size = 16384;
size_t valid_bytes_in_buffer = 0;
for (;;) //infinite loop, equivalent to while(1)
{
size_t bytes_to_read, bytes_read;
char *temp;
//(re)allocate buffer to desired size
temp = realloc( buffer, buffer_size );
if ( temp == NULL )
{
fprintf( stderr, "Realloc error!\n" );
free( buffer );
return NULL;
}
//(re)allocation was successful, so we can overwrite the
//pointer "buffer"
buffer = temp;
//calculate number of bytes to read from input file
//note that we must leave room for adding the terminating
//null character
bytes_to_read = buffer_size - valid_bytes_in_buffer - 1;
//attempt to fill buffer as much as possible with data from
//the input file
bytes_read = fread(
buffer + valid_bytes_in_buffer,
1,
bytes_to_read,
fp
);
//break out of loop if there is no data to process
if ( bytes_read == 0 )
break;
//update number of valid bytes in the buffer
valid_bytes_in_buffer += bytes_read;
//double the size of the buffer (will take effect in
//the next loop iteration
buffer_size *= 2;
}
//verify that no error occurred
if ( ferror( fp ) )
{
fprintf( stderr, "File I/O error occurred!" );
free( buffer );
return NULL;
}
//add terminating null character to data, so that it is a
//valid string that can be passed to the functon "strstr"
buffer[valid_bytes_in_buffer++] = '\0';
//shrink buffer to required size
{
char *temp;
temp = realloc( buffer, valid_bytes_in_buffer );
if ( temp == NULL )
{
fprintf( stderr, "Warning: Shrinking failed!\n" );
}
else
{
buffer = temp;
}
}
//the function was successful, so return a pointer to
//the data
return buffer;
}
int main( void )
{
FILE *fp;
char *data;
//attempt to open file
fp = fopen( "filename", "r" );
if ( fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//call the function
data = create_buffer_with_file_data_as_string( fp );
if ( data == NULL )
{
fprintf(
stderr,
"An error occured in the function:\n"
" create_buffer_with_file_data_as_string\n"
);
fclose( fp );
exit( EXIT_FAILURE );
}
//the file is no longer needed, so close it
fclose( fp );
//search data for target string
if( strstr( data, "target" ) != NULL )
{
printf("Found \"target\".\n" );
}
else
{
printf("Did not find \"target\".\n" );
}
//cleanup
free( data );
}
For the input
This is a test file with a target.
this program has the following output:
Found "target".
Note that every time I am calling realloc, I am doubling the size of the buffer. I am not adding a constant amount to the size of the buffer. This is important, for the following reason:
Let's say that the file has a size of 160 MB (megabytes). In my program, I have an initial buffer size of about 16 KB (kilobytes). If I didn't double the size of the buffer every time I call realloc, but instead added a constant amount of bytes, for example added another 16 KB, then I would need to call realloc 10,000 times. Every time I call realloc, the content of the entire buffer may have to be copied by realloc, which means that on average, 80 MB may have to be copied every time, which is 800 GB (nearly a terabyte) in total. This would be highly inefficient.
However, if I instead double the size of the memory buffer (i.e. let the buffer grow exponentially), then it is guaranteed that the amount of data that must be copied will never be more than double the amount of the actual data. So, in my example above, it is guaranteed that never more than 320 MB will have to be copied by realloc.
You could just estimate what you need in blocks and grow the input buffer as needed...
This is untested, but gives the flavour of what should work.
This version attempts to load the entire file before investigating its content.
FILE *fp = fopen( "/sys/file", "rb" );
if( fp == NULL )
return -1;
#define BLK_SIZE 1024
char *buf = malloc( BLK_SIZE );
if( buf == NULL )
return -1;
char *readTo = buf;
size_t bufCnt = 0;
for( ;; ) {
size_t inCnt = fread( readTo, sizeof *readTo, BLK_SIZE, fp );
bufCnt += inCnt;
if( inCnt < BLK_SIZE )
break;
// possibly test for EOF here
char *tmp = realloc( buf, bufCnt + BLK_SIZE );
if( tmp == NULL )
return -1;
buf = tmp;
readTo = buf + bufCnt;
}
fclose( fp );
printf( "Got %ld valid bytes in buffer\n", bufCnt );
/* do stuff with *buf */
free( buf );
Hopefully the final EDIT of version 2:
I am grateful to #Andreas Wenzel for his cheerful and meticulous testing and comments that turned earlier (incorrect!) versions of my attempts into this prototype.
The objective is to find a string of bytes in a file.
In this prototype, single "buffer loads" are examined sequentially until the first instance of the target is found or EOF reached. This seems to cope with cases when the target bytes are split across two buffer loads. This uses a ridiculously small 'file' and small buffer that would, of course, be scaled up in the real world.
Making this more efficient is left as an exercise for the reader.
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
// Simulated file with text
char inBytes[] = "The cute brown fox jumps over the dogs and bababanana and stuff.";
char *pFrom = NULL;
size_t nLeft = sizeof inBytes - 1;
// Simulated 'fopen()'.
bool myOpen( void ) { nLeft = strlen( pFrom = inBytes ); return true; }
// Simulated 'fread()'. (only 1 "file pointer in use")
size_t myRead( char *buf, size_t cnt ) {
if( nLeft == 0 ) return 0; // EOF
size_t give = nLeft <= cnt ? nLeft : cnt;
memcpy( buf, pFrom, give );
pFrom += give;
nLeft -= give;
return give;
}
// Look for string using different buffer sizes to prove target split functions
bool foobar( char srchfor[], int bufSize ) {
bool found = false;
int matched = 0;
int lenWant = strlen( srchfor ); // # of chars to match
// RAM buffer includes room for "wrapping"
char *iblk = (char*)malloc( lenWant + bufSize );
if( iblk == NULL ) {
fprintf( stderr, "Malloc failed!!!\n" );
exit( 1 );
}
// simulate loading sequential blocks into a fixed size buffer.
myOpen();
size_t inBuf = 0;
char *pTo = iblk; // Read to location not always start of buffer
while( ( inBuf += myRead( pTo, bufSize ) ) != 0 ) {
printf( "'%.*s' ", (int)inBuf, iblk ); // Show what's in buffer
// The mill where matching is carried out
for( size_t i = 0; i < inBuf && matched < lenWant; )
if( srchfor[ matched ] == iblk[i] )
matched++, i++;
else if( matched )
i -= matched - 1, matched = 0; // rewind a bit and try again
else i++;
// Lucky?
if( matched == lenWant ) { printf( "Ahha!\n" ); found = true; break; }
if( matched == 0 ) {
pTo = iblk, inBuf = 0; // reset things
printf( "nothing\n" );
} else {
// preserve what did match, and read location is offset
printf( "got something\n" );
memmove( iblk, iblk + inBuf - matched, matched );
pTo += matched;
inBuf = matched;
matched = 0;
}
}
free( iblk );
return found;
}
int main() {
char *target = "babanana";
// Test with different buffer sizes (to split target across successive reads )
for( int sz = 20; sz < 27; sz += 2 )
printf( "bufSize = %d ... %s\n\n",
sz, foobar( target, sz ) ? "Found!": "Not Found." );
return 0;
}
Output:
'The cute brown fox j' nothing
'umps over the dogs a' nothing
'nd bababanana and st' Ahha!
bufSize = 20 ... Found!
'The cute brown fox jum' nothing
'ps over the dogs and b' got something
'bababanana and stuff.' Ahha!
bufSize = 22 ... Found!
'The cute brown fox jumps' nothing
' over the dogs and babab' got something
'babanana and stuff.' Ahha!
bufSize = 24 ... Found!
'The cute brown fox jumps o' nothing
'ver the dogs and bababanan' got something
'babanana and stuff.' Ahha!
bufSize = 26 ... Found!
EDIT3: That memmove() and the buffer size has been an annoyance for some time now.
Here's a version that takes one character of input at a time (fgetc() compatible), uses a heap buffer that is the same size as the target, uint8_t allows a search for binary targets, implements a circular buffer and has a lot of fiddley index manipulation. It's not Knuth, but neither am I...
size_t srch( uint8_t srch[], size_t nS, uint8_t targ[], size_t nT ) {
uint8_t c, skip = 0, *q = (uint8_t*)malloc( nT );
if( q == NULL ) {
fprintf( stderr, "Malloc failed!!!\n" );
exit( 1 );
}
size_t head = 0, tail = 0, ti = 0, tiS = 0, i = 0;
while( ti < nT && i < nS ) {
c = skip ? c : srch[i++]; // getc()
skip = 0;
if( c == targ[ti] ) {
q[tail++] = c;
tail %= nT;
ti++;
} else if( ti ) {
skip = 1;
do{
while( --ti && q[ head = ++head%nT ] != targ[ 0 ] );
for( tiS = 0; q[ (head+tiS)%nT ] == targ[ tiS ]; tiS++ );
} while( tiS < ti );
}
}
free( q );
return ti == nT ? i - nT : nS; // found ? offset : impossible offset
}
int main() {
char *in =
"The cute brown fox jumps "
"over the dogs babababananana stuff";
size_t inSize = strlen( in );
char *targets[] = {
"The", "the", "ff",
"babanana", "banana",
"jumps", " cute",
"orange",
};
int nTargs = sizeof targets/sizeof targets[0];
for( int i = 0; i < nTargs; i++ ) {
size_t val = strlen( targets[i] );
val = srch( (uint8_t*)in, inSize, (uint8_t*)targets[i], val );
if( val == inSize )
printf( "%s ... not found\n", targets[i] );
else
printf( "%s ... %.15s\n", targets[i], in + val );
}
return 0;
}
Output
The ... The cute brown
the ... the dogs and ba
ff ... ff
babanana ... babananana and
banana ... bananana and st
jumps ... jumps over the
cute ... cute brown fox
orange ... not found
In my other answer, I have answered your question on how to read the entire file into a memory buffer, in order to search it. However, in this answer, I will present an alternative solution to searching a file for a string, in which the file is searched directly, so that it is not necessary to read the entire file into memory.
In this program, I read a file character by character using getc and whenever I encounter the first character of the target string, I continue reading characters in order to compare these characters with the remaining characters of the target string. If any of these characters does not match, I push back all characters except the first one onto the input stream using ungetc, and then continue searching for the first character of the target string.
#include <stdio.h>
#include <stdlib.h>
int main( void )
{
FILE *fp;
int c;
//define target string
const char target[] = "banana";
const size_t target_length = sizeof target - 1;
//make sure that length of target string is at least 1
_Static_assert(
sizeof target >= 2,
"target string must have at least one character"
);
//attempt to open file
fp = fopen( "filename", "r" );
if ( fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//read one character per loop iteration
while ( ( c = getc(fp) ) != EOF )
{
//compare first character
if ( c == (unsigned char)target[0] )
{
//compare remaining characters
for ( size_t i = 1; i < target_length; i++ )
{
if ( ( c = getc(fp) ) != (unsigned char)target[i] )
{
//strings are not identical, so push back all
//characters
//push back last character
if ( ungetc( c, fp ) == EOF && c != EOF )
{
fprintf( stderr, "Unexpected error in ungetc!\n" );
goto cleanup;
}
//push back all other characters, except for
//the first character
for ( const char *p = target + i - 1; p != target; p-- )
{
if ( ungetc( *p, fp ) == EOF )
{
fprintf(
stderr,
"Error with function \"ungetc\"!\n"
"This error is probably due to this function\n"
"not supporting a sufficiently large\n"
"pushback buffer."
);
goto cleanup;
}
}
//go to next outer loop iteration
goto continue_outer_loop;
}
}
//found target string
printf( "Found!\n" );
goto cleanup;
}
continue_outer_loop:
continue;
}
//did not find target string
printf( "Not found!\n" );
cleanup:
fclose( fp );
}
However, this solution has one big problem. The size of the pushback buffer is only guaranteed to be a single character by ISO C. Although some platforms have pushback buffers up to 4 KiB, some platforms actually only support a single character.
Therefore, in order for this solution to be portable, it would be necessary to implement a sufficiently large pushback buffer yourself using your own version of ungetc and fgetc (which I call my_ungetc and my_fgetc):
#include <stdio.h>
#include <stdlib.h>
struct pushback_buffer
{
char data[16384];
char *end;
char *p;
FILE *fp;
};
int my_ungetc( int c, struct pushback_buffer *p )
{
//verify that buffer is not full
if ( p->p == p->data )
{
//buffer is full
return EOF;
}
*--p->p = c;
return 0;
}
int my_fgetc( struct pushback_buffer *p )
{
//determine whether buffer is empty
if ( p->p == p->end )
{
//pass on request to getc
return getc( p->fp );
}
return *p->p++;
}
int main( void )
{
static struct pushback_buffer pbb;
int c;
//define target string
const char target[] = "banana";
const size_t target_length = sizeof target - 1;
//make sure that length of target string is at least 1
_Static_assert(
sizeof target >= 2,
"target string must have at least one character"
);
//initialize pushback buffer except for "fp"
pbb.end = pbb.data + sizeof pbb.data;
pbb.p = pbb.end;
//open file and write FILE * to pushback buffer
pbb.fp = fopen( "filename", "r" );
if ( pbb.fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//read one character per loop iteration
while ( ( c = my_fgetc(&pbb) ) != EOF )
{
//compare first character
if ( c == (unsigned char)target[0] )
{
//compare remaining characters
for ( size_t i = 1; i < target_length; i++ )
{
if ( ( c = my_fgetc(&pbb) ) != (unsigned char)target[i] )
{
//strings are not identical, so push back all
//characters
//push back last character
if ( my_ungetc( c, &pbb ) == EOF && c != EOF )
{
fprintf( stderr, "Unexpected error in ungetc!\n" );
goto cleanup;
}
//push back all other characters, except for
//the first character
for ( const char *p = target + i - 1; p != target; p-- )
{
if ( my_ungetc( *p, &pbb ) == EOF )
{
fprintf(
stderr,
"Error with function \"ungetc\"!\n"
"This error is probably due to this function\n"
"not supporting a sufficiently large\n"
"pushback buffer."
);
goto cleanup;
}
}
//go to next outer loop iteration
goto continue_outer_loop;
}
}
//found target string
printf( "Found!\n" );
goto cleanup;
}
continue_outer_loop:
continue;
}
//did not find target string
printf( "Not found!\n" );
cleanup:
fclose( pbb.fp );
}
However, reading a file a single character at a time is not very efficient, especially on platforms which support multithreading, because this requires getc to acquire a lock every time. Some platforms offer platform-specific alternatives, such as getc_unlocked on POSIX-compliant platforms (e.g. Linux) and _getc_no_lock on Windows. But even when using these functions, reading one character at a time from the input stream will be rather slow. It would be more efficient to read a whole block of several kilobytes at once.
Here is a completely different solution of mine which reads a whole block at once, instead of one character at a time. However, this solution is rather complex, because it must handle two buffers at once and requires 4 levels of nested loops and multiple gotos to break out of these nested loops.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_SIZE 8192
struct buffer
{
char data[BUFFER_SIZE];
size_t valid_chars;
};
size_t read_next_block( char buffer[static BUFFER_SIZE], FILE *fp );
int main( void )
{
//define target string
const char target[] = "banana";
const size_t target_length = sizeof target - 1;
//verify that length of target string is at least 1
_Static_assert(
sizeof target >= 2,
"target string must have at least one character"
);
//verify that target string is not so long that
//more than two buffers would be required
_Static_assert(
BUFFER_SIZE > sizeof target,
"target string too long"
);
//other declarations
FILE *fp;
struct buffer buffers[2];
struct buffer *current = NULL, *next = NULL;
//attempt to open file
fp = fopen( "filename", "r" );
if ( fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//read one block per loop iteration
do
{
char *p, *q;
size_t chars_left;
if ( next == NULL )
{
//use the first buffer
current = &buffers[0];
//load the next block
current->valid_chars = read_next_block( current->data, fp );
}
else
{
current = next;
next = NULL;
}
p = current->data;
chars_left = current->valid_chars;
//search for next occurance of starting character
while (
chars_left != 0
&&
( q = memchr( p, target[0], chars_left ) ) != NULL
)
{
chars_left -= q - p;
p = q;
for ( size_t i = 1; i < target_length; i++ )
{
//swap to next block, if necessary
if ( i == chars_left )
{
//check whether we have reached end-of-file
if ( current->valid_chars != BUFFER_SIZE )
{
goto no_match;
}
//load next block, if necessary
if ( next == NULL )
{
//make "next" point to the other buffer
next = current == &buffers[0] ? &buffers[1] : &buffers[0];
//load the next block
next->valid_chars = read_next_block( next->data, fp );
}
for ( size_t j = 0; i < target_length; i++, j++ )
{
//check whether we have reached end-of-file
if ( j == next->valid_chars )
{
//the strings don't match
goto no_match;
}
if ( next->data[j] != target[i] )
{
//the strings don't match
goto no_match;
}
}
//the strings match
goto match;
}
//go to next outer loop iteration if the
//strings do not match
if ( p[i] != target[i] )
{
//the strings don't match
goto no_match;
}
}
//the strings match
goto match;
no_match:
p++;
chars_left--;
}
} while ( current->valid_chars == BUFFER_SIZE );
//no match was found
printf( "Not found!\n" );
goto cleanup;
match:
//the strings match
printf( "Found!\n" );
goto cleanup;
cleanup:
fclose( fp );
}
size_t read_next_block( char buffer[static BUFFER_SIZE], FILE *fp )
{
size_t bytes_read;
bytes_read = fread( buffer, 1, BUFFER_SIZE, fp );
if ( bytes_read == 0 && ferror( fp ) )
{
fprintf( stderr, "Input error!\n" );
exit( EXIT_FAILURE );
}
return bytes_read;
}
If the kernel is creating the file as you read it and there is a risk that the size of it will be different the next time you read it, then your only real bet is to read it into a buffer before you know how large the file is. Start by allocating a LARGE buffer - big enough that it SHOULD accept the entire file - then call read() to get (at most) that many bytes. If there's still more to be read, you can realloc() the buffer you were writing into. Repeat the realloc() as often as necessary.

Segmentation fault at the last line when reading file

I got a segmentation fault when reading a file. There is an empty line at the end of the file. I thought when I did fscanf( input, "[^\n]\n", c ), it would take the next line tab from the second last line and the token went to the empty line at the end, which will be '\0', but I got the a segmentation fault.
FILE *input = fopen( filename, "r" );
if ( input == NULL ) {
fprintf( stderr, "Can't open file: %s\n", filename );
exit( EXIT_FAILURE );
}
file = ( char ** )malloc( MAX_LINE * sizeof( char * ) );
int index = 0;
char *c = ( char * )malloc( MAX_CHAR * sizeof( char ) );
int length = 0;
while ( fscanf( input, "%[^\n]\n", c ) != EOF ) {
if ( c == NULL ) {
fprintf( stderr, "Cannot read line: %d ", index );
exit( EXIT_FAILURE );
}
length = ( int ) strlen( c );
// checking if a line has too many characters
if ( length > MAX_CHAR ) {
fprintf( stderr, "Line too long: %s:%d\n", filename, ( index + 1 ) );
exit( EXIT_FAILURE );
}
//checking if the file has too many lines
if ( index > MAX_LINE ) {
fprintf( stderr, "Too many lines: %s\n", filename );
exit( EXIT_FAILURE );
}
//printf( "%s\n%d %d\n", c, index, length );
file[ index ] = ( char * )malloc( MAX_CHAR * sizeof( char ) );
file[ index ] = c;
index++;
}

Why is my program suddenly terminating before executing scanf()?

I am trying to write code to replace a line from a text file. It compiles successfully, but it terminates out of nowhere as soon as it tries to scan the line number to be replaced.
I really have no idea what I am doing wrong. I have also tried with fgets() but it still doesn't work.
#include <stdio.h>
#include <stdlib.h>
#define MAXNAME 30
#define MAXLINE 256
int main(){
char fileName[MAXNAME];
FILE *originalFileCheck;
printf("Input the name of the file to be opened: ");
scanf("%s", fileName);
originalFileCheck = fopen(fileName, "r");
if(originalFileCheck == NULL){
printf("The file %s was not opened successfully. The program will now terminate.\n", fileName);
exit(1);
}
else{
FILE *tempFileWrite;
char tempName[MAXNAME] = "temp.txt";
tempFileWrite = fopen(tempName, "w");
char newLine[MAXLINE];
int lineNum;
printf("Input the content of the new line: ");
scanf("%s", newLine);
printf("Input the number of the line you want to replace: ");
scanf("%d", &lineNum); /* it terminates WITHOUT scanning this int*/
char str[MAXLINE];
int counter = 1;
while(fgets(str, MAXLINE, originalFileCheck) != NULL){
if(counter != lineNum){
for(int i = 0; str[i] != '\0' && str[i] != '\n'; i++){
fputc(str[i], tempFileWrite);
}
fprintf(tempFileWrite, "\n");
}
else{
fprintf(newLine, "%s\n", tempFileWrite);
}
counter++;
}
fclose(tempFileWrite);
fclose(originalFileCheck);
...
return 0;
}
the following proposed code:
cleanly compiles
checks for input errors
checks for output errors
performs the desired functionality
properly cleans up when an error occurs
and now, the proposed code:
#include <stdio.h>
#include <stdlib.h>
#define MAXNAME 30
#define MAXLINE 256
int main( void )
{
char fileName[MAXNAME];
FILE *originalFileCheck;
printf("Input the name of the file to be opened: ");
if( !fgets( filename, sizeof( fileName ), stdin) )
{
fprintf( stderr, "fgets to input 'original' file name failed\n" );
exit( EXIT_FAILURE );
}
// remove trailing newline
fileName[ strcspn( fileName, "\n" ) ] = '\0';
originalFileCheck = fopen( fileName, "r" );
if( !originalFileCheck )
{
perror( "fopen original file for read failed" );
exit( EXIT_FAILURE );
}
FILE *tempFileWrite;
char tempName[ MAXNAME ] = "temp.txt";
tempFileWrite = fopen( tempName, "w" );
if( !tempFileWrite )
{
perror( "fopen to write new file failed" );
fclose( originalFileCheck );
exit( EXIT_FAILURE );
}
char newLine[ MAXLINE ];
int lineNum;
printf("Input the content of the new line: ");
if( !fgets( newLine, sizeof( newLine ), stdin ) )
{
perror"fgets to input new line content failed" );
fclose( originalFileCheck );
exit( EXIT_FAILURE );
}
printf("Input the number of the line you want to replace: ");
if( scanf("%d", &lineNum) != 1 )
{
fprintf( stderr, "scanf for replacement line number failed\n" );
fclose( originalFileCheck );
fclose( tempFileWrite );
exit( EXIT_FAILURE );
}
char str[MAXLINE];
int counter = 1;
while( fgets(str, sizeof( str ), originalFileCheck) )
{
if(counter != lineNum)
{
if( fputs( str, tempFileWrite ) == EOF )
{
perror( "fputs for original line failed" );
fclose( originalFileCheck );
fclose( tempFileWrite );
exit( EXIT_FAILURE );
}
}
else
{
if( fputs( newLine, tempFileWrite ) == EOF )
{
perror( "fputs for replacement line failed" );
fclose( originalFileCheck );
fclose( tempFileWrite );
exit( EXIT_FAILURE );
}
}
counter++;
}
fclose(tempFileWrite);
fclose(originalFileCheck);
return 0;
}

How to copy the returned token by strtok in c

When I try to copy the strtok through strcpy as suggested by the other answers on this forum. I don't get the respective number. File format is something like this 43,789,127.0.0.1 on each particular line. However I get 127 in the temp[2] location which should be 127.0.0.1 . What am I missing over here?
FILE *f = fopen("somefile", "r");
char *temp[3];
temp[0] = malloc(20);
temp[1] = malloc(20);
temp[2] = malloc(20);
const char s[1] = ",";
char *pch;
if(f != NULL)
{
char line[1024];
while(fgets(line, sizeof line, f) != NULL)
{
pch = strtok(line, s);
for(int i = 0; i<3; i++)
{
strcpy(temp[i],pch);
pch = strtok (NULL, s);
}
}
fclose(f);
} else
{
perror("somefile");
}
for(int i = 0; i<3; i++)
{
printf ("%s\n",temp[i]);
}
s is not a proper C string: const char s[1] = ","; defines it to have a size of 1, without a null terminator.
Use this instead:
const char *s = ",";
Note that a line with fewer than 2 commas will cause the program to have undefined behavior as you do not check that strtok() returns a non NULL pointer. Here is an alternative using sscanf():
#include <stdio.h>
int main(void) {
FILE *fp = fopen("somefile", "r");
char temp[3][20];
if (fp != NULL) {
char line[1024];
while (fgets(line, sizeof line, fp) != NULL) {
if (sscanf(line, "%19[^,],%19[^,],%19[^\n]", temp[0], temp[1], temp[2]) == 3) {
printf("%s\n%s\n%s\n\n", temp[0], temp[1], temp[2]);
}
}
fclose(fp);
}
return 0;
}
Note however that the above code will fail to parse lines with empty fields such as ,, because sscanf() requires a non empty string for the %[^,] conversion specifier.
Note also that strtok() would be inappropriate for such parsing too as it treats sequences of separators as a single separator, which is OK for white space but probably incorrect for ,.
the following proposed code:
cleanly compiles
is consistently indented
is appropriately horizontally and vertically spaced
corrects all the known problems in the posted code
properly checks for and handles errors
properly displays the parameters from all the lines in the file
eliminates unneeded variables
and now the code
#include <stdio.h> // fopen(), fclose(), fgets(), perror()
#include <stdlib.h> // exit(), EXIT_FAILURE
#include <string.h> // strtok(), strcpy()
#define MAX_LENGTH 20
#define MAX_PARAMETERS 3
int main( void )
{
FILE *f = fopen( "somefile", "r" );
if( !f )
{
perror( "fopen to read somefile failed" );
exit( EXIT_FAILURE );
}
// implied else, fopen successful
char *temp[ MAX_PARAMETERS ];
if( NULL == ( temp[0] = malloc( MAX_LENGTH ) ) )
{
perror( "malloc failed" );
exit( EXIT_FAILURE );
}
if( NULL == ( temp[1] = malloc( MAX_LENGTH ) ) )
{
perror( "malloc failed" );
exit( EXIT_FAILURE );
}
if( NULL == ( temp[2] = malloc( MAX_LENGTH ) ) )
{
perror( "malloc failed" );
exit( EXIT_FAILURE );
}
char line[1024];
while( fgets(line, sizeof line, f) )
{
int i;
char *pch = strtok( line, "," );
for( i = 0; i<3 && pch; i++ )
{
strcpy( temp[i], pch );
pch = strtok ( NULL, "," );
}
if( MAX_PARAMETERS != i )
{
printf( "failed to extract all parameters from line: %s\n", line );
}
else
{
for( int j = 0; j<MAX_PARAMETERS; j++ )
{
printf( "%s\n", temp[j] );
}
}
}
fclose( f );
} // end function: main

Resources