How to make this transposing algorithm read a whole text file - c

So I've been trying to write a transposing algorithm where each character’s position is shifted to a new location within a file. For example if the key is 3 and the character array is
"This program is supposed to encrypt a file."
after encrypting, the output will be
"Tsrr ps cpai.h oaispetert lipgmsuodony fe"
The problem is that after it's done encrypting the first line of a file, it stops and doesn't continue encrypting the whole file. after compiling, it can be executed like this
./executable -e 3 inputfile outputfile
.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_SIZE 200
/*
 * Transposition cipher driver.
 * Expected invocation: <program> -e|-d <key> <inputfile> <outputfile>
 * (argc must be exactly 5, otherwise the usage text is printed).
 */
int main ( int argc, char* argv[ ] ) {
int pos = 0;
char characters[ BUFFER_SIZE ];
int index, k, size, key;
char* echars;
FILE* input;
FILE* output;
if ( argc == 5 ) {
// exits if key is lower than 1
key = atoi ( argv[ 2 ] );
if ( key < 1 ) {
perror ( "Error: This value cannot be used as a key" );
exit ( EXIT_FAILURE );
}
input = fopen ( argv[ 3 ], "r" );
//Shows error if there's no file
if ( input == NULL ) {
perror ( "Error, File doesn't exits" );
exit ( EXIT_FAILURE );
}
// NOTE(review): the result of this fopen is never checked for NULL.
output = fopen ( argv[ 4 ], "w" );
// Single fgets call: at most one line (and at most BUFFER_SIZE - 1
// characters) of the input ends up in "characters".
fgets (characters, BUFFER_SIZE, input);
// Seek to the end so that ftell reports the file length, which is then
// used as the size of the output buffer.
fseek (input, 0, SEEK_END);
size = ftell ( input );
// NOTE(review): the malloc result is not checked, and nothing below
// writes a terminating '\0' into echars before it is passed to fputs.
echars = ( char* ) malloc ( size );
if ( strcmp ( argv[ 1 ], "-e" ) == 0 ) {
// The body runs only while "characters" ends in '\n'. Its first
// statement removes that newline, so a further pass happens only if
// the character before it is a newline as well.
while (strlen(characters) && characters[strlen(characters)-1] == '\n'){
characters[strlen(characters)-1] = '\0';
// Encrypt: for each start offset 0..key-1, append every key-th character.
for (index = 0; index < key; index++) {
for (k = index; k < strlen( characters ); k += key)
echars[ pos++ ] = characters[ k ];
}
printf("Successfuly encrypted\n");
}
} else if( strcmp ( argv[ 1 ], "-d" ) == 0 ) {
// Decrypt: the inverse mapping - consecutive input characters are
// written back to positions index, index+key, index+2*key, ...
for (index = 0; index < key; index++) {
for (k = index; k < strlen( characters ); k += key)
echars[ k ] = characters[ pos++ ];
}
printf("Successfuly decrypted\n");
}
fputs( echars, output );
fclose ( input );
fclose ( output );
} else {
perror("Too few arguments, something went wrong\n");
printf("Usage: ./program -e (encrypts) or -d (decripts) 3 (key) inputfile destinationfile\n");
printf("Example:'./exectutable -e 3 inputfile.txt outputfile.txt\n");
exit ( EXIT_FAILURE );
}
return 0;
}

fgets stops reading when it hits a newline.
You need to either put a loop around it or use a read operation that can read larger chunks.

Related

C: Reading last n lines from file and printing them

I'm trying to read last n lines from file and then print them. To read the lines I'm using fgets() and it seems to work fine. However, when I try to print the last n lines that I have stored in the array, it only prints the last line n times. It seems like there is something wrong with the way I store strings in an array. Here's my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Intended to print the last n lines of the file named by agrv[1].
int main(int agrc, char** agrv) {
FILE* input;
// NOTE(review): the fopen result is not checked before use.
input = fopen(agrv[1], "r");
// n comes from the first character of agrv[2] only, i.e. a single digit.
int n = *agrv[2]-'0';
int line = 0;
// One shared read buffer: every fgets call below overwrites it.
char text[11];
char** tab = malloc(1000000*sizeof(text));
while(fgets(text, sizeof(text), input) != 0) {
// Stores the address of "text" itself, not a copy of its contents,
// so every tab[] entry written here holds the same pointer.
tab[line] = text;
line++;
}
fclose(input);
// NOTE(review): jump is negative when n is larger than "line".
int jump = line-n;
for(int i=jump; i<line; i++) {
printf("%s\n", tab[i]);
}
}
Any help would be appreciated. Thanks in advance!
EDIT:
I've changed my while loop to this. However, it still doesn't work.
while(fgets(text, sizeof(text), input) != 0) {
// text2 is declared inside the loop body, so the pointer saved in
// tab[line] refers to an array whose lifetime ends with this iteration.
char text2[11];
strcpy(text2, text);
tab[line] = text2;
line++;
}
tab[line] = text; sets tab[line] to point to the start of text. So you end up with all the tab[i] pointing to the same place, the start of text.
You need to copy each line read from the file to a different place in memory.
It may increase your understanding if you see working code that does what you hope to achieve. Because your OP could only show 1-9 "last lines", this code doesn't try to go beyond that "meager" amount. Further, this code is meant for files whose lines "are of moderate length". It should be clear where changes can be enacted.
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h> // for 'isdigit()'
// Factor processing into functions for clarity
/*
 * Print the last n lines of ifp to stdout.
 *
 * ifp  stream to read; it is consumed up to end-of-file.
 * n    number of trailing lines wanted. At most MAX_LINES lines are
 *      retained, so larger requests are clamped to MAX_LINES; if the
 *      stream holds fewer lines than requested, all of them are printed.
 *
 * Lines are kept in a ring of MAX_LINES fixed-size slots. A line longer
 * than a slot is split by fgets and counted as several lines.
 */
void tail( FILE *ifp, size_t n ) {
    enum { MAX_LINES = 9, LINE_BYTES = 126 + 1 + 1 };
    char text[ MAX_LINES ][ LINE_BYTES ];
    size_t lnCnt = 0;

    // Slot lnCnt % MAX_LINES always receives the newest line, so the ring
    // ends up holding the most recent MAX_LINES lines.
    while( fgets( text[ lnCnt % MAX_LINES ], sizeof text[0], ifp ) != NULL )
        lnCnt++;

    // Never ask for more than the ring holds or more than was read.
    // Without the first clamp the unsigned arithmetic below would wrap.
    if( n > MAX_LINES )
        n = MAX_LINES;
    if( n > lnCnt )
        n = lnCnt;

    // lnCnt >= n here, so the subtraction cannot underflow.
    size_t start = ( lnCnt - n ) % MAX_LINES;
    for( size_t i = 0; i < n; i++ )
        printf( "%s", text[ ( start + i ) % MAX_LINES ] );
}
// Driver for tail(): expects a file name and a single-digit line count.
int main( int argc, char *argv[] ) {
    // The digit test is only evaluated once the argument count is known
    // to be right, so argv[ 2 ] is never touched when it is absent.
    int usageOk = ( argc == 3 ) && isdigit( argv[ 2 ][ 0 ] );
    if( !usageOk ) {
        fprintf( stderr, "Usage: %s filename #lines (1-9)\n", argv[ 0 ] );
        exit( EXIT_FAILURE );
    }

    const char *path = argv[ 1 ];
    FILE *stream = fopen( path, "r" );
    if( !stream ) {
        fprintf( stderr, "Cannot open '%s'\n", path );
        exit( EXIT_FAILURE );
    }

    // Only the first character of the count argument is used.
    tail( stream, argv[ 2 ][ 0 ] - '0' );

    fclose( stream );
    return 0;
}

Read sysfs file into buffer for string comparison, without having to open it twice

I have written the following code, modified it a bit for simplicity:
// Reads /sys/file into a heap buffer and looks for "foo" in it.
// The file is opened twice: once to count its bytes, once to copy them.
FILE *sysfs_file = fopen("/sys/file", "rb");
if (sysfs_file != NULL){
/* Loop over file handler until EOF to get filesize in bytes */
// NOTE(review): the result of this second fopen is not checked.
FILE *sysfs_file_get_size = fopen("/sys/file", "rb");
// NOTE(review): fgetc returns int; storing the result in a char before
// comparing with EOF can confuse a data byte with end-of-file.
char d = fgetc(sysfs_file_get_size);
int filesize = 0;
while (d != EOF){
d = fgetc(sysfs_file_get_size);
filesize++;
}
fclose(sysfs_file_get_size);
/* Allocate buffer and copy file into it */
// NOTE(review): the malloc result is not checked. Exactly filesize bytes
// are written below, so buf has no terminating null character when it
// is handed to strstr.
char *buf = malloc(filesize);
char c = fgetc(sysfs_file);
for (int i = 0; i < filesize; i++)
{
buf[i] = c;
c = fgetc(sysfs_file);
}
fclose(sysfs_file);
if(strstr(buf, "foo")) {
printf("bar.\n");
}
}
For security reasons, it seemed better to not assume what size the file will be, and first loop through the file to check of how many bytes it consists.
Regular methods of checking the filesize like fseek() or stat() do not work, as the kernel generates the file at the moment that it is being read. What I would like to know: is there a way of reading the file into a buffer in a secure manner, without having to open a file handler twice?
First of all, in the line
FILE *sysfs_file = fopen("/sys/file", "rb");
the "rb" mode does not make sense. If, as you write, you are looking for a "string", then the file is probably a text file, not a binary file. In that case, you should use "r" instead.
If you are using a POSIX-compliant platform (e.g. Linux), then there is no difference between text mode and binary mode. In that case, it makes even less sense to specifically ask for binary mode, when the file is a text file (even though it is not wrong).
For security reasons, it seemed better to not assume what size the file will be and first loop through the file to check of how many bytes it consists.
It is not a security issue if you limit the number of bytes read to the size of the allocated memory buffer, i.e. to the number of bytes the file originally had. That way, the file will only be truncated (which is generally not a security issue).
However, if you want to ensure that the file is not truncated, then it would probably be best to ignore the initial size of the file and to simply attempt to read as much from the file as possible, until you encounter end-of-file. If the initial buffer is not large enough to store the entire file, then you can use the function realloc to resize the buffer.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//This function will return a pointer to a dynamically
//allocated memory buffer which contains the file data as
//a string (i.e. that is terminated by a null character).
//The function "free" should be called on this data when it
//is no longer required.
//Reads everything that is left in "fp" into a dynamically
//allocated buffer and returns it as a string (i.e. terminated
//by a null character).
//
//Returns NULL if memory allocation fails or if an I/O error
//occurred on "fp"; in both cases a message is printed to stderr
//and no memory is leaked.
//
//The caller owns the returned buffer and must call "free" on it
//when it is no longer required. The stream is not closed.
char *create_buffer_with_file_data_as_string( FILE *fp )
{
    char *buffer = NULL;
    size_t buffer_size = 16384;
    size_t valid_bytes_in_buffer = 0;

    for (;;) //infinite loop, equivalent to while(1)
    {
        size_t bytes_to_read, bytes_read;
        char *temp;

        //(re)allocate buffer to desired size
        temp = realloc( buffer, buffer_size );
        if ( temp == NULL )
        {
            fprintf( stderr, "Realloc error!\n" );
            free( buffer );
            return NULL;
        }

        //(re)allocation was successful, so we can overwrite the
        //pointer "buffer"
        buffer = temp;

        //calculate number of bytes to read from input file
        //note that we must leave room for adding the terminating
        //null character
        bytes_to_read = buffer_size - valid_bytes_in_buffer - 1;

        //attempt to fill buffer as much as possible with data from
        //the input file
        bytes_read = fread(
            buffer + valid_bytes_in_buffer,
            1,
            bytes_to_read,
            fp
        );

        //update number of valid bytes in the buffer
        valid_bytes_in_buffer += bytes_read;

        //fread only returns a short count on end-of-file or on
        //error, so there is nothing more to read and no reason to
        //grow the buffer again
        if ( bytes_read < bytes_to_read )
            break;

        //double the size of the buffer (will take effect in
        //the next loop iteration), refusing to wrap around
        if ( buffer_size > (size_t)-1 / 2 )
        {
            fprintf( stderr, "Realloc error!\n" );
            free( buffer );
            return NULL;
        }
        buffer_size *= 2;
    }

    //verify that no error occurred
    if ( ferror( fp ) )
    {
        fprintf( stderr, "File I/O error occurred!" );
        free( buffer );
        return NULL;
    }

    //add terminating null character to data, so that it is a
    //valid string that can be passed to the function "strstr"
    buffer[valid_bytes_in_buffer++] = '\0';

    //shrink buffer to required size
    {
        char *temp;

        temp = realloc( buffer, valid_bytes_in_buffer );
        if ( temp == NULL )
        {
            //not fatal: the larger buffer is still valid
            fprintf( stderr, "Warning: Shrinking failed!\n" );
        }
        else
        {
            buffer = temp;
        }
    }

    //the function was successful, so return a pointer to
    //the data
    return buffer;
}
//Demo driver: loads the file "filename" into memory as a single
//string and reports whether that string contains "target".
int main( void )
{
    FILE *stream = fopen( "filename", "r" );

    if ( !stream )
    {
        fprintf( stderr, "Error opening file!\n" );
        exit( EXIT_FAILURE );
    }

    char *contents = create_buffer_with_file_data_as_string( stream );

    if ( !contents )
    {
        fputs(
            "An error occured in the function:\n"
            " create_buffer_with_file_data_as_string\n",
            stderr
        );
        fclose( stream );
        exit( EXIT_FAILURE );
    }

    //everything needed is in memory now, so the stream can go
    fclose( stream );

    //report the outcome of the search
    const char *verdict =
        strstr( contents, "target" )
            ? "Found \"target\".\n"
            : "Did not find \"target\".\n";
    fputs( verdict, stdout );

    //release the buffer handed over by the helper
    free( contents );
}
For the input
This is a test file with a target.
this program has the following output:
Found "target".
Note that every time I am calling realloc, I am doubling the size of the buffer. I am not adding a constant amount to the size of the buffer. This is important, for the following reason:
Let's say that the file has a size of 160 MB (megabytes). In my program, I have an initial buffer size of about 16 KB (kilobytes). If I didn't double the size of the buffer every time I call realloc, but instead added a constant amount of bytes, for example added another 16 KB, then I would need to call realloc 10,000 times. Every time I call realloc, the content of the entire buffer may have to be copied by realloc, which means that on average, 80 MB may have to be copied every time, which is 800 GB (nearly a terabyte) in total. This would be highly inefficient.
However, if I instead double the size of the memory buffer (i.e. let the buffer grow exponentially), then it is guaranteed that the amount of data that must be copied will never be more than double the amount of the actual data. So, in my example above, it is guaranteed that never more than 320 MB will have to be copied by realloc.
You could just estimate what you need in blocks and grow the input buffer as needed...
This is untested, but gives the flavour of what should work.
This version attempts to load the entire file before investigating its content.
// Load the whole of /sys/file into a heap buffer that grows by BLK_SIZE
// bytes whenever the previous block was filled completely.
// Every failure path releases what was acquired before returning -1.
FILE *fp = fopen( "/sys/file", "rb" );
if( fp == NULL )
    return -1;
#define BLK_SIZE 1024
char *buf = malloc( BLK_SIZE );
if( buf == NULL ) {
    fclose( fp );
    return -1;
}
char *readTo = buf;
size_t bufCnt = 0;
for( ;; ) {
    size_t inCnt = fread( readTo, sizeof *readTo, BLK_SIZE, fp );
    bufCnt += inCnt;
    // A short count means end-of-file or a read error; told apart below.
    if( inCnt < BLK_SIZE )
        break;
    // Keep the old pointer until realloc is known to have succeeded.
    char *tmp = realloc( buf, bufCnt + BLK_SIZE );
    if( tmp == NULL ) {
        free( buf );
        fclose( fp );
        return -1;
    }
    buf = tmp;
    readTo = buf + bufCnt;
}
if( ferror( fp ) ) {
    free( buf );
    fclose( fp );
    return -1;
}
fclose( fp );
// bufCnt is a size_t, so it must be printed with %zu
printf( "Got %zu valid bytes in buffer\n", bufCnt );
/* do stuff with *buf */
free( buf );
Hopefully the final EDIT of version 2:
I am grateful to @Andreas Wenzel for his cheerful and meticulous testing and comments that turned earlier (incorrect!) versions of my attempts into this prototype.
The objective is to find a string of bytes in a file.
In this prototype, single "buffer loads" are examined sequentially until the first instance of the target is found or EOF reached. This seems to cope with cases when the target bytes are split across two buffer loads. This uses a ridiculously small 'file' and small buffer that would, of course, be scaled up in the real world.
Making this more efficient is left as an exercise for the reader.
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
// Simulated file with text
// Simulated file with text
char inBytes[] = "The cute brown fox jumps over the dogs and bababanana and stuff.";
char *pFrom = NULL;                 // next unread byte of the simulated file
size_t nLeft = sizeof inBytes - 1;  // bytes not yet handed out by myRead()

// Simulated 'fopen()': rewinds the simulated file to its first byte.
bool myOpen( void ) { nLeft = strlen( pFrom = inBytes ); return true; }

// Simulated 'fread()'. (only 1 "file pointer in use")
// Copies up to cnt bytes into buf and returns how many were copied;
// 0 means end-of-file.
size_t myRead( char *buf, size_t cnt ) {
    if( nLeft == 0 ) return 0; // EOF
    size_t give = nLeft <= cnt ? nLeft : cnt;
    memcpy( buf, pFrom, give );
    pFrom += give;
    nLeft -= give;
    return give;
}

// Look for string using different buffer sizes to prove target split functions
//
// srchfor  string to look for in the simulated file
// bufSize  number of bytes requested per read
// Returns true as soon as the first occurrence is found, false when
// end-of-file is reached without a match. Progress is traced on stdout.
bool foobar( char srchfor[], int bufSize ) {
    bool found = false;
    int matched = 0;
    int lenWant = strlen( srchfor ); // # of chars to match

    // RAM buffer includes room for "wrapping": a partial match of fewer
    // than lenWant bytes is kept in front of the next block
    char *iblk = (char*)malloc( lenWant + bufSize );
    if( iblk == NULL ) {
        fprintf( stderr, "Malloc failed!!!\n" );
        exit( 1 );
    }

    // simulate loading sequential blocks into a fixed size buffer.
    myOpen();
    size_t inBuf = 0;
    char *pTo = iblk; // Read to location not always start of buffer

    for( ;; ) {
        // Stop on EOF even when a partial match is still being carried:
        // with no more input it can never be completed.
        size_t got = myRead( pTo, bufSize );
        if( got == 0 )
            break;
        inBuf += got;

        printf( "'%.*s' ", (int)inBuf, iblk ); // Show what's in buffer

        // The mill where matching is carried out
        for( size_t i = 0; i < inBuf && matched < lenWant; )
            if( srchfor[ matched ] == iblk[i] )
                matched++, i++;
            else if( matched )
                i -= matched - 1, matched = 0; // rewind a bit and try again
            else i++;

        // Lucky?
        if( matched == lenWant ) { printf( "Ahha!\n" ); found = true; break; }

        if( matched == 0 ) {
            pTo = iblk, inBuf = 0; // reset things
            printf( "nothing\n" );
        } else {
            // preserve what did match, and read location is offset
            printf( "got something\n" );
            memmove( iblk, iblk + inBuf - matched, matched );
            // The next block goes directly behind the carried bytes.
            // The offset is measured from the buffer start, not from
            // wherever the previous read happened to land.
            pTo = iblk + matched;
            inBuf = matched;
            matched = 0;
        }
    }

    free( iblk );
    return found;
}
// Runs the search with several block sizes so that the target ends up
// split across successive reads in different places.
int main() {
    char *needle = "babanana";
    int blockSize = 20;

    while( blockSize < 27 ) {
        // The search prints its own trace before the summary line.
        const char *verdict = foobar( needle, blockSize ) ? "Found!": "Not Found.";
        printf( "bufSize = %d ... %s\n\n", blockSize, verdict );
        blockSize += 2;
    }
    return 0;
}
Output:
'The cute brown fox j' nothing
'umps over the dogs a' nothing
'nd bababanana and st' Ahha!
bufSize = 20 ... Found!
'The cute brown fox jum' nothing
'ps over the dogs and b' got something
'bababanana and stuff.' Ahha!
bufSize = 22 ... Found!
'The cute brown fox jumps' nothing
' over the dogs and babab' got something
'babanana and stuff.' Ahha!
bufSize = 24 ... Found!
'The cute brown fox jumps o' nothing
'ver the dogs and bababanan' got something
'babanana and stuff.' Ahha!
bufSize = 26 ... Found!
EDIT3: That memmove() and the buffer size has been an annoyance for some time now.
Here's a version that takes one character of input at a time (fgetc() compatible), uses a heap buffer that is the same size as the target, uint8_t allows a search for binary targets, implements a circular buffer and has a lot of fiddley index manipulation. It's not Knuth, but neither am I...
// Find the first occurrence of targ[0..nT) in srch[0..nS).
//
// The input is consumed one byte at a time (fgetc() compatible) into a
// heap-allocated circular buffer of exactly nT bytes that always holds the
// most recent nT bytes. After each byte that window is compared with the
// target. Both arrays are treated as raw bytes, so binary targets work.
//
// Returns the offset of the first match, or nS ("impossible offset") when
// there is none. An empty target matches at offset 0.
// Exits the process if the window cannot be allocated.
size_t srch( uint8_t srch[], size_t nS, uint8_t targ[], size_t nT ) {
    if( nT == 0 )
        return 0;
    if( nT > nS )
        return nS; // target longer than the input: cannot match

    uint8_t *q = (uint8_t*)malloc( nT );
    if( q == NULL ) {
        fprintf( stderr, "Malloc failed!!!\n" );
        exit( 1 );
    }

    size_t head = 0;    // next slot to overwrite == oldest byte once full
    size_t found = nS;  // stays at the "not found" value until a match

    for( size_t i = 0; i < nS; i++ ) {
        q[ head ] = srch[ i ]; // getc()
        head = ( head + 1 ) % nT;

        if( i + 1 < nT )
            continue; // window not full yet

        // Compare the window, oldest byte first, against the target.
        size_t k = 0;
        while( k < nT && q[ ( head + k ) % nT ] == targ[ k ] )
            k++;

        if( k == nT ) {
            found = i + 1 - nT;
            break;
        }
    }

    free( q );
    return found; // found ? offset : impossible offset
}
// Demo driver: searches one fixed sentence for each entry of a fixed
// list of targets and prints where (or whether) each one was found.
int main() {
    char *haystack =
        "The cute brown fox jumps "
        "over the dogs babababananana stuff";
    size_t haystackLen = strlen( haystack );

    char *wanted[] = {
        "The", "the", "ff",
        "babanana", "banana",
        "jumps", " cute",
        "orange",
    };
    char **stop = wanted + sizeof wanted/sizeof wanted[0];

    for( char **w = wanted; w != stop; w++ ) {
        size_t at = srch( (uint8_t*)haystack, haystackLen, (uint8_t*)*w, strlen( *w ) );

        // A return value equal to the input length signals "no match".
        if( at != haystackLen )
            printf( "%s ... %.15s\n", *w, haystack + at );
        else
            printf( "%s ... not found\n", *w );
    }
    return 0;
}
Output
The ... The cute brown
the ... the dogs and ba
ff ... ff
babanana ... babananana and
banana ... bananana and st
jumps ... jumps over the
cute ... cute brown fox
orange ... not found
In my other answer, I have answered your question on how to read the entire file into a memory buffer, in order to search it. However, in this answer, I will present an alternative solution to searching a file for a string, in which the file is searched directly, so that it is not necessary to read the entire file into memory.
In this program, I read a file character by character using getc and whenever I encounter the first character of the target string, I continue reading characters in order to compare these characters with the remaining characters of the target string. If any of these characters does not match, I push back all characters except the first one onto the input stream using ungetc, and then continue searching for the first character of the target string.
#include <stdio.h>
#include <stdlib.h>
//Searches the file "filename" for the string in "target", reading one
//character at a time with getc and un-reading characters with ungetc
//whenever a partial match fails. Prints "Found!" or "Not found!".
int main( void )
{
FILE *fp;
int c;
//define target string
const char target[] = "banana";
const size_t target_length = sizeof target - 1;
//make sure that length of target string is at least 1
_Static_assert(
sizeof target >= 2,
"target string must have at least one character"
);
//attempt to open file
fp = fopen( "filename", "r" );
if ( fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//read one character per loop iteration
while ( ( c = getc(fp) ) != EOF )
{
//compare first character
if ( c == (unsigned char)target[0] )
{
//compare remaining characters
for ( size_t i = 1; i < target_length; i++ )
{
if ( ( c = getc(fp) ) != (unsigned char)target[i] )
{
//strings are not identical, so push back all
//characters
//push back last character
//(a failure is only reported when c is a real character;
//c may be EOF here, which cannot be pushed back)
if ( ungetc( c, fp ) == EOF && c != EOF )
{
fprintf( stderr, "Unexpected error in ungetc!\n" );
goto cleanup;
}
//push back all other characters, except for
//the first character
//(walks from target[i-1] down to target[1], so the characters
//are pushed back in the reverse of the order they were read)
for ( const char *p = target + i - 1; p != target; p-- )
{
if ( ungetc( *p, fp ) == EOF )
{
fprintf(
stderr,
"Error with function \"ungetc\"!\n"
"This error is probably due to this function\n"
"not supporting a sufficiently large\n"
"pushback buffer."
);
goto cleanup;
}
}
//go to next outer loop iteration
goto continue_outer_loop;
}
}
//found target string
printf( "Found!\n" );
goto cleanup;
}
//label target used to leave the inner for loop and resume the
//outer while loop
continue_outer_loop:
continue;
}
//did not find target string
printf( "Not found!\n" );
//single exit point: every path closes the file
cleanup:
fclose( fp );
}
However, this solution has one big problem. The size of the pushback buffer is only guaranteed to be a single character by ISO C. Although some platforms have pushback buffers up to 4 KiB, some platforms actually only support a single character.
Therefore, in order for this solution to be portable, it would be necessary to implement a sufficiently large pushback buffer yourself using your own version of ungetc and fgetc (which I call my_ungetc and my_fgetc):
#include <stdio.h>
#include <stdlib.h>
//A pushback stack in front of a FILE stream.
//Pushed-back characters occupy data[p .. end); the stack grows
//downwards from "end" and is empty when p == end.
struct pushback_buffer
{
    char data[16384];
    char *end;   //one past the last element of "data"
    char *p;     //most recently pushed-back character, or "end" if none
    FILE *fp;    //underlying stream used when the stack is empty
};

//Pushes the character c back so that the next my_fgetc returns it.
//Returns 0 on success and EOF on failure. As with the standard
//ungetc, pushing back EOF fails and leaves the buffer unchanged.
int my_ungetc( int c, struct pushback_buffer *p )
{
    //EOF is not a character and must not be stored as one
    if ( c == EOF )
    {
        return EOF;
    }

    //verify that buffer is not full
    if ( p->p == p->data )
    {
        //buffer is full
        return EOF;
    }

    *--p->p = (char)c;

    return 0;
}

//Returns the next character as an unsigned char converted to int,
//or EOF. Pushed-back characters are returned first, most recent
//first; once they are used up the request goes to the stream.
int my_fgetc( struct pushback_buffer *p )
{
    //determine whether buffer is empty
    if ( p->p == p->end )
    {
        //pass on request to getc
        return getc( p->fp );
    }

    //convert through unsigned char, exactly as getc does, so that
    //bytes >= 0x80 are never negative and 0xFF is not taken for EOF
    return (unsigned char)*p->p++;
}
//Searches the file "filename" for the string in "target", reading one
//character at a time through my_fgetc and un-reading characters with
//my_ungetc whenever a partial match fails. Prints "Found!" or
//"Not found!".
int main( void )
{
//static storage: the structure embeds a 16384-byte array
static struct pushback_buffer pbb;
int c;
//define target string
const char target[] = "banana";
const size_t target_length = sizeof target - 1;
//make sure that length of target string is at least 1
_Static_assert(
sizeof target >= 2,
"target string must have at least one character"
);
//initialize pushback buffer except for "fp"
//(p == end is the state my_fgetc treats as "buffer empty")
pbb.end = pbb.data + sizeof pbb.data;
pbb.p = pbb.end;
//open file and write FILE * to pushback buffer
pbb.fp = fopen( "filename", "r" );
if ( pbb.fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//read one character per loop iteration
while ( ( c = my_fgetc(&pbb) ) != EOF )
{
//compare first character
if ( c == (unsigned char)target[0] )
{
//compare remaining characters
for ( size_t i = 1; i < target_length; i++ )
{
if ( ( c = my_fgetc(&pbb) ) != (unsigned char)target[i] )
{
//strings are not identical, so push back all
//characters
//push back last character
//(a failure is only reported when c is a real character;
//c may be EOF here)
if ( my_ungetc( c, &pbb ) == EOF && c != EOF )
{
fprintf( stderr, "Unexpected error in ungetc!\n" );
goto cleanup;
}
//push back all other characters, except for
//the first character
//(walks from target[i-1] down to target[1], so the characters
//are pushed back in the reverse of the order they were read)
for ( const char *p = target + i - 1; p != target; p-- )
{
if ( my_ungetc( *p, &pbb ) == EOF )
{
fprintf(
stderr,
"Error with function \"ungetc\"!\n"
"This error is probably due to this function\n"
"not supporting a sufficiently large\n"
"pushback buffer."
);
goto cleanup;
}
}
//go to next outer loop iteration
goto continue_outer_loop;
}
}
//found target string
printf( "Found!\n" );
goto cleanup;
}
//label target used to leave the inner for loop and resume the
//outer while loop
continue_outer_loop:
continue;
}
//did not find target string
printf( "Not found!\n" );
//single exit point: every path closes the file
cleanup:
fclose( pbb.fp );
}
However, reading a file a single character at a time is not very efficient, especially on platforms which support multithreading, because this requires getc to acquire a lock every time. Some platforms offer platform-specific alternatives, such as getc_unlocked on POSIX-compliant platforms (e.g. Linux) and _getc_no_lock on Windows. But even when using these functions, reading one character at a time from the input stream will be rather slow. It would be more efficient to read a whole block of several kilobytes at once.
Here is a completely different solution of mine which reads a whole block at once, instead of one character at a time. However, this solution is rather complex, because it must handle two buffers at once and requires 4 levels of nested loops and multiple gotos to break out of these nested loops.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_SIZE 8192
//One block of file data, together with the number of bytes of
//"data" that the most recent read actually filled.
struct buffer
{
char data[BUFFER_SIZE];
size_t valid_chars;
};
//Fills "buffer" from "fp" and returns the number of bytes stored.
size_t read_next_block( char buffer[static BUFFER_SIZE], FILE *fp );
//Searches the file "filename" for the string in "target", reading the
//file in blocks of BUFFER_SIZE bytes. Two buffers are used so that a
//candidate match that starts near the end of one block can be
//completed in the following block. Prints "Found!" or "Not found!".
int main( void )
{
//define target string
const char target[] = "banana";
const size_t target_length = sizeof target - 1;
//verify that length of target string is at least 1
_Static_assert(
sizeof target >= 2,
"target string must have at least one character"
);
//verify that target string is not so long that
//more than two buffers would be required
_Static_assert(
BUFFER_SIZE > sizeof target,
"target string too long"
);
//other declarations
FILE *fp;
struct buffer buffers[2];
//"current" is the block being scanned; "next" is non-NULL only when
//the following block has already been loaded by a look-ahead
struct buffer *current = NULL, *next = NULL;
//attempt to open file
fp = fopen( "filename", "r" );
if ( fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//read one block per loop iteration
do
{
char *p, *q;
size_t chars_left;
if ( next == NULL )
{
//use the first buffer
current = &buffers[0];
//load the next block
current->valid_chars = read_next_block( current->data, fp );
}
else
{
//the look-ahead already loaded this block, so reuse it
//instead of reading from the file again
current = next;
next = NULL;
}
p = current->data;
chars_left = current->valid_chars;
//search for next occurrence of starting character
while (
chars_left != 0
&&
( q = memchr( p, target[0], chars_left ) ) != NULL
)
{
//skip ahead to the candidate start found by memchr
chars_left -= q - p;
p = q;
for ( size_t i = 1; i < target_length; i++ )
{
//swap to next block, if necessary
//(i == chars_left means p[i] would be past the valid data)
if ( i == chars_left )
{
//check whether we have reached end-of-file
//(a block that is not completely full is the last one)
if ( current->valid_chars != BUFFER_SIZE )
{
goto no_match;
}
//load next block, if necessary
if ( next == NULL )
{
//make "next" point to the other buffer
next = current == &buffers[0] ? &buffers[1] : &buffers[0];
//load the next block
next->valid_chars = read_next_block( next->data, fp );
}
//compare the rest of the target against the start of
//the next block
for ( size_t j = 0; i < target_length; i++, j++ )
{
//check whether we have reached end-of-file
if ( j == next->valid_chars )
{
//the strings don't match
goto no_match;
}
if ( next->data[j] != target[i] )
{
//the strings don't match
goto no_match;
}
}
//the strings match
goto match;
}
//go to next outer loop iteration if the
//strings do not match
if ( p[i] != target[i] )
{
//the strings don't match
goto no_match;
}
}
//the strings match
goto match;
//resume the scan one character after the failed candidate
no_match:
p++;
chars_left--;
}
} while ( current->valid_chars == BUFFER_SIZE );
//no match was found
printf( "Not found!\n" );
goto cleanup;
match:
//the strings match
printf( "Found!\n" );
goto cleanup;
cleanup:
fclose( fp );
}
//Reads the next block of up to BUFFER_SIZE bytes from "fp" into
//"buffer" and returns the number of bytes stored. A return value
//smaller than BUFFER_SIZE means end-of-file was reached.
//Terminates the program if a read error occurred, including one
//that interrupted a partially filled block, so that an error is
//never mistaken for end-of-file.
size_t read_next_block( char buffer[static BUFFER_SIZE], FILE *fp )
{
    size_t bytes_read = fread( buffer, 1, BUFFER_SIZE, fp );

    //fread returns a short count on end-of-file and on error;
    //ferror tells the two apart
    if ( bytes_read != BUFFER_SIZE && ferror( fp ) )
    {
        fprintf( stderr, "Input error!\n" );
        exit( EXIT_FAILURE );
    }

    return bytes_read;
}
If the kernel is creating the file as you read it and there is a risk that the size of it will be different the next time you read it, then your only real bet is to read it into a buffer before you know how large the file is. Start by allocating a LARGE buffer - big enough that it SHOULD accept the entire file - then call read() to get (at most) that many bytes. If there's still more to be read, you can realloc() the buffer you were writing into. Repeat the realloc() as often as necessary.

How to detect a repeated " user input String " in a file?

The idea is to compare the strings line by line and detect the duplicated ones, so that they are not written to another file.
After filling my file with names, I created a new file that should receive all strings without the duplicates. I used this loop, but I don't know whether it is right or not. It didn't work:
// Attempted duplicate filter: reads from temp1.txt and writes to test.txt.
FILE *Tr , *temp;
char test[50] , test1[50];
// NOTE(review): neither fopen result is checked before use.
Tr = fopen("test.txt","w");
temp = fopen("temp1.txt" , "r");
while( !feof(temp) )
{
fgets(test , 50 , temp);
while( !feof(temp) ){
// This condition calls fgets twice per pass, and == compares the two
// pointers returned by fgets rather than the text that was read.
if ( fgets(test , 50 , temp) == fgets(test1 , 50 , temp) ){
printf("a string exist in the file");
}
else{ fprintf(Tr, "%s" , test1);
}
}
}
The following line is wrong:
if ( fgets(test , 50 , temp) == fgets(test1 , 50 , temp) ){
Using == on pointers will compare the actual pointer values, i.e. the memory addresses. If you want to compare the actual string contents (i.e. what the pointers are pointing to), then you must use strcmp instead.
Also, you should only read from the input file, not the output file.
You should also remember all strings that you have read. Otherwise, you will have no way of determining whether the current line is a duplicate or not.
Additionally, it does not make sense having both an outer loop and an inner loop with the same loop condition:
while( !feof(temp) )
Also, using !feof(temp) as a loop condition is generally wrong. See this question for further information:
Why is “while ( !feof (file) )” always wrong?
The following program will remember up to 100 strings, each up to 100 chars in length (including the terminating null character).
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define MAX_STRINGS 100
#define MAX_STRING_LEN 100
//Copies input.txt to output.txt, dropping every line that has
//already appeared earlier in the input.
//At most MAX_STRINGS distinct lines of fewer than MAX_STRING_LEN
//characters are supported; exceeding either limit is reported on
//stderr and terminates the program.
int main( void )
{
    FILE *input, *output;
    char strings[MAX_STRINGS][MAX_STRING_LEN];
    int num_strings = 0;
    char line[MAX_STRING_LEN];

    //open input file
    input = fopen( "input.txt", "r" );
    if ( input == NULL )
    {
        fprintf( stderr, "Error opening input file!\n" );
        exit( EXIT_FAILURE );
    }

    //open output file
    output = fopen( "output.txt", "w" );
    if ( output == NULL )
    {
        fprintf( stderr, "Error opening output file!\n" );
        exit( EXIT_FAILURE );
    }

    //read one line of input per loop iteration
    while ( fgets( line, sizeof line, input ) != NULL )
    {
        bool is_duplicate = false;
        char *p;

        //find newline character
        p = strchr( line, '\n' );

        //make sure that input buffer was large enough to
        //read entire line, and remove newline character
        //if it exists
        if ( p == NULL )
        {
            //a missing newline is only acceptable on the last
            //line of the file
            if ( !feof( input ) )
            {
                fprintf( stderr, "Line was too long for input buffer!\n" );
                exit( EXIT_FAILURE );
            }
        }
        else
        {
            //remove newline character
            *p = '\0';
        }

        //determine whether line is duplicate
        for ( int i = 0; i < num_strings; i++ )
        {
            if ( strcmp( line, strings[i] ) == 0 )
            {
                is_duplicate = true;
                break;
            }
        }

        if ( !is_duplicate )
        {
            //refuse to write past the end of the "strings" array
            if ( num_strings == MAX_STRINGS )
            {
                fprintf( stderr, "Too many distinct strings in input!\n" );
                exit( EXIT_FAILURE );
            }

            //remember string ("line" and each row of "strings"
            //have the same size, so the copy always fits)
            strcpy( strings[num_strings++], line );

            //write string to output file
            fprintf( output, "%s\n", line );
        }
    }

    //cleanup
    fclose( output );
    fclose( input );
}
Given the input
String1
String2
String3
String4
String5
String1
String6
String2
String1
String7
String8
String1
String2
this program has the following output:
String1
String2
String3
String4
String5
String6
String7
String8
As you can see, all duplicate strings were properly filtered out of the output.
However, using a statically sized array is a bit of a waste of space, and it also imposes a hard limit. Therefore, it may be better to use dynamic memory allocation instead:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define INITIAL_CAPACITY 100
#define MAX_LINE_LEN 200
//Copies input.txt to output.txt, dropping every line that has
//already appeared earlier in the input.
//The set of lines seen so far is kept in a dynamically allocated
//array of strings that doubles in capacity whenever it is full, so
//only the length of a single line (MAX_LINE_LEN) is limited.
int main( void )
{
    FILE *input, *output;
    char **strings;
    size_t strings_capacity;
    size_t num_strings = 0; //same type as the capacity it is compared with
    char line[MAX_LINE_LEN];

    //open input file
    input = fopen( "input.txt", "r" );
    if ( input == NULL )
    {
        fprintf( stderr, "Error opening input file!\n" );
        exit( EXIT_FAILURE );
    }

    //open output file
    output = fopen( "output.txt", "w" );
    if ( output == NULL )
    {
        fprintf( stderr, "Error opening output file!\n" );
        exit( EXIT_FAILURE );
    }

    //set capacity of "strings" array to INITIAL_CAPACITY
    strings_capacity = INITIAL_CAPACITY;
    strings = malloc( strings_capacity * sizeof *strings );
    if ( strings == NULL )
    {
        fprintf( stderr, "Memory allocation failure!\n" );
        exit( EXIT_FAILURE );
    }

    //read one line of input per loop iteration
    while ( fgets( line, sizeof line, input ) != NULL )
    {
        bool is_duplicate = false;
        char *p;

        //find newline character
        p = strchr( line, '\n' );

        //make sure that input buffer was large enough to
        //read entire line, and remove newline character
        //if it exists
        if ( p == NULL )
        {
            //a missing newline is only acceptable on the last
            //line of the file
            if ( !feof( input ) )
            {
                fprintf( stderr, "Line was too long for input buffer!\n" );
                exit( EXIT_FAILURE );
            }
        }
        else
        {
            //remove newline character
            *p = '\0';
        }

        //determine whether line is duplicate
        for ( size_t i = 0; i < num_strings; i++ )
        {
            if ( strcmp( line, strings[i] ) == 0 )
            {
                is_duplicate = true;
                break;
            }
        }

        if ( !is_duplicate )
        {
            //expand capacity of "strings" array if necessary
            if ( num_strings == strings_capacity )
            {
                char **temp;
                size_t new_capacity;

                //refuse to let the byte count wrap around
                if ( strings_capacity > (size_t)-1 / 2 / sizeof *strings )
                {
                    fprintf( stderr, "Memory allocation failure!\n" );
                    exit( EXIT_FAILURE );
                }
                new_capacity = strings_capacity * 2;

                //keep the old pointer until realloc has succeeded
                temp = realloc( strings, new_capacity * sizeof *strings );
                if ( temp == NULL )
                {
                    fprintf( stderr, "Memory allocation failure!\n" );
                    exit( EXIT_FAILURE );
                }
                strings = temp;
                strings_capacity = new_capacity;
            }

            //remember string
            strings[num_strings] = malloc( strlen( line ) + 1 );
            if ( strings[num_strings] == NULL )
            {
                fprintf( stderr, "Memory allocation failure!\n" );
                exit( EXIT_FAILURE );
            }
            strcpy( strings[num_strings], line );
            num_strings++;

            //write string to output file
            fprintf( output, "%s\n", line );
        }
    }

    //cleanup

    //free all dynamically allocated memory
    for ( size_t i = 0; i < num_strings; i++ )
        free( strings[i] );
    free( strings );

    //close file handles
    fclose( output );
    fclose( input );
}

How to save a file's lines in a string table?

I have a project in C that requires me to read a file and save each line in a string table stateTable[10][50], and I don't know how to do it. Can anyone help me?
All i have come up with for now is:
// Reads the lines of file.txt into consecutive rows of stateTable.
int i=0,j=0,x=10,y=50;
char stateTable [ 10 ][ 50 ];
static const char filename[] = "file.txt";
FILE *file = fopen ( filename, "r" );
if ( file != NULL )
{
// Each fgets call stores at most y - 1 characters in row i and
// terminates them with '\0'.
// NOTE(review): i is not limited to the 10 rows of stateTable.
while ( fgets ( stateTable[i], y , file ) != NULL )
{
i++;
}
fclose ( file );
}
else
{
perror ( filename );
}
return 0;
However, I don't know whether passing stateTable[i] to fgets is correct and, if so, whether each string saved in stateTable[10][50] will have a \0 at the end.
#include <stdio.h>
#include <string.h>
/*
 * Reads up to ROWS lines of "file.txt" into stateTable, then prints every
 * stored line together with its strlen().
 *
 * Each row holds at most COLS-1 characters plus the terminating '\0' that
 * fgets always appends; a trailing newline read from the file is kept in
 * the row. If the file cannot be opened the reason is reported with
 * perror() and no rows are printed.
 *
 * Returns 0 in all cases.
 */
int main(void)
{
    enum { ROWS = 10, COLS = 50 };
    char stateTable[ROWS][COLS];
    const char filename[] = "file.txt";
    int count = 0;              /* number of rows filled so far */
    FILE *file = fopen ( filename, "r" );

    if ( file != NULL )
    {
        /* The bound is tested BEFORE fgets is called, so row index ROWS
           (one past the last row) is never handed to fgets. */
        while ( count < ROWS && fgets ( stateTable[count], COLS, file ) != NULL )
        {
            count++;
        }
        fclose ( file );
    }
    else
    {
        perror ( filename );
    }

    for ( int j = 0; j < count; j++ )
    {
        printf ( "\nstateTable[j] %s len = %zu",
                 stateTable[j], strlen (stateTable[j]));
    }
    printf ("\n");
    return 0;
}
The output of the printf loop at the end shows that the code works.
The lengths reported by strlen show that a terminating \0 is stored in each row.
Note that fgets keeps the line feed in the string.

Working on a command line interpreter and can't get the && and || to work correctly

While trying to create a very basic command line interpreter I have come across a section I can't seem to figure out. While I am checking the token for the required delimiters, I can't seem to correctly enable the && and || functions. Listed below is the loop that inserts the args and then creates the process.
I am just focusing on the && at the moment and plan to use that implementation to help with the || function. Can you guys please take a look and help point me into the right direction?
Side note: this is also my first time writing a program in C, so there might be some errors in the code because of that.
This is a homework problem.
Thanks
Updated with full program code that I have so far.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
/* Number of slots in args. The expansion is parenthesized so the macro
   behaves as a single value inside larger expressions (ARG_SIZE - 1 would
   otherwise parse as 1 << (16 - 1)). */
#define ARG_SIZE (1<<16)
/* Size used for the input line buffer in main() */
#define MAXLINE 100
/* Argument vector of the command being collected; filled by insert() */
char *args[ARG_SIZE];
/* Index of the next free slot in args */
int place = 0;
/* Status of the most recently waited-for child, written by create() */
int return_status;
/* Store token in the next free slot of args and advance place past it. */
void insert( char *token ){
    args[ place++ ] = token;
}
void create() {
size_t nargs = place;
pid_t pid;
if ( nargs == 0 ) return;
if ( !strcmp( args[0], "exit" ) ) exit(0);
pid = fork();
if ( pid ) {
pid = wait( return_status );
} else {
if ( access( args[ 0 ], X_OK ) ){
if( execvp( args[ 0 ], args ) ) {
puts( strerror( errno ) );
exit( 127 );
}
}else{
puts( strerror( errno ) );
exit( 127 );
}
}
int i = 0;
for ( i ; i < place + 1; i++)
args[ i ] = NULL;
place = 0;
}
/*
 * Prompt/read loop of the interpreter.
 *
 * Each iteration prints "#>", reads one line from stdin, echoes it, splits
 * it on blanks and tabs with strtok(), and collects the words through
 * insert(). When a "&&", "||" or ";" token is seen, or the end of the line
 * is reached, the words collected so far are run with create().
 * The loop ends through exit() on end of input or inside create().
 */
int main( int argc, char *argv[] ){
char line[MAXLINE+1]; /* an input line */
int c; /* a single input char or EOF */
int n; /* line length */
char *token; /* pointer to an input token */
for(;;) { /* repeat until end of file */
write( 1, "#>", 2 );
if (fgets(line,MAXLINE,stdin) == NULL) /* end of file? */
exit(0);
n = strlen(line); /* get length of line */
if (n == 0) /* if line is empty */
continue;
if (line[n-1] != '\n') { /* does input end with '\n' ? */
fprintf(stderr,"Line too long.\n"); /* no, so line is too long. */
/*--------------------------------------------*/
/* Read and ignore input through end of line. */
/*--------------------------------------------*/
while ((c = fgetc(stdin)) != '\n') {
if (c == EOF) {
fprintf(stderr,"Unexpected end of file\n");
exit(1);
}
}
continue;
}
/* echo the line (still including its newline) to standard output */
write( 1, line, strlen( line ) );
line[n-1] = '\0'; /* remove the end of line */
/*-------------------------------------------------------------*/
/* Identify and process each token (i.e. sequence of non-blank */
/* characters delimited by whitespace). Every token is first   */
/* stored with insert(); for the ||, && and ; tokens the       */
/* branch below removes the operator again (place--) and runs  */
/* the words collected before it with create().                */
/*-------------------------------------------------------------*/
token = strtok(line," \t");
while (token != NULL ) {
insert( token );
if (!strcmp(token,"&&")){
/* drop the operator and terminate the argument vector in its place */
place--;
args[ place ] = NULL;
create();
/* NOTE(review): the string below has 13 characters but only 12 are
   written, so the trailing newline is not printed. */
if( return_status == -1)
write( 1, "Wait failed.\n", 12 );
/* NOTE(review): this tests the low 8 bits of the raw wait status; a
   child's exit code is normally read with WEXITSTATUS() - confirm. */
if( return_status & 0xff ){
int i = 0;
for( i; i < place; i++)
args[ i ] = NULL;
place = 0;
/* NOTE(review): the loop body is empty and token is never changed, and
   no token can equal both "||" and ";", so the condition stays true and
   this loop never terminates once it is entered. */
while( strcmp( token, "||" ) || strcmp( token, ";" ) ) ;
}
}
else if (!strcmp(token,"||")){
place--;
args[ place ] = NULL;
/* NOTE(review): `completed` is not declared anywhere in the program
   shown here. */
completed = 0;
create();
}
else if (!strcmp(token,";")){
place--;
args[ place ] = NULL;
create();
}
else{
/* ordinary word: it was already stored by insert() above */
}
token = strtok(NULL," \t");
}
/* end of line: run whatever words are still collected */
create();
}
return 0;
}
wait() takes an integer pointer, not integer. There should be a compiler warning for that. If not, I would recommend you use a warning flag.
#include <sys/wait.h>
wait(&return_status);
This check isn't correct as access() returns 0 on success. Doesn't seem like you can use access to check commands not in the current directory, unless they have given the full path to the executable or you are able to expand it. For example, "ls" would need to be expanded to "/bin/ls".
if ( access( args[ 0 ], X_OK ) ){
if( execvp( args[ 0 ], args ) ) {
puts( strerror( errno ) );
exit( 127 );
}
This check of the return_status is looking at the first byte but the program that was forked() or execvp() returns its status in the 2nd byte. The check for what it returned:
if( return_status & 0xff ){
should either be:
if ( WEXITSTATUS(return_status) != 0 ){
or
if ( (return_status & 0xff00) >> 8 ){
The if body:
for( int i = 0; i < place; i++)
args[ i ] = NULL;
place = 0;
while( strcmp( token, "||" ) || strcmp( token, ";" ) ) ;
can be replaced by:
break;
because you are doing the for loop and setting place to 0 inside create() and the while loop was infinite for me. Just breaking out of the enclosing while ends the command sequence and gets you back to reading another typed in command line.
//if ( return_status == -1)
can use the wait() status macro:
if ( ! WIFEXITED(return_status) )
This code shows what I mentioned in the other answer and works for "false && ls" and is updated to address further commands after an && fails.
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
/* Number of slots in args. Parenthesized so that the macro acts as one
   value in any expression (ARG_SIZE - 1 would otherwise be 1 << (16 - 1)). */
#define ARG_SIZE (1 << 16)
/* Size used for the input line buffer in main() */
#define MAXLINE 100
/* Argument vector of the command being collected; filled by insert() */
char *args[ARG_SIZE];
/* Index of the next free slot in args */
int place = 0;
/* Wait status of the most recently run command, written by create() */
int return_status;
/* Scratch buffer for diagnostic messages formatted in main() */
char message[200];
/* Append token to the argument vector: it goes into the slot that place
   currently names, and place then moves on to the following slot. */
void insert( char *token ){
    char **slot = &args[ place ];

    *slot = token;
    place += 1;
}
/* Empty the argument vector: slots 0 through place (inclusive) are set to
   NULL and collection restarts at index 0. */
void reset_args(void)
{
    int slot = place;

    while ( slot >= 0 )
        args[ slot-- ] = NULL;
    place = 0;
}
/* Exit code the child process in create() uses when execvp() fails */
#define EXECVP_FAILED 127
/* Copy of args[0] of the most recently started command (set in create());
   used when failure messages are formatted */
char last_command[200];
void create() {
size_t nargs = place;
pid_t pid;
if ( nargs == 0 ) return;
if ( args[0] )
strcpy(last_command,args[0]);
else
last_command[0] = '\0';
if ( !strcmp( args[0], "exit" ) ) exit(0);
pid = fork();
if ( pid ) {
pid = wait( &return_status );
} else {
if( execvp( args[ 0 ], args ) )
{
puts( strerror( errno ) );
exit( EXECVP_FAILED );
}
}
reset_args();
}
/* Print to standard output why the command most recently run by create()
   counts as failed, based on return_status and last_command. */
static void report_failure( void ){
    if ( ! WIFEXITED(return_status) )
        snprintf(message, sizeof message, "Wait failed for %s\n", last_command);
    else if ( WEXITSTATUS(return_status) == EXECVP_FAILED )
        snprintf(message, sizeof message, "execvp failed for %s\n", last_command);
    else
        snprintf(message, sizeof message, "%s command failed - exit status %d\n",
                 last_command, WEXITSTATUS(return_status));
    write( 1, message, strlen(message) );
}

/*
 * Prompt/read loop of the interpreter.
 *
 * Each iteration prints "#>", reads one line from stdin, echoes it, and
 * splits it on blanks and tabs. Ordinary words are collected with insert();
 * the tokens "&&", "||" and ";" end the current command:
 *
 *   cmd1 && cmd2   cmd2 runs only if cmd1 exited with status 0
 *   cmd1 || cmd2   cmd2 runs only if cmd1 did NOT exit with status 0
 *   cmd1 ;  cmd2   cmd2 always runs
 *
 * Operators are evaluated left to right. A command that is skipped leaves
 * the remembered result untouched, so "false && a || b" runs b and
 * "true || a && b" runs b. A failure message is printed once, when a
 * failed command causes the right-hand side of "&&" to be skipped.
 * An empty command (an operator with no words before it) runs nothing and
 * counts as success.
 *
 * The loop ends through exit(): status 0 at end of input (or via the
 * "exit" command handled in create()), status 1 if input ends in the
 * middle of an over-long line.
 */
int main( void ){
    char line[MAXLINE+1];   /* an input line */
    int c;                  /* a single input char or EOF */
    size_t n;               /* line length */
    char *token;            /* pointer to an input token */

    for(;;) {               /* repeat until end of file */
        int skip = 0;       /* nonzero: the next command is short-circuited */
        int ok = 1;         /* result of the last command run on this line */

        write( 1, "#>", 2 );
        if (fgets(line, sizeof line, stdin) == NULL)    /* end of file? */
            exit(0);
        n = strlen(line);
        if (n == 0)         /* nothing was read */
            continue;
        if (line[n-1] != '\n') {    /* no newline: the line did not fit */
            fprintf(stderr,"Line too long.\n");
            /* Read and ignore input through end of line. */
            while ((c = fgetc(stdin)) != '\n') {
                if (c == EOF) {
                    fprintf(stderr,"Unexpected end of file\n");
                    exit(1);
                }
            }
            continue;
        }
        write( 1, line, n );        /* echo the line, newline included */
        line[n-1] = '\0';           /* remove the end of line */

        token = strtok(line," \t");
        while (token != NULL ) {
            int is_and = !strcmp(token,"&&");
            int is_or  = !strcmp(token,"||");
            int is_seq = !strcmp(token,";");

            if ( !(is_and || is_or || is_seq) ) {
                /* ordinary word: part of the command being collected */
                insert( token );
            } else {
                /* operator: the command before it is complete */
                args[ place ] = NULL;
                if ( !skip && place > 0 ) {
                    create();
                    ok = WIFEXITED(return_status)
                         && WEXITSTATUS(return_status) == 0;
                    /* only report when this failure is what short-circuits
                       the "&&", not again for every skipped command */
                    if ( is_and && !ok )
                        report_failure();
                } else {
                    reset_args();
                }

                /* decide whether the command after the operator may run */
                if ( is_and ) {
                    skip = !ok;
                } else if ( is_or ) {
                    skip = ok;
                } else {
                    /* ";" starts a fresh, unconditional command */
                    skip = 0;
                    ok = 1;
                }
            }
            token = strtok(NULL," \t");
        }

        /* end of line: run or discard the last command */
        if ( !skip )
            create();
        else
            reset_args();
    }
    return 0;
}

Resources