reading certain characters with fgetc - c

I am trying to create a spell checking program that takes an input file and makes sure each word is correct by searching through a dictionary file. The problem i am facing is that when i try to take each word seperated by spaces from the input file and put it into a char [] the words with " for some reason print
H0
i1
c0
h1
r2
i3
s4
!5
â0
1
2
h3
o4
w5
w6
â7
8
9
a0
r1
42
e3
y0
o1
u2
.3
the integers are my index
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dict.h"
int main(int argc, char *argv[] ) {
FILE *fdict,*input;
int i;
char ch;
/* the biggest posible word is 30 plus a possible of two " or ' characters and the null character. so the limit of the array is 33*/
char norm[33];
if ( argc < 3 ) /* argc should be 3 for correct execution*/
{
fprintf(stderr,"1 or 2 Files were missing.");
exit(1);
}
if ( argc > 3 ){
fprintf(stderr,"too many Arguments");
exit(1);
}
/* We assume argv[1] and agrv[2] are filenames to open*/
fdict = fopen( argv[1], "r" );/* file pointer for the dictionary file*/
input = fopen( argv[2], "r" );/*file pointer for the input file*/
/* fopen returns NULL on failure */
if ( fdict == NULL ){
fprintf(stderr,"Could not open file: %s\n", argv[1] );/*checks to make sure the dictionary file can be opened*/
exit(1);
}
if ( input == NULL ){
fprintf(stderr,"Could not open file: %s\n", argv[2] );/*checks to make sure the input file can be opened*/
exit(1);
}
/* Read one character at a time from file, stopping at EOF, which
indicates the end of the file. Note that the idiom of "assign
to a variable, check the value" used below works because
the assignment statement evaluates to the value assigned. */
while ( ( ch = fgetc( input ) ) != EOF ) {
char word[33] = "";/* resets the array*/
for ( i = 0; !isspace( ch ) ; i++ ){
word[i] = ch;
printf("%c%d\n",ch,i);/* checking to see what is wrong with the index*/
ch = fgetc( input );
}
}
fclose( fdict );
fclose( input );
return 0;
}
my input looks like:
Hi chris! “howw” are you.

" is not the same as “ nor ”. (3 different quote marks.) Based on different encodings, these 3 characters use various sequences of char to represent them yet code only prints one char at a time.
Suggest just using the simpe quote mark ".
A simple or programmer's text editor would do. Avoid a word processor that may bring in non-ASCII quote marks until your code is ready for that (#n.m.)

Related

Detect if line is the last line of a file, using only one loop?

I am writing a text file parser in C.
I would like to read each line of a text file using fgets, except for the very last line, which I would like to skip.
Also, there is no telling how many characters will be in the file or in the last line, but assume my parser only cares about the first LINEMAXLEN characters in each line.
Currently, the only way I can think to do this is by running two loops, something like the following:
char line[ LINEMAXLEN+1u ];
unsigned int nlines;
unsigned int i;
nlines = 0u;
while ( fgets (line, LINEMAXLEN, file) != NULL )
nlines += 1u;
i = 0u;
while ( fgets (line, LINEMAXLEN, file) != NULL ) {
if ( i >= nlines - 1u )
break;
//...parse the line
i += 1u;
}
But surely, there's got to be a smarter way to do it in only one loop, no?
Instead of using two loops, it would be more efficient to always read two lines in advance and to only process a line once the next line has been sucessfully read. That way, the last line will not be processed.
Here is an example:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define LINEMAXLEN 30
//forward function declarations
void process_line( const char *line );
bool read_start_of_line_and_discard_rest( char buffer[], int buffer_size, FILE *fp );
int main( void )
{
FILE *fp;
char lines[2][LINEMAXLEN];
//This index specifies which index in the array "lines"
//represents the newest line. The other index is the
//index of the previous line.
int newest_index = 0;
//attempt to open file
fp = fopen( "input.txt", "r" );
if ( fp == NULL )
{
fprintf( stderr, "Error opening file!\n" );
exit( EXIT_FAILURE );
}
//read first line
if ( !read_start_of_line_and_discard_rest( lines[newest_index], LINEMAXLEN, fp ) )
{
fprintf( stderr, "Error reading first line!\n" );
exit( EXIT_FAILURE );
}
//process one line per loop iteration
for (;;)
{
//swap the index, so that the newest line is now the
//previous line
newest_index = !newest_index;
//read the new line
if ( !read_start_of_line_and_discard_rest( lines[newest_index], LINEMAXLEN, fp ) )
{
//we have reached end-of-file, so we don't process the
//previous line, because that line is the last line
break;
}
//since reading in a new line succeeded, we can be sure that
//the previous line is not the last line, so we can process
//the previous line
//process the previous line
process_line( lines[!newest_index] );
}
//cleanup
fclose( fp );
}
//This function will process a line after it has been read
//from the input file. For now, it will only print it.
void process_line( const char *line )
{
printf( "Processing line: %s\n", line );
}
//This function will read exactly one line of input and remove the
//newline character, if it exists. On success, it will return true.
//If this function is unable to read any further lines due to
//end-of-file, it returns false. If it fails for any other reason, it
//will not return, but will print an error message and call "exit"
//instead.
//If the line is too long to fit in the buffer, it will discard
//the rest of the line and report success.
bool read_start_of_line_and_discard_rest( char buffer[], int buffer_size, FILE *fp )
{
char *p;
//attempt to read one line from the stream
if ( fgets( buffer, buffer_size, fp ) == NULL )
{
if ( ferror( fp ) )
{
fprintf( stderr, "Input error!\n" );
exit( EXIT_FAILURE );
}
return false;
}
//determine whether line was too long for input buffer
p = strchr( buffer, '\n' );
if ( p == NULL )
{
int c;
//discard remainder of line
do
{
c = getchar();
} while ( c != EOF && c != '\n' );
}
else
{
//remove newline character by overwriting it with a null
//character
*p = '\0';
}
return true;
}
For the input
This is line1.
This is line2 which has an additional length longer than 30 characters.
This is line3.
This is line4.
this program has the following output:
Processing line: This is line1.
Processing line: This is line2 which has an ad
Processing line: This is line3.
As you can see, all lines except the last line are being processed, and only the first LINEMAXLEN-1 (30-1 in my example) characters of each line are being processed/stored. The remaining characters are being discarded.
Only LINEMAXLEN-1 instead of LINEMAXLEN characters from each line are being processed/stored because one character is required to store the terminating null character.
This is quite simple to do in a single loop if we use alternating buffers [as others have mentioned].
In the loop below we read a line into the "current" buffer. If not the first line, we process the previous line in the "other" buffer.
By alternating the index into a buffer pool of two buffers, we avoid unnecessary copying.
This introduces a delay in the processing of the buffer. On the last iteration, the last line will be in the current buffer, but it will not be processed.
#define LINEMAXLEN 1000 // line length of buffer
#define NBUF 2 // number of buffers
char lines[NBUF][LINEMAXLEN]; // buffer pool
int previdx = -1; // index of bufs for _previous_ line
int curidx = 0; // index of bufs for _current_ line
char *buf; // pointer to line buffer to process
// read all lines into alternating line buffers
for (; fgets(lines[curidx],LINEMAXLEN,stdin) != NULL;
previdx = curidx, curidx = (curidx + 1) % NBUF) {
// process _previous_ line ...
if (previdx >= 0) {
buf = lines[previdx];
// process line ...
}
}
fgets() will not modify the buffer at all when it reaches EOF, so just read lines until fgets() returns NULL. The last line read will be retained:
#include <stdio.h>
int main( int argc, char **argv )
{
char line[ 1024 ];
FILE *f = fopen( argv[ 1 ], "r" );
if ( NULL == f )
{
return( 1 );
}
for ( ;; )
{
char *p = fgets( line, sizeof( line ), f );
if ( NULL == p )
{
break;
}
}
printf( "last line: %s\n", line );
return( 0 );
}
This relies on the required behavior of fgets():
The fgets function returns s if successful. If end-of-file is encountered and no characters have been read into the array, the contents of the array remain unchanged and a null pointer is returned.
Robust code should check for errors with ferror().
Working that into your text processing is left as an exercise... ;-)

Converting Lower Case Characters in a Text File to Upper Case Letters and Vice Versa

I have a text file that contains the following text, without a new line character...
Hello World
I would like to convert the lower case characters to upper case, and vice versa, so that the same text file would end up with the following text...
hELLOW wORLD
Unfortuntately, when I run my code, it goes into an endless loop. When I step through the code, I see that fseek() goes back one byte for the first loop, as expected, but it goes back two bytes for the second and subsequent loops. I don't understand why it goes back two bytes instead of one. Why is this the case? Can someone please help?
Here's my code...
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
int main()
{
FILE *fp;
int ch;
long offset;
fp = fopen("c:\\users\\domenic\\desktop\\test.txt", "r+");
if (fp == NULL)
{
printf("error: unable to open file\n");
exit(1);
}
offset = ftell(fp);
while (1)
{
ch = fgetc(fp);
if (ch == EOF)
break;
if (isupper(ch))
{
fseek(fp, offset, SEEK_SET);
fputc(tolower(ch), fp);
}
else if (islower(ch))
{
fseek(fp, offset, SEEK_SET);
fputc(toupper(ch), fp);
}
offset = ftell(fp);
}
fclose(fp);
return 0;
}
If I understand you just want to change upper to lower and lower to upper throughout a file, you may be making it a bit harder on yourself than it need be.
Before we look at an approach to make things a bit easier, let's talk about avoiding magic numbers and hardcoded paths within your code. C provides a definition for main that allows you to provide arguments to your code to avoid hardcoding values (such as file/path names) -- use them. The proper invocation of main with arguments is:
int main (int argc, char *argv[])
(or you will see the equivalent int main (int argc, char **argv))
The invocation without arguments is int main (void).
Now on to the question at hand. As mentioned in my comment, when dealing with ASCII, the bit that controls the case is the 6th-bit -- and from the discussion, if you are dealing with EBCDIC, the *case-bitis the 7th-bit. As #chux pointed out both can be handled seamlessly by determining the appropriate bitA ^ afor both (the result is32, e.g.(1 << 5)for ASCII, and64or(1 << 6)for EBCDIC. To toggle any bit on/off you simply XOR the *case-bit* with the current character(A-Za-z). So far any character'c', you wish to toggle the case of, you simply XOR it withA ^ a`, e.g.
if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'))
c ^= A ^ a;
If c was uppercase, it's now lowercase, and vice-versa.
To do that for an entire file, taking the filename to convert as the first argument to the program (or reading from stdin by default if no argument is given) and outputting the resulting case-converted to stdout, you could do something as simple as the following:
#include <stdio.h>
int main (int argc, char **argv) {
int c;
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
while ((c = fgetc(fp)) != EOF) /* read each char */
/* is it a letter ? */
if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'))
putchar (c ^ ('A' ^ 'a')); /* toggle case */
else
putchar (c); /* just output */
if (fp != stdin) fclose (fp); /* close file if not stdin */
return 0;
}
Example Input
$ cat dat/captnjack.txt
This is a tale
Of Captain Jack Sparrow
A Pirate So Brave
On the Seven Seas.
Example Use/Output
$ ./bin/case_toggle < dat/captnjack.txt
tHIS IS A TALE
oF cAPTAIN jACK sPARROW
a pIRATE sO bRAVE
oN THE sEVEN sEAS.
If you want to write the output to a new file, simply redirect the output, e.g.
$ ./bin/case_toggle < dat/captnjack.txt > dat/captnjack_toggled.txt
Which would write the case-toggled output to dat/captnjack_toggled.txt.
Look things over and let me know if you have further questions.
To start off, fputc does not delete or rather act like an "insert" after you use fseek to go back 1 character.
In this case you are:
getting a char
fseek to before the char
placing a upper/lowercase char before the char found in step 2
setting new offset to right after your new character.
You probably get alot of hEEEEEEEEEEEEEEEEEE in your text file after exiting your endless loop?
To fix this I would create a temporary new file.. something like this:
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
int main()
{
FILE *fp, *new_f;
int ch;
long offset;
fp = fopen("test.txt", "r+");
new_f = fopen("test2.txt", "w" );
if ( fp == NULL || new_f == NULL )
{
printf("error: unable to open file\n");
exit(1);
}
offset = ftell(fp);
while (1)
{
ch = fgetc(fp);
if (ch == EOF)
break;
if( !isalpha( ch ) )
{
fputc( ch, new_f );
}
else if (isupper(ch))
{
fputc(tolower(ch), new_f );
}
else if (islower(ch))
{
fputc(toupper(ch), new_f);
}
}
fclose( fp );
fclose( new_f );
return 0;
}

Segmentation Fault on my while loop

I am trying to count the number of lines and characters whatever they may be in a file that I specify from argv. But I get a segmentation fault when I hit the while loop for some reason. The program runs fine without the while loop, though it only goes through once.
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[]) {
if(argc != 2) {
return 0;
}
FILE *fp;
char c;
int lines = 0;
int chs = 0;
fp = fopen(argv[1], "r");
//Segmentation Fault happens here on the while loop
while((c = fgetc(fp)) != EOF) {
if(c == '\n') {
lines += 1;
}
else {
chs += 1;
}
}
printf("Charaters: %d\n", chs);
printf("lines: %d\n", lines);
if(fp){
fclose(fp);
}
return 0;
}
Your code needs to be follow Idiomatic C more closely.
You should validate fopen immediately, instead of after you've already attempted to use fp.
fgetc returns int, not char. This is because it needs to return side-channel information about the status of the stream (i.e. EOF), this information cannot be represented by char, but you can safely cast the int value to char if the value is not EOF.
Your code treats \r as a regular character when it is commonplace for \r\n to represent a line-break (not just a solitary \n), you might want to consider how you handle different character classes.
Your program does not handle non-trivial encodings (i.e. it will only correctly handle files in your system's native encoding, presumably ASCII). You should use a Unicode library to correctly read individual characters from a file: for example your program will treat a surrogate-pair in UTF-8 as two characters instead of 1, and would incorrectly count UTF-16 files.
Better:
FILE* fp = fopen( argv[1], "r" );
if( !fp ) {
printf( "Could not open file \"%s\" for reading.\r\n", argv[1] );
return 1;
}
int lines = 0;
int chars = 0;
int nc;
while( ( nc = fgetc( fp ) ) != EOF ) {
char c = (char)nc;
if ( c == '\n' ) lines++;
else if( c != '\r' ) chars++;
}
printf( "Characters: %d\r\nLines: %d\r\n", chars, lines );
fclose( fp );
return 0;

fscanf not reading the file correctly ~ problems with reading of bytes in HEX format

I am currently trying to do something that I have done dozens of times in C++, but this is my first time doing it in C.
I have a huge text file with 3 columns: hexNumber unsignedChar int
adam 38 1
john 39 1
sara 3a 1
frank 3b 0
Christopher 3c 0
kate 3d 0
However, like I said the file is huge, and the whitespace between then varies for some reason. I don't know, I didn't make the file. The way I understand it fscanf is delimited by whitespace so any amount should be fine, right?
I'm trying to read it into an array of structs, and here is my code:
typedef struct node {
unsigned char myHex;
char* myString;
int myInt;
} node;
void foo(bar* c){
if( c == NULL )
return;
struct node nArr[205] ;
//read in opcode information
FILE *fp;
fp = fopen( "input.txt" , "r" );
if ( fp == NULL ) {
fprintf( stderr, "Can't open input file file.txt!\n");
exit(-1);
}
int it = 0;
while( !feof( fp ) ){
fscanf( fp , "%s%s%d\n" , nArr[it].myString ,
&nArr[it].myHex , &nArr[it].myInt );
}
...
However, when I read in it just floods me with whitespace. Print out shows:
myHex:
myInt: 0
myString: (null)
For reading hexadecimal integers, %x format specifier should be used. Also note that the man page of fscanf says about %x that: "pointer must be a pointer to unsigned int." Thus you should change:
while( !feof( fp ) ){
fscanf( fp , "%s%s%d\n" , nArr[it].myString ,
&nArr[it].myHex , &nArr[it].myInt );
}
to:
unsigned int hexVal = 0;
for( ; fscanf(fp , "%s %2x %d\n",
nArr[it].myString,
&hexVal,
&nArr[it].myInt) == 3 ; ++it) {
nArr[it].myHex = hexVal;
}
and in your struct, change:
char* myString;
to
char myString[MAX_LEN];
where MAX_LEN is the maximum length that might appear in your input.
I don't think you've done it as often as you suggest: You are reading a string into an unsigned char - that's not going to fit (in C or C++). You appear to be reading a string into an uninitialized pointer - that's not going to work (in C or C++).

Replace string with another

I am just not sure why my replaceWord isn't going in to the file at all i have used all the commented out and so on and so forth. I am just trying to replace with with the text received from the command line argument. I know i might be far off I was just looking for a relatively easy way to do it.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main ( int argc, char *argv[] )
{
if ( argc != 4 ) /* argc should be 2 for correct execution */
{
/* We print argv[0] assuming it is the program name */
printf( "usage: %s filename\n", argv[0] );
}
else
{
// We assume argv[1] is a filename to open
char* wordReplace = argv[1];
char* replaceWord = argv[2];
FILE *file = fopen( argv[3], "r" );
/* fopen returns 0, the NULL pointer, on failure */
if ( file == 0 )
{
printf( "Could not open file\n" );
}
else
{
char string[100];
int len = 0;
/* read one character at a time from file, stopping at EOF, which
indicates the end of the file. Note that the idiom of "assign
to a variable, check the value" used below works because
the assignment statement evaluates to the value assigned. */
while ( (fscanf( file, "%s", string ) ) != EOF )
{
len = strlen(string);
printf( "%s\n", string );
if(strcmp(string, wordReplace) == 0){
//fseek (file, (-strlen(string) + 1), 1);
//fputc(*replaceWord,file);
//replaceWord++;
//strcpy(string, replaceWord);
fprintf(file,"%s",replaceWord);
fputs(replaceWord, file);
printf("\n%d\n", len);
}
}
fclose( file );
}
}
printf("\n");
return 0;
}
You've opened the file in r ie read mode and trying to write to it.
Also after correcting that, note that, the replaced word and word to be replaced have to be of the same size, if you want to replace the file in place. Else you will end up overwriting other data. And you need to use functions like fseek to reposition the internal file pointer as fp would have moved ahead after fscanf

Resources