Reading word by word from text file in C - c

I am very new to C. I am trying to read the words from a file which contains lots of not alpha characters. My input file looks something like this %tOm12%64ToMmy%^$$6 and I want to read tom first and then put tom in my data structure and then read tommy and put that in my data structure all in lowercase. This is what I have tried until now. All my other code works as I have manually sent the parameters to the methods and there are no errors. This is what I have tried to read the words from the file. A word can be of 100 characters max. Can someone help me understand the logic and possibly this code.I am very lost.Thank You!
void read(FILE *fp)
{
FILE *fp1 = fp;
char word[100];
int x;
int counter = 0;
while ((x = fgetc(fp1)) != EOF)
{
if (isalpha(x) == 0)
{
insert(&tree,word);
counter = 0;
}
if (isalpha(x) != 0)
{
tolower(x);
word[counter] = x;
counter++;
}
}
rewind(fp1);
fclose(fp1);
}

char *getWord(FILE *fp){
char word[100];
int ch, i=0;
while(EOF!=(ch=fgetc(fp)) && !isalpha(ch))
;//skip
if(ch == EOF)
return NULL;
do{
word[i++] = tolower(ch);
}while(EOF!=(ch=fgetc(fp)) && isalpha(ch));
word[i]='\0';
return strdup(word);
}
void read(FILE *fp){
char *word;
while(word=getWord(fp)){
insert(&tree, word);
}
//rewind(fp1);
fclose(fp);
}

This is a simplification of #BLUEPIXY 's answer. It also checks the array boundaries for word[]
char *getword(FILE *fp)
{
char word[100];
int ch;
size_t idx ;
for (idx=0; idx < sizeof word -1; ) {
ch = fgetc(fp);
if (ch == EOF) break;
if (!isalpha(ch)) {
if (!idx) continue; // Nothing read yet; skip this character
else break; // we are beyond the current word
}
word[idx++] = tolower(ch);
}
if (!idx) return NULL; // No characters were successfully read
word[idx] = '\0';
return strdup(word);
}

Related

Counting chars, words and lines in a file

I try to count the number of characters, words, lines in a file.
The txt file is:
The snail moves like a
Hovercraft, held up by a
Rubber cushion of itself,
Sharing its secret
And here is the code,
void count_elements(FILE* fileptr, char* filename, struct fileProps* properties) // counts chars, words and lines
{
fileptr = fopen(filename, "rb");
int chars = 0, words = 0, lines = 0;
char ch;
while ((ch = fgetc(fileptr)) != EOF )
{
if(ch != ' ') chars++;
if (ch == '\n') // check lines
lines++;
if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\0') // check words
words++;
}
fclose(fileptr);
properties->char_count = chars;
properties->line_count = lines;
properties->word_count = words;
}
But when i print the num of chars, words and lines, outputs are 81, 18, 5 respectively
What am i missing?
(read mode does not changes anything, i tried "r" as well)
The solution I whipped up gives me the same results as the gedit document statistics:
#include <stdio.h>
void count_elements(char* filename)
{
// This can be a local variable as its not used externally. You do not have to put it into the functions signature.
FILE *fileptr = fopen(filename, "rb");
int chars = 0, words = 0, lines = 0;
int read;
unsigned char last_char = ' '; // Save the last char to see if really a new word was there or multiple spaces
while ((read = fgetc(fileptr)) != EOF) // Read is an int as fgetc returns an int, which is a unsigned char that got casted to int by the function (see manpage for fgetc)
{
unsigned char ch = (char)read; // This cast is safe, as it was already checked for EOF, so its an unsigned char.
if (ch >= 33 && ch <= 126) // only do printable chars without spaces
{
++chars;
}
else if (ch == '\n' || ch == '\t' || ch == '\0' || ch == ' ')
{
// Only if the last character was printable we count it as new word
if (last_char >= 33 && last_char <= 126)
{
++words;
}
if (ch == '\n')
{
++lines;
}
}
last_char = ch;
}
fclose(fileptr);
printf("Chars: %d\n", chars);
printf("Lines: %d\n", lines);
printf("Words: %d\n", words);
}
int main()
{
count_elements("test");
}
Please see the comments in the code for remarks and explanations. The code also would filter out any other special control sequences, like windows CRLF and account only the LF
Your function takes both a FILE* and filename as arguments and one of them should be removed. I've removed filename so that the function can be used with any FILE*, like stdin.
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
typedef struct { /* type defining the struct for easier usage */
uintmax_t char_count;
uintmax_t word_count;
uintmax_t line_count;
} fileProps;
/* a helper function to print the content of a fileProps */
FILE* fileProps_print(FILE *fp, const fileProps *p) {
fprintf(fp,
"chars %ju\n"
"words %ju\n"
"lines %ju\n",
p->char_count, p->word_count, p->line_count);
return fp;
}
void count_elements(FILE *fileptr, fileProps *properties) {
if(!fileptr) return;
properties->char_count = 0;
properties->line_count = 0;
properties->word_count = 0;
char ch;
while((ch = fgetc(fileptr)) != EOF) {
++properties->char_count; /* count all characters */
/* use isspace() to check for whitespace characters */
if(isspace((unsigned char)ch)) {
++properties->word_count;
if(ch == '\n') ++properties->line_count;
}
}
}
int main() {
fileProps p;
FILE *fp = fopen("the_file.txt", "r");
if(fp) {
count_elements(fp, &p);
fclose(fp);
fileProps_print(stdout, &p);
}
}
Output for the file you showed in the question:
chars 93
words 17
lines 4
Edit: I just noticed your comment "trying to count only alphabetical letters as a char". For that you can use isalpha and replace the while loop with:
while((ch = fgetc(fileptr)) != EOF) {
if(isalpha((unsigned char)ch)) ++properties->char_count;
else if(isspace((unsigned char)ch)) {
++properties->word_count;
if(ch == '\n') ++properties->line_count;
}
}
Output with the modified version:
chars 74
words 17
lines 4
A version capable of reading "wide" characters (multibyte):
#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <wchar.h>
#include <wctype.h>
typedef struct {
uintmax_t char_count;
uintmax_t word_count;
uintmax_t line_count;
} fileProps;
FILE* fileProps_print(FILE *fp, const fileProps *p) {
fprintf(fp,
"chars %ju\n"
"words %ju\n"
"lines %ju\n",
p->char_count, p->word_count, p->line_count);
return fp;
}
void count_elements(FILE *fileptr, fileProps *properties) {
if(!fileptr) return;
properties->char_count = 0;
properties->line_count = 0;
properties->word_count = 0;
wint_t ch;
while((ch = fgetwc(fileptr)) != WEOF) {
if(iswalpha(ch)) ++properties->char_count;
else if(iswspace(ch)) {
++properties->word_count;
if(ch == '\n') ++properties->line_count;
}
}
}
int main() {
setlocale(LC_ALL, "sv_SE.UTF-8"); // set your locale
FILE *fp = fopen("the_file.txt", "r");
if(fp) {
fileProps p;
count_elements(fp, &p);
fclose(fp);
fileProps_print(stdout, &p);
}
}
If the_file.txt contains one line with öäü it'll report
chars 3
words 1
lines 1
and for your original file, it'd report the same as above.

How to check if there is no content inside a file, and how to avoid reading the last character from a file?

I have used comma as a separator after every string
fp=fopen("log.dat","ab");
fprintf(fp,"%s%c",enteredUsername,',');
fclose(fp);
Reading from file:
fp=fopen("log.dat","rb");
int c;
while ((c = fgetc(fp)) != EOF)
printf("%c", c);
How can I:
1.Avoid reading the final character inside the file,
2.Check if file is empty?
You can use a one-character buffer like this:
#include <stdio.h>
int main(void)
{
FILE *fp = fopen("log.dat", "rb");
if(fp == NULL) return;
int prev = -1000;
int c;
while ((c = fgetc(fp)) != EOF) {
if(prev >= 0) {
printf("%c", prev);
}
prev = c;
}
fclose(fp);
return 0;
}
Input log.dat
one,two,three,
Output
one,two,three
Why you want to avoid reading the final character? You can use the EOF in order to check if a file is empty.
if( (c= fgetc(fp)) == EOF){
//do stuff
}
else{
printf("File is empty\n")
return 0;
}
That worked for me.
#include <stdio.h>
int main(void) {
unsigned counter;
for( counter=0; 1; counter++) {
int ch;
char lastchar;
ch = getc(stdin);
if (ch == EOF) break;
if (counter) putc( lastchar, stdout);
lastchar = ch;
}
if (!counter) fprintf(stderr,"File was empty\n" );
else fprintf(stderr,"Read %u characters.\n", counter );
return 0;
}
// Test it with:
// echo -n "a,b,c," >bagger
// ./a.out <bagger

How to write text from file to a string in C

I want to write code were the user is asked to write the name of a file. Then I want to analyze the file's content for a symbol, let's say 'e'.
My problem is that I don't know how to start analyzing the file the correct way so that the content can be checked.
int main() {
char c[1000], file_name[1000];
int i;
int s = 0;
FILE *fp;
printf("Enter the name of file you wish to see\n");
gets(file_name);
if ((fp = fopen(file_name, "r")) == NULL){
printf("Error! opening file");
exit(1);
}
if (fp) {
while (fscanf(fp, "%s", c) != EOF) {
printf("%s", c);
}
fclose(fp);
for (i = 0; c[i] != '\0'; ++i) {
puts(c);
if (c[i] == 'e') {
++s;
}
}
printf("\nWhite spaces: %d", s);
_getche();
return 0;
}
}
char line[512]; /*To fetch a line from file maximum of 512 char*/
rewind(fp);
memset(line,0,sizeof(line)); /*Initialize to NULL*/
while ( fgets(line, 512, fp ) && fp !=EOF)
{
/*Suppose u want to analyze string "WELL_DONE" in this fetched line.*/
if(strstr(line,"WELL_DONE")!=NULL)
{
printf("\nFOUND KEYWOD!!\n");
}
memset(line,0,sizeof(line)); /*Initialize to null to fetch again*/
}
If its just a symbol you're looking for, or a char, you can simply use getc() :
int c;
....
if (fp) {
while ((c = getc(fp)) != EOF) {
if (c == 'e') {
// Do what you need
}
}
Or, alternatively, if it's a word you're looking for, fscanf() will do the job:
int c;
char symb[100];
char symbToFind[] = "watever"; // This is the word you're looking for
....
while ((c = fscanf(fp, %s, symb)) != EOF) {
if (strcmp(symb, symbToFind) == 0) { // strcmp will compare every word in the file
// do whatever // to symbToFind
}
}
These alternatives will allow you to search every char or string in the file, without having to save them as an array.

Count paragraph in file

i am working in file system in that i am counting paragraph from the file but
i am not getting please suggest me how can i do that i tried this but not getting what i want
int main()
{
FILE *fp=fopen("200_content.txt ","r");
int pCount=0;
char c;
while ((c=fgetc(fp))!=EOF)
{
if(c=='\n'){pCount++;}
else{continue;}
}
printf("%d",pCount);
return 0;
}
You should declare c as int instead of char.
Also, remember to fclose(fp); before main() returns.
A paragraph contains two subsequent '\n's, use a variable for counting the two '\n's, like this,
int main()
{
FILE *fp=fopen("200_content.txt ","r");
int pCount=0;
char c;
int newln_cnt=0;
while ((c=fgetc(fp))!=EOF)
{
if(c=='\n')
{
newln_cnt++;
if(newln_cnt==2)
{
pCount++;
newln_cnt=0;
}
}
else{continue;}
}
printf("%d",pCount);
return 0;
}
You code counts the number of newline '\n' characters, not empty line which demarcates the paragraphs. Use fgets to read lines from the file. I suggest this -
#include <stdio.h>
// maximum length a line can have in the file.
// +1 for the terminating null byte added by fgets
#define MAX_LEN 100+1
int main(void) {
char line[MAX_LEN];
FILE *fp = fopen("200_content.txt", "r");
if(fp == NULL) {
printf("error in opening the file\n");
return 1;
}
int pcount = 0;
int temp = 0;
while(fgets(line, sizeof line, fp) != NULL) {
if(line[0] == '\n') {
// if newline is found and temp is 1 then
// this means end of the paragraph. increase
// the paragraph counter pcount and set temp to 0
if(temp == 1)
pcount++;
temp = 0;
}
else {
// if a non-empty line is found, this means
// the start of the paragraph
temp = 1;
}
}
// if the last para doesn't end with empty line(s)
if(temp == 1)
pcount++;
printf("number of para in the file is %d\n", pcount);
return 0;
}
For starters, I assume that you consider a new line to be a new paragraph.
i.e.
This is line 1.
This is line 2.
has 2 paragraphs.
What your code does is neglect the case where there is an EOF and not a newline character (\n) after This is line 2.
One way to fix this is to use an extra char variable.
int main()
{
FILE *fp=fopen("200_content.txt ","r");
int pCount=0;
char c; // char that checks
char last_c; //record of the last character read in the loop
while ((c=fgetc(fp))!=EOF)
{
if(c=='\n'){pCount++;}
last_c = c;
else{continue;} //this line is redundant. You can remove it
}
if (last_c != '\n') pCount++; //if EOF at the end of line and not '\n'
printf("%d",pCount);
return 0;
}
void analyze_file(const char *filename) {
FILE* out_file;
out_file = fopen(filename,"r");
int size;
if(out_file == NULL)
{
printf("Error(analyze_file): Could not open file %s\n",filename);
return;
}
fseek(out_file,0,SEEK_SET);
char ch,ch1;
int alpha_count = 0,num_count = 0,non_alnum =0,charac=0;
int word_count =0,line=0;
int para=0;
while(!feof(out_file))
{
ch = fgetc(out_file);
if (isalpha(ch))
alpha_count++;
else if(isdigit(ch))
num_count++;
else if(!isalnum(ch) && ch!='\n' && !isspace(ch))
++non_alnum;
else if(ch=='\n')
{ line++;
ch1 = fgetc(out_file);// courser moves ahead , as we read
fseek(out_file,-1,SEEK_CUR); // bringing courser back
}
else if(ch == ch1)
{para++; //paragraph counter
word_count--;
}
if(ch==' '||ch=='\n')
{
word_count++;
}
if(ch==EOF)
{
word_count++;line++;para++;
}
}
non_alnum -=1;// EOF character subtracted.
charac = alpha_count + non_alnum + num_count;
fclose(out_file);
printf("#Paragraphs = %d\n",para);
printf("#lines = %d\n",line);
printf("#Words = %d\n",word_count);
printf("#Characters = %d\n",charac);
printf("Alpha = %d\n",alpha_count);
printf("Numerical = %d\n",num_count);
printf("Other = %d\n",non_alnum);
printf("\n");
return;
}

How to count blank lines from file in C?

So what I'm trying to do is to count blank lines, which means not only just containing '\n'but space and tab symbols as well. Any help is appreciated! :)
char line[300];
int emptyline = 0;
FILE *fp;
fp = fopen("test.txt", "r");
if(fp == NULL)
{
perror("Error while opening the file. \n");
system("pause");
}
else
{
while (fgets(line, sizeof line, fp))
{
int i = 0;
if (line[i] != '\n' && line[i] != '\t' && line[i] != ' ')
{
i++;
}
emptyline++;
}
printf("\n The number of empty lines is: %d\n", emptyline);
}
fclose(fp);
You should try and get your code right when posting on SO. You are incrementing both i and emptyline but the use el in your call to printf(). And then I don't know what that is supposed to be in your code where it has }ine. Please, at least make an effort.
For starters, you are incrementing emptyline for every line because it is outside of your if statement.
Second, you need to test the entire line to see if it contains any character that is not a whitespace character. Only if that is true should you increment emptyline.
int IsEmptyLine(char *line)
{
while (*line)
{
if (!isspace(*line++))
return 0;
}
return 1;
}
Before getting into the line loop increment the emptyLine counter and if an non whitespace character is encountred decrement the emptyLine counter then break the loop.
#include <stdio.h>
#include <string.h>
int getEmptyLines(const char *fileName)
{
char line[300];
int emptyLine = 0;
FILE *fp = fopen("text.txt", "r");
if (fp == NULL) {
printf("Error: Could not open specified file!\n");
return -1;
}
else {
while(fgets(line, 300, fp)) {
int i = 0;
int len = strlen(line);
emptyLine++;
for (i = 0; i < len; i++) {
if (line[i] != '\n' && line[i] != '\t' && line[i] != ' ') {
emptyLine--;
break;
}
}
}
return emptyLine;
}
}
int main(void)
{
const char fileName[] = "text.txt";
int emptyLines = getEmptyLines(fileName);
if (emptyLines >= 0) {
printf("The number of empty lines is %d", emptyLines);
}
return 0;
}
You are incrementing emptyline on every iteration, so you should wrap it in an else block.
Let's think of this problem logically, and let's use functions to make it clear what is going on.
First, we want to detect lines that only consist of whitespace. So let's create a function to do that.
bool StringIsOnlyWhitespace(const char * line) {
int i;
for (i=0; line[i] != '\0'; ++i)
if (!isspace(line[i]))
return false;
return true;
}
Now that we have a test function, let's build a loop around it.
while (fgets(line, sizeof line, fp)) {
if (StringIsOnlyWhitespace(line))
emptyline++;
}
printf("\n The number of empty lines is: %d\n", emptyline);
Note that fgets() will not return a full line (just part of it) on lines that have at least sizeof(line) characters.

Resources