C check string's from a text file - c

I am new at c and I am writing a code that get a string from the user and compare it to a strings from the text file and my code is only working when I compare between two characters and when I compare between two strings it's not working If someone know how can I fix the problem it's will be verey helpful. the compare line is in the searchFile function. the text file is a csv file so insted compare the char to string I need to compare between what befor the , to the string_to_search. example for csv file at the end of the code
Example: string to search = 'c' work, string to search 'name' doesn't work
#include <stdio.h>
#define STR_LEN 100
int searchFile(char* string_to_search, char* path);
int main(int argc, char* argv[])
{
FILE* text_file = 0;
int found = 0, choice = 0;
char string_to_search[STR_LEN] = {0};
if (!(fopen(argv[1], "r") == NULL)) //check if file exists
{
do
{
printf("Please enter your choice:\n");
printf("1 - Search a term in the document.\n");
printf("2 - change a value in a specific place.\n");
printf("3 - copy a value from one place to another\n");
printf("4 - Exit\n");
scanf("%d", &choice);
getchar();
switch (choice)
{
case 1:
fgets(string_to_search, STR_LEN, stdin);
string_to_search[strcspn(string_to_search, "\n")] = 0;
found = searchFile(string_to_search, argv[1]); //found = where the string line
if (found != 0)
printf("Value was found in row %d\n", found);
else
printf("Value Wasn't Found\n");
}
}while(choice != 4);
}
else
{
printf("file does not exists\n");
}
getchar();
return 0;
}
int searchFile(char* string_to_search, char* path)
{
FILE* file = fopen(path, "r");
char ch = ' ';
int i = 0, len = 0, count = 1;
fseek(file, 0, SEEK_END);
len = ftell(file);
fseek(file, 0, SEEK_SET);
len = len - 2;
char* string = (char*)malloc(sizeof(char) * len);
do //copying the chars to a string
{
ch = fgetc(file);
string[i] = ch;
i++;
} while (ch != EOF);
fclose(file);
for (i = 0; i < len; i++)
{
if (string[i] == *string_to_search) //the compare
{
free(string);
return count;
}
if (string[i] == '\n')
{
count++;
}
}
free(string);
return 0;
}
Example for a CSV file:
roee,itay,3,4
5,6,7,8
a,b,c,d
e,f,g,h

You have to change the following line:
if (string[i] == *string_to_search) //the compare
into
if (string[i] == string_to_search[i]) //the compare
The problem is that *string_to_search always refers to the first character of string_to_search. With the [i] you will get the nth character of the string as you have done it for the variable string. So as you noticed it works for a comparsion of two characters but not for two strings, because on a string you will always compare with the first character of string_to_search. For example if you want to compare "aaa" it will also work.
But as noted in the comment section you may also want to use strcmp() instead of the loop. There you will also have to pass string_to_search and not *string_to_search, because you want to pass the pointer to the string and not a single character.

Related

C/C90/Counting words in large text file

I have a text file which consists of about 30000 words. My goal is to count the actual number of the words (keep in mind that multiple punctuation marks and consecutive spaces are included, as well as words connected with - (for example three-legged), so counting just the spaces isn't correct).
I have managed to count the total characters but I am struggling with the words.
Any help?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SIZE 50
char *getfile(void);
void stats(char *filename);
int main() {
char *file;
file = getfile();
stats(file);
return 0;
}
char *getfile(void) {
char *filename;
FILE *fp;
filename = malloc(SIZE);
printf("Enter the name of the text file: ");
scanf("%49s", filename);
fp = fopen(filename, "r");
printf("\n");
if (fp == NULL) {
printf("The entered file does not exist.");
printf("\n");
} else {
printf("The file exists.");
fclose(fp);
}
return filename;
}
void stats(char *filename) {
int cnt = 0, space = 0, lines = 0;
int c;
int count = 0;
FILE *fp;
fp = fopen(filename, "r");
while (((c = fgetc(fp)) != EOF)) {
cnt++;
if (c == ' ') {
space++;
}
if (c == '\n' || c == '\0') {
lines++;
}
}
printf("\nTotal characters in file: %d", cnt);
printf("\nTotal characters (excluding spaces) in file: %d", cnt - space);
fclose(fp);
return;
}
You should make a list of all the chars that can separate between words, and count every sequence of separating characters.
The reason you are having trouble is you have no state. That is, classifying context about what came before. You can use other methods to break the file into words, but a state-machine is simple and fast. As suggested in the comments and by other answers, you need two states, a white-space came before, and a word character came before. It's sort of like the one-bit derivative, with rising edge, white-space space to word, as a the thing you count.
Stripping off most of the extraneous stuff, this might be how you do a state machine.
#include <stdio.h>
int main(void) {
unsigned char buf[16384 /*50*/]; /* 50 is small. */
enum { WHITE, WORD } state = WHITE;
size_t cnt = 0, lines = 0, words = 0, nread, i;
do { /* Fill `buf`. */
nread = fread(buf, 1, sizeof buf, stdin);
if(ferror(stdin)) { perror("wc"); return 1; }
cnt += nread;
for(i = 0; i < nread; i++) { /* Char-by-char in `buf`. */
unsigned char c = buf[i];
/* https://en.cppreference.com/w/cpp/string/byte/isspace */
switch(c) {
case '\n':
lines++; /* Fall-though. Doesn't handle CRs properly. */
case '\0': case ' ': case '\f': case '\r': case '\t': case '\v':
state = WHITE;
break;
default:
if(state == WORD) break;
state = WORD;
words++;
break;
}
}
} while(nread == sizeof buf);
printf("Total characters in file: %lu\n", (unsigned long)(cnt - lines));
printf("Total lines in file: %lu\n", (unsigned long)lines);
printf("Total words in file: %lu\n", (unsigned long)words);
return 0;
}
I off-loaded some work on the hosted-environment for brevity, ./wc < file.txt and I used a buffer.

Find number of occurrences for the substring in a string using C programming

I am trying a program in c to read a text file that contains array of characters or a string and find the number of occurrences of the substring called "GLROX" and say sequence found when it is found. And the "inputGLORX.txt" contains following string inside it.
GLAAAROBBBBBBXGLROXGLROXGLROXGLROXGLCCCCCCCCCCCCCCROXGGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROX
But i am getting wierd results. It would be great if some expert in C-programming helps me to solve this and thanks in advance.
#include <stdio.h>
#include <conio.h>
#include <string.h>
#define NUMBER_OF_STRINGS 40
#define MAX_STRING_SIZE 7
void seqFound()
{
printf("Sequence Found\n");
}
int main()
{
FILE *fp;
char buff[1000];
char strptrArr[NUMBER_OF_STRINGS] [MAX_STRING_SIZE];
const char *search = "GLROX";
fp = fopen("D:/CandC++/inputGLORX.txt", "r");
if(fp==NULL)
printf("It is a null pointer");
while(!feof(fp))
{
//fscanf(fp, "%s", buff);
fgets(buff, 1000,fp);
}
int len = strlen(buff);
printf("length is %d\n",len);
int count = 0;
char *store;
while(store = strstr(buff, search))
{
printf("substring is %s \n",store);
count++;
search++;
}
printf("count is %d\n",count);
while (count!=0) {
seqFound();
count--;
}
return 0;
}
As said in the comment, their are at least 2 problems in the code: your fgets will only fetch the last line (if it fetch one at all ? In any case, this is not what you want), and you are incrementing the search string instead of the buff string.
Something like this should fix most of your problems, as long as no lines in your file are longer than 999 characters. This will not work properly if you use the \n or NULL characters in your search string.
int count = 0;
while (fgets(buff, 1000, fp) != NULL)
{
char *temp = buff;
while ((temp = strstr(temp, search)))
{
printf("%d. %s\n", count + 1, temp);
count++;
temp++;
}
}
Here is a main for testing. I used argv to provide the input.txt and the search string.
#include <stdio.h>
#include <string.h>
int main(int argc, char **argv)
{
FILE *fp;
char buff[1000];
char *search;
if (argc < 3)
return (-1);
search = argv[2];
if (search[0] == '\0')
return (-1);
if ((fp = fopen(argv[1], "r")) == NULL)
return (-1);
int count = 0;
while (fgets(buff, 1000, fp) != NULL)
{
char *temp = buff;
while ((temp = strstr(temp, search)))
{
printf("%d. %s\n", count + 1, temp);
count++;
temp++;
}
}
printf("Match found: %d\n", count);
return 0;
}
The way you search in buff is wrong, i.e. this code:
while(store = strstr(buff, search))
{
printf("substring is %s \n",store);
count++;
search++; // <------- ups
}
When you have a hit, you change search, i.e. the string you are looking for. That's not what you want. The search string (aka the needle) shall be the same all the time. Instead you want to move forward in the buffer buff so that you can search in the remainder of the buffer.
That could be something like:
int main()
{
const char* buff = "GLAAAROBBBBBBXGLROXGLROXGLROXGLROXGLCCCCCCCCCCCCCCROXGGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROX";
const char* search = "GLROX";
const char* remBuff = buff; // Pointer to the remainder of buff
// Initialized to be the whole buffer
const char* hit;
int cnt = 0;
while((hit = strstr(remBuff, search))) // Search in the remainder of buff
{
++cnt;
remBuff = hit + 1; // Update the remainder pointer so it points just 1 char
// after the current hit
}
printf("Found substring %d times\n", cnt);
return 0;
}
Output:
Found substring 15 times

Problems making a spell checker, I cant remove the Punctuation

Spell-Checker:
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include "search.h"
int main(void)
{
FILE *f = fopen("mistakes.txt", "w");
FILE *fPointer;
FILE *dictionary;
//int a,b;
char fname[20];
char wordcheck[45];
char worddict[45];
char dummy;
int i;
int notfound;
int k;
This is the files i am using including the dictionary file that has the words
i compare to the other text file
///^below add text file to be read
fPointer = fopen("fname", "r");
dictionary = fopen("dictionary.txt","r");
if (f == NULL)
{
printf("Error oping file!\n");
exit(1);
}
I have created this loop to make the words lower cased and identify the punctuation
while(fscanf(fPointer,"%s", wordcheck)!=EOF)// makes all characters lower case
{ // an array that contains the characters of the original word
char wordChars[1000];
int newwordlength = 0;
// loops through each character in a word
for (i=0; i < strlen(wordcheck); i++)
{
// makes all words lower cased
wordcheck[i] = tolower(wordcheck[i]);
// checks the word has these characters and increments newwordlength where the character is a letter
if (wordChars[i] != ','
&& wordChars[i] != '.'
&& wordChars[i] != '?'
&& wordChars[i] != '!'
&& wordChars[i] != ':')
{ // how long the word will be without these characters
newwordlength++;
}
}
// creates new character array with size newwordlength
char newWordChars[1000];
*I think my problem is in this part*
int currChar = 0;
**This is where i think my problem is as its not removing the the characters **
// loops through these characters and only adds non-punctuation characters to the new character array
for (k = 0; k < strlen(wordChars); k++)
{
if (wordChars[k] != ','
&& wordChars[k] != '.'
&& wordChars[k] != '?'
&& wordChars[k] != '!'
&& wordChars[k] != ':')
{
newWordChars[currChar] = wordChars[k];
currChar++;
}
}
When I use this loop, I copy the mistakes into a text file but it copies the punctuation as well
fseek(dictionary,0,SEEK_SET);
while(fscanf(dictionary,"%s", worddict)!=EOF)
{
notfound = 1;
if(strcmp(wordcheck, worddict)==0)// compares strings//
{
printf("This word: %s is in the dictionary\n", wordcheck);
notfound = 0;
break;
}
}
if(notfound == 1)
This is where it copies the txt file.
{
printf("%s is not in the dictionary\n", wordcheck);
const char *mistakes = "Write this to file";
fprintf(f, "Words not in the dictionary: %s\n", wordcheck);
}
printf("Your file has been checked. Please check your errors by typing mistakes.txt to view ");
}
fclose(dictionary);
fclose(fPointer);
return 0;
}

Returning the nth word in a file

I have a file with a bunch of words (separated by spaces). I'm trying to get the nth word.
I'm looping through each character of the file. I count the number of words by adding 1 to a counter when it gets to a space. If the counter value is equal to n (i.e. it's at the word I want), I want add the current character to the char array. Since n is an int, I use sprintf to convert to a char and then use strncat to add the letter to the word.
Here's the code:
int n;
int count = 1;
char word[100];
char converted_char[32];
while ((n = fgetc(file)) != EOF) {
if ((n) == ' ')
count++;
if ((count) == wordNumber)
{
sprintf(converted_char, "%d", n);
strncat(word, converted_char, 1);
}
}
printf("The word is: %s", word);
The problem is, the word is returned an an int. I tried replacing %s with %c which gave me an error. What am I doing wrong?
As well, I'm open to suggestions of better ways to do this.
try this:
#include <stdio.h>
int main(void){
int wordNumber = 3;
FILE *file = fopen("data.txt", "r");
int count = 0;
char word[100];
while (fscanf(file, "%99s", word) != EOF) {
if(++count == wordNumber){
printf("The word is: %s\n", word);
break;
}
}
fclose(file);
return 0;
}

Exercise for exam about comparing string on file

I'm trying to do this exercise for my exam tomorrow.
I need to compare a string of my own input and see if that string is appearing on the file. This needs to be done directly on the file, so I cannot extract the string to my program and compare them "indirectly".
I found this way but I'm not getting it right, and I don't know why. The algorithm sounds good to me.
Any help, please? I really need to focus on this one.
Thanks in advance, guys.
#include<stdio.h>
void comp();
int main(void)
{
comp();
return 0;
}
void comp()
{
FILE *file = fopen("e1.txt", "r+");
if(!file)
{
printf("Not possible to open the file");
return;
}
char src[50], ch;
short i, len;
fprintf(stdout, "What are you looking for? \nwrite: ");
fgets(src, 200, stdin);
len = strlen(src);
while((ch = fgetc(file)) != EOF)
{
i = 0;
while(ch == src[i])
{
if(i <= len)
{
printf("%c - %c", ch, src[i]);
fseek(file, 0, SEEK_CUR + 1);
i++;
}
else break;
}
}
}
Your logic after matching the first character looks suspect. There's no need to seek in the file, you need to read more content to try to match the later bytes from src and resetting i on each iteration prevents you from checking later characters from src.
The following (untested) code should be closer to the mark
i = 0;
while((ch = fgetc(file)) != EOF) {
if (ch != src[i]) {
i = 0;
}
else if (++i >= len) {
printf("found %s in file\n", src);
break;
}
}
It relies on repeated calls to fgetc rather than fseek and only resets the index into src when a character doesn't match.
Note also that
char src[50];
fgets(src, 200, stdin);
is slightly wrong. It tells fgets that it can write up to 200 chars to src. Writing any more than 50 will write beyond the memory allocated for src, with undefined consequences. You should change this to
char src[50];
fgets(src, sizeof(src), stdin);
while((ch = fgetc(file)) != EOF)
{
if(ch != compare[i]){
i = 0;
}
else{
i++;
}
if(i >= strlen(compare){
printf("we have a match");
break;
}
}

Resources