Counting the occurrence of words in a file - c

My wordOccurrences don't work if you read a file. They can appear twice in the counting so it doesn't count correctly but if I input from stdin it counts correctly.
So I have to read in a file (-i input.txt) count the words and word occurrences in that file. Output the results in the specific file that is given with -o output.txt. If there is a -c it should ignore punctuation and convert to lowercase
MAIN
#include "count.h"
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
//#include "wordOccurances.h"
#include "wordOccurrences.c"
int main(int argc, char **argv)
{
//Initialize variables
FILE *fi; // input file
FILE *fo =stdout; //output file
char buffer[1000];
char *input; //for manually entering string with -c
input = (char*)malloc(100 * sizeof(char));
char *name = "invalid";
int wordcount; // the number of words
char *ch; //a single character
ch = (char*)malloc(100000 * sizeof(char));
int c = 0, i, iFlag = 0, oFlag = 0, cFlag = 0;
char ptr1[50][100];
char *ptr;
if(argc == 1)
{
printf("Default settings\n");
}
else
{
for(i=1; i<argc;i++)
{
if(strcmp(argv[i], "-i")==0)
{
printf("input\n");
iFlag = 1;
fi = fopen(argv[++i],"r");
}
if(strcmp(argv[i], "-o")==0)
{
printf("output\n");
oFlag = 1;
fo = fopen(argv[++i],"w");
}
if(strcmp(argv[i], "-c")==0)
{
cFlag = 1;
}
}
}
if(iFlag ==1)
{
wordcount = countForFile(fi, wordcount);
fprintf(fo,"Word count is: %d \n", wordcount);
wordOccurencesForFile(fi, cFlag,fo);
}
else
{
printf("Enter text: ");
scanf(" %[^\n]s", input);
//Loop through input
int i = 0;
if(cFlag == 1)
{
int i =0;
for( i = 0;input[i]!='\0'; i++)
{
//find upperCase letters
if(input[i] >= 'A' && input[i] <= 'Z')
{
//overwrite to lowerCase
input[i] = tolower(input[i]);
//input[i] = input[i] +32;
}//end of if statement
//ignoring punctuation
if(input[i] == ',' || input[i] == '.' || input[i] == '!' || input[i] == '?' || input[i] == '"' || input[i] == ':' || input[i] ==';' || input[i] == '-')
{
input[i] = ' ';
}
} //end of for loop
}
wordcount = 0;
for(i = 0;input[i] != '\0'; i++)
{
if(input[i] == ' ' && input[i+1] != ' ')
wordcount++;
}// end of while loop
fprintf(fo,"WordCount is: %d\n", wordcount +1);
//count occurrences
wordOccurences(input, fo);
}
if(oFlag == 1)
{fclose(fo);}
}
wordOccurrences
/*
* C Program to Find the Frequency of Every Word in a
* given String
*/
#include <stdio.h>
#include <string.h>
#include "functions.h"
wordOccurences(char *input, FILE *output)
{
int count = 0, c = 0, i, j = 0, k, space = 0;
char str[100], p[50][100], str1[20], ptr1[50][100];
char *ptr;
// printf("Enter the string\n");
//scanf(" %[^\n]s", input);
printf("string length is %d\n", strlen(input));
for (i = 0;i<strlen(input);i++)
{
if ((input[i] == ' ')||(input[i] == ', ')||(input[i] == '.'))
{
space++;
}
}
for (i = 0, j = 0, k = 0;j < strlen(input);j++)
{
if ((input[j] == ' ')||(input[j] == 44)||(input[j] == 46))
{
p[i][k] = '\0';
i++;
k = 0;
}
else
p[i][k++] = input[j];
}
k = 0;
for (i = 0;i <= space;i++)
{
for (j = 0;j <= space;j++)
{
if (i == j)
{
strcpy(ptr1[k], p[i]);
k++;
count++;
break;
}
else
{
if (strcmp(ptr1[j], p[i]) != 0)
continue;
else
break;
}
}
}
for (i = 0;i < count;i++)
{
for (j = 0;j <= space;j++)
{
if (strcmp(ptr1[i], p[j]) == 0)
c++;
}
fprintf(output,"%s -> %d times\n", ptr1[i], c);
c = 0;
}
}
wordOccurencesForFile(FILE *fp, int cFlag, FILE *output)
{
fseek(fp, 0, SEEK_END);
long fsize = ftell(fp);
fseek(fp, 0, SEEK_SET);
char *str = (char*)malloc(fsize + 1);
fread(str, fsize, 1, fp);
fclose(fp);
str[fsize] = 0;
int count = 0, c = 0, i, j = 0, k, space = 0;
char p[1000][512], str1[512], ptr1[1000][512];
char *ptr;
if ( fp )
{
for (i = 0;i<strlen(str);i++)
{
if (cFlag == 1)
{
//ignoring punctuation
if(str[i] == ',' || str[i] == '.' || str[i] == '!'
|| str[i] == '?' || str[i] == '"' || str[i] == ':'
|| str[i] ==';' || str[i] == '-')
{
str[i] = ' ';
}
}
if ((str[i] == ' ')||(str[i] == ',')||(str[i] == '.'))
{
space++;
}
}
for (i = 0, j = 0, k = 0;j < strlen(str);j++)
{
if ((str[j] == ' ')||(str[j] == 44)||(str[j] == 46))
{
p[i][k] = '\0';
i++;
k = 0;
}
else
{
if (cFlag == 1)
{
//find upperCase letters
if(str[j] >= 'A' && str[j] <= 'Z')
{
//overwrite to lowerCase
str[j] = tolower(str[j]);
}//end of if statement
}
p[i][k++] = str[j];
}
}
k = 0;
for (i = 0;i <= space;i++)
{
for (j = 0;j <= space;j++)
{
if (i == j)
{
strcpy(ptr1[k], p[i]);
k++;
count++;
break;
}
else
{
if (strcmp(ptr1[j], p[i]) != 0)
continue;
else
break;
}
}
}
for (i = 0;i < count;i++)
{
for (j = 0;j <= space;j++)
{
if (strcmp(ptr1[i], p[j]) == 0)
c++;
}
fprintf(output,"%s %d \n", ptr1[i], c);
c = 0;
}
}
else
{
printf("Failed to open the file\n");
}
}

Related

Find index of word in string

I want to write a function which will find index of word in string.
For example if string is
This is word.
my function for string "word" should return number 3.
Note: functions from string.h library and auxiliary strings are not allowed.
How could I do this in C?
I can't think of a solution better than this (though there might be better ones).
#include <stdio.h>
int main() {
char word[] = "This is a word";
int flag = 0, space = 0, pos = -1;
for (int i = 0; word[i] != '\0'; i++) {
if (flag == 1) {
break;
}
for (int j = 0; word[j] != '\0'; j++) {
if (flag == 1) {
break;
}
else if (word[j+1] == '\0' || word[j+2] == '\0' || word[j+3] == '\0') {
break;
}
else {
if (word[j] == 'w' && word[j+1] == 'o' && word[j+2] == 'r' && word[j+3] == 'd') {
flag = 1;
pos = j;
}
}
}
}
for (int i = 0; word[i] != '\0'; i++) {
if (word[i] == ' ' || word[i] == '!' || word[i] == '#') {// And many more symbols
fchars++;
}
else {
break;
}
}
if (flag == 1 && pos-1 > 0 && word[pos-1] == ' ') {
for (int i = 0; i < pos; i++) {
if (word[i] == ' ') {
space++;
}
}
printf("Found at position = %i\n", space+1-fchars);
}
else {
printf("Not found!\n");
}
}
You can split the sentence by space to get the words and then match each word in the sentence with the word you want to match
Please check this modified code:
#include<stdio.h>
int main()
{
char word[] = "word";
char string[100];
gets(string);
int curWordStart = -1;
int curWordEnd = -1;
int wordCount = 0;
int i = 0;
for (i = 0; string[i] != '\0'; i++)
{
if (string[i] == ' ')
{
int curWordLength = curWordEnd - curWordStart + 1;
if (curWordStart != -1 && curWordLength > 0)
{
wordCount++;
int foundMatch = 1;
int j;
int k = 0;
for (j = curWordStart; j <= curWordEnd; j++) {
if (word[k] == '\0') {
foundMatch = 0;
break;
}
if (word[k] != string[j])
{
foundMatch = 0;
break;
}
k++;
}
if (word[k] != '\0')
{
foundMatch = 0;
}
if (foundMatch == 1)
{
printf("%d\n", wordCount);
}
}
curWordStart = -1;
curWordEnd = -1;
}
else if ((string[i] >= 'a' && string[i] <= 'z') || (string[i] >= 'A' && string[i] <= 'Z'))
{
if (curWordStart == -1) {
curWordStart = i;
}
curWordEnd = i;
}
}
int curWordLength = curWordEnd - curWordStart + 1;
if (curWordStart != -1 && curWordLength > 0)
{
wordCount++;
int foundMatch = 1;
int j;
int k = 0;
for (j = curWordStart; j <= curWordEnd; j++) {
if (word[k] == '\0') {
foundMatch = 0;
break;
}
if (word[k] != string[j])
{
foundMatch = 0;
break;
}
k++;
}
if (word[k] != '\0')
{
foundMatch = 0;
}
if (foundMatch == 1)
{
printf("%d\n", wordCount);
}
}
return 0;
}
It will print each position of the searched word in the sentence. If you want to just print the first one, you can easily modify it.
Here are steps to follow:
you must specify precisely what is a word in the string.
measure the length len of the word to search
define an int index = 1
in a loop, using a pointer p starting at the beginning of the string:
advance p past all word delimiters (spaces, punctuation or non letters?)
if p is at end of string return 0 (not found).
measure the length len1 of the current word in the string
if len1 == len and all bytes are identical to those of the word, return index
otherwise skip the word by advancing p by len1, increment index and continue the loop.
Here is an implementation:
#include <stddef.h>
int isletter(char c) {
/* assuming ASCII */
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
int word_index(const char *str, const char *word) {
const char *p = str;
size_t len, len1, i;
int index = 1;
for (len = 0; word[len]; len++)
continue;
for (;;) {
while (!is_letter(*p))
p++;
if (*p == '\0')
return 0;
for (len1 = 0; is_letter(p[len1]); len1++)
continue;
if (len1 == len) {
for (i = 0; i < len && p[i] == word[i]; i++)
continue;
if (i == len)
return index;
}
p += len1;
index++;
}
}

C code error, put periods in front of words in a file

I'm new to the forum and I have a problem with this part of my program. What I want to do is a function where every two words of the file you open put 3 dots in front of the word and I don't see where I have the error.
void frase_lenta1(char frase[MAX_F][MAX_C], char frase2[MAX_F][MAX_C])
{
char caracter;
int i, j, cont_p, n;
char fitx; //Nom del fitxer
FILE* fit;
n = cont_p % 2;
cont_p = 0;
for (i = 0; i < MAX_F; i++) {
fit = fopen(fitx, "r");
for (j = 0; j < MAX_C; j++) {
while (frase[i][j] != '.' && frase[i][j] != '\0') {
if (frase[i][j] == ' ') {
cont_p++;
}
if (n == 0) {
frase2[i][j] = frase[i][j];
frase2[i][j] = '.';
frase2[i][j + 1] = '.';
frase2[i][j + 2] = '.';
}
}
}
while ((caracter = fgetc(fit)) != EOF) {
printf("%c", caracter);
}
}
}

How to count word occurrences each different word in C

Program should include the words in the table in the same order in
which they appear in the text.
Use string.h, ctype.h, stdio.h, include strtok function
#include<ctype.h>
int main(void)
{
int i,j;
char text[3][80];
char wordList[120][80];
int count = 0;
char* ptr;
for (i = 0; i <= 2; i++) {
gets(&text[i][0]);
}
for (i = 0; i <= 2; i++) {
for (j = 0; text[i][j]!='\0' ; j++) {
text[i][j] = tolower(text[i][j]);
}
}
ptr = strtok(text, " ,.;:!?-()[]<>");
while (ptr != NULL) {
}
I've been thinking for a long time, and I don't know how to try. You could ask me what's wrong with my code, but I don't know the approach at all...
try this...
#include <stdio.h>
#include <string.h>
void main()
{
int count = 0, c = 0, i, j = 0, k, space = 0;
char str[100], p[50][100], str1[20], ptr1[50][100];
char *ptr;
printf("Enter the string\n");
scanf(" %[^\n]s", str);
for (i = 0;i<strlen(str);i++)
if ((str[i] == ' ')||(str[i] == ',' && str[i+1] == ' ')||(str[i] == '.'))
space++;
for (i = 0, j = 0, k = 0;j < strlen(str);j++)
{
if ((str[j] == ' ')||(str[j] == 44)||(str[j] == 46))
{
p[i][k] = '\0';
i++;
k = 0;
}
else
p[i][k++] = str[j];
}
k = 0;
for (i = 0;i <= space;i++)
{
for (j = 0;j <= space;j++)
{
if (i == j)
{
strcpy(ptr1[k], p[i]);
k++;
count++;
break;
}
else
{
if (strcmp(ptr1[j], p[i]) != 0)
continue;
else
break;
}
}
}
for (i = 0;i < count;i++)
{
for (j = 0;j <= space;j++)
{
if (strcmp(ptr1[i], p[j]) == 0)
c++;
}
printf("%s -> %d times\n", ptr1[i], c);
c = 0;
}
}
try this
#include <stdio.h>
#include <string.h>
void main()
{
int count = 0, c = 0, i, j = 0, k, space = 0;
char str[100], p[50][100], str1[20], ptr1[50][100];
char *ptr;
printf("Enter the string\n");
scanf(" %[^\n]s", str);
for (i = 0;i<strlen(str);i++)
{
if ((str[i] == ' ')||(str[i] == ',' && str[i+1] == ' ')||(str[i] == '.'))
{
space++;
}
}
for (i = 0, j = 0, k = 0;j < strlen(str);j++)
{
if ((str[j] == ' ')||(str[j] == 44)||(str[j] == 46))
{
p[i][k] = '\0';
i++;
k = 0;
}
else
p[i][k++] = str[j];
}
k = 0;
for (i = 0;i <= space;i++)
{
for (j = 0;j <= space;j++)
{
if (i == j)
{
strcpy(ptr1[k], p[i]);
k++;
count++;
break;
}
else
{
if (strcmp(ptr1[j], p[i]) != 0)
continue;
else
break;
}
}
}
for (i = 0;i < count;i++)
{
for (j = 0;j <= space;j++)
{
if (strcmp(ptr1[i], p[j]) == 0)
c++;
}
printf("%s -> %d times\n", ptr1[i], c);
c = 0;
}
}

output is different when I use default value vs user input

I am doing some pattern searching in a string for my homework. When I was testing the code, I declared a default value for easy testing. When I am done with the testing and tried to run the code using user input, the output is different.
The output (when used default value) "Match at position 4." but when use user input, it says "no match".
This is my code:
int main() {
char text[255], pattern[255];
char sensitive = 'N';
int n, a[255], i, j, k = 0, l, found = 0, t = 0, temp=0;
printf("Enter a sentence , up to 255 characters:");
fgets(text, 255, stdin);
text[strcspn(text, "\n")] = 0;
printf("Enter a pattern , up to 255 characters:");
fgets(pattern, 255, stdin);
pattern[strcspn(pattern, "\n")] = 0;
printf("Should the match be case-sensitive, Y or N?");
scanf("%c", &sensitive);
if (sensitive == 'N' || sensitive == 'n') {
for (i = 0; i < strlen(text); i++) {
text[i] = tolower(text[i]);
//printf("%c", text[i]);
}
for (i = 0; i < strlen(pattern); i++) {
pattern[i] = tolower(pattern[i]);
//printf("%c", pattern[i]);
}
}
for (i = 0;text[i] != '\0';i++)
{
j = 0;
if (text[i] == pattern[j] || pattern[j] == '.')
{
temp = i + 1;
while (text[i] == pattern[j] || pattern[j] == '.')
{
i++;
j++;
}
if (pattern[j] == '\0')
{
temp -= 1;
printf("Matches at position %d\n", temp);
exit(0);
}
else
{
i = temp;
temp = 0;
}
}
}
if (temp == 0)
printf("No match.\n");
return 0;
}
This tests if text or pattern are at the terminating zero so the index does not go beyond the array boundary.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
int main ( void) {
char text[255], pattern[255];
char sensitive[3] = "N";
int i, j, temp=0;
printf ( "Enter a sentence , up to 255 characters:");
if ( ! fgets(text, 255, stdin)) {
fprintf ( stderr, "fgets problem [text]\n");
return 0;
}
text[strcspn(text, "\n")] = 0;
printf ( "Enter a pattern , up to 255 characters:");
if ( ! fgets(pattern, 255, stdin)) {
fprintf ( stderr, "fgets problem [pattern]\n");
return 0;
}
pattern[strcspn(pattern, "\n")] = 0;//remove newline
printf ( "Should the match be case-sensitive, Y or N?");
if ( ! fgets( sensitive, sizeof sensitive, stdin)) {
fprintf ( stderr, "fgets problem [sensitive]\n");
return 0;
}
if ( sensitive[0] == 'N' || sensitive[0] == 'n') {
for ( i = 0; text[i]; i++) {
text[i] = tolower(text[i]);
}
for (i = 0; pattern[i]; i++) {
pattern[i] = tolower(pattern[i]);
}
}
for ( i = 0; text[i] != '\0';i++) {
j = 0;
temp = i + 1;
while ( pattern[j] && text[i + j]
&& ( text[i + j] == pattern[j] || pattern[j] == '.')) {
j++;
}
if (pattern[j] == '\0') {
temp -= 1;
printf("Matches at position %d\n", temp);
exit(0);
}
else {
temp = 0;
}
}
if (temp == 0) {
printf("No match.\n");
}
return 0;
}

How to fix filling the struct with these data in c

I have struct input parameter and array of it called input_arr. I want to fill the array with the text as it gives the wrong value for the id and it work correctly with the name and nothing appear in visible.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct input_parameter {
int id;
char name[30];
int position;
char visible[5];
char required[5];
char parameter_type;
char client_id[5];
int min_length;
int max_length;
char confirm_required[5];
};
struct input_parameter input_arr[10];
char text[2*1024]="{\"success\": true,\"language\":
\"en\",\"action\":
\"GetServiceInputParameterList\",\"version\": 1,\"data\": {
\"input_parameter_list\": [{\"id\": 1489,\"service_id\":
12102,\"name\": \"Customer Number\",\"position\":
1,\"visible\":
true,\"required\": true,\"parameter_type\":
\"N\",\"client_id\":
true,\"min_length\": 11, \"max_length\":
11,\"confirm_required\":
false } ] }}";
Fill an array of structs with the text:
int main() {
int i = 0;
int Wstart = 0;
int Wend = 0;
char name[19] = {0x20};
char name1[19] = {0x20};
int menunum = 0;
int len = strlen(text);
while (1) // while ALL
{
if (i >= len) {
break;
}
if (text[i] == 'i' && text[i + 1] == 'd') {
while (1) { // while id
if (text[i] == ':') {
Wstart = i + 1;
Wend = 0;
i++;
} else if (text[i] == ',' || text[i] == '}') {
Wend = i;
strncpy(name, text + Wstart, Wend - Wstart);
input_arr[menunum].id = atoi(name);
memset(name, 0, sizeof(name));
i++;
break;
} else {
i = i + 1;
}
} // while id
} else if (text[i] == 'n' && text[i + 1] == 'a' && text[i + 2] == 'm' &&
text[i + 3] == 'e') {
while (1) { // while name
if (text[i] == ':') {
Wstart = i + 3;
Wend = 0;
i++;
} else if (text[i] == ',' || text[i] == '}') {
Wend = i - 1;
strncpy(name, text + Wstart, Wend - Wstart);
// name[Wend-Wstart] = '\0';
// memset(name1, 0, sizeof(name1));
if ((name[1] >= 'a' && name[1] <= 'z') ||
(name[1] >= 'A' && name[1] <= 'Z')) {
// printf("%c is an alphabet.",c);
strcpy(name1, name);
} else {
int vc = 0;
int ia = strlen(name) - 1;
for (ia = strlen(name) - 1; ia >= 0; ia--) {
name1[vc] = name[ia];
vc++;
}
}
strcpy(input_arr[menunum].name, name1);
menunum++;
memset(name, 0, sizeof(name));
i++;
break;
} else {
i = i + 1;
}
} // while name
} else if (text[i] == 'v' && text[i + 1] == 'i' && text[i + 2] == 's' &&
text[i + 3] == 'i' && text[i + 4] == 'b' &&
text[i + 5] == 'l' && text[i + 6] == 'e') {
while (1) { // while visible
if (text[i] == ':') {
Wstart = i + 3;
Wend = 0;
i++;
} else if (text[i] == ',' || text[i] == '}') {
Wend = i - 1;
strncpy(name, text + Wstart, Wend - Wstart);
// name[Wend-Wstart] = '\0';
memset(name1, 0, sizeof(name1));
if ((name[1] >= 'a' && name[1] <= 'z') ||
(name[1] >= 'A' && name[1] <= 'Z')) {
// printf("%c is an alphabet.",c);
strcpy(name1, name);
} else {
int vc = 0;
int ia = strlen(name) - 1;
for (ia = strlen(name) - 1; ia >= 0; ia--) {
name1[vc] = name[ia];
vc++;
}
}
strcpy(input_arr[menunum].visible, name1);
menunum++;
// memset(name, 0, sizeof(name));
i++;
break;
} else {
i = i + 1;
}
} // while visible
} else {
i++;
}
}
printf("id:%d \n name: %s \n visible: %s
\n",&input_arr[0].id,&input_arr[0].name,&input_arr[0].visible);
return 0;
}
Well you are printing address of id instead of its value using %d format specifier.
printf("id:%d\nname: %s\n visible: %d\n",&input_arr[0].id,&input_arr[0].name,&input_arr[0].visible);
should be
printf("id:%d \n name: %s \n visible: %d\n",input_arr[0].id,input_arr[0].name,input_arr[0].visible);

Resources