Counting word occurrences in a file C - c

Welcome everybody. I am new to Stackoverflow, I code in C for some time.
I have run into a problem writing a program that counts word occurrences in a text file. I need the output to tell which word occurred how many times. Here is the source code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int new_words=0;
int nwords=0;
typedef struct element{
char word[30];
int how_many;
} element;
int is_word_new(element ** dictionary, char * string)
{
for (int i =0; i<new_words; i++)
{
if (strcmp(string, dictionary[i]->word)==0)
return 0;
}
return 1;
}
int which_word(element ** dictionary, char * string)
{
for (int i =0; i<new_words; i++)
{
if (strcmp(string, dictionary[i]->word)==0)
return i;
}
return 0;
}
int main()
{
FILE * fp;
char word[30];
fp=fopen("input.txt", "r");
if (fp==NULL)
{
printf("FILE ERROR");
return 0;
}
while(!feof(fp))
{
fscanf(fp, "%s",word);
nwords++;
}
nwords--;
rewind(fp);
struct element * dictionary = (element*)malloc(sizeof(element)*nwords);
for (int i =0; i<nwords; i ++)
{
fscanf(fp, "%s", word);
if( is_word_new(&dictionary, word) )
{
strcpy(dictionary[new_words].word, word);
//dictionary[new_words].word= word;
dictionary[new_words].how_many=1;
new_words++;
}
else
dictionary[which_word(&dictionary, word)].how_many++;
word[0]='\0';
}
printf("\n\nFinal dictionary\n with %d words", new_words);
for (int i =0; i<new_words; i++)
{
printf("%s %d \n", dictionary[i].word, dictionary[i].how_many);
}
free(dictionary);
fclose(fp);
return 0;
}
The idea is that I first count how many words are in the text (the count somehow always comes out one greater than the real number). The function is_word_new() checks whether a newly read word is already in the dictionary; which_word() tells at which index the word was found.
However I get a segmentation fault running this program.
When I used the line which is commented // dictionary[i].word=word the program behaved as if there was only "word" in the dictionary.
Please give me hints where am I doing this stuff wrong

Must read question: Why is “while ( !feof (file) )” always wrong? Thanks to Jonathan Leffler's comment.
Please check my comments in the code below. I have given you a starting point that works when every word appears only once. I am leaving the rest of the job to you, so that we can share the fun, but you can of course ask.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Number of distinct words currently stored in the dictionary. */
int new_words = 0;
/* Total number of words read from the input file. */
int nwords = 0;

/* One dictionary entry: a word and how many times it has been seen. */
typedef struct element {
    char word[30];
    int how_many;
} element;

// no need to pass double pointer
/*
 * Returns 1 when `string` is not among the first `new_words` entries of
 * `dictionary`, 0 when it is already stored.
 * Prints a debug trace of every comparison it makes.
 */
int is_word_new(element* dictionary, char * string) {
    int i;
    for (i = 0; i < new_words; i++) {
        printf("|%s|, |%s|\n", string, dictionary[i].word);
        if (strcmp(string, dictionary[i].word) == 0)
            return 0;
        printf("i=%d\n",i);
    }
    return 1;
}

/*
 * Returns the index of the entry whose word equals `string`, searching the
 * first `new_words` entries. Returns 0 when nothing matches, which cannot be
 * told apart from a match at index 0, so call it only for words that
 * is_word_new() reported as already stored.
 *
 * `dictionary` is the ADDRESS of the pointer to the array (callers pass
 * &dictionary), so the array itself is *dictionary. Indexing the
 * pointer-to-pointer directly (dictionary[i]->word) reads past that single
 * pointer for every i > 0.
 */
int which_word(element ** dictionary, char * string) {
    element *entries = *dictionary;
    int i;
    for (i = 0; i < new_words; i++) {
        if (strcmp(string, entries[i].word) == 0)
            return i;
    }
    return 0;
}
/*
 * Counts how often each whitespace-separated word occurs in test.txt and
 * prints every distinct word with its count. Words are read in pieces of at
 * most 29 characters so they always fit in word[30].
 * Returns 0 when done or when the file cannot be opened, EXIT_FAILURE when
 * the dictionary cannot be allocated.
 */
int main() {
    FILE * fp;
    char word[30];
    fp = fopen("test.txt", "r");
    if (fp == NULL) {
        printf("FILE ERROR");
        return 0;
    }
    printf("file read\n");
    // increment only if we really read something: testing fscanf's result
    // (instead of feof) ends the loop exactly when the input runs out.
    // %29s keeps a long word from overflowing word[30].
    while (fscanf(fp, "%29s", word) == 1)
        nwords++;
    // no nwords-- here: failed reads were never counted
    rewind(fp);
    printf("nwords = %d\n", nwords);
    // do not cast what malloc returns. Also struct is not needed.
    // Ask for at least one element so an empty file does not call malloc(0).
    element * dictionary = malloc(sizeof *dictionary * (nwords > 0 ? nwords : 1));
    if (dictionary == NULL) {
        printf("MEMORY ERROR");
        fclose(fp);
        return EXIT_FAILURE;
    }
    int i;
    for (i = 0; i < nwords; i++) {
        // stop if the second pass yields fewer words than the first one
        if (fscanf(fp, "%29s", word) != 1)
            break;
        printf("read |%s|\n", word);
        if (is_word_new(dictionary, word)) {
            // copy the characters; an array cannot be assigned with '='
            strcpy(dictionary[new_words].word, word);
            dictionary[new_words].how_many = 1;
            new_words++;
        } else {
            printf("bhka\n");
            dictionary[which_word(&dictionary, word)].how_many++;
        }
    }
    printf("\n\nFinal dictionary\n with %d words", new_words);
    for (i = 0; i < new_words; i++) {
        printf("%s %d \n", dictionary[i].word, dictionary[i].how_many);
    }
    free(dictionary);
    fclose(fp);
    return 0;
}
Here is the test.txt I used:
sam klouvi george dit epfl
ok
end

Related

Read from a text file and use each line to compare if they are anagrams

I must modify my program to accept input from
a file called anagrams.txt.This file should have two strings per line, separated by the # character. My program should read
each pair of strings and report back if each pair of strings is an anagram. For example consider the following content of anagrams.txt:
hello#elloh
man#nam
Astro#Oastrrasd
Your program should print out the following:
hello#elloh - Anagrams!
man#nam - Anagrams!
Astro#Oastrrasd- Not anagrams!
I should compile in g++
Here is the code to read from text:
int main()
{
char input[30];
if(access( "anagrams.txt", F_OK ) != -1) {
FILE *ptr_file;
char buf[1000];
ptr_file =fopen("anagrams.txt","r"); if (!ptr_file)
return 1;
while (fgets(buf,1000, ptr_file)!=NULL)
printf("%s",buf);
fclose(ptr_file);
printf("\n");
}
else{ //if file does not exist
printf("\nFile not found!\n");
}
return 0;
}
Code to find if the text are anagrams:
#include <stdio.h>
int find_anagram(char [], char []);
int main()
{
char array1[100], array2[100];
int flag;
printf("Enter the string\n");
gets(array1);
printf("Enter another string\n");
gets(array2);
flag = find_anagram(array1, array2);
if (flag == 1)
printf(" %s and %s are anagrams.\n", array1, array2);
else
printf("%s and %s are not anagrams.\n", array1, array2);
return 0;
}
/* One counter per possible value of an unsigned char. */
enum { ANAGRAM_BUCKETS = (unsigned char)-1 + 1 };

/*
 * Maps a character to its counter index: the value of the character as an
 * unsigned char, with 'A'..'Z' folded onto 'a'..'z' so the comparison is
 * case-insensitive. The result is always within [0, ANAGRAM_BUCKETS).
 */
static int anagram_bucket(char ch)
{
    unsigned char c = (unsigned char)ch;
    if (c >= 'A' && c <= 'Z')
        c = (unsigned char)(c - 'A' + 'a');
    return c;
}

/*
 * Returns 1 if array1 and array2 are anagrams of each other, 0 otherwise.
 * Letters are compared case-insensitively; every other character must occur
 * the same number of times in both strings.
 *
 * The previous version indexed a 26-element table with `ch - 'a'`, which is
 * out of bounds for uppercase letters, digits, spaces and so on.
 */
int find_anagram(char array1[], char array2[])
{
    /* Net count per character: +1 for array1, -1 for array2. */
    int balance[ANAGRAM_BUCKETS] = {0};
    int i;

    for (i = 0; array1[i] != '\0'; i++)
        balance[anagram_bucket(array1[i])]++;
    for (i = 0; array2[i] != '\0'; i++)
        balance[anagram_bucket(array2[i])]--;

    /* Anagrams leave every counter at exactly zero. */
    for (i = 0; i < ANAGRAM_BUCKETS; i++)
    {
        if (balance[i] != 0)
            return 0;
    }
    return 1;
}
You can try something like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXLINE 1000
#define MAXLETTER 256
int is_anagram(char *word1, char *word2);
void check_lines(FILE *filename);
int cmpfunc(const void *a, const void *b);
void convert_to_lowercase(char *word);
/*
 * Opens anagrams.txt (the file name given in the task), reports for every
 * line whether its two '#'-separated strings are anagrams, and closes the
 * file. Exits with EXIT_FAILURE if the file cannot be opened.
 */
int
main(int argc, char const *argv[]) {
    FILE *filename;

    /* The command line is not used. */
    (void)argc;
    (void)argv;

    if ((filename = fopen("anagrams.txt", "r")) == NULL) {
        fprintf(stderr, "Error opening file\n");
        exit(EXIT_FAILURE);
    }
    check_lines(filename);
    fclose(filename);
    return 0;
}
/*
 * Reads `filename` line by line. Each line is expected to look like
 * "first#second"; for every such line it prints the pair followed by
 * "Anagrams!" or "Not Anagrams!". Lines that do not contain two non-empty
 * fields are skipped. Exits with EXIT_FAILURE if memory runs out.
 */
void
check_lines(FILE *filename) {
    char line[MAXLINE];
    char *word1, *word2, *copy1, *copy2;

    while (fgets(line, MAXLINE, filename) != NULL) {
        word1 = strtok(line, "#");
        /* Also cut at '\r' so files with CRLF line endings work. */
        word2 = strtok(NULL, "\r\n");
        /* strtok returns NULL when a field is missing; strdup(NULL) would crash. */
        if (word1 == NULL || word2 == NULL) {
            continue;
        }

        /* is_anagram sorts its arguments in place, so work on copies and
           keep the originals for printing. */
        copy1 = strdup(word1);
        copy2 = strdup(word2);
        if (copy1 == NULL || copy2 == NULL) {
            free(copy1);
            free(copy2);
            fprintf(stderr, "Out of memory\n");
            exit(EXIT_FAILURE);
        }
        convert_to_lowercase(copy1);
        convert_to_lowercase(copy2);
        if (is_anagram(copy1, copy2)) {
            printf("%s#%s - Anagrams!\n", word1, word2);
        } else {
            printf("%s#%s - Not Anagrams!\n", word1, word2);
        }
        /* The copies are only needed for this line. */
        free(copy1);
        free(copy2);
    }
}
/*
 * Converts every character of the NUL-terminated string `word` to lower
 * case, in place.
 */
void
convert_to_lowercase(char *word) {
    int i;
    for (i = 0; word[i] != '\0'; i++) {
        /* tolower() requires a value representable as unsigned char (or
           EOF); a negative plain char would be undefined behavior. */
        word[i] = (char)tolower((unsigned char)word[i]);
    }
}
/*
 * Returns 1 if word1 and word2 consist of exactly the same characters,
 * 0 otherwise. Both strings are sorted in place, so pass copies if the
 * original order still matters.
 */
int
is_anagram(char *word1, char *word2) {
    size_t first_len = strlen(word1);
    qsort(word1, first_len, sizeof(char), cmpfunc);

    size_t second_len = strlen(word2);
    qsort(word2, second_len, sizeof(char), cmpfunc);

    /* Once sorted, anagrams are identical strings. */
    return strcmp(word1, word2) == 0;
}
/*
 * qsort comparator for single characters: returns -1, 0 or +1 when the
 * char at `a` is less than, equal to, or greater than the char at `b`.
 */
int
cmpfunc(const void *a, const void *b) {
    const char lhs = *(const char *)a;
    const char rhs = *(const char *)b;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or +1. */
    return (lhs > rhs) - (lhs < rhs);
}
Since this looks like a University question, I won't provide a full solution, only a hint.
All you have to do is replace the stdin input part of the anagram-finding file with the code you wrote to read from a file: it's as simple as changing
printf("Enter the string\n");
gets(array1);
printf("Enter another string\n");
gets(array2);
to
// before program:
#define SIZE 1000
// inside main
if (access("anagrams.txt", F_OK) == -1){
printf("\nFile not found!\n");
return 1; // Abort the program early if we can't find the file
}
FILE *ptr_file;
char buf[1000];
ptr_file = fopen("anagrams.txt","r");
if (!ptr_file)
return 1;
char array1[SIZE], array2[SIZE];
while (fgets(buf, 1000, ptr_file)!=NULL){
// do all your anagram stuff here!
// there is currently one line of the input file stored in buf
// Hint: You need to split buf into array_1 and array_2 using '#' to separate it.
}
fclose(ptr_file);
printf("\n");
Additional comments:
Don't ever ever ever use gets. gets doesn't check that the string it writes to can hold the data, which will cause your program to crash if it gets input bigger than the array size. Use fgets(buf, BUF_SIZE, stdin) instead.
Beautiful code is good code. People are more likely to help if they can read your code easily. (fix your brackets)
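Just for interest, another algorithm for checking anagrams is to use qsort to sort both arrays, then a simple string matcher (strcmp) to compare them. For strings of lengths m and n this costs O(m log m + n log n); the current letter-counting algorithm is already O(m + n), but it only handles the lowercase letters a–z, whereas sorting works for any characters.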
You need to split every line you read by fgets (as you did) in to two strings, and pass them to your find_anagram function. You can do that using strtok:
int main()
{
int flag;
char buf[1000];
FILE *ptr_file;
//Check file existence
//Open the file for reading
while (fgets (buf, 1000, ptr_file) != NULL)
{
char *array1 = strtok(buf, "#");
char *array2 = strtok(NULL, "\n");
flag = find_anagram (array1, array2);
//Check flag value to print your message
}
return 0;
}
//put your find_anagram function
Don't forget to #include <string.h> to use strtok().

Read file from different positions and put in an array in C

I want to read a file which looks like this:
Name=José, Age=21
Name=Antonio, Age=26
Name=Maria, Age=24
My problem is how can i read the names and ages from different positions and different lines and put in an array names[size] and the same thing for the ages ages[size].
I have this at the moment:
#include <stdio.h>
#define size 100
int main()
{
char ch = 0;
int i = 0;
char names[size];
char ages[size];
FILE *fp1;
fp1 = fopen("data.txt", "r");
if(fp1 == NULL)
{
printf("Error!");
return 1;
}
while((ch=fgetc(fp1)) != '=');
while((ch=fgetc(fp1)) != ',')
{
fscanf(fp1, "%s", names);
i++;
}
fclose(fp1);
printf("Names = %s", names);
return 0;
}
Can anyone explain me what is the best way to do it?
You need a 2D array, e.g. names[number of records][max name length + 1].
Here is a sample:
#include <stdio.h>
#define size 100
/*
 * Reads up to `size` records of the form "Name=<name>, Age=<age>" from
 * data.txt and prints each one. A name is at most 127 characters up to the
 * first comma; an age is at most 3 digits, kept as text.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main(void){
    char names[size][128];
    char ages[size][4];
    int count = 0;
    FILE *fp1 = fopen("data.txt", "r");

    if(fp1 == NULL){
        printf("Error!\n");
        return 1;
    }

    /* Stop when the arrays are full or a line no longer matches the format. */
    for(;;){
        if(count >= size)
            break;
        if(fscanf(fp1, "Name=%127[^,], Age=%3[0-9]\n", names[count], ages[count]) != 2)
            break;
        count++;
    }
    fclose(fp1);

    for(int k = 0; k < count; ++k)
        printf("Names = %s, Ages = %s\n", names[k], ages[k]);
    return 0;
}

Read file of ints and load it into array

I have to open a file and read the numbers that are on it and then put these numbers in a array. I have the code below but it won't print me the numbers. I can't figure out why, can you guys help me?
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main()
{
FILE *f;
int *ptr;
char inteiros[100];
int inteiros_b[100];
f=fopen("C:\\Users\\PC\\Documents\\Inteiros.txt", "r");
if(f==NULL)
{
printf("Error\n");
return 0;
}
else
printf("Success\n");
if(ptr==NULL);
return NULL;
while(fgets(inteiros, 100, f)!=NULL)
{
int i=0;
inteiros_b[i]=atoi(inteiros);
i++;
}
for(int i=0;i<100;i++)
printf("%d\n", inteiros_b[i]);
}
Use fscanf function
/*
 * Reads up to 100 whitespace-separated integers from Inteiros.txt into an
 * array and prints the ones that were actually read, one per line.
 * Returns 0 in every case, including when the file cannot be opened.
 */
int main() {
    enum { MAX_INTEIROS = 100 };
    FILE * file = fopen("C:\\Users\\PC\\Documents\\Inteiros.txt", "r");
    int inteiros[MAX_INTEIROS], count = 0, number;
    if (file == NULL) {
        printf("Error\n");
        return 0;
    } else printf("Success\n");
    /* Stop at the array's capacity, at end of file, or at the first token
       that is not an integer (fscanf then returns 0 or EOF, not 1). */
    while (count < MAX_INTEIROS && fscanf(file, "%d", & number) == 1) {
        inteiros[count] = number;
        count++;
    }
    fclose(file);
    /* Print only the filled slots; the rest of the array is uninitialized. */
    for (int i = 0; i < count; i++)
        printf("%d\n", inteiros[i]);
    return 0;
}
if(ptr==NULL);
return NULL;
You never assign ptr to anything, so wouldn't this code return NULL every time?
edit: also, should there be a semicolon after if(ptr==NULL)?

C - Opening differents files using same pointer

I'm trying to retrieve information from many plain-text files, which will then be stored in a suitable struct. To do so, I'm using a function that takes the struct element to populate and the path of the plain-text file where the information is stored.
Posting my "test" code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct _elem
{
const char *title;
int ok;
int almost;
int nope;
int hits;
float last_rank;
};
typedef struct _elem Chapter;
Chapter *generate_array(const char *source, int *elems);
int engine_start(Chapter *elem, char *source);
int main()
{
const char path_f[100];
int elements = 0;
int i = 0;
Chapter *dict;
printf("Insert the name of the source:\n");
scanf("%s", path_f);
printf("\nGenerating dictionary, please wait...\n");
dict = generate_array(path_f, &elements);
if (dict == NULL)
{
printf("Aborting.\n");
exit(1);
}
while (i < elements)
{
printf("Element %d:\n", (i + 1));
printf("\nTitle: %s\n", dict[i].title);
printf("Ok: %10d\n", dict[i].ok);
printf("Almost: %5d\n", dict[i].almost);
printf("Nope: %8d\n", dict[i].nope);
printf("Hits: %8d\n", dict[i].hits);
printf("Rank: %8.2f\n", dict[i].last_rank);
printf("\n");
i++;
}
return EXIT_SUCCESS;
}
Chapter *generate_array(const char *source, int *elems)
{
FILE *src;
int sources;
int i = 0;
char **srcs;
Chapter *generated;
src = fopen(source, "r");
if (src == NULL)
{
printf("[!!] Error while reading file!\n");
return NULL;
}
fscanf(src, "%d", &sources);
if (sources <= 0)
{
printf("[!!] Wrong number of sources, exiting.\n");
return NULL;
}
srcs = (char **) malloc(sizeof(char *) * sources);
while (i < sources && !feof(src))
{
srcs[i] = (char *) malloc(sizeof(char) * 100);
fscanf(src, "%s", srcs[i++]);
}
fclose(src);
generated = (Chapter *) malloc(sizeof(Chapter) * i);
*elems = i;
i = 0;
while (i < *elems)
{
if(engine_start( &generated[i], srcs[i] )) i++;
else
{
printf("[!!] Error in file %s, aborting.\n", srcs[i]);
return NULL;
}
}
return generated;
}
int engine_start(Chapter *elem, char *source)
{
FILE *parser;
int done = 0;
parser = fopen(source, "r");
if (parser == NULL) printf("[!!] Error while opening %s, aborting.\n", source);
else
{
fgets(elem->title, 100, parser);
fscanf(parser, "%d %d %d %d %f", &(elem->ok), &(elem->almost),
&(elem->nope), &(elem->hits),
&(elem->last_rank) );
fclose(parser);
done = 1;
}
return done;
}
Now this is the main file where are stored paths to the other plain-text files:
lol.dat
5
lold/lol1.dat
lold/lol2.dat
lold/lol3.dat
lold/lol4.dat
lold/lol5.dat
And one example of lolX.dat:
Qual'è la vittoria di cristo?
3 4 5 12 44.9
I'm getting SIGSEGV after the first iteration of "engine_start", probably due to FILE *parser (but I can be totally wrong, I don't know at this point).
Someone can guide me through this problem? Thank you.
Make the following changes and try-
struct _elem
{
char *title; // allocate the memory for this.
int ok;
int almost;
int nope;
int hits;
float last_rank;
};
You need to allocate memory for element title before assigning something to it.
/*
 * Fills `elem` from the plain-text file `source`: the first line becomes the
 * title, followed by four integers (ok, almost, nope, hits) and one float
 * (last_rank).
 *
 * Returns 1 on success and 0 on any failure (file cannot be opened, out of
 * memory, or the file does not have the expected layout). On success
 * elem->title points to a malloc'd buffer that the caller owns and must
 * free; on failure elem->title is not assigned.
 */
int engine_start(Chapter *elem, char *source)
{
    FILE *parser;
    char *title;
    int done = 0;

    parser = fopen(source, "r");
    if (parser == NULL) {
        printf("[!!] Error while opening %s, aborting.\n", source);
        return 0;
    }

    /* The title needs storage of its own before fgets can write into it. */
    title = malloc(100);
    if (title != NULL && fgets(title, 100, parser) != NULL) {
        /* fgets keeps the line break; cut it off so the title prints cleanly. */
        title[strcspn(title, "\n")] = '\0';
        /* All five numbers must be present for the record to be valid. */
        if (fscanf(parser, "%d %d %d %d %f", &(elem->ok), &(elem->almost),
                   &(elem->nope), &(elem->hits),
                   &(elem->last_rank)) == 5) {
            elem->title = title;
            done = 1;
        }
    }

    /* Nothing took ownership of the buffer on failure; free(NULL) is a no-op. */
    if (!done)
        free(title);
    fclose(parser);
    return done;
}

Unexpected Segfault - What am I doing wrong

I've been trying to blow the cobwebs off my C programming skills, and I've been getting an error I can't seem to figure out. This program reads in a list of integers separated by newlines. This bit happens in read_integer_file... I have no issues going through the input there. It's when I pass the data back to main via out that I have the problem.
#include <stdlib.h>
#include <stdio.h>
int read_integer_file(char* filename, int* out)
{
FILE* file;
file = fopen(filename, "r");
/* check if the file open was successful */
if(file == NULL)
{
return 0;
}
int num_lines = 0;
/* first check how many lines there are in the file */
while(!feof(file))
{
fscanf(file, "%i\n");
num_lines++;
}
/* seek to the beginning of the file*/
rewind(file);
out = malloc(sizeof(int)*num_lines);
if(out == NULL)
return 0;
int inp = 0;
int i = 0;
while(!feof(file))
{
fscanf(file, "%i\n", &inp);
out[i] = inp;
printf("%i\n", out[i]); /* <---- Prints fine here! */
i++;
}
return num_lines;
}
int main(int argc, char** argv)
{
if(argc < 2)
{
printf("Not enough arguments!");
return -1;
}
/* get the input filename from the command line */
char* array_filename = argv[1];
int* numbers = NULL;
int number_count = read_integer_file(array_filename, numbers);
for(int i = 0; i < number_count; i++)
{
/* Segfault HERE */
printf("%i\n", numbers[i]);
}
}
You have not allocated any memory for numbers in main: the pointer is passed by value, so the malloc inside the function only changes the local copy out. When control gets back to the calling function, numbers still points nowhere. Pass a pointer to the pointer so that the function can allocate the memory for the caller.
int read_integer_file(char* filename, int** out)
{
...
*out = malloc(sizeof(int)*num_lines);
...
int number_count = read_integer_file(array_filename, &numbers);
Here is a working version of your code. Keep in mind also that the trailing \n in your format string only makes fscanf skip whitespace, so it is like writing fscanf(file, "%d");
And if you don't pass a variable to receive what it reads, the compiler may not complain, but the behavior is undefined and you will probably get an error at run time.
So here is the code :
#include <stdlib.h>
#include <stdio.h>
/*
 * Reads the whitespace-separated integers stored in `filename` into a newly
 * allocated array.
 *
 * On success stores the array in *out and returns the number of integers
 * read; the caller owns the array and must free() it. Returns 0 and leaves
 * *out untouched if the file cannot be opened, holds no integer, or memory
 * cannot be allocated. Reading stops at the first token that is not an
 * integer.
 */
int read_integer_file(char* filename, int **out)
{
    FILE* file;
    int count = 0;
    int value;
    int *nbr;
    int i;

    file = fopen(filename, "r");
    /* check if the file open was successful */
    if(file == NULL)
    {
        return 0;
    }

    /* first pass: count the integers. Looping on fscanf's result instead of
       feof() counts exactly the successful reads, whether or not the file
       ends with a newline. */
    while(fscanf(file, "%d", &value) == 1)
    {
        ++count;
    }
    if(count == 0)
    {
        fclose(file);
        return 0;
    }

    /* seek to the beginning of the file */
    rewind(file);
    nbr = malloc(sizeof *nbr * count);
    if(nbr == NULL)
    {
        fclose(file);
        return 0;
    }

    /* second pass: never store more values than were counted */
    for(i = 0; i < count && fscanf(file, "%d", &nbr[i]) == 1; ++i)
    {
    }

    fclose(file);
    *out = nbr;
    return i;
}
/*
 * Prints, one per line, the integers stored in the file named by the first
 * command-line argument. Returns -1 when no file name is given, 0 otherwise.
 */
int main(int argc, char** argv)
{
    if(argc < 2)
    {
        printf("Not enough arguments!");
        return -1;
    }
    /* get the input filename from the command line */
    char* array_filename = argv[1];
    int *numbers = NULL;
    int number_count = read_integer_file(array_filename, &numbers);
    int i;
    for(i = 0; i < number_count; ++i)
        printf("%d\n", numbers[i]);
    /* main owns the array handed back through `numbers`; free(NULL) is a
       no-op if nothing was allocated. */
    free(numbers);
    return 0;
}

Resources