C program to find word frequency - c

I have a C program that will count the amount of words in each file given on the command line. I now need to make it count how many times each word appears. Here is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#define MAXWORDS 10000
#define MAXSTRING 100
/* structure holding word frequency information */
typedef struct _word {
char s[MAXSTRING]; /* the word */
int count; /* number of times word occurs */
} word;
static int *total_amount_of_words;
int countWords(FILE *file){
int count = 0;
char character;
while((character = fgetc(file)) != EOF){
if(character == '\n' || character == ' ')
count++;
}
return count;
}
void insert_word (word *words, int *n, char *s) {
int i;
/* linear search for the word */
for (i=0; i<*n; i++) if (strcmp (s, words[i].s) == 0) {
/* found it? increment and return. */
words[i].count++;
return;
}
/* error conditions... */
if (strlen (s) >= MAXSTRING) {
fprintf (stderr, "word too long!\n");
exit (1);
}
if (*n >= MAXWORDS) {
fprintf (stderr, "too many words!\n");
exit (1);
}
/* copy the word into the structure at the first available slot,
* i.e., *n
*/
strcpy (words[*n].s, s);
/* this word has occured once up to now, so count = 1 */
words[*n].count = 1;
/* one more word */
(*n)++;
}
int wordcmp (word *a, word *b) {
if (a->count < b->count) return +1;
if (a->count > b->count) return -1;
return 0;
}
/* return 1 if c is alphabetic (a..z or A..Z), 0 otherwise */
int is_alpha (char c) {
if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z') return 1;
return 0;
}
void remove_char (char *s, int i) {
while (s[i]) {
i++;
s[i-1] = s[i];
}
s[i] = 0;
}
void remove_non_alpha (char *s) {
int i;
for (i=0; s[i]; i++) if (!is_alpha (s[i])) remove_char (s, i);
}
void make_lowercase (char *s) {
int i;
for (i=0; s[i]; i++) s[i] = tolower (s[i]);
}
int main(int argc, char *argv[])
{
word words[MAXWORDS];
char s[1000];
int i, j, n, m;
int pid;
FILE *current_file;
n = 0;
total_amount_of_words = mmap(NULL, sizeof *total_amount_of_words, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
//Create processes for each file on the command line
for(i = 1; i < argc; i++)
{
pid = fork();
if(pid == -1) //Error
{
exit(-1);
}
else if(pid == 0) //Children
{
current_file = fopen(argv[i], "r");
int current_word_amount = countWords(current_file);
*total_amount_of_words += current_word_amount;
printf("Child Process for File %s: number of words is: %i\n", argv[i], current_word_amount);
rewind(current_file);
/* read all the words in the file... */
while (!feof (current_file)) {
fscanf (current_file, "%s", s);
/* only insert the word if it's not punctuation */
if (is_alpha (s[0])) {
/* get rid of non-letters */
remove_non_alpha (s);
/* make all letters lowercase */
make_lowercase (s);
/* put this word in the list */
insert_word (words, &n, s);
}
}
fclose(current_file);
exit(0);
}
else //Parent
{
wait(NULL);
}
}
/* sort the list of words by descending frequency */
qsort((void *) words, n, sizeof (word),
(int (*) (const void *, const void *)) wordcmp);
/* print the words with their frequencies */
for (j=0; j<*total_amount_of_words; j++){
printf ("Word: %s\t", words[j].s);
printf ("Frequency: %%d\n", words[j].count);
}
printf("All %i files have been counted!\n Total Amount of Words: %d\n", (argc-1), *total_amount_of_words);
munmap(total_amount_of_words, sizeof *total_amount_of_words);
}
In the for loop at the bottom I cannot get it to print out each word and how many times it appeared. I cannot get anything to output, the struct need gets initialized. How do I share the struct with all processes?

Related

How to loop a nested array in C

I've been developing a guessing game in which the goal is to guess the character selected by the user among specific characters, anyway, my first and only idea is to create an array with the questions to be asked, and each question has its options like in the code below I'm a newbie in C language so that I there are several things which I'm not sure how to handle. In short, I'd like to know how can I loop over the array showing to the user the questions with its questions to be answered? Here's the code.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#define ROW 500
#define LINE 200
//Read file and append to an array buffer
char *characters(){
char *source = NULL;
FILE *fp = fopen("file.txt", "r");
if (fp != NULL) {
/* Go to the end of the file. */
if (fseek(fp, 0L, SEEK_END) == 0) {
/* Get the size of the file. */
long bufsize = ftell(fp);
if (bufsize == -1) { /* Error */ }
/* Allocate our buffer to that size. */
source = malloc(sizeof(char) * (bufsize + 1));
/* Go back to the start of the file. */
if (fseek(fp, 0L, SEEK_SET) != 0) { /* Error */ }
/* Read the entire file into memory. */
size_t newLen = fread(source, sizeof(char), bufsize, fp);
if ( ferror( fp ) != 0 ) {
fputs("Error reading file", stderr);
} else {
source[newLen++] = '\0'; /* Just to be safe. */
}
}
fclose(fp);
}
return source;
}
char *strndup(const char *s, size_t n) {
char *p;
size_t n1;
for (n1 = 0; n1 < n && s[n1] != '\0'; n1++)
continue;
p = malloc(n + 1);
if (p != NULL) {
memcpy(p, s, n1);
p[n1] = '\0';
}
return p;
}
// User input
char *input(){
char *value;
char buffer[10];
int j = 0;
while( j < 1 && fgets(buffer, 10, stdin) != NULL){
value = strndup(buffer, 10);
j++;
}
return value;
}
// Main function
int main (void)
{
char *questions[] = {
"Genre",{"male","female"},
"Hair", {"black","red","blond"},
"Cloths",{"dress","shirt","pants"},
"pet", {"dog","cat","pig"}
};
int asked[4] = {0};
char *answers[5];
char buffer[6];
srand(time(NULL));
for (int i = 0; i < 4; i++) {
int q = rand() % 4;
while (asked[q])
q = rand() % 4;
asked[q]++;
printf ("%s\n", questions[q]);
answers[i] = input();
}
for(int i = 0; i < 4; i++)
{
printf(" %s ",answers[i]);
}
return 0;
}
That's the file's structure I'll compare as long as I have all the answers from the user.
female,blond,vestido,pig,character b
male,black,shirt,pants,dog,character c
male,black,shirt,pants,cat,character d
female,blond,dress,cat,character A
male,red,shirt,pants,pig,character e

reading file`s lines char by char into char** array

I wrote the next function that tries to read and enter each line from text file into a string array in c :
int main(int argc,char* argv[])
{
char ** lines;
readFile(argv[1],lines);
}
int readFile(char* filePath,char** lines)
{
char file_char;
int letter_in_line=0;
int line=1;
char* line_string=malloc(1024);
int j=1;
int fd=open(filePath,O_RDONLY);
if (fd < 0)
{
return 0;
}
while (read(fd,&file_char,1) >0)
{
if(file_char != '\n' && file_char != '0x0')
{
line_string[letter_in_line] = file_char;
letter_in_line++;
}
else
{
if(lines != NULL)
{
lines=(char**)realloc(lines,sizeof(char*)*line);
}
else
{
lines=(char**)malloc(sizeof(char*));
}
char* line_s_copy=strdup(line_string);
lines[line-1]=line_s_copy;
line++;
letter_in_line=0;
memset(line_string,0,strlen(line_string));
}
j++;
}
printf("cell 0 : %s",lines[0]);
return 1;
}
I have 2 questions :
1)Whenever the code reaches the print of cell 0, I'm getting
Segmentation fault (core dumped) error. What is wrong ?
2)In case I
want to see the changes in the lines array in my main, I should pass
&lines to the func and get char*** lines as an argument ? In
addition, I will need to replace every 'line' keyword with '*line' ?
*I know that I can use fopen,fget, etc... I decided to implement it in this way for a reason.
There is many issues that make your code core dump.
Here a version very similar to your code. I hope it will help you to understand this.
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>
int read_file(const char *filename, char ***result)
{
/* open the file */
const int fd = open(filename, O_RDONLY);
if (fd < 0) {
*result = NULL;
return -1;
}
/* read the file characters by characters */
char *buffer = (char *)malloc(sizeof(char) * 1024);
char c;
int column = 0;
int line = 0;
*result = NULL;
/* for each characters in the file */
while (read(fd, &c, 1) > 0) {
/* check for end of line */
if (c != '\n' && c != 0 && column < 1024 - 1)
buffer[column++] = c;
else {
/* string are null terminated in C */
buffer[column] = 0;
column = 0;
/* alloc memory for this line in result */
*result = (char **)realloc(*result, sizeof(char *) *
(line + 1));
/* duplicate buffer and store it in result */
(*result)[line++] = strdup(buffer);
}
}
free(buffer);
return line;
}
int main(int argc, char *argv[])
{
if (argc != 2) {
fprintf(stderr, "usage: %s [filename]", argv[0]);
return 1;
}
char **lines;
int line_count = read_file(argv[1], &lines);
if (line_count < 0) {
fprintf(stderr, "cannot open file %s\n", argv[1]);
return 1;
}
for(int i=0; i < line_count; i++)
printf("%s\n", lines[i]);
return 0;
}
Here an other version:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int read_file(const char *filename, char ***result)
{
/* init result */
*result = NULL;
/* open the file */
FILE *file = fopen(filename, "r");
if (file == NULL)
return -1;
/* read the file line by line */
char *buffer = (char *)malloc(sizeof(char) * 1024);
int line = 0;
while (fgets(buffer, 1024, file)) {
*result = (char **)realloc(*result, sizeof(char *) *
(line + 1));
(*result)[line++] = strdup(buffer);
}
free(buffer);
return line;
}
int main(int argc, char *argv[])
{
if (argc != 2) {
fprintf(stderr, "usage: %s [filename]", argv[0]);
return 1;
}
char **lines;
int line_count = read_file(argv[1], &lines);
if (line_count < 0) {
fprintf(stderr, "cannot open file %s\n", argv[1]);
return 1;
}
for(int i=0; i < line_count; i++)
printf("%s\n", lines[i]);
return 0;
}

Reading a file of strings to a string array

I'm trying to read a file (full of a word followed by a newline), to an array full of pointers to each string. Then print each word in the array, and count the number of words read. However it just prints no words and says 0 words imported.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define ARGS_REQUIRED 2
#define MAX_WORDS 50
#define MAX_WORD_LENGTH 1024
void read_file (char * argv[], char word_storage[]);
void usage (char * argv[]);
int main (int argc, char* argv[])
{
char word_storage[MAX_WORDS];
if (argc == ARGS_REQUIRED)
{
system("clear");
read_file(&argv[1], word_storage);
}
else
{
usage(&argv[0]);
}
return 0;
}
void usage (char * argv[])
{
printf("Incorrect usage, try: ./program_name %s\n", argv[1]);
}
void read_file (char * argv[], char word_storage[])
{
FILE * file_name;
char *word[MAX_WORDS][MAX_WORD_LENGTH];
int word_count = 0, i = 0, j;
if ((file_name = fopen(argv[0], "r")) == NULL)
{
printf("Cannot open file ... \n");
}
while (fscanf(file_name, "%s", *word[MAX_WORDS]) == 1)
{
for (j = 0; j < MAX_WORDS; j++)
{
if (printf("%s\n", *word[j]) == 1)
{
word_count++;
}
}
}
fclose(file_name);
printf("Imported words: %d\n", word_count);
}
You can modify your program as below. It worked for me. Please add error check for printf() function.
void read_file (char * argv[], char word_storage[])
{
FILE * file_name;
char word[MAX_WORDS][MAX_WORD_LENGTH];
int word_count = 0, i = 0, j;
if ((file_name = fopen(argv[0], "r")) == NULL)
{
printf("Cannot open file ... \n");
}
while (fscanf(file_name, "%s", word) == 1)
{
printf("%s\n",word);
word_count++;
/*
for (j = 0; j < MAX_WORDS; j++)
{
if (printf("%s\n", word[j]) == 1)
{
word_count++;
}
}
*/
}
fclose(file_name);
printf("Imported words: %d\n", word_count);
}

Adding words to a char *[] in C

I have a program that reads the words of two files (the first a wordlist, and the second an ebook from the Gutenberg project ) into two char *arrays.
I am trying to add all the unique words from the second char *array that don't appear in
the first char *array into a third char *array then print them.
This program adds the correct words, but is adding them more than once.
The error occurs in findOdds().
Note when I use a non-binary search method this program works correctly, but takes a long time.
What is the problem with my program? I apologize for my English.
#include <stdio.h>
#include <stdlib.h> /* for malloc() */
#include <ctype.h>
#include <string.h>
#define MAXCHAR 24
#define MAXLINES 150000
int add2array(FILE *fp, char *lineptr[]);
int findOdds(char *lineptr[], char *lineptr1[], int nlines, int nlines1);
int binsearch1(char *val, char *lineptr[], int nlines);
char *lineptr2[MAXLINES]; /* The unique words not in the word list */
int main(int argc, char *argv[])
{
FILE *my_stream, *my_stream1;
char *lineptr[MAXLINES], *lineptr1[MAXLINES];
int i, nlines, nlines1, nlines2;
/* Load the wordlist. */
my_stream = fopen("words.txt","r");
if(my_stream == NULL) {
printf("error: Couldn't open file\n");
return 2;
} else {
nlines = add2array(my_stream, lineptr);
fclose(my_stream);
}
if(nlines==-1) {
printf("error: Epic Failure to copy words to char *lineptr[]\n");
return -1;
}
/* Load the ebook. */
my_stream1 = fopen("horsemanship.txt","r");
if(my_stream1 == NULL) {
printf("error: Couldn't open file\n");
return 2;
} else {
nlines1 = add2array(my_stream1, lineptr1);
fclose(my_stream1);
}
if(nlines1==-1) {
printf("error: Epic Failure to copy words to char *lineptr[]\n");
return -1;
}
/* Find and print the unique words from the ebook not in the wordlist */
nlines2 = findOdds(lineptr, lineptr1, nlines, nlines1);
for(i=0; i<nlines2; i++)
printf("%s\n",lineptr2[i]);
return 0;
}
/* add2array: read the words from the file into char *lineptr[] */
int add2array(FILE *fp, char *lineptr[])
{
int nlines=0, c=0, pos=0;
char temp[MAXCHAR];
char *p;
while((c = getc(fp)) != EOF) {
if(isalpha(c))
temp[pos++] = tolower(c);
else if(!isalpha(c)) {
temp[pos] = '\0';
pos = 0;
if(isalpha(temp[0])){
if((p = malloc(sizeof(temp)))==NULL)
return -1;
strcpy(p, temp);
lineptr[nlines++] = p;
}
}
}
return nlines;
}
/* Add the unique words from lineptr1 not in lineptr to lineptr2 */
int findOdds(char *lineptr[], char *lineptr1[], int nlines, int nlines1)
{
char *p;
char temp[MAXCHAR];
int i, nlines2=0;
for(i=0; i<nlines1; i++) {
if(binsearch1(lineptr1[i], lineptr, nlines)==-1) {
if(binsearch1(lineptr1[i], lineptr2, nlines2)==-1) {
if((p = malloc(sizeof(temp)))==NULL)
return -1;
strcpy(p, lineptr1[i]);
lineptr2[nlines2++] = p;
}
}
}
return nlines2;
}
int binsearch1(char *val, char *lineptr[], int nlines)
{
int pos;
int start = 0;
int end = nlines-1;
int cond = 0;
while(start <= end){
pos=(start + end)/2;
if((cond = strcmp(lineptr[pos],val)) == 0)
return pos;
else if(cond < 0)
start = pos+1;
else
end = pos-1;
}
return -1;
}
Arrays must be sorted if you want to use binary search, as stated above by n.m.
in main() ...
shellsort1(lineptr1, nlines1);
/* Find and print the unique words from the ebook not in the wordlist */
nlines2 = findOdds(lineptr, lineptr1, nlines, nlines1);
...
int shellsort1(char *v[], int n)
{
int gap, i, j;
char temp[MAXCHAR];
char *p;
for(gap=n/2; gap>0; gap/=2)
for(i=gap; i<n; i++)
for(j=i-gap; j>=0 && strcmp(v[j],v[j+gap])>0; j-=gap) {
if((p = malloc(sizeof(temp)))==NULL)
return -1;
p = v[j];
v[j] = v[j+gap];
v[j+gap] = p;
}
return 0;
}

Inserting into a Hash Table

I am trying to get words inserted into a hash table. When I run the code, it's supposed to give me a list of the frequency of each word, but it just gives me nothing.
I'm sure it's either to do with my print function, or my insert function, probably more my insert function. I know it's isn't mylib.h, but I'm just not sure where I'm going wrong.
It doesn't insert anything into my table or print it. I'm not really sure what's going on.
hashtable.c:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "htable.h"
struct htablerec {
char **key;
int *frequencies;
int num_keys;
int capacity;
};
void *emalloc(size_t s) {
void *result = malloc(s);
if (NULL == result) {
fprintf(stderr, "Memory allocation failed!\n");
exit(EXIT_FAILURE);
}
return result;
}
htable htable_new(int capacity) {
int i;
htable h = emalloc(sizeof * h);
h->capacity = capacity;
h->num_keys = 0;
h->frequencies = emalloc(h->capacity * sizeof h->frequencies[0]);
h->key = emalloc(h->capacity * sizeof h->key[0]);
for (i = 0; i < h->capacity; i++) {
h->frequencies[i] = 0;
h->key[i] = NULL;
}
return h;
}
void htable_free(htable h) {
free(h->frequencies);
free(h->key);
free(h);
}
static unsigned int htable_word_to_int(char *word) {
unsigned int result = 0;
while (*word != '\0') {
result = (*word++ + 31 * result);
}
return result;
}
int htable_insert(htable h, char *str) {
int i;
/*convert string to integer*/
unsigned int index = htable_word_to_int(str);
/*calculate index to insert into hash table*/
int remainder = index%h->capacity;
/*once calculated position in the hash table, 3 possibilities occur*/
/*no string in this positon, copy string to that position, increment number of keys, return 1*/
if (h->key[remainder] == NULL) {
h->frequencies[remainder] = 1;
h->num_keys++;
return 1;
}
/*the exact same string is at the position, increment frequency at that position, return frequency*/
if (strcmp(str, h->key[remainder]) == 0) {
h->frequencies[remainder]++;
return h->frequencies[remainder];
}/*a string is at that position, but it isnt the rightone, keep moving along the array
until you find either an open space or the string you are looking for*/
if (h->key[remainder] != NULL && strcmp(str, h->key[remainder]) != 0) {
/*you may need to wrap back around to the beginning of the table, so each time you add
to the position you should also mod by the table capacity.*/
for (i = 0; i <= h->capacity; i++) {
if (h->key[remainder] != NULL && h->capacity == i) {
i = 0;
}
/*no string in this positon, copy string to that position, increment number of keys*/
if (h->key[remainder] == NULL) {
h->frequencies[remainder] = 1;
h->num_keys++;
}
/*if you find the string you were looking for, increment the frequecny at the position
and return the frequency*/
if (strcmp(str, h->key[remainder]) == 0) {
h->frequencies[remainder]++;
return h->frequencies[remainder];
}
}
}
/*if you have kept looking for an open space but there isnt one, the hash table must be full so return 0*/
return 0;
}
void htable_print(htable h, FILE *stream) {
int i;
for(i = 0; i < h->capacity; i++) {
if(h->key[i] != NULL) {
fprintf(stream, "%d%s\n", h->frequencies[i], h->key[i]);
}
}
}
htable.h:
#ifndef HTABLE_H_
#define HTABLE_H_
#include <stdio.h>
typedef struct htablerec *htable;
extern void htable_free(htable h);
extern int htable_insert(htable h, char *str);
extern htable htable_new(int capacity);
extern void htable_print(htable h, FILE *stream);
extern int htable_search(htable h, char *str);
#endif
mylib.c:
#include <stdio.h>
#include <stdlib.h>
#include "mylib.h"
#include "htable.h"
int main(void) {
htable h = htable_new(18143);
char word[256];
while (getword(word, sizeof word, stdin) !=EOF) {
htable_insert(h, word);
}
htable_print(h, stdout);
htable_free(h);
return EXIT_SUCCESS;
}
mylib.h:
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
int getword(char *s, int limit, FILE *stream) {
int c;
char *w = s;
assert(limit > 0 && s != NULL && stream != NULL);
/*skip to the start fo the word */
while (!isalnum(c = getc(stream)) && EOF != c)
;
if(EOF == c) {
return EOF;
} else if (--limit > 0) { /*reduce limit by 1 to allow for the \0 */
*w++ = tolower(c);
}
while(--limit > 0) {
if(isalnum(c = getc(stream))) {
*w++ = tolower(c);
} else if ('\'' == c) {
limit++;
} else {
break;
}
}
*w = '\0';
return w - s;
}
You never set h->key[remainder] to anything in htable_insert, so h->key[i] is still NULL for all i when you call htable_print.
/*no string in this positon, copy string to that position, increment number of keys, return 1*/
if (h->key[remainder] == NULL) {
h->frequencies[remainder] = 1;
h->num_keys++;
return 1;
}
...
/*no string in this positon, copy string to that position, increment number of keys*/
if (h->key[remainder] == NULL) {
h->frequencies[remainder] = 1;
h->num_keys++;
}
You never copy the string. Try something like...
char *key = emalloc(strlen(str) + 1);
strcpy(str, key);
h->key[remainder] = key;

Resources