I want to store words from a pointer of char strings in a double linked list. My function for storing the words in the char strings works perfect, but when it comes to storing in the dll elements it doesn't work anymore. I can't understand if there is a problem in the declarative zone of the list (I am new to lists, we just did some theory on them in the class) or with the node changing pointer.
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <string.h>
int number_of_words (FILE *f) {
char x[1024];
int i=0;
while (fscanf(f, " %1023s", x) == 1) {
i++;
}
return i;
}
void words (FILE *f, char *words[]) {
char x[1024];
int i=0;
while (fscanf(f, " %1023s", x) == 1) {
words[i]=strdup(x);
i++;
}
}
typedef struct node{
int freq;
char *word_string;
struct node *next;
struct node *prev;
}node;
int main(int argc, const char * argv[]) {
FILE *input=fopen(argv[1], "r+");
if(input==NULL) printf("error in reading from file");
else printf("reading works.\n");
int k=number_of_words(input);
char *word[k];
char *word_unique[k];
rewind(input);
words(input, word);
int j=0,l=0,s=0;
for(j=0;j<k;j++) {
for (l=0; l<j; l++){
if (strcmp(word[j],word[l])==0)
break;
}
if (j==l){
word_unique[s]=word[j];
s++;
}
}
int *word_freq[s];
for(j=0;j<s;j++){
word_freq[j]=0;
}
for(j=0;j<s;j++) {
for (l=j; l<k; l++){
if (strcmp(word_unique[j],word[l])==0)
word_freq[j]++;
}
}
char *aux=malloc(30*sizeof(char));
for(j=0;j<s;j++){
for(l=j+1;l<s-1;l++){
if(strcasecmp(word_unique[j], word_unique[l])>0)
{
strcpy(aux,word_unique[j]);
strcpy(word_unique[j],word_unique[l]);
strcpy(word_unique[l],aux);
}
}
}
node *head, *curr=NULL;
int i=0;
head=NULL;
for(i=0;i<k;i++){
curr=(node *)malloc(sizeof(node));
curr->word_string=word_unique[i];
curr->freq=word_freq[i];
curr->next=head;
head=curr;
}
while(curr) {
if(curr->word_string!=NULL) printf("%s:%d\n", curr->word_string, curr->freq);
curr = curr->next;
}
return 0;
}
The input file is a text file and it looks like this:
Everything LaTeX numbers for you has a counter associated with it. The name of the counter
is the same as the name of the environment or command that produces the number, except
with no. Below is a list of some of the counters used in LaTeX’s standard document styles
to control numbering.
When I tried to print the unique elements in alphabetical order with their frequency, it actually prints out in reverse order with 4x frequency they actually have. It also separates "numbering." from the others + a new line at the beginning which I don't know where it comes from. This is what it prints:
reading works.
0- :2098416
numbering.:4
you:4
with:4
used:4
to:4
the:4
The:4
that:4
styles:4
standard:4
some:4
same:4
produces:4
or:4
of:4
numbers:4
number,:4
no:4
name:4
list:4
LaTeX’s:4
LaTeX:4
it.:4
is:4
in:8
has:24
for:16
except:8
Everything:4
environment:4
document:8
counters:4
counter:8
control:8
command:4
Below:4
associated:4
as:4
a:4
\.:4
Program ended with exit code: 0
Related
Before proceeding with PSET5 - SPELLER of the CS50 course, I have decided to practice with a made-up program that takes words from a file and sorts them into a Hash Table, but I think I`m doing something wrong with the Hash Function as I keep getting the following error:
array subscript is not an integer
table[hash] = n;
Some of the elements are taken from the task itself to understand how they work. I don`t have any previous knowledge, totally limited to the CS50 course.
Please have a look at my code and maybe give a few pointers to what I am doing wrong.
From what I understand - every new word`s first letter goes through Hash Functions and returns a number for the Bucket in which this word goes.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
int hash(const char *buffer);
const unsigned int LENGTH = 9;
typedef struct node
{
char word[LENGTH + 1];
struct node* next;
}
node;
node *table[26] = {NULL};
int hash(const char *buffer)
{
return toupper(buffer[0]) - 'A';
}
int main(void)
{
FILE *file = fopen("words", "r");
if (file != NULL)
{
char buffer[LENGTH];
while (fscanf(file, "%s", buffer) != EOF)
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return 1;
}
strcpy(n->word, buffer);
n->next = NULL;
table[hash] = n;
}
fclose(file);
}
}
You need to call the function hash(..) , it is not a variable.
Your line should be:
table[ hash(n->word) ] = n;
I'm trying to use a chaining hashtable to count the number of repetition of all words in a .txt file.
So this is what I did, here's my header file:
#ifndef _header5_
#define _header5_
typedef struct cellule {
char cle[15]; // Cle=Word
int valeur; // Number of occurrences of the word
struct cellule *suivant;
} Cellule;
typedef Cellule * Liste; // type Liste
typedef struct table_hachage {
int taille; // table length
Liste *linkcase; // table of listes
} Table_hachage;
// type TableHacage
typedef Table_hachage *TableHachage;
/***************Methods*****/
int max(int,int);
int count_words (FILE*);
void read_words(FILE*);
TableHachage cree_table_hachage(int);
int hachage(TableHachage, char*);
int insere(TableHachage,char*);
int recherche(TableHachage, char*);
int Get_Value(TableHachage, char*);
void fill(TableHachage,FILE*);
#endif
and this is my implementation of the header file:
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "fnv.h"
#include "header5.h"
int max(int a,int b){
if(a>b)
return a;
else if(a<b)
return b;
else
return a;
}
int count_words (FILE *f) {
char word[1024];
int count=0;
/* assumes no word exceeds length of 1023 */
while (fscanf(f, " %1023s", word) == 1) {
count++;
}
return count;
}
void read_words(FILE *f) {
char word[1024];
/* assumes no word exceeds length of 1023 */
while (fscanf(f, " %1023s", word) == 1) {
puts(word);
}
}
//Create Empty chaining hashtable
TableHachage cree_table_hachage(int taille) {
int i;
TableHachage table = (TableHachage)malloc(sizeof(Table_hachage));
table->taille = taille;
table->linkcase = (Liste*)malloc(table->taille * sizeof(Liste));
for (i = 0; i < table->taille; i++)
table->linkcase[i] = NULL;
printf("HashTable is created\n");
return table;
}
//Getting the hash code using FNV1 Algorithm
//it works just fine by the way.
int hachage(TableHachage table, char *cle) {
//FNV HashCode Algorithm version 32-bits
int codeh = fnv_32_str(cle, FNV1_32_INIT);
return (abs(codeh) % table->taille);
}
//Insert the word in the hashtable
int insere(TableHachage table, char* cle) {
int codeh;
Liste liste = NULL;
codeh = hachage(table, cle); //getting the HashCode
liste = table->linkcase[codeh]; //getting the LinkedList at index==HashCode
while (liste) {
//In case the key is already existed we increment its value
//which indicates the number of repetition of that word
if (strcmp(liste->cle, cle) == 0){
liste->valeur++;
return 0;
}
liste = liste->suivant;
}
//if it's the first time to encounter the word
//we insert it and give it's value 1
if (liste == NULL) {
liste = (Liste)malloc(sizeof(Cellule));
strcpy(liste->cle, cle);
liste->valeur=1;
liste->suivant = table->linkcase[codeh];
table->linkcase[codeh] = liste;
return 1;
}
}
//Search existence of a word
int recherche(TableHachage table, char *cle){
Liste liste = table->linkcase[hachage(table, cle)];
for (; liste; liste = liste->suivant)
if (strcmp(cle, liste->cle) == 0)
return 1;
return 0;
}
//Getting value of a key a.k.a number of repetition of a word
int Get_Value(TableHachage table, char *cle){
Liste liste = table->linkcase[hachage(table, cle)];
for (; liste; liste = liste->suivant)
if (strcmp(cle, liste->cle) == 0)
return liste->valeur;
}
//Fill my hashcode with words and number of repetition of that key in the file
void fill(TableHachage table,FILE* f){
char word[1024];
/* assumes no word exceeds length of 1023 */
while (fscanf(f, " %1023s", word) == 1) {
insere(table,word);
}
}
So my problem resides in the fill() function, it is almost the same as read_word() function which works just fine except instead of printing the word, I want it to be inserted in the hash table.
When I checked which part doesn't work in fill() function, I realized it never goes into the while loop. So when I search for the word, it couldn't be found.
So can anyone explains this to me?
EDIT:
Here's my main():
#include <stdio.h>
#include <stdlib.h>
#include "fnv.h"
#include "header5.h"
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
int main(int argc, char *argv[]) {
FILE *file=fopen("fich.txt", "r");
int n=count_words(file);
//Creating an empty chaining hash table
TableHachage T=cree_table_hachage(n);
//fill hash table with words and its number of repetition in the text file
fill(T,file);
//student is a word in my file
if(recherche(T,"student")==1){
printf("found\n");
}
else{
printf("couldn't be found\n");
}
int occ=Get_Value(T, "student");
printf("Occ is: %d\n",occ);
fclose(file);
return 0
}
As a solution for this problem this is what i did
#include <stdio.h>
#include <stdlib.h>
#include "fnv.h"
#include "header5.h"
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
int main(int argc, char *argv[]) {
FILE *file=fopen("fich.txt", "r");
int n=count_wordsV2(file);
//printf("number of words is: %d\n",n);
fclose(file);
file=fopen("fich.txt", "r");
//Creating an empty chaining hash table
TableHachage T=cree_table_hachage(n);
//fill hash table with words and its number of repetition in the text file
fill(T,file);
//Get occurences of each word in text file
char* Max=Get_Occurences(T);
printf("The most repeated word is: \"%s\" with a value=%d\n",Max,Get_Value(T, Max));
//int occ=Get_Value(T,"pilots");
//printf("occ: %d\n",occ);
fclose(file);
return 0;
}
Hello i am slowly learning c and trying my best.
Can someone tell me why my variables are undefined?
"processId, userId, arrivalTime, priority, expectedTimeRemaining, expectedPctCPU, realTime" are all giving an error!
Please help, i provided my code. I tried reading up about structures and pointers. Followed resources, and now i am trying to implement it myself.
Thanks!
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>
#define _CRT_SECURE_NO_WARNINGS
typedef enum States { NEW, READY, RUNNING, BLOCKED, SUSPENDED, EXIT, SUSPEND_READY } State;
char stateNames[7][14] = { "New", "Ready", "Running", "Blocked", "Suspended", "Exit", "SuspendReady" };
// partial - skips info needed to actually conduct process switch - contents of registers, program counter, stack pointers, ... pointers to page tables ...
// we ARE going to need some way of indicating IO needs so can decide when they should block
struct processblock {
int processId;
int userId; // pointer instead?
State processState; // Not input - initially NEW
int arrivalTime; // time units in simulation, not actual time
int priority; // base priority
int currentPriority; // can be upped or lowered based on what has happened with the process - not input - initially same as base priority
int timeWaitingSoFar; // Not input - initially zero
int timeProcessingLastRun; // Not input - initially zero
int timeProcessingSoFar; // Not input - initially zero
int expectedTimeRemaining;
struct event* waitingOn; // ??? // Not input - initially zero
int expectedMemoryNeed;
int expectedPctCPU; // to get an idea of whether CPU bound or IO bound
bool realTime; // whether a real-time process or not (real-time processes may need immediate attention)
struct processblock* nextPtr; // not used in this program - but preparing for linked list version
};
//fill in the array from file
int fillArrayFromFile(struct processblock processor[], FILE*fPtr, int maxSize) {
int count = 0;
//unsure if realtime is a parameter, pls check and confirm with me
while ((count < maxSize) && fscanf("fPtr,%d,%d,%d,%d,%d,%d,%d", processId, userId, arrivalTime, priority, expectedTimeRemaining, expectedPctCPU, realTime) != EOF) {
//fill in each part of the array
processor[count].processId = count;
}
}
You should use a linked list to store the data you need, here there is an example, this code reads the lines of a file named test.txt that has this layout:
100,101,20,1,50,1,0
102,105,30,1,55,1,1
101,10,40,0,56,1,1
and store all the values in a linked list, that contains your structure. I hope you'll learn something new, happy coding!
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define _CRT_SECURE_NO_WARNINGS
typedef enum States { NEW, READY, RUNNING, BLOCKED, SUSPENDED, EXIT, SUSPEND_READY } State;
char * stateNames[7] = { "New", "Ready", "Running", "Blocked", "Suspended", "Exit", "SuspendReady" };
struct processblock {
int processId;
int userId;
int arrivalTime;
int priority;
int expectedTimeRemaining;
int expectedPctCPU;
int realTime; // scanf can't read boolean
State processState;
int currentPriority;
int timeWaitingSoFar;
int timeProcessingLastRun;
int timeProcessingSoFar;
int expectedMemoryNeed;
};
struct Node
{
struct processblock pblock;
struct Node *next;
};
typedef struct Node * List;
void NewNode(List * p , struct processblock pb) //function that creates new linked list nodes
{
List temp;
temp = (List)malloc(sizeof(struct Node));
temp->pblock = pb;
temp->next = *p;
*p = temp;
}
int fillArrayFromFile(char *filename, List * p) {
FILE *fPtr;
int count = 0;
struct processblock pb;
char * buffer;
buffer = malloc(sizeof(struct processblock));
if(!(fPtr= fopen(filename, "r")))
{
perror("No File");
fclose(fPtr);
return 0;
}
while(fgets(buffer,sizeof(struct processblock),fPtr))//read every line of file and store them in a buffer
{
if(sscanf(buffer,"%d,%d,%d,%d,%d,%d,%d", &pb.processId, &pb.userId, &pb.arrivalTime, &pb.priority, &pb.expectedTimeRemaining, &pb.expectedPctCPU, &pb.realTime) == 7 ) // read from the buffer
{
NewNode(p, pb); // creates new node from filled structure
count++;
}
}
free(buffer);
fclose(fPtr);
return count;
}
void ViewElements(List p)
{
struct processblock pb;
while(p != NULL)
{
pb = p->pblock;
printf("%d,%d,%d,%d,%d,%d,%d\n", pb.processId, pb.userId, pb.arrivalTime, pb.priority, pb.expectedTimeRemaining, pb.expectedPctCPU, pb.realTime);
p = p->next;
}
}
int main()
{
List HeadNode = NULL;
printf("Loaded %d lines\n", fillArrayFromFile("test.txt", &HeadNode));
ViewElements(HeadNode);
return 0;
}
I have written a program that reads in words from a text file. There is one word per line. I need to find how many times each word repeats. To find this out so far i have read the words in from the file and placed them all in a dynamically allocated array of struct. My problem is that the program keeps segmentation faulting whenever i try to run it. I assume there is a problem with how i am dynamically allocating the data.
Code is as follows;
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
//struct
struct _data {
char *word;
int number;
};
//scan for size of file
int SCAN(FILE *data) {
int size = 0;
char s_temp[50];
while (1) {
fscanf(data, "%s", s_temp);
if (feof(data)) break;
size++;
}
return size;
}
//load content into struct
int LOAD(FILE *data, int size, struct _data *Wordstruct){
int i;
char temp[50];
for (i=0; i <size; i++){
fscanf(data, "%s", temp , &Wordstruct[i].word, &Wordstruct[i].number);
Wordstruct[i].word =calloc(strlen(temp), sizeof(char));
strcpy(Wordstruct[i].word, temp);
if(strcasecmp(Wordstruct[i].word, temp) ==0){
Wordstruct[i].number++;
}
}
return size;
}
//count how many times each word repeats
void COUNT(struct _data *Wordstruct, int size){
int i;
int count;
count =0;
char *word;
if (strcasecmp(Wordstruct[i].word, word)==0){
count++;
for(i=0; i<size; i++){
printf("%s\n",Wordstruct[i].word,"occurs:\t",count);
}
}
}
//main routine
int main(int argc, char *argv[]){
int size;
FILE *data;
struct _data *Wordlist;
if(argc <2){
printf("Not enough arguments\n");
}
else{
FILE *data= fopen(argv[1],"r");
size =SCAN(data);
LOAD(data, size, Wordlist);
COUNT(Wordlist, size);
}
return 0;
}
You haven't allocated memory for Wordlist. Add
Wordlist = malloc(size*sizeof(*Wordlist));
before the call to LOAD.
And, as pointed out by #BLUEPIXY in comments, change
Wordstruct[i].word =calloc(strlen(temp), sizeof(char));
to
Wordstruct[i].word =calloc(strlen(temp)+1, sizeof(char));
Change this:
Wordstruct[i].word =calloc(strlen(temp), sizeof(char));
To this:
Wordstruct[i].word =calloc(strlen(temp)+1, sizeof(char));
You need to account for NULL terminator, strlen() does not do that for you here.
I want to create a hash table for an exercise I have to send in my University.
The program will open a number of files, break each file's content to <<words>> (tokens) and it will save each <<word>> in a hash table with the frequency of each <<word>>.
In case the word is already in the hash table , the program will increase the word's frequency.
At the end the program will print the words and it's frequencies accordingly.
Also the frequencies should be printed from the highest word frequency to the lowest.
The comparison of the <<words>> will ignore upper and lower case letters.
For example if a file contains : one two three four Two Three Four THREE FOUR FoUr
It should print:
four 4
three 3
two 2
one 1
The professor gave us a template that we should complete but I'm really confused on what to do with the insert_ht() and clear_ht() functions as well as the compare one.
Here is the code :
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define HTABLE_SIZ 1001
#define MAX_LINE_SIZ 1024
/* Hash Table */
typedef struct node* link;
struct node { char *token; int freq; link next; };
link htable[HTABLE_SIZ] = { NULL }; /* Table of lists (#buckets) */
int size = 0; /* Size (number of elements) of hash table */
unsigned int hash (char *tok );
void insert_ht (char *data);
void clear_ht ( );
void print_ht ( );
void Process(FILE *fp);
int main(int argc, char *argv[])
{
int i;
FILE *fp;
for (i=1; i < argc; i++)
{
fp = fopen(argv[i],"r");
if (NULL == fp)
{
fprintf(stderr,"Problem opening file: %s\n",argv[i]);
continue;
}
Process(fp);
fclose(fp);
}
print_ht();
clear_ht();
return 0;
}
void Process(FILE *fp)
{
const char *seperators = " ?!'\";,.:+-*&%(){}[]<>\\\t\n";
char line[MAX_LINE_SIZ];
char *s;
while((fgets(line,MAX_LINE_SIZ, fp)) != NULL)
{
for (s=strtok(line,seperators); s; s=strtok(NULL,seperators))
insert_ht(s);
}
}
/* Hash Function */
unsigned int hash(char *tok)
{
unsigned int hv = 0;
while (*tok)
hv = (hv << 4) | toupper(*tok++);
return hv % HTABLE_SIZ;
}
void insert_ht(char *token)
{
……………………………………………
}
void clear_ht()
{
……………………………………………
}
int compare(const void *elem1, const void *elem2)
{
……………………………………………
}
void print_ht()
{
int i, j=0;
link l, *vector = (link*) malloc(sizeof(link)*size);
for (i=0; i < HTABLE_SIZ; i++)
for (l=htable[i]; l; l=l->next)
vector[j++] = l;
qsort(vector,size,sizeof(link),compare);
for (i=0; i < size; i++)
printf("%-50s\t%7d\n",vector[i]->token,vector[i]->freq);
free(vector);
}
I'll answer you in a new post because it's hard to be exhaustive in comments.
1. Malloc
Why would I need to use malloc then ? Shouldn't i write directly to the htable? (on the insert_ht() funtion)
You need to use malloc because you declare a char pointer in struct (char *token). The thing is that you never initialize the pointer to anything, and as far you don't know the size of the token, you need to malloc every token. But, as you use strdup(token), you don't need to malloc token because strdup does. So don't forget to free every token in order to avoid memory leaks.
2. Segfault
I can't test you code, but it seems like the following line causes the segmentation fault :
list = htable[hashval]->token
Indeed, you try to access token while htable[hashval] is NULL, and to assign a char * to a link type (list).
You need to loop with this :
for(list = htable[hashval]; list != NULL; list = list->next) { ... }
3. Notes
if (x=1) should be if(x==1).
Don't malloc new_list if you don't need to.
Because new_list if used when htable[hashval] is NULL, new_list->next = htable[hashval]; will set new_list->next to NULL.
You should use the -Wall option in gcc (for warnings) and you may use valgrind to understand your segmentation faults. In this case, use gcc with debug mode (-g).
Double and Final edit : Ι found the solution. Apparently for some reason my compare function was wrong.
I still haven't figured out why but here is the correct one, hopefully someone else will find this post helpful!
int compare(const void *elem1, const void *elem2)
{
return (*(link*)elem2)->freq - (*(link*)elem1)->freq;
}
Edit: deleted old answer . Found the correct way I think but I have another problem right now.
The compare function doesn't work correctly. My printf is fine but it doesnt sort them with the frequiencies. I want them to be sorted from the highest to lowest .
In this example: the file contains -> one two three four Two Three Four THREE FOUR FoUr
And I get:
two 2
one 1
four 4
three 3
While I should be getting :
four 4
three 3
two 2
one 1
Here is the code. Feel free to help!
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define HTABLE_SIZ 1001
#define MAX_LINE_SIZ 1024
/* Hash Table */
typedef struct node* link;
struct node { char *token; int freq; link next; };
link htable[HTABLE_SIZ] = { NULL }; /* Table of lists (#buckets) */
int size = 0; /* Size (number of elements) of hash table */
unsigned int hash (char *tok );
void insert_ht (char *data);
void clear_ht ( );
void print_ht ( );
void Process(FILE *fp);
int main(int argc, char *argv[])
{
int i;
FILE *fp;
printf("prin tin for \n");
for (i=1; i < argc; i++)
{
printf("prin tin fopen \n");
fp = fopen(argv[i],"r");
if (NULL == fp)
{
fprintf(stderr,"Problem opening file: %s\n",argv[i]);
continue;
}
printf("prin tin process \n");
Process(fp);
fclose(fp);
}
print_ht();
//clear_ht();
return 0;
}
void Process(FILE *fp)
{
const char *seperators = " ?!'\";,.:+-*&%(){}[]<>\\\t\n";
char line[MAX_LINE_SIZ];
char *s;
while((fgets(line,MAX_LINE_SIZ, fp)) != NULL)
{
for (s=strtok(line,seperators); s; s=strtok(NULL,seperators)){
printf("prin tin insert %s \n",s);
insert_ht(s);
}
}
}
/* Hash Function */
unsigned int hash(char *tok)
{
printf("bike stin hash \n");
unsigned int hv = 0;
while (*tok)
hv = (hv << 4) | toupper(*tok++);
printf("VGAINEIIIIIIIIIIIIII %d \n",hv);
return hv % HTABLE_SIZ;
}
void insert_ht(char *token)
{
printf("bike stin insert %s \n",token);
unsigned int hashval = hash(token);
if (htable[hashval]==NULL){
printf("mesa stin prwti if %u %s \n",hashval,token);
//token = strdup(token);
htable[hashval] = malloc(sizeof(token));
htable[hashval]->token = token ;
htable[hashval]->freq = 1;
size++;
}else {
htable[hashval]->freq++;
}
printf("ta evale epitixws \n");
}
int compare(const void *elem1, const void *elem2)
{
const struct node *p1 = elem1;
const struct node *p2 = elem2;
if ( p1->freq < p2->freq)
return -1;
else if (p1->freq > p2->freq)
return 1;
else
return 0;
}
void print_ht()
{
int i, j=0;
link l, *vector = (link*) malloc(sizeof(link)*size);
for (i=0; i < HTABLE_SIZ; i++)
for (l=htable[i]; l; l=l->next)
vector[j++] = l;
qsort(vector,size,sizeof(link),compare);
for (i=0; i < size; i++)
printf("%-50s\t%7d\n",vector[i]->token,vector[i]->freq);
free(vector);
}
Sorry for my bad english.
I think that :
insert(char *token) takes a word of the file and puts into the hash table. In brief, if the word exists in the hash table, you just have to increment its frequencie. Otherwise, you need to create another node and put the frequencie to 1, then ad it to the array. At the end, you will have one entry for each unique word.
compare(const void *elem1, const void *elem2) will be used by qsort. It returns 0 if elem1 = elem2, a negative number if elem1 < elem2 and a number > 0 if elem1 > elem2. By passing compare to qsort, you allow qsort to sort you array according to your own criteria.
clear_ht() may set all the values of the array to NULL, in order to restart another count ?