Differentiating words in trie - c

I have a word "all" in my trie and a word "alter" but "alt" is not a word in the trie. But when I check for "alt" it still returns true because is_word is true as "all" was a word. How am is supposed to work this error.
//Here's the code
typedef struct node{
bool is_word;
struct node *children[27];
} node;
unsigned int wsize=0;
node * root;
bool check(const char* word)
{
// TODO
node *chrawler=root;
for(int i=0;i<strlen(word)-1;i++)
{
int t;
if(word[i]>=65&&word[i]<=90)
{
t=word[i]-'A';
}
else if(isalpha(word[i]))
t=word[i]-'a';
else
t=26;
if(chrawler->children[t]==NULL)
return false;
else
chrawler=chrawler->children[t];
}
if(chrawler->is_word)
return true;
return false;
}
// Load function
bool load(const char* dictionary)
{
// TODO
FILE *inptr=fopen(dictionary,"r");
if(inptr==NULL)
{
return false;
}
node *new_node=malloc(sizeof(node));
root=new_node;
char * word=malloc((LENGTH+1)*sizeof(char));
int index=0;
for(int c=fgetc(inptr);c!=EOF;c=fgetc(inptr))
{
char ch=c;
if(ch=='\n')
{
word[index]='\0';
index=0;
node *chrawler=root;
for(int i=1;i<strlen(word);i++)
{
int t;
if(isalpha(word[i-1]))
t=word[i-1]-'a';
else
t=26;
if(chrawler->children[t]==NULL)
{
node *new_node=malloc(sizeof(node));
chrawler->children[t]=new_node;
chrawler=chrawler->children[t];
}
else
chrawler=chrawler->children[t];
}
chrawler->is_word=1;
wsize++;
}
else
{
word[index]=ch;
index++;
}
}
return true;
}

You need to ensure that all the pointers in a new node are null, as well as setting the is_word value to false. This is, perhaps, most easily done by using calloc() to allocate the space. Creating a function to allocate and error check the allocation of a node makes it easier. Similarly, you have two blocks of code mapping characters to trie indexes. You should use functions — even small ones — more generously.
The character-by-character input for a line of data is not really necessary, either; it would be better to use fgets() to read lines.
Adding these and sundry other changes (for example, local array word instead of dynamically allocated array — which wasn't freed; closing the file when finished; etc.) gives an MCVE (Minimal, Complete, Verifiable Example) like this:
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { LENGTH = 256 };
// Here's the code
typedef struct node
{
bool is_word;
struct node *children[27];
} node;
unsigned int wsize = 0;
node *root;
static inline int map_char(unsigned char c)
{
int t;
if (isalpha(c))
t = tolower(c) - 'a';
else
t = 26;
return t;
}
static inline node *alloc_node(void)
{
node *new_node = calloc(1, sizeof(node));
if (new_node == 0)
{
fprintf(stderr, "Memory allocation failed in %s\n", __func__);
exit(1);
}
return new_node;
}
static bool check(const char *word)
{
node *chrawler = root;
int len = strlen(word);
for (int i = 0; i < len; i++)
{
int t = map_char(word[i]);
if (chrawler->children[t] == NULL)
return false;
else
chrawler = chrawler->children[t];
}
return chrawler->is_word;
}
// Load function
static bool load(const char *dictionary)
{
FILE *inptr = fopen(dictionary, "r");
if (inptr == NULL)
{
fprintf(stderr, "Failed to open file '%s' for reading\n", dictionary);
return false;
}
root = alloc_node();
char word[LENGTH];
while (fgets(word, sizeof(word), inptr) != 0)
{
word[strcspn(word, "\n")] = '\0';
printf("[%s]\n", word);
node *chrawler = root;
int len = strlen(word);
for (int i = 0; i < len; i++)
{
int t = map_char(word[i]);
//printf("t = %d (%c)\n", t, word[i]);
if (chrawler->children[t] == NULL)
chrawler->children[t] = alloc_node();
chrawler = chrawler->children[t];
}
chrawler->is_word = 1;
wsize++;
}
printf("%d words read from %s\n", wsize, dictionary);
fclose(inptr);
return true;
}
int main(void)
{
const char *wordfile = "words.txt";
if (load(wordfile))
{
char line[4096];
while (fgets(line, sizeof(line), stdin) != 0)
{
line[strcspn(line, "\n")] = '\0';
if (check(line))
printf("[%s] is a word\n", line);
else
printf("[%s] is unknown\n", line);
}
}
return 0;
}
There are other changes that should be made. For example,
the wsize variable should be made non-global; it isn't really used outside the load() function. It's easily arguable that the root node should not be global either; the load() function should return the root node, and the check() function should be passed the root node. In general, global variables should be avoided when possible, and it is usually possible.
Given a file words.txt containing:
abelone
abyssinia
archimedes
brachiosaurus
triceratops
all
alter
asparagus
watchamacallit
a
abracadabra
abyss
ant
the output from a run of the program is:
[abelone]
[abyssinia]
[archimedes]
[brachiosaurus]
[triceratops]
[all]
[alter]
[asparagus]
[watchamacallit]
[a]
[abracadabra]
[abyss]
[ant]
13 words read from words.txt
a
[a] is a word
ab
[ab] is unknown
al
[al] is unknown
all
[all] is a word
alt
[alt] is unknown
alte
[alte] is unknown
alter
[alter] is a word
triceratops
[triceratops] is a word
brachiosaurus
[brachiosaurus] is a word
abys
[abys] is unknown
abbey
[abbey] is unknown
abyss
[abyss] is a word
ant
[ant] is a word
a
[a] is a word
archimedes
[archimedes] is a word

Related

CS50 Speller has a memory leak somewhere... and I can't find it

I am currently taking CS50x and am on Pset5: Speller. My code works as intended, however when I run the check50 function my program fails Valgrind.
This is particularly confusing to me because when I run Valgrind on my program with the help50, it passes with no memory leaks. I ran help50 Valgrind on my program after changing the default dictionary to the dictionaries/small, and it still passed.
Here is my code and the error log I get from check50. I believe the Valgrind error is from not using fclose() on the file, but if I include fclose() then my program fails when ran with a "munmap_chunk(): invalid pointer".
If anyone could help me out or had any idea where I should look I would be most grateful!
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of bucket for Hash Table
const unsigned int N = 676;
// Hash table
node *table[N];
int word_counter = 0;
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
int index = hash(word);
node *cursor = table[index];
//printf("cursor: %s\n", cursor->word);
while (cursor != NULL)
{
if (strcasecmp(word, cursor->word) == 0)
{
return true;
}
cursor = cursor->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
unsigned int index = 0;
for (int i = 0; i < 26; i++)
{
if (word[0] == i + 'a' || word[0] == i + 'A')
{
index += i * 26;
}
if (word[1] == i + 'a' || word[1] == i + 'A')
{
index += i;
}
}
return index;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
FILE *dict_file = fopen(dictionary, "r");
if (dict_file == NULL)
{
return false;
}
table_clear();
// Use fscanf() to look at each string in the dictionary and store to buffer
char *buffer = malloc(sizeof(LENGTH + 1));
if (buffer == NULL)
{
return false;
}
while (fscanf(dict_file, "%s", buffer) != EOF)
{
// Create a new memory space for each dictionary word, and store it here
node *n = malloc(sizeof(node));
if (n == NULL)
{
free(buffer);
return false;
}
// Copy each word from buffer to the location (*dict_entry).word
strcpy(n->word, buffer);
n->next = NULL;
// Use hash function to find the hash value of the dictionary word
int index = hash(n->word);
if (table[index] != NULL)
{
n->next = table[index];
}
table[index] = n;
n = NULL;
free(n);
word_counter++;
}
//fclose(dict_file); <----- If activate, this line causes the munmap_chunk: invalid pointer error
free(buffer);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
return word_counter;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
for (int i = 0; i < N; i++)
{
node *cursor = table[i];
while (cursor != NULL)
{
cursor = cursor->next;
free(table[i]);
table[i] = cursor;
}
free(cursor);
}
return true;
}
// Sets every value in table to NULL
void table_clear(void)
{
for (int i = 0; i < N; i++)
{
table[i] = NULL;
}
return;
}
**Cause valgrind tests failed; see log for more information
LOG:
running valgrind --show-leak-kinds=all --xml=yes --xml-file=/tmp/tmpr2rknwok -- ./speller substring/dict substring/text...
checking for output "MISSPELLED WORDS\n\nca\ncats\ncaterpill\ncaterpillars\n\nWORDS MISSPELLED: 4\nWORDS IN DICTIONARY: 2\nWORDS IN TEXT: 6\n"...
checking that program exited with status 0...
checking for valgrind errors...
Invalid write of size 1: (file: dictionary.c, line: 88)
Invalid read of size 1: (file: dictionary.c, line: 99)
472 bytes in 1 blocks are still reachable in loss record 1 of 1: (file: dictionary.c, line: 74)
See above for exlaination
There are numerous problems with the program as pointed out in the comments, but the most likely culprit for the valgrind error report is this dynamic allocation:
char *buffer = malloc(sizeof(LENGTH + 1));
It allocates the sizeof the expression (LENGTH + 1) number of bytes, which is most likely the same as doing sizeof(int) - which is often 4 - and 4 bytes isn't going to be able to store words longer than 3 chars. I say most likely because I don't know the type of LENGTH, but I'm assuming it's an int, and in that case, you effectively do char *buffer = malloc(sizeof(int));.
It should be:
char *buffer = malloc(LENGTH + 1);

I completed the week 5 speller cs50x today. The code was not giving the correct output when I used recursion, but normally, it worked

Here is the link to the question (problem set)speller
I find the recursion conditions correct. the problem is in the check function, according to the outputs. So when I changed it, the code worked.
Here are the codes from dictionary.c (first one with recursion and the second one without it)
with recursion
here I defined two functions of my own in order to recursively go through the lists...
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include "dictionary.h"
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
bool search(node *s, const char *wod);
void frr(node *ptr);
int sije = 0;
// TODO: Choose number of buckets in hash table
// ans - i would choose a 2d array which is [26][LENGTH + 1]
const unsigned int N = 26;
// 2d Hash table
node *table[N][LENGTH];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// finding the hash for the word
int h = hash(word);
// finding the length for the word
int len = strlen(word) - 1;
// creating another pointer for the sake of iterating
node *tmp = table[h][len];
return search(tmp, word);
}
bool search(node *s, const char *wod)
{
if (s == NULL)
{
return false;
}
if (!strcasecmp(s -> word, wod))
{
return true;
}
else
{
s = s -> next;
bool c = search(s, wod);
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
return toupper(word[0]) - 'A';
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// make the hash table free of garbage values
for (int i = 0; i < 26; i++)
{
for (int j = 0; j < 10; j++)
{
table[i][j] = NULL;
}
}
// TODO
// open the dictionary file
FILE *dict = fopen(dictionary, "r");
if (dict == NULL)
{
return false;
}
// read the word inside a char array
char n[LENGTH + 1];
while (fscanf(dict, "%s", n) != EOF)
{
// call the hash function and get the hach code
int h = hash(n);
// this will return something from 0 till 25
int len = strlen(n) - 1;
// this will have the length of the word
// let's load it to the hach table
// create a new node
node *no = malloc(sizeof(node));
if (no == NULL)
{
return false;
}
// copy the word from n to no
strcpy(no -> word, n);
// declare the pointer inside the node to null
no -> next = NULL;
// insert the node inside the hach table
if (table[h][len] == NULL)
{
table[h][len] = no;
}
// else if the spot is populated
else
{
no -> next = table[h][len];
table[h][len] = no;
}
sije += 1;
}
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// TODO
return sije;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
for (int i = 0; i < 26; i++)
{
for (int j = 0; j < LENGTH; j++)
{
// if table at that inces in not null,
// that means that there is a linked list at that index of the table
if (table[i][j] != NULL)
{
frr(table[i][j]);
}
}
}
return true;
}
void frr(node *ptr)
{
if (ptr == NULL)
{
return;
}
frr(ptr -> next);
free(ptr);
}
without recursion (the one which worked)
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include "dictionary.h"
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
void frr(node *ptr);
int sije = 0;
// TODO: Choose number of buckets in hash table
// ans - i would choose a 2d array which is [26][LENGTH]
const unsigned int N = 26;
// 2d Hash table
node *table[N][LENGTH];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// finding the hash of the code
int h = hash(word);
// finding the length of the code
int len = strlen(word) - 1;
// creating another pointer for the sake of iterating
node *tmp = table[h][len];
while (true)
{
if (tmp == NULL)
{
return false;
}
if (!strcasecmp(tmp -> word, word))
{
return true;
}
tmp = tmp -> next;
}
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
return toupper(word[0]) - 'A';
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// make the hash table free of garbage values
for (int i = 0; i < 26; i++)
{
for (int j = 0; j < 10; j++)
{
table[i][j] = NULL;
}
}
// TODO
// open the dictionary file
FILE *dict = fopen(dictionary, "r");
if (dict == NULL)
{
return false;
}
// read the word inside a char array
char n[LENGTH + 1];
while (fscanf(dict, "%s", n) != EOF)
{
// call the hash function and get the hach code
int h = hash(n);
// this will return something from 0 till 25
int len = strlen(n) - 1;
// this will have the length of the word
// let's load it to the hach table
// create a new node
node *no = malloc(sizeof(node));
if (no == NULL)
{
return false;
}
// copy the word from n to no
strcpy(no -> word, n);
// declare the pointer inside the node to null
no -> next = NULL;
// insert the node inside the hach table
if (table[h][len] == NULL)
{
table[h][len] = no;
}
// else if the spot is populated
else
{
no -> next = table[h][len];
table[h][len] = no;
}
sije += 1;
}
fclose(dict);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// TODO
return sije;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
for (int i = 0; i < 26; i++)
{
for (int j = 0; j < LENGTH; j++)
{
// if table at that inces in not null,
// that means that there is a linked list at that index of the table
if (table[i][j] != NULL)
{
frr(table[i][j]);
}
}
}
return true;
}
void frr(node *ptr)
{
if (ptr == NULL)
{
return;
}
frr(ptr -> next);
free(ptr);
}
What could be the reason. All others are correct, only the checking part ...
I thought I would investigate this issue further as it seemed to me that adding in a return within the "else" block should address the issue. Possibly, I was not making the answer clear enough via the comments so I went ahead and acquired the lesson code, implemented your code solution with the non-recursive check function performing some test over a select set of text files, and then trying out the code with the recursive search function along with the code tweak I had noted.
Executing the non-recursive solution over the "aca.txt" file, the following was the terminal output to be used as the baseline.
WORDS MISSPELLED: 17062
WORDS IN DICTIONARY: 143091
WORDS IN TEXT: 376904
TIME IN load: 0.08
TIME IN check: 3.61
TIME IN size: 0.00
TIME IN unload: 0.01
TIME IN TOTAL: 3.70
I then revised the program, dictionary.c, to include the check/search function calls with the code in its original state.
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// finding the hash for the word
int h = hash(word);
// finding the length for the word
int len = strlen(word) - 1;
// creating another pointer for the sake of iterating
node *tmp = table[h][len];
return search(tmp, word);
}
bool search(node *s, const char *wod)
{
if (s == NULL)
{
return false;
}
if (!strcasecmp(s -> word, wod))
{
return true;
}
else
{
s = s -> next;
bool c = search(s, wod);
}
return false;
}
Executing this version of the code did give some superfluous misspelling values when run over the same file.
WORDS MISSPELLED: 359710
WORDS IN DICTIONARY: 143091
WORDS IN TEXT: 376904
TIME IN load: 0.07
TIME IN check: 4.85
TIME IN size: 0.00
TIME IN unload: 0.01
TIME IN TOTAL: 4.93
My guess is that this is the type of behavior you were experiencing.
I then revised the search function to provide a return statement within the "else" block as I had noted in the comments. Following is the refactored code.
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// finding the hash for the word
int h = hash(word);
// finding the length for the word
int len = strlen(word) - 1;
// creating another pointer for the sake of iterating
node *tmp = table[h][len];
return search(tmp, word);
}
bool search(node *s, const char *wod)
{
if (s == NULL)
{
return false;
}
if (!strcasecmp(s -> word, wod))
{
return true;
}
else
{
s = s -> next;
return search(s, wod); /* Added this line of code */
//bool c = search(s, wod); /* Deactivated this line of code */
}
return false;
}
When the program was recompiled and executed over the same text file, the following statistical output was acquired.
WORDS MISSPELLED: 17062
WORDS IN DICTIONARY: 143091
WORDS IN TEXT: 376904
TIME IN load: 0.08
TIME IN check: 4.14
TIME IN size: 0.00
TIME IN unload: 0.01
TIME IN TOTAL: 4.23
This agrees with the values listed with the non-recursive version of the program.
I checked this refactored version against the other selected text files and all agreed with the values run using the non-recursive version of the program.
FYI, I utilized the gcc compiler. That is the compiler that I have on my system in lieu of Clang. And I will point out one other thing I needed to do to make successfully build this program. When compiling, the compiler complained about having a variable size definition for the "table" array.
const unsigned int N = 26;
To get around this issue with my compiler, I had to move the assignment/definition of "N" to the same spot as the "LENGTH" definition in the header file.
// Maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45
#define N 26
I don't think these additional tweaks had any bearing on the functionality of using a recursive function, but I wanted to be fully transparent.
Anyway, hopefully with this expanded explanation, you might try out the code tweaks to see if the recursive function would now work.

Valgrind: Conditional jump or move depends on uninitialised value(s). Is not terminating a string inside a longer array with '\0' enough?

The problem set 5 of cs50, speller, ask to implement some dictionary functionalities.
I get this warnings with valgrind:
==393== Conditional jump or move depends on uninitialised value(s)
==393== at 0x49DB143: tolower (ctype.c:46)
==393== by 0x483F864: strcasecmp (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==393== by 0x4019CF: check (dictionary.c:42)
==393== by 0x401603: main (speller.c:114)
==393== Uninitialised value was created by a heap allocation
==393== at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==393== by 0x401AF8: load (dictionary.c:82)
==393== by 0x40129E: main (speller.c:40).
Here it is my dictionary.c file, which contains the helpers function.
// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "dictionary.h"
#include <strings.h>
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 1;
unsigned int siz = 0;
// Hash table
node *table[N];
/*
for (int i = 0; i < N; i++)
{
table[i]->next
}
*/
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
int h = hash(word);
node *ll = table[h];
while (ll != NULL)
{
if (strcasecmp(ll->word, word) == 0)
{
printf("len: %lu\n", strlen(ll->word));
return true;
}
ll = ll->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO
return 0;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
printf("a\n");
// TODO
FILE *input = fopen(dictionary, "r");
if (input == NULL)
{
printf("Could not open file.\n");
return false;
}
printf("b\n");
char c;
int i = 0;
unsigned int h = 0;
char word[LENGTH + 1];
node *key = NULL;
while (fread(&c, sizeof(char), 1, input))
{
if (i == 0)
{
key = malloc(sizeof(node));
}
else if (c == '\n')
{
word[i] = '\0';
i = 0;
h = hash(word);
key->next = table[h];
table[h] = key;
siz++;
continue;
}
key->word[i] = c;
i++;
}
fclose(input);
/*
for (int j = 0; j < N; j++)
{
while (table[j] != NULL)
{
printf("%s ->\n", table[j]->word);
table[j] = table[j]->next;
}
}
*/
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// TODO
return siz;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO
node *n = NULL;
node *tmp = NULL;
for (int i = 0; i < N; i++)
{
n = table[i];
while (n != NULL)
{
tmp = n;
n = n->next;
free(tmp);
}
}
return true;
}
I think this warning happens because at line 42 the function strcasecmp is trying to lowercase some characters of the string node->word, even after the '\0'.
In fact, the warnings go away if I substitute, in load(),
key = malloc(sizeof(node)); with key = calloc(1, sizeof(node));
Because calloc allocate and also sets the memory to 0.
So, my question is: how strcasecmp really work??
In the malloc scenario ll->word, the argument I pass to strcasecmp, is an array of chars like this "actual_valid_chars \0 garbage bytes until array length: LENGTH".
In the calloc scenario the array is "actual_valid_chars \0 000000...".
So, I think that at I am passing uninitialized memory to strcasecmp, in case of malloc, but I also terminate the string with \0 in both scenarios.
Shouldn't strcasecmp() recognize the end of the string by '\0' even if it is in the middle of the array?
Can someone clarify these passages to me please?
Also, can I simply ignore these warnings, or are there better practices to use?
key->word is not null terminated in the load function. The char array word is, though it's never populated.

Illegal instruction 4 when placing a function outside int main

I've just begun learning the C language and I ran into an issue with one of my programs.
I am getting an error: "Illegal instruction 4" when executing: ./dictionary large.txt
Large.txt is a file with 143091 alphabetically sorted words, with each word starting on a new line. I am trying to load all of them into a hash table and return true if all the words are loaded successfully.
This code works for me if the code in bool load() is within int main and load() is non-existent. However, once I place it inside the load() function and call it from main, I get an error.
I would appreciate help on this, as there are not many threads on Illegal instruction.
This is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>
// Maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45
// Number of letters in the english alphabet
#define ALPHABET_LENGTH 26
// Default dictionary
#define DICTIONARY "large.txt"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
} node;
// Number of buckets in hash table
const unsigned int N = ALPHABET_LENGTH;
// Hash table
node *table[N];
// Load function
bool load(char *dictionary);
// Hash function
int hash(char *word);
int main(int argc, char *argv[])
{
// Check for correct number of args
if (argc != 2 && argc != 3)
{
printf("Usage: ./speller [DICTIONARY] text\n");
exit(1);
}
// Determine which dictionary to use
char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;
bool loaded = load(dictionary);
// TODO: free hashtable from memory
return 0;
}
bool load(char *dictionary)
{
// Open dictionary for reading
FILE *file = fopen(dictionary, "r");
if (file == NULL)
{
printf("Error 2: could not open %s. Please call customer service.\n", dictionary);
exit(2);
}
// Initialize array to NULL
for (int i = 0; i < N; i++)
table[i] = NULL;
// Declare and initialize variables
unsigned int char_count = 0;
unsigned int word_count = 0;
char char_buffer;
char word_buffer[LENGTH + 1];
int hash_code = 0;
int previous_hash_code = 0;
// Declare pointers
struct node *first_item;
struct node *current_item;
struct node *new_item;
// Is true the first time the while loop is ran to be able to distinguish between hash_code and previous_hash_code after one loop
bool first_loop = true;
// Count the number of words in dictionary
while (fread(&char_buffer, sizeof(char), 1, file))
{
// Builds the word_buffer by scanning characters
if (char_buffer != '\n')
{
word_buffer[char_count] = char_buffer;
char_count++;
}
else
{
// Increases word count each time char_buffer == '\n'
word_count += 1;
// Calls the hash function and stores its value in hash_code
hash_code = hash(&word_buffer[0]);
// Creates and initializes first node in a given table index
if (hash_code != previous_hash_code || first_loop == true)
{
first_item = table[hash_code] = (struct node *)malloc(sizeof(node));
if (first_item == NULL)
{
printf("Error 3: memory not allocated. Please call customer service.\n");
return false;
}
current_item = first_item;
strcpy(current_item->word, word_buffer);
current_item->next = NULL;
}
else
{
new_item = current_item->next = (struct node *)malloc(sizeof(node));
if (new_item == NULL)
{
printf("Error 4: memory not allocated. Please call customer service.\n");
return false;
}
current_item = new_item;
strcpy(current_item->word, word_buffer);
current_item->next = NULL;
}
// Fills word buffer elements with '\0'
for (int i = 0; i < char_count; i++)
{
word_buffer[i] = '\0';
}
// Signals the first loop has finished.
first_loop = false;
// Clears character buffer to keep track of next word
char_count = 0;
// Keeps track if a new table index should be initialized
previous_hash_code = hash_code;
}
}
return true;
}
// Hash in order of: 'a' is 0 and 'z' is 25
int hash(char *word_buffer)
{
int hash = word_buffer[0] - 97;
return hash;
}
Thank you in advance!
Chris
You should use node *table[ALPHABET_LENGTH]; for the table declaration instead of node *table[N];
There is a difference between constant macros and const variables, a macro can be used in a constant expression, such as a global array bound as per your use case, whereas a const variable cannot.
As you can see here, the compiler you say you are using, gcc, with no compiler flags, issues an error message:
error: variably modified 'table' at file scope
You can read more about these differences and use cases in "static const" vs "#define" vs "enum" it has more subjects, like static and enum, but is a nice read to grasp the differences between these concepts.

Passing a stack in C while using Threads

I am trying to make a C program to count the number of frequency of words in a document and to split the document up into N parts with each part being counted by a different thread. Everytime I run the program, I get back nonsensical data, but if I run it without the threads I get back the data that I expect.
Here is the Struct named BinarySearchTree.h
typedef struct {
char *num; /*! The contents of the <tt>Item</tt>.<br>Type: <tt>int</tt>*/
int count;
} Item; /*! \typedef Item
* \struct A struct represent one item.
*/
typedef struct node {
Item info; /*! A struct with a one <tt>int</tt> on it. */
struct node * left;
struct node * right;
} Tree;
typedef struct passargs {
char *File;
Tree *t;
int splitStart;
int splitEnd;
int i;
int a;
char words[512][512];
} pass;
Here is the main code named **BinarySearchTree.c*:
#include "BinarySearchTree.h"
#include <pthread.h>
#include <string.h>
pthread_mutex_t mutex1 = PTHREAD_MUTEX_INITIALIZER;
// Code for integer based BST from: https://gist.github.com/ArnonEilat/4611213
Tree * add(Tree* nod, char *number) {
if (nod == NULL) {
printf("Thread number %ld\n", pthread_self());
printf("\nMaking new node for %s\n", number);
nod = (Tree*) malloc(sizeof (Tree));
if (nod == NULL) {
return NULL;
}
nod->info.num = number;
nod->info.count=1; //Increment the value if the same word is found
nod->left = NULL;
nod->right = NULL;
return nod;
}
if (strcmp(nod->info.num, number)==0) {
printf("%s==%s ",nod->info.num,number);
printf("\t%s:%d ",nod->info.num,nod->info.count);
// ++nod->info.count;
nod->info.count++;
printf("->%d\n ",nod->info.count);
return nod;
}
if (strcmp(nod->info.num, number)>0){
printf("%s>%s ",nod->info.num,number);
nod->left = add(nod->left, number);
} else {
printf("%s<=%s ",nod->info.num,number);
nod->right = add(nod->right, number);
}
return nod;
}
void printInorder(Tree* nod) {
if (nod == NULL) {
return;
}
printInorder(nod->left);
printf(" %s: %d ", nod->info.num, nod->info.count);
printInorder(nod->right);
}
void freeTree(Tree *root) {
if (root == NULL) {
return;
}
freeTree(root->left);
freeTree(root->right);
free(root);
}
int newLines(char* File){ //Count the number of new lines in the file. Split based on those not word count
int newLines=0;
char buffTemp[5120];
FILE *fp1 = fopen(File, "r");
while(1)
{
if(fgets(buffTemp, 512, fp1) ==NULL)
break;
else{
newLines++;
}
}
fclose(fp1);
return newLines;
}
//This function reads the file and adds each entry to the tree, or increments if same word is present
Tree* read(pass* info){
const char *delims = " \n"; //Deliminate by newlines and spaces
char *token;
int splitCounter=0;
FILE *fp = fopen(info->File, "r");
char buff[512];
int aT=info->a; //Putting variables into local ones
int iT=info->i;
int splitEnd = info->splitEnd;
int splitStart = info->splitStart;
char words[512][512];
memcpy(words, info->words, 512);
Tree* nod=info->t;
while(1 && splitCounter<splitEnd) //While the file does not end and we have not reached the end of the split
{
fgets(buff, 512, fp);
splitCounter++;
if(buff == NULL)
break;
else if (splitCounter>splitStart){
printf("\t %s\n", buff);
token = strtok(buff, delims); //Split via the tokens and put them into buff
while (token!=0) {
strcpy(words[iT], token);
iT += 1;
token = strtok(NULL, delims);
}
}
}
for (;aT<iT;aT++){
nod=add(nod, (char *)words[aT]); //Add each word to the tree
}
printf("\n");
fclose(fp);
info->t=nod;
memcpy(info->words,words,512);
info->i=iT;
info->a=aT;
return nod;
}
int main() {
Tree* t = NULL;
pass args;
args.t=t;
args.i=0;
args.a=0;
args.splitStart=0; //These splits are used to tell each thread where to look in the file
args.splitEnd=0;
args.File="/home/dib/CLionProjects/deleteme/readFile"; //Address of the file to be read
int numLines=newLines(args.File);
printf("\nnewLines %d\n",numLines);
int split;
printf("How many splits/threads would you like to create?\n");
scanf("%d", &split);
int iterator=numLines/split;
printf("iterator %d:", iterator);
const int NUMTHREADS= numLines/iterator + (numLines % iterator != 0);
unsigned int p;
// This for loop shows how I hope the threads would work. Uncomment the below block to see it work
for (p=0; p<NUMTHREADS; p++){
args.splitStart=args.splitEnd;
if (args.splitEnd+iterator>numLines) args.splitEnd=numLines;
else args.splitEnd+=iterator;
read(&args);
}
pthread_t th[NUMTHREADS];
int threads[NUMTHREADS];
//This is my attempt at using threads to solve the same problem as the above for loop
// for(p=0; p< NUMTHREADS; p++){
// threads[p] = p;
// args.splitStart=args.splitEnd;
// if (args.splitEnd+iterator>numLines) args.splitEnd=numLines;
// else args.splitEnd+=iterator;
// printf("split end: %d",args.splitEnd);
// pthread_create(&th[p], NULL, &read, &args);
// }
printInorder(args.t);
freeTree(args.t);
exit(0);
}
And finally the document I have been using as a test case named readFile:
five five five one two
two three three four four
three six seven six six
four six five seven seven
seven seven seven seven six
five four six
I tried implementing the solution found here : passing struct to pthread as an argument
but did not know what thread_handles was, and could not get it to work though the problem faced in that link is similar. So am I also just having a problem with memory allocation or is it something completely different?

Resources