A couple questions here.
I'm trying to figure out Week 5 Pset5 Speller. Here is my code:
// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 1000;
// Hash table
node *table[N];
// Dictionary size
int dictionary_size = 0;
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO #4!
// make lowercase copy of word
char copy[strlen(word) + 1];
for (int i = 0; i < strlen(word); i++)
{
copy[i] = tolower(word[i]);
}
// get hash value
int h = hash(copy);
// use hash value to see if word is in bucket
if (table[h] != NULL)
{
node *temp = table[h];
while (temp != NULL)
{
if (strcmp(temp->word, copy) == 0)
{
return true;
}
temp = temp->next;
}
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO #2
// source: https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/cf9189q/
// I used this source because I had trouble understanding different variations - this one explained everything well.
// I modified it slightly to fit my needs
unsigned int hash = 0;
for (int i = 0; i < strlen(word); i++)
{
hash = (hash << 2) ^ word[i];
}
return hash % N;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// TODO #1!
// open dictionary file
FILE *file = fopen(dictionary, "r");
if (file == NULL)
{
return false;
}
// read strings from file one at a time
char word[LENGTH + 1];
while (fscanf(file, "%s", word) != EOF)
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
// place word into node
strcpy(n->word, word);
// use hash function to take string and return an index
int h = hash(word);
// make the current node point to the bucket we want
n->next = table[h];
// make the bucket start now with the current node
table[h] = n;
//count number of words loaded
dictionary_size++;
}
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// TODO #3!
return dictionary_size;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO #5!
for (int i = 0; i < N; i++)
{
while (table[i] != NULL)
{
node *temp = table[i]->next;
free(table[i]);
table[i] = temp;
}
}
return true;
}
Question Part 1:
When I run Check50, the only error that shows is the last one:
:) dictionary.c exists
:) speller compiles
:) handles most basic words properly
:) handles min length (1-char) words
:) handles max length (45-char) words
:) handles words with apostrophes properly
:) spell-checking is case-insensitive
:) handles substrings properly
:( program is free of memory errors
valgrind tests failed; see log for more information.
When I check Help50 Valgrind, it says:
==658== Conditional jump or move depends on uninitialised value(s)
Looks like you're trying to use a variable that might not have a
value? Take a closer look at line 70 of dictionary.c.
Valgrind then asks me to look at line 70, but I can't figure out what it's upset with. (Line 70 is the "for" line in the hash function). I've also tried moving the "strlen(word)" portion to be its own variable in case that's what it was upset about, and it still didn't agree with what I had done:
unsigned int hash(const char *word)
{
// TODO #2
// source: https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/cf9189q/
// I used this source because I had trouble understanding different variations - this one explained everything well.
// I modified it slightly to fit my needs
unsigned int hash = 0;
int j = strlen(word);
for (int i = 0; i < j; i++)
{
hash = (hash << 2) ^ word[i];
}
return hash % N;
}
Can someone explain what valgrind is asking me to do?
Question Part 2: this part has been fixed :) see comments if you are curious about what was fixed in this portion of my question
Although Check50 is mostly happy with my code, I have run it multiple times and found my results do not match the staff results. For example, when I run ./speller texts/lalaland.txt, I get 2476 misspelled words while the staff got 955 misspelled words. I have the same number of words in the dictionary, but something in my code is not working the way it should. Can anyone help me identify where my problem is?
Much appreciated!!
I've just begun learning the C language and I ran into an issue with one of my programs.
I am getting an error: "Illegal instruction 4" when executing: ./dictionary large.txt
Large.txt is a file with 143091 alphabetically sorted words, with each word starting on a new line. I am trying to load all of them into a hash table and return true if all the words are loaded successfully.
This code works for me if the code in bool load() is within int main and load() is non-existent. However, once I place it inside the load() function and call it from main, I get an error.
I would appreciate help on this, as there are not many threads on Illegal instruction.
This is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>
// Maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45
// Number of letters in the english alphabet
#define ALPHABET_LENGTH 26
// Default dictionary
#define DICTIONARY "large.txt"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
} node;
// Number of buckets in hash table
const unsigned int N = ALPHABET_LENGTH;
// Hash table
node *table[N];
// Load function
bool load(char *dictionary);
// Hash function
int hash(char *word);
int main(int argc, char *argv[])
{
// Check for correct number of args
if (argc != 2 && argc != 3)
{
printf("Usage: ./speller [DICTIONARY] text\n");
exit(1);
}
// Determine which dictionary to use
char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;
bool loaded = load(dictionary);
// TODO: free hashtable from memory
return 0;
}
bool load(char *dictionary)
{
// Open dictionary for reading
FILE *file = fopen(dictionary, "r");
if (file == NULL)
{
printf("Error 2: could not open %s. Please call customer service.\n", dictionary);
exit(2);
}
// Initialize array to NULL
for (int i = 0; i < N; i++)
table[i] = NULL;
// Declare and initialize variables
unsigned int char_count = 0;
unsigned int word_count = 0;
char char_buffer;
char word_buffer[LENGTH + 1];
int hash_code = 0;
int previous_hash_code = 0;
// Declare pointers
struct node *first_item;
struct node *current_item;
struct node *new_item;
// Is true the first time the while loop is ran to be able to distinguish between hash_code and previous_hash_code after one loop
bool first_loop = true;
// Count the number of words in dictionary
while (fread(&char_buffer, sizeof(char), 1, file))
{
// Builds the word_buffer by scanning characters
if (char_buffer != '\n')
{
word_buffer[char_count] = char_buffer;
char_count++;
}
else
{
// Increases word count each time char_buffer == '\n'
word_count += 1;
// Calls the hash function and stores its value in hash_code
hash_code = hash(&word_buffer[0]);
// Creates and initializes first node in a given table index
if (hash_code != previous_hash_code || first_loop == true)
{
first_item = table[hash_code] = (struct node *)malloc(sizeof(node));
if (first_item == NULL)
{
printf("Error 3: memory not allocated. Please call customer service.\n");
return false;
}
current_item = first_item;
strcpy(current_item->word, word_buffer);
current_item->next = NULL;
}
else
{
new_item = current_item->next = (struct node *)malloc(sizeof(node));
if (new_item == NULL)
{
printf("Error 4: memory not allocated. Please call customer service.\n");
return false;
}
current_item = new_item;
strcpy(current_item->word, word_buffer);
current_item->next = NULL;
}
// Fills word buffer elements with '\0'
for (int i = 0; i < char_count; i++)
{
word_buffer[i] = '\0';
}
// Signals the first loop has finished.
first_loop = false;
// Clears character buffer to keep track of next word
char_count = 0;
// Keeps track if a new table index should be initialized
previous_hash_code = hash_code;
}
}
return true;
}
// Hash in order of: 'a' is 0 and 'z' is 25
int hash(char *word_buffer)
{
int hash = word_buffer[0] - 97;
return hash;
}
Thank you in advance!
Chris
You should use node *table[ALPHABET_LENGTH]; for the table declaration instead of node *table[N];
There is a difference between constant macros and const variables, a macro can be used in a constant expression, such as a global array bound as per your use case, whereas a const variable cannot.
As you can see here, the compiler you say you are using, gcc, with no compiler flags, issues an error message:
error: variably modified 'table' at file scope
You can read more about these differences and use cases in "static const" vs "#define" vs "enum" it has more subjects, like static and enum, but is a nice read to grasp the differences between these concepts.
I'm working on CS50's Week 5 assignment, Speller. I'm building my functions one at a time, and I'm running into problems with my unload function (Line 151). Right now, I'm just testing the iteration in a way that prints results before I use that iteration to free each of the nodes. I'm doing this by changing each node's word to "FREE" in the order these nodes are to be freed.
The function call (Line 60) returns true, and the printf command prints successfully. However, everything in the unload function itself is being ignored. None of the printf lines that I added to see its progress (DEBUG DEBUG DEBUG) are printing. The print() function call on line 63 should be printing the table with all of the words set to "FREE", and all dictionary word locations showing "NOT FOUND". Instead, it's printing the list and locations completely unaltered, and with none of the DEBUG print commands within the for loop (Line 155) triggering.
I don't understand why this is happening. The unload() function call alone, whether or not it returns true, should still at least trigger the first printf command in the for loop (Line 157). But even that is skipped.
Can someone please help me understand why the function is returning true, yet making none of the changes it's supposed to? Thanks in advance.
EDIT: Okay, I was told that I wasn't calling the unload function correctly on line 60. I've since corrected that. Now it will print out "LOCATION 00:", but it ends as soon as it hits that first while loop on line 158. I was having this problem before, and I'm not sure why it's doing this. strcmp() should see that the head node's word does not match "FREE" until it makes the change from the end of the list to the beginning. Why is the while loop not even triggering?
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
unsigned int HASH_MAX = 50; // Max elements in hash table
unsigned int LENGTH = 20; // Max length of word to be stored
unsigned int hash(const char *word); // assign hash code -- [(code + current letter) * 3] * string length, % HASH_MAX
bool load(FILE *dictionary); // load dictionary into memory
bool check(char *word); // check if word exists in dictionary
bool unload(void); // unload dictionary from memory, free memory (CURRENTLY DEBUGGING, CHECKING ITERATION)
void print(void); // print table contents and node locations
typedef struct _node // node structure: stored word, pointer to next node
{
char *word[20];
struct _node *next;
} node;
node *HASH_TABLE[50];
int main(int argc, char *argv[])
{
FILE *dictionary = fopen("C:/Users/aaron/Desktop/Dictionary.txt", "r"); // open dictionary file, read
if (!dictionary) // if dictionary is NULL, return error message, end program
{
printf("FILE NOT FOUND\n");
return 1;
}
if (load(dictionary)) // if dictionary loaded successfully (function call), close dictionary and print table contents
{
fclose(dictionary);
print(); // print "LIST (number): {(name, address), ...}\n
}
char *checkword = "Albatross"; // test check function for word that does not exist in the library
char *checkword2 = "Riku"; // test check function for word that does exist in the library
if (check(checkword)) // return check results for checkword, found or not found
{
printf("\n%s found\n", checkword);
}
else
{
printf("\n%s not found\n", checkword);
}
if (check(checkword2)) // return check results for checkword2, found or not found
{
printf("\n%s found\n", checkword2);
}
else
{
printf("\n%s not found\n", checkword2);
}
if (unload()) // if unloaded successfully (function call), print contents
{
printf("\nUNLOADED...\n\n"); // DEBUG DEBUG DEBUG (confirm unload function returned true)
print();
}
}
unsigned int hash(const char *word) // assign hash code -- [(code + current letter) * 3] * string length, % HASH_MAX
{
char word_conv[LENGTH + 1]; // store converted word for uniform key
unsigned int code = 0; // hash code
strcpy(word_conv, word);
for (int i = 0; i < strlen(word); i++) // set all letters in the word to lower case
{
word_conv[i] = tolower(word_conv[i]);
}
for (int j = 0; j < strlen(word_conv); j++) // for all letters in converted word, add ascii value to code and multiply by 3
{
code += word_conv[j];
code = code * 3;
}
code = code % HASH_MAX; // set code to remainder of current code divided by maximum hash table size
return code;
}
bool load(FILE *dictionary) // load dictionary into memory
{
char word[LENGTH+1]; // store next word in the dictionary
while (!feof(dictionary)) // until end of dictionary file
{
fscanf(dictionary, "%s", word); // scan for next word
node *new_n = malloc(sizeof(node)); // new node
strcpy(new_n->word, word); // store scanned word in new node
new_n->next = NULL; // new node's next pointer set to NULL
unsigned int code = hash(word); // retrieve and store hash code
if (HASH_TABLE[code] == NULL) // if hash location has no head
{
HASH_TABLE[code] = new_n; // set new node to location head
}
else if (HASH_TABLE[code] != NULL) // if head already exists at hash location
{
node *trav = HASH_TABLE[code]; // set traversal node
while (trav->next != NULL) // while traversal node's next pointer is not NULL
{
trav = trav->next; // move to next node
}
if (trav->next == NULL) // if traversal node's next pointer is null
{
trav->next = new_n; // set new node to traversal node's next pointer
}
}
}
return true; // confirm successful load
}
bool check(char *word) // check if word exists in dictionary
{
unsigned int code = hash(word); // retrieve and store hash code
node *check = HASH_TABLE[code]; // set traversal node to hash location head
while (check != NULL) // while traversal node is not NULL
{
int check_true = strcasecmp(check->word, word); // compare traversal node's word to provided word argument
if (check_true == 0) // if a match is found, return true
{
return true;
}
else if (check_true != 0) // if no match, move to next node
{
check = check->next;
}
}
if (check == NULL) // if end of list is reached without a match, return false
return false;
}
bool unload(void) // unload dictionary from memory, free memory (CURRENTLY DEBUGGING, CHECKING ITERATION)
{
char *word = "FREE"; // DEBUG DEBUG DEBUG (changin all nodes' words to "FREE" to test iteration)
for (int i = 0; i < HASH_MAX; i++) // for every element in the hash table, HASH_MAX (50)
{
printf("LOCATION %02d:\n", i); // DEBUG DEBUG DEBUG (print current hash table location)
while (strcmp(HASH_TABLE[i]->word, word) != 0) // while the head node's word is not "FREE"
{
node *trav = HASH_TABLE[i]; // set traversal node to head
printf("HEAD WORD: %s\n", HASH_TABLE[i]->word); // DEBUG DEBUG DEBUG (print head word to confirm while condition)
while (strcmp(trav->next->word, word) != 0) // while the traversal node's word is not "FREE"
{
trav = trav->next; // move to next node
printf("."); // DEBUG DEBUG DEBUG (print a dot for every location skipped)
}
printf("\n"); // DEBUG DEBUG DEBUG
strcpy(trav->word, word); // set traversal node's word to "FREE"
printf("{"); // DEBUG DEBUG DEBUG
while (trav != NULL) // DEBUG DEBUG DEBUG (print hash location's current list of words)
{
printf("%s, ", trav->word); // DEBUG DEBUG DEBUG
}
printf("}\n\n"); // DEBUG DEBUG DEBUG
}
}
return true; // freed successfully
}
void print(void) // print hash table contents and node locations
{
for (int i = 0; i < HASH_MAX; i++) // for every element in the hash table
{
node *check = HASH_TABLE[i]; // set traversal node to current hash table element head
printf("LIST %02d: {", i); // print hash table element location
while (check != NULL) // for all nodes in the current linked list
{
printf("%s, ", check->word); // print traversal node's word
check = check->next; // move to next node
}
printf("}\n");
}
printf("\n");
FILE *dictionary = fopen("C:/Users/aaron/Desktop/Dictionary.txt", "r"); // open dictionary file
while (!feof(dictionary)) // for all words in the dictionary
{
char word[LENGTH + 1]; // store next word
fscanf(dictionary, "%s", word); // scan for next word
unsigned int code = hash(word); // retrieve and store word's hash code
node *search = HASH_TABLE[code]; // set traversal node to hash location head
while (search != NULL) // for all nodes at that location, or until word is found
{
if (strcasecmp(search->word, word) == 0) // compare traversal node's word to scanned word (case insensitive)
{
printf("%s: %p\n", search->word, search); // print traversal node's word and location
break; // break while loop
}
else
{
search = search->next; // if traversal node's word does not match scanned word, move to next node
}
}
if (search == NULL) // if the scanned word matches none of the words in the hash location's linked list
printf("\"%s\" NOT FOUND\n", word); // word not found
}
fclose(dictionary); // close dictionary file
}
Caveat: chqrlie has pointed out many of the basic issues, but here's some refactored code.
Your main issue was that unload didn't actually remove the nodes.
One of things to note is that it's easier/faster/better to use tolower once per string.
If the lowercased version is what we store in the node, and we lowercase the search word in check, we can use strcmp instead of strcasecmp [which has to redo the lowercasing for both arguments on each loop iteration].
So, I've changed the hash function to lowercase its argument "in-place".
As I mentioned in my above comment, print was extraneously rereading the dictionary file. So, I've removed that code. If it were necessary to do this, it should go into [yet] another function, or load and/or check should be reused.
(i.e.) print should do one thing well [a programming maxim].
Personally, I dislike "sidebar" comments:
if (unload()) // if unloaded successfully (function call), print contents
I prefer the comment to go above the line:
// if unloaded successfully (function call), print contents
if (unload())
To me, this is much clearer and it helps prevent the line from going beyond 80 characters in width.
Certain fixed constants (e.g. HASH_MAX and LENGTH) are global variables. This prevents them from being used to define arrays
(e.g.) you couldn't say:
node *HASH_TABLE[HASH_MAX];
and had to "hardwire" it as:
node *HASH_TABLE[50];
If we define these with either #define or as an enum, then we can use the preferred definitions.
Doing something like:
for (int i = 0; i < strlen(word); i++)
increases the loop time from O(length) to O(length^2) because strlen is called "length" times inside the loop and it rescans the string each time.
Much better to do:
int len = strlen(word);
for (int i = 0; i < len; i++)
But even this has an extra scan of the buffer. It can be better is to do something like:
for (int chr = *word++; chr != 0; chr = *word++)
I've refactored the code with annotations for the bugs. Original code is bracketed inside a #if 0 block:
#if 0
// old/original code
#else
// new/refactored code
#endif
Anyway, here's the code:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#if 1
#include <ctype.h>
#endif
// Max elements in hash table
#if 0
unsigned int HASH_MAX = 50;
#else
enum {
HASH_MAX = 50
};
#endif
// Max length of word to be stored
#if 0
unsigned int LENGTH = 20;
#else
enum {
LENGTH = 20
};
#endif
// assign hash code -- [(code + current letter) * 3] * string length, % HASH_MAX
#if 0
unsigned int hash(const char *word);
#else
unsigned int hash(char *word);
#endif
// load dictionary into memory
bool load(FILE *dictionary);
// check if word exists in dictionary
#if 0
bool check(char *word);
#else
bool check(const char *word);
#endif
// unload dictionary from memory, free memory (CURRENTLY DEBUGGING,
// CHECKING ITERATION)
bool unload(void);
// print table contents and node locations
void print(void);
// node structure: stored word, pointer to next node
typedef struct _node {
#if 0
char *word[20];
#else
char word[LENGTH + 1];
#endif
struct _node *next;
} node;
#if 0
node *HASH_TABLE[50];
#else
node *HASH_TABLE[HASH_MAX];
#endif
int
main(int argc, char *argv[])
{
// open dictionary file, read
#if 0
FILE *dictionary = fopen("C:/Users/aaron/Desktop/Dictionary.txt", "r");
#else
FILE *dictionary = fopen("Dictionary.txt", "r");
#endif
// if dictionary is NULL, return error message, end program
if (!dictionary) {
printf("FILE NOT FOUND\n");
return 1;
}
// if dictionary loaded successfully (function call), close dictionary and
// print table contents
if (load(dictionary)) {
fclose(dictionary);
// print "LIST (number): {(name, address), ...}\n
print();
}
// test check function for word that does not exist in the library
char *checkword = "Albatross";
// test check function for word that does exist in the library
char *checkword2 = "Riku";
// return check results for checkword, found or not found
if (check(checkword)) {
printf("\n%s found\n", checkword);
}
else {
printf("\n%s not found\n", checkword);
}
// return check results for checkword2, found or not found
if (check(checkword2)) {
printf("\n%s found\n", checkword2);
}
else {
printf("\n%s not found\n", checkword2);
}
// if unloaded successfully (function call), print contents
if (unload()) {
// DEBUG DEBUG DEBUG (confirm unload function returned true)
printf("\nUNLOADED...\n\n");
print();
}
}
// assign hash code -- [(code + current letter) * 3] * string length, % HASH_MAX
unsigned int
hash(char *word)
{
// store converted word for uniform key
#if 0
char word_conv[LENGTH + 1];
#endif
// hash code
unsigned int code = 0;
#if 0
strcpy(word_conv, word);
// set all letters in the word to lower case
for (int i = 0; i < strlen(word); i++) {
word_conv[i] = tolower(word_conv[i]);
}
// for all letters in converted word, add ascii value to code and multiply by 3
for (int j = 0; j < strlen(word_conv); j++) {
code += word_conv[j];
code = code * 3;
}
#else
int chr;
while (1) {
chr = *word;
if (chr == 0)
break;
chr = tolower(chr);
*word++ = chr;
code += chr;
code *= 3;
}
#endif
// set code to remainder of current code divided by maximum hash table size
code = code % HASH_MAX;
return code;
}
// load dictionary into memory
bool
load(FILE * dictionary)
{
// store next word in the dictionary
char word[LENGTH + 1];
// until end of dictionary file
// NOTE/BUG: don't use feof
#if 0
while (!feof(dictionary)) {
// scan for next word
fscanf(dictionary, "%s", word);
#else
// scan for next word
while (fscanf(dictionary, "%s", word) == 1) {
#endif
// new node
node *new_n = malloc(sizeof(node));
// store scanned word in new node
strcpy(new_n->word, word);
// new node's next pointer set to NULL
new_n->next = NULL;
// retrieve and store hash code
unsigned int code = hash(new_n->word);
// NOTE/BUG: there's no need to append to the end of the list -- pushing
// on the front is adequate and is faster
#if 0
// if hash location has no head
if (HASH_TABLE[code] == NULL) {
// set new node to location head
HASH_TABLE[code] = new_n;
}
// if head already exists at hash location
else if (HASH_TABLE[code] != NULL) {
// set traversal node
node *trav = HASH_TABLE[code];
// while traversal node's next pointer is not NULL
while (trav->next != NULL) {
// move to next node
trav = trav->next;
}
// if traversal node's next pointer is null
if (trav->next == NULL) {
// set new node to traversal node's next pointer
trav->next = new_n;
}
}
#else
new_n->next = HASH_TABLE[code];
HASH_TABLE[code] = new_n;
#endif
}
// confirm successful load
return true;
}
// check if word exists in dictionary
#if 0
bool
check(char *word)
#else
bool
check(const char *arg)
#endif
{
char word[LENGTH + 1];
// retrieve and store hash code
#if 1
strcpy(word,arg);
#endif
unsigned int code = hash(word);
// set traversal node to hash location head
node *check = HASH_TABLE[code];
// while traversal node is not NULL
while (check != NULL) {
// compare traversal node's word to provided word argument
// NOTE/BUG: strcmp is faster than strcasecmp if we convert to lowercase _once_
#if 0
int check_true = strcasecmp(check->word, word);
#else
int check_true = strcmp(check->word, word);
#endif
#if 0
// if a match is found, return true
if (check_true == 0) {
return true;
}
// if no match, move to next node
else if (check_true != 0) {
check = check->next;
}
#else
if (check_true == 0)
return true;
check = check->next;
#endif
}
// if end of list is reached without a match, return false
#if 0
if (check == NULL)
return false;
#else
return false;
#endif
}
// unload dictionary from memory, free memory
// (CURRENTLY DEBUGGING, CHECKING ITERATION)
bool
unload(void)
{
// DEBUG DEBUG DEBUG (changin all nodes' words to "FREE" to test iteration)
#if 0
char *word = "FREE";
#endif
// for every element in the hash table, HASH_MAX (50)
for (int i = 0; i < HASH_MAX; i++) {
#if 0
// DEBUG DEBUG DEBUG (print current hash table location)
printf("LOCATION %02d:\n", i);
// while the head node's word is not "FREE"
while (strcmp(HASH_TABLE[i]->word, word) != 0) {
// set traversal node to head
node *trav = HASH_TABLE[i];
// DEBUG DEBUG DEBUG (print head word to confirm while condition)
printf("HEAD WORD: %s\n", HASH_TABLE[i]->word);
// while the traversal node's word is not "FREE"
while (strcmp(trav->next->word, word) != 0) {
// move to next node
trav = trav->next;
// DEBUG DEBUG DEBUG (print a dot for every location skipped)
printf(".");
}
// DEBUG DEBUG DEBUG
printf("\n");
// set traversal node's word to "FREE"
strcpy(trav->word, word);
// DEBUG DEBUG DEBUG
printf("{");
// DEBUG DEBUG DEBUG (print hash location's current list of words)
while (trav != NULL) {
// DEBUG DEBUG DEBUG
printf("%s, ", trav->word);
}
// DEBUG DEBUG DEBUG
printf("}\n\n");
}
#else
node *nxt;
for (node *cur = HASH_TABLE[i]; cur != NULL; cur = nxt) {
nxt = cur->next;
free(cur);
}
HASH_TABLE[i] = NULL;
#endif
}
// freed successfully
return true;
}
// print hash table contents and node locations
void
print(void)
{
// for every element in the hash table
for (int i = 0; i < HASH_MAX; i++) {
// set traversal node to current hash table element head
node *check = HASH_TABLE[i];
// print hash table element location
printf("LIST %02d: {", i);
// for all nodes in the current linked list
while (check != NULL) {
// print traversal node's word
printf("%s, ", check->word);
// move to next node
check = check->next;
}
printf("}\n");
}
printf("\n");
// NOTE/BUG: why reread dictionary after printing it?
#if 0
// open dictionary file
FILE *dictionary = fopen("C:/Users/aaron/Desktop/Dictionary.txt", "r");
// for all words in the dictionary
while (!feof(dictionary)) {
// store next word
char word[LENGTH + 1];
// scan for next word
fscanf(dictionary, "%s", word);
// retrieve and store word's hash code
unsigned int code = hash(word);
// set traversal node to hash location head
node *search = HASH_TABLE[code];
// for all nodes at that location, or until word is found
while (search != NULL) {
// compare traversal node's word to scanned word (case insensitive)
if (strcasecmp(search->word, word) == 0) {
// print traversal node's word and location
printf("%s: %p\n", search->word, search);
// break while loop
break;
}
else {
// if traversal node's word does not match scanned word,
// move to next node
search = search->next;
}
}
// if the scanned word matches none of the words in the hash location's
// linked list
if (search == NULL)
// word not found
printf("\"%s\" NOT FOUND\n", word);
}
// close dictionary file
fclose(dictionary);
#endif
}
Here's a version that has the #if 0 blocks removed.
Also, I've added a slight reordering in the load function, so that it inputs the data directly into the final place inside the node element (i.e. eliminates the intermediate buffer and a strcpy)
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
// Max elements in hash table
enum {
HASH_MAX = 50
};
// Max length of word to be stored
enum {
LENGTH = 20
};
// assign hash code -- [(code + current letter) * 3] * string length, % HASH_MAX
unsigned int hash(char *word);
// load dictionary into memory
bool load(FILE *dictionary);
// check if word exists in dictionary
bool check(const char *word);
// unload dictionary from memory, free memory (CURRENTLY DEBUGGING,
// CHECKING ITERATION)
bool unload(void);
// print table contents and node locations
void print(void);
// node structure: stored word, pointer to next node
typedef struct _node {
char word[LENGTH + 1];
struct _node *next;
} node;
node *HASH_TABLE[HASH_MAX];
int
main(int argc, char *argv[])
{
// open dictionary file, read
FILE *dictionary = fopen("Dictionary.txt", "r");
// if dictionary is NULL, return error message, end program
if (!dictionary) {
printf("FILE NOT FOUND\n");
return 1;
}
// if dictionary loaded successfully (function call), close dictionary and
// print table contents
if (load(dictionary)) {
fclose(dictionary);
// print "LIST (number): {(name, address), ...}\n
print();
}
// test check function for word that does not exist in the library
char *checkword = "Albatross";
// test check function for word that does exist in the library
char *checkword2 = "Riku";
// return check results for checkword, found or not found
if (check(checkword)) {
printf("\n%s found\n", checkword);
}
else {
printf("\n%s not found\n", checkword);
}
// return check results for checkword2, found or not found
if (check(checkword2)) {
printf("\n%s found\n", checkword2);
}
else {
printf("\n%s not found\n", checkword2);
}
// if unloaded successfully (function call), print contents
if (unload()) {
// DEBUG DEBUG DEBUG (confirm unload function returned true)
printf("\nUNLOADED...\n\n");
print();
}
}
// assign hash code -- [(code + current letter) * 3] * string length, % HASH_MAX
unsigned int
hash(char *word)
{
// store converted word for uniform key
// hash code
unsigned int code = 0;
unsigned char chr;
while (1) {
chr = *word;
if (chr == 0)
break;
chr = tolower(chr);
*word++ = chr;
code += chr;
code *= 3;
}
// set code to remainder of current code divided by maximum hash table size
code = code % HASH_MAX;
return code;
}
// load dictionary into memory
bool
load(FILE *dictionary)
{
// scan for next word
while (1) {
// new node
node *new_n = malloc(sizeof(node));
if (fscanf(dictionary, "%s", new_n->word) != 1) {
free(new_n);
break;
}
// store scanned word in new node
new_n->next = NULL;
// retrieve and store hash code
unsigned int code = hash(new_n->word);
// pushing on the front of the list is adequate and is faster
new_n->next = HASH_TABLE[code];
HASH_TABLE[code] = new_n;
}
// confirm successful load
return true;
}
// check if word exists in dictionary
bool
check(const char *arg)
{
char word[LENGTH + 1];
// retrieve and store hash code
strcpy(word,arg);
unsigned int code = hash(word);
// set traversal node to hash location head
node *check = HASH_TABLE[code];
// while traversal node is not NULL
while (check != NULL) {
// compare traversal node's word to provided word argument
int check_true = strcmp(check->word, word);
if (check_true == 0)
return true;
check = check->next;
}
// if end of list is reached without a match, return false
return false;
}
// unload dictionary from memory, free memory
// (CURRENTLY DEBUGGING, CHECKING ITERATION)
bool
unload(void)
{
// for every element in the hash table, HASH_MAX (50)
for (int i = 0; i < HASH_MAX; i++) {
node *nxt;
for (node *cur = HASH_TABLE[i]; cur != NULL; cur = nxt) {
nxt = cur->next;
free(cur);
}
HASH_TABLE[i] = NULL;
}
// freed successfully
return true;
}
// print hash table contents and node locations
void
print(void)
{
// for every element in the hash table
for (int i = 0; i < HASH_MAX; i++) {
// set traversal node to current hash table element head
node *check = HASH_TABLE[i];
// print hash table element location
printf("LIST %02d: {", i);
// for all nodes in the current linked list
while (check != NULL) {
// print traversal node's word
printf("%s, ", check->word);
// move to next node
check = check->next;
}
printf("}\n");
}
printf("\n");
}
UPDATE:
Could you please explain for (int chr = *word++; chr != 0; chr = *word++)? I don't know what *word++ means in this context.
Sure. With chr = *word++; it means dereference word [a char pointer]. This fetches the char value pointed to by word (i.e. fetch the value from memory). Then, set this value into chr. Then, increment word [so it points to the next character in the array.
The statement is composed of three operators: = is the assignment operator. * is a dereference operator and ++ is a post-decrement operator.
Based on the precedence [and/or binding] of the operators, * has higher precedence [tighter binding], so it is performed first. The value is placed in chr. Then, ++ is performed on the value in word. It is as the following is performed as a single statement:
chr = *word;
word += 1;
chr = tolower(chr); should be chr = tolower((unsigned char)chr); for reasons explained in my answer. Alternatively, you could define chr as unsigned char chr;
I was under the impression that tolower et. al. were "self protective" of this (e.g. they did the unsigned char cast). But, the [linux] manpage says its UB if the value is out of range. I've edited the second example to use unsigned char chr;.
Strangely, for glibc's tolower, it has a range check built it that works on the int value and returns the original value (i.e. does not index into the translation table) if the value is out of range. This appears to be part of some BSD compatibility [the BSD manpage states it does a range check, but the feature is deprecated]. I'm guessing the glibc range check as added after the manpage was written.
To me, the macro should just do the cast itself [and the global function as well]. But, I think this might break the BSD compatibility.
But, now we're all hamstrung to the old way [or add a wrapper macro] because of backward compatibility.
it is confusing for hash to have a side effect on its argument and further confusing that this side effect be necessary for the strcmp in check to work.
The side effect is [probably] no more [or, perhaps, even less] egregious than what strtok does. That is, it's not modifying a hidden/unrelated global, etc.
IMO, it wouldn't be confusing if the effect were commented [I documented it in the answer text]. Perhaps renaming hash to something a bit more descriptive would help. We could do: take_hash_of_argument_that_we_modify_to_lowercase_first.
That would make the function name "self documenting" as some (e.g. "Uncle" Bob Martin(?)) might suggest member functions should be.
But, maybe hash_and_lowercase might be better. This might be a sufficient clue to the reader that they need to consult the API documentation for the function rather than assuming they know all about it from just the name.
The linked list traversal is much faster with strcmp, so, at a minimum [architecturally] we want to store lower case strings in the nodes. We don't want to repeat the lowercasing for each node on each scan. And, we don't want strcasecmp to repeat the lowercasing on word [and the string in the node] for each loop iteration.
As you say, we could have two functions. And we could still achieve this refactoring: a string based version of tolower that lowercases its argument and leave the hash as it was done originally.
Originally, I considered this approach. I soon realized that everywhere you did a hash, you wanted it to be on the lowercased string. We could achieve this with (e.g.):
strlower(word);
value = hash(word);
But, there wasn't a use case here for doing one of these calls separately--only in pairs.
So, given that, why scan the argument string twice and slow down the operation by 2x?
From JFK [after the failed Bay of Pigs invasion]: Mistakes are not errors if we admit them.
So, I'd paraphrase that as: Side effects are not errors if we document them.
There are multiple problems in your code:
the word member of the _node structure has the wrong type: it should just be an array of 20 characters, not an array of 20 char pointers. And dont use _node, identifiers starting with _ are reserved. Change the definition to:
typedef struct node { // node structure: stored word, pointer to next node
char word[LENGTH+1];
struct node *next;
} node;
your reading loops are incorrect: while (!feof(dictionary)) is not the proper test to detect the end of file, you should instead test if fscanf() successfully reads the next word:
while (fscanf(dictionary, "%s", word) == 1) // until end of dictionary file
Furthermore you should specify a maximum length for fscanf() to avoid undefined behavior on long words:
while (fscanf(dictionary, "%19s", word) == 1) // read at most 19 characters
You do not check for allocation failure.
There are many redundant tests such as else if (HASH_TABLE[code] != NULL) and if (trav->next == NULL) in load(), else if (check_true != 0) and if (check == NULL) in check().
You do not modify trav in the loop while (trav != NULL) in the DEBUG code, causing an infinite loop.
It is not difficult to free the dictionary in unload(), your iteration checking code is way too complicated, you already have correct iteration code for print(). Here is a simple example:
bool unload(void) { // unload dictionary from memory, free memory
for (int i = 0; i < HASH_MAX; i++) {
while (HASH_TABLE[i]) {
node *n = HASH_TABLE[i];
HASH_TABLE[i] = n->next;
free(n);
}
}
return true;
}
Note also that there is no need to store the converted word to compute the hash value, and char values must be cast as (unsigned char) to pass to tolower() because this function is only defined for the values of unsigned char and the special negative value EOF. char may be a signed type, so tolower(word[i]) has undefined behavior for extended characters.
unsigned int hash(const char *word) // assign hash code -- [(code + current letter) * 3] * string length, % HASH_MAX
{
unsigned int code = 0; // hash code
for (int i = 0; word[i] != '\0'; i++) {
// compute hashcode from lowercase letters
code = (code + tolower((unsigned char)word[i])) * 3;
}
code = code % HASH_MAX; // set code to remainder of current code divided by maximum hash table size
return code;
}
I am having trouble implementing my load and unload functions in pset5 of the cs50 class at Harvard. When I run it, I get a segmentation fault and when I run valgrind, it tells me that none of the nodes that I malloc'd at load were freed.
I've been trying to fix this for days, I've tried several different implementations for my unload function, but nothing's worked. I think the mistake might be in my load function. Would someone please please please help me with this one?
/****************************************************************************
* dictionary.c
*
* Computer Science 50
* Problem Set 5
*
* Implements a dictionary's functionality.
***************************************************************************/
#include <stdbool.h>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include "dictionary.h"
#define HASHTABLE_SIZE 5000
// create word counter for size
int wordCount = 0;
// linked link struct
typedef struct node
{
// word's length + NULL character
char word[LENGTH + 1];
struct node* next;
}
node;
// Hashtable array
node* hashtable[HASHTABLE_SIZE];
// hash function from study.cs50.net
int hash_function(char* key)
{
// initialize index to 0
int index = 0;
// sum ascii values
for (int i = 0; key[i] != 0; i++)
{
index += toupper(key[i]) - 'A';
}
return index % HASHTABLE_SIZE;
}
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
// create variable to hold word
char temp[LENGTH + 1];
// convert every character in word to lowercase
for (int i = 0, n = strlen(word); i < n; i++)
{
if (isalpha(word[i]))
{
temp[i] = tolower(word[i]);
}
}
// get hashed word's index
int hash_index = hash_function(temp);
// find head of that index
node* head = hashtable[hash_index];
// traverse through linked list
for (node* cur = head; cur != NULL; cur = cur->next)
{
// find if linnked list contains word
if (strcmp(cur->word, word) == 0)
{
return true;
}
}
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
// // open file
FILE* file = fopen(dictionary, "r");
// check if file exists
if (file == NULL)
{
return false;
}
// word length plus NULL character
char word[LENGTH + 1];
// iterate through every word of the dictionary
while (fscanf(file, "%s\n", word) != EOF) // Source: http://stackoverflow.com/questions/6275558/question-about-whileeof
{
node* new_node = malloc(sizeof(node));
if (new_node == NULL)
{
return false;
}
wordCount++;
strcpy(new_node->word, word); // Source: cs50 reddit
int hash_index = hash_function(new_node->word);
// check whether node should be head
if (hashtable[hash_index] == NULL)
{
hashtable[hash_index] = new_node;
new_node->next = NULL;
}
else
{
new_node->next = hashtable[hash_index];
hashtable[hash_index] = new_node;
}
}
// close file
fclose(file);
return false;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return wordCount;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
// go through all of the indexes in the hashtable
for (int i = 0; i < HASHTABLE_SIZE; i++)
{
node* head = hashtable[i];
while (head != NULL)
{
node* ptr = head->next;
free(head);
head = ptr;
}
}
return true;
}
Your unload function is good. The problem with your code is the check function, notably the part where you try to convert the input to lower case:
char temp[LENGTH + 1];
for (int i = 0, n = strlen(word); i < n; i++)
{
if (isalpha(word[i]))
{
temp[i] = tolower(word[i]);
}
}
There are two issues here. First, temp is not null-terminated. Second, the check for isalpha means you could leave characters uninitialised: If your input is, say, "I'm", temp will hold 'I', garbage, 'm', garbage when it should hold 'I', ' \'', 'm', '\0', garbage.
Alternatively, you can filter out unwanted characters. In that case, you need two indices: one for the source word, another for the filtered word.
But you don't even need this additional step, because you hash function converts the input to toupper again.
Speaking of your hash function: You might want to pick a better one. The current one doesn't distribute the values well over the 5000 slots. (How are you even going to reach 5000 when you add, what?, up to 20 numbers between 0 and 25?)
The hash also has another problem: If you input a number, the contributing "letters" are negative, because in ASCII, numbers have values from 48 to 57 and you subtract the value of 'A', 65, from them. In general, your hash function should return an unsigned value.
First post, extremely limited in coding knowledge and new to C. Be gentle! I am at the point where "trying" different things is just confusing me more and more. I need someone's correct guidance!
This particular problem is from an online edX course I am attempting which ultimately when implemented correctly, checks a given word read in from a text file (the 'check' function) and compares it to each word read into (from the 'load' function) a linked list of structs.
I believe I have the load function implemented correctly as when I use gdb, as I am seeing what I anticipate as I step through it, but my question and my problem relates specifically to the check function. I still have a lot to implement to finish my code but while testing with gdb, I am not seeing values of the char* member of the struct correspond with what I anticipate I should see.
When using gdb and stepping through the 'check' function and trying to access the dword member of the struct nodes in the linked list I created in the load function, I anticipate I should see a string for the char* member. For instance, I anticipate the word "cat" assigned to current->dword , but am instead seeing in gdb when I test:
~(gdb) print current->dword
$13 = 0xbfffede2 "\004\b\214\365\372D\300\355\377\277"
My thoughts are that I'm still only accessing an address somehow and not the actual value, but I'm oblivious as to why this is. When the node is created in the load function, a value is assigned to the dword member correctly (at least as far as I can tell while stepping through the code in gdb) but doesn't seem to be accessed correctly in the check function. Any help for a newbie would be appreciated!
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "dictionary.h"
typedef struct node
{
char* dword;
struct node* next;
}
node;
// keep track of #of words in dictionary loaded
int wordCounter = 0;
// create root for hash table
node* root[26];
// create cursor to keep place in creating, pointing, and traversing through nodes
node* current = NULL;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
// size of word read into buffer
int wordSize = sizeof(word);
// prepare to make a new lowercase only word for comparison to lowercase only dictionary
char bufWord[wordSize];
// make it
for(int i = 0; i < wordSize; i++)
{
if (i == wordSize - 1)
{
bufWord[i] = '\0';
}
else
{
bufWord[i] = tolower(word[i]);
}
}
// hash word to achieve proper root node location
int hash = bufWord[0] - 97;
// point to the correct root node to begin traversing
current = root[hash];
// make sure there is even a word in hash table location
if(root[hash] == NULL)
{
return false;
}
else if(root[hash] != NULL)
{
// progress through the nodes until the last node's next pointer member is NULL
while(current != NULL)
{
// compare 1st letter only of current->dword[i] to bufWord[i] to save time
// if they don't match, return false
// if they do match then continue
\
char dictWord[wordSize];
// hold copy of struct member value to compare to dictWord
char* wordTemp = current->dword;
//
for(int i = 0; i < wordSize; i++)
{
dictWord[i] = wordTemp[i];
}
// do a spell check
if(strcmp(bufWord, dictWord) == 0)
{
return true;
}
else
{
// set current to the next node if any or NULL if it's already the last node in the list
current = current->next;
}
}
}
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
// buffer for reading in dictionary words
char wordIn[LENGTH + 1];
// open the dictionary file
FILE* newDict = fopen(dictionary, "r");
for (int i = 0; i < 27; i++)
{
root[i] = NULL;
}
// while there are words to read
while(fscanf(newDict, "%s ", wordIn) > 0)
{
// keep track of #of words for constant time read in size function
wordCounter++;
// hash the first letter for the location in root
int hash = wordIn[0] - 97;
// malloc space for a new node
node* newNode = malloc(sizeof(node));
// error check
if (newNode == NULL)
{
return false;
}
// set value member of node to current word
newNode->dword = wordIn;
// first insertion into linked list if that root node has not been used yet
if(root[hash] == NULL)
{
// sets to NULL
newNode->next = root[hash];
// link it
root[hash] = newNode;
}
else if(root[hash] != NULL)
{
// starts at the root
node* current = root[hash];
// insert into new beginning of list
newNode->next = current;
root[hash] = newNode;
}
}
fclose(newDict);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return wordCounter;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
// TODO
return false;
}
The source of your problem is the line:
newNode->dword = wordIn;
wordIn is a local array in load. You are storing the address of wordIn in the dword of your nodes. When you return from load, those addresses are no valid any longer.
What you need to do is allocate memory for the string in wordIn, assign the allocated memory to newNode->dword and copy the contents of wordIn to newNode->dword.
If your platform provides the non-standard function strdup, you can change the above line to:
newNode->dword = strdup(wordIn);
If not, it is easily implemented:
char* strdup(char const* in)
{
char* r = malloc(strlen(in)+1);
strcpy(r, in);
return r;
}