I'm working on CS50's Week 5 assignment, Speller. I'm building my functions one at a time, and I'm running into problems with my unload function (Line 151). Right now, I'm just testing the iteration in a way that prints results before I use that iteration to free each of the nodes. I'm doing this by changing each node's word to "FREE" in the order these nodes are to be freed.
The function call (Line 60) returns true, and the printf command prints successfully. However, everything in the unload function itself is being ignored. None of the printf lines that I added to see its progress (DEBUG DEBUG DEBUG) are printing. The print() function call on line 63 should be printing the table with all of the words set to "FREE", and all dictionary word locations showing "NOT FOUND". Instead, it's printing the list and locations completely unaltered, and with none of the DEBUG print commands within the for loop (Line 155) triggering.
I don't understand why this is happening. The unload() function call alone, whether or not it returns true, should still at least trigger the first printf command in the for loop (Line 157). But even that is skipped.
Can someone please help me understand why the function is returning true, yet making none of the changes it's supposed to? Thanks in advance.
EDIT: Okay, I was told that I wasn't calling the unload function correctly on line 60. I've since corrected that. Now it will print out "LOCATION 00:", but it ends as soon as it hits that first while loop on line 158. I was having this problem before, and I'm not sure why it's doing this. strcmp() should see that the head node's word does not match "FREE" until it makes the change from the end of the list to the beginning. Why is the while loop not even triggering?
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
unsigned int HASH_MAX = 50; // Max elements in hash table
unsigned int LENGTH = 20; // Max length of word to be stored
unsigned int hash(const char *word); // assign hash code -- [(code + current letter) * 3] * string length, % HASH_MAX
bool load(FILE *dictionary); // load dictionary into memory
bool check(char *word); // check if word exists in dictionary
bool unload(void); // unload dictionary from memory, free memory (CURRENTLY DEBUGGING, CHECKING ITERATION)
void print(void); // print table contents and node locations
// Hash-table node: one stored word plus a link to the next node in the same bucket.
// NOTE(review): `word` is declared as an array of 20 char pointers, yet load() and
// print() in this listing use it as a character buffer (strcpy target, %s argument).
typedef struct _node // node structure: stored word, pointer to next node
{
char *word[20];
struct _node *next;
} node;
node *HASH_TABLE[50];
// Test driver: opens the dictionary file, loads it, prints the table, looks up two
// sample words, then calls unload() and prints the table again.
// Returns 1 if the dictionary file cannot be opened. argc and argv are not used.
int main(int argc, char *argv[])
{
FILE *dictionary = fopen("C:/Users/aaron/Desktop/Dictionary.txt", "r"); // open dictionary file, read
if (!dictionary) // if dictionary is NULL, return error message, end program
{
printf("FILE NOT FOUND\n");
return 1;
}
// NOTE(review): the file is closed only when load() returns true.
if (load(dictionary)) // if dictionary loaded successfully (function call), close dictionary and print table contents
{
fclose(dictionary);
print(); // print every bucket, then the node address found for each dictionary word
}
char *checkword = "Albatross"; // test check function for word that does not exist in the library
char *checkword2 = "Riku"; // test check function for word that does exist in the library
if (check(checkword)) // return check results for checkword, found or not found
{
printf("\n%s found\n", checkword);
}
else
{
printf("\n%s not found\n", checkword);
}
if (check(checkword2)) // return check results for checkword2, found or not found
{
printf("\n%s found\n", checkword2);
}
else
{
printf("\n%s not found\n", checkword2);
}
if (unload()) // if unloaded successfully (function call), print contents
{
printf("\nUNLOADED...\n\n"); // DEBUG DEBUG DEBUG (confirm unload function returned true)
print();
}
}
// Computes the bucket index for `word`: copies it, lower-cases the copy, then for each
// character does code = (code + character) * 3, and finally reduces modulo HASH_MAX.
// The string length is not a factor in the computation.
// NOTE(review): tolower() is called but <ctype.h> is not among the #includes of this listing.
// NOTE(review): strcpy() copies `word` into a LENGTH + 1 buffer without any length limit.
unsigned int hash(const char *word) // assign hash code -- [(code + current letter) * 3], % HASH_MAX
{
char word_conv[LENGTH + 1]; // store converted word for uniform key
unsigned int code = 0; // hash code
strcpy(word_conv, word);
// strlen() is re-evaluated in the condition of both loops below on every iteration
for (int i = 0; i < strlen(word); i++) // set all letters in the word to lower case
{
word_conv[i] = tolower(word_conv[i]);
}
for (int j = 0; j < strlen(word_conv); j++) // for all letters in converted word, add ascii value to code and multiply by 3
{
code += word_conv[j];
code = code * 3;
}
code = code % HASH_MAX; // set code to remainder of current code divided by maximum hash table size
return code;
}
// Reads whitespace-separated words from `dictionary` and appends a new node for each one
// to the tail of the bucket list selected by hash(). Always returns true.
// NOTE(review): the loop is driven by feof() and the fscanf() result is ignored, so a
// failed read still creates a node from whatever `word` holds at that point.
// NOTE(review): "%s" has no field width, and the malloc() result is used unchecked.
bool load(FILE *dictionary) // load dictionary into memory
{
char word[LENGTH+1]; // store next word in the dictionary
while (!feof(dictionary)) // until end of dictionary file
{
fscanf(dictionary, "%s", word); // scan for next word
node *new_n = malloc(sizeof(node)); // new node
strcpy(new_n->word, word); // store scanned word in new node
new_n->next = NULL; // new node's next pointer set to NULL
unsigned int code = hash(word); // retrieve and store hash code
if (HASH_TABLE[code] == NULL) // if hash location has no head
{
HASH_TABLE[code] = new_n; // set new node to location head
}
else if (HASH_TABLE[code] != NULL) // if head already exists at hash location
{
node *trav = HASH_TABLE[code]; // set traversal node
while (trav->next != NULL) // while traversal node's next pointer is not NULL
{
trav = trav->next; // move to next node
}
if (trav->next == NULL) // always true here: the loop above only exits when trav->next is NULL
{
trav->next = new_n; // set new node to traversal node's next pointer
}
}
}
return true; // confirm successful load
}
// Returns true if `word` matches, ignoring case, a word stored in the bucket selected by
// hash(word); returns false once the end of that bucket's list is reached.
// NOTE(review): strcasecmp() is presumably the POSIX function from <strings.h>, which is
// not among the #includes of this listing -- confirm.
bool check(char *word) // check if word exists in dictionary
{
unsigned int code = hash(word); // retrieve and store hash code
node *check = HASH_TABLE[code]; // set traversal node to hash location head
while (check != NULL) // while traversal node is not NULL
{
int check_true = strcasecmp(check->word, word); // compare traversal node's word to provided word argument
if (check_true == 0) // if a match is found, return true
{
return true;
}
else if (check_true != 0) // if no match, move to next node
{
check = check->next;
}
}
// the loop above exits only when check is NULL, so this test is always true here
if (check == NULL) // if end of list is reached without a match, return false
return false;
}
// Debug version of unload(): instead of freeing nodes it tries to overwrite each node's
// word with "FREE", working from the tail of each bucket towards the head. Nothing is freed.
// NOTE(review): HASH_TABLE[i]->word is read without checking HASH_TABLE[i] for NULL.
// NOTE(review): trav->next->word is read without checking trav->next for NULL.
// NOTE(review): the inner printing loop never advances trav, so it cannot terminate once entered.
bool unload(void) // unload dictionary from memory, free memory (CURRENTLY DEBUGGING, CHECKING ITERATION)
{
char *word = "FREE"; // DEBUG DEBUG DEBUG (changing all nodes' words to "FREE" to test iteration)
for (int i = 0; i < HASH_MAX; i++) // for every element in the hash table, HASH_MAX (50)
{
printf("LOCATION %02d:\n", i); // DEBUG DEBUG DEBUG (print current hash table location)
while (strcmp(HASH_TABLE[i]->word, word) != 0) // while the head node's word is not "FREE"
{
node *trav = HASH_TABLE[i]; // set traversal node to head
printf("HEAD WORD: %s\n", HASH_TABLE[i]->word); // DEBUG DEBUG DEBUG (print head word to confirm while condition)
while (strcmp(trav->next->word, word) != 0) // while the node after the traversal node is not "FREE"
{
trav = trav->next; // move to next node
printf("."); // DEBUG DEBUG DEBUG (print a dot for every location skipped)
}
printf("\n"); // DEBUG DEBUG DEBUG
strcpy(trav->word, word); // set traversal node's word to "FREE"
printf("{"); // DEBUG DEBUG DEBUG
while (trav != NULL) // DEBUG DEBUG DEBUG (print hash location's current list of words)
{
printf("%s, ", trav->word); // DEBUG DEBUG DEBUG
}
printf("}\n\n"); // DEBUG DEBUG DEBUG
}
}
return true; // always reports success
}
// Prints every bucket as "LIST nn: {word, word, }", then re-reads the dictionary file and,
// for each word, prints the address of the matching node or a NOT FOUND line.
// NOTE(review): the fopen() result is used without a NULL check.
// NOTE(review): the read loop is driven by feof() and ignores the fscanf() result.
void print(void) // print hash table contents and node locations
{
for (int i = 0; i < HASH_MAX; i++) // for every element in the hash table
{
node *check = HASH_TABLE[i]; // set traversal node to current hash table element head
printf("LIST %02d: {", i); // print hash table element location
while (check != NULL) // for all nodes in the current linked list
{
printf("%s, ", check->word); // print traversal node's word
check = check->next; // move to next node
}
printf("}\n");
}
printf("\n");
FILE *dictionary = fopen("C:/Users/aaron/Desktop/Dictionary.txt", "r"); // open dictionary file
while (!feof(dictionary)) // for all words in the dictionary
{
char word[LENGTH + 1]; // store next word
fscanf(dictionary, "%s", word); // scan for next word
unsigned int code = hash(word); // retrieve and store word's hash code
node *search = HASH_TABLE[code]; // set traversal node to hash location head
while (search != NULL) // for all nodes at that location, or until word is found
{
if (strcasecmp(search->word, word) == 0) // compare traversal node's word to scanned word (case insensitive)
{
printf("%s: %p\n", search->word, search); // print traversal node's word and location
break; // break while loop
}
else
{
search = search->next; // if traversal node's word does not match scanned word, move to next node
}
}
if (search == NULL) // if the scanned word matches none of the words in the hash location's linked list
printf("\"%s\" NOT FOUND\n", word); // word not found
}
fclose(dictionary); // close dictionary file
}
Caveat: chqrlie has pointed out many of the basic issues, but here's some refactored code.
Your main issue was that unload didn't actually remove the nodes.
One of the things to note is that it's easier/faster/better to use tolower once per string.
If the lowercased version is what we store in the node, and we lowercase the search word in check, we can use strcmp instead of strcasecmp [which has to redo the lowercasing for both arguments on each loop iteration].
So, I've changed the hash function to lowercase its argument "in-place".
As I mentioned in my above comment, print was extraneously rereading the dictionary file. So, I've removed that code. If it were necessary to do this, it should go into [yet] another function, or load and/or check should be reused.
(i.e.) print should do one thing well [a programming maxim].
Personally, I dislike "sidebar" comments:
if (unload()) // if unloaded successfully (function call), print contents
I prefer the comment to go above the line:
// if unloaded successfully (function call), print contents
if (unload())
To me, this is much clearer and it helps prevent the line from going beyond 80 characters in width.
Certain fixed constants (e.g. HASH_MAX and LENGTH) are global variables. This prevents them from being used to define arrays
(e.g.) you couldn't say:
node *HASH_TABLE[HASH_MAX];
and had to "hardwire" it as:
node *HASH_TABLE[50];
If we define these with either #define or as an enum, then we can use the preferred definitions.
Doing something like:
for (int i = 0; i < strlen(word); i++)
increases the loop time from O(length) to O(length^2) because strlen is called "length" times inside the loop and it rescans the string each time.
Much better to do:
int len = strlen(word);
for (int i = 0; i < len; i++)
But even this has an extra scan of the buffer. It can be better to do something like:
for (int chr = *word++; chr != 0; chr = *word++)
I've refactored the code with annotations for the bugs. Original code is bracketed inside a #if 0 block:
#if 0
// old/original code
#else
// new/refactored code
#endif
Anyway, here's the code:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#if 1
#include <ctype.h>
#endif
// Max elements in hash table
#if 0
unsigned int HASH_MAX = 50;
#else
enum {
HASH_MAX = 50
};
#endif
// Max length of word to be stored
#if 0
unsigned int LENGTH = 20;
#else
enum {
LENGTH = 20
};
#endif
// assign hash code -- [(code + current letter) * 3] * string length, % HASH_MAX
#if 0
unsigned int hash(const char *word);
#else
unsigned int hash(char *word);
#endif
// load dictionary into memory
bool load(FILE *dictionary);
// check if word exists in dictionary
#if 0
bool check(char *word);
#else
bool check(const char *word);
#endif
// unload dictionary from memory, free memory (CURRENTLY DEBUGGING,
// CHECKING ITERATION)
bool unload(void);
// print table contents and node locations
void print(void);
// Hash-table node: the stored word and a link to the next node in the same bucket.
// The #if 0 branch keeps the original member (an array of 20 char pointers); the live
// branch is a character buffer sized for LENGTH characters plus the terminating NUL.
typedef struct _node {
#if 0
char *word[20];
#else
char word[LENGTH + 1];
#endif
struct _node *next;
} node;
#if 0
node *HASH_TABLE[50];
#else
node *HASH_TABLE[HASH_MAX];
#endif
// Test driver: opens the dictionary, loads it, prints the table, looks up two sample
// words, then unloads and prints the (now empty) table.
// Returns 1 if the dictionary file cannot be opened. argc and argv are not used.
int
main(int argc, char *argv[])
{
// open dictionary file, read
// (the live branch uses a path relative to the working directory)
#if 0
FILE *dictionary = fopen("C:/Users/aaron/Desktop/Dictionary.txt", "r");
#else
FILE *dictionary = fopen("Dictionary.txt", "r");
#endif
// if dictionary is NULL, return error message, end program
if (!dictionary) {
printf("FILE NOT FOUND\n");
return 1;
}
// if dictionary loaded successfully (function call), close dictionary and
// print table contents
if (load(dictionary)) {
fclose(dictionary);
// print each bucket as "LIST nn: {word, ...}"
print();
}
// test check function for word that does not exist in the library
char *checkword = "Albatross";
// test check function for word that does exist in the library
char *checkword2 = "Riku";
// return check results for checkword, found or not found
if (check(checkword)) {
printf("\n%s found\n", checkword);
}
else {
printf("\n%s not found\n", checkword);
}
// return check results for checkword2, found or not found
if (check(checkword2)) {
printf("\n%s found\n", checkword2);
}
else {
printf("\n%s not found\n", checkword2);
}
// if unloaded successfully (function call), print contents
if (unload()) {
// DEBUG DEBUG DEBUG (confirm unload function returned true)
printf("\nUNLOADED...\n\n");
print();
}
}
// Lower-cases `word` IN PLACE and returns its bucket index: for each character,
// code = (code + lowercase character) * 3; the result is reduced modulo HASH_MAX.
// The string length is not a factor. Callers rely on the in-place lower-casing so
// that stored words and looked-up words can be compared with strcmp.
unsigned int
hash(char *word)
{
// store converted word for uniform key (original only; the live code edits word directly)
#if 0
char word_conv[LENGTH + 1];
#endif
// hash code
unsigned int code = 0;
#if 0
strcpy(word_conv, word);
// set all letters in the word to lower case
for (int i = 0; i < strlen(word); i++) {
word_conv[i] = tolower(word_conv[i]);
}
// for all letters in converted word, add ascii value to code and multiply by 3
for (int j = 0; j < strlen(word_conv); j++) {
code += word_conv[j];
code = code * 3;
}
#else
// single pass: lower-case, write back, and accumulate until the terminating NUL
// NOTE(review): chr is an int loaded from a plain char and passed to tolower without
// an unsigned char conversion
int chr;
while (1) {
chr = *word;
if (chr == 0)
break;
chr = tolower(chr);
*word++ = chr;
code += chr;
code *= 3;
}
#endif
// set code to remainder of current code divided by maximum hash table size
code = code % HASH_MAX;
return code;
}
// load dictionary into memory
//
// Reads whitespace-separated words from `dictionary`, lower-cases each one (via hash)
// and pushes a node for it onto the front of its bucket list.
// Returns true once the input is exhausted, or false if a node cannot be allocated;
// nodes loaded before the failure stay in the table and can be released with unload().
// A token longer than LENGTH characters is read in LENGTH-sized pieces rather than
// overflowing the buffer.
bool
load(FILE * dictionary)
{
    // store next word in the dictionary
    char word[LENGTH + 1];
#if 1
    // NOTE/BUG: a bare "%s" lets an over-long token overflow word[] -- build a
    // format whose field width is LENGTH so fscanf stops in time
    char fmt[16];
    snprintf(fmt, sizeof fmt, "%%%ds", LENGTH);
#endif
    // until end of dictionary file
    // NOTE/BUG: don't use feof
#if 0
    while (!feof(dictionary)) {
        // scan for next word
        fscanf(dictionary, "%s", word);
#else
    // scan for next word
    while (fscanf(dictionary, fmt, word) == 1) {
#endif
        // new node
        node *new_n = malloc(sizeof(node));
#if 1
        // NOTE/BUG: malloc can fail -- report it instead of dereferencing NULL
        if (new_n == NULL)
            return false;
#endif
        // store scanned word in new node
        strcpy(new_n->word, word);
        // new node's next pointer set to NULL
        new_n->next = NULL;
        // retrieve and store hash code (this also lower-cases the stored word)
        unsigned int code = hash(new_n->word);
        // NOTE/BUG: there's no need to append to the end of the list -- pushing
        // on the front is adequate and is faster
#if 0
        // if hash location has no head
        if (HASH_TABLE[code] == NULL) {
            // set new node to location head
            HASH_TABLE[code] = new_n;
        }
        // if head already exists at hash location
        else if (HASH_TABLE[code] != NULL) {
            // set traversal node
            node *trav = HASH_TABLE[code];
            // while traversal node's next pointer is not NULL
            while (trav->next != NULL) {
                // move to next node
                trav = trav->next;
            }
            // if traversal node's next pointer is null
            if (trav->next == NULL) {
                // set new node to traversal node's next pointer
                trav->next = new_n;
            }
        }
#else
        new_n->next = HASH_TABLE[code];
        HASH_TABLE[code] = new_n;
#endif
    }
    // confirm successful load
    return true;
}
// check if word exists in dictionary
//
// Copies `arg` into a local buffer, lower-cases the copy (via hash) and compares it
// with strcmp against every word in the selected bucket.
// Returns true on a match, false otherwise. An argument longer than LENGTH
// characters cannot fit in a node, so it is reported as not found.
#if 0
bool
check(char *word)
#else
bool
check(const char *arg)
#endif
{
    char word[LENGTH + 1];
    // retrieve and store hash code
#if 1
    // NOTE/BUG: strcpy into a fixed buffer needs a length guard
    if (strlen(arg) > (size_t) LENGTH)
        return false;
    strcpy(word,arg);
#endif
    unsigned int code = hash(word);
    // set traversal node to hash location head
    node *check = HASH_TABLE[code];
    // while traversal node is not NULL
    while (check != NULL) {
        // compare traversal node's word to provided word argument
        // NOTE/BUG: strcmp is faster than strcasecmp if we convert to lowercase _once_
#if 0
        int check_true = strcasecmp(check->word, word);
#else
        int check_true = strcmp(check->word, word);
#endif
#if 0
        // if a match is found, return true
        if (check_true == 0) {
            return true;
        }
        // if no match, move to next node
        else if (check_true != 0) {
            check = check->next;
        }
#else
        if (check_true == 0)
            return true;
        check = check->next;
#endif
    }
    // if end of list is reached without a match, return false
#if 0
    if (check == NULL)
        return false;
#else
    return false;
#endif
}
// unload dictionary from memory, free memory
// Frees every node in every bucket and resets each bucket head to NULL.
// Always returns true. The #if 0 branch keeps the original debug iteration, which
// overwrote words with "FREE" instead of freeing anything.
bool
unload(void)
{
// DEBUG DEBUG DEBUG (changing all nodes' words to "FREE" to test iteration)
#if 0
char *word = "FREE";
#endif
// for every element in the hash table, HASH_MAX (50)
for (int i = 0; i < HASH_MAX; i++) {
#if 0
// DEBUG DEBUG DEBUG (print current hash table location)
printf("LOCATION %02d:\n", i);
// while the head node's word is not "FREE"
while (strcmp(HASH_TABLE[i]->word, word) != 0) {
// set traversal node to head
node *trav = HASH_TABLE[i];
// DEBUG DEBUG DEBUG (print head word to confirm while condition)
printf("HEAD WORD: %s\n", HASH_TABLE[i]->word);
// while the traversal node's word is not "FREE"
while (strcmp(trav->next->word, word) != 0) {
// move to next node
trav = trav->next;
// DEBUG DEBUG DEBUG (print a dot for every location skipped)
printf(".");
}
// DEBUG DEBUG DEBUG
printf("\n");
// set traversal node's word to "FREE"
strcpy(trav->word, word);
// DEBUG DEBUG DEBUG
printf("{");
// DEBUG DEBUG DEBUG (print hash location's current list of words)
while (trav != NULL) {
// DEBUG DEBUG DEBUG
printf("%s, ", trav->word);
}
// DEBUG DEBUG DEBUG
printf("}\n\n");
}
#else
// save the successor before freeing: cur must not be read after free(cur)
node *nxt;
for (node *cur = HASH_TABLE[i]; cur != NULL; cur = nxt) {
nxt = cur->next;
free(cur);
}
// leave the bucket empty so print() and check() do not follow a freed pointer
HASH_TABLE[i] = NULL;
#endif
}
// freed successfully
return true;
}
// print hash table contents
// Writes one line per bucket in the form "LIST nn: {word, word, }" followed by a blank
// line. The #if 0 branch keeps the original code that re-read the dictionary file to
// print each word's node address; it is disabled in this version.
void
print(void)
{
// for every element in the hash table
for (int i = 0; i < HASH_MAX; i++) {
// set traversal node to current hash table element head
node *check = HASH_TABLE[i];
// print hash table element location
printf("LIST %02d: {", i);
// for all nodes in the current linked list
while (check != NULL) {
// print traversal node's word
printf("%s, ", check->word);
// move to next node
check = check->next;
}
printf("}\n");
}
printf("\n");
// NOTE/BUG: why reread dictionary after printing it?
#if 0
// open dictionary file
FILE *dictionary = fopen("C:/Users/aaron/Desktop/Dictionary.txt", "r");
// for all words in the dictionary
while (!feof(dictionary)) {
// store next word
char word[LENGTH + 1];
// scan for next word
fscanf(dictionary, "%s", word);
// retrieve and store word's hash code
unsigned int code = hash(word);
// set traversal node to hash location head
node *search = HASH_TABLE[code];
// for all nodes at that location, or until word is found
while (search != NULL) {
// compare traversal node's word to scanned word (case insensitive)
if (strcasecmp(search->word, word) == 0) {
// print traversal node's word and location
printf("%s: %p\n", search->word, search);
// break while loop
break;
}
else {
// if traversal node's word does not match scanned word,
// move to next node
search = search->next;
}
}
// if the scanned word matches none of the words in the hash location's
// linked list
if (search == NULL)
// word not found
printf("\"%s\" NOT FOUND\n", word);
}
// close dictionary file
fclose(dictionary);
#endif
}
Here's a version that has the #if 0 blocks removed.
Also, I've added a slight reordering in the load function, so that it inputs the data directly into the final place inside the node element (i.e. eliminates the intermediate buffer and a strcpy)
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
// Max elements in hash table
enum {
HASH_MAX = 50
};
// Max length of word to be stored
enum {
LENGTH = 20
};
// assign hash code -- [(code + current letter) * 3] * string length, % HASH_MAX
unsigned int hash(char *word);
// load dictionary into memory
bool load(FILE *dictionary);
// check if word exists in dictionary
bool check(const char *word);
// unload dictionary from memory, free memory (CURRENTLY DEBUGGING,
// CHECKING ITERATION)
bool unload(void);
// print table contents and node locations
void print(void);
// Hash-table node.
//   word: the stored word; the buffer holds up to LENGTH characters plus the NUL
//   next: the following node in the same bucket list, or NULL at the end
// NOTE(review): the tag _node begins with an underscore; such identifiers are reserved
// at file scope by the C standard.
typedef struct _node {
char word[LENGTH + 1];
struct _node *next;
} node;
node *HASH_TABLE[HASH_MAX];
// Test driver for the dictionary hash table.
// Opens "Dictionary.txt", loads it, prints the table, looks up two sample words,
// then unloads the table and prints it once more.
// Returns 1 if the dictionary file cannot be opened. argc and argv are not used.
int
main(int argc, char *argv[])
{
    FILE *dictionary = fopen("Dictionary.txt", "r");
    if (dictionary == NULL) {
        printf("FILE NOT FOUND\n");
        return 1;
    }

    // on a successful load the file is no longer needed: close it and dump the table
    if (load(dictionary)) {
        fclose(dictionary);
        print();
    }

    // sample lookups, reported in this order
    char *probes[] = { "Albatross", "Riku" };
    for (size_t k = 0; k < sizeof probes / sizeof probes[0]; k++) {
        if (check(probes[k]))
            printf("\n%s found\n", probes[k]);
        else
            printf("\n%s not found\n", probes[k]);
    }

    // release the table and show what is left of it
    if (unload()) {
        printf("\nUNLOADED...\n\n");
        print();
    }
}
// Lower-cases `word` in place and returns its bucket index.
// For each character: code = (code + lowercase character) * 3; the final value is
// reduced modulo HASH_MAX. The string length is not a factor.
// The in-place lower-casing is a deliberate side effect: load() and check() rely on
// it so that stored and looked-up words can be compared with strcmp.
unsigned int
hash(char *word)
{
    unsigned int code = 0;

    for (char *p = word; *p != '\0'; p++) {
        // go through unsigned char so tolower never sees a negative value
        unsigned char lower = (unsigned char) *p;
        lower = tolower(lower);
        *p = lower;
        code = (code + lower) * 3;
    }

    return code % HASH_MAX;
}
// load dictionary into memory
bool
load(FILE *dictionary)
{
// scan for next word
while (1) {
// new node
node *new_n = malloc(sizeof(node));
if (fscanf(dictionary, "%s", new_n->word) != 1) {
free(new_n);
break;
}
// store scanned word in new node
new_n->next = NULL;
// retrieve and store hash code
unsigned int code = hash(new_n->word);
// pushing on the front of the list is adequate and is faster
new_n->next = HASH_TABLE[code];
HASH_TABLE[code] = new_n;
}
// confirm successful load
return true;
}
// check if word exists in dictionary
//
// Copies `arg` into a local buffer, lower-cases the copy (via hash) and compares it
// with strcmp against every word in the selected bucket. `arg` itself is not modified.
// Returns true on a match, false otherwise. An argument longer than LENGTH
// characters cannot fit in a node, so it is reported as not found instead of
// overflowing the local buffer.
bool
check(const char *arg)
{
    if (strlen(arg) > (size_t) LENGTH)
        return false;

    char word[LENGTH + 1];
    strcpy(word, arg);

    // retrieve hash code; this lower-cases the local copy as a side effect
    unsigned int code = hash(word);

    // walk the bucket list looking for an exact (already lower-cased) match
    for (node *cur = HASH_TABLE[code]; cur != NULL; cur = cur->next) {
        if (strcmp(cur->word, word) == 0)
            return true;
    }

    // end of list reached without a match
    return false;
}
// unload dictionary from memory, free memory
// Releases every node in every bucket and leaves each bucket head NULL.
// Always returns true.
bool
unload(void)
{
    for (int bucket = 0; bucket < HASH_MAX; bucket++) {
        // pop nodes off the front until the bucket is empty
        while (HASH_TABLE[bucket] != NULL) {
            node *doomed = HASH_TABLE[bucket];
            // unlink before freeing so the node is never read after free()
            HASH_TABLE[bucket] = doomed->next;
            free(doomed);
        }
    }
    return true;
}
// print hash table contents
// Writes one line per bucket in the form "LIST nn: {word, word, }" (an empty bucket
// prints "LIST nn: {}"), followed by a single blank line.
void
print(void)
{
    for (int bucket = 0; bucket < HASH_MAX; bucket++) {
        printf("LIST %02d: {", bucket);
        // every node chained in this bucket, head first
        for (node *entry = HASH_TABLE[bucket]; entry != NULL; entry = entry->next) {
            printf("%s, ", entry->word);
        }
        printf("}\n");
    }
    printf("\n");
}
UPDATE:
Could you please explain for (int chr = *word++; chr != 0; chr = *word++)? I don't know what *word++ means in this context.
Sure. With chr = *word++; it means dereference word [a char pointer]. This fetches the char value pointed to by word (i.e. fetch the value from memory). Then, set this value into chr. Then, increment word [so it points to the next character in the array].
The statement is composed of three operators: = is the assignment operator, * is the dereference operator, and ++ is the post-increment operator.
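Based on the precedence [and/or binding] of the operators, postfix ++ binds more tightly than unary *, so *word++ is parsed as *(word++). Because it is a post-increment, word++ yields the original pointer value, and that is what * dereferences. The fetched value is placed in chr, and word ends up pointing at the next character. It is as if the following were performed as a single statement: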
chr = *word;
word += 1;
chr = tolower(chr); should be chr = tolower((unsigned char)chr); for reasons explained in my answer. Alternatively, you could define chr as unsigned char chr;
I was under the impression that tolower et. al. were "self protective" of this (e.g. they did the unsigned char cast). But, the [linux] manpage says its UB if the value is out of range. I've edited the second example to use unsigned char chr;.
Strangely, glibc's tolower has a range check built in that works on the int value and returns the original value (i.e. does not index into the translation table) if the value is out of range. This appears to be part of some BSD compatibility [the BSD manpage states it does a range check, but the feature is deprecated]. I'm guessing the glibc range check was added after the manpage was written.
To me, the macro should just do the cast itself [and the global function as well]. But, I think this might break the BSD compatibility.
But, now we're all hamstrung to the old way [or add a wrapper macro] because of backward compatibility.
it is confusing for hash to have a side effect on its argument and further confusing that this side effect be necessary for the strcmp in check to work.
The side effect is [probably] no more [or, perhaps, even less] egregious than what strtok does. That is, it's not modifying a hidden/unrelated global, etc.
IMO, it wouldn't be confusing if the effect were commented [I documented it in the answer text]. Perhaps renaming hash to something a bit more descriptive would help. We could do: take_hash_of_argument_that_we_modify_to_lowercase_first.
That would make the function name "self documenting" as some (e.g. "Uncle" Bob Martin(?)) might suggest member functions should be.
But, maybe hash_and_lowercase might be better. This might be a sufficient clue to the reader that they need to consult the API documentation for the function rather than assuming they know all about it from just the name.
The linked list traversal is much faster with strcmp, so, at a minimum [architecturally] we want to store lower case strings in the nodes. We don't want to repeat the lowercasing for each node on each scan. And, we don't want strcasecmp to repeat the lowercasing on word [and the string in the node] for each loop iteration.
As you say, we could have two functions. And we could still achieve this refactoring: a string based version of tolower that lowercases its argument and leave the hash as it was done originally.
Originally, I considered this approach. I soon realized that everywhere you did a hash, you wanted it to be on the lowercased string. We could achieve this with (e.g.):
strlower(word);
value = hash(word);
But, there wasn't a use case here for doing one of these calls separately--only in pairs.
So, given that, why scan the argument string twice and slow down the operation by 2x?
From JFK [after the failed Bay of Pigs invasion]: Mistakes are not errors if we admit them.
So, I'd paraphrase that as: Side effects are not errors if we document them.
There are multiple problems in your code:
the word member of the _node structure has the wrong type: it should just be an array of characters, not an array of 20 char pointers. And don't use _node as the tag: identifiers starting with _ are reserved at file scope. Change the definition to:
// node structure: stored word, pointer to next node
// word is a character buffer for up to LENGTH characters plus the terminating NUL;
// the tag is plain `node`, without a leading underscore.
typedef struct node {
char word[LENGTH+1];
struct node *next;
} node;
your reading loops are incorrect: while (!feof(dictionary)) is not the proper test to detect the end of file, you should instead test if fscanf() successfully reads the next word:
while (fscanf(dictionary, "%s", word) == 1) // until end of dictionary file
Furthermore you should specify a maximum length for fscanf() to avoid undefined behavior on long words:
while (fscanf(dictionary, "%19s", word) == 1) // read at most 19 characters
You do not check for allocation failure.
There are many redundant tests such as else if (HASH_TABLE[code] != NULL) and if (trav->next == NULL) in load(), else if (check_true != 0) and if (check == NULL) in check().
You do not modify trav in the loop while (trav != NULL) in the DEBUG code, causing an infinite loop.
It is not difficult to free the dictionary in unload(), your iteration checking code is way too complicated, you already have correct iteration code for print(). Here is a simple example:
// Unload the dictionary from memory: free every node of every bucket and leave
// each bucket head NULL. Always returns true.
bool unload(void) {
    for (int i = 0; i < HASH_MAX; i++) {
        node *next;
        // remember the successor before freeing the current node
        for (node *n = HASH_TABLE[i]; n != NULL; n = next) {
            next = n->next;
            free(n);
        }
        HASH_TABLE[i] = NULL;
    }
    return true;
}
Note also that there is no need to store the converted word to compute the hash value, and char values must be cast as (unsigned char) to pass to tolower() because this function is only defined for the values of unsigned char and the special negative value EOF. char may be a signed type, so tolower(word[i]) has undefined behavior for extended characters.
// Returns the bucket index for `word` without modifying it.
// For each character: code = (code + lowercase character) * 3; the final value is
// reduced modulo HASH_MAX. The string length is not a factor.
unsigned int hash(const char *word)
{
    unsigned int code = 0;
    // read the bytes as unsigned char so tolower never receives a negative value
    for (const unsigned char *p = (const unsigned char *)word; *p != '\0'; p++) {
        code += tolower(*p);
        code *= 3;
    }
    return code % HASH_MAX;
}
I'm attempting to implement a linear search function for strings in C, but it isn't currently working. Here is my code:
// Linear search for name matching input string.
// Walks the list from linkedList->head, counting nodes until the loop condition fails,
// and returns that count.
// NOTE(review): `name` is a single char, yet the call shown for this function passes the
// string "Tim"; the loop then compares that char with temp->name using != rather than
// comparing string contents.
// NOTE(review): when the end of the list is reached, the returned count equals the
// number of nodes visited, which is not a valid index.
int listSearch(struct LinkedList* linkedList, char name)
{
struct StudentRecord* temp = linkedList->head; // Go to first item in linked list
int count = 0; // Count variable to give index of search item
while((temp != NULL) && (name != temp->name))
{
temp = temp->next;
count++;
}
return count;
}
And here is the function call to listSearch:
printf("\nItem: Tim\nIndex: %d", listSearch(list_1, "Tim"));
'Tim' is at index 3, but the output consistently puts him at index 4 (there are 4 total items in the list and thus index 4 doesn't exist) - and the same is true for any item we search for. This leads me to believe that the (name != temp->name) condition is failing, but I can't for the life of me see why...Could anyone give me a hint as to why it isn't working?
You're passing in a char, not a pointer to a char and as a result, you were comparing a char to a string pointer. You also need to compare the strings.
int listSearch(struct LinkedList* linkedList, char * name)
{
struct StudentRecord* temp = linkedList; // Go to first item in linked list
int count = 0; // Count variable to give index of search item
while(temp != NULL) {
if (temp->name != NULL && strcmp(name,temp->name)) {
count++;
}
temp = temp->next;
}
return count;
}
Use strcmp to compare two strings, for example:
if(strcmp(a,b)==0)
printf("Entered strings are equal");
else
printf("Entered strings are not equal");
Ok, so the problem that I am trying to find is why when I call print_inOrder(), I don't get anything printed back. The assignment I am suppose to do is write a tree algorithm in descending order (meaning higher values on left and lower values on the right). I had already created a function that created a tree a while back so I had just modified it and it works as it should; however the signature for this assignment is different from my old assignment and when I tried changing the pointers around, I got it to compile, but nothing prints out. So if someone could double check my changes and explain where I went wrong and how I need to fix it, that would make my day! ^.^
Working Original Function:
// Inserts `value` into the tree reached through *current_tnode.
// If *current_tnode is NULL, a new node is allocated and stored through the pointer,
// which is how the caller's link (for example `root`) gets updated. Otherwise the
// function recurses into the left link when value compares >= the node's string,
// and into the right link when it compares lower.
// The node stores the `value` pointer itself; the string is not copied.
// NOTE(review): the return type is Tnode (by value), but the insertion branch ends
// without a return statement. The calls shown for this function ignore the result.
// NOTE(review): the malloc result is used without a NULL check.
Tnode add_tnode(Tnode **current_tnode, char *value)
{
if(!(*current_tnode))
{
*current_tnode = (Tnode*) malloc(sizeof(Tnode));
(*current_tnode)->strValue = value;
//initialize the children to null
(*current_tnode)->left = NULL;
(*current_tnode)->right = NULL;
}
//Greater values go to left
else if(strcmp(value, (*current_tnode)->strValue) >= 0)
{
return add_tnode(&(*current_tnode)->left, value);
}
//Lesser values go to right
else if(strcmp(value, (*current_tnode)->strValue) < 0)
{
return add_tnode(&(*current_tnode)->right, value);
}
}
How it's called in main:
Tnode *root;
root = NULL;
//Add some nodes with string values
add_tnode(&root, "pie");
add_tnode(&root, "hi");
add_tnode(&root, "hi");
add_tnode(&root, "l");
add_tnode(&root, "leg");
//Print nodes in descending order
print_inOrder(root);
Signature Required:
/* Required prototype: the node is passed as a plain pointer (by value) and a Tnode pointer is returned. */
Tnode *add_tnode(Tnode *current_tnode, char* value)
My Attempt to Fix:
/*
 * Inserts `value` into the subtree rooted at current_tnode and returns the
 * root of that subtree.
 *
 * Because the pointer is passed by value, the function cannot change the
 * caller's variable; the caller must store the result, e.g.
 *     root = add_tnode(root, "pie");
 *
 * Values that compare greater than or equal to a node go to its left,
 * smaller values go to its right. The string is not copied.
 *
 * Returns NULL only when called on an empty subtree and the allocation
 * fails; otherwise returns current_tnode (or the newly created node when
 * current_tnode was NULL).
 */
Tnode *add_tnode(Tnode *current_tnode, char* value)
{
    if (!current_tnode) {
        Tnode *fresh = malloc(sizeof *fresh);
        if (!fresh) {
            return NULL;
        }
        fresh->strValue = value;
        /* initialize the children to null */
        fresh->left = NULL;
        fresh->right = NULL;
        return fresh;
    }

    if (strcmp(value, current_tnode->strValue) >= 0) {
        /* Greater values go to left; re-attach the returned subtree so a
         * freshly created child is linked to its parent. */
        Tnode *child = add_tnode(current_tnode->left, value);
        if (child) {
            current_tnode->left = child;
        }
    } else {
        /* Lesser values go to right */
        Tnode *child = add_tnode(current_tnode->right, value);
        if (child) {
            current_tnode->right = child;
        }
    }
    return current_tnode;
}
How it's called in Main:
Tnode *root;
root = NULL;
//Add some nodes with string values
add_tnode(root, "pie");
add_tnode(root, "hi");
add_tnode(root, "hi");
add_tnode(root, "l");
add_tnode(root, "leg");
//Print nodes in descending order
print_inOrder(root);
Here's print_inOrder() just in case someone wants to look at it
/*
 * Recursively prints the subtree rooted at current_tnode, one string per
 * line: left subtree first, then the node itself, then the right subtree.
 * A NULL subtree prints nothing.
 */
void print_inOrder(Tnode *current_tnode)
{
    if (current_tnode == NULL) {
        return;
    }

    print_inOrder(current_tnode->left);
    printf("%s\n", current_tnode->strValue);
    print_inOrder(current_tnode->right);
}
When I run it through the gdb debugger and the print function is called, it only evaluates the if statement and returns, so my guess is that either the tree wasn't created at all or the value passed in is incorrect. If someone could tell me what the mistake is, I'd greatly appreciate it!
Your problem is that your first function takes a Tnode **, that is a pointer to a pointer, and modifies the TNode * it points to. Your second function takes just the pointer, and modifies the passed-in argument; the caller can't see those changes, and so nothing is ever added to the tree.
You should allocate and assign the root node before doing anything, then change the function so that it modifies the TNode instead of the pointer thereto.
If your previous assignment worked, all you have to do is change the print function, exploring the right nodes before exploring the left nodes.
/*
 * Recursively prints the subtree rooted at current_tnode, one string per
 * line, visiting the right subtree before the node and the left subtree
 * after it. Nothing is printed for a NULL subtree.
 */
void print_inOrder(Tnode *current_tnode)
{
    if (!current_tnode)
        return;

    print_inOrder(current_tnode->right);
    printf("%s\n", current_tnode->strValue);
    print_inOrder(current_tnode->left);
}
I have a problem writing a code that should read usernames and put them in list. Every username should be connected to the number of times it has been entered. The problem occurs when entering the second username, my code places that username in the variable called first (where the first is kept). I guess I've done something wrong with the pointers, but I cannot find what. I am confused, in the end of one while loop the first one is the real first one, and when the program enters while again, variable first changes. How could that be? Please help me.
Thank you :)
/* One entry of the singly linked username list. */
typedef struct _user user;

struct _user
{
    char *name;          /* points at the username text (not owned storage by itself) */
    int counter;         /* bumped each time the same name is entered again */
    struct _user *next;  /* following entry; NULL marks the end of the list */
};
/*
 * Reads usernames from standard input and keeps a singly linked list of
 * the distinct names together with how often each one was entered. After
 * every entry the user is asked whether to continue and the current list
 * is printed.
 *
 * Returns 0. The loop also ends on input failure or when memory runs out.
 */
int main() {
    char userName [10];
    int go_on = 1;
    user *first = NULL, *last = NULL, *temp, *new;

    while (go_on == 1) {
        /* must be reset every round, otherwise one hit suppresses all
         * later insertions */
        int found = 0;

        printf ("Username: ");
        /* %9s leaves room for the terminator in the 10-byte buffer */
        if (scanf("%9s", userName) != 1)
            break;

        for (temp = first; temp; temp = temp->next) {
            if (strcmp (temp->name, userName) == 0) {
                temp->counter++;
                found = 1;
                break;
            }
        }

        if (!found) {
            new = malloc (sizeof *new);
            if (!new) {
                fprintf (stderr, "Error in allocation\n");
                break;
            }
            /* each node needs its own copy of the name: userName is
             * overwritten by the next scanf */
            new->name = malloc (strlen (userName) + 1);
            if (!new->name) {
                fprintf (stderr, "Error in allocation\n");
                free (new);
                break;
            }
            strcpy (new->name, userName);
            new->counter = 1;
            new->next = NULL;

            /* link the node at the tail so `first` stays the first name */
            if (last)
                last->next = new;
            else
                first = new;
            last = new;
        }

        printf ("Go on? (1/0)");
        if (scanf("%d", &go_on) != 1)
            go_on = 0;

        printf ("Current list: ");
        for (temp = first; temp; temp = temp->next)
            printf("%s %d\n", temp->name, temp->counter);
    }

    /* release every node together with its private name copy */
    while (first) {
        temp = first->next;
        free (first->name);
        free (first);
        first = temp;
    }
    return 0;
}
Your error, I think, is the userName array. You should allocate a new one for each element in your linked list. When you write new->name = userName;, you are not copying the name to the struct, you are making the struct point to your userName[10] array. As such every struct's actual "name" is storing only the single last name scanf-ed. That being said...
I generally prefer to write that kind of code with dedicated tools instead of logically embedding them in a loop construct:
Keeping your struct:
/* Element of the singly linked list (Sll) of usernames. */
typedef struct _user user;

struct _user
{
    char *name;          /* username text */
    int counter;         /* occurrence count kept for this name */
    struct _user *next;  /* next element, or NULL for the last one */
};
I would create a function that, given a properly constructed Sll returns a matching element:
function user *user_match_name(user *user_head, const ch *name)
{
user *cur_user = NULL;
/* look for a match */
for (cur_user = user_head ; cur_user ; cur_user = cur_user->next)
if(!strcmp(name,cur_user->name) return cur_user;
/* no match */
return NULL;
}
Then I usually prefer to have an Sll element builder:
function user *create_user(const ch *name)
{
user *new_user;
if(!(new_user = malloc(sizeof(user))))
printf("Error in allocation"); /* or better malloc error handling */
/* IMPORTANT: PROVIDE MEMORY FOR THE NAMES!!! */
if(!(new_user->name = malloc(sizeof(char)*256))) /* sizeof(char) is useless but I like to explicit it like that. And 256 should be enough a buffer could be better made */
printf("Error in allocation"); /* or better malloc error handling */
strncpy(new_user->name, name,256); /* not sure if I got the argument order right... */
new_user->counter = 0; /* or 1 depending on your prefered convention */
new_user->next = NULL;
return new_user;
}
It eases debugging like you wouldn't believe! Then it's just a matter of rewriting your main function:
/*
 * Reads usernames from standard input. A name that is already in the list
 * gets its counter incremented; an unknown name is turned into a new
 * element by create_user and pushed onto the front of the list. After
 * every entry the user is asked whether to continue and the list is
 * printed.
 *
 * Returns 0. The loop also ends when reading a username fails.
 */
int main() {
    char userName [10];
    int go_on = 1;
    user *user_head = NULL, *new_user, *temp;

    while (go_on == 1) {
        printf ("Username: ");
        /* %9s leaves room for the terminator in the 10-byte buffer */
        if (scanf("%9s", userName) != 1)
            break;

        if ((new_user = user_match_name(user_head, userName))) {
            /* already listed: count it, but do NOT push it again --
             * re-linking an existing element would create a cycle */
            ++new_user->counter;
        } else if ((new_user = create_user(userName))) {
            /* Here we push on the Sll */
            new_user->next = user_head;
            user_head = new_user;
        }

        printf ("Go on? (1/0)");
        if (scanf("%d", &go_on) != 1)
            go_on = 0;

        printf ("Current list: ");
        for (temp = user_head; temp; temp = temp->next)
            printf("%s %d\n", temp->name, temp->counter);
    }

    /* release the elements; assumes name was heap-allocated by
     * create_user */
    while (user_head) {
        temp = user_head->next;
        free(user_head->name);
        free(user_head);
        user_head = temp;
    }
    return 0;
}
Ahhhhhh! Much easier to read. Be mindful of: 1) I didn't compile check the code. The important ideas are there, leverage them. 2) Even in your previous implementation, you are white space vulnerable but that's somewhat another topic.
Or you could simply fix it by doing:
/* List element that stores the username inline instead of pointing at it. */
typedef struct _user user;

struct _user
{
    char name[10];       /* username characters, held inside the element */
    int counter;         /* occurrence count kept for this name */
    struct _user *next;  /* next element of the list */
};
and strncpy(new->name,userName,10) instead of assigning the pointer.