I am working on a pset, where we have to implement load, hash, size, check and unload functions as efficiently as possible using a hash table in such a way that TIME IN load, TIME IN check, TIME IN size, and TIME IN unload are all minimized.
When I test it with a given text, I am having the following message:
WORDS MISSPELLED: 17187
WORDS IN DICTIONARY: 1
WORDS IN TEXT: 17756
TIME IN load: 0.00
TIME IN check: 0.01
TIME IN size: 0.00
TIME IN unload: 0.00
TIME IN TOTAL: 0.01
The dictionary is not working correctly as I did not implement hash and check as per conditions but I do not know how. Check must be case insensitive and will only be passed words that contain (uppercase or lowercase) alphabetical characters and possibly apostrophes.
Can you help me on this and point any other potential errors I made.
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 17576; // for the first 3 letters so 26*26*26
// Hash table
node *table[N];
unsigned int hash_value; // initialise positive int value
unsigned int word_counter; // initialise positive int word counter
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
//Hash the word to obtain the hash value;
hash_value = hash(word);
//Access linked list at the given index in the hash table, we are creating a trav pointer to the head of list indexed via the hash function
for (node *cursor = table[hash_value]; cursor!= NULL; cursor = cursor->next)
{
if (strcasecmp(word, cursor->word) == 0)
{
return true;
}
}
return false;
}
// Hashes word to a number
// hash function was taken from : stackexchange.com
unsigned int hash(const char *word)
{
for (hash_value = 0; *word != '\0'; word++)
{
hash_value = *word + 31 * hash_value;
}
return hash_value % N;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// TODO
//allocate memory for all node buckets
for (int i = 0; i < N; i++)
{
table[i] = NULL;
}
//Open dictionary
FILE *file = fopen(dictionary, "r");
if (file == NULL) // check if file is not empty)
{
printf("Error, empty file\n");
return 1;
}
char word[LENGTH + 1];
while (fscanf(file, "%s", word) != EOF)
{
//create a new node
node *new_node = malloc(sizeof(node));
if (new_node == NULL) // check if enough memory
{
return false;
}
else
{
strcpy(new_node->word, word); //copies word into new_node->word;
hash_value= hash(word);
//Insert the node into the hash table
new_node->next = table[hash_value];
/set the head to the new pointer, so it is inserted in front
table[hash_value] = new_node;
word_counter ++;
}
fclose(file);
}
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// TODO
if (word_counter > 0)
{
return word_counter;
}
else
{
return 0;
}
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// loop through all buckets ( ie all indexes of the array of linked lists)
for (int i = 0; i < N; i++)
{
node *cursor = table[i]; // place cursor to each bucket
while (cursor!= NULL) //
{
node *tmp = cursor->next; // create a tmp equal to cursor
free(cursor);
cursor = tmp;
}
return true;
}
return false;
}
Related
I explored the possibility that i'm skipping the last node in a linked list, but I can't find the error.
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
unsigned int COUNT = 0;
// TODO: Choose number of buckets in hash table
const unsigned int N = 7480;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
int checkhash;
char lowerword[LENGTH + 1];
// convert word to lower case
for (int i = 0; i <= strlen(word); i++)
{
lowerword[i] = tolower(word[i]);
}
//get hash number
checkhash = hash(lowerword);
//check if it's in dictionary
node *trav = table[checkhash]; // traversal pointer
while (trav != NULL)
{
if (!strcasecmp(word, trav->word))
{
return true;
}
trav = trav->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
unsigned int hash_num = 0;
for (int i = 0; i < strlen(word); i += 3)
{
hash_num += word[i] * word[2];
hash_num += i + 3 * i;
}
hash_num += 3435;
return hash_num % N;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// TODO
// • Open dictionary file
FILE *dp = fopen(dictionary, "r");
// • Read strings from file one at a time
if (dp == NULL)
{
printf("Error: Could not open dictionary\n");
return false;
}
// declaring needed variables
char word[LENGTH + 1];
while (fscanf(dp, "%s", word) != EOF) // call fscanf untill EOF is returned
{
// • Create a new node for each word
// • Use malloc
// • Remember to check if return value is NULL
// • Copy word into node using strcpy
node *newnode = malloc(sizeof(node));
if (newnode == NULL)
{
unload();
printf("There is insufficient memory to load dictionary\n");
return false;
}
strcpy(newnode->word, word);
// • Hash word to obtain a hash value
// • Use the hash function
// • Function takes a string and returns an index
int index = hash(word);
// • Insert node into hash table at that location
if (table[index] == NULL)
{
table[index] = newnode;
newnode->next = NULL;
}
else
{
newnode->next = table[index];
table[index] = newnode;
}
COUNT++;
}
fclose(dp);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
return COUNT;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
node *trav; // traversal pointers
node *tmp;
for (int i = 0; i < N; i++)
{
trav = table[i]; //next pointer in array
// recursively free list
while (trav != NULL)
{
tmp = trav;
trav = trav->next;
free(tmp);
}
}
// TODO
return true;
}
I have tried readjusting the check function for the last 2 hours with no luck. I don't think it is the hash function as I've parsed through it for a while without finding any logical errors. I'm nearly certain that the error will be found within check(). I can't wait to smack my forehead over a newbie error.
Here is the feedback I get from check50:
MISSPELLED WORDS
WORDS MISSPELLED: 0
WORDS IN DICTIONARY: 1
WORDS IN TEXT: 1
Actual Output:
MISSPELLED WORDS
a
WORDS MISSPELLED: 1
WORDS IN DICTIONARY: 1
WORDS IN TEXT: 1
Just trying to get the simple version of the hash function to work for now (haven't implemented the hash function logic yet, just wanna fix the memory errors before getting to the actual logic) but can't figure where the memory error is arising.
In my load function, I open up the dictionary file and initialize my hash table to set all pointers to NULL. Then, I use fscanf to scan the dictionary file, create a node *n for each word in the dictionary, and copy the word into this node.
If table[index] == NULL, then I set both table[index] and a node called head equal to node n, and set the next address equal to NULL. Otherwise, I set the next node as table[index] and table[index] as the current node, n.
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
//represents the number of words in the dictionary
int word_count = 0;
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 26;
// Hash table
node *table[N];
// initialize hash table (set all values to NULL)
// reference video: https://youtu.be/2Ti5yvumFTU
void init_table()
{
for (int i = 0; i < N; i++)
{
table[i] = NULL;
}
}
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
//obtain the index of the word in the hash
int node_index = hash(word);
//initiate cursor to point to the first node of the LL
node *cursor = table[node_index];
//traverse through the LL searching for the word
while (cursor != NULL)
{
if (strcasecmp(cursor->word, word) == 0)
{
return true;
}
cursor = cursor->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
return ((toupper(word[0]) - 'A') % N);
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
char *word = NULL;
int node_index = 0;
// Open input file
FILE *file = fopen(dictionary, "r");
//check if file exists
if (file == NULL)
{
return false;
}
init_table();
//count the number of words in the dictionary
word_count = fscanf(file, "%s", word);
node *head;
//loop through file for each word
while (word_count != EOF)
{
//assign memory for a new node
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
//copy the scanned word into the created node
strcpy(n->word, word);
//get the hash index of the node
node_index = hash(n->word);
if (table[node_index] == NULL)
{
head = table[node_index] = n;
n->next = NULL;
}
// otherwise set next node as table[index], table[index] as current node n
else
{
n->next = head;
table[node_index] = n;
}
}
return true;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
for (int i = 0; i < N; i++)
{
node *cursor = table[i];
node *temp = table[i];
while (cursor != NULL)
{
cursor = cursor->next;
free(temp);
temp = cursor->next;
}
}
return true;
}
A different file has the main function that links to this file.
Any help would be appreciated, thanks!
Your unload() function is buggy. For example, in the following statement you have no guarantee that cursor will not be null:
temp = cursor->next;
You can pretty easily fix this by changing the body of the loop to be:
temp = cursor;
cursor = cursor->next;
free(temp);
I am working on the CS50 pset5 Speller, and I keep getting a segmentation fault error. Debug50 suggests the problem is the line n->next = table[index]; in the implementation of the load function, line 110. I tried to revise but I can´t figure out why it would give error. Here below my code, can anyone please help me?
// Implements a dictionary's functionality
#include <stdbool.h>
#include <strings.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node {
char word[LENGTH + 1];
struct node *next;
} node;
// Number of buckets in hash table
const unsigned int N = 150000;
// Nodes counter
int nodes_counter = 0;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
int hash_value = hash(word);
node *cursor = malloc(sizeof(node));
if (cursor != NULL)
{
cursor = table[hash_value];
}
if (strcasecmp(cursor->word, word) == 0) // If word is first item in linked list
{
return 0;
}
else // Iterate over the list by moving the cursor
{
while (cursor->next != NULL)
{
if (strcasecmp(cursor->word, word) == 0) // If word is found
{
return 0;
}
else
{
cursor = cursor->next;
}
}
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// Adaptation of FNV function, source https://www.programmingalgorithms.com/algorithm/fnv-hash/c/
const unsigned int fnv_prime = 0x811C9DC5;
unsigned int hash = 0;
unsigned int i = 0;
for (i = 0; i < strlen(word); i++)
{
hash *= fnv_prime;
hash ^= (*word);
}
return hash;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// Open Dictionary File (argv[1] or dictionary?)
FILE *file = fopen(dictionary, "r");
if (file == NULL)
{
printf("Could not open file\n");
return 1;
}
// Read until end of file word by word (store word to read in word = (part of node)?)
char word[LENGTH + 1];
while(fscanf(file, "%s", word) != EOF)
{
// For each word, create a new node
node *n = malloc(sizeof(node));
if (n != NULL)
{
strcpy(n->word, word);
//Omitted to avoid segmentation fault n->next = NULL;
nodes_counter++;
}
else
{
return 2;
}
// Call hash function (input: word --> output: int)
int index = hash(word);
// Insert Node into Hash Table
n->next = table[index];
table[index] = n;
}
return false;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// Return number of nodes created in Load
if (nodes_counter > 0)
{
return nodes_counter;
}
return 0;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO
for (int i = 0; i < N; i++)
{
node *cursor = table[i];
while (cursor->next != NULL)
{
node *tmp = cursor;
cursor = cursor->next;
free(tmp);
}
}
return false;
}
There are multiple problems in your code:
node *table[N]; cannot be only be defined as a global object if N is a constant expression. N is defined as a const unsigned int, but N is not a constant expression in C (albeit it is in C++). Your program compiles only because the compiler accepts this as a non portable extension. Either use a macro or an enum.
you overwrite cursor as soon as it is allocated in check(). There is no need to allocate a node in this function.
the hash() function should produce the same hash for words that differ only in case.
the hash() function only uses the first letter in word.
the hash() function can return a hash value >= N.
fscanf(file, "%s", word) should be protected agains buffer overflow.
you do not check if cursor is non null before dereferencing it in unload()
Here is a modified version:
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node {
char word[LENGTH + 1];
struct node *next;
} node;
// Number of buckets in hash table
enum { N = 150000 };
// Nodes counter
int nodes_counter = 0;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word) {
int hash_value = hash(word);
// Iterate over the list by moving the cursor
for (node *cursor = table[hash_value]; cursor; cursor = cursor->next) {
if (strcasecmp(cursor->word, word) == 0) {
// If word is found
return true;
}
}
// If word is not found
return false;
}
// Hashes word to a number
unsigned int hash(const char *word) {
// Adaptation of FNV function, source https://www.programmingalgorithms.com/algorithm/fnv-hash/c/
unsigned int fnv_prime = 0x811C9DC5;
unsigned int hash = 0;
for (unsigned int i = 0; word[i] != '\0'; i++) {
hash *= fnv_prime;
hash ^= toupper((unsigned char)word[i]);
}
return hash % N;
}
// Loads dictionary into memory, returning true if successful, else a negative error number
int load(const char *dictionary) {
// Open Dictionary File (argv[1] or dictionary?)
FILE *file = fopen(dictionary, "r");
if (file == NULL) {
printf("Could not open file\n");
return -1;
}
// Read until end of file word by word (store word to read in word = (part of node)?)
char word[LENGTH + 1];
char format[10];
// construct the conversion specifier to limit the word size
// read by fscanf()
snprintf(format, sizeof format, "%%%ds", LENGTH);
while (fscanf(file, format, word) == 1) {
// For each word, create a new node
node *n = malloc(sizeof(node));
if (n == NULL) {
fclose(file);
return -2;
}
strcpy(n->word, word);
n->next = NULL;
nodes_counter++;
// Call hash function (input: word --> output: int)
int index = hash(word);
// Insert Node into Hash Table
n->next = table[index];
table[index] = n;
}
fclose(file);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void) {
// Return number of nodes created in Load
return nodes_counter;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void) {
for (int i = 0; i < N; i++) {
node *cursor = table[i];
table[i] = NULL;
while (cursor != NULL) {
node *tmp = cursor;
cursor = cursor->next;
free(tmp);
}
}
return true;
}
currently doing the speller problem of cs50 pset5.
I'm having a trouble with forming the hash table and i think it causes a segmentation fault later on when i try to run a function that searches the table.
this is the function that creates the hash table:
bool load(const char *dictionary)
{
FILE *dict = fopen(dictionary, "r"); //opens dictionary file
if (dict == NULL) // if cant be opened loading failed
{
return false;
}
char w[LENGTH + 1]; //buffer (length is the maximum character number
int i = 0; //index within word
while(fscanf(dict, "%s", w) != EOF) // scanning the dictionary for words
{
int x = hash(w); //getting the number of the linked list within the table
node *n = malloc(sizeof(node)); //allocating memory for a new node
if (n == NULL)
{
return false;
}
for (int j = 0; j < strlen(w) + 1; j++) // documenting the word within the new node
{
n->word[i] = w[j];
}
n->next = table[x];
table[x] = n; //new node is the beginning of the linked list
dicsize++;
}
fclose(dict);
return true;
}
my main question is whether my code for forming the table is correct and if not then why
thank you in advance
and this is the entire code:
// Implements a dictionary's functionality
#include <string.h>
#include <strings.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 26;
int dicsize = 0;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
int x = hash(word);
node *cur = table[x];
while(table[x] != NULL)
{
if(strcasecmp(word, cur->word) == 0)
{
return true;
}
if(cur == NULL)
{
return false;
}
cur = cur->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
char temp = tolower(word[0]);
int place = (temp - 97);
return place;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
FILE *dict = fopen(dictionary, "r"); //opens dictionary file
if (dict == NULL) // if cant be opened loading failed
{
return false;
}
char w[LENGTH + 1]; //buffer
int i = 0; //index within word
while(fscanf(dict, "%s", w) != EOF) // scanning the dictionary for words
{
int x = hash(w); //getting the number of the linked list within the table
node *n = malloc(sizeof(node)); //allocating memory for a new node
if (n == NULL)
{
return false;
}
for (int j = 0; j < strlen(w) + 1; j++) // documenting the word within the new node
{
n->word[i] = w[j];
}
n->next = table[x];
table[x] = n; //new node is the beginning of the linked list
dicsize++;
}
fclose(dict);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
printf("%i", dicsize);
return dicsize;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO
return false;
}
The likely culprit is
for (int j = 0; j < strlen(w) + 1; j++) // documenting the word within the new node
{
n->word[i] = w[j];
}
As i is not changing inside the loop, instead of copying the string it will dump all the chars into one. The other index should also be j. (as in n->word[j] = w[j];)
It would be better to do this as strcpy anyway.
I am working through the CS50 Speller problem and facing a problem that when running the program it returns an error of "Could not unload dictionaries/large."
I have looked at other people's solutions and can't for the life of me identify what is going wrong in my program. I am thinking it is in the has function, but have seen this has function in other people's working programs?
Any help would be greatly appreciated.
// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of words in dictionary
int word_count = 0;
// Number of buckets in hash table
const unsigned int N = 26;
// Hash table
node *table[N];
// Returns true if word is in dictionary else false
bool check(const char *word)
{
unsigned int n = hash(word);
node *cursor = table[n];
while (cursor != NULL)
{
if (strcasecmp(word, cursor -> word) == 0)
{
return true;
}
cursor = cursor -> next;
}
return false;
}
// Hashes word to a number
// Function credit to delipity(staff) on CS50 reddit page
unsigned int hash(const char *word)
{
unsigned int hash_value = 0;
for (int i = 0, n = strlen(word); i < n; i++)
{
hash_value = (hash_value << 2) ^ word[i];
}
return hash_value % N; //N is size of hashtable
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
// Open dictionary and check for memory issue
// Open dictionary file and create word array
FILE *dict = fopen(dictionary, "r");
char word[LENGTH + 1];
// Check for memory issue with dict
if(dict == NULL)
{
printf("Dictionary is null\n");
unload();
return false;
}
// Read string 1 word at a time
while (fscanf(dict, "%s", word) != EOF)
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
strcpy(n -> word, word);
word_count++;
// Index word using hash function
int dict_index = hash(word);
// Insert into hash table if already empty
if (table[dict_index] == NULL)
{
n -> next = NULL;
}
// Insert work as new node if not empyty
else
{
n -> next = table[dict_index];
}
table[dict_index] = n;
}
// Close dictionary file
fclose(dict);
// Indicate success
return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
return word_count;
return 0;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
for (int i = 0; i < N; i++)
{
node *cursor = table[i];
node *tmp = cursor;
while (cursor != NULL)
{
cursor = cursor -> next;
free(tmp);
tmp = cursor;
}
}
return false;
}
bool unload(void)
{
for (int i = 0; i < N; i++)
{
node *cursor = table[i];
while (cursor)
{
node *temp = cursor;
cursor = cursor->next;
free(temp);
}
}
return true;
}
Try this instead for the unload function