CS50 speller: Unload executes 80,000,000+ frees - c

This is pset 5 in Harvard's CS50. It consists mainly of loading the dictionary, checking whether each word in the chosen text is found within the loaded dictionary and then unloading (freeing all of the allocated memory).
All the other functions work but when it comes to unload it just executes, as stated, 80,000,000+ frees, whilst in the program there is just 143,094 mallocs, I am a newbie so this is mindblowing for me. Below are the relevant functions for unload.
Edit (1): Here is some context regarding the variable hashtable.
typedef struct node
{
char word[LENGTH+2];
struct node *next;
}
node;
node *hashtable[264636] = { NULL };
I initialize each element to NULL so that in unload I can easily skip the index values for which no key was generated in the hash function.
//LOAD FUNCTION: Loads the dictionary into a hash table. Djb2 function used.
bool load(const char *dictionary)
{
head = malloc(sizeof(node));
head->next = NULL;
if (head == NULL)
{
unload();
return false;
}
opntr = fopen(dictionary, "r");
while (fscanf(opntr, "%s", WORD) != EOF)
{
wnode = malloc(sizeof(node));
if (wnode == NULL)
{
unload();
return false;
}
strcpy(wnode->word, WORD);
wnode->next = head;
head = wnode;
unsigned long key = hash(wnode->word);
hashtable[key] = wnode;
wnode = wnode->next;
}
return true;
}
// Checks whether the input word is somewhere within the dictionary
bool check(const char *word)
{
char dword[strlen(word) + 1];
strcpy(dword, word);
for (int c = 0; c < strlen(dword); c++)
{
dword[c] = tolower(dword[c]);
}
int key_w;
key_w = hash(dword);
node *cursor = hashtable[key_w];
while (cursor != NULL)
{
if (strcmp(cursor->word, dword) == 0)
{
return true;
}
cursor = cursor->next;
}
return false;
}
// Unloads memory allocated (?) to store the dictionary
bool unload(void)
{
for (int in = 0; in < 264636; in++)
{
node *fhead = hashtable[in];
while (fhead != NULL)
{
node *fcursor = fhead->next;
free(fhead);
fhead = fcursor;
}
}
return true;
}

In case anybody finds it helpful, the problem was within the load function. The nodes malloced were updated wrongfully so that the elements of the hashtable array weren't independent linked lists and I was unnecessarily using a head node to point to the first element of the list each time we wanted to add a node. Instead, to add a new node to its corresponding element in the hashtable array, we use the element itself as the head of the linked list and we arrange it so that it points to each node added, thus succesfully keeping track of the beginning of the linked list. Each element in the hashtable is initially pointing to NULL so that we can find the end of each linked list.
int null()
{
for (int i = 0; i < 264636; i++)
{
hashtable[i]->next = NULL;
}
return 0;
}
bool load(const char *dictionary)
{
opntr = fopen(dictionary, "r");
while (fscanf(opntr, "%s", WORD) != EOF)
{
wnode = malloc(sizeof(node));
if (wnode == NULL)
{
unload();
return false;
}
strcpy(wnode->word, WORD);
unsigned long key = hash(wnode->word);
wnode->next = hashtable[key];
hashtable[key] = wnode;
}
return true;
}

Related

CS50 PSET5: Segmentation core dumped error in the simple version of the dictionary file

Just trying to get the simple version of the hash function to work for now (haven't implemented the hash function logic yet, just wanna fix the memory errors before getting to the actual logic) but can't figure where the memory error is arising.
In my load function, I open up the dictionary file and initialize my hash table to set all pointers to NULL. Then, I use fscanf to scan the dictionary file, create a node *n for each word in the dictionary, and copy the word into this node.
If table[index] == NULL, then I set both table[index] and a node called head equal to node n, and set the next address equal to NULL. Otherwise, I set the next node as table[index] and table[index] as the current node, n.
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
//represents the number of words in the dictionary
int word_count = 0;
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 26;
// Hash table
node *table[N];
// initialize hash table (set all values to NULL)
// reference video: https://youtu.be/2Ti5yvumFTU
void init_table()
{
for (int i = 0; i < N; i++)
{
table[i] = NULL;
}
}
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
//obtain the index of the word in the hash
int node_index = hash(word);
//initiate cursor to point to the first node of the LL
node *cursor = table[node_index];
//traverse through the LL searching for the word
while (cursor != NULL)
{
if (strcasecmp(cursor->word, word) == 0)
{
return true;
}
cursor = cursor->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
return ((toupper(word[0]) - 'A') % N);
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
char *word = NULL;
int node_index = 0;
// Open input file
FILE *file = fopen(dictionary, "r");
//check if file exists
if (file == NULL)
{
return false;
}
init_table();
//count the number of words in the dictionary
word_count = fscanf(file, "%s", word);
node *head;
//loop through file for each word
while (word_count != EOF)
{
//assign memory for a new node
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
//copy the scanned word into the created node
strcpy(n->word, word);
//get the hash index of the node
node_index = hash(n->word);
if (table[node_index] == NULL)
{
head = table[node_index] = n;
n->next = NULL;
}
// otherwise set next node as table[index], table[index] as current node n
else
{
n->next = head;
table[node_index] = n;
}
}
return true;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
for (int i = 0; i < N; i++)
{
node *cursor = table[i];
node *temp = table[i];
while (cursor != NULL)
{
cursor = cursor->next;
free(temp);
temp = cursor->next;
}
}
return true;
}
A different file has the main function that links to this file.
Any help would be appreciated, thanks!
Your unload() function is buggy. For example, in the following statement you have no guarantee that cursor will not be null:
temp = cursor->next;
You can pretty easily fix this by changing the body of the loop to be:
temp = cursor;
cursor = cursor->next;
free(temp);

Dictionary not loading and spell checking words correctly

I am working on a pset, where we have to implement load, hash, size, check and unload functions as efficiently as possible using a hash table in such a way that TIME IN load, TIME IN check, TIME IN size, and TIME IN unload are all minimized.
When I test it with a given text, I am having the following message:
WORDS MISSPELLED: 17187
WORDS IN DICTIONARY: 1
WORDS IN TEXT: 17756
TIME IN load: 0.00
TIME IN check: 0.01
TIME IN size: 0.00
TIME IN unload: 0.00
TIME IN TOTAL: 0.01
The dictionary is not working correctly as I did not implement hash and check as per conditions but I do not know how. Check must be case insensitive and will only be passed words that contain (uppercase or lowercase) alphabetical characters and possibly apostrophes.
Can you help me on this and point any other potential errors I made.
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 17576; // for the first 3 letters so 26*26*26
// Hash table
node *table[N];
unsigned int hash_value; // initialise positive int value
unsigned int word_counter; // initialise positive int word counter
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
//Hash the word to obtain the hash value;
hash_value = hash(word);
//Access linked list at the given index in the hash table, we are creating a trav pointer to the head of list indexed via the hash function
for (node *cursor = table[hash_value]; cursor!= NULL; cursor = cursor->next)
{
if (strcasecmp(word, cursor->word) == 0)
{
return true;
}
}
return false;
}
// Hashes word to a number
// hash function was taken from : stackexchange.com
unsigned int hash(const char *word)
{
for (hash_value = 0; *word != '\0'; word++)
{
hash_value = *word + 31 * hash_value;
}
return hash_value % N;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// TODO
//allocate memory for all node buckets
for (int i = 0; i < N; i++)
{
table[i] = NULL;
}
//Open dictionary
FILE *file = fopen(dictionary, "r");
if (file == NULL) // check if file is not empty)
{
printf("Error, empty file\n");
return 1;
}
char word[LENGTH + 1];
while (fscanf(file, "%s", word) != EOF)
{
//create a new node
node *new_node = malloc(sizeof(node));
if (new_node == NULL) // check if enough memory
{
return false;
}
else
{
strcpy(new_node->word, word); //copies word into new_node->word;
hash_value= hash(word);
//Insert the node into the hash table
new_node->next = table[hash_value];
/set the head to the new pointer, so it is inserted in front
table[hash_value] = new_node;
word_counter ++;
}
fclose(file);
}
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// TODO
if (word_counter > 0)
{
return word_counter;
}
else
{
return 0;
}
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// loop through all buckets ( ie all indexes of the array of linked lists)
for (int i = 0; i < N; i++)
{
node *cursor = table[i]; // place cursor to each bucket
while (cursor!= NULL) //
{
node *tmp = cursor->next; // create a tmp equal to cursor
free(cursor);
cursor = tmp;
}
return true;
}
return false;
}

How do I reset the pointer to the head node when adding to nodes?

I need to start with the head node every cycle to add the new node in the right place. I think my current code makes the pointer for head and sptr equal so when I move one, the other one moves too. How do I move the pointer sptr to the beginning?
In debugger head->letter[1] turns true when I save an "a" as a word as it should, but later turns back to false as soon as sptr = head; runs. I think it has to do with the pointers.
typedef struct node
{
bool exist;
struct node* letter[28];
} trie;
trie *head = NULL;
int words = 0;
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
int i = 0;
FILE *infile = fopen(dictionary, "r");
if (infile == NULL)
{
printf("Could not open %s.\n", dictionary);
return 1;
}
// allocate memory
head = calloc(sizeof(trie), 1);
head->exist = false;
trie *sptr = head;
int cr;
// loop through file one character at a time
while ((cr = fgetc(infile)) != EOF)
{
// build a trie
// check if it's end of line
if (cr != 10)
{
i = tolower(cr) - 96;
// check for apostrophy
if (i < 0)
{
i = 0;
}
// check if the position exists
if (sptr->letter[i] == NULL)
{
sptr->letter[i] = malloc(sizeof(trie));
sptr->exist = false; // not the end of the word
}
sptr = sptr->letter[i];
}
else // indicate the end of a word that exists
{
sptr->exist = true;
sptr = head;// I think the problem might be here, I'm trying to move the pointer to the beginning.
words++;
}
}
return true;
}
Found the problem. It was in line sptr->exist = false, it should've read sptr->letter[i]->exist = false. The pointer was moving fine but I was changing the value of where the current pointer was, not the newly created node.

I have a segmentation fault and am unsure about what is wrong with my code

Any guidance would be appreciated. I personally believe the problem lies in the load method. Also, the basic functionality of each method is written in the comments. What could be the cause of my segmentation fault? and Is everything working as intended? Thank you for your time.
Any resources that may point in me in the proper direction would be appreciated too.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include "dictionary.h"
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <cs50.h>
//Defining node:
typedef struct node
{ //Inner workings of each "element" in the linked lists
char word[LENGTH + 1]; //the word within the node is +1'd due to the memory after the word containing /0
struct node *next; //linked list
}node;
node *alphabetList[27]; //26 buckets that can contain variables of type node(of dynamic size)
//one bucket for each letter of the alphabet
node *cursor = NULL;
node *head = NULL;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
int bucketIndex ;
//no need to malloc information b/c we are simply pointing to previously established nodes.
if(word[0] >= 65 && word[0] < 97){
bucketIndex = word[0] - 65;
}
else{
bucketIndex = word[0] - 97;
}
node *head = alphabetList[bucketIndex];
node *cursor = head;
while(cursor != NULL)
{
cursor = cursor -> next;
if(strcmp(cursor -> word, word) != 0)
{
return true;
}
}
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
char *word = NULL;
int i = 0; //index
FILE *dictionaryTextFile;
dictionaryTextFile = fopen(dictionary, "r");
//scan for word
while(fscanf(dictionaryTextFile, "%s", word) != EOF)
{
//for every word we scan we want to malloc a node to ascertain we have sufficent memory
node *new_node = malloc(sizeof(node));
if(new_node == NULL) //error check(if you run out of memory malloc will return null)
{
unload();
return false;
}
//error check complete.
else{
strcpy(new_node -> word, word);
}
//not sure from here on
char first_letter = new_node[i].word[0]; //first letter of node word (confused on how to execute this properly)
first_letter = tolower(first_letter);
int index = first_letter - 97;
if(word){
for(node *ptr = alphabetList[index]; ptr!= NULL; ptr = ptr->next)
{
if(!ptr-> next){
ptr->next = new_node;
}
}
}
else
{
alphabetList[index] = new_node;
}
i++;
}
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
for(int i = 0; i <= 26; i++)
{
node *head = alphabetList[i];
node *cursor = head;
while(cursor != NULL)
{
node *temp = cursor;
cursor = cursor -> next;
free(temp);
}
}
return true;
}
The problem is obvious now you've said on which line the code crashes. Consider these lines...
char *word = NULL;
int i = 0; //index
FILE *dictionaryTextFile;
dictionaryTextFile = fopen(dictionary, "r");
//scan for word
while(fscanf(dictionaryTextFile, "%s", word) != EOF)
You've got 2 problems there. Firstly, you don't check that the call to fopen worked. You should always check that the value returned is not NULL.
Secondly, and the cause of the crash, is that word is still NULL - you don't allocate any space to hold a string in it. You might as well declare it the same as you declare it inside node so replace
char *word = NULL;
with
char word[LENGTH+1];
Speaking of node and to save you coming back with another crash later, you should always make sure you initialise all attributes of a struct. In this case new_node->next should be set to NULL as otherwise you'll come to check it later in your for loop (which looks fine BTW) and it might appear to point to a node, but it's pointing at some random place in memory and the code will crash.

Pset 5 Speller Trie segmentation fault in unload

I have implemented load, check, and unload for dictionary.c using trie, and I am suffering a segmentation fault. The error happens inside load function, or, to be more precise, inside the function called un_node, a recursive function I call in unload. The line if(cursor -> children[a] != NULL) pops up when I run debug50. Although I did not malloc my root/head node as a global (I just wrote node *root;), isn't the memory allocated in load function extant?
I did check whether I null pointed every children of next_node and root after mallocking them.
Regarding the un_node function, I am not sure whether I should free(cursor); return; inside the for loop or outside the for loop.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
/**
* Returns true if word is in dictionary else false.
*/
//create a trie data type
typedef struct node
{
bool is_word;
struct node *children[27]; //this is a pointer too!
}node;
//function prototype
void un_node (node *node_name);
void nullpoint (node *node_name);
//initialize root
node *root;
bool check(const char *word)
{
//create a trav pointer
node *cursor = root;
int i = 0;
while(word[i] != '\0')
{
char ch = word[i];
int index = (tolower(ch) - 97);
if(index == -58)
{
index = 26;
}
//validate index
if(index < 0 || index > 26)
{
printf("Error: index\n");
return false;
}
if(cursor -> children[index] != NULL)
{
cursor = cursor -> children[index];
i++;
}
else
{
//if it is NULL then word is not in dictionary
return false;
}
}
//end of word, so check if there is a flag from load
if(cursor -> is_word == true)
{
return true;
}
else
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//malloc space for root node
root = malloc(sizeof(node));
//all of root's children point to NULL now
nullpoint (root);
//open dictionary
FILE *dptr = fopen(dictionary, "r");
if(dptr == NULL)
{
return false;
}
char *c = malloc(sizeof(char));
node *next_node;
//scan the file char by char until end and store it in c
while(fscanf(dptr,"%s",c) != EOF)
{
//in the beginning of every word, make a traversal pointer copy of root so we can always refer back to root
node *trav = root;
//when temp increments, it moves on to next character of word
char *temp;
temp = c;
//repeat for every word
while ((*temp) != '\0')
{
//convert char into array index
int alpha = (tolower(*temp) - 97);
//handle apostrophe
if(alpha == -58)
{
alpha = 26;
}
//validate alpha
if(alpha < 0 || alpha > 26)
{
printf("Error: alpha\n");
return false;
}
//if array element is pointing to NULL, i.e. it hasn't been open yet,
if(trav -> children[alpha] == NULL)
{
//then malloc next node and point it with the cursor.
next_node = malloc(sizeof(node));
//initialize children of newly allocated node
nullpoint(next_node);
//cursor points at the newly allocated memory
trav -> children[alpha] = next_node;
//cusor moves on
trav = trav -> children[alpha];//null?
//quit if malloc returns null
if(next_node == NULL)
{
printf("Could not open dictionary");
return false;
}
}
else
{
//if an already existing path, just go to it
trav = trav -> children[alpha];
}
//increment the address of temp variable
temp++;
}
//a word is loaded.
trav -> is_word = true;
}
//success
fclose(dptr);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
// TODO
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
node *cursor = root;
un_node(cursor);
return true;
}
void un_node (node *cursor)
{
for(int a = 0; a<27; a++)
{
//if the children's pointee is not NULL, i.e. this is not a dead end
if(cursor -> children[a] != NULL)
{
//go back to the beginning (with cursor -> children[a] as new argument) restart this function
un_node(cursor -> children[a]);
}
}
//and when it is dead end, start to
free(cursor);
return;
}
//function that points all children of node to NULL
void nullpoint (node *node_name)
{
for(int t=0;t<27;t++)
{
node_name -> children[t] = NULL;
}
}

Resources