I have implemented load, check, and unload for dictionary.c using trie, and I am suffering a segmentation fault. The error happens inside load function, or, to be more precise, inside the function called un_node, a recursive function I call in unload. The line if(cursor -> children[a] != NULL) pops up when I run debug50. Although I did not malloc my root/head node as a global (I just wrote node *root;), isn't the memory allocated in load function extant?
I did check whether I null pointed every children of next_node and root after mallocking them.
Regarding the un_node function, I am not sure whether I should free(cursor); return; inside the for loop or outside the for loop.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
/**
* Returns true if word is in dictionary else false.
*/
//create a trie data type
typedef struct node
{
bool is_word;
struct node *children[27]; //this is a pointer too!
}node;
//function prototype
void un_node (node *node_name);
void nullpoint (node *node_name);
//initialize root
node *root;
bool check(const char *word)
{
//create a trav pointer
node *cursor = root;
int i = 0;
while(word[i] != '\0')
{
char ch = word[i];
int index = (tolower(ch) - 97);
if(index == -58)
{
index = 26;
}
//validate index
if(index < 0 || index > 26)
{
printf("Error: index\n");
return false;
}
if(cursor -> children[index] != NULL)
{
cursor = cursor -> children[index];
i++;
}
else
{
//if it is NULL then word is not in dictionary
return false;
}
}
//end of word, so check if there is a flag from load
if(cursor -> is_word == true)
{
return true;
}
else
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//malloc space for root node
root = malloc(sizeof(node));
//all of root's children point to NULL now
nullpoint (root);
//open dictionary
FILE *dptr = fopen(dictionary, "r");
if(dptr == NULL)
{
return false;
}
char *c = malloc(sizeof(char));
node *next_node;
//scan the file char by char until end and store it in c
while(fscanf(dptr,"%s",c) != EOF)
{
//in the beginning of every word, make a traversal pointer copy of root so we can always refer back to root
node *trav = root;
//when temp increments, it moves on to next character of word
char *temp;
temp = c;
//repeat for every word
while ((*temp) != '\0')
{
//convert char into array index
int alpha = (tolower(*temp) - 97);
//handle apostrophe
if(alpha == -58)
{
alpha = 26;
}
//validate alpha
if(alpha < 0 || alpha > 26)
{
printf("Error: alpha\n");
return false;
}
//if array element is pointing to NULL, i.e. it hasn't been open yet,
if(trav -> children[alpha] == NULL)
{
//then malloc next node and point it with the cursor.
next_node = malloc(sizeof(node));
//initialize children of newly allocated node
nullpoint(next_node);
//cursor points at the newly allocated memory
trav -> children[alpha] = next_node;
//cusor moves on
trav = trav -> children[alpha];//null?
//quit if malloc returns null
if(next_node == NULL)
{
printf("Could not open dictionary");
return false;
}
}
else
{
//if an already existing path, just go to it
trav = trav -> children[alpha];
}
//increment the address of temp variable
temp++;
}
//a word is loaded.
trav -> is_word = true;
}
//success
fclose(dptr);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
// TODO
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
node *cursor = root;
un_node(cursor);
return true;
}
void un_node (node *cursor)
{
for(int a = 0; a<27; a++)
{
//if the children's pointee is not NULL, i.e. this is not a dead end
if(cursor -> children[a] != NULL)
{
//go back to the beginning (with cursor -> children[a] as new argument) restart this function
un_node(cursor -> children[a]);
}
}
//and when it is dead end, start to
free(cursor);
return;
}
//function that points all children of node to NULL
void nullpoint (node *node_name)
{
for(int t=0;t<27;t++)
{
node_name -> children[t] = NULL;
}
}
Related
Just trying to get the simple version of the hash function to work for now (haven't implemented the hash function logic yet, just wanna fix the memory errors before getting to the actual logic) but can't figure where the memory error is arising.
In my load function, I open up the dictionary file and initialize my hash table to set all pointers to NULL. Then, I use fscanf to scan the dictionary file, create a node *n for each word in the dictionary, and copy the word into this node.
If table[index] == NULL, then I set both table[index] and a node called head equal to node n, and set the next address equal to NULL. Otherwise, I set the next node as table[index] and table[index] as the current node, n.
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
//represents the number of words in the dictionary
int word_count = 0;
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 26;
// Hash table
node *table[N];
// initialize hash table (set all values to NULL)
// reference video: https://youtu.be/2Ti5yvumFTU
void init_table()
{
for (int i = 0; i < N; i++)
{
table[i] = NULL;
}
}
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
//obtain the index of the word in the hash
int node_index = hash(word);
//initiate cursor to point to the first node of the LL
node *cursor = table[node_index];
//traverse through the LL searching for the word
while (cursor != NULL)
{
if (strcasecmp(cursor->word, word) == 0)
{
return true;
}
cursor = cursor->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
return ((toupper(word[0]) - 'A') % N);
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
char *word = NULL;
int node_index = 0;
// Open input file
FILE *file = fopen(dictionary, "r");
//check if file exists
if (file == NULL)
{
return false;
}
init_table();
//count the number of words in the dictionary
word_count = fscanf(file, "%s", word);
node *head;
//loop through file for each word
while (word_count != EOF)
{
//assign memory for a new node
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
//copy the scanned word into the created node
strcpy(n->word, word);
//get the hash index of the node
node_index = hash(n->word);
if (table[node_index] == NULL)
{
head = table[node_index] = n;
n->next = NULL;
}
// otherwise set next node as table[index], table[index] as current node n
else
{
n->next = head;
table[node_index] = n;
}
}
return true;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
for (int i = 0; i < N; i++)
{
node *cursor = table[i];
node *temp = table[i];
while (cursor != NULL)
{
cursor = cursor->next;
free(temp);
temp = cursor->next;
}
}
return true;
}
A different file has the main function that links to this file.
Any help would be appreciated, thanks!
Your unload() function is buggy. For example, in the following statement you have no guarantee that cursor will not be null:
temp = cursor->next;
You can pretty easily fix this by changing the body of the loop to be:
temp = cursor;
cursor = cursor->next;
free(temp);
I have been working on this problem set for quite a time and the code seems to be wrong but I couldn't find the solution. I have been comparing my code and other people's code but I still don't know where I got wrong. Really appreciate all your help if you can provide me with some ways to solve this problem. It keeps prompting me free(): double free detected in tcache 2 but I can't seem to find my mistake.
// Implements a dictionary's functionality
#include <stdbool.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 50;
// Hash table
node *table[N];
//word count
int count = 0;
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
bool found = false;
node *current = table[hash(word)];
while (current != NULL)
{
if (strcasecmp(current -> word, word) == 0)
{
found = true;
}
else if(current -> next != NULL)
{
current = current -> next;
}
else
{
return false;
}
}
return found;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO
unsigned long hash = 5381;
int c;
while ((c = toupper(*word++)))
{
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
}
return hash % N;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// TODO
FILE *infile = fopen(dictionary, "r");
if (infile == NULL)
{
return false;
}
char buffer[LENGTH+1];
while (fscanf(infile, "%s", buffer) != EOF)
{
node *n = malloc(sizeof(node));
strcpy(n -> word, buffer);
n -> next = table[hash(buffer)];
table[hash(buffer)] = n;
count++;
free(n);
}
fclose(infile);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// TODO
return count;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO
int num = count;
for (int i = 0; i < N ; i++)
{
node *current = table[i];
while (current != NULL)
{
node *temp = current;
current = current -> next;
free(temp);
num--;
}
}
if (num == 0)
{
return true;
}
else
{
return false;
}
}
The calls of free in this while loop
while (fscanf(infile, "%s", buffer) != EOF)
{
node *n = malloc(sizeof(node));
strcpy(n -> word, buffer);
n -> next = table[hash(buffer)];
table[hash(buffer)] = n;
count++;
free(n);
}
does not make a sense. You deleted at once (an object of the type node using the pointer n) what you was trying to add to the table (a valid address to an allocated object of the type node). As a result the element of the table at the position hash(buffer) that is set like
table[hash(buffer)] = n;
has an invalid value because it is the address of the already deleted node in this statement
free(n);
So in the function unload this invalid address will be again used to free already freed memory within the function load.
Pay attention to that you did not allocate memory as you wrote in a comment "for node n". n is just a pointer to the allocated unnamed object of the type node. So you are not freeing the pointer n itself in this statement
free(n);
You are freeing the allocated object of the type node using the pointer n. Thus all pointers that pointed to the allocated object of the type node become invalid.
This is pset 5 in Harvard's CS50. It consists mainly of loading the dictionary, checking whether each word in the chosen text is found within the loaded dictionary and then unloading (freeing all of the allocated memory).
All the other functions work but when it comes to unload it just executes, as stated, 80,000,000+ frees, whilst in the program there is just 143,094 mallocs, I am a newbie so this is mindblowing for me. Below are the relevant functions for unload.
Edit (1): Here is some context regarding the variable hashtable.
typedef struct node
{
char word[LENGTH+2];
struct node *next;
}
node;
node *hashtable[264636] = { NULL };
I initialize each element to NULL so that in unload I can easily skip the index values for which no key was generated in the hash function.
//LOAD FUNCTION: Loads the dictionary into a hash table. Djb2 function used.
bool load(const char *dictionary)
{
head = malloc(sizeof(node));
head->next = NULL;
if (head == NULL)
{
unload();
return false;
}
opntr = fopen(dictionary, "r");
while (fscanf(opntr, "%s", WORD) != EOF)
{
wnode = malloc(sizeof(node));
if (wnode == NULL)
{
unload();
return false;
}
strcpy(wnode->word, WORD);
wnode->next = head;
head = wnode;
unsigned long key = hash(wnode->word);
hashtable[key] = wnode;
wnode = wnode->next;
}
return true;
}
// Checks whether the input word is somewhere within the dictionary
bool check(const char *word)
{
char dword[strlen(word) + 1];
strcpy(dword, word);
for (int c = 0; c < strlen(dword); c++)
{
dword[c] = tolower(dword[c]);
}
int key_w;
key_w = hash(dword);
node *cursor = hashtable[key_w];
while (cursor != NULL)
{
if (strcmp(cursor->word, dword) == 0)
{
return true;
}
cursor = cursor->next;
}
return false;
}
// Unloads memory allocated (?) to store the dictionary
bool unload(void)
{
for (int in = 0; in < 264636; in++)
{
node *fhead = hashtable[in];
while (fhead != NULL)
{
node *fcursor = fhead->next;
free(fhead);
fhead = fcursor;
}
}
return true;
}
In case anybody finds it helpful, the problem was within the load function. The nodes malloced were updated wrongfully so that the elements of the hashtable array weren't independent linked lists and I was unnecessarily using a head node to point to the first element of the list each time we wanted to add a node. Instead, to add a new node to its corresponding element in the hashtable array, we use the element itself as the head of the linked list and we arrange it so that it points to each node added, thus succesfully keeping track of the beginning of the linked list. Each element in the hashtable is initially pointing to NULL so that we can find the end of each linked list.
int null()
{
for (int i = 0; i < 264636; i++)
{
hashtable[i]->next = NULL;
}
return 0;
}
bool load(const char *dictionary)
{
opntr = fopen(dictionary, "r");
while (fscanf(opntr, "%s", WORD) != EOF)
{
wnode = malloc(sizeof(node));
if (wnode == NULL)
{
unload();
return false;
}
strcpy(wnode->word, WORD);
unsigned long key = hash(wnode->word);
wnode->next = hashtable[key];
hashtable[key] = wnode;
}
return true;
}
I'm having some trouble in the pset5, I actually don't know how to start debugging, I've watched the lessons a few times now and I'm not getting anywhere..
When I run speller.c it is giving me a seg fault, I ran the debugger and it crashes at the beggining of the For Loop, here follows my code:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
// default dictionary
#define DICTIONARY "dictionaries/large"
//created the struct node
typedef struct node
{
bool is_word;
struct node * paths[27];
}
node;
int letter = 0;
char * word = NULL;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
//todo
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//opens dictionary for reading
FILE *fp = fopen(DICTIONARY, "r");
if (fp == NULL)
{
return false;
unload();
}
//creates the root of the trie
node *root = malloc(sizeof(node));
root -> is_word = false;
node * trav = root;
char * word = NULL;
//start reading the file
while (fscanf(fp, "%s", word) != EOF)
{
for (int i = 0; i < strlen(word); i++)
{
//assing wich path to take
char c = fgetc(fp);
if (isupper(c))
{
letter = tolower (c);
letter = letter -'a';
}
else if (isalpha(c))
{
letter = c;
letter = letter -'a';
}
else if (c == '\'')
{
letter = 26;
}
else if (c == '\0')
{
trav -> is_word = true;
}
if (trav -> paths[letter] == NULL)
{
node *new_node = malloc(sizeof(node));
if (new_node == NULL)
{
return false;
unload();
}
//point to new node
trav -> paths[letter] = new_node;
}
else
{
trav = trav -> paths[letter];
}
}
}
if (fscanf(fp, "%s", word) == EOF)
{
fclose(fp);
return true;
}
return false;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
// TODO
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
// TODO
return false;
}
I also don't know how to point the new_node to the next new node and if I must have diferent names for them. For example, I'm going to store the word "foo", so I read the node called trav, go to the path[5] (the f letter), check if it is already opened, if not (if it's NULL) I create a node called new_node and point trav -> paths[5] to it, than I should update trav to be the new node, so I point it to it's own path[letter]?
word is a NULL pointer. And fscanf doesn't (can't really) allocate memory for that pointer to point to. So what happens when fscanf wants to dereference word to write the characters it reads? You can't dereference a NULL pointer, it leads to undefined behavior. I suggest you define word as an array instead.
Note : Answer taken from comments
I'm trying to implement a trie data structure to spell-check a given text file. Currently, it seems to work for a couple words in the file, then it reaches a seg fault. I tried debugging to find the culprit, but all I found was that the value of "letter" is retaining seemingly random negative values (it should be between 1 and 27, inclusive). Normally the seg fault issue appears almost instantly after i start the program, so I'm not sure why the issue is popping up in the middle of the program.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
//create global root node
Trienode *root;
//create word counter for size() function
unsigned int wordcount = 0;
//creates an empty node
Trienode * newnode()
{
Trienode *nnode = NULL;
nnode = (Trienode *)malloc(sizeof(Trienode));
//initialize new node with null pointers and values
nnode -> parent = NULL;
for(int i = 0; i < 27; i++)
{
nnode -> children[i] = NULL;
}
return nnode;
}
void cleartrie(Trienode *head)
{
//if child node exists, free it, else continue with next iteration in for loop
if(head)
{
for(int i = 0; i < 27; i++)
{
cleartrie(head -> children[i]);
}
free(head);
head = NULL;
}
}
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
int i = 0;
int letter;
Trienode *head = root;
while(word[i] != '\0')
{
if(isalpha(word[i]))
{
letter = word[i] - 'a';
}
else //it must be an apostrophe
{
letter = word[i] - 13;
}
if(!(head -> children[letter]))
{
return false;
}
else //a pointer must exist
{
head = head -> children[letter];
}
i++;
}
return true;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//open file
FILE *infile = fopen(dictionary, "r");
Trienode *parnode; //parent node
root = newnode();
Trienode *curnode = root; //current node
int letter = 0;
//while not end of file, read words
while(fgetc(infile) != EOF)
{
//while not end of word, read letters
for(;;)
{
int c;
//read current letter in file
c = fgetc(infile);
//convert input char to corresponding array location (a - z = 0-25, apostrophe = 26)
if(isalpha(c))
{
letter = c - 'a';
}
else if (c == '\'')
{
letter = c - 13;
}
//if end of string, exit loop
else if (c == '\0')
{
//end of word, so endofstring = true
wordcount++;
break;
}
//move to next letter if not either apostrophe or alphabetical
else
{
break;
}
//if pointer to letter of word doesn't exist, create new node
if(curnode -> children[letter] == NULL)
{
curnode -> children[letter] = newnode();
}
//child node is the new current node
parnode = curnode;
curnode = curnode -> children[letter];
curnode -> parent = parnode;
}
//return to root node
curnode = root;
}
fclose(infile);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return wordcount;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
cleartrie(root);
if (root == NULL)
{
return true;
}
return false;
}
Sorry about the wall of text, but most of it is just there for context (I hope). The seg fault error is occurring on the if(!(head -> children[letter])) line of the check helper function.
Thanks in advance!
I suspect that your test file may contain some uppercase letters. If this is the case, then subtracting 'a' in an attempt to remap your letters will result in a negative number, since 'A' < 'a'. Have a look at the ASCII Table. Converting the letters to lowercase first should solve your problem.