Loading a trie data structure in C- pset5 cs50 - c

I am having trouble loading data into my trie structure. I keep getting a sgemenation fault. Does this have to do with my malloc? Anyone see any issues?
Thanks
/**
* dictionary.c
*
* Computer Science 50
* Problem Set 5
*
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
#define ASCII_OFFSET 97;
/** Node of the data structure used to store the dictionary key words
* Data Structures Type is Tries
* Includes a bool to indicate if the current node is the end of a word
* A pointer to the nodes child node
*/
typedef struct node
{
bool is_word;
struct node* children[27];
}
node;
node* rootNode;
node* nextNode;
//Variable to track the size of the dictinoary
int wordCount = 0;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
//Get words length
int wordLength = strlen(word);
for(int i = 0; i < wordLength; i++)
{
//taking the character we need to check
int charToCheck = tolower(word[i]) - ASCII_OFFSET;
//Checking to see if the char exists in the data strucutre, Trie;
if(nextNode->children[charToCheck] == NULL)
{
return false;
}
//Advance the next node down the trie
nextNode = nextNode->children[charToCheck];
}
nextNode = rootNode;
//Return what is_word return
return nextNode->is_word;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
//Open dict.. file to read
FILE* file = fopen(dictionary,"r");
//Check if the dict.. exsists!
if(file == NULL)
{
return false;
}
//Creating the first node in our data strucutre
rootNode = malloc(sizeof(node));
nextNode = rootNode;
//Get character to store
int character = fgetc(file) - ASCII_OFFSET;
//Go through the dict... file
while(character != EOF)
{
//Go through each word in the file
while(character != '\n')
{
//Add into our data structure
if(nextNode->children[character] == NULL)
{
//Create memory inorder to insert the next node
nextNode->children[character] = malloc(sizeof(node));
nextNode = nextNode->children[character];
}else {
nextNode = nextNode->children[character];
}
//advance character to next
character = fgetc(file) - ASCII_OFFSET;
}
//advance character to next word
character = fgetc(file) - ASCII_OFFSET;
//Set the last node loaded to is_word to track the end of each word
nextNode->is_word = true;
wordCount++;
nextNode = rootNode;
}
fclose(file);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return wordCount;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
for(int i = 0; i < 26; i++)
{
if(nextNode->children[i] != NULL)
{
nextNode = nextNode->children[i];
unload();
}
}
free(nextNode);
return true;
}

You don't check that a char is in range before using it as index in the array. Any character outside the range a-z will cause buffer overflow.
You compare the character to known character constants after you have subtracted 97 from it.
You need to initialize the memory returned from malloc by assigning NULL to all array elements and set is_word to false.
Just took a short glance at the code, so I may have missed additional bugs.

Related

CS50 PSET 5 (Speller): How do I get my dictionary to correctly check one letter words?

I explored the possibility that i'm skipping the last node in a linked list, but I can't find the error.
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
unsigned int COUNT = 0;
// TODO: Choose number of buckets in hash table
const unsigned int N = 7480;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
int checkhash;
char lowerword[LENGTH + 1];
// convert word to lower case
for (int i = 0; i <= strlen(word); i++)
{
lowerword[i] = tolower(word[i]);
}
//get hash number
checkhash = hash(lowerword);
//check if it's in dictionary
node *trav = table[checkhash]; // traversal pointer
while (trav != NULL)
{
if (!strcasecmp(word, trav->word))
{
return true;
}
trav = trav->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
unsigned int hash_num = 0;
for (int i = 0; i < strlen(word); i += 3)
{
hash_num += word[i] * word[2];
hash_num += i + 3 * i;
}
hash_num += 3435;
return hash_num % N;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// TODO
// • Open dictionary file
FILE *dp = fopen(dictionary, "r");
// • Read strings from file one at a time
if (dp == NULL)
{
printf("Error: Could not open dictionary\n");
return false;
}
// declaring needed variables
char word[LENGTH + 1];
while (fscanf(dp, "%s", word) != EOF) // call fscanf untill EOF is returned
{
// • Create a new node for each word
// • Use malloc
// • Remember to check if return value is NULL
// • Copy word into node using strcpy
node *newnode = malloc(sizeof(node));
if (newnode == NULL)
{
unload();
printf("There is insufficient memory to load dictionary\n");
return false;
}
strcpy(newnode->word, word);
// • Hash word to obtain a hash value
// • Use the hash function
// • Function takes a string and returns an index
int index = hash(word);
// • Insert node into hash table at that location
if (table[index] == NULL)
{
table[index] = newnode;
newnode->next = NULL;
}
else
{
newnode->next = table[index];
table[index] = newnode;
}
COUNT++;
}
fclose(dp);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
return COUNT;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
node *trav; // traversal pointers
node *tmp;
for (int i = 0; i < N; i++)
{
trav = table[i]; //next pointer in array
// recursively free list
while (trav != NULL)
{
tmp = trav;
trav = trav->next;
free(tmp);
}
}
// TODO
return true;
}
I have tried readjusting the check function for the last 2 hours with no luck. I don't think it is the hash function as I've parsed through it for a while without finding any logical errors. I'm nearly certain that the error will be found within check(). I can't wait to smack my forehead over a newbie error.
Here is the feedback I get from check50:
MISSPELLED WORDS
WORDS MISSPELLED: 0
WORDS IN DICTIONARY: 1
WORDS IN TEXT: 1
Actual Output:
MISSPELLED WORDS
a
WORDS MISSPELLED: 1
WORDS IN DICTIONARY: 1
WORDS IN TEXT: 1

When I run the program, it shows "killed". How do I overcome this issue?

This is regarding pset5 speller, and below is my code. When I run it, it shows "killed" in my terminal. Moreover, when I try to debug it or try valgrind, the program just stops and exits. How do I overcome this issue?
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 150000;
// Hash table
node *table[N];
//Declare variables here so that they can be used in the different functions below
unsigned int HASH_INDEX;
unsigned int NO_OF_WORDS = 0;
node *CURSOR;
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
HASH_INDEX = hash(word);
CURSOR = table[HASH_INDEX];
do
{
if (strcasecmp(CURSOR->word, word) == 0)
{
return true;
}
else
{
CURSOR = CURSOR->next;
}
}
while (CURSOR != NULL);
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
// close address, each bucket in the hash table is a pointer
unsigned int hash_value = 0;
for (int i = 0; i < strlen(word); i++)
{
int c = tolower(word[i]);
hash_value = hash_value + c;
}
hash_value = hash_value % 31;
return hash_value;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// TODO
// open dictionary file
//use fopen
//remember to check if return value is NULL, to make sure we can successfully open up the file. If can't open then return false
FILE *d = fopen(dictionary, "r");
if (d == NULL)
{
return false;
}
// read strings from file one at a time (one word in the dictionary at a time)
//fscanf(file, "%s", word) --> %s means want to read in a string, word is char array a place we reading the word into.
//need to keep looping this fscanf for every word in the dictionary until fscanf return EOF (once it reaches the end of file)
char word[LENGTH + 1];
while (fscanf(d, "%s", word) != EOF)
{
// create a new node for each word, a node that contains a value and the next pointer (this helps to store each word into the hash table)
//use malloc to store a new node
//remember to check if return value is NULL --> check whether malloc returns NULL or not becos if malloc does not have enough memory it will return NULL and our load function should return false
//copy word into node using strcpy --> it takes a string and copy it from one location into another location
node *w = malloc(sizeof(node));
if (w == NULL)
{
return false;
}
else
{
strcpy(w->word, word);
w->next = NULL;
// hash word using the hash function to obtain a hash value
//use the hash function defined in dictionary.c --> function takes a string and returns an index (to determine which index into the hash table we should use when inserting this node)
HASH_INDEX = hash(word);
// lastly, take each of those words and insert that node into the hash table at the location given by hash functon
//recall that hash table is an array of linked lists
//be sure to set pointers in the correct order --> point to the 2nd node first then shift the head to point to the new node
if (table[HASH_INDEX] == NULL)
{
table[HASH_INDEX] = w;
}
w->next = table[HASH_INDEX];
table[HASH_INDEX] = w;
NO_OF_WORDS++;
}
}
fclose(d);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// TODO
return NO_OF_WORDS;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO
for (int i = 0; i < N; i++)
{
CURSOR = table[i];
while (CURSOR != NULL)
{
node *tmp = CURSOR;
CURSOR = CURSOR->next;
free(tmp);
}
}
return true;
}
Would really appreciate it if someone could help me with this! Thank you in advance!

cs50 speller keeps prompting free(): double free detected in tcache 2

I have been working on this problem set for quite a time and the code seems to be wrong but I couldn't find the solution. I have been comparing my code and other people's code but I still don't know where I got wrong. Really appreciate all your help if you can provide me with some ways to solve this problem. It keeps prompting me free(): double free detected in tcache 2 but I can't seem to find my mistake.
// Implements a dictionary's functionality
#include <stdbool.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 50;
// Hash table
node *table[N];
//word count
int count = 0;
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
bool found = false;
node *current = table[hash(word)];
while (current != NULL)
{
if (strcasecmp(current -> word, word) == 0)
{
found = true;
}
else if(current -> next != NULL)
{
current = current -> next;
}
else
{
return false;
}
}
return found;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO
unsigned long hash = 5381;
int c;
while ((c = toupper(*word++)))
{
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
}
return hash % N;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// TODO
FILE *infile = fopen(dictionary, "r");
if (infile == NULL)
{
return false;
}
char buffer[LENGTH+1];
while (fscanf(infile, "%s", buffer) != EOF)
{
node *n = malloc(sizeof(node));
strcpy(n -> word, buffer);
n -> next = table[hash(buffer)];
table[hash(buffer)] = n;
count++;
free(n);
}
fclose(infile);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// TODO
return count;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO
int num = count;
for (int i = 0; i < N ; i++)
{
node *current = table[i];
while (current != NULL)
{
node *temp = current;
current = current -> next;
free(temp);
num--;
}
}
if (num == 0)
{
return true;
}
else
{
return false;
}
}
The calls of free in this while loop
while (fscanf(infile, "%s", buffer) != EOF)
{
node *n = malloc(sizeof(node));
strcpy(n -> word, buffer);
n -> next = table[hash(buffer)];
table[hash(buffer)] = n;
count++;
free(n);
}
does not make a sense. You deleted at once (an object of the type node using the pointer n) what you was trying to add to the table (a valid address to an allocated object of the type node). As a result the element of the table at the position hash(buffer) that is set like
table[hash(buffer)] = n;
has an invalid value because it is the address of the already deleted node in this statement
free(n);
So in the function unload this invalid address will be again used to free already freed memory within the function load.
Pay attention to that you did not allocate memory as you wrote in a comment "for node n". n is just a pointer to the allocated unnamed object of the type node. So you are not freeing the pointer n itself in this statement
free(n);
You are freeing the allocated object of the type node using the pointer n. Thus all pointers that pointed to the allocated object of the type node become invalid.

I have a segmentation fault and am unsure about what is wrong with my code

Any guidance would be appreciated. I personally believe the problem lies in the load method. Also, the basic functionality of each method is written in the comments. What could be the cause of my segmentation fault? and Is everything working as intended? Thank you for your time.
Any resources that may point in me in the proper direction would be appreciated too.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include "dictionary.h"
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <cs50.h>
//Defining node:
typedef struct node
{ //Inner workings of each "element" in the linked lists
char word[LENGTH + 1]; //the word within the node is +1'd due to the memory after the word containing /0
struct node *next; //linked list
}node;
node *alphabetList[27]; //26 buckets that can contain variables of type node(of dynamic size)
//one bucket for each letter of the alphabet
node *cursor = NULL;
node *head = NULL;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
int bucketIndex ;
//no need to malloc information b/c we are simply pointing to previously established nodes.
if(word[0] >= 65 && word[0] < 97){
bucketIndex = word[0] - 65;
}
else{
bucketIndex = word[0] - 97;
}
node *head = alphabetList[bucketIndex];
node *cursor = head;
while(cursor != NULL)
{
cursor = cursor -> next;
if(strcmp(cursor -> word, word) != 0)
{
return true;
}
}
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
char *word = NULL;
int i = 0; //index
FILE *dictionaryTextFile;
dictionaryTextFile = fopen(dictionary, "r");
//scan for word
while(fscanf(dictionaryTextFile, "%s", word) != EOF)
{
//for every word we scan we want to malloc a node to ascertain we have sufficent memory
node *new_node = malloc(sizeof(node));
if(new_node == NULL) //error check(if you run out of memory malloc will return null)
{
unload();
return false;
}
//error check complete.
else{
strcpy(new_node -> word, word);
}
//not sure from here on
char first_letter = new_node[i].word[0]; //first letter of node word (confused on how to execute this properly)
first_letter = tolower(first_letter);
int index = first_letter - 97;
if(word){
for(node *ptr = alphabetList[index]; ptr!= NULL; ptr = ptr->next)
{
if(!ptr-> next){
ptr->next = new_node;
}
}
}
else
{
alphabetList[index] = new_node;
}
i++;
}
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
for(int i = 0; i <= 26; i++)
{
node *head = alphabetList[i];
node *cursor = head;
while(cursor != NULL)
{
node *temp = cursor;
cursor = cursor -> next;
free(temp);
}
}
return true;
}
The problem is obvious now you've said on which line the code crashes. Consider these lines...
char *word = NULL;
int i = 0; //index
FILE *dictionaryTextFile;
dictionaryTextFile = fopen(dictionary, "r");
//scan for word
while(fscanf(dictionaryTextFile, "%s", word) != EOF)
You've got 2 problems there. Firstly, you don't check that the call to fopen worked. You should always check that the value returned is not NULL.
Secondly, and the cause of the crash, is that word is still NULL - you don't allocate any space to hold a string in it. You might as well declare it the same as you declare it inside node so replace
char *word = NULL;
with
char word[LENGTH+1];
Speaking of node and to save you coming back with another crash later, you should always make sure you initialise all attributes of a struct. In this case new_node->next should be set to NULL as otherwise you'll come to check it later in your for loop (which looks fine BTW) and it might appear to point to a node, but it's pointing at some random place in memory and the code will crash.

Pset5 implementation of Load using trie

I'm having some trouble in the pset5, I actually don't know how to start debugging, I've watched the lessons a few times now and I'm not getting anywhere..
When I run speller.c it is giving me a seg fault, I ran the debugger and it crashes at the beggining of the For Loop, here follows my code:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
// default dictionary
#define DICTIONARY "dictionaries/large"
//created the struct node
typedef struct node
{
bool is_word;
struct node * paths[27];
}
node;
int letter = 0;
char * word = NULL;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
//todo
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//opens dictionary for reading
FILE *fp = fopen(DICTIONARY, "r");
if (fp == NULL)
{
return false;
unload();
}
//creates the root of the trie
node *root = malloc(sizeof(node));
root -> is_word = false;
node * trav = root;
char * word = NULL;
//start reading the file
while (fscanf(fp, "%s", word) != EOF)
{
for (int i = 0; i < strlen(word); i++)
{
//assing wich path to take
char c = fgetc(fp);
if (isupper(c))
{
letter = tolower (c);
letter = letter -'a';
}
else if (isalpha(c))
{
letter = c;
letter = letter -'a';
}
else if (c == '\'')
{
letter = 26;
}
else if (c == '\0')
{
trav -> is_word = true;
}
if (trav -> paths[letter] == NULL)
{
node *new_node = malloc(sizeof(node));
if (new_node == NULL)
{
return false;
unload();
}
//point to new node
trav -> paths[letter] = new_node;
}
else
{
trav = trav -> paths[letter];
}
}
}
if (fscanf(fp, "%s", word) == EOF)
{
fclose(fp);
return true;
}
return false;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
// TODO
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
// TODO
return false;
}
I also don't know how to point the new_node to the next new node and if I must have diferent names for them. For example, I'm going to store the word "foo", so I read the node called trav, go to the path[5] (the f letter), check if it is already opened, if not (if it's NULL) I create a node called new_node and point trav -> paths[5] to it, than I should update trav to be the new node, so I point it to it's own path[letter]?
word is a NULL pointer. And fscanf doesn't (can't really) allocate memory for that pointer to point to. So what happens when fscanf wants to dereference word to write the characters it reads? You can't dereference a NULL pointer, it leads to undefined behavior. I suggest you define word as an array instead.
Note : Answer taken from comments

Resources