Pset5 implementation of Load using trie - c

I'm having some trouble in the pset5, I actually don't know how to start debugging, I've watched the lessons a few times now and I'm not getting anywhere..
When I run speller.c it is giving me a seg fault, I ran the debugger and it crashes at the beggining of the For Loop, here follows my code:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
// default dictionary
#define DICTIONARY "dictionaries/large"
//created the struct node
typedef struct node
{
bool is_word;
struct node * paths[27];
}
node;
int letter = 0;
char * word = NULL;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
//todo
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//opens dictionary for reading
FILE *fp = fopen(DICTIONARY, "r");
if (fp == NULL)
{
return false;
unload();
}
//creates the root of the trie
node *root = malloc(sizeof(node));
root -> is_word = false;
node * trav = root;
char * word = NULL;
//start reading the file
while (fscanf(fp, "%s", word) != EOF)
{
for (int i = 0; i < strlen(word); i++)
{
//assing wich path to take
char c = fgetc(fp);
if (isupper(c))
{
letter = tolower (c);
letter = letter -'a';
}
else if (isalpha(c))
{
letter = c;
letter = letter -'a';
}
else if (c == '\'')
{
letter = 26;
}
else if (c == '\0')
{
trav -> is_word = true;
}
if (trav -> paths[letter] == NULL)
{
node *new_node = malloc(sizeof(node));
if (new_node == NULL)
{
return false;
unload();
}
//point to new node
trav -> paths[letter] = new_node;
}
else
{
trav = trav -> paths[letter];
}
}
}
if (fscanf(fp, "%s", word) == EOF)
{
fclose(fp);
return true;
}
return false;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
// TODO
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
// TODO
return false;
}
I also don't know how to point the new_node to the next new node and if I must have diferent names for them. For example, I'm going to store the word "foo", so I read the node called trav, go to the path[5] (the f letter), check if it is already opened, if not (if it's NULL) I create a node called new_node and point trav -> paths[5] to it, than I should update trav to be the new node, so I point it to it's own path[letter]?

word is a NULL pointer. And fscanf doesn't (can't really) allocate memory for that pointer to point to. So what happens when fscanf wants to dereference word to write the characters it reads? You can't dereference a NULL pointer, it leads to undefined behavior. I suggest you define word as an array instead.
Note : Answer taken from comments

Related

CS50 Pset5 Speller, Check50 says that my code is all correct, but its not right when I test my code

I have been working on the Speller assignment in pset5, and when I run the check50 command, everything seems to be fine.
:) dictionary.c exists
:) speller compiles
:) handles most basic words properly
:) handles min length (1-char) words
:) handles max length (45-char) words
:) handles words with apostrophes properly
:) spell-checking is case-insensitive
:) handles substrings properly
:) program is free of memory errors
But when I start testing the program myself by running "./speller texts/lalaland.txt " This happens
ps5/speller/ $ ./speller texts/lalaland.txt
Segmentation fault (core dumped)
Here is my code, code anyone help spot what is wrong with this.
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 676;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
int number = hash(word);
node *current = table[number];
while (current != NULL)
{
if (strcasecmp(word, current -> word) == 0)
{
return true;
}
current = current -> next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
int a = toupper(word[0]) - 'A';
int b = toupper(word[1]) - 'A';
return (a * b);
}
int counter;
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
//Open the file and check if it actually exists
FILE *file = fopen(dictionary, "r");
if (file == NULL)
{
printf("Unable to open file\n");
return false;
}
for (int i = 0; i < N; i++)
{
table [i] = malloc(sizeof(node));
table [i]-> next = NULL;
for (int u = 0; u < 48 ; u++)
{
table [i]-> word[u] = '0';
}
}
char buffer[LENGTH + 1];
//Read all the individual strings from the file
while (fscanf(file, "%s", buffer) != EOF)
{
//Create nodes and copy the words into them
node *current_node = malloc(sizeof(node));
if (current_node == NULL)
{
return false;
}
strcpy(current_node -> word, buffer);
int hashnumber = hash(buffer);
node *pointer = table[hashnumber];
if (pointer == NULL)
{
table[hashnumber] = current_node;
counter++;
}
else
{
current_node -> next = table[hashnumber];
table[hashnumber] = current_node;
counter++;
}
}
fclose(file);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
return counter;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
for (int i = 0; i < 676; i++)
{
node *current = table[i];
while (current != NULL)
{
node *temp = current;
current = current -> next;
free(temp);
}
}
return true;
}
Try this:
node *current_node = malloc(sizeof(node));
if (current_node == NULL)
{
return false;
}
current_node->next = NULL; // Missing, and presumed NULL..
strcpy(current_node->word, buffer);
It seemed like once I initialized the variable as NULL and made changes to the hash function. I checked through the other files and it worked now. Thanks!

I have a segmentation fault and am unsure about what is wrong with my code

Any guidance would be appreciated. I personally believe the problem lies in the load method. Also, the basic functionality of each method is written in the comments. What could be the cause of my segmentation fault? and Is everything working as intended? Thank you for your time.
Any resources that may point in me in the proper direction would be appreciated too.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include "dictionary.h"
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <cs50.h>
//Defining node:
typedef struct node
{ //Inner workings of each "element" in the linked lists
char word[LENGTH + 1]; //the word within the node is +1'd due to the memory after the word containing /0
struct node *next; //linked list
}node;
node *alphabetList[27]; //26 buckets that can contain variables of type node(of dynamic size)
//one bucket for each letter of the alphabet
node *cursor = NULL;
node *head = NULL;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
int bucketIndex ;
//no need to malloc information b/c we are simply pointing to previously established nodes.
if(word[0] >= 65 && word[0] < 97){
bucketIndex = word[0] - 65;
}
else{
bucketIndex = word[0] - 97;
}
node *head = alphabetList[bucketIndex];
node *cursor = head;
while(cursor != NULL)
{
cursor = cursor -> next;
if(strcmp(cursor -> word, word) != 0)
{
return true;
}
}
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
char *word = NULL;
int i = 0; //index
FILE *dictionaryTextFile;
dictionaryTextFile = fopen(dictionary, "r");
//scan for word
while(fscanf(dictionaryTextFile, "%s", word) != EOF)
{
//for every word we scan we want to malloc a node to ascertain we have sufficent memory
node *new_node = malloc(sizeof(node));
if(new_node == NULL) //error check(if you run out of memory malloc will return null)
{
unload();
return false;
}
//error check complete.
else{
strcpy(new_node -> word, word);
}
//not sure from here on
char first_letter = new_node[i].word[0]; //first letter of node word (confused on how to execute this properly)
first_letter = tolower(first_letter);
int index = first_letter - 97;
if(word){
for(node *ptr = alphabetList[index]; ptr!= NULL; ptr = ptr->next)
{
if(!ptr-> next){
ptr->next = new_node;
}
}
}
else
{
alphabetList[index] = new_node;
}
i++;
}
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
for(int i = 0; i <= 26; i++)
{
node *head = alphabetList[i];
node *cursor = head;
while(cursor != NULL)
{
node *temp = cursor;
cursor = cursor -> next;
free(temp);
}
}
return true;
}
The problem is obvious now you've said on which line the code crashes. Consider these lines...
char *word = NULL;
int i = 0; //index
FILE *dictionaryTextFile;
dictionaryTextFile = fopen(dictionary, "r");
//scan for word
while(fscanf(dictionaryTextFile, "%s", word) != EOF)
You've got 2 problems there. Firstly, you don't check that the call to fopen worked. You should always check that the value returned is not NULL.
Secondly, and the cause of the crash, is that word is still NULL - you don't allocate any space to hold a string in it. You might as well declare it the same as you declare it inside node so replace
char *word = NULL;
with
char word[LENGTH+1];
Speaking of node and to save you coming back with another crash later, you should always make sure you initialise all attributes of a struct. In this case new_node->next should be set to NULL as otherwise you'll come to check it later in your for loop (which looks fine BTW) and it might appear to point to a node, but it's pointing at some random place in memory and the code will crash.

Pset 5 Speller Trie segmentation fault in unload

I have implemented load, check, and unload for dictionary.c using trie, and I am suffering a segmentation fault. The error happens inside load function, or, to be more precise, inside the function called un_node, a recursive function I call in unload. The line if(cursor -> children[a] != NULL) pops up when I run debug50. Although I did not malloc my root/head node as a global (I just wrote node *root;), isn't the memory allocated in load function extant?
I did check whether I null pointed every children of next_node and root after mallocking them.
Regarding the un_node function, I am not sure whether I should free(cursor); return; inside the for loop or outside the for loop.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
/**
* Returns true if word is in dictionary else false.
*/
//create a trie data type
typedef struct node
{
bool is_word;
struct node *children[27]; //this is a pointer too!
}node;
//function prototype
void un_node (node *node_name);
void nullpoint (node *node_name);
//initialize root
node *root;
bool check(const char *word)
{
//create a trav pointer
node *cursor = root;
int i = 0;
while(word[i] != '\0')
{
char ch = word[i];
int index = (tolower(ch) - 97);
if(index == -58)
{
index = 26;
}
//validate index
if(index < 0 || index > 26)
{
printf("Error: index\n");
return false;
}
if(cursor -> children[index] != NULL)
{
cursor = cursor -> children[index];
i++;
}
else
{
//if it is NULL then word is not in dictionary
return false;
}
}
//end of word, so check if there is a flag from load
if(cursor -> is_word == true)
{
return true;
}
else
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//malloc space for root node
root = malloc(sizeof(node));
//all of root's children point to NULL now
nullpoint (root);
//open dictionary
FILE *dptr = fopen(dictionary, "r");
if(dptr == NULL)
{
return false;
}
char *c = malloc(sizeof(char));
node *next_node;
//scan the file char by char until end and store it in c
while(fscanf(dptr,"%s",c) != EOF)
{
//in the beginning of every word, make a traversal pointer copy of root so we can always refer back to root
node *trav = root;
//when temp increments, it moves on to next character of word
char *temp;
temp = c;
//repeat for every word
while ((*temp) != '\0')
{
//convert char into array index
int alpha = (tolower(*temp) - 97);
//handle apostrophe
if(alpha == -58)
{
alpha = 26;
}
//validate alpha
if(alpha < 0 || alpha > 26)
{
printf("Error: alpha\n");
return false;
}
//if array element is pointing to NULL, i.e. it hasn't been open yet,
if(trav -> children[alpha] == NULL)
{
//then malloc next node and point it with the cursor.
next_node = malloc(sizeof(node));
//initialize children of newly allocated node
nullpoint(next_node);
//cursor points at the newly allocated memory
trav -> children[alpha] = next_node;
//cusor moves on
trav = trav -> children[alpha];//null?
//quit if malloc returns null
if(next_node == NULL)
{
printf("Could not open dictionary");
return false;
}
}
else
{
//if an already existing path, just go to it
trav = trav -> children[alpha];
}
//increment the address of temp variable
temp++;
}
//a word is loaded.
trav -> is_word = true;
}
//success
fclose(dptr);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
// TODO
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
node *cursor = root;
un_node(cursor);
return true;
}
void un_node (node *cursor)
{
for(int a = 0; a<27; a++)
{
//if the children's pointee is not NULL, i.e. this is not a dead end
if(cursor -> children[a] != NULL)
{
//go back to the beginning (with cursor -> children[a] as new argument) restart this function
un_node(cursor -> children[a]);
}
}
//and when it is dead end, start to
free(cursor);
return;
}
//function that points all children of node to NULL
void nullpoint (node *node_name)
{
for(int t=0;t<27;t++)
{
node_name -> children[t] = NULL;
}
}

Segmentation Fault in Trie implementation in C

I'm trying to implement a trie data structure to spell-check a given text file. Currently, it seems to work for a couple words in the file, then it reaches a seg fault. I tried debugging to find the culprit, but all I found was that the value of "letter" is retaining seemingly random negative values (it should be between 1 and 27, inclusive). Normally the seg fault issue appears almost instantly after i start the program, so I'm not sure why the issue is popping up in the middle of the program.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
//create global root node
Trienode *root;
//create word counter for size() function
unsigned int wordcount = 0;
//creates an empty node
Trienode * newnode()
{
Trienode *nnode = NULL;
nnode = (Trienode *)malloc(sizeof(Trienode));
//initialize new node with null pointers and values
nnode -> parent = NULL;
for(int i = 0; i < 27; i++)
{
nnode -> children[i] = NULL;
}
return nnode;
}
void cleartrie(Trienode *head)
{
//if child node exists, free it, else continue with next iteration in for loop
if(head)
{
for(int i = 0; i < 27; i++)
{
cleartrie(head -> children[i]);
}
free(head);
head = NULL;
}
}
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
int i = 0;
int letter;
Trienode *head = root;
while(word[i] != '\0')
{
if(isalpha(word[i]))
{
letter = word[i] - 'a';
}
else //it must be an apostrophe
{
letter = word[i] - 13;
}
if(!(head -> children[letter]))
{
return false;
}
else //a pointer must exist
{
head = head -> children[letter];
}
i++;
}
return true;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//open file
FILE *infile = fopen(dictionary, "r");
Trienode *parnode; //parent node
root = newnode();
Trienode *curnode = root; //current node
int letter = 0;
//while not end of file, read words
while(fgetc(infile) != EOF)
{
//while not end of word, read letters
for(;;)
{
int c;
//read current letter in file
c = fgetc(infile);
//convert input char to corresponding array location (a - z = 0-25, apostrophe = 26)
if(isalpha(c))
{
letter = c - 'a';
}
else if (c == '\'')
{
letter = c - 13;
}
//if end of string, exit loop
else if (c == '\0')
{
//end of word, so endofstring = true
wordcount++;
break;
}
//move to next letter if not either apostrophe or alphabetical
else
{
break;
}
//if pointer to letter of word doesn't exist, create new node
if(curnode -> children[letter] == NULL)
{
curnode -> children[letter] = newnode();
}
//child node is the new current node
parnode = curnode;
curnode = curnode -> children[letter];
curnode -> parent = parnode;
}
//return to root node
curnode = root;
}
fclose(infile);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return wordcount;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
cleartrie(root);
if (root == NULL)
{
return true;
}
return false;
}
Sorry about the wall of text, but most of it is just there for context (I hope). The seg fault error is occurring on the if(!(head -> children[letter])) line of the check helper function.
Thanks in advance!
I suspect that your test file may contain some uppercase letters. If this is the case, then subtracting 'a' in an attempt to remap your letters will result in a negative number, since 'A' < 'a'. Have a look at the ASCII Table. Converting the letters to lowercase first should solve your problem.

Loading a trie data structure in C- pset5 cs50

I am having trouble loading data into my trie structure. I keep getting a sgemenation fault. Does this have to do with my malloc? Anyone see any issues?
Thanks
/**
* dictionary.c
*
* Computer Science 50
* Problem Set 5
*
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
#define ASCII_OFFSET 97;
/** Node of the data structure used to store the dictionary key words
* Data Structures Type is Tries
* Includes a bool to indicate if the current node is the end of a word
* A pointer to the nodes child node
*/
typedef struct node
{
bool is_word;
struct node* children[27];
}
node;
node* rootNode;
node* nextNode;
//Variable to track the size of the dictinoary
int wordCount = 0;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
//Get words length
int wordLength = strlen(word);
for(int i = 0; i < wordLength; i++)
{
//taking the character we need to check
int charToCheck = tolower(word[i]) - ASCII_OFFSET;
//Checking to see if the char exists in the data strucutre, Trie;
if(nextNode->children[charToCheck] == NULL)
{
return false;
}
//Advance the next node down the trie
nextNode = nextNode->children[charToCheck];
}
nextNode = rootNode;
//Return what is_word return
return nextNode->is_word;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
//Open dict.. file to read
FILE* file = fopen(dictionary,"r");
//Check if the dict.. exsists!
if(file == NULL)
{
return false;
}
//Creating the first node in our data strucutre
rootNode = malloc(sizeof(node));
nextNode = rootNode;
//Get character to store
int character = fgetc(file) - ASCII_OFFSET;
//Go through the dict... file
while(character != EOF)
{
//Go through each word in the file
while(character != '\n')
{
//Add into our data structure
if(nextNode->children[character] == NULL)
{
//Create memory inorder to insert the next node
nextNode->children[character] = malloc(sizeof(node));
nextNode = nextNode->children[character];
}else {
nextNode = nextNode->children[character];
}
//advance character to next
character = fgetc(file) - ASCII_OFFSET;
}
//advance character to next word
character = fgetc(file) - ASCII_OFFSET;
//Set the last node loaded to is_word to track the end of each word
nextNode->is_word = true;
wordCount++;
nextNode = rootNode;
}
fclose(file);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return wordCount;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
for(int i = 0; i < 26; i++)
{
if(nextNode->children[i] != NULL)
{
nextNode = nextNode->children[i];
unload();
}
}
free(nextNode);
return true;
}
You don't check that a char is in range before using it as index in the array. Any character outside the range a-z will cause buffer overflow.
You compare the character to known character constants after you have subtracted 97 from it.
You need to initialize the memory returned from malloc by assigning NULL to all array elements and set is_word to false.
Just took a short glance at the code, so I may have missed additional bugs.

Resources