I'm trying to implement a trie data structure to spell-check a given text file. Currently, it seems to work for a couple words in the file, then it reaches a seg fault. I tried debugging to find the culprit, but all I found was that the value of "letter" is retaining seemingly random negative values (it should be between 1 and 27, inclusive). Normally the seg fault issue appears almost instantly after i start the program, so I'm not sure why the issue is popping up in the middle of the program.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
//create global root node
Trienode *root;
//create word counter for size() function
unsigned int wordcount = 0;
//creates an empty node
Trienode * newnode()
{
Trienode *nnode = NULL;
nnode = (Trienode *)malloc(sizeof(Trienode));
//initialize new node with null pointers and values
nnode -> parent = NULL;
for(int i = 0; i < 27; i++)
{
nnode -> children[i] = NULL;
}
return nnode;
}
void cleartrie(Trienode *head)
{
//if child node exists, free it, else continue with next iteration in for loop
if(head)
{
for(int i = 0; i < 27; i++)
{
cleartrie(head -> children[i]);
}
free(head);
head = NULL;
}
}
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
int i = 0;
int letter;
Trienode *head = root;
while(word[i] != '\0')
{
if(isalpha(word[i]))
{
letter = word[i] - 'a';
}
else //it must be an apostrophe
{
letter = word[i] - 13;
}
if(!(head -> children[letter]))
{
return false;
}
else //a pointer must exist
{
head = head -> children[letter];
}
i++;
}
return true;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//open file
FILE *infile = fopen(dictionary, "r");
Trienode *parnode; //parent node
root = newnode();
Trienode *curnode = root; //current node
int letter = 0;
//while not end of file, read words
while(fgetc(infile) != EOF)
{
//while not end of word, read letters
for(;;)
{
int c;
//read current letter in file
c = fgetc(infile);
//convert input char to corresponding array location (a - z = 0-25, apostrophe = 26)
if(isalpha(c))
{
letter = c - 'a';
}
else if (c == '\'')
{
letter = c - 13;
}
//if end of string, exit loop
else if (c == '\0')
{
//end of word, so endofstring = true
wordcount++;
break;
}
//move to next letter if not either apostrophe or alphabetical
else
{
break;
}
//if pointer to letter of word doesn't exist, create new node
if(curnode -> children[letter] == NULL)
{
curnode -> children[letter] = newnode();
}
//child node is the new current node
parnode = curnode;
curnode = curnode -> children[letter];
curnode -> parent = parnode;
}
//return to root node
curnode = root;
}
fclose(infile);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return wordcount;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
cleartrie(root);
if (root == NULL)
{
return true;
}
return false;
}
Sorry about the wall of text, but most of it is just there for context (I hope). The seg fault error is occurring on the if(!(head -> children[letter])) line of the check helper function.
Thanks in advance!
I suspect that your test file may contain some uppercase letters. If this is the case, then subtracting 'a' in an attempt to remap your letters will result in a negative number, since 'A' < 'a'. Have a look at the ASCII Table. Converting the letters to lowercase first should solve your problem.
Related
I have been working on the Speller assignment in pset5, and when I run the check50 command, everything seems to be fine.
:) dictionary.c exists
:) speller compiles
:) handles most basic words properly
:) handles min length (1-char) words
:) handles max length (45-char) words
:) handles words with apostrophes properly
:) spell-checking is case-insensitive
:) handles substrings properly
:) program is free of memory errors
But when I start testing the program myself by running "./speller texts/lalaland.txt " This happens
ps5/speller/ $ ./speller texts/lalaland.txt
Segmentation fault (core dumped)
Here is my code, code anyone help spot what is wrong with this.
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 676;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
int number = hash(word);
node *current = table[number];
while (current != NULL)
{
if (strcasecmp(word, current -> word) == 0)
{
return true;
}
current = current -> next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
int a = toupper(word[0]) - 'A';
int b = toupper(word[1]) - 'A';
return (a * b);
}
int counter;
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
//Open the file and check if it actually exists
FILE *file = fopen(dictionary, "r");
if (file == NULL)
{
printf("Unable to open file\n");
return false;
}
for (int i = 0; i < N; i++)
{
table [i] = malloc(sizeof(node));
table [i]-> next = NULL;
for (int u = 0; u < 48 ; u++)
{
table [i]-> word[u] = '0';
}
}
char buffer[LENGTH + 1];
//Read all the individual strings from the file
while (fscanf(file, "%s", buffer) != EOF)
{
//Create nodes and copy the words into them
node *current_node = malloc(sizeof(node));
if (current_node == NULL)
{
return false;
}
strcpy(current_node -> word, buffer);
int hashnumber = hash(buffer);
node *pointer = table[hashnumber];
if (pointer == NULL)
{
table[hashnumber] = current_node;
counter++;
}
else
{
current_node -> next = table[hashnumber];
table[hashnumber] = current_node;
counter++;
}
}
fclose(file);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
return counter;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
for (int i = 0; i < 676; i++)
{
node *current = table[i];
while (current != NULL)
{
node *temp = current;
current = current -> next;
free(temp);
}
}
return true;
}
Try this:
node *current_node = malloc(sizeof(node));
if (current_node == NULL)
{
return false;
}
current_node->next = NULL; // Missing, and presumed NULL..
strcpy(current_node->word, buffer);
It seemed like once I initialized the variable as NULL and made changes to the hash function. I checked through the other files and it worked now. Thanks!
I'm was finished with cs50 pset5 speller(or so I thought), when I ran my complete program for the first time. It started printing out some words, so I was like: "Bingo! I'm done!" and I was seriously happy because I've been working on it for weeks. But then, while it was going through the words, I noticed some words like the word 'and' printed out when they were in the dictionary. So I thought that I had a small bug in my code. But when it came to the end of the list, It didn't do the 'Words misspelled:' Part and the other parts. It didn't even make a new line starting with $~. I had to close up that entire terminal because I couldn't do anything else there since it was like this:
OUT
THE
END
Those words were the last words, and then there's just an empty line. Then I opened a new terminal and checked my answers with the staff's answers, and SO much was wrong! I tried to find some bugs in my code, but I couldn't find any. If you can find some and tell me, I REALLY appreciate it. Here's my code:
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <string.h>
#include <stdlib.h>
#include "dictionary.h"
#include <strings.h>
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 19683;
// Hash table
node *table[N];
// Returns true if word is in dictionary else false
bool check(const char *word)
{
unsigned int lol = hash(word);
// TODO
int i;
node *cursor =table[lol];;
while(cursor != NULL)
{
if(strcasecmp(word, cursor -> word) == 0)
{
return true;
}
cursor = cursor->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
int count = 0;
// TODO
for(int i = 96; i < 122; i++)
{
for(int j = 96; j < 122; j++)
{
for(int d = 96; d < 122; d++)
{
count++;
char firstletter = i;
char secondletter = j;
char thirdletter = d;
if(word[0] == firstletter&& word[1] == secondletter && word[2] == thirdletter)
{
return count;
}
}
}
}
return 1;
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
// Open dictionary and check for memory issue
// Open dictionary file and create word array
FILE *dict = fopen(dictionary, "r");
char word[LENGTH + 1];
// Check for memory issue with dict
if(dict == NULL)
{
printf("Dictionary is null\n");
unload();
return false;
}
// Read string 1 word at a time
while (fscanf(dict, "%s", word) != EOF)
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
strcpy(n -> word, word);
// Index word using hash function
int dict_index = hash(word);
// Insert into hash table if already empty
if (table[dict_index] == NULL)
{
n -> next = NULL;
}
// Insert work as new node if not empyty
else
{
n -> next = table[dict_index];
}
table[dict_index] = n;
}
// Close dictionary file
fclose(dict);
// Indicate success
return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
int count = 0;
// TODO
for(int i = 0; i < N; i++)
{
while(table[i] != NULL)
{
node* cursor = table[i];
count++;
cursor = cursor -> next;
}
}
return count;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
// TODO
for(int i = 0; i < N; i++)
{
node *cursor = table[i];
while(cursor)
{
node *temp = cursor;
cursor = cursor -> next;
if(temp != NULL)
{
return true;
}
free(temp);
}
}
return 1;
}
Thanks a lot,
Lost in code.
hash is not case sensitive. Any word in the text with a capital letter will go in index 1.
unload has an infinte loop here while(table[i] != NULL) because table[i] never changes
Fixing these two issues should produce progress, though maybe not total success.
NB ctrl-c should end a program that is stuck in a loop without needing to close the terminal.
I'm having some trouble in the pset5, I actually don't know how to start debugging, I've watched the lessons a few times now and I'm not getting anywhere..
When I run speller.c it is giving me a seg fault, I ran the debugger and it crashes at the beggining of the For Loop, here follows my code:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
// default dictionary
#define DICTIONARY "dictionaries/large"
//created the struct node
typedef struct node
{
bool is_word;
struct node * paths[27];
}
node;
int letter = 0;
char * word = NULL;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
//todo
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//opens dictionary for reading
FILE *fp = fopen(DICTIONARY, "r");
if (fp == NULL)
{
return false;
unload();
}
//creates the root of the trie
node *root = malloc(sizeof(node));
root -> is_word = false;
node * trav = root;
char * word = NULL;
//start reading the file
while (fscanf(fp, "%s", word) != EOF)
{
for (int i = 0; i < strlen(word); i++)
{
//assing wich path to take
char c = fgetc(fp);
if (isupper(c))
{
letter = tolower (c);
letter = letter -'a';
}
else if (isalpha(c))
{
letter = c;
letter = letter -'a';
}
else if (c == '\'')
{
letter = 26;
}
else if (c == '\0')
{
trav -> is_word = true;
}
if (trav -> paths[letter] == NULL)
{
node *new_node = malloc(sizeof(node));
if (new_node == NULL)
{
return false;
unload();
}
//point to new node
trav -> paths[letter] = new_node;
}
else
{
trav = trav -> paths[letter];
}
}
}
if (fscanf(fp, "%s", word) == EOF)
{
fclose(fp);
return true;
}
return false;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
// TODO
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
// TODO
return false;
}
I also don't know how to point the new_node to the next new node and if I must have diferent names for them. For example, I'm going to store the word "foo", so I read the node called trav, go to the path[5] (the f letter), check if it is already opened, if not (if it's NULL) I create a node called new_node and point trav -> paths[5] to it, than I should update trav to be the new node, so I point it to it's own path[letter]?
word is a NULL pointer. And fscanf doesn't (can't really) allocate memory for that pointer to point to. So what happens when fscanf wants to dereference word to write the characters it reads? You can't dereference a NULL pointer, it leads to undefined behavior. I suggest you define word as an array instead.
Note : Answer taken from comments
I have implemented load, check, and unload for dictionary.c using trie, and I am suffering a segmentation fault. The error happens inside load function, or, to be more precise, inside the function called un_node, a recursive function I call in unload. The line if(cursor -> children[a] != NULL) pops up when I run debug50. Although I did not malloc my root/head node as a global (I just wrote node *root;), isn't the memory allocated in load function extant?
I did check whether I null pointed every children of next_node and root after mallocking them.
Regarding the un_node function, I am not sure whether I should free(cursor); return; inside the for loop or outside the for loop.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
/**
* Returns true if word is in dictionary else false.
*/
//create a trie data type
typedef struct node
{
bool is_word;
struct node *children[27]; //this is a pointer too!
}node;
//function prototype
void un_node (node *node_name);
void nullpoint (node *node_name);
//initialize root
node *root;
bool check(const char *word)
{
//create a trav pointer
node *cursor = root;
int i = 0;
while(word[i] != '\0')
{
char ch = word[i];
int index = (tolower(ch) - 97);
if(index == -58)
{
index = 26;
}
//validate index
if(index < 0 || index > 26)
{
printf("Error: index\n");
return false;
}
if(cursor -> children[index] != NULL)
{
cursor = cursor -> children[index];
i++;
}
else
{
//if it is NULL then word is not in dictionary
return false;
}
}
//end of word, so check if there is a flag from load
if(cursor -> is_word == true)
{
return true;
}
else
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//malloc space for root node
root = malloc(sizeof(node));
//all of root's children point to NULL now
nullpoint (root);
//open dictionary
FILE *dptr = fopen(dictionary, "r");
if(dptr == NULL)
{
return false;
}
char *c = malloc(sizeof(char));
node *next_node;
//scan the file char by char until end and store it in c
while(fscanf(dptr,"%s",c) != EOF)
{
//in the beginning of every word, make a traversal pointer copy of root so we can always refer back to root
node *trav = root;
//when temp increments, it moves on to next character of word
char *temp;
temp = c;
//repeat for every word
while ((*temp) != '\0')
{
//convert char into array index
int alpha = (tolower(*temp) - 97);
//handle apostrophe
if(alpha == -58)
{
alpha = 26;
}
//validate alpha
if(alpha < 0 || alpha > 26)
{
printf("Error: alpha\n");
return false;
}
//if array element is pointing to NULL, i.e. it hasn't been open yet,
if(trav -> children[alpha] == NULL)
{
//then malloc next node and point it with the cursor.
next_node = malloc(sizeof(node));
//initialize children of newly allocated node
nullpoint(next_node);
//cursor points at the newly allocated memory
trav -> children[alpha] = next_node;
//cusor moves on
trav = trav -> children[alpha];//null?
//quit if malloc returns null
if(next_node == NULL)
{
printf("Could not open dictionary");
return false;
}
}
else
{
//if an already existing path, just go to it
trav = trav -> children[alpha];
}
//increment the address of temp variable
temp++;
}
//a word is loaded.
trav -> is_word = true;
}
//success
fclose(dptr);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
// TODO
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
node *cursor = root;
un_node(cursor);
return true;
}
void un_node (node *cursor)
{
for(int a = 0; a<27; a++)
{
//if the children's pointee is not NULL, i.e. this is not a dead end
if(cursor -> children[a] != NULL)
{
//go back to the beginning (with cursor -> children[a] as new argument) restart this function
un_node(cursor -> children[a]);
}
}
//and when it is dead end, start to
free(cursor);
return;
}
//function that points all children of node to NULL
void nullpoint (node *node_name)
{
for(int t=0;t<27;t++)
{
node_name -> children[t] = NULL;
}
}
I am having trouble loading data into my trie structure. I keep getting a sgemenation fault. Does this have to do with my malloc? Anyone see any issues?
Thanks
/**
* dictionary.c
*
* Computer Science 50
* Problem Set 5
*
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
#define ASCII_OFFSET 97;
/** Node of the data structure used to store the dictionary key words
* Data Structures Type is Tries
* Includes a bool to indicate if the current node is the end of a word
* A pointer to the nodes child node
*/
typedef struct node
{
bool is_word;
struct node* children[27];
}
node;
node* rootNode;
node* nextNode;
//Variable to track the size of the dictinoary
int wordCount = 0;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
//Get words length
int wordLength = strlen(word);
for(int i = 0; i < wordLength; i++)
{
//taking the character we need to check
int charToCheck = tolower(word[i]) - ASCII_OFFSET;
//Checking to see if the char exists in the data strucutre, Trie;
if(nextNode->children[charToCheck] == NULL)
{
return false;
}
//Advance the next node down the trie
nextNode = nextNode->children[charToCheck];
}
nextNode = rootNode;
//Return what is_word return
return nextNode->is_word;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
//Open dict.. file to read
FILE* file = fopen(dictionary,"r");
//Check if the dict.. exsists!
if(file == NULL)
{
return false;
}
//Creating the first node in our data strucutre
rootNode = malloc(sizeof(node));
nextNode = rootNode;
//Get character to store
int character = fgetc(file) - ASCII_OFFSET;
//Go through the dict... file
while(character != EOF)
{
//Go through each word in the file
while(character != '\n')
{
//Add into our data structure
if(nextNode->children[character] == NULL)
{
//Create memory inorder to insert the next node
nextNode->children[character] = malloc(sizeof(node));
nextNode = nextNode->children[character];
}else {
nextNode = nextNode->children[character];
}
//advance character to next
character = fgetc(file) - ASCII_OFFSET;
}
//advance character to next word
character = fgetc(file) - ASCII_OFFSET;
//Set the last node loaded to is_word to track the end of each word
nextNode->is_word = true;
wordCount++;
nextNode = rootNode;
}
fclose(file);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return wordCount;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
for(int i = 0; i < 26; i++)
{
if(nextNode->children[i] != NULL)
{
nextNode = nextNode->children[i];
unload();
}
}
free(nextNode);
return true;
}
You don't check that a char is in range before using it as index in the array. Any character outside the range a-z will cause buffer overflow.
You compare the character to known character constants after you have subtracted 97 from it.
You need to initialize the memory returned from malloc by assigning NULL to all array elements and set is_word to false.
Just took a short glance at the code, so I may have missed additional bugs.