CS50 Pset5 check() counting too many words as misspelled - c

I have loaded the dictionary into a tree structure and successfully gotten speller.c to compile with the following implementations of load() and check().
However, when I run the program, an incorrect number of words are counted as misspelled by my check() function. (In the case of lalaland.txt, it's 17187 words out of 17756).
I can't figure out what's wrong with my code and would be extremely grateful to anyone who could help point me in the right direction.
typedef struct node
{
bool isword;
struct node *children[27];
}
node;
node *root = NULL;
// Function returns the position of any given letter in the alphabet e.g. a = 1, b = 2 etc. Returns 0 for an apostrophe.
int index(char letter)
{
if (isalpha(letter))
{
int i = letter - 96;
return i;
}
return 0;
}
// Keeps track of number of words loaded into dictionary.
unsigned int wordno = 0;
// Returns true if word is in dictionary else false
bool check(const char *word)
{
char newword[LENGTH + 1];
node *temp = root;
for (int j = 0; j < strlen(word); j++)
{
//Makes each letter of the input lowercase and inserts it into a new array.
newword[j] = tolower(word[j]);
}
for (int i = 0; i < strlen(word); i++)
{
//Finds the position of the character in the alphabet by making a call to index().
int letter = index(newword[i]);
if (temp->children[letter] == NULL)
{
return false;
}
else
{
temp = temp->children[letter];
}
}
if (temp->isword == true)
{
return true;
}
return false;
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
FILE *dict = fopen(dictionary, "r");
root = calloc(1, sizeof(node));
node *temp = root;
if (dict == NULL)
{
fprintf(stderr, "Could not load dictionary.\n");
return false;
}
char word[LENGTH+1];
while (fscanf(dict, "%s", word) != EOF)
{
for (int i = 0; i < strlen(word); i++)
{
int letter = index(word[i]);
if (temp->children[letter] == NULL)
{
temp->children[letter] = calloc(1, sizeof(node));
if ((temp->children[letter]) == NULL)
{
unload();
return false;
}
}
temp = temp->children[letter];
}
temp->isword = true;
wordno++;
}
return true;
}

node *temp = root;
should be placed inside this while loop:
while (fscanf(dict, "%s", word) != EOF)
By doing this, you allow temp to go back and point to the root node each time the loop begins iterating over a new word in the file.

Related

cs50 speller stuck!! Please tell me why i am getting Signal 11(SIGSEGV): dumping core

I am completely stuck with this segmentation fault. Can't even test if my code will actually do what's intended. Anyone can please help?
I am getting segmentation fault at the line: table[hashed] = n;
(Process terminating with default action of signal 11 (SIGSEGV): dumping core.
Bad permissions for mapped region at address.....)
Here is my code.Many Thanks!
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 1327;
// Hash table
node *table[sizeof(node) * N];
int loadedsize = 0;
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
int hashed = hash(word);
node *cursor = table[hashed]->next;
while (cursor != NULL)
{
if(strcasecmp(word, cursor->word) == 0)
{
return true;
}
else
{
cursor = cursor->next;
}
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
int sum = 0;
for (int i = 1; i <= strlen(word); i++)
{
sum += (tolower(word[i]) * (15 - i));
}
sum = sum % 1327;
return sum;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
//setup variable for word
char s[LENGTH + 1];
//NULL table
for (int i = 0; i < N; i++)
{
table[i] = NULL;
}
//setup new node
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
//open file
FILE *dict = fopen(dictionary, "r");
if (dict == NULL)
{
return false;
}
//read words and place node in table
while (fscanf(dict, "%s", s) != EOF)
{
int hashed = hash(s);
strcpy(n->word, s);
n->next = NULL;
if (table[hashed] == NULL)
{
table[hashed] = n;
}
else
{
n->next = table[hashed];
table[hashed] = n; <<<<THIS IS WHERE I GET DUMPING CORE>>>>
}
loadedsize += 1;
}
free(n);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
return loadedsize;
}
```

cs50 speller problem hash table doesn't seem to form in load function

currently doing the speller problem of cs50 pset5.
I'm having a trouble with forming the hash table and i think it causes a segmentation fault later on when i try to run a function that searches the table.
this is the function that creates the hash table:
bool load(const char *dictionary)
{
FILE *dict = fopen(dictionary, "r"); //opens dictionary file
if (dict == NULL) // if cant be opened loading failed
{
return false;
}
char w[LENGTH + 1]; //buffer (length is the maximum character number
int i = 0; //index within word
while(fscanf(dict, "%s", w) != EOF) // scanning the dictionary for words
{
int x = hash(w); //getting the number of the linked list within the table
node *n = malloc(sizeof(node)); //allocating memory for a new node
if (n == NULL)
{
return false;
}
for (int j = 0; j < strlen(w) + 1; j++) // documenting the word within the new node
{
n->word[i] = w[j];
}
n->next = table[x];
table[x] = n; //new node is the beginning of the linked list
dicsize++;
}
fclose(dict);
return true;
}
my main question is whether my code for forming the table is correct and if not then why
thank you in advance
and this is the entire code:
// Implements a dictionary's functionality
#include <string.h>
#include <strings.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 26;
int dicsize = 0;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
int x = hash(word);
node *cur = table[x];
while(table[x] != NULL)
{
if(strcasecmp(word, cur->word) == 0)
{
return true;
}
if(cur == NULL)
{
return false;
}
cur = cur->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
char temp = tolower(word[0]);
int place = (temp - 97);
return place;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
FILE *dict = fopen(dictionary, "r"); //opens dictionary file
if (dict == NULL) // if cant be opened loading failed
{
return false;
}
char w[LENGTH + 1]; //buffer
int i = 0; //index within word
while(fscanf(dict, "%s", w) != EOF) // scanning the dictionary for words
{
int x = hash(w); //getting the number of the linked list within the table
node *n = malloc(sizeof(node)); //allocating memory for a new node
if (n == NULL)
{
return false;
}
for (int j = 0; j < strlen(w) + 1; j++) // documenting the word within the new node
{
n->word[i] = w[j];
}
n->next = table[x];
table[x] = n; //new node is the beginning of the linked list
dicsize++;
}
fclose(dict);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
printf("%i", dicsize);
return dicsize;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO
return false;
}
The likely culprit is
for (int j = 0; j < strlen(w) + 1; j++) // documenting the word within the new node
{
n->word[i] = w[j];
}
As i is not changing inside the loop, instead of copying the string it will dump all the chars into one. The other index should also be j. (as in n->word[j] = w[j];)
It would be better to do this as strcpy anyway.

Speller misspelled all words

All my words are misspelled when I run the program. It compiles but doesn't work as intended. I believe it has something to do with either the hash/check/or load function. I've checked it many times but can't figure out the problem. I've tried outputting the newWord variable to see if it returns a copy in all lower case but for some reason it doesn't print out. Maybe that means that all the values are NULL? So it might be a problem in my load function. I'll double check that again. Thanks in advance.
EDIT: Ran the debugger and I believe that the load function is returning correct values. I think it just has to be either my hash or check function.
#include <stdbool.h>
#include <strings.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
const unsigned int N = 17577;
// Hash table
node *table[N];
//words global variable
int words = 0;
// Returns true if word is in dictionary else false
bool check(const char *word)
{
int hashValue = hash(word);
if(hashValue == 17576){
return false;
}
node *cursor = table[hashValue];
printf("/%s", cursor->word);
//review my notes for this while loop
//basically tmp->next is pointing to the NEXT ITERATION
//and tmp is our CURRENT ITERATION
//so we want to check a value, if !true, continue
while(cursor->next != NULL){
if (strcasecmp(word, cursor->word) == 0){
return true;
}
else{
cursor = cursor->next;
}
}
return false;
}
unsigned int hash(const char *word)
{
const int ASCII_APOSTROPHE = 39;
int alphaIndex;
char *newWord = NULL;
//make it lowercase for easier read
for(int i = 0, n = strlen(word); i < n; i++){
if(isalpha(word[i]) == 1){
newWord[i] = tolower(word[i]);
}
else if(word[i] == ASCII_APOSTROPHE || isalpha(word[i]) == 2){
newWord[i] = word[i];
}
else{
//last index of array
return 17576;
}
}
if(strlen(newWord) >= 3){
alphaIndex = (word[0] - 97) + (word[1] - 97) + (word[2] - 97);
}
else if(strlen(newWord) == 2){
alphaIndex = (word[0] - 97) + (word[1] - 97);
}
else if(strlen(newWord) == 1){
alphaIndex = (word[0] - 97);
}
else{
alphaIndex = 0;
}
return alphaIndex;
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
FILE *file = fopen(dictionary, "r");
if(file == NULL){
printf("Could not open file!\n");
return false;
}
char *word = malloc(LENGTH + 1);
while(fscanf(file, "%s", word) != EOF){
node *n = malloc(sizeof(node));
if(n == NULL){
printf("Not enough memory!");
return 1;
}
strcpy(n->word, word);
n->next = table[hash(word)];
table[hash(word)] = n;
size();
}
free(word);
fclose(file);
return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
words++;
return words;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
for(int i = 0; i < N; i++){
node *head = table[i];
if(head != NULL){
node *cursor = head->next;
node *deleteCursor = cursor;
while(cursor->next != NULL){
free(deleteCursor);
cursor = cursor->next;
deleteCursor = cursor;
}
}
}
return true;
}
There are two problems.
First one is that hash function is somehow running wrong. I did not debug it, I just replaced it with a very simple hash function (copied from https://stackoverflow.com/a/1469939/11000382).
unsigned int hash(const char *word)
{
char c = word[0];
int n = -1;
static const char *const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
char *p = strchr(alphabet, toupper((unsigned char)c));
if (p)
{
n = p - alphabet;
}
return n;
}
After this, I checked with $ ./speller texts/lalaland.txt but I was getting still more mispelled words (1738). However, it should be 955. Then I reviewed the check function, and I spotted a small bug.
This,
while(cursor->next != NULL){
Should be this,
while(cursor != NULL){
Then, it finds 955 mispelled words for lalaland.txt.

CS50 Speller (PSET 5) not loading dictionary

I am working through the CS50 Speller problem and facing a problem that when running the program it returns an error of "Could not unload dictionaries/large."
I have looked at other people's solutions and can't for the life of me identify what is going wrong in my program. I am thinking it is in the has function, but have seen this has function in other people's working programs?
Any help would be greatly appreciated.
// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of words in dictionary
int word_count = 0;
// Number of buckets in hash table
const unsigned int N = 26;
// Hash table
node *table[N];
// Returns true if word is in dictionary else false
bool check(const char *word)
{
unsigned int n = hash(word);
node *cursor = table[n];
while (cursor != NULL)
{
if (strcasecmp(word, cursor -> word) == 0)
{
return true;
}
cursor = cursor -> next;
}
return false;
}
// Hashes word to a number
// Function credit to delipity(staff) on CS50 reddit page
unsigned int hash(const char *word)
{
unsigned int hash_value = 0;
for (int i = 0, n = strlen(word); i < n; i++)
{
hash_value = (hash_value << 2) ^ word[i];
}
return hash_value % N; //N is size of hashtable
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
// Open dictionary and check for memory issue
// Open dictionary file and create word array
FILE *dict = fopen(dictionary, "r");
char word[LENGTH + 1];
// Check for memory issue with dict
if(dict == NULL)
{
printf("Dictionary is null\n");
unload();
return false;
}
// Read string 1 word at a time
while (fscanf(dict, "%s", word) != EOF)
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
strcpy(n -> word, word);
word_count++;
// Index word using hash function
int dict_index = hash(word);
// Insert into hash table if already empty
if (table[dict_index] == NULL)
{
n -> next = NULL;
}
// Insert work as new node if not empyty
else
{
n -> next = table[dict_index];
}
table[dict_index] = n;
}
// Close dictionary file
fclose(dict);
// Indicate success
return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
return word_count;
return 0;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
for (int i = 0; i < N; i++)
{
node *cursor = table[i];
node *tmp = cursor;
while (cursor != NULL)
{
cursor = cursor -> next;
free(tmp);
tmp = cursor;
}
}
return false;
}
bool unload(void)
{
for (int i = 0; i < N; i++)
{
node *cursor = table[i];
while (cursor)
{
node *temp = cursor;
cursor = cursor->next;
free(temp);
}
}
return true;
}
Try this instead for the unload function

Segmentation Fault in Trie implementation in C

I'm trying to implement a trie data structure to spell-check a given text file. Currently, it seems to work for a couple words in the file, then it reaches a seg fault. I tried debugging to find the culprit, but all I found was that the value of "letter" is retaining seemingly random negative values (it should be between 1 and 27, inclusive). Normally the seg fault issue appears almost instantly after i start the program, so I'm not sure why the issue is popping up in the middle of the program.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
//create global root node
Trienode *root;
//create word counter for size() function
unsigned int wordcount = 0;
//creates an empty node
Trienode * newnode()
{
Trienode *nnode = NULL;
nnode = (Trienode *)malloc(sizeof(Trienode));
//initialize new node with null pointers and values
nnode -> parent = NULL;
for(int i = 0; i < 27; i++)
{
nnode -> children[i] = NULL;
}
return nnode;
}
void cleartrie(Trienode *head)
{
//if child node exists, free it, else continue with next iteration in for loop
if(head)
{
for(int i = 0; i < 27; i++)
{
cleartrie(head -> children[i]);
}
free(head);
head = NULL;
}
}
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
int i = 0;
int letter;
Trienode *head = root;
while(word[i] != '\0')
{
if(isalpha(word[i]))
{
letter = word[i] - 'a';
}
else //it must be an apostrophe
{
letter = word[i] - 13;
}
if(!(head -> children[letter]))
{
return false;
}
else //a pointer must exist
{
head = head -> children[letter];
}
i++;
}
return true;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//open file
FILE *infile = fopen(dictionary, "r");
Trienode *parnode; //parent node
root = newnode();
Trienode *curnode = root; //current node
int letter = 0;
//while not end of file, read words
while(fgetc(infile) != EOF)
{
//while not end of word, read letters
for(;;)
{
int c;
//read current letter in file
c = fgetc(infile);
//convert input char to corresponding array location (a - z = 0-25, apostrophe = 26)
if(isalpha(c))
{
letter = c - 'a';
}
else if (c == '\'')
{
letter = c - 13;
}
//if end of string, exit loop
else if (c == '\0')
{
//end of word, so endofstring = true
wordcount++;
break;
}
//move to next letter if not either apostrophe or alphabetical
else
{
break;
}
//if pointer to letter of word doesn't exist, create new node
if(curnode -> children[letter] == NULL)
{
curnode -> children[letter] = newnode();
}
//child node is the new current node
parnode = curnode;
curnode = curnode -> children[letter];
curnode -> parent = parnode;
}
//return to root node
curnode = root;
}
fclose(infile);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return wordcount;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
cleartrie(root);
if (root == NULL)
{
return true;
}
return false;
}
Sorry about the wall of text, but most of it is just there for context (I hope). The seg fault error is occurring on the if(!(head -> children[letter])) line of the check helper function.
Thanks in advance!
I suspect that your test file may contain some uppercase letters. If this is the case, then subtracting 'a' in an attempt to remap your letters will result in a negative number, since 'A' < 'a'. Have a look at the ASCII Table. Converting the letters to lowercase first should solve your problem.

Resources