Why does the hash function give a segmentation fault? - c

I was trying to make a spell checker program where I first have to load a dictionary into memory. To do so, I tried using a hash table. The program shows a segmentation fault when I use the hash function for the hash table.
// Implements a dictionary's functionality
#include <stdbool.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 6536;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
//TODO
return false;
}
// Hashes word to a number
unsigned int hash(const char *word) //Hashed using djb2
{
unsigned long hash = 5381;
int c = 0;
while (c == *word++)
{
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
}
return hash;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
char *str = NULL;
FILE *dict = fopen(dictionary,"r");
node *temp = NULL;
if(dict == NULL)
{
//printf("Error: \n", strerror(errno));
return false;
}
for(int i = 0; i < N; i++)
{
table[i] = NULL;
}
while(fscanf(dict, "%s", str) != 1)
{
unsigned int key = hash(str);
temp = malloc(sizeof(node));
if(temp == NULL)
{
return false;
}
if(table[key] != NULL)
{
temp->next = table[key]->next;
strcpy(temp->word, str);
table[key]->next = temp;
free(temp);
}
else
{
table[key] = malloc(sizeof(node));
table[key]->next = NULL;
strcpy(table[key]->word, str);
}
}
fclose(dict);
printf("SUCCESS\n");
return true;
}
The debugger shows the seg. fault occuring at unsigned int key = hash(str);. I'd like to know how this can be fixed.

Try
char str[MAX_LEN];
instead of
char *str = NULL;
(after defining MAX_LEN as appopriate for your application).
As M Oehm pointed out in a comment, I think you might be interpreting the return value of fscanf() incorrectly also.

Related

Segmentation fault in CS50 Speller. Why?

I am working on the CS50 pset5 Speller, and I keep getting a segmentation fault error. Debug50 suggests the problem is the line n->next = table[index]; in the implementation of the load function, line 110. I tried to revise but I canĀ“t figure out why it would give error. Here below my code, can anyone please help me?
// Implements a dictionary's functionality
#include <stdbool.h>
#include <strings.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node {
char word[LENGTH + 1];
struct node *next;
} node;
// Number of buckets in hash table
const unsigned int N = 150000;
// Nodes counter
int nodes_counter = 0;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
int hash_value = hash(word);
node *cursor = malloc(sizeof(node));
if (cursor != NULL)
{
cursor = table[hash_value];
}
if (strcasecmp(cursor->word, word) == 0) // If word is first item in linked list
{
return 0;
}
else // Iterate over the list by moving the cursor
{
while (cursor->next != NULL)
{
if (strcasecmp(cursor->word, word) == 0) // If word is found
{
return 0;
}
else
{
cursor = cursor->next;
}
}
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// Adaptation of FNV function, source https://www.programmingalgorithms.com/algorithm/fnv-hash/c/
const unsigned int fnv_prime = 0x811C9DC5;
unsigned int hash = 0;
unsigned int i = 0;
for (i = 0; i < strlen(word); i++)
{
hash *= fnv_prime;
hash ^= (*word);
}
return hash;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// Open Dictionary File (argv[1] or dictionary?)
FILE *file = fopen(dictionary, "r");
if (file == NULL)
{
printf("Could not open file\n");
return 1;
}
// Read until end of file word by word (store word to read in word = (part of node)?)
char word[LENGTH + 1];
while(fscanf(file, "%s", word) != EOF)
{
// For each word, create a new node
node *n = malloc(sizeof(node));
if (n != NULL)
{
strcpy(n->word, word);
//Omitted to avoid segmentation fault n->next = NULL;
nodes_counter++;
}
else
{
return 2;
}
// Call hash function (input: word --> output: int)
int index = hash(word);
// Insert Node into Hash Table
n->next = table[index];
table[index] = n;
}
return false;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// Return number of nodes created in Load
if (nodes_counter > 0)
{
return nodes_counter;
}
return 0;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO
for (int i = 0; i < N; i++)
{
node *cursor = table[i];
while (cursor->next != NULL)
{
node *tmp = cursor;
cursor = cursor->next;
free(tmp);
}
}
return false;
}
There are multiple problems in your code:
node *table[N]; cannot be only be defined as a global object if N is a constant expression. N is defined as a const unsigned int, but N is not a constant expression in C (albeit it is in C++). Your program compiles only because the compiler accepts this as a non portable extension. Either use a macro or an enum.
you overwrite cursor as soon as it is allocated in check(). There is no need to allocate a node in this function.
the hash() function should produce the same hash for words that differ only in case.
the hash() function only uses the first letter in word.
the hash() function can return a hash value >= N.
fscanf(file, "%s", word) should be protected agains buffer overflow.
you do not check if cursor is non null before dereferencing it in unload()
Here is a modified version:
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node {
char word[LENGTH + 1];
struct node *next;
} node;
// Number of buckets in hash table
enum { N = 150000 };
// Nodes counter
int nodes_counter = 0;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word) {
int hash_value = hash(word);
// Iterate over the list by moving the cursor
for (node *cursor = table[hash_value]; cursor; cursor = cursor->next) {
if (strcasecmp(cursor->word, word) == 0) {
// If word is found
return true;
}
}
// If word is not found
return false;
}
// Hashes word to a number
unsigned int hash(const char *word) {
// Adaptation of FNV function, source https://www.programmingalgorithms.com/algorithm/fnv-hash/c/
unsigned int fnv_prime = 0x811C9DC5;
unsigned int hash = 0;
for (unsigned int i = 0; word[i] != '\0'; i++) {
hash *= fnv_prime;
hash ^= toupper((unsigned char)word[i]);
}
return hash % N;
}
// Loads dictionary into memory, returning true if successful, else a negative error number
int load(const char *dictionary) {
// Open Dictionary File (argv[1] or dictionary?)
FILE *file = fopen(dictionary, "r");
if (file == NULL) {
printf("Could not open file\n");
return -1;
}
// Read until end of file word by word (store word to read in word = (part of node)?)
char word[LENGTH + 1];
char format[10];
// construct the conversion specifier to limit the word size
// read by fscanf()
snprintf(format, sizeof format, "%%%ds", LENGTH);
while (fscanf(file, format, word) == 1) {
// For each word, create a new node
node *n = malloc(sizeof(node));
if (n == NULL) {
fclose(file);
return -2;
}
strcpy(n->word, word);
n->next = NULL;
nodes_counter++;
// Call hash function (input: word --> output: int)
int index = hash(word);
// Insert Node into Hash Table
n->next = table[index];
table[index] = n;
}
fclose(file);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void) {
// Return number of nodes created in Load
return nodes_counter;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void) {
for (int i = 0; i < N; i++) {
node *cursor = table[i];
table[i] = NULL;
while (cursor != NULL) {
node *tmp = cursor;
cursor = cursor->next;
free(tmp);
}
}
return true;
}

Cs50 pset5 speller segmentation, memory errors

I'm doing the speller program and for the past few days I feel like I'm going in circles, receiving the same problems over and over again. Now, the error is free(): "invalid pointer
Aborted", most likely refering to the hash function, in which I use calloc,yet I can't understand what am I doing wrong there. Most likely there are multiple mistakes in my other functions, so tips regarding them would be very appreciated. I'll post the entire program, so I don't have to send snippets of it later. Completely lost here.
Ok, thanks to Tim Randal's answer the free() error looks fixed, however a segmentation error mentioned previously took his place.
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "dictionary.h"
#include <strings.h>
#include <ctype.h>
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 1000;
// Number of words loaded into the dictionary
int counter = 0;
// Hash table
node *table[N] = {0};
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
size_t len = strlen(word);
char *lower = calloc(len+1, sizeof(char));
for (size_t i = 0; i < len; ++i) {
lower[i] = tolower((unsigned char)word[i]);
}
node *cursor = NULL;
cursor = table[hash(lower)];
while (cursor != NULL)
{
if (strcasecmp(cursor->word,lower) == 0)
{
free(lower);
return true;
}
else
{
cursor = cursor->next;
}
}
free(lower);
return false;
}
// Hashes word to a number
//hash function djb2 by Dan Bernstein
unsigned int hash(const char *word)
{
size_t len = strlen(word);
char *lower = calloc(len+1, sizeof(char));
for (size_t i = 0; i < len; ++i)
{
lower[i] = tolower((unsigned char)word[i]);
}
unsigned long hash = 5381;
int c;
while ((c = *lower++))
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
free(lower);
if (hash > N)
{
hash = hash % N;
return hash;
}
else
{
return hash;
}
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
char word[LENGTH + 1];
node *new_node = NULL;
node *tmp = NULL;
FILE *file = fopen(dictionary,"r");
if (file == NULL)
{
printf("Could not open file\n");
return false;
}
while(fscanf(file, "%s",word) != EOF)
{
new_node = malloc(sizeof(node));
if (new_node == NULL)
{
printf("Not enough memory!\n");
return false;
}
strcpy(new_node->word,word);
unsigned int index = hash(word);
if(table[index] == NULL)
{
table[index] = new_node;
new_node->next = NULL;
}
else
{
tmp = table[index];
table[index] = new_node;
new_node->next = tmp;
}
counter++;
}
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
return counter;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
const char *word = NULL;
int index = hash(word);
for(node *cursor = table[index]; cursor != NULL;cursor = cursor->next)
{
for(node *tmp = table[index]; tmp != NULL; tmp = tmp->next)
{
free(tmp);
}
free(cursor);
}
return true;
}
Here's one problem. I'm not saying it's the only one.
while ((c = *lower++))
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
free(lower);
You're freeing a pointer after changing it. You will need to copy lower to another pointer, which gets incremented and tested. The original lower needs to remember the address of the first byte allocated.
char* test = lower;
while ((c = *test++))
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
free(lower);

Pset5 Speller: I'm getting segmentation fault with small dictionary

When i used check50, my program was not giving any report as output. So i tried my code with small dictionary and i got a segmentation fault. I think it has do with my unload function
This is my code
// Implements a dictionary's functionality
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <strings.h>
#include <ctype.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 125;
int word_size = 0;
// Hash table
node *hashtable[N];
// Returns true if word is in dictionary else false
bool check(const char *word)
{
//Convert the word to lowercase
int length = strlen(word);
char copy[length + 1];
for (int i = 0; i < length; i++)
{
copy[i] = tolower(word[i]);
}
copy[length] = '\0';
//Find out the hash value
int hash_value = hash(copy);
node *tmp = hashtable[hash_value];
//Check if word is in dictionary
while(tmp != NULL)
{
if(strcasecmp(tmp->word, copy) == 0)
{
return true;
}
tmp = tmp->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
//It's djb2 hash function
unsigned int hash = 5381;
int c;
while ((c = *word++))
hash = ((hash << 5) + hash) + c;
return hash % N;
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
//Initialize all indexs of hastable to NULL
for(int i =0; i < LENGTH + 1; i++)
{
hashtable[i] = NULL;
}
char word[LENGTH +1];
int pos;
FILE *dict = fopen(dictionary,"r");
if(dict == NULL)
{
return false;
}
//Scan every word...I think my problem is in here
while (fscanf(dict, "%s", word) != EOF)
{
word_size++;
int length = strlen(word);
char copy[length + 1];
for (int i = 0; i < length; i++)
{
copy[i] = tolower(word[i]);
}
copy[length] = '\0';
node *new_node = malloc(sizeof(node));
strcpy(new_node->word,copy);
new_node->next = NULL;
//Find the hash value
pos = hash(copy);
//Set the pointer of the new node to index of hashtable
new_node->next = hashtable[pos];
hashtable[pos] = new_node;
}
return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
return word_size;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
node *cursor = NULL;
node* tmp = NULL;
for(int i=0; i < N; i++)
{
while(cursor->next != NULL)
{
tmp = cursor;
cursor = cursor->next;
free(tmp);
}
free(cursor);
}
return true;
}
Can anyone say the write logic if my unload function is wrong. It works fine with large dictionary. I tried but couldn't figure out how to use debug50
In your unload () function you soon define
node *cursor = NULL;
but then, without assigning to cursor any other value, you dereference it:
while ( cursor->next != NULL )
Dereferencing a NULL pointer raises undefined behavior, and it will likely result in a segmentation fault.
The following action is free(cursor); that, with a NULL pointer, is not good as well.

CS50 Pset5 (Speller) Compiling Issue

So I've been taking Harvard's CS50 class and currently am working on it's Problem Set 5 called speller (https://cs50.harvard.edu/x/2020/psets/5/speller/)
Basically I think that I have filled everything correctly, however, when trying to compile this error message appears :
In function `check':/home/ubuntu/pset5/speller/dictionary.c:33: undefined reference to `hash' dictionary.o: In function `load': /home/ubuntu/pset5/speller/dictionary.c:90: undefined reference to `hash' clang-7: error: linker command failed with exit code 1 (use -v to see invocation)
I am not sure what this means, and tried to figure it out for quite some time, but I'm reaching out for an explanation...
My code is :
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include "dictionary.h"
int word_count = 0;
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 26;
// Hash table
node *table[N];
// Returns true if word is in dictionary else false
bool check(const char *word)
{
unsigned int h = hash(word);
node *cursor = table[h];
while(cursor != NULL)
{
if(strcasecmp(word, cursor -> word) ==0)
{
return true;
}
else
{
cursor = cursor -> next;
}
}
return false;
}
// Hashes word to a number
// This hash function was adapted by Neel Mehta from
// http://stackoverflow.com/questions/2571683/djb2-hash-function.
unsigned int hash_word(const char* word)
{
unsigned long hash = 5381;
for (const char* ptr = word; *ptr != '\0'; ptr++)
{
hash = ((hash << 5) + hash) + tolower(*ptr);
}
return hash %26;
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
FILE *dic = fopen(dictionary, "r");
char word[LENGTH + 1];
if(dic == NULL)
{
unload();
return false;
}
while (fscanf(dic,"%s",word) != EOF)
{
node *sllnode = malloc(sizeof(node));
if( sllnode == NULL)
{
return false;
}
strcpy(sllnode -> word, word);
word_count++;
int dicindex = hash(word);
if(table[dicindex] == NULL)
{
sllnode -> next = NULL;
}
else
{
sllnode -> next = table[dicindex];
}
table[dicindex]= sllnode;
}
fclose(dic);
// check here whethet to free memory space or not (maybe needs to be freed at very end)
return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
return word_count;
return 0;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
for(int i = 0; i < N ; i++)
{
node *cursor = table[i];
while(cursor)
{
node *tmp = cursor;
cursor = cursor -> next;
free(tmp);
}
}
return true;
}
If you would like to know what dictionary.h looks like, here is the code for that:
// Declares a dictionary's functionality
#ifndef DICTIONARY_H
#define DICTIONARY_H
#include <stdbool.h>
// Maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45
// Prototypes
bool check(const char *word);
unsigned int hash(const char *word);
bool load(const char *dictionary);
unsigned int size(void);
bool unload(void);
#endif // DICTIONARY_H
Hoping someone could help me with this..
Any help would be appreciated!
If you look in your load() function, you have this line: int dicindex = hash(word);.
The issue with this is that you changed hash() into hash_word() which is why you are having the error because there is no hash() function in the code. Easiest thing to do is to change unsigned int hash_word(const char* word) back to normal which was unsigned int hash(const char* word) as you weren't suppose to change any function name in this program.

CS50 Pset5 : code cannot compile because of linker failure

I just finish writing a simple code for Pset5 speller just to check if my code is able to compile. The code is written into the header file. However, when I try to compile the file the error clang-7: error: linker command failed with exit code 1 shows up. Based on my shallow understanding of what have read online, it says that this error would occur if I declare and call a function that does not have any definition. Any help would be much appreciated. Here is my code:
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 26;
// Hash table
node *table[N];
// Returns true if word is in dictionary else false
//uses the hash table to check
bool check(const char *word)
{
int hashindex = hash(word);
node *tmp = table[hashindex];
while (tmp->next != NULL)
{
if (strcmp(word, tmp->word) == 0)
{
return true;
}
tmp = tmp->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
//hashing the first letter of the string
unsigned int value1 = (int) *(word);
unsigned int value2;
if (isupper(value1))
{
value2 = tolower(value1);
}
else
{
value2 = value1;
}
int hashvalue = value2 % 97;
return hashvalue;
}
// Loads dictionary into memory, returning true if successful else false
//hashes the words in the dictionary file
bool load(const char *dictionary)
{
// open dictionary file to load
FILE *file = fopen(dictionary, "r");
if (file == NULL)
{
printf("Could not open %s.\n", dictionary);
return false;
}
//making an array that can store scanned words from the dictionary
char dictionaryword[LENGTH];
//initializing all the buckets in the table
for (int i = 0; i < N; i++)
{
table[i] = NULL;
}
//loop to read the word one-by-one
while(fscanf(file, "%s", dictionaryword) != EOF)
{
//creating a new node to store the word
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
strcpy(n->word, dictionaryword);
n->next = NULL;
//hashing the word to determine which bucket to store it in
int hashindex = hash(n->word);
//checking for any collisions
if (table[hashindex] == NULL)
{
table[hashindex] = n;
}
else
{
//adding new node from the end
node *tmp = table[hashindex];
while (tmp->next != NULL)
{
tmp = tmp->next;
}
tmp->next = n;
}
memset (dictionaryword, 0, LENGTH);
}
return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
//the biggest number in the hash table
unsigned int size(void)
{
//iterate over each bucket, assuming all the buckets are filled
unsigned int counter = 0;
for (int i = 0; i < N; i++)
{
node* tmp = table[i];
while(tmp->next != NULL)
{
tmp = tmp->next;
counter++;
}
}
return counter;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
for (int i = 0; i < N; i++)
{
//if the bucket is not filled, memory is leaked somewhere
if (table[i] == NULL)
{
return false;
}
while (table[i] != NULL)
{
node *tmp = table[i]->next;
free(table[i]);
table[i] = tmp;
}
}
return true;
}
EDIT: This is the header file
#ifndef DICTIONARY_H
#define DICTIONARY_H
#include <stdbool.h>
// Maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45
// Prototypes
bool check(const char *word);
unsigned int hash(const char *word);
bool load(const char *dictionary);
unsigned int size(void);
bool unload(void);
#endif // DICTIONARY_H

Resources