I am trying to add (key, value) pairs to a hashmap but cannot access the values after insertion.
This hash table is supposed to deal with collisions as I am iterating along each hash index whenever a collision occurs. I then insert it when I have reached the end of the (key, value) pair list at that index.
Essentially it is a basic linked list hashmap.
The problem is, I keep getting a segmentation fault when I try to access the value again (and my showTable() function also fails). In this test, I am simply trying to access the first (key, value) pair at each hash index after something is added at that hash index. I am probably doing something very silly, but I cannot see it.
I have not yet commented but I hope the code is self explanatory. The important bit is InsertKeyValuePair() but I have added everything as a code review would also be beneficial.
#include <stdlib.h>
#include <stdio.h>
/* One (key, value) entry of the hash table. */
typedef struct TVal KeyValue;
typedef struct TVal {
char *value; /* value string */
char *key; /* key string */
KeyValue *next; /* following entry in the same chain, walked via cursor->next */
} KeyValue;
typedef KeyValue **HashTable;
int MAX_SIZE = 200;
int HashKey(char *Key, int Max);
void InsertKeyValuePair(char *key, char *value, int Index, HashTable table);
int insert(char *Key, char *value, HashTable table, int size);
void showTable(HashTable table, int size);
/*
 * Hash a NUL-terminated key: add up every character multiplied by its
 * 1-based position, then reduce the sum modulo the global MAX_SIZE.
 * NOTE(review): the Max parameter is never read.
 * NOTE(review): c is a plain char and Hash a signed int; if char is signed
 * and the key holds bytes >= 0x80 the result could be negative -- confirm.
 */
int HashKey(char *Key, int Max) {
char c = *Key;
int Hash = 0;
int n = 1; /* weight of the current character and offset of the next one */
while (c != 0) {
Hash += n * ((int)c);
c = *(Key + n);
n++;
}
return Hash % MAX_SIZE; /* reduces by the global, not by Max */
}
/*
 * Meant to append (key, value) to the chain stored at table[Index] and then
 * print the first pair of that chain.
 * NOTE(review): as written, the new node is never linked into the table.
 */
void InsertKeyValuePair(char *key, char *value, int Index, HashTable table) {
KeyValue *cursor = *(table + Index);
/* Advance until cursor itself is NULL; the last node is not remembered. */
while (cursor != NULL) {
cursor = cursor->next;
}
/* Only the local variable receives the new node: neither table[Index] nor
   any node's next field is written. The malloc result is not checked. */
cursor = malloc(sizeof(KeyValue));
cursor->value = value;
cursor->key = key;
/* cursor->next is left unset. */
printf("insert <K,V>(%s,%s) HashIndex = %i\n", cursor->key, cursor->value, Index);
// Re-read the head of the bucket; it is dereferenced without a NULL check.
KeyValue *cursor2 = *(table + Index);
printf("<K,V>(%s,%s)\n", cursor2->key, cursor2->value);
}
/*
 * Hash Key, hand the pair to InsertKeyValuePair for that bucket index and
 * return size + 1 (the caller stores this as the new entry count).
 */
int insert(char *Key, char *value, HashTable table, int size) {
int Index = HashKey(Key, MAX_SIZE);
InsertKeyValuePair(Key, value, Index, table);
return size + 1;
}
/*
 * Print every (key, value) pair found in buckets 0 .. size-1 of table.
 * Empty buckets are skipped; each printed pair is preceded by a separator
 * and each non-empty bucket is closed with one more separator.
 */
void showTable(HashTable table, int size) {
int i;
for (i = 0; i < size; i++) {
KeyValue *cursor = *(table + i);
if (cursor == NULL)
continue;
/* walk the chain of this bucket */
while (cursor != NULL) {
printf("==============");
printf("<K,V>(%s,%s)\n", cursor->key, cursor->value);
cursor = cursor->next;
}
printf("==============");
}
}
/* Test driver: allocates the bucket array and inserts three pairs. */
int main() {
/* malloc leaves the buckets uninitialised; sizeof(HashTable) is the size of
   a KeyValue ** (HashTable is a typedef for that pointer type). */
HashTable HTbl = malloc(sizeof(HashTable) * MAX_SIZE);
int size = 0; /* running count returned by insert() */
size = insert("yeuydfdan", "wesfg", HTbl, size);
size = insert("ywere", "rdgg", HTbl, size);
size = insert("ye4", "3244", HTbl, size);
//showTable(HTbl, MAX_SIZE);
}
There are multiple problems in your code:
The hash table is not initialized to NULL, causing segmentation faults when trying to dereference the pointers it contains. Allocating with calloc() would fix this problem.
It is confusing and error prone to hide pointers behind typedefs.
The allocation in main should read HashTable HTbl = calloc(MAX_SIZE, sizeof(*HTbl)); (calloc takes the element count first, then the element size).
The insertion code in InsertKeyValuePair does not link the new pair into the hashtable bucket list, neither at the end nor at the beginning.
It is advisable to use unsigned arithmetic to compute the hash key to avoid overflow issues.
The pointer notation *(table + Index) is confusing. You should use the array notation table[Index] instead.
There seems to be some confusion between the length of the hashtable (MAX_SIZE) and the number of entries in the hashtable (size). Renaming the variables appropriately may improve readability. It is also probably better to pass the count by address and return a success indicator.
Here is a corrected version:
#include <stdlib.h>
#include <stdio.h>
/* One (key, value) entry; the strings are referenced, not copied. */
typedef struct TVal KeyValue;
typedef struct TVal {
const char *value; /* value string, stored as the pointer passed to insert */
const char *key; /* key string, stored as the pointer passed to insert */
KeyValue *next; /* next entry in the same bucket, NULL at the end */
} KeyValue;
typedef KeyValue **HashTable;
static unsigned int HASH_SIZE = 200;
static unsigned int HashKey(const char *key);
static KeyValue *InsertKeyValuePair(const char *key, const char *value, int index, HashTable table);
static int insert(const char *Key, const char *value, HashTable table, int *countp);
static void showTable(HashTable table);
/*
 * Hash a NUL-terminated key into a bucket index in [0, HASH_SIZE).
 *
 * Every byte is multiplied by its 1-based position and the products are
 * summed with unsigned (wrapping) arithmetic before the final reduction.
 *
 * key: NUL-terminated string, must not be NULL.
 * Returns the bucket index for key.
 */
static unsigned int HashKey(const char *key) {
    unsigned int hash = 0;
    size_t n;

    for (n = 0; key[n] != 0; n++) {
        /* Weight is n + 1, not n: a zero weight would drop the first
           character, making keys that differ only there always collide. */
        hash += (unsigned int)(n + 1) * (unsigned char)key[n];
    }
    return hash % HASH_SIZE;
}
/*
 * Append a new (key, value) node to the end of the chain at table[index].
 *
 * The key and value pointers are stored as given (no copy is made).
 * Returns the new node, or NULL if it could not be allocated, in which case
 * the table is left untouched.
 */
static KeyValue *InsertKeyValuePair(const char *key, const char *value, int index, HashTable table) {
    KeyValue *node = malloc(sizeof(KeyValue));
    KeyValue **link;

    if (node == NULL) {
        return NULL;
    }

    /* Move link forward until it addresses the NULL pointer ending the chain. */
    for (link = &table[index]; *link != NULL; link = &(*link)->next) {
    }

    node->value = value;
    node->key = key;
    node->next = NULL;
    *link = node;
    return node;
}
/*
 * Insert (key, value) into table and bump *countp on success.
 * Returns 1 if the pair was stored, 0 if the node could not be allocated.
 */
static int insert(const char *key, const char *value, HashTable table, int *countp) {
    int index = HashKey(key);

    if (InsertKeyValuePair(key, value, index, table) == NULL) {
        return 0;
    }
    *countp += 1;
    return 1;
}
/*
 * Print every pair stored in the table, bucket by bucket.
 * Empty buckets produce no output; each pair is preceded by a separator and
 * each non-empty bucket is closed by a separator line.
 */
static void showTable(HashTable table) {
    unsigned int bucket;

    for (bucket = 0; bucket < HASH_SIZE; bucket++) {
        KeyValue *node = table[bucket];

        if (node != NULL) {
            for (; node != NULL; node = node->next) {
                printf("==============");
                printf("<K,V>(%s,%s)\n", node->key, node->value);
            }
            printf("==============\n");
        }
    }
}
int main() {
HashTable HTbl = calloc(sizeof(*HTbl), HASH_SIZE);
int count = 0;
insert("yeuydfdan", "wesfg", HTbl, &count);
insert("ywere", "rdgg", HTbl, &count);
insert("ye4", "3244", HTbl, &count);
showTable(HTbl);
return 0;
}
This statement
HashTable HTbl = malloc(sizeof(HashTable)*MAX_SIZE);
is incorrect and moreover the allocated memory is not initialized. There should be
HashTable HTbl = calloc( MAX_SIZE, sizeof( KeyValue * ) );
or like
HashTable HTbl = calloc( MAX_SIZE, sizeof( *HTbl ) );
The index within the table should be calculated as some unsigned integer. Otherwise in general you can get a negative index.
In function HashKey parameter Max is not used.
In the function InsertKeyValuePair, only the local variable cursor is changed, instead of the data member cursor->next or *(table+Index), so the new node is never linked into the table.
The loop in the function showTable shall use MAX_SIZE not size in the loop condition. That is you have to pass as an argument the value of MAX_SIZE not the value of size.
Here is a demonstrative program that shows how the program can be updated.
#include <stdio.h>
#include <stdlib.h>
/* One (key, value) entry; the strings are referenced, not copied. */
typedef struct TVal KeyValue;
typedef struct TVal
{
const char *value; /* value string, stored as the pointer passed in */
const char *key; /* key string, stored as the pointer passed in */
KeyValue *next; /* next entry in the same bucket, NULL at the end */
} KeyValue;
typedef KeyValue **HashTable;
const size_t MAX_SIZE = 200;
/*
 * Hash a NUL-terminated key into the range [0, max_slots).
 * Each byte is multiplied by its 1-based position; the products are summed
 * and the sum is reduced modulo max_slots.
 */
static size_t HashKey( const char *key, size_t max_slots )
{
    size_t sum = 0;
    size_t weight = 1;

    while ( *key != '\0' )
    {
        sum += weight * ( unsigned char )*key;
        ++weight;
        ++key;
    }

    return sum % max_slots;
}
/*
 * Append a new (key, value) node to the end of the chain at table[index].
 * The key and value pointers are stored as given (no copy is made).
 * Returns 1 on success and 0 if the node could not be allocated.
 */
static int InsertKeyValuePair( HashTable table, const char *key, const char *value, size_t index )
{
    KeyValue **link = &table[index];
    KeyValue *node;

    /* find the NULL pointer that terminates the chain */
    while ( *link != NULL )
    {
        link = &( *link )->next;
    }

    node = malloc( sizeof( KeyValue ) );
    *link = node;
    if ( node == NULL )
    {
        return 0;
    }

    node->value = value;
    node->key = key;
    node->next = NULL;
    return 1;
}
/*
 * Insert (key, value) into table; on success increment *size.
 * Returns the success flag reported by InsertKeyValuePair.
 */
int insert( HashTable table, const char *key, const char *value, size_t *size )
{
    int success = InsertKeyValuePair( table, key, value, HashKey( key, MAX_SIZE ) );

    if ( success )
    {
        *size += 1;
    }
    return success;
}
/*
 * Print the pairs held in buckets 0 .. size-1 of table.
 * Empty buckets are skipped; a separator line precedes each pair and one
 * more (followed by a blank line) closes each non-empty bucket.
 */
void showTable( HashTable table, size_t size )
{
    for ( size_t i = 0; i < size; i++ )
    {
        if ( table[i] == NULL )
        {
            continue;
        }

        for ( KeyValue *node = table[i]; node != NULL; node = node->next )
        {
            puts( "==============" );
            printf( "<K,V>(%s, %s)\n", node->key, node->value );
        }
        puts( "==============\n" );
    }
}
int main( void )
{
HashTable HTbl = calloc( MAX_SIZE, sizeof( *HTbl ) );
size_t size = 0;
insert( HTbl, "yeuydfdan", "wesfg", &size );
insert( HTbl, "ywere", "rdgg", &size );
insert( HTbl, "ye4", "3244", &size );
showTable( HTbl, MAX_SIZE );
}
The program output is
==============
<K,V>(ywere, rdgg)
==============
==============
<K,V>(ye4, 3244)
==============
==============
<K,V>(yeuydfdan, wesfg)
==============
Of course, you should add some other functions, for example a function that deletes the table together with its nodes.
It would also be better if each node allocated its own memory for the key and the value and copied the passed arguments there. Otherwise the table can, in general, deal only with string literals, because they have static storage duration.
If you rewrite the implementation of the table in such a way that it copies keys and values into its nodes, then the structure should be defined like
/* Entry layout suggested for a table that owns copies of its strings. */
typedef struct TVal KeyValue;
typedef struct TVal
{
char *value; /* value string; not const-qualified */
const char *key; /* key string; const so it is not modified after insertion */
KeyValue *next; /* next entry in the same bucket */
} KeyValue;
That is in any case the key should not be changed and should be declared with the qualifier const.
I'm writing an implementation for Hash Tables in C, but I'm getting bogged down with some stuff. I have written the functions and the data definitions in a separate file.
I have the header file Hashes.h which looks like:
#ifndef HASHES_H_INCLUDED
#define HASHES_H_INCLUDED
/* Digit-based hash of an integer; defined in Hashes.c. */
int accumulation(long int);
/* One key/value entry; next points to another entry_s. */
struct entry_s {
char *key;
char *value;
struct entry_s *next;
};
typedef struct entry_s entry_t;
/* The table: a bucket count and an array of entry pointers. */
struct hashtable_s {
int size; /* number of slots in table, set by ht_create */
struct entry_s **table; /* array of size entry pointers */
};
typedef struct hashtable_s hashtable_t;
/* Allocate a table with the given number of buckets; NULL on failure. */
hashtable_t * ht_create( int );
#endif // HASHES_H_INCLUDED
and I have the Hashes.c file which looks like:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "Hashes.h"
/*
 * Create a hash table with `size` empty buckets.
 *
 * size: number of buckets, must be at least 1.
 * Returns the new table, or NULL if size < 1 or an allocation fails.
 * Nothing is leaked on failure.
 */
hashtable_t *ht_create( int size ) {
    hashtable_t *hashtable;

    if( size < 1 ) return NULL;

    /* Allocate the table itself. */
    hashtable = malloc( sizeof *hashtable );
    if ( hashtable == NULL ) {
        return NULL;
    }

    /* Allocate pointers to the head nodes; calloc zero-fills them and
       checks the count * size multiplication for overflow. */
    hashtable->table = calloc( (size_t)size, sizeof *hashtable->table );
    if ( hashtable->table == NULL ) {
        free( hashtable );   /* previously leaked on this path */
        return NULL;
    }

    hashtable->size = size;
    return hashtable;
}
/*
 * Fold the decimal digits of x into an int, one digit per loop pass.
 * NOTE(review): i is never changed, so pow(33, i) is evaluated with i == 0
 * on every pass. If a positional weight was intended, i has to advance (and
 * the '+' may have been meant as '*') -- confirm the intended formula.
 */
int accumulation (long int x)
{
int hash = 0;
int i = 0; /* exponent passed to pow(); stays 0 throughout */
while (x != 0)
{
hash += pow(33, i) + x % 10; /* adds pow(33, i) plus the lowest decimal digit */
x /= 10; /* drop that digit */
}
return hash;
}
and in the main I have :
#include <stdio.h>
#include <stdlib.h>
#include "Hashes.h"
/* Entry point: creates a 10000-bucket table and exits with status 0. */
int main()
{
    hashtable_t *myHashTable;

    myHashTable = ht_create(10000);
    return 0;
}
I compiled all the files with no error or warning, but when I run the program I receive the error "Undefined reference to 'ht_create'".
If some one has a clue I would deeply appreciate that.
P.S.: I am using CodeBlocks IDE.
I'm pretty new to C, and I don't have a very good grasp of pointers. I'm trying to build a hash table. Here's what I have so far.
I've been cobbling this together from several sources, and I've lost any idea I have of what the pointers are doing. If anyone could even give me a hint as to where my problem is I'd be very grateful.
Header File
/*
 * One dictionary entry (word, definition, link to another entry).
 * NOTE(review): the struct has no tag, so `struct entry` below names a
 * separate, incomplete type -- not this typedef named `entry`.
 */
typedef struct {
char * word;
char * defn;
struct entry *next;
} entry;
/* The table: a slot count and an array of entry pointers. */
typedef struct {
int size;
struct entry **table;
} hashTable;
/* A Dictionary is a pointer to a hashTable. */
typedef hashTable * Dictionary;
Code
#include "hashP.h"
#include <stdlib.h>
#include <string.h>
#include <limits.h>
/*
 * Allocate a Dictionary with initial_capacity bucket pointers, all NULL.
 * Returns NULL if either allocation fails.
 * NOTE(review): sizeof(Dictionary) is the size of a pointer (Dictionary is
 * a typedef for hashTable *), not the size of a hashTable.
 * NOTE(review): new_table->size is not assigned here and delta_capacity is
 * not used; new_table is not freed if the second malloc fails.
 */
Dictionary create(int initial_capacity, int delta_capacity){
Dictionary new_table;
int i;
if ((new_table = malloc(sizeof(Dictionary))) == NULL){
return NULL;
}
if ((new_table->table = malloc(sizeof(entry *) * initial_capacity)) == NULL){
return NULL;
}
/* start with every bucket empty */
for(i=0; i < initial_capacity; i++){
new_table->table[i] = NULL;
}
return new_table;
}
/*
 * Insert a key-value pair into a hash table.
 *
 * table: dictionary to update; each bucket is a list kept sorted by word.
 * index: the word (key); a private copy is made with strdup.
 * value: the definition; a private copy is made with strdup.
 *
 * If the word is already present its definition is replaced. On invalid
 * arguments or allocation failure the table is left unchanged and nothing
 * is leaked.
 */
void insertEntry(Dictionary table, char *index, char *value) {
    entry *newpair = NULL;
    entry *next = NULL;
    entry *last = NULL;
    unsigned long int hashval = 0;   /* must start at 0; it was read uninitialised */
    size_t len;
    size_t i;
    int bin;

    /* A table without buckets would make the modulo below divide by zero. */
    if (table == NULL || table->size < 1 || index == NULL || value == NULL) {
        return;
    }

    /* Convert our string to an integer */
    len = strlen(index);   /* hoisted out of the loop condition */
    for (i = 0; hashval < ULONG_MAX && i < len; i++) {
        hashval = hashval << 8;
        hashval += (unsigned char)index[i];   /* avoid sign extension of char */
    }
    bin = hashval % table->size;

    /* Find the first entry whose word is not smaller than ours. */
    next = table->table[bin];
    while (next != NULL && next->word != NULL && strcmp(index, next->word) > 0) {
        last = next;
        next = next->next;
    }

    /* There's already a pair. Let's replace that string. */
    if (next != NULL && next->word != NULL && strcmp(index, next->word) == 0) {
        char *copy = strdup(value);

        /* Only drop the old definition once the new one exists. */
        if (copy == NULL) {
            return;
        }
        free(next->defn);
        next->defn = copy;
        return;
    }

    /* Nope, couldn't find it. Build a new pair. */
    newpair = malloc(sizeof *newpair);
    if (newpair == NULL) {
        return;
    }
    newpair->word = strdup(index);
    newpair->defn = strdup(value);
    if (newpair->word == NULL || newpair->defn == NULL) {
        free(newpair->word);
        free(newpair->defn);
        free(newpair);
        return;
    }

    /* Link in front of `next`: last == NULL means we never advanced, so the
       new pair becomes the head of the bucket; otherwise it follows `last`
       (covers both the middle and the end of the list). */
    newpair->next = next;
    if (last == NULL) {
        table->table[bin] = newpair;
    } else {
        last->next = newpair;
    }
}
Sorry for the huge wall of text. It gives me an "Assignment from incompatible pointer type" every time I use "next".
If you declare your struct as:
struct entry { ... };
Then you should use it as
struct entry* next;
If you declare it as:
typedef struct { ... } entry;
Then you should use it as
entry* next;
In this case struct entry* next will still compile (as in your case), but will refer to an incomplete type, which is not the entry that you defined. Assigning from something that is entry* to something that is struct entry* will, therefore, give you an error.
To fix your problem just replace all occurrences of struct entry with entry.
UPDATE: that replacement alone will not work, because at the point where the member entry *next is declared, entry itself is not yet defined. You can fix it by giving the struct a tag, for example:
typedef struct entry_t {
char * word;
char * defn;
struct entry_t *next;
} entry;
Beside other problems, Dictionary is not defined. From how you use it, I am guessing it is a typedef for a pointer to a hashTable. The allocation is wrong, you allocate the size for a pointer, not for the hashTable structure. You should write:
Dictionary new_table = malloc(sizeof(*new_table));
I have this structure here:
/* A node holding an int and a link to another node; NODE is a pointer to it. */
typedef struct _open {
int x;
struct _open *next;
} *NODE;
And on my main function I declared this pointer:
NODE open = initOpen(size);
Here's the initOpen function:
NODE initOpen(int size) {
return (NODE)malloc(sizeof(struct _open)*size);
}
Is this correct? Can I access my array in the main function like open[0] to open[9]?
First of all, the way you are creating the dynamically allocated array is wrong.
I'm not sure if you actually want the thing you wrote, which is linked list, or the thing you said, which is dynamically allocated array.
Below is how you should do dynamically allocated array. Hope it helps.
By doing so, you can add as many ints into the array as you want, before you run out of memory. And you can access the array using array notation but with a pointer first: darray->array[0]
Your linked list, however, can not be accessed with this syntax.
#include <stdio.h>
#include <stdlib.h>
#define INITSIZE 8
/* Growable array of int. */
typedef struct dyarr{
int num; /* number of elements currently stored */
int max; /* number of elements the buffer has room for */
int *array; /* heap buffer holding the elements */
}arr;
arr* makeArr();
void add( arr*, int );
/*
 * Read integers from standard input until the first token that is not an
 * integer (or end of input), then print them one per line.
 */
int main( int argc, char const *argv[] ){
    int t;
    int i;
    arr* darray = makeArr();

    (void)argc;
    (void)argv;

    if( darray == NULL || darray->array == NULL ){
        fprintf( stderr, "out of memory\n" );
        free( darray );
        return EXIT_FAILURE;
    }

    /* scanf returns the number of items converted: comparing against EOF
       would loop forever on non-numeric input, which yields 0. */
    while( scanf( "%d", &t ) == 1 ){
        add( darray, t );
    }

    for( i = 0; i<darray->num; i++ ){
        printf( "%d\n", darray->array[i] );
    }
    getchar();

    free( darray->array );
    free( darray );
    return 0;
}
arr* makeArr(){
arr* A = malloc( sizeof( arr ) );
A->max = MAXSIZE;
A->num = 0;
A->array = malloc( sizeof( int )*A->max );
return A;
}
/*
 * Append i to the dynamic array a, doubling the capacity when it is full.
 * Terminates the program with an error message if the buffer cannot grow;
 * the function returns void, so it has no way to report the failure.
 */
void add( arr* a, int i ){
    if( a->num == a->max ){
        int newmax = a->max > 0 ? a->max * 2 : 1;
        /* realloc takes a size in BYTES: scale the element count, and keep
           the old pointer until the call is known to have succeeded. */
        int *grown = realloc( a->array, sizeof( *grown ) * newmax );

        if( grown == NULL ){
            fprintf( stderr, "add: out of memory\n" );
            exit( EXIT_FAILURE );
        }
        a->array = grown;
        a->max = newmax;
    }
    a->array[a->num++] = i;
}
First of all, you should respect some conventions:
/* A list node holding an int and a link to another node; nodePtr is a pointer to it. */
typedef struct node {
int x;
struct node *next;
} *nodePtr;
Second, what is the usage of the parameter size ?
According to me the right way to allocate a new nodePtr is:
nodePtr initNodePtr() {
return (nodePtr)malloc(sizeof(struct node));
}
Also, don't forget to release the memory after use:
nodePtr node = initNodePtr();
...
...
free(node); //should be wrapped in a function to respect design.
To Create an array of structure, you should do the following:
/*
 * List node holding an int and a link to another node.
 * The struct carries a tag so the member can refer to the type before the
 * typedef name `node` exists.
 */
typedef struct node {
    int x;
    struct node *next;
} node;
/*
 * Sketch: allocate one block holding 50 node structs, use it, then free it.
 * The `...` lines are placeholders for the caller's own code.
 */
int main() {
node* nodeArray = (node*)malloc(sizeof(node)*50); // 50 = size of your array
...
// do whatever you want
...
free(nodeArray);
}
Not tested, let me know if errors.
So I'm attempting to implement a hash table that will hash structures containing words.
the structures will be similar to this:
#ifndef HASHTABLE_H
/* NOTE(review): `#def` is not a preprocessor directive; `#define` is needed for the guard. */
#def HASHTABLE_H
/* Signature of a hash function: string and upper bound in, int out. */
typedef int (*HashFunctionT) (char* string, int upperbound);
/* A word plus a link. NOTE(review): the link is `struct node *`, a different tag from `struct node_`. */
struct node_
{
char * word;
struct node * next;
}
/* NOTE(review): no ';' closes the struct declaration above. */
typedef struct node_ * node;
/* The table: a size and a fixed array of 100 pointers. */
struct nodehash_
{
int size;
struct node * hash[100];
}
/* NOTE(review): no ';' closes the struct declaration above. */
typedef struct nodehash_ * nodehash;
/* NOTE(review): `Hashtable` is not declared in this header, and the
   definition further down is spelled createHashtable and returns nodehash. */
Hashtable createHashTable();
void addtohash(node list, nodehash hash);
#endif
And I want the hash function to work something like this:
#include "hashtable.h"
/*
 * Hash word with the recurrence h = 31 * h + character over all of its
 * characters and return h % hashTableSize.
 * NOTE(review): h is a signed int, so the arithmetic can overflow and the
 * result of % can be negative -- confirm the intended range.
 */
int hashFunction(char *word, int hashTableSize)
{
int length = strlen(word);
int h = 0;
int i;
for(i = 0; i<length; i++)
{
h=31 *h + word[i];
}
return h % hashTableSize;
};
/*
 * Allocate a struct nodehash_, set its size to 100 and clear its slots.
 * NOTE(review): the malloc result is used without a NULL check.
 */
nodehash createHashtable()
{
nodehash hashtable;
hashtable = malloc(sizeof(struct nodehash_));
hashtable->size = 100;
/* NOTE(review): the header declares hash as an array of 100 pointers inside
   the struct; an array member cannot be assigned a malloc result. */
hashtable->hash = malloc(100 * sizeof (node));
int i;
for (i = 0; i < hashtable->size; i++)
{
/* NOTE(review): the struct in the header has members size and hash, no `table`. */
hashtable->table[i] = NULL;
}
return hashtable;
};
/*
 * Store list in the slot of hashtable selected by hashing list->word.
 * The slot is overwritten; nothing is chained.
 */
void addtohash(node list, nodehash hashtable)
{
int nodehashnumber;
/* NOTE(review): the function above is spelled hashFunction, and `hash` is
   not a name declared here -- the parameter is called hashtable. */
nodehashnumber = hashfunction(list->word, hash->size);
/* NOTE(review): `nodehasnumber` differs from the variable nodehashnumber. */
hashtable->hash[nodehasnumber] = list;
};
And the main functio will look something like this (assume that the linked list of node structures has been created and filled).
/* Driver sketch: create the table and add the nodes of a list to it. */
int main()
{
/* NOTE(review): the definition above is spelled createHashtable. */
nodehash hashtable = createhashtable();
node nodelist;
/* here the nodelist would be created and filled and such and such*/
/* NOTE(review): nodelist is never advanced inside the loop, so the
   condition does not change between passes. */
while (nodelist->next != NULL)
{
addtohash(nodelist, hashtable);
}
/* NOTE(review): main is declared to return int but returns no value. */
return;
}
Assume that there can be no collisions, because every word to be hashed will be different.
Basically, I'm wondering if I missed any glaring, obvious mistakes or flaws in logic.
Any help would be greatly appreciated.
Thanks.
I didn't give the code an extensive read, but the first thing that stood out pretty clearly is the hash table size, 100. It is best to use a prime number for the size of your hash tables to help avoid collisions.
You seem to have a problem with semicolons:
struct node_
{
char * word;
struct node * next;
} /* <<-- HERE */
typedef struct node_ * node;
But::
/*
 * Quoted from the question: hashes word with h = 31 * h + character and
 * returns h % hashTableSize. Shown to point out the stray ';' after the
 * closing brace.
 */
int hashFunction(char *word, int hashTableSize)
{
int length = strlen(word);
int h = 0;
int i;
for(i = 0; i<length; i++)
{
h=31 *h + word[i];
}
return h % hashTableSize;
}; /* <<-- NOT here */
Also, a wise piece of advice is, IMHO, to use unsigned types wherever possible: for the hash value (what does modulo division do with a negative operand?) and for sizes and indexes.
Rule of thumb: if it can not be negative: it's unsigned.