Hashing structures in C

Hashing structures in C - c

So I'm attempting to implement a hash table that will hash structures containing words.
the structures will be similar to this:
#ifndef HASHTABLE_H
#def HASHTABLE_H
typedef int (*HashFunctionT) (char* string, int upperbound);
struct node_
{
char * word;
struct node * next;
}
typedef struct node_ * node;
struct nodehash_
{
int size;
struct node * hash[100];
}
typedef struct nodehash_ * nodehash;
Hashtable createHashTable();
void addtohash(node list, nodehash hash);
#endif
And I want the hash function to work something like this:
#include "hashtable.h"
int hashFunction(char *word, int hashTableSize)
{
int length = strlen(word);
int h = 0;
int i;
for(i = 0; i<length; i++)
{
h=31 *h + word[i];
}
return h % hashTableSize;
};
nodehash createHashtable()
{
nodehash hashtable;
hashtable = malloc(sizeof(struct nodehash_));
hashtable->size = 100;
hashtable->hash = malloc(100 * sizeof (node));
int i;
for (i = 0; i < hashtable->size; i++)
{
hashtable->table[i] = NULL;
}
return hashtable;
};
void addtohash(node list, nodehash hashtable)
{
int nodehashnumber;
nodehashnumber = hashfunction(list->word, hash->size);
hashtable->hash[nodehasnumber] = list;
};
And the main functio will look something like this (assume that the linked list of node structures has been created and filled).
int main()
{
nodehash hashtable = createhashtable();
node nodelist;
/* here the nodelist would be created and filled and such and such*/
while (nodelist->next != NULL)
{
addtohash(nodelist, hashtable);
}
return;
}
Assume that there can be no collisions, because every word to be hashed will be different.
BAsically, I'm wondering if I missed and glaring, obvious mistakes or flaws in logic.
Any help would be greatly appreciated.
Thanks.

I didn't give the code an extensive read, but the first thing that stood out pretty clearly is the hash table size, 100. It is best to use a prime number for the size of your hash tables to help avoid collisions.

You seem to have a problem with semicolons:
struct node_
{
char * word;
struct node * next;
} /* <<-- HERE */
typedef struct node_ * node;
But::
int hashFunction(char *word, int hashTableSize)
{
int length = strlen(word);
int h = 0;
int i;
for(i = 0; i<length; i++)
{
h=31 *h + word[i];
}
return h % hashTableSize;
}; /* <<-- NOT here */
Also, a wise advice is IMHO to use as many unsigned types as possible: for the hash value (what does modulo division do with a negative operand?) and for sizes and indexes.
Rule of thumb: if it can not be negative: it's unsigned.

Related

When resizing a hash table how to point to the new array of buckets

When testing my code it seems as though when I am resizing my bucket array the hash_table struct is not pointing to my new resized hash table. Is this the route to take when resizing a hash table? Or instead of having buck_array being hash_entry ** should it have been a hash_entry *
also hash_key() function gives the index in the bucket array based on char *string.
relevant structs:
typedef struct hash_entry_{
char *string;
void *data;
struct hash_entry_ *next;
}hash_entry;
typedef struct hash_table_{
hash_entry ** buck_array;
unsigned num_buckets;
} hash_table, *Ptrhash_table;
My resize function:
void resize_hash(Ptrhash_table table, int size) {
unsigned int old_Bnum = table->num_buckets;
table->num_buckets = size;
int i;
hash_entry *cur;
hash_entry **new_arr = (hash_entry **)calloc(size, sizeof(hash_entry));
for (i = 0; i < size; i++){
new_arr[i] = NULL;
}
for (i = 0; i <= old_Bnum; i++) {
while((table->buck_array[i]) != NULL){
cur = table->buck_array[i];
table->buck_array[i] = cur->next;
cur->next = new_arr[hash_key(table, cur->string)];
new_arr[hash_key(table, cur->string)] = cur;
}
}
free(table->buck_array);
table->buck_array = new_arr;
}

C Language - Rehashing a Separate Chaining Hash Table

So I looked everywhere to get inspired but I didn't really find anything for rehashing a hash table using separate chaining method. So I tried myself, I think I know what I'm doing wrong, but I don't know how else to implement it, please help.
Everything works, except the new added function rehash()
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
struct list_node
{
struct list_node *next;
char *key;
char *value;
};
struct hash_table
{
int table_size;
struct list_node **list_arr;
};
unsigned int hash(const char *key, unsigned int table_size);
struct hash_table *initialize(unsigned int table_size);
struct list_node *find(struct hash_table *H, const char *key);
void insert(struct hash_table *H, const char *key, const char *value);
void dump(struct hash_table *H);
void del(struct hash_table *H, const char *key);
struct hash_table *rehash(struct hash_table *H);
unsigned int
hash(const char *key, unsigned int table_size)
{
unsigned long int hashx = 0;
for(int i=0;key[i];i++)
{
hashx = (hashx<<5) + key[i];
}
return (hashx%table_size);
}
struct hash_table
*initialize(unsigned int table_size)
{
struct hash_table *H = malloc(sizeof(*H));
H->list_arr = malloc(sizeof(*H->list_arr)*table_size);
H->table_size = table_size;
for(unsigned int i = 0; i<table_size; i++)
{
H->list_arr[i] = malloc(sizeof(*H->list_arr[i]));
H->list_arr[i]->next = NULL;
}
return H;
}
void
insert(struct hash_table *H, const char *key, const char *value)
{
unsigned int index = hash(key, H->table_size);
struct list_node *head = H->list_arr[index];
struct list_node *current = head->next;
while(current!=NULL)
{
if(strcmp(current->key,key)==0)
{
free(current->value);
current->value = malloc(strlen(value)+1);
strcpy(current->value,value);
return;
}
current=current->next;
}
struct list_node *newNode = malloc(sizeof(*H->list_arr[index]));
newNode->next = head->next;
head->next = newNode;
newNode->key = malloc(strlen(key)+1);
newNode->value = malloc(strlen(value)+1);
strcpy(newNode->key,key);
strcpy(newNode->value,value);
}
void
dump(struct hash_table *H)
{
for( int i = 0; i<H->table_size; i++)
{
struct list_node *entry = H->list_arr[i]->next;
if(entry==NULL){continue;}
printf("Index[%d]: ", i);
while(entry!=NULL)
{
printf("\t%s|%s\t--> ", entry->key, entry->value);
entry = entry->next;
}
printf("\tNULL");
printf("\n");
}
}
void delete(struct hash_table *H, const char *key)
{
unsigned int index = hash(key,H->table_size);
struct list_node *prev = H->list_arr[index];
while(strcmp(prev->next->key,key)!=0)
{
if(prev->next==NULL){printf("Key not found!");return;}
prev=prev->next;
}
struct list_node *temp = prev->next;
prev->next = temp->next;
free(temp);
}
struct hash_table *rehash(struct hash_table *H)
{
unsigned int old_size = H->table_size;
struct list_node *old_entries = H->list_arr;
H = initialize(2*old_size);
for(unsigned int i = 0; i<old_size; i++)
{
while(old_entries[i]!=NULL)
{
insert(H,old_entries[i].key,old_entries[i].value);
old_entries[i] = old_entries[i]->next;
}
}
free(old_entries);
return H;
}
int main()
{
struct hash_table *H = initialize(20);
insert(H,"name1","David");
insert(H,"name2","Radka");
dump(H);
H = rehash(H);
dump(H);
return 1;
}
I think doing old_entries[i] is wrong, but nothing else comes to mind, please help me resolve this.

OK! After thinking about it for a while, I realized I created a struct list_node pointer variable that points to H->list_arr which is an array of pointers. That was my mistake. I was supposed to declare it as a double pointer.
Here's the modified rehash() function:
struct hash_table *rehash(struct hash_table *H)
{
unsigned int old_size = H->table_size;
struct list_node **old_entries = H->list_arr;
H = initialize(2*old_size);
for(unsigned int i = 0; i<old_size; i++)
{
old_entries[i] = old_entries[i]->next;
while(old_entries[i]!=NULL)
{
insert(H,old_entries[i]->key,old_entries[i]->value);
old_entries[i] = old_entries[i]->next;
}
}
free(old_entries);
return H;
}
with this code, you will have to return the address of the new hash_table to the pointer pointing to the old hash_table --> [H = rehash(H)] since passing the pointer H as a parameter will only change it locally. Therefore, I tried a second version (because I'm too lazy;) and inattentive and might forget to reassign it) where I don't have to return anything, I want to change it simply by calling the function and my pointer points to the new hash_table automatically -> [rehash(&H)], here's the other "lazy" alternative:
void
rehash(struct hash_table **H)
{
unsigned int old_size = (*H)->table_size;
struct list_node **old_entries = (*H)->list_arr;
*H = initialize(2*old_size);
for(unsigned int i = 0; i<old_size; i++)
{
old_entries[i] = old_entries[i]->next;
while(old_entries[i]!=NULL)
{
insert(*H,old_entries[i]->key,old_entries[i]->value);
old_entries[i] = old_entries[i]->next;
}
}
free(old_entries);
}
If I'm doing something that's inefficient (in terms of space and time), please let me know, as I am only in Bachelor's 3rd semester of CS and we have only started DSA this semester.

The thing you are doing by putting dummy elements at the beginning of each bin is a good idea, but you don't need to allocate such dummies with malloc(). You can just make the bin array an array of nodes instead of pointers to nodes. Then you have allocated the dummies when you have allocated the array. So you could define your hash table as
struct hash_table
{
int table_size;
struct list_node *list_arr;
};
(instead of using struct list_node **list_arr).
When you loop through the bins in the initialisation, you have to set the bins' next pointer to NULL, but not allocate them.
struct hash_table
*initialize(unsigned int table_size)
{
struct hash_table *H = malloc(sizeof(*H));
H->list_arr = malloc(sizeof(*H->list_arr)*table_size);
H->table_size = table_size;
for(unsigned int i = 0; i<table_size; i++)
{
// no malloc here!
H->list_arr[i].next = NULL;
}
return H;
}
Anyway, that is not pertinent to the rehashing, just a suggestion. But because you have the dummy elements as bins, you can refactor your code (that is the reason I think the dummies are such a good idea). You can get the bin from the table and work from there, without worrying about the table itself after that. You can get the relevant bin for a key with
struct list_node *get_bin(struct hash_table *H, const char *key)
{
unsigned int index = hash(key, H->table_size);
return &H->list_arr[index];
}
and you can find the node in a bin with
struct list_node *find_node(struct list_node *bin, const char *key)
{
for (struct list_node *current = bin->next;
current;
current = current->next) {
if(strcmp(current->key,key)==0) return current;
}
return 0;
}
and, for example, simplify insertion to
void prepend_node(struct list_node *node, struct list_node *bin)
{
node->next = bin->next;
bin->next = node;
}
void insert(struct hash_table *H, const char *key, const char *value)
{
struct list_node *bin = get_bin(H, key);
struct list_node *node = find_node(bin, key);
if (node) {
// update node
free(node->value);
node->value = malloc(strlen(value)+1);
strcpy(node->value,value);
} else {
// prepend new node
prepend_node(new_node(key, value), bin);
}
}
where the new_node() function looks like
struct list_node *new_node(const char *key, const char *value)
{
struct list_node *node = malloc(sizeof *node);
if (!node) abort(); // add some error handling here
node->key = malloc(strlen(key)+1);
if (!node->key) abort(); // add some error handling here
strcpy(node->key,key);
node->value = malloc(strlen(value)+1);
if (!node->value) abort(); // add some error handling here
strcpy(node->value,value);
return node;
}
Because the bins are embedded in the array, you can safely assume in all the functions that they aren't NULL, which can save you from testing some special cases.
It is not shorter code, because I split it into several functions, but in my opinion, it is more readable when each function does one simple thing. Here, getting the bin, finding the key in a bin, creating a node, pretending to a bin, etc. With "raw" malloc() and strcpy() and such, scattered through the code, it is harder to track that everything works correctly. The total lines of code grew, but each function is shorter and simpler. And you can get away with it, because you can work on bins as lists, without accessing the hash table array, exactly because all bins have a dummy head element.
You can now rewrite rehash() to just prepend to bins. You know that all the keys in the old bins are unique, so you don't need to check anything. You just put each node at the front of its new bin:
struct hash_table *rehash(struct hash_table *H)
{
unsigned int old_size = H->table_size;
struct list_node *old_entries = H->list_arr;
free(H); // You forgot to free this one!
H = initialize(2*old_size);
for(unsigned int i = 0; i<old_size; i++)
{
struct list_node *old_bin = &old_entries[i];
for (struct list_node *node = old_bin->next;
node; node = node->next) {
// just prepend to new bin; the key should be unique
prepend_node(node, get_bin(H, node->key));
}
}
free(old_entries);
return H;
}
I added a free(H) because you forgot to free memory for H, but it would be more efficient to update H without creating a new table. You can separate initialisation and allocation. But you do not gain terribly much as initialising the bins is the time-consuming part.
Speaking of freeing, though. Remember to write a function for freeing a hash table (that remembers to free the bins, including all the nodes). Don't use it with rehashing, of course, if you free H before you update it--you need to keep the nodes around--but you do want such a function.

How to initialize a hashmap in c?

I need to initialize a hashmap in C. I have created structs for hashnode and hashmap which will be shown below, but I need to send it to a function
void hashmap_init(hashmap_t *hm, int table_size);
and I need to initialize the hash map 'hm' to have given size and item_count 0. Have to ensure that the 'table' field is initialized to an array of size 'table_size' and filled with NULLs.
typedef struct hashnode {
char key[128];
char val[128];
struct hashnode *next;
} hashnode_t;
typedef struct {
int item_count;
int table_size;
hashnode_t **table;
} hashmap_t;
#define HASHMAP_DEFAULT_TABLE_SIZE 5

Use malloc() to allocate an array of table_size buckets.
void hashmap_init(hashmap_t *hm, int table_size) {
hm->item_count = 0;
hm->table_size = table_size;
hm->table = malloc(table_size * sizeof *(hm->table));
for (int i = 0; i < table_size; i++) {
hm->table[i] = NULL;
}
}
If you need to remove the hash map, you reverse the allocations:
for (int i = 0; i < hm->table_size; i++) {
free(hm->table[i]);
}
free(hm->table);

Malloc large space of struct and accessing it like arrays in c

I'm dealing with implementing a hash table. My understanding of a hashtable is that is that to have an array like table where you're able to access the elements quickly by getting the hash value and modding it by the table size. So my initial thought was declaring
Node *hTable [100];
where
typedef struct node {
char *s;
int value;
} Node;
and going to the index of the array and malloc a new element that belongs there. But, the problem is that I need to grow my table.
So, my question is, how would I make a dynamic table, but access it like an array? (e.g table[i]).
I know that you need to call something like
Node *table = (Node*)malloc(sizeof(Node)*size);
which lets you access it like a table table[i] =... but if I did that, I can't declare a new Node in the index of the table
table[i]=(Node*)malloc(sizeof(Node));
Here's a code that I've been testing with (getting seg fault) to better give a view of the problem:
1 #include <stdio.h>
2 #include <stdlib.h>
3
4 typedef struct node {
5 int data;
6 struct node *next;
7 } Node;
8
9
10 void main() {
11 Node **list;
12 *list = (Node*)malloc(sizeof(Node)*10);
13 for (int i = 0; i < 10; i++) {
14 list[i] = (Node*)malloc(sizeof(Node)); //problem here?
15 list[i]->data = i;
16 list[i]->next = NULL;
17 }
18 printf("printing...\n");
19 for (int i = 0; i < 10; i++) {
20 printf("%d ", list[i]->data);
21 }
22 }

Your problem is how you allocate space for list. list is uninitialized and does not point to valid memory, you must allocate space for it first, and then allocate space for each element:
#include <stdio.h>
#include <stdlib.h>
typedef struct node
{
int data;
struct node *next;
} Node;
int main() //return type of main is int
{
Node **list;
list = malloc(10 * sizeof *list); //allocate memory for list not *list, also no need to cast return value of malloc.
for (int i = 0; i < 10; i++)
{
list[i] = malloc(sizeof *list[i]); //allocate space for each element.
list[i]->data = i;
list[i]->next = NULL;
}
printf("printing...\n");
for (int i = 0; i < 10; i++)
{
printf("%d ", list[i]->data);
}
return 0;
}

It doesn't need to be an array of pointers, you can simply make an array of nodes, with:
Node *list = malloc(sizeof *list * count);
Then you can access list[i].s and list[i].value.
When you want to grow the table, you use realloc():
new_list = realloc(list, sizeof *list * new_count);
if (new_list) {
list = new_list;
} else {
// report allocation failure
}

#include <stdio.h>
#include <stdlib.h>
typedef struct node {
int data;
struct node *next;
} Node;
int main() {
// just initialize it this way ( previous was undefined behavior, dereferencing an initialize pointer)
Node **list= malloc(sizeof(Node*)*10);
for (int i = 0; i < 10; i++) {
list[i] = malloc(sizeof(Node*)); //problem here?
list[i]->data = i;
list[i]->next = NULL;
}
printf("printing...\n");
for (int i = 0; i < 10; i++) {
printf("%d ", list[i]->data);
}
}

C - Deleting a node in a linked list

I'm working on a delete function in my code. I want to delete the key, value pair within the node and free the space allocated to it. I'm not sure how to approach this so that the following nodes shift into the right place (not sure how to word it, hope you know what I mean). Here is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include "symTable.h"
#define DEFAULT_TABLE_SIZE 61
#define HASH_MULTIPLIER 65599
/*Structures*/
typedef struct Node
{
char *key;
void *value;
struct Node *next;
} Node_T;
typedef struct SymTable
{
Node_T **Table;
int tablesize;
int counter;
} *SymTable_T;
/*Global Variables*/
int tablesize = DEFAULT_TABLE_SIZE;
int counter = 0;
/*Create function to show how memory is allocated*/
SymTable_T SymTable_create(void)
{
SymTable_T S_Table;
S_Table = malloc(sizeof(SymTable_T *) * DEFAULT_TABLE_SIZE);
S_Table->Table = (Node_T **) calloc(DEFAULT_TABLE_SIZE, sizeof(Node_T *));
return S_Table;
}
/*Hash Function*/
static unsigned int hash(const char *key, const int tablesize)
{
int i;
unsigned int h = 0U;
for (i = 0; key[i] != '\0'; i++)
h = h * tablesize + (unsigned char) key[i];
return h % tablesize;
}
/*Delete Function*/
int symTable_delete(SymTable_T symTable, const char *key)
{
Node_T *new_list;
unsigned int hashval = hash(key, DEFAULT_TABLE_SIZE);
free(new_list->key);
free(new_list->value);
//here is where I am stuck, how can I make it so the nodes following the one deleted go to the right space?
}

With a singly linked list you have
A -> B -> C
if you want to remove B then you need to make
A -> C
The only way to do this is to get the parent of B and update its pointer which means either
You need to add a pointer from a node to its parent ( aka use a doubly linked list )
You iterate over the list until you find a node where the child pointer is set to the node you are removing and then you update the pointer to instead point to child.child

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

Hashing structures in C - c

I didn't give the code an extensive read, but the first thing that stood out pretty clearly is the hash table size, 100. It is best to use a prime number for the size of your hash tables to help avoid collisions.

Related

When resizing a hash table how to point to the new array of buckets

C Language - Rehashing a Separate Chaining Hash Table

How to initialize a hashmap in c?

Malloc large space of struct and accessing it like arrays in c

C - Deleting a node in a linked list

Categories

Resources