Rehashing hashtable - c

When I debug I find out that in the temporary hashtable - 'hashtable2' no elements are inserted. All other parts of the hashtable code work.
I am not sure why the no values get inserted into hashtable2
void rehash(struct HashTable* hashTable, int keySize)
{
int i;
struct HashElement* current;
int i2 = 0;
struct List* word;
struct List* key;
struct HashTable* hashTable2 = hashTableConstructor(33524, keySize);
hashTable->keySize = keySize;
for(i=0; i<hashTable->numBuckets; i++) //for every bucket
{
current = hashTable->buckets[i];
//'walk' the linked list of HashElements in the bucket
while( current != NULL )
{ //till the end of hash table
word = listConstructor();
key = listConstructor();
word = current->value;
for(i2 = 0; i2<keySize; i2++)
{
listAdd(key, tolower(getCharacter(word,i2)));
}
insert(hashTable2, word, key);
listDestructor(key);
listDestructor(word);
current = current->next;
}
}
hashTableDestructor(hashTable);
hashTable = hashTableConstructor(33524, keySize);
hashTable = hashTable2;
}

Related

Hashtable with linked list not work in c?

I've a problem with memory allocation for an hash table with linked list (for avoid collisions) in C.
I think that the problem is on allocation of an item.
I've made two scruct, one for the single item and one for the table.
The first have two pointer to next and prev item.
Please help me.
I stay on this code until 3 days.
The code :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define CAPACITY 50000
unsigned long hash(char *str) {
unsigned long int stringsum = 0;
for(; *str != '\0'; str++) {
stringsum += *str;
}
return stringsum % CAPACITY;
}
typedef struct item {
char *value;
char *key;
struct item *next;
struct item *prev;
} ht_item;
typedef struct hashtable {
ht_item **items;
int dim;
int count;
} HashTable;
HashTable* create_table(int size); HashTable* create_item(HashTable *table, char *value, char *key);
void print_table(HashTable* table, int dim);
int main(void) {
HashTable *table = create_table(CAPACITY);
table = create_item(table, "Giuseppe", "Nome");
print_table(table, CAPACITY);
return 0;
}
HashTable* create_item(HashTable *table, char *value, char *key) {
unsigned long index = hash(key);
printf("%u", index);
ht_item *_iterator; ht_item *prev;
for(_iterator = table->items[index], prev = NULL; _iterator != NULL; prev = _iterator, _iterator = _iterator->next);
_iterator = (ht_item*)malloc(sizeof(ht_item));
_iterator->key = (char*)malloc(200);
_iterator->value = (char*)malloc(200);
strcpy(_iterator->key, key);
strcpy(_iterator->value, value);
_iterator->next = NULL;
_iterator->prev = prev;
return table;
}
HashTable* create_table(int size)
{
HashTable *table = (HashTable*)malloc(sizeof(HashTable));
table->dim = size;
table->items = (ht_item**)calloc(size, sizeof(ht_item*));
for(int i = 0; i < size; i++){
table->items[i] = NULL;
}
return table;
}
void print_table(HashTable* table, int dim) {
for(int i = 0; i < CAPACITY; i++)
{
if(table->items[i] != NULL)
{ ht_item *_iterator = (ht_item*)malloc(sizeof(ht_item));
for(_iterator = table->items[i]; _iterator != NULL;
_iterator = _iterator->next)
{
printf("Key: %s\tValue: %s\n", _iterator->key, _iterator->value);
} free(_iterator);
}
}
}
Made some changes in your code. Please read through the blocks containing // CHANGE HERE comment.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define CAPACITY 50000
// CHANGE HERE - additional parameter, value to be used for modulo
unsigned long hash(char *str, unsigned int mod_value) {
unsigned long int stringsum = 0;
for(; *str != '\0'; str++) {
stringsum += *str;
}
// CHANGE HERE - use mod_value instead of CAPACITY
return stringsum % mod_value;
}
typedef struct item {
char *value;
char *key;
struct item *next;
struct item *prev;
} ht_item;
typedef struct hashtable {
ht_item **items;
int dim;
int count;
} HashTable;
HashTable* create_table(int size); HashTable* create_item(HashTable *table, char *value, char *key);
void print_table(HashTable* table, int dim);
int main(void) {
HashTable *table = create_table(CAPACITY);
table = create_item(table, "Giuseppe", "Nome");
print_table(table);
return 0;
}
HashTable* create_item(HashTable *table, char *value, char *key) {
// CHANGE HERE - function arguments validation
if (table == NULL)
{
return table;
}
if (value == NULL || key == NULL)
{
printf("Key or value is null\n");
return table;
}
// CHANGE HERE - pass table->dim to hash
unsigned long index = hash(key, table->dim);
printf("Index: %lu\n", index);
// CHANGE HERE - simplified the code a bit
ht_item* new_node = malloc(sizeof(ht_item));
new_node->key = malloc(200 * sizeof(char));
strncpy(new_node->key, key, 200);
new_node->value = malloc(200 * sizeof(char));
strncpy(new_node->value, value, 200);
// CHANGE HERE - if first node in index
if (table->items[index] == NULL)
{
table->items[index] = new_node;
return table;
}
ht_item *cur, *prev = NULL;
for(cur = table->items[index]; cur != NULL; prev = cur, cur = cur->next);
prev->next = new_node; // CHANGE HERE - it seems this line was missing
new_node->prev = prev;
new_node->next = NULL;
return table;
}
HashTable* create_table(int size)
{
HashTable *table = (HashTable*)malloc(sizeof(HashTable));
table->dim = size;
table->items = (ht_item**)calloc(size, sizeof(ht_item*));
for(int i = 0; i < size; i++){
table->items[i] = NULL;
}
return table;
}
void print_table(HashTable* table) {
// CHANGE HERE - function arguments validation
if (table == NULL)
{
printf("Table is null\n");
return;
}
// CHANGE HERE - change CAPACITY to dim
for(int i = 0; i < table->dim; i++)
{
//printf("i = %d [%d]\n", i, table->items[i] == NULL);
if(table->items[i] != NULL)
{
// CHANGE HERE - removed unnecessary malloc
ht_item *_iterator = NULL;
for(_iterator = table->items[i]; _iterator != NULL; _iterator = _iterator->next)
{
printf("Key: %s\tValue: %s\n", _iterator->key, _iterator->value);
}
}
}
}
The create_item function can and should be simplified.
I have put some comments inline.
HashTable* create_item(HashTable *table, char *value, char *key) {
// use modulo operator here, not in the hash function
unsigned long index = hash(key) % table->dim;
// nicer way of allocating
ht_item *insert = malloc(sizeof *insert);
// use strdup to avoid wasted memory and buffer overflows
insert->key = strdup(key);
insert->value = strdup(value);
// head insert rather than tail
insert->next = table->items[index];
table->items[index] = insert;
return table;
}
I dropped the use of the prev member. If you need that somewhere it's an exercise for you to add it. I don't think it's necessary for a simple hash table.

How to add to a linked list within a separate chaining hash table in C

Here are the relevant structs for my question.
//A SymEntry is the building block for linked lists of (name, attribute) pairs
typedef struct SymEntry {
char * name;
void * attribute;
struct SymEntry * next;
} SymEntry;
/*
Each symbol table is represented by a SymTab
size is the current number of lists in the separate chaining hash table
contents is an array of lists (i.e. points to the zeroth element in the array)
if current is not NULL it points to the current (name, attribute) pair in the symbol table
*/
typedef struct {
int size;
SymEntry ** contents;
SymEntry *current;
} SymTab;
I have a project to create a Symbol Table in c. We are to implement a separate chaining hash table to accomplish this. I believe that I created the initial, empty hash table correctly. Below is my implementation of that.
SymTab * createSymTab(int size) {
int i;
SymTab *symbolTable = malloc(sizeof(SymTab));
symbolTable->contents = (SymEntry**)malloc(size * sizeof(SymEntry));
symbolTable->current = (SymEntry*)malloc(sizeof(SymEntry));
symbolTable->size = size;
for (i=0; i<size; i++) {
SymEntry *newEntry = malloc(sizeof(SymEntry));
newEntry -> name = NULL;
newEntry -> attribute = NULL;
newEntry -> next = NULL;
symbolTable->contents[i] = newEntry;
}
symbolTable->current = NULL;
return symbolTable;
}
I seem to also have it working to where it can add the first node (SymEntry) in the linked list. Below is my code to add an entry, along with my hash method.
int enterName(SymTab * table, char *name) {
if (findName(table, name) == 0) {
int size = table->size;
int hashNum = hash(name, &size);
SymEntry *head = table->contents[hashNum];
printf("Hash Number is %d\n", hashNum);
if (head->name == NULL) {
printf("Head is null\n");
head->name = name;
head->attribute = NULL;
}
else {
printf("Head is not null\n");
SymEntry *newNode = malloc(sizeof(SymEntry));
newNode->name = name;
newNode->attribute = NULL;
newNode->next = head;
head = newNode;
}
return 0;
}
return 1;
}
int hash(char *key, int * size) {
int hash = 0;
int i = 0;
int sizeOfNum = *size;
printf("Key Value: %s Size of Number: %d\n", key, sizeOfNum);
while (key && key[i]) {
hash = (hash + key[i] % sizeOfNum);
i++;
}
return hash % sizeOfNum;
}
Lastly, the below code is what I am using to test things out. If my understanding of everything is correct, the name that should be printing is Jess, my second entry, but instead I am only seeing Wes. Both of these names hash out to the same number, which in this case would be 5. What exactly am I doing wrong when I go to add a node (SymEntry) to the list? My output recognizes that the head is not empty when I go to add Jess, so I know the first entry works.
int main(void) {
SymTab * symbolTable = createSymTab(6);
enterName(symbolTable, "wes");
enterName(symbolTable, "jess");
SymEntry * example = symbolTable->contents[5];
printf("%s\n", example->name);
return 0;
}

How can I get the address of a pointer for my linked list such that I can properly set it in a hashtable after removing an item in c?

When removing an element from a hashtable, I need to traverse through linked-lists for elements that collide. I am using pointer operations to do this, and my linked list is in the form of bucket_t. The problem I am facing is that when I try to save the location of the first ht->bucket[I], that value changes along with the others, so at the end of the function, my head is right at the spot of next and results in a segmentation fault. I am new to working with pointers like this in c, and I apologize if my explanation is bad, but I think the code is fairly simple for you guys to see what I am trying to achieve:
void ht_del(hashtable_t *ht, char *key) {
bucket_t *last=NULL, *next=NULL, *head=NULL;
unsigned long i;
for(i = 0; i < ht->size; i++){
head = ht->buckets[i];
next = ht->buckets[i];
while(next && next->key && strcmp(next->key,key)!=0){
last = next;
next = next->next;
printf("\nvisiting next\n");
printf("key = %s\n", head->key);
}
if(next && next->key && strcmp(next->key,key)==0){
printf("key found, removing key = %s, val = %s:", next->key, next->val);
free(next->key);
free(next->val);
if(next->next){
last->next = next->next;
printf("Last->next ->key = %s\n", last->next->key);
}
else{
free(next->next);
printf("end of the line\n");
}
free(next);
printf("head key = %s", head->key);
}
}
}
Additionally, to help understand the structs im using:
typedef struct hashtable hashtable_t;
typedef struct bucket bucket_t;
struct bucket {
char *key;
void *val;
bucket_t *next;
};
struct hashtable{
unsigned long size;
bucket_t **buckets;
};
How can I get the address of a pointer for my linked list such that I can properly set it in a hashtable after removing an item... ?
To remove a node from a linked list, keep track of the previous node.
for(i = 0; i < ht->size; i++){
bucket_t before_head = { .next = ht->buckets[i] }; // Only next member used.
bucket_t *previous = &before_head;
while (previous->next && strcmp(previous->next->key,key) != 0) {
previous = previous->next;
}
if (previous->next) { // match was found
// delete previous->next and its members allocations
bucket_t *node_after_match = previous->next->next;
free(previous->next->key);
free(previous->next->val);
free(previous->next);
// link previous to node after deletion.
previous->next = node_after_match;
// assign a potential new head of the list
ht->buckets[i] = before_head.next;
break; // exit for loop
}
}
As hinted by #Pablo, I'd expect a hash function instead of a for() loop to rapidly find the hash table index. Something like:
// for(i = 0; i < ht->size; i++){
i = hash(key)%ht->size;
void ht_del(hashtable_t *ht, char *key) {
unsigned long i;
i = hash(key)%ht->size;
bucket_t before_head = { .next = ht->buckets[i]};
bucket_t *previous = &before_head;
while(previous->next && strcmp(previous->next->key,key)!=0) {
previous = previous->next;
}
if(previous->next ) {
bucket_t *next = previous->next->next;
bucket_t *b = previous->next;
free(previous->next->key);
free(previous->next->val);
previous->next = next;
free(b);
ht->buckets[i] = previous->next;
}
}

Segfault when reading from update linked list

So I'm working on an implementation of a hash table. I'm not very experienced with C and pointers and getting a bit stuck.
I've got hashtable definition that looks like this:
typedef struct KVnode {
int kv[2];
struct KVnode *next;
} KVnode;
typedef struct hashtable {
int size; // size of hash table
int entries; // number of slots allocated in table
KVnode *table; /* pointer to table. Each entry will point to linked list
of key-value nodes */
} hashtable;
Where the hash table struct contains a table of KVnode pointers. The KVnodes are essentially a linked list to store collisions.
My implementation of put looks like this:
void put(hashtable* ht, keyType key, valType value){
int index = hash_key(key, ht->size);
KVnode *new_node = malloc( sizeof(KVnode) );
new_node->kv[0] = key;
new_node->kv[1] = value;
new_node->next = NULL;
printf("Inserting at index: %i, key:%i, val:%i \n", index, key, value);
// If next val is 0, we can set the first node to key-value
if( ht->table[index].next == 0)
ht->table[index] = *new_node;
else{ // find last node in linked list and append new_node
KVnode cn = ht->table[index];
while( cn.next != NULL )
cn = *cn.next;
cn.next = new_node;
}
}
And here the overall code:
#include <stdio.h>
#include <stdlib.h>
#define SIZE 503
typedef struct KVnode {
int kv[2];
struct KVnode *next;
} KVnode;
typedef struct hashtable {
int size; // size of hash table
int entries; // number of slots allocated in table
KVnode *table; /* pointer to table. Each entry will point to linked list
of key-value nodes */
} hashtable;
typedef int keyType;
typedef int valType;
void init(hashtable**);
int hash_key(keyType key, int size);
void put(hashtable* ht, keyType key, valType value);
void init(hashtable** ht) {
*ht = (hashtable *) malloc( sizeof(hashtable) );
if(*ht == NULL){
printf( "Error: Unable to allocate memory for hashtable" );
exit(1);
}
else{
(*ht)->entries = 0;
(*ht)->size = SIZE;
(*ht)->table = calloc((*ht)->size , sizeof(KVnode *));
}
}
int hash_key(keyType key, int size){
return key % size;
}
void put(hashtable* ht, keyType key, valType value){
int index = hash_key(key, ht->size);
KVnode *new_node = malloc( sizeof(KVnode) );
new_node->kv[0] = key;
new_node->kv[1] = value;
new_node->next = NULL;
printf("Inserting at index: %i, key:%i, val:%i \n", index, key, value);
// If next val is 0, we can set the first node to key-value
if( ht->table[index].next == 0)
ht->table[index] = *new_node;
else{ // find last node in linked list and append new_node
KVnode cn = ht->table[index];
while( cn.next != NULL )
cn = *cn.next;
cn.next = new_node;
}
}
int main(){
hashtable *t = NULL;
init(&t);
put(t, 225, 100);
put(t, 55555, 100);
printf("node at 255, k:%i, v:%i\n", t->table[225].kv[0],t->table[225].kv[1] );
printf("node at 255, 2nd node, k:%i, v:%i\n",
t->table[225].next->kv[0],t->table[225].next->kv[1] );
free(t);
}
The program compiles fine but at runtime i get at segfault. Here's the output:
ds-MacBook-Pro:project0 d$ ./ll
Inserting at index: 225, key:225, val:100
Inserting at index: 225, key:55555, val:100
node at 255, k:55555, v:100
Segmentation fault: 11
I can't figure out if the issue is my print statement or if I'm actually not appending the linked list correctly in put.

Array of pointer to the linked list

I'm working of my assignment, in C, which to store 500 string into strings of 5 char by the mean of hash table with chaining method to fix the collision.
Hashing algorithm : to add up the ASCII value and apply the modulus operator to the result.
The hash table store the hash key generated and a pointer which points to a linked list. Each linked list has more than one element if there are more than one 5-char string that gives the same hash key.
So far this is my code. I compiled it (Codeblock) and it appears that there is no error. However the program crashed.
Please give some inputs on where did i do wrong.
#include <stdio.h>
#include <string.h>
#define SLEN 500
#define WLEN 5
#define MPRIME 73
struct Node {
char s[WLEN+1]; // array to hold the 5-letter word
int sindex; // starting index of the word
struct Node * next; // a pointer to the next word in the list
};
int searchword(char *);
int hashfunc(char *);
void build_hashtbl();
struct Node * hashtable[MPRIME] = {NULL};
char string[SLEN+1] = "thenamewasfamiliartomeonseverallevelslookingbackitwasfatethatifoundhimihadcometopeppervillebeachtocloseonasmallhousethathadbeeninourfamilyforyearsonmywaybacktotheairportistoppedforcoffeetherewasafieldacrossthestreetwherekidsinpurpletshirtswerepitchingandhittingihadtimeiwanderedoverasistoodatthebackstopmyfingercurledinthechainlinkfenceanoldmanmaneuveredalawnmoweroverthegrasshewastannedandwrinkledwithahalfcigarinhismouthheshutthemowerwhenhesawmeandaskedifihadakidoutthereisaidnoheaskedwhatiwasdoing";
int main(void) {
int index;
char query[WLEN+1];
build_hashtbl(); // prepare the hash table
printf("Enter a 5-letter word to search: ");
scanf("%s", query);
index = searchword(query);
if (index != -1)
printf("The word %s starts at index %d.\n", query, index);
else
printf("The word %s is not found.\n", query);
return 0;
}
int searchword(char * word) {
int hashval;
struct Node * lhead;
hashval = hashfunc(word);
lhead = hashtable[hashval];
while (lhead) {
if (strcmp(lhead->s,word) == 0)
return lhead->sindex;
lhead = lhead->next;
}
return -1;
}
int hashfunc(char *){
int hashval = 0;
int i = 0;
for (i = 0; i < WLEN; i++){
hashval += (int) string[i];
}
return (int) (hashval % MPRIME);
}
void build_hashtbl(){
struct Node *hashtable[MPRIME]; //already declared. put here for ease
struct Node * head = NULL;
struct Node * last = NULL;
int i = 0;
int k = 0;
int key = 0;
char sElement[WLEN+1] = {0};
for (i = 0; i <SLEN; i = i+WLEN){ //for every 5 char, find they hashtable index key
key = hashfunc(*string[i]);
for (k = 0; k <WLEN; k++){ //create a new string, sElement from the 5 letter word
sElement[k] = string[i+k];
}
if (hashtable[key] != (NULL)){ //if the hashtable element at that index is empty, STORE it in a node
hashtable[key] = head;
struct Node *new_node;
new_node = (struct Node *) malloc ( sizeof (struct Node) );
strcpy(new_node->s, sElement); //put the new 5 letter word string into the node
new_node->sindex = i; //put the starting index of this word
new_node->next = NULL; //the next pointer is set to NULL
head->next = new_node; //finally set the head node to point to this new node
last = new_node; //set the new node as the last node
}
else { //if there is already a node in the array
struct Node *new_node;
new_node = (struct Node *) malloc ( sizeof (struct Node) );
strcpy(new_node->s, sElement); //put the new 5 letter word string into the node
new_node->sindex = i; //put the starting index of this word
new_node->next = NULL; //the next pointer is set to NULL
head->next = new_node; //finally set the head node to point to this new node
last->next = new_node; //set the last node to point to thew new created node
last = new_node; //set the new node as the last node
}
}
}
Your used last and head uninitialized, so head->next and friends would segfault. In fact you don't need them at all and you don't need your if branches - just replace hashtable[key] by new_node after setting new_node->next to hashtable[key]
void build_hashtbl(){
int i = 0;
int k = 0;
int key = 0;
char sElement[WLEN+1] = {0};
for (i = 0; i <SLEN; i = i+WLEN){ //for every 5 char, find they hashtable index key
key = hashfunc(string+i);
for (k = 0; k <WLEN; k++){ //create a new string, sElement from the 5 letter word
sElement[k] = string[i+k];
}
struct Node *new_node;
new_node = (struct Node *) malloc ( sizeof (struct Node) );
strcpy(new_node->s, sElement); //put the new 5 letter word string into the node
new_node->next=hashtable[key];
new_node->sindex=i;
hashtable[key]=new_node;
}
}
Works for me.
Edit: Also needs #include <stdlib.h> (at least here)

Resources