How to save memory inside hashtable? - c

In my structure struct ListNode I am making a int type variable key but it is necessary to make that in the struct Listnode can we make that inside HashTableNode because when two or more items will be there in HashTableNode (that is when collision will be more in single table node) than we have to create more linked list node and every time inside that node key variable will consume some memory if we can define that inside HashTableNode than we can save memory.
Is it correct to mention the key in each list node so that we can access at any time whenever we need because the below hash table implementation is from very famous book of data structures.
Please tell me what i mentioned above is correct
Because i am a beginner if Not then please correct me
#define Load_factor 20
#include<stdio.h>
#include<stdlib.h>
struct Listnode{
int key;
int data;
struct Listnode* next;
};
struct HashTableNode{
int bcount; /// Number of elements in block
struct Listnode* next;
};
struct HashTable{
int tsize; /// Table size
int count;
struct HashTableNode** Table;
};
struct HashTable* createHashTable(int size){
struct HashTable* h;
h=(struct HashTable*)malloc(sizeof(struct HashTable));
h->tsize=size/Load_factor;
h->count=0;
h->Table=(struct HashTableNode**)malloc(sizeof(struct HashTableNode*)*h->tsize);
if(!h->Table){
printf("Memory Error");
return NULL;
}
for(int i=0;i<h->tsize;i++){
h->Table[i]->bcount=0;
h->Table[i]->next=NULL;
}
return h;
}
int HASH(int data,int tsize){
return(data%tsize);
}
/// Hashsearch
int HashSearch(struct HashTable* h,int data){
struct Listnode* temp;
temp=h->Table[HASH(data,h->tsize)]->next;
while(temp) ///same as temp!=NULL
{
if(temp->data==data)
return 1;
temp=temp->next;
}
return 0;
}
int HashDelete(struct HashTable* h,int data)
{
int index;
struct Listnode *temp,*prev;
index=HASH(data,h->tsize);
for(temp=h->Table[index]->next,prev=NULL;temp;prev=temp,temp=temp->next)
{
if(temp->data==data)
{
if(prev!=NULL)
prev->next=temp->next;
free(temp);
h->Table[index]->bcount--;
h->count--;
return 1;
}
}
return 0;
}
int HashInsert(struct HashTable *h ,int data){
int index;
struct Listnode* temp,*newnode;
if(HashSearch(h,data))
return 0;
index = HASH(data,h->tsize);
temp=h->Table[index]->next;
newnode=(struct Listnode*)malloc(sizeof(struct Listnode));
if(!newnode)
return -1;
newnode->key=index;
newnode->data;
newnode->next=h->Table[index]->next;
h->Table[index]->next=newnode;
h->Table[index]->bcount++;
h->count++;
return 1;
}

It is necessary to store key for every node because it is used to resolve collisions. Please note that the collision happens to the hash value and not the key, which means that every element (Listnode) in the same bucket (HashTableNode) will still have a different key so you can't optimize it away.
However, in your example the data is the key (which is usually called a HashSet as opposed to HashMap), so there is really no need for the key field in the Listnode.

Related

C Language - Rehashing a Separate Chaining Hash Table

So I looked everywhere to get inspired but I didn't really find anything for rehashing a hash table using separate chaining method. So I tried myself, I think I know what I'm doing wrong, but I don't know how else to implement it, please help.
Everything works, except the new added function rehash()
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
struct list_node
{
struct list_node *next;
char *key;
char *value;
};
struct hash_table
{
int table_size;
struct list_node **list_arr;
};
unsigned int hash(const char *key, unsigned int table_size);
struct hash_table *initialize(unsigned int table_size);
struct list_node *find(struct hash_table *H, const char *key);
void insert(struct hash_table *H, const char *key, const char *value);
void dump(struct hash_table *H);
void del(struct hash_table *H, const char *key);
struct hash_table *rehash(struct hash_table *H);
unsigned int
hash(const char *key, unsigned int table_size)
{
unsigned long int hashx = 0;
for(int i=0;key[i];i++)
{
hashx = (hashx<<5) + key[i];
}
return (hashx%table_size);
}
struct hash_table
*initialize(unsigned int table_size)
{
struct hash_table *H = malloc(sizeof(*H));
H->list_arr = malloc(sizeof(*H->list_arr)*table_size);
H->table_size = table_size;
for(unsigned int i = 0; i<table_size; i++)
{
H->list_arr[i] = malloc(sizeof(*H->list_arr[i]));
H->list_arr[i]->next = NULL;
}
return H;
}
void
insert(struct hash_table *H, const char *key, const char *value)
{
unsigned int index = hash(key, H->table_size);
struct list_node *head = H->list_arr[index];
struct list_node *current = head->next;
while(current!=NULL)
{
if(strcmp(current->key,key)==0)
{
free(current->value);
current->value = malloc(strlen(value)+1);
strcpy(current->value,value);
return;
}
current=current->next;
}
struct list_node *newNode = malloc(sizeof(*H->list_arr[index]));
newNode->next = head->next;
head->next = newNode;
newNode->key = malloc(strlen(key)+1);
newNode->value = malloc(strlen(value)+1);
strcpy(newNode->key,key);
strcpy(newNode->value,value);
}
void
dump(struct hash_table *H)
{
for( int i = 0; i<H->table_size; i++)
{
struct list_node *entry = H->list_arr[i]->next;
if(entry==NULL){continue;}
printf("Index[%d]: ", i);
while(entry!=NULL)
{
printf("\t%s|%s\t--> ", entry->key, entry->value);
entry = entry->next;
}
printf("\tNULL");
printf("\n");
}
}
void delete(struct hash_table *H, const char *key)
{
unsigned int index = hash(key,H->table_size);
struct list_node *prev = H->list_arr[index];
while(strcmp(prev->next->key,key)!=0)
{
if(prev->next==NULL){printf("Key not found!");return;}
prev=prev->next;
}
struct list_node *temp = prev->next;
prev->next = temp->next;
free(temp);
}
struct hash_table *rehash(struct hash_table *H)
{
unsigned int old_size = H->table_size;
struct list_node *old_entries = H->list_arr;
H = initialize(2*old_size);
for(unsigned int i = 0; i<old_size; i++)
{
while(old_entries[i]!=NULL)
{
insert(H,old_entries[i].key,old_entries[i].value);
old_entries[i] = old_entries[i]->next;
}
}
free(old_entries);
return H;
}
int main()
{
struct hash_table *H = initialize(20);
insert(H,"name1","David");
insert(H,"name2","Radka");
dump(H);
H = rehash(H);
dump(H);
return 1;
}
I think doing old_entries[i] is wrong, but nothing else comes to mind, please help me resolve this.
OK! After thinking about it for a while, I realized I created a struct list_node pointer variable that points to H->list_arr which is an array of pointers. That was my mistake. I was supposed to declare it as a double pointer.
Here's the modified rehash() function:
struct hash_table *rehash(struct hash_table *H)
{
unsigned int old_size = H->table_size;
struct list_node **old_entries = H->list_arr;
H = initialize(2*old_size);
for(unsigned int i = 0; i<old_size; i++)
{
old_entries[i] = old_entries[i]->next;
while(old_entries[i]!=NULL)
{
insert(H,old_entries[i]->key,old_entries[i]->value);
old_entries[i] = old_entries[i]->next;
}
}
free(old_entries);
return H;
}
with this code, you will have to return the address of the new hash_table to the pointer pointing to the old hash_table --> [H = rehash(H)] since passing the pointer H as a parameter will only change it locally. Therefore, I tried a second version (because I'm too lazy;) and inattentive and might forget to reassign it) where I don't have to return anything, I want to change it simply by calling the function and my pointer points to the new hash_table automatically -> [rehash(&H)], here's the other "lazy" alternative:
void
rehash(struct hash_table **H)
{
unsigned int old_size = (*H)->table_size;
struct list_node **old_entries = (*H)->list_arr;
*H = initialize(2*old_size);
for(unsigned int i = 0; i<old_size; i++)
{
old_entries[i] = old_entries[i]->next;
while(old_entries[i]!=NULL)
{
insert(*H,old_entries[i]->key,old_entries[i]->value);
old_entries[i] = old_entries[i]->next;
}
}
free(old_entries);
}
If I'm doing something that's inefficient (in terms of space and time), please let me know, as I am only in Bachelor's 3rd semester of CS and we have only started DSA this semester.
The thing you are doing by putting dummy elements at the beginning of each bin is a good idea, but you don't need to allocate such dummies with malloc(). You can just make the bin array an array of nodes instead of pointers to nodes. Then you have allocated the dummies when you have allocated the array. So you could define your hash table as
struct hash_table
{
int table_size;
struct list_node *list_arr;
};
(instead of using struct list_node **list_arr).
When you loop through the bins in the initialisation, you have to set the bins' next pointer to NULL, but not allocate them.
struct hash_table
*initialize(unsigned int table_size)
{
struct hash_table *H = malloc(sizeof(*H));
H->list_arr = malloc(sizeof(*H->list_arr)*table_size);
H->table_size = table_size;
for(unsigned int i = 0; i<table_size; i++)
{
// no malloc here!
H->list_arr[i].next = NULL;
}
return H;
}
Anyway, that is not pertinent to the rehashing, just a suggestion. But because you have the dummy elements as bins, you can refactor your code (that is the reason I think the dummies are such a good idea). You can get the bin from the table and work from there, without worrying about the table itself after that. You can get the relevant bin for a key with
struct list_node *get_bin(struct hash_table *H, const char *key)
{
unsigned int index = hash(key, H->table_size);
return &H->list_arr[index];
}
and you can find the node in a bin with
struct list_node *find_node(struct list_node *bin, const char *key)
{
for (struct list_node *current = bin->next;
current;
current = current->next) {
if(strcmp(current->key,key)==0) return current;
}
return 0;
}
and, for example, simplify insertion to
void prepend_node(struct list_node *node, struct list_node *bin)
{
node->next = bin->next;
bin->next = node;
}
void insert(struct hash_table *H, const char *key, const char *value)
{
struct list_node *bin = get_bin(H, key);
struct list_node *node = find_node(bin, key);
if (node) {
// update node
free(node->value);
node->value = malloc(strlen(value)+1);
strcpy(node->value,value);
} else {
// prepend new node
prepend_node(new_node(key, value), bin);
}
}
where the new_node() function looks like
struct list_node *new_node(const char *key, const char *value)
{
struct list_node *node = malloc(sizeof *node);
if (!node) abort(); // add some error handling here
node->key = malloc(strlen(key)+1);
if (!node->key) abort(); // add some error handling here
strcpy(node->key,key);
node->value = malloc(strlen(value)+1);
if (!node->value) abort(); // add some error handling here
strcpy(node->value,value);
return node;
}
Because the bins are embedded in the array, you can safely assume in all the functions that they aren't NULL, which can save you from testing some special cases.
It is not shorter code, because I split it into several functions, but in my opinion, it is more readable when each function does one simple thing. Here, getting the bin, finding the key in a bin, creating a node, pretending to a bin, etc. With "raw" malloc() and strcpy() and such, scattered through the code, it is harder to track that everything works correctly. The total lines of code grew, but each function is shorter and simpler. And you can get away with it, because you can work on bins as lists, without accessing the hash table array, exactly because all bins have a dummy head element.
You can now rewrite rehash() to just prepend to bins. You know that all the keys in the old bins are unique, so you don't need to check anything. You just put each node at the front of its new bin:
struct hash_table *rehash(struct hash_table *H)
{
unsigned int old_size = H->table_size;
struct list_node *old_entries = H->list_arr;
free(H); // You forgot to free this one!
H = initialize(2*old_size);
for(unsigned int i = 0; i<old_size; i++)
{
struct list_node *old_bin = &old_entries[i];
for (struct list_node *node = old_bin->next;
node; node = node->next) {
// just prepend to new bin; the key should be unique
prepend_node(node, get_bin(H, node->key));
}
}
free(old_entries);
return H;
}
I added a free(H) because you forgot to free memory for H, but it would be more efficient to update H without creating a new table. You can separate initialisation and allocation. But you do not gain terribly much as initialising the bins is the time-consuming part.
Speaking of freeing, though. Remember to write a function for freeing a hash table (that remembers to free the bins, including all the nodes). Don't use it with rehashing, of course, if you free H before you update it--you need to keep the nodes around--but you do want such a function.

how to update table size inside hash function

I was learning how to implement Hash table but i am bit confused here in because in the book below code was available and i understood well enough the code, but inside book there was no definition of HASH function ,i know we have to define it by own ,but according the below code that was given give inside book HASH is taking two arguments wherever i used the HASH like in HashInsert it is taking two arguments index=HASH(data,t->size) if we assume that return type of HASH as int
now for eg we can define HASH as
int HASH(int data,int tsize){
return(data%7);
}
but according to my program how should i update t->size(table size) inside HASH function or how should i use that
Please help me in proper implementation of the above HASH function
#define Load_factor 20
#include<stdio.h>
#include<stdlib.h>
struct Listnode{
int key;
int data;
struct Listnode* next;
};
struct HashTableNode{
int bcount; /// Number of elements in block
struct Listnode* next;
};
struct HashTable{
int tsize; /// Table size
int count;
struct HashTableNode** Table;
};
struct HashTable* createHashTable(int size){
struct HashTable* h;
h=(struct HashTable*)malloc(sizeof(struct HashTable));
h->tsize=size/Load_factor;
h->count=0;
h->Table=(struct HashTableNode**)malloc(sizeof(struct HashTableNode*)*h->tsize);
if(!h->Table){
printf("Memory Error");
return NULL;
}
for(int i=0;i<h->tsize;i++){
h->Table[i]->bcount=0;
h->Table[i]->next=NULL;
}
return h;
}
/// Hashsearch
int HashSearch(struct HashTable* h,int data){
struct Listnode* temp;
temp=h->Table[HASH(data,h->tsize)]->next;
while(temp) ///same as temp!=NULL
{
if(temp->data==data)
return 1;
temp=temp->next;
}
return 0;
}
int HashDelete(struct HashTable* h,int data)
{
int index;
struct Listnode *temp,*prev;
index=HASH(data,h->tsize);
for(temp=h->Table[index]->next,prev=NULL;temp;prev=temp,temp=temp->next)
{
if(temp->data==data)
{
if(prev!=NULL)
prev->next=temp->next;
free(temp);
h->Table[index]->bcount--;
h->count--;
return 1;
}
}
return 0;
}
int HashInsert(struct HashTable *h ,int data){
int index;
struct Listnode* temp,*newnode;
if(HashSearch(h,data))
return 0;
index = HASH(data,h->tsize);
temp=h->Table[index]->next;
newnode=(struct Listnode*)malloc(sizeof(struct Listnode));
if(!newnode)
return -1;
newnode->key=index;
newnode->data;
newnode->next=h->Table[index]->next;
h->Table[index]->next=newnode;
h->Table[index]->bcount++;
h->count++;
return 1;
}
i am just learning implementation of hashing so main is looking quiet weird
int main(){
return 0;
}
You shouldn't! I mean you shouldn't modify it.
Instead the function gets the size of the hash-table (the number of "buckets") so it can use it to create a bucket index from the hash value. This is typically done through modulo %.
So instead of a fixed magic number 7 you modulo with the size:
return(data%tsize);

How to make changes in an array through a function

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define SIZE 10
// A hashtable is a mixture of a linked list and array
typedef struct node NODE;
struct node{
int value;
NODE* next;
};
int hash(int);
void insert(int,NODE **);
int main(){
NODE* hashtable[SIZE];
insert(12,&hashtable[SIZE]);
printf("%d\n",hashtable[5]->value);
}
int hash(int data){
return data%7;
}
void insert(int value,NODE **table){
int loc = hash(value);
NODE* temp = malloc(sizeof(NODE));
temp->next = NULL;
temp->value = value;
*table[loc] = *temp;
printf("%d\n",table[loc]->value);
}
The above code prints :
12 and
27475674 (A random number probably the location.)
how do I get it to print 12 and 12 i.e. how to make a change in the array. I want to fill array[5] with the location of a node created to store a value.
The expression *table[loc] is equal to *(table[loc]) which might not be what you want, since then you will dereference an uninitialized pointer.
Then the assignment copies the contents of *temp into some seemingly random memory.
You then discard the memory you just allocated leading to a memory leak.
There's also no attempt to make a linked list of the hash-bucket.
Try instead to initially create the hashtable array in the main function with initialization to make all pointers to NULL:
NODE* hashtable[SIZE] = { NULL }; // Will initialize all elements to NULL
Then when inserting the node, actually link it into the bucket-list:
temp->next = table[loc];
table[loc] = temp;
This is just a simple change which I have made to your program which will tell you what you are actually doing wrong.
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define SIZE 10
// A hashtable is a mixture of a linked list and array
typedef struct node NODE;
struct node {
int value;
NODE* next;
};
NODE *hashtable[SIZE] = { NULL };
int hash(int);
int insert(int); //, NODE **);
int main(void)
{
int loc = insert(12); //, &hashtable[SIZE]);
if (loc < SIZE) {
if (hashtable[loc]) {
printf("%d\n", hashtable[loc]->value);
} else {
printf("err: invalid pointer received\n");
}
}
return 0;
}
int hash(int data)
{
return data%7;
}
int insert(int value) //, NODE *table[])
{
int loc = hash(value);
printf("loc = %d\n", loc);
if (loc < SIZE) {
NODE *temp = (NODE *) malloc(sizeof(NODE));
temp->value = value;
temp->next = NULL;
hashtable[loc] = temp;
printf("%d\n", hashtable[loc]->value);
}
return loc;
}
Here I have declared the hashtable globally just to make sure that, the value which you are trying to update is visible to both the functions. And that's the problem in your code. Whatever new address you are allocating for temp is having address 'x', however you are trying to access invalid address from your main function. I just wanted to give you hint. Hope this helps you. Enjoy!

Removing all nodes from a queue of structures

I'm trying to delete all nodes from my queue of structures.
Structure:
struct element{
int id;
int sign;
int year;
int month;
double amount;
struct element *next;
};
struct queue{
struct element *head;
int size;
};
And the function I wrote:
void delete(struct queue *queue) {
if (queue->size == 0){
printf("Structure is empty\n");
}
else {
struct element* this;
struct element* other;
for(this=queue->head;this!=NULL;this=other)
{
other=this->next;
free(this);
}
free(queue);
}
}
It doesn't work, and I'm out of ideas. Any suggestions?
In your delete routine, you do not free the queue if the size is empty, but you do free it if the size is non-empty. You should probably do the same for both cases. That is, either don't free in both places, or free in both places.
It is bothersome to need to figure out what the right thing to do is, because delete can not know how the queue was allocated. Given your current design, a way out may be to pass a flag to delete to indicate what it should do:
void delete(struct queue *queue, int do_free) {
if (queue->size == 0){
printf("Structure is empty\n");
}
else {
struct element* this;
struct element* other;
for(this=queue->head;this!=NULL;this=other) {
other=this->next;
free(this);
}
queue->head = 0;
queue->size = 0;
}
if (do_free) free(queue);
}
struct queue new;
/* ... */
delete(&new, 0); /* don't free the queue */
struct queue *empty_new = malloc(sizeof(struct queue));
empty_new->size = 0;
delete(empty_new, 1); /* free the empty queue */
Here
struct queue new;
//...
delete(&new);
new is allocated on the stack, so don't call free(queue) in delete.
Instead, set queue->head = NULL; queue->size = 0; to indicate that the queue is now empty as mentioned by #kirill.
How about just passing the first element of the queue.
void delete(element *el ) {
if(el) {
delete(el->next );
free(el);
}
}
with
typedef struct _element{
int id;
int sign;
int year;
int month;
double amount;
struct _element *next;
} element;
you might have forgotten to update at the end of the function the pointer to NULL as well as changing the size of the queue to 0.

creating a queue of pointers in c

i have a dynamic number of pointers all having the same size. i need to store all the addresses of my pointers in some place like a link List in order to fetch them later on.
my question is what structs should i use. is the following correct:
struct Node{
int *k;
Node*Next;
}
struct LS{
Node*first,*last;
void push(Node*n);
Node* GetFirst();
Node* GetLast();
}
the LS is the linked list that stores Nodes. and a Node is a struct that holds the address of my pointer and a pointer to the next Node.
am i using int *k to store the address of my pointer correctly? should i continue with this implementation or is there any easier way to do this?
this sample code may help you start...
#include <stdio.h>
struct Node{
int *k;
Node *Next;
}* Temp;
struct LS
{
Node *first,*last;
void push(Node *MyNode)
{
MyNode->Next=NULL;
if(empty())
{
first=MyNode;
last=MyNode;
}
else
{
last->Next = MyNode;
last=MyNode;
}
}
Node* front()
{
return first;
}
void pop()
{
free(first->k);
first=first->Next;
}
bool empty()
{
if(first==NULL) return true;
return false;
}
};
int N=10;
int main()
{
LS Q;Q.first=NULL;
for(int i=0;i<3;i++)
{
Node *NewNode= (Node*)malloc(sizeof(Node));
NewNode->k = (int*)malloc(sizeof(int)*N);
for(int k=0;k<N;k++) NewNode->k[k]=i;
Q.push(NewNode);
}
while(!Q.empty())
{
Temp=Q.front();
for(int i=0;i<N;i++) printf("%d ",Temp->k[i]);
printf("\n");
Q.pop();
}
return 1;
}
Yes, your Node struct is correct.
As to whether there is an easier way it depends. If there is a maximum number of pointers that you will need then an array of pointers would be easier. If you can do it in C++ then an STL vector (can use it like an array, but underneath the hood it can grow dynamically as needed) is easier. If you have to do it in C and it has to be dynamic, though, then no, there is not an easier way.
WDM.H (microsoft header) has a bunch of linked list stuff to look at ( http://msdn.microsoft.com/en-us/library/ff547799(VS.85).aspx ) , I've cut and pasted from that, and added a very simple example.
typedef struct _LIST_ENTRY {
struct _LIST_ENTRY *Flink;
struct _LIST_ENTRY *Blink;
} LIST_ENTRY, *PLIST_ENTRY;
typedef struct _MY_THING
{
LIST_ENTRY ListEntry;
ULONG randomdata1;
ULONG randomdata2;
ULONG randomdata3;
ULONG randomdata4;
} MY_THING, *PMY_THING;
#define CONTAINING_RECORD(address, type, field) ((type *)( \
(PCHAR)(address) - \
(ULONG_PTR)(&((type *)0)->field)))
VOID
InsertHeadList(
IN PLIST_ENTRY ListHead,
IN PLIST_ENTRY Entry
)
{
PLIST_ENTRY Flink;
Flink = ListHead->Flink;
Entry->Flink = Flink;
Entry->Blink = ListHead;
Flink->Blink = Entry;
ListHead->Flink = Entry;
}
VOID
InitializeListHead(
IN PLIST_ENTRY ListHead
)
{
ListHead->Flink = ListHead->Blink = ListHead;
}
PLIST_ENTRY
RemoveHeadList(
IN PLIST_ENTRY ListHead
)
{
PLIST_ENTRY Flink;
PLIST_ENTRY Entry;
Entry = ListHead->Flink;
Flink = Entry->Flink;
ListHead->Flink = Flink;
Flink->Blink = ListHead;
return Entry;
}
void main()
{
LIST_ENTRY HeadOfMyList;
MY_THING Thing;
InitializeListHead(&Head);
// example of add thing to list.
InsertHeadList(&HeadOfMyList, &Thing.ListEntry);
// example of removing thing from the list
PLIST_ENTRY listEntry = RemoveHeadList(&HeadOfMyList);
PMY_THING pThing = (PMY_THING) CONTAINING_RECORD(listEntry, MY_THING, ListEntry);
}

Resources