Hashtable Add - C - c

Getting some segfault on the following algorithm to add an element to the correct bucket in a hashtable.
My structures are basic:
struct kv {
char* key;
unsigned val;
struct kv* next;
};
struct hashtable {
struct kv** table;
unsigned size;
};
And my buggy function:
struct kv* ht_find_or_put(char* word, unsigned value,
struct hashtablet* hashtable,
unsigned (*hash)(char*))
{
unsigned index = hash(word) % hashtable->size;
struct kv* ke = malloc(sizeof (struct kv));
for (ke = hashtable->table[index]; ke != NULL; ke = ke->next)
{
if (strcmp(ke->key, word) == 0)
return ke;
}
if (ke == NULL)
{
ke->key = word;
ke->val = value;
ke->next = hashtable->table[index];
hashtable->table[index] = ke;
}
return ke;
}
I know I haven't added yet all the tests (if malloc failed and such) just trying to debug this particular problem...
I'm allocating my table as such:
struct hashtable* hashtable_malloc(unsigned size)
{
struct hashtable *new_ht = malloc(sizeof(struct hashtable));
new_ht->size = size;
new_ht->table = malloc(sizeof(struct kv) * size);
for(unsigned i = 0; i < size; i++)
new_ht->table[i] = NULL;
return new_ht;
}
Any sort of help will greatly be appreciated. I'm only starting to learn.

The first issue is a memory leak, e.g. - you allocate memory using malloc, but than loses the reference to it, as you override the pointer:
// allocate memory
struct kv* ke = malloc(sizeof (struct kv));
// lose the reference
// VVVVVVVVVVV
for (ke = hashtable->table[index]; ke != NULL; ke = ke->next)
The second issue, which probably causes the segfault, is that you try to de-reference a null pointer:
if (ke == NULL)
{
// ke is NULL, you can't de-reference it
ke->key = word;
ke->val = value;
ke->next = hashtable->table[index];
hashtable->table[index] = ke;
}
The solution will be, IMHO, to allocate and put the new element, only upon failure to find it:
struct kv* ht_find_or_put(char* word, unsigned value, struct hashtablet* hashtable, unsigned (*hash)(char*))
{
unsigned index = hash(word) % hashtable->size;
struct kv* ke;
// first we try to find the node
for (ke = hashtable->table[index]; ke != NULL; ke = ke->next)
{
if (strcmp(ke->key, word) == 0)
return ke;
}
// didn't find it - lets create and put a new one.
if (ke == NULL)
{
ke = malloc(sizeof (struct kv));
// later add a check if the allocation succeded...
ke->key = word;
ke->val = value;
ke->next = hashtable->table[index];
hashtable->table[index] = ke;
}
return ke;
}
Since I didn't want to introduce entirely new code, that would just confuse you, I made the minimal changes to the original code.

Related

Unexpect behaviour with pointer array

im currently implementing a basic linked list structure on C.
Basically im parsing a txt file, and I buffer each line into a Node which contains a pointer to an array of integers. The problem is, that my data (the array of integers) doesn't save correctly / i don't know how to iterate through it correctly.
These are the structures that I use:
typedef struct Node
{
struct Node* next;
struct Node* prev;
int* data;
int len;
} Node;
typedef struct LinkedList
{
Node* head;
Node* tail;
} LinkedList;
and I use this method to create a new node from a buffered string:
int nodeManager(FILE* filePointer, char* buffer, char* token, LinkedList* linkedList)
{
token = strtok(buffer, DELIMITER);
int* data = (int*)malloc(sizeof(int));
int dataIndex = 0;
if (data == NULL)
{
fprintf(stderr, DATA_ALLOCATION_ERROR);
freeLinkedList(linkedList);
fclose(filePointer);
return EXIT_FAILURE;
}
const char* insertPosition = token;
while ((token = strtok(NULL, DELIMITER)))
{
data = realloc(data, dataIndex + 1);
if (data == NULL) {
fprintf(stderr, DATA_ALLOCATION_ERROR);
freeLinkedList(linkedList);
fclose(filePointer);
return EXIT_FAILURE;
}
char *res;
int num = (int) strtol(token, &res, 10);
data[dataIndex] = num;
dataIndex++;
}
Node* newNode = (Node*)malloc(sizeof(Node));
if (newNode == NULL)
{
freeLinkedList(linkedList);
free(data);
fclose(filePointer);
return EXIT_FAILURE;
}
newNode -> prev = NULL; newNode -> next = NULL;
newNode -> len = dataIndex; newNode -> data = data;
if(strcmp(insertPosition, INSERT_TO_START) == 0)
{
addToStartLinkedList(linkedList, newNode);
}
else
{
addToEndLinkedList(linkedList, newNode);
}
return EXIT_SUCCESS; // TODO - Change
}
A line of input looks like this:
start,1,2,3,4,5,6,7,10,22,44,55,66,66,77
for some reason, node -> data doesn't get all the values that I assign in this method and I can't really tell why.
I tried to print the values like this:
for (int i = 0; i < newNode -> len; i++)
{
printf("%d,", (newNode -> data)[i]);
}
However, as I said, something quite doesn't work with my assigment, or I just don't know how to access the values correctly.
Would love to get some insight - thanks.
This part is wrong:
data = realloc(data, dataIndex + 1);
You have forgotten to multiply with sizeof(int) or better sizeof *p
BTW: Be careful about doing realloc directly into data. On failure realloc may return NULL and then you have a memory leak. You should use a temporary pointer like:
int * temp = realloc(data, SOME_SIZE);
if (temp == NULL)
{
// oh dear, add error handling here - maybe a return or whatever fits
}
else
{
// All good
data = temp;
}
OT: Passing the file pointer to the function in order to be able to close the file is (IMO) a rather strange design. Instead, the caller should check the return value and close the file (if desired).

C memory leak when inserting into a doubly linked list

Hi I'm new to C and pointers and are having issues trying to implement the below doubly linked list structure. Memory leaks happened in listInsertEnd I believe? I am very confused as to why one work (at least no mem leak in output) and the other one doesn't. I have pasted only parts of the program, any help or explanation is much appreciated.
#include <stdio.h>
#include <stdlib.h>
typedef struct node *Node;
struct node {
int value;
Node next;
Node prev;
};
typedef struct list *List;
struct list {
Node first;
Node last;
int count;
};
Node newNode(int value) {
Node n = malloc(sizeof(*n));
if (n == NULL) fprintf(stderr, "couldn't create new node\n");
n->value = value;
n->next = NULL;
n->prev = NULL;
return n;
}
void listInsertEnd(List newList, int value) {
Node n = newNode(value);
if (newList== NULL) { //no item in list
//why is this giving me memory leaks
newList->first = newList->last = n;
//whereas this doesn't?
newList->first = newList->last = newNode(value);
} else { //add to end
n->prev = newList->last;
newList->last->next = n;
newList->last = n;
}
nList->count++;
}
First of all, talking about memory leaks: there is no direct memory leak in your code. If the leak happens somewhere, it's outside of these functions. It's most probably because you create one or more nodes and then forget to free() them, but this has nothing to do with the two functions you show.
I see that you are using typedef to declare simple pointer types, take a look at this question and answer to understand why that's bad practice and should be avoided: Is it a good idea to typedef pointers?. Also, this particular piece of Linux kernel documentation which explains the issue in more detail.
Secondly, the real problem in the code you show is that you are using pointers after you tested that they are invalid (NULL).
Here:
Node newNode(int value) {
Node n = malloc(sizeof(*n));
if (n == NULL) fprintf(stderr, "couldn't create new node\n");
n->value = value;
// ^^^^^^^^ BAD!
And also here:
if (newList== NULL) {
newList->first = newList->last = n;
// ^^^^^^^^^^^^^^ BAD!
If something is NULL, you cannot dereference it. Change your functions to safely abort after they detect an invalid pointer.
This can be done in multiple ways. Here's an example of correct code:
Node newNode(int value) {
Node n = malloc(sizeof(*n));
if (n == NULL) {
fprintf(stderr, "couldn't create new node\n");
return NULL;
}
n->value = value;
n->next = NULL;
n->prev = NULL;
return n;
}
void listInsertEnd(List newList, int value) {
Node n;
if (newList == NULL) {
return;
// You probably want to return some error value here.
// In that case change the function signature accordingly.
}
n = newNode(value);
if (newList->count == 0) {
newList->first = newList->last = n;
} else { //add to end
n->prev = newList->last;
newList->last->next = n;
newList->last = n;
}
newList->count++;
}
NOTE: the check newList->count == 0 assumes that you correctly increment/decrement the count when adding/removing elements.
This typedef declaration
typedef struct node *Node;
is confusing and presents a bad style. Consider for example this statement
Node n = malloc(sizeof(*n));
somebody can think that here is a typo and should be written
Node *n = malloc(sizeof(*n));
The function
void listInsertEnd(List newList, int value) {
Node n = newNode(value);
if (newList== NULL) { //no item in list
//why is this giving me memory leaks
newList->first = newList->last = n;
//whereas this doesn't?
newList->first = newList->last = newNode(value);
} else { //add to end
n->prev = newList->last;
newList->last->next = n;
newList->last = n;
}
nList->count++;
}
has undefined behavior. If newList is equal to NULL then you are trying to use memory pointed to by a null pointer.
if (newList== NULL) { //no item in list
//why is this giving me memory leaks
newList->first = newList->last = n;
//whereas this doesn't?
newList->first = newList->last = newNode(value);
And initially data members newList->first and newList->last can be equal to NULL. That also can be reason of undefined behavior because the function does not take this into account.
Before changing the function listInsertEnd you should define the function newNode the following way
Node newNode(int value)
{
Node n = malloc(sizeof(*n));
if ( n != NULL )
{
n->value = value;
n->next = NULL;
n->prev = NULL;
}
return n;
}
The function shall not issue any message. It is the caller of the function that decides whether to issue a message if it is required.
In this case the function listInsertEnd can be written the following way
int listInsertEnd(List newList, int value)
{
Node n = newNode(value);
int success = n != NULL;
if ( success )
{
n->prev = newList->last;
if ( newList->first == NULL )
{
newList->first = newList->last = n;
}
else
{
newList->last = newList->last->next = n;
}
++newList->count;
}
return success;
}
Within the main you should create the list the following way
int main( void )
{
struct list list1 = { .first = NULL, .last = NULL, .count = 0 };
// or
// struct list list1 = { NULL, NULL, 0 };
and call the function like
listInsertEnd) &list1, some_integer_value );

How to free memory occupied by a Tree, C?

I'm currently dealing with a generic Tree with this structure:
typedef struct NODE {
//node's keys
unsigned short *transboard;
int depth;
unsigned int i;
unsigned int j;
int player;
int value;
struct NODE *leftchild; //points to the first child from the left
struct NODE *rightbrothers; //linked list of brothers from the current node
}NODE;
static NODE *GameTree = NULL;
While the function that allocates the different nodes is (don't bother too much at the keys' values, basically allocates the children-nodes. If there aren't any the new child goes to leftchild, otherwise it goes at the end of the list "node->leftchild->rightbrothers"):
static int AllocateChildren(NODE **T, int depth, unsigned int i, unsigned int j, int player, unsigned short *transboard) {
NODE *tmp = NULL;
if ((*T)->leftchild == NULL) {
if( (tmp = (NODE*)malloc(sizeof(NODE)) )== NULL) return 0;
else {
tmp->i = i;
tmp->j = j;
tmp->depth = depth;
(player == MAX ) ? (tmp->value = 2 ): (tmp->value = -2);
tmp->player = player;
tmp->transboard = transboard;
tmp->leftchild = NULL;
tmp->rightbrothers = NULL;
(*T)->leftchild = tmp;
}
}
else {
NODE *scorri = (*T)->leftchild;
while (scorri->rightbrothers != NULL)
scorri = scorri->rightbrothers;
if( ( tmp = (NODE*)malloc(sizeof(NODE)) )== NULL) return 0;
else {
tmp->i = i;
tmp->j = j;
tmp->depth = depth;
(player == MAX) ? (tmp->value = 2) : (tmp->value = -2);
tmp->player = player;
tmp->transboard = transboard;
tmp->leftchild = NULL;
tmp->rightbrothers = NULL;
}
scorri->rightbrothers = tmp;
}
return 1;
}
I need to come up with a function, possibly recursive, that deallocates the whole tree, so far I've come up with this:
void DeleteTree(NODE **T) {
if((*T) != NULL) {
NODE *tmp;
for(tmp = (*T)->children; tmp->brother != NULL; tmp = tmp->brother) {
DeleteTree(&tmp);
}
free(*T);
}
}
But it doesn't seem working, it doesn't even deallocate a single node of memory.
Any ideas of where I am being wrong or how can it be implemented?
P.s. I've gotten the idea of the recursive function from this pseudocode from my teacher. However I'm not sure I've translated it correctly in C with my kind of Tree.
Pseudocode:
1: function DeleteTree(T)
2: if T != NULL then
3: for c ∈ Children(T) do
4: DeleteTree(c)
5: end for
6: Delete(T)
7: end if
8: end function
One thing I like doing if I'm allocating lots of tree nodes, that are going to go away at the same time, is to allocate them in 'batches'. I malloc then as an array of nodes and dole them out from a special nodealloc function after saving a pointer to the array (in a function like below). To drop the tree I just make sure I'm not keeping any references and then call the free routine (also like below).
This can also reduce the amount of RAM you allocate if you're lucky (or very smart) with your initial malloc or can trust realloc not to move the block when you shrink it.
struct freecell { struct freecell * next; void * memp; } * saved_pointers = 0;
static void
save_ptr_for_free(void * memp)
{
struct freecell * n = malloc(sizeof*n);
if (!n) {perror("malloc"); return; }
n->next = saved_pointers;
n->memp = memp;
saved_pointers = n;
}
static void
free_saved_memory(void)
{
while(saved_pointers) {
struct freecell * n = saved_pointers;
saved_pointers = saved_pointers->next;
free(n->memp);
free(n);
}
}
I've just realized my BIG mistake in the code and I'll just answer myself since no one had found the answer.
The error lies in this piece of code:
for(tmp = (*T)->children; tmp->brother != NULL; tmp = tmp->brother) {
DeleteTree(&tmp);
}
First of all Ami Tavory was right about the for condition, i need to continue as long as tmp != NULL
Basically it won't just work because after the DeleteTree(&tmp), I can no longer access the memory in tmp because it's obviously deleted, so after the first cycle of for ends I can't do tmp = tmp->rightbrother to move on the next node to delete because tmp->rightbrother no longer exists as I just deleted it.
In order to fix it I just needed to save the tmp->brother somewhere else:
void DeleteTree(NODE **T) {
if((*T) != NULL) {
NODE *tmp, *deletenode, *nextbrother;
for(tmp = (*T)->children; tmp != NULL; tmp = nextbrother) {
nextbrother = tmp->rightbrother;
DeleteTree(&tmp);
}
canc = (*T);
free(*T);
(*T) = NULL;
}
}
Just for the sake of completeness I want to add my version of DeleteTree
void DeleteTree(NODE *T) {
if(T != NULL) {
DeleteTree(T->rightbrothers);
DeleteTree(T->leftchild);
free(T);
}
}
I think it is much less obscure and much easier to read. Basically it solves the issue in DeleteTree but through eliminating the loop.
Since we free the nodes recursively we might as well do the whole process recursively.

C Having Trouble Resizing a Hash Table

I'll post snippets of the code here which (I think) are relevant to the problem, but I can pastebin if necessary. Probably posting more than enough code already :P
My program includes a hash table which needs to double when a certain hash bucket reaches 20 entries. Although I believe the logic to be good, and it compiles like a charm, it throws up a Segmentation Fault. The code runs like a charm when not resizing, but resizing messes things up.
Thanks for any help :)
Error
Program received signal SIGSEGV, Segmentation fault.
0x0000000000401012 in ml_add (ml=0x7fffffffe528, me=0x75a5a0) at mlist.c:74
74 while((cursorNode->next) != NULL){
Missing separate debuginfos, use: debuginfo-install glibc-2.12-1.80.el6_3.5.x86_64
(gdb) backtrace
#0 0x0000000000401012 in ml_add (ml=0x7fffffffe528, me=0x75a5a0) at mlist.c:74
#1 0x0000000000401554 in main (argc=1, argv=0x7fffffffe638) at finddupl.c:39
Structure of Hash Table
typedef struct bN { //linked list node containing data and next
MEntry *nestedEntry;
struct bN *next;
} bucketNode;
typedef struct bL { // bucket as linked list
struct bN *first;
int bucketSize;
} bucket;
struct mlist {
struct bL *currentTable; //bucket array
};
Add Function
int ml_add(MList **ml, MEntry *me){
MList *tempList;
tempList = *ml;
bucketNode *tempNode = (bucketNode *)malloc(sizeof(bucketNode));
tempNode->nestedEntry = me;
tempNode->next = NULL;
unsigned long currentHash = me_hash(me, tableSize);
if((tempList->currentTable[currentHash].bucketSize) == 0) {
tempList->currentTable[currentHash].first = tempNode;
tempList->currentTable[currentHash].bucketSize = (tempList->currentTable[currentHash].bucketSize) + 1;
}
else if((tempList->currentTable[currentHash].bucketSize) == 20){
printf("About to resize");
printf("About to resize");
tempList = ml_resize(&tempList, (tableSize * 2));
tableSize = tableSize * 2;
ml_add(&tempList,me);
}
else{
bucketNode *cursorNode;
cursorNode = tempList->currentTable[currentHash].first;
while((cursorNode->next) != NULL){
cursorNode = cursorNode->next;
}
cursorNode->next = tempNode;
tempList->currentTable[currentHash].bucketSize = (tempList->currentTable[currentHash].bucketSize) + 1;
return 1;
}
return 1;
}
Resize Function
MList *ml_resize(MList **ml, int newSize){
MList *oldList;
oldList = *ml;
MList *newList;
if ((newList = (MList *)malloc(sizeof(MList))) != NULL){
newList->currentTable = (bucket *)malloc(newSize * sizeof(bucket));
int i;
for(i = 0; i < newSize; i++){
newList->currentTable[i].first = NULL;
newList->currentTable[i].bucketSize = 0;
}
}
int j;
for(j = 0; j < tableSize; j++){
bucketNode *cursorNode = oldList->currentTable[j].first;
bucketNode *nextNode;
while(cursorNode != NULL){
nextNode = cursorNode->next;
ml_transfer(&newList, cursorNode, newSize);
cursorNode = nextNode;
}
}
free(oldList);
return newList;
}
Transfer to new list function
void ml_transfer(MList **ml, bucketNode *insertNode, int newSize){
MList *newList;
newList = *ml;
bucketNode *tempNode = insertNode;
tempNode->next = NULL;
unsigned long currentHash = me_hash((tempNode->nestedEntry), newSize);
if((newList->currentTable[currentHash].bucketSize) == 0) {
newList->currentTable[currentHash].first = tempNode;
newList->currentTable[currentHash].bucketSize = (newList->currentTable[currentHash].bucketSize) + 1;
}
else{
bucketNode *cursorNode;
cursorNode = newList->currentTable[currentHash].first;
while((cursorNode->next) != NULL){
cursorNode = cursorNode->next;
}
cursorNode->next = tempNode;
newList->currentTable[currentHash].bucketSize = (newList->currentTable[currentHash].bucketSize) + 1;
}
}
The problem most probably lies on the fact that the ml_add() function is failing to update the MList** ml parameter node whenever the hashtable is resized.
When the hashtable is resized, the old hashtable is destroyed (inside, ml_resize()), but the pointer to the resized, new hashtable is just updated in the tempList variable, that is just a local copy of *ml. You should also update *ml in order to modify the variable that is keeeping reference of the hashTable outside of the function, otherwise, it is left pointing to the deleted, invalid Hashtable. Try the following modification:
...
else if((tempList->currentTable[currentHash].bucketSize) == 20){
printf("About to resize");
printf("About to resize");
tempList = ml_resize(&tempList, (tableSize * 2));
tableSize = tableSize * 2;
ml_add(&tempList,me);
*ml = tempList; // this is necesary to fix the pointer outside the
// function, that still points to the hashtable
// memory freed by the resize function
}
...
Also please note the comments I made about two memory leaks existing in your code, and I would also take into account what #hexist pointed out that it is not necessary to insert at the end of the liked list at the head, simplifying the code and making it faster.

Description is only printing out for the last one entered

I'm quite new to C and I'm trying to implement a binary tree in C which will store a number and a string and then print them off e.g.
1 : Bread
2 : WashingUpLiquid
etc.
The code I have so far is:
#include <stdio.h>
#include <stdlib.h>
#define LENGTH 300
struct node {
int data;
char * definition;
struct node *left;
struct node *right;
};
struct node *node_insert(struct node *p, int value, char * word);
void print_preorder(struct node *p);
int main(void) {
int i = 0;
int d = 0;
char def[LENGTH];
struct node *root = NULL;
for(i = 0; i < 2; i++)
{
printf("Please enter a number: \n");
scanf("%d", &d);
printf("Please enter a definition for this word:\n");
scanf("%s", def);
root = node_insert(root, d, def);
printf("%s\n", def);
}
printf("preorder : ");
print_preorder(root);
printf("\n");
return 0;
}
struct node *node_insert(struct node *p, int value, char * word) {
struct node *tmp_one = NULL;
struct node *tmp_two = NULL;
if(p == NULL) {
p = (struct node *)malloc(sizeof(struct node));
p->data = value;
p->definition = word;
p->left = p->right = NULL;
}
else {
tmp_one = p;
while(tmp_one != NULL) {
tmp_two = tmp_one;
if(tmp_one->data > value)
tmp_one = tmp_one->left;
else
tmp_one = tmp_one->right;
}
if(tmp_two->data > value) {
tmp_two->left = (struct node *)malloc(sizeof(struct node));
tmp_two = tmp_two->left;
tmp_two->data = value;
tmp_two->definition = word;
tmp_two->left = tmp_two->right = NULL;
}
else {
tmp_two->right = (struct node *)malloc(sizeof(struct node));
tmp_two = tmp_two->right;
tmp_two->data = value;
tmp_two->definition = word;
tmp_two->left = tmp_two->right = NULL;
}
}
return(p);
}
void print_preorder(struct node *p) {
if(p != NULL) {
printf("%d : %s\n", p->data, p->definition);
print_preorder(p->left);
print_preorder(p->right);
}
}
At the moment it seems to work for the ints but the description part only prints out for the last one entered. I assume it has something to do with pointers on the char array but I had no luck getting it to work. Any ideas or advice?
You're always doing a scanf into def and then passing that to your insert routine which just saves the pointer to def. So, since all of your entries point to the def buffer, they all point to whatever was the last string you stored in that buffer.
You need to copy your string and place a pointer to the copy into the binary tree node.
The problem is that you're using the same buffer for the string. Notice your struct is holding a pointer to a char, and you are passing the same char array as that pointer each time.
When you call scanf on the buffer, you are changing the data it points to, not the pointer itself.
To fix this, before assigning it over to a struct, you can use strdup. So the lines of code would become
tmp_*->definition = strdup(word);
Keep in mind that the char array returned by strdup must be freed once you are done with it, otherwise you'll have a leak.

Resources