I'm working on a program for a course of mine, so I'd appreciate it if answers were kept abstract. I am working on a key-value hash table in C that stores a string for a key and an int for the value. I'm getting a segmentation fault on the helper function for the put() method. Below is the problematic code. I've changed it slightly for academic honesty purposes, and I've only included the parts that lead up to the error. I've tried adjusting how I dereference or don't dereference table[index]->symbol, but to no avail. I'm thinking that that line that the SEGFAULT is happening on probably isn't the culprit, but I'm struggling to find where it might otherwise appear. Any help on this matter would be greatly appreciated, be it GDB hints, high-level explanations, etc. I just ask that code snippets be kept vague so that I actually learn, rather than just being told an answer. Thank you!
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
typedef struct elem_t elem_t;
struct elem_t {
const char* symbol;
void* data;
elem_t* next;
};
typedef struct {
size_t length;
size_t size;
elem_t** table;
} table_t;
static unsigned int hash(const char *str) {
const unsigned int p = 16777619;
unsigned int hash = 2166136261u;
while (*str) {
hash = (hash ^ *str) * p;
str += 1;
}
hash += hash << 13;
hash ^= hash >> 7;
hash += hash << 3;
hash ^= hash >> 17;
hash += hash << 5;
return hash;
}
void *createTable(int sizeHint) {
table_t* table;
table = malloc(sizeof(table));
if (table == NULL) {
return NULL;
}
table->length = 0;
table->size = sizeHint * 2;
table->table = calloc(table->size, sizeof(elem_t*));
if (table->table == NULL) {
free(table);
return NULL;
}
return table;
}
static const char* putHelper(elem_t** table, size_t size, const char* symbol, void* data, size_t* length) {
unsigned int hashVal = hash(symbol);
size_t index = (size_t)(hashVal & (unsigned int)(size - 1));
while (table[index]->symbol != NULL) { // !!! SEGFAULT HERE !!!
if (strcmp(symbol, table[index]->symbol) == 0) { // collision
elem_t* cur = table[index];
while (table[index]->next != NULL) { // separate chaining
cur = cur->next;
}
elem_t* newElem = (elem_t*)malloc(sizeof(elem_t)); // make new element to hang at the end of the chain
cur->next = newElem;
newElem->data = data;
newElem->symbol = symbol;
newElem->next = NULL;
return newElem->symbol;
}
index++;
if (index >= size) {
index = 0;
}
}
if (length != NULL) {
symbol = strdup(symbol);
if (symbol == NULL) {
return NULL;
}
(*length)++;
}
table[index]->symbol = (char*)symbol;
table[index]->data = data;
return symbol;
}
int put(void *tableHandle, const char *symbol, void *data) {
table_t* table = (table_t*)tableHandle;
if (data == NULL) {
return 0;
}
table->length++;
const char* result = putHelper(table->table, table->size, symbol, data, &table->length);
if (result != NULL) {
return 1;
} else {
return 0;
}
}
int main() {
table_t* table = createTable(200);
int result = put(table, "t1", 25);
if (result == 0) {
printf("put failed");
return 1;
}
}
You allocated an array of null pointers
table->table = calloc(table->size, sizeof(elem_t*));
and then you are using null pointer tp access memory
while (table[index]->symbol != NULL) { // !!! SEGFAULT HERE !!!
^^^^^^^^^^^^^^^^^^^^
So the program crashes.
And as #bbbbbbbbb pointed out you allocated memory only for a pointer
table = malloc(sizeof(table));
You by the least need to change this:
table_t* table;
table = malloc(sizeof(table));
To this:
table_t* table;
table = malloc(sizeof(*table));
Or to this:
table_t* table;
table = malloc(sizeof(table_t));
Related
So I have an assignment to create a program in c that reads a couple of sentences(a 140mb file), and based on the 2nd input, which is a number, I need to return the Nth most common word. My idea was to build a hash table with linear probing, every time I get a new element I hash it accordingly based its position and based on djb2, else if there is a collision I rehash. After that, I apply Quicksort based on the occurrence and then I finally access by index.
I am having issues finishing up a hash table with linear probing in c. I am pretty sure I have finished it but every time I run I am getting a heap buffer overflow on lldb. I tried to spot the issue but I still cannot figure it out.
Am I getting out of memory on stack? The file is relatively small to consume so much memory.
I used address sanitiser and I got a heap-buffer-overflow on inserting.
I don't think I am touching the memory outside the allocate region but I am not 100% sure.
Any idea what has gone wrong? This is the table.c implementation and below that you can see the form of the struct.
Here is a more detailed message from address sanitiser:
thread #1: tid = 0x148b44, 0x0000000100166b20 libclang_rt.asan_osx_dynamic.dylib`__asan::AsanDie(), queue = 'com.apple.main-thread', stop reason = Heap buffer overflow
{
"access_size": 1,
"access_type": 1,
"address": 105690555220216,
"description": "heap-buffer-overflow",
"instrumentation_class": "AddressSanitizer",
"pc": 4294981434,
"stop_type": "fatal_error"
}
table.c :
#include "table.h"
#include "entities.h"
static inline entry_t* entryInit(const char* const value){
unsigned int len = strlen(value);
entry_t* entry = malloc(sizeof(entry));
entry->value = malloc(sizeof(char*) * len);
strncpy(entry->value, value, strlen(value));
entry->exists = 1;
entry->occurence = 1;
return entry;
}
table_t* tableInit(const unsigned int size){
table_t* table = malloc(sizeof(table_t));
table->entries = malloc(size*sizeof(entry_t));
table->seed = getPrime();
table->size = size;
table->usedEntries = 0U;
return table;
}
//okay, there is definitely an issue here
table_t* tableResize(table_t* table, const unsigned int newSize){
//most likely wont happen but if there is an overflow then we have a problem
if(table->size > newSize) return NULL;
//create a temp array of the realloced array, then do changes there
entry_t* temp = calloc(newSize,sizeof(entry_t));
table->size = newSize;
//temp pointer to an entry
entry_t *tptr = NULL;
unsigned int pos = 0;
unsigned int index = 0;
while(pos != table->size){
tptr = &table->entries[pos];
if(tptr->exists == 1){
index = hashString(table->seed, tptr->value, table->size, pos);
temp[index] = *entryInit(tptr->value);
temp[index].occurence = tptr->occurence;
break;
}
else pos++;
}
table->entries = temp;
//TODO: change table destroy to free the previous array from the table
free(temp);
return table;
}
//insert works fine, it is efficient enough to add something in the table
unsigned int tableInsert(table_t* table,const char* const value){
//decide when to resize, might create a large enough array to bloat the memory?
if(table->usedEntries >(unsigned int)(2*(table->size/3))) table = tableResize(table, table->size*2);
entry_t* entry = NULL;
unsigned int index;
auto int position = 0;
while(position != table->size){
//calculate the hash of our string as a function of the current position on the table
index = hashString(table->seed,value,table->size, position);
entry = &table->entries[index];
if(entry->exists == 0){
*entry = *entryInit(value);
table->usedEntries++;
return index;
} else if (entry->exists == 1 && strcmp(entry->value, value) == 0){
entry->occurence++;
return index;
} else{
position++;
}
}
}
//there might be an issue here
static inline void tableDestroy(const table_t* const table){
entry_t* entry = NULL;
for (auto int i = 0; i < table->size; ++i){
entry =&table->entries[i];
//printf("Value: %s Occurence: %d Exists: %d \n",entry->value, entry->occurence, entry->exists );
if(&table->entries[i] !=NULL)free(&table->entries[i]);
}
free(table);
}
entities.h :
#pragma once
typedef struct __attribute__((packed)) __entry {
char *value;
unsigned int exists : 1;
unsigned int occurence;
} entry_t;
typedef struct __table {
int size;
int usedEntries;
entry_t *entries;
unsigned int seed;
} table_t;
here is how I read from a file and process the text:
void readFromFile(const char* const fileName, table_t* table){
FILE *fp = fopen(fileName, "r");
if(!fp) fprintf(stderr,"error reading file. \n");
char word[15];//long enough to hold the biggest word in the text?
int position = 0;
char ch;
while((ch = fgetc(fp))!= EOF){
//discard all the ascii chars that are not letters
if(!(ch >= 65 && ch <= 90) && !(ch >= 97 && ch <= 122)){
word[position]= '\0';
if(word[0] == NULL)continue;
tableInsert(table, word);
position = 0;
continue;
}
else word[position++] = ch;
}
}
Any suggestions what is wrong with my code?
I believe resize might have an issue and I am not properly deleting yet because I have had a lot of problems with the memory management.
Thanks in advance!
Hi I am attempting to implement a really simple hashmap in regular C with a string as key and a void pointer as value as I wish to use the map for multiple data types.
So far I have this
struct node{
void * value;
char * key;
};
unsigned long strhash(char *string)
{
unsigned long hash = 5381;
int c;
while ((c = *string++))
{
hash = ((hash << 5) + hash) + c;
}
return hash;
}
map_t *map_create(int maxSize){
map_t *map = malloc(sizeof(map_t));
map->curSize = 0;
map->maxSize = maxSize;
map->nodes = calloc(map->maxSize, sizeof(node_t *));
return map;
}
node_t *node_create(char *key, void *value){
node_t *node = malloc(sizeof(node_t));
node->key = key;
node->value = value;
return node;
}
void map_insert(map_t *map, char *key, void *value){
node_t *node = node_create(key, value);
int idx = strhash(key) % map->maxSize;
if(map->nodes[idx] == NULL){
map->nodes[idx] = node;
}else{
while(map->nodes[idx] != NULL){
idx++%map->maxSize;
}
map->nodes[idx] = node;
}
return;
}
void map_print(map_t *map){
for(int i = 0; i < map->maxSize; i++){
if(map->nodes[i] != NULL){
printf("index: %d\t value: %d\n",i, *(int*)map->nodes[i]->value);
}
}
return;
}
void map_destroy(map_t *map){
for(int i = 0; i < map->maxSize; i++){
if(map->nodes[i] != NULL){
free(map->nodes[i]);
}
}
free(map->nodes);
free(map);
return;
}
int main(){
map_t *map = map_create(32);
for(int i = 0; i < 30; i++){
map_insert(map, (char*)&i, &i);
}
map_print(map);
map_destroy(map);
return 0;
}
The problem is the output is not as I'd expect when the map gets printed all that is retrieved is the value "30" on all indexes which is the last number inserted into the map. If I change the value to type int the map works as expected, so is there must be something crucial I am missing in regards to pointers.
I am not the greatest at C so any light which could be shed on this would be most appreciated.
The problem is that you're using the same pointer every time you call map_insert(). It just stores the pointer, it doesn't copy the data. Each time through the loop you change the contents of that memory, so all the hash map elements point to that same value.
There are two ways you can fix it. One way is to always make a dynamically-allocated copy of the data before calling map_insert():
for (int i = 0; i < 30; i++) {
int *i_copy = malloc(sizeof *i_copy);
*i_copy = i;
map_insert(map, (char *)i_copy, (char *)i_copy);
}
The other option is to add the size of the value to the map_insert() and node_create() arguments. Then node_create call malloc() and memcpy() to copy the value to dynamic memory.
BTW, there's another problem. The key is supposed to be a null-terminated string (strhash() depends on this), but you're using &i, which is a pointer to an integer. Casting a pointer to an integer to char* doesn't return a string, it just returns a pointer to the same location with a different data type. I haven't fixed this above.
OP stores a reference to the same value, so of course all lookups yield the same value (which is not even a string, but whatever the storage representation of the value of the variable i happens to be).
I prefer chaining the hash map entries, and keeping a copy of the hash in the entry:
struct entry {
struct entry *next;
size_t hash;
void *data;
size_t data_size;
int data_type;
unsigned char name[];
};
typedef struct {
size_t size;
size_t used; /* Number of entries, total */
struct entry **slot; /* Array of entry pointers */
size_t (*hash)(const unsigned char *, size_t);
} hashmap;
int hashmap_new(hashmap *hmap, const size_t size,
size_t (*hash)(const unsigned char *, size_t))
{
if (!hmap)
return -1; /* No hashmap specified */
hmap->size = 0;
hmap->used = 0;
hmap->slot = NULL;
hmap->hash = NULL;
if (size < 1)
return -1; /* Invalid size */
if (!hash)
return -1; /* No hash function specified. */
hmap->slot = calloc(size, sizeof hmap->slot[0]);
if (!hmap->slot)
return -1; /* Not enough memory */
hmap->size = size;
hmap->hash = hash;
return 0;
}
void hashmap_free(hashmap *hmap)
{
if (hmap) {
size_t i = hmap->size;
while (i-->0) {
struct entry *next = hmap->slot[i];
struct entry *curr;
while (next) {
curr = next;
next = next->next;
free(curr->data);
/* Poison the entry, to help detect use-after-free bugs. */
curr->next = NULL;
curr->data = NULL;
curr->hash = 0;
curr->data_size = 0;
curr->data_type = 0;
curr->name[0] = '\0';
free(curr);
}
}
}
free(hmap->slot);
hmap->size = 0;
hmap->used = 0;
hmap->slot = NULL;
hmap->hash = NULL;
}
To insert a key-value pair, the function either uses the data specified as-is, in which case it's the caller's responsibility to ensure each key has their own unique data not overwritten later; or we copy the user data. In the above hashmap_free() function, you'll see free(curr->data);; it assumes we allocated memory dynamically, and copied the user data there. So:
int hashmap_add(hashmap *hmap, const unsigned char *name,
const void *data, const size_t data_size,
const int data_type)
{
const size_t namelen = (name) ? strlen(name) : 0;
struct entry *curr;
size_t i;
if (!hmap)
return -1; /* No hashmap specified. */
if (name_len < 1)
return -1; /* NULL or empty name. */
/* Allocate memory for the hashmap entry,
including enough room for the name, and end of string '\0'. */
curr = malloc(sizeof (struct entry) + namelen + 1;
if (!curr)
return -1; /* Out of memory. */
/* Copy data, if any. */
if (data_size > 0) {
curr->data = malloc(data_size);
if (!curr->data) {
free(curr);
return -1; /* Out of memory. */
}
memcpy(curr->data, data, data_size);
} else {
curr->data = NULL;
curr->data_size = 0;
}
curr->data_type = data_type;
/* Calculate the hash of the name. */
curr->hash = hmap->hash(name, namelen);
/* Copy name, including the trailing '\0'. */
memcpy(curr->name, name, namelen + 1);
/* Slot to prepend to. */
i = curr->hash % hmap->size;
curr->next = hmap->slot[i];
hmap->slot[i] = curr;
/* An additional node added. */
hmap->used++;
return 0;
}
The meaning of data_type is completely up to the user of the code.
Lookup can be made based on the hash and the data type:
/* Returns 0 if found. */
int hashmap_find(hashmap *hmap, const unsigned char *name,
const int data_type,
void **dataptr_to, size_t *size_to)
{
struct entry *curr;
size_t hash;
if (size_to)
*size_to = 0;
if (dataptr_to)
*dataptr_to = NULL;
if (!hmap)
return -1; /* No hashmap specified. */
if (!name || !*name)
return -1; /* NULL or empty name. */
hash = hmap->hash(name, strlen(name));
curr = hmap->slot[hash % hmap->size];
for (curr = hmap->slot[hash % hmap->size]; curr != NULL; curr = curr->next) {
if (curr->data_type == data_type && curr->hash == hash &&
!strcmp(curr->name, name)) {
/* Data type an name matches. Save size if requested. */
if (size_to)
*size_to = curr->data_size;
if (dataptr_to)
*dataptr_to = curr->data;
return 0; /* Found. */
}
}
return -1; /* Not found. */
}
The above lookup returns 0 if found, and nonzero if error or not found. (This way, even zero-size NULL data can be stored in the hash map.)
If the number of data types supported is small, say 32, then using an unsigned int with each bit (1U<<0 == 1, 1U<<1 == 2, 1U<<2 == 4, and so on) reserved for a specific type, you can do the lookup using a mask, allowing only the specified types. Similarly, the data_type can be a mask, describing which types the value can be interpreted as (almost always will have just one bit set).
This scheme also allows one to dynamically resize the hashmap, by allocating a new slot array of pointers, and moving each old entry to the new one. The keys don't need to be rehashed, because the original hash is stored in each entry. For lookup efficiency, the chains (hanging off each slot) should be as short as possible. A common "rule of thumb" is that hashmap->size should be between hashmap->used and 2 * hashmap->used.
When you call map_insert(map, (char*)&i, &i); the value inserted into hasmap is the pointer to i variable, i.e. its address in memory, and not the value of i.
So when you change i value inside the for loop there is the side-effect to all entries into the hashmap, and at the end of the loop you only see the last value assigned.
so I've been set a task of creating a faux string struct and implementing all the usual string functions on my faux string struct. I'm stuck on the tests of my strcat implementation called append, with the first test failing (segfault) being the 5th line. My function for creating new structs should be OK because it passed all the tests, but I've included it just incase.
I've already been able to successfully implement length, get, set and copy functions for my faux string structs.
The struct:
struct text {
int capacity;
char *content;
};
typedef struct text text;
My function for creating new structs:
text *newText(char *s) {
printf("new Text from %s\n", s);
int sizeNeeded = (strlen(s)+1);
int sizeGot = 24;
while (sizeNeeded > sizeGot) {
sizeGot = sizeGot * 2;
}
text *out = malloc(sizeGot);
char *c = malloc(sizeGot);
strcpy(c, s);
out->content = c;
out->capacity = (sizeGot);
printf("the capacity is %d\n", sizeGot);
return out;
free(c);
}
My append function:
void append(text *t1, text *t2) {
printf("t1 content is %s, t2 content is %d\n", t1->content, *t2->content);
int sizeNeeded = (t1->capacity + t2->capacity);
int sizeGot = 24;
while (sizeNeeded > sizeGot) {
sizeGot = sizeGot * 2;
}
char *stringy = calloc(sizeGot, 32);
stringy = strcat(t1->content, t2->content);
free(t1);
t1 = newText(stringy);
}
and finally the tests:
void testAppend() {
text *t = newText("car");
text *t2 = newText("pet");
append(t, t2);
assert(like(t, "carpet"));
assert(t->capacity == 24);
text *t3 = newText("789012345678901234");
append(t, t3);
assert(like(t, "carpet789012345678901234"));
assert(t->capacity == 48);
freeText(t);
freeText(t2);
freeText(t3);
}
You are allocating memory in the wrong way. You could fix this by using a flexible array member like this:
typedef struct {
int capacity;
char content[];
} text;
text *out = malloc(sizeof(text) + sizeof(something));
strcpy(out->content, str);
...
And obviously code such as this is nonsense:
return out;
free(c);
}
Enable compiler warnings and listen to them.
Och, some errors you have:
Inside text_new you allocate memory for text *out using text *out = malloc(sizeGot); when sizeGot = 24 is a constant value. You should allocate sizeof(*out) or sizeof(text) bytes of memory for it.
I don't know what for int sizeGot = 24; while (sizeNeeded > sizeGot) the loop inside text_new and append is for. I guess the intention is to do allocations in power of 24. Also it mostly looks like the same code is in both functions, it does look like code duplication, which is a bad thing.
Inside append You pass a pointer to t1, not a double pointer, so if you modify the t1 pointer itself the modification will not be visible outside of function scope. t1 = newText(stringy); is just pointless and leaks memory. You could void append(text **t1, text *t2) and then *t1 = newText(stringy). But you can use a way better approach using realloc - I would expect append to "append" the string, not to create a new object. So first resize the buffer using realloc then strcat(&t1->content[oldcapacity - 1], string_to_copy_into_t1).
int sizeNeeded = (t1->capacity + t2->capacity); is off. You allocate capacity in power of 24, which does not really interact with string length. You need to have strlen(t1->content) + strlen(t2->content) + 1 bytes for both strings and the null terminator.
Try this:
size_t text_newsize(size_t sizeNeeded)
{
// I think this is just `return 24 << (sizeNeeded / 24);`, but not sure
int sizeGot = 24;
while (sizeNeeded > sizeGot) {
sizeGot *= 2;
}
return sizeGot;
}
text *newText(char *s) {
printf("new Text from %s\n", s);
if (s == NULL) return NULL;
int sizeNeeded = strlen(s) + 1;
int sizeGot = text_newsize(sizeNeeded);
text *out = malloc(sizeof(*out));
if (out == NULL) {
return NULL;
}
out->content = malloc(sizeGot);
if (out->content == NULL) {
free(out);
return NULL;
}
strcpy(out->content, s);
out->capacity = sizeGot;
printf("the capacity is %d\n", sizeGot);
return out;
}
and this:
int append(text *t1, text *t2) {
printf("t1 content is %s, t2 content is %s\n", t1->content, t2->content);
int sizeNeeded = strlen(t1->content) + strlen(t2->content) + 1;
if (t1->capacity < sizeNeeded) {
// this could a text_resize(text*, size_t) function
int sizeGot = text_newsize(sizeNeeded);
void *tmp = realloc(t1->content, sizeGot);
if (tmp == NULL) return -ENOMEM;
t1->content = tmp;
t1->capacity = sizeGot;
}
strcat(t1->content, t2->content);
return 0;
}
Some remarks:
Try to handle errors in your library. If you have a function like void append(text *t1, text *t2) let it be int append(text *t1, text *t2) and return 0 on success and negative number on *alloc errors.
Store the size of everything using size_t type. It's defined in stddef.h and should be used to represent a size of an object. strlen returns size_t and sizeof also returns size_t.
I like to put everything inside a single "namespace", I do that by prepending the functions with a string like text_.
I got some free time and decided to implement your library. Below is the code with a simple text object storing strings, I use 24 magic number as allocation chunk size.
// text.h file
#ifndef TEXT_H_
#define TEXT_H_
#include <stddef.h>
#include <stdbool.h>
struct text;
typedef struct text text;
text *text_new(const char content[]);
void text_free(text *t);
int text_resize(text *t, size_t newsize);
int text_append(text *to, const text *from);
int text_append_mem(text *to, const void *from, size_t from_len);
const char *text_get(const text *t);
int text_append_str(text *to, const char *from);
char *text_get_nonconst(text *t);
size_t text_getCapacity(const text *t);
bool text_equal(const text *t1, const text *t2);
#endif // TEXT_H_
// text.c file
//#include "text.h"
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
struct text {
size_t capacity;
char *content;
};
text *text_new(const char content[])
{
text * const t = malloc(sizeof(*t));
if (t == NULL) goto MALLOC_ERR;
const struct text zero = {
.capacity = 0,
.content = NULL,
};
*t = zero;
if (content != NULL) {
const int ret = text_append_str(t, content);
if (ret) {
goto TEXT_APPEND_ERR;
}
}
return t;
TEXT_APPEND_ERR:
free(t);
MALLOC_ERR:
return NULL;
}
void text_free(text *t)
{
assert(t != NULL);
free(t->content);
free(t);
}
int text_resize(text *t, size_t newcapacity)
{
// printf("%s %d -> %d\n", __func__, t->capacity, newcapacity);
// we resize in chunks
const size_t chunksize = 24;
// clap the capacity into multiple of 24
newcapacity = (newcapacity + chunksize - 1) / chunksize * chunksize;
void * const tmp = realloc(t->content, newcapacity);
if (tmp == NULL) return -ENOMEM;
t->content = tmp;
t->capacity = newcapacity;
return 0;
}
int text_append_mem(text *to, const void *from, size_t from_len)
{
if (to == NULL || from == NULL) return -EINVAL;
if (from_len == 0) return 0;
const size_t oldcapacity = to->capacity == 0 ? 0 : strlen(to->content);
const size_t newcapacity = oldcapacity + from_len + 1;
int ret = text_resize(to, newcapacity);
if (ret) return ret;
memcpy(&to->content[newcapacity - from_len - 1], from, from_len);
to->content[newcapacity - 1] = '\0';
return 0;
}
int text_append_str(text *to, const char *from)
{
if (to == NULL || from == NULL) return -EINVAL;
return text_append_mem(to, from, strlen(from));
}
int text_append(text *to, const text *from)
{
if (to == NULL || from == NULL) return -EINVAL;
if (text_getCapacity(from) == 0) return 0;
return text_append_str(to, text_get(from));
}
const char *text_get(const text *t)
{
return t->content;
}
const size_t text_strlen(const text *t)
{
return t->capacity == 0 ? 0 : strlen(t->content);
}
size_t text_getCapacity(const text *t)
{
return t->capacity;
}
bool text_equal_str(const text *t, const char *str)
{
assert(t != NULL);
if (str == NULL && t->capacity == 0) return true;
const size_t strlength = strlen(str);
const size_t t_strlen = text_strlen(t);
if (t_strlen != strlength) return false;
if (memcmp(text_get(t), str, strlength) != 0) return false;
return true;
}
// main.c file
#include <stdio.h>
int text_testAppend(void) {
text *t = text_new("car");
if (t == NULL) return -1;
text *t2 = text_new("pet");
if (t2 == NULL) return -1;
if (text_append(t, t2)) return -1;
assert(text_equal_str(t, "carpet"));
assert(text_getCapacity(t) == 24);
text *t3 = text_new("789012345678901234");
if (t3 == NULL) return -1;
if (text_append(t, t3)) return -1;
assert(text_equal_str(t, "carpet789012345678901234"));
assert(text_getCapacity(t) == 48);
text_free(t);
text_free(t2);
text_free(t3);
return 0;
}
int main()
{
text *t1 = text_new("abc");
text_append_str(t1, "def");
printf("%s\n", text_get(t1));
text_free(t1);
printf("text_testAppend = %d\n", text_testAppend());
return 0;
}
I'm making a simple hash table and a hash function. Each element of the table has a pointer to another node that is used by the insert function when a collision occurs. The problem is that when a collision occurs my code simply crashes when it's navigating the linked list. Here is the code (sorry if it's a little lengthy):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hash.h"
#define TAM 50
int funcaoHash(int chave, char nome[50]) //Hash function
{
int i;
int hash = chave;
for(i = 0; nome[i] != '\0'; i++)
{
hash = hash + (int)nome[i];
}
hash = hash%TAM;
return hash;
}
void insere(int chave, char nome[50], itemTabela* TabelaHash[TAM]) //Insert function
{
int idx = funcaoHash(chave, nome);
itemTabela* ItemAux = (itemTabela*)malloc(sizeof(itemTabela));
itemTabela* ptrAux = NULL;
ItemAux->chave = chave;
strcpy(ItemAux->nome, nome);
if(TabelaHash[idx] == NULL) //No collision occurred
TabelaHash[idx] = ItemAux;
//If there is a collision
else
{
ptrAux = TabelaHash[idx]->ptr;
//Here the code breaks
while(ptrAux->ptr != NULL)
{
ptrAux = ptrAux->ptr;
}
ptrAux = ItemAux;
}
}
void inicializaTabela(itemTabela* TabelaHash[TAM]) //Initialize tabl
{
int i;
for(i = 0; i < TAM; i++)
TabelaHash[i] = NULL;
}
Here is the node structure:
typedef struct itemTabela
{
char nome[50];
int chave;
struct itemTabela* ptr;
}itemTabela;
And here is the main function:
int main()
{
itemTabela *ptrTabela[TAM] = {};
inicializaTabela(ptrTabela);
insere(6, "Chico", ptrTabela);
insere(6, "Chico", ptrTabela);
return 0;
}
Am I accessing the pointers in the wrong way or doing some ilegal access?
Thanks for your time!
1)
Initialization of ItemAux also requires ptr initialization. like ItemAux->ptr = NULL;
2)
ptrAux = TabelaHash[idx]->ptr;
//Here the code breaks
while(ptrAux->ptr != NULL)
{
ptrAux = ptrAux->ptr;
}
ptrAux = ItemAux;
should be
ptrAux = TabelaHash[idx];
while(ptrAux->ptr != NULL)
{
ptrAux = ptrAux->ptr;
}
ptrAux->ptr = ItemAux;
I am trying to implement a generic hash structure that can support any type of data and any hash function.
A wrote the code and try to run it, it dosn't work, it breaks. I try to debug it and there it works well. I don't know where the problem is?
Here is the code that I used for implementing the structure:
The "hash.h" file:
typedef struct tip_hash_nod
{
void *info;
struct tip_hash_nod *urm;
}NOD_LISTA_HASH;
typedef struct
{
NOD_LISTA_HASH *Table;
int size;
int sizeMemory;
int (*hash)(const void *obiect,const int m);
void (*distruge)(void *obiect);
}*HASH;
void initializare_hash(HASH *h,int size,int (*hash_dat)(const void *obiect,const int m),void (*distruge)(void *obiect));
int hash_insert(HASH *h,void *obiect,int sizeOfObiect);
int hash_search(HASH h,void *obiect,int (*compara)(const void *a,const void *b));
void hash_delete(HASH *h);
And the "hash.c" file:
void initializare_hash(HASH *h,int size,int (*hash_dat)(const void *obiect,const int m),void (*distruge)(void *obiect))
{
int i;
(*h) = (HASH)malloc(sizeof(HASH));
(*h)->sizeMemory = size;
if(size != 0)
{
(*h)->Table = (NOD_LISTA_HASH *)malloc((*h)->sizeMemory * sizeof(NOD_LISTA_HASH));
for(i=0;i<(*h)->sizeMemory;i++)
{
(*h)->Table[i].info = NULL;
(*h)->Table[0].urm = NULL;
}
}
else
{
(*h)->Table = (NOD_LISTA_HASH *)malloc(sizeof(NOD_LISTA_HASH));
(*h)->Table[0].info = NULL;
(*h)->Table[0].urm = NULL;
(*h)->sizeMemory = 1;
}
(*h)->size = 0;
(*h)->hash = hash_dat;
(*h)->distruge = distruge;
}
int hash_insert(HASH *h,void *obiect,int sizeOfObiect)
{
int i,poz;
NOD_LISTA_HASH *p;
if((*h)->size == (*h)->sizeMemory)
{
HASH h1;
initializare_hash(&h1,2*(*h)->sizeMemory,(*h)->hash,(*h)->distruge);
for(i=0;i<(*h)->sizeMemory;i++)
{
if((*h)->Table[i].info != NULL)
hash_insert(&h1,(*h)->Table[i].info,sizeOfObiect);
p=(*h)->Table[i].urm;
while(p!=NULL)
{
hash_insert(&h1,p->info,sizeOfObiect);
p = p->urm;
}
}
hash_delete(h);
*h=h1;
return hash_insert(h,obiect,sizeOfObiect);
}
else
{
poz = (*h)->hash(obiect,(*h)->sizeMemory);
if((*h)->Table[poz].info == NULL)
{
(*h)->Table[poz].info = malloc(sizeOfObiect);
memcpy((*h)->Table[poz].info,obiect,sizeOfObiect);
(*h)->Table[poz].urm = NULL;
(*h)->size++;
}
else
{
p = &((*h)->Table[poz]);
while(p->urm!=NULL)
p = p->urm;
p->urm = (NOD_LISTA_HASH *)malloc(sizeof(NOD_LISTA_HASH));
p = p->urm;
p->info = malloc(sizeOfObiect);
memcpy(p->info,obiect,sizeOfObiect);
p->urm = NULL;
}
return poz;
}
}
int hash_search(HASH h,void *obiect,int (*compara)(const void *a,const void *b))
{
int poz;
NOD_LISTA_HASH *p;
poz = h->hash(obiect,h->sizeMemory);
if(h->Table[poz].info == NULL)
return -1;
else
if(compara(h->Table[poz].info,obiect)==0)
return poz;
else
{
p=h->Table[poz].urm;
while(p != NULL)
{
if(compara(p->info,obiect)==0)
return poz;
p = p->urm;
}
return -1;
}
}
static void distruge_lista(NOD_LISTA_HASH *p,void (*distruge_obiect)(void *obiect))
{
if(p->urm != NULL)
distruge_lista(p->urm,distruge_obiect);
else
{
if(p->info != NULL)
distruge_obiect(p->info);
free(p);
}
}
void hash_delete(HASH *h)
{
int i;
for(i=0;i<(*h)->sizeMemory;i++)
{
if((*h)->Table[i].info != NULL && (*h)->Table[i].urm != NULL)
{
distruge_lista((*h)->Table[i].urm,(*h)->distruge);
}
}
free((*h)->Table);
*h = NULL;
}
And this is my "main.c" file:
#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
#include "hash.h"
int comparare(const void *a,const void *b)
{
return (*(int *)a - *(int *)b);
}
int hash(const void *obiect,int m)
{
return (*(int *)obiect) % m;
}
void distruge_obiect(void *obiect)
{
free((int *)obiect);
}
int main()
{
HASH h;
int val,error;
initializare_hash(&h,0,hash,distruge_obiect);
val = 20;
hash_insert(&h,&val,sizeof(int));
val = 800;
hash_insert(&h,&val,sizeof(int));
val = 2000;
hash_insert(&h,&val,sizeof(int));
val = 765;
hash_insert(&h,&val,sizeof(int));
val = 800;
error = hash_search(h,&val,comparare);
if(error == -1)
printf("Elementul %d nu se afla in hash.\n",val);
else
printf("Elementul %d se afla pe pozitia: %d.\n",val,error);
hash_delete(&h);
getch();
return 0;
}
How I already sad if I try to debug it works with no problem, but when I run it, it crashes. I can onely make an assumption that it can not dealocate the memory or something. My call stack loocks like this:
You've dropped a pretty big pile of code on us, without much to go on. I had a quick look anyway, and noticed this incorrect allocation:
(*h) = (HASH)malloc(sizeof(HASH));
HASH is a pointer type, so you are allocating only enough memory for one pointer. You want to allocate memory for the thing to which it points:
*h = malloc(sizeof(**h));
(The cast is not required in C, and some folks around here will be strident about not using one.)
That error would be entirely enough to cause all manner of bad behavior. In particular, the erroneous code might seem to work until you dynamically allocate more memory and write to that, so perhaps that explains why your tests crash on the second insertion.