Convert sha256 to smaller uint type - c

I'm implementing a hash table in c and I chose as key the sha256 hash of the file that I need to store. The problem is that I need to convert the key to a reusable index to insert in the hash table. I tought to rehash the key, but this way I would increase the possibility of overlapping values. Is there a way to use this hash as the key to the table?
The sha256 is store as a BYTE[32] or can be converted as a sting
void* ht_get(ht* table, const char *key) {
size_t index = magicfunction(key);
while (table->entries[index].key != NULL) {
if (strcmp(key, table->entries[index].key) == 0) {
// Found key, return value.
return table->entries[index].value;
}
// Key wasn't in this slot, move to next (linear probing).
index++;
if (index >= table->capacity) {
index = 0;
}
}
return NULL;
}

When I need an absolute minimalist hashmap in C, I usually end up doing something like that:
#include <stddef.h>
#include <string.h>
#include <stdio.h>
struct hmap_entry {
const char *key;
/* Or in your case: */
/* char key[SIZE_OF_SHA256]; */
void *payload;
};
#define HMAP_ENTRY_CNT 256
struct hmap {
struct hmap_entry entries[HMAP_ENTRY_CNT];
};
static size_t str_hash(const char *s) {
size_t hash = 0;
while(*s) {
hash = (hash << 7) ^ (size_t)*s++ ^ hash;
}
return hash;
}
static struct hmap_entry *hmap_search_entry(struct hmap *map
, const char *key)
{
size_t hash = str_hash(key);
/* Or in your case: */
/* size_t hash = 0; memcpy(&hash, key, sizeof(key)) */
hash = hash % HMAP_ENTRY_CNT;
struct hmap_entry *e = NULL;
while(1) {
e = map->entries + hash;
if(e->key == NULL
|| strcmp(key, e->key) == 0) {
break;
}
/* Or in your case: */
/* if(e->key == NULL
|| memcmp(key, e->key, SIZE_OF_SHA256) == 0) {
break;
}
*/
hash = (hash + 1) % HMAP_ENTRY_CNT;
}
return e;
}
And this is how I use it:
int main()
{
struct hmap_entry *e;
struct hmap map = {};
/* insert foo */
e = hmap_search_entry(&map, "foo");
e->key = "foo";
e->payload = "hello world";
/* insert bar */
e = hmap_search_entry(&map, "bar");
e->key = "bar";
e->payload = "Something else";
/* get foo */
e = hmap_search_entry(&map, "foo");
if(e->key) {
printf("Value of \"%s\" is \"%s\"\n", e->key, (const char *)e->payload);
}
else {
printf("Not found!\n");
}
/* get bar */
e = hmap_search_entry(&map, "bar");
if(e->key) {
printf("Value of \"%s\" is \"%s\"\n", e->key, (const char *)e->payload);
}
else {
printf("Not found!\n");
}
/* get test */
e = hmap_search_entry(&map, "test");
if(e->key) {
printf("Value of \"%s\" is \"%s\"\n", e->key, (const char *)e->payload);
}
else {
printf("Not found!\n");
}
return 0;
}

Related

Cant insert Node to binary tree

I am trying to insert Node to Binary tree. This is my function for creating Node (rest is done).
void BVSCreate_function(TNodef *rootPtr, function_save token) {
TNodef *newPtr = malloc(sizeof(struct tnodef));
if (newPtr == NULL) {
fprintf(stderr, "99");
return;
}
TNodef init;
string initStr;
initStr.str = NULL;
initStr.length = 0;
initStr.alloc = 0;
newPtr = &init;
newPtr->content = &initStr;
newPtr->leftPtr = NULL;
newPtr->rightPtr = NULL;
newPtr->return_type = token.ret_value;
newPtr->parameters = token.param_count;
strCpyStr(newPtr->content, token.content);
rootPtr = newPtr;
}
void BVSInsert_function(TNodef *rootPtr, function_save token) {
if (rootPtr == NULL) {
BVSCreate_function(rootPtr, token);
} else {
if ((strCmpStr(token.content, rootPtr->content)) < 0) {
BVSCreate_function(rootPtr->leftPtr, token);
} else
if ((strCmpStr(token.content, rootPtr->content)) > 0) {
BVSCreate_function(rootPtr->rightPtr, token);
}
}
}
When TNodef and function_save are structs:
typedef struct {
string *content;
int param_count;
int ret_value;
} function_save;
typedef struct tnodef {
string *content;
struct tnodef *leftPtr;
struct tnodef *rightPtr;
int parameters;
int return_type;
} TNodef;
Where string is defined as this struct:
typedef struct {
char *str; // content of string
int length; // length of string
int alloc; // amount of memory allocated
} string;
strCpystr function :
int strCpyStr(string *s1, string *s2) {
int len2 = s2->length;
if (len2 > s1->alloc) {
if (((s1->str) = (char *)realloc(s1->str, len2 + 1)) == NULL) {
return 1;
}
s1->alloc = len2 + 1;
}
strcpy(s1->str, s2->str);
s1->length = len2 + 1;
return 0;
}
I am trying to create a node in binary tree and put there information from struct function_save.
But when I try to print this tree after insert it shows me that tree is still empty.
Your code in BVSCreate_function has undefined behavior because:
newPtr = &init; discards the allocated node and instead uses a local structure that will become invalid as soon as the function returns.
newPtr->content = &initStr; is incorrect for the same reason: you should allocate memory for the string too or possibly modify the TNodeDef to make content a string object instead of a pointer.
Function BVSInsert_function does not return the updated root pointer, hence the caller's root node is never updated. You could change the API, passing the address of the pointer to be updated.
There is also a confusion in BVSInsert_function: it should call itself recursively when walking down the tree instead of calling BVSCreate_function.
Here is a modified version:
/* Allocate the node and return 1 if successful, -1 on failure */
int BVSCreate_function(TNodef **rootPtr, function_save token) {
TNodef *newPtr = malloc(sizeof(*newPtr));
string *newStr = malloc(sizeof(*content));
if (newPtr == NULL || newStr == NULL) {
fprintf(stderr, "99");
free(newPtr);
free(newStr);
return -1;
}
newStr->str = NULL;
newStr->length = 0;
newStr->alloc = 0;
newPtr->content = newStr;
newPtr->leftPtr = NULL;
newPtr->rightPtr = NULL;
newPtr->return_type = token.ret_value;
newPtr->parameters = token.param_count;
strCpyStr(newPtr->content, token.content);
*rootPtr = newPtr;
return 1;
}
int BVSInsert_function(TNodef **rootPtr, function_save token) {
if (*rootPtr == NULL) {
return BVSCreate_function(rootPtr, token);
} else {
if (strCmpStr(token.content, rootPtr->content) < 0) {
return BVSInsert_function(&rootPtr->leftPtr, token);
} else
if ((strCmpStr(token.content, rootPtr->content)) > 0) {
return BVSInsert_function(&rootPtr->rightPtr, token);
} else {
/* function is already present: return 0 */
return 0;
}
}
}
Note also that function strCpyStr may write beyond the end of the allocated area is len2 == s1->alloc, assuming s1->len is the length of the string, excluding the null terminator.
Here is a modified version:
int strCpyStr(string *s1, const string *s2) {
int len2 = s2->length;
if (len2 >= s1->alloc) {
char *newstr = (char *)realloc(s1->str, len2 + 1);
if (newstr == NULL) {
return 1;
}
s1->str = newstr;
s1->alloc = len2 + 1;
}
strcpy(s1->str, s2->str);
s1->length = len2;
return 0;
}

create tree with static and pre-determined elements in c

I have a problem that I decided to solve using binary tree, however:
I can't think of a way to fill the tree with predetermined elements so that it looks like the following in the image
I used a vector as follows, and then I inserted it into the tree, I don't know if I just leave it in the order the tree will be assembled as in the image, but what I did was the following:
char* dict[] = {
"Mamifero","aves","repteis",
"quadrupede", "bipede", "voadores", "aquaticos",
"nao-voadoras", "nadadoras", "de rapina", "com casco", "carnivoros", "sem patas",
"carnivoro", "herbivoro", "onivoro", "afrutifero", "tropical", "polar",
"leao", "cavalo", "homem", "macaco", "morcego", "baleia", "avestruz", "pinguim",
"pato", "aguia", "tartaruga", "crocodilo", "cobra"
};
typedef struct Animal *animalptr;
typedef struct Animal {
char *str;
animalptr left, right;
} Animal;
typedef int (*compare)(const char*, const char*);
void insert (char* key, Animal** leaf, compare cmp) {
int res;
if (*leaf == NULL) {
*leaf = (Animal*) malloc(sizeof(Animal));
(*leaf)->str = malloc(strlen(key) + 1);
strcpy ((*leaf)->str, key);
(*leaf)->left = NULL;
(*leaf)->right = NULL;
// printf("\nnew node for %s", key);
}
else {
// printf("%d\n", res);
res = cmp (key, (*leaf)->str);
if (res < 0) insert (key, &(*leaf)->left, cmp);
else if (res > 0) insert (key, &(*leaf)->right, cmp);
else printf("key '%s' already in tree\n", key);
}
}
int cmpStr (const char* a, const char* b) {
// printf("a = %d\n b = %d", strlen(a), strlen(b));
return (strcmp (a,b));
}
fill it in as follows:
int main () {
Animal *parent = NULL;
char q;
// printf("%ld\n", sizeof(dict));
// insert(dict[0], &parent, (compare)cmpStr);
// printTree(parent);
for (int i = 0; i < NUM_NODES; i++) {
insert(dict[i], &parent, (compare)cmpStr);
}
printf ("%s", search(parent, "", (compare)cmpStr)->str);
// printTree(parent);
// do {
// // scanf("%c", &q);
// // printf("%s?", dict[rand() % 32]);
// // }while (q != 'q');
return 0;
}
so now my saga would be how to apply some weight to each word, to direct it to one side or the other, the exercise I'm trying to solve in this way is this:
Build a program that is able to conclude which of the following animals
was chosen through questions and answers. Possible animals: lion, horse, man,
monkey, whale, ostrich, penguin, duck, eagle, turtle, crocodile and snake.
teste
Each word can be marked with a parent-child relationship, weight = parent's weight + own serial number under the same parent, like
Mamifero => "1"
aves => "2"
repteis => "3"
bipede => "12" (second child under Mamifero)
afrutifero => "122" (second child under bipede)
If the prefixes are the same, it means that there is a parent-child relationship, insert it on the right side of the tree, otherwise insert it on the left side of the tree
Please see the modified code
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
typedef struct words {
char *weight;
char *name;
} Words;
Words dict[] = {
{"1", "Mamifero"},
{"2", "aves"},
{"3", "repteis"},
{"11", "quadrupede"},
{"12", "bipede"},
{"13", "voadores"},
{"14", "aquaticos"},
{"21", "nao-voadoras"},
{"22", "nadadoras"},
{"23", "de rapina"},
{"31", "com casco"},
{"32", "carnivoros"},
{"33", "sem patas"},
{"111", "carnivoro"},
{"112", "herbivoro"},
{"121", "onivoro"},
{"122", "afrutifero"},
{"211", "tropical"},
{"212", "polar"},
{"1111", "leao"},
{"1121", "cavalo"},
{"1211", "homem"},
{"1221", "macaco"},
{"131", "morcego"},
{"141", "baleia"},
{"2111", "avestruz"},
{"2121", "pinguim"},
{"221", "pato"},
{"231", "aguia"},
{"311", "tartaruga"},
{"321", "crocodilo"},
{"331", "cobra"}
};
#define NUM_NODES (sizeof(dict)/sizeof(*dict))
typedef struct Animal *animalptr;
typedef struct Animal {
char *weight;
char *str;
animalptr left, right;
} Animal;
typedef int (*compare)(const char *, const char *);
void insert(Words *key, Animal **leaf, compare cmp)
{
int res;
if (*leaf == NULL) {
*leaf = (Animal *) malloc(sizeof(Animal));
(*leaf)->str = strdup(key->name);
(*leaf)->weight = strdup(key->weight);
(*leaf)->left = NULL;
(*leaf)->right = NULL;
} else {
res = cmp(key->weight, (*leaf)->weight);
if (res < 0) insert(key, &(*leaf)->left, cmp);
else if (res > 0) insert(key, &(*leaf)->right, cmp);
else printf("key '%s' already in tree\n", key->name);
}
}
int cmpStr(const char *a, const char *b)
{
if (strcmp(a, b) == 0)
return 0;
// If the prefixes are the same, it means a is a child of b, insert on the right
if (strncmp(a, b, strlen(b)) == 0)
return 1;
// Otherwise insert left
return -1;
}
char *search(Animal *leaf)
{
char *ret = "";
char buf[16];
while (leaf) {
if (!leaf->right && !leaf->left) {
ret = leaf->str;
break;
}
// guess
printf("É %s (yes,no): ", leaf->str);
fgets(buf, sizeof(buf), stdin);
if ((*buf == 'q') || (*buf == 'Q'))
break;
// If yes, go to the right
if ((*buf == 'y') || (*buf == 'Y'))
leaf = leaf->right;
// Otherwise, left
else if ((*buf == 'n') || (*buf == 'N'))
leaf = leaf->left;
}
return ret;
}
int main()
{
Animal *parent = NULL;
for (int i = 0; i < NUM_NODES; i++) {
insert(&dict[i], &parent, (compare)cmpStr);
}
printf("%s\n", search(parent));
return 0;
}

I get a segmentation fault because of free even though i used malloc

i am writing a Generic ADT using C and i keep getting a segmentation fault when i free an element
PairResult pairClear(Pair pair)
{
if(pair == NULL)
{
return PAIR_NULL_ARGUMENT;
}
KeyElement key=pair->key;
DataElement data=pair->data;
if(key)
pair->free_key(key);//i get the Error here
if(data)
pair->free_data(data);
return PAIR_SUCCESS;
}
the memory for key and data is allocated :
Pair pairCreate( KeyElement key, DataElement data,
copyDataElements copy_data,
freeDataElements free_data,
copyKeyElements copy_key,
freeKeyElements free_key)
{
Pair pair = malloc(sizeof(*pair));
if(pair == NULL)
{
return NULL;
}
pair->copy_data=copy_data;
pair->copy_key=copy_key;
pair->free_data=free_data;
pair->free_data=free_key;
KeyElement new_string_key = copy_key(key);
DataElement new_string_data = copy_data(data);
if((new_string_key == NULL) || (new_string_data == NULL))
{
pairDestroy(pair);
return NULL;
}
pair->key = new_string_key;
pair->data = new_string_data;
return pair;
}
this pairDestroy
void pairDestroy(Pair pair)
{
if(pair == NULL)
{
return;
}
#ifndef NDEBUG
PairResult result =
#endif
pairClear(pair);
assert(result == PAIR_SUCCESS);
free(pair);
}
these are the copy functions used:
static KeyElement copyKeyInt(KeyElement n) {
if (!n) {
return NULL;
}
int *copy = malloc(sizeof(*copy));
if (!copy) {
return NULL;
}
*copy = *(int *) n;
return copy;
}
static DataElement copyDataChar(DataElement n) {
if (!n) {
return NULL;
}
char *copy = malloc(sizeof(*copy));
if (!copy) {
return NULL;
}
*copy = *(char *) n;
return (DataElement) copy;
}
and these are the free functions used
static void freeInt(KeyElement n) {
free(n);
}
static void freeChar(DataElement n) {
free(n);
}
and here is the struct of pair
struct Pair_t {
KeyElement key;
DataElement data;
copyDataElements copy_data;
freeDataElements free_data;
copyKeyElements copy_key;
freeKeyElements free_key;
};
these are all the typedef used :
typedef struct Pair_t* Pair;
typedef enum PairResult_t {
PAIR_SUCCESS,
PAIR_OUT_OF_MEMORY,
PAIR_NULL_ARGUMENT,
} PairResult;
typedef void *DataElement;
typedef void *KeyElement;
typedef DataElement(*copyDataElements)(DataElement);
typedef KeyElement(*copyKeyElements)(KeyElement);
typedef void(*freeDataElements)(DataElement);
typedef void(*freeKeyElements)(KeyElement);
and a main function so that u could reproduce it
int main()
{
Pair pair;
for (int i = 1; i < 1000; ++i) {
char j = (char) i;
++j;
pair=pairCreate(&i,&j,copyDataChar,freeChar,copyKeyInt,freeInt);
pairDestroy(pair);
}
I added everything I could for a reproducible code
if anything should be edited please tell me in the comments
Pair pairCreate(...) {
...
pair->free_data = free_data;
pair->free_data = free_key;
// ^^^^^^^^^ UH OH
...
You owe me 15 mins of debugging time.

Command line argument causing heap corruption

I'm trying to write a buffer using C, it was supposed to take in contents from an input file and output it to another text file. However, when I try to debug it in Visual Studio 2019, it always trigger a breakpoint at if ((fi = fopen(argv[1],"r")) == NULL){...} of my test file, it also detected a critical error of C0000374. My command line argument is buffer.exe ass1.pls a
My test file : platy_bt.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "buffer.h"
/*check for ANSI C compliancy */
#define ANSI_C 0
#if defined(__STDC__)
#undef ANSI_C
#define ANSI_C 1
#endif
/* Declaration of an error printing function with
* variable number of arguments
*/
void err_printf(char *fmt, ...);
/* Declaration of a buffer contents display function */
void display (Buffer *ptr_Buffer);
long get_filesize(char *fname);
int main(int argc, char **argv){
pBuffer ptr_Buffer; /* pointer to Buffer structure */
FILE *fi; /* input file handle */
int loadsize = 0; /* the size of the file loaded in the buffer */
int ansi_c = !ANSI_C; /* ANSI C compliancy flag */
char symbol; /* symbol read from input file */
/* Check if the compiler option is set to compile ANSI C */
/* __DATE__, __TIME__, __LINE__, __FILE__, __STDC__ are predefined preprocessor macros*/
if(ansi_c){
err_printf("Date: %s Time: %s",__DATE__, __TIME__);
err_printf("ERROR: Compiler is not ANSI C compliant!\n");
exit(1);
}
/* missing file name or/and mode parameter */
if (argc <= 2){
err_printf("\nDate: %s Time: %s",__DATE__, __TIME__);
err_printf("\nRuntime error at line %d in file %s\n", __LINE__, __FILE__);
err_printf("%s\b\b\b\b%s%s",argv[0],": ","Missing parameters.");
err_printf("Usage: platybt source_file_name mode");
exit(1);
}
/* create a source code input buffer */
switch(*argv[2]){
case 'f': case 'a': case 'm': break;
default:
err_printf("%s%s%s",argv[0],": ","Wrong mode parameter.");
exit(1);
}
/*create the input buffer */
ptr_Buffer = b_allocate(0,0,*argv[2]);
if (ptr_Buffer == NULL){
err_printf("%s%s%s",argv[0],": ","Cannot allocate buffer.");
exit(1);
}
/* open the source file */
if ((fi = fopen(argv[1],"r")) == NULL){
err_printf("%s%s%s%s",argv[0],": ", "Cannot open file: ",argv[1]);
exit (1);
}
/* load a source file into the input buffer */
printf("Reading file %s ....Please wait\n",argv[1]);
loadsize = b_load (fi,ptr_Buffer);
if(loadsize == RT_FAIL_1)
err_printf("%s%s%s",argv[0],": ","Error in loading buffer.");
/*if the input file has not been completely loaded, find the file size and print the last symbol loaded */
if (loadsize == LOAD_FAIL){
printf("The input file %s %s\n", argv[1],"has not been completely loaded.");
symbol = (char)fgetc(fi);
printf("Last character read from the input file is: %c %d\n", symbol, symbol);
printf("Input file size: %ld\n", get_filesize(argv[1]));
}
/* close source file */
fclose(fi);
/* display the contents of the input buffer */
display(ptr_Buffer);
/* compact the buffer
* add end-of-file character (EOF) to the buffer
* display again
*/
if(!b_compact(ptr_Buffer,EOF)){
err_printf("%s%s%s",argv[0],": ","Error in compacting buffer.");
}
display(ptr_Buffer);
/* free the dynamic memory used by the buffer */
b_free(ptr_Buffer);
/* make the buffer invalid
It is not necessary here because the function terminates anyway,
but will prevent run-time errors and crashes in future expansions
*/
ptr_Buffer = NULL;
/*return success */
return (0);
}
/* error printing function with variable number of arguments*/
void err_printf( char *fmt, ... ){
/*Initialize variable list */
va_list ap;
va_start(ap, fmt);
(void)vfprintf(stderr, fmt, ap);
va_end(ap);
/* Move to new line */
if( strchr(fmt,'\n') == NULL )
fprintf(stderr,"\n");
}
void display (Buffer *ptr_Buffer){
printf("\nPrinting buffer parameters:\n\n");
printf("The capacity of the buffer is: %d\n",b_capacity(ptr_Buffer));
printf("The current size of the buffer is: %d\n", b_addcoffset(ptr_Buffer));
printf("The operational mode of the buffer is: %d\n",b_mode(ptr_Buffer));
printf("The increment factor of the buffer is: %lu\n",b_incfactor(ptr_Buffer));
printf("The first symbol in the buffer is: %c\n", b_addcoffset(ptr_Buffer)?*b_location(ptr_Buffer, 0):' ');
printf("The value of the flags field is: %04hX\n",ptr_Buffer->flags);
printf("\nPrinting buffer contents:\n\n");
b_rewind(ptr_Buffer);
if (!b_print(ptr_Buffer,1)) printf("empty buffer\n");
}
long get_filesize(char *fname){
FILE *input;
long flength;
input = fopen(fname, "r");
if(input == NULL){
err_printf("%s%s","Cannot open file: ",fname);
return 0;
}
fseek(input, 0L, SEEK_END);
flength = ftell(input);
fclose(input);
return flength;
}
<!-- -->
My buffer.c
#include "buffer.h";
#include <string.h>;
#include <stdlib.h>;
Buffer* b_allocate(short init_capacity, char inc_factor, char o_mode) {
char* characterArray;
if(init_capacity <0 || init_capacity >= SHRT_MAX) {
printf("Capacity exceed the limit, must be between 0 and %d.", SHRT_MAX);
return NULL;
}
else {
pBuffer bufferStructure = (pBuffer) calloc(init_capacity, sizeof(Buffer));
if (init_capacity == 0) {
init_capacity = DEFAULT_INIT_CAPACITY;
if (o_mode=='a' || o_mode== 'm') {
inc_factor = DEFAULT_INC_FACTOR;
bufferStructure->inc_factor = inc_factor;
}
else {
if (o_mode== 'f') {
inc_factor = 0;
bufferStructure->inc_factor = inc_factor;
}
}
characterArray = (char *) malloc(DEFAULT_INIT_CAPACITY);
}
else {
characterArray = (char*) malloc(init_capacity);
}
bufferStructure->cb_head = characterArray;
if (inc_factor == 0 && init_capacity != 0) {
bufferStructure->mode = 0;
bufferStructure->inc_factor = 0;
}
switch (o_mode) {
case 'f':
bufferStructure->mode = 0;
bufferStructure->inc_factor = 0;
break;
case 'a':
if (inc_factor >= 1 && inc_factor <= 255) {
bufferStructure->mode = 1;
bufferStructure->inc_factor = inc_factor;
break;
}
else {
if (inc_factor != 0) {
return NULL;
break;
}
}
case 'm':
if (inc_factor >= 1 && inc_factor <= 100) {
bufferStructure->mode = -1;
bufferStructure->inc_factor = inc_factor;
break;
}
else {
if (inc_factor != 0) {
return NULL;
break;
}
}
}
bufferStructure->capacity = 200;
bufferStructure->flags = DEFAULT_FLAGS;
bufferStructure->addc_offset = 0;
bufferStructure->getc_offset = 0;
bufferStructure->markc_offset;
return bufferStructure;
}
}
pBuffer b_addc(pBuffer const pBD, char symbol) {
int inc_factor = pBD->inc_factor - '0';
int newCapacity = 0;
int availableSpace =0;
long newIncrement = 0;
pBD->flags &= RESET_R_FLAG;
char* ptr;
if (pBD->addc_offset <= pBD->capacity) {
pBD->cb_head[pBD->addc_offset] = symbol;
pBD->addc_offset++;
return pBD;
}
else {
switch (pBD->mode) {
case 0:
return NULL;
break;
case 1:
newCapacity = pBD->capacity + inc_factor;
if (newCapacity > 0 && newCapacity < (SHRT_MAX - 1)) {
if (realloc(pBD->cb_head, newCapacity)) {
pBD->flags |= SET_R_FLAG;
pBD->cb_head[pBD->addc_offset] = symbol;
pBD->addc_offset++;
pBD->capacity = (short)newCapacity;
ptr = realloc(pBD->cb_head, newCapacity);
pBD->cb_head = ptr;
return pBD;
}
else {
return NULL;
}
}
else {
if (newCapacity > 0 && newCapacity >= (SHRT_MAX - 1)) {
newCapacity = SHRT_MAX - 1;
if (realloc(pBD->cb_head, newCapacity)) {
pBD->flags |= SET_R_FLAG;
pBD->cb_head[pBD->addc_offset] = symbol;
pBD->addc_offset++;
pBD->capacity = (short) newCapacity;
ptr = realloc(pBD->cb_head, newCapacity);
pBD->cb_head = ptr;
return pBD;
}
else {
return NULL;
}
}
else {
if (newCapacity <= 0) {
return NULL;
}
}
}
break;
case -1:
if (pBD->capacity >= SHRT_MAX -1) {
return NULL;
}
else {
availableSpace = SHRT_MAX - 1 - pBD->capacity;
newIncrement = (long)(availableSpace * inc_factor/100);
newCapacity = (short)(pBD->capacity + newIncrement);
if (newCapacity >= SHRT_MAX-1) {
newCapacity = SHRT_MAX - 1;
}
else {
if (newCapacity > 0 && newCapacity < SHRT_MAX - 1) {
if (realloc(pBD->cb_head, newCapacity)) {
pBD->flags |= SET_R_FLAG;
pBD->cb_head[pBD->addc_offset] = symbol;
pBD->addc_offset++;
pBD->capacity = (short)newCapacity;
ptr = realloc(pBD->cb_head, newCapacity);
pBD->cb_head = ptr;
return pBD;
}
else {
return NULL;
}
}
}
}
break;
}
}
}
int b_clear(Buffer* const pBD) {
pBD->addc_offset = 0;
pBD->getc_offset = 0;
pBD->markc_offset = 0;
pBD->flags = DEFAULT_FLAGS;
return 1;
}
void b_free(Buffer* const pBD) {
free(pBD->cb_head);
free(pBD);
}
short b_capacity(Buffer* const pBD) {
if (pBD->capacity != 0) {
return pBD->capacity;
}
else {
return -1;
}
}
short b_addcoffset(Buffer* const pBD) {
if (pBD->addc_offset != 0 ) {
return pBD->addc_offset;
}
else {
return -1;
}
}
short b_markc(pBuffer const pBD, short mark) {
if (mark<0 || mark > pBD->addc_offset) {
return -1;
}
else {
pBD->markc_offset = mark;
return pBD->markc_offset;
}
}
short b_getcoffset(Buffer* const pBD) {
if (pBD->getc_offset >=0) {
return pBD->getc_offset;
}
else {
return -1;
}
}
int b_mode(Buffer* const pBD) {
if (pBD->mode) {
return pBD->mode;
}
else {
printf("Not found");
}
}
size_t b_incfactor(Buffer* const pBD) {
unsigned char inc_factor = pBD->inc_factor;
size_t inc_factor_value = inc_factor;
if (pBD->inc_factor) {
return inc_factor_value;
}
else {
return 0x100;
}
}
char b_getc(Buffer* const pBD) {
if (pBD->getc_offset > pBD->addc_offset) {
return -2;
}
else {
if (pBD->getc_offset == pBD->addc_offset) {
pBD->flags &= RESET_EOB;
pBD->flags |= SET_EOB;
pBD->getc_offset++;
return 0;
}
else {
pBD->flags &= RESET_EOB;
pBD->getc_offset++;
return pBD->cb_head[pBD->getc_offset];
}
}
}
int b_print(Buffer* const pBD, char nl) {
while (b_eob(pBD)==0) {
printf("%c", b_getc(pBD));
}
if (nl != 0) {
printf("\n");
}
return nl;
}
int b_eob(Buffer* const pBD) {
if ((pBD->flags & CHECK_EOB) > 0) {
return pBD->flags & CHECK_EOB;
}
else {
if ((pBD->flags & CHECK_EOB) == 0) {
return 0;
}
else {
return -1;
}
}
}
char b_rflag(Buffer* const pBD) {
if ((pBD->flags & CHECK_R_FLAG) > 0) {
return pBD->flags & CHECK_R_FLAG ;
}
else {
if ((pBD->flags & CHECK_R_FLAG) == 0) {
return 0;
}
else {
return -1;
}
}
}
int b_isfull(Buffer* const pBD) {
if (pBD->addc_offset == pBD->capacity) {
return 0;
}
else {
if (pBD->addc_offset < pBD->capacity) {
return 1;
}
else {
return -1;
}
}
}
int b_isempty(Buffer* const pBD) {
if (pBD->addc_offset == 0) {
return 1;
}
else {
if (pBD->addc_offset > 0) {
return 0;
}
else {
return -1;
}
}
}
char* b_location(Buffer* const pBD, short loc_offset) {
if ( loc_offset > pBD->addc_offset) {
return NULL;
}
else {
return &pBD->cb_head[loc_offset];
}
}
int b_load(FILE* const fi, Buffer* const pBD) {
while (!feof(fi)) {
char symbol = (char)fgetc(fi);
if (b_addc(pBD, symbol) != NULL) {
}
else {
ungetc(symbol, fi);
return LOAD_FAIL;
}
}
return pBD->addc_offset;
}
Buffer* b_compact(Buffer* const pBD, char symbol) {
short newCapacity = pBD->addc_offset+1;
char * ptr = (char *)realloc(pBD, newCapacity);
pBD->cb_head = ptr;
pBD->capacity = (short)newCapacity;
pBD->cb_head[pBD->addc_offset] = symbol;
pBD->addc_offset++;
pBD->flags &= RESET_R_FLAG;
pBD->flags |= SET_R_FLAG;
return pBD;
}
short b_retract(Buffer* const pBD) {
if (pBD->getc_offset <= 0) {
return -1;
}
else {
pBD->getc_offset--;
return pBD->getc_offset;
}
}
short b_reset(Buffer* const pBD) {
if (pBD->getc_offset > pBD->addc_offset || pBD->markc_offset < 0) {
return -1;
}
else {
pBD->getc_offset = pBD->markc_offset;
}
return pBD->getc_offset;
}
int b_rewind(Buffer* const pBD) {
pBD->getc_offset = 0;
pBD->markc_offset = 0;
return 0;
}
<!-- -->
My buffer.h
#ifndef BUFFER_H_
#define BUFFER_H_
/*#pragma warning(1:4001) *//*to enforce C89 type comments - to make //comments an warning */
/*#pragma warning(error:4001)*//* to enforce C89 comments - to make // comments an error */
/* standard header files */
#include <stdio.h> /* standard input/output */
#include <malloc.h> /* for dynamic memory allocation*/
#include <limits.h> /* implementation-defined data type ranges and limits */
/* constant definitions */
#define RT_FAIL_1 (-1) /* operation failure return value 1 */
#define RT_FAIL_2 (-2) /* operation failure return value 2 */
#define LOAD_FAIL (-2) /* load fail return value */
#define DEFAULT_INIT_CAPACITY 200 /* default initial buffer capacity */
#define DEFAULT_INC_FACTOR 15 /* default increment factor */
/* You should add your own constant definitions here */
/* Add your bit-masks constant definitions here */
#define DEFAULT_FLAGS 0xFFF9
#define SET_EOB 0x0002
#define RESET_EOB 0xFFFD
#define CHECK_EOB 0x0002
#define SET_R_FLAG 0x0004
#define RESET_R_FLAG 0xFFFB
#define CHECK_R_FLAG 0x0004
#define DEFAULTZ 0x0000 /* 0000 0000 0000 0000 */
#define SET_LSB 0x0001 /* 0000 0000 0000 0001 */
#define RESET_LSB 0xFFFE /* 1111 1111 1111 1110 */
#define CHK_LSB 0x0001 /* 0000 0000 0000 0001 */
/* user data type declarations */
typedef struct BufferDescriptor {
char *cb_head; /* pointer to the beginning of character array (character buffer) */
short capacity; /* current dynamic memory size (in bytes) allocated to character buffer */
char inc_factor; /* character array increment factor */
char mode; /* operational mode indicator*/
unsigned short flags; /* contains character array reallocation flag and end-of-buffer flag */
short addc_offset; /* the offset (in chars) to the add-character location */
short getc_offset; /* the offset (in chars) to the get-character location */
short markc_offset; /* the offset (in chars) to the mark location */
} Buffer, *pBuffer;
/*typedef Buffer *pBuffer;*/
/* function declarations */
/*
Place your function declarations here.
Do not include the function header comments here.
Place them in the buffer.c file
*/
Buffer* b_allocate(short init_capacity, char inc_factor, char o_mode);
pBuffer b_addc(pBuffer const pBD, char symbol);
int b_clear(Buffer* const pBD);
void b_free(Buffer* const pBD);
int b_isfull(Buffer* const pBD);
short b_addcoffset(Buffer* const pBD);
short b_capacity(Buffer* const pBD);
short b_markc(pBuffer const pBD, short mark);
int b_mode(Buffer* const pBD);
size_t b_incfactor(Buffer* const pBD);
int b_load(FILE* const fi, Buffer* const pBD);
int b_isempty(Buffer* const pBD);
char b_getc(Buffer* const pBD);
int b_eob(Buffer* const pBD);
int b_print(Buffer* const pBD, char nl);
Buffer* b_compact(Buffer* const pBD, char symbol);
char b_rflag(Buffer* const pBD);
short b_retract(Buffer* const pBD);
short b_reset(Buffer* const pBD);
short b_getcoffset(Buffer* const pBD);
int b_rewind(Buffer* const pBD);
char* b_location(Buffer* const pBD, short loc_offset);
If I try to countinue with debugging, it output : Unhandled exception at 0x774DFA1D (ntdll.dll) in buffer.exe: 0xC0000374: A heap has been corrupted (parameters: 0x7751B960). and Unhandled exception at 0x7743C5D7 (ntdll.dll) in buffer.exe: 0xC0000005: Access violation reading location 0x00985A7A.. If I build this and run it in release mode, it cause assertion violation.

tries data structure implementation......... Application - Dictionary

Wanted to write a program to implement a dictionary of words using Tries Data Structure.
Please tell me the structure for the implementation so that I could start the program, as i haven't got any article on internet, for Tries Implementation..
The confusion is, that when we search through the word, and we get through the word at the leaf node, only then is the meaning of the word would be stored.. But all the nodes in Tries data structure will be of waste. i.e. storing a variable of meaning in every internal node......
So, the basic idea, is with a help of small example as how to store dictionary, please let me know the structure for Tries Data Structure..
And please C program Implementation..
Thanks..
Compressed Tries.. This is giving me the correct Compressed Trie, as expected,,,, but there are some issues with it.... And wanted to discuss that....
1) I Build a simple trie first, and then compressed it using a function trie_compress(), Now when i want to add any word into it, it would want a changed trie_add(), also changed trie_lookup(), ok i will do this on my own, just wanted to know, is my approach correct or there could be some better way..
2) In trie_new(), I have used t->substr = (char*)malloc(10);,,,,,, This doesn't look efficient, as memory should be allocated, as in when required. Can we improve upon this.
typedef struct trie
{
int on;
char *substr;
struct trie *first_child;
struct trie *next_sibling;
}trie;
trie* trie_new()
{
trie *t = (trie*)malloc(sizeof(trie));
t->substr = (char*)malloc(10);
t->on = 0;
t->substr[0] = '\0';
t->first_child = NULL;
t->next_sibling = NULL;
return t;
}
trie* trie_at_level(trie *t, char c)
{
while(t != NULL)
{
if(t->substr[0] == c)
{
return t;
}
t = t->next_sibling;
}
return NULL;
}
void trie_add(trie *t, const char *str)
{
const int n = strlen(str);
int i;
for(i=0; i<n; i++)
{
const char c = str[i];
trie* parent = t;
t = t->first_child;
t = trie_at_level(t,c);
if(t == NULL)
{
t = trie_new();
t->substr[0] = c;
t->substr[1] = '\0';
t->next_sibling = parent->first_child;
parent->first_child = t;
}
}
t->on = 1;
}
int trie_lookup(trie *t, const char *str)
{
const int n = strlen(str);
int i;
for(i=0; i<n; i++)
{
const char c = str[i];
t = t->first_child;
t = trie_at_level(t,c);
if(t == NULL)
return 0;
}
return t->on;
}
void trie_compress(trie *t)
{
trie* parent = t;
t = t->first_child;
if(t->first_child != NULL)
trie_compress(t);
if(t->next_sibling == NULL)
{
parent->substr = strcat(parent->substr,t->substr);
parent->first_child = t->first_child;
parent->on = t->first_child->on;
free(t);
return;
}
else
trie_compress(t->next_sibling);
}
Okay, I think I got it right this time around.
Compressed Trie:
#include <string.h>
#include <stdlib.h>
typedef struct trie {
int value;
char* key;
struct trie* kids;
struct trie* next;
} trie;
/* Creates an empty trie.
*/
trie* trie_new () {
trie* t = (trie*) malloc (sizeof (trie));
t->value = 0;
t->key = NULL;
t->kids = NULL;
t->next = NULL;
return t;
}
/* Sets |t->key| to |key|.
*/
static void trie_set_key (trie* t, const char* key) {
char* key_copy = (char*) malloc (sizeof (char) * (strlen (key) + 1));
strcpy (key_copy, key);
free (t->key);
t->key = key_copy;
}
/* Creates a trie with |->key| set to |key| whose |->value| is on.
*/
static trie* trie_new_init (const char* key) {
trie* t = trie_new ();
t->value = 1;
trie_set_key (t, key);
return t;
}
/* Frees all memory used by the trie |t|.
*/
void trie_delete (trie* t) {
if (t == NULL) {
return;
}
trie_delete (t->kids);
trie_delete (t->next);
free (t->key);
free (t);
}
typedef struct trie_str_pair {
trie* trie;
const char* str;
} trie_str_pair;
/* Creates a trie_str_pair with the values |->trie| and |->str| set to
* |t| and |str|, respectively.
*/
static trie_str_pair mk_trie_str_pair (trie* t, const char* str) {
trie_str_pair pair;
pair.trie = t;
pair.str = str;
return pair;
}
/* Tries to find a sibling of |t| or |t| itself that matches the input
* choice function |choiceFunc|. A match occurs if |choiceFunc| returns
* a string other than NULL. Upon a match, the matching trie and the string
* are returned. Otherwise NULL values are returned in the pair struct.
*/
static trie_str_pair lookup_by (
const char* (*choiceFunc)(const char*, trie*)
, const char* key, trie* t
) {
while (t != NULL) {
const char* str = choiceFunc (key, t);
if (str != NULL) {
return mk_trie_str_pair (t, str);
}
t = t->next;
}
return mk_trie_str_pair (NULL, NULL);
}
/* If |prefix| is a prefix of |str|, returns a pointer into |str| immediately
* after the prefix.
*/
static const char* strip_prefix (const char* prefix, const char* str) {
int i;
for (i = 0; prefix [i] != '\0'; ++i) {
if (str [i] != prefix [i]) {
return NULL;
}
}
return str + i;
}
/* If |t->key| is a prefix of |str|, returns a pointer into |str| immediately
* after the prefix.
*/
static const char* strip_prefix_with_key (const char* str, trie* t) {
return strip_prefix (t->key, str);
}
/* If |str| is a prefix of |t->key|, returns a pointer into |t->key|
* immediately after the prefix.
*/
static const char* strip_prefix_from_key (const char* str, trie* t) {
return strip_prefix (str, t->key);
}
/* Returns a pointer into |str1| immediately after the longest common prefix
* between |str1| and |str2|.
*/
static const char* strip_common_prefix (const char* str1, const char* str2) {
int i;
for (i = 0; str1 [i] != '\0' && str2 [i] != '\0'; ++i) {
if (str1 [i] != str2 [i]) {
break;
}
}
if (i == 0) {
return NULL;
}
return str1 + i;
}
/* Returns a pointer into |str| past the longest common prefix between
* |str| and |t->str|.
*/
static const char* strip_common_prefix_on_key (const char* str, trie* t) {
return strip_common_prefix (str, t->key);
}
/* Returns 1 if |key| is in the trie |t|. Returns 0 if not.
*/
int trie_lookup (trie* t, const char* key) {
while (key != NULL && key [0] != '\0') {
trie_str_pair pair = lookup_by (strip_prefix_with_key, key, t->kids);
t = pair.trie;
if (t == NULL) {
return 0;
}
key = pair.str;
}
return t->value;
}
/* Adds |kid| to |t|'s list of kids.
*/
static void trie_add_kid (trie* t, trie* kid) {
kid->next = t->kids;
t->kids = kid;
}
/* Removes |kid| from |t|'s list of kids.
* |kid| must be in |t|'s list of kids.
*/
static void trie_remove_kid (trie* t, trie* kid) {
if (t->kids == kid) {
t->kids = kid->next;
}
else {
t = t->kids;
while (t->next != kid) {
t = t->next;
}
t->next = kid->next;
}
}
/* Replaces |kid| from |t|'s list of kids with |new_kid|.
* |kid| must be in |t|'s list of kids.
*/
static void trie_replace_kid (trie* t, trie* kid, trie* new_kid) {
trie_remove_kid (t, kid);
trie_add_kid (t, new_kid);
}
/* If |t| has exactly one kid and no grandkids, |t| and its kid are merged
* into one trie node. In other words, |t|'s kid's |->key| is appended to
* |t->key| and |t->value| becomes that of its kid's |->value|.
*/
static void trie_try_merge_with_kids (trie* t) {
if (t->key != NULL) {
trie* kid = t->kids;
if (kid != NULL && kid->next == NULL) {
t->value = kid->value;
t->kids = kid->kids;
int new_len = strlen (t->key) + strlen (kid->key);
t->key = realloc (t->key, sizeof (char) * (new_len + 1));
strcat (t->key, kid->key);
free (kid->key);
free (kid);
}
}
}
/* Helper for trie_insert.
*/
static void trie_insert_split_key (
trie* t
, const char* key_prefix, const char* key_suffix
) {
trie* kid = trie_new_init (key_suffix);
trie_add_kid (t, kid);
trie_set_key (t, key_prefix);
}
/* Helper for trie_insert.
*/
static void trie_insert_simple (trie* t, const char* key) {
trie* kid = trie_new_init (key);
trie_add_kid (t, kid);
}
/* Helper for trie_insert.
*/
static void trie_insert_fork (
trie* t
, trie* kid
, char* key_prefix /* Caller loses ownership of this string */
, const char* key_suffix
, const char* kid_key_suffix
) {
trie* fork_kid = trie_new ();
fork_kid->key = key_prefix;
fork_kid->kids = trie_new_init (key_suffix);
fork_kid->kids->next = kid;
trie_replace_kid (t, kid, fork_kid);
fork_kid->next = kid->next;
kid->next = NULL;
trie_set_key (kid, kid_key_suffix);
}
/* Inserts |key| into the trie |t|.
*/
void trie_insert (trie* t, const char* key) {
if (key [0] == '\0') {
return;
}
while (1) {
trie_str_pair pair = lookup_by (strip_prefix_with_key, key, t->kids);
trie* kid = pair.trie;
const char* stripped = pair.str;
if (kid != NULL) {
if (stripped [0] == '\0') {
kid->value = 1;
return;
}
t = kid;
key = stripped;
continue;
}
pair = lookup_by (strip_prefix_from_key, key, t->kids);
kid = pair.trie;
stripped = pair.str;
if (kid != NULL) {
trie_insert_split_key (kid, key, stripped);
return;
}
pair = lookup_by (strip_common_prefix_on_key, key, t->kids);
kid = pair.trie;
stripped = pair.str;
if (kid == NULL) {
trie_insert_simple (t, key);
return;
}
int prefix_len = stripped - key;
char* common_prefix = (char*) malloc (sizeof (char) * (prefix_len + 1));
strncpy (common_prefix, key, prefix_len);
common_prefix [prefix_len] = '\0';
trie_insert_fork (t, kid, common_prefix, stripped, kid->key + prefix_len);
return;
}
}
/* Helper for trie_remove.
*/
static void trie_remove_simple (trie* t, trie* kid) {
trie_remove_kid (t, kid);
free (kid->key);
free (kid);
}
/* Helper for trie_remove.
*/
static void trie_remove_merge (trie* t) {
t->value = 0;
trie_try_merge_with_kids (t);
}
/* Removes |key| from the trie |t|.
*/
void trie_remove (trie* t, const char* key) {
trie_str_pair pair = lookup_by (strip_prefix_with_key, key, t->kids);
trie* kid = pair.trie;
const char* stripped = pair.str;
if (kid == NULL) {
return;
}
if (stripped [0] == '\0') {
if (kid->kids == NULL) {
trie_remove_simple (t, kid);
}
else {
trie_remove_merge (kid);
}
}
else {
trie_remove (kid, stripped);
}
trie_try_merge_with_kids (t);
}
Maybe this would help?
Prefix Tree
Trie C Implementation
Another Trie C Implementation

Resources