i have the following structure that i use to create a hash table for fingerprints
typedef struct fpinfo
{
unsigned long chunk_offset;
unsigned long chunk_length;
unsigned char fing_print[33];
}fpinfo;
/*
* The following defines one entry in the hash table.
*/
typedef struct Hash_Entry
{
struct Hash_Entry *next; /* Link entries within same bucket. */
unsigned namehash; /* hash value of key */
struct fpinfo fp;
} Hash_Entry;
typedef struct Hash_Table
{
struct Hash_Entry **bucketPtr; /* Buckets in the table */
int numBuckets;
int buck_entry_count[64];//number of entries in each bucket
int size; /* Actual size of array. */
int numEntries; /* Number of entries in the table. */
int mask; /* Used to select bits for hashing. */
} Hash_Table;
I insert fingerprints into it using
int Hash_CreateEntry(Hash_Table *t, struct Hash_Entry he)
{
Hash_Entry *e;
const char *p;
int keylen;
struct Hash_Entry **hp;
unsigned long h = 0, g,i=0;
while ( i<5 )
{
h = ( h ) + he.fp.fing_print[i]++;
g = h & 0xF0000000;
h ^= g >> 24;
h &= ~g;
i++;
}
p =(const char*) he.fp.fing_print;
for (e = t->bucketPtr[h & t->mask]; e != NULL; e = e->next)
{
if (e->namehash == h && strcmp((const char *)(e->fp).fing_print, p) == 0)
{
printf("\n%d \t%s",(e->fp).chunk_length,(e->fp).fing_print);
return (1);
}
}
if (t->numEntries >= rebuildLimit * t->size)
WriteHTtoFile(t);
e = (Hash_Entry *)malloc(sizeof(*e) /*+ keylen*/);
hp = &t->bucketPtr[h & t->mask];
e->next = *hp;
*hp = e;
e->namehash = h;
strcpy((char *)(e->fp).fing_print, p);
t->numEntries++;
t->buck_entry_count[h & t->mask]++;
return (0);
}
The Code I used to write the HT to file is
static void WriteHTtoFile(Hash_Table *t)
{
Hash_Entry *e, *next = NULL, **hp, **xp;
int i=0, mask;
Hash_Entry **oldhp;
int oldsize;
FILE *htfile=fopen("htfile.txt","a");
system("cls");
for ( hp = t->bucketPtr;t->bucketPtr!=NULL;hp=t->bucketPtr++)
{
for (e = *hp;e ->next!= NULL;e = e->next)
fprintf(htfile,"\n%d \t%s",(e->fp).chunk_length,(e->fp).fing_print);
}
fclose(htfile);
}
my problem is (are)
1-it says "Access violation reading location 0xfdfdfe09." after writting a considerable number of times (it wrote 6401 fingerprints). It indicates the faulty line to be the fprintf() in the file writing function.
2- the fingerprints it writes and what i have before writing does not match at all. Actually the hex representation of the fingerprints in the compiler(i am using VC2010) and the one i have which is read by the program are different.
3- the values for chunck_length of all the entries are 3452816845l
I guess the loop in WriteHTtoFile should look more like this:
for (i = 0; i < t->numBuckets; ++i)
{
for (e = t->bucketPtr[i]; e && e->next; e = e->next)
fprintf(htfile, /*...*/);
}
You have more problems than that; this code is hopelessly botched
WriteHTToFile modifies the original hashtable, so you end up with memory leak at least
You use %d format to print out fing_print; it's not at all clear what fing_print is/should be (binary string; ascii string).
Get a good book on C, and get some practice with a debugger.
Related
I have a contact structure inserted into a linked list which is in an hashtable. I don't know if I defined all my structures correctly.
I basically want to add a contact via input when given the command 'a' (command would be like this: a name mail phone).
I sould not be able to add the contact if it already exists.
I've tried creating the necessary structure of an hashtable with linked lists, i just don't understand how to work with it. So this function would help me a lot with understanding this concept.
This is the structure i've tried
#define NOME_SIZE 1023
#define MAIL_SIZE 511
#define TELEFONE_SIZE 63
#define HASH_SIZE 1000
typedef struct contacts{
char name[NOME_SIZE];
char mail[MAIL_SIZE];
char phone[TELEFONE_SIZE];
struct contacts *next;
}HashList;
typedef struct hash_bucket{
HashList *head, *tail;
int n_elements;
}HashBucket;
HashBucket hashtable[HASH_SIZE];
I do not expect any output if i can successfuly add the contact.
If it already exists it should return an error saying the contact already exists
A proposal from your code and my remarks. I removed n_elements because for me it is useless. I let tail but not sure it is useful because your list only have a next with a previous. I let arrays for name, phone and mail but I think it is better to use char *
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NOME_SIZE 1023
#define MAIL_SIZE 511
#define TELEFONE_SIZE 63
#define HASH_SIZE 1000
typedef struct contacts{
char name[NOME_SIZE];
char mail[MAIL_SIZE];
char phone[TELEFONE_SIZE];
struct contacts *next;
}HashList;
typedef struct hash_bucket{
HashList *head, *tail;
/* int n_elements; * I removed that field, it is useless */
}HashBucket;
HashBucket hashtable[HASH_SIZE];
// from https://stackoverflow.com/a/7666577/2458991
size_t hash(char * str)
{
size_t hash = 5381;
unsigned char c;
while ((c = (unsigned char) *str++) != 0)
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
return hash;
}
HashList * createElt(char * n, char * m, char * p)
{
HashList * e = malloc(sizeof(HashList));
strncpy(e->name, n, NOME_SIZE - 1);
e->name[NOME_SIZE - 1] = 0;
strncpy(e->mail, m, MAIL_SIZE - 1);
e->name[MAIL_SIZE - 1] = 0;
strncpy(e->phone, p, TELEFONE_SIZE - 1);
e->name[TELEFONE_SIZE - 1] = 0;
e->next = NULL;
return e;
}
// add or replace an element, the key is the name
// return 0 if the entry is added, else a non null value if the entry is (probably) modified
int insertElt(char * n, char * m, char * p)
{
// suppose list sort on the name
HashBucket * hb = &hashtable[hash(n) % HASH_SIZE];
HashList ** hl = &hb->head;
for (;;) {
if (*hl == NULL) {
/* last (and may be first) element */
*hl = createElt(n, m, p);
hb->tail = *hl;
return 0;
}
int cmp = strcmp((*hl)->name, n);
if (cmp == 0) {
/* replace */
strncpy((*hl)->mail, m, MAIL_SIZE - 1);
(*hl)->name[MAIL_SIZE - 1] = 0;
strncpy((*hl)->phone, p, TELEFONE_SIZE - 1);
(*hl)->name[TELEFONE_SIZE - 1] = 0;
return 1;
}
if (cmp > 0) {
/* insert before */
HashList * e = createElt(n, m, p);
e->next = *hl;
*hl = e;
return 0;
}
hl = &(*hl)->next;
}
}
void pr()
{
for (size_t i = 0; i != HASH_SIZE; ++i)
for (HashList * hl = hashtable[i].head; hl != NULL; hl = hl->next)
printf("%s %s %s\n", hl->name, hl->mail, hl->phone);
}
int main()
{
printf("%d\n", insertElt("n1", "m1", "p1"));
printf("%d\n", insertElt("n2", "m2", "p2"));
pr();
printf("%d\n", insertElt("n1", "mm1", "pp1"));
pr();
return 0;
}
Compilation and execution :
pi#raspberrypi:/tmp $ gcc -pedantic -Wextra -Wall hm.c
pi#raspberrypi:/tmp $ ./a.out
0
0
n1 m1 p1
n2 m2 p2
1
n1 mm1 pp1
n2 m2 p2
Currently I have a hash table implementation in C that uses strings as the keys and values. If I wanted to store integers instead of strings as the values, what would be the best way to do this? I'm thinking of storing the integer in a string and converting it to an integer when I need it but it seems inefficient for arithmetic. Something like
insert("money", "13");
int i = atoi(get("key1"));
int sum = i + 10;
insert("money", itoa(sum));
Is there a better way to do this?
EDIT: hash table implementation
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct tableentry /* hashtab entry */
{
struct tableentry *next;
char *key;
char *val;
} tableentry_t;
typedef struct hashtable
{
size_t size;
struct tableentry **tab;
} hashtable_t;
/* creates hashtable */
/* NOTE: dynamically allocated, remember to ht_free() */
hashtable_t *ht_create(size_t size)
{
hashtable_t *ht = NULL;
if ((ht = malloc(sizeof(hashtable_t))) == NULL)
return NULL;
/* allocate ht's table */
if ((ht->tab = malloc(sizeof(tableentry_t) * size)) == NULL)
return NULL;
/* null-initialize table */
int i;
for (i = 0; i < size; i++)
ht->tab[i] = NULL;
ht->size = size;
return ht;
}
/* creates hash for a hashtab */
static unsigned hash(hashtable_t *ht, char *s)
{
unsigned hashval;
for (hashval = 0; *s != '\0'; s++)
hashval = *s + 31 * hashval;
return hashval;
}
/* loops through linked list freeing */
static void te_free(tableentry_t *te)
{
tableentry_t *next;
while (te != NULL)
{
next = te->next;
free(te->key);
free(te->val);
free(te);
te = next;
}
}
/* creates a key-val pair */
static tableentry_t *new(char *k, char *v)
{
tableentry_t *te = NULL;
if ((te = calloc(1, sizeof(*te))) == NULL
|| (te->key = strdup(k)) == NULL
|| (te->val = strdup(v)) == NULL)
{
te_free(te);
return NULL;
}
te->next = NULL;
return te;
}
static tableentry_t *lookup(hashtable_t *ht, char *k)
{
tableentry_t *te;
/* step through linked list */
for (te = ht->tab[hash(ht, k) % ht->size]; te != NULL; te = te->next)
if (strcmp(te->key, k) == 0)
return te; /* found */
return NULL; /* not found */
}
/* inserts the key-val pair */
hashtable_t *ht_insert(hashtable_t *ht, char *k, char *v)
{
tableentry_t *te;
/* unique entry */
if ((te = lookup(ht, k)) == NULL)
{
te = new(k, v);
unsigned hashval = hash(ht, k) % ht->size;
/* insert at beginning of linked list */
te->next = ht->tab[hashval];
ht->tab[hashval] = te;
}
/* replace val of previous entry */
else
{
free(te->val);
if ((te->val = strdup(v)) == NULL)
return NULL;
}
return ht;
}
/* retrieve value from key */
char *ht_get(hashtable_t *ht, char *k)
{
tableentry_t *te;
if ((te = lookup(ht, k)) == NULL)
return NULL;
return te->val;
}
/* frees hashtable created from ht_create() */
void ht_free(hashtable_t *ht)
{
int i;
for (i = 0; i < ht->size; i++)
if (ht->tab[i] != NULL)
te_free(ht->tab[i]);
free(ht);
}
/* resizes hashtable, returns new hashtable and frees old*/
hashtable_t *ht_resize(hashtable_t *oht, size_t size)
{
hashtable_t *nht; /* new hashtable */
nht = ht_create(size);
/* rehash */
int i;
tableentry_t *te;
/* loop through hashtable */
for (i = 0; i < oht->size; i++)
/* loop through linked list */
for (te = oht->tab[i]; te != NULL; te = te->next)
if (ht_insert(nht, te->key, te->val) == NULL)
return NULL;
ht_free(oht);
return nht;
}
The access and manipulation functions associated with your hash table implementation assume that values have the form of null-terminated strings, and that their significance is carried entirely by their contents (not, for example, by the values of the pointers themselves). Among other things, this is evident from the fact that the new() and ht_insert() functions make copies of the provided values via strdup(). Therefore, if you intend to use those functions (not just the underlying data structures) then your only alternative for storing integers is to encode the integers into strings in some way, and store the strings. This is what you already came up with.
Note, by the way, that this presents a bit of an issue if you want to be able to store both strings and integers in the same hash table. The table entries do not provide any way to record data type metadata, so to avoid collisions between string and number representations, you would need to encode data types into the values you store -- not only for the integers, but for the strings, too. For example, you might encode values into strings whose first character communicates the data type. Thus, perhaps "S12345" represents the string "12345", whereas "I12345" represents the integer 12345. But you don't need such tricks if you assume all the values are of uniform type, on a table-by-table basis.
You would have more options if you were open to writing at least a partial set of alternative hash table functions for storing integers in the existing data structures. For example, you might use the fact that pointers and integers can be converted back and forth (with implementation-defined results). But I interpret you to have rejected such approaches, as using alternative functions is effectively the same thing as modifying the implementation.
I am trying to build a program which will function as an assembler, it will be getting file name as command line arguments and translate them to machine code.
The program compiles just fine and runs OK with 1 file name, but when I try to run with several, the error appears after the first iteration.
I think there might be something withe the Clear() function (which flushes out all the data allocated in the previous iteration), but not sure why. Note that this is partial, but as I said, the program will run unless several files are used.
struct symbolStruct { // a structure which is used to absorb info about a tag, its place in memory and related flags
char *name;
int place;
unsigned int isEntry : 1;
unsigned int isData : 1;
unsigned int isExternal : 1;
struct symbolStruct *next;
};
typedef struct { // a structure which is used to absorb info about the operand structure of an instruction line
unsigned int numOfOperands : 2;
unsigned int addrMethSou : 2;
unsigned int addrMethDest : 2;
unsigned int operation : 4;
unsigned int extraWords : 2;
char *firstOperand;
char *secondOperand;
} OperandType;
typedef struct {
unsigned int row : WORD_SIZE;
} int15;
struct MachineCode { // a structure which is used to absorb machine code lines, and their location in the assembly file
unsigned int row : WORD_SIZE;
unsigned int line;
OperandType *structure;
struct MachineCode *next;
};
struct DataCode { // a structure which is used to absorb data and string elements (signed numbers and ascii characters)
unsigned int row : WORD_SIZE;
struct DataCode *next;
};
struct Operation { /* the main operation structure, contains pointers to all used lists, the ic and dc counters, the
current line number which is dealt with and the error flag. */
unsigned int ic;
unsigned int dc;
struct symbolStruct *externHead; // a pointer to a linked list of extern tags used in the assembly file, and their locations
struct symbolStruct *symbolHead; // a pointer to a linked list of all tags
struct DataCode *dataHead; // a pointer to a linked list of all data/string elements
struct MachineCode *machineHead; // a pointer to a linked list of all machine code rows
int linenumber;
unsigned int errorflag : 1; // raised in case of an error which triggered a warning
};
#include "header.h"
void FirstRun(struct Operation*, char *);
void DataUpdate(struct symbolStruct*,int);
void SecondRun(struct Operation *, char *);
void Clear(struct Operation *);
int main(int argc, char *argv[]) {
int i;
struct Operation programCore = {0,0,NULL,NULL,NULL,NULL,0,0};
for(i=1;i<argc;i++) {
char *fn = argv[i];
FirstRun(&programCore,fn);
DataUpdate(programCore.symbolHead,programCore.ic+INSTRUCTION_OFFSET);
SecondRun(&programCore,fn);
Clear(&programCore);
programCore.symbolHead = programCore.externHead = programCore.dataHead = programCore.machineHead = NULL;
}
if(argc < 2) {
fprintf(stderr,"No files selected.\n");
}
return 0;
}
/*Used to empty the linked lists and allocated memory after the program has finished one iteration. */
void Clear(struct Operation *programCore) {
/*f(pointer name) is there to hold a pointer to the allocated memory which is about to be flushed. */
struct MachineCode *machineHead = programCore->machineHead, *fMachineHead;
struct DataCode *dataHead = programCore->dataHead, *fDataHead;
struct symbolStruct *externHead = programCore->externHead, *fExternHead;
struct symbolStruct *symbolHead = programCore->symbolHead, *fSymbolHead;
while(machineHead != NULL) {
fMachineHead = machineHead;
machineHead = machineHead->next;
if(fMachineHead->structure != NULL) {
if(fMachineHead->structure->numOfOperands == 2)
free(fMachineHead->structure->secondOperand);
if(fMachineHead->structure->numOfOperands > 0)
free(fMachineHead->structure->firstOperand);
free(fMachineHead->structure);
}
free(fMachineHead);
}
while(dataHead != NULL) {
fDataHead = dataHead;
dataHead = dataHead->next;
free(fDataHead);
}
while(externHead != NULL) {
fExternHead = externHead;
externHead = externHead->next;
free(fExternHead->name);
free(fExternHead);
}
while(symbolHead != NULL) {
fSymbolHead = symbolHead;
symbolHead = symbolHead->next;
free(fSymbolHead->name);
free(fSymbolHead);
}
programCore->ic = programCore->dc = programCore->linenumber = programCore->errorflag = 0;
}
You do not free and nullifying the linked lists in the context struct (programCore). I suspect you are then using pointers to freed memory blocks.
This line only copies the pointer:
struct MachineCode *machineHead = programCore->machineHead;
The while() loop is not clearing programCore->machineHead
To fix it, run directly on the head:
while(programCore->machineHead != NULL)
{
...
}
Well, by getting rid of
if(fMachineHead->structure->numOfOperands == 2)
free(fMachineHead->structure->secondOperand);
if(fMachineHead->structure->numOfOperands > 0)
free(fMachineHead->structure->firstOperand);
I have managed to solve the error, but now I am getting a new one -
main.c:242:13: error: request for member ‘symbolHead’ in something not a structure or union
main.c:242:38: error: request for member ‘externHead’ in something not a structure or union
main.c:243:13: error: request for member ‘dataHead’ in something not a structure or union
main.c:244:13: error: request for member ‘machineHead’ in something not a structure or union
Referring to the next line -
programCore.symbolHead = programCore.externHead = programCore.dataHead = programCore.machineHead = NULL;
Is there a problem with the way I wrote that? (Obviously yes, but I just don't see it).
Changed the clear() function again and it seems to be working fine now.
/*Used to empty the linked lists and allocated memory after the program has finished one iteration. */
void Clear(struct Operation *programCore) {
/*f(pointer name) is there to hold a pointer to the allocated memory which is about to be flushed. */
struct MachineCode *machineRowPointer = programCore->machineHead, *fMachineRow;
struct DataCode *dataRowPointer = programCore->dataHead, *fDataRow;
struct symbolStruct *externSymbolPointer = programCore->externHead, *fExtern;
struct symbolStruct *symbolPointer = programCore->symbolHead, *fSymbol;
if(machineRowPointer != NULL) {
while(machineRowPointer != NULL) {
if(machineRowPointer->structure != NULL)
free(machineRowPointer->structure);
fMachineRow = machineRowPointer;
machineRowPointer = machineRowPointer->next;
free(fMachineRow);
}
programCore->machineHead = NULL;
}
if(dataRowPointer != NULL) {
while(dataRowPointer != NULL) {
fDataRow = dataRowPointer;
dataRowPointer = dataRowPointer->next;
free(fDataRow);
}
programCore->dataHead = NULL;
}
if(externSymbolPointer != NULL) {
while(externSymbolPointer != NULL) {
fExtern = externSymbolPointer;
externSymbolPointer = externSymbolPointer->next;
free(fExtern->name);
free(fExtern);
}
programCore->externHead = NULL;
}
if(symbolPointer != NULL) {
while(symbolPointer != NULL) {
fSymbol = symbolPointer;
symbolPointer = symbolPointer->next;
free(fSymbol->name);
free(fSymbol);
}
programCore->symbolHead = NULL;
}
programCore->ic = programCore->dc = programCore->linenumber = programCore->errorflag = 0;
}
How can I read each individual character from a string that is accessed through an array of pointers? In the below code I currently have generated an array of pointers to strings called, symCodes, in my makeCodes function. I want to read the strings 8 characters at a time, I thought about concatenating each string together, then looping through that char by char but the strings in symCodes could be up to 255 characters each, so I feel like that could possibly be too much all to handle at once. Instead, I thought I could read each character from the strings, character by character.
I've tried scanf or just looping through and always end up with seg faults. At the end of headerEncode(), it's near the bottom. I malloc enough memory for each individual string, I try to loop through the array of pointers and print out each individual character but am ending up with a seg fault.
Any suggestions of a different way to read an array of pointers to strings, character by character, up to n amount of characters is appreciated.
EDIT 1: I've updated the program to no longer output warnings when using the -Wall and -W flags. I'm no longer getting a seg fault(yay!) but I'm still unsure of how to go about my question, how can I read an array of pointers to strings, character by character, up to n amount of characters?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "huffman.h"
#define FAIL 0
#define SUCCESS 1
/* global 1 day arrays that hold chars and their freqs from file */
unsigned long globalFreqs[256] = {0};
unsigned char globalUsedCh[256] = {0};
char globalCodes[256] = {0};
unsigned char globalUniqueSymbols;
unsigned long totalCount = 0;
typedef struct HuffmanTreeNode* HTNode;
struct HuffmanTreeNode* globalSortedLL;
/*
struct has the input letter, the letters frequency, and the left and irght childs
*/
struct HuffmanTreeNode
{
char symbol;
unsigned long freq;
char *code;
struct HuffmanTreeNode *left, *right;
struct HuffmanTreeNode* next;
};
/* does it make sense to have a struct for the entire huffman tree to see its size? */
struct HuffmanTree
{
unsigned size;
};
/*generate new node with given symbol and freq */
struct HuffmanTreeNode* newNode(char symbol, int freq)
{
struct HuffmanTreeNode* newNode = malloc(sizeof(struct HuffmanTreeNode));
newNode->symbol = symbol;
newNode->freq = freq;
newNode->left = newNode->right = NULL;
return newNode;
}
/*current work in progress, i believe this is the way to insert it for a BST
/* will change for HuffmanTreenode once working
/*
*/
struct HuffmanTreeNode* insert(struct HuffmanTreeNode* node, struct HuffmanTreeNode* htnNew)
{
struct HuffmanTreeNode* currentNode = node;
if(currentNode == NULL || compareTwoNodes(htnNew, currentNode))
{
htnNew->next = currentNode;
return htnNew;
}
else
{
while(currentNode->next != NULL && compareTwoNodes(currentNode->next, htnNew))
{
currentNode = currentNode->next;
}
htnNew->next = currentNode->next;
currentNode->next = htnNew;
return node;
}
}
int compareTwoNodes(struct HuffmanTreeNode* a, struct HuffmanTreeNode* b)
{
if(b->freq < a->freq)
{
return 0;
}
if(a->freq == b->freq)
{
if(a->symbol > b->symbol)
return 1;
return 0;
}
if(b->freq > a->freq)
return 1;
}
struct HuffmanTreeNode* popNode(struct HuffmanTreeNode** head)
{
struct HuffmanTreeNode* node = *head;
*head = (*head)->next;
return node;
}
/*convert output to bytes from bits*/
/*use binary fileio to output */
/*put c for individual character byte*/
/*fwrite each individual byte for frequency of symbol(look at fileio slides) */
/*
#function:
#param:
#return:
*/
int listLength(struct HuffmanTreeNode* node)
{
struct HuffmanTreeNode* current = node;
int length = 0;
while(current != NULL)
{
length++;
current = current->next;
}
return length;
}
/*
#function:
#param:
#return:
*/
void printList(struct HuffmanTreeNode* node)
{
struct HuffmanTreeNode* currentNode = node;
while(currentNode != NULL)
{
if(currentNode->symbol <= ' ' || currentNode->symbol > '~')
printf("=%d", currentNode->symbol);
else
printf("%c", currentNode->symbol);
printf("%lu ", currentNode->freq);
currentNode = currentNode->next;
}
printf("\n");
}
/*
#function:
#param:
#return:
*/
void buildSortedList()
{
int i;
for(i = 0; i < 256; i++)
{
if(!globalFreqs[i] == 0)
{
globalSortedLL = insert(globalSortedLL, newNode(i, globalFreqs[i]));
}
}
printf("Sorted freqs: ");
printList(globalSortedLL);
printf("listL: %d\n", listLength(globalSortedLL));
}
/*
#function: isLeaf()
will test to see if the current node is a leaf or not
#param:
#return
*/
int isLeaf(struct HuffmanTreeNode* node)
{
if((node->left == NULL) && (node->right == NULL))
return SUCCESS;
else
return FAIL;
}
/*where I plan to build the actual huffmantree */
/*
#function:
#param:
#return:
*/
struct HuffmanTreeNode* buildHuffmanTree(struct HuffmanTreeNode* node)
{
int top = 0;
struct HuffmanTreeNode *left, *right, *topNode, *huffmanTree;
struct HuffmanTreeNode* head = node;
struct HuffmanTreeNode *newChildNode, *firstNode, *secondNode;
while(head->next != NULL)
{
/*grab first two items from linkedL, and remove two items*/
firstNode = popNode(&head);
secondNode = popNode(&head);
/*combine sums, use higher symbol, create new node*/
newChildNode = newNode(secondNode->symbol, (firstNode->freq + secondNode->freq));
newChildNode->left = firstNode;
newChildNode->right = secondNode;
/*insert new node, decrement total symbols in use */
head = insert(head, newChildNode);
}
return head;
}
void printTable(char *codesArray[])
{
int i;
printf("Symbol\tFreq\tCode\n");
for(i = 0; i < 256; i++)
{
if(globalFreqs[i] != 0)
{
if(i <= ' ' || i > '~')
{
printf("=%d\t%lu\t%s\n", i, globalFreqs[i], codesArray[i]);
}
else
{
printf("%c\t%lu\t%s\n", i, globalFreqs[i], codesArray[i]);
}
}
}
printf("Total chars = %lu\n", totalCount);
}
void makeCodes(
struct HuffmanTreeNode *node, /* Pointer to some tree node */
char *code, /* The *current* code in progress */
char *symCodes[256], /* The array to hold the codes for all the symbols */
int depth) /* How deep in the tree we are (code length) */
{
char *copiedCode;
int i = 0;
if(isLeaf(node))
{
code[depth] = '\0';
symCodes[node->symbol] = code;
return;
}
copiedCode = malloc(255*sizeof(char));
memcpy(copiedCode, code, 255*sizeof(char));
code[depth] = '0';
copiedCode[depth] = '1';
makeCodes(node->left, code, symCodes, depth+1);
makeCodes(node->right, copiedCode, symCodes, depth+1);
}
/*
#function: getFileFreq()
gets the frequencies of each character in the given
file from the command line, this function will also
create two global 1d arrays, one for the currently
used characters in the file, and then one with those
characters frequencies, the two arrays will line up
parallel
#param: FILE* in, FILE* out,
the current file being processed
#return: void
*/
void getFileFreq(FILE* in, FILE* out)
{
unsigned long freqs[256] = {0};
int i, t, fileCh;
while((fileCh = fgetc(in)) != EOF)
{
freqs[fileCh]++;
totalCount++;
}
for(i = 0; i < 256; i++)
{
if(freqs[i] != 0)
{
globalUsedCh[i] = i;
globalFreqs[i] = freqs[i];
if(i <= ' ' || i > '~')
{
globalUniqueSymbols++;
}
else
{
globalUniqueSymbols++;
}
}
}
/* below code until total count is for debugging purposes */
printf("Used Ch: ");
for(t = 0; t < 256; t++)
{
if(globalUsedCh[t] != 0)
{
if(t <= ' ' || t > '~')
{
printf("%d ", globalUsedCh[t]);
}
else
printf("%c ", globalUsedCh[t]);
}
}
printf("\n");
printf("Freq Ch: ");
for(t = 0; t < 256; t++)
{
if(globalFreqs[t] != 0)
{
printf("%lu ", globalFreqs[t]);
}
}
printf("\n");
/* end of code for debugging/vizualazation of arrays*/
printf("Total Count %lu\n", totalCount);
printf("globalArrayLength: %d\n", globalUniqueSymbols);
}
void headerEncode(FILE* in, FILE* out, char *symCodes[256])
{
char c;
int i, ch, t, q, b, z;
char *a;
char *fileIn;
unsigned char *uniqueSymbols;
unsigned char *byteStream;
unsigned char *tooManySym = 0;
unsigned long totalEncodedSym;
*uniqueSymbols = globalUniqueSymbols;
totalEncodedSym = ftell(in);
rewind(in);
fileIn = malloc((totalEncodedSym+1)*sizeof(char));
fread(fileIn, totalEncodedSym, 1, in);
if(globalUniqueSymbols == 256)
{
fwrite(tooManySym, 1, sizeof(char), out);
}
else
{
fwrite(uniqueSymbols, 1, sizeof(uniqueSymbols)-7, out);
}
for(i = 0; i < 256; i++)
{
if(globalFreqs[i] != 0)
{
fwrite(globalUsedCh+i, 1, sizeof(char), out);
fwrite(globalFreqs+i, 8, sizeof(char), out);
}
}
for(t = 0; t < totalEncodedSym; t++)
{
fwrite(symCodes[fileIn[t]], 8, sizeof(char), out);
}
for(q = 0; q < totalEncodedSym; q++)
{
symCodes[q] = malloc(255*sizeof(char));
a = symCodes[q];
while(*a != '\0')
printf("%c\n", *(a++));
}
printf("Total encoded symbols: %lu\n", totalEncodedSym);
printf("%s\n", fileIn);
}
void encodeFile(FILE* in, FILE* out)
{
int top = 0;
int i;
char *code;
char *symCodes[256] = {0};
int depth = 0;
code = malloc(255*sizeof(char));
getFileFreq(in, out);
buildSortedList();
makeCodes(buildHuffmanTree(globalSortedLL), code, symCodes, depth);
printTable(symCodes);
headerEncode(in, out, symCodes);
free(code);
}
/*
void decodeFile(FILE* in, FILE* out)
{
}*/
There are many problems in your code:
[major] function compareTwoNodes does not always return a value. The compiler can detect such problems if instructed to output more warnings.
[major] the member symbol in the HuffmanTreeNode should have type int. Type char is problematic as an index value because it can be signed or unsigned depending on compiler configuration and platform specificities. You assume that char has values from 0 to 255, which is incorrect for most platforms where char actually has a range of -128 .. 127. Use unsigned char or int but cast the char values to unsigned char to ensure proper promotion.
[major] comparison if (globalUniqueSymbols == 256) is always false because globalUniqueSymbols is an unsigned char. The maximum number of possible byte values is indeed 256 for 8-bit bytes, but it does not fit in an unsigned char, make globalUniqueSymbols an int.
[major] *uniqueSymbols = globalUniqueSymbols; in function headerEncode stores globalUniqueSymbols into an uninitialized pointer, definitely undefined behavior, probable segmentation fault.
[major] sizeof(uniqueSymbols) is the size of a pointer, not the size of the array not the size of the type. Instead of hacking it as sizeof(uniqueSymbols)-7, fputc(globalUniqueSymbols, out);
[major] fwrite(tooManySym, 1, sizeof(char), out); is incorrect too, since tooManySym is initialized to 0, ie: it is a NULL pointer. You need a special value to tell that all bytes values are used in the source stream, use 0 for that and write it with fputc(0, out);.
You have nested C style comments before function insert, this is not a bug but error prone and considered bad style.
function newNode should take type unsigned long for freq for consistency.
function buildHuffmanTree has unused local variables: right, top and topNode.
variable i is unused in function makeCodes.
many unused variables in headerEncode: byteStream, c, ch, b...
totalEncodedSym is an unsigned long, use an index of the proper type in the loops where you stop at totalEncodedSym.
unused variables un encodeFile: i, top...
Most of these can be detected by the compiler with the proper warning level: gcc -Wall -W or clang -Weverything...
There are probably also errors in the program logic, but you cannot see these until you fix the major problems above.
I have defined my own type. It contains a pointer to an array, as well as how many items are in that array
struct neighborList
{
unsigned int nNeighbors;
unsigned int* pNeighbors;
};
These get instantiated, populated, and eventually I want to go back through them. Then something very strange happens. I think screenshots are better than words here.
I've shown the next statement to execute. I have an array of the aforementioned data type, and the one under consideration here has 1 neighbor and the address of that 1 neighbor is 0x107a28; Cool. But what actually gets assigned to pLook?
The address is always off by 0x40. Has anyone seen anything like this? Help here is appreciated.
EDIT: Here's the whole thing since several people want to see it.
#include "stdafx.h"
#include <stdlib.h>
#include <time.h>
//#define NVERTEX 875714
#define NVERTEX 9
struct linkedNode
{
unsigned int node;
linkedNode* pNextLinkedNode;
linkedNode* pPrevLinkedNode;
};
struct neighborList
{
unsigned int nNeighbors;
unsigned int* pNeighbors;
};
struct linkedNodeList
{
linkedNode* pHead;
linkedNode* pTail;
};
void populateNeighbors(neighborList* pNeighborList, FILE* fp);
void DFSLoop(neighborList* pNeighborList, linkedNode* pOutput, unsigned int nNodes);
void append(linkedNodeList* pLinkedList, unsigned int node);
void DFSLoop(neighborList* pNeighborList, linkedNodeList* pOutput, unsigned int nNodes)
{
bool* visitedArray;
bool* cashedArray;
unsigned int* leaderArray;
unsigned int* finishingTimes;
unsigned int t = 0;
visitedArray = (bool*)malloc(nNodes*sizeof(bool));
cashedArray = (bool*)malloc(nNodes*sizeof(bool));
leaderArray = (unsigned int*)malloc(nNodes*sizeof(unsigned int));
finishingTimes = (unsigned int*)malloc(nNodes*sizeof(unsigned int));
//initialize all arrays to all false/0
for (unsigned int i = 0; i < nNodes; i++)
{
visitedArray[i] = false;
cashedArray[i] = false;
leaderArray[i] = 0;
finishingTimes[i] = 0;
}
//firstly, pick a starting node and put it on the linkedList
//initialize head and tail
(pOutput->pHead)->node = 1;
(pOutput->pHead)->pNextLinkedNode = NULL;
(pOutput->pHead)->pPrevLinkedNode = NULL;
(pOutput->pTail)->node = 1;
(pOutput->pTail)->pNextLinkedNode = NULL;
(pOutput->pTail)->pPrevLinkedNode = NULL;
unsigned int curNode = (pOutput->pTail)->node;
for (;;)
{
//Start DFS
//#1 If current node under consideration has an unexplored neighbor, make it the new tail and repeat
// If not, current node is cashed. Set it's finishing time, and leader. Work back through the list
// Until you find a node with an unexplored neighbor
unsigned int nNeighbors = pNeighborList[curNode].nNeighbors;
for (unsigned int i = 0; i < nNeighbors; i++)
{
unsigned int* pLook = (pNeighborList[curNode]).pNeighbors;
unsigned int neighbor = pLook[0];
/*
unsigned int nodeUnderConsideration = (pNeighborList[curNode].pNeighbors)[i];
if ( !cashedArray[nodeUnderConsideration])
{
append(pOutput, (pNeighborList[curNode].pNeighbors)[i]);
curNode = (pOutput->pTail)->node;
continue;
}
*/
}
//#2 If you make it back to the head and have no unexplored neighbors, pick new vertex (if unvisited) and repeat
}
free(visitedArray);
free(cashedArray);
free(leaderArray);
free(finishingTimes);
}
int _tmain(int argc, _TCHAR* argv[])
{
//open file
FILE* fp;
FILE* fpRev;
//fp = fopen("SCC.txt", "rb");
//fpRev = fopen("SSCrev.txt", "rb");
fp = fopen("SSCsmall1.txt", "rb");
fpRev = fopen("SSCsmall1rev.txt", "rb");
/* read file. When reading, keep track of how much memory to malloc */
/* for each vertex */
neighborList* pAllEdges;
neighborList* pAllEdgesRev;
pAllEdges = (neighborList*)malloc(NVERTEX*sizeof(neighborList));
pAllEdgesRev = (neighborList*)malloc(NVERTEX*sizeof(neighborList));
populateNeighbors(pAllEdges, fp);
populateNeighbors(pAllEdgesRev, fpRev);
//instantiate pointers for linkedlists needed for DFS
linkedNodeList NodesFirstPass, NodesSecondPass;
NodesFirstPass.pHead = (linkedNode*)malloc(sizeof(linkedNode));
NodesFirstPass.pTail = NodesFirstPass.pHead;
NodesSecondPass.pHead = (linkedNode*)malloc(sizeof(linkedNode));
NodesSecondPass.pTail = NodesSecondPass.pHead;
DFSLoop(pAllEdges, &NodesFirstPass, NVERTEX);
free(pAllEdges);
free(pAllEdgesRev);
return 0;
}
void populateNeighbors(neighborList* pNeighborList, FILE* fp)
{
unsigned int v1 = 1;
unsigned int v2 = 1;
unsigned int v1_next = 1;
unsigned int v2_next = 1;
unsigned int neighbors [1000];
fscanf(fp, "%u", &v1_next);
fscanf(fp, "%u", &v2_next);
for (unsigned int i = 0; i < (NVERTEX - 1); i++)
{
//initialize nNeigbors to 0
unsigned int nNeighbors = 0;
for (;;)
{
//if v1_next is a different vertex then v1, then copy v1_next to v1,
//malloc what we need to, copy over the array and continue
if (v1_next != v1)
{
pNeighborList[i].nNeighbors = nNeighbors;
if (nNeighbors != 0)
{
pNeighborList[i].pNeighbors = (unsigned int*)malloc(nNeighbors * sizeof(unsigned int));
for (unsigned int j = 0; j < nNeighbors; j++)
{
pNeighborList[i].pNeighbors[j] = neighbors[j];
}
}
v1++;
break;
}
//else, increment the neighbor count for this particular vertex and continue
//within this loop, getting new neighbors (edges)
else
{
neighbors[nNeighbors] = v2_next;
nNeighbors++;
if (nNeighbors == 1000)
{
break;
}
fscanf(fp, "%u", &v1_next);
fscanf(fp, "%u", &v2_next);
}
}
}
}
void append(linkedNodeList* pLinkedList, unsigned int node)
{
//make new node with the intention that it's going to be the new tail
linkedNode* pNewNode = (linkedNode*)malloc(sizeof(linkedNode));
pNewNode->node = node;
pNewNode->pNextLinkedNode = NULL;
pNewNode->pPrevLinkedNode = pLinkedList->pTail;
//set next node of current tail to new node
(pLinkedList->pTail)->pNextLinkedNode = pNewNode;
//new tail becomes new node
pLinkedList->pTail = pNewNode;
//lastly, set old tail's next node to point to new tail
(pLinkedList->pTail->pPrevLinkedNode)->pNextLinkedNode = pLinkedList->pTail;
}
Judging by the screenshots, and assuming you are on a 64 bit system (a pointer being 8 bytes wide), the pointer pNeighborList links to the start of the list, while pLook links to the pNeighbors attribute of a neighborList element at index 5:
// assuming sizeof(neighborList) == 4 (int) + 8 (pointer) = 12 bytes
neighborList* pNeighborList = new neighborList[10];
// pNeighborList points to the start of the list, 0x00107a28
// pNeighborList[5] is at address 0x00107a64 (start + 5 * sizeof(neighborList)
// .pNeighbors is offset 4 more bytes (sizeof(unsigned int)) = 0x00107a68
int curNode = 5;
unsigned int* pLook = (pNeighborList[curNode]).pNeighbors;
// pLook points to pNeighbors of the element at index 5, 0x00107a68
When you hover the pointer pNeighborList in Visual Studio, it shows you the pointer (which points to the start of the list), not the full value ((pNeighborList[curNode]).pNeighbors).