How can I read each individual character from a string that is accessed through an array of pointers? In the below code I currently have generated an array of pointers to strings called, symCodes, in my makeCodes function. I want to read the strings 8 characters at a time, I thought about concatenating each string together, then looping through that char by char but the strings in symCodes could be up to 255 characters each, so I feel like that could possibly be too much all to handle at once. Instead, I thought I could read each character from the strings, character by character.
I've tried scanf or just looping through and always end up with seg faults. At the end of headerEncode(), it's near the bottom. I malloc enough memory for each individual string, I try to loop through the array of pointers and print out each individual character but am ending up with a seg fault.
Any suggestions of a different way to read an array of pointers to strings, character by character, up to n amount of characters is appreciated.
EDIT 1: I've updated the program to no longer output warnings when using the -Wall and -W flags. I'm no longer getting a seg fault(yay!) but I'm still unsure of how to go about my question, how can I read an array of pointers to strings, character by character, up to n amount of characters?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "huffman.h"
#define FAIL 0
#define SUCCESS 1
/* global 1 day arrays that hold chars and their freqs from file */
unsigned long globalFreqs[256] = {0};
unsigned char globalUsedCh[256] = {0};
char globalCodes[256] = {0};
unsigned char globalUniqueSymbols;
unsigned long totalCount = 0;
typedef struct HuffmanTreeNode* HTNode;
struct HuffmanTreeNode* globalSortedLL;
/*
struct has the input letter, the letters frequency, and the left and irght childs
*/
struct HuffmanTreeNode
{
char symbol;
unsigned long freq;
char *code;
struct HuffmanTreeNode *left, *right;
struct HuffmanTreeNode* next;
};
/* does it make sense to have a struct for the entire huffman tree to see its size? */
struct HuffmanTree
{
unsigned size;
};
/*generate new node with given symbol and freq */
struct HuffmanTreeNode* newNode(char symbol, int freq)
{
struct HuffmanTreeNode* newNode = malloc(sizeof(struct HuffmanTreeNode));
newNode->symbol = symbol;
newNode->freq = freq;
newNode->left = newNode->right = NULL;
return newNode;
}
/*current work in progress, i believe this is the way to insert it for a BST
/* will change for HuffmanTreenode once working
/*
*/
struct HuffmanTreeNode* insert(struct HuffmanTreeNode* node, struct HuffmanTreeNode* htnNew)
{
struct HuffmanTreeNode* currentNode = node;
if(currentNode == NULL || compareTwoNodes(htnNew, currentNode))
{
htnNew->next = currentNode;
return htnNew;
}
else
{
while(currentNode->next != NULL && compareTwoNodes(currentNode->next, htnNew))
{
currentNode = currentNode->next;
}
htnNew->next = currentNode->next;
currentNode->next = htnNew;
return node;
}
}
int compareTwoNodes(struct HuffmanTreeNode* a, struct HuffmanTreeNode* b)
{
if(b->freq < a->freq)
{
return 0;
}
if(a->freq == b->freq)
{
if(a->symbol > b->symbol)
return 1;
return 0;
}
if(b->freq > a->freq)
return 1;
}
struct HuffmanTreeNode* popNode(struct HuffmanTreeNode** head)
{
struct HuffmanTreeNode* node = *head;
*head = (*head)->next;
return node;
}
/*convert output to bytes from bits*/
/*use binary fileio to output */
/*put c for individual character byte*/
/*fwrite each individual byte for frequency of symbol(look at fileio slides) */
/*
#function:
#param:
#return:
*/
int listLength(struct HuffmanTreeNode* node)
{
struct HuffmanTreeNode* current = node;
int length = 0;
while(current != NULL)
{
length++;
current = current->next;
}
return length;
}
/*
#function:
#param:
#return:
*/
void printList(struct HuffmanTreeNode* node)
{
struct HuffmanTreeNode* currentNode = node;
while(currentNode != NULL)
{
if(currentNode->symbol <= ' ' || currentNode->symbol > '~')
printf("=%d", currentNode->symbol);
else
printf("%c", currentNode->symbol);
printf("%lu ", currentNode->freq);
currentNode = currentNode->next;
}
printf("\n");
}
/*
#function:
#param:
#return:
*/
void buildSortedList()
{
int i;
for(i = 0; i < 256; i++)
{
if(!globalFreqs[i] == 0)
{
globalSortedLL = insert(globalSortedLL, newNode(i, globalFreqs[i]));
}
}
printf("Sorted freqs: ");
printList(globalSortedLL);
printf("listL: %d\n", listLength(globalSortedLL));
}
/*
#function: isLeaf()
will test to see if the current node is a leaf or not
#param:
#return
*/
int isLeaf(struct HuffmanTreeNode* node)
{
if((node->left == NULL) && (node->right == NULL))
return SUCCESS;
else
return FAIL;
}
/*where I plan to build the actual huffmantree */
/*
#function:
#param:
#return:
*/
struct HuffmanTreeNode* buildHuffmanTree(struct HuffmanTreeNode* node)
{
int top = 0;
struct HuffmanTreeNode *left, *right, *topNode, *huffmanTree;
struct HuffmanTreeNode* head = node;
struct HuffmanTreeNode *newChildNode, *firstNode, *secondNode;
while(head->next != NULL)
{
/*grab first two items from linkedL, and remove two items*/
firstNode = popNode(&head);
secondNode = popNode(&head);
/*combine sums, use higher symbol, create new node*/
newChildNode = newNode(secondNode->symbol, (firstNode->freq + secondNode->freq));
newChildNode->left = firstNode;
newChildNode->right = secondNode;
/*insert new node, decrement total symbols in use */
head = insert(head, newChildNode);
}
return head;
}
void printTable(char *codesArray[])
{
int i;
printf("Symbol\tFreq\tCode\n");
for(i = 0; i < 256; i++)
{
if(globalFreqs[i] != 0)
{
if(i <= ' ' || i > '~')
{
printf("=%d\t%lu\t%s\n", i, globalFreqs[i], codesArray[i]);
}
else
{
printf("%c\t%lu\t%s\n", i, globalFreqs[i], codesArray[i]);
}
}
}
printf("Total chars = %lu\n", totalCount);
}
void makeCodes(
struct HuffmanTreeNode *node, /* Pointer to some tree node */
char *code, /* The *current* code in progress */
char *symCodes[256], /* The array to hold the codes for all the symbols */
int depth) /* How deep in the tree we are (code length) */
{
char *copiedCode;
int i = 0;
if(isLeaf(node))
{
code[depth] = '\0';
symCodes[node->symbol] = code;
return;
}
copiedCode = malloc(255*sizeof(char));
memcpy(copiedCode, code, 255*sizeof(char));
code[depth] = '0';
copiedCode[depth] = '1';
makeCodes(node->left, code, symCodes, depth+1);
makeCodes(node->right, copiedCode, symCodes, depth+1);
}
/*
#function: getFileFreq()
gets the frequencies of each character in the given
file from the command line, this function will also
create two global 1d arrays, one for the currently
used characters in the file, and then one with those
characters frequencies, the two arrays will line up
parallel
#param: FILE* in, FILE* out,
the current file being processed
#return: void
*/
void getFileFreq(FILE* in, FILE* out)
{
unsigned long freqs[256] = {0};
int i, t, fileCh;
while((fileCh = fgetc(in)) != EOF)
{
freqs[fileCh]++;
totalCount++;
}
for(i = 0; i < 256; i++)
{
if(freqs[i] != 0)
{
globalUsedCh[i] = i;
globalFreqs[i] = freqs[i];
if(i <= ' ' || i > '~')
{
globalUniqueSymbols++;
}
else
{
globalUniqueSymbols++;
}
}
}
/* below code until total count is for debugging purposes */
printf("Used Ch: ");
for(t = 0; t < 256; t++)
{
if(globalUsedCh[t] != 0)
{
if(t <= ' ' || t > '~')
{
printf("%d ", globalUsedCh[t]);
}
else
printf("%c ", globalUsedCh[t]);
}
}
printf("\n");
printf("Freq Ch: ");
for(t = 0; t < 256; t++)
{
if(globalFreqs[t] != 0)
{
printf("%lu ", globalFreqs[t]);
}
}
printf("\n");
/* end of code for debugging/vizualazation of arrays*/
printf("Total Count %lu\n", totalCount);
printf("globalArrayLength: %d\n", globalUniqueSymbols);
}
void headerEncode(FILE* in, FILE* out, char *symCodes[256])
{
char c;
int i, ch, t, q, b, z;
char *a;
char *fileIn;
unsigned char *uniqueSymbols;
unsigned char *byteStream;
unsigned char *tooManySym = 0;
unsigned long totalEncodedSym;
*uniqueSymbols = globalUniqueSymbols;
totalEncodedSym = ftell(in);
rewind(in);
fileIn = malloc((totalEncodedSym+1)*sizeof(char));
fread(fileIn, totalEncodedSym, 1, in);
if(globalUniqueSymbols == 256)
{
fwrite(tooManySym, 1, sizeof(char), out);
}
else
{
fwrite(uniqueSymbols, 1, sizeof(uniqueSymbols)-7, out);
}
for(i = 0; i < 256; i++)
{
if(globalFreqs[i] != 0)
{
fwrite(globalUsedCh+i, 1, sizeof(char), out);
fwrite(globalFreqs+i, 8, sizeof(char), out);
}
}
for(t = 0; t < totalEncodedSym; t++)
{
fwrite(symCodes[fileIn[t]], 8, sizeof(char), out);
}
for(q = 0; q < totalEncodedSym; q++)
{
symCodes[q] = malloc(255*sizeof(char));
a = symCodes[q];
while(*a != '\0')
printf("%c\n", *(a++));
}
printf("Total encoded symbols: %lu\n", totalEncodedSym);
printf("%s\n", fileIn);
}
void encodeFile(FILE* in, FILE* out)
{
int top = 0;
int i;
char *code;
char *symCodes[256] = {0};
int depth = 0;
code = malloc(255*sizeof(char));
getFileFreq(in, out);
buildSortedList();
makeCodes(buildHuffmanTree(globalSortedLL), code, symCodes, depth);
printTable(symCodes);
headerEncode(in, out, symCodes);
free(code);
}
/*
void decodeFile(FILE* in, FILE* out)
{
}*/
There are many problems in your code:
[major] function compareTwoNodes does not always return a value. The compiler can detect such problems if instructed to output more warnings.
[major] the member symbol in the HuffmanTreeNode should have type int. Type char is problematic as an index value because it can be signed or unsigned depending on compiler configuration and platform specificities. You assume that char has values from 0 to 255, which is incorrect for most platforms where char actually has a range of -128 .. 127. Use unsigned char or int but cast the char values to unsigned char to ensure proper promotion.
[major] comparison if (globalUniqueSymbols == 256) is always false because globalUniqueSymbols is an unsigned char. The maximum number of possible byte values is indeed 256 for 8-bit bytes, but it does not fit in an unsigned char, make globalUniqueSymbols an int.
[major] *uniqueSymbols = globalUniqueSymbols; in function headerEncode stores globalUniqueSymbols into an uninitialized pointer, definitely undefined behavior, probable segmentation fault.
[major] sizeof(uniqueSymbols) is the size of a pointer, not the size of the array not the size of the type. Instead of hacking it as sizeof(uniqueSymbols)-7, fputc(globalUniqueSymbols, out);
[major] fwrite(tooManySym, 1, sizeof(char), out); is incorrect too, since tooManySym is initialized to 0, ie: it is a NULL pointer. You need a special value to tell that all bytes values are used in the source stream, use 0 for that and write it with fputc(0, out);.
You have nested C style comments before function insert, this is not a bug but error prone and considered bad style.
function newNode should take type unsigned long for freq for consistency.
function buildHuffmanTree has unused local variables: right, top and topNode.
variable i is unused in function makeCodes.
many unused variables in headerEncode: byteStream, c, ch, b...
totalEncodedSym is an unsigned long, use an index of the proper type in the loops where you stop at totalEncodedSym.
unused variables un encodeFile: i, top...
Most of these can be detected by the compiler with the proper warning level: gcc -Wall -W or clang -Weverything...
There are probably also errors in the program logic, but you cannot see these until you fix the major problems above.
Related
So I have an assignment to create a program in c that reads a couple of sentences(a 140mb file), and based on the 2nd input, which is a number, I need to return the Nth most common word. My idea was to build a hash table with linear probing, every time I get a new element I hash it accordingly based its position and based on djb2, else if there is a collision I rehash. After that, I apply Quicksort based on the occurrence and then I finally access by index.
I am having issues finishing up a hash table with linear probing in c. I am pretty sure I have finished it but every time I run I am getting a heap buffer overflow on lldb. I tried to spot the issue but I still cannot figure it out.
Am I getting out of memory on stack? The file is relatively small to consume so much memory.
I used address sanitiser and I got a heap-buffer-overflow on inserting.
I don't think I am touching the memory outside the allocate region but I am not 100% sure.
Any idea what has gone wrong? This is the table.c implementation and below that you can see the form of the struct.
Here is a more detailed message from address sanitiser:
thread #1: tid = 0x148b44, 0x0000000100166b20 libclang_rt.asan_osx_dynamic.dylib`__asan::AsanDie(), queue = 'com.apple.main-thread', stop reason = Heap buffer overflow
{
"access_size": 1,
"access_type": 1,
"address": 105690555220216,
"description": "heap-buffer-overflow",
"instrumentation_class": "AddressSanitizer",
"pc": 4294981434,
"stop_type": "fatal_error"
}
table.c :
#include "table.h"
#include "entities.h"
static inline entry_t* entryInit(const char* const value){
unsigned int len = strlen(value);
entry_t* entry = malloc(sizeof(entry));
entry->value = malloc(sizeof(char*) * len);
strncpy(entry->value, value, strlen(value));
entry->exists = 1;
entry->occurence = 1;
return entry;
}
table_t* tableInit(const unsigned int size){
table_t* table = malloc(sizeof(table_t));
table->entries = malloc(size*sizeof(entry_t));
table->seed = getPrime();
table->size = size;
table->usedEntries = 0U;
return table;
}
//okay, there is definitely an issue here
table_t* tableResize(table_t* table, const unsigned int newSize){
//most likely wont happen but if there is an overflow then we have a problem
if(table->size > newSize) return NULL;
//create a temp array of the realloced array, then do changes there
entry_t* temp = calloc(newSize,sizeof(entry_t));
table->size = newSize;
//temp pointer to an entry
entry_t *tptr = NULL;
unsigned int pos = 0;
unsigned int index = 0;
while(pos != table->size){
tptr = &table->entries[pos];
if(tptr->exists == 1){
index = hashString(table->seed, tptr->value, table->size, pos);
temp[index] = *entryInit(tptr->value);
temp[index].occurence = tptr->occurence;
break;
}
else pos++;
}
table->entries = temp;
//TODO: change table destroy to free the previous array from the table
free(temp);
return table;
}
//insert works fine, it is efficient enough to add something in the table
unsigned int tableInsert(table_t* table,const char* const value){
//decide when to resize, might create a large enough array to bloat the memory?
if(table->usedEntries >(unsigned int)(2*(table->size/3))) table = tableResize(table, table->size*2);
entry_t* entry = NULL;
unsigned int index;
auto int position = 0;
while(position != table->size){
//calculate the hash of our string as a function of the current position on the table
index = hashString(table->seed,value,table->size, position);
entry = &table->entries[index];
if(entry->exists == 0){
*entry = *entryInit(value);
table->usedEntries++;
return index;
} else if (entry->exists == 1 && strcmp(entry->value, value) == 0){
entry->occurence++;
return index;
} else{
position++;
}
}
}
//there might be an issue here
static inline void tableDestroy(const table_t* const table){
entry_t* entry = NULL;
for (auto int i = 0; i < table->size; ++i){
entry =&table->entries[i];
//printf("Value: %s Occurence: %d Exists: %d \n",entry->value, entry->occurence, entry->exists );
if(&table->entries[i] !=NULL)free(&table->entries[i]);
}
free(table);
}
entities.h :
#pragma once
typedef struct __attribute__((packed)) __entry {
char *value;
unsigned int exists : 1;
unsigned int occurence;
} entry_t;
typedef struct __table {
int size;
int usedEntries;
entry_t *entries;
unsigned int seed;
} table_t;
here is how I read from a file and process the text:
void readFromFile(const char* const fileName, table_t* table){
FILE *fp = fopen(fileName, "r");
if(!fp) fprintf(stderr,"error reading file. \n");
char word[15];//long enough to hold the biggest word in the text?
int position = 0;
char ch;
while((ch = fgetc(fp))!= EOF){
//discard all the ascii chars that are not letters
if(!(ch >= 65 && ch <= 90) && !(ch >= 97 && ch <= 122)){
word[position]= '\0';
if(word[0] == NULL)continue;
tableInsert(table, word);
position = 0;
continue;
}
else word[position++] = ch;
}
}
Any suggestions what is wrong with my code?
I believe resize might have an issue and I am not properly deleting yet because I have had a lot of problems with the memory management.
Thanks in advance!
Hi I am attempting to implement a really simple hashmap in regular C with a string as key and a void pointer as value as I wish to use the map for multiple data types.
So far I have this
struct node{
void * value;
char * key;
};
unsigned long strhash(char *string)
{
unsigned long hash = 5381;
int c;
while ((c = *string++))
{
hash = ((hash << 5) + hash) + c;
}
return hash;
}
map_t *map_create(int maxSize){
map_t *map = malloc(sizeof(map_t));
map->curSize = 0;
map->maxSize = maxSize;
map->nodes = calloc(map->maxSize, sizeof(node_t *));
return map;
}
node_t *node_create(char *key, void *value){
node_t *node = malloc(sizeof(node_t));
node->key = key;
node->value = value;
return node;
}
void map_insert(map_t *map, char *key, void *value){
node_t *node = node_create(key, value);
int idx = strhash(key) % map->maxSize;
if(map->nodes[idx] == NULL){
map->nodes[idx] = node;
}else{
while(map->nodes[idx] != NULL){
idx++%map->maxSize;
}
map->nodes[idx] = node;
}
return;
}
void map_print(map_t *map){
for(int i = 0; i < map->maxSize; i++){
if(map->nodes[i] != NULL){
printf("index: %d\t value: %d\n",i, *(int*)map->nodes[i]->value);
}
}
return;
}
void map_destroy(map_t *map){
for(int i = 0; i < map->maxSize; i++){
if(map->nodes[i] != NULL){
free(map->nodes[i]);
}
}
free(map->nodes);
free(map);
return;
}
int main(){
map_t *map = map_create(32);
for(int i = 0; i < 30; i++){
map_insert(map, (char*)&i, &i);
}
map_print(map);
map_destroy(map);
return 0;
}
The problem is the output is not as I'd expect when the map gets printed all that is retrieved is the value "30" on all indexes which is the last number inserted into the map. If I change the value to type int the map works as expected, so is there must be something crucial I am missing in regards to pointers.
I am not the greatest at C so any light which could be shed on this would be most appreciated.
The problem is that you're using the same pointer every time you call map_insert(). It just stores the pointer, it doesn't copy the data. Each time through the loop you change the contents of that memory, so all the hash map elements point to that same value.
There are two ways you can fix it. One way is to always make a dynamically-allocated copy of the data before calling map_insert():
for (int i = 0; i < 30; i++) {
int *i_copy = malloc(sizeof *i_copy);
*i_copy = i;
map_insert(map, (char *)i_copy, (char *)i_copy);
}
The other option is to add the size of the value to the map_insert() and node_create() arguments. Then node_create call malloc() and memcpy() to copy the value to dynamic memory.
BTW, there's another problem. The key is supposed to be a null-terminated string (strhash() depends on this), but you're using &i, which is a pointer to an integer. Casting a pointer to an integer to char* doesn't return a string, it just returns a pointer to the same location with a different data type. I haven't fixed this above.
OP stores a reference to the same value, so of course all lookups yield the same value (which is not even a string, but whatever the storage representation of the value of the variable i happens to be).
I prefer chaining the hash map entries, and keeping a copy of the hash in the entry:
struct entry {
struct entry *next;
size_t hash;
void *data;
size_t data_size;
int data_type;
unsigned char name[];
};
typedef struct {
size_t size;
size_t used; /* Number of entries, total */
struct entry **slot; /* Array of entry pointers */
size_t (*hash)(const unsigned char *, size_t);
} hashmap;
int hashmap_new(hashmap *hmap, const size_t size,
size_t (*hash)(const unsigned char *, size_t))
{
if (!hmap)
return -1; /* No hashmap specified */
hmap->size = 0;
hmap->used = 0;
hmap->slot = NULL;
hmap->hash = NULL;
if (size < 1)
return -1; /* Invalid size */
if (!hash)
return -1; /* No hash function specified. */
hmap->slot = calloc(size, sizeof hmap->slot[0]);
if (!hmap->slot)
return -1; /* Not enough memory */
hmap->size = size;
hmap->hash = hash;
return 0;
}
void hashmap_free(hashmap *hmap)
{
if (hmap) {
size_t i = hmap->size;
while (i-->0) {
struct entry *next = hmap->slot[i];
struct entry *curr;
while (next) {
curr = next;
next = next->next;
free(curr->data);
/* Poison the entry, to help detect use-after-free bugs. */
curr->next = NULL;
curr->data = NULL;
curr->hash = 0;
curr->data_size = 0;
curr->data_type = 0;
curr->name[0] = '\0';
free(curr);
}
}
}
free(hmap->slot);
hmap->size = 0;
hmap->used = 0;
hmap->slot = NULL;
hmap->hash = NULL;
}
To insert a key-value pair, the function either uses the data specified as-is, in which case it's the caller's responsibility to ensure each key has their own unique data not overwritten later; or we copy the user data. In the above hashmap_free() function, you'll see free(curr->data);; it assumes we allocated memory dynamically, and copied the user data there. So:
int hashmap_add(hashmap *hmap, const unsigned char *name,
const void *data, const size_t data_size,
const int data_type)
{
const size_t namelen = (name) ? strlen(name) : 0;
struct entry *curr;
size_t i;
if (!hmap)
return -1; /* No hashmap specified. */
if (name_len < 1)
return -1; /* NULL or empty name. */
/* Allocate memory for the hashmap entry,
including enough room for the name, and end of string '\0'. */
curr = malloc(sizeof (struct entry) + namelen + 1;
if (!curr)
return -1; /* Out of memory. */
/* Copy data, if any. */
if (data_size > 0) {
curr->data = malloc(data_size);
if (!curr->data) {
free(curr);
return -1; /* Out of memory. */
}
memcpy(curr->data, data, data_size);
} else {
curr->data = NULL;
curr->data_size = 0;
}
curr->data_type = data_type;
/* Calculate the hash of the name. */
curr->hash = hmap->hash(name, namelen);
/* Copy name, including the trailing '\0'. */
memcpy(curr->name, name, namelen + 1);
/* Slot to prepend to. */
i = curr->hash % hmap->size;
curr->next = hmap->slot[i];
hmap->slot[i] = curr;
/* An additional node added. */
hmap->used++;
return 0;
}
The meaning of data_type is completely up to the user of the code.
Lookup can be made based on the hash and the data type:
/* Returns 0 if found. */
int hashmap_find(hashmap *hmap, const unsigned char *name,
const int data_type,
void **dataptr_to, size_t *size_to)
{
struct entry *curr;
size_t hash;
if (size_to)
*size_to = 0;
if (dataptr_to)
*dataptr_to = NULL;
if (!hmap)
return -1; /* No hashmap specified. */
if (!name || !*name)
return -1; /* NULL or empty name. */
hash = hmap->hash(name, strlen(name));
curr = hmap->slot[hash % hmap->size];
for (curr = hmap->slot[hash % hmap->size]; curr != NULL; curr = curr->next) {
if (curr->data_type == data_type && curr->hash == hash &&
!strcmp(curr->name, name)) {
/* Data type an name matches. Save size if requested. */
if (size_to)
*size_to = curr->data_size;
if (dataptr_to)
*dataptr_to = curr->data;
return 0; /* Found. */
}
}
return -1; /* Not found. */
}
The above lookup returns 0 if found, and nonzero if error or not found. (This way, even zero-size NULL data can be stored in the hash map.)
If the number of data types supported is small, say 32, then using an unsigned int with each bit (1U<<0 == 1, 1U<<1 == 2, 1U<<2 == 4, and so on) reserved for a specific type, you can do the lookup using a mask, allowing only the specified types. Similarly, the data_type can be a mask, describing which types the value can be interpreted as (almost always will have just one bit set).
This scheme also allows one to dynamically resize the hashmap, by allocating a new slot array of pointers, and moving each old entry to the new one. The keys don't need to be rehashed, because the original hash is stored in each entry. For lookup efficiency, the chains (hanging off each slot) should be as short as possible. A common "rule of thumb" is that hashmap->size should be between hashmap->used and 2 * hashmap->used.
When you call map_insert(map, (char*)&i, &i); the value inserted into hasmap is the pointer to i variable, i.e. its address in memory, and not the value of i.
So when you change i value inside the for loop there is the side-effect to all entries into the hashmap, and at the end of the loop you only see the last value assigned.
I have defined my own type. It contains a pointer to an array, as well as how many items are in that array
struct neighborList
{
unsigned int nNeighbors;
unsigned int* pNeighbors;
};
These get instantiated, populated, and eventually I want to go back through them. Then something very strange happens. I think screenshots are better than words here.
I've shown the next statement to execute. I have an array of the aforementioned data type, and the one under consideration here has 1 neighbor and the address of that 1 neighbor is 0x107a28; Cool. But what actually gets assigned to pLook?
The address is always off by 0x40. Has anyone seen anything like this? Help here is appreciated.
EDIT: Here's the whole thing since several people want to see it.
#include "stdafx.h"
#include <stdlib.h>
#include <time.h>
//#define NVERTEX 875714
#define NVERTEX 9
struct linkedNode
{
unsigned int node;
linkedNode* pNextLinkedNode;
linkedNode* pPrevLinkedNode;
};
struct neighborList
{
unsigned int nNeighbors;
unsigned int* pNeighbors;
};
struct linkedNodeList
{
linkedNode* pHead;
linkedNode* pTail;
};
void populateNeighbors(neighborList* pNeighborList, FILE* fp);
void DFSLoop(neighborList* pNeighborList, linkedNode* pOutput, unsigned int nNodes);
void append(linkedNodeList* pLinkedList, unsigned int node);
void DFSLoop(neighborList* pNeighborList, linkedNodeList* pOutput, unsigned int nNodes)
{
bool* visitedArray;
bool* cashedArray;
unsigned int* leaderArray;
unsigned int* finishingTimes;
unsigned int t = 0;
visitedArray = (bool*)malloc(nNodes*sizeof(bool));
cashedArray = (bool*)malloc(nNodes*sizeof(bool));
leaderArray = (unsigned int*)malloc(nNodes*sizeof(unsigned int));
finishingTimes = (unsigned int*)malloc(nNodes*sizeof(unsigned int));
//initialize all arrays to all false/0
for (unsigned int i = 0; i < nNodes; i++)
{
visitedArray[i] = false;
cashedArray[i] = false;
leaderArray[i] = 0;
finishingTimes[i] = 0;
}
//firstly, pick a starting node and put it on the linkedList
//initialize head and tail
(pOutput->pHead)->node = 1;
(pOutput->pHead)->pNextLinkedNode = NULL;
(pOutput->pHead)->pPrevLinkedNode = NULL;
(pOutput->pTail)->node = 1;
(pOutput->pTail)->pNextLinkedNode = NULL;
(pOutput->pTail)->pPrevLinkedNode = NULL;
unsigned int curNode = (pOutput->pTail)->node;
for (;;)
{
//Start DFS
//#1 If current node under consideration has an unexplored neighbor, make it the new tail and repeat
// If not, current node is cashed. Set it's finishing time, and leader. Work back through the list
// Until you find a node with an unexplored neighbor
unsigned int nNeighbors = pNeighborList[curNode].nNeighbors;
for (unsigned int i = 0; i < nNeighbors; i++)
{
unsigned int* pLook = (pNeighborList[curNode]).pNeighbors;
unsigned int neighbor = pLook[0];
/*
unsigned int nodeUnderConsideration = (pNeighborList[curNode].pNeighbors)[i];
if ( !cashedArray[nodeUnderConsideration])
{
append(pOutput, (pNeighborList[curNode].pNeighbors)[i]);
curNode = (pOutput->pTail)->node;
continue;
}
*/
}
//#2 If you make it back to the head and have no unexplored neighbors, pick new vertex (if unvisited) and repeat
}
free(visitedArray);
free(cashedArray);
free(leaderArray);
free(finishingTimes);
}
int _tmain(int argc, _TCHAR* argv[])
{
//open file
FILE* fp;
FILE* fpRev;
//fp = fopen("SCC.txt", "rb");
//fpRev = fopen("SSCrev.txt", "rb");
fp = fopen("SSCsmall1.txt", "rb");
fpRev = fopen("SSCsmall1rev.txt", "rb");
/* read file. When reading, keep track of how much memory to malloc */
/* for each vertex */
neighborList* pAllEdges;
neighborList* pAllEdgesRev;
pAllEdges = (neighborList*)malloc(NVERTEX*sizeof(neighborList));
pAllEdgesRev = (neighborList*)malloc(NVERTEX*sizeof(neighborList));
populateNeighbors(pAllEdges, fp);
populateNeighbors(pAllEdgesRev, fpRev);
//instantiate pointers for linkedlists needed for DFS
linkedNodeList NodesFirstPass, NodesSecondPass;
NodesFirstPass.pHead = (linkedNode*)malloc(sizeof(linkedNode));
NodesFirstPass.pTail = NodesFirstPass.pHead;
NodesSecondPass.pHead = (linkedNode*)malloc(sizeof(linkedNode));
NodesSecondPass.pTail = NodesSecondPass.pHead;
DFSLoop(pAllEdges, &NodesFirstPass, NVERTEX);
free(pAllEdges);
free(pAllEdgesRev);
return 0;
}
void populateNeighbors(neighborList* pNeighborList, FILE* fp)
{
unsigned int v1 = 1;
unsigned int v2 = 1;
unsigned int v1_next = 1;
unsigned int v2_next = 1;
unsigned int neighbors [1000];
fscanf(fp, "%u", &v1_next);
fscanf(fp, "%u", &v2_next);
for (unsigned int i = 0; i < (NVERTEX - 1); i++)
{
//initialize nNeigbors to 0
unsigned int nNeighbors = 0;
for (;;)
{
//if v1_next is a different vertex then v1, then copy v1_next to v1,
//malloc what we need to, copy over the array and continue
if (v1_next != v1)
{
pNeighborList[i].nNeighbors = nNeighbors;
if (nNeighbors != 0)
{
pNeighborList[i].pNeighbors = (unsigned int*)malloc(nNeighbors * sizeof(unsigned int));
for (unsigned int j = 0; j < nNeighbors; j++)
{
pNeighborList[i].pNeighbors[j] = neighbors[j];
}
}
v1++;
break;
}
//else, increment the neighbor count for this particular vertex and continue
//within this loop, getting new neighbors (edges)
else
{
neighbors[nNeighbors] = v2_next;
nNeighbors++;
if (nNeighbors == 1000)
{
break;
}
fscanf(fp, "%u", &v1_next);
fscanf(fp, "%u", &v2_next);
}
}
}
}
void append(linkedNodeList* pLinkedList, unsigned int node)
{
//make new node with the intention that it's going to be the new tail
linkedNode* pNewNode = (linkedNode*)malloc(sizeof(linkedNode));
pNewNode->node = node;
pNewNode->pNextLinkedNode = NULL;
pNewNode->pPrevLinkedNode = pLinkedList->pTail;
//set next node of current tail to new node
(pLinkedList->pTail)->pNextLinkedNode = pNewNode;
//new tail becomes new node
pLinkedList->pTail = pNewNode;
//lastly, set old tail's next node to point to new tail
(pLinkedList->pTail->pPrevLinkedNode)->pNextLinkedNode = pLinkedList->pTail;
}
Judging by the screenshots, and assuming you are on a 64 bit system (a pointer being 8 bytes wide), the pointer pNeighborList links to the start of the list, while pLook links to the pNeighbors attribute of a neighborList element at index 5:
// assuming sizeof(neighborList) == 4 (int) + 8 (pointer) = 12 bytes
neighborList* pNeighborList = new neighborList[10];
// pNeighborList points to the start of the list, 0x00107a28
// pNeighborList[5] is at address 0x00107a64 (start + 5 * sizeof(neighborList)
// .pNeighbors is offset 4 more bytes (sizeof(unsigned int)) = 0x00107a68
int curNode = 5;
unsigned int* pLook = (pNeighborList[curNode]).pNeighbors;
// pLook points to pNeighbors of the element at index 5, 0x00107a68
When you hover the pointer pNeighborList in Visual Studio, it shows you the pointer (which points to the start of the list), not the full value ((pNeighborList[curNode]).pNeighbors).
Edit:
Hash.c is updated with revisions from the comments, I am still getting a Seg fault. I must be missing something here that you guys are saying
I have created a hash table ADT using C but I am encountering a segmentation fault when I try to call a function (find_hash) in the ADT.
I have posted all 3 files that I created parse.c, hash.c, and hash.h, so you can see all of the variables. We are reading from the file gettysburg.txt which is also attached
The seg fault is occuring in parse.c when I call find_hash. I cannot figure out for the life of me what is going on here. If you need anymore information I can surely provide it.
sorry for the long amount of code I have just been completely stumped for a week now on this. Thanks in advance
The way I run the program is first:
gcc -o parse parse.c hash.c
then: cat gettysburg.txt | parse
Parse.c
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "hash.h"
#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0
void lower_case_word(char *);
void dump_dictionary(Phash_table );
/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);
typedef struct user_data_ {
char word[WORD_SIZE];
int freq_counter;
} user_data, *Puser_data;
int main(void)
{
char c, word1[WORD_SIZE];
int char_index = 0, dictionary_size = 0, num_words = 0, i;
int total=0, largest=0;
float average = 0.0;
Phash_table t; //Pointer to main hash_table
int (*Phash_func)(char *)=NULL; //Function Pointers
int (*Pcmp_func)(void *, void *)=NULL;
Puser_data data_node; //pointer to hash table above
user_data * find;
printf("Parsing input ...\n");
Phash_func = hash_func; //Assigning Function pointers
Pcmp_func = cmp_func;
t = new_hash(1000,Phash_func,Pcmp_func);
// Read in characters until end is reached
while ((c = getchar()) != EOF) {
if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
(c == ':') || (c == '\n')) {
// End of a word
if (char_index) {
// Word is not empty
word1[char_index] = '\0';
lower_case_word(word1);
data_node = (Puser_data)malloc(sizeof(user_data));
strcpy(data_node->word,word1);
printf("%s\n", data_node->word);
//!!!!!!SEG FAULT HERE!!!!!!
if (!((user_data *)find_hash(t, data_node->word))){ //SEG FAULT!!!!
insert_hash(t,word1,(void *)data_node);
}
char_index = 0;
num_words++;
}
} else {
// Continue assembling word
word1[char_index++] = c;
}
}
printf("There were %d words; %d unique words.\n", num_words,
dictionary_size);
dump_dictionary(t); //???
}
void lower_case_word(char *w){
int i = 0;
while (w[i] != '\0') {
w[i] = tolower(w[i]);
i++;
}
}
void dump_dictionary(Phash_table t){ //???
int i;
user_data *cur, *cur2;
stat_hash(t, &(t->total), &(t->largest), &(t->average)); //Call to stat hash
printf("Number of unique words: %d\n", t->total);
printf("Largest Bucket: %d\n", t->largest);
printf("Average Bucket: %f\n", t->average);
cur = start_hash_walk(t);
printf("%s: %d\n", cur->word, cur->freq_counter);
for (i = 0; i < t->total; i++)
cur2 = next_hash_walk(t);
printf("%s: %d\n", cur2->word, cur2->freq_counter);
}
int hash_func(char *string){
int i, sum=0, temp, index;
for(i=0; i < strlen(string);i++){
sum += (int)string[i];
}
index = sum % 1000;
return (index);
}
/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){
user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;
if(cur1->freq_counter < cur2->freq_counter){
return(-1);}
else{ if(cur1->freq_counter > cur2->freq_counter){
return(1);}
else return(0);}
}
hash.c
#include "hash.h"
Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
int i;
Phash_table t;
t = (Phash_table)malloc(sizeof(hash_table)); //creates the main hash table
t->buckets = (hash_entry **)malloc(sizeof(hash_entry *)*size); //creates the hash table of "size" buckets
t->size = size; //Holds the number of buckets
t->hash_func = hash_func; //assigning the pointer to the function in the user's program
t->cmp_func = cmp_func; // " "
t->total=0;
t->largest=0;
t->average=0;
t->sorted_array = NULL;
t->index=0;
t->sort_num=0;
for(i=0;i<size;i++){ //Sets all buckets in hash table to NULL
t->buckets[i] = NULL;}
return(t);
}
void free_hash(Phash_table table){
int i;
hash_entry *cur;
for(i = 0; i<(table->size);i++){
if(table->buckets[i] != NULL){
for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
free(cur->key); //Freeing memory for key and data
free(cur->data);
}
free(table->buckets[i]); //free the whole bucket
}}
free(table->sorted_array);
free(table);
}
void insert_hash(Phash_table table, char *key, void *data){
Phash_entry new_node; //pointer to a new node of type hash_entry
int index;
new_node = (Phash_entry)malloc(sizeof(hash_entry));
new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
new_node->data = data; //stores the user's data into the node
strcpy(new_node->key,key); //copies the key into the node
//calling the hash function in the user's program
index = table->hash_func(key); //index will hold the hash table value for where the new node will be placed
table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
table->total++; //increment the total (total # of buckets)
}
void *find_hash(Phash_table table, char *key){
int i;
hash_entry *cur;
printf("Inside find_hash\n"); //REMOVE
for(i = 0;i<table->size;i++){
if(table->buckets[i]!=NULL){
for(cur = table->buckets[i]; cur->next != NULL; cur = cur->next){
if(strcmp(table->buckets[i]->key, key) == 0)
return((table->buckets[i]->data));} //returns the data to the user if the key values match
} //otherwise return NULL, if no match was found.
}
return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){
int node_num[table->size]; //creates an array, same size as table->size(# of buckets)
int i,j, count = 0;
int largest_buck = 0;
hash_entry *cur;
for(i = 0; i < table->size; i ++){
if(table->buckets[i] != NULL){
for(cur=table->buckets[i]; cur->next!=NULL; cur = cur->next){
count ++;}
node_num[i] = count;
count = 0;}
}
for(j = 0; j < table->size; j ++){
if(node_num[j] > largest_buck)
largest_buck = node_num[j];}
*total = table->total;
*largest = largest_buck;
*average = (table->total) / (table->size);
}
void *start_hash_walk(Phash_table table){
Phash_table temp = table;
int i, j, k;
hash_entry *cur; //CHANGE IF NEEDED to HASH_TABLE *
if(table->sorted_array != NULL) free(table->sorted_array);
table->sorted_array = (void**)malloc(sizeof(void*)*(table->total));
for(i = 0; i < table->total; i++){
if(table->buckets[i]!=NULL){
for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
table->sorted_array[i] = table->buckets[i]->data;
}}
}
for(j = (table->total) - 1; j > 0; j --) {
for(k = 1; k <= j; k ++){
if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
temp -> buckets[0]-> data = table->sorted_array[k-1];
table->sorted_array[k-1] = table->sorted_array[k];
table->sorted_array[k] = temp->buckets[0] -> data;
}
}
}
return table->sorted_array[table->sort_num];
}
void *next_hash_walk(Phash_table table){
table->sort_num ++;
return table->sorted_array[table->sort_num];
}
hash.h
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct hash_entry_ { //Linked List
void *data; //Generic pointer
char *key; //String-based key value
struct hash_entry_ *next; //Self-Referencing pointer
} hash_entry, *Phash_entry;
typedef struct hash_table_ {
hash_entry **buckets; //Pointer to a pointer to a Linked List of type hash_entry
int (*hash_func)(char *);
int (*cmp_func)(void *, void *);
int size;
void **sorted_array; //Array used to sort each hash entry
int index;
int total;
int largest;
float average;
int sort_num;
} hash_table, *Phash_table;
Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *));
void free_hash(Phash_table table);
void insert_hash(Phash_table table, char *key, void *data);
void *find_hash(Phash_table table, char *key);
void stat_hash(Phash_table table, int *total, int *largest, float *average);
void *start_hash_walk(Phash_table table);
void *next_hash_walk(Phash_table table);
Gettysburg.txt
Four score and seven years ago, our fathers brought forth upon this continent a new nation: conceived in liberty, and dedicated to the proposition that all men are created equal.
Now we are engaged in a great civil war. . .testing whether that nation, or any nation so conceived and so dedicated. . . can long endure. We are met on a great battlefield of that war.
We have come to dedicate a portion of that field as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.
But, in a larger sense, we cannot dedicate. . .we cannot consecrate. . . we cannot hallow this ground. The brave men, living and dead, who struggled here have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember, what we say here, but it can never forget what they did here.
It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us. . .that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion. . . that we here highly resolve that these dead shall not have died in vain. . . that this nation, under God, shall have a new birth of freedom. . . and that government of the people. . .by the people. . .for the people. . . shall not perish from the earth.
It's possible that one of several problems with this code are loops like:
for(table->buckets[i];
table->buckets[i]->next != NULL;
table->buckets[i] = table->buckets[i]->next)
...
The initializing part of the for loop (table->buckets[i]) has no effect. If i is 0 and table->buckets[0] == NULL, then the condition on this loop (table->buckets[i]->next != NULL) will dereference a null pointer and crash.
That's where your code seemed to be crashing for on my box, at least. When I changed several of your loops to:
if (table->buckets[i] != NULL) {
for(;
table->buckets[i]->next != NULL;
table->buckets[i] = table->buckets[i]->next)
...
}
...it kept crashing, but in a different place. Maybe that will help get you unstuck?
Edit: another potential problem is that those for loops are destructive. When you call find_hash, do you really want all of those buckets to be modified?
I'd suggest using something like:
hash_entry *cur;
// ...
if (table->buckets[i] != NULL) {
for (cur = table->buckets[i]; cur->next != NULL; cur = cur->next) {
// ...
}
}
When I do that and comment out your dump_dictionary function, your code runs without crashing.
Hmm,
here's hash.c
#include "hash.h"
Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
int i;
Phash_table t;
t = (Phash_table)calloc(1, sizeof(hash_table)); //creates the main hash table
t->buckets = (hash_entry **)calloc(size, sizeof(hash_entry *)); //creates the hash table of "size" buckets
t->size = size; //Holds the number of buckets
t->hash_func = hash_func; //assigning the pointer to the function in the user's program
t->cmp_func = cmp_func; // " "
t->total=0;
t->largest=0;
t->average=0;
for(i=0;t->buckets[i] != NULL;i++){ //Sets all buckets in hash table to NULL
t->buckets[i] = NULL;}
return(t);
}
void free_hash(Phash_table table){
int i;
for(i = 0; i<(table->size);i++){
if(table->buckets[i]!=NULL)
for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
free(table->buckets[i]->key); //Freeing memory for key and data
free(table->buckets[i]->data);
}
free(table->buckets[i]); //free the whole bucket
}
free(table->sorted_array);
free(table);
}
void insert_hash(Phash_table table, char *key, void *data){
Phash_entry new_node; //pointer to a new node of type hash_entry
int index;
new_node = (Phash_entry)calloc(1,sizeof(hash_entry));
new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
new_node->data = data; //stores the user's data into the node
strcpy(new_node->key,key); //copies the key into the node
//calling the hash function in the user's program
index = table->hash_func(key); //index will hold the hash table value for where the new node will be placed
table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
table->total++; //increment the total (total # of buckets)
}
void *find_hash(Phash_table table, char *key){
int i;
hash_entry *cur;
printf("Inside find_hash\n"); //REMOVE
for(i = 0;i<table->size;i++){
if(table->buckets[i]!=NULL){
for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
//for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
if(strcmp(cur->key, key) == 0)
return((cur->data));} //returns the data to the user if the key values match
} //otherwise return NULL, if no match was found.
}
return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){
int node_num[table->size];
int i,j, count = 0;
int largest_buck = 0;
hash_entry *cur;
for(i = 0; i < table->size; i ++)
{
if(table->buckets[i]!=NULL)
for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
//for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
count ++;}
node_num[i] = count;
count = 0;
}
for(j = 0; j < table->size; j ++){
if(node_num[j] > largest_buck)
largest_buck = node_num[j];}
*total = table->total;
*largest = largest_buck;
*average = (table->total) /(float) (table->size); //oook: i think you want a fp average
}
void *start_hash_walk(Phash_table table){
void* temp = 0; //oook: this was another way of overwriting your input table
int i, j, k;
int l=0; //oook: new counter for elements in your sorted_array
hash_entry *cur;
if(table->sorted_array !=NULL) free(table->sorted_array);
table->sorted_array = (void**)calloc((table->total), sizeof(void*));
for(i = 0; i < table->size; i ++){
//for(i = 0; i < table->total; i++){ //oook: i don't think you meant total ;)
if(table->buckets[i]!=NULL)
for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
//for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
table->sorted_array[l++] = cur->data;
}
}
//oook: sanity check/assert on expected values
if (l != table->total)
{
printf("oook: l[%d] != table->total[%d]\n",l,table->total);
}
for(j = (l) - 1; j > 0; j --) {
for(k = 1; k <= j; k ++){
if (table->sorted_array[k-1] && table->sorted_array[k])
{
if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
temp = table->sorted_array[k-1]; //ook. changed temp to void* see assignment
table->sorted_array[k-1] = table->sorted_array[k];
table->sorted_array[k] = temp;
}
}
else
printf("if (table->sorted_array[k-1] && table->sorted_array[k])\n");
}
}
return table->sorted_array[table->sort_num];
}
void *next_hash_walk(Phash_table table){
/*oook: this was blowing up since you were incrementing past the size of sorted_array..
NB: *you **need** to implement some bounds checking here or you will endup with more seg-faults!!*/
//table->sort_num++
return table->sorted_array[table->sort_num++];
}
here's parse.c
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <assert.h> //oook: added so you can assert ;)
#include "hash.h"
#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0
void lower_case_word(char *);
void dump_dictionary(Phash_table );
/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);
typedef struct user_data_ {
char word[WORD_SIZE];
int freq_counter;
} user_data, *Puser_data;
int main(void)
{
char c, word1[WORD_SIZE];
int char_index = 0, dictionary_size = 0, num_words = 0, i;
int total=0, largest=0;
float average = 0.0;
Phash_table t; //Pointer to main hash_table
int (*Phash_func)(char *)=NULL; //Function Pointers
int (*Pcmp_func)(void *, void *)=NULL;
Puser_data data_node; //pointer to hash table above
user_data * find;
printf("Parsing input ...\n");
Phash_func = hash_func; //Assigning Function pointers
Pcmp_func = cmp_func;
t = new_hash(1000,Phash_func,Pcmp_func);
// Read in characters until end is reached
while ((c = getchar()) != EOF) {
if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
(c == ':') || (c == '\n')) {
// End of a word
if (char_index) {
// Word is not empty
word1[char_index] = '\0';
lower_case_word(word1);
data_node = (Puser_data)calloc(1,sizeof(user_data));
strcpy(data_node->word,word1);
printf("%s\n", data_node->word);
//!!!!!!SEG FAULT HERE!!!!!!
if (!((user_data *)find_hash(t, data_node->word))){ //SEG FAULT!!!!
dictionary_size++;
insert_hash(t,word1,(void *)data_node);
}
char_index = 0;
num_words++;
}
} else {
// Continue assembling word
word1[char_index++] = c;
}
}
printf("There were %d words; %d unique words.\n", num_words,
dictionary_size);
dump_dictionary(t); //???
}
void lower_case_word(char *w){
int i = 0;
while (w[i] != '\0') {
w[i] = tolower(w[i]);
i++;
}
}
void dump_dictionary(Phash_table t){ //???
int i;
user_data *cur, *cur2;
stat_hash(t, &(t->total), &(t->largest), &(t->average)); //Call to stat hash
printf("Number of unique words: %d\n", t->total);
printf("Largest Bucket: %d\n", t->largest);
printf("Average Bucket: %f\n", t->average);
cur = start_hash_walk(t);
if (!cur) //ook: do test or assert for null values
{
printf("oook: null== (cur = start_hash_walk)\n");
exit(-1);
}
printf("%s: %d\n", cur->word, cur->freq_counter);
for (i = 0; i < t->total; i++)
{//oook: i think you needed these braces
cur2 = next_hash_walk(t);
if (!cur2) //ook: do test or assert for null values
{
printf("oook: null== (cur2 = next_hash_walk(t) at i[%d])\n",i);
}
else
printf("%s: %d\n", cur2->word, cur2->freq_counter);
}//oook: i think you needed these braces
}
int hash_func(char *string){
int i, sum=0, temp, index;
for(i=0; i < strlen(string);i++){
sum += (int)string[i];
}
index = sum % 1000;
return (index);
}
/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){
user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;
/* ooook: do assert on programmatic errors.
this function *requires non-null inputs. */
assert(cur1 && cur2);
if(cur1->freq_counter < cur2->freq_counter){
return(-1);}
else{ if(cur1->freq_counter > cur2->freq_counter){
return(1);}
else return(0);}
}
follow the //ooks
Explanation:
There were one or two places this was going to blow up in.
The quick fix and answer to your question was in parse.c, circa L100:
cur = start_hash_walk(t);
printf("%s: %d\n", cur->word, cur->freq_counter);
..checking that cur is not null before calling printf fixes your immediate seg-fault.
But why would cur be null ? ~because of this bad-boy:
void *start_hash_walk(Phash_table table)
Your hash_func(char *string) can (& does) return non-unique values. This is of course ok except that you have not yet implemented your linked list chains. Hence you end up with table->sorted_array containing less than table->total elements ~or you would if you were iterating over all table->size buckets ;)
There are one or two other issues.
For now i hacked Nate Kohl's for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next) further, to be for(cur=table->buckets[i]; cur != NULL; cur=cur->next) since you have no chains. But this is *your TODO so enough said about that.
Finally. note that in next_hash_walk(Phash_table table) you have:
table->sort_num++
return table->sorted_array[table->sort_num];
Ouch! Do check those array bounds!
Notes
1) If you're function isn't designed to change input, then make the input const. That way the compiler may well tell you when you're inadvertently trashing something.
2) Do bound checking on your array indices.
3) Do test/assert for Null pointers before attempting to use them.
4) Do unit test each of your functions; never write too much code before compiling & testing.
5) Use minimal test-data; craft it such that it limit-tests your code & attempts to break it in cunning ways.
6) Do initialise you data structures!
7)Never use egyptian braces ! {
only joking ;)
}
PS Good job so far ~> pointers are tricky little things! & a well asked question with all the necessary details so +1 and gl ;)
(//oook: maybe add a homework tag)
I am trying to write a Huffman encoding program to compress a text file. Upon completetion, the program will terminate at the return statement, or when I attempt to close a file I was reading from. I assume I have memory leaks, but I cannot find them. If you can spot them, let me know (and a method for fixing them would be appreciated!).
(note: small1.txt is any standard text file)
Here is the main program
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#define ASCII 255
struct link {
int freq;
char ch[ASCII];
struct link* right;
struct link* left;
};
typedef struct link node;
typedef char * string;
FILE * ofp;
FILE * ifp;
int writebit(unsigned char);
void sort(node *[], int);
node* create(char[], int);
void sright(node *[], int);
void Assign_Code(node*, int[], int, string *);
void Delete_Tree(node *);
int main(int argc, char *argv[]) {
//Hard-coded variables
//Counters
int a, b, c = 0;
//Arrays
char *key = (char*) malloc(ASCII * sizeof(char*));
int *value = (int*) malloc(ASCII * sizeof(int*));
//File pointers
FILE *fp = fopen(argv[1], "r");
if (fp == NULL) {
fprintf(stderr, "can't open %s\n", argv[1]);
return 0;
}
//Nodes
node* ptr;//, *head;
node* array[ASCII];
//
int u, carray[ASCII];
char str[ASCII];
//Variables
char car = 0;
int inList = 0;
int placeinList = -1;
int numofKeys;
if (argc < 2) {
printf("Usage: huff <.txt file> \n");
return 0;
}
for (a = 0; a < ASCII; a++) {
key[a] = -1;
value[a] = 0;
}
car = fgetc(fp);
while (!feof(fp)) {
for (a = 0; a < ASCII; a++) {
if (key[a] == car) {
inList = 1;
placeinList = a;
}
}
if (inList) {
//increment value array
value[placeinList]++;
inList = 0;
} else {
for (b = 0; b < ASCII; b++) {
if (key[b] == -1) {
key[b] = car;
break;
}
}
}
car = fgetc(fp);
}
fclose(fp);
c = 0;
for (a = 0; a < ASCII; a++) {
if (key[a] != -1) {
array[c] = create(&key[a], value[a]);
numofKeys = c;
c++;
}
}
string code_string[numofKeys];
while (numofKeys > 1) {
sort(array, numofKeys);
u = array[0]->freq + array[1]->freq;
strcpy(str, array[0]->ch);
strcat(str, array[1]->ch);
ptr = create(str, u);
ptr->right = array[1];
ptr->left = array[0];
array[0] = ptr;
sright(array, numofKeys);
numofKeys--;
}
Assign_Code(array[0], carray, 0, code_string);
ofp = fopen("small1.txt.huff", "w");
ifp = fopen("small1.txt", "r");
car = fgetc(ifp);
while (!feof(ifp)) {
for (a = 0; a < ASCII; a++) {
if (key[a] == car) {
for (b = 0; b < strlen(code_string[a]); b++) {
if (code_string[a][b] == 48) {
writebit(0);
} else if (code_string[a][b] == 49) {
writebit(1);
}
}
}
}
car = fgetc(ifp);
}
writebit(255);
fclose(ofp);
ifp = fopen("small1.txt", "r");
fclose(ifp);
free(key);
//free(value);
//free(code_string);
printf("here1\n");
return 0;
}
int writebit(unsigned char bitval) {
static unsigned char bitstogo = 8;
static unsigned char x = 0;
if ((bitval == 0) || (bitval == 1)) {
if (bitstogo == 0) {
fputc(x, ofp);
x = 0;
bitstogo = 8;
}
x = (x << 1) | bitval;
bitstogo--;
} else {
x = (x << bitstogo);
fputc(x, ofp);
}
return 0;
}
void Assign_Code(node* tree, int c[], int n, string * s) {
int i;
static int cnt = 0;
string buf = malloc(ASCII);
if ((tree->left == NULL) && (tree->right == NULL)) {
for (i = 0; i < n; i++) {
sprintf(buf, "%s%d", buf, c[i]);
}
s[cnt] = buf;
cnt++;
} else {
c[n] = 1;
n++;
Assign_Code(tree->left, c, n, s);
c[n - 1] = 0;
Assign_Code(tree->right, c, n, s);
}
}
node* create(char a[], int x) {
node* ptr;
ptr = (node *) malloc(sizeof(node));
ptr->freq = x;
strcpy(ptr->ch, a);
ptr->right = ptr->left = NULL;
return (ptr);
}
void sort(node* a[], int n) {
int i, j;
node* temp;
for (i = 0; i < n - 1; i++)
for (j = i; j < n; j++)
if (a[i]->freq > a[j]->freq) {
temp = a[i];
a[i] = a[j];
a[j] = temp;
}
}
void sright(node* a[], int n) {
int i;
for (i = 1; i < n - 1; i++)
a[i] = a[i + 1];
}
If your program is crashing on what is otherwise a valid operation (like returning from a function or closing a file), I'll near-guarantee it's a buffer overflow problem rather than a memory leak.
Memory leaks just generally mean your mallocs will eventually fail, they do not mean that other operations will be affected. A buffer overflow of an item on the stack (for example) will most likely corrupt other items on the stack near it (such as a file handle variable or the return address from main).
Probably your best bet initially is to set up a conditional breakpoint on writes to the file handles. This should happen in the calls to fopen and nowhere else. If you detect a write after the fopen calls are finished, that will be where your problem occurred, so just examine the stack and the executing line to find out why.
Your first problem (this is not necessarily the only one) lies here:
c = 0;
for (a = 0; a < ASCII; a++) {
if (key[a] != -1) {
array[c] = create(&key[a], value[a]);
numofKeys = c; // DANGER,
c++; // WILL ROBINSON !!
}
}
string code_string[numofKeys];
You can see that you set the number of keys before you increment c. That means the number of keys is one less than you actually need so that, when you access the last element of code_string, you're actually accessing something else (which is unlikely to be a valid pointer).
Swap the numofKeys = c; and c++; around. When I do that, I at least get to the bit printing here1 and exit without a core dump. I can't vouch for the correctness of the rest of your code but this solves the segmentation violation so anything else should probably go in your next question (if need be).
I can see one problem:
strcpy(str, array[0]->ch);
strcat(str, array[1]->ch);
the ch field of struct link is a char array of size 255. It is not NUL terminated. So you cannot copy it using strcpy.
Also you have:
ofp = fopen("small1.txt.huff", "w");
ifp = fopen("small1.txt", "r");
If small1.txt.huff does not exist, it will be created. But if small1.txt it will not be created and fopen will return NULL, you must check the return value of fopen before you go and read from the file.
Just from counting, you have 4 separate malloc calls, but only one free call.
I would also be wary of your sprintf call, and how you are actually mallocing.
You do an sprintf(buf, "%s%d", buf, c[i]) but that can potentially be a buffer overflow if your final string is longer than ASCII bytes.
I advise you to step through with a debugger to see where it's throwing a segmentation fault, and then debug from there.
i compiled the program and ran it with it's source as that small1.txt file and got "can't open (null)" if the file doesn't exist or the file exist and you give it on the command like ./huf small1.txt the program crashes with:
Program terminated with signal 11, Segmentation fault.
#0 0x08048e47 in sort (a=0xbfd79688, n=68) at huf.c:195
195 if (a[i]->freq > a[j]->freq) {
(gdb) backtrace
#0 0x08048e47 in sort (a=0xbfd79688, n=68) at huf.c:195
#1 0x080489ba in main (argc=2, argv=0xbfd79b64) at huf.c:99
to get this from gdb you run
ulimit -c 100000000
./huf
gdb --core=./core ./huf
and type backtrace
You have various problems in your Code:
1.- mallocs (must be):
//Arrays
char *key = (char*) malloc(ASCII * sizeof(char));
int *value = (int*) malloc(ASCII * sizeof(int));
sizeof(char) == 1, sizeof(char *) == 4 or 8 (if 64 bits compiler is used).
2.- Buffer sizes 255 (ASCII) is too short to receive the contents of array[0]->ch + array[1]->ch + '\0'.
3.- Use strncpy instead of strcpy and strncat instead of strcat.
4.- key is an array of individuals chars or is a null terminated string ?, because you are using this variable in both ways in your code. In the characters counting loop you are using this variables as array of individuals chars, but in the creation of nodes you are passing the pointer of the array and copying as null terminated array.
5.- Finally always check your parameters before used it, you are checking if argc < 2 after trying to open argv[1].