Problem with the page output function of a double-linked list - c

Paged output of list items. Each page shows count/2 items. Since the last page may hold fewer items than that, it must be handled separately. I keep an upper and a lower boundary; when the lower boundary is reached, the tail is output separately, and this is where the whole problem lies...
/// Pages through the list stored in hwnd: clears the screen, prints a header
/// and up to count/2 records, then waits for a key read with getch().
/// UpLimit is the node count/2 steps after the list head; LwLimit is the node
/// CountElementstInEnd steps before the list tail (start of the short "tail" page).
/// NOTE(review): the UpLimit loop follows ->next without a NULL check, so it
/// presumably expects at least count/2 + 1 nodes - confirm.
/// NOTE(review): CountElementstInEnd is size % (count/2) - 1, which is -1 when
/// size is a multiple of count/2 - confirm this is intended.
void ShowList(hwnd* hwnd){
int id = 0, count = 10, CountElementstInEnd;
node* temp;
node* head = hwnd->head;
node* UpLimit = hwnd->head;
node* LwLimit = hwnd->tail;
if(!head) return;
for(int i = 0; i < count/2 ; i++)
UpLimit = UpLimit->next;
CountElementstInEnd = hwnd->size % (count/2) - 1;
for(int i = 0; i < CountElementstInEnd; i++)
LwLimit = LwLimit->prev;
temp = LwLimit;
char c;
do
{
system("cls");
puts (" ID NAME SEX SPORT BORN GROWTH ");
for(int i = 0; i < count/2 ; i++, id++){
///Output the records at the end (their number may not be a multiple of count/2)
if(head == LwLimit){
for(int i = 0; i < CountElementstInEnd; i++, id++){
printf(" %-2.2d %-12.12s %-6.6s %-16.16s %-4.4d %-3.3d \n", id, temp->data->name, temp->data->sex, temp->data->sport, temp->data->born, temp->data->growth);
temp = temp->next;
}
///rewind temp and id so the tail page can be printed again on the next redraw
temp = LwLimit;
id-=CountElementstInEnd;
break;
}
///normal output
printf(" %-2.2d %-12.12s %-6.6s %-16.16s %-4.4d %-3.3d \n", id, head->data->name, head->data->sex, head->data->sport, head->data->born, head->data->growth);
head = head->next;
}
///key handling: '1' (0x31) moves head back by count nodes, '2' (0x32) redraws from the current head, Esc (27) leaves the pager
while(1){
c = getch();
///both '1' branches below perform the same rewind of count nodes
if (c == 0x31 && (head == LwLimit)){
for(int i = 0; i < count; i++)
head = head->prev;
id -= count;
break;
}
if (c == 0x31 && (head != UpLimit)){
for(int i = 0; i < count; i++)
head = head->prev;
id -= count;
break;
}
else if(c == 0x32 || c == 27)
break;
}
}
while(c != 27);
}
The function works, but if we page through to the end and then go back, it skips one page. If, on reaching the end, we go back not by count but by count/2 entries, the output loops between the last page and the "tail".
if (c == 0x31 && (head == hwnd->tail)){
head = head->prev->prev;
break;
}
then the output will loop at the end... Is there any way out of this situation? =(
https://pastebin.com/J2bnc151

This is a possible (working) example of how you might achieve your goals:
#include <stdio.h>
#include <stdlib.h>
// Node of the doubly-linked list used by the paging demo.
typedef struct node_s { // abbreviated struct declaration
char *name; // text printed for this record
struct node_s *prev, *next; // neighbouring nodes; ShowList stops when a link is NULL
} node;
const int pagesz = 2; // a "working" page size to adapt
/*
 * Interactive pager over a doubly-linked list.
 *
 * Prints up to pagesz records starting at head, then reads keys from stdin:
 *   '+'   advance one page (only if another page exists),
 *   '-'   go back one page (stops at the first node),
 *   Esc (27) or end of input   leave the pager.
 * Any other key is ignored.
 *
 * head: first node of the list; NULL is accepted and prints nothing.
 */
void ShowList( node *head ) {
    int id = 0;   // index of the first record on the current page
    int c = 0;    // int, not char: getchar() may return EOF

    if( head == NULL )
        return;

    while( c != 27 && c != EOF ) { // 27 == Esc
        // output the "page" (without "cls")
        puts( " ID NAME" );
        node *p = head; // a working copy
        for( int i = 0; i < pagesz && p; i++, p = p->next )
            printf(" %2d %s\n", id + i, p->name );

        for( ;; ) {
            c = getchar();
            if( c == 27 || c == EOF )
                break;

            if( c == '+' ) { // '+' == "advance"
                // Move only by whole pages so page starts stay aligned;
                // on the last page the view stays where it is.
                node *ahead = head;
                for( int i = 0; i < pagesz && ahead; i++ )
                    ahead = ahead->next;
                if( ahead ) {
                    head = ahead;
                    id += pagesz;
                }
                break;
            }

            if( c == '-' ) { // '-' == "rewind"
                for( int i = 0; i < pagesz && head->prev; i++ )
                    head = head->prev, id--;
                break;
            }
        }
    }
}
/*
 * Demo driver: links a fixed array of nodes into a doubly-linked list and
 * hands it to ShowList().
 */
int main() {
    node arr[] = {
        { "Grumpy 0"},
        { "Dopey 1" },
        { "Doc 2" },
        { "Sneezy 3" },
        { "Bashful 4" },
        { "Sleepy 5" },
        { "Happy 6" },
    };
    const size_t n = sizeof arr / sizeof arr[0];

    // Link neighbours without ever forming a pointer before the array:
    // the first node gets prev == NULL, the last one next == NULL.
    for( size_t i = 0; i < n; i++ ) {
        arr[i].prev = ( i > 0 )     ? &arr[i - 1] : NULL;
        arr[i].next = ( i + 1 < n ) ? &arr[i + 1] : NULL;
    }

    ShowList( arr );
    return 0;
}
ID NAME
0 Grumpy 0
1 Dopey 1
+
ID NAME
2 Doc 2
3 Sneezy 3
+
ID NAME
4 Bashful 4
5 Sleepy 5
-
ID NAME
2 Doc 2
3 Sneezy 3
+
ID NAME
4 Bashful 4
5 Sleepy 5
+
ID NAME
6 Happy 6
-
ID NAME
4 Bashful 4
5 Sleepy 5

Related

My program gives the correct output in windows(gcc) but in Linux(gcc) it leads to segmentation fault

This program finds the epsilon closure of all states of an NFA. I have used a stack to get this done. The program gives the right output when I compile it with gcc and run it on Windows 10 (Command Prompt). But when I compile it with the same compiler and run it on Linux, it results in a segmentation fault. I have not used any dynamic memory allocation, for that matter.
I tried to debug using gdb but not able to find the problem. Detected a segmentation fault after a printf("\n") when displaying the transitions matrix.
It would be very helpful for someone could find the fault. Thanks in advance.
The input is read from a file : nfa.txt.
//states
q0 q1 q2
//input_symbols
0 1
//start_state
q0
//final_state
q2
//transitions of the form : intial_state input final_state
q0 0 q0
q0 e q1
q1 1 q1
q1 e q2
q2 2 q2
The output is as follows:
232 is to represent null transition(Φ) and -1 for ε.
States:
q0
q1
q2
Transitions read
232 0 1 2 -1
0 0 232 232 1
1 232 1 232 2
2 232 232 2 232
e-closure(0) : 0 1 2
e-closure(1) : 1 2
e-closure(2) : 2
Please bear with me because it's a fairly long program.
#include <stdio.h>
#include <string.h> //REMEMBER ME WHILE I'M GONE
#include <errno.h>
#include <stdlib.h>
// Program-wide state shared by the parser, the stack helpers and the closure search.
FILE *file; // input stream for the NFA description
int numberOfStates = 0; // number of entries filled in states[]
int flag = 0; // general-purpose parser flag
int states[20]; // state numbers (the digit following 'q' in tokens such as "q0")
int j = 0; // general-purpose counters shared across functions
int i = 0;
int k = 0;
char a[20]; // input scratch buffer; only 20 bytes, so reads into it must be limited to that size
int transitions[4][5]; // row 0: input header, rows 1..3: state+1; column 0: state, columns 1..3: symbol+1, column 4: epsilon target
int visited[10]; // visited[s] is set to 1 once state s has been reached
int MAXSIZE = 8; // capacity of stack[]
int stack[8]; // explicit stack used by the closure search
int top = -1; // index of the top element in stack[]; -1 means the stack is empty
/* Reports whether the global stack holds no elements: 1 if top is -1, else 0. */
int isempty()
{
    return top == -1;
}
/*
 * Reports whether the global stack has no free slot left.
 * The last valid index is MAXSIZE - 1, so the stack is full as soon as top
 * reaches it (testing top == MAXSIZE would allow one write past the array).
 * Returns 1 when full, 0 otherwise.
 */
int isfull()
{
    return top >= MAXSIZE - 1;
}
/*
 * Removes and returns the top element of the global stack.
 * On an empty stack a message is printed and -1 is returned, so the function
 * yields a defined value on every path.
 */
int pop()
{
    if (isempty()) {
        printf("Could not retrieve data, Stack is empty.\n");
        return -1;
    }

    int data = stack[top];
    top = top - 1;
    return data;
}
int push(int data) {
if(!isfull()) {
top = top + 1;
stack[top] = data;
}
else
printf("Could not insert data, Stack is full.\n");
}
/*
 * Returns 1 if state `edge` is marked in visited[], 0 otherwise.
 * A state number outside visited[] is reported as visited (1) so that callers,
 * which mark and push every unvisited state, never index visited[] with it.
 */
int IsVisited(int edge)
{
    const int capacity = (int)(sizeof visited / sizeof visited[0]);

    if (edge < 0 || edge >= capacity)
        return 1;

    return visited[edge] == 1;
}
/*
 * Prints the epsilon closure of `state` as "e-closure(<state>) : s1 s2 ...".
 *
 * Performs a depth-first search with the global stack, following only the
 * epsilon column (the last column) of transitions[]. Rows 1 and up of
 * transitions[] hold one state each, with the state number in column 0;
 * 232 in the epsilon column means "no transition".
 * visited[] is reset on entry and holds the closure on return.
 */
void epsilon_closure(int state)
{
    const int rows     = (int)(sizeof transitions / sizeof transitions[0]);
    const int eps_col  = (int)(sizeof transitions[0] / sizeof transitions[0][0]) - 1;
    const int capacity = (int)(sizeof visited / sizeof visited[0]);

    if (state < 0 || state >= capacity) {
        fprintf(stderr, "epsilon_closure: state %d is out of range\n", state);
        return;
    }

    for (int s = 0; s < capacity; s++)
        visited[s] = 0;

    push(state);
    visited[state] = 1;

    while (!isempty()) {
        const int u = pop();

        // Row 0 is the header, so state rows start at 1 and end at rows - 1.
        for (int row = 1; row < rows; row++) {
            const int target = transitions[row][eps_col];

            // if there is an epsilon transition from the state 'u' to 'target'
            if (transitions[row][0] != u || target == 232)
                continue;
            if (target < 0 || target >= capacity)
                continue; // cannot be tracked in visited[]

            if (!IsVisited(target)) {
                visited[target] = 1;
                push(target);
            }
        }
    }

    printf("e-closure(%d) : ", state);
    for (int s = 0; s < capacity; s++) {
        if (visited[s] == 1)
            printf("%d ", s);
    }
    printf("\n");
}
int main()
{
file = fopen("nfa.txt","r");
if (file == NULL) {
perror("fopen");
return -1;
}
//Reading the states
while(!feof(file))
{
fscanf(file,"%s",a);
if(strcmp("//states",a) == 0)
flag = 1;
else if(strcmp("//input_symbols",a) == 0)
break;
if (flag == 1 && a[0] != '/')
{
states[i++] = a[1] - '0';
}
numberOfStates = i;
}
//Display the states of the e-NFA
printf("\nStates : \n");
for(i = 0; i < numberOfStates; i++ )
{
printf("q%d\n",states[i]);
}
i = 1;
flag = 0;
//Reading the transition table
for(int i = 0; i < 4; i++){
for(int j = 0; j < 5; j++)
{
transitions[i][j] = 232;
}
}
while(!feof(file))
{
fgets(a,100,file);
if(a[0] == '/')
{
flag = 1;
}
if(flag == 1 && a[0] != '/')
{
j = 0;
//found a way to store the transition table in a matrix
if(a[3] == 'e')
transitions[(a[1] - '0') + 1][4] = a[6] - '0';
else
transitions[(a[1] - '0') + 1][(a[3] - '0') + 1] = a[6] - '0';
if(a[3] != 'e')
transitions[0][a[3] - '0' + 1] = a[3] - '0'; //input
else
transitions[0][4] = -1; // epsilon input
transitions[(a[1] - '0') + 1][0] = a[1] - '0'; //initial state
}
}
printf("\nTransitions read\n");
for(int i = 0; i < 4; i++){
for(int j = 0; j < 5; j++)
{
printf("%d\t",transitions[i][j]);
}
printf("\n"); //detected segmentation fault here
}
//Calling e-closure for all the states
for(k = 0; k < numberOfStates; k++)
{
epsilon_closure(states[k]);
}
return 0;
}
There is a bug here:
// push() as posted in the question, quoted here to show the off-by-one:
// the guard relies on isfull(), which in the question tests top == MAXSIZE.
int push(int data) {
if(!isfull()) {
top = top + 1; // with top == MAXSIZE-1 this makes top equal to MAXSIZE ...
stack[top] = data; // ... so this writes one element past the end of stack[]
}
else
printf("Could not insert data, Stack is full.\n");
}
If top == MAXSIZE-1, isfull() will return false; you then increment top to MAXSIZE and assign stack[MAXSIZE], which is out of bounds and invokes UB. Not having checked the complete source code, I could imagine that incrementing top after assigning would be correct, or you have to change isfull() to return true if top >= MAXSIZE-1.

Count occurrences and associate with given array in C

I'm having issues to correct my code so that it works as I want it.
I have three arrays given in this example:
char arr[MAX_ELEMENTS][MAX_LENGTH] = {"ABS","ABS","ABS","ACT","ACT","PPB","PPB","QQQ","QQQ"};
char race[MAX_ELEMENTS][MAX_LENGTH] = {"PARI", "PARI", "LOND", "PARI", "PARI", "CYKA", "LOND", "CYKA", "PARI"};
int freq[MAX_ELEMENTS];
I wish to create a function that can count the amount of occurrences of string elements in arr[] and store them in freq[]. Apart from that I also wish to know in what race[] there have been the most occurrences of given arr[].
To demonstrate this here is an example of what output I wish to receive when the function works:
In Race [PARI] the highest occurence was [ABS] with 3 occurences!
In Race [LOND] the highest occurence was [ACT] with 1 occurences!
.....
Currently, I am able to count the occurrences of arr[] in freq[] but I can't associate them with their respective race[] and give that output..
/* Counts how often each string of arr[] occurs. The total is stored in freq[]
   at the index of the first occurrence; later duplicates get freq 0.
   NOTE(review): freq[i] is only written when it is already non-zero, so this
   presumably expects freq[] to be pre-filled with a non-zero marker; that
   initialisation is not part of this snippet - confirm. */
for(i=0; i<size; i++)
{
count = 1;
for(j=i+1; j<size; j++)
{
/* If duplicate element is found */
if(strcmp(arr[i], arr[j])==0)
{
count++;
/* Make sure not to count frequency of same element again */
freq[j] = 0;
}
}
/* If frequency of current element is not counted */
if(freq[i] != 0)
{
freq[i] = count;
}
}
Giving me currently :
ABS occurs 3 times.
ACT occurs 2 times.
etc. etc...
But I don't know how I can associate them with the race[] and only count them if a given race.
You probably have to use struct here to format your data.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#define true 1
#define len 100
#define elms 10
struct NODE;
/* Shorthand so the code can write Node instead of struct NODE. */
#define Node struct NODE
/* Binary-search-tree node; the tree is ordered by the hash value of the name. */
struct NODE {
unsigned long int val; /* hash of name, used as the ordering key */
int count; /* incremented each time the same hash value is inserted again */
char name[len]; /* the stored string */
Node *left; /* child used when the inserted value is not greater than val */
Node *right; /* child used when the inserted value is greater than val */
};
/*
 * Allocates a tree node for the string str with hash value val.
 * The name is copied with truncation to the size of the name field, the count
 * starts at 1 and both children are NULL.
 * Returns the new node, or NULL if str is NULL or memory is exhausted.
 * The caller owns the node.
 */
Node * makeNode(char * str, unsigned long int val){
    if (str == NULL)
        return NULL;

    Node * node = malloc(sizeof *node);
    if (node == NULL)
        return NULL;

    /* snprintf always terminates and never writes past name[] */
    snprintf(node->name, sizeof node->name, "%s", str);
    node->val = val;
    node->left = NULL;
    node->right = NULL;
    node->count = 1;
    return node;
}
/*
 * Polynomial hash of a NUL-terminated string:
 *     sum over i of name[i] * 19^i
 * computed entirely in unsigned integer arithmetic, so long strings wrap
 * around instead of losing precision or overflowing a floating-point
 * conversion. Bytes are taken as unsigned char, so the result does not depend
 * on the signedness of plain char.
 * Returns 0 for the empty string.
 */
unsigned long int getHash(const char * name){
    const unsigned long prime = 19;
    unsigned long power = 1;   /* prime^i for the current position */
    unsigned long val = 0;

    for (; *name != '\0'; ++name) {
        val += (unsigned char)*name * power;
        power *= prime;
    }
    return val;
}
/*
 * Records one occurrence of name in the tree rooted at root.
 * If a node with the same hash value exists its count is incremented,
 * otherwise a new node is attached as a leaf (larger hashes to the right).
 * The hash is kept as unsigned long, the same type as the node key, so it is
 * not truncated before comparison.
 * NOTE(review): two different strings with the same hash share one node; the
 * names themselves are not compared.
 * Does nothing when root is NULL.
 */
void insert(Node * root, char * name){
    const unsigned long val = getHash(name);
    Node * cur = root;

    while (cur != NULL) {
        if (cur->val == val) {
            cur->count += 1;
            return;
        }

        Node ** child = (val > cur->val) ? &cur->right : &cur->left;
        if (*child == NULL) {
            *child = makeNode(name, val);
            return;
        }
        cur = *child;
    }
}
/*
 * Looks up the node whose hash value equals the hash of name.
 * The hash is kept as unsigned long, matching the node key type.
 * Returns the node, or NULL when no such node exists (or root is NULL).
 */
Node * find(Node * root, char * name){
    const unsigned long val = getHash(name);

    for (Node * cur = root; cur != NULL; ) {
        if (cur->val == val)
            return cur;
        cur = (val > cur->val) ? cur->right : cur->left;
    }
    return NULL;
}
/* A race together with the list of element names associated with it. */
struct Race {
char name[len]; /* race name, e.g. "PARI" */
char elements[elms][len]; /* element names; readers stop at the first empty string */
};
char arr[elms][len] = {"ABS","ABS","ABS","ACT","ACT","PPB","PPB","QQQ","QQQ"};
char race[elms][len] = {"PARI", "PARI", "LOND", "PARI", "PARI", "CYKA", "LOND", "CYKA", "PARI"};
int freq[elms];
void copyArray(char dest[elms][len], char src[elms][len] ){
int i = 0;
while(strlen(src[i]) > 0){
strcpy(dest[i],src[i]);
++i;
}
}
int main(){
Node * root = makeNode("root", 0);
int i = 0;
while(strlen(arr[i]) > 0){
insert(root,arr[i]);
++i;
}
i = 0;
while(strlen(arr[i]) > 0){
Node * r = find(root,arr[i]);
printf("found %s, count = %ld\n", r->name, r->count);
++i;
}
// make representation of race
struct Race r1, r2;
strcpy(r1.name, "PARI");
{
char tmp[elms][len] = { "ABS", "PPB", "QQQ" };
copyArray(r1.elements, tmp);
}
strcpy(r2.name, "LOND");
{
char tmp[elms][len] = { "ACT" };
copyArray(r2.elements, tmp);
}
struct Race races[2] = {r1, r2};
i = 0;
while(i < 2){
struct Race * current = &races[i];
printf("for %s", current->name);
Node * max = NULL;
int m = -1;
int j = 0;
while(strlen(current->elements[j]) > 0){
Node * tmp = find(root, current->elements[j]);
if( tmp != NULL && tmp->count > m) {
max = tmp;
m = tmp->count;
}
++j;
}
if (max != NULL){
printf(" max is %s : %d\n", max->name, max->count);
}else{
printf(" max is None\n");
}
++i;
}
return 0;
}
Basically you have to format your data and specify the links between its parts. Here I used a binary tree and the Rabin-Karp hashing technique to store the data efficiently.
Binary tree is best way to solve counting problem, since the search operation fairly cheap. and Rabin karp hashing technique will avoid string comparison every time.
And I create a struct called Race to store all related elements of that race. so the algorithm is going to be.
let arr be array of elements
let races be array of races
for each race in races
define related element
#find occurrence now
#Binary tree will increment count if element already exist.
let binary_tree be a Binary Tree
for each element in arr
add element to binary_tree
# now we have all the elements with it's count
# let's iterate through races now
for each race in races
m = null
for element in race.elements
node = find_element_in_binary_tree(element)
if node is not null
m = max(m, node)
if m is not null then
print m
else
print not found
First, initializations, note the []s
char arr[][MAX_LENGTH] = {"ABS","ABS","ABS","ACT","ACT","PPB","PPB","QQQ","QQQ"};
char race[][MAX_LENGTH] = {"PARI","PARI","LOND","PARI","PARI","CYKA","LOND","CYKA","PARI"};
int freq[MAX_ELEMENTS];
int n = sizeof(arr)/sizeof(*arr); // get actual number of used items
int i,j;
int max = 0; // init max to 0
The main loop goes through arr and race, and whenever a dupe is found at [j] (after [i]), "invalidate" the dupe ("already processed") by setting its first char to 0 (empty string).
Note that j starts from i and not i+1 to ensure freq is at least 1, even for the first non-dupes items.
// For every not-yet-processed item i, count the later items that match it in
// both arr[] and race[], and track the largest count seen in max.
for(i=0 ; i<n ; i++) {
freq[i] = 0; // ensure freq is 0 for any item
if ( ! *arr[i]) continue; // skip already processed items
for(j=i ; j<n ; j++) { // j=i, not i+1!
if (!strcmp(arr[i],arr[j]) && !strcmp(race[i],race[j])) {
freq[i]++; // one more item with the same arr and race as item i
if (freq[i] > max) max = freq[i]; // update max if necessary
if (j > i) *arr[j] = 0; // invalidate that arr element
}
}
}
Finally display the max appearances, including ties
printf("Items at max=%d:\n", max);
for(i=0 ; i<n ; i++) {
if (freq[i] == max) { // skipped items are never displayed (max cannot be 0)
printf("%s / %s\n", arr[i],race[i]);
}
}
(no need to check for "invalidation" as max will be >0, and all invalidated items have freq[i] == 0)

I get error when i use fgets instead of gets

This is a picture when I use fgets. (it does not work properly, insanely slow !!)
and This is a picture when it comes to gets. (works fine)
# define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
// Holds the integers parsed from one input line.
struct sNode {
struct sNode* tNode; // not accessed by the code shown here
int G[20]; // parsed values in input order; main reads G[0] and G[1]
};
// Node of the circular list that main builds and walks.
struct tNode {
struct tNode* prev; // link to the previous node; not read by the code shown here
struct tNode* next; // link to the following node
int data; // 1-based label printed when the node is selected
int used; // set to 1 once the node has been printed
};
/*
 * Parses whitespace-separated non-negative decimal numbers from string into
 * (*t_ArrayRef)->G, in input order.
 *
 * Parsing stops at the end of the string, at the first character that is
 * neither a digit nor whitespace, or when G is full. Trailing newlines (as
 * left by fgets) are treated as whitespace.
 *
 * Returns the index of the last value stored, i.e. count - 1; -1 means no
 * number was found.
 * NOTE(review): values too large for int are not detected.
 */
int convert_input(struct sNode** t_ArrayRef, char string[200])
{
    const int capacity = (int)(sizeof (*t_ArrayRef)->G / sizeof (*t_ArrayRef)->G[0]);
    int count = 0;
    int i = 0;

    while (count < capacity) {
        while (string[i] == ' ' || string[i] == '\t' ||
               string[i] == '\n' || string[i] == '\r')
            i++;

        if (string[i] < '0' || string[i] > '9')
            break; /* end of string or not a number */

        int value = 0;
        while (string[i] >= '0' && string[i] <= '9')
            value = value * 10 + (string[i++] - '0');

        (*t_ArrayRef)->G[count++] = value;
    }
    return count - 1;
}
/*
 * Reads "<count> <step>" from one input line, builds a circular list labelled
 * 1..count and prints the labels in the order in which every step-th unused
 * node is reached, formatted as "<a, b, c>".
 *
 * Returns 0 on success, 1 on bad input or allocation failure.
 */
int main() {
    enum { MAX_NODES = 6000 };
    char string[200];
    struct tNode* tNod[MAX_NODES];
    int i;

    struct sNode* t = malloc(sizeof *t);
    if (t == NULL) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }

    /* The size passed is that of the array itself, not of a pointer to it. */
    if (fgets(string, sizeof string, stdin) == NULL) {
        free(t);
        return 1;
    }
    for (i = 0; string[i] != '\0'; i++) { /* drop the newline kept by fgets */
        if (string[i] == '\n' || string[i] == '\r') {
            string[i] = '\0';
            break;
        }
    }

    if (convert_input(&t, string) < 1) {
        fprintf(stderr, "expected two numbers: count and step\n");
        free(t);
        return 1;
    }
    const int num = t->G[0];
    const int step = t->G[1];
    free(t);

    if (num < 1 || num > MAX_NODES || step < 1) {
        fprintf(stderr, "count must be 1..%d and step at least 1\n", MAX_NODES);
        return 1;
    }

    /* calloc leaves every field, including used, in a defined zero state */
    for (i = 0; i < num; i++) {
        tNod[i] = calloc(1, sizeof *tNod[i]);
        if (tNod[i] == NULL) {
            fprintf(stderr, "out of memory\n");
            while (i > 0)
                free(tNod[--i]);
            return 1;
        }
    }
    for (i = 0; i < num; i++) {
        tNod[i]->data = i + 1;
        tNod[i]->next = tNod[(i + 1) % num]; /* last node links back to the first */
    }

    struct tNode* current = tNod[0];
    int printed = 0;
    int since = 0; /* unused nodes passed since the last print */

    printf("<");
    while (printed < num) {
        while (current->used == 1) /* an unused node exists while printed < num */
            current = current->next;

        if (++since == step) {
            since = 0;
            printed++;
            if (printed != num)
                printf("%d, ", current->data);
            else
                printf("%d", current->data);
            current->used = 1;
        }
        current = current->next;
    }
    printf(">");

    for (i = 0; i < num; i++)
        free(tNod[i]);
    return 0;
}
Could anyone explain to me why I can't get it working using fgets ?
When you use
fgets(str[0], sizeof(str[0]), stdin);
you do not pass the correct size: sizeof(str[0]) is the size of a pointer to char, not the size of the 200-byte char array that you stored in it.
The compiler resolves this sizeof operator at compile time. It has no idea about the value that you put into element zero. In fact, it ignores zero altogether, replacing with sizeof(*str).
Fix this problem by passing the proper size:
fgets(str[0], sizeof(string), stdin);

Insert function hash table C

I am having trouble implementing my insert function for my hash table.
So I implement some test calls where I just call the function separately. For actual use, I call the function inside a while loop. For testing purpose, I only run the loop 4 times.
I post some outputs below. The reason the table looks weird is because of my hash function. It hashes the words such that A = 1, B = 2, C = 3, and so on. The position of the letter in the word is irrelevant, since I will consider permutations of the word. Moreover, the case of the letter will be irrelevant in this problem as well, so the value of a = the value of A = 1.
And for strings, abc = 1 + 2 + 3 = 6, bc = 2 + 3 = 5, etc.
Overall, the hash function is fine. The problem is the insert function.
The first 4 words of my local dictionary are A, A's, AA's, AB's.
My expected output should be (I got the same output when I run the test calls):
0:
1: [W: A, Len:1]
2:
3:
...
18:
19:
20: [W: A's, Len:3]
21: [W: AA's, Len:4]
22: [W: AB's, Len:4]
But when I call the function inside a loop, whatever is last on the list will overwrite other entries. If I run the loop 100 times, then the last entry still replaces the previous ones (Notice how the lengths of the words are unchanged, but only the words are replaced):
0:
1: [W: AB's, L:1]
2:
3:
...
18:
19:
20: [W: AB's, Len:3]
21: [W: AB's, Len:4]
22: [W: AB's, Len:4]
Below is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Sums the alphabet positions of the letters in word (A/a = 1 ... Z/z = 26).
 * Case and letter order do not matter; every other character contributes 0.
 */
int hash(char *word)
{
    int sum = 0;

    for (const char *p = word; *p != '\0'; ++p) {
        const int upper = *p - 'A';
        const int lower = *p - 'a';

        if (upper >= 0 && upper < 26)
            sum += upper + 1;
        else if (lower >= 0 && lower < 26)
            sum += lower + 1;
    }
    return sum;
}
// One element of a hash-table bucket chain.
typedef struct Entry {
char *word; // the stored word
int len; // length supplied by the caller for this word
struct Entry *next; // next entry in the same bucket, NULL at the end of the chain
} Entry;
#define TABLE_SIZE 1000 // random numbers for testing
Entry *table[TABLE_SIZE] = { NULL }; // an array of elements
/*
 * Prepares the table: pushes one placeholder entry (empty word, length 0) on
 * the front of every bucket.
 * Terminates the program if memory for a placeholder cannot be allocated.
 */
void init() {
    for (int i = 0; i < TABLE_SIZE; i++) {
        struct Entry *en = malloc(sizeof *en);
        if (en == NULL) {
            perror("malloc");
            exit(EXIT_FAILURE);
        }
        // initialize values
        en->word = "";
        en->len = 0;
        en->next = table[i];
        table[i] = en;
    }
}
//Insert element
void insertElement(char *word, int len) {
int h = hash(word);
int i;
// because all words are different so there is no need to check for duplicates
struct Entry *en = (struct Entry *)malloc(sizeof(struct Entry));
en->word = word;
en->len = len;
en->next = table[h];
table[h] = en;
}
/*
 * Frees every entry of every bucket and resets the bucket heads to NULL so
 * the table holds no dangling pointers afterwards.
 */
void cleanTable()
{
    for (int i = 0; i < TABLE_SIZE; ++i) {
        struct Entry *p = table[i];

        while (p != NULL) {
            struct Entry *following = p->next; // read before p is released
            free(p);
            p = following;
        }
        table[i] = NULL;
    }
}
int main() {
init(); // create hash table
// test calls produce correct output
//insertElement("A", (int)strlen("A"));
//insertElement("A's", (int)strlen("A's"));
//insertElement("AA's", (int)strlen("AA's"));
//insertElement("AB's", (int)strlen("AB's"));
int i;
i = 0;
FILE* dict = fopen("/usr/share/dict/words", "r"); //open the dictionary for read-only access
if(dict == NULL) {
return;
}
// Read each line of the file, and insert the word in hash table
char word[128];
while(i < 4 && fgets(word, sizeof(word), dict) != NULL) {
size_t len = strlen(word);
if (len > 0 && word[len - 1] == '\n') {
word[len - 1] = '\0'; // trim the \n
}
insertElement(word, (int)strlen(word));
i++;
}
for ( i=0; i < 50; i++)
{
printf("%d: ", i);
struct Entry *enTemp = table[i];
while (enTemp->next != NULL)
{
printf("[W: %s, Len:%d] ", enTemp->word, enTemp->len);
enTemp = enTemp->next;
}
printf("\n");
}
cleanTable();
return 0;
}
Try to reallocate the memory in each loop in this part of code:
char* word = malloc(sizeof(char)*128);
while(i < 4 && fgets(word, sizeof(word), dict) != NULL) {
size_t len = strlen(word);
if (len > 0 && word[len - 1] == '\n') {
word[len - 1] = '\0'; // trim the \n
}
insertElement(word, (int)strlen(word));
word = malloc(sizeof(char)*128);
i++;
}
You forgot to allocate separate memory for every string, which causes all pointers to point at the same buffer.
Note: Not tested
Notice that your insertElement gets a pointer to a string and assigns that pointer to the current Entry. In the main function, however, you pass the word argument (a pointer) that points to the stack-allocated string, and that string is changed after each read of a word. You must use malloc so that each word points to its own memory area.

Huffman encoding in C

I am trying to write a module which assigns huffman encoded words to the input symbols, but the given codes differ from what they should look like.
For example, if I run it with following symbol probabilities:
(1st column: probabilities; 2nd column: my huffman codes; 3rd column: correct huffman codes)
0,25 --> 01 --> 10
0,15 --> 101 --> 111
0,15 --> 110 --> 110
0,1 --> 1111 --> 010
0,1 --> 000 --> 001
0,05 --> 0010 --> 0110
0,05 --> 0011 --> 0001
0,05 --> 1000 --> 0000
0,05 --> 1001 --> 01111
0,05 --> 1110 --> 01110
I think the problem might be caused by my function for generating Huffman codes: strcat()'s behaviour on its own was not suitable for my idea, so I combined it with strcpy(). I am not sure whether it is good that way, though.
I am providing you with two functions responsible for codes assign, build_huffman_tree() and generate_huffman_tree(), hopefully you can help me out with this, and point out where the problem could be.
Generate Huffman tree:
/*
 * Walks the tree below n and records the code of every leaf.
 * code is the bit string accumulated on the way from the root; "0" is
 * appended for a left branch and "1" for a right branch. For each leaf the
 * symbol goes to SYMBOLS[] and a copy of the code to CODES[].
 * Branches whose code would not fit the local buffer are not followed.
 */
void generate_huffman_tree(node *n, char *code){
    if(n == NULL || code == NULL)
        return;

    if(n->left == NULL && n->right == NULL){
        SYMBOLS[code_counter] = n->symbol; // store the current code
        CODES[code_counter] = strdup(code);
        code_counter += 1;
        return;
    }

    char temp[100];
    const size_t depth = strlen(code);
    if(depth + 2 > sizeof temp)
        return; // no room for one more digit plus the terminator

    memcpy(temp, code, depth);
    temp[depth + 1] = '\0';

    if(n->left != NULL){
        temp[depth] = '0';
        generate_huffman_tree(n->left, temp);
    }
    if(n->right != NULL){
        temp[depth] = '1';
        generate_huffman_tree(n->right, temp);
    }
}
Build Huffman tree:
node *build_huffman_tree(double *probabilities){
int num_of_nodes = NUM_SYMBOLS;
int num = NUM_SYMBOLS;
// 1) Initialization: Create new node for every probability
node *leafs = (node*) malloc(num_of_nodes*sizeof(node));
int i;
for(i=0; i<num_of_nodes; i+=1){
node c;
c.probability= *(probability+ i);
c.symbol= *(SYMBOLS + i);
c.left= NULL;
c.right= NULL;
*(leafs+i) = c;
}
node *root= (node*) malloc(sizeof(node)); // Root node which will be returned
while(num_of_nodes> 1){
// 2) Find 2 nodes with lowest probabilities
node *min_n1= (node*)malloc(sizeof(node));
node *min_n2 = (node*)malloc(sizeof(node));
*min_n1 = *find_min_node(leafs, num, min_n1);
leafs = remove_node(leafs, min_n1, num);
num -= 1;
*min_n2= *find_min_node(leafs, num, min_n2);
leafs = remove_node(leafs, min_n2, num);
num -= 1;
// 3) Create parent node, and assign 2 min nodes as its children
// add parent node to leafs, while its children have been removed from leafs
node *new_node = (node*) malloc(sizeof(node));
new_node->probabilty= min_n1->probability + min_n2->probability;
new_node->left= min_n1;
new_node->right= min_n2;
leafs = add_node(leafs, new_node, num);
num += 1;
num_of_nodes -= 1;
root = new_node;
}
return root;
I have tested functions for finding 2 min nodes, removing and adding nodes to leafs structure, and it is proven to work fine, so I guess the problem should be something about this here.
I didn't look at your source code, but there's nothing wrong with the Huffman code you generated. There is also nothing wrong with what you are calling "correct huffman codes". There is more than one valid Huffman code possible with that set of probabilities. If you take the sum of the probabilities times the bit lengths for both Huffman codes, you will find that those sums are exactly the same. Both Huffman codes are optimal, even though they're different.
The way this happens is that when you look for the two lowest frequencies, there is more than one choice. Depending on which choice you make, you will get a different tree.
This code below is an implementation of Mark Allen Weiss's Algorithm. Give it a try!
It offers routines similar to yours, in addition to a function that displays the result according to the previously constituted codes for each letter.
The compiler used is MinGW 2.95 (C-Free 4.0).
Prerequisites:
An input file with a text (any, but remember, it deals with alphabet characters only, no punctuation, no space, no numbers).
The constant IN_PATH is the one you should modify to point at the right location of your text file to run the program successfully.
The image shows a sample text, the letters proportions and the result of huffman code interpretation (letters separated by one space).
Good luck!
//*******************************************************************
// File: HuffmanEncoding - Tree.c
// Author(s): Mohamed Ennahdi El Idrissi
// Date: 14-Aug-2012
//
// Input Files: in.txt
// Output Files: out.txt
// Description: CSC 2302 - <Data Structures>
// <Struct, Array, File I/O, Recursion, Pointers, Binary Tree>
// This program covers the Huffman Encoding concept.
// We first read a file, from we which we count the number of characters, and then reckon the frequency
// of each letter individually. Each letter's frequency is stored in a node with its respective character.
// This node is stored in an array of 26 elements (element 0 -> 'A', element 1 -> 'B'...element 25 -> 'Z').
// Each element is a pointer, and each pointer is supposed to be a root of a tree (sub tree).
// After processing all characters of the text (read from a file), we end up with an array with
// 25 NULL elements. The only element that is not NULL is the root of the tree that gathers the different
// nodes of each letter.
// Deducing the encoding of each letter if performed with intermediary of the prefix traversal.
// To summarize, the pseudo-code is:
// - Initialize the letters array
// - Read file
// - Increment each letter frequency + compute the number of characters in the file
// - Store in the array's node the frequency of each letter
// - Compute the number (N) of involved characters (Sometimes, texts don't include all letters. In our case 'Q' and 'Z' are absent).
// - Loop N times
// - find Minimum and second minimum
// - create a new node, its left child contains the minimum and the right child contains the second minimum
// - minimum position points on the new node, and the second minimum's array position points on NULL
// - Browse the array till the unique non NULL element is encountered
// - invoke prefix traversal function
// - build the encoding of each character
// - display the letter and its characteristics when found.
// - Finally, read the output file to interpret its content
// - if root contains a character (A - Z), display character
// - else, if the current character is '0', browse the left leaf
// - else, if the current character is '1', browse the right leaf
//
//*******************************************************************
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NBR_OF_LETTERS 26
#define LEFT 'L'
#define RIGHT 'R'
#define CODE_SIZE 128
#define TYPED_ALLOC(type) (type *) malloc( sizeof(type) )
#define BYTE_SIZE 8
#define IN_PATH "./files/in.txt"
#define OUT_PATH "./files/out.txt"
/* Node of the Huffman tree; leaves carry a letter, internal nodes join two subtrees. */
typedef struct tree_node_s {
float frequency; /* occurrence count while reading, relative frequency afterwards; sum of the children for internal nodes */
char c; /* 'A'..'Z' for a letter node; internal nodes get a small counter value instead */
char code[CODE_SIZE]; /* code of the letter as a string of '0' and '1', filled in by encode() */
struct tree_node_s *left; /* branch encoded as '0' */
struct tree_node_s *right; /* branch encoded as '1' */
} tree_node_t;
tree_node_t *arr[NBR_OF_LETTERS], *letters[NBR_OF_LETTERS];
void findMinAndSecondMin(tree_node_t **, float *, int *, float *, int *);
void printTree(tree_node_t *);
void interpret(char *, int *, tree_node_t *);
void printTree(tree_node_t *);
void encode(tree_node_t *, tree_node_t **, char, short, char*);
/*
*
*/
int main() {
char str[CODE_SIZE];
int fileReadingVerdict;
int i, j, k, index, n;
float min, secondMin;
int minIndex, secondMinIndex;
int numberOfCharacters = 0;
tree_node_t *tree;
FILE *in = fopen(IN_PATH, "r");
FILE *out;
if ( in == NULL ) {
printf("\nFile not found");
return 0;
} else {
/*
* Begin: Array Initialization
*/
for (i = 'A'; i <= 'Z'; i++) {
index = i - 'A';
arr[index] = NULL;
}
/*
* End: Array Initialization
*/
numberOfCharacters = 0;
fileReadingVerdict = fgets(str, CODE_SIZE, in) != NULL;
while(!feof(in) || fileReadingVerdict) {
n = strlen(str);
printf("\n%s", str);
for (i = 0; i < n ; i ++ ) {
str[i] = toupper(str[i]);
if (str[i] >= 'A' && str[i] <= 'Z') {
numberOfCharacters ++;
index = str[i] - 'A';
if (arr[index] == NULL) {
arr[index] = TYPED_ALLOC(tree_node_t);// malloc(sizeof(tree_node_t));
arr[index]->c = str[i];
arr[index]->frequency = 1;
arr[index]->left = arr[index]->right = NULL;
} else {
arr[index]->frequency += 1;
}
}
}
if (fileReadingVerdict) {
fileReadingVerdict = fgets(str, CODE_SIZE, in) != NULL;
}
}
}
fclose(in);
for ( i = 0, n = 0 ; i < NBR_OF_LETTERS ; i ++ ) {
letters[i] = arr[i];
if (arr[i] != NULL) {
arr[i]->frequency /= numberOfCharacters; // Computing the frequency.
n ++; // n is the number of involved letters which is going to be consumed in the do while loop's condition
}
}
j = 1;
do {
findMinAndSecondMin(arr, &min, &minIndex, &secondMin, &secondMinIndex);
if (minIndex != -1 && secondMinIndex != -1 && minIndex != secondMinIndex) {
tree_node_t *temp;
tree = TYPED_ALLOC(tree_node_t);// malloc(sizeof(tree_node_t));
tree->frequency = arr[minIndex]->frequency + arr[secondMinIndex]->frequency;
tree->c = j;
tree->left = arr[minIndex];
temp = TYPED_ALLOC(tree_node_t);// malloc(sizeof(tree_node_t));
temp->c = arr[secondMinIndex]->c;
temp->frequency = arr[secondMinIndex]->frequency;
temp->left = arr[secondMinIndex]->left;
temp->right = arr[secondMinIndex]->right;
tree->right = temp;
arr[minIndex] = tree;
arr[secondMinIndex] = NULL;
}
j ++;
} while( j < n );
for ( i = 0 ; i < NBR_OF_LETTERS ; i ++ ) {
if (arr[i] != NULL) {
char code[CODE_SIZE];
strcpy(code, "");
encode(tree = arr[i], letters, 0, 0, code);
puts("\nSuccessful encoding");
printTree(arr[i]);
break;
}
}
in = fopen(IN_PATH, "r");
out = fopen(OUT_PATH, "w");
fileReadingVerdict = fgets(str, CODE_SIZE, in) != NULL;
while(!feof(in) || fileReadingVerdict) {
n = strlen(str);
for (i = 0; i < n ; i ++ ) {
str[i] = toupper(str[i]);
if (str[i] >= 'A' && str[i] <= 'Z') {
index = str[i] - 'A';
fputs(letters[index]->code, out);
}
}
if (fileReadingVerdict) {
fileReadingVerdict = fgets(str, CODE_SIZE, in) != NULL;
}
}
fclose(in);
fclose(out);
printf("\nFile size (only letters) of the input file: %d bits", numberOfCharacters * BYTE_SIZE);
out = fopen(OUT_PATH, "r");
fileReadingVerdict = fgets(str, CODE_SIZE, out) != NULL;
numberOfCharacters = 0;
while(!feof(out) || fileReadingVerdict) {
numberOfCharacters += strlen(str);
if (fileReadingVerdict) {
fileReadingVerdict = fgets(str, CODE_SIZE, out) != NULL;
}
}
fclose(out);
printf("\nFile size of the output file: %d bits", numberOfCharacters);
printf("\nInterpreting output file:\n");
out = fopen(OUT_PATH, "r");
fileReadingVerdict = fgets(str, CODE_SIZE, out) != NULL;
while(!feof(out) || fileReadingVerdict) {
n = strlen(str);
i = 0 ;
while(i < n) {
interpret(str, &i, tree);
}
if (fileReadingVerdict) {
fileReadingVerdict = fgets(str, CODE_SIZE, out) != NULL;
}
}
fclose(out);
puts("\n");
return 0;
}
/*
*
*/
/*
 * Prefix traversal that assigns a code to every letter below node.
 *
 * code is a scratch string shared by the whole traversal. On entry to a node
 * at depth `level` it is cut back to level - 1 digits (dropping what a
 * previously visited branch left behind) and the digit for `direction` is
 * appended: '0' for LEFT, '1' for RIGHT. The root is entered with level 0 and
 * direction 0 and leaves code untouched.
 * For a letter node the code is copied to the node and to its letters[] entry.
 */
void encode(tree_node_t *node, tree_node_t **letters, char direction, short level, char* code) {
    if ( node == NULL ) {
        return;
    }

    /* level 0 has no digit of its own; the upper bound keeps code within CODE_SIZE */
    if (level > 0 && level < CODE_SIZE) {
        if (strlen(code) >= (size_t) level) {
            code[level - 1] = 0;
        }
        if (direction == RIGHT) {
            strcat(code, "1");
        } else if (direction == LEFT) {
            strcat(code, "0");
        }
    }

    if (node->c >= 'A' && node->c <= 'Z') {
        strcpy(node->code, code);
        if (letters[node->c - 'A'] != NULL) {
            strcpy(letters[node->c - 'A']->code, code);
        }
    }

    encode(node->left, letters, LEFT, level + 1, code);
    encode(node->right, letters, RIGHT, level + 1, code);
}
/*
 * Prints one line (letter, frequency, code) for every letter node of the
 * tree, visiting the node first, then its left and right subtrees.
 */
void printTree(tree_node_t *node) {
    if ( node == NULL ) {
        return;
    }

    const int isLetter = node->c >= 'A' && node->c <= 'Z';
    if (isLetter) {
        printf("\t%c - frequency: %.10f\tencoding: %s\n", node->c, node->frequency, node->code);
    }
    printTree(node->left);
    printTree(node->right);
}
/*
* Begin: Minimum and second minimum
*/
/*
 * Finds the two non-NULL elements of arr with the lowest frequencies.
 *
 * On return *minIndex and *secondMinIndex hold their positions and *min and
 * *secondMin their frequencies. On equal frequencies the lowest index wins.
 * When arr holds no element, both indices are -1; when it holds exactly one,
 * *secondMinIndex is -1. The value behind a -1 index is left untouched.
 * arr is never read outside 0 .. NBR_OF_LETTERS - 1.
 */
void findMinAndSecondMin(tree_node_t *arr[], float *min, int *minIndex, float *secondMin, int *secondMinIndex) {
    int i;

    *minIndex = -1;
    *secondMinIndex = -1;

    /* Minimum */
    for ( i = 0 ; i < NBR_OF_LETTERS; i ++ ) {
        if ( arr[i] == NULL ) {
            continue;
        }
        if ( *minIndex == -1 || arr[i]->frequency < *min ) {
            *min = arr[i]->frequency;
            *minIndex = i;
        }
    }
    if ( *minIndex == -1 ) {
        return; /* nothing to choose from */
    }

    /* Second minimum: same scan, skipping the minimum itself */
    for ( i = 0 ; i < NBR_OF_LETTERS; i ++ ) {
        if ( arr[i] == NULL || i == *minIndex ) {
            continue;
        }
        if ( *secondMinIndex == -1 || arr[i]->frequency < *secondMin ) {
            *secondMin = arr[i]->frequency;
            *secondMinIndex = i;
        }
    }
}
/*
 * Decodes one letter from str, starting at position *index, by walking the
 * tree: '0' goes left, '1' goes right. When a letter node is reached the
 * letter is printed followed by a space.
 *
 * *index is advanced past every character examined, including an unexpected
 * one, so repeated calls make progress. Decoding stops silently when the
 * string ends or a missing branch is reached. With a NULL tree the rest of
 * the string is skipped.
 */
void interpret(char *str, int *index, tree_node_t *tree) {
    const int n = (int) strlen(str);

    if (tree == NULL) {
        *index = n; /* nothing can be decoded */
        return;
    }

    while (tree != NULL) {
        if (tree->c >= 'A' && tree->c <= 'Z') {
            printf("%c ", tree->c);
            return;
        }
        if ( *index >= n ) {
            return; /* input ended in the middle of a code */
        }

        const char bit = str[(*index) ++];
        if (bit == '0') {
            tree = tree->left;
        } else if (bit == '1') {
            tree = tree->right;
        } else {
            return; /* not a code character; already skipped */
        }
    }
}

Resources