I am trying to make a C program to count the number of frequency of words in a document and to split the document up into N parts with each part being counted by a different thread. Everytime I run the program, I get back nonsensical data, but if I run it without the threads I get back the data that I expect.
Here is the Struct named BinarySearchTree.h
typedef struct {
char *num; /*! The contents of the <tt>Item</tt>.<br>Type: <tt>int</tt>*/
int count;
} Item; /*! \typedef Item
* \struct A struct represent one item.
*/
typedef struct node {
Item info; /*! A struct with a one <tt>int</tt> on it. */
struct node * left;
struct node * right;
} Tree;
typedef struct passargs {
char *File;
Tree *t;
int splitStart;
int splitEnd;
int i;
int a;
char words[512][512];
} pass;
Here is the main code named **BinarySearchTree.c*:
#include "BinarySearchTree.h"
#include <pthread.h>
#include <string.h>
pthread_mutex_t mutex1 = PTHREAD_MUTEX_INITIALIZER;
// Code for integer based BST from: https://gist.github.com/ArnonEilat/4611213
Tree * add(Tree* nod, char *number) {
if (nod == NULL) {
printf("Thread number %ld\n", pthread_self());
printf("\nMaking new node for %s\n", number);
nod = (Tree*) malloc(sizeof (Tree));
if (nod == NULL) {
return NULL;
}
nod->info.num = number;
nod->info.count=1; //Increment the value if the same word is found
nod->left = NULL;
nod->right = NULL;
return nod;
}
if (strcmp(nod->info.num, number)==0) {
printf("%s==%s ",nod->info.num,number);
printf("\t%s:%d ",nod->info.num,nod->info.count);
// ++nod->info.count;
nod->info.count++;
printf("->%d\n ",nod->info.count);
return nod;
}
if (strcmp(nod->info.num, number)>0){
printf("%s>%s ",nod->info.num,number);
nod->left = add(nod->left, number);
} else {
printf("%s<=%s ",nod->info.num,number);
nod->right = add(nod->right, number);
}
return nod;
}
void printInorder(Tree* nod) {
if (nod == NULL) {
return;
}
printInorder(nod->left);
printf(" %s: %d ", nod->info.num, nod->info.count);
printInorder(nod->right);
}
void freeTree(Tree *root) {
if (root == NULL) {
return;
}
freeTree(root->left);
freeTree(root->right);
free(root);
}
int newLines(char* File){ //Count the number of new lines in the file. Split based on those not word count
int newLines=0;
char buffTemp[5120];
FILE *fp1 = fopen(File, "r");
while(1)
{
if(fgets(buffTemp, 512, fp1) ==NULL)
break;
else{
newLines++;
}
}
fclose(fp1);
return newLines;
}
//This function reads the file and adds each entry to the tree, or increments if same word is present
Tree* read(pass* info){
const char *delims = " \n"; //Deliminate by newlines and spaces
char *token;
int splitCounter=0;
FILE *fp = fopen(info->File, "r");
char buff[512];
int aT=info->a; //Putting variables into local ones
int iT=info->i;
int splitEnd = info->splitEnd;
int splitStart = info->splitStart;
char words[512][512];
memcpy(words, info->words, 512);
Tree* nod=info->t;
while(1 && splitCounter<splitEnd) //While the file does not end and we have not reached the end of the split
{
fgets(buff, 512, fp);
splitCounter++;
if(buff == NULL)
break;
else if (splitCounter>splitStart){
printf("\t %s\n", buff);
token = strtok(buff, delims); //Split via the tokens and put them into buff
while (token!=0) {
strcpy(words[iT], token);
iT += 1;
token = strtok(NULL, delims);
}
}
}
for (;aT<iT;aT++){
nod=add(nod, (char *)words[aT]); //Add each word to the tree
}
printf("\n");
fclose(fp);
info->t=nod;
memcpy(info->words,words,512);
info->i=iT;
info->a=aT;
return nod;
}
int main() {
Tree* t = NULL;
pass args;
args.t=t;
args.i=0;
args.a=0;
args.splitStart=0; //These splits are used to tell each thread where to look in the file
args.splitEnd=0;
args.File="/home/dib/CLionProjects/deleteme/readFile"; //Address of the file to be read
int numLines=newLines(args.File);
printf("\nnewLines %d\n",numLines);
int split;
printf("How many splits/threads would you like to create?\n");
scanf("%d", &split);
int iterator=numLines/split;
printf("iterator %d:", iterator);
const int NUMTHREADS= numLines/iterator + (numLines % iterator != 0);
unsigned int p;
// This for loop shows how I hope the threads would work. Uncomment the below block to see it work
for (p=0; p<NUMTHREADS; p++){
args.splitStart=args.splitEnd;
if (args.splitEnd+iterator>numLines) args.splitEnd=numLines;
else args.splitEnd+=iterator;
read(&args);
}
pthread_t th[NUMTHREADS];
int threads[NUMTHREADS];
//This is my attempt at using threads to solve the same problem as the above for loop
// for(p=0; p< NUMTHREADS; p++){
// threads[p] = p;
// args.splitStart=args.splitEnd;
// if (args.splitEnd+iterator>numLines) args.splitEnd=numLines;
// else args.splitEnd+=iterator;
// printf("split end: %d",args.splitEnd);
// pthread_create(&th[p], NULL, &read, &args);
// }
printInorder(args.t);
freeTree(args.t);
exit(0);
}
And finally the document I have been using as a test case named readFile:
five five five one two
two three three four four
three six seven six six
four six five seven seven
seven seven seven seven six
five four six
I tried implementing the solution found here : passing struct to pthread as an argument
but did not know what thread_handles was, and could not get it to work though the problem faced in that link is similar. So am I also just having a problem with memory allocation or is it something completely different?
Related
Hello I am developing a program for the Raspberry in C (the in-progress project can be found here).
I noted there are some errors in the task1 function so I created an equivalent program in my Desktop (running Ubuntu) to find the error, where the task1 was readapted as below:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stub.h"
#include "globals.h"
#include "utils.h"
#include <pthread.h>
#define INPIN 25
void takePic(char picname[24]);
//This thread reads the PIR sensor output
void task1()
{
unsigned char val_read = 0;
static unsigned char alarm_on = FALSE;
static const unsigned int maxNumPics=10;
static char folderPath[] = "/home/usr/Documents/alarmSys_rasp/alarm_new/pics/";
char fileNameTodelete[73];
fileNameTodelete[0] = '\0';
strcat(fileNameTodelete, folderPath);
//INITIALIZING
pinMode(INPIN, INPUT);
//create folder where to save pics
createFolder(folderPath);
char* names[24];
char picname[24];
int res = 0;
picname[0] = '\0';
static unsigned int numPicsInFolder;
//delete if more than 10 files
while((numPicsInFolder = filesByName(names, folderPath))>maxNumPics)
{
fileNameTodelete[0] = '\0';
strcat(fileNameTodelete, folderPath);
strcat(fileNameTodelete, names[0]);
printf("%s\n", fileNameTodelete);
remove(fileNameTodelete);
}
static unsigned int nexEl;
nexEl = numPicsInFolder % maxNumPics;
printf("Entering while\n");
while(1)
{
//static const unsigned int del = 300;
val_read = digitalRead(INPIN);
if (val_read && (!alarm_on)) //motion detected
{
printf("\nDetected movement\n");
if (numPicsInFolder >= maxNumPics)
{
printf("\nMax num pics\n");
fileNameTodelete[0] = '\0';
strcat(fileNameTodelete, folderPath);
strcat(fileNameTodelete, names[nexEl]);
printFiles(names, numPicsInFolder);
printf("File to be deleted %d: %s, ", nexEl, names[nexEl]);
//printf("%s\n", fileNameTodelete);
if ((res = remove(fileNameTodelete))!=0)
{
printf("Error deleting file: %d\n", res);
}
}
else
{
printf("\nNot reached max num pics\n");
numPicsInFolder++;
}
//update buffer
takePic(picname);
printf("value returned by takePic: %s\n", picname);
//names[nexEl] = picname;
strcpy(names[nexEl], picname); //ERROR HERE
printFiles(names, numPicsInFolder);
printf("curr element %d: %s\n",nexEl, names[nexEl]);
nexEl++;
nexEl %= maxNumPics;
printf("\nDetected movement: alarm tripped\n\n");
alarm_on = TRUE;
/*Give some time before another pic*/
}
else if (alarm_on && !val_read)
{
alarm_on = FALSE;
printf("\nAlarm backed off\n\n");
}
}
}
void takePic(char picname[24])
{
/*Build string to take picture*/
int err;
//finalcmd is very long
char finalcmd[150];
finalcmd[0] = '\0';
getDateStr(picname);
char cmd1[] = "touch /home/usr/Documents/alarmSys_rasp/alarm_new/pics/";
char cmdlast[] = "";
strcat(finalcmd, cmd1);
strcat(picname, ".jpg");
strcat(finalcmd, picname);
strcat(finalcmd, cmdlast);
system(finalcmd);
if ((err=remove("/var/www/html/*.jpg"))!=0)
{
printf("Error deleting /var/www/html/*.jpg, maybe not existing\n" );
}
//system(finalcmd_ln);
//pthread_mutex_lock(&g_new_pic_m);
g_new_pic_flag = TRUE;
printf("\nPicture taken\n\n");
}
DESCRIPTION
The main function calls the task1 function defined in the file task1.c. The function creates a file in the folder ./pics/ every time the condition (val_read && (!alarm_on)) is verified (in the simulation this condition is satisfied every 2 loops). The function allows only 10 files in the folder. If there are already 10, it deletes the oldest one and creates the new file by calling the function takePic.
The name of files are stored in a array of strings char* names[24]; and the variable nexEl points to the element of this array having the name of the oldest file so that it is replaced with the name of the new file just created.
PROBLEM
The problem is the following: the array char* names[24] is correctly populated at the first iteration but already in the second iteration some elements are overwritten. The problem arises when the folder has the maximum number of files (10) maybe on the update of the array.
It seems the calls to printf overwrite some of its elements so that for example one of them contains the string "Error deleting /var/www/html/*.jpg, maybe not existing\n" printed inside the funtion takePic.
What am I missing or doing wrong with the management of arrays of strings?
UTILITIES FUNCTIONS
To be complete here are shortly described and reported other functions used in the program.
The function getDateStr builds a string representing the current date in the format yyyy_mm_dd_hh_mm_ss.
The function filesByName builds an array of strings where each string is the name of a file in the folder ./ ordered from the last file created to the newest.
The function printFiles prints the previous array.
void getDateStr(char str[20])
{
char year[5], common[3];
time_t t = time(NULL);
struct tm tm = *localtime(&t);
str[0]='\0';
sprintf(year, "%04d", tm.tm_year+1900);
strcat(str, year);
sprintf(common, "_%02d", tm.tm_mon + 1);
strcat(str, common);
sprintf(common, "_%02d", tm.tm_mday);
strcat(str, common);
sprintf(common, "_%02d", tm.tm_hour);
strcat(str, common);
sprintf(common, "_%02d", tm.tm_min);
strcat(str, common);
sprintf(common, "_%02d", tm.tm_sec);
strcat(str, common);
//printf("%s\n", str);
}
unsigned int countFiles(char* dir)
{
unsigned int file_count = 0;
DIR * dirp;
struct dirent * entry;
dirp = opendir(dir); /* There should be error handling after this */
while ((entry = readdir(dirp)) != NULL) {
if (entry->d_type == DT_REG) { /* If the entry is a regular file */
file_count++;
}
}
return file_count;
}
void printFiles(char* names[24], unsigned int file_count)
{
for (int i=0; i<file_count; i++)
{
printf("%s\n", names[i]);
}
}
unsigned int filesByName(char* names[24], char* dir)
{
unsigned int file_count = 0;
DIR * dirp;
struct dirent * entry;
dirp = opendir(dir); /* There should be error handling after this */
while ((entry = readdir(dirp)) != NULL) {
if (entry->d_type == DT_REG) { /* If the entry is a regular file */
//strncpy(names[file_count], entry->d_name,20);
//names[file_count] = malloc(24*sizeof(char));
names[file_count] = entry->d_name;
file_count++;
}
}
closedir(dirp);
char temp[24];
if (file_count>0)
{
for (int i=0; i<file_count-1; i++)
{
for (int j=i; j<file_count; j++)
{
if (strcmp(names[i], names[j])>0)
{
strncpy(temp, names[i],24);
strncpy(names[i], names[j],24);
strncpy(names[j], temp, 24);
}
}
}
}
return file_count;
}
For the simulation I created also the following function (digitalRead is actually a function of the wiringPi C library for the Raspberry):
int digitalRead(int INPIN)
{
static int res = 0;
res = !res;
return res;
}
In task1, you have char *names[24]. This is an array of char pointers.
In filesByName, you do
names[file_count] = entry->d_name;
but should be doing
names[file_count] = strdup(entry->d_name);
because you can't guarantee that d_name persists or is unique after the function returns or even within the loop. You were already close with the commented out malloc call.
Because you call filesByName [possibly] multiple times, it needs to check for names[file_count] being non-null so it can do a free on it [to free the old/stale value from a previous invocation] before doing the strdup to prevent a memory leak.
Likewise, in task1,
strcpy(names[nexEl], picname); //ERROR HERE
will have similar problems and should be replaced with:
if (names[nexEl] != NULL)
free(names[nexEl]);
names[nexEl] = strdup(picname);
There may be other places that need similar adjustments. And, note that in task1, names should be pre-inited will NULL
Another way to solve this is to change the definition of names [everywhere] from:
char *names[24];
to:
char names[24][256];
This avoids some of the malloc/free actions.
getDateStr() uses a char buffer that is always too small. Perhaps other problems exists too.
void getDateStr(char str[20]) {
char year[5], common[3];
....
sprintf(common, "_%02d", tm.tm_mon + 1); // BAD, common[] needs at least 4
Alternative with more error checking
char *getDateStr(char *str, size_t sz) {
if (str == NULL || sz < 1) {
return NULL;
}
str[0] = '\0';
time_t t = time(NULL);
struct tm *tm_ptr = localtime(&t);
if (tm_ptr == NULL) {
return NULL;
}
struct tm tm = *tm_ptr;
int cnt = snprintf(year, sz, "%04d_%02d_%02d_%02d_%02d_%02d",
tm.tm_year+1900, tm.tm_mon + 1, tm.tm_mday,
tm.tm_hour, tm.tm_min, tm.tm_sec);
if (cnt < 0 || cnt >= sz) {
return NULL;
}
return str;
}
I have a word "all" in my trie and a word "alter" but "alt" is not a word in the trie. But when I check for "alt" it still returns true because is_word is true as "all" was a word. How am is supposed to work this error.
//Here's the code
typedef struct node{
bool is_word;
struct node *children[27];
} node;
unsigned int wsize=0;
node * root;
bool check(const char* word)
{
// TODO
node *chrawler=root;
for(int i=0;i<strlen(word)-1;i++)
{
int t;
if(word[i]>=65&&word[i]<=90)
{
t=word[i]-'A';
}
else if(isalpha(word[i]))
t=word[i]-'a';
else
t=26;
if(chrawler->children[t]==NULL)
return false;
else
chrawler=chrawler->children[t];
}
if(chrawler->is_word)
return true;
return false;
}
// Load function
bool load(const char* dictionary)
{
// TODO
FILE *inptr=fopen(dictionary,"r");
if(inptr==NULL)
{
return false;
}
node *new_node=malloc(sizeof(node));
root=new_node;
char * word=malloc((LENGTH+1)*sizeof(char));
int index=0;
for(int c=fgetc(inptr);c!=EOF;c=fgetc(inptr))
{
char ch=c;
if(ch=='\n')
{
word[index]='\0';
index=0;
node *chrawler=root;
for(int i=1;i<strlen(word);i++)
{
int t;
if(isalpha(word[i-1]))
t=word[i-1]-'a';
else
t=26;
if(chrawler->children[t]==NULL)
{
node *new_node=malloc(sizeof(node));
chrawler->children[t]=new_node;
chrawler=chrawler->children[t];
}
else
chrawler=chrawler->children[t];
}
chrawler->is_word=1;
wsize++;
}
else
{
word[index]=ch;
index++;
}
}
return true;
}
You need to ensure that all the pointers in a new node are null, as well as setting the is_word value to false. This is, perhaps, most easily done by using calloc() to allocate the space. Creating a function to allocate and error check the allocation of a node makes it easier. Similarly, you have two blocks of code mapping characters to trie indexes. You should use functions — even small ones — more generously.
The character-by-character input for a line of data is not really necessary, either; it would be better to use fgets() to read lines.
Adding these and sundry other changes (for example, local array word instead of dynamically allocated array — which wasn't freed; closing the file when finished; etc.) gives an MCVE (Minimal, Complete, Verifiable Example) like this:
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { LENGTH = 256 };
// Here's the code
typedef struct node
{
bool is_word;
struct node *children[27];
} node;
unsigned int wsize = 0;
node *root;
static inline int map_char(unsigned char c)
{
int t;
if (isalpha(c))
t = tolower(c) - 'a';
else
t = 26;
return t;
}
static inline node *alloc_node(void)
{
node *new_node = calloc(1, sizeof(node));
if (new_node == 0)
{
fprintf(stderr, "Memory allocation failed in %s\n", __func__);
exit(1);
}
return new_node;
}
static bool check(const char *word)
{
node *chrawler = root;
int len = strlen(word);
for (int i = 0; i < len; i++)
{
int t = map_char(word[i]);
if (chrawler->children[t] == NULL)
return false;
else
chrawler = chrawler->children[t];
}
return chrawler->is_word;
}
// Load function
static bool load(const char *dictionary)
{
FILE *inptr = fopen(dictionary, "r");
if (inptr == NULL)
{
fprintf(stderr, "Failed to open file '%s' for reading\n", dictionary);
return false;
}
root = alloc_node();
char word[LENGTH];
while (fgets(word, sizeof(word), inptr) != 0)
{
word[strcspn(word, "\n")] = '\0';
printf("[%s]\n", word);
node *chrawler = root;
int len = strlen(word);
for (int i = 0; i < len; i++)
{
int t = map_char(word[i]);
//printf("t = %d (%c)\n", t, word[i]);
if (chrawler->children[t] == NULL)
chrawler->children[t] = alloc_node();
chrawler = chrawler->children[t];
}
chrawler->is_word = 1;
wsize++;
}
printf("%d words read from %s\n", wsize, dictionary);
fclose(inptr);
return true;
}
int main(void)
{
const char *wordfile = "words.txt";
if (load(wordfile))
{
char line[4096];
while (fgets(line, sizeof(line), stdin) != 0)
{
line[strcspn(line, "\n")] = '\0';
if (check(line))
printf("[%s] is a word\n", line);
else
printf("[%s] is unknown\n", line);
}
}
return 0;
}
There are other changes that should be made. For example,
the wsize variable should be made non-global; it isn't really used outside the load() function. It's easily arguable that the root node should not be global either; the load() function should return the root node, and the check() function should be passed the root node. In general, global variables should be avoided when possible, and it is usually possible.
Given a file words.txt containing:
abelone
abyssinia
archimedes
brachiosaurus
triceratops
all
alter
asparagus
watchamacallit
a
abracadabra
abyss
ant
the output from a run of the program is:
[abelone]
[abyssinia]
[archimedes]
[brachiosaurus]
[triceratops]
[all]
[alter]
[asparagus]
[watchamacallit]
[a]
[abracadabra]
[abyss]
[ant]
13 words read from words.txt
a
[a] is a word
ab
[ab] is unknown
al
[al] is unknown
all
[all] is a word
alt
[alt] is unknown
alte
[alte] is unknown
alter
[alter] is a word
triceratops
[triceratops] is a word
brachiosaurus
[brachiosaurus] is a word
abys
[abys] is unknown
abbey
[abbey] is unknown
abyss
[abyss] is a word
ant
[ant] is a word
a
[a] is a word
archimedes
[archimedes] is a word
How can I read each individual character from a string that is accessed through an array of pointers? In the below code I currently have generated an array of pointers to strings called, symCodes, in my makeCodes function. I want to read the strings 8 characters at a time, I thought about concatenating each string together, then looping through that char by char but the strings in symCodes could be up to 255 characters each, so I feel like that could possibly be too much all to handle at once. Instead, I thought I could read each character from the strings, character by character.
I've tried scanf or just looping through and always end up with seg faults. At the end of headerEncode(), it's near the bottom. I malloc enough memory for each individual string, I try to loop through the array of pointers and print out each individual character but am ending up with a seg fault.
Any suggestions of a different way to read an array of pointers to strings, character by character, up to n amount of characters is appreciated.
EDIT 1: I've updated the program to no longer output warnings when using the -Wall and -W flags. I'm no longer getting a seg fault(yay!) but I'm still unsure of how to go about my question, how can I read an array of pointers to strings, character by character, up to n amount of characters?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "huffman.h"
#define FAIL 0
#define SUCCESS 1
/* global 1 day arrays that hold chars and their freqs from file */
unsigned long globalFreqs[256] = {0};
unsigned char globalUsedCh[256] = {0};
char globalCodes[256] = {0};
unsigned char globalUniqueSymbols;
unsigned long totalCount = 0;
typedef struct HuffmanTreeNode* HTNode;
struct HuffmanTreeNode* globalSortedLL;
/*
struct has the input letter, the letters frequency, and the left and irght childs
*/
struct HuffmanTreeNode
{
char symbol;
unsigned long freq;
char *code;
struct HuffmanTreeNode *left, *right;
struct HuffmanTreeNode* next;
};
/* does it make sense to have a struct for the entire huffman tree to see its size? */
struct HuffmanTree
{
unsigned size;
};
/*generate new node with given symbol and freq */
struct HuffmanTreeNode* newNode(char symbol, int freq)
{
struct HuffmanTreeNode* newNode = malloc(sizeof(struct HuffmanTreeNode));
newNode->symbol = symbol;
newNode->freq = freq;
newNode->left = newNode->right = NULL;
return newNode;
}
/*current work in progress, i believe this is the way to insert it for a BST
/* will change for HuffmanTreenode once working
/*
*/
struct HuffmanTreeNode* insert(struct HuffmanTreeNode* node, struct HuffmanTreeNode* htnNew)
{
struct HuffmanTreeNode* currentNode = node;
if(currentNode == NULL || compareTwoNodes(htnNew, currentNode))
{
htnNew->next = currentNode;
return htnNew;
}
else
{
while(currentNode->next != NULL && compareTwoNodes(currentNode->next, htnNew))
{
currentNode = currentNode->next;
}
htnNew->next = currentNode->next;
currentNode->next = htnNew;
return node;
}
}
int compareTwoNodes(struct HuffmanTreeNode* a, struct HuffmanTreeNode* b)
{
if(b->freq < a->freq)
{
return 0;
}
if(a->freq == b->freq)
{
if(a->symbol > b->symbol)
return 1;
return 0;
}
if(b->freq > a->freq)
return 1;
}
struct HuffmanTreeNode* popNode(struct HuffmanTreeNode** head)
{
struct HuffmanTreeNode* node = *head;
*head = (*head)->next;
return node;
}
/*convert output to bytes from bits*/
/*use binary fileio to output */
/*put c for individual character byte*/
/*fwrite each individual byte for frequency of symbol(look at fileio slides) */
/*
#function:
#param:
#return:
*/
int listLength(struct HuffmanTreeNode* node)
{
struct HuffmanTreeNode* current = node;
int length = 0;
while(current != NULL)
{
length++;
current = current->next;
}
return length;
}
/*
#function:
#param:
#return:
*/
void printList(struct HuffmanTreeNode* node)
{
struct HuffmanTreeNode* currentNode = node;
while(currentNode != NULL)
{
if(currentNode->symbol <= ' ' || currentNode->symbol > '~')
printf("=%d", currentNode->symbol);
else
printf("%c", currentNode->symbol);
printf("%lu ", currentNode->freq);
currentNode = currentNode->next;
}
printf("\n");
}
/*
#function:
#param:
#return:
*/
void buildSortedList()
{
int i;
for(i = 0; i < 256; i++)
{
if(!globalFreqs[i] == 0)
{
globalSortedLL = insert(globalSortedLL, newNode(i, globalFreqs[i]));
}
}
printf("Sorted freqs: ");
printList(globalSortedLL);
printf("listL: %d\n", listLength(globalSortedLL));
}
/*
#function: isLeaf()
will test to see if the current node is a leaf or not
#param:
#return
*/
int isLeaf(struct HuffmanTreeNode* node)
{
if((node->left == NULL) && (node->right == NULL))
return SUCCESS;
else
return FAIL;
}
/*where I plan to build the actual huffmantree */
/*
#function:
#param:
#return:
*/
struct HuffmanTreeNode* buildHuffmanTree(struct HuffmanTreeNode* node)
{
int top = 0;
struct HuffmanTreeNode *left, *right, *topNode, *huffmanTree;
struct HuffmanTreeNode* head = node;
struct HuffmanTreeNode *newChildNode, *firstNode, *secondNode;
while(head->next != NULL)
{
/*grab first two items from linkedL, and remove two items*/
firstNode = popNode(&head);
secondNode = popNode(&head);
/*combine sums, use higher symbol, create new node*/
newChildNode = newNode(secondNode->symbol, (firstNode->freq + secondNode->freq));
newChildNode->left = firstNode;
newChildNode->right = secondNode;
/*insert new node, decrement total symbols in use */
head = insert(head, newChildNode);
}
return head;
}
void printTable(char *codesArray[])
{
int i;
printf("Symbol\tFreq\tCode\n");
for(i = 0; i < 256; i++)
{
if(globalFreqs[i] != 0)
{
if(i <= ' ' || i > '~')
{
printf("=%d\t%lu\t%s\n", i, globalFreqs[i], codesArray[i]);
}
else
{
printf("%c\t%lu\t%s\n", i, globalFreqs[i], codesArray[i]);
}
}
}
printf("Total chars = %lu\n", totalCount);
}
void makeCodes(
struct HuffmanTreeNode *node, /* Pointer to some tree node */
char *code, /* The *current* code in progress */
char *symCodes[256], /* The array to hold the codes for all the symbols */
int depth) /* How deep in the tree we are (code length) */
{
char *copiedCode;
int i = 0;
if(isLeaf(node))
{
code[depth] = '\0';
symCodes[node->symbol] = code;
return;
}
copiedCode = malloc(255*sizeof(char));
memcpy(copiedCode, code, 255*sizeof(char));
code[depth] = '0';
copiedCode[depth] = '1';
makeCodes(node->left, code, symCodes, depth+1);
makeCodes(node->right, copiedCode, symCodes, depth+1);
}
/*
#function: getFileFreq()
gets the frequencies of each character in the given
file from the command line, this function will also
create two global 1d arrays, one for the currently
used characters in the file, and then one with those
characters frequencies, the two arrays will line up
parallel
#param: FILE* in, FILE* out,
the current file being processed
#return: void
*/
void getFileFreq(FILE* in, FILE* out)
{
unsigned long freqs[256] = {0};
int i, t, fileCh;
while((fileCh = fgetc(in)) != EOF)
{
freqs[fileCh]++;
totalCount++;
}
for(i = 0; i < 256; i++)
{
if(freqs[i] != 0)
{
globalUsedCh[i] = i;
globalFreqs[i] = freqs[i];
if(i <= ' ' || i > '~')
{
globalUniqueSymbols++;
}
else
{
globalUniqueSymbols++;
}
}
}
/* below code until total count is for debugging purposes */
printf("Used Ch: ");
for(t = 0; t < 256; t++)
{
if(globalUsedCh[t] != 0)
{
if(t <= ' ' || t > '~')
{
printf("%d ", globalUsedCh[t]);
}
else
printf("%c ", globalUsedCh[t]);
}
}
printf("\n");
printf("Freq Ch: ");
for(t = 0; t < 256; t++)
{
if(globalFreqs[t] != 0)
{
printf("%lu ", globalFreqs[t]);
}
}
printf("\n");
/* end of code for debugging/vizualazation of arrays*/
printf("Total Count %lu\n", totalCount);
printf("globalArrayLength: %d\n", globalUniqueSymbols);
}
void headerEncode(FILE* in, FILE* out, char *symCodes[256])
{
char c;
int i, ch, t, q, b, z;
char *a;
char *fileIn;
unsigned char *uniqueSymbols;
unsigned char *byteStream;
unsigned char *tooManySym = 0;
unsigned long totalEncodedSym;
*uniqueSymbols = globalUniqueSymbols;
totalEncodedSym = ftell(in);
rewind(in);
fileIn = malloc((totalEncodedSym+1)*sizeof(char));
fread(fileIn, totalEncodedSym, 1, in);
if(globalUniqueSymbols == 256)
{
fwrite(tooManySym, 1, sizeof(char), out);
}
else
{
fwrite(uniqueSymbols, 1, sizeof(uniqueSymbols)-7, out);
}
for(i = 0; i < 256; i++)
{
if(globalFreqs[i] != 0)
{
fwrite(globalUsedCh+i, 1, sizeof(char), out);
fwrite(globalFreqs+i, 8, sizeof(char), out);
}
}
for(t = 0; t < totalEncodedSym; t++)
{
fwrite(symCodes[fileIn[t]], 8, sizeof(char), out);
}
for(q = 0; q < totalEncodedSym; q++)
{
symCodes[q] = malloc(255*sizeof(char));
a = symCodes[q];
while(*a != '\0')
printf("%c\n", *(a++));
}
printf("Total encoded symbols: %lu\n", totalEncodedSym);
printf("%s\n", fileIn);
}
void encodeFile(FILE* in, FILE* out)
{
int top = 0;
int i;
char *code;
char *symCodes[256] = {0};
int depth = 0;
code = malloc(255*sizeof(char));
getFileFreq(in, out);
buildSortedList();
makeCodes(buildHuffmanTree(globalSortedLL), code, symCodes, depth);
printTable(symCodes);
headerEncode(in, out, symCodes);
free(code);
}
/*
void decodeFile(FILE* in, FILE* out)
{
}*/
There are many problems in your code:
[major] function compareTwoNodes does not always return a value. The compiler can detect such problems if instructed to output more warnings.
[major] the member symbol in the HuffmanTreeNode should have type int. Type char is problematic as an index value because it can be signed or unsigned depending on compiler configuration and platform specificities. You assume that char has values from 0 to 255, which is incorrect for most platforms where char actually has a range of -128 .. 127. Use unsigned char or int but cast the char values to unsigned char to ensure proper promotion.
[major] comparison if (globalUniqueSymbols == 256) is always false because globalUniqueSymbols is an unsigned char. The maximum number of possible byte values is indeed 256 for 8-bit bytes, but it does not fit in an unsigned char, make globalUniqueSymbols an int.
[major] *uniqueSymbols = globalUniqueSymbols; in function headerEncode stores globalUniqueSymbols into an uninitialized pointer, definitely undefined behavior, probable segmentation fault.
[major] sizeof(uniqueSymbols) is the size of a pointer, not the size of the array not the size of the type. Instead of hacking it as sizeof(uniqueSymbols)-7, fputc(globalUniqueSymbols, out);
[major] fwrite(tooManySym, 1, sizeof(char), out); is incorrect too, since tooManySym is initialized to 0, ie: it is a NULL pointer. You need a special value to tell that all bytes values are used in the source stream, use 0 for that and write it with fputc(0, out);.
You have nested C style comments before function insert, this is not a bug but error prone and considered bad style.
function newNode should take type unsigned long for freq for consistency.
function buildHuffmanTree has unused local variables: right, top and topNode.
variable i is unused in function makeCodes.
many unused variables in headerEncode: byteStream, c, ch, b...
totalEncodedSym is an unsigned long, use an index of the proper type in the loops where you stop at totalEncodedSym.
unused variables un encodeFile: i, top...
Most of these can be detected by the compiler with the proper warning level: gcc -Wall -W or clang -Weverything...
There are probably also errors in the program logic, but you cannot see these until you fix the major problems above.
I just found this code online, and do not understand how the input should be formatted. An example of similar input from the same programmer is shown here: Pushdown automaton implemented in C
But it still does not help that much. Here is what it says:
The input format is like:
e01:e0$:000111:a:ad:aeeb$:b0eb0:b10ce:c10ce:ce$de The input is
separated by a semicolon “:”, first section is “input alphabet”,
second is “stack alphabet”, then “input” and the last whole bunch are
transition functions.
Can anyone provide some guidance how the input is handled? I am trying really hard for about 6 hours now, and cannot for the life of me decipher how the input should be formatted for this code.
Once it is compiled with gcc, to run it just do "./executable" and press enter. Then paste in the sample input string as shown above (although for this program I would need a different input).
/* This C file implements a Turing Machine
* author: Kevin Zhou
* Computer Science and Electronics
* University of Bristol
* Date: 21st April 2010
*/
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
typedef struct tapes {
struct tapes *left;
struct tapes *right;
char content;
} Tape;
typedef enum { LEFT,RIGHT } Direction;
typedef struct transition {
char current_state;
char tape_symbol;
char new_state;
char new_tape_symbol;
Direction dir;
} Transition;
typedef struct list {
Transition *content;
struct list *next;
} List;
typedef struct tm {
char *input_alpha;
char *input;
char *tape_alpha;
char start;
char accept;
char reject;
List *transition;
} TM;
Tape *insert_tape(Tape *t, Direction dir, char c) {
Tape *head = t;
Tape *new1 = calloc(1,sizeof(Tape));;
new1 -> content = c;
if(dir == LEFT) {
while(t->left != NULL) {
t = t->left;
}
new1->right = t;
new1->left = NULL;
t->left = new1;
return new1;
}
if(dir == RIGHT) {
while(t->right != NULL) {
t = t->right;
}
new1->left = t;
new1->right = NULL;
t->right = new1;
}
return head;
}
Tape *create_tape(char *input) {
int i=1;
Tape *t = calloc(1,sizeof(Tape));
t->content = input[0];
while(1) {
if(input[i] == '\0') break;
t = insert_tape(t,RIGHT,input[i]);
i++;
}
return t;
}
/* turn the input string into Transition fields */
Transition *get_transition(char *s) {
Transition *t = calloc(1,sizeof(Transition));
Direction dir;
t->current_state = s[0];
t->tape_symbol = s[1];
t->new_state = s[2];
t->new_tape_symbol = s[3];
dir = (s[4]=='R')? RIGHT:LEFT;
t->dir = dir;
return t;
}
/* turn the string into transitions and add into list */
List *insert_list( List *l, char *elem ) {
List *t = calloc(1,sizeof(List));
List *head = l;
while(l->next!=NULL)
l = l->next;
t->content = get_transition(elem);
t->next = NULL;
l->next = t;
return head;
}
/* insert a transition into a list */
List *insert_list_transition( List *l, Transition *tr) {
List *t = calloc(1,sizeof(List));
List *head = l;
while(l->next!=NULL)
l = l->next;
t->content = tr;
t->next = NULL;
l->next = t;
return head;
}
void print_tape( Tape *t,char blank) {
char c;
while(1) {
if(t->content != blank) break;
t= t->right;
}
while(1) {
if(t==NULL) break;
c = t->content;
if(t->content != blank)
putchar(c);
t= t->right;
}
putchar('\n');
}
void print_transition (Transition *t) {
char s1[] = "Left";
char s2[] = "Right";
if(t==NULL) {
printf("NULL Transfer");
return;
}
printf("current:%c tape:%c new state:%c new tape:%c direction %s\n",t->current_state,t->tape_symbol,t->new_state,t->new_tape_symbol,(t->dir == LEFT)?s1:s2);
}
/*test if the char c is in the string s */
int contains ( char c, char *s ) {
int i=0;
while(1) {
if(c== s[i]) return 1;
if(s[i] == '\0') return 0;
i++;
}
}
/* test if the input is a valid input */
int is_valid_input( char *input_alpha, char *input ) {
int i=0;
char c;
while(1) {
c = input[i];
if(c == '\0') break;
if(!contains(c,input_alpha)) return 0;
i++;
}
return 1;
}
TM *createTM (char *input) {
TM *m = calloc(1,sizeof(TM));
List *tr = calloc(1,sizeof(List));
char *buffer;
/*read input alphabet of PDA*/
buffer = strtok(input,":");
if(buffer == NULL) {
printf("Error in reading input alphabet!\n");
exit(1);
}
m->input_alpha = buffer;
/*read tape alphabet*/
buffer = strtok(NULL,":");
if(buffer == NULL) {
printf("Error in reading tape alphabet!\n");
exit(1);
}
m->tape_alpha = buffer;
/*read input sequence*/
buffer = strtok(NULL,":");
if(buffer == NULL) {
printf("Error in reading input sequence!\n");
exit(1);
}
if(!is_valid_input(m->input_alpha,buffer)) {
printf("Error! Input contains some invalid characters that don't match the input alphabet!\n");
exit(1);
}
m->input = buffer;
buffer = strtok(NULL,":");
m->start = buffer[0];
buffer = strtok(NULL,":");
m->accept = buffer[0];
buffer = strtok(NULL,":");
m->reject = buffer[0];
/*read tape transition*/
while(1) {
buffer = strtok(NULL,":");
if(buffer == NULL) break;
tr = insert_list(tr,buffer);
}
m->transition = tr->next;
return m;
}
Transition *find_transition(List * list,char state, char tape_symbol) {
Transition *t;
while(1) {
if(list==NULL) return NULL;
t = list -> content;
if(t->current_state == state && t->tape_symbol == tape_symbol)
return t;
list = list->next;
}
}
Tape *move(Tape *t,Direction dir, char blank) {
if(dir == LEFT) {
if(t->left==NULL) {
t = insert_tape(t,LEFT,blank);
}
return t->left;
}
if(dir == RIGHT) {
if(t->right==NULL) {
t = insert_tape(t,RIGHT,blank);
}
return t->right;
}
return NULL;
}
void simulate( TM *m ) {
/* first symbol in input symbol used to represent the blank symbol */
const char blank = m->tape_alpha[0];
char current_state = m->start;
Tape *tape = create_tape(m->input);
Tape *current_tape = tape;
char current_tape_symbol;
Transition *current_transition;
while(1) {
if(current_state == m->accept) {
printf("Accept\n");
print_tape(tape,blank);
break;
}
if(current_state == m->reject) {
printf("Reject\n");
print_tape(tape,blank);
break;
}
current_tape_symbol = (current_tape==NULL||current_tape ->content == '\0')?blank:current_tape->content;
current_transition = find_transition(m->transition,current_state,current_tape_symbol);
current_state = current_transition -> new_state;
current_tape -> content = current_transition -> new_tape_symbol;
current_tape = move( current_tape, current_transition ->dir, blank);
}
}
int main(void) {
char s[300];
TM *p;
scanf("%s",s);
p = createTM(s);
simulate(p);
return 0;
}
The heavy use of the line buffer = strtok(NULL,":") confirms that the input strings are (like in the linked-to code) colon-delimited.
The struct defintions are the key to reverse-engineering the input.
The main struct is:
typedef struct tm {
char *input_alpha;
char *input;
char *tape_alpha;
char start;
char accept;
char reject;
List *transition;
} TM;
The function createTM() is the function which splits the input on : and loads the Turing machine. struct tm has 7 fields and createTM() has 7 clear phases
1) The first part is the input alphabet. Presumably this would be a string of 1 or more characters, e.g. 01.
2) The second part is the tape is the tape alphabet. The only character in this which plays any role in the rest of the code is the first character. The line const char blank = m->tape_alpha[0]; in the main simulation function indicates that the first character plays the role of the blank character -- the character which indicates that a tape square is empty. The ability to write the blank to a square allows the Turing machine to erase the data in a square. Note that in some sense this part of the input is out of order -- it is listed as the third field in the struct definition but is the second field in the input string.
3) The thirs part is the initial input on the tape. It is a string all of whose characters are drawn from the first part. The function is_valid_input() is used to check this condition.
4) The next part is the start state, which consists of a single char
5) The next part is the accept state, which is again a single char. Thus, in this model of a TM there is a single accepting state
6) The next part is the rejecting state, which is again represented by a single char
7) What follows is a sequence of strings, fed into a linked list of strings. The key function in understanding how it works is get_transition() which takes one of these transition strings and converts it into a Transition struct, declared as:
typedef struct transition {
char current_state;
char tape_symbol;
char new_state;
char new_tape_symbol;
Direction dir;
} Transition;
Looking carefully at the function get_transition() you can infer that a transition is represented by a string of length 5 where the last char is either R or L. An example would be something like a1b0R which says something like "if you are in state a while scanning symbol 0, transition to state b, write symbol 1 and the move to the right".
Putting it all together, the form of an input string would be something like:
01:_102:1001010101:$:a:r:$0b1R:b1b0L:a1b2R
corresponding to
01 _102 1001010101 $ a r $0b1R b1b0L a1b2R
input tape input start accept reject transitions
| alphabets | | states |
(blank = '_')
I just made some transitions at random, and neither know nor care what the program would do with this input. This should be enough for you to start experimenting with the program.
' I need to be able to create a alphabet tree. Then open an example .txt file with '.','-' and '/' '//'. '.' goes to the left of the tree or in this case the rist letters.'-'dash to the right.
http://www.skaut.ee/?jutt=10201 - what the tree looks like. '
#include <stdio.h>
#include<stdlib.h>
#include<string.h>
struct MorsePuu {
char t2ht;
struct MorsePuu *punkt, *kriips, *next;
};
static int i;
char TAHED[32]={' ','E','I','S','H','V','U','F','Ü','A','R','L','Ä','W','P','J','T','N','D','B','X','K','C','Y','M','G','Z','Q','O','Ö','™'};
//creating the "alphabet-tree"
struct MorsePuu *Ehitamine(int N) {
struct MorsePuu *uus;
int nl, nr;
if (N==0) {return NULL;}
else {
nl = N / 2;
nr = N-nl-1;
uus = malloc(sizeof *uus);
uus->t2ht = TAHED[i];
i++;
uus->punkt = NULL;
uus->kriips = NULL;
uus->punkt = Ehitamine(nl);
uus->kriips = Ehitamine(nr);
return uus;
}
}
//creating the order of the tree.
Preorder(struct MorsePuu *JViit) {
printf("%c",JViit->t2ht);
if (JViit->punkt != NULL) {
Preorder(JViit->punkt);}
// printf("%c",JViit->t2ht); Siin oleks Inorderi väljatrükk
if (JViit->kriips != NULL) {
Preorder(JViit->kriips);}
// printf("%c",JViit->t2ht); Ja siin oleks Postorderi väljatrükk
}
main(void) {
struct MorsePuu *morse, *abi;
char rida[128];
FILE *fm=NULL;
printf("Käigepealt tuleb morsepuu ?les ehitada!");
i = 0;
morse=Ehitamine(31);
printf("Puu väljatrükk preorder järjekorras.\n");
Preorder(morse);
printf("%c",morse);
//opening the file . Contents e.g .-/.// return ie. // stops it.
fm = fopen("morse1.txt", "r");
fgets(rida, 128, fm);
printf("\n %s", rida);
fclose(fm);
//this is where the reading and changing loop crashers.
/*
for(i=0; i<strlen(rida); i++){
if(rida[i]=='/'){
}
if(rida[i] == '.'){
//printf();
abi=abi->punkt;
}
if(rida[i]== '-'){
abi=abi->kriips;
}
}
*/
}
The problem starts from the last loop. The letter tree is created but i am not able to search the letter from the tree.
you didn't allocate any memory for *abi here
main(void) {
struct MorsePuu *morse, *abi;
char rida[128];
FILE *fm=NULL;
Abi is a pointer to struct MorsePuu, it points to a random location and when you use it later you cause undefined behaviour at line abi=abi->punkt.
You have to add a line
abi = malloc( sizeof( struct MorsePuu )) ;