Cannot understand input of a Turing Machine Implementation in C - c

I just found this code online, and do not understand how the input should be formatted. An example of similar input from the same programmer is shown here: Pushdown automaton implemented in C
But it still does not help that much. Here is what it says:
The input format is like:
e01:e0$:000111:a:ad:aeeb$:b0eb0:b10ce:c10ce:ce$de The input is
separated by a semicolon “:”, first section is “input alphabet”,
second is “stack alphabet”, then “input” and the last whole bunch are
transition functions.
Can anyone provide some guidance how the input is handled? I am trying really hard for about 6 hours now, and cannot for the life of me decipher how the input should be formatted for this code.
Once it is compiled with gcc, to run it just do "./executable" and press enter. Then paste in the sample input string as shown above (although for this program I would need a different input).
/* This C file implements a Turing Machine
* author: Kevin Zhou
* Computer Science and Electronics
* University of Bristol
* Date: 21st April 2010
*/
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
typedef struct tapes {
struct tapes *left;
struct tapes *right;
char content;
} Tape;
typedef enum { LEFT,RIGHT } Direction;
typedef struct transition {
char current_state;
char tape_symbol;
char new_state;
char new_tape_symbol;
Direction dir;
} Transition;
typedef struct list {
Transition *content;
struct list *next;
} List;
typedef struct tm {
char *input_alpha;
char *input;
char *tape_alpha;
char start;
char accept;
char reject;
List *transition;
} TM;
Tape *insert_tape(Tape *t, Direction dir, char c) {
Tape *head = t;
Tape *new1 = calloc(1,sizeof(Tape));;
new1 -> content = c;
if(dir == LEFT) {
while(t->left != NULL) {
t = t->left;
}
new1->right = t;
new1->left = NULL;
t->left = new1;
return new1;
}
if(dir == RIGHT) {
while(t->right != NULL) {
t = t->right;
}
new1->left = t;
new1->right = NULL;
t->right = new1;
}
return head;
}
Tape *create_tape(char *input) {
int i=1;
Tape *t = calloc(1,sizeof(Tape));
t->content = input[0];
while(1) {
if(input[i] == '\0') break;
t = insert_tape(t,RIGHT,input[i]);
i++;
}
return t;
}
/* turn the input string into Transition fields */
Transition *get_transition(char *s) {
Transition *t = calloc(1,sizeof(Transition));
Direction dir;
t->current_state = s[0];
t->tape_symbol = s[1];
t->new_state = s[2];
t->new_tape_symbol = s[3];
dir = (s[4]=='R')? RIGHT:LEFT;
t->dir = dir;
return t;
}
/* turn the string into transitions and add into list */
List *insert_list( List *l, char *elem ) {
List *t = calloc(1,sizeof(List));
List *head = l;
while(l->next!=NULL)
l = l->next;
t->content = get_transition(elem);
t->next = NULL;
l->next = t;
return head;
}
/* insert a transition into a list */
List *insert_list_transition( List *l, Transition *tr) {
List *t = calloc(1,sizeof(List));
List *head = l;
while(l->next!=NULL)
l = l->next;
t->content = tr;
t->next = NULL;
l->next = t;
return head;
}
void print_tape( Tape *t,char blank) {
char c;
while(1) {
if(t->content != blank) break;
t= t->right;
}
while(1) {
if(t==NULL) break;
c = t->content;
if(t->content != blank)
putchar(c);
t= t->right;
}
putchar('\n');
}
void print_transition (Transition *t) {
char s1[] = "Left";
char s2[] = "Right";
if(t==NULL) {
printf("NULL Transfer");
return;
}
printf("current:%c tape:%c new state:%c new tape:%c direction %s\n",t->current_state,t->tape_symbol,t->new_state,t->new_tape_symbol,(t->dir == LEFT)?s1:s2);
}
/*test if the char c is in the string s */
int contains ( char c, char *s ) {
int i=0;
while(1) {
if(c== s[i]) return 1;
if(s[i] == '\0') return 0;
i++;
}
}
/* test if the input is a valid input */
int is_valid_input( char *input_alpha, char *input ) {
int i=0;
char c;
while(1) {
c = input[i];
if(c == '\0') break;
if(!contains(c,input_alpha)) return 0;
i++;
}
return 1;
}
TM *createTM (char *input) {
TM *m = calloc(1,sizeof(TM));
List *tr = calloc(1,sizeof(List));
char *buffer;
/*read input alphabet of PDA*/
buffer = strtok(input,":");
if(buffer == NULL) {
printf("Error in reading input alphabet!\n");
exit(1);
}
m->input_alpha = buffer;
/*read tape alphabet*/
buffer = strtok(NULL,":");
if(buffer == NULL) {
printf("Error in reading tape alphabet!\n");
exit(1);
}
m->tape_alpha = buffer;
/*read input sequence*/
buffer = strtok(NULL,":");
if(buffer == NULL) {
printf("Error in reading input sequence!\n");
exit(1);
}
if(!is_valid_input(m->input_alpha,buffer)) {
printf("Error! Input contains some invalid characters that don't match the input alphabet!\n");
exit(1);
}
m->input = buffer;
buffer = strtok(NULL,":");
m->start = buffer[0];
buffer = strtok(NULL,":");
m->accept = buffer[0];
buffer = strtok(NULL,":");
m->reject = buffer[0];
/*read tape transition*/
while(1) {
buffer = strtok(NULL,":");
if(buffer == NULL) break;
tr = insert_list(tr,buffer);
}
m->transition = tr->next;
return m;
}
Transition *find_transition(List * list,char state, char tape_symbol) {
Transition *t;
while(1) {
if(list==NULL) return NULL;
t = list -> content;
if(t->current_state == state && t->tape_symbol == tape_symbol)
return t;
list = list->next;
}
}
Tape *move(Tape *t,Direction dir, char blank) {
if(dir == LEFT) {
if(t->left==NULL) {
t = insert_tape(t,LEFT,blank);
}
return t->left;
}
if(dir == RIGHT) {
if(t->right==NULL) {
t = insert_tape(t,RIGHT,blank);
}
return t->right;
}
return NULL;
}
void simulate( TM *m ) {
/* first symbol in input symbol used to represent the blank symbol */
const char blank = m->tape_alpha[0];
char current_state = m->start;
Tape *tape = create_tape(m->input);
Tape *current_tape = tape;
char current_tape_symbol;
Transition *current_transition;
while(1) {
if(current_state == m->accept) {
printf("Accept\n");
print_tape(tape,blank);
break;
}
if(current_state == m->reject) {
printf("Reject\n");
print_tape(tape,blank);
break;
}
current_tape_symbol = (current_tape==NULL||current_tape ->content == '\0')?blank:current_tape->content;
current_transition = find_transition(m->transition,current_state,current_tape_symbol);
current_state = current_transition -> new_state;
current_tape -> content = current_transition -> new_tape_symbol;
current_tape = move( current_tape, current_transition ->dir, blank);
}
}
int main(void) {
char s[300];
TM *p;
scanf("%s",s);
p = createTM(s);
simulate(p);
return 0;
}

The heavy use of the line buffer = strtok(NULL,":") confirms that the input strings are (like in the linked-to code) colon-delimited.
The struct defintions are the key to reverse-engineering the input.
The main struct is:
typedef struct tm {
char *input_alpha;
char *input;
char *tape_alpha;
char start;
char accept;
char reject;
List *transition;
} TM;
The function createTM() is the function which splits the input on : and loads the Turing machine. struct tm has 7 fields and createTM() has 7 clear phases
1) The first part is the input alphabet. Presumably this would be a string of 1 or more characters, e.g. 01.
2) The second part is the tape is the tape alphabet. The only character in this which plays any role in the rest of the code is the first character. The line const char blank = m->tape_alpha[0]; in the main simulation function indicates that the first character plays the role of the blank character -- the character which indicates that a tape square is empty. The ability to write the blank to a square allows the Turing machine to erase the data in a square. Note that in some sense this part of the input is out of order -- it is listed as the third field in the struct definition but is the second field in the input string.
3) The thirs part is the initial input on the tape. It is a string all of whose characters are drawn from the first part. The function is_valid_input() is used to check this condition.
4) The next part is the start state, which consists of a single char
5) The next part is the accept state, which is again a single char. Thus, in this model of a TM there is a single accepting state
6) The next part is the rejecting state, which is again represented by a single char
7) What follows is a sequence of strings, fed into a linked list of strings. The key function in understanding how it works is get_transition() which takes one of these transition strings and converts it into a Transition struct, declared as:
typedef struct transition {
char current_state;
char tape_symbol;
char new_state;
char new_tape_symbol;
Direction dir;
} Transition;
Looking carefully at the function get_transition() you can infer that a transition is represented by a string of length 5 where the last char is either R or L. An example would be something like a1b0R which says something like "if you are in state a while scanning symbol 0, transition to state b, write symbol 1 and the move to the right".
Putting it all together, the form of an input string would be something like:
01:_102:1001010101:$:a:r:$0b1R:b1b0L:a1b2R
corresponding to
01 _102 1001010101 $ a r $0b1R b1b0L a1b2R
input tape input start accept reject transitions
| alphabets | | states |
(blank = '_')
I just made some transitions at random, and neither know nor care what the program would do with this input. This should be enough for you to start experimenting with the program.

Related

How do I debug a Segmentation fault when I nothing is wrong before the fault or after?

The particular problem I have is that in my main function, I have added a print statement before and after I call the "bad" function. It always shows the before statement, but never the after statement. I also added a print statement to the end of the "bad" function, and I can see that it runs properly to the very last line of the "bad" function, so it should return normally. After the functions last print and before the main function print, I get the segfault. Any ideas? Here is the code:
int main(int argc, char* argv[])
{
char myItem[100];
int i = 0;
while (i < 100) {
scanf("%[^\n]", myItem);
i++;
if (myItem == EOF) {
break;
}
int c;
while ((c = getchar()) != '\n' && c != EOF);
//printf("string read in from user typing: %s\n", myItem);
printf("i = %d\n", i);
emailFilter(myItem);
printf("done with email filter in main\n");
//printf("item from this pass is:%s\n\n", myItem);
}
return 0;
}
and the "bad" function:
void emailFilter(char* mySubject)
{
printf(" Just entered the emailFilter() .\n");
char * event_holder[5]; //holds five separate char ptrs
for (int i = 0; i < 5; i++)
{
event_holder[i] = ((char*)malloc(100 * sizeof(char*)));
}
char command_type = parseSubject(mySubject, event_holder); //parses subject line and fills event_holder. returns command type, from parsing
//call proper parsing result
if (command_type == 'C')
{
create(event_holder);
}
else if (command_type == 'X')
{
change(event_holder);
}
else if (command_type == 'D')
{
delete(event_holder);
}
printf("Leaving emailfilter()...\n");
}
and running this code provides me:
$:
i = 1
Just entered the emailFilter() .
C, Meeting ,01/12/2019,15:30,NEB202
Leaving emailfilter()...
done with email filter in main
i = 2
Just entered the emailFilter() .
Leaving emailfilter()...
Segmentation fault
This shows that I always make it through the function, but still don't return properly.
Here is my entire code to reproduce the error.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
struct node {
char * event_data[5];
struct node * next;
};
struct node *head = NULL;
struct node *current = NULL;
char* earliest = NULL;
char* substring (char* orig, char* dest, int offset, int len)
{
int input_len = strlen (orig);
if (offset + len > input_len)
{
return NULL;
}
strncpy (dest, orig + offset, len);
//add null char \0 to end
char * term = "\0";
strncpy (dest + len, term, 1);
return dest;
}
char * firstItem(char* shortenedSubject)
{
int i = 0;
int currentLength = 0;
int currentCharIndex = 0;
int eventIndex = 0;
char * toReturn = (char*)malloc(100);
while ((shortenedSubject[currentLength] != '\0') && (shortenedSubject[currentLength] != ',') )//50 is my safety num to make sure it exits eventually
{
currentLength++;
}
if (shortenedSubject[currentLength] == ',') {
substring(shortenedSubject, toReturn, 0, currentLength);
}
return toReturn;
}
char parseSubject(char* subject,char * eventDataToReturn[5]) //returns "what type of command called, or none"
{
char toReturn;
char * shortenedSubject = (char*)malloc(100);
substring(subject,shortenedSubject,9,strlen(subject)-9);//put substring into tempString
int currentCharIndex = 0;// well feed into index of substring()
int eventIndex = 0; //lets us know which event to fill in
int currentLength = 0;//lets us know length of current event
int i = 0; //which char in temp string were alooking at
char * action = firstItem(shortenedSubject);
if (strlen(action) == 1)
{
if ( action[0] == 'C')
{
toReturn = 'C';
}
else if (action[0] == 'X')
{
toReturn = 'X';
}
else if (action[0] == 'D')
{
toReturn = 'D';
}
else
{
toReturn = 'N'; //not valid
//invalid email command, do nothing
}
}
else
{
toReturn = 'N'; //not valid
//invalid email command, do nothing
}
char* debug2;
while ((shortenedSubject[i] != '\0') && (i <= 50) )//50 is my safety num to make sure it exits eventually
{
char debugvar = shortenedSubject[i];
currentLength++;
if (shortenedSubject[i] == ',')
{
//eventDataToReturn[i] = substring2(shortenedSubject,currentCharIndex,currentLength);
substring(shortenedSubject,eventDataToReturn[eventIndex],currentCharIndex,currentLength-1);
debug2 = eventDataToReturn[eventIndex];
currentCharIndex= i +1;
eventIndex++;
currentLength = 0;
//i++;
}
i++;
}
substring(shortenedSubject,eventDataToReturn[4],currentCharIndex,currentLength);
return toReturn;
}
void printEventData(char* my_event_data[])
{
//printf("\nPrinting event data...\n");
for (int i = 1; i < 4; i++)
{
printf("%s,",my_event_data[i]);
}
//print last entry, no comma
printf("%s",my_event_data[4]);
}
void printEventsInorder()
{
struct node * ptr = head;
while (ptr != NULL)//if not empty, check each one and add when ready
{
printEventData(ptr->event_data);
printf("\n");
ptr = ptr->next;
}
}
void insertFront(char* my_event_data[5])
{
struct node *link = (struct node*) malloc(sizeof(struct node));
link->next = NULL;
for (int i = 0; i < 5; i++)
{
link->event_data[i] = my_event_data[i];
}
head = link;
}
int isEarlier(char* event_data_L[5], char* event_data_R[5])
{// will be given like 12:30 12:45,turn timeL into timeL1 and timeL2, and time R1 and timeR2
//compare dates for earlier
int month_L,day_L,year_L;
int month_R,day_R,year_R;
char* char_holder;
substring(event_data_L[2],char_holder,0,2);//extract first half of time
month_L = atoi(char_holder); //convert first half of time to int
substring(event_data_L[2],char_holder,3,2);//extract first half of time
day_L = atoi(char_holder); //convert first half of time to int
substring(event_data_L[2],char_holder,6,4);//extract first half of time
year_L = atoi(char_holder); //convert first half of time to int
substring(event_data_R[2],char_holder,0,2);//extract first half of time
month_R = atoi(char_holder); //convert first half of time to int
substring(event_data_R[2],char_holder,3,2);//extract first half of time
day_R = atoi(char_holder); //convert first half of time to int
substring(event_data_R[2],char_holder,6,4);//extract first half of time
year_R = atoi(char_holder); //convert first half of time to int
int time_L1,time_L2,time_R1,time_R2;
substring(event_data_L[3],char_holder,0,2);//extract first half of time
time_L1 = atoi(char_holder); //convert first half of time to int
substring(event_data_L[3],char_holder,3,2);//extract second half of time
time_L2 = atoi(char_holder); //convert second half of time to int
substring(event_data_R[3],char_holder,0,2);
time_R1 = atoi(char_holder);
substring(event_data_R[3],char_holder,3,2);
time_R2 = atoi(char_holder);
//convert to 2 ints, first compare left ints, then right ints
if(year_L < year_R)
{
return 1;
}
else if ( year_L == year_R)
{
if (month_L < month_L)
{
return 1;
}
else if (month_L == month_L)
{
if (day_L < day_R)
{
return 1;
}
else if (day_L == day_R)
{
if (time_L1 < time_R1)
{
return 1;
}
else if (time_L1 == time_R1)
{
if (time_L2 < time_R2)
{
return 1;
}
else if (time_L2 == time_R2)
{
return 2;
}
else//else, time is greater
{
return 3;
}
}
else //left time is greater, return 3
{
return 3;
}
}
else
{
return 3;
}
}
else
{
return 3;
}
}
else //its left is greater than right so return 3 to show that
{
return 3;
}
}
void create(char* my_event_data[5]) {
//print required sentence
char * debugvar2 = my_event_data[3];
if (head == NULL)//if empty calendar, just add it
{
insertFront(my_event_data);
//printf("EARLIEST bc empty list, \n");
printf("C, ");
printEventData(my_event_data);
printf("\n");
return;
}
else
{
struct node *link = (struct node*) malloc(sizeof(struct node));
link->next = NULL;
for (int i = 1; i < 5; i++)
{
link->event_data[i] = my_event_data[i];
}
struct node *ptr = head;
struct node *prev = NULL;
if (ptr->next == NULL) //if this is the last node to check against
{
if (isEarlier(my_event_data, ptr->event_data) == 1)
{ //check against it
printf("C, ");
printEventData(my_event_data);
printf("\n");
if (prev != NULL) //if this is first item in linked list...
{
link->next = head; //assign something before head
head = link; //move head to that thing
}
if (prev != NULL)
{
prev->next = link;
}
link->next = ptr;
return;
}
else //else is equal to or later, so tack it on after:
{
ptr->next = link;
}
}
else
{
while (ptr->next != NULL)//if not empty, check each one and add when ready
{
//if next node is later than current, we are done with insertion
if (isEarlier(my_event_data,ptr->event_data) == 1)
{
if (head == ptr) //if earlier than head... insert and print
{
//printf("earlier than head!");
printf("C, ");
printEventData(my_event_data);
printf("\n");
link->next = ptr;
head = link;
}
else //if earlier than non head, insert, but dont print
{
if (prev != NULL)
{
prev->next = link;
}
link->next = ptr;
}
return;
}
else
{
prev = ptr;
ptr = ptr->next;
}
}
if (isEarlier(my_event_data,ptr->event_data) == 1) //while ptr-> is null now
{
printf("C, ");
printEventData(my_event_data);
printf("\n");
if (prev != NULL)
{
prev->next = link;
}
link->next = ptr->next;
return;
}
else
{
prev = link;
link = ptr;
}
}
return;
}
//if it gets here, it is the latest meeting, tack it on the end
//prev->ptr = link;
}
void change(char* my_event_data[5]) {
//create a link
struct node *ptr = head;
while (ptr->next != NULL)//if not empty, check each one and add when ready
{
//if next node is later than current, we are done with insertion
if (*ptr->event_data[1] == *my_event_data[1])
{
for (int i = 1; i < 5; i++)
{
ptr->event_data[i] = my_event_data[i];
}
printf("X, ");
printEventData(my_event_data);
printf("\n");
return;
}
ptr = ptr->next;
}
if (*ptr->event_data[1] == *my_event_data[1]) //check final node
{
for (int i = 0; i < 5; i++)
{
ptr->event_data[i] = my_event_data[i];
}
printf("X, ");
printEventData(my_event_data);
printf("\n");
return;
}
printf("event to change not found");
return;
//if it gets here, nothing matched the title to change
}
void delete(char* my_event_data[5])
{
struct node *ptr = head;
struct node *prev = NULL;
while (ptr != NULL)//if not empty, check each one and add when ready
{
//if next node is later than current, we are done with insertion
if ( strcmp( ptr->event_data[1], my_event_data[1] ) == 0) // if title matches, delete it
{
if (prev != NULL)
{
prev->next = ptr->next;
}
if (ptr == head)
{
head = ptr->next;
}
free(ptr);
printf("D, ");
printEventData(my_event_data);
printf("\n");
return;
}
prev = ptr;
ptr = ptr->next;
}
}
void emailFilter(char* mySubject)
{
if (strlen(mySubject) < 9)
{
return;
}
char * event_holder[5]; //holds five separate char ptrs
for (int i = 0; i < 5; i++)
{
event_holder[i] = ((char*)malloc(100 * sizeof(char*)));
}
char command_type = parseSubject(mySubject, event_holder); //parses subject line and fills event_holder. returns command type, from parsing
//call proper parsing result
if (command_type == 'C')
{
create(event_holder);
}
else if (command_type == 'X')
{
change(event_holder);
}
else if (command_type == 'D')
{
delete(event_holder);
}
}
int main(int argc, char* argv[])
{
char myItem[100];
int i = 0;
while (i < 100)
{
scanf("%[^\n]", myItem);
i++;
if ( myItem == EOF )
{
break;
}
int c;
while ((c = getchar()) != '\n' && c != EOF);
printf("i = %d\n", i);
emailFilter(myItem);
}
return 0;
}
Also please note that this error happens when I use a txt file as STDIN via the ">" symbol on the command line. Here is the file I use:
Subject: C,Meeting ,01/12/2019,15:30,NEB202
Subject: C,Meeting ,01/12/2019,16:30,NEB202
Subject: C,Meeting ,01/12/2019,11:30,NEB202
Having tried to find something to contribute, there's this:
The code is dealing with the date/time. Below is the declaration and use of a "destination buffer" into which is copied fragments of the string:
int isEarlier(char* event_data_L[5], char* event_data_R[5])
{// will be given like 12:30 12:45 // ....
//compare dates for earlier
int month_L,day_L,year_L;
int month_R,day_R,year_R;
char* char_holder;
substring(event_data_L[2],char_holder,0,2);//extract first half of time
month_L = atoi(char_holder); //convert first half of time to int
//...
Notice that char_holder isn't pointing anywhere in particular. UB...
While it represents a beginner's approach, it is actually painful to see code like this. Below is a more concise version of isEarlier() (untested.)
int isEarlier( char *ed_L[5], char *ed_R[5] ) {
char l[16], r[16];
memcpy( l + 0, ed_L[2][6],4 ); // YYYY
memcpy( l + 4, ed_L[2][0],2 ); // MM
memcpy( l + 6, ed_L[2][3],2 ); // DD
memcpy( l + 8, ed_L[3][0],2 ); // hh
memcpy( l + 10, ed_L[3][3],2 ); // mm
memcpy( r + 0, ed_R[2][6],4 ); // YYYY
memcpy( r + 4, ed_R[2][0],2 ); // MM
memcpy( r + 6, ed_R[2][3],2 ); // DD
memcpy( r + 8, ed_R[3][0],2 ); // hh
memcpy( r + 10, ed_R[3][3],2 ); // mm
int res = memcmp( l, r, 12 );
return res < 0 ? 1 : res == 0 ? 2 : 3;
}
Note: The sample data provided indicates 2 digits for both month and day, and is ambiguous as to "mm/dd" or "dd/mm" format. The offset values used here come from the OP code.
One way to reduce the possibility of bugs in code is to both write less but more capable code, if you can, and to perform "unit testing" on code that you write. Focus on one function at a time and do not use global variables. Another is to become as familiar as you can with the proven capabilities of functions in the standard library.
EDIT: Looking at this answer, it occurs to me that this function should, itself, be refactored:
void reformatDateTime( char *d, char *s[5] ) {
memcpy( d + 0, s[2][6],4 ); // YYYY
memcpy( d + 4, s[2][0],2 ); // MM
memcpy( d + 6, s[2][3],2 ); // DD
memcpy( d + 8, s[3][0],2 ); // hh
memcpy( d + 10, s[3][3],2 ); // mm
}
int isEarlier( char *ed_L[5], char *ed_R[5] ) {
char l[16], r[16];
reformatDateTime( l, ed_L );
reformatDateTime( r, ed_R );
int res = memcmp( l, r, 12 );
res = res < 0 ? 1 : res == 0 ? 2 : 3;
printf( "isEarlier() '%.12s' vs '%.12s' result %d\n", l, r, res ); // debug
return res;
}
and I can see that it runs properly
You contradict yourself, you say that it sometimes or always seg faults. It's rather unlikely that some C code would crash at the point of leaving a function, since there's no "RAII" and in this case no multi-threading either. A stack corruption could have destroyed the function return address however.
The best way of debugging is not so much about focusing on the symptom, as it trying to pinpoint where something goes wrong. You've already done as much, so that's most of the debug effort already done.
One way of debugging from there is step #1: stare at the function for one minute. After around 5 seconds: event_holder[i] = ((char*)malloc(100 * sizeof(char*))); Well that's an obvious bug. After some 30 seconds more: wait, who cleans up this memory? The delete function perhaps but why is it then executed conditionally? (Turns out delete doesn't free() memory though.) The function leaks memory, another bug. Then after one full minute we realize that parseSubject does a whole lot of things and we'll need to dig through that one in detail if we want to weed out every possible chance of bugs. And it will take a lot more time to get to the bottom of that. But we already found 2 blatant bugs just by glancing at the code.
Fix the bugs, try again, is the problem gone?
At another glance there's a bug in main(), myItem == EOF is senseless and shouldn't compile. This suggests that you are compiling with way too lax warning levels or ignoring warnings, either is a very bad thing. What compiler options are recommended for beginners learning C?
We might note that extensive use of "magic numbers" make the code hard to read. It is also usually a sure sign of brittle code. Where do these 5 and 9 and so on come from? Use named constants. We will also fairly quickly note the lack of const correctness in something that's only supposed to parse, not change data. And so on.
I didn't read the code in detail, but the overall lack of following best practices and 3 bugs found just by brief glances suggests there's a whole lot more bugs in there.

Reading data from a file and save in list in C

I've just started a little Project to learn C and got a couple of problems.
Trying to read a file from stdin and and save strings to list with a specified key.
The structure of the files looks like this:
1sfa23sab:1najsf9,aksfafio2413,asfjla2345,asjfiao242
25kldg:asfkn36,akal,l,slgjii90345-234
I've already managed to read the file char by char like this:
while ((c = fgetc(stdin)) != EOF )
{
create_list(c);
}
and defined a special data type for my list:
typedef struct data_id
{
char id;
int marker;
int key;
void* ptr;
} data_id;
data_id *nodeList;
I want to save the characters until ":", "-", "," or "LF/CR" appears as one id entry in my list with a specific key. So every ID should also have a key. The part after a "-" is a marker which should also be saved in the list.
e.g. am entry should be like id=slgjii90345, key=10 and marker=234 ...
The create_list function I've written so far is:
void create_list(input){
nodeList = malloc(sizeof *nodeList * 10);
if (islower(input) || isdigit(input)){
if (m_flag == true && isdigit(input)){
nodeList[num_id].marker += (char)input;
printf("%d", nodeList[num_id].marker);
}
else{
nodeList[num_id].id += (char)input;
m_flag = false;
}
}
else if (input == '-')
{
m_flag = true;
printf("\ndash detected, marker follows: ");
}
else if (input == '\n' || input == ',' || input == ':')
{
// printf("\nnext entry follows:\n");
nodeList[num_id].marker = 0;
nodeList[num_id].key = num_id;
num_id++;
m_flag = false;
}
// printf("%d", nodeList[num_id].marker);
}
This answer is based on the original question post; several mistakes were pointed out in comments. Here's a usable variant of your code:
typedef struct data_id
{
char id[13]; // allow strings up to length 12
int key;
} data_id;
data_id *nodeList; // accessible and persistent outside create_list
int num_id;
void create_list(char input[])
{
int c = num_id++; // index of created node
nodeList = realloc(nodeList, sizeof *nodeList * num_id);
if (!nodeList) exit(1);
strcpy(nodeList[c].id, input);
nodeList[c].key = num_id;
printf("node_id: %s\n", input);
printf("key: %d\n", num_id);
}
To read the file with the given separators and call create_list for each id, you can use this:
for (char c[13]; scanf("%12[^-:,\n]%*c", c) != EOF; ) create_list(c);
Is there a way to to have a char id[] variable with dynamically growing length?
Of course there are many ways. The easiest is if you have a POSIX.1-2008 conforming scanf - then you can use the m modifier to let it allocate a buffer of sufficient size:
typedef struct data_id
{
char *id; // allows strings of any length
int key;
} data_id;
…
nodeList[c].id = input; // instead of strcpy(nodeList[c].id, input);
…
for (char *c; scanf("%m[^-:,\n]%*c", &c) != EOF; ) create_list(c);

Global array of struct pointers not storing data

I have a C program that is trying to represent the layout of a house. It reads in the rooms from a text file with the following format:
Room
Door
Door
*
Room
Door
Door
The rooms and doors are stored as structures, and I have a global array of pointers to store 10 rooms. I'm using the method readrooms() to read in the rooms from the text file and store them in the array. However, after reading it in, when I try to print the contents of the array, I get a string of random characters.
#include <stdio.h>
#define MAX 10
struct room * rooms[MAX];
int rp = 0; //room count
//Declare Structures
struct room {
char *name;
struct door *doors[4];
int dp; //door count
};
struct door {
char *name;
struct room *room;
};
//Declare Functions
char *readLine(FILE *fin);
readrooms(FILE *fin);
struct door *newDoor(char * name);
struct room *newRoom(char *name);
main(int argc, char const *argv[])
{
FILE *f = fopen("C:\\Users\\s\\Documents\\C\\explore\\rooms.txt", "r");
readrooms(f);
printf("\n----- READ FILE SUCCESSFULLY | Room Count: %d -----\n", rp);
for (int i = 0; i < rp; i++) {
if (rooms[i] != NULL) {
struct room r = *rooms[i];
printf("ROOM %d: %s\n", i, r.name);
}
}
return 0;
}
struct door *newDoor(char * name) {
struct door d;
//TODO: MAKE SURE THIS IS RIGHT
d.name = name;
d.room = NULL;
return &d;
}
struct room *newRoom(char *name) {
struct room r;
r.name = name;
r.dp = 0;
rooms[rp++] = &r;
return &r;
}
char *readLine(FILE *fin) {
char *str = (char *) malloc(sizeof(char) * 3);
char current = fgetc(fin);
int iter = 0;
while (1) {
if (current == '\n') {
str[iter] = '\0';
break;
}
else if (current == EOF) return NULL;
else {
str[iter++] = current;
current = fgetc(fin);
}
}
return str;
}
readrooms(FILE *fin) {
char *curr_room = readLine(fin);
while (curr_room != NULL) {
if (strcmp(curr_room, "*") == 0) {
curr_room = readLine(fin);
continue;
}
struct room r = *newRoom(curr_room);
printf("\n\nReading room %s\n", r.name);
curr_room = readLine(fin);
while (curr_room != NULL && strcmp(curr_room, "*") != 0) {
struct door d = *newDoor(curr_room);
d.room = &r;
r.doors[r.dp++] = &d;
printf("\t%s.doors[%d] = %s\n", r.name, r.dp-1, d.name);
curr_room = readLine(fin);
//printf("Current room is now %s\n\n", curr_room);
}
}
}
Here is the output:
Reading room Hall
Hall.doors[0] = Study
Hall.doors[1] = Cellar
Hall.doors[2] = Kitchen
Reading room Study
Study.doors[0] = Hall
Study.doors[1] = Garden
Reading room Cellar
Cellar.doors[0] = Hall
Reading room Kitchen
Kitchen.doors[0] = Hall
Kitchen.doors[1] = Garden
Reading room Garden
Garden.doors[0] = Study
Garden.doors[1] = Kitchen
----- READ FILE SUCCESSFULLY | Room Count: 5 -----
ROOM 0: ├ïuΣ uαΦ┤■  Y├jhxÖä
ROOM 1: É√o
ROOM 2: É√o
ROOM 3: É√o
ROOM 4: É√o
One problem.
struct room *newRoom(char *name) {
struct room r;
r.name = name;
r.dp = 0;
rooms[rp++] = &r;
return &r;
}
struct room r; is local variable and will be vanished once control exits newRoom function.
Instead what you can do is
struct room *r = malloc(sizeof(struct room));
r->name = name;
r->dp = 0;
rooms[rp++] = r;
In readLine allocate enough memory to read complete line, otherwise you end up accessing out of bound and invoking undefined behavior.
char *readLine(FILE *fin) {
char *str = (char *) malloc(sizeof(char) * 256);
^^^Max line length
...
}
If you don't want to allocate memory blindly realloc is the thing you are looking for.

Reading string from array of pointers

How can I read each individual character from a string that is accessed through an array of pointers? In the below code I currently have generated an array of pointers to strings called, symCodes, in my makeCodes function. I want to read the strings 8 characters at a time, I thought about concatenating each string together, then looping through that char by char but the strings in symCodes could be up to 255 characters each, so I feel like that could possibly be too much all to handle at once. Instead, I thought I could read each character from the strings, character by character.
I've tried scanf or just looping through and always end up with seg faults. At the end of headerEncode(), it's near the bottom. I malloc enough memory for each individual string, I try to loop through the array of pointers and print out each individual character but am ending up with a seg fault.
Any suggestions of a different way to read an array of pointers to strings, character by character, up to n amount of characters is appreciated.
EDIT 1: I've updated the program to no longer output warnings when using the -Wall and -W flags. I'm no longer getting a seg fault(yay!) but I'm still unsure of how to go about my question, how can I read an array of pointers to strings, character by character, up to n amount of characters?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "huffman.h"
#define FAIL 0
#define SUCCESS 1
/* global 1 day arrays that hold chars and their freqs from file */
unsigned long globalFreqs[256] = {0};
unsigned char globalUsedCh[256] = {0};
char globalCodes[256] = {0};
unsigned char globalUniqueSymbols;
unsigned long totalCount = 0;
typedef struct HuffmanTreeNode* HTNode;
struct HuffmanTreeNode* globalSortedLL;
/*
struct has the input letter, the letters frequency, and the left and irght childs
*/
struct HuffmanTreeNode
{
char symbol;
unsigned long freq;
char *code;
struct HuffmanTreeNode *left, *right;
struct HuffmanTreeNode* next;
};
/* does it make sense to have a struct for the entire huffman tree to see its size? */
struct HuffmanTree
{
unsigned size;
};
/*generate new node with given symbol and freq */
struct HuffmanTreeNode* newNode(char symbol, int freq)
{
struct HuffmanTreeNode* newNode = malloc(sizeof(struct HuffmanTreeNode));
newNode->symbol = symbol;
newNode->freq = freq;
newNode->left = newNode->right = NULL;
return newNode;
}
/*current work in progress, i believe this is the way to insert it for a BST
/* will change for HuffmanTreenode once working
/*
*/
struct HuffmanTreeNode* insert(struct HuffmanTreeNode* node, struct HuffmanTreeNode* htnNew)
{
struct HuffmanTreeNode* currentNode = node;
if(currentNode == NULL || compareTwoNodes(htnNew, currentNode))
{
htnNew->next = currentNode;
return htnNew;
}
else
{
while(currentNode->next != NULL && compareTwoNodes(currentNode->next, htnNew))
{
currentNode = currentNode->next;
}
htnNew->next = currentNode->next;
currentNode->next = htnNew;
return node;
}
}
int compareTwoNodes(struct HuffmanTreeNode* a, struct HuffmanTreeNode* b)
{
if(b->freq < a->freq)
{
return 0;
}
if(a->freq == b->freq)
{
if(a->symbol > b->symbol)
return 1;
return 0;
}
if(b->freq > a->freq)
return 1;
}
struct HuffmanTreeNode* popNode(struct HuffmanTreeNode** head)
{
struct HuffmanTreeNode* node = *head;
*head = (*head)->next;
return node;
}
/*convert output to bytes from bits*/
/*use binary fileio to output */
/*put c for individual character byte*/
/*fwrite each individual byte for frequency of symbol(look at fileio slides) */
/*
#function:
#param:
#return:
*/
int listLength(struct HuffmanTreeNode* node)
{
struct HuffmanTreeNode* current = node;
int length = 0;
while(current != NULL)
{
length++;
current = current->next;
}
return length;
}
/*
#function:
#param:
#return:
*/
void printList(struct HuffmanTreeNode* node)
{
struct HuffmanTreeNode* currentNode = node;
while(currentNode != NULL)
{
if(currentNode->symbol <= ' ' || currentNode->symbol > '~')
printf("=%d", currentNode->symbol);
else
printf("%c", currentNode->symbol);
printf("%lu ", currentNode->freq);
currentNode = currentNode->next;
}
printf("\n");
}
/*
#function:
#param:
#return:
*/
void buildSortedList()
{
int i;
for(i = 0; i < 256; i++)
{
if(!globalFreqs[i] == 0)
{
globalSortedLL = insert(globalSortedLL, newNode(i, globalFreqs[i]));
}
}
printf("Sorted freqs: ");
printList(globalSortedLL);
printf("listL: %d\n", listLength(globalSortedLL));
}
/*
#function: isLeaf()
will test to see if the current node is a leaf or not
#param:
#return
*/
int isLeaf(struct HuffmanTreeNode* node)
{
if((node->left == NULL) && (node->right == NULL))
return SUCCESS;
else
return FAIL;
}
/*where I plan to build the actual huffmantree */
/*
#function:
#param:
#return:
*/
struct HuffmanTreeNode* buildHuffmanTree(struct HuffmanTreeNode* node)
{
int top = 0;
struct HuffmanTreeNode *left, *right, *topNode, *huffmanTree;
struct HuffmanTreeNode* head = node;
struct HuffmanTreeNode *newChildNode, *firstNode, *secondNode;
while(head->next != NULL)
{
/*grab first two items from linkedL, and remove two items*/
firstNode = popNode(&head);
secondNode = popNode(&head);
/*combine sums, use higher symbol, create new node*/
newChildNode = newNode(secondNode->symbol, (firstNode->freq + secondNode->freq));
newChildNode->left = firstNode;
newChildNode->right = secondNode;
/*insert new node, decrement total symbols in use */
head = insert(head, newChildNode);
}
return head;
}
void printTable(char *codesArray[])
{
int i;
printf("Symbol\tFreq\tCode\n");
for(i = 0; i < 256; i++)
{
if(globalFreqs[i] != 0)
{
if(i <= ' ' || i > '~')
{
printf("=%d\t%lu\t%s\n", i, globalFreqs[i], codesArray[i]);
}
else
{
printf("%c\t%lu\t%s\n", i, globalFreqs[i], codesArray[i]);
}
}
}
printf("Total chars = %lu\n", totalCount);
}
void makeCodes(
struct HuffmanTreeNode *node, /* Pointer to some tree node */
char *code, /* The *current* code in progress */
char *symCodes[256], /* The array to hold the codes for all the symbols */
int depth) /* How deep in the tree we are (code length) */
{
char *copiedCode;
int i = 0;
if(isLeaf(node))
{
code[depth] = '\0';
symCodes[node->symbol] = code;
return;
}
copiedCode = malloc(255*sizeof(char));
memcpy(copiedCode, code, 255*sizeof(char));
code[depth] = '0';
copiedCode[depth] = '1';
makeCodes(node->left, code, symCodes, depth+1);
makeCodes(node->right, copiedCode, symCodes, depth+1);
}
/*
#function: getFileFreq()
gets the frequencies of each character in the given
file from the command line, this function will also
create two global 1d arrays, one for the currently
used characters in the file, and then one with those
characters frequencies, the two arrays will line up
parallel
#param: FILE* in, FILE* out,
the current file being processed
#return: void
*/
void getFileFreq(FILE* in, FILE* out)
{
unsigned long freqs[256] = {0};
int i, t, fileCh;
while((fileCh = fgetc(in)) != EOF)
{
freqs[fileCh]++;
totalCount++;
}
for(i = 0; i < 256; i++)
{
if(freqs[i] != 0)
{
globalUsedCh[i] = i;
globalFreqs[i] = freqs[i];
if(i <= ' ' || i > '~')
{
globalUniqueSymbols++;
}
else
{
globalUniqueSymbols++;
}
}
}
/* below code until total count is for debugging purposes */
printf("Used Ch: ");
for(t = 0; t < 256; t++)
{
if(globalUsedCh[t] != 0)
{
if(t <= ' ' || t > '~')
{
printf("%d ", globalUsedCh[t]);
}
else
printf("%c ", globalUsedCh[t]);
}
}
printf("\n");
printf("Freq Ch: ");
for(t = 0; t < 256; t++)
{
if(globalFreqs[t] != 0)
{
printf("%lu ", globalFreqs[t]);
}
}
printf("\n");
/* end of code for debugging/vizualazation of arrays*/
printf("Total Count %lu\n", totalCount);
printf("globalArrayLength: %d\n", globalUniqueSymbols);
}
void headerEncode(FILE* in, FILE* out, char *symCodes[256])
{
char c;
int i, ch, t, q, b, z;
char *a;
char *fileIn;
unsigned char *uniqueSymbols;
unsigned char *byteStream;
unsigned char *tooManySym = 0;
unsigned long totalEncodedSym;
*uniqueSymbols = globalUniqueSymbols;
totalEncodedSym = ftell(in);
rewind(in);
fileIn = malloc((totalEncodedSym+1)*sizeof(char));
fread(fileIn, totalEncodedSym, 1, in);
if(globalUniqueSymbols == 256)
{
fwrite(tooManySym, 1, sizeof(char), out);
}
else
{
fwrite(uniqueSymbols, 1, sizeof(uniqueSymbols)-7, out);
}
for(i = 0; i < 256; i++)
{
if(globalFreqs[i] != 0)
{
fwrite(globalUsedCh+i, 1, sizeof(char), out);
fwrite(globalFreqs+i, 8, sizeof(char), out);
}
}
for(t = 0; t < totalEncodedSym; t++)
{
fwrite(symCodes[fileIn[t]], 8, sizeof(char), out);
}
for(q = 0; q < totalEncodedSym; q++)
{
symCodes[q] = malloc(255*sizeof(char));
a = symCodes[q];
while(*a != '\0')
printf("%c\n", *(a++));
}
printf("Total encoded symbols: %lu\n", totalEncodedSym);
printf("%s\n", fileIn);
}
void encodeFile(FILE* in, FILE* out)
{
int top = 0;
int i;
char *code;
char *symCodes[256] = {0};
int depth = 0;
code = malloc(255*sizeof(char));
getFileFreq(in, out);
buildSortedList();
makeCodes(buildHuffmanTree(globalSortedLL), code, symCodes, depth);
printTable(symCodes);
headerEncode(in, out, symCodes);
free(code);
}
/*
void decodeFile(FILE* in, FILE* out)
{
}*/
There are many problems in your code:
[major] function compareTwoNodes does not always return a value. The compiler can detect such problems if instructed to output more warnings.
[major] the member symbol in the HuffmanTreeNode should have type int. Type char is problematic as an index value because it can be signed or unsigned depending on compiler configuration and platform specificities. You assume that char has values from 0 to 255, which is incorrect for most platforms where char actually has a range of -128 .. 127. Use unsigned char or int but cast the char values to unsigned char to ensure proper promotion.
[major] comparison if (globalUniqueSymbols == 256) is always false because globalUniqueSymbols is an unsigned char. The maximum number of possible byte values is indeed 256 for 8-bit bytes, but it does not fit in an unsigned char, make globalUniqueSymbols an int.
[major] *uniqueSymbols = globalUniqueSymbols; in function headerEncode stores globalUniqueSymbols into an uninitialized pointer, definitely undefined behavior, probable segmentation fault.
[major] sizeof(uniqueSymbols) is the size of a pointer, not the size of the array not the size of the type. Instead of hacking it as sizeof(uniqueSymbols)-7, fputc(globalUniqueSymbols, out);
[major] fwrite(tooManySym, 1, sizeof(char), out); is incorrect too, since tooManySym is initialized to 0, ie: it is a NULL pointer. You need a special value to tell that all bytes values are used in the source stream, use 0 for that and write it with fputc(0, out);.
You have nested C style comments before function insert, this is not a bug but error prone and considered bad style.
function newNode should take type unsigned long for freq for consistency.
function buildHuffmanTree has unused local variables: right, top and topNode.
variable i is unused in function makeCodes.
many unused variables in headerEncode: byteStream, c, ch, b...
totalEncodedSym is an unsigned long, use an index of the proper type in the loops where you stop at totalEncodedSym.
unused variables un encodeFile: i, top...
Most of these can be detected by the compiler with the proper warning level: gcc -Wall -W or clang -Weverything...
There are probably also errors in the program logic, but you cannot see these until you fix the major problems above.

Reading in a .txt file with morse code and finding letters with from a tree?

' I need to be able to create a alphabet tree. Then open an example .txt file with '.','-' and '/' '//'. '.' goes to the left of the tree or in this case the rist letters.'-'dash to the right.
http://www.skaut.ee/?jutt=10201 - what the tree looks like. '
#include <stdio.h>
#include<stdlib.h>
#include<string.h>
struct MorsePuu {
char t2ht;
struct MorsePuu *punkt, *kriips, *next;
};
static int i;
char TAHED[32]={' ','E','I','S','H','V','U','F','Ü','A','R','L','Ä','W','P','J','T','N','D','B','X','K','C','Y','M','G','Z','Q','O','Ö','™'};
//creating the "alphabet-tree"
struct MorsePuu *Ehitamine(int N) {
struct MorsePuu *uus;
int nl, nr;
if (N==0) {return NULL;}
else {
nl = N / 2;
nr = N-nl-1;
uus = malloc(sizeof *uus);
uus->t2ht = TAHED[i];
i++;
uus->punkt = NULL;
uus->kriips = NULL;
uus->punkt = Ehitamine(nl);
uus->kriips = Ehitamine(nr);
return uus;
}
}
//creating the order of the tree.
Preorder(struct MorsePuu *JViit) {
printf("%c",JViit->t2ht);
if (JViit->punkt != NULL) {
Preorder(JViit->punkt);}
// printf("%c",JViit->t2ht); Siin oleks Inorderi väljatrükk
if (JViit->kriips != NULL) {
Preorder(JViit->kriips);}
// printf("%c",JViit->t2ht); Ja siin oleks Postorderi väljatrükk
}
main(void) {
struct MorsePuu *morse, *abi;
char rida[128];
FILE *fm=NULL;
printf("Käigepealt tuleb morsepuu ?les ehitada!");
i = 0;
morse=Ehitamine(31);
printf("Puu väljatrükk preorder järjekorras.\n");
Preorder(morse);
printf("%c",morse);
//opening the file . Contents e.g .-/.// return ie. // stops it.
fm = fopen("morse1.txt", "r");
fgets(rida, 128, fm);
printf("\n %s", rida);
fclose(fm);
//this is where the reading and changing loop crashers.
/*
for(i=0; i<strlen(rida); i++){
if(rida[i]=='/'){
}
if(rida[i] == '.'){
//printf();
abi=abi->punkt;
}
if(rida[i]== '-'){
abi=abi->kriips;
}
}
*/
}
The problem starts from the last loop. The letter tree is created but i am not able to search the letter from the tree.
you didn't allocate any memory for *abi here
main(void) {
struct MorsePuu *morse, *abi;
char rida[128];
FILE *fm=NULL;
Abi is a pointer to struct MorsePuu, it points to a random location and when you use it later you cause undefined behaviour at line abi=abi->punkt.
You have to add a line
abi = malloc( sizeof( struct MorsePuu )) ;

Resources