I was reading about how to implement a DPDA and found this code in the following Internet address: http://code.zhoubot.com/, This c file implements a simple pushdown automata. The automata will read in a description of their transition function and input, perform its computation on the input, and then print their output.
The input format is like:
e01:e0$:000111:a:ad:aeeb$:b0eb0:b10ce:c10ce:ce$de
The input is separated by a semicolon “:”, first section is “input alphabet”, second is “stack alphabet”, then “input” and the last whole bunch are transition functions.
/* This C file implements a Deterministic Pushdown Automata
* author: Kevin Zhou
* Computer Science and Electronics
* University of Bristol
*/
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
typedef struct stack {
char content;
struct stack *next;
} Stack;
typedef struct transistion {
char current_state;
char input_symbol;
char pull;
char new_state;
char push;
} Transistion;
/* list of transistion functions */
typedef struct list {
Transistion *content;
struct list *next;
} List;
typedef struct pda {
char *input_alpha;
char *stack_alpha;
char *input;
char start;
char *accept;
List *transistion;
} PDA;
/* create a new empty stack */
Stack *create_stack( void ) {
Stack *s = calloc(1,sizeof(Stack));
if(s==NULL) {
printf("Out of Memory!");
exit(1);
}
return s;
}
/* test if the stack is empty */
int isempty( Stack *s ) {
return (s->next==NULL)? 1:0;
}
Stack *push_stack (Stack *s, char c) {
Stack *new = calloc(1,sizeof(Stack));
if(new ==NULL) {
printf("Out of Memory!");
exit(1);
}
new -> content = c;
new -> next = s;
return new;
}
Stack *pull_stack (Stack *s) {
Stack *head;
if(isempty(s)) {
return '\0';
}
head = s;
s = head -> next;
return s;
}
/*return the top elememt in the stack */
char top (Stack *s) {
return s->content;
}
/* replace a value 'ontop' which on top of the stack with a newvalue 'newvalue'
epsilon represents an empty element*/
Stack *replace(Stack *sta, char ontop, char newvalue, char epsilon) {
if(ontop == epsilon && newvalue == epsilon) return sta;
if(ontop == epsilon && newvalue != epsilon) {
sta = push_stack(sta,newvalue);
return sta;
}
if(ontop != epsilon && newvalue == epsilon) {
if(ontop != top(sta)) return NULL;
sta = pull_stack(sta);
return sta;
}
if(ontop != top(sta)) return NULL;
sta = pull_stack(sta);
sta = push_stack(sta,newvalue);
return sta;
}
/* turn the input string into transistion fields */
Transistion *get_transistion(char *s) {
Transistion *t = calloc(1,sizeof(Transistion));
t->current_state = s[0];
t->input_symbol = s[1];
t->pull = s[2];
t->new_state = s[3];
t->push = s[4];
return t;
}
/* turn the string into transitions and add into list */
List *insert_list( List *l, char *elem ) {
List *t = calloc(1,sizeof(List));
List *head = l;
while(l->next!=NULL)
l = l->next;
t->content = get_transistion(elem);
t->next = NULL;
l->next = t;
return head;
}
/* insert a transistion into a list */
List *insert_list_transistion( List *l, Transistion *tr) {
List *t = calloc(1,sizeof(List));
List *head = l;
while(l->next!=NULL)
l = l->next;
t->content = tr;
t->next = NULL;
l->next = t;
return head;
}
/*test if the char c is in the string s */
int contains ( char c, char *s ) {
int i=0;
while(1) {
if(c== s[i]) return 1;
if(s[i] == '\0') return 0;
i++;
}
}
/* test if the input is a valid input */
int is_valid_input( char *input_alpha, char *input ) {
int i=0;
char c;
while(1) {
c = input[i];
if(c == '\0') break;
if(!contains(c,input_alpha)) return 0;
i++;
}
return 1;
}
/* test if the input is a valid transistion */
int is_valid_transistion ( List *l, PDA *m) {
Transistion *t;
while(1) {
if(l==NULL) break;
t = l->content;
if(!contains(t->input_symbol,m->input_alpha)) return 0;
if(!contains(t->pull,m->stack_alpha)) return 0;
if(!contains(t->push,m->stack_alpha)) return 0;
l = l->next;
}
return 1;
}
/* create a pushdown automata */
PDA *createPDA (char *input) {
PDA *m = calloc(1,sizeof(PDA));
List *tr = calloc(1,sizeof(List));
char *buffer;
char *epsilon = calloc(1,sizeof(char));
/*read input alphabet of PDA*/
buffer = strtok(input,":");
if(buffer == NULL) {
printf("Error in reading input alphabet!\n");
exit(1);
}
m->input_alpha = buffer;
epsilon[0] = m->input_alpha[0];
/*read stack alphabet*/
buffer = strtok(NULL,":");
if(buffer == NULL) {
printf("Error in reading stack alphabet!\n");
exit(1);
}
m->stack_alpha = buffer;
/*read input sequence*/
buffer = strtok(NULL,":");
if(buffer == NULL) {
printf("Error in reading input sequence!\n");
exit(1);
}
if(!is_valid_input(m->input_alpha,buffer)) {
printf("Error! Input contains some invalid characters that don't match the input alphabet!\n");
exit(1);
}
m->input = buffer;
/*read start state*/
buffer = strtok(NULL,":");
if(buffer == NULL) {
printf("Invalid string!\n");
exit(1);
}
m->start = buffer[0];
/*read accept state*/
buffer = strtok(NULL,":");
if(buffer == NULL) {
printf("Invalid string!\n");
exit(1);
}
m->accept = buffer;
/*read transistion function*/
while(1) {
buffer = strtok(NULL,":");
if(buffer == NULL) break;
tr = insert_list(tr,buffer);
}
if(!is_valid_transistion(tr->next,m)) {
printf("Error! Invalid transistion functions!\n");
exit(1);
}
m->transistion = tr->next;
return m;
}
/*print a stack */
void print_stack2(Stack *s) {
if(s==NULL) {
return;
}
print_stack2(s->next);
printf("%c",s->content);
}
void print_stack(Stack *s) {
print_stack2(s);
printf("\n");
}
/* find a proper transition function for the current state */
Transistion *find_transistion(List *list,char input,char current,char e) {
Transistion *t;
while(1) {
if(list==NULL) return NULL;
t = list -> content;
if(t->current_state == current && t->input_symbol == input)
return t;
if(t->current_state == current && t->input_symbol == e)
return t;
list = list->next;
}
}
int isAccept(char current, char* accept) {
int i=0;
while(1) {
if(accept[i]=='\0') return 0;
if(accept[i]==current) return 1;
i++;
}
}
/*simulate the Pushdown automata */
void simulate(PDA *m) {
/* first symbol in input symbol used to represent the usual */
const char epsilon = m->input_alpha[0];
char current_state = m->start;
char input;
int i=0;
Stack *sta = create_stack();
Transistion *current_transistion;
Stack *backup;
while(1) {
/*get input*/
input = m->input[i];
if(input == '\0'&&isAccept(current_state,m->accept)) {
printf("Accept\n");
print_stack(sta);
break;
}
/*get transistion function*/
current_transistion = find_transistion(m->transistion,input,current_state,epsilon);
if(current_transistion==NULL) {
printf("Reject\n");
print_stack(sta);
break;
}
current_state = current_transistion->new_state;
backup = sta;
sta = replace(sta, current_transistion->pull, current_transistion->push,epsilon);
if(sta == NULL) {
printf("Reject\n");
print_stack(backup);
break;
}
if(current_transistion->input_symbol != epsilon&¤t_transistion->input_symbol != '\0')
i++;
}
}
void print(PDA *m) {
printf("input alphabet:%s\n",m->input_alpha);
printf("stack alphabet:%s\n",m->stack_alpha);
printf("input sequence:%s\n",m->input);
printf("start state:%c\n",m->start);
printf("accept state:%s\n",m->accept);
}
int main(void) {
char s[300];
PDA *p;
scanf("%s",s);
p = createPDA(s);
simulate(p);
return 0;
}
When trying to compile, the compiler tells me the following error:
Line 41: "error: invalid conversion from 'void *' to 'Stack *'
how I can fix this error, since I'm trying to understand
code?
You are likely using a C++ compiler instead of a C one. In C it's not required (it's actually discouraged) to cast void *. In C++ it's mandatory.
Incidentally, this C FAQ answers your problem. This one explains why casting void * can be problematic.
Related
I am trying to insert strings in the binary search tree.
So what I am to trying is,
parsing strings from a file(contains instruction set) and then inserting in the function
insertOpcodeFromFile().
So this function will execute
(*node) = Node_insert(&node,instruction).
the node will be the root of binary tree which is located in main function.
So in simple way to explain, I want to manipulate(insert) the root pointer in the main function by using double pointer in the other function contain insert function.
I have a simple understanding about the pointer, but in this situation, I need to use more than double pointer I think.
please explain me about the double pointer clearly using this example.
Here is my code(I commenting out insert_node)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef BINARYTREE_H_
#define BINARYTREE_H_
typedef struct node *NodePtr;
typedef struct node {
char *word;
int count;
NodePtr left;
NodePtr right;
} Node;
NodePtr Node_alloc();
NodePtr Node_insert(NodePtr node_ptr, char *word);
void clearArray(char a[]);
void insertOpcodeFromFile(FILE *opcodeFile, NodePtr *node);
void Node_display(NodePtr);
char *char_copy(char *word);
#endif
int main(int argc, const char * argv[]) {
FILE * opFile;
FILE * progFile;
struct node *root = NULL;
if ( argc != 4) { // # of flag check
fprintf(stderr, " # of arguments must be 4.\n" );
exit(1);
}
opFile = fopen ( argv[1], "r");
if(opFile == NULL)
{
fprintf(stderr,"There is no name of the opcode file\n");
exit(1);
}
progFile = fopen ( argv[2], "r");
if(progFile == NULL)
{
fprintf(stderr,"There is no name of the program file \n");
exit(1);
}
insertOpcodeFromFile(opFile, &root);
//Node_display(root);
}/* main is over */
void insertOpcodeFromFile(FILE *opcodeFile, NodePtr *node)
{
int fsize = 0;
int lengthOfInst = 0;
int c;
int i;
char buffer[100];
fsize = getFileSize(opcodeFile);
enum flag {ins,opc,form};
int flag = ins;
char instruction[6];
unsigned int opcode = 0;
unsigned char format;
while (c != EOF)
{
c = fgetc(opcodeFile);
buffer[i++] = c;
if (c == 32){
switch (flag) {
case ins:
flag = opc;
memcpy(instruction,buffer,i);
instruction[i] = '\0';
clearArray(buffer);
i = 0;
// printf("인스트럭션 : %s\n",instruction );
break;
case opc:
flag = form;
opcode = atoi(buffer);
clearArray(buffer);
i = 0;
// printf("옵코드 : %d\n",opcode );
break;
default:
break;
}/* end of switch */
}/* end of if(space) */
if((c == 10) || (c == EOF))
{
if (flag == form)
{
format = buffer[0];
clearArray(buffer);
i = 0;
// printf("포멧: %c\n", format);
}
flag = ins;
//node = Node_insert(node,instruction);
}
}
//Node_display(node);
}
int getFileSize(FILE *opcodeFile)
{ int fsize = 0;
fseek(opcodeFile,0, SEEK_SET);
fseek(opcodeFile,0, SEEK_END);
fsize = (int)ftell(opcodeFile);
fseek(opcodeFile,0, SEEK_SET);
return fsize;
}
int countUntilSpace(FILE *opcodeFile, int currentPosition)
{ char readword[1];
char *space = " ";
char *nextLine = "/n";
int i = 0;
//printf("현재: %d\n",currentPosition );
while(1)
{
fread(readword, sizeof(char),1,opcodeFile);
i++;
if(strcmp(readword,space) == 0 || strcmp(readword,nextLine) == 0)
{
//printf("break\n");
break;
}
}
fseek(opcodeFile,currentPosition ,SEEK_SET);
//printf("끝난 현재 :%d\n",ftell(opcodeFile) );
//printf("%I : %d\n",i );
return i - 1;
}
void clearArray(char a[])
{
memset(&a[0], 0, 100);
}
NodePtr Node_alloc()
{
return (NodePtr) malloc(sizeof(NodePtr));
}
NodePtr Node_insert(NodePtr node_ptr, char *word)
{
int cond;
if (node_ptr == NULL) {
node_ptr = Node_alloc();
node_ptr->word = char_copy(word);
node_ptr->count = 1;
node_ptr->left = node_ptr->right = NULL;
} else if ((cond = strcmp(word, node_ptr->word)) == 0) {
node_ptr->count++;
} else if (cond < 0) {
node_ptr->left = Node_insert(node_ptr->left, word);
} else {
node_ptr->right = Node_insert(node_ptr->right, word);
}
return node_ptr;
}
void Node_display(NodePtr node_ptr)
{
if (node_ptr != NULL) {
Node_display(node_ptr->left);
printf("%04d: %s\n", node_ptr->count, node_ptr->word);
Node_display(node_ptr->right);
}
}
char *char_copy(char *word)
{
char *char_ptr;
char_ptr = (char *) malloc(strlen(word) + 1);
if (char_ptr != NULL) {
char_ptr = strdup(word);
}
return char_ptr;
}
In this case, in main(),
Node *root;
Why do you need to use a "double" pointer ( Node ** ) in functions that alter root is because root value as to be set in these functions.
For instance, say you want to allocate a Node and set it into root.
If you do the following
void alloc_root(Node *root) {
root = malloc(sizeof (Node));
// root is a function parameter and has nothing to do
// with the 'main' root
}
...
// then in main
alloc_root( root );
// here main's root is not set
Using a pointer to pointer (that you call "double pointer")
void alloc_root(Node **root) {
*root = malloc(sizeof (Node)); // note the *
}
...
// then in main
allow_root( &root );
// here main's root is set
The confusion comes probably from the Node *root in main, root being a pointer to a Node. How would you set an integer int i; in a function f? You would use f(&i) to call the function f(int *p) { *p = 31415; } to set i to the value 31415.
Consider root to be a variable that contains an address to a Node, and to set its value in a function you have to pass &root. root being a Node *, that makes another *, like func(Node **p).
I'm attempting to insert stdin strings into a binary search tree, and then output the inorder and postorder traversals. My problem is it seems like older nodes are being overwritten by the most recent, but I can't figure out why. So my inorder/postorder methods return the last inserted string how many times there are nodes in the tree.
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <string.h>
#include <ctype.h>
#include "bstsort.h"
/* Case Sensitive String Comparison */
int strcmpCase(char* string1, char* string2) {
while(*string1 && (*string1==*string2)) {
string1++;
string2++;
}
return *string1 - *string2;
}
/* Case Insensitive String Comparison */
int strcmpNoCase(char* string1, char* string2) {
int i;
char a[100];
char b[100];
strcpy(a, string1);
strcpy(b, string2);
for (i = 0; a[i]; ++i) {
a[i] = tolower(a[i]);
}
for (i = 0; b[i]; ++i) {
b[i] = tolower(b[i]);
}
return strcmpCase(a, b);
}
/* Inserts a Node into the Binary Search Tree */
void insert(struct Node **node, char *keyStr, int cflag) {
// Creates new node
if (*node == NULL) {
*node = (struct Node*) malloc (100);
strcpy((*node)->key, keyStr);
(*node)->left = NULL;
(*node)->right = NULL;
(*node)->count = 1;
return;
}
// Compares Strings
int compareResult;
if (cflag == 1){
// Case sensitive
compareResult = strcmpCase(keyStr, (*node)->key);
} else {
// Case insensitive
compareResult = strcmpNoCase(keyStr, (*node)->key);
}
// Moves down branches of BST to insert node in correct order
if (compareResult < 0) {
insert(&((*node)->left), keyStr, cflag);
} else if (compareResult > 0) {
insert(&((*node)->right), keyStr, cflag);
}
(*node)->count++;
return;
}
/* Outputs in-order traversal or BST */
void inorder(Node* root) {
if (root != NULL) { // if current node is not null
inorder(root->left); // travel down left child, recursively
printf("%s", root->key); // prints key of current node, the root
inorder(root->right); // travel down right child after root printed, recursively
}
}
void postorder(Node* root) {
printf("Postorder: \n");
if (root != NULL) { // if current node is not null
postorder(root->left); // travel down left child, recursively
postorder(root->right); // travel down right child, recursively
printf("%s", root->key); // prints key of current node, the root
}
}
int main(int argc, char **argv) {
extern char *optarg;
extern int optind;
int c, err = 0, i = 0, numRead, isfirst = 1;
int cflag = 0, oflag = 0;
char *inName = NULL; // Input filename
char *outName = NULL; // Output filename
static char usage[] = "Usage: bstsort [-c] [-o output_file_name] [input_file_name]\n";
FILE* inFile = NULL;
FILE* outFile = NULL;
char *line;
char tmp[100] = "";
struct Node *root = NULL;
while ((c = getopt(argc, argv, "co:")) != -1)
switch (c) {
case 'c':
cflag = 1;
break;
case 'o':
oflag = 1;
outName = optarg;
break;
case '?':
err = 1;
break;
}
if (err) {
fprintf(stderr, usage, argv[0]);
exit(1);
}
/* see what we have */
printf("cflag: %d\n", cflag);
printf("oflag: %d\n", oflag);
printf("Output Filename: \"%s\"\n", outName);
/* these are the arguments after the command-line options */
if (optind < argc) {
for (; optind < argc; optind++) {
inName = argv[optind];
printf("Input Filename: \"%s\"\n", inName);
}
} else {
printf("No input filename provided.\n");
}
/* Reads stdin one line at a time when input filename not provided*/
line = (char*) malloc (100);
if (inName == NULL) {
printf("\nEnter one line at a time:\n");
fflush(stdout);
fgets(line, 100, stdin);
insert(&root, line, cflag);
while (strcmp(line, "\n") != 0) {
fflush(stdout);
fgets(line, 100, stdin);
if (strcmp(line, "\n") != 0) {
insert(&root, line, cflag);
}
}
}
inorder(root);
postorder(root)
free(line);
free(root);
fclose(inFile);
exit(0);
}
Here's my Node struct
#ifndef BSTSORT_H
#define BSTSORT_H
/* Binary Search Tree Node Struct */
typedef struct Node {
char *key;
int count;
struct Node *left;
struct Node *right;
} Node;
void insert(Node **node, char *keyStr, int cflag);
void inorder(Node* root);
void postorder(Node* root);
void display_tree(Node* nd);
#endif
Any tips would be great.
I would like to read from a file, line by line. Each line has 3 arguments guaranteed. First 2 are first and last name and third is age.
I want to make a linked list, in which, each node represents a person (line) in the file.
I don't know the size of the names so I made it dynamic. I also don't know the number of lines in the file, so I would like that to be dynamic too.
My approach was to use fscanf, but then I wouldn't know how much memory needs to be allocated prior to reading it.
The function convertToList is supposed to receive a file path of the file we wanna read, convert it to a linked list, then return the head node. (Open to improvements)
Check out my code and see where I got stuck:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef enum
{
FALSE,
TRUE
}bool;
struct Node{
char firstName[50];
char lastName[50];
int age;
struct Node *next;
};
typedef struct {
struct Node *head;
}LinkedList;
struct Node * convertToList(char *inputFilePath);
int main(int argc, char* argv[]) {
if(argc != 4) {
printf("Invalid arguments.\n");
exit(0);
}
if (strlen(argv[3])!=1) {
printf("Invalid sorting type.\n");
exit(0);
}
char *inputFilePath = (char*) malloc(sizeof(char*) * strlen(argv[1]) +1);
memcpy(inputFilePath, argv[1], strlen(argv[1]));
char *outputFilePath = (char*) malloc(sizeof(char*) * strlen(argv[2]) +1);
memcpy(outputFilePath, argv[2], strlen(argv[2]) +1);
char *sortType = argv[3];
//LinkedList* inputList = (LinkedList*)malloc(sizeof(struct Node));
struct Node* head = malloc(sizeof(struct Node));
head = convertToList(inputFilePath);
printf("\n%s %s %d\n", head->firstName, head->lastName, head->age);
// printf("\nsaaap\n");
getchar();
}
struct Node * convertToList(char *inputFilePath) {
FILE* ifp;
ifp = fopen(inputFilePath, "r");
if (!ifp) { perror("fopen"); exit(0); }
struct Node *head = NULL;
struct Node *prev = NULL;
bool isHead = TRUE;
while(!feof(ifp)) {
struct Node *tmp = (struct Node*)malloc(sizeof(struct Node));
if (prev != NULL)
prev->next = tmp;
if (head==NULL)
head = tmp;
fscanf(ifp, "%s %s %d\n", tmp->firstName, tmp->lastName, &tmp->age);
prev = tmp;
//Need to link to next node as well
}
fclose(ifp);
return head;
}
I know that the fscanf is wrong, but I'm not sure how to fix it.
Also, how do I return the root? Is my approach gonna work?
And lastly, how do can I set the next node in the list? I don't see it happening with the current while loop.
Thanks.
If you need to link the nodes this is how you can do it and use dynamic storage, here you go, I didn't think this very much but it is Ok.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
struct Node
{
char *firstName;
char *lastName;
int age;
struct Node *next;
};
struct Node *convertToList(const char *const inputFilePath);
void freeList(struct Node *);
int main(int argc, char* argv[])
{
struct Node *head;
if (argc != 2)
{
printf("Invalid arguments.\n");
return 1;
}
head = convertToList(argv[1]);
if (head != NULL)
{
struct Node *current;
current = head;
while (current != NULL)
{
fprintf(stderr, "%s %s %d\n", current->firstName, current->lastName, current->age);
current = current->next;
}
/* do manupulations with the list, example above, print the values */
freeList(head);
}
return 0;
}
void freeList(struct Node *node)
{
struct Node *current;
current = node;
while (current != NULL)
{
struct Node *next;
next = current->next;
if (current->firstName != NULL)
free(current->firstName);
if (current->lastName != NULL)
free(current->lastName);
free(current);
current = next;
}
}
size_t appendChar(char **buffer, char character, size_t length)
{
char *temporary;
if (buffer == NULL)
return length;
temporary = realloc(*buffer, 1 + length);
if (temporary == NULL)
return length;
temporary[length] = character;
*buffer = temporary;
return 1 + length;
}
struct Node *parseFileLine(char *line)
{
char *word;
struct Node *node;
char *endptr;
if (line == NULL)
return NULL;
node = malloc(sizeof(struct Node));
if (node == NULL)
return NULL;
node->firstName = NULL;
node->lastName = NULL;
node->age = -1; // an invalid value;
node->next = NULL;
word = strtok(line, " ");
if (word == NULL)
return node;
node->firstName = strdup(word);
word = strtok(NULL, " ");
if (word == NULL)
return node;
node->lastName = strdup(word);
word = strtok(NULL, " ");
if (word == NULL)
return node;
node->age = strtol(word, &endptr, 10);
if (*endptr != '\0')
node->age = -1;
return node;
}
struct Node *getNode(FILE *file)
{
char *line;
int character;
size_t length;
line = NULL;
length = 0;
while ((character = fgetc(file)) != EOF)
{
if (((char)character == '\n') && (line != NULL))
{
struct Node *node;
length = appendChar(&line, '\0', length);
node = parseFileLine(line);
free(line);
return node;
}
length = appendChar(&line, (char)character, length);
}
if (line != NULL)
free(line);
return NULL;
}
struct Node *convertToList(const char *const inputFilePath)
{
FILE *ifp;
struct Node *head;
struct Node *current;
struct Node *last;
ifp = fopen(inputFilePath, "r");
if (ifp == NULL)
{
perror("fopen");
return NULL;
}
head = NULL;
last = NULL;
while ((current = getNode(ifp)) != NULL)
{
if (current == NULL)
return head;
if (head == NULL)
head = current;
if (last != NULL)
last->next = current;
last = current;
}
fclose(ifp);
return head;
}
Here you can also print the nodes to see that the data is correctly there.
I think you don't understand what malloc is for and you don't know much about pointers too, in your fscanf you are storing data in firstName and lastName without allocating memory for it, they are not even initialized so you would get a segmentation fault.
A somewhat different approach.
argv copying
First off, as mentioned, you do not need to copy argv values. Main reason for doing do is if you manipulate the values. There are also cases where one want to erase argv values as they can be read by ps and other tools, read from /proc/ etc. For example some programs take passwords as argument, to prevent password to be readable by anyone having access to the system one typically copy the argument then overwrite the argv value.
It is however usually good practice to use variables for the arguments. It usually makes the code clearer, but also makes it easier to maintain if one do changes. E.g. implement flag arguments like -f <filename>.
exit() and return from main()
You also exit() with zero on error. You would want to exit with zero on success, and other value on error or other. This is the norm. 0 == success. Some applications implement numeric exit codes that can mean different things. E.g. 0 is normal exit, 1 is not an error but some special case, 2 likewise 3 might be an error etc. For example grep:
EXIT STATUS
The exit status is 0 if selected lines are found, and 1 if not found. If an
error occurred the exit status is 2. (Note: POSIX error handling code should
check for '2' or greater.)
scanf
When you use scanf to read strings there are some tricks that can be used to make it better. First off always use the size parameter.
char name[16]
sscanf(buf, "%15s", name);
Do also check items read:
if (sscanf(buf, "%15s %d", name, &age) != 2)
... error ...
Third you can also save number of bytes read by %n:
sscanf(buf, "%n%15s%n %n%d%n", &of1, name, &of2, &age, &of3)
Usage
A very simple, but also quick and user-friendly thing, is to add a usage function.
Typically:
int usage(const char *self, const char *err_str)
{
fprintf(stderr,
"Usage: %s <in-file> <out-file> <sort-type>\n"
" Sort types:\n"
" f Sort by First Name\n"
" l Sort by Last Name\n"
" a Sort by Age\n"
,
self
);
if (err_str) {
fprintf(stderr,
"\nError: %s\n",
err_str
);
}
return ERR_ARG;
}
Then in main() you can quickly and clean add something like:
if (argc < 4)
return usage(argv[0], "Missing arguments.");
A note on you validation of the sort argument. Instead of using strlen() you can check if byte 2 is 0.
if (argv[3][1] != '\0')
... error ...
Finally main could be something like:
int main(int argc, char *argv[])
{
char *in_file, *out_file, sort;
struct Node *head = NULL;
int err = 0;
if (argc < 4)
return usage(argv[0], "Missing arguments.");
if (argc > 4)
return usage(argv[0], "Unknown arguments.");
if (argv[3][1] != '\0')
return usage(argv[0], "Invalid sorting type.");
in_file = argv[1];
out_file = argv[2];
sort = argv[3][0];
if (sort != 'f' && sort != 'l' && sort != 'a')
return usage(argv[0], "Invalid sorting type.");
if ((err = file_to_llist(in_file, &head)) != 0)
return err;
prnt_llist(stdout, head);
free_ll(head);
return err;
}
malloc helpers
When dealing with a lot of mallocing and similar it can be useful to add some helper functions. If you get a memory error you normally would exit right away.
void *alloc(size_t size)
{
void *buf;
if ((buf = malloc(size)) == NULL) {
fprintf(stderr, "Memory error.\n");
exit(ERR_MEM);
}
return buf;
}
void *re_alloc(void *old, size_t size)
{
void *buf;
if ((buf = realloc(old, size)) == NULL) {
fprintf(stderr, "Memory error.\n");
exit(ERR_MEM);
}
return buf;
}
Parsing of the file
As you want to have everything dynamically allocated and no limits (beyond system memory) one solution is to implement some sort of tokenizer. It can be helpful to use a struct to hold it together. Something like:
struct file_toker {
FILE *fh; /* File handle */
char *buf; /* Dynamic Read buffer */
size_t size; /* Size of buffer */
size_t len; /* Length of actual data in buffer. */
};
One point here is to keep length of tokens read. By this one do not need to keep using strlen etc.
If you can afford it it would usually be better to read whole file in one go, then parse the buffer. Optionally one can read file in chunks of say 4096*16 bytes, but then one get some complexity when it comes to overlapping lines between reads etc.
Anyhow in this example one byte is read at a time.
Start code
Finally a starting ground could be something like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* memcpy/strncpy */
#include <errno.h> /* errno for fopen() */
#include <ctype.h> /* isspace() */
#define ERR_ARG 1
#define ERR_FILE_FMT 2
#define ERR_MEM 3
struct Node {
char *name_first;
char *name_last;
int age;
struct Node *next;
};
struct file_toker {
FILE *fh;
char *buf;
size_t size;
size_t len;
};
/* ===============----- GEN HELPERS ------=================== */
int usage(const char *self, const char *err_str)
{
fprintf(stderr,
"Usage: %s <in-file> <out-file> <sort-type>\n"
" Sort types:\n"
" f Sort by First Name\n"
" l Sort by Last Name\n"
" a Sort by Age\n"
,
self
);
if (err_str) {
fprintf(stderr,
"\nError: %s\n",
err_str
);
}
return ERR_ARG;
}
void *alloc(size_t size)
{
void *buf;
if ((buf = malloc(size)) == NULL) {
fprintf(stderr, "Memory error.\n");
exit(ERR_MEM);
}
return buf;
}
void *re_alloc(void *old, size_t size)
{
void *buf;
if ((buf = realloc(old, size)) == NULL) {
fprintf(stderr, "Memory error.\n");
exit(ERR_MEM);
}
return buf;
}
/* ===============----- LINKED LIST ------=================== */
void free_node(struct Node *n)
{
if (!n)
return;
if (n->name_first)
free(n->name_first);
if (n->name_last)
free(n->name_last);
free(n);
}
void free_ll(struct Node *n)
{
struct Node *p;
if (!n)
return;
for ( ; n ; ) {
p = n;
n = n->next;
free_node(p);
}
}
void prnt_llist(FILE *fd, struct Node *n)
{
int i = 0;
fprintf(fd, "NODELIST:\n");
for ( ; n != NULL ; n = n->next) {
fprintf(fd,
"Entry %d {\n"
" Name: %s, %s\n"
" Age : %d\n"
"}\n",
++i,
n->name_last,
n->name_first,
n->age
);
}
}
/* ================--------- FILE TOKER ------------==================== */
/* Free / close reader. */
void free_ft(struct file_toker *ft)
{
if (!ft)
return;
if (ft->fh)
fclose(ft->fh);
free(ft->buf);
ft->fh = NULL;
ft->buf = NULL;
}
/* Initiate reader. */
int ft_init(struct file_toker *ft, const char *fn, size_t buf_sz)
{
ft->size = buf_sz;
ft->len = 0;
ft->buf = alloc(ft->size);
ft->fh = fopen(fn, "r");
if (!ft->fh) {
perror("Unable to open file");
return errno;
}
return 0;
}
/* Increase buffer size. */
size_t ft_increase(struct file_toker *ft)
{
if (ft->size < 1)
ft->size = 1;
ft->size *= 2;
ft->buf = re_alloc(ft->buf, ft->size);
return ft->size;
}
/* Read and skip spaces (\n, \r, ' ', \t etc.). Return first non-space. */
char ft_skip_space(struct file_toker *ft)
{
int c;
while ((c = fgetc(ft->fh)) != EOF && isspace(c))
;
return c == EOF ? 0 : (char)c;
}
/* Read next token */
size_t file_tok(struct file_toker *ft)
{
size_t i = 1;
size_t max;
int c;
if (ft->size < 2)
ft_increase(ft);
ft->len = 0;
max = ft->size - 1;
/* Skip any leading spaces. Function return first non-space. */
if ((ft->buf[0] = ft_skip_space(ft)) == 0)
return 0;
while ((c = fgetc(ft->fh)) != EOF) {
/* If space, break. */
if (isspace(c))
break;
/* Save char to buffer. */
ft->buf[i++] = (char)c;
/* If entire buffer used, increase it's size. */
if (i > max)
max = ft_increase(ft) - 1;
}
/* Null terminate. */
ft->buf[i] = 0x00;
/* Length without terminating null */
ft->len = i;
return i;
}
/* Read next space separated token and save it as new allocated string. */
int file_tok_str(struct file_toker *ft, char **out)
{
if (file_tok(ft) == 0)
return 1;
*out = alloc(ft->len + 1);
memcpy(*out, ft->buf, ft->len + 1);
return 0;
}
/* Read next space separated token and scan it as int. */
int file_tok_int(struct file_toker *ft, int *out)
{
if (file_tok(ft) == 0)
return 1;
if ((sscanf(ft->buf, "%d", out)) != 1)
return 1;
return 0;
}
/* ===============----- FILE PARSER ------=================== */
int file_to_llist(const char *fn, struct Node **head)
{
struct Node *node = NULL, *cur = *head;
struct file_toker ft;
/* Initiate new file token reader, initial buffer size 4096 bytes. */
if (ft_init(&ft, fn, 4096))
return 1;
while (1) {
/* Allocate next node */
node = alloc(sizeof(struct Node));
node->name_first = NULL;
node->name_last = NULL;
/* Read and copy first name. */
if (file_tok_str(&ft, &node->name_first))
break;
/* Read and copy last name. */
if (file_tok_str(&ft, &node->name_last))
break;
/* Read and copy age. */
if (file_tok_int(&ft, &node->age))
break;
/* Link and save current for next iteration. */
node->next = NULL;
if (cur) {
cur->next = node;
}
cur = node;
if (*head == NULL)
*head = node;
}
/* Free last unused node. */
free_node(node);
free_ft(&ft);
return 0;
}
/* ===============----- MAIN ROUTINE ------=================== */
int main(int argc, char *argv[])
{
char *in_file, *out_file, sort;
struct Node *head = NULL;
int err = 0;
if (argc < 4)
return usage(argv[0], "Missing arguments.");
if (argc > 4)
return usage(argv[0], "Unknown arguments.");
if (argv[3][1] != '\0')
return usage(argv[0], "Invalid sorting type.");
in_file = argv[1];
out_file = argv[2];
sort = argv[3][0];
if (sort != 'f' && sort != 'l' && sort != 'a')
return usage(argv[0], "Invalid sorting type.");
if ((err = file_to_llist(in_file, &head)) != 0)
return err;
prnt_llist(stdout, head);
free_ll(head);
return err;
}
I have a problem with my code. I am getting a segmentation fault error, which I understand is a dangling pointer problem(generally) or a faulty allocation of memory. The compiler dose not show at what line the problem might be, so my question is how do I detect these problems for further concern? and where would my problem be in the code?
here is my code:
`#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define ARRAY_SIZE(a) sizeof(a)/sizeof(a[0])
#define ALPHABET_SIZE (256)
#define CHAR_TO_INDEX(c) ((int)c - (int)'a')
#define LEVELS 255
// trie node
struct n
{
char value,level,isLeaf;
struct n* children[ALPHABET_SIZE];
struct n* failLink;
};
typedef struct n node;
//trie
struct t
{
node *root;
int count;
};
typedef struct t trie;
void bytesCpy(char *to, char *from, int len)
{
int i;
for(i=0;i<len;i++)
{
to[i]=from[i];
}
}
// Returns new trie node (initialized to NULLs)
node *getNode(trie *t, char value,char level)
{
node *pNode = NULL;
pNode = (node *)malloc(sizeof(node));
if (pNode)
{
printf("ok\n");
int i;
for (i = 0; i < ALPHABET_SIZE; i++)
{
pNode->children[i] = NULL;
}
pNode->failLink = t->root;
pNode->value=value;
pNode->level=level;
pNode->isLeaf=0;
}
else
printf("error\n");
return pNode;
}
// Initializes trie (root is dummy node)
void initialize(trie *t)
{
t->root = getNode(t, '[', 0);
//t->count = 0;
}
// If not present, inserts key into trie
// If the key is prefix of trie node, just marks leaf node
void insert(trie *t, char key[], int len)
{
int level;
char value;
node *node = t->root;
for (level = 0; level<len; level++)
{
value = key[level];
printf("value: %c\n",value);
if (node->children[value] == NULL)
{
node->children[value] = getNode(t, value, level+1);
}
node = node->children[value];
}
node->isLeaf=1;
}
// Returns non zero, if key presents in trie
int search(trie *t, char key[])
{
int level;
int length = strlen(key);
int value;
node *node;
node = t->root;
for (level = 0; level < length; level++)
{
value = key[level];//CHAR_TO_INDEX(key[level]);
if (!node->children[value])
{
node = node->failLink;
return 0;
}
node = node->children[value];
}
return (0 != node);// && node->value);
}
void search1(trie *t, char *c, int len)
{
node *curNode = t->root;
int i;
for(i=0; i<=len; i++)
{
printf("i=%d curnode=%p\n",i,curNode);
if(curNode->isLeaf) //leaf: cuvant gasit
{
printf("if1 curGasit \n");
do{
curNode=curNode->failLink;
if(curNode->isLeaf)
printf("if1 curGasit \n");
else break;
}while(1);
continue;
}
else //nu e gasit inca
{
if(curNode->children[c[i]]==NULL) //fail
{
printf("if2\n");
curNode = curNode->failLink;
continue;
}
else //litera gasita: go on
{
printf("el2\n");
curNode=curNode->children[c[i]];
}
}
}
printf("end of search\n");
}
node* searchAux(trie *t, node *curRoot, char cuv[], char len, int level ,int failLevel)
{
char cuvAux[1024];
bytesCpy(cuvAux,cuv,len);
printf("searchAux level:%d cuvAux:%s curRootLevel:%d\n",level,cuvAux,curRoot->level);
if(cuvAux[level+1] == '\0') //got to the end of cuvAux
{
printf("1st if\n");
return curRoot;
}
if(curRoot->children[cuvAux[level+1]] == NULL) //fail: letter not found
{
printf("3rd if\n");
return searchAux(t, t->root, &cuvAux[failLevel+1], len, 0, failLevel+1);
}
else //letter found: go on
{
printf("3rd else\n");
if(cuvAux[level+2] == '\0') //the found letter was the last of the string
{
printf("4th if\n");
return curRoot->children[cuvAux[level+1]]; //return final pointer
}
else //the found letter was not the last of the string: continue with the next one
{
printf("4th else\n");
return searchAux(t, curRoot->children[cuvAux[level+1]], cuvAux, len, level+1, failLevel);
}
}
}
void createFailLinks(trie *t, node* curRoot, char cuv[], int level)
{
int i;
char cuvAux[1024];
bytesCpy(cuvAux,cuv,1024);
if(curRoot == NULL)
return;
for(i=0;i<ALPHABET_SIZE/*curRoot->children[i] != NULL*/;i++)
{
if(curRoot->children[i] == NULL)
continue;
else
{
cuvAux[level] = curRoot->children[i]->value;
printf("createFailLinks %c%d\n",cuvAux[level],curRoot->children[i]->level);
curRoot->children[i]->failLink = searchAux(t, t->root, cuvAux, level+1, 0, 0);
createFailLinks(t,curRoot->children[i],cuvAux,level+1);
}
}
printf("got\n");
}
void printTrie(node *curRoot)
{
int i;
if(curRoot == NULL)
return;
printf("%c: ", curRoot->value);
for(i=0;i<ALPHABET_SIZE;i++)
if(curRoot->children[i] != NULL)
{
printf("%c ", i);
}
printf("\n");
for(i=0;i<ALPHABET_SIZE;i++)
if(curRoot->children[i] != NULL)
{
printTrie(curRoot->children[i]);
}
}
void checkLinks(node* curRoot)
{
int i;
if(curRoot == NULL)
return;
printf("node %c%d: ",curRoot->value,curRoot->level);
for(i=0;i<256;i++)
if(curRoot->children[i] != NULL)
printf("\n\t%c%d:%c%d",curRoot->children[i]->value, curRoot->children[i]->level, curRoot->children[i]->failLink->value,curRoot->children[i]->failLink->level);
printf("\n");
for(i=0;i<256;i++)
if(curRoot->children[i] != NULL)
checkLinks(curRoot->children[i]);
}
int mai()
{
FILE *fd = fopen("VirusDatabase.txt","r");//O_RDONLY);
int i;
char c;
for(i=0;i<1000;i++)
{
fscanf(fd, "%c", &c);
printf("%c",c);
}
}
int main()
{
// Input keys (use only 'a' through 'z' and lower case)
char keys[][1024] = { "he", "she", "her", "his", "heres"};
char cuv[] = {'\0','\0','\0','\0','\0','\0'};
trie t;
char output[][32] = { "Not present in trie", "Present in trie" };
int i;
char text[]={"andreiherutshevlastashecristihiskatjaheres"};
initialize(&t);
// Construct trie
for (i = 0; i < ARRAY_SIZE(keys); i++)
{
insert(&t, keys[i], strlen(keys[i]));
}
createFailLinks(&t, t.root, cuv, 0);
printTrie(t.root);
printf("\n\n");
checkLinks(t.root);
search1(&t, text, strlen(text));
return 0;
// Search for different keys
printf("%s --- %s\n", "abcd", output[search(&t, "abcd")]);
printf("%s --- %s\n", "ab", output[search(&t, "ab")]);
printf("%s --- %s\n", "ccdd", output[search(&t, "ccdd")]);
printf("%s --- %s\n", "thaw", output[search(&t, "thaw")]);
return 0;
char a = getchar();
}`
Do you have access to a debugger? I ran your code in a debugger and get a memory access violation at line 157 here:
return searchAux(t, t->root, &cuvAux[failLevel+1], len, 0, failLevel+1);
You seem to be recursively calling searchAux. ie you have:
node* searchAux(trie *t, node *curRoot, char cuv[], char len, int level ,int failLevel)
{
char cuvAux[1024];
...
return searchAux(t, t->root, &cuvAux[failLevel+1], len, 0, failLevel+1);
...
Anyway, eventually the buffer size variable failLevel exceeds the size of your buffer so you are attempting to access memory outside the bounds of your array which is why you get an access violation.
The easiest way to debug is use an interactive debugger. On Windows there is a free version of Visual Studio with a very good debugger. On linux you can use GDB.
Failing that you can embed print statements to print out variables before the crash.
You can add print statements at lines of code.
#include <iostream>
std::cout << "At Line: " << __LINE__ << endl;
putting that at various lines of code, you can see what lines got executed, and find where it crashes.
This is for C++. My bad. Same idea, but put printf() statements and see where it stopped executing to narrow down the crash location.
I want to be able to use my getNextWord function to return a pointer to the next word in the file. I think I'm getting the seg fault while inserting but I just can't figure it out. Any help on this would be excellent. Also, I should probably find a better way of getting my hash_table_size than increasing a count for the total number of words in the file then rewinding. How can I make the size grow automatically?
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
int hash_table_size;
char* getNextWord(FILE* fd) {
char c;
char buffer[256];
int putChar = 0;
while((c = fgetc(fd)) != EOF) {
if(isalnum(c)) break;
}
if(c == EOF) return NULL;
buffer[putChar++] = c;
while((c = fgetc(fd)) != EOF) {
if(isspace(c) || putChar >= 256 -1) break;
if(isalnum(c))
buffer[putChar++] = c;
}
buffer[putChar] = '\0';
return strdup(buffer);
}
struct node {
struct node *next;
int count;
char* key;
};
struct list {
struct node *head;
int count;
};
struct list *hashTable = NULL;
/*
* djb2 hash function
*/
unsigned int hash(unsigned char *str) {
unsigned int hash = 5381;
int c;
while(c == *str++)
hash = ((hash << 5) + hash) + c;
return (hash % hash_table_size);
}
struct node* createNode(char *key) {
struct node *new_node;
new_node = (struct node *)malloc(sizeof(struct node));
strcpy(new_node->key, key);
new_node->next = NULL;
return new_node;
}
void hashInsert(char *str) {
int hash_dex = hash(str);
struct node *new_node = createNode(str);
if(!hashTable[hash_dex].head) {
hashTable[hash_dex].head = new_node;
hashTable[hash_dex].count = 1;
return;
}
new_node->next = (hashTable[hash_dex].head);
hashTable[hash_dex].head = new_node;
hashTable[hash_dex].count++;
return;
}
void display() {
struct node *current;
int i;
while(i < hash_table_size) {
if(hashTable[i].count == 0)
continue;
current = hashTable[i].head;
if(!current)
continue;
while(current != NULL) {
char tmp[256];
strcpy(tmp, current->key);
printf("%s", tmp);
current = current->next;
}
}
return;
}
int main(int argc, char *argv[]) {
if (argc != 2) {
printf("Usage: ./hashFile textfile\n");
}
else {
FILE *file = fopen(argv[1], "r");
if(file == 0) {
printf("Could not open file\n");
}
else {
char *new_word;
while((new_word = getNextWord(file)) != NULL) {
hash_table_size++;
}
rewind(file);
hashTable = (struct list *)calloc(hash_table_size, sizeof(struct list));
while((new_word = getNextWord(file)) != NULL) {
hashInsert(new_word);
}
display();
fclose(file);
}
}
return 0;
}
int c;
while(c == *str++)
hash = ((hash << 5) + hash) + c;
c is not initialized here. As is i in display function. Please enable all the compiler warnings and fix them.
Also:
char c;
char buffer[256];
int putChar = 0;
while((c = fgetc(fd)) != EOF) {
if(isalnum(c)) break;
}
c has to be of type int not char.
change
while(c == *str++)
to
while(c = *str++)