So i'm using a flex/bison parser but the variable names arent printing correctly. It understands the number values. I've tried messing with everything but I'm lost. heres a link to the output. its where it prints "Data: 0" that i'm trying to get the variable name [https://imgur.com/vJDpgpR][1]
invocation is: ./frontEnd data.txt
//main.c
#define BUF_SIZE 1024
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern FILE* yyin;
extern yyparse();
int main(int argc, char* argv[]){
if(argc < 2){
FILE* fp = fopen("temp.txt", "a");
printf("Entering data: \n");
void *content = malloc(BUF_SIZE);
if (fp == 0)
printf("error opening file");
int read;
while ((read = fread(content, BUF_SIZE, 1, stdin))){
fwrite(content, read, 1, fp);
}
if (ferror(stdin))
printf("There was an error reading from stdin");
fclose(fp);
yyparse(fp);
}
if(argc == 2){
yyin = fopen(argv[2], "r");
if(!yyin)
{
perror(argv[2]);
printf("ERROR: file does not exist.\n");
return 0;
}
yyparse (yyin);
}
return 0;
}
void yyerror(char *s){
fprintf(stderr, "error: exiting %s \n", s);
}
//lex.l
%{
#include <stdio.h>
#include <stdlib.h>
#include "parser.tab.h"
extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;
%}
%option noyywrap
%option nounput yylineno
%%
"stop" return STOP;
"iter" return ITER;
"scanf" return SCANF;
"printf" return PRINTF;
"main" return MAIN;
"if" return IF;
"then" return THEN;
"let" return LET;
"func" return FUNC;
"//" return COMMENT; printf("\n");
"start" return START;
"=" return ASSIGN;
"=<" return LE;
"=>" return GE;
":" return COLON;
"+" return PLUS;
"-" return MINUS;
"*" return MULT;
"/" return DIV;
"%" return MOD;
"." return DOT;
"(" return RPAREN;
")" return LPAREN;
"," return COMMA;
"{" return RBRACE;
"}" return LBRACE;
";" return SEMICOLON;
"[" return LBRACK;
"]" return RBRACK;
"==" return EQUAL;
[A-Z][a-z]* { printf("SYNTAX ERROR: Identifiers must start with lower case. "); }
[a-zA-Z][_a-zA-Z0-9]* {
printf("string: %s \n", yytext);
yylval.iVal = strdup(yytext);
yylval.iVal = addSymbol(yytext);
return ID;
}
[0-9]+ {
yylval.iVal = atoi(yytext);
printf("num: %s \n", yytext);
return NUMBER; }
[ _\t\r\s\n] ;
^"#".+$ return COMMENT;
. {printf("ERROR: Invalid Character "); yyterminate();}
<<EOF>> { printf("EOF: line %d\n", yylineno); yyterminate(); }
%%
// stores all variable id is in an array
SYMTABNODEPTR newSymTabNode()
{
return ((SYMTABNODEPTR)malloc(sizeof(SYMTABNODE)));
}
int addSymbol(char *s)
{
extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;
int i;
i = lookup(s);
if(i >= 0){
return(i);
}
else if(curSymSize >= SYMBOLTABLESIZE)
{
return (NOTHING);
}
else{
symtable[curSymSize] = newSymTabNode();
strncpy(symtable[curSymSize]->id,s,IDLENGTH);
symtable[curSymSize]->id[IDLENGTH-1] = '\0';
return(curSymSize++);
}
}
int lookup(char *s)
{
extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;
int i;
for(i=0;i<curSymSize;i++)
{
if(strncmp(s,symtable[i]->id,IDLENGTH) == 0){
return (i);
}
}
return(-1);
}
// parser.y
%{
#define YYERROR_VERBOSE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern char *yytext;
extern int yylex();
extern void yyerror(char *);
extern int yyparse();
extern FILE *yyin;
/* ------------- some constants --------------------------------------------- */
#define SYMBOLTABLESIZE 50
#define IDLENGTH 15
#define NOTHING -1
#define INDENTOFFSET 2
#ifdef DEBUG
char *NodeName[] =
{
"PROGRAM", "BLOCK", "VARS", "EXPR", "N", "A", "R", "STATS", "MSTAT", "STAT",
"IN", "OUT", "IF_STAT", "LOOP", "ASSIGN", "RO", "IDVAL", "NUMVAL"
};
#endif
enum ParseTreeNodeType
{
PROGRAM, BLOCK, VARS, EXPR, N, A, R, STATS, MSTAT, STAT,
IN, OUT,IF_STAT, LOOP, ASSIGN, RO, IDVAL, NUMVAL
};
#define TYPE_CHARACTER "char"
#define TYPE_INTEGER "int"
#define TYPE_REAL "double"
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
#ifndef NULL
#define NULL 0
#endif
// definitions for parse tree
struct treeNode {
int item;
int nodeID;
struct treeNode *first;
struct treeNode *second;
};
typedef struct treeNode TREE_NODE;
typedef TREE_NODE *TREE;
TREE makeNode(int, int, TREE, TREE);
#ifdef DEBUG
void printTree(TREE, int);
#endif
// symbol table definitions.
struct symbolTableNode{
char id[IDLENGTH];
};
typedef struct symbolTableNode SYMTABNODE;
typedef SYMTABNODE *SYMTABNODEPTR;
SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
int curSymSize = 0;
%}
%start program
%union {
char *sVal;
int iVal;
TREE tVal;
}
// list of all tokens
%token SEMICOLON GE LE EQUAL COLON RBRACK LBRACK ASSIGNS LPAREN RPAREN COMMENT
%token DOT MOD PLUS MINUS DIV MULT RBRACE LBRACE START MAIN STOP LET COMMA
%token SCANF PRINTF IF ITER THEN FUNC
%left MULT DIV MOD ADD SUB
// tokens defined with values and rule names
%token<iVal> NUMBER ID
//%token<sVal> ID
%type<tVal> program type block vars expr N A R stats mStat stat in out if_stat loop assign RO
%%
program : START vars MAIN block STOP
{
TREE tree;
tree = makeNode(NOTHING, PROGRAM, $2,$4);
#ifdef DEBUG
printTree(tree, 0);
#endif
}
;
block : RBRACE vars stats LBRACE
{
$$ = makeNode(NOTHING, BLOCK, $2, $3);
}
;
vars : /*empty*/
{
$$ = makeNode(NOTHING, VARS,NULL,NULL);
}
| LET ID COLON NUMBER vars
{
$$ = makeNode($2, VARS, $5,NULL);
printf("id: %d", $2);
}
;
//variable:
// type ID{$$ = newNode($2,VARIABLE,$1,NULL,NULL);};
//type:
// INT {$$ = newNode(INT,TYPE,NULL,NULL,NULL);}
// | BOOL {$$ = newNode(BOOL,TYPE,NULL,NULL,NULL);}
// | CHAR {$$ = newNode(CHAR,TYPE,NULL,NULL,NULL);}
// | STRING{$$ = newNode(STRING,TYPE,NULL,NULL,NULL);};
expr : N DIV expr
{
$$ = makeNode(DIV, EXPR, $1, $3);
}
| N MULT expr
{
$$ = makeNode(MULT, EXPR, $1, $3);
}
| N
{
$$ = makeNode(NOTHING, EXPR, $1,NULL);
}
;
N : A PLUS N
{
$$ = makeNode(PLUS, N, $1, $3);
}
| A MINUS N
{
$$ = makeNode(MINUS, N, $1, $3);
}
| A
{
$$ = makeNode(NOTHING, N, $1,NULL);
}
;
A : MOD A
{
$$ = makeNode(NOTHING, A, $2,NULL);
}
| R
{
$$ = makeNode(NOTHING, A, $1,NULL);
}
;
R : LBRACK expr RBRACK
{
$$ = makeNode(NOTHING, R, $2,NULL);
}
| ID
{
$$ = makeNode($1, IDVAL, NULL,NULL);
}
| NUMBER
{
$$ = makeNode($1, NUMVAL, NULL,NULL);
}
;
stats : stat mStat
{
$$ = makeNode(NOTHING, STATS, $1, $2);
}
;
mStat : /* empty */
{
$$ = makeNode(NOTHING, MSTAT, NULL,NULL);
}
| stat mStat
{
$$ = makeNode(NOTHING, MSTAT, $1, $2);
}
;
stat: in DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| out DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| block
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| if_stat DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| loop DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| assign DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
;
in : SCANF LBRACK ID RBRACK
{
$$ = makeNode($3, IN,NULL,NULL);
}
;
out : PRINTF LBRACK expr RBRACK
{
$$ = makeNode(NOTHING, OUT,$3,NULL);
}
;
if_stat : IF LBRACK expr RO expr RBRACK THEN block
{
$$ = makeNode(NOTHING, IF_STAT, $4, $8);
}
;
loop : ITER LBRACK expr RO expr RBRACK block
{
$$ = makeNode(NOTHING, LOOP, $4, $7);
}
;
assign : ID ASSIGNS expr
{
$$ = makeNode($1, ASSIGN, $3,NULL);
}
;
RO : LE
{
$$ = makeNode(LE, RO, NULL,NULL);
}
| GE
{
$$ = makeNode(GE, RO, NULL,NULL);
}
| EQUAL
{
$$ = makeNode(EQUAL, RO, NULL,NULL);
}
| COLON COLON
{
$$ = makeNode(EQUAL, RO, NULL,NULL);
}
;
%%
// node generator
TREE makeNode(int iVal, int nodeID, TREE p1, TREE p2)
{
TREE t;
t = (TREE)malloc(sizeof(TREE_NODE));
t->item = iVal;
t->nodeID = nodeID;
t->first = p1;
t->second = p2;
//printf("NODE CREATED");
return(t);
}
// prints the tree with indentation for depth
void printTree(TREE tree, int depth){
int i;
if(tree == NULL) return;
for(i=depth;i;i--)
printf(" ");
if(tree->nodeID == NUMBER)
printf("INT: %d ",tree->item);
else if(tree->nodeID == IDVAL){
if(tree->item > 0 && tree->item < SYMBOLTABLESIZE )
printf("id: %s ",symtable[tree->item]->id);
else
printf("unknown id: %d ", tree->item);
}
if(tree->item != NOTHING){
printf("Data: %d ",tree->item);
}
// If out of range of the table
if (tree->nodeID < 0 || tree->nodeID > sizeof(NodeName))
printf("Unknown ID: %d\n",tree->nodeID);
else
printf("%s\n",NodeName[tree->nodeID]);
printTree(tree->first,depth+2);
printTree(tree->second,depth+2);
}
#include "lex.yy.c"
// heres the makefile I use for compilation
frontEnd: lex.yy.c parser.tab.c
gcc parser.tab.c main.c -o frontEnd -lfl -DDEBUG
parser.tab.c parser.tab.h: parser.y
bison -d parser.y
lex.yy.c: lex.l
flex lex.l
clean:
rm lex.yy.c y.tab.c frontEnd
'''
// data.txt
start
let x : 13
main {
scanf [ x ] .
printf [ 34 ] .
} stop[enter image description here][2]
[1]: https://i.stack.imgur.com/xlNnh.png
[2]: https://i.stack.imgur.com/HKRtX.png
I think this has a lot more to do with your AST and symbol table functions than with your parser, and practically nothing to do with bison itself.
For example, your function to print trees won't attempt to print an identifier's name if its symbol table index is 0.
if(tree->item > 0 && tree->item < SYMBOLTABLESIZE)
But the first symbol entered in the table will have index 0. (Perhaps you fixed this between pasting your code and generating the results. You should always check that the code you paste in a question corresponds precisely to the output which you show. But this isn't the only bug in your code; it's just an example.)
As another example, the immediate problem which causes Data: 0 to be printed instead of the symbol name is that your tree printer only prints symbol names for AST nodes of type IDVAL, but you create an AST IN node whose data field contains the variable's symbol table index. So either you need to fix your tree printer so it knows about IN nodes, or you need to change the IN node so that it has a child which is the IDVAL node. (That's probably the best solution in the long run.)
It's always a temptation to blame bison (or whatever unfamiliar tool you're using at the moment) for bugs, instead of considering the possibility that you've introduced bugs in your own support code. To avoid falling into this trap, it's always a good idea to test your library functions separately before using them in a more complicated project. For example, you could write a small test driver that builds a fixed AST tree, prints it, and deletes it. Once that works, and only when that works, you can check to see if your parser can build and print the same tree by parsing an input.
You will find that some simple good software design practices will make this whole process much smoother:
Organise your code into separate component files, each with its own header file. Document the library interfaces (and, if necessary, data structures) using comments in the header file. Briefly describe what each function's purpose is. If you can't find a brief description, it nay be that the function is trying to do too many different things.
In your parser, the functions and declarations needed to build and use ASTs are scattered between different parts of your lexer and parser files. This makes them much harder to read, debug, maintain and even use.
No matter what your teacher might tell you, if you find it necessary to #include the generated lexical scanner directly into the parser, then you probably have not found a good way to organise your support functions. You should always aim to make it possible to separately compile the parser and the scanner.
For data structures like your AST node, which use different member variables in different ways depending on an enumerated node type -- which is a model you'll find in other C projects as well, but is particularly common in parsers -- document the precise use of each field for every enumeration value. And make sure that every time you change the way you use the data or add new enumeration values, you fix the documentation accordingly.
This documentation will make it much easier to verify that your AST is being built correctly. As an additional benefit, you (or others using your code) will have an accurate description of how to interpret the contents of AST nodes, which makes it much easier to write code which analyses the tree.
In short, the way to write, debug and maintain any non-trivial project is not by "messing around" but by being systematic and modular. While it might seem like all of this takes precious time, particularly the documentation, it will almost always save you a lot of time in the long run.
Related
I have a problem about segmentation fault 11.
Every time, when I want to add action rules in function grammar blocks, I must get the segmentation fault 11.
Therefore, I cannot get the dump.out, which is a file that record the identifiers for me.
I do not think the problem is because of the scanner file, but that is still possible.
Of course, the problem should have something about symbol table, but it is really strange.
The problem just occurs like:
function: FN ID '(' ')' {if ($2->st_type == UNDEF) $2->st_type = FUNCTION_TYPE};
When I add action in the block, segmentation fault 11 will occur.
However, this is okay.
function: FN ID '(' ')' {};
The parser file do not contains all contents since it is so many.
I use mac os
I hope someone can help me.
Thank you anyway
Where the error occur
1: // Hello World Example
<fn>
<id: main>
<(>
<)>
<{>
2: fn main() {
3: // Print text to the console
<let>
<mut>
<id: a>
<:>
<int>
<=>
<integer: 10>
<;>
4: let mut a:int = 10;
<let>
<mut>
<id: b>
<=>
<string: 1199>
<;>
5: let mut b = "1199";
<let>
<mut>
<id: sum>
<[>
<str>
<,>
<integer: 10>
<]>
<;>
6: let mut sum[str, 10];
<id: sum>
<[>
<integer: 0>
<]>
<=>
<string: 100>
<;>
7: sum[0] = "100";
<id: b>
<=>
<string: 123>
<+>
<id: b>
<;>
8: b = "123" + b;
<println>
<(>
<string: Hello World>
<)>
<;>
9: println ("Hello World");
<}>
10: }
Symbol table:
a
b
sum
main
Segmentation fault: 11
The input file
// Hello World Example
fn main() {
// Print text to the console
let mut a:int = 10;
let mut b = "1199";
let mut sum[str, 10];
sum[0] = "100";
b = "123" + b;
println ("Hello World");
}
This is my symbol table header file.
#include <stdio.h>
/* maximum size of hash table */
#define SIZE 211
/* maximum size of tokens-identifiers */
#define MAXTOKENLEN 40
/* token types */
#define UNDEF 0
#define INT_TYPE 1
#define REAL_TYPE 2
#define STR_TYPE 3
#define LOGIC_TYPE 4
#define ARRAY_TYPE 5
#define FUNCTION_TYPE 6
/* new type for parser */
#define CONST_INT_TYPE 7
#define CONST_REAL_TYPE 8
#define CONST_STR_TYPE 9
#define CONST_LOGIC_TYPE 10
/* how parameter is passed */
#define BY_VALUE 1
#define BY_REFER 2
/*
* Originally here, now it is in the symbols.c
* current scope
* int cur_scope = 0;
*/
/* parameter struct */
typedef struct Parameter{
int par_type;
char *param_name;
// to store value
int ival; double fval; char *st_sval; int bval; // boolean type
int passing; // value or reference
struct Parameter *next; // link to next one
}Param;
/* a linked list of references (lineno's) for each variable */
typedef struct Ref{
int lineno;
struct Ref *next;
int type;
}RefList;
// struct that represents a list node
typedef struct list{
char st_name[MAXTOKENLEN];
int st_size;
int scope;
RefList *lines;
// to store value and sometimes more information
int st_ival; double st_fval; char *st_sval; int st_bval;
// type
int st_type;
int inf_type; // for arrays (info type) and functions (return type)
// array stuff
int *i_vals; double *f_vals; char **s_vals; int *b_vals; // boolean type
int array_size;
// function parameters
Param *parameters;
int num_of_pars; // Meanwhile, it record the current position of the parameters
// pointer to next item in the list
struct list *next;
}list_t;
/* the hash table */
static list_t **hash_table;
// Function Declarations
void create(); // initialize hash table
unsigned int hash(char *key); // hash function for insert
void insert(char *name, int len, int type, int lineno); // insert entry
list_t *lookup(char *name); // search for entry
list_t *lookup_scope(char *name, int scope); // search for entry in scope
void hide_scope(); // hide the current scope
void incr_scope(); // go to next scope
void dump(FILE *of); // dump file
This is symbol table code file.
#include "symbols.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* current scope */
int cur_scope = 0;
void create()
{
int i;
hash_table = malloc(SIZE * sizeof(list_t*));
for(i = 0; i < SIZE; i++) hash_table[i] = NULL;
}
unsigned int hash(char *key)
{
unsigned int hashval = 0;
for(;*key!='\0';key++) hashval += *key;
hashval += key[0] % 11 + (key[0] << 3) - key[0];
return hashval % SIZE;
}
void insert(char *name, int len, int type, int lineno)
{
unsigned int hashval = hash(name); // hash function used
list_t *l = hash_table[hashval];
while ((l != NULL) && (strcmp(name,l->st_name) != 0)) l = l->next;
/* variable not yet in table */
if (l == NULL){
l = (list_t*) malloc(sizeof(list_t));
strncpy(l->st_name, name, len);
/* add to hashtable */
l->st_type = type;
l->scope = cur_scope;
l->lines = (RefList*) malloc(sizeof(RefList));
l->lines->lineno = lineno;
l->lines->next = NULL;
l->next = hash_table[hashval];
hash_table[hashval] = l;
// printf("Inserted %s for the first time with linenumber %d!\n", name, lineno); // error checking
}
/* found in table, so just add line number */
else{
l->scope = cur_scope;
RefList *t = l->lines;
while (t->next != NULL) t = t->next;
/* add linenumber to reference list */
t->next = (RefList*) malloc(sizeof(RefList));
t->next->lineno = lineno;
t->next->next = NULL;
// printf("Found %s again at line %d!\n", name, lineno);
}
}
list_t *lookup(char *name)
{ /* return symbol if found or NULL if not found */
unsigned int hashval = hash(name);
list_t *l = hash_table[hashval];
while ((l != NULL) && (strcmp(name,l->st_name) != 0)) l = l->next;
return l; // NULL is not found
}
list_t *lookup_scope(char *name, int scope)
{ /* return symbol if found or NULL if not found */
unsigned int hashval = hash(name);
list_t *l = hash_table[hashval];
while ((l != NULL) && (strcmp(name,l->st_name) != 0) && (scope != l->scope)) l = l->next;
return l; // NULL is not found
}
void hide_scope()
{ /* hide the current scope */
if(cur_scope > 0) cur_scope--;
}
void incr_scope()
{ /* go to next scope */
cur_scope++;
}
/* print to stdout by default */
void dump(FILE * of)
{
int i; int count; // record whether first line prints or not.
fprintf(of,"------------ ----------------- -------------\n");
fprintf(of,"Name Type Line Numbers\n");
fprintf(of,"------------ ----------------- -------------\n");
for (i=0; i < SIZE; ++i){
if (hash_table[i] != NULL){
list_t *l = hash_table[i];
while (l != NULL){
RefList *t = l->lines;
fprintf(of,"%-12s ",l->st_name);
printf("%s\n", l->st_name); // print out all the names in the symbol table
if (l->st_type == INT_TYPE) fprintf(of,"%-7s","int");
else if (l->st_type == REAL_TYPE) fprintf(of,"%-7s","real");
else if (l->st_type == STR_TYPE) fprintf(of,"%-7s","string");
else if (l->st_type == LOGIC_TYPE) fprintf(of,"%-7s","bool");
else if (l->st_type == CONST_INT_TYPE) fprintf(of, "%-7s", "const_int"); // constant_int_type
else if (l->st_type == CONST_REAL_TYPE) fprintf(of, "%-7s", "const_real"); // constant_real_type
else if (l->st_type == CONST_STR_TYPE) fprintf(of, "%-7s", "const_string"); // constant_string_type
else if (l->st_type == CONST_LOGIC_TYPE) fprintf(of, "%-7s", "const_bool"); // const_logic_type
else if (l->st_type == ARRAY_TYPE){
fprintf(of,"array of ");
if (l->inf_type == INT_TYPE) fprintf(of,"%-7s","int");
else if (l->inf_type == REAL_TYPE) fprintf(of,"%-7s","real");
else if (l->inf_type == STR_TYPE) fprintf(of,"%-7s","string");
else if (l->inf_type == LOGIC_TYPE) fprintf(of,"%-7s","bool");
else fprintf(of,"%-7s","undef");
}
else if (l->st_type == FUNCTION_TYPE){
fprintf(of,"%-7s %s","function returns ");
if (l->inf_type == INT_TYPE) fprintf(of,"%-7s","int");
else if (l->inf_type == REAL_TYPE) fprintf(of,"%-7s","real");
else if (l->inf_type == STR_TYPE) fprintf(of,"%-7s","string");
else if (l->inf_type == LOGIC_TYPE) fprintf(of,"-7%s","bool");
else fprintf(of,"%-7s","undef");
}
else fprintf(of,"%-7s","undef"); // if UNDEF or 0
count = 0;
while (t != NULL){
if (count == 0)
{
if (l->st_type == INT_TYPE || l->st_type == REAL_TYPE || l->st_type == STR_TYPE || l->st_type == UNDEF)
fprintf(of,"%13d ", t->lineno);
else if (l->st_type == CONST_INT_TYPE || l->st_type == CONST_REAL_TYPE || l->st_type == CONST_STR_TYPE || l->st_type == CONST_LOGIC_TYPE)
fprintf(of,"%10d", t->lineno);
else if (l->st_type == ARRAY_TYPE || l->st_type == FUNCTION_TYPE)
fprintf(of,"%4d", t->lineno);
}
else
fprintf(of,"%3d", t->lineno);
count++;
t = t->next;
}
fprintf(of,"\n");
l = l->next;
}
}
}
}
scanner file
%option noyywrap
%{
#include "symbols.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.tab.h"
#define LIST strcat(buf,yytext)
#define token(t) {LIST; printf("<%s>\n");}
#define tokenInteger(t, i) {LIST; printf("<%s: %d>\n", t, i);}
#define tokenReal(t, r) {LIST; printf("<%s: %lf>\n", t, r);}
#define tokenString(t, s) {LIST; printf("<%s: %s>\n", t, s);}
#define MAX_LINE_LENG 256
extern FILE* yyin;
extern FILE* yyout;
int linenum = 1;
char buf[MAX_LINE_LENG];
char* tempStr;
int indexForStr = 1;
list_t* temp;
%}
%x Comment
%%
"//".* {LIST;}
"/*" {BEGIN(Comment); LIST;}
<Comment>"*/" {LIST; BEGIN(0);}
<Comment>\n {LIST; printf("%d: %s\n", linenum++, buf); buf[0] = '\0';}
<Comment>. {LIST;}
"bool" {
token("BOOL");
return BOOL;
}
"break" {token("BREAK"); return BREAK;}
"char" {token("CHAR"); return CHAR;}
"continue" {token("CONTINUE"); return CONTINUE;}
"do" {token("DO"); return DO;}
"else" {token("ELSE"); return ELSE;}
"enum" {token("ENUM"); return ENUM;}
"extern" {token("EXTERN"); return EXTERN;}
"false" {token("FALSE"); yylval.boolVal = 0; return FALSE;}
"float" {
token("FLOAT");
return FLOAT;
}
"for" {token("FOR"); return FOR;}
"fn" {token("FN"); return FN;}
"if" {token("IF"); return IF;}
"in" {token("IN"); return IN;}
"int" {
token("INT");
return INT;
}
"let" {token("LET"); return LET;}
"loop" {token("LOOP"); return LOOP;}
"match" {token("MATCH"); return MATCH;}
"mut" {token("MUT"); return MUT;}
"print" {token("PRINT"); return PRINT;}
"println" {token("PRINTLN"); return PRINTLN;}
"pub" {token("PUB"); return PUB;}
"return" {token("RETURN"); return RETURN;}
"self" {token("SELF"); return SELF;}
"static" {token("STATIC"); return STATIC;}
"str" {
token("STR");
return STR;
}
"struct" {token("STRUCT"); return STRUCT;}
"true" {token("TRUE"); yylval.boolVal = 1; return TRUE;}
"use" {token("USE"); return USE;}
"where" {token("WHERE"); return WHERE;}
"while" {token("WHILE"); return WHILE;}
"," {token("','"); return ',';}
":" {token("':'"); return ':';}
";" {token("';'"); return ';';}
"(" {token("'('"); return '(';}
")" {token("')'"); return ')';}
"[" {token("'['"); return '[';}
"]" {token("']'"); return ']';}
"{" {token("'{'"); return '{';}
"}" {token("'}'"); return '}';}
"+" {token("'+'"); return '+';}
"-" {token("'-'"); return '-';}
"*" {token("'*'"); return '*';}
"/" {token("'/'"); return '/';}
"++" {token("'++'"); return '++';}
"--" {token("'--'"); return '--';}
"%" {token("'%'"); return '%';}
"<" {token("'<'"); return LESS;}
"<=" {token("'<='"); return '<=';}
">=" {token("'>='"); return '>=';}
">" {token("'>'"); return GREATER;}
"==" {token("'=='"); return '==';}
"!=" {token("'!='"); return '!=';}
"&&" {token("'&&'"); return '&&';}
"||" {token("'||'"); return '||';}
"!" {token("'!'"); return EXCLAMATION;}
"=" {token("'='"); return ASSIGN;}
"+=" {token("'+='"); return '+=';}
"-=" {token("'-='"); return '-=';}
"*=" {token("'*='"); return '*=';}
"/=" {token("'/='"); return '/=';}
"->" {token("'->'"); return ARROW;}
"read" {token("'READ'"); return READ;}
[A-Z_a-z]([A-Z_a-z]|[0-9])* {
insert(yytext, yyleng, UNDEF, linenum);
yylval.symptr = lookup(yytext);
tokenString("id", yylval.symptr->st_name);
return ID;
}
"0"|[0-9][0-9]* {
sscanf(yytext, "%d", &yylval.intVal);
tokenInteger("integer", yylval.intVal);
return INTEGER;
}
[0-9_]+"."[0-9_]|[0-9_][Ee][+-]?[0-9_]+ {
yylval.floatVal = atof(yytext);
tokenReal("real", yylval.floatVal);
return REAL;
}
\"([\\.]|[^\\"])*\" {
tempStr = malloc((strlen(yytext) - 1) * sizeof(char));
for (int i = 0; i < strlen(yytext) - 2; i++)
{
tempStr[i] = yytext[indexForStr];
indexForStr++;
}
tempStr[strlen(yytext) - 1] = '\0';
yylval.stringVal = strdup(yytext);
tokenString("string", tempStr);
free(tempStr);
indexForStr = 1;
return STRING;
}
\n {
LIST;
printf("%d: %s", linenum++, buf);
buf[0] = '\0';
}
[ \t]* {LIST;}
. {
LIST;
printf("%d:%s\n", linenum+1, buf);
printf("bad character:'%s'\n",yytext);
exit(-1);
}
%%
parser file
%{
#include "symbols.c"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#define Trace(t, line) printf(t, line) // Trace where the error occurs and print the line number
#ifndef STRSIZE
#define STRSIZE 40
#endif
#ifndef PARAMSIZE
#define PARAMSIZE 40
#endif
extern FILE* yyin;
extern FILE* yyout;
extern int linenum;
extern int yylex();
void yyerror(char* msg);
%}
%union{
char* stringVal;
double floatVal;
int intVal;
int boolVal;
list_t* symptr;
}
/* tokens */
%token <symptr> ID
%token <intVal> INTEGER
%token <floatVal> REAL
%token <stringVal> STRING
%token <boolVal> TRUE FALSE
%token INT FLOAT STR BOOL
%token BREAK CHAR CONTINUE DO ELSE
%token ENUM EXTERN FOR
%token FN IF IN LET
%token LOOP MATCH MUT PRINT PRINTLN
%token RETURN SELF STATIC STRUCT
%token USE WHERE WHILE
%token READ PUB
%token LESS GREATER ASSIGN EXCLAMATION ARROW
/* precedence for operators */
%left '||'
%left '&&'
%left EXCLAMATION
%left LESS '<=' '>=' GREATER '==' '!='
%left '+' '-'
%left '*' '/'
%left UMINUS
/* types */
%type <intVal> integer_exp
%type <floatVal> real_exp
%type <stringVal> string_exp
%type <boolVal> bool_exp
%start program /* the initial entry point */
%%
program: functions | global_declaration functions
;
global_declaration: global_declaration constant_declaration
| global_declaration variable_declaration
| global_declaration array_declaration
| constant_declaration
| variable_declaration
| array_declaration
;
local_declaration: local_declaration constant_declaration
| local_declaration variable_declaration
| local_declaration array_declaration
| constant_declaration
| variable_declaration
| array_declaration
;
block: start local_declaration statements end
| start local_declaration end
| start statements end
| start end
;
start: '{' {
incr_scope();
}
end: '}' {
hide_scope();
}
;
functions: functions function
| function
;
function: FN ID '(' ')' start local_declaration statements end{
if ($2->st_type == UNDEF)
{
$2->st_type = FUNCTION_TYPE;
$2->inf_type = UNDEF;
}
else
{
Trace("line %d: Redeclaration of identifier.\n", linenum);
}
}
| FN ID '(' ')' start statements end {
if ($2->st_type == UNDEF)
{
$2->st_type = FUNCTION_TYPE;
$2->inf_type = UNDEF;
}
else
{
Trace("line %d: Redeclaration of identifier.\n", linenum);
}
}
| FN ID '(' ')' start local_declaration end {
if ($2->st_type == UNDEF)
{
$2->st_type = FUNCTION_TYPE;
$2->inf_type = UNDEF;
}
else
{
Trace("line %d: Redeclaration of identifier.\n", linenum);
}
}
| FN ID '(' ')' start end {
if ($2->st_type == UNDEF)
{
$2->st_type = FUNCTION_TYPE;
$2->inf_type = UNDEF;
}
else
{
Trace("line %d: Redeclaration of identifier.\n", linenum);
}
;
%%
void yyerror(char* msg)
{
fprintf(stderr, "line %d: %s\n", linenum, msg);
}
int main(int argc, char** argv)
{
/* create the hash table */
create();
/* open the source program file */
if (argc != 2) {
printf ("Usage: sc filename\n");
exit(1);
}
yyin = fopen(argv[1], "r"); /* open input file */
int flag;
flag = yyparse();
/* perform parsing */
if (flag == 1) /* parsing */
yyerror("Parsing error !"); /* syntax error */
fclose(yyin); /* close input file */
/* output symbol table */
printf("\nSymbol table:\n");
yyout = fopen("dump.out", "w");
dump(yyout);
fclose(yyout);
return 0;
}
Clearly, the problem occurs during the dump function when the token type is FUNCTION_TYPE. That's clear from the debugging output (dump is presumably executing when the segfault occurs) and from the change report (the problem occurs when an action sets the st_type field to FUNCTION_TYPE).
Visual inspection of the if clause in dump() corresponding to that condition reveals the following obvious error:
fprintf(of,"%-7s %s","function returns ");
That call to fprintf has a format string with two %s conversions. However, there is only one argument to be inserted.
The real question you should be asking yourself is "How can I easily find stupid typos like this without spending a lot of time or resorting to outside experts?"
As a first approximation, that error is so common and easy to detect that most compilers will warn you about it. So your first step is to make sure you always compile with warnings enabled (-Wall if you are using gcc or clang), and that you read the warnings.
Even without the warning, it would have been straight-forward to find the error with a debugger such as gdb. Just set a breakpoint at dump and single-step until the segfault occurs.
Finally, you are making your life much more complicated when you first build a large complex program with a lot of components and only then start to debug it. In the long run, you will find that it is worth taking the time to test each component individually (your symbol table, for example), using some kind of test harness, and only assembling your more complex program when you are reasonably confident that the individual pieces work. That will avoid the difficulty of identifying where the error occurred (as in this case, where you were evidently distracted by your doubts about the parser generator, leading you to miss the actual problem which has nothing to do with the parser).
By the way, strncpy is a definite red flag, although in this case you seem to have been lucky (or unlucky) enough to not encounter the bug. strncpy is pretty well never what you want, and if it is what you want then the length parameter should be the longest string you can accommodate rather than the length of the input string. (strncpy is intended for use in fixed-length formats, which is why it pads the output to the specified length.)
If you use the length of the input string, then you have two problems: (1) the copy is guaranteed to not be NUL-terminated, leading to Undefined Behaviour; and (2) nothing stops the copy from overrunning the output buffer, in the case that the input string is too long.
Even used correctly, strncpy requires you to manually NUL-terminate the output, which is a nuisance. A better solution is to first check that the string is not too long (length < SIZE) and then use strcpy, which will correctly NUL-terminate. Even better is to make the name field a char* instead of an array, and dynamically allocate a string of the correct length (see strdup, for example), thereby avoiding having to arbitrarily limit the size of identifiers.
I am trying to display the whole arithmetic expression from text file and its result, I tried it with file handling option but it is not working.
YACC :
%{
#include <stdio.h>
#include <string.h>
#define YYSTYPE int /* the attribute type for Yacc's stack */
extern int yylval; /* defined by lex, holds attrib of cur token */
extern char yytext[]; /* defined by lex and holds most recent token */
extern FILE * yyin; /* defined by lex; lex reads from this file */
%}
%token NUM
%%
Calc : Expr {printf(" = %d\n",$1);}
| Calc Expr {printf(" = %d\n",$2);}
| Calc error {yyerror("\n");}
;
Expr : Term { $$ = $1; }
| Expr '+' Term { $$ = $1 + $3; }
| Expr '-' Term { $$ = $1 - $3; }
;
Term : Fact { $$ = $1; }
| Term '*' Fact { $$ = $1 * $3; }
| Term '/' Fact { if($3==0){
yyerror("Divide by Zero Encountered.");
break;}
else
$$ = $1 / $3;
}
;
Fact : Prim { $$ = $1; }
| '-' Prim { $$ = -$2; }
;
Prim : '(' Expr ')' { $$ = $2; }
| Id { $$ = $1; }
;
Id :NUM { $$ = yylval; }
;
%%
void yyerror(char *mesg); /* this one is required by YACC */
main(int argc, char* *argv){
char ch,c;
FILE *f;
if(argc != 2) {printf("useage: calc filename \n"); exit(1);}
if( !(yyin = fopen(argv[1],"r")) ){
printf("cannot open file\n");exit(1);
}
/*
f=fopen(argv[1],"r");
if(f!=NULL){
char line[1000];
while(fgets(line,sizeof(line),f)!=NULL)
{
fprintf(stdout,"%s",line);
yyparse();
}
}
*/
yyparse();
}
void yyerror(char *mesg){
printf("\n%s", mesg);
}
LEX
%{
#include <stdio.h>
#include "y.tab.h"
int yylval; /*declared extern by yacc code. used to pass info to yacc*/
%}
letter [A-Za-z]
digit ([0-9])*
op "+"|"*"|"("|")"|"/"|"-"
ws [ \t\n\r]+$
other .
%%
{ws} { /*Nothing*/ }
{digit} { yylval = atoi(yytext); return NUM;}
{op} { return yytext[0];}
{other} { printf("bad%cbad%d\n",*yytext,*yytext); return '?'; }
%%
My Text file contains these two expressions :
4+3-2*(-7)
9/3-2*(-5)
I want output as :
4+3-2*(-7)=21
9/3-2*(-5)=13
But the Output Is :
=21
=13
because a parser will do all calculations at once so this (the commented code) is not legit to use. So what is needed is to show pass input expression to grammar and print in Calc block. I am not able to find anything relevant on google about displaying input in grammar.Thanks in advance for comments & suggestions.
You don't want to do this in the grammar. Too complicated, and too subject to whatever rearrangement the grammar may do. You could consider doing it in the lexer, i.e. print yytext in every action other than the whitespace action, just before you return it, but I would echo all the input as it is read, by overriding lex(1)'s input function.
NB You should be using flex(1), not lex(1), and note that if you change, yyyext ceases being a char[] and becomes a char *.
I didn't mention it in your prior question, but this rule:
{other} { printf("bad%cbad%d\n",*yytext,*yytext); return '?'; }
would better be written as:
{other} { return yytext[0]; }
That way the parser will see it and produce a syntax error, so you don't have to print anything yourself. This technique also lets you get rid of the rules for the individual special characters +,-=*,/,(,), as the parser will recognize them via yytext[0].
Finally, I got it :
YACC
%{
#include <stdio.h>
#include <string.h>
#define YYSTYPE int /* the attribute type for Yacc's stack */
extern int yylval; /* defined by lex, holds attrib of cur token */
extern char yytext[]; /* defined by lex and holds most recent token */
extern FILE * yyin; /* defined by lex; lex reads from this
file */ %}
%token NUM
%%
Calc : Expr {printf(" = %d\n",$1);}
| Calc Expr {printf(" = %d\n",$2);}
| error {yyerror("Bad Expression\n");}
;
Expr : Term { $$ = $1; }
| Expr Add Term { $$ = $1 + $3; }
| Expr Sub Term { $$ = $1 - $3; }
;
Term : Fact { $$ = $1; }
| Term Mul Fact { $$ = $1 * $3; }
| Term Div Fact { if($3==0){
yyerror("Divide by Zero Encountered.");
break;}
else
$$ = $1 / $3;
}
;
Fact : Prim { $$ = $1; }
| '-' Prim { $$ = -$2; }
;
Prim : LP Expr RP { $$ = $2; }
| Id { $$ = $1; }
;
Id :NUM { $$ = yylval; printf("%d",yylval); }
;
Add : '+' {printf("+");}
Sub : '-' {printf("-");}
Mul : '*' {printf("*");}
Div : '/' {printf("/");}
LP : '(' {printf("(");}
RP : ')' {printf(")");}
%%
void yyerror(char *mesg); /* this one is required by YACC */
main(int argc, char* *argv){
char ch,c;
FILE *f;
if(argc != 2) {printf("useage: calc filename \n"); exit(1);}
if( !(yyin = fopen(argv[1],"r")) ){
printf("cannot open file\n");exit(1);
}
yyparse();
}
void yyerror(char *mesg){
printf("%s ", mesg);
}
Thanks EJP & EMACS User for responding.
I'm trying to make a simple parser. It's for a homework assignment but also for own experimentation. I have completed the lexer and the parser and I'm trying now to output an AST. The problem is that when I'm adding, for example, two integers, the result tree is printed with unrecognizable symbols. A valid input should be +(1,1) and a valid output should be (+ 1 1). Instead of this, I'm getting ( + �|k �|k ). I've tried many things, without actually any significant result. The sprintf function returns a null terminator, so probably this is not the problem. Below is the parser code (.y file):
%{
#define YYDEBUG 1
%}
%start program
%token NUMBER
%token ID
%token PLUS MINUS TIMES
%token LP RP EQUALS COMMA
%token END
%token LET IN AND
%left PLUS MINUS
%left TIMES
%left LET IN AND
%left EQUALS
%%
program:{printf("Empty Input\n");} /* empty */
| program line /* do nothing */
line: expr END { printtree($1); printf("\n");}
;
expr : /*Empty*/
| LET deflist IN expr {}
| ID { printf("Found ID\n"); $$ = make_id_leaf($1);}
| NUMBER { printf("Found NUMBER\n"); $$ = make_number_leaf($1);}
| PLUS LP expr COMMA expr RP {$$ = make_plus_tree($3,$5); printf("Found expr PLUS expr.\n"); }
| TIMES LP expr COMMA expr RP {$$ = make_times_tree($3,$5); printf("Found expr TIMES expr. Result:%d\n", $$);}
| MINUS ID
| MINUS NUMBER { printf("found MINUS NUMBER\n"); }
;
deflist : definition
| definition AND deflist
;
definition : /*Empty*/
| ID EQUALS expr {printf("Found EQ\n");}
;
%%
/*int main (void) {return yyparse ( );}*/
int yyerror (char *s) {fprintf (stderr, "%s\n", s);}
The lexer file:
%{
#include "parser.h"
%}
DIGIT [0-9]
LETTER [a-zA-Z]
%%
LET {printf("Encountered LET\n"); return(LET);}
IN {printf("Encountered IN\n"); return(IN);}
AND {printf("Encountered AND\n"); return(AND);}
{DIGIT}+ {yylval = atoi(yytext); return NUMBER;}
{LETTER}* { if (strlen(yytext) <= 8){
yylval = strlen(yytext);
printf( "<ID, %s> ", yytext );
return(ID);
} else {
yytext[8] = '\0';
printf("WARNING! Long identifier. Truncating to 8 chars\n");
printf( "<ID, %s> ", yytext );
return(ID);
}
}
[ \t] ;
[\n] return(END);
"+" return(PLUS);
"-" return(MINUS);
"*" return(TIMES);
"=" return(EQUALS);
"(" return(LP);
")" return(RP);
"," return(COMMA);
<<EOF>> return(0);
%%
int yywrap (void) {return 1;}
The main.c which includes the yyparse() function:
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
#include "treedefs.h"
int main(int argc, char **argv){
yyparse();
return 0;
}
And the treedefs.h file which includes the function definitions (I've included only the struct definition, the number leaf and the plus tree):
typedef struct tree{
char *token;
TREE *l;
TREE *r;
TREE *child;
}TREE;
/* Make number leaves */
TREE *make_number_leaf(int n){
TREE *leafNum = malloc(sizeof(TREE));
char *c, ch[8];
sprintf(ch, "%d", n); /* Effective way to convert int to string */
c = ch;
leafNum->token = c;
leafNum->l = NULL;
leafNum->r = NULL;
leafNum->child = NULL;
printf("NUM Leaf is: %s\n", leafNum->token);
return (leafNum);
}
/* Addition tree */
TREE *make_plus_tree(TREE *l, TREE *r){
TREE *plusTree = malloc(sizeof(TREE));
plusTree->token = "+";
plusTree->l = l;
plusTree->r = r;
plusTree->child = NULL;
return (plusTree);
}
void printtree(TREE *tree)
{
if (tree->l || tree->r){
printf("(");
}
printf(" %s ", tree->token);
if (tree->l){
printtree(tree->l);
}
if (tree->r){
printtree(tree->r);
}
if (tree->l || tree->r){
printf(")");
}
}
The file tree.h includes only some declarations, no big deal, and definitely not related to the issue.
Why the numbers look like this? And how can I fix it? Any help will be greatly appreciated.
This problem actually has nothing to do with bison or flex. It's in your make_number_leaf implementation:
TREE *make_number_leaf(int n){
TREE *leafNum = malloc(sizeof(TREE));
char *c, ch[8];
// ^ local variable
sprintf(ch, "%d", n); /* Effective way to convert int to string */
c = ch;
leafNum->token = c;
// ^ dangling pointer
// Remainder omitted
}
As indicated in the comments above, ch is a local (stack-allocated) variable, whose lifetime ends when the function returns. Assigning its address to the variable c does nothing to change that. So the value of c which is stored into leafNum->token will become a dangling pointer as soon as the function returns.
So when you later attempt to print out the token, you are printing out the contents of random memory.
You need to malloc a character buffer, and remember to free it when you are freeing the TREE. (However, in the case where leafNum->token is a string literal, you cannot call free, so you need to be a bit cleverer.)
So I have to create a compiler for the Tiny C language, but I cant compile it, I have the .y and .l files and both work all right, but when I try to compile the .tab.c file, it shows 3 errors for
undefined reference to 'install_id'
undefined reference to printSymtab'
undefined reference to 'lookup_id'
Here are the codes:
Symtab.h
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct symtab_node * SYMTAB;
typedef struct symtab_node {
char * nombre;
int type;
float fval;
SYMTAB next;
} SYMTAB_NODE;
SYMTAB lookup_id(SYMTAB st, char * name);
SYMTAB install_id(SYMTAB st, char * name, int typ);
void printSymtab(SYMTAB t);
Symtab.c
#include "symtab.h"
#include <stdio.h>
int next_num() {
static int i = 1;
return i++;
}
/* looks up an is in ST. Returns pointer to cell if found else NULL */
SYMTAB lookup_id(SYMTAB st, char * name) {
SYMTAB tmp = st;
if (tmp == NULL) {/* empty list */
return NULL;
} else {
while (tmp != NULL) {
if (strcmp(tmp->idname,name) == 0) {
return tmp; /* found */
} else {
tmp = tmp->next; /* go to next cell */
}
}
return NULL; /* not found */
}
}
/* adds an id to ST if not present */
SYMTAB install_id(SYMTAB st, char * name, int typ) {
if (lookup_id(st, name) == NULL) {
SYMTAB nst = (SYMTAB)malloc(sizeof(SYMTAB_NODE));
nst->idname = (char *) strdup(name);
nst->idnum = next_num();
nst->next = st;
return nst;
} else {
return st;
}
}
/* print out ST */
void printSymtab(SYMTAB t) {
SYMTAB tmp = t;
while (tmp != NULL) {
printf("%s\t%d\n", tmp->idname, tmp->idnum);
tmp = tmp->next;
}
}
grammar.y
%{
#include "symtab.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
char * concat (char * str1, char * str2);
extern int yylex();
extern char * yytext;
extern int yylineno;
SYMTAB st;
int typev;
/* Function definitions */
void yyerror (char *string);
%}
%union{
char *strval;
int value;
float fvalue;
SYMTAB st;
}
/* Declaramos todos los tokens que recibirá el programa y que provienen del cparser.l */
%token SEMI INTEGER FLOAT
%token IF THEN ELSE WHILE DO
%token READ WRITE
%token LPAREN RPAREN LBRACE RBRACE
%token LT EQ
%token PLUS MINUS TIMES DIV ASSIGN
%token<value> INT_NUM
%token<fvalue> FLOAT_NUM
%token<strval> ID
%%
/* Definimos las reglas de producción para el mini-lenguaje C */
program: var_dec stmt_seq { printf ("No hay errores sintacticos\n");}
;
var_dec: var_dec single_dec
|
;
single_dec: type ID SEMI { st = install_id(st,$2,typev); printSymtab(st); }
;
type: INTEGER { typev = 1; }
| FLOAT { typev = 2; }
;
stmt_seq: stmt_seq stmt
|
;
stmt: IF exp THEN else
| WHILE exp DO stmt
| variable ASSIGN exp SEMI { /*st = install_id(st,$1); */}
| READ LPAREN variable RPAREN SEMI { /*st = install_id(st,$3); */}
| WRITE LPAREN exp RPAREN SEMI
| block
| error SEMI { yyerrok;}
;
else: stmt
| ELSE stmt
;
block: LBRACE stmt_seq RBRACE
;
exp: simple_exp LT simple_exp
| simple_exp EQ simple_exp
| simple_exp
;
simple_exp: simple_exp PLUS term
| simple_exp MINUS term
| term
;
term: term TIMES factor
| term DIV factor
| factor
;
factor: LPAREN exp RPAREN
| INT_NUM
| FLOAT_NUM
| variable
;
variable: ID
{ if(lookup_id(st,$1) == NULL){
yyerror(concat("Error: Undeclared Identifier ", $1));
}
}
;
%%
/* A function that concatenates two strings and returns the result */
char * concat(char * str1, char * str2){
char *str3;
str3 = (char *) calloc(strlen(str1)+strlen(str2)+1, sizeof(char));
strcpy(str3,str1);
strcat(str3,str2);
return str3;
}
#include "lex.yy.c"
/* Bison does NOT implement yyerror, so define it here */
void yyerror (char *string){
printf ("ERROR NEAR LINE %d: %s\n",yylineno,string);
}
/* Bison does NOT define the main entry point so define it here */
main (){
yyparse();
yylex();
}
lexem.y
%{
#include <string.h>
#include <stdlib.h>
char * strval;
int value;
float fvalue;
int error;
extern YYSTYPE yylval;
%}
/* This is the rule definition */
%option noyywrap
%option yylineno
ids [A-Za-z_][A-Za-z0-9_]*
digits 0|[1-9][0-9]*|0(c|C)[0-7]+|0(x|X)[0-9A-Fa-f]+
floats [0-9]*"."[0-9]+([eE][-+]?[0-9]+)?
%%
/* Consume los comentarios*/
(\/\*([^\*]|\*[^/])*\*\/)|(\/\/.*)
/* Consume los espacios, tabulaciones y saltos de linea*/
[[:space:]]|[[:blank:]]|\n
/* Palabras reservadas */
"int" { return INTEGER; }
"float" { return FLOAT; }
"if" { return IF; }
"then" { return THEN; }
"else" { return ELSE; }
"do" { return DO; }
"while" { return WHILE; }
"read" { return READ; }
"write" { return WRITE; }
/* Simbolos de puntuacion, operadores y relacionales */
/* Puntuacion */
";" { return SEMI; }
"(" { return LPAREN; }
")" { return RPAREN; }
"{" { return LBRACE; }
"}" { return RBRACE; }
/* Relacionales */
">" { return LT; }
"==" { return EQ; }
/* Operadores */
"+" { return PLUS; }
"-" { return MINUS; }
"*" { return TIMES; }
"/" { return DIV; }
"=" { return ASSIGN; }
{ids} { yylval.strval = (char *) strdup(yytext);
return (ID); }
{digits} { yylval.value = atoi(yytext);
return (INT_NUM); }
{floats} { yylval.fvalue = atof(yytext);
return (FLOAT_NUM); }
/* Consume los simbolos que sobran y marca error */
. { printf("LEXICAL ERROR NEAR LINE %d: %s \n", yyget_lineno(), yyget_text()); error++; }
%%
You're not supposed to compile the whatever.tab.h file, that's a header file containing the YACC elements for the grammar, for inclusion into the lex and yacc code sections, as well as your own code if you need access to it.
You're supposed to compile whatever.tab.c, ensuring that you're also including your symtab.c (or its equivalent object file), and any other C source files as well.
And, based on your comment, it's this non-inclusion of the symtab.c file which is indeed causing your immediate error.
When I execute your steps (slightly modified for different names):
flex lexem.l
yacc -d -v grammar.y
gcc -o par y.tab.c
then I get a similar problem to what you're seeing:
/tmp/ccI5DpZQ.o:y.tab.c:(.text+0x35c): undefined reference to `install_id'
/tmp/ccI5DpZQ.o:y.tab.c:(.text+0x36e): undefined reference to `printSymtab'
/tmp/ccI5DpZQ.o:y.tab.c:(.text+0x3a7): undefined reference to `lookup_id'
However, when I incorporate the symtab.c file into the compile line (and add the idname and idnum missing bits to the structure in symtab.h to solve compilation problems), it works just fine:
gcc -o par y.tab.c symtab.c
So that's what you need to do, include symtab.c on the gcc command line.
I've write a parser for evaluating a logical expression. I know flex and bison use global variables (like yylval). I want a pure parser and a reentrant scanner for thread programming. My '.y' file is here:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
//int yylex (YYSTYPE* lvalp);
int yylex(void);
bool parseExpression(const std::string& inp);
%}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
void yyerror(char *s) {
fprintf(stderr, "%s\n", s);
}
void main(void) {
std::string inp = "0|0\n";
bool nasi = parseExpression(inp);
printf("%s%d\n", "nasi ", nasi);
printf("Press ENTER to close. ");
getchar();
}
My '.y' file is here:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
%}
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
yylval = false;
//*lvalp = false;
}
else
{
yylval = true;
//*lvalp = true;
}
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror("Unknown character");
%%
int yywrap(void) {
return 1;
}
bool parseExpression(const std::string& inp)
{
yy_delete_buffer(YY_CURRENT_BUFFER);
/*Copy string into new buffer and Switch buffers*/
yy_scan_string(inp.c_str());
bool nasi = yyparse();
return nasi;
}
I've added %pure_parser to both files, changed yylex declaration to int yylex (YYSTYPE* lvalp); and replaced yylval to *lvalp, but I saw an error: 'lvalp' is undeclared identifier.. There are many examples about 'reentrant' and 'pure', but I can't find the best guideline.
Could someone guide me?
Thanks in advance.
Fortunately, I did it. Here is my code. I think it can be a good guideline for who wants write a pure parser.ل
My reentrant scanner:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror (yyscan_t yyscanner, char const *msg);
%}
%option reentrant bison-bridge
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
*yylval = false;
}
else
{
*yylval = true;
}
//yylval = atoi(yytext);
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror (yyscanner, "Unknown character");
%%
int yywrap(yyscan_t yyscanner)
{
return 1;
}
bool parseExpression(const std::string& inp)
{
yyscan_t myscanner;
yylex_init(&myscanner);
struct yyguts_t * yyg = (struct yyguts_t*)myscanner;
yy_delete_buffer(YY_CURRENT_BUFFER,myscanner);
/*Copy string into new buffer and Switch buffers*/
yy_scan_string(inp.c_str(), myscanner);
bool nasi = yyparse(myscanner);
yylex_destroy(myscanner);
return nasi;
}
My pure parser:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
typedef void* yyscan_t;
void yyerror (yyscan_t yyscanner, char const *msg);
int yylex(YYSTYPE *yylval_param, yyscan_t yyscanner);
bool parseExpression(const std::string& inp);
%}
%define api.pure full
%lex-param {yyscan_t scanner}
%parse-param {yyscan_t scanner}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
void yyerror (yyscan_t yyscanner, char const *msg){
fprintf(stderr, "%s\n", msg);
}
void main(void) {
std::string inp = "1|0\n";
bool nasi = parseExpression(inp);
printf("%s%d\n", "nasi ", nasi);
printf("Press ENTER to close. ");
getchar();
}
Notice that I've cheat and defined yyg myself as
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
I don't find another way to get the YY_CURRENT_BUFFER. So, If someone knows the best way to get the YY_CURRENT_BUFFER, tell me,plz.
Here is a complete Flex/Bison C++ example. Everything is reentrant, no use of global variables. Both parser/lexer are encapsulated in a class placed in a separate namespace. You can instantiate as many "interpreters" in as many threads as you want.
https://github.com/ezaquarii/bison-flex-cpp-example
Disclaimer: it's not tested on Windows, but the code should be portable with minor tweaks.