Flex and Bison how to find depth level of command - c

I have Big problem in Bison - I need to find maximum depth level of command (P) in if statement..
So i code this for language.l (FLEX)
%{
#include "jazyk.tab.h"
int max = 0;
int j = 0;
%}
%%
[ \t]+
[Bb][Ee][Gg][Ii][Nn] return(LBEGIN);
[Ee][Nn][Dd] return(LEND);
[Ii][Ff] {j++; if(j>max)max=j; return(LIF);}
[Tt][Hh][Ee][Nn] return(LTHEN);
// command to find max depth level in If statement
[Pp] return(LP);
// V is statement
[Vv] return(LV);
[.] return(.);
[;] return(;);
[-+&~|^/%*(),!] { printf("unknown character in input: %c\n", *yytext);}
[\n] yyterminate();
%%
void maximum()
{
printf("Maximum depth level of command(P): %i\n", max);
}
And this for language.y (BISON)
%{
#include <stdio.h>
#define YYSTYPE float
void koniec(YYSTYPE);
extern char *yytext;
int counterIf;
int counterP;
%}
// define the "terminal symbol" token types (in CAPS by convention)
%token LBEGIN
%token LEND
%token LIF
%token LTHEN
%token LP
%token LV
%token .
%token ;
%start PROGRAM
%%
// the first rule defined is the highest-level rule
PROGRAM: LBEGIN prikazy LEND .
prikazy: prikaz ; prikazy
prikaz: LIF LV LTHEN prikaz {counterIf++;}
prikaz:
prikaz: LP
%%
int main() {
counterIf = 0;
counterP = 0;
printf("Examples to better copy in console: \n");
printf("begin p; p; end. \n");
printf("begin if v then p; end.\n");
printf("begin p; if v then if v then p; p; end.\n");
printf("\n");
if (yyparse()==0){
printf("Sucesfull \n");
printf("If counter: \n");
printf("%d \n", counterIf);
printf("Maximal depth level of command(P): \n");
printf("%d \n", counterP);
maximum();
}
else
printf("Wrong \n");
}
For example of functionality - when i write begin if v then p; end.Result must be: IF: 1; Max depth level of P: 2;
Or:
begin
p;
if v then
if v then p;
p;
end.
Result: IF: 2; max depth: 3;
Im really desperate right now. Please help me with depth counter :-( (And im sorry its not all in English)

Don't try to compute the depth in the scanner. The scanner has no idea about the structure of the program. The parser understands the nesting, so it is where you should count the depth.
Since you're not currently using semantic values for anything, I took the liberty of using them for the statistics. If you had real semantic values, you could add the statistics structure as a member, or use the location value.
When the parser encounters an if statement, it knows that there is one more if statement and that the current nesting depth is one more than the nesting depth of the target of the if.
I added a syntax for if statements with blocks because it was trivial and it makes the program a bit more interesting. When the parser adds a statement to a block, it needs to sum the current if count for the block and the if count for the new statement, and compute the maximum depth as the maximum of the two depths. The function merge_statistics does that.
I didn't really understand what your nesting depth should be; it's possible that the {0, 0} should be {0, 1}. (In the case of an empty block, I assumed that the nesting depth is 0 because there are no statements. But maybe you won't even allow empty blocks.)
You'll need to compile with a compiler which understands C99 (-std=c99 or -std=c11 if you use gcc) because I use compound literals.
I also removed the yyterminate call from your scanner and fixed it so that it insists on spaces between tokens, although maybe you don't care about that.
scanner
%option noinput nounput noyywrap yylineno nodefault
%{
#include "jazyk.tab.h"
%}
%%
[[:space:]]+
[Bb][Ee][Gg][Ii][Nn] return(LBEGIN);
[Ee][Nn][Dd] return(LEND);
[Ii][Ff] return(LIF);
[Tt][Hh][Ee][Nn] return(LTHEN);
[Pp] return(LP);
[Vv] return(LV);
[[:alpha:]]+ { printf("Unknown token: %s\n", yytext); }
[.;] return(*yytext);
. { printf("unknown character in input: %c\n", *yytext);}
parser
%{
#include <stdio.h>
typedef struct statistics {
int if_count;
int max_depth;
} statistics;
statistics merge_statistics(statistics a, statistics b) {
return (statistics){a.if_count + b.if_count,
a.max_depth > b.max_depth ? a.max_depth : b.max_depth};
}
#define YYSTYPE statistics
extern int yylineno;
int yylex();
void yyerror(const char* message);
%}
%token LIF "if" LTHEN "then" LBEGIN "begin" LEND "end"
%token LV
%token LP
%start program
%%
program: block '.' { printf("If count: %d, max depth: %d\n",
$1.if_count, $1.max_depth); }
block: "begin" statements "end" { $$ = $2; }
statements: /* empty */ { $$ = (statistics){0, 0}; }
| statements statement ';' { $$ = merge_statistics($1, $2); }
statement : LP { $$ = (statistics){0, 1}; }
| block
| "if" LV "then" statement { $$ = (statistics){$4.if_count + 1,
$4.max_depth + 1}; }
%%
void yyerror(const char* message) {
printf("At %d: %s\n", yylineno, message);
}
int main(int argc, char** argv) {
int status = yyparse();
/* Unnecessary because yyerror will print an error message */
if (status != 0) printf("Parse failed\n");
return status;
}
Test run:
$ ./jazyk <<<'begin if v then p; end.'
If count: 1, max depth: 2
$ ./jazyk <<<'begin p; if v then if v then p; p; end.'
If count: 2, max depth: 3
$ ./jazyk <<<'begin if v then begin if v then p; p; end; end.'
If count: 2, max depth: 3
$ ./jazyk <<<'begin if v then begin p; if v then p; end; end.'
If count: 2, max depth: 3
$ ./jazyk <<<'begin if v then begin if v then p; if v then p; end; end.'
If count: 3, max depth: 3

Related

Why isn't my bison printing the variable names?

So i'm using a flex/bison parser but the variable names arent printing correctly. It understands the number values. I've tried messing with everything but I'm lost. heres a link to the output. its where it prints "Data: 0" that i'm trying to get the variable name [https://imgur.com/vJDpgpR][1]
invocation is: ./frontEnd data.txt
//main.c
#define BUF_SIZE 1024
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern FILE* yyin;
extern yyparse();
int main(int argc, char* argv[]){
if(argc < 2){
FILE* fp = fopen("temp.txt", "a");
printf("Entering data: \n");
void *content = malloc(BUF_SIZE);
if (fp == 0)
printf("error opening file");
int read;
while ((read = fread(content, BUF_SIZE, 1, stdin))){
fwrite(content, read, 1, fp);
}
if (ferror(stdin))
printf("There was an error reading from stdin");
fclose(fp);
yyparse(fp);
}
if(argc == 2){
yyin = fopen(argv[2], "r");
if(!yyin)
{
perror(argv[2]);
printf("ERROR: file does not exist.\n");
return 0;
}
yyparse (yyin);
}
return 0;
}
void yyerror(char *s){
fprintf(stderr, "error: exiting %s \n", s);
}
//lex.l
%{
#include <stdio.h>
#include <stdlib.h>
#include "parser.tab.h"
extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;
%}
%option noyywrap
%option nounput yylineno
%%
"stop" return STOP;
"iter" return ITER;
"scanf" return SCANF;
"printf" return PRINTF;
"main" return MAIN;
"if" return IF;
"then" return THEN;
"let" return LET;
"func" return FUNC;
"//" return COMMENT; printf("\n");
"start" return START;
"=" return ASSIGN;
"=<" return LE;
"=>" return GE;
":" return COLON;
"+" return PLUS;
"-" return MINUS;
"*" return MULT;
"/" return DIV;
"%" return MOD;
"." return DOT;
"(" return RPAREN;
")" return LPAREN;
"," return COMMA;
"{" return RBRACE;
"}" return LBRACE;
";" return SEMICOLON;
"[" return LBRACK;
"]" return RBRACK;
"==" return EQUAL;
[A-Z][a-z]* { printf("SYNTAX ERROR: Identifiers must start with lower case. "); }
[a-zA-Z][_a-zA-Z0-9]* {
printf("string: %s \n", yytext);
yylval.iVal = strdup(yytext);
yylval.iVal = addSymbol(yytext);
return ID;
}
[0-9]+ {
yylval.iVal = atoi(yytext);
printf("num: %s \n", yytext);
return NUMBER; }
[ _\t\r\s\n] ;
^"#".+$ return COMMENT;
. {printf("ERROR: Invalid Character "); yyterminate();}
<<EOF>> { printf("EOF: line %d\n", yylineno); yyterminate(); }
%%
// stores all variable id is in an array
SYMTABNODEPTR newSymTabNode()
{
return ((SYMTABNODEPTR)malloc(sizeof(SYMTABNODE)));
}
int addSymbol(char *s)
{
extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;
int i;
i = lookup(s);
if(i >= 0){
return(i);
}
else if(curSymSize >= SYMBOLTABLESIZE)
{
return (NOTHING);
}
else{
symtable[curSymSize] = newSymTabNode();
strncpy(symtable[curSymSize]->id,s,IDLENGTH);
symtable[curSymSize]->id[IDLENGTH-1] = '\0';
return(curSymSize++);
}
}
int lookup(char *s)
{
extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;
int i;
for(i=0;i<curSymSize;i++)
{
if(strncmp(s,symtable[i]->id,IDLENGTH) == 0){
return (i);
}
}
return(-1);
}
// parser.y
%{
#define YYERROR_VERBOSE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern char *yytext;
extern int yylex();
extern void yyerror(char *);
extern int yyparse();
extern FILE *yyin;
/* ------------- some constants --------------------------------------------- */
#define SYMBOLTABLESIZE 50
#define IDLENGTH 15
#define NOTHING -1
#define INDENTOFFSET 2
#ifdef DEBUG
char *NodeName[] =
{
"PROGRAM", "BLOCK", "VARS", "EXPR", "N", "A", "R", "STATS", "MSTAT", "STAT",
"IN", "OUT", "IF_STAT", "LOOP", "ASSIGN", "RO", "IDVAL", "NUMVAL"
};
#endif
enum ParseTreeNodeType
{
PROGRAM, BLOCK, VARS, EXPR, N, A, R, STATS, MSTAT, STAT,
IN, OUT,IF_STAT, LOOP, ASSIGN, RO, IDVAL, NUMVAL
};
#define TYPE_CHARACTER "char"
#define TYPE_INTEGER "int"
#define TYPE_REAL "double"
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
#ifndef NULL
#define NULL 0
#endif
// definitions for parse tree
struct treeNode {
int item;
int nodeID;
struct treeNode *first;
struct treeNode *second;
};
typedef struct treeNode TREE_NODE;
typedef TREE_NODE *TREE;
TREE makeNode(int, int, TREE, TREE);
#ifdef DEBUG
void printTree(TREE, int);
#endif
// symbol table definitions.
struct symbolTableNode{
char id[IDLENGTH];
};
typedef struct symbolTableNode SYMTABNODE;
typedef SYMTABNODE *SYMTABNODEPTR;
SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
int curSymSize = 0;
%}
%start program
%union {
char *sVal;
int iVal;
TREE tVal;
}
// list of all tokens
%token SEMICOLON GE LE EQUAL COLON RBRACK LBRACK ASSIGNS LPAREN RPAREN COMMENT
%token DOT MOD PLUS MINUS DIV MULT RBRACE LBRACE START MAIN STOP LET COMMA
%token SCANF PRINTF IF ITER THEN FUNC
%left MULT DIV MOD ADD SUB
// tokens defined with values and rule names
%token<iVal> NUMBER ID
//%token<sVal> ID
%type<tVal> program type block vars expr N A R stats mStat stat in out if_stat loop assign RO
%%
program : START vars MAIN block STOP
{
TREE tree;
tree = makeNode(NOTHING, PROGRAM, $2,$4);
#ifdef DEBUG
printTree(tree, 0);
#endif
}
;
block : RBRACE vars stats LBRACE
{
$$ = makeNode(NOTHING, BLOCK, $2, $3);
}
;
vars : /*empty*/
{
$$ = makeNode(NOTHING, VARS,NULL,NULL);
}
| LET ID COLON NUMBER vars
{
$$ = makeNode($2, VARS, $5,NULL);
printf("id: %d", $2);
}
;
//variable:
// type ID{$$ = newNode($2,VARIABLE,$1,NULL,NULL);};
//type:
// INT {$$ = newNode(INT,TYPE,NULL,NULL,NULL);}
// | BOOL {$$ = newNode(BOOL,TYPE,NULL,NULL,NULL);}
// | CHAR {$$ = newNode(CHAR,TYPE,NULL,NULL,NULL);}
// | STRING{$$ = newNode(STRING,TYPE,NULL,NULL,NULL);};
expr : N DIV expr
{
$$ = makeNode(DIV, EXPR, $1, $3);
}
| N MULT expr
{
$$ = makeNode(MULT, EXPR, $1, $3);
}
| N
{
$$ = makeNode(NOTHING, EXPR, $1,NULL);
}
;
N : A PLUS N
{
$$ = makeNode(PLUS, N, $1, $3);
}
| A MINUS N
{
$$ = makeNode(MINUS, N, $1, $3);
}
| A
{
$$ = makeNode(NOTHING, N, $1,NULL);
}
;
A : MOD A
{
$$ = makeNode(NOTHING, A, $2,NULL);
}
| R
{
$$ = makeNode(NOTHING, A, $1,NULL);
}
;
R : LBRACK expr RBRACK
{
$$ = makeNode(NOTHING, R, $2,NULL);
}
| ID
{
$$ = makeNode($1, IDVAL, NULL,NULL);
}
| NUMBER
{
$$ = makeNode($1, NUMVAL, NULL,NULL);
}
;
stats : stat mStat
{
$$ = makeNode(NOTHING, STATS, $1, $2);
}
;
mStat : /* empty */
{
$$ = makeNode(NOTHING, MSTAT, NULL,NULL);
}
| stat mStat
{
$$ = makeNode(NOTHING, MSTAT, $1, $2);
}
;
stat: in DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| out DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| block
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| if_stat DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| loop DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| assign DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
;
in : SCANF LBRACK ID RBRACK
{
$$ = makeNode($3, IN,NULL,NULL);
}
;
out : PRINTF LBRACK expr RBRACK
{
$$ = makeNode(NOTHING, OUT,$3,NULL);
}
;
if_stat : IF LBRACK expr RO expr RBRACK THEN block
{
$$ = makeNode(NOTHING, IF_STAT, $4, $8);
}
;
loop : ITER LBRACK expr RO expr RBRACK block
{
$$ = makeNode(NOTHING, LOOP, $4, $7);
}
;
assign : ID ASSIGNS expr
{
$$ = makeNode($1, ASSIGN, $3,NULL);
}
;
RO : LE
{
$$ = makeNode(LE, RO, NULL,NULL);
}
| GE
{
$$ = makeNode(GE, RO, NULL,NULL);
}
| EQUAL
{
$$ = makeNode(EQUAL, RO, NULL,NULL);
}
| COLON COLON
{
$$ = makeNode(EQUAL, RO, NULL,NULL);
}
;
%%
// node generator
TREE makeNode(int iVal, int nodeID, TREE p1, TREE p2)
{
TREE t;
t = (TREE)malloc(sizeof(TREE_NODE));
t->item = iVal;
t->nodeID = nodeID;
t->first = p1;
t->second = p2;
//printf("NODE CREATED");
return(t);
}
// prints the tree with indentation for depth
void printTree(TREE tree, int depth){
int i;
if(tree == NULL) return;
for(i=depth;i;i--)
printf(" ");
if(tree->nodeID == NUMBER)
printf("INT: %d ",tree->item);
else if(tree->nodeID == IDVAL){
if(tree->item > 0 && tree->item < SYMBOLTABLESIZE )
printf("id: %s ",symtable[tree->item]->id);
else
printf("unknown id: %d ", tree->item);
}
if(tree->item != NOTHING){
printf("Data: %d ",tree->item);
}
// If out of range of the table
if (tree->nodeID < 0 || tree->nodeID > sizeof(NodeName))
printf("Unknown ID: %d\n",tree->nodeID);
else
printf("%s\n",NodeName[tree->nodeID]);
printTree(tree->first,depth+2);
printTree(tree->second,depth+2);
}
#include "lex.yy.c"
// heres the makefile I use for compilation
frontEnd: lex.yy.c parser.tab.c
gcc parser.tab.c main.c -o frontEnd -lfl -DDEBUG
parser.tab.c parser.tab.h: parser.y
bison -d parser.y
lex.yy.c: lex.l
flex lex.l
clean:
rm lex.yy.c y.tab.c frontEnd
'''
// data.txt
start
let x : 13
main {
scanf [ x ] .
printf [ 34 ] .
} stop[enter image description here][2]
[1]: https://i.stack.imgur.com/xlNnh.png
[2]: https://i.stack.imgur.com/HKRtX.png
I think this has a lot more to do with your AST and symbol table functions than with your parser, and practically nothing to do with bison itself.
For example, your function to print trees won't attempt to print an identifier's name if its symbol table index is 0.
if(tree->item > 0 && tree->item < SYMBOLTABLESIZE)
But the first symbol entered in the table will have index 0. (Perhaps you fixed this between pasting your code and generating the results. You should always check that the code you paste in a question corresponds precisely to the output which you show. But this isn't the only bug in your code; it's just an example.)
As another example, the immediate problem which causes Data: 0 to be printed instead of the symbol name is that your tree printer only prints symbol names for AST nodes of type IDVAL, but you create an AST IN node whose data field contains the variable's symbol table index. So either you need to fix your tree printer so it knows about IN nodes, or you need to change the IN node so that it has a child which is the IDVAL node. (That's probably the best solution in the long run.)
It's always a temptation to blame bison (or whatever unfamiliar tool you're using at the moment) for bugs, instead of considering the possibility that you've introduced bugs in your own support code. To avoid falling into this trap, it's always a good idea to test your library functions separately before using them in a more complicated project. For example, you could write a small test driver that builds a fixed AST tree, prints it, and deletes it. Once that works, and only when that works, you can check to see if your parser can build and print the same tree by parsing an input.
You will find that some simple good software design practices will make this whole process much smoother:
Organise your code into separate component files, each with its own header file. Document the library interfaces (and, if necessary, data structures) using comments in the header file. Briefly describe what each function's purpose is. If you can't find a brief description, it nay be that the function is trying to do too many different things.
In your parser, the functions and declarations needed to build and use ASTs are scattered between different parts of your lexer and parser files. This makes them much harder to read, debug, maintain and even use.
No matter what your teacher might tell you, if you find it necessary to #include the generated lexical scanner directly into the parser, then you probably have not found a good way to organise your support functions. You should always aim to make it possible to separately compile the parser and the scanner.
For data structures like your AST node, which use different member variables in different ways depending on an enumerated node type -- which is a model you'll find in other C projects as well, but is particularly common in parsers -- document the precise use of each field for every enumeration value. And make sure that every time you change the way you use the data or add new enumeration values, you fix the documentation accordingly.
This documentation will make it much easier to verify that your AST is being built correctly. As an additional benefit, you (or others using your code) will have an accurate description of how to interpret the contents of AST nodes, which makes it much easier to write code which analyses the tree.
In short, the way to write, debug and maintain any non-trivial project is not by "messing around" but by being systematic and modular. While it might seem like all of this takes precious time, particularly the documentation, it will almost always save you a lot of time in the long run.

Bison: When I try to add action in function grammar rule, the segmentation fault 11 occur

I have a problem about segmentation fault 11.
Every time, when I want to add action rules in function grammar blocks, I must get the segmentation fault 11.
Therefore, I cannot get the dump.out, which is a file that record the identifiers for me.
I do not think the problem is because of the scanner file, but that is still possible.
Of course, the problem should have something about symbol table, but it is really strange.
The problem just occurs like:
function: FN ID '(' ')' {if ($2->st_type == UNDEF) $2->st_type = FUNCTION_TYPE};
When I add action in the block, segmentation fault 11 will occur.
However, this is okay.
function: FN ID '(' ')' {};
The parser file do not contains all contents since it is so many.
I use mac os
I hope someone can help me.
Thank you anyway
Where the error occur
1: // Hello World Example
<fn>
<id: main>
<(>
<)>
<{>
2: fn main() {
3: // Print text to the console
<let>
<mut>
<id: a>
<:>
<int>
<=>
<integer: 10>
<;>
4: let mut a:int = 10;
<let>
<mut>
<id: b>
<=>
<string: 1199>
<;>
5: let mut b = "1199";
<let>
<mut>
<id: sum>
<[>
<str>
<,>
<integer: 10>
<]>
<;>
6: let mut sum[str, 10];
<id: sum>
<[>
<integer: 0>
<]>
<=>
<string: 100>
<;>
7: sum[0] = "100";
<id: b>
<=>
<string: 123>
<+>
<id: b>
<;>
8: b = "123" + b;
<println>
<(>
<string: Hello World>
<)>
<;>
9: println ("Hello World");
<}>
10: }
Symbol table:
a
b
sum
main
Segmentation fault: 11
The input file
// Hello World Example
fn main() {
// Print text to the console
let mut a:int = 10;
let mut b = "1199";
let mut sum[str, 10];
sum[0] = "100";
b = "123" + b;
println ("Hello World");
}
This is my symbol table header file.
#include <stdio.h>
/* maximum size of hash table */
#define SIZE 211
/* maximum size of tokens-identifiers */
#define MAXTOKENLEN 40
/* token types */
#define UNDEF 0
#define INT_TYPE 1
#define REAL_TYPE 2
#define STR_TYPE 3
#define LOGIC_TYPE 4
#define ARRAY_TYPE 5
#define FUNCTION_TYPE 6
/* new type for parser */
#define CONST_INT_TYPE 7
#define CONST_REAL_TYPE 8
#define CONST_STR_TYPE 9
#define CONST_LOGIC_TYPE 10
/* how parameter is passed */
#define BY_VALUE 1
#define BY_REFER 2
/*
* Originally here, now it is in the symbols.c
* current scope
* int cur_scope = 0;
*/
/* parameter struct */
typedef struct Parameter{
int par_type;
char *param_name;
// to store value
int ival; double fval; char *st_sval; int bval; // boolean type
int passing; // value or reference
struct Parameter *next; // link to next one
}Param;
/* a linked list of references (lineno's) for each variable */
typedef struct Ref{
int lineno;
struct Ref *next;
int type;
}RefList;
// struct that represents a list node
typedef struct list{
char st_name[MAXTOKENLEN];
int st_size;
int scope;
RefList *lines;
// to store value and sometimes more information
int st_ival; double st_fval; char *st_sval; int st_bval;
// type
int st_type;
int inf_type; // for arrays (info type) and functions (return type)
// array stuff
int *i_vals; double *f_vals; char **s_vals; int *b_vals; // boolean type
int array_size;
// function parameters
Param *parameters;
int num_of_pars; // Meanwhile, it record the current position of the parameters
// pointer to next item in the list
struct list *next;
}list_t;
/* the hash table */
static list_t **hash_table;
// Function Declarations
void create(); // initialize hash table
unsigned int hash(char *key); // hash function for insert
void insert(char *name, int len, int type, int lineno); // insert entry
list_t *lookup(char *name); // search for entry
list_t *lookup_scope(char *name, int scope); // search for entry in scope
void hide_scope(); // hide the current scope
void incr_scope(); // go to next scope
void dump(FILE *of); // dump file
This is symbol table code file.
#include "symbols.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* current scope */
int cur_scope = 0;
void create()
{
int i;
hash_table = malloc(SIZE * sizeof(list_t*));
for(i = 0; i < SIZE; i++) hash_table[i] = NULL;
}
unsigned int hash(char *key)
{
unsigned int hashval = 0;
for(;*key!='\0';key++) hashval += *key;
hashval += key[0] % 11 + (key[0] << 3) - key[0];
return hashval % SIZE;
}
void insert(char *name, int len, int type, int lineno)
{
unsigned int hashval = hash(name); // hash function used
list_t *l = hash_table[hashval];
while ((l != NULL) && (strcmp(name,l->st_name) != 0)) l = l->next;
/* variable not yet in table */
if (l == NULL){
l = (list_t*) malloc(sizeof(list_t));
strncpy(l->st_name, name, len);
/* add to hashtable */
l->st_type = type;
l->scope = cur_scope;
l->lines = (RefList*) malloc(sizeof(RefList));
l->lines->lineno = lineno;
l->lines->next = NULL;
l->next = hash_table[hashval];
hash_table[hashval] = l;
// printf("Inserted %s for the first time with linenumber %d!\n", name, lineno); // error checking
}
/* found in table, so just add line number */
else{
l->scope = cur_scope;
RefList *t = l->lines;
while (t->next != NULL) t = t->next;
/* add linenumber to reference list */
t->next = (RefList*) malloc(sizeof(RefList));
t->next->lineno = lineno;
t->next->next = NULL;
// printf("Found %s again at line %d!\n", name, lineno);
}
}
list_t *lookup(char *name)
{ /* return symbol if found or NULL if not found */
unsigned int hashval = hash(name);
list_t *l = hash_table[hashval];
while ((l != NULL) && (strcmp(name,l->st_name) != 0)) l = l->next;
return l; // NULL is not found
}
list_t *lookup_scope(char *name, int scope)
{ /* return symbol if found or NULL if not found */
unsigned int hashval = hash(name);
list_t *l = hash_table[hashval];
while ((l != NULL) && (strcmp(name,l->st_name) != 0) && (scope != l->scope)) l = l->next;
return l; // NULL is not found
}
void hide_scope()
{ /* hide the current scope */
if(cur_scope > 0) cur_scope--;
}
void incr_scope()
{ /* go to next scope */
cur_scope++;
}
/* print to stdout by default */
void dump(FILE * of)
{
int i; int count; // record whether first line prints or not.
fprintf(of,"------------ ----------------- -------------\n");
fprintf(of,"Name Type Line Numbers\n");
fprintf(of,"------------ ----------------- -------------\n");
for (i=0; i < SIZE; ++i){
if (hash_table[i] != NULL){
list_t *l = hash_table[i];
while (l != NULL){
RefList *t = l->lines;
fprintf(of,"%-12s ",l->st_name);
printf("%s\n", l->st_name); // print out all the names in the symbol table
if (l->st_type == INT_TYPE) fprintf(of,"%-7s","int");
else if (l->st_type == REAL_TYPE) fprintf(of,"%-7s","real");
else if (l->st_type == STR_TYPE) fprintf(of,"%-7s","string");
else if (l->st_type == LOGIC_TYPE) fprintf(of,"%-7s","bool");
else if (l->st_type == CONST_INT_TYPE) fprintf(of, "%-7s", "const_int"); // constant_int_type
else if (l->st_type == CONST_REAL_TYPE) fprintf(of, "%-7s", "const_real"); // constant_real_type
else if (l->st_type == CONST_STR_TYPE) fprintf(of, "%-7s", "const_string"); // constant_string_type
else if (l->st_type == CONST_LOGIC_TYPE) fprintf(of, "%-7s", "const_bool"); // const_logic_type
else if (l->st_type == ARRAY_TYPE){
fprintf(of,"array of ");
if (l->inf_type == INT_TYPE) fprintf(of,"%-7s","int");
else if (l->inf_type == REAL_TYPE) fprintf(of,"%-7s","real");
else if (l->inf_type == STR_TYPE) fprintf(of,"%-7s","string");
else if (l->inf_type == LOGIC_TYPE) fprintf(of,"%-7s","bool");
else fprintf(of,"%-7s","undef");
}
else if (l->st_type == FUNCTION_TYPE){
fprintf(of,"%-7s %s","function returns ");
if (l->inf_type == INT_TYPE) fprintf(of,"%-7s","int");
else if (l->inf_type == REAL_TYPE) fprintf(of,"%-7s","real");
else if (l->inf_type == STR_TYPE) fprintf(of,"%-7s","string");
else if (l->inf_type == LOGIC_TYPE) fprintf(of,"-7%s","bool");
else fprintf(of,"%-7s","undef");
}
else fprintf(of,"%-7s","undef"); // if UNDEF or 0
count = 0;
while (t != NULL){
if (count == 0)
{
if (l->st_type == INT_TYPE || l->st_type == REAL_TYPE || l->st_type == STR_TYPE || l->st_type == UNDEF)
fprintf(of,"%13d ", t->lineno);
else if (l->st_type == CONST_INT_TYPE || l->st_type == CONST_REAL_TYPE || l->st_type == CONST_STR_TYPE || l->st_type == CONST_LOGIC_TYPE)
fprintf(of,"%10d", t->lineno);
else if (l->st_type == ARRAY_TYPE || l->st_type == FUNCTION_TYPE)
fprintf(of,"%4d", t->lineno);
}
else
fprintf(of,"%3d", t->lineno);
count++;
t = t->next;
}
fprintf(of,"\n");
l = l->next;
}
}
}
}
scanner file
%option noyywrap
%{
#include "symbols.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.tab.h"
#define LIST strcat(buf,yytext)
#define token(t) {LIST; printf("<%s>\n");}
#define tokenInteger(t, i) {LIST; printf("<%s: %d>\n", t, i);}
#define tokenReal(t, r) {LIST; printf("<%s: %lf>\n", t, r);}
#define tokenString(t, s) {LIST; printf("<%s: %s>\n", t, s);}
#define MAX_LINE_LENG 256
extern FILE* yyin;
extern FILE* yyout;
int linenum = 1;
char buf[MAX_LINE_LENG];
char* tempStr;
int indexForStr = 1;
list_t* temp;
%}
%x Comment
%%
"//".* {LIST;}
"/*" {BEGIN(Comment); LIST;}
<Comment>"*/" {LIST; BEGIN(0);}
<Comment>\n {LIST; printf("%d: %s\n", linenum++, buf); buf[0] = '\0';}
<Comment>. {LIST;}
"bool" {
token("BOOL");
return BOOL;
}
"break" {token("BREAK"); return BREAK;}
"char" {token("CHAR"); return CHAR;}
"continue" {token("CONTINUE"); return CONTINUE;}
"do" {token("DO"); return DO;}
"else" {token("ELSE"); return ELSE;}
"enum" {token("ENUM"); return ENUM;}
"extern" {token("EXTERN"); return EXTERN;}
"false" {token("FALSE"); yylval.boolVal = 0; return FALSE;}
"float" {
token("FLOAT");
return FLOAT;
}
"for" {token("FOR"); return FOR;}
"fn" {token("FN"); return FN;}
"if" {token("IF"); return IF;}
"in" {token("IN"); return IN;}
"int" {
token("INT");
return INT;
}
"let" {token("LET"); return LET;}
"loop" {token("LOOP"); return LOOP;}
"match" {token("MATCH"); return MATCH;}
"mut" {token("MUT"); return MUT;}
"print" {token("PRINT"); return PRINT;}
"println" {token("PRINTLN"); return PRINTLN;}
"pub" {token("PUB"); return PUB;}
"return" {token("RETURN"); return RETURN;}
"self" {token("SELF"); return SELF;}
"static" {token("STATIC"); return STATIC;}
"str" {
token("STR");
return STR;
}
"struct" {token("STRUCT"); return STRUCT;}
"true" {token("TRUE"); yylval.boolVal = 1; return TRUE;}
"use" {token("USE"); return USE;}
"where" {token("WHERE"); return WHERE;}
"while" {token("WHILE"); return WHILE;}
"," {token("','"); return ',';}
":" {token("':'"); return ':';}
";" {token("';'"); return ';';}
"(" {token("'('"); return '(';}
")" {token("')'"); return ')';}
"[" {token("'['"); return '[';}
"]" {token("']'"); return ']';}
"{" {token("'{'"); return '{';}
"}" {token("'}'"); return '}';}
"+" {token("'+'"); return '+';}
"-" {token("'-'"); return '-';}
"*" {token("'*'"); return '*';}
"/" {token("'/'"); return '/';}
"++" {token("'++'"); return '++';}
"--" {token("'--'"); return '--';}
"%" {token("'%'"); return '%';}
"<" {token("'<'"); return LESS;}
"<=" {token("'<='"); return '<=';}
">=" {token("'>='"); return '>=';}
">" {token("'>'"); return GREATER;}
"==" {token("'=='"); return '==';}
"!=" {token("'!='"); return '!=';}
"&&" {token("'&&'"); return '&&';}
"||" {token("'||'"); return '||';}
"!" {token("'!'"); return EXCLAMATION;}
"=" {token("'='"); return ASSIGN;}
"+=" {token("'+='"); return '+=';}
"-=" {token("'-='"); return '-=';}
"*=" {token("'*='"); return '*=';}
"/=" {token("'/='"); return '/=';}
"->" {token("'->'"); return ARROW;}
"read" {token("'READ'"); return READ;}
[A-Z_a-z]([A-Z_a-z]|[0-9])* {
insert(yytext, yyleng, UNDEF, linenum);
yylval.symptr = lookup(yytext);
tokenString("id", yylval.symptr->st_name);
return ID;
}
"0"|[0-9][0-9]* {
sscanf(yytext, "%d", &yylval.intVal);
tokenInteger("integer", yylval.intVal);
return INTEGER;
}
[0-9_]+"."[0-9_]|[0-9_][Ee][+-]?[0-9_]+ {
yylval.floatVal = atof(yytext);
tokenReal("real", yylval.floatVal);
return REAL;
}
\"([\\.]|[^\\"])*\" {
tempStr = malloc((strlen(yytext) - 1) * sizeof(char));
for (int i = 0; i < strlen(yytext) - 2; i++)
{
tempStr[i] = yytext[indexForStr];
indexForStr++;
}
tempStr[strlen(yytext) - 1] = '\0';
yylval.stringVal = strdup(yytext);
tokenString("string", tempStr);
free(tempStr);
indexForStr = 1;
return STRING;
}
\n {
LIST;
printf("%d: %s", linenum++, buf);
buf[0] = '\0';
}
[ \t]* {LIST;}
. {
LIST;
printf("%d:%s\n", linenum+1, buf);
printf("bad character:'%s'\n",yytext);
exit(-1);
}
%%
parser file
%{
#include "symbols.c"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#define Trace(t, line) printf(t, line) // Trace where the error occurs and print the line number
#ifndef STRSIZE
#define STRSIZE 40
#endif
#ifndef PARAMSIZE
#define PARAMSIZE 40
#endif
extern FILE* yyin;
extern FILE* yyout;
extern int linenum;
extern int yylex();
void yyerror(char* msg);
%}
%union{
char* stringVal;
double floatVal;
int intVal;
int boolVal;
list_t* symptr;
}
/* tokens */
%token <symptr> ID
%token <intVal> INTEGER
%token <floatVal> REAL
%token <stringVal> STRING
%token <boolVal> TRUE FALSE
%token INT FLOAT STR BOOL
%token BREAK CHAR CONTINUE DO ELSE
%token ENUM EXTERN FOR
%token FN IF IN LET
%token LOOP MATCH MUT PRINT PRINTLN
%token RETURN SELF STATIC STRUCT
%token USE WHERE WHILE
%token READ PUB
%token LESS GREATER ASSIGN EXCLAMATION ARROW
/* precedence for operators */
%left '||'
%left '&&'
%left EXCLAMATION
%left LESS '<=' '>=' GREATER '==' '!='
%left '+' '-'
%left '*' '/'
%left UMINUS
/* types */
%type <intVal> integer_exp
%type <floatVal> real_exp
%type <stringVal> string_exp
%type <boolVal> bool_exp
%start program /* the initial entry point */
%%
program: functions | global_declaration functions
;
global_declaration: global_declaration constant_declaration
| global_declaration variable_declaration
| global_declaration array_declaration
| constant_declaration
| variable_declaration
| array_declaration
;
local_declaration: local_declaration constant_declaration
| local_declaration variable_declaration
| local_declaration array_declaration
| constant_declaration
| variable_declaration
| array_declaration
;
block: start local_declaration statements end
| start local_declaration end
| start statements end
| start end
;
start: '{' {
incr_scope();
}
end: '}' {
hide_scope();
}
;
functions: functions function
| function
;
function: FN ID '(' ')' start local_declaration statements end{
if ($2->st_type == UNDEF)
{
$2->st_type = FUNCTION_TYPE;
$2->inf_type = UNDEF;
}
else
{
Trace("line %d: Redeclaration of identifier.\n", linenum);
}
}
| FN ID '(' ')' start statements end {
if ($2->st_type == UNDEF)
{
$2->st_type = FUNCTION_TYPE;
$2->inf_type = UNDEF;
}
else
{
Trace("line %d: Redeclaration of identifier.\n", linenum);
}
}
| FN ID '(' ')' start local_declaration end {
if ($2->st_type == UNDEF)
{
$2->st_type = FUNCTION_TYPE;
$2->inf_type = UNDEF;
}
else
{
Trace("line %d: Redeclaration of identifier.\n", linenum);
}
}
| FN ID '(' ')' start end {
if ($2->st_type == UNDEF)
{
$2->st_type = FUNCTION_TYPE;
$2->inf_type = UNDEF;
}
else
{
Trace("line %d: Redeclaration of identifier.\n", linenum);
}
;
%%
void yyerror(char* msg)
{
fprintf(stderr, "line %d: %s\n", linenum, msg);
}
int main(int argc, char** argv)
{
/* create the hash table */
create();
/* open the source program file */
if (argc != 2) {
printf ("Usage: sc filename\n");
exit(1);
}
yyin = fopen(argv[1], "r"); /* open input file */
int flag;
flag = yyparse();
/* perform parsing */
if (flag == 1) /* parsing */
yyerror("Parsing error !"); /* syntax error */
fclose(yyin); /* close input file */
/* output symbol table */
printf("\nSymbol table:\n");
yyout = fopen("dump.out", "w");
dump(yyout);
fclose(yyout);
return 0;
}
Clearly, the problem occurs during the dump function when the token type is FUNCTION_TYPE. That's clear from the debugging output (dump is presumably executing when the segfault occurs) and from the change report (the problem occurs when an action sets the st_type field to FUNCTION_TYPE).
Visual inspection of the if clause in dump() corresponding to that condition reveals the following obvious error:
fprintf(of,"%-7s %s","function returns ");
That call to fprintf has a format string with two %s conversions. However, there is only one argument to be inserted.
The real question you should be asking yourself is "How can I easily find stupid typos like this without spending a lot of time or resorting to outside experts?"
As a first approximation, that error is so common and easy to detect that most compilers will warn you about it. So your first step is to make sure you always compile with warnings enabled (-Wall if you are using gcc or clang), and that you read the warnings.
Even without the warning, it would have been straight-forward to find the error with a debugger such as gdb. Just set a breakpoint at dump and single-step until the segfault occurs.
Finally, you are making your life much more complicated when you first build a large complex program with a lot of components and only then start to debug it. In the long run, you will find that it is worth taking the time to test each component individually (your symbol table, for example), using some kind of test harness, and only assembling your more complex program when you are reasonably confident that the individual pieces work. That will avoid the difficulty of identifying where the error occurred (as in this case, where you were evidently distracted by your doubts about the parser generator, leading you to miss the actual problem which has nothing to do with the parser).
By the way, strncpy is a definite red flag, although in this case you seem to have been lucky (or unlucky) enough to not encounter the bug. strncpy is pretty well never what you want, and if it is what you want then the length parameter should be the longest string you can accommodate rather than the length of the input string. (strncpy is intended for use in fixed-length formats, which is why it pads the output to the specified length.)
If you use the length of the input string, then you have two problems: (1) the copy is guaranteed to not be NUL-terminated, leading to Undefined Behaviour; and (2) nothing stops the copy from overrunning the output buffer, in the case that the input string is too long.
Even used correctly, strncpy requires you to manually NUL-terminate the output, which is a nuisance. A better solution is to first check that the string is not too long (length < SIZE) and then use strcpy, which will correctly NUL-terminate. Even better is to make the name field a char* instead of an array, and dynamically allocate a string of the correct length (see strdup, for example), thereby avoiding having to arbitrarily limit the size of identifiers.

BISON AST production prints scrambled values

I'm trying to make a simple parser. It's for a homework assignment but also for own experimentation. I have completed the lexer and the parser and I'm trying now to output an AST. The problem is that when I'm adding, for example, two integers, the result tree is printed with unrecognizable symbols. A valid input should be +(1,1) and a valid output should be (+ 1 1). Instead of this, I'm getting ( + �|k �|k ). I've tried many things, without actually any significant result. The sprintf function returns a null terminator, so probably this is not the problem. Below is the parser code (.y file):
%{
#define YYDEBUG 1
%}
%start program
%token NUMBER
%token ID
%token PLUS MINUS TIMES
%token LP RP EQUALS COMMA
%token END
%token LET IN AND
%left PLUS MINUS
%left TIMES
%left LET IN AND
%left EQUALS
%%
program:{printf("Empty Input\n");} /* empty */
| program line /* do nothing */
line: expr END { printtree($1); printf("\n");}
;
expr : /*Empty*/
| LET deflist IN expr {}
| ID { printf("Found ID\n"); $$ = make_id_leaf($1);}
| NUMBER { printf("Found NUMBER\n"); $$ = make_number_leaf($1);}
| PLUS LP expr COMMA expr RP {$$ = make_plus_tree($3,$5); printf("Found expr PLUS expr.\n"); }
| TIMES LP expr COMMA expr RP {$$ = make_times_tree($3,$5); printf("Found expr TIMES expr. Result:%d\n", $$);}
| MINUS ID
| MINUS NUMBER { printf("found MINUS NUMBER\n"); }
;
deflist : definition
| definition AND deflist
;
definition : /*Empty*/
| ID EQUALS expr {printf("Found EQ\n");}
;
%%
/*int main (void) {return yyparse ( );}*/
int yyerror (char *s) {fprintf (stderr, "%s\n", s);}
The lexer file:
%{
#include "parser.h"
%}
DIGIT [0-9]
LETTER [a-zA-Z]
%%
LET {printf("Encountered LET\n"); return(LET);}
IN {printf("Encountered IN\n"); return(IN);}
AND {printf("Encountered AND\n"); return(AND);}
{DIGIT}+ {yylval = atoi(yytext); return NUMBER;}
{LETTER}* { if (strlen(yytext) <= 8){
yylval = strlen(yytext);
printf( "<ID, %s> ", yytext );
return(ID);
} else {
yytext[8] = '\0';
printf("WARNING! Long identifier. Truncating to 8 chars\n");
printf( "<ID, %s> ", yytext );
return(ID);
}
}
[ \t] ;
[\n] return(END);
"+" return(PLUS);
"-" return(MINUS);
"*" return(TIMES);
"=" return(EQUALS);
"(" return(LP);
")" return(RP);
"," return(COMMA);
<<EOF>> return(0);
%%
int yywrap (void) {return 1;}
The main.c which includes the yyparse() function:
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
#include "treedefs.h"
int main(int argc, char **argv){
yyparse();
return 0;
}
And the treedefs.h file which includes the function definitions (I've included only the struct definition, the number leaf and the plus tree):
typedef struct tree{
char *token;
TREE *l;
TREE *r;
TREE *child;
}TREE;
/* Make number leaves */
TREE *make_number_leaf(int n){
TREE *leafNum = malloc(sizeof(TREE));
char *c, ch[8];
sprintf(ch, "%d", n); /* Effective way to convert int to string */
c = ch;
leafNum->token = c;
leafNum->l = NULL;
leafNum->r = NULL;
leafNum->child = NULL;
printf("NUM Leaf is: %s\n", leafNum->token);
return (leafNum);
}
/* Addition tree */
TREE *make_plus_tree(TREE *l, TREE *r){
TREE *plusTree = malloc(sizeof(TREE));
plusTree->token = "+";
plusTree->l = l;
plusTree->r = r;
plusTree->child = NULL;
return (plusTree);
}
void printtree(TREE *tree)
{
if (tree->l || tree->r){
printf("(");
}
printf(" %s ", tree->token);
if (tree->l){
printtree(tree->l);
}
if (tree->r){
printtree(tree->r);
}
if (tree->l || tree->r){
printf(")");
}
}
The file tree.h includes only some declarations, no big deal, and definitely not related to the issue.
Why the numbers look like this? And how can I fix it? Any help will be greatly appreciated.
This problem actually has nothing to do with bison or flex. It's in your make_number_leaf implementation:
TREE *make_number_leaf(int n){
TREE *leafNum = malloc(sizeof(TREE));
char *c, ch[8];
// ^ local variable
sprintf(ch, "%d", n); /* Effective way to convert int to string */
c = ch;
leafNum->token = c;
// ^ dangling pointer
// Remainder omitted
}
As indicated in the comments above, ch is a local (stack-allocated) variable, whose lifetime ends when the function returns. Assigning its address to the variable c does nothing to change that. So the value of c which is stored into leafNum->token will become a dangling pointer as soon as the function returns.
So when you later attempt to print out the token, you are printing out the contents of random memory.
You need to malloc a character buffer, and remember to free it when you are freeing the TREE. (However, in the case where leafNum->token is a string literal, you cannot call free, so you need to be a bit cleverer.)

Error while compiling Concurrent YACC program

I am trying to build implement a basic calculator using Concurrent YACC. I have tried the code by statically creating the threads. But whe I want to dynamically specify how many threads to be created, the parser seems to have a problem. Here are the contents of my code.
aa.y file
%{
#include <stdio.h>
#include <pthread.h>
#include <string.h>
void * scanner;
FILE *yyin;
#define YYSTYPE int
%}
%token digit
%lex-param {void * scanner}
%parse-param {void * scanner}
%start list
%token NUMBER
%left '+' '-'
%left '*' '/' '%'
%left UMINUS
%union {int i;}
%%
list:
|
list stat '\n'
|
list error '\n'{ yyerrok; }
;
stat: expr { printf("Thread = %d ... Ans = %d\n",pthread_self(),$1);}
;
expr: '(' expr ')'{ $$ = $2; }
|
expr '*' expr { $$ = $1 * $3; }
|
expr '/' expr { $$ = $1 / $3; }
|
expr '+' expr { $$ = $1 + $3; }
|
expr '-' expr { $$ = $1 - $3; }
|
'-' expr %prec UMINUS { $$ = -$2; }
|
NUMBER
;
%%
struct struct_arg
{
unsigned char* file;
};
int yyerror()
{
return 1;
}
void *parse(void *arguments)
{
struct struct_arg *args = (struct struct_arg *)arguments;
unsigned char* filename;
filename = args -> file;
yyin = fopen(filename,"r+");
if(yyin == NULL)
{
}
else
{
yylex_init(&scanner);
yyset_in(yyin,scanner);
yyparse(scanner);
yylex_destroy(scanner);
printf("Thread = %d\n",pthread_self());
}
fclose(yyin);
}
int main(int argc, char *argv[])
{
int num;
printf("How many threads you want to create??\n");
scanf("%d", &num);
int error, count = 0;
FILE *fp[num], *file_pointer;
char line[256];
size_t len = 0;
char read;
file_pointer = fopen("test.txt", "r");
while (fgets(line, sizeof(line), file_pointer))
{
char file_name[32] = "test_";
char dummy[4];
char dummy2[5] = ".txt";
sprintf(dummy, "%d", count);
strcat(file_name, dummy);
strcat(file_name, dummy2);
fp[count] = fopen(file_name, "a");
fprintf(fp[count], "%s", line);
fclose(fp[count]);
count++;
if(count == num)
{
count = 0;
}
}
struct struct_arg arguments[num];
int i = 0;
while(i < num)
{
char file_name[32] = "test_";
char dummy[4];
char dummy2[5] = ".txt";
sprintf(dummy, "%d", i);
strcat(file_name, dummy);
strcat(file_name, dummy2);
arguments[i].file = file_name;
i++;
}
pthread_t tid[num];
int j = 0;
while(j < num)
{
error = pthread_create(&(tid[j]), NULL, &parse, (void *) &arguments[j]);
j++;
}
int n = 0;
while(n < num)
{
pthread_join(tid[n], NULL);
n++;
}
int temp, k = 0;
while(k < num)
{
char file_name[32] = "test_";
char dummy[4];
char dummy2[5] = ".txt";
sprintf(dummy, "%d", k);
strcat(file_name, dummy);
strcat(file_name, dummy2);
temp = remove(file_name);
k++;
}
return 0;
}
aa.l
%{
#include <stdio.h>
#include "y.tab.h"
extern int scanner;
%}
%option reentrant
%option noyywrap
NUMBER [0-9]+
%%
" " ;
{NUMBER} {
yylval->i = atoi(yytext);
return(NUMBER);
}
[^0-9\b] {
return(yytext[0]);
}
My compiling steps are
yacc -d aa.y
lex aa.l
cc lex.yy.c y.tab.c -o aa.exe -pthread
And the error generated is
aa.l: In function 'yylex':
aa.l:13:23: error: invalid type argument of '->' (have 'YYSTYPE')
yylval->i = atoi(yytext);
Can anyone please point out what I am doing wrong??
That's a simple compiler error, which is (indirectly) the result of your not requesting a reentrant ("pure") bison parser. [Note 1]
Since the parser is not reentrant, it uses a global yylval which is of type YYSTYPE. Your %union declaration will create a declaration of YYSTYPE as a union type which will be placed in the generated header file y.tab.h, which effectively looks something like this (leaving out some unimportant details):
#ifndef YYSTYPE
typedef union yystype {
int i;
} YYSTYPE;
extern YYSTYPE yylval;
#endif
That code will also be placed into y.tab.c, but it will go after the inserted C segment from the %{...} section of your bison definition. There you #define YYSTYPE int, with the result that in y.tab.c yylval has type int, whereas in `yy.lex.c, it is a union type. That's undefined behaviour (UB), which is how you say "wrong wrong wrong" in C. (But UB is really undefined; one possibility is that the error is silently ignored.)
Since yylval is an instance of YYSTYPE, rather than a pointer to a YYSTYPE, the correct way of referring to member i is yylval.i, not yylval->i. Hence the compiler error.
In your bison file, you don't declare any of your nonterminals to have a type. Since you're including a %union declaration, bison requires you to tell it the type of any terminal or non-terminal whose semantic value is used (with $1, $2, etc.) or assigned to (with $$). So you should have received a pile of errors when you attempted to pass the file through bison. On the other hand, if you had declared types, then the bison-generated parser would have contained references to yylval.i, and that would also generated compiler errors because your #define YYSTYPE effectively bypassed the union declaration. (Bison doesn't know about the #define because it doesn't parse included C code. So it can't generate an error message. But it's definitely an error.)
If you had told bison to produce a re-entrant parser, then the generated parser would have called yylex with an additional argument of type YYSTYPE*; had you also supplied %option bison-bridge in the flex definition, then flex would have generated a declaration of yylex with an additional parameter of type YYSTYPE* which will become the value of yylval. In that case, yylval will be a pointer, rather than an instance, and yylval->i would have been correct.
Notes
For some reason, the use of reentrant bison parsers is referred to incorrectly as "Concurrent YACC". That's wrong on two counts: first, the generated parser is not concurrent (although because it is reentrant, it can be used concurrently if the actions don't introduce race conditions), and secondly because the feature is not available in YACC; it's a bison extension.
A quick Google search revealed two uses of the phrase "Concurrent YACC". One of them was in a comment in an entry in ESR's blog, describing a tool he wrote some decades ago, before bison existed, to make yacc parsers reentrant. The other one was a third-year programming assignment in a course on concurrent programming offered by the University of Pune, which uses the phrase "Concurrent YACC" as though it were meaningful.
I'm guessing that this question derives from the second of those, which might imply that the coursework includes an explanation of what is meant. But for what it's worth, ESR does outline the steps involved in correctly bridging a reentrant bison parser to a reentrant flex lexer. So I suggest you take a look at it, although I do not endorse ESR's description of %bison-bridge as buggy. (Had he said "badly-documented kludge", I would have been 100% on-side.)

How to write a pure parser and reentrant scanner by "win_flex bison"?

I've write a parser for evaluating a logical expression. I know flex and bison use global variables (like yylval). I want a pure parser and a reentrant scanner for thread programming. My '.y' file is here:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
//int yylex (YYSTYPE* lvalp);
int yylex(void);
bool parseExpression(const std::string& inp);
%}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
void yyerror(char *s) {
fprintf(stderr, "%s\n", s);
}
void main(void) {
std::string inp = "0|0\n";
bool nasi = parseExpression(inp);
printf("%s%d\n", "nasi ", nasi);
printf("Press ENTER to close. ");
getchar();
}
My '.y' file is here:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
%}
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
yylval = false;
//*lvalp = false;
}
else
{
yylval = true;
//*lvalp = true;
}
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror("Unknown character");
%%
int yywrap(void) {
return 1;
}
bool parseExpression(const std::string& inp)
{
yy_delete_buffer(YY_CURRENT_BUFFER);
/*Copy string into new buffer and Switch buffers*/
yy_scan_string(inp.c_str());
bool nasi = yyparse();
return nasi;
}
I've added %pure_parser to both files, changed yylex declaration to int yylex (YYSTYPE* lvalp); and replaced yylval to *lvalp, but I saw an error: 'lvalp' is undeclared identifier.. There are many examples about 'reentrant' and 'pure', but I can't find the best guideline.
Could someone guide me?
Thanks in advance.
Fortunately, I did it. Here is my code. I think it can be a good guideline for who wants write a pure parser.ل
My reentrant scanner:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror (yyscan_t yyscanner, char const *msg);
%}
%option reentrant bison-bridge
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
*yylval = false;
}
else
{
*yylval = true;
}
//yylval = atoi(yytext);
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror (yyscanner, "Unknown character");
%%
int yywrap(yyscan_t yyscanner)
{
return 1;
}
bool parseExpression(const std::string& inp)
{
yyscan_t myscanner;
yylex_init(&myscanner);
struct yyguts_t * yyg = (struct yyguts_t*)myscanner;
yy_delete_buffer(YY_CURRENT_BUFFER,myscanner);
/*Copy string into new buffer and Switch buffers*/
yy_scan_string(inp.c_str(), myscanner);
bool nasi = yyparse(myscanner);
yylex_destroy(myscanner);
return nasi;
}
My pure parser:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
typedef void* yyscan_t;
void yyerror (yyscan_t yyscanner, char const *msg);
int yylex(YYSTYPE *yylval_param, yyscan_t yyscanner);
bool parseExpression(const std::string& inp);
%}
%define api.pure full
%lex-param {yyscan_t scanner}
%parse-param {yyscan_t scanner}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
void yyerror (yyscan_t yyscanner, char const *msg){
fprintf(stderr, "%s\n", msg);
}
void main(void) {
std::string inp = "1|0\n";
bool nasi = parseExpression(inp);
printf("%s%d\n", "nasi ", nasi);
printf("Press ENTER to close. ");
getchar();
}
Notice that I've cheat and defined yyg myself as
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
I don't find another way to get the YY_CURRENT_BUFFER. So, If someone knows the best way to get the YY_CURRENT_BUFFER, tell me,plz.
Here is a complete Flex/Bison C++ example. Everything is reentrant, no use of global variables. Both parser/lexer are encapsulated in a class placed in a separate namespace. You can instantiate as many "interpreters" in as many threads as you want.
https://github.com/ezaquarii/bison-flex-cpp-example
Disclaimer: it's not tested on Windows, but the code should be portable with minor tweaks.

Resources