So i'm using a flex/bison parser but the variable names arent printing correctly. It understands the number values. I've tried messing with everything but I'm lost. heres a link to the output. its where it prints "Data: 0" that i'm trying to get the variable name [https://imgur.com/vJDpgpR][1]
invocation is: ./frontEnd data.txt
//main.c
#define BUF_SIZE 1024
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern FILE* yyin;
extern yyparse();
int main(int argc, char* argv[]){
if(argc < 2){
FILE* fp = fopen("temp.txt", "a");
printf("Entering data: \n");
void *content = malloc(BUF_SIZE);
if (fp == 0)
printf("error opening file");
int read;
while ((read = fread(content, BUF_SIZE, 1, stdin))){
fwrite(content, read, 1, fp);
}
if (ferror(stdin))
printf("There was an error reading from stdin");
fclose(fp);
yyparse(fp);
}
if(argc == 2){
yyin = fopen(argv[2], "r");
if(!yyin)
{
perror(argv[2]);
printf("ERROR: file does not exist.\n");
return 0;
}
yyparse (yyin);
}
return 0;
}
void yyerror(char *s){
fprintf(stderr, "error: exiting %s \n", s);
}
//lex.l
%{
#include <stdio.h>
#include <stdlib.h>
#include "parser.tab.h"
extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;
%}
%option noyywrap
%option nounput yylineno
%%
"stop" return STOP;
"iter" return ITER;
"scanf" return SCANF;
"printf" return PRINTF;
"main" return MAIN;
"if" return IF;
"then" return THEN;
"let" return LET;
"func" return FUNC;
"//" return COMMENT; printf("\n");
"start" return START;
"=" return ASSIGN;
"=<" return LE;
"=>" return GE;
":" return COLON;
"+" return PLUS;
"-" return MINUS;
"*" return MULT;
"/" return DIV;
"%" return MOD;
"." return DOT;
"(" return RPAREN;
")" return LPAREN;
"," return COMMA;
"{" return RBRACE;
"}" return LBRACE;
";" return SEMICOLON;
"[" return LBRACK;
"]" return RBRACK;
"==" return EQUAL;
[A-Z][a-z]* { printf("SYNTAX ERROR: Identifiers must start with lower case. "); }
[a-zA-Z][_a-zA-Z0-9]* {
printf("string: %s \n", yytext);
yylval.iVal = strdup(yytext);
yylval.iVal = addSymbol(yytext);
return ID;
}
[0-9]+ {
yylval.iVal = atoi(yytext);
printf("num: %s \n", yytext);
return NUMBER; }
[ _\t\r\s\n] ;
^"#".+$ return COMMENT;
. {printf("ERROR: Invalid Character "); yyterminate();}
<<EOF>> { printf("EOF: line %d\n", yylineno); yyterminate(); }
%%
// stores all variable id is in an array
SYMTABNODEPTR newSymTabNode()
{
return ((SYMTABNODEPTR)malloc(sizeof(SYMTABNODE)));
}
int addSymbol(char *s)
{
extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;
int i;
i = lookup(s);
if(i >= 0){
return(i);
}
else if(curSymSize >= SYMBOLTABLESIZE)
{
return (NOTHING);
}
else{
symtable[curSymSize] = newSymTabNode();
strncpy(symtable[curSymSize]->id,s,IDLENGTH);
symtable[curSymSize]->id[IDLENGTH-1] = '\0';
return(curSymSize++);
}
}
int lookup(char *s)
{
extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;
int i;
for(i=0;i<curSymSize;i++)
{
if(strncmp(s,symtable[i]->id,IDLENGTH) == 0){
return (i);
}
}
return(-1);
}
// parser.y
%{
#define YYERROR_VERBOSE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern char *yytext;
extern int yylex();
extern void yyerror(char *);
extern int yyparse();
extern FILE *yyin;
/* ------------- some constants --------------------------------------------- */
#define SYMBOLTABLESIZE 50
#define IDLENGTH 15
#define NOTHING -1
#define INDENTOFFSET 2
#ifdef DEBUG
char *NodeName[] =
{
"PROGRAM", "BLOCK", "VARS", "EXPR", "N", "A", "R", "STATS", "MSTAT", "STAT",
"IN", "OUT", "IF_STAT", "LOOP", "ASSIGN", "RO", "IDVAL", "NUMVAL"
};
#endif
enum ParseTreeNodeType
{
PROGRAM, BLOCK, VARS, EXPR, N, A, R, STATS, MSTAT, STAT,
IN, OUT,IF_STAT, LOOP, ASSIGN, RO, IDVAL, NUMVAL
};
#define TYPE_CHARACTER "char"
#define TYPE_INTEGER "int"
#define TYPE_REAL "double"
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
#ifndef NULL
#define NULL 0
#endif
// definitions for parse tree
struct treeNode {
int item;
int nodeID;
struct treeNode *first;
struct treeNode *second;
};
typedef struct treeNode TREE_NODE;
typedef TREE_NODE *TREE;
TREE makeNode(int, int, TREE, TREE);
#ifdef DEBUG
void printTree(TREE, int);
#endif
// symbol table definitions.
struct symbolTableNode{
char id[IDLENGTH];
};
typedef struct symbolTableNode SYMTABNODE;
typedef SYMTABNODE *SYMTABNODEPTR;
SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
int curSymSize = 0;
%}
%start program
%union {
char *sVal;
int iVal;
TREE tVal;
}
// list of all tokens
%token SEMICOLON GE LE EQUAL COLON RBRACK LBRACK ASSIGNS LPAREN RPAREN COMMENT
%token DOT MOD PLUS MINUS DIV MULT RBRACE LBRACE START MAIN STOP LET COMMA
%token SCANF PRINTF IF ITER THEN FUNC
%left MULT DIV MOD ADD SUB
// tokens defined with values and rule names
%token<iVal> NUMBER ID
//%token<sVal> ID
%type<tVal> program type block vars expr N A R stats mStat stat in out if_stat loop assign RO
%%
program : START vars MAIN block STOP
{
TREE tree;
tree = makeNode(NOTHING, PROGRAM, $2,$4);
#ifdef DEBUG
printTree(tree, 0);
#endif
}
;
block : RBRACE vars stats LBRACE
{
$$ = makeNode(NOTHING, BLOCK, $2, $3);
}
;
vars : /*empty*/
{
$$ = makeNode(NOTHING, VARS,NULL,NULL);
}
| LET ID COLON NUMBER vars
{
$$ = makeNode($2, VARS, $5,NULL);
printf("id: %d", $2);
}
;
//variable:
// type ID{$$ = newNode($2,VARIABLE,$1,NULL,NULL);};
//type:
// INT {$$ = newNode(INT,TYPE,NULL,NULL,NULL);}
// | BOOL {$$ = newNode(BOOL,TYPE,NULL,NULL,NULL);}
// | CHAR {$$ = newNode(CHAR,TYPE,NULL,NULL,NULL);}
// | STRING{$$ = newNode(STRING,TYPE,NULL,NULL,NULL);};
expr : N DIV expr
{
$$ = makeNode(DIV, EXPR, $1, $3);
}
| N MULT expr
{
$$ = makeNode(MULT, EXPR, $1, $3);
}
| N
{
$$ = makeNode(NOTHING, EXPR, $1,NULL);
}
;
N : A PLUS N
{
$$ = makeNode(PLUS, N, $1, $3);
}
| A MINUS N
{
$$ = makeNode(MINUS, N, $1, $3);
}
| A
{
$$ = makeNode(NOTHING, N, $1,NULL);
}
;
A : MOD A
{
$$ = makeNode(NOTHING, A, $2,NULL);
}
| R
{
$$ = makeNode(NOTHING, A, $1,NULL);
}
;
R : LBRACK expr RBRACK
{
$$ = makeNode(NOTHING, R, $2,NULL);
}
| ID
{
$$ = makeNode($1, IDVAL, NULL,NULL);
}
| NUMBER
{
$$ = makeNode($1, NUMVAL, NULL,NULL);
}
;
stats : stat mStat
{
$$ = makeNode(NOTHING, STATS, $1, $2);
}
;
mStat : /* empty */
{
$$ = makeNode(NOTHING, MSTAT, NULL,NULL);
}
| stat mStat
{
$$ = makeNode(NOTHING, MSTAT, $1, $2);
}
;
stat: in DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| out DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| block
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| if_stat DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| loop DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| assign DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
;
in : SCANF LBRACK ID RBRACK
{
$$ = makeNode($3, IN,NULL,NULL);
}
;
out : PRINTF LBRACK expr RBRACK
{
$$ = makeNode(NOTHING, OUT,$3,NULL);
}
;
if_stat : IF LBRACK expr RO expr RBRACK THEN block
{
$$ = makeNode(NOTHING, IF_STAT, $4, $8);
}
;
loop : ITER LBRACK expr RO expr RBRACK block
{
$$ = makeNode(NOTHING, LOOP, $4, $7);
}
;
assign : ID ASSIGNS expr
{
$$ = makeNode($1, ASSIGN, $3,NULL);
}
;
RO : LE
{
$$ = makeNode(LE, RO, NULL,NULL);
}
| GE
{
$$ = makeNode(GE, RO, NULL,NULL);
}
| EQUAL
{
$$ = makeNode(EQUAL, RO, NULL,NULL);
}
| COLON COLON
{
$$ = makeNode(EQUAL, RO, NULL,NULL);
}
;
%%
// node generator
TREE makeNode(int iVal, int nodeID, TREE p1, TREE p2)
{
TREE t;
t = (TREE)malloc(sizeof(TREE_NODE));
t->item = iVal;
t->nodeID = nodeID;
t->first = p1;
t->second = p2;
//printf("NODE CREATED");
return(t);
}
// prints the tree with indentation for depth
void printTree(TREE tree, int depth){
int i;
if(tree == NULL) return;
for(i=depth;i;i--)
printf(" ");
if(tree->nodeID == NUMBER)
printf("INT: %d ",tree->item);
else if(tree->nodeID == IDVAL){
if(tree->item > 0 && tree->item < SYMBOLTABLESIZE )
printf("id: %s ",symtable[tree->item]->id);
else
printf("unknown id: %d ", tree->item);
}
if(tree->item != NOTHING){
printf("Data: %d ",tree->item);
}
// If out of range of the table
if (tree->nodeID < 0 || tree->nodeID > sizeof(NodeName))
printf("Unknown ID: %d\n",tree->nodeID);
else
printf("%s\n",NodeName[tree->nodeID]);
printTree(tree->first,depth+2);
printTree(tree->second,depth+2);
}
#include "lex.yy.c"
// heres the makefile I use for compilation
frontEnd: lex.yy.c parser.tab.c
gcc parser.tab.c main.c -o frontEnd -lfl -DDEBUG
parser.tab.c parser.tab.h: parser.y
bison -d parser.y
lex.yy.c: lex.l
flex lex.l
clean:
rm lex.yy.c y.tab.c frontEnd
'''
// data.txt
start
let x : 13
main {
scanf [ x ] .
printf [ 34 ] .
} stop[enter image description here][2]
[1]: https://i.stack.imgur.com/xlNnh.png
[2]: https://i.stack.imgur.com/HKRtX.png
I think this has a lot more to do with your AST and symbol table functions than with your parser, and practically nothing to do with bison itself.
For example, your function to print trees won't attempt to print an identifier's name if its symbol table index is 0.
if(tree->item > 0 && tree->item < SYMBOLTABLESIZE)
But the first symbol entered in the table will have index 0. (Perhaps you fixed this between pasting your code and generating the results. You should always check that the code you paste in a question corresponds precisely to the output which you show. But this isn't the only bug in your code; it's just an example.)
As another example, the immediate problem which causes Data: 0 to be printed instead of the symbol name is that your tree printer only prints symbol names for AST nodes of type IDVAL, but you create an AST IN node whose data field contains the variable's symbol table index. So either you need to fix your tree printer so it knows about IN nodes, or you need to change the IN node so that it has a child which is the IDVAL node. (That's probably the best solution in the long run.)
It's always a temptation to blame bison (or whatever unfamiliar tool you're using at the moment) for bugs, instead of considering the possibility that you've introduced bugs in your own support code. To avoid falling into this trap, it's always a good idea to test your library functions separately before using them in a more complicated project. For example, you could write a small test driver that builds a fixed AST tree, prints it, and deletes it. Once that works, and only when that works, you can check to see if your parser can build and print the same tree by parsing an input.
You will find that some simple good software design practices will make this whole process much smoother:
Organise your code into separate component files, each with its own header file. Document the library interfaces (and, if necessary, data structures) using comments in the header file. Briefly describe what each function's purpose is. If you can't find a brief description, it nay be that the function is trying to do too many different things.
In your parser, the functions and declarations needed to build and use ASTs are scattered between different parts of your lexer and parser files. This makes them much harder to read, debug, maintain and even use.
No matter what your teacher might tell you, if you find it necessary to #include the generated lexical scanner directly into the parser, then you probably have not found a good way to organise your support functions. You should always aim to make it possible to separately compile the parser and the scanner.
For data structures like your AST node, which use different member variables in different ways depending on an enumerated node type -- which is a model you'll find in other C projects as well, but is particularly common in parsers -- document the precise use of each field for every enumeration value. And make sure that every time you change the way you use the data or add new enumeration values, you fix the documentation accordingly.
This documentation will make it much easier to verify that your AST is being built correctly. As an additional benefit, you (or others using your code) will have an accurate description of how to interpret the contents of AST nodes, which makes it much easier to write code which analyses the tree.
In short, the way to write, debug and maintain any non-trivial project is not by "messing around" but by being systematic and modular. While it might seem like all of this takes precious time, particularly the documentation, it will almost always save you a lot of time in the long run.
I'm building a simplified C parser using bison and flex. I've written a grammar rule to detect functions in my Bison file, and I want the parser to send the name of the function to my C monitoring program. Here is an extremely simplified sample of what I've implemented :
my monitor program
/*monitor.h*/
#ifndef MONITOR_H
#define MONITOR_H
extern int noLig;
extern char* yytext;
#endif
/*monitor.c*/
#include <stdio.h>
#include <monitor.h>
#include <project.tab.h>
int noLig=0;
int main (int argc, char * argv[]) {
printf("flex-\n");
int err_code=yyparse();
if (err_code == 0) {
printf("It went fine\n");
} else {printf("It didn't go well\n");}
return 0;
}
project.l file
%{
#include <stdio.h>
#include <monitor.h>
#include <project.tab.h>
%}
%%
"\"[a-zA-Z0-9]+"\" {ECHO; yylval.str=yytext; return STR;}
[.,;=()\[\]\{\}] { return yytext[0]; }
"char" {ECHO; printf("-"); yylval.str=yytext; return TYPE;}
"int" {ECHO; printf("-");yylval.str=yytext; return TYPE;}
"float" {ECHO; printf("-");yylval.str=yytext; return TYPE;}
"double" {ECHO; printf("-");yylval.str=yytext;return TYPE;}
[a-zA-Z][a-zA-Z0-9]* {ECHO; printf("-");yylval.str = yytext; return VAR;}
[ \t\n\b]+ {noLig++;}
"//".* {}
. {printf(":%cwhat is this",yytext[0]);}
%%
project.y file
%{
#include <stdio.h>
#include <monitor.h>
int yylex();
int yyerror ();
%}
%union {
char *str;
int i;
}
%define parse.error verbose
%type <i> INT
%type <str> TYPE STR VAR
%token TYPE INT STR VAR
%start Program
%%
Program: function_l
;
function_l: function
| function_l function
;
function: TYPE VAR '(' param_prototype_ld ')' '{' instruction_l '}'
{printf("\n\nbison-\n%s\n",$1);}
;
param_prototype_ld: /*empty*/
| param_prototype_l
;
param_prototype_l: param_prototype
| param_prototype_l ',' param_prototype
;
param_prototype: TYPE VAR
;
instruction_l: /*empty*/
| VAR ';'
| VAR instruction_l
;
%%
int yyerror (char* s) {
fprintf(stderr, "Compilator3000:l.%d: %s\n", noLig, s);
}
test.c file
int main (int arg) {
a;
}
It compiles alright, with no warning. However, when I run ./monitor < test.c, I get the following output :
flex-
int-main-int-arg-a-
bison-
int main (int arg) {
a;
}
It went fine
Why does bison variable $1 returns the whole function block ? How can I get only the return type ? (In the end, my goal is to print the return type, function name, and the type of the arguments)
The value of yytext is not guaranteed to persist, and to make yytext persist it must be copied to a separate buffer. This is often done using strdup:
...
"\"[a-zA-Z0-9]+"\" {ECHO; yylval.str=strdup(yytext); return STR;}
[.,;=()\[\]\{\}] { return yytext[0]; }
"char" {ECHO; printf("-"); yylval.str=strdup(yytext); return TYPE;}
"int" {ECHO; printf("-");yylval.str=strdup(yytext); return TYPE;}
"float" {ECHO; printf("-");yylval.str=strdup(yytext); return TYPE;}
"double" {ECHO; printf("-");yylval.str=strdup(yytext);return TYPE;}
[a-zA-Z][a-zA-Z0-9]* {ECHO; printf("-");yylval.str = strdup(yytext); return VAR;}
...
Though strdup can return NULL, a wrapper function can be used to make that failure explicit
char *
strdup_checked(char *str)
{
char *p;
if ((p = strdup(str)) == NULL) {
perror("strdup");
exit(EXIT_FAILURE);
}
return (p);
}
Although OP has found the solution, I would try to provide a complete answer.
This is the Grammar which defines the function as per your code.
function: TYPE VAR '(' param_prototype_ld ')' '{' instruction_l '}'
{printf("\n\nbison-\n%s\n",$1);}
;
Each symbol is a positional variable. Eg TYPE is $1, VAR is $2 (which is I believe the function name). $$ is the return value of any rule. In this case function. In order to return the function name you need to set $$=$2 in the action part. Setting $$ to $1 will return the function name. Alternatively you can create a data structure in the action such as an array or struct to hold more than one $ variable and then return it.
The effect would be seen in the following rule
function_l: function
| function_l function
;
Non-terminal symbol function will hold the name of the function. In this rule "function_l" is $1 and "function" is $2. If you print $2 it will give you the function name which was passed from the rule "function".
Since the terminal "VAR" is a string you need to set yylval.str=strdup(yytext) either in the lex or in the grammar rule as yylval.str=strdup($<pos>)
I'm trying to make a simple parser. It's for a homework assignment but also for own experimentation. I have completed the lexer and the parser and I'm trying now to output an AST. The problem is that when I'm adding, for example, two integers, the result tree is printed with unrecognizable symbols. A valid input should be +(1,1) and a valid output should be (+ 1 1). Instead of this, I'm getting ( + �|k �|k ). I've tried many things, without actually any significant result. The sprintf function returns a null terminator, so probably this is not the problem. Below is the parser code (.y file):
%{
#define YYDEBUG 1
%}
%start program
%token NUMBER
%token ID
%token PLUS MINUS TIMES
%token LP RP EQUALS COMMA
%token END
%token LET IN AND
%left PLUS MINUS
%left TIMES
%left LET IN AND
%left EQUALS
%%
program:{printf("Empty Input\n");} /* empty */
| program line /* do nothing */
line: expr END { printtree($1); printf("\n");}
;
expr : /*Empty*/
| LET deflist IN expr {}
| ID { printf("Found ID\n"); $$ = make_id_leaf($1);}
| NUMBER { printf("Found NUMBER\n"); $$ = make_number_leaf($1);}
| PLUS LP expr COMMA expr RP {$$ = make_plus_tree($3,$5); printf("Found expr PLUS expr.\n"); }
| TIMES LP expr COMMA expr RP {$$ = make_times_tree($3,$5); printf("Found expr TIMES expr. Result:%d\n", $$);}
| MINUS ID
| MINUS NUMBER { printf("found MINUS NUMBER\n"); }
;
deflist : definition
| definition AND deflist
;
definition : /*Empty*/
| ID EQUALS expr {printf("Found EQ\n");}
;
%%
/*int main (void) {return yyparse ( );}*/
int yyerror (char *s) {fprintf (stderr, "%s\n", s);}
The lexer file:
%{
#include "parser.h"
%}
DIGIT [0-9]
LETTER [a-zA-Z]
%%
LET {printf("Encountered LET\n"); return(LET);}
IN {printf("Encountered IN\n"); return(IN);}
AND {printf("Encountered AND\n"); return(AND);}
{DIGIT}+ {yylval = atoi(yytext); return NUMBER;}
{LETTER}* { if (strlen(yytext) <= 8){
yylval = strlen(yytext);
printf( "<ID, %s> ", yytext );
return(ID);
} else {
yytext[8] = '\0';
printf("WARNING! Long identifier. Truncating to 8 chars\n");
printf( "<ID, %s> ", yytext );
return(ID);
}
}
[ \t] ;
[\n] return(END);
"+" return(PLUS);
"-" return(MINUS);
"*" return(TIMES);
"=" return(EQUALS);
"(" return(LP);
")" return(RP);
"," return(COMMA);
<<EOF>> return(0);
%%
int yywrap (void) {return 1;}
The main.c which includes the yyparse() function:
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
#include "treedefs.h"
int main(int argc, char **argv){
yyparse();
return 0;
}
And the treedefs.h file which includes the function definitions (I've included only the struct definition, the number leaf and the plus tree):
typedef struct tree{
char *token;
TREE *l;
TREE *r;
TREE *child;
}TREE;
/* Make number leaves */
TREE *make_number_leaf(int n){
TREE *leafNum = malloc(sizeof(TREE));
char *c, ch[8];
sprintf(ch, "%d", n); /* Effective way to convert int to string */
c = ch;
leafNum->token = c;
leafNum->l = NULL;
leafNum->r = NULL;
leafNum->child = NULL;
printf("NUM Leaf is: %s\n", leafNum->token);
return (leafNum);
}
/* Addition tree */
TREE *make_plus_tree(TREE *l, TREE *r){
TREE *plusTree = malloc(sizeof(TREE));
plusTree->token = "+";
plusTree->l = l;
plusTree->r = r;
plusTree->child = NULL;
return (plusTree);
}
void printtree(TREE *tree)
{
if (tree->l || tree->r){
printf("(");
}
printf(" %s ", tree->token);
if (tree->l){
printtree(tree->l);
}
if (tree->r){
printtree(tree->r);
}
if (tree->l || tree->r){
printf(")");
}
}
The file tree.h includes only some declarations, no big deal, and definitely not related to the issue.
Why the numbers look like this? And how can I fix it? Any help will be greatly appreciated.
This problem actually has nothing to do with bison or flex. It's in your make_number_leaf implementation:
TREE *make_number_leaf(int n){
TREE *leafNum = malloc(sizeof(TREE));
char *c, ch[8];
// ^ local variable
sprintf(ch, "%d", n); /* Effective way to convert int to string */
c = ch;
leafNum->token = c;
// ^ dangling pointer
// Remainder omitted
}
As indicated in the comments above, ch is a local (stack-allocated) variable, whose lifetime ends when the function returns. Assigning its address to the variable c does nothing to change that. So the value of c which is stored into leafNum->token will become a dangling pointer as soon as the function returns.
So when you later attempt to print out the token, you are printing out the contents of random memory.
You need to malloc a character buffer, and remember to free it when you are freeing the TREE. (However, in the case where leafNum->token is a string literal, you cannot call free, so you need to be a bit cleverer.)
I've write a parser for evaluating a logical expression. I know flex and bison use global variables (like yylval). I want a pure parser and a reentrant scanner for thread programming. My '.y' file is here:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
//int yylex (YYSTYPE* lvalp);
int yylex(void);
bool parseExpression(const std::string& inp);
%}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
void yyerror(char *s) {
fprintf(stderr, "%s\n", s);
}
void main(void) {
std::string inp = "0|0\n";
bool nasi = parseExpression(inp);
printf("%s%d\n", "nasi ", nasi);
printf("Press ENTER to close. ");
getchar();
}
My '.y' file is here:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
%}
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
yylval = false;
//*lvalp = false;
}
else
{
yylval = true;
//*lvalp = true;
}
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror("Unknown character");
%%
int yywrap(void) {
return 1;
}
bool parseExpression(const std::string& inp)
{
yy_delete_buffer(YY_CURRENT_BUFFER);
/*Copy string into new buffer and Switch buffers*/
yy_scan_string(inp.c_str());
bool nasi = yyparse();
return nasi;
}
I've added %pure_parser to both files, changed yylex declaration to int yylex (YYSTYPE* lvalp); and replaced yylval to *lvalp, but I saw an error: 'lvalp' is undeclared identifier.. There are many examples about 'reentrant' and 'pure', but I can't find the best guideline.
Could someone guide me?
Thanks in advance.
Fortunately, I did it. Here is my code. I think it can be a good guideline for who wants write a pure parser.ل
My reentrant scanner:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror (yyscan_t yyscanner, char const *msg);
%}
%option reentrant bison-bridge
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
*yylval = false;
}
else
{
*yylval = true;
}
//yylval = atoi(yytext);
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror (yyscanner, "Unknown character");
%%
int yywrap(yyscan_t yyscanner)
{
return 1;
}
bool parseExpression(const std::string& inp)
{
yyscan_t myscanner;
yylex_init(&myscanner);
struct yyguts_t * yyg = (struct yyguts_t*)myscanner;
yy_delete_buffer(YY_CURRENT_BUFFER,myscanner);
/*Copy string into new buffer and Switch buffers*/
yy_scan_string(inp.c_str(), myscanner);
bool nasi = yyparse(myscanner);
yylex_destroy(myscanner);
return nasi;
}
My pure parser:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
typedef void* yyscan_t;
void yyerror (yyscan_t yyscanner, char const *msg);
int yylex(YYSTYPE *yylval_param, yyscan_t yyscanner);
bool parseExpression(const std::string& inp);
%}
%define api.pure full
%lex-param {yyscan_t scanner}
%parse-param {yyscan_t scanner}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
void yyerror (yyscan_t yyscanner, char const *msg){
fprintf(stderr, "%s\n", msg);
}
void main(void) {
std::string inp = "1|0\n";
bool nasi = parseExpression(inp);
printf("%s%d\n", "nasi ", nasi);
printf("Press ENTER to close. ");
getchar();
}
Notice that I've cheat and defined yyg myself as
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
I don't find another way to get the YY_CURRENT_BUFFER. So, If someone knows the best way to get the YY_CURRENT_BUFFER, tell me,plz.
Here is a complete Flex/Bison C++ example. Everything is reentrant, no use of global variables. Both parser/lexer are encapsulated in a class placed in a separate namespace. You can instantiate as many "interpreters" in as many threads as you want.
https://github.com/ezaquarii/bison-flex-cpp-example
Disclaimer: it's not tested on Windows, but the code should be portable with minor tweaks.
I want to create a read-eval-print loop using flex/bison parser. Trouble is, the flex generated lexer wants input of type FILE* and i would like it to be char*. Is there anyway to do this?
One suggestion has been to create a pipe, feed it the string and open the file descriptor and send to the lexer. This is fairly simple but it feels convoluted and not very platform independent. Is there a better way?
The following routines are available for setting up input buffers for scanning in-memory strings instead of files (as yy_create_buffer does):
YY_BUFFER_STATE yy_scan_string(const char *str): scans a NUL-terminated string`
YY_BUFFER_STATE yy_scan_bytes(const char *bytes, int len): scans len bytes (including possibly NULs) starting at location bytes
Note that both of these functions create, return a corresponding YY_BUFFER_STATE handle (which you must delete with yy_delete_buffer() when done with it) so yylex() scan a copy of the string or bytes. This behavior may be desirable since yylex() modifies the contents of the buffer it is scanning).
If you want avoid the copy (and yy_delete_buffer) using:
YY_BUFFER_STATE yy_scan_buffer(char *base, yy_size_t size)
sample main:
int main() {
yy_scan_buffer("a test string");
yylex();
}
See this section of Flex's manual for information on how to scan in-memory buffers, such as strings.
flex can parse char * using any one of three functions: yy_scan_string(),
yy_scan_buffer(), and yy_scan_bytes() (see the documentation). Here's an example of the first:
typedef struct yy_buffer_state * YY_BUFFER_STATE;
extern int yyparse();
extern YY_BUFFER_STATE yy_scan_string(char * str);
extern void yy_delete_buffer(YY_BUFFER_STATE buffer);
int main(){
char string[] = "String to be parsed.";
YY_BUFFER_STATE buffer = yy_scan_string(string);
yyparse();
yy_delete_buffer(buffer);
return 0;
}
The equivalent statements for yy_scan_buffer() (which requires a doubly null-terminated string):
char string[] = "String to be parsed.\0";
YY_BUFFER_STATE buffer = yy_scan_buffer(string, sizeof(string));
My answer reiterates some of the information provided by #dfa and #jlholland, but neither of their answers' code seemed to be working for me.
Here is what I needed to do :
extern yy_buffer_state;
typedef yy_buffer_state *YY_BUFFER_STATE;
extern int yyparse();
extern YY_BUFFER_STATE yy_scan_buffer(char *, size_t);
int main(int argc, char** argv) {
char tstr[] = "line i want to parse\n\0\0";
// note yy_scan_buffer is is looking for a double null string
yy_scan_buffer(tstr, sizeof(tstr));
yy_parse();
return 0;
}
you cannot extern the typedef, which make sense when you think about it.
The accepted answer is incorrect. It will cause memory leaks.
Internally, yy_scan_string calls yy_scan_bytes which, in turn, calls yy_scan_buffer.
yy_scan_bytes allocates memory for a COPY of the input buffer.
yy_scan_buffer works directly upon the supplied buffer.
With all three forms, you MUST call yy_delete_buffer to free the flex buffer-state information (YY_BUFFER_STATE).
However, with yy_scan_buffer, you avoid the internal allocation/copy/free of the internal buffer.
The prototype for yy_scan_buffer does NOT take a const char* and you MUST NOT expect the contents to remain unchanged.
If you allocated memory to hold your string, you are responsible for freeing it AFTER you call yy_delete_buffer.
Also, don't forget to have yywrap return 1 (non-zero) when you're parsing JUST this string.
Below is a COMPLETE example.
%%
<<EOF>> return 0;
. return 1;
%%
int yywrap()
{
return (1);
}
int main(int argc, const char* const argv[])
{
FILE* fileHandle = fopen(argv[1], "rb");
if (fileHandle == NULL) {
perror("fopen");
return (EXIT_FAILURE);
}
fseek(fileHandle, 0, SEEK_END);
long fileSize = ftell(fileHandle);
fseek(fileHandle, 0, SEEK_SET);
// When using yy_scan_bytes, do not add 2 here ...
char *string = malloc(fileSize + 2);
fread(string, fileSize, sizeof(char), fileHandle);
fclose(fileHandle);
// Add the two NUL terminators, required by flex.
// Omit this for yy_scan_bytes(), which allocates, copies and
// apends these for us.
string[fileSize] = '\0';
string[fileSize + 1] = '\0';
// Our input file may contain NULs ('\0') so we MUST use
// yy_scan_buffer() or yy_scan_bytes(). For a normal C (NUL-
// terminated) string, we are better off using yy_scan_string() and
// letting flex manage making a copy of it so the original may be a
// const char (i.e., literal) string.
YY_BUFFER_STATE buffer = yy_scan_buffer(string, fileSize + 2);
// This is a flex source file, for yacc/bison call yyparse()
// here instead ...
int token;
do {
token = yylex(); // MAY modify the contents of the 'string'.
} while (token != 0);
// After flex is done, tell it to release the memory it allocated.
yy_delete_buffer(buffer);
// And now we can release our (now dirty) buffer.
free(string);
return (EXIT_SUCCESS);
}
Other-way, you can redefine function YY_INPUT in lex file, and then set your string to LEX's input. As below:
#undef YY_INPUT
#define YY_INPUT(buf) (my_yyinput(buf))
char my_buf[20];
void set_lexbuf(char *org_str)
{ strcpy(my_buf, org_str); }
void my_yyinput (char *buf)
{ strcpy(buf, my_buf); }
In your main.c, before scanning, you need to set lex's buffer first:
set_lexbuf(your_string);
scanning...
here is a small example for using bison / flex as a parser inside your cpp code for parsing string and changing a string value according to it
(few parts of the code were removed so there might be irrelevant parts there.)
parser.y :
%{
#include "parser.h"
#include "lex.h"
#include <math.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
int yyerror(yyscan_t scanner, string result, const char *s){
(void)scanner;
std::cout << "yyerror : " << *s << " - " << s << std::endl;
return 1;
}
%}
%code requires{
#define YY_TYPEDEF_YY_SCANNER_T
typedef void * yyscan_t;
#define YYERROR_VERBOSE 0
#define YYMAXDEPTH 65536*1024
#include <math.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
}
%output "parser.cpp"
%defines "parser.h"
%define api.pure full
%lex-param{ yyscan_t scanner }
%parse-param{ yyscan_t scanner } {std::string & result}
%union {
std::string * sval;
}
%token TOKEN_ID TOKEN_ERROR TOKEN_OB TOKEN_CB TOKEN_AND TOKEN_XOR TOKEN_OR TOKEN_NOT
%type <sval> TOKEN_ID expression unary_expression binary_expression
%left BINARY_PRIO
%left UNARY_PRIO
%%
top:
expression {result = *$1;}
;
expression:
TOKEN_ID {$$=$1; }
| TOKEN_OB expression TOKEN_CB {$$=$2;}
| binary_expression {$$=$1;}
| unary_expression {$$=$1;}
;
unary_expression:
TOKEN_NOT expression %prec UNARY_PRIO {result = " (NOT " + *$2 + " ) " ; $$ = &result;}
;
binary_expression:
expression expression %prec BINARY_PRIO {result = " ( " + *$1+ " AND " + *$2 + " ) "; $$ = &result;}
| expression TOKEN_AND expression %prec BINARY_PRIO {result = " ( " + *$1+ " AND " + *$3 + " ) "; $$ = &result;}
| expression TOKEN_OR expression %prec BINARY_PRIO {result = " ( " + *$1 + " OR " + *$3 + " ) "; $$ = &result;}
| expression TOKEN_XOR expression %prec BINARY_PRIO {result = " ( " + *$1 + " XOR " + *$3 + " ) "; $$ = &result;}
;
%%
lexer.l :
%{
#include <string>
#include "parser.h"
%}
%option outfile="lex.cpp" header-file="lex.h"
%option noyywrap never-interactive
%option reentrant
%option bison-bridge
%top{
/* This code goes at the "top" of the generated file. */
#include <stdint.h>
}
id ([a-zA-Z][a-zA-Z0-9]*)+
white [ \t\r]
newline [\n]
%%
{id} {
yylval->sval = new std::string(yytext);
return TOKEN_ID;
}
"(" {return TOKEN_OB;}
")" {return TOKEN_CB;}
"*" {return TOKEN_AND;}
"^" {return TOKEN_XOR;}
"+" {return TOKEN_OR;}
"!" {return TOKEN_NOT;}
{white}; // ignore white spaces
{newline};
. {
return TOKEN_ERROR;
}
%%
usage :
void parse(std::string& function) {
string result = "";
yyscan_t scanner;
yylex_init_extra(NULL, &scanner);
YY_BUFFER_STATE state = yy_scan_string(function.c_str() , scanner);
yyparse(scanner,result);
yy_delete_buffer(state, scanner);
yylex_destroy(scanner);
function = " " + result + " ";
}
makefile:
parser.h parser.cpp: parser.y
# /usr/local/bison/2.7.91/bin/bison -y -d parser.y
lex.h lex.cpp: lexer.l
# /usr/local/flex/2.5.39/bin/flex lexer.l
clean:
- \rm -f *.o parser.h parser.cpp lex.h lex.cpp
There's this funny code in libmatheval:
/* Redefine macro to redirect scanner input from string instead of
* standard input. */
#define YY_INPUT( buffer, result, max_size ) \
{ result = input_from_string (buffer, max_size); }