So i'm using a flex/bison parser but the variable names arent printing correctly. It understands the number values. I've tried messing with everything but I'm lost. heres a link to the output. its where it prints "Data: 0" that i'm trying to get the variable name [https://imgur.com/vJDpgpR][1]
invocation is: ./frontEnd data.txt
//main.c
#define BUF_SIZE 1024
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern FILE* yyin;
extern yyparse();
int main(int argc, char* argv[]){
if(argc < 2){
FILE* fp = fopen("temp.txt", "a");
printf("Entering data: \n");
void *content = malloc(BUF_SIZE);
if (fp == 0)
printf("error opening file");
int read;
while ((read = fread(content, BUF_SIZE, 1, stdin))){
fwrite(content, read, 1, fp);
}
if (ferror(stdin))
printf("There was an error reading from stdin");
fclose(fp);
yyparse(fp);
}
if(argc == 2){
yyin = fopen(argv[2], "r");
if(!yyin)
{
perror(argv[2]);
printf("ERROR: file does not exist.\n");
return 0;
}
yyparse (yyin);
}
return 0;
}
void yyerror(char *s){
fprintf(stderr, "error: exiting %s \n", s);
}
//lex.l
%{
#include <stdio.h>
#include <stdlib.h>
#include "parser.tab.h"
extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;
%}
%option noyywrap
%option nounput yylineno
%%
"stop" return STOP;
"iter" return ITER;
"scanf" return SCANF;
"printf" return PRINTF;
"main" return MAIN;
"if" return IF;
"then" return THEN;
"let" return LET;
"func" return FUNC;
"//" return COMMENT; printf("\n");
"start" return START;
"=" return ASSIGN;
"=<" return LE;
"=>" return GE;
":" return COLON;
"+" return PLUS;
"-" return MINUS;
"*" return MULT;
"/" return DIV;
"%" return MOD;
"." return DOT;
"(" return RPAREN;
")" return LPAREN;
"," return COMMA;
"{" return RBRACE;
"}" return LBRACE;
";" return SEMICOLON;
"[" return LBRACK;
"]" return RBRACK;
"==" return EQUAL;
[A-Z][a-z]* { printf("SYNTAX ERROR: Identifiers must start with lower case. "); }
[a-zA-Z][_a-zA-Z0-9]* {
printf("string: %s \n", yytext);
yylval.iVal = strdup(yytext);
yylval.iVal = addSymbol(yytext);
return ID;
}
[0-9]+ {
yylval.iVal = atoi(yytext);
printf("num: %s \n", yytext);
return NUMBER; }
[ _\t\r\s\n] ;
^"#".+$ return COMMENT;
. {printf("ERROR: Invalid Character "); yyterminate();}
<<EOF>> { printf("EOF: line %d\n", yylineno); yyterminate(); }
%%
// stores all variable id is in an array
SYMTABNODEPTR newSymTabNode()
{
return ((SYMTABNODEPTR)malloc(sizeof(SYMTABNODE)));
}
int addSymbol(char *s)
{
extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;
int i;
i = lookup(s);
if(i >= 0){
return(i);
}
else if(curSymSize >= SYMBOLTABLESIZE)
{
return (NOTHING);
}
else{
symtable[curSymSize] = newSymTabNode();
strncpy(symtable[curSymSize]->id,s,IDLENGTH);
symtable[curSymSize]->id[IDLENGTH-1] = '\0';
return(curSymSize++);
}
}
int lookup(char *s)
{
extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;
int i;
for(i=0;i<curSymSize;i++)
{
if(strncmp(s,symtable[i]->id,IDLENGTH) == 0){
return (i);
}
}
return(-1);
}
// parser.y
%{
#define YYERROR_VERBOSE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern char *yytext;
extern int yylex();
extern void yyerror(char *);
extern int yyparse();
extern FILE *yyin;
/* ------------- some constants --------------------------------------------- */
#define SYMBOLTABLESIZE 50
#define IDLENGTH 15
#define NOTHING -1
#define INDENTOFFSET 2
#ifdef DEBUG
char *NodeName[] =
{
"PROGRAM", "BLOCK", "VARS", "EXPR", "N", "A", "R", "STATS", "MSTAT", "STAT",
"IN", "OUT", "IF_STAT", "LOOP", "ASSIGN", "RO", "IDVAL", "NUMVAL"
};
#endif
enum ParseTreeNodeType
{
PROGRAM, BLOCK, VARS, EXPR, N, A, R, STATS, MSTAT, STAT,
IN, OUT,IF_STAT, LOOP, ASSIGN, RO, IDVAL, NUMVAL
};
#define TYPE_CHARACTER "char"
#define TYPE_INTEGER "int"
#define TYPE_REAL "double"
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
#ifndef NULL
#define NULL 0
#endif
// definitions for parse tree
struct treeNode {
int item;
int nodeID;
struct treeNode *first;
struct treeNode *second;
};
typedef struct treeNode TREE_NODE;
typedef TREE_NODE *TREE;
TREE makeNode(int, int, TREE, TREE);
#ifdef DEBUG
void printTree(TREE, int);
#endif
// symbol table definitions.
struct symbolTableNode{
char id[IDLENGTH];
};
typedef struct symbolTableNode SYMTABNODE;
typedef SYMTABNODE *SYMTABNODEPTR;
SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
int curSymSize = 0;
%}
%start program
%union {
char *sVal;
int iVal;
TREE tVal;
}
// list of all tokens
%token SEMICOLON GE LE EQUAL COLON RBRACK LBRACK ASSIGNS LPAREN RPAREN COMMENT
%token DOT MOD PLUS MINUS DIV MULT RBRACE LBRACE START MAIN STOP LET COMMA
%token SCANF PRINTF IF ITER THEN FUNC
%left MULT DIV MOD ADD SUB
// tokens defined with values and rule names
%token<iVal> NUMBER ID
//%token<sVal> ID
%type<tVal> program type block vars expr N A R stats mStat stat in out if_stat loop assign RO
%%
program : START vars MAIN block STOP
{
TREE tree;
tree = makeNode(NOTHING, PROGRAM, $2,$4);
#ifdef DEBUG
printTree(tree, 0);
#endif
}
;
block : RBRACE vars stats LBRACE
{
$$ = makeNode(NOTHING, BLOCK, $2, $3);
}
;
vars : /*empty*/
{
$$ = makeNode(NOTHING, VARS,NULL,NULL);
}
| LET ID COLON NUMBER vars
{
$$ = makeNode($2, VARS, $5,NULL);
printf("id: %d", $2);
}
;
//variable:
// type ID{$$ = newNode($2,VARIABLE,$1,NULL,NULL);};
//type:
// INT {$$ = newNode(INT,TYPE,NULL,NULL,NULL);}
// | BOOL {$$ = newNode(BOOL,TYPE,NULL,NULL,NULL);}
// | CHAR {$$ = newNode(CHAR,TYPE,NULL,NULL,NULL);}
// | STRING{$$ = newNode(STRING,TYPE,NULL,NULL,NULL);};
expr : N DIV expr
{
$$ = makeNode(DIV, EXPR, $1, $3);
}
| N MULT expr
{
$$ = makeNode(MULT, EXPR, $1, $3);
}
| N
{
$$ = makeNode(NOTHING, EXPR, $1,NULL);
}
;
N : A PLUS N
{
$$ = makeNode(PLUS, N, $1, $3);
}
| A MINUS N
{
$$ = makeNode(MINUS, N, $1, $3);
}
| A
{
$$ = makeNode(NOTHING, N, $1,NULL);
}
;
A : MOD A
{
$$ = makeNode(NOTHING, A, $2,NULL);
}
| R
{
$$ = makeNode(NOTHING, A, $1,NULL);
}
;
R : LBRACK expr RBRACK
{
$$ = makeNode(NOTHING, R, $2,NULL);
}
| ID
{
$$ = makeNode($1, IDVAL, NULL,NULL);
}
| NUMBER
{
$$ = makeNode($1, NUMVAL, NULL,NULL);
}
;
stats : stat mStat
{
$$ = makeNode(NOTHING, STATS, $1, $2);
}
;
mStat : /* empty */
{
$$ = makeNode(NOTHING, MSTAT, NULL,NULL);
}
| stat mStat
{
$$ = makeNode(NOTHING, MSTAT, $1, $2);
}
;
stat: in DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| out DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| block
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| if_stat DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| loop DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
| assign DOT
{
$$ = makeNode(NOTHING, STAT, $1,NULL);
}
;
in : SCANF LBRACK ID RBRACK
{
$$ = makeNode($3, IN,NULL,NULL);
}
;
out : PRINTF LBRACK expr RBRACK
{
$$ = makeNode(NOTHING, OUT,$3,NULL);
}
;
if_stat : IF LBRACK expr RO expr RBRACK THEN block
{
$$ = makeNode(NOTHING, IF_STAT, $4, $8);
}
;
loop : ITER LBRACK expr RO expr RBRACK block
{
$$ = makeNode(NOTHING, LOOP, $4, $7);
}
;
assign : ID ASSIGNS expr
{
$$ = makeNode($1, ASSIGN, $3,NULL);
}
;
RO : LE
{
$$ = makeNode(LE, RO, NULL,NULL);
}
| GE
{
$$ = makeNode(GE, RO, NULL,NULL);
}
| EQUAL
{
$$ = makeNode(EQUAL, RO, NULL,NULL);
}
| COLON COLON
{
$$ = makeNode(EQUAL, RO, NULL,NULL);
}
;
%%
// node generator
TREE makeNode(int iVal, int nodeID, TREE p1, TREE p2)
{
TREE t;
t = (TREE)malloc(sizeof(TREE_NODE));
t->item = iVal;
t->nodeID = nodeID;
t->first = p1;
t->second = p2;
//printf("NODE CREATED");
return(t);
}
// prints the tree with indentation for depth
void printTree(TREE tree, int depth){
int i;
if(tree == NULL) return;
for(i=depth;i;i--)
printf(" ");
if(tree->nodeID == NUMBER)
printf("INT: %d ",tree->item);
else if(tree->nodeID == IDVAL){
if(tree->item > 0 && tree->item < SYMBOLTABLESIZE )
printf("id: %s ",symtable[tree->item]->id);
else
printf("unknown id: %d ", tree->item);
}
if(tree->item != NOTHING){
printf("Data: %d ",tree->item);
}
// If out of range of the table
if (tree->nodeID < 0 || tree->nodeID > sizeof(NodeName))
printf("Unknown ID: %d\n",tree->nodeID);
else
printf("%s\n",NodeName[tree->nodeID]);
printTree(tree->first,depth+2);
printTree(tree->second,depth+2);
}
#include "lex.yy.c"
// heres the makefile I use for compilation
frontEnd: lex.yy.c parser.tab.c
gcc parser.tab.c main.c -o frontEnd -lfl -DDEBUG
parser.tab.c parser.tab.h: parser.y
bison -d parser.y
lex.yy.c: lex.l
flex lex.l
clean:
rm lex.yy.c y.tab.c frontEnd
'''
// data.txt
start
let x : 13
main {
scanf [ x ] .
printf [ 34 ] .
} stop[enter image description here][2]
[1]: https://i.stack.imgur.com/xlNnh.png
[2]: https://i.stack.imgur.com/HKRtX.png
I think this has a lot more to do with your AST and symbol table functions than with your parser, and practically nothing to do with bison itself.
For example, your function to print trees won't attempt to print an identifier's name if its symbol table index is 0.
if(tree->item > 0 && tree->item < SYMBOLTABLESIZE)
But the first symbol entered in the table will have index 0. (Perhaps you fixed this between pasting your code and generating the results. You should always check that the code you paste in a question corresponds precisely to the output which you show. But this isn't the only bug in your code; it's just an example.)
As another example, the immediate problem which causes Data: 0 to be printed instead of the symbol name is that your tree printer only prints symbol names for AST nodes of type IDVAL, but you create an AST IN node whose data field contains the variable's symbol table index. So either you need to fix your tree printer so it knows about IN nodes, or you need to change the IN node so that it has a child which is the IDVAL node. (That's probably the best solution in the long run.)
It's always a temptation to blame bison (or whatever unfamiliar tool you're using at the moment) for bugs, instead of considering the possibility that you've introduced bugs in your own support code. To avoid falling into this trap, it's always a good idea to test your library functions separately before using them in a more complicated project. For example, you could write a small test driver that builds a fixed AST tree, prints it, and deletes it. Once that works, and only when that works, you can check to see if your parser can build and print the same tree by parsing an input.
You will find that some simple good software design practices will make this whole process much smoother:
Organise your code into separate component files, each with its own header file. Document the library interfaces (and, if necessary, data structures) using comments in the header file. Briefly describe what each function's purpose is. If you can't find a brief description, it nay be that the function is trying to do too many different things.
In your parser, the functions and declarations needed to build and use ASTs are scattered between different parts of your lexer and parser files. This makes them much harder to read, debug, maintain and even use.
No matter what your teacher might tell you, if you find it necessary to #include the generated lexical scanner directly into the parser, then you probably have not found a good way to organise your support functions. You should always aim to make it possible to separately compile the parser and the scanner.
For data structures like your AST node, which use different member variables in different ways depending on an enumerated node type -- which is a model you'll find in other C projects as well, but is particularly common in parsers -- document the precise use of each field for every enumeration value. And make sure that every time you change the way you use the data or add new enumeration values, you fix the documentation accordingly.
This documentation will make it much easier to verify that your AST is being built correctly. As an additional benefit, you (or others using your code) will have an accurate description of how to interpret the contents of AST nodes, which makes it much easier to write code which analyses the tree.
In short, the way to write, debug and maintain any non-trivial project is not by "messing around" but by being systematic and modular. While it might seem like all of this takes precious time, particularly the documentation, it will almost always save you a lot of time in the long run.
I'm building a simplified C parser using bison and flex. I've written a grammar rule to detect functions in my Bison file, and I want the parser to send the name of the function to my C monitoring program. Here is an extremely simplified sample of what I've implemented :
my monitor program
/*monitor.h*/
#ifndef MONITOR_H
#define MONITOR_H
extern int noLig;
extern char* yytext;
#endif
/*monitor.c*/
#include <stdio.h>
#include <monitor.h>
#include <project.tab.h>
int noLig=0;
int main (int argc, char * argv[]) {
printf("flex-\n");
int err_code=yyparse();
if (err_code == 0) {
printf("It went fine\n");
} else {printf("It didn't go well\n");}
return 0;
}
project.l file
%{
#include <stdio.h>
#include <monitor.h>
#include <project.tab.h>
%}
%%
"\"[a-zA-Z0-9]+"\" {ECHO; yylval.str=yytext; return STR;}
[.,;=()\[\]\{\}] { return yytext[0]; }
"char" {ECHO; printf("-"); yylval.str=yytext; return TYPE;}
"int" {ECHO; printf("-");yylval.str=yytext; return TYPE;}
"float" {ECHO; printf("-");yylval.str=yytext; return TYPE;}
"double" {ECHO; printf("-");yylval.str=yytext;return TYPE;}
[a-zA-Z][a-zA-Z0-9]* {ECHO; printf("-");yylval.str = yytext; return VAR;}
[ \t\n\b]+ {noLig++;}
"//".* {}
. {printf(":%cwhat is this",yytext[0]);}
%%
project.y file
%{
#include <stdio.h>
#include <monitor.h>
int yylex();
int yyerror ();
%}
%union {
char *str;
int i;
}
%define parse.error verbose
%type <i> INT
%type <str> TYPE STR VAR
%token TYPE INT STR VAR
%start Program
%%
Program: function_l
;
function_l: function
| function_l function
;
function: TYPE VAR '(' param_prototype_ld ')' '{' instruction_l '}'
{printf("\n\nbison-\n%s\n",$1);}
;
param_prototype_ld: /*empty*/
| param_prototype_l
;
param_prototype_l: param_prototype
| param_prototype_l ',' param_prototype
;
param_prototype: TYPE VAR
;
instruction_l: /*empty*/
| VAR ';'
| VAR instruction_l
;
%%
int yyerror (char* s) {
fprintf(stderr, "Compilator3000:l.%d: %s\n", noLig, s);
}
test.c file
int main (int arg) {
a;
}
It compiles alright, with no warning. However, when I run ./monitor < test.c, I get the following output :
flex-
int-main-int-arg-a-
bison-
int main (int arg) {
a;
}
It went fine
Why does bison variable $1 returns the whole function block ? How can I get only the return type ? (In the end, my goal is to print the return type, function name, and the type of the arguments)
The value of yytext is not guaranteed to persist, and to make yytext persist it must be copied to a separate buffer. This is often done using strdup:
...
"\"[a-zA-Z0-9]+"\" {ECHO; yylval.str=strdup(yytext); return STR;}
[.,;=()\[\]\{\}] { return yytext[0]; }
"char" {ECHO; printf("-"); yylval.str=strdup(yytext); return TYPE;}
"int" {ECHO; printf("-");yylval.str=strdup(yytext); return TYPE;}
"float" {ECHO; printf("-");yylval.str=strdup(yytext); return TYPE;}
"double" {ECHO; printf("-");yylval.str=strdup(yytext);return TYPE;}
[a-zA-Z][a-zA-Z0-9]* {ECHO; printf("-");yylval.str = strdup(yytext); return VAR;}
...
Though strdup can return NULL, a wrapper function can be used to make that failure explicit
char *
strdup_checked(char *str)
{
char *p;
if ((p = strdup(str)) == NULL) {
perror("strdup");
exit(EXIT_FAILURE);
}
return (p);
}
Although OP has found the solution, I would try to provide a complete answer.
This is the Grammar which defines the function as per your code.
function: TYPE VAR '(' param_prototype_ld ')' '{' instruction_l '}'
{printf("\n\nbison-\n%s\n",$1);}
;
Each symbol is a positional variable. Eg TYPE is $1, VAR is $2 (which is I believe the function name). $$ is the return value of any rule. In this case function. In order to return the function name you need to set $$=$2 in the action part. Setting $$ to $1 will return the function name. Alternatively you can create a data structure in the action such as an array or struct to hold more than one $ variable and then return it.
The effect would be seen in the following rule
function_l: function
| function_l function
;
Non-terminal symbol function will hold the name of the function. In this rule "function_l" is $1 and "function" is $2. If you print $2 it will give you the function name which was passed from the rule "function".
Since the terminal "VAR" is a string you need to set yylval.str=strdup(yytext) either in the lex or in the grammar rule as yylval.str=strdup($<pos>)
I'm trying to make a simple parser. It's for a homework assignment but also for own experimentation. I have completed the lexer and the parser and I'm trying now to output an AST. The problem is that when I'm adding, for example, two integers, the result tree is printed with unrecognizable symbols. A valid input should be +(1,1) and a valid output should be (+ 1 1). Instead of this, I'm getting ( + �|k �|k ). I've tried many things, without actually any significant result. The sprintf function returns a null terminator, so probably this is not the problem. Below is the parser code (.y file):
%{
#define YYDEBUG 1
%}
%start program
%token NUMBER
%token ID
%token PLUS MINUS TIMES
%token LP RP EQUALS COMMA
%token END
%token LET IN AND
%left PLUS MINUS
%left TIMES
%left LET IN AND
%left EQUALS
%%
program:{printf("Empty Input\n");} /* empty */
| program line /* do nothing */
line: expr END { printtree($1); printf("\n");}
;
expr : /*Empty*/
| LET deflist IN expr {}
| ID { printf("Found ID\n"); $$ = make_id_leaf($1);}
| NUMBER { printf("Found NUMBER\n"); $$ = make_number_leaf($1);}
| PLUS LP expr COMMA expr RP {$$ = make_plus_tree($3,$5); printf("Found expr PLUS expr.\n"); }
| TIMES LP expr COMMA expr RP {$$ = make_times_tree($3,$5); printf("Found expr TIMES expr. Result:%d\n", $$);}
| MINUS ID
| MINUS NUMBER { printf("found MINUS NUMBER\n"); }
;
deflist : definition
| definition AND deflist
;
definition : /*Empty*/
| ID EQUALS expr {printf("Found EQ\n");}
;
%%
/*int main (void) {return yyparse ( );}*/
int yyerror (char *s) {fprintf (stderr, "%s\n", s);}
The lexer file:
%{
#include "parser.h"
%}
DIGIT [0-9]
LETTER [a-zA-Z]
%%
LET {printf("Encountered LET\n"); return(LET);}
IN {printf("Encountered IN\n"); return(IN);}
AND {printf("Encountered AND\n"); return(AND);}
{DIGIT}+ {yylval = atoi(yytext); return NUMBER;}
{LETTER}* { if (strlen(yytext) <= 8){
yylval = strlen(yytext);
printf( "<ID, %s> ", yytext );
return(ID);
} else {
yytext[8] = '\0';
printf("WARNING! Long identifier. Truncating to 8 chars\n");
printf( "<ID, %s> ", yytext );
return(ID);
}
}
[ \t] ;
[\n] return(END);
"+" return(PLUS);
"-" return(MINUS);
"*" return(TIMES);
"=" return(EQUALS);
"(" return(LP);
")" return(RP);
"," return(COMMA);
<<EOF>> return(0);
%%
int yywrap (void) {return 1;}
The main.c which includes the yyparse() function:
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
#include "treedefs.h"
int main(int argc, char **argv){
yyparse();
return 0;
}
And the treedefs.h file which includes the function definitions (I've included only the struct definition, the number leaf and the plus tree):
typedef struct tree{
char *token;
TREE *l;
TREE *r;
TREE *child;
}TREE;
/* Make number leaves */
TREE *make_number_leaf(int n){
TREE *leafNum = malloc(sizeof(TREE));
char *c, ch[8];
sprintf(ch, "%d", n); /* Effective way to convert int to string */
c = ch;
leafNum->token = c;
leafNum->l = NULL;
leafNum->r = NULL;
leafNum->child = NULL;
printf("NUM Leaf is: %s\n", leafNum->token);
return (leafNum);
}
/* Addition tree */
TREE *make_plus_tree(TREE *l, TREE *r){
TREE *plusTree = malloc(sizeof(TREE));
plusTree->token = "+";
plusTree->l = l;
plusTree->r = r;
plusTree->child = NULL;
return (plusTree);
}
void printtree(TREE *tree)
{
if (tree->l || tree->r){
printf("(");
}
printf(" %s ", tree->token);
if (tree->l){
printtree(tree->l);
}
if (tree->r){
printtree(tree->r);
}
if (tree->l || tree->r){
printf(")");
}
}
The file tree.h includes only some declarations, no big deal, and definitely not related to the issue.
Why the numbers look like this? And how can I fix it? Any help will be greatly appreciated.
This problem actually has nothing to do with bison or flex. It's in your make_number_leaf implementation:
TREE *make_number_leaf(int n){
TREE *leafNum = malloc(sizeof(TREE));
char *c, ch[8];
// ^ local variable
sprintf(ch, "%d", n); /* Effective way to convert int to string */
c = ch;
leafNum->token = c;
// ^ dangling pointer
// Remainder omitted
}
As indicated in the comments above, ch is a local (stack-allocated) variable, whose lifetime ends when the function returns. Assigning its address to the variable c does nothing to change that. So the value of c which is stored into leafNum->token will become a dangling pointer as soon as the function returns.
So when you later attempt to print out the token, you are printing out the contents of random memory.
You need to malloc a character buffer, and remember to free it when you are freeing the TREE. (However, in the case where leafNum->token is a string literal, you cannot call free, so you need to be a bit cleverer.)
I am trying to build implement a basic calculator using Concurrent YACC. I have tried the code by statically creating the threads. But whe I want to dynamically specify how many threads to be created, the parser seems to have a problem. Here are the contents of my code.
aa.y file
%{
#include <stdio.h>
#include <pthread.h>
#include <string.h>
void * scanner;
FILE *yyin;
#define YYSTYPE int
%}
%token digit
%lex-param {void * scanner}
%parse-param {void * scanner}
%start list
%token NUMBER
%left '+' '-'
%left '*' '/' '%'
%left UMINUS
%union {int i;}
%%
list:
|
list stat '\n'
|
list error '\n'{ yyerrok; }
;
stat: expr { printf("Thread = %d ... Ans = %d\n",pthread_self(),$1);}
;
expr: '(' expr ')'{ $$ = $2; }
|
expr '*' expr { $$ = $1 * $3; }
|
expr '/' expr { $$ = $1 / $3; }
|
expr '+' expr { $$ = $1 + $3; }
|
expr '-' expr { $$ = $1 - $3; }
|
'-' expr %prec UMINUS { $$ = -$2; }
|
NUMBER
;
%%
struct struct_arg
{
unsigned char* file;
};
int yyerror()
{
return 1;
}
void *parse(void *arguments)
{
struct struct_arg *args = (struct struct_arg *)arguments;
unsigned char* filename;
filename = args -> file;
yyin = fopen(filename,"r+");
if(yyin == NULL)
{
}
else
{
yylex_init(&scanner);
yyset_in(yyin,scanner);
yyparse(scanner);
yylex_destroy(scanner);
printf("Thread = %d\n",pthread_self());
}
fclose(yyin);
}
int main(int argc, char *argv[])
{
int num;
printf("How many threads you want to create??\n");
scanf("%d", &num);
int error, count = 0;
FILE *fp[num], *file_pointer;
char line[256];
size_t len = 0;
char read;
file_pointer = fopen("test.txt", "r");
while (fgets(line, sizeof(line), file_pointer))
{
char file_name[32] = "test_";
char dummy[4];
char dummy2[5] = ".txt";
sprintf(dummy, "%d", count);
strcat(file_name, dummy);
strcat(file_name, dummy2);
fp[count] = fopen(file_name, "a");
fprintf(fp[count], "%s", line);
fclose(fp[count]);
count++;
if(count == num)
{
count = 0;
}
}
struct struct_arg arguments[num];
int i = 0;
while(i < num)
{
char file_name[32] = "test_";
char dummy[4];
char dummy2[5] = ".txt";
sprintf(dummy, "%d", i);
strcat(file_name, dummy);
strcat(file_name, dummy2);
arguments[i].file = file_name;
i++;
}
pthread_t tid[num];
int j = 0;
while(j < num)
{
error = pthread_create(&(tid[j]), NULL, &parse, (void *) &arguments[j]);
j++;
}
int n = 0;
while(n < num)
{
pthread_join(tid[n], NULL);
n++;
}
int temp, k = 0;
while(k < num)
{
char file_name[32] = "test_";
char dummy[4];
char dummy2[5] = ".txt";
sprintf(dummy, "%d", k);
strcat(file_name, dummy);
strcat(file_name, dummy2);
temp = remove(file_name);
k++;
}
return 0;
}
aa.l
%{
#include <stdio.h>
#include "y.tab.h"
extern int scanner;
%}
%option reentrant
%option noyywrap
NUMBER [0-9]+
%%
" " ;
{NUMBER} {
yylval->i = atoi(yytext);
return(NUMBER);
}
[^0-9\b] {
return(yytext[0]);
}
My compiling steps are
yacc -d aa.y
lex aa.l
cc lex.yy.c y.tab.c -o aa.exe -pthread
And the error generated is
aa.l: In function 'yylex':
aa.l:13:23: error: invalid type argument of '->' (have 'YYSTYPE')
yylval->i = atoi(yytext);
Can anyone please point out what I am doing wrong??
That's a simple compiler error, which is (indirectly) the result of your not requesting a reentrant ("pure") bison parser. [Note 1]
Since the parser is not reentrant, it uses a global yylval which is of type YYSTYPE. Your %union declaration will create a declaration of YYSTYPE as a union type which will be placed in the generated header file y.tab.h, which effectively looks something like this (leaving out some unimportant details):
#ifndef YYSTYPE
typedef union yystype {
int i;
} YYSTYPE;
extern YYSTYPE yylval;
#endif
That code will also be placed into y.tab.c, but it will go after the inserted C segment from the %{...} section of your bison definition. There you #define YYSTYPE int, with the result that in y.tab.c yylval has type int, whereas in `yy.lex.c, it is a union type. That's undefined behaviour (UB), which is how you say "wrong wrong wrong" in C. (But UB is really undefined; one possibility is that the error is silently ignored.)
Since yylval is an instance of YYSTYPE, rather than a pointer to a YYSTYPE, the correct way of referring to member i is yylval.i, not yylval->i. Hence the compiler error.
In your bison file, you don't declare any of your nonterminals to have a type. Since you're including a %union declaration, bison requires you to tell it the type of any terminal or non-terminal whose semantic value is used (with $1, $2, etc.) or assigned to (with $$). So you should have received a pile of errors when you attempted to pass the file through bison. On the other hand, if you had declared types, then the bison-generated parser would have contained references to yylval.i, and that would also generated compiler errors because your #define YYSTYPE effectively bypassed the union declaration. (Bison doesn't know about the #define because it doesn't parse included C code. So it can't generate an error message. But it's definitely an error.)
If you had told bison to produce a re-entrant parser, then the generated parser would have called yylex with an additional argument of type YYSTYPE*; had you also supplied %option bison-bridge in the flex definition, then flex would have generated a declaration of yylex with an additional parameter of type YYSTYPE* which will become the value of yylval. In that case, yylval will be a pointer, rather than an instance, and yylval->i would have been correct.
Notes
For some reason, the use of reentrant bison parsers is referred to incorrectly as "Concurrent YACC". That's wrong on two counts: first, the generated parser is not concurrent (although because it is reentrant, it can be used concurrently if the actions don't introduce race conditions), and secondly because the feature is not available in YACC; it's a bison extension.
A quick Google search revealed two uses of the phrase "Concurrent YACC". One of them was in a comment in an entry in ESR's blog, describing a tool he wrote some decades ago, before bison existed, to make yacc parsers reentrant. The other one was a third-year programming assignment in a course on concurrent programming offered by the University of Pune, which uses the phrase "Concurrent YACC" as though it were meaningful.
I'm guessing that this question derives from the second of those, which might imply that the coursework includes an explanation of what is meant. But for what it's worth, ESR does outline the steps involved in correctly bridging a reentrant bison parser to a reentrant flex lexer. So I suggest you take a look at it, although I do not endorse ESR's description of %bison-bridge as buggy. (Had he said "badly-documented kludge", I would have been 100% on-side.)
I've write a parser for evaluating a logical expression. I know flex and bison use global variables (like yylval). I want a pure parser and a reentrant scanner for thread programming. My '.y' file is here:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
//int yylex (YYSTYPE* lvalp);
int yylex(void);
bool parseExpression(const std::string& inp);
%}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
void yyerror(char *s) {
fprintf(stderr, "%s\n", s);
}
void main(void) {
std::string inp = "0|0\n";
bool nasi = parseExpression(inp);
printf("%s%d\n", "nasi ", nasi);
printf("Press ENTER to close. ");
getchar();
}
My '.y' file is here:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
%}
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
yylval = false;
//*lvalp = false;
}
else
{
yylval = true;
//*lvalp = true;
}
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror("Unknown character");
%%
int yywrap(void) {
return 1;
}
bool parseExpression(const std::string& inp)
{
yy_delete_buffer(YY_CURRENT_BUFFER);
/*Copy string into new buffer and Switch buffers*/
yy_scan_string(inp.c_str());
bool nasi = yyparse();
return nasi;
}
I've added %pure_parser to both files, changed yylex declaration to int yylex (YYSTYPE* lvalp); and replaced yylval to *lvalp, but I saw an error: 'lvalp' is undeclared identifier.. There are many examples about 'reentrant' and 'pure', but I can't find the best guideline.
Could someone guide me?
Thanks in advance.
Fortunately, I did it. Here is my code. I think it can be a good guideline for who wants write a pure parser.ل
My reentrant scanner:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror (yyscan_t yyscanner, char const *msg);
%}
%option reentrant bison-bridge
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
*yylval = false;
}
else
{
*yylval = true;
}
//yylval = atoi(yytext);
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror (yyscanner, "Unknown character");
%%
int yywrap(yyscan_t yyscanner)
{
return 1;
}
bool parseExpression(const std::string& inp)
{
yyscan_t myscanner;
yylex_init(&myscanner);
struct yyguts_t * yyg = (struct yyguts_t*)myscanner;
yy_delete_buffer(YY_CURRENT_BUFFER,myscanner);
/*Copy string into new buffer and Switch buffers*/
yy_scan_string(inp.c_str(), myscanner);
bool nasi = yyparse(myscanner);
yylex_destroy(myscanner);
return nasi;
}
My pure parser:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
typedef void* yyscan_t;
void yyerror (yyscan_t yyscanner, char const *msg);
int yylex(YYSTYPE *yylval_param, yyscan_t yyscanner);
bool parseExpression(const std::string& inp);
%}
%define api.pure full
%lex-param {yyscan_t scanner}
%parse-param {yyscan_t scanner}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
void yyerror (yyscan_t yyscanner, char const *msg){
fprintf(stderr, "%s\n", msg);
}
void main(void) {
std::string inp = "1|0\n";
bool nasi = parseExpression(inp);
printf("%s%d\n", "nasi ", nasi);
printf("Press ENTER to close. ");
getchar();
}
Notice that I've cheat and defined yyg myself as
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
I don't find another way to get the YY_CURRENT_BUFFER. So, If someone knows the best way to get the YY_CURRENT_BUFFER, tell me,plz.
Here is a complete Flex/Bison C++ example. Everything is reentrant, no use of global variables. Both parser/lexer are encapsulated in a class placed in a separate namespace. You can instantiate as many "interpreters" in as many threads as you want.
https://github.com/ezaquarii/bison-flex-cpp-example
Disclaimer: it's not tested on Windows, but the code should be portable with minor tweaks.