recover yyparse after yyaccept - c

Friends, I am parsing a BibTeX file that contains multiple BibTeX entries, e.g.
#Book{a1,
Title="ASR",
Publisher="oxf",
Author = {a {\"m}ook, Rudra Banerjee},
Year="2010",
Address="UK",
Edition="1",
}
#Article{a2,
Author="Rudra Banerjee",
Title="Fe{\"Ni}Mo",
Publisher="P{\"R}B",
Number="12",
Pages="36690",
Year="2011",
Address="UK",
Edition="1",
}
Now, I want yyparse to return after each entry, hence, my parser is:
%union
{
char *sval;
};
%token <sval> VALUE
%token <sval> KEY
%token OBRACE
%token EBRACE
%token QUOTE
%token SEMICOLON
%start Input
%%
Input:
/* empty */
| Input Entry ; /* input is zero or more entries */
Entry:
'#' KEY '{' KEY ','{
g_hash_table_insert(table, g_strdup("TYPE"), g_strdup($2));
g_hash_table_insert(table, g_strdup("ID"), g_strdup($4));
g_printf("%s:%s\n","KEY=>",g_hash_table_lookup(table,"TYPE"));
// g_printf("%s: %s\n", $2, $4);
}
KeyVals '}'
{YYACCEPT;}
;
KeyVals:
/* empty */
| KeyVals KeyVal ; /* zero or more keyvals */
KeyVal:
KEY '=' VALUE ',' { g_hash_table_insert(table, g_strdup($1), g_strdup($3));
// g_printf("%s: %s\n", $1, $3);
g_printf("%s:%s\n",$1,g_hash_table_lookup(table,$1));
};
%%
and in main routine, it is called as:
do{
yyparse();
}
The problem is that only the first entry is parsed (correctly); i.e. the parser does not resume after the YYACCEPT.
How can I make the code call yyparse again after YYACCEPT?
This is almost same question as How do I convince Bison to parse part of a file? But I have failed to solve my problem.

You should enable the %debug traces to check what is going on. Also, given what you are trying to do, you should probably give a try to push parsers instead (http://www.gnu.org/software/bison/manual/html_node/Push-Decl.html).

Related

Bison nonterminal useless in grammar, rule useless in parser

I am trying to make a compiler "from scratch" using flex-bison.
I tried to find help online, but there is not much that I have dug up.
I managed to find a book: flex & bison by John Levine.
It was pretty useful, but I am stuck and don't know what to do.
This is my flex code:
%option noyywrap
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.tab.h"
extern FILE *yyin;
extern FILE *yyout;
int line_no = 1;
//the function of lexer analysis. Return the token
int yylex();
//error function
void yyerror();
//print statement function
void print_return(char *token);
%}
%x ML_COMMENT
alphabet [a-zA-Z]
digit [0-9]
alphanumeric {alphabet}|{digit}
print [ -~]
underscore _
identifier ({alphabet}|{underscore})+({alphanumeric}|{underscore})*
integer "0"|[0-9]{digit}*
float_number "0"|{digit}*"."{digit}+
char \'{print}\'
%%
"PROGRAM" { /* keyword: trace it, then hand the token to the parser */ print_return("PROGRAM"); return PROGRAM; }
"%".* { print_return("COMMENT"); return COMMENT; }
"BREAK" { print_return("BREAK"); return BREAK; }
"VARS" { print_return("VARS"); return VARS; }
"STARTMAIN" { print_return("STARTMAIN"); return STARTMAIN; }
"ENDMAIN" { print_return("ENDMAIN"); return ENDMAIN;}
"IF" { print_return("IF"); return IF; }
"THEN" { print_return("THEN"); return THEN;}
"ELSEIF" { print_return("ELSEIF"); return ELSEIF; }
"ELSE" { print_return("ELSE"); return ELSE; }
"ENDIF" { print_return("ENDIF"); return ENDIF; }
"FOR" { print_return("FOR"); return FOR; }
"TO" { print_return("TO"); return TO; }
"STEP" { print_return("STEP"); return STEP; }
"ENDFOR" { print_return("ENDFOR"); return ENDFOR; }
"SWITCH" { print_return("SWITCH"); return SWITCH; }
"CASE" { print_return("CASE"); return CASE; }
"ENDSWITCH" { print_return("ENDSWITCH"); return ENDSWITCH; }
"RETURN" { /* must actually return the token; a bare `RETURN;` is a no-op expression */ print_return("RETURN"); return RETURN; }
"FUNCTION" { /* the parser declares this token as FUNCTION, not FUN */ print_return("FUN"); return FUNCTION; }
"ENDFUNCTION" { print_return("ENDFUNCTION"); return ENDFUNCTION; }
"PRINT" { print_return("PRINT"); return PRINT; }
"WHILE" { print_return("WHILE"); return WHILE;}
"ENDWHILE" { print_return("ENDWHILE"); return ENDWHILE;}
";" { print_return("QM"); return QM; }
"\n" { line_no++; print_return("NEWLINE"); return NEWLINE; }
"\t" { print_return("INDENT"); return INDENT; }
"+=" { print_return("ADD_ASSIGN"); return ADD_ASSIGN; }
"-=" { print_return("SUB_ASSIGN"); return SUB_ASSIGN; }
"/=" { print_return("DIV_ASSIGN"); return DIV_ASSIGN; }
"%=" { print_return("MOD_ASSIGN"); return MOD_ASSIGN; }
"--" { print_return("DEC_OP"); return DEC_OP; }
"++" { print_return("INC_OP"); return INC_OP; }
"AND" { print_return("AND_OP"); return AND_OP; }
"OR" { print_return("OR_OP"); return OR_OP; }
"==" { print_return("EQ_OP"); return EQ_OP; }
">=" { print_return("GE_OP"); return GE_OP; }
"<=" { print_return("LE_OP"); return LE_OP; }
"!=" { print_return("NE_OP"); return NE_OP; }
"{" { print_return("L_BRACE"); return L_BRACE; }
"}" { print_return("R_BRACE"); return R_BRACE; }
"," { print_return("COMMA"); return COMMA; }
"=" { print_return("ASSIGN"); return ASSIGN; }
"(" { print_return("L_PAR"); return L_PAR; }
")" { print_return("R_PAR"); return R_PAR;}
"[" { print_return("L_BRACK"); return L_BRACK; }
"]" { print_return("R_BRACK"); return R_BRACK;}
"." { print_return("DOT"); return DOT; }
"_" { print_return("UNDERSCORE"); return UNDERSCORE; }
"-" { print_return("MINUS"); return MINUS; }
"+" { print_return("PLUS"); return PLUS; }
"*" { print_return("MUL"); return MUL; }
":" { print_return("COLON"); return COLON; }
"/" { print_return("DIV"); return DIV; }
"<" { print_return("LT"); return LT; }
">" { print_return("GT"); return GT; }
[ ] ;
{identifier} {
    /* Bounded copy: yylval.name is a fixed-size array, so an over-long
       identifier is truncated instead of overflowing it. */
    print_return("ID"); snprintf(yylval.name, sizeof yylval.name, "%s", yytext); return IDENTIFIER; }
{integer} { yylval.integer_val = atoi(yytext); print_return("INTEGER"); return INTEGER; }
{float_number} { print_return("FLOAT"); return FLOAT; }
{char} { print_return("CHAR"); return CHAR; }
. {
    /* Catch-all, deliberately last: on equal-length matches flex picks the
       earliest rule, so placing this before the rules above would make every
       one-character identifier or digit an "unknown character". */
    yyerror("Unkown character"); }
%%
/*---------------------------------------------------------------------------------------------------------------------*/
/* Debug trace for the scanner: prints the token name together with the
   current line counter and the text that was just matched. */
void print_return(char *token)
{
    const int current_line = line_no;
    const char *matched_text = yytext;

    fprintf(stdout, "Token: %s\t\t Line: %d\t\t Text: %s\n", token, current_line, matched_text);
}
This is my bison file:
%{
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "print_console.c"
//pointer to input file of lexer
extern FILE *yyin;
//pointer to output file of lexer
extern FILE *yyout;
//line counter
extern int line_no;
//reads the input stream generates tokens
extern int yylex();
//temporary token save
extern char* yytext;
//Function Initilize
int yylex();
void yyerror(char *message);
%}
//struct for print_console
%union
{
char name[500];
int integer_val;
}
/* --------------------------------------- TOKENS ---------------------------------------*/
//starting symbol: must name the grammar's top-level non-terminal (program), not a token
%start program
%token COMMENT
%token BREAK
%token VARS
%token QM
%token STARTMAIN
%token ENDMAIN
%token IF
%token THEN
%token ELSEIF
%token ELSE
%token ENDIF
%token FOR
%token TO
%token STEP
%token ENDFOR
%token SWITCH
%token CASE
%token ENDSWITCH
%token RETURN
%token FUNCTION
%token ENDFUNCTION
%token PRINT
%token WHILE
%token ENDWHILE
%token NEWLINE
%token INDENT
%token ADD_ASSIGN
%token SUB_ASSIGN
%token DIV_ASSIGN
%token MOD_ASSIGN
%token DEC_OP
%token INC_OP
%token AND_OP
%token OR_OP
%token EQ_OP
%token GE_OP
%token LE_OP
%token NE_OP
%token L_BRACE
%token R_BRACE
%token COMMA
%token COLON
%token ASSIGN
%token L_PAR
%token R_PAR
%token L_BRACK
%token R_BRACK
%token DOT
%token UNDERSCORE
%token MINUS
%token PLUS
%token MUL
%token DIV
%token LT
%token GT
%token FLOAT
%token CHAR
%token <name> IDENTIFIER
%token <integer_val> INTEGER
//type for access to $$
%type <integer_val> line int_op int_data
%type <name> calc_assignment
%%
/* --------------------------------------- BNF GRAMMAR ---------------------------------------*/
/* A program is zero or more lines; the empty alternative is the base case
   that lets the left recursion terminate. */
program: /* empty */
       | program line;
line: if_stmt {;}
| elseif_stmt {;}
| else_stmt {;}
| for_statement {;}
| function NEWLINE INDENT {;}
| function NEWLINE indent2 {;}
| function NEWLINE {;}
| function_call {;}
| comments NEWLINE {;}
| action {;}
| print NEWLINE {;}
| switch NEWLINE case NEWLINE {;}
| dictionaries NEWLINE {;}
| calc_assignment NEWLINE {;}
| NEWLINE {;} ;
/*--------- BREAK -------------*/
break:BREAK QM NEWLINE ;
/*--------- ACTION & indents -------------*/
indent2: INDENT INDENT;
indent3: INDENT INDENT INDENT;
indent4: INDENT INDENT INDENT INDENT;
indent5: INDENT INDENT INDENT INDENT INDENT;
action: INDENT line
| indent2 line
| indent3 line
| indent4 line
| indent5 line ;
/*--------- DATA TYPES -------------*/
data_type: CHAR
| INTEGER
| IDENTIFIER;
/*--------- FUNCTIONS --------------*/
function: FUNCTION IDENTIFIER L_PAR optional_parameters R_PAR ;
end_function: ENDFUNCTION NEWLINE;
function_call: IDENTIFIER L_PAR optional_parameters R_PAR
| IDENTIFIER L_PAR data_type R_PAR
| IDENTIFIER L_PAR data_type COMMA data_type R_PAR
| IDENTIFIER L_PAR data_type COMMA data_type COMMA data_type R_PAR;
/*------------ INSPECTORS -------------*/
inspector:IDENTIFIER operators IDENTIFIER
|IDENTIFIER operators INTEGER
|INTEGER operators IDENTIFIER
|INTEGER operators INTEGER ;
inspector_gen: inspector | inspector AND_OR_operators;
/*----------- IF & FOR STATEMENTS -------------*/
if_stmt:IF L_PAR inspector_gen R_PAR THEN NEWLINE action ;
elseif_stmt: ELSEIF L_PAR inspector_gen R_PAR NEWLINE action ;
else_stmt: ELSE NEWLINE action ;
end_if_stmt:ENDIF NEWLINE ;
for_statement: FOR IDENTIFIER COLON ASSIGN INTEGER TO INTEGER STEP INTEGER NEWLINE action;
end_for_statement: ENDFOR NEWLINE;
/*---------- SWITCH / CASE STATEMENT -----------------*/
switch: SWITCH L_PAR LT IDENTIFIER GT R_PAR NEWLINE action;
case: CASE L_PAR LT INTEGER GT R_PAR NEWLINE action;
end_switch: ENDSWITCH NEWLINE;
/*-------------- WHILE ---------------*/
while: WHILE L_PAR inspector_gen R_PAR NEWLINE action ;
end_wile: ENDWHILE NEWLINE;
/*-------------- OPERATORS ---------------*/
operators:EQ_OP
| GE_OP
| LE_OP
| NE_OP
| DEC_OP
| INC_OP
| LT
| GT;
AND_OR_operators:AND_OP
|OR_OP;
optional_parameters: IDENTIFIER
| optional_parameters COMMA IDENTIFIER ;
/*-------------- COMMENTS ---------------*/
comments: COMMENT;
/*-------------- PRINT ---------------*/
print: PRINT L_PAR data_type R_PAR QM;
/*-------------- MAIN ---------------*/
start_main: STARTMAIN NEWLINE action;
end_main: ENDMAIN NEWLINE ;
/* --- DICTIONARIES --- */
dictionaries: IDENTIFIER ASSIGN L_BRACE dictionary_data R_BRACE
| IDENTIFIER ASSIGN IDENTIFIER L_PAR L_BRACK L_PAR dictionary_data R_PAR R_BRACK R_PAR
IDENTIFIER ASSIGN IDENTIFIER L_PAR dictionary_data optional_parameters dictionary_data R_PAR ;
dictionary_data: data_type COLON data_type
|data_type COLON data_type COMMA dictionary_data
| data_type COMMA data_type optional_parameters
| IDENTIFIER ASSIGN data_type | /* empty */ ;
/* --- CALCULATE --- */
calc_assignment: IDENTIFIER ASSIGN int_op { Change($1, $3); };
int_op: int_data { $$ = $1; }
| int_op PLUS int_data { $$ = $1 + $3; }
| int_op MINUS int_data { $$ = $1 - $3; }
| int_op MUL int_data { $$ = $1 * $3; }
| int_op DIV int_data { $$ = $1 / $3; } ;
int_data: INTEGER { $$ = $1; }
| IDENTIFIER { $$ = Search($1) -> integer_val; };
%%
/* ------------------------------------------------ C FUNCTIONS -------------------------------------------- */
void yyerror(char *message){
printf("Error: \"%s\"\t in line %d. Token = %s\n", message, line_no, yytext);
exit(1);
}
/* ------------------------------------------ MAIN FUNCTION --------------------------------------------- */
int main(int argc, char *argv[]){
hashTable = (hash *) calloc(SIZE, sizeof(hash));
int flag;
yyin = fopen(argv[1],"r");
//yyparse(): reads tokens, executes actions
flag = yyparse();
fclose(yyin);
printf("Parsing finished succesfully!\n\n");
printf(" __________________________\n");
Print();
printf(" __________________________\n");
return flag;
}
I am stuck and don't know what to do. The compiler just does not like my code:
parser.y: warning: 9 nonterminals useless in grammar [-Wother]
parser.y: warning: 9 rules useless in grammar [-Wother]
parser.y:136.1-5: warning: nonterminal useless in grammar: break [-Wother]
136 | break:BREAK QM NEWLINE ;
| ^~~~~
parser.y:157.1-12: warning: nonterminal useless in grammar: end_function [-Wother]
157 | end_function: ENDFUNCTION NEWLINE;
| ^~~~~~~~~~~~
parser.y:177.1-11: warning: nonterminal useless in grammar: end_if_stmt [-Wother]
177 | end_if_stmt:ENDIF NEWLINE ;
| ^~~~~~~~~~~
parser.y:180.1-17: warning: nonterminal useless in grammar: end_for_statement [-Wother]
180 | end_for_statement: ENDFOR NEWLINE;
| ^~~~~~~~~~~~~~~~~
parser.y:188.1-10: warning: nonterminal useless in grammar: end_switch [-Wother]
188 | end_switch: ENDSWITCH NEWLINE;
| ^~~~~~~~~~
parser.y:191.1-5: warning: nonterminal useless in grammar: while [-Wother]
191 | while: WHILE L_PAR inspector_gen R_PAR NEWLINE action ;
| ^~~~~
parser.y:192.1-8: warning: nonterminal useless in grammar: end_wile [-Wother]
192 | end_wile: ENDWHILE NEWLINE;
| ^~~~~~~~
parser.y:217.1-10: warning: nonterminal useless in grammar: start_main [-Wother]
217 | start_main: STARTMAIN NEWLINE action;
| ^~~~~~~~~~
parser.y:218.1-8: warning: nonterminal useless in grammar: end_main [-Wother]
218 | end_main: ENDMAIN NEWLINE ;
| ^~~~~~~~
parser.y: warning: 48 shift/reduce conflicts [-Wconflicts-sr]
parser.y: warning: 68 reduce/reduce conflicts [-Wconflicts-rr]
parser.y:141.10-29: warning: rule useless in parser due to conflicts [-Wother]
141 | indent3: INDENT INDENT INDENT;
| ^~~~~~~~~~~~~~~~~~~~
parser.y:142.10-36: warning: rule useless in parser due to conflicts [-Wother]
142 | indent4: INDENT INDENT INDENT INDENT;
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~
parser.y:143.10-43: warning: rule useless in parser due to conflicts [-Wother]
143 | indent5: INDENT INDENT INDENT INDENT INDENT;
I know that I have done something completely wrong. Please help me!
I don't know how to move on.
When you are starting out with Bison (and, really, any time you are using it), you are best off writing and debugging your grammar in small pieces. That's a good habit for any project in any programming language, but it's particularly true when you lack experience. Don't implement all the operators, just implement a couple of them. Once you have that working, you can add the rest. Similarly, don't implement every statement syntax. Start with one, get that working, and then add another one. It's much easier to find an error when the haystack is not very big. Once you make that a habit, you'll find that programming actually becomes much easier.
Now, your actual problem. A non-terminal is "useless" if it's never used. In other words, if you define a non-terminal and don't ever use it in some production, bison will warn you that it was pointless to define that non-terminal. And Bison is clever enough to do that analysis recursively: if the only place a non-terminal appears is on the right-hand side of a useless non-terminal, that non-terminal is also useless, and you'll get a warning for it, too. (I don't think that's an issue here, but I didn't do an extensive analysis of your code.)
So, for example, nowhere in your grammar do you do anything with the non-terminal break other than define it as break:BREAK QM NEWLINE ;. I suppose you intend to add it to your statement alternatives later on, in which case you could just ignore the warning (which is why it is a warning and not an error). But, on the whole, you would have created less noise by not adding break to your grammar until you were ready to add its use as well.
Now, the shift/reduce conflicts. Unless you're lucky enough to stumble upon an obvious issue, it's really hard to figure out what causes a shift/reduce conflict without seeing the actual states with conflicts; Bison will produce a report of these states if you use the -v command-line option. There's useful information on debugging conflicts in John Levine's excellent book.
The latest Bison versions can help you even more by producing counterexamples. There's another good explanation of conflicts in the Bison manual, and some examples which explain how to use this new feature.
But, as it happens, I did stumble upon one obvious error. You have (in part) the following productions:
line: action | print NEWLINE
action: INDENT line | indent2 line
indent2: INDENT INDENT
There's a lot more, but that's enough to create a conflict. Leaving aside what constitutes an INDENT token, and just noting that print starts with the token PRINT, suppose we have the following input:
INDENT INDENT PRINT
Now, how can your grammar derive that? It could do this:
line -> action -> INDENT line -> INDENT action
-> INDENT INDENT line -> INDENT INDENT print NEWLINE
Or it could do this:
line -> action -> indent2 line -> INDENT INDENT line
-> INDENT INDENT print NEWLINE
(As I hope you know, a derivation step consists of replacing a non-terminal with one of its right-hand sides.) So the above is two different derivations for the same input, which means your grammar is ambiguous. Bison insists on producing a definitive parse -- that's its entire purpose -- and if there are two possible parses for the same input, it can't do that.
Or, more precisely, it can do that, by picking which parse to use with the aid of some rules. But those rules often don't work as expected, and with an ambiguous grammar there is really no way for anyone other than the grammar's author to know which parse was intended. So Bison warns you that you have shift/reduce conflicts, and then uses its built-in rules to choose one possible parsing strategy.
Frequently, as with your grammar, when Bison applies these rules it finds that certain productions will no longer apply to any input (because the disambiguation rules chose some other production to apply). In that case, the eliminated productions become useless, and that's almost certainly an error, so Bison generates a warning about that, too.
I don't know if that's the cause of all the conflicts, but it would be good to fix that problem, and then see what is left.
It doesn't seem to me like your intent is to write a Python-like language where layout determines block structure, since you seem to be defining explicit end tokens for all your block syntaxes. It's not possible to use a context-free grammar to enforce correct indentation, so I hope that wasn't your intent.
The most usual parsing technique, for languages like C which don't consider layout as part of the grammar, is for the lexical scanner to simply skip over whitespace (tabs and spaces); since the whitespace makes no difference to the parse, there's no point confusing the grammar by forcing it to consider where the whitespace might go. That's certainly what I would suggest, but since I really have no idea what your intent was, I can't really say any more.
Good luck with the project.

Flex and Bison code - syntax error always

First of all I need to say that I am very new to Flex and Bison and I am a bit confused. There is a school project that want us to create a compiler using Flex and Bison for some kind of CLIPS language.
My code has a lot of problems, but the main one is that whatever I type I get a syntax error, when the result should be something else. Ideally it would work fully for the CLIPS language. E.g. when I write "4" I get a syntax error. Reading my code will probably help you understand this better. If I write "test 3 4" it doesn't show a syntax error, but it reports an unknown token, and that's wrong again. I'm completely lost. The code is a prototype given by the school, and we need to make some changes to it. If you have any questions, don't hesitate to ask. Thank you!
P.S.: dont mind the comments, they are in greek.
FLEX CODE:
%option noyywrap
/* Kwdikas C gia orismo twn apaitoumenwn header files kai twn metablhtwn.
Otidhpote anamesa sta %{ kai %} metaferetai autousio sto arxeio C pou
tha dhmiourghsei to Flex. */
%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Header file pou periexei lista me ola ta tokens */
#include "token.h"
/* Orismos metrhth trexousas grammhs */
int line = 1;
%}
/* Onomata kai antistoixoi orismoi (ypo morfh kanonikhs ekfrashs).
Meta apo auto, mporei na ginei xrhsh twn onomatwn (aristera) anti twn,
synhthws idiaiterws makroskelwn kai dysnohtwn, kanonikwn ekfrasewn */
/* dimiourgia KE simfona me ta orismata tis glossas */
DELIMITER [ \t]+
INTCONST [+-]*[1-9][0-9]*
VARIABLE [?][A-Za-z0-9]*
DEFINITIONS [a-zA-Z][-|_|A-Z|a-z|0-9]*
COMMENTS ^;.*$
/* Gia kathe pattern (aristera) pou tairiazei ekteleitai o antistoixos
kwdikas mesa sta agkistra. H entolh return epitrepei thn epistrofh
mias arithmhtikhs timhs mesw ths synarthshs yylex() */
/* an sinantisei diaxoristi i sxolio to agnoei, an sinantisei akeraio,metavliti i orismo ton emfanizei. se kathe alli periptosi ektiponei oti den anagnorizei to token, ti grammi pou vrisketai kai to string pou dothike */
%%
{DELIMITER} {;}
"bind" { return BIND;}
"test" { return TEST;}
"read" { return READ;}
"printout" { return PRINTOUT;}
"deffacts" { return DEFFACTS;}
"defrule" { return DEFRULE;}
"->" { return '->';}
"=" { return '=';}
"+" { return '+';}
"-" { return '-';}
"*" { return '*';}
"/" { return '/';}
"(" { return '(';}
")" { return ')';}
{INTCONST} { return INTCONST; }
{VARIABLE} { return VARIABLE; }
{DEFINITIONS} { return DEFINITIONS; }
{COMMENTS} {;}
\n { line++; printf("\n"); }
.+ { printf("\tLine=%d, UNKNOWN TOKEN, value=\"%s\"\n",line, yytext);}
<<EOF>> { printf("#END-OF-FILE#\n"); exit(0); }
%%
/* Pinakas me ola ta tokens se antistoixia me tous orismous sto token.h */
char *tname[11] = {"DELIMITER","INTCONST" , "VARIABLE", "DEFINITIONS", "COMMENTS", "BIND", "TEST", "READ", "PRINTOUT", "DEFFACTS", "DEFRULE"};
BISON CODE:
%{
/* Orismoi kai dhlwseis glwssas C. Otidhpote exei na kanei me orismo h arxikopoihsh
metablhtwn & synarthsewn, arxeia header kai dhlwseis #define mpainei se auto to shmeio */
#include <stdio.h>
#include <stdlib.h>
int yylex(void);
void yyerror(char *);
%}
/* Orismos twn anagnwrisimwn lektikwn monadwn. */
%token INTCONST VARIABLE DEFINITIONS PLUS NEWLINE MINUS MULT DIV COM BIND TEST READ PRINTOUT DEFFACTS DEFRULE
%%
/* Orismos twn grammatikwn kanonwn. Kathe fora pou antistoixizetai enas grammatikos
kanonas me ta dedomena eisodou, ekteleitai o kwdikas C pou brisketai anamesa sta
agkistra. H anamenomenh syntaksh einai:
onoma : kanonas { kwdikas C } */
program:
program expr NEWLINE { printf("%d\n", $2); }
|
;
expr:
INTCONST { $$ = $1; }
| VARIABLE { $$ = $1; }//prosthiki tis metavlitis
| PLUS expr expr { $$ = $2 + $3; }//prosthiki tis prosthesis os praksi
| MINUS expr expr { $$ = $2 - $3; } //prosthiki tis afairesis os praksi
| MULT expr expr { $$ = $2 * $3; }//prosthiki tou pollaplasiasmou os praksi
| DIV expr expr { $$ = $2 / $3; }//prosthiki tis diairesis os praksi
| COM { $$ = $1; }//prosthiki ton sxolion
| DEFFACTS expr { $$ = $2; }//prosthiki ton gegonoton
| DEFRULE expr { $$ = $2; }//prosthiki ton kanonon
| BIND expr expr { $$ = $2;}//prosthiki tis bind
| TEST expr expr { $$ = $2 ;}//prosthiki tis test
| READ expr expr { $$ = $2 ;}//prosthiki tis read
| PRINTOUT expr expr { $$ = $2 ;}//prosthiki tis printout
;
%%
/* yyerror is the error-reporting hook: yyparse calls it whenever it hits
   a syntax error. Here it just writes the message to stderr with an
   "Error: " prefix. */
void yyerror(char *s)
{
    const char *message = s;

    (void)fprintf(stderr, "Error: %s\n", message);
}
/* Program entry point. It simply calls Bison's yyparse to start the
   syntax analysis and passes the parser's status on as the exit code
   (yyparse returns 0 on success and non-zero on failure), so callers and
   scripts can tell whether the input was accepted. */
int main(void) {
    return yyparse();
}
TOKEN FILE:
#define DELIMITER 1
#define INTCONST 2
#define VARIABLE 3
#define DEFINITIONS 4
#define COMMENTS 5
#define BIND 6
#define TEST 7
#define READ 8
#define PRINTOUT 9
#define DEFFACTS 10
#define DEFRULE 11
MAKEFILE:
all:
bison -d simple-bison-code.y
flex mini-clips-la.l
gcc simple-bison-code.tab.c lex.yy.c -o B2
./B2
clean:
rm simple-bison-code.tab.c simple-bison-code.tab.h lex.yy.c B2
Your top-level rule is:
program:
program expr NEWLINE
which cannot succeed unless the parser sees a NEWLINE token. But it will never see one, because your lexical scanner never sends one; when it sees a newline, it increments the line count but doesn't return anything.
All your tokens are considered invalid because your lexical scanner uses its own definitions of the token values. You shouldn't do that. The parser generator (bison/yacc) will generate a header file containing the correct definitions; that is, the values it is expecting to see.
There are various other problems, probably more than I noticed. The most important is that you should not call exit(0) in the <<EOF>> rule, since that will mean that the parser can never succeed; it does not succeed until it is passed an EOF token. In fact, you should not normally have an <<EOF>> rule; the default action is to return 0 and that is pretty well the only action which makes sense.
Also, '->' is not a correct C literal. The compiler would have complained about it if you had enabled compiler warnings (-Wall), which you should always do, even if you are compiling generated code.
And your scanner's last pattern, intended to trigger on bad tokens, is .+, which will match the entire line, not just the erroneous character. Since (f)lex scanners accept the pattern with the longest match, most of your other patterns will never match. (Flex usually warns you about unmatchable patterns. Didn't you get such a warning?)
The fallback pattern should be .|\n, although you can use . if you are absolutely sure that every newline will be matched by some rule. I like to use %option nodefault, which will cause flex to warn me if there is some possible input not matched by any rule.

Swap between stdin and file in Bison

I have the following code in Bison, which extends the mfcalc proposed in the guide, implementing some functions like yylex() externally with FLEX.
To understand my problem, the key rules are in the non-terminal called line at the beginning of the grammar; concretely, the rules EVAL CLOSED_STRING '\n' and END (this token is sent by FLEX when EOF is detected). The first opens a file and points the input to that file. The second closes the file and points the input back to stdin.
I'm trying to make a rule eval "file_path" to load tokens from a file and evaluate them. Initially I have yyin = stdin (I use the function setStandardInput() to do this).
When a user enters eval "file_path", the parser switches yyin from stdin to the file pointer (with the function setFileInput()) and the tokens are read correctly.
When the END rule is reached by the parser, it tries to restore the stdin input but it gets stuck: the calculator doesn't exit, but what I type at the input isn't evaluated.
Note: I supposed there are no errors in the grammar, because error recovery it's not complete. In the file_path you can use simple arithmetic operations.
In summary, I want to switch between stdin and file pointers as inputs, but when I switch back to stdin it gets stuck, except when I start the calculator with stdin as the default.
%{
/* Library includes */
#include <stdio.h>
#include <math.h>
#include "utils/fileutils.h"
#include "lex.yy.h"
#include "utils/errors.h"
#include "utils/stringutils.h"
#include "table.h"
void setStandardInput();
void setFileInput(char * filePath);
/* External functions and variables from flex */
extern size_t yyleng;
extern FILE * yyin;
extern int parsing_line;
extern char * yytext;
//extern int yyerror(char *s);
extern int yyparse();
extern int yylex();
int yyerror(char * s);
%}
/***** TOKEN DEFINITION *****/
%union{
char * text;
double value;
}
%type <value> exp asig
%token LS
%token EVAL
%token <text> ID
%token <text> VAR
%token <value> FUNCTION
%token <value> LEXEME
%token <value> RESERVED_WORD
%token <value> NUMBER
%token <value> INTEGER
%token <value> FLOAT
%token <value> BINARY
%token <value> SCIENTIFIC_NOTATION
%token <text> CLOSED_STRING
%token DOCUMENTATION
%token COMMENT
%token POW
%token UNRECOGNIZED_CHAR
%token MALFORMED_STRING_ERROR
%token STRING_NOT_CLOSED_ERROR
%token COMMENT_ERROR
%token DOCUMENTATION_ERROR
%token END
%right '='
%left '+' '-'
%left '/' '*'
%left NEG_MINUS
%right '^'
%right '('
%%
input: /* empty_expression */ |
input line
;
/* One unit of calculator input: a blank line, an assignment/expression
   (its value is echoed), a table listing, an eval of a file, or the END
   token sent by the scanner. */
line: '\n'
| asig '\n' { printf("\t%f\n", $1); }
| asig END { printf("\t%f\n", $1); }
| LS { print_table(); }
| EVAL CLOSED_STRING '\n' {
    // Getting the file path
    char * filePath = deleteStringSorroundingQuotes($2);
    setFileInput(filePath);
  } /* this closing brace was missing, which left the action unterminated */
| END { closeFile(yyin); setStandardInput();}
;
/* Arithmetic expressions. Every alternative assigns $$ explicitly: when an
   action leaves it unset, bison's default `$$ = $1` would copy a value of
   the wrong union member (e.g. a char* read back as a double). Failures
   are reported through yyerror and evaluate to 0 so parsing continues. */
exp: NUMBER { $$ = $1; }
| VAR {
    lex * result = table_search($1, LEXEME);
    if(result != NULL) $$ = result->value;
    else {
        yyerror("That identifier is not defined.");
        $$ = 0;
    }
}
| VAR '(' exp ')' {
    lex * result = table_search($1, FUNCTION);
    // If the result is a function, then invokes it
    if(result != NULL) $$ = (*(result->function))($3);
    else {
        yyerror("That identifier is not a function.");
        $$ = 0;
    }
}
| exp '+' exp { $$ = $1 + $3; }
| exp '-' exp { $$ = $1 - $3; }
| exp '*' exp { $$ = $1 * $3; }
| exp '/' exp {
    /* Report the error only when the divisor really is zero; previously
       the message was printed after every division. */
    if($3 != 0){
        $$ = $1 / $3;
    }else{
        yyerror("You can't divide a number by zero");
        $$ = 0;
    }
}
| '-' exp %prec NEG_MINUS { $$ = -$2; }
| exp '^' exp { $$ = pow($1, $3); }
| '(' exp ')' { $$ = $2; }
| '(' error ')' {
    yyerror("An error has ocurred between the parenthesis."); yyerrok; yyclearin;
    $$ = 0;
}
;
asig: exp { $$ = $1; }
| VAR '=' asig {
int type = insertLexeme($1, $3);
if(type == RESERVED_WORD){
yyerror("You tried to assign a value to a reserved word.");
YYERROR;
}else if(type == FUNCTION){
yyerror("You tried to assign a value to a function.");
YYERROR;
}
$$ = $3;
}
;
%%
/* Points the scanner's input at stdin, announces it, and then starts a
   parse on it.
   NOTE(review): this calls yyparse() directly, and the function is itself
   invoked from grammar actions, so the parser is re-entered from inside a
   running parse. */
void setStandardInput()
{
    fputs("Starting standard input:\n", stdout);
    yyin = stdin;
    yyparse();
}
/* Redirects the scanner to read from the file at filePath. When the file
   cannot be opened, a notice is printed and input falls back to stdin via
   setStandardInput(). */
void setFileInput(char * filePath)
{
    FILE * opened = openFile(filePath);

    if (opened != NULL) {
        yyin = opened;
        return;
    }

    printf("The file couldn't be loaded. Redirecting to standard input: \n");
    setStandardInput();
}
/* Calculator entry point: builds and seeds the symbol table, switches to
   stdin, keeps parsing until yyparse() returns 1, then prints and frees
   the table. */
int main(int argc, char ** argv)
{
    create_table();     /* table instantiation */
    initTable();        /* symbol table initialisation */
    setStandardInput(); /* yyin = stdin */

    /* Keep restarting the parser until it reports status 1. */
    for (;;) {
        if (yyparse() == 1) {
            break;
        }
    }

    print_table();
    destroyTable();     /* release the table's memory */
    return 0;
}
/* Error callback: prints the message framed by dashes together with the
   scanner's current line counter. Always returns 0. */
int yyerror(char * s)
{
    const int current_line = parsing_line;

    fprintf(stdout, "---------- Error in line %d --> %s ----------------\n", current_line, s);
    return 0;
}
It's not too difficult to create a parser and a scanner which can be called recursively. (See below for an example.) But neither the default bison-generated parser nor the flex-generated scanner is designed to be reentrant. So with the default parser/scanner, you shouldn't call yyparse() inside setStandardInput, because that function is itself called by yyparse.
If you had a recursive parser and scanner, on the other hand, you could significantly simplify your logic. You could get rid of the END token (which is, in any case, practically never a good idea) and just recursively call yyparse in your action for EVAL CLOSED_STRING '\n'.
If you want to use the default parser and scanner, then your best solution is to use Flex's buffer stack to push and later pop a "buffer" corresponding to the file to be evaluated. (The word "buffer" here is a bit confusing, I think. A Flex "buffer" is actually an input source, such as a file; it's called a buffer because only a part of it is in memory, but Flex will read the entire input source as part of processing a "buffer".)
You can read about the buffer stack usage in the flex manual, which includes sample code. Note that in the sample code, the end of file condition is entirely handled inside the scanner, which is usual for this architecture.
It is possible in this case to manufacture an end-of-file indicator (although you cannot use END because that is used to indicate the end of all input). That has the advantage of ensuring that the contents of the evaluated file are parsed as a whole, without leaking a partial parse back to the including file, but you will still want to pop the buffer stack inside the scanner because it is annoyingly tricky to get end-of-file handling correct without violating any of the API constraints (one of which is that you cannot reliably read EOF twice on the same "buffer").
In this case, I would recommend generating a reentrant parser and scanner and simply doing a recursive call. It's a clean and simple solution, and avoiding global variables is always good.
A simple example. The simple language below only has echo and eval statements, both of which require a quoted string argument.
There are a variety of ways to hook together a reentrant scanner and reentrant parser. All of them have some quirks and the documentation (although definitely worth reading) has some holes. This is a solution which I've found useful. Note that most of the externally visible functions are defined in the scanner file, because they rely on interfaces defined in that file for manipulating the reentrant scanner context object. You can get flex to export a header with the appropriate definitions, but I've generally found it simpler to write my own wrapper functions and export those. (I don't usually export yyscan_t either; normally I create a context object of my own which has a yyscan_t member.)
There is an annoying circularity which is largely the result of bison not allowing for the possibility to introduce user code at the top of yyparse. Consequently, it is necessary to pass the yyscan_t used to call the lexer as an argument to yyparse, which means that it is necessary to declare yyscan_t in the bison file. yyscan_t is actually declared in the scanner generated file (or the flex-generated header, if you've asked for one), but you can't include the flex-generated header in the bison-generated header because the flex-generated header requires YYSTYPE which is declared in the bison-generated header.
I normally avoid this circularity by using a push parser, but that's pushing the boundaries for this question, so I just resorted to the usual work-around, which is to insert
typedef void* yyscan_t;
in the bison file. (That's the actual definition of yyscan_t, whose actual contents are supposed to be opaque.)
I hope the rest of the example is self-evident, but please feel free to ask for clarification if there is anything which you don't understand.
file recur.l
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "recur.tab.h"
%}
%option reentrant bison-bridge
%option noinput nounput nodefault noyywrap
%option yylineno
%%
 /* Keywords. They are listed before the identifier rule so that they win when both match the same text. */
"echo" { return T_ECHO; }
"eval" { return T_EVAL; }
 /* Identifier: the parser receives a heap-allocated copy of the matched text. */
[[:alpha:]][[:alnum:]]* {
yylval->text = strdup(yytext);
return ID;
}
 /* A quote that does not begin a complete single-line string (the longer rule below did not match):
    report it and keep scanning; no token is returned for it. */
["] { yyerror(yyscanner, "Unterminated string constant"); }
 /* Quoted string on one line: copy the characters between the quotes into a new NUL-terminated buffer. */
["][^"\n]*["] {
yylval->text = malloc(yyleng - 1);
memcpy(yylval->text, yytext + 1, yyleng - 2);
yylval->text[yyleng - 2] = '\0';
return STRING;
}
 /* A lone "." is returned as itself. This rule comes before the number rule, which could also match it. */
"." { return yytext[0]; }
 /* Digits with an optional fractional part, converted with strtod(). */
[[:digit:]]*("."[[:digit:]]*)? {
yylval->number = strtod(yytext, NULL);
return NUMBER;
}
 /* Runs of spaces and tabs are discarded. */
[ \t]+ ;
 /* Any other character, including newline, is passed to the parser as a single-character token. */
.|\n { return yytext[0]; }
%%
/**
 * Parse one input with a fresh reentrant scanner.
 *
 * @param path  File to read; NULL or "-" selects stdin.
 * @return 0 on success; 1 if the file could not be opened or the scanner
 *         could not be initialised; otherwise the value returned by yyparse().
 */
int parseFile(const char* path) {
  FILE* in = stdin;
  if (path && strcmp(path, "-") != 0) {
    in = fopen(path, "r");
    if (!in) {
      fprintf(stderr, "Could not open file '%s'\n", path);
      return 1;
    }
  }

  yyscan_t scanner;
  /* yylex_init() signals failure with a non-zero result; in that case the
   * scanner handle must not be used, and the input must still be closed. */
  if (yylex_init(&scanner) != 0) {
    fprintf(stderr, "Could not initialise scanner\n");
    if (in != stdin) fclose(in);
    return 1;
  }

  yyset_in(in, scanner);
  int rv = yyparse(scanner);
  yylex_destroy(scanner);

  /* stdin is not ours to close. */
  if (in != stdin) fclose(in);
  return rv;
}
/* Error callback: writes msg to stderr, prefixed with the line number
 * currently recorded in the given scanner. */
void yyerror(yyscan_t yyscanner, const char* msg) {
  const int line = yyget_lineno(yyscanner);
  fprintf(stderr, "At line %d: %s\n", line, msg);
}
file recur.y
/* recur.y -- reentrant grammar for a tiny language with only "echo" and
 * "eval" statements, each taking a quoted string argument. */
%code requires {
  /* The scanner handle must be visible wherever yyparse() is declared.
   * The guard is the one flex itself uses, so seeing flex's own definition
   * as well does not redefine the typedef. */
  #ifndef YY_TYPEDEF_YY_SCANNER_T
  #define YY_TYPEDEF_YY_SCANNER_T
  typedef void* yyscan_t;
  #endif

  /* Defined in recur.l: parses the named file (NULL or "-" means stdin)
   * and returns 0 on success. */
  int parseFile(const char* path);
}
%code provides {
  /* Both are implemented in recur.l; they are declared here because their
   * signatures need YYSTYPE and yyscan_t. */
  int yylex(YYSTYPE* yylval_param, yyscan_t yyscanner);
  void yyerror(yyscan_t yyscanner, const char* msg);
}
%code {
  #include <stdio.h>
  #include <stdlib.h>
}
%define api.pure full
/* Extra argument of yyparse(), forwarded to both yylex() and yyerror(). */
%param { yyscan_t scanner }
%union {
  char* text;
  double number;
}
/* Named T_ECHO / T_EVAL to match what the scanner returns (ECHO is already
 * a macro inside flex-generated scanners). */
%token T_ECHO "echo" T_EVAL "eval"
/* The value tags are what make $2 usable in the actions below. */
%token <text> STRING ID
%token <number> NUMBER
%%
program: %empty | program command '\n'
command: echo | eval | %empty
/* STRING values are heap-allocated by the scanner; the action that consumes
 * one is responsible for freeing it. */
echo: "echo" STRING { printf("%s\n", $2); free($2); }
/* Recursively parse the named file with its own scanner instance. Any
 * failure there (including failure to open the file, which parseFile()
 * reports on stderr) aborts this parse as well. */
eval: "eval" STRING {
          int rc = parseFile($2);
          free($2);
          if (rc != 0) {
            YYABORT;
          }
      }
%%

bison how to call pointer declared in %union

Here is a little part of my code, and I got an error saying
request for member 's' is something not a structure or a union.
I get this error because I no longer need to use s, since I specified its type. The problem is that I need another way to refer to that 's' instead of $3.s, and I can't find out how to do that. If I write only $3, I no longer get an error at '$3.s[0]', but I do get an error at 'strcpy($3.s, $3.s+1)'.
I am new to lex & yacc, and what I know so far doesn't help me solve this.
%union{
int i;
char *s;
}
/* NOTE(review): bison expects the operands of %left to be separated by whitespace, not commas. */
%left '+','-'
%left '*','/'
%left UNARYMINUS
/* expr carries the int member i; instr, WORD and SPACE carry the char* member s. */
%type <i> expr
%type <s> instr
%token <i> NUMBER
%token <s> WORD
%token <s> SPACE
%%
instr: SPACE instr { }
|WORD '=' expr ';' {
/* Because of the tags declared above, $1 already denotes the char* member and $3 already denotes
   the int member, so the ".s" suffixes used below are applied to a non-struct value. That is what
   produces "request for member 's' in something not a structure or union". */
int v;
if ($3.s[0]=='$')
{
fprintf(fout, "\tmove\t$%d, %s\n\n", variabile($1.s), $3.s);
strcpy($3.s, $3.s+1);
v=atoi($3.s);
if (v>nvar)
erasereg(v);
}
else
/* NOTE(review): this branch calls variabila() while the branch above calls variabile(); confirm which name is intended. */
fprintf(fout, "\taddi\t$%d, $0, %s\n\n", variabila($1.s), $3.s);
free($1.s);
free($3.s);
}
;
With %type <i> expr, you tell Yacc that expr is an integer but you still check whether it points to $. It's either one or the other. Instead of trying to cram all the functionality into the block that parses instr, you could:
match $variables with lex rules and look it up in the symbol table there
"$"[A-Za-z][A-Za-z0-9]* { return var_lookup(yytext); }
Or you could look them up in the yacc rule for expr
expr: WORD {
$$ = $1[0]=='$' ? var_lookup($1) : atoi($1);
}
Also, arguments to %left are separated by spaces, not commas. And a pointer that is not a function pointer cannot be "called"; you use it or dereference it.

Flex and Bison Calculator

I'm trying to implement a calculator for nor expressions, such as true nor true nor (false nor false) using Flex and Bison, but I keep getting my error message back. Here is my .l file:
%{
#include <stdlib.h>
#include "y.tab.h"
%}
%%
 /* Both keywords yield the same BOOLEAN token; no semantic value is stored to tell them apart. */
("true"|"false") {return BOOLEAN;}
 /* Every other character, including newline, is reported through yyerror(); no token is returned for it. */
.|\n {yyerror();}
%%
/* Program entry point: runs the parser once. The parser's result is not
 * inspected; the process always exits with status 0. */
int main(void)
{
    (void)yyparse();
    return 0;
}
/*
 * Called by the scanner when it reaches end of input.
 *
 * Returns 1 to report that there is no further input, so scanning ends.
 * Returning 0 would instead tell the scanner that more input has been set
 * up and that it should continue, which is not the case here.
 */
int yywrap(void)
{
    return 1;
}
/*
 * Error hook: prints a fixed message to stdout.
 *
 * Returns 0. The function is declared as returning int, so it must return
 * a value on every path.
 */
int yyerror(void)
{
    printf("Error\n");
    return 0;
}
Here is my .y file:
/* Bison declarations. */
%token BOOLEAN
/* NOTE(review): 'nor' is written as a character literal but contains three characters; a named
   token returned by the scanner would be needed for this operator. */
%left 'nor'
%% /* The grammar follows. */
/* input: zero or more lines. */
input:
/* empty */
| input line
;
/* line: either a bare newline or an expression followed by a newline, whose value is printed. */
line:
'\n'
/* NOTE(review): the value is printed with %s, while the exp rules below produce 0/1 results; confirm the intended format. */
| exp '\n' { printf ("%s",$1); }
;
exp:
/* $1 is whatever semantic value accompanied the BOOLEAN token. */
BOOLEAN { $$ = $1; }
/* nor: the result is 1 only when both operands are 0. */
| exp 'nor' exp { $$ = !($1 || $3); }
| '(' exp ')' { $$ = $2; }
;
%%
Does anyone see the problem?
The simple way to handle all the single-character tokens, which as @vitaut correctly says you aren't handling at all yet, is to return yytext[0] for the dot rule, and let the parser sort out which ones are legal.
You have also lost the values of the BOOLEANs 'true' and 'false', which should be stored into yylval as 1 and 0 respectively, which will then turn up in $1, $3 etc. If you're going to have more datatypes in the longer term, you need to look into the %union directive.
The reason why you get errors is that your lexer only recognizes one type of token, namely BOOLEAN, but not the newline, parentheses or nor (and you produce an error for everything else). For single letter tokens like parentheses and the newline you can return the character itself as a token type:
\n { return '\n'; }
For nor, though, you should introduce a token type like you did for BOOLEAN and add an appropriate rule to the lexer.

Resources