I am trying to generate intermediate code for the following grammar, which covers a while loop and general statements, but I keep getting a syntax error. The grammar for statements and expressions worked earlier, but after I added the production for the while loop the program stopped working.
this is my lex.l file
%{
#include "y.tab.h"
%}
NUMBER [0-9]
ALPHABET [a-zA-Z]
%%
[\t];
{NUMBER}+ { strcpy(yylval.str, yytext); return ID; }
{ALPHABET} { strcpy(yylval.str, yytext); return ID; }
"while" { return WHILE; }
"do" { return DO; }
"<" { yylval.symbol=yytext[0]; return OP; }
">" { yylval.symbol=yytext[0]; return OP; }
"!=" { yylval.symbol=yytext[0]; return OP; }
"==" { yylval.symbol=yytext[0]; return OP; }
[\n];
. { return yytext[0]; }
%%
And this is my yacc.y file
%{
#include <stdio.h>
#include <string.h>
char result_gen();
char quadruple_entry(char a[], char b, char c[]);
char quadruple_entry_assign(char a[], char b, char c[]);
char quadruple_entry_loop();
char quadruple_entry_do();
void three_address_code();
int q_index = 0;
char result[3] = {'t','0','\0'};
char result2[3] = {'L','0','\0'};
char temp[3];
char temp2[3];
struct QuadrupleStructure {
char arg1[10];
char op;
char arg2[10];
char rslt[3];
}quadruple[25];
%}
%union {
char str[10];
char symbol;
}
%token WHILE DO
%token <str> ID
%token <symbol> OP
%type <str> expr
%right '='
%left '+' '-'
%left '/' '*'
%%
wstmt : WHILE { quadruple_entry_loop(); } stmt DO { quadruple_entry_do(); }
;
stmt : ID '=' expr { quadruple_entry_assign($1, '=', $3); }
| ID OP ID { quadruple_entry($1,$2,$3); }
;
expr : expr '+' expr { quadruple_entry($1, '+', $3); strcpy($$,temp); }
| expr '-' expr { quadruple_entry($1, '-', $3); strcpy($$,temp); }
| expr '/' expr { quadruple_entry($1, '/', $3); strcpy($$,temp); }
| expr '*' expr { quadruple_entry($1, '*', $3); strcpy($$,temp); }
| '(' expr ')' { strcpy($$,$2); }
| ID { strcpy($$,$1); }
;
%%
/*
 * Hand out the next temporary name.
 *
 * Copies the name currently held in the global `result` (initially "t0")
 * into the global `temp`, then bumps the second character of `result` so
 * the following call yields the next name.  Only result[1] is advanced, so
 * that character stays a decimal digit for the first ten names only.
 *
 * Returns 0.  The explicit return replaces the original fall-off-the-end of
 * a function declared to return char.
 */
char result_gen() {
    strcpy(temp, result);
    result[1]++;
    return 0;
}
char quadruple_entry(char a[], char b, char c[]) {
result_gen();
strcpy(quadruple[q_index].arg1, a);
quadruple[q_index].op = b;
strcpy(quadruple[q_index].arg2, c);
strcpy(quadruple[q_index].rslt, temp);
q_index++;
}
char quadruple_entry_assign(char a[], char b, char c[]) {
char tempLocal[3] = {' ',' ','\0'};
strcpy(quadruple[q_index].arg1, a);
quadruple[q_index].op = b;
strcpy(quadruple[q_index].arg2, c);
strcpy(quadruple[q_index].rslt, tempLocal);
q_index++;
}
char quadruple_entry_loop() {
char tempLocal[3];
strcpy(tempLocal, result2);
char tempLocal2[] = " if ";
char tempLocal3 = ' ';
char tempLocal4[] = " ";
strcpy(quadruple[q_index].rslt, tempLocal);
strcpy(quadruple[q_index].arg1, tempLocal4);
quadruple[q_index].op = tempLocal3;
strcpy(quadruple[q_index].arg2, tempLocal2);
q_index++;
}
char quadruple_entry_do() {
char tempLocal[4];
strcpy(tempLocal, result2);
tempLocal[3] = ':';
strcpy(quadruple[q_index].arg1,tempLocal);
char tempLocal2[] = " ";
char tempLocal3 = ' ';
quadruple[q_index].op = tempLocal3;
strcpy(quadruple[q_index].arg2, tempLocal2);
q_index++;
result2[1]++;
char tempLocal4[4];
strcpy(tempLocal4, result2);
tempLocal4[3] = ':';
strcpy(quadruple[q_index].arg1,tempLocal4);
char tempLocal5[] = " ";
char tempLocal6 = ' ';
quadruple[q_index].op = tempLocal6;
strcpy(quadruple[q_index].arg2, tempLocal5);
q_index++;
result2[1]++;
}
void three_address_code() {
int i;
for(i=0 ; i<q_index ; i++)
printf("\n%s := %s %c %s", quadruple[i].rslt, quadruple[i].arg1, quadruple[i].op, quadruple[i].arg2);
}
/*
 * Error callback used by the generated parser: writes the message to
 * stdout, prefixed with "Errror " and without a trailing newline.
 */
void yyerror(char *s){
    fprintf(stdout, "Errror %s", s);
}
/*
 * End-of-input hook for the scanner.  Always reports 1, i.e. there is no
 * further input to continue with.
 */
int yywrap() {
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
/* Entry point: run the parser, then print whatever quadruples it stored. */
int main() {
yyparse(); /* the grammar actions fill the quadruple table */
three_address_code(); /* printed even if yyparse() reported an error */
return 0;
}
Input:
i=2*5-10
while i<5 do
Output:
Errror syntax error
If someone can figure out where the grammar is wrong, or if my code is wrong, it'd be very helpful.
Your start symbol is wstmt, so the program accepts a single while statement as an input. If you need to accept a sequence of statements, you need a symbol that expands into a sequence of statements as your starting symbol.
In addition, your lexer doesn't swallow spaces, so any program that has spaces has an error.
In my opinion, your rule
wstmt : WHILE { quadruple_entry_loop(); } stmt DO { quadruple_entry_do(); }
;
is wrong. As your stmt only considers assignment expressions, you should include a stmt in the expression for the syntax to be valid.
Your syntax rule for wstmt, without interspersed code is:
wstmt : WHILE stmt DO ;
you should change it into:
wstmt : WHILE expr DO stmt ;
and the precise points to do the proper output of code should be:
wstmt: WHILE {
/* get a new label and place it at
* this point, you'll need to jump
* here, push the label name in a
* stack */
}
expr {
/* include code here to evaluate
* (probably you do it inside expr */
}
DO {
/* get a new label but don't place it
* yet, and push it's name in the
* stack */
}
stmt {
/* a jump to the first label you
* pushed (the one that is already
* placed), then emit code for the
* second label (the one that is not
* placed yet */
};
(and you should include possibilities to use the < and > and evaluating boolean operators in the expr syntax, also)
the stmt nonterminal forces what you put as while condition to be an assignment, and this is not what you have written as input.
In my humble opinion, you should implement this compiler in two phases. First, get full language parsing working (it is a separate, unrelated, and completely different problem). Once the parser builds the right syntax tree (you can check this by actually building the tree and printing it) and you have it working, you can then intersperse the code-generation actions.
Related
Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 2 years ago.
Improve this question
I tried the following input
a = 10;
print a;
print 1+2+3;
a = 5+10;
I get syntax error when I try to execute the file with above input
There's no error during compilation
Here's the code
Flex
%{
/* header files */
%}
/* regex */
%option yylineno
%%
"println" { printf("token is println"); return(TOK_PRINTLN);}
"print" { printf("token is print"); return(TOK_PRINTLN); }
"main()" { return(TOK_MAIN); }
{digit}+ { /* convert to int and store its val*/
printf("token is %d", yylval.int_val);
return INTEGER;
}
{id} {
/* convert to char */
printf("token is %c", yylval.id_val);
return(TOK_ID);
}
";" { return(TOK_SEMICOLON); }
"+" { return(TOK_ADD); }
"-" { return(TOK_SUB); } /* not req */
"*" { return(TOK_MUL); }
"/" { return(TOK_DIV); } /* not req */
"=" { return(TOK_EQ); }
[ \t\n] {printf("token is space");}
. {printf("Invalid character '%c', ignored\n",
yytext[0]);
}
%%
For bison, we use a symbol table which is an array.
We get the variable (identifier represented by TOK_ID) and convert it to index where we can store value of the expression.
Bison File
%{
/* header file and fucn dec*/
%}
%union{
int int_val;
char id_val;
}
/* tokens and types */
%start stmt
%right TOK_EQ
%left TOK_ADD TOK_SUB
%left TOK_MUL TOK_DIV
%%
/* grammar */
stmt: expr_stmt TOK_SEMICOLON
{; /* do nothing*/
}
| TOK_PRINTLN expr TOK_SEMICOLON
{
printf("%d \n",$2);
}
| stmt TOK_PRINTLN expr TOK_SEMICOLON
{
printf("%d \n",$3);
}
| stmt expr TOK_SEMICOLON
{
;
}
;
expr_stmt: TOK_ID TOK_EQ expr
{
setSTVal($1, $3);
}
;
expr:
/*expr stuff */
;
%%
int getSTIndex(char c){
/* return index*/
}
int getSTVal(char c){
/* set val*/
}
void setSTVal(char c, int v){
/* set table val*/
}
int yyerror(char *s)
{
printf("\nsyntax error on line no %d\n",yylineno);
return 0;
}
void initializeSymbolTable(){
for(int i=0; i<100; i++)symbol_table[i] = 0; /*avoiding garbage val*/
/* initializn stuff */
}
int main()
{
initializeSymbolTable();
yyparse(); /* C routine produced by lex */
return 0;
}
When I tried to debug with the inputs a=5; and a = 5;
it captured the token a but threw a syntax error right after that.
It couldn't capture = or anything after it.
I can't figure out why it captures only the first digit/command/string and then throws a syntax error.
If I simplify your grammar a bit to
/* ... */
%start input
/* ... */
input: /* empty file/no input */
| input stmt
/* each statement is an "expr" followed by a semicolon */
stmt: expr TOK_SEMICOLON
{
;
}
/* This is a function and should go into the rule "expr", too, btw. */
| TOK_PRINTLN expr TOK_SEMICOLON
{
printf("%d \n",$2);
}
;
expr: /* empty expression */
expr TOK_ADD expr
{
$$ = $1 + $3;
}
/* ... */
| INTEGER
{
$$ = $1;
};
| TOK_ID
{
$$ = getSTVal($1);
}
| TOK_ID TOK_EQ expr
{
setSTVal($1, $3);
}
;
It works with the input file
a = 10;
print
a;
print 1+2+3;
a = 5
+
10;
print a;
as expected. It is not very elegant but should point you in the right direction.
Your problem was that TOK_ID appears in two rules: the second occurrence of TOK_EQ arrived while the parser was inside expr, where there is a rule for TOK_ID alone but none for TOK_ID TOK_EQ. (Admittedly, it is a bit more complicated than that.)
If you have the Bison documentation at hand you might look for the mfcalc example.
I'm writing a translator for uni project which should translate given Pascal code into assembler code using flex/bison. I've written parser and lexer, which generates symbol table (atm works correctly only without procedures and functions). And my question is, how do I generate assembler code from it and print it to file.
Here is my lexer:
%{
#include "parser.tab.h"
#include <string.h>
#define YY_FLEX_DEBUG 1
%}
/* Named patterns used by the rules below. */
letter [a-zA-Z]
digit [0-9]
ID {letter}({letter}|{digit})*
delim [ \t\n]
/* The exponent digits must reference the {digit} definition; written as
 * "(digit)" the pattern would match the literal text "digit" instead. */
NUM {digit}+(\.{digit}+)?(E[+\-]?{digit}+)?
ws {delim}+
%%
{ws} { }
if {return(IF); }
then {return(THEN); }
else {return(ELSE); }
{NUM} {yylval.stringValue = strdup(yytext); return(NUM); }
"<" {yylval.stringValue = "<"; return(RELOP); }
"<=" {yylval.stringValue = "<="; return(RELOP); }
"=" {yylval.stringValue = "="; return(RELOP); }
">" {yylval.stringValue = ">"; return(RELOP); }
">=" {yylval.stringValue = ">="; return(RELOP); }
"<>" {yylval.stringValue = "<>"; return(RELOP); }
":=" {return(ASSIGNOP); }
do {return(DO); }
program {return(PROGRAM); }
var {return(VAR); }
array {return(ARRAY); }
of {return(OF); }
integer {return(INTEGER); }
real {return(REAL); }
function {return(FUNCTION); }
procedure {return(PROCEDURE); }
begin {return(START); }
end {return(END); }
div {yylval.stringValue = "div"; return(MULOP); }
mod {yylval.stringValue = "mod"; return(MULOP); }
and {yylval.stringValue = "and"; return(MULOP); }
"*" {yylval.stringValue = "*"; return(MULOP); }
"/" {yylval.stringValue = "/"; return(MULOP); }
while {return(WHILE); }
or {return(OR); }
"+" {yylval.stringValue = "+"; return(SIGN); }
"-" {yylval.stringValue = "-"; return(SIGN); }
".." {return(DOUBLEDOT); }
"," {return *yytext; }
"(" {return *yytext; }
")" {return *yytext; }
"[" {return *yytext; }
"]" {return *yytext; }
";" {return *yytext; }
":" {return *yytext; }
"." {return *yytext; }
not {return(NOT); }
{ID} {yylval.stringValue= strdup(yytext); return(ID);}
%%
/*
 * End-of-input hook called by the scanner.  Returns 1 to say there is no
 * further input.  The original body was empty, so the value the scanner
 * read back was indeterminate.
 */
int yywrap(void){ return 1; }
Here is my parser:
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "SymbolTable.h"
int errors;
int lable;
#define YYDEBUG 1
/*
 * Declare sym_name in the symbol table.
 *
 * If the name is not yet known it is added with putsym().  If it already
 * exists, the global error counter is incremented and a message is printed.
 *
 * Returns 0 when the name was newly added, 1 when it was already defined.
 * (The return type was implicit int with no value returned; it is now
 * spelled out.)
 */
int install (char *sym_name)
{
    if (getsym(sym_name) != 0) {
        errors++;
        printf("%s is defined\n", sym_name);
        return 1;
    }

    putsym(sym_name);
    return 0;
}
/*
 * Make sure the numeric literal sym_name has a symbol-table entry: it is
 * added with putnum() the first time it is seen and left alone afterwards.
 *
 * Returns 0.  (The return type was implicit int with no value returned; it
 * is now spelled out.)
 */
int install_num (char *sym_name)
{
    if (getsym(sym_name) == 0) {
        putnum(sym_name);
    }
    return 0;
}
/*
 * Check that sym_name has been declared; print a message if it has not.
 * The global error counter is not touched here.
 *
 * Returns 0 when the name is known, 1 when it is undeclared.  (The return
 * type was implicit int with no value returned; it is now spelled out.)
 */
int context_check(char *sym_name)
{
    if (getsym(sym_name) == 0) {
        printf("%s is undeclared\n", sym_name);
        return 1;
    }
    return 0;
}
%}
%union
{
int intValue;
float floatValue;
char *stringValue;
int adress;
}
%start program
%token <stringValue> ID
%token <stringValue> NUM
%token IF THEN PROGRAM VAR ARRAY
%token OF INTEGER REAL
%token FUNCTION PROCEDURE
%token START END
%token ASSIGNOP RELOP MULOP
%token ELSE WHILE DO
%token SIGN OR
%token DOUBLEDOT
%token NOT
%left '-' '+'
%left '*' '/'
%%
program: PROGRAM ID '(' prog_list ')' ';' declarations subprogram_declarations compound_statement '.'
;
prog_list: ID
| prog_list ',' ID
;
identifier_list: ID {install($1);}
| identifier_list ',' ID {install($3);}
;
declarations: declarations VAR identifier_list ':' type ';'
| /* empty */
;
type: standart_type
| ARRAY '[' NUM DOUBLEDOT NUM ']' OF REAL {set_type("REALARR");}
| ARRAY '[' NUM DOUBLEDOT NUM ']' OF INTEGER {set_type("INTARR");}
;
standart_type: INTEGER {set_type("INTEGER");}
| REAL {set_type("REAL");}
;
subprogram_declarations: subprogram_declarations subprogram_declaration ';'
| /* empty */
;
subprogram_declaration: subprogram_head declarations compound_statement;
subprogram_head: FUNCTION ID arguments ':' INTEGER ';' {install($2); set_type("INTEGER");}
| FUNCTION ID arguments ':' REAL ';' {install($2); set_type("REAL");}
| PROCEDURE ID arguments ';' {install($2); set_proc($2);}
;
arguments: '(' parameter_list ')'
| /* empty */;
parameter_list: identifier_list ':' type
| parameter_list ';' identifier_list ':' type
;
compound_statement: START
optional_statements END
;
optional_statements: statement_list
| /* empty */
;
statement_list: statement
| statement_list ';' statement
;
statement: variable ASSIGNOP expression
| procedure_statement
| compound_statement
| IF expression THEN statement ELSE statement
| WHILE expression DO statement
;
variable: ID {context_check($1);}
| ID '[' expression ']' {context_check($1);}
;
procedure_statement: ID
| ID '(' expression_list ')'
;
expression_list: expression
| expression_list ',' expression
;
expression: simple_expression
| simple_expression RELOP simple_expression
;
simple_expression: term
| SIGN term
| simple_expression SIGN term
| simple_expression OR term
;
term: factor
| term MULOP factor
;
factor: variable
| ID '(' expression_list ')' {context_check($1);}
| NUM {install_num($1);}
| '(' expression ')'
| NOT factor
;
%%
main (int argc, char *argv[]) {
FILE *output = fopen("output.asm", "w");
fprintf(output, "\t jump.i #lab0\n");
extern FILE *yyin;
++argv; --argc;
yyin = fopen(argv[0], "r");
yydebug = 1;
errors = 0;
yyparse();
print_sym_table();
fprintf(output, "\t exit");
fclose(output);
}
/*
 * Called by yyparse on error: counts the error in the global `errors` and
 * prints the message on its own line.
 *
 * Returns 0.  The return type used to be implicit int with no value
 * returned; it is spelled out so the definition is valid in C99 and later.
 */
int yyerror (char *s)
{
    errors++;
    printf ("%s\n", s);
    return 0;
}
Here is symbol table:
/* One symbol-table entry; entries form a singly linked list. */
struct symrec
{
char *name; /* heap copy of the identifier or number text */
int addr; /* not assigned by any function visible in this file */
char *type; /* heap copy of a type name; NULL until a type is assigned */
struct symrec *next; /* entry that was inserted before this one */
};
typedef struct symrec symrec;
/* Head of the list; new entries are pushed at the front. */
symrec *sym_table = (symrec *)0;
/* Declarations without parameter lists; the definitions follow below. */
symrec *putsym();
symrec *getsym();
symrec *putnum();
void set_type();
void set_proc();
void set_func(); /* NOTE(review): no definition appears in the visible part of this file */
void print_sym_table();
symrec *putsym(char *sym_name)
{
symrec *ptr;
ptr = (symrec *)malloc(sizeof(symrec));
ptr->name = (char *)malloc(strlen(sym_name) + 1);
ptr->type = NULL;
strcpy(ptr->name,sym_name);
ptr->next = (struct symrec *)sym_table;
sym_table = ptr;
return ptr;
}
symrec *putnum(char *sym_name)
{
symrec *ptr;
char *dPos = strchr(sym_name, '.');
char *ePos = strchr(sym_name, 'e');
ptr = (symrec *)malloc(sizeof(symrec));
ptr->name = (char *)malloc(strlen(sym_name) + 1);
if ((dPos == NULL) && (ePos == NULL)){
ptr->type = (char *)malloc(strlen("INTEGER") + 1);
strcpy(ptr->type, "INTEGER");
}
else if ((dPos != NULL) && (ePos == NULL)) {
ptr->type = (char *)malloc(strlen("REAL") + 1);
strcpy(ptr->type, "REAL");
}
else {
ptr->type = (char *)malloc(strlen("FLOAT") + 1);
strcpy(ptr->type, "FLOAT");
}
strcpy(ptr->name,sym_name);
ptr->next = (struct symrec *)sym_table;
sym_table = ptr;
return ptr;
}
/*
 * Give every entry that has no type yet (type == NULL) its own heap copy of
 * the given type name.  Entries that already have a type are left alone.
 */
void set_type(char *type)
{
    symrec *node = sym_table;

    while (node != (symrec *)0) {
        if (node->type == NULL) {
            char *copy = (char *)malloc(strlen(type) + 1);

            strcpy(copy, type);
            node->type = copy;
        }
        node = (symrec *)node->next;
    }
}
void set_proc(char *sym_name) {
symrec *ptr;
for (ptr = sym_table; ptr != (symrec *)0; ptr = (symrec *)ptr->next)
if (strcmp (ptr->name, sym_name) == 0){
ptr->type = (char *)malloc(strlen("PROC") + 1);
strcpy(ptr->type, "PROC");
}
}
/*
 * Look up sym_name in the symbol table.
 * Returns the first entry (searching from the head of the list) whose name
 * compares equal, or a null pointer when there is none.
 */
symrec *getsym(char *sym_name)
{
    symrec *node = sym_table;

    while (node != (symrec *)0 && strcmp(node->name, sym_name) != 0) {
        node = (symrec *)node->next;
    }
    return node;
}
/*
 * Print every entry of the symbol table, head first, as "<name> <type>"
 * with a newline before and after each entry.
 */
void print_sym_table()
{
    symrec *node = sym_table;

    while (node != (symrec *)0) {
        printf("\n%s %s\n", node->name, node->type);
        node = (symrec *)node->next;
    }
}
Simple test file
program example(input, output);
var x, y: integer;
var g,h:real;
begin
g:=x+y;
write(g)
end.
And what it should print to the output file:
jump.i #lab0 ;jump.i lab0
lab0:
add.i 0,4,24 ;add.i x,y,$t0
inttoreal.i 24,28 ;inttoreal.i $t0,$t1
mov.r 28,8 ;mov.r $t1,g
write.r 8 ;write.r g
exit ;exit
comments (;jump.i lab0) are not necessary.
I know how adresses of variables should be calculated and I can translate pascal code to this assembler on paper, but I really don't understand where and what should I put in bison or flex file so it would generate assembler code into output file. I've tried to generate labels for begin statements in rule :
compound_statement: START {fprintf(output, "lab0\n");}
optional_statements END
But it got segmentation fault. It's pretty obvious how to generate labels, but how should I generate
add.i 0, 4, 24
Should I create another parser after I've built symbol table with this one? Or is it doable without additional parser. Need some hints what to do next.
So you've got this bit of code:
compound_statement: START {fprintf(output, "lab0\n");}
optional_statements END
You're on the right track doing it this way, but you get a segmentation fault when you add it in and this is because output isn't initialised.
I can't see where you've declared the output that is being referenced there, but it isn't the same one that is declared in main where you open a file for output.
main (int argc, char *argv[]) {
FILE *output = fopen("output.asm", "w");
That version output is local to main and only visible inside that function. If you remove the declaration of output from main and leave just the assignment, you'll be assigning the results of fopen to the globally declared version of output that your bison code is using.
main (int argc, char *argv[]) {
output = fopen("output.asm", "w");
Not sure why you're having confusion with the other part of your question since you've demonstrated how to do it already in your parser. Take this bit of your parser:
variable: ID {context_check($1);}
It is taking the value of "ID" - the $1 - and passing it to that function. If you wanted "variable" to contain a value you'd store it in $$. Then when you use "variable" higher up like in here:
statement: variable ASSIGNOP expression
$1 will contain whatever value you put in $$ for "variable". $2 will be the value obtained from "ASSIGNOP" token and $3 will have the results from "expression". And again if you store a value in $$ you'd be able to use it in anything that is expecting a "statement".
$$, $1 etc... are all of the type you've created by using %union, so you can also do $$.intValue or $2.stringValue if you need to specifically state which value you're setting.
In your parser, for example, you have a pattern:
| term MULOP factor
You would like to put an action on that pattern that was something like:
{ fprintf(output, "mul term, factor, result\n"); }
but it starts to get sticky very quickly: where are term, factor and where should you put the result?
The easiest answer is a stack: whenever a variable is referenced, push its value onto the stack. Whenever an operation is matched, pop the operand(s) into registers, perform the operation, and push the result, so the above becomes:
{
fprintf(output, "pop r0; pop r1; mul r1, r0, r0;");
fprintf(output, "push r0\n");
}
and assignments just pop the stack into a variable.
I am trying to display the whole arithmetic expression from text file and its result, I tried it with file handling option but it is not working.
YACC :
%{
#include <stdio.h>
#include <string.h>
#define YYSTYPE int /* the attribute type for Yacc's stack */
extern int yylval; /* defined by lex, holds attrib of cur token */
extern char yytext[]; /* defined by lex and holds most recent token */
extern FILE * yyin; /* defined by lex; lex reads from this file */
%}
%token NUM
%%
Calc : Expr {printf(" = %d\n",$1);}
| Calc Expr {printf(" = %d\n",$2);}
| Calc error {yyerror("\n");}
;
Expr : Term { $$ = $1; }
| Expr '+' Term { $$ = $1 + $3; }
| Expr '-' Term { $$ = $1 - $3; }
;
Term : Fact { $$ = $1; }
| Term '*' Fact { $$ = $1 * $3; }
| Term '/' Fact { if($3==0){
yyerror("Divide by Zero Encountered.");
break;}
else
$$ = $1 / $3;
}
;
Fact : Prim { $$ = $1; }
| '-' Prim { $$ = -$2; }
;
Prim : '(' Expr ')' { $$ = $2; }
| Id { $$ = $1; }
;
Id :NUM { $$ = yylval; }
;
%%
void yyerror(char *mesg); /* this one is required by YACC */
main(int argc, char* *argv){
char ch,c;
FILE *f;
if(argc != 2) {printf("useage: calc filename \n"); exit(1);}
if( !(yyin = fopen(argv[1],"r")) ){
printf("cannot open file\n");exit(1);
}
/*
f=fopen(argv[1],"r");
if(f!=NULL){
char line[1000];
while(fgets(line,sizeof(line),f)!=NULL)
{
fprintf(stdout,"%s",line);
yyparse();
}
}
*/
yyparse();
}
void yyerror(char *mesg){
printf("\n%s", mesg);
}
LEX
%{
#include <stdio.h>
#include "y.tab.h"
int yylval; /*declared extern by yacc code. used to pass info to yacc*/
%}
letter [A-Za-z]
digit ([0-9])*
op "+"|"*"|"("|")"|"/"|"-"
ws [ \t\n\r]+$
other .
%%
{ws} { /*Nothing*/ }
{digit} { yylval = atoi(yytext); return NUM;}
{op} { return yytext[0];}
{other} { printf("bad%cbad%d\n",*yytext,*yytext); return '?'; }
%%
My Text file contains these two expressions :
4+3-2*(-7)
9/3-2*(-5)
I want output as :
4+3-2*(-7)=21
9/3-2*(-5)=13
But the Output Is :
=21
=13
Because the parser performs all the calculations in a single yyparse() call, the commented-out code above is not a legitimate approach. What I need is to pass the input expression through to the grammar and print it in the Calc block. I could not find anything relevant on Google about displaying the input from within the grammar. Thanks in advance for comments and suggestions.
You don't want to do this in the grammar. Too complicated, and too subject to whatever rearrangement the grammar may do. You could consider doing it in the lexer, i.e. print yytext in every action other than the whitespace action, just before you return it, but I would echo all the input as it is read, by overriding lex(1)'s input function.
NB You should be using flex(1), not lex(1), and note that if you change, yytext ceases being a char[] and becomes a char *.
I didn't mention it in your prior question, but this rule:
{other} { printf("bad%cbad%d\n",*yytext,*yytext); return '?'; }
would better be written as:
{other} { return yytext[0]; }
That way the parser will see it and produce a syntax error, so you don't have to print anything yourself. This technique also lets you get rid of the rules for the individual special characters +, -, *, /, (, ), as the parser will recognize them via yytext[0].
Finally, I got it :
YACC
%{
#include <stdio.h>
#include <string.h>
#define YYSTYPE int /* the attribute type for Yacc's stack */
extern int yylval; /* defined by lex, holds attrib of cur token */
extern char yytext[]; /* defined by lex and holds most recent token */
extern FILE * yyin; /* defined by lex; lex reads from this
file */ %}
%token NUM
%%
Calc : Expr {printf(" = %d\n",$1);}
| Calc Expr {printf(" = %d\n",$2);}
| error {yyerror("Bad Expression\n");}
;
Expr : Term { $$ = $1; }
| Expr Add Term { $$ = $1 + $3; }
| Expr Sub Term { $$ = $1 - $3; }
;
Term : Fact { $$ = $1; }
| Term Mul Fact { $$ = $1 * $3; }
| Term Div Fact { if($3==0){
yyerror("Divide by Zero Encountered.");
break;}
else
$$ = $1 / $3;
}
;
Fact : Prim { $$ = $1; }
| '-' Prim { $$ = -$2; }
;
Prim : LP Expr RP { $$ = $2; }
| Id { $$ = $1; }
;
Id :NUM { $$ = yylval; printf("%d",yylval); }
;
Add : '+' {printf("+");}
Sub : '-' {printf("-");}
Mul : '*' {printf("*");}
Div : '/' {printf("/");}
LP : '(' {printf("(");}
RP : ')' {printf(")");}
%%
void yyerror(char *mesg); /* this one is required by YACC */
/*
 * Entry point: parse the file named on the command line.
 *
 * Prints a usage message and returns 1 unless exactly one argument is
 * given, and returns 1 if the file cannot be opened.  Otherwise the file
 * becomes the scanner's input (yyin), yyparse() is run over it, the file
 * is closed, and 0 is returned.
 *
 * Failures use `return` rather than exit(), which this file calls without
 * including <stdlib.h>.  The unused locals were removed.
 */
int main(int argc, char* *argv){
    if (argc != 2) {
        printf("useage: calc filename \n");
        return 1;
    }

    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        printf("cannot open file\n");
        return 1;
    }

    yyparse();
    fclose(yyin);
    return 0;
}
void yyerror(char *mesg){
printf("%s ", mesg);
}
Thanks EJP & EMACS User for responding.
I've written a parser for evaluating a logical expression. I know flex and bison use global variables (like yylval). I want a pure parser and a reentrant scanner for multithreaded programming. My '.y' file is here:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
//int yylex (YYSTYPE* lvalp);
int yylex(void);
bool parseExpression(const std::string& inp);
%}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
void yyerror(char *s) {
fprintf(stderr, "%s\n", s);
}
void main(void) {
std::string inp = "0|0\n";
bool nasi = parseExpression(inp);
printf("%s%d\n", "nasi ", nasi);
printf("Press ENTER to close. ");
getchar();
}
My '.l' file is here:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
%}
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
yylval = false;
//*lvalp = false;
}
else
{
yylval = true;
//*lvalp = true;
}
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror("Unknown character");
%%
int yywrap(void) {
return 1;
}
bool parseExpression(const std::string& inp)
{
yy_delete_buffer(YY_CURRENT_BUFFER);
/*Copy string into new buffer and Switch buffers*/
yy_scan_string(inp.c_str());
bool nasi = yyparse();
return nasi;
}
I've added %pure_parser to both files, changed yylex declaration to int yylex (YYSTYPE* lvalp); and replaced yylval to *lvalp, but I saw an error: 'lvalp' is undeclared identifier.. There are many examples about 'reentrant' and 'pure', but I can't find the best guideline.
Could someone guide me?
Thanks in advance.
Fortunately, I did it. Here is my code. I think it can be a good guideline for anyone who wants to write a pure parser.
My reentrant scanner:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror (yyscan_t yyscanner, char const *msg);
%}
%option reentrant bison-bridge
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
*yylval = false;
}
else
{
*yylval = true;
}
//yylval = atoi(yytext);
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror (yyscanner, "Unknown character");
%%
int yywrap(yyscan_t yyscanner)
{
return 1;
}
bool parseExpression(const std::string& inp)
{
yyscan_t myscanner;
yylex_init(&myscanner);
struct yyguts_t * yyg = (struct yyguts_t*)myscanner;
yy_delete_buffer(YY_CURRENT_BUFFER,myscanner);
/*Copy string into new buffer and Switch buffers*/
yy_scan_string(inp.c_str(), myscanner);
bool nasi = yyparse(myscanner);
yylex_destroy(myscanner);
return nasi;
}
My pure parser:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
typedef void* yyscan_t;
void yyerror (yyscan_t yyscanner, char const *msg);
int yylex(YYSTYPE *yylval_param, yyscan_t yyscanner);
bool parseExpression(const std::string& inp);
%}
%define api.pure full
%lex-param {yyscan_t scanner}
%parse-param {yyscan_t scanner}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
void yyerror (yyscan_t yyscanner, char const *msg){
fprintf(stderr, "%s\n", msg);
}
void main(void) {
std::string inp = "1|0\n";
bool nasi = parseExpression(inp);
printf("%s%d\n", "nasi ", nasi);
printf("Press ENTER to close. ");
getchar();
}
Notice that I've cheated and defined yyg myself as
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
I couldn't find another way to get the YY_CURRENT_BUFFER. So, if someone knows a better way to get the YY_CURRENT_BUFFER, please tell me.
Here is a complete Flex/Bison C++ example. Everything is reentrant, no use of global variables. Both parser/lexer are encapsulated in a class placed in a separate namespace. You can instantiate as many "interpreters" in as many threads as you want.
https://github.com/ezaquarii/bison-flex-cpp-example
Disclaimer: it's not tested on Windows, but the code should be portable with minor tweaks.
I'm writing a simple calculator based on the .gertrude esolang. What I'm trying to do is parse a text file that contains ratios (in the form n/m) with flex, then check whether each ratio is an index for an operation (+ - / *) or a number, and then send the right token to Bison. I get no errors when the code is compiled, but when the program runs it crashes with a segmentation fault (core dumped) for every kind of input (like 1/2 14/10 1/8, which should be 2 + 8).
Here gertrude.l
%{
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "gertrude.tab.h"
void yyerror(char *);
/*
 * Convert a ratio written as "num/den" into an integer.
 *
 * str1 is split in place with strtok() on '/', both parts are converted
 * with atoi(), and the value 1 / (num / den) is returned.
 *
 * NOTE(review): `deli` is written through before it is given an address,
 * and the byte stored there is not followed by a NUL terminator, yet it is
 * passed to strtok() as a delimiter string.
 * NOTE(review): num / den is an integer division, so it is 0 whenever
 * num < den, and the outer division then divides by zero.
 */
int FrazioneToDecimale(char *str1){
int num, den;
unsigned tot;
char *token;
char *deli;
const char del = '/';
*deli = del; /* stores through the uninitialised pointer */
token = strtok (str1, deli); /* first part: numerator text */
num = atoi(token);
token = strtok (NULL, deli); /* second part: denominator text */
den = atoi(token);
tot = 1 / (num/den); /* integer arithmetic throughout */
return tot;
}
%}
%%
/* ratio */
"14/10" {
yylval.sval = '+';
return SOMMA;
}
"11/7" {
yylval.sval = '-';
return SOTTRAZIONE;
}
"6/16" {
yylval.sval = '*';
return MOLTIPLICAZIONE;
}
"5/8" {
yylval.sval = '/';
return DIVISIONE;
}
[0-9]+"/"[0-9]+ {
//yylval = *yytext ;
yylval.ival = FrazioneToDecimale(yytext);
return NUMERO;
}
[ \t] ;
[ \n] { return EOL; };
%%
/*
 * End-of-input hook called by the scanner when it runs out of input.
 *
 * A return of 0 tells the scanner that more input has been set up and
 * scanning should continue; nothing here switches to a new input, so 1 is
 * returned to let the scanner report end of input.
 */
int yywrap(void) {
    return 1;
}
Here gertrude.y
%{
#include <stdio.h>
#include <string.h>
%}
%union {
int ival;
char sval;
}
%type <ival> exp fattore termine
%token <ival> NUMERO
%token <sval> SOMMA SOTTRAZIONE MOLTIPLICAZIONE DIVISIONE
%token EOL
%%
istruzione:
| istruzione exp EOL { printf("= %d\n", $2); }
;
exp: fattore
| exp SOMMA fattore { $$ = $1 + $3; }
| exp SOTTRAZIONE fattore { $$ = $1 - $3; }
;
fattore: termine
| fattore MOLTIPLICAZIONE termine { $$ = $1 * $3; }
| fattore DIVISIONE termine { $$ = $1 / $3; }
;
termine: NUMERO { $$ = $1; }
;
%%
int main(void) {
yyparse();
}
/*
 * Error callback for the generated parser: writes "error: <message>"
 * followed by a blank line to stderr.
 *
 * Returns 0.  The return type used to be implicit int with no value
 * returned; it is spelled out so the definition is valid in C99 and later.
 */
int yyerror(char *s) {
    fprintf(stderr, "error: %s\n\n", s);
    return 0;
}
Thanks in advance for any kind of advice!
Your code has a problem with pointers and strings. This is a C problem, not a Bison or Flex problem.
Look at these lines from gertrude.l:
char *deli;
const char del = '/';
*deli = del;
Your pointer variable deli is uninitialized and contains garbage, so it might point anywhere. Then you follow that pointer to where it points (anywhere!) and you put a character there. This causes the program to crash. Plus the string (wherever it is) isn't NUL-terminated.
Simply replace those three lines with this line:
char *deli = "/";