I'm writing a simple calculator based on the .gertrude esolang. What I'm trying to do is parse a text file that contains ratios (in the form n/m) with flex, then check whether each ratio is an index for an operation (+ - / *) or a number, and then send the right token to Bison. I get no errors when the code is compiled, but when the program runs it fails with "segmentation fault (core dumped)" for every kind of input (like 1/2 14/10 1/8, which should be 2 + 8).
Here gertrude.l
%{
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "gertrude.tab.h"
void yyerror(char *);
/*
 * Convert a ratio written as "n/m" into the integer it encodes, which is the
 * reciprocal m/n (so "1/2" yields 2 and "1/8" yields 8).
 *
 * str1 is split in place by strtok(), so it must point to writable memory.
 * Returns 0 when the text does not contain two '/'-separated parts or when
 * the numerator is 0 (the reciprocal would be a division by zero).
 */
int FrazioneToDecimale(char *str1){
    /* strtok() expects a NUL-terminated string of delimiters. */
    const char *deli = "/";
    char *token;
    int num, den;

    token = strtok(str1, deli);
    if (token == NULL)
        return 0;
    num = atoi(token);

    token = strtok(NULL, deli);
    if (token == NULL)
        return 0;
    den = atoi(token);

    if (num == 0)
        return 0;

    /*
     * 1 / (num / den) with integer division evaluates num/den to 0 for every
     * proper fraction and then divides by zero; den / num is the same
     * reciprocal without the intermediate truncation.
     */
    return den / num;
}
%}
%%
/* ratio */
"14/10" {
yylval.sval = '+';
return SOMMA;
}
"11/7" {
yylval.sval = '-';
return SOTTRAZIONE;
}
"6/16" {
yylval.sval = '*';
return MOLTIPLICAZIONE;
}
"5/8" {
yylval.sval = '/';
return DIVISIONE;
}
[0-9]+"/"[0-9]+ {
//yylval = *yytext ;
yylval.ival = FrazioneToDecimale(yytext);
return NUMERO;
}
[ \t] ;
[ \n] { return EOL; };
%%
/*
 * Called by the scanner when it reaches end of input.
 * Returning 1 tells flex there is no further input, so yylex() reports
 * end-of-file; returning 0 would mean "more input has been set up, keep
 * scanning", which this program never does.
 */
int yywrap(void) {
    return 1;
}
Here gertrude.y
%{
#include <stdio.h>
#include <string.h>
%}
%union {
int ival;
char sval;
}
%type <ival> exp fattore termine
%token <ival> NUMERO
%token <sval> SOMMA SOTTRAZIONE MOLTIPLICAZIONE DIVISIONE
%token EOL
%%
istruzione:
| istruzione exp EOL { printf("= %d\n", $2); }
;
exp: fattore
| exp SOMMA fattore { $$ = $1 + $3; }
| exp SOTTRAZIONE fattore { $$ = $1 - $3; }
;
fattore: termine
| fattore MOLTIPLICAZIONE termine { $$ = $1 * $3; }
| fattore DIVISIONE termine { $$ = $1 / $3; }
;
termine: NUMERO { $$ = $1; }
;
%%
/* Program entry point: run the generated parser once, then exit. */
int main(void) {
    yyparse();
    return 0;
}
/*
 * Error reporter: prints the message s on stderr.
 * Declared void so that it matches the `void yyerror(char *)` prototype
 * used by the scanner file.
 */
void yyerror(char *s) {
    fprintf(stderr, "error: %s\n\n", s);
}
Thanks in advance for any kind of advice!
Your code has a problem with pointers and strings. This is a C problem, not a Bison or Flex problem.
Look at these lines from gertrude.l:
char *deli;
const char del = '/';
*deli = del;
Your pointer variable deli is uninitialized and contains garbage, so it might point anywhere. Then you follow that pointer to where it points (anywhere!) and you put a character there. This causes the program to crash. Plus the string (wherever it is) isn't NUL-terminated.
Simply replace those three lines with this line:
char *deli = "/";
Related
I am trying to generate intermediate code of the following grammar for while loop and general statements, but I keep encountering syntax error. Though earlier, the grammar for statements and expressions worked, but after adding the production for while loop this program doesn't work.
this is my lex.l file
%{
#include "y.tab.h"
%}
NUMBER [0-9]
ALPHABET [a-zA-Z]
%%
[\t];
{NUMBER}+ { strcpy(yylval.str, yytext); return ID; }
{ALPHABET} { strcpy(yylval.str, yytext); return ID; }
"while" { return WHILE; }
"do" { return DO; }
"<" { yylval.symbol=yytext[0]; return OP; }
">" { yylval.symbol=yytext[0]; return OP; }
"!=" { yylval.symbol=yytext[0]; return OP; }
"==" { yylval.symbol=yytext[0]; return OP; }
[\n];
. { return yytext[0]; }
%%
And this is my yacc.y file
%{
#include <stdio.h>
#include <string.h>
char result_gen();
char quadruple_entry(char a[], char b, char c[]);
char quadruple_entry_assign(char a[], char b, char c[]);
char quadruple_entry_loop();
char quadruple_entry_do();
void three_address_code();
int q_index = 0;
char result[3] = {'t','0','\0'};
char result2[3] = {'L','0','\0'};
char temp[3];
char temp2[3];
struct QuadrupleStructure {
char arg1[10];
char op;
char arg2[10];
char rslt[3];
}quadruple[25];
%}
%union {
char str[10];
char symbol;
}
%token WHILE DO
%token <str> ID
%token <symbol> OP
%type <str> expr
%right '='
%left '+' '-'
%left '/' '*'
%%
wstmt : WHILE { quadruple_entry_loop(); } stmt DO { quadruple_entry_do(); }
;
stmt : ID '=' expr { quadruple_entry_assign($1, '=', $3); }
| ID OP ID { quadruple_entry($1,$2,$3); }
;
expr : expr '+' expr { quadruple_entry($1, '+', $3); strcpy($$,temp); }
| expr '-' expr { quadruple_entry($1, '-', $3); strcpy($$,temp); }
| expr '/' expr { quadruple_entry($1, '/', $3); strcpy($$,temp); }
| expr '*' expr { quadruple_entry($1, '*', $3); strcpy($$,temp); }
| '(' expr ')' { strcpy($$,$2); }
| ID { strcpy($$,$1); }
;
%%
/*
 * Produce the next temporary name: copy the current name held in `result`
 * into `temp`, then advance the digit in `result` for the following call.
 * Returns the digit character of the name just placed in `temp`, so the
 * function no longer falls off the end of a non-void function.
 */
char result_gen() {
    strcpy(temp, result);
    result[1]++;
    return temp[1];
}
/*
 * Record the quadruple "temp := a b c", where temp is a freshly generated
 * temporary name (left in the global `temp` for the caller to read).
 *   a, c - operand names
 *   b    - operator character
 * Returns 0; the value only exists so the function returns something.
 */
char quadruple_entry(char a[], char b, char c[]) {
    result_gen();

    /* The table has a fixed size; drop the entry rather than write past it. */
    if (q_index >= (int)(sizeof quadruple / sizeof quadruple[0]))
        return 0;

    strcpy(quadruple[q_index].arg1, a);
    quadruple[q_index].op = b;
    strcpy(quadruple[q_index].arg2, c);
    strcpy(quadruple[q_index].rslt, temp);
    q_index++;
    return 0;
}
/*
 * Record an assignment quadruple: arg1 = a, op = b, arg2 = c, with a result
 * field made of two spaces.
 * Returns 0; the value only exists so the function returns something.
 */
char quadruple_entry_assign(char a[], char b, char c[]) {
    /* The table has a fixed size; drop the entry rather than write past it. */
    if (q_index >= (int)(sizeof quadruple / sizeof quadruple[0]))
        return 0;

    strcpy(quadruple[q_index].arg1, a);
    quadruple[q_index].op = b;
    strcpy(quadruple[q_index].arg2, c);
    strcpy(quadruple[q_index].rslt, "  ");
    q_index++;
    return 0;
}
/*
 * Record the quadruple that opens a while loop: the result field holds the
 * current label name from `result2`, arg1 is a single space, the operator is
 * a space and arg2 is the text " if ".
 * Returns 0; the value only exists so the function returns something.
 */
char quadruple_entry_loop() {
    /* The table has a fixed size; drop the entry rather than write past it. */
    if (q_index >= (int)(sizeof quadruple / sizeof quadruple[0]))
        return 0;

    strcpy(quadruple[q_index].rslt, result2);
    strcpy(quadruple[q_index].arg1, " ");
    quadruple[q_index].op = ' ';
    strcpy(quadruple[q_index].arg2, " if ");
    q_index++;
    return 0;
}
/*
 * Append one quadruple whose arg1 is the current label from `result2`
 * followed by ':' (e.g. "L0:"), then advance the label digit.
 */
static void quadruple_emit_label(void) {
    char label[4];

    /* result2 is a two-character name plus NUL; the colon replaces the NUL
     * at index 2 and a new terminator goes at index 3. */
    strcpy(label, result2);
    label[2] = ':';
    label[3] = '\0';

    /* The table has a fixed size; skip the write rather than overrun it. */
    if (q_index < (int)(sizeof quadruple / sizeof quadruple[0])) {
        strcpy(quadruple[q_index].arg1, label);
        quadruple[q_index].op = ' ';
        strcpy(quadruple[q_index].arg2, " ");
        q_index++;
    }
    result2[1]++;
}

/*
 * Record the two label quadruples emitted when DO is seen, consuming two
 * consecutive label names.
 * Returns 0; the value only exists so the function returns something.
 */
char quadruple_entry_do() {
    quadruple_emit_label();
    quadruple_emit_label();
    return 0;
}
/* Print every recorded quadruple, one per line, as "rslt := arg1 op arg2". */
void three_address_code() {
    int row = 0;

    while (row < q_index) {
        printf("\n%s := %s %c %s",
               quadruple[row].rslt,
               quadruple[row].arg1,
               quadruple[row].op,
               quadruple[row].arg2);
        row++;
    }
}
/* Error reporter: writes the fixed prefix followed by the message s to stdout. */
void yyerror(char *s){
    fputs("Errror ", stdout);
    fputs(s, stdout);
}
/* End-of-input hook for the scanner: a non-zero result means "no more input". */
int yywrap() {
    const int no_more_input = 1;
    return no_more_input;
}
/* Entry point: parse the input, then print the quadruples collected while parsing. */
int main() {
    (void)yyparse();   /* the parse status is not used */
    three_address_code();
    return 0;
}
Input:
i=2*5-10
while i<5 do
Output:
Errror syntax error
If someone can figure out where the grammar is wrong, or if my code is wrong, it'd be very helpful.
Your start symbol is wstmt, so the program accepts a single while statement as an input. If you need to accept a sequence of statements, you need a symbol that expands into a sequence of statements as your starting symbol.
In addition, your lexer doesn't swallow spaces, so any program that has spaces has an error.
In my opinion, your rule
wstmt : WHILE { quadruple_entry_loop(); } stmt DO { quadruple_entry_do(); }
;
is wrong. As your stmt only considers assignment expressions, you should include a stmt in the expression for the syntax to be valid.
Your syntax rule for wstmt, without interspersed code is:
wstmt : WHILE stmt DO ;
you should change it into:
wstmt : WHILE expr DO stmt ;
and the precise points to do the proper output of code should be:
wstmt: WHILE {
/* get a new label and place it at
* this point, you'll need to jump
* here, push the label name in a
* stack */
}
expr {
/* include code here to evaluate
* (probably you do it inside expr */
}
DO {
/* get a new label but don't place it
* yet, and push it's name in the
* stack */
}
stmt {
/* a jump to the first label you
* pushed (the one that is already
* placed), then emit code for the
* second label (the one that is not
* placed yet */
};
(and you should include possibilities to use the < and > and evaluating boolean operators in the expr syntax, also)
the stmt nonterminal forces what you put as while condition to be an assignment, and this is not what you have written as input.
In my humble opinion, you should implement this compiler in two phases.... first try to do full language parsing (as it is a separate, unrelated, and completely different problem), and once you have the parser making the right syntax tree (you can probe it trying to build the syntax tree proper, and printing it) and once you have it working... then you can intersperse the code generation code.
I'm writing a translator for uni project which should translate given Pascal code into assembler code using flex/bison. I've written parser and lexer, which generates symbol table (atm works correctly only without procedures and functions). And my question is, how do I generate assembler code from it and print it to file.
Here is my lexer:
%{
#include "parser.tab.h"
#include <string.h>
#define YY_FLEX_DEBUG 1
%}
letter [a-zA-Z]
digit [0-9]
ID {letter}({letter}|{digit})*
delim [ \t\n]
/* Exponent digits must use the {digit} definition; (digit) would match the literal word "digit". */
NUM {digit}+(\.{digit}+)?(E[+\-]?{digit}+)?
ws {delim}+
%%
{ws} { }
if {return(IF); }
then {return(THEN); }
else {return(ELSE); }
{NUM} {yylval.stringValue = strdup(yytext); return(NUM); }
"<" {yylval.stringValue = "<"; return(RELOP); }
"<=" {yylval.stringValue = "<="; return(RELOP); }
"=" {yylval.stringValue = "="; return(RELOP); }
">" {yylval.stringValue = ">"; return(RELOP); }
">=" {yylval.stringValue = ">="; return(RELOP); }
"<>" {yylval.stringValue = "<>"; return(RELOP); }
":=" {return(ASSIGNOP); }
do {return(DO); }
program {return(PROGRAM); }
var {return(VAR); }
array {return(ARRAY); }
of {return(OF); }
integer {return(INTEGER); }
real {return(REAL); }
function {return(FUNCTION); }
procedure {return(PROCEDURE); }
begin {return(START); }
end {return(END); }
div {yylval.stringValue = "div"; return(MULOP); }
mod {yylval.stringValue = "mod"; return(MULOP); }
and {yylval.stringValue = "and"; return(MULOP); }
"*" {yylval.stringValue = "*"; return(MULOP); }
"/" {yylval.stringValue = "/"; return(MULOP); }
while {return(WHILE); }
or {return(OR); }
"+" {yylval.stringValue = "+"; return(SIGN); }
"-" {yylval.stringValue = "-"; return(SIGN); }
".." {return(DOUBLEDOT); }
"," {return *yytext; }
"(" {return *yytext; }
")" {return *yytext; }
"[" {return *yytext; }
"]" {return *yytext; }
";" {return *yytext; }
":" {return *yytext; }
"." {return *yytext; }
not {return(NOT); }
{ID} {yylval.stringValue= strdup(yytext); return(ID);}
%%
/* End-of-input hook for the scanner: return 1 to report that there is no more input. */
int yywrap(void){
    return 1;
}
Here is my parser:
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "SymbolTable.h"
int errors;
int lable;
#define YYDEBUG 1
/*
 * Declare sym_name: add it to the symbol table if it is not there yet,
 * otherwise count an error and report the duplicate on stdout.
 */
void install (char *sym_name)
{
    if (getsym(sym_name) == 0) {
        putsym(sym_name);
    } else {
        errors++;
        printf("%s is defined\n", sym_name);
    }
}
/*
 * Record a numeric literal in the symbol table the first time it is seen;
 * later occurrences of the same text are ignored.
 */
void install_num (char *sym_name)
{
    if (getsym(sym_name) == 0)
        putnum(sym_name);
}
/* Report on stdout when sym_name is used without being in the symbol table. */
void context_check(char *sym_name)
{
    if (getsym(sym_name) == 0)
        printf("%s is undeclared\n", sym_name);
}
%}
%union
{
int intValue;
float floatValue;
char *stringValue;
int adress;
}
%start program
%token <stringValue> ID
%token <stringValue> NUM
%token IF THEN PROGRAM VAR ARRAY
%token OF INTEGER REAL
%token FUNCTION PROCEDURE
%token START END
%token ASSIGNOP RELOP MULOP
%token ELSE WHILE DO
%token SIGN OR
%token DOUBLEDOT
%token NOT
%left '-' '+'
%left '*' '/'
%%
program: PROGRAM ID '(' prog_list ')' ';' declarations subprogram_declarations compound_statement '.'
;
prog_list: ID
| prog_list ',' ID
;
identifier_list: ID {install($1);}
| identifier_list ',' ID {install($3);}
;
declarations: declarations VAR identifier_list ':' type ';'
| /* empty */
;
type: standart_type
| ARRAY '[' NUM DOUBLEDOT NUM ']' OF REAL {set_type("REALARR");}
| ARRAY '[' NUM DOUBLEDOT NUM ']' OF INTEGER {set_type("INTARR");}
;
standart_type: INTEGER {set_type("INTEGER");}
| REAL {set_type("REAL");}
;
subprogram_declarations: subprogram_declarations subprogram_declaration ';'
| /* empty */
;
subprogram_declaration: subprogram_head declarations compound_statement;
subprogram_head: FUNCTION ID arguments ':' INTEGER ';' {install($2); set_type("INTEGER");}
| FUNCTION ID arguments ':' REAL ';' {install($2); set_type("REAL");}
| PROCEDURE ID arguments ';' {install($2); set_proc($2);}
;
arguments: '(' parameter_list ')'
| /* empty */;
parameter_list: identifier_list ':' type
| parameter_list ';' identifier_list ':' type
;
compound_statement: START
optional_statements END
;
optional_statements: statement_list
| /* empty */
;
statement_list: statement
| statement_list ';' statement
;
statement: variable ASSIGNOP expression
| procedure_statement
| compound_statement
| IF expression THEN statement ELSE statement
| WHILE expression DO statement
;
variable: ID {context_check($1);}
| ID '[' expression ']' {context_check($1);}
;
procedure_statement: ID
| ID '(' expression_list ')'
;
expression_list: expression
| expression_list ',' expression
;
expression: simple_expression
| simple_expression RELOP simple_expression
;
simple_expression: term
| SIGN term
| simple_expression SIGN term
| simple_expression OR term
;
term: factor
| term MULOP factor
;
factor: variable
| ID '(' expression_list ')' {context_check($1);}
| NUM {install_num($1);}
| '(' expression ')'
| NOT factor
;
%%
/*
 * Entry point. Expects the source file to translate as the first command-line
 * argument, parses it with debug tracing enabled, prints the symbol table and
 * writes the fixed prologue/epilogue lines to output.asm.
 * Returns 0 on completion, 1 when the argument is missing or a file cannot
 * be opened.
 */
int main (int argc, char *argv[]) {
    extern FILE *yyin;
    FILE *output;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <source-file>\n", argv[0]);
        return 1;
    }

    /* Open the input first so a bad path does not leave an empty output.asm. */
    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        perror(argv[1]);
        return 1;
    }

    output = fopen("output.asm", "w");
    if (output == NULL) {
        perror("output.asm");
        fclose(yyin);
        return 1;
    }

    fprintf(output, "\t jump.i #lab0\n");
    yydebug = 1;
    errors = 0;
    yyparse();
    print_sym_table();
    fprintf(output, "\t exit");

    fclose(output);
    fclose(yyin);
    return 0;
}
/* Called by yyparse on error: counts the error and prints the message on stdout. */
void yyerror (char *s)
{
    errors++;
    printf ("%s\n", s);
}
Here is symbol table:
struct symrec
{
char *name;
int addr;
char *type;
struct symrec *next;
};
typedef struct symrec symrec;
symrec *sym_table = (symrec *)0;
symrec *putsym();
symrec *getsym();
symrec *putnum();
void set_type();
void set_proc();
void set_func();
void print_sym_table();
symrec *putsym(char *sym_name)
{
symrec *ptr;
ptr = (symrec *)malloc(sizeof(symrec));
ptr->name = (char *)malloc(strlen(sym_name) + 1);
ptr->type = NULL;
strcpy(ptr->name,sym_name);
ptr->next = (struct symrec *)sym_table;
sym_table = ptr;
return ptr;
}
symrec *putnum(char *sym_name)
{
symrec *ptr;
char *dPos = strchr(sym_name, '.');
char *ePos = strchr(sym_name, 'e');
ptr = (symrec *)malloc(sizeof(symrec));
ptr->name = (char *)malloc(strlen(sym_name) + 1);
if ((dPos == NULL) && (ePos == NULL)){
ptr->type = (char *)malloc(strlen("INTEGER") + 1);
strcpy(ptr->type, "INTEGER");
}
else if ((dPos != NULL) && (ePos == NULL)) {
ptr->type = (char *)malloc(strlen("REAL") + 1);
strcpy(ptr->type, "REAL");
}
else {
ptr->type = (char *)malloc(strlen("FLOAT") + 1);
strcpy(ptr->type, "FLOAT");
}
strcpy(ptr->name,sym_name);
ptr->next = (struct symrec *)sym_table;
sym_table = ptr;
return ptr;
}
/* Give a heap copy of `type` to every table entry whose type is still NULL. */
void set_type(char *type)
{
    symrec *entry = sym_table;

    while (entry != (symrec *)0) {
        if (entry->type == NULL) {
            char *copy = (char *)malloc(strlen(type) + 1);
            entry->type = copy;
            strcpy(entry->type, type);
        }
        entry = (symrec *)entry->next;
    }
}
/* Label every table entry named sym_name with a fresh heap copy of "PROC". */
void set_proc(char *sym_name) {
    symrec *entry = sym_table;

    while (entry != (symrec *)0) {
        if (strcmp (entry->name, sym_name) == 0) {
            entry->type = (char *)malloc(strlen("PROC") + 1);
            strcpy(entry->type, "PROC");
        }
        entry = (symrec *)entry->next;
    }
}
/* Return the first table entry named sym_name, or 0 when there is none. */
symrec *getsym(char *sym_name)
{
    symrec *entry = sym_table;

    while (entry != (symrec *)0 && strcmp (entry->name, sym_name) != 0)
        entry = (symrec *)entry->next;

    if (entry != (symrec *)0)
        return entry;
    return 0;
}
/* Print "name type" for each table entry, starting from the list head. */
void print_sym_table()
{
    symrec *entry = sym_table;

    while (entry != (symrec *)0) {
        printf("\n%s %s\n", entry->name, entry->type);
        entry = (symrec *)entry->next;
    }
}
Simple test file
program example(input, output);
var x, y: integer;
var g,h:real;
begin
g:=x+y;
write(g)
end.
And what it should print to the output file:
jump.i #lab0 ;jump.i lab0
lab0:
add.i 0,4,24 ;add.i x,y,$t0
inttoreal.i 24,28 ;inttoreal.i $t0,$t1
mov.r 28,8 ;mov.r $t1,g
write.r 8 ;write.r g
exit ;exit
comments (;jump.i lab0) are not necessary.
I know how the addresses of variables should be calculated, and I can translate Pascal code to this assembler on paper, but I really don't understand where and what I should put in the bison or flex file so that it generates assembler code into the output file. I've tried to generate labels for begin statements in this rule:
compound_statement: START {fprintf(output, "lab0\n");}
optional_statements END
But it got segmentation fault. It's pretty obvious how to generate labels, but how should I generate
add.i 0, 4, 24
Should I create another parser after I've built symbol table with this one? Or is it doable without additional parser. Need some hints what to do next.
So you've got this bit of code:
compound_statement: START {fprintf(output, "lab0\n");}
optional_statements END
You're on the right track doing it this way, but you get a segmentation fault when you add it in and this is because output isn't initialised.
I can't see where you've declared the output that is being referenced there, but it isn't the same one that is declared in main where you open a file for output.
main (int argc, char *argv[]) {
FILE *output = fopen("output.asm", "w");
That version of output is local to main and only visible inside that function. If you remove the declaration of output from main and leave just the assignment, you'll be assigning the result of fopen to the globally declared version of output that your bison code is using.
main (int argc, char *argv[]) {
output = fopen("output.asm", "w");
Not sure why you're having confusion with the other part of your question since you've demonstrated how to do it already in your parser. Take this bit of your parser:
variable: ID {context_check($1);}
It is taking the value of "ID" - the $1 - and passing it to that function. If you wanted "variable" to contain a value you'd store it in $$. Then when you use "variable" higher up like in here:
statement: variable ASSIGNOP expression
$1 will contain whatever value you put in $$ for "variable". $2 will be the value obtained from "ASSIGNOP" token and $3 will have the results from "expression". And again if you store a value in $$ you'd be able to use it in anything that is expecting a "statement".
$$, $1 etc... are all of the type you've created by using %union, so you can also do $$.intValue or $2.stringValue if you need to specifically state which value you're setting.
In your parser, for example, you have a pattern:
| term MULOP factor
You would like to put an action on that pattern that was something like:
{ fprintf(output, "mul term, factor, result\n"); }
but it starts to get sticky very quickly: where are term, factor and where should you put the result?
The easiest answer is a stack: whenever a variable is referenced, push its value onto the stack. Whenever an operation is matched, pop the operand(s) into registers, perform the operation, and push the result, so the above becomes:
{
fprintf(output, "pop r0; pop r1; mul r1, r0, r0;");
fprintf(output, "push r0\n");
}
and assignments just pop the stack into a variable.
I'm trying to build a calculator using lex/yacc where you can create an unlimited amount of variables and use them in calculations by using a linked list.
When you type "?", it should print out the contents of the linked list, it does do this, except afterwards, it gives me a: syntax error and ends my program.
The rest of the calculator works as expected, am I missing something?
Sample Output
-bash-4.1$ ./calc
a = 42
b = 21
c = a / b
?
num-syms: 5
PHI => 1.61803
PI => 3.14159
a => 42
b => 21
c => 2
syntax error
-bash-4.1$
sym.h
#ifndef SYMTBL_H
#define SYMTBL_H
/* One calculator variable; variables are chained through `next` starting at sym_p. */
struct sym {
int length; /* number of symbols in the list; read from the head node by sym_inventory() */
char * name; /* variable name */
double value; /* current value of the variable */
struct sym *prev; /* not assigned by any code visible in this file */
struct sym *next; /* following symbol, or NULL at the end of the list */
};
struct sym * sym_p;
struct sym * sym_lookup(char *);
void sym_inventory();
#endif /* SYMTBL_H */
calc.l
%{
/*#include <math.h> */
#include "y.tab.h"
#include "sym.h"
%}
%%
"?" { sym_inventory(); }
([0-9]+|([0-9]*\.[0-9]+)([eE][+-]?[0-9]+)?) {
yylval.dval = atof(yytext);
return NUMBER;
}
[ \t] ; /* ignore whitespace */
[A-Za-z][A-Za-z0-9]* {
/* return symbol pointer */
yylval.symptr = sym_lookup(yytext);
return NAME;
}
"$" { return 0; }
\n |
. { return yytext[0]; };
%%
/* End-of-input hook for the scanner: a non-zero result means "no more input". */
int yywrap()
{
    return 1;
}
calc.y
%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "sym.h"
%}
%union {
double dval;
struct sym * symptr;
}
%token <symptr> NAME
%token <dval> NUMBER
%left '-' '+'
%left '*' '/'
%nonassoc UMINUS
%type <dval> expression
%%
statement_list
: statement '\n'
| statement_list statement '\n'
;
statement
: NAME '=' expression {
char *name = $1->name;
if (strcmp(name, "PI") != 0 && strcmp(name, "PHI") != 0) {
$1->value = $3;
} else {
yyerror("assign to const");
}
}
| expression { printf("= %g\n", $1); }
;
expression
: expression '+' expression { $$ = $1 + $3; }
| expression '-' expression { $$ = $1 - $3; }
| expression '*' expression { $$ = $1 * $3; }
| expression '/' expression {
if ($3 == 0) {
yyerror("divide by zero");
} else {
$$ = $1 / $3;
}
}
| '-' expression %prec UMINUS { $$ = -$2; }
| '(' expression ')' { $$ = $2; }
| NUMBER
| NAME { $$ = $1->value; }
;
%%
/*
 * Allocate a zero-filled symbol holding a private copy of `name` and the
 * given value. Exits the program when memory runs out, because callers
 * dereference the result unconditionally.
 */
static struct sym *sym_new(const char *name, double value)
{
    struct sym *n = (struct sym *)calloc(1, sizeof(struct sym));

    if (n != NULL)
        n->name = strdup(name);
    if (n == NULL || n->name == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    n->value = value;
    return n;
}

/*
 * Return the symbol named s, creating it (with value 0) if it does not exist.
 *
 * On first use the list is seeded with the constants PHI and PI. New symbols
 * are inserted so that the list stays ordered by strcmp() on the name, and
 * the head node's `length` field always holds the number of symbols.
 */
struct sym * sym_lookup(char * s)
{
    struct sym *sp;
    struct sym *n;
    struct sym **link;
    int count;

    if (sym_p == NULL) {
        sym_p = sym_new("PHI", 1.61803);
        sym_p->next = sym_new("PI", 3.14159);
        sym_p->length = 2;
    }

    /* Existing symbol? */
    for (sp = sym_p; sp != NULL; sp = sp->next) {
        if (strcmp(sp->name, s) == 0)
            return sp;
    }

    /* Remember the count now: the head node may change below. */
    count = sym_p->length;
    n = sym_new(s, 0.0);

    /* Walk the links until the first node that sorts after s (or the end),
     * then splice the new node in front of it. */
    link = &sym_p;
    while (*link != NULL && strcmp((*link)->name, s) <= 0)
        link = &(*link)->next;
    n->next = *link;
    *link = n;

    sym_p->length = count + 1;
    return n;
}
/*
 * Print the number of symbols followed by one "name => value" line per
 * symbol. Prints a count of 0 when no symbol has been looked up yet, since
 * the list is only created by the first sym_lookup() call.
 */
void sym_inventory()
{
    struct sym * sp = sym_p;
    int length;
    int i;

    if (sp == NULL) {
        printf("num-syms: %d\n", 0);
        return;
    }

    length = sp->length;
    printf("num-syms: %d\n", length);
    /* Stop at the end of the list even if the stored count is larger. */
    for (i = 0; i < length && sp != NULL; i++) {
        printf("\t%s => %g\n", sp->name, sp->value);
        sp = sp->next;
    }
}
The problem
Your grammar doesn't recognize an empty line as a valid input. When you type ? followed by a newline, the newline is returned, and that isn't syntactically valid, hence the 'syntax error' report.
You could demonstrate by typing ?a + b as an input, for example.
Note that your lexical analyzer processes the ? without letting the parser know that it happened; the parser never sees the ?.
Prodding to get enough information to make the question answerable
This adaptation of your Lex code shows that the lexical analyzer recognizes ? as an input. Your question doesn't show how the code is used, so there's not enough information for us to know what you're doing wrong. You need to provide an MCVE (Minimal, Complete, Verifiable Example) so that you can get the relevant help.
%option noinput
%option nounput
%%
"?" { printf("Got a ?\n"); }
([0-9]+|([0-9]*\.[0-9]+)([eE][+-]?[0-9]+)?) {
printf("Number: %s\n", yytext);
}
[ \t] { printf("Space: [%s]\n", yytext); }
[A-Za-z][A-Za-z0-9]* {
printf("Name: %s\n", yytext);
}
"$" { printf("Dollar\n"); return 0; }
\n |
. { printf("Other: %c\n", yytext[0]); return yytext[0]; };
%%
/* End-of-input hook for the scanner: a non-zero result means "no more input". */
int yywrap(void)
{
    return 1;
}
/* Driver: keep requesting tokens until yylex() returns 0. */
int main(void)
{
    for (;;) {
        if (yylex() == 0)
            break;
    }
    return 0;
}
Sample run (shell prompt is JL: rather than $ because one of the inputs is $):
JL: ./xy73
a b 23
Name: a
Space: [ ]
Name: b
Space: [ ]
Number: 23
Other:
?
Got a ?
Other:
=#%
Other: =
Other: #
Other: %
Other:
$
Dollar
JL:
The ? is followed by a newline which is returned as a character token, but your grammar only accepts a newline after a statement, which always starts with a NAME.
I have to create a compiler for the Tiny C language, but I can't compile it. I have the .y and .l files and both work all right, but when I try to compile the .tab.c file, it shows 3 errors:
undefined reference to 'install_id'
undefined reference to 'printSymtab'
undefined reference to 'lookup_id'
Here are the codes:
Symtab.h
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct symtab_node * SYMTAB;
typedef struct symtab_node {
char * nombre;
int type;
float fval;
SYMTAB next;
} SYMTAB_NODE;
SYMTAB lookup_id(SYMTAB st, char * name);
SYMTAB install_id(SYMTAB st, char * name, int typ);
void printSymtab(SYMTAB t);
Symtab.c
#include "symtab.h"
#include <stdio.h>
/* Return 1 on the first call and the next integer on each call after that. */
int next_num() {
    static int issued = 0;

    issued += 1;
    return issued;
}
/* looks up an is in ST. Returns pointer to cell if found else NULL */
/* Search the list st for an identifier called name; return its node, or NULL if absent. */
SYMTAB lookup_id(SYMTAB st, char * name) {
    SYMTAB node;

    for (node = st; node != NULL; node = node->next) {
        if (strcmp(node->idname, name) == 0)
            return node;   /* found */
    }
    return NULL;           /* empty list or no match */
}
/* adds an id to ST if not present */
/*
 * Add the identifier `name` with type code `typ` to the front of the list st
 * unless it is already present.
 * Returns the new list head, or st unchanged when the name already exists or
 * memory cannot be allocated.
 */
SYMTAB install_id(SYMTAB st, char * name, int typ) {
    SYMTAB nst;

    if (lookup_id(st, name) != NULL)
        return st;

    /* calloc so that every field not set below starts out as zero/NULL. */
    nst = (SYMTAB)calloc(1, sizeof(SYMTAB_NODE));
    if (nst == NULL)
        return st;

    nst->idname = (char *) strdup(name);
    if (nst->idname == NULL) {
        free(nst);
        return st;
    }
    nst->idnum = next_num();
    nst->type = typ;   /* record the declared type instead of dropping it */
    nst->next = st;
    return nst;
}
/* print out ST */
/* Print one "name<TAB>number" line for each node of the list t. */
void printSymtab(SYMTAB t) {
    SYMTAB node;

    for (node = t; node != NULL; node = node->next)
        printf("%s\t%d\n", node->idname, node->idnum);
}
grammar.y
%{
#include "symtab.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
char * concat (char * str1, char * str2);
extern int yylex();
extern char * yytext;
extern int yylineno;
SYMTAB st;
int typev;
/* Function definitions */
void yyerror (char *string);
%}
%union{
char *strval;
int value;
float fvalue;
SYMTAB st;
}
/* Declaramos todos los tokens que recibirá el programa y que provienen del cparser.l */
%token SEMI INTEGER FLOAT
%token IF THEN ELSE WHILE DO
%token READ WRITE
%token LPAREN RPAREN LBRACE RBRACE
%token LT EQ
%token PLUS MINUS TIMES DIV ASSIGN
%token<value> INT_NUM
%token<fvalue> FLOAT_NUM
%token<strval> ID
%%
/* Definimos las reglas de producción para el mini-lenguaje C */
program: var_dec stmt_seq { printf ("No hay errores sintacticos\n");}
;
var_dec: var_dec single_dec
|
;
single_dec: type ID SEMI { st = install_id(st,$2,typev); printSymtab(st); }
;
type: INTEGER { typev = 1; }
| FLOAT { typev = 2; }
;
stmt_seq: stmt_seq stmt
|
;
stmt: IF exp THEN else
| WHILE exp DO stmt
| variable ASSIGN exp SEMI { /*st = install_id(st,$1); */}
| READ LPAREN variable RPAREN SEMI { /*st = install_id(st,$3); */}
| WRITE LPAREN exp RPAREN SEMI
| block
| error SEMI { yyerrok;}
;
else: stmt
| ELSE stmt
;
block: LBRACE stmt_seq RBRACE
;
exp: simple_exp LT simple_exp
| simple_exp EQ simple_exp
| simple_exp
;
simple_exp: simple_exp PLUS term
| simple_exp MINUS term
| term
;
term: term TIMES factor
| term DIV factor
| factor
;
factor: LPAREN exp RPAREN
| INT_NUM
| FLOAT_NUM
| variable
;
variable: ID
{ if(lookup_id(st,$1) == NULL){
yyerror(concat("Error: Undeclared Identifier ", $1));
}
}
;
%%
/* A function that concatenates two strings and returns the result */
/*
 * Return a newly allocated string holding str1 followed by str2.
 * The caller owns the returned memory.
 */
char * concat(char * str1, char * str2){
    size_t first_len = strlen(str1);
    size_t second_len = strlen(str2);
    char *joined = (char *) calloc(first_len + second_len + 1, sizeof(char));

    memcpy(joined, str1, first_len);
    memcpy(joined + first_len, str2, second_len);
    /* calloc zero-filled the buffer, so the terminator is already in place. */
    return joined;
}
#include "lex.yy.c"
/* Bison does NOT implement yyerror, so define it here */
void yyerror (char *string){
printf ("ERROR NEAR LINE %d: %s\n",yylineno,string);
}
/* Bison does NOT define the main entry point so define it here */
/*
 * Entry point: run the parser once.
 * yyparse() drives the scanner itself, so no separate yylex() call is needed
 * afterwards.
 */
int main (){
    yyparse();
    return 0;
}
lexem.l
%{
#include <string.h>
#include <stdlib.h>
char * strval;
int value;
float fvalue;
int error;
extern YYSTYPE yylval;
%}
/* This is the rule definition */
%option noyywrap
%option yylineno
ids [A-Za-z_][A-Za-z0-9_]*
digits 0|[1-9][0-9]*|0(c|C)[0-7]+|0(x|X)[0-9A-Fa-f]+
floats [0-9]*"."[0-9]+([eE][-+]?[0-9]+)?
%%
/* Consume los comentarios*/
(\/\*([^\*]|\*[^/])*\*\/)|(\/\/.*)
/* Consume los espacios, tabulaciones y saltos de linea*/
[[:space:]]|[[:blank:]]|\n
/* Palabras reservadas */
"int" { return INTEGER; }
"float" { return FLOAT; }
"if" { return IF; }
"then" { return THEN; }
"else" { return ELSE; }
"do" { return DO; }
"while" { return WHILE; }
"read" { return READ; }
"write" { return WRITE; }
/* Simbolos de puntuacion, operadores y relacionales */
/* Puntuacion */
";" { return SEMI; }
"(" { return LPAREN; }
")" { return RPAREN; }
"{" { return LBRACE; }
"}" { return RBRACE; }
/* Relacionales */
">" { return LT; }
"==" { return EQ; }
/* Operadores */
"+" { return PLUS; }
"-" { return MINUS; }
"*" { return TIMES; }
"/" { return DIV; }
"=" { return ASSIGN; }
{ids} { yylval.strval = (char *) strdup(yytext);
return (ID); }
{digits} { yylval.value = atoi(yytext);
return (INT_NUM); }
{floats} { yylval.fvalue = atof(yytext);
return (FLOAT_NUM); }
/* Consume los simbolos que sobran y marca error */
. { printf("LEXICAL ERROR NEAR LINE %d: %s \n", yyget_lineno(), yyget_text()); error++; }
%%
You're not supposed to compile the whatever.tab.h file, that's a header file containing the YACC elements for the grammar, for inclusion into the lex and yacc code sections, as well as your own code if you need access to it.
You're supposed to compile whatever.tab.c, ensuring that you're also including your symtab.c (or its equivalent object file), and any other C source files as well.
And, based on your comment, it's this non-inclusion of the symtab.c file which is indeed causing your immediate error.
When I execute your steps (slightly modified for different names):
flex lexem.l
yacc -d -v grammar.y
gcc -o par y.tab.c
then I get a similar problem to what you're seeing:
/tmp/ccI5DpZQ.o:y.tab.c:(.text+0x35c): undefined reference to `install_id'
/tmp/ccI5DpZQ.o:y.tab.c:(.text+0x36e): undefined reference to `printSymtab'
/tmp/ccI5DpZQ.o:y.tab.c:(.text+0x3a7): undefined reference to `lookup_id'
However, when I incorporate the symtab.c file into the compile line (and add the idname and idnum missing bits to the structure in symtab.h to solve compilation problems), it works just fine:
gcc -o par y.tab.c symtab.c
So that's what you need to do, include symtab.c on the gcc command line.
I've written a parser for evaluating a logical expression. I know flex and bison use global variables (like yylval). I want a pure parser and a reentrant scanner for multithreaded programming. My '.y' file is here:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
//int yylex (YYSTYPE* lvalp);
int yylex(void);
bool parseExpression(const std::string& inp);
%}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
/* Error reporter: writes the message and a newline to stderr. */
void yyerror(char *s) {
    fputs(s, stderr);
    fputc('\n', stderr);
}
/*
 * Demo driver: evaluate one hard-coded expression, print the result and wait
 * for a key press. main must return int in C++.
 */
int main(void) {
    const std::string inp = "0|0\n";
    const bool nasi = parseExpression(inp);
    printf("%s%d\n", "nasi ", nasi);
    printf("Press ENTER to close. ");
    getchar();
    return 0;
}
My '.l' file is here:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
%}
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
yylval = false;
//*lvalp = false;
}
else
{
yylval = true;
//*lvalp = true;
}
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror("Unknown character");
%%
/* End-of-input hook for the scanner: a non-zero result means "no more input". */
int yywrap(void) {
    const int no_more_input = 1;
    return no_more_input;
}
/*
 * Parse the expression text in inp.
 * Discards the scanner's current buffer, makes the scanner read from a copy
 * of inp, and returns yyparse()'s result converted to bool.
 */
bool parseExpression(const std::string& inp)
{
    yy_delete_buffer(YY_CURRENT_BUFFER);
    // Copy the string into a new buffer and switch the scanner to it.
    yy_scan_string(inp.c_str());
    return yyparse();
}
I've added %pure_parser to both files, changed yylex declaration to int yylex (YYSTYPE* lvalp); and replaced yylval to *lvalp, but I saw an error: 'lvalp' is undeclared identifier.. There are many examples about 'reentrant' and 'pure', but I can't find the best guideline.
Could someone guide me?
Thanks in advance.
Fortunately, I did it. Here is my code. I think it can be a good guideline for anyone who wants to write a pure parser.
My reentrant scanner:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror (yyscan_t yyscanner, char const *msg);
%}
%option reentrant bison-bridge
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
*yylval = false;
}
else
{
*yylval = true;
}
//yylval = atoi(yytext);
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror (yyscanner, "Unknown character");
%%
/* End-of-input hook for the reentrant scanner: a non-zero result means "no more input". */
int yywrap(yyscan_t yyscanner)
{
    (void)yyscanner;   // the scanner handle is not needed to answer
    return 1;
}
/*
 * Parse the expression text in inp with a private scanner instance.
 * Returns yyparse()'s result converted to bool, or false when the scanner
 * cannot be created.
 */
bool parseExpression(const std::string& inp)
{
    yyscan_t myscanner;
    if (yylex_init(&myscanner) != 0)
        return false;

    // A scanner that has just been initialised has read nothing, so there is
    // no earlier buffer to delete and no need to reach into yyguts_t.
    // yy_scan_string() copies the text into a new buffer and makes it current.
    yy_scan_string(inp.c_str(), myscanner);

    const bool nasi = yyparse(myscanner);

    // Frees the resources held by the scanner.
    yylex_destroy(myscanner);
    return nasi;
}
My pure parser:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
typedef void* yyscan_t;
void yyerror (yyscan_t yyscanner, char const *msg);
int yylex(YYSTYPE *yylval_param, yyscan_t yyscanner);
bool parseExpression(const std::string& inp);
%}
%define api.pure full
%lex-param {yyscan_t scanner}
%parse-param {yyscan_t scanner}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
/* Error reporter for the pure parser: writes the message and a newline to stderr. */
void yyerror (yyscan_t yyscanner, char const *msg){
    fputs(msg, stderr);
    fputc('\n', stderr);
}
/*
 * Demo driver: evaluate one hard-coded expression, print the result and wait
 * for a key press. main must return int in C++.
 */
int main(void) {
    const std::string inp = "1|0\n";
    const bool nasi = parseExpression(inp);
    printf("%s%d\n", "nasi ", nasi);
    printf("Press ENTER to close. ");
    getchar();
    return 0;
}
Notice that I've cheated and defined yyg myself as
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
I don't find another way to get the YY_CURRENT_BUFFER. So, If someone knows the best way to get the YY_CURRENT_BUFFER, tell me,plz.
Here is a complete Flex/Bison C++ example. Everything is reentrant, no use of global variables. Both parser/lexer are encapsulated in a class placed in a separate namespace. You can instantiate as many "interpreters" in as many threads as you want.
https://github.com/ezaquarii/bison-flex-cpp-example
Disclaimer: it's not tested on Windows, but the code should be portable with minor tweaks.