So I have to create a compiler for the Tiny C language, but I cant compile it, I have the .y and .l files and both work all right, but when I try to compile the .tab.c file, it shows 3 errors for
undefined reference to 'install_id'
undefined reference to printSymtab'
undefined reference to 'lookup_id'
Here are the codes:
Symtab.h
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct symtab_node * SYMTAB;
typedef struct symtab_node {
char * nombre;
int type;
float fval;
SYMTAB next;
} SYMTAB_NODE;
SYMTAB lookup_id(SYMTAB st, char * name);
SYMTAB install_id(SYMTAB st, char * name, int typ);
void printSymtab(SYMTAB t);
Symtab.c
#include "symtab.h"
#include <stdio.h>
int next_num() {
static int i = 1;
return i++;
}
/* looks up an is in ST. Returns pointer to cell if found else NULL */
SYMTAB lookup_id(SYMTAB st, char * name) {
SYMTAB tmp = st;
if (tmp == NULL) {/* empty list */
return NULL;
} else {
while (tmp != NULL) {
if (strcmp(tmp->idname,name) == 0) {
return tmp; /* found */
} else {
tmp = tmp->next; /* go to next cell */
}
}
return NULL; /* not found */
}
}
/* adds an id to ST if not present */
SYMTAB install_id(SYMTAB st, char * name, int typ) {
if (lookup_id(st, name) == NULL) {
SYMTAB nst = (SYMTAB)malloc(sizeof(SYMTAB_NODE));
nst->idname = (char *) strdup(name);
nst->idnum = next_num();
nst->next = st;
return nst;
} else {
return st;
}
}
/* print out ST */
void printSymtab(SYMTAB t) {
SYMTAB tmp = t;
while (tmp != NULL) {
printf("%s\t%d\n", tmp->idname, tmp->idnum);
tmp = tmp->next;
}
}
grammar.y
%{
#include "symtab.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
char * concat (char * str1, char * str2);
extern int yylex();
extern char * yytext;
extern int yylineno;
SYMTAB st;
int typev;
/* Function definitions */
void yyerror (char *string);
%}
%union{
char *strval;
int value;
float fvalue;
SYMTAB st;
}
/* Declaramos todos los tokens que recibirá el programa y que provienen del cparser.l */
%token SEMI INTEGER FLOAT
%token IF THEN ELSE WHILE DO
%token READ WRITE
%token LPAREN RPAREN LBRACE RBRACE
%token LT EQ
%token PLUS MINUS TIMES DIV ASSIGN
%token<value> INT_NUM
%token<fvalue> FLOAT_NUM
%token<strval> ID
%%
/* Definimos las reglas de producción para el mini-lenguaje C */
program: var_dec stmt_seq { printf ("No hay errores sintacticos\n");}
;
var_dec: var_dec single_dec
|
;
single_dec: type ID SEMI { st = install_id(st,$2,typev); printSymtab(st); }
;
type: INTEGER { typev = 1; }
| FLOAT { typev = 2; }
;
stmt_seq: stmt_seq stmt
|
;
stmt: IF exp THEN else
| WHILE exp DO stmt
| variable ASSIGN exp SEMI { /*st = install_id(st,$1); */}
| READ LPAREN variable RPAREN SEMI { /*st = install_id(st,$3); */}
| WRITE LPAREN exp RPAREN SEMI
| block
| error SEMI { yyerrok;}
;
else: stmt
| ELSE stmt
;
block: LBRACE stmt_seq RBRACE
;
exp: simple_exp LT simple_exp
| simple_exp EQ simple_exp
| simple_exp
;
simple_exp: simple_exp PLUS term
| simple_exp MINUS term
| term
;
term: term TIMES factor
| term DIV factor
| factor
;
factor: LPAREN exp RPAREN
| INT_NUM
| FLOAT_NUM
| variable
;
variable: ID
{ if(lookup_id(st,$1) == NULL){
yyerror(concat("Error: Undeclared Identifier ", $1));
}
}
;
%%
/* A function that concatenates two strings and returns the result */
char * concat(char * str1, char * str2){
char *str3;
str3 = (char *) calloc(strlen(str1)+strlen(str2)+1, sizeof(char));
strcpy(str3,str1);
strcat(str3,str2);
return str3;
}
#include "lex.yy.c"
/* Bison does NOT implement yyerror, so define it here */
void yyerror (char *string){
printf ("ERROR NEAR LINE %d: %s\n",yylineno,string);
}
/* Bison does NOT define the main entry point so define it here */
main (){
yyparse();
yylex();
}
lexem.y
%{
#include <string.h>
#include <stdlib.h>
char * strval;
int value;
float fvalue;
int error;
extern YYSTYPE yylval;
%}
/* This is the rule definition */
%option noyywrap
%option yylineno
ids [A-Za-z_][A-Za-z0-9_]*
digits 0|[1-9][0-9]*|0(c|C)[0-7]+|0(x|X)[0-9A-Fa-f]+
floats [0-9]*"."[0-9]+([eE][-+]?[0-9]+)?
%%
/* Consume los comentarios*/
(\/\*([^\*]|\*[^/])*\*\/)|(\/\/.*)
/* Consume los espacios, tabulaciones y saltos de linea*/
[[:space:]]|[[:blank:]]|\n
/* Palabras reservadas */
"int" { return INTEGER; }
"float" { return FLOAT; }
"if" { return IF; }
"then" { return THEN; }
"else" { return ELSE; }
"do" { return DO; }
"while" { return WHILE; }
"read" { return READ; }
"write" { return WRITE; }
/* Simbolos de puntuacion, operadores y relacionales */
/* Puntuacion */
";" { return SEMI; }
"(" { return LPAREN; }
")" { return RPAREN; }
"{" { return LBRACE; }
"}" { return RBRACE; }
/* Relacionales */
">" { return LT; }
"==" { return EQ; }
/* Operadores */
"+" { return PLUS; }
"-" { return MINUS; }
"*" { return TIMES; }
"/" { return DIV; }
"=" { return ASSIGN; }
{ids} { yylval.strval = (char *) strdup(yytext);
return (ID); }
{digits} { yylval.value = atoi(yytext);
return (INT_NUM); }
{floats} { yylval.fvalue = atof(yytext);
return (FLOAT_NUM); }
/* Consume los simbolos que sobran y marca error */
. { printf("LEXICAL ERROR NEAR LINE %d: %s \n", yyget_lineno(), yyget_text()); error++; }
%%
You're not supposed to compile the whatever.tab.h file, that's a header file containing the YACC elements for the grammar, for inclusion into the lex and yacc code sections, as well as your own code if you need access to it.
You're supposed to compile whatever.tab.c, ensuring that you're also including your symtab.c (or its equivalent object file), and any other C source files as well.
And, based on your comment, it's this non-inclusion of the symtab.c file which is indeed causing your immediate error.
When I execute your steps (slightly modified for different names):
flex lexem.l
yacc -d -v grammar.y
gcc -o par y.tab.c
then I get a similar problem to what you're seeing:
/tmp/ccI5DpZQ.o:y.tab.c:(.text+0x35c): undefined reference to `install_id'
/tmp/ccI5DpZQ.o:y.tab.c:(.text+0x36e): undefined reference to `printSymtab'
/tmp/ccI5DpZQ.o:y.tab.c:(.text+0x3a7): undefined reference to `lookup_id'
However, when I incorporate the symtab.c file into the compile line (and add the idname and idnum missing bits to the structure in symtab.h to solve compilation problems), it works just fine:
gcc -o par y.tab.c symtab.c
So that's what you need to do, include symtab.c on the gcc command line.
Related
I'm writing a translator for uni project which should translate given Pascal code into assembler code using flex/bison. I've written parser and lexer, which generates symbol table (atm works correctly only without procedures and functions). And my question is, how do I generate assembler code from it and print it to file.
Here is my lexer:
%{
#include "parser.tab.h"
#include <string.h>
#define YY_FLEX_DEBUG 1
%}
letter [a-zA-Z]
digit [0-9]
ID {letter}({letter}|{digit})*
delim [ \t\n]
NUM {digit}+(\.{digit}+)?(E[+\-]?(digit)+)?
ws {delim}+
%%
{ws} { }
if {return(IF); }
then {return(THEN); }
else {return(ELSE); }
{NUM} {yylval.stringValue = strdup(yytext); return(NUM); }
"<" {yylval.stringValue = "<"; return(RELOP); }
"<=" {yylval.stringValue = "<="; return(RELOP); }
"=" {yylval.stringValue = "="; return(RELOP); }
">" {yylval.stringValue = ">"; return(RELOP); }
">=" {yylval.stringValue = ">="; return(RELOP); }
"<>" {yylval.stringValue = "<>"; return(RELOP); }
":=" {return(ASSIGNOP); }
do {return(DO); }
program {return(PROGRAM); }
var {return(VAR); }
array {return(ARRAY); }
of {return(OF); }
integer {return(INTEGER); }
real {return(REAL); }
function {return(FUNCTION); }
procedure {return(PROCEDURE); }
begin {return(START); }
end {return(END); }
div {yylval.stringValue = "div"; return(MULOP); }
mod {yylval.stringValue = "mod"; return(MULOP); }
and {yylval.stringValue = "and"; return(MULOP); }
"*" {yylval.stringValue = "*"; return(MULOP); }
"/" {yylval.stringValue = "/"; return(MULOP); }
while {return(WHILE); }
or {return(OR); }
"+" {yylval.stringValue = "+"; return(SIGN); }
"-" {yylval.stringValue = "-"; return(SIGN); }
".." {return(DOUBLEDOT); }
"," {return *yytext; }
"(" {return *yytext; }
")" {return *yytext; }
"[" {return *yytext; }
"]" {return *yytext; }
";" {return *yytext; }
":" {return *yytext; }
"." {return *yytext; }
not {return(NOT); }
{ID} {yylval.stringValue= strdup(yytext); return(ID);}
%%
int yywrap(void){}
Here is my parser:
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "SymbolTable.h"
int errors;
int lable;
#define YYDEBUG 1
install (char *sym_name)
{
symrec *s;
s = getsym(sym_name);
if (s == 0)
s = putsym(sym_name);
else {
errors++;
printf("%s is defined\n", sym_name);
}
}
install_num (char *sym_name)
{
symrec *s;
s = getsym(sym_name);
if (s == 0)
s = putnum(sym_name);
}
context_check(char *sym_name)
{
if (getsym(sym_name) == 0)
printf("%s is undeclared\n", sym_name);
}
%}
%union
{
int intValue;
float floatValue;
char *stringValue;
int adress;
}
%start program
%token <stringValue> ID
%token <stringValue> NUM
%token IF THEN PROGRAM VAR ARRAY
%token OF INTEGER REAL
%token FUNCTION PROCEDURE
%token START END
%token ASSIGNOP RELOP MULOP
%token ELSE WHILE DO
%token SIGN OR
%token DOUBLEDOT
%token NOT
%left '-' '+'
%left '*' '/'
%%
program: PROGRAM ID '(' prog_list ')' ';' declarations subprogram_declarations compound_statement '.'
;
prog_list: ID
| prog_list ',' ID
;
identifier_list: ID {install($1);}
| identifier_list ',' ID {install($3);}
;
declarations: declarations VAR identifier_list ':' type ';'
| /* empty */
;
type: standart_type
| ARRAY '[' NUM DOUBLEDOT NUM ']' OF REAL {set_type("REALARR");}
| ARRAY '[' NUM DOUBLEDOT NUM ']' OF INTEGER {set_type("INTARR");}
;
standart_type: INTEGER {set_type("INTEGER");}
| REAL {set_type("REAL");}
;
subprogram_declarations: subprogram_declarations subprogram_declaration ';'
| /* empty */
;
subprogram_declaration: subprogram_head declarations compound_statement;
subprogram_head: FUNCTION ID arguments ':' INTEGER ';' {install($2); set_type("INTEGER");}
| FUNCTION ID arguments ':' REAL ';' {install($2); set_type("REAL");}
| PROCEDURE ID arguments ';' {install($2); set_proc($2);}
;
arguments: '(' parameter_list ')'
| /* empty */;
parameter_list: identifier_list ':' type
| parameter_list ';' identifier_list ':' type
;
compound_statement: START
optional_statements END
;
optional_statements: statement_list
| /* empty */
;
statement_list: statement
| statement_list ';' statement
;
statement: variable ASSIGNOP expression
| procedure_statement
| compound_statement
| IF expression THEN statement ELSE statement
| WHILE expression DO statement
;
variable: ID {context_check($1);}
| ID '[' expression ']' {context_check($1);}
;
procedure_statement: ID
| ID '(' expression_list ')'
;
expression_list: expression
| expression_list ',' expression
;
expression: simple_expression
| simple_expression RELOP simple_expression
;
simple_expression: term
| SIGN term
| simple_expression SIGN term
| simple_expression OR term
;
term: factor
| term MULOP factor
;
factor: variable
| ID '(' expression_list ')' {context_check($1);}
| NUM {install_num($1);}
| '(' expression ')'
| NOT factor
;
%%
main (int argc, char *argv[]) {
FILE *output = fopen("output.asm", "w");
fprintf(output, "\t jump.i #lab0\n");
extern FILE *yyin;
++argv; --argc;
yyin = fopen(argv[0], "r");
yydebug = 1;
errors = 0;
yyparse();
print_sym_table();
fprintf(output, "\t exit");
fclose(output);
}
yyerror (char *s) /* Called by yyparse on error */
{
errors++;
printf ("%s\n", s);
}
Here is symbol table:
struct symrec
{
char *name;
int addr;
char *type;
struct symrec *next;
};
typedef struct symrec symrec;
symrec *sym_table = (symrec *)0;
symrec *putsym();
symrec *getsym();
symrec *putnum();
void set_type();
void set_proc();
void set_func();
void print_sym_table();
symrec *putsym(char *sym_name)
{
symrec *ptr;
ptr = (symrec *)malloc(sizeof(symrec));
ptr->name = (char *)malloc(strlen(sym_name) + 1);
ptr->type = NULL;
strcpy(ptr->name,sym_name);
ptr->next = (struct symrec *)sym_table;
sym_table = ptr;
return ptr;
}
symrec *putnum(char *sym_name)
{
symrec *ptr;
char *dPos = strchr(sym_name, '.');
char *ePos = strchr(sym_name, 'e');
ptr = (symrec *)malloc(sizeof(symrec));
ptr->name = (char *)malloc(strlen(sym_name) + 1);
if ((dPos == NULL) && (ePos == NULL)){
ptr->type = (char *)malloc(strlen("INTEGER") + 1);
strcpy(ptr->type, "INTEGER");
}
else if ((dPos != NULL) && (ePos == NULL)) {
ptr->type = (char *)malloc(strlen("REAL") + 1);
strcpy(ptr->type, "REAL");
}
else {
ptr->type = (char *)malloc(strlen("FLOAT") + 1);
strcpy(ptr->type, "FLOAT");
}
strcpy(ptr->name,sym_name);
ptr->next = (struct symrec *)sym_table;
sym_table = ptr;
return ptr;
}
void set_type(char *type)
{
symrec *ptr;
for (ptr = sym_table; ptr != (symrec *)0; ptr = (symrec *)ptr->next) {
if (ptr->type == NULL) {
ptr->type = (char *)malloc(strlen(type) + 1);
strcpy(ptr->type, type);
}
}
}
void set_proc(char *sym_name) {
symrec *ptr;
for (ptr = sym_table; ptr != (symrec *)0; ptr = (symrec *)ptr->next)
if (strcmp (ptr->name, sym_name) == 0){
ptr->type = (char *)malloc(strlen("PROC") + 1);
strcpy(ptr->type, "PROC");
}
}
symrec *getsym(char *sym_name)
{
symrec *ptr;
for (ptr = sym_table; ptr != (symrec *)0; ptr = (symrec *)ptr->next)
if (strcmp (ptr->name, sym_name) == 0)
return ptr;
return 0;
}
void print_sym_table()
{
symrec *ptr;
for (ptr = sym_table; ptr != (symrec *)0; ptr = (symrec *)ptr->next)
printf("\n%s %s\n", ptr->name, ptr->type);
}
Simple test file
program example(input, output);
var x, y: integer;
var g,h:real;
begin
g:=x+y;
write(g)
end.
And what it should print to the output file:
jump.i #lab0 ;jump.i lab0
lab0:
add.i 0,4,24 ;add.i x,y,$t0
inttoreal.i 24,28 ;inttoreal.i $t0,$t1
mov.r 28,8 ;mov.r $t1,g
write.r 8 ;write.r g
exit ;exit
comments (;jump.i lab0) are not necessary.
I know how adresses of variables should be calculated and I can translate pascal code to this assembler on paper, but I really don't understand where and what should I put in bison or flex file so it would generate assembler code into output file. I've tried to generate labels for begin statements in rule :
compound_statement: START {fprintf(output, "lab0\n");}
optional_statements END
But it got segmentation fault. It's pretty obvious how to generate labels, but how should I generate
add.i 0, 4, 24
Should I create another parser after I've built symbol table with this one? Or is it doable without additional parser. Need some hints what to do next.
So you've got this bit of code:
compound_statement: START {fprintf(output, "lab0\n");}
optional_statements END
You're on the right track doing it this way, but you get a segmentation fault when you add it in and this is because output isn't initialised.
I can't see where you've declared the output that is being referenced there, but it isn't the same one that is declared in main where you open a file for output.
main (int argc, char *argv[]) {
FILE *output = fopen("output.asm", "w");
That version output is local to main and only visible inside that function. If you remove the declaration of output from main and leave just the assignment, you'll be assigning the results of fopen to the globally declared version of output that your bison code is using.
main (int argc, char *argv[]) {
output = fopen("output.asm", "w");
Not sure why you're having confusion with the other part of your question since you've demonstrated how to do it already in your parser. Take this bit of your parser:
variable: ID {context_check($1);}
It is taking the value of "ID" - the $1 - and passing it to that function. If you wanted "variable" to contain a value you'd store it in $$. Then when you use "variable" higher up like in here:
statement: variable ASSIGNOP expression
$1 will contain whatever value you put in $$ for "variable". $2 will be the value obtained from "ASSIGNOP" token and $3 will have the results from "expression". And again if you store a value in $$ you'd be able to use it in anything that is expecting a "statement".
$$, $1 etc... are all of the type you've created by using %union, so you can also do $$.intValue or $2.stringValue if you need to specifically state which value you're setting.
In your parser, for example, you have a pattern:
| term MULOP factor
You would like to put an action on that pattern that was something like:
{ fprintf(output, "mul term, factor, result\n"); }
but it starts to get sticky very quickly: where are term, factor and where should you put the result?
The easiest answer is a stack: whenever an variable is referenced, push its value onto the stack. whenever an operation is matched, pop the operand(s) into registers, perform the operation, and push the result, so the above becomes:
{
fprintf(output, "pop r0; pop r1; mul r1, r0, r0;");
fprintf(output, "push r0\n");
}
and assignments just pop the stack into a variable.
I have a problem about segmentation fault 11.
Every time, when I want to add action rules in function grammar blocks, I must get the segmentation fault 11.
Therefore, I cannot get the dump.out, which is a file that record the identifiers for me.
I do not think the problem is because of the scanner file, but that is still possible.
Of course, the problem should have something about symbol table, but it is really strange.
The problem just occurs like:
function: FN ID '(' ')' {if ($2->st_type == UNDEF) $2->st_type = FUNCTION_TYPE};
When I add action in the block, segmentation fault 11 will occur.
However, this is okay.
function: FN ID '(' ')' {};
The parser file do not contains all contents since it is so many.
I use mac os
I hope someone can help me.
Thank you anyway
Where the error occur
1: // Hello World Example
<fn>
<id: main>
<(>
<)>
<{>
2: fn main() {
3: // Print text to the console
<let>
<mut>
<id: a>
<:>
<int>
<=>
<integer: 10>
<;>
4: let mut a:int = 10;
<let>
<mut>
<id: b>
<=>
<string: 1199>
<;>
5: let mut b = "1199";
<let>
<mut>
<id: sum>
<[>
<str>
<,>
<integer: 10>
<]>
<;>
6: let mut sum[str, 10];
<id: sum>
<[>
<integer: 0>
<]>
<=>
<string: 100>
<;>
7: sum[0] = "100";
<id: b>
<=>
<string: 123>
<+>
<id: b>
<;>
8: b = "123" + b;
<println>
<(>
<string: Hello World>
<)>
<;>
9: println ("Hello World");
<}>
10: }
Symbol table:
a
b
sum
main
Segmentation fault: 11
The input file
// Hello World Example
fn main() {
// Print text to the console
let mut a:int = 10;
let mut b = "1199";
let mut sum[str, 10];
sum[0] = "100";
b = "123" + b;
println ("Hello World");
}
This is my symbol table header file.
#include <stdio.h>
/* maximum size of hash table */
#define SIZE 211
/* maximum size of tokens-identifiers */
#define MAXTOKENLEN 40
/* token types */
#define UNDEF 0
#define INT_TYPE 1
#define REAL_TYPE 2
#define STR_TYPE 3
#define LOGIC_TYPE 4
#define ARRAY_TYPE 5
#define FUNCTION_TYPE 6
/* new type for parser */
#define CONST_INT_TYPE 7
#define CONST_REAL_TYPE 8
#define CONST_STR_TYPE 9
#define CONST_LOGIC_TYPE 10
/* how parameter is passed */
#define BY_VALUE 1
#define BY_REFER 2
/*
* Originally here, now it is in the symbols.c
* current scope
* int cur_scope = 0;
*/
/* parameter struct */
typedef struct Parameter{
int par_type;
char *param_name;
// to store value
int ival; double fval; char *st_sval; int bval; // boolean type
int passing; // value or reference
struct Parameter *next; // link to next one
}Param;
/* a linked list of references (lineno's) for each variable */
typedef struct Ref{
int lineno;
struct Ref *next;
int type;
}RefList;
// struct that represents a list node
typedef struct list{
char st_name[MAXTOKENLEN];
int st_size;
int scope;
RefList *lines;
// to store value and sometimes more information
int st_ival; double st_fval; char *st_sval; int st_bval;
// type
int st_type;
int inf_type; // for arrays (info type) and functions (return type)
// array stuff
int *i_vals; double *f_vals; char **s_vals; int *b_vals; // boolean type
int array_size;
// function parameters
Param *parameters;
int num_of_pars; // Meanwhile, it record the current position of the parameters
// pointer to next item in the list
struct list *next;
}list_t;
/* the hash table */
static list_t **hash_table;
// Function Declarations
void create(); // initialize hash table
unsigned int hash(char *key); // hash function for insert
void insert(char *name, int len, int type, int lineno); // insert entry
list_t *lookup(char *name); // search for entry
list_t *lookup_scope(char *name, int scope); // search for entry in scope
void hide_scope(); // hide the current scope
void incr_scope(); // go to next scope
void dump(FILE *of); // dump file
This is symbol table code file.
#include "symbols.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* current scope */
int cur_scope = 0;
void create()
{
int i;
hash_table = malloc(SIZE * sizeof(list_t*));
for(i = 0; i < SIZE; i++) hash_table[i] = NULL;
}
unsigned int hash(char *key)
{
unsigned int hashval = 0;
for(;*key!='\0';key++) hashval += *key;
hashval += key[0] % 11 + (key[0] << 3) - key[0];
return hashval % SIZE;
}
void insert(char *name, int len, int type, int lineno)
{
unsigned int hashval = hash(name); // hash function used
list_t *l = hash_table[hashval];
while ((l != NULL) && (strcmp(name,l->st_name) != 0)) l = l->next;
/* variable not yet in table */
if (l == NULL){
l = (list_t*) malloc(sizeof(list_t));
strncpy(l->st_name, name, len);
/* add to hashtable */
l->st_type = type;
l->scope = cur_scope;
l->lines = (RefList*) malloc(sizeof(RefList));
l->lines->lineno = lineno;
l->lines->next = NULL;
l->next = hash_table[hashval];
hash_table[hashval] = l;
// printf("Inserted %s for the first time with linenumber %d!\n", name, lineno); // error checking
}
/* found in table, so just add line number */
else{
l->scope = cur_scope;
RefList *t = l->lines;
while (t->next != NULL) t = t->next;
/* add linenumber to reference list */
t->next = (RefList*) malloc(sizeof(RefList));
t->next->lineno = lineno;
t->next->next = NULL;
// printf("Found %s again at line %d!\n", name, lineno);
}
}
list_t *lookup(char *name)
{ /* return symbol if found or NULL if not found */
unsigned int hashval = hash(name);
list_t *l = hash_table[hashval];
while ((l != NULL) && (strcmp(name,l->st_name) != 0)) l = l->next;
return l; // NULL is not found
}
list_t *lookup_scope(char *name, int scope)
{ /* return symbol if found or NULL if not found */
unsigned int hashval = hash(name);
list_t *l = hash_table[hashval];
while ((l != NULL) && (strcmp(name,l->st_name) != 0) && (scope != l->scope)) l = l->next;
return l; // NULL is not found
}
void hide_scope()
{ /* hide the current scope */
if(cur_scope > 0) cur_scope--;
}
void incr_scope()
{ /* go to next scope */
cur_scope++;
}
/* print to stdout by default */
void dump(FILE * of)
{
int i; int count; // record whether first line prints or not.
fprintf(of,"------------ ----------------- -------------\n");
fprintf(of,"Name Type Line Numbers\n");
fprintf(of,"------------ ----------------- -------------\n");
for (i=0; i < SIZE; ++i){
if (hash_table[i] != NULL){
list_t *l = hash_table[i];
while (l != NULL){
RefList *t = l->lines;
fprintf(of,"%-12s ",l->st_name);
printf("%s\n", l->st_name); // print out all the names in the symbol table
if (l->st_type == INT_TYPE) fprintf(of,"%-7s","int");
else if (l->st_type == REAL_TYPE) fprintf(of,"%-7s","real");
else if (l->st_type == STR_TYPE) fprintf(of,"%-7s","string");
else if (l->st_type == LOGIC_TYPE) fprintf(of,"%-7s","bool");
else if (l->st_type == CONST_INT_TYPE) fprintf(of, "%-7s", "const_int"); // constant_int_type
else if (l->st_type == CONST_REAL_TYPE) fprintf(of, "%-7s", "const_real"); // constant_real_type
else if (l->st_type == CONST_STR_TYPE) fprintf(of, "%-7s", "const_string"); // constant_string_type
else if (l->st_type == CONST_LOGIC_TYPE) fprintf(of, "%-7s", "const_bool"); // const_logic_type
else if (l->st_type == ARRAY_TYPE){
fprintf(of,"array of ");
if (l->inf_type == INT_TYPE) fprintf(of,"%-7s","int");
else if (l->inf_type == REAL_TYPE) fprintf(of,"%-7s","real");
else if (l->inf_type == STR_TYPE) fprintf(of,"%-7s","string");
else if (l->inf_type == LOGIC_TYPE) fprintf(of,"%-7s","bool");
else fprintf(of,"%-7s","undef");
}
else if (l->st_type == FUNCTION_TYPE){
fprintf(of,"%-7s %s","function returns ");
if (l->inf_type == INT_TYPE) fprintf(of,"%-7s","int");
else if (l->inf_type == REAL_TYPE) fprintf(of,"%-7s","real");
else if (l->inf_type == STR_TYPE) fprintf(of,"%-7s","string");
else if (l->inf_type == LOGIC_TYPE) fprintf(of,"-7%s","bool");
else fprintf(of,"%-7s","undef");
}
else fprintf(of,"%-7s","undef"); // if UNDEF or 0
count = 0;
while (t != NULL){
if (count == 0)
{
if (l->st_type == INT_TYPE || l->st_type == REAL_TYPE || l->st_type == STR_TYPE || l->st_type == UNDEF)
fprintf(of,"%13d ", t->lineno);
else if (l->st_type == CONST_INT_TYPE || l->st_type == CONST_REAL_TYPE || l->st_type == CONST_STR_TYPE || l->st_type == CONST_LOGIC_TYPE)
fprintf(of,"%10d", t->lineno);
else if (l->st_type == ARRAY_TYPE || l->st_type == FUNCTION_TYPE)
fprintf(of,"%4d", t->lineno);
}
else
fprintf(of,"%3d", t->lineno);
count++;
t = t->next;
}
fprintf(of,"\n");
l = l->next;
}
}
}
}
scanner file
%option noyywrap
%{
#include "symbols.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.tab.h"
#define LIST strcat(buf,yytext)
#define token(t) {LIST; printf("<%s>\n");}
#define tokenInteger(t, i) {LIST; printf("<%s: %d>\n", t, i);}
#define tokenReal(t, r) {LIST; printf("<%s: %lf>\n", t, r);}
#define tokenString(t, s) {LIST; printf("<%s: %s>\n", t, s);}
#define MAX_LINE_LENG 256
extern FILE* yyin;
extern FILE* yyout;
int linenum = 1;
char buf[MAX_LINE_LENG];
char* tempStr;
int indexForStr = 1;
list_t* temp;
%}
%x Comment
%%
"//".* {LIST;}
"/*" {BEGIN(Comment); LIST;}
<Comment>"*/" {LIST; BEGIN(0);}
<Comment>\n {LIST; printf("%d: %s\n", linenum++, buf); buf[0] = '\0';}
<Comment>. {LIST;}
"bool" {
token("BOOL");
return BOOL;
}
"break" {token("BREAK"); return BREAK;}
"char" {token("CHAR"); return CHAR;}
"continue" {token("CONTINUE"); return CONTINUE;}
"do" {token("DO"); return DO;}
"else" {token("ELSE"); return ELSE;}
"enum" {token("ENUM"); return ENUM;}
"extern" {token("EXTERN"); return EXTERN;}
"false" {token("FALSE"); yylval.boolVal = 0; return FALSE;}
"float" {
token("FLOAT");
return FLOAT;
}
"for" {token("FOR"); return FOR;}
"fn" {token("FN"); return FN;}
"if" {token("IF"); return IF;}
"in" {token("IN"); return IN;}
"int" {
token("INT");
return INT;
}
"let" {token("LET"); return LET;}
"loop" {token("LOOP"); return LOOP;}
"match" {token("MATCH"); return MATCH;}
"mut" {token("MUT"); return MUT;}
"print" {token("PRINT"); return PRINT;}
"println" {token("PRINTLN"); return PRINTLN;}
"pub" {token("PUB"); return PUB;}
"return" {token("RETURN"); return RETURN;}
"self" {token("SELF"); return SELF;}
"static" {token("STATIC"); return STATIC;}
"str" {
token("STR");
return STR;
}
"struct" {token("STRUCT"); return STRUCT;}
"true" {token("TRUE"); yylval.boolVal = 1; return TRUE;}
"use" {token("USE"); return USE;}
"where" {token("WHERE"); return WHERE;}
"while" {token("WHILE"); return WHILE;}
"," {token("','"); return ',';}
":" {token("':'"); return ':';}
";" {token("';'"); return ';';}
"(" {token("'('"); return '(';}
")" {token("')'"); return ')';}
"[" {token("'['"); return '[';}
"]" {token("']'"); return ']';}
"{" {token("'{'"); return '{';}
"}" {token("'}'"); return '}';}
"+" {token("'+'"); return '+';}
"-" {token("'-'"); return '-';}
"*" {token("'*'"); return '*';}
"/" {token("'/'"); return '/';}
"++" {token("'++'"); return '++';}
"--" {token("'--'"); return '--';}
"%" {token("'%'"); return '%';}
"<" {token("'<'"); return LESS;}
"<=" {token("'<='"); return '<=';}
">=" {token("'>='"); return '>=';}
">" {token("'>'"); return GREATER;}
"==" {token("'=='"); return '==';}
"!=" {token("'!='"); return '!=';}
"&&" {token("'&&'"); return '&&';}
"||" {token("'||'"); return '||';}
"!" {token("'!'"); return EXCLAMATION;}
"=" {token("'='"); return ASSIGN;}
"+=" {token("'+='"); return '+=';}
"-=" {token("'-='"); return '-=';}
"*=" {token("'*='"); return '*=';}
"/=" {token("'/='"); return '/=';}
"->" {token("'->'"); return ARROW;}
"read" {token("'READ'"); return READ;}
[A-Z_a-z]([A-Z_a-z]|[0-9])* {
insert(yytext, yyleng, UNDEF, linenum);
yylval.symptr = lookup(yytext);
tokenString("id", yylval.symptr->st_name);
return ID;
}
"0"|[0-9][0-9]* {
sscanf(yytext, "%d", &yylval.intVal);
tokenInteger("integer", yylval.intVal);
return INTEGER;
}
[0-9_]+"."[0-9_]|[0-9_][Ee][+-]?[0-9_]+ {
yylval.floatVal = atof(yytext);
tokenReal("real", yylval.floatVal);
return REAL;
}
\"([\\.]|[^\\"])*\" {
tempStr = malloc((strlen(yytext) - 1) * sizeof(char));
for (int i = 0; i < strlen(yytext) - 2; i++)
{
tempStr[i] = yytext[indexForStr];
indexForStr++;
}
tempStr[strlen(yytext) - 1] = '\0';
yylval.stringVal = strdup(yytext);
tokenString("string", tempStr);
free(tempStr);
indexForStr = 1;
return STRING;
}
\n {
LIST;
printf("%d: %s", linenum++, buf);
buf[0] = '\0';
}
[ \t]* {LIST;}
. {
LIST;
printf("%d:%s\n", linenum+1, buf);
printf("bad character:'%s'\n",yytext);
exit(-1);
}
%%
parser file
%{
#include "symbols.c"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#define Trace(t, line) printf(t, line) // Trace where the error occurs and print the line number
#ifndef STRSIZE
#define STRSIZE 40
#endif
#ifndef PARAMSIZE
#define PARAMSIZE 40
#endif
extern FILE* yyin;
extern FILE* yyout;
extern int linenum;
extern int yylex();
void yyerror(char* msg);
%}
%union{
char* stringVal;
double floatVal;
int intVal;
int boolVal;
list_t* symptr;
}
/* tokens */
%token <symptr> ID
%token <intVal> INTEGER
%token <floatVal> REAL
%token <stringVal> STRING
%token <boolVal> TRUE FALSE
%token INT FLOAT STR BOOL
%token BREAK CHAR CONTINUE DO ELSE
%token ENUM EXTERN FOR
%token FN IF IN LET
%token LOOP MATCH MUT PRINT PRINTLN
%token RETURN SELF STATIC STRUCT
%token USE WHERE WHILE
%token READ PUB
%token LESS GREATER ASSIGN EXCLAMATION ARROW
/* precedence for operators */
%left '||'
%left '&&'
%left EXCLAMATION
%left LESS '<=' '>=' GREATER '==' '!='
%left '+' '-'
%left '*' '/'
%left UMINUS
/* types */
%type <intVal> integer_exp
%type <floatVal> real_exp
%type <stringVal> string_exp
%type <boolVal> bool_exp
%start program /* the initial entry point */
%%
program: functions | global_declaration functions
;
global_declaration: global_declaration constant_declaration
| global_declaration variable_declaration
| global_declaration array_declaration
| constant_declaration
| variable_declaration
| array_declaration
;
local_declaration: local_declaration constant_declaration
| local_declaration variable_declaration
| local_declaration array_declaration
| constant_declaration
| variable_declaration
| array_declaration
;
block: start local_declaration statements end
| start local_declaration end
| start statements end
| start end
;
start: '{' {
incr_scope();
}
end: '}' {
hide_scope();
}
;
functions: functions function
| function
;
function: FN ID '(' ')' start local_declaration statements end{
if ($2->st_type == UNDEF)
{
$2->st_type = FUNCTION_TYPE;
$2->inf_type = UNDEF;
}
else
{
Trace("line %d: Redeclaration of identifier.\n", linenum);
}
}
| FN ID '(' ')' start statements end {
if ($2->st_type == UNDEF)
{
$2->st_type = FUNCTION_TYPE;
$2->inf_type = UNDEF;
}
else
{
Trace("line %d: Redeclaration of identifier.\n", linenum);
}
}
| FN ID '(' ')' start local_declaration end {
if ($2->st_type == UNDEF)
{
$2->st_type = FUNCTION_TYPE;
$2->inf_type = UNDEF;
}
else
{
Trace("line %d: Redeclaration of identifier.\n", linenum);
}
}
| FN ID '(' ')' start end {
if ($2->st_type == UNDEF)
{
$2->st_type = FUNCTION_TYPE;
$2->inf_type = UNDEF;
}
else
{
Trace("line %d: Redeclaration of identifier.\n", linenum);
}
;
%%
void yyerror(char* msg)
{
fprintf(stderr, "line %d: %s\n", linenum, msg);
}
int main(int argc, char** argv)
{
/* create the hash table */
create();
/* open the source program file */
if (argc != 2) {
printf ("Usage: sc filename\n");
exit(1);
}
yyin = fopen(argv[1], "r"); /* open input file */
int flag;
flag = yyparse();
/* perform parsing */
if (flag == 1) /* parsing */
yyerror("Parsing error !"); /* syntax error */
fclose(yyin); /* close input file */
/* output symbol table */
printf("\nSymbol table:\n");
yyout = fopen("dump.out", "w");
dump(yyout);
fclose(yyout);
return 0;
}
Clearly, the problem occurs during the dump function when the token type is FUNCTION_TYPE. That's clear from the debugging output (dump is presumably executing when the segfault occurs) and from the change report (the problem occurs when an action sets the st_type field to FUNCTION_TYPE).
Visual inspection of the if clause in dump() corresponding to that condition reveals the following obvious error:
fprintf(of,"%-7s %s","function returns ");
That call to fprintf has a format string with two %s conversions. However, there is only one argument to be inserted.
The real question you should be asking yourself is "How can I easily find stupid typos like this without spending a lot of time or resorting to outside experts?"
As a first approximation, that error is so common and easy to detect that most compilers will warn you about it. So your first step is to make sure you always compile with warnings enabled (-Wall if you are using gcc or clang), and that you read the warnings.
Even without the warning, it would have been straight-forward to find the error with a debugger such as gdb. Just set a breakpoint at dump and single-step until the segfault occurs.
Finally, you are making your life much more complicated when you first build a large complex program with a lot of components and only then start to debug it. In the long run, you will find that it is worth taking the time to test each component individually (your symbol table, for example), using some kind of test harness, and only assembling your more complex program when you are reasonably confident that the individual pieces work. That will avoid the difficulty of identifying where the error occurred (as in this case, where you were evidently distracted by your doubts about the parser generator, leading you to miss the actual problem which has nothing to do with the parser).
By the way, strncpy is a definite red flag, although in this case you seem to have been lucky (or unlucky) enough to not encounter the bug. strncpy is pretty well never what you want, and if it is what you want then the length parameter should be the longest string you can accommodate rather than the length of the input string. (strncpy is intended for use in fixed-length formats, which is why it pads the output to the specified length.)
If you use the length of the input string, then you have two problems: (1) the copy is guaranteed to not be NUL-terminated, leading to Undefined Behaviour; and (2) nothing stops the copy from overrunning the output buffer, in the case that the input string is too long.
Even used correctly, strncpy requires you to manually NUL-terminate the output, which is a nuisance. A better solution is to first check that the string is not too long (length < SIZE) and then use strcpy, which will correctly NUL-terminate. Even better is to make the name field a char* instead of an array, and dynamically allocate a string of the correct length (see strdup, for example), thereby avoiding having to arbitrarily limit the size of identifiers.
I'm trying to build a calculator using lex/yacc where you can create an unlimited amount of variables and use them in calculations by using a linked list.
When you type "?", it should print out the contents of the linked list, it does do this, except afterwards, it gives me a: syntax error and ends my program.
The rest of the calculator works as expected, am I missing something?
Sample Output
-bash-4.1$ ./calc
a = 42
b = 21
c = a / b
?
num-syms: 5
PHI => 1.61803
PI => 3.14159
a => 42
b => 21
c => 2
syntax error
-bash-4.1$
sym.h
#ifndef SYMTBL_H
#define SYMTBL_H
struct sym {
int length;
char * name;
double value;
struct sym *prev;
struct sym *next;
};
struct sym * sym_p;
struct sym * sym_lookup(char *);
void sym_inventory();
#endif /* SYMTBL_H */
calc.l
%{
/*#include <math.h> */
#include "y.tab.h"
#include "sym.h"
%}
%%
"?" { sym_inventory(); }
([0-9]+|([0-9]*\.[0-9]+)([eE][+-]?[0-9]+)?) {
yylval.dval = atof(yytext);
return NUMBER;
}
[ \t] ; /* ignore whitespace */
[A-Za-z][A-Za-z0-9]* {
/* return symbol pointer */
yylval.symptr = sym_lookup(yytext);
return NAME;
}
"$" { return 0; }
\n |
. { return yytext[0]; };
%%
int yywrap() { return 1; }
calc.y
%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "sym.h"
%}
%union {
double dval;
struct sym * symptr;
}
%token <symptr> NAME
%token <dval> NUMBER
%left '-' '+'
%left '*' '/'
%nonassoc UMINUS
%type <dval> expression
%%
statement_list
: statement '\n'
| statement_list statement '\n'
;
statement
: NAME '=' expression {
char *name = $1->name;
if (strcmp(name, "PI") != 0 && strcmp(name, "PHI") != 0) {
$1->value = $3;
} else {
yyerror("assign to const");
}
}
| expression { printf("= %g\n", $1); }
;
expression
: expression '+' expression { $$ = $1 + $3; }
| expression '-' expression { $$ = $1 - $3; }
| expression '*' expression { $$ = $1 * $3; }
| expression '/' expression {
if ($3 == 0) {
yyerror("divide by zero");
} else {
$$ = $1 / $3;
}
}
| '-' expression %prec UMINUS { $$ = -$2; }
| '(' expression ')' { $$ = $2; }
| NUMBER
| NAME { $$ = $1->value; }
;
%%
struct sym * sym_lookup(char * s)
{
if (sym_p == NULL) {
sym_p = (struct sym *)malloc(sizeof(struct sym));
}
struct sym *original = sym_p;
struct sym * sp = sym_p;
if (sp->name == NULL) {
struct sym *n = (struct sym *)malloc(sizeof(struct sym));
n->name = "PI";
n->value = 3.14159;
n->next = NULL;
sp->name = "PHI";
sp->value = 1.61803;
sp->next = n;
sp->length = 2;
}
while (1) {
if (sp->name == NULL) {
sp->name = strdup(s);
sp->next = NULL;
return sp;
} else if (strcmp(sp->name, s) == 0) {
return sp;
} else if (sp->next != NULL) {
sp = sp->next;
} else {
struct sym *n = (struct sym *)malloc(sizeof(struct sym));
n->name = strdup(s);
sp = original;
struct sym *old = NULL;
while (1) {
if (strcmp(sp->name, s) > 0) {
// new variable name comes before in ascii table
if (old == NULL) {
// new node insert at beginning of sym_p
n->next = original;
n->length = original->length;
sym_p = n;
original = sym_p;
sp = original;
} else {
// insert in middle and update links
old->next = n;
n->next = sp;
sp = original;
}
break;
} else {
if (sp->next != NULL) {
old = sp;
sp = sp->next;
} else {
sp->next = n;
break;
}
}
}
sp = original;
sp->length++;
return n;
}
}
}
void sym_inventory()
{
struct sym * sp = sym_p;
printf("num-syms: %d\n", sp->length);
int i;
int length = sp->length;
for (i = 0; i < length; i++) {
printf("\t%s => %g\n", sp->name, sp->value);
sp = sp->next;
}
}
The problem
Your grammar doesn't recognize an empty line as a valid input. When you type ? followed by a newline, the newline is returned, and that isn't syntactically valid, hence the 'syntax error' report.
You could demonstrate by typing ?a + b as an input, for example.
Note that your lexical analyzer processes the ? without letting the parser know that it happened; the parser never sees the ?.
Prodding to get enough information to make the question answerable
This adaptation of your Lex code shows that the lexical analyzer recognizes ? as an input. Your question doesn't show how the code is used, so there's not enough information for us to know what you're doing wrong. You need to provide an MCVE (Minimal, Complete, Verifiable Example) so that you can get the relevant help.
%option noinput
%option nounput
%%
"?" { printf("Got a ?\n"); }
([0-9]+|([0-9]*\.[0-9]+)([eE][+-]?[0-9]+)?) {
printf("Number: %s\n", yytext);
}
[ \t] { printf("Space: [%s]\n", yytext); }
[A-Za-z][A-Za-z0-9]* {
printf("Name: %s\n", yytext);
}
"$" { printf("Dollar\n"); return 0; }
\n |
. { printf("Other: %c\n", yytext[0]); return yytext[0]; };
%%
int yywrap(void) { return 1; }
int main(void)
{
while (yylex() != 0)
;
return 0;
}
Sample run (shell prompt is JL: rather than $ because one of the inputs is $):
JL: ./xy73
a b 23
Name: a
Space: [ ]
Name: b
Space: [ ]
Number: 23
Other:
?
Got a ?
Other:
=#%
Other: =
Other: #
Other: %
Other:
$
Dollar
JL:
The ? is followed by a newline which is returned as a character token, but your grammar only accepts a newline after a statement, which always starts with a NAME.
I've write a parser for evaluating a logical expression. I know flex and bison use global variables (like yylval). I want a pure parser and a reentrant scanner for thread programming. My '.y' file is here:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
//int yylex (YYSTYPE* lvalp);
int yylex(void);
bool parseExpression(const std::string& inp);
%}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
void yyerror(char *s) {
fprintf(stderr, "%s\n", s);
}
void main(void) {
std::string inp = "0|0\n";
bool nasi = parseExpression(inp);
printf("%s%d\n", "nasi ", nasi);
printf("Press ENTER to close. ");
getchar();
}
My '.y' file is here:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror(char *);
%}
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
yylval = false;
//*lvalp = false;
}
else
{
yylval = true;
//*lvalp = true;
}
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror("Unknown character");
%%
int yywrap(void) {
return 1;
}
bool parseExpression(const std::string& inp)
{
yy_delete_buffer(YY_CURRENT_BUFFER);
/*Copy string into new buffer and Switch buffers*/
yy_scan_string(inp.c_str());
bool nasi = yyparse();
return nasi;
}
I've added %pure_parser to both files, changed yylex declaration to int yylex (YYSTYPE* lvalp); and replaced yylval to *lvalp, but I saw an error: 'lvalp' is undeclared identifier.. There are many examples about 'reentrant' and 'pure', but I can't find the best guideline.
Could someone guide me?
Thanks in advance.
Fortunately, I did it. Here is my code. I think it can be a good guideline for who wants write a pure parser.ل
My reentrant scanner:
/* Lexer */
%{
#include "parser.tab.h"
#include <stdlib.h>
#include <string>
#define YYSTYPE bool
void yyerror (yyscan_t yyscanner, char const *msg);
%}
%option reentrant bison-bridge
%%
[0-1] {
if (strcmp(yytext, "0")==0)
{
*yylval = false;
}
else
{
*yylval = true;
}
//yylval = atoi(yytext);
return INTEGER;
}
[&|!()\n] { return *yytext; }
[ \t] ; /* skip whitespace */
. yyerror (yyscanner, "Unknown character");
%%
int yywrap(yyscan_t yyscanner)
{
return 1;
}
bool parseExpression(const std::string& inp)
{
yyscan_t myscanner;
yylex_init(&myscanner);
struct yyguts_t * yyg = (struct yyguts_t*)myscanner;
yy_delete_buffer(YY_CURRENT_BUFFER,myscanner);
/*Copy string into new buffer and Switch buffers*/
yy_scan_string(inp.c_str(), myscanner);
bool nasi = yyparse(myscanner);
yylex_destroy(myscanner);
return nasi;
}
My pure parser:
%{
#include <stdio.h>
#include <string>
#define YYSTYPE bool
typedef void* yyscan_t;
void yyerror (yyscan_t yyscanner, char const *msg);
int yylex(YYSTYPE *yylval_param, yyscan_t yyscanner);
bool parseExpression(const std::string& inp);
%}
%define api.pure full
%lex-param {yyscan_t scanner}
%parse-param {yyscan_t scanner}
%token INTEGER
%left '&' '|'
%%
program:
program statement '\n'
| /* NULL */
;
statement:
expression { printf("%d\n", $1); return $1; }
;
expression:
INTEGER
| expression '|' expression { $$ = $1 | $3; }
| expression '&' expression { $$ = $1 & $3; }
| '(' expression ')' { $$ = $2; }
| '!' expression { $$ = !$2; }
;
%%
void yyerror (yyscan_t yyscanner, char const *msg){
fprintf(stderr, "%s\n", msg);
}
void main(void) {
std::string inp = "1|0\n";
bool nasi = parseExpression(inp);
printf("%s%d\n", "nasi ", nasi);
printf("Press ENTER to close. ");
getchar();
}
Notice that I've cheat and defined yyg myself as
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
I don't find another way to get the YY_CURRENT_BUFFER. So, If someone knows the best way to get the YY_CURRENT_BUFFER, tell me,plz.
Here is a complete Flex/Bison C++ example. Everything is reentrant, no use of global variables. Both parser/lexer are encapsulated in a class placed in a separate namespace. You can instantiate as many "interpreters" in as many threads as you want.
https://github.com/ezaquarii/bison-flex-cpp-example
Disclaimer: it's not tested on Windows, but the code should be portable with minor tweaks.
I'm writing a simple calculator based on .gertrude esolang. What I'm trying to do is parse a text file that contains ratios (in the form of n/m) with flex, than check if the ratio is an index for an operation (+ - / *) or a number and than send the right token to Bison. I get no error when the code are compiled, but when the program is running returns a -segmentation fault core dump - for every kind of input (like 1/2 14/10 1/8 that should be 2 + 8).
Here gertrude.l
%{
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "gertrude.tab.h"
void yyerror(char *);
int FrazioneToDecimale(char *str1){
int num, den;
unsigned tot;
char *token;
char *deli;
const char del = '/';
*deli = del;
token = strtok (str1, deli);
num = atoi(token);
token = strtok (NULL, deli);
den = atoi(token);
tot = 1 / (num/den);
return tot;
}
%}
%%
/* ratio */
"14/10" {
yylval.sval = '+';
return SOMMA;
}
"11/7" {
yylval.sval = '-';
return SOTTRAZIONE;
}
"6/16" {
yylval.sval = '*';
return MOLTIPLICAZIONE;
}
"5/8" {
yylval.sval = '/';
return DIVISIONE;
}
[0-9]+"/"[0-9]+ {
//yylval = *yytext ;
yylval.ival = FrazioneToDecimale(yytext);
return NUMERO;
}
[ \t] ;
[ \n] { return EOL; };
%%
int yywrap(void) {
return 0;
}
Here gertrude.y
%{
#include <stdio.h>
#include <string.h>
%}
%union {
int ival;
char sval;
}
%type <ival> exp fattore termine
%token <ival> NUMERO
%token <sval> SOMMA SOTTRAZIONE MOLTIPLICAZIONE DIVISIONE
%token EOL
%%
istruzione:
| istruzione exp EOL { printf("= %d\n", $2); }
;
exp: fattore
| exp SOMMA fattore { $$ = $1 + $3; }
| exp SOTTRAZIONE fattore { $$ = $1 - $3; }
;
fattore: termine
| fattore MOLTIPLICAZIONE termine { $$ = $1 * $3; }
| fattore DIVISIONE termine { $$ = $1 / $3; }
;
termine: NUMERO { $$ = $1; }
;
%%
int main(void) {
yyparse();
}
yyerror(char *s) {
fprintf(stderr, "error: %s\n\n", s);
}
Thanks in advance for any kind of advice!
Your code has a problem with pointers and strings. This is a C problem, not a Bison or Flex problem.
Look at these lines from gertrude.l:
char *deli;
const char del = '/';
*deli = del;
Your pointer variable deli is uninitialized and contains garbage, so it might point anywhere. Then you follow that pointer to where it points (anywhere!) and you put a character there. This causes the program to crash. Plus the string (wherever it is) isn't NUL-terminated.
Simply replace those three lines with this line:
char *deli = "/";