lex and yacc analyzer - c

Now here is my lex code
%{
#include <stdio.h>
#include "y.tab.h"
extern int yylval;
int yywrap();
%}
%%
[a-zA-Z] {yylval = *yytext; return ALPHABET;}
[0-9]+ {yylval = atoi(yytext); return NUMBER;}
[0-9]+"."[0-9]* {yylval = atof(yytext); return NUMBER;}
"==" return EQ;
"<=" return LE;
">=" return GE;
"!=" return NE;
[\t] ;
\n return 0;
. return yytext[0];
%%
/* lex end-of-input hook: always answers 1 (by lex convention, "no further input"). */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
and here is my yacc code
%{
#include<stdio.h>
extern int yylex();
extern int yyparse();
extern FILE* yyin;
int flag = 0;
%}
%token NUMBER
%token ALPHABET
%left '+''-'
%left '*''/''%'
%left '&''|''^''~'
%right EQ LE GE NE'<''>'
%left '('')'
%left UMINUS
%left UPLUS
%start check
%%
check : E { }
E:E '+' E {$$ = $1 + $3;}
|E '-' E {$$ = $1 - $3;}
|E '&' E {$$ = $1 & $3;}
|E '|' E {$$ = $1 | $3;}
|E '^' E {$$ = $1 ^ $3;}
|'~' E {$$ = ~$2;}
|E EQ E {$$ = (EQ, $1, $3);}
|E LE E {$$ = (LE, $1, $3);}
|E GE E {$$ = (GE, $1, $3);}
|E NE E {$$ = (NE, $1, $3);}
|E '<' E {$$ = ('<', $1, $3);}
|E '>' E {$$ = ('>', $1, $3);}
|'(' E ')' {$$ = $2;}
|'-' E %prec UMINUS
{$$ = - $2;}
|'+' E %prec UPLUS
{$$ = + $2;}
|NUMBER {$$ = $1;}
|ALPHABET {$$ = $1;}
;
%%
/*
 * Prompts for a file name, opens it as yyin, echoes its contents, runs the
 * parser and prints a verdict based on the global flag (set by yyerror()).
 * Returns 1 if the file cannot be opened, 0 otherwise. argc/argv are unused.
 */
int main(int argc, char** argv)
{
char filename[30];
char line[300];
printf("\nEnter filename\n");
/* NOTE(review): "%s" has no field width, so input is not limited to the 30-byte buffer. */
scanf("%s",filename);
yyin = fopen(filename, "r");
if(NULL == yyin)
{
fprintf(stderr,"Can't read file %s\n",filename);
return 1;
}
else
{
/* Echo the file; this loop reads yyin all the way to end-of-file. */
while(fgets(line, sizeof line, yyin) != NULL)
{
printf("%s\n", line);
}
/* The parser is started only after the loop above has consumed yyin. */
yyparse();
fclose(yyin);
/* Second yyparse() call, made after yyin has already been closed. */
printf("\nValue of yyparse : %d\n",yyparse());
}
/* flag stays 0 only if yyerror() was never called. */
if(flag == 0)
printf("\nBoolean Arithmetic Expression is valid\n");
return 0;
}
/* Parser error hook: marks the input as invalid in flag and says so on stdout. */
void yyerror()
{
    flag = 1;
    fputs("\nBoolean Arithmetic expression is invalid\n\n", stdout);
}
This is my main part for reading a text file and doing some operations. Can anyone tell me how to read multiple lines of a text file using Yacc? I have posted my full Yacc code. I am trying to check whether each Boolean expression is correct or not; the expressions in my text file are:
a-b
a+b&c
(P!=F+E-O+F-(U>Y+I<N))
(((a+B)-7+4-(c-d))+((P^q)-(L|z))+(m&n)+(O-g)
((A-2)&(B+2)|(C-4)^(D+4)+(~E))==F+(G!=)-(i<j)-(K>M)
((((a+b)-(c+d))-((E-F)+(G-H)))+((a&B)+(c|d))-((e^f)+(~g)+(i==2)-(j!=2)+(k<=8)-(l>=17.98)+(M<N)-(O>p)-((-2+4)+(6-(-5)))))
My code checks only the first expression, so my problem is how to check all the expressions line by line.
Please check where the problem is in reading the text line by line, and how to print a message saying whether each expression is valid or not.
Some expressions are valid and some are invalid, so please tell me what the problem is and how to correct it.

Your grammar only handles a single ArithmeticExpression, and once that is done the parser returns.
One way to solve your problem is to modify the parser grammar just a little, so it handles multiple "lines" (or rather multiple expressions in your case) itself:
ArithmeticExpression_list
: ArithmeticExpression
| ArithmeticExpression_list ArithmeticExpression
;
Then you simply use the return value of the yyparse() function to see if parsing was successful or not. If yyparse() returns 0 then all expressions were syntactically okay.
If you want to print a message for each and every expression, just add a semantic action for ArithmeticExpression; if there's a syntax error, the action will not be invoked.

The structure of main() is wrong. It reads the whole of the input file using fgets(), and then seems to expect yyparse() to read some more information from it.
What you probably need to do is:
while (yyparse() == 0)
printf("OK\n");
Well, something along those lines. You might need to analyze flag, and/or set flag to 0 (or 1) after each iteration. If you want to read lines and have the grammar read the string, you have more work to do — setting up appropriate input functions, etc.
To get the code to compile cleanly, I used the following code. I added #define YY_NO_INPUT and full prototypes and various other changes — and cleaned up the triple comma expressions in the grammar for the comparison operations. They probably aren't what you'll use in the long term, but they satisfy the stringent compiler options I use.
testlex.l
%{
#include <stdio.h>
#include "y.tab.h"
#define YY_NO_INPUT
extern int yylval;
int yywrap(void);
extern void use_yyunput(char c);
%}
%%
[a-zA-Z] {yylval = *yytext; return ALPHABET;}
[0-9]+ {yylval = atoi(yytext); return NUMBER;}
[0-9]+"."[0-9]* {yylval = atof(yytext); return NUMBER;}
"==" return EQ;
"<=" return LE;
">=" return GE;
"!=" return NE;
[\t] ;
\n return 0;
. return yytext[0];
%%
/* lex end-of-input hook: always answers 1 (by lex convention, "no further input"). */
int yywrap(void)
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
/*
 * Pushes the character c back onto the scanner's input through unput().
 * NOTE(review): nothing in this file calls it; presumably it exists only so
 * that the generated unput support code is referenced and does not trigger
 * an unused-function diagnostic under -Werror -- confirm.
 */
void use_yyunput(char c)
{
unput(c);
}
testyacc.y
%{
#include <stdio.h>
#include <assert.h>
extern int yylex(void);
extern int yyparse(void);
extern void yyerror(char *str);
extern FILE* yyin;
int flag = 0;
%}
%token NUMBER
%token ALPHABET
%left '+''-'
%left '*''/''%'
%left '&''|''^''~'
%right EQ LE GE NE'<''>'
%left '('')'
%left UMINUS
%left UPLUS
%start check
%%
check : E { }
E:E '+' E {$$ = $1 + $3;}
|E '-' E {$$ = $1 - $3;}
|E '&' E {$$ = $1 & $3;}
|E '|' E {$$ = $1 | $3;}
|E '^' E {$$ = $1 ^ $3;}
|'~' E {$$ = ~$2;}
|E EQ E {$$ = ($1 == $3);}
|E LE E {$$ = ($1 <= $3);}
|E GE E {$$ = ($1 >= $3);}
|E NE E {$$ = ($1 != $3);}
|E '<' E {$$ = ($1 < $3);}
|E '>' E {$$ = ($1 > $3);}
|'(' E ')' {$$ = $2;}
|'-' E %prec UMINUS
{$$ = - $2;}
|'+' E %prec UPLUS
{$$ = + $2;}
|NUMBER {$$ = $1;}
|ALPHABET {$$ = $1;}
;
%%
/*
 * Prompts for a file name, opens it as the scanner input and calls yyparse()
 * repeatedly, printing "OK" for every call that returns 0. The loop stops at
 * the first non-zero result. The final "valid" message is printed only if
 * yyerror() never set flag.
 *
 * Returns 1 if no file name could be read or the file cannot be opened,
 * 0 otherwise.
 */
int main(void)
{
    char filename[30];

    printf("\nEnter filename\n");
    /* The width 29 keeps the name plus its terminator inside filename[30]. */
    if (scanf("%29s", filename) != 1)
    {
        fprintf(stderr, "Can't read a file name\n");
        return 1;
    }

    yyin = fopen(filename, "r");
    if (NULL == yyin)
    {
        fprintf(stderr, "Can't read file %s\n", filename);
        return 1;
    }

    /* One yyparse() call per expression: the scanner returns 0 at each newline. */
    while (yyparse() == 0)
    {
        printf("OK\n");
    }
    fclose(yyin);

    if (flag == 0)
        printf("\nBoolean Arithmetic Expression is valid\n");
    return 0;
}
/*
 * Parser error hook. The message text itself is not printed; it is only
 * checked to be non-null. Marks the input as invalid in flag and reports
 * the fact on stdout.
 */
void yyerror(char *str)
{
    assert(str != 0);
    flag = 1;
    fputs("\nBoolean Arithmetic expression is invalid\n\n", stdout);
}
Compilation and run
The file data contained the data from the question. rmk is a variation on the theme of make.
$ rmk testyacc.o testlex.o
yacc testyacc.y
gcc -O3 -g -I/Users/jleffler/inc -std=c11 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wold-style-definition -Werror -c y.tab.c
mv y.tab.o testyacc.o
rm -f y.tab.c
lex testlex.l
gcc -O3 -g -I/Users/jleffler/inc -std=c11 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wold-style-definition -Werror -c lex.yy.c
mv lex.yy.o testlex.o
rm -f lex.yy.c
$ gcc -o testit testyacc.o testlex.o
$ ./testit
Enter filename
data
OK
OK
OK
Boolean Arithmetic expression is invalid
$
Your stack uses integers, I believe. The use of atof() isn't going to help very much. That mainly means you've still got a lot of work to do.
Testing on Mac OS X 10.11.2 with GCC 5.3.0 (with flex 2.5.35 Apple(flex-31) and
bison (GNU Bison) 2.3 masquerading as lex and yacc).

Related

Parser gives a unknown type name error for entry_t

I'm editing my first parser and I'm very new to compiler design. I'm using a hashtable to store tokens. I have created a struct called entry_t in my symboltable.h file.
When I try to use entry_t in the .y file under %union (so that it can be used in the lex file), it gives an error at compile time:
parser.y:17:2: error: unknown type name 'entry_t' entry_t** entry;
parser.y file :
%{
#include <stdlib.h>
#include <stdio.h>
#include "symboltable.h"
entry_t** symbol_table;
entry_t** constant_table;
double Evaluate (double lhs_value,int assign_type,double rhs_value);
int current_dtype;
int yyerror(char *msg);
%}
%union
{
double dval;
entry_t** entry;
int ival;
}
%token <entry> IDENTIFIER
/* Constants */
%token <dval> DEC_CONSTANT HEX_CONSTANT
%token STRING
/* Logical and Relational operators */
%token LOGICAL_AND LOGICAL_OR LS_EQ GR_EQ EQ NOT_EQ
/* Short hand assignment operators */
%token MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN SUB_ASSIGN
%token LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN
%token INCREMENT DECREMENT
/* Data types */
%token SHORT INT LONG LONG_LONG SIGNED UNSIGNED CONST
/* Keywords */
%token IF FOR WHILE CONTINUE BREAK RETURN
%type <dval> expression
%type <dval> sub_expr
%type <dval> constant
%type <dval> unary_expr
%type <dval> arithmetic_expr
%type <dval> assignment_expr
%type <entry> lhs
%type <ival> assign_op
%start starter
%left ','
%right '='
%left LOGICAL_OR
%left LOGICAL_AND
%left EQ NOT_EQ
%left '<' '>' LS_EQ GR_EQ
%left '+' '-'
%left '*' '/' '%'
%right '!'
%nonassoc UMINUS
%nonassoc LOWER_THAN_ELSE
%nonassoc ELSE
%%
/* Program is made up of multiple builder blocks. */
starter: starter builder
|builder;
/* Each builder block is either a function or a declaration */
builder: function|
declaration;
/* This is how a function looks like */
function: type IDENTIFIER '(' argument_list ')' compound_stmt;
/* Now we will define a grammar for how types can be specified */
type :data_type pointer
|data_type;
pointer: '*' pointer
|'*'
;
data_type :sign_specifier type_specifier
|type_specifier
;
sign_specifier :SIGNED
|UNSIGNED
;
type_specifier :INT {current_dtype = INT;}
|SHORT INT {current_dtype = SHORT;}
|SHORT {current_dtype = SHORT;}
|LONG {current_dtype = LONG;}
|LONG INT {current_dtype = LONG;}
|LONG_LONG {current_dtype = LONG_LONG;}
|LONG_LONG INT {current_dtype = LONG_LONG;}
;
/* grammar rules for argument list */
/* argument list can be empty */
argument_list :arguments
|
;
/* arguments are comma separated TYPE ID pairs */
arguments :arguments ',' arg
|arg
;
/* Each arg is a TYPE ID pair */
arg :type IDENTIFIER
;
/* Generic statement. Can be compound or a single statement */
stmt:compound_stmt
|single_stmt
;
/* The function body is covered in braces and has multiple statements. */
compound_stmt :'{' statements '}'
;
statements:statements stmt
|
;
/* Grammar for what constitutes every individual statement */
single_stmt :if_block
|for_block
|while_block
|declaration
|function_call ';'
|RETURN ';'
|CONTINUE ';'
|BREAK ';'
|RETURN sub_expr ';'
;
for_block:FOR '(' expression_stmt expression_stmt ')' stmt
|FOR '(' expression_stmt expression_stmt expression ')' stmt
;
if_block:IF '(' expression ')' stmt %prec LOWER_THAN_ELSE
|IF '(' expression ')' stmt ELSE stmt
;
while_block: WHILE '(' expression ')' stmt
;
declaration:type declaration_list ';'
|declaration_list ';'
| unary_expr ';'
declaration_list: declaration_list ',' sub_decl
|sub_decl;
sub_decl: assignment_expr
|IDENTIFIER {$1 -> data_type = current_dtype;}
|array_index
/*|struct_block ';'*/
;
/* This is because we can have empty expession statements inside for loops */
expression_stmt:expression ';'
|';'
;
expression:
expression ',' sub_expr {$$ = $1,$3;}
|sub_expr {$$ = $1;}
;
sub_expr:
sub_expr '>' sub_expr {$$ = ($1 > $3);}
|sub_expr '<' sub_expr {$$ = ($1 < $3);}
|sub_expr EQ sub_expr {$$ = ($1 == $3);}
|sub_expr NOT_EQ sub_expr {$$ = ($1 != $3);}
|sub_expr LS_EQ sub_expr {$$ = ($1 <= $3);}
|sub_expr GR_EQ sub_expr {$$ = ($1 >= $3);}
|sub_expr LOGICAL_AND sub_expr {$$ = ($1 && $3);}
|sub_expr LOGICAL_OR sub_expr {$$ = ($1 || $3);}
|'!' sub_expr {$$ = (!$2);}
|arithmetic_expr {$$ = $1;}
|assignment_expr {$$ = $1;}
|unary_expr {$$ = $1;}
/* |IDENTIFIER {$$ = $1->value;}
|constant {$$ = $1;} */
//|array_index
;
assignment_expr :lhs assign_op arithmetic_expr {$$ = $1->value = Evaluate($1->value,$2,$3);}
|lhs assign_op array_index {$$ = 0;}
|lhs assign_op function_call {$$ = 0;}
|lhs assign_op unary_expr {$$ = $1->value = Evaluate($1->value,$2,$3);}
|unary_expr assign_op unary_expr {$$ = 0;}
;
unary_expr: lhs INCREMENT {$$ = $1->value = ($1->value)++;}
|lhs DECREMENT {$$ = $1->value = ($1->value)--;}
|DECREMENT lhs {$$ = $2->value = --($2->value);}
|INCREMENT lhs {$$ = $2->value = ++($2->value);}
lhs:IDENTIFIER {$$ = $1; if(! $1->data_type) $1->data_type = current_dtype;}
//|array_index
;
assign_op:'=' {$$ = '=';}
|ADD_ASSIGN {$$ = ADD_ASSIGN;}
|SUB_ASSIGN {$$ = SUB_ASSIGN;}
|MUL_ASSIGN {$$ = MUL_ASSIGN;}
|DIV_ASSIGN {$$ = DIV_ASSIGN;}
|MOD_ASSIGN {$$ = MOD_ASSIGN;}
;
arithmetic_expr: arithmetic_expr '+' arithmetic_expr {$$ = $1 + $3;}
|arithmetic_expr '-' arithmetic_expr {$$ = $1 - $3;}
|arithmetic_expr '*' arithmetic_expr {$$ = $1 * $3;}
|arithmetic_expr '/' arithmetic_expr {$$ = ($3 == 0) ? yyerror("Divide by 0!") : ($1 / $3);}
|arithmetic_expr '%' arithmetic_expr {$$ = (int)$1 % (int)$3;}
|'(' arithmetic_expr ')' {$$ = $2;}
|'-' arithmetic_expr %prec UMINUS {$$ = -$2;}
|IDENTIFIER {$$ = $1 -> value;}
|constant {$$ = $1;}
;
constant: DEC_CONSTANT {$$ = $1;}
|HEX_CONSTANT {$$ = $1;}
;
array_index: IDENTIFIER '[' sub_expr ']'
function_call: IDENTIFIER '(' parameter_list ')'
|IDENTIFIER '(' ')'
;
parameter_list:
parameter_list ',' parameter
|parameter
;
parameter: sub_expr
|STRING
;
%%
#include "lex.yy.c"
#include <ctype.h>
/*
 * Computes the value an assignment stores.
 *
 *   lhs_value    current value of the assignment target
 *   assign_type  '=' or one of the *_ASSIGN token codes
 *   rhs_value    value of the right-hand side
 *
 * Returns the new value of the target. MOD_ASSIGN works on the operands
 * truncated to int; a zero divisor is reported through yyerror() and leaves
 * the target unchanged. An unrecognised assign_type also leaves the target
 * unchanged instead of falling off the end of the function.
 */
double Evaluate (double lhs_value,int assign_type,double rhs_value)
{
    switch(assign_type)
    {
        case '=': return rhs_value;
        case ADD_ASSIGN: return (lhs_value + rhs_value);
        case SUB_ASSIGN: return (lhs_value - rhs_value);
        case MUL_ASSIGN: return (lhs_value * rhs_value);
        case DIV_ASSIGN: return (lhs_value / rhs_value);
        case MOD_ASSIGN:
            /* Integer % by zero is undefined behaviour, so refuse it. */
            if ((int)rhs_value == 0)
            {
                yyerror("Divide by 0!");
                return lhs_value;
            }
            return ((int)lhs_value % (int)rhs_value);
        default:
            /* Not an operator that assign_op produces. */
            return lhs_value;
    }
}
/*
 * Creates the symbol and constant tables, parses the file named by argv[1]
 * and prints the symbol table.
 *
 * Returns 1 if no file name was given, a table could not be allocated or the
 * file cannot be opened; otherwise 0 (also when parsing fails, as before).
 */
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        fprintf(stderr, "Usage: %s <source-file>\n", argc > 0 ? argv[0] : "parser");
        return 1;
    }

    symbol_table = create_table();
    constant_table = create_table();
    /* create_table() returns NULL when its allocation fails. */
    if (symbol_table == NULL || constant_table == NULL)
    {
        fprintf(stderr, "Could not allocate the symbol tables\n");
        return 1;
    }

    yyin = fopen(argv[1], "r");
    if (yyin == NULL)
    {
        fprintf(stderr, "Cannot open %s\n", argv[1]);
        return 1;
    }

    if(!yyparse())
    {
        printf("\nParsing complete\n");
    }
    else
    {
        printf("\nParsing failed\n");
    }
    printf("\n\tSymbol table");
    display(symbol_table);
    fclose(yyin);
    return 0;
}
/*
 * Reports an error on stdout together with the current line number and the
 * text of the current token.
 *
 * Always returns 0. The division rule in the grammar uses the result as the
 * value of the expression, so the function must actually return something.
 */
int yyerror(char *msg)
{
    printf("Line no: %d Error message: %s Token: %s\n", yylineno, msg, yytext);
    return 0;
}
lexl.l file
%{
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include "y.tab.h"
int cmnt_strt = 0;
%}
%option yylineno
letter [a-zA-Z]
digit [0-9]
ws [ \t\r\f\v]+
identifier (_|{letter})({letter}|{digit}|_){0,31}
hex [0-9a-f]
/* Exclusive states */
%x CMNT
/*%x PREPROC*/
%%
/* Keywords*/
"int" {return INT;}
"long" {return LONG;}
"long long" {return LONG_LONG;}
"short" {return SHORT;}
"signed" {return SIGNED;}
"unsigned" {return UNSIGNED;}
"for" {return FOR;}
"while" {return WHILE;}
"break" {return BREAK;}
"continue" {return CONTINUE;}
"if" {return IF;}
"else" {return ELSE;}
"return" {return RETURN;}
{identifier} {yylval.entry = insert(symbol_table, yytext, INT_MAX); return IDENTIFIER;}
{ws} ;
[+\-]?[0][x|X]{hex}+[lLuU]? { yylval.dval = (int)strtol(yytext, NULL, 16); return HEX_CONSTANT;}
[+\-]?{digit}+[lLuU]? {yylval.dval = atoi(yytext); return DEC_CONSTANT;}
"/*" {cmnt_strt = yylineno; BEGIN CMNT;}
<CMNT>.|{ws} ;
<CMNT>\n {yylineno++;}
<CMNT>"*/" {BEGIN INITIAL;}
<CMNT>"/*" {printf("Line %3d: Nested comments are not valid!\n",yylineno);}
<CMNT><<EOF>> {printf("Line %3d: Unterminated comment\n", cmnt_strt); yyterminate();}
/*^"#include" {BEGIN PREPROC;}*/
/*<PREPROC>"<"[^<>\n]+">" {return HEADER_FILE;}*/
/*<PREPROC>{ws} ;*/
/*<PREPROC>\"[^"\n]+\" {return HEADER_FILE;}*/
/*<PREPROC>\n {yylineno++; BEGIN INITIAL;}*/
/*<PREPROC>. {printf("Line %3d: Illegal header file format \n",yylineno);}*/
"//".* ;
\"[^\"\n]*\" {
if(yytext[yyleng-2]=='\\') /* check if it was an escaped quote */
{
yyless(yyleng-1); /* push the quote back if it was escaped */
yymore();
}
else{
insert( constant_table, yytext, INT_MAX);
return STRING;
}
}
\"[^\"\n]*$ {printf("Line %3d: Unterminated string %s\n",yylineno,yytext);}
{digit}+({letter}|_)+ {printf("Line %3d: Illegal identifier name %s\n",yylineno,yytext);}
\n {yylineno++;}
"--" {return DECREMENT;}
"++" {return INCREMENT;}
/* "->" {return PTR_SELECT;} */
"+=" {return ADD_ASSIGN;}
"-=" {return SUB_ASSIGN;}
"*=" {return MUL_ASSIGN;}
"/=" {return DIV_ASSIGN;}
"%=" {return MOD_ASSIGN;}
"&&" {return LOGICAL_AND;}
"||" {return LOGICAL_OR;}
"<=" {return LS_EQ;}
">=" {return GR_EQ;}
"==" {return EQ;}
"!=" {return NOT_EQ;}
. {return yytext[0];}
%%
/*
int main()
{
yyin=fopen("test2.c","r");
constant_table=create_table();
symbol_table = create_table();
yylex();
printf("\n\tSymbol table");
display(symbol_table);
printf("\n\tConstants Table");
display(constant_table);
printf("NOTE: Please refer tokens.h for token meanings\n");
} */
symboltable.h file
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <string.h>
#define HASH_TABLE_SIZE 100
/* One hash-table entry; entries that share a bucket form a singly linked list. */
struct entry_s
{
char* lexeme; /* text of the entry; the key that search() compares against */
double value; /* numeric value stored for the lexeme */
int data_type; /* type tag; the parser stores token codes such as INT here */
struct entry_s* successor; /* next entry in the same bucket, or NULL */
};
typedef struct entry_s entry_t;
/* Create a new hash_table. */
entry_t** create_table()
{
entry_t** hash_table_ptr = NULL; // declare a pointer
/* Allocate memory for a hashtable array of size HASH_TABLE_SIZE */
if( ( hash_table_ptr = malloc( sizeof( entry_t* ) * HASH_TABLE_SIZE ) ) == NULL )
return NULL;
int i;
// Intitialise all entries as NULL
for( i = 0; i < HASH_TABLE_SIZE; i++ )
{
hash_table_ptr[i] = NULL;
}
return hash_table_ptr;
}
/*
 * Hash a NUL-terminated string with Jenkins' one-at-a-time function
 * (https://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
 * and reduce the result to a bucket index in [0, HASH_TABLE_SIZE).
 *
 * The string is walked once up to its terminator, instead of calling
 * strlen() again on every iteration.
 */
uint32_t hash( char *lexeme )
{
    uint32_t acc = 0;
    const char *cursor;

    for ( cursor = lexeme; *cursor != '\0'; ++cursor ) {
        acc += *cursor;
        acc += ( acc << 10 );
        acc ^= ( acc >> 6 );
    }
    /* Final avalanche steps of the one-at-a-time hash. */
    acc += ( acc << 3 );
    acc ^= ( acc >> 11 );
    acc += ( acc << 15 );
    return acc % HASH_TABLE_SIZE;
}
/* Create an entry for a lexeme, token pair. This will be called from the insert function */
entry_t *create_entry( char *lexeme, int value )
{
entry_t *newentry;
/* Allocate space for newentry */
if( ( newentry = malloc( sizeof( entry_t ) ) ) == NULL ) {
return NULL;
}
/* Copy lexeme to newentry location using strdup (string-duplicate). Return NULL if it fails */
if( ( newentry->lexeme = strdup( lexeme ) ) == NULL ) {
return NULL;
}
newentry->value = value;
newentry->successor = NULL;
return newentry;
}
/*
 * Look lexeme up in the table. Returns the matching entry, or NULL when the
 * bucket chain selected by hash(lexeme) holds no entry with that text.
 */
entry_t* search( entry_t** hash_table_ptr, char* lexeme )
{
    entry_t *node;

    /* Walk the chain of the one bucket this lexeme can live in. */
    for( node = hash_table_ptr[ hash( lexeme ) ]; node != NULL; node = node->successor )
    {
        if( strcmp( lexeme, node->lexeme ) == 0 )
            return node;
    }
    return NULL;
}
/*
 * Return the entry for lexeme, creating it with the given value if the
 * table does not contain it yet. A new entry becomes the head of its bucket
 * chain. If the entry cannot be created, a message is printed and the
 * program exits with status 1.
 */
entry_t* insert( entry_t** hash_table_ptr, char* lexeme, int value )
{
    entry_t *existing = search( hash_table_ptr, lexeme );
    if( existing != NULL ) /* already present: nothing to insert */
        return existing;

    entry_t **bucket = &hash_table_ptr[ hash( lexeme ) ];
    entry_t *fresh = create_entry( lexeme, value );
    if( fresh == NULL )
    {
        printf("Insert failed. New entry could not be created.");
        exit(1);
    }

    /* Push onto the front of the chain; an empty bucket simply yields a NULL successor. */
    fresh->successor = *bucket;
    *bucket = fresh;
    return fresh;
}
// Print a header and then one row per entry, bucket by bucket, chain order within a bucket
void display(entry_t** hash_table_ptr)
{
    printf("\n====================================================\n");
    printf(" %-20s %-20s %-20s\n","lexeme","value","data-type");
    printf("====================================================\n");

    for( int bucket = 0; bucket < HASH_TABLE_SIZE; ++bucket )
    {
        for( entry_t *node = hash_table_ptr[bucket]; node != NULL; node = node->successor )
        {
            // value is shown truncated to an int
            printf(" %-20s %-20d %-20d \n", node->lexeme, (int)node->value, node->data_type);
        }
    }

    printf("====================================================\n");
}
I can't figure out why the unknown type name error is given. If you can, please point me in the right direction.
Assuming you are the same Shehan who yesterday asked this question with remarkably similar code, what I said then still applies:
If this is the first time you've attempted to write a C application with more than one source file, you should probably take a few minutes to review how linking multiple files works in C. That will save you a lot of frustration later.
This is precisely the sort of frustration I was referring to.
The error you are receiving comes from attempting to compile lex.yy.c. That file, which is generated from lexl.l, does not #include "symboltable.h", but it does #include "y.tab.h". In y.tab.h, entry_t is used in the declaration of the union semantic type, but since symboltable.h has not been included, entry_t has not been defined.
But just adding #include "symboltable.h" won't work, so please don't do it.
symboltable.h is not a correct header file, as has been mentioned in a comment by @JonathanLeffler. Header files must not contain function definitions, only declarations. Furthermore, symboltable.h does not have inclusion guards, so nothing stops it from being included twice, resulting in duplicate definition errors.
But the way your files are set up, you shouldn't be compiling lex.yy.c in the first place. Your parser.y file includes the line
#include "lex.yy.c"
which, as might be expected, includes the entire text of lex.yy.c in the source being compiled when you compile y.tab.c. I don't believe you thought this trick up yourself; I think you copied it from some tutorial written by someone who was too lazy to attempt to explain how to set up a project with multiple sources. Although it is possible to make things work for simple parsers, it's not going to serve you well in the future, and you should not start your learning about parsers by learning bad habits.
So I say once again: please start by learning how to create a C project with multiple source and header files. Otherwise, your attempt to learn how to use bison and flex will be much harder and more frustrating than necessary.

I got an error in function `yylex': lex.yy.c:(.text+0x2ac): undefined reference

I am new with lex and yacc, and I am following the "lex & yacc 1992" book.
I am working in an example in chapter 3, and I have an error in the compiling process, but I couldn't find a solution;
here is the code:
the lex file.l :
%{
#include "y.tab.h"
#include "symboletable.h"
#include <math.h>
extern int yylavl;
%}
%%
([0-9]+|([0-9]*\.[0-9]+)([eE][-+]?[0-9]+)?) {
yylval.dval = atof(yytext);
return NUMBER;
}
[ \t] ; /* ignore whitespace */
[A-Za-z][A-Za-z0-9]* { /* return symbol pointer */
yylval.symp = symlook(yytext);
return NAME;
}
"$" { return 0; }
\n |
. return yytext[0];
%%
and here the yacc file.y
%{
#include "symboletable.h"
#include <string.h>
#include <stdio.h> /* C declarations used in actions */
int yylex();
void yyerror(const char *s);
%}
%union {
double dval;
struct symtab *symp;
}
%token <symp> NAME
%token <dval> NUMBER
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS
%type <dval> expression
%%
statement_list : statement '\n'
| statement_list statement '\n'
;
statement : expression { printf("= %g\n", $1); }
| NAME '=' expression {$1->value = $3; }
;
expression : NAME {$$ = $1->value; }
| expression '+' expression {$$ = $1 + $3; }
| expression '-' expression {$$ = $1 - $3; }
| expression '*' expression {$$ = $1 * $3; }
| expression '/' expression
{ if ($3 ==0.0)
yyerror("divide by zero");
else
$$ = $1 / $3;
}
| '-' expression %prec UMINUS {$$ = -$2; }
| '(' expression ')' {$$ = $2; }
| NUMBER
;
%%
according to the example in the book, I need to write a symbol table routines, to get the string and allocate dynamic space for the string, here the file.h
the symboletable.h
#define NSYMS 20 /* maximum number of symbols */
struct symtab {
char *name;
double value;
} symtab[NSYMS];
struct symtab *symlook();
and the symboletable.pgm:
/*
 * Look up s in the symbol table, adding it if it is not present.
 *
 * Slots are filled in order, so the first slot without a name marks the end
 * of the used part of the table and is claimed for s. Returns the slot for
 * s. If the table is full, or the name cannot be copied, an error is
 * reported through yyerror() and the program exits with status 1.
 */
struct symtab *
symlook(char *s)
{
    struct symtab *sp;

    for (sp = symtab; sp < &symtab[NSYMS]; sp++) {
        /* is it already here ? */
        if (sp->name && !strcmp(sp->name, s))
            return sp;
        /* is it free */
        if (!sp->name) {
            sp->name = strdup(s);
            if (!sp->name) {
                /* a slot must never be handed out without a name */
                yyerror("Out of memory");
                exit(1);
            }
            return sp;
        }
        /* otherwise continue to next */
    }
    yyerror("Too many symbols");
    exit(1); /* cannot continue */
} /* symlook */
now when I run the following command:
yacc -d file.y
lex file.l
cc -c lex.yy.c -o newfile -ll
cc -o new y.tab.c lex.yy.c -ly -ll
but here the error I got:
/tmp/ccGnPAO2.o: In function `yylex': lex.yy.c:(.text+0x2ac):
undefined reference to `symlook' collect2: error: ld returned 1 exit
status
So why did I get that error, when I followed the example exactly?
You need to include your symbol table implementation in your compilation command. Otherwise, how is the linker going to find that code?

Segmentation Fault after assignment statement(lex and yacc)

This code works perfectly fine. After compiling lex and yacc, the code is able to do basic arithmetic operations, and even echoes the value of a variable when asked to do so. The only problem is with assignment statements.
If I want to, say, do A = 12, and later type A to see its value, the program crashes and I get a segmentation fault. How do I ensure that my assignment statements work, and how can I avoid this segmentation fault?
Here is my code:
//lex file
/*Lex input specification*/
%{
#include <stdlib.h>
#include <stdio.h>
#include "y.tab.h"
void yyerror(char*);
%}
%%
" " ;
[A-Z] { yylval = *yytext-'a'; return VARIABLE;}
[0-9]+([0-9])* { yylval=atoi(yytext); return INTEGER;}
[-/+()=*\n] { return *yytext;}
[\t] ;
. { yyerror("invalid character");}
%%
int yywrap(void) { return 1;}
And the yacc file:
/*yacc*/
%token INTEGER VARIABLE
%left '|'
%left '&'
%left '+' '-'
%left '*' '/'
%left UMINUS
%{
void yyerror(char*);
int yylex(void);
int sym[26];
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
%}
%%
program:
program statement '\n'
|
;
statement:
expr {printf("%d\n",$1);}
| VARIABLE '=' expr {sym[$1] = $3;}
;
expr:
INTEGER {$$ = $1;}
| VARIABLE {$$ = sym[$1];}
| expr '*' expr {$$ = $1 * $3;}
| expr '/' expr {$$ = $1 / $3;}
| expr '+' expr {$$ = $1 + $3;}
| expr '-' expr {$$ = $1 - $3;}
| '(' expr ')' {$$ = $2;}
;
%%
/* Parser/scanner error hook: writes the message s, followed by a newline, to stderr. */
void yyerror(char*s)
{
    FILE *sink = stderr;

    fprintf(sink, "%s\n", s);
}
/* Entry point: runs the generated parser once and returns 0 whatever yyparse() reports. */
int main(void) {
yyparse();
return 0;
}
(I flagged this as "not reproducible" because the fix was so trivial; however the flag has now timed-out/aged away. I'll answer instead so it is not shown as an open unanswered question).
As @BLUEPIXY noted:
maybe *yytext-'A'
Which, to clarify, is the lex rule:
[A-Z] { yylval = *yytext-'A'; return VARIABLE;}

error in yacc: ''x' has no declared type

I want to run the following .y code for constructing a C compiler. The code was taken exactly from this book.
The following miniC.y code is:
%{
#include <stdio.h>
#include "mini.h"
#include "miniC.h"
%}
%union {
ADDRESS address;
int code; /* comparison code 1-6 */
struct {int L1;
int L2;
int L3;
int L4;} labels;
}
%token <address> IDENTIFIER
%token <code> INT
%token <code> FLOAT
%token FOR
%token WHILE
%token <code> COMPARISON
%token IF
%token ELSE
%token <address> NUM
%type <code> Type
%type <address> Expr
%type <address> OptExpr
%type <labels> WhileStmt
%type <labels> ForStmt
%type <labels> IfStmt
%type <labels> Label
%right '='
%left COMPARISON
%left '+' '-'
%left '*' '/'
%left UMINUS UPLUS
%%
Function: Type IDENTIFIER '(' ArgListOpt ')' CompoundStmt
;
ArgListOpt: ArgList
|
;
ArgList: ArgList ',' Arg
| Arg
;
Arg: Type IDENTIFIER
;
Declaration: Type {dcl = TRUE;
identType = $1;}
IdentList ';' {dcl = FALSE;}
;
IdentList: IDENTIFIER ',' IdentList
| IDENTIFIER
;
Type: INT {$$ = $1;}
| FLOAT {$$ = $1;}
;
Stmt: ForStmt
| WhileStmt
| Expr ';'
| IfStmt
| CompoundStmt
| Declaration
| ';' /* null statement */
;
ForStmt: FOR '(' Expr ';' {$$.L1 = newlabel();
atom (LBL,NULL,NULL,NULL,0,$$.L1);}
OptExpr ';' {$$.L2 = newlabel();
atom (TST,$6,zero,NULL,6,
$<labels>$.L2);
$$.L3 = newlabel();
atom (JMP,NULL,NULL,NULL,0,
$<labels>$.L3);
$$.L4 = newlabel();
atom (LBL,NULL,NULL,NULL,0,
$<labels>$.L4);}
OptExpr ')' {atom (JMP,NULL,NULL,NULL,0,
$<labels>5.L1);
atom (LBL,NULL,NULL,NULL,0,
$<labels>8.L2);}
Stmt {atom (JMP,NULL,NULL,NULL,0,
$<labels>8.L4);
atom (LBL,NULL,NULL,NULL,0,
$<labels>8.L3);}
;
OptExpr: Expr {$$ = $1;}
| {$$ = one;} /* default to inf loop */
;
WhileStmt: WHILE {$$.L1 = newlabel();
atom (LBL,NULL,NULL,NULL,0,$$.L1);}
'(' Expr ')' {$$.L2 = newlabel();
atom (TST,$4, zero, NULL,1,$$.L2);}
Stmt {atom (JMP,NULL,NULL,NULL,0,
$<labels>2.L1);
atom (LBL,NULL,NULL,NULL,0,
$<labels>6.L2);}
;
IfStmt: IF '(' Expr ')' {$$.L1 = newlabel();
atom (TST, $3, zero, NULL, 1, $$.L1);}
Stmt {$$.L2 = newlabel();
atom (JMP,NULL,NULL,NULL,0, $$.L2);
atom (LBL,NULL,NULL,NULL,0,
$<labels>5.L1);}
ElsePart {atom (LBL,NULL,NULL,NULL,0,
$<labels>7.L2);}
;
ElsePart:
| ELSE Stmt
;
CompoundStmt: '{' StmtList '}'
;
StmtList: StmtList Stmt
|
;
Expr: IDENTIFIER '=' Expr {atom (MOV, $3, NULL, $1,0,0);
$$ = $3;}
| Expr COMPARISON Expr
Label {$$ = alloc(1);
atom (MOV, one, NULL, $$,0,0);
atom (TST, $1, $3, NULL, $2, $4.L1);
atom (MOV, zero, NULL, $$,0,0);
atom (LBL,NULL,NULL,NULL,0,$4.L1);}
| '+' Expr %prec UPLUS {$$ = $2;}
| '-' Expr %prec UMINUS {$$ = alloc(1);
atom (NEG, $2,NULL,$$,0,0); }
| Expr '+' Expr {$$ = alloc(1);
atom (ADD, $1, $3,$$,0,0); }
| Expr '-' Expr {$$ = alloc(1);
atom (SUB, $1, $3, $$,0,0); }
| Expr '*' Expr {$$ = alloc(1);
atom (MUL, $1, $3, $$,0,0); }
| Expr '/' Expr {$$ = alloc(1);
atom (DIV, $1, $3, $$,0,0); }
| '(' Expr ')' {$$ = $2;}
| IDENTIFIER {$$ = $1; }
| NUM {$$ = $1; }
;
Label: {$$.L1 = newlabel();}
; /* Used to store a label in
compare expr above */
%%
char *progname;
char * op_text();
int lineno = 1;
ADDRESS save;
ADDRESS one;
ADDRESS zero;
int nextlabel = 1;
#include "lex.yy.c"
#include "gen.c"
/*
 * Opens the atom output file, installs the constants 0.0 and 1.0 via
 * searchNums(), runs the parser and, if no error was flagged, runs the code
 * generator.
 *
 * Returns 1 if the atom file cannot be created, 0 otherwise.
 * NOTE(review): the strcpy() calls assume yytext is a writable array when
 * main starts (classic lex, or flex with %array) -- confirm against miniC.l.
 */
int main (int argc, char *argv[]){
    (void) argc;
    progname = argv[0];
    atom_file_ptr = fopen ("atoms", "wb");
    if (atom_file_ptr == NULL) {
        /* every atom() call writes to this stream, so there is no point in going on */
        fprintf (stderr, "%s: cannot create atoms\n", progname);
        return 1;
    }
    strcpy (yytext,"0.0");
    zero = searchNums(); /* install the constant 0.0 in table */
    strcpy (yytext, "1.0");
    one = searchNums(); /* also 1.0 */
    yyparse();
    fclose (atom_file_ptr);
    if (!err_flag) code_gen();
    return 0;
}
/*
 * Parser error hook: prints "<progname>[<lineno>]: <message>" on stderr,
 * shows the current token text on stdout and sets err_flag.
 * Always returns 0.
 */
int yyerror (char * s){
    fprintf(stderr, "%s[%d]: %s\n", progname, lineno, s);
    printf ("yytext is <%s>", yytext);
    err_flag = TRUE;
    return 0;
}
/* Returns the next unused label number and advances the nextlabel counter. */
int newlabel (void){ return nextlabel++;}
/*
 * Put out an atom: fills a struct atom from the arguments and appends it to
 * the atom file. dest is a label number.
 *
 * Always returns 0. A failed write is reported on stderr and sets err_flag,
 * which makes main skip code generation.
 */
int atom (int operation, ADDRESS operand1, ADDRESS operand2,
ADDRESS result, int comparison, int dest)
{
    struct atom outp;

    outp.op = operation;
    outp.left = operand1;
    outp.right = operand2;
    outp.result = result;
    outp.cmp = comparison;
    outp.dest = dest;
    if (fwrite (&outp, sizeof (struct atom), 1, atom_file_ptr) != 1) {
        fprintf (stderr, "%s: cannot write atom\n", progname);
        err_flag = TRUE;
    }
    return 0;
}
/*
 * Copies the three-letter mnemonic for the atom operation code into mne.
 * An unrecognised code leaves mne as it was. Always returns 0.
 */
int decode (int atom){
    switch (atom){
    case ADD: strcpy (mne, "ADD");
        break;
    case SUB: strcpy (mne, "SUB");
        break;
    case MUL: strcpy (mne, "MUL");
        break;
    case DIV: strcpy (mne, "DIV");
        break;
    case JMP: strcpy (mne, "JMP");
        break;
    case NEG: strcpy (mne, "NEG");
        break;
    case LBL: strcpy (mne, "LBL");
        break;
    case TST: strcpy (mne, "TST");
        break;
    case MOV: strcpy (mne, "MOV");
        break;
    default: /* unknown operation code: mne is deliberately left untouched */
        break;
    }
    return 0;
}
The errors are:
miniC.y:65.42-43: $$ for the midrule at $5 of 'ForStmt' has no declared type
miniC.y:66.69-70: $$ for the midrule at $5 of 'ForStmt' has no declared type
miniC.y:67.42-43: $$ for the midrule at $8 of 'ForStmt' has no declared type
miniC.y:70.42-43: $$ for the midrule at $8 of 'ForStmt' has no declared type
miniC.y:73.42-43: $$ for the midrule at $8 of 'ForStmt' has no declared type
miniC.y:88.42-43: $$ for the midrule at $2 of 'WhileStmt' has no declared type
miniC.y:89.69-70: $$ for the midrule at $2 of 'WhileStmt' has no declared type
miniC.y:90.42-43: $$ for the midrule at $6 of 'WhileStmt' has no declared type
miniC.y:91.69-70: $$ for the midrule at $6 of 'WhileStmt' has no declared type
miniC.y:97.42-43: $$ for the midrule at $5 of 'IfStmt' has no declared type
miniC.y:98.72-73: $$ for the midrule at $5 of 'IfStmt' has no declared type
miniC.y:99.42-43: $$ for the midrule at $7 of 'IfStmt' has no declared type
miniC.y:100.70-71: $$ for the midrule at $7 of 'IfStmt' has no declared type
make: *** [y.tab.c] Error 1
My makefile contains:
miniC: lex.yy.c y.tab.c
gcc -g y.tab.c -o miniC -ly -ll
lex.yy.c:miniC.l
lex miniC.l
y.tab.c:miniC.y
yacc -d miniC.y
Can any mentor come forward and advise me on how to resolve this problem?
Thank you.
The errors are complaining about the use of $$ (no type tag included) in midaction rules, which is illegal. ALL uses of $$ in midaction rules need a type tag. Interestingly, not all uses are incorrect -- SOME of them have the type tag $<labels>$.
I think what you need to do is replace $$ with $<labels>$ in all the mid rule actions (but NOT in the end-of-rule actions...) The easiest would be to go through the error messages (looking at the line and column of each), and replace that $$ with $<labels>$
Which version of bison/yacc are you using? What is the command line? My version gave these ( very common ) messages:
[Charlies-MacBook-Pro:~/junk] crb% bison x.y
x.y: conflicts: 6 shift/reduce
[Charlies-MacBook-Pro:~/junk] crb% bison --version
bison (GNU Bison) 2.3
Written by Robert Corbett and Richard Stallman.
It seems to say that your grammar is mostly fine.
Maybe post your Makefile and look at your bison version.

Bison conflicting type for yyerror

I'm trying to make a calculator from flex and bison, but I found an error during the compile.
Here is the error:
C:\GnuWin32\src>gcc lex.yy.c y.tab.c -o tugas
tugas.y:51: error: conflicting types for 'yyerror'
y.tab.c:1433: error: previous implicit declaration of 'yyerror' was here
Here is my .l code :
%{
#include <stdio.h>
#include "y.tab.h"
YYSTYPE yylval;
%}
plus [+]
semi [;]
minus [-]
var [a-z]
digit [0-1]+
equal [:=]
%%
{var} {yylval = *yytext - 'a'; return VAR;}
{digit} {yylval = atoi(yytext); return DIGIT;}
{plus} {return PLUS;}
{minus} {return MINUS;}
{equal} {return EQUAL;}
{semi} {return SEMI;}
. { return *yytext; }
%%
/* Entry point: runs the parser once and returns 0 whatever yyparse() reports. */
int main(void)
{
yyparse();
return 0;
}
/*
 * End-of-input hook called by the scanner.
 *
 * Returns 1, meaning "no more input". Returning 0 would tell the scanner
 * that a new input file had been set up, which this function never does, so
 * the parser would never be told that the input has ended.
 */
int yywrap(void)
{
    return 1;
}
/*
 * Error hook: prints "Error" and terminates the program with status 1, so it
 * never actually returns a value.
 * NOTE(review): declared here with no parameters; the quoted compiler output
 * reports this definition as conflicting with the implicit declaration in
 * y.tab.c.
 */
int yyerror(void)
{
printf("Error\n");
exit(1);
}
And here is my .y code :
%{
int sym[26];
%}
%token DIGIT VAR
%token MINUS PLUS EQUAL
%token SEMI
%%
program: dlist SEMI slist
;
dlist: /* nothing */
| decl SEMI dlist
;
decl: 'VAR' VAR {printf("deklarasi variable accepted");}
;
slist: stmt
| slist SEMI stmt
;
stmt: VAR EQUAL expr {sym[$1] = $3;}
| 'PRINT' VAR {printf("%d",sym[$2]);}
;
expr: term {$$ = $1;}
| expr PLUS term { $$ = $1 + $3;}
| expr MINUS term { $$ = $1 - $3; }
;
term: int {$$ = $1;}
| VAR {$$ = sym[$1]; }
;
int: DIGIT {$$ = $1;}
| int DIGIT
;
Why am I getting this error? Any suggestions to overcome this issue? Thanks in advance.
yyerror should have this signature:
int yyerror(char *);
Since it is expected to accept a string to be used in the error message (it would probably be better with a const char *, but you might get additional (ignorable) warnings with that).
You need to change
int yyerror(void)
to
int yyerror(char*)
In other words, yyerror() must take a single C-string argument which describes the error which occurred.

Resources