YACC LEX IF AND ELSE calculator - c

I have a question about this code. I would like to implement IF / ELSE in YACC and LEX, but I am having difficulty detecting the condition and executing the right block (because which block runs depends on the condition!).
I have to make a calculator that supports IF and ELSE; I have already added the tokens.
expr evaluates an expression.
stmt and stmtList allow nesting.
%%
// Grammar rules of the calculator. The actions call code()/code2()/code3() with
// instr_t values (presumably appending them to the generated program - confirm
// against the definitions of those helpers, which are not shown here).
// US: list of ... (command, assignment, expression)
// Every non-empty alternative calls code(STOP) and returns a distinct value (1, 3 or 6).
liste :
| liste error RC { yyerrok; yyclearin; code (STOP); return 1;}
| liste stmtList RC { printf("stmtList\n"); code((instr_t)printExprCode); code (STOP); return 3;}
| liste cmd RC { code (STOP); return 6;}
;
// US: Command
cmd :
opCmd { code ((instr_t)*($1->U.pFct));}
| DBG sym { dbgSymbol($2); }
;
// US: Algebraic expression
expr : ENTIER { code2((instr_t)intPush, (instr_t)$1); }
| REEL { code2((instr_t)floPush, (instr_t)$1);}
| IVAR { code3((instr_t)varPush, (instr_t)$1, (instr_t)varEval);}
| FVAR { code3((instr_t)varPush, (instr_t)$1, (instr_t)varEval);}
| UNDEF
// NOTE(review): the printf format below contains %f but no argument is passed.
| PO expr PF { printf("test&\n, $$=%f"); $$=$2; }
| expr opAlg expr { code ((instr_t)*($2->U.pFct)); }
| SUB expr { code((instr_t)negate); } %prec UNARY_MINUS
| PREDEF PO expr PF { code2((instr_t)predef, (instr_t)$1); }
// NOTE(review): in this alternative $1 is PO; the two ENTIER operands are $2 and $4,
// and the second intPush is emitted without an operand.
| PO ENTIER LT ENTIER PF { code3((instr_t)intPush, (instr_t)$1, (instr_t)intPush);}
| expr opCOMP expr { code ((instr_t)*($2->U.pFct)); }
;
// Assignment: emits varPush, the target symbol ($1) and varAssign after the expression.
assgn :
IVAR AFF expr { code3((instr_t)varPush, (instr_t)$1, (instr_t)varAssign); }
| FVAR AFF expr { code3((instr_t)varPush, (instr_t)$1, (instr_t)varAssign); }
| UNDEF AFF expr { code3((instr_t)varPush, (instr_t)$1, (instr_t)varAssign); }
;
// US: Statement
// NOTE(review): cond is defined here but is not referenced by any other rule shown.
cond : PO expr PF { printf("Condition");} ;
//cond : PO ENTIER LT ENTIER PF { printf("ok"); if($1<$3){$$=1;}else{$$=0;}};
stmt : expr
| assgn
// NOTE(review): `code(IF,)` is not valid C (missing second argument and terminating ';');
// this is the unfinished IF action the question asks about.
| IF expr stmt {$$ = code(IF,)}
;
// US: Statement list
// The empty alternative pushes a data_t whose value field is 0.
stmtList : {data_t d; d.value = 0; push(d);}
| stmt
| stmtList DEL stmtList
| AO stmtList AF
;
// US: Block (a statement list enclosed in AO ... AF)
// NOTE(review): block is not referenced by any other rule shown.
block :
AO stmtList AF
;
// US: sym, nbr, var, opAlg, opCmd, opCOMP
sym : nbr | var | PREDEF | opAlg | opCmd | opCOMP
;
nbr : ENTIER | REEL
;
var : UNDEF | IVAR | FVAR
;
opAlg : ADD | SUB | MUL | DIV
;
opCmd : PR_TS | PR_TS2 | DBG_TS | DBG_TS2
;
opCOMP : LT | GT | EQ | NE
;
%%

Generally, for this you'll want an "embedded" action in the middle of the rule. Something like:
// Sketch of an IF rule using a mid-rule (embedded) action between expr and stmtList.
stmt: IF expr {
// generate code to branch to a label if the preceding expression was false
} stmtList {
// generate the label that is branched to
}
You can communicate between these two fragments by setting $$ to something in the first action and then using that value (as $3) in the second action. The final action needs to set $$ for the final result of the rule.
For an IF/ELSE, you need something more complex with two labels (one at the end and one just after the ELSE) and two branches (conditional to the else label as with the simple if and unconditional to the end label just before the else label)

Related

Problem when try get $4 with some arguments- YACC -> C

I got stuck when I tried to pass two or more arguments from a grammar rule to another function.
The overall idea is to build a symbol table to identify errors in the code. I had already built a tree, but now we need to build the table, and I didn't want to use that tree for it,
so I am trying to use a linked list that I wrote.
this is my linklist in c:
/* Node of a singly linked list; each node carries one string. */
typedef struct linkList linkList;
struct linkList {
    char *data;     /* text held by this node */
    linkList *next; /* next node in the chain */
};
Sorry if this is not clear; I will try to explain with an example.
Here are some of my grammar rules:
s: Program { };
Program: Proc_Func {$$ = mknode("CODE",$1,NULL); };
Proc_Func: Proc_Func Funct {$$ = mknode("",$1,$2); }
| Proc_Func Proce {$$ = mknode("",$1,$2); }
| Funct {$$ =$1;}// mknode("",$1,NULL); }
| Statement {$$ = mknode("",$1,NULL); }
| Proce {$$ = mknode("",$1,NULL); }
|{$$=NULL;};
Funct: FUNC id LBRACKET Param RBRACKET RETURN Type LBRACE Body RBRACE {$$ = mknode("FUNC",mknode("",mknode("",$2,NULL),mknode("ARGS",$4,mknode("RETURN",$7,NULL))),mknode("",$9,NULL));
buildLinkList($2,$4,$7,$9);};
Proce: PROC id LBRACKET Param RBRACKET LBRACE Body RBRACE {$$ = mknode("PROC",mknode("",mknode("",$2,NULL),mknode("ARGS",$4,mknode("",$7,NULL))),NULL); };
Param: Param_list {$$ = mknode("",$1,NULL); }
|{$$ =NULL;};
Param_list: Var_id COLON Type {$$ = mknode("",$3,mknode("",$1,mknode(")",NULL,NULL))); }
| Param_list SEMICOLON Param_list {$$ = mknode("",$1,mknode("",$3,NULL)); };
Var_id: id COMMA Var_id {$$ = $1;}//mknode("",mknode("",$1,NULL),$3); }
| id {$$ = mknode(yytext,NULL,NULL);};
Type: BOOL {$$ = mknode("BOOLEAN",NULL,NULL); }
| CHAR {$$ = mknode("CHAR",NULL,NULL); }
| INT {$$ = mknode("INT",NULL,NULL); }
| REAL {$$ = mknode("REAL",NULL,NULL); }
| INT_P {$$ = mknode("INT_P",NULL,NULL); }
| REAL_P {$$ = mknode("REAL_P",NULL,NULL); }
| CHAR_P {$$ = mknode("CHAR_P",NULL,NULL); };
Body: Proc_Func Declares Statements {$$= mknode ("BODY",mknode("",$1,NULL),mknode("",$2,mknode("",$3,mknode("",NULL,NULL))));};
Declares: Declares Declare {$$= mknode ("",$1,$2);}
|{$$=NULL;};
Declare: VAR Var_id COLON Type SEMICOLON {$$= mknode ("VAR",$2,$4);};
Statements: Statements Statement {$$= mknode ("",$1,$2);}
|{$$=NULL;};
Statement: IF LBRACKET exp RBRACKET ST_Block {$$ = mknode("IF",mknode("(",$3,mknode(")",NULL,NULL)),$5);}
| IF LBRACKET exp RBRACKET ST_Block ELSE ST_Block {$$=mknode("IF ELSE", mknode("",$3,mknode("",NULL,NULL)),mknode("",$5,mknode("",$7,NULL)));}
| WHILE LBRACKET exp RBRACKET ST_Block {$$=mknode("WHILE",mknode("(",$3,mknode(")",NULL,NULL)),$5);}
| ST_Assign SEMICOLON {$$=mknode("",$1,NULL);}
| exp SEMICOLON {$$=$1;}
| RETURN exp SEMICOLON {$$=mknode("RETURN",$2,NULL);}
| NEW_Block {$$=$1;};
ST_Block: Statement {$$=$1;}
| Declare {$$=$1;}
| Proce {$$=$1;}
| Funct {$$=$1;}
| SEMICOLON {$$=mknode("",NULL,NULL);};
/* NEW_Block: a brace-delimited block. Builds a "{" node whose children chain the
   nested procedures/functions ($2), the declarations ($3) and the statements ($4),
   terminated by a "}" leaf. */
NEW_Block: LBRACE Proc_Func Declares Statements RBRACE {
    /* The closing leaf has to be created with mknode(); a bare ("}",NULL,NULL)
       is a comma expression whose value is NULL, so no "}" node would be built. */
    $$ = mknode("{", $2, mknode("", $3, mknode("", $4, mknode("}", NULL, NULL))));
};
ST_Assign: Ll ASSIGN exp {$$= mknode("=",$1,$3);};
Ll: id LSQRBR exp RSQRBR
| id {$$ = mknode("",$1,NULL); }
| ;
exp: exp EQUAL exp {$$= mknode ("==",$1,$3);}
| exp NOTEQ exp {$$= mknode ("!=",$1,$3);}
| exp BIGGER exp {$$= mknode (">",$1,$3);}
| exp BIGGEREQ exp {$$= mknode (">=",$1,$3);}
| exp SMALLER exp {$$= mknode ("<",$1,$3);}
| exp SMALLEREQ exp {$$= mknode ("<=",$1,$3);}
| exp AND exp {$$= mknode ("&&",$1,$3);}
| exp OR exp {$$= mknode ("||",$1,$3);}
| exp PLUS exp {$$= mknode ("+",$1,$3);}
| exp MINUS exp {$$= mknode ("-",$1,$3);}
| exp MULTIPLY exp {$$= mknode ("*",$1,$3);}
| exp DIV exp {$$= mknode ("/",$1,$3);}
| NOT exp {$$= mknode ("!",$2,NULL);}
| BOOLTRUE {$$= mknode ("",mknode("BOOLEAN",$1,NULL),NULL);}
| BOOLFALSE {$$= mknode ("",mknode("BOOLEAN",$1,NULL),NULL);}
|id {$$ = mknode("",$1,NULL); }
|CHAR_LTL {$$= mknode ($1,mknode("CHAR",NULL,NULL),NULL);}
| NUM {$$ = mknode(yytext,NULL,NULL); };
id: ID {$$ = $1; mknode(yytext,NULL,NULL); };
//| NULLL;
and this is the function to build the link list:
/*
 * buildLinkList - allocate four linkList nodes, copy d1..d4 into their data
 * buffers with strcpy, and chain them link1 -> link2 -> link3 -> link4.
 * The listing stops after the commented-out printf calls; no closing brace or
 * return of the list is shown.
 */
void buildLinkList(char *d1,char *d2,char *d3,char *d4)
{
linkList *link1 = NULL;
link1=(linkList*)malloc(sizeof(linkList));
/* NOTE(review): sizeof(d1) is the size of the pointer d1, not the length of the
   string it points to, so the strcpy below can write past this buffer. */
link1->data = (char*)malloc(sizeof(d1)+1);
strcpy(link1->data,d1);
linkList* link2= (linkList*)malloc(sizeof(linkList));
/* NOTE(review): this buffer (and those of link3/link4) is sized as
   sizeof(linkList), which is unrelated to the length of the string copied in. */
link2->data=(char*)malloc(sizeof(linkList));
strcpy(link2->data,d2);
link1->next=link2;
linkList *link3= (linkList*)malloc(sizeof(linkList));
link3->data=(char*)malloc(sizeof(linkList));
strcpy(link3->data,d3);
link2->next=link3;
linkList* link4= (linkList*)malloc(sizeof(linkList));
link4->data=(char*)malloc(sizeof(linkList));
strcpy(link4->data,d4);
link3->next=link4;
/* NOTE(review): link4->next is not assigned in the lines shown, and none of the
   malloc results are checked for NULL. */
//printf("%s",link1->data);
//printf("%s",link2->data);
I tried to return the linked list, but it is not working, so I tried to send all the arguments to one list.
In the rule "Funct" you can see that I call the function buildLinkList and pass it $2, $4, $7 and $9.
$2 I can store ("cache") in my list and print successfully, but $4 is "Param", which yields several variables; I cannot push them into my list, and when I try to print it I get garbage.
I have tried many ways of passing it and other ideas. Please help me solve this; other ideas are welcome.
Thank you!
I believe the issue is here (and in the related lines):
link1->data = (char*)malloc(sizeof(d1)+1);
Here, sizeof(d1) gives you the size of the pointer named d1, not the size of the string it points at. For strings longer than the pointer size, this will underallocate the space for the string that you then copy over.
To fix this, change sizeof(d1) to strlen(d1). Alternatively, look into using strdup, which both allocates the space you need and copies over the string contents into that space.

How to create Abstract Syntax Tree nodes when the operator and operands are not in the same production?

So in all cases of AST examples, there are productions of the following kind:
expr -> expr "+" expr;
expr -> expr "-" expr;
And in this case it's easy to create a new node like this:
expr: expr "+" expr {newNode("+",$1,$3);}
;
Now my grammar has the following implementation:
assignment:IDENTIFIER '=' expression ';'
;
expression:term expression_1
;
expression_1: '+' term expression_1 |
'-' term expression_1 |
;
term: factor term_1
;
term_1: '*' factor term_1 |
'/' factor term_1 |
;
factor: IDENTIFIER |
'(' expression ')' |
NUM | FNUM | STRING
;
Here, while making a new node, how do I take the first operand(which is in a previous production), and feed that into a newNode function which will have the operator and the second operand(both of these are together in a different production)?

Flex/Bison start condition

How to enable a start condition at the beginning of a rule and disable it at the end ? I have to ignore whitespace with some bison rules only.
How to ignore whitespace inside nested brackets.
define_directive:
DEFINE '(' class_name ')'{ ... }
;
I'm trying to write a parser for this sample code with some more rules.
#/*
* #Template Family
* #Description sample script template for Mate Programming language
* (multi-line comment)
*/
#namespace(sample)
#require(String fatherName)
#require(String motherName)
#require(Array childrenNames)
#define(Family : Template) #// end of header anything can go in body section below (comment)
Family Description
==================
Father's Name: #(fatherName)
Mother's Name: #(motherName)
Number of child: #(childrenNamesCount,0) #// valuation operator is null safe (comment)
List of children's names
------------------------
#foreach(childName:childrenNames)
> #(childName)
#empty
> there is no child name to display.
#end
##(varName) #// this should not be interpreted because escaped with # (comment)
Lexer and parser partially implemented. My problem is how to deal with whitespace inside statement keywords like #foreach, #require.
Whitespaces should be ignored for these.
desired sample output
Family Description
==================
Father's Name: Mira
Mother's Name: James
Number of child: 0
List of children's names
------------------------
> there is no child name to display.
##(varName)
bison file content
command:
fileword
| valuation
| alternative
| loop
| command_directive
;
fileword:
tokenword { scriptlangy_echo(yytext,"fileword.tokenword"); }
| MAGICESC { scriptlangy_echo("#","fileword.MAGICESC"); }
;
tokenword:
IDENTIFIER | NUMBER | STRING_LITERAL | WHITESPACE
| INC_OP | DEC_OP | AND_OP | OR_OP | LE_OP | GE_OP | EQ_OP | NE_OP | L_OP | G_OP
| ';' | ',' | ':' | '=' | ']' | '.' | '&' | '[' | '!' | '~' | '-' | '+' | '*' | '/' | '%' | '^' | '|' | ')' | '}' | '?' | '{' | '('
;
valuation:
'#' '(' expression ')' {
fprintf(yyout, "<val>");
}
| '#' '(' expression ',' default_value ')' {
fprintf(yyout, "<val>");
}
;
loop:
for_loop
| foreach_loop
| while_loop
;
while_loop:
WHILE '(' expression ')' end_block
| WHILE '(' expression ')' commands end_block
;
for_loop:
FOR '(' expression_statement expression_statement expression')' end_block
| FOR '(' expression_statement expression_statement expression')' commands end_block
;
foreach_loop:
foreach_block end_block
| foreach_block empty_block end_block
;
foreach_block:
FOREACH '(' IDENTIFIER ')'
| FOREACH '(' IDENTIFIER ':' expression')' commands
;
The key part of your question seems to be this:
I have to ignore whitespace with some bison rules only. How to ignore
whitespace inside nested brackets.
As I remarked in comments, your implementation idea of somehow doing this by having your parser rules manipulate scanner start conditions is pretty much a non-starter. Forget about that.
Since evidently your scanner does not, in general, ignore whitespace, it must emit tokens that represent whitespace, or perhaps tokens that represent something else plus whitespace (ugly). If it emits whitespace tokens then the thing to do is simply to account for them in your grammar rules. This is completely possible. In fact, you can build a parser for any context-free language on top of a scanner that just returns every character as its own token. The scanner / parser dichotomy is a functional and conceptual convenience, not a necessity.
For example, then, suppose we want to be able to parse numeric array literals, formed as a nonempty, comma-delimited list of decimal numbers enclosed in curly braces, with optional whitespace around commas and inside the braces. Suppose further that we have these terminal symbols to work with:
OPEN // open brace
CLOSE // close brace
NUM // maximal sequence of one or more decimal digits
COMMA // a comma
WS // a maximal run of whitespace
We might then write these rules:
array: array_start array_elements CLOSE;
array_start: OPEN
| OPEN WS
;
array_elements: array_element
| array_elements array_separator array_element
;
array_element: NUM
| NUM WS
;
array_separator: COMMA
| COMMA WS
;
There are, of course, many other ways to set up the details, but, generally speaking, this is how you handle whitespace with parser rules: not by ignoring it, but by accepting it.

Bison C to Matlab

I need to translate a for loop from C to Matlab.
I have the following grammar:
stmt : stmt_while
| stmt_for
|...
;
stmt_for : FOR '(' for_exp ')' stmt { }
;
for_exp : exp ';' exp ';' exp { }
| exp ';' exp ';' { }
| exp ';' ';' exp { }
| exp ';' ';' { }
| ';' exp ';' exp { }
| ';' exp ';' { }
| ';' ';' exp { }
| ';' ';' { }
;
The for loops in Matlab have a completely different syntax so I do not know how to implement the translation.
The simplest solution seems to me to translate it in a while:
exp1;
for(exp1; exp2; exp3){ while(exp2){
for_body... ==> for_body...
} exp3;
}
Do you have other solutions?
Thank you all.

Bison's analysis Pascal grammar in C

I am writing a program to analyse Pascal source against a grammar. I want to check the correctness of an input Pascal file and show where the errors are.
I have a problem with finding more than one error: after finding the first error, the parser stops.
Also, the parser doesn't show which line the error is on; it only displays "Syntax error at or before [declaration], line", but I want it to report the line where the error is.
I used : http://ccia.ei.uvigo.es/docencia/PL/doc/bison/pascal/
pascal.l
%{
/*
* pascal.l
*
* lex input file for pascal scanner
*
* extensions: to ways to spell "external" and "->" ok for "^".
*/
#include <stdio.h>
#include "pascal.tab.h"
int line_no = 1;
%}
A [aA]
B [bB]
C [cC]
D [dD]
E [eE]
F [fF]
G [gG]
H [hH]
I [iI]
J [jJ]
K [kK]
L [lL]
M [mM]
N [nN]
O [oO]
P [pP]
Q [qQ]
R [rR]
S [sS]
T [tT]
U [uU]
V [vV]
W [wW]
X [xX]
Y [yY]
Z [zZ]
NQUOTE [^']
%%
{A}{N}{D} return(AND);
{A}{R}{R}{A}{Y} return(ARRAY);
{C}{A}{S}{E} return(CASE);
{C}{O}{N}{S}{T} return(CONST);
{D}{I}{V} return(DIV);
{D}{O} return(DO);
{D}{O}{W}{N}{T}{O} return(DOWNTO);
{E}{L}{S}{E} return(ELSE);
{E}{N}{D} return(END);
{E}{X}{T}{E}{R}{N} |
{E}{X}{T}{E}{R}{N}{A}{L} return(EXTERNAL);
{F}{O}{R} return(FOR);
{F}{O}{R}{W}{A}{R}{D} return(FORWARD);
{F}{U}{N}{C}{T}{I}{O}{N} return(FUNCTION);
{G}{O}{T}{O} return(GOTO);
{I}{F} return(IF);
{I}{N} return(IN);
{L}{A}{B}{E}{L} return(LABEL);
{M}{O}{D} return(MOD);
{N}{I}{L} return(NIL);
{N}{O}{T} return(NOT);
{O}{F} return(OF);
{O}{R} return(OR);
{O}{T}{H}{E}{R}{W}{I}{S}{E} return(OTHERWISE);
{P}{A}{C}{K}{E}{D} return(PACKED);
{B}{E}{G}{I}{N} return(PBEGIN);
{F}{I}{L}{E} return(PFILE);
{P}{R}{O}{C}{E}{D}{U}{R}{E} return(PROCEDURE);
{P}{R}{O}{G}{R}{A}{M} return(PROGRAM);
{R}{E}{C}{O}{R}{D} return(RECORD);
{R}{E}{P}{E}{A}{T} return(REPEAT);
{S}{E}{T} return(SET);
{T}{H}{E}{N} return(THEN);
{T}{O} return(TO);
{T}{Y}{P}{E} return(TYPE);
{U}{N}{T}{I}{L} return(UNTIL);
{V}{A}{R} return(VAR);
{W}{H}{I}{L}{E} return(WHILE);
{W}{I}{T}{H} return(WITH);
[a-zA-Z]([a-zA-Z0-9\-])* return(IDENTIFIER);
":=" return(ASSIGNMENT);
'({NQUOTE}|'')+' return(CHARACTER_STRING);
":" return(COLON);
"," return(COMMA);
[0-9]+ return(DIGSEQ);
"." return(DOT);
".." return(DOTDOT);
"=" return(EQUAL);
">=" return(GE);
">" return(GT);
"[" return(LBRAC);
"<=" return(LE);
"(" return(LPAREN);
"<" return(LT);
"-" return(MINUS);
"<>" return(NOTEQUAL);
"+" return(PLUS);
"]" return(RBRAC);
[0-9]+"."[0-9]+ return(REALNUMBER);
")" return(RPAREN);
";" return(SEMICOLON);
"/" return(SLASH);
"*" return(STAR);
"**" return(STARSTAR);
"->" |
"^" return(UPARROW);
"(*" |
"{" { /* Skip a Pascal comment opened by "(*" or "{". Either "}" or "*)" closes
         it. Newlines inside the comment are counted in line_no. */
      int c;
      for (;;)
      {
        c = input();
        /* End of input before the comment was closed. The test must happen
           before anything else: depending on the flex version input() reports
           end of file as 0 or as EOF, and a loop conditioned on a non-zero
           character would simply stop without reporting the error. */
        if (c == 0 || c == EOF)
        {
          commenteof();
          break;
        }
        if (c == '}')
          break;
        if (c == '*')
        {
          c = input();
          if (c == ')')
            break;
          if (c == 0 || c == EOF)
          {
            commenteof();
            break;
          }
          /* Not a closer: push the character back so it is examined again
             (it may be '}', another '*', or a newline that must be counted). */
          unput (c);
        }
        else if (c == '\n')
          line_no++;
      }
    }
[\t\f " "] ;
\n line_no++;
. { fprintf (stderr,
"'%c' (0%o): illegal character at line %d\n",
yytext[0], yytext[0], line_no);
}
%%
/*
 * commenteof - report that the input ended inside a comment and terminate.
 *
 * Writes a message containing the current line_no to stderr and calls exit(1),
 * so it does not return. The return type is spelled out as int (implicit int is
 * not valid since C99) and kept as int so that it stays compatible with call
 * sites that use the function before this definition.
 */
int commenteof(void)
{
    fprintf (stderr, "Unexpected EOF inside comment at line %d\n", line_no);
    exit (1);
}
/*
 * yywrap - end-of-input hook used by the generated scanner.
 *
 * Always returns 1, which tells the scanner that there is no further input to
 * switch to. Declared with an explicit int return type and a (void) parameter
 * list instead of relying on implicit int.
 */
int yywrap(void)
{
    return 1;
}
pascal.y
%{
/*
* pascal.y
*
* Pascal grammar in Yacc format, based originally on BNF given
* in "Standard Pascal -- User Reference Manual", by Doug Cooper.
* This in turn is the BNF given by the ANSI and ISO Pascal standards,
* and so, is PUBLIC DOMAIN. The grammar is for ISO Level 0 Pascal.
* The grammar has been massaged somewhat to make it LALR, and added
* the following extensions.
*
* constant expressions
* otherwise statement in a case
* productions to correctly match else's with if's
* beginnings of a separate compilation facility
*/
#include<stdio.h>
%}
%token AND ARRAY ASSIGNMENT CASE CHARACTER_STRING
%token COLON COMMA CONST DIGSEQ DIV DO DOT DOTDOT
%token DOWNTO ELSE END EQUAL EXTERNAL FOR FORWARD
%token FUNCTION GE GOTO GT IDENTIFIER IF IN LABEL LBRAC
%token LE LPAREN LT MINUS MOD NIL NOT NOTEQUAL OF OR
%token OTHERWISE PACKED PBEGIN PFILE PLUS PROCEDURE
%token PROGRAM RBRAC REALNUMBER RECORD REPEAT RPAREN
%token SEMICOLON SET SLASH STAR STARSTAR THEN
%token TO TYPE UNTIL UPARROW VAR WHILE WITH
%%
file : program
| module
;
program : program_heading semicolon block DOT
;
program_heading : PROGRAM identifier
| PROGRAM identifier LPAREN identifier_list RPAREN
;
identifier_list : identifier_list COMMA identifier
| identifier
;
block : label_declaration_part
constant_definition_part
type_definition_part
variable_declaration_part
procedure_and_function_declaration_part
statement_part
;
module : constant_definition_part
type_definition_part
variable_declaration_part
procedure_and_function_declaration_part
;
label_declaration_part : LABEL label_list semicolon
|
;
label_list : label_list comma label
| label
;
label : DIGSEQ
;
constant_definition_part : CONST constant_list
|
;
constant_list : constant_list constant_definition
| constant_definition
;
constant_definition : identifier EQUAL cexpression semicolon
;
/*constant : cexpression ; /* good stuff! */
cexpression : csimple_expression
| csimple_expression relop csimple_expression
;
csimple_expression : cterm
| csimple_expression addop cterm
;
cterm : cfactor
| cterm mulop cfactor
;
cfactor : sign cfactor
| cexponentiation
;
cexponentiation : cprimary
| cprimary STARSTAR cexponentiation
;
cprimary : identifier
| LPAREN cexpression RPAREN
| unsigned_constant
| NOT cprimary
;
constant : non_string
| sign non_string
| CHARACTER_STRING
;
sign : PLUS
| MINUS
;
non_string : DIGSEQ
| identifier
| REALNUMBER
;
type_definition_part : TYPE type_definition_list
|
;
type_definition_list : type_definition_list type_definition
| type_definition
;
type_definition : identifier EQUAL type_denoter semicolon
;
type_denoter : identifier
| new_type
;
new_type : new_ordinal_type
| new_structured_type
| new_pointer_type
;
new_ordinal_type : enumerated_type
| subrange_type
;
enumerated_type : LPAREN identifier_list RPAREN
;
subrange_type : constant DOTDOT constant
;
new_structured_type : structured_type
| PACKED structured_type
;
structured_type : array_type
| record_type
| set_type
| file_type
;
array_type : ARRAY LBRAC index_list RBRAC OF component_type
;
index_list : index_list comma index_type
| index_type
;
index_type : ordinal_type ;
ordinal_type : new_ordinal_type
| identifier
;
component_type : type_denoter ;
record_type : RECORD record_section_list END
| RECORD record_section_list semicolon variant_part END
| RECORD variant_part END
;
record_section_list : record_section_list semicolon record_section
| record_section
;
record_section : identifier_list COLON type_denoter
;
variant_part : CASE variant_selector OF variant_list semicolon
| CASE variant_selector OF variant_list
|
;
variant_selector : tag_field COLON tag_type
| tag_type
;
variant_list : variant_list semicolon variant
| variant
;
variant : case_constant_list COLON LPAREN record_section_list RPAREN
| case_constant_list COLON LPAREN record_section_list semicolon
variant_part RPAREN
| case_constant_list COLON LPAREN variant_part RPAREN
;
case_constant_list : case_constant_list comma case_constant
| case_constant
;
case_constant : constant
| constant DOTDOT constant
;
tag_field : identifier ;
tag_type : identifier ;
set_type : SET OF base_type
;
base_type : ordinal_type ;
file_type : PFILE OF component_type
;
new_pointer_type : UPARROW domain_type
;
domain_type : identifier ;
variable_declaration_part : VAR variable_declaration_list semicolon
|
;
variable_declaration_list :
variable_declaration_list semicolon variable_declaration
| variable_declaration
;
variable_declaration : identifier_list COLON type_denoter
;
procedure_and_function_declaration_part :
proc_or_func_declaration_list semicolon
|
;
proc_or_func_declaration_list :
proc_or_func_declaration_list semicolon proc_or_func_declaration
| proc_or_func_declaration
;
proc_or_func_declaration : procedure_declaration
| function_declaration
;
procedure_declaration : procedure_heading semicolon directive
| procedure_heading semicolon procedure_block
;
procedure_heading : procedure_identification
| procedure_identification formal_parameter_list
;
directive : FORWARD
| EXTERNAL
;
formal_parameter_list : LPAREN formal_parameter_section_list RPAREN ;
formal_parameter_section_list :
formal_parameter_section_list semicolon formal_parameter_section
| formal_parameter_section
;
formal_parameter_section : value_parameter_specification
| variable_parameter_specification
| procedural_parameter_specification
| functional_parameter_specification
;
value_parameter_specification : identifier_list COLON identifier
;
variable_parameter_specification : VAR identifier_list COLON identifier
;
procedural_parameter_specification : procedure_heading ;
functional_parameter_specification : function_heading ;
procedure_identification : PROCEDURE identifier ;
procedure_block : block ;
function_declaration : function_heading semicolon directive
| function_identification semicolon function_block
| function_heading semicolon function_block
;
function_heading : FUNCTION identifier COLON result_type
| FUNCTION identifier formal_parameter_list COLON result_type
;
result_type : identifier ;
function_identification : FUNCTION identifier ;
function_block : block ;
statement_part : compound_statement ;
compound_statement : PBEGIN statement_sequence END ;
statement_sequence : statement_sequence semicolon statement
| statement
;
statement : open_statement
| closed_statement
;
open_statement : label COLON non_labeled_open_statement
| non_labeled_open_statement
;
closed_statement : label COLON non_labeled_closed_statement
| non_labeled_closed_statement
;
non_labeled_closed_statement : assignment_statement
| procedure_statement
| goto_statement
| compound_statement
| case_statement
| repeat_statement
| closed_with_statement
| closed_if_statement
| closed_while_statement
| closed_for_statement
|
;
non_labeled_open_statement : open_with_statement
| open_if_statement
| open_while_statement
| open_for_statement
;
repeat_statement : REPEAT statement_sequence UNTIL boolean_expression
;
open_while_statement : WHILE boolean_expression DO open_statement
;
closed_while_statement : WHILE boolean_expression DO closed_statement
;
open_for_statement : FOR control_variable ASSIGNMENT initial_value direction
final_value DO open_statement
;
closed_for_statement : FOR control_variable ASSIGNMENT initial_value direction
final_value DO closed_statement
;
open_with_statement : WITH record_variable_list DO open_statement
;
closed_with_statement : WITH record_variable_list DO closed_statement
;
open_if_statement : IF boolean_expression THEN statement
| IF boolean_expression THEN closed_statement ELSE open_statement
;
closed_if_statement : IF boolean_expression THEN closed_statement
ELSE closed_statement
;
assignment_statement : variable_access ASSIGNMENT expression
;
variable_access : identifier
| indexed_variable
| field_designator
| variable_access UPARROW
;
indexed_variable : variable_access LBRAC index_expression_list RBRAC
;
index_expression_list : index_expression_list comma index_expression
| index_expression
;
index_expression : expression ;
field_designator : variable_access DOT identifier
;
procedure_statement : identifier params
| identifier
;
params : LPAREN actual_parameter_list RPAREN ;
actual_parameter_list : actual_parameter_list comma actual_parameter
| actual_parameter
;
/*
* this forces you to check all this to be sure that only write and
* writeln use the 2nd and 3rd forms, you really can't do it easily in
* the grammar, especially since write and writeln aren't reserved
*/
actual_parameter : expression
| expression COLON expression
| expression COLON expression COLON expression
;
goto_statement : GOTO label
;
case_statement : CASE case_index OF case_list_element_list END
| CASE case_index OF case_list_element_list semicolon END
| CASE case_index OF case_list_element_list semicolon
otherwisepart statement END
| CASE case_index OF case_list_element_list semicolon
otherwisepart statement semicolon END
;
case_index : expression ;
case_list_element_list : case_list_element_list semicolon case_list_element
| case_list_element
;
case_list_element : case_constant_list COLON statement
;
otherwisepart : OTHERWISE
| OTHERWISE COLON
;
control_variable : identifier ;
initial_value : expression ;
direction : TO
| DOWNTO
;
final_value : expression ;
record_variable_list : record_variable_list comma variable_access
| variable_access
;
boolean_expression : expression ;
expression : simple_expression
| simple_expression relop simple_expression
;
simple_expression : term
| simple_expression addop term
;
term : factor
| term mulop factor
;
factor : sign factor
| exponentiation
;
exponentiation : primary
| primary STARSTAR exponentiation
;
primary : variable_access
| unsigned_constant
| function_designator
| set_constructor
| LPAREN expression RPAREN
| NOT primary
;
unsigned_constant : unsigned_number
| CHARACTER_STRING
| NIL
;
unsigned_number : unsigned_integer | unsigned_real ;
unsigned_integer : DIGSEQ
;
unsigned_real : REALNUMBER
;
/* functions with no params will be handled by plain identifier */
function_designator : identifier params
;
set_constructor : LBRAC member_designator_list RBRAC
| LBRAC RBRAC
;
member_designator_list : member_designator_list comma member_designator
| member_designator
;
member_designator : member_designator DOTDOT expression
| expression
;
addop: PLUS
| MINUS
| OR
;
mulop : STAR
| SLASH
| DIV
| MOD
| AND
;
relop : EQUAL
| NOTEQUAL
| LT
| GT
| LE
| GE
| IN
;
identifier : IDENTIFIER
;
semicolon : SEMICOLON
;
comma : COMMA
;
%%
extern int line_no;
extern char *yytext;
/*
 * yyerror - report a parse error on stderr.
 *
 * s: message text supplied by the parser.
 *
 * Prints the message together with the current token text (yytext) and the
 * scanner's line counter (line_no). Returns 0; the previous K&R-style
 * definition was declared int but returned no value.
 */
int yyerror(const char *s)
{
    fprintf(stderr, "%s: at or before '%s', line %d\n",
            s, yytext, line_no);
    return 0;
}
/*
 * main - parse the hard-coded Pascal source file and print the verdict.
 *
 * Opens the input file, runs yyparse() on it and prints "error" if the parser
 * returned non-zero, "good" otherwise, then waits for a key press.
 * Returns 1 if the input file cannot be opened, 0 otherwise.
 */
int main (void) {
    extern FILE *yyin;
    extern int yyparse(void);
    const char *path = "D:\\helloword.pascal";

    yyin = fopen(path, "r");
    if (yyin == NULL) {
        /* Without this check the scanner would read from its default input
           and fclose() below would be called on a null pointer. */
        perror(path);
        return 1;
    }

    if (yyparse()) {
        printf("error");
    }
    else {
        printf("good");
    }

    fclose(yyin);
    getchar(); /* wait for a key press before the program ends */
    return 0;
}
Pascal input file
program Hello;
begin someErrorText1
writeln ('Hello, world.') someErrorText2
writeln ('Hello, world2.')
end.
output
Console
As you can see, the error is on line 2 but the parser reports line 3, and it doesn't report the second error.

Resources