Bison C to Matlab - c

I need to translate a for loop from C to Matlab.
I have the following grammar:
stmt : stmt_while
| stmt_for
|...
;
stmt_for : FOR '(' for_exp ')' stmt { }
;
for_exp : exp ';' exp ';' exp { }
| exp ';' exp ';' { }
| exp ';' ';' exp { }
| exp ';' ';' { }
| ';' exp ';' exp { }
| ';' exp ';' { }
| ';' ';' exp { }
| ';' ';' { }
;
For loops in Matlab have a completely different syntax, so I do not know how to implement the translation.
The simplest solution, it seems to me, is to translate the for loop into a while loop:
                                 exp1;
for(exp1; exp2; exp3){           while(exp2){
    for_body...          ==>         for_body...
}                                    exp3;
                                 }
Do you have any other solutions?
Thank you all.

Related

Problem when try get $4 with some arguments- YACC -> C

I got stuck when I tried to pass two or more arguments from one grammar rule ("condition") on to a function.
The overall goal is to build a symbol table to identify errors in the code. I had already built a tree, but now we need to build the table, and I did not want to use that tree for it,
so I am trying to use a linked list that I wrote.
This is my linked list in C:
/* Node of a singly linked list; each node carries one string. */
typedef struct linkList {
char* data; /* string stored in this node */
struct linkList* next; /* next node in the list */
}linkList;
Sorry if this is not clear; I will try to explain with an example.
Here are some of my grammar rules ("conditions"):
s: Program { };
Program: Proc_Func {$$ = mknode("CODE",$1,NULL); };
Proc_Func: Proc_Func Funct {$$ = mknode("",$1,$2); }
| Proc_Func Proce {$$ = mknode("",$1,$2); }
| Funct {$$ =$1;}// mknode("",$1,NULL); }
| Statement {$$ = mknode("",$1,NULL); }
| Proce {$$ = mknode("",$1,NULL); }
|{$$=NULL;};
Funct: FUNC id LBRACKET Param RBRACKET RETURN Type LBRACE Body RBRACE {$$ = mknode("FUNC",mknode("",mknode("",$2,NULL),mknode("ARGS",$4,mknode("RETURN",$7,NULL))),mknode("",$9,NULL));
buildLinkList($2,$4,$7,$9);};
Proce: PROC id LBRACKET Param RBRACKET LBRACE Body RBRACE {$$ = mknode("PROC",mknode("",mknode("",$2,NULL),mknode("ARGS",$4,mknode("",$7,NULL))),NULL); };
Param: Param_list {$$ = mknode("",$1,NULL); }
|{$$ =NULL;};
Param_list: Var_id COLON Type {$$ = mknode("",$3,mknode("",$1,mknode(")",NULL,NULL))); }
| Param_list SEMICOLON Param_list {$$ = mknode("",$1,mknode("",$3,NULL)); };
Var_id: id COMMA Var_id {$$ = $1;}//mknode("",mknode("",$1,NULL),$3); }
| id {$$ = mknode(yytext,NULL,NULL);};
Type: BOOL {$$ = mknode("BOOLEAN",NULL,NULL); }
| CHAR {$$ = mknode("CHAR",NULL,NULL); }
| INT {$$ = mknode("INT",NULL,NULL); }
| REAL {$$ = mknode("REAL",NULL,NULL); }
| INT_P {$$ = mknode("INT_P",NULL,NULL); }
| REAL_P {$$ = mknode("REAL_P",NULL,NULL); }
| CHAR_P {$$ = mknode("CHAR_P",NULL,NULL); };
Body: Proc_Func Declares Statements {$$= mknode ("BODY",mknode("",$1,NULL),mknode("",$2,mknode("",$3,mknode("",NULL,NULL))));};
Declares: Declares Declare {$$= mknode ("",$1,$2);}
|{$$=NULL;};
Declare: VAR Var_id COLON Type SEMICOLON {$$= mknode ("VAR",$2,$4);};
Statements: Statements Statement {$$= mknode ("",$1,$2);}
|{$$=NULL;};
Statement: IF LBRACKET exp RBRACKET ST_Block {$$ = mknode("IF",mknode("(",$3,mknode(")",NULL,NULL)),$5);}
| IF LBRACKET exp RBRACKET ST_Block ELSE ST_Block {$$=mknode("IF ELSE", mknode("",$3,mknode("",NULL,NULL)),mknode("",$5,mknode("",$7,NULL)));}
| WHILE LBRACKET exp RBRACKET ST_Block {$$=mknode("WHILE",mknode("(",$3,mknode(")",NULL,NULL)),$5);}
| ST_Assign SEMICOLON {$$=mknode("",$1,NULL);}
| exp SEMICOLON {$$=$1;}
| RETURN exp SEMICOLON {$$=mknode("RETURN",$2,NULL);}
| NEW_Block {$$=$1;};
ST_Block: Statement {$$=$1;}
| Declare {$$=$1;}
| Proce {$$=$1;}
| Funct {$$=$1;}
| SEMICOLON {$$=mknode("",NULL,NULL);};
NEW_Block: LBRACE Proc_Func Declares Statements RBRACE {$$= mknode ("{",$2,mknode("",$3,mknode("",$4,("}",NULL,NULL))));};
ST_Assign: Ll ASSIGN exp {$$= mknode("=",$1,$3);};
Ll: id LSQRBR exp RSQRBR
| id {$$ = mknode("",$1,NULL); }
| ;
exp: exp EQUAL exp {$$= mknode ("==",$1,$3);}
| exp NOTEQ exp {$$= mknode ("!=",$1,$3);}
| exp BIGGER exp {$$= mknode (">",$1,$3);}
| exp BIGGEREQ exp {$$= mknode (">=",$1,$3);}
| exp SMALLER exp {$$= mknode ("<",$1,$3);}
| exp SMALLEREQ exp {$$= mknode ("<=",$1,$3);}
| exp AND exp {$$= mknode ("&&",$1,$3);}
| exp OR exp {$$= mknode ("||",$1,$3);}
| exp PLUS exp {$$= mknode ("+",$1,$3);}
| exp MINUS exp {$$= mknode ("-",$1,$3);}
| exp MULTIPLY exp {$$= mknode ("*",$1,$3);}
| exp DIV exp {$$= mknode ("/",$1,$3);}
| NOT exp {$$= mknode ("!",$2,NULL);}
| BOOLTRUE {$$= mknode ("",mknode("BOOLEAN",$1,NULL),NULL);}
| BOOLFALSE {$$= mknode ("",mknode("BOOLEAN",$1,NULL),NULL);}
|id {$$ = mknode("",$1,NULL); }
|CHAR_LTL {$$= mknode ($1,mknode("CHAR",NULL,NULL),NULL);}
| NUM {$$ = mknode(yytext,NULL,NULL); };
id: ID {$$ = $1; mknode(yytext,NULL,NULL); };
//| NULLL;
And this is the function that builds the linked list:
/*
 * Allocates four list nodes, copies d1..d4 into their data buffers and
 * chains them link1 -> link2 -> link3 -> link4.
 *
 * NOTE(review): in the lines shown the list is neither returned nor stored
 * anywhere, no malloc result is checked, and the listing ends without a
 * closing brace.
 * NOTE(review): the parameters are char*, but the call site passes $2, $4,
 * $7 and $9 from a grammar action; their semantic type is not visible
 * here -- confirm they really are C strings.
 */
void buildLinkList(char *d1,char *d2,char *d3,char *d4)
{
linkList *link1 = NULL;
link1=(linkList*)malloc(sizeof(linkList));
/* NOTE(review): sizeof(d1) is the size of the pointer d1, not the length
   of the string it points to, so the strcpy below can overrun the buffer. */
link1->data = (char*)malloc(sizeof(d1)+1);
strcpy(link1->data,d1);
linkList* link2= (linkList*)malloc(sizeof(linkList));
/* NOTE(review): this buffer (and the ones for link3/link4) is sized as
   sizeof(linkList), which is unrelated to the length of the copied string. */
link2->data=(char*)malloc(sizeof(linkList));
strcpy(link2->data,d2);
link1->next=link2;
linkList *link3= (linkList*)malloc(sizeof(linkList));
link3->data=(char*)malloc(sizeof(linkList));
strcpy(link3->data,d3);
link2->next=link3;
linkList* link4= (linkList*)malloc(sizeof(linkList));
link4->data=(char*)malloc(sizeof(linkList));
strcpy(link4->data,d4);
/* NOTE(review): link4->next is never assigned in the lines shown, so the
   list has no terminating NULL. */
link3->next=link4;
//printf("%s",link1->data);
//printf("%s",link2->data);
I tried to return the linked list, but that did not work, so I tried passing all the arguments into one list instead.
In the rule "Funct" you can see that I call the function buildLinkList and pass it $2, $4, $7 and $9.
$2: I can store it in my list and print it successfully. But $4 is "Param", which gives me back several variables; I cannot push them into my list, and when I try to print it I get garbage.
I have tried many ways of passing it, as well as other ideas. Please help me crack this; other ideas are welcome.
Thank you!
I believe the issue is here (and in the related lines):
link1->data = (char*)malloc(sizeof(d1)+1);
Here, sizeof(d1) gives you the size of the pointer named d1, not the size of the string it points at. For strings longer than the pointer size, this will underallocate the space for the string that you then copy over.
To fix this, change sizeof(d1) to strlen(d1). Alternatively, look into using strdup, which both allocates the space you need and copies over the string contents into that space.

How can i know if a variable was already declared using lex/yacc?

I have to design an original programming language and provide a syntactic analyzer for it. I am now at the point where I should check whether a variable has already been declared; if it has, I should not be able to declare it again. How can I do this? With an array (and if so, how)?
This is what I've done so far.
lex:
%{
#include <stdio.h>
#include <string.h>
#include "y.tab.h"
%}
%option noyywrap
%%
"/*"(.|\n)+"*/" ;
"float"|"char"|"string" {return TYPE;}
"int" {return INTTYPE;}
"for" {return FOR;}
"while" {return WHILE;}
"if" {return IF;}
"else" {return ELSE;}
"bool" {return BOOLTYPE;}
"true"|"false" {return BOOLVALUE;}
"++"|"--" {return INCRDECR;}
"/[^\"]+/" {return STRINGVALUE;}
"scat"|"scmp"|"scpy"|"slen" {return STRINGFUNCTION;}
"protected"|"private"|"public" { return CLASSTYPE;}
"class" {return CLASS;}
"eval" {return EVAL;}
[1-9][0-9]*|0 {yylval = atoi(yytext); return INTVALUE;}
[A-Za-z][A-Za-z0-9]* {yylval = strdup(yytext); return ID;}
"=" {return ASSIGN;}
"<="|"<"|">="|">"|"=="|"!=" {return COMP;}
"begin_prog" {return BGIN;}
"end_prog" {return END;}
[ \t] ;
\n {yylineno++;}
. {return yytext[0];}
yacc:
%{
#include <stdio.h>
extern FILE* yyin;
extern char* yytext;
extern int yylineno;
%}
%token TYPE INTTYPE FOR WHILE IF ELSE BOOLTYPE BOOLVALUE INCRDECR STRINGVALUE STRINGFUNCTION CLASSTYPE CLASS EVAL INTVALUE ID ASSIGN COMP BGIN END
%start s
%left '.'
%left ','
%left ';'
%left '+' '-'
%left '*' '/' '%'
%%
s: declarations block {printf ("correct input \n ");}
;
declarations : declaration ';'
| declarations declaration ';'
;
declaration : TYPE ID
|BOOLTYPE ID ASSIGN BOOLVALUE
|INTTYPE ID ASSIGN INTVALUE
|INTTYPE ID
|TYPE ID '(' list_param ')'
|TYPE ID '[' INTVALUE ']'
|INTTYPE ID '(' list_param ')'
|INTTYPE ID '[' INTVALUE ']'
|BOOLTYPE ID
;
block: BGIN blockinstr END
;
blockinstr: blockinstr listfiw
| blockinstr classs
| blockinstr fdeclaration
| listfiw
| listinstr
| stringg
| classs
| fdeclaration
;
fdeclaration : TYPE EVAL '(' list_paramf ')' '{' listinstr '}'
| INTTYPE EVAL '(' list_paramf ')' '{' listinstr '}'
| TYPE ID '(' list_param ')' '{' listinstr '}'
| INTTYPE ID '(' list_param ')' '{' listinstr '}'
;
list_param : parameter
| list_param ',' parameter
;
parameter : TYPE ID
| parameterf
;
list_paramf : parameterf
| list_paramf ',' parameterf
;
parameterf : INTTYPE ID
;
listfiw : iff
|forr
|whilee
;
iff: IF '(' ID COMP INTVALUE ')' '{' listinstr '}'
| IF '(' ID COMP ID ')' '{' listinstr '}'
| IF '(' ID ')' '{' listinstr '}'
| IF '(' ID COMP INTVALUE ')' '{' listinstr '}' ELSE '{' listinstr '}'
| IF '(' ID COMP ID ')' '{' listinstr '}' ELSE '{' listinstr '}'
| IF '(' ID ')' '{' listinstr '}' ELSE '{' listinstr '}'
| IF '(' ID COMP BOOLVALUE ')' '{' listinstr '}'
| IF '(' ID COMP BOOLVALUE ')' '{' listinstr '}' ELSE '{' listinstr '}'
;
forr: FOR '(' ID ASSIGN INTVALUE ';' ID COMP INTVALUE ';' ID INCRDECR ')' '{' listinstr '}'
| FOR '(' ID ASSIGN INTVALUE ';' ID COMP INTVALUE ';' ID INCRDECR ')' '{' FOR '(' ID ASSIGN INTVALUE ';' ID COMP INTVALUE ';' ID INCRDECR ')' '{' listinstr '}' '}'
;
whilee: WHILE '(' ID COMP INTVALUE ')' '{' listinstr '}'
| WHILE '(' INTVALUE ')' '{' listinstr '}'
;
listinstr : instruction ';'
| listinstr instruction ';'
;
instruction: ID ASSIGN BOOLVALUE
| ID ASSIGN operations
| ID '(' operations ')'
;
operations: operations '+' operations
| operations '*' operations
|operations '-' operations
|operations '/' operations
|operations '%' operations
|'(' operations ')'
|ID '[' INTVALUE ']'
|ID '(' operations ')'
|INTVALUE
|ID
;
stringg : stringg STRINGFUNCTION '(' ID ')' ';'
| stringg STRINGFUNCTION '(' ID ',' ID ')' ';'
| STRINGFUNCTION '(' ID ')' ';'
| STRINGFUNCTION '(' ID ',' ID ')' ';'
;
classs: CLASS ID '{' classlists '}'
;
classlists: classlist
| classlists classlist
;
classlist: CLASSTYPE ':' declarations
;
%%
/*
 * Parser error callback: prints the message s followed by the current
 * value of yylineno on stdout.
 *
 * Returns 0. The original fell off the end of this int function without
 * returning a value, which is undefined behaviour if a caller uses the
 * result.
 */
int yyerror(char * s){
    printf("err: %s line:%d\n",s,yylineno);
    return 0;
}
/*
 * Entry point: opens the file named by the first command-line argument,
 * runs the parser on it and closes the file again.
 *
 * Returns 0 when yyparse() reports success and 1 otherwise (missing
 * argument, unreadable file, or parse failure).
 *
 * Fixes relative to the original listing:
 *  - argv was declared as `char** argv[]`, i.e. char ***, so argv[1] had
 *    the wrong type for fopen();
 *  - fclose(f) referred to an undeclared variable f;
 *  - argv[1] and the fopen() result were used unchecked;
 *  - no value was returned.
 */
int main(int argc, char **argv){
    if (argc < 2) {
        fprintf(stderr, "usage: %s <input-file>\n", argc > 0 ? argv[0] : "parser");
        return 1;
    }

    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        perror(argv[1]);
        return 1;
    }

    int status = yyparse();
    fclose(yyin);
    return status != 0;
}
This works for declarations of any type.
I hope you can help me with my problem.
Thanks!!

Bison loop for conflict

to solve the dangling else problem, I used the following solution:
stmt : stmt_matched
| stmt_unmatched
;
stmt_unmatched : IF '(' exp ')' stmt
| IF '(' exp ')' stmt_matched ELSE stmt_unmatched
;
stmt_matched : IF '(' exp ')' stmt_matched ELSE stmt_matched
| stmt_for
| ...
;
When I define the grammar rule for the for loop, I get a shift/reduce conflict caused by the same problem:
stmt_for : FOR '(' exp ';' exp ';' exp ')' stmt
;
How can I solve this problem?
Not all for statements are matched. Consider, for example
if (c) for (;;) if (d) ; else ;
So it is necessary to divide for statements into for_matched and for_unmatched. (And similarly with other compound statements such as while.)

Bison's analysis Pascal grammar in C

I am writing a program to analyse the Pascal grammar. I want to check the correctness of an input Pascal file and show where the errors are.
I have a problem with finding more than one error: after finding the first error, the parser stops.
Also, the parser doesn't show which line the error is on; it only displays "Syntax error at or before [declaration], line", but I want it to report the line where the error actually is.
I used : http://ccia.ei.uvigo.es/docencia/PL/doc/bison/pascal/
pascal.l
%{
/*
* pascal.l
*
* lex input file for pascal scanner
*
* extensions: to ways to spell "external" and "->" ok for "^".
*/
#include <stdio.h>
#include "pascal.tab.h"
int line_no = 1;
%}
A [aA]
B [bB]
C [cC]
D [dD]
E [eE]
F [fF]
G [gG]
H [hH]
I [iI]
J [jJ]
K [kK]
L [lL]
M [mM]
N [nN]
O [oO]
P [pP]
Q [qQ]
R [rR]
S [sS]
T [tT]
U [uU]
V [vV]
W [wW]
X [xX]
Y [yY]
Z [zZ]
NQUOTE [^']
%%
{A}{N}{D} return(AND);
{A}{R}{R}{A}{Y} return(ARRAY);
{C}{A}{S}{E} return(CASE);
{C}{O}{N}{S}{T} return(CONST);
{D}{I}{V} return(DIV);
{D}{O} return(DO);
{D}{O}{W}{N}{T}{O} return(DOWNTO);
{E}{L}{S}{E} return(ELSE);
{E}{N}{D} return(END);
{E}{X}{T}{E}{R}{N} |
{E}{X}{T}{E}{R}{N}{A}{L} return(EXTERNAL);
{F}{O}{R} return(FOR);
{F}{O}{R}{W}{A}{R}{D} return(FORWARD);
{F}{U}{N}{C}{T}{I}{O}{N} return(FUNCTION);
{G}{O}{T}{O} return(GOTO);
{I}{F} return(IF);
{I}{N} return(IN);
{L}{A}{B}{E}{L} return(LABEL);
{M}{O}{D} return(MOD);
{N}{I}{L} return(NIL);
{N}{O}{T} return(NOT);
{O}{F} return(OF);
{O}{R} return(OR);
{O}{T}{H}{E}{R}{W}{I}{S}{E} return(OTHERWISE);
{P}{A}{C}{K}{E}{D} return(PACKED);
{B}{E}{G}{I}{N} return(PBEGIN);
{F}{I}{L}{E} return(PFILE);
{P}{R}{O}{C}{E}{D}{U}{R}{E} return(PROCEDURE);
{P}{R}{O}{G}{R}{A}{M} return(PROGRAM);
{R}{E}{C}{O}{R}{D} return(RECORD);
{R}{E}{P}{E}{A}{T} return(REPEAT);
{S}{E}{T} return(SET);
{T}{H}{E}{N} return(THEN);
{T}{O} return(TO);
{T}{Y}{P}{E} return(TYPE);
{U}{N}{T}{I}{L} return(UNTIL);
{V}{A}{R} return(VAR);
{W}{H}{I}{L}{E} return(WHILE);
{W}{I}{T}{H} return(WITH);
[a-zA-Z]([a-zA-Z0-9\-])* return(IDENTIFIER);
":=" return(ASSIGNMENT);
'({NQUOTE}|'')+' return(CHARACTER_STRING);
":" return(COLON);
"," return(COMMA);
[0-9]+ return(DIGSEQ);
"." return(DOT);
".." return(DOTDOT);
"=" return(EQUAL);
">=" return(GE);
">" return(GT);
"[" return(LBRAC);
"<=" return(LE);
"(" return(LPAREN);
"<" return(LT);
"-" return(MINUS);
"<>" return(NOTEQUAL);
"+" return(PLUS);
"]" return(RBRAC);
[0-9]+"."[0-9]+ return(REALNUMBER);
")" return(RPAREN);
";" return(SEMICOLON);
"/" return(SLASH);
"*" return(STAR);
"**" return(STARSTAR);
"->" |
"^" return(UPARROW);
"(*"    |
"{"     { /* Skip a comment opened by "(*" or "{".  Either closer, "*)" or
             "}", ends it, and newlines inside it are counted in line_no. */
          int c;
          for (;;)
            {
              c = input();
              /* End of input inside the comment.  The original tested
                 c == 0 inside `while ((c = input()))`, where it could
                 never be true, so an unterminated comment was never
                 reported.  Both 0 and EOF are checked because lex
                 implementations differ in what input() returns at end
                 of file. */
              if (c == 0 || c == EOF)
                commenteof();   /* prints a message and exits */
              if (c == '}')
                break;
              if (c == '*')
                {
                  c = input();
                  if (c == ')')
                    break;
                  if (c == 0 || c == EOF)
                    commenteof();
                  /* Not a closer: push the character back so that
                     "**)" and a newline after '*' are still handled. */
                  unput (c);
                }
              else if (c == '\n')
                line_no++;
            }
        }
[\t\f " "] ;
\n line_no++;
. { fprintf (stderr,
"'%c' (0%o): illegal character at line %d\n",
yytext[0], yytext[0], line_no);
}
%%
/*
 * Reports that the input ended inside a comment, printing the current
 * line_no to stderr, and terminates the program with exit status 1.
 * Does not return.
 *
 * The return type is spelled out as int (the original relied on implicit
 * int, which is no longer valid C since C99).  int is kept rather than
 * void so the definition stays compatible with the call in the scanner
 * action, which appears before any declaration of this function.
 */
int commenteof(void)
{
    fprintf (stderr, "Unexpected EOF inside comment at line %d\n", line_no);
    exit (1);
}
/*
 * End-of-input hook for the generated scanner.  Always returns 1; per the
 * lex/flex documentation a nonzero result means there is no further input
 * to switch to.
 *
 * The return type and the empty parameter list are now explicit; the
 * original used implicit int, which is not valid C since C99.
 */
int yywrap (void)
{
    return 1;
}
pascal.y
%{
/*
* pascal.y
*
* Pascal grammar in Yacc format, based originally on BNF given
* in "Standard Pascal -- User Reference Manual", by Doug Cooper.
* This in turn is the BNF given by the ANSI and ISO Pascal standards,
* and so, is PUBLIC DOMAIN. The grammar is for ISO Level 0 Pascal.
* The grammar has been massaged somewhat to make it LALR, and added
* the following extensions.
*
* constant expressions
* otherwise statement in a case
* productions to correctly match else's with if's
* beginnings of a separate compilation facility
*/
#include<stdio.h>
%}
%token AND ARRAY ASSIGNMENT CASE CHARACTER_STRING
%token COLON COMMA CONST DIGSEQ DIV DO DOT DOTDOT
%token DOWNTO ELSE END EQUAL EXTERNAL FOR FORWARD
%token FUNCTION GE GOTO GT IDENTIFIER IF IN LABEL LBRAC
%token LE LPAREN LT MINUS MOD NIL NOT NOTEQUAL OF OR
%token OTHERWISE PACKED PBEGIN PFILE PLUS PROCEDURE
%token PROGRAM RBRAC REALNUMBER RECORD REPEAT RPAREN
%token SEMICOLON SET SLASH STAR STARSTAR THEN
%token TO TYPE UNTIL UPARROW VAR WHILE WITH
%%
file : program
| module
;
program : program_heading semicolon block DOT
;
program_heading : PROGRAM identifier
| PROGRAM identifier LPAREN identifier_list RPAREN
;
identifier_list : identifier_list COMMA identifier
| identifier
;
block : label_declaration_part
constant_definition_part
type_definition_part
variable_declaration_part
procedure_and_function_declaration_part
statement_part
;
module : constant_definition_part
type_definition_part
variable_declaration_part
procedure_and_function_declaration_part
;
label_declaration_part : LABEL label_list semicolon
|
;
label_list : label_list comma label
| label
;
label : DIGSEQ
;
constant_definition_part : CONST constant_list
|
;
constant_list : constant_list constant_definition
| constant_definition
;
constant_definition : identifier EQUAL cexpression semicolon
;
/*constant : cexpression ; /* good stuff! */
cexpression : csimple_expression
| csimple_expression relop csimple_expression
;
csimple_expression : cterm
| csimple_expression addop cterm
;
cterm : cfactor
| cterm mulop cfactor
;
cfactor : sign cfactor
| cexponentiation
;
cexponentiation : cprimary
| cprimary STARSTAR cexponentiation
;
cprimary : identifier
| LPAREN cexpression RPAREN
| unsigned_constant
| NOT cprimary
;
constant : non_string
| sign non_string
| CHARACTER_STRING
;
sign : PLUS
| MINUS
;
non_string : DIGSEQ
| identifier
| REALNUMBER
;
type_definition_part : TYPE type_definition_list
|
;
type_definition_list : type_definition_list type_definition
| type_definition
;
type_definition : identifier EQUAL type_denoter semicolon
;
type_denoter : identifier
| new_type
;
new_type : new_ordinal_type
| new_structured_type
| new_pointer_type
;
new_ordinal_type : enumerated_type
| subrange_type
;
enumerated_type : LPAREN identifier_list RPAREN
;
subrange_type : constant DOTDOT constant
;
new_structured_type : structured_type
| PACKED structured_type
;
structured_type : array_type
| record_type
| set_type
| file_type
;
array_type : ARRAY LBRAC index_list RBRAC OF component_type
;
index_list : index_list comma index_type
| index_type
;
index_type : ordinal_type ;
ordinal_type : new_ordinal_type
| identifier
;
component_type : type_denoter ;
record_type : RECORD record_section_list END
| RECORD record_section_list semicolon variant_part END
| RECORD variant_part END
;
record_section_list : record_section_list semicolon record_section
| record_section
;
record_section : identifier_list COLON type_denoter
;
variant_part : CASE variant_selector OF variant_list semicolon
| CASE variant_selector OF variant_list
|
;
variant_selector : tag_field COLON tag_type
| tag_type
;
variant_list : variant_list semicolon variant
| variant
;
variant : case_constant_list COLON LPAREN record_section_list RPAREN
| case_constant_list COLON LPAREN record_section_list semicolon
variant_part RPAREN
| case_constant_list COLON LPAREN variant_part RPAREN
;
case_constant_list : case_constant_list comma case_constant
| case_constant
;
case_constant : constant
| constant DOTDOT constant
;
tag_field : identifier ;
tag_type : identifier ;
set_type : SET OF base_type
;
base_type : ordinal_type ;
file_type : PFILE OF component_type
;
new_pointer_type : UPARROW domain_type
;
domain_type : identifier ;
variable_declaration_part : VAR variable_declaration_list semicolon
|
;
variable_declaration_list :
variable_declaration_list semicolon variable_declaration
| variable_declaration
;
variable_declaration : identifier_list COLON type_denoter
;
procedure_and_function_declaration_part :
proc_or_func_declaration_list semicolon
|
;
proc_or_func_declaration_list :
proc_or_func_declaration_list semicolon proc_or_func_declaration
| proc_or_func_declaration
;
proc_or_func_declaration : procedure_declaration
| function_declaration
;
procedure_declaration : procedure_heading semicolon directive
| procedure_heading semicolon procedure_block
;
procedure_heading : procedure_identification
| procedure_identification formal_parameter_list
;
directive : FORWARD
| EXTERNAL
;
formal_parameter_list : LPAREN formal_parameter_section_list RPAREN ;
formal_parameter_section_list :
formal_parameter_section_list semicolon formal_parameter_section
| formal_parameter_section
;
formal_parameter_section : value_parameter_specification
| variable_parameter_specification
| procedural_parameter_specification
| functional_parameter_specification
;
value_parameter_specification : identifier_list COLON identifier
;
variable_parameter_specification : VAR identifier_list COLON identifier
;
procedural_parameter_specification : procedure_heading ;
functional_parameter_specification : function_heading ;
procedure_identification : PROCEDURE identifier ;
procedure_block : block ;
function_declaration : function_heading semicolon directive
| function_identification semicolon function_block
| function_heading semicolon function_block
;
function_heading : FUNCTION identifier COLON result_type
| FUNCTION identifier formal_parameter_list COLON result_type
;
result_type : identifier ;
function_identification : FUNCTION identifier ;
function_block : block ;
statement_part : compound_statement ;
compound_statement : PBEGIN statement_sequence END ;
statement_sequence : statement_sequence semicolon statement
| statement
;
statement : open_statement
| closed_statement
;
open_statement : label COLON non_labeled_open_statement
| non_labeled_open_statement
;
closed_statement : label COLON non_labeled_closed_statement
| non_labeled_closed_statement
;
non_labeled_closed_statement : assignment_statement
| procedure_statement
| goto_statement
| compound_statement
| case_statement
| repeat_statement
| closed_with_statement
| closed_if_statement
| closed_while_statement
| closed_for_statement
|
;
non_labeled_open_statement : open_with_statement
| open_if_statement
| open_while_statement
| open_for_statement
;
repeat_statement : REPEAT statement_sequence UNTIL boolean_expression
;
open_while_statement : WHILE boolean_expression DO open_statement
;
closed_while_statement : WHILE boolean_expression DO closed_statement
;
open_for_statement : FOR control_variable ASSIGNMENT initial_value direction
final_value DO open_statement
;
closed_for_statement : FOR control_variable ASSIGNMENT initial_value direction
final_value DO closed_statement
;
open_with_statement : WITH record_variable_list DO open_statement
;
closed_with_statement : WITH record_variable_list DO closed_statement
;
open_if_statement : IF boolean_expression THEN statement
| IF boolean_expression THEN closed_statement ELSE open_statement
;
closed_if_statement : IF boolean_expression THEN closed_statement
ELSE closed_statement
;
assignment_statement : variable_access ASSIGNMENT expression
;
variable_access : identifier
| indexed_variable
| field_designator
| variable_access UPARROW
;
indexed_variable : variable_access LBRAC index_expression_list RBRAC
;
index_expression_list : index_expression_list comma index_expression
| index_expression
;
index_expression : expression ;
field_designator : variable_access DOT identifier
;
procedure_statement : identifier params
| identifier
;
params : LPAREN actual_parameter_list RPAREN ;
actual_parameter_list : actual_parameter_list comma actual_parameter
| actual_parameter
;
/*
* this forces you to check all this to be sure that only write and
* writeln use the 2nd and 3rd forms, you really can't do it easily in
* the grammar, especially since write and writeln aren't reserved
*/
actual_parameter : expression
| expression COLON expression
| expression COLON expression COLON expression
;
goto_statement : GOTO label
;
case_statement : CASE case_index OF case_list_element_list END
| CASE case_index OF case_list_element_list semicolon END
| CASE case_index OF case_list_element_list semicolon
otherwisepart statement END
| CASE case_index OF case_list_element_list semicolon
otherwisepart statement semicolon END
;
case_index : expression ;
case_list_element_list : case_list_element_list semicolon case_list_element
| case_list_element
;
case_list_element : case_constant_list COLON statement
;
otherwisepart : OTHERWISE
| OTHERWISE COLON
;
control_variable : identifier ;
initial_value : expression ;
direction : TO
| DOWNTO
;
final_value : expression ;
record_variable_list : record_variable_list comma variable_access
| variable_access
;
boolean_expression : expression ;
expression : simple_expression
| simple_expression relop simple_expression
;
simple_expression : term
| simple_expression addop term
;
term : factor
| term mulop factor
;
factor : sign factor
| exponentiation
;
exponentiation : primary
| primary STARSTAR exponentiation
;
primary : variable_access
| unsigned_constant
| function_designator
| set_constructor
| LPAREN expression RPAREN
| NOT primary
;
unsigned_constant : unsigned_number
| CHARACTER_STRING
| NIL
;
unsigned_number : unsigned_integer | unsigned_real ;
unsigned_integer : DIGSEQ
;
unsigned_real : REALNUMBER
;
/* functions with no params will be handled by plain identifier */
function_designator : identifier params
;
set_constructor : LBRAC member_designator_list RBRAC
| LBRAC RBRAC
;
member_designator_list : member_designator_list comma member_designator
| member_designator
;
member_designator : member_designator DOTDOT expression
| expression
;
addop: PLUS
| MINUS
| OR
;
mulop : STAR
| SLASH
| DIV
| MOD
| AND
;
relop : EQUAL
| NOTEQUAL
| LT
| GT
| LE
| GE
| IN
;
identifier : IDENTIFIER
;
semicolon : SEMICOLON
;
comma : COMMA
;
%%
extern int line_no;
extern char *yytext;
/*
 * Parser error callback: writes the message s, the text of the current
 * token (yytext) and the scanner's line counter (line_no) to stderr.
 *
 * Returns 0.  The original used a K&R-style parameter declaration and
 * fell off the end of an int function without returning a value.
 *
 * NOTE(review): line_no is whatever the scanner has counted so far; if
 * the parser had already read a token on a later line when it detected
 * the error, the reported line will be that later one -- confirm against
 * the generated parser.
 */
int yyerror(char *s)
{
    fprintf(stderr, "%s: at or before '%s', line %d\n",
            s, yytext, line_no);
    return 0;
}
/*
 * Entry point: parses the hard-coded input file and prints "good" when
 * yyparse() returns 0, "error" otherwise, then waits for a key press.
 *
 * Returns 0 on a successful parse and 1 if the file cannot be opened or
 * the parse fails.
 *
 * Fixes relative to the original listing:
 *  - main had no declared return type (implicit int) and returned nothing;
 *  - the fopen() result was not checked, so a missing file left yyin NULL
 *    and later passed NULL to fclose();
 *  - unused extern declarations (init, yylex, yylineno, yytext) removed.
 */
int main (void) {
    extern FILE *yyin;
    extern int yyparse(void);
    const char *path = "D:\\helloword.pascal";
    int failed;

    yyin = fopen(path, "r");
    if (yyin == NULL) {
        perror(path);
        return 1;
    }

    failed = yyparse();
    if (failed) {
        printf("error");
    }
    else {
        printf("good");
    }

    fclose(yyin);
    getchar();   /* keep the console window open until a key is pressed */
    return failed ? 1 : 0;
}
Pascal input file
program Hello;
begin someErrorText1
writeln ('Hello, world.') someErrorText2
writeln ('Hello, world2.')
end.
output
Console
As you can see, the error is on line 2 but the parser reports line 3, and it doesn't report the second error at all.

issue with the function definition in my grammar

I have an issue with the function definition in my C grammar, which can be found here http://www.archive-host.com/files/1959635/24fe084677d7655eb57ba66e1864081450017dd9/cAST.txt: the function definition is not recognized correctly, and I can't multiply it by something.
The code I am trying to input is this:
int factorielle(int n)
{ int x;
if ( n == 0)
return 1;
else return n*factorielle(n-1);
}
The function definition is this one :
function_definition
: declaration_specifiers declarator compound_statement
| declarator compound_statement
;
declaration_specifiers should be linked to int and declarator to factorielle(int n), to do this I replaced this :
direct_declarator
: ID ((direct_declarator '[' ']') | (direct_declarator '(' parameter_type_list ')') | (direct_declarator '(' identifier_list ')') | (direct_declarator '(' ')') )*
with
direct_declarator
: ID ((direct_declarator '[' ']') | (direct_declarator '(' parameter_type_list ')') | (direct_declarator '(' identifier_list ')') | (direct_declarator '(' ')') | '(' parameter_type_list ')' )*
But it does not help much.
As for the multiplication, I don't know how to support it without introducing a conflict.
Is there a way to fix this, please?
You're likely to have a difficult time parsing real C code using a pure grammar with pure ANTLR.
The reason is that certain declarations look like legitimate executable statements. (While the referenced answer seems to be about LR(1) parsers, it is really about parsers that cannot handle ambiguity; ANTLR cannot).
The only way to tell them apart is to use context information available from earlier symbol declarations. So you will have to collect symbol types as you parse, and inspect that information in the grammar rule reductions to decide whether such instances are statements or declarations. (I don't know how one implements this in ANTLR, although I believe it to be possible).
I may have found a solution to the first part of the issue by replacing
compound_statement
: '{' '}'
| '{' statement_list '}'
;
with
compound_statement
: '{' '}'
| '{' statement_list '}'
| '{' external_declaration+ '}'
;
and adding this to direct_declarator:
| ID '(' parameter_type_list ')'
But I don't know if it will bring some conflicts.

Resources