如何在Flex / bison中实现If语句 [英] How do i implement If statement in Flex/bison

查看:1898
本文介绍了如何在Flex / bison中实现If语句的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我没有得到错误,请你帮助我,这里是.l和.y file.thanks。

 %{
#includeifanw.tab.h
extern int yylval;
%}
%%
={return EQ; }
!={return NE; }
< {return LT; }
< ={return LE; }
> {return GT; }
> ={return GE; }
+{return PLUS; }
- {return MINUS; }
*{return MULT; }
/{return DIVIDE; }
){return RPAREN; }
({return LPAREN;}
:={return ASSIGN;}
;{return SEMICOLON;}
IF{return IF;}
THEN{return THEN;}
ELSE{return ELSE;}
FI{return FI;}
WHILE{return WHILE;}
DO{return DO;}
OD{return OD;}
PRINT{return PRINT;}
[0-9] + {yylval = atoi ); return NUMBER;}
[az] {yylval = yytext [0] - 'a'; return NAME;}
\ {;}
\\\
{nextline }
\t {;}
//\".*\\\
{nextline();}
。{yyerror(illegal token);}
% %

Yacc文件

 %start ROOT 

%令牌EQ
%令牌NE
%令牌LT
%令牌LE
%令牌GT
%令牌GE
%令牌PLUS
%令牌MINUS
%令牌MULT
%令牌DIVIDE
%令牌RPAREN
%令牌LPAREN
%令牌ASSIGN
%令牌SEMICOLON
%令牌IF
%令牌THEN
%令牌ELSE
%令牌FI
%令牌WHILE
%令牌DO
%令牌OD
%令牌PRINT
%令牌NUMBER
%令牌NAME

%%

ROOT:
stmtseq {execute($ 1); }
;

语句:
指示符ASSIGN表达式{$$ = assignment($ 1,$ 3); }
| PRINT表达式{$$ = print($ 2); }
| IF expression THEN stmtseq ELSE stmtseq FI
{$$ = ifstmt($ 2,$ 4,$ 6); }
| IF expression THEN stmtseq FI
{$$ = ifstmt($ 2,$ 4,empty()); }
| WHILE expression DO stmtseq OD {$$ = whilestmt($ 2,$ 4); }
;

stmtseq:
stmtseq SEMICOLON语句{$$ = seq($ 1,$ 3); }
|语句{$$ = $ 1; }
;

表达式:
expr2 {$$ = $ 1; }
| expr2 EQ expr2 {$$ = eq($ 1,$ 3); }
| expr2 NE expr2 {$$ = ne($ 1,$ 3); }
| expr2 LT expr2 {$$ = le($ 1,$ 3); }
| expr2 LE expr2 {$$ = le($ 1,$ 3); }
| expr2 GT expr2 {$$ = gt($ 1,$ 3); }
| expr2 GE expr2 {$$ = gt($ 1,$ 3); }
;

expr2:
expr3 {$$ == $ 1; }
| expr2 PLUS expr3 {$$ = plus($ 1,$ 3); }
| expr2 MINUS expr3 {$$ = minus($ 1,$ 3); }
;

expr3:
expr4 {$$ = $ 1; }
| expr3 MULT expr4 {$$ = multi($ 1,$ 3); }
| expr3 DIVIDE expr4 {$$ = divide($ 1,$ 3); }
;

expr4:
PLUS expr4 {$$ = $ 2; }
| MINUS expr4 {$$ = neg($ 2); }
| LPAREN表达式RPAREN {$$ = $ 2; }
| NUMBER {$$ = number($ 1); }
|指示符{$$ = $ 1; }
;

指示符:
NAME {$$ = name($ 1); }
;
%%







是有可能使用flex / bison像Assembler一样实现JMP指令,去像我的例子一样的标签,感谢您的帮助。

 :L1 
如果标志和X0001
EVT 23;
ELSE
WAIT 500 ms;
JMP L1;
END IF;


解决方案

编辑:



这是承诺的更详细的示例:



通常我从所需语言的示例文件开始:

 #example.toy 
begin#简单的玩具语言
x = 23;
while x> 0 do begin
x = x - 1;
print(x * x);
end;
end;

下一步是创建一个lexer +解析器组合,其中上一个文件
通过。



这里是lexer(用 flex -o lexer.c lexer.l 生成源代码)。还需要注意的是,lexer源依赖于解析器源(因为TOKEN_ *常量),所以bison必须在编译lexer源之前运行:

 %option noyywrap 

%{
#includeparser.h
#include< stdlib.h>
%}

%%

whilereturn TOKEN_WHILE;
beginreturn TOKEN_BEGIN;
endreturn TOKEN_END;
doreturn TOKEN_DO;
[a-zA-Z _] [a-zA-Z0-9 _] * {yylval.name = strdup(yytext); return TOKEN_ID;}
[ - ]?[0-9] + {yylval.val = atoi(yytext); return TOKEN_NUMBER;}
[()=;] {return * yytext;}
[* / + - <>] {yylval.op = * yytext;返回TOKEN_OPERATOR;}
[\t\\\
] {/ *禁止输入文件中的空白输出到stdout * /}
#。* {/ *单行注释* }

和解析器(使用 bison -d -o parser.c parser.y -d 告诉bison创建parser.h头文件包含词法需要的一些东西)

 %error-verbose / *指示bison生成详细错误消息* / 
%{
/ *启用解析器调试:当yydebug设置为1之前的
* yyparse调用分析器打印了很多关于它做什么的消息* /
#define YYDEBUG 1
%}

%union {
int val;
char op;
char * name;
}

%令牌TOKEN_BEGIN TOKEN_END TOKEN_WHILE TOKEN_DO TOKEN_ID TOKEN_NUMBER TOKEN_OPERATOR
%开始程序

%{
/ *转发声明* /
void yyerror(const char * const message);


%}

%%

程序:语句';';

块:TOKEN_BEGIN语句TOKEN_END;

语句:
|语句语句';'
|语句块';';

语句:
赋值
| whileStmt
|块
|呼叫;

赋值:TOKEN_ID'='expression;

表达式:TOKEN_ID
| TOKEN_NUMBER
|表达式TOKEN_OPERATOR表达式;

whileStmt:TOKEN_WHILE表达式TOKEN_DO语句;

call:TOKEN_ID'('expression')';

%%

#include< stdlib.h>

void yyerror(const char * const message)
{
fprintf(stderr,Parse error:%s\\\
,message);
exit(1);
}

int main()
{
yydebug = 0;
yyparse();
}

gcc parser.c lexer.c -o toylang-noop toylang-noop< example.toy 必须没有任何错误地运行。所以现在解析器本身工作,并且可以解析示例脚本。



下一步是创建一个所谓的语法的抽象语法树。在这一点上,我首先通过定义不同类型的标记和规则,以及在每个解析步骤中插入规则,来扩充解析器。

 %error-verbose / *指示bison生成详细的错误消息* / 
%{
#includeastgen.h
#define YYDEBUG 1

/ *由于解析器必须返回AST,它必须得到一个参数,其中
* AST可以存储。参数的类型将是void *。 * /
#define YYPARSE_PARAM astDest
%}

%union {
int val;
char op;
char * name;
struct AstElement * ast; / *这是存储AST元素的新成员* /
}

%令牌TOKEN_BEGIN TOKEN_END TOKEN_WHILE TOKEN_DO
%令牌< name> TOKEN_ID
%token< val> TOKEN_NUMBER
%令牌< op> TOKEN_OPERATOR
%type< ast>程序块语句语句分配表达式whileStmt调用
%start程序

%{
/ *转发声明* /
void yyerror(const char * const message);


%}

%%

程序:语句';'{(*(struct AstElement **)astDest)= $ 1; };

块:TOKEN_BEGIN语句TOKEN_END {$$ = $ 2; };

声明:{$$ = 0;}
|语句语句';'{$$ = makeStatement($ 1,$ 2);}
|语句块';'{$$ = makeStatement($ 1,$ 2);};

语句:
赋值{$$ = $ 1;}
| whileStmt {$$ = $ 1;}
|块{$$ = $ 1;}
|调用{$$ = $ 1;}

分配:TOKEN_ID'='expression {$$ = makeAssignment($ 1,$ 3);}

表达式:TOKEN_ID {$$ = makeExpByName($ 1);}
| TOKEN_NUMBER {$$ = makeExpByNum($ 1);}
|表达式TOKEN_OPERATOR表达式{$$ = makeExp($ 1,$ 3,$ 2);}

whileStmt:TOKEN_WHILE表达式TOKEN_DO语句{$$ = makeWhile($ 2,$ 4);};

call:TOKEN_ID'('expression')'{$$ = makeCall($ 1,$ 3);};

%%

#includeastexec.h
#include< stdlib.h>

void yyerror(const char * const message)
{
fprintf(stderr,Parse error:%s\\\
,message);
exit(1);
}

int main()
{
yydebug = 0;
struct AstElement * a;
yyparse(& a);
}

正如你所看到的,生成AST的主要部分是创建节点
当解析器的某个规则通过时的AST。由于bison维护着当前解析过程本身的
堆栈,因此只需要将
当前解析状态分配给栈
的元素(这些是 $$ = foo(bar) lines)



目标是内存中的以下结构:

  ekStatements 
.count = 2
.statements
ekAssignment
.name =x
.right
ekNumber
.val = 23
ekWhile
.cond
ekBinExpression
.left
ekId
.name =x
.right
ekNumber
.val = 0
.op ='>'
.statements
ekAssignment
.name =x
.right
ekBinExpression
.left
ekId
.name =x
.right
ekNumber
.val = 1
.op =' - '
ekCall
.name =print
.param
ekBinExpression
.left
ekId
.name =x
.right
ekId
.name =x
.op ='*'



要获得此图,需要生成代码astgen.h:

  #ifndef ASTGEN_H 
#define ASTGEN_H

struct AstElement
{
enum {ekId,ekNumber,ekBinExpression,ekAssignment,ekWhile, ekCall,ekStatements,ekLastElement} kind;
union
{
int val;
char * name;
struct
{
struct AstElement * left,* right;
char op;
} expression;
struct
{
char * name;
struct AstElement * right;
} assignment;
struct
{
int count;
struct AstElement ** statements;
}语句;
struct
{
struct AstElement * cond;
struct AstElement * statements;
} whileStmt;
struct
{
char * name;
struct AstElement * param;
} call;
} data;
};

struct AstElement * makeAssignment(char * name,struct AstElement * val);
struct AstElement * makeExpByNum(int val);
struct AstElement * makeExpByName(char * name);
struct AstElement * makeExp(struct AstElement * left,struct AstElement * right,char op);
struct AstElement * makeStatement(struct AstElement * dest,struct AstElement * toAppend);
struct AstElement * makeWhile(struct AstElement * cond,struct AstElement * exec);
struct AstElement * makeCall(char * name,struct AstElement * param);
#endif

astgen.c:


$ b b

  #includeastgen.h
#include< stdio.h>
#include< stdlib.h>
#include< assert.h>

static void * checkAlloc(size_t sz)
{
void * result = calloc(sz,1);
if(!result)
{
fprintf(stderr,alloc failed\\\
);
exit(1);
}
}

struct AstElement * makeAssignment(char * name,struct AstElement * val)
{
struct AstElement * result = checkAlloc *结果));
result-> kind = ekAssignment;
result-> data.assignment.name = name;
result-> data.assignment.right = val;
return result;
}

struct AstElement * makeExpByNum(int val)
{
struct AstElement * result = checkAlloc(sizeof(* result));
result-> kind = ekNumber;
result-> data.val = val;
return result;
}

struct AstElement * makeExpByName(char * name)
{
struct AstElement * result = checkAlloc(sizeof(* result));
result-> kind = ekId;
result-> data.name = name;
return result;
}

struct AstElement * makeExp(struct AstElement * left,struct AstElement * right,char op)
{
struct AstElement * result = checkAlloc结果));
result-> kind = ekBinExpression;
result-> data.expression.left = left;
result-> data.expression.right = right;
result-> data.expression.op = op;
return result;
}

struct AstElement * makeStatement(struct AstElement * result,struct AstElement * toAppend)
{
if(!result)
{
result = checkAlloc(sizeof(* result));
result-> kind = ekStatements;
result-> data.statements.count = 0;
result-> data.statements.statements = 0;
}
assert(ekStatements == result-> kind);
result-> data.statements.count ++;
result-> data.statements.statements = realloc(result-> data.statements.statements,result-> data.statements.count * sizeof(* result-> data.statements.statements)) ;
result-> data.statements.statements [result-> data.statements.count-1] = toAppend;
return result;
}

struct AstElement * makeWhile(struct AstElement * cond,struct AstElement * exec)
{
struct AstElement * result = checkAlloc(sizeof(* result)) ;
result-> kind = ekWhile;
result-> data.whileStmt.cond = cond;
result-> data.whileStmt.statements = exec;
return result;
}

struct AstElement * makeCall(char * name,struct AstElement * param)
{
struct AstElement * result = checkAlloc(sizeof(* result));
result-> kind = ekCall;
result-> data.call.name = name;
result-> data.call.param = param;
return result;
}

您可以看到,AST元素的生成是一个单调
工作。在步骤完成后,程序仍然不执行任何操作,但AST可以在调试器中查看



下一步是编写解释器。这是astexec.h:

  #ifndef ASTEXEC_H 
#define ASTEXEC_H

struct AstElement ;
struct ExecEnviron;

/ *创建执行引擎* /
struct ExecEnviron * createEnv();

/ *删除ExecEnviron * /
void freeEnv(struct ExecEnviron * e);

/ *执行一个AST * /
void execAst(struct ExecEnviron * e,struct AstElement * a);

#endif

解释器本身很简单,尽管它的
长度。大多数功能仅处理一种特定类型的AstElement。
正确的函数由dispatchExpression和dispatchStatement
函数选择。 dispatch函数在valExecs
和runExecs数组中查找目标函数。



astexec.c:

  #includeastexec.h
#includeastgen.h
#include< stdlib.h>
#include< assert.h>
#include< stdio.h>

struct ExecEnviron
{
int x; / * x变量的值,一个真正的语言会有一些name->值查找表,而不是* /
};

static int execTermExpression(struct ExecEnviron * e,struct AstElement * a);
static int execBinExp(struct ExecEnviron * e,struct AstElement * a);
static void execAssign(struct ExecEnviron * e,struct AstElement * a);
static void execWhile(struct ExecEnviron * e,struct AstElement * a);
static void execCall(struct ExecEnviron * e,struct AstElement * a);
static void execStmt(struct ExecEnviron * e,struct AstElement * a);

/ *产生值的AST元素的查找数组* /
static int(* valExecs [])(struct ExecEnviron * e,struct AstElement * a)=
{
execTermExpression,
execTermExpression,
execBinExp,
NULL,
NULL,
NULL,
NULL
};

/ *非值AST元素的查找数组* /
static void(* runExecs [])(struct ExecEnviron * e,struct AstElement * a)=
{
NULL,/ * ID和数字是规范的,* /
NULL,/ *不需要执行* /
NULL,/ *不执行二进制表达式* /
execAssign,
execWhile,
execCall,
execStmt,
};

/ *调度任何值表达式* /
static int dispatchExpression(struct ExecEnviron * e,struct AstElement * a)
{
assert(a);
assert(valExecs [a-> kind]);
return valExecs [a-> kind](e,a);
}

static void dispatchStatement(struct ExecEnviron * e,struct AstElement * a)
{
assert(a);
assert(runExecs [a-> kind]);
runExecs [a-> kind](e,a);
}

static void onlyName(const char * name,const char * reference,const char * kind)
{
if(strcmp(reference,name))
{
fprintf(stderr,
这种语言只知道%s'%s',而不是'%s'\\\
',
kind,reference,name) ;
exit(1);
}
}

static void onlyX(const char * name)
{
onlyName(name,x,variable);
}

static void onlyPrint(const char * name)
{
onlyName(name,print,function);
}

static int execTermExpression(struct ExecEnviron * e,struct AstElement * a)
{
/ *此函数看起来很丑,因为它处理两种不同类型的
* AstElement。我会重构它一个execNameExp和execVal
*函数来摆脱这两个if语句。 * /
assert(a);
if(ekNumber == a-> kind)
{
return a-> data.val;
}
else
{
if(ekId == a-> kind)
{
onlyX(a-> data.name);
assert(e);
return e-> x;
}
}
fprintf(stderr,OOPS:试图获取非表达式(%d)的值\ n,a-> kind);
exit(1);
}

static int execBinExp(struct ExecEnviron * e,struct AstElement * a)
{
assert(ekBinExpression == a-> kind);
const int left = dispatchExpression(e,a-> data.expression.left);
const int right = dispatchExpression(e,a-> data.expression.right);
switch(a-> data.expression.op)
{
case'+':
return left + right;
case' - ':
return left-right;
case'*':
return left * right;
case'<':
return left<对;
case'>':
return left>对;
default:
fprintf(stderr,OOPS:Unknown operator:%c \\\
,a-> data.expression.op);
exit(1);
}
/ *这里没有返回值,因为每个switch case返回一些值(或bails out)* /
}

static void execAssign(struct ExecEnviron * e ,struct AstElement * a)
{
assert(a);
assert(ekAssignment == a-> kind);
onlyX(a-> data.assignment.name);
assert(e);
struct AstElement * r = a-> data.assignment.right;
e-> x = dispatchExpression(e,r);
}

static void execWhile(struct ExecEnviron * e,struct AstElement * a)
{
assert(a);
assert(ekWhile == a-> kind);
struct AstElement * const c = a-> data.whileStmt.cond;
struct AstElement * const s = a-> data.whileStmt.statements;
assert(c);
assert(s)
while(dispatchExpression(e,c))
{
dispatchStatement(e,s);
}
}

static void execCall(struct ExecEnviron * e,struct AstElement * a)
{
assert(a);
assert(ekCall == a-> gt);
onlyPrint(a-> data.call.name);
printf(%d \\\
,dispatchExpression(e,a-> data.call.param));
}

static void execStmt(struct ExecEnviron * e,struct AstElement * a)
{
assert(a);
assert(ekStatements == a-> kind);
int i;
for(i = 0; i data.statements.count; i ++)
{
dispatchStatement(e,a-> data.statements.statements [i] ;
}
}

void execAst(struct ExecEnviron * e,struct AstElement * a)
{
dispatchStatement(e,a);
}

struct ExecEnviron * createEnv()
{
assert(ekLastElement ==(sizeof(valExecs)/ sizeof(* valExecs)))
assert(ekLastElement ==(sizeof(runExecs)/ sizeof(* runExecs)));
return calloc(1,sizeof(struct ExecEnviron));
}

void freeEnv(struct ExecEnviron * e)
{
free(e);
}

现在解释器已完成,并且示例可以运行,函数已更新:

  #include< assert.h> 

int main()
{
yydebug = 0;
struct AstElement * a = 0;
yyparse(& a);
/ * Q& D警告:在生产代码中,此assert必须由
*实际错误处理替换。 * /
assert(a);
struct ExecEnviron * e = createEnv();
execAst(e,a);
freeEnv(e);
/ * TODO:销毁AST * /
}

为这种语言工作。请注意,此解释器中有一些限制:




  • 它只有一个变量和一个函数

    • 只有函数的一个参数


  • >很难添加goto支持,因为对于每个AST元素,解释器调用解释函数。 Goto可以在一个块内通过在 execStmt 函数中添加一些东西来实现,但是为了在不同的块或级别之间跳转,执行机制必须显着改变(这是因为 t在解释器中的不同堆栈帧之间跳转)。例如,AST可以转换为字节码,这个字节码由vm解释。

  • 一些其他我需要查找:)






您需要定义语言的语法。有些事情(lexer和解析器都不完整):

 
/ * foo.y * /
% ELSE OR AND / *首先列出语言的所有终端符号* /
%%

语句:/ *允许空语句* / | stm |语句';'stm;

stm:ifStatement
| NAME
| NAME expList
|标签;

expList:expression | expList表达式;

label:':'NAME {/ *存储标签的代码* /};

ifStatement:IF表达式语句
| IF表达式语句ELSE语句;

表达式:ID {/ *处理找到的ID的代码* /}
|表达式AND表达式{/ *代码到con cat两个表达式和和* /}
|表达式OR表达式
| '('expression')';

然后使用 bison -d foo.y -o foo.c -d 开关指示bison生成一个包含解析器使用的所有令牌的头。现在您创建的词法分析器

 
/ * bar.l * /
%{
#includefoo.h
%}

%%

如果返回IF;
ELSE return ELSE;
OR return OR;
AND return AND;
[A-Z] + {/ *存储yylval某处访问它在解析器* /返回ID; }

之后,您的词法分析器和解析器完成,只有需要为您的语言编写语义操作。 >

I dont get the error, please can you help me out, here is the .l and .y file.thanks.

%{
#include "ifanw.tab.h"
extern int yylval;
%}
%%
"="      { return EQ; }
"!="     { return NE; }
"<"      { return LT; }
"<="     { return LE; }
">"      { return GT; }
">="     { return GE; }
"+"      { return PLUS; }
"-"      { return MINUS; }
"*"      { return MULT; }
"/"      { return DIVIDE; }
")"      { return RPAREN; }
"("      { return LPAREN; }
":="     { return ASSIGN; }
";"      { return SEMICOLON; }
"IF"     { return IF; }
"THEN"   { return THEN; }
"ELSE"   { return ELSE; }
"FI"     { return FI; }
"WHILE"  { return WHILE; }
"DO"     { return DO; }
"OD"     { return OD; }
"PRINT"  { return PRINT; }
[0-9]+   { yylval = atoi(yytext); return NUMBER; }
[a-z]    { yylval = yytext[0] - 'a'; return NAME; }   
\        { ; }
\n       { nextline(); }
\t       { ; }
"//".*\n { nextline(); }
.        { yyerror("illegal token"); }
%%

Yacc-file

%start ROOT

%token EQ
%token NE
%token LT
%token LE
%token GT
%token GE
%token PLUS
%token MINUS
%token MULT
%token DIVIDE
%token RPAREN
%token LPAREN
%token ASSIGN
%token SEMICOLON
%token IF
%token THEN
%token ELSE
%token FI
%token WHILE
%token DO
%token OD
%token PRINT
%token NUMBER
%token NAME

%%

ROOT:
   stmtseq { execute($1); } 
   ;

statement:
     designator ASSIGN expression { $$ = assignment($1, $3); } 
   | PRINT expression { $$ = print($2); } 
   | IF expression THEN stmtseq ELSE stmtseq FI
    { $$ = ifstmt($2, $4, $6); }
   | IF expression THEN stmtseq FI
    { $$ = ifstmt($2, $4, empty()); }
   | WHILE expression DO stmtseq OD { $$ = whilestmt($2, $4); }   
   ;

stmtseq:
     stmtseq SEMICOLON statement { $$ = seq($1, $3); }
   | statement { $$ = $1; }
   ;

expression:
 expr2 { $$ = $1; } 
   | expr2 EQ expr2 { $$ = eq($1, $3); }
   | expr2 NE expr2 { $$ = ne($1, $3); }
   | expr2 LT expr2 { $$ = le($1, $3); }
   | expr2 LE expr2 { $$ = le($1, $3); }
   | expr2 GT expr2 { $$ = gt($1, $3); }
   | expr2 GE expr2 { $$ = gt($1, $3); }
   ;

expr2:
     expr3 { $$ == $1; }
   | expr2 PLUS expr3 { $$ = plus($1, $3); }
   | expr2 MINUS expr3 { $$ = minus($1, $3); }
   ;

expr3:
     expr4 { $$ = $1; }
   | expr3 MULT expr4 { $$ = mult($1, $3); }
   | expr3 DIVIDE expr4 { $$ = divide ($1, $3); }
   ;

expr4:
     PLUS expr4 { $$ = $2; }
   | MINUS expr4 { $$ = neg($2); }
   | LPAREN expression RPAREN { $$ = $2; }
   | NUMBER { $$ = number($1); }
   | designator { $$ = $1; }
   ;

designator:
     NAME { $$ = name($1); }
  ;
%%


I have another question, is there a possibility to implement a JMP instruction with flex/bison like in Assembler to go to a label like my example, thanks for your help.

:L1
IF FLAG AND X"0001"
    EVT 23;
ELSE
    WAIT 500 ms;
    JMP L1;
END IF; 

解决方案

EDIT: I put the old answer to the end

Here is the promised more detailed example:

usually I begin with an example file of the desired language:

# example.toy
begin # example of the simple toy language
    x = 23;
    while x > 0 do begin
        x = x - 1;
        print(x*x);
    end;
end;

The next step is to create a lexer+parser combination where the previous file passes.

Here comes the lexer (generate the source with flex -o lexer.c lexer.l). Also note that the lexer source depends on the parser sources (because of the TOKEN_* constants), so bison must be run before compiling the lexer source:

%option noyywrap

%{
#include "parser.h"
#include <stdlib.h>
%}

%%

"while" return TOKEN_WHILE;
"begin" return TOKEN_BEGIN;
"end"   return TOKEN_END;
"do"    return TOKEN_DO;
[a-zA-Z_][a-zA-Z0-9_]* {yylval.name = strdup(yytext); return TOKEN_ID;}
[-]?[0-9]+    {yylval.val = atoi(yytext); return TOKEN_NUMBER;}
[()=;]  {return *yytext;}
[*/+-<>] {yylval.op = *yytext; return TOKEN_OPERATOR;}
[ \t\n] {/* suppress the output of the whitespaces from the input file to stdout */}
#.* {/* one-line comment */}

and the parser (compile with bison -d -o parser.c parser.y, the -d tells bison to create the parser.h header file with some stuff the lexer needs)

%error-verbose /* instruct bison to generate verbose error messages*/
%{
/* enable debugging of the parser: when yydebug is set to 1 before the
 * yyparse call the parser prints a lot of messages about what it does */
#define YYDEBUG 1
%}

%union {
    int val;
    char op;
    char* name;
}

%token TOKEN_BEGIN TOKEN_END TOKEN_WHILE TOKEN_DO TOKEN_ID TOKEN_NUMBER TOKEN_OPERATOR
%start program

%{
/* Forward declarations */
void yyerror(const char* const message);


%}

%%

program: statement';';

block: TOKEN_BEGIN statements TOKEN_END;

statements:
    | statements statement ';'
    | statements block';';

statement: 
      assignment
    | whileStmt
    | block
    | call;

assignment: TOKEN_ID '=' expression;

expression: TOKEN_ID
    | TOKEN_NUMBER
    | expression TOKEN_OPERATOR expression;

whileStmt: TOKEN_WHILE expression TOKEN_DO statement;

call: TOKEN_ID '(' expression ')';

%%

#include <stdlib.h>

void yyerror(const char* const message)
{
    fprintf(stderr, "Parse error:%s\n", message);
    exit(1);
}

int main()
{
    yydebug = 0;
    yyparse();
}

After gcc parser.c lexer.c -o toylang-noop the call of toylang-noop < example.toy must run without any error. So now the parser itself works and can parse the example script.

The next step is to create a so called abstract syntax tree of the grammar. At this point I start with the augmenting of the parser by defining different types to the tokens and rules, as well as inserting rules to each parsing step.

%error-verbose /* instruct bison to generate verbose error messages*/
%{
#include "astgen.h"
#define YYDEBUG 1

/* Since the parser must return the AST, it must get a parameter where
 * the AST can be stored. The type of the parameter will be void*. */
#define YYPARSE_PARAM astDest
%}

%union {
    int val;
    char op;
    char* name;
    struct AstElement* ast; /* this is the new member to store AST elements */
}

%token TOKEN_BEGIN TOKEN_END TOKEN_WHILE TOKEN_DO
%token<name> TOKEN_ID
%token<val> TOKEN_NUMBER
%token<op> TOKEN_OPERATOR
%type<ast> program block statements statement assignment expression whileStmt call
%start program

%{
/* Forward declarations */
void yyerror(const char* const message);


%}

%%

program: statement';' { (*(struct AstElement**)astDest) = $1; };

block: TOKEN_BEGIN statements TOKEN_END{ $$ = $2; };

statements: {$$=0;}
    | statements statement ';' {$$=makeStatement($1, $2);}
    | statements block';' {$$=makeStatement($1, $2);};

statement: 
      assignment {$$=$1;}
    | whileStmt {$$=$1;}
    | block {$$=$1;}
    | call {$$=$1;}

assignment: TOKEN_ID '=' expression {$$=makeAssignment($1, $3);}

expression: TOKEN_ID {$$=makeExpByName($1);}
    | TOKEN_NUMBER {$$=makeExpByNum($1);}
    | expression TOKEN_OPERATOR expression {$$=makeExp($1, $3, $2);}

whileStmt: TOKEN_WHILE expression TOKEN_DO statement{$$=makeWhile($2, $4);};

call: TOKEN_ID '(' expression ')' {$$=makeCall($1, $3);};

%%

#include "astexec.h"
#include <stdlib.h>

void yyerror(const char* const message)
{
    fprintf(stderr, "Parse error:%s\n", message);
    exit(1);
}

int main()
{
    yydebug = 0;
    struct AstElement *a;
    yyparse(&a);
}

As you can see, the main part when generating the AST is to create the nodes of the AST when a certain rule of the parser was passed. Since bison maintains a stack of the current parsing process itself, is is only needed to assign the current parsing status to the elements of the stack (these are the $$=foo(bar) lines)

The target is the following structure in memory:

ekStatements
  .count = 2
  .statements
    ekAssignment
      .name = "x"
      .right
        ekNumber
          .val = 23
    ekWhile
      .cond
        ekBinExpression
        .left
          ekId
            .name = "x"
        .right
          ekNumber
            .val=0
        .op = '>'
      .statements
        ekAssignment
          .name = "x"
          .right
            ekBinExpression
              .left
                ekId
                  .name = "x"
              .right
                ekNumber
                  .val = 1
              .op = '-'
        ekCall
          .name = "print"
          .param
            ekBinExpression
              .left
                ekId
                  .name = "x"
              .right
                ekId
                  .name = "x"
              .op = '*'

To get this graph, there is the generating code needed, astgen.h:

#ifndef ASTGEN_H
#define ASTGEN_H

struct AstElement
{
    enum {ekId, ekNumber, ekBinExpression, ekAssignment, ekWhile, ekCall, ekStatements, ekLastElement} kind;
    union
    {
        int val;
        char* name;
        struct
        {
            struct AstElement *left, *right;
            char op;
        }expression;
        struct
        {
            char*name;
            struct AstElement* right;
        }assignment;
        struct
        {
            int count;
            struct AstElement** statements;
        }statements;
        struct
        {
            struct AstElement* cond;
            struct AstElement* statements;
        } whileStmt;
        struct
        {
            char* name;
            struct AstElement* param;
        }call;
    } data;
};

struct AstElement* makeAssignment(char*name, struct AstElement* val);
struct AstElement* makeExpByNum(int val);
struct AstElement* makeExpByName(char*name);
struct AstElement* makeExp(struct AstElement* left, struct AstElement* right, char op);
struct AstElement* makeStatement(struct AstElement* dest, struct AstElement* toAppend);
struct AstElement* makeWhile(struct AstElement* cond, struct AstElement* exec);
struct AstElement* makeCall(char* name, struct AstElement* param);
#endif

astgen.c:

#include "astgen.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

static void* checkAlloc(size_t sz)
{
    void* result = calloc(sz, 1);
    if(!result)
    {
        fprintf(stderr, "alloc failed\n");
        exit(1);
    }
}

struct AstElement* makeAssignment( char*name, struct AstElement* val)
{
    struct AstElement* result = checkAlloc(sizeof(*result));
    result->kind = ekAssignment;
    result->data.assignment.name = name;
    result->data.assignment.right = val;
    return result;
}

struct AstElement* makeExpByNum(int val)
{
    struct AstElement* result = checkAlloc(sizeof(*result));
    result->kind = ekNumber;
    result->data.val = val;
    return result;
}

struct AstElement* makeExpByName(char*name)
{
    struct AstElement* result = checkAlloc(sizeof(*result));
    result->kind = ekId;
    result->data.name = name;
    return result;
}

struct AstElement* makeExp(struct AstElement* left, struct AstElement* right, char op)
{
    struct AstElement* result = checkAlloc(sizeof(*result));
    result->kind = ekBinExpression;
    result->data.expression.left = left;
    result->data.expression.right = right;
    result->data.expression.op = op;
    return result;
}

struct AstElement* makeStatement(struct AstElement* result, struct AstElement* toAppend)
{
    if(!result)
    {
        result = checkAlloc(sizeof(*result));
        result->kind = ekStatements;
        result->data.statements.count = 0;
        result->data.statements.statements = 0;
    }
    assert(ekStatements == result->kind);
    result->data.statements.count++;
    result->data.statements.statements = realloc(result->data.statements.statements, result->data.statements.count*sizeof(*result->data.statements.statements));
    result->data.statements.statements[result->data.statements.count-1] = toAppend;
    return result;
}

struct AstElement* makeWhile(struct AstElement* cond, struct AstElement* exec)
{
    struct AstElement* result = checkAlloc(sizeof(*result));
    result->kind = ekWhile;
    result->data.whileStmt.cond = cond;
    result->data.whileStmt.statements = exec;
    return result;
}

struct AstElement* makeCall(char* name, struct AstElement* param)
{
    struct AstElement* result = checkAlloc(sizeof(*result));
    result->kind = ekCall;
    result->data.call.name = name;
    result->data.call.param = param;
    return result;
}

You can see here that the generating of the AST elements is a rather monotone job. After the step is done, the program still does nothing, but the AST can be viewed in a debugger.

The next step is to write the interpreter. This is astexec.h:

#ifndef ASTEXEC_H
#define ASTEXEC_H

struct AstElement;
struct ExecEnviron;

/* creates the execution engine */
struct ExecEnviron* createEnv();

/* removes the ExecEnviron */
void freeEnv(struct ExecEnviron* e);

/* executes an AST */
void execAst(struct ExecEnviron* e, struct AstElement* a);

#endif

Well, this looks friendly. The Interpreter itself is simple, despite it's length. The most functions deals with only a particular kind of AstElement. The correct function is selected by the dispatchExpression and dispatchStatement functions. The dispatch functions looks for the target function in the valExecs and runExecs arrays.

astexec.c:

#include "astexec.h"
#include "astgen.h"
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>

struct ExecEnviron
{
    int x; /* The value of the x variable, a real language would have some name->value lookup table instead */
};

static int execTermExpression(struct ExecEnviron* e, struct AstElement* a);
static int execBinExp(struct ExecEnviron* e, struct AstElement* a);
static void execAssign(struct ExecEnviron* e, struct AstElement* a);
static void execWhile(struct ExecEnviron* e, struct AstElement* a);
static void execCall(struct ExecEnviron* e, struct AstElement* a);
static void execStmt(struct ExecEnviron* e, struct AstElement* a);

/* Lookup Array for AST elements which yields values */
static int(*valExecs[])(struct ExecEnviron* e, struct AstElement* a) =
{
    execTermExpression,
    execTermExpression,
    execBinExp,
    NULL,
    NULL,
    NULL,
    NULL
};

/* lookup array for non-value AST elements */
static void(*runExecs[])(struct ExecEnviron* e, struct AstElement* a) =
{
    NULL, /* ID and numbers are canonical and */
    NULL, /* don't need to be executed */
    NULL, /* a binary expression is not executed */
    execAssign,
    execWhile,
    execCall,
    execStmt,
};

/* Dispatches any value expression */
static int dispatchExpression(struct ExecEnviron* e, struct AstElement* a)
{
    assert(a);
    assert(valExecs[a->kind]);
    return valExecs[a->kind](e, a);
}

static void dispatchStatement(struct ExecEnviron* e, struct AstElement* a)
{
    assert(a);
    assert(runExecs[a->kind]);
    runExecs[a->kind](e, a);
}

static void onlyName(const char* name, const char* reference, const char* kind)
{
    if(strcmp(reference, name))
    {
        fprintf(stderr,
            "This language knows only the %s '%s', not '%s'\n",
            kind, reference, name);
        exit(1);
    }
}

static void onlyX(const char* name)
{
    onlyName(name, "x", "variable");
}

static void onlyPrint(const char* name)
{
    onlyName(name, "print", "function");
}

static int execTermExpression(struct ExecEnviron* e, struct AstElement* a)
{
    /* This function looks ugly because it handles two different kinds of
     * AstElement. I would refactor it to an execNameExp and execVal
     * function to get rid of this two if statements. */
    assert(a);
    if(ekNumber == a->kind)
    {
        return a->data.val;
    }
    else
    {
        if(ekId == a->kind)
        {
            onlyX(a->data.name);
            assert(e);
            return e->x;
        }
    }
    fprintf(stderr, "OOPS: tried to get the value of a non-expression(%d)\n", a->kind);
    exit(1);
}

static int execBinExp(struct ExecEnviron* e, struct AstElement* a)
{
    assert(ekBinExpression == a->kind);
    const int left = dispatchExpression(e, a->data.expression.left);
    const int right = dispatchExpression(e, a->data.expression.right);
    switch(a->data.expression.op)
    {
        case '+':
            return left + right;
        case '-':
            return left - right;
        case '*':
            return left * right;
        case '<':
            return left < right;
        case '>':
            return left > right;
        default:
            fprintf(stderr,  "OOPS: Unknown operator:%c\n", a->data.expression.op);
            exit(1);
    }
    /* no return here, since every switch case returns some value (or bails out) */
}

static void execAssign(struct ExecEnviron* e, struct AstElement* a)
{
    assert(a);
    assert(ekAssignment == a->kind);
    onlyX(a->data.assignment.name);
    assert(e);
    struct AstElement* r = a->data.assignment.right;
    e->x = dispatchExpression(e, r);
}

static void execWhile(struct ExecEnviron* e, struct AstElement* a)
{
    assert(a);
    assert(ekWhile == a->kind);
    struct AstElement* const c = a->data.whileStmt.cond;
    struct AstElement* const s = a->data.whileStmt.statements;
    assert(c);
    assert(s);
    while(dispatchExpression(e, c))
    {
        dispatchStatement(e, s);
    }
}

static void execCall(struct ExecEnviron* e, struct AstElement* a)
{
    assert(a);
    assert(ekCall == a->kind);
    onlyPrint(a->data.call.name);
    printf("%d\n", dispatchExpression(e, a->data.call.param));
}

static void execStmt(struct ExecEnviron* e, struct AstElement* a)
{
    assert(a);
    assert(ekStatements == a->kind);
    int i;
    for(i=0; i<a->data.statements.count; i++)
    {
        dispatchStatement(e, a->data.statements.statements[i]);
    }
}

void execAst(struct ExecEnviron* e, struct AstElement* a)
{
    dispatchStatement(e, a);
}

struct ExecEnviron* createEnv()
{
    assert(ekLastElement == (sizeof(valExecs)/sizeof(*valExecs)));
    assert(ekLastElement == (sizeof(runExecs)/sizeof(*runExecs)));
    return calloc(1, sizeof(struct ExecEnviron));
}

void freeEnv(struct ExecEnviron* e)
{
    free(e);
}

Now the interpreter is complete, and the example can be run, after the main function is updated:

#include <assert.h>

int main()
{
    yydebug = 0;
    struct AstElement *a = 0;
    yyparse(&a);
    /* Q&D WARNING: in production code this assert must be replaced by
     * real error handling. */
    assert(a);
    struct ExecEnviron* e = createEnv();
    execAst(e, a);
    freeEnv(e);
    /* TODO: destroy the AST */
}

Now the interpreter for this language works. Note that there are some limitations within this interpreter:

  • it has only one variable and one function
    • and only one parameter to a function
  • only the type int for values
  • it is difficult to add goto support, since for each AST element the interpreter calls an interpreting function. Goto can be implemented within one block by hacking something into the execStmt function, but to jump between different blocks or levels the execution machinery must be changed dramatically (this is because one can't jump between different stack frames in the interpreter). For example the AST can be transformed into byte code and this byte code is interpreted by a vm.
  • some other which I would need to lookup :)

You need to define the grammar for your language. Some thing like this (both lexer and parser are incomplete):

/* foo.y */
%token ID IF ELSE OR AND /* First list all terminal symbols of the language */
%%

statements: /* allow empty statements */ | stm | statements ';' stm;

stm: ifStatement
   | NAME
   | NAME expList
   | label;

expList: expression | expList expression;

label: ':' NAME { /* code to store the label */ };

ifStatement: IF expression statements
           | IF expression statements ELSE statements;

expression: ID                          { /* Code to handle the found ID */ }
          | expression AND expression   { /* Code to con cat two expression with and */ }
          | expression OR expression
          | '(' expression ')';

Then you compile this file with bison -d foo.y -o foo.c. The -d switch instruct bison to generate a header with all the tokens the parser uses. Now you create your lexer

/* bar.l */
%{
#include "foo.h"
%}

%%

IF   return IF;
ELSE return ELSE;
OR   return OR;
AND  return AND;
[A-Z]+  { /*store yylval somewhere to access it in the parser*/ return ID; }

After this you have your lexer and parser done, and "only" need to write the semantic actions for your language.

这篇关于如何在Flex / bison中实现If语句的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆