解析器中的 Boost Spirit 段错误 [英] Boost Spirit Segfault In Parser

查看:82
本文介绍了解析器中 Boost Spirit 段错误的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我一直在尝试把本科编译原理课程中写的一些 lex 和 yacc 代码转换为 Spirit 代码,借此学习 Spirit,结果遇到了一个怎么也想不明白的段错误.我的词法分析器是这样写的:

I have been trying to convert some lex and yacc code I wrote in an undergraduate compiler course to Spirit code in order to learn Spirit, and I have found a segfault that I can't seem to figure out. I wrote the lexer like this:

namespace lex = boost::spirit::lex;

// Numeric token ids shared by the lexer and the grammar.
// The ids are contiguous from 1 to 35; the first enumerator of each group is
// pinned explicitly so that the numbering stays obvious when reading a group.
enum Tokens
{
    // keywords (1-15)
    k_andTok = 1, k_def, k_elihw, k_elseTok, k_falseTok,
    k_fed, k_fi, k_ifTok, k_input, k_notTok,
    k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,

    // arithmetic operators and '!' (16-20)
    k_plues = 16, k_minus, k_mult, k_div, k_bang,

    // comparison operators (21-26)
    k_equalTo = 21, k_greaterEq, k_lessEq, k_notEq, k_less, k_greater,

    // assignment and punctuation (27-33)
    k_assign = 27, k_comma, k_colon,
    k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,

    // identifiers and digits (34-35)
    k_nonTerminal = 34, k_terminal
};

// Lexer definition for the toy language.
//
// Each member is a token definition built from a regular expression. The
// constructor registers every token except `whiteSpace` in the default lexer
// state under the matching id from `Tokens`, and puts `whiteSpace` into a
// separate lexer state named "WHITESPACE".
template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
    LexerTokens() :
       whiteSpace("[ \\t\\n]"),
       andTok("and"),
       def("def"),
       elihw("elihw"),
       elseTok("else"),
       falseTok("false"),
       fed("fed"),
       fi("fi"),
       ifTok("if"),
       input("input"),
       notTok("not"),
       orTok("or"),
       print("print"),
       returnTok("return"),
       trueTok("true"),
       whileTok("while"),
       plus("\\+"),
       minus("\\-"),
       mult("\\*"),
       div("\\/"),
       bang("\\!"),
       equalTo("=="),
       greaterEq(">="),
       lessEq("<="),
       notEq("!="),
       less("<"),
       greater(">"),
       assign("="),
       comma(","),
       colon(":"),
       leftParen("\\("),
       rightParen("\\)"),
       leftBracket("\\["),
       // Fixed: this pattern used to be "\\[", a copy of leftBracket, so a
       // closing bracket could never be tokenized.
       rightBracket("\\]"),
       nonTerminal("[a-z][a-zA-Z0-9]*"),
       // NOTE(review): matches exactly one digit; confirm whether multi-digit
       // integers ("[0-9]+") were intended.
       terminal("[0-9]")
    {
        // Whitespace gets its own lexer state, separate from the tokens below.
        this->self("WHITESPACE") = whiteSpace;

        // Keywords are registered before nonTerminal, presumably so that a
        // keyword wins when it and the identifier pattern match the same text.
        // Each token is registered exactly once (the original added andTok twice).
        this->self.add
            (andTok, k_andTok)
            (def, k_def)
            (elihw, k_elihw)
            (elseTok, k_elseTok)
            (falseTok, k_falseTok)
            (fed, k_fed)
            (fi, k_fi)
            (ifTok, k_ifTok)
            (input, k_input)
            (notTok, k_notTok)
            (orTok, k_orTok)
            (print, k_print)
            (returnTok, k_returnTok)
            (trueTok, k_trueTok)
            (whileTok, k_whileTok)
            (plus, k_plues)
            (minus, k_minus)
            (mult, k_mult)
            (div, k_div)
            (bang, k_bang)
            (equalTo, k_equalTo)
            (greaterEq, k_greaterEq)
            (lessEq, k_lessEq)
            (notEq, k_notEq)
            (less, k_less)
            (greater, k_greater)
            (assign, k_assign)
            (comma, k_comma)
            (colon, k_colon)
            (leftParen, k_leftParen)
            (rightParen, k_rightParen)
            (leftBracket, k_leftBracket)
            (rightBracket, k_rightBracket)
            (nonTerminal, k_nonTerminal)
            (terminal, k_terminal);
    }

    // Skipped input; carries no attribute.
    lex::token_def<lex::omit> whiteSpace;

    // Keywords.
    lex::token_def<std::string> andTok;
    lex::token_def<std::string> def;
    lex::token_def<std::string> elihw;
    lex::token_def<std::string> elseTok;
    lex::token_def<std::string> falseTok;
    lex::token_def<std::string> fed;
    lex::token_def<std::string> fi;
    lex::token_def<std::string> ifTok;
    lex::token_def<std::string> input;
    lex::token_def<std::string> notTok;
    lex::token_def<std::string> orTok;
    lex::token_def<std::string> print;
    lex::token_def<std::string> returnTok;
    lex::token_def<std::string> trueTok;
    lex::token_def<std::string> whileTok;

    // Operators.
    lex::token_def<std::string> plus;
    lex::token_def<std::string> minus;
    lex::token_def<std::string> mult;
    lex::token_def<std::string> div;
    lex::token_def<std::string> bang;
    lex::token_def<std::string> equalTo;
    lex::token_def<std::string> greaterEq;
    lex::token_def<std::string> lessEq;
    lex::token_def<std::string> notEq;
    lex::token_def<std::string> less;
    lex::token_def<std::string> greater;
    lex::token_def<std::string> assign;

    // Punctuation.
    lex::token_def<std::string> comma;
    lex::token_def<std::string> colon;
    lex::token_def<std::string> leftParen;
    lex::token_def<std::string> rightParen;
    lex::token_def<std::string> leftBracket;
    lex::token_def<std::string> rightBracket;

    // Identifiers and digits.
    lex::token_def<std::string> nonTerminal;
    lex::token_def<std::string> terminal;
};

还有解析器

namespace qi = boost::spirit::qi;
// Qi grammar for the toy language, written over the numeric token ids in
// `Tokens` (every terminal is matched with qi::token(id)).
// All rules are declared as qi::rule<Iterator, Skipper>, i.e. without an
// attribute signature; `start` is the entry rule handed to the base grammar.
//
// NOTE(review): several rules below (exp, paramList, returnStatement, intList,
// endList, statements) name themselves as the first element of an alternative.
// That is left recursion: the rule re-enters itself before any token has been
// consumed.
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{        
//    using boost::phoenix::ref;
//    using boost::phoenix::size;

    // Defines every rule of the grammar. `tok` is accepted but never read in
    // this body.
    // NOTE(review): `connect(0)` initializes a member named `connect`, but no
    // such member is declared in this struct.
    template <typename TokenDef>
    InterpreterGrammar(TokenDef const& tok)
        : InterpreterGrammar::base_type(start),
        connect(0)
    {
        // whole program: function definition, then global statements, then end of input
        start %= functionList >> endList >> qi::eoi;

        // different expressions
        // NOTE(review): every binary-operator alternative starts with `exp`
        // itself (left recursion), and those alternatives are listed before the
        // ones that start with a token.
        exp %= exp >> qi::token(k_equalTo) >> exp
              |
              exp >> qi::token(k_notEq) >> exp
              |
              exp >> qi::token(k_less) >> exp
              |
              exp >> qi::token(k_lessEq) >> exp
              |
              exp >> qi::token(k_greater) >> exp
              |
              exp >> qi::token(k_greaterEq) >> exp
              |
              exp >> qi::token(k_andTok) >> exp
              |
              exp >> qi::token(k_orTok) >> exp
              |
              qi::token(k_notTok) >> exp 
              |
              exp >> qi::token(k_plues) >> exp
              |
              exp >> qi::token(k_minus) >> exp
              |
              exp >> qi::token(k_mult) >> exp
              |
              qi::token(k_minus) >> exp
              |
              qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
              |
              qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket) 
              |
              qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
              |
              qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
              |
              qi::token(k_nonTerminal)
              |
              qi::token(k_terminal)
              |
              qi::token(k_trueTok)
              |
              qi::token(k_falseTok);

        // parameter list
        // NOTE(review): left-recursive, and `paramList` is assigned a second
        // time further down ("function parameters"); presumably the later
        // assignment replaces this one - confirm which definition is intended.
        paramList %= paramList >> qi::token(k_comma) >> exp
                    |
                    exp;

        // return statements
        // NOTE(review): both alternatives start with `returnStatement` itself
        // and k_returnTok is never referenced, so no alternative begins with a token.
        returnStatement %= returnStatement >> exp
                         |
                         returnStatement;

        // function call statements
        callStatement %= qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                        |
                        qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);

        // variable assignment
        assignmentStatement %= qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                              |
                              qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
                                  >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;

        // list of integers
        // NOTE(review): left-recursive (first alternative starts with `intList`).
        intList %= intList >> qi::token(k_comma) >> qi::token(k_terminal)
                  |
                  qi::token(k_terminal);

        // print out a variable
        printStatement %= qi::token(k_print) >> exp;

        // take input
        inputStatement %= qi::token(k_nonTerminal) >> qi::token(k_input);

        // conditional statement
        conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;

        // conditions have optional else
        optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                       |
                       qi::eps;

        // while loop
        whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);

        // actual program statements
        // NOTE(review): left-recursive (first alternative starts with `endList`).
        endList %= endList >> end
                  |
                  end;

        // end possibilities of program in global space
        end %= callStatement
              |
              printStatement
              |
              qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
              |
              qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
              |
              qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                  >> qi::token(k_rightBracket)
              |
              qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                  >> qi::token(k_assign) >> exp;

        // function parameters
        // NOTE(review): second assignment to `paramList` (see "parameter list"
        // above); also left-recursive.
        paramList %= paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
                    |
                    qi::token(k_nonTerminal)
                    |
                    qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket);

        // define a statement as assignment print input condition while or call
        statement %= assignmentStatement
                    |
                    printStatement
                    |
                    inputStatement
                    |
                    conditionStatement
                    |
                    whileStatement
                    |
                    callStatement
                    |
                    returnStatement;

        // general statement list
        // NOTE(review): left-recursive (first alternative starts with `statements`).
        statements %= statements >> statement
                     |
                     statement;

        // functions
        // NOTE(review): the `;` that ends the second alternative terminates the
        // whole statement, so the following `| qi::eps;` line begins a new
        // statement with a leading `|`, which does not compile.
        functionList %= qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                           >> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
                           >> statements >> qi::token(k_fed)
                       |
                       qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                           >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed);
                       | qi::eps;

        // debug tracing for `start` and `functionList`
        // NOTE(review): `start` is listed in the macro above and also passed to
        // debug() below; one of the two is presumably redundant.
        BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
        debug(start);
    }

    // One attribute-less rule per nonterminal of the grammar.
    qi::rule<Iterator, Skipper> start;
    qi::rule<Iterator, Skipper> functionList;
    qi::rule<Iterator, Skipper> endList;
    qi::rule<Iterator, Skipper> paramList;
    qi::rule<Iterator, Skipper> statements;
    qi::rule<Iterator, Skipper> statement;
    qi::rule<Iterator, Skipper> assignmentStatement;
    qi::rule<Iterator, Skipper> printStatement;
    qi::rule<Iterator, Skipper> inputStatement;
    qi::rule<Iterator, Skipper> conditionStatement;
    qi::rule<Iterator, Skipper> whileStatement;
    qi::rule<Iterator, Skipper> callStatement;
    qi::rule<Iterator, Skipper> returnStatement;
    qi::rule<Iterator, Skipper> exp;
    qi::rule<Iterator, Skipper> intList;
    qi::rule<Iterator, Skipper> optionalElse;
    qi::rule<Iterator, Skipper> end;
};

以及 main 函数

int main(int argc, char** argv)
{
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;

typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;

LexerTokens< lexer_type > lexer;
InterpreterGrammar< iterator_type, skipper_type > parser(lexer);

// read the file
if (argc != 2)
{
    std::cout << "File required" << std::endl;
    return 1;
}

std::ifstream t(argv[1]); 

t.seekg(0, std::ios::end);   
sourceCode.reserve(t.tellg());
t.seekg(0, std::ios::beg);

sourceCode.assign(std::istreambuf_iterator<char>(t), 
                  std::istreambuf_iterator<char>());

char const* first = sourceCode.c_str();
char const* last = &first[sourceCode.size()];
bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);

std::cout << "Remaining " << std::string(first,last) << std::endl;
std::cout << "R is " << r << std::endl;
}

使用该语言的示例如下:

And an example of something that would be in the language is:

def add(x,y) :                                                                                                                              
  if (x <= 0) : return y fi
   return 1 + add(x-1,y) 
fed
y = add(5,4)
print y

我在调用语法时遇到的错误是解析器段错误.

The error I run into is the parser segfaults when invoking the grammar.

我看到了,如果我

  • 把相关规则(functionList)中从需要调用其他规则(例如 paramList)的位置开始的部分注释掉
  • 并把送入词法分析器/解析器的源代码删减到只包含对应的记号部分
  • comment out parts of the relevant rules (functionList) up to the portion where the grammar needs to invoke another rule (like paramList)
  • and remove portions of the source code being sent into the lexer/parser to only include the token parts,

语法就不会发生段错误,并且能正确解析该表达式.

the grammar will not segfault and parse the expression correctly.

当我在调试器中运行代码时,可以看到在发生段错误的那一行打印出了一个很大的表达式,其所有成员都带有如下字符串:

When I run the code in the debugger I see that on the line where the code segfaults, a large expression is printed with all of the members having a string saying,

错误读取变量:无法访问地址0x7fffff7fefe0处的内存

error reading variable: Cannot access memory at address 0x7fffff7fefe0

我查看了其他类似的帖子,它们的错误也是 Spirit 中的段错误,但是,

I checked other similar posts where the errors were segfaults in spirit, however,

  • 我不认为这个错误是由左递归语法或规则中存在临时语法对象造成的,因为我之前用 lex 和 yacc 编写的 LL 解析器能够成功解析该语句,而且我相信所有规则在程序的整个运行期间都一直存在.
  • I don't believe this error to be due to the grammar being left recursive, or there being temporary grammars in the rules, since the LL parser I wrote previously in lex and yacc successfully could parse the statement and I believe all of the rules will exist for the entire run of the program.

任何能指明正确方向的提示,或对当前代码的批评意见,我都非常感谢.

Any points in the correct direction, or critique of the current code would be much appreciated.

推荐答案

如果您使用AddressSanitizer,它将告诉您:

If you use AddressSanitizer, it will tell you:

<start>...
  <try>[]</try>...
ASAN:DEADLYSIGNAL...
=================================================================...
==8985==ERROR: AddressSanitizer: stack-overflow on address 0x7ffeb280dfc8 (pc 0x0000004c9cf6 bp 0x7f...
    #0 0x4c9cf5 in __asan_memcpy (/home/sehe/Projects/stackoverflow/sotest+0x4c9cf5)...
    #1 0x68eb77 in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::spi...
    #2 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
    #3 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
    #4 0x68e190 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
    #5 0x68de4a in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
    #6 0x68d8b5 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
    #7 0x6e085c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusion...
    #8 0x6e053f in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::sequence<b...
    #9 0x6e0218 in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::sequence<boost::fu...
    #10 0x6dffc5 in bool boost::spirit::qi::alternative<boost::fusion::cons<boost::spirit::qi::seque...
    #11 0x6dfbf7 in bool boost::spirit::qi::detail::parser_binder<boost::spirit::qi::alternative<boo...
    #12 0x6de330 in boost::detail::function::function_obj_invoker4<boost::spirit::qi::detail::parser...
    #13 0x5d633a in boost::function4<bool, boost::spirit::lex::lexertl::iterator<boost::spirit::lex:...
    #14 0x5d58e8 in bool boost::spirit::qi::rule<boost::spirit::lex::lexertl::iterator<boost::spirit...
    #15 0x5d54e9 in bool boost::spirit::qi::reference<boost::spirit::qi::rule<boost::spirit::lex::le...
    #16 0x5d49bf in bool boost::spirit::qi::detail::fail_function<boost::spirit::lex::lexertl::itera...
    #17 0x68f56c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusio...
    #18 0x68f267 in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::reference...
    #19 0x68ef6e in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::reference<boost::...
    #20 0x68ebae in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::sp...
    #21 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion:...
    [ snip repeated frames ]
    #250 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion...


SUMMARY: AddressSanitizer: stack-overflow (/home/sehe/Projects/stackoverflow/sotest+0x4c9cf5) in __a...
==8985==ABORTING...

因此,这显然是左递归,导致堆栈溢出.

So, this is clearly left-recursion leading to stack overflow.

其他解析器生成器能够处理它这一事实并不能说明什么:Spirit 是 PEG 解析器生成器,无法处理左递归.

The fact that other parser generators cope with it means very little: Spirit is a PEG parser generator, and left-recursion is impossible.

您需要重写类似的内容

    exp %= exp >> qi::token(k_equalTo) >> exp

加入一些使lhs更具体的内容.

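Into something that makes the lhs more specific, so that every alternative consumes at least one token before it can recurse — for example `exp = operand >> *(qi::token(k_equalTo) >> operand)`, where `operand` contains only the alternatives that start with a token.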

注意:我不得不修正您贴出的代码中的一些零散问题.下面是我用来复现问题的代码:

Note: I had to fix some random issues with the way you presented your code. This is what I used to repro:

在 Coliru 上在线运行 (Live On Coliru)

#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
namespace lex = boost::spirit::lex;

namespace interpreter {
    // Numeric token ids shared by the lexer and the grammar.
    // The ids are contiguous from 1 to 35; the first enumerator of each group
    // is pinned explicitly so that the numbering stays obvious.
    enum Tokens
    {
        // keywords (1-15)
        k_andTok = 1, k_def, k_elihw, k_elseTok, k_falseTok,
        k_fed, k_fi, k_ifTok, k_input, k_notTok,
        k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,

        // arithmetic operators and '!' (16-20)
        k_plues = 16, k_minus, k_mult, k_div, k_bang,

        // comparison operators (21-26)
        k_equalTo = 21, k_greaterEq, k_lessEq, k_notEq, k_less, k_greater,

        // assignment and punctuation (27-33)
        k_assign = 27, k_comma, k_colon,
        k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,

        // identifiers and digits (34-35)
        k_nonTerminal = 34, k_terminal
    };

    // Lexer definition for the toy language.
    //
    // Each member is a token definition built from a regular expression. The
    // constructor registers every token except `whiteSpace` in the default
    // lexer state under the matching id from `Tokens`, and puts `whiteSpace`
    // into a separate lexer state named "WHITESPACE".
    template <typename Lexer>
    struct LexerTokens : lex::lexer<Lexer>
    {
        LexerTokens() :
           whiteSpace("[ \\t\\n]"),
           andTok("and"),
           def("def"),
           elihw("elihw"),
           elseTok("else"),
           falseTok("false"),
           fed("fed"),
           fi("fi"),
           ifTok("if"),
           input("input"),
           notTok("not"),
           orTok("or"),
           print("print"),
           returnTok("return"),
           trueTok("true"),
           whileTok("while"),
           plus("\\+"),
           minus("\\-"),
           mult("\\*"),
           div("\\/"),
           bang("\\!"),
           equalTo("=="),
           greaterEq(">="),
           lessEq("<="),
           notEq("!="),
           less("<"),
           greater(">"),
           assign("="),
           comma(","),
           colon(":"),
           leftParen("\\("),
           rightParen("\\)"),
           leftBracket("\\["),
           // Fixed: this pattern used to be "\\[", a copy of leftBracket, so a
           // closing bracket could never be tokenized.
           rightBracket("\\]"),
           nonTerminal("[a-z][a-zA-Z0-9]*"),
           // NOTE(review): matches exactly one digit; confirm whether
           // multi-digit integers ("[0-9]+") were intended.
           terminal("[0-9]")
        {
            // Whitespace gets its own lexer state, separate from the tokens below.
            this->self("WHITESPACE") = whiteSpace;

            // Keywords are registered before nonTerminal, presumably so that a
            // keyword wins when it and the identifier pattern match the same
            // text. Each token is registered exactly once (the original added
            // andTok twice).
            this->self.add
                (andTok, k_andTok)
                (def, k_def)
                (elihw, k_elihw)
                (elseTok, k_elseTok)
                (falseTok, k_falseTok)
                (fed, k_fed)
                (fi, k_fi)
                (ifTok, k_ifTok)
                (input, k_input)
                (notTok, k_notTok)
                (orTok, k_orTok)
                (print, k_print)
                (returnTok, k_returnTok)
                (trueTok, k_trueTok)
                (whileTok, k_whileTok)
                (plus, k_plues)
                (minus, k_minus)
                (mult, k_mult)
                (div, k_div)
                (bang, k_bang)
                (equalTo, k_equalTo)
                (greaterEq, k_greaterEq)
                (lessEq, k_lessEq)
                (notEq, k_notEq)
                (less, k_less)
                (greater, k_greater)
                (assign, k_assign)
                (comma, k_comma)
                (colon, k_colon)
                (leftParen, k_leftParen)
                (rightParen, k_rightParen)
                (leftBracket, k_leftBracket)
                (rightBracket, k_rightBracket)
                (nonTerminal, k_nonTerminal)
                (terminal, k_terminal);
        }

        // Skipped input; carries no attribute.
        lex::token_def<lex::omit> whiteSpace;

        // Keywords.
        lex::token_def<std::string> andTok;
        lex::token_def<std::string> def;
        lex::token_def<std::string> elihw;
        lex::token_def<std::string> elseTok;
        lex::token_def<std::string> falseTok;
        lex::token_def<std::string> fed;
        lex::token_def<std::string> fi;
        lex::token_def<std::string> ifTok;
        lex::token_def<std::string> input;
        lex::token_def<std::string> notTok;
        lex::token_def<std::string> orTok;
        lex::token_def<std::string> print;
        lex::token_def<std::string> returnTok;
        lex::token_def<std::string> trueTok;
        lex::token_def<std::string> whileTok;

        // Operators.
        lex::token_def<std::string> plus;
        lex::token_def<std::string> minus;
        lex::token_def<std::string> mult;
        lex::token_def<std::string> div;
        lex::token_def<std::string> bang;
        lex::token_def<std::string> equalTo;
        lex::token_def<std::string> greaterEq;
        lex::token_def<std::string> lessEq;
        lex::token_def<std::string> notEq;
        lex::token_def<std::string> less;
        lex::token_def<std::string> greater;
        lex::token_def<std::string> assign;

        // Punctuation.
        lex::token_def<std::string> comma;
        lex::token_def<std::string> colon;
        lex::token_def<std::string> leftParen;
        lex::token_def<std::string> rightParen;
        lex::token_def<std::string> leftBracket;
        lex::token_def<std::string> rightBracket;

        // Identifiers and digits.
        lex::token_def<std::string> nonTerminal;
        lex::token_def<std::string> terminal;
    };

    namespace qi = boost::spirit::qi;
    // Qi grammar for the toy language, written over the numeric token ids in
    // `Tokens` (every terminal is matched with qi::token(id)).
    // All rules are declared as qi::rule<Iterator, Skipper>, i.e. without an
    // attribute signature; `start` is the entry rule handed to the base grammar.
    //
    // This is the reproduction case for the stack overflow: several rules
    // (exp, paramList, returnStatement, intList, endList, statements) name
    // themselves as the first element of an alternative, so they re-enter
    // themselves before any token has been consumed. The code is deliberately
    // left as is.
    template <typename Iterator, typename Skipper>
    struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
    {        
    //    using boost::phoenix::ref;
    //    using boost::phoenix::size;

        // Defines every rule of the grammar. The token-definition argument is
        // unnamed and unused.
        template <typename TokenDef>
        InterpreterGrammar(TokenDef const& )
            : InterpreterGrammar::base_type(start)
              //, connect(0)
        {
            // whole program: function definition, then global statements, then end of input
            start 
                = functionList >> endList >> qi::eoi
                ;

            // different expressions
            // NOTE(review): every binary-operator alternative starts with `exp`
            // itself (left recursion), and those alternatives come before the
            // ones that start with a token.
            exp = exp >> qi::token(k_equalTo) >> exp
                | exp >> qi::token(k_notEq) >> exp
                | exp >> qi::token(k_less) >> exp
                | exp >> qi::token(k_lessEq) >> exp
                | exp >> qi::token(k_greater) >> exp
                | exp >> qi::token(k_greaterEq) >> exp
                | exp >> qi::token(k_andTok) >> exp
                | exp >> qi::token(k_orTok) >> exp
                | qi::token(k_notTok) >> exp 
                | exp >> qi::token(k_plues) >> exp
                | exp >> qi::token(k_minus) >> exp
                | exp >> qi::token(k_mult) >> exp
                | qi::token(k_minus) >> exp
                | qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket) 
                | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal)
                | qi::token(k_terminal)
                | qi::token(k_trueTok)
                | qi::token(k_falseTok)
                ;

            // parameter list
            // NOTE(review): left-recursive, and `paramList` is assigned a
            // second time further down ("function parameters"); presumably the
            // later assignment replaces this one - confirm which is intended.
            paramList 
                = paramList >> qi::token(k_comma) >> exp
                | exp
                ;

            // return statements
            // NOTE(review): both alternatives start with `returnStatement`
            // itself and k_returnTok is never referenced, so no alternative
            // begins with a token.
            returnStatement 
                = returnStatement >> exp
                | returnStatement
                ;

            // function call statements
            callStatement 
                = qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen)
                ;

            // variable assignment
            assignmentStatement 
                = qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
                    >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp
                ;

            // list of integers
            // NOTE(review): left-recursive (first alternative starts with `intList`).
            intList 
                = intList >> qi::token(k_comma) >> qi::token(k_terminal)
                | qi::token(k_terminal)
                ;

            // print out a variable
            printStatement 
                = qi::token(k_print) >> exp
                ;

            // take input
            inputStatement 
                = qi::token(k_nonTerminal) >> qi::token(k_input)
                ;

            // conditional statement
            conditionStatement 
                = qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse
                ;

            // conditions have optional else
            optionalElse 
                = qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                | qi::eps
                ;

            // while loop
            whileStatement 
                = qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw)
                ;

            // actual program statements
            // NOTE(review): left-recursive (first alternative starts with `endList`).
            endList 
                = endList >> end
                | end
                ;

            // end possibilities of program in global space
            end = callStatement
                | printStatement
                | qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
                | qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                | qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                    >> qi::token(k_rightBracket)
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                    >> qi::token(k_assign) >> exp
                ;

            // function parameters
            // NOTE(review): second assignment to `paramList` (see "parameter
            // list" above); also left-recursive.
            paramList 
                = paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
                | qi::token(k_nonTerminal)
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
                ;

            // define a statement as assignment print input condition while or call
            statement 
                = assignmentStatement
                | printStatement
                | inputStatement
                | conditionStatement
                | whileStatement
                | callStatement
                | returnStatement
                ;

            // general statement list
            // NOTE(review): left-recursive (first alternative starts with `statements`).
            statements 
                = statements >> statement
                | statement
                ;

            // functions: one definition with parameters, one without, or nothing
            functionList 
                = qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                               >> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
                               >> statements >> qi::token(k_fed)
                | qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                               >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
                | qi::eps
                ;

            // debug tracing for `start` and `functionList`
            BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
        }

        // One attribute-less rule per nonterminal of the grammar.
        qi::rule<Iterator, Skipper> start;
        qi::rule<Iterator, Skipper> functionList;
        qi::rule<Iterator, Skipper> endList;
        qi::rule<Iterator, Skipper> paramList;
        qi::rule<Iterator, Skipper> statements;
        qi::rule<Iterator, Skipper> statement;
        qi::rule<Iterator, Skipper> assignmentStatement;
        qi::rule<Iterator, Skipper> printStatement;
        qi::rule<Iterator, Skipper> inputStatement;
        qi::rule<Iterator, Skipper> conditionStatement;
        qi::rule<Iterator, Skipper> whileStatement;
        qi::rule<Iterator, Skipper> callStatement;
        qi::rule<Iterator, Skipper> returnStatement;
        qi::rule<Iterator, Skipper> exp;
        qi::rule<Iterator, Skipper> intList;
        qi::rule<Iterator, Skipper> optionalElse;
        qi::rule<Iterator, Skipper> end;
    };
}

#include <fstream>
#include <iostream>
#include <iterator>
#include <string>

// Entry point: reads the file named by argv[1], tokenizes it with
// interpreter::LexerTokens and parses the token stream with
// interpreter::InterpreterGrammar, skipping tokens from the "WHITESPACE"
// lexer state.
//
// Prints the unparsed remainder of the input and the parse result.
// Returns 1 when the argument count is wrong or the file cannot be opened,
// 0 otherwise (regardless of whether the parse succeeded).
int main(int argc, char** argv) {
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;

    using token_type    = lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ >;
    using lexer_type    = lex::lexertl::lexer<token_type>;
    using iterator_type = interpreter::LexerTokens<lexer_type>::iterator_type;
    using skipper_type  = qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def>;

    // Validate the command line before building the lexer and grammar.
    if (argc != 2)
    {
        std::cout << "File required" << '\n';
        return 1;
    }

    // read the file
    std::ifstream t(argv[1]);
    if (!t)
    {
        // Previously an unreadable file was silently parsed as empty input.
        std::cout << "Cannot open " << argv[1] << '\n';
        return 1;
    }
    std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };

    interpreter::LexerTokens< lexer_type > lexer;
    interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);

    // `first` is taken by reference by the parse call and is read again below
    // to print whatever input was left over.
    char const* first = sourceCode.data();
    char const* last = first + sourceCode.size();
    bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);

    std::cout << "Remaining " << std::string(first,last) << '\n';
    std::cout << "R is " << r << '\n';
}

这篇关于解析器中 Boost Spirit 段错误的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆