Using lexer token attributes in grammar rules with Lex and Qi from Boost.Spirit

Question

让我们考虑以下代码:

#include <boost/phoenix.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;

struct operation
{
    enum type
    {
        add,
        sub,
        mul,
        div
    };
};

template<typename Lexer>
class expression_lexer
    : public lex::lexer<Lexer>
{
public:
    typedef lex::token_def<operation::type> operator_token_type;
    typedef lex::token_def<double> value_token_type;
    typedef lex::token_def<std::string> variable_token_type;
    typedef lex::token_def<lex::omit> parenthesis_token_type;
    typedef std::pair<parenthesis_token_type, parenthesis_token_type> parenthesis_token_pair_type;
    typedef lex::token_def<lex::omit> whitespace_token_type;

    expression_lexer()
        : operator_add('+'),
          operator_sub('-'),
          operator_mul("[x*]"),
          operator_div("[:/]"),
          value("\\d+(\\.\\d+)?"),
          variable("%(\\w+)"),
          parenthesis({
            std::make_pair(parenthesis_token_type('('), parenthesis_token_type(')')),
            std::make_pair(parenthesis_token_type('['), parenthesis_token_type(']'))
          }),
          whitespace("[ \\t]+")
    {
        this->self
            += operator_add [lex::_val = operation::add]
            | operator_sub [lex::_val = operation::sub]
            | operator_mul [lex::_val = operation::mul]
            | operator_div [lex::_val = operation::div]
            | value
            | variable [lex::_val = phoenix::construct<std::string>(lex::_start + 1, lex::_end)]
            | whitespace [lex::_pass = lex::pass_flags::pass_ignore]
            ;

        std::for_each(parenthesis.cbegin(), parenthesis.cend(),
            [&](parenthesis_token_pair_type const& token_pair)
            {
                this->self += token_pair.first | token_pair.second;
            }
        );
    }

    operator_token_type operator_add;
    operator_token_type operator_sub;
    operator_token_type operator_mul;
    operator_token_type operator_div;

    value_token_type value;
    variable_token_type variable;

    std::vector<parenthesis_token_pair_type> parenthesis;

    whitespace_token_type whitespace;
};

template<typename Iterator>
class expression_grammar
    : public qi::grammar<Iterator>
{
public:
    template<typename Tokens>
    explicit expression_grammar(Tokens const& tokens)
        : expression_grammar::base_type(start)
    {
        start                     %= expression >> qi::eoi;

        expression                %= sum_operand >> -(sum_operator >> expression);
        sum_operator              %= tokens.operator_add | tokens.operator_sub;
        sum_operand               %= fac_operand >> -(fac_operator >> sum_operand);
        fac_operator              %= tokens.operator_mul | tokens.operator_div;

        if(!tokens.parenthesis.empty())
            fac_operand           %= parenthesised | terminal;
        else
            fac_operand           %= terminal;

        terminal                  %= tokens.value | tokens.variable;

        if(!tokens.parenthesis.empty())
        {
            parenthesised         %= tokens.parenthesis.front().first >> expression >> tokens.parenthesis.front().second;
            std::for_each(tokens.parenthesis.cbegin() + 1, tokens.parenthesis.cend(),
                [&](typename Tokens::parenthesis_token_pair_type const& token_pair)
                {
                    parenthesised %= parenthesised.copy() | (token_pair.first >> expression >> token_pair.second);
                }
            );
        }
    }

private:
    qi::rule<Iterator> start;
    qi::rule<Iterator> expression;
    qi::rule<Iterator> sum_operand;
    qi::rule<Iterator> sum_operator;
    qi::rule<Iterator> fac_operand;
    qi::rule<Iterator> fac_operator;
    qi::rule<Iterator> terminal;
    qi::rule<Iterator> parenthesised;
};


int main()
{
    typedef lex::lexertl::token<std::string::const_iterator, boost::mpl::vector<operation::type, double, std::string>> token_type;
    typedef expression_lexer<lex::lexertl::actor_lexer<token_type>> expression_lexer_type;
    typedef expression_lexer_type::iterator_type expression_lexer_iterator_type;
    typedef expression_grammar<expression_lexer_iterator_type> expression_grammar_type;

    expression_lexer_type lexer;
    expression_grammar_type grammar(lexer);

    while(std::cin)
    {
        std::string line;
        std::getline(std::cin, line);

        std::string::const_iterator first = line.begin();
        std::string::const_iterator const last = line.end();

        bool const result = lex::tokenize_and_parse(first, last, lexer, grammar);
        if(!result)
            std::cout << "Parsing failed! Reminder: >" << std::string(first, last) << "<" << std::endl;
        else
        {
            if(first != last)
                std::cout << "Parsing succeeded! Reminder: >" << std::string(first, last) << "<" << std::endl;
            else
                std::cout << "Parsing succeeded!" << std::endl;
        }
    }
}

It is a simple parser for arithmetic expressions with values and variables. It is built using expression_lexer to extract tokens and then expression_grammar to parse those tokens.

Using a lexer for such a small case might seem like overkill, and it probably is; that is the cost of a simplified example. Note, however, that a lexer makes it easy to define tokens with regular expressions, which in turn makes it easy to define them from external code (and user-provided configuration in particular). With the example provided it would be no issue at all to read the token definitions from an external config file and, for example, allow the user to change variables from %name to $name; a minimal sketch of that idea follows.
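
For illustration only - not from the original post - a minimal sketch, assuming a hypothetical helper read_pattern_from_config() that returns the regex as a plain std::string (lex::token_def accepts its pattern as a runtime string, so nothing else needs to change):

// Hypothetical: the variable-token pattern is loaded at runtime instead of hard-coded.
std::string pattern = read_pattern_from_config();   // e.g. "\\$(\\w+)" instead of "%(\\w+)"
lex::token_def<std::string> variable(pattern);       // same token_def, regex taken from a string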

The code seems to be working fine (checked on Visual Studio 2013 with Boost 1.61).

The expression_lexer has attributes attached to its tokens. I assume they work, since the code compiles, but I don't really know how to check; one rough way to at least look at the token stream is sketched below.
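
A rough sketch (mine, not from the original post): run only the lexer and print what it produces, reusing the token_type and expression_lexer_type typedefs from main() below. Note that token attribute values are only converted lazily, when a parser actually consumes them, so this only shows token ids:

std::string input = "3 + %a";
std::string::const_iterator first = input.begin(), last = input.end();
expression_lexer_type lexer;
bool const ok = lex::tokenize(first, last, lexer,
    [](token_type const& t)
    {
        std::cout << "token id: " << t.id() << std::endl;
        return true;    // keep tokenizing
    });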

Ultimately I would like the grammar to build me an std::vector with the reverse Polish notation of the expression, where every element would be a boost::variant over operation::type, double, or std::string; spelled out, that is roughly the type sketched below.
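
Illustrative only (the answer below ends up defining essentially the same variant as RPN::cell):

#include <boost/variant.hpp>
#include <string>
#include <vector>

// builds on the operation struct from the code above
using rpn_element = boost::variant<operation::type, double, std::string>;
using rpn_output  = std::vector<rpn_element>;   // e.g. "3 + 8 * 9" -> 3 8 9 * +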

The problem, however, is that I failed to use the token attributes in my expression_grammar. For example, if you try to change sum_operator the following way:

qi::rule<Iterator, operation::type ()> sum_operator;

you will get a compilation error. I expected this to work, since operation::type is the attribute of both operator_add and operator_sub and therefore also of their alternative. Still it doesn't compile. Judging from the error in assign_to_attribute_from_iterators, it seems the parser tries to build the attribute value directly from the input stream range, which means it ignores the [lex::_val = operation::add] I specified in the lexer.

Changing it to

qi::rule<Iterator, operation::type (operation::type)> sum_operator;

does not help either.

I also tried changing the definition to

sum_operator %= (tokens.operator_add | tokens.operator_sub) [qi::_val = qi::_1];

That does not help either.

How can I work around that? I know I could use symbols from Qi, but I want to keep the lexer so that the token regexes stay easy to configure. I could also extend assign_to_attribute_from_iterators as described in the documentation (roughly as sketched below), but that more or less doubles the work. I guess I could also skip the attributes on the lexer and have them only in the grammar, but that again doesn't play well with the flexibility of the variable token (in my actual case there is slightly more logic there, so it is also configurable which part of the token forms the actual name of the variable - here it is fixed to just skipping the first character). Anything else?
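
For reference, a rough sketch (my own, not from the original post) of what the assign_to_attribute_from_iterators route could look like - it is the customization point Spirit consults when building an attribute from a token's iterator range, and it visibly duplicates the operator mapping already written in the lexer actions:

// Sketch only: duplicates the mapping the lexer actions already perform.
namespace boost { namespace spirit { namespace traits {
    template <typename Iterator>
    struct assign_to_attribute_from_iterators<operation::type, Iterator>
    {
        static void call(Iterator const& first, Iterator const& /*last*/, operation::type& attr)
        {
            switch (*first)                         // first character of the matched token
            {
                case '+':           attr = operation::add; break;
                case '-':           attr = operation::sub; break;
                case 'x': case '*': attr = operation::mul; break;
                default:            attr = operation::div; break;  // ':' or '/'
            }
        }
    };
}}}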

Also a side question - maybe someone knows: is there a way to access the capture groups of the token's regular expression from the token's action? So that instead of having

variable [lex::_val = phoenix::construct<std::string>(lex::_start + 1, lex::_end)]

I would be able to build the string from the capture group and thus easily handle formats like $var$.

Edit: I have improved the whitespace skipping following the conclusions from "Whitespace skipper when using Boost.Spirit Qi and Lex". It is a simplification that does not affect the questions asked here.

Answer

Ok, here's my take on the RPN 'requirement'. I heavily favor natural (automatic) attribute propagation over semantic actions (see Boost Spirit: "Semantic actions are evil"?)

I consider the other options (uglifying) optimizations. You might do them if you're happy with the overall design and don't mind making it harder to maintain :)

Live On Coliru

Beyond the sample from my comment that you've already studied, I added that RPN transformation step:

namespace RPN {
    using cell      = boost::variant<AST::operation, AST::value, AST::variable>;
    using rpn_stack = std::vector<cell>;

    struct transform : boost::static_visitor<> {
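        // dispatch on the variant: bind the stack as the first argument so that the
        // overloads below are picked for whichever alternative is currently held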
        void operator()(rpn_stack& stack, AST::expression const& e) const {
            boost::apply_visitor(boost::bind(*this, boost::ref(stack), ::_1), e);
        }
        void operator()(rpn_stack& stack, AST::bin_expr const& e) const {
            (*this)(stack, e.lhs);
            (*this)(stack, e.rhs);
            stack.push_back(e.op);
        }
        void operator()(rpn_stack& stack, AST::value    const& v) const { stack.push_back(v); }
        void operator()(rpn_stack& stack, AST::variable const& v) const { stack.push_back(v); }
    };
}

That's all! Use it like so, e.g.:

RPN::transform compiler;
RPN::rpn_stack program;
compiler(program, expr);

for (auto& instr : program) {
    std::cout << instr << " ";
}

Which outputs:

Parsing success: (3 + (8 * 9))
3 8 9 * + 

Full Listing

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <boost/phoenix.hpp>
#include <boost/bind.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

namespace lex     = boost::spirit::lex;
namespace qi      = boost::spirit::qi;
namespace phoenix = boost::phoenix;

struct operation
{
    enum type
    {
        add,
        sub,
        mul,
        div
    };

    friend std::ostream& operator<<(std::ostream& os, type op) {
        switch (op) {
            case type::add: return os << "+";
            case type::sub: return os << "-";
            case type::mul: return os << "*";
            case type::div: return os << "/";
        }
        return os << "<" << static_cast<int>(op) << ">";
    }
};

template<typename Lexer>
class expression_lexer
    : public lex::lexer<Lexer>
{
public:
    //typedef lex::token_def<operation::type> operator_token_type;
    typedef lex::token_def<lex::omit> operator_token_type;
    typedef lex::token_def<double> value_token_type;
    typedef lex::token_def<std::string> variable_token_type;

    typedef lex::token_def<lex::omit> parenthesis_token_type;
    typedef std::pair<parenthesis_token_type, parenthesis_token_type> parenthesis_token_pair_type;
    typedef lex::token_def<lex::omit> whitespace_token_type;

    expression_lexer()
        : operator_add('+'),
          operator_sub('-'),
          operator_mul("[x*]"),
          operator_div("[:/]"),
          value("\\d+(\\.\\d+)?"),
          variable("%(\\w+)"),
          parenthesis({
            std::make_pair(parenthesis_token_type('('), parenthesis_token_type(')')),
            std::make_pair(parenthesis_token_type('['), parenthesis_token_type(']'))
          }),
          whitespace("[ \\t]+")
    {
        this->self
            += operator_add [lex::_val = operation::add]
             | operator_sub [lex::_val = operation::sub]
             | operator_mul [lex::_val = operation::mul]
             | operator_div [lex::_val = operation::div]
             | value
             | variable [lex::_val = phoenix::construct<std::string>(lex::_start + 1, lex::_end)]
             | whitespace [lex::_pass = lex::pass_flags::pass_ignore]
             ;

        std::for_each(parenthesis.cbegin(), parenthesis.cend(),
            [&](parenthesis_token_pair_type const& token_pair)
            {
                this->self += token_pair.first | token_pair.second;
            }
        );
    }

    operator_token_type operator_add;
    operator_token_type operator_sub;
    operator_token_type operator_mul;
    operator_token_type operator_div;

    value_token_type value;
    variable_token_type variable;

    std::vector<parenthesis_token_pair_type> parenthesis;

    whitespace_token_type whitespace;
};

namespace AST {
    using operation = operation::type;

    using value     = double;
    using variable  = std::string;

    struct bin_expr;
    using expression = boost::variant<value, variable, boost::recursive_wrapper<bin_expr> >;

    struct bin_expr {
        expression lhs, rhs;
        operation op;

        friend std::ostream& operator<<(std::ostream& os, bin_expr const& be) {
            return os << "(" << be.lhs << " " << be.op << " " << be.rhs << ")";
        }
    };
}

BOOST_FUSION_ADAPT_STRUCT(AST::bin_expr, lhs, op, rhs)

template<typename Iterator>
class expression_grammar : public qi::grammar<Iterator, AST::expression()>
{
public:
    template<typename Tokens>
    explicit expression_grammar(Tokens const& tokens)
        : expression_grammar::base_type(start)
    {
        start                     = expression >> qi::eoi;

        bin_sum_expr              = sum_operand >> sum_operator >> expression;
        bin_fac_expr              = fac_operand >> fac_operator >> sum_operand;

        expression                = bin_sum_expr | sum_operand;
        sum_operand               = bin_fac_expr | fac_operand;

        sum_operator              = tokens.operator_add >> qi::attr(AST::operation::add) | tokens.operator_sub >> qi::attr(AST::operation::sub);
        fac_operator              = tokens.operator_mul >> qi::attr(AST::operation::mul) | tokens.operator_div >> qi::attr(AST::operation::div);
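        // the operator token_defs are now lex::omit (attribute-less), so qi::attr(...)
        // supplies the operation value on the grammar side instead of the lexer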

        if(tokens.parenthesis.empty()) {
            fac_operand           = terminal;
        }
        else {
            fac_operand           = parenthesised | terminal;

            parenthesised         = tokens.parenthesis.front().first >> expression >> tokens.parenthesis.front().second;
            std::for_each(tokens.parenthesis.cbegin() + 1, tokens.parenthesis.cend(),
                    [&](typename Tokens::parenthesis_token_pair_type const& token_pair)
                    {
                        parenthesised = parenthesised.copy() | (token_pair.first >> expression >> token_pair.second);
                    });
        }

        terminal                  = tokens.value | tokens.variable;

        BOOST_SPIRIT_DEBUG_NODES(
                (start) (expression) (bin_sum_expr) (bin_fac_expr)
                (fac_operand) (terminal) (parenthesised) (sum_operand)
                (sum_operator) (fac_operator)
            );
    }

private:
    qi::rule<Iterator, AST::expression()> start;
    qi::rule<Iterator, AST::expression()> expression;
    qi::rule<Iterator, AST::expression()> sum_operand;
    qi::rule<Iterator, AST::expression()> fac_operand;
    qi::rule<Iterator, AST::expression()> terminal;
    qi::rule<Iterator, AST::expression()> parenthesised;

    qi::rule<Iterator, int()> sum_operator;
    qi::rule<Iterator, int()> fac_operator;

    // extra rules to help with AST creation
    qi::rule<Iterator, AST::bin_expr()> bin_sum_expr;
    qi::rule<Iterator, AST::bin_expr()> bin_fac_expr;
};

namespace RPN {
    using cell      = boost::variant<AST::operation, AST::value, AST::variable>;
    using rpn_stack = std::vector<cell>;

    struct transform : boost::static_visitor<> {
        void operator()(rpn_stack& stack, AST::expression const& e) const {
            boost::apply_visitor(boost::bind(*this, boost::ref(stack), ::_1), e);
        }
        void operator()(rpn_stack& stack, AST::bin_expr const& e) const {
            (*this)(stack, e.lhs);
            (*this)(stack, e.rhs);
            stack.push_back(e.op);
        }
        void operator()(rpn_stack& stack, AST::value    const& v) const { stack.push_back(v); }
        void operator()(rpn_stack& stack, AST::variable const& v) const { stack.push_back(v); }
    };
}

int main()
{
    typedef lex::lexertl::token<std::string::const_iterator, boost::mpl::vector<operation::type, double, std::string>> token_type;
    typedef expression_lexer<lex::lexertl::actor_lexer<token_type>> expression_lexer_type;
    typedef expression_lexer_type::iterator_type expression_lexer_iterator_type;
    typedef expression_grammar<expression_lexer_iterator_type> expression_grammar_type;

    expression_lexer_type lexer;
    expression_grammar_type grammar(lexer);
    RPN::transform compiler;

    std::string line;
    while(std::getline(std::cin, line) && !line.empty())
    {
        std::string::const_iterator first = line.begin();
        std::string::const_iterator const last = line.end();

        AST::expression expr;
        bool const result = lex::tokenize_and_parse(first, last, lexer, grammar, expr);
        if(!result)
            std::cout << "Parsing failed!\n";
        else
        {
            std::cout << "Parsing success: " << expr << "\n";

            RPN::rpn_stack program;
            compiler(program, expr);

            for (auto& instr : program) {
                std::cout << instr << " ";
            }
        }

        if(first != last)
            std::cout << "Remainder: >" << std::string(first, last) << "<\n";
    }
}
