Boost.Spirit Lex:将多个令牌统一为由 ID 区分的单个令牌 [英] boost-spirit-lex unifying multiple tokens into a single token in lex differentiated by the id

查看:119
本文介绍了在 Boost.Spirit Lex 中将多个令牌统一为由 ID 区分的单个令牌的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

修改:我已经移除了词法分析器,因为它无法与 Qi 干净地集成,只会让语法变得晦涩(请参阅下面的回答)。


我的词法分析如下所示:

 模板< typename的词法>
结构标记:法::词法<&词法GT;
{
令牌()
    :left_curly(\\{\\),
    right_curly(\\} \\),
    left_paren(\\(\\),
    right_paren(\\)\\),
    结肠(:),
    scolon(;),
    命名空间_((I:命名空间))
    事件((ⅰ:事件)。),
    可选的((I:可选))
    要求(?(我:必须))
    重复((ⅰ:重复)。),
    t_int_4((I:INT4))
    t_int_8((ⅰ:INT8)),
    T_STRING((I:字符串))
    序(\\\\ D +),
    标识符(\\\\ W +){
    使用boost ::精神:: ::法_ VAL;    这 - >自
        =
        left_curly
        | right_curly
        | left_paren
        | right_paren
        |结肠
        | scolon
        | namespace_
        |事件
        |可选的
        |需要
        |重复
        | t_int_4
        | t_int_8
        | T_STRING
        |序数词
        |识别码
        |法:: token_def<>([\\\\ \\\\牛逼N] +)[法:: _通= ::法:: pass_flags pass_ignore]。
}
法:: token_def<法::省略> left_curly,right_curly,结肠,scolon,反复,left_paren,right_paren;
法:: token_def<法::省略> namespace_,事件,可选,必需t_int_4,t_int_8,T_STRING;
法:: token_def<提高:: uint32_t的>序;
法:: token_def<标准::字符串>标识符;

};

我希望 t_int_4、t_int_8 和 t_string 由单一的令牌类型表示,并以一个整型值作为其属性。目前我的 Qi 语法必须自己完成这项工作,然后在 qi::rule 的语义动作中设置令牌:

  atomic_type = tok.t_int_4 [_val = RBL_INT4]
                | tok.t_int_8 [_val = RBL_INT8]
                | tok.t_string [_val = RBL_STRING];


解决方案

从你最近几天关于将 Lex 集成到 Qi 语法中的提问来看,你似乎已经发现了多个集成问题。此时你应该问问自己,为什么要把词法分析器集成到 PEG 语法中。PEG 语法可以在原位整洁地完成标记化,所以引入词法分析器并不能带来多少收益;尤其是在 Lex->Qi 的场景下,引入词法分析器已经表明:你不仅需要借助各种 hack 才能表达 Qi 本来就能简洁表达的语法,还需要 hack 才能让错误处理和注释(annotation)正常工作。因此,我建议移除 Lex,只使用 Qi。

下面是去掉词法分析器之后的语法。AST 放在它自己的文件中。

 的#includeast.hpp
#定义BOOST_SPIRIT_USE_PHOENIX_V3
#包括LT&;升压/精神/有/ qi.hpp>
#包括LT&;升压/精神/有/ phoenix_core.hpp>
#包括LT&;升压/范围/ iterator_range.hpp>
#包括LT&;矢量>命名空间补气=的boost ::精神::补气;
命名空间ASCII =的boost ::精神:: ASCII;
命名空间PX =提振::凤;模板< typename的迭代器>
结构队长:补气::语法<&迭代器GT;
{
    船长():船长:: base_type(开始)
    {
        使用boost ::精神:: ASCII :: char_;        开始= ASCII ::空间|齐亮::(//)>> *(ASCII :: char_ - 齐:: EOL)GT;>补气:: EOL;
    }    齐::规则<&迭代器GT;开始;
};结构error_handler_
{
    无效的typedef result_type的;
    模板< typename的第一,typename的最后,类型名称ErrorPos,TYPENAME什么>
    void运算符()(F头,尾升,ErrorPos E,什么W)常量
    {
        性病::法院LT&;< 预期:<< W<<的std :: ENDL;
        性病::法院LT&;<标准::字符串(F,L)LT;<的std :: ENDL;
        INT I =的std ::距离(F,E);
        性病::法院LT&;<标准::字符串第(i + 1,'')所述;&下; ^ ----这里<<的std :: ENDL;
    }
};PX ::功能< error_handler_> error_handler;模板< typename的迭代器>
结构annotation_state
{
  TYPEDEF提振:: iterator_range的<&迭代器GT; annotation_iterator;
  的typedef的std ::矢量<&annotation_iterator GT; annotation_iterators;  annotation_iterators注释;
};模板< typename的迭代器>
结构annotate_
{
    无效的typedef result_type的;    annotation_state<&迭代器GT; &安培;如;
    annotate_(annotation_state<&迭代器GT;&安培; AS):AS(AS){}    模板< TYPENAME缬氨酸,类型名称首先,typename的最后>
    void运算符()(VAL V,首架F,最后升)常量
    {
      v.id = as.annotations.size();
      as.annotations.push_back(升压:: make_iterator_range(F,L));
      性病::法院LT&;<标准::字符串(F,L)LT;<的std :: ENDL;
    }
};模板< typename的迭代器,类型名船长>
语法结构:补气::语法<迭代器,namespace_descriptor(),船长>
{
    语法(annotation_state<&迭代器GT;&安培;如)
        :语法:: base_type(namespace_descriptor_)
          annotation_state_(如),
          注释(为)    {
        使用命名空间补气;        atomic_type.add
            (INT4,RBL_INT4)
            (int8的RBL_INT8)
            (字符串,RBL_STRING);        event_entry_qualifier.add
            (可选,ENTRY_OPTIONAL)
            (必需的,ENTRY_REQUIRED)
            (重复,ENTRY_REPEATED);        oid_ =序> ':'>标识符;
        序= uint_parser<提高:: uint32_t的>();
        标识符= +(字符_(A,Z)| _ CHAR(A,Z)|字符_(_));
        type_descriptor_ = atomic_type_ | compound_type_;
        atomic_type_ = NO_CASE [atomic_type> ATTR();        compound_type_ =
            NO_CASE [亮起(事件)]
            > ATTR(RBL_EVENT)
            > (
            >识别码
            > ')';        event_entry_ =
            NO_CASE [event_entry_qualifier]
            > oid_
            > type_descriptor_
            > ';';        event_descriptor_ =
            NO_CASE [亮起(事件)]
            > oid_
            > {
            > *(event_entry_)
            > };        namespace_descriptor_ =
            NO_CASE [亮起(命名空间)]
            >识别码
            > {
            > *(event_descriptor_)
            > };        identifier.name(标记);
        oid_.name(序标识符对);
        ordinal.name(序号);        ON_ERROR<&失败GT;(namespace_descriptor_,:: error_handler(_1,_2,_3,_4));
        on_success(oid_,注释(_val,_1,_3));
        on_success(type_descriptor_,注释(_val,_1,_3));
        on_success(event_entry_,注释(_val,_1,_3));
        on_success(event_descriptor_,注释(_val,_1,_3));
    }    annotation_state<&迭代器GT; &安培; annotation_state_;
    PX ::功能< annotate_<&迭代器GT; >注释;    齐::规则<迭代器,OID()> oid_;
    齐::规则<迭代器,提振:: uint32_t的()>序;
    齐::规则<迭代器,标准::字符串()>标识符;
    齐::规则<迭代器,type_descriptor()> type_descriptor_;
    齐::规则<迭代器,type_descriptor()> atomic_type_;
    齐::规则<迭代器,type_descriptor()> compound_type_;    齐::规则<迭代器,event_entry(),船长> event_entry_;
    齐::规则<迭代器,event_descriptor(),船长> event_descriptor_;
    齐::规则<迭代器,namespace_descriptor(),船长> namespace_descriptor_;    齐::符号<焦炭,INT> atomic_type;
    齐::符号<焦炭,INT> event_entry_qualifier;
};诠释的main()
{
    标准::字符串测试=命名空间NS {事件1:sihan {可选1:哈桑事件(哈桑);}};
    的typedef的std ::字符串:迭代它;    它乞求= test.begin();
    它结束= test.end();    annotation_state<它>如;
    船长<它>跳跃;
    语法<它,船长<它> >加油站);
    BOOL R =气:: phrase_parse(BEG,最后,克,略过);
    如果(r)的
        ;
    其他
    {
        性病::法院LT&;< 解析失败<<的std :: ENDL;
    }
}

Edit: I have ripped out the lexer, as it does not cleanly integrate with Qi and just obfuscates grammars (see answer below).


My lexer looks as follows :

/// Spirit.Lex token table for the namespace/event description language.
///
/// Every data member is one token definition. The constructor gives each
/// definition its regular expression and then registers all of them, plus an
/// anonymous whitespace token, with the lexer through `this->self`.
///
/// Punctuation and keyword tokens carry no attribute (`lex::omit`); `ordinal`
/// is declared with a `boost::uint32_t` attribute and `identifier` with a
/// `std::string` attribute.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        // Mem-initializers are written in member declaration order, which is
        // the order C++ actually runs them in (keeps -Wreorder quiet and the
        // source honest about initialization order).
        : left_curly("\"{\""),
          right_curly("\"}\""),
          colon(":"),
          scolon(";"),
          repeated("(?i:repeated)"),
          left_paren("\"(\""),
          right_paren("\")\""),
          // Keywords use the (?i:...) group so they match in any letter case.
          namespace_("(?i:namespace)"),
          event("(?i:event)"),
          optional("(?i:optional)"),
          required("(?i:required)"),
          t_int_4("(?i:int4)"),
          t_int_8("(?i:int8)"),
          t_string("(?i:string)"),
          ordinal("\\d+"),
          identifier("\\w+")
    {
        // NOTE(review): keywords and `ordinal` are registered before
        // `identifier`, whose \w+ pattern also matches them; presumably the
        // lexer prefers the earlier definition on equal-length matches —
        // confirm against the Spirit.Lex documentation before reordering.
        this->self
            = left_curly
            | right_curly
            | left_paren
            | right_paren
            | colon
            | scolon
            | namespace_
            | event
            | optional
            | required
            | repeated
            | t_int_4
            | t_int_8
            | t_string
            | ordinal
            | identifier
            // Runs of blanks, tabs and newlines are matched but flagged
            // pass_ignore by the attached semantic action.
            | lex::token_def<>("[ \\t\\n]+")   [lex::_pass = lex::pass_flags::pass_ignore];
    }

    lex::token_def<lex::omit> left_curly, right_curly, colon, scolon,repeated, left_paren, right_paren;
    lex::token_def<lex::omit> namespace_, event, optional, required,t_int_4, t_int_8, t_string;
    lex::token_def<boost::uint32_t> ordinal;
    lex::token_def<std::string> identifier;
};

I want t_int_4, t_int_8, and t_string to be represented by a single token type whose attribute is an integral value. At the moment my Qi grammar has to do the lifting for this and then set the value in a qi::rule semantic action:

 // Current workaround: each concrete type token gets its own alternative, and a
 // semantic action assigns the matching RBL_* constant to the rule's _val.
 atomic_type = tok.t_int_4     [ _val = RBL_INT4]
                 | tok.t_int_8             [ _val = RBL_INT8]
                 | tok.t_string            [ _val = RBL_STRING];

解决方案

Judging from your questions over the last few days about integrating Lex into a Qi grammar, it seems you've identified multiple integration issues. At this point you should ask yourself why you are even trying to integrate a lexer into a PEG grammar. PEG grammars can neatly capture tokenization in situ, so you don't really gain much from introducing a lexer — especially considering that, in the Lex->Qi case, introducing a lexer has shown you that you need hacks not only to express what Qi expresses neatly in your grammar, but also to get error handling and annotation working properly. Therefore I suggest removing Lex and sticking to Qi.

Here is your grammar with the lexer removed. The AST is in a file of its own.

#include "ast.hpp"
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/range/iterator_range.hpp>
#include <vector>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace px = boost::phoenix;

/// Skip grammar used with qi::phrase_parse: matches either a single ASCII
/// whitespace character or a `//` line comment.
template <typename Iterator>
struct skipper : qi::grammar<Iterator>
{
    skipper() : skipper::base_type(start)
    {
        // `>>` binds tighter than `|`, so the alternatives are
        //   ascii::space
        //   "//" followed by everything up to the line terminator.
        // End of input is accepted as a terminator as well, so a comment on a
        // final line that has no trailing newline is still skipped.
        start = ascii::space
              | qi::lit("//") >> *(ascii::char_ - qi::eol) >> (qi::eol | qi::eoi);
    }

    qi::rule<Iterator> start;
};

/// Function object that reports a parse failure on std::cout.
///
/// It prints three lines: what was expected, the whole [first, last) input,
/// and a caret marker indented by (distance from first to error_pos) + 1
/// spaces.
struct error_handler_
{
    // Fixed return type exposed as a nested typedef.
    // NOTE(review): presumably required by px::function's result-type
    // protocol — confirm.
    typedef void result_type;

    template<typename Begin, typename End, typename Where, typename Expected>
    void operator()(Begin first, End last, Where error_pos, Expected what) const
    {
        // Number of characters between the start of input and the error.
        const int offset = std::distance(first, error_pos);
        const std::string padding(offset + 1, ' ');

        std::cout << "Expected : " << what << std::endl
                  << std::string(first, last) << std::endl
                  << padding << "^---- here" << std::endl;
    }
};

// Global Phoenix function wrapping error_handler_; the grammar calls it as
// ::error_handler(_1,_2,_3,_4) inside on_error<fail>.
px::function<error_handler_> error_handler;

/// Storage for the source ranges recorded while parsing.
template<typename Iterator>
struct annotation_state
{
  /// One recorded range: a pair of iterators into the parsed input.
  typedef boost::iterator_range<Iterator> annotation_iterator;
  /// Sequence of recorded ranges.
  typedef std::vector<annotation_iterator> annotation_iterators;

  /// All ranges recorded so far; annotate_ appends to this vector and uses
  /// its size before the append as the id it assigns.
  annotation_iterators annotations;
};

/// Function object that tags a parsed value with an annotation id and records
/// the source range it was parsed from.
///
/// The id is the index the range will occupy in `as.annotations`, so the range
/// for a value `v` can later be found at `as.annotations[v.id]`.
template<typename Iterator>
struct annotate_
{
    // Fixed return type exposed as a nested typedef.
    // NOTE(review): presumably required by px::function's result-type
    // protocol — confirm.
    typedef void result_type;

    /// Annotation storage shared with the owner; held by reference, so it
    /// must outlive this functor.
    annotation_state<Iterator> & as;

    // Intentionally not `explicit`: the grammar constructor initializes its
    // px::function<annotate_<Iterator> > member directly from an
    // annotation_state, which relies on this implicit conversion.
    annotate_(annotation_state<Iterator> & as) : as(as) {}

    /// @param v  value to tag; taken by reference so that the assignment to
    ///           `v.id` reaches the caller's object (by value it would only
    ///           modify a temporary copy and the id would be lost).
    /// @param f  iterator to the start of the matched input.
    /// @param l  iterator one past the end of the matched input.
    template<typename Val, typename First, typename Last>
    void operator()(Val& v, First f, Last l) const
    {
      // Take the id before push_back so it equals the index of the new range.
      v.id = as.annotations.size();
      as.annotations.push_back(boost::make_iterator_range(f,l));
      // Echo the matched text.
      std::cout << std::string(f,l) << std::endl;
    }
};



/// Qi grammar for a namespace description. The start rule is
/// `namespace_descriptor_`, and the grammar's attribute is a
/// `namespace_descriptor`.
///
/// Successful matches of oid_, type_descriptor_, event_entry_ and
/// event_descriptor_ are passed to the `annotate` functor, which is built
/// from the annotation_state handed to the constructor.
template <typename Iterator, typename Skipper>
struct grammar : qi::grammar<Iterator,namespace_descriptor(),Skipper>
{
    /// @param as annotation storage; kept by reference in annotation_state_
    ///           and also used to construct `annotate`, so it must outlive
    ///           the grammar.
    grammar(annotation_state<Iterator> & as) 
        : grammar::base_type(namespace_descriptor_),
          annotation_state_(as),
          annotate(as)

    {
        using namespace qi;

        // Symbol table: type keyword -> RBL_* constant.
        atomic_type.add
            ("int4", RBL_INT4)
            ("int8", RBL_INT8)
            ("string", RBL_STRING);

        // Symbol table: entry qualifier keyword -> ENTRY_* constant.
        event_entry_qualifier.add
            ("optional", ENTRY_OPTIONAL)
            ("required", ENTRY_REQUIRED)
            ("repeated", ENTRY_REPEATED);

        // <ordinal> ':' <identifier>
        oid_ = ordinal  > ':' > identifier;
        ordinal = uint_parser<boost::uint32_t>();
        // One or more characters from a-z, A-Z or '_'.
        identifier = +(char_("a","z") | char_("A","Z") | char_('_'));
        type_descriptor_ = atomic_type_ | compound_type_;
        // A keyword from the atomic_type table (any letter case) followed by
        // attr(""). NOTE(review): attr("") presumably fills a string field of
        // type_descriptor (declared in ast.hpp, not visible here) — confirm.
        atomic_type_ = no_case[atomic_type] > attr("");

        // "event" (any letter case), then attr(RBL_EVENT), then a
        // parenthesized identifier.
        compound_type_ = 
            no_case[lit("event")] 
            > attr(RBL_EVENT) 
            > '(' 
            > identifier  
            > ')';

        // <qualifier> <oid> <type descriptor> ';'
        event_entry_ = 
            no_case[event_entry_qualifier] 
            > oid_ 
            > type_descriptor_ 
            > ';';

        // "event" <oid> '{' <zero or more entries> '}'
        event_descriptor_ = 
            no_case[lit("event")] 
            > oid_ 
            > '{' 
            > *(event_entry_) 
            > '}'; 

        // "namespace" <identifier> '{' <zero or more event descriptors> '}'
        namespace_descriptor_ = 
            no_case[lit("namespace")] 
            > identifier 
            > '{' 
            > * (event_descriptor_) 
            > '}'; 

        // Names attached to rules. NOTE(review): presumably these appear in
        // the "Expected : ..." text printed by error_handler_ — confirm.
        identifier.name("identifier");
        oid_.name("ordinal-identifier pair");
        ordinal.name("ordinal");

        // On an error inside namespace_descriptor_, call the global
        // error_handler with the four handler arguments.
        on_error<fail>(namespace_descriptor_, ::error_handler(_1,_2,_3,_4));
        // After each of these rules succeeds, call annotate with the rule's
        // attribute plus _1 and _3. NOTE(review): _1/_3 are presumably the
        // iterators delimiting the matched input — confirm against the
        // qi::on_success documentation.
        on_success(oid_, annotate(_val,_1,_3));
        on_success(type_descriptor_, annotate(_val,_1,_3));
        on_success(event_entry_, annotate(_val,_1,_3));
        on_success(event_descriptor_, annotate(_val,_1,_3));
    }

    // Reference to the caller-owned annotation storage.
    annotation_state<Iterator> & annotation_state_;
    // Lazy (Phoenix) wrapper around annotate_, used in on_success above.
    px::function<annotate_<Iterator> > annotate;

    // NOTE(review): the six rules below are declared without a Skipper,
    // unlike the three after them; presumably no whitespace is skipped inside
    // them (e.g. between the parts of `1:name`) — confirm this is intended.
    qi::rule< Iterator, oid()> oid_;
    qi::rule< Iterator, boost::uint32_t()> ordinal;
    qi::rule< Iterator, std::string()> identifier;
    qi::rule< Iterator, type_descriptor()> type_descriptor_;
    qi::rule< Iterator, type_descriptor()> atomic_type_;
    qi::rule< Iterator, type_descriptor()> compound_type_; 

    qi::rule< Iterator, event_entry(), Skipper> event_entry_;
    qi::rule< Iterator, event_descriptor(), Skipper> event_descriptor_;
    qi::rule< Iterator, namespace_descriptor(), Skipper> namespace_descriptor_;

    // Keyword tables filled in by the constructor.
    qi::symbols<char, int> atomic_type;
    qi::symbols<char, int> event_entry_qualifier;
};

/// Parses a hard-coded sample document with the grammar and skipper above.
///
/// @return 0 when the whole sample is matched; 1 (after printing
///         "parsing failed") when the parse fails or stops before the end of
///         the input.
int main()
{
    std::string test = "namespace ns { event 1:sihan { OpTIONAL 1:hassan event(haSsan);} }";
    typedef std::string::iterator it;

    it beg = test.begin();
    it end = test.end();

    annotation_state<it> as;
    skipper<it> skip;
    grammar<it, skipper<it> > g(as);

    // phrase_parse advances `beg` past what it consumed. A true result with
    // beg != end means only a prefix of the input matched, which is treated
    // as a failure as well.
    const bool matched = qi::phrase_parse(beg,end,g,skip);
    if (!matched || beg != end)
    {
        std::cout << "parsing failed" << std::endl;
        return 1;
    }
    return 0;
}

这篇关于在 Boost.Spirit Lex 中将多个令牌统一为由 ID 区分的单个令牌的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆