使用 boost::spirit::qi 解析带分隔符的数字 [英] Using boost::spirit::qi to parse numbers with separators

查看:144
本文介绍了使用boost :: spirit :: q来用分隔符解析数字的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我试图使用boost :: spirit :: qi做一些解析。它实际上是相当不错,我成功地设法解析基于后缀的各种基础的数字。示例:123,c12h,777o,110101b。



然后我想添加一个功能:允许出现会被完全忽略的分隔符字符,使 123_456 或 1101_0011b 这样的值也能被解析。我尝试使用 skip 解析器,但我非常怀疑自己完全误解了它的用法。代码可以正常编译,但我让它忽略下划线的尝试完全不起作用。任何关于如何实现我想要的效果的建议,我都将不胜感激。我的测试代码如下:

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
using qi::_val;
using qi::_1;
using qi::skip;
using qi::uint_parser;
using ascii::char_;

template <typename Iterator>
struct unsigned_parser : qi::grammar<Iterator, uint64_t()> {

    unsigned_parser() : unsigned_parser::base_type(start) {
        uint_parser<uint64_t, 10> dec_parser;
        uint_parser<uint64_t, 16> hex_parser;
        uint_parser<uint64_t, 8> oct_parser;
        uint_parser<uint64_t, 2> bin_parser;

        start = skip(char_('_'))[
            /* 带后缀的二进制 */
            (bin_parser[_val=_1] >> char_("bByY"))
            /* 带后缀的八进制 */
            | (oct_parser[_val=_1] >> char_("qQoO"))
            /* 带后缀的十六进制 */
            | (hex_parser[_val=_1] >> char_("hHxX"))
            /* 带可选后缀的十进制 */
            | (dec_parser[_val=_1] >> -char_("dDtT"))
            ];
    }

    qi::rule<Iterator, uint64_t()> start;
};

int main(int argv, const char *argc[]) {
    typedef std::string::const_iterator iter;
    unsigned_parser<iter> up;
    uint64_t val;
    if (argv != 2) {
        std::cerr << "Usage: " << argc[0] << " <input>" << std::endl;
        return 1;
    }
    std::string test(argc[1]);
    iter i = test.begin();
    iter end = test.end();
    bool rv = parse(i, end, up, val);
    if (rv && i == end) {
        std::cout << "Succeeded: " << val << std::endl;
        return 0;
    }
    if (rv) {
        std::cout << "Failed partial parse: " << val << std::endl;
        return 1;
    }
    std::cout << "Failed." << std::endl;
    return 1;
}


解决方案

除非您要扩展库并实现自己的解析器指令,否则没有人需要操心 Spirit 解析器上下文这样的实现细节。



在那之前,phoenix::function<>、phoenix::bind 甚至 BOOST_PHOENIX_ADAPT_FUNCTION 对任何人来说都应该足够了。



这里有三种方法可以解决你的问题,而且无需对库打任何补丁。


  1. 简单解析 Live on Coliru



    这可以被看作是一种朴素的方式来解析不同样式的整数Qi和简单的语义操作:

    start =
          eps [_val=0] >> +(char_("0-9a-fA-F") [ _val = _val*16 + _decode(_1) ] | '_')>>  char_("hHxX") /* 带后缀的十六进制 */
        | eps [_val=0] >> +(char_("0-7")       [ _val = _val* 8 + _decode(_1) ] | '_')>>  char_("qQoO") /* 带后缀的八进制 */
        | eps [_val=0] >> +(char_("01")        [ _val = _val* 2 + _decode(_1) ] | '_')>>  char_("bByY") /* 带后缀的二进制 */
        | eps [_val=0] >> +(char_("0-9")       [ _val = _val*10 + _decode(_1) ] | '_')>> -char_("dDtT") /* 带可选后缀的十进制 */
        ;

    当然,你会想知道 _decode 看起来像。你自己定义:

      struct decode {
    template< typename> struct result {typedef int type; };
    template< typename Ch> int operator()(Ch ch)const {
    if(ch> ='0'&& ch <='9')return ch - '0';
    if(ch> ='a'&& ch< ='z')return ch - 'a'+ 10;
    if(ch> ='A'&& ch< ='Z')return ch - 'A'+ 10;
    assert(false);
    }
    };
    boost::phoenix::function<decode> _decode;


  2. 使用 BOOST_PHOENIX_ADAPT_FUNCTION Live On Coliru



    而不是定义函数对象,你可以使用宏

    int decode(char ch) {
        if (ch>='0' && ch<='9') return ch - '0';
        if (ch>='a' && ch<='z') return ch - 'a' + 10;
        if (ch>='A' && ch<='Z') return ch - 'A' + 10;
        assert(false);
    }

    BOOST_PHOENIX_ADAPT_FUNCTION(int,_decode,decode,1)


  3. 使用 std :: strtoul Live on Coliru



    当然,上述做法可能有点“复杂”,因为它需要你处理整数算术和数字解码的琐碎细节。



    此外,naive方法会在文字是像101_101这样的十进制值的情况下进行一些重复的工作。它会计算十六进制,八进制和二进制分支的子结果,然后才意识到它是十进制。



    我们可以改变顺序:

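    start =
            (raw[+char_("_0-9a-fA-F")] >>  char_("hHxX")) [ _val = _strtoul(_1,16) ] /* 带后缀的十六进制 */
          | (raw[+char_("_0-7")]       >>  char_("qQoO")) [ _val = _strtoul(_1, 8) ] /* 带后缀的八进制 */
          | (raw[+char_("_01")]        >>  char_("bByY")) [ _val = _strtoul(_1, 2) ] /* 带后缀的二进制 */
          | (raw[+char_("_0-9")]       >> -char_("dDtT")) [ _val = _strtoul(_1,10) ] /* 带可选后缀的十进制 */
          ;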

    同样,您可能会好奇我们是如何实现 _strtoul 的。它是一个函数,接受来自 raw 的合成属性(一个迭代器范围)以及进制,而进制在那时当然已经确定:

      struct strtoul_f {
    template< typename,typename> struct result {typedef uint64_t type; };
    template <typename Raw, typename Int> uint64_t operator()(Raw raw, Int base) const {
    std :: string s(raw.begin(),raw.end());
    s.erase(std :: remove(s.begin(),s.end(),'_'),s.end());
    char * f(& s [0]),* l(f + s.size());
    return std :: strtoul(f,& l,base);
    }
    };
    boost :: phoenix :: function< strtoul_f> _strtoul;

    正如你所看到的,唯一的复杂之处是先从范围中删除 _。



I am attempting to use boost::spirit::qi to do some parsing. It's actually going quite well, and I successfully have managed to parse numbers in various bases based on a suffix. Examples: 123, c12h, 777o, 110101b.

I then wanted to add the ability to allow a completely ignored separator character, to allow values like 123_456 or 1101_0011b to parse. I tried using the skip parser, but I highly suspect that I completely misunderstood how it was to be used. It compiles just fine, but my attempt to make it ignore the underscore does absolutely nothing at all. Any suggestions on how to make this do what I want would be appreciated. My test code is included below:

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
using qi::_val;
using qi::_1;
using qi::skip;
using qi::uint_parser;
using ascii::char_;

// Qi grammar that parses an unsigned integer into a uint64_t, choosing the base from a
// trailing suffix letter: b/B/y/Y = binary, q/Q/o/O = octal, h/H/x/X = hexadecimal,
// d/D/t/T (optional) = decimal.
//
// NOTE(review): the whole expression is wrapped in skip(char_('_')) with the intent of
// ignoring '_' separators inside a number, but the author reports that this has no
// effect on inputs such as 123_456 or 1101_0011b.
template <typename Iterator>
struct unsigned_parser : qi::grammar<Iterator, uint64_t()> {

    // Builds the start rule; the alternatives are tried in the order written below.
    unsigned_parser() : unsigned_parser::base_type(start) {
        // One numeric parser per supported radix, each producing a uint64_t.
        uint_parser<uint64_t, 10> dec_parser;
        uint_parser<uint64_t, 16> hex_parser;
        uint_parser<uint64_t, 8> oct_parser;
        uint_parser<uint64_t, 2> bin_parser;

        start = skip(char_('_'))[
            /* binary with suffix */
            (bin_parser[_val=_1] >> char_("bByY"))
            /* octal with suffix */
            | (oct_parser[_val=_1] >> char_("qQoO"))
            /* hexadecimal with suffix */
            | (hex_parser[_val=_1] >> char_("hHxX"))
            /* decimal with optional suffix */
            | (dec_parser[_val=_1] >> -char_("dDtT"))
            ];
    }

    // Entry rule of the grammar; its attribute is the parsed value.
    qi::rule<Iterator, uint64_t()> start;
};

// Command-line driver: parses the single program argument with unsigned_parser and
// prints the outcome.
//
// Note that the parameter names are swapped relative to the usual convention: here
// `argv` is the argument count and `argc` is the argument vector.
//
// Returns 0 only when the parse succeeded and consumed the whole input; returns 1 on a
// usage error, a partial parse, or a failed parse.
int main(int argv, const char *argc[]) {
    typedef std::string::const_iterator iter;
    unsigned_parser<iter> up;
    uint64_t val;
    // Exactly one user-supplied argument is required (program name + input).
    if (argv != 2) {
        std::cerr << "Usage: " << argc[0] << " <input>" << std::endl;
        return 1;
    }
    std::string test(argc[1]);
    iter i = test.begin();
    iter end = test.end();
    // parse() advances `i` past whatever the grammar consumed.
    bool rv = parse(i, end, up, val);
    // Full success: the grammar matched and nothing is left over.
    if (rv && i == end) {
        std::cout << "Succeeded: " << val << std::endl;
        return 0;
    }
    // The grammar matched a prefix but trailing input remains.
    if (rv) {
        std::cout << "Failed partial parse: " << val << std::endl;
        return 1;
    }
    std::cout << "Failed." << std::endl;
    return 1;
}

解决方案

Aw. Nobody should have to bother with implementation details like Spirit parser contexts unless you're extending the library and implementing your own parser directives.

Until that time, phoenix::function<>, phoenix::bind or even BOOST_PHOENIX_ADAPT_FUNCTION should be plenty for anyone.

Here are three approaches to your question without any patches to the library.

  1. Straightforward parsing Live On Coliru

    This could be viewed as the "naive" way of parsing the different styles of integers using just Qi and simple semantic actions:

    // Tries each notation in the order written: hexadecimal, octal, binary, then decimal.
    // Every alternative first resets _val to 0 (so a failed earlier alternative leaves no
    // residue), then folds each digit into _val as value * base + _decode(digit), while
    // also accepting '_' between digits without it contributing to the value.
    start = 
          eps [_val=0] >> +(char_("0-9a-fA-F") [ _val = _val*16 + _decode(_1) ] | '_')>>  char_("hHxX") /* hexadecimal with suffix */
        | eps [_val=0] >> +(char_("0-7")       [ _val = _val* 8 + _decode(_1) ] | '_')>>  char_("qQoO") /* octal       with suffix */
        | eps [_val=0] >> +(char_("01")        [ _val = _val* 2 + _decode(_1) ] | '_')>>  char_("bByY") /* binary      with suffix */
        | eps [_val=0] >> +(char_("0-9")       [ _val = _val*10 + _decode(_1) ] | '_')>> -char_("dDtT") /* decimal     with optional suffix */
        ;
    

    Of course, you will want to know what _decode looks like. Well you define it yourself:

    /// Function object that maps a digit character to its numeric value.
    ///
    /// '0'..'9' yield 0..9; letters of either case yield 10..35 ('a'/'A' = 10).
    /// The nested `result` template declares the return type as `int` for callers that
    /// use the result-of protocol (e.g. when wrapped in boost::phoenix::function).
    ///
    /// Passing any other character is a programming error: it trips the assertion in
    /// debug builds and returns -1 when assertions are compiled out (NDEBUG), instead
    /// of running off the end of a non-void function.
    struct decode {
        template <typename> struct result { typedef int type; };
        template <typename Ch> int operator()(Ch ch) const {
            if (ch>='0' && ch<='9') return ch - '0';
            if (ch>='a' && ch<='z') return ch - 'a' + 10;
            if (ch>='A' && ch<='Z') return ch - 'A' + 10;
            assert(false && "decode: character is not a digit");
            return -1; // well-defined fallback when the assert is disabled
        }
    };
    boost::phoenix::function<decode> _decode;
    

  2. Using BOOST_PHOENIX_ADAPT_FUNCTION macro Live On Coliru

    Instead of defining the function object you can use the macro

    /// Maps a digit character to its numeric value.
    ///
    /// @param ch  '0'..'9' (yields 0..9) or a letter of either case (yields 10..35).
    /// @return    the digit value; -1 for any other character when assertions are
    ///            compiled out (NDEBUG). In debug builds an invalid character trips
    ///            the assertion instead.
    int decode(char ch) {
        if (ch>='0' && ch<='9') return ch - '0';
        if (ch>='a' && ch<='z') return ch - 'a' + 10;
        if (ch>='A' && ch<='Z') return ch - 'A' + 10;
        assert(false && "decode: character is not a digit");
        return -1; // avoids falling off the end of a non-void function under NDEBUG
    }
    
    BOOST_PHOENIX_ADAPT_FUNCTION(int, _decode, decode, 1)
    

  3. Using std::strtoul Live On Coliru

    Of course, the above may be a tad "complex" because it requires you to deal with nitty gritty details of integer arithmetics and digit decoding.

    Also, the "naive" approach does some duplicate work in case the literal is a decimal value like "101_101". It will calculate the subresult for the hex, octal and binary branches before realizing it was a decimal.

    So we could change the order around:

    // Each alternative captures the run of digits and '_' characters with raw[], matches
    // the suffix letter, and only then converts the captured range with _strtoul using
    // the base that the suffix implies. Alternatives are tried in the order written:
    // hexadecimal, octal, binary, then decimal (whose suffix is optional).
    start = 
            (raw[+char_("_0-9a-fA-F")] >>  char_("hHxX")) [ _val = _strtoul(_1,16) ] /* hexadecimal with suffix */
          | (raw[+char_("_0-7")]       >>  char_("qQoO")) [ _val = _strtoul(_1, 8) ] /* octal       with suffix */
          | (raw[+char_("_01")]        >>  char_("bByY")) [ _val = _strtoul(_1, 2) ] /* binary      with suffix */
          | (raw[+char_("_0-9")]       >> -char_("dDtT")) [ _val = _strtoul(_1,10) ] /* decimal     with optional suffix */
          ;
    

    Again, you will be curious how we implemented _strtoul. It's a function that takes the synthesized attribute from raw (which is an iterator range) and the base, which is definitely known by then:

    /// Function object that converts a character range containing digits and '_'
    /// separators into an unsigned 64-bit integer.
    ///
    /// @tparam Raw  any range exposing begin()/end() over characters.
    /// @tparam Int  integral type of the radix.
    /// @param raw   the characters to convert; every '_' is removed before conversion.
    /// @param base  the radix handed to the C library conversion routine.
    /// @return      the converted value.
    ///
    /// Uses std::strtoull rather than std::strtoul: `unsigned long` is only 32 bits wide
    /// on some platforms, which would silently truncate values that fit in uint64_t.
    struct strtoul_f {
        template <typename, typename> struct result { typedef uint64_t type; };
        template <typename Raw, typename Int> uint64_t operator()(Raw raw, Int base) const {
            // Copy into an owning, NUL-terminated buffer so the separators can be removed.
            std::string digits(raw.begin(), raw.end());
            digits.erase(std::remove(digits.begin(), digits.end(), '_'), digits.end());
            return std::strtoull(digits.c_str(), nullptr, base);
        }
    };
    boost::phoenix::function<strtoul_f> _strtoul;
    

    As you can see, the only complexity is removing the _ from the range first.

这篇关于使用boost :: spirit :: q来用分隔符解析数字的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆