如何提高boost :: spirit :: x3键值解析器的性能 [英] how to improve performance of boost::spirit::x3 key-value parser

查看:287
本文介绍了如何提高boost :: spirit :: x3键值解析器的性能的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我使用 boost::spirit::x3 解析键值对(类似于 HTTP 标头)。与我手写的解析器相比,boost::spirit::x3 大约慢 10%。



我使用boost 1.61和GCC 6.1:

$ g++ -std=c++14 -O3 -I/tmp/boost_1_61_0/boost/ main.cpp  && ./a.out

phrase_parse 1.97432 microseconds
parseHeader 1.75742 microseconds

如何提高基于 boost::spirit::x3 的解析器的性能?

#include <iostream>
#include <string>
#include <map>
#include <chrono>

#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>

using header_map = std::map<std::string, std::string>;

namespace parser
{
    namespace x3 = boost::spirit::x3;
    using x3::char_;
    using x3::lexeme;

    x3::rule<class map, header_map> const map = "msg";

    const auto key     = +char_("0-9a-zA-Z-");
    const auto value   = +~char_("\r\n");

    const auto header =(key >> ':' >> value >> lexeme["\r\n"]);
    const auto map_def = *header >> lexeme["\r\n"];

    BOOST_SPIRIT_DEFINE(map);
}


template <typename It>
void parseHeader(It& iter, It end, header_map& map)
{
    std::string key;
    std::string value;

    It last = iter;
    bool inKey = true;
    while(iter+1 != end)
    {
        if(inKey && *(iter+1)==':')
        {
            key.assign(last, iter+1);
            iter+=3;
            last = iter;
            inKey = false;
        }
        else if (!inKey && *(iter+1)=='\r' && *(iter+2)=='\n')
        {
            value.assign(last, iter+1);
            map.insert({std::move(key), std::move(value)});
            iter+=3;
            last = iter;
            inKey = true;
        }
        else if (inKey && *(iter)=='\r' && *(iter+1)=='\n')
        {
            iter+=2;
            break;
        }
        else
        {
            ++iter;
        }
    }
}

template<typename F, typename ...Args>
double benchmark(F func, Args&&... args)
{
    auto start = std::chrono::system_clock::now();

    constexpr auto num = 10 * 1000 * 1000;
    for (std::size_t i = 0; i < num; ++i)
    {
        func(std::forward<Args>(args)...);
    }

    auto end = std::chrono::system_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);

    return duration.count() / (double)num;
}

int main()
{
    const std::size_t headerCount = 20;

    std::string str;
    for(std::size_t i = 0; i < headerCount; ++i)
    {
        std::string num = std::to_string(i);
        str.append("key" + num + ": " + "value" + num + "\r\n");
    }
    str.append("\r\n");

    double t1 = benchmark([&str]() {
        auto iter = str.cbegin();
        auto end = str.cend();

        header_map header;
        phrase_parse(iter, end, parser::map, boost::spirit::x3::ascii::blank, header);
        return header;
    });
    std::cout << "phrase_parse " << t1 << " microseconds"<< std::endl;

    double t2 = benchmark([&str]() {
        auto iter = str.cbegin();
        auto end = str.cend();

        header_map header;
        parseHeader(iter, end, header);
        return header;
    });
    std::cout << "parseHeader " << t2 << " microseconds"<< std::endl;
    return 0;
}



统计结果:平均耗时为 2.5µs 对比 3.5µs。



完整代码



使用 http://nonius.io 进行可靠的基准测试:

#include <iostream>
#include <string>
#include <map>
#include <nonius/benchmark.h++>

#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>

using header_map = std::map<std::string, std::string>;

namespace parser
{
    namespace x3 = boost::spirit::x3;
    using x3::char_;

    const auto key     = +~char_(':');
    const auto value   = *(char_ - "\r\n");

    const auto header = key >> ':' >> value >> "\r\n";
    const auto map    = *header >> "\r\n";
}


template <typename It>
void parseHeader(It& iter, It end, header_map& map)
{
    std::string key;
    std::string value;

    It last = iter;
    bool inKey = true;
    while(iter+1 != end)
    {
        if(inKey && *(iter+1)==':')
        {
            key.assign(last, iter+1);
            iter+=3;
            last = iter;
            inKey = false;
        }
        else if (!inKey && *(iter+1)=='\r' && *(iter+2)=='\n')
        {
            value.assign(last, iter+1);
            map.insert({std::move(key), std::move(value)});
            iter+=3;
            last = iter;
            inKey = true;
        }
        else if (inKey && *(iter)=='\r' && *(iter+1)=='\n')
        {
            iter+=2;
            break;
        }
        else
        {
            ++iter;
        }
    }
}

static auto const str = [] {
    std::string tmp;
    const std::size_t headerCount = 20;
    for(std::size_t i = 0; i < headerCount; ++i)
    {
        std::string num = std::to_string(i);
        tmp.append("key" + num + ": " + "value" + num + "\r\n");
    }
    tmp.append("\r\n");
    return tmp;
}();

NONIUS_BENCHMARK("manual", [](nonius::chronometer cm) {

    cm.measure([]() {
        auto iter = str.cbegin();
        auto end = str.cend();

        header_map header;
        parseHeader(iter, end, header);
        assert(header.size() == 20);
        return header.size();
    });
})

NONIUS_BENCHMARK("x3", [](nonius::chronometer cm) {

    cm.measure([] {
        auto iter = str.cbegin();
        auto end = str.cend();

        header_map header;
        parse(iter, end, parser::map, header);
        assert(header.size() == 20);
        return header.size();
    });
})

#include< nonius / main.h ++>

我使用gcc 5.4和Boost 1.61


I am parsing key value pairs (similar to HTTP headers) using boost::spirit::x3. When comparing the performance to my handwritten parser, boost::spirit::x3 is around 10% slower than that.

I am using boost 1.61 and GCC 6.1:

$ g++ -std=c++14 -O3 -I/tmp/boost_1_61_0/boost/ main.cpp  && ./a.out

phrase_parse 1.97432 microseconds
parseHeader 1.75742 microseconds

How can I improve the performance of the boost::spirit::x3 based parser?

#include <iostream>
#include <string>
#include <map>
#include <chrono>

#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>

using header_map = std::map<std::string, std::string>; 

// Spirit X3 grammar for a block of "key: value" header lines that ends with
// an empty line. This is the grammar main() benchmarks via phrase_parse.
namespace parser
{
    namespace x3 = boost::spirit::x3;
    using x3::char_;
    using x3::lexeme;

    // Top-level rule; its attribute type is header_map and "msg" is the
    // rule's name string.
    x3::rule<class map, header_map> const map = "msg";

    // key:   one or more characters from the set 0-9, a-z, A-Z and '-'.
    // value: one or more characters that are neither '\r' nor '\n'.
    const auto key     = +char_("0-9a-zA-Z-");
    const auto value   = +~char_("\r\n");

    // One header line: key, ':', value, then the "\r\n" line terminator.
    // NOTE(review): the literal is wrapped in lexeme[], presumably so the
    // skipper passed to phrase_parse cannot run inside the terminator;
    // confirm against the X3 lexeme documentation.
    const auto header =(key >> ':' >> value >> lexeme["\r\n"]);
    // Whole message: zero or more header lines, then a terminating "\r\n".
    const auto map_def = *header >> lexeme["\r\n"];

    // Ties the rule `map` to its definition `map_def`.
    BOOST_SPIRIT_DEFINE(map);
}


/// Hand-rolled parser for a block of `key: value\r\n` header lines that is
/// terminated by an empty line (`\r\n`).
///
/// The accepted format is deliberately narrow: keys and values must be
/// non-empty, and exactly one separator character (normally a space) is
/// skipped after each ':'.
///
/// Unlike the naive version, no iterator is ever dereferenced at or moved
/// beyond `end`, so empty, truncated or malformed input is safe: parsing
/// simply stops and `iter` is left where scanning ended.
///
/// @tparam It   random-access iterator over characters (`+`, `+=`, `-`).
/// @tparam Map  container offering `emplace(std::string, std::string)`,
///              e.g. `std::map<std::string, std::string>`.
/// @param iter  in: start of the input; out: one past the terminating blank
///              line on success.
/// @param end   one past the last input character.
/// @param map   receives one entry per complete header line; a key that is
///              already present keeps its existing value when `Map` has
///              unique keys.
template <typename It, typename Map>
void parseHeader(It& iter, It end, Map& map)
{
    std::string key;
    std::string value;

    It last = iter;      // start of the token currently being scanned
    bool inKey = true;   // true while scanning a key, false inside a value

    // Every branch looks one character ahead, so at least two characters
    // must remain. Comparing the distance (instead of `iter + 1 != end`)
    // also terminates when `iter` has already reached `end`.
    while (end - iter > 1)
    {
        const auto remaining = end - iter;

        if (inKey && *(iter + 1) == ':')
        {
            // `iter` is the last key character, the next one is ':'.
            key.assign(last, iter + 1);
            // Skip last key char, ':' and the separator, but never go past
            // `end` when the input is cut off right after the colon.
            iter += (remaining > 2) ? 3 : 2;
            last = iter;
            inKey = false;
        }
        else if (!inKey && remaining > 2
                 && *(iter + 1) == '\r' && *(iter + 2) == '\n')
        {
            // `iter` is the last value character, followed by CRLF.
            value.assign(last, iter + 1);
            map.emplace(std::move(key), std::move(value));
            iter += 3;
            last = iter;
            inKey = true;
        }
        else if (inKey && *iter == '\r' && *(iter + 1) == '\n')
        {
            // Blank line: end of the header block.
            iter += 2;
            break;
        }
        else
        {
            ++iter;
        }
    }
}

/// Calls `func(args...)` ten million times and returns the mean wall-clock
/// time of one call in microseconds.
///
/// Timing uses std::chrono::steady_clock: it is monotonic, so the result
/// cannot be skewed (or turn negative) when the system clock is adjusted
/// while the benchmark runs.
///
/// NOTE: the arguments are re-forwarded on every iteration. If an rvalue is
/// passed and `func` consumes it, later iterations see a moved-from value;
/// pass lvalues for anything `func` reads.
///
/// @param func  callable to time; its return value is discarded.
/// @param args  arguments forwarded to each call of `func`.
/// @return average duration of a single call, in microseconds.
template<typename F, typename ...Args>
double benchmark(F func, Args&&... args)
{
    constexpr std::size_t num = 10 * 1000 * 1000;

    const auto start = std::chrono::steady_clock::now();
    for (std::size_t i = 0; i < num; ++i)
    {
        func(std::forward<Args>(args)...);
    }
    const auto stop = std::chrono::steady_clock::now();

    // Floating-point microseconds keep the sub-microsecond part of the total
    // instead of truncating it before the division.
    const std::chrono::duration<double, std::micro> elapsed = stop - start;
    return elapsed.count() / static_cast<double>(num);
}

int main()
{
    const std::size_t headerCount = 20;

    std::string str;
    for(std::size_t i = 0; i < headerCount; ++i)
    {
        std::string num = std::to_string(i);
        str.append("key" + num + ": " + "value" + num + "\r\n");
    }
    str.append("\r\n");

    double t1 = benchmark([&str]() {
        auto iter = str.cbegin();
        auto end = str.cend();

        header_map header;
        phrase_parse(iter, end, parser::map, boost::spirit::x3::ascii::blank, header);
        return header;
    });
    std::cout << "phrase_parse " << t1 << " microseconds"<< std::endl;

    double t2 = benchmark([&str]() {
        auto iter = str.cbegin();
        auto end = str.cend();

        header_map header;
        parseHeader(iter, end, header);
        return header;
    });
    std::cout << "parseHeader " << t2 << " microseconds"<< std::endl;
    return 0;
}

live example

解决方案

Here's a fixed x3 grammar that comes a lot closer to your hand rolled "parser":

// key:   one or more characters other than ':'.
// value: zero or more characters, stopping where "\r\n" begins.
const auto key     = +~char_(':');
const auto value   = *(char_ - "\r\n");

// header: key, ':', value, then the "\r\n" line terminator.
// map:    zero or more headers followed by a terminating "\r\n".
const auto header = key >> ':' >> value >> "\r\n";
const auto map    = *header >> "\r\n";

Of course, it's still more strict and more robust. Also, don't call it with a space skipper, since your hand-rolled parser doesn't do that either.

Here are the performance measurements on my box:

Statistically, that's 2.5µs vs. 3.5µs on average.

Full Code

Using http://nonius.io for robust benchmarking:

#include <iostream>
#include <string>
#include <map>
#include <nonius/benchmark.h++>

#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>

using header_map = std::map<std::string, std::string>; 

// Spirit X3 grammar for a block of "key:value" header lines that ends with
// an empty line. It is written as plain parser expressions (no x3::rule)
// and is run without a skipper by the "x3" benchmark.
namespace parser
{
    namespace x3 = boost::spirit::x3;
    using x3::char_;

    // key:   one or more characters other than ':'.
    // value: zero or more characters, stopping where "\r\n" begins.
    const auto key     = +~char_(':');
    const auto value   = *(char_ - "\r\n");

    // header: key, ':', value, then the "\r\n" line terminator.
    // map:    zero or more headers followed by a terminating "\r\n".
    const auto header = key >> ':' >> value >> "\r\n";
    const auto map    = *header >> "\r\n";
}


/// Hand-rolled parser for a block of `key: value\r\n` header lines that is
/// terminated by an empty line (`\r\n`).
///
/// The accepted format is deliberately narrow: keys and values must be
/// non-empty, and exactly one separator character (normally a space) is
/// skipped after each ':'.
///
/// Unlike the naive version, no iterator is ever dereferenced at or moved
/// beyond `end`, so empty, truncated or malformed input is safe: parsing
/// simply stops and `iter` is left where scanning ended.
///
/// @tparam It   random-access iterator over characters (`+`, `+=`, `-`).
/// @tparam Map  container offering `emplace(std::string, std::string)`,
///              e.g. `std::map<std::string, std::string>`.
/// @param iter  in: start of the input; out: one past the terminating blank
///              line on success.
/// @param end   one past the last input character.
/// @param map   receives one entry per complete header line; a key that is
///              already present keeps its existing value when `Map` has
///              unique keys.
template <typename It, typename Map>
void parseHeader(It& iter, It end, Map& map)
{
    std::string key;
    std::string value;

    It last = iter;      // start of the token currently being scanned
    bool inKey = true;   // true while scanning a key, false inside a value

    // Every branch looks one character ahead, so at least two characters
    // must remain. Comparing the distance (instead of `iter + 1 != end`)
    // also terminates when `iter` has already reached `end`.
    while (end - iter > 1)
    {
        const auto remaining = end - iter;

        if (inKey && *(iter + 1) == ':')
        {
            // `iter` is the last key character, the next one is ':'.
            key.assign(last, iter + 1);
            // Skip last key char, ':' and the separator, but never go past
            // `end` when the input is cut off right after the colon.
            iter += (remaining > 2) ? 3 : 2;
            last = iter;
            inKey = false;
        }
        else if (!inKey && remaining > 2
                 && *(iter + 1) == '\r' && *(iter + 2) == '\n')
        {
            // `iter` is the last value character, followed by CRLF.
            value.assign(last, iter + 1);
            map.emplace(std::move(key), std::move(value));
            iter += 3;
            last = iter;
            inKey = true;
        }
        else if (inKey && *iter == '\r' && *(iter + 1) == '\n')
        {
            // Blank line: end of the header block.
            iter += 2;
            break;
        }
        else
        {
            ++iter;
        }
    }
}

// Benchmark input, built once during static initialisation: twenty
// "key<i>: value<i>\r\n" header lines followed by the terminating "\r\n".
static auto const str = [] {
    constexpr std::size_t headerCount = 20;

    std::string text;
    for (std::size_t index = 0; index != headerCount; ++index)
    {
        const std::string suffix = std::to_string(index);
        text += "key";
        text += suffix;
        text += ": ";
        text += "value";
        text += suffix;
        text += "\r\n";
    }
    text += "\r\n";
    return text;
}();

// Registers the nonius benchmark "manual": times the hand-written
// parseHeader() on the shared input string `str`.
NONIUS_BENCHMARK("manual", [](nonius::chronometer cm) {

    // The callable passed to cm.measure is the code being timed.
    cm.measure([]() {
        auto iter = str.cbegin();
        auto end = str.cend();

        header_map header;
        parseHeader(iter, end, header);
        // Sanity check that all 20 generated headers were parsed
        // (compiled out when NDEBUG is defined).
        assert(header.size() == 20);
        // NOTE(review): presumably returned so the parse result is observed
        // and cannot be optimised away; confirm against the nonius docs.
        return header.size();
    });
})

// Registers the nonius benchmark "x3": times the Spirit X3 grammar
// parser::map on the shared input string `str`.
NONIUS_BENCHMARK("x3", [](nonius::chronometer cm) {

    // The callable passed to cm.measure is the code being timed.
    cm.measure([] {
        auto iter = str.cbegin();
        auto end = str.cend();

        header_map header;
        // parse() takes no skipper argument here, unlike phrase_parse().
        parse(iter, end, parser::map, header);
        // Sanity check that all 20 generated headers were parsed
        // (compiled out when NDEBUG is defined).
        assert(header.size() == 20);
        // NOTE(review): presumably returned so the parse result is observed
        // and cannot be optimised away; confirm against the nonius docs.
        return header.size();
    });
})

#include <nonius/main.h++>

I'm using gcc 5.4 and Boost 1.61

这篇关于如何提高boost :: spirit :: x3键值解析器的性能的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆