如何提高 boost::spirit::x3 键值解析器的性能 [英] How to improve performance of boost::spirit::x3 key-value parser
问题描述
我使用 boost::spirit::x3 解析键值对(类似于 HTTP 标头)。
与我手写的解析器相比,boost::spirit::x3 大约慢 10%。
我使用boost 1.61和GCC 6.1:
$ g++ -std=c++14 -O3 -I/tmp/boost_1_61_0/boost/ main.cpp && ./a.out
phrase_parse 1.97432 microseconds
parseHeader 1.75742 microseconds
如何提高基于 boost::spirit::x3 的解析器的性能?
#include< iostream>
#include< string>
#include< map>
#include< chrono>
#include< boost / spirit / home / x3.hpp>
#include< boost / fusion / adapted / std_pair.hpp>
使用header_map = std :: map< std :: string,std :: string> ;;
命名空间解析器
{
命名空间x3 = boost :: spirit :: x3;
using x3 :: char_;
using x3 :: lexeme;
x3 :: rule< class map,header_map> const map =msg;
const auto key = + char _(0-9a-zA-Z-);
const auto value = +〜char _(\r\\\
);
const auto header =(key>>':'>> value>> lexeme [\r\\\
]);
const auto map_def = * header>> lexeme [\r\\\
];
BOOST_SPIRIT_DEFINE(map);
}
/// Hand-written parser for a block of "key: value\r\n" lines that ends with an
/// empty "\r\n" line.
///
/// The two characters following the last key character (the ':' and one
/// separator character) are skipped without being inspected.
///
/// @tparam It   Random-access character iterator (uses `+`, `+=` and `-`).
/// @param iter  In: start of the input. Out: position where parsing stopped;
///              just past the terminating blank line for well-formed input,
///              never beyond @p end.
/// @param end   One past the last input character.
/// @param map   Receives the parsed pairs; a key that is already present keeps
///              its old value (std::map::insert semantics).
template <typename It>
void parseHeader(It& iter, It end, header_map& map)
{
    std::string key;
    std::string value;
    It last = iter;
    bool inKey = true;
    // Every branch inspects *(iter + 1), so at least two characters must remain.
    // Checking the distance (instead of `iter + 1 != end`) also terminates for
    // empty input and can never step over `end`.
    while (end - iter >= 2)
    {
        const auto remaining = end - iter;
        if (inKey && *(iter + 1) == ':')
        {
            key.assign(last, iter + 1);
            if (remaining < 3)
            {
                // Input is truncated right after the ':'; nothing left to parse.
                iter = end;
                break;
            }
            iter += 3; // skip the last key character, ':' and the separator
            last = iter;
            inKey = false;
        }
        else if (!inKey && remaining >= 3 && *(iter + 1) == '\r' && *(iter + 2) == '\n')
        {
            value.assign(last, iter + 1);
            map.insert({std::move(key), std::move(value)});
            iter += 3; // skip the last value character and the CRLF
            last = iter;
            inKey = true;
        }
        else if (inKey && *iter == '\r' && *(iter + 1) == '\n')
        {
            // Blank line: end of the header block.
            iter += 2;
            break;
        }
        else
        {
            ++iter;
        }
    }
}
/// Calls @p func ten million times and returns the mean wall-clock time of one
/// call in microseconds.
///
/// @param func  Callable to time; its return value is discarded.
/// @param args  Arguments handed to every call. They are passed as lvalues:
///              forwarding an rvalue would let the first call move from it and
///              leave every later iteration with a moved-from object.
/// @return Mean duration of a single call, in microseconds.
template<typename F, typename ...Args>
double benchmark(F func, Args&&... args)
{
    // steady_clock is monotonic; system_clock can jump when the wall clock is adjusted.
    using clock = std::chrono::steady_clock;
    constexpr std::size_t num = 10 * 1000 * 1000;

    const auto start = clock::now();
    for (std::size_t i = 0; i < num; ++i)
    {
        func(args...);
    }
    const auto end = clock::now();

    const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    return duration.count() / static_cast<double>(num);
}
/// Builds a sample message of 20 "key<i>: value<i>" headers and prints the mean
/// per-parse time of the Spirit X3 grammar and of the hand-written parser.
int main()
{
    const std::size_t headerCount = 20;
    std::string str;
    for (std::size_t i = 0; i < headerCount; ++i)
    {
        std::string num = std::to_string(i);
        str.append("key" + num + ": " + "value" + num + "\r\n");
    }
    // The empty line marks the end of the header block.
    str.append("\r\n");

    // Spirit X3 version, run with the ascii::blank skipper.
    double t1 = benchmark([&str]() {
        auto iter = str.cbegin();
        auto end = str.cend();
        header_map header;
        phrase_parse(iter, end, parser::map, boost::spirit::x3::ascii::blank, header);
        return header;
    });
    std::cout << "phrase_parse " << t1 << " microseconds"<< std::endl;

    // Hand-written version.
    double t2 = benchmark([&str]() {
        auto iter = str.cbegin();
        auto end = str.cend();
        header_map header;
        parseHeader(iter, end, header);
        return header;
    });
    std::cout << "parseHeader " << t2 << " microseconds"<< std::endl;
    return 0;
}
统计结果:平均耗时为 2.5µs 对 3.5µs。
完整代码
使用 http://nonius.io 进行稳定的基准化:
#include< iostream>
#include< string>
#include< map>
#include< nonius / benchmark.h ++>
#include< boost / spirit / home / x3.hpp>
#include< boost / fusion / adapted / std_pair.hpp>
使用header_map = std :: map< std :: string,std :: string> ;;
命名空间解析器
{
命名空间x3 = boost :: spirit :: x3;
using x3 :: char_;
const auto key = +〜char_(':');
const auto value = *(char_ - \r\\\
);
const auto header = key>> ':'>>值>> \r\\\
;
const auto map = * header>> \r\\\
;
}
/// Hand-written parser for a block of "key: value\r\n" lines that ends with an
/// empty "\r\n" line.
///
/// The two characters following the last key character (the ':' and one
/// separator character) are skipped without being inspected.
///
/// @tparam It   Random-access character iterator (uses `+`, `+=` and `-`).
/// @param iter  In: start of the input. Out: position where parsing stopped;
///              just past the terminating blank line for well-formed input,
///              never beyond @p end.
/// @param end   One past the last input character.
/// @param map   Receives the parsed pairs; a key that is already present keeps
///              its old value (std::map::insert semantics).
template <typename It>
void parseHeader(It& iter, It end, header_map& map)
{
    std::string key;
    std::string value;
    It last = iter;
    bool inKey = true;
    // Every branch inspects *(iter + 1), so at least two characters must remain.
    // Checking the distance (instead of `iter + 1 != end`) also terminates for
    // empty input and can never step over `end`.
    while (end - iter >= 2)
    {
        const auto remaining = end - iter;
        if (inKey && *(iter + 1) == ':')
        {
            key.assign(last, iter + 1);
            if (remaining < 3)
            {
                // Input is truncated right after the ':'; nothing left to parse.
                iter = end;
                break;
            }
            iter += 3; // skip the last key character, ':' and the separator
            last = iter;
            inKey = false;
        }
        else if (!inKey && remaining >= 3 && *(iter + 1) == '\r' && *(iter + 2) == '\n')
        {
            value.assign(last, iter + 1);
            map.insert({std::move(key), std::move(value)});
            iter += 3; // skip the last value character and the CRLF
            last = iter;
            inKey = true;
        }
        else if (inKey && *iter == '\r' && *(iter + 1) == '\n')
        {
            // Blank line: end of the header block.
            iter += 2;
            break;
        }
        else
        {
            ++iter;
        }
    }
}
// Sample input shared by the benchmarks: twenty "key<i>: value<i>\r\n" lines
// followed by the terminating empty line. Built once during static initialization.
static auto const str = [] {
    std::string tmp;
    const std::size_t headerCount = 20;
    for (std::size_t i = 0; i < headerCount; ++i)
    {
        std::string num = std::to_string(i);
        tmp.append("key" + num + ": " + "value" + num + "\r\n");
    }
    tmp.append("\r\n");
    return tmp;
}();
// Benchmark of the hand-written parseHeader() on the shared sample input.
NONIUS_BENCHMARK("manual", [](nonius::chronometer cm) {
    cm.measure([]() {
        auto iter = str.cbegin();
        auto end = str.cend();
        header_map header;
        parseHeader(iter, end, header);
        // The sample input holds 20 distinct headers.
        assert(header.size() == 20);
        return header.size();
    });
})
NONIUS_BENCHMARK(x3,[] :chronometer cm){
cm.measure([] {
auto iter = str.cbegin();
auto end = str.cend();
header_map header;
parse(iter,end,parser :: map,header);
assert(header.size()== 20);
return header.size );
});
})
#include< nonius / main.h ++>
我使用gcc 5.4和Boost 1.61
I am parsing key value pairs (similar to HTTP headers) using boost::spirit::x3
. When comparing the performance to my handwritten parser, boost::spirit::x3
is around 10% slower than that.
I am using boost 1.61 and GCC 6.1:
$ g++ -std=c++14 -O3 -I/tmp/boost_1_61_0/boost/ main.cpp && ./a.out
phrase_parse 1.97432 microseconds
parseHeader 1.75742 microseconds
How can I improve the performance of the boost::spirit::x3
based parser?
#include <iostream>
#include <string>
#include <map>
#include <chrono>
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
// Container for the parsed headers: header name -> header value.
using header_map = std::map<std::string, std::string>;
// Spirit X3 grammar for a block of "key:value" lines ended by an empty line.
namespace parser
{
namespace x3 = boost::spirit::x3;
using x3::char_;
using x3::lexeme;
// Start rule; its attribute type is header_map and "msg" is its name string.
x3::rule<class map, header_map> const map = "msg";
// One or more characters from the set: digits, ASCII letters and '-'.
const auto key = +char_("0-9a-zA-Z-");
// One or more characters that are neither '\r' nor '\n'.
const auto value = +~char_("\r\n");
// One header line: key, ':', value, CRLF (lexeme[] disables skipping for the CRLF).
const auto header =(key >> ':' >> value >> lexeme["\r\n"]);
// Zero or more header lines followed by one more CRLF.
const auto map_def = *header >> lexeme["\r\n"];
// Binds the rule `map` to its definition `map_def`.
BOOST_SPIRIT_DEFINE(map);
}
/// Hand-written parser for a block of "key: value\r\n" lines that ends with an
/// empty "\r\n" line.
///
/// The two characters following the last key character (the ':' and one
/// separator character) are skipped without being inspected.
///
/// @tparam It   Random-access character iterator (uses `+`, `+=` and `-`).
/// @param iter  In: start of the input. Out: position where parsing stopped;
///              just past the terminating blank line for well-formed input,
///              never beyond @p end.
/// @param end   One past the last input character.
/// @param map   Receives the parsed pairs; a key that is already present keeps
///              its old value (std::map::insert semantics).
template <typename It>
void parseHeader(It& iter, It end, header_map& map)
{
    std::string key;
    std::string value;
    It last = iter;
    bool inKey = true;
    // Every branch inspects *(iter + 1), so at least two characters must remain.
    // Checking the distance (instead of `iter + 1 != end`) also terminates for
    // empty input and can never step over `end`.
    while (end - iter >= 2)
    {
        const auto remaining = end - iter;
        if (inKey && *(iter + 1) == ':')
        {
            key.assign(last, iter + 1);
            if (remaining < 3)
            {
                // Input is truncated right after the ':'; nothing left to parse.
                iter = end;
                break;
            }
            iter += 3; // skip the last key character, ':' and the separator
            last = iter;
            inKey = false;
        }
        else if (!inKey && remaining >= 3 && *(iter + 1) == '\r' && *(iter + 2) == '\n')
        {
            value.assign(last, iter + 1);
            map.insert({std::move(key), std::move(value)});
            iter += 3; // skip the last value character and the CRLF
            last = iter;
            inKey = true;
        }
        else if (inKey && *iter == '\r' && *(iter + 1) == '\n')
        {
            // Blank line: end of the header block.
            iter += 2;
            break;
        }
        else
        {
            ++iter;
        }
    }
}
/// Calls @p func ten million times and returns the mean wall-clock time of one
/// call in microseconds.
///
/// @param func  Callable to time; its return value is discarded.
/// @param args  Arguments handed to every call. They are passed as lvalues:
///              forwarding an rvalue would let the first call move from it and
///              leave every later iteration with a moved-from object.
/// @return Mean duration of a single call, in microseconds.
template<typename F, typename ...Args>
double benchmark(F func, Args&&... args)
{
    // steady_clock is monotonic; system_clock can jump when the wall clock is adjusted.
    using clock = std::chrono::steady_clock;
    constexpr std::size_t num = 10 * 1000 * 1000;

    const auto start = clock::now();
    for (std::size_t i = 0; i < num; ++i)
    {
        func(args...);
    }
    const auto end = clock::now();

    const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    return duration.count() / static_cast<double>(num);
}
int main()
{
const std::size_t headerCount = 20;
std::string str;
for(std::size_t i = 0; i < headerCount; ++i)
{
std::string num = std::to_string(i);
str.append("key" + num + ": " + "value" + num + "\r\n");
}
str.append("\r\n");
double t1 = benchmark([&str]() {
auto iter = str.cbegin();
auto end = str.cend();
header_map header;
phrase_parse(iter, end, parser::map, boost::spirit::x3::ascii::blank, header);
return header;
});
std::cout << "phrase_parse " << t1 << " microseconds"<< std::endl;
double t2 = benchmark([&str]() {
auto iter = str.cbegin();
auto end = str.cend();
header_map header;
parseHeader(iter, end, header);
return header;
});
std::cout << "parseHeader " << t2 << " microseconds"<< std::endl;
return 0;
}
Here's a fixed x3 grammar that comes a lot closer to your hand rolled "parser":
// Key: one or more characters other than ':'.
const auto key = +~char_(':');
// Value: zero or more characters up to (not including) the next "\r\n".
const auto value = *(char_ - "\r\n");
// One header line: key, ':', value, CRLF.
const auto header = key >> ':' >> value >> "\r\n";
// Whole message: any number of header lines, then a final CRLF.
const auto map = *header >> "\r\n";
Of course, it's still more strict and more robust. Also, don't call it with a space skipper, since your hand-rolled parser doesn't do that either.
Here are the performance measurements on my box:
Statistically, that's 2.5µs vs. 3.5µs on average.
Full Code
Using http://nonius.io for robust benchmarking:
#include <iostream>
#include <string>
#include <map>
#include <nonius/benchmark.h++>
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
// Container for the parsed headers: header name -> header value.
using header_map = std::map<std::string, std::string>;
// Spirit X3 grammar for a block of "key:value" lines ended by an empty line.
namespace parser
{
namespace x3 = boost::spirit::x3;
using x3::char_;
// Key: one or more characters other than ':'.
const auto key = +~char_(':');
// Value: zero or more characters up to (not including) the next "\r\n".
const auto value = *(char_ - "\r\n");
// One header line: key, ':', value, CRLF.
const auto header = key >> ':' >> value >> "\r\n";
// Whole message: any number of header lines, then a final CRLF.
const auto map = *header >> "\r\n";
}
/// Hand-written parser for a block of "key: value\r\n" lines that ends with an
/// empty "\r\n" line.
///
/// The two characters following the last key character (the ':' and one
/// separator character) are skipped without being inspected.
///
/// @tparam It   Random-access character iterator (uses `+`, `+=` and `-`).
/// @param iter  In: start of the input. Out: position where parsing stopped;
///              just past the terminating blank line for well-formed input,
///              never beyond @p end.
/// @param end   One past the last input character.
/// @param map   Receives the parsed pairs; a key that is already present keeps
///              its old value (std::map::insert semantics).
template <typename It>
void parseHeader(It& iter, It end, header_map& map)
{
    std::string key;
    std::string value;
    It last = iter;
    bool inKey = true;
    // Every branch inspects *(iter + 1), so at least two characters must remain.
    // Checking the distance (instead of `iter + 1 != end`) also terminates for
    // empty input and can never step over `end`.
    while (end - iter >= 2)
    {
        const auto remaining = end - iter;
        if (inKey && *(iter + 1) == ':')
        {
            key.assign(last, iter + 1);
            if (remaining < 3)
            {
                // Input is truncated right after the ':'; nothing left to parse.
                iter = end;
                break;
            }
            iter += 3; // skip the last key character, ':' and the separator
            last = iter;
            inKey = false;
        }
        else if (!inKey && remaining >= 3 && *(iter + 1) == '\r' && *(iter + 2) == '\n')
        {
            value.assign(last, iter + 1);
            map.insert({std::move(key), std::move(value)});
            iter += 3; // skip the last value character and the CRLF
            last = iter;
            inKey = true;
        }
        else if (inKey && *iter == '\r' && *(iter + 1) == '\n')
        {
            // Blank line: end of the header block.
            iter += 2;
            break;
        }
        else
        {
            ++iter;
        }
    }
}
// Sample input shared by the benchmarks: twenty "key<i>: value<i>\r\n" lines
// followed by the terminating empty line. Built once during static initialization.
static auto const str = [] {
    constexpr std::size_t headerCount = 20;
    std::string text;
    std::size_t index = 0;
    while (index != headerCount)
    {
        const std::string suffix = std::to_string(index);
        text += "key";
        text += suffix;
        text += ": ";
        text += "value";
        text += suffix;
        text += "\r\n";
        ++index;
    }
    text += "\r\n";
    return text;
}();
// Benchmark of the hand-written parseHeader() on the shared sample input.
NONIUS_BENCHMARK("manual", [](nonius::chronometer meter) {
    meter.measure([] {
        header_map parsed;
        auto first = str.cbegin();
        auto last = str.cend();
        parseHeader(first, last, parsed);
        // The sample input holds 20 distinct headers.
        assert(parsed.size() == 20);
        return parsed.size();
    });
})
// Benchmark of the Spirit X3 grammar (no skipper) on the shared sample input.
NONIUS_BENCHMARK("x3", [](nonius::chronometer meter) {
    meter.measure([]() {
        header_map parsed;
        auto first = str.cbegin();
        auto last = str.cend();
        parse(first, last, parser::map, parsed);
        // The sample input holds 20 distinct headers.
        assert(parsed.size() == 20);
        return parsed.size();
    });
})
#include <nonius/main.h++>
I'm using gcc 5.4 and Boost 1.61
这篇关于如何提高 boost::spirit::x3 键值解析器的性能的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!