如何提高boost interval_map查找的性能 [英] How to improve performance of boost interval_map lookups

查看:257
本文介绍了如何提高boost interval_map查找的性能的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我使用 boost :: icl :: interval_map 将字节范围映射到一组字符串。从一个(排序)磁盘文件加载地图,然后使用下面的代码查找。



问题是查找真的很慢。



在我的测试中,我在地图中插入了66425个范围。我概述了代码,基本上> 99.9%的时间花费在各种Boost功能中(没有一个特定的功能很慢,在许多功能上有很多时间)。





我的树不平衡(我如何找到?我如何重新平衡它?)



正在使用set< string>一个问题?



计算地图和窗口的交集有问题吗? (虽然这是我需要的,所以我不能看到怎么做)。

  using namespace std; 
typedef set< string>对象;
typedef boost :: icl :: interval_map< uint64_t,objects>范围;
void
find_range(const ranges * map,uint64_t start,uint64_t end,
void(* f)(uint64_t start,uint64_t end,const char * object,
void * opaque ),
void * opaque)
{
boost :: icl :: interval< uint64_t> :: type window;
window = boost :: icl :: interval< uint64_t> :: right_open(start,end);

ranges r = * map&窗口;

ranges :: iterator iter = r.begin();
while(iter!= r.end()){
boost :: icl :: interval< uint64_t> :: type range = iter-> first;
uint64_t start = range.lower();
uint64_t end = range.upper();

objects obj_set = iter-> second;
objects :: iterator iter2 = obj_set.begin();
while(iter2!= obj_set.end()){
f(start,end,iter2-> c_str
iter2 ++;
}
iter ++;
}
}

前几个配置文件条目:

 %累积自我总计
时间秒秒呼叫我们/呼叫我们/呼叫名称
9.77 0.13 0.13 21866814 0.01 boost: :icl :: interval_bounds :: interval_bounds(unsigned char)
6.02 0.21 0.08 9132134 0.01 boost :: icl :: interval_traits< boost :: icl :: ionic_interval< unsigned long,std :: less> > :: lower(boost :: icl :: discrete_interval< unsigned long,std :: less> const&)
6.02 0.29 0.08 6004967 0.01 boost :: enable_if< boost :: icl :: is_discrete_interval& icl :: discrete_interval< unsigned long,std :: less> >,bool> :: type boost :: icl :: is_empty< boost :: icl :: discrete_interval< unsigned long,std :: less> >(boost :: icl :: discrete_interval< unsigned long,std :: less> const&)
5.26 0.36 0.07 21210093 0.00 boost :: icl :: discrete_interval< unsigned long,std :: less> ()const
5.26 0.43 0.07 11964109 0.01 std :: less< unsigned long> :: operator()(unsigned long const& unsigned long const&)const
4.51 0.49 0.06 35761849 0.00 std :: _ Rb_tree< ; std :: basic_string< char,std :: char_traits< char>,std :: allocator< char> >,std :: basic_string< char,std :: char_traits< char>,std :: allocator< char> >,std :: _ Identity< std :: basic_string< char,std :: char_traits< char>,std :: allocator< char> > >,std :: less< std :: basic_string< char,std :: char_traits< char>,std :: allocator< char> > >,std :: allocator< std :: basic_string< char,std :: char_traits< char>,std :: allocator< char> > > > :: __ S_left(std :: _ Rb_tree_node_base const *)
4.51 0.55 0.06 12009934 0.00 boost :: icl :: operator ==(boost :: icl :: interval_bounds,boost :: icl :: interval_bounds)
3.76 0.60 0.05 12078493 0.00 boost :: icl :: discrete_interval< unsigned long,std :: less> :: upper()const
3.76 0.65 0.05 12077959 0.00 boost :: enable_if< boost :: icl :: is_interval< ; boost :: icl :: discrete_interval< unsigned long,std :: less> >,boost :: icl :: interval_traits< boost :: icl :: discrete_interval< unsigned long,std :: less> > :: domain_type> :: type boost :: icl :: upper< boost :: icl :: discrete_interval< unsigned long,std :: less> >(boost :: icl :: discrete_interval< unsigned long,std :: less> const&)
3.76 0.70 0.05 8837475 0.01 boost :: icl :: interval_bounds :: bits()const
0.05 6004967 0.01 boost :: enable_if< boost :: icl :: is_interval< boost :: icl :: discrete_interval< unsigned long,std :: less> >,bool> :: type boost :: icl :: domain_less_equal< boost :: icl :: discrete_interval< unsigned long,std :: less> >(boost :: icl :: interval_traits< boost :: icl :: discrete_interval< unsigned long,std :: less>> :: domain_type const& boost :: icl :: interval_traits< boost :: icl :: discrete_interval< ; unsigned long,std :: less>> :: domain_type const&)
3.01 0.79 0.04 5891650 0.01 boost :: icl :: is_right_closed(boost :: icl :: interval_bounds)

数据集: http://oirase.annexia.org/tmp/bmap.txt

完整代码: http://git.annexia.org/?p=virt-bmap.git;a=tree

解决方案

在此答案中,我提出了三个优化:


  1. 通过 boost :: container :: flat_set 的对象 std :: set 可以提高局部性对象集是<4元素)


    UPDATE 在我的最终版本中, code> boost :: container :: flat_map 返回 std :: set 退化性能 find_range code / $ c $> / code>以系数〜2x和查找_ blockquote>


  2. 替换物件ID std :: string c>(这在技术上是 char const * 但在逻辑上是唯一的)。这种技术类似于各种VM实现(如Java / .NET)中的互联字符串。


    strong>: make_atom 的当前实现是非常简单的,从不释放原子。你可能想要回到队列中的字符串,使用Boost Flyweights,一个池分配器或者这些的组合,使它更聪明。但是,这是否需要取决于您的用例



  3. $ c>等于范围调用,通过避免复制(大量)数据节省大量时间


    _ UPDATE 单独应用此优化时,加速已经是 26〜30x 。请注意,与包含所有三个优化时相比,内存使用量大约增加了两倍_





卷和数据效率



在开始之前,我想知道数据的样子。所以,编写一些代码来解析 bmap.txt 输入,我们得到:



Source On Coliru

  Parsed ok 
66425个输入行的直方图
d:3367
f:20613
p:21222
v:21223
范围大小:6442450944
范围迭代大小:21223
最小对象集:1.000000
最大对象集:234.000000
平均对象集:3.129859
最小间隔宽度:1024.000000
最大间隔宽度:2526265344.000000
平均间隔宽度:296.445177k
第一个:[0,1048576)
最后:[3916185600,6442450944)
字符串原子:23904 unique in 66425总计
原子效率:35.986451%

项目和很多



使用 make_atom 对象命名方法与 boost :: flat_set 将内存分配从 〜15GiB 减少到 ~10Gib



此优化还会减少字符串与指针插入比较的字符串比较, interval_map 的组合器策略,因此对于更大的数据集,这有可能有很多加速。



查询效率



查询性能确实因输入的部分副本而严重削弱。



复制,而不是查看重叠范围,只需替换:

  const ranges r = * map&窗口; 
ranges :: const_iterator iter = r.begin();
while(iter!= r.end()){



  auto r = map-> equal_range(window); 
ranges :: const_iterator iter = r.first;
while(iter!= r.second){

在我的系统上运行10000个相同的随机具有两个版本的查询导致加速32倍

  10000个随机OLD查找导致156729884 callbacks in 29148ms 
10000'random'新查找导致156729884回调在897ms

实数0m31.715s
用户0m31.664s
sys 0m0.012s

运行时现在由解析/统计信息支配。完整的基准代码位于此处: Coliru

  #define BOOST_RESULT_OF_USE_DECTYPE 
#define BOOST_SPIRIT_USE_PHOENIX_V3

/ * virt-bmap examiner plugin
*版权所有Red Hat Inc.
*
*此程序是免费软件;您可以根据
*自由软件基金会发布的GNU通用公共许可证的条款重新分发和/或修改
*版本2的许可证,或
*(可选)任何更高的版本。
*
*这个程序分发,希望它将是有用的,
*,但没有任何保证;甚至没有
*适销性或适用于特定用途的默示保证。有关更多详细信息,请参阅
* GNU通用公共许可证。
*
*您应该已经收到GNU通用公共许可证
*的副本以及此程序;如果没有,写自由软件
* Foundation,Inc.,51 Franklin Street,Fifth Floor,Boston,MA 02110-1301 USA。
* /

#include< stdio.h>
#include< stdlib.h>
#include< stdint.h>
#include< inttypes.h>
#include< assert.h>

#include& lt; boost / icl / interval.hpp>
#include< boost / icl / interval_set.hpp>
#include< boost / icl / interval_map.hpp>
#include< boost / container / flat_set.hpp>

using namespace std;

/ *将区间(uint64_t,uint64_t)映射到一组字符串,其中每个
* string表示覆盖该范围的对象。
* /

static size_t atoms_requested = 0;
static size_t atoms_unique_created = 0;

使用string_atom = const char *;
string_atom make_atom(std :: string& s)
{
static std :: set< std :: string> s_atoms;
atoms_requested + = 1;

auto it = s_atoms.find(s);
if(it!= s_atoms.end())
return it-> c_str();

atoms_unique_created + = 1;
return s_atoms.insert(std :: move(s))。first-> c_str();
}

typedef boost :: container :: flat_set< string_atom>对象;
typedef boost :: icl :: interval_map< uint64_t,objects>范围;

范围*
new_ranges(void)
{
return new ranges();
}

void
free_ranges(ranges * mapv)
{
ranges * map =(ranges *)mapv;
delete map;
}

externCvoid
insert_range(void * mapv,uint64_t start,uint64_t end,const char * object)
{
ranges * map =(ranges *)mapv;
objects obj_set;
obj_set.insert(obj_set.end(),object);
map-> add(std :: make_pair(boost :: icl :: interval< uint64_t> :: right_open(start,end),// SEHE added std ::
obj_set)
}

externCvoid
iter_range(void * mapv,void(* f)(uint64_t start,uint64_t end,const char * object,void * opaque), void * opaque)
{
ranges * map =(ranges *)mapv;
ranges :: iterator iter = map-> begin();
while(iter!= map-> end()){
boost :: icl :: interval< uint64_t> :: type range = iter-> first;
uint64_t start = range.lower();
uint64_t end = range.upper();

objects obj_set = iter-> second;
objects :: iterator iter2 = obj_set.begin();
while(iter2!= obj_set.end()){
f(start,end,* iter2 / * - > c_str // SEHE
iter2 ++;
}
iter ++;
}
}

externCvoid
find_range(void const * mapv,uint64_t start,uint64_t end,void(* f)(uint64_t start,uint64_t结束,const char * object,void * opaque),void * opaque)
{
const ranges * map =(const ranges *)mapv;

boost :: icl :: interval< uint64_t> :: type window;
window = boost :: icl :: interval< uint64_t> :: right_open(start,end);

const ranges r = * map&窗口;

ranges :: const_iterator iter = r.begin();
while(iter!= r.end()){
boost :: icl :: interval< uint64_t> :: type range = iter-> first;
uint64_t start = range.lower();
uint64_t end = range.upper()?

objects obj_set = iter-> second;
objects :: iterator iter2 = obj_set.begin();
while(iter2!= obj_set.end()){
f(start,end,* iter2 / * - > c_str // SEHE
iter2 ++;
}
iter ++;
}
}

externCvoid
find_range_ex(void const * mapv,uint64_t start,uint64_t end,void(* f)(uint64_t start,uint64_t结束,const char * object,void * opaque),void * opaque)
{
const ranges * map =(const ranges *)mapv;

boost :: icl :: interval< uint64_t> :: type window;
window = boost :: icl :: interval< uint64_t> :: right_open(start,end);

#if 0
const ranges r = * map&窗口;
ranges :: const_iterator iter = r.begin();
while(iter!= r.end()){
#else
auto r = map-> equal_range(window);
ranges :: const_iterator iter = r.first;
while(iter!= r.second){
#endif

boost :: icl :: interval< uint64_t> :: type range = iter-> first;
uint64_t start = range.lower();
uint64_t end = range.upper();

objects obj_set = iter-> second;
objects :: iterator iter2 = obj_set.begin()
while(iter2!= obj_set.end()){
f(start,end,* iter2 / * - > c_str // SEHE
iter2 ++;
}
iter ++;
}
}

#include< memory>
#include< boost / spirit / include / qi.hpp>
#include< boost / spirit / include / phoenix.hpp>
#include< boost / accumulators / accumulators.hpp>
#include< boost / accumulators / statistics.hpp>
#include< fstream>
#include< chrono>

std :: map< char,size_t> histo;

bool insert_line_of_input(ranges& bmap_data,uint64_t b,uint64_t e,char type,std :: string& object){
if(!object.empty())
histo [type] ++;
// std :: cout<< std :: hex<< b<< < e - < <类型<< <对象<< \\\
;

#if 0
object.insert(object.begin(),':');
object.insert(object.begin(),type);
#endif
insert_range(& bmap_data,b,e,make_atom(std :: move(object)));
return true;
}

std :: vector< std :: pair< uint64_t,uint64_t> > generate_test_queries(ranges const& bmap_data,size_t n){
std :: vector< std :: pair< uint64_t,uint64_t> >查询
queries.reserve(n);

for(size_t i = 0; i {
auto start =(static_cast< uint64_t& )%bmap_data.size();
auto end = start + rand();

query.emplace_back(start,end);
}

返回查询;
}

范围read_mapfile(const char * fname){
std :: ifstream ifs(fname);
boost :: spirit :: istream_iterator f(ifs>> std :: noskipws),l;

范围bmap_data;

命名空间phx = boost :: phoenix;
using namespace boost :: spirit :: qi;
uint_parser< uint64_t,16>抵消;
if(!phrase_parse(f,l,
(1>> offset>> offset>> char _(pvdf)>> as_string [lexeme [+图表]>>>>>> attr('/')& :ref(bmap_data),_1,_2,_3,_4)]%eol> * eol,
blank))
{
exit(255)
} else
{
std :: cout< Parsed ok\\\
;
}

if(f!= l)
std :: cout< Warning:remaining input'<< std :: string(f,l)<< \\\
;

return bmap_data;
}

void report_statistics(ranges const& bmap_data){
size_t total = 0;
for(auto e:histo)total + = e.second;

std :: cout<< 直方图<总< input lines\\\
;

for(auto e:histo)
std :: cout<< e.first<< :<< e.second<< \\\
;

命名空间ba = boost :: accumulators;
ba :: accumulator_set< double,ba :: stats< ba :: tag :: mean,ba :: tag :: max,ba :: tag :: min> >
object_sets,interval_widths;

for(auto const& r:bmap_data)
{
auto width = r.first.upper() - r.first.lower
assert(width%1024 == 0);

interval_widths(width);
object_sets(r.second.size());
}

std :: cout<< std :: fixed;
std :: cout<< ranges size:< bmap_data.size()<< \\\
;
std :: cout<< ranges iterative size:<< bmap_data.iterative_size()<< \\\
;

std :: cout<< 最小对象集:< ba :: min(object_sets)<< \\\
;
std :: cout<< Max object set:<< ba :: max(object_sets)<< \\\
;
std :: cout<< Average object set:<< ba :: mean(object_sets)<< \\\
;
std :: cout<< 最小间隔宽度:<< ba :: min(interval_widths)<< \\\
;
std :: cout<< 最大间隔宽度:<< ba :: max(interval_widths)<< \\\
;
std :: cout<< 平均间隔宽度:< ba :: mean(interval_widths)/1024.0 < k\\\
;
std :: cout<< 第一:< bmap_data.begin() - > first<< \\\
;
std :: cout<< Last:<< bmap_data.rbegin() - > first<< \\\
;

std :: cout<< String atoms:< atoms_unique_created<< unique in<< atoms_requested<< total\\\
;
std :: cout<< 原子效率:< (atoms_unique_created * 100.0 / atoms_requested)<< %\\\
;
}

void perform_comparative_benchmarks(ranges const& bmap_data,size_t number_of_queries){
srand(42);
auto const queries = generate_test_queries(bmap_data,number_of_queries);

使用hrc = std :: chrono :: high_resolution_clock;
{
auto start = hrc :: now();
size_t callbacks = 0;

for(auto const& q:queries)
{
find_range(& bmap_data,q.first,q.second,
[](uint64_t start, uint64_t end,const char * object,void * opaque){
++(* static_cast< size_t *>(opaque));
},& callbacks);
}
std :: cout<< number_of_queries<< '随机'OLD查找导致<回调
<< callbacks in< std :: chrono :: duration_cast< std :: chrono :: milliseconds>((hrc :: now() - start))count()< ms\\\
;
}

{
auto start = hrc :: now();
size_t callbacks = 0;

for(auto const& q:queries)
{
find_range_ex(& bmap_data,q.first,q.second,
[](uint64_t start, uint64_t end,const char * object,void * opaque){
++(* static_cast< size_t *>(opaque));
},& callbacks);
}
std :: cout<< number_of_queries<< 'random'NEW查找导致<回调
<< callbacks in< std :: chrono :: duration_cast< std :: chrono :: milliseconds>((hrc :: now() - start))count()< ms\\\
;
}
}

int main(){
auto bmap = read_mapfile(bmap.txt);

report_statistics(bmap);

perform_comparative_benchmarks(bmap,1000);

#if 0 //将范围转储到控制台
for(auto const& r:bmap)
{
std :: cout< r.first<< \t<< r.second.size()<< \t;
std :: copy(r.second.begin(),r.second.end(),std :: ostream_iterator< std :: string>(std :: cout,\t));
std :: cout<< \\\
;
}
#endif
}


I'm using a boost::icl::interval_map to map byte ranges to a set of strings. The map is loaded from a (sorted) disk file, and then I do lookups using the code below.

The problem is the lookups are really slow.

In my test, I inserted 66425 ranges into the map. I profiled the code and basically > 99.9% of the time is spent in various Boost functions (there's not a particular function that is slow, there's a lot of time spread over many functions).

What can be done to make this faster?

Is my tree unbalanced (how do I find out? how can I rebalance it?)

Is using set<string> a problem?

Is calculating the intersection of the map and the window a problem? (Although it's what I need, so I can't see how else to do it).

using namespace std;
typedef set<string> objects;
typedef boost::icl::interval_map<uint64_t, objects> ranges;
void
find_range (const ranges *map, uint64_t start, uint64_t end,
            void (*f) (uint64_t start, uint64_t end, const char *object,
                       void *opaque),
            void *opaque)
{
  boost::icl::interval<uint64_t>::type window;
  window = boost::icl::interval<uint64_t>::right_open (start, end);

  ranges r = *map & window;

  ranges::iterator iter = r.begin ();
  while (iter != r.end ()) {
    boost::icl::interval<uint64_t>::type range = iter->first;
    uint64_t start = range.lower ();
    uint64_t end = range.upper ();

    objects obj_set = iter->second;
    objects::iterator iter2 = obj_set.begin ();
    while (iter2 != obj_set.end ()) {
      f (start, end, iter2->c_str (), opaque);
      iter2++;
    }
    iter++;
  }
}

The first few profile entries:

  %   cumulative   self              self     total
 time   seconds   seconds    calls  us/call  us/call  name
  9.77      0.13     0.13 21866814     0.01           boost::icl::interval_bounds::interval_bounds(unsigned char)
  6.02      0.21     0.08  9132134     0.01           boost::icl::interval_traits<boost::icl::discrete_interval<unsigned long, std::less> >::lower(boost::icl::discrete_interval<unsigned long, std::less> const&)
  6.02      0.29     0.08  6004967     0.01           boost::enable_if<boost::icl::is_discrete_interval<boost::icl::discrete_interval<unsigned long, std::less> >, bool>::type boost::icl::is_empty<boost::icl::discrete_interval<unsigned long, std::less> >(boost::icl::discrete_interval<unsigned long, std::less> const&)
  5.26      0.36     0.07 21210093     0.00           boost::icl::discrete_interval<unsigned long, std::less>::bounds() const
  5.26      0.43     0.07 11964109     0.01           std::less<unsigned long>::operator()(unsigned long const&, unsigned long const&) const
  4.51      0.49     0.06 35761849     0.00           std::_Rb_tree<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::_Identity<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_S_left(std::_Rb_tree_node_base const*)
  4.51      0.55     0.06 12009934     0.00           boost::icl::operator==(boost::icl::interval_bounds, boost::icl::interval_bounds)
  3.76      0.60     0.05 12078493     0.00           boost::icl::discrete_interval<unsigned long, std::less>::upper() const
  3.76      0.65     0.05 12077959     0.00           boost::enable_if<boost::icl::is_interval<boost::icl::discrete_interval<unsigned long, std::less> >, boost::icl::interval_traits<boost::icl::discrete_interval<unsigned long, std::less> >::domain_type>::type boost::icl::upper<boost::icl::discrete_interval<unsigned long, std::less> >(boost::icl::discrete_interval<unsigned long, std::less> const&)
  3.76      0.70     0.05  8837475     0.01           boost::icl::interval_bounds::bits() const
  3.76      0.75     0.05  6004967     0.01           boost::enable_if<boost::icl::is_interval<boost::icl::discrete_interval<unsigned long, std::less> >, bool>::type boost::icl::domain_less_equal<boost::icl::discrete_interval<unsigned long, std::less> >(boost::icl::interval_traits<boost::icl::discrete_interval<unsigned long, std::less> >::domain_type const&, boost::icl::interval_traits<boost::icl::discrete_interval<unsigned long, std::less> >::domain_type const&)
  3.01      0.79     0.04  5891650     0.01           boost::icl::is_right_closed(boost::icl::interval_bounds)

Data set: http://oirase.annexia.org/tmp/bmap.txt
Full code: http://git.annexia.org/?p=virt-bmap.git;a=tree

解决方案

In this answer I present three optimizations:

  1. replacing the objects std::set by boost::container::flat_set for improved locality (and likely reallocation costs, since most object sets are <4 elements)

    UPDATE In my final version below, simply replacing boost::container::flat_map back with std::set degraded performance of just find_range by a factor ~2x and find_range_ex by a factor of ~4x on my test system

  2. replacing the object id std::string by string_atom (which is technically a char const* but logically unique). This technique is similar to interned strings in various VM implementations (like Java/.NET).

    NOTE: The current implementation of make_atom is exceedingly simplistic and never frees atoms. You would potentially want to back the strings in a deque, use Boost Flyweights, a pool allocator or some combination of those to make it smarter. However, whether this is required depends on your use cases

  3. replacing the map intersection with a equal_range call, which saves the bulk of time by avoiding copying (large amounts of) data

    _UPDATE When applying just this optimization in isolation the speed up is already 26~30x. Note that the memory usage is roughly double at ~20MiB compared to when including all three optimizations_

Volume and data efficiency

Before I start, I like to know what the data looks like. So, writing some code to parse that bmap.txt input, we get:

Source On Coliru

Parsed ok
Histogram of 66425 input lines
d: 3367
f: 20613
p: 21222
v: 21223
ranges size:            6442450944
ranges iterative size:  21223
Min object set:         1.000000
Max object set:         234.000000
Average object set:     3.129859
Min interval width:     1024.000000
Max interval width:     2526265344.000000
Average interval width: 296.445177k
First:                  [0,1048576)
Last:                   [3916185600,6442450944)
String atoms:           23904 unique in 66425 total
Atom efficiency:        35.986451%

As you can see the sets are usually ~3 items, and many are duplicated.

Using the make_atom object naming method with boost::flat_set reduced memory allocation from ~15GiB to ~10Gib.

This optimization also reduces string comparison to pointer comparison for set insertion and the Combiner strategy of the interval_map, so for larger data sets this has the potential to have a lot of speedup.

Query efficiency

Query performance is indeed severely crippled by the partial copy of the input.

Don't copy, instead view the overlapping range, simply by replacing:

  const ranges r = *map & window;
  ranges::const_iterator iter = r.begin ();
  while (iter != r.end ()) {

with

  auto r = map->equal_range(window);
  ranges::const_iterator iter = r.first;
  while (iter != r.second) {

On my system running 10000 identical randomized queries with both versions results in a speedup of 32x:

10000 'random' OLD lookups resulted in 156729884 callbacks in 29148ms
10000 'random' NEW lookups resulted in 156729884 callbacks in 897ms

real    0m31.715s
user    0m31.664s
sys 0m0.012s

The runtime is now dominated by the parsing/statistics. Full benchmark code is here: On Coliru

#define BOOST_RESULT_OF_USE_DECTYPE
#define BOOST_SPIRIT_USE_PHOENIX_V3

/* virt-bmap examiner plugin
 * Copyright (C) 2014 Red Hat Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <assert.h>

#include <boost/icl/interval.hpp>
#include <boost/icl/interval_set.hpp>
#include <boost/icl/interval_map.hpp>
#include <boost/container/flat_set.hpp>

using namespace std;

/* Maps intervals (uint64_t, uint64_t) to a set of strings, where each
 * string represents an object that covers that range.
 */

static size_t atoms_requested = 0;
static size_t atoms_unique_created = 0;

using string_atom = const char*;
string_atom make_atom(std::string&& s)
{
    static std::set<std::string> s_atoms;
    atoms_requested += 1;

    auto it = s_atoms.find(s);
    if (it != s_atoms.end())
        return it->c_str();

    atoms_unique_created += 1;
    return s_atoms.insert(std::move(s)).first->c_str();
}

typedef boost::container::flat_set<string_atom> objects;
typedef boost::icl::interval_map<uint64_t, objects> ranges;

ranges*
new_ranges (void)
{
  return new ranges ();
}

void
free_ranges (ranges *mapv)
{
  ranges *map = (ranges *) mapv;
  delete map;
}

extern "C" void
insert_range (void *mapv, uint64_t start, uint64_t end, const char *object)
{
  ranges *map = (ranges *) mapv;
  objects obj_set;
  obj_set.insert (obj_set.end(), object);
  map->add (std::make_pair (boost::icl::interval<uint64_t>::right_open (start, end), // SEHE added std::
                       obj_set));
}

extern "C" void
iter_range (void *mapv, void (*f) (uint64_t start, uint64_t end, const char *object, void *opaque), void *opaque)
{
  ranges *map = (ranges *) mapv;
  ranges::iterator iter = map->begin ();
  while (iter != map->end ()) {
    boost::icl::interval<uint64_t>::type range = iter->first;
    uint64_t start = range.lower ();
    uint64_t end = range.upper ();

    objects obj_set = iter->second;
    objects::iterator iter2 = obj_set.begin ();
    while (iter2 != obj_set.end ()) {
      f (start, end, *iter2/*->c_str ()*/, opaque); // SEHE
      iter2++;
    }
    iter++;
  }
}

extern "C" void
find_range (void const *mapv, uint64_t start, uint64_t end, void (*f) (uint64_t start, uint64_t end, const char *object, void *opaque), void *opaque)
{
  const ranges *map = (const ranges *) mapv;

  boost::icl::interval<uint64_t>::type window;
  window = boost::icl::interval<uint64_t>::right_open (start, end);

  const ranges r = *map & window;

  ranges::const_iterator iter = r.begin ();
  while (iter != r.end ()) {
    boost::icl::interval<uint64_t>::type range = iter->first;
    uint64_t start = range.lower ();
    uint64_t end = range.upper ();

    objects obj_set = iter->second;
    objects::iterator iter2 = obj_set.begin ();
    while (iter2 != obj_set.end ()) {
      f (start, end, *iter2/*->c_str ()*/, opaque); // SEHE
      iter2++;
    }
    iter++;
  }
}

extern "C" void
find_range_ex (void const *mapv, uint64_t start, uint64_t end, void (*f) (uint64_t start, uint64_t end, const char *object, void *opaque), void *opaque)
{
  const ranges *map = (const ranges *) mapv;

  boost::icl::interval<uint64_t>::type window;
  window = boost::icl::interval<uint64_t>::right_open (start, end);

#if 0
  const ranges r = *map & window;
  ranges::const_iterator iter = r.begin ();
  while (iter != r.end ()) {
#else
  auto r = map->equal_range(window);
  ranges::const_iterator iter = r.first;
  while (iter != r.second) {
#endif

    boost::icl::interval<uint64_t>::type range = iter->first;
    uint64_t start = range.lower ();
    uint64_t end = range.upper ();

    objects obj_set = iter->second;
    objects::iterator iter2 = obj_set.begin ();
    while (iter2 != obj_set.end ()) {
      f (start, end, *iter2/*->c_str ()*/, opaque); // SEHE
      iter2++;
    }
    iter++;
  }
}

#include <memory>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics.hpp>
#include <fstream>
#include <chrono>

std::map<char, size_t> histo;

bool insert_line_of_input(ranges& bmap_data, uint64_t b, uint64_t e, char type, std::string& object) {
    if (!object.empty())
        histo[type]++;
    //std::cout << std::hex << b << " " << e << " " << type << " " << object << "\n";

#if 0
    object.insert(object.begin(), ':');
    object.insert(object.begin(), type);
#endif
    insert_range(&bmap_data, b, e, make_atom(std::move(object)));
    return true;
}

std::vector<std::pair<uint64_t, uint64_t> > generate_test_queries(ranges const& bmap_data, size_t n) {
    std::vector<std::pair<uint64_t, uint64_t> > queries;
    queries.reserve(n);

    for (size_t i = 0; i < n; ++i)
    {
        auto start = (static_cast<uint64_t>(rand()) * rand()) % bmap_data.size();
        auto end   = start + rand();

        queries.emplace_back(start,end);
    }

    return queries;
}

ranges read_mapfile(const char* fname) {
    std::ifstream ifs(fname);
    boost::spirit::istream_iterator f(ifs >> std::noskipws), l;

    ranges bmap_data;

    namespace phx = boost::phoenix;
    using namespace boost::spirit::qi;
    uint_parser<uint64_t, 16> offset;
    if (!phrase_parse(f,l,
                ("1 " >> offset >> offset >> char_("pvdf") >> as_string[lexeme[+graph] >> attr('/') >> lexeme[*~char_("\r\n")]]) 
                [ _pass = phx::bind(insert_line_of_input, phx::ref(bmap_data), _1, _2, _3, _4) ] % eol >> *eol, 
                blank))
    {
        exit(255);
    } else
    {
        std::cout << "Parsed ok\n";
    }

    if (f!=l)
        std::cout << "Warning: remaining input '" << std::string(f,l) << "\n";

    return bmap_data;
}

void report_statistics(ranges const& bmap_data) {
    size_t total = 0;
    for (auto e : histo) total += e.second;

    std::cout << "Histogram of " << total << " input lines\n";

    for (auto e : histo)
        std::cout << e.first << ": " << e.second << "\n";

    namespace ba = boost::accumulators;
    ba::accumulator_set<double, ba::stats<ba::tag::mean, ba::tag::max, ba::tag::min> > 
        object_sets, interval_widths;

    for (auto const& r : bmap_data)
    {
        auto width = r.first.upper() - r.first.lower();
        assert(width % 1024 == 0);

        interval_widths(width);
        object_sets(r.second.size());
    }

    std::cout << std::fixed;
    std::cout << "ranges size:            " << bmap_data.size()                 << "\n";
    std::cout << "ranges iterative size:  " << bmap_data.iterative_size()       << "\n";

    std::cout << "Min object set:         " << ba::min(object_sets)             << "\n" ;
    std::cout << "Max object set:         " << ba::max(object_sets)             << "\n" ;
    std::cout << "Average object set:     " << ba::mean(object_sets)            << "\n" ;
    std::cout << "Min interval width:     " << ba::min(interval_widths)         << "\n" ;
    std::cout << "Max interval width:     " << ba::max(interval_widths)         << "\n" ;
    std::cout << "Average interval width: " << ba::mean(interval_widths)/1024.0 << "k\n" ;
    std::cout << "First:                  " << bmap_data.begin()->first         << "\n" ;
    std::cout << "Last:                   " << bmap_data.rbegin()->first        << "\n" ;

    std::cout << "String atoms:           " << atoms_unique_created << " unique in " << atoms_requested << " total\n";
    std::cout << "Atom efficiency:        " << (atoms_unique_created*100.0/atoms_requested) << "%\n";
}

void perform_comparative_benchmarks(ranges const& bmap_data, size_t number_of_queries) {
    srand(42);
    auto const queries = generate_test_queries(bmap_data, number_of_queries);

    using hrc = std::chrono::high_resolution_clock;
    {
        auto start = hrc::now();
        size_t callbacks = 0;

        for (auto const& q: queries)
        {
            find_range(&bmap_data, q.first, q.second, 
                    [](uint64_t start, uint64_t end, const char *object, void *opaque) {
                    ++(*static_cast<size_t*>(opaque));
                    }, &callbacks);
        }
        std::cout << number_of_queries << " 'random' OLD lookups resulted in " << callbacks 
                  << " callbacks in " << std::chrono::duration_cast<std::chrono::milliseconds>((hrc::now()-start)).count() << "ms\n";
    }

    {
        auto start = hrc::now();
        size_t callbacks = 0;

        for (auto const& q: queries)
        {
            find_range_ex(&bmap_data, q.first, q.second, 
                    [](uint64_t start, uint64_t end, const char *object, void *opaque) {
                    ++(*static_cast<size_t*>(opaque));
                    }, &callbacks);
        }
        std::cout << number_of_queries << " 'random' NEW lookups resulted in " << callbacks 
                  << " callbacks in " << std::chrono::duration_cast<std::chrono::milliseconds>((hrc::now()-start)).count() << "ms\n";
    }
}

int main() {
    auto bmap = read_mapfile("bmap.txt");

    report_statistics(bmap);

    perform_comparative_benchmarks(bmap, 1000);

#if 0 // to dump ranges to console
    for (auto const& r : bmap)
    {
        std::cout << r.first << "\t" << r.second.size() << "\t";
        std::copy(r.second.begin(), r.second.end(), std::ostream_iterator<std::string>(std::cout, "\t"));
        std::cout << "\n";
    }
#endif
}

这篇关于如何提高boost interval_map查找的性能的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆