一个unordered_map真的比实际上的地图快吗? [英] Is an unordered_map really faster than a map in practice?

查看:154
本文介绍了一个unordered_map真的比实际上的地图快吗?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

当然,unordered_map的查找性能是一个平均值,而且地图的查找性能是O(logN)。



但是当然为了在unordered_map中找到一个对象,我们必须:


  1. 哈希我们要查找的密钥。

  2. equality_compare同一个桶中的每个键的密钥。

而在地图中,我们只需要比较所寻求的密钥使用log2(N)键,其中N是地图中的项目数。



我想知道什么是真正的性能差异,因为哈希函数增加了开销而且一个等于_比较比不上比较便宜。



而不是让社区有一个问题我可以回答自己,我写了一个测试。
$ b

我已经分享了下面的结果,以防其他人觉得这有趣或有用。



更多的答案当然是被邀请的,如果有人是能够和愿意增加更多

解决方案

在回答有关与错过的搜索次数相关的性能问题时,我重新进行了参数化测试



示例结果:

  searching = 1000000 set_size = 0 miss = 100%order = 4384 unordered = 12901 flat_map = 681 
searching = 1000000 set_size = 99 miss = 99.99%ordered = 89127 unordered = 42615 flat_map = 86091
searching = 1000000 set_size = 172 miss = 99.98%ordered = 101283 unordered = 53468 flat_map = 96008
searching = 1000000 set_size = 303 miss = 99.97%ordered = 112747 unordered = 53211 flat_map = 107343
searching = 1000000 set_size = 396 miss = 99.96%ordered = 124179 unordered = 59655 flat_map = 112687
searching = 1000000 set_size = 523 miss = 99.95%ordered = 132180 unordered = 51133 flat_map = 121669
searching = 1000000 set_size = 599 miss = 99.94%ordered = 135850 unordered = 55078 flat_map = 121072
searche s = 1000000 set_size = 695 miss = 99.93%ordered = 140204 unordered = 60087 flat_map = 124961
searching = 1000000 set_size = 795 miss = 99.92%ordered = 146071 unordered = 64790 flat_map = 127873
searching = 1000000 set_size = 916 miss = 99.91%ordered = 154461 unordered = 50944 flat_map = 133194
searching = 1000000 set_size = 988 miss = 99.9%ordered = 156327 unordered = 54094 flat_map = 134288
/ pre>

密钥:

  searching =针对每个地图
set_size =每个地图有多大(因此有多少搜索会导致命中)
miss =生成错过的搜索的概率。用于生成搜索和set_size。
ordered =搜索有序地图的时间
无序=搜索unordered_map
的时间flat_map =搜索平面图的时间

注意:时间是在std :: system_clock :: duration ticks中测量。

TL; DR



结果:一旦地图中有数据,unordered_map就会显示其优越性。只有当地图空白时,表现不如表的性能才会更差。



这是新的代码:

  #include< iostream> 
#include< iomanip>
#include< random>
#include< algorithm>
#include< string>
#include< vector>
#include< map>
#include< unordered_map>
#include< unordered_set>
#include< chrono>
#include< tuple>
#include< future>
#include< stdexcept>
#include< sstream>

使用namespace std;

//这将设置我们将用作键的字符串长度。
//修改这个以测试关键复杂度是否改变各种映射的性能比
//
static const size_t key_length = 20;

//我们将生成的密钥数(测试的大小)
const size_t nkeys = 1000000;



//使用虚拟方法来阻止优化器检测到
//我们的宿函数实际上什么都不做。否则可能会使测试偏移
struct string_user
{
virtual void sink(const std :: string&)= 0;
virtual〜string_user()= default;
};

struct real_string_user:string_user
{
virtual void sink(const std :: string&)override
{

}
};

struct real_string_user_print:string_user
{
virtual void sink(const std :: string& s)override
{
cout<< s < ENDL;
}
};

//从一个字符串生成一个接收器 - 这是一个运行时操作,因此
//防止优化器意识到接收器不起作用
std :: unique_ptr< string_user> ; make_sink(const std :: string& name)
{
if(name ==print)
{
return make_unique< real_string_user_print>();
}
if(name ==noprint)
{
return make_unique< real_string_user>();
}
throw logic_error(name);
}

//生成随机密钥,给定随机引擎和分发
auto gen_string = [](auto& engine,auto& dist)
{
std :: string result(key_length,'');
generate(begin(result),end(result),[&] {
return dist(engine);
});
返回结果;
};

//我们的平面地图的比较谓词。
struct pair_less
{
bool operator()(const pair< string,string>& l,const string& r)const {
return l.first& R等
}

bool operator()(const string& l,const pair< string,string>& r)const {
return l& r.first;
}
};

模板< class F>
auto time_test(F&& f,const vector< string> keys)
{
auto start_time = chrono :: system_clock :: now(); (auto const& key:keys)


{
f(key);
}

auto stop_time = chrono :: system_clock :: now();
auto diff = stop_time - start_time;
return diff;
}

struct report_key
{
size_t nkeys;
int miss_chance;
};

std :: ostream&运算符<(std :: ostream& os,const report_key& key)
{
return os< miss =<< setw(2)< key.miss_chance<<< %;
}

void run_test(string_user& sink,size_t nkeys,double miss_prob)
{
//我们将测试的地图类型
unordered_map<字符串,字符串>无序的;
map< string,string>有序;
vector< pair< string,string>> flat_map;

//所有键的向量,我们可以随机抽取,以便随机地$ a
//所有地图的访问顺序
vector< string>键;
unordered_set< string> keys_record;

//生成键
auto eng = std :: default_random_engine(std :: random_device()());
auto alpha_dist = std :: uniform_int_distribution< char>('A','Z');
auto prob_dist = std :: uniform_real_distribution< double>(0,1.0 - std :: numeric_limits< double> :: epsilon());

auto generate_new_key = [&] {
while(true){
//生成一个键
auto key = gen_string(eng,alpha_dist);
//尝试将其存储在无序映射中
//如果它已经存在,强制再生
//否则将其存储在有序映射中,平面映射
if(keys_record.insert(key).second){
return key;
}
}
};

for(size_t i = 0; i< nkeys; ++ i)
{
bool inserted = false;
auto value = to_string(i);

auto key = generate_new_key();
if(prob_dist(eng)> = miss_prob){
unordered.emplace(key,value);
flat_map.emplace_back(key,value);
ordered.emplace(key,std :: move(value));
}
//记录密钥以供以后使用
keys.emplace_back(std :: move(key));
}
//将我们的向量'平面图'转换成实际的平面图,方法是将其按pair.first进行排序。这是关键
sort(begin(flat_map),end(flat_map),
[](const auto& l,const auto& r){return l.first< r.first;});

//随机键入随机访问顺序
shuffle(begin(keys),end(keys),eng);

auto unordered_lookup = [&](auto& key){
auto i = unordered.find(key);
if(i!= end(unordered)){
sink.sink(i-> second);
}
};

auto ordered_lookup = [&](auto& key){
auto i = ordered.find(key);
if(i!= end(ordered)){
sink.sink(i-> second);
}
};

auto flat_map_lookup = [&](auto& key){
auto i = lower_bound(begin(flat_map),
end(flat_map),
key,
pair_less());
if(i!= end(flat_map)&& i-> first == key){
sink.sink(i-> second);
}
};

//产生线程来访问无序地图
auto unordered_future = async(launch :: async,
[&]()
{
return time_test(unordered_lookup,keys);
});

//产生一个线程来访问有序地图
auto ordered_future = async(launch :: async,[&]
{
return time_test ordered_lookup,keys);
});

//产生线程来访问平面图
auto flat_future = async(launch :: async,[&]
{
return time_test flat_map_lookup,keys);
});

//同步所有线程并获取时间
auto ordered_time = ordered_future.get();
auto unordered_time = unordered_future.get();
auto flat_time = flat_future.get();

cout<<< searching =<< setw(7)< nkeys;
cout<<< set_size =<< setw(7)< unordered.size();
cout<<< miss =<< setw(7)< setprecision(6)<< miss_prob * 100.0< %;
cout<<< ordered =<< setw(7)< ordered_time.count();
cout<<< 无序=<< setw(7)<< unordered_time.count();
cout<<< flat_map =<<< setw(7)< flat_time.count()<<< ENDL;

}

int main()
{
//生成接收器,防止优化器实现它
// 。
stringstream ss;
ss<< NOPRINT;
string arg;
ss>> ARG;
auto puser = make_sink(arg);

for(double chance = 1.0; chance> = 0.0; chance - = 0.0001)
{
run_test(* puser,1000000,几率)
}


返回0;
}


Sure, the lookup performance of an unordered_map is constant on average, and the lookup performance of a map is O(logN).

But of course in order to find an object in an unordered_map, we have to:

  1. hash the key we want to find.
  2. equality_compare the key with every key in the same bucket.

Whereas in a map, we simply need to less_than compare the sought key with log2(N) keys, where N is the number of items in the map.

I wondered what the real performance difference would be, given that the hash function adds overhead and an equality_compare is no cheaper than a less_than compare.

Rather than bother the community with a question I could answer myself, I wrote a test.

I have shared the results below, in case anyone else finds this interesting or useful.

More answers are of course invited if someone is able and willing to add more information.

解决方案

In response to questions about performance in relation to the number of missed searches, I have refactored the test to parameterise this.

Example results:

searches=1000000 set_size=      0 miss=    100% ordered=   4384 unordered=  12901 flat_map=    681
searches=1000000 set_size=     99 miss=  99.99% ordered=  89127 unordered=  42615 flat_map=  86091
searches=1000000 set_size=    172 miss=  99.98% ordered= 101283 unordered=  53468 flat_map=  96008
searches=1000000 set_size=    303 miss=  99.97% ordered= 112747 unordered=  53211 flat_map= 107343
searches=1000000 set_size=    396 miss=  99.96% ordered= 124179 unordered=  59655 flat_map= 112687
searches=1000000 set_size=    523 miss=  99.95% ordered= 132180 unordered=  51133 flat_map= 121669
searches=1000000 set_size=    599 miss=  99.94% ordered= 135850 unordered=  55078 flat_map= 121072
searches=1000000 set_size=    695 miss=  99.93% ordered= 140204 unordered=  60087 flat_map= 124961
searches=1000000 set_size=    795 miss=  99.92% ordered= 146071 unordered=  64790 flat_map= 127873
searches=1000000 set_size=    916 miss=  99.91% ordered= 154461 unordered=  50944 flat_map= 133194
searches=1000000 set_size=    988 miss=   99.9% ordered= 156327 unordered=  54094 flat_map= 134288

Key:

searches = number of searches performed against each map
set_size = how big each map is (and therefore how many of the searches will result in a hit)
miss = the probability of generating a missed search. Used for generating searches and set_size.
ordered = the time spent searching the ordered map
unordered = the time spent searching the unordered_map
flat_map = the time spent searching the flat map

note: time is measured in std::system_clock::duration ticks.

TL;DR

Results: the unordered_map shows its superiority as soon as there is data in the map. The only time it exhibits worse performance than the ordered map is when the maps are empty.

Here's the new code:

#include <iostream>
#include <iomanip>
#include <random>
#include <algorithm>
#include <string>
#include <vector>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <chrono>
#include <tuple>
#include <future>
#include <stdexcept>
#include <sstream>

using namespace std;

// this sets the length of the string we will be using as a key.
// modify this to test whether key complexity changes the performance ratios
// of the various maps
static const size_t key_length = 20;

// the number of keys we will generate (the size of the test)
const size_t nkeys = 1000000;



// use a virtual method to prevent the optimiser from detecting that
// our sink function actually does nothing. otherwise it might skew the test
struct string_user
{
    virtual void sink(const std::string&) = 0;
    virtual ~string_user() = default;
};

struct real_string_user : string_user
{
    virtual void sink(const std::string&) override
    {

    }
};

struct real_string_user_print : string_user
{
    virtual void sink(const std::string& s) override
    {
        cout << s << endl;
    }
};

// generate a sink from a string - this is a runtime operation and therefore
// prevents the optimiser from realising that the sink does nothing
std::unique_ptr<string_user> make_sink(const std::string& name)
{
    if (name == "print")
    {
        return make_unique<real_string_user_print>();
    }
    if (name == "noprint")
    {
        return make_unique<real_string_user>();
    }
    throw logic_error(name);
}

// generate a random key, given a random engine and a distribution
auto gen_string = [](auto& engine, auto& dist)
{
    std::string result(key_length, ' ');
    generate(begin(result), end(result), [&] {
        return dist(engine);
    });
    return result;
};

// comparison predicate for our flat map.
struct pair_less
{
    bool operator()(const pair<string, string>& l, const string& r) const {
        return l.first < r;
    }

    bool operator()(const string& l, const pair<string, string>& r) const {
        return l < r.first;
    }
};

template<class F>
auto time_test(F&& f, const vector<string> keys)
{
    auto start_time = chrono::system_clock::now();

    for (auto const& key : keys)
    {
        f(key);
    }

    auto stop_time = chrono::system_clock::now();
    auto diff =  stop_time - start_time;
    return diff;
}

struct report_key
{
    size_t nkeys;
    int miss_chance;
};

std::ostream& operator<<(std::ostream& os, const report_key& key)
{
    return os << "miss=" << setw(2) << key.miss_chance << "%";
}

void run_test(string_user& sink, size_t nkeys, double miss_prob)
{
    // the types of map we will test
    unordered_map<string, string> unordered;
    map<string, string> ordered;
    vector<pair<string, string>> flat_map;

    // a vector of all keys, which we can shuffle in order to randomise
    // access order of all our maps consistently
    vector<string> keys;
    unordered_set<string> keys_record;

    // generate keys
    auto eng = std::default_random_engine(std::random_device()());
    auto alpha_dist = std::uniform_int_distribution<char>('A', 'Z');
    auto prob_dist = std::uniform_real_distribution<double>(0, 1.0 - std::numeric_limits<double>::epsilon());

    auto generate_new_key = [&] {
        while(true) {
            // generate a key
            auto key = gen_string(eng, alpha_dist);
            // try to store it in the unordered map
            // if it already exists, force a regeneration
            // otherwise also store it in the ordered map and the flat map
            if(keys_record.insert(key).second) {
                return key;
            }
        }
    };

    for (size_t i = 0 ; i < nkeys ; ++i)
    {
        bool inserted = false;
        auto value = to_string(i);

        auto key = generate_new_key();
        if (prob_dist(eng) >= miss_prob) {
            unordered.emplace(key, value);
            flat_map.emplace_back(key, value);
            ordered.emplace(key, std::move(value));
        }
        // record the key for later use
        keys.emplace_back(std::move(key));
    }
    // turn our vector 'flat map' into an actual flat map by sorting it by pair.first. This is the key.
    sort(begin(flat_map), end(flat_map),
         [](const auto& l, const auto& r) { return l.first < r.first; });

    // shuffle the keys to randomise access order
    shuffle(begin(keys), end(keys), eng);

    auto unordered_lookup = [&](auto& key) {
        auto i = unordered.find(key);
        if (i != end(unordered)) {
            sink.sink(i->second);
        }
    };

    auto ordered_lookup = [&](auto& key) {
        auto i = ordered.find(key);
        if (i != end(ordered)) {
            sink.sink(i->second);
        }
    };

    auto flat_map_lookup = [&](auto& key) {
        auto i = lower_bound(begin(flat_map),
                             end(flat_map),
                             key,
                             pair_less());
        if (i != end(flat_map) && i->first == key) {
            sink.sink(i->second);
        }
    };

    // spawn a thread to time access to the unordered map
    auto unordered_future = async(launch::async,
                                  [&]()
                                  {
                                      return time_test(unordered_lookup, keys);
                                  });

    // spawn a thread to time access to the ordered map
    auto ordered_future = async(launch::async, [&]
                                {
                                    return time_test(ordered_lookup, keys);
                                });

    // spawn a thread to time access to the flat map
    auto flat_future = async(launch::async, [&]
                             {
                                 return time_test(flat_map_lookup, keys);
                             });

    // synchronise all the threads and get the timings
    auto ordered_time = ordered_future.get();
    auto unordered_time = unordered_future.get();
    auto flat_time = flat_future.get();

    cout << "searches=" << setw(7) << nkeys;
    cout << " set_size=" << setw(7) << unordered.size();
    cout << " miss=" << setw(7) << setprecision(6) << miss_prob * 100.0 << "%";
    cout << " ordered=" << setw(7) << ordered_time.count();
    cout << " unordered=" << setw(7) << unordered_time.count();
    cout << " flat_map=" << setw(7) << flat_time.count() << endl;

}

int main()
{
    // generate the sink, preventing the optimiser from realising what it
    // does.
    stringstream ss;
    ss << "noprint";
    string arg;
    ss >> arg;
    auto puser = make_sink(arg);

    for (double chance = 1.0 ; chance >= 0.0 ; chance -= 0.0001)
    {
        run_test(*puser, 1000000, chance);
    }


    return 0;
}

这篇关于一个unordered_map真的比实际上的地图快吗?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆