为什么通过weak_ptr调用这么慢？ [英] Why calling via weak_ptr is so slow?

查看：164 发布时间：2016/11/6 0:44:30 c++ performance c++11 g++

本文介绍了为什么通过weak_ptr调用这么慢？的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我已阅读 weak_ptr的性能损失是什么？但我自己的测试显示不同的结果。

我正在用智能指针实现委托（delegate）。下面的简单代码重现了 weak_ptr 的性能问题。有人可以告诉我为什么吗？

  #include <chrono>
 #include <functional>
 #include <iostream>
 #include <memory>
 #include <stdint.h>
 #include <string>
 #include <utility>
 
 // Counter object used as the call target of the benchmark.
 // bar() adds incrStep to counter; the destructor prints the final count.
 struct Foo
 {
     Foo() : counter(0) { incrStep = 1; }

     // Advances the counter by the fixed step.
     void bar()
     {
         counter += incrStep;
     }

     // Prints "End <counter>" so the total number of bar() calls is visible.
     virtual ~Foo()
     {
         std::cout << "End " << counter << std::endl;
     }

 private:
     uint64_t counter;
     uint64_t incrStep;
 };
 
 // Runs g once and prints the label md followed by the elapsed time.
 //
 // md: label written in front of the measurement.
 // g:  callable under test; invoked exactly once.
 //
 // Output: "<md> \t: \t<elapsed milliseconds>" terminated by std::endl.
 void pf(const std::string &md, const std::function<void()> &g)
 {
     // steady_clock is monotonic, so the interval cannot be distorted by
     // wall-clock adjustments; high_resolution_clock gives no such guarantee.
     using clock = std::chrono::steady_clock;

     const auto st = clock::now();
     g();
     const auto ft = clock::now();
     const auto del = std::chrono::duration_cast<std::chrono::milliseconds>(ft - st);
     std::cout << md << " \t: \t" << del.count() << std::endl;
 }

测试：

  int main（int，char **）
 {
 volatile size_t l = 1000000000ULL; 
 size_t maxCounter = l; 
 
 auto a = std :: make_shared< Foo>（）; 
 std :: weak_ptr< Foo> wp = a; 
 
 pf（通过raw ptr调用，[=]（）{
 for（size_t i = 0; i  {
 auto p = a.get（）; 
 if（p）
 {
 p-> bar（）; 
} 
} 
 }）; 
 
 pf（通过shared_ptr调用，[=]（）{
 for（size_t i = 0; i  {
 if（a）
 {
 a-> bar（）; 
} 
} 
} 
 
 pf（call via weak_ptr，[=]（）{
 std :: shared_ptr< Foo> p; 
 for（size_t i = 0; i& ; ++ i）
 {
p = wp.lock（）; 
 if（p）
 {
 p-> bar（）; 
 } 
} 
}）; 
 
 pf（通过shared_ptr复制调用，[=]（）{
 volatile std :: shared_ptr< Foo> p1 = a; 
 std :: shared_ptr< Foo> p; 
 for（size_t i = 0; i  {
p = const_cast< std :: shared_ptr< Foo>&>（p1）; 
 if（p）
 {
 p-> bar（）; 
} 
} 
} 
 
 pf（通过mem_fn调用，[=]（）{
 auto fff = std :: mem_fn（& Foo :: bar）; 
 for = 0; i  {
 fff（a.get（））; 
} 
} 
 
 return 0; 
}

结果：

  $ ./test 
call via raw ptr            :   369
call via shared_ptr         :   302
call via weak_ptr           :   22663
call via shared_ptr copy    :   2171
call via mem_fn             :   2124
End 5000000000

如你所见，weak_ptr 比复制 shared_ptr 以及 std::mem_fn 慢 10 倍，比使用原始指针或 shared_ptr.get() 慢 60 倍。

解决方案

在尝试重现你的测试时，我意识到优化器消除的代码可能比它应该消除的更多。我的做法是利用随机数来阻止过度优化，这样得到的结果看起来比较真实：std::weak_ptr 大约比 std::shared_ptr 或其原始指针慢三倍。
 
 
我在每个测试中都计算一个校验和，以确保它们做的是相同的工作：
  #include <chrono>
 #include <memory>
 #include <vector>
 #include <iomanip>
 #include <iostream>
 
 // Streams the expression m to std::cout followed by '\n'. The do/while(0)
 // wrapper makes the macro usable as a single statement.
 #define OUT(m) do{std::cout << m << '\n';}while(0)
 
 // Simple stopwatch built on std::chrono::steady_clock.
 // start() and stop() record two time points; secs() reports the distance
 // between them in seconds.
 class Timer
 {
     using clk = std::chrono::steady_clock;
     using microseconds = std::chrono::microseconds;

     clk::time_point tsb; // time point recorded by start()
     clk::time_point tse; // time point recorded by stop()

 public:

     // Sets both time points to the same instant, so secs() returns 0.
     void clear() { tsb = tse = clk::now(); }
     void start() { tsb = clk::now(); }
     void stop() { tse = clk::now(); }

     // Streams the elapsed time in seconds.
     friend std::ostream& operator<<(std::ostream& o, const Timer& timer)
     {
         return o << timer.secs();
     }

     // return time difference in seconds
     // (0.0 when the stop point is not later than the start point)
     double secs() const
     {
         if(tse <= tsb)
             return 0.0;
         auto d = std::chrono::duration_cast<microseconds>(tse - tsb);
         return d.count() / 1000000.0;
     }
 };
 
定时器; 
 
 constexpr auto N = 100000000U; 
 
 int main（）
 {
 std :: srand（std :: time（0））; 
 
 std :: vector< int> random_ints; 
 for（auto i = 0U; i <1024; ++ i）
 random_ints.push_back（std :: rand（）％（i + 1））; 
 
 std :: shared_ptr< int> sptr = std :: make_shared< int>（std :: rand（）％100）; 
 int * rptr = sptr.get（）; 
 std :: weak_ptr< int> wptr = sptr; 
 
 unsigned sum = 0; 
 
 sum = 0; 
 timer.start（）; 
 for（auto i = 0U; i  {
 sum + = random_ints [i％random_ints.size（）] * * sptr; 
} 
 timer.stop（）; 
 
 OUT（sptr：<<< sum<<<<< timer）; 
 
 sum = 0; 
 timer.start（）; 
 for（auto i = 0U; i  {
 sum + = random_ints [i％random_ints.size（）] * * rptr; 
} 
 timer.stop（）; 
 
 OUT（rptr：<<< sum<<<<< timer）; 
 
 sum = 0; 
 timer.start（）; 
 for（auto i = 0U; i  {
 sum + = random_ints [i％random_ints.size（）] * * wptr.lock 
} 
 timer.stop（）; 
 
 OUT（wptr：<< sum<<<<< timer）; 
} 
  
 编译器选项：

  g++ -std=c++14 -O3 -g0 -D NDEBUG -o bin/timecpp src/timecpp.cpp
  
 示例输出：
  sptr：3318793206 1.30389 //共享指针
 rptr：3318793206 1.2751 //原指针
 wptr：3318793206 3.13879 //弱指针
  
 
I have read the question What's the performance penalty of weak_ptr? but my own tests show different results.

I'm making delegates with smart pointers. The simple code below reproduces the performance issue with weak_ptr. Can anybody tell me why?
#include <chrono>
#include <functional>
#include <iostream>
#include <memory>
#include <stdint.h>
#include <string>
#include <utility>

// Counter object used as the call target of the benchmark: every bar() call
// adds a fixed step, and the destructor reports the accumulated total.
struct Foo
{
    Foo() : counter{0}, incrStep{1} {}

    // Advances the counter by one step.
    void bar() { counter = counter + incrStep; }

    // Prints "End <counter>" when the object is destroyed.
    virtual ~Foo()
    {
        std::cout << "End ";
        std::cout << counter << std::endl;
    }

private:
    uint64_t counter;   // running total of all bar() calls
    uint64_t incrStep;  // amount added per bar() call
};

// Runs g once and prints the label md followed by the elapsed time.
//
// md: label written in front of the measurement.
// g:  callable under test; invoked exactly once.
//
// Output: "<md> \t: \t<elapsed milliseconds>" terminated by std::endl.
void pf(const std::string &md, const std::function<void()> &g)
{
    // steady_clock is monotonic, so the interval cannot be distorted by
    // wall-clock adjustments; high_resolution_clock gives no such guarantee.
    using clock = std::chrono::steady_clock;

    const auto st = clock::now();
    g();
    const auto ft = clock::now();
    const auto del = std::chrono::duration_cast<std::chrono::milliseconds>(ft - st);
    std::cout << md << " \t: \t" << del.count() << std::endl;
}
And the test:
// Benchmark driver: times five ways of reaching Foo::bar() through pf(),
// each running maxCounter iterations.
int main(int , char** )
{
    // The bound is read once from a volatile object, presumably so the
    // optimizer cannot treat the iteration count as a known constant.
    volatile size_t l = 1000000000ULL;
    size_t maxCounter = l;

    auto a = std::make_shared<Foo>();
    std::weak_ptr<Foo> wp = a;

    // Every lambda below captures by copy ([=]), so each closure holds its
    // own copy of the variables it uses (a, wp, maxCounter).

    // Case 1: fetch the raw pointer with get() on every iteration.
    pf("call via raw ptr        ", [=](){
        for (size_t i = 0; i < maxCounter; ++i)
        {
            auto p = a.get();
            if (p)
            {
                p->bar();
            }
        }
    });

    // Case 2: test and dereference the shared_ptr directly.
    pf("call via shared_ptr      ", [=](){
        for (size_t i = 0; i < maxCounter; ++i)
        {
            if (a)
            {
                a->bar();
            }
        }
    });

    // Case 3: lock() the weak_ptr on every iteration and assign the
    // resulting shared_ptr to p (replacing the one from the previous pass).
    pf("call via weak_ptr       ", [=](){
        std::shared_ptr<Foo> p;
        for (size_t i = 0; i < maxCounter; ++i)
        {
            p = wp.lock();
            if (p)
            {
                p->bar();
            }
        }
    });

    // Case 4: copy-assign a shared_ptr on every iteration.
    // NOTE(review): p1 is declared volatile and then read through a reference
    // with volatile cast away; the C++ standard makes such an access
    // undefined behavior, so this measurement should be treated with care.
    pf("call via shared_ptr copy", [=](){
        volatile std::shared_ptr<Foo> p1 = a;
        std::shared_ptr<Foo> p;
        for (size_t i = 0; i < maxCounter; ++i)
        {
            p = const_cast<std::shared_ptr<Foo>& >(p1);
            if (p)
            {
                p->bar();
            }
        }
    });

    // Case 5: call bar() through a std::mem_fn wrapper; unlike the other
    // cases there is no null check inside the loop.
    pf("call via mem_fn         ", [=](){
        auto fff = std::mem_fn(&Foo::bar);
        for (size_t i = 0; i < maxCounter; ++i)
        {
            fff(a.get());
        }
    });

    return 0;
}
Results:
$ ./test
call via raw ptr            :   369
call via shared_ptr         :   302
call via weak_ptr           :   22663
call via shared_ptr copy    :   2171
call via mem_fn             :   2124
End 5000000000
As you can see, weak_ptr is 10 times slower than shared_ptr with copying and std::mem_fn and 60 times slower than using raw ptr or shared_ptr.get()
 解决方案 
In trying to reproduce your test I realised that the optimizer might be eliminating more than it should. What I did was to utilize random numbers to defeat over-optimization and these results seem realistic with std::weak_ptr being about three times slower than the std::shared_ptr or its raw pointer.

I calculate a checksum in each test to ensure they are all doing the same work:
#include <chrono>
#include <memory>
#include <vector>
#include <iomanip>
#include <iostream>

// Streams the expression m to std::cout followed by '\n'. The do/while(0)
// wrapper makes the macro usable as a single statement.
#define OUT(m) do{std::cout << m << '\n';}while(0)

// Stopwatch on top of std::chrono::steady_clock: start() and stop() each
// record an instant, and secs() converts the gap between them to seconds.
class Timer
{
    using clk = std::chrono::steady_clock;
    using microseconds = std::chrono::microseconds;

    clk::time_point tsb; // instant recorded by start()
    clk::time_point tse; // instant recorded by stop()

public:

    // Puts both marks on the current instant, so secs() reports 0.
    void clear()
    {
        tse = clk::now();
        tsb = tse;
    }

    // Records the beginning of the measured interval.
    void start() { tsb = clk::now(); }

    // Records the end of the measured interval.
    void stop() { tse = clk::now(); }

    // Writes the elapsed seconds to the stream.
    friend std::ostream& operator<<(std::ostream& o, const Timer& timer)
    {
        o << timer.secs();
        return o;
    }

    // Elapsed time in seconds, at microsecond granularity; 0.0 unless the
    // stop mark lies strictly after the start mark.
    double secs() const
    {
        const bool has_elapsed = tsb < tse;
        if(!has_elapsed)
            return 0.0;
        const auto elapsed_us = std::chrono::duration_cast<microseconds>(tse - tsb).count();
        return elapsed_us / 1000000.0;
    }
};

// Single Timer instance shared by all three measurements in main().
Timer timer;

// Iteration count of each timed loop in main().
constexpr auto N = 100000000U;

// Times the same checksum loop three times, reading the multiplier through
// a shared_ptr, a raw pointer and a weak_ptr respectively. Each run prints
// its checksum and elapsed seconds so the three can be compared.
//
// NOTE(review): std::srand, std::rand and std::time are declared in
// <cstdlib> and <ctime>; the include list of this listing names neither, so
// the code relies on them being pulled in indirectly.
int main()
{
    std::srand(std::time(0));

    // 1024 random operands; element i lies in [0, i], so the loop results
    // are not known at compile time.
    std::vector<int> random_ints;
    for(auto i = 0U; i < 1024; ++i)
        random_ints.push_back(std::rand() % (i + 1));

    // One int observed through three kinds of handle.
    std::shared_ptr<int> sptr = std::make_shared<int>(std::rand() % 100);
    int* rptr = sptr.get();
    std::weak_ptr<int> wptr = sptr;

    unsigned sum = 0;

    // Run 1: dereference the shared_ptr on every iteration.
    sum = 0;
    timer.start();
    for(auto i = 0U; i < N; ++i)
    {
        sum += random_ints[i % random_ints.size()] * *sptr;
    }
    timer.stop();

    OUT("sptr: " << sum << " " << timer);

    // Run 2: dereference the raw pointer on every iteration.
    sum = 0;
    timer.start();
    for(auto i = 0U; i < N; ++i)
    {
        sum += random_ints[i % random_ints.size()] * *rptr;
    }
    timer.stop();

    OUT("rptr: " << sum << " " << timer);

    // Run 3: lock() the weak_ptr on every iteration and dereference the
    // temporary shared_ptr it returns. sptr stays alive for the whole of
    // main, so lock() does not return an empty pointer here.
    sum = 0;
    timer.start();
    for(auto i = 0U; i < N; ++i)
    {
        sum += random_ints[i % random_ints.size()] * *wptr.lock();
    }
    timer.stop();

    OUT("wptr: " << sum << " " << timer);
}
Compiler flags:
g++ -std=c++14 -O3 -g0 -D NDEBUG -o bin/timecpp src/timecpp.cpp
Example Output:
sptr: 3318793206 1.30389 // shared pointer
rptr: 3318793206 1.2751 // raw pointer
wptr: 3318793206 3.13879 // weak pointer


                        
这篇关于为什么通过weak_ptr调用这么慢？的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！


                    
                        查看全文

为什么通过weak_ptr调用这么慢？ [英] Why calling via weak_ptr is so slow?

问题描述

相关文章

C/C++开发最新文章

热门教程

热门工具

登录关闭

为什么通过weak_ptr调用这么慢？ [英] Why calling via weak_ptr is so slow?

问题描述

相关文章

C/C++开发最新文章

热门教程

热门工具

登录 关闭

登录关闭