为什么通过weak_ptr调用这么慢? [英] Why calling via weak_ptr is so slow?

查看:164
本文介绍了为什么通过weak_ptr调用这么慢?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我已阅读过问题"weak_ptr 的性能损失是什么?"(What's the performance penalty of weak_ptr?),但我自己的测试得出了不同的结果。



我正在用智能指针实现委托(delegate)。下面的简单代码重现了 weak_ptr 的性能问题。有人能告诉我原因吗?

  #include <chrono>
#include <functional>
#include <iostream>
#include <memory>
#include <stdint.h>
#include <string>
#include <utility>

/// Benchmark payload: a counter that grows by a fixed step on every call.
struct Foo
{
    /// Starts the counter at 0 with a step of 1.
    Foo() : counter(0) { incrStep = 1;}

    /// Adds the step to the counter; this is the call being timed.
    void bar()
    {
        counter += incrStep;
    }

    /// Prints "End <counter>" so the total number of bar() calls is visible
    /// once the object is destroyed.
    virtual ~Foo()
    {
        std::cout << "End " << counter << std::endl;
    }
private:
    uint64_t counter;
    uint64_t incrStep;
};

/// Runs g once and prints how long it took, in whole milliseconds.
/// @param md  Label printed in front of the measurement.
/// @param g   Callable to time; it is invoked exactly once.
/// Output format: "<md> \t: \t<milliseconds>" followed by std::endl.
void pf(const std::string &md, const std::function<void()> &g)
{
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by wall-clock adjustments.
    const auto st = std::chrono::steady_clock::now();
    g();
    const auto ft = std::chrono::steady_clock::now();
    const auto del = std::chrono::duration_cast<std::chrono::milliseconds>(ft - st);
    std::cout << md << " \t: \t" << del.count() << std::endl;
}

测试:

  int main(int,char **)
{
volatile size_t l = 1000000000ULL;
size_t maxCounter = l;

auto a = std :: make_shared< Foo>();
std :: weak_ptr< Foo> wp = a;

pf(通过raw ptr调用,[=](){
for(size_t i = 0; i {
auto p = a.get();
if(p)
{
p-> bar();
}
}
});

pf(通过shared_ptr调用,[=](){
for(size_t i = 0; i {
if(a)
{
a-> bar();
}
}
}

pf(call via weak_ptr,[=](){
std :: shared_ptr< Foo> p;
for(size_t i = 0; i& ; ++ i)
{
p = wp.lock();
if(p)
{
p-> bar();
}
}
});

pf(通过shared_ptr复制调用,[=](){
volatile std :: shared_ptr< Foo> p1 = a;
std :: shared_ptr< Foo> p;
for(size_t i = 0; i {
p = const_cast< std :: shared_ptr< Foo>&>(p1);
if(p)
{
p-> bar();
}
}
}

pf(通过mem_fn调用,[=](){
auto fff = std :: mem_fn(& Foo :: bar);
for = 0; i {
fff(a.get());
}
}

return 0;
}

结果:

  $ ./test 
call via raw ptr            :   369
call via shared_ptr         :   302
call via weak_ptr           :   22663
call via shared_ptr copy    :   2171
call via mem_fn             :   2124
End 5000000000

如你所见,weak_ptr 比带拷贝的 shared_ptr 和 std::mem_fn 慢 10 倍,比使用原始指针或 shared_ptr.get() 慢 60 倍。

解决方案

在尝试重现你的测试时,我意识到优化器可能消除了超出应有范围的代码。我的做法是利用随机数来防止过度优化,得到的结果看起来比较符合实际:std::weak_ptr 大约比 std::shared_ptr 或其原始指针慢三倍。



我在每个测试中都计算一个校验和,以确保它们做的是相同的工作:

  #include <chrono>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <memory>
#include <vector>

// Streams the expression m to std::cout followed by a newline. The
// do { ... } while(0) wrapper makes the macro behave as a single statement.
#define OUT(m) do{std::cout << m << '\n';}while(0)

/// Stopwatch built on std::chrono::steady_clock.
/// Call start() and stop() around the code to measure, then read secs() or
/// stream the Timer to print the elapsed seconds.
class Timer
{
    using clk = std::chrono::steady_clock;
    using microseconds = std::chrono::microseconds;

    clk::time_point tsb; // start mark
    clk::time_point tse; // stop mark

public:

    /// Sets both marks to the current time, so secs() reports 0.0.
    void clear() { tsb = tse = clk::now(); }
    /// Records the start mark.
    void start() { tsb = clk::now(); }
    /// Records the stop mark.
    void stop() { tse = clk::now(); }

    /// Streams the elapsed time in seconds, as returned by secs().
    friend std::ostream& operator<<(std::ostream& o, const Timer& timer)
    {
        return o << timer.secs();
    }

    /// Returns the time difference in seconds, at microsecond resolution.
    /// Returns 0.0 when the stop mark is not later than the start mark.
    double secs() const
    {
        if(tse <= tsb)
            return 0.0;
        auto d = std::chrono::duration_cast<microseconds>(tse - tsb);
        return d.count() / 1000000.0;
    }
};

定时器;

constexpr auto N = 100000000U;

int main()
{
std :: srand(std :: time(0));

std :: vector< int> random_ints;
for(auto i = 0U; i <1024; ++ i)
random_ints.push_back(std :: rand()%(i + 1));

std :: shared_ptr< int> sptr = std :: make_shared< int>(std :: rand()%100);
int * rptr = sptr.get();
std :: weak_ptr< int> wptr = sptr;

unsigned sum = 0;

sum = 0;
timer.start();
for(auto i = 0U; i {
sum + = random_ints [i%random_ints.size()] * * sptr;
}
timer.stop();

OUT(sptr:<<< sum<<<<< timer);

sum = 0;
timer.start();
for(auto i = 0U; i {
sum + = random_ints [i%random_ints.size()] * * rptr;
}
timer.stop();

OUT(rptr:<<< sum<<<<< timer);

sum = 0;
timer.start();
for(auto i = 0U; i {
sum + = random_ints [i%random_ints.size()] * * wptr.lock
}
timer.stop();

OUT(wptr:<< sum<<<<< timer);
}

编译器选项:

  g++ -std=c++14 -O3 -g0 -D NDEBUG -o bin/timecpp src/timecpp.cpp

示例输出

  sptr:3318793206 1.30389 //共享指针
rptr:3318793206 1.2751 //原指针
wptr:3318793206 3.13879 //弱指针


I have read the question What's the performance penalty of weak_ptr? but my own tests show different results.

I'm making delegates with smart pointers. The simple code below reproduces the performance issue with weak_ptr. Can anybody tell me why?

#include <chrono>
#include <functional>
#include <iostream>
#include <memory>
#include <stdint.h>
#include <string>
#include <utility>

// Benchmark payload: keeps a running total that grows by a fixed step on each
// bar() call and reports the total when the object is destroyed.
struct Foo
{
    Foo()
        : total_(0)
        , step_(1)
    {
    }

    // The operation being timed: advance the total by one step.
    void bar()
    {
        total_ = total_ + step_;
    }

    // Prints "End <total>" so the number of bar() calls can be checked.
    virtual ~Foo()
    {
        std::cout << "End " << total_ << std::endl;
    }

private:
    uint64_t total_;
    uint64_t step_;
};

/// Runs g once and prints how long it took, in whole milliseconds.
/// @param md  Label printed in front of the measurement.
/// @param g   Callable to time; it is invoked exactly once.
/// Output format: "<md> \t: \t<milliseconds>" followed by std::endl.
void pf(const std::string &md, const std::function<void()> &g)
{
    // Interval timing needs a monotonic clock. high_resolution_clock is not
    // guaranteed to be steady, so a wall-clock adjustment during g() could
    // skew or even negate the reported duration.
    using clock = std::chrono::steady_clock;

    const auto st = clock::now();
    g();
    const auto ft = clock::now();
    const auto del = std::chrono::duration_cast<std::chrono::milliseconds>(ft - st);
    std::cout << md << " \t: \t" << del.count() << std::endl;
}

And the test:

// Benchmark driver: times five ways of calling Foo::bar() on one shared Foo
// object, running each for maxCounter iterations and printing one line per test.
int main(int , char** )
{
    // The iteration count is read through a volatile variable, presumably so
    // the optimizer cannot treat the loop bound as a compile-time constant.
    volatile size_t l = 1000000000ULL;
    size_t maxCounter = l;

    // One Foo shared by every test. Each lambda below captures by copy, so it
    // holds its own shared_ptr/weak_ptr that refers to this same object.
    auto a = std::make_shared<Foo>();
    std::weak_ptr<Foo> wp = a;

    // Test 1: fetch the raw pointer from the shared_ptr on each iteration.
    pf("call via raw ptr        ", [=](){
        for (size_t i = 0; i < maxCounter; ++i)
        {
            auto p = a.get();
            if (p)
            {
                p->bar();
            }
        }
    });

    // Test 2: call straight through the captured shared_ptr.
    pf("call via shared_ptr      ", [=](){
        for (size_t i = 0; i < maxCounter; ++i)
        {
            if (a)
            {
                a->bar();
            }
        }
    });

    // Test 3: lock the weak_ptr on each iteration and call through the result.
    pf("call via weak_ptr       ", [=](){
        std::shared_ptr<Foo> p;
        for (size_t i = 0; i < maxCounter; ++i)
        {
            p = wp.lock();
            if (p)
            {
                p->bar();
            }
        }
    });

    // Test 4: copy-assign a shared_ptr on each iteration.
    // NOTE(review): p1 is defined volatile and then read through a reference
    // with volatile cast away; the C++ standard makes such an access undefined
    // behavior, so this timing should be treated with caution.
    pf("call via shared_ptr copy", [=](){
        volatile std::shared_ptr<Foo> p1 = a;
        std::shared_ptr<Foo> p;
        for (size_t i = 0; i < maxCounter; ++i)
        {
            p = const_cast<std::shared_ptr<Foo>& >(p1);
            if (p)
            {
                p->bar();
            }
        }
    });

    // Test 5: call through a std::mem_fn wrapper around Foo::bar.
    pf("call via mem_fn         ", [=](){
        auto fff = std::mem_fn(&Foo::bar);
        for (size_t i = 0; i < maxCounter; ++i)
        {
            fff(a.get());
        }
    });

    return 0;
}

Results:

$ ./test
call via raw ptr            :   369
call via shared_ptr         :   302
call via weak_ptr           :   22663
call via shared_ptr copy    :   2171
call via mem_fn             :   2124
End 5000000000

As you can see, weak_ptr is 10 times slower than shared_ptr with copying and std::mem_fn and 60 times slower than using raw ptr or shared_ptr.get()

解决方案

In trying to reproduce your test I realised that the optimizer might be eliminating more than it should. What I did was to utilize random numbers to defeat over-optimization and these results seem realistic with std::weak_ptr being about three times slower than the std::shared_ptr or its raw pointer.

I calculate a checksum in each test to ensure they are all doing the same work:

#include <chrono>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <memory>
#include <vector>

// Streams the expression m to std::cout followed by a newline. The
// do { ... } while(0) wrapper makes the macro behave as a single statement.
#define OUT(m) do{std::cout << m << '\n';}while(0)

// Stopwatch over std::chrono::steady_clock. Bracket the code to measure with
// start() and stop(), then read secs() or stream the Timer to print it.
class Timer
{
    using clock_type = std::chrono::steady_clock;

    clock_type::time_point begin_;
    clock_type::time_point end_;

public:

    // Puts both marks at the current instant, so secs() reports 0.0.
    void clear()
    {
        const auto now = clock_type::now();
        begin_ = now;
        end_ = now;
    }

    // Records the start mark.
    void start()
    {
        begin_ = clock_type::now();
    }

    // Records the stop mark.
    void stop()
    {
        end_ = clock_type::now();
    }

    // Elapsed seconds between the marks, at microsecond resolution; 0.0 when
    // the stop mark is not later than the start mark.
    double secs() const
    {
        if(begin_ < end_)
        {
            const auto elapsed =
                std::chrono::duration_cast<std::chrono::microseconds>(end_ - begin_);
            return elapsed.count() / 1000000.0;
        }
        return 0.0;
    }

    // Streams the value of secs().
    friend std::ostream& operator<<(std::ostream& o, const Timer& timer)
    {
        o << timer.secs();
        return o;
    }
};

// Single stopwatch instance reused for every measurement in main().
Timer timer;

// Number of iterations executed by each timed loop.
constexpr auto N = 100000000U;

// Times the same checksum loop three ways: dereferencing a shared_ptr, a raw
// pointer, and a weak_ptr locked on every iteration. Each test prints its
// checksum and elapsed seconds; equal checksums show the loops did the same work.
int main()
{
    // Seed from the current time so the inputs are not known at compile time.
    // The cast makes the time_t -> unsigned conversion explicit.
    std::srand(static_cast<unsigned>(std::time(nullptr)));

    // Table of pseudo-random multipliers indexed cyclically by the loops.
    constexpr auto table_size = 1024U;
    std::vector<int> random_ints;
    random_ints.reserve(table_size);
    for(auto i = 0U; i < table_size; ++i)
        random_ints.push_back(static_cast<int>(std::rand() % (i + 1)));

    // All three handles refer to the same int. sptr stays alive for the whole
    // function, so wptr.lock() never yields an empty pointer here.
    std::shared_ptr<int> sptr = std::make_shared<int>(std::rand() % 100);
    int* rptr = sptr.get();
    std::weak_ptr<int> wptr = sptr;

    unsigned sum = 0;

    // The three loops are kept as separate inline code rather than a shared
    // helper so that each access path is measured directly.

    // Test 1: dereference the shared_ptr.
    timer.start();
    for(auto i = 0U; i < N; ++i)
    {
        sum += random_ints[i % random_ints.size()] * *sptr;
    }
    timer.stop();

    OUT("sptr: " << sum << " " << timer);

    // Test 2: dereference the raw pointer.
    sum = 0;
    timer.start();
    for(auto i = 0U; i < N; ++i)
    {
        sum += random_ints[i % random_ints.size()] * *rptr;
    }
    timer.stop();

    OUT("rptr: " << sum << " " << timer);

    // Test 3: lock the weak_ptr on every iteration and dereference the result.
    sum = 0;
    timer.start();
    for(auto i = 0U; i < N; ++i)
    {
        sum += random_ints[i % random_ints.size()] * *wptr.lock();
    }
    timer.stop();

    OUT("wptr: " << sum << " " << timer);
}

Compiler flags:

g++ -std=c++14 -O3 -g0 -D NDEBUG -o bin/timecpp src/timecpp.cpp

Example Output:

sptr: 3318793206 1.30389 // shared pointer
rptr: 3318793206 1.2751 // raw pointer
wptr: 3318793206 3.13879 // weak pointer

这篇关于为什么通过weak_ptr调用这么慢?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆