为什么通过weak_ptr调用这么慢? [英] Why calling via weak_ptr is so slow?
问题描述
我已阅读《weak_ptr 的性能损失是什么?》这个问题,但我自己的测试显示了不同的结果。
我正在用智能指针实现委托(delegate)。下面的简单代码重现了 weak_ptr
的性能问题。有人可以告诉我为什么吗?
#include <chrono>
#include <functional>
#include <iostream>
#include <memory>
#include <stdint.h>
#include <string>
#include <utility>
/// Minimal counter object used as the call target of the benchmark.
struct Foo
{
    /// Starts the counter at 0 with an increment of 1 per call.
    Foo() : counter(0) { incrStep = 1; }

    /// Adds incrStep to counter; this is the call being timed.
    void bar()
    {
        counter += incrStep;
    }

    /// Prints "End <counter>" to std::cout when the object is destroyed.
    virtual ~Foo()
    {
        std::cout << "End " << counter << std::endl;
    }

private:
    uint64_t counter;
    uint64_t incrStep;
};
/// Runs g once and prints md followed by the elapsed time in whole milliseconds.
///
/// @param md  Label written in front of the measurement.
/// @param g   Callable to time; invoked exactly once.
void pf(const std::string &md, const std::function<void()> &g)
{
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by a system clock adjustment while g runs; high_resolution_clock gives
    // no such guarantee.
    using clock = std::chrono::steady_clock;

    const auto st = clock::now();
    g();
    const auto ft = clock::now();
    const auto del = std::chrono::duration_cast<std::chrono::milliseconds>(ft - st);
    std::cout << md << " \t: \t" << del.count() << std::endl;
}
测试:
int main(int,char **)
{
volatile size_t l = 1000000000ULL;
size_t maxCounter = l;
auto a = std :: make_shared< Foo>();
std :: weak_ptr< Foo> wp = a;
pf(通过raw ptr调用,[=](){
for(size_t i = 0; i {
auto p = a.get();
if(p)
{
p-> bar();
}
}
});
pf(通过shared_ptr调用,[=](){
for(size_t i = 0; i {
if(a)
{
a-> bar();
}
}
}
pf(call via weak_ptr,[=](){
std :: shared_ptr< Foo> p;
for(size_t i = 0; i& ; ++ i)
{
p = wp.lock();
if(p)
{
p-> bar();
}
}
});
pf(通过shared_ptr复制调用,[=](){
volatile std :: shared_ptr< Foo> p1 = a;
std :: shared_ptr< Foo> p;
for(size_t i = 0; i {
p = const_cast< std :: shared_ptr< Foo>&>(p1);
if(p)
{
p-> bar();
}
}
}
pf(通过mem_fn调用,[=](){
auto fff = std :: mem_fn(& Foo :: bar);
for = 0; i {
fff(a.get());
}
}
return 0;
}
结果:
$ ./test
call via raw ptr : 369
call via shared_ptr : 302
call via weak_ptr : 22663
call via shared_ptr copy : 2171
call via mem_fn : 2124
End 5000000000
如你所见,weak_ptr
比复制 shared_ptr
和使用 std::mem_fn 慢 10 倍,
并且比使用原始指针或 shared_ptr.get() 慢 60 倍。
在尝试重现你的测试时,我意识到优化器消除的代码可能比它应该消除的更多。我的做法是利用随机数来阻止过度优化,这样得到的结果看起来比较真实:std::weak_ptr
大约比 std::shared_ptr
或其原始指针慢三倍。
我在每个测试中都计算一个校验和,以确保它们做的是相同的工作:
#include <chrono>
#include <memory>
#include <vector>
#include <iomanip>
#include <iostream>
// Streams m to std::cout followed by '\n'; the do/while(0) wrapper lets the
// macro be used as a single statement.
#define OUT(m) do{std::cout << m << '\n';}while(0)
/// Stopwatch over std::chrono::steady_clock with a begin and an end time point.
class Timer
{
    using clk = std::chrono::steady_clock;
    using microseconds = std::chrono::microseconds;

    clk::time_point tsb;  // begin of the measured interval
    clk::time_point tse;  // end of the measured interval

public:
    /// Sets both time points to the current time.
    void clear() { tsb = tse = clk::now(); }
    /// Records the current time as the begin of the interval.
    void start() { tsb = clk::now(); }
    /// Records the current time as the end of the interval.
    void stop() { tse = clk::now(); }

    /// Streams the value of secs().
    friend std::ostream& operator<<(std::ostream& o, const Timer& timer)
    {
        return o << timer.secs();
    }

    /// Returns the time difference in seconds (microsecond granularity),
    /// or 0.0 when the end is not after the begin.
    double secs() const
    {
        if(tse <= tsb)
            return 0.0;
        auto d = std::chrono::duration_cast<microseconds>(tse - tsb);
        return d.count() / 1000000.0;
    }
};
定时器;
constexpr auto N = 100000000U;
int main()
{
std :: srand(std :: time(0));
std :: vector< int> random_ints;
for(auto i = 0U; i <1024; ++ i)
random_ints.push_back(std :: rand()%(i + 1));
std :: shared_ptr< int> sptr = std :: make_shared< int>(std :: rand()%100);
int * rptr = sptr.get();
std :: weak_ptr< int> wptr = sptr;
unsigned sum = 0;
sum = 0;
timer.start();
for(auto i = 0U; i {
sum + = random_ints [i%random_ints.size()] * * sptr;
}
timer.stop();
OUT(sptr:<<< sum<<<<< timer);
sum = 0;
timer.start();
for(auto i = 0U; i {
sum + = random_ints [i%random_ints.size()] * * rptr;
}
timer.stop();
OUT(rptr:<<< sum<<<<< timer);
sum = 0;
timer.start();
for(auto i = 0U; i {
sum + = random_ints [i%random_ints.size()] * * wptr.lock
}
timer.stop();
OUT(wptr:<< sum<<<<< timer);
}
编译器选项:
g++ -std=c++14 -O3 -g0 -D NDEBUG -o bin/timecpp src/timecpp.cpp
示例输出:
sptr:3318793206 1.30389 //共享指针
rptr:3318793206 1.2751 //原指针
wptr:3318793206 3.13879 //弱指针
I have read the question What's the performance penalty of weak_ptr? but my own tests show different results.
I'm making delegates with smart pointers. The simple code below reproduces the performance issue with weak_ptr.
Can anybody tell me why?
#include <chrono>
#include <functional>
#include <iostream>
#include <memory>
#include <stdint.h>
#include <string>
#include <utility>
/// Counter whose bar() is the call under test; reports its total on destruction.
struct Foo
{
    /// Starts at zero with a step of one.
    Foo() : counter{0}, incrStep{1} {}

    /// Advances counter by incrStep.
    void bar() { counter = counter + incrStep; }

    /// Writes "End <counter>" followed by std::endl to std::cout.
    virtual ~Foo() { std::cout << "End " << counter << std::endl; }

private:
    uint64_t counter;   // running total
    uint64_t incrStep;  // amount added per bar() call
};
/// Runs g once and prints md followed by the elapsed time in whole milliseconds.
///
/// @param md  Label written in front of the measurement.
/// @param g   Callable to time; invoked exactly once.
void pf(const std::string &md, const std::function<void()> &g)
{
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by a system clock adjustment while g runs; high_resolution_clock gives
    // no such guarantee.
    using clock = std::chrono::steady_clock;

    const auto st = clock::now();
    g();
    const auto ft = clock::now();
    const auto del = std::chrono::duration_cast<std::chrono::milliseconds>(ft - st);
    std::cout << md << " \t: \t" << del.count() << std::endl;
}
And the test:
/// Times the same number of Foo::bar() calls made through a raw pointer, a
/// shared_ptr, a weak_ptr locked on every iteration, a shared_ptr copied on
/// every iteration, and std::mem_fn, printing one line per variant via pf().
int main(int , char** )
{
    // The bound is read through a volatile object, presumably so the compiler
    // cannot treat the iteration count as a compile-time constant.
    volatile size_t l = 1000000000ULL;
    size_t maxCounter = l;
    auto a = std::make_shared<Foo>();
    std::weak_ptr<Foo> wp = a;

    pf("call via raw ptr ", [=](){
        for (size_t i = 0; i < maxCounter; ++i)
        {
            auto p = a.get();
            if (p)
            {
                p->bar();
            }
        }
    });

    pf("call via shared_ptr ", [=](){
        for (size_t i = 0; i < maxCounter; ++i)
        {
            if (a)
            {
                a->bar();
            }
        }
    });

    pf("call via weak_ptr ", [=](){
        std::shared_ptr<Foo> p;
        for (size_t i = 0; i < maxCounter; ++i)
        {
            p = wp.lock();
            if (p)
            {
                p->bar();
            }
        }
    });

    pf("call via shared_ptr copy", [=](){
        // The source pointer is deliberately not declared volatile: reading an
        // object defined volatile through a reference that const_cast has
        // stripped of volatile is undefined behaviour.
        const std::shared_ptr<Foo> p1 = a;
        std::shared_ptr<Foo> p;
        for (size_t i = 0; i < maxCounter; ++i)
        {
            p = p1;
            if (p)
            {
                p->bar();
            }
        }
    });

    pf("call via mem_fn ", [=](){
        auto fff = std::mem_fn(&Foo::bar);
        for (size_t i = 0; i < maxCounter; ++i)
        {
            fff(a.get());
        }
    });

    return 0;
}
Results:
$ ./test
call via raw ptr : 369
call via shared_ptr : 302
call via weak_ptr : 22663
call via shared_ptr copy : 2171
call via mem_fn : 2124
End 5000000000
As you can see, weak_ptr
is about 10 times slower than copying a shared_ptr
or calling through std::mem_fn,
and about 60 times slower than using a raw pointer or shared_ptr.get().
In trying to reproduce your test I realised that the optimizer might be eliminating more than it should. What I did was to utilize random numbers to defeat over-optimization and these results seem realistic with std::weak_ptr
being about three times slower than the std::shared_ptr
or its raw pointer.
I calculate a checksum in each test to ensure they are all doing the same work:
#include <chrono>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <memory>
#include <vector>
// Streams m to std::cout followed by '\n'; the do/while(0) wrapper lets the
// macro be used as a single statement.
#define OUT(m) do{std::cout << m << '\n';}while(0)
/// Stopwatch over std::chrono::steady_clock with a begin and an end time point.
class Timer
{
    using clk = std::chrono::steady_clock;
    using microseconds = std::chrono::microseconds;

    clk::time_point tsb;  // begin of the measured interval
    clk::time_point tse;  // end of the measured interval

public:
    /// Sets both time points to the same current time.
    void clear()
    {
        const clk::time_point now = clk::now();
        tse = now;
        tsb = now;
    }

    /// Records the current time as the begin of the interval.
    void start()
    {
        tsb = clk::now();
    }

    /// Records the current time as the end of the interval.
    void stop()
    {
        tse = clk::now();
    }

    /// Elapsed time in seconds at microsecond granularity; 0.0 unless the end
    /// lies strictly after the begin.
    double secs() const
    {
        if(tsb < tse)
        {
            const auto elapsed = std::chrono::duration_cast<microseconds>(tse - tsb);
            return elapsed.count() / 1000000.0;
        }
        return 0.0;
    }

    /// Streams the value of secs().
    friend std::ostream& operator<<(std::ostream& o, const Timer& timer)
    {
        o << timer.secs();
        return o;
    }
};
// Stopwatch shared by the measurements in main().
Timer timer;
// Number of iterations of each measured loop.
constexpr auto N = 100'000'000U;
/// Runs the same checksum loop three times, dereferencing a shared_ptr, a raw
/// pointer and a weak_ptr (locked on every iteration), and prints the checksum
/// and the elapsed seconds of each run.
int main()
{
    // nullptr instead of 0 for the time_t* argument; the time_t result is
    // narrowed to the seed type explicitly.
    std::srand(static_cast<unsigned>(std::time(nullptr)));

    // Random operands, presumably so the optimizer cannot precompute the sums.
    std::vector<int> random_ints;
    random_ints.reserve(1024);
    for(auto i = 0U; i < 1024; ++i)
        random_ints.push_back(std::rand() % (i + 1));

    std::shared_ptr<int> sptr = std::make_shared<int>(std::rand() % 100);
    int* rptr = sptr.get();
    std::weak_ptr<int> wptr = sptr;

    unsigned sum = 0;

    timer.start();
    for(auto i = 0U; i < N; ++i)
    {
        sum += random_ints[i % random_ints.size()] * *sptr;
    }
    timer.stop();
    OUT("sptr: " << sum << " " << timer);

    sum = 0;
    timer.start();
    for(auto i = 0U; i < N; ++i)
    {
        sum += random_ints[i % random_ints.size()] * *rptr;
    }
    timer.stop();
    OUT("rptr: " << sum << " " << timer);

    sum = 0;
    timer.start();
    for(auto i = 0U; i < N; ++i)
    {
        // lock() creates a temporary shared_ptr on every iteration; sptr
        // keeps the int alive, so the result is never null here.
        sum += random_ints[i % random_ints.size()] * *wptr.lock();
    }
    timer.stop();
    OUT("wptr: " << sum << " " << timer);
}
Compiler flags:
g++ -std=c++14 -O3 -g0 -D NDEBUG -o bin/timecpp src/timecpp.cpp
Example Output:
sptr: 3318793206 1.30389 // shared pointer
rptr: 3318793206 1.2751 // raw pointer
wptr: 3318793206 3.13879 // weak pointer
这篇关于为什么通过weak_ptr调用这么慢?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!