快速C ++容器,如C#HashSet< T>和Dictionary< K,V>? [英] Fast C++ container like the C# HashSet<T> and Dictionary<K,V>?
问题描述
我在C#中使用了很多HashSet和Dictionary,发现它们非常快...
我试过使用std :: map和std: :hash_map和我发现它们在比较非常缓慢。这听起来像预期的行为吗?在使用std :: hash_map时,我可能做错了什么?还是有更好的C ++哈希容器吗?
我在散列int32s,通常大约有100,000个。
更新:我在C#和C ++中创建了一个重现。它运行两个试验,他们在C#中花费19ms和13ms,在C ++中花费约11,000ms。我的C ++代码肯定有一些错误:)
(两者均以发布版本运行,都是控制台应用程序)
C#输出:
在交集中找到511个值,在19 ms
找到508个值交集,在13 ms
C ++输出:
在交集中找到308个值,在11764.7ms
在交集中找到316个值,在11742.5ms
C ++输出(使用stdext :: hash_map代替std :: map)
在交集中找到300个值,在383.552ms
在2277.02ms中找到306个值
C ++输出(使用stdext :: hash_map,版本为x64版本)
交叉点,在1037.67ms
在交集中找到302个值,在3663.71ms
- Set2没有按照我想在C ++中填充,我希望它与Set1有50%的交集
C#:在C#中,我不得不将我的随机数乘以10因为某种原因,甚至让它们部分不相交
static void Main(string [] args)
{
int start = DateTime.Now.Millisecond ;
int intersectionSize = runIntersectionTest();
int duration = DateTime.Now.Millisecond - start;
Console.WriteLine(String.Format(Found {0} values in the intersection,in {1} ms,intersectionSize,duration));
start = DateTime.Now.Millisecond;
intersectionSize = runIntersectionTest();
duration = DateTime.Now.Millisecond - start;
Console.WriteLine(String.Format(Found {0} values in the intersection,in {1} ms,intersectionSize,duration));
Console.ReadKey();
}
static int runIntersectionTest()
{
随机随机= new Random(DateTime.Now.Millisecond);
字典< int,int> theMap = new Dictionary< int,int>();
List< int> set1 = new List< int>();
List< int> set2 = new List< int>();
//为set1创建100,000个值
for(int i = 0; i <100000; i ++)
{
int value = 1000000000 + i;
set1.Add(value);
}
//为set2创建1,000个值
for(int i = 0; i <1000; i ++)
{
int value = 1000000000 +(random.Next()%200000 + 1);
set2.Add(value);
}
//现在通过填充映射相交两个集合
foreach(set1中的int值)
{
theMap [value] = 1 ;
}
int intersectionSize = 0;
foreach(set2中的int值)
{
int count;
if(theMap.TryGetValue(value,out count))
{
intersectionSize ++;
theMap [value] = 2;
}
}
return intersectionSize;
}
C ++:
int runIntersectionTest()
{
std :: map< int,int&地图;
vector< int> set1;
vector< int> set2;
//为set1创建100,000个值
for(int i = 0; i <100000; i ++)
{
int value = 1000000000 + i;
set1.push_back(value);
}
//为set2创建1,000个值
for(int i = 0; i <1000; i ++)
{
int random = rand()%200000 + 1;
random * = 10;
int value = 1000000000 + random;
set2.push_back(value);
}
//现在通过填充映射来交叉两个集合
for(vector< int> iterator iterator = set1.begin(); iterator!= set1。 end(); iterator ++)
{
int value = * iterator;
theMap [value] = 1;
}
int intersectionSize = 0;
for(vector< int> iterator iterator = set2.begin(); iterator!= set2.end(); iterator ++)
{
int value = * iterator ;
map< int,int> :: iterator foundValue = theMap.find(value);
if(foundValue!= theMap.end())
{
theMap [value] = 2;
intersectionSize ++;
}
}
return intersectionSize;
}
int _tmain(int argc,_TCHAR * argv [])
{
srand(time(NULL));
定时器;
int intersectionSize = runIntersectionTest();
timer.Stop();
cout<< Found< intersectionSize<< 交叉中的值,在< timer.GetMilliseconds()<< ms< endl;
timer.Reset();
intersectionSize = runIntersectionTest();
timer.Stop();
cout<< Found< intersectionSize<< 交叉中的值,在< timer.GetMilliseconds()<< ms< endl;
getchar();
return 0;
}
Hash_map和hash_set是非排他性的,标准, unordered_map 和 unordered_set 最可能很快就是标准版本。没有一个再现器,我不认为这将是远远的。
我编译提供的示例在MS Visual Studio 2008 v9.0.30729.1下,作为Visual C ++ - > Win32 - >控制台应用程序(虽然我滚动我自己的Timer类,因为我不知道你使用什么)。在调试下,我获得了1000毫秒的时间,但是在发布之下的编译是50毫秒。
#include< vector&
#include< iostream>
#include< map>
#include< stdio.h>
#include< stdlib.h>
#include< time.h>
#include< windows.h>
typedef struct {
LARGE_INTEGER start;
LARGE_INTEGER stop;
} stopWatch;
class CStopWatch {
private:
stopWatch timer;
LARGE_INTEGER frequency;
double LIToSecs(LARGE_INTEGER& L);
public:
CStopWatch();
void startTimer();
void stopTimer();
double getElapsedTime();
};
double CStopWatch :: LIToSecs(LARGE_INTEGER& L){
return((double)L.QuadPart /(double )frequency.QuadPart);
}
CStopWatch :: CStopWatch(){
timer.start.QuadPart = 0;
timer.stop.QuadPart = 0;
QueryPerformanceFrequency(& frequency);
}
void CStopWatch :: startTimer(){
QueryPerformanceCounter(& timer.start);
}
void CStopWatch :: stopTimer(){
QueryPerformanceCounter(& timer.stop);
}
double CStopWatch :: getElapsedTime(){
LARGE_INTEGER time;
time.QuadPart = timer.stop.QuadPart - timer.start.QuadPart;
return LIToSecs(time);
}
using namespace std;
int runIntersectionTest()
{
std :: map< int,int>地图;
vector< int> set1;
vector< int> set2;
//为set1创建100,000个值
for(int i = 0; i <100000; i ++)
{
int value = 1000000000 + i;
set1.push_back(value);
}
//为set2创建1,000个值
for(int i = 0; i <1000; i ++)
{
int random = rand()%200000 + 1;
random * = 10;
int value = 1000000000 + random;
set2.push_back(value);
}
//现在通过填充映射
来交叉两个集合(vector< int> iterator iterator = set1.begin(); iterator!= set1。 end(); iterator ++)
{
int value = * iterator;
theMap [value] = 1;
}
int intersectionSize = 0;
for(vector< int> iterator iterator = set2.begin(); iterator!= set2.end(); iterator ++)
{
int value = * iterator ;
map< int,int> :: iterator foundValue = theMap.find(value);
if(foundValue!= theMap.end())
{
theMap [value] = 2;
intersectionSize ++;
}
}
return intersectionSize;
}
int main(int argc,char * argv [])
{
srand(time(NULL));
int tests = 2;
while(tests - ){
CStopWatch timer;
timer.startTimer();
int intersectionSize = runIntersectionTest();
timer.stopTimer();
cout<< Found< intersectionSize<< 交叉中的值,在< timer.getElapsedTime()< s\r\\\
;
}
getchar();
return 0;
}
(我会尝试用unordered_map但是我的版本没有)。我怀疑你的C ++安装有一些问题。
I've used HashSet and Dictionary a lot in C#, and found them very fast...
I've tried using std::map and std::hash_map and am finding them very slow in comparision. Does this sound like expected behaviour? Is there something I might be doing wrong in my use of std::hash_map?
Or, is there a better C++ Hash container out there?
I'm hashing int32s, usually around 100,000 of them.
Update: I created a repro in C# and C++. It runs two trials, they take 19ms and 13ms in C#, and about 11,000ms in C++. There must be something really wrong with my C++ code :)
(Both were run as Release builds, both are Console apps)
C# Output:
Found 511 values in the intersection, in 19 ms
Found 508 values in the intersection, in 13 ms
C++ Output:
Found 308 values in the intersection, in 11764.7ms
Found 316 values in the intersection, in 11742.8ms
C++ Output (using stdext::hash_map instead of std::map)
Found 300 values in the intersection, in 383.552ms
Found 306 values in the intersection, in 2277.02ms
C++ Output (using stdext::hash_map, a release x64 build)
Found 292 values in the intersection, in 1037.67ms
Found 302 values in the intersection, in 3663.71ms
Notes:
- Set2 is not getting populated quite as I wanted in C++, I was expecting it to have a 50% intersection with Set1 (as it does in C#), but I had to multiply my random number by 10 for some reason to even get them to partially not intersect
C#:
static void Main(string[] args)
{
int start = DateTime.Now.Millisecond;
int intersectionSize = runIntersectionTest();
int duration = DateTime.Now.Millisecond - start;
Console.WriteLine(String.Format("Found {0} values in the intersection, in {1} ms", intersectionSize, duration));
start = DateTime.Now.Millisecond;
intersectionSize = runIntersectionTest();
duration = DateTime.Now.Millisecond - start;
Console.WriteLine(String.Format("Found {0} values in the intersection, in {1} ms", intersectionSize, duration));
Console.ReadKey();
}
static int runIntersectionTest()
{
Random random = new Random(DateTime.Now.Millisecond);
Dictionary<int,int> theMap = new Dictionary<int,int>();
List<int> set1 = new List<int>();
List<int> set2 = new List<int>();
// Create 100,000 values for set1
for ( int i = 0; i < 100000; i++ )
{
int value = 1000000000 + i;
set1.Add(value);
}
// Create 1,000 values for set2
for ( int i = 0; i < 1000; i++ )
{
int value = 1000000000 + (random.Next() % 200000 + 1);
set2.Add(value);
}
// Now intersect the two sets by populating the map
foreach( int value in set1 )
{
theMap[value] = 1;
}
int intersectionSize = 0;
foreach ( int value in set2 )
{
int count;
if ( theMap.TryGetValue(value, out count ) )
{
intersectionSize++;
theMap[value] = 2;
}
}
return intersectionSize;
}
C++:
int runIntersectionTest()
{
std::map<int,int> theMap;
vector<int> set1;
vector<int> set2;
// Create 100,000 values for set1
for ( int i = 0; i < 100000; i++ )
{
int value = 1000000000 + i;
set1.push_back(value);
}
// Create 1,000 values for set2
for ( int i = 0; i < 1000; i++ )
{
int random = rand() % 200000 + 1;
random *= 10;
int value = 1000000000 + random;
set2.push_back(value);
}
// Now intersect the two sets by populating the map
for ( vector<int>::iterator iterator = set1.begin(); iterator != set1.end(); iterator++ )
{
int value = *iterator;
theMap[value] = 1;
}
int intersectionSize = 0;
for ( vector<int>::iterator iterator = set2.begin(); iterator != set2.end(); iterator++ )
{
int value = *iterator;
map<int,int>::iterator foundValue = theMap.find(value);
if ( foundValue != theMap.end() )
{
theMap[value] = 2;
intersectionSize++;
}
}
return intersectionSize;
}
int _tmain(int argc, _TCHAR* argv[])
{
srand ( time(NULL) );
Timer timer;
int intersectionSize = runIntersectionTest();
timer.Stop();
cout << "Found " << intersectionSize << " values in the intersection, in " << timer.GetMilliseconds() << "ms" << endl;
timer.Reset();
intersectionSize = runIntersectionTest();
timer.Stop();
cout << "Found " << intersectionSize << " values in the intersection, in " << timer.GetMilliseconds() << "ms" << endl;
getchar();
return 0;
}
Hash_map and hash_set are non-standard, unordered_map and unordered_set are the most likely soon to be standard versions. Without having a reproducer, I don't think this is going to get far though. Under the hood, they are the same data structures, so they should have similar performance.
I compiled the provided sample under MS Visual Studio 2008 v9.0.30729.1, as Visual C++ -> Win32 -> Console Application (though I rolled my own Timer class because I wasn't sure what you were using). Under debug, I got times of 1000 ms, but compiling under release was 50 ms.
#include <vector>
#include <iostream>
#include <map>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <windows.h>
typedef struct {
LARGE_INTEGER start;
LARGE_INTEGER stop;
} stopWatch;
class CStopWatch {
private:
stopWatch timer;
LARGE_INTEGER frequency;
double LIToSecs( LARGE_INTEGER & L);
public:
CStopWatch();
void startTimer( );
void stopTimer( );
double getElapsedTime();
};
double CStopWatch::LIToSecs( LARGE_INTEGER & L) {
return ((double)L.QuadPart /(double)frequency.QuadPart) ;
}
CStopWatch::CStopWatch(){
timer.start.QuadPart=0;
timer.stop.QuadPart=0;
QueryPerformanceFrequency( &frequency ) ;
}
void CStopWatch::startTimer( ) {
QueryPerformanceCounter(&timer.start) ;
}
void CStopWatch::stopTimer( ) {
QueryPerformanceCounter(&timer.stop) ;
}
double CStopWatch::getElapsedTime() {
LARGE_INTEGER time;
time.QuadPart = timer.stop.QuadPart - timer.start.QuadPart;
return LIToSecs( time) ;
}
using namespace std;
int runIntersectionTest()
{
std::map<int,int> theMap;
vector<int> set1;
vector<int> set2;
// Create 100,000 values for set1
for ( int i = 0; i < 100000; i++ )
{
int value = 1000000000 + i;
set1.push_back(value);
}
// Create 1,000 values for set2
for ( int i = 0; i < 1000; i++ )
{
int random = rand() % 200000 + 1;
random *= 10;
int value = 1000000000 + random;
set2.push_back(value);
}
// Now intersect the two sets by populating the map
for ( vector<int>::iterator iterator = set1.begin(); iterator != set1.end(); iterator++ )
{
int value = *iterator;
theMap[value] = 1;
}
int intersectionSize = 0;
for ( vector<int>::iterator iterator = set2.begin(); iterator != set2.end(); iterator++ )
{
int value = *iterator;
map<int,int>::iterator foundValue = theMap.find(value);
if ( foundValue != theMap.end() )
{
theMap[value] = 2;
intersectionSize++;
}
}
return intersectionSize;
}
int main(int argc, char* argv[])
{
srand ( time(NULL) );
int tests = 2;
while(tests--){
CStopWatch timer;
timer.startTimer();
int intersectionSize = runIntersectionTest();
timer.stopTimer();
cout << "Found " << intersectionSize << " values in the intersection, in " << timer.getElapsedTime() << "s\r\n";
}
getchar();
return 0;
}
(I would try with unordered_map but my version doesn't have it). I suspect there is some problem in your setup for C++.
这篇关于快速C ++容器,如C#HashSet< T>和Dictionary< K,V>?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!