谷歌稀疏哈希与杂音哈希函数 [英] google sparse hash with murmur hash function

查看:191
本文介绍了谷歌稀疏哈希与杂音哈希函数的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

如何在谷歌稀疏哈希映射中使用杂音哈希函数?
请问我可以一步一步地告诉我如何使用杂音散列函数?
我使用visual c ++。

目前我在google稀疏哈希映射中使用std :: hash哈希函数。
使用std :: hash和murmur hash实现的goole稀疏哈希映射之间是否存在任何性能差异?

解决方案

<您需要为 sparse_hash_map 模板提供散列函数。我检查了 https://sites.google.com/site/murmurhash/ ;它的接口不同于 std :: hash<> ,所以你需要编写适配器类。下面是一个工作示例(请注意,适配器仅涵盖一些情况):

  #include< iostream> 
#include< sparsehash / sparse_hash_map>

使用google :: sparse_hash_map; //命名空间,其中类默认存在
using namespace std;

// 64位平台的64位散列
//从https://sites.google.com/site/murmurhash/复制
uint64_t MurmurHash64A(const void * key,int len,unsigned int seed)
{
const uint64_t m = 0xc6a4a7935bd1e995;
const int r = 47;

uint64_t h = seed ^(len * m);

const uint64_t * data =(const uint64_t *)key;
const uint64_t * end = data +(len / 8);

while(data!= end)
{
uint64_t k = * data ++;

k * = m;
k ^ = k>> R等
k * = m;

h ^ = k;
h * = m;
}

const unsigned char * data2 =(const unsigned char *)data;

switch(len& 7)
{
case 7:h ^ = uint64_t(data2 [6])< 48;
情况6:h ^ = uint64_t(data2 [5])<< 40;
情况5:h ^ = uint64_t(data2 [4])<< 32;
情况4:h ^ = uint64_t(data2 [3])<< 24;
情况3:h ^ = uint64_t(data2 [2])<< 16;
情况2:h ^ = uint64_t(data2 [1])<< 8;
案例1:h ^ = uint64_t(data2 [0]);
h * = m;
};

h ^ = h>> R等
h * = m;
h ^ = h>> R等

return h;
}

//不带指针的类型的简单哈希适配器
模板< typename T>
struct MurmurHasher {
size_t operator()(const T& t)const {
return MurmurHash64A(& t,sizeof(t),0);
}
};

//专业化字符串
模板<>
struct MurmurHasher< string> {
size_t operator()(const string& t)const {
return MurmurHash64A(t.c_str(),t.size(),0);
}
};
$ b $结构eqstr
{
bool operator()(const char * s1,const char * s2)const
{
return(s1 == s2 )|| (s1& s2&& strcmp(s1,s2)== 0);
}
};
$ b $ int main()
{
sparse_hash_map< const char *,int,MurmurHasher< const char *>,eqstr>个月;

个月[january] = 31;
个月[二月] = 28;
个月[march] = 31;
个月[april] = 30;
个月[可能] = 31;
个月[june] = 30;
个月[7月] = 31;
个月[august] = 31;
个月[september] = 30;
个月[october] = 31;
个月[11月] = 30;
个月[十二月] = 31;

cout<< 9月 - ><<个月[九月]<< ENDL;
cout<< 4月 - ><<几个月[四月]< ENDL;
cout<< june - ><<个月[june]<< ENDL;
cout<< 11月 - ><<个月[11月] ENDL;

sparse_hash_map< string,int,MurmurHasher< string>>年份;
年[2012] = 366;
cout<< 2012 - ><<年[2012] <<<<< ENDL;
}

性能可能取决于您的数据模式,所以您应该自己做测试。


How to use murmur hash function in google sparse hash map? Could you please give me step by step instructions on how to use murmur hash function? I'm using visual c++.

Currently I'm using std::hash hash function in google sparse hash map. Is there any performance difference between the goole sparse hash maps which are implemented using std::hash and murmur hash?

解决方案

You need to provide hash function to sparse_hash_map template. I've checked https://sites.google.com/site/murmurhash/; it's interface is different than std::hash<>, so you'll need to write adapter class. Here's a working example (keep in mind that adapter only covers some cases):

#include <iostream>
#include <sparsehash/sparse_hash_map>

using google::sparse_hash_map;      // namespace where class lives by default
using namespace std;

// 64-bit hash for 64-bit platforms
// copied from https://sites.google.com/site/murmurhash/
uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed )
{
  const uint64_t m = 0xc6a4a7935bd1e995;
  const int r = 47;

  uint64_t h = seed ^ (len * m);

  const uint64_t * data = (const uint64_t *)key;
  const uint64_t * end = data + (len/8);

  while(data != end)
  {
    uint64_t k = *data++;

    k *= m; 
    k ^= k >> r; 
    k *= m; 

    h ^= k;
    h *= m; 
  }

  const unsigned char * data2 = (const unsigned char*)data;

  switch(len & 7)
  {
  case 7: h ^= uint64_t(data2[6]) << 48;
  case 6: h ^= uint64_t(data2[5]) << 40;
  case 5: h ^= uint64_t(data2[4]) << 32;
  case 4: h ^= uint64_t(data2[3]) << 24;
  case 3: h ^= uint64_t(data2[2]) << 16;
  case 2: h ^= uint64_t(data2[1]) << 8;
  case 1: h ^= uint64_t(data2[0]);
          h *= m;
  };

  h ^= h >> r;
  h *= m;
  h ^= h >> r;

  return h;
} 

// simple hash adapter for types without pointers
template<typename T> 
struct MurmurHasher {
    size_t operator()(const T& t) const {
        return MurmurHash64A(&t, sizeof(t), 0);
    }    
};

// specialization for strings
template<> 
struct MurmurHasher<string> {
    size_t operator()(const string& t) const {
        return MurmurHash64A(t.c_str(), t.size(), 0);
    }    
};

struct eqstr
{
  bool operator()(const char* s1, const char* s2) const
  {
    return (s1 == s2) || (s1 && s2 && strcmp(s1, s2) == 0);
  }
};

int main()
{
  sparse_hash_map<const char*, int, MurmurHasher<const char*>, eqstr> months;

  months["january"] = 31;
  months["february"] = 28;
  months["march"] = 31;
  months["april"] = 30;
  months["may"] = 31;
  months["june"] = 30;
  months["july"] = 31;
  months["august"] = 31;
  months["september"] = 30;
  months["october"] = 31;
  months["november"] = 30;
  months["december"] = 31;

  cout << "september -> " << months["september"] << endl;
  cout << "april     -> " << months["april"] << endl;
  cout << "june      -> " << months["june"] << endl;
  cout << "november  -> " << months["november"] << endl;

  sparse_hash_map<string, int, MurmurHasher<string>> years;
  years["2012"] = 366;
  cout << "2012      -> " << years["2012"] << endl;
}

Performance will probably depend on your data patterns, so you should do the tests yourself.

这篇关于谷歌稀疏哈希与杂音哈希函数的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆