Vita
cache_hash.h
Go to the documentation of this file.
1
13#if !defined(VITA_CACHE_HASH_H)
14#define VITA_CACHE_HASH_H
15
16#include <cstddef>
17#include <cstring>
18#include "kernel/common.h"
19
20namespace vita
21{
26struct hash_t
27{
28 explicit hash_t(std::uint64_t a = 0, std::uint64_t b = 0) : data{a, b} {}
29
31 void clear() { data[0] = data[1] = 0; }
32
34 bool operator==(hash_t h) const
35 { return data[0] == h.data[0] && data[1] == h.data[1]; }
36
38 bool operator!=(hash_t h) const
39 { return data[0] != h.data[0] || data[1] != h.data[1]; }
40
48 {
49 // This is the simple algorithm used in `Apache.Commons.HashCodeBuilder`.
50 // It uses simple prime number multiplication and is a special key of
51 // Bob Jenkins' idea (`m * H(A) + H(B)`0.
52 data[0] = data[0] * 37 + h.data[0];
53 data[1] = data[1] * 37 + h.data[1];
54
55 // An alternative from Boost is:
56 //data[0] ^= h.data[0] + 0x9e3779b9 + (data[0] << 6) + (data[0] >> 2);
57 //data[1] ^= h.data[1] + 0x9e3779b9 + (data[1] << 6) + (data[1] >> 2);
58 }
59
61 bool empty() const { return !data[0] && !data[1]; }
62
63 // Serialization.
64 bool load(std::istream &);
65 bool save(std::ostream &) const;
66
67 // Data members.
68 std::uint_least64_t data[2];
69};
70
71std::ostream &operator<<(std::ostream &, hash_t);
72
73#if defined(_MSC_VER)
74# define ROTL64(x, y) _rotl64(x, y)
75#else
82inline std::uint64_t rotl64(std::uint64_t x, std::uint8_t r)
83{
84 return (x << r) | (x >> (64 - r));
85}
86#define ROTL64(x, y) rotl64(x, y)
87#endif
88
101{
102public:
103 static hash_t hash128(const void *const, std::size_t, std::uint32_t = 1973);
104
105private:
106 static std::uint64_t fmix(std::uint64_t);
107 static std::uint32_t fmix(std::uint32_t);
108 template<class T> static T get_block(const T *, std::size_t);
109};
110
119inline hash_t murmurhash3::hash128(const void *const data, std::size_t len,
120 std::uint32_t seed)
121{
122 const auto n_blocks(len / 16); // block size is 128bit
123
124 const std::uint64_t c1(0x87c37b91114253d5), c2(0x4cf5ad432745937f);
125
126 // Body.
127 const auto *blocks(reinterpret_cast<const std::uint64_t *>(data));
128 hash_t h(seed, seed);
129
130 for (std::size_t i(0); i < n_blocks; ++i)
131 {
132 std::uint64_t k1(get_block(blocks, i * 2 + 0));
133 std::uint64_t k2(get_block(blocks, i * 2 + 1));
134
135 k1 *= c1;
136 k1 = ROTL64(k1, 31);
137 k1 *= c2;
138 h.data[0] ^= k1;
139
140 h.data[0] = ROTL64(h.data[0], 27);
141 h.data[0] += h.data[1];
142 h.data[0] = h.data[0] * 5 + 0x52dce729;
143
144 k2 *= c2;
145 k2 = ROTL64(k2, 33);
146 k2 *= c1;
147 h.data[1] ^= k2;
148
149 h.data[1] = ROTL64(h.data[1], 31);
150 h.data[1] += h.data[0];
151 h.data[1] = h.data[1] * 5 + 0x38495ab5;
152 }
153
154 // Tail.
155 auto tail(reinterpret_cast<const std::byte *>(data) + n_blocks * 16);
156
157 std::uint64_t k1(0), k2(0);
158
159 switch (len & 15)
160 {
161 case 15: k2 ^= std::uint64_t(tail[14]) << 48; [[fallthrough]];
162 case 14: k2 ^= std::uint64_t(tail[13]) << 40; [[fallthrough]];
163 case 13: k2 ^= std::uint64_t(tail[12]) << 32; [[fallthrough]];
164 case 12: k2 ^= std::uint64_t(tail[11]) << 24; [[fallthrough]];
165 case 11: k2 ^= std::uint64_t(tail[10]) << 16; [[fallthrough]];
166 case 10: k2 ^= std::uint64_t(tail[ 9]) << 8; [[fallthrough]];
167 case 9: k2 ^= std::uint64_t(tail[ 8]) << 0;
168 k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h.data[1] ^= k2;
169 [[fallthrough]];
170 case 8: k1 ^= std::uint64_t(tail[ 7]) << 56; [[fallthrough]];
171 case 7: k1 ^= std::uint64_t(tail[ 6]) << 48; [[fallthrough]];
172 case 6: k1 ^= std::uint64_t(tail[ 5]) << 40; [[fallthrough]];
173 case 5: k1 ^= std::uint64_t(tail[ 4]) << 32; [[fallthrough]];
174 case 4: k1 ^= std::uint64_t(tail[ 3]) << 24; [[fallthrough]];
175 case 3: k1 ^= std::uint64_t(tail[ 2]) << 16; [[fallthrough]];
176 case 2: k1 ^= std::uint64_t(tail[ 1]) << 8; [[fallthrough]];
177 case 1: k1 ^= std::uint64_t(tail[ 0]) << 0;
178 k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; h.data[0] ^= k1;
179 }
180
181 // Finalization.
182 h.data[0] ^= len;
183 h.data[1] ^= len;
184
185 h.data[0] += h.data[1];
186 h.data[1] += h.data[0];
187
188 h.data[0] = fmix(h.data[0]);
189 h.data[1] = fmix(h.data[1]);
190
191 h.data[0] += h.data[1];
192 h.data[1] += h.data[0];
193
194 return h;
195}
196
197inline std::uint64_t murmurhash3::fmix(std::uint64_t k)
198{
199 // The constants were generated by a simple simulated-annealing algorithm.
200 k ^= k >> 33;
201 k *= 0xff51afd7ed558ccd;
202 k ^= k >> 33;
203 k *= 0xc4ceb9fe1a85ec53;
204 k ^= k >> 33;
205
206 return k;
207}
208
209inline std::uint32_t murmurhash3::fmix(std::uint32_t k)
210{
211 // The constants were generated by a simple simulated-annealing algorithm.
212 k ^= k >> 16;
213 k *= 0x85ebca6b;
214 k ^= k >> 13;
215 k *= 0xc2b2ae35;
216 k ^= k >> 16;
217
218 return k;
219}
220
221template<class T>
222inline T murmurhash3::get_block(const T *p, std::size_t i)
223{
224 // The reason we do a memcpy() instead of simply returning `p[i]` is because
225 // doing it this way avoids violations of the strict aliasing rule.
226
227 T tmp;
228 std::memcpy(&tmp, p + i, sizeof(T));
229 return tmp;
230}
231
232typedef murmurhash3 hash;
233
234} // namespace vita
235
236#endif // include guard
MurmurHash3 (https://github.com/aappleby/smhasher) by Austin Appleby.
Definition: cache_hash.h:101
static hash_t hash128(const void *const, std::size_t, std::uint32_t=1973)
Hashes a single message in one call, return 128-bit output.
Definition: cache_hash.h:119
The main namespace for the project.
std::uint64_t rotl64(std::uint64_t x, std::uint8_t r)
Definition: cache_hash.h:82
std::ostream & operator<<(std::ostream &o, hash_t h)
Mainly useful for debugging / testing.
Definition: cache_hash.cc:56
A 128bit unsigned integer used as individual's signature / hash table look-up key.
Definition: cache_hash.h:27
bool empty() const
We assume that a string of 128 zero bits means empty.
Definition: cache_hash.h:61
bool operator!=(hash_t h) const
Standard inequality operator for hash signature.
Definition: cache_hash.h:38
bool save(std::ostream &) const
Definition: cache_hash.cc:42
bool operator==(hash_t h) const
Standard equality operator for hash signature.
Definition: cache_hash.h:34
void clear()
Resets the content of the object.
Definition: cache_hash.h:31
bool load(std::istream &)
Definition: cache_hash.cc:26
void combine(hash_t h)
Used to combine multiple hashes.
Definition: cache_hash.h:47