attempt to fix murmur on big endian
[awesomized/libmemcached] src/libhashkit/murmur3.cc
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.

#include "libhashkit/hashkitcon.h"

#include "libhashkit/murmur3.h"

#include <cassert>
#include <cstring>
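
// Illustrative sketch of the note above. The HASHKIT_MURMUR3_EXAMPLES guard
// is hypothetical and never defined by the build: the x86 and x64 128-bit
// variants hash the same key and seed to different digests.
#ifdef HASHKIT_MURMUR3_EXAMPLES
#  include <cstdio>
static void example_variants_differ(void) {
  const char key[] = "libmemcached";
  uint32_t x86_out[4];
  uint64_t x64_out[2];
  MurmurHash3_x86_128(key, (int) (sizeof(key) - 1), 0, x86_out);
  MurmurHash3_x64_128(key, (int) (sizeof(key) - 1), 0, x64_out);
  // Each variant is only consistent with itself.
  printf("x86_128: %08x%08x%08x%08x\n", (unsigned) x86_out[0],
         (unsigned) x86_out[1], (unsigned) x86_out[2], (unsigned) x86_out[3]);
  printf("x64_128: %016llx%016llx\n", (unsigned long long) x64_out[0],
         (unsigned long long) x64_out[1]);
}
#endif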

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

#ifdef __GNUC__
#define FORCE_INLINE __attribute__((always_inline)) inline
#else
#define FORCE_INLINE inline
#endif

static FORCE_INLINE uint32_t rotl32 ( uint32_t x, int8_t r )
{
  return (x << r) | (x >> (32 - r));
}

static FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r )
{
  return (x << r) | (x >> (64 - r));
}

#define ROTL32(x,y) rotl32(x,y)
#define ROTL64(x,y) rotl64(x,y)

#define BIG_CONSTANT(x) (x##LLU)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

template <typename T>
static inline T getblock(const T *blocks, int i) {
  T b;
#if WORDS_BIGENDIAN
  const uint8_t *data = ((const uint8_t *) blocks) + i * sizeof(T);
  /* sl(s) places byte s (1-based) of the block into its big-endian bit
   * position, so the assembled value matches a native big-endian load
   * without requiring an aligned pointer. */
# define sl(s) (((T)data[s - 1]) << (8 * (sizeof(T) - s)))
  b = 0;
  switch (sizeof(T)) {
  case 8: b |= sl(8); /* fall through */
  case 7: b |= sl(7); /* fall through */
  case 6: b |= sl(6); /* fall through */
  case 5: b |= sl(5); /* fall through */
  case 4: b |= sl(4); /* fall through */
  case 3: b |= sl(3); /* fall through */
  case 2: b |= sl(2); /* fall through */
  case 1: b |= sl(1); break;
  default: assert(0);
  }
# undef sl
#else
  /* Little-endian hosts: memcpy is an alignment-safe native load. */
  memcpy(&b, ((const uint8_t *) blocks) + i * sizeof(T), sizeof(T));
#endif
  return b;
}
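
// Worked example of getblock (illustrative only; the guard macro is
// hypothetical and never defined by the build). Both paths return the
// host's native interpretation of the block, and neither requires the
// block pointer to be aligned.
#ifdef HASHKIT_MURMUR3_EXAMPLES
static void example_getblock(void) {
  const uint8_t bytes[4] = {0x01, 0x02, 0x03, 0x04};
  uint32_t v = getblock((const uint32_t *) bytes, 0);
#  if WORDS_BIGENDIAN
  assert(v == 0x01020304u); // byte 0 lands in the most significant position
#  else
  assert(v == 0x04030201u); // byte 0 lands in the least significant position
#  endif
  (void) v;
}
#endif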

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

static FORCE_INLINE uint32_t fmix32 ( uint32_t h )
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

//----------

static FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  k ^= k >> 33;

  return k;
}
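
// Avalanche in practice (illustrative only; hypothetical guard macro):
// two inputs differing in a single bit disagree in roughly half of their
// output bits after the finalization mix.
#ifdef HASHKIT_MURMUR3_EXAMPLES
static void example_avalanche(void) {
  uint32_t a = fmix32(0x00000001u);
  uint32_t b = fmix32(0x00000003u); // one input bit flipped
  int flipped = 0;
  for (uint32_t d = a ^ b; d; d &= d - 1) {
    flipped++; // count the differing output bits
  }
  (void) flipped; // typically close to 16 of 32
}
#endif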

//-----------------------------------------------------------------------------

void MurmurHash3_x86_32 ( const void * key, int len,
                          uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 4;
  int i;

  uint32_t h1 = seed;

  uint32_t c1 = 0xcc9e2d51;
  uint32_t c2 = 0x1b873593;

  //----------
  // body

  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);

  for(i = -nblocks; i; i++)
  {
    uint32_t k1 = getblock(blocks,i);

    k1 *= c1;
    k1 = ROTL32(k1,15);
    k1 *= c2;

    h1 ^= k1;
    h1 = ROTL32(h1,13);
    h1 = h1*5+0xe6546b64;
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);

  uint32_t k1 = 0;

  // Place the remaining 1-3 bytes into the same bit positions a
  // getblock() read of a zero-padded block would produce, so the tail
  // stays consistent with the body on either endianness.
  switch(len & 3)
  {
#if WORDS_BIGENDIAN
  case 3: k1 ^= tail[2] << 8;
    /* fall through */
  case 2: k1 ^= tail[1] << 16;
    /* fall through */
  case 1: k1 ^= tail[0] << 24;
#else
  case 3: k1 ^= tail[2] << 16;
    /* fall through */
  case 2: k1 ^= tail[1] << 8;
    /* fall through */
  case 1: k1 ^= tail[0];
#endif
    k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
  }

  //----------
  // finalization

  h1 ^= len;

  h1 = fmix32(h1);

  *(uint32_t*)out = h1;
}
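
// Typical call (illustrative only; hypothetical guard macro): hash a key
// with a fixed seed and read the 32-bit digest back through the out
// parameter.
#ifdef HASHKIT_MURMUR3_EXAMPLES
static uint32_t example_hash32(const char *key, size_t key_len) {
  uint32_t digest;
  MurmurHash3_x86_32(key, (int) key_len, 0xdeadbeef, &digest);
  return digest;
}
#endif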

//-----------------------------------------------------------------------------

void MurmurHash3_x86_128 ( const void * key, const int len,
                           uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;
  int i;

  uint32_t h1 = seed;
  uint32_t h2 = seed;
  uint32_t h3 = seed;
  uint32_t h4 = seed;

  uint32_t c1 = 0x239b961b;
  uint32_t c2 = 0xab0e9789;
  uint32_t c3 = 0x38b34ae5;
  uint32_t c4 = 0xa1e38b93;

  //----------
  // body

  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);

  for(i = -nblocks; i; i++)
  {
    uint32_t k1 = getblock(blocks,i*4+0);
    uint32_t k2 = getblock(blocks,i*4+1);
    uint32_t k3 = getblock(blocks,i*4+2);
    uint32_t k4 = getblock(blocks,i*4+3);

    k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;

    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;

    k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;

    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;

    k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;

    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;

    k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;

    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

  uint32_t k1 = 0;
  uint32_t k2 = 0;
  uint32_t k3 = 0;
  uint32_t k4 = 0;

  switch(len & 15)
  {
  case 15: k4 ^= tail[14] << 16;
    /* fall through */
  case 14: k4 ^= tail[13] << 8;
    /* fall through */
  case 13: k4 ^= tail[12] << 0;
    k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
    /* fall through */
  case 12: k3 ^= tail[11] << 24;
    /* fall through */
  case 11: k3 ^= tail[10] << 16;
    /* fall through */
  case 10: k3 ^= tail[ 9] << 8;
    /* fall through */
  case  9: k3 ^= tail[ 8] << 0;
    k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
    /* fall through */
  case  8: k2 ^= tail[ 7] << 24;
    /* fall through */
  case  7: k2 ^= tail[ 6] << 16;
    /* fall through */
  case  6: k2 ^= tail[ 5] << 8;
    /* fall through */
  case  5: k2 ^= tail[ 4] << 0;
    k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
    /* fall through */
  case  4: k1 ^= tail[ 3] << 24;
    /* fall through */
  case  3: k1 ^= tail[ 2] << 16;
    /* fall through */
  case  2: k1 ^= tail[ 1] << 8;
    /* fall through */
  case  1: k1 ^= tail[ 0] << 0;
    k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
  }

  //----------
  // finalization

  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  h1 = fmix32(h1);
  h2 = fmix32(h2);
  h3 = fmix32(h3);
  h4 = fmix32(h4);

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  ((uint32_t*)out)[0] = h1;
  ((uint32_t*)out)[1] = h2;
  ((uint32_t*)out)[2] = h3;
  ((uint32_t*)out)[3] = h4;
}

//-----------------------------------------------------------------------------

void MurmurHash3_x64_128 ( const void * key, const int len,
                           const uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;
  int i;

  uint64_t h1 = seed;
  uint64_t h2 = seed;

  uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

  //----------
  // body

  const uint64_t * blocks = (const uint64_t *)(data);

  for(i = 0; i < nblocks; i++)
  {
    uint64_t k1 = getblock(blocks,i*2+0);
    uint64_t k2 = getblock(blocks,i*2+1);

    k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;

    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;

    k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;

    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

  uint64_t k1 = 0;
  uint64_t k2 = 0;

  switch(len & 15)
  {
  case 15: k2 ^= (uint64_t)(tail[14]) << 48;
    /* fall through */
  case 14: k2 ^= (uint64_t)(tail[13]) << 40;
    /* fall through */
  case 13: k2 ^= (uint64_t)(tail[12]) << 32;
    /* fall through */
  case 12: k2 ^= (uint64_t)(tail[11]) << 24;
    /* fall through */
  case 11: k2 ^= (uint64_t)(tail[10]) << 16;
    /* fall through */
  case 10: k2 ^= (uint64_t)(tail[ 9]) << 8;
    /* fall through */
  case  9: k2 ^= (uint64_t)(tail[ 8]) << 0;
    k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
    /* fall through */
  case  8: k1 ^= (uint64_t)(tail[ 7]) << 56;
    /* fall through */
  case  7: k1 ^= (uint64_t)(tail[ 6]) << 48;
    /* fall through */
  case  6: k1 ^= (uint64_t)(tail[ 5]) << 40;
    /* fall through */
  case  5: k1 ^= (uint64_t)(tail[ 4]) << 32;
    /* fall through */
  case  4: k1 ^= (uint64_t)(tail[ 3]) << 24;
    /* fall through */
  case  3: k1 ^= (uint64_t)(tail[ 2]) << 16;
    /* fall through */
  case  2: k1 ^= (uint64_t)(tail[ 1]) << 8;
    /* fall through */
  case  1: k1 ^= (uint64_t)(tail[ 0]) << 0;
    k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
  }

  //----------
  // finalization

  h1 ^= len; h2 ^= len;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  ((uint64_t*)out)[0] = h1;
  ((uint64_t*)out)[1] = h2;
}

//-----------------------------------------------------------------------------