libhashkit: fix UB on unaligned access
[awesomized/libmemcached] / libhashkit / murmur.cc
1 /* vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
2 *
3 * HashKit library
4 *
5 * Copyright (C) 2011-2012 Data Differential, http://datadifferential.com/
6 * Copyright (C) 2006-2009 Brian Aker All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met:
11 *
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * * Redistributions in binary form must reproduce the above
16 * copyright notice, this list of conditions and the following disclaimer
17 * in the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * * The names of its contributors may not be used to endorse or
21 * promote products derived from this software without specific prior
22 * written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 *
36 */
37
38 /*
39 "Murmur" hash provided by Austin, tanjent@gmail.com
40 http://murmurhash.googlepages.com/
41
42 Note - This code makes a few assumptions about how your machine behaves -
43
44 1. (No longer required) Each 4-byte value is copied out with memcpy(), so keys may start at any address, aligned or not
45 2. sizeof(int) == 4
46
47 And it has a few limitations -
48 1. It will not work incrementally.
49 2. It will not produce the same results on little-endian and big-endian
50 machines.
51
52 Updated to murmur2 hash - BP
53 */
54
55 #include <libhashkit/common.h>
56
57 #ifdef HAVE_MURMUR_HASH
58
59 #include <cstring>
60
/**
 * Compute the 32-bit MurmurHash2 of a key.
 *
 * The seed is derived from the key length (0xdeadbeef * length), so the
 * empty key always hashes to 0. Whole 32-bit words are loaded in native
 * byte order, which means keys of four or more bytes hash differently on
 * little-endian and big-endian machines.
 *
 * @param key      bytes to hash; may start at any alignment and is not
 *                 dereferenced when length is 0
 * @param length   number of bytes in key
 * @param context  unused
 * @return the 32-bit hash value
 */
uint32_t hashkit_murmur(const char *key, size_t length, void *context)
{
  /*
    'm' and 'r' are mixing constants generated offline. They're not
    really 'magic', they just happen to work well.
  */
  const uint32_t m= 0x5bd1e995;
  const int r= 24;
  const uint32_t seed= (0xdeadbeef * (uint32_t)length);

  // Initialize the hash to a 'random' value
  uint32_t h= seed ^ (uint32_t)length;

  const unsigned char *data= (const unsigned char *)key;
  (void)context;

  // Mix 4 bytes at a time into the hash
  while (length >= sizeof(uint32_t))
  {
    /*
      memcpy() keeps the load well-defined for unaligned keys. Using a
      fixed-width word and sizeof(k) for both the copy and the advance
      guarantees that the bytes read and the bytes consumed always match,
      independent of sizeof(unsigned int).
    */
    uint32_t k;
    memcpy(&k, data, sizeof(k));

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += sizeof(k);
    length -= sizeof(k);
  }

  // Handle the last few bytes of the input array (0 to 3 remain here)
  switch (length)
  {
  case 3:
    h ^= ((uint32_t)data[2]) << 16;
    /* fall through */
  case 2:
    h ^= ((uint32_t)data[1]) << 8;
    /* fall through */
  case 1:
    h ^= data[0];
    h *= m;
    break;
  default:
    break;
  }

  /*
    Do a few final mixes of the hash to ensure the last few bytes are
    well-incorporated.
  */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
120
121 #else
/*
  Stand-in used when HAVE_MURMUR_HASH is not defined: every argument is
  ignored and the reported hash is always 0.
*/
uint32_t hashkit_murmur(const char *key, size_t length, void *context)
{
  (void)key;
  (void)length;
  (void)context;

  return 0;
}
126 #endif