move repository from m6w6 to awesomized
[m6w6/libmemcached] / src / libhashkit / jenkins.cc
1 /*
2 +--------------------------------------------------------------------+
3 | libmemcached - C/C++ Client Library for memcached |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted under the terms of the BSD license. |
7 | You should have received a copy of the license in a bundled file |
8 | named LICENSE; in case you did not receive a copy you can review |
9 | the terms online at: https://opensource.org/licenses/BSD-3-Clause |
10 +--------------------------------------------------------------------+
11 | Copyright (c) 2006-2014 Brian Aker https://datadifferential.com/ |
12 | Copyright (c) 2020-2021 Michael Wallner https://awesome.co/ |
13 +--------------------------------------------------------------------+
14 */
15
16 #include "libhashkit/common.h"
17
18 #define hashsize(n) ((uint32_t) 1 << (n))
19 #define hashmask(n) (hashsize(n) - 1)
20 #define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
21
22 #define mix(a, b, c) \
23 { \
24 a -= c; \
25 a ^= rot(c, 4); \
26 c += b; \
27 b -= a; \
28 b ^= rot(a, 6); \
29 a += c; \
30 c -= b; \
31 c ^= rot(b, 8); \
32 b += a; \
33 a -= c; \
34 a ^= rot(c, 16); \
35 c += b; \
36 b -= a; \
37 b ^= rot(a, 19); \
38 a += c; \
39 c -= b; \
40 c ^= rot(b, 4); \
41 b += a; \
42 }
43
44 #define final(a, b, c) \
45 { \
46 c ^= b; \
47 c -= rot(b, 14); \
48 a ^= c; \
49 a -= rot(c, 11); \
50 b ^= a; \
51 b -= rot(a, 25); \
52 c ^= b; \
53 c -= rot(b, 16); \
54 a ^= c; \
55 a -= rot(c, 4); \
56 b ^= a; \
57 b -= rot(a, 14); \
58 c ^= b; \
59 c -= rot(b, 24); \
60 }
61
62 #define JENKINS_INITVAL 13
63
64 /*
65 jenkins_hash() -- hash a variable-length key into a 32-bit value
66 k : the key (the unaligned variable-length array of bytes)
67 length : the length of the key, counting by bytes
68 initval : can be any 4-byte value
69 Returns a 32-bit value. Every bit of the key affects every bit of
70 the return value. Two keys differing by one or two bits will have
71 totally different hash values.
72
73 The best hash table sizes are powers of 2. There is no need to do
74 mod a prime (mod is sooo slow!). If you need less than 32 bits,
75 use a bitmask. For example, if you need only 10 bits, do
76 h = (h & hashmask(10));
77 In which case, the hash table should have hashsize(10) elements.
78 */
79
80 #if HAVE_ASAN
81 __attribute__((no_sanitize_address, no_sanitize("address")))
82 #endif
83 uint32_t
84 hashkit_jenkins(const char *key, size_t length, void *) {
85 uint32_t a, b, c; /* internal state */
86 #if !WORDS_BIGENDIAN
87 union {
88 const void *ptr;
89 size_t i;
90 } u;
91 u.ptr = key;
92 #endif
93
94 /* Set up the internal state */
95 a = b = c = 0xdeadbeef + ((uint32_t) length) + JENKINS_INITVAL;
96
97 #if !WORDS_BIGENDIAN
98 if ((u.i & 0x3) == 0) {
99 const uint32_t *k = (const uint32_t *) key; /* read 32-bit chunks */
100
101 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
102 while (length > 12) {
103 a += k[0];
104 b += k[1];
105 c += k[2];
106 mix(a, b, c);
107 length -= 12;
108 k += 3;
109 }
110
111 /*----------------------------- handle the last (probably partial) block */
112 /*
113 * "k[2]&0xffffff" actually reads beyond the end of the string, but
114 * then masks off the part it's not allowed to read. Because the
115 * string is aligned, the masked-off tail is in the same word as the
116 * rest of the string. Every machine with memory protection I've seen
117 * does it on word boundaries, so is OK with this. But VALGRIND will
118 * still catch it and complain. The masking trick does make the hash
119 * noticably faster for short strings (like English words).
120 */
121 switch (length) {
122 case 12:
123 c += k[2];
124 b += k[1];
125 a += k[0];
126 break;
127 case 11:
128 c += k[2] & 0xffffff;
129 b += k[1];
130 a += k[0];
131 break;
132 case 10:
133 c += k[2] & 0xffff;
134 b += k[1];
135 a += k[0];
136 break;
137 case 9:
138 c += k[2] & 0xff;
139 b += k[1];
140 a += k[0];
141 break;
142 case 8:
143 b += k[1];
144 a += k[0];
145 break;
146 case 7:
147 b += k[1] & 0xffffff;
148 a += k[0];
149 break;
150 case 6:
151 b += k[1] & 0xffff;
152 a += k[0];
153 break;
154 case 5:
155 b += k[1] & 0xff;
156 a += k[0];
157 break;
158 case 4:
159 a += k[0];
160 break;
161 case 3:
162 a += k[0] & 0xffffff;
163 break;
164 case 2:
165 a += k[0] & 0xffff;
166 break;
167 case 1:
168 a += k[0] & 0xff;
169 break;
170 case 0:
171 return c; /* zero length strings require no mixing */
172 default:
173 return c;
174 }
175
176 } else if ((u.i & 0x1) == 0) {
177 const uint16_t *k = (const uint16_t *) key; /* read 16-bit chunks */
178 const uint8_t *k8;
179
180 /*--------------- all but last block: aligned reads and different mixing */
181 while (length > 12) {
182 a += k[0] + (((uint32_t) k[1]) << 16);
183 b += k[2] + (((uint32_t) k[3]) << 16);
184 c += k[4] + (((uint32_t) k[5]) << 16);
185 mix(a, b, c);
186 length -= 12;
187 k += 6;
188 }
189
190 /*----------------------------- handle the last (probably partial) block */
191 k8 = (const uint8_t *) k;
192 switch (length) {
193 case 12:
194 c += k[4] + (((uint32_t) k[5]) << 16);
195 b += k[2] + (((uint32_t) k[3]) << 16);
196 a += k[0] + (((uint32_t) k[1]) << 16);
197 break;
198 case 11:
199 c += ((uint32_t) k8[10]) << 16;
200 /* fall through */
201 case 10:
202 c += k[4];
203 b += k[2] + (((uint32_t) k[3]) << 16);
204 a += k[0] + (((uint32_t) k[1]) << 16);
205 break;
206 case 9:
207 c += k8[8];
208 /* fall through */
209 case 8:
210 b += k[2] + (((uint32_t) k[3]) << 16);
211 a += k[0] + (((uint32_t) k[1]) << 16);
212 break;
213 case 7:
214 b += ((uint32_t) k8[6]) << 16;
215 /* fall through */
216 case 6:
217 b += k[2];
218 a += k[0] + (((uint32_t) k[1]) << 16);
219 break;
220 case 5:
221 b += k8[4];
222 /* fall through */
223 case 4:
224 a += k[0] + (((uint32_t) k[1]) << 16);
225 break;
226 case 3:
227 a += ((uint32_t) k8[2]) << 16;
228 /* fall through */
229 case 2:
230 a += k[0];
231 break;
232 case 1:
233 a += k8[0];
234 break;
235 case 0:
236 return c; /* zero length requires no mixing */
237 default:
238 return c;
239 }
240
241 } else { /* need to read the key one byte at a time */
242 #endif /* little endian */
243 const uint8_t *k = (const uint8_t *) key;
244
245 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
246 while (length > 12) {
247 a += k[0];
248 a += ((uint32_t) k[1]) << 8;
249 a += ((uint32_t) k[2]) << 16;
250 a += ((uint32_t) k[3]) << 24;
251 b += k[4];
252 b += ((uint32_t) k[5]) << 8;
253 b += ((uint32_t) k[6]) << 16;
254 b += ((uint32_t) k[7]) << 24;
255 c += k[8];
256 c += ((uint32_t) k[9]) << 8;
257 c += ((uint32_t) k[10]) << 16;
258 c += ((uint32_t) k[11]) << 24;
259 mix(a, b, c);
260 length -= 12;
261 k += 12;
262 }
263
264 /*-------------------------------- last block: affect all 32 bits of (c) */
265 switch (length) /* all the case statements fall through */ {
266 case 12:
267 c += ((uint32_t) k[11]) << 24;
268 /* fall through */
269 case 11:
270 c += ((uint32_t) k[10]) << 16;
271 /* fall through */
272 case 10:
273 c += ((uint32_t) k[9]) << 8;
274 /* fall through */
275 case 9:
276 c += k[8];
277 /* fall through */
278 case 8:
279 b += ((uint32_t) k[7]) << 24;
280 /* fall through */
281 case 7:
282 b += ((uint32_t) k[6]) << 16;
283 /* fall through */
284 case 6:
285 b += ((uint32_t) k[5]) << 8;
286 /* fall through */
287 case 5:
288 b += k[4];
289 /* fall through */
290 case 4:
291 a += ((uint32_t) k[3]) << 24;
292 /* fall through */
293 case 3:
294 a += ((uint32_t) k[2]) << 16;
295 /* fall through */
296 case 2:
297 a += ((uint32_t) k[1]) << 8;
298 /* fall through */
299 case 1:
300 a += k[0];
301 break;
302 case 0:
303 return c;
304 default:
305 return c;
306 }
307 #if !WORDS_BIGENDIAN
308 }
309 #endif
310
311 final(a, b, c);
312 return c;
313 }