src/hashkit: apply clang-format
[m6w6/libmemcached] / src / libhashkit / jenkins.cc
1 /*
2 +--------------------------------------------------------------------+
3 | libmemcached - C/C++ Client Library for memcached |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted under the terms of the BSD license. |
7 | You should have received a copy of the license in a bundled file |
8 | named LICENSE; in case you did not receive a copy you can review |
9 | the terms online at: https://opensource.org/licenses/BSD-3-Clause |
10 +--------------------------------------------------------------------+
11 | Copyright (c) 2006-2014 Brian Aker https://datadifferential.com/ |
12 | Copyright (c) 2020 Michael Wallner <mike@php.net> |
13 +--------------------------------------------------------------------+
14 */
15
16 #include "libhashkit/common.h"
17
18 #define hashsize(n) ((uint32_t) 1 << (n))
19 #define hashmask(n) (hashsize(n) - 1)
20 #define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
21
22 #define mix(a, b, c) \
23 { \
24 a -= c; \
25 a ^= rot(c, 4); \
26 c += b; \
27 b -= a; \
28 b ^= rot(a, 6); \
29 a += c; \
30 c -= b; \
31 c ^= rot(b, 8); \
32 b += a; \
33 a -= c; \
34 a ^= rot(c, 16); \
35 c += b; \
36 b -= a; \
37 b ^= rot(a, 19); \
38 a += c; \
39 c -= b; \
40 c ^= rot(b, 4); \
41 b += a; \
42 }
43
44 #define final(a, b, c) \
45 { \
46 c ^= b; \
47 c -= rot(b, 14); \
48 a ^= c; \
49 a -= rot(c, 11); \
50 b ^= a; \
51 b -= rot(a, 25); \
52 c ^= b; \
53 c -= rot(b, 16); \
54 a ^= c; \
55 a -= rot(c, 4); \
56 b ^= a; \
57 b -= rot(a, 14); \
58 c ^= b; \
59 c -= rot(b, 24); \
60 }
61
62 #define JENKINS_INITVAL 13
63
64 /*
65 jenkins_hash() -- hash a variable-length key into a 32-bit value
66 k : the key (the unaligned variable-length array of bytes)
67 length : the length of the key, counting by bytes
68 initval : can be any 4-byte value
69 Returns a 32-bit value. Every bit of the key affects every bit of
70 the return value. Two keys differing by one or two bits will have
71 totally different hash values.
72
73 The best hash table sizes are powers of 2. There is no need to do
74 mod a prime (mod is sooo slow!). If you need less than 32 bits,
75 use a bitmask. For example, if you need only 10 bits, do
76 h = (h & hashmask(10));
77 In which case, the hash table should have hashsize(10) elements.
78 */
79
80 #if HAVE_ASAN
81 __attribute__((no_sanitize_address, no_sanitize("address")))
82 #endif
83 uint32_t
84 hashkit_jenkins(const char *key, size_t length, void *) {
85 uint32_t a, b, c; /* internal state */
86 #if !WORDS_BIGENDIAN
87 union {
88 const void *ptr;
89 size_t i;
90 } u;
91 u.ptr = key;
92 #endif
93
94 /* Set up the internal state */
95 a = b = c = 0xdeadbeef + ((uint32_t) length) + JENKINS_INITVAL;
96
97 #if !WORDS_BIGENDIAN
98 if ((u.i & 0x3) == 0) {
99 const uint32_t *k = (const uint32_t *) key; /* read 32-bit chunks */
100
101 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
102 while (length > 12) {
103 a += k[0];
104 b += k[1];
105 c += k[2];
106 mix(a, b, c);
107 length -= 12;
108 k += 3;
109 }
110
111 /*----------------------------- handle the last (probably partial) block */
112 /*
113 * "k[2]&0xffffff" actually reads beyond the end of the string, but
114 * then masks off the part it's not allowed to read. Because the
115 * string is aligned, the masked-off tail is in the same word as the
116 * rest of the string. Every machine with memory protection I've seen
117 * does it on word boundaries, so is OK with this. But VALGRIND will
118 * still catch it and complain. The masking trick does make the hash
119 * noticably faster for short strings (like English words).
120 */
121 switch (length) {
122 case 12:
123 c += k[2];
124 b += k[1];
125 a += k[0];
126 break;
127 case 11:
128 c += k[2] & 0xffffff;
129 b += k[1];
130 a += k[0];
131 break;
132 case 10:
133 c += k[2] & 0xffff;
134 b += k[1];
135 a += k[0];
136 break;
137 case 9:
138 c += k[2] & 0xff;
139 b += k[1];
140 a += k[0];
141 break;
142 case 8:
143 b += k[1];
144 a += k[0];
145 break;
146 case 7:
147 b += k[1] & 0xffffff;
148 a += k[0];
149 break;
150 case 6:
151 b += k[1] & 0xffff;
152 a += k[0];
153 break;
154 case 5:
155 b += k[1] & 0xff;
156 a += k[0];
157 break;
158 case 4: a += k[0]; break;
159 case 3: a += k[0] & 0xffffff; break;
160 case 2: a += k[0] & 0xffff; break;
161 case 1: a += k[0] & 0xff; break;
162 case 0: return c; /* zero length strings require no mixing */
163 default: return c;
164 }
165
166 } else if ((u.i & 0x1) == 0) {
167 const uint16_t *k = (const uint16_t *) key; /* read 16-bit chunks */
168 const uint8_t *k8;
169
170 /*--------------- all but last block: aligned reads and different mixing */
171 while (length > 12) {
172 a += k[0] + (((uint32_t) k[1]) << 16);
173 b += k[2] + (((uint32_t) k[3]) << 16);
174 c += k[4] + (((uint32_t) k[5]) << 16);
175 mix(a, b, c);
176 length -= 12;
177 k += 6;
178 }
179
180 /*----------------------------- handle the last (probably partial) block */
181 k8 = (const uint8_t *) k;
182 switch (length) {
183 case 12:
184 c += k[4] + (((uint32_t) k[5]) << 16);
185 b += k[2] + (((uint32_t) k[3]) << 16);
186 a += k[0] + (((uint32_t) k[1]) << 16);
187 break;
188 case 11:
189 c += ((uint32_t) k8[10]) << 16;
190 /* fall through */
191 case 10:
192 c += k[4];
193 b += k[2] + (((uint32_t) k[3]) << 16);
194 a += k[0] + (((uint32_t) k[1]) << 16);
195 break;
196 case 9:
197 c += k8[8];
198 /* fall through */
199 case 8:
200 b += k[2] + (((uint32_t) k[3]) << 16);
201 a += k[0] + (((uint32_t) k[1]) << 16);
202 break;
203 case 7:
204 b += ((uint32_t) k8[6]) << 16;
205 /* fall through */
206 case 6:
207 b += k[2];
208 a += k[0] + (((uint32_t) k[1]) << 16);
209 break;
210 case 5:
211 b += k8[4];
212 /* fall through */
213 case 4: a += k[0] + (((uint32_t) k[1]) << 16); break;
214 case 3:
215 a += ((uint32_t) k8[2]) << 16;
216 /* fall through */
217 case 2: a += k[0]; break;
218 case 1: a += k8[0]; break;
219 case 0: return c; /* zero length requires no mixing */
220 default: return c;
221 }
222
223 } else { /* need to read the key one byte at a time */
224 #endif /* little endian */
225 const uint8_t *k = (const uint8_t *) key;
226
227 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
228 while (length > 12) {
229 a += k[0];
230 a += ((uint32_t) k[1]) << 8;
231 a += ((uint32_t) k[2]) << 16;
232 a += ((uint32_t) k[3]) << 24;
233 b += k[4];
234 b += ((uint32_t) k[5]) << 8;
235 b += ((uint32_t) k[6]) << 16;
236 b += ((uint32_t) k[7]) << 24;
237 c += k[8];
238 c += ((uint32_t) k[9]) << 8;
239 c += ((uint32_t) k[10]) << 16;
240 c += ((uint32_t) k[11]) << 24;
241 mix(a, b, c);
242 length -= 12;
243 k += 12;
244 }
245
246 /*-------------------------------- last block: affect all 32 bits of (c) */
247 switch (length) /* all the case statements fall through */ {
248 case 12:
249 c += ((uint32_t) k[11]) << 24;
250 /* fall through */
251 case 11:
252 c += ((uint32_t) k[10]) << 16;
253 /* fall through */
254 case 10:
255 c += ((uint32_t) k[9]) << 8;
256 /* fall through */
257 case 9:
258 c += k[8];
259 /* fall through */
260 case 8:
261 b += ((uint32_t) k[7]) << 24;
262 /* fall through */
263 case 7:
264 b += ((uint32_t) k[6]) << 16;
265 /* fall through */
266 case 6:
267 b += ((uint32_t) k[5]) << 8;
268 /* fall through */
269 case 5:
270 b += k[4];
271 /* fall through */
272 case 4:
273 a += ((uint32_t) k[3]) << 24;
274 /* fall through */
275 case 3:
276 a += ((uint32_t) k[2]) << 16;
277 /* fall through */
278 case 2:
279 a += ((uint32_t) k[1]) << 8;
280 /* fall through */
281 case 1: a += k[0]; break;
282 case 0: return c;
283 default: return c;
284 }
285 #if !WORDS_BIGENDIAN
286 }
287 #endif
288
289 final(a, b, c);
290 return c;
291 }