git.m6w6.name Git - m6w6/libmemcached/blob - src/libhashkit/jenkins.cc

   1 /*
   2     +--------------------------------------------------------------------+
   3     | libmemcached - C/C++ Client Library for memcached                  |
   4     +--------------------------------------------------------------------+
   5     | Redistribution and use in source and binary forms, with or without |
   6     | modification, are permitted under the terms of the BSD license.    |
   7     | You should have received a copy of the license in a bundled file   |
   8     | named LICENSE; in case you did not receive a copy you can review   |
   9     | the terms online at: https://opensource.org/licenses/BSD-3-Clause  |
  10     +--------------------------------------------------------------------+
  11     | Copyright (c) 2006-2014 Brian Aker   https://datadifferential.com/ |
  12     | Copyright (c) 2020-2021 Michael Wallner        https://awesome.co/ |
  13     +--------------------------------------------------------------------+
  14 */
  15
  16 #include "libhashkit/common.h"
  17
  18 #define hashsize(n) ((uint32_t) 1 << (n))
  19 #define hashmask(n) (hashsize(n) - 1)
  20 #define rot(x, k)   (((x) << (k)) | ((x) >> (32 - (k))))
  21
  22 #define mix(a, b, c) \
  23   { \
  24     a -= c; \
  25     a ^= rot(c, 4); \
  26     c += b; \
  27     b -= a; \
  28     b ^= rot(a, 6); \
  29     a += c; \
  30     c -= b; \
  31     c ^= rot(b, 8); \
  32     b += a; \
  33     a -= c; \
  34     a ^= rot(c, 16); \
  35     c += b; \
  36     b -= a; \
  37     b ^= rot(a, 19); \
  38     a += c; \
  39     c -= b; \
  40     c ^= rot(b, 4); \
  41     b += a; \
  42   }
  43
  44 #define final(a, b, c) \
  45   { \
  46     c ^= b; \
  47     c -= rot(b, 14); \
  48     a ^= c; \
  49     a -= rot(c, 11); \
  50     b ^= a; \
  51     b -= rot(a, 25); \
  52     c ^= b; \
  53     c -= rot(b, 16); \
  54     a ^= c; \
  55     a -= rot(c, 4); \
  56     b ^= a; \
  57     b -= rot(a, 14); \
  58     c ^= b; \
  59     c -= rot(b, 24); \
  60   }
  61
  62 #define JENKINS_INITVAL 13
  63
  64 /*
  65 jenkins_hash() -- hash a variable-length key into a 32-bit value
  66   k       : the key (the unaligned variable-length array of bytes)
  67   length  : the length of the key, counting by bytes
  68   initval : can be any 4-byte value
  69 Returns a 32-bit value.  Every bit of the key affects every bit of
  70 the return value.  Two keys differing by one or two bits will have
  71 totally different hash values.
  72
  73 The best hash table sizes are powers of 2.  There is no need to do
  74 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
  75 use a bitmask.  For example, if you need only 10 bits, do
  76   h = (h & hashmask(10));
  77 In which case, the hash table should have hashsize(10) elements.
  78 */
  79
  80 #if HAVE_ASAN
  81 __attribute__((no_sanitize_address, no_sanitize("address")))
  82 #endif
  83 uint32_t
  84 hashkit_jenkins(const char *key, size_t length, void *) {
  85   uint32_t a, b, c; /* internal state */
  86 #if !WORDS_BIGENDIAN
  87   union {
  88     const void *ptr;
  89     size_t i;
  90   } u;
  91   u.ptr = key;
  92 #endif
  93
  94   /* Set up the internal state */
  95   a = b = c = 0xdeadbeef + ((uint32_t) length) + JENKINS_INITVAL;
  96
  97 #if !WORDS_BIGENDIAN
  98   if ((u.i & 0x3) == 0) {
  99     const uint32_t *k = (const uint32_t *) key; /* read 32-bit chunks */
 100
 101     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
 102     while (length > 12) {
 103       a += k[0];
 104       b += k[1];
 105       c += k[2];
 106       mix(a, b, c);
 107       length -= 12;
 108       k += 3;
 109     }
 110
 111     /*----------------------------- handle the last (probably partial) block */
 112     /*
 113      * "k[2]&0xffffff" actually reads beyond the end of the string, but
 114      * then masks off the part it's not allowed to read.  Because the
 115      * string is aligned, the masked-off tail is in the same word as the
 116      * rest of the string.  Every machine with memory protection I've seen
 117      * does it on word boundaries, so is OK with this.  But VALGRIND will
 118      * still catch it and complain.  The masking trick does make the hash
 119      * noticably faster for short strings (like English words).
 120      */
 121     switch (length) {
 122     case 12:
 123       c += k[2];
 124       b += k[1];
 125       a += k[0];
 126       break;
 127     case 11:
 128       c += k[2] & 0xffffff;
 129       b += k[1];
 130       a += k[0];
 131       break;
 132     case 10:
 133       c += k[2] & 0xffff;
 134       b += k[1];
 135       a += k[0];
 136       break;
 137     case 9:
 138       c += k[2] & 0xff;
 139       b += k[1];
 140       a += k[0];
 141       break;
 142     case 8:
 143       b += k[1];
 144       a += k[0];
 145       break;
 146     case 7:
 147       b += k[1] & 0xffffff;
 148       a += k[0];
 149       break;
 150     case 6:
 151       b += k[1] & 0xffff;
 152       a += k[0];
 153       break;
 154     case 5:
 155       b += k[1] & 0xff;
 156       a += k[0];
 157       break;
 158     case 4:
 159       a += k[0];
 160       break;
 161     case 3:
 162       a += k[0] & 0xffffff;
 163       break;
 164     case 2:
 165       a += k[0] & 0xffff;
 166       break;
 167     case 1:
 168       a += k[0] & 0xff;
 169       break;
 170     case 0:
 171       return c; /* zero length strings require no mixing */
 172     default:
 173       return c;
 174     }
 175
 176   } else if ((u.i & 0x1) == 0) {
 177     const uint16_t *k = (const uint16_t *) key; /* read 16-bit chunks */
 178     const uint8_t *k8;
 179
 180     /*--------------- all but last block: aligned reads and different mixing */
 181     while (length > 12) {
 182       a += k[0] + (((uint32_t) k[1]) << 16);
 183       b += k[2] + (((uint32_t) k[3]) << 16);
 184       c += k[4] + (((uint32_t) k[5]) << 16);
 185       mix(a, b, c);
 186       length -= 12;
 187       k += 6;
 188     }
 189
 190     /*----------------------------- handle the last (probably partial) block */
 191     k8 = (const uint8_t *) k;
 192     switch (length) {
 193     case 12:
 194       c += k[4] + (((uint32_t) k[5]) << 16);
 195       b += k[2] + (((uint32_t) k[3]) << 16);
 196       a += k[0] + (((uint32_t) k[1]) << 16);
 197       break;
 198     case 11:
 199       c += ((uint32_t) k8[10]) << 16;
 200       /* fall through */
 201     case 10:
 202       c += k[4];
 203       b += k[2] + (((uint32_t) k[3]) << 16);
 204       a += k[0] + (((uint32_t) k[1]) << 16);
 205       break;
 206     case 9:
 207       c += k8[8];
 208       /* fall through */
 209     case 8:
 210       b += k[2] + (((uint32_t) k[3]) << 16);
 211       a += k[0] + (((uint32_t) k[1]) << 16);
 212       break;
 213     case 7:
 214       b += ((uint32_t) k8[6]) << 16;
 215       /* fall through */
 216     case 6:
 217       b += k[2];
 218       a += k[0] + (((uint32_t) k[1]) << 16);
 219       break;
 220     case 5:
 221       b += k8[4];
 222       /* fall through */
 223     case 4:
 224       a += k[0] + (((uint32_t) k[1]) << 16);
 225       break;
 226     case 3:
 227       a += ((uint32_t) k8[2]) << 16;
 228       /* fall through */
 229     case 2:
 230       a += k[0];
 231       break;
 232     case 1:
 233       a += k8[0];
 234       break;
 235     case 0:
 236       return c; /* zero length requires no mixing */
 237     default:
 238       return c;
 239     }
 240
 241   } else { /* need to read the key one byte at a time */
 242 #endif     /* little endian */
 243     const uint8_t *k = (const uint8_t *) key;
 244
 245     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
 246     while (length > 12) {
 247       a += k[0];
 248       a += ((uint32_t) k[1]) << 8;
 249       a += ((uint32_t) k[2]) << 16;
 250       a += ((uint32_t) k[3]) << 24;
 251       b += k[4];
 252       b += ((uint32_t) k[5]) << 8;
 253       b += ((uint32_t) k[6]) << 16;
 254       b += ((uint32_t) k[7]) << 24;
 255       c += k[8];
 256       c += ((uint32_t) k[9]) << 8;
 257       c += ((uint32_t) k[10]) << 16;
 258       c += ((uint32_t) k[11]) << 24;
 259       mix(a, b, c);
 260       length -= 12;
 261       k += 12;
 262     }
 263
 264     /*-------------------------------- last block: affect all 32 bits of (c) */
 265     switch (length) /* all the case statements fall through */ {
 266     case 12:
 267       c += ((uint32_t) k[11]) << 24;
 268       /* fall through */
 269     case 11:
 270       c += ((uint32_t) k[10]) << 16;
 271       /* fall through */
 272     case 10:
 273       c += ((uint32_t) k[9]) << 8;
 274       /* fall through */
 275     case 9:
 276       c += k[8];
 277       /* fall through */
 278     case 8:
 279       b += ((uint32_t) k[7]) << 24;
 280       /* fall through */
 281     case 7:
 282       b += ((uint32_t) k[6]) << 16;
 283       /* fall through */
 284     case 6:
 285       b += ((uint32_t) k[5]) << 8;
 286       /* fall through */
 287     case 5:
 288       b += k[4];
 289       /* fall through */
 290     case 4:
 291       a += ((uint32_t) k[3]) << 24;
 292       /* fall through */
 293     case 3:
 294       a += ((uint32_t) k[2]) << 16;
 295       /* fall through */
 296     case 2:
 297       a += ((uint32_t) k[1]) << 8;
 298       /* fall through */
 299     case 1:
 300       a += k[0];
 301       break;
 302     case 0:
 303       return c;
 304     default:
 305       return c;
 306     }
 307 #if !WORDS_BIGENDIAN
 308   }
 309 #endif
 310
 311   final(a, b, c);
 312   return c;
 313 }