git.m6w6.name Git - m6w6/libmemcached/blob - src/libhashkit/jenkins.cc

   1 /*
   2     +--------------------------------------------------------------------+
   3     | libmemcached - C/C++ Client Library for memcached                  |
   4     +--------------------------------------------------------------------+
   5     | Redistribution and use in source and binary forms, with or without |
   6     | modification, are permitted under the terms of the BSD license.    |
   7     | You should have received a copy of the license in a bundled file   |
   8     | named LICENSE; in case you did not receive a copy you can review   |
   9     | the terms online at: https://opensource.org/licenses/BSD-3-Clause  |
  10     +--------------------------------------------------------------------+
  11     | Copyright (c) 2006-2014 Brian Aker   https://datadifferential.com/ |
  12     | Copyright (c) 2020 Michael Wallner   <mike@php.net>                |
  13     +--------------------------------------------------------------------+
  14 */
  15
  16 #include "libhashkit/common.h"
  17
  18 #define hashsize(n) ((uint32_t) 1 << (n))
  19 #define hashmask(n) (hashsize(n) - 1)
  20 #define rot(x, k)   (((x) << (k)) | ((x) >> (32 - (k))))
  21
  22 #define mix(a, b, c) \
  23   { \
  24     a -= c; \
  25     a ^= rot(c, 4); \
  26     c += b; \
  27     b -= a; \
  28     b ^= rot(a, 6); \
  29     a += c; \
  30     c -= b; \
  31     c ^= rot(b, 8); \
  32     b += a; \
  33     a -= c; \
  34     a ^= rot(c, 16); \
  35     c += b; \
  36     b -= a; \
  37     b ^= rot(a, 19); \
  38     a += c; \
  39     c -= b; \
  40     c ^= rot(b, 4); \
  41     b += a; \
  42   }
  43
  44 #define final(a, b, c) \
  45   { \
  46     c ^= b; \
  47     c -= rot(b, 14); \
  48     a ^= c; \
  49     a -= rot(c, 11); \
  50     b ^= a; \
  51     b -= rot(a, 25); \
  52     c ^= b; \
  53     c -= rot(b, 16); \
  54     a ^= c; \
  55     a -= rot(c, 4); \
  56     b ^= a; \
  57     b -= rot(a, 14); \
  58     c ^= b; \
  59     c -= rot(b, 24); \
  60   }
  61
  62 #define JENKINS_INITVAL 13
  63
  64 /*
  65 jenkins_hash() -- hash a variable-length key into a 32-bit value
  66   k       : the key (the unaligned variable-length array of bytes)
  67   length  : the length of the key, counting by bytes
  68   initval : can be any 4-byte value
  69 Returns a 32-bit value.  Every bit of the key affects every bit of
  70 the return value.  Two keys differing by one or two bits will have
  71 totally different hash values.
  72
  73 The best hash table sizes are powers of 2.  There is no need to do
  74 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
  75 use a bitmask.  For example, if you need only 10 bits, do
  76   h = (h & hashmask(10));
  77 In which case, the hash table should have hashsize(10) elements.
  78 */
  79
  80 #if HAVE_ASAN
  81 __attribute__((no_sanitize_address, no_sanitize("address")))
  82 #endif
  83 uint32_t
  84 hashkit_jenkins(const char *key, size_t length, void *) {
  85   uint32_t a, b, c; /* internal state */
  86 #if !WORDS_BIGENDIAN
  87   union {
  88     const void *ptr;
  89     size_t i;
  90   } u;
  91   u.ptr = key;
  92 #endif
  93
  94   /* Set up the internal state */
  95   a = b = c = 0xdeadbeef + ((uint32_t) length) + JENKINS_INITVAL;
  96
  97 #if !WORDS_BIGENDIAN
  98   if ((u.i & 0x3) == 0) {
  99     const uint32_t *k = (const uint32_t *) key; /* read 32-bit chunks */
 100
 101     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
 102     while (length > 12) {
 103       a += k[0];
 104       b += k[1];
 105       c += k[2];
 106       mix(a, b, c);
 107       length -= 12;
 108       k += 3;
 109     }
 110
 111     /*----------------------------- handle the last (probably partial) block */
 112     /*
 113      * "k[2]&0xffffff" actually reads beyond the end of the string, but
 114      * then masks off the part it's not allowed to read.  Because the
 115      * string is aligned, the masked-off tail is in the same word as the
 116      * rest of the string.  Every machine with memory protection I've seen
 117      * does it on word boundaries, so is OK with this.  But VALGRIND will
 118      * still catch it and complain.  The masking trick does make the hash
 119      * noticably faster for short strings (like English words).
 120      */
 121     switch (length) {
 122     case 12:
 123       c += k[2];
 124       b += k[1];
 125       a += k[0];
 126       break;
 127     case 11:
 128       c += k[2] & 0xffffff;
 129       b += k[1];
 130       a += k[0];
 131       break;
 132     case 10:
 133       c += k[2] & 0xffff;
 134       b += k[1];
 135       a += k[0];
 136       break;
 137     case 9:
 138       c += k[2] & 0xff;
 139       b += k[1];
 140       a += k[0];
 141       break;
 142     case 8:
 143       b += k[1];
 144       a += k[0];
 145       break;
 146     case 7:
 147       b += k[1] & 0xffffff;
 148       a += k[0];
 149       break;
 150     case 6:
 151       b += k[1] & 0xffff;
 152       a += k[0];
 153       break;
 154     case 5:
 155       b += k[1] & 0xff;
 156       a += k[0];
 157       break;
 158     case 4: a += k[0]; break;
 159     case 3: a += k[0] & 0xffffff; break;
 160     case 2: a += k[0] & 0xffff; break;
 161     case 1: a += k[0] & 0xff; break;
 162     case 0: return c; /* zero length strings require no mixing */
 163     default: return c;
 164     }
 165
 166   } else if ((u.i & 0x1) == 0) {
 167     const uint16_t *k = (const uint16_t *) key; /* read 16-bit chunks */
 168     const uint8_t *k8;
 169
 170     /*--------------- all but last block: aligned reads and different mixing */
 171     while (length > 12) {
 172       a += k[0] + (((uint32_t) k[1]) << 16);
 173       b += k[2] + (((uint32_t) k[3]) << 16);
 174       c += k[4] + (((uint32_t) k[5]) << 16);
 175       mix(a, b, c);
 176       length -= 12;
 177       k += 6;
 178     }
 179
 180     /*----------------------------- handle the last (probably partial) block */
 181     k8 = (const uint8_t *) k;
 182     switch (length) {
 183     case 12:
 184       c += k[4] + (((uint32_t) k[5]) << 16);
 185       b += k[2] + (((uint32_t) k[3]) << 16);
 186       a += k[0] + (((uint32_t) k[1]) << 16);
 187       break;
 188     case 11:
 189       c += ((uint32_t) k8[10]) << 16;
 190       /* fall through */
 191     case 10:
 192       c += k[4];
 193       b += k[2] + (((uint32_t) k[3]) << 16);
 194       a += k[0] + (((uint32_t) k[1]) << 16);
 195       break;
 196     case 9:
 197       c += k8[8];
 198       /* fall through */
 199     case 8:
 200       b += k[2] + (((uint32_t) k[3]) << 16);
 201       a += k[0] + (((uint32_t) k[1]) << 16);
 202       break;
 203     case 7:
 204       b += ((uint32_t) k8[6]) << 16;
 205       /* fall through */
 206     case 6:
 207       b += k[2];
 208       a += k[0] + (((uint32_t) k[1]) << 16);
 209       break;
 210     case 5:
 211       b += k8[4];
 212       /* fall through */
 213     case 4: a += k[0] + (((uint32_t) k[1]) << 16); break;
 214     case 3:
 215       a += ((uint32_t) k8[2]) << 16;
 216       /* fall through */
 217     case 2: a += k[0]; break;
 218     case 1: a += k8[0]; break;
 219     case 0: return c; /* zero length requires no mixing */
 220     default: return c;
 221     }
 222
 223   } else { /* need to read the key one byte at a time */
 224 #endif     /* little endian */
 225     const uint8_t *k = (const uint8_t *) key;
 226
 227     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
 228     while (length > 12) {
 229       a += k[0];
 230       a += ((uint32_t) k[1]) << 8;
 231       a += ((uint32_t) k[2]) << 16;
 232       a += ((uint32_t) k[3]) << 24;
 233       b += k[4];
 234       b += ((uint32_t) k[5]) << 8;
 235       b += ((uint32_t) k[6]) << 16;
 236       b += ((uint32_t) k[7]) << 24;
 237       c += k[8];
 238       c += ((uint32_t) k[9]) << 8;
 239       c += ((uint32_t) k[10]) << 16;
 240       c += ((uint32_t) k[11]) << 24;
 241       mix(a, b, c);
 242       length -= 12;
 243       k += 12;
 244     }
 245
 246     /*-------------------------------- last block: affect all 32 bits of (c) */
 247     switch (length) /* all the case statements fall through */ {
 248     case 12:
 249       c += ((uint32_t) k[11]) << 24;
 250       /* fall through */
 251     case 11:
 252       c += ((uint32_t) k[10]) << 16;
 253       /* fall through */
 254     case 10:
 255       c += ((uint32_t) k[9]) << 8;
 256       /* fall through */
 257     case 9:
 258       c += k[8];
 259       /* fall through */
 260     case 8:
 261       b += ((uint32_t) k[7]) << 24;
 262       /* fall through */
 263     case 7:
 264       b += ((uint32_t) k[6]) << 16;
 265       /* fall through */
 266     case 6:
 267       b += ((uint32_t) k[5]) << 8;
 268       /* fall through */
 269     case 5:
 270       b += k[4];
 271       /* fall through */
 272     case 4:
 273       a += ((uint32_t) k[3]) << 24;
 274       /* fall through */
 275     case 3:
 276       a += ((uint32_t) k[2]) << 16;
 277       /* fall through */
 278     case 2:
 279       a += ((uint32_t) k[1]) << 8;
 280       /* fall through */
 281     case 1: a += k[0]; break;
 282     case 0: return c;
 283     default: return c;
 284     }
 285 #if !WORDS_BIGENDIAN
 286   }
 287 #endif
 288
 289   final(a, b, c);
 290   return c;
 291 }