git.m6w6.name Git - awesomized/libmemcached/blob - libhashkit/jenkins.cc

   1 /*  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
   2  *
   3  *  HashKit library
   4  *
   5  *  Copyright (C) 2011-2012 Data Differential, http://datadifferential.com/
   6  *  Copyright (C) 2006-2009 Brian Aker All rights reserved.
   7  *
   8  *  Redistribution and use in source and binary forms, with or without
   9  *  modification, are permitted provided that the following conditions are
  10  *  met:
  11  *
  12  *      * Redistributions of source code must retain the above copyright
  13  *  notice, this list of conditions and the following disclaimer.
  14  *
  15  *      * Redistributions in binary form must reproduce the above
  16  *  copyright notice, this list of conditions and the following disclaimer
  17  *  in the documentation and/or other materials provided with the
  18  *  distribution.
  19  *
  20  *      * The names of its contributors may not be used to endorse or
  21  *  promote products derived from this software without specific prior
  22  *  written permission.
  23  *
  24  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  27  *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  28  *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29  *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  30  *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  31  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  32  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  33  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  34  *  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35  *
  36  */
  37
  38 /*
  39 *
  40 * By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
  41 * code any way you wish, private, educational, or commercial.  It's free.
  42 * Use for hash table lookup, or anything where one collision in 2^^32 is
  43 * acceptable.  Do NOT use for cryptographic purposes.
  44 * http://burtleburtle.net/bob/hash/index.html
  45 *
  46 * Modified by Brian Pontz for libmemcached
  47 * TODO:
  48 * Add big endian support
  49 */
  50
  51 #include <libhashkit/common.h>
  52
  53 #define hashsize(n) ((uint32_t)1<<(n))
  54 #define hashmask(n) (hashsize(n)-1)
  55 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
  56
  57 #define mix(a,b,c) \
  58 { \
  59   a -= c;  a ^= rot(c, 4);  c += b; \
  60   b -= a;  b ^= rot(a, 6);  a += c; \
  61   c -= b;  c ^= rot(b, 8);  b += a; \
  62   a -= c;  a ^= rot(c,16);  c += b; \
  63   b -= a;  b ^= rot(a,19);  a += c; \
  64   c -= b;  c ^= rot(b, 4);  b += a; \
  65 }
  66
  67 #define final(a,b,c) \
  68 { \
  69   c ^= b; c -= rot(b,14); \
  70   a ^= c; a -= rot(c,11); \
  71   b ^= a; b -= rot(a,25); \
  72   c ^= b; c -= rot(b,16); \
  73   a ^= c; a -= rot(c,4);  \
  74   b ^= a; b -= rot(a,14); \
  75   c ^= b; c -= rot(b,24); \
  76 }
  77
  78 #define JENKINS_INITVAL 13
  79
  80 /*
  81 jenkins_hash() -- hash a variable-length key into a 32-bit value
  82   k       : the key (the unaligned variable-length array of bytes)
  83   length  : the length of the key, counting by bytes
  84   initval : can be any 4-byte value
  85 Returns a 32-bit value.  Every bit of the key affects every bit of
  86 the return value.  Two keys differing by one or two bits will have
  87 totally different hash values.
  88
  89 The best hash table sizes are powers of 2.  There is no need to do
  90 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
  91 use a bitmask.  For example, if you need only 10 bits, do
  92   h = (h & hashmask(10));
  93 In which case, the hash table should have hashsize(10) elements.
  94 */
  95
  96 uint32_t hashkit_jenkins(const char *key, size_t length, void *)
  97 {
  98   uint32_t a,b,c;                                          /* internal state */
  99   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
 100
 101   /* Set up the internal state */
 102   a = b = c = 0xdeadbeef + ((uint32_t)length) + JENKINS_INITVAL;
 103
 104   u.ptr = key;
 105 #ifndef WORDS_BIGENDIAN
 106   if ((u.i & 0x3) == 0)
 107   {
 108     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
 109
 110     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
 111     while (length > 12)
 112     {
 113       a += k[0];
 114       b += k[1];
 115       c += k[2];
 116       mix(a,b,c);
 117       length -= 12;
 118       k += 3;
 119     }
 120
 121     /*----------------------------- handle the last (probably partial) block */
 122     /*
 123      * "k[2]&0xffffff" actually reads beyond the end of the string, but
 124      * then masks off the part it's not allowed to read.  Because the
 125      * string is aligned, the masked-off tail is in the same word as the
 126      * rest of the string.  Every machine with memory protection I've seen
 127      * does it on word boundaries, so is OK with this.  But VALGRIND will
 128      * still catch it and complain.  The masking trick does make the hash
 129      * noticably faster for short strings (like English words).
 130      */
 131     switch(length)
 132     {
 133     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
 134     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
 135     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
 136     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
 137     case 8 : b+=k[1]; a+=k[0]; break;
 138     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
 139     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
 140     case 5 : b+=k[1]&0xff; a+=k[0]; break;
 141     case 4 : a+=k[0]; break;
 142     case 3 : a+=k[0]&0xffffff; break;
 143     case 2 : a+=k[0]&0xffff; break;
 144     case 1 : a+=k[0]&0xff; break;
 145     case 0 : return c;              /* zero length strings require no mixing */
 146     default: return c;
 147     }
 148
 149   }
 150   else if ((u.i & 0x1) == 0)
 151   {
 152     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
 153     const uint8_t  *k8;
 154
 155     /*--------------- all but last block: aligned reads and different mixing */
 156     while (length > 12)
 157     {
 158       a += k[0] + (((uint32_t)k[1])<<16);
 159       b += k[2] + (((uint32_t)k[3])<<16);
 160       c += k[4] + (((uint32_t)k[5])<<16);
 161       mix(a,b,c);
 162       length -= 12;
 163       k += 6;
 164     }
 165
 166     /*----------------------------- handle the last (probably partial) block */
 167     k8 = (const uint8_t *)k;
 168     switch(length)
 169     {
 170     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
 171              b+=k[2]+(((uint32_t)k[3])<<16);
 172              a+=k[0]+(((uint32_t)k[1])<<16);
 173              break;
 174     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
 175     case 10: c+=k[4];
 176              b+=k[2]+(((uint32_t)k[3])<<16);
 177              a+=k[0]+(((uint32_t)k[1])<<16);
 178              break;
 179     case 9 : c+=k8[8];                      /* fall through */
 180     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
 181              a+=k[0]+(((uint32_t)k[1])<<16);
 182              break;
 183     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
 184     case 6 : b+=k[2];
 185              a+=k[0]+(((uint32_t)k[1])<<16);
 186              break;
 187     case 5 : b+=k8[4];                      /* fall through */
 188     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
 189              break;
 190     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
 191     case 2 : a+=k[0];
 192              break;
 193     case 1 : a+=k8[0];
 194              break;
 195     case 0 : return c;                     /* zero length requires no mixing */
 196     default: return c;
 197     }
 198
 199   }
 200   else
 201   {                        /* need to read the key one byte at a time */
 202 #endif /* little endian */
 203     const uint8_t *k = (const uint8_t *)key;
 204
 205     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
 206     while (length > 12)
 207     {
 208       a += k[0];
 209       a += ((uint32_t)k[1])<<8;
 210       a += ((uint32_t)k[2])<<16;
 211       a += ((uint32_t)k[3])<<24;
 212       b += k[4];
 213       b += ((uint32_t)k[5])<<8;
 214       b += ((uint32_t)k[6])<<16;
 215       b += ((uint32_t)k[7])<<24;
 216       c += k[8];
 217       c += ((uint32_t)k[9])<<8;
 218       c += ((uint32_t)k[10])<<16;
 219       c += ((uint32_t)k[11])<<24;
 220       mix(a,b,c);
 221       length -= 12;
 222       k += 12;
 223     }
 224
 225     /*-------------------------------- last block: affect all 32 bits of (c) */
 226     switch(length)                   /* all the case statements fall through */
 227     {
 228     case 12: c+=((uint32_t)k[11])<<24;
 229     case 11: c+=((uint32_t)k[10])<<16;
 230     case 10: c+=((uint32_t)k[9])<<8;
 231     case 9 : c+=k[8];
 232     case 8 : b+=((uint32_t)k[7])<<24;
 233     case 7 : b+=((uint32_t)k[6])<<16;
 234     case 6 : b+=((uint32_t)k[5])<<8;
 235     case 5 : b+=k[4];
 236     case 4 : a+=((uint32_t)k[3])<<24;
 237     case 3 : a+=((uint32_t)k[2])<<16;
 238     case 2 : a+=((uint32_t)k[1])<<8;
 239     case 1 : a+=k[0];
 240              break;
 241     case 0 : return c;
 242     default : return c;
 243     }
 244 #ifndef WORDS_BIGENDIAN
 245   }
 246 #endif
 247
 248   final(a,b,c);
 249   return c;
 250 }