git.m6w6.name Git - m6w6/libmemcached/blob - libhashkit/jenkins.cc

   1 /*  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
   2  *
   3  *  HashKit library
   4  *
   5  *  Copyright (C) 2011-2012 Data Differential, http://datadifferential.com/
   6  *  Copyright (C) 2006-2009 Brian Aker All rights reserved.
   7  *
   8  *  Redistribution and use in source and binary forms, with or without
   9  *  modification, are permitted provided that the following conditions are
  10  *  met:
  11  *
  12  *      * Redistributions of source code must retain the above copyright
  13  *  notice, this list of conditions and the following disclaimer.
  14  *
  15  *      * Redistributions in binary form must reproduce the above
  16  *  copyright notice, this list of conditions and the following disclaimer
  17  *  in the documentation and/or other materials provided with the
  18  *  distribution.
  19  *
  20  *      * The names of its contributors may not be used to endorse or
  21  *  promote products derived from this software without specific prior
  22  *  written permission.
  23  *
  24  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  27  *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  28  *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29  *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  30  *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  31  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  32  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  33  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  34  *  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35  *
  36  */
  37
  38 /*
  39 *
  40 * By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
  41 * code any way you wish, private, educational, or commercial.  It's free.
  42 * Use for hash table lookup, or anything where one collision in 2^^32 is
  43 * acceptable.  Do NOT use for cryptographic purposes.
  44 * http://burtleburtle.net/bob/hash/index.html
  45 *
  46 * Modified by Brian Pontz for libmemcached
  47 * TODO:
  48 * Add big endian support
  49 */
  50
  51 #include <libhashkit/common.h>
  52
  53 #define hashsize(n) ((uint32_t)1<<(n))
  54 #define hashmask(n) (hashsize(n)-1)
  55 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
  56
  57 #define mix(a,b,c) \
  58 { \
  59   a -= c;  a ^= rot(c, 4);  c += b; \
  60   b -= a;  b ^= rot(a, 6);  a += c; \
  61   c -= b;  c ^= rot(b, 8);  b += a; \
  62   a -= c;  a ^= rot(c,16);  c += b; \
  63   b -= a;  b ^= rot(a,19);  a += c; \
  64   c -= b;  c ^= rot(b, 4);  b += a; \
  65 }
  66
  67 #define final(a,b,c) \
  68 { \
  69   c ^= b; c -= rot(b,14); \
  70   a ^= c; a -= rot(c,11); \
  71   b ^= a; b -= rot(a,25); \
  72   c ^= b; c -= rot(b,16); \
  73   a ^= c; a -= rot(c,4);  \
  74   b ^= a; b -= rot(a,14); \
  75   c ^= b; c -= rot(b,24); \
  76 }
  77
  78 #define JENKINS_INITVAL 13
  79
  80 /*
  81 jenkins_hash() -- hash a variable-length key into a 32-bit value
  82   k       : the key (the unaligned variable-length array of bytes)
  83   length  : the length of the key, counting by bytes
  84   initval : can be any 4-byte value
  85 Returns a 32-bit value.  Every bit of the key affects every bit of
  86 the return value.  Two keys differing by one or two bits will have
  87 totally different hash values.
  88
  89 The best hash table sizes are powers of 2.  There is no need to do
  90 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
  91 use a bitmask.  For example, if you need only 10 bits, do
  92   h = (h & hashmask(10));
  93 In which case, the hash table should have hashsize(10) elements.
  94 */
  95
  96 uint32_t hashkit_jenkins(const char *key, size_t length, void *)
  97 {
  98   uint32_t a,b,c;                                          /* internal state */
  99 #ifndef WORDS_BIGENDIAN
 100   union { const void *ptr; size_t i; } u;
 101   u.ptr = key;
 102 #endif
 103
 104   /* Set up the internal state */
 105   a = b = c = 0xdeadbeef + ((uint32_t)length) + JENKINS_INITVAL;
 106
 107 #ifndef WORDS_BIGENDIAN
 108   if ((u.i & 0x3) == 0)
 109   {
 110     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
 111
 112     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
 113     while (length > 12)
 114     {
 115       a += k[0];
 116       b += k[1];
 117       c += k[2];
 118       mix(a,b,c);
 119       length -= 12;
 120       k += 3;
 121     }
 122
 123     /*----------------------------- handle the last (probably partial) block */
 124     /*
 125      * "k[2]&0xffffff" actually reads beyond the end of the string, but
 126      * then masks off the part it's not allowed to read.  Because the
 127      * string is aligned, the masked-off tail is in the same word as the
 128      * rest of the string.  Every machine with memory protection I've seen
 129      * does it on word boundaries, so is OK with this.  But VALGRIND will
 130      * still catch it and complain.  The masking trick does make the hash
 131      * noticably faster for short strings (like English words).
 132      */
 133     switch(length)
 134     {
 135     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
 136     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
 137     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
 138     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
 139     case 8 : b+=k[1]; a+=k[0]; break;
 140     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
 141     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
 142     case 5 : b+=k[1]&0xff; a+=k[0]; break;
 143     case 4 : a+=k[0]; break;
 144     case 3 : a+=k[0]&0xffffff; break;
 145     case 2 : a+=k[0]&0xffff; break;
 146     case 1 : a+=k[0]&0xff; break;
 147     case 0 : return c;              /* zero length strings require no mixing */
 148     default: return c;
 149     }
 150
 151   }
 152   else if ((u.i & 0x1) == 0)
 153   {
 154     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
 155     const uint8_t  *k8;
 156
 157     /*--------------- all but last block: aligned reads and different mixing */
 158     while (length > 12)
 159     {
 160       a += k[0] + (((uint32_t)k[1])<<16);
 161       b += k[2] + (((uint32_t)k[3])<<16);
 162       c += k[4] + (((uint32_t)k[5])<<16);
 163       mix(a,b,c);
 164       length -= 12;
 165       k += 6;
 166     }
 167
 168     /*----------------------------- handle the last (probably partial) block */
 169     k8 = (const uint8_t *)k;
 170     switch(length)
 171     {
 172     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
 173              b+=k[2]+(((uint32_t)k[3])<<16);
 174              a+=k[0]+(((uint32_t)k[1])<<16);
 175              break;
 176     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
 177     case 10: c+=k[4];
 178              b+=k[2]+(((uint32_t)k[3])<<16);
 179              a+=k[0]+(((uint32_t)k[1])<<16);
 180              break;
 181     case 9 : c+=k8[8];                      /* fall through */
 182     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
 183              a+=k[0]+(((uint32_t)k[1])<<16);
 184              break;
 185     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
 186     case 6 : b+=k[2];
 187              a+=k[0]+(((uint32_t)k[1])<<16);
 188              break;
 189     case 5 : b+=k8[4];                      /* fall through */
 190     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
 191              break;
 192     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
 193     case 2 : a+=k[0];
 194              break;
 195     case 1 : a+=k8[0];
 196              break;
 197     case 0 : return c;                     /* zero length requires no mixing */
 198     default: return c;
 199     }
 200
 201   }
 202   else
 203   {                        /* need to read the key one byte at a time */
 204 #endif /* little endian */
 205     const uint8_t *k = (const uint8_t *)key;
 206
 207     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
 208     while (length > 12)
 209     {
 210       a += k[0];
 211       a += ((uint32_t)k[1])<<8;
 212       a += ((uint32_t)k[2])<<16;
 213       a += ((uint32_t)k[3])<<24;
 214       b += k[4];
 215       b += ((uint32_t)k[5])<<8;
 216       b += ((uint32_t)k[6])<<16;
 217       b += ((uint32_t)k[7])<<24;
 218       c += k[8];
 219       c += ((uint32_t)k[9])<<8;
 220       c += ((uint32_t)k[10])<<16;
 221       c += ((uint32_t)k[11])<<24;
 222       mix(a,b,c);
 223       length -= 12;
 224       k += 12;
 225     }
 226
 227     /*-------------------------------- last block: affect all 32 bits of (c) */
 228     switch(length)                   /* all the case statements fall through */
 229     {
 230     case 12: c+=((uint32_t)k[11])<<24;
 231     case 11: c+=((uint32_t)k[10])<<16;
 232     case 10: c+=((uint32_t)k[9])<<8;
 233     case 9 : c+=k[8];
 234     case 8 : b+=((uint32_t)k[7])<<24;
 235     case 7 : b+=((uint32_t)k[6])<<16;
 236     case 6 : b+=((uint32_t)k[5])<<8;
 237     case 5 : b+=k[4];
 238     case 4 : a+=((uint32_t)k[3])<<24;
 239     case 3 : a+=((uint32_t)k[2])<<16;
 240     case 2 : a+=((uint32_t)k[1])<<8;
 241     case 1 : a+=k[0];
 242              break;
 243     case 0 : return c;
 244     default : return c;
 245     }
 246 #ifndef WORDS_BIGENDIAN
 247   }
 248 #endif
 249
 250   final(a,b,c);
 251   return c;
 252 }