/*
  "Murmur" hash provided by Austin, tanjent@gmail.com
  http://murmurhash.googlepages.com/

  Updated to murmur2 hash - BP

  Limitations -

  1. It will not work incrementally.
  2. It will not produce the same results on little-endian and big-endian
     machines: each 4-byte group of the key is interpreted in the host's
     native byte order.

  Unlike the reference code, this version neither requires that a 4-byte
  value can be read from an arbitrary address nor that sizeof(int) == 4:
  key bytes are only ever read one at a time and all arithmetic is done
  on uint32_t.
*/

/*
  Compute the 32-bit MurmurHash2 of a byte string.

  key     Bytes to hash. It is not dereferenced when length is 0.
  length  Number of bytes at key. Only its low 32 bits take part in the
          initial hash value; every byte of the key is still mixed in.

  Returns the hash value. An empty key hashes to 0.
*/
uint32_t murmur_hash(const char *key, size_t length)
{
  /*
    'm' and 'r' are mixing constants generated offline. They're not
    really 'magic', they just happen to work well.
  */
  const uint32_t m= 0x5bd1e995U;
  const int r= 24;

  /* Arithmetic is modulo 2^32, so narrowing the length first is exact. */
  const uint32_t length32= (uint32_t)length;
  const uint32_t seed= 0xdeadbeefU * length32;

  /* Initialize the hash to a 'random' value */
  uint32_t h= seed ^ length32;

  const unsigned char *data= (const unsigned char *)key;

  /* Mix 4 bytes at a time into the hash */
  while (length >= 4)
  {
    /*
      Copy the bytes through a union instead of dereferencing a casted
      pointer: the key may be unaligned, and reading it as a wider type
      would break the aliasing rules. The resulting word uses the host
      byte order, exactly as a direct 4-byte load would.
    */
    union { uint32_t word; unsigned char bytes[4]; } chunk;
    uint32_t k;

    chunk.bytes[0]= data[0];
    chunk.bytes[1]= data[1];
    chunk.bytes[2]= data[2];
    chunk.bytes[3]= data[3];
    k= chunk.word;

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    length -= 4;
  }

  /* Handle the last few bytes of the input array */
  switch (length)
  {
  case 3:
    h ^= (uint32_t)data[2] << 16;
    /* fall through */
  case 2:
    h ^= (uint32_t)data[1] << 8;
    /* fall through */
  case 1:
    h ^= (uint32_t)data[0];
    h *= m;
    break;
  default:
    /* length was a multiple of 4: nothing left to mix */
    break;
  }

  /*
    Do a few final mixes of the hash to ensure the last few bytes are
    well-incorporated.
  */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}