From a6b09c592769d07c446a627e7d0f8f42e1d25aac Mon Sep 17 00:00:00 2001 From: Brian Aker Date: Wed, 21 Nov 2012 07:05:17 -0500 Subject: [PATCH] Add support for murmur3 --- .bzrignore | 15 +- ChangeLog | 4 + docs/memcached_generate_hash_value.rst | 2 + libhashkit-1.0/algorithm.h | 33 +-- libhashkit-1.0/types.h | 1 + libhashkit/algorithm.cc | 5 + libhashkit/algorithm.h | 75 ++++++ libhashkit/common.h | 7 +- libhashkit/digest.cc | 3 + libhashkit/function.cc | 1 + libhashkit/has.cc | 1 + libhashkit/include.am | 37 +-- libhashkit/murmur3.cc | 317 ++++++++++++++++++++++++ libhashkit/murmur3.h | 16 ++ libhashkit/murmur3_api.cc | 49 ++++ libhashkit/str_algorithm.cc | 1 + libmemcached-1.0/types/hash.h | 1 + tests/hash_plus.cc | 1 + tests/hash_results.h | 9 + tests/hashkit_functions.cc | 22 ++ tests/libmemcached-1.0/all_tests.h | 1 + tests/libmemcached-1.0/mem_functions.cc | 21 ++ tests/libmemcached-1.0/mem_functions.h | 1 + 23 files changed, 570 insertions(+), 53 deletions(-) create mode 100644 libhashkit/algorithm.h create mode 100644 libhashkit/murmur3.cc create mode 100644 libhashkit/murmur3.h create mode 100644 libhashkit/murmur3_api.cc diff --git a/.bzrignore b/.bzrignore index f2156df5..18ba5811 100644 --- a/.bzrignore +++ b/.bzrignore @@ -29,6 +29,7 @@ Makefile Makefile.in TAGS aclocal.m4 +aminclude.am autom4te.cache autoscan.log build-aux/ @@ -46,9 +47,6 @@ clients/memrm clients/memslap clients/memstat clients/memtouch -mem_config.h -mem_config.h.in -mem_config.in config.log config.status config/compile @@ -60,6 +58,7 @@ config/ltmain.sh config/missing config/pandora_vc_revinfo config/top.h +configure configure.scan docs/*.[13] docs/*.html @@ -81,12 +80,17 @@ libmemcached-1.2/configure.h libmemcached-2.0/configure.h libmemcached-?.??/ libmemcached/configure.h +libmemcached/csl/parser.cc +libmemcached/csl/parser.h +libmemcached/csl/scanner.cc +libmemcached/csl/scanner.h libmemcached/dtrace_probes.h libmemcached/generated_probes.h libmemcached/memcached_configure.h libtest/.hg/ libtest/.hgignore libtest/abort +libtest/core-count libtest/skiptest libtest/unittest libtest/version.h @@ -101,6 +105,10 @@ m4/lt~obsolete.m4 man/*.1 man/*.3 man/*.8 +man/.doctrees/ +mem_config.h +mem_config.h.in +mem_config.in memcached/.git memcached/.gitignore memcached/memcached @@ -150,4 +158,3 @@ tests/testudp tests/var/ tmp_chroot unittests/unittests -aminclude.am diff --git a/ChangeLog b/ChangeLog index ad76682b..e6efb8ab 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +1.0.15 + +* Added support for Murmur3 (HASHKIT_HASH_MURMUR3) + 1.0.14 Wed Nov 14 04:56:25 EST 2012 * CLIENT_ERROR fixed to not be treated as a fatal error. * Compiler fixes for older Ubuntu releases. diff --git a/docs/memcached_generate_hash_value.rst b/docs/memcached_generate_hash_value.rst index 5b835df9..9a9b5a67 100644 --- a/docs/memcached_generate_hash_value.rst +++ b/docs/memcached_generate_hash_value.rst @@ -40,6 +40,8 @@ SYNOPSIS .. c:type:: MEMCACHED_HASH_HSIEH +.. c:type:: MEMCACHED_HASH_MURMUR3 + Compile and link with -lmemcachedutil -lmemcached diff --git a/libhashkit-1.0/algorithm.h b/libhashkit-1.0/algorithm.h index b29e9c3e..72005aa1 100644 --- a/libhashkit-1.0/algorithm.h +++ b/libhashkit-1.0/algorithm.h @@ -71,42 +71,15 @@ uint32_t libhashkit_hsieh(const char *key, size_t key_length); HASHKIT_API uint32_t libhashkit_murmur(const char *key, size_t key_length); +HASHKIT_API +uint32_t libhashkit_murmur3(const char *key, size_t key_length); + HASHKIT_API uint32_t libhashkit_jenkins(const char *key, size_t key_length); HASHKIT_API uint32_t libhashkit_md5(const char *key, size_t key_length); -HASHKIT_LOCAL -uint32_t hashkit_one_at_a_time(const char *key, size_t key_length, void *context); - -HASHKIT_LOCAL -uint32_t hashkit_fnv1_64(const char *key, size_t key_length, void *context); - -HASHKIT_LOCAL -uint32_t hashkit_fnv1a_64(const char *key, size_t key_length, void *context); - -HASHKIT_LOCAL -uint32_t hashkit_fnv1_32(const char *key, size_t key_length, void *context); - -HASHKIT_LOCAL -uint32_t hashkit_fnv1a_32(const char *key, size_t key_length, void *context); - -HASHKIT_LOCAL -uint32_t hashkit_crc32(const char *key, size_t key_length, void *context); - -HASHKIT_LOCAL -uint32_t hashkit_hsieh(const char *key, size_t key_length, void *context); - -HASHKIT_LOCAL -uint32_t hashkit_murmur(const char *key, size_t key_length, void *context); - -HASHKIT_LOCAL -uint32_t hashkit_jenkins(const char *key, size_t key_length, void *context); - -HASHKIT_LOCAL -uint32_t hashkit_md5(const char *key, size_t key_length, void *context); - HASHKIT_API void libhashkit_md5_signature(const unsigned char *key, size_t length, unsigned char *result); diff --git a/libhashkit-1.0/types.h b/libhashkit-1.0/types.h index b2ce4278..37c33eeb 100644 --- a/libhashkit-1.0/types.h +++ b/libhashkit-1.0/types.h @@ -68,6 +68,7 @@ typedef enum { HASHKIT_HASH_HSIEH, HASHKIT_HASH_MURMUR, HASHKIT_HASH_JENKINS, + HASHKIT_HASH_MURMUR3, HASHKIT_HASH_CUSTOM, HASHKIT_HASH_MAX } hashkit_hash_algorithm_t; diff --git a/libhashkit/algorithm.cc b/libhashkit/algorithm.cc index 0462df0b..d6f2daf1 100644 --- a/libhashkit/algorithm.cc +++ b/libhashkit/algorithm.cc @@ -71,6 +71,11 @@ uint32_t libhashkit_hsieh(const char *key, size_t key_length) return hashkit_hsieh(key, key_length, NULL); } +uint32_t libhashkit_murmur3(const char *key, size_t key_length) +{ + return hashkit_murmur3(key, key_length, NULL); +} + uint32_t libhashkit_murmur(const char *key, size_t key_length) { return hashkit_murmur(key, key_length, NULL); diff --git a/libhashkit/algorithm.h b/libhashkit/algorithm.h new file mode 100644 index 00000000..46dc471b --- /dev/null +++ b/libhashkit/algorithm.h @@ -0,0 +1,75 @@ +/* vim:expandtab:shiftwidth=2:tabstop=2:smarttab: + * + * HashKit library + * + * Copyright (C) 2011-2012 Data Differential, http://datadifferential.com/ + * Copyright (C) 2009 Brian Aker All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * * The names of its contributors may not be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + + +/** + * @file + * @brief HashKit Header + */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + + +uint32_t hashkit_one_at_a_time(const char *key, size_t key_length, void *context); + +uint32_t hashkit_fnv1_64(const char *key, size_t key_length, void *context); + +uint32_t hashkit_fnv1a_64(const char *key, size_t key_length, void *context); + +uint32_t hashkit_fnv1_32(const char *key, size_t key_length, void *context); + +uint32_t hashkit_fnv1a_32(const char *key, size_t key_length, void *context); + +uint32_t hashkit_crc32(const char *key, size_t key_length, void *context); + +uint32_t hashkit_hsieh(const char *key, size_t key_length, void *context); + +uint32_t hashkit_murmur(const char *key, size_t key_length, void *context); + +uint32_t hashkit_murmur3(const char *key, size_t key_length, void *context); + +uint32_t hashkit_jenkins(const char *key, size_t key_length, void *context); + +uint32_t hashkit_md5(const char *key, size_t key_length, void *context); + +#ifdef __cplusplus +} +#endif diff --git a/libhashkit/common.h b/libhashkit/common.h index 7affeb36..43a859fe 100644 --- a/libhashkit/common.h +++ b/libhashkit/common.h @@ -52,9 +52,10 @@ #endif #include -#include -#include -#include +#include "libhashkit/algorithm.h" +#include "libhashkit/is.h" +#include "libhashkit/string.h" +#include "libhashkit/aes.h" #ifdef __cplusplus extern "C" { diff --git a/libhashkit/digest.cc b/libhashkit/digest.cc index 3b220d08..23002934 100644 --- a/libhashkit/digest.cc +++ b/libhashkit/digest.cc @@ -66,6 +66,9 @@ uint32_t libhashkit_digest(const char *key, size_t key_length, hashkit_hash_algo #else return 1; #endif + case HASHKIT_HASH_MURMUR3: + return libhashkit_murmur3(key, key_length); + case HASHKIT_HASH_MURMUR: #ifdef HAVE_MURMUR_HASH return libhashkit_murmur(key, key_length); diff --git a/libhashkit/function.cc b/libhashkit/function.cc index 044ca07d..bee87ff7 100644 --- a/libhashkit/function.cc +++ b/libhashkit/function.cc @@ -57,6 +57,7 @@ static hashkit_return_t _set_function(struct hashkit_st::hashkit_function_st *se } return HASHKIT_INVALID_ARGUMENT; + case HASHKIT_HASH_MURMUR3: case HASHKIT_HASH_MURMUR: if (libhashkit_has_algorithm(HASHKIT_HASH_MURMUR)) { diff --git a/libhashkit/has.cc b/libhashkit/has.cc index 2230a56a..843e32e4 100644 --- a/libhashkit/has.cc +++ b/libhashkit/has.cc @@ -57,6 +57,7 @@ bool libhashkit_has_algorithm(const hashkit_hash_algorithm_t algo) return false; #endif + case HASHKIT_HASH_MURMUR3: case HASHKIT_HASH_MURMUR: #ifdef HAVE_MURMUR_HASH return true; diff --git a/libhashkit/include.am b/libhashkit/include.am index f24a6a4c..0b63d26f 100644 --- a/libhashkit/include.am +++ b/libhashkit/include.am @@ -12,6 +12,7 @@ lib_LTLIBRARIES+= libhashkit/libhashkit.la noinst_HEADERS+= libhashkit/aes.h +noinst_HEADERS+= libhashkit/murmur3.h noinst_HEADERS+= libhashkit/common.h noinst_HEADERS+= libhashkit/is.h noinst_HEADERS+= libhashkit/rijndael.hpp @@ -22,27 +23,30 @@ nobase_include_HEADERS+= libhashkit/hashkit.h libhashkit_libhashkit_la_SOURCES= libhashkit_libhashkit_la_CXXFLAGS= +libhashkit_libhashkit_la_CFLAGS= libhashkit_libhashkit_la_SOURCES+= libhashkit/aes.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/algorithm.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/behavior.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/crc32.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/digest.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/algorithm.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/behavior.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/crc32.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/digest.cc libhashkit_libhashkit_la_SOURCES+= libhashkit/encrypt.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/fnv_32.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/fnv_64.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/function.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/has.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/hashkit.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/jenkins.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/ketama.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/md5.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/murmur.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/one_at_a_time.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/fnv_32.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/fnv_64.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/function.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/has.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/hashkit.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/jenkins.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/ketama.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/md5.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/murmur.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/murmur3.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/murmur3_api.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/one_at_a_time.cc libhashkit_libhashkit_la_SOURCES+= libhashkit/rijndael.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/str_algorithm.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/str_algorithm.cc libhashkit_libhashkit_la_SOURCES+= libhashkit/strerror.cc -libhashkit_libhashkit_la_SOURCES+= libhashkit/string.cc +libhashkit_libhashkit_la_SOURCES+= libhashkit/string.cc if INCLUDE_HSIEH_SRC libhashkit_libhashkit_la_SOURCES+= libhashkit/hsieh.cc @@ -51,6 +55,7 @@ libhashkit_libhashkit_la_SOURCES+= libhashkit/nohsieh.cc endif libhashkit_libhashkit_la_CXXFLAGS+= -DBUILDING_HASHKIT +libhashkit_libhashkit_la_CFLAGS+= -DBUILDING_HASHKIT libhashkit_libhashkit_la_LIBADD= libhashkit_libhashkit_la_LDFLAGS= -version-info $(HASHKIT_LIBRARY_VERSION) diff --git a/libhashkit/murmur3.cc b/libhashkit/murmur3.cc new file mode 100644 index 00000000..5a5666e7 --- /dev/null +++ b/libhashkit/murmur3.cc @@ -0,0 +1,317 @@ +//----------------------------------------------------------------------------- +//MurmurHash3 was written by Austin Appleby, and is placed in the public +//domain. The author hereby disclaims copyright to this source code. + +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +#include "mem_config.h" + +#include "libhashkit/murmur3.h" + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +#ifdef __GNUC__ +#define FORCE_INLINE __attribute__((always_inline)) inline +#else +#define FORCE_INLINE inline +#endif + +static FORCE_INLINE uint32_t rotl32 ( uint32_t x, int8_t r ) +{ + return (x << r) | (x >> (32 - r)); +} + +static FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r ) +{ + return (x << r) | (x >> (64 - r)); +} + +#define ROTL32(x,y) rotl32(x,y) +#define ROTL64(x,y) rotl64(x,y) + +#define BIG_CONSTANT(x) (x##LLU) + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +#define getblock(p, i) (p[i]) + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +static FORCE_INLINE uint32_t fmix32 ( uint32_t h ) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +//---------- + +static FORCE_INLINE uint64_t fmix64 ( uint64_t k ) +{ + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32 ( const void * key, int len, + uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 4; + int i; + + uint32_t h1 = seed; + + uint32_t c1 = 0xcc9e2d51; + uint32_t c2 = 0x1b873593; + + //---------- + // body + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); + + for(i = -nblocks; i; i++) + { + uint32_t k1 = getblock(blocks,i); + + k1 *= c1; + k1 = ROTL32(k1,15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1,13); + h1 = h1*5+0xe6546b64; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*4); + + uint32_t k1 = 0; + + switch(len & 3) + { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix32(h1); + + *(uint32_t*)out = h1; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_128 ( const void * key, const int len, + uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 16; + int i; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + uint32_t c1 = 0x239b961b; + uint32_t c2 = 0xab0e9789; + uint32_t c3 = 0x38b34ae5; + uint32_t c4 = 0xa1e38b93; + + //---------- + // body + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); + + for(i = -nblocks; i; i++) + { + uint32_t k1 = getblock(blocks,i*4+0); + uint32_t k2 = getblock(blocks,i*4+1); + uint32_t k3 = getblock(blocks,i*4+2); + uint32_t k4 = getblock(blocks,i*4+3); + + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + + h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; + + k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; + + h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; + + k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; + + h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; + + k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; + + h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*16); + + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch(len & 15) + { + case 15: k4 ^= tail[14] << 16; + case 14: k4 ^= tail[13] << 8; + case 13: k4 ^= tail[12] << 0; + k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; + + case 12: k3 ^= tail[11] << 24; + case 11: k3 ^= tail[10] << 16; + case 10: k3 ^= tail[ 9] << 8; + case 9: k3 ^= tail[ 8] << 0; + k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; + + case 8: k2 ^= tail[ 7] << 24; + case 7: k2 ^= tail[ 6] << 16; + case 6: k2 ^= tail[ 5] << 8; + case 5: k2 ^= tail[ 4] << 0; + k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; + + case 4: k1 ^= tail[ 3] << 24; + case 3: k1 ^= tail[ 2] << 16; + case 2: k1 ^= tail[ 1] << 8; + case 1: k1 ^= tail[ 0] << 0; + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + h1 = fmix32(h1); + h2 = fmix32(h2); + h3 = fmix32(h3); + h4 = fmix32(h4); + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + ((uint32_t*)out)[0] = h1; + ((uint32_t*)out)[1] = h2; + ((uint32_t*)out)[2] = h3; + ((uint32_t*)out)[3] = h4; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x64_128 ( const void * key, const int len, + const uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 16; + int i; + + uint64_t h1 = seed; + uint64_t h2 = seed; + + uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); + uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); + + //---------- + // body + + const uint64_t * blocks = (const uint64_t *)(data); + + for(i = 0; i < nblocks; i++) + { + uint64_t k1 = getblock(blocks,i*2+0); + uint64_t k2 = getblock(blocks,i*2+1); + + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + + h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; + + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + + h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch(len & 15) + { + case 15: k2 ^= (uint64_t)(tail[14]) << 48; + case 14: k2 ^= (uint64_t)(tail[13]) << 40; + case 13: k2 ^= (uint64_t)(tail[12]) << 32; + case 12: k2 ^= (uint64_t)(tail[11]) << 24; + case 11: k2 ^= (uint64_t)(tail[10]) << 16; + case 10: k2 ^= (uint64_t)(tail[ 9]) << 8; + case 9: k2 ^= (uint64_t)(tail[ 8]) << 0; + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + + case 8: k1 ^= (uint64_t)(tail[ 7]) << 56; + case 7: k1 ^= (uint64_t)(tail[ 6]) << 48; + case 6: k1 ^= (uint64_t)(tail[ 5]) << 40; + case 5: k1 ^= (uint64_t)(tail[ 4]) << 32; + case 4: k1 ^= (uint64_t)(tail[ 3]) << 24; + case 3: k1 ^= (uint64_t)(tail[ 2]) << 16; + case 2: k1 ^= (uint64_t)(tail[ 1]) << 8; + case 1: k1 ^= (uint64_t)(tail[ 0]) << 0; + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + ((uint64_t*)out)[0] = h1; + ((uint64_t*)out)[1] = h2; +} + +//----------------------------------------------------------------------------- + diff --git a/libhashkit/murmur3.h b/libhashkit/murmur3.h new file mode 100644 index 00000000..4eb4fa64 --- /dev/null +++ b/libhashkit/murmur3.h @@ -0,0 +1,16 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the +// public domain. The author hereby disclaims copyright to this source +// code. + +#pragma once + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32 (const void *key, int len, uint32_t seed, void *out); + +void MurmurHash3_x86_128(const void *key, int len, uint32_t seed, void *out); + +void MurmurHash3_x64_128(const void *key, int len, uint32_t seed, void *out); + +//----------------------------------------------------------------------------- diff --git a/libhashkit/murmur3_api.cc b/libhashkit/murmur3_api.cc new file mode 100644 index 00000000..137fd369 --- /dev/null +++ b/libhashkit/murmur3_api.cc @@ -0,0 +1,49 @@ +/* vim:expandtab:shiftwidth=2:tabstop=2:smarttab: + * + * HashKit library + * + * Copyright (C) 2012 Data Differential, http://datadifferential.com/ + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * * The names of its contributors may not be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "libhashkit/common.h" +#include "libhashkit/murmur3.h" + +uint32_t hashkit_murmur3(const char *key, size_t length, void *) +{ + const uint32_t seed= (0xdeadbeef * (uint32_t)length); + + uint32_t ret; + MurmurHash3_x86_32(key, length, seed, &ret); + + return ret; +} diff --git a/libhashkit/str_algorithm.cc b/libhashkit/str_algorithm.cc index 0a0613bc..b84a0bb7 100644 --- a/libhashkit/str_algorithm.cc +++ b/libhashkit/str_algorithm.cc @@ -49,6 +49,7 @@ const char * libhashkit_string_hash(hashkit_hash_algorithm_t type) case HASHKIT_HASH_FNV1A_32: return "FNV1A_32"; case HASHKIT_HASH_HSIEH: return "HSIEH"; case HASHKIT_HASH_MURMUR: return "MURMUR"; + case HASHKIT_HASH_MURMUR3: return "MURMUR3"; case HASHKIT_HASH_JENKINS: return "JENKINS"; case HASHKIT_HASH_CUSTOM: return "CUSTOM"; default: diff --git a/libmemcached-1.0/types/hash.h b/libmemcached-1.0/types/hash.h index 7651eb1a..8eee4df7 100644 --- a/libmemcached-1.0/types/hash.h +++ b/libmemcached-1.0/types/hash.h @@ -49,6 +49,7 @@ enum memcached_hash_t { MEMCACHED_HASH_HSIEH, MEMCACHED_HASH_MURMUR, MEMCACHED_HASH_JENKINS, + MEMCACHED_HASH_MURMUR3, MEMCACHED_HASH_CUSTOM, MEMCACHED_HASH_MAX }; diff --git a/tests/hash_plus.cc b/tests/hash_plus.cc index 1fb2ef2a..078bdd17 100644 --- a/tests/hash_plus.cc +++ b/tests/hash_plus.cc @@ -130,6 +130,7 @@ static test_return_t set_function_test(void *) list= hsieh_values; break; + case HASHKIT_HASH_MURMUR3: case HASHKIT_HASH_MURMUR: #ifdef WORDS_BIGENDIAN continue; diff --git a/tests/hash_results.h b/tests/hash_results.h index c05053a7..ce788b7c 100644 --- a/tests/hash_results.h +++ b/tests/hash_results.h @@ -113,8 +113,17 @@ static uint32_t murmur_values[]= { 4142305122U, 734504955U, 3802834688U, 407689 264013145U, 3995512858U, 2400956718U, 2346666219U, 926327338U, 442757446U, 1770805201U, 560483147U, 3902279934U }; + +static uint32_t murmur3_values[]= { 1120212521U, 1448785489U, 4186307405U, 2686268514U, + 444808887U, 221750260U, 3074673162U, 1946933257U, + 2826416675U, 2430719166U, 3200429559U, 297894347U, + 732888124U, 4050076964U, 3298336176U, 1336207361U, + 810553576U, 3748182674U, 3860119212U, 3439537197U, + 3044240981U, 1464271804U, 3896193724U, 2915115798U, + 1702843840U }; #else static uint32_t murmur_values[]= { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; +static uint32_t murmur3_values[]= { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; #endif static uint32_t jenkins_values[]= { 1442444624U, 4253821186U, 1885058256U, 2120131735U, diff --git a/tests/hashkit_functions.cc b/tests/hashkit_functions.cc index 1e9ec7bd..9def3025 100644 --- a/tests/hashkit_functions.cc +++ b/tests/hashkit_functions.cc @@ -262,6 +262,27 @@ static test_return_t hsieh_run (hashkit_st *) return TEST_SUCCESS; } +static test_return_t murmur3_TEST(hashkit_st *) +{ + test_skip(true, libhashkit_has_algorithm(HASHKIT_HASH_MURMUR3)); + +#ifdef WORDS_BIGENDIAN + (void)murmur3_values; + return TEST_SKIPPED; +#else + uint32_t x; + const char **ptr; + + for (ptr= list_to_hash, x= 0; *ptr; ptr++, x++) + { + test_compare(murmur3_values[x], + libhashkit_murmur3(*ptr, strlen(*ptr))); + } + + return TEST_SUCCESS; +#endif +} + static test_return_t murmur_run (hashkit_st *) { test_skip(true, libhashkit_has_algorithm(HASHKIT_HASH_MURMUR)); @@ -509,6 +530,7 @@ test_st hash_tests[] ={ {"fnv1a_32", 0, (test_callback_fn*)fnv1a_32_run }, {"hsieh", 0, (test_callback_fn*)hsieh_run }, {"murmur", 0, (test_callback_fn*)murmur_run }, + {"murmur3", 0, (test_callback_fn*)murmur3_TEST }, {"jenkis", 0, (test_callback_fn*)jenkins_run }, {0, 0, (test_callback_fn*)0} }; diff --git a/tests/libmemcached-1.0/all_tests.h b/tests/libmemcached-1.0/all_tests.h index 74e619d2..2da9c7e3 100644 --- a/tests/libmemcached-1.0/all_tests.h +++ b/tests/libmemcached-1.0/all_tests.h @@ -361,6 +361,7 @@ test_st hash_tests[] ={ {"fnv1a_32", false, (test_callback_fn*)fnv1a_32_run }, {"hsieh", false, (test_callback_fn*)hsieh_run }, {"murmur", false, (test_callback_fn*)murmur_run }, + {"murmur3", false, (test_callback_fn*)murmur3_TEST }, {"jenkis", false, (test_callback_fn*)jenkins_run }, {"memcached_get_hashkit", false, (test_callback_fn*)memcached_get_hashkit_test }, {0, 0, (test_callback_fn*)0} diff --git a/tests/libmemcached-1.0/mem_functions.cc b/tests/libmemcached-1.0/mem_functions.cc index a0570884..49be55da 100644 --- a/tests/libmemcached-1.0/mem_functions.cc +++ b/tests/libmemcached-1.0/mem_functions.cc @@ -3683,6 +3683,27 @@ test_return_t murmur_run (memcached_st *) #endif } +test_return_t murmur3_TEST(hashkit_st *) +{ + test_skip(true, libhashkit_has_algorithm(HASHKIT_HASH_MURMUR3)); + +#ifdef WORDS_BIGENDIAN + (void)murmur3_values; + return TEST_SKIPPED; +#else + uint32_t x; + const char **ptr; + + for (ptr= list_to_hash, x= 0; *ptr; ptr++, x++) + { + test_compare(murmur3_values[x], + memcached_generate_hash_value(*ptr, strlen(*ptr), MEMCACHED_HASH_MURMUR3)); + } + + return TEST_SUCCESS; +#endif +} + test_return_t jenkins_run (memcached_st *) { uint32_t x; diff --git a/tests/libmemcached-1.0/mem_functions.h b/tests/libmemcached-1.0/mem_functions.h index 104d44ee..b3c0bf57 100644 --- a/tests/libmemcached-1.0/mem_functions.h +++ b/tests/libmemcached-1.0/mem_functions.h @@ -114,6 +114,7 @@ test_return_t mget_result_test(memcached_st *memc); test_return_t mget_test(memcached_st *memc); test_return_t murmur_avaibility_test (memcached_st *memc); test_return_t murmur_run (memcached_st *); +test_return_t murmur3_TEST(hashkit_st *); test_return_t noreply_test(memcached_st *memc); test_return_t one_at_a_time_run (memcached_st *); test_return_t output_ketama_weighted_keys(memcached_st *); -- 2.30.2