From: Brian Aker Date: Fri, 23 Mar 2012 19:12:34 +0000 (-0700) Subject: Adding a copy of memcached to the tree. X-Git-Tag: 1.0.6~15 X-Git-Url: https://git.m6w6.name/?a=commitdiff_plain;h=ed597ac92b140f4c6723ff3b937f0903c1145542;p=awesomized%2Flibmemcached Adding a copy of memcached to the tree. --- diff --git a/.bzrignore b/.bzrignore index a3cc73e5..efe2f1bd 100644 --- a/.bzrignore +++ b/.bzrignore @@ -145,3 +145,6 @@ unittests/unittests tests/libmemcached-1.0/testsocket example/t/memcached_light libtest/abort +memcached/.git +memcached/.gitignore +memcached/memcached diff --git a/Makefile.am b/Makefile.am index f033421a..4d94c2fc 100644 --- a/Makefile.am +++ b/Makefile.am @@ -30,6 +30,8 @@ EXTRA_DIST= \ include libtest/include.am +libtest_libtest_la_DEPENDENCIES+= memcached/memcached + include libhashkit-1.0/include.am include libmemcached-1.0/include.am include libmemcachedprotocol-0.0/include.am @@ -49,6 +51,7 @@ include support/include.am include tests/include.am include util/include.am include win32/include.am +include memcached/include.am docs: html man diff --git a/clients/include.am b/clients/include.am index bb35bdf5..aeda9f85 100644 --- a/clients/include.am +++ b/clients/include.am @@ -11,19 +11,19 @@ CLIENTS_LDADDS+= $(LIBSASL) endif bin_PROGRAMS+= \ - clients/memcapable \ - clients/memcat \ - clients/memcp \ - clients/memdump \ - clients/memerror \ - clients/memexist \ - clients/memtouch \ - clients/memflush \ - clients/memparse \ - clients/memping \ - clients/memrm \ - clients/memslap \ - clients/memstat + clients/memcapable \ + clients/memcat \ + clients/memcp \ + clients/memdump \ + clients/memerror \ + clients/memexist \ + clients/memtouch \ + clients/memflush \ + clients/memparse \ + clients/memping \ + clients/memrm \ + clients/memslap \ + clients/memstat if BUILD_MEMASLAP if HAVE_LIBEVENT @@ -34,18 +34,18 @@ endif endif noinst_HEADERS+= \ - clients/client_options.h \ - clients/execute.h \ - clients/generator.h \ - clients/ms_atomic.h \ - 
clients/ms_conn.h \ - clients/ms_memslap.h \ - clients/ms_setting.h \ - clients/ms_sigsegv.h \ - clients/ms_stats.h \ - clients/ms_task.h \ - clients/ms_thread.h \ - clients/utilities.h + clients/client_options.h \ + clients/execute.h \ + clients/generator.h \ + clients/ms_atomic.h \ + clients/ms_conn.h \ + clients/ms_memslap.h \ + clients/ms_setting.h \ + clients/ms_sigsegv.h \ + clients/ms_stats.h \ + clients/ms_task.h \ + clients/ms_thread.h \ + clients/utilities.h noinst_LTLIBRARIES+= clients/libutilities.la clients_libutilities_la_SOURCES= clients/utilities.cc diff --git a/configure.ac b/configure.ac index 965ad3bf..7deac711 100644 --- a/configure.ac +++ b/configure.ac @@ -67,6 +67,7 @@ m4_include([m4/memcached.m4]) m4_include([m4/memcached_sasl.m4]) m4_include([m4/gearmand.m4]) m4_include([m4/libgearman.m4]) +m4_include([memcached/version.m4]) AM_CONDITIONAL(BUILDING_LIBMEMCACHED, true) AM_CONDITIONAL(HAVE_LIBMEMCACHED, false) @@ -134,6 +135,14 @@ AC_CHECK_HEADERS([cxxabi.h], AC_CHECK_HEADERS([sys/sysctl.h]) +AC_CHECK_HEADER(umem.h, [ + AC_DEFINE([HAVE_UMEM_H], 1, + [Define this if you have umem.h]) + build_cache=no +], [build_cache=yes]) + +AM_CONDITIONAL([BUILD_CACHE], [test "x$build_cache" = "xyes"]) + AX_COMPILER_VENDOR AC_FUNC_ALLOCA @@ -195,6 +204,7 @@ AC_CHECK_LIB([rt], [clock_gettime], ]) AC_CHECK_LIB([m], [floor]) +AC_CHECK_FUNCS([sigignore]) AC_CHECK_HEADERS([atomic.h]) AS_IF([test "x$ac_cv_header_atomic_h" = "xyes"],[ @@ -214,6 +224,20 @@ AC_DEFINE([HAVE_LIBCURL], [0], [Support for libcurl]) AC_DEFINE([HAVE_MEMCACHED_LIGHT_BINARY], [1], [Support for memcached_light]) AC_DEFINE([MEMCACHED_LIGHT_BINARY], ["example/memcached_light"], [Support for memcached_light]) +dnl Check for the requirements for running memcached with less privileges +dnl than the default privilege set. 
On Solaris we need setppriv and priv.h +dnl If you want to add support for other platforms you should check for +dnl your requirements, define HAVE_DROP_PRIVILEGES, and make sure you add +dnl the source file containing the implementation into memcached_SOURCE +dnl in Makefile.am +AC_CHECK_FUNCS(setppriv, [ + AC_CHECK_HEADER(priv.h, [ + AC_DEFINE([HAVE_DROP_PRIVILEGES], 1, + [Define this if you have an implementation of drop_privileges()]) + build_solaris_privs=yes + ], []) +],[]) + AC_CHECK_HEADERS_ONCE(winsock2.h poll.h sys/wait.h fnmatch.h) AM_CONDITIONAL(BUILD_POLL, test "x$ac_cv_header_poll_h" = "xno") AM_CONDITIONAL(BUILD_WIN32_WRAPPERS, test "x$ac_cv_header_winsock2_h" = "xyes") @@ -233,6 +257,32 @@ AX_CHECK_LIBRARY([LIBEVENT], [event.h], [event], ]) AM_CONDITIONAL(HAVE_LIBEVENT, test "x${ax_cv_have_LIBEVENT}" = "xyes") +dnl Check if we're a little-endian or a big-endian system, needed by hash code +AC_DEFUN([AC_C_ENDIAN], +[AC_CACHE_CHECK(for endianness, ac_cv_c_endian, +[ + AC_RUN_IFELSE( + [AC_LANG_PROGRAM([], [dnl + long val = 1; + char *c = (char *) &val; + exit(*c == 1); + ]) + ],[ + ac_cv_c_endian=big + ],[ + ac_cv_c_endian=little + ]) +]) +if test $ac_cv_c_endian = big; then + AC_DEFINE(ENDIAN_BIG, 1, [machine is bigendian]) +fi +if test $ac_cv_c_endian = little; then + AC_DEFINE(ENDIAN_LITTLE, 1, [machine is littleendian]) +fi +]) + +AC_C_ENDIAN + AC_CONFIG_FILES([ Makefile docs/conf.py diff --git a/libtest/include.am b/libtest/include.am index f4970642..c5f0b5cc 100644 --- a/libtest/include.am +++ b/libtest/include.am @@ -124,15 +124,21 @@ libtest_libtest_la_SOURCES= \ libtest/vchar.cc libtest_libtest_la_CXXFLAGS= +libtest_libtest_la_DEPENDENCIES= +libtest_libtest_la_LIBADD= + libtest_libtest_la_CXXFLAGS+= ${NO_CONVERSION} libtest_libtest_la_CXXFLAGS+= -DBUILDING_LIBTEST libtest_libtest_la_CXXFLAGS+= $(PTHREAD_CFLAGS) libtest_libtest_la_CXXFLAGS+= -DLIBTEST_TEMP="\"tmp_chroot\"" libtest_libtest_la_CXXFLAGS+= $(CURL_CFLAGS) 
-libtest_libtest_la_LIBADD= + libtest_libtest_la_LIBADD+= $(PTHREAD_LIBS) libtest_libtest_la_LIBADD+= $(CURL_LIBS) -libtest_libtest_la_DEPENDENCIES= libtest_tmp_dir + +libtest_libtest_la_DEPENDENCIES+= libtest_tmp_dir +libtest_libtest_la_DEPENDENCIES+=libtest/abort +libtest_libtest_la_DEPENDENCIES+=libtest/wait # Declare unittest so that we can append to it libtest_unittest_CXXFLAGS= @@ -203,7 +209,7 @@ tmp_chroot/var/run: tmp_chroot/var @$(mkdir_p) tmp_chroot/var/run -libtest_unittest_DEPENDENCIES+= libtest/libtest.la libtest_tmp_dir libtest/abort libtest/wait +libtest_unittest_DEPENDENCIES+= libtest/libtest.la libtest_unittest_LDADD+= libtest/libtest.la libtest_unittest_SOURCES= libtest/unittest.cc check_PROGRAMS+= libtest/unittest diff --git a/libtest/memcached.cc b/libtest/memcached.cc index 5327e9b8..ce523a7a 100644 --- a/libtest/memcached.cc +++ b/libtest/memcached.cc @@ -56,7 +56,11 @@ class Memcached : public libtest::Server std::string _password; public: - Memcached(const std::string& host_arg, const in_port_t port_arg, const bool is_socket_arg, const std::string& username_arg, const std::string& password_arg) : + Memcached(const std::string& host_arg, + const in_port_t port_arg, + const bool is_socket_arg, + const std::string& username_arg, + const std::string& password_arg) : libtest::Server(host_arg, port_arg, is_socket_arg), _username(username_arg), _password(password_arg) @@ -162,6 +166,16 @@ public: return MEMCACHED_BINARY; } + bool is_libtool() + { + if (MEMCACHED_BINARY and strcmp(MEMCACHED_BINARY, "memcached/memcached") == 0) + { + return true; + } + + return false; + } + virtual void pid_file_option(Application& app, const std::string& arg) { if (arg.empty() == false) @@ -205,11 +219,6 @@ public: } } - bool is_libtool() - { - return false; - } - bool broken_socket_cleanup() { return true; diff --git a/libtest/server.cc b/libtest/server.cc index 3e891bba..7591e8bf 100644 --- a/libtest/server.cc +++ b/libtest/server.cc @@ -181,7 +181,7 @@ bool 
Server::start() return false; } - if (is_helgrind() or is_valgrind()) + if (is_valgrind()) { dream(5, 50000); } diff --git a/m4/bottom.m4 b/m4/bottom.m4 index 289c2994..798ff2c4 100644 --- a/m4/bottom.m4 +++ b/m4/bottom.m4 @@ -23,8 +23,20 @@ AH_BOTTOM([ #if !defined(HAVE_ULONG) && !defined(__USE_MISC) # define HAVE_ULONG 1 typedef unsigned long int ulong; +#endif + + +#if defined(RMEMCACHED_PACKAGE) +#undef VERSION +#define VERSION VERSION_NUMBER #endif +#if defined(RMEMCACHED_PACKAGE) +#undef PACKAGE +#define PACKAGE RMEMCACHED_PACKAGE +#endif + + ]) ])dnl CONFIG_EXTRA diff --git a/m4/memcached.m4 b/m4/memcached.m4 index 7d648bf2..c33f1da2 100644 --- a/m4/memcached.m4 +++ b/m4/memcached.m4 @@ -1,10 +1,10 @@ -AX_WITH_PROG(MEMCACHED_BINARY,memcached) +AX_WITH_PROG(MEMCACHED_BINARY, [memcached]) AS_IF([test -f "$ac_cv_path_MEMCACHED_BINARY"], [ AC_DEFINE([HAVE_MEMCACHED_BINARY], [1], [If Memcached binary is available]) AC_DEFINE_UNQUOTED([MEMCACHED_BINARY], "$ac_cv_path_MEMCACHED_BINARY", [Name of the memcached binary used in make test]) ], [ - AC_DEFINE([HAVE_MEMCACHED_BINARY], [0], [If Memcached binary is available]) - AC_DEFINE([MEMCACHED_BINARY], [0], [Name of the memcached binary used in make test]) + AC_DEFINE([HAVE_MEMCACHED_BINARY], [1], [If Memcached binary is available]) + AC_DEFINE([MEMCACHED_BINARY], ["memcached/memcached"], [Name of the memcached binary used in make test]) ]) diff --git a/memcached/.shipit b/memcached/.shipit new file mode 100644 index 00000000..9b972676 --- /dev/null +++ b/memcached/.shipit @@ -0,0 +1,4 @@ +steps = FindVersion, ChangeVersion, CheckChangeLog, DistTest, Commit, Tag, MakeDist, AddToSVNDir + +AddToSVNDir.dir = ../website/dist +svn.tagpattern = %v diff --git a/memcached/AUTHORS b/memcached/AUTHORS new file mode 100644 index 00000000..fc95232f --- /dev/null +++ b/memcached/AUTHORS @@ -0,0 +1,2 @@ +Anatoly Vorobey +Brad Fitzpatrick diff --git a/memcached/BUILD b/memcached/BUILD new file mode 100644 index 00000000..d6ca4c49 --- 
/dev/null +++ b/memcached/BUILD @@ -0,0 +1,37 @@ +Ideally, you want to make a static binary, otherwise the dynamic +linker pollutes your address space with shared libs right in the +middle. (NOTE: actually, this shouldn't matter so much anymore, now +that we only allocate huge, fixed-size slabs) + +Make sure your libevent has epoll (Linux) or kqueue (BSD) support. +Using poll or select only is slow, and works for testing, but +shouldn't be used for high-traffic memcache installations. + +To build libevent with epoll on Linux, you need two things. First, +you need /usr/include/sys/epoll.h . To get it, you can install the +userspace epoll library, epoll-lib. The link to the latest version +is buried inside +http://www.xmailserver.org/linux-patches/nio-improve.html ; currently +it's http://www.xmailserver.org/linux-patches/epoll-lib-0.9.tar.gz . +If you're having any trouble building/installing it, you can just copy +epoll.h from that tarball to /usr/include/sys as that's the only thing +from there that libevent really needs. + +Secondly, you need to declare syscall numbers of epoll syscalls, so +libevent can use them. Put these declarations somewhere +inside <sys/epoll.h>: + +#define __NR_epoll_create 254 +#define __NR_epoll_ctl 255 +#define __NR_epoll_wait 256 + +After this you should be able to build libevent with epoll support. +Once you build/install libevent, you don't need <sys/epoll.h> to +compile memcache or link it against libevent. Don't forget that for epoll +support to actually work at runtime you need to use a kernel with epoll +support patch applied, as explained in the README file. + +BSD users are luckier, and will get kqueue support by default. + + + diff --git a/memcached/CONTRIBUTING b/memcached/CONTRIBUTING new file mode 100644 index 00000000..e93b005f --- /dev/null +++ b/memcached/CONTRIBUTING @@ -0,0 +1,3 @@ +Want to contribute? 
Up-to-date pointers should be at: + + http://contributing.appspot.com/memcached diff --git a/memcached/COPYING b/memcached/COPYING new file mode 100644 index 00000000..4746b00c --- /dev/null +++ b/memcached/COPYING @@ -0,0 +1,30 @@ +Copyright (c) 2003, Danga Interactive, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + + * Neither the name of the Danga Interactive nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/memcached/ChangeLog b/memcached/ChangeLog new file mode 100644 index 00000000..db7800d6 --- /dev/null +++ b/memcached/ChangeLog @@ -0,0 +1,621 @@ +2010-10-11 + * ChangeLog is no longer being updated. 
+ See http://code.google.com/p/memcached/wiki/ReleaseNotes + +2009-04-10 + + * *critical bugfix*. In 1.2.7 under multithreaded mode, memcached + would never restart accepting connections after hitting the + maximum connection limit. + +2009-04-02 + + * Fix: Rewrote stat buffer handling (Trond) + +2009-03-31 + + * Feature: -b option for configuring backlog + + * Feature: stats settings + + * Fix: binary stat alignment bug (bug26) + + * Fix: buffer overflow in stats (bug27) + + * Fix: recycle memory more aggressively (bug14) + + * Fix: incr validation (bug31) + + * Fix: 64-bit incr/decr delta problem (bug21) + + * Fix: Ascii UDP set (bug36) + + * Fix: stats slabs' used chunks (bug29) + + * Fix: stats reset resetting more (bug22) + + * Misc: More tests, documentation, cleanliness (godliness) + + * Stable merge (stats, debuggability, listen bugs) + +2009-03-11 + + * Protocol: Binary complete (Dustin, Trond, Toru, etc...) + + * Performance: improvements from facebook (Dormando) + + * Performance: per-thread stats (Trond) + + * Performance: Hash expansion in its own thread (Trond) + + * Stats: Lots of new stats (Dustin) + + * Bug fixes (various) + + See the 1.3.2 release notes or revision control for more detail. + +2008-09-06 + * Display error status on listen failures (Dormando) + + * Remove managed instance code. Incomplete/etc. (Dormando) + + * Handle broken IPV6 stacks better (Brian Aker) + + * Generate warnings on setsockopt() failures (Brian Aker) + + * Fix some indentation issues (Brian Aker) + + * UDP/TCP can be disabled by setting their port to zero (Brian Aker) + + * Zero out libevent thread structures before use (Ricky Zhou) + + * New stat: Last accessed time for last evicted item per slab class. + (Dormando) + + * Use a dedicated socket accept thread (Facebook) + + * Add -R option. Limit the number of requests processed by a connection + at once. Prevents starving other threads if bulk loading. 
(Facebook) + +2008-07-29 [Version 1.2.6 released] + +2008-07-24 [Version 1.2.6-rc1 released] + + * Add support for newer automake (Facebook) + + * DTrace support for Solaris/etc (Trond Norbye) + + * LRU tests (Steve Yen) + + * Handle negative length items properly (Dormando) + + * Don't leave stale data after failed set attempts (Dormando) + + * Fix refcount leaks, which would result in OOM's on all sets + (Dormando) + + * Fix buffer overruns (Dustin Sallings, Tomash Brechko) + + * Fix memory corruption with CAS (Dustin Sallings) + +2008-06-11 + + * Fix -k to work with -d. (reported by Gary Zhu) + +2008-03-02 [Version 1.2.5-rc1 released] + + * Add per-item-class tracking of evictions and OOM errors (dormando) + + * Optimize item_alloc() a little (dormando) + + * Give 'SERVER_ERROR out of memory' errors more context (dormando) + + * Enable usage of large memory pages under solaris + (Trond.Norbye@Sun.COM) + + * Enable UDP by default, clean up server socket code + (brian@tangent.org) + + * 'noreply' support (Tomash Brechko) + + * IPv6 support, and IPv6 multi-interface support (brian@tangent.org) + + * Add compiler options for Sun Studio compilers with --enable-threads + (Trond.Norbye@Sun.COM) + + * Add --enable-64bit for multitarget platforms (Trond.Norbye@Sun.COM) + + * Use gettimeofday(2) instead of time(2). + + * Make -k option work (Tomash Brechko) + + * Fix chunk slab alignment (Trond.Norbye@Sun.COM) + +2007-12-06 [Version 1.2.4 released] + +2007-12-05 + + * Fix compilation on panther (JS and Dormando) + + * More CAS tests (Chris Goffinet) + + * Final fixes for all 1.2.4 features are in, -rc2 sent out. + +2007-11-19 [Version 1.2.4-rc1 released] + +2007-11-19 Dormando + + * Patch series from Tomash Brechko : + Minor fixes and optimisations. + + * Patches from Chris, Dustin, and Dormando to fix CAS. + + * Prepping for 1.2.4 release. + +2007-11-13 Dormando + + * Adjusted patch from js : Compile on OS X Panther + and earlier. 
+ +2007-11-12 Steven Grimm + + * Patch from Tomash Brechko : Always send + "SERVER_ERROR out of memory" when memory exhausted. + +2007-10-15 Paul Lindner + + * Patch from David Bremner that implements + a new option "-a" which takes an octal permission mask + (like chmod) sets the permissions on the unix domain socket + (specified by "-s"). + +2007-10-03 Paul Lindner + * Incorporate "cas" operation developed by Dustin + Sallings This change allows you + to do atomic changes to an existing key. + + * Fix for stats.evictions not incrementing + when exptime == 0 items are kicked off the cache. + from Jean-Francois BUSTARRET . + + * Fix for do_item_cachedump() which was returning + an incorrect timestamp. + + * Switch to unsigned 64-bit increment/decrement counters + from Evan Miller and Dustin Sallings. + + * Add append command support written by Filipe Laborde. + Thread safe version plus prepend command from Maxim Dounin + + + * The memcached-tool script can now display stats. Patch + provided by Dan Christian + + * Fix for Unix Domain sockets on FreeBSD + FreeBSD's sendmsg() requires msg_name in msghdr structure + to be NULL if not used, setting msg_namelen to 0 isn't enough. + Patch from Maxim Dounin + +2007-08-21 Paul Lindner + * Incorporate increment patch from Evan Miller + to define increment overflow + behavior. + +2007-08-07 Leon Brocard + * Bring the memcached.1 manpage up to date + +2007-08-06 Paul Lindner + * Fix crash when using -P and -d flags on x86_64 + with latest libevent release. + +2007-07-08 Steven Grimm + + * Item stats commands weren't thread-safe; wrap them with locks + when compiled in multithreaded mode. + * The "stats items" command now works again; it broke with the + introduction of the powers-of-N chunk size change. 
+ +2007-07-06 [Version 1.2.3 released] + +2007-06-19 Paul Lindner + + * Solaris portability fixes from Trond Norbye + +2007-05-29 Paul Lindner + + * Properly document evictions statistic value + +2007-05-10 Paul Lindner + + * Flesh out tests for unix domain sockets and binary data. + * Update rpm spec file to run tests + +2007-05-07 Paul Lindner + + * Fix compilation bug on freebsd 6.x (and maybe others) + * Update RPM spec file per redhat bugzilla #238994 + * Move unistd.h to memcached.h to get rid of warnings + * Add string.h to thread.c to get correctly prototyped strerror() + +2007-05-04 Paul Lindner + + * Add fedora/redhat style init script and RPM spec file + +2007-05-12 [Version 1.2.2 released] + +2007-04-16 Steven Grimm + + * Command tokenizer performance and cleanliness improvement. + Patch contributed by Paolo Borelli . + +2007-04-16 Paul Lindner + + * Add notes to README about MacOS, libevent and kqueue. + + * Windows Patch integration -- part 1, warnings elimination. + +2007-04-12 Paul Lindner + + * Allow changes to the verbosity level of the server with a new + "verbosity" command and some compiler cleanups. + Patch contributed by Paolo Borelli . + +2007-04-08 Paul Lindner + + * Add cleanup patch from "Tim Yardley" to + clean up source spacing issues, fix -Wall warnings, add some + null checks, adds asserts at the top of each function for any + use of conn *c without checking to see if c is NULL first. + + * Also adjust clean-whitespace.pl to clean *.ac files. Add + script to test-suite to test for tabs. + +2007-04-04 Paul Lindner + + * Add clarification of flush_all in the protocol docs + from Elizabeth Mattijsen + +2007-03-31 Paul Lindner + + * Add patch from Eli Bingham to + re-enable the -n switch to memcached. + +2007-03-20 Paul Lindner + * Add patch to collect eviction statistics from + Jean-Francois BUSTARRET . 
+ + * Updated docs, added new test cases for t/stats.t + +2007-03-18 Paul Lindner + + * Add more test cases using larger buffer sizes up to and greater + than 1MB. + + * Remove unused parameter to item_size_ok() + + * Use a single printf() in usage() + + * Add a failing test for conforming with maximum connections. + +2007-03-17 + * crash fix from Thomas van Gulick in + conn_shrink(), passing &ptr, instead of ptr to realloc(). + +2007-03-05 Paul Lindner + * Fix a number of places where (s)printf calls were using unsigned + or signed formats that did not match their arguments. + + * Add support for stdbool.h and stdint.h to use the bool and + uint8_t types. + + * Major refactoring - move API calls for assoc/items/slabs to + their own individual header files. Add const and + static declarations as appropriate. + + * Avoid type-punning. Do a more efficient realloc inside the + conn_shrink routine. + + * Fix overflow bug where uninitialized access to slabclass caused + size-0 mallocs during slab preallocation. + + * Use EXIT_SUCCESS/EXIT_FAILURE constants. + + * Convert some sprintf calls to snprintf to protect against + buffer overflows. + + * Explicitly compare against NULL or zero in many places. + +2007-03-05 + * Steven Grimm : Per-object-type stats collection + support. Specify the object type delimiter with the -D command line + option. Turn stats gathering on and off with "stats detail on" and + "stats detail off". Dump the per-object-type details with + "stats detail dump". + +2007-03-01 + * Steven Grimm : Fix an off-by-one error in the + multithreaded version's message passing code. + +2006-12-23 + * fix expirations of items set with absolute expiration times in + the past, before the server's start time. bug was introduced in + 1.2.0 with rel_time_t. Thanks to Adam Dixon + for the bug report and test case! 
+ +2006-11-26 + * Steven Grimm : Performance improvements: + + Dynamic sizing of hashtable to reduce collisions on very large + caches and conserve memory on small caches. + + Only reposition items in the LRU queue once a minute, to reduce + overhead of accessing extremely frequently-used items. + + Stop listening for new connections until an existing one closes + if we run out of available file descriptors. + + Command parser refactoring: Add a single-pass tokenizer to cut + down on string scanning. Split the command processing into + separate functions for easier profiling and better readability. + Pass key lengths along with the keys in all API functions that + need keys, to avoid needing to call strlen() repeatedly. + +2006-11-25 + * Steve Peters : OpenBSD has a malloc.h, + but warns to use stdlib.h instead + +2006-11-22 + * Steven Grimm : Add support for multithreaded + execution. Run configure with "--enable-threads" to enable. See + doc/threads.txt for details. + +2006-11-13 + * Iain Wade : Fix for UDP responses on non-"get" + commands. + +2006-10-15 + * Steven Grimm : Dynamic sizing of hashtable to + reduce collisions on very large caches and conserve memory on + small caches. + +2006-10-13 + * Steven Grimm : New faster hash function. + +2006-09-20 + + * don't listen on UDP by default; more clear message when UDP port in use + +2006-09-09 + * release 1.2.0 (along with 1.1.13, which is the more tested branch) + + nobody has run 1.2.0 in production, to my knowledge. facebook has run + their pre-merge-with-trunk version, but bugs were discovered (and fixed) + after the merge. there might be more. you've been warned. :) + +2006-09-04 + * improved autoconf libevent detection, from the Tor project. + +2006-09-03 + * test suite and lot of expiration, delete, flush_all, etc corner + case bugs fixed (Brad Fitzpatrick) + +2006-09-02 + * Nathan Neulinger : fix breakage in expiration code + causing expiration times to not be processed correctly. 
+ +2006-08-21 + * Nathan Neulinger : fix incompatibilities with + unix domain socket support and the UDP code and clean up stale + sockets + +2006-08-20 + * Nathan Neulinger : unix domain socket support + +2006-05-03 + * Steven Grimm : big bunch of changes: + big CPU reduction work, UDP-based interface, increased memory + efficiency. (intertwined patch, committed all together) + + or see svn commit logs + +2006-04-30 + * River Tarnell: autoconf work for Solaris 10. Brad: + merge and verify it works on Nexenta. + +2006-03-04 + * avva: bucket/generation patch (old, but Brad's just finally + committing it) + +2006-01-01 + * Brad Fitzpatrick : allocate 1 slab per class + on start-up, to avoid confusing users with out-of-memory errors + later. this is 18 MB of allocation on start, unless max memory + allowed with -m is lower, in which case only the smaller slab + classes are allocated. + +2005-08-09 + * Elizabeth Mattijsen : needed a way to flush all + memcached backend servers, but not at exactly the same time (to + reduce load peaks), I've added some simple functionality to the + memcached protocol in the "flush_all" command that allows you to + specify a time at which the flush will actually occur (instead of + always at the moment the "flush_all" command is received). + +2005-05-25 + * patch from Peter van Dijk to make + stderr unbuffered, for running under daemontools + +2005-04-04 + * patch from Don MacAskill 'flush_all' doesn't + seem to work properly. Basically, if you try to add a key which + is present, but expired, the store fails but the old key is no + longer expired. + + * release 1.1.12 + +2005-01-14 + * Date: Thu, 18 Nov 2004 15:25:59 -0600 + From: David Phillips + Here is a patch to configure.ac and Makefile.am to put the man page in + the correct location. Trying to install the man page from a + subdirectory results in the subdirectory being used in the install + path (it tries to install to doc/memcached.1). 
This is the correct + thing to do: + + - create a Makefile.am in the doc directory that installs the man page + with man_MANS + - modify Makefile.am in the base directory to reference the doc + directory using SUBDIRS + - modify the AC_CONFIG_FILES macro in configure.ac to output the + Makefile in doc + + +2005-01-14 + * pidfile saving support from Lisa Seelye , sent + Jan 13, 2005 + +2005-01-14 + * don't delete libevent events that haven't been added (the deltimer) + patch from Ted Schundler + +2004-12-10 + * document -M and -r in manpage (Doug Porter ) + +2004-07-22 + * fix buffer overflow in items.c with 250 byte keys along with + other info on the same line going into a 256 byte char[]. + thanks to Andrei Nigmatulin + +2004-06-15 + * immediate deletes weren't being unlinked a few seconds, + preventing "add" commands to the same key in that time period. + thanks to Michael Alan Dorman for the + bug report and demo script. + +2004-04-30 + * released 1.1.11 + +2004-04-24 + * Avva: Add a new command line option: -r , to maximize core file + limit. + +2004-03-31 + * Avva: Use getrlimit and setrlimit to set limits for number of + simultaneously open file descriptors. Get the current limits and + try to raise them if they're not enough for the specified (or the + default) setting of max connections. + +2004-02-24 + * Adds a '-M' flag to turn off tossing items from the cache. + (Jason Titus ) + +2004-02-19 (Evan) + * Install manpage on "make install", etc. + +2003-12-30 (Brad) + * remove static build stuff. interferes with PAM setuid stuff + and was only included as a possible fix with the old memory + allocator. really shouldn't make a difference. + * add Jay Bonci's Debian scripts and manpage + * release version 1.1.10 + +2003-12-01 (Avva) + * New command: flush_all, causes all existing items to + be invalidated immediately (without deleting them from + memory, merely causing memcached to no longer return them). 
+2003-10-23 + * Shift init code around to fix daemon mode on FreeBSD, + * and drop root only after creating the server socket (to + * allow the use of privileged ports) + * version 1.1.10pre + +2003-10-09 + * BSD compile fixes from Ryan T. Dean + * version 1.1.9 + +2003-09-29 + * ignore SIGPIPE at start instead of crashing in rare cases it + comes up. no other code had to be modified, since everything + else is already dead-connection-aware. (avva) + +2003-09-09 (Avva, Lisa Marie Seelye ) + * setuid support + +2003-09-05 (Avva) + * accept all new connections in the same event (so we work with ET epoll) + * mark all items as clsid=0 after slab page reassignment to please future + asserts (on the road to making slab page reassignment work fully) + +2003-08-12 (Brad Fitzpatrick) + * use TCP_CORK on Linux or TCP_PUSH on BSD + * only use TCP_NODELAY when we don't have alternatives + +2003-08-10 + * disable Nagle's Algorithm (TCP_NODELAY) for better performance (avva) + +2003-08-10 + * support multiple levels of verbosity (-vv) + +2003-08-10 (Evan Martin) + * Makefile.am: debug, optimization, and static flags are controlled + by the configure script. + * configure.ac: + - allow specifying libevent directory with --with-libevent=DIR + - check for malloc.h (unavailable on BSDs) + - check for socklen_t (unavailable on OSX) + * assoc.c, items.c, slabs.c: Remove some unused headers. + * memcached.c: allow for nonexistence of malloc.h; #define a POSIX + macro to import mlockall flags. 
+ +2003-07-29 + * version 1.1.7 + * big bug fix: item exptime 0 meant expire immediately, not never + * version 1.1.8 + +2003-07-22 + * make 'delete' take second arg, of time to refuse new add/replace + * set/add/replace/delete can all take abs or delta time (delta can't + be larger than a month) + +2003-07-21 + * added doc/protocol.txt + +2003-07-01 + * report CPU usage in stats + +2003-06-30 + * version 1.1.6 + * fix a number of obscure bugs + * more stats reporting + +2003-06-10 + * removing use of Judy; use a hash. (judy caused memory fragmentation) + * shrink some structures + * security improvements + * version 1.1.0 + +2003-06-18 + * changing maxsize back to an unsigned int + +2003-06-16 + * adding PHP support + * added CONTRIBUTORS file + * version 1.0.4 + +2003-06-15 + * forgot to distribute website/api (still learning auto*) + * version 1.0.3 + +2003-06-15 + * update to version 1.0.2 + * autoconf/automake fixes for older versions + * make stats report version number + * change license from GPL to BSD + +Fri, 13 Jun 2003 10:05:51 -0700 Evan Martin + + * configure.ac, autogen.sh, Makefile.am: Use autotools. + * items.c, memcached.c: #include for time(), + printf time_t as %lu (is this correct?), + minor warnings fixes. + diff --git a/memcached/Doxyfile b/memcached/Doxyfile new file mode 100644 index 00000000..fc19d3d6 --- /dev/null +++ b/memcached/Doxyfile @@ -0,0 +1,1510 @@ +# Doxyfile 1.5.8 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] 
+# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = memcached + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = doc/doxy + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. 
Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek, +# Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish, +# Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, Slovene, +# Spanish, Swedish, and Ukrainian. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. 
+ +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description.
If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = YES + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. 
+ +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it parses. +# With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this tag. +# The format is ext=language, where ext is a file extension, and language is one of +# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP, +# Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat +# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran), +# use: inc=Fortran f=C + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). 
This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen to replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. 
When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols + +SYMBOL_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation.
+ +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespace are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. 
+ +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. 
If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. 
Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by +# doxygen. The layout file controls the global structure of the generated output files +# in an output format independent way.
To create the layout file that represents +# doxygen's defaults, run doxygen with the -l option. You can optionally specify a +# file name after the option, if omitted DoxygenLayout.xml will be used as the name +# of the layout file. + +LAYOUT_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text.
Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = . + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 + +FILE_PATTERNS = *.h *.c + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should +# be excluded from the INPUT source files.
This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = testapp.c + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). 
+ +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments.
Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. 
+ +ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. 
If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. 
com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. 
+ +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER +# are set, an additional index file will be generated that can be used as input for +# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated +# HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = + +# The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add. +# For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see +# Qt Help Project / Custom Filters. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's +# filter section matches. +# Qt Help Project / Filter Attributes. + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it.
+ +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to FRAME, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, +# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are +# probably better off using the HTML help feature. Other possible values +# for this tag are: HIERARCHIES, which will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list; +# ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which +# disables this behavior completely. For backwards compatibility with previous +# releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE +# respectively. + +GENERATE_TREEVIEW = NONE + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. 
+ +FORMULA_FONTSIZE = 10 + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing!
+ +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. 
The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. 
+ +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. 
+ +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. +# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. 
Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# in the INCLUDE_PATH (see below) will be searched if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon.
Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). 
+ +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = YES + +# By default doxygen will write a font called FreeSans.ttf to the output +# directory and reference it in all dot files that doxygen generates. This +# font does not include all possible unicode characters however, so when you need +# these (or just want a differently looking font) you can specify the font name +# using DOT_FONTNAME. 
You need to make sure dot is able to find the font, +# which can be done by putting it in a standard location or by setting the +# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory +# containing the font. + +DOT_FONTNAME = FreeSans + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the output directory to look for the +# FreeSans.ttf font (which doxygen will put there itself). If you specify a +# different font using DOT_FONTNAME you can set the path where dot +# can find it using this tag. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies. + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances.
+ +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = YES + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = YES + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will draw a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot.
Possible values are png, jpg, or gif. +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read).
+ +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES + +#--------------------------------------------------------------------------- +# Options related to the search engine +#--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used. If set to NO the values of all tags below this one will be ignored. + +SEARCHENGINE = NO diff --git a/memcached/HACKING b/memcached/HACKING new file mode 100644 index 00000000..60bfa9f2 --- /dev/null +++ b/memcached/HACKING @@ -0,0 +1,52 @@ +* Hacking Memcached + +* Prerequisites + + - autoconf + - automake + - autotools + - libevent + +* Getting Started + +After checking out a git repository, you must first run autogen.sh +once in order to create the configure script. + +Next, run the configure script and start doing builds. + +* Setting up Git + +Though not required, there are a couple of things you can add to git +to help development. + +** Pre Commit Hook + +The pre-commit hook can be used to ensure that your tree passes tests +before allowing a commit.
To do so, add the following to +.git/hooks/pre-commit (which must be executable): + + #!/bin/sh + make test + +** Post Commit Hook + +Because the version number changes on each commit, it's good to use a +post commit hook to update the version number after each commit so as +to keep the reporting accurate. To do so, add the following to +.git/hooks/post-commit (which must be executable) + + #!/bin/sh + ./version.sh + +** Running memcached in gdb for tests. + +By default `make test` will spawn a memcached daemon for each test. +This doesn't let you easily drop into gdb or run verbosely. + +If you export the environment variable +T_MEMD_USE_DAEMON="127.0.0.1:11211" the tests will use an existing +daemon at that address. + +* Sending patches + +See current instructions at http://contributing.appspot.com/memcached diff --git a/memcached/LICENSE b/memcached/LICENSE new file mode 100644 index 00000000..4746b00c --- /dev/null +++ b/memcached/LICENSE @@ -0,0 +1,30 @@ +Copyright (c) 2003, Danga Interactive, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + + * Neither the name of the Danga Interactive nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/memcached/Makefile.am b/memcached/Makefile.am new file mode 100644 index 00000000..3cc59520 --- /dev/null +++ b/memcached/Makefile.am @@ -0,0 +1,105 @@ +bin_PROGRAMS = memcached +pkginclude_HEADERS = protocol_binary.h +noinst_PROGRAMS = memcached-debug sizes testapp timedrun + +BUILT_SOURCES= + +testapp_SOURCES = testapp.c util.c util.h + +timedrun_SOURCES = timedrun.c + +memcached_SOURCES = memcached.c memcached.h \ + hash.c hash.h \ + slabs.c slabs.h \ + items.c items.h \ + assoc.c assoc.h \ + thread.c daemon.c \ + stats.c stats.h \ + util.c util.h \ + trace.h cache.h sasl_defs.h + +if BUILD_CACHE +memcached_SOURCES += cache.c +testapp_SOURCES += cache.c +endif + +if BUILD_SOLARIS_PRIVS +memcached_SOURCES += solaris_priv.c +endif + +if ENABLE_SASL +memcached_SOURCES += sasl_defs.c +endif + +memcached_debug_SOURCES = $(memcached_SOURCES) +memcached_CPPFLAGS = -DNDEBUG +memcached_debug_LDADD = @PROFILER_LDFLAGS@ +memcached_debug_CFLAGS = @PROFILER_FLAGS@ + +memcached_LDADD = +memcached_DEPENDENCIES = +memcached_debug_DEPENDENCIES = +CLEANFILES= + +if BUILD_DTRACE +BUILT_SOURCES += memcached_dtrace.h +CLEANFILES += memcached_dtrace.h +endif + +if DTRACE_INSTRUMENT_OBJ +memcached_LDADD += memcached_dtrace.o +memcached_DEPENDENCIES += memcached_dtrace.o +memcached_debug_LDADD += memcached_debug_dtrace.o +memcached_debug_DEPENDENCIES += memcached_debug_dtrace.o +CLEANFILES += memcached_dtrace.o memcached_debug_dtrace.o +endif + 
+memcached_dtrace.h: memcached_dtrace.d + ${DTRACE} -h -s memcached_dtrace.d + sed -e 's,void \*,const void \*,g' memcached_dtrace.h | \ + sed -e 's,char \*,const char \*,g' | tr '\t' ' ' > mmc_dtrace.tmp + mv mmc_dtrace.tmp memcached_dtrace.h + +memcached_dtrace.o: $(memcached_OBJECTS) + $(DTRACE) $(DTRACEFLAGS) -G -o memcached_dtrace.o -s ${srcdir}/memcached_dtrace.d $(memcached_OBJECTS) + +memcached_debug_dtrace.o: $(memcached_debug_OBJECTS) + $(DTRACE) $(DTRACEFLAGS) -G -o memcached_debug_dtrace.o -s ${srcdir}/memcached_dtrace.d $(memcached_debug_OBJECTS) + + +SUBDIRS = doc +DIST_DIRS = scripts +EXTRA_DIST = doc scripts t memcached.spec memcached_dtrace.d version.m4 + +MOSTLYCLEANFILES = *.gcov *.gcno *.gcda *.tcov + +test: memcached-debug sizes testapp + $(srcdir)/sizes + $(srcdir)/testapp + prove $(srcdir)/t + @if test `basename $(PROFILER)` = "gcov"; then \ + for file in memcached_debug-*.gc??; do \ + mv -f $$file `echo $$file | sed 's/memcached_debug-//'`; \ + done && \ + for file in *.gcda; do \ + srcfile=`echo $$file | sed 's/.gcda/.c/'`; \ + if test -n "`echo $(memcached_debug_SOURCES) | grep $$srcfile`"; then \ + echo `$(PROFILER) $$srcfile` | sed 's/'$$srcfile':.*//'; \ + fi \ + done \ + elif test `basename $(PROFILER)` = "tcov"; then \ + files=`grep SRCFILE memcached-debug.profile/tcovd | sed 's/SRCFILE://' | sort | uniq` && \ + $(PROFILER) -x memcached-debug.profile $$files 2>&1; \ + for file in *.tcov; do \ + srcfile=`echo $$file | sed 's/.tcov//'`; \ + if test -n "`echo $(memcached_debug_SOURCES) | grep $$srcfile`"; then \ + echo $$srcfile : `grep 'Percent of the file executed' $$file`; \ + fi \ + done \ + else :; fi + +docs: + (cat Doxyfile ; echo "PROJECT_NUMBER=`cat version.num`") | doxygen - + +dist-hook: + rm -f $(distdir)/*/*~ $(distdir)/t/lib/*~ $(distdir)/*~ diff --git a/memcached/NEWS b/memcached/NEWS new file mode 100644 index 00000000..78b1eca8 --- /dev/null +++ b/memcached/NEWS @@ -0,0 +1 @@ +http://www.danga.com/memcached/news.bml diff 
--git a/memcached/README b/memcached/README new file mode 100644 index 00000000..f20dd8c1 --- /dev/null +++ b/memcached/README @@ -0,0 +1,28 @@ +Dependencies: + + -- libevent, http://www.monkey.org/~provos/libevent/ (libevent-dev) + +If using Linux, you need a kernel with epoll. Sure, libevent will +work with normal select, but it sucks. + +epoll isn't in Linux 2.4, but there's a backport at: + + http://www.xmailserver.org/linux-patches/nio-improve.html + +You want the epoll-lt patch (level-triggered). + +If you're using MacOS, you'll want libevent 1.1 or higher to deal with +a kqueue bug. + +Also, be warned that the -k (mlockall) option to memcached might be +dangerous when using a large cache. Just make sure the memcached machines +don't swap. memcached does non-blocking network I/O, but not disk. (it +should never go to disk, or you've lost the whole point of it) + +The memcached website is at: + + http://www.memcached.org + +Want to contribute? Up-to-date pointers should be at: + + http://contributing.appspot.com/memcached diff --git a/memcached/assoc.c b/memcached/assoc.c new file mode 100644 index 00000000..f14d85d8 --- /dev/null +++ b/memcached/assoc.c @@ -0,0 +1,280 @@ +/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* + * Hash table + * + * The hash function used here is by Bob Jenkins, 1996: + * + * "By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. + * You may use this code any way you wish, private, educational, + * or commercial. It's free." + * + * The rest of the file is licensed under the BSD license. See LICENSE. 
+ */ + +#include "memcached.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static pthread_cond_t maintenance_cond = PTHREAD_COND_INITIALIZER; + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +typedef unsigned long int ub4; /* unsigned 4-byte quantities */ +typedef unsigned char ub1; /* unsigned 1-byte quantities */ + +/* how many powers of 2's worth of buckets we use */ +static unsigned int hashpower = HASHPOWER_DEFAULT; + +#define hashsize(n) ((ub4)1<<(n)) +#define hashmask(n) (hashsize(n)-1) + +/* Main hash table. This is where we look except during expansion. */ +static item** primary_hashtable = 0; + +/* + * Previous hash table. During expansion, we look here for keys that haven't + * been moved over to the primary yet. + */ +static item** old_hashtable = 0; + +/* Number of items in the hash table. */ +static unsigned int hash_items = 0; + +/* Flag: Are we in the middle of expanding now? */ +static bool expanding = false; + +/* + * During expansion we migrate values with bucket granularity; this is how + * far we've gotten so far. Ranges from 0 .. hashsize(hashpower - 1) - 1. + */ +static unsigned int expand_bucket = 0; + +void assoc_init(const int hashtable_init) { + if (hashtable_init) { + hashpower = hashtable_init; + } + primary_hashtable = calloc(hashsize(hashpower), sizeof(void *)); + if (! 
primary_hashtable) { + fprintf(stderr, "Failed to init hashtable.\n"); + exit(EXIT_FAILURE); + } + STATS_LOCK(); + stats.hash_power_level = hashpower; + stats.hash_bytes = hashsize(hashpower) * sizeof(void *); + STATS_UNLOCK(); +} + +item *assoc_find(const char *key, const size_t nkey, const uint32_t hv) { + item *it; + unsigned int oldbucket; + + if (expanding && + (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket) + { + it = old_hashtable[oldbucket]; + } else { + it = primary_hashtable[hv & hashmask(hashpower)]; + } + + item *ret = NULL; + int depth = 0; + while (it) { + if ((nkey == it->nkey) && (memcmp(key, ITEM_key(it), nkey) == 0)) { + ret = it; + break; + } + it = it->h_next; + ++depth; + } + MEMCACHED_ASSOC_FIND(key, nkey, depth); + return ret; +} + +/* returns the address of the item pointer before the key. if *item == 0, + the item wasn't found */ + +static item** _hashitem_before (const char *key, const size_t nkey, const uint32_t hv) { + item **pos; + unsigned int oldbucket; + + if (expanding && + (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket) + { + pos = &old_hashtable[oldbucket]; + } else { + pos = &primary_hashtable[hv & hashmask(hashpower)]; + } + + while (*pos && ((nkey != (*pos)->nkey) || memcmp(key, ITEM_key(*pos), nkey))) { + pos = &(*pos)->h_next; + } + return pos; +} + +/* grows the hashtable to the next power of 2. */ +static void assoc_expand(void) { + old_hashtable = primary_hashtable; + + primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *)); + if (primary_hashtable) { + if (settings.verbose > 1) + fprintf(stderr, "Hash table expansion starting\n"); + hashpower++; + expanding = true; + expand_bucket = 0; + STATS_LOCK(); + stats.hash_power_level = hashpower; + stats.hash_bytes += hashsize(hashpower) * sizeof(void *); + stats.hash_is_expanding = 1; + STATS_UNLOCK(); + pthread_cond_signal(&maintenance_cond); + } else { + primary_hashtable = old_hashtable; + /* Bad news, but we can keep running. 
*/ + } +} + +/* Note: this isn't an assoc_update. The key must not already exist to call this */ +int assoc_insert(item *it, const uint32_t hv) { + unsigned int oldbucket; + +// assert(assoc_find(ITEM_key(it), it->nkey) == 0); /* shouldn't have duplicately named things defined */ + + if (expanding && + (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket) + { + it->h_next = old_hashtable[oldbucket]; + old_hashtable[oldbucket] = it; + } else { + it->h_next = primary_hashtable[hv & hashmask(hashpower)]; + primary_hashtable[hv & hashmask(hashpower)] = it; + } + + hash_items++; + if (! expanding && hash_items > (hashsize(hashpower) * 3) / 2) { + assoc_expand(); + } + + MEMCACHED_ASSOC_INSERT(ITEM_key(it), it->nkey, hash_items); + return 1; +} + +void assoc_delete(const char *key, const size_t nkey, const uint32_t hv) { + item **before = _hashitem_before(key, nkey, hv); + + if (*before) { + item *nxt; + hash_items--; + /* The DTrace probe cannot be triggered as the last instruction + * due to possible tail-optimization by the compiler + */ + MEMCACHED_ASSOC_DELETE(key, nkey, hash_items); + nxt = (*before)->h_next; + (*before)->h_next = 0; /* probably pointless, but whatever. */ + *before = nxt; + return; + } + /* Note: we never actually get here. the callers don't delete things + they can't find. */ + assert(*before != 0); +} + + +static volatile int do_run_maintenance_thread = 1; + +#define DEFAULT_HASH_BULK_MOVE 1 +int hash_bulk_move = DEFAULT_HASH_BULK_MOVE; + +static void *assoc_maintenance_thread(void *arg) { + + while (do_run_maintenance_thread) { + int ii = 0; + + /* Lock the cache, and bulk move multiple buckets to the new + * hash table. 
*/ + mutex_lock(&cache_lock); + + for (ii = 0; ii < hash_bulk_move && expanding; ++ii) { + item *it, *next; + int bucket; + + for (it = old_hashtable[expand_bucket]; NULL != it; it = next) { + next = it->h_next; + + bucket = hash(ITEM_key(it), it->nkey, 0) & hashmask(hashpower); + it->h_next = primary_hashtable[bucket]; + primary_hashtable[bucket] = it; + } + + old_hashtable[expand_bucket] = NULL; + + expand_bucket++; + if (expand_bucket == hashsize(hashpower - 1)) { + expanding = false; + free(old_hashtable); + STATS_LOCK(); + stats.hash_bytes -= hashsize(hashpower - 1) * sizeof(void *); + stats.hash_is_expanding = 0; + STATS_UNLOCK(); + if (settings.verbose > 1) + fprintf(stderr, "Hash table expansion done\n"); + } + } + + if (!expanding) { + /* We are done expanding.. just wait for next invocation */ + pthread_cond_wait(&maintenance_cond, &cache_lock); + } + + pthread_mutex_unlock(&cache_lock); + } + return NULL; +} + +static pthread_t maintenance_tid; + +int start_assoc_maintenance_thread() { + int ret; + char *env = getenv("MEMCACHED_HASH_BULK_MOVE"); + if (env != NULL) { + hash_bulk_move = atoi(env); + if (hash_bulk_move == 0) { + hash_bulk_move = DEFAULT_HASH_BULK_MOVE; + } + } + if ((ret = pthread_create(&maintenance_tid, NULL, + assoc_maintenance_thread, NULL)) != 0) { + fprintf(stderr, "Can't create thread: %s\n", strerror(ret)); + return -1; + } + return 0; +} + +void stop_assoc_maintenance_thread() { + mutex_lock(&cache_lock); + do_run_maintenance_thread = 0; + pthread_cond_signal(&maintenance_cond); + pthread_mutex_unlock(&cache_lock); + + /* Wait for the maintenance thread to stop */ + pthread_join(maintenance_tid, NULL); +} + + diff --git a/memcached/assoc.h b/memcached/assoc.h new file mode 100644 index 00000000..ccdfdd5a --- /dev/null +++ b/memcached/assoc.h @@ -0,0 +1,9 @@ +/* associative array */ +void assoc_init(const int hashpower_init); +item *assoc_find(const char *key, const size_t nkey, const uint32_t hv); +int assoc_insert(item *item, 
const uint32_t hv); +void assoc_delete(const char *key, const size_t nkey, const uint32_t hv); +void do_assoc_move_next_bucket(void); +int start_assoc_maintenance_thread(void); +void stop_assoc_maintenance_thread(void); + diff --git a/memcached/autogen.sh b/memcached/autogen.sh new file mode 100755 index 00000000..c735394d --- /dev/null +++ b/memcached/autogen.sh @@ -0,0 +1,52 @@ +#!/bin/sh + +# Get the initial version. +perl version.pl + +die() { + echo "$@" + exit 1 +} + +# Try to locate a program by using which, and verify that the file is an +# executable +locate_binary() { + for f in $@ + do + file=`which $f 2>/dev/null | grep -v '^no '` + if test -n "$file" -a -x "$file"; then + echo $file + return 0 + fi + done + + echo "" + return 1 +} + +echo "aclocal..." +if test x$ACLOCAL = x; then + ACLOCAL=`locate_binary aclocal-1.11 aclocal-1.10 aclocal-1.9 aclocal19 aclocal-1.7 aclocal17 aclocal-1.5 aclocal15 aclocal` + if test x$ACLOCAL = x; then + die "Did not find a supported aclocal" + fi +fi +$ACLOCAL || exit 1 + +echo "autoheader..." +AUTOHEADER=${AUTOHEADER:-autoheader} +$AUTOHEADER || exit 1 + +echo "automake..." +if test x$AUTOMAKE = x; then + AUTOMAKE=`locate_binary automake-1.11 automake-1.10 automake-1.9 automake-1.7` + if test x$AUTOMAKE = x; then + die "Did not find a supported automake" + fi +fi +$AUTOMAKE --foreign --add-missing || $AUTOMAKE --gnu --add-missing || exit 1 + +echo "autoconf..." 
+AUTOCONF=${AUTOCONF:-autoconf} +$AUTOCONF || exit 1 + diff --git a/memcached/cache.c b/memcached/cache.c new file mode 100644 index 00000000..7dcf2fbb --- /dev/null +++ b/memcached/cache.c @@ -0,0 +1,150 @@ +/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +#include "memcached.h" + +#include +#include +#include +#include + +#ifndef NDEBUG +#include +#endif + +#include "cache.h" + +#ifndef NDEBUG +const uint64_t redzone_pattern = 0xdeadbeefcafebabe; +int cache_error = 0; +#endif + +const int initial_pool_size = 64; + +cache_t* cache_create(const char *name, size_t bufsize, size_t align, + cache_constructor_t* constructor, + cache_destructor_t* destructor) { + cache_t* ret = calloc(1, sizeof(cache_t)); + char* nm = strdup(name); + void** ptr = calloc(initial_pool_size, sizeof(void*)); + if (ret == NULL || nm == NULL || ptr == NULL || + pthread_mutex_init(&ret->mutex, NULL) == -1) { + free(ret); + free(nm); + free(ptr); + return NULL; + } + + ret->name = nm; + ret->ptr = ptr; + ret->freetotal = initial_pool_size; + ret->constructor = constructor; + ret->destructor = destructor; + +#ifndef NDEBUG + ret->bufsize = bufsize + 2 * sizeof(redzone_pattern); +#else + ret->bufsize = bufsize; +#endif + + return ret; +} + +static inline void* get_object(void *ptr) { +#ifndef NDEBUG + uint64_t *pre = ptr; + return pre + 1; +#else + return ptr; +#endif +} + +void cache_destroy(cache_t *cache) { + while (cache->freecurr > 0) { + void *ptr = cache->ptr[--cache->freecurr]; + if (cache->destructor) { + cache->destructor(get_object(ptr), NULL); + } + free(ptr); + } + free(cache->name); + free(cache->ptr); + pthread_mutex_destroy(&cache->mutex); +} + +void* cache_alloc(cache_t *cache) { + void *ret; + void *object; + pthread_mutex_lock(&cache->mutex); + if (cache->freecurr > 0) { + ret = cache->ptr[--cache->freecurr]; + object = get_object(ret); + } else { + object = ret = malloc(cache->bufsize); + if (ret != NULL) { + object = get_object(ret); + + if 
(cache->constructor != NULL && + cache->constructor(object, NULL, 0) != 0) { + free(ret); + object = NULL; + } + } + } + pthread_mutex_unlock(&cache->mutex); + +#ifndef NDEBUG + if (object != NULL) { + /* add a simple form of buffer-check */ + uint64_t *pre = ret; + *pre = redzone_pattern; + ret = pre+1; + memcpy(((char*)ret) + cache->bufsize - (2 * sizeof(redzone_pattern)), + &redzone_pattern, sizeof(redzone_pattern)); + } +#endif + + return object; +} + +void cache_free(cache_t *cache, void *ptr) { + pthread_mutex_lock(&cache->mutex); + +#ifndef NDEBUG + /* validate redzone... */ + if (memcmp(((char*)ptr) + cache->bufsize - (2 * sizeof(redzone_pattern)), + &redzone_pattern, sizeof(redzone_pattern)) != 0) { + raise(SIGABRT); + cache_error = 1; + pthread_mutex_unlock(&cache->mutex); + return; + } + uint64_t *pre = ptr; + --pre; + if (*pre != redzone_pattern) { + raise(SIGABRT); + cache_error = -1; + pthread_mutex_unlock(&cache->mutex); + return; + } + ptr = pre; +#endif + if (cache->freecurr < cache->freetotal) { + cache->ptr[cache->freecurr++] = ptr; + } else { + /* try to enlarge free connections array */ + size_t newtotal = cache->freetotal * 2; + void **new_free = realloc(cache->ptr, sizeof(char *) * newtotal); + if (new_free) { + cache->freetotal = newtotal; + cache->ptr = new_free; + cache->ptr[cache->freecurr++] = ptr; + } else { + if (cache->destructor) { + cache->destructor(ptr, NULL); + } + free(ptr); + + } + } + pthread_mutex_unlock(&cache->mutex); +} + diff --git a/memcached/cache.h b/memcached/cache.h new file mode 100644 index 00000000..12695036 --- /dev/null +++ b/memcached/cache.h @@ -0,0 +1,116 @@ +/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +#ifndef CACHE_H +#define CACHE_H +#include + +#ifdef HAVE_UMEM_H +#include +#define cache_t umem_cache_t +#define cache_alloc(a) umem_cache_alloc(a, UMEM_DEFAULT) +#define cache_free(a, b) umem_cache_free(a, b) +#define cache_create(a,b,c,d,e) umem_cache_create((char*)a, b, c, 
d, e, NULL, NULL, NULL, 0) +#define cache_destroy(a) umem_cache_destroy(a); + +#else + +#ifndef NDEBUG +/* may be used for debug purposes */ +extern int cache_error; +#endif + +/** + * Constructor used to initialize allocated objects + * + * @param obj pointer to the object to be initialized. + * @param notused1 This parameter is currently not used. + * @param notused2 This parameter is currently not used. + * @return 0 on success; cache_alloc treats a nonzero return as failure + */ +typedef int cache_constructor_t(void* obj, void* notused1, int notused2); +/** + * Destructor used to clean up allocated objects before they are + * returned to the operating system. + * + * @param obj pointer to the object to clean up. + * @param notused This parameter is currently not used. + * + * The destructor does not return a value. + */ +typedef void cache_destructor_t(void* obj, void* notused); + +/** + * Definition of the structure to keep track of the internal details of + * the cache allocator. Touching any of these variables results in + * undefined behavior. + */ +typedef struct { + /** Mutex to protect access to the structure */ + pthread_mutex_t mutex; + /** Name of the cache objects in this cache (provided by the caller) */ + char *name; + /** List of pointers to available buffers in this cache */ + void **ptr; + /** The size of each element in this cache */ + size_t bufsize; + /** The capacity of the list of elements */ + int freetotal; + /** The current number of free elements */ + int freecurr; + /** The constructor to be called each time we allocate more memory */ + cache_constructor_t* constructor; + /** The destructor to be called each time before we release memory */ + cache_destructor_t* destructor; +} cache_t; + +/** + * Create an object cache. + * + * The object cache will let you allocate objects of the same size.
It is fully + * MT safe, so you may allocate objects from multiple threads without having to + * do any synchronization in the application code. + * + * @param name the name of the object cache. This name may be used for debug purposes + * and may help you track down what kind of object you have problems with + * (buffer overruns, leakage etc) + * @param bufsize the size of each object in the cache + * @param align the alignment requirements of the objects in the cache. + * @param constructor the function to be called to initialize memory when we need + * to allocate more memory from the os. + * @param destructor the function to be called before we release the memory back + * to the os. + * @return a handle to an object cache if successful, NULL otherwise. + */ +cache_t* cache_create(const char* name, size_t bufsize, size_t align, + cache_constructor_t* constructor, + cache_destructor_t* destructor); +/** + * Destroy an object cache. + * + * Destroy and invalidate an object cache. You should return all buffers allocated + * with cache_alloc by using cache_free before calling this function. Not doing + * so results in undefined behavior (the buffers may or may not be invalidated) + * + * @param handle the handle to the object cache to destroy. + */ +void cache_destroy(cache_t* handle); +/** + * Allocate an object from the cache. + * + * @param handle the handle to the object cache to allocate from + * @return a pointer to an initialized object from the cache, or NULL if + * the allocation cannot be satisfied. + */ +void* cache_alloc(cache_t* handle); +/** + * Return an object back to the cache. + * + * The caller should return the object in an initialized state so that + * the object may be returned in an expected state from cache_alloc. + * + * @param handle handle to the object cache to return the object to + * @param ptr pointer to the object to return.
+ */ +void cache_free(cache_t* handle, void* ptr); +#endif + +#endif diff --git a/memcached/configure.ac b/memcached/configure.ac new file mode 100644 index 00000000..9915a465 --- /dev/null +++ b/memcached/configure.ac @@ -0,0 +1,557 @@ +AC_PREREQ(2.52) +m4_include([version.m4]) +m4_include([m4/c99-backport.m4]) +AC_INIT(memcached, VERSION_NUMBER, memcached@googlegroups.com) +AC_CANONICAL_SYSTEM +AC_CONFIG_SRCDIR(memcached.c) +AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION) +AM_CONFIG_HEADER(config.h) + +AC_PROG_CC + +dnl ********************************************************************** +dnl DETECT_ICC ([ACTION-IF-YES], [ACTION-IF-NO]) +dnl +dnl check if this is the Intel ICC compiler, and if so run the ACTION-IF-YES +dnl sets the $ICC variable to "yes" or "no" +dnl ********************************************************************** +AC_DEFUN([DETECT_ICC], +[ + ICC="no" + AC_MSG_CHECKING([for icc in use]) + if test "$GCC" = "yes"; then + dnl check if this is icc acting as gcc in disguise + AC_EGREP_CPP([^__INTEL_COMPILER], [__INTEL_COMPILER], + AC_MSG_RESULT([no]) + [$2], + AC_MSG_RESULT([yes]) + [$1] + ICC="yes") + else + AC_MSG_RESULT([no]) + [$2] + fi +]) + +DETECT_ICC([], []) + +dnl ********************************************************************** +dnl DETECT_SUNCC ([ACTION-IF-YES], [ACTION-IF-NO]) +dnl +dnl check if this is the Sun Studio compiler, and if so run the ACTION-IF-YES +dnl sets the $SUNCC variable to "yes" or "no" +dnl ********************************************************************** +AC_DEFUN([DETECT_SUNCC], +[ + AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"]) + AS_IF(test "x$SUNCC" = "xyes", [$1], [$2]) + +]) + +DETECT_SUNCC([CFLAGS="-mt $CFLAGS"], []) + +if test "$ICC" = "no"; then + AC_PROG_CC_C99 +fi + +AM_PROG_CC_C_O +AC_PROG_INSTALL + +AC_ARG_ENABLE(sasl, + [AS_HELP_STRING([--enable-sasl],[Enable SASL authentication])]) + +AC_ARG_ENABLE(sasl_pwdb, + [AS_HELP_STRING([--enable-sasl-pwdb],[Enable plaintext 
password db])]) + +AS_IF([test "x$enable_sasl_pwdb" = "xyes"], + [enable_sasl=yes ]) + + +dnl ********************************************************************** +dnl DETECT_SASL_CB_GETCONF +dnl +dnl check if we can use SASL_CB_GETCONF +dnl ********************************************************************** +AC_DEFUN([AC_C_DETECT_SASL_CB_GETCONF], +[ + AC_CACHE_CHECK([for SASL_CB_GETCONF], + [ac_cv_c_sasl_cb_getconf], + [AC_TRY_COMPILE( + [ +#include + ], [ +unsigned long val = SASL_CB_GETCONF; + ], + [ ac_cv_c_sasl_cb_getconf=yes ], + [ ac_cv_c_sasl_cb_getconf=no ]) + ]) + AS_IF([test "$ac_cv_c_sasl_cb_getconf" = "yes"], + [AC_DEFINE([HAVE_SASL_CB_GETCONF], 1, + [Set to nonzero if your SASL implementation supports SASL_CB_GETCONF])]) +]) + +AC_CHECK_HEADERS([sasl/sasl.h]) +if test "x$enable_sasl" = "xyes"; then + AC_C_DETECT_SASL_CB_GETCONF + AC_DEFINE([ENABLE_SASL],1,[Set to nonzero if you want to include SASL]) + AC_SEARCH_LIBS([sasl_server_init], [sasl2 sasl], [], + [ + AC_MSG_ERROR([Failed to locate the library containing sasl_server_init]) + ]) + + AS_IF([test "x$enable_sasl_pwdb" = "xyes"], + [AC_DEFINE([ENABLE_SASL_PWDB], 1, + [Set to nonzero if you want to enable a SASL pwdb])]) +fi + +AC_ARG_ENABLE(dtrace, + [AS_HELP_STRING([--enable-dtrace],[Enable dtrace probes])]) +if test "x$enable_dtrace" = "xyes"; then + AC_PATH_PROG([DTRACE], [dtrace], "no", [/usr/sbin:$PATH]) + if test "x$DTRACE" != "xno"; then + AC_DEFINE([ENABLE_DTRACE],1,[Set to nonzero if you want to include DTRACE]) + build_dtrace=yes + # DTrace on MacOSX does not use -G option + $DTRACE -G -o conftest.$$ -s memcached_dtrace.d 2>/dev/zero + if test $? -eq 0 + then + dtrace_instrument_obj=yes + rm conftest.$$ + fi + + if test "`which tr`" = "/usr/ucb/tr"; then + AC_MSG_ERROR([Please remove /usr/ucb from your path. 
See man standards for more info]) + fi + else + AC_MSG_ERROR([Need dtrace binary and OS support.]) + fi +fi + +AM_CONDITIONAL([BUILD_DTRACE],[test "$build_dtrace" = "yes"]) +AM_CONDITIONAL([DTRACE_INSTRUMENT_OBJ],[test "$dtrace_instrument_obj" = "yes"]) +AM_CONDITIONAL([ENABLE_SASL],[test "$enable_sasl" = "yes"]) + +AC_SUBST(DTRACE) +AC_SUBST(DTRACEFLAGS) +AC_SUBST(ENABLE_SASL) +AC_SUBST(PROFILER_LDFLAGS) + +AC_ARG_ENABLE(coverage, + [AS_HELP_STRING([--disable-coverage],[Disable code coverage])]) + +if test "x$enable_coverage" != "xno"; then + if test "$ICC" = "yes" + then + dnl ICC trying to be gcc, but not well + CFLAGS="$CFLAGS -pthread" + elif test "$GCC" = "yes" + then + CFLAGS="$CFLAGS -pthread" + AC_PATH_PROG([PROFILER], [gcov], "no", [$PATH]) + if test "x$PROFILER" != "xno"; then + # Issue 97: The existence of gcov doesn't mean we have -lgcov + AC_CHECK_LIB(gcov, main, + [ + PROFILER_FLAGS="-fprofile-arcs -ftest-coverage" + PROFILER_LDFLAGS="-lgcov" + ], [ + PROFILER_FLAGS= + PROFILER_LDFLAGS= + ]) + fi + elif test "$SUNCC" = "yes" + then + AC_PATH_PROG([PROFILER], [tcov], "no", [$PATH]) + if test "x$PROFILER" != "xno"; then + PROFILER_FLAGS=-xprofile=tcov + fi + fi +fi +AC_SUBST(PROFILER_FLAGS) + + +AC_ARG_ENABLE(64bit, + [AS_HELP_STRING([--enable-64bit],[build 64bit version])]) +if test "x$enable_64bit" = "xyes" +then + org_cflags=$CFLAGS + CFLAGS=-m64 + AC_RUN_IFELSE( + [AC_LANG_PROGRAM([], [dnl +return sizeof(void*) == 8 ?
0 : 1; + ]) + ],[ + CFLAGS="-m64 $org_cflags" + ],[ + AC_MSG_ERROR([Don't know how to build a 64-bit object.]) + ]) +fi + +# Issue 213: Search for clock_gettime to help people linking +# with a static version of libevent +AC_SEARCH_LIBS(clock_gettime, rt) +# Issue 214: Search for the network libraries _before_ searching +# for libevent (to help people linking with static libevent) +AC_SEARCH_LIBS(socket, socket) +AC_SEARCH_LIBS(gethostbyname, nsl) + +trylibeventdir="" +AC_ARG_WITH(libevent, + [ --with-libevent=PATH Specify path to libevent installation ], + [ + if test "x$withval" != "xno" ; then + trylibeventdir=$withval + fi + ] +) + +dnl ------------------------------------------------------ +dnl libevent detection. swiped from Tor. modified a bit. + +LIBEVENT_URL=http://www.monkey.org/~provos/libevent/ + +AC_CACHE_CHECK([for libevent directory], ac_cv_libevent_dir, [ + saved_LIBS="$LIBS" + saved_LDFLAGS="$LDFLAGS" + saved_CPPFLAGS="$CPPFLAGS" + le_found=no + for ledir in $trylibeventdir "" $prefix /usr/local ; do + LDFLAGS="$saved_LDFLAGS" + LIBS="-levent $saved_LIBS" + + # Skip the directory if it isn't there. + if test ! -z "$ledir" -a ! -d "$ledir" ; then + continue; + fi + if test ! -z "$ledir" ; then + if test -d "$ledir/lib" ; then + LDFLAGS="-L$ledir/lib $LDFLAGS" + else + LDFLAGS="-L$ledir $LDFLAGS" + fi + if test -d "$ledir/include" ; then + CPPFLAGS="-I$ledir/include $CPPFLAGS" + else + CPPFLAGS="-I$ledir $CPPFLAGS" + fi + fi + # Can I compile and link it? + AC_TRY_LINK([#include +#include +#include ], [ event_init(); ], + [ libevent_linked=yes ], [ libevent_linked=no ]) + if test $libevent_linked = yes; then + if test ! 
-z "$ledir" ; then + ac_cv_libevent_dir=$ledir + _myos=`echo $target_os | cut -f 1 -d .` + AS_IF(test "$SUNCC" = "yes" -o "x$_myos" = "xsolaris2", + [saved_LDFLAGS="$saved_LDFLAGS -Wl,-R$ledir/lib"], + [AS_IF(test "$GCC" = "yes", + [saved_LDFLAGS="$saved_LDFLAGS -Wl,-rpath,$ledir/lib"])]) + else + ac_cv_libevent_dir="(system)" + fi + le_found=yes + break + fi + done + LIBS="$saved_LIBS" + LDFLAGS="$saved_LDFLAGS" + CPPFLAGS="$saved_CPPFLAGS" + if test $le_found = no ; then + AC_MSG_ERROR([libevent is required. You can get it from $LIBEVENT_URL + + If it's already installed, specify its path using --with-libevent=/dir/ +]) + fi +]) +LIBS="-levent $LIBS" +if test $ac_cv_libevent_dir != "(system)"; then + if test -d "$ac_cv_libevent_dir/lib" ; then + LDFLAGS="-L$ac_cv_libevent_dir/lib $LDFLAGS" + le_libdir="$ac_cv_libevent_dir/lib" + else + LDFLAGS="-L$ac_cv_libevent_dir $LDFLAGS" + le_libdir="$ac_cv_libevent_dir" + fi + if test -d "$ac_cv_libevent_dir/include" ; then + CPPFLAGS="-I$ac_cv_libevent_dir/include $CPPFLAGS" + else + CPPFLAGS="-I$ac_cv_libevent_dir $CPPFLAGS" + fi +fi + +dnl ---------------------------------------------------------------------------- + +AC_SEARCH_LIBS(umem_cache_create, umem) +AC_SEARCH_LIBS(gethugepagesizes, hugetlbfs) + +AC_HEADER_STDBOOL +AH_BOTTOM([#if HAVE_STDBOOL_H +#include +#else +#define bool char +#define false 0 +#define true 1 +#endif ]) + +AC_CHECK_HEADERS([inttypes.h]) +AH_BOTTOM([#ifdef HAVE_INTTYPES_H +#include +#endif +]) + +dnl ********************************************************************** +dnl Figure out if this system has the stupid sasl_callback_ft +dnl ********************************************************************** + +AC_DEFUN([AC_HAVE_SASL_CALLBACK_FT], +[AC_CACHE_CHECK(for sasl_callback_ft, ac_cv_has_sasl_callback_ft, +[ + AC_TRY_COMPILE([ + #ifdef HAVE_SASL_SASL_H + #include + #include + #endif + ],[ + sasl_callback_ft a_callback; + ],[ + ac_cv_has_sasl_callback_ft=yes + ],[ + 
ac_cv_has_sasl_callback_ft=no + ]) +]) +if test $ac_cv_has_sasl_callback_ft = yes; then + AC_DEFINE(HAVE_SASL_CALLBACK_FT, 1, [we have sasl_callback_ft]) +fi +]) + +AC_HAVE_SASL_CALLBACK_FT + +dnl ********************************************************************** +dnl DETECT_UINT64_SUPPORT +dnl +dnl check if we can use a uint64_t +dnl ********************************************************************** +AC_DEFUN([AC_C_DETECT_UINT64_SUPPORT], +[ + AC_CACHE_CHECK([for print macros for integers (C99 section 7.8.1)], + [ac_cv_c_uint64_support], + [AC_TRY_COMPILE( + [ +#ifdef HAVE_INTTYPES_H +#include +#endif +#include + ], [ + uint64_t val = 0; + fprintf(stderr, "%" PRIu64 "\n", val); + ], + [ ac_cv_c_uint64_support=yes ], + [ ac_cv_c_uint64_support=no ]) + ]) +]) + +AC_C_DETECT_UINT64_SUPPORT +AS_IF([test "x$ac_cv_c_uint64_support" = "xno"], + [AC_MSG_WARN([ + +Failed to use print macros (PRIu) as defined in C99 section 7.8.1. + +])]) + +AC_C_CONST + +dnl From licq: Copyright (c) 2000 Dirk Mueller +dnl Check if the type socklen_t is defined anywhere +AC_DEFUN([AC_C_SOCKLEN_T], +[AC_CACHE_CHECK(for socklen_t, ac_cv_c_socklen_t, +[ + AC_TRY_COMPILE([ + #include + #include + ],[ + socklen_t foo; + ],[ + ac_cv_c_socklen_t=yes + ],[ + ac_cv_c_socklen_t=no + ]) +]) +if test $ac_cv_c_socklen_t = no; then + AC_DEFINE(socklen_t, int, [define to int if socklen_t not available]) +fi +]) + +AC_C_SOCKLEN_T + +dnl Check if we're a little-endian or a big-endian system, needed by hash code +AC_DEFUN([AC_C_ENDIAN], +[AC_CACHE_CHECK(for endianness, ac_cv_c_endian, +[ + AC_RUN_IFELSE( + [AC_LANG_PROGRAM([], [dnl + long val = 1; + char *c = (char *) &val; + exit(*c == 1); + ]) + ],[ + ac_cv_c_endian=big + ],[ + ac_cv_c_endian=little + ]) +]) +if test $ac_cv_c_endian = big; then + AC_DEFINE(ENDIAN_BIG, 1, [machine is bigendian]) +fi +if test $ac_cv_c_endian = little; then + AC_DEFINE(ENDIAN_LITTLE, 1, [machine is littleendian]) +fi +]) + +AC_C_ENDIAN + +AC_DEFUN([AC_C_HTONLL], +[ + 
AC_MSG_CHECKING([for htonll]) + have_htoll="no" + AC_TRY_LINK([ +#include +#include +#ifdef HAVE_INTTYPES_H +#include */ +#endif + ], [ + return htonll(0); + ], [ + have_htoll="yes" + AC_DEFINE([HAVE_HTONLL], [1], [Have ntohll]) + ], [ + have_htoll="no" + ]) + + AC_MSG_RESULT([$have_htoll]) +]) + +AC_C_HTONLL + +dnl Check whether the user's system supports pthread +AC_SEARCH_LIBS(pthread_create, pthread) +if test "x$ac_cv_search_pthread_create" = "xno"; then + AC_MSG_ERROR([Can't enable threads without the POSIX thread library.]) +fi + +AC_CHECK_FUNCS(mlockall) +AC_CHECK_FUNCS(getpagesizes) +AC_CHECK_FUNCS(memcntl) +AC_CHECK_FUNCS(sigignore) +AC_CHECK_FUNCS(clock_gettime) + +AC_DEFUN([AC_C_ALIGNMENT], +[AC_CACHE_CHECK(for alignment, ac_cv_c_alignment, +[ + AC_RUN_IFELSE( + [AC_LANG_PROGRAM([ +#include +#include + ], [ + char *buf = malloc(32); + + uint64_t *ptr = (uint64_t*)(buf+2); + // catch sigbus, etc. + *ptr = 0x1; + + // catch unaligned word access (ARM cpus) + *buf = 1; *(buf +1) = 2; *(buf + 2) = 2; *(buf + 3) = 3; *(buf + 4) = 4; + int* i = (int*)(buf+1); + return (84148994 == i) ? 0 : 1; + ]) + ],[ + ac_cv_c_alignment=none + ],[ + ac_cv_c_alignment=need + ],[ + ac_cv_c_alignment=need + ]) +]) +if test $ac_cv_c_alignment = need; then + AC_DEFINE(NEED_ALIGN, 1, [Machine need alignment]) +fi +]) + +AC_C_ALIGNMENT + +dnl Check for our specific usage of GCC atomics. +dnl These were added in 4.1.2, but 32bit OS's may lack shorts and 4.1.2 +dnl lacks testable defines. +have_gcc_atomics=no +AC_MSG_CHECKING(for GCC atomics) +AC_TRY_LINK([],[ + unsigned short a; + unsigned short b; + b = __sync_add_and_fetch(&a, 1); + b = __sync_sub_and_fetch(&a, 2); + ],[have_gcc_atomics=yes + AC_DEFINE(HAVE_GCC_ATOMICS, 1, [GCC Atomics available])]) +AC_MSG_RESULT($have_gcc_atomics) + +dnl Check for the requirements for running memcached with less privileges +dnl than the default privilege set. 
On Solaris we need setppriv and priv.h +dnl If you want to add support for other platforms you should check for +dnl your requirements, define HAVE_DROP_PRIVILEGES, and make sure you add +dnl the source file containing the implementation into memcached_SOURCE +dnl in Makefile.am +AC_CHECK_FUNCS(setppriv, [ + AC_CHECK_HEADER(priv.h, [ + AC_DEFINE([HAVE_DROP_PRIVILEGES], 1, + [Define this if you have an implementation of drop_privileges()]) + build_solaris_privs=yes + ], []) +],[]) + +AM_CONDITIONAL([BUILD_SOLARIS_PRIVS],[test "$build_solaris_privs" = "yes"]) + +AC_CHECK_HEADER(umem.h, [ + AC_DEFINE([HAVE_UMEM_H], 1, + [Define this if you have umem.h]) + build_cache=no +], [build_cache=yes]) + +AM_CONDITIONAL([BUILD_CACHE], [test "x$build_cache" = "xyes"]) + +AC_ARG_ENABLE(docs, + [AS_HELP_STRING([--disable-docs],[Disable documentation generation])]) + +AC_PATH_PROG([XML2RFC], [xml2rfc], "no") +AC_PATH_PROG([XSLTPROC], [xsltproc], "no") + +AM_CONDITIONAL([BUILD_SPECIFICATIONS], + [test "x$enable_docs" != "xno" -a "x$XML2RFC" != "xno" -a "x$XSLTPROC" != "xno"]) + + +dnl Let the compiler be a bit more picky. Please note that you cannot +dnl specify these flags to the compiler before AC_CHECK_FUNCS, because +dnl the test program will generate a compilation warning and hence fail +dnl to detect the function ;-) +if test "$ICC" = "yes" +then + dnl ICC trying to be gcc. 
+ CFLAGS="$CFLAGS -diag-disable 187 -Wall -Werror" + AC_DEFINE([_GNU_SOURCE],[1],[find sigignore on Linux]) +elif test "$GCC" = "yes" +then + GCC_VERSION=`$CC -dumpversion` + CFLAGS="$CFLAGS -Wall -Werror -pedantic -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls" + case $GCC_VERSION in + 4.4.*) + CFLAGS="$CFLAGS -fno-strict-aliasing" + ;; + esac + AC_DEFINE([_GNU_SOURCE],[1],[find sigignore on Linux]) +elif test "$SUNCC" = "yes" +then + CFLAGS="$CFLAGS -errfmt=error -errwarn -errshort=tags" +fi + +AC_CONFIG_FILES(Makefile doc/Makefile) +AC_OUTPUT diff --git a/memcached/daemon.c b/memcached/daemon.c new file mode 100644 index 00000000..2f189157 --- /dev/null +++ b/memcached/daemon.c @@ -0,0 +1,89 @@ +/* $Header: /cvsroot/wikipedia/willow/src/bin/willow/daemon.c,v 1.1 2005/05/02 19:15:21 kateturner Exp $ */ +/* $NetBSD: daemon.c,v 1.9 2003/08/07 16:42:46 agc Exp $ */ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined __SUNPRO_C || defined __DECC || defined __HP_cc +# pragma ident "@(#)$Header: /cvsroot/wikipedia/willow/src/bin/willow/daemon.c,v 1.1 2005/05/02 19:15:21 kateturner Exp $" +# pragma ident "$NetBSD: daemon.c,v 1.9 2003/08/07 16:42:46 agc Exp $" +#endif + +#include "memcached.h" + +#include +#include +#include +#include + +int daemonize(int nochdir, int noclose) +{ + int fd; + + switch (fork()) { + case -1: + return (-1); + case 0: + break; + default: + _exit(EXIT_SUCCESS); + } + + if (setsid() == -1) + return (-1); + + if (nochdir == 0) { + if(chdir("/") != 0) { + perror("chdir"); + return (-1); + } + } + + if (noclose == 0 && (fd = open("/dev/null", O_RDWR, 0)) != -1) { + if(dup2(fd, STDIN_FILENO) < 0) { + perror("dup2 stdin"); + return (-1); + } + if(dup2(fd, STDOUT_FILENO) < 0) { + perror("dup2 stdout"); + return (-1); + } + if(dup2(fd, STDERR_FILENO) < 0) { + perror("dup2 stderr"); + return (-1); + } + + if (fd > STDERR_FILENO) { + if(close(fd) < 0) { + perror("close"); + return (-1); + } + } + } + return (0); +} diff --git a/memcached/devtools/bench_noreply.pl b/memcached/devtools/bench_noreply.pl new file mode 100644 index 00000000..365c6825 --- /dev/null +++ b/memcached/devtools/bench_noreply.pl @@ -0,0 +1,51 @@ +#! 
/usr/bin/perl +# +use warnings; +use strict; + +use IO::Socket::INET; + +use FindBin; + +@ARGV == 1 or @ARGV == 2 + or die "Usage: $FindBin::Script HOST:PORT [COUNT]\n"; + +# Note that it's better to run the test over the wire, because for +# localhost the task may become CPU bound. +my $addr = $ARGV[0]; +my $count = $ARGV[1] || 10_000; + +my $sock = IO::Socket::INET->new(PeerAddr => $addr, + Timeout => 3); +die "$!\n" unless $sock; + + +# By running 'noreply' test first we also ensure there are no reply +# packets left in the network. +foreach my $noreply (1, 0) { + use Time::HiRes qw(gettimeofday tv_interval); + + print "'noreply' is ", $noreply ? "enabled" : "disabled", ":\n"; + my $param = $noreply ? 'noreply' : ''; + my $start = [gettimeofday]; + foreach (1 .. $count) { + print $sock "add foo 0 0 1 $param\r\n1\r\n"; + scalar<$sock> unless $noreply; + print $sock "set foo 0 0 1 $param\r\n1\r\n"; + scalar<$sock> unless $noreply; + print $sock "replace foo 0 0 1 $param\r\n1\r\n"; + scalar<$sock> unless $noreply; + print $sock "append foo 0 0 1 $param\r\n1\r\n"; + scalar<$sock> unless $noreply; + print $sock "prepend foo 0 0 1 $param\r\n1\r\n"; + scalar<$sock> unless $noreply; + print $sock "incr foo 1 $param\r\n"; + scalar<$sock> unless $noreply; + print $sock "decr foo 1 $param\r\n"; + scalar<$sock> unless $noreply; + print $sock "delete foo $param\r\n"; + scalar<$sock> unless $noreply; + } + my $end = [gettimeofday]; + printf("update commands: %.2f secs\n\n", tv_interval($start, $end)); +} diff --git a/memcached/devtools/clean-whitespace.pl b/memcached/devtools/clean-whitespace.pl new file mode 100755 index 00000000..95481ef4 --- /dev/null +++ b/memcached/devtools/clean-whitespace.pl @@ -0,0 +1,32 @@ +#!/usr/bin/perl +use strict; +use FindBin qw($Bin); +chdir "$Bin/.." 
or die; + +my @exempted = qw(Makefile.am ChangeLog doc/Makefile.am); +push(@exempted, glob("doc/*.xml")); +push(@exempted, glob("doc/xml2rfc/*.xsl")); +push(@exempted, glob("m4/*backport*m4")); +my %exempted_hash = map { $_ => 1 } @exempted; + +my @stuff = split /\0/, `git ls-files -z -c -m -o --exclude-standard`; +my @files = grep { ! $exempted_hash{$_} } @stuff; + +unless (@files) { + warn "ERROR: You don't seem to be running this from a git checkout\n"; + exit; +} + +foreach my $f (@files) { + open(my $fh, $f) or die; + my $before = do { local $/; <$fh>; }; + close ($fh); + my $after = $before; + $after =~ s/\t/ /g; + $after =~ s/ +$//mg; + $after .= "\n" unless $after =~ /\n$/; + next if $after eq $before; + open(my $fh, ">$f") or die; + print $fh $after; + close($fh); +} diff --git a/memcached/doc/CONTRIBUTORS b/memcached/doc/CONTRIBUTORS new file mode 100644 index 00000000..b87e5c60 --- /dev/null +++ b/memcached/doc/CONTRIBUTORS @@ -0,0 +1,45 @@ +MEMCACHED CONTRIBUTORS + +This file contains a list of people who have contributed code and +effort to the memcached project. If you don't see your name mentioned +send email to the memcached mailing list so you can be immortalized. + +Also see the ChangeLog for even more people who have helped over the +years by submitting fixes, patches and reporting bugs. + + +Major authors: +-------------- + +Brad Fitzpatrick -- maintainer, original implementations + +Anatoly Vorobey -- lots of the modern server code + +Steven Grimm -- iov writing (less CPU), UDP mode, + non-2.0 slab mantissas, multithread, ... + +Other Contributors +------------------ + +Evan Martin +Nathan Neulinger +Eric Hodel +Michael Johnson +Paul Querna +Jamie McCarthy +Philip Neustrom +Andrew O'Brien +Josh Rotenberg +Robin H. 
Johnson +Tim Yardley +Paolo Borelli +Eli Bingham +Jean-Francois Bustarret +Paul G +Paul Lindner +Dormando +Dustin Sallings +Chris Goffinet +Tomash Brechko +Brian Aker +Trond Norbye diff --git a/memcached/doc/Doxyfile b/memcached/doc/Doxyfile new file mode 100644 index 00000000..e1b4afb3 --- /dev/null +++ b/memcached/doc/Doxyfile @@ -0,0 +1,1258 @@ +# Doxyfile 1.5.2 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file that +# follow. The default is UTF-8 which is also the encoding used for all text before +# the first occurrence of this tag. Doxygen uses libiconv (or the iconv built into +# libc) for the transcoding. See http://www.gnu.org/software/libiconv for the list of +# possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = memcached + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = 0.8 + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. 
+ +OUTPUT_DIRECTORY = doxygen + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Finnish, French, German, Greek, Hungarian, +# Italian, Japanese, Japanese-en (Japanese with English messages), Korean, +# Korean-en, Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian, +# Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = NO + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. 
Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = YES + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = NO + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. 
Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like the Qt-style comments (thus requiring an +# explicit @brief command for a brief description). + +JAVADOC_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the DETAILS_AT_TOP tag is set to YES then Doxygen +# will output the detailed description near the top, like JavaDoc. +# If set to NO, the detailed description appears after the member +# documentation. + +DETAILS_AT_TOP = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments.
+ +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for Java. +# For instance, namespaces will be presented as packages, qualified scopes +# will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want to +# include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. 
+ +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. 
+# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = YES + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. 
+ +SHOW_INCLUDE_FILES = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = NO + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. 
+ +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from the +# version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples.
+ +FILE_VERSION_FILTER = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text.
Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = .. + +# This tag can be used to specify the character encoding of the source files that +# doxygen parses. Internally doxygen uses the UTF-8 encoding, which is also the default +# input encoding. Doxygen uses libiconv (or the iconv built into libc) for the transcoding. +# See http://www.gnu.org/software/libiconv for the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py + +FILE_PATTERNS = *.h \ + *.c + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files.
This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = config.h + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the output. +# The symbol name can be a fully qualified name, a word, or if the wildcard * is used, +# a substring. Examples: ANamespace, AClass, AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command).
+ +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments.
Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES (the default) +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES (the default) +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = YES + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces.
+ +ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. 
If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. 
+ +TOC_EXPAND = NO + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be +# generated containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, +# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are +# probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. 
+ +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = NO + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = NO + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output.
+ +LATEX_HIDE_INDICES = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. 
+ +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = YES + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. 
+ +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. 
On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# in the INCLUDE_PATH (see below) will be searched if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used.
+ +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. 
+# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see http://www.mcternan.me.uk/mscgen/) to +# produce the chart and insert it in the documentation. The MSCGEN_PATH tag allows you to +# specify the directory where the mscgen tool resides. If left empty the tool is assumed to +# be found in the default search path. 
+ +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files.
+ +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will +# generate a call dependency graph for every global function or class method. +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable call graphs for selected +# functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then doxygen will +# generate a caller dependency graph for every global function or class method. +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable caller graphs for selected +# functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path.
+ +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that doxygen will always +# show the root nodes and its direct children regardless of this setting. + +DOT_GRAPH_MAX_NODES = 50 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, which results in a white background. +# Warning: Depending on the platform used, enabling this option may lead to +# badly anti-aliased labels on the edges of a graph (i.e. they become hard to +# read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used.
If set to NO the values of all tags below this one will be ignored. + +SEARCHENGINE = NO diff --git a/memcached/doc/Makefile.am b/memcached/doc/Makefile.am new file mode 100644 index 00000000..f9f4f463 --- /dev/null +++ b/memcached/doc/Makefile.am @@ -0,0 +1,20 @@ +man_MANS = memcached.1 + +EXTRA_DIST = *.txt + +BUILT_SOURCES= + +if BUILD_SPECIFICATIONS +BUILT_SOURCES += protocol-binary.txt protocol-binary-range.txt +MOSTLYCLEANFILES = protocol-binary.txt protocol-binary-range.txt +endif + +%.txt: %.full + @XML2RFC@ $< $@ + +%.chk: %.xml xml2rfc/rfc2629-refchk.xsl + @XSLTPROC@ xml2rfc/rfc2629-refchk.xsl $< >$@ + +%.full: %.xml xml2rfc/rfc2629-noinc.xsl + @XSLTPROC@ xml2rfc/rfc2629-noinc.xsl $< >$@ + diff --git a/memcached/doc/memcached.1 b/memcached/doc/memcached.1 new file mode 100644 index 00000000..b6f633dc --- /dev/null +++ b/memcached/doc/memcached.1 @@ -0,0 +1,153 @@ +.TH MEMCACHED 1 "April 11, 2005" +.SH NAME +memcached \- high-performance memory object caching system +.SH SYNOPSIS +.B memcached +.RI [ options ] +.br +.SH DESCRIPTION +This manual page documents briefly the +.B memcached +memory object caching daemon. +.PP +.B memcached +is a flexible memory object caching daemon designed to alleviate database load +in dynamic web applications by storing objects in memory. It's based on +libevent to scale to any size needed, and is specifically optimized to avoid +swapping and always use non-blocking I/O. +.br +.SH OPTIONS +These programs follow the usual GNU command line syntax. A summary of options +is included below. +.TP +.B \-s <file> +Unix socket path to listen on (disables network support). +.TP +.B \-a <perms> +Permissions (in octal format) for Unix socket created with \-s option. +.TP +.B \-l <ip_addr> +Listen on <ip_addr>; default to INADDR_ANY. This is an important option to +consider as there is no other way to secure the installation. Binding to an +internal or firewalled network interface is suggested. +.TP +.B \-d +Run memcached as a daemon.
+.TP +.B \-u <username> +Assume the identity of <username> (only when run as root). +.TP +.B \-m <num> +Use <num> MB memory max to use for object storage; the default is 64 megabytes. +.TP +.B \-c <num> +Use <num> max simultaneous connections; the default is 1024. +.TP +.B \-R <num> +This option seeks to prevent client starvation by setting a limit to the +number of sequential requests the server will process from an individual +client connection. Once a connection has exceeded this value, the server will +attempt to process I/O on other connections before handling any further +request from this connection. The default value for this option is 20. +.TP +.B \-k +Lock down all paged memory. This is a somewhat dangerous option with large +caches, so consult the README and memcached homepage for configuration +suggestions. +.TP +.B \-p <num> +Listen on TCP port <num>, the default is port 11211. +.TP +.B \-U <num> +Listen on UDP port <num>, the default is port 11211, 0 is off. +.TP +.B \-M +Disable automatic removal of items from the cache when out of memory. +Additions will not be possible until adequate space is freed up. +.TP +.B \-r +Raise the core file size limit to the maximum allowable. +.TP +.B \-f <factor> +Use <factor> as the multiplier for computing the sizes of memory chunks that +items are stored in. A lower value may result in less wasted memory depending +on the total amount of memory available and the distribution of item sizes. +The default is 1.25. +.TP +.B \-n <size> +Allocate a minimum of <size> bytes for the item key, value, and flags. The +default is 48. If you have a lot of small keys and values, you can get a +significant memory efficiency gain with a lower value. If you use a high +chunk growth factor (\-f option), on the other hand, you may want to increase +the size to allow a bigger percentage of your items to fit in the most densely +packed (smallest) chunks. +.TP +.B \-C +Disable the use of CAS (and reduce the per-item size by 8 bytes). +.TP +.B \-h +Show the version of memcached and a summary of options.
+.TP +.B \-v +Be verbose during the event loop; print out errors and warnings. +.TP +.B \-vv +Be even more verbose; same as \-v but also print client commands and +responses. +.TP +.B \-i +Print memcached and libevent licenses. +.TP +.B \-P <filename> +Print pidfile to <filename>, only used under \-d option. +.TP +.B \-t <threads> +Number of threads to use to process incoming requests. This option is only +meaningful if memcached was compiled with thread support enabled. It is +typically not useful to set this higher than the number of CPU cores on the +memcached server. The default is 4. +.TP +.B \-D <char> +Use <char> as the delimiter between key prefixes and IDs. This is used for +per-prefix stats reporting. The default is ":" (colon). If this option is +specified, stats collection is turned on automatically; if not, then it may +be turned on by sending the "stats detail on" command to the server. +.TP +.B \-L +Try to use large memory pages (if available). Increasing the memory page size +could reduce the number of TLB misses and improve the performance. In order to +get large pages from the OS, memcached will allocate the total item-cache in +one large chunk. Only available if supported on your OS. +.TP +.B \-B <proto> +Specify the binding protocol to use. By default, the server will +autonegotiate client connections. By using this option, you can +specify the protocol clients must speak. Possible options are "auto" +(the default, autonegotiation behavior), "ascii" and "binary". +.TP +.B \-I <size> +Override the default size of each slab page. Default is 1m, +minimum is 1k, max is 128m. Adjusting this value changes the item size limit. +Beware that this also increases the number of slabs (use -v to view), and the +overall memory usage of memcached. +.TP +.B \-o <options> +Comma separated list of extended or experimental options. See -h or wiki for +up to date list. +.br +.SH LICENSE +The memcached daemon is copyright Danga Interactive and is distributed under +the BSD license.
Note that daemon clients are licensed separately. +.br +.SH SEE ALSO +The README file that comes with memcached +.br +.B http://www.danga.com/memcached +.SH AUTHOR +The memcached daemon was written by Anatoly Vorobey +.B +and Brad Fitzpatrick +.B +and the rest of the crew of Danga Interactive +.B http://www.danga.com +.br diff --git a/memcached/doc/protocol-binary-range.xml b/memcached/doc/protocol-binary-range.xml new file mode 100644 index 00000000..02711a84 --- /dev/null +++ b/memcached/doc/protocol-binary-range.xml @@ -0,0 +1,226 @@ + + + + + + + + + + + + + + Memcache Binary Protocol: Extensions for UDP + + + Six Apart, Ltd. +
+ + 548 4th Street + San Francisco + CA + 94107 + USA + + aaron@serendipity.palo-alto.ca.us +
+
+ + + + Applications + + memcache memcached cache udp + + + + This memo explains extensions to the memcache binary protocol for use in a UDP environment. + + + + Memcache is a high performance key-value cache. It is intentionally a + dumb cache, optimized for speed only. Applications using memcache do + not rely on it for data -- a persistent database with guaranteed reliability + is strongly recommended -- but applications can run much faster when + cached data is available in memcache. + + +
+ + +
+ + Memcache is a high performance key-value cache. It is intentionally a + dumb cache, optimized for speed only. Applications using memcache do + not rely on it for data -- a persistent database with guaranteed reliability + is strongly recommended -- but applications can run much faster when + cached data is available in memcache. + + + Sites may find that, due to their network architecture or application usage patterns, + the stateless protocol better suits their needs. This document + provides extensions and descriptions of use of the memcache protocol + in a UDP environment. + + + It is a goal of this document to provide sufficient information in each UDP packet + as to avoid any requirement for statefulness on the part of the server nor significant + caching of outstanding packets on the part of the client. + +
+ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in . + +
+
+ +
+
+ + The magic bytes remain the same as in . + +
+ +
+ + Additional status values: + + Value is larger than a single response packet + + +
+ +
+ + Additional opcode values: + + Get Range + Set Range + + +
+ +
+ + There are no new data types in this extension. + +
+
+ +
+ +
+ + This section extends the behavior of the Get and GetQ commands as described in + . + + + + When a Get or GetQ request is made via UDP, and the value of the key for which + the request was made is larger than can be placed into a single UDP packet (noting + that the protocol header must also be counted), a Get Range response packet + MUST be sent instead of the Get response packet. In this instance: + + The Status field of the response header MUST be 0x0004. + The Offset field of the GetR response extras MUST be 0. + The Length field of the GetR response extras, and the data contained in + the Value field of the packet, SHOULD be the maximum + allowed length of a UDP packet, less the space required by the header + and extras; however it MAY be any amount below this maximum. + The Total value length field of the response extras MUST be the + actual length of the complete value. + + + + + The client, upon receipt of a Get Range response bearing Status 0x0004 + and a Message ID corresponding to its Get request, shall then know that + it has received only the first portion of the value. The client MAY choose + to request the remaining portion of the value by sending one or more Get Range requests. + +
+ +
+ + The Get Range request is primarily intended for use over a UDP transport + to request byte ranges of the value for a key. In the event that the Data version + check fails to match that of the key, an error MUST be returned. + + +
+ Extra data for get range request: + +Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| Flags | + +---------------+---------------+---------------+---------------+ + 4| Data version check | + | | + +---------------+---------------+---------------+---------------+ +12| Offset | + +---------------+---------------+---------------+---------------+ +16| Length | + +---------------+---------------+---------------+---------------+ +Total 20 bytes +
+
+
+ +
+ + The Get Range response is primarily intended for use over a UDP transport + to indicate the location of the bytes of the value for a key contained in + a given packet. A client receives enough information in each Get Range + extras to construct an appropriately sized buffer in its own memory and + blindly insert the contents of the packet at the given byte offset. + + +
+ Extra data for get range response: + +Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| Flags | + +---------------+---------------+---------------+---------------+ + 4| Data version check | + | | + +---------------+---------------+---------------+---------------+ +12| Offset | + +---------------+---------------+---------------+---------------+ +16| Length | + +---------------+---------------+---------------+---------------+ +20| Total value length | + +---------------+---------------+---------------+---------------+ +Total 24 bytes +
+
+
+ +
+ +
+ + This document does not introduce any new security considerations + beyond those discussed in . + +
+ +
+ + + + + + + + + + +
+ diff --git a/memcached/doc/protocol-binary.xml b/memcached/doc/protocol-binary.xml new file mode 100644 index 00000000..2dfbc58d --- /dev/null +++ b/memcached/doc/protocol-binary.xml @@ -0,0 +1,1461 @@ + + + + + + + + + + + + Memcache Binary Protocol + + + Six Apart, Ltd. +
+ + 548 4th Street + San Francisco + CA + 94107 + USA + + aaron@serendipity.palo-alto.ca.us +
+
+ + Sun Microsystems, INC +
+ + Haakon VII g. 7B + Trondheim + NO-7485 Trondheim + Norway + + trond.norbye@sun.com +
+
+ + Applications + memcache memcached cache + + + This memo explains the memcache binary protocol for informational + purposes. + + + Memcache is a high performance key-value cache. It is intentionally a + dumb cache, optimized for speed only. Applications using memcache do + not rely on it for data -- a persistent database with guaranteed + reliability is strongly recommended -- but applications can run much + faster when cached data is available in memcache. + + +
+ + +
+ + Memcache is a high performance key-value cache. It is intentionally a + dumb cache, optimized for speed only. Applications using memcache should + not rely on it for data -- a persistent database with guaranteed + reliability is strongly recommended -- but applications can run much + faster when cached data is available in memcache. + + + Memcache was originally written to make + LiveJournal faster. It now powers all of + the fastest web sites that you love. + +
+ + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in + . + +
+
+ +
+
+ General format of a packet: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0/ HEADER / + / / + / / + / / + +---------------+---------------+---------------+---------------+ + 24/ COMMAND-SPECIFIC EXTRAS (as needed) / + +/ (note length in the extras length header field) / + +---------------+---------------+---------------+---------------+ + m/ Key (as needed) / + +/ (note length in key length header field) / + +---------------+---------------+---------------+---------------+ + n/ Value (as needed) / + +/ (note length is total body length header field, minus / + +/ sum of the extras and key length body fields) / + +---------------+---------------+---------------+---------------+ + Total 24 bytes + +
+ +
+ Request header: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| Magic | Opcode | Key length | + +---------------+---------------+---------------+---------------+ + 4| Extras length | Data type | Reserved | + +---------------+---------------+---------------+---------------+ + 8| Total body length | + +---------------+---------------+---------------+---------------+ + 12| Opaque | + +---------------+---------------+---------------+---------------+ + 16| CAS | + | | + +---------------+---------------+---------------+---------------+ + Total 24 bytes + +
+ +
+ Response header: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| Magic | Opcode | Key Length | + +---------------+---------------+---------------+---------------+ + 4| Extras length | Data type | Status | + +---------------+---------------+---------------+---------------+ + 8| Total body length | + +---------------+---------------+---------------+---------------+ + 12| Opaque | + +---------------+---------------+---------------+---------------+ + 16| CAS | + | | + +---------------+---------------+---------------+---------------+ + Total 24 bytes + +
+ + + Header fields: + + Magic number. + Command code. + Length in bytes of the text key that follows the command extras. + Status of the response (non-zero on error). + Length in bytes of the command extras. + Reserved for future use (Sean is using this soon). + Really reserved for future use (up for grabs). + Length in bytes of extra + key + value. + Will be copied back to you in the response. + Data version check. + + +
+ +
+
+ + + Request packet for this protocol version + Response packet for this protocol version + + + + + Magic byte / version. For each version of the protocol, we'll use a + different request/response value pair. This is useful for protocol + analyzers to distinguish the nature of the packet from the direction + in which it is moving. Note that it is common to run a memcached instance on + a host that also runs an application server. Such a host will both + send and receive memcache packets. + + + + The version should hopefully correspond only to different meanings of + the command byte. In an ideal world, we will not change the header + format. As reserved bytes are given defined meaning, the protocol + version / magic byte values should be incremented. + + + + Traffic analysis tools are encouraged to identify memcache packets + and provide detailed interpretation if the magic bytes are recognized + and otherwise to provide a generic breakdown of the packet. Note that + the key and value positions can always be identified even if the magic + byte or command opcode are not recognized. + +
+ +
+ + Possible values of this two-byte field: + + No error + Key not found + Key exists + Value too large + Invalid arguments + Item not stored + Incr/Decr on non-numeric value. + Unknown command + Out of memory + + +
+ +
+ + Possible values of the one-byte field: + + Get + Set + Add + Replace + Delete + Increment + Decrement + Quit + Flush + GetQ + No-op + Version + GetK + GetKQ + Append + Prepend + Stat + SetQ + AddQ + ReplaceQ + DeleteQ + IncrementQ + DecrementQ + QuitQ + FlushQ + AppendQ + PrependQ + + + + By convention, all of the commands ending with "Q" are + Quiet. A quiet version of a command will omit responses + that are considered uninteresting. Whether a given response + is interesting is dependent upon the command. See the + descriptions of the + get commands + and set commands for + examples of commands that include quiet variants. + +
+ +
+ + Possible values of the one-byte field: + + Raw bytes + + +
+
+ +
+
+ + All communication is initiated by a request from the client, + and the server will respond to each request with zero or + more packets. If the status code of a response + packet is non-zero, the body of the packet will contain a textual error + message. If the status code is zero, the command opcode will define the + layout of the body of the message. + +
+ + The following figure illustrates the packet layout for + a packet with an error message. + +
+ Packet layout: + + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x81 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 4| 0x00 | 0x00 | 0x00 | 0x01 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x09 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 24| 0x4e ('N') | 0x6f ('o') | 0x74 ('t') | 0x20 (' ') | + +---------------+---------------+---------------+---------------+ + 28| 0x66 ('f') | 0x6f ('o') | 0x75 ('u') | 0x6e ('n') | + +---------------+---------------+---------------+---------------+ + 32| 0x64 ('d') | + +---------------+ + Total 33 bytes (24 byte header, and 9 bytes value) + +Field (offset) (value) +Magic (0) : 0x81 +Opcode (1) : 0x00 +Key length (2,3) : 0x0000 +Extra length (4) : 0x00 +Data type (5) : 0x00 +Status (6,7) : 0x0001 +Total body (8-11) : 0x00000009 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : None +Key : None +Value (24-32): The textual string "Not found" + +
+
+
+ +
+ + Request: + + + + MUST NOT have extras. + MUST have key. + MUST NOT have value. + + + + + Response (if found): + + + + MUST have extras. + MAY have key. + MAY have value. + + + + + + 4 byte flags + + + + +
+ Extra data for the get commands: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| Flags | + +---------------+---------------+---------------+---------------+ + + Total 4 bytes + +
+
+ + + The get command gets a single key. The getq command is both mum + on cache miss and quiet, holding its response until a non-quiet + command is issued. Getk and getkq differ from get and getq by + adding the key into the response packet. + + + + You're not guaranteed a response to a getq/getkq cache hit until + you send a non-getq/getkq command later, which uncorks the + server and bundles up IOs to send to the client in one go. + + + + Clients should implement multi-get (still important for + reducing network roundtrips!) as n pipelined requests, the + first n-1 being getq/getkq, the last being a regular + get/getk. That way you're guaranteed to get a response, and + you know when the server's done. You can also do the naive + thing and send n pipelined get/getks, but then you could potentially + get back a lot of "NOT_FOUND" error code packets. + Alternatively, you can send 'n' getq/getkqs, followed by a + 'noop' command. + + +
+ + To request the data associated with the key "Hello" the + following fields must be specified in the packet. + +
+ get request: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x80 | 0x00 | 0x00 | 0x05 | + +---------------+---------------+---------------+---------------+ + 4| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x05 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 24| 0x48 ('H') | 0x65 ('e') | 0x6c ('l') | 0x6c ('l') | + +---------------+---------------+---------------+---------------+ + 28| 0x6f ('o') | + +---------------+ + + Total 29 bytes (24 byte header, and 5 bytes key) + +Field (offset) (value) +Magic (0) : 0x80 +Opcode (1) : 0x00 +Key length (2,3) : 0x0005 +Extra length (4) : 0x00 +Data type (5) : 0x00 +Reserved (6,7) : 0x0000 +Total body (8-11) : 0x00000005 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : None +Key (24-28): The textual string: "Hello" +Value : None + +
+ If the item exists on the server, the following packet is returned; + otherwise a packet with status code != 0 will be returned (see + Introduction). + +
+ get/getq response: + + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x81 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 4| 0x04 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x09 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x01 | + +---------------+---------------+---------------+---------------+ + 24| 0xde | 0xad | 0xbe | 0xef | + +---------------+---------------+---------------+---------------+ + 28| 0x57 ('W') | 0x6f ('o') | 0x72 ('r') | 0x6c ('l') | + +---------------+---------------+---------------+---------------+ + 32| 0x64 ('d') | + +---------------+ + + Total 33 bytes (24 byte header, 4 byte extras and 5 byte value) + +Field (offset) (value) +Magic (0) : 0x81 +Opcode (1) : 0x00 +Key length (2,3) : 0x0000 +Extra length (4) : 0x04 +Data type (5) : 0x00 +Status (6,7) : 0x0000 +Total body (8-11) : 0x00000009 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000001 +Extras : + Flags (24-27): 0xdeadbeef +Key : None +Value (28-32): The textual string "World" + +
+
+ getk/getkq response: + + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x81 | 0x00 | 0x00 | 0x05 | + +---------------+---------------+---------------+---------------+ + 4| 0x04 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x09 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x01 | + +---------------+---------------+---------------+---------------+ + 24| 0xde | 0xad | 0xbe | 0xef | + +---------------+---------------+---------------+---------------+ + 28| 0x48 ('H') | 0x65 ('e') | 0x6c ('l') | 0x6c ('l') | + +---------------+---------------+---------------+---------------+ + 32| 0x6f ('o') | 0x57 ('W') | 0x6f ('o') | 0x72 ('r') | + +---------------+---------------+---------------+---------------+ + 36| 0x6c ('l') | 0x64 ('d') | + +---------------+---------------+ + + Total 38 bytes (24 byte header, 4 byte extras, 5 byte key + and 5 byte value) + +Field (offset) (value) +Magic (0) : 0x81 +Opcode (1) : 0x00 +Key length (2,3) : 0x0005 +Extra length (4) : 0x04 +Data type (5) : 0x00 +Status (6,7) : 0x0000 +Total body (8-11) : 0x00000009 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000001 +Extras : + Flags (24-27): 0xdeadbeef +Key (28-32): The textual string: "Hello" +Value (33-37): The textual string: "World" + +
+
+
+ +
+ + + MUST have extras. + MUST have key. + MUST have value. + + + + + + 4 byte flags + 4 byte expiration time + + + +
+ Extra data for set/add/replace: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| Flags | + +---------------+---------------+---------------+---------------+ + 4| Expiration | + +---------------+---------------+---------------+---------------+ + Total 8 bytes + +
+ + + If the Data Version Check (CAS) is nonzero, the requested + operation MUST only succeed if the item exists and has a CAS value + identical to the provided value. + + + + Add MUST fail if the item already exists. + + + + Replace MUST fail if the item doesn't exist. + + + + Set should store the data unconditionally, whether the item exists + or not. + + + + Quiet mutations only return responses on failure. Success + is considered the general case and is suppressed when in + quiet mode, but errors should not be allowed to go + unnoticed. + + +
+ The following figure shows an add-command for + + Key: "Hello" + Value: "World" + Flags: 0xdeadbeef + Expiry: in one hour + + +
+ Add request: + + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x80 | 0x02 | 0x00 | 0x05 | + +---------------+---------------+---------------+---------------+ + 4| 0x08 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x12 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 24| 0xde | 0xad | 0xbe | 0xef | + +---------------+---------------+---------------+---------------+ + 28| 0x00 | 0x00 | 0x0e | 0x10 | + +---------------+---------------+---------------+---------------+ + 32| 0x48 ('H') | 0x65 ('e') | 0x6c ('l') | 0x6c ('l') | + +---------------+---------------+---------------+---------------+ + 36| 0x6f ('o') | 0x57 ('W') | 0x6f ('o') | 0x72 ('r') | + +---------------+---------------+---------------+---------------+ + 40| 0x6c ('l') | 0x64 ('d') | + +---------------+---------------+ + + Total 42 bytes (24 byte header, 8 byte extras, 5 byte key and + 5 byte value) + +Field (offset) (value) +Magic (0) : 0x80 +Opcode (1) : 0x02 +Key length (2,3) : 0x0005 +Extra length (4) : 0x08 +Data type (5) : 0x00 +Reserved (6,7) : 0x0000 +Total body (8-11) : 0x00000012 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : + Flags (24-27): 0xdeadbeef + Expiry (28-31): 0x00000e10 +Key (32-36): The textual string "Hello" +Value (37-41): The textual string "World" + +
+ + The response-packet contains no extra data, and the result of the + operation is signaled through the status code. If the command + succeeds, the CAS value for the item is returned in the CAS-field + of the packet. + +
+ Successful add response: + + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x81 | 0x02 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 4| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x01 | + +---------------+---------------+---------------+---------------+ + + Total 24 bytes + +Field (offset) (value) +Magic (0) : 0x81 +Opcode (1) : 0x02 +Key length (2,3) : 0x0000 +Extra length (4) : 0x00 +Data type (5) : 0x00 +Status (6,7) : 0x0000 +Total body (8-11) : 0x00000000 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000001 +Extras : None +Key : None +Value : None + +
+
+
+ +
+ + + MUST NOT have extras. + MUST have key. + MUST NOT have value. + + + + + Delete the item with the specific key. + + +
+ The following figure shows a delete message for the + item "Hello". +
+ Delete request: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x80 | 0x04 | 0x00 | 0x05 | + +---------------+---------------+---------------+---------------+ + 4| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x05 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 24| 0x48 ('H') | 0x65 ('e') | 0x6c ('l') | 0x6c ('l') | + +---------------+---------------+---------------+---------------+ + 28| 0x6f ('o') | + +---------------+ + + Total 29 bytes (24 byte header, 5 byte key) + +Field (offset) (value) +Magic (0) : 0x80 +Opcode (1) : 0x04 +Key length (2,3) : 0x0005 +Extra length (4) : 0x00 +Data type (5) : 0x00 +Reserved (6,7) : 0x0000 +Total body (8-11) : 0x00000005 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : None +Key : The textual string "Hello" +Value : None + +
+ + The response-packet contains no extra data, and the result of the + operation is signaled through the status code. + +
+
+ +
+ + + MUST have extras. + MUST have key. + MUST NOT have value. + + + + + + 8 byte value to add / subtract + 8 byte initial value (unsigned) + 4 byte expiration time + + +
+ Extra data for incr/decr: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| Amount to add | + | | + +---------------+---------------+---------------+---------------+ + 8| Initial value | + | | + +---------------+---------------+---------------+---------------+ + 16| Expiration | + +---------------+---------------+---------------+---------------+ + Total 20 bytes + +
+ + + These commands will either add the specified amount to, or subtract + it from, the requested counter. + + + If the counter does not exist, one of two things may happen: + + + + If the expiration value is all one-bits (0xffffffff), the + operation will fail with NOT_FOUND. + For all other expiration values, the operation will succeed + by seeding the value for this key with the provided initial + value to expire with the provided expiration time. The flags + will be set to zero. + + +
+ incr/decr response body: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 64-bit unsigned response. | + | | + +---------------+---------------+---------------+---------------+ + Total 8 bytes + +
+
+ The following figure shows an incr-command for + + Key: "counter" + Delta: 0x01 + Initial: 0x00 + Expiry: in one hour + + +
+ Increment request: + + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x80 | 0x05 | 0x00 | 0x07 | + +---------------+---------------+---------------+---------------+ + 4| 0x14 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x1b | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 24| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 28| 0x00 | 0x00 | 0x00 | 0x01 | + +---------------+---------------+---------------+---------------+ + 32| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 36| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 40| 0x00 | 0x00 | 0x0e | 0x10 | + +---------------+---------------+---------------+---------------+ + 44| 0x63 ('c') | 0x6f ('o') | 0x75 ('u') | 0x6e ('n') | + +---------------+---------------+---------------+---------------+ + 48| 0x74 ('t') | 0x65 ('e') | 0x72 ('r') | + +---------------+---------------+---------------+ + Total 51 bytes (24 byte header, 20 byte extras, 7 byte key) + +Field (offset) (value) +Magic (0) : 0x80 +Opcode (1) : 0x05 +Key length (2,3) : 0x0007 +Extra length (4) : 0x14 +Data type (5) : 0x00 +Reserved (6,7) : 0x0000 +Total body (8-11) : 0x0000001b +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : + delta (24-31): 0x0000000000000001 + initial (32-39): 0x0000000000000000 + expiration (40-43): 0x00000e10 +Key : Textual string 
"counter" +Value : None + +
+ + If the key doesn't exist, the server will respond with the + initial value. Otherwise, the incremented value will be returned. + Let's assume that the key didn't exist, so the initial value + is returned. + +
+ Increment response: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x81 | 0x05 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 4| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x08 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x05 | + +---------------+---------------+---------------+---------------+ + 24| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 28| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + Total 32 bytes (24 byte header, 8 byte value) + +Field (offset) (value) +Magic (0) : 0x81 +Opcode (1) : 0x05 +Key length (2,3) : 0x0000 +Extra length (4) : 0x00 +Data type (5) : 0x00 +Status (6,7) : 0x0000 +Total body (8-11) : 0x00000008 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000005 +Extras : None +Key : None +Value : 0x0000000000000000 + +
+
+
+ +
+ + + MUST NOT have extras. + MUST NOT have key. + MUST NOT have value. + + + + + Close the connection to the server. + + +
+
+ Quit request: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x80 | 0x07 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 4| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + Total 24 bytes + +Field (offset) (value) +Magic (0) : 0x80 +Opcode (1) : 0x07 +Key length (2,3) : 0x0000 +Extra length (4) : 0x00 +Data type (5) : 0x00 +Reserved (6,7) : 0x0000 +Total body (8-11) : 0x00000000 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : None +Key : None +Value : None + +
+ + The response-packet contains no extra data, and the result of the + operation is signaled through the status code. The server will + then close the connection. + +
+
+ +
+ + + MAY have extras. + MUST NOT have key. + MUST NOT have value. + + + + + + 4 byte expiration time + + +
+ Extra data for flush: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| Expiration | + +---------------+---------------+---------------+---------------+ + Total 4 bytes + +
+ + Flush the items in the cache now or some time in the future as + specified by the expiration field. See the documentation of the + textual protocol for the full description on how to specify the + expiration time. + +
+ + To flush the cache (delete all items) in one hour, set + the following values in the request: + +
+ Flush request: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x80 | 0x08 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 4| 0x04 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x04 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 24| 0x00 | 0x00 | 0x0e | 0x10 | + +---------------+---------------+---------------+---------------+ + Total 28 bytes (24 byte header, 4 byte body) + +Field (offset) (value) +Magic (0) : 0x80 +Opcode (1) : 0x08 +Key length (2,3) : 0x0000 +Extra length (4) : 0x04 +Data type (5) : 0x00 +Reserved (6,7) : 0x0000 +Total body (8-11) : 0x00000004 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : + Expiry (24-27): 0x00000e10 +Key : None +Value : None + +
+ + The response-packet contains no extra data, and the result of the + operation is signaled through the status code. + +
+
+
+ + + MUST NOT have extras. + MUST NOT have key. + MUST NOT have value. + + + + + Used as a keep alive. Flushes outstanding getq/getkq's. + +
+
+ Noop request: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x80 | 0x0a | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 4| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + Total 24 bytes + +Field (offset) (value) +Magic (0) : 0x80 +Opcode (1) : 0x0a +Key length (2,3) : 0x0000 +Extra length (4) : 0x00 +Data type (5) : 0x00 +Reserved (6,7) : 0x0000 +Total body (8-11) : 0x00000000 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : None +Key : None +Value : None + +
+ + The response-packet contains no extra data, and the result of the + operation is signaled through the status code. + +
+
+ +
+ + + MUST NOT have extras. + MUST NOT have key. + MUST NOT have value. + + + + + Request the server version. + + + The server responds with a packet containing the version string + in the body with the following format: "x.y.z" + +
+
+ Version request: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x80 | 0x0b | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 4| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + Total 24 bytes + +Field (offset) (value) +Magic (0) : 0x80 +Opcode (1) : 0x0b +Key length (2,3) : 0x0000 +Extra length (4) : 0x00 +Data type (5) : 0x00 +Reserved (6,7) : 0x0000 +Total body (8-11) : 0x00000000 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : None + +
+
+ Version response: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x81 | 0x0b | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 4| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x05 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 24| 0x31 ('1') | 0x2e ('.') | 0x33 ('3') | 0x2e ('.') | + +---------------+---------------+---------------+---------------+ + 28| 0x31 ('1') | + +---------------+ + Total 29 bytes (24 byte header, 5 byte body) + +Field (offset) (value) +Magic (0) : 0x81 +Opcode (1) : 0x0b +Key length (2,3) : 0x0000 +Extra length (4) : 0x00 +Data type (5) : 0x00 +Status (6,7) : 0x0000 +Total body (8-11) : 0x00000005 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : None +Key : None +Value : Textual string "1.3.1" + +
+
+
+ +
+ + + MUST NOT have extras. + MUST have key. + MUST have value. + + + + + These commands will either append or prepend the specified + value to the requested key. + + +
+ The following example appends '!' to the 'Hello' key. +
+ Append request: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x80 | 0x0e | 0x00 | 0x05 | + +---------------+---------------+---------------+---------------+ + 4| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x06 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 24| 0x48 ('H') | 0x65 ('e') | 0x6c ('l') | 0x6c ('l') | + +---------------+---------------+---------------+---------------+ + 28| 0x6f ('o') | 0x21 ('!') | + +---------------+---------------+ + Total 30 bytes (24 byte header, 5 byte key, 1 byte value) + +Field (offset) (value) +Magic (0) : 0x80 +Opcode (1) : 0x0e +Key length (2,3) : 0x0005 +Extra length (4) : 0x00 +Data type (5) : 0x00 +Reserved (6,7) : 0x0000 +Total body (8-11) : 0x00000006 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : None +Key (24-28): The textual string "Hello" +Value (29) : "!" + +
+ + The response-packet contains no extra data, and the result of the + operation is signaled through the status code. + +
+
+ +
+ + + MUST NOT have extras. + MAY have key. + MUST NOT have value. + + + + + Request server statistics. Without a key specified the server will + respond with a "default" set of statistics information. Each piece + of statistical information is returned in its own packet (key + contains the name of the statistical item and the body contains the + value in ASCII format). The sequence of return packets is terminated + with a packet that contains no key and no value. + +
+ The following example requests all statistics from the server +
+ Stat request: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x80 | 0x10 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 4| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + Total 24 bytes + +Field (offset) (value) +Magic (0) : 0x80 +Opcode (1) : 0x10 +Key length (2,3) : 0x0000 +Extra length (4) : 0x00 +Data type (5) : 0x00 +Reserved (6,7) : 0x0000 +Total body (8-11) : 0x00000000 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : None +Key : None +Value : None + +
+ + The server will send each value in a separate packet with + an "empty" packet (no key / no value) to terminate the sequence. + Each of the response packets look like the following example: + +
+ Stat response: + + Byte/ 0 | 1 | 2 | 3 | + / | | | | + |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| + +---------------+---------------+---------------+---------------+ + 0| 0x81 | 0x10 | 0x00 | 0x03 | + +---------------+---------------+---------------+---------------+ + 4| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 8| 0x00 | 0x00 | 0x00 | 0x07 | + +---------------+---------------+---------------+---------------+ + 12| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 16| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 20| 0x00 | 0x00 | 0x00 | 0x00 | + +---------------+---------------+---------------+---------------+ + 24| 0x70 ('p') | 0x69 ('i') | 0x64 ('d') | 0x33 ('3') | + +---------------+---------------+---------------+---------------+ + 28| 0x30 ('0') | 0x37 ('7') | 0x38 ('8') | + +---------------+---------------+---------------+ + Total 31 bytes (24 byte header, 3 byte key, 4 byte body) + +Field (offset) (value) +Magic (0) : 0x81 +Opcode (1) : 0x10 +Key length (2,3) : 0x0003 +Extra length (4) : 0x00 +Data type (5) : 0x00 +Status (6,7) : 0x0000 +Total body (8-11) : 0x00000007 +Opaque (12-15): 0x00000000 +CAS (16-23): 0x0000000000000000 +Extras : None +Key : The textual string "pid" +Value : The textual string "3078" + +
+
+
+
+
+ + Memcache has no authentication or security layers whatsoever. It is + RECOMMENDED that memcache be deployed strictly on closed, protected, + back-end networks within a single data center, within a single cluster of + servers, or even on a single host, providing shared caching for multiple + applications. Memcache MUST NOT be made available on a public network. + +
+ +
+ + + + + + LJ NEEDS MOAR SPEED + + Danga Interactive + + + + http://www.livejournal.com/ + + + + + + +
+ + Thanks to Brad Fitzpatrick, Anatoly Vorobey, Steven Grimm, and Dustin + Sallings, for their work on the memcached server. + + + + Thanks to Sean Chittenden, Jonathan Steinert, Brian Aker, Evan Martin, + Nathan Neulinger, Eric Hodel, Michael Johnson, Paul Querna, Jamie + McCarthy, Philip Neustrom, Andrew O'Brien, Josh Rotenberg, Robin H. + Johnson, Tim Yardley, Paolo Borelli, Eli Bingham, Jean-Francois + Bustarret, Paul G, Paul Lindner, Alan Kasindorf, Chris Goffinet, Tomash + Brechko, and others for their work reporting bugs and maintaining + memcached client libraries and bindings in many languages. + +
+
+ +
+ diff --git a/memcached/doc/protocol.txt b/memcached/doc/protocol.txt new file mode 100644 index 00000000..1a844f0a --- /dev/null +++ b/memcached/doc/protocol.txt @@ -0,0 +1,774 @@ +Protocol +-------- + +Clients of memcached communicate with server through TCP connections. +(A UDP interface is also available; details are below under "UDP +protocol.") A given running memcached server listens on some +(configurable) port; clients connect to that port, send commands to +the server, read responses, and eventually close the connection. + +There is no need to send any command to end the session. A client may +just close the connection at any moment it no longer needs it. Note, +however, that clients are encouraged to cache their connections rather +than reopen them every time they need to store or retrieve data. This +is because memcached is especially designed to work very efficiently +with a very large number (many hundreds, more than a thousand if +necessary) of open connections. Caching connections will eliminate the +overhead associated with establishing a TCP connection (the overhead +of preparing for a new connection on the server side is insignificant +compared to this). + +There are two kinds of data sent in the memcache protocol: text lines +and unstructured data. Text lines are used for commands from clients +and responses from servers. Unstructured data is sent when a client +wants to store or retrieve data. The server will transmit back +unstructured data in exactly the same way it received it, as a byte +stream. The server doesn't care about byte order issues in +unstructured data and isn't aware of them. There are no limitations on +characters that may appear in unstructured data; however, the reader +of such data (either a client or a server) will always know, from a +preceding text line, the exact length of the data block being +transmitted. + +Text lines are always terminated by \r\n. 
Unstructured data is _also_ +terminated by \r\n, even though \r, \n or any other 8-bit characters +may also appear inside the data. Therefore, when a client retrieves +data from a server, it must use the length of the data block (which it +will be provided with) to determine where the data block ends, and not +the fact that \r\n follows the end of the data block, even though it +does. + +Keys +---- + +Data stored by memcached is identified with the help of a key. A key +is a text string which should uniquely identify the data for clients +that are interested in storing and retrieving it. Currently the +length limit of a key is set at 250 characters (of course, normally +clients wouldn't need to use such long keys); the key must not include +control characters or whitespace. + +Commands +-------- + +There are three types of commands. + +Storage commands (there are six: "set", "add", "replace", "append" +"prepend" and "cas") ask the server to store some data identified by a +key. The client sends a command line, and then a data block; after +that the client expects one line of response, which will indicate +success or failure. + +Retrieval commands (there are two: "get" and "gets") ask the server to +retrieve data corresponding to a set of keys (one or more keys in one +request). The client sends a command line, which includes all the +requested keys; after that for each item the server finds it sends to +the client one response line with information about the item, and one +data block with the item's data; this continues until the server +finished with the "END" response line. + +All other commands don't involve unstructured data. In all of them, +the client sends one command line, and expects (depending on the +command) either one line of response, or several lines of response +ending with "END" on the last line. + +A command line always starts with the name of the command, followed by +parameters (if any) delimited by whitespace. 
Command names are +lower-case and are case-sensitive. + +Expiration times +---------------- + +Some commands involve a client sending some kind of expiration time +(relative to an item or to an operation requested by the client) to +the server. In all such cases, the actual value sent may either be +Unix time (number of seconds since January 1, 1970, as a 32-bit +value), or a number of seconds starting from current time. In the +latter case, this number of seconds may not exceed 60*60*24*30 (number +of seconds in 30 days); if the number sent by a client is larger than +that, the server will consider it to be real Unix time value rather +than an offset from current time. + + +Error strings +------------- + +Each command sent by a client may be answered with an error string +from the server. These error strings come in three types: + +- "ERROR\r\n" + + means the client sent a nonexistent command name. + +- "CLIENT_ERROR <error>\r\n" + + means some sort of client error in the input line, i.e. the input + doesn't conform to the protocol in some way. <error> is a + human-readable error string. + +- "SERVER_ERROR <error>\r\n" + + means some sort of server error prevents the server from carrying + out the command. <error> is a human-readable error string. In cases + of severe server errors, which make it impossible to continue + serving the client (this shouldn't normally happen), the server will + close the connection after sending the error line. This is the only + case in which the server closes a connection to a client. + + +In the descriptions of individual commands below, these error lines +are not again specifically mentioned, but clients must allow for their +possibility. + + +Storage commands +---------------- + +First, the client sends a command line which looks like this: + +<command name> <key> <flags> <exptime> <bytes> [noreply]\r\n +cas <key> <flags> <exptime> <bytes> <cas unique> [noreply]\r\n + +- <command name> is "set", "add", "replace", "append" or "prepend" + + "set" means "store this data".
+ + "add" means "store this data, but only if the server *doesn't* already + hold data for this key". + + "replace" means "store this data, but only if the server *does* + already hold data for this key". + + "append" means "add this data to an existing key after existing data". + + "prepend" means "add this data to an existing key before existing data". + + The append and prepend commands do not accept flags or exptime. + They update existing data portions, and ignore new flag and exptime + settings. + + "cas" is a check and set operation which means "store this data but + only if no one else has updated since I last fetched it." + +- <key> is the key under which the client asks to store the data + +- <flags> is an arbitrary 16-bit unsigned integer (written out in + decimal) that the server stores along with the data and sends back + when the item is retrieved. Clients may use this as a bit field to + store data-specific information; this field is opaque to the server. + Note that in memcached 1.2.1 and higher, flags may be 32-bits, instead + of 16, but you might want to restrict yourself to 16 bits for + compatibility with older versions. + +- <exptime> is expiration time. If it's 0, the item never expires + (although it may be deleted from the cache to make place for other + items). If it's non-zero (either Unix time or offset in seconds from + current time), it is guaranteed that clients will not be able to + retrieve this item after the expiration time arrives (measured by + server time). + +- <bytes> is the number of bytes in the data block to follow, *not* + including the delimiting \r\n. <bytes> may be zero (in which case + it's followed by an empty data block). + +- <cas unique> is a unique 64-bit value of an existing entry. + Clients should use the value returned from the "gets" command + when issuing "cas" updates. + +- "noreply" optional parameter instructs the server to not send the + reply. NOTE: if the request line is malformed, the server can't + parse "noreply" option reliably.
In this case it may send the error + to the client, and not reading it on the client side will break + things. Client should construct only valid requests. + +After this line, the client sends the data block: + +<data block>\r\n + +- <data block> is a chunk of arbitrary 8-bit data of length <bytes> + from the previous line. + +After sending the command line and the data block, the client awaits +the reply, which may be: + +- "STORED\r\n", to indicate success. + +- "NOT_STORED\r\n" to indicate the data was not stored, but not +because of an error. This normally means that the +condition for an "add" or a "replace" command wasn't met. + +- "EXISTS\r\n" to indicate that the item you are trying to store with +a "cas" command has been modified since you last fetched it. + +- "NOT_FOUND\r\n" to indicate that the item you are trying to store +with a "cas" command did not exist. + + +Retrieval command: +------------------ + +The retrieval commands "get" and "gets" operate like this: + +get <key>*\r\n +gets <key>*\r\n + +- <key>* means one or more key strings separated by whitespace. + +After this command, the client expects zero or more items, each of +which is received as a text line followed by a data block. After all +the items have been transmitted, the server sends the string + +"END\r\n" + +to indicate the end of response. + +Each item sent by the server looks like this: + +VALUE <key> <flags> <bytes> [<cas unique>]\r\n +<data block>\r\n + +- <key> is the key for the item being sent + +- <flags> is the flags value set by the storage command + +- <bytes> is the length of the data block to follow, *not* including + its delimiting \r\n + +- <cas unique> is a unique 64-bit integer that uniquely identifies + this specific item. + +- <data block> is the data for this item. + +If some of the keys appearing in a retrieval request are not sent back +by the server in the item list this means that the server does not +hold items with such keys (because they were never stored, or stored +but deleted to make space for more items, or expired, or explicitly +deleted by a client).
+ + +Deletion +-------- + +The command "delete" allows for explicit deletion of items: + +delete <key> [noreply]\r\n + +- <key> is the key of the item the client wishes the server to delete + +- "noreply" optional parameter instructs the server to not send the + reply. See the note in Storage commands regarding malformed + requests. + +The response line to this command can be one of: + +- "DELETED\r\n" to indicate success + +- "NOT_FOUND\r\n" to indicate that the item with this key was not + found. + +See the "flush_all" command below for immediate invalidation +of all existing items. + + +Increment/Decrement +------------------- + +Commands "incr" and "decr" are used to change data for some item +in-place, incrementing or decrementing it. The data for the item is +treated as decimal representation of a 64-bit unsigned integer. If +the current data value does not conform to such a representation, the +incr/decr commands return an error (memcached <= 1.2.6 treated the +bogus value as if it were 0, leading to confusion). Also, the item +must already exist for incr/decr to work; these commands won't pretend +that a non-existent key exists with value 0; instead, they will fail. + +The client sends the command line: + +incr <key> <value> [noreply]\r\n + +or + +decr <key> <value> [noreply]\r\n + +- <key> is the key of the item the client wishes to change + +- <value> is the amount by which the client wants to increase/decrease +the item. It is a decimal representation of a 64-bit unsigned integer. + +- "noreply" optional parameter instructs the server to not send the + reply. See the note in Storage commands regarding malformed + requests. + +The response will be one of: + +- "NOT_FOUND\r\n" to indicate the item with this key was not found + +- <value>\r\n , where <value> is the new value of the item's data, + after the increment/decrement operation was carried out. + +Note that underflow in the "decr" command is caught: if a client tries +to decrease the value below 0, the new value will be 0.
Overflow in +the "incr" command will wrap around the 64 bit mark. + +Note also that decrementing a number such that it loses length isn't +guaranteed to decrement its returned length. The number MAY be +space-padded at the end, but this is purely an implementation +optimization, so you also shouldn't rely on that. + +Touch +----- + +The "touch" command is used to update the expiration time of an existing item +without fetching it. + +touch <key> <exptime> [noreply]\r\n + +- <key> is the key of the item the client wishes the server to touch + +- <exptime> is expiration time. Works the same as with the update commands + (set/add/etc). This replaces the existing expiration time. If an existing + item were to expire in 10 seconds, but then was touched with an + expiration time of "20", the item would then expire in 20 seconds. + +- "noreply" optional parameter instructs the server to not send the + reply. See the note in Storage commands regarding malformed + requests. + +The response line to this command can be one of: + +- "TOUCHED\r\n" to indicate success + +- "NOT_FOUND\r\n" to indicate that the item with this key was not + found. + +Slabs Reassign +-------------- + +NOTE: This command is subject to change as of this writing. + +The slabs reassign command is used to redistribute memory once a running +instance has hit its limit. It might be desirable to have memory laid out +differently than was automatically assigned after the server started. + +slabs reassign <source class> <dest class>\r\n + +- <source class> is an id number for the slab class to steal a page from + +- <dest class> is an id number for the slab class to move a page to + +The response line could be one of: + +- "OK" to indicate the page has been scheduled to move + +- "BUSY [message]" to indicate a page is already being processed, try again + later.
+ +- "BADCLASS [message]" a bad class id was specified + +- "NOSPARE [message]" source class has no spare pages + +- "NOTFULL [message]" dest class must be full to move new pages to it + +- "UNSAFE [message]" source class cannot move a page right now + +- "SAME [message]" must specify different source/dest ids. + +Slabs Automove +-------------- + +NOTE: This command is subject to change as of this writing. + +The slabs automove command enables a background thread which decides on its +own when to move memory between slab classes. Its implementation and options +will likely be in flux for several versions. See the wiki/mailing list for +more details. + +The automover can be enabled or disabled at runtime with this command. + +slabs automove <1|0> + +- 1|0 is the indicator on whether to enable the slabs automover or not. + +The response should always be "OK\r\n" + +Statistics +---------- + +The command "stats" is used to query the server about statistics it +maintains and other internal data. It has two forms. Without +arguments: + +stats\r\n + +it causes the server to output general-purpose statistics and +settings, documented below. In the other form it has some arguments: + +stats <args>\r\n + +Depending on <args>, various internal data is sent by the server. The +kinds of arguments and the data sent are not documented in this version +of the protocol, and are subject to change for the convenience of +memcache developers. + + +General-purpose statistics +-------------------------- + +Upon receiving the "stats" command without arguments, the server sends +a number of lines which look like this: + +STAT <name> <value>\r\n + +The server terminates this list with the line + +END\r\n + +In each line of statistics, <name> is the name of this statistic, and +<value> is the data. The following is the list of all names sent in +response to the "stats" command, together with the type of the value +sent for this name, and the meaning of the value.
+ +In the type column below, "32u" means a 32-bit unsigned integer, "64u" +means a 64-bit unsigned integer. '32u.32u' means two 32-bit unsigned +integers separated by a colon (treat this as a floating point number). + +|-----------------------+---------+-------------------------------------------| +| Name | Type | Meaning | +|-----------------------+---------+-------------------------------------------| +| pid | 32u | Process id of this server process | +| uptime | 32u | Number of secs since the server started | +| time | 32u | current UNIX time according to the server | +| version | string | Version string of this server | +| pointer_size | 32 | Default size of pointers on the host OS | +| | | (generally 32 or 64) | +| rusage_user | 32u.32u | Accumulated user time for this process | +| | | (seconds:microseconds) | +| rusage_system | 32u.32u | Accumulated system time for this process | +| | | (seconds:microseconds) | +| curr_items | 32u | Current number of items stored | +| total_items | 32u | Total number of items stored since | +| | | the server started | +| bytes | 64u | Current number of bytes used | +| | | to store items | +| curr_connections | 32u | Number of open connections | +| total_connections | 32u | Total number of connections opened since | +| | | the server started running | +| connection_structures | 32u | Number of connection structures allocated | +| | | by the server | +| reserved_fds | 32u | Number of misc fds used internally | +| cmd_get | 64u | Cumulative number of retrieval reqs | +| cmd_set | 64u | Cumulative number of storage reqs | +| cmd_flush | 64u | Cumulative number of flush reqs | +| cmd_touch | 64u | Cumulative number of touch reqs | +| get_hits | 64u | Number of keys that have been requested | +| | | and found present | +| get_misses | 64u | Number of items that have been requested | +| | | and not found | +| delete_misses | 64u | Number of deletions reqs for missing keys | +| delete_hits | 64u | Number of deletion reqs resulting in 
| +| | | an item being removed. | +| incr_misses | 64u | Number of incr reqs against missing keys. | +| incr_hits | 64u | Number of successful incr reqs. | +| decr_misses | 64u | Number of decr reqs against missing keys. | +| decr_hits | 64u | Number of successful decr reqs. | +| cas_misses | 64u | Number of CAS reqs against missing keys. | +| cas_hits | 64u | Number of successful CAS reqs. | +| cas_badval | 64u | Number of CAS reqs for which a key was | +| | | found, but the CAS value did not match. | +| touch_hits | 64u | Number of keys that have been touched with | +| | | a new expiration time | +| touch_misses | 64u | Number of items that have been touched and | +| | | not found | +| auth_cmds | 64u | Number of authentication commands | +| | | handled, success or failure. | +| auth_errors | 64u | Number of failed authentications. | +| evictions | 64u | Number of valid items removed from cache | +| | | to free memory for new items | +| reclaimed | 64u | Number of times an entry was stored using | +| | | memory from an expired entry | +| bytes_read | 64u | Total number of bytes read by this server | +| | | from network | +| bytes_written | 64u | Total number of bytes sent by this server | +| | | to network | +| limit_maxbytes | 32u | Number of bytes this server is allowed to | +| | | use for storage. | +| threads | 32u | Number of worker threads requested. | +| | | (see doc/threads.txt) | +| conn_yields | 64u | Number of times any connection yielded to | +| | | another due to hitting the -R limit. | +| hash_power_level | 32u | Current size multiplier for hash table | +| hash_bytes | 64u | Bytes currently used by hash tables | +| hash_is_expanding | bool | Indicates if the hash table is being | +| | | grown to a new size | +| expired_unfetched | 64u | Items pulled from LRU that were never | +| | | touched by get/incr/append/etc before | +| | | expiring | +| evicted_unfetched | 64u | Items evicted from LRU that were never | +| | | touched by get/incr/append/etc.
| +| slab_reassign_running | bool | If a slab page is being moved | +| slabs_moved | 64u | Total slab pages moved | +|-----------------------+---------+-------------------------------------------| + +Settings statistics +------------------- +CAVEAT: This section describes statistics which are subject to change in the +future. + +The "stats" command with the argument of "settings" returns details of +the settings of the running memcached. This is primarily made up of +the results of processing commandline options. + +Note that these are not guaranteed to return in any specific order and +this list may not be exhaustive. Otherwise, this returns like any +other stats command. + +|-------------------+----------+----------------------------------------------| +| Name | Type | Meaning | +|-------------------+----------+----------------------------------------------| +| maxbytes | size_t | Maximum number of bytes allows in this cache | +| maxconns | 32 | Maximum number of clients allowed. | +| tcpport | 32 | TCP listen port. | +| udpport | 32 | UDP listen port. | +| inter | string | Listen interface. | +| verbosity | 32 | 0 = none, 1 = some, 2 = lots | +| oldest | 32u | Age of the oldest honored object. | +| evictions | on/off | When off, LRU evictions are disabled. | +| domain_socket | string | Path to the domain socket (if any). | +| umask | 32 (oct) | umask for the creation of the domain socket. | +| growth_factor | float | Chunk size growth factor. | +| chunk_size | 32 | Minimum space allocated for key+value+flags. | +| num_threads | 32 | Number of threads (including dispatch). | +| stat_key_prefix | char | Stats prefix separator character. | +| detail_enabled | bool | If yes, stats detail is enabled. | +| reqs_per_event | 32 | Max num IO ops processed within an event. | +| cas_enabled | bool | When no, CAS is not enabled for this server. | +| tcp_backlog | 32 | TCP listen backlog. | +| auth_enabled_sasl | yes/no | SASL auth requested and enabled. 
| +| item_size_max | size_t | maximum item size | +| maxconns_fast | bool | If fast disconnects are enabled | +| hashpower_init | 32 | Starting size multiplier for hash table | +| slab_reassign | bool | Whether slab page reassignment is allowed | +| slab_automove | bool | Whether slab page automover is enabled | +|-------------------+----------+----------------------------------------------| + + +Item statistics +--------------- +CAVEAT: This section describes statistics which are subject to change in the +future. + +The "stats" command with the argument of "items" returns information about +item storage per slab class. The data is returned in the format: + +STAT items:<slabclass>:<stat> <value>\r\n + +The server terminates this list with the line + +END\r\n + +The slabclass aligns with class ids used by the "stats slabs" command. Where +"stats slabs" describes size and memory usage, "stats items" shows higher +level information. + +The following item values are defined as of writing. + +Name Meaning +------------------------------ +number Number of items presently stored in this class. Expired + items are not automatically excluded. +age Age of the oldest item in the LRU. +evicted Number of times an item had to be evicted from the LRU + before it expired. +evicted_nonzero Number of times an item which had an explicit expire + time set had to be evicted from the LRU before it + expired. +evicted_time Seconds since the last access for the most recent item + evicted from this class. Use this to judge how + recently active your evicted data is. +outofmemory Number of times the underlying slab class was unable to + store a new item. This means you are running with -M or + an eviction failed. +tailrepairs Number of times we self-healed a slab with a refcount + leak. If this counter is increasing a lot, please + report your situation to the developers. +reclaimed Number of times an entry was stored using memory from + an expired entry. 
+expired_unfetched Number of expired items reclaimed from the LRU which + were never touched after being set. +evicted_unfetched Number of valid items evicted from the LRU which were + never touched after being set. + +Note this will only display information about slabs which exist, so an empty +cache will return an empty set. + + +Item size statistics +-------------------- +CAVEAT: This section describes statistics which are subject to change in the +future. + +The "stats" command with the argument of "sizes" returns information about the +general size and count of all items stored in the cache. +WARNING: This command WILL lock up your cache! It iterates over *every item* +and examines the size. While the operation is fast, if you have many items +you could prevent memcached from serving requests for several seconds. + +The data is returned in the following format: + +<size> <count>\r\n + +The server terminates this list with the line + +END\r\n + +'size' is an approximate size of the item, within 32 bytes. +'count' is the amount of items that exist within that 32-byte range. + +This is essentially a display of all of your items if there was a slab class +for every 32 bytes. You can use this to determine if adjusting the slab growth +factor would save memory overhead. For example: generating more classes in the +lower range could allow items to fit more snugly into their slab classes, if +most of your items are less than 200 bytes in size. + + +Slab statistics +--------------- +CAVEAT: This section describes statistics which are subject to change in the +future. + +The "stats" command with the argument of "slabs" returns information about +each of the slabs created by memcached during runtime. This includes per-slab +information along with some totals. 
The data is returned in the format: + +STAT <slabclass>:<stat> <value>\r\n +STAT <stat> <value>\r\n + +The server terminates this list with the line + +END\r\n + +|-----------------+----------------------------------------------------------| +| Name | Meaning | +|-----------------+----------------------------------------------------------| +| chunk_size | The amount of space each chunk uses. One item will use | +| | one chunk of the appropriate size. | +| chunks_per_page | How many chunks exist within one page. A page by | +| | default is less than or equal to one megabyte in size. | +| | Slabs are allocated by page, then broken into chunks. | +| total_pages | Total number of pages allocated to the slab class. | +| total_chunks | Total number of chunks allocated to the slab class. | +| get_hits | Total number of get requests serviced by this class. | +| cmd_set | Total number of set requests storing data in this class. | +| delete_hits | Total number of successful deletes from this class. | +| incr_hits | Total number of incrs modifying this class. | +| decr_hits | Total number of decrs modifying this class. | +| cas_hits | Total number of CAS commands modifying this class. | +| cas_badval | Total number of CAS commands that failed to modify a | +| | value due to a bad CAS id. | +| touch_hits | Total number of touches serviced by this class. | +| used_chunks | How many chunks have been allocated to items. | +| free_chunks | Chunks not yet allocated to items, or freed via delete. | +| free_chunks_end | Number of free chunks at the end of the last allocated | +| | page. | +| mem_requested | Number of bytes requested to be stored in this slab[*]. | +| active_slabs | Total number of slab classes allocated. | +| total_malloced | Total amount of memory allocated to slab pages. | +|-----------------+----------------------------------------------------------| + +* Items are stored in a slab that is the same size or larger than the + item. mem_requested shows the size of all items within a + slab. 
(total_chunks * chunk_size) - mem_requested shows memory + wasted in a slab class. If you see a lot of waste, consider tuning + the slab factor. + +Other commands +-------------- + +"flush_all" is a command with an optional numeric argument. It always +succeeds, and the server sends "OK\r\n" in response (unless "noreply" +is given as the last parameter). Its effect is to invalidate all +existing items immediately (by default) or after the expiration +specified. After invalidation none of the items will be returned in +response to a retrieval command (unless it's stored again under the +same key *after* flush_all has invalidated the items). flush_all +doesn't actually free all the memory taken up by existing items; that +will happen gradually as new items are stored. The most precise +definition of what flush_all does is the following: it causes all +items whose update time is earlier than the time at which flush_all +was set to be executed to be ignored for retrieval purposes. + +The intent of flush_all with a delay was that in a setting where you +have a pool of memcached servers, and you need to flush all content, +you have the option of not resetting all memcached servers at the +same time (which could e.g. cause a spike in database load with all +clients suddenly needing to recreate content that would otherwise +have been found in the memcached daemon). + +The delay option allows you to have them reset in e.g. 10 second +intervals (by passing 0 to the first, 10 to the second, 20 to the +third, etc. etc.). + + +"version" is a command with no arguments: + +version\r\n + +In response, the server sends + +"VERSION <version>\r\n", where <version> is the version string for the +server. + +"verbosity" is a command with a numeric argument. It always succeeds, +and the server sends "OK\r\n" in response (unless "noreply" is given +as the last parameter). Its effect is to set the verbosity level of +the logging output. 
+ +"quit" is a command with no arguments: + +quit\r\n + +Upon receiving this command, the server closes the +connection. However, the client may also simply close the connection +when it no longer needs it, without issuing this command. + + +UDP protocol +------------ + +For very large installations where the number of clients is high enough +that the number of TCP connections causes scaling difficulties, there is +also a UDP-based interface. The UDP interface does not provide guaranteed +delivery, so should only be used for operations that aren't required to +succeed; typically it is used for "get" requests where a missing or +incomplete response can simply be treated as a cache miss. + +Each UDP datagram contains a simple frame header, followed by data in the +same format as the TCP protocol described above. In the current +implementation, requests must be contained in a single UDP datagram, but +responses may span several datagrams. (The only common requests that would +span multiple datagrams are huge multi-key "get" requests and "set" +requests, both of which are more suitable to TCP transport for reliability +reasons anyway.) + +The frame header is 8 bytes long, as follows (all values are 16-bit integers +in network byte order, high byte first): + +0-1 Request ID +2-3 Sequence number +4-5 Total number of datagrams in this message +6-7 Reserved for future use; must be 0 + +The request ID is supplied by the client. Typically it will be a +monotonically increasing value starting from a random seed, but the client +is free to use whatever request IDs it likes. The server's response will +contain the same ID as the incoming request. The client uses the request ID +to differentiate between responses to outstanding requests if there are +several pending from the same server; any datagrams with an unknown request +ID are probably delayed responses to an earlier request and should be +discarded. 
+ +The sequence number ranges from 0 to n-1, where n is the total number of +datagrams in the message. The client should concatenate the payloads of the +datagrams for a given response in sequence number order; the resulting byte +stream will contain a complete response in the same format as the TCP +protocol (including terminating \r\n sequences). diff --git a/memcached/doc/readme.txt b/memcached/doc/readme.txt new file mode 100644 index 00000000..6a9d0c8d --- /dev/null +++ b/memcached/doc/readme.txt @@ -0,0 +1 @@ +To build the documentation you need xml2rfc ( http://xml.resource.org/ ). diff --git a/memcached/doc/xml2rfc/reference.RFC.0768.xml b/memcached/doc/xml2rfc/reference.RFC.0768.xml new file mode 100644 index 00000000..4452c347 --- /dev/null +++ b/memcached/doc/xml2rfc/reference.RFC.0768.xml @@ -0,0 +1,22 @@ + + + + + +User Datagram Protocol + +University of Southern California (USC)/Information Sciences Institute +
+ +4676 Admiralty Way +Marina del Rey +CA +90291 +US ++1 213 822 1511
+
+ + + + +
diff --git a/memcached/doc/xml2rfc/rfc2629-noinc.xsl b/memcached/doc/xml2rfc/rfc2629-noinc.xsl new file mode 100644 index 00000000..99b73a84 --- /dev/null +++ b/memcached/doc/xml2rfc/rfc2629-noinc.xsl @@ -0,0 +1,106 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + .xml + + + + + + + + + + + + + + + + + + + + + + http://xml.resource.org/public/rfc/ + + .xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/memcached/doc/xml2rfc/rfc2629-other.ent b/memcached/doc/xml2rfc/rfc2629-other.ent new file mode 100644 index 00000000..0da80b72 --- /dev/null +++ b/memcached/doc/xml2rfc/rfc2629-other.ent @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/memcached/doc/xml2rfc/rfc2629-refchk.xsl b/memcached/doc/xml2rfc/rfc2629-refchk.xsl new file mode 100644 index 00000000..0cea58e9 --- /dev/null +++ b/memcached/doc/xml2rfc/rfc2629-refchk.xsl @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/memcached/doc/xml2rfc/rfc2629-xhtml.ent b/memcached/doc/xml2rfc/rfc2629-xhtml.ent new file mode 100644 index 00000000..3d4a07dd --- /dev/null +++ b/memcached/doc/xml2rfc/rfc2629-xhtml.ent @@ -0,0 +1,165 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/memcached/doc/xml2rfc/rfc2629.dtd b/memcached/doc/xml2rfc/rfc2629.dtd new file mode 100644 index 00000000..bc302607 --- /dev/null +++ b/memcached/doc/xml2rfc/rfc2629.dtd @@ -0,0 +1,304 @@ + + + + + + + + + + + + + +%rfc2629-xhtml; + + +%rfc2629-other; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/memcached/globals.c b/memcached/globals.c new file mode 100644 index 00000000..a01e370a --- /dev/null +++ b/memcached/globals.c @@ -0,0 +1,25 @@ +#include "memcached.h" + +/* + * This file contains global variables shared across the rest of the + * memcached codebase. These were originally in memcached.c but had + * to be removed to make the rest of the object files linkable into + * the test infrastructure. + * + */ + +/* + * We keep the current time of day in a global variable that's updated by a + * timer event. This saves us a bunch of time() system calls (we really only + * need to get the time once a second, whereas there can be tens of thousands + * of requests a second) and allows us to use server-start-relative timestamps + * rather than absolute UNIX timestamps, a space savings on systems where + * sizeof(time_t) > sizeof(unsigned int). + */ +volatile rel_time_t current_time; + +/** exported globals **/ +struct stats stats; +struct settings settings; +struct slab_rebalance slab_rebal; +volatile int slab_rebalance_signal; diff --git a/memcached/hash.c b/memcached/hash.c new file mode 100644 index 00000000..6c9b9b6a --- /dev/null +++ b/memcached/hash.c @@ -0,0 +1,437 @@ +/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* + * Hash table + * + * The hash function used here is by Bob Jenkins, 1996: + * + * "By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. + * You may use this code any way you wish, private, educational, + * or commercial. It's free." + * + */ +#include "memcached.h" + +/* + * Since the hash function does bit manipulation, it needs to know + * whether it's big or little-endian. ENDIAN_LITTLE and ENDIAN_BIG + * are set in the configure script. 
+ */ +#if defined(ENDIAN_BIG) && ENDIAN_BIG == 1 +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 1 +#else +# if defined(ENDIAN_LITTLE) && ENDIAN_LITTLE == 1 +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 +# else +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 0 +# endif +#endif + +#define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. 
Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. 
+ +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +#if HASH_LITTLE_ENDIAN == 1 +/* + * hash() for little-endian hosts; compiled only when HASH_LITTLE_ENDIAN == 1. + * Returns a 32-bit hash of the length bytes at key, seeded with initval. + * Keys aligned to 4 bytes are read as 32-bit words, keys aligned to 2 bytes + * as 16-bit halves, and all other keys one byte at a time. While more than + * 12 bytes remain, 12 bytes at a time are folded in with mix(); the last + * 1..12 bytes are added and finished with final(). A zero-length key + * returns the initial value of c without any mixing. + */ +uint32_t hash( + const void *key, /* the key to hash */ + size_t length, /* length of the key */ + const uint32_t initval) /* initval */ +{ + uint32_t a,b,c; /* internal state */ + union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + + u.ptr = key; + if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = key; /* read 32-bit chunks */ +#ifdef VALGRIND + const uint8_t *k8; +#endif /* ifdef VALGRIND */ + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticeably faster for short strings (like English words). 
+ */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : return c; /* zero length strings require no mixing */ + default: + abort(); + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + 
break; + case 11: c+=((uint32_t)k8[10])<<16; /* @fallthrough */ + case 10: c+=k[4]; /* no fall through: this case ends with the break below */ + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* @fallthrough */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* @fallthrough */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* @fallthrough */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* @fallthrough */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : return c; /* zero length strings require no mixing */ + default: + abort(); + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* cases 12 down to 2 fall through to case 1 */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : return c; /* zero length strings require no mixing */ + default: + abort(); + } + } + + final(a,b,c); + return c; /* c holds the result after final() */ +} + +#elif HASH_BIG_ENDIAN == 1 
+/* + * hashbig(): + * This is the same as hashword() on big-endian machines. It is different + * from hashlittle() on all machines. hashbig() takes advantage of + * big-endian byte ordering. + * + * In this file the routine is compiled under the name hash(), and only when + * HASH_BIG_ENDIAN == 1. hashword() and hashlittle() are not defined here; + * presumably they are names from the original Jenkins source. + * Keys aligned to 4 bytes are read as 32-bit words, all other keys one byte + * at a time. A zero-length key returns the initial value of c unmixed. + */ +uint32_t hash( const void *key, size_t length, const uint32_t initval) +{ + uint32_t a,b,c; + union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + + u.ptr = key; + if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = key; /* read 32-bit chunks */ +#ifdef VALGRIND + const uint8_t *k8; +#endif /* ifdef VALGRIND */ + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff00" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the illegal read is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticeably faster for short strings (like English words). 
+ */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; + case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; + case 5 : b+=k[1]&0xff000000; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff00; break; + case 2 : a+=k[0]&0xffff0000; break; + case 1 : a+=k[0]&0xff000000; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) /* partial-word cases fall through; cases 12, 8, 4 and 1 break */ + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ + case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ + case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ + case 1 : a+=((uint32_t)k8[0])<<24; break; + case 0 : return c; + } + +#endif /* !VALGRIND */ + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += ((uint32_t)k[0])<<24; + a += ((uint32_t)k[1])<<16; + a += ((uint32_t)k[2])<<8; + a += ((uint32_t)k[3]); + b += ((uint32_t)k[4])<<24; + b += ((uint32_t)k[5])<<16; + b += ((uint32_t)k[6])<<8; + b += ((uint32_t)k[7]); + c += ((uint32_t)k[8])<<24; + c += ((uint32_t)k[9])<<16; + c += ((uint32_t)k[10])<<8; + c += ((uint32_t)k[11]); + mix(a,b,c); + length -= 12; + k += 12; + } + + 
/*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[11]; + case 11: c+=((uint32_t)k[10])<<8; + case 10: c+=((uint32_t)k[9])<<16; + case 9 : c+=((uint32_t)k[8])<<24; + case 8 : b+=k[7]; + case 7 : b+=((uint32_t)k[6])<<8; + case 6 : b+=((uint32_t)k[5])<<16; + case 5 : b+=((uint32_t)k[4])<<24; + case 4 : a+=k[3]; + case 3 : a+=((uint32_t)k[2])<<8; + case 2 : a+=((uint32_t)k[1])<<16; + case 1 : a+=((uint32_t)k[0])<<24; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; +} +#else /* HASH_XXX_ENDIAN == 1 */ +#error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN +#endif /* HASH_XXX_ENDIAN == 1 */ diff --git a/memcached/hash.h b/memcached/hash.h new file mode 100644 index 00000000..aa02c58f --- /dev/null +++ b/memcached/hash.h @@ -0,0 +1,15 @@ +#ifndef HASH_H +#define HASH_H + +#ifdef __cplusplus +extern "C" { +#endif + +uint32_t hash(const void *key, size_t length, const uint32_t initval); + +#ifdef __cplusplus +} +#endif + +#endif /* HASH_H */ + diff --git a/memcached/include.am b/memcached/include.am new file mode 100644 index 00000000..c1d6325d --- /dev/null +++ b/memcached/include.am @@ -0,0 +1,40 @@ +# vim:ft=automake + +noinst_HEADERS+= memcached/assoc.h +noinst_HEADERS+= memcached/cache.h +noinst_HEADERS+= memcached/hash.h +noinst_HEADERS+= memcached/items.h +noinst_HEADERS+= memcached/memcached.h +noinst_HEADERS+= memcached/protocol_binary.h +noinst_HEADERS+= memcached/sasl_defs.h +noinst_HEADERS+= memcached/slabs.h +noinst_HEADERS+= memcached/stats.h +noinst_HEADERS+= memcached/trace.h +noinst_HEADERS+= memcached/util.h + +memcached_memcached_SOURCES= +memcached_memcached_CFLAGS= +memcached_memcached_LDADD= + +memcached_memcached_SOURCES+= +memcached_memcached_SOURCES+= memcached/assoc.c +memcached_memcached_SOURCES+= memcached/cache.c +memcached_memcached_SOURCES+= memcached/daemon.c +memcached_memcached_SOURCES+= memcached/hash.c 
+memcached_memcached_SOURCES+= memcached/items.c +memcached_memcached_SOURCES+= memcached/memcached.c +memcached_memcached_SOURCES+= memcached/slabs.c +memcached_memcached_SOURCES+= memcached/stats.c +memcached_memcached_SOURCES+= memcached/thread.c +memcached_memcached_SOURCES+= memcached/trace.h +memcached_memcached_SOURCES+= memcached/util.c + +memcached_memcached_CFLAGS+= -DHAVE_CONFIG_H -std=c99 +memcached_memcached_CFLAGS+= ${PTHREAD_CFLAGS} +memcached_memcached_CFLAGS+= -DRMEMCACHED_VERSION="\"1.4.13\"" +memcached_memcached_CFLAGS+= -DRMEMCACHED_PACKAGE="\"memcached\"" + +memcached_memcached_LDADD+= $(LIBEVENT_LDFLAGS) +memcached_memcached_LDADD+= ${PTHREAD_LIBS} + +noinst_PROGRAMS+= memcached/memcached diff --git a/memcached/items.c b/memcached/items.c new file mode 100644 index 00000000..58fc8717 --- /dev/null +++ b/memcached/items.c @@ -0,0 +1,575 @@ +/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +#include "memcached.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Forward Declarations */ +static void item_link_q(item *it); +static void item_unlink_q(item *it); + +/* + * We only reposition items in the LRU queue if they haven't been repositioned + * in this many seconds. That saves us from churning on frequently-accessed + * items. 
+ */
+#define ITEM_UPDATE_INTERVAL 60
+
+#define LARGEST_ID POWER_LARGEST
+
+/* Per-slab-class LRU counters; itemstats[] holds one of these per class id. */
+typedef struct {
+    uint64_t evicted;
+    uint64_t evicted_nonzero;
+    rel_time_t evicted_time;
+    uint64_t reclaimed;
+    uint64_t outofmemory;
+    uint64_t tailrepairs;
+    uint64_t expired_unfetched;
+    uint64_t evicted_unfetched;
+} itemstats_t;
+
+/* LRU list ends, counters and lengths, all indexed by slab class id. */
+static item *heads[LARGEST_ID];
+static item *tails[LARGEST_ID];
+static itemstats_t itemstats[LARGEST_ID];
+static unsigned int sizes[LARGEST_ID];
+
+/* Zeroes every per-class counter in itemstats[] while holding cache_lock. */
+void item_stats_reset(void) {
+    mutex_lock(&cache_lock);
+    memset(itemstats, 0, sizeof(itemstats));
+    pthread_mutex_unlock(&cache_lock);
+}
+
+
+/* Get the next CAS id for a new item. */
+uint64_t get_cas_id(void) {
+    static uint64_t cas_id = 0;
+    return ++cas_id;
+}
+
+/* Enable this for reference-count debugging. */
+#if 0
+/* Two flag characters are printed, so the format carries exactly two %c;
+ * the item address is printed with %p rather than %x. */
+# define DEBUG_REFCNT(it,op) \
+                fprintf(stderr, "item %p refcnt(%c) %d %c%c\n", \
+                        (void *)it, op, it->refcount, \
+                        (it->it_flags & ITEM_LINKED) ? 'L' : ' ', \
+                        (it->it_flags & ITEM_SLABBED) ? 'S' : ' ')
+#else
+# define DEBUG_REFCNT(it,op) while(0)
+#endif
+
+/**
+ * Generates the variable-sized part of the header for an object.
+ *
+ * nkey    - The length of the key
+ * flags   - key flags
+ * nbytes  - Number of bytes to hold value and addition CRLF terminator
+ * suffix  - Buffer for the "VALUE" line suffix (flags, size).
+ * nsuffix - The length of the suffix is stored here.
+ *
+ * Returns the total size of the header.
+ */
+static size_t item_make_header(const uint8_t nkey, const int flags, const int nbytes,
+                     char *suffix, uint8_t *nsuffix) {
+    /* suffix is defined at 40 chars elsewhere.. */
+    *nsuffix = (uint8_t) snprintf(suffix, 40, " %d %d\r\n", flags, nbytes - 2);
+    return sizeof(item) + nkey + *nsuffix + nbytes;
+}
+
+/**
+ * Allocates an item for the given key/flags/exptime/size.
+ *
+ * The tail of the class LRU is tried first: an expired or flushed tail item
+ * is reclaimed; otherwise fresh slab memory is requested, and if that fails
+ * the tail item is evicted (only when settings.evict_to_free is set).
+ *
+ * Returns the new item with refcount 1, or NULL when no slab class fits the
+ * item or no memory could be obtained.
+ */
+/*@null@*/
+item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
+    uint8_t nsuffix;
+    item *it = NULL;
+    char suffix[40];
+    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
+    if (settings.use_cas) {
+        ntotal += sizeof(uint64_t);
+    }
+
+    unsigned int id = slabs_clsid(ntotal);
+    if (id == 0)
+        return NULL;
+
+    mutex_lock(&cache_lock);
+    /* do a quick check if we have any expired items in the tail.. */
+    item *search;
+    rel_time_t oldest_live = settings.oldest_live;
+
+    search = tails[id];
+    if (search != NULL && (refcount_incr(&search->refcount) == 2)) {
+        if ((search->exptime != 0 && search->exptime < current_time)
+            || (search->time <= oldest_live && oldest_live <= current_time)) {  // dead by flush
+            STATS_LOCK();
+            stats.reclaimed++;
+            STATS_UNLOCK();
+            itemstats[id].reclaimed++;
+            if ((search->it_flags & ITEM_FETCHED) == 0) {
+                STATS_LOCK();
+                stats.expired_unfetched++;
+                STATS_UNLOCK();
+                itemstats[id].expired_unfetched++;
+            }
+            it = search;
+            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
+            do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
+            /* Initialize the item block: */
+            it->slabs_clsid = 0;
+        } else if ((it = slabs_alloc(ntotal, id)) == NULL) {
+            if (settings.evict_to_free == 0) {
+                itemstats[id].outofmemory++;
+                /* Drop the reference taken by refcount_incr() above; without
+                 * this the tail item stays pinned at refcount 2. */
+                refcount_decr(&search->refcount);
+                pthread_mutex_unlock(&cache_lock);
+                return NULL;
+            }
+            itemstats[id].evicted++;
+            itemstats[id].evicted_time = current_time - search->time;
+            if (search->exptime != 0)
+                itemstats[id].evicted_nonzero++;
+            if ((search->it_flags & ITEM_FETCHED) == 0) {
+                STATS_LOCK();
+                stats.evicted_unfetched++;
+                STATS_UNLOCK();
+                itemstats[id].evicted_unfetched++;
+            }
+            STATS_LOCK();
+            stats.evictions++;
+            STATS_UNLOCK();
+            it = search;
+            slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
+            do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
+            /* Initialize the item block: */
+            it->slabs_clsid = 0;
+        } else {
+            refcount_decr(&search->refcount);
+        }
+    } else {
+        /* If the LRU is empty or locked, attempt to allocate memory */
+        it = slabs_alloc(ntotal, id);
+        if (search != NULL)
+            refcount_decr(&search->refcount);
+    }
+
+    if (it == NULL) {
+        itemstats[id].outofmemory++;
+        /* Last ditch effort. There was a very rare bug which caused
+         * refcount leaks. We leave this just in case they ever happen again.
+         * We can reasonably assume no item can stay locked for more than
+         * three hours, so if we find one in the tail which is that old,
+         * free it anyway.
+         */
+        if (search != NULL &&
+            search->refcount != 2 &&
+            search->time + TAIL_REPAIR_TIME < current_time) {
+            itemstats[id].tailrepairs++;
+            search->refcount = 1;
+            do_item_unlink_nolock(search, hash(ITEM_key(search), search->nkey, 0));
+        }
+        pthread_mutex_unlock(&cache_lock);
+        return NULL;
+    }
+
+    assert(it->slabs_clsid == 0);
+    assert(it != heads[id]);
+
+    /* Item initialization can happen outside of the lock; the item's already
+     * been removed from the slab LRU.
+     */
+    it->refcount = 1;     /* the caller will have a reference */
+    pthread_mutex_unlock(&cache_lock);
+    it->next = it->prev = it->h_next = 0;
+    it->slabs_clsid = id;
+
+    DEBUG_REFCNT(it, '*');
+    it->it_flags = settings.use_cas ? ITEM_CAS : 0;
+    it->nkey = nkey;
+    it->nbytes = nbytes;
+    memcpy(ITEM_key(it), key, nkey);
+    it->exptime = exptime;
+    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
+    it->nsuffix = nsuffix;
+    return it;
+}
+
+/* Hands an unlinked item with refcount 0 back to its slab class. */
+void item_free(item *it) {
+    size_t ntotal = ITEM_ntotal(it);
+    unsigned int clsid;
+    assert((it->it_flags & ITEM_LINKED) == 0);
+    assert(it != heads[it->slabs_clsid]);
+    assert(it != tails[it->slabs_clsid]);
+    assert(it->refcount == 0);
+
+    /* so slab size changer can tell later if item is already free or not */
+    clsid = it->slabs_clsid;
+    it->slabs_clsid = 0;
+    DEBUG_REFCNT(it, 'F');
+    slabs_free(it, ntotal, clsid);
+}
+
+/**
+ * Returns true if an item will fit in the cache (its size does not exceed
+ * the maximum for a cache entry.)
+ */
+bool item_size_ok(const size_t nkey, const int flags, const int nbytes) {
+    char prefix[40];
+    uint8_t nsuffix;
+
+    size_t ntotal = item_make_header(nkey + 1, flags, nbytes,
+                                     prefix, &nsuffix);
+    if (settings.use_cas) {
+        ntotal += sizeof(uint64_t);
+    }
+
+    return slabs_clsid(ntotal) != 0;
+}
+
+/* Pushes the item onto the head of its class LRU and bumps the class size. */
+static void item_link_q(item *it) { /* item is the new head */
+    item **head, **tail;
+    assert(it->slabs_clsid < LARGEST_ID);
+    assert((it->it_flags & ITEM_SLABBED) == 0);
+
+    head = &heads[it->slabs_clsid];
+    tail = &tails[it->slabs_clsid];
+    assert(it != *head);
+    assert((*head && *tail) || (*head == 0 && *tail == 0));
+    it->prev = 0;
+    it->next = *head;
+    if (it->next) it->next->prev = it;
+    *head = it;
+    if (*tail == 0) *tail = it;
+    sizes[it->slabs_clsid]++;
+    return;
+}
+
+/* Detaches the item from its class LRU, fixing head/tail and neighbours. */
+static void item_unlink_q(item *it) {
+    item **head, **tail;
+    assert(it->slabs_clsid < LARGEST_ID);
+    head = &heads[it->slabs_clsid];
+    tail = &tails[it->slabs_clsid];
+
+    if (*head == it) {
+        assert(it->prev == 0);
+        *head = it->next;
+    }
+    if (*tail == it) {
+        assert(it->next == 0);
+        *tail = it->prev;
+    }
+    assert(it->next != it);
+    assert(it->prev != it);
+
+    if (it->next) it->next->prev = it->prev;
+    if (it->prev) it->prev->next = it->next;
+    sizes[it->slabs_clsid]--;
+    return;
+}
+
+/*
+ * Links the item into the hash table and the LRU under cache_lock, updates
+ * the global item counters and takes one extra reference. Always returns 1.
+ */
+int do_item_link(item *it, const uint32_t hv) {
+    MEMCACHED_ITEM_LINK(ITEM_key(it), it->nkey, it->nbytes);
+    assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0);
+    mutex_lock(&cache_lock);
+    it->it_flags |= ITEM_LINKED;
+    it->time = current_time;
+
+    STATS_LOCK();
+    stats.curr_bytes += ITEM_ntotal(it);
+    stats.curr_items += 1;
+    stats.total_items += 1;
+    STATS_UNLOCK();
+
+    /* Allocate a new CAS ID on link. */
+    ITEM_set_cas(it, (settings.use_cas) ? get_cas_id() : 0);
+    assoc_insert(it, hv);
+    item_link_q(it);
+    refcount_incr(&it->refcount);
+    pthread_mutex_unlock(&cache_lock);
+
+    return 1;
+}
+
+/*
+ * Removes a linked item from the hash table and the LRU under cache_lock and
+ * drops one reference. Does nothing for an item that is not linked.
+ */
+void do_item_unlink(item *it, const uint32_t hv) {
+    MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
+    mutex_lock(&cache_lock);
+    if ((it->it_flags & ITEM_LINKED) != 0) {
+        it->it_flags &= ~ITEM_LINKED;
+        STATS_LOCK();
+        stats.curr_bytes -= ITEM_ntotal(it);
+        stats.curr_items -= 1;
+        STATS_UNLOCK();
+        assoc_delete(ITEM_key(it), it->nkey, hv);
+        item_unlink_q(it);
+        do_item_remove(it);
+    }
+    pthread_mutex_unlock(&cache_lock);
+}
+
+/* FIXME: Is it necessary to keep this copy/pasted code?
*/
+/* Same as do_item_unlink(), for callers that already hold cache_lock. */
+void do_item_unlink_nolock(item *it, const uint32_t hv) {
+    MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
+    if ((it->it_flags & ITEM_LINKED) != 0) {
+        it->it_flags &= ~ITEM_LINKED;
+        STATS_LOCK();
+        stats.curr_bytes -= ITEM_ntotal(it);
+        stats.curr_items -= 1;
+        STATS_UNLOCK();
+        assoc_delete(ITEM_key(it), it->nkey, hv);
+        item_unlink_q(it);
+        do_item_remove(it);
+    }
+}
+
+/* Drops one reference; frees the item when the count reaches zero. */
+void do_item_remove(item *it) {
+    MEMCACHED_ITEM_REMOVE(ITEM_key(it), it->nkey, it->nbytes);
+    assert((it->it_flags & ITEM_SLABBED) == 0);
+
+    if (refcount_decr(&it->refcount) == 0) {
+        item_free(it);
+    }
+}
+
+/*
+ * Moves a linked item to the head of its LRU and refreshes its timestamp,
+ * but only if it was last repositioned more than ITEM_UPDATE_INTERVAL
+ * seconds ago.
+ */
+void do_item_update(item *it) {
+    MEMCACHED_ITEM_UPDATE(ITEM_key(it), it->nkey, it->nbytes);
+    if (it->time < current_time - ITEM_UPDATE_INTERVAL) {
+        assert((it->it_flags & ITEM_SLABBED) == 0);
+
+        mutex_lock(&cache_lock);
+        if ((it->it_flags & ITEM_LINKED) != 0) {
+            item_unlink_q(it);
+            it->time = current_time;
+            item_link_q(it);
+        }
+        pthread_mutex_unlock(&cache_lock);
+    }
+}
+
+/* Unlinks `it` and links `new_it` under the same hash value. */
+int do_item_replace(item *it, item *new_it, const uint32_t hv) {
+    MEMCACHED_ITEM_REPLACE(ITEM_key(it), it->nkey, it->nbytes,
+                           ITEM_key(new_it), new_it->nkey, new_it->nbytes);
+    assert((it->it_flags & ITEM_SLABBED) == 0);
+
+    do_item_unlink(it, hv);
+    return do_item_link(new_it, hv);
+}
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wshadow"
+#endif
+
+/*
+ * Builds a malloc'd text dump ("ITEM <key> [<n> b; <t> s]" lines followed by
+ * "END") of up to `limit` items (0 = no limit) from the LRU of one slab
+ * class, capped at 2MB.
+ *
+ * Returns the buffer and stores its length (without the trailing NUL) in
+ * *bytes. Returns NULL, leaving *bytes untouched, when the class id is out
+ * of range or the buffer cannot be allocated. The caller frees the buffer.
+ */
+/*@null@*/
+char *do_item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes) {
+    unsigned int memlimit = 2 * 1024 * 1024;   /* 2MB max response size */
+    char *buffer;
+    unsigned int bufcurr;
+    item *it;
+    unsigned int len;
+    unsigned int shown = 0;
+    char key_temp[KEY_MAX_LENGTH + 1];
+    char temp[512];
+
+    /* heads[] has LARGEST_ID entries; reject ids that would index past it. */
+    if (slabs_clsid >= LARGEST_ID)
+        return NULL;
+
+    it = heads[slabs_clsid];
+
+    buffer = malloc((size_t)memlimit);
+    if (buffer == 0) return NULL;
+    bufcurr = 0;
+
+    while (it != NULL && (limit == 0 || shown < limit)) {
+        assert(it->nkey <= KEY_MAX_LENGTH);
+        /* Copy the key since it may not be null-terminated in the struct */
+        strncpy(key_temp, ITEM_key(it), it->nkey);
+        key_temp[it->nkey] = 0x00; /* terminate */
+        len = snprintf(temp, sizeof(temp), "ITEM %s [%d b; %lu s]\r\n",
+                       key_temp, it->nbytes - 2,
+                       (unsigned long)it->exptime + process_started);
+        if (bufcurr + len + 6 > memlimit)  /* 6 is END\r\n\0 */
+            break;
+        memcpy(buffer + bufcurr, temp, len);
+        bufcurr += len;
+        shown++;
+        it = it->next;
+    }
+
+    memcpy(buffer + bufcurr, "END\r\n", 6);
+    bufcurr += 5;
+
+    *bytes = bufcurr;
+    return buffer;
+}
+
+/* Copies the per-class eviction counters into evicted[0..LARGEST_ID-1]. */
+void item_stats_evictions(uint64_t *evicted) {
+    int i;
+    mutex_lock(&cache_lock);
+    for (i = 0; i < LARGEST_ID; i++) {
+        evicted[i] = itemstats[i].evicted;
+    }
+    pthread_mutex_unlock(&cache_lock);
+}
+
+/*
+ * Emits the "items:<class>:<name>" statistics for every slab class whose LRU
+ * is non-empty, then a terminating empty stat.
+ */
+void do_item_stats(ADD_STAT add_stats, void *c) {
+    int i;
+    for (i = 0; i < LARGEST_ID; i++) {
+        /* Classes with an empty LRU are skipped entirely. */
+        if (tails[i] != NULL) {
+            const char *fmt = "items:%d:%s";
+            char key_str[STAT_KEY_LEN];
+            char val_str[STAT_VAL_LEN];
+            int klen = 0, vlen = 0;
+            APPEND_NUM_FMT_STAT(fmt, i, "number", "%u", sizes[i]);
+            APPEND_NUM_FMT_STAT(fmt, i, "age", "%u", current_time - tails[i]->time);
+            APPEND_NUM_FMT_STAT(fmt, i, "evicted",
+                                "%llu", (unsigned long long)itemstats[i].evicted);
+            APPEND_NUM_FMT_STAT(fmt, i, "evicted_nonzero",
+                                "%llu", (unsigned long long)itemstats[i].evicted_nonzero);
+            APPEND_NUM_FMT_STAT(fmt, i, "evicted_time",
+                                "%u", itemstats[i].evicted_time);
+            APPEND_NUM_FMT_STAT(fmt, i, "outofmemory",
+                                "%llu", (unsigned long long)itemstats[i].outofmemory);
+            APPEND_NUM_FMT_STAT(fmt, i, "tailrepairs",
+                                "%llu", (unsigned long long)itemstats[i].tailrepairs);
+            APPEND_NUM_FMT_STAT(fmt, i, "reclaimed",
+                                "%llu", (unsigned long long)itemstats[i].reclaimed);
+            APPEND_NUM_FMT_STAT(fmt, i, "expired_unfetched",
+                                "%llu", (unsigned long long)itemstats[i].expired_unfetched);
+            APPEND_NUM_FMT_STAT(fmt, i, "evicted_unfetched",
+                                "%llu", (unsigned long long)itemstats[i].evicted_unfetched);
+        }
+    }
+
+    /* getting here means both ascii and binary terminators fit */
+    add_stats(NULL, 0, NULL, 0, c);
+}
+
+/** dumps out a list of objects of each size, with granularity of 32 bytes */
+/*@null@*/
+void do_item_stats_sizes(ADD_STAT add_stats, void *c) {
+
+    /* max 1MB object, divided into 32 bytes size buckets */
+    const int num_buckets = 32768;
+    unsigned int *histogram = calloc(num_buckets, sizeof(int));
+
+    if (histogram != NULL) {
+        int i;
+
+        /* build the histogram */
+        for (i = 0; i < LARGEST_ID; i++) {
+            item *iter = heads[i];
+            while (iter) {
+                int ntotal = ITEM_ntotal(iter);
+                int bucket = ntotal / 32;
+                if ((ntotal % 32) != 0) bucket++;
+                if (bucket < num_buckets) histogram[bucket]++;
+                iter = iter->next;
+            }
+        }
+
+        /* write the buffer */
+        for (i = 0; i < num_buckets; i++) {
+            if (histogram[i] != 0) {
+                char key[8];
+                snprintf(key, sizeof(key), "%d", i * 32);
+                APPEND_STAT(key, "%u", histogram[i]);
+            }
+        }
+        free(histogram);
+    }
+    add_stats(NULL, 0, NULL, 0, c);
+}
+
+/** wrapper around assoc_find which does the lazy expiration logic */
+item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) {
+    mutex_lock(&cache_lock);
+    item *it = assoc_find(key, nkey, hv);
+    if (it != NULL) {
+        refcount_incr(&it->refcount);
+        /* Optimization for slab reassignment. prevents popular items from
+         * jamming in busy wait. Can only do this here to satisfy lock order
+         * of item_lock, cache_lock, slabs_lock. */
+        if (slab_rebalance_signal &&
+            ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) {
+            do_item_unlink_nolock(it, hv);
+            do_item_remove(it);
+            it = NULL;
+        }
+    }
+    pthread_mutex_unlock(&cache_lock);
+    /* Only ever set at verbosity > 2, so the "nuked" traces below are
+     * printed only after a "> FOUND KEY" trace. */
+    int was_found = 0;
+
+    if (settings.verbose > 2) {
+        if (it == NULL) {
+            fprintf(stderr, "> NOT FOUND %s", key);
+        } else {
+            fprintf(stderr, "> FOUND KEY %s", ITEM_key(it));
+            was_found++;
+        }
+    }
+
+    if (it != NULL) {
+        if (settings.oldest_live != 0 && settings.oldest_live <= current_time &&
+            it->time <= settings.oldest_live) {
+            do_item_unlink(it, hv);
+            do_item_remove(it);
+            it = NULL;
+            if (was_found) {
+                fprintf(stderr, " -nuked by flush");
+            }
+        } else if (it->exptime != 0 && it->exptime <= current_time) {
+            do_item_unlink(it, hv);
+            do_item_remove(it);
+            it = NULL;
+            if (was_found) {
+                fprintf(stderr, " -nuked by expire");
+            }
+        } else {
+            it->it_flags |= ITEM_FETCHED;
+            DEBUG_REFCNT(it, '+');
+        }
+    }
+
+    if (settings.verbose > 2)
+        fprintf(stderr, "\n");
+
+    return it;
+}
+
+/* Looks the item up via do_item_get() and, if found, sets its exptime. */
+item *do_item_touch(const char *key, size_t nkey, uint32_t exptime,
+                    const uint32_t hv) {
+    item *it = do_item_get(key, nkey, hv);
+    if (it != NULL) {
+        it->exptime = exptime;
+    }
+    return it;
+}
+
+/* expires items that are more recent than the oldest_live setting. */
+void do_item_flush_expired(void) {
+    int i;
+    item *iter, *next;
+    if (settings.oldest_live == 0)
+        return;
+    for (i = 0; i < LARGEST_ID; i++) {
+        /* The LRU is sorted in decreasing time order, and an item's timestamp
+         * is never newer than its last access time, so we only need to walk
+         * back until we hit an item older than the oldest_live time.
+         * The oldest_live checking will auto-expire the remaining items.
+         */
+        for (iter = heads[i]; iter != NULL; iter = next) {
+            if (iter->time >= settings.oldest_live) {
+                /* Fetch the successor before the item is unlinked. */
+                next = iter->next;
+                if ((iter->it_flags & ITEM_SLABBED) == 0) {
+                    do_item_unlink_nolock(iter, hash(ITEM_key(iter), iter->nkey, 0));
+                }
+            } else {
+                /* We've hit the first old item. Continue to the next queue. */
+                break;
+            }
+        }
+    }
+}
diff --git a/memcached/items.h b/memcached/items.h
new file mode 100644
index 00000000..2ec142db
--- /dev/null
+++ b/memcached/items.h
@@ -0,0 +1,27 @@
+/* See items.c */
+uint64_t get_cas_id(void);
+
+/*@null@*/
+item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes);
+void item_free(item *it);
+bool item_size_ok(const size_t nkey, const int flags, const int nbytes);
+
+int  do_item_link(item *it, const uint32_t hv);     /** may fail if transgresses limits */
+void do_item_unlink(item *it, const uint32_t hv);
+void do_item_unlink_nolock(item *it, const uint32_t hv);
+void do_item_remove(item *it);
+void do_item_update(item *it);   /** update LRU time to current and reposition */
+int  do_item_replace(item *it, item *new_it, const uint32_t hv);
+
+/*@null@*/
+char *do_item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes);
+void do_item_stats(ADD_STAT add_stats, void *c);
+/*@null@*/
+void do_item_stats_sizes(ADD_STAT add_stats, void *c);
+void do_item_flush_expired(void);
+
+item *do_item_get(const char *key, const size_t nkey, const uint32_t hv);
+item *do_item_touch(const char *key, const size_t nkey, uint32_t exptime, const uint32_t hv);
+void item_stats_reset(void);
+extern pthread_mutex_t cache_lock;
+void item_stats_evictions(uint64_t *evicted);
diff --git a/memcached/m4/c99-backport.m4 b/memcached/m4/c99-backport.m4
new file mode 100644
index 00000000..08125849
--- /dev/null
+++ b/memcached/m4/c99-backport.m4
@@ -0,0 +1,188 @@
+# AC_PROG_CC_C99 ([ACTION-IF-AVAILABLE], [ACTION-IF-UNAVAILABLE])
+#
----------------------------------------------------------------
+# If the C compiler is not in ISO C99 mode by default, try to add an
+# option to output variable CC to make it so. This macro tries
+# various options that select ISO C99 on some system or another. It
+# considers the compiler to be in ISO C99 mode if it handles _Bool,
+# // comments, flexible array members, inline, long long int, mixed
+# code and declarations, named initialization of structs, restrict,
+# va_copy, varargs macros, variable declarations in for loops and
+# variable length arrays.
+AC_DEFUN([AC_PROG_CC_C99],
+[AC_C_STD_TRY([c99],
+[[#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <stdio.h>
+
+// Check varargs macros. These examples are taken from C99 6.10.3.5.
+#define debug(...) fprintf (stderr, __VA_ARGS__)
+#define showlist(...) puts (#__VA_ARGS__)
+#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__))
+static void
+test_varargs_macros (void)
+{
+  int x = 1234;
+  int y = 5678;
+  debug ("Flag");
+  debug ("X = %d\n", x);
+  showlist (The first, second, and third items.);
+  report (x>y, "x is %d but y is %d", x, y);
+}
+
+// Check long long types.
+#define BIG64 18446744073709551615ull
+#define BIG32 4294967295ul
+#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0)
+#if !BIG_OK
+  your preprocessor is broken;
+#endif
+#if BIG_OK
+#else
+  your preprocessor is broken;
+#endif
+static long long int bignum = -9223372036854775807LL;
+static unsigned long long int ubignum = BIG64;
+
+struct incomplete_array
+{
+  int datasize;
+  double data[];
+};
+
+struct named_init {
+  int number;
+  const wchar_t *name;
+  double average;
+};
+
+typedef const char *ccp;
+
+static inline int
+test_restrict (ccp restrict text)
+{
+  // See if C++-style comments work.
+  // Iterate through items via the restricted pointer.
+  // Also check for declarations in for loops.
+  for (unsigned int i = 0; *(text+i) != '\0'; ++i)
+    continue;
+  return 0;
+}
+
+// Check varargs and va_copy.
+static void
+test_varargs (const char *format, ...)
+{
+  va_list args;
+  va_start (args, format);
+  va_list args_copy;
+  va_copy (args_copy, args);
+
+  const char *str;
+  int number;
+  float fnumber;
+
+  while (*format)
+    {
+      switch (*format++)
+	{
+	case 's': // string
+	  str = va_arg (args_copy, const char *);
+	  break;
+	case 'd': // int
+	  number = va_arg (args_copy, int);
+	  break;
+	case 'f': // float
+	  fnumber = va_arg (args_copy, double);
+	  break;
+	default:
+	  break;
+	}
+    }
+  va_end (args_copy);
+  va_end (args);
+}
+]],
+[[
+  // Check bool.
+  _Bool success = false;
+
+  // Check restrict.
+  if (test_restrict ("String literal") == 0)
+    success = true;
+  char *restrict newvar = "Another string";
+
+  // Check varargs.
+  test_varargs ("s, d' f .", "string", 65, 34.234);
+  test_varargs_macros ();
+
+  // Check flexible array members.
+  struct incomplete_array *ia =
+    malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10));
+  ia->datasize = 10;
+  for (int i = 0; i < ia->datasize; ++i)
+    ia->data[i] = i * 1.234;
+
+  // Check named initializers.
+  struct named_init ni = {
+    .number = 34,
+    .name = L"Test wide string",
+    .average = 543.34343,
+  };
+
+  ni.number = 58;
+
+  int dynamic_array[ni.number];
+  dynamic_array[ni.number - 1] = 543;
+
+  // work around unused variable warnings
+  return (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == 'x'
+	  || dynamic_array[ni.number - 1] != 543);
+]],
+dnl Try
+dnl GCC		-std=gnu99 (unused restrictive modes: -std=c99 -std=iso9899:1999)
+dnl AIX		-qlanglvl=extc99 (unused restrictive mode: -qlanglvl=stdc99)
+dnl Intel ICC	-c99
+dnl IRIX	-c99
+dnl Solaris	(unused because it causes the compiler to assume C99 semantics for
+dnl		library functions, and this is invalid before Solaris 10: -xc99)
+dnl Tru64	-c99
+dnl with extended modes being tried first.
+[[-std=gnu99 -c99 -qlanglvl=extc99]], [$1], [$2])[]dnl
+])# AC_PROG_CC_C99
+
+# AC_C_STD_TRY(STANDARD, TEST-PROLOGUE, TEST-BODY, OPTION-LIST,
+#		ACTION-IF-AVAILABLE, ACTION-IF-UNAVAILABLE)
+# --------------------------------------------------------------
+# Check whether the C compiler accepts features of STANDARD (e.g `c89', `c99')
+# by trying to compile a program of TEST-PROLOGUE and TEST-BODY. If this fails,
+# try again with each compiler option in the space-separated OPTION-LIST; if one
+# helps, append it to CC. If eventually successful, run ACTION-IF-AVAILABLE,
+# else ACTION-IF-UNAVAILABLE.
+AC_DEFUN([AC_C_STD_TRY],
+[AC_MSG_CHECKING([for $CC option to accept ISO ]m4_translit($1, [c], [C]))
+AC_CACHE_VAL(ac_cv_prog_cc_$1,
+[ac_cv_prog_cc_$1=no
+ac_save_CC=$CC
+AC_LANG_CONFTEST([AC_LANG_PROGRAM([$2], [$3])])
+for ac_arg in '' $4
+do
+  CC="$ac_save_CC $ac_arg"
+  _AC_COMPILE_IFELSE([], [ac_cv_prog_cc_$1=$ac_arg])
+  test "x$ac_cv_prog_cc_$1" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+])# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_$1" in
+  x)
+    AC_MSG_RESULT([none needed]) ;;
+  xno)
+    AC_MSG_RESULT([unsupported]) ;;
+  *)
+    CC="$CC $ac_cv_prog_cc_$1"
+    AC_MSG_RESULT([$ac_cv_prog_cc_$1]) ;;
+esac
+AS_IF([test "x$ac_cv_prog_cc_$1" != xno], [$5], [$6])
+])# AC_C_STD_TRY
diff --git a/memcached/memcached.c b/memcached/memcached.c
new file mode 100644
index 00000000..7d053cce
--- /dev/null
+++ b/memcached/memcached.c
@@ -0,0 +1,5292 @@
+/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/*
+ *  memcached - memory caching daemon
+ *
+ *       http://www.danga.com/memcached/
+ *
+ *  Copyright 2003 Danga Interactive, Inc.  All rights reserved.
+ *
+ *  Use and distribution licensed under the BSD license.  See
+ *  the LICENSE file for full text.
+ *
+ *  Authors:
+ *      Anatoly Vorobey
+ *      Brad Fitzpatrick
+ */
+#include "memcached.h"
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <signal.h>
+#include <sys/resource.h>
+#include <sys/uio.h>
+#include <ctype.h>
+#include <stdarg.h>
+
+/* some POSIX systems need the following definition
+ * to get mlockall flags out of sys/mman.h.  */
+#ifndef _P1003_1B_VISIBLE
+#define _P1003_1B_VISIBLE
+#endif
+/* need this to get IOV_MAX on some platforms. */
+#ifndef __need_IOV_MAX
+#define __need_IOV_MAX
+#endif
+#include <pwd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <assert.h>
+#include <limits.h>
+#include <sysexits.h>
+#include <stddef.h>
+
+/* FreeBSD 4.x doesn't have IOV_MAX exposed. */
+#ifndef IOV_MAX
+#if defined(__FreeBSD__) || defined(__APPLE__)
+# define IOV_MAX 1024
+#endif
+#endif
+
+/*
+ * forward declarations
+ */
+static void drive_machine(conn *c);
+static int new_socket(struct addrinfo *ai);
+static int try_read_command(conn *c);
+
+enum try_read_result {
+    READ_DATA_RECEIVED,
+    READ_NO_DATA_RECEIVED,
+    READ_ERROR,            /** an error occurred (on the socket) (or client closed connection) */
+    READ_MEMORY_ERROR      /** failed to allocate more memory */
+};
+
+static enum try_read_result try_read_network(conn *c);
+static enum try_read_result try_read_udp(conn *c);
+
+static void conn_set_state(conn *c, enum conn_states state);
+
+/* stats */
+static void stats_init(void);
+static void server_stats(ADD_STAT add_stats, conn *c);
+static void process_stat_settings(ADD_STAT add_stats, void *c);
+
+
+/* defaults */
+static void settings_init(void);
+
+/* event handling, network IO */
+static void event_handler(const int fd, const short which, void *arg);
+static void conn_close(conn *c);
+static void conn_init(void);
+static bool update_event(conn *c, const int new_flags);
+static void complete_nread(conn *c);
+static void process_command(conn *c, char *command);
+static void write_and_free(conn *c, char *buf, int bytes);
+static int ensure_iov_space(conn *c);
+static int add_iov(conn *c, const void *buf, int len);
+static int add_msghdr(conn *c);
+
+
+static void conn_free(conn *c);
+
+/** exported globals **/
+struct stats stats;
+struct settings settings;
+time_t process_started;     /* when the process was started */
+
+struct slab_rebalance slab_rebal;
+volatile int slab_rebalance_signal;
+
+/** file scope variables **/
+static conn *listen_conn = NULL;
+static struct event_base *main_base;
+
+enum transmit_result {
+    TRANSMIT_COMPLETE,   /** All done writing. */
+    TRANSMIT_INCOMPLETE, /** More data remaining to write. */
+    TRANSMIT_SOFT_ERROR, /** Can't write any more right now. */
+    TRANSMIT_HARD_ERROR  /** Can't write (c->state is set to conn_closing) */
+};
+
+static enum transmit_result transmit(conn *c);
+
+/* This reduces the latency without adding lots of extra wiring to be able to
+ * notify the listener thread of when to listen again.
+ * Also, the clock timer could be broken out into its own thread and we
+ * can block the listener via a condition.
+ */
+static volatile bool allow_new_conns = true;
+static struct event maxconnsevent;
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+/*
+ * Timer callback: while new connections are not allowed (or when invoked
+ * with fd == -42) it re-arms itself for 10ms later; otherwise it removes the
+ * timer and re-enables accepting.
+ * NOTE(review): -42 looks like a sentinel used to arm the timer the first
+ * time; the caller is not visible here, so confirm.
+ */
+static void maxconns_handler(const int fd, const short which, void *arg) {
+    struct timeval t = {.tv_sec = 0, .tv_usec = 10000};
+
+    if (fd == -42 || allow_new_conns == false) {
+        /* reschedule in 10ms if we need to keep polling */
+        evtimer_set(&maxconnsevent, maxconns_handler, 0);
+        event_base_set(main_base, &maxconnsevent);
+        evtimer_add(&maxconnsevent, &t);
+    } else {
+        evtimer_del(&maxconnsevent);
+        accept_new_conns(true);
+    }
+}
+
+/* Parenthesized so the macro stays one operand wherever it is expanded. */
+#define REALTIME_MAXDELTA (60*60*24*30)
+
+/*
+ * given time value that's either unix time or delta from current unix time, return
+ * unix time. Use the fact that delta can't exceed one month (and real time value can't
+ * be that low).
+ */
+static rel_time_t realtime(const time_t exptime) {
+    /* no. of seconds in 30 days - largest possible delta exptime */
+
+    if (exptime == 0) return 0; /* 0 means never expire */
+
+    if (exptime > REALTIME_MAXDELTA) {
+        /* if item expiration is at/before the server started, give it an
+           expiration time of 1 second after the server started.
+           (because 0 means don't expire).  without this, we'd
+           underflow and wrap around to some large value way in the
+           future, effectively making items expiring in the past
+           really expiring never */
+        if (exptime <= process_started)
+            return (rel_time_t)1;
+        return (rel_time_t)(exptime - process_started);
+    } else {
+        return (rel_time_t)(exptime + current_time);
+    }
+}
+
+/* Zeroes the global counters and records the (back-dated) start time. */
+static void stats_init(void) {
+    stats.curr_items = stats.total_items = stats.curr_conns = stats.total_conns = stats.conn_structs = 0;
+    stats.get_cmds = stats.set_cmds = stats.get_hits = stats.get_misses = stats.evictions = stats.reclaimed = 0;
+    stats.touch_cmds = stats.touch_misses = stats.touch_hits = stats.rejected_conns = 0;
+    stats.curr_bytes = stats.listen_disabled_num = 0;
+    stats.hash_power_level = stats.hash_bytes = stats.hash_is_expanding = 0;
+    stats.expired_unfetched = stats.evicted_unfetched = 0;
+    stats.slabs_moved = 0;
+    stats.accepting_conns = true; /* assuming we start in this state. */
+    stats.slab_reassign_running = false;
+
+    /* make the time we started always be 2 seconds before we really
+       did, so time(0) - time.started is never zero.  if so, things
+       like 'settings.oldest_live' which act as booleans as well as
+       values are now false in boolean context... */
+    process_started = time(0) - 2;
+    stats_prefix_init();
+}
+
+/* Clears the resettable global counters, then the per-thread and item stats. */
+static void stats_reset(void) {
+    STATS_LOCK();
+    stats.total_items = stats.total_conns = 0;
+    stats.rejected_conns = 0;
+    stats.evictions = 0;
+    stats.reclaimed = 0;
+    stats.listen_disabled_num = 0;
+    stats_prefix_clear();
+    STATS_UNLOCK();
+    threadlocal_stats_reset();
+    item_stats_reset();
+}
+
+/* Loads the built-in default for every field of the global settings. */
+static void settings_init(void) {
+    settings.use_cas = true;
+    settings.access = 0700;
+    settings.port = 11211;
+    settings.udpport = 11211;
+    /* By default this string should be NULL for getaddrinfo() */
+    settings.inter = NULL;
+    settings.maxbytes = 64 * 1024 * 1024; /* default is 64MB */
+    settings.maxconns = 1024;         /* to limit connections-related memory to about 5MB */
+    settings.verbose = 0;
+    settings.oldest_live = 0;
+    settings.evict_to_free = 1;       /* push old items out of cache when memory runs out */
+    settings.socketpath = NULL;       /* by default, not using a unix socket */
+    settings.factor = 1.25;
+    settings.chunk_size = 48;         /* space for a modest key and value */
+    settings.num_threads = 4;         /* N workers */
+    settings.num_threads_per_udp = 0;
+    settings.prefix_delimiter = ':';
+    settings.detail_enabled = 0;
+    settings.reqs_per_event = 20;
+    settings.backlog = 1024;
+    settings.binding_protocol = negotiating_prot;
+    settings.item_size_max = 1024 * 1024; /* The famous 1MB upper limit. */
+    settings.maxconns_fast = false;
+    settings.hashpower_init = 0;
+    settings.slab_reassign = false;
+    settings.slab_automove = false;
+}
+
+/*
+ * Adds a message header to a connection.
+ *
+ * Returns 0 on success, -1 on out-of-memory.
+ */
+static int add_msghdr(conn *c)
+{
+    struct msghdr *msg;
+
+    assert(c != NULL);
+
+    /* The header list is full: double it before taking the next slot. */
+    if (c->msgsize == c->msgused) {
+        msg = realloc(c->msglist, c->msgsize * 2 * sizeof(struct msghdr));
+        if (! msg)
+            return -1;
+        c->msglist = msg;
+        c->msgsize *= 2;
+    }
+
+    msg = c->msglist + c->msgused;
+
+    /* this wipes msg_iovlen, msg_control, msg_controllen, and
+       msg_flags, the last 3 of which aren't defined on solaris: */
+    memset(msg, 0, sizeof(struct msghdr));
+
+    msg->msg_iov = &c->iov[c->iovused];
+
+    if (c->request_addr_size > 0) {
+        msg->msg_name = &c->request_addr;
+        msg->msg_namelen = c->request_addr_size;
+    }
+
+    c->msgbytes = 0;
+    c->msgused++;
+
+    if (IS_UDP(c->transport)) {
+        /* Leave room for the UDP header, which we'll fill in later. */
+        return add_iov(c, NULL, UDP_HEADER_SIZE);
+    }
+
+    return 0;
+}
+
+
+/*
+ * Free list management for connections.
+ */
+
+static conn **freeconns;
+static int freetotal;
+static int freecurr;
+/* Lock for connection freelist */
+static pthread_mutex_t conn_lock = PTHREAD_MUTEX_INITIALIZER;
+
+
+/*
+ * Allocates the connection freelist (200 slots). Exits the process if the
+ * allocation fails: the freelist code indexes freeconns[] on the strength
+ * of freetotal, so continuing with a NULL array would crash later.
+ */
+static void conn_init(void) {
+    freetotal = 200;
+    freecurr = 0;
+    if ((freeconns = calloc(freetotal, sizeof(conn *))) == NULL) {
+        fprintf(stderr, "Failed to allocate connection structures\n");
+        /* This is unrecoverable so bail out early. */
+        exit(EXIT_FAILURE);
+    }
+    return;
+}
+
+/*
+ * Returns a connection from the freelist, if any.
+ */
+conn *conn_from_freelist(void) {
+    conn *c;
+
+    pthread_mutex_lock(&conn_lock);
+    if (freecurr > 0) {
+        c = freeconns[--freecurr];
+    } else {
+        c = NULL;
+    }
+    pthread_mutex_unlock(&conn_lock);
+
+    return c;
+}
+
+/*
+ * Adds a connection to the freelist. 0 = success.
+ */
+bool conn_add_to_freelist(conn *c) {
+    /* ret stays true (failure) unless the connection is actually stored. */
+    bool ret = true;
+    pthread_mutex_lock(&conn_lock);
+    if (freecurr < freetotal) {
+        freeconns[freecurr++] = c;
+        ret = false;
+    } else {
+        /* try to enlarge free connections array */
+        size_t newsize = freetotal * 2;
+        conn **new_freeconns = realloc(freeconns, sizeof(conn *) * newsize);
+        if (new_freeconns) {
+            freetotal = newsize;
+            freeconns = new_freeconns;
+            freeconns[freecurr++] = c;
+            ret = false;
+        }
+    }
+    pthread_mutex_unlock(&conn_lock);
+    return ret;
+}
+
+/* Maps a protocol enum to its display name; aborts on an unknown value. */
+static const char *prot_text(enum protocol prot) {
+    const char *rv = "unknown";
+    switch(prot) {
+        case ascii_prot:
+            rv = "ascii";
+            break;
+        case binary_prot:
+            rv = "binary";
+            break;
+        case negotiating_prot:
+            rv = "auto-negotiate";
+            break;
+        default:
+            abort();
+    }
+    return rv;
+}
+
+/*
+ * Obtains a connection structure (from the freelist, or freshly allocated
+ * with its buffers), resets its per-connection state for socket `sfd`, and
+ * registers its event with `base`.
+ *
+ * Returns the connection, or NULL if allocation or event_add() fails.
+ */
+conn *conn_new(const int sfd, enum conn_states init_state,
+                const int event_flags,
+                const int read_buffer_size, enum network_transport transport,
+                struct event_base *base) {
+    conn *c = conn_from_freelist();
+
+    /* Nothing to recycle: build a new structure and all of its buffers. */
+    if (NULL == c) {
+        if (!(c = (conn *)calloc(1, sizeof(conn)))) {
+            fprintf(stderr, "calloc()\n");
+            return NULL;
+        }
+        MEMCACHED_CONN_CREATE(c);
+
+        c->rbuf = c->wbuf = 0;
+        c->ilist = 0;
+        c->suffixlist = 0;
+        c->iov = 0;
+        c->msglist = 0;
+        c->hdrbuf = 0;
+
+        c->rsize = read_buffer_size;
+        c->wsize = DATA_BUFFER_SIZE;
+        c->isize = ITEM_LIST_INITIAL;
+        c->suffixsize = SUFFIX_LIST_INITIAL;
+        c->iovsize = IOV_LIST_INITIAL;
+        c->msgsize = MSG_LIST_INITIAL;
+        c->hdrsize = 0;
+
+        c->rbuf = (char *)malloc((size_t)c->rsize);
+        c->wbuf = (char *)malloc((size_t)c->wsize);
+        c->ilist = (item **)malloc(sizeof(item *) * c->isize);
+        c->suffixlist = (char **)malloc(sizeof(char *) * c->suffixsize);
+        c->iov = (struct iovec *)malloc(sizeof(struct iovec) * c->iovsize);
+        c->msglist = (struct msghdr *)malloc(sizeof(struct msghdr) * c->msgsize);
+
+        /* Any failed buffer aborts the whole construction; conn_free()
+         * releases whichever buffers did get allocated. */
+        if (c->rbuf == 0 || c->wbuf == 0 || c->ilist == 0 || c->iov == 0 ||
+                c->msglist == 0 || c->suffixlist == 0) {
+            conn_free(c);
+            fprintf(stderr, "malloc()\n");
+            return NULL;
+        }
+
+        STATS_LOCK();
+        stats.conn_structs++;
+        STATS_UNLOCK();
+    }
+
+    c->transport = transport;
+    c->protocol = settings.binding_protocol;
+
+    /* unix socket mode doesn't need this, so zeroed out.  but why
+     * is this done for every command?  presumably for UDP
+     * mode.  */
+    if (!settings.socketpath) {
+        c->request_addr_size = sizeof(c->request_addr);
+    } else {
+        c->request_addr_size = 0;
+    }
+
+    if (settings.verbose > 1) {
+        if (init_state == conn_listening) {
+            fprintf(stderr, "<%d server listening (%s)\n", sfd,
+                prot_text(c->protocol));
+        } else if (IS_UDP(transport)) {
+            fprintf(stderr, "<%d server listening (udp)\n", sfd);
+        } else if (c->protocol == negotiating_prot) {
+            fprintf(stderr, "<%d new auto-negotiating client connection\n",
+                    sfd);
+        } else if (c->protocol == ascii_prot) {
+            fprintf(stderr, "<%d new ascii client connection.\n", sfd);
+        } else if (c->protocol == binary_prot) {
+            fprintf(stderr, "<%d new binary client connection.\n", sfd);
+        } else {
+            fprintf(stderr, "<%d new unknown (%d) client connection\n",
+                sfd, c->protocol);
+            assert(false);
+        }
+    }
+
+    /* Reset all per-connection cursors and counters (needed for recycled
+     * structures as well as new ones). */
+    c->sfd = sfd;
+    c->state = init_state;
+    c->rlbytes = 0;
+    c->cmd = -1;
+    c->rbytes = c->wbytes = 0;
+    c->wcurr = c->wbuf;
+    c->rcurr = c->rbuf;
+    c->ritem = 0;
+    c->icurr = c->ilist;
+    c->suffixcurr = c->suffixlist;
+    c->ileft = 0;
+    c->suffixleft = 0;
+    c->iovused = 0;
+    c->msgcurr = 0;
+    c->msgused = 0;
+
+    c->write_and_go = init_state;
+    c->write_and_free = 0;
+    c->item = 0;
+
+    c->noreply = false;
+
+    event_set(&c->event, sfd, event_flags, event_handler, (void *)c);
+    event_base_set(base, &c->event);
+    c->ev_flags = event_flags;
+
+    if (event_add(&c->event, 0) == -1) {
+        /* Recycle the structure; free it only if the freelist refuses it. */
+        if (conn_add_to_freelist(c)) {
+            conn_free(c);
+        }
+        perror("event_add");
+        return NULL;
+    }
+
+    STATS_LOCK();
+    stats.curr_conns++;
+    stats.total_conns++;
+    STATS_UNLOCK();
+
+    MEMCACHED_CONN_ALLOCATE(c->sfd);
+
+    return c;
+}
+
+/*
+ * Releases everything a connection still holds: its pending item, queued
+ * items and suffix buffers, the write_and_free buffer and any SASL state.
+ * A UDP connection is put back into the conn_read state.
+ */
+static void conn_cleanup(conn *c) {
+    assert(c != NULL);
+
+    if (c->item) {
+        item_remove(c->item);
+        c->item = 0;
+    }
+
+    if (c->ileft != 0) {
+        for (; c->ileft > 0; c->ileft--,c->icurr++) {
+            item_remove(*(c->icurr));
+        }
+    }
+
+    if (c->suffixleft != 0) {
+        for (; c->suffixleft > 0; c->suffixleft--, c->suffixcurr++) {
+            cache_free(c->thread->suffix_cache, *(c->suffixcurr));
+        }
+    }
+
+    if (c->write_and_free) {
+        free(c->write_and_free);
+        c->write_and_free = 0;
+    }
+
+    if (c->sasl_conn) {
+        assert(settings.sasl);
+        sasl_dispose(&c->sasl_conn);
+        c->sasl_conn = NULL;
+    }
+
+    if (IS_UDP(c->transport)) {
+        conn_set_state(c, conn_read);
+    }
+}
+
+/*
+ * Frees a connection.
+ */
+void conn_free(conn *c) {
+    if (c) {
+        MEMCACHED_CONN_DESTROY(c);
+        if (c->hdrbuf)
+            free(c->hdrbuf);
+        if (c->msglist)
+            free(c->msglist);
+        if (c->rbuf)
+            free(c->rbuf);
+        if (c->wbuf)
+            free(c->wbuf);
+        if (c->ilist)
+            free(c->ilist);
+        if (c->suffixlist)
+            free(c->suffixlist);
+        if (c->iov)
+            free(c->iov);
+        free(c);
+    }
+}
+
+/*
+ * Closes a connection: removes its event, closes the socket, re-enables
+ * accepting, cleans the connection up and recycles or frees the structure.
+ */
+static void conn_close(conn *c) {
+    assert(c != NULL);
+
+    /* delete the event, the socket and the conn */
+    event_del(&c->event);
+
+    if (settings.verbose > 1)
+        fprintf(stderr, "<%d connection closed.\n", c->sfd);
+
+    MEMCACHED_CONN_RELEASE(c->sfd);
+    close(c->sfd);
+    pthread_mutex_lock(&conn_lock);
+    allow_new_conns = true;
+    pthread_mutex_unlock(&conn_lock);
+    conn_cleanup(c);
+
+    /* if the connection has big buffers, just free it */
+    if (c->rsize > READ_BUFFER_HIGHWAT || conn_add_to_freelist(c)) {
+        conn_free(c);
+    }
+
+    STATS_LOCK();
+    stats.curr_conns--;
+    STATS_UNLOCK();
+
+    return;
+}
+
+/*
+ * Shrinks a connection's buffers if they're too big.  This prevents
+ * periodic large "get" requests from permanently chewing lots of server
+ * memory.
+ *
+ * This should only be called in between requests since it can wipe output
+ * buffers!
+ */ +static void conn_shrink(conn *c) { + assert(c != NULL); + + if (IS_UDP(c->transport)) + return; + + if (c->rsize > READ_BUFFER_HIGHWAT && c->rbytes < DATA_BUFFER_SIZE) { + char *newbuf; + + if (c->rcurr != c->rbuf) + memmove(c->rbuf, c->rcurr, (size_t)c->rbytes); + + newbuf = (char *)realloc((void *)c->rbuf, DATA_BUFFER_SIZE); + + if (newbuf) { + c->rbuf = newbuf; + c->rsize = DATA_BUFFER_SIZE; + } + /* TODO check other branch... */ + c->rcurr = c->rbuf; + } + + if (c->isize > ITEM_LIST_HIGHWAT) { + item **newbuf = (item**) realloc((void *)c->ilist, ITEM_LIST_INITIAL * sizeof(c->ilist[0])); + if (newbuf) { + c->ilist = newbuf; + c->isize = ITEM_LIST_INITIAL; + } + /* TODO check error condition? */ + } + + if (c->msgsize > MSG_LIST_HIGHWAT) { + struct msghdr *newbuf = (struct msghdr *) realloc((void *)c->msglist, MSG_LIST_INITIAL * sizeof(c->msglist[0])); + if (newbuf) { + c->msglist = newbuf; + c->msgsize = MSG_LIST_INITIAL; + } + /* TODO check error condition? */ + } + + if (c->iovsize > IOV_LIST_HIGHWAT) { + struct iovec *newbuf = (struct iovec *) realloc((void *)c->iov, IOV_LIST_INITIAL * sizeof(c->iov[0])); + if (newbuf) { + c->iov = newbuf; + c->iovsize = IOV_LIST_INITIAL; + } + /* TODO check return value */ + } +} + +/** + * Convert a state name to a human readable form. + */ +static const char *state_text(enum conn_states state) { + const char* const statenames[] = { "conn_listening", + "conn_new_cmd", + "conn_waiting", + "conn_read", + "conn_parse_cmd", + "conn_write", + "conn_nread", + "conn_swallow", + "conn_closing", + "conn_mwrite" }; + return statenames[state]; +} + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wtype-limits" +#endif +/* + * Sets a connection's current state in the state machine. Any special + * processing that needs to happen on certain state transitions can + * happen here. 
+ */ +static void conn_set_state(conn *c, enum conn_states state) { + assert(c != NULL); + assert(state >= conn_listening && state < conn_max_state); + + if (state != c->state) { + if (settings.verbose > 2) { + fprintf(stderr, "%d: going from %s to %s\n", + c->sfd, state_text(c->state), + state_text(state)); + } + + if (state == conn_write || state == conn_mwrite) { + MEMCACHED_PROCESS_COMMAND_END(c->sfd, c->wbuf, c->wbytes); + } + c->state = state; + } +} + +/* + * Ensures that there is room for another struct iovec in a connection's + * iov list. + * + * Returns 0 on success, -1 on out-of-memory. + */ +static int ensure_iov_space(conn *c) { + assert(c != NULL); + + if (c->iovused >= c->iovsize) { + int i, iovnum; + struct iovec *new_iov = (struct iovec *)realloc(c->iov, + (c->iovsize * 2) * sizeof(struct iovec)); + if (! new_iov) + return -1; + c->iov = new_iov; + c->iovsize *= 2; + + /* Point all the msghdr structures at the new list. */ + for (i = 0, iovnum = 0; i < c->msgused; i++) { + c->msglist[i].msg_iov = &c->iov[iovnum]; + iovnum += c->msglist[i].msg_iovlen; + } + } + + return 0; +} + + +/* + * Adds data to the list of pending data that will be written out to a + * connection. + * + * Returns 0 on success, -1 on out-of-memory. + */ + +static int add_iov(conn *c, const void *buf, int len) { + struct msghdr *m; + int leftover; + bool limit_to_mtu; + + assert(c != NULL); + + do { + m = &c->msglist[c->msgused - 1]; + + /* + * Limit UDP packets, and the first payloads of TCP replies, to + * UDP_MAX_PAYLOAD_SIZE bytes. + */ + limit_to_mtu = IS_UDP(c->transport) || (1 == c->msgused); + + /* We may need to start a new msghdr if this one is full. 
*/ + if (m->msg_iovlen == IOV_MAX || + (limit_to_mtu && c->msgbytes >= UDP_MAX_PAYLOAD_SIZE)) { + add_msghdr(c); + m = &c->msglist[c->msgused - 1]; + } + + if (ensure_iov_space(c) != 0) + return -1; + + /* If the fragment is too big to fit in the datagram, split it up */ + if (limit_to_mtu && len + c->msgbytes > UDP_MAX_PAYLOAD_SIZE) { + leftover = len + c->msgbytes - UDP_MAX_PAYLOAD_SIZE; + len -= leftover; + } else { + leftover = 0; + } + + m = &c->msglist[c->msgused - 1]; + m->msg_iov[m->msg_iovlen].iov_base = (void *)buf; + m->msg_iov[m->msg_iovlen].iov_len = len; + + c->msgbytes += len; + c->iovused++; + m->msg_iovlen++; + + buf = ((char *)buf) + len; + len = leftover; + } while (leftover > 0); + + return 0; +} + + +/* + * Constructs a set of UDP headers and attaches them to the outgoing messages. + */ +static int build_udp_headers(conn *c) { + int i; + unsigned char *hdr; + + assert(c != NULL); + + if (c->msgused > c->hdrsize) { + void *new_hdrbuf; + if (c->hdrbuf) + new_hdrbuf = realloc(c->hdrbuf, c->msgused * 2 * UDP_HEADER_SIZE); + else + new_hdrbuf = malloc(c->msgused * 2 * UDP_HEADER_SIZE); + if (! 
new_hdrbuf) + return -1; + c->hdrbuf = (unsigned char *)new_hdrbuf; + c->hdrsize = c->msgused * 2; + } + + hdr = c->hdrbuf; + for (i = 0; i < c->msgused; i++) { + c->msglist[i].msg_iov[0].iov_base = (void*)hdr; + c->msglist[i].msg_iov[0].iov_len = UDP_HEADER_SIZE; + *hdr++ = c->request_id / 256; + *hdr++ = c->request_id % 256; + *hdr++ = i / 256; + *hdr++ = i % 256; + *hdr++ = c->msgused / 256; + *hdr++ = c->msgused % 256; + *hdr++ = 0; + *hdr++ = 0; + assert((void *) hdr == (caddr_t)c->msglist[i].msg_iov[0].iov_base + UDP_HEADER_SIZE); + } + + return 0; +} + + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wsign-compare" +#endif +static void out_string(conn *c, const char *str) { + size_t len; + + assert(c != NULL); + + if (c->noreply) { + if (settings.verbose > 1) + fprintf(stderr, ">%d NOREPLY %s\n", c->sfd, str); + c->noreply = false; + conn_set_state(c, conn_new_cmd); + return; + } + + if (settings.verbose > 1) + fprintf(stderr, ">%d %s\n", c->sfd, str); + + /* Nuke a partial output... */ + c->msgcurr = 0; + c->msgused = 0; + c->iovused = 0; + add_msghdr(c); + + len = strlen(str); + if ((len + 2) > c->wsize) { + /* ought to be always enough. just fail for simplicity */ + str = "SERVER_ERROR output line too long"; + len = strlen(str); + } + + memcpy(c->wbuf, str, len); + memcpy(c->wbuf + len, "\r\n", 2); + c->wbytes = len + 2; + c->wcurr = c->wbuf; + + conn_set_state(c, conn_write); + c->write_and_go = conn_new_cmd; + return; +} + +/* + * we get here after reading the value in set/add/replace commands. The command + * has been stored in c->cmd, and the item is ready in c->item. 
+ */ +static void complete_nread_ascii(conn *c) { + assert(c != NULL); + + item *it = c->item; + int comm = c->cmd; + enum store_item_type ret; + + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.slab_stats[it->slabs_clsid].set_cmds++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + if (strncmp(ITEM_data(it) + it->nbytes - 2, "\r\n", 2) != 0) { + out_string(c, "CLIENT_ERROR bad data chunk"); + } else { + ret = store_item(it, comm, c); + +#ifdef ENABLE_DTRACE + uint64_t cas = ITEM_get_cas(it); + switch (c->cmd) { + case NREAD_ADD: + MEMCACHED_COMMAND_ADD(c->sfd, ITEM_key(it), it->nkey, + (ret == 1) ? it->nbytes : -1, cas); + break; + case NREAD_REPLACE: + MEMCACHED_COMMAND_REPLACE(c->sfd, ITEM_key(it), it->nkey, + (ret == 1) ? it->nbytes : -1, cas); + break; + case NREAD_APPEND: + MEMCACHED_COMMAND_APPEND(c->sfd, ITEM_key(it), it->nkey, + (ret == 1) ? it->nbytes : -1, cas); + break; + case NREAD_PREPEND: + MEMCACHED_COMMAND_PREPEND(c->sfd, ITEM_key(it), it->nkey, + (ret == 1) ? it->nbytes : -1, cas); + break; + case NREAD_SET: + MEMCACHED_COMMAND_SET(c->sfd, ITEM_key(it), it->nkey, + (ret == 1) ? 
it->nbytes : -1, cas); + break; + case NREAD_CAS: + MEMCACHED_COMMAND_CAS(c->sfd, ITEM_key(it), it->nkey, it->nbytes, + cas); + break; + } +#endif + + switch (ret) { + case STORED: + out_string(c, "STORED"); + break; + case EXISTS: + out_string(c, "EXISTS"); + break; + case NOT_FOUND: + out_string(c, "NOT_FOUND"); + break; + case NOT_STORED: + out_string(c, "NOT_STORED"); + break; + default: + out_string(c, "SERVER_ERROR Unhandled storage type."); + } + + } + + item_remove(c->item); /* release the c->item reference */ + c->item = 0; +} + +/** + * get a pointer to the start of the request struct for the current command + */ +static void* binary_get_request(conn *c) { + char *ret = c->rcurr; + ret -= (sizeof(c->binary_header) + c->binary_header.request.keylen + + c->binary_header.request.extlen); + + assert(ret >= c->rbuf); + return ret; +} + +/** + * get a pointer to the key in this request + */ +static char* binary_get_key(conn *c) { + return c->rcurr - (c->binary_header.request.keylen); +} + +static void add_bin_header(conn *c, uint16_t err, uint8_t hdr_len, uint16_t key_len, uint32_t body_len) { + protocol_binary_response_header* header; + + assert(c); + + c->msgcurr = 0; + c->msgused = 0; + c->iovused = 0; + if (add_msghdr(c) != 0) { + /* XXX: out_string is inappropriate here */ + out_string(c, "SERVER_ERROR out of memory"); + return; + } + + header = (protocol_binary_response_header *)c->wbuf; + + header->response.magic = (uint8_t)PROTOCOL_BINARY_RES; + header->response.opcode = c->binary_header.request.opcode; + header->response.keylen = (uint16_t)htons(key_len); + + header->response.extlen = (uint8_t)hdr_len; + header->response.datatype = (uint8_t)PROTOCOL_BINARY_RAW_BYTES; + header->response.status = (uint16_t)htons(err); + + header->response.bodylen = htonl(body_len); + header->response.opaque = c->opaque; + header->response.cas = htonll(c->cas); + + if (settings.verbose > 1) { + int ii; + fprintf(stderr, ">%d Writing bin response:", c->sfd); + for (ii = 0; 
ii < sizeof(header->bytes); ++ii) { + if (ii % 4 == 0) { + fprintf(stderr, "\n>%d ", c->sfd); + } + fprintf(stderr, " 0x%02x", header->bytes[ii]); + } + fprintf(stderr, "\n"); + } + + add_iov(c, c->wbuf, sizeof(header->response)); +} + +static void write_bin_error(conn *c, protocol_binary_response_status err, int swallow) { + const char *errstr = "Unknown error"; + size_t len; + + switch (err) { + case PROTOCOL_BINARY_RESPONSE_ENOMEM: + errstr = "Out of memory"; + break; + case PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND: + errstr = "Unknown command"; + break; + case PROTOCOL_BINARY_RESPONSE_KEY_ENOENT: + errstr = "Not found"; + break; + case PROTOCOL_BINARY_RESPONSE_EINVAL: + errstr = "Invalid arguments"; + break; + case PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS: + errstr = "Data exists for key."; + break; + case PROTOCOL_BINARY_RESPONSE_E2BIG: + errstr = "Too large."; + break; + case PROTOCOL_BINARY_RESPONSE_DELTA_BADVAL: + errstr = "Non-numeric server-side value for incr or decr"; + break; + case PROTOCOL_BINARY_RESPONSE_NOT_STORED: + errstr = "Not stored."; + break; + case PROTOCOL_BINARY_RESPONSE_AUTH_ERROR: + errstr = "Auth failure."; + break; + case PROTOCOL_BINARY_RESPONSE_AUTH_CONTINUE: + assert(false); + case PROTOCOL_BINARY_RESPONSE_SUCCESS: + assert(false); + default: + assert(false); + errstr = "UNHANDLED ERROR"; + fprintf(stderr, ">%d UNHANDLED ERROR: %d\n", c->sfd, err); + } + + if (settings.verbose > 1) { + fprintf(stderr, ">%d Writing an error: %s\n", c->sfd, errstr); + } + + len = strlen(errstr); + add_bin_header(c, err, 0, 0, len); + if (len > 0) { + add_iov(c, errstr, len); + } + conn_set_state(c, conn_mwrite); + if(swallow > 0) { + c->sbytes = swallow; + c->write_and_go = conn_swallow; + } else { + c->write_and_go = conn_new_cmd; + } +} + +/* Form and send a response to a command over the binary protocol */ +static void write_bin_response(conn *c, const void *d, int hlen, int keylen, int dlen) { + if (!c->noreply || c->cmd == PROTOCOL_BINARY_CMD_GET || 
+ c->cmd == PROTOCOL_BINARY_CMD_GETK) { + add_bin_header(c, 0, hlen, keylen, dlen); + if(dlen > 0) { + add_iov(c, d, dlen); + } + conn_set_state(c, conn_mwrite); + c->write_and_go = conn_new_cmd; + } else { + conn_set_state(c, conn_new_cmd); + } +} + +static void complete_incr_bin(conn *c) { + item *it; + char *key; + size_t nkey; + /* Weird magic in add_delta forces me to pad here */ + char tmpbuf[INCR_MAX_STORAGE_LEN]; + uint64_t cas = 0; + + protocol_binary_response_incr* rsp = (protocol_binary_response_incr*)c->wbuf; + protocol_binary_request_incr* req = binary_get_request(c); + + assert(c != NULL); + assert(c->wsize >= sizeof(*rsp)); + + /* fix byteorder in the request */ + req->message.body.delta = ntohll(req->message.body.delta); + req->message.body.initial = ntohll(req->message.body.initial); + req->message.body.expiration = ntohl(req->message.body.expiration); + key = binary_get_key(c); + nkey = c->binary_header.request.keylen; + + if (settings.verbose > 1) { + int i; + fprintf(stderr, "incr "); + + for (i = 0; i < nkey; i++) { + fprintf(stderr, "%c", key[i]); + } + fprintf(stderr, " %lld, %llu, %d\n", + (long long)req->message.body.delta, + (long long)req->message.body.initial, + req->message.body.expiration); + } + + if (c->binary_header.request.cas != 0) { + cas = c->binary_header.request.cas; + } + switch(add_delta(c, key, nkey, c->cmd == PROTOCOL_BINARY_CMD_INCREMENT, + req->message.body.delta, tmpbuf, + &cas)) { + case OK: + rsp->message.body.value = htonll(strtoull(tmpbuf, NULL, 10)); + if (cas) { + c->cas = cas; + } + write_bin_response(c, &rsp->message.body, 0, 0, + sizeof(rsp->message.body.value)); + break; + case NON_NUMERIC: + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_DELTA_BADVAL, 0); + break; + case EOM: + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0); + break; + case DELTA_ITEM_NOT_FOUND: + if (req->message.body.expiration != 0xffffffff) { + /* Save some room for the response */ + rsp->message.body.value = 
htonll(req->message.body.initial); + it = item_alloc(key, nkey, 0, realtime(req->message.body.expiration), + INCR_MAX_STORAGE_LEN); + + if (it != NULL) { + snprintf(ITEM_data(it), INCR_MAX_STORAGE_LEN, "%llu", + (unsigned long long)req->message.body.initial); + + if (store_item(it, NREAD_ADD, c)) { + c->cas = ITEM_get_cas(it); + write_bin_response(c, &rsp->message.body, 0, 0, sizeof(rsp->message.body.value)); + } else { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_NOT_STORED, 0); + } + item_remove(it); /* release our reference */ + } else { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0); + } + } else { + pthread_mutex_lock(&c->thread->stats.mutex); + if (c->cmd == PROTOCOL_BINARY_CMD_INCREMENT) { + c->thread->stats.incr_misses++; + } else { + c->thread->stats.decr_misses++; + } + pthread_mutex_unlock(&c->thread->stats.mutex); + + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0); + } + break; + case DELTA_ITEM_CAS_MISMATCH: + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0); + break; + + default: + assert(0); + abort(); + } +} + +static void complete_update_bin(conn *c) { + protocol_binary_response_status eno = PROTOCOL_BINARY_RESPONSE_EINVAL; + enum store_item_type ret = NOT_STORED; + assert(c != NULL); + + item *it = c->item; + + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.slab_stats[it->slabs_clsid].set_cmds++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + /* We don't actually receive the trailing two characters in the bin + * protocol, so we're going to just set them here */ + *(ITEM_data(it) + it->nbytes - 2) = '\r'; + *(ITEM_data(it) + it->nbytes - 1) = '\n'; + + ret = store_item(it, c->cmd, c); + +#ifdef ENABLE_DTRACE + uint64_t cas = ITEM_get_cas(it); + switch (c->cmd) { + case NREAD_ADD: + MEMCACHED_COMMAND_ADD(c->sfd, ITEM_key(it), it->nkey, + (ret == STORED) ? it->nbytes : -1, cas); + break; + case NREAD_REPLACE: + MEMCACHED_COMMAND_REPLACE(c->sfd, ITEM_key(it), it->nkey, + (ret == STORED) ? 
it->nbytes : -1, cas); + break; + case NREAD_APPEND: + MEMCACHED_COMMAND_APPEND(c->sfd, ITEM_key(it), it->nkey, + (ret == STORED) ? it->nbytes : -1, cas); + break; + case NREAD_PREPEND: + MEMCACHED_COMMAND_PREPEND(c->sfd, ITEM_key(it), it->nkey, + (ret == STORED) ? it->nbytes : -1, cas); + break; + case NREAD_SET: + MEMCACHED_COMMAND_SET(c->sfd, ITEM_key(it), it->nkey, + (ret == STORED) ? it->nbytes : -1, cas); + break; + } +#endif + + switch (ret) { + case STORED: + /* Stored */ + write_bin_response(c, NULL, 0, 0, 0); + break; + case EXISTS: + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0); + break; + case NOT_FOUND: + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0); + break; + case NOT_STORED: + if (c->cmd == NREAD_ADD) { + eno = PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS; + } else if(c->cmd == NREAD_REPLACE) { + eno = PROTOCOL_BINARY_RESPONSE_KEY_ENOENT; + } else { + eno = PROTOCOL_BINARY_RESPONSE_NOT_STORED; + } + write_bin_error(c, eno, 0); + default: + assert(false); + abort(); + } + + item_remove(c->item); /* release the c->item reference */ + c->item = 0; +} + +static void process_bin_touch(conn *c) { + item *it; + + protocol_binary_response_get* rsp = (protocol_binary_response_get*)c->wbuf; + char* key = binary_get_key(c); + size_t nkey = c->binary_header.request.keylen; + protocol_binary_request_touch *t = (void *)&c->binary_header; + uint32_t exptime = ntohl(t->message.body.expiration); + + if (settings.verbose > 1) { + int ii; + /* May be GAT/GATQ/etc */ + fprintf(stderr, "<%d TOUCH ", c->sfd); + for (ii = 0; ii < nkey; ++ii) { + fprintf(stderr, "%c", key[ii]); + } + fprintf(stderr, "\n"); + } + + it = item_touch(key, nkey, realtime(exptime)); + + if (it) { + /* the length has two unnecessary bytes ("\r\n") */ + uint16_t keylen = 0; + uint32_t bodylen = sizeof(rsp->message.body) + (it->nbytes - 2); + + item_update(it); + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.touch_cmds++; + 
c->thread->stats.slab_stats[it->slabs_clsid].touch_hits++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + MEMCACHED_COMMAND_TOUCH(c->sfd, ITEM_key(it), it->nkey, + it->nbytes, ITEM_get_cas(it)); + + if (c->cmd == PROTOCOL_BINARY_CMD_TOUCH) { + bodylen -= it->nbytes - 2; + } else if (c->cmd == PROTOCOL_BINARY_CMD_GATK) { + bodylen += nkey; + keylen = nkey; + } + + add_bin_header(c, 0, sizeof(rsp->message.body), keylen, bodylen); + rsp->message.header.response.cas = htonll(ITEM_get_cas(it)); + + // add the flags + rsp->message.body.flags = htonl(strtoul(ITEM_suffix(it), NULL, 10)); + add_iov(c, &rsp->message.body, sizeof(rsp->message.body)); + + if (c->cmd == PROTOCOL_BINARY_CMD_GATK) { + add_iov(c, ITEM_key(it), nkey); + } + + /* Add the data minus the CRLF */ + if (c->cmd != PROTOCOL_BINARY_CMD_TOUCH) { + add_iov(c, ITEM_data(it), it->nbytes - 2); + } + + conn_set_state(c, conn_mwrite); + c->write_and_go = conn_new_cmd; + /* Remember this command so we can garbage collect it later */ + c->item = it; + } else { + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.touch_cmds++; + c->thread->stats.touch_misses++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + MEMCACHED_COMMAND_TOUCH(c->sfd, key, nkey, -1, 0); + + if (c->noreply) { + conn_set_state(c, conn_new_cmd); + } else { + if (c->cmd == PROTOCOL_BINARY_CMD_GATK) { + char *ofs = c->wbuf + sizeof(protocol_binary_response_header); + add_bin_header(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, + 0, nkey, nkey); + memcpy(ofs, key, nkey); + add_iov(c, ofs, nkey); + conn_set_state(c, conn_mwrite); + c->write_and_go = conn_new_cmd; + } else { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0); + } + } + } + + if (settings.detail_enabled) { + stats_prefix_record_get(key, nkey, NULL != it); + } +} + +static void process_bin_get(conn *c) { + item *it; + + protocol_binary_response_get* rsp = (protocol_binary_response_get*)c->wbuf; + char* key = binary_get_key(c); + size_t nkey = 
c->binary_header.request.keylen; + + if (settings.verbose > 1) { + int ii; + fprintf(stderr, "<%d GET ", c->sfd); + for (ii = 0; ii < nkey; ++ii) { + fprintf(stderr, "%c", key[ii]); + } + fprintf(stderr, "\n"); + } + + it = item_get(key, nkey); + if (it) { + /* the length has two unnecessary bytes ("\r\n") */ + uint16_t keylen = 0; + uint32_t bodylen = sizeof(rsp->message.body) + (it->nbytes - 2); + + item_update(it); + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.get_cmds++; + c->thread->stats.slab_stats[it->slabs_clsid].get_hits++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + MEMCACHED_COMMAND_GET(c->sfd, ITEM_key(it), it->nkey, + it->nbytes, ITEM_get_cas(it)); + + if (c->cmd == PROTOCOL_BINARY_CMD_GETK) { + bodylen += nkey; + keylen = nkey; + } + add_bin_header(c, 0, sizeof(rsp->message.body), keylen, bodylen); + rsp->message.header.response.cas = htonll(ITEM_get_cas(it)); + + // add the flags + rsp->message.body.flags = htonl(strtoul(ITEM_suffix(it), NULL, 10)); + add_iov(c, &rsp->message.body, sizeof(rsp->message.body)); + + if (c->cmd == PROTOCOL_BINARY_CMD_GETK) { + add_iov(c, ITEM_key(it), nkey); + } + + /* Add the data minus the CRLF */ + add_iov(c, ITEM_data(it), it->nbytes - 2); + conn_set_state(c, conn_mwrite); + c->write_and_go = conn_new_cmd; + /* Remember this command so we can garbage collect it later */ + c->item = it; + } else { + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.get_cmds++; + c->thread->stats.get_misses++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + MEMCACHED_COMMAND_GET(c->sfd, key, nkey, -1, 0); + + if (c->noreply) { + conn_set_state(c, conn_new_cmd); + } else { + if (c->cmd == PROTOCOL_BINARY_CMD_GETK) { + char *ofs = c->wbuf + sizeof(protocol_binary_response_header); + add_bin_header(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, + 0, nkey, nkey); + memcpy(ofs, key, nkey); + add_iov(c, ofs, nkey); + conn_set_state(c, conn_mwrite); + c->write_and_go = conn_new_cmd; + } else { + 
write_bin_error(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0); + } + } + } + + if (settings.detail_enabled) { + stats_prefix_record_get(key, nkey, NULL != it); + } +} + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif +static void append_bin_stats(const char *key, const uint16_t klen, + const char *val, const uint32_t vlen, + conn *c) { + char *buf = c->stats.buffer + c->stats.offset; + uint32_t bodylen = klen + vlen; + protocol_binary_response_header header = { + .response.magic = (uint8_t)PROTOCOL_BINARY_RES, + .response.opcode = (uint8_t)PROTOCOL_BINARY_CMD_STAT, + .response.keylen = (uint16_t)htons(klen), + .response.extlen = (uint8_t)0, + .response.datatype = (uint8_t)PROTOCOL_BINARY_RAW_BYTES, + .response.status = (uint16_t)0, + .response.bodylen = htonl(bodylen), + .response.opaque = c->opaque, + .response.cas = (uint64_t)0 + }; + + memcpy(buf, header.bytes, sizeof(header.response)); + buf += sizeof(header.response); + + if (klen > 0) { + memcpy(buf, key, klen); + buf += klen; + + if (vlen > 0) { + memcpy(buf, val, vlen); + } + } + + c->stats.offset += sizeof(header.response) + bodylen; +} + +static void append_ascii_stats(const char *key, const uint16_t klen, + const char *val, const uint32_t vlen, + conn *c) { + char *pos = c->stats.buffer + c->stats.offset; + uint32_t nbytes = 0; + int remaining = c->stats.size - c->stats.offset; + int room = remaining - 1; + + if (klen == 0 && vlen == 0) { + nbytes = snprintf(pos, room, "END\r\n"); + } else if (vlen == 0) { + nbytes = snprintf(pos, room, "STAT %s\r\n", key); + } else { + nbytes = snprintf(pos, room, "STAT %s %s\r\n", key, val); + } + + c->stats.offset += nbytes; +} + +static bool grow_stats_buf(conn *c, size_t needed) { + size_t nsize = c->stats.size; + size_t available = nsize - c->stats.offset; + bool rv = true; + + /* Special case: No buffer -- need to allocate fresh */ + if (c->stats.buffer == NULL) { + nsize = 1024; + available = c->stats.size = 
c->stats.offset = 0; + } + + while (needed > available) { + assert(nsize > 0); + nsize = nsize << 1; + available = nsize - c->stats.offset; + } + + if (nsize != c->stats.size) { + char *ptr = realloc(c->stats.buffer, nsize); + if (ptr) { + c->stats.buffer = ptr; + c->stats.size = nsize; + } else { + rv = false; + } + } + + return rv; +} + +static void append_stats(const char *key, const uint16_t klen, + const char *val, const uint32_t vlen, + const void *cookie) +{ + /* value without a key is invalid */ + if (klen == 0 && vlen > 0) { + return ; + } + + conn *c = (conn*)cookie; + + if (c->protocol == binary_prot) { + size_t needed = vlen + klen + sizeof(protocol_binary_response_header); + if (!grow_stats_buf(c, needed)) { + return ; + } + append_bin_stats(key, klen, val, vlen, c); + } else { + size_t needed = vlen + klen + 10; // 10 == "STAT = \r\n" + if (!grow_stats_buf(c, needed)) { + return ; + } + append_ascii_stats(key, klen, val, vlen, c); + } + + assert(c->stats.offset <= c->stats.size); +} + +static void process_bin_stat(conn *c) { + char *subcommand = binary_get_key(c); + size_t nkey = c->binary_header.request.keylen; + + if (settings.verbose > 1) { + int ii; + fprintf(stderr, "<%d STATS ", c->sfd); + for (ii = 0; ii < nkey; ++ii) { + fprintf(stderr, "%c", subcommand[ii]); + } + fprintf(stderr, "\n"); + } + + if (nkey == 0) { + /* request all statistics */ + server_stats(&append_stats, c); + (void)get_stats(NULL, 0, &append_stats, c); + } else if (strncmp(subcommand, "reset", 5) == 0) { + stats_reset(); + } else if (strncmp(subcommand, "settings", 8) == 0) { + process_stat_settings(&append_stats, c); + } else if (strncmp(subcommand, "detail", 6) == 0) { + char *subcmd_pos = subcommand + 6; + if (strncmp(subcmd_pos, " dump", 5) == 0) { + int len; + char *dump_buf = stats_prefix_dump(&len); + if (dump_buf == NULL || len <= 0) { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0); + return ; + } else { + append_stats("detailed", strlen("detailed"), 
dump_buf, len, c); + free(dump_buf); + } + } else if (strncmp(subcmd_pos, " on", 3) == 0) { + settings.detail_enabled = 1; + } else if (strncmp(subcmd_pos, " off", 4) == 0) { + settings.detail_enabled = 0; + } else { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0); + return; + } + } else { + if (get_stats(subcommand, nkey, &append_stats, c)) { + if (c->stats.buffer == NULL) { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0); + } else { + write_and_free(c, c->stats.buffer, c->stats.offset); + c->stats.buffer = NULL; + } + } else { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0); + } + + return; + } + + /* Append termination package and start the transfer */ + append_stats(NULL, 0, NULL, 0, c); + if (c->stats.buffer == NULL) { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0); + } else { + write_and_free(c, c->stats.buffer, c->stats.offset); + c->stats.buffer = NULL; + } +} + +static void bin_read_key(conn *c, enum bin_substates next_substate, int extra) { + assert(c); + c->substate = next_substate; + c->rlbytes = c->keylen + extra; + + /* Ok... do we have room for the extras and the key in the input buffer? */ + ptrdiff_t offset = c->rcurr + sizeof(protocol_binary_request_header) - c->rbuf; + if (c->rlbytes > c->rsize - offset) { + size_t nsize = c->rsize; + size_t size = c->rlbytes + sizeof(protocol_binary_request_header); + + while (size > nsize) { + nsize *= 2; + } + + if (nsize != c->rsize) { + if (settings.verbose > 1) { + fprintf(stderr, "%d: Need to grow buffer from %lu to %lu\n", + c->sfd, (unsigned long)c->rsize, (unsigned long)nsize); + } + char *newm = realloc(c->rbuf, nsize); + if (newm == NULL) { + if (settings.verbose) { + fprintf(stderr, "%d: Failed to grow buffer.. 
closing connection\n", + c->sfd); + } + conn_set_state(c, conn_closing); + return; + } + + c->rbuf= newm; + /* rcurr should point to the same offset in the packet */ + c->rcurr = c->rbuf + offset - sizeof(protocol_binary_request_header); + c->rsize = nsize; + } + if (c->rbuf != c->rcurr) { + memmove(c->rbuf, c->rcurr, c->rbytes); + c->rcurr = c->rbuf; + if (settings.verbose > 1) { + fprintf(stderr, "%d: Repack input buffer\n", c->sfd); + } + } + } + + /* preserve the header in the buffer.. */ + c->ritem = c->rcurr + sizeof(protocol_binary_request_header); + conn_set_state(c, conn_nread); +} + +/* Just write an error message and disconnect the client */ +static void handle_binary_protocol_error(conn *c) { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0); + if (settings.verbose) { + fprintf(stderr, "Protocol error (opcode %02x), close connection %d\n", + c->binary_header.request.opcode, c->sfd); + } + c->write_and_go = conn_closing; +} + +static void init_sasl_conn(conn *c) { + assert(c); + /* should something else be returned? */ + if (!settings.sasl) + return; + + if (!c->sasl_conn) { + int result=sasl_server_new("memcached", + NULL, + my_sasl_hostname[0] ? my_sasl_hostname : NULL, + NULL, NULL, + NULL, 0, &c->sasl_conn); + if (result != SASL_OK) { + if (settings.verbose) { + fprintf(stderr, "Failed to initialize SASL conn.\n"); + } + c->sasl_conn = NULL; + } + } +} + +static void bin_list_sasl_mechs(conn *c) { + // Guard against a disabled SASL. 
+ if (!settings.sasl) { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND, + c->binary_header.request.bodylen + - c->binary_header.request.keylen); + return; + } + + init_sasl_conn(c); + const char *result_string = NULL; + unsigned int string_length = 0; + int result=sasl_listmech(c->sasl_conn, NULL, + "", /* What to prepend the string with */ + " ", /* What to separate mechanisms with */ + "", /* What to append to the string */ + &result_string, &string_length, + NULL); + if (result != SASL_OK) { + /* Perhaps there's a better error for this... */ + if (settings.verbose) { + fprintf(stderr, "Failed to list SASL mechanisms.\n"); + } + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0); + return; + } + write_bin_response(c, (char*)result_string, 0, 0, string_length); +} + +static void process_bin_sasl_auth(conn *c) { + // Guard for handling disabled SASL on the server. + if (!settings.sasl) { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND, + c->binary_header.request.bodylen + - c->binary_header.request.keylen); + return; + } + + assert(c->binary_header.request.extlen == 0); + + int nkey = c->binary_header.request.keylen; + int vlen = c->binary_header.request.bodylen - nkey; + + if (nkey > MAX_SASL_MECH_LEN) { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_EINVAL, vlen); + c->write_and_go = conn_swallow; + return; + } + + char *key = binary_get_key(c); + assert(key); + + item *it = item_alloc(key, nkey, 0, 0, vlen); + + if (it == 0) { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen); + c->write_and_go = conn_swallow; + return; + } + + c->item = it; + c->ritem = ITEM_data(it); + c->rlbytes = vlen; + conn_set_state(c, conn_nread); + c->substate = bin_reading_sasl_auth_data; +} + +static void process_bin_complete_sasl_auth(conn *c) { + assert(settings.sasl); + const char *out = NULL; + unsigned int outlen = 0; + + assert(c->item); + init_sasl_conn(c); + + int nkey = c->binary_header.request.keylen; + int vlen = 
c->binary_header.request.bodylen - nkey; + + char mech[nkey+1]; + memcpy(mech, ITEM_key((item*)c->item), nkey); + mech[nkey] = 0x00; + + if (settings.verbose) + fprintf(stderr, "mech: ``%s'' with %d bytes of data\n", mech, vlen); + + const char *challenge = vlen == 0 ? NULL : ITEM_data((item*) c->item); + + int result=-1; + + switch (c->cmd) { + case PROTOCOL_BINARY_CMD_SASL_AUTH: + result = sasl_server_start(c->sasl_conn, mech, + challenge, vlen, + &out, &outlen); + break; + case PROTOCOL_BINARY_CMD_SASL_STEP: + result = sasl_server_step(c->sasl_conn, + challenge, vlen, + &out, &outlen); + break; + default: + assert(false); /* CMD should be one of the above */ + /* This code is pretty much impossible, but makes the compiler + happier */ + if (settings.verbose) { + fprintf(stderr, "Unhandled command %d with challenge %s\n", + c->cmd, challenge); + } + break; + } + + item_unlink(c->item); + + if (settings.verbose) { + fprintf(stderr, "sasl result code: %d\n", result); + } + + switch(result) { + case SASL_OK: + write_bin_response(c, "Authenticated", 0, 0, strlen("Authenticated")); + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.auth_cmds++; + pthread_mutex_unlock(&c->thread->stats.mutex); + break; + case SASL_CONTINUE: + add_bin_header(c, PROTOCOL_BINARY_RESPONSE_AUTH_CONTINUE, 0, 0, outlen); + if(outlen > 0) { + add_iov(c, out, outlen); + } + conn_set_state(c, conn_mwrite); + c->write_and_go = conn_new_cmd; + break; + default: + if (settings.verbose) + fprintf(stderr, "Unknown sasl response: %d\n", result); + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0); + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.auth_cmds++; + c->thread->stats.auth_errors++; + pthread_mutex_unlock(&c->thread->stats.mutex); + } +} + +static bool authenticated(conn *c) { + assert(settings.sasl); + bool rv = false; + + switch (c->cmd) { + case PROTOCOL_BINARY_CMD_SASL_LIST_MECHS: /* FALLTHROUGH */ + case PROTOCOL_BINARY_CMD_SASL_AUTH: /* 
FALLTHROUGH */ + case PROTOCOL_BINARY_CMD_SASL_STEP: /* FALLTHROUGH */ + case PROTOCOL_BINARY_CMD_VERSION: /* FALLTHROUGH */ + rv = true; + break; + default: + if (c->sasl_conn) { + const void *uname = NULL; + sasl_getprop(c->sasl_conn, SASL_USERNAME, &uname); + rv = uname != NULL; + } + } + + if (settings.verbose > 1) { + fprintf(stderr, "authenticated() in cmd 0x%02x is %s\n", + c->cmd, rv ? "true" : "false"); + } + + return rv; +} + +static void dispatch_bin_command(conn *c) { + int protocol_error = 0; + + int extlen = c->binary_header.request.extlen; + int keylen = c->binary_header.request.keylen; + uint32_t bodylen = c->binary_header.request.bodylen; + + if (settings.sasl && !authenticated(c)) { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0); + c->write_and_go = conn_closing; + return; + } + + MEMCACHED_PROCESS_COMMAND_START(c->sfd, c->rcurr, c->rbytes); + c->noreply = true; + + /* binprot supports 16bit keys, but internals are still 8bit */ + if (keylen > KEY_MAX_LENGTH) { + handle_binary_protocol_error(c); + return; + } + + switch (c->cmd) { + case PROTOCOL_BINARY_CMD_SETQ: + c->cmd = PROTOCOL_BINARY_CMD_SET; + break; + case PROTOCOL_BINARY_CMD_ADDQ: + c->cmd = PROTOCOL_BINARY_CMD_ADD; + break; + case PROTOCOL_BINARY_CMD_REPLACEQ: + c->cmd = PROTOCOL_BINARY_CMD_REPLACE; + break; + case PROTOCOL_BINARY_CMD_DELETEQ: + c->cmd = PROTOCOL_BINARY_CMD_DELETE; + break; + case PROTOCOL_BINARY_CMD_INCREMENTQ: + c->cmd = PROTOCOL_BINARY_CMD_INCREMENT; + break; + case PROTOCOL_BINARY_CMD_DECREMENTQ: + c->cmd = PROTOCOL_BINARY_CMD_DECREMENT; + break; + case PROTOCOL_BINARY_CMD_QUITQ: + c->cmd = PROTOCOL_BINARY_CMD_QUIT; + break; + case PROTOCOL_BINARY_CMD_FLUSHQ: + c->cmd = PROTOCOL_BINARY_CMD_FLUSH; + break; + case PROTOCOL_BINARY_CMD_APPENDQ: + c->cmd = PROTOCOL_BINARY_CMD_APPEND; + break; + case PROTOCOL_BINARY_CMD_PREPENDQ: + c->cmd = PROTOCOL_BINARY_CMD_PREPEND; + break; + case PROTOCOL_BINARY_CMD_GETQ: + c->cmd = PROTOCOL_BINARY_CMD_GET; + break; + 
case PROTOCOL_BINARY_CMD_GETKQ: + c->cmd = PROTOCOL_BINARY_CMD_GETK; + break; + case PROTOCOL_BINARY_CMD_GATQ: + c->cmd = PROTOCOL_BINARY_CMD_GAT; + break; + case PROTOCOL_BINARY_CMD_GATKQ: + c->cmd = PROTOCOL_BINARY_CMD_GAT; + break; + default: + c->noreply = false; + } + + switch (c->cmd) { + case PROTOCOL_BINARY_CMD_VERSION: + if (extlen == 0 && keylen == 0 && bodylen == 0) { + write_bin_response(c, VERSION, 0, 0, strlen(VERSION)); + } else { + protocol_error = 1; + } + break; + case PROTOCOL_BINARY_CMD_FLUSH: + if (keylen == 0 && bodylen == extlen && (extlen == 0 || extlen == 4)) { + bin_read_key(c, bin_read_flush_exptime, extlen); + } else { + protocol_error = 1; + } + break; + case PROTOCOL_BINARY_CMD_NOOP: + if (extlen == 0 && keylen == 0 && bodylen == 0) { + write_bin_response(c, NULL, 0, 0, 0); + } else { + protocol_error = 1; + } + break; + case PROTOCOL_BINARY_CMD_SET: /* FALLTHROUGH */ + case PROTOCOL_BINARY_CMD_ADD: /* FALLTHROUGH */ + case PROTOCOL_BINARY_CMD_REPLACE: + if (extlen == 8 && keylen != 0 && bodylen >= (keylen + 8)) { + bin_read_key(c, bin_reading_set_header, 8); + } else { + protocol_error = 1; + } + break; + case PROTOCOL_BINARY_CMD_GETQ: /* FALLTHROUGH */ + case PROTOCOL_BINARY_CMD_GET: /* FALLTHROUGH */ + case PROTOCOL_BINARY_CMD_GETKQ: /* FALLTHROUGH */ + case PROTOCOL_BINARY_CMD_GETK: + if (extlen == 0 && bodylen == keylen && keylen > 0) { + bin_read_key(c, bin_reading_get_key, 0); + } else { + protocol_error = 1; + } + break; + case PROTOCOL_BINARY_CMD_DELETE: + if (keylen > 0 && extlen == 0 && bodylen == keylen) { + bin_read_key(c, bin_reading_del_header, extlen); + } else { + protocol_error = 1; + } + break; + case PROTOCOL_BINARY_CMD_INCREMENT: + case PROTOCOL_BINARY_CMD_DECREMENT: + if (keylen > 0 && extlen == 20 && bodylen == (keylen + extlen)) { + bin_read_key(c, bin_reading_incr_header, 20); + } else { + protocol_error = 1; + } + break; + case PROTOCOL_BINARY_CMD_APPEND: + case PROTOCOL_BINARY_CMD_PREPEND: + if (keylen > 0 && 
extlen == 0) { + bin_read_key(c, bin_reading_set_header, 0); + } else { + protocol_error = 1; + } + break; + case PROTOCOL_BINARY_CMD_STAT: + if (extlen == 0) { + bin_read_key(c, bin_reading_stat, 0); + } else { + protocol_error = 1; + } + break; + case PROTOCOL_BINARY_CMD_QUIT: + if (keylen == 0 && extlen == 0 && bodylen == 0) { + write_bin_response(c, NULL, 0, 0, 0); + c->write_and_go = conn_closing; + if (c->noreply) { + conn_set_state(c, conn_closing); + } + } else { + protocol_error = 1; + } + break; + case PROTOCOL_BINARY_CMD_SASL_LIST_MECHS: + if (extlen == 0 && keylen == 0 && bodylen == 0) { + bin_list_sasl_mechs(c); + } else { + protocol_error = 1; + } + break; + case PROTOCOL_BINARY_CMD_SASL_AUTH: + case PROTOCOL_BINARY_CMD_SASL_STEP: + if (extlen == 0 && keylen != 0) { + bin_read_key(c, bin_reading_sasl_auth, 0); + } else { + protocol_error = 1; + } + break; + case PROTOCOL_BINARY_CMD_TOUCH: + case PROTOCOL_BINARY_CMD_GAT: + case PROTOCOL_BINARY_CMD_GATQ: + case PROTOCOL_BINARY_CMD_GATK: + case PROTOCOL_BINARY_CMD_GATKQ: + if (extlen == 4 && keylen != 0) { + bin_read_key(c, bin_reading_touch_key, 4); + } else { + protocol_error = 1; + } + break; + default: + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND, bodylen); + } + + if (protocol_error) + handle_binary_protocol_error(c); +} + +static void process_bin_update(conn *c) { + char *key; + int nkey; + int vlen; + item *it; + protocol_binary_request_set* req = binary_get_request(c); + + assert(c != NULL); + + key = binary_get_key(c); + nkey = c->binary_header.request.keylen; + + /* fix byteorder in the request */ + req->message.body.flags = ntohl(req->message.body.flags); + req->message.body.expiration = ntohl(req->message.body.expiration); + + vlen = c->binary_header.request.bodylen - (nkey + c->binary_header.request.extlen); + + if (settings.verbose > 1) { + int ii; + if (c->cmd == PROTOCOL_BINARY_CMD_ADD) { + fprintf(stderr, "<%d ADD ", c->sfd); + } else if (c->cmd == 
PROTOCOL_BINARY_CMD_SET) { + fprintf(stderr, "<%d SET ", c->sfd); + } else { + fprintf(stderr, "<%d REPLACE ", c->sfd); + } + for (ii = 0; ii < nkey; ++ii) { + fprintf(stderr, "%c", key[ii]); + } + + fprintf(stderr, " Value len is %d", vlen); + fprintf(stderr, "\n"); + } + + if (settings.detail_enabled) { + stats_prefix_record_set(key, nkey); + } + + it = item_alloc(key, nkey, req->message.body.flags, + realtime(req->message.body.expiration), vlen+2); + + if (it == 0) { + if (! item_size_ok(nkey, req->message.body.flags, vlen + 2)) { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_E2BIG, vlen); + } else { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen); + } + + /* Avoid stale data persisting in cache because we failed alloc. + * Unacceptable for SET. Anywhere else too? */ + if (c->cmd == PROTOCOL_BINARY_CMD_SET) { + it = item_get(key, nkey); + if (it) { + item_unlink(it); + item_remove(it); + } + } + + /* swallow the data line */ + c->write_and_go = conn_swallow; + return; + } + + ITEM_set_cas(it, c->binary_header.request.cas); + + switch (c->cmd) { + case PROTOCOL_BINARY_CMD_ADD: + c->cmd = NREAD_ADD; + break; + case PROTOCOL_BINARY_CMD_SET: + c->cmd = NREAD_SET; + break; + case PROTOCOL_BINARY_CMD_REPLACE: + c->cmd = NREAD_REPLACE; + break; + default: + assert(0); + } + + if (ITEM_get_cas(it) != 0) { + c->cmd = NREAD_CAS; + } + + c->item = it; + c->ritem = ITEM_data(it); + c->rlbytes = vlen; + conn_set_state(c, conn_nread); + c->substate = bin_read_set_value; +} + +static void process_bin_append_prepend(conn *c) { + char *key; + int nkey; + int vlen; + item *it; + + assert(c != NULL); + + key = binary_get_key(c); + nkey = c->binary_header.request.keylen; + vlen = c->binary_header.request.bodylen - nkey; + + if (settings.verbose > 1) { + fprintf(stderr, "Value len is %d\n", vlen); + } + + if (settings.detail_enabled) { + stats_prefix_record_set(key, nkey); + } + + it = item_alloc(key, nkey, 0, 0, vlen+2); + + if (it == 0) { + if (! 
item_size_ok(nkey, 0, vlen + 2)) { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_E2BIG, vlen); + } else { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen); + } + /* swallow the data line */ + c->write_and_go = conn_swallow; + return; + } + + ITEM_set_cas(it, c->binary_header.request.cas); + + switch (c->cmd) { + case PROTOCOL_BINARY_CMD_APPEND: + c->cmd = NREAD_APPEND; + break; + case PROTOCOL_BINARY_CMD_PREPEND: + c->cmd = NREAD_PREPEND; + break; + default: + assert(0); + } + + c->item = it; + c->ritem = ITEM_data(it); + c->rlbytes = vlen; + conn_set_state(c, conn_nread); + c->substate = bin_read_set_value; +} + +static void process_bin_flush(conn *c) { + time_t exptime = 0; + protocol_binary_request_flush* req = binary_get_request(c); + + if (c->binary_header.request.extlen == sizeof(req->message.body)) { + exptime = ntohl(req->message.body.expiration); + } + + if (exptime > 0) { + settings.oldest_live = realtime(exptime) - 1; + } else { + settings.oldest_live = current_time - 1; + } + item_flush_expired(); + + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.flush_cmds++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + write_bin_response(c, NULL, 0, 0, 0); +} + +static void process_bin_delete(conn *c) { + item *it; + + protocol_binary_request_delete* req = binary_get_request(c); + + char* key = binary_get_key(c); + size_t nkey = c->binary_header.request.keylen; + + assert(c != NULL); + + if (settings.verbose > 1) { + fprintf(stderr, "Deleting %s\n", key); + } + + if (settings.detail_enabled) { + stats_prefix_record_delete(key, nkey); + } + + it = item_get(key, nkey); + if (it) { + uint64_t cas = ntohll(req->message.header.request.cas); + if (cas == 0 || cas == ITEM_get_cas(it)) { + MEMCACHED_COMMAND_DELETE(c->sfd, ITEM_key(it), it->nkey); + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.slab_stats[it->slabs_clsid].delete_hits++; + pthread_mutex_unlock(&c->thread->stats.mutex); + item_unlink(it); + 
write_bin_response(c, NULL, 0, 0, 0); + } else { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0); + } + item_remove(it); /* release our reference */ + } else { + write_bin_error(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0); + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.delete_misses++; + pthread_mutex_unlock(&c->thread->stats.mutex); + } +} + +static void complete_nread_binary(conn *c) { + assert(c != NULL); + assert(c->cmd >= 0); + + switch(c->substate) { + case bin_reading_set_header: + if (c->cmd == PROTOCOL_BINARY_CMD_APPEND || + c->cmd == PROTOCOL_BINARY_CMD_PREPEND) { + process_bin_append_prepend(c); + } else { + process_bin_update(c); + } + break; + case bin_read_set_value: + complete_update_bin(c); + break; + case bin_reading_get_key: + process_bin_get(c); + break; + case bin_reading_touch_key: + process_bin_touch(c); + break; + case bin_reading_stat: + process_bin_stat(c); + break; + case bin_reading_del_header: + process_bin_delete(c); + break; + case bin_reading_incr_header: + complete_incr_bin(c); + break; + case bin_read_flush_exptime: + process_bin_flush(c); + break; + case bin_reading_sasl_auth: + process_bin_sasl_auth(c); + break; + case bin_reading_sasl_auth_data: + process_bin_complete_sasl_auth(c); + break; + case bin_reading_cas_header: + assert(0); + case bin_no_state: + assert(0); + default: + fprintf(stderr, "Not handling substate %d\n", c->substate); + assert(0); + } +} + +static void reset_cmd_handler(conn *c) { + c->cmd = -1; + c->substate = bin_no_state; + if(c->item != NULL) { + item_remove(c->item); + c->item = NULL; + } + conn_shrink(c); + if (c->rbytes > 0) { + conn_set_state(c, conn_parse_cmd); + } else { + conn_set_state(c, conn_waiting); + } +} + +static void complete_nread(conn *c) { + assert(c != NULL); + assert(c->protocol == ascii_prot + || c->protocol == binary_prot); + + if (c->protocol == ascii_prot) { + complete_nread_ascii(c); + } else if (c->protocol == binary_prot) { + 
complete_nread_binary(c); + } +} + +/* + * Stores an item in the cache according to the semantics of one of the set + * commands. In threaded mode, this is protected by the cache lock. + * + * Returns the state of storage. + */ +enum store_item_type do_store_item(item *it, int comm, conn *c, const uint32_t hv) { + char *key = ITEM_key(it); + item *old_it = do_item_get(key, it->nkey, hv); + enum store_item_type stored = NOT_STORED; + + item *new_it = NULL; + int flags; + + if (old_it != NULL && comm == NREAD_ADD) { + /* add only adds a nonexistent item, but promote to head of LRU */ + do_item_update(old_it); + } else if (!old_it && (comm == NREAD_REPLACE + || comm == NREAD_APPEND || comm == NREAD_PREPEND)) + { + /* replace only replaces an existing value; don't store */ + } else if (comm == NREAD_CAS) { + /* validate cas operation */ + if(old_it == NULL) { + // LRU expired + stored = NOT_FOUND; + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.cas_misses++; + pthread_mutex_unlock(&c->thread->stats.mutex); + } + else if (ITEM_get_cas(it) == ITEM_get_cas(old_it)) { + // cas validates + // it and old_it may belong to different classes. + // I'm updating the stats for the one that's getting pushed out + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.slab_stats[old_it->slabs_clsid].cas_hits++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + item_replace(old_it, it, hv); + stored = STORED; + } else { + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.slab_stats[old_it->slabs_clsid].cas_badval++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + if(settings.verbose > 1) { + fprintf(stderr, "CAS: failure: expected %llu, got %llu\n", + (unsigned long long)ITEM_get_cas(old_it), + (unsigned long long)ITEM_get_cas(it)); + } + stored = EXISTS; + } + } else { + /* + * Append - combine new and old record into single one. Here it's + * atomic and thread-safe. 
+ */ + if (comm == NREAD_APPEND || comm == NREAD_PREPEND) { + /* + * Validate CAS + */ + if (ITEM_get_cas(it) != 0) { + // CAS much be equal + if (ITEM_get_cas(it) != ITEM_get_cas(old_it)) { + stored = EXISTS; + } + } + + if (stored == NOT_STORED) { + /* we have it and old_it here - alloc memory to hold both */ + /* flags was already lost - so recover them from ITEM_suffix(it) */ + + flags = (int) strtol(ITEM_suffix(old_it), (char **) NULL, 10); + + new_it = item_alloc(key, it->nkey, flags, old_it->exptime, it->nbytes + old_it->nbytes - 2 /* CRLF */); + + if (new_it == NULL) { + /* SERVER_ERROR out of memory */ + if (old_it != NULL) + do_item_remove(old_it); + + return NOT_STORED; + } + + /* copy data from it and old_it to new_it */ + + if (comm == NREAD_APPEND) { + memcpy(ITEM_data(new_it), ITEM_data(old_it), old_it->nbytes); + memcpy(ITEM_data(new_it) + old_it->nbytes - 2 /* CRLF */, ITEM_data(it), it->nbytes); + } else { + /* NREAD_PREPEND */ + memcpy(ITEM_data(new_it), ITEM_data(it), it->nbytes); + memcpy(ITEM_data(new_it) + it->nbytes - 2 /* CRLF */, ITEM_data(old_it), old_it->nbytes); + } + + it = new_it; + } + } + + if (stored == NOT_STORED) { + if (old_it != NULL) + item_replace(old_it, it, hv); + else + do_item_link(it, hv); + + c->cas = ITEM_get_cas(it); + + stored = STORED; + } + } + + if (old_it != NULL) + do_item_remove(old_it); /* release our reference */ + if (new_it != NULL) + do_item_remove(new_it); + + if (stored == STORED) { + c->cas = ITEM_get_cas(it); + } + + return stored; +} + +typedef struct token_s { + char *value; + size_t length; +} token_t; + +#define COMMAND_TOKEN 0 +#define SUBCOMMAND_TOKEN 1 +#define KEY_TOKEN 1 + +#define MAX_TOKENS 8 + +/* + * Tokenize the command string by replacing whitespace with '\0' and update + * the token array tokens with pointer to start of each token and length. + * Returns total number of tokens. 
The last valid token is the terminal + * token (value points to the first unprocessed character of the string and + * length zero). + * + * Usage example: + * + * while(tokenize_command(command, ncommand, tokens, max_tokens) > 0) { + * for(int ix = 0; tokens[ix].length != 0; ix++) { + * ... + * } + * ncommand = tokens[ix].value - command; + * command = tokens[ix].value; + * } + */ +static size_t tokenize_command(char *command, token_t *tokens, const size_t max_tokens) { + char *s, *e; + size_t ntokens = 0; + size_t len = strlen(command); + unsigned int i = 0; + + assert(command != NULL && tokens != NULL && max_tokens > 1); + + s = e = command; + for (i = 0; i < len; i++) { + if (*e == ' ') { + if (s != e) { + tokens[ntokens].value = s; + tokens[ntokens].length = e - s; + ntokens++; + *e = '\0'; + if (ntokens == max_tokens - 1) { + e++; + s = e; /* so we don't add an extra token */ + break; + } + } + s = e + 1; + } + e++; + } + + if (s != e) { + tokens[ntokens].value = s; + tokens[ntokens].length = e - s; + ntokens++; + } + + /* + * If we scanned the whole string, the terminal value pointer is null, + * otherwise it is the first unprocessed character. + */ + tokens[ntokens].value = *e == '\0' ? NULL : e; + tokens[ntokens].length = 0; + ntokens++; + + return ntokens; +} + +/* set up a connection to write a buffer then free it, used for stats */ +static void write_and_free(conn *c, char *buf, int bytes) { + if (buf) { + c->write_and_free = buf; + c->wcurr = buf; + c->wbytes = bytes; + conn_set_state(c, conn_write); + c->write_and_go = conn_new_cmd; + } else { + out_string(c, "SERVER_ERROR out of memory writing stats"); + } +} + +static inline bool set_noreply_maybe(conn *c, token_t *tokens, size_t ntokens) +{ + int noreply_index = ntokens - 2; + + /* + NOTE: this function is not the first place where we are going to + send the reply. We could send it instead from process_command() + if the request line has wrong number of tokens. 
However parsing + malformed line for "noreply" option is not reliable anyway, so + it can't be helped. + */ + if (tokens[noreply_index].value + && strcmp(tokens[noreply_index].value, "noreply") == 0) { + c->noreply = true; + } + return c->noreply; +} + +void append_stat(const char *name, ADD_STAT add_stats, conn *c, + const char *fmt, ...) { + char val_str[STAT_VAL_LEN]; + int vlen; + va_list ap; + + assert(name); + assert(add_stats); + assert(c); + assert(fmt); + + va_start(ap, fmt); + vlen = vsnprintf(val_str, sizeof(val_str) - 1, fmt, ap); + va_end(ap); + + add_stats(name, strlen(name), val_str, vlen, c); +} + +inline static void process_stats_detail(conn *c, const char *command) { + assert(c != NULL); + + if (strcmp(command, "on") == 0) { + settings.detail_enabled = 1; + out_string(c, "OK"); + } + else if (strcmp(command, "off") == 0) { + settings.detail_enabled = 0; + out_string(c, "OK"); + } + else if (strcmp(command, "dump") == 0) { + int len; + char *stats = stats_prefix_dump(&len); + write_and_free(c, stats, len); + } + else { + out_string(c, "CLIENT_ERROR usage: stats detail on|off|dump"); + } +} + +/* return server specific stats only */ +static void server_stats(ADD_STAT add_stats, conn *c) { + pid_t pid = getpid(); + rel_time_t now = current_time; + + struct thread_stats thread_stats; + threadlocal_stats_aggregate(&thread_stats); + struct slab_stats slab_stats; + slab_stats_aggregate(&thread_stats, &slab_stats); + +#ifndef WIN32 + struct rusage usage; + getrusage(RUSAGE_SELF, &usage); +#endif /* !WIN32 */ + + STATS_LOCK(); + + APPEND_STAT("pid", "%lu", (long)pid); + APPEND_STAT("uptime", "%u", now); + APPEND_STAT("time", "%ld", now + (long)process_started); + APPEND_STAT("version", "%s", VERSION); + APPEND_STAT("libevent", "%s", event_get_version()); + APPEND_STAT("pointer_size", "%d", (int)(8 * sizeof(void *))); + +#ifndef WIN32 + append_stat("rusage_user", add_stats, c, "%ld.%06ld", + (long)usage.ru_utime.tv_sec, + (long)usage.ru_utime.tv_usec); + 
append_stat("rusage_system", add_stats, c, "%ld.%06ld", + (long)usage.ru_stime.tv_sec, + (long)usage.ru_stime.tv_usec); +#endif /* !WIN32 */ + + APPEND_STAT("curr_connections", "%u", stats.curr_conns - 1); + APPEND_STAT("total_connections", "%u", stats.total_conns); + if (settings.maxconns_fast) { + APPEND_STAT("rejected_connections", "%llu", (unsigned long long)stats.rejected_conns); + } + APPEND_STAT("connection_structures", "%u", stats.conn_structs); + APPEND_STAT("reserved_fds", "%u", stats.reserved_fds); + APPEND_STAT("cmd_get", "%llu", (unsigned long long)thread_stats.get_cmds); + APPEND_STAT("cmd_set", "%llu", (unsigned long long)slab_stats.set_cmds); + APPEND_STAT("cmd_flush", "%llu", (unsigned long long)thread_stats.flush_cmds); + APPEND_STAT("cmd_touch", "%llu", (unsigned long long)thread_stats.touch_cmds); + APPEND_STAT("get_hits", "%llu", (unsigned long long)slab_stats.get_hits); + APPEND_STAT("get_misses", "%llu", (unsigned long long)thread_stats.get_misses); + APPEND_STAT("delete_misses", "%llu", (unsigned long long)thread_stats.delete_misses); + APPEND_STAT("delete_hits", "%llu", (unsigned long long)slab_stats.delete_hits); + APPEND_STAT("incr_misses", "%llu", (unsigned long long)thread_stats.incr_misses); + APPEND_STAT("incr_hits", "%llu", (unsigned long long)slab_stats.incr_hits); + APPEND_STAT("decr_misses", "%llu", (unsigned long long)thread_stats.decr_misses); + APPEND_STAT("decr_hits", "%llu", (unsigned long long)slab_stats.decr_hits); + APPEND_STAT("cas_misses", "%llu", (unsigned long long)thread_stats.cas_misses); + APPEND_STAT("cas_hits", "%llu", (unsigned long long)slab_stats.cas_hits); + APPEND_STAT("cas_badval", "%llu", (unsigned long long)slab_stats.cas_badval); + APPEND_STAT("touch_hits", "%llu", (unsigned long long)slab_stats.touch_hits); + APPEND_STAT("touch_misses", "%llu", (unsigned long long)thread_stats.touch_misses); + APPEND_STAT("auth_cmds", "%llu", (unsigned long long)thread_stats.auth_cmds); + APPEND_STAT("auth_errors", 
"%llu", (unsigned long long)thread_stats.auth_errors); + APPEND_STAT("bytes_read", "%llu", (unsigned long long)thread_stats.bytes_read); + APPEND_STAT("bytes_written", "%llu", (unsigned long long)thread_stats.bytes_written); + APPEND_STAT("limit_maxbytes", "%llu", (unsigned long long)settings.maxbytes); + APPEND_STAT("accepting_conns", "%u", stats.accepting_conns); + APPEND_STAT("listen_disabled_num", "%llu", (unsigned long long)stats.listen_disabled_num); + APPEND_STAT("threads", "%d", settings.num_threads); + APPEND_STAT("conn_yields", "%llu", (unsigned long long)thread_stats.conn_yields); + APPEND_STAT("hash_power_level", "%u", stats.hash_power_level); + APPEND_STAT("hash_bytes", "%llu", (unsigned long long)stats.hash_bytes); + APPEND_STAT("hash_is_expanding", "%u", stats.hash_is_expanding); + APPEND_STAT("expired_unfetched", "%llu", stats.expired_unfetched); + APPEND_STAT("evicted_unfetched", "%llu", stats.evicted_unfetched); + if (settings.slab_reassign) { + APPEND_STAT("slab_reassign_running", "%u", stats.slab_reassign_running); + APPEND_STAT("slabs_moved", "%llu", stats.slabs_moved); + } + STATS_UNLOCK(); +} + +static void process_stat_settings(ADD_STAT add_stats, void *c) { + assert(add_stats); + APPEND_STAT("maxbytes", "%u", (unsigned int)settings.maxbytes); + APPEND_STAT("maxconns", "%d", settings.maxconns); + APPEND_STAT("tcpport", "%d", settings.port); + APPEND_STAT("udpport", "%d", settings.udpport); + APPEND_STAT("inter", "%s", settings.inter ? settings.inter : "NULL"); + APPEND_STAT("verbosity", "%d", settings.verbose); + APPEND_STAT("oldest", "%lu", (unsigned long)settings.oldest_live); + APPEND_STAT("evictions", "%s", settings.evict_to_free ? "on" : "off"); + APPEND_STAT("domain_socket", "%s", + settings.socketpath ? 
settings.socketpath : "NULL"); + APPEND_STAT("umask", "%o", settings.access); + APPEND_STAT("growth_factor", "%.2f", settings.factor); + APPEND_STAT("chunk_size", "%d", settings.chunk_size); + APPEND_STAT("num_threads", "%d", settings.num_threads); + APPEND_STAT("num_threads_per_udp", "%d", settings.num_threads_per_udp); + APPEND_STAT("stat_key_prefix", "%c", settings.prefix_delimiter); + APPEND_STAT("detail_enabled", "%s", + settings.detail_enabled ? "yes" : "no"); + APPEND_STAT("reqs_per_event", "%d", settings.reqs_per_event); + APPEND_STAT("cas_enabled", "%s", settings.use_cas ? "yes" : "no"); + APPEND_STAT("tcp_backlog", "%d", settings.backlog); + APPEND_STAT("binding_protocol", "%s", + prot_text(settings.binding_protocol)); + APPEND_STAT("auth_enabled_sasl", "%s", settings.sasl ? "yes" : "no"); + APPEND_STAT("item_size_max", "%d", settings.item_size_max); + APPEND_STAT("maxconns_fast", "%s", settings.maxconns_fast ? "yes" : "no"); + APPEND_STAT("hashpower_init", "%d", settings.hashpower_init); + APPEND_STAT("slab_reassign", "%s", settings.slab_reassign ? "yes" : "no"); + APPEND_STAT("slab_automove", "%s", settings.slab_automove ? "yes" : "no"); +} + +static void process_stat(conn *c, token_t *tokens, const size_t ntokens) { + const char *subcommand = tokens[SUBCOMMAND_TOKEN].value; + assert(c != NULL); + + if (ntokens < 2) { + out_string(c, "CLIENT_ERROR bad command line"); + return; + } + + if (ntokens == 2) { + server_stats(&append_stats, c); + (void)get_stats(NULL, 0, &append_stats, c); + } else if (strcmp(subcommand, "reset") == 0) { + stats_reset(); + out_string(c, "RESET"); + return ; + } else if (strcmp(subcommand, "detail") == 0) { + /* NOTE: how to tackle detail with binary? 
*/ + if (ntokens < 4) + process_stats_detail(c, ""); /* outputs the error message */ + else + process_stats_detail(c, tokens[2].value); + /* Output already generated */ + return ; + } else if (strcmp(subcommand, "settings") == 0) { + process_stat_settings(&append_stats, c); + } else if (strcmp(subcommand, "cachedump") == 0) { + char *buf; + unsigned int bytes, id, limit = 0; + + if (ntokens < 5) { + out_string(c, "CLIENT_ERROR bad command line"); + return; + } + + if (!safe_strtoul(tokens[2].value, &id) || + !safe_strtoul(tokens[3].value, &limit)) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + if (id >= POWER_LARGEST) { + out_string(c, "CLIENT_ERROR Illegal slab id"); + return; + } + + buf = item_cachedump(id, limit, &bytes); + write_and_free(c, buf, bytes); + return ; + } else { + /* getting here means that the subcommand is either engine specific or + is invalid. query the engine and see. */ + if (get_stats(subcommand, strlen(subcommand), &append_stats, c)) { + if (c->stats.buffer == NULL) { + out_string(c, "SERVER_ERROR out of memory writing stats"); + } else { + write_and_free(c, c->stats.buffer, c->stats.offset); + c->stats.buffer = NULL; + } + } else { + out_string(c, "ERROR"); + } + return ; + } + + /* append terminator and start the transfer */ + append_stats(NULL, 0, NULL, 0, c); + + if (c->stats.buffer == NULL) { + out_string(c, "SERVER_ERROR out of memory writing stats"); + } else { + write_and_free(c, c->stats.buffer, c->stats.offset); + c->stats.buffer = NULL; + } +} + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wunused-but-set-parameter" +#endif +/* ntokens is overwritten here... shrug.. 
*/ +static inline void process_get_command(conn *c, token_t *tokens, size_t ntokens, bool return_cas) { + char *key; + size_t nkey; + int i = 0; + item *it; + token_t *key_token = &tokens[KEY_TOKEN]; + char *suffix; + assert(c != NULL); + + do { + while(key_token->length != 0) { + + key = key_token->value; + nkey = key_token->length; + + if(nkey > KEY_MAX_LENGTH) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + it = item_get(key, nkey); + if (settings.detail_enabled) { + stats_prefix_record_get(key, nkey, NULL != it); + } + if (it) { + if (i >= c->isize) { + item **new_list = realloc(c->ilist, sizeof(item *) * c->isize * 2); + if (new_list) { + c->isize *= 2; + c->ilist = new_list; + } else { + item_remove(it); + break; + } + } + + /* + * Construct the response. Each hit adds three elements to the + * outgoing data list: + * "VALUE " + * key + * " " + flags + " " + data length + "\r\n" + data (with \r\n) + */ + + if (return_cas) + { + MEMCACHED_COMMAND_GET(c->sfd, ITEM_key(it), it->nkey, + it->nbytes, ITEM_get_cas(it)); + /* Goofy mid-flight realloc. 
*/ + if (i >= c->suffixsize) { + char **new_suffix_list = realloc(c->suffixlist, + sizeof(char *) * c->suffixsize * 2); + if (new_suffix_list) { + c->suffixsize *= 2; + c->suffixlist = new_suffix_list; + } else { + item_remove(it); + break; + } + } + + suffix = cache_alloc(c->thread->suffix_cache); + if (suffix == NULL) { + out_string(c, "SERVER_ERROR out of memory making CAS suffix"); + item_remove(it); + return; + } + *(c->suffixlist + i) = suffix; + int suffix_len = snprintf(suffix, SUFFIX_SIZE, + " %llu\r\n", + (unsigned long long)ITEM_get_cas(it)); + if (add_iov(c, "VALUE ", 6) != 0 || + add_iov(c, ITEM_key(it), it->nkey) != 0 || + add_iov(c, ITEM_suffix(it), it->nsuffix - 2) != 0 || + add_iov(c, suffix, suffix_len) != 0 || + add_iov(c, ITEM_data(it), it->nbytes) != 0) + { + item_remove(it); + break; + } + } + else + { + MEMCACHED_COMMAND_GET(c->sfd, ITEM_key(it), it->nkey, + it->nbytes, ITEM_get_cas(it)); + if (add_iov(c, "VALUE ", 6) != 0 || + add_iov(c, ITEM_key(it), it->nkey) != 0 || + add_iov(c, ITEM_suffix(it), it->nsuffix + it->nbytes) != 0) + { + item_remove(it); + break; + } + } + + + if (settings.verbose > 1) + fprintf(stderr, ">%d sending key %s\n", c->sfd, ITEM_key(it)); + + /* item_get() has incremented it->refcount for us */ + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.slab_stats[it->slabs_clsid].get_hits++; + c->thread->stats.get_cmds++; + pthread_mutex_unlock(&c->thread->stats.mutex); + item_update(it); + *(c->ilist + i) = it; + i++; + + } else { + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.get_misses++; + c->thread->stats.get_cmds++; + pthread_mutex_unlock(&c->thread->stats.mutex); + MEMCACHED_COMMAND_GET(c->sfd, key, nkey, -1, 0); + } + + key_token++; + } + + /* + * If the command string hasn't been fully processed, get the next set + * of tokens. 
+ */ + if(key_token->value != NULL) { + ntokens = tokenize_command(key_token->value, tokens, MAX_TOKENS); + key_token = tokens; + } + + } while(key_token->value != NULL); + + c->icurr = c->ilist; + c->ileft = i; + if (return_cas) { + c->suffixcurr = c->suffixlist; + c->suffixleft = i; + } + + if (settings.verbose > 1) + fprintf(stderr, ">%d END\n", c->sfd); + + /* + If the loop was terminated because of out-of-memory, it is not + reliable to add END\r\n to the buffer, because it might not end + in \r\n. So we send SERVER_ERROR instead. + */ + if (key_token->value != NULL || add_iov(c, "END\r\n", 5) != 0 + || (IS_UDP(c->transport) && build_udp_headers(c) != 0)) { + out_string(c, "SERVER_ERROR out of memory writing get response"); + } + else { + conn_set_state(c, conn_mwrite); + c->msgcurr = 0; + } + + return; +} + +static void process_update_command(conn *c, token_t *tokens, const size_t ntokens, int comm, bool handle_cas) { + char *key; + size_t nkey; + unsigned int flags; + int32_t exptime_int = 0; + time_t exptime; + int vlen; + uint64_t req_cas_id=0; + item *it; + + assert(c != NULL); + + set_noreply_maybe(c, tokens, ntokens); + + if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + key = tokens[KEY_TOKEN].value; + nkey = tokens[KEY_TOKEN].length; + + if (! (safe_strtoul(tokens[2].value, (uint32_t *)&flags) + && safe_strtol(tokens[3].value, &exptime_int) + && safe_strtol(tokens[4].value, (int32_t *)&vlen))) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + /* Ubuntu 8.04 breaks when I pass exptime to safe_strtol */ + exptime = exptime_int; + + /* Negative exptimes can underflow and end up immortal. realtime() will + immediately expire values that are greater than REALTIME_MAXDELTA, but less + than process_started, so lets aim for that. */ + if (exptime < 0) + exptime = REALTIME_MAXDELTA + 1; + + // does cas value exist? 
+ if (handle_cas) { + if (!safe_strtoull(tokens[5].value, &req_cas_id)) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + } + + vlen += 2; + if (vlen < 0 || vlen - 2 < 0) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + if (settings.detail_enabled) { + stats_prefix_record_set(key, nkey); + } + + it = item_alloc(key, nkey, flags, realtime(exptime), vlen); + + if (it == 0) { + if (! item_size_ok(nkey, flags, vlen)) + out_string(c, "SERVER_ERROR object too large for cache"); + else + out_string(c, "SERVER_ERROR out of memory storing object"); + /* swallow the data line */ + c->write_and_go = conn_swallow; + c->sbytes = vlen; + + /* Avoid stale data persisting in cache because we failed alloc. + * Unacceptable for SET. Anywhere else too? */ + if (comm == NREAD_SET) { + it = item_get(key, nkey); + if (it) { + item_unlink(it); + item_remove(it); + } + } + + return; + } + ITEM_set_cas(it, req_cas_id); + + c->item = it; + c->ritem = ITEM_data(it); + c->rlbytes = it->nbytes; + c->cmd = comm; + conn_set_state(c, conn_nread); +} + +static void process_touch_command(conn *c, token_t *tokens, const size_t ntokens) { + char *key; + size_t nkey; + int32_t exptime_int = 0; + item *it; + + assert(c != NULL); + + set_noreply_maybe(c, tokens, ntokens); + + if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + key = tokens[KEY_TOKEN].value; + nkey = tokens[KEY_TOKEN].length; + + if (!safe_strtol(tokens[2].value, &exptime_int)) { + out_string(c, "CLIENT_ERROR invalid exptime argument"); + return; + } + + it = item_touch(key, nkey, realtime(exptime_int)); + if (it) { + item_update(it); + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.touch_cmds++; + c->thread->stats.slab_stats[it->slabs_clsid].touch_hits++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + out_string(c, "TOUCHED"); + item_remove(it); + } else { + 
pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.touch_cmds++; + c->thread->stats.touch_misses++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + out_string(c, "NOT_FOUND"); + } +} + +static void process_arithmetic_command(conn *c, token_t *tokens, const size_t ntokens, const bool incr) { + char temp[INCR_MAX_STORAGE_LEN]; + uint64_t delta; + char *key; + size_t nkey; + + assert(c != NULL); + + set_noreply_maybe(c, tokens, ntokens); + + if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + key = tokens[KEY_TOKEN].value; + nkey = tokens[KEY_TOKEN].length; + + if (!safe_strtoull(tokens[2].value, &delta)) { + out_string(c, "CLIENT_ERROR invalid numeric delta argument"); + return; + } + + switch(add_delta(c, key, nkey, incr, delta, temp, NULL)) { + case OK: + out_string(c, temp); + break; + case NON_NUMERIC: + out_string(c, "CLIENT_ERROR cannot increment or decrement non-numeric value"); + break; + case EOM: + out_string(c, "SERVER_ERROR out of memory"); + break; + case DELTA_ITEM_NOT_FOUND: + pthread_mutex_lock(&c->thread->stats.mutex); + if (incr) { + c->thread->stats.incr_misses++; + } else { + c->thread->stats.decr_misses++; + } + pthread_mutex_unlock(&c->thread->stats.mutex); + + out_string(c, "NOT_FOUND"); + break; + case DELTA_ITEM_CAS_MISMATCH: + break; /* Should never get here */ + default: + assert(false); + abort(); + } +} + +/* + * adds a delta value to a numeric item. + * + * c connection requesting the operation + * it item to adjust + * incr true to increment value, false to decrement + * delta amount to adjust value by + * buf buffer for response string + * + * returns a response string to send back to the client. 
+ */ +enum delta_result_type do_add_delta(conn *c, const char *key, const size_t nkey, + const bool incr, const int64_t delta, + char *buf, uint64_t *cas, + const uint32_t hv) { + char *ptr; + uint64_t value; + int res; + item *it; + + it = do_item_get(key, nkey, hv); + if (!it) { + return DELTA_ITEM_NOT_FOUND; + } + + if (cas != NULL && *cas != 0 && ITEM_get_cas(it) != *cas) { + do_item_remove(it); + return DELTA_ITEM_CAS_MISMATCH; + } + + ptr = ITEM_data(it); + + if (!safe_strtoull(ptr, &value)) { + do_item_remove(it); + return NON_NUMERIC; + } + + if (incr) { + value += delta; + MEMCACHED_COMMAND_INCR(c->sfd, ITEM_key(it), it->nkey, value); + } else { + if(delta > value) { + value = 0; + } else { + value -= delta; + } + MEMCACHED_COMMAND_DECR(c->sfd, ITEM_key(it), it->nkey, value); + } + + pthread_mutex_lock(&c->thread->stats.mutex); + if (incr) { + c->thread->stats.slab_stats[it->slabs_clsid].incr_hits++; + } else { + c->thread->stats.slab_stats[it->slabs_clsid].decr_hits++; + } + pthread_mutex_unlock(&c->thread->stats.mutex); + + snprintf(buf, INCR_MAX_STORAGE_LEN, "%llu", (unsigned long long)value); + res = strlen(buf); + if (res + 2 > it->nbytes || it->refcount != 1) { /* need to realloc */ + item *new_it; + new_it = item_alloc(ITEM_key(it), it->nkey, atoi(ITEM_suffix(it) + 1), it->exptime, res + 2 ); + if (new_it == 0) { + do_item_remove(it); + return EOM; + } + memcpy(ITEM_data(new_it), buf, res); + memcpy(ITEM_data(new_it) + res, "\r\n", 2); + item_replace(it, new_it, hv); + // Overwrite the older item's CAS with our new CAS since we're + // returning the CAS of the old item below. + ITEM_set_cas(it, (settings.use_cas) ? ITEM_get_cas(new_it) : 0); + do_item_remove(new_it); /* release our reference */ + } else { /* replace in-place */ + /* When changing the value without replacing the item, we + need to update the CAS on the existing item. */ + mutex_lock(&cache_lock); /* FIXME */ + ITEM_set_cas(it, (settings.use_cas) ? 
get_cas_id() : 0); + pthread_mutex_unlock(&cache_lock); + + memcpy(ITEM_data(it), buf, res); + memset(ITEM_data(it) + res, ' ', it->nbytes - res - 2); + do_item_update(it); + } + + if (cas) { + *cas = ITEM_get_cas(it); /* swap the incoming CAS value */ + } + do_item_remove(it); /* release our reference */ + return OK; +} + +static void process_delete_command(conn *c, token_t *tokens, const size_t ntokens) { + char *key; + size_t nkey; + item *it; + + assert(c != NULL); + + if (ntokens > 3) { + bool hold_is_zero = strcmp(tokens[KEY_TOKEN+1].value, "0") == 0; + bool sets_noreply = set_noreply_maybe(c, tokens, ntokens); + bool valid = (ntokens == 4 && (hold_is_zero || sets_noreply)) + || (ntokens == 5 && hold_is_zero && sets_noreply); + if (!valid) { + out_string(c, "CLIENT_ERROR bad command line format. " + "Usage: delete [noreply]"); + return; + } + } + + + key = tokens[KEY_TOKEN].value; + nkey = tokens[KEY_TOKEN].length; + + if(nkey > KEY_MAX_LENGTH) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + if (settings.detail_enabled) { + stats_prefix_record_delete(key, nkey); + } + + it = item_get(key, nkey); + if (it) { + MEMCACHED_COMMAND_DELETE(c->sfd, ITEM_key(it), it->nkey); + + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.slab_stats[it->slabs_clsid].delete_hits++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + item_unlink(it); + item_remove(it); /* release our reference */ + out_string(c, "DELETED"); + } else { + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.delete_misses++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + out_string(c, "NOT_FOUND"); + } +} + +static void process_verbosity_command(conn *c, token_t *tokens, const size_t ntokens) { + unsigned int level; + + assert(c != NULL); + + set_noreply_maybe(c, tokens, ntokens); + + level = strtoul(tokens[1].value, NULL, 10); + settings.verbose = level > MAX_VERBOSITY_LEVEL ? 
MAX_VERBOSITY_LEVEL : level; + out_string(c, "OK"); + return; +} + +static void process_slabs_automove_command(conn *c, token_t *tokens, const size_t ntokens) { + unsigned int level; + + assert(c != NULL); + + set_noreply_maybe(c, tokens, ntokens); + + level = strtoul(tokens[2].value, NULL, 10); + if (level == 0) { + settings.slab_automove = false; + } else if (level == 1) { + settings.slab_automove = true; + } else { + out_string(c, "ERROR"); + return; + } + out_string(c, "OK"); + return; +} + +static void process_command(conn *c, char *command) { + + token_t tokens[MAX_TOKENS]; + size_t ntokens; + int comm; + + assert(c != NULL); + + MEMCACHED_PROCESS_COMMAND_START(c->sfd, c->rcurr, c->rbytes); + + if (settings.verbose > 1) + fprintf(stderr, "<%d %s\n", c->sfd, command); + + /* + * for commands set/add/replace, we build an item and read the data + * directly into it, then continue in nread_complete(). + */ + + c->msgcurr = 0; + c->msgused = 0; + c->iovused = 0; + if (add_msghdr(c) != 0) { + out_string(c, "SERVER_ERROR out of memory preparing response"); + return; + } + + ntokens = tokenize_command(command, tokens, MAX_TOKENS); + if (ntokens >= 3 && + ((strcmp(tokens[COMMAND_TOKEN].value, "get") == 0) || + (strcmp(tokens[COMMAND_TOKEN].value, "bget") == 0))) { + + process_get_command(c, tokens, ntokens, false); + + } else if ((ntokens == 6 || ntokens == 7) && + ((strcmp(tokens[COMMAND_TOKEN].value, "add") == 0 && (comm = NREAD_ADD)) || + (strcmp(tokens[COMMAND_TOKEN].value, "set") == 0 && (comm = NREAD_SET)) || + (strcmp(tokens[COMMAND_TOKEN].value, "replace") == 0 && (comm = NREAD_REPLACE)) || + (strcmp(tokens[COMMAND_TOKEN].value, "prepend") == 0 && (comm = NREAD_PREPEND)) || + (strcmp(tokens[COMMAND_TOKEN].value, "append") == 0 && (comm = NREAD_APPEND)) )) { + + process_update_command(c, tokens, ntokens, comm, false); + + } else if ((ntokens == 7 || ntokens == 8) && (strcmp(tokens[COMMAND_TOKEN].value, "cas") == 0 && (comm = NREAD_CAS))) { + + 
process_update_command(c, tokens, ntokens, comm, true); + + } else if ((ntokens == 4 || ntokens == 5) && (strcmp(tokens[COMMAND_TOKEN].value, "incr") == 0)) { + + process_arithmetic_command(c, tokens, ntokens, 1); + + } else if (ntokens >= 3 && (strcmp(tokens[COMMAND_TOKEN].value, "gets") == 0)) { + + process_get_command(c, tokens, ntokens, true); + + } else if ((ntokens == 4 || ntokens == 5) && (strcmp(tokens[COMMAND_TOKEN].value, "decr") == 0)) { + + process_arithmetic_command(c, tokens, ntokens, 0); + + } else if (ntokens >= 3 && ntokens <= 5 && (strcmp(tokens[COMMAND_TOKEN].value, "delete") == 0)) { + + process_delete_command(c, tokens, ntokens); + + } else if ((ntokens == 4 || ntokens == 5) && (strcmp(tokens[COMMAND_TOKEN].value, "touch") == 0)) { + + process_touch_command(c, tokens, ntokens); + + } else if (ntokens >= 2 && (strcmp(tokens[COMMAND_TOKEN].value, "stats") == 0)) { + + process_stat(c, tokens, ntokens); + + } else if (ntokens >= 2 && ntokens <= 4 && (strcmp(tokens[COMMAND_TOKEN].value, "flush_all") == 0)) { + time_t exptime = 0; + + set_noreply_maybe(c, tokens, ntokens); + + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.flush_cmds++; + pthread_mutex_unlock(&c->thread->stats.mutex); + + if(ntokens == (c->noreply ? 3 : 2)) { + settings.oldest_live = current_time - 1; + item_flush_expired(); + out_string(c, "OK"); + return; + } + + exptime = strtol(tokens[1].value, NULL, 10); + if(errno == ERANGE) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + /* + If exptime is zero realtime() would return zero too, and + realtime(exptime) - 1 would overflow to the max unsigned + value. So we process exptime == 0 the same way we do when + no delay is given at all. 
+ */ + if (exptime > 0) + settings.oldest_live = realtime(exptime) - 1; + else /* exptime == 0 */ + settings.oldest_live = current_time - 1; + item_flush_expired(); + out_string(c, "OK"); + return; + + } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "version") == 0)) { + + out_string(c, "VERSION " VERSION); + + } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "quit") == 0)) { + + conn_set_state(c, conn_closing); + + } else if (ntokens > 1 && strcmp(tokens[COMMAND_TOKEN].value, "slabs") == 0) { + if (ntokens == 5 && strcmp(tokens[COMMAND_TOKEN + 1].value, "reassign") == 0) { + int src, dst, rv; + + if (settings.slab_reassign == false) { + out_string(c, "CLIENT_ERROR slab reassignment disabled"); + return; + } + + src = strtol(tokens[2].value, NULL, 10); + dst = strtol(tokens[3].value, NULL, 10); + + if (errno == ERANGE) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + rv = slabs_reassign(src, dst); + switch (rv) { + case REASSIGN_OK: + out_string(c, "OK"); + break; + case REASSIGN_RUNNING: + out_string(c, "BUSY currently processing reassign request"); + break; + case REASSIGN_BADCLASS: + out_string(c, "BADCLASS invalid src or dst class id"); + break; + case REASSIGN_NOSPARE: + out_string(c, "NOSPARE source class has no spare pages"); + break; + case REASSIGN_DEST_NOT_FULL: + out_string(c, "NOTFULL dest class has spare memory"); + break; + case REASSIGN_SRC_NOT_SAFE: + out_string(c, "UNSAFE src class is in an unsafe state"); + break; + case REASSIGN_SRC_DST_SAME: + out_string(c, "SAME src and dst class are identical"); + break; + default: + assert(false); + abort(); + } + return; + } else if (ntokens == 4 && + (strcmp(tokens[COMMAND_TOKEN + 1].value, "automove") == 0)) { + process_slabs_automove_command(c, tokens, ntokens); + } else { + out_string(c, "ERROR"); + } + } else if ((ntokens == 3 || ntokens == 4) && (strcmp(tokens[COMMAND_TOKEN].value, "verbosity") == 0)) { + process_verbosity_command(c, tokens, 
ntokens); + } else { + out_string(c, "ERROR"); + } + return; +} + +/* + * if we have a complete line in the buffer, process it. + */ +static int try_read_command(conn *c) { + assert(c != NULL); + assert(c->rcurr <= (c->rbuf + c->rsize)); + assert(c->rbytes > 0); + + if (c->protocol == negotiating_prot || c->transport == udp_transport) { + if ((unsigned char)c->rbuf[0] == (unsigned char)PROTOCOL_BINARY_REQ) { + c->protocol = binary_prot; + } else { + c->protocol = ascii_prot; + } + + if (settings.verbose > 1) { + fprintf(stderr, "%d: Client using the %s protocol\n", c->sfd, + prot_text(c->protocol)); + } + } + + if (c->protocol == binary_prot) { + /* Do we have the complete packet header? */ + if (c->rbytes < sizeof(c->binary_header)) { + /* need more data! */ + return 0; + } else { +#ifdef NEED_ALIGN + if (((long)(c->rcurr)) % 8 != 0) { + /* must realign input buffer */ + memmove(c->rbuf, c->rcurr, c->rbytes); + c->rcurr = c->rbuf; + if (settings.verbose > 1) { + fprintf(stderr, "%d: Realign input buffer\n", c->sfd); + } + } +#endif + protocol_binary_request_header* req; + req = (protocol_binary_request_header*)c->rcurr; + + if (settings.verbose > 1) { + /* Dump the packet before we convert it to host order */ + int ii; + fprintf(stderr, "<%d Read binary protocol data:", c->sfd); + for (ii = 0; ii < sizeof(req->bytes); ++ii) { + if (ii % 4 == 0) { + fprintf(stderr, "\n<%d ", c->sfd); + } + fprintf(stderr, " 0x%02x", req->bytes[ii]); + } + fprintf(stderr, "\n"); + } + + c->binary_header = *req; + c->binary_header.request.keylen = ntohs(req->request.keylen); + c->binary_header.request.bodylen = ntohl(req->request.bodylen); + c->binary_header.request.cas = ntohll(req->request.cas); + + if (c->binary_header.request.magic != PROTOCOL_BINARY_REQ) { + if (settings.verbose) { + fprintf(stderr, "Invalid magic: %x\n", + c->binary_header.request.magic); + } + conn_set_state(c, conn_closing); + return -1; + } + + c->msgcurr = 0; + c->msgused = 0; + c->iovused = 0; + if 
(add_msghdr(c) != 0) { + out_string(c, "SERVER_ERROR out of memory"); + return 0; + } + + c->cmd = c->binary_header.request.opcode; + c->keylen = c->binary_header.request.keylen; + c->opaque = c->binary_header.request.opaque; + /* clear the returned cas value */ + c->cas = 0; + + dispatch_bin_command(c); + + c->rbytes -= sizeof(c->binary_header); + c->rcurr += sizeof(c->binary_header); + } + } else { + char *el, *cont; + + if (c->rbytes == 0) + return 0; + + el = memchr(c->rcurr, '\n', c->rbytes); + if (!el) { + if (c->rbytes > 1024) { + /* + * We didn't have a '\n' in the first k. This _has_ to be a + * large multiget, if not we should just nuke the connection. + */ + char *ptr = c->rcurr; + while (*ptr == ' ') { /* ignore leading whitespaces */ + ++ptr; + } + + if (ptr - c->rcurr > 100 || + (strncmp(ptr, "get ", 4) && strncmp(ptr, "gets ", 5))) { + + conn_set_state(c, conn_closing); + return 1; + } + } + + return 0; + } + cont = el + 1; + if ((el - c->rcurr) > 1 && *(el - 1) == '\r') { + el--; + } + *el = '\0'; + + assert(cont <= (c->rcurr + c->rbytes)); + + process_command(c, c->rcurr); + + c->rbytes -= (cont - c->rcurr); + c->rcurr = cont; + + assert(c->rcurr <= (c->rbuf + c->rsize)); + } + + return 1; +} + +/* + * read a UDP request. + */ +static enum try_read_result try_read_udp(conn *c) { + int res; + + assert(c != NULL); + + c->request_addr_size = sizeof(c->request_addr); + res = recvfrom(c->sfd, c->rbuf, c->rsize, + 0, &c->request_addr, &c->request_addr_size); + if (res > 8) { + unsigned char *buf = (unsigned char *)c->rbuf; + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.bytes_read += res; + pthread_mutex_unlock(&c->thread->stats.mutex); + + /* Beginning of UDP packet is the request ID; save it. */ + c->request_id = buf[0] * 256 + buf[1]; + + /* If this is a multi-packet request, drop it. 
*/ + if (buf[4] != 0 || buf[5] != 1) { + out_string(c, "SERVER_ERROR multi-packet request not supported"); + return READ_NO_DATA_RECEIVED; + } + + /* Don't care about any of the rest of the header. */ + res -= 8; + memmove(c->rbuf, c->rbuf + 8, res); + + c->rbytes = res; + c->rcurr = c->rbuf; + return READ_DATA_RECEIVED; + } + return READ_NO_DATA_RECEIVED; +} + +/* + * read from network as much as we can, handle buffer overflow and connection + * close. + * before reading, move the remaining incomplete fragment of a command + * (if any) to the beginning of the buffer. + * + * To protect us from someone flooding a connection with bogus data causing + * the connection to eat up all available memory, break out and start looking + * at the data I've got after a number of reallocs... + * + * @return enum try_read_result + */ +static enum try_read_result try_read_network(conn *c) { + enum try_read_result gotdata = READ_NO_DATA_RECEIVED; + int res; + int num_allocs = 0; + assert(c != NULL); + + if (c->rcurr != c->rbuf) { + if (c->rbytes != 0) /* otherwise there's nothing to copy */ + memmove(c->rbuf, c->rcurr, c->rbytes); + c->rcurr = c->rbuf; + } + + while (1) { + if (c->rbytes >= c->rsize) { + if (num_allocs == 4) { + return gotdata; + } + ++num_allocs; + char *new_rbuf = realloc(c->rbuf, c->rsize * 2); + if (!new_rbuf) { + if (settings.verbose > 0) + fprintf(stderr, "Couldn't realloc input buffer\n"); + c->rbytes = 0; /* ignore what we read */ + out_string(c, "SERVER_ERROR out of memory reading request"); + c->write_and_go = conn_closing; + return READ_MEMORY_ERROR; + } + c->rcurr = c->rbuf = new_rbuf; + c->rsize *= 2; + } + + int avail = c->rsize - c->rbytes; + res = read(c->sfd, c->rbuf + c->rbytes, avail); + if (res > 0) { + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.bytes_read += res; + pthread_mutex_unlock(&c->thread->stats.mutex); + gotdata = READ_DATA_RECEIVED; + c->rbytes += res; + if (res == avail) { + continue; + } else { + break; + } + } 
+ if (res == 0) { + return READ_ERROR; + } + if (res == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + break; + } + return READ_ERROR; + } + } + return gotdata; +} + +static bool update_event(conn *c, const int new_flags) { + assert(c != NULL); + + struct event_base *base = c->event.ev_base; + if (c->ev_flags == new_flags) + return true; + if (event_del(&c->event) == -1) return false; + event_set(&c->event, c->sfd, new_flags, event_handler, (void *)c); + event_base_set(base, &c->event); + c->ev_flags = new_flags; + if (event_add(&c->event, 0) == -1) return false; + return true; +} + +/* + * Sets whether we are listening for new connections or not. + */ +void do_accept_new_conns(const bool do_accept) { + conn *next; + + for (next = listen_conn; next; next = next->next) { + if (do_accept) { + update_event(next, EV_READ | EV_PERSIST); + if (listen(next->sfd, settings.backlog) != 0) { + perror("listen"); + } + } + else { + update_event(next, 0); + if (listen(next->sfd, 0) != 0) { + perror("listen"); + } + } + } + + if (do_accept) { + STATS_LOCK(); + stats.accepting_conns = true; + STATS_UNLOCK(); + } else { + STATS_LOCK(); + stats.accepting_conns = false; + stats.listen_disabled_num++; + STATS_UNLOCK(); + allow_new_conns = false; + maxconns_handler(-42, 0, 0); + } +} + +/* + * Transmit the next chunk of data from our list of msgbuf structures. + * + * Returns: + * TRANSMIT_COMPLETE All done writing. + * TRANSMIT_INCOMPLETE More data remaining to write. + * TRANSMIT_SOFT_ERROR Can't write any more right now. + * TRANSMIT_HARD_ERROR Can't write (c->state is set to conn_closing) + */ +static enum transmit_result transmit(conn *c) { + assert(c != NULL); + + if (c->msgcurr < c->msgused && + c->msglist[c->msgcurr].msg_iovlen == 0) { + /* Finished writing the current msg; advance to the next. 
*/ + c->msgcurr++; + } + if (c->msgcurr < c->msgused) { + ssize_t res; + struct msghdr *m = &c->msglist[c->msgcurr]; + + res = sendmsg(c->sfd, m, 0); + if (res > 0) { + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.bytes_written += res; + pthread_mutex_unlock(&c->thread->stats.mutex); + + /* We've written some of the data. Remove the completed + iovec entries from the list of pending writes. */ + while (m->msg_iovlen > 0 && res >= m->msg_iov->iov_len) { + res -= m->msg_iov->iov_len; + m->msg_iovlen--; + m->msg_iov++; + } + + /* Might have written just part of the last iovec entry; + adjust it so the next write will do the rest. */ + if (res > 0) { + m->msg_iov->iov_base = (caddr_t)m->msg_iov->iov_base + res; + m->msg_iov->iov_len -= res; + } + return TRANSMIT_INCOMPLETE; + } + if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) { + if (!update_event(c, EV_WRITE | EV_PERSIST)) { + if (settings.verbose > 0) + fprintf(stderr, "Couldn't update event\n"); + conn_set_state(c, conn_closing); + return TRANSMIT_HARD_ERROR; + } + return TRANSMIT_SOFT_ERROR; + } + /* if res == 0 or res == -1 and error is not EAGAIN or EWOULDBLOCK, + we have a real error, on which we close the connection */ + if (settings.verbose > 0) + perror("Failed to write, and not due to blocking"); + + if (IS_UDP(c->transport)) + conn_set_state(c, conn_read); + else + conn_set_state(c, conn_closing); + return TRANSMIT_HARD_ERROR; + } else { + return TRANSMIT_COMPLETE; + } +} + +static void drive_machine(conn *c) { + bool stop = false; + int sfd, flags = 1; + socklen_t addrlen; + struct sockaddr_storage addr; + int nreqs = settings.reqs_per_event; + int res; + const char *str; + + assert(c != NULL); + + while (!stop) { + + switch(c->state) { + case conn_listening: + addrlen = sizeof(addr); + if ((sfd = accept(c->sfd, (struct sockaddr *)&addr, &addrlen)) == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + /* these are transient, so don't log anything */ + stop = true; + } 
else if (errno == EMFILE) { + if (settings.verbose > 0) + fprintf(stderr, "Too many open connections\n"); + accept_new_conns(false); + stop = true; + } else { + perror("accept()"); + stop = true; + } + break; + } + if ((flags = fcntl(sfd, F_GETFL, 0)) < 0 || + fcntl(sfd, F_SETFL, flags | O_NONBLOCK) < 0) { + perror("setting O_NONBLOCK"); + close(sfd); + break; + } + + if (settings.maxconns_fast && + stats.curr_conns + stats.reserved_fds >= settings.maxconns - 1) { + str = "ERROR Too many open connections\r\n"; + res = write(sfd, str, strlen(str)); + close(sfd); + STATS_LOCK(); + stats.rejected_conns++; + STATS_UNLOCK(); + } else { + dispatch_conn_new(sfd, conn_new_cmd, EV_READ | EV_PERSIST, + DATA_BUFFER_SIZE, tcp_transport); + } + + stop = true; + break; + + case conn_waiting: + if (!update_event(c, EV_READ | EV_PERSIST)) { + if (settings.verbose > 0) + fprintf(stderr, "Couldn't update event\n"); + conn_set_state(c, conn_closing); + break; + } + + conn_set_state(c, conn_read); + stop = true; + break; + + case conn_read: + res = IS_UDP(c->transport) ? try_read_udp(c) : try_read_network(c); + + switch (res) { + case READ_NO_DATA_RECEIVED: + conn_set_state(c, conn_waiting); + break; + case READ_DATA_RECEIVED: + conn_set_state(c, conn_parse_cmd); + break; + case READ_ERROR: + conn_set_state(c, conn_closing); + break; + case READ_MEMORY_ERROR: /* Failed to allocate more memory */ + /* State already set by try_read_network */ + break; + default: + assert(false); + abort(); + } + break; + + case conn_parse_cmd : + if (try_read_command(c) == 0) { + /* wee need more data! 
*/ + conn_set_state(c, conn_waiting); + } + + break; + + case conn_new_cmd: + /* Only process nreqs at a time to avoid starving other + connections */ + + --nreqs; + if (nreqs >= 0) { + reset_cmd_handler(c); + } else { + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.conn_yields++; + pthread_mutex_unlock(&c->thread->stats.mutex); + if (c->rbytes > 0) { + /* We have already read in data into the input buffer, + so libevent will most likely not signal read events + on the socket (unless more data is available. As a + hack we should just put in a request to write data, + because that should be possible ;-) + */ + if (!update_event(c, EV_WRITE | EV_PERSIST)) { + if (settings.verbose > 0) + fprintf(stderr, "Couldn't update event\n"); + conn_set_state(c, conn_closing); + } + } + stop = true; + } + break; + + case conn_nread: + if (c->rlbytes == 0) { + complete_nread(c); + break; + } + /* first check if we have leftovers in the conn_read buffer */ + if (c->rbytes > 0) { + int tocopy = c->rbytes > c->rlbytes ? 
c->rlbytes : c->rbytes; + if (c->ritem != c->rcurr) { + memmove(c->ritem, c->rcurr, tocopy); + } + c->ritem += tocopy; + c->rlbytes -= tocopy; + c->rcurr += tocopy; + c->rbytes -= tocopy; + if (c->rlbytes == 0) { + break; + } + } + + /* now try reading from the socket */ + res = read(c->sfd, c->ritem, c->rlbytes); + if (res > 0) { + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.bytes_read += res; + pthread_mutex_unlock(&c->thread->stats.mutex); + if (c->rcurr == c->ritem) { + c->rcurr += res; + } + c->ritem += res; + c->rlbytes -= res; + break; + } + if (res == 0) { /* end of stream */ + conn_set_state(c, conn_closing); + break; + } + if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) { + if (!update_event(c, EV_READ | EV_PERSIST)) { + if (settings.verbose > 0) + fprintf(stderr, "Couldn't update event\n"); + conn_set_state(c, conn_closing); + break; + } + stop = true; + break; + } + /* otherwise we have a real error, on which we close the connection */ + if (settings.verbose > 0) { + fprintf(stderr, "Failed to read, and not due to blocking:\n" + "errno: %d %s \n" + "rcurr=%lx ritem=%lx rbuf=%lx rlbytes=%d rsize=%d\n", + errno, strerror(errno), + (long)c->rcurr, (long)c->ritem, (long)c->rbuf, + (int)c->rlbytes, (int)c->rsize); + } + conn_set_state(c, conn_closing); + break; + + case conn_swallow: + /* we are reading sbytes and throwing them away */ + if (c->sbytes == 0) { + conn_set_state(c, conn_new_cmd); + break; + } + + /* first check if we have leftovers in the conn_read buffer */ + if (c->rbytes > 0) { + int tocopy = c->rbytes > c->sbytes ? c->sbytes : c->rbytes; + c->sbytes -= tocopy; + c->rcurr += tocopy; + c->rbytes -= tocopy; + break; + } + + /* now try reading from the socket */ + res = read(c->sfd, c->rbuf, c->rsize > c->sbytes ? 
c->sbytes : c->rsize); + if (res > 0) { + pthread_mutex_lock(&c->thread->stats.mutex); + c->thread->stats.bytes_read += res; + pthread_mutex_unlock(&c->thread->stats.mutex); + c->sbytes -= res; + break; + } + if (res == 0) { /* end of stream */ + conn_set_state(c, conn_closing); + break; + } + if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) { + if (!update_event(c, EV_READ | EV_PERSIST)) { + if (settings.verbose > 0) + fprintf(stderr, "Couldn't update event\n"); + conn_set_state(c, conn_closing); + break; + } + stop = true; + break; + } + /* otherwise we have a real error, on which we close the connection */ + if (settings.verbose > 0) + fprintf(stderr, "Failed to read, and not due to blocking\n"); + conn_set_state(c, conn_closing); + break; + + case conn_write: + /* + * We want to write out a simple response. If we haven't already, + * assemble it into a msgbuf list (this will be a single-entry + * list for TCP or a two-entry list for UDP). + */ + if (c->iovused == 0 || (IS_UDP(c->transport) && c->iovused == 1)) { + if (add_iov(c, c->wcurr, c->wbytes) != 0) { + if (settings.verbose > 0) + fprintf(stderr, "Couldn't build response\n"); + conn_set_state(c, conn_closing); + break; + } + } + + /* fall through... 
*/ + + case conn_mwrite: + if (IS_UDP(c->transport) && c->msgcurr == 0 && build_udp_headers(c) != 0) { + if (settings.verbose > 0) + fprintf(stderr, "Failed to build UDP headers\n"); + conn_set_state(c, conn_closing); + break; + } + switch (transmit(c)) { + case TRANSMIT_COMPLETE: + if (c->state == conn_mwrite) { + while (c->ileft > 0) { + item *it = *(c->icurr); + assert((it->it_flags & ITEM_SLABBED) == 0); + item_remove(it); + c->icurr++; + c->ileft--; + } + while (c->suffixleft > 0) { + char *suffix = *(c->suffixcurr); + cache_free(c->thread->suffix_cache, suffix); + c->suffixcurr++; + c->suffixleft--; + } + /* XXX: I don't know why this wasn't the general case */ + if(c->protocol == binary_prot) { + conn_set_state(c, c->write_and_go); + } else { + conn_set_state(c, conn_new_cmd); + } + } else if (c->state == conn_write) { + if (c->write_and_free) { + free(c->write_and_free); + c->write_and_free = 0; + } + conn_set_state(c, c->write_and_go); + } else { + if (settings.verbose > 0) + fprintf(stderr, "Unexpected state %d\n", c->state); + conn_set_state(c, conn_closing); + } + break; + + case TRANSMIT_INCOMPLETE: + case TRANSMIT_HARD_ERROR: + break; /* Continue in state machine. 
*/ + + case TRANSMIT_SOFT_ERROR: + stop = true; + break; + default: + assert(false); + abort(); + } + break; + + case conn_closing: + if (IS_UDP(c->transport)) + conn_cleanup(c); + else + conn_close(c); + stop = true; + break; + + case conn_max_state: + assert(false); + break; + default: + assert(false); + abort(); + } + } + + return; +}
+
+/*
+ * Event callback for a connection's socket.
+ *
+ * Records the triggering event mask in the conn, verifies that the
+ * descriptor reported by the event matches the conn's own descriptor
+ * (closing the conn if it does not) and then runs the state machine.
+ *
+ * @param fd    descriptor the event fired on
+ * @param which event mask that fired
+ * @param arg   the conn registered with the event (must not be NULL)
+ */
+void event_handler(const int fd, const short which, void *arg) {
+    conn *c;
+
+    c = (conn *)arg;
+    assert(c != NULL);
+
+    c->which = which;
+
+    /* sanity */
+    if (fd != c->sfd) {
+        if (settings.verbose > 0)
+            fprintf(stderr, "Catastrophic: event fd doesn't match conn fd!\n");
+        conn_close(c);
+        return;
+    }
+
+    drive_machine(c);
+
+    /* wait for next event */
+    return;
+}
+
+/*
+ * Creates a socket matching the given addrinfo entry and switches it to
+ * non-blocking mode.
+ *
+ * @return the new descriptor, or -1 if socket() or fcntl() failed
+ */
+static int new_socket(struct addrinfo *ai) {
+    int sfd;
+    int flags;
+
+    if ((sfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) == -1) {
+        return -1;
+    }
+
+    if ((flags = fcntl(sfd, F_GETFL, 0)) < 0 ||
+        fcntl(sfd, F_SETFL, flags | O_NONBLOCK) < 0) {
+        perror("setting O_NONBLOCK");
+        close(sfd);
+        return -1;
+    }
+    return sfd;
+}
+
+
+/*
+ * Sets a socket's send buffer size to the maximum allowed by the system.
+ *
+ * The search runs between the current SO_SNDBUF value and
+ * MAX_SENDBUF_SIZE; if the current value cannot be read the socket is
+ * left untouched.
+ */
+static void maximize_sndbuf(const int sfd) {
+    socklen_t intsize = sizeof(int);
+    int last_good = 0;
+    int min, max, avg;
+    int old_size;
+
+    /* Start with the default size. */
+    if (getsockopt(sfd, SOL_SOCKET, SO_SNDBUF, &old_size, &intsize) != 0) {
+        if (settings.verbose > 0)
+            perror("getsockopt(SO_SNDBUF)");
+        return;
+    }
+
+    /* Binary-search for the real maximum. */
+    min = old_size;
+    max = MAX_SENDBUF_SIZE;
+
+    while (min <= max) {
+        /* the unsigned cast keeps min + max from overflowing a signed int */
+        avg = ((unsigned int)(min + max)) / 2;
+        if (setsockopt(sfd, SOL_SOCKET, SO_SNDBUF, (void *)&avg, intsize) == 0) {
+            last_good = avg;
+            min = avg + 1;
+        } else {
+            max = avg - 1;
+        }
+    }
+
+    if (settings.verbose > 1)
+        fprintf(stderr, "<%d send buffer was %d, now %d\n", sfd, old_size, last_good);
+}
+
+/**
+ * Create a socket and bind it to a specific port number
+ * @param interface the interface to bind to
+ * @param port the port number to bind to
+ * @param transport the transport protocol (TCP / UDP)
+ * @param portnumber_file A filepointer to write the port numbers to
+ *        when they are successfully added to the list of ports we
+ *        listen on.
+ * @return 0 if at least one address was bound and no hard error
+ *         occurred, 1 otherwise
+ */
+static int server_socket(const char *interface,
+                         int port,
+                         enum network_transport transport,
+                         FILE *portnumber_file) {
+    int sfd;
+    struct linger ling = {0, 0};
+    struct addrinfo *ai;
+    struct addrinfo *next;
+    struct addrinfo hints = { .ai_flags = AI_PASSIVE,
+                              .ai_family = AF_UNSPEC };
+    char port_buf[NI_MAXSERV];
+    int error;
+    int success = 0;
+    int flags =1;
+
+    hints.ai_socktype = IS_UDP(transport) ? SOCK_DGRAM : SOCK_STREAM;
+
+    /* -1 is passed through to getaddrinfo() as port 0 */
+    if (port == -1) {
+        port = 0;
+    }
+    snprintf(port_buf, sizeof(port_buf), "%d", port);
+    error= getaddrinfo(interface, port_buf, &hints, &ai);
+    if (error != 0) {
+        if (error != EAI_SYSTEM)
+          fprintf(stderr, "getaddrinfo(): %s\n", gai_strerror(error));
+        else
+          perror("getaddrinfo()");
+        return 1;
+    }
+
+    for (next= ai; next; next= next->ai_next) {
+        conn *listen_conn_add;
+        if ((sfd = new_socket(next)) == -1) {
+            /* getaddrinfo can return "junk" addresses,
+             * we make sure at least one works before erroring.
+             */
+            if (errno == EMFILE) {
+                /* ...unless we're out of fds */
+                perror("server_socket");
+                exit(EX_OSERR);
+            }
+            continue;
+        }
+
+#ifdef IPV6_V6ONLY
+        if (next->ai_family == AF_INET6) {
+            error = setsockopt(sfd, IPPROTO_IPV6, IPV6_V6ONLY, (char *) &flags, sizeof(flags));
+            if (error != 0) {
+                perror("setsockopt");
+                close(sfd);
+                continue;
+            }
+        }
+#endif
+
+        setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
+        if (IS_UDP(transport)) {
+            maximize_sndbuf(sfd);
+        } else {
+            /* failures of these TCP options are reported but not fatal */
+            error = setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
+            if (error != 0)
+                perror("setsockopt");
+
+            error = setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
+            if (error != 0)
+                perror("setsockopt");
+
+            error = setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, (void *)&flags, sizeof(flags));
+            if (error != 0)
+                perror("setsockopt");
+        }
+
+        if (bind(sfd, next->ai_addr, next->ai_addrlen) == -1) {
+            /* an address already in use is skipped; anything else is fatal */
+            if (errno != EADDRINUSE) {
+                perror("bind()");
+                close(sfd);
+                freeaddrinfo(ai);
+                return 1;
+            }
+            close(sfd);
+            continue;
+        } else {
+            success++;
+            if (!IS_UDP(transport) && listen(sfd, settings.backlog) == -1) {
+                perror("listen()");
+                close(sfd);
+                freeaddrinfo(ai);
+                return 1;
+            }
+            if (portnumber_file != NULL &&
+                (next->ai_addr->sa_family == AF_INET ||
+                 next->ai_addr->sa_family == AF_INET6)) {
+                union {
+                    struct sockaddr_in in;
+                    struct sockaddr_in6 in6;
+                } my_sockaddr;
+                socklen_t len = sizeof(my_sockaddr);
+                /* report the port actually bound (relevant when port 0 was requested) */
+                if (getsockname(sfd, (struct sockaddr*)&my_sockaddr, &len)==0) {
+                    if (next->ai_addr->sa_family == AF_INET) {
+                        fprintf(portnumber_file, "%s INET: %u\n",
+                                IS_UDP(transport) ? "UDP" : "TCP",
+                                ntohs(my_sockaddr.in.sin_port));
+                    } else {
+                        fprintf(portnumber_file, "%s INET6: %u\n",
+                                IS_UDP(transport) ? "UDP" : "TCP",
+                                ntohs(my_sockaddr.in6.sin6_port));
+                    }
+                }
+            }
+        }
+
+        if (IS_UDP(transport)) {
+            int c;
+
+            for (c = 0; c < settings.num_threads_per_udp; c++) {
+                /* this is guaranteed to hit all threads because we round-robin */
+                dispatch_conn_new(sfd, conn_read, EV_READ | EV_PERSIST,
+                                  UDP_READ_BUFFER_SIZE, transport);
+            }
+        } else {
+            if (!(listen_conn_add = conn_new(sfd, conn_listening,
+                                             EV_READ | EV_PERSIST, 1,
+                                             transport, main_base))) {
+                fprintf(stderr, "failed to create listening connection\n");
+                exit(EXIT_FAILURE);
+            }
+            /* push onto the head of the global listening-connection list */
+            listen_conn_add->next = listen_conn;
+            listen_conn = listen_conn_add;
+        }
+    }
+
+    freeaddrinfo(ai);
+
+    /* Return zero iff at least one address was bound successfully */
+    return success == 0;
+}
+
+/*
+ * Opens listening sockets for every interface in settings.inter.
+ *
+ * settings.inter is a list of "host" or "host:port" entries separated by
+ * ',' or ';'; an entry of "*" means "any address".  When settings.inter is
+ * NULL a single call to server_socket() with a NULL interface is made.
+ *
+ * @param port default port for entries without an explicit ":port"
+ * @param transport the transport protocol (TCP / UDP)
+ * @param portnumber_file passed through to server_socket()
+ * @return 0 on success, non-zero if any interface failed or the list
+ *         could not be parsed
+ */
+static int server_sockets(int port, enum network_transport transport,
+                          FILE *portnumber_file) {
+    if (settings.inter == NULL) {
+        return server_socket(settings.inter, port, transport, portnumber_file);
+    } else {
+        /* tokenize them and bind to each one of them.. */
+        char *b;
+        int ret = 0;
+        /* strtok_r() writes into its input, so work on a private copy */
+        char *list = strdup(settings.inter);
+
+        if (list == NULL) {
+            fprintf(stderr, "Failed to allocate memory for parsing server interface string\n");
+            return 1;
+        }
+        for (char *p = strtok_r(list, ";,", &b);
+             p != NULL;
+             p = strtok_r(NULL, ";,", &b)) {
+            int the_port = port;
+            char *s = strchr(p, ':');
+            if (s != NULL) {
+                *s = '\0';
+                ++s;
+                if (!safe_strtol(s, &the_port)) {
+                    /* s points into list: report first, then release the copy */
+                    fprintf(stderr, "Invalid port number: \"%s\"\n", s);
+                    free(list);
+                    return 1;
+                }
+            }
+            if (strcmp(p, "*") == 0) {
+                p = NULL;
+            }
+            ret |= server_socket(p, the_port, transport, portnumber_file);
+        }
+        free(list);
+        return ret;
+    }
+}
+
+/*
+ * Creates a non-blocking AF_UNIX stream socket.
+ *
+ * @return the new descriptor, or -1 if socket() or fcntl() failed
+ */
+static int new_socket_unix(void) {
+    int sfd;
+    int flags;
+
+    if ((sfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+        perror("socket()");
+        return -1;
+    }
+
+    if ((flags = fcntl(sfd, F_GETFL, 0)) < 0 ||
+        fcntl(sfd, F_SETFL, flags | O_NONBLOCK) < 0) {
+        perror("setting O_NONBLOCK");
+        close(sfd);
+        return -1;
+    }
+    return sfd;
+}
+
+/*
+ * Creates the UNIX domain listening socket at the given path and
+ * registers it as the listening connection.
+ *
+ * @param path filesystem path of the socket; must fit in sun_path
+ * @param access_mask permission bits for the socket file (only the low
+ *        nine bits are used)
+ * @return 0 on success, 1 on failure; a path that does not fit in
+ *         sun_path fails with errno set to ENAMETOOLONG
+ */
+static int server_socket_unix(const char *path, int access_mask) {
+    int sfd;
+    struct linger ling = {0, 0};
+    struct sockaddr_un addr;
+    struct stat tstat;
+    int flags =1;
+    int old_umask;
+
+    if (!path) {
+        return 1;
+    }
+
+    /*
+     * Refuse a path that would be truncated instead of relying on the
+     * assert() below, which aborts the process (or is compiled out).
+     */
+    if (strlen(path) >= sizeof(addr.sun_path)) {
+        errno = ENAMETOOLONG;
+        return 1;
+    }
+
+    if ((sfd = new_socket_unix()) == -1) {
+        return 1;
+    }
+
+    /*
+     * Clean up a previous socket file if we left it around
+     */
+    if (lstat(path, &tstat) == 0) {
+        if (S_ISSOCK(tstat.st_mode))
+            unlink(path);
+    }
+
+    setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
+    setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
+    setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
+
+    /*
+     * the memset call clears nonstandard fields in some implementations
+     * that otherwise mess things up.
+     */
+    memset(&addr, 0, sizeof(addr));
+
+    addr.sun_family = AF_UNIX;
+    strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+    assert(strcmp(addr.sun_path, path) == 0);
+    /* the umask in effect during bind() determines the socket file's mode */
+    old_umask = umask( ~(access_mask&0777));
+    if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+        perror("bind()");
+        close(sfd);
+        umask(old_umask);
+        return 1;
+    }
+    umask(old_umask);
+    if (listen(sfd, settings.backlog) == -1) {
+        perror("listen()");
+        close(sfd);
+        return 1;
+    }
+    if (!(listen_conn = conn_new(sfd, conn_listening,
+                                 EV_READ | EV_PERSIST, 1,
+                                 local_transport, main_base))) {
+        fprintf(stderr, "failed to create listening connection\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return 0;
+}
+
+/*
+ * We keep the current time of day in a global variable that's updated by a
+ * timer event. This saves us a bunch of time() system calls (we really only
+ * need to get the time once a second, whereas there can be tens of thousands
+ * of requests a second) and allows us to use server-start-relative timestamps
+ * rather than absolute UNIX timestamps, a space savings on systems where
+ * sizeof(time_t) > sizeof(unsigned int).
+ */
+volatile rel_time_t current_time;
+static struct event clockevent;
+
+/* libevent uses a monotonic clock when available for event scheduling. Aside
+ * from jitter, simply ticking our internal timer here is accurate enough.
+ * Note that users who are setting explicit dates for expiration times *must*
+ * ensure their clocks are correct before starting memcached. */
+static void clock_handler(const int fd, const short which, void *arg) { + struct timeval t = {.tv_sec = 1, .tv_usec = 0}; + static bool initialized = false; +#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC) + static bool monotonic = false; + static time_t monotonic_start; +#endif + + if (initialized) { + /* only delete the event if it's actually there. */ + evtimer_del(&clockevent); + } else { + initialized = true; + /* process_started is initialized to time() - 2.
We initialize to 1 so + * flush_all won't underflow during tests. */ +#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC) + struct timespec ts; + if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) { + monotonic = true; + monotonic_start = ts.tv_sec - 2; + } +#endif + } + + evtimer_set(&clockevent, clock_handler, 0); + event_base_set(main_base, &clockevent); + evtimer_add(&clockevent, &t); + +#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC) + if (monotonic) { + struct timespec ts; + if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1) + return; + current_time = (rel_time_t) (ts.tv_sec - monotonic_start); + return; + } +#endif + { + struct timeval tv; + gettimeofday(&tv, NULL); + current_time = (rel_time_t) (tv.tv_sec - process_started); + } +} + +static void usage(void) { + printf(PACKAGE " " VERSION "\n"); + printf("-p TCP port number to listen on (default: 11211)\n" + "-U UDP port number to listen on (default: 11211, 0 is off)\n" + "-s UNIX socket path to listen on (disables network support)\n" + "-a access mask for UNIX socket, in octal (default: 0700)\n" + "-l interface to listen on (default: INADDR_ANY, all addresses)\n" + " may be specified as host:port. If you don't specify\n" + " a port number, the value you specified with -p or -U is\n" + " used. You may specify multiple addresses separated by comma\n" + " or by using -l multiple times\n" + + "-d run as a daemon\n" + "-r maximize core file limit\n" + "-u assume identity of (only when run as root)\n" + "-m max memory to use for items in megabytes (default: 64 MB)\n" + "-M return error on memory exhausted (rather than removing items)\n" + "-c max simultaneous connections (default: 1024)\n" + "-k lock down all paged memory. Note that there is a\n" + " limit on how much memory you may lock. 
Trying to\n" + " allocate more than that would fail, so be sure you\n" + " set the limit correctly for the user you started\n" + " the daemon with (not for -u user;\n" + " under sh this is done with 'ulimit -S -l NUM_KB').\n" + "-v verbose (print errors/warnings while in event loop)\n" + "-vv very verbose (also print client commands/reponses)\n" + "-vvv extremely verbose (also print internal state transitions)\n" + "-h print this help and exit\n" + "-i print memcached and libevent license\n" + "-P save PID in , only used with -d option\n" + "-f chunk size growth factor (default: 1.25)\n" + "-n minimum space allocated for key+value+flags (default: 48)\n"); + printf("-L Try to use large memory pages (if available). Increasing\n" + " the memory page size could reduce the number of TLB misses\n" + " and improve the performance. In order to get large pages\n" + " from the OS, memcached will allocate the total item-cache\n" + " in one large chunk.\n"); + printf("-D Use as the delimiter between key prefixes and IDs.\n" + " This is used for per-prefix stats reporting. The default is\n" + " \":\" (colon). If this option is specified, stats collection\n" + " is turned on automatically; if not, then it may be turned on\n" + " by sending the \"stats detail on\" command to the server.\n"); + printf("-t number of threads to use (default: 4)\n"); + printf("-R Maximum number of requests per event, limits the number of\n" + " requests process for a given connection to prevent \n" + " starvation (default: 20)\n"); + printf("-C Disable use of CAS\n"); + printf("-b Set the backlog queue limit (default: 1024)\n"); + printf("-B Binding protocol - one of ascii, binary, or auto (default)\n"); + printf("-I Override the size of each slab page. 
Adjusts max item size\n" + " (default: 1mb, min: 1k, max: 128m)\n"); +#ifdef ENABLE_SASL + printf("-S Turn on Sasl authentication\n"); +#endif + printf("-o Comma separated list of extended or experimental options\n" + " - (EXPERIMENTAL) maxconns_fast: immediately close new\n" + " connections if over maxconns limit\n" + " - hashpower: An integer multiplier for how large the hash\n" + " table should be. Can be grown at runtime if not big enough.\n" + " Set this based on \"STAT hash_power_level\" before a \n" + " restart.\n" + ); + return; +} + +static void usage_license(void) { + printf(PACKAGE " " VERSION "\n\n"); + printf( + "Copyright (c) 2003, Danga Interactive, Inc. \n" + "All rights reserved.\n" + "\n" + "Redistribution and use in source and binary forms, with or without\n" + "modification, are permitted provided that the following conditions are\n" + "met:\n" + "\n" + " * Redistributions of source code must retain the above copyright\n" + "notice, this list of conditions and the following disclaimer.\n" + "\n" + " * Redistributions in binary form must reproduce the above\n" + "copyright notice, this list of conditions and the following disclaimer\n" + "in the documentation and/or other materials provided with the\n" + "distribution.\n" + "\n" + " * Neither the name of the Danga Interactive nor the names of its\n" + "contributors may be used to endorse or promote products derived from\n" + "this software without specific prior written permission.\n" + "\n" + "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n" + "\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n" + "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n" + "A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n" + "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n" + "SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n" + "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n" + "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n" + "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n" + "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n" + "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n" + "\n" + "\n" + "This product includes software developed by Niels Provos.\n" + "\n" + "[ libevent ]\n" + "\n" + "Copyright 2000-2003 Niels Provos \n" + "All rights reserved.\n" + "\n" + "Redistribution and use in source and binary forms, with or without\n" + "modification, are permitted provided that the following conditions\n" + "are met:\n" + "1. Redistributions of source code must retain the above copyright\n" + " notice, this list of conditions and the following disclaimer.\n" + "2. Redistributions in binary form must reproduce the above copyright\n" + " notice, this list of conditions and the following disclaimer in the\n" + " documentation and/or other materials provided with the distribution.\n" + "3. All advertising materials mentioning features or use of this software\n" + " must display the following acknowledgement:\n" + " This product includes software developed by Niels Provos.\n" + "4. 
The name of the author may not be used to endorse or promote products\n" + " derived from this software without specific prior written permission.\n" + "\n" + "THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n" + "IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n" + "OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n" + "IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n" + "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT\n" + "NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n" + "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n" + "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n" + "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\n" + "THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n" + ); + + return; +}
+
+/*
+ * Writes the current process id to pid_file.
+ *
+ * If the file already exists and names a process that kill(pid, 0) can
+ * still signal, a warning is printed first; the file is overwritten
+ * either way.  Failures to open or close the file are reported through
+ * vperror() and are not fatal.
+ */
+static void save_pid(const char *pid_file) {
+    FILE *fp;
+    if (access(pid_file, F_OK) == 0) {
+        if ((fp = fopen(pid_file, "r")) != NULL) {
+            char buffer[1024];
+            if (fgets(buffer, sizeof(buffer), fp) != NULL) {
+                unsigned int pid;
+                /* signal 0 only probes for existence; nothing is delivered */
+                if (safe_strtoul(buffer, &pid) && kill((pid_t)pid, 0) == 0) {
+                    fprintf(stderr, "WARNING: The pid file contained the following (running) pid: %u\n", pid);
+                }
+            }
+            fclose(fp);
+        }
+    }
+
+    if ((fp = fopen(pid_file, "w")) == NULL) {
+        vperror("Could not open the pid file %s for writing", pid_file);
+        return;
+    }
+
+    fprintf(fp,"%ld\n", (long)getpid());
+    if (fclose(fp) == -1) {
+        vperror("Could not close the pid file %s", pid_file);
+    }
+}
+
+/*
+ * Removes the pid file; a NULL pid_file is a no-op.  A failed unlink()
+ * is reported through vperror().
+ */
+static void remove_pidfile(const char *pid_file) {
+  if (pid_file == NULL)
+      return;
+
+  if (unlink(pid_file) != 0) {
+      vperror("Could not remove the pid file %s", pid_file);
+  }
+
+}
+
+/*
+ * Signal handler: prints a notice and terminates the process with
+ * EXIT_SUCCESS.
+ *
+ * NOTE(review): printf() and exit() are not async-signal-safe.  They are
+ * left as-is because exit() also runs atexit handlers and flushes stdio,
+ * which a switch to write()/_exit() would skip -- confirm before changing.
+ */
+static void sig_handler(const int sig) {
+    printf("SIGINT handled.\n");
+    exit(EXIT_SUCCESS);
+}
+
+#ifndef HAVE_SIGIGNORE
+/*
+ * Fallback for platforms without sigignore(): sets the disposition of
+ * sig to SIG_IGN through sigaction().
+ *
+ * @return 0 on success, -1 on failure
+ */
+static int sigignore(int sig) {
+    struct sigaction sa = { .sa_handler = SIG_IGN, .sa_flags = 0 };
+
+    if (sigemptyset(&sa.sa_mask) == -1 || sigaction(sig, &sa, 0) == -1) {
+        return -1;
+    }
+    return 0;
+}
+#endif
+
+
+/*
+ * On systems that supports multiple page sizes we may reduce the
+ * number of TLB-misses by using the biggest available page size
+ *
+ * @return 0 on success (always 0 when getpagesizes()/memcntl() are not
+ *         available at build time), -1 if the page size could not be set
+ */
+static int enable_large_pages(void) {
+#if defined(HAVE_GETPAGESIZES) && defined(HAVE_MEMCNTL)
+    int ret = -1;
+    size_t sizes[32];
+    int avail = getpagesizes(sizes, 32);
+    if (avail != -1) {
+        size_t max = sizes[0];
+        struct memcntl_mha arg = {0};
+        int ii;
+
+        /* pick the largest page size the system reports */
+        for (ii = 1; ii < avail; ++ii) {
+            if (max < sizes[ii]) {
+                max = sizes[ii];
+            }
+        }
+
+        arg.mha_flags   = 0;
+        arg.mha_pagesize = max;
+        arg.mha_cmd = MHA_MAPSIZE_BSSBRK;
+
+        if (memcntl(0, 0, MC_HAT_ADVISE, (caddr_t)&arg, 0, 0) == -1) {
+            fprintf(stderr, "Failed to set large pages: %s\n",
+                    strerror(errno));
+            fprintf(stderr, "Will use default page size\n");
+        } else {
+            ret = 0;
+        }
+    } else {
+        fprintf(stderr, "Failed to get supported pagesizes: %s\n",
+                strerror(errno));
+        fprintf(stderr, "Will use default page size\n");
+    }
+
+    return ret;
+#else
+    return 0;
+#endif
+}
+
+/**
+ * Do basic sanity check of the runtime environment
+ * @return true if no errors found, false if we can't use this env
+ */
+static bool sanitycheck(void) {
+    /* One of our biggest problems is old and bogus libevents */
+    const char *ever = event_get_version();
+    if (ever != NULL) {
+        if (strncmp(ever, "1.", 2) == 0) {
+            /* Require at least 1.3 (that's still a couple of years old) */
+            /* isdigit() needs an unsigned char value; a plain char may be negative */
+            if ((ever[2] == '1' || ever[2] == '2') && !isdigit((unsigned char)ever[3])) {
+                fprintf(stderr, "You are using libevent %s.\nPlease upgrade to"
+                        " a more recent version (1.3 or newer)\n",
+                        ever);
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+int main (int argc, char **argv) { + int c; + bool lock_memory = false; + bool do_daemonize = false; + bool preallocate = false; + int maxcore = 0; + char *username = NULL; + char *pid_file = NULL; + struct
passwd *pw; + struct rlimit rlim; + char unit = '\0'; + int size_max = 0; + int retval = EXIT_SUCCESS; + /* listening sockets */ + static int *l_socket = NULL; + + /* udp socket */ + static int *u_socket = NULL; + bool protocol_specified = false; + bool tcp_specified = false; + bool udp_specified = false; + + char *subopts; + char *subopts_value; + enum { + MAXCONNS_FAST = 0, + HASHPOWER_INIT, + SLAB_REASSIGN, + SLAB_AUTOMOVE + }; + char *const subopts_tokens[] = { + [MAXCONNS_FAST] = (char*)"maxconns_fast", + [HASHPOWER_INIT] = (char*)"hashpower", + [SLAB_REASSIGN] = (char*)"slab_reassign", + [SLAB_AUTOMOVE] = (char*)"slab_automove", + NULL + }; + + if (!sanitycheck()) { + return EX_OSERR; + } + + /* handle SIGINT */ + signal(SIGINT, sig_handler); + + /* init settings */ + settings_init(); + + /* set stderr non-buffering (for running under, say, daemontools) */ + setbuf(stderr, NULL); + + /* process arguments */ + while (-1 != (c = getopt(argc, argv, + "a:" /* access mask for unix socket */ + "p:" /* TCP port number to listen on */ + "s:" /* unix socket path to listen on */ + "U:" /* UDP port number to listen on */ + "m:" /* max memory to use for items in megabytes */ + "M" /* return error on memory exhausted */ + "c:" /* max simultaneous connections */ + "k" /* lock down all paged memory */ + "hi" /* help, licence info */ + "r" /* maximize core file limit */ + "v" /* verbose */ + "d" /* daemon mode */ + "l:" /* interface to listen on */ + "u:" /* user identity to run as */ + "P:" /* save PID in file */ + "f:" /* factor? */ + "n:" /* minimum space allocated for key+value+flags */ + "t:" /* threads */ + "D:" /* prefix delimiter? 
*/ + "L" /* Large memory pages */ + "R:" /* max requests per event */ + "C" /* Disable use of CAS */ + "b:" /* backlog queue limit */ + "B:" /* Binding protocol */ + "I:" /* Max item size */ + "S" /* Sasl ON */ + "o:" /* Extended generic options */ + ))) { + switch (c) { + case 'a': + /* access for unix domain socket, as octal mask (like chmod)*/ + settings.access= strtol(optarg,NULL,8); + break; + + case 'U': + settings.udpport = atoi(optarg); + udp_specified = true; + break; + case 'p': + settings.port = atoi(optarg); + tcp_specified = true; + break; + case 's': + settings.socketpath = optarg; + break; + case 'm': + settings.maxbytes = ((size_t)atoi(optarg)) * 1024 * 1024; + break; + case 'M': + settings.evict_to_free = 0; + break; + case 'c': + settings.maxconns = atoi(optarg); + break; + case 'h': + usage(); + exit(EXIT_SUCCESS); + case 'i': + usage_license(); + exit(EXIT_SUCCESS); + case 'k': + lock_memory = true; + break; + case 'v': + settings.verbose++; + break; + case 'l': + if (settings.inter != NULL) { + size_t len = strlen(settings.inter) + strlen(optarg) + 2; + char *p = malloc(len); + if (p == NULL) { + fprintf(stderr, "Failed to allocate memory\n"); + return 1; + } + snprintf(p, len, "%s,%s", settings.inter, optarg); + free(settings.inter); + settings.inter = p; + } else { + settings.inter= strdup(optarg); + } + break; + case 'd': + do_daemonize = true; + break; + case 'r': + maxcore = 1; + break; + case 'R': + settings.reqs_per_event = atoi(optarg); + if (settings.reqs_per_event == 0) { + fprintf(stderr, "Number of requests per event must be greater than 0\n"); + return 1; + } + break; + case 'u': + username = optarg; + break; + case 'P': + pid_file = optarg; + break; + case 'f': + settings.factor = atof(optarg); + if (settings.factor <= 1.0) { + fprintf(stderr, "Factor must be greater than 1\n"); + return 1; + } + break; + case 'n': + settings.chunk_size = atoi(optarg); + if (settings.chunk_size == 0) { + fprintf(stderr, "Chunk size must be greater 
than 0\n"); + return 1; + } + break; + case 't': + settings.num_threads = atoi(optarg); + if (settings.num_threads <= 0) { + fprintf(stderr, "Number of threads must be greater than 0\n"); + return 1; + } + /* There're other problems when you get above 64 threads. + * In the future we should portably detect # of cores for the + * default. + */ + if (settings.num_threads > 64) { + fprintf(stderr, "WARNING: Setting a high number of worker" + "threads is not recommended.\n" + " Set this value to the number of cores in" + " your machine or less.\n"); + } + break; + case 'D': + if (! optarg || ! optarg[0]) { + fprintf(stderr, "No delimiter specified\n"); + return 1; + } + settings.prefix_delimiter = optarg[0]; + settings.detail_enabled = 1; + break; + case 'L' : + if (enable_large_pages() == 0) { + preallocate = true; + } + break; + case 'C' : + settings.use_cas = false; + break; + case 'b' : + settings.backlog = atoi(optarg); + break; + case 'B': + protocol_specified = true; + if (strcmp(optarg, "auto") == 0) { + settings.binding_protocol = negotiating_prot; + } else if (strcmp(optarg, "binary") == 0) { + settings.binding_protocol = binary_prot; + } else if (strcmp(optarg, "ascii") == 0) { + settings.binding_protocol = ascii_prot; + } else { + fprintf(stderr, "Invalid value for binding protocol: %s\n" + " -- should be one of auto, binary, or ascii\n", optarg); + exit(EX_USAGE); + } + break; + case 'I': + unit = optarg[strlen(optarg)-1]; + if (unit == 'k' || unit == 'm' || + unit == 'K' || unit == 'M') { + optarg[strlen(optarg)-1] = '\0'; + size_max = atoi(optarg); + if (unit == 'k' || unit == 'K') + size_max *= 1024; + if (unit == 'm' || unit == 'M') + size_max *= 1024 * 1024; + settings.item_size_max = size_max; + } else { + settings.item_size_max = atoi(optarg); + } + if (settings.item_size_max < 1024) { + fprintf(stderr, "Item max size cannot be less than 1024 bytes.\n"); + return 1; + } + if (settings.item_size_max > 1024 * 1024 * 128) { + fprintf(stderr, "Cannot 
set item size limit higher than 128 mb.\n"); + return 1; + } + if (settings.item_size_max > 1024 * 1024) { + fprintf(stderr, "WARNING: Setting item max size above 1MB is not" + " recommended!\n" + " Raising this limit increases the minimum memory requirements\n" + " and will decrease your memory efficiency.\n" + ); + } + break; + case 'S': /* set Sasl authentication to true. Default is false */ +#ifndef ENABLE_SASL + fprintf(stderr, "This server is not built with SASL support.\n"); + exit(EX_USAGE); +#endif + settings.sasl = true; + break; + case 'o': /* It's sub-opts time! */ + subopts = optarg; + + while (*subopts != '\0') { + + switch (getsubopt(&subopts, subopts_tokens, &subopts_value)) { + case MAXCONNS_FAST: + settings.maxconns_fast = true; + break; + case HASHPOWER_INIT: + if (subopts_value == NULL) { + fprintf(stderr, "Missing numeric argument for hashpower\n"); + return 1; + } + settings.hashpower_init = atoi(subopts_value); + if (settings.hashpower_init < 12) { + fprintf(stderr, "Initial hashtable multiplier of %d is too low\n", + settings.hashpower_init); + return 1; + } else if (settings.hashpower_init > 64) { + fprintf(stderr, "Initial hashtable multiplier of %d is too high\n" + "Choose a value based on \"STAT hash_power_level\" from a running instance\n", + settings.hashpower_init); + return 1; + } + break; + case SLAB_REASSIGN: + settings.slab_reassign = true; + break; + case SLAB_AUTOMOVE: + settings.slab_automove = true; + break; + default: + printf("Illegal suboption \"%s\"\n", subopts_value); + return 1; + } + + } + break; + default: + fprintf(stderr, "Illegal argument \"%c\"\n", c); + return 1; + } + } + + /* + * Use one workerthread to serve each UDP port if the user specified + * multiple ports + */ + if (settings.inter != NULL && strchr(settings.inter, ',')) { + settings.num_threads_per_udp = 1; + } else { + settings.num_threads_per_udp = settings.num_threads; + } + + if (settings.sasl) { + if (!protocol_specified) { + 
settings.binding_protocol = binary_prot; + } else { + if (settings.binding_protocol != binary_prot) { + fprintf(stderr, "ERROR: You cannot allow the ASCII protocol while using SASL.\n"); + exit(EX_USAGE); + } + } + } + + if (tcp_specified && !udp_specified) { + settings.udpport = settings.port; + } else if (udp_specified && !tcp_specified) { + settings.port = settings.udpport; + } + + if (maxcore != 0) { + struct rlimit rlim_new; + /* + * First try raising to infinity; if that fails, try bringing + * the soft limit to the hard. + */ + if (getrlimit(RLIMIT_CORE, &rlim) == 0) { + rlim_new.rlim_cur = rlim_new.rlim_max = RLIM_INFINITY; + if (setrlimit(RLIMIT_CORE, &rlim_new)!= 0) { + /* failed. try raising just to the old max */ + rlim_new.rlim_cur = rlim_new.rlim_max = rlim.rlim_max; + (void)setrlimit(RLIMIT_CORE, &rlim_new); + } + } + /* + * getrlimit again to see what we ended up with. Only fail if + * the soft limit ends up 0, because then no core files will be + * created at all. + */ + + if ((getrlimit(RLIMIT_CORE, &rlim) != 0) || rlim.rlim_cur == 0) { + fprintf(stderr, "failed to ensure corefile creation\n"); + exit(EX_OSERR); + } + } + + /* + * If needed, increase rlimits to allow as many connections + * as needed. + */ + + if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) { + fprintf(stderr, "failed to getrlimit number of files\n"); + exit(EX_OSERR); + } else { + rlim.rlim_cur = settings.maxconns; + rlim.rlim_max = settings.maxconns; + if (setrlimit(RLIMIT_NOFILE, &rlim) != 0) { + fprintf(stderr, "failed to set rlimit for open files. 
Try starting as root or requesting smaller maxconns value.\n"); + exit(EX_OSERR); + } + } + + /* lose root privileges if we have them */ + if (getuid() == 0 || geteuid() == 0) { + if (username == 0 || *username == '\0') { + fprintf(stderr, "can't run as root without the -u switch\n"); + exit(EX_USAGE); + } + if ((pw = getpwnam(username)) == 0) { + fprintf(stderr, "can't find the user %s to switch to\n", username); + exit(EX_NOUSER); + } + if (setgid(pw->pw_gid) < 0 || setuid(pw->pw_uid) < 0) { + fprintf(stderr, "failed to assume identity of user %s\n", username); + exit(EX_OSERR); + } + } + + /* Initialize Sasl if -S was specified */ + if (settings.sasl) { + init_sasl(); + } + + /* daemonize if requested */ + /* if we want to ensure our ability to dump core, don't chdir to / */ + if (do_daemonize) { + if (sigignore(SIGHUP) == -1) { + perror("Failed to ignore SIGHUP"); + } + if (daemonize(maxcore, settings.verbose) == -1) { + fprintf(stderr, "failed to daemon() in order to daemonize\n"); + exit(EXIT_FAILURE); + } + } + + /* lock paged memory if needed */ + if (lock_memory) { +#ifdef HAVE_MLOCKALL + int res = mlockall(MCL_CURRENT | MCL_FUTURE); + if (res != 0) { + fprintf(stderr, "warning: -k invalid, mlockall() failed: %s\n", + strerror(errno)); + } +#else + fprintf(stderr, "warning: -k invalid, mlockall() not supported on this platform. 
proceeding without.\n"); +#endif + } + + /* initialize main thread libevent instance */ + main_base = event_init(); + + /* initialize other stuff */ + stats_init(); + assoc_init(settings.hashpower_init); + conn_init(); + slabs_init(settings.maxbytes, settings.factor, preallocate); + + /* + * ignore SIGPIPE signals; we can use errno == EPIPE if we + * need that information + */ + if (sigignore(SIGPIPE) == -1) { + perror("failed to ignore SIGPIPE; sigaction"); + exit(EX_OSERR); + } + /* start up worker threads if MT mode */ + thread_init(settings.num_threads, main_base); + + if (start_assoc_maintenance_thread() == -1) { + exit(EXIT_FAILURE); + } + + if (settings.slab_reassign && + start_slab_maintenance_thread() == -1) { + exit(EXIT_FAILURE); + } + + /* initialise clock event */ + clock_handler(0, 0, 0); + + /* create unix mode sockets after dropping privileges */ + if (settings.socketpath != NULL) { + errno = 0; + if (server_socket_unix(settings.socketpath,settings.access)) { + vperror("failed to listen on UNIX socket: %s", settings.socketpath); + exit(EX_OSERR); + } + } + + /* create the listening socket, bind it, and init */ + if (settings.socketpath == NULL) { + const char *portnumber_filename = getenv("MEMCACHED_PORT_FILENAME"); + char temp_portnumber_filename[PATH_MAX]; + FILE *portnumber_file = NULL; + + if (portnumber_filename != NULL) { + snprintf(temp_portnumber_filename, + sizeof(temp_portnumber_filename), + "%s.lck", portnumber_filename); + + portnumber_file = fopen(temp_portnumber_filename, "a"); + if (portnumber_file == NULL) { + fprintf(stderr, "Failed to open \"%s\": %s\n", + temp_portnumber_filename, strerror(errno)); + } + } + + errno = 0; + if (settings.port && server_sockets(settings.port, tcp_transport, + portnumber_file)) { + vperror("failed to listen on TCP port %d", settings.port); + exit(EX_OSERR); + } + + /* + * initialization order: first create the listening sockets + * (may need root on low ports), then drop root if needed, + * then 
daemonise if needed, then init libevent (in some cases + * descriptors created by libevent wouldn't survive forking). + */ + + /* create the UDP listening socket and bind it */ + errno = 0; + if (settings.udpport && server_sockets(settings.udpport, udp_transport, + portnumber_file)) { + vperror("failed to listen on UDP port %d", settings.udpport); + exit(EX_OSERR); + } + + if (portnumber_file) { + fclose(portnumber_file); + rename(temp_portnumber_filename, portnumber_filename); + } + } + + /* Give the sockets a moment to open. I know this is dumb, but the error + * is only an advisory. + */ + usleep(1000); + if (stats.curr_conns + stats.reserved_fds >= settings.maxconns - 1) { + fprintf(stderr, "Maxconns setting is too low, use -c to increase.\n"); + exit(EXIT_FAILURE); + } + + if (pid_file != NULL) { + save_pid(pid_file); + } + + /* Drop privileges no longer needed */ + drop_privileges(); + + /* enter the event loop */ + if (event_base_loop(main_base, 0) != 0) { + retval = EXIT_FAILURE; + } + + stop_assoc_maintenance_thread(); + + /* remove the PID file if we're a daemon */ + if (do_daemonize) + remove_pidfile(pid_file); + /* Clean up strdup() call for bind() address */ + if (settings.inter) + free(settings.inter); + if (l_socket) + free(l_socket); + if (u_socket) + free(u_socket); + + return retval; +} diff --git a/memcached/memcached.h b/memcached/memcached.h new file mode 100644 index 00000000..d070502c --- /dev/null +++ b/memcached/memcached.h @@ -0,0 +1,571 @@ +/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ + +/** \file + * The main memcached header holding commonly used data + * structures and function prototypes. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "protocol_binary.h" +#include "cache.h" + +#include "sasl_defs.h" + +/** Maximum length of a key. 
*/ +#define KEY_MAX_LENGTH 250 + +/** Size of an incr buf. */ +#define INCR_MAX_STORAGE_LEN 24 + +#define DATA_BUFFER_SIZE 2048 +#define UDP_READ_BUFFER_SIZE 65536 +#define UDP_MAX_PAYLOAD_SIZE 1400 +#define UDP_HEADER_SIZE 8 +#define MAX_SENDBUF_SIZE (256 * 1024 * 1024) +/* I'm told the max length of a 64-bit num converted to string is 20 bytes. + * Plus a few for spaces, \r\n, \0 */ +#define SUFFIX_SIZE 24 + +/** Initial size of list of items being returned by "get". */ +#define ITEM_LIST_INITIAL 200 + +/** Initial size of list of CAS suffixes appended to "gets" lines. */ +#define SUFFIX_LIST_INITIAL 20 + +/** Initial size of the sendmsg() scatter/gather array. */ +#define IOV_LIST_INITIAL 400 + +/** Initial number of sendmsg() argument structures to allocate. */ +#define MSG_LIST_INITIAL 10 + +/** High water marks for buffer shrinking */ +#define READ_BUFFER_HIGHWAT 8192 +#define ITEM_LIST_HIGHWAT 400 +#define IOV_LIST_HIGHWAT 600 +#define MSG_LIST_HIGHWAT 100 + +/* Binary protocol stuff */ +#define MIN_BIN_PKT_LENGTH 16 +#define BIN_PKT_HDR_WORDS (MIN_BIN_PKT_LENGTH/sizeof(uint32_t)) + +/* Initial power multiplier for the hash table */ +#define HASHPOWER_DEFAULT 16 + +/* unistd.h is here */ +#if HAVE_UNISTD_H +# include +#endif + +/* Slab sizing definitions. */ +#define POWER_SMALLEST 1 +#define POWER_LARGEST 200 +#define CHUNK_ALIGN_BYTES 8 +#define DONT_PREALLOC_SLABS +#define MAX_NUMBER_OF_SLAB_CLASSES (POWER_LARGEST + 1) + +/** How long an object can reasonably be assumed to be locked before + harvesting it on a low memory condition. */ +#define TAIL_REPAIR_TIME (3 * 3600) + +/* warning: don't use these macros with a function, as it evals its arg twice */ +#define ITEM_get_cas(i) (((i)->it_flags & ITEM_CAS) ? \ + (i)->data->cas : (uint64_t)0) + +#define ITEM_set_cas(i,v) { \ + if ((i)->it_flags & ITEM_CAS) { \ + (i)->data->cas = v; \ + } \ +} + +#define ITEM_key(item) (((char*)&((item)->data)) \ + + (((item)->it_flags & ITEM_CAS) ? 
sizeof(uint64_t) : 0)) + +#define ITEM_suffix(item) ((char*) &((item)->data) + (item)->nkey + 1 \ + + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0)) + +#define ITEM_data(item) ((char*) &((item)->data) + (item)->nkey + 1 \ + + (item)->nsuffix \ + + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0)) + +#define ITEM_ntotal(item) (sizeof(struct _stritem) + (item)->nkey + 1 \ + + (item)->nsuffix + (item)->nbytes \ + + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0)) + +#define STAT_KEY_LEN 128 +#define STAT_VAL_LEN 128 + +/** Append a simple stat with a stat name, value format and value */ +#define APPEND_STAT(name, fmt, val) \ + append_stat(name, add_stats, c, fmt, val); + +/** Append an indexed stat with a stat name (with format), value format + and value */ +#define APPEND_NUM_FMT_STAT(name_fmt, num, name, fmt, val) \ + klen = snprintf(key_str, STAT_KEY_LEN, name_fmt, num, name); \ + vlen = snprintf(val_str, STAT_VAL_LEN, fmt, val); \ + add_stats(key_str, klen, val_str, vlen, c); + +/** Common APPEND_NUM_FMT_STAT format. */ +#define APPEND_NUM_STAT(num, name, fmt, val) \ + APPEND_NUM_FMT_STAT("%d:%s", num, name, fmt, val) + +/** + * Callback for any function producing stats. + * + * @param key the stat's key + * @param klen length of the key + * @param val the stat's value in an ascii form (e.g. text form of a number) + * @param vlen length of the value + * @param cookie magic callback cookie + */ +typedef void (*ADD_STAT)(const char *key, const uint16_t klen, + const char *val, const uint32_t vlen, + const void *cookie); + +/* + * NOTE: If you modify this table you _MUST_ update the function state_text + */ +/** + * Possible states of a connection.
+ */ +enum conn_states { + conn_listening, /**< the socket which listens for connections */ + conn_new_cmd, /**< Prepare connection for next command */ + conn_waiting, /**< waiting for a readable socket */ + conn_read, /**< reading in a command line */ + conn_parse_cmd, /**< try to parse a command from the input buffer */ + conn_write, /**< writing out a simple response */ + conn_nread, /**< reading in a fixed number of bytes */ + conn_swallow, /**< swallowing unnecessary bytes w/o storing */ + conn_closing, /**< closing this connection */ + conn_mwrite, /**< writing out many items sequentially */ + conn_max_state /**< Max state value (used for assertion) */ +}; + +enum bin_substates { + bin_no_state, + bin_reading_set_header, + bin_reading_cas_header, + bin_read_set_value, + bin_reading_get_key, + bin_reading_stat, + bin_reading_del_header, + bin_reading_incr_header, + bin_read_flush_exptime, + bin_reading_sasl_auth, + bin_reading_sasl_auth_data, + bin_reading_touch_key, +}; + +enum protocol { + ascii_prot = 3, /* arbitrary value. */ + binary_prot, + negotiating_prot /* Discovering the protocol */ +}; + +enum network_transport { + local_transport, /* Unix sockets*/ + tcp_transport, + udp_transport +}; + +#define IS_UDP(x) (x == udp_transport) + +#define NREAD_ADD 1 +#define NREAD_SET 2 +#define NREAD_REPLACE 3 +#define NREAD_APPEND 4 +#define NREAD_PREPEND 5 +#define NREAD_CAS 6 + +enum store_item_type { + NOT_STORED=0, STORED, EXISTS, NOT_FOUND +}; + +enum delta_result_type { + OK, NON_NUMERIC, EOM, DELTA_ITEM_NOT_FOUND, DELTA_ITEM_CAS_MISMATCH +}; + +/** Time relative to server start. Smaller than time_t on 64-bit systems. */ +typedef unsigned int rel_time_t; + +/** Stats stored per slab (and per thread). */ +struct slab_stats { + uint64_t set_cmds; + uint64_t get_hits; + uint64_t touch_hits; + uint64_t delete_hits; + uint64_t cas_hits; + uint64_t cas_badval; + uint64_t incr_hits; + uint64_t decr_hits; +}; + +/** + * Stats stored per-thread. 
+ */ +struct thread_stats { + pthread_mutex_t mutex; + uint64_t get_cmds; + uint64_t get_misses; + uint64_t touch_cmds; + uint64_t touch_misses; + uint64_t delete_misses; + uint64_t incr_misses; + uint64_t decr_misses; + uint64_t cas_misses; + uint64_t bytes_read; + uint64_t bytes_written; + uint64_t flush_cmds; + uint64_t conn_yields; /* # of yields for connections (-R option)*/ + uint64_t auth_cmds; + uint64_t auth_errors; + struct slab_stats slab_stats[MAX_NUMBER_OF_SLAB_CLASSES]; +}; + +/** + * Global stats. + */ +struct stats { + pthread_mutex_t mutex; + unsigned int curr_items; + unsigned int total_items; + uint64_t curr_bytes; + unsigned int curr_conns; + unsigned int total_conns; + uint64_t rejected_conns; + unsigned int reserved_fds; + unsigned int conn_structs; + uint64_t get_cmds; + uint64_t set_cmds; + uint64_t touch_cmds; + uint64_t get_hits; + uint64_t get_misses; + uint64_t touch_hits; + uint64_t touch_misses; + uint64_t evictions; + uint64_t reclaimed; + time_t started; /* when the process was started */ + bool accepting_conns; /* whether we are currently accepting */ + uint64_t listen_disabled_num; + unsigned int hash_power_level; /* Better hope it's not over 9000 */ + uint64_t hash_bytes; /* size used for hash tables */ + bool hash_is_expanding; /* If the hash table is being expanded */ + uint64_t expired_unfetched; /* items reclaimed but never touched */ + uint64_t evicted_unfetched; /* items evicted but never touched */ + bool slab_reassign_running; /* slab reassign in progress */ + uint64_t slabs_moved; /* times slabs were moved around */ +}; + +#define MAX_VERBOSITY_LEVEL 2 + +/* When adding a setting, be sure to update process_stat_settings */ +/** + * Globally accessible settings as derived from the commandline. 
+ */ +struct settings { + size_t maxbytes; + int maxconns; + int port; + int udpport; + char *inter; + int verbose; + rel_time_t oldest_live; /* ignore existing items older than this */ + int evict_to_free; + char *socketpath; /* path to unix socket if using local socket */ + int access; /* access mask (a la chmod) for unix domain socket */ + double factor; /* chunk size growth factor */ + int chunk_size; + int num_threads; /* number of worker (without dispatcher) libevent threads to run */ + int num_threads_per_udp; /* number of worker threads serving each udp socket */ + char prefix_delimiter; /* character that marks a key prefix (for stats) */ + int detail_enabled; /* nonzero if we're collecting detailed stats */ + int reqs_per_event; /* Maximum number of io to process on each + io-event. */ + bool use_cas; + enum protocol binding_protocol; + int backlog; + int item_size_max; /* Maximum item size, and upper end for slabs */ + bool sasl; /* SASL on/off */ + bool maxconns_fast; /* Whether or not to early close connections */ + bool slab_reassign; /* Whether or not slab reassignment is allowed */ + bool slab_automove; /* Whether or not to automatically move slabs */ + int hashpower_init; /* Starting hash power level */ +}; + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wshadow" +#endif +extern struct stats stats; +extern time_t process_started; +extern struct settings settings; + +#define ITEM_LINKED 1 +#define ITEM_CAS 2 + +/* temp */ +#define ITEM_SLABBED 4 + +#define ITEM_FETCHED 8 + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wshadow" +#endif +/** + * Structure for storing items within memcached. 
+ */ +typedef struct _stritem { + struct _stritem *next; + struct _stritem *prev; + struct _stritem *h_next; /* hash chain next */ + rel_time_t time; /* least recent access */ + rel_time_t exptime; /* expire time */ + int nbytes; /* size of data */ + unsigned short refcount; + uint8_t nsuffix; /* length of flags-and-length string */ + uint8_t it_flags; /* ITEM_* above */ + uint8_t slabs_clsid;/* which slab class we're in */ + uint8_t nkey; /* key length, w/terminating null and padding */ + /* this odd type prevents type-punning issues when we do + * the little shuffle to save space when not using CAS. */ + union { + uint64_t cas; + char end; + } data[]; + /* if it_flags & ITEM_CAS we have 8 bytes CAS */ + /* then null-terminated key */ + /* then " flags length\r\n" (no terminating null) */ + /* then data with terminating \r\n (no terminating null; it's binary!) */ +} item; + +typedef struct { + pthread_t thread_id; /* unique ID of this thread */ + struct event_base *base; /* libevent handle this thread uses */ + struct event notify_event; /* listen event for notify pipe */ + int notify_receive_fd; /* receiving end of notify pipe */ + int notify_send_fd; /* sending end of notify pipe */ + struct thread_stats stats; /* Stats generated by this thread */ + struct conn_queue *new_conn_queue; /* queue of new connections to handle */ + cache_t *suffix_cache; /* suffix cache */ +} LIBEVENT_THREAD; + +typedef struct { + pthread_t thread_id; /* unique ID of this thread */ + struct event_base *base; /* libevent handle this thread uses */ +} LIBEVENT_DISPATCHER_THREAD; + +/** + * The structure representing a connection into memcached. 
+ */ +typedef struct conn conn; +struct conn { + int sfd; + sasl_conn_t *sasl_conn; + enum conn_states state; + enum bin_substates substate; + struct event event; + short ev_flags; + short which; /** which events were just triggered */ + + char *rbuf; /** buffer to read commands into */ + char *rcurr; /** but if we parsed some already, this is where we stopped */ + int rsize; /** total allocated size of rbuf */ + int rbytes; /** how much data, starting from rcur, do we have unparsed */ + + char *wbuf; + char *wcurr; + int wsize; + int wbytes; + /** which state to go into after finishing current write */ + enum conn_states write_and_go; + void *write_and_free; /** free this memory after finishing writing */ + + char *ritem; /** when we read in an item's value, it goes here */ + int rlbytes; + + /* data for the nread state */ + + /** + * item is used to hold an item structure created after reading the command + * line of set/add/replace commands, but before we finished reading the actual + * data. The data is read into ITEM_data(item) to avoid extra copying. 
+ */ + + void *item; /* for commands set/add/replace */ + + /* data for the swallow state */ + int sbytes; /* how many bytes to swallow */ + + /* data for the mwrite state */ + struct iovec *iov; + int iovsize; /* number of elements allocated in iov[] */ + int iovused; /* number of elements used in iov[] */ + + struct msghdr *msglist; + int msgsize; /* number of elements allocated in msglist[] */ + int msgused; /* number of elements used in msglist[] */ + int msgcurr; /* element in msglist[] being transmitted now */ + int msgbytes; /* number of bytes in current msg */ + + item **ilist; /* list of items to write out */ + int isize; + item **icurr; + int ileft; + + char **suffixlist; + int suffixsize; + char **suffixcurr; + int suffixleft; + + enum protocol protocol; /* which protocol this connection speaks */ + enum network_transport transport; /* what transport is used by this connection */ + + /* data for UDP clients */ + int request_id; /* Incoming UDP request ID, if this is a UDP "connection" */ + struct sockaddr request_addr; /* Who sent the most recent request */ + socklen_t request_addr_size; + unsigned char *hdrbuf; /* udp packet headers */ + int hdrsize; /* number of headers' worth of space is allocated */ + + bool noreply; /* True if the reply should not be sent. */ + /* current stats command */ + struct { + char *buffer; + size_t size; + size_t offset; + } stats; + + /* Binary protocol stuff */ + /* This is where the binary header goes */ + protocol_binary_request_header binary_header; + uint64_t cas; /* the cas to return */ + short cmd; /* current command being processed */ + int opaque; + int keylen; + conn *next; /* Used for generating a list of conn structures */ + LIBEVENT_THREAD *thread; /* Pointer to the thread object serving this connection */ +}; + + +/* current time of day (updated periodically) */ +extern volatile rel_time_t current_time; + +/* TODO: Move to slabs.h? 
*/ +extern volatile int slab_rebalance_signal; + +struct slab_rebalance { + void *slab_start; + void *slab_end; + void *slab_pos; + int s_clsid; + int d_clsid; + int busy_items; + uint8_t done; +}; + +extern struct slab_rebalance slab_rebal; + +/* + * Functions + */ +void do_accept_new_conns(const bool do_accept); +enum delta_result_type do_add_delta(conn *c, const char *key, + const size_t nkey, const bool incr, + const int64_t delta, char *buf, + uint64_t *cas, const uint32_t hv); +enum store_item_type do_store_item(item *item, int comm, conn* c, const uint32_t hv); +conn *conn_new(const int sfd, const enum conn_states init_state, const int event_flags, const int read_buffer_size, enum network_transport transport, struct event_base *base); +extern int daemonize(int nochdir, int noclose); + +static inline int mutex_lock(pthread_mutex_t *mutex) +{ + while (pthread_mutex_trylock(mutex)); + return 0; +} + +#define mutex_unlock(x) pthread_mutex_unlock(x) + +#include "stats.h" +#include "slabs.h" +#include "assoc.h" +#include "items.h" +#include "trace.h" +#include "hash.h" +#include "util.h" + +/* + * Functions such as the libevent-related calls that need to do cross-thread + * communication in multithreaded mode (rather than actually doing the work + * in the current thread) are called via "dispatch_" frontends, which are + * also #define-d to directly call the underlying code in singlethreaded mode. + */ + +void thread_init(int nthreads, struct event_base *main_base); +int dispatch_event_add(int thread, conn *c); +void dispatch_conn_new(int sfd, enum conn_states init_state, int event_flags, int read_buffer_size, enum network_transport transport); + +/* Lock wrappers for cache functions that are called from main loop. 
*/ +enum delta_result_type add_delta(conn *c, const char *key, + const size_t nkey, const int incr, + const int64_t delta, char *buf, + uint64_t *cas); +void accept_new_conns(const bool do_accept); +conn *conn_from_freelist(void); +bool conn_add_to_freelist(conn *c); +int is_listen_thread(void); +item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes); +char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes); +void item_flush_expired(void); +item *item_get(const char *key, const size_t nkey); +item *item_touch(const char *key, const size_t nkey, uint32_t exptime); +int item_link(item *it); +void item_remove(item *it); +int item_replace(item *it, item *new_it, const uint32_t hv); +void item_stats(ADD_STAT add_stats, void *c); +void item_stats_sizes(ADD_STAT add_stats, void *c); +void item_unlink(item *it); +void item_update(item *it); + +void item_lock(uint32_t hv); +void item_unlock(uint32_t hv); +unsigned short refcount_incr(unsigned short *refcount); +unsigned short refcount_decr(unsigned short *refcount); +void STATS_LOCK(void); +void STATS_UNLOCK(void); +void threadlocal_stats_reset(void); +void threadlocal_stats_aggregate(struct thread_stats *stats); +void slab_stats_aggregate(struct thread_stats *stats, struct slab_stats *out); + +/* Stat processing functions */ +void append_stat(const char *name, ADD_STAT add_stats, conn *c, + const char *fmt, ...); + +enum store_item_type store_item(item *item, int comm, conn *c); + +#if defined(HAVE_DROP_PRIVILEGES) && HAVE_DROP_PRIVILEGES +extern void drop_privileges(void); +#else +#define drop_privileges() +#endif + +/* If supported, give compiler hints for branch prediction. 
*/ +#if !defined(__GNUC__) || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) +#define __builtin_expect(x, expected_value) (x) +#endif + +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) diff --git a/memcached/memcached.spec.in b/memcached/memcached.spec.in new file mode 100644 index 00000000..f473d8fa --- /dev/null +++ b/memcached/memcached.spec.in @@ -0,0 +1,119 @@ +Name: memcached +Version: @VERSION@ +Release: @RELEASE@%{?dist} +Summary: High Performance, Distributed Memory Object Cache + +Group: System Environment/Daemons +License: BSD +URL: http://www.danga.com/memcached/ +Source0: http://memcached.googlecode.com/files/%{name}-@FULLVERSION@.tar.gz +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) + +BuildRequires: libevent-devel +BuildRequires: perl(Test::More) +BuildRequires: /usr/bin/prove +Requires: initscripts +Requires(post): /sbin/chkconfig +Requires(preun): /sbin/chkconfig, /sbin/service +Requires(postun): /sbin/service + +%description +memcached is a high-performance, distributed memory object caching +system, generic in nature, but intended for use in speeding up dynamic +web applications by alleviating database load. 
+ +%prep +%setup -q -n %{name}-@FULLVERSION@ + + +%build +%configure + +make %{?_smp_mflags} + +%check +make test + +%install +rm -rf %{buildroot} +make install DESTDIR=%{buildroot} + +# remove memcached-debug +rm -f %{buildroot}/%{_bindir}/memcached-debug + +# Perl script for monitoring memcached +install -Dp -m0755 scripts/memcached-tool %{buildroot}%{_bindir}/memcached-tool + +# Init script +install -Dp -m0755 scripts/memcached.sysv %{buildroot}%{_initrddir}/memcached + +# Default configs +mkdir -p %{buildroot}/%{_sysconfdir}/sysconfig +cat <%{buildroot}/%{_sysconfdir}/sysconfig/%{name} +PORT="11211" +USER="nobody" +MAXCONN="1024" +CACHESIZE="64" +OPTIONS="" +EOF + +# pid directory +mkdir -p %{buildroot}/%{_localstatedir}/run/memcached + +%clean +rm -rf %{buildroot} + + +%post +/sbin/chkconfig --add %{name} + +%preun +if [ "$1" = 0 ] ; then + /sbin/service %{name} stop > /dev/null 2>&1 + /sbin/chkconfig --del %{name} +fi +exit 0 + +%postun +if [ "$1" -ge 1 ]; then + /sbin/service %{name} condrestart > /dev/null 2>&1 +fi +exit 0 + + +%files +%defattr(-,root,root,-) +%doc AUTHORS ChangeLog COPYING NEWS README doc/CONTRIBUTORS doc/*.txt +%config(noreplace) %{_sysconfdir}/sysconfig/%{name} + +%dir %attr(750,nobody,nobody) %{_localstatedir}/run/memcached +%{_bindir}/memcached-tool +%{_bindir}/memcached +%{_mandir}/man1/memcached.1* +%{_initrddir}/memcached +%{_includedir}/memcached + +%changelog +* Mon Nov 2 2009 Dormando - 1.4.3-1 +- Fix autogen more. + +* Sat Aug 29 2009 Dustin Sallings - 1.4.1-1 +- Autogenerate the version number from tags. 
+ +* Wed Jul 4 2007 Paul Lindner - 1.2.2-5 +- Use /var/run/memcached/ directory to hold PID file + +* Sat May 12 2007 Paul Lindner - 1.2.2-4 +- Remove tabs from spec file, rpmlint reports no more errors + +* Thu May 10 2007 Paul Lindner - 1.2.2-3 +- Enable build-time regression tests +- add dependency on initscripts +- remove memcached-debug (not needed in dist) +- above suggestions from Bernard Johnson + +* Mon May 7 2007 Paul Lindner - 1.2.2-2 +- Tidyness improvements suggested by Ruben Kerkhof in bugzilla #238994 + +* Fri May 4 2007 Paul Lindner - 1.2.2-1 +- Initial spec file created via rpmdev-newspec diff --git a/memcached/memcached_dtrace.d b/memcached/memcached_dtrace.d new file mode 100644 index 00000000..eeffb6f3 --- /dev/null +++ b/memcached/memcached_dtrace.d @@ -0,0 +1,295 @@ +/* + * Copyright (c) <2008>, Sun Microsystems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY SUN MICROSYSTEMS, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL SUN MICROSYSTEMS, INC. 
BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +provider memcached { + /** + * Fired when a connection object is allocated from the connection pool. + * @param connid the connection id + */ + probe conn__allocate(int connid); + + /** + * Fired when a connection object is released back to the connection pool. + * @param connid the connection id + */ + probe conn__release(int connid); + + /** + * Fired when a new connection object is created (there are no more + * connection objects in the connection pool). + * @param ptr pointer to the connection object + */ + probe conn__create(void *ptr); + + /** + * Fired when a connection object is destroyed ("released back to + * the memory subsystem"). + * @param ptr pointer to the connection object + */ + probe conn__destroy(void *ptr); + + /** + * Fired when a connection is dispatched from the "main thread" to a + * worker thread. + * @param connid the connection id + * @param threadid the thread id + */ + probe conn__dispatch(int connid, int threadid); + + /** + * Allocate memory from the slab allocator. + * @param size the requested size + * @param slabclass the allocation will be fulfilled in this class + * @param slabsize the size of each item in this class + * @param ptr pointer to allocated memory + */ + probe slabs__allocate(int size, int slabclass, int slabsize, void* ptr); + + /** + * Failed to allocate memory (out of memory). 
+ * @param size the requested size + * @param slabclass the class that failed to fulfill the request + */ + probe slabs__allocate__failed(int size, int slabclass); + + /** + * Fired when a slab class attempts to allocate more space. + * @param slabclass class that needs more memory + */ + probe slabs__slabclass__allocate(int slabclass); + + /** + * Failed to allocate memory (out of memory). + * @param slabclass the class that failed to grab more memory + */ + probe slabs__slabclass__allocate__failed(int slabclass); + + /** + * Release memory. + * @param size the size of the memory + * @param slabclass the class the memory belongs to + * @param ptr pointer to the memory to release + */ + probe slabs__free(int size, int slabclass, void* ptr); + + /** + * Fired when we have searched the hash table for a named key. + * These two elements provide an insight in how well the hash function + * functions. Long traversals are a sign of a less optimal function, + * wasting cpu capacity. + * + * @param key the key searched for + * @param keylen length of the key + * @param depth the depth in the list of hash table + */ + probe assoc__find(const char *key, int keylen, int depth); + + /** + * Fired when a new item has been inserted. + * @param key the key just inserted + * @param keylen length of the key + * @param nokeys the total number of keys currently stored, + * including the key for which insert was called. + */ + probe assoc__insert(const char *key, int keylen, int nokeys); + + /** + * Fired when an item has been removed. + * @param key the key just deleted + * @param keylen length of the key + * @param nokeys the total number of keys currently stored, + * excluding the key for which delete was called. + */ + probe assoc__delete(const char *key, int keylen, int nokeys); + + /** + * Fired when an item is linked into the cache.
+ * @param key the item's key + * @param keylen length of the key + * @param size the size of the data + */ + probe item__link(const char *key, int keylen, int size); + + /** + * Fired when an item is deleted. + * @param key the item's key + * @param keylen length of the key + * @param size the size of the data + */ + probe item__unlink(const char *key, int keylen, int size); + + /** + * Fired when the refcount for an item is reduced. + * @param key the item's key + * @param keylen length of the key + * @param size the size of the data + */ + probe item__remove(const char *key, int keylen, int size); + + /** + * Fired when the "last referenced" time is updated. + * @param key the item's key + * @param keylen length of the key + * @param size the size of the data + */ + probe item__update(const char *key, int keylen, int size); + + /** + * Fired when an item is replaced with another item. + * @param oldkey the key of the item to replace + * @param oldkeylen the length of the old key + * @param oldsize the size of the old item + * @param newkey the key of the new item + * @param newkeylen the length of the new key + * @param newsize the size of the new item + */ + probe item__replace(const char *oldkey, int oldkeylen, int oldsize, + const char *newkey, int newkeylen, int newsize); + + /** + * Fired when the processing of a command starts. + * @param connid the connection id + * @param request the incoming request + * @param size the size of the request + */ + probe process__command__start(int connid, const void *request, int size); + + /** + * Fired when the processing of a command is done.
+ * @param connid the connection id + * @param response the response to send back to the client + * @param size the size of the response + */ + probe process__command__end(int connid, const void *response, int size); + + /** + * Fired for a get-command + * @param connid connection id + * @param key requested key + * @param keylen length of the key + * @param size size of the key's data (or signed int -1 if not found) + * @param casid the casid for the item + */ + probe command__get(int connid, const char *key, int keylen, int size, int64_t casid); + + /** + * Fired for an add-command. + * @param connid connection id + * @param key requested key + * @param keylen length of the key + * @param size the new size of the key's data (or signed int -1 if + * not found) + * @param casid the casid for the item + */ + probe command__add(int connid, const char *key, int keylen, int size, int64_t casid); + + /** + * Fired for a set-command. + * @param connid connection id + * @param key requested key + * @param keylen length of the key + * @param size the new size of the key's data (or signed int -1 if + * not found) + * @param casid the casid for the item + */ + probe command__set(int connid, const char *key, int keylen, int size, int64_t casid); + + /** + * Fired for a replace-command. + * @param connid connection id + * @param key requested key + * @param keylen length of the key + * @param size the new size of the key's data (or signed int -1 if + * not found) + * @param casid the casid for the item + */ + probe command__replace(int connid, const char *key, int keylen, int size, int64_t casid); + + /** + * Fired for a prepend-command.
+ * @param connid connection id + * @param key requested key + * @param keylen length of the key + * @param size the new size of the key's data (or signed int -1 if + * not found) + * @param casid the casid for the item + */ + probe command__prepend(int connid, const char *key, int keylen, int size, int64_t casid); + + /** + * Fired for an append-command. + * @param connid connection id + * @param key requested key + * @param keylen length of the key + * @param size the new size of the key's data (or signed int -1 if + * not found) + * @param casid the casid for the item + */ + probe command__append(int connid, const char *key, int keylen, int size, int64_t casid); + + /** + * Fired for a cas-command. + * @param connid connection id + * @param key requested key + * @param keylen length of the key + * @param size size of the key's data (or signed int -1 if not found) + * @param casid the cas id requested + */ + probe command__cas(int connid, const char *key, int keylen, int size, int64_t casid); + + /** + * Fired for an incr command. + * @param connid connection id + * @param key the requested key + * @param keylen length of the key + * @param val the new value + */ + probe command__incr(int connid, const char *key, int keylen, int64_t val); + + /** + * Fired for a decr command. + * @param connid connection id + * @param key the requested key + * @param keylen length of the key + * @param val the new value + */ + probe command__decr(int connid, const char *key, int keylen, int64_t val); + + /** + * Fired for a delete command. 
+ * @param connid connection id + * @param key the requested key + * @param keylen length of the key + */ + probe command__delete(int connid, const char *key, int keylen); + +}; + +#pragma D attributes Unstable/Unstable/Common provider memcached provider +#pragma D attributes Private/Private/Common provider memcached module +#pragma D attributes Private/Private/Common provider memcached function +#pragma D attributes Unstable/Unstable/Common provider memcached name +#pragma D attributes Unstable/Unstable/Common provider memcached args diff --git a/memcached/protocol_binary.h b/memcached/protocol_binary.h new file mode 100644 index 00000000..dfc59c80 --- /dev/null +++ b/memcached/protocol_binary.h @@ -0,0 +1,470 @@ +/* + * Copyright (c) <2008>, Sun Microsystems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY SUN MICROSYSTEMS, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL SUN MICROSYSTEMS, INC. 
BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Summary: Constants used to implement the binary protocol. + * + * Copy: See Copyright for the status of this software. + * + * Author: Trond Norbye + */ + +#ifndef PROTOCOL_BINARY_H +#define PROTOCOL_BINARY_H + +/** + * This file contains definitions of the constants and packet formats + * defined in the binary specification. Please note that you _MUST_ remember + * to convert each multibyte field to / from network byte order to / from + * host order. + */ +#ifdef __cplusplus +extern "C" +{ +#endif + + /** + * Definition of the legal "magic" values used in a packet. + * See section 3.1 Magic byte + */ + typedef enum { + PROTOCOL_BINARY_REQ = 0x80, + PROTOCOL_BINARY_RES = 0x81 + } protocol_binary_magic; + + /** + * Definition of the valid response status numbers. + * See section 3.2 Response Status + */ + typedef enum { + PROTOCOL_BINARY_RESPONSE_SUCCESS = 0x00, + PROTOCOL_BINARY_RESPONSE_KEY_ENOENT = 0x01, + PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS = 0x02, + PROTOCOL_BINARY_RESPONSE_E2BIG = 0x03, + PROTOCOL_BINARY_RESPONSE_EINVAL = 0x04, + PROTOCOL_BINARY_RESPONSE_NOT_STORED = 0x05, + PROTOCOL_BINARY_RESPONSE_DELTA_BADVAL = 0x06, + PROTOCOL_BINARY_RESPONSE_AUTH_ERROR = 0x20, + PROTOCOL_BINARY_RESPONSE_AUTH_CONTINUE = 0x21, + PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND = 0x81, + PROTOCOL_BINARY_RESPONSE_ENOMEM = 0x82 + } protocol_binary_response_status; + + /** + * Definition of the different command opcodes.
+ * See section 3.3 Command Opcodes + */ + typedef enum { + PROTOCOL_BINARY_CMD_GET = 0x00, + PROTOCOL_BINARY_CMD_SET = 0x01, + PROTOCOL_BINARY_CMD_ADD = 0x02, + PROTOCOL_BINARY_CMD_REPLACE = 0x03, + PROTOCOL_BINARY_CMD_DELETE = 0x04, + PROTOCOL_BINARY_CMD_INCREMENT = 0x05, + PROTOCOL_BINARY_CMD_DECREMENT = 0x06, + PROTOCOL_BINARY_CMD_QUIT = 0x07, + PROTOCOL_BINARY_CMD_FLUSH = 0x08, + PROTOCOL_BINARY_CMD_GETQ = 0x09, + PROTOCOL_BINARY_CMD_NOOP = 0x0a, + PROTOCOL_BINARY_CMD_VERSION = 0x0b, + PROTOCOL_BINARY_CMD_GETK = 0x0c, + PROTOCOL_BINARY_CMD_GETKQ = 0x0d, + PROTOCOL_BINARY_CMD_APPEND = 0x0e, + PROTOCOL_BINARY_CMD_PREPEND = 0x0f, + PROTOCOL_BINARY_CMD_STAT = 0x10, + PROTOCOL_BINARY_CMD_SETQ = 0x11, + PROTOCOL_BINARY_CMD_ADDQ = 0x12, + PROTOCOL_BINARY_CMD_REPLACEQ = 0x13, + PROTOCOL_BINARY_CMD_DELETEQ = 0x14, + PROTOCOL_BINARY_CMD_INCREMENTQ = 0x15, + PROTOCOL_BINARY_CMD_DECREMENTQ = 0x16, + PROTOCOL_BINARY_CMD_QUITQ = 0x17, + PROTOCOL_BINARY_CMD_FLUSHQ = 0x18, + PROTOCOL_BINARY_CMD_APPENDQ = 0x19, + PROTOCOL_BINARY_CMD_PREPENDQ = 0x1a, + PROTOCOL_BINARY_CMD_TOUCH = 0x1c, + PROTOCOL_BINARY_CMD_GAT = 0x1d, + PROTOCOL_BINARY_CMD_GATQ = 0x1e, + PROTOCOL_BINARY_CMD_GATK = 0x23, + PROTOCOL_BINARY_CMD_GATKQ = 0x24, + + PROTOCOL_BINARY_CMD_SASL_LIST_MECHS = 0x20, + PROTOCOL_BINARY_CMD_SASL_AUTH = 0x21, + PROTOCOL_BINARY_CMD_SASL_STEP = 0x22, + + /* These commands are used for range operations and exist within + * this header for use in other projects. Range operations are + * not expected to be implemented in the memcached server itself. 
+ */ + PROTOCOL_BINARY_CMD_RGET = 0x30, + PROTOCOL_BINARY_CMD_RSET = 0x31, + PROTOCOL_BINARY_CMD_RSETQ = 0x32, + PROTOCOL_BINARY_CMD_RAPPEND = 0x33, + PROTOCOL_BINARY_CMD_RAPPENDQ = 0x34, + PROTOCOL_BINARY_CMD_RPREPEND = 0x35, + PROTOCOL_BINARY_CMD_RPREPENDQ = 0x36, + PROTOCOL_BINARY_CMD_RDELETE = 0x37, + PROTOCOL_BINARY_CMD_RDELETEQ = 0x38, + PROTOCOL_BINARY_CMD_RINCR = 0x39, + PROTOCOL_BINARY_CMD_RINCRQ = 0x3a, + PROTOCOL_BINARY_CMD_RDECR = 0x3b, + PROTOCOL_BINARY_CMD_RDECRQ = 0x3c + /* End Range operations */ + + } protocol_binary_command; + + /** + * Definition of the data types in the packet + * See section 3.4 Data Types + */ + typedef enum { + PROTOCOL_BINARY_RAW_BYTES = 0x00 + } protocol_binary_datatypes; + + /** + * Definition of the header structure for a request packet. + * See section 2 + */ + typedef union { + struct { + uint8_t magic; + uint8_t opcode; + uint16_t keylen; + uint8_t extlen; + uint8_t datatype; + uint16_t reserved; + uint32_t bodylen; + uint32_t opaque; + uint64_t cas; + } request; + uint8_t bytes[24]; + } protocol_binary_request_header; + + /** + * Definition of the header structure for a response packet. 
+ * See section 2 + */ + typedef union { + struct { + uint8_t magic; + uint8_t opcode; + uint16_t keylen; + uint8_t extlen; + uint8_t datatype; + uint16_t status; + uint32_t bodylen; + uint32_t opaque; + uint64_t cas; + } response; + uint8_t bytes[24]; + } protocol_binary_response_header; + + /** + * Definition of a request-packet containing no extras + */ + typedef union { + struct { + protocol_binary_request_header header; + } message; + uint8_t bytes[sizeof(protocol_binary_request_header)]; + } protocol_binary_request_no_extras; + + /** + * Definition of a response-packet containing no extras + */ + typedef union { + struct { + protocol_binary_response_header header; + } message; + uint8_t bytes[sizeof(protocol_binary_response_header)]; + } protocol_binary_response_no_extras; + + /** + * Definition of the packet used by the get, getq, getk and getkq command. + * See section 4 + */ + typedef protocol_binary_request_no_extras protocol_binary_request_get; + typedef protocol_binary_request_no_extras protocol_binary_request_getq; + typedef protocol_binary_request_no_extras protocol_binary_request_getk; + typedef protocol_binary_request_no_extras protocol_binary_request_getkq; + + /** + * Definition of the packet returned from a successful get, getq, getk and + * getkq. 
+ * See section 4 + */ + typedef union { + struct { + protocol_binary_response_header header; + struct { + uint32_t flags; + } body; + } message; + uint8_t bytes[sizeof(protocol_binary_response_header) + 4]; + } protocol_binary_response_get; + + typedef protocol_binary_response_get protocol_binary_response_getq; + typedef protocol_binary_response_get protocol_binary_response_getk; + typedef protocol_binary_response_get protocol_binary_response_getkq; + + /** + * Definition of the packet used by the delete command + * See section 4 + */ + typedef protocol_binary_request_no_extras protocol_binary_request_delete; + + /** + * Definition of the packet returned by the delete command + * See section 4 + */ + typedef protocol_binary_response_no_extras protocol_binary_response_delete; + + /** + * Definition of the packet used by the flush command + * See section 4 + * Please note that the expiration field is optional, so remember to see + * check the header.bodysize to see if it is present. + */ + typedef union { + struct { + protocol_binary_request_header header; + struct { + uint32_t expiration; + } body; + } message; + uint8_t bytes[sizeof(protocol_binary_request_header) + 4]; + } protocol_binary_request_flush; + + /** + * Definition of the packet returned by the flush command + * See section 4 + */ + typedef protocol_binary_response_no_extras protocol_binary_response_flush; + + /** + * Definition of the packet used by set, add and replace + * See section 4 + */ + typedef union { + struct { + protocol_binary_request_header header; + struct { + uint32_t flags; + uint32_t expiration; + } body; + } message; + uint8_t bytes[sizeof(protocol_binary_request_header) + 8]; + } protocol_binary_request_set; + typedef protocol_binary_request_set protocol_binary_request_add; + typedef protocol_binary_request_set protocol_binary_request_replace; + + /** + * Definition of the packet returned by set, add and replace + * See section 4 + */ + typedef protocol_binary_response_no_extras 
protocol_binary_response_set; + typedef protocol_binary_response_no_extras protocol_binary_response_add; + typedef protocol_binary_response_no_extras protocol_binary_response_replace; + + /** + * Definition of the noop packet + * See section 4 + */ + typedef protocol_binary_request_no_extras protocol_binary_request_noop; + + /** + * Definition of the packet returned by the noop command + * See section 4 + */ + typedef protocol_binary_response_no_extras protocol_binary_response_noop; + + /** + * Definition of the structure used by the increment and decrement + * command. + * See section 4 + */ + typedef union { + struct { + protocol_binary_request_header header; + struct { + uint64_t delta; + uint64_t initial; + uint32_t expiration; + } body; + } message; + uint8_t bytes[sizeof(protocol_binary_request_header) + 20]; + } protocol_binary_request_incr; + typedef protocol_binary_request_incr protocol_binary_request_decr; + + /** + * Definition of the response from an incr or decr command + * command. 
+ * See section 4 + */ + typedef union { + struct { + protocol_binary_response_header header; + struct { + uint64_t value; + } body; + } message; + uint8_t bytes[sizeof(protocol_binary_response_header) + 8]; + } protocol_binary_response_incr; + typedef protocol_binary_response_incr protocol_binary_response_decr; + + /** + * Definition of the quit + * See section 4 + */ + typedef protocol_binary_request_no_extras protocol_binary_request_quit; + + /** + * Definition of the packet returned by the quit command + * See section 4 + */ + typedef protocol_binary_response_no_extras protocol_binary_response_quit; + + /** + * Definition of the packet used by append and prepend command + * See section 4 + */ + typedef protocol_binary_request_no_extras protocol_binary_request_append; + typedef protocol_binary_request_no_extras protocol_binary_request_prepend; + + /** + * Definition of the packet returned from a successful append or prepend + * See section 4 + */ + typedef protocol_binary_response_no_extras protocol_binary_response_append; + typedef protocol_binary_response_no_extras protocol_binary_response_prepend; + + /** + * Definition of the packet used by the version command + * See section 4 + */ + typedef protocol_binary_request_no_extras protocol_binary_request_version; + + /** + * Definition of the packet returned from a successful version command + * See section 4 + */ + typedef protocol_binary_response_no_extras protocol_binary_response_version; + + + /** + * Definition of the packet used by the stats command. + * See section 4 + */ + typedef protocol_binary_request_no_extras protocol_binary_request_stats; + + /** + * Definition of the packet returned from a successful stats command + * See section 4 + */ + typedef protocol_binary_response_no_extras protocol_binary_response_stats; + + /** + * Definition of the packet used by the touch command. 
+ */ + typedef union { + struct { + protocol_binary_request_header header; + struct { + uint32_t expiration; + } body; + } message; + uint8_t bytes[sizeof(protocol_binary_request_header) + 4]; + } protocol_binary_request_touch; + + /** + * Definition of the packet returned from the touch command + */ + typedef protocol_binary_response_no_extras protocol_binary_response_touch; + + /** + * Definition of the packet used by the GAT(Q) command. + */ + typedef union { + struct { + protocol_binary_request_header header; + struct { + uint32_t expiration; + } body; + } message; + uint8_t bytes[sizeof(protocol_binary_request_header) + 4]; + } protocol_binary_request_gat; + + typedef protocol_binary_request_gat protocol_binary_request_gatq; + typedef protocol_binary_request_gat protocol_binary_request_gatk; + typedef protocol_binary_request_gat protocol_binary_request_gatkq; + + /** + * Definition of the packet returned from the GAT(Q) + */ + typedef protocol_binary_response_get protocol_binary_response_gat; + typedef protocol_binary_response_get protocol_binary_response_gatq; + typedef protocol_binary_response_get protocol_binary_response_gatk; + typedef protocol_binary_response_get protocol_binary_response_gatkq; + + /** + * Definition of a request for a range operation. + * See http://code.google.com/p/memcached/wiki/RangeOps + * + * These types are used for range operations and exist within + * this header for use in other projects. Range operations are + * not expected to be implemented in the memcached server itself. 
+ */ + typedef union { + struct { + protocol_binary_response_header header; + struct { + uint16_t size; + uint8_t reserved; + uint8_t flags; + uint32_t max_results; + } body; + } message; + uint8_t bytes[sizeof(protocol_binary_request_header) + 4]; + } protocol_binary_request_rangeop; + + typedef protocol_binary_request_rangeop protocol_binary_request_rget; + typedef protocol_binary_request_rangeop protocol_binary_request_rset; + typedef protocol_binary_request_rangeop protocol_binary_request_rsetq; + typedef protocol_binary_request_rangeop protocol_binary_request_rappend; + typedef protocol_binary_request_rangeop protocol_binary_request_rappendq; + typedef protocol_binary_request_rangeop protocol_binary_request_rprepend; + typedef protocol_binary_request_rangeop protocol_binary_request_rprependq; + typedef protocol_binary_request_rangeop protocol_binary_request_rdelete; + typedef protocol_binary_request_rangeop protocol_binary_request_rdeleteq; + typedef protocol_binary_request_rangeop protocol_binary_request_rincr; + typedef protocol_binary_request_rangeop protocol_binary_request_rincrq; + typedef protocol_binary_request_rangeop protocol_binary_request_rdecr; + typedef protocol_binary_request_rangeop protocol_binary_request_rdecrq; + +#ifdef __cplusplus +} +#endif +#endif /* PROTOCOL_BINARY_H */ diff --git a/memcached/sasl_defs.c b/memcached/sasl_defs.c new file mode 100644 index 00000000..1faefc80 --- /dev/null +++ b/memcached/sasl_defs.c @@ -0,0 +1,190 @@ +/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +#include "memcached.h" +#include +#include +#include +#include + +char my_sasl_hostname[1025]; + +#ifdef HAVE_SASL_CB_GETCONF +/* The locations we may search for a SASL config file if the user didn't + * specify one in the environment variable SASL_CONF_PATH + */ +const char * const locations[] = { + "/etc/sasl/memcached.conf", + "/etc/sasl2/memcached.conf", + NULL +}; +#endif + +#ifndef HAVE_SASL_CALLBACK_FT +typedef int 
(*sasl_callback_ft)(void); +#endif + +#ifdef ENABLE_SASL_PWDB +#define MAX_ENTRY_LEN 256 + +static const char *memcached_sasl_pwdb; + +static int sasl_server_userdb_checkpass(sasl_conn_t *conn, + void *context, + const char *user, + const char *pass, + unsigned passlen, + struct propctx *propctx) +{ + size_t unmlen = strlen(user); + if ((passlen + unmlen) > (MAX_ENTRY_LEN - 4)) { + fprintf(stderr, + "WARNING: Failed to authenticate <%s> due to too long password (%d)\n", + user, passlen); + return SASL_NOAUTHZ; + } + + FILE *pwfile = fopen(memcached_sasl_pwdb, "r"); + if (pwfile == NULL) { + if (settings.verbose) { + vperror("WARNING: Failed to open sasl database <%s>", + memcached_sasl_pwdb); + } + return SASL_NOAUTHZ; + } + + char buffer[MAX_ENTRY_LEN]; + bool ok = false; + + while ((fgets(buffer, sizeof(buffer), pwfile)) != NULL) { + if (memcmp(user, buffer, unmlen) == 0 && buffer[unmlen] == ':') { + /* This is the correct user */ + ++unmlen; + if (memcmp(pass, buffer + unmlen, passlen) == 0 && + (buffer[unmlen + passlen] == ':' || /* Additional tokens */ + buffer[unmlen + passlen] == '\n' || /* end of line */ + buffer[unmlen + passlen] == '\r'|| /* dos format? */ + buffer[unmlen + passlen] == '\0')) { /* line truncated */ + ok = true; + } + + break; + } + } + (void)fclose(pwfile); + if (ok) { + return SASL_OK; + } + + if (settings.verbose) { + fprintf(stderr, "INFO: User <%s> failed to authenticate\n", user); + } + + return SASL_NOAUTHZ; +} +#endif + +#ifdef HAVE_SASL_CB_GETCONF +static int sasl_getconf(void *context, const char **path) +{ + *path = getenv("SASL_CONF_PATH"); + + if (*path == NULL) { + for (int i = 0; locations[i] != NULL; ++i) { + if (access(locations[i], F_OK) == 0) { + *path = locations[i]; + break; + } + } + } + + if (settings.verbose) { + if (*path != NULL) { + fprintf(stderr, "Reading configuration from: <%s>\n", *path); + } else { + fprintf(stderr, "Failed to locate a config path\n"); + } + + } + + return (*path != NULL) ? 
SASL_OK : SASL_FAIL; +} +#endif + +static int sasl_log(void *context, int level, const char *message) +{ + bool log = true; + + switch (level) { + case SASL_LOG_NONE: + log = false; + break; + case SASL_LOG_PASS: + case SASL_LOG_TRACE: + case SASL_LOG_DEBUG: + case SASL_LOG_NOTE: + if (settings.verbose < 2) { + log = false; + } + break; + case SASL_LOG_WARN: + case SASL_LOG_FAIL: + if (settings.verbose < 1) { + log = false; + } + break; + default: + /* This is an error */ + ; + } + + if (log) { + fprintf(stderr, "SASL (severity %d): %s\n", level, message); + } + + return SASL_OK; +} + +static sasl_callback_t sasl_callbacks[] = { +#ifdef ENABLE_SASL_PWDB + { SASL_CB_SERVER_USERDB_CHECKPASS, sasl_server_userdb_checkpass, NULL }, +#endif + + { SASL_CB_LOG, (sasl_callback_ft)sasl_log, NULL }, + +#ifdef HAVE_SASL_CB_GETCONF + { SASL_CB_GETCONF, sasl_getconf, NULL }, +#endif + + { SASL_CB_LIST_END, NULL, NULL } +}; + +void init_sasl(void) { +#ifdef ENABLE_SASL_PWDB + memcached_sasl_pwdb = getenv("MEMCACHED_SASL_PWDB"); + if (memcached_sasl_pwdb == NULL) { + if (settings.verbose) { + fprintf(stderr, + "INFO: MEMCACHED_SASL_PWDB not specified. 
" + "Internal passwd database disabled\n"); + } + sasl_callbacks[0].id = SASL_CB_LIST_END; + sasl_callbacks[0].proc = NULL; + } +#endif + + memset(my_sasl_hostname, 0, sizeof(my_sasl_hostname)); + if (gethostname(my_sasl_hostname, sizeof(my_sasl_hostname)-1) == -1) { + if (settings.verbose) { + fprintf(stderr, "Error discovering hostname for SASL\n"); + } + my_sasl_hostname[0] = '\0'; + } + + if (sasl_server_init(sasl_callbacks, "memcached") != SASL_OK) { + fprintf(stderr, "Error initializing sasl.\n"); + exit(EXIT_FAILURE); + } else { + if (settings.verbose) { + fprintf(stderr, "Initialized SASL.\n"); + } + } +} diff --git a/memcached/sasl_defs.h b/memcached/sasl_defs.h new file mode 100644 index 00000000..f36c6944 --- /dev/null +++ b/memcached/sasl_defs.h @@ -0,0 +1,31 @@ +#ifndef SASL_DEFS_H +#define SASL_DEFS_H 1 + +// Longest one I could find was ``9798-U-RSA-SHA1-ENC'' +#define MAX_SASL_MECH_LEN 32 + +#if defined(HAVE_SASL_SASL_H) && defined(ENABLE_SASL) + +#include +void init_sasl(void); + +extern char my_sasl_hostname[1025]; + +#else /* End of SASL support */ + +typedef void* sasl_conn_t; + +#define init_sasl() {} +#define sasl_dispose(x) {} +#define sasl_server_new(a, b, c, d, e, f, g, h) 1 +#define sasl_listmech(a, b, c, d, e, f, g, h) 1 +#define sasl_server_start(a, b, c, d, e, f) 1 +#define sasl_server_step(a, b, c, d, e) 1 +#define sasl_getprop(a, b, c) {} + +#define SASL_OK 0 +#define SASL_CONTINUE -1 + +#endif /* sasl compat */ + +#endif /* SASL_DEFS_H */ diff --git a/memcached/scripts/README.damemtop b/memcached/scripts/README.damemtop new file mode 100644 index 00000000..b6b3a9ab --- /dev/null +++ b/memcached/scripts/README.damemtop @@ -0,0 +1,20 @@ +dormando's awesome memcached top + +A flexible 'top' like utility for viewing memcached clusters. + +Under development. Latest code is available at: +http://github.com/dormando/damemtop + +See --help for full information. 
+ +Requires 'AnyEvent', and 'YAML' libraries from CPAN: +http://search.cpan.org/ + +'AnyEvent' depends on 'common::sense' (also at CPAN). + +If you have a large cluster and want higher performance, find +and install 'EV' from CPAN. AnyEvent will automagically use it +and use epoll, kqeueue, etc, for socket handling. + +Pester me for questions/bugs/ideas. As of writing the util is +in early release and missing many future features. diff --git a/memcached/scripts/damemtop b/memcached/scripts/damemtop new file mode 100755 index 00000000..a5e2bcfe --- /dev/null +++ b/memcached/scripts/damemtop @@ -0,0 +1,571 @@ +#!/usr/bin/perl +# dormando's awesome memcached top utility! +# +# Copyright 2009 Dormando (dormando@rydia.net). All rights reserved. +# +# Use and distribution licensed under the BSD license. See +# the COPYING file for full text. + +use strict; +use warnings FATAL => 'all'; + +use AnyEvent; +use AnyEvent::Socket; +use AnyEvent::Handle; +use Getopt::Long; +use YAML qw/Dump Load LoadFile/; +use Term::ReadKey qw/ReadMode ReadKey GetTerminalSize/; + +our $VERSION = '0.1'; + +my $CLEAR = `clear`; +my @TERM_SIZE = (); +$|++; + +my %opts = (); +GetOptions(\%opts, 'help|h', 'config=s'); + +if ($opts{help}) { + show_help(); exit; +} + +$SIG{INT} = sub { + ReadMode('normal'); + print "\n"; + exit; +}; + +# TODO: make this load from central location, and merge in homedir changes. +# then merge Getopt::Long stuff on top of that +# TODO: Set a bunch of defaults and merge in. +my $CONF = load_config(); +my %CONS = (); +my $LAST_RUN = time; # time after the last loop cycle. +my $TIME_SINCE_LAST_RUN = time; # time since last loop cycle. +my $loop_timer; +my $main_cond; +my $prev_stats_results; + +my %display_modes = ( + 't' => \&display_top_mode, + '?' 
=> \&display_help_mode, + 'h' => \&display_help_mode, +); + +my %column_compute = ( + 'hostname' => { stats => [], code => \&compute_hostname}, + 'hit_rate' => { stats => ['get_hits', 'get_misses'], + code => \&compute_hit_rate }, + 'fill_rate' => { stats => ['bytes', 'limit_maxbytes'], + code => \&compute_fill_rate }, +); + +my %column_format = ( + 'hit_rate' => \&format_percent, + 'fill_rate' => \&format_percent, +); + +# This can collapse into %column_compute +my %column_format_totals = ( + 'hit_rate' => 0, + 'fill_rate' => 0, +); + +ReadMode('cbreak'); +my $LAST_KEY = ''; +my $read_keys = AnyEvent->io ( + fh => \*STDIN, poll => 'r', + cb => sub { + $LAST_KEY = ReadKey(-1); + # If there is a running timer, cancel it. + # Don't want to interrupt a main loop run. + # fire_main_loop()'s iteration will pick up the keypress. + if ($loop_timer) { + $loop_timer = undef; + $main_cond->send; + } + } +); + +# start main loop +fire_main_loop(); + +### AnyEvent related code. + +sub fire_main_loop { + for (;;) { + $loop_timer = undef; + $main_cond = AnyEvent->condvar; + my $time_taken = main_loop(); + my $delay = $CONF->{delay} - $time_taken; + $delay = 0 if $delay < 0; + $loop_timer = AnyEvent->timer( + after => $delay, + cb => $main_cond, + ); + $main_cond->recv; + } +} + +sub main_loop { + my $start = AnyEvent->now; # use ->time to find the end. + maintain_connections(); + + my $cv = AnyEvent->condvar; + + # FIXME: Need to dump early if there're no connected conns + # FIXME: Make this only fetch stats from cons we care to visualize? + # maybe keep everything anyway to maintain averages? + my %stats_results = (); + while (my ($hostname, $con) = each %CONS) { + $cv->begin; + call_stats($con, ['', 'items', 'slabs'], sub { + $stats_results{$hostname} = shift; + $cv->end; + }); + } + $cv->recv; + + # Short circuit since we don't have anything to compare to. 
+ unless ($prev_stats_results) { + $prev_stats_results = \%stats_results; + return $CONF->{delay}; + } + + # Semi-exact global time diff for stats that want to average + # themselves per-second. + my $this_run = AnyEvent->time; + $TIME_SINCE_LAST_RUN = $this_run - $LAST_RUN; + $LAST_RUN = $this_run; + + # Done all our fetches. Drive the display. + display_run($prev_stats_results, \%stats_results); + $prev_stats_results = \%stats_results; + + my $end = AnyEvent->time; + my $diff = $LAST_RUN - $start; + print "loop took: $diff"; + return $diff; +} + +sub maintain_connections { + my $cv = AnyEvent->condvar; + + $cv->begin (sub { shift->send }); + for my $host (@{$CONF->{servers}}) { + next if $CONS{$host}; + $cv->begin; + $CONS{$host} = connect_memcached($host, sub { + if ($_[0] eq 'err') { + print "Failed connecting to $host: ", $_[1], "\n"; + delete $CONS{$host}; + } + $cv->end; + }); + } + $cv->end; + + $cv->recv; +} + +sub connect_memcached { + my ($fullhost, $cb) = @_; + my ($host, $port) = split /:/, $fullhost; + + my $con; $con = AnyEvent::Handle->new ( + connect => [$host => $port], + on_connect => sub { + $cb->('con'); + }, + on_connect_error => sub { + $cb->('err', $!); + $con->destroy; + }, + on_eof => sub { + $cb->('err', $!); + $con->destroy; + }, + ); + return $con; +} + +# Function's getting a little weird since I started optimizing it. +# As of my first set of production tests, this routine is where we spend +# almost all of our processing time. +sub call_stats { + my ($con, $cmds, $cb) = @_; + + my $stats = {}; + my $num_types = @$cmds; + + my $reader; $reader = sub { + my ($con, $results) = @_; + { + my %temp = (); + for my $line (split(/\n/, $results)) { + my ($k, $v) = (split(/\s+/, $line))[1,2]; + $temp{$k} = $v; + } + $stats->{$cmds->[0]} = \%temp; + } + shift @$cmds; + unless (@$cmds) { + # Out of commands to process, return goodies. + $cb->($stats); + return; + } + }; + + for my $cmd (@$cmds) { + $con->push_write('stats ' . $cmd . 
"\n"); + $stats->{$cmd} = {}; + $con->push_read(line => "END\r\n", $reader); + } +} + +### Compute routines + +sub compute_hostname { + return $_[0]; +} + +sub compute_hit_rate { + my $s = $_[1]; + my $total = $s->{get_hits} + $s->{get_misses}; + return 'NA' unless $total; + return $s->{get_hits} / $total; +} + +sub compute_fill_rate { + my $s = $_[1]; + return $s->{bytes} / $s->{limit_maxbytes}; +} + +sub format_column { + my ($col, $val) = @_; + my $res; + $col =~ s/^all_//; + if ($column_format{$col}) { + if (ref($column_format{$col}) eq 'CODE') { + return $column_format{$col}->($val); + } else { + return $val .= $column_format{$col}; + } + } else { + return format_commas($val); + } +} + +sub column_can_total { + my $col = shift; + $col =~ s/^all_//; + return 1 unless exists $column_format_totals{$col}; + return $column_format_totals{$col}; +} + +### Display routines + +# If there isn't a specific column type computer, see if we just want to +# look at the specific stat and return it. +# If column is a generic type and of 'all_cmd_get' format, return the more +# complete stat instead of the diffed stat. 
sub compute_column {
    # Arguments:
    #   $col        - column name from the config; an "all_" prefix asks for
    #                 the raw counter instead of the per-second difference.
    #   $host       - hostname the stats belong to (passed on to the
    #                 per-column compute routine).
    #   $prev_stats - stats hash from the previous run, keyed by stats type.
    #   $curr_stats - stats hash from the current run, same layout.
    # Returns the computed value, or the string 'NA' when it cannot be
    # computed.
    my ($col, $host, $prev_stats, $curr_stats) = @_;
    my $diff_stats = ($col =~ s/^all_//) ? 0 : 1;

    # Really should decide on whether or not to flatten the hash :/
    # Looks a stat up in every stats type of one host's result set and
    # returns the first hit, or undef when nothing matches.  Does not
    # autovivify missing levels.
    my $find_stat = sub {
        my ($stats, $name) = @_;
        return unless ref $stats eq 'HASH';
        for my $type (keys %$stats) {
            my $group = $stats->{$type};
            next unless ref $group eq 'HASH';
            return $group->{$name} if exists $group->{$name};
        }
        return;
    };

    # Returns either the raw stat or its per-second change since the last
    # run; 'NA' when the stat is missing on either side or the arithmetic
    # fails (e.g. a zero time delta).
    my $diff_stat = sub {
        my $stat = shift;

        my $curr = $find_stat->($curr_stats, $stat);
        return 'NA' unless defined $curr;
        return $curr unless $diff_stats;

        my $prev = $find_stat->($prev_stats, $stat);
        return 'NA' unless defined $prev;

        my $rate;
        my $ok = eval {
            $rate = ($curr - $prev) / $TIME_SINCE_LAST_RUN;
            1;
        };
        return $ok ? $rate : 'NA';
    };

    my $comp = $column_compute{$col};
    return $diff_stat->($col) unless $comp;

    my %s = ();
    for my $stat (@{ $comp->{stats} }) {
        my $value = $diff_stat->($stat);
        # Never hand 'NA' to a compute routine: it would do arithmetic on
        # it, which is fatal under "use warnings FATAL => 'all'".
        return 'NA' if $value eq 'NA';
        $s{$stat} = $value;
    }

    # A failing compute routine (e.g. division by zero) must only blank
    # this one cell, not take the whole display down.
    my $result;
    my $ok = eval {
        $result = $comp->{code}->($host, \%s);
        1;
    };
    return ($ok && defined $result) ? $result : 'NA';
}

# We have a bunch of stats from a bunch of connections.
# At this point we run a particular display mode, capture the lines, then
# truncate and display them.
sub display_run {
    my ($prev_stats, $curr_stats) = @_;

    @TERM_SIZE = GetTerminalSize;
    unless ($TERM_SIZE[0] && $TERM_SIZE[1]) {
        # Put the terminal back before bailing out.
        ReadMode('normal');
        die "cannot detect terminal size";
    }

    # $LAST_KEY may be undef when the key read returned nothing.
    my $key = defined $LAST_KEY ? $LAST_KEY : '';

    if ($key eq 'q') {
        print "\n";
        ReadMode('normal');
        exit;
    }

    # Fall back to "top" mode when the config names no (or an unknown) mode.
    my $mode = $CONF->{mode};
    $mode = 't' unless defined $mode && exists $display_modes{$mode};

    if ($key ne $mode && exists $display_modes{$key}) {
        $CONF->{prev_mode} = $mode;
        $mode = $key;
    }
    elsif ($mode eq 'h' || $mode eq '?') {
        # Bust out of help mode on any key.  Never "return" to another help
        # mode or to a mode that does not exist.
        my $prev = $CONF->{prev_mode};
        $prev = 't'
            unless defined $prev
                && exists $display_modes{$prev}
                && $prev ne 'h'
                && $prev ne '?';
        $mode = $prev;
    }
    $CONF->{mode} = $mode;

    my $lines = $display_modes{$mode}->($prev_stats, $curr_stats);
    display_lines($lines) if $lines;
    return;
}

# Default "top" mode.
# create a set of computed columns as requested by the config.
# this has gotten a little out of hand... needs more cleanup/abstraction.
# Build the lines of the "top" table: header, TOTAL and AVERAGE rows, then
# one row per host.  Returns an array ref of unformatted-width strings;
# display_lines() does the truncation.
sub display_top_mode {
    my $prev_stats = shift;
    my $curr_stats = shift;

    my @columns = @{$CONF->{top_mode}->{columns}};
    my @rows    = ();
    my @tot_row = ();

    # Round one: compute every cell and accumulate per-column totals.
    for my $host (sort keys %{$curr_stats}) {
        my @row = ();
        for my $colnum (0 .. $#columns) {
            my $res = compute_column($columns[$colnum], $host,
                                     $prev_stats->{$host},
                                     $curr_stats->{$host});
            $tot_row[$colnum] += $res if is_numeric($res);
            push @row, $res;
        }
        push @rows, \@row;
    }

    # Sort rows by sort column (ascending or descending).  Skipped when
    # there are no rows: peeking at $rows[0] would autovivify an empty row.
    my $sort = $CONF->{top_mode}->{sort_column};
    if ($sort && @rows) {
        my $order  = $CONF->{top_mode}->{sort_order} || 'asc';
        my $colnum = 0;
        for (0 .. $#columns) { $colnum = $_ if $columns[$_] eq $sort; }

        # The first row decides whether the column sorts numerically.
        my $cmp = is_numeric($rows[0]->[$colnum])
            ? sub { $_[0]->[$colnum] <=> $_[1]->[$colnum] }
            : sub { $_[0]->[$colnum] cmp $_[1]->[$colnum] };
        @rows = $order eq 'asc'
            ? sort { $cmp->($a, $b) } @rows
            : sort { $cmp->($b, $a) } @rows;
    }

    # Format each column after the sort...
    for my $row (@rows) {
        $row = [
            map {
                is_numeric($row->[$_])
                    ? format_column($columns[$_], $row->[$_])
                    : $row->[$_]
            } 0 .. $#columns
        ];
    }

    # Create average and total rows.
    my @avg_row = ();
    for my $col (0 .. $#columns) {
        if (is_numeric($tot_row[$col])) {
            # Only rows holding a real value count towards the average.
            my $countable_rows = 0;
            for my $row (@rows) {
                next unless $row->[$col];
                $countable_rows++ unless $row->[$col] eq 'NA';
            }
            $countable_rows = 1 unless $countable_rows;
            push @avg_row, format_column($columns[$col],
                sprintf('%.2f', $tot_row[$col] / $countable_rows));
        } else {
            push @avg_row, 'NA';
        }
        $tot_row[$col] = 'NA' unless defined $tot_row[$col];
        $tot_row[$col] = 'NA' unless (column_can_total($columns[$col]));
        $tot_row[$col] = format_column($columns[$col], $tot_row[$col])
            unless $tot_row[$col] eq 'NA';
    }
    unshift @rows, \@avg_row;
    unshift @rows, ['AVERAGE:'];
    unshift @rows, \@tot_row;
    unshift @rows, ['TOTAL:'];

    # Round two. Pass @rows into a function which returns an array with the
    # desired format spacing for each column.
    unshift @rows, \@columns;
    my $spacing = find_optimal_spacing(\@rows);

    my @display_lines = ();
    for my $row (@rows) {
        my $line = '';
        for my $col (0 .. @$row-1) {
            my $space = $spacing->[$col];
            $line .= sprintf("%-${space}s ", $row->[$col]);
        }
        push @display_lines, $line;
    }

    return \@display_lines;
}

# Show the help screen and block until a key is pressed.  Draws for itself,
# so it returns nothing for display_run() to print.
sub display_help_mode {
    my $help = <<"ENDHELP";

dormando's awesome memcached top utility version v$VERSION

This early version requires you to edit the ~/.damemtop/damemtop.yaml
(or /etc/damemtop.yaml) file in order to change options.
See --help for more info.

Hit any key to exit help.
ENDHELP
    my @lines = split /\n/, $help;
    display_lines(\@lines);
    $LAST_KEY = ReadKey(0);
    return;
}

# Takes a set of lines, clears screen, dumps header, trims lines, etc
# MAYBE: mode to wrap lines instead of trim them?
# Note: consumes (shifts from) the array passed in.
sub display_lines {
    my $lines = shift;

    my $width         = $TERM_SIZE[0];
    my $height_remain = $TERM_SIZE[1];

    unshift @$lines, display_header($width);
    clear_screen() unless $CONF->{no_clear};

    # Stops one line short of the terminal height.
    while (--$height_remain && @$lines) {
        # truncate too long lines.
        my $line = shift @$lines;
        $line = substr $line, 0, $width-1;
        print $line, "\n";
    }
}

# Build the status bar: program name, current time, sort column (top mode
# only) and refresh delay.  The width argument is currently unused.
sub display_header {
    my $topbar = 'damemtop: ' . scalar localtime;
    if ($CONF->{mode} eq 't' && $CONF->{top_mode}->{sort_column}) {
        $topbar .= ' [sort: ' . $CONF->{top_mode}->{sort_column} . ']';
    }
    $topbar .= ' [delay: ' . $CONF->{delay} . 's]';
    return $topbar;
}

### Utilities

# find the optimal format spacing for each column, which is:
# longest length of item in col + 1 (the caller's format string adds one
# more trailing space).
# Takes an array ref of rows; the first row defines the column count.
# Returns an array ref with exactly one width per column.
sub find_optimal_spacing {
    my $rows  = shift;
    my @maxes = ();

    my $num_cols = @{$rows->[0]};
    for my $row (@$rows) {
        for my $col (0 .. $num_cols-1) {
            $maxes[$col] = 0 unless $maxes[$col];
            next unless $row->[$col];
            $maxes[$col] = length($row->[$col])
                if length($row->[$col]) > $maxes[$col];
        }
    }
    # Last index is $num_cols-1; going one further added a phantom column.
    for my $col (0 .. $num_cols-1) {
        $maxes[$col] += 1;
    }

    return \@maxes;
}

# doesn't try too hard to identify numbers...
# True for an unsigned integer or decimal, optionally followed by word
# characters.  False for 0, the empty string and undef.
sub is_numeric {
    return 0 unless $_[0];
    return 1 if $_[0] =~ m/^\d+(\.\d*)?(\w+)?$/;
    return 0;
}

# Format a fraction (0.5) as a percentage string ("50.00%").
sub format_percent {
    return sprintf("%.2f%%", $_[0] * 100);
}

# Truncate to an integer and insert thousands separators.
sub format_commas {
    my $num = shift;
    $num = int($num);
    $num =~ s/(^[-+]?\d+?(?=(?>(?:\d{3})+)(?!\d))|\G\d{3}(?=\d))/$1,/g;
    return $num;
}

# Can tick counters/etc here as well.
sub clear_screen {
    print $CLEAR;
}

# tries minimally to find a localized config file.
# TODO: Handle the YAML error and make it prettier.
# Order: the file given in $opts{config}, then ~/.damemtop/damemtop.yaml,
# then /etc/damemtop.yaml.  Returns whatever LoadFile returns.
sub load_config {
    my $config = $opts{config};
    unless ($config) {
        my $homedir = defined $ENV{HOME}
            ? "$ENV{HOME}/.damemtop/damemtop.yaml"
            : undef;
        $config = (defined $homedir && -e $homedir)
            ? $homedir
            : '/etc/damemtop.yaml';
    }
    return LoadFile($config);
}

# Print the command-line help and exit.
sub show_help {
    print <<"ENDHELP";
dormando's awesome memcached top utility version v$VERSION

This program is copyright (c) 2009 Dormando.
Use and distribution licensed under the BSD license.  See
the COPYING file for full text.

contact: dormando\@rydia.net or memcached\@googlegroups.com.

This early version requires you to edit the ~/.damemtop/damemtop.yaml
(or /etc/damemtop.yaml) file in order to change options.

You may display any column that is in the output of
'stats', 'stats items', or 'stats slabs' from memcached's ASCII protocol.
Start a column with 'all_' (ie; 'all_get_hits') to display the current stat,
otherwise the stat is displayed as an average per second.

Specify a "sort_column" under "top_mode" to sort the output by any column.

Some special "computed" columns exist:
hit_rate (get/miss hit ratio)
fill_rate (% bytes used out of the maximum memory limit)
ENDHELP
    exit;
}
diff --git a/memcached/scripts/damemtop.yaml b/memcached/scripts/damemtop.yaml
new file mode 100644
index 00000000..f34acfdf
--- /dev/null
+++ b/memcached/scripts/damemtop.yaml
@@ -0,0 +1,16 @@
delay: 3
mode: t
top_mode:
  sort_column: "hostname"
  sort_order: "asc"
  columns:
    - hostname
    - all_version
    - all_fill_rate
    - hit_rate
    - evictions
    - bytes_written
    - "2:get_hits"
servers:
  - 127.0.0.1:11211
  - 127.0.0.2:11211
diff --git a/memcached/scripts/mc_slab_mover b/memcached/scripts/mc_slab_mover
new file mode 100755
index 00000000..ac984d7b
--- /dev/null
+++ b/memcached/scripts/mc_slab_mover
@@ -0,0 +1,260 @@
#! /usr/bin/perl
# See memcached for LICENSE
# Copyright 2011 Dormando (dormando@rydia.net)

=head1 NAME

mc_slab_mover -- example utility for slab page reassignment for memcached

=head1 SYNOPSIS

    $ mc_slab_mover --host="127.0.0.1:11211" --verbose
    $ mc_slab_mover --host="127.0.0.1:11211" --automove
    $ mc_slab_mover --host="127.0.0.1:11211" --sleep=60 --loops=4 --automove

=head1 DESCRIPTION

This utility is an example implementation of an algorithm for reassigning
slab memory in a running memcached instance. If memcached's built-in
automover isn't working for you, you may use this script as an example
base and expand on it.
We welcome modifications or alternatives on the +mailing list. + +=head1 ALGORITHM + +The default algorithm is simple, and may serve for a common case: over +time one slab may grow in use compared to others, and as evictions stop +in one slab and start in another it will reassign memory. + +If a slab has the most evictions three times in a row, it will pull a page +from a slab which has had zero evictions three times in a row. + +There are many traffic patterns where this does not work well. IE: If you +never use expirations and rely on the LRU (so all slabs always evict), +it will not be as likely to find source pages to move. + +=head1 OPTIONS + +=over + +=item --host="IP:PORT" + +The hostname to connect to. NOTE: If connection to the host breaks, script +will stop. + +=item --sleep=10 + +How long to wait between loops for gathering stats. + +=item --loops=3 + +How many loops to run before making a decision for a move. + +=item --verbose + +Prints a formatted dump of some common statistics per loop. + +=item --automove + +Enables the automover, and will attempt to move memory around if it finds +viable candidates. + +=back + +=head1 AUTHOR + +Dormando E<lt>L<dormando@rydia.net>E<gt> + +=head1 LICENSE + +Licensed for use and redistribution under the same terms as Memcached itself. 

=cut

use warnings;
use strict;

use IO::Socket::INET;

use FindBin;
use Data::Dumper qw/Dumper/;
use Getopt::Long;

my %opts = ('sleep' => 10, automove => 0, verbose => 0, loops => 3);
GetOptions(
        "host=s"   => \$opts{host},
        "sleep=i"  => \$opts{'sleep'},
        "loops=i"  => \$opts{loops},
        "automove" => \$opts{automove},
        "verbose"  => \$opts{verbose},
    ) or usage();

die "Must specify at least --host='127.0.0.1:11211'" unless $opts{host};
my $sock = IO::Socket::INET->new(PeerAddr => $opts{host},
                                 Timeout  => 3);
die "$!\n" unless $sock;

# %stats: slab id => number of loops in which it had the most evictions.
# %move:  state for automove_basic (current winner, its streak, and the
#         zero-eviction streak of each candidate source slab).
my %stats = ();
my %move  = (winner => 0, wins => 0);

$SIG{INT} = sub {
    print "STATS: ", Dumper(\%stats), "\n";
    exit;
};
$SIG{USR1} = sub {
    print "STATS: ", Dumper(\%stats), "\n";
};
run();

# Print a short usage message and exit non-zero.
sub usage {
    print qq{Usage:
    mc_slab_mover --host="127.0.0.1:11211" --verbose --automove
    run `perldoc mc_slab_mover` for full information

};
    exit 1;
}

# Main loop: sample the slab stats every $opts{sleep} seconds, diff them
# against the previous sample, optionally print a table, record the slab
# with the most evictions and, with --automove, try to move a page to it.
sub run {
    my $slabs_before = grab_stats();

    while (1) {
        sleep $opts{'sleep'};
        my $slabs_after = grab_stats();

        my ($totals, $sorted) = calc_results_evicted($slabs_before, $slabs_after);
#        my ($totals, $sorted) = calc_results_numratio($slabs_before, $slabs_after);

        # Share of the total as a percentage; 0 when the total is 0 or
        # missing.
        my $pct = sub {
            my ($num, $divisor) = @_;
            return 0 unless $divisor;
            return 100 * ($num / $divisor);
        };
        if ($opts{verbose}) {
            printf " %02s: %-8s (pct ) %-10s (pct ) %-6s (pct ) get_hits (pct ) cmd_set (pct )\n",
                'sb', 'evicted', 'items', 'pages';
            for my $slab (@$sorted) {
                printf " %02d: %-8d (%.2f%%) %-10s (%.4f%%) %-6d (%.2f%%) %-8d (%.3f%%) %-7d (%.2f%%)\n",
                    $slab->{slab}, $slab->{evicted_d},
                    $pct->($slab->{evicted_d}, $totals->{evicted_d}),
                    $slab->{number},
                    $pct->($slab->{number}, $totals->{number}),
                    $slab->{total_pages},
                    $pct->($slab->{total_pages}, $totals->{total_pages}),
                    $slab->{get_hits_d},
                    $pct->($slab->{get_hits_d}, $totals->{get_hits_d}),
                    $slab->{cmd_set_d},
                    $pct->($slab->{cmd_set_d}, $totals->{cmd_set_d});
            }
        }

        # Nothing comparable yet; keep the old baseline and try again.
        next unless @$sorted;
        my $highest = $sorted->[-1];
        $stats{$highest->{slab}}++;
        print " (winner: ", $highest->{slab}, " wins: ", $stats{$highest->{slab}}, ")\n";
        automove_basic($totals, $sorted) if ($opts{automove});

        $slabs_before = $slabs_after;
    }
}

# Fetch "stats items" and "stats slabs" and merge them into one hash ref:
# { slab id => { stat name => value } }.
# Dies when the connection ends before the END marker, so a dead server
# stops the script instead of leaving it looping on empty samples.
sub grab_stats {
    my %slabs = ();
    for my $stat (qw/items slabs/) {
        print $sock "stats $stat\r\n";
        my $saw_end = 0;
        while (my $line = <$sock>) {
            chomp $line;
            if ($line =~ m/^END/) {
                $saw_end = 1;
                last;
            }
            if ($line =~ m/^STAT (?:items:)?(\d+):(\S+) (\S+)/) {
                my ($slab, $var, $val) = ($1, $2, $3);
                $slabs{$slab}->{$var} = $val;
            }
        }
        die "Lost connection to $opts{host}\n" unless $saw_end;
    }

    return \%slabs;
}

# Really stupid algo, same as the initial algo built into memcached.
# If a slab "wins" most evictions 3 times in a row, pick from a slab which
# has had 0 evictions 3 times in a row and move it over.
# ("3" is really $opts{loops}.)  $sorted must be ordered so that the slab
# with the most evictions is last.
sub automove_basic {
    my ($totals, $sorted) = @_;

    my $source = 0;
    my $dest   = 0;
    my $high   = $sorted->[-1];
    return unless $high->{evicted_d} > 0;
    if ($move{winner} == $high->{slab}) {
        $move{wins}++;
        $dest = $move{winner} if $move{wins} >= $opts{loops};
    } else {
        $move{wins}   = 1;
        $move{winner} = $high->{slab};
    }
    for my $slab (@$sorted) {
        my $id = $slab->{slab};
        # A source needs no evictions and more than two pages.
        if ($slab->{evicted_d} == 0 && $slab->{total_pages} > 2) {
            $move{zeroes}->{$id}++;
            $source = $id if (!$source && $move{zeroes}->{$id} >= $opts{loops});
        } else {
            delete $move{zeroes}->{$id};
        }
    }

    if ($source && $dest) {
        print " slabs reassign $source $dest\n";
        print $sock "slabs reassign $source $dest\r\n";
        my $res = <$sock>;
        die "Lost connection to $opts{host}\n" unless defined $res;
        print " RES: ", $res;
    } elsif ($dest && !$source) {
        print "FAIL: want to move memory to $dest but no valid source slab available\n";
    }
}

# Using just the evicted stats.
# Takes (\%slabs_before, \%slabs_after).  Returns (\%totals, \@sorted),
# where @sorted holds the per-slab hashes in ascending order of evictions
# since the previous sample, so the busiest slab is last.
sub calc_results_evicted {
    my ($slabs, $totals) = calc_slabs(@_);
    my @sorted = sort {
        ($a->{evicted_d} || 0) <=> ($b->{evicted_d} || 0)
    } values %$slabs;
    return ($totals, \@sorted);
}

# Weighted ratios of evictions vs total stored items
# Seems to fail as an experiment, but it tries to weight stats.
# In this case evictions in underused classes tend to get vastly inflated
# Same arguments and return shape as calc_results_evicted, sorted by the
# "numratio" key instead.
sub calc_results_numratio {
    my ($slabs, $totals) = calc_slabs(@_, sub {
        my ($sb, $sa, $s) = @_;
        # A slab with no stored items has no meaningful ratio; use 0
        # rather than dividing by zero.
        $s->{numratio} = ($s->{evicted_d} && $s->{number})
            ? $s->{evicted_d} / $s->{number}
            : 0;
    });
    my @sorted = sort { $a->{numratio} <=> $b->{numratio} } values %$slabs;
    return ($totals, \@sorted);
}

# Diff two stat samples.
#
#   $slabs_before, $slabs_after - { slab id => { stat => value } }
#   $code                       - optional callback invoked per slab as
#                                 $code->($before, $after, \%slab)
#
# Only slabs present in both samples are kept.  For every stat whose
# current value is a plain integer, the per-slab hash gains "<stat>_d"
# (change since the previous sample) and %totals gains "<stat>" and
# "<stat>_d" summed over all slabs.  Each slab hash also gets a "slab" key
# holding its id.  Returns (\%slabs, \%totals).
sub calc_slabs {
    my ($slabs_before, $slabs_after, $code) = @_;
    my %slabs  = ();
    my %totals = ();
    for my $id (keys %$slabs_after) {
        my $sb = $slabs_before->{$id};
        my $sa = $slabs_after->{$id};
        next unless ($sb && $sa);
        my %slab = %$sa;
        for my $key (keys %$sa) {
            # Add totals, diffs
            next unless $sa->{$key} =~ m/^\d+$/;
            # A stat that is new in this sample counts from zero.
            my $before = defined $sb->{$key} ? $sb->{$key} : 0;
            my $delta  = $sa->{$key} - $before;
            $totals{$key}        += $sa->{$key};
            $slab{$key . '_d'}    = $delta;
            $totals{$key . '_d'} += $delta;
        }
        # External code
        $code->($sb, $sa, \%slab) if $code;
        $slab{slab} = $id;
        $slabs{$id} = \%slab;
    }
    return (\%slabs, \%totals);
}
diff --git a/memcached/scripts/memcached-init b/memcached/scripts/memcached-init
new file mode 100755
index 00000000..ebe2ab0c
--- /dev/null
+++ b/memcached/scripts/memcached-init
@@ -0,0 +1,111 @@
#! 
/bin/bash +### BEGIN INIT INFO +# Provides: memcached +# Required-Start: $syslog +# Required-Stop: $syslog +# Should-Start: $local_fs +# Should-Stop: $local_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: memcached - Memory caching daemon +# Description: memcached - Memory caching daemon +### END INIT INFO + +# Usage: +# cp /etc/memcached.conf /etc/memcached_server1.conf +# cp /etc/memcached.conf /etc/memcached_server2.conf +# start all instances: +# /etc/init.d/memcached start +# start one instance: +# /etc/init.d/memcached start server1 +# stop all instances: +# /etc/init.d/memcached stop +# stop one instance: +# /etc/init.d/memcached stop server1 +# There is no "status" command. + +PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin +DAEMON=/usr/bin/memcached +DAEMONNAME=memcached +DAEMONBOOTSTRAP=/usr/share/memcached/scripts/start-memcached +DESC=memcached + +test -x $DAEMON || exit 0 +test -x $DAEMONBOOTSTRAP || exit 0 + +set -e + +FILES=(/etc/memcached_*.conf) +# check for alternative config schema +if [ -r "${FILES[0]}" ]; then + CONFIGS=() + for FILE in "${FILES[@]}"; + do + # remove prefix + NAME=${FILE#/etc/} + # remove suffix + NAME=${NAME%.conf} + + # check optional second param + if [ $# -ne 2 ]; + then + # add to config array + CONFIGS+=($NAME) + elif [ "memcached_$2" == "$NAME" ]; + then + # use only one memcached + CONFIGS=($NAME) + break; + fi; + done; + + if [ ${#CONFIGS[@]} == 0 ]; + then + echo "Config not exist for: $2" >&2 + exit 1 + fi; +else + CONFIGS=(memcached) +fi; + +CONFIG_NUM=${#CONFIGS[@]} +for ((i=0; i < $CONFIG_NUM; i++)); do + NAME=${CONFIGS[${i}]} + PIDFILE="/var/run/${NAME}.pid" + +case "$1" in + start) + echo -n "Starting $DESC: " + start-stop-daemon --start --quiet --exec "$DAEMONBOOTSTRAP" -- /etc/${NAME}.conf $PIDFILE + echo "$NAME." + ;; + stop) + echo -n "Stopping $DESC: " + start-stop-daemon --stop --quiet --oknodo --pidfile $PIDFILE --exec $DAEMON + echo "$NAME." 
+ rm -f $PIDFILE + ;; + + restart|force-reload) + # + # If the "reload" option is implemented, move the "force-reload" + # option to the "reload" entry above. If not, "force-reload" is + # just the same as "restart". + # + echo -n "Restarting $DESC: " + start-stop-daemon --stop --quiet --oknodo --pidfile $PIDFILE + rm -f $PIDFILE + sleep 1 + start-stop-daemon --start --quiet --exec "$DAEMONBOOTSTRAP" -- /etc/${NAME}.conf $PIDFILE + echo "$NAME." + ;; + *) + N=/etc/init.d/$NAME + # echo "Usage: $N {start|stop|restart|reload|force-reload}" >&2 + echo "Usage: $N {start|stop|restart|force-reload}" >&2 + exit 1 + ;; +esac +done; + +exit 0 diff --git a/memcached/scripts/memcached-tool b/memcached/scripts/memcached-tool new file mode 100755 index 00000000..bae4efd7 --- /dev/null +++ b/memcached/scripts/memcached-tool @@ -0,0 +1,169 @@ +#!/usr/bin/perl +# +# memcached-tool: +# stats/management tool for memcached. +# +# Author: +# Brad Fitzpatrick +# +# License: +# public domain. I give up all rights to this +# tool. modify and copy at will. +# + +use strict; +use IO::Socket::INET; + +my $addr = shift; +my $mode = shift || "display"; +my ($from, $to); + +if ($mode eq "display") { + undef $mode if @ARGV; +} elsif ($mode eq "move") { + $from = shift; + $to = shift; + undef $mode if $from < 6 || $from > 17; + undef $mode if $to < 6 || $to > 17; + print STDERR "ERROR: parameters out of range\n\n" unless $mode; +} elsif ($mode eq 'dump') { + ; +} elsif ($mode eq 'stats') { + ; +} else { + undef $mode; +} + +undef $mode if @ARGV; + +die + "Usage: memcached-tool [mode]\n + memcached-tool 10.0.0.5:11211 display # shows slabs + memcached-tool 10.0.0.5:11211 # same. 
(default is display) + memcached-tool 10.0.0.5:11211 stats # shows general stats + memcached-tool 10.0.0.5:11211 dump # dumps keys and values +" unless $addr && $mode; + + +my $sock; +if ($addr =~ m:/:) { + $sock = IO::Socket::UNIX->new( + Peer => $addr, + ); +} +else { + $addr .= ':11211' unless $addr =~ /:\d+$/; + + $sock = IO::Socket::INET->new( + PeerAddr => $addr, + Proto => 'tcp', + ); +} +die "Couldn't connect to $addr\n" unless $sock; + +if ($mode eq 'dump') { + my %items; + my $totalitems; + + print $sock "stats items\r\n"; + + while (<$sock>) { + last if /^END/; + if (/^STAT items:(\d*):number (\d*)/) { + $items{$1} = $2; + $totalitems += $2; + } + } + print STDERR "Dumping memcache contents\n"; + print STDERR " Number of buckets: " . scalar(keys(%items)) . "\n"; + print STDERR " Number of items : $totalitems\n"; + + foreach my $bucket (sort(keys(%items))) { + print STDERR "Dumping bucket $bucket - " . $items{$bucket} . " total items\n"; + print $sock "stats cachedump $bucket $items{$bucket}\r\n"; + my %keyexp; + while (<$sock>) { + last if /^END/; + # return format looks like this + # ITEM foo [6 b; 1176415152 s] + if (/^ITEM (\S+) \[.* (\d+) s\]/) { + $keyexp{$1} = $2; + } + } + + foreach my $k (keys(%keyexp)) { + print $sock "get $k\r\n"; + my $response = <$sock>; + if ($response =~ /VALUE (\S+) (\d+) (\d+)/) { + my $flags = $2; + my $len = $3; + my $val; + read $sock, $val, $len; + print "add $k $flags $keyexp{$k} $len\r\n$val\r\n"; + # get the END + $_ = <$sock>; + $_ = <$sock>; + } + } + } + exit; +} + +if ($mode eq 'stats') { + my %items; + + print $sock "stats\r\n"; + + while (<$sock>) { + last if /^END/; + chomp; + if (/^STAT\s+(\S*)\s+(.*)/) { + $items{$1} = $2; + } + } + printf ("#%-17s %5s %11s\n", $addr, "Field", "Value"); + foreach my $name (sort(keys(%items))) { + printf ("%24s %12s\n", $name, $items{$name}); + + } + exit; +} + +# display mode: + +my %items; # class -> { number, age, chunk_size, chunks_per_page, +# total_pages, 
total_chunks, used_chunks, +# free_chunks, free_chunks_end } + +print $sock "stats items\r\n"; +my $max = 0; +while (<$sock>) { + last if /^END/; + if (/^STAT items:(\d+):(\w+) (\d+)/) { + $items{$1}{$2} = $3; + $max = $1; + } +} + +print $sock "stats slabs\r\n"; +while (<$sock>) { + last if /^END/; + if (/^STAT (\d+):(\w+) (\d+)/) { + $items{$1}{$2} = $3; + } +} + +print " # Item_Size Max_age Pages Count Full? Evicted Evict_Time OOM\n"; +foreach my $n (1..$max) { + my $it = $items{$n}; + next if (0 == $it->{total_pages}); + my $size = $it->{chunk_size} < 1024 ? + "$it->{chunk_size}B" : + sprintf("%.1fK", $it->{chunk_size} / 1024.0); + my $full = $it->{free_chunks_end} == 0 ? "yes" : " no"; + printf("%3d %8s %9ds %7d %7d %7s %8d %8d %4d\n", + $n, $size, $it->{age}, $it->{total_pages}, + $it->{number}, $full, $it->{evicted}, + $it->{evicted_time}, $it->{outofmemory}); +} + diff --git a/memcached/scripts/memcached.service b/memcached/scripts/memcached.service new file mode 100644 index 00000000..e896dd76 --- /dev/null +++ b/memcached/scripts/memcached.service @@ -0,0 +1,10 @@ +[Unit] +Description=memcached daemon +After=network.target + +[Service] +EnvironmentFile=/etc/sysconfig/memcached +ExecStart=/usr/bin/memcached -p ${PORT} -u ${USER} -m ${CACHESIZE} -c ${MAXCONN} $OPTIONS + +[Install] +WantedBy=multi-user.target diff --git a/memcached/scripts/memcached.sysv b/memcached/scripts/memcached.sysv new file mode 100755 index 00000000..024cf337 --- /dev/null +++ b/memcached/scripts/memcached.sysv @@ -0,0 +1,78 @@ +#! /bin/sh +# +# chkconfig: - 55 45 +# description: The memcached daemon is a network memory cache service. +# processname: memcached +# config: /etc/sysconfig/memcached + +# Source function library. +. /etc/rc.d/init.d/functions + +PORT=11211 +USER=nobody +MAXCONN=1024 +CACHESIZE=64 +OPTIONS="" + +if [ -f /etc/sysconfig/memcached ];then + . /etc/sysconfig/memcached +fi + +# Check that networking is up. 
if [ "$NETWORKING" = "no" ]
then
    exit 0
fi

RETVAL=0
prog="memcached"
# Single definition of the pid file so start and stop agree on its path.
pidfile="/var/run/memcached/memcached.pid"

start () {
    echo -n $"Starting $prog: "
    # insure that /var/run/memcached has proper permissions
    chown $USER /var/run/memcached
    daemon memcached -d -p $PORT -u $USER -m $CACHESIZE -c $MAXCONN -P $pidfile $OPTIONS
    RETVAL=$?
    echo
    [ $RETVAL -eq 0 ] && touch /var/lock/subsys/memcached
}
stop () {
    echo -n $"Stopping $prog: "
    killproc memcached
    RETVAL=$?
    echo
    if [ $RETVAL -eq 0 ] ; then
        rm -f /var/lock/subsys/memcached
        rm -f $pidfile
    fi
}

restart () {
    stop
    start
}


# See how we were called.
case "$1" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  status)
    status memcached
    ;;
  restart|reload)
    restart
    ;;
  condrestart)
    [ -f /var/lock/subsys/memcached ] && restart || :
    ;;
  *)
    echo $"Usage: $0 {start|stop|status|restart|reload|condrestart}"
    exit 1
esac

exit $?
diff --git a/memcached/scripts/start-memcached b/memcached/scripts/start-memcached
new file mode 100755
index 00000000..ce39fa6f
--- /dev/null
+++ b/memcached/scripts/start-memcached
@@ -0,0 +1,128 @@
#!/usr/bin/perl -w

# start-memcached
# 2003/2004 - Jay Bonci
# This script handles the parsing of the /etc/memcached.conf file
# and was originally created for the Debian distribution.
# Anyone may use this little script under the same terms as
# memcached itself.

use POSIX qw(setsid);
use strict;

if($> != 0 and $< != 0)
{
    print STDERR "Only root wants to run start-memcached.\n";
    exit;
}

my $params = [];
my $etchandle;
my $etcfile = "/etc/memcached.conf";

# This script assumes that memcached is located at /usr/bin/memcached, and
# that the pidfile is writable at /var/run/memcached.pid

my $memcached = "/usr/bin/memcached";
my $pidfile = "/var/run/memcached.pid";

# Optional arguments: <config file> <pid file> (both or neither).
if (scalar(@ARGV) == 2) {
    $etcfile = shift(@ARGV);
    $pidfile = shift(@ARGV);
}

# If we don't get a valid logfile parameter in the /etc/memcached.conf file,
# we'll just throw away all of our in-daemon output. We need to re-tie it so
# that non-bash shells will not hang on logout. Thanks to Michael Renner for
# the tip
my $fd_reopened = "/dev/null";

# Handler for the "logfile" directive: remember where output should go.
sub handle_logfile
{
    my ($logfile) = @_;
    $fd_reopened = $logfile;
}

# Point STDERR/STDOUT at $logfile (appending) and detach STDIN.
sub reopen_logfile
{
    my ($logfile) = @_;

    open *STDERR, '>>', $logfile;
    open *STDOUT, '>>', $logfile;
    open *STDIN, '>>', '/dev/null';
    $fd_reopened = $logfile;
}

# This is set up in place here to support other non -[a-z] directives

my $conf_directives = {
    "logfile" => \&handle_logfile,
};

if(open $etchandle, '<', $etcfile)
{
    while (my $line = <$etchandle>)
    {
        # Strip comments and surrounding whitespace.
        $line =~ s/\#.*//g;
        $line =~ s/\s+$//g;
        $line =~ s/^\s+//g;
        next unless $line;
        # -d and -h make no sense here: this script does the daemonizing.
        next if $line =~ /^\-[dh]/;

        # Anything not starting with "-" is a "<directive> <argument>" line.
        if($line =~ /^[^\-]/)
        {
            my ($directive, $arg) = $line =~ /^(.*?)\s+(.*)/;
            my $handler = defined $directive
                ? $conf_directives->{$directive}
                : undef;
            if ($handler) {
                $handler->($arg);
            } else {
                print STDERR "Ignoring unknown directive: $line\n";
            }
            next;
        }

        push @$params, $line;
    }
    close $etchandle;
}

# Run as root unless the config chose a user.  The old test, grep "-u", was
# true for any non-empty list and so never added the default.
push @$params, "-u root" unless grep { /^-u/ } @$params;
$params = join " ", @$params;

if(-e $pidfile)
{
    my $localpid;
    if (open my $pidhandle, '<', $pidfile) {
        $localpid = <$pidhandle>;
        close $pidhandle;
    }
    $localpid = '' unless defined $localpid;
    chomp $localpid;

    # Only a numeric pid with a live /proc entry counts as running; an
    # empty pid file must not match "/proc/" itself.
    if($localpid =~ /^\d+$/ && -d "/proc/$localpid")
    {
        print STDERR "memcached is already running.\n";
        exit;
    }else{
        # Stale pid file: remove the file, not a file named after the pid.
        unlink $pidfile;
    }

}

my $pid = fork();
die "Can't fork: $!\n" unless defined $pid;

if($pid == 0)
{
    # setsid makes us the session leader
    setsid();
    reopen_logfile($fd_reopened);
    # must fork again now that tty is closed
    $pid = fork();
    die "Can't fork: $!\n" unless defined $pid;
    if ($pid) {
        # Intermediate process: record the daemon's pid and exit.
        if(open my $pidhandle, '>', $pidfile)
        {
            print $pidhandle $pid;
            close $pidhandle;
        }else{

            print STDERR "Can't write pidfile to $pidfile.\n";
        }
        exit(0);
    }
    exec "$memcached $params";
    exit(0);

}
diff --git a/memcached/sizes.c b/memcached/sizes.c
new file mode 100644
index 00000000..95a644ca
--- /dev/null
+++ b/memcached/sizes.c
@@ -0,0 +1,29 @@
#include <stdio.h>

#include "memcached.h"

/* Print one "name<TAB>size" line. */
static void display(const char *name, size_t size) {
    printf("%s\t%d\n", name, (int)size);
}

/* Report the sizes of memcached's main structures. */
int main(int argc, char **argv) {

    display("Slab Stats", sizeof(struct slab_stats));
    display("Thread stats",
            sizeof(struct thread_stats)
            - (200 * sizeof(struct slab_stats)));
    display("Global stats", sizeof(struct stats));
    display("Settings", sizeof(struct settings));
    display("Item (no cas)", sizeof(item));
    display("Item (cas)", sizeof(item) + sizeof(uint64_t));
    display("Libevent thread",
            sizeof(LIBEVENT_THREAD) - sizeof(struct thread_stats));
    display("Connection", sizeof(conn));

    printf("----------------------------------------\n");

    display("libevent thread cumulative", sizeof(LIBEVENT_THREAD));
    display("Thread stats cumulative\t", sizeof(struct thread_stats));

    return 0;
}
diff --git a/memcached/slabs.c b/memcached/slabs.c
new file mode 100644
index 00000000..7cffe434
--- /dev/null
+++ b/memcached/slabs.c
@@ -0,0 +1,840 @@
/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*
 * Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
 * and are divided into chunks. The chunk sizes start off at the size of the
 * "item" structure plus space for a small key and value. They increase by
 * a multiplier factor from there, up to half the maximum slab size. The last
 * slab size is always 1MB, since that's the maximum item size allowed by the
 * memcached protocol.
+ */ +#include "memcached.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* powers-of-N allocation structures */ + +typedef struct { + unsigned int size; /* sizes of items */ + unsigned int perslab; /* how many items per slab */ + + void *slots; /* list of item ptrs */ + unsigned int sl_curr; /* total free items in list */ + + void *end_page_ptr; /* pointer to next free item at end of page, or 0 */ + unsigned int end_page_free; /* number of items remaining at end of last alloced page */ + + unsigned int slabs; /* how many slabs were allocated for this class */ + + void **slab_list; /* array of slab pointers */ + unsigned int list_size; /* size of prev array */ + + unsigned int killing; /* index+1 of dying slab, or zero if none */ + size_t requested; /* The number of requested bytes */ +} slabclass_t; + +static slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES]; +static size_t mem_limit = 0; +static size_t mem_malloced = 0; +static int power_largest; + +static void *mem_base = NULL; +static void *mem_current = NULL; +static size_t mem_avail = 0; + +/** + * Access to the slab allocator is protected by this lock + */ +static pthread_mutex_t slabs_lock = PTHREAD_MUTEX_INITIALIZER; + +/* + * Forward Declarations + */ +static int do_slabs_newslab(const unsigned int id); +static void *memory_allocate(size_t size); + +#ifndef DONT_PREALLOC_SLABS +/* Preallocate as many slab pages as possible (called from slabs_init) + on start-up, so users don't get confused out-of-memory errors when + they do have free (in-slab) space, but no space to make new slabs. + if maxslabs is 18 (POWER_LARGEST - POWER_SMALLEST + 1), then all + slab types can be made. if max memory is less than 18 MB, only the + smaller ones will be made. */ +static void slabs_preallocate (const unsigned int maxslabs); +#endif + +/* + * Figures out which slab class (chunk size) is required to store an item of + * a given size. 
+ * + * Given object size, return id to use when allocating/freeing memory for object + * 0 means error: can't store such a large object + */ + +unsigned int slabs_clsid(const size_t size) { + int res = POWER_SMALLEST; + + if (size == 0) + return 0; + while (size > slabclass[res].size) + if (res++ == power_largest) /* won't fit in the biggest slab */ + return 0; + return res; +} + +/** + * Determines the chunk sizes and initializes the slab class descriptors + * accordingly. + */ +void slabs_init(const size_t limit, const double factor, const bool prealloc) { + int i = POWER_SMALLEST - 1; + unsigned int size = sizeof(item) + settings.chunk_size; + + mem_limit = limit; + + if (prealloc) { + /* Allocate everything in a big chunk with malloc */ + mem_base = malloc(mem_limit); + if (mem_base != NULL) { + mem_current = mem_base; + mem_avail = mem_limit; + } else { + fprintf(stderr, "Warning: Failed to allocate requested memory in" + " one large chunk.\nWill allocate in smaller chunks\n"); + } + } + + memset(slabclass, 0, sizeof(slabclass)); + + while (++i < POWER_LARGEST && size <= settings.item_size_max / factor) { + /* Make sure items are always n-byte aligned */ + if (size % CHUNK_ALIGN_BYTES) + size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES); + + slabclass[i].size = size; + slabclass[i].perslab = settings.item_size_max / slabclass[i].size; + size *= factor; + if (settings.verbose > 1) { + fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n", + i, slabclass[i].size, slabclass[i].perslab); + } + } + + power_largest = i; + slabclass[power_largest].size = settings.item_size_max; + slabclass[power_largest].perslab = 1; + if (settings.verbose > 1) { + fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n", + i, slabclass[i].size, slabclass[i].perslab); + } + + /* for the test suite: faking of how much we've already malloc'd */ + { + char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC"); + if (t_initial_malloc) { + mem_malloced = 
(size_t)atol(t_initial_malloc); + } + + } + +#ifndef DONT_PREALLOC_SLABS + { + char *pre_alloc = getenv("T_MEMD_SLABS_ALLOC"); + + if (pre_alloc == NULL || atoi(pre_alloc) != 0) { + slabs_preallocate(power_largest); + } + } +#endif +} + +#ifndef DONT_PREALLOC_SLABS +static void slabs_preallocate (const unsigned int maxslabs) { + int i; + unsigned int prealloc = 0; + + /* pre-allocate a 1MB slab in every size class so people don't get + confused by non-intuitive "SERVER_ERROR out of memory" + messages. this is the most common question on the mailing + list. if you really don't want this, you can rebuild without + these three lines. */ + + for (i = POWER_SMALLEST; i <= POWER_LARGEST; i++) { + if (++prealloc > maxslabs) + return; + do_slabs_newslab(i); + } + +} +#endif + +static int grow_slab_list (const unsigned int id) { + slabclass_t *p = &slabclass[id]; + if (p->slabs == p->list_size) { + size_t new_size = (p->list_size != 0) ? p->list_size * 2 : 16; + void *new_list = realloc(p->slab_list, new_size * sizeof(void *)); + if (new_list == 0) return 0; + p->list_size = new_size; + p->slab_list = new_list; + } + return 1; +} + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wsign-compare" +#endif + +static int do_slabs_newslab(const unsigned int id) { + slabclass_t *p = &slabclass[id]; + int len = settings.slab_reassign ? 
settings.item_size_max + : p->size * p->perslab; + char *ptr; + + if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0) || + (grow_slab_list(id) == 0) || + ((ptr = memory_allocate((size_t)len)) == 0)) { + + MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id); + return 0; + } + + memset(ptr, 0, (size_t)len); + p->end_page_ptr = ptr; + p->end_page_free = p->perslab; + + p->slab_list[p->slabs++] = ptr; + mem_malloced += len; + MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id); + + return 1; +} + +/*@null@*/ +static void *do_slabs_alloc(const size_t size, unsigned int id) { + slabclass_t *p; + void *ret = NULL; + item *it = NULL; + + if (id < POWER_SMALLEST || id > power_largest) { + MEMCACHED_SLABS_ALLOCATE_FAILED(size, 0); + return NULL; + } + + p = &slabclass[id]; + assert(p->sl_curr == 0 || ((item *)p->slots)->slabs_clsid == 0); + +#ifdef USE_SYSTEM_MALLOC + if (mem_limit && mem_malloced + size > mem_limit) { + MEMCACHED_SLABS_ALLOCATE_FAILED(size, id); + return 0; + } + mem_malloced += size; + ret = malloc(size); + MEMCACHED_SLABS_ALLOCATE(size, id, 0, ret); + return ret; +#endif + + /* fail unless we have space at the end of a recently allocated page, + we have something on our freelist, or we could allocate a new page */ + if (! 
(p->end_page_ptr != 0 || p->sl_curr != 0 || + do_slabs_newslab(id) != 0)) { + /* We don't have more memory available */ + ret = NULL; + } else if (p->sl_curr != 0) { + /* return off our freelist */ + it = (item *)p->slots; + p->slots = it->next; + if (it->next) it->next->prev = 0; + p->sl_curr--; + ret = (void *)it; + } else { + /* if we recently allocated a whole page, return from that */ + assert(p->end_page_ptr != NULL); + ret = p->end_page_ptr; + if (--p->end_page_free != 0) { + p->end_page_ptr = ((caddr_t)p->end_page_ptr) + p->size; + } else { + p->end_page_ptr = 0; + } + } + + if (ret) { + p->requested += size; + MEMCACHED_SLABS_ALLOCATE(size, id, p->size, ret); + } else { + MEMCACHED_SLABS_ALLOCATE_FAILED(size, id); + } + + return ret; +} + +static void do_slabs_free(void *ptr, const size_t size, unsigned int id) { + slabclass_t *p; + item *it; + + assert(((item *)ptr)->slabs_clsid == 0); + assert(id >= POWER_SMALLEST && id <= power_largest); + if (id < POWER_SMALLEST || id > power_largest) + return; + + MEMCACHED_SLABS_FREE(size, id, ptr); + p = &slabclass[id]; + +#ifdef USE_SYSTEM_MALLOC + mem_malloced -= size; + free(ptr); + return; +#endif + + it = (item *)ptr; + it->it_flags |= ITEM_SLABBED; + it->prev = 0; + it->next = p->slots; + if (it->next) it->next->prev = it; + p->slots = it; + + p->sl_curr++; + p->requested -= size; + return; +} + +static int nz_strcmp(int nzlength, const char *nz, const char *z) { + int zlength=strlen(z); + return (zlength == nzlength) && (strncmp(nz, z, zlength) == 0) ? 
0 : -1; +} + +bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c) { + bool ret = true; + + if (add_stats != NULL) { + if (!stat_type) { + /* prepare general statistics for the engine */ + STATS_LOCK(); + APPEND_STAT("bytes", "%llu", (unsigned long long)stats.curr_bytes); + APPEND_STAT("curr_items", "%u", stats.curr_items); + APPEND_STAT("total_items", "%u", stats.total_items); + APPEND_STAT("evictions", "%llu", + (unsigned long long)stats.evictions); + APPEND_STAT("reclaimed", "%llu", + (unsigned long long)stats.reclaimed); + STATS_UNLOCK(); + } else if (nz_strcmp(nkey, stat_type, "items") == 0) { + item_stats(add_stats, c); + } else if (nz_strcmp(nkey, stat_type, "slabs") == 0) { + slabs_stats(add_stats, c); + } else if (nz_strcmp(nkey, stat_type, "sizes") == 0) { + item_stats_sizes(add_stats, c); + } else { + ret = false; + } + } else { + ret = false; + } + + return ret; +} + +/*@null@*/ +static void do_slabs_stats(ADD_STAT add_stats, void *c) { + int i, total; + /* Get the per-thread stats which contain some interesting aggregates */ + struct thread_stats thread_stats; + threadlocal_stats_aggregate(&thread_stats); + + total = 0; + for(i = POWER_SMALLEST; i <= power_largest; i++) { + slabclass_t *p = &slabclass[i]; + if (p->slabs != 0) { + uint32_t perslab, slabs; + slabs = p->slabs; + perslab = p->perslab; + + char key_str[STAT_KEY_LEN]; + char val_str[STAT_VAL_LEN]; + int klen = 0, vlen = 0; + + APPEND_NUM_STAT(i, "chunk_size", "%u", p->size); + APPEND_NUM_STAT(i, "chunks_per_page", "%u", perslab); + APPEND_NUM_STAT(i, "total_pages", "%u", slabs); + APPEND_NUM_STAT(i, "total_chunks", "%u", slabs * perslab); + APPEND_NUM_STAT(i, "used_chunks", "%u", + slabs*perslab - p->sl_curr - p->end_page_free); + APPEND_NUM_STAT(i, "free_chunks", "%u", p->sl_curr); + APPEND_NUM_STAT(i, "free_chunks_end", "%u", p->end_page_free); + APPEND_NUM_STAT(i, "mem_requested", "%llu", + (unsigned long long)p->requested); + APPEND_NUM_STAT(i, "get_hits", "%llu", 
+ (unsigned long long)thread_stats.slab_stats[i].get_hits); + APPEND_NUM_STAT(i, "cmd_set", "%llu", + (unsigned long long)thread_stats.slab_stats[i].set_cmds); + APPEND_NUM_STAT(i, "delete_hits", "%llu", + (unsigned long long)thread_stats.slab_stats[i].delete_hits); + APPEND_NUM_STAT(i, "incr_hits", "%llu", + (unsigned long long)thread_stats.slab_stats[i].incr_hits); + APPEND_NUM_STAT(i, "decr_hits", "%llu", + (unsigned long long)thread_stats.slab_stats[i].decr_hits); + APPEND_NUM_STAT(i, "cas_hits", "%llu", + (unsigned long long)thread_stats.slab_stats[i].cas_hits); + APPEND_NUM_STAT(i, "cas_badval", "%llu", + (unsigned long long)thread_stats.slab_stats[i].cas_badval); + APPEND_NUM_STAT(i, "touch_hits", "%llu", + (unsigned long long)thread_stats.slab_stats[i].touch_hits); + total++; + } + } + + /* add overall slab stats and append terminator */ + + APPEND_STAT("active_slabs", "%d", total); + APPEND_STAT("total_malloced", "%llu", (unsigned long long)mem_malloced); + add_stats(NULL, 0, NULL, 0, c); +} + +static void *memory_allocate(size_t size) { + void *ret; + + if (mem_base == NULL) { + /* We are not using a preallocated large memory chunk */ + ret = malloc(size); + } else { + ret = mem_current; + + if (size > mem_avail) { + return NULL; + } + + /* mem_current pointer _must_ be aligned!!! 
*/
+        if (size % CHUNK_ALIGN_BYTES) {
+            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
+        }
+
+        mem_current = ((char*)mem_current) + size;
+        if (size < mem_avail) {
+            mem_avail -= size;
+        } else {
+            mem_avail = 0;
+        }
+    }
+
+    return ret;
+}
+
+void *slabs_alloc(size_t size, unsigned int id) { /* do_slabs_alloc() under slabs_lock */
+    void *ret;
+
+    pthread_mutex_lock(&slabs_lock);
+    ret = do_slabs_alloc(size, id);
+    pthread_mutex_unlock(&slabs_lock);
+    return ret;
+}
+
+void slabs_free(void *ptr, size_t size, unsigned int id) { /* do_slabs_free() under slabs_lock */
+    pthread_mutex_lock(&slabs_lock);
+    do_slabs_free(ptr, size, id);
+    pthread_mutex_unlock(&slabs_lock);
+}
+
+void slabs_stats(ADD_STAT add_stats, void *c) { /* do_slabs_stats() under slabs_lock */
+    pthread_mutex_lock(&slabs_lock);
+    do_slabs_stats(add_stats, c);
+    pthread_mutex_unlock(&slabs_lock);
+}
+
+void slabs_adjust_mem_requested(unsigned int id, size_t old, size_t ntotal)
+{
+    pthread_mutex_lock(&slabs_lock);
+    slabclass_t *p;
+    if (id < POWER_SMALLEST || id > power_largest) {
+        fprintf(stderr, "Internal error! Invalid slab class\n");
+        abort();
+    }
+
+    p = &slabclass[id];
+    p->requested = p->requested - old + ntotal;
+    pthread_mutex_unlock(&slabs_lock);
+}
+
+static pthread_cond_t maintenance_cond = PTHREAD_COND_INITIALIZER;
+static volatile int do_run_slab_thread = 1;
+
+#define DEFAULT_SLAB_BULK_CHECK 1
+int slab_bulk_check = DEFAULT_SLAB_BULK_CHECK;
+/* Starts moving slab_list[0] of class slab_rebal.s_clsid towards slab_rebal.d_clsid; returns 0 once started, or a negative code (-1, -2, -3) when it cannot start. */
+static int slab_rebalance_start(void) {
+    slabclass_t *s_cls;
+    slabclass_t *d_cls;
+    int no_go = 0;
+
+    pthread_mutex_lock(&cache_lock);
+    pthread_mutex_lock(&slabs_lock);
+
+    if (slab_rebal.s_clsid < POWER_SMALLEST ||
+        slab_rebal.s_clsid > power_largest ||
+        slab_rebal.d_clsid < POWER_SMALLEST ||
+        slab_rebal.d_clsid > power_largest ||
+        slab_rebal.s_clsid == slab_rebal.d_clsid)
+        no_go = -2;
+    /* Only index slabclass[] (and grow its list) when both ids passed the check above. */
+    s_cls = (no_go == 0) ? &slabclass[slab_rebal.s_clsid] : NULL;
+    d_cls = (no_go == 0) ? &slabclass[slab_rebal.d_clsid] : NULL;
+    /* -1: either class still has an end page in use, or the destination list could not grow. */
+    if (no_go == 0 && (d_cls->end_page_ptr || s_cls->end_page_ptr ||
+        !grow_slab_list(slab_rebal.d_clsid))) {
+        no_go = -1;
+    }
+    /* -3: the source class has fewer than two slabs (this overrides -1, as before). */
+    if (no_go != -2 && s_cls->slabs < 2)
+        no_go = -3;
+
+    if (no_go != 0) {
+        pthread_mutex_unlock(&slabs_lock);
+        pthread_mutex_unlock(&cache_lock);
+        return no_go; /* Should use a wrapper function... */
+    }
+
+    s_cls->killing = 1;
+
+    slab_rebal.slab_start = s_cls->slab_list[s_cls->killing - 1];
+    slab_rebal.slab_end = (char *)slab_rebal.slab_start +
+        (s_cls->size * s_cls->perslab);
+    slab_rebal.slab_pos = slab_rebal.slab_start;
+    slab_rebal.done = 0;
+
+    /* Also tells do_item_get to search for items in this slab */
+    slab_rebalance_signal = 2;
+
+    if (settings.verbose > 1) {
+        fprintf(stderr, "Started a slab rebalance\n");
+    }
+
+    pthread_mutex_unlock(&slabs_lock);
+    pthread_mutex_unlock(&cache_lock);
+
+    STATS_LOCK();
+    stats.slab_reassign_running = true;
+    STATS_UNLOCK();
+
+    return 0;
+}
+
+enum move_status {
+    MOVE_PASS=0, MOVE_DONE, MOVE_BUSY
+};
+
+/* refcount == 0 is safe since nobody can incr while cache_lock is held.
+ * refcount != 0 is impossible since flags/etc can be modified in other
+ * threads. instead, note we found a busy one and bail. logic in do_item_get
+ * will prevent busy items from continuing to be busy
+ */
+static int slab_rebalance_move(void) {
+    slabclass_t *s_cls;
+    int x;
+    int was_busy = 0;
+    int refcount = 0;
+    enum move_status status = MOVE_PASS;
+
+    pthread_mutex_lock(&cache_lock);
+    pthread_mutex_lock(&slabs_lock);
+
+    s_cls = &slabclass[slab_rebal.s_clsid];
+
+    for (x = 0; x < slab_bulk_check; x++) {
+        item *it = slab_rebal.slab_pos;
+        status = MOVE_PASS;
+        if (it->slabs_clsid != 255) {
+            refcount = refcount_incr(&it->refcount);
+            if (refcount == 1) { /* item is unlinked, unused */
+                if (it->it_flags & ITEM_SLABBED) {
+                    /* remove from slab freelist */
+                    if (s_cls->slots == it) {
+                        s_cls->slots = it->next;
+                    }
+                    if (it->next) it->next->prev = it->prev;
+                    if (it->prev) it->prev->next = it->next;
+                    s_cls->sl_curr--;
+                    status = MOVE_DONE;
+                } else {
+                    status = MOVE_BUSY;
+                }
+            } else if (refcount == 2) { /* item is linked but not busy */
+                if ((it->it_flags & ITEM_LINKED) != 0) {
+
do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0)); + status = MOVE_DONE; + } else { + /* refcount == 1 + !ITEM_LINKED means the item is being + * uploaded to, or was just unlinked but hasn't been freed + * yet. Let it bleed off on its own and try again later */ + status = MOVE_BUSY; + } + } else { + if (settings.verbose > 2) { + fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n", + it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid); + } + status = MOVE_BUSY; + } + } + + switch (status) { + case MOVE_DONE: + it->refcount = 0; + it->it_flags = 0; + it->slabs_clsid = 255; + break; + case MOVE_BUSY: + slab_rebal.busy_items++; + was_busy++; + refcount_decr(&it->refcount); + break; + case MOVE_PASS: + break; + default: + assert(false); + abort(); + } + + slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size; + if (slab_rebal.slab_pos >= slab_rebal.slab_end) + break; + } + + if (slab_rebal.slab_pos >= slab_rebal.slab_end) { + /* Some items were busy, start again from the top */ + if (slab_rebal.busy_items) { + slab_rebal.slab_pos = slab_rebal.slab_start; + slab_rebal.busy_items = 0; + } else { + slab_rebal.done++; + } + } + + pthread_mutex_unlock(&slabs_lock); + pthread_mutex_unlock(&cache_lock); + + return was_busy; +} + +static void slab_rebalance_finish(void) { + slabclass_t *s_cls; + slabclass_t *d_cls; + + pthread_mutex_lock(&cache_lock); + pthread_mutex_lock(&slabs_lock); + + s_cls = &slabclass[slab_rebal.s_clsid]; + d_cls = &slabclass[slab_rebal.d_clsid]; + + /* At this point the stolen slab is completely clear */ + s_cls->slab_list[s_cls->killing - 1] = + s_cls->slab_list[s_cls->slabs - 1]; + s_cls->slabs--; + s_cls->killing = 0; + + memset(slab_rebal.slab_start, 0, (size_t)settings.item_size_max); + + d_cls->slab_list[d_cls->slabs++] = slab_rebal.slab_start; + d_cls->end_page_ptr = slab_rebal.slab_start; + d_cls->end_page_free = d_cls->perslab; + + slab_rebal.done = 0; + slab_rebal.s_clsid = 0; + slab_rebal.d_clsid = 
0; + slab_rebal.slab_start = NULL; + slab_rebal.slab_end = NULL; + slab_rebal.slab_pos = NULL; + + slab_rebalance_signal = 0; + + pthread_mutex_unlock(&slabs_lock); + pthread_mutex_unlock(&cache_lock); + + STATS_LOCK(); + stats.slab_reassign_running = false; + stats.slabs_moved++; + STATS_UNLOCK(); + + if (settings.verbose > 1) { + fprintf(stderr, "finished a slab move\n"); + } +} + +/* Return 1 means a decision was reached. + * Move to its own thread (created/destroyed as needed) once automover is more + * complex. + */ +static int slab_automove_decision(int *src, int *dst) { + static uint64_t evicted_old[POWER_LARGEST]; + static unsigned int slab_zeroes[POWER_LARGEST]; + static unsigned int slab_winner = 0; + static unsigned int slab_wins = 0; + uint64_t evicted_new[POWER_LARGEST]; + uint64_t evicted_diff = 0; + uint64_t evicted_max = 0; + unsigned int highest_slab = 0; + unsigned int total_pages[POWER_LARGEST]; + int i; + int source = 0; + int dest = 0; + static rel_time_t next_run; + + /* Run less frequently than the slabmove tester. 
*/ + if (current_time >= next_run) { + next_run = current_time + 10; + } else { + return 0; + } + + item_stats_evictions(evicted_new); + pthread_mutex_lock(&cache_lock); + for (i = POWER_SMALLEST; i < power_largest; i++) { + total_pages[i] = slabclass[i].slabs; + } + pthread_mutex_unlock(&cache_lock); + + /* Find a candidate source; something with zero evicts 3+ times */ + for (i = POWER_SMALLEST; i < power_largest; i++) { + evicted_diff = evicted_new[i] - evicted_old[i]; + if (evicted_diff == 0 && total_pages[i] > 2) { + slab_zeroes[i]++; + if (source == 0 && slab_zeroes[i] >= 3) + source = i; + } else { + slab_zeroes[i] = 0; + if (evicted_diff > evicted_max) { + evicted_max = evicted_diff; + highest_slab = i; + } + } + evicted_old[i] = evicted_new[i]; + } + + /* Pick a valid destination */ + if (slab_winner != 0 && slab_winner == highest_slab) { + slab_wins++; + if (slab_wins >= 3) + dest = slab_winner; + } else { + slab_wins = 1; + slab_winner = highest_slab; + } + + if (source && dest) { + *src = source; + *dst = dest; + return 1; + } + return 0; +} + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif +/* Slab rebalancer thread. + * Does not use spinlocks since it is not timing sensitive. Burn less CPU and + * go to sleep if locks are contended + */ +static void *slab_maintenance_thread(void *arg) { + int was_busy = 0; + int src, dest; + + while (do_run_slab_thread) { + if (slab_rebalance_signal == 1) { + if (slab_rebalance_start() < 0) { + /* Handle errors with more specifity as required. */ + slab_rebalance_signal = 0; + } + + } else if (slab_rebalance_signal && slab_rebal.slab_start != NULL) { + /* If we have a decision to continue, continue it */ + was_busy = slab_rebalance_move(); + } else if (settings.slab_automove && slab_automove_decision(&src, &dest) == 1) { + /* Blind to the return codes. 
It will retry on its own */ + slabs_reassign(src, dest); + } + + if (slab_rebal.done) { + slab_rebalance_finish(); + } + + /* Sleep a bit if no work to do, or waiting on busy objects */ + if (was_busy || !slab_rebalance_signal) + sleep(1); + } + return NULL; +} + +static enum reassign_result_type do_slabs_reassign(int src, int dst) { + if (slab_rebalance_signal != 0) + return REASSIGN_RUNNING; + + if (src == dst) + return REASSIGN_SRC_DST_SAME; + + if (src < POWER_SMALLEST || src > power_largest || + dst < POWER_SMALLEST || dst > power_largest) + return REASSIGN_BADCLASS; + + if (slabclass[src].slabs < 2) + return REASSIGN_NOSPARE; + + if (slabclass[dst].end_page_ptr) + return REASSIGN_DEST_NOT_FULL; + + if (slabclass[src].end_page_ptr) + return REASSIGN_SRC_NOT_SAFE; + + slab_rebal.s_clsid = src; + slab_rebal.d_clsid = dst; + + slab_rebalance_signal = 1; + + return REASSIGN_OK; +} + +enum reassign_result_type slabs_reassign(int src, int dst) { + enum reassign_result_type ret; + mutex_lock(&slabs_lock); + ret = do_slabs_reassign(src, dst); + pthread_mutex_unlock(&slabs_lock); + return ret; +} + +static pthread_t maintenance_tid; + +int start_slab_maintenance_thread(void) { + int ret; + slab_rebalance_signal = 0; + slab_rebal.slab_start = NULL; + char *env = getenv("MEMCACHED_SLAB_BULK_CHECK"); + if (env != NULL) { + slab_bulk_check = atoi(env); + if (slab_bulk_check == 0) { + slab_bulk_check = DEFAULT_SLAB_BULK_CHECK; + } + } + if ((ret = pthread_create(&maintenance_tid, NULL, + slab_maintenance_thread, NULL)) != 0) { + fprintf(stderr, "Can't create thread: %s\n", strerror(ret)); + return -1; + } + return 0; +} + +void stop_slab_maintenance_thread(void) { + mutex_lock(&cache_lock); + do_run_slab_thread = 0; + pthread_cond_signal(&maintenance_cond); + pthread_mutex_unlock(&cache_lock); + + /* Wait for the maintenance thread to stop */ + pthread_join(maintenance_tid, NULL); +} diff --git a/memcached/slabs.h b/memcached/slabs.h new file mode 100644 index 
00000000..90e2bd82
--- /dev/null
+++ b/memcached/slabs.h
@@ -0,0 +1,49 @@
+/* slabs memory allocation */
+#ifndef SLABS_H
+#define SLABS_H
+
+/** Init the subsystem. 1st argument is the limit on no. of bytes to allocate,
+    0 if no limit. 2nd argument is the growth factor; each slab will use a chunk
+    size equal to the previous slab's chunk size times this factor.
+    3rd argument specifies if the slab allocator should allocate all memory
+    up front (if true), or allocate memory in chunks as it is needed (if false)
+*/
+void slabs_init(const size_t limit, const double factor, const bool prealloc);
+
+
+/**
+ * Given object size, return id to use when allocating/freeing memory for object
+ * 0 means error: can't store such a large object
+ */
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wshadow"
+#endif
+unsigned int slabs_clsid(const size_t size);
+
+/** Allocate object of given length. 0 on error */ /*@null@*/
+void *slabs_alloc(const size_t size, unsigned int id);
+
+/** Free previously allocated object */
+void slabs_free(void *ptr, size_t size, unsigned int id);
+
+/** Adjust the stats for memory requested */
+void slabs_adjust_mem_requested(unsigned int id, size_t old, size_t ntotal);
+
+/** Return a datum for stats in binary protocol */
+bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c);
+
+/** Fill buffer with stats */ /*@null@*/
+void slabs_stats(ADD_STAT add_stats, void *c);
+
+int start_slab_maintenance_thread(void); /* 0 on success, -1 if the thread could not be created */
+void stop_slab_maintenance_thread(void); /* clears the run flag and joins the thread */
+
+enum reassign_result_type {
+    REASSIGN_OK=0, REASSIGN_RUNNING, REASSIGN_BADCLASS, REASSIGN_NOSPARE,
+    REASSIGN_DEST_NOT_FULL, REASSIGN_SRC_NOT_SAFE, REASSIGN_SRC_DST_SAME
+};
+
+enum reassign_result_type slabs_reassign(int src, int dst); /* request a slab move from class src to class dst */
+
+#endif
diff --git a/memcached/solaris_priv.c b/memcached/solaris_priv.c
new file mode 100644
index 00000000..349334e4
--- /dev/null
+++ b/memcached/solaris_priv.c
@@ -0,0 +1,44 @@
+#include <stdlib.h>
+#include <priv.h>
+#include <stdio.h>
+#include "memcached.h"
+
+/*
+ * this section of code will drop all (Solaris) privileges including
+ * those normally granted to all userland processes (basic privileges). The
+ * effect of this is that after running this code, the process will not be
+ * able to fork(), exec(), etc. See privileges(5) for more information.
+ */
+void drop_privileges(void) {
+    priv_set_t *privs = priv_str_to_set("basic", ",", NULL); /* start from the "basic" set */
+
+    if (privs == NULL) {
+        perror("priv_str_to_set");
+        exit(EXIT_FAILURE);
+    }
+
+    (void)priv_delset(privs, PRIV_FILE_LINK_ANY);
+    (void)priv_delset(privs, PRIV_PROC_EXEC);
+    (void)priv_delset(privs, PRIV_PROC_FORK);
+    (void)priv_delset(privs, PRIV_PROC_INFO);
+    (void)priv_delset(privs, PRIV_PROC_SESSION);
+
+    if (setppriv(PRIV_SET, PRIV_PERMITTED, privs) != 0) { /* permitted = basic minus the five above */
+        perror("setppriv(PRIV_SET, PRIV_PERMITTED)");
+        exit(EXIT_FAILURE);
+    }
+
+    priv_emptyset(privs); /* the same set, now empty, is applied to the two sets below */
+
+    if (setppriv(PRIV_SET, PRIV_INHERITABLE, privs) != 0) {
+        perror("setppriv(PRIV_SET, PRIV_INHERITABLE)");
+        exit(EXIT_FAILURE);
+    }
+
+    if (setppriv(PRIV_SET, PRIV_LIMIT, privs) != 0) {
+        perror("setppriv(PRIV_SET, PRIV_LIMIT)");
+        exit(EXIT_FAILURE);
+    }
+
+    priv_freeset(privs);
+}
diff --git a/memcached/stats.c b/memcached/stats.c
new file mode 100644
index 00000000..5e6b0406
--- /dev/null
+++ b/memcached/stats.c
@@ -0,0 +1,375 @@
+/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/*
+ * Detailed statistics management. For simple stats like total number of
+ * "get" requests, we use inline code in memcached.c and friends, but when
+ * stats detail mode is activated, the code here records more information.
+ *
+ * Author:
+ *   Steven Grimm
+ */
+#include "memcached.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/*
+ * Stats are tracked on the basis of key prefixes. This is a simple
+ * fixed-size hash of prefixes; we run the prefixes through the same
+ * CRC function used by the cache hashtable.
+ */
+typedef struct _prefix_stats PREFIX_STATS;
+struct _prefix_stats {
+    char *prefix;
+    size_t prefix_len;
+    uint64_t num_gets;
+    uint64_t num_sets;
+    uint64_t num_deletes;
+    uint64_t num_hits;
+    PREFIX_STATS *next;
+};
+
+#define PREFIX_HASH_SIZE 256
+
+static PREFIX_STATS *prefix_stats[PREFIX_HASH_SIZE];
+static int num_prefixes = 0;
+static int total_prefix_size = 0;
+
+void stats_prefix_init() {
+    memset(prefix_stats, 0, sizeof(prefix_stats));
+}
+
+/*
+ * Cleans up all our previously collected stats. NOTE: the stats lock is
+ * assumed to be held when this is called.
+ */
+void stats_prefix_clear() {
+    int i;
+
+    for (i = 0; i < PREFIX_HASH_SIZE; i++) {
+        PREFIX_STATS *cur, *next;
+        for (cur = prefix_stats[i]; cur != NULL; cur = next) {
+            next = cur->next;
+            free(cur->prefix);
+            free(cur);
+        }
+        prefix_stats[i] = NULL;
+    }
+    num_prefixes = 0;
+    total_prefix_size = 0;
+}
+
+/*
+ * Returns the stats record for key's prefix (the bytes before settings.prefix_delimiter), creating it if absent.
+ * Returns NULL if key has no delimiter within nkey bytes (or before a NUL), or if allocation fails.
+ */
+/*@null@*/
+static PREFIX_STATS *stats_prefix_find(const char *key, const size_t nkey) {
+    PREFIX_STATS *pfs;
+    uint32_t hashval;
+    size_t length;
+    bool bailout = true;
+
+    assert(key != NULL);
+
+    for (length = 0; length < nkey && key[length] != '\0'; length++) {
+        if (key[length] == settings.prefix_delimiter) {
+            bailout = false;
+            break;
+        }
+    }
+
+    if (bailout) {
+        return NULL;
+    }
+
+    hashval = hash(key, length, 0) % PREFIX_HASH_SIZE;
+    /* Compare lengths too: strncmp() alone would accept a longer stored prefix that merely starts with these bytes. */
+    for (pfs = prefix_stats[hashval]; NULL != pfs; pfs = pfs->next) {
+        if (pfs->prefix_len == length && strncmp(pfs->prefix, key, length) == 0)
+            return pfs;
+    }
+
+    pfs = calloc(sizeof(PREFIX_STATS), 1);
+    if (NULL == pfs) {
+        perror("Can't allocate space for stats structure: calloc");
+        return NULL;
+    }
+
+    pfs->prefix = malloc(length + 1);
+    if (NULL == pfs->prefix) {
+        perror("Can't allocate space for copy of prefix: malloc");
+        free(pfs);
+        return NULL;
+    }
+
+    strncpy(pfs->prefix, key, length);
+    pfs->prefix[length] = '\0'; /* because strncpy() sucks */
+    pfs->prefix_len = length;
+
+    pfs->next = prefix_stats[hashval]; /* push onto the head of this bucket's chain */
+    prefix_stats[hashval] = pfs;
+
+    num_prefixes++;
+    total_prefix_size += length;
+
+    return pfs;
+}
+
+/*
+ * Records a "get" of a key.
+ */
+void stats_prefix_record_get(const char *key, const size_t nkey, const bool is_hit) {
+    PREFIX_STATS *pfs;
+
+    STATS_LOCK();
+    pfs = stats_prefix_find(key, nkey);
+    if (NULL != pfs) {
+        pfs->num_gets++;
+        if (is_hit) {
+            pfs->num_hits++;
+        }
+    }
+    STATS_UNLOCK();
+}
+
+/*
+ * Records a "delete" of a key.
+ */
+void stats_prefix_record_delete(const char *key, const size_t nkey) {
+    PREFIX_STATS *pfs;
+
+    STATS_LOCK();
+    pfs = stats_prefix_find(key, nkey);
+    if (NULL != pfs) {
+        pfs->num_deletes++;
+    }
+    STATS_UNLOCK();
+}
+
+/*
+ * Records a "set" of a key.
+ */
+void stats_prefix_record_set(const char *key, const size_t nkey) {
+    PREFIX_STATS *pfs;
+
+    STATS_LOCK();
+    pfs = stats_prefix_find(key, nkey);
+    if (NULL != pfs) {
+        pfs->num_sets++;
+    }
+    STATS_UNLOCK();
+}
+
+/*
+ * Returns stats in textual form suitable for writing to a client.
+ */
+/*@null@*/
+char *stats_prefix_dump(int *length) {
+    const char *format = "PREFIX %s get %llu hit %llu set %llu del %llu\r\n";
+    PREFIX_STATS *pfs;
+    char *buf;
+    int i, pos;
+    size_t size = 0, written = 0, total_written = 0;
+
+    /*
+     * Figure out how big the buffer needs to be. This is the sum of the
+     * lengths of the prefixes themselves, plus the size of one copy of
+     * the per-prefix output with 20-digit values for all the counts,
+     * plus space for the "END" at the end.
+ */ + STATS_LOCK(); + size = strlen(format) + total_prefix_size + + num_prefixes * (strlen(format) - 2 /* %s */ + + 4 * (20 - 4)) /* %llu replaced by 20-digit num */ + + sizeof("END\r\n"); + buf = malloc(size); + if (NULL == buf) { + perror("Can't allocate stats response: malloc"); + STATS_UNLOCK(); + return NULL; + } + + pos = 0; + for (i = 0; i < PREFIX_HASH_SIZE; i++) { + for (pfs = prefix_stats[i]; NULL != pfs; pfs = pfs->next) { + written = snprintf(buf + pos, size-pos, format, + pfs->prefix, pfs->num_gets, pfs->num_hits, + pfs->num_sets, pfs->num_deletes); + pos += written; + total_written += written; + assert(total_written < size); + } + } + + STATS_UNLOCK(); + memcpy(buf + pos, "END\r\n", 6); + + *length = pos + 5; + return buf; +} + + +#ifdef UNIT_TEST + +/**************************************************************************** + To run unit tests, compile with $(CC) -DUNIT_TEST stats.c assoc.o + (need assoc.o to get the hash() function). +****************************************************************************/ + +struct settings settings; + +static char *current_test = ""; +static int test_count = 0; +static int fail_count = 0; + +static void fail(char *what) { printf("\tFAIL: %s\n", what); fflush(stdout); fail_count++; } +static void test_equals_int(char *what, int a, int b) { test_count++; if (a != b) fail(what); } +static void test_equals_ptr(char *what, void *a, void *b) { test_count++; if (a != b) fail(what); } +static void test_equals_str(char *what, const char *a, const char *b) { test_count++; if (strcmp(a, b)) fail(what); } +static void test_equals_ull(char *what, uint64_t a, uint64_t b) { test_count++; if (a != b) fail(what); } +static void test_notequals_ptr(char *what, void *a, void *b) { test_count++; if (a == b) fail(what); } +static void test_notnull_ptr(char *what, void *a) { test_count++; if (NULL == a) fail(what); } + +static void test_prefix_find() { + PREFIX_STATS *pfs1, *pfs2; + + pfs1 = stats_prefix_find("abc"); + 
test_notnull_ptr("initial prefix find", pfs1); + test_equals_ull("request counts", 0ULL, + pfs1->num_gets + pfs1->num_sets + pfs1->num_deletes + pfs1->num_hits); + pfs2 = stats_prefix_find("abc"); + test_equals_ptr("find of same prefix", pfs1, pfs2); + pfs2 = stats_prefix_find("abc:"); + test_equals_ptr("find of same prefix, ignoring delimiter", pfs1, pfs2); + pfs2 = stats_prefix_find("abc:d"); + test_equals_ptr("find of same prefix, ignoring extra chars", pfs1, pfs2); + pfs2 = stats_prefix_find("xyz123"); + test_notequals_ptr("find of different prefix", pfs1, pfs2); + pfs2 = stats_prefix_find("ab:"); + test_notequals_ptr("find of shorter prefix", pfs1, pfs2); +} + +static void test_prefix_record_get() { + PREFIX_STATS *pfs; + + stats_prefix_record_get("abc:123", 0); + pfs = stats_prefix_find("abc:123"); + test_equals_ull("get count after get #1", 1, pfs->num_gets); + test_equals_ull("hit count after get #1", 0, pfs->num_hits); + stats_prefix_record_get("abc:456", 0); + test_equals_ull("get count after get #2", 2, pfs->num_gets); + test_equals_ull("hit count after get #2", 0, pfs->num_hits); + stats_prefix_record_get("abc:456", 1); + test_equals_ull("get count after get #3", 3, pfs->num_gets); + test_equals_ull("hit count after get #3", 1, pfs->num_hits); + stats_prefix_record_get("def:", 1); + test_equals_ull("get count after get #4", 3, pfs->num_gets); + test_equals_ull("hit count after get #4", 1, pfs->num_hits); +} + +static void test_prefix_record_delete() { + PREFIX_STATS *pfs; + + stats_prefix_record_delete("abc:123"); + pfs = stats_prefix_find("abc:123"); + test_equals_ull("get count after delete #1", 0, pfs->num_gets); + test_equals_ull("hit count after delete #1", 0, pfs->num_hits); + test_equals_ull("delete count after delete #1", 1, pfs->num_deletes); + test_equals_ull("set count after delete #1", 0, pfs->num_sets); + stats_prefix_record_delete("def:"); + test_equals_ull("delete count after delete #2", 1, pfs->num_deletes); +} + +static void 
test_prefix_record_set() { + PREFIX_STATS *pfs; + + stats_prefix_record_set("abc:123"); + pfs = stats_prefix_find("abc:123"); + test_equals_ull("get count after set #1", 0, pfs->num_gets); + test_equals_ull("hit count after set #1", 0, pfs->num_hits); + test_equals_ull("delete count after set #1", 0, pfs->num_deletes); + test_equals_ull("set count after set #1", 1, pfs->num_sets); + stats_prefix_record_delete("def:"); + test_equals_ull("set count after set #2", 1, pfs->num_sets); +} + +static void test_prefix_dump() { + int hashval = hash("abc", 3, 0) % PREFIX_HASH_SIZE; + char tmp[500]; + char *expected; + int keynum; + int length; + + test_equals_str("empty stats", "END\r\n", stats_prefix_dump(&length)); + test_equals_int("empty stats length", 5, length); + stats_prefix_record_set("abc:123"); + expected = "PREFIX abc get 0 hit 0 set 1 del 0\r\nEND\r\n"; + test_equals_str("stats after set", expected, stats_prefix_dump(&length)); + test_equals_int("stats length after set", strlen(expected), length); + stats_prefix_record_get("abc:123", 0); + expected = "PREFIX abc get 1 hit 0 set 1 del 0\r\nEND\r\n"; + test_equals_str("stats after get #1", expected, stats_prefix_dump(&length)); + test_equals_int("stats length after get #1", strlen(expected), length); + stats_prefix_record_get("abc:123", 1); + expected = "PREFIX abc get 2 hit 1 set 1 del 0\r\nEND\r\n"; + test_equals_str("stats after get #2", expected, stats_prefix_dump(&length)); + test_equals_int("stats length after get #2", strlen(expected), length); + stats_prefix_record_delete("abc:123"); + expected = "PREFIX abc get 2 hit 1 set 1 del 1\r\nEND\r\n"; + test_equals_str("stats after del #1", expected, stats_prefix_dump(&length)); + test_equals_int("stats length after del #1", strlen(expected), length); + + /* The order of results might change if we switch hash functions. 
*/ + stats_prefix_record_delete("def:123"); + expected = "PREFIX abc get 2 hit 1 set 1 del 1\r\n" + "PREFIX def get 0 hit 0 set 0 del 1\r\n" + "END\r\n"; + test_equals_str("stats after del #2", expected, stats_prefix_dump(&length)); + test_equals_int("stats length after del #2", strlen(expected), length); + + /* Find a key that hashes to the same bucket as "abc" */ + for (keynum = 0; keynum < PREFIX_HASH_SIZE * 100; keynum++) { + snprintf(tmp, sizeof(tmp), "%d", keynum); + if (hashval == hash(tmp, strlen(tmp), 0) % PREFIX_HASH_SIZE) { + break; + } + } + stats_prefix_record_set(tmp); + snprintf(tmp, sizeof(tmp), + "PREFIX %d get 0 hit 0 set 1 del 0\r\n" + "PREFIX abc get 2 hit 1 set 1 del 1\r\n" + "PREFIX def get 0 hit 0 set 0 del 1\r\n" + "END\r\n", keynum); + test_equals_str("stats with two stats in one bucket", + tmp, stats_prefix_dump(&length)); + test_equals_int("stats length with two stats in one bucket", + strlen(tmp), length); +} + +static void run_test(char *what, void (*func)(void)) { + current_test = what; + test_count = fail_count = 0; + puts(what); + fflush(stdout); + + stats_prefix_clear(); + (func)(); + printf("\t%d / %d pass\n", (test_count - fail_count), test_count); +} + +/* In case we're compiled in thread mode */ +void mt_stats_lock() { } +void mt_stats_unlock() { } + +main(int argc, char **argv) { + stats_prefix_init(); + settings.prefix_delimiter = ':'; + run_test("stats_prefix_find", test_prefix_find); + run_test("stats_prefix_record_get", test_prefix_record_get); + run_test("stats_prefix_record_delete", test_prefix_record_delete); + run_test("stats_prefix_record_set", test_prefix_record_set); + run_test("stats_prefix_dump", test_prefix_dump); +} + +#endif diff --git a/memcached/stats.h b/memcached/stats.h new file mode 100644 index 00000000..4a27ae94 --- /dev/null +++ b/memcached/stats.h @@ -0,0 +1,8 @@ +/* stats */ +void stats_prefix_init(void); +void stats_prefix_clear(void); +void stats_prefix_record_get(const char *key, const size_t nkey, 
const bool is_hit); +void stats_prefix_record_delete(const char *key, const size_t nkey); +void stats_prefix_record_set(const char *key, const size_t nkey); +/*@null@*/ +char *stats_prefix_dump(int *length); diff --git a/memcached/t/00-startup.t b/memcached/t/00-startup.t new file mode 100755 index 00000000..26967d0d --- /dev/null +++ b/memcached/t/00-startup.t @@ -0,0 +1,82 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 18; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +eval { + my $server = new_memcached(); + ok($server, "started the server"); +}; +is($@, '', 'Basic startup works'); + +eval { + my $server = new_memcached("-l fooble"); +}; +ok($@, "Died with illegal -l args"); + +eval { + my $server = new_memcached("-l 127.0.0.1"); +}; +is($@,'', "-l 127.0.0.1 works"); + +eval { + my $server = new_memcached('-C'); + my $stats = mem_stats($server->sock, 'settings'); + is('no', $stats->{'cas_enabled'}); +}; +is($@, '', "-C works"); + +eval { + my $server = new_memcached('-b 8675'); + my $stats = mem_stats($server->sock, 'settings'); + is('8675', $stats->{'tcp_backlog'}); +}; +is($@, '', "-b works"); + +foreach my $val ('auto', 'ascii') { + eval { + my $server = new_memcached("-B $val"); + my $stats = mem_stats($server->sock, 'settings'); + ok($stats->{'binding_protocol'} =~ /$val/, "$val works"); + }; + is($@, '', "$val works"); +} + +# For the binary test, we just verify it starts since we don't have an easy bin client. +eval { + my $server = new_memcached("-B binary"); +}; +is($@, '', "binary works"); + +eval { + my $server = new_memcached("-vv -B auto"); +}; +is($@, '', "auto works"); + +eval { + my $server = new_memcached("-vv -B ascii"); +}; +is($@, '', "ascii works"); + + +# For the binary test, we just verify it starts since we don't have an easy bin client. +eval { + my $server = new_memcached("-vv -B binary"); +}; +is($@, '', "binary works"); + + +# Should blow up with something invalid. 
+eval { + my $server = new_memcached("-B http"); +}; +ok($@, "Died with illegal -B arg."); + +# Should not allow -t 0 +eval { + my $server = new_memcached("-t 0"); +}; +ok($@, "Died with illegal 0 thread count"); diff --git a/memcached/t/64bit.t b/memcached/t/64bit.t new file mode 100755 index 00000000..425ff8aa --- /dev/null +++ b/memcached/t/64bit.t @@ -0,0 +1,44 @@ +#!/usr/bin/perl + +use strict; +use Test::More; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +$ENV{T_MEMD_INITIAL_MALLOC} = "4294967328"; # 2**32 + 32 , just over 4GB +$ENV{T_MEMD_SLABS_ALLOC} = 0; # don't preallocate slabs + +my $server = new_memcached("-m 4098 -M"); +my $sock = $server->sock; + +my ($stats, $slabs) = @_; + +$stats = mem_stats($sock); + +if ($stats->{'pointer_size'} eq "32") { + plan skip_all => 'Skipping 64-bit tests on 32-bit build'; + exit 0; +} else { + plan tests => 6; +} + +is($stats->{'pointer_size'}, 64, "is 64 bit"); +is($stats->{'limit_maxbytes'}, "4297064448", "max bytes is 4098 MB"); + +$slabs = mem_stats($sock, 'slabs'); +is($slabs->{'total_malloced'}, "4294967328", "expected (faked) value of total_malloced"); +is($slabs->{'active_slabs'}, 0, "no active slabs"); + +my $hit_limit = 0; +for (1..5) { + my $size = 400 * 1024; + my $data = "a" x $size; + print $sock "set big$_ 0 0 $size\r\n$data\r\n"; + my $res = <$sock>; + $hit_limit = 1 if $res ne "STORED\r\n"; +} +ok($hit_limit, "hit size limit"); + +$slabs = mem_stats($sock, 'slabs'); +is($slabs->{'active_slabs'}, 1, "1 active slab"); diff --git a/memcached/t/binary-get.t b/memcached/t/binary-get.t new file mode 100755 index 00000000..f57a8a34 --- /dev/null +++ b/memcached/t/binary-get.t @@ -0,0 +1,23 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 8; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; + +my $count = 1; + +foreach my $blob ("mooo\0", "mumble\0\0\0\0\r\rblarg", "\0", "\r") { + my $key = "foo$count"; + 
my $len = length($blob); + print "len is $len\n"; + print $sock "set $key 0 0 $len\r\n$blob\r\n"; + is(scalar <$sock>, "STORED\r\n", "stored $key"); + mem_get_is($sock, $key, $blob); + $count++; +} + diff --git a/memcached/t/binary-sasl.t b/memcached/t/binary-sasl.t new file mode 100755 index 00000000..69a05c28 --- /dev/null +++ b/memcached/t/binary-sasl.t @@ -0,0 +1,632 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use Cwd; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $supports_sasl = supports_sasl(); + +use Test::More; + +if (supports_sasl()) { + if ($ENV{'RUN_SASL_TESTS'}) { + plan tests => 25; + } else { + plan skip_all => 'Skipping SASL tests'; + exit 0; + } +} else { + plan tests => 1; + eval { + my $server = new_memcached("-S"); + }; + ok($@, "Died with illegal -S args when SASL is not supported."); + exit 0; +} + +eval { + my $server = new_memcached("-S -B auto"); +}; +ok($@, "SASL shouldn't be used with protocol auto negotiate"); + +eval { + my $server = new_memcached("-S -B ascii"); +}; +ok($@, "SASL isn't implemented in the ascii protocol"); + +eval { + my $server = new_memcached("-S -B binary -B ascii"); +}; +ok($@, "SASL isn't implemented in the ascii protocol"); + +# Based almost 100% off testClient.py which is: +# Copyright (c) 2007 Dustin Sallings + +# Command constants +use constant CMD_GET => 0x00; +use constant CMD_SET => 0x01; +use constant CMD_ADD => 0x02; +use constant CMD_REPLACE => 0x03; +use constant CMD_DELETE => 0x04; +use constant CMD_INCR => 0x05; +use constant CMD_DECR => 0x06; +use constant CMD_QUIT => 0x07; +use constant CMD_FLUSH => 0x08; +use constant CMD_GETQ => 0x09; +use constant CMD_NOOP => 0x0A; +use constant CMD_VERSION => 0x0B; +use constant CMD_GETK => 0x0C; +use constant CMD_GETKQ => 0x0D; +use constant CMD_APPEND => 0x0E; +use constant CMD_PREPEND => 0x0F; +use constant CMD_STAT => 0x10; +use constant CMD_SETQ => 0x11; +use constant CMD_ADDQ => 0x12; +use constant CMD_REPLACEQ => 0x13; +use 
constant CMD_DELETEQ => 0x14; +use constant CMD_INCREMENTQ => 0x15; +use constant CMD_DECREMENTQ => 0x16; +use constant CMD_QUITQ => 0x17; +use constant CMD_FLUSHQ => 0x18; +use constant CMD_APPENDQ => 0x19; +use constant CMD_PREPENDQ => 0x1A; + +use constant CMD_SASL_LIST_MECHS => 0x20; +use constant CMD_SASL_AUTH => 0x21; +use constant CMD_SASL_STEP => 0x22; +use constant ERR_AUTH_ERROR => 0x20; + + +# REQ and RES formats are divided even though they currently share +# the same format, since they _could_ differ in the future. +use constant REQ_PKT_FMT => "CCnCCnNNNN"; +use constant RES_PKT_FMT => "CCnCCnNNNN"; +use constant INCRDECR_PKT_FMT => "NNNNN"; +use constant MIN_RECV_BYTES => length(pack(RES_PKT_FMT)); +use constant REQ_MAGIC => 0x80; +use constant RES_MAGIC => 0x81; + +my $pwd=getcwd; +$ENV{'SASL_CONF_PATH'} = "$pwd/t/sasl"; + +my $server = new_memcached('-B binary -S '); + +my $mc = MC::Client->new; + +my $check = sub { + my ($key, $orig_val) = @_; + my ($status, $val, $cas) = $mc->get($key); + + if ($val =~ /^\d+$/) { + cmp_ok($val,'==', $orig_val, "$val = $orig_val"); + } + else { + cmp_ok($val, 'eq', $orig_val, "$val = $orig_val"); + } +}; + +my $set = sub { + my ($key, $orig_value, $exp) = @_; + $exp = defined $exp ? 
$exp : 0; + my ($status, $rv)= $mc->set($key, $orig_value, $exp); + $check->($key, $orig_value); +}; + +my $empty = sub { + my $key = shift; + my ($status,$rv) =()= eval { $mc->get($key) }; + #if ($status == ERR_AUTH_ERROR) { + # ok($@->auth_error, "Not authorized to connect"); + #} + #else { + # ok($@->not_found, "We got a not found error when we expected one"); + #} + if ($status) { + ok($@->not_found, "We got a not found error when we expected one"); + } +}; + +my $delete = sub { + my ($key, $when) = @_; + $mc->delete($key, $when); + $empty->($key); +}; + +# BEGIN THE TEST +ok($server, "started the server"); + +my $v = $mc->version; +ok(defined $v && length($v), "Proper version: $v"); + +# list mechs +my $mechs= $mc->list_mechs(); +Test::More::cmp_ok($mechs, 'eq', 'CRAM-MD5 PLAIN', "list_mechs $mechs"); + +# this should fail, not authenticated +{ + my ($status, $val)= $mc->set('x', "somevalue"); + ok($status, "this fails to authenticate"); + cmp_ok($status,'==',ERR_AUTH_ERROR, "error code matches"); +} +$empty->('x'); +{ + my $mc = MC::Client->new; + my ($status, $val) = $mc->delete('x'); + ok($status, "this fails to authenticate"); + cmp_ok($status,'==',ERR_AUTH_ERROR, "error code matches"); +} +$empty->('x'); +{ + my $mc = MC::Client->new; + my ($status, $val)= $mc->set('x', "somevalue"); + ok($status, "this fails to authenticate"); + cmp_ok($status,'==',ERR_AUTH_ERROR, "error code matches"); +} +$empty->('x'); +{ + my $mc = MC::Client->new; + my ($status, $val)= $mc->flush('x'); + ok($status, "this fails to authenticate"); + cmp_ok($status,'==',ERR_AUTH_ERROR, "error code matches"); +} +$empty->('x'); + +# Build the auth DB for testing. +my $sasldb = '/tmp/test-memcached.sasldb'; +unlink $sasldb; + +my $saslpasswd_path; +for my $dir (split(/:/, $ENV{PATH}), + "/usr/bin", + "/usr/sbin", + "/usr/local/bin", + "/usr/local/sbin", + ) { + my $exe = $dir . 
'/saslpasswd2'; + if (-x $exe) { + $saslpasswd_path = $exe; + last; + } +} + +system("echo testpass | $saslpasswd_path -a memcached -c -p testuser"); + +$mc = MC::Client->new; + +# Attempt a bad auth mech. +is ($mc->authenticate('testuser', 'testpass', "X" x 40), 0x4, "bad mech"); + +# Attempt bad authentication. +is ($mc->authenticate('testuser', 'wrongpassword'), 0x20, "bad auth"); + +# Now try good authentication and make the tests work. +is ($mc->authenticate('testuser', 'testpass'), 0, "authenticated"); +# these should work +{ + my ($status, $val)= $mc->set('x', "somevalue"); + ok(! $status); +} +$check->('x','somevalue'); + +{ + my ($status, $val)= $mc->delete('x'); + ok(! $status); +} +$empty->('x'); + +{ + my ($status, $val)= $mc->set('x', "somevalue"); + ok(! $status); +} +$check->('x','somevalue'); + +{ + my ($status, $val)= $mc->flush('x'); + ok(! $status); +} +$empty->('x'); + +# check the SASL stats, make sure they track things correctly +# note: the enabled or not is presence checked in stats.t + +# while authenticated, get current counter +# +# My initial approach was going to be to get current counts, reauthenticate +# and fail, followed by a reauth successfully so I'd know what happened. +# Reauthentication is currently unsupported, so it doesn't work that way at the +# moment. Adding tests may break this. + +{ + my %stats = $mc->stats(''); + is ($stats{'auth_cmds'}, 2, "auth commands counted"); + is ($stats{'auth_errors'}, 1, "auth errors correct"); +} + + +# Along with the assertion added to the code to verify we're staying +# within bounds when we do a stats detail dump (detail turned on at +# the top). +# my %stats = $mc->stats('detail dump'); + +# ###################################################################### +# Test ends around here. 
+# ###################################################################### + +package MC::Client; + +use strict; +use warnings; +use fields qw(socket); +use IO::Socket::INET; + +use constant ERR_AUTH_ERROR => 0x20; + +sub new { + my $self = shift; + my ($s) = @_; + $s = $server unless defined $s; + my $sock = $s->sock; + $self = fields::new($self); + $self->{socket} = $sock; + return $self; +} + +sub authenticate { + my ($self, $user, $pass, $mech)= @_; + $mech ||= 'PLAIN'; + my $buf = sprintf("%c%s%c%s", 0, $user, 0, $pass); + my ($status, $rv, undef) = $self->_do_command(::CMD_SASL_AUTH, $mech, $buf, ''); + return $status; +} +sub list_mechs { + my ($self)= @_; + my ($status, $rv, undef) = $self->_do_command(::CMD_SASL_LIST_MECHS, '', '', ''); + return join(" ", sort(split(/\s+/, $rv))); +} + +sub build_command { + my $self = shift; + die "Not enough args to send_command" unless @_ >= 4; + my ($cmd, $key, $val, $opaque, $extra_header, $cas) = @_; + + $extra_header = '' unless defined $extra_header; + my $keylen = length($key); + my $vallen = length($val); + my $extralen = length($extra_header); + my $datatype = 0; # field for future use + my $reserved = 0; # field for future use + my $totallen = $keylen + $vallen + $extralen; + my $ident_hi = 0; + my $ident_lo = 0; + + if ($cas) { + $ident_hi = int($cas / 2 ** 32); + $ident_lo = int($cas % 2 ** 32); + } + + my $msg = pack(::REQ_PKT_FMT, ::REQ_MAGIC, $cmd, $keylen, $extralen, + $datatype, $reserved, $totallen, $opaque, $ident_hi, + $ident_lo); + my $full_msg = $msg . $extra_header . $key . $val; + return $full_msg; +} + +sub send_command { + my $self = shift; + die "Not enough args to send_command" unless @_ >= 4; + my ($cmd, $key, $val, $opaque, $extra_header, $cas) = @_; + + my $full_msg = $self->build_command($cmd, $key, $val, $opaque, $extra_header, $cas); + + my $sent = $self->{socket}->send($full_msg); + die("Send failed: $!") unless $sent; + if($sent != length($full_msg)) { + die("only sent $sent of " . 
length($full_msg) . " bytes"); + } +} + +sub flush_socket { + my $self = shift; + $self->{socket}->flush; +} + +# Send a silent command and ensure it doesn't respond. +sub send_silent { + my $self = shift; + die "Not enough args to send_silent" unless @_ >= 4; + my ($cmd, $key, $val, $opaque, $extra_header, $cas) = @_; + + $self->send_command($cmd, $key, $val, $opaque, $extra_header, $cas); + $self->send_command(::CMD_NOOP, '', '', $opaque + 1); + + my ($ropaque, $status, $data) = $self->_handle_single_response; + Test::More::is($ropaque, $opaque + 1); +} + +sub silent_mutation { + my $self = shift; + my ($cmd, $key, $value) = @_; + + $empty->($key); + my $extra = pack "NN", 82, 0; + $mc->send_silent($cmd, $key, $value, 7278552, $extra, 0); + $check->($key, $value); +} + +sub _handle_single_response { + my $self = shift; + my $myopaque = shift; + + $self->{socket}->recv(my $response, ::MIN_RECV_BYTES); + + my ($magic, $cmd, $keylen, $extralen, $datatype, $status, $remaining, + $opaque, $ident_hi, $ident_lo) = unpack(::RES_PKT_FMT, $response); + + return ($opaque, '', '', '', 0) if not defined $remaining; + return ($opaque, '', '', '', 0) if ($remaining == 0); + + # fetch the value + my $rv=""; + while($remaining - length($rv) > 0) { + $self->{socket}->recv(my $buf, $remaining - length($rv)); + $rv .= $buf; + } + if(length($rv) != $remaining) { + my $found = length($rv); + die("Expected $remaining bytes, got $found"); + } + + my $cas = ($ident_hi * 2 ** 32) + $ident_lo; + + #if ($status) { + #die MC::Error->new($status, $rv); + #} + + return ($opaque, $status, $rv, $cas, $keylen); +} + +sub _do_command { + my $self = shift; + die unless @_ >= 3; + my ($cmd, $key, $val, $extra_header, $cas) = @_; + + $extra_header = '' unless defined $extra_header; + my $opaque = int(rand(2**32)); + $self->send_command($cmd, $key, $val, $opaque, $extra_header, $cas); + my (undef, $status, $rv, $rcas) = $self->_handle_single_response($opaque); + return ($status, $rv, $rcas); +} + +sub 
_incrdecr_header { + my $self = shift; + my ($amt, $init, $exp) = @_; + + my $amt_hi = int($amt / 2 ** 32); + my $amt_lo = int($amt % 2 ** 32); + + my $init_hi = int($init / 2 ** 32); + my $init_lo = int($init % 2 ** 32); + + my $extra_header = pack(::INCRDECR_PKT_FMT, $amt_hi, $amt_lo, $init_hi, + $init_lo, $exp); + + return $extra_header; +} + +sub _incrdecr { + my $self = shift; + my ($cmd, $key, $amt, $init, $exp) = @_; + + my ($status, $data, undef) = $self->_do_command($cmd, $key, '', + $self->_incrdecr_header($amt, $init, $exp)); + + my $header = substr $data, 0, 8, ''; + my ($resp_hi, $resp_lo) = unpack "NN", $header; + my $resp = ($resp_hi * 2 ** 32) + $resp_lo; + + return $resp; +} + +sub silent_incrdecr { + my $self = shift; + my ($cmd, $key, $amt, $init, $exp) = @_; + my $opaque = 8275753; + + $mc->send_silent($cmd, $key, '', $opaque, + $mc->_incrdecr_header($amt, $init, $exp)); +} + +sub stats { + my $self = shift; + my $key = shift; + my $cas = 0; + my $opaque = int(rand(2**32)); + $self->send_command(::CMD_STAT, $key, '', $opaque, '', $cas); + + my %rv = (); + my $found_key = ''; + my $found_val = ''; + my $status= 0; + do { + my ($op, $status, $data, $cas, $keylen) = $self->_handle_single_response($opaque); + if ($keylen > 0) { + $found_key = substr($data, 0, $keylen); + $found_val = substr($data, $keylen); + $rv{$found_key} = $found_val; + } else { + $found_key = ''; + } + } while($found_key ne ''); + return %rv; +} + +sub get { + my $self = shift; + my $key = shift; + my ($status, $rv, $cas) = $self->_do_command(::CMD_GET, $key, '', ''); + + my $header = substr $rv, 0, 4, ''; + my $flags = unpack("N", $header); + + return ($status, $rv); +} + +sub get_multi { + my $self = shift; + my @keys = @_; + + for (my $i = 0; $i < @keys; $i++) { + $self->send_command(::CMD_GETQ, $keys[$i], '', $i, '', 0); + } + + my $terminal = @keys + 10; + $self->send_command(::CMD_NOOP, '', '', $terminal); + + my %return; + my $status = 0; + while (1) { + my ($opaque, 
$status, $data) = $self->_handle_single_response; + last if $opaque == $terminal; + + my $header = substr $data, 0, 4, ''; + my $flags = unpack("N", $header); + + $return{$keys[$opaque]} = [$flags, $data]; + } + + return %return if wantarray; + return \%return; +} + +sub version { + my $self = shift; + return $self->_do_command(::CMD_VERSION, '', ''); +} + +sub flush { + my $self = shift; + return $self->_do_command(::CMD_FLUSH, '', ''); +} + +sub add { + my $self = shift; + my ($key, $val, $flags, $expire) = @_; + my $extra_header = pack "NN", $flags, $expire; + my $cas = 0; + return $self->_do_command(::CMD_ADD, $key, $val, $extra_header, $cas); +} + +sub set { + my $self = shift; + my $flags = 0; + my $cas = 0; + my ($key, $val, $expire) = @_; + $expire = defined $expire ? $expire : 0; + my $extra_header = pack "NN", $flags, $expire; + return $self->_do_command(::CMD_SET, $key, $val, $extra_header, $cas); +} + +sub _append_prepend { + my $self = shift; + my ($cmd, $key, $val, $cas) = @_; + return $self->_do_command($cmd, $key, $val, '', $cas); +} + +sub replace { + my $self = shift; + my ($key, $val, $flags, $expire) = @_; + my $extra_header = pack "NN", $flags, $expire; + my $cas = 0; + return $self->_do_command(::CMD_REPLACE, $key, $val, $extra_header, $cas); +} + +sub delete { + my $self = shift; + my ($key) = @_; + return $self->_do_command(::CMD_DELETE, $key, ''); +} + +sub incr { + my $self = shift; + my ($key, $amt, $init, $exp) = @_; + $amt = 1 unless defined $amt; + $init = 0 unless defined $init; + $exp = 0 unless defined $exp; + + return $self->_incrdecr(::CMD_INCR, $key, $amt, $init, $exp); +} + +sub decr { + my $self = shift; + my ($key, $amt, $init, $exp) = @_; + $amt = 1 unless defined $amt; + $init = 0 unless defined $init; + $exp = 0 unless defined $exp; + + return $self->_incrdecr(::CMD_DECR, $key, $amt, $init, $exp); +} + +sub noop { + my $self = shift; + return $self->_do_command(::CMD_NOOP, '', ''); +} + +package MC::Error; + +use strict; 
+use warnings; + +use constant ERR_UNKNOWN_CMD => 0x81; +use constant ERR_NOT_FOUND => 0x1; +use constant ERR_EXISTS => 0x2; +use constant ERR_TOO_BIG => 0x3; +use constant ERR_EINVAL => 0x4; +use constant ERR_NOT_STORED => 0x5; +use constant ERR_DELTA_BADVAL => 0x6; +use constant ERR_AUTH_ERROR => 0x20; + +use overload '""' => sub { + my $self = shift; + return "Memcache Error ($self->[0]): $self->[1]"; +}; + +sub new { + my $class = shift; + my $error = [@_]; + my $self = bless $error, (ref $class || $class); + + return $self; +} + +sub not_found { + my $self = shift; + return $self->[0] == ERR_NOT_FOUND; +} + +sub exists { + my $self = shift; + return $self->[0] == ERR_EXISTS; +} + +sub too_big { + my $self = shift; + return $self->[0] == ERR_TOO_BIG; +} + +sub delta_badval { + my $self = shift; + return $self->[0] == ERR_DELTA_BADVAL; +} + +sub auth_error { + my $self = shift; + return $self->[0] == ERR_AUTH_ERROR; +} + +unlink $sasldb; + +# vim: filetype=perl + diff --git a/memcached/t/binary.t b/memcached/t/binary.t new file mode 100755 index 00000000..504ddef1 --- /dev/null +++ b/memcached/t/binary.t @@ -0,0 +1,885 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use Test::More tests => 3539; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +ok($server, "started the server"); + +# Based almost 100% off testClient.py which is: +# Copyright (c) 2007 Dustin Sallings + +# Command constants +use constant CMD_GET => 0x00; +use constant CMD_SET => 0x01; +use constant CMD_ADD => 0x02; +use constant CMD_REPLACE => 0x03; +use constant CMD_DELETE => 0x04; +use constant CMD_INCR => 0x05; +use constant CMD_DECR => 0x06; +use constant CMD_QUIT => 0x07; +use constant CMD_FLUSH => 0x08; +use constant CMD_GETQ => 0x09; +use constant CMD_NOOP => 0x0A; +use constant CMD_VERSION => 0x0B; +use constant CMD_GETK => 0x0C; +use constant CMD_GETKQ => 0x0D; +use constant CMD_APPEND => 0x0E; +use constant CMD_PREPEND => 0x0F; +use 
constant CMD_STAT => 0x10; +use constant CMD_SETQ => 0x11; +use constant CMD_ADDQ => 0x12; +use constant CMD_REPLACEQ => 0x13; +use constant CMD_DELETEQ => 0x14; +use constant CMD_INCREMENTQ => 0x15; +use constant CMD_DECREMENTQ => 0x16; +use constant CMD_QUITQ => 0x17; +use constant CMD_FLUSHQ => 0x18; +use constant CMD_APPENDQ => 0x19; +use constant CMD_PREPENDQ => 0x1A; +use constant CMD_TOUCH => 0x1C; +use constant CMD_GAT => 0x1D; +use constant CMD_GATQ => 0x1E; +use constant CMD_GATK => 0x23; +use constant CMD_GATKQ => 0x24; + +# REQ and RES formats are divided even though they currently share +# the same format, since they _could_ differ in the future. +use constant REQ_PKT_FMT => "CCnCCnNNNN"; +use constant RES_PKT_FMT => "CCnCCnNNNN"; +use constant INCRDECR_PKT_FMT => "NNNNN"; +use constant MIN_RECV_BYTES => length(pack(RES_PKT_FMT)); +use constant REQ_MAGIC => 0x80; +use constant RES_MAGIC => 0x81; + +my $mc = MC::Client->new; + +# Let's turn on detail stats for all this stuff + +$mc->stats('detail on'); + +my $check = sub { + my ($key, $orig_flags, $orig_val) = @_; + my ($flags, $val, $cas) = $mc->get($key); + is($flags, $orig_flags, "Flags is set properly"); + ok($val eq $orig_val || $val == $orig_val, $val . " = " . 
$orig_val); +}; + +my $set = sub { + my ($key, $exp, $orig_flags, $orig_value) = @_; + $mc->set($key, $orig_value, $orig_flags, $exp); + $check->($key, $orig_flags, $orig_value); +}; + +my $empty = sub { + my $key = shift; + my $rv =()= eval { $mc->get($key) }; + is($rv, 0, "Didn't get a result from get"); + ok($@->not_found, "We got a not found error when we expected one"); +}; + +my $delete = sub { + my ($key, $when) = @_; + $mc->delete($key, $when); + $empty->($key); +}; + +# diag "Test Version"; +my $v = $mc->version; +ok(defined $v && length($v), "Proper version: $v"); + +# Bug 71 +{ + my %stats1 = $mc->stats(''); + $mc->flush; + my %stats2 = $mc->stats(''); + + is($stats2{'cmd_flush'}, $stats1{'cmd_flush'} + 1, + "Stats not updated on a binary flush"); +} + +# diag "Flushing..."; +$mc->flush; + +# diag "Noop"; +$mc->noop; + +# diag "Simple set/get"; +$set->('x', 5, 19, "somevalue"); + +# diag "Delete"; +$delete->('x'); + +# diag "Flush"; +$set->('x', 5, 19, "somevaluex"); +$set->('y', 5, 17, "somevaluey"); +$mc->flush; +$empty->('x'); +$empty->('y'); + +{ + # diag "Add"; + $empty->('i'); + $mc->add('i', 'ex', 5, 10); + $check->('i', 5, "ex"); + + my $rv =()= eval { $mc->add('i', "ex2", 10, 5) }; + is($rv, 0, "Add didn't return anything"); + ok($@->exists, "Expected exists error received"); + $check->('i', 5, "ex"); +} + +{ + # diag "Too big."; + $empty->('toobig'); + $mc->set('toobig', 'not too big', 10, 10); + eval { + my $bigval = ("x" x (1024*1024)) . 
"x"; + $mc->set('toobig', $bigval, 10, 10); + }; + ok($@->too_big, "Was too big"); + $empty->('toobig'); +} + +{ + # diag "Replace"; + $empty->('j'); + + my $rv =()= eval { $mc->replace('j', "ex", 19, 5) }; + is($rv, 0, "Replace didn't return anything"); + ok($@->not_found, "Expected not_found error received"); + $empty->('j'); + $mc->add('j', "ex2", 14, 5); + $check->('j', 14, "ex2"); + $mc->replace('j', "ex3", 24, 5); + $check->('j', 24, "ex3"); +} + +{ + # diag "MultiGet"; + $mc->add('xx', "ex", 1, 5); + $mc->add('wye', "why", 2, 5); + my $rv = $mc->get_multi(qw(xx wye zed)); + + # CAS is returned with all gets. + $rv->{xx}->[2] = 0; + $rv->{wye}->[2] = 0; + is_deeply($rv->{xx}, [1, 'ex', 0], "X is correct"); + is_deeply($rv->{wye}, [2, 'why', 0], "Y is correct"); + is(keys(%$rv), 2, "Got only two answers like we expect"); +} + +# diag "Test increment"; +$mc->flush; +is($mc->incr("x"), 0, "First incr call is zero"); +is($mc->incr("x"), 1, "Second incr call is one"); +is($mc->incr("x", 211), 212, "Adding 211 gives you 212"); +is($mc->incr("x", 2**33), 8589934804, "Blast the 32bit border"); + +# diag "Issue 48 - incrementing plain text."; +{ + $mc->set("issue48", "text", 0, 0); + my $rv =()= eval { $mc->incr('issue48'); }; + ok($@ && $@->delta_badval, "Expected invalid value when incrementing text."); + $check->('issue48', 0, "text"); + + $rv =()= eval { $mc->decr('issue48'); }; + ok($@ && $@->delta_badval, "Expected invalid value when decrementing text."); + $check->('issue48', 0, "text"); +} + + +# diag "Test decrement"; +$mc->flush; +is($mc->incr("x", undef, 5), 5, "Initial value"); +is($mc->decr("x"), 4, "Decrease by one"); +is($mc->decr("x", 211), 0, "Floor is zero"); + +{ + # diag "bug220 + my ($rv, $cas) = $mc->set("bug220", "100", 0, 0); + my ($irv, $icas) = $mc->incr_cas("bug220", 999); + ok($icas != $cas); + is($irv, 1099, "Incr amount failed"); + my ($flags, $val, $gcas) = $mc->get("bug220"); + is($gcas, $icas, "CAS didn't match after incr/gets"); + + 
($irv, $icas) = $mc->incr_cas("bug220", 999); + ok($icas != $cas); + is($irv, 2098, "Incr amount failed"); + ($flags, $val, $gcas) = $mc->get("bug220"); + is($gcas, $icas, "CAS didn't match after incr/gets"); +} + +{ + # diag "bug21"; + $mc->add("bug21", "9223372036854775807", 0, 0); + is($mc->incr("bug21"), 9223372036854775808, "First incr for bug21."); + is($mc->incr("bug21"), 9223372036854775809, "Second incr for bug21."); + is($mc->decr("bug21"), 9223372036854775808, "Decr for bug21."); +} + +{ + # diag "CAS"; + $mc->flush; + + { + my $rv =()= eval { $mc->set("x", "bad value", 19, 5, 0x7FFFFFF) }; + is($rv, 0, "Empty return on expected failure"); + ok($@->not_found, "Error was 'not found' as expected"); + } + + my ($r, $rcas) = $mc->add("x", "original value", 5, 19); + + my ($flags, $val, $i) = $mc->get("x"); + is($val, "original value", "->gets returned proper value"); + is($rcas, $i, "Add CAS matched."); + + { + my $rv =()= eval { $mc->set("x", "broken value", 19, 5, $i+1) }; + is($rv, 0, "Empty return on expected failure (1)"); + ok($@->exists, "Expected error state of 'exists' (1)"); + } + + ($r, $rcas) = $mc->set("x", "new value", 19, 5, $i); + + my ($newflags, $newval, $newi) = $mc->get("x"); + is($newval, "new value", "CAS properly overwrote value"); + is($rcas, $newi, "Get CAS matched."); + + { + my $rv =()= eval { $mc->set("x", "replay value", 19, 5, $i) }; + is($rv, 0, "Empty return on expected failure (2)"); + ok($@->exists, "Expected error state of 'exists' (2)"); + } +} + +# diag "Touch commands"; +{ + $mc->flush; + $mc->set("totouch", "toast", 0, 1); + my $res = $mc->touch("totouch", 10); + sleep 2; + $check->("totouch", 0, "toast"); + + $mc->set("totouch", "toast2", 0, 1); + my ($flags, $val, $i) = $mc->gat("totouch", 10); + is($val, "toast2", "GAT returned correct value"); + sleep 2; + $check->("totouch", 0, "toast2"); + + # Test miss as well +} + +# diag "Silent set."; +$mc->silent_mutation(::CMD_SETQ, 'silentset', 'silentsetval'); + +# diag 
"Silent add."; +$mc->silent_mutation(::CMD_ADDQ, 'silentadd', 'silentaddval'); + +# diag "Silent replace."; +{ + my $key = "silentreplace"; + my $extra = pack "NN", 829, 0; + $empty->($key); + # $mc->send_silent(::CMD_REPLACEQ, $key, 'somevalue', 7278552, $extra, 0); + # $empty->($key); + + $mc->add($key, "xval", 831, 0); + $check->($key, 831, 'xval'); + + $mc->send_silent(::CMD_REPLACEQ, $key, 'somevalue', 7278552, $extra, 0); + $check->($key, 829, 'somevalue'); +} + +# diag "Silent delete"; +{ + my $key = "silentdelete"; + $empty->($key); + $mc->set($key, "some val", 19, 0); + $mc->send_silent(::CMD_DELETEQ, $key, '', 772); + $empty->($key); +} + +# diag "Silent increment"; +{ + my $key = "silentincr"; + my $opaque = 98428747; + $empty->($key); + $mc->silent_incrdecr(::CMD_INCREMENTQ, $key, 0, 0, 0); + is($mc->incr($key, 0), 0, "First call is 0"); + + $mc->silent_incrdecr(::CMD_INCREMENTQ, $key, 8, 0, 0); + is($mc->incr($key, 0), 8); +} + +# diag "Silent decrement"; +{ + my $key = "silentdecr"; + my $opaque = 98428147; + $empty->($key); + $mc->silent_incrdecr(::CMD_DECREMENTQ, $key, 0, 185, 0); + is($mc->incr($key, 0), 185); + + $mc->silent_incrdecr(::CMD_DECREMENTQ, $key, 8, 0, 0); + is($mc->incr($key, 0), 177); +} + +# diag "Silent flush"; +{ + my %stats1 = $mc->stats(''); + + $set->('x', 5, 19, "somevaluex"); + $set->('y', 5, 17, "somevaluey"); + $mc->send_silent(::CMD_FLUSHQ, '', '', 2775256); + $empty->('x'); + $empty->('y'); + + my %stats2 = $mc->stats(''); + is($stats2{'cmd_flush'}, $stats1{'cmd_flush'} + 1, + "Stats not updated on a binary quiet flush"); +} + +# diag "Append"; +{ + my $key = "appendkey"; + my $value = "some value"; + $set->($key, 8, 19, $value); + $mc->_append_prepend(::CMD_APPEND, $key, " more"); + $check->($key, 19, $value . " more"); +} + +# diag "Prepend"; +{ + my $key = "prependkey"; + my $value = "some value"; + $set->($key, 8, 19, $value); + $mc->_append_prepend(::CMD_PREPEND, $key, "prefixed "); + $check->($key, 19, "prefixed " . 
$value); +} + +# diag "Silent append"; +{ + my $key = "appendqkey"; + my $value = "some value"; + $set->($key, 8, 19, $value); + $mc->send_silent(::CMD_APPENDQ, $key, " more", 7284492); + $check->($key, 19, $value . " more"); +} + +# diag "Silent prepend"; +{ + my $key = "prependqkey"; + my $value = "some value"; + $set->($key, 8, 19, $value); + $mc->send_silent(::CMD_PREPENDQ, $key, "prefixed ", 7284492); + $check->($key, 19, "prefixed " . $value); +} + +# diag "Leaky binary get test."; +# # http://code.google.com/p/memcached/issues/detail?id=16 +{ + # Get a new socket so we can speak text to it. + my $sock = $server->new_sock; + my $max = 1024 * 1024; + my $big = "a big value that's > .5M and < 1M. "; + while (length($big) * 2 < $max) { + $big = $big . $big; + } + my $biglen = length($big); + + for(1..100) { + my $key = "some_key_$_"; + # print STDERR "Key is $key\n"; + # print $sock "set $key 0 0 $vallen\r\n$value\r\n"; + print $sock "set $key 0 0 $biglen\r\n$big\r\n"; + is(scalar <$sock>, "STORED\r\n", "stored big"); + my ($f, $v, $c) = $mc->get($key); + } +} + +# diag "Test stats settings." +{ + my %stats = $mc->stats('settings'); + + is(1024, $stats{'maxconns'}); + is('NULL', $stats{'domain_socket'}); + is('on', $stats{'evictions'}); + is('yes', $stats{'cas_enabled'}); +} + +# diag "Test quit commands."; +{ + my $s2 = new_memcached(); + my $mc2 = MC::Client->new($s2); + $mc2->send_command(CMD_QUITQ, '', '', 0, '', 0); + + # Five seconds ought to be enough to get hung up on. + my $oldalarmt = alarm(5); + + # Verify we can't read anything. + my $bytesread = -1; + eval { + local $SIG{'ALRM'} = sub { die "timeout" }; + my $data = ""; + $bytesread = sysread($mc2->{socket}, $data, 24), + }; + is($bytesread, 0, "Read after quit."); + + # Restore signal stuff. 
+ alarm($oldalarmt); +} + +# diag "Test protocol boundary overruns"; +{ + use List::Util qw[min]; + # Attempting some protocol overruns by toying around with the edge + # of the data buffer at a few different sizes. This assumes the + # boundary is at or around 2048 bytes. + for (my $i = 1900; $i < 2100; $i++) { + my $k = "test_key_$i"; + my $v = 'x' x $i; + # diag "Trying $i $k"; + my $extra = pack "NN", 82, 0; + my $data = $mc->build_command(::CMD_SETQ, $k, $v, 0, $extra, 0); + $data .= $mc->build_command(::CMD_SETQ, "alt_$k", "blah", 0, $extra, 0); + if (length($data) > 2024) { + for (my $j = 2024; $j < min(2096, length($data)); $j++) { + $mc->{socket}->send(substr($data, 0, $j)); + $mc->flush_socket; + sleep(0.001); + $mc->{socket}->send(substr($data, $j)); + $mc->flush_socket; + } + } else { + $mc->{socket}->send($data); + } + $mc->flush_socket; + $check->($k, 82, $v); + $check->("alt_$k", 82, "blah"); + } +} + +# Along with the assertion added to the code to verify we're staying +# within bounds when we do a stats detail dump (detail turned on at +# the top). +my %stats = $mc->stats('detail dump'); + +# This test causes a disconnection. +{ + # diag "Key too large."; + my $key = "x" x 365; + eval { + $mc->get($key, 'should die', 10, 10); + }; + ok($@->einval, "Invalid key length"); +} + +# ###################################################################### +# Test ends around here. 
# ######################################################################

package MC::Client;

use strict;
use warnings;
use fields qw(socket);
use IO::Socket::INET;

# Build a client around the socket of a test server.
#
#   $s - optional server handle (anything with a ->sock method); the
#        file-level $server is used when it is omitted.
#
# Returns the new client object, whose single field is 'socket'.
sub new {
    my $self = shift;
    my ($s) = @_;
    $s = $server unless defined $s;
    my $sock = $s->sock;
    $self = fields::new($self);
    $self->{socket} = $sock;
    return $self;
}

# Serialise one request packet without sending it.
#
#   $cmd          - opcode
#   $key, $val    - key and value bytes (pass '' for "none")
#   $opaque       - value stored in the header's opaque field
#   $extra_header - optional packed extras, defaults to ''
#   $cas          - optional CAS value; split into two 32-bit halves
#
# Returns the header (packed with ::REQ_PKT_FMT) followed by the extras,
# the key and the value.  Dies when fewer than four arguments are given.
sub build_command {
    my $self = shift;
    die "Not enough args to build_command" unless @_ >= 4;
    my ($cmd, $key, $val, $opaque, $extra_header, $cas) = @_;

    $extra_header = '' unless defined $extra_header;
    my $keylen   = length($key);
    my $vallen   = length($val);
    my $extralen = length($extra_header);
    my $datatype = 0;  # field for future use
    my $reserved = 0;  # field for future use
    # The length field covers everything that follows the fixed header.
    my $totallen = $keylen + $vallen + $extralen;
    my $ident_hi = 0;
    my $ident_lo = 0;

    if ($cas) {
        $ident_hi = int($cas / 2 ** 32);
        $ident_lo = int($cas % 2 ** 32);
    }

    my $msg = pack(::REQ_PKT_FMT, ::REQ_MAGIC, $cmd, $keylen, $extralen,
                   $datatype, $reserved, $totallen, $opaque, $ident_hi,
                   $ident_lo);
    my $full_msg = $msg . $extra_header . $key . $val;
    return $full_msg;
}

# Build a request with build_command and write it to the socket.
# Takes the same arguments as build_command.  Dies when send() reports
# failure or accepts fewer bytes than the packet holds.
sub send_command {
    my $self = shift;
    die "Not enough args to send_command" unless @_ >= 4;
    my ($cmd, $key, $val, $opaque, $extra_header, $cas) = @_;

    my $full_msg = $self->build_command($cmd, $key, $val, $opaque, $extra_header, $cas);

    my $sent = $self->{socket}->send($full_msg);
    die("Send failed: $!") unless $sent;
    if($sent != length($full_msg)) {
        die("only sent $sent of " . length($full_msg) . " bytes");
    }
}

# Flush the client's socket handle.
sub flush_socket {
    my $self = shift;
    $self->{socket}->flush;
}

# Send a silent command and ensure it doesn't respond.
sub send_silent {
    my $self = shift;
    die "Not enough args to send_silent" unless @_ >= 4;
    my ($cmd, $key, $val, $opaque, $extra_header, $cas) = @_;

    $self->send_command($cmd, $key, $val, $opaque, $extra_header, $cas);
    # Follow up with a NOOP carrying a different opaque: if the very next
    # packet read is the NOOP's answer, the first command stayed silent.
    $self->send_command(::CMD_NOOP, '', '', $opaque + 1);

    my ($ropaque, $data) = $self->_handle_single_response;
    Test::More::is($ropaque, $opaque + 1);
}

# Run a quiet mutation against a key that is first emptied with the
# file-level $empty helper, then verify the stored value (flags 82) with
# the file-level $check helper.
sub silent_mutation {
    my $self = shift;
    my ($cmd, $key, $value) = @_;

    $empty->($key);
    my $extra = pack "NN", 82, 0;
    # Send through the invocant rather than the global client.
    $self->send_silent($cmd, $key, $value, 7278552, $extra, 0);
    $check->($key, 82, $value);
}

# Read exactly $want bytes from the socket.  Dies if recv fails or the
# peer stops sending before $want bytes have arrived, instead of
# spinning forever on empty reads.
sub _recv_exact {
    my ($self, $want) = @_;

    my $buf = '';
    while (length($buf) < $want) {
        my $rc = $self->{socket}->recv(my $chunk, $want - length($buf));
        die("recv failed: $!") unless defined $rc;
        die("Connection closed after " . length($buf) . " of $want bytes")
            unless length $chunk;
        $buf .= $chunk;
    }
    return $buf;
}

# Read one response packet.
#
#   $myopaque - optional; when given, the response opaque is compared
#               against it as a test assertion.
#
# Returns ($opaque, $body, $cas, $keylen).  A response with an empty body
# returns ($opaque, '', $cas, 0) before the opaque and status checks.
# Dies with an MC::Error when the status field is non-zero.
sub _handle_single_response {
    my $self = shift;
    my $myopaque = shift;

    my $hdr = $self->_recv_exact(::MIN_RECV_BYTES);
    # Kept as an assertion so the number of tests run stays the same.
    Test::More::is(length($hdr), ::MIN_RECV_BYTES, "Expected read length");

    my ($magic, $cmd, $keylen, $extralen, $datatype, $status, $remaining,
        $opaque, $ident_hi, $ident_lo) = unpack(::RES_PKT_FMT, $hdr);
    Test::More::is($magic, ::RES_MAGIC, "Got proper response magic");

    my $cas = ($ident_hi * 2 ** 32) + $ident_lo;

    return ($opaque, '', $cas, 0) if($remaining == 0);

    # fetch the value
    my $rv = $self->_recv_exact($remaining);

    if (defined $myopaque) {
        Test::More::is($opaque, $myopaque, "Expected opaque");
    } else {
        Test::More::pass("Implicit pass since myopaque is undefined");
    }

    if ($status) {
        die MC::Error->new($status, $rv);
    }

    return ($opaque, $rv, $cas, $keylen);
}

# Send one command with a random opaque and read its single response.
# Returns ($body, $cas).
sub _do_command {
    my $self = shift;
    die unless @_ >= 3;
    my ($cmd, $key, $val, $extra_header, $cas) = @_;

    $extra_header = '' unless defined $extra_header;
    my $opaque = int(rand(2**32));
    $self->send_command($cmd, $key, $val, $opaque, $extra_header, $cas);
    my (undef, $rv, $rcas) = $self->_handle_single_response($opaque);
    return ($rv, $rcas);
}

# Pack the extras for an incr/decr request: the amount and the initial
# value are each split into two 32-bit halves, followed by the expiry.
sub _incrdecr_header {
    my $self = shift;
    my ($amt, $init, $exp) = @_;

    my $amt_hi = int($amt / 2 ** 32);
    my $amt_lo = int($amt % 2 ** 32);

    my $init_hi = int($init / 2 ** 32);
    my $init_lo = int($init % 2 ** 32);

    my $extra_header = pack(::INCRDECR_PKT_FMT, $amt_hi, $amt_lo, $init_hi,
                            $init_lo, $exp);

    return $extra_header;
}

# Run an incr/decr command.  Returns ($new_value, $cas); the value is
# rebuilt from the two 32-bit halves at the start of the response body.
sub _incrdecr_cas {
    my $self = shift;
    my ($cmd, $key, $amt, $init, $exp) = @_;

    my ($data, $rcas) = $self->_do_command($cmd, $key, '',
                                           $self->_incrdecr_header($amt, $init, $exp));

    my $header = substr $data, 0, 8, '';
    my ($resp_hi, $resp_lo) = unpack "NN", $header;
    my $resp = ($resp_hi * 2 ** 32) + $resp_lo;

    return $resp, $rcas;
}

# Same as _incrdecr_cas, but returns only the new value.
sub _incrdecr {
    my $self = shift;
    my ($v, $c) = $self->_incrdecr_cas(@_);
    return $v;
}

# Send a quiet incr/decr and assert (via send_silent) that it did not
# produce a response.
sub silent_incrdecr {
    my $self = shift;
    my ($cmd, $key, $amt, $init, $exp) = @_;
    my $opaque = 8275753;

    # Send through the invocant rather than the global client.
    $self->send_silent($cmd, $key, '', $opaque,
                       $self->_incrdecr_header($amt, $init, $exp));
}

# Issue a STAT request for $key and collect the responses into a hash.
# Reading stops at the first response whose key length is zero.
sub stats {
    my $self = shift;
    my $key  = shift;
    my $cas = 0;
    my $opaque = int(rand(2**32));
    $self->send_command(::CMD_STAT, $key, '', $opaque, '', $cas);

    my %rv = ();
    my $found_key = '';
    my $found_val = '';
    do {
        my ($op, $data, $cas, $keylen) = $self->_handle_single_response($opaque);
        if($keylen > 0) {
            $found_key = substr($data, 0, $keylen);
            $found_val = substr($data, $keylen);
            $rv{$found_key} = $found_val;
        } else {
            $found_key = '';
        }
    } while($found_key ne '');
    return %rv;
}

# Fetch one key.  Returns ($flags, $value, $cas); the flags are the first
# four bytes of the response body.
sub get {
    my $self = shift;
    my $key  = shift;
    my ($rv, $cas) = $self->_do_command(::CMD_GET, $key, '', '');

    my $header = substr $rv, 0, 4, '';
    my $flags  = unpack("N", $header);

    return ($flags, $rv, $cas);
}

# Fetch several keys with quiet gets terminated by a NOOP.  Each request
# uses the key's index as its opaque so responses can be matched back.
# Returns a hash of key => [$flags, $value] (a hash ref in scalar context).
sub get_multi {
    my $self = shift;
    my @keys = @_;

    for (my $i = 0; $i < @keys; $i++) {
        $self->send_command(::CMD_GETQ, $keys[$i], '', $i, '', 0);
    }

    # An opaque no key index can reach marks the end of the batch.
    my $terminal = @keys + 10;
    $self->send_command(::CMD_NOOP, '', '', $terminal);

    my %return;
    while (1) {
        my ($opaque, $data) = $self->_handle_single_response;
        last if $opaque == $terminal;

        my $header = substr $data, 0, 4, '';
        my $flags  = unpack("N", $header);

        $return{$keys[$opaque]} = [$flags, $data];
    }

    return %return if wantarray;
    return \%return;
}

# Update the expiry of $key.  Returns ($body, $cas).
sub touch {
    my $self = shift;
    my ($key, $expire) = @_;
    my $extra_header = pack "N", $expire;
    my $cas = 0;
    return $self->_do_command(::CMD_TOUCH, $key, '', $extra_header, $cas);
}

# Get-and-touch.  Returns ($flags, $value, $cas).
sub gat {
    my $self   = shift;
    my $key    = shift;
    my $expire = shift;
    my $extra_header = pack "N", $expire;
    my ($rv, $cas) = $self->_do_command(::CMD_GAT, $key, '', $extra_header);

    my $header = substr $rv, 0, 4, '';
    my $flags  = unpack("N", $header);

    return ($flags, $rv, $cas);
}

sub version {
    my $self = shift;
    return $self->_do_command(::CMD_VERSION, '', '');
}

sub flush {
    my $self = shift;
    return $self->_do_command(::CMD_FLUSH, '', '');
}

# add/set/replace all send the flags and expiry as two packed 32-bit
# extras; only set forwards a caller-supplied CAS.
sub add {
    my $self = shift;
    my ($key, $val, $flags, $expire) = @_;
    my $extra_header = pack "NN", $flags, $expire;
    my $cas = 0;
    return $self->_do_command(::CMD_ADD, $key, $val, $extra_header, $cas);
}

sub set {
    my $self = shift;
    my ($key, $val, $flags, $expire, $cas) = @_;
    my $extra_header = pack "NN", $flags, $expire;
    return $self->_do_command(::CMD_SET, $key, $val, $extra_header, $cas);
}

sub _append_prepend {
    my $self = shift;
    my ($cmd, $key, $val, $cas) = @_;
    return $self->_do_command($cmd, $key, $val, '', $cas);
}

sub replace {
    my $self = shift;
    my ($key, $val, $flags, $expire) = @_;
    my $extra_header = pack "NN", $flags, $expire;
    my $cas = 0;
    return $self->_do_command(::CMD_REPLACE, $key, $val, $extra_header, $cas);
}

sub delete {
    my $self = shift;
    my ($key) = @_;
    return $self->_do_command(::CMD_DELETE, $key, '');
}

# incr/incr_cas/decr default to amount 1, initial value 0 and expiry 0.
sub incr {
    my $self = shift;
    my ($key, $amt, $init, $exp) = @_;
    $amt = 1 unless defined $amt;
    $init = 0 unless defined $init;
    $exp = 0 unless defined $exp;

    return $self->_incrdecr(::CMD_INCR, $key, $amt, $init, $exp);
}

sub incr_cas {
    my $self = shift;
    my ($key, $amt, $init, $exp) = @_;
    $amt = 1 unless defined $amt;
    $init = 0 unless defined $init;
    $exp = 0 unless defined $exp;

    return $self->_incrdecr_cas(::CMD_INCR, $key, $amt, $init, $exp);
}

sub decr {
    my $self = shift;
    my ($key, $amt, $init, $exp) = @_;
    $amt = 1 unless defined $amt;
    $init = 0 unless defined $init;
    $exp = 0 unless defined $exp;

    return $self->_incrdecr(::CMD_DECR, $key, $amt, $init, $exp);
}

sub noop {
    my $self = shift;
    return $self->_do_command(::CMD_NOOP, '', '');
}

package MC::Error;

use strict;
use warnings;

use constant ERR_UNKNOWN_CMD  => 0x81;
use constant ERR_NOT_FOUND    => 0x1;
use constant ERR_EXISTS       => 0x2;
use constant ERR_TOO_BIG      => 0x3;
use constant ERR_EINVAL       => 0x4;
use constant ERR_NOT_STORED   => 0x5;
use constant ERR_DELTA_BADVAL => 0x6;

# Stringify as "Memcache Error (<status>): <message>".
use overload '""' => sub {
    my $self = shift;
    return "Memcache Error ($self->[0]): $self->[1]";
};

# The object is a blessed array: [ status code, message ].
sub new {
    my $class = shift;
    my $error = [@_];
    my $self = bless $error, (ref $class || $class);

    return $self;
}

# Predicates comparing the stored status code with one constant each.
sub not_found {
    my $self = shift;
    return $self->[0] == ERR_NOT_FOUND;
}

sub exists {
    my $self = shift;
    return $self->[0] == ERR_EXISTS;
}

sub too_big {
    my $self = shift;
    return $self->[0] == ERR_TOO_BIG;
}

sub delta_badval {
    my $self = shift;
    return $self->[0] == ERR_DELTA_BADVAL;
}

sub einval {
    my $self = shift;
    return $self->[0] == ERR_EINVAL;
}

# vim: filetype=perl

diff --git a/memcached/t/bogus-commands.t b/memcached/t/bogus-commands.t
new file mode 100755
index 00000000..aaae6cd6
--- /dev/null
+++ b/memcached/t/bogus-commands.t
@@ -0,0 +1,13 @@
#!/usr/bin/perl

use strict;
use Test::More tests => 1;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;

my $server = new_memcached();
my $sock = $server->sock;

print $sock "boguscommand slkdsldkfjsd\r\n";
is(scalar <$sock>, "ERROR\r\n", "got error back");
diff --git a/memcached/t/cas.t b/memcached/t/cas.t
new file mode 100644
index 00000000..8797ebf2
--- /dev/null
+++ b/memcached/t/cas.t
@@ -0,0 +1,158 @@
#!/usr/bin/perl

use strict;
use Test::More tests => 43;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;


my $server = new_memcached();
my $sock = $server->sock;
my $sock2 = $server->new_sock;

my @result;
my @result2;

ok($sock != $sock2, "have two different connections open");

# Send one malformed command line to a fresh server and expect the
# "bad command line format" client error.
sub check_args {
    my ($line, $name) = @_;

    my $svr = new_memcached();
    my $s = $svr->sock;

    print $s $line;
    is(scalar <$s>, "CLIENT_ERROR bad command line format\r\n", $name);
    undef $svr;
}

# Field order is: cas <key> <flags> <exptime> <bytes> <cas unique>
check_args "cas bad blah 0 0 0\r\n\r\n", "bad flags";
check_args "cas bad 0 blah 0 0\r\n\r\n", "bad exp";
check_args "cas bad 0 0 blah 0\r\n\r\n", "bad size";
check_args "cas bad 0 0 0 blah\r\n\r\n", "bad cas";

# gets foo (should not exist)
print $sock "gets foo\r\n";
is(scalar <$sock>, "END\r\n", "gets failed");

# set foo
print $sock "set foo 0 0 6\r\nbarval\r\n";
is(scalar <$sock>, "STORED\r\n", "stored barval");

# gets foo and verify identifier exists
@result = mem_gets($sock, "foo");
mem_gets_is($sock,$result[0],"foo","barval");

# cas fail
print $sock "cas foo 0 0 6 123\r\nbarva2\r\n";
is(scalar <$sock>, "EXISTS\r\n", "cas failed for foo");

# gets foo - success
@result = mem_gets($sock, "foo");
mem_gets_is($sock,$result[0],"foo","barval");

# cas success
print $sock "cas foo 0 0 6 $result[0]\r\nbarva2\r\n";
is(scalar <$sock>, "STORED\r\n", "cas success, set foo");

# cas failure (reusing the same key)
print $sock "cas foo 0 0 6 $result[0]\r\nbarva2\r\n";
is(scalar <$sock>, "EXISTS\r\n", "reusing a CAS ID");

# delete foo
print $sock "delete foo\r\n";
is(scalar <$sock>, "DELETED\r\n", "deleted foo");

# cas missing
print $sock "cas foo 0 0 6 $result[0]\r\nbarva2\r\n";
is(scalar <$sock>, "NOT_FOUND\r\n", "cas failed, foo does not exist");

# cas empty
print $sock "cas foo 0 0 6 \r\nbarva2\r\n";
is(scalar <$sock>, "ERROR\r\n", "cas empty, throw error");
# The orphaned data line "barva2\r\n" is then read as a command and
# rejected as well.
is(scalar <$sock>, "ERROR\r\n", "error out on barval2 parsing");

# set foo1
print $sock "set foo1 0 0 1\r\n1\r\n";
is(scalar <$sock>, "STORED\r\n", "set foo1");
# set foo2
print $sock "set foo2 0 0 1\r\n2\r\n";
is(scalar <$sock>, "STORED\r\n", "set foo2");

# gets foo1 check
print $sock "gets foo1\r\n";
ok(scalar <$sock> =~ /VALUE foo1 0 1 (\d+)\r\n/, "gets foo1 regexp success");
my $foo1_cas = $1;
is(scalar <$sock>, "1\r\n","gets foo1 data is 1");
is(scalar <$sock>, "END\r\n","gets foo1 END");

# gets foo2 check
print $sock "gets foo2\r\n";
ok(scalar <$sock> =~ /VALUE foo2 0 1 (\d+)\r\n/,"gets foo2 regexp success");
my $foo2_cas = $1;
is(scalar <$sock>, "2\r\n","gets foo2 data is 2");
is(scalar <$sock>, "END\r\n","gets foo2 END");

# validate foo1 != foo2
ok($foo1_cas != $foo2_cas,"foo1 != foo2 single-gets success");

# multi-gets
print $sock "gets foo1 foo2\r\n";
ok(scalar <$sock> =~ /VALUE foo1 0 1 (\d+)\r\n/, "validating first set of data is foo1");
$foo1_cas = $1;
is(scalar <$sock>, "1\r\n", "validating foo1 set of data is 1");
ok(scalar <$sock> =~ /VALUE foo2 0 1 (\d+)\r\n/, "validating second set of data is foo2");
$foo2_cas = $1;
is(scalar <$sock>, "2\r\n", "validating foo2 set of data is 2");
is(scalar <$sock>, "END\r\n","validating foo1,foo2 gets is over - END");

# validate foo1 != foo2
ok($foo1_cas != $foo2_cas, "foo1 != foo2 multi-gets success");

### simulate race condition with cas

# gets foo1 on the first connection - success
# ("not empty" is a string property, so compare with ne rather than !=)
@result = mem_gets($sock, "foo1");
ok($result[0] ne "", "sock - gets foo1 is not empty");

# gets foo1 on the second connection - success
@result2 = mem_gets($sock2, "foo1");
ok($result2[0] ne "","sock2 - gets foo1 is not empty");

# Both connections hold the same CAS id; exactly one store may win.
print $sock "cas foo1 0 0 6 $result[0]\r\nbarva2\r\n";
print $sock2 "cas foo1 0 0 5 $result2[0]\r\napple\r\n";

my $res1 = <$sock>;
my $res2 = <$sock2>;

ok( ( $res1 eq "STORED\r\n" && $res2 eq "EXISTS\r\n") ||
    ( $res1 eq "EXISTS\r\n" && $res2 eq "STORED\r\n"),
    "cas on same item from two sockets");

### bug 15: http://code.google.com/p/memcached/issues/detail?id=15

# set bug15
print $sock "set bug15 0 0 1\r\n0\r\n";
is(scalar <$sock>, "STORED\r\n", "stored 0");

# Check out the first gets.
print $sock "gets bug15\r\n";
ok(scalar <$sock> =~ /VALUE bug15 0 1 (\d+)\r\n/, "gets bug15 regexp success");
my $bug15_cas = $1;
is(scalar <$sock>, "0\r\n", "gets bug15 data is 0");
is(scalar <$sock>, "END\r\n","gets bug15 END");

# Increment
print $sock "incr bug15 1\r\n";
is(scalar <$sock>, "1\r\n", "incr worked");

# Validate a changed CAS
print $sock "gets bug15\r\n";
ok(scalar <$sock> =~ /VALUE bug15 0 1 (\d+)\r\n/, "gets bug15 regexp success");
my $next_bug15_cas = $1;
is(scalar <$sock>, "1\r\n", "gets bug15 data is 1");
is(scalar <$sock>, "END\r\n","gets bug15 END");

ok($bug15_cas != $next_bug15_cas, "CAS changed");
diff --git a/memcached/t/daemonize.t b/memcached/t/daemonize.t
new file mode 100755
index 00000000..e6fe7f58
--- /dev/null
+++ b/memcached/t/daemonize.t
@@ -0,0 +1,33 @@
#!/usr/bin/perl

use strict;
use Test::More tests => 7;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;

use File::Temp qw(tempfile);

my (undef, $tmpfn) = tempfile();

my $server = new_memcached("-d -P $tmpfn");
my $sock = $server->sock;
# The built-in sleep() only counts whole seconds, so "sleep 0.5" would not
# pause at all; four-argument select gives the intended half second.
select(undef, undef, undef, 0.5);

ok(-e $tmpfn, "pid file exists");
ok(-s $tmpfn, "pid file has length");

open(my $fh, '<', $tmpfn) or die "Cannot open pid file $tmpfn: $!";
my $readpid = do { local $/; <$fh>; };
chomp $readpid;
close ($fh);

# Signal 0 only checks that the process can be signalled.
ok(kill(0, $readpid), "process is still running");

my $stats = mem_stats($sock);
is($stats->{pid}, $readpid, "memcached reports same pid as file");

ok($server->new_sock, "opened new socket");
ok(kill(9, $readpid), "sent KILL signal");
# Half a second for the killed process to go away before reconnecting.
select(undef, undef, undef, 0.5);
ok(! $server->new_sock, "failed to open new socket");
diff --git a/memcached/t/dash-M.t b/memcached/t/dash-M.t
new file mode 100644
index 00000000..af9e09be
--- /dev/null
+++ b/memcached/t/dash-M.t
@@ -0,0 +1,34 @@
#!/usr/bin/perl

use strict;
use Test::More;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;

my $server = new_memcached('-M -m 1');
my $sock = $server->sock;

my $value = "B" x 8192;
my $vallen = length($value);

my $resp = "STORED\r\n";
my $key = 0;

# Keep storing until the server answers with anything but STORED.
while($resp eq "STORED\r\n") {
    print $sock "set dash$key 0 0 $vallen\r\n$value\r\n";
    $key++;
    $resp = scalar <$sock>;
}

my $max_stored = $key - 1;

# The plan is only known once the loop above has found the limit.
plan tests => $max_stored + 1;

print $sock "set dash$key 0 0 $vallen\r\n$value\r\n";
is(scalar <$sock>, "SERVER_ERROR out of memory storing object\r\n",
   "failed to add another one.");

for($key = 0; $key < $max_stored; $key++) {
    mem_get_is $sock, "dash$key", $value, "Failed at dash$key";
}
diff --git a/memcached/t/evictions.t b/memcached/t/evictions.t
new file mode 100644
index 00000000..d0fa3940
--- /dev/null
+++ b/memcached/t/evictions.t
@@ -0,0 +1,31 @@
#!/usr/bin/perl
# Test the 'stats items' evictions counters.

use strict;
use Test::More tests => 92;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;

my $server = new_memcached("-m 3");
my $sock = $server->sock;
my $value = "B"x66560;
my $key = 0;

# These aren't set to expire.
for ($key = 0; $key < 40; $key++) {
    print $sock "set key$key 0 0 66560\r\n$value\r\n";
    is(scalar <$sock>, "STORED\r\n", "stored key$key");
}

# These ones would expire in 600 seconds.
for ($key = 0; $key < 50; $key++) {
    print $sock "set key$key 0 600 66560\r\n$value\r\n";
    is(scalar <$sock>, "STORED\r\n", "stored key$key");
}

my $stats  = mem_stats($sock, "items");
my $evicted = $stats->{"items:31:evicted"};
isnt($evicted, "0", "check evicted");
my $evicted_nonzero = $stats->{"items:31:evicted_nonzero"};
isnt($evicted_nonzero, "0", "check evicted_nonzero");
diff --git a/memcached/t/expirations.t b/memcached/t/expirations.t
new file mode 100755
index 00000000..c92125a9
--- /dev/null
+++ b/memcached/t/expirations.t
@@ -0,0 +1,64 @@
#!/usr/bin/perl

use strict;
use Test::More tests => 15;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;

my $server = new_memcached();
my $sock = $server->sock;
my $expire;

# Return once the wall clock is in the first half of a second when
# Time::HiRes is available; otherwise return right after the clock ticks
# over to a new second.
sub wait_for_early_second {
    my $have_hires = eval { require Time::HiRes; 1 };
    if ($have_hires) {
        my $tsh = Time::HiRes::time();
        my $ts = int($tsh);
        return if ($tsh - $ts) < 0.5;
    }

    my $ts = int(time());
    while (1) {
        my $t = int(time());
        return if $t != $ts;
        select undef, undef, undef, 0.10;  # 1/10th of a second sleeps until time changes.
    }
}

wait_for_early_second();

print $sock "set foo 0 1 6\r\nfooval\r\n";
is(scalar <$sock>, "STORED\r\n", "stored foo");

mem_get_is($sock, "foo", "fooval");
# The built-in sleep() only counts whole seconds, which would drop the
# fractional margin; four-argument select keeps it.
select(undef, undef, undef, 1.5);
mem_get_is($sock, "foo", undef);

$expire = time() - 1;
print $sock "set foo 0 $expire 6\r\nfooval\r\n";
is(scalar <$sock>, "STORED\r\n", "stored foo");
mem_get_is($sock, "foo", undef, "already expired");

$expire = time() + 1;
print $sock "set foo 0 $expire 6\r\nfoov+1\r\n";
is(scalar <$sock>, "STORED\r\n", "stored foo");
mem_get_is($sock, "foo", "foov+1");
select(undef, undef, undef, 2.2);
mem_get_is($sock, "foo", undef, "now expired");

$expire = time() - 20;
print $sock "set boo 0 $expire 6\r\nbooval\r\n";
is(scalar <$sock>, "STORED\r\n", "stored boo");
mem_get_is($sock, "boo", undef, "now expired");

print $sock "add add 0 2 6\r\naddval\r\n";
is(scalar <$sock>, "STORED\r\n", "stored add");
mem_get_is($sock, "add", "addval");
# second add fails
print $sock "add add 0 2 7\r\naddval2\r\n";
is(scalar <$sock>, "NOT_STORED\r\n", "add failure");
select(undef, undef, undef, 2.3);
print $sock "add add 0 2 7\r\naddval3\r\n";
is(scalar <$sock>, "STORED\r\n", "stored add again");
mem_get_is($sock, "add", "addval3");
diff --git a/memcached/t/flags.t b/memcached/t/flags.t
new file mode 100755
index 00000000..e5231965
--- /dev/null
+++ b/memcached/t/flags.t
@@ -0,0 +1,18 @@
#!/usr/bin/perl

use strict;
use Test::More tests => 6;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;

my $server = new_memcached();
my $sock = $server->sock;

# set foo (and should get it)
for my $flags (0, 123, 2**16-1) {
    print $sock "set foo $flags 0 6\r\nfooval\r\n";
    is(scalar <$sock>, "STORED\r\n", "stored foo");
    mem_get_is({ sock => $sock,
                 flags => $flags }, "foo", "fooval", "got flags $flags back");
}
diff --git a/memcached/t/flush-all.t b/memcached/t/flush-all.t
new file mode 100755
index 00000000..b803bb7d
--- /dev/null
+++ b/memcached/t/flush-all.t
@@ -0,0 +1,56 @@
#!/usr/bin/perl

use strict;
use Test::More tests => 21;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;

my $server = new_memcached();
my $sock = $server->sock;
my $expire;

print $sock "set foo 0 0 6\r\nfooval\r\n";
is(scalar <$sock>, "STORED\r\n", "stored foo");

mem_get_is($sock, "foo", "fooval");
print $sock "flush_all\r\n";
is(scalar <$sock>, "OK\r\n", "did flush_all");
mem_get_is($sock, "foo", undef);

# Test flush_all with zero delay.
print $sock "set foo 0 0 6\r\nfooval\r\n";
is(scalar <$sock>, "STORED\r\n", "stored foo");

mem_get_is($sock, "foo", "fooval");
print $sock "flush_all 0\r\n";
is(scalar <$sock>, "OK\r\n", "did flush_all");
mem_get_is($sock, "foo", undef);

# check that flush_all doesn't blow away items that immediately get set
print $sock "set foo 0 0 3\r\nnew\r\n";
is(scalar <$sock>, "STORED\r\n", "stored foo = 'new'");
mem_get_is($sock, "foo", 'new');

# and the other form, specifying a flush_all time...
# ($expire is already declared at the top of the script.)
$expire = time() + 2;
print $sock "flush_all $expire\r\n";
is(scalar <$sock>, "OK\r\n", "did flush_all in future");

print $sock "set foo 0 0 4\r\n1234\r\n";
is(scalar <$sock>, "STORED\r\n", "stored foo = '1234'");
mem_get_is($sock, "foo", '1234');
sleep(3);
mem_get_is($sock, "foo", undef);

print $sock "set foo 0 0 5\r\n12345\r\n";
is(scalar <$sock>, "STORED\r\n", "stored foo = '12345'");
mem_get_is($sock, "foo", '12345');
print $sock "flush_all 86400\r\n";
is(scalar <$sock>, "OK\r\n", "did flush_all for far future");
# Check foo still exists.
mem_get_is($sock, 'foo', '12345');
print {$sock} "set foo2 0 0 5\r\n54321\r\n";
is(scalar(readline $sock), "STORED\r\n", "stored foo2 = '54321'");
mem_get_is($sock, 'foo', '12345');
mem_get_is($sock, 'foo2', '54321');
diff --git a/memcached/t/getset.t b/memcached/t/getset.t
new file mode 100755
index 00000000..39922730
--- /dev/null
+++ b/memcached/t/getset.t
@@ -0,0 +1,99 @@
#!/usr/bin/perl

use strict;
use Test::More tests => 539;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;


my $server = new_memcached();
my $sock = $server->sock;


# Store foo with "set", then read it back.
print {$sock} "set foo 0 0 6\r\nfooval\r\n";
is(scalar(readline $sock), "STORED\r\n", 'stored foo');
mem_get_is($sock, 'foo', 'fooval');

# "add" on a fresh key (bar) stores it.
print {$sock} "add bar 0 0 6\r\nbarval\r\n";
is(scalar(readline $sock), "STORED\r\n", 'stored barval');
mem_get_is($sock, 'bar', 'barval');

# "add" on the existing key foo is refused and the old value stays.
print {$sock} "add foo 0 0 5\r\nfoov2\r\n";
is(scalar(readline $sock), "NOT_STORED\r\n", 'not stored');
mem_get_is($sock, 'foo', 'fooval');

# "replace" on the existing key bar succeeds.
print {$sock} "replace bar 0 0 6\r\nbarva2\r\n";
is(scalar(readline $sock), "STORED\r\n", 'replaced barval 2');

# "replace" on a key that was never stored is refused.
print {$sock} "replace notexist 0 0 6\r\nbarva2\r\n";
is(scalar(readline $sock), "NOT_STORED\r\n", "didn't replace notexist");

# First delete of foo succeeds.
print {$sock} "delete foo\r\n";
is(scalar(readline $sock), "DELETED\r\n", 'deleted foo');

# delete foo again. not found this time.
+print $sock "delete foo\r\n"; +is(scalar <$sock>, "NOT_FOUND\r\n", "deleted foo, but not found"); + +# add moo +# +print $sock "add moo 0 0 6\r\nmooval\r\n"; +is(scalar <$sock>, "STORED\r\n", "stored barval"); +mem_get_is($sock, "moo", "mooval"); + +# check-and-set (cas) failure case, try to set value with incorrect cas unique val +print $sock "cas moo 0 0 6 0\r\nMOOVAL\r\n"; +is(scalar <$sock>, "EXISTS\r\n", "check and set with invalid id"); + +# test "gets", grab unique ID +print $sock "gets moo\r\n"; +# VALUE moo 0 6 3084947704 +# +my @retvals = split(/ /, scalar <$sock>); +my $data = scalar <$sock>; # grab data +my $dot = scalar <$sock>; # grab dot on line by itself +is($retvals[0], "VALUE", "get value using 'gets'"); +my $unique_id = $retvals[4]; +# clean off \r\n +$unique_id =~ s/\r\n$//; +ok($unique_id =~ /^\d+$/, "unique ID '$unique_id' is an integer"); +# now test that we can store moo with the correct unique id +print $sock "cas moo 0 0 6 $unique_id\r\nMOOVAL\r\n"; +is(scalar <$sock>, "STORED\r\n"); +mem_get_is($sock, "moo", "MOOVAL"); + +# pipeling is okay +print $sock "set foo 0 0 6\r\nfooval\r\ndelete foo\r\nset foo 0 0 6\r\nfooval\r\ndelete foo\r\n"; +is(scalar <$sock>, "STORED\r\n", "pipeline set"); +is(scalar <$sock>, "DELETED\r\n", "pipeline delete"); +is(scalar <$sock>, "STORED\r\n", "pipeline set"); +is(scalar <$sock>, "DELETED\r\n", "pipeline delete"); + + +# Test sets up to a large size around 1MB. +# Everything up to 1MB - 1k should succeed, everything 1MB +1k should fail. + +my $len = 1024; +while ($len < 1024*1028) { + my $val = "B"x$len; + if ($len > (1024*1024)) { + # Ensure causing a memory overflow doesn't leave stale data. 
+ print $sock "set foo_$len 0 0 3\r\nMOO\r\n"; + is(scalar <$sock>, "STORED\r\n"); + print $sock "set foo_$len 0 0 $len\r\n$val\r\n"; + is(scalar <$sock>, "SERVER_ERROR object too large for cache\r\n", "failed to store size $len"); + mem_get_is($sock, "foo_$len"); + } else { + print $sock "set foo_$len 0 0 $len\r\n$val\r\n"; + is(scalar <$sock>, "STORED\r\n", "stored size $len"); + } + $len += 2048; +} + diff --git a/memcached/t/incrdecr.t b/memcached/t/incrdecr.t new file mode 100755 index 00000000..e0ba65f4 --- /dev/null +++ b/memcached/t/incrdecr.t @@ -0,0 +1,71 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 23; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; + +# Bug 21 +print $sock "set bug21 0 0 19\r\n9223372036854775807\r\n"; +is(scalar <$sock>, "STORED\r\n", "stored text"); +print $sock "incr bug21 1\r\n"; +is(scalar <$sock>, "9223372036854775808\r\n", "bug21 incr 1"); +print $sock "incr bug21 1\r\n"; +is(scalar <$sock>, "9223372036854775809\r\n", "bug21 incr 2"); +print $sock "decr bug21 1\r\n"; +is(scalar <$sock>, "9223372036854775808\r\n", "bug21 decr"); + +print $sock "set num 0 0 1\r\n1\r\n"; +is(scalar <$sock>, "STORED\r\n", "stored num"); +mem_get_is($sock, "num", 1, "stored 1"); + +print $sock "incr num 1\r\n"; +is(scalar <$sock>, "2\r\n", "+ 1 = 2"); +mem_get_is($sock, "num", 2); + +print $sock "incr num 8\r\n"; +is(scalar <$sock>, "10\r\n", "+ 8 = 10"); +mem_get_is($sock, "num", 10); + +print $sock "decr num 1\r\n"; +is(scalar <$sock>, "9\r\n", "- 1 = 9"); + +print $sock "decr num 9\r\n"; +is(scalar <$sock>, "0\r\n", "- 9 = 0"); + +print $sock "decr num 5\r\n"; +is(scalar <$sock>, "0\r\n", "- 5 = 0"); + +printf $sock "set num 0 0 10\r\n4294967296\r\n"; +is(scalar <$sock>, "STORED\r\n", "stored 2**32"); + +print $sock "incr num 1\r\n"; +is(scalar <$sock>, "4294967297\r\n", "4294967296 + 1 = 4294967297"); + +printf $sock "set num 0 0 
%d\r\n18446744073709551615\r\n", length("18446744073709551615"); +is(scalar <$sock>, "STORED\r\n", "stored 2**64-1"); + +print $sock "incr num 1\r\n"; +is(scalar <$sock>, "0\r\n", "(2**64 - 1) + 1 = 0"); + +print $sock "decr bogus 5\r\n"; +is(scalar <$sock>, "NOT_FOUND\r\n", "can't decr bogus key"); + +print $sock "decr incr 5\r\n"; +is(scalar <$sock>, "NOT_FOUND\r\n", "can't incr bogus key"); + +print $sock "set bigincr 0 0 1\r\n0\r\n"; +is(scalar <$sock>, "STORED\r\n", "stored bigincr"); +print $sock "incr bigincr 18446744073709551610\r\n"; +is(scalar <$sock>, "18446744073709551610\r\n"); + +print $sock "set text 0 0 2\r\nhi\r\n"; +is(scalar <$sock>, "STORED\r\n", "stored hi"); +print $sock "incr text 1\r\n"; +is(scalar <$sock>, + "CLIENT_ERROR cannot increment or decrement non-numeric value\r\n", + "hi - 1 = 0"); diff --git a/memcached/t/issue_104.t b/memcached/t/issue_104.t new file mode 100755 index 00000000..2213f690 --- /dev/null +++ b/memcached/t/issue_104.t @@ -0,0 +1,24 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 6; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; + +# first get should miss +print $sock "get foo\r\n"; +is(scalar <$sock>, "END\r\n", "get foo"); + +# Now set and get (should hit) +print $sock "set foo 0 0 6\r\nfooval\r\n"; +is(scalar <$sock>, "STORED\r\n", "stored foo"); +mem_get_is($sock, "foo", "fooval"); + +my $stats = mem_stats($sock); +is($stats->{cmd_get}, 2, "Should have 2 get requests"); +is($stats->{get_hits}, 1, "Should have 1 hit"); +is($stats->{get_misses}, 1, "Should have 1 miss"); diff --git a/memcached/t/issue_108.t b/memcached/t/issue_108.t new file mode 100644 index 00000000..07a78b7c --- /dev/null +++ b/memcached/t/issue_108.t @@ -0,0 +1,27 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 4; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; +my $key = 
"del_key"; + +print $sock "add $key 0 0 1\r\nx\r\n"; +is (scalar <$sock>, "STORED\r\n", "Added a key"); + +print $sock "delete $key 0\r\n"; +is (scalar <$sock>, "DELETED\r\n", "Properly deleted with 0"); + +print $sock "add $key 0 0 1\r\nx\r\n"; +is (scalar <$sock>, "STORED\r\n", "Added again a key"); + +print $sock "delete $key 0 noreply\r\n"; +# will not reply, but a subsequent add will succeed + +print $sock "add $key 0 0 1\r\nx\r\n"; +is (scalar <$sock>, "STORED\r\n", "Add succeeded after quiet deletion."); + diff --git a/memcached/t/issue_14.t b/memcached/t/issue_14.t new file mode 100644 index 00000000..fb14b030 --- /dev/null +++ b/memcached/t/issue_14.t @@ -0,0 +1,34 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 21; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; +my $value = "B"x66560; +my $key = 0; + +for ($key = 0; $key < 10; $key++) { + print $sock "set key$key 0 2 66560\r\n$value\r\n"; + is (scalar <$sock>, "STORED\r\n", "stored key$key"); +} + +#print $sock "stats slabs" +my $first_stats = mem_stats($sock, "slabs"); +my $first_malloc = $first_stats->{total_malloced}; + +sleep(4); + +for ($key = 10; $key < 20; $key++) { + print $sock "set key$key 0 2 66560\r\n$value\r\n"; + is (scalar <$sock>, "STORED\r\n", "stored key$key"); +} + +my $second_stats = mem_stats($sock, "slabs"); +my $second_malloc = $second_stats->{total_malloced}; + + +is ($second_malloc, $first_malloc, "Memory grows..") diff --git a/memcached/t/issue_140.t b/memcached/t/issue_140.t new file mode 100644 index 00000000..16eac3dd --- /dev/null +++ b/memcached/t/issue_140.t @@ -0,0 +1,41 @@ +#!/usr/bin/perl + +use strict; +use Test::More; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +plan skip_all => 'Fix for Issue 140 was only an illusion'; + +plan tests => 7; + +my $server = new_memcached(); +my $sock = $server->sock; + +print $sock "set a 0 0 1\r\na\r\n"; +is (scalar <$sock>, 
"STORED\r\n", "stored key"); + +my $stats = mem_stats($sock, "items"); +my $age = $stats->{"items:1:age"}; +isnt ($age, "0", "Age should not be zero"); + +print $sock "flush_all\r\n"; +is (scalar <$sock>, "OK\r\n", "items flushed"); + +my $stats = mem_stats($sock, "items"); +my $age = $stats->{"items:1:age"}; +is ($age, undef, "all should be gone"); + +print $sock "set a 0 1 1\r\na\r\n"; +is (scalar <$sock>, "STORED\r\n", "stored key"); + +my $stats = mem_stats($sock, "items"); +my $age = $stats->{"items:1:age"}; +isnt ($age, "0", "Age should not be zero"); + +sleep(3); + +my $stats = mem_stats($sock, "items"); +my $age = $stats->{"items:1:age"}; +is ($age, undef, "all should be gone"); diff --git a/memcached/t/issue_152.t b/memcached/t/issue_152.t new file mode 100644 index 00000000..81aa710a --- /dev/null +++ b/memcached/t/issue_152.t @@ -0,0 +1,17 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 2; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; +my $key = "a"x251; + +print $sock "set a 1 0 1\r\na\r\n"; +is (scalar <$sock>, "STORED\r\n", "Stored key"); + +print $sock "get a $key\r\n"; +is (scalar <$sock>, "CLIENT_ERROR bad command line format\r\n", "illegal key"); diff --git a/memcached/t/issue_163.t b/memcached/t/issue_163.t new file mode 100644 index 00000000..a5eb1ce4 --- /dev/null +++ b/memcached/t/issue_163.t @@ -0,0 +1,37 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 7; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; +my $value1 = "A"x66560; +my $value2 = "B"x66570; + +print $sock "set key 0 1 66560\r\n$value1\r\n"; +is (scalar <$sock>, "STORED\r\n", "stored key"); + +my $stats = mem_stats($sock, "slabs"); +my $requested = $stats->{"31:mem_requested"}; +isnt ($requested, "0", "We should have requested some memory"); + +sleep(3); +print $sock "set key 0 0 66570\r\n$value2\r\n"; +is 
(scalar <$sock>, "STORED\r\n", "stored key"); + +my $stats = mem_stats($sock, "items"); +my $reclaimed = $stats->{"items:31:reclaimed"}; +is ($reclaimed, "1", "Objects should be reclaimed"); + +print $sock "delete key\r\n"; +is (scalar <$sock>, "DELETED\r\n", "deleted key"); + +print $sock "set key 0 0 66560\r\n$value1\r\n"; +is (scalar <$sock>, "STORED\r\n", "stored key"); + +my $stats = mem_stats($sock, "slabs"); +my $requested2 = $stats->{"31:mem_requested"}; +is ($requested2, $requested, "we've not allocated and freed the same amont"); diff --git a/memcached/t/issue_183.t b/memcached/t/issue_183.t new file mode 100644 index 00000000..94b956cf --- /dev/null +++ b/memcached/t/issue_183.t @@ -0,0 +1,23 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 5; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; +print $sock "set key 0 0 1\r\n1\r\n"; +is (scalar <$sock>, "STORED\r\n", "stored key"); +my $s1 = mem_stats($sock); +my $r1 = $s1->{"reclaimed"}; +is ($r1, "0", "Objects should not be reclaimed"); +sleep(2); +print $sock "flush_all\r\n"; +is (scalar <$sock>, "OK\r\n", "Cache flushed"); +print $sock "set key 0 0 1\r\n1\r\n"; +is (scalar <$sock>, "STORED\r\n", "stored key"); +my $s2 = mem_stats($sock); +my $r2 = $s2->{"reclaimed"}; +is ($r2, "1", "Objects should be reclaimed"); diff --git a/memcached/t/issue_22.t b/memcached/t/issue_22.t new file mode 100644 index 00000000..403d3d3d --- /dev/null +++ b/memcached/t/issue_22.t @@ -0,0 +1,39 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 84; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached("-m 3"); +my $sock = $server->sock; +my $value = "B"x66560; +my $key = 0; + +for ($key = 0; $key < 40; $key++) { + print $sock "set key$key 0 0 66560\r\n$value\r\n"; + is (scalar <$sock>, "STORED\r\n", "stored key$key"); +} + +my $first_stats = mem_stats($sock, "items"); +my $first_evicted = 
$first_stats->{"items:31:evicted"}; +# I get 1 eviction on a 32 bit binary, but 4 on a 64 binary.. +# Just check that I have evictions... +isnt ($first_evicted, "0", "check evicted"); + +print $sock "stats reset\r\n"; +is (scalar <$sock>, "RESET\r\n", "Stats reset"); + +my $second_stats = mem_stats($sock, "items"); +my $second_evicted = $second_stats->{"items:31:evicted"}; +is ($second_evicted, "0", "check evicted"); + +for ($key = 40; $key < 80; $key++) { + print $sock "set key$key 0 0 66560\r\n$value\r\n"; + is (scalar <$sock>, "STORED\r\n", "stored key$key"); +} + +my $last_stats = mem_stats($sock, "items"); +my $last_evicted = $last_stats->{"items:31:evicted"}; +is ($last_evicted, "40", "check evicted"); diff --git a/memcached/t/issue_29.t b/memcached/t/issue_29.t new file mode 100644 index 00000000..a585f97a --- /dev/null +++ b/memcached/t/issue_29.t @@ -0,0 +1,26 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 4; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; + +print $sock "set issue29 0 0 0\r\n\r\n"; +is (scalar <$sock>, "STORED\r\n", "stored issue29"); + +my $first_stats = mem_stats($sock, "slabs"); +my $first_used = $first_stats->{"1:used_chunks"}; + +is(1, $first_used, "Used one"); + +print $sock "set issue29_b 0 0 0\r\n\r\n"; +is (scalar <$sock>, "STORED\r\n", "stored issue29_b"); + +my $second_stats = mem_stats($sock, "slabs"); +my $second_used = $second_stats->{"1:used_chunks"}; + +is(2, $second_used, "Used two") diff --git a/memcached/t/issue_3.t b/memcached/t/issue_3.t new file mode 100644 index 00000000..66aaf163 --- /dev/null +++ b/memcached/t/issue_3.t @@ -0,0 +1,47 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 8; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; +my $key = "del_key"; + +print $sock "delete $key\r\n"; +is (scalar <$sock>, "NOT_FOUND\r\n", "not found on 
delete"); + +print $sock "delete $key 10\r\n"; +is (scalar <$sock>, "CLIENT_ERROR bad command line format." + . " Usage: delete [noreply]\r\n", "invalid delete"); + +print $sock "add $key 0 0 1\r\nx\r\n"; +is (scalar <$sock>, "STORED\r\n", "Add before a broken delete."); + +print $sock "delete $key 10 noreply\r\n"; +# Does not reply +# is (scalar <$sock>, "ERROR\r\n", "Even more invalid delete"); + +print $sock "add $key 0 0 1\r\nx\r\n"; +is (scalar <$sock>, "NOT_STORED\r\n", "Failed to add after failed silent delete."); + +print $sock "delete $key noreply\r\n"; +# Will not reply, so let's do a set and check that. + +print $sock "set $key 0 0 1\r\nx\r\n"; +is (scalar <$sock>, "STORED\r\n", "Stored a key"); + +print $sock "delete $key\r\n"; +is (scalar <$sock>, "DELETED\r\n", "Properly deleted"); + +print $sock "set $key 0 0 1\r\nx\r\n"; +is (scalar <$sock>, "STORED\r\n", "Stored a key"); + +print $sock "delete $key noreply\r\n"; +# will not reply, but a subsequent add will succeed + +print $sock "add $key 0 0 1\r\nx\r\n"; +is (scalar <$sock>, "STORED\r\n", "Add succeeded after deletion."); + diff --git a/memcached/t/issue_41.t b/memcached/t/issue_41.t new file mode 100644 index 00000000..fb1a292c --- /dev/null +++ b/memcached/t/issue_41.t @@ -0,0 +1,39 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use POSIX qw(ceil); +use Test::More tests => 691; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; + +my $factor = 2; +my $val = "x" x $factor; +my $key = ''; + +# SET items of diverse size to the daemon so it can attempt +# to return a large stats output for slabs +for (my $i=0; $i<69; $i++) { + for (my $j=0; $j<10; $j++) { + $key = "$i:$j"; + print $sock "set key$key 0 0 $factor\r\n$val\r\n"; + is (scalar <$sock>, "STORED\r\n", "stored key$key"); + } + $factor *= 1.2; + $factor = ceil($factor); + $val = "x" x $factor; +} + +# This request will kill the daemon if it has not allocated +# 
enough memory internally. +my $stats = mem_stats($sock, "slabs"); + +# Verify whether the daemon is still running or not by asking +# it for statistics. +print $sock "version\r\n"; +my $v = scalar <$sock>; +ok(defined $v && length($v), "memcached didn't respond"); diff --git a/memcached/t/issue_42.t b/memcached/t/issue_42.t new file mode 100644 index 00000000..299c1ec6 --- /dev/null +++ b/memcached/t/issue_42.t @@ -0,0 +1,21 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 11; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; +my $value = "B"x10; +my $key = 0; + +for ($key = 0; $key < 10; $key++) { + print $sock "set key$key 0 0 10\r\n$value\r\n"; + is (scalar <$sock>, "STORED\r\n", "stored key$key"); +} + +my $first_stats = mem_stats($sock, "slabs"); +my $req = $first_stats->{"1:mem_requested"}; +ok ($req == "640" || $req == "800", "Check allocated size"); diff --git a/memcached/t/issue_50.t b/memcached/t/issue_50.t new file mode 100644 index 00000000..80876b27 --- /dev/null +++ b/memcached/t/issue_50.t @@ -0,0 +1,16 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 1; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached('-B binary'); +my $sock = $server->sock; + +$SIG{ALRM} = sub { die "alarm\n" }; +alarm(2); +print $sock "Here's a bunch of garbage that doesn't look like the bin prot."; +my $rv = <$sock>; +ok(1, "Either the above worked and quit, or hung forever."); diff --git a/memcached/t/issue_61.t b/memcached/t/issue_61.t new file mode 100644 index 00000000..ec38a7d9 --- /dev/null +++ b/memcached/t/issue_61.t @@ -0,0 +1,20 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 7; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached("-R 1"); +my $sock = $server->sock; + +print $sock "set foobar 0 0 5\r\nBubba\r\nset foobar 0 0 5\r\nBubba\r\nset foobar 0 0 5\r\nBubba\r\nset foobar 0 
0 5\r\nBubba\r\nset foobar 0 0 5\r\nBubba\r\nset foobar 0 0 5\r\nBubba\r\n"; +is (scalar <$sock>, "STORED\r\n", "stored foobar"); +is (scalar <$sock>, "STORED\r\n", "stored foobar"); +is (scalar <$sock>, "STORED\r\n", "stored foobar"); +is (scalar <$sock>, "STORED\r\n", "stored foobar"); +is (scalar <$sock>, "STORED\r\n", "stored foobar"); +is (scalar <$sock>, "STORED\r\n", "stored foobar"); +my $stats = mem_stats($sock); +is ($stats->{"conn_yields"}, "5", "Got a decent number of yields"); diff --git a/memcached/t/issue_67.t b/memcached/t/issue_67.t new file mode 100644 index 00000000..c6b8b4cb --- /dev/null +++ b/memcached/t/issue_67.t @@ -0,0 +1,86 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 22; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; +use Carp qw(croak); + +use Cwd; +my $builddir = getcwd; + +$ENV{'MEMCACHED_PORT_FILENAME'} = "/tmp/ports.$$"; + +sub read_ports { + my %rv = (); + open(my $f, "/tmp/ports.$$") || die("Can't open ports file."); + while(<$f>) { + my ($type, $port) = split(/:\s+/); + $rv{$type} = $port + 0; + } + unlink "/tmp/ports.$$"; + return %rv; +} + +sub validate_port { + my ($name, $got, $expected) = @_; + # diag "Wanted $expected, got $got"; + if ($expected == -1) { + ok(!defined($got), "$name expected no port, got $got"); + } elsif ($expected == 0) { + ok($got != 11211, "$name expected random port (got $got)"); + } else { + is($got, $expected, "$name"); + } +} + +sub run_server { + my ($args) = @_; + + my $exe = "$builddir/memcached-debug"; + croak("memcached binary doesn't exist. 
Haven't run 'make' ?\n") unless -e $exe; + + my $childpid = fork(); + croak("Failed to fork: $!") unless defined $childpid; # an undef pid would make the parent itself exec below + + my $root = ($< == 0) ? "-u root" : ''; # memcached needs an explicit user when the test runs as root + my $cmd = "$builddir/timedrun 10 $exe $root $args"; + + unless($childpid) { + exec $cmd; + exit; # NOTREACHED + } + + for (1..20) { + if (-f "/tmp/ports.$$") { + return Memcached::Handle->new(pid => $childpid); + } + select undef, undef, undef, 0.10; + } + croak "Failed to start server."; +} + +sub when { + my ($name, $params, $expected_tcp, $expected_udp) = @_; + + my $server = run_server($params); + my %ports = read_ports(); + + validate_port($name, $ports{'TCP INET'}, $expected_tcp); + validate_port($name, $ports{'UDP INET'}, $expected_udp); +} + +# Disabling the defaults since it conflicts with a running instance. +# when('no arguments', '', 11211, 11211); +when('specifying tcp port', '-p 11212', 11212, 11212); +when('specifying udp port', '-U 11222', 11222, 11222); +when('specifying tcp ephemeral port', '-p -1', 0, 0); +when('specifying udp ephemeral port', '-U -1', 0, 0); +when('tcp port disabled', '-p 0', -1, -1); +when('udp port disabled', '-U 0', -1, -1); +when('specifying tcp and udp ports', '-p 11232 -U 11233', 11232, 11233); +when('specifying tcp and disabling udp', '-p 11242 -U 0', 11242, -1); +when('specifying udp and disabling tcp', '-p -1 -U 11252', 0, 11252); +when('specifying tcp and ephemeral udp', '-p 11262 -U -1', 11262, 0); +when('specifying udp and ephemeral tcp', '-p -1 -U 11272', 0, 11272); diff --git a/memcached/t/issue_68.t b/memcached/t/issue_68.t new file mode 100644 index 00000000..94e3e957 --- /dev/null +++ b/memcached/t/issue_68.t @@ -0,0 +1,21 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 996; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; + +for (my $keyi = 1; $keyi < 250; $keyi++) { + my $key = "x" x $keyi; + print $sock "set $key 0 0 1\r\n9\r\n"; + is (scalar <$sock>, "STORED\r\n", "stored $key"); + mem_get_is($sock,
$key, "9"); + print $sock "incr $key 1\r\n"; + is (scalar <$sock>, "10\r\n", "incr $key to 10"); + mem_get_is($sock, $key, "10"); +} + diff --git a/memcached/t/issue_70.t b/memcached/t/issue_70.t new file mode 100644 index 00000000..95e39db1 --- /dev/null +++ b/memcached/t/issue_70.t @@ -0,0 +1,22 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 4; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; + +print $sock "set issue70 0 0 0\r\n\r\n"; +is (scalar <$sock>, "STORED\r\n", "stored issue70"); + +print $sock "set issue70 0 0 -1\r\n"; +is (scalar <$sock>, "CLIENT_ERROR bad command line format\r\n"); + +print $sock "set issue70 0 0 4294967295\r\n"; +is (scalar <$sock>, "CLIENT_ERROR bad command line format\r\n"); + +print $sock "set issue70 0 0 2147483647\r\nscoobyscoobydoo"; +is (scalar <$sock>, "CLIENT_ERROR bad command line format\r\n"); diff --git a/memcached/t/item_size_max.t b/memcached/t/item_size_max.t new file mode 100644 index 00000000..85460fad --- /dev/null +++ b/memcached/t/item_size_max.t @@ -0,0 +1,52 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 7; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; + +my $stats = mem_stats($sock, ' settings'); + +# Ensure default still works. +is($stats->{item_size_max}, 1024 * 1024); +$server->stop(); + +# Should die. +eval { + $server = new_memcached('-I 1000'); +}; +ok($@ && $@ =~ m/^Failed/, "Shouldn't start with < 1k item max"); + +eval { + $server = new_memcached('-I 256m'); +}; +ok($@ && $@ =~ m/^Failed/, "Shouldn't start with > 128m item max"); + +# Minimum. +$server = new_memcached('-I 1024'); +my $stats = mem_stats($server->sock, ' settings'); +is($stats->{item_size_max}, 1024); +$server->stop(); + +# Reasonable but unreasonable. 
+$server = new_memcached('-I 1049600'); +my $stats = mem_stats($server->sock, ' settings'); +is($stats->{item_size_max}, 1049600); +$server->stop(); + +# Suffix kilobytes. +$server = new_memcached('-I 512k'); +my $stats = mem_stats($server->sock, ' settings'); +is($stats->{item_size_max}, 524288); +$server->stop(); + +# Suffix megabytes. +$server = new_memcached('-I 32m'); +my $stats = mem_stats($server->sock, ' settings'); +is($stats->{item_size_max}, 33554432); +$server->stop(); + diff --git a/memcached/t/lib/MemcachedTest.pm b/memcached/t/lib/MemcachedTest.pm new file mode 100644 index 00000000..88f37a2f --- /dev/null +++ b/memcached/t/lib/MemcachedTest.pm @@ -0,0 +1,268 @@ +package MemcachedTest; +use strict; +use IO::Socket::INET; +use IO::Socket::UNIX; +use Exporter 'import'; +use Carp qw(croak); +use vars qw(@EXPORT); + +# Instead of doing the substitution with Autoconf, we assume that +# cwd == builddir. +use Cwd; +my $builddir = getcwd; + + +@EXPORT = qw(new_memcached sleep mem_get_is mem_gets mem_gets_is mem_stats + supports_sasl free_port); + +sub sleep { + my $n = shift; + select undef, undef, undef, $n; +} + +sub mem_stats { + my ($sock, $type) = @_; + $type = $type ? " $type" : ""; + print $sock "stats$type\r\n"; + my $stats = {}; + while (my $line = <$sock>) { # lexical line variable, so the caller's $_ is left alone + last if $line =~ /^(\.|END)/; + next unless $line =~ /^(STAT|ITEM) (\S+)\s+([^\r\n]+)/; # ignore lines that are not stat records rather than storing old $2/$3 + #print " slabs: $line"; + $stats->{$2} = $3; + } + return $stats; +} + +sub mem_get_is { + # works on single-line values only. no newlines in value. + my ($sock_opts, $key, $val, $msg) = @_; + my $opts = ref $sock_opts eq "HASH" ? $sock_opts : {}; + my $sock = ref $sock_opts eq "HASH" ? $opts->{sock} : $sock_opts; + + my $expect_flags = $opts->{flags} || 0; + my $dval = defined $val ? "'$val'" : ""; + $msg ||= "$key == $dval"; + + print $sock "get $key\r\n"; + if (! defined $val) { + my $line = scalar <$sock>; + if ($line =~ /^VALUE/) { + $line .= scalar(<$sock>) .
scalar(<$sock>); + } + Test::More::is($line, "END\r\n", $msg); + } else { + my $len = length($val); + my $body = scalar(<$sock>); + my $expected = "VALUE $key $expect_flags $len\r\n$val\r\nEND\r\n"; + if (!$body || $body =~ /^END/) { + Test::More::is($body, $expected, $msg); + return; + } + $body .= scalar(<$sock>) . scalar(<$sock>); + Test::More::is($body, $expected, $msg); + } +} + +sub mem_gets { + # works on single-line values only. no newlines in value. + my ($sock_opts, $key) = @_; + my $opts = ref $sock_opts eq "HASH" ? $sock_opts : {}; + my $sock = ref $sock_opts eq "HASH" ? $opts->{sock} : $sock_opts; + my $val; + my $expect_flags = $opts->{flags} || 0; + + print $sock "gets $key\r\n"; + my $response = <$sock>; + if ($response =~ /^END/) { + return "NOT_FOUND"; + } + else + { + $response =~ /VALUE (.*) (\d+) (\d+) (\d+)/; + my $flags = $2; + my $len = $3; + my $identifier = $4; + read $sock, $val , $len; + # get the END + $_ = <$sock>; + $_ = <$sock>; + + return ($identifier,$val); + } + +} +sub mem_gets_is { + # works on single-line values only. no newlines in value. + my ($sock_opts, $identifier, $key, $val, $msg) = @_; + my $opts = ref $sock_opts eq "HASH" ? $sock_opts : {}; + my $sock = ref $sock_opts eq "HASH" ? $opts->{sock} : $sock_opts; + + my $expect_flags = $opts->{flags} || 0; + my $dval = defined $val ? "'$val'" : ""; + $msg ||= "$key == $dval"; + + print $sock "gets $key\r\n"; + if (! defined $val) { + my $line = scalar <$sock>; + if ($line =~ /^VALUE/) { + $line .= scalar(<$sock>) . scalar(<$sock>); + } + Test::More::is($line, "END\r\n", $msg); + } else { + my $len = length($val); + my $body = scalar(<$sock>); + my $expected = "VALUE $key $expect_flags $len $identifier\r\n$val\r\nEND\r\n"; + if (!$body || $body =~ /^END/) { + Test::More::is($body, $expected, $msg); + return; + } + $body .= scalar(<$sock>) . 
scalar(<$sock>); + Test::More::is($body, $expected, $msg); + } +} + +sub free_port { + my $type = shift || "tcp"; + my $sock; + my $port; + while (!$sock) { + $port = int(rand(20000)) + 30000; + $sock = IO::Socket::INET->new(LocalAddr => '127.0.0.1', + LocalPort => $port, + Proto => $type, + ReuseAddr => 1); + } + return $port; +} + +sub supports_udp { + my $output = `$builddir/memcached-debug -h`; + return 0 if $output =~ /^memcached 1\.1\./; + return 1; +} + +sub supports_sasl { + my $output = `$builddir/memcached-debug -h`; + return 1 if $output =~ /sasl/i; + return 0; +} + +sub new_memcached { + my ($args, $passed_port) = @_; + my $port = $passed_port || free_port(); + my $host = '127.0.0.1'; + + if ($ENV{T_MEMD_USE_DAEMON}) { + my ($host, $port) = ($ENV{T_MEMD_USE_DAEMON} =~ m/^([^:]+):(\d+)$/); + my $conn = IO::Socket::INET->new(PeerAddr => "$host:$port"); + if ($conn) { + return Memcached::Handle->new(conn => $conn, + host => $host, + port => $port); + } + croak("Failed to connect to specified memcached server.") unless $conn; + } + + my $udpport = free_port("udp"); + $args .= " -p $port"; + if (supports_udp()) { + $args .= " -U $udpport"; + } + if ($< == 0) { + $args .= " -u root"; + } + + my $exe = "$builddir/memcached-debug"; # validate the binary before forking so a failed check croaks once, in the parent only + croak("memcached binary doesn't exist. Haven't run 'make' ?\n") unless -e $exe; + croak("memcached binary not executable\n") unless -x _; + + my $childpid = fork(); + croak("Failed to fork: $!") unless defined $childpid; # an undef pid would make the parent itself exec below + unless ($childpid) { + exec "$builddir/timedrun 600 $exe $args"; + exit; # never gets here. + } + + # unix domain sockets + if ($args =~ /-s (\S+)/) { + sleep 1; + my $filename = $1; + my $conn = IO::Socket::UNIX->new(Peer => $filename) || + croak("Failed to connect to unix domain socket: $!
'$filename'"); + + return Memcached::Handle->new(pid => $childpid, + conn => $conn, + domainsocket => $filename, + host => $host, + port => $port); + } + + # try to connect / find open port, only if we're not using unix domain + # sockets + + for (1..20) { + my $conn = IO::Socket::INET->new(PeerAddr => "127.0.0.1:$port"); + if ($conn) { + return Memcached::Handle->new(pid => $childpid, + conn => $conn, + udpport => $udpport, + host => $host, + port => $port); + } + select undef, undef, undef, 0.10; + } + croak("Failed to startup/connect to memcached server."); +} + +############################################################################ +package Memcached::Handle; +sub new { + my ($class, %params) = @_; + return bless \%params, $class; +} + +sub DESTROY { + my $self = shift; + kill 2, $self->{pid} if $self->{pid}; # handles built without a pid must not signal pid 0 (our own process group) +} + +sub stop { + my $self = shift; + kill 15, $self->{pid} if $self->{pid}; # same guard as DESTROY: nothing to stop when we did not spawn the server +} + +sub host { $_[0]{host} } +sub port { $_[0]{port} } +sub udpport { $_[0]{udpport} } + +sub sock { + my $self = shift; + + if ($self->{conn} && ($self->{domainsocket} || getpeername($self->{conn}))) { + return $self->{conn}; + } + return $self->new_sock; +} + +sub new_sock { + my $self = shift; + if ($self->{domainsocket}) { + return IO::Socket::UNIX->new(Peer => $self->{domainsocket}); + } else { + return IO::Socket::INET->new(PeerAddr => "$self->{host}:$self->{port}"); + } +} + +sub new_udp_sock { + my $self = shift; + return IO::Socket::INET->new(PeerAddr => '127.0.0.1', + PeerPort => $self->{udpport}, + Proto => 'udp', + LocalAddr => '127.0.0.1', + LocalPort => MemcachedTest::free_port('udp'), + ); + +} + +1; diff --git a/memcached/t/line-lengths.t b/memcached/t/line-lengths.t new file mode 100755 index 00000000..829f4d0b --- /dev/null +++ b/memcached/t/line-lengths.t @@ -0,0 +1,25 @@ +#!/usr/bin/perl +use strict; +use FindBin qw($Bin); +our @files; + +BEGIN { + chdir "$Bin/.."
or die; + @files = ( "doc/protocol.txt" ); +} + +use Test::More tests => scalar(@files); + +foreach my $f (@files) { + open(my $fh, $f) or die("Can't open $f"); + my @long_lines = (); + my $line_number = 0; + while(<$fh>) { + $line_number++; + if(length($_) > 80) { + push(@long_lines, $line_number); + } + } + close($fh); + ok(@long_lines == 0, "$f has a long lines: @long_lines"); +} diff --git a/memcached/t/lru.t b/memcached/t/lru.t new file mode 100755 index 00000000..86a8d1e5 --- /dev/null +++ b/memcached/t/lru.t @@ -0,0 +1,58 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 149; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +# assuming max slab is 1M and default mem is 64M +my $server = new_memcached(); +my $sock = $server->sock; + +# create a big value for the largest slab +my $max = 1024 * 1024; +my $big = 'x' x (1024 * 1024 - 250); + +ok(length($big) > 512 * 1024); +ok(length($big) < 1024 * 1024); + +# test that an even bigger value is rejected while we're here +my $too_big = $big . $big . 
$big; +my $len = length($too_big); +print $sock "set too_big 0 0 $len\r\n$too_big\r\n"; +is(scalar <$sock>, "SERVER_ERROR object too large for cache\r\n", "too_big not stored"); + +# set the big value +my $len = length($big); +print $sock "set big 0 0 $len\r\n$big\r\n"; +is(scalar <$sock>, "STORED\r\n", "stored big"); +mem_get_is($sock, "big", $big); + +# no evictions yet +my $stats = mem_stats($sock); +is($stats->{"evictions"}, "0", "no evictions to start"); + +# set many big items, enough to get evictions +for (my $i = 0; $i < 100; $i++) { + print $sock "set item_$i 0 0 $len\r\n$big\r\n"; + is(scalar <$sock>, "STORED\r\n", "stored item_$i"); +} + +# some evictions should have happened +my $stats = mem_stats($sock); +my $evictions = int($stats->{"evictions"}); +ok($evictions == 37, "some evictions happened"); + +# the first big value should be gone +mem_get_is($sock, "big", undef); + +# the earliest items should be gone too +for (my $i = 0; $i < $evictions - 1; $i++) { + mem_get_is($sock, "item_$i", undef); +} + +# check that the non-evicted are the right ones +for (my $i = $evictions - 1; $i < $evictions + 4; $i++) { + mem_get_is($sock, "item_$i", $big); +} diff --git a/memcached/t/maxconns.t b/memcached/t/maxconns.t new file mode 100755 index 00000000..14b5eae9 --- /dev/null +++ b/memcached/t/maxconns.t @@ -0,0 +1,28 @@ +#!/usr/bin/perl +# NOTE: This test never worked. Memcached would ignore maxconns requests lower +# than the current ulimit. Test needs to be updated. 
+ +use strict; +use warnings; + +use Test::More tests => 11; + +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + + +# start up a server with 10 maximum connections +my $server = new_memcached('-c 100'); +my $sock = $server->sock; +my @sockets; + +ok(defined($sock), 'Connection 0'); +push (@sockets, $sock); + + +foreach my $conn (1..10) { + $sock = $server->new_sock; + ok(defined($sock), "Made connection $conn"); + push(@sockets, $sock); +} diff --git a/memcached/t/multiversioning.t b/memcached/t/multiversioning.t new file mode 100755 index 00000000..df9eab48 --- /dev/null +++ b/memcached/t/multiversioning.t @@ -0,0 +1,46 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 13; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +my $server = new_memcached(); +my $sock = $server->sock; +my $sock2 = $server->new_sock; + +ok($sock != $sock2, "have two different connections open"); + +# set large value +my $size = 256 * 1024; # 256 kB +my $bigval = "0123456789abcdef" x ($size / 16); +$bigval =~ s/^0/\[/; $bigval =~ s/f$/\]/; +my $bigval2 = uc($bigval); + +print $sock "set big 0 0 $size\r\n$bigval\r\n"; +is(scalar <$sock>, "STORED\r\n", "stored foo"); +mem_get_is($sock, "big", $bigval, "big value got correctly"); + +print $sock "get big\r\n"; +my $buf; +is(read($sock, $buf, $size / 2), $size / 2, "read half the answer back"); +like($buf, qr/VALUE big/, "buf has big value header in it"); +like($buf, qr/abcdef/, "buf has some data in it"); +unlike($buf, qr/abcde\]/, "buf doesn't yet close"); + +# sock2 interrupts (maybe sock1 is slow) and deletes stuff: +print $sock2 "delete big\r\n"; +is(scalar <$sock2>, "DELETED\r\n", "deleted big from sock2 while sock1's still reading it"); +mem_get_is($sock2, "big", undef, "nothing from sock2 now. 
gone from namespace."); +print $sock2 "set big 0 0 $size\r\n$bigval2\r\n"; +is(scalar <$sock2>, "STORED\r\n", "stored big w/ val2"); +mem_get_is($sock2, "big", $bigval2, "big value2 got correctly"); + +# sock1 resumes reading... +$buf .= <$sock>; +$buf .= <$sock>; +like($buf, qr/abcde\]/, "buf now closes"); + +# and if sock1 reads again, it's the uppercase version: +mem_get_is($sock, "big", $bigval2, "big value2 got correctly from sock1"); diff --git a/memcached/t/noreply.t b/memcached/t/noreply.t new file mode 100644 index 00000000..54a3f13d --- /dev/null +++ b/memcached/t/noreply.t @@ -0,0 +1,47 @@ +#!/usr/bin/perl + +use strict; +use Test::More tests => 9; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + + +my $server = new_memcached(); +my $sock = $server->sock; + + +# Test that commands can take 'noreply' parameter. +print $sock "flush_all noreply\r\n"; +print $sock "flush_all 0 noreply\r\n"; + +print $sock "verbosity 0 noreply\r\n"; + +print $sock "add noreply:foo 0 0 1 noreply\r\n1\r\n"; +mem_get_is($sock, "noreply:foo", "1"); + +print $sock "set noreply:foo 0 0 1 noreply\r\n2\r\n"; +mem_get_is($sock, "noreply:foo", "2"); + +print $sock "replace noreply:foo 0 0 1 noreply\r\n3\r\n"; +mem_get_is($sock, "noreply:foo", "3"); + +print $sock "append noreply:foo 0 0 1 noreply\r\n4\r\n"; +mem_get_is($sock, "noreply:foo", "34"); + +print $sock "prepend noreply:foo 0 0 1 noreply\r\n5\r\n"; +my @result = mem_gets($sock, "noreply:foo"); +ok($result[1] eq "534"); + +print $sock "cas noreply:foo 0 0 1 $result[0] noreply\r\n6\r\n"; +mem_get_is($sock, "noreply:foo", "6"); + +print $sock "incr noreply:foo 3 noreply\r\n"; +mem_get_is($sock, "noreply:foo", "9"); + +print $sock "decr noreply:foo 2 noreply\r\n"; +mem_get_is($sock, "noreply:foo", "7"); + +print $sock "delete noreply:foo noreply\r\n"; +mem_get_is($sock, "noreply:foo"); + diff --git a/memcached/t/sasl/memcached.conf b/memcached/t/sasl/memcached.conf new file mode 100644 index 00000000..aacc5103 --- 
/dev/null +++ b/memcached/t/sasl/memcached.conf @@ -0,0 +1,3 @@ +mech_list: plain cram-md5 +log_level: 5 +sasldb_path: /tmp/test-memcached.sasldb diff --git a/memcached/t/slabs_reassign.t b/memcached/t/slabs_reassign.t new file mode 100644 index 00000000..cf4b6f8d --- /dev/null +++ b/memcached/t/slabs_reassign.t @@ -0,0 +1,71 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use Test::More tests => 131; +use FindBin qw($Bin); +use lib "$Bin/lib"; +use MemcachedTest; + +# Enable manual slab reassign, cap memory with -m 4 +my $server = new_memcached('-o slab_reassign -m 4'); +my $stats = mem_stats($server->sock, ' settings'); +is($stats->{slab_reassign}, "yes"); + +my $sock = $server->sock; + +# Fill a largeish slab until it evicts (honors the -m 4) +my $bigdata = 'x' x 70000; # slab 31 +for (1 .. 60) { + print $sock "set bfoo$_ 0 0 70000\r\n", $bigdata, "\r\n"; + is(scalar <$sock>, "STORED\r\n", "stored key"); +} + +# Fill a smaller slab until it evicts +my $smalldata = 'y' x 20000; # slab 25 +for (1 .. 60) { + print $sock "set sfoo$_ 0 0 20000\r\n", $smalldata, "\r\n"; + is(scalar <$sock>, "STORED\r\n", "stored key"); +} + +my $items_before = mem_stats($sock, "items"); +isnt($items_before->{"items:31:evicted"}, 0, "slab 31 evicted is nonzero"); +isnt($items_before->{"items:25:evicted"}, 0, "slab 25 evicted is nonzero"); + +my $slabs_before = mem_stats($sock, "slabs"); +# Move a large slab to the smaller slab +print $sock "slabs reassign 31 25\r\n"; +is(scalar <$sock>, "OK\r\n", "slab rebalancer started"); + +# Still working out how/if to signal the thread. For now, just sleep.
+sleep 2; + +# Check that stats counters increased +my $slabs_after = mem_stats($sock, "slabs"); +$stats = mem_stats($sock); + +isnt($stats->{slabs_moved}, 0, "slabs moved is nonzero"); + +# Check that slab stats reflect the change +ok($slabs_before->{"31:total_pages"} != $slabs_after->{"31:total_pages"}, + "slab 31 pagecount changed"); +ok($slabs_before->{"25:total_pages"} != $slabs_after->{"25:total_pages"}, + "slab 25 pagecount changed"); + +# Try to move another slab, see that it complains +print $sock "slabs reassign 31 25\r\n"; +like(scalar <$sock>, qr/^NOTFULL/, "Cannot re-run against class with empty space"); + +# Try to move a page backwards. Should complain that source class isn't "safe" +# to move from. +print $sock "slabs reassign 25 31\r\n"; +like(scalar <$sock>, qr/^UNSAFE/, "Cannot move an unsafe slab back"); + +# Try to insert items into both slabs +print $sock "set bfoo51 0 0 70000\r\n", $bigdata, "\r\n"; +is(scalar <$sock>, "STORED\r\n", "stored key"); + +print $sock "set sfoo51 0 0 20000\r\n", $smalldata, "\r\n"; +is(scalar <$sock>, "STORED\r\n", "stored key"); + +# Do need to come up with better automated tests for this. 
diff --git a/memcached/t/stats-detail.t b/memcached/t/stats-detail.t
new file mode 100644
index 00000000..b7099a24
--- /dev/null
+++ b/memcached/t/stats-detail.t
@@ -0,0 +1,63 @@
+#!/usr/bin/perl
+# Checks the per-prefix counters printed by "stats detail dump" as detail collection is switched on, reset and switched off.
+use strict;
+use Test::More tests => 24;
+use FindBin qw($Bin);
+use lib "$Bin/lib";
+use MemcachedTest;
+
+my $server = new_memcached();
+my $sock = $server->sock;
+my $expire;    # declared but never used in this script
+
+print $sock "stats detail dump\r\n";
+is(scalar <$sock>, "END\r\n", "verified empty stats at start");
+
+print $sock "stats detail on\r\n";
+is(scalar <$sock>, "OK\r\n", "detail collection turned on");
+
+print $sock "set foo:123 0 0 6\r\nfooval\r\n";
+is(scalar <$sock>, "STORED\r\n", "stored foo");
+
+print $sock "stats detail dump\r\n";
+is(scalar <$sock>, "PREFIX foo get 0 hit 0 set 1 del 0\r\n", "details after set");
+is(scalar <$sock>, "END\r\n", "end of details");
+
+mem_get_is($sock, "foo:123", "fooval");
+print $sock "stats detail dump\r\n";
+is(scalar <$sock>, "PREFIX foo get 1 hit 1 set 1 del 0\r\n", "details after get with hit");
+is(scalar <$sock>, "END\r\n", "end of details");
+
+mem_get_is($sock, "foo:124", undef);
+
+print $sock "stats detail dump\r\n";
+is(scalar <$sock>, "PREFIX foo get 2 hit 1 set 1 del 0\r\n", "details after get without hit");
+is(scalar <$sock>, "END\r\n", "end of details");
+
+print $sock "delete foo:125\r\n";
+is(scalar <$sock>, "NOT_FOUND\r\n", "sent delete command");
+
+print $sock "stats detail dump\r\n";
+is(scalar <$sock>, "PREFIX foo get 2 hit 1 set 1 del 1\r\n", "details after delete");
+is(scalar <$sock>, "END\r\n", "end of details");
+
+print $sock "stats reset\r\n";
+is(scalar <$sock>, "RESET\r\n", "stats cleared");
+
+print $sock "stats detail dump\r\n";
+is(scalar <$sock>, "END\r\n", "empty stats after clear");
+
+mem_get_is($sock, "foo:123", "fooval");
+print $sock "stats detail dump\r\n";
+is(scalar <$sock>, "PREFIX foo get 1 hit 1 set 0 del 0\r\n", "details after clear and get");
+is(scalar <$sock>, "END\r\n", "end of details");
+
+print $sock "stats detail off\r\n";
+is(scalar <$sock>, "OK\r\n", "detail collection turned off");
+
+mem_get_is($sock, "foo:124", undef);
+
+mem_get_is($sock, "foo:123", "fooval");
+print $sock "stats detail dump\r\n";
+is(scalar <$sock>, "PREFIX foo get 1 hit 1 set 0 del 0\r\n", "details after stats turned off");
+is(scalar <$sock>, "END\r\n", "end of details");
diff --git a/memcached/t/stats.t b/memcached/t/stats.t
new file mode 100755
index 00000000..97a5b7ca
--- /dev/null
+++ b/memcached/t/stats.t
@@ -0,0 +1,196 @@
+#!/usr/bin/perl
+# Checks the values returned by mem_stats() before and after set/get/delete/incr/decr/cas, "stats reset" and flush_all.
+use strict;
+use Test::More tests => 95;
+use FindBin qw($Bin);
+use lib "$Bin/lib";
+use MemcachedTest;
+
+my $server = new_memcached();
+my $sock = $server->sock;
+
+
+## Output looks like this:
+##
+## STAT pid 22969
+## STAT uptime 13
+## STAT time 1259170891
+## STAT version 1.4.3
+## STAT libevent 1.4.13-stable.
+## STAT pointer_size 32
+## STAT rusage_user 0.001198
+## STAT rusage_system 0.003523
+## STAT curr_connections 10
+## STAT total_connections 11
+## STAT connection_structures 11
+## STAT cmd_get 0
+## STAT cmd_set 0
+## STAT cmd_flush 0
+## STAT get_hits 0
+## STAT get_misses 0
+## STAT delete_misses 0
+## STAT delete_hits 0
+## STAT incr_misses 0
+## STAT incr_hits 0
+## STAT decr_misses 0
+## STAT decr_hits 0
+## STAT cas_misses 0
+## STAT cas_hits 0
+## STAT cas_badval 0
+## STAT auth_cmds 0
+## STAT auth_unknowns 0
+## STAT bytes_read 7
+## STAT bytes_written 0
+## STAT limit_maxbytes 67108864
+## STAT accepting_conns 1
+## STAT listen_disabled_num 0
+## STAT threads 4
+## STAT conn_yields 0
+## STAT bytes 0
+## STAT curr_items 0
+## STAT total_items 0
+## STAT evictions 0
+## STAT reclaimed 0
+
+# Note that the auth stats are tested in the auth-specific tests.
+
+
+my $stats = mem_stats($sock);
+
+# Test number of keys
+is(scalar(keys(%$stats)), 48, "48 stats values");
+
+# Test initial state
+foreach my $key (qw(curr_items total_items bytes cmd_get cmd_set get_hits evictions get_misses
+                 bytes_written delete_hits delete_misses incr_hits incr_misses decr_hits
+                 decr_misses listen_disabled_num)) {
+    is($stats->{$key}, 0, "initial $key is zero");
+}
+is($stats->{accepting_conns}, 1, "initial accepting_conns is one");
+
+# Do some operations
+
+print $sock "set foo 0 0 6\r\nfooval\r\n";
+is(scalar <$sock>, "STORED\r\n", "stored foo");
+mem_get_is($sock, "foo", "fooval");
+
+my $stats = mem_stats($sock);    # re-declares $stats in the same scope; this pattern repeats below
+
+foreach my $key (qw(total_items curr_items cmd_get cmd_set get_hits)) {
+    is($stats->{$key}, 1, "after one set/one get $key is 1");
+}
+
+my $cache_dump = mem_stats($sock, " cachedump 1 100");
+ok(defined $cache_dump->{'foo'}, "got foo from cachedump");
+
+print $sock "delete foo\r\n";
+is(scalar <$sock>, "DELETED\r\n", "deleted foo");
+
+my $stats = mem_stats($sock);
+is($stats->{delete_hits}, 1);
+is($stats->{delete_misses}, 0);
+
+print $sock "delete foo\r\n";
+is(scalar <$sock>, "NOT_FOUND\r\n", "shouldn't delete foo again");
+
+my $stats = mem_stats($sock);
+is($stats->{delete_hits}, 1);
+is($stats->{delete_misses}, 1);
+
+# incr stats
+
+sub check_incr_stats {    # fetches fresh stats and compares the four incr/decr hit/miss counters
+    my ($ih, $im, $dh, $dm) = @_;
+    my $stats = mem_stats($sock);
+
+    is($stats->{incr_hits}, $ih);
+    is($stats->{incr_misses}, $im);
+    is($stats->{decr_hits}, $dh);
+    is($stats->{decr_misses}, $dm);
+}
+
+print $sock "incr i 1\r\n";
+is(scalar <$sock>, "NOT_FOUND\r\n", "shouldn't incr a missing thing");
+check_incr_stats(0, 1, 0, 0);
+
+print $sock "decr d 1\r\n";
+is(scalar <$sock>, "NOT_FOUND\r\n", "shouldn't decr a missing thing");
+check_incr_stats(0, 1, 0, 1);
+
+print $sock "set n 0 0 1\r\n0\r\n";
+is(scalar <$sock>, "STORED\r\n", "stored n");
+
+print $sock "incr n 3\r\n";
+is(scalar <$sock>, "3\r\n", "incr works");
+check_incr_stats(1, 1, 0, 1);
+
+print $sock "decr n 1\r\n";
+is(scalar <$sock>, "2\r\n", "decr works");
+check_incr_stats(1, 1, 1, 1);
+
+# cas stats
+
+sub check_cas_stats {    # fetches fresh stats and compares cas_hits / cas_misses / cas_badval
+    my ($ch, $cm, $cb) = @_;
+    my $stats = mem_stats($sock);
+
+    is($stats->{cas_hits}, $ch);
+    is($stats->{cas_misses}, $cm);
+    is($stats->{cas_badval}, $cb);
+}
+
+check_cas_stats(0, 0, 0);
+
+print $sock "cas c 0 0 1 99999999\r\nz\r\n";
+is(scalar <$sock>, "NOT_FOUND\r\n", "missed cas");
+check_cas_stats(0, 1, 0);
+
+print $sock "set c 0 0 1\r\nx\r\n";
+is(scalar <$sock>, "STORED\r\n", "stored c");
+my ($id, $v) = mem_gets($sock, 'c');
+is('x', $v, 'got the expected value');
+
+print $sock "cas c 0 0 1 99999999\r\nz\r\n";
+is(scalar <$sock>, "EXISTS\r\n", "missed cas");
+check_cas_stats(0, 1, 1);
+my ($newid, $v) = mem_gets($sock, 'c');
+is('x', $v, 'got the expected value');
+
+print $sock "cas c 0 0 1 $id\r\nz\r\n";
+is(scalar <$sock>, "STORED\r\n", "good cas");
+check_cas_stats(1, 1, 1);
+my ($newid, $v) = mem_gets($sock, 'c');
+is('z', $v, 'got the expected value');
+
+my $settings = mem_stats($sock, ' settings');
+is(1024, $settings->{'maxconns'});
+is('NULL', $settings->{'domain_socket'});
+is('on', $settings->{'evictions'});
+is('yes', $settings->{'cas_enabled'});
+is('no', $settings->{'auth_enabled_sasl'});
+
+print $sock "stats reset\r\n";
+is(scalar <$sock>, "RESET\r\n", "good stats reset");
+
+my $stats = mem_stats($sock);
+is(0, $stats->{'cmd_get'});
+is(0, $stats->{'cmd_set'});
+is(0, $stats->{'get_hits'});
+is(0, $stats->{'get_misses'});
+is(0, $stats->{'delete_misses'});
+is(0, $stats->{'delete_hits'});
+is(0, $stats->{'incr_misses'});
+is(0, $stats->{'incr_hits'});
+is(0, $stats->{'decr_misses'});
+is(0, $stats->{'decr_hits'});
+is(0, $stats->{'cas_misses'});
+is(0, $stats->{'cas_hits'});
+is(0, $stats->{'cas_badval'});
+is(0, $stats->{'evictions'});
+is(0, $stats->{'reclaimed'});
+
+print $sock "flush_all\r\n";
+is(scalar <$sock>, "OK\r\n", "flushed");
+
+my $stats = mem_stats($sock);
+is($stats->{cmd_flush}, 1, "after one flush cmd_flush is 1");
diff --git a/memcached/t/stress-memcached.pl b/memcached/t/stress-memcached.pl
new file mode 100755
index 00000000..aeacbc2c
--- /dev/null
+++ b/memcached/t/stress-memcached.pl
@@ -0,0 +1,101 @@
+#!/usr/bin/perl
+#
+
+use
strict;
+use lib '../../api/perl/lib';
+use Cache::Memcached;
+use Time::HiRes qw(time);
+
+unless (@ARGV == 2) {
+    die "Usage: stress-memcached.pl ip:port threads\n";
+}
+
+my $host = shift;
+my $threads = shift;
+
+my $memc = Cache::Memcached->new;    # direct method call instead of indirect-object "new Class"
+$memc->set_servers([$host]);
+
+unless ($memc->set("foo", "bar") &&
+        $memc->get("foo") eq "bar") {
+    die "memcached not running at $host ?\n";
+}
+$memc->disconnect_all();
+
+
+my $running = 0;
+while (1) {    # runs forever: keeps $threads forked workers alive, replacing each one that exits
+    if ($running < $threads) {
+        defined(my $cpid = fork()) or die "fork failed: $!\n";    # undef means fork failed; it must not be mistaken for the child
+        if ($cpid) {
+            $running++;
+            #print "Launched $cpid. Running $running threads.\n";
+        } else {
+            stress();
+            exit 0;
+        }
+    } else {
+        wait();
+        $running--;
+    }
+}
+
+sub stress {    # one worker: random add/delete/set/get traffic against $host, reporting any call slower than 0.25s
+    undef $memc;
+    $memc = Cache::Memcached->new;    # fresh client object for this child process
+    $memc->set_servers([$host]);
+
+    my ($t1, $t2);
+    my $start = sub { $t1 = time(); };
+    my $stop = sub {
+        my $op = shift;
+        $t2 = time();
+        my $td = sprintf("%0.3f", $t2 - $t1);
+        if ($td > 0.25) { print "Took $td seconds for: $op\n"; }
+    };
+
+    my $max = rand(50);
+    my $sets = 0;
+
+    for (my $i = 0; $i < $max; $i++) {
+        my $key = key($i);
+        my $set = $memc->set($key, $key);
+        $sets++ if $set;
+    }
+
+    for (1..int(rand(500))) {
+        my $rand = int(rand($max));
+        my $key = key($rand);
+        my $meth = int(rand(3));
+        my $exp = int(rand(3));
+        undef $exp unless $exp;    # 0 becomes undef, i.e. no expiry argument
+        $start->();
+        if ($meth == 0) {
+            $memc->add($key, $key, $exp);
+            $stop->("add");
+        } elsif ($meth == 1) {
+            $memc->delete($key);
+            $stop->("delete");
+        } else {
+            $memc->set($key, $key, $exp);
+            $stop->("set");
+        }
+        $rand = int(rand($max));
+        $key = key($rand);
+        $start->();
+        my $v = $memc->get($key);
+        $stop->("get");
+        if ($v && $v ne $key) { die "Bogus: $v for key $rand\n"; }    # every value stored equals its key
+    }
+
+    $start->();
+    my $multi = $memc->get_multi(map { key(int(rand($max))) } (1..$max));
+    $stop->("get_multi");
+}
+
+sub key {    # key for index $n: zero-padded to 4 digits, odd indexes get 20 extra "a" characters
+    my $n = shift;
+    my $key = sprintf("%04d", $n);    # lexical, so the caller's $_ (foreach/map alias) is left untouched
+    if ($n % 2) { $key .= "a"x20; }
+    return $key;
+}
diff --git a/memcached/t/touch.t b/memcached/t/touch.t
new file mode 100755
index
00000000..dd2eba15
--- /dev/null
+++ b/memcached/t/touch.t
@@ -0,0 +1,23 @@
+#!/usr/bin/perl
+
+use strict;
+use Test::More tests => 4;
+use FindBin qw($Bin);
+use lib "$Bin/lib";
+use MemcachedTest;
+
+
+my $server = new_memcached();
+my $sock = $server->sock;
+
+# set foo (and should get it)
+print $sock "set foo 0 2 6\r\nfooval\r\n";
+is(scalar <$sock>, "STORED\r\n", "stored foo");
+mem_get_is($sock, "foo", "fooval");
+
+# touch it
+print $sock "touch foo 10\r\n";
+is(scalar <$sock>, "TOUCHED\r\n", "touched foo");
+
+sleep 2;    # outlive the 2 second expiry given to the original "set"
+mem_get_is($sock, "foo", "fooval");
diff --git a/memcached/t/udp.t b/memcached/t/udp.t
new file mode 100755
index 00000000..d79b07a7
--- /dev/null
+++ b/memcached/t/udp.t
@@ -0,0 +1,260 @@
+#!/usr/bin/perl
+
+use strict;
+use Test::More tests => 48;
+use FindBin qw($Bin);
+use lib "$Bin/lib";
+use MemcachedTest;
+
+use constant IS_ASCII => 0;
+use constant IS_BINARY => 1;
+use constant ENTRY_EXISTS => 0;
+use constant ENTRY_MISSING => 1;
+use constant BIN_REQ_MAGIC => 0x80;
+use constant BIN_RES_MAGIC => 0x81;
+use constant CMD_GET => 0x00;
+use constant CMD_SET => 0x01;
+use constant CMD_ADD => 0x02;
+use constant CMD_REPLACE => 0x03;
+use constant CMD_DELETE => 0x04;
+use constant CMD_INCR => 0x05;
+use constant CMD_DECR => 0x06;
+use constant CMD_APPEND => 0x0E;
+use constant CMD_PREPEND => 0x0F;
+use constant REQ_PKT_FMT => "CCnCCnNNNN";
+use constant RES_PKT_FMT => "CCnCCnNNNN";
+use constant INCRDECR_PKT_FMT => "NNNNN";
+use constant MIN_RECV_BYTES => length(pack(RES_PKT_FMT));
+
+
+my $server = new_memcached();
+my $sock = $server->sock;
+
+# set foo (and should get it)
+print $sock "set foo 0 0 6\r\nfooval\r\n";
+is(scalar <$sock>, "STORED\r\n", "stored foo");
+mem_get_is($sock, "foo", "fooval");
+
+my $usock = $server->new_udp_sock
+    or die "Can't bind : $@\n";
+
+# testing sequence of request ids
+for my $offt (1, 1, 2) {
+    my $req = 160 + $offt;
+    my $res = send_udp_request($usock, $req, "get foo\r\n");
+    ok($res, "got result");
+    is(keys %$res, 1, "one key (one packet)");
+    ok($res->{0}, "only got seq number 0");
+    is(substr($res->{0}, 8), "VALUE foo 0 6\r\nfooval\r\nEND\r\n");
+    is(hexify(substr($res->{0}, 0, 2)), hexify(pack("n", $req)), "udp request number in response ($req) is correct");
+}
+
+# op tests
+for my $prot (::IS_ASCII,::IS_BINARY) {
+    udp_set_test($prot,45,"aval$prot","1",0,0);
+    udp_set_test($prot,45,"bval$prot","abcd" x 1024,0,0);
+    udp_get_test($prot,45,"aval$prot","1",::ENTRY_EXISTS);
+    udp_get_test($prot,45,"404$prot","1",::ENTRY_MISSING);
+    udp_incr_decr_test($prot,45,"aval$prot","1","incr",1);
+    udp_incr_decr_test($prot,45,"aval$prot","1","decr",2);
+    udp_delete_test($prot,45,"aval$prot");
+}
+
+sub udp_set_test {    # stores $value under $key over UDP in the given protocol and checks the reply
+    my ($protocol, $req_id, $key, $value, $flags, $exp) = @_;
+    my $req = "";
+    my $val_len = length($value);
+
+    if ($protocol == ::IS_ASCII) {
+        $req = "set $key $flags $exp $val_len\r\n$value\r\n";
+    } elsif ($protocol == ::IS_BINARY) {
+        my $key_len = length($key);
+        my $extra = pack "NN",$flags,$exp;
+        my $extra_len = length($extra);
+        my $total_len = $val_len + $extra_len + $key_len;
+        $req = pack(::REQ_PKT_FMT, ::BIN_REQ_MAGIC, ::CMD_SET, $key_len, $extra_len, 0, 0, $total_len, 0, 0, 0);
+        $req .= $extra . $key . $value;
+    }
+
+    my $datagrams = send_udp_request($usock, $req_id, $req);
+    my $resp = construct_udp_message($datagrams);
+
+    if ($protocol == ::IS_ASCII) {
+        is($resp,"STORED\r\n","Store key $key using ASCII protocol");
+    } elsif ($protocol == ::IS_BINARY) {
+        my ($resp_magic, $resp_op_code, $resp_key_len, $resp_extra_len, $resp_data_type, $resp_status, $resp_total_len,
+            $resp_opaque, $resp_ident_hi, $resp_ident_lo) = unpack(::RES_PKT_FMT, $resp);
+        is($resp_status,"0","Store key $key using binary protocol");
+    }
+}
+
+sub udp_get_test {    # fetches $key over UDP and checks for either $value or a miss, as selected by $exists
+    my ($protocol, $req_id, $key, $value, $exists) = @_;
+    my $key_len = length($key);
+    my $value_len = length($value);
+    my $req = "";
+
+    if ($protocol == ::IS_ASCII) {
+        $req = "get $key\r\n";
+    } elsif ($protocol == ::IS_BINARY) {
+        $req = pack(::REQ_PKT_FMT, ::BIN_REQ_MAGIC, ::CMD_GET, $key_len, 0, 0, 0, $key_len, 0, 0, 0);
+        $req .= $key;
+    }
+
+    my $datagrams = send_udp_request($usock, $req_id, $req);
+    my $resp = construct_udp_message($datagrams);
+
+    if ($protocol == ::IS_ASCII) {
+        if ($exists == ::ENTRY_EXISTS) {
+            is($resp,"VALUE $key 0 $value_len\r\n$value\r\nEND\r\n","Retrieve entry with key $key using ASCII protocol");
+        } else {
+            is($resp,"END\r\n","Retrieve non existing entry with key $key using ASCII protocol");
+        }
+    } elsif ($protocol == ::IS_BINARY) {
+        my ($resp_magic, $resp_op_code, $resp_key_len, $resp_extra_len, $resp_data_type, $resp_status, $resp_total_len,
+            $resp_opaque, $resp_ident_hi, $resp_ident_lo) = unpack(::RES_PKT_FMT, $resp);
+        if ($exists == ::ENTRY_EXISTS) {
+            is($resp_status,"0","Retrieve entry with key $key using binary protocol");
+            is(substr($resp,::MIN_RECV_BYTES + $resp_extra_len + $resp_key_len, $value_len),$value,"Value for key $key retrieved with binary protocol matches");
+        } else {
+            is($resp_status,"1","Retrieve non existing entry with key $key using binary protocol");
+        }
+    }
+}
+
+sub udp_delete_test {    # deletes $key over UDP and checks the reply
+    my ($protocol, $req_id, $key) = @_;
+    my $req = "";
+    my $key_len = length($key);
+
+    if ($protocol == ::IS_ASCII) {
+        $req = "delete $key\r\n";
+    } elsif ($protocol == ::IS_BINARY) {
+        $req = pack(::REQ_PKT_FMT, ::BIN_REQ_MAGIC, ::CMD_DELETE, $key_len, 0, 0, 0, $key_len, 0, 0, 0);
+        $req .= $key;
+    }
+
+    my $datagrams = send_udp_request($usock, $req_id, $req);
+    my $resp = construct_udp_message($datagrams);
+
+    if ($protocol == ::IS_ASCII) {
+        is($resp,"DELETED\r\n","Delete key $key using ASCII protocol");
+    } elsif ($protocol == ::IS_BINARY) {
+        my ($resp_magic, $resp_op_code, $resp_key_len, $resp_extra_len, $resp_data_type, $resp_status, $resp_total_len,
+            $resp_opaque, $resp_ident_hi, $resp_ident_lo) = unpack(::RES_PKT_FMT, $resp);
+        is($resp_status,"0","Delete key $key using binary protocol");
+    }
+}
+
+sub udp_incr_decr_test {    # applies incr/decr by $val to $key and expects $init_val +/- $val back
+    my ($protocol, $req_id, $key, $val, $optype, $init_val) = @_;
+    my $req = "";
+    my $key_len = length($key);
+    my $expected_value = 0;
+    my $acmd = "incr";
+    my $bcmd = ::CMD_INCR;
+    if ($optype eq "incr") {
+        $expected_value = $init_val + $val;
+    } else {
+        $acmd = "decr";
+        $bcmd = ::CMD_DECR;
+        $expected_value = $init_val - $val;
+    }
+
+    if ($protocol == ::IS_ASCII) {
+        $req = "$acmd $key $val\r\n";
+    } elsif ($protocol == ::IS_BINARY) {
+        my $extra = pack(::INCRDECR_PKT_FMT, ($val / 2 ** 32),($val % 2 ** 32), 0, 0, 0);
+        my $extra_len = length($extra);
+        $req = pack(::REQ_PKT_FMT, ::BIN_REQ_MAGIC, $bcmd, $key_len, $extra_len, 0, 0, $key_len + $extra_len, 0, 0, 0);
+        $req .= $extra . $key;
+    }
+
+    my $datagrams = send_udp_request($usock, $req_id, $req);
+    my $resp = construct_udp_message($datagrams);
+
+    if ($protocol == ::IS_ASCII) {
+        is($resp,"$expected_value\r\n","perform $acmd math operation on key $key with ASCII protocol");
+    } elsif ($protocol == ::IS_BINARY) {
+        my ($resp_magic, $resp_op_code, $resp_key_len, $resp_extra_len, $resp_data_type, $resp_status, $resp_total_len,
+            $resp_opaque, $resp_ident_hi, $resp_ident_lo) = unpack(::RES_PKT_FMT, $resp);
+        is($resp_status,"0","perform $acmd math operation on key $key with binary protocol");
+        my ($resp_hi,$resp_lo) = unpack("NN",substr($resp,::MIN_RECV_BYTES + $resp_extra_len + $resp_key_len,
+                                                    $resp_total_len - $resp_extra_len - $resp_key_len));
+        is(($resp_hi * 2 ** 32) + $resp_lo,$expected_value,"validate result of binary protocol math operation $acmd . Expected value $expected_value")
+    }
+}
+
+sub construct_udp_message {    # joins the payloads (bytes after the 8-byte UDP frame header) in sequence order
+    my $datagrams = shift;
+    my $num_datagram = keys (%$datagrams);
+    my $msg = "";
+    my $cur_dg ="";
+    my $cur_udp_header ="";
+    for (my $cur_dg_index = 0; $cur_dg_index < $num_datagram; $cur_dg_index++) {
+        $cur_dg = $datagrams->{$cur_dg_index};
+        isnt($cur_dg,"","missing datagram for segment $cur_dg_index");    # counts towards the test plan
+        $cur_udp_header=substr($cur_dg, 0, 8);    # extracted but not used afterwards
+        $msg .= substr($cur_dg,8);
+    }
+    return $msg;
+}
+
+sub hexify {    # renders every character of the argument as two lowercase hex digits
+    my $val = shift;
+    $val =~ s/(.)/sprintf("%02x", ord($1))/egs;
+    return $val;
+}
+
+# returns undef on send/recv failure or select timeout, or hashref of "seqnum" -> payload (including headers)
+# verifies that resp_id is equal to id sent in request
+# ensures consistency in num packets that make up response
+sub send_udp_request {
+    my ($sock, $reqid, $req) = @_;
+
+    my $pkt = pack("nnnn", $reqid, 0, 1, 0);  # request id (opaque), seq num, #packets, reserved (must be 0)
+    $pkt .= $req;
+    my $fail = sub {
+        my $msg = shift;
+        warn " FAILING send_udp because: $msg\n";
+        return undef;
+    };
+    return $fail->("send") unless send($sock, $pkt, 0);
+
+    my $ret = {};
+
+    my $got = 0;   # packets got
+    my $numpkts = undef;
+
+    while (!defined($numpkts) || $got < $numpkts) {
+        my $rin = '';
+        vec($rin, fileno($sock), 1) = 1;
+        my $rout;
+        return $fail->("timeout after $got packets") unless
+            select($rout = $rin, undef, undef, 1.5);
+
+        my $res;
+        defined($sock->recv($res, 1500, 0)) or return $fail->("recv: $!");    # recv yields undef on error; do not unpack garbage
+        my ($resid, $seq, $this_numpkts, $resv) = unpack("nnnn", substr($res, 0, 8));
+        die "Response ID of $resid doesn't match request id of $reqid" unless $resid == $reqid;
+        die "Reserved area not zero" unless $resv == 0;
+        die "num packets changed midstream!" if defined $numpkts && $this_numpkts != $numpkts;
+        $numpkts = $this_numpkts;
+        $ret->{$seq} = $res;
+        $got++;
+    }
+    return $ret;
+}
+
+
+__END__
+    $sender = recv($usock, $ans, 1050, 0);
+
+__END__
+    $usock->send
+
+
+    ($hispaddr = recv(SOCKET, $rtime, 4, 0)) || die "recv: $!";
+($port, $hisiaddr) = sockaddr_in($hispaddr);
+$host = gethostbyaddr($hisiaddr, AF_INET);
+$histime = unpack("N", $rtime) - $SECS_of_70_YEARS ;
diff --git a/memcached/t/unixsocket.t b/memcached/t/unixsocket.t
new file mode 100755
index 00000000..492215ed
--- /dev/null
+++ b/memcached/t/unixsocket.t
@@ -0,0 +1,24 @@
+#!/usr/bin/perl
+
+use strict;
+use Test::More tests => 3;
+use FindBin qw($Bin);
+use lib "$Bin/lib";
+use MemcachedTest;
+
+my $filename = "/tmp/memcachetest$$";
+
+my $server = new_memcached("-s $filename");
+my $sock = $server->sock;
+
+ok(-S $filename, "creating unix domain socket $filename");
+
+# set foo (and should get it)
+print $sock "set foo 0 0 6\r\nfooval\r\n";
+
+is(scalar <$sock>, "STORED\r\n", "stored foo");
+mem_get_is($sock, "foo", "fooval");
+
+unlink($filename);
+
+## Just some basic stuff for now...
diff --git a/memcached/t/whitespace.t b/memcached/t/whitespace.t
new file mode 100755
index 00000000..22c610cb
--- /dev/null
+++ b/memcached/t/whitespace.t
@@ -0,0 +1,40 @@
+#!/usr/bin/perl
+use strict;
+use FindBin qw($Bin);
+our @files;
+
+BEGIN {
+    chdir "$Bin/.."
or die;
+
+    my @exempted = qw(Makefile.am ChangeLog doc/Makefile.am);
+    push(@exempted, glob("doc/*.xml"));
+    push(@exempted, glob("doc/xml2rfc/*.xsl"));
+    push(@exempted, glob("m4/*backport*m4"));
+    my %exempted_hash = map { $_ => 1 } @exempted;
+
+    my @stuff = split /\0/, `git ls-files -z -c -m -o --exclude-standard`;
+    @files = grep { ! $exempted_hash{$_} } @stuff;
+
+    # We won't find any files if git isn't installed. If git isn't
+    # installed, they're probably not doing any useful development, or
+    # at the very least am will clean up whitespace when we receive
+    # their patch.
+    unless (@files) {
+        use Test::More;
+        plan skip_all => "Skipping tests probably because you don't have git.";
+        exit 0;
+    }
+}
+
+use Test::More tests => scalar(@files);
+
+foreach my $f (@files) {    # a file passes when stripping tabs and trailing blanks leaves it unchanged
+    open(my $fh, '<', $f) or die "Cannot open $f: $!";    # 3-arg open: a name from git can never be read as a mode
+    my $before = do { local $/; <$fh>; };
+    close ($fh);
+    my $after = $before;
+    $after =~ s/\t/ /g;
+    $after =~ s/ +$//mg;
+    $after .= "\n" unless $after =~ /\n$/;
+    ok ($after eq $before, "$f (see devtools/clean-whitespace.pl)");
+}
diff --git a/memcached/testapp.c b/memcached/testapp.c
new file mode 100644
index 00000000..aba0b905
--- /dev/null
+++ b/memcached/testapp.c
@@ -0,0 +1,1938 @@
+/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+#undef NDEBUG
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "config.h"
+#include "cache.h"
+#include "util.h"
+#include "protocol_binary.h"
+
+#define TMP_TEMPLATE "/tmp/test_file.XXXXXXX"
+
+enum test_return { TEST_SKIP, TEST_PASS, TEST_FAIL };
+
+static pid_t server_pid;
+static in_port_t port;
+static int sock;
+static bool allow_closed_read = false;
+
+static enum test_return cache_create_test(void)
+{
+    cache_t *cache = cache_create("test", sizeof(uint32_t), sizeof(char*),
+                                  NULL, NULL);
+    assert(cache != NULL);
+    cache_destroy(cache);
+    return TEST_PASS;
+}
+
+const uint64_t constructor_pattern = 0xdeadcafebabebeef;
+
+static int cache_constructor(void *buffer, void *notused1, int notused2) { /* stamps the pattern into the new object */
+    uint64_t *ptr = buffer;
+    *ptr = constructor_pattern;
+    return 0;
+}
+
+static enum test_return cache_constructor_test(void)
+{
+    cache_t *cache = cache_create("test", sizeof(uint64_t), sizeof(uint64_t),
+                                  cache_constructor, NULL);
+    assert(cache != NULL);
+    uint64_t *ptr = cache_alloc(cache);
+    uint64_t pattern = *ptr; /* copy before the object is handed back */
+    cache_free(cache, ptr);
+    cache_destroy(cache);
+    return (pattern == constructor_pattern) ? TEST_PASS : TEST_FAIL;
+}
+
+static int cache_fail_constructor(void *buffer, void *notused1, int notused2) { /* always returns non-zero */
+    return 1;
+}
+
+static enum test_return cache_fail_constructor_test(void)
+{
+    enum test_return ret = TEST_PASS;
+
+    cache_t *cache = cache_create("test", sizeof(uint64_t), sizeof(uint64_t),
+                                  cache_fail_constructor, NULL);
+    assert(cache != NULL);
+    uint64_t *ptr = cache_alloc(cache);
+    if (ptr != NULL) { /* the test expects no object when the constructor returns non-zero */
+        ret = TEST_FAIL;
+    }
+    cache_destroy(cache);
+    return ret;
+}
+
+static void *destruct_data = 0;
+
+static void cache_destructor(void *buffer, void *notused) { /* records the last buffer it was called with */
+    destruct_data = buffer;
+}
+
+static enum test_return cache_destructor_test(void)
+{
+    cache_t *cache = cache_create("test", sizeof(uint32_t), sizeof(char*),
+                                  NULL, cache_destructor);
+    assert(cache != NULL);
+    char *ptr = cache_alloc(cache);
+    cache_free(cache, ptr);
+    cache_destroy(cache);
+
+    return (ptr == destruct_data) ? TEST_PASS : TEST_FAIL;
+}
+
+static enum test_return cache_reuse_test(void)
+{
+    int ii;
+    cache_t *cache = cache_create("test", sizeof(uint32_t), sizeof(char*),
+                                  NULL, NULL);
+    char *ptr = cache_alloc(cache);
+    cache_free(cache, ptr);
+    for (ii = 0; ii < 100; ++ii) {
+        char *p = cache_alloc(cache);
+        assert(p == ptr); /* the object just freed is expected to be handed out again */
+        cache_free(cache, ptr);
+    }
+    cache_destroy(cache);
+    return TEST_PASS;
+}
+
+
+static enum test_return cache_bulkalloc(size_t datasize) /* allocate, fill and free ITERATIONS objects of datasize bytes */
+{
+    cache_t *cache = cache_create("test", datasize, sizeof(char*),
+                                  NULL, NULL);
+#define ITERATIONS 1024
+    void *ptr[ITERATIONS];
+
+    for (int ii = 0; ii < ITERATIONS; ++ii) {
+        ptr[ii] = cache_alloc(cache);
+        assert(ptr[ii] != 0);
+        memset(ptr[ii], 0xff, datasize);
+    }
+
+    for (int ii = 0; ii < ITERATIONS; ++ii) {
+        cache_free(cache, ptr[ii]);
+    }
+
+#undef ITERATIONS
+    cache_destroy(cache);
+    return TEST_PASS;
+}
+
+static enum test_return test_issue_161(void)
+{
+    enum test_return ret = cache_bulkalloc(1);
+    if (ret == TEST_PASS) {
+        ret = cache_bulkalloc(512);
+    }
+
+    return ret;
+}
+
+static enum test_return cache_redzone_test(void)
+{
+#ifndef HAVE_UMEM_H
+    cache_t *cache = cache_create("test", sizeof(uint32_t), sizeof(char*),
+                                  NULL, NULL);
+
+    /* Ignore SIGABRT */
+    struct sigaction old_action;
+    struct sigaction action = { .sa_handler = SIG_IGN, .sa_flags = 0};
+    sigemptyset(&action.sa_mask);
+    sigaction(SIGABRT, &action, &old_action);
+
+    /* check memory debug.. */
+    char *p = cache_alloc(cache);
+    char old = *(p - 1);
+    *(p - 1) = 0; /* damage the byte just before the object */
+    cache_free(cache, p);
+    assert(cache_error == -1);
+    *(p - 1) = old;
+
+    p[sizeof(uint32_t)] = 0; /* damage the byte just past the object */
+    cache_free(cache, p);
+    assert(cache_error == 1);
+
+    /* restore signal handler */
+    sigaction(SIGABRT, &old_action, NULL);
+
+    cache_destroy(cache);
+
+    return TEST_PASS;
+#else
+    return TEST_SKIP;
+#endif
+}
+
+static enum test_return test_safe_strtoul(void) {
+    uint32_t val;
+    assert(safe_strtoul("123", &val));
+    assert(val == 123);
+    assert(safe_strtoul("+123", &val));
+    assert(val == 123);
+    assert(!safe_strtoul("", &val)); // empty
+    assert(!safe_strtoul("123BOGUS", &val)); // non-numeric
+    assert(!safe_strtoul(" issue221", &val)); // non-numeric
+    /* Not sure what it does, but this works with ICC :/
+       assert(!safe_strtoul("92837498237498237498029383", &val)); // out of range
+    */
+
+    // extremes:
+    assert(safe_strtoul("4294967295", &val)); // 2**32 - 1
+    assert(val == 4294967295L);
+    /* This actually works on 64-bit ubuntu
+       assert(!safe_strtoul("4294967296", &val)); // 2**32
+    */
+    assert(!safe_strtoul("-1", &val)); // negative
+    return TEST_PASS;
+}
+
+
+static enum test_return test_safe_strtoull(void) {
+    uint64_t val;
+    assert(safe_strtoull("123", &val));
+    assert(val == 123);
+    assert(safe_strtoull("+123", &val));
+    assert(val == 123);
+    assert(!safe_strtoull("", &val)); // empty
+    assert(!safe_strtoull("123BOGUS", &val)); // non-numeric
+    assert(!safe_strtoull("92837498237498237498029383", &val)); // out of range
+    assert(!safe_strtoull(" issue221", &val)); // non-numeric
+
+    // extremes:
+    assert(safe_strtoull("18446744073709551615", &val)); // 2**64 - 1
+    assert(val == 18446744073709551615ULL);
+    assert(!safe_strtoull("18446744073709551616", &val)); // 2**64
+    assert(!safe_strtoull("-1", &val)); // negative
+    return TEST_PASS;
+}
+
+static enum test_return test_safe_strtoll(void) {
+    int64_t val;
+    assert(safe_strtoll("123", &val));
+    assert(val == 123);
+    assert(safe_strtoll("+123", &val));
+    assert(val == 123);
+    assert(safe_strtoll("-123", &val));
+    assert(val == -123);
+    assert(!safe_strtoll("", &val)); // empty
+    assert(!safe_strtoll("123BOGUS", &val)); // non-numeric
+    assert(!safe_strtoll("92837498237498237498029383", &val)); // out of range
+    assert(!safe_strtoll(" issue221", &val)); // non-numeric
+
+    // extremes:
+    assert(!safe_strtoll("18446744073709551615", &val)); // 2**64 - 1
+    assert(safe_strtoll("9223372036854775807", &val)); // 2**63 - 1
+    assert(val == 9223372036854775807LL);
+    /*
+      assert(safe_strtoll("-9223372036854775808", &val)); // -2**63
+      assert(val == -9223372036854775808LL);
+    */
+    assert(!safe_strtoll("-9223372036854775809", &val)); // -2**63 - 1
+
+    // We'll allow space to terminate the string. And leading space.
+    assert(safe_strtoll(" 123 foo", &val));
+    assert(val == 123);
+    return TEST_PASS;
+}
+
+static enum test_return test_safe_strtol(void) {
+    int32_t val;
+    assert(safe_strtol("123", &val));
+    assert(val == 123);
+    assert(safe_strtol("+123", &val));
+    assert(val == 123);
+    assert(safe_strtol("-123", &val));
+    assert(val == -123);
+    assert(!safe_strtol("", &val)); // empty
+    assert(!safe_strtol("123BOGUS", &val)); // non-numeric
+    assert(!safe_strtol("92837498237498237498029383", &val)); // out of range
+    assert(!safe_strtol(" issue221", &val)); // non-numeric
+
+    // extremes:
+    /* This actually works on 64-bit ubuntu
+       assert(!safe_strtol("2147483648", &val)); // (expt 2.0 31.0)
+    */
+    assert(safe_strtol("2147483647", &val)); // (- (expt 2.0 31) 1)
+    assert(val == 2147483647L);
+    /* This actually works on 64-bit ubuntu
+       assert(!safe_strtol("-2147483649", &val)); // (- (expt -2.0 31) 1)
+    */
+
+    // We'll allow space to terminate the string. And leading space.
+    assert(safe_strtol(" 123 foo", &val));
+    assert(val == 123);
+    return TEST_PASS;
+}
+
+/**
+ * Function to start the server and let it listen on a random port
+ * (timeout is passed to ./timedrun and only used when daemon is false)
+ * @param port_out where to store the TCP port number the server is
+ *                 listening on
+ * @param daemon set to true if you want to run the memcached server
+ *               as a daemon process
+ * @return the pid of the memcached server
+ */
+static pid_t start_server(in_port_t *port_out, bool daemon, int timeout) {
+    char environment[80];
+    snprintf(environment, sizeof(environment),
+             "MEMCACHED_PORT_FILENAME=/tmp/ports.%lu", (unsigned long)getpid());
+    char *filename= environment + strlen("MEMCACHED_PORT_FILENAME=");
+    char pid_file[80];
+    snprintf(pid_file, sizeof(pid_file), "/tmp/pid.%lu", (unsigned long)getpid()); /* %lu needs an unsigned long argument */
+
+    remove(filename);
+    remove(pid_file);
+
+#ifdef __sun
+    /* I want to name the corefiles differently so that they don't
+       overwrite each other
+    */
+    char coreadm[128];
+    snprintf(coreadm, sizeof(coreadm),
+             "coreadm -p core.%%f.%%p %lu", (unsigned long)getpid());
+    system(coreadm);
+#endif
+
+    pid_t pid = fork();
+    assert(pid != -1);
+
+    if (pid == 0) {
+        /* Child */
+        char *argv[20];
+        int arg = 0;
+        char tmo[24];
+        snprintf(tmo, sizeof(tmo), "%d", timeout); /* timeout is an int, so %d rather than %u */
+
+        putenv(environment);
+#ifdef __sun
+        putenv("LD_PRELOAD=watchmalloc.so.1");
+        putenv("MALLOC_DEBUG=WATCH");
+#endif
+
+        if (!daemon) {
+            argv[arg++] = "./timedrun";
+            argv[arg++] = tmo;
+        }
+        argv[arg++] = "./memcached-debug";
+        argv[arg++] = "-p";
+        argv[arg++] = "-1";
+        argv[arg++] = "-U";
+        argv[arg++] = "0";
+        /* Handle rpmbuild and the like doing this as root */
+        if (getuid() == 0) {
+            argv[arg++] = "-u";
+            argv[arg++] = "root";
+        }
+        if (daemon) {
+            argv[arg++] = "-d";
+            argv[arg++] = "-P";
+            argv[arg++] = pid_file;
+        }
+#ifdef MESSAGE_DEBUG
+        argv[arg++] = "-vvv";
+#endif
+        argv[arg++] = NULL;
+        assert(execv(argv[0], argv) != -1);
+    }
+
+    /* Yeah just let us "busy-wait" for the file to be created ;-) */
+    while (access(filename, F_OK) == -1) {
+        usleep(10);
+    }
+
+    FILE *fp = fopen(filename, "r");
+    if (fp == NULL) {
+        fprintf(stderr, "Failed to open the file containing port numbers: %s\n",
+                strerror(errno));
+        assert(false);
+    }
+
+    *port_out = (in_port_t)-1;
+    char buffer[80];
+    while ((fgets(buffer, sizeof(buffer), fp)) != NULL) {
+        if (strncmp(buffer, "TCP INET: ", 10) == 0) {
+            int32_t val;
+            assert(safe_strtol(buffer + 10, &val));
+            *port_out = (in_port_t)val;
+        }
+    }
+    fclose(fp);
+    assert(remove(filename) == 0);
+
+    if (daemon) {
+        /* loop and wait for the pid file.. There is a potential race
+         * condition that the server just created the file but isn't
+         * finished writing the content, but I'll take the chance....
+         */
+        while (access(pid_file, F_OK) == -1) {
+            usleep(10);
+        }
+
+        fp = fopen(pid_file, "r");
+        if (fp == NULL) {
+            fprintf(stderr, "Failed to open pid file: %s\n",
+                    strerror(errno));
+            assert(false);
+        }
+        assert(fgets(buffer, sizeof(buffer), fp) != NULL);
+        fclose(fp);
+
+        int32_t val;
+        assert(safe_strtol(buffer, &val));
+        pid = (pid_t)val;
+    }
+
+    return pid;
+}
+
+static enum test_return test_issue_44(void) {
+    in_port_t port;
+    pid_t pid = start_server(&port, true, 15);
+    assert(kill(pid, SIGHUP) == 0);
+    sleep(1);
+    assert(kill(pid, SIGTERM) == 0);
+
+    return TEST_PASS;
+}
+
+static struct addrinfo *lookuphost(const char *hostname, in_port_t port)
+{
+    struct addrinfo *ai = 0;
+    struct addrinfo hints = { .ai_family = AF_UNSPEC,
+                              .ai_protocol = IPPROTO_TCP,
+                              .ai_socktype = SOCK_STREAM };
+    char service[NI_MAXSERV];
+    int error;
+
+    (void)snprintf(service, NI_MAXSERV, "%d", port);
+    if ((error = getaddrinfo(hostname, service, &hints, &ai)) != 0) {
+        if (error != EAI_SYSTEM) {
+            fprintf(stderr, "getaddrinfo(): %s\n", gai_strerror(error));
+        } else {
+            perror("getaddrinfo()");
+        }
+    }
+
+    return ai;
+}
+
+static int connect_server(const char *hostname, in_port_t port, bool nonblock)
+{
+    struct addrinfo *ai = lookuphost(hostname, port);
+    int sock = -1;
+    if (ai != NULL) {
+        if ((sock = socket(ai->ai_family, ai->ai_socktype,
+                           ai->ai_protocol)) != -1) {
+            if (connect(sock, ai->ai_addr, ai->ai_addrlen) == -1) {
+                fprintf(stderr, "Failed to connect socket: %s\n",
+                        strerror(errno));
+                close(sock);
+                sock = -1;
+            } else if (nonblock) {
+                int flags = fcntl(sock, F_GETFL, 0);
+                if (flags < 0 || fcntl(sock, F_SETFL, flags | O_NONBLOCK) < 0) {
+                    fprintf(stderr, "Failed to enable nonblocking mode: %s\n",
+                            strerror(errno));
+                    close(sock);
+                    sock = -1;
+                }
+            }
+        } else {
+            fprintf(stderr, "Failed to create socket: %s\n", strerror(errno));
+        }
+
+        freeaddrinfo(ai);
+    }
+    return sock;
+}
+
+static enum test_return test_vperror(void) {
+    int rv = 0;
+    int oldstderr = dup(STDERR_FILENO);
+    char tmpl[sizeof(TMP_TEMPLATE)+1];
+    strncpy(tmpl, TMP_TEMPLATE, sizeof(TMP_TEMPLATE)+1);
+
+    int newfile = mkstemp(tmpl);
+    assert(newfile > 0);
+    rv = dup2(newfile, STDERR_FILENO);
+    assert(rv == STDERR_FILENO);
+    rv = close(newfile);
+    assert(rv == 0);
+
+    errno = EIO;
+    vperror("Old McDonald had a farm. %s", "EI EIO");
+
+    /* Restore stderr */
+    rv = dup2(oldstderr, STDERR_FILENO);
+    assert(rv == STDERR_FILENO);
+    close(oldstderr); /* the saved duplicate is no longer needed; do not leak it */
+
+    /* Go read the file */
+    char buf[80] = { 0 };
+    FILE *efile = fopen(tmpl, "r");
+    assert(efile);
+    char *prv = fgets(buf, sizeof(buf), efile);
+    assert(prv);
+    fclose(efile);
+
+    unlink(tmpl);
+
+    char expected[80] = { 0 };
+    snprintf(expected, sizeof(expected),
+             "Old McDonald had a farm. EI EIO: %s\n", strerror(EIO));
+
+    /*
+    fprintf(stderr,
+            "\nExpected: ``%s''"
+            "\nGot: ``%s''\n", expected, buf);
+    */
+
+    return strcmp(expected, buf) == 0 ? TEST_PASS : TEST_FAIL;
+}
+
+static void send_ascii_command(const char *buf) {
+    off_t offset = 0;
+    const char* ptr = buf;
+    size_t len = strlen(buf);
+
+    do {
+        ssize_t nw = write(sock, ptr + offset, len - offset);
+        if (nw == -1) {
+            if (errno != EINTR) {
+                fprintf(stderr, "Failed to write: %s\n", strerror(errno));
+                abort();
+            }
+        } else {
+            offset += nw;
+        }
+    } while (offset < len);
+}
+
+/*
+ * This is a dead slow single byte read, but it should only read out
+ * _one_ response and I don't have an input buffer... The current
+ * implementation only supports single-line responses, so if you want to use
+ * it for get commands you need to implement that first ;-)
+ */
+static void read_ascii_response(char *buffer, size_t size) {
+    off_t offset = 0;
+    bool need_more = true;
+    do {
+        ssize_t nr = read(sock, buffer + offset, 1);
+        if (nr == -1) {
+            if (errno != EINTR) {
+                fprintf(stderr, "Failed to read: %s\n", strerror(errno));
+                abort();
+            }
+        } else {
+            assert(nr == 1);
+            if (buffer[offset] == '\n') {
+                need_more = false;
+                buffer[offset + 1] = '\0';
+            }
+            offset += nr;
+            assert(offset + 1 < size);
+        }
+    } while (need_more);
+}
+
+static enum test_return test_issue_92(void) {
+    char buffer[1024];
+
+    close(sock);
+    sock = connect_server("127.0.0.1", port, false);
+
+    send_ascii_command("stats cachedump 1 0 0\r\n");
+    read_ascii_response(buffer, sizeof(buffer));
+    assert(strncmp(buffer, "END", strlen("END")) == 0);
+
+    send_ascii_command("stats cachedump 200 0 0\r\n");
+    read_ascii_response(buffer, sizeof(buffer));
+    assert(strncmp(buffer, "CLIENT_ERROR", strlen("CLIENT_ERROR")) == 0);
+
+    close(sock);
+    sock = connect_server("127.0.0.1", port, false);
+    return TEST_PASS;
+}
+
+static enum test_return test_issue_102(void) {
+    char buffer[4096];
+    memset(buffer, ' ', sizeof(buffer));
+    buffer[sizeof(buffer) - 1] = '\0';
+
+    close(sock);
+    sock = connect_server("127.0.0.1", port, false);
+
+    send_ascii_command(buffer);
+    /* verify that the server closed the connection */
+    assert(read(sock, buffer, sizeof(buffer)) == 0);
+    close(sock);
+    sock = connect_server("127.0.0.1", port, false);
+
+    snprintf(buffer, sizeof(buffer), "gets ");
+    size_t offset = 5;
+    while (offset < 4000) {
+        offset += snprintf(buffer + offset, sizeof(buffer) - offset,
+                           "%010u ", (unsigned int)offset);
+    }
+
+    send_ascii_command(buffer);
+    usleep(250);
+
+    send_ascii_command("\r\n");
+    char rsp[80];
+    read_ascii_response(rsp, sizeof(rsp));
+    assert(strncmp(rsp, "END", strlen("END")) == 0);
+    buffer[3]= ' ';
+    send_ascii_command(buffer);
+    usleep(250);
+    send_ascii_command("\r\n");
+    read_ascii_response(rsp, sizeof(rsp));
+    assert(strncmp(rsp, "END", strlen("END")) == 0);
+
+    memset(buffer, ' ', sizeof(buffer));
+    int len = snprintf(buffer + 101, sizeof(buffer) - 101, "gets foo");
+    buffer[101 + len] = ' ';
+    buffer[sizeof(buffer) - 1] = '\0';
+    send_ascii_command(buffer);
+    /* verify that the server closed the connection */
+    assert(read(sock, buffer, sizeof(buffer)) == 0);
+
+    close(sock);
+    sock = connect_server("127.0.0.1", port, false);
+
+    return TEST_PASS;
+}
+
+static enum test_return start_memcached_server(void) {
+    server_pid = start_server(&port, false, 600);
+    sock = connect_server("127.0.0.1", port, false);
+    return TEST_PASS;
+}
+
+static enum test_return stop_memcached_server(void) {
+    close(sock);
+    assert(kill(server_pid, SIGTERM) == 0);
+    return TEST_PASS;
+}
+
+static void safe_send(const void* buf, size_t len, bool hickup)
+{
+    off_t offset = 0;
+    const char* ptr = buf;
+#ifdef MESSAGE_DEBUG
+    uint8_t val = *ptr;
+    assert(val == (uint8_t)0x80);
+    fprintf(stderr, "About to send %lu bytes:", (unsigned long)len);
+    for (int ii = 0; ii < len; ++ii) {
+        if (ii % 4 == 0) {
+            fprintf(stderr, "\n ");
+        }
+        val = *(ptr + ii);
+        fprintf(stderr, " 0x%02x", val);
+    }
+    fprintf(stderr, "\n");
+    usleep(500);
+#endif
+
+    do {
+        size_t num_bytes = len - offset;
+        if (hickup) {
+            if (num_bytes > 1024) {
+                num_bytes = (rand() % 1023) + 1;
+            }
+
} + + ssize_t nw = write(sock, ptr + offset, num_bytes); + if (nw == -1) { + if (errno != EINTR) { + fprintf(stderr, "Failed to write: %s\n", strerror(errno)); + abort(); + } + } else { + if (hickup) { + usleep(100); + } + offset += nw; + } + } while (offset < len); +} + +static bool safe_recv(void *buf, size_t len) { + if (len == 0) { + return true; + } + off_t offset = 0; + do { + ssize_t nr = read(sock, ((char*)buf) + offset, len - offset); + if (nr == -1) { + if (errno != EINTR) { + fprintf(stderr, "Failed to read: %s\n", strerror(errno)); + abort(); + } + } else { + if (nr == 0 && allow_closed_read) { + return false; + } + assert(nr != 0); + offset += nr; + } + } while (offset < len); + + return true; +} + +static bool safe_recv_packet(void *buf, size_t size) { + protocol_binary_response_no_extras *response = buf; + assert(size > sizeof(*response)); + if (!safe_recv(response, sizeof(*response))) { + return false; + } + response->message.header.response.keylen = ntohs(response->message.header.response.keylen); + response->message.header.response.status = ntohs(response->message.header.response.status); + response->message.header.response.bodylen = ntohl(response->message.header.response.bodylen); + + size_t len = sizeof(*response); + + char *ptr = buf; + ptr += len; + if (!safe_recv(ptr, response->message.header.response.bodylen)) { + return false; + } + +#ifdef MESSAGE_DEBUG + usleep(500); + ptr = buf; + len += response->message.header.response.bodylen; + uint8_t val = *ptr; + assert(val == (uint8_t)0x81); + fprintf(stderr, "Received %lu bytes:", (unsigned long)len); + for (int ii = 0; ii < len; ++ii) { + if (ii % 4 == 0) { + fprintf(stderr, "\n "); + } + val = *(ptr + ii); + fprintf(stderr, " 0x%02x", val); + } + fprintf(stderr, "\n"); +#endif + return true; +} + +static off_t storage_command(char*buf, + size_t bufsz, + uint8_t cmd, + const void* key, + size_t keylen, + const void* dta, + size_t dtalen, + uint32_t flags, + uint32_t exp) { + /* all of the 
storage commands use the same command layout */ + protocol_binary_request_set *request = (void*)buf; + assert(bufsz > sizeof(*request) + keylen + dtalen); + + memset(request, 0, sizeof(*request)); + request->message.header.request.magic = PROTOCOL_BINARY_REQ; + request->message.header.request.opcode = cmd; + request->message.header.request.keylen = htons(keylen); + request->message.header.request.extlen = 8; + request->message.header.request.bodylen = htonl(keylen + 8 + dtalen); + request->message.header.request.opaque = 0xdeadbeef; + request->message.body.flags = flags; + request->message.body.expiration = exp; + + off_t key_offset = sizeof(protocol_binary_request_no_extras) + 8; + + memcpy(buf + key_offset, key, keylen); + if (dta != NULL) { + memcpy(buf + key_offset + keylen, dta, dtalen); + } + + return key_offset + keylen + dtalen; +} + +static off_t raw_command(char* buf, + size_t bufsz, + uint8_t cmd, + const void* key, + size_t keylen, + const void* dta, + size_t dtalen) { + /* all of the storage commands use the same command layout */ + protocol_binary_request_no_extras *request = (void*)buf; + assert(bufsz > sizeof(*request) + keylen + dtalen); + + memset(request, 0, sizeof(*request)); + request->message.header.request.magic = PROTOCOL_BINARY_REQ; + request->message.header.request.opcode = cmd; + request->message.header.request.keylen = htons(keylen); + request->message.header.request.bodylen = htonl(keylen + dtalen); + request->message.header.request.opaque = 0xdeadbeef; + + off_t key_offset = sizeof(protocol_binary_request_no_extras); + + if (key != NULL) { + memcpy(buf + key_offset, key, keylen); + } + if (dta != NULL) { + memcpy(buf + key_offset + keylen, dta, dtalen); + } + + return sizeof(*request) + keylen + dtalen; +} + +static off_t flush_command(char* buf, size_t bufsz, uint8_t cmd, uint32_t exptime, bool use_extra) { + protocol_binary_request_flush *request = (void*)buf; + assert(bufsz > sizeof(*request)); + + memset(request, 0, 
sizeof(*request)); + request->message.header.request.magic = PROTOCOL_BINARY_REQ; + request->message.header.request.opcode = cmd; + + off_t size = sizeof(protocol_binary_request_no_extras); + if (use_extra) { + request->message.header.request.extlen = 4; + request->message.body.expiration = htonl(exptime); + request->message.header.request.bodylen = htonl(4); + size += 4; + } + + request->message.header.request.opaque = 0xdeadbeef; + + return size; +} + + +static off_t touch_command(char* buf, + size_t bufsz, + uint8_t cmd, + const void* key, + size_t keylen, + uint32_t exptime) { + protocol_binary_request_touch *request = (void*)buf; + assert(bufsz > sizeof(*request)); + + memset(request, 0, sizeof(*request)); + request->message.header.request.magic = PROTOCOL_BINARY_REQ; + request->message.header.request.opcode = cmd; + + request->message.header.request.keylen = htons(keylen); + request->message.header.request.extlen = 4; + request->message.body.expiration = htonl(exptime); + request->message.header.request.bodylen = htonl(keylen + 4); + + request->message.header.request.opaque = 0xdeadbeef; + + off_t key_offset = sizeof(protocol_binary_request_no_extras) + 4; + + memcpy(buf + key_offset, key, keylen); + return sizeof(protocol_binary_request_no_extras) + 4 + keylen; +} + +static off_t arithmetic_command(char* buf, + size_t bufsz, + uint8_t cmd, + const void* key, + size_t keylen, + uint64_t delta, + uint64_t initial, + uint32_t exp) { + protocol_binary_request_incr *request = (void*)buf; + assert(bufsz > sizeof(*request) + keylen); + + memset(request, 0, sizeof(*request)); + request->message.header.request.magic = PROTOCOL_BINARY_REQ; + request->message.header.request.opcode = cmd; + request->message.header.request.keylen = htons(keylen); + request->message.header.request.extlen = 20; + request->message.header.request.bodylen = htonl(keylen + 20); + request->message.header.request.opaque = 0xdeadbeef; + request->message.body.delta = htonll(delta); + 
request->message.body.initial = htonll(initial); + request->message.body.expiration = htonl(exp); + + off_t key_offset = sizeof(protocol_binary_request_no_extras) + 20; + + memcpy(buf + key_offset, key, keylen); + return key_offset + keylen; +} + +static void validate_response_header(protocol_binary_response_no_extras *response, + uint8_t cmd, uint16_t status) +{ + assert(response->message.header.response.magic == PROTOCOL_BINARY_RES); + assert(response->message.header.response.opcode == cmd); + assert(response->message.header.response.datatype == PROTOCOL_BINARY_RAW_BYTES); + assert(response->message.header.response.status == status); + assert(response->message.header.response.opaque == 0xdeadbeef); + + if (status == PROTOCOL_BINARY_RESPONSE_SUCCESS) { + switch (cmd) { + case PROTOCOL_BINARY_CMD_ADDQ: + case PROTOCOL_BINARY_CMD_APPENDQ: + case PROTOCOL_BINARY_CMD_DECREMENTQ: + case PROTOCOL_BINARY_CMD_DELETEQ: + case PROTOCOL_BINARY_CMD_FLUSHQ: + case PROTOCOL_BINARY_CMD_INCREMENTQ: + case PROTOCOL_BINARY_CMD_PREPENDQ: + case PROTOCOL_BINARY_CMD_QUITQ: + case PROTOCOL_BINARY_CMD_REPLACEQ: + case PROTOCOL_BINARY_CMD_SETQ: + assert("Quiet command shouldn't return on success" == NULL); + default: + break; + } + + switch (cmd) { + case PROTOCOL_BINARY_CMD_ADD: + case PROTOCOL_BINARY_CMD_REPLACE: + case PROTOCOL_BINARY_CMD_SET: + case PROTOCOL_BINARY_CMD_APPEND: + case PROTOCOL_BINARY_CMD_PREPEND: + assert(response->message.header.response.keylen == 0); + assert(response->message.header.response.extlen == 0); + assert(response->message.header.response.bodylen == 0); + assert(response->message.header.response.cas != 0); + break; + case PROTOCOL_BINARY_CMD_FLUSH: + case PROTOCOL_BINARY_CMD_NOOP: + case PROTOCOL_BINARY_CMD_QUIT: + case PROTOCOL_BINARY_CMD_DELETE: + assert(response->message.header.response.keylen == 0); + assert(response->message.header.response.extlen == 0); + assert(response->message.header.response.bodylen == 0); + 
assert(response->message.header.response.cas == 0); + break; + + case PROTOCOL_BINARY_CMD_DECREMENT: + case PROTOCOL_BINARY_CMD_INCREMENT: + assert(response->message.header.response.keylen == 0); + assert(response->message.header.response.extlen == 0); + assert(response->message.header.response.bodylen == 8); + assert(response->message.header.response.cas != 0); + break; + + case PROTOCOL_BINARY_CMD_STAT: + assert(response->message.header.response.extlen == 0); + /* key and value exists in all packets except in the terminating */ + assert(response->message.header.response.cas == 0); + break; + + case PROTOCOL_BINARY_CMD_VERSION: + assert(response->message.header.response.keylen == 0); + assert(response->message.header.response.extlen == 0); + assert(response->message.header.response.bodylen != 0); + assert(response->message.header.response.cas == 0); + break; + + case PROTOCOL_BINARY_CMD_GET: + case PROTOCOL_BINARY_CMD_GETQ: + assert(response->message.header.response.keylen == 0); + assert(response->message.header.response.extlen == 4); + assert(response->message.header.response.cas != 0); + break; + + case PROTOCOL_BINARY_CMD_GETK: + case PROTOCOL_BINARY_CMD_GETKQ: + assert(response->message.header.response.keylen != 0); + assert(response->message.header.response.extlen == 4); + assert(response->message.header.response.cas != 0); + break; + + default: + /* Undefined command code */ + break; + } + } else { + assert(response->message.header.response.cas == 0); + assert(response->message.header.response.extlen == 0); + if (cmd != PROTOCOL_BINARY_CMD_GETK && + cmd != PROTOCOL_BINARY_CMD_GATK) { + assert(response->message.header.response.keylen == 0); + } + } +} + +static enum test_return test_binary_noop(void) { + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } buffer; + + size_t len = raw_command(buffer.bytes, sizeof(buffer.bytes), + PROTOCOL_BINARY_CMD_NOOP, + NULL, 0, NULL, 0); + + 
safe_send(buffer.bytes, len, false); + safe_recv_packet(buffer.bytes, sizeof(buffer.bytes)); + validate_response_header(&buffer.response, PROTOCOL_BINARY_CMD_NOOP, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + + return TEST_PASS; +} + +static enum test_return test_binary_quit_impl(uint8_t cmd) { + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } buffer; + size_t len = raw_command(buffer.bytes, sizeof(buffer.bytes), + cmd, NULL, 0, NULL, 0); + + safe_send(buffer.bytes, len, false); + if (cmd == PROTOCOL_BINARY_CMD_QUIT) { + safe_recv_packet(buffer.bytes, sizeof(buffer.bytes)); + validate_response_header(&buffer.response, PROTOCOL_BINARY_CMD_QUIT, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + } + + /* Socket should be closed now, read should return 0 */ + assert(read(sock, buffer.bytes, sizeof(buffer.bytes)) == 0); + close(sock); + sock = connect_server("127.0.0.1", port, false); + + return TEST_PASS; +} + +static enum test_return test_binary_quit(void) { + return test_binary_quit_impl(PROTOCOL_BINARY_CMD_QUIT); +} + +static enum test_return test_binary_quitq(void) { + return test_binary_quit_impl(PROTOCOL_BINARY_CMD_QUITQ); +} + +static enum test_return test_binary_set_impl(const char *key, uint8_t cmd) { + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } send, receive; + uint64_t value = 0xdeadbeefdeadcafe; + size_t len = storage_command(send.bytes, sizeof(send.bytes), cmd, + key, strlen(key), &value, sizeof(value), + 0, 0); + + /* Set should work over and over again */ + int ii; + for (ii = 0; ii < 10; ++ii) { + safe_send(send.bytes, len, false); + if (cmd == PROTOCOL_BINARY_CMD_SET) { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + } + } + + if (cmd == PROTOCOL_BINARY_CMD_SETQ) { + return test_binary_noop(); + } + + 
send.request.message.header.request.cas = receive.response.message.header.response.cas; + safe_send(send.bytes, len, false); + if (cmd == PROTOCOL_BINARY_CMD_SET) { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + assert(receive.response.message.header.response.cas != send.request.message.header.request.cas); + } else { + return test_binary_noop(); + } + + return TEST_PASS; +} + +static enum test_return test_binary_set(void) { + return test_binary_set_impl("test_binary_set", PROTOCOL_BINARY_CMD_SET); +} + +static enum test_return test_binary_setq(void) { + return test_binary_set_impl("test_binary_setq", PROTOCOL_BINARY_CMD_SETQ); +} + + +static enum test_return test_binary_add_impl(const char *key, uint8_t cmd) { + uint64_t value = 0xdeadbeefdeadcafe; + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } send, receive; + size_t len = storage_command(send.bytes, sizeof(send.bytes), cmd, key, + strlen(key), &value, sizeof(value), + 0, 0); + + /* Add should only work the first time */ + int ii; + for (ii = 0; ii < 10; ++ii) { + safe_send(send.bytes, len, false); + if (ii == 0) { + if (cmd == PROTOCOL_BINARY_CMD_ADD) { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + } + } else { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS); + } + } + + return TEST_PASS; +} + +static enum test_return test_binary_add(void) { + return test_binary_add_impl("test_binary_add", PROTOCOL_BINARY_CMD_ADD); +} + +static enum test_return test_binary_addq(void) { + return test_binary_add_impl("test_binary_addq", PROTOCOL_BINARY_CMD_ADDQ); +} + +static enum test_return test_binary_replace_impl(const char* key, uint8_t cmd) { + 
uint64_t value = 0xdeadbeefdeadcafe; + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } send, receive; + size_t len = storage_command(send.bytes, sizeof(send.bytes), cmd, + key, strlen(key), &value, sizeof(value), + 0, 0); + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_KEY_ENOENT); + len = storage_command(send.bytes, sizeof(send.bytes), + PROTOCOL_BINARY_CMD_ADD, + key, strlen(key), &value, sizeof(value), 0, 0); + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_ADD, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + + len = storage_command(send.bytes, sizeof(send.bytes), cmd, + key, strlen(key), &value, sizeof(value), 0, 0); + int ii; + for (ii = 0; ii < 10; ++ii) { + safe_send(send.bytes, len, false); + if (cmd == PROTOCOL_BINARY_CMD_REPLACE) { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, + PROTOCOL_BINARY_CMD_REPLACE, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + } + } + + if (cmd == PROTOCOL_BINARY_CMD_REPLACEQ) { + test_binary_noop(); + } + + return TEST_PASS; +} + +static enum test_return test_binary_replace(void) { + return test_binary_replace_impl("test_binary_replace", + PROTOCOL_BINARY_CMD_REPLACE); +} + +static enum test_return test_binary_replaceq(void) { + return test_binary_replace_impl("test_binary_replaceq", + PROTOCOL_BINARY_CMD_REPLACEQ); +} + +static enum test_return test_binary_delete_impl(const char *key, uint8_t cmd) { + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } send, receive; + size_t len = raw_command(send.bytes, sizeof(send.bytes), cmd, + key, strlen(key), NULL, 0); + + safe_send(send.bytes, len, false); + 
safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_KEY_ENOENT); + len = storage_command(send.bytes, sizeof(send.bytes), + PROTOCOL_BINARY_CMD_ADD, + key, strlen(key), NULL, 0, 0, 0); + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_ADD, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + + len = raw_command(send.bytes, sizeof(send.bytes), + cmd, key, strlen(key), NULL, 0); + safe_send(send.bytes, len, false); + + if (cmd == PROTOCOL_BINARY_CMD_DELETE) { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_DELETE, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + } + + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_KEY_ENOENT); + + return TEST_PASS; +} + +static enum test_return test_binary_delete(void) { + return test_binary_delete_impl("test_binary_delete", + PROTOCOL_BINARY_CMD_DELETE); +} + +static enum test_return test_binary_deleteq(void) { + return test_binary_delete_impl("test_binary_deleteq", + PROTOCOL_BINARY_CMD_DELETEQ); +} + +static enum test_return test_binary_get_impl(const char *key, uint8_t cmd) { + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } send, receive; + size_t len = raw_command(send.bytes, sizeof(send.bytes), cmd, + key, strlen(key), NULL, 0); + + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_KEY_ENOENT); + + len = storage_command(send.bytes, sizeof(send.bytes), + PROTOCOL_BINARY_CMD_ADD, + key, strlen(key), NULL, 0, + 0, 0); + safe_send(send.bytes, len, false); + 
safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_ADD, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + + /* run a little pipeline test ;-) */ + len = 0; + int ii; + for (ii = 0; ii < 10; ++ii) { + union { + protocol_binary_request_no_extras request; + char bytes[1024]; + } temp; + size_t l = raw_command(temp.bytes, sizeof(temp.bytes), + cmd, key, strlen(key), NULL, 0); + memcpy(send.bytes + len, temp.bytes, l); + len += l; + } + + safe_send(send.bytes, len, false); + for (ii = 0; ii < 10; ++ii) { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + } + + return TEST_PASS; +} + +static enum test_return test_binary_get(void) { + return test_binary_get_impl("test_binary_get", PROTOCOL_BINARY_CMD_GET); +} + +static enum test_return test_binary_getk(void) { + return test_binary_get_impl("test_binary_getk", PROTOCOL_BINARY_CMD_GETK); +} + +static enum test_return test_binary_getq_impl(const char *key, uint8_t cmd) { + const char *missing = "test_binary_getq_missing"; + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } send, temp, receive; + size_t len = storage_command(send.bytes, sizeof(send.bytes), + PROTOCOL_BINARY_CMD_ADD, + key, strlen(key), NULL, 0, + 0, 0); + size_t len2 = raw_command(temp.bytes, sizeof(temp.bytes), cmd, + missing, strlen(missing), NULL, 0); + /* I need to change the first opaque so that I can separate the two + * return packets */ + temp.request.message.header.request.opaque = 0xfeedface; + memcpy(send.bytes + len, temp.bytes, len2); + len += len2; + + len2 = raw_command(temp.bytes, sizeof(temp.bytes), cmd, + key, strlen(key), NULL, 0); + memcpy(send.bytes + len, temp.bytes, len2); + len += len2; + + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + 
validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_ADD, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + /* The first GETQ shouldn't return anything */ + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + + return TEST_PASS; +} + +static enum test_return test_binary_getq(void) { + return test_binary_getq_impl("test_binary_getq", PROTOCOL_BINARY_CMD_GETQ); +} + +static enum test_return test_binary_getkq(void) { + return test_binary_getq_impl("test_binary_getkq", PROTOCOL_BINARY_CMD_GETKQ); +} + +static enum test_return test_binary_incr_impl(const char* key, uint8_t cmd) { + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response_header; + protocol_binary_response_incr response; + char bytes[1024]; + } send, receive; + size_t len = arithmetic_command(send.bytes, sizeof(send.bytes), cmd, + key, strlen(key), 1, 0, 0); + + int ii; + for (ii = 0; ii < 10; ++ii) { + safe_send(send.bytes, len, false); + if (cmd == PROTOCOL_BINARY_CMD_INCREMENT) { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response_header, cmd, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + assert(ntohll(receive.response.message.body.value) == ii); + } + } + + if (cmd == PROTOCOL_BINARY_CMD_INCREMENTQ) { + test_binary_noop(); + } + return TEST_PASS; +} + +static enum test_return test_binary_incr(void) { + return test_binary_incr_impl("test_binary_incr", + PROTOCOL_BINARY_CMD_INCREMENT); +} + +static enum test_return test_binary_incrq(void) { + return test_binary_incr_impl("test_binary_incrq", + PROTOCOL_BINARY_CMD_INCREMENTQ); +} + +static enum test_return test_binary_decr_impl(const char* key, uint8_t cmd) { + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response_header; + protocol_binary_response_decr response; + char bytes[1024]; + } send, receive; + size_t len = 
arithmetic_command(send.bytes, sizeof(send.bytes), cmd, + key, strlen(key), 1, 9, 0); + + int ii; + for (ii = 9; ii >= 0; --ii) { + safe_send(send.bytes, len, false); + if (cmd == PROTOCOL_BINARY_CMD_DECREMENT) { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response_header, cmd, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + assert(ntohll(receive.response.message.body.value) == ii); + } + } + + /* decr on 0 should not wrap */ + safe_send(send.bytes, len, false); + if (cmd == PROTOCOL_BINARY_CMD_DECREMENT) { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response_header, cmd, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + assert(ntohll(receive.response.message.body.value) == 0); + } else { + test_binary_noop(); + } + + return TEST_PASS; +} + +static enum test_return test_binary_decr(void) { + return test_binary_decr_impl("test_binary_decr", + PROTOCOL_BINARY_CMD_DECREMENT); +} + +static enum test_return test_binary_decrq(void) { + return test_binary_decr_impl("test_binary_decrq", + PROTOCOL_BINARY_CMD_DECREMENTQ); +} + +static enum test_return test_binary_version(void) { + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } buffer; + + size_t len = raw_command(buffer.bytes, sizeof(buffer.bytes), + PROTOCOL_BINARY_CMD_VERSION, + NULL, 0, NULL, 0); + + safe_send(buffer.bytes, len, false); + safe_recv_packet(buffer.bytes, sizeof(buffer.bytes)); + validate_response_header(&buffer.response, PROTOCOL_BINARY_CMD_VERSION, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + + return TEST_PASS; +} + +static enum test_return test_binary_flush_impl(const char *key, uint8_t cmd) { + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } send, receive; + + size_t len = storage_command(send.bytes, sizeof(send.bytes), + PROTOCOL_BINARY_CMD_ADD, + key, strlen(key), NULL, 0, 0, 
0); + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_ADD, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + + len = flush_command(send.bytes, sizeof(send.bytes), cmd, 2, true); + safe_send(send.bytes, len, false); + if (cmd == PROTOCOL_BINARY_CMD_FLUSH) { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + } + + len = raw_command(send.bytes, sizeof(send.bytes), PROTOCOL_BINARY_CMD_GET, + key, strlen(key), NULL, 0); + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_GET, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + + sleep(2); + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_GET, + PROTOCOL_BINARY_RESPONSE_KEY_ENOENT); + + int ii; + for (ii = 0; ii < 2; ++ii) { + len = storage_command(send.bytes, sizeof(send.bytes), + PROTOCOL_BINARY_CMD_ADD, + key, strlen(key), NULL, 0, 0, 0); + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_ADD, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + + len = flush_command(send.bytes, sizeof(send.bytes), cmd, 0, ii == 0); + safe_send(send.bytes, len, false); + if (cmd == PROTOCOL_BINARY_CMD_FLUSH) { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + } + + len = raw_command(send.bytes, sizeof(send.bytes), + PROTOCOL_BINARY_CMD_GET, + key, strlen(key), NULL, 0); + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_GET, + 
PROTOCOL_BINARY_RESPONSE_KEY_ENOENT); + } + + return TEST_PASS; +} + +static enum test_return test_binary_flush(void) { + return test_binary_flush_impl("test_binary_flush", + PROTOCOL_BINARY_CMD_FLUSH); +} + +static enum test_return test_binary_flushq(void) { + return test_binary_flush_impl("test_binary_flushq", + PROTOCOL_BINARY_CMD_FLUSHQ); +} + +static enum test_return test_binary_concat_impl(const char *key, uint8_t cmd) { + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } send, receive; + const char *value = "world"; + + size_t len = raw_command(send.bytes, sizeof(send.bytes), cmd, + key, strlen(key), value, strlen(value)); + + + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_NOT_STORED); + + len = storage_command(send.bytes, sizeof(send.bytes), + PROTOCOL_BINARY_CMD_ADD, + key, strlen(key), value, strlen(value), 0, 0); + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_ADD, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + + len = raw_command(send.bytes, sizeof(send.bytes), cmd, + key, strlen(key), value, strlen(value)); + safe_send(send.bytes, len, false); + + if (cmd == PROTOCOL_BINARY_CMD_APPEND || cmd == PROTOCOL_BINARY_CMD_PREPEND) { + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, cmd, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + } else { + len = raw_command(send.bytes, sizeof(send.bytes), PROTOCOL_BINARY_CMD_NOOP, + NULL, 0, NULL, 0); + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_NOOP, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + } + + len = raw_command(send.bytes, sizeof(send.bytes), 
PROTOCOL_BINARY_CMD_GETK, + key, strlen(key), NULL, 0); + + safe_send(send.bytes, len, false); + safe_recv_packet(receive.bytes, sizeof(receive.bytes)); + validate_response_header(&receive.response, PROTOCOL_BINARY_CMD_GETK, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + + assert(receive.response.message.header.response.keylen == strlen(key)); + assert(receive.response.message.header.response.bodylen == (strlen(key) + 2*strlen(value) + 4)); + + char *ptr = receive.bytes; + ptr += sizeof(receive.response); + ptr += 4; + + assert(memcmp(ptr, key, strlen(key)) == 0); + ptr += strlen(key); + assert(memcmp(ptr, value, strlen(value)) == 0); + ptr += strlen(value); + assert(memcmp(ptr, value, strlen(value)) == 0); + + return TEST_PASS; +} + +static enum test_return test_binary_append(void) { + return test_binary_concat_impl("test_binary_append", + PROTOCOL_BINARY_CMD_APPEND); +} + +static enum test_return test_binary_prepend(void) { + return test_binary_concat_impl("test_binary_prepend", + PROTOCOL_BINARY_CMD_PREPEND); +} + +static enum test_return test_binary_appendq(void) { + return test_binary_concat_impl("test_binary_appendq", + PROTOCOL_BINARY_CMD_APPENDQ); +} + +static enum test_return test_binary_prependq(void) { + return test_binary_concat_impl("test_binary_prependq", + PROTOCOL_BINARY_CMD_PREPENDQ); +} + +static enum test_return test_binary_stat(void) { + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } buffer; + + size_t len = raw_command(buffer.bytes, sizeof(buffer.bytes), + PROTOCOL_BINARY_CMD_STAT, + NULL, 0, NULL, 0); + + safe_send(buffer.bytes, len, false); + do { + safe_recv_packet(buffer.bytes, sizeof(buffer.bytes)); + validate_response_header(&buffer.response, PROTOCOL_BINARY_CMD_STAT, + PROTOCOL_BINARY_RESPONSE_SUCCESS); + } while (buffer.response.message.header.response.keylen != 0); + + return TEST_PASS; +} + +static enum test_return test_binary_illegal(void) { + uint8_t cmd = 0x25; + 
while (cmd != 0x00) { + union { + protocol_binary_request_no_extras request; + protocol_binary_response_no_extras response; + char bytes[1024]; + } buffer; + size_t len = raw_command(buffer.bytes, sizeof(buffer.bytes), + cmd, NULL, 0, NULL, 0); + safe_send(buffer.bytes, len, false); + safe_recv_packet(buffer.bytes, sizeof(buffer.bytes)); + validate_response_header(&buffer.response, cmd, + PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND); + ++cmd; + } + + return TEST_PASS; +} + +volatile bool hickup_thread_running; + +static void *binary_hickup_recv_verification_thread(void *arg) { + protocol_binary_response_no_extras *response = malloc(65*1024); + if (response != NULL) { + while (safe_recv_packet(response, 65*1024)) { + /* Just validate the packet format */ + validate_response_header(response, + response->message.header.response.opcode, + response->message.header.response.status); + } + free(response); + } + hickup_thread_running = false; + allow_closed_read = false; + return NULL; +} + +static enum test_return test_binary_pipeline_hickup_chunk(void *buffer, size_t buffersize) { + off_t offset = 0; + char *key[256]; + uint64_t value = 0xfeedfacedeadbeef; + + while (hickup_thread_running && + offset + sizeof(protocol_binary_request_no_extras) < buffersize) { + union { + protocol_binary_request_no_extras request; + char bytes[65 * 1024]; + } command; + uint8_t cmd = (uint8_t)(rand() & 0xff); + size_t len; + size_t keylen = (rand() % 250) + 1; + + switch (cmd) { + case PROTOCOL_BINARY_CMD_ADD: + case PROTOCOL_BINARY_CMD_ADDQ: + case PROTOCOL_BINARY_CMD_REPLACE: + case PROTOCOL_BINARY_CMD_REPLACEQ: + case PROTOCOL_BINARY_CMD_SET: + case PROTOCOL_BINARY_CMD_SETQ: + len = storage_command(command.bytes, sizeof(command.bytes), cmd, + key, keylen , &value, sizeof(value), + 0, 0); + break; + case PROTOCOL_BINARY_CMD_APPEND: + case PROTOCOL_BINARY_CMD_APPENDQ: + case PROTOCOL_BINARY_CMD_PREPEND: + case PROTOCOL_BINARY_CMD_PREPENDQ: + len = raw_command(command.bytes, 
sizeof(command.bytes), cmd, + key, keylen, &value, sizeof(value)); + break; + case PROTOCOL_BINARY_CMD_FLUSH: + case PROTOCOL_BINARY_CMD_FLUSHQ: + len = raw_command(command.bytes, sizeof(command.bytes), cmd, + NULL, 0, NULL, 0); + break; + case PROTOCOL_BINARY_CMD_NOOP: + len = raw_command(command.bytes, sizeof(command.bytes), cmd, + NULL, 0, NULL, 0); + break; + case PROTOCOL_BINARY_CMD_DELETE: + case PROTOCOL_BINARY_CMD_DELETEQ: + len = raw_command(command.bytes, sizeof(command.bytes), cmd, + key, keylen, NULL, 0); + break; + case PROTOCOL_BINARY_CMD_DECREMENT: + case PROTOCOL_BINARY_CMD_DECREMENTQ: + case PROTOCOL_BINARY_CMD_INCREMENT: + case PROTOCOL_BINARY_CMD_INCREMENTQ: + len = arithmetic_command(command.bytes, sizeof(command.bytes), cmd, + key, keylen, 1, 0, 0); + break; + case PROTOCOL_BINARY_CMD_VERSION: + len = raw_command(command.bytes, sizeof(command.bytes), + PROTOCOL_BINARY_CMD_VERSION, + NULL, 0, NULL, 0); + break; + case PROTOCOL_BINARY_CMD_GET: + case PROTOCOL_BINARY_CMD_GETK: + case PROTOCOL_BINARY_CMD_GETKQ: + case PROTOCOL_BINARY_CMD_GETQ: + len = raw_command(command.bytes, sizeof(command.bytes), cmd, + key, keylen, NULL, 0); + break; + + case PROTOCOL_BINARY_CMD_TOUCH: + case PROTOCOL_BINARY_CMD_GAT: + case PROTOCOL_BINARY_CMD_GATQ: + case PROTOCOL_BINARY_CMD_GATK: + case PROTOCOL_BINARY_CMD_GATKQ: + len = touch_command(command.bytes, sizeof(command.bytes), cmd, + key, keylen, 10); + break; + + case PROTOCOL_BINARY_CMD_STAT: + len = raw_command(command.bytes, sizeof(command.bytes), + PROTOCOL_BINARY_CMD_STAT, + NULL, 0, NULL, 0); + break; + + case PROTOCOL_BINARY_CMD_SASL_LIST_MECHS: + case PROTOCOL_BINARY_CMD_SASL_AUTH: + case PROTOCOL_BINARY_CMD_SASL_STEP: + /* Ignoring SASL */ + case PROTOCOL_BINARY_CMD_QUITQ: + case PROTOCOL_BINARY_CMD_QUIT: + /* I don't want to pass on the quit commands ;-) */ + cmd |= 0xf0; + /* FALLTHROUGH */ + default: + len = raw_command(command.bytes, sizeof(command.bytes), + cmd, NULL, 0, NULL, 0); + } + + if ((len 
+ offset) < buffersize) { + memcpy(((char*)buffer) + offset, command.bytes, len); + offset += len; + } else { + break; + } + } + safe_send(buffer, offset, true); + + return TEST_PASS; +} + +static enum test_return test_binary_pipeline_hickup(void) +{ + size_t buffersize = 65 * 1024; + void *buffer = malloc(buffersize); + int ii; + + pthread_t tid; + int ret; + allow_closed_read = true; + hickup_thread_running = true; + if ((ret = pthread_create(&tid, NULL, + binary_hickup_recv_verification_thread, NULL)) != 0) { + fprintf(stderr, "Can't create thread: %s\n", strerror(ret)); + return TEST_FAIL; + } + + /* Allow the thread to start */ + usleep(250); + + srand((int)time(NULL)); + for (ii = 0; ii < 2; ++ii) { + test_binary_pipeline_hickup_chunk(buffer, buffersize); + } + + /* send quitq to shut down the read thread ;-) */ + size_t len = raw_command(buffer, buffersize, PROTOCOL_BINARY_CMD_QUITQ, + NULL, 0, NULL, 0); + safe_send(buffer, len, false); + + pthread_join(tid, NULL); + free(buffer); + return TEST_PASS; +} + + +static enum test_return test_issue_101(void) { + const int max = 2; + enum test_return ret = TEST_PASS; + int fds[max]; + int ii = 0; + pid_t child = 0; + + if (getenv("SKIP_TEST_101") != NULL) { + return TEST_SKIP; + } + + const char *command = "stats\r\nstats\r\nstats\r\nstats\r\nstats\r\n"; + size_t cmdlen = strlen(command); + + server_pid = start_server(&port, false, 1000); + + for (ii = 0; ii < max; ++ii) { + fds[ii] = connect_server("127.0.0.1", port, true); + assert(fds[ii] > 0); + } + + /* Send command on the connection until it blocks */ + for (ii = 0; ii < max; ++ii) { + bool more = true; + do { + ssize_t err = write(fds[ii], command, cmdlen); + if (err == -1) { + switch (errno) { + case EINTR: + break; + case ENOMEM: + case EWOULDBLOCK: + more = false; + break; + default: + ret = TEST_FAIL; + goto cleanup; + } + } + } while (more); + } + + child = fork(); + if (child == (pid_t)-1) { + abort(); + } else if (child > 0) { + int stat; + pid_t c; + 
while ((c = waitpid(child, &stat, 0)) == (pid_t)-1 && errno == EINTR); + assert(c == child); + assert(stat == 0); + } else { + sock = connect_server("127.0.0.1", port, false); + ret = test_binary_noop(); + close(sock); + exit(0); + } + + cleanup: + /* close all connections */ + for (ii = 0; ii < max; ++ii) { + close(fds[ii]); + } + + assert(kill(server_pid, SIGTERM) == 0); + + return ret; +} + +typedef enum test_return (*TEST_FUNC)(void); +struct testcase { + const char *description; + TEST_FUNC function; +}; + +struct testcase testcases[] = { + { "cache_create", cache_create_test }, + { "cache_constructor", cache_constructor_test }, + { "cache_constructor_fail", cache_fail_constructor_test }, + { "cache_destructor", cache_destructor_test }, + { "cache_reuse", cache_reuse_test }, + { "cache_redzone", cache_redzone_test }, + { "issue_161", test_issue_161 }, + { "strtol", test_safe_strtol }, + { "strtoll", test_safe_strtoll }, + { "strtoul", test_safe_strtoul }, + { "strtoull", test_safe_strtoull }, + { "issue_44", test_issue_44 }, + { "vperror", test_vperror }, + { "issue_101", test_issue_101 }, + /* The following tests all run towards the same server */ + { "start_server", start_memcached_server }, + { "issue_92", test_issue_92 }, + { "issue_102", test_issue_102 }, + { "binary_noop", test_binary_noop }, + { "binary_quit", test_binary_quit }, + { "binary_quitq", test_binary_quitq }, + { "binary_set", test_binary_set }, + { "binary_setq", test_binary_setq }, + { "binary_add", test_binary_add }, + { "binary_addq", test_binary_addq }, + { "binary_replace", test_binary_replace }, + { "binary_replaceq", test_binary_replaceq }, + { "binary_delete", test_binary_delete }, + { "binary_deleteq", test_binary_deleteq }, + { "binary_get", test_binary_get }, + { "binary_getq", test_binary_getq }, + { "binary_getk", test_binary_getk }, + { "binary_getkq", test_binary_getkq }, + { "binary_incr", test_binary_incr }, + { "binary_incrq", test_binary_incrq }, + { "binary_decr", 
test_binary_decr }, + { "binary_decrq", test_binary_decrq }, + { "binary_version", test_binary_version }, + { "binary_flush", test_binary_flush }, + { "binary_flushq", test_binary_flushq }, + { "binary_append", test_binary_append }, + { "binary_appendq", test_binary_appendq }, + { "binary_prepend", test_binary_prepend }, + { "binary_prependq", test_binary_prependq }, + { "binary_stat", test_binary_stat }, + { "binary_illegal", test_binary_illegal }, + { "binary_pipeline_hickup", test_binary_pipeline_hickup }, + { "stop_server", stop_memcached_server }, + { NULL, NULL } +}; + +int main(int argc, char **argv) +{ + int exitcode = 0; + int ii = 0, num_cases = 0; + + for (num_cases = 0; testcases[num_cases].description; num_cases++) { + /* Just counting */ + } + + printf("1..%d\n", num_cases); + + for (ii = 0; testcases[ii].description != NULL; ++ii) { + fflush(stdout); +#ifndef DEBUG + /* the test program shouldn't run longer than 10 minutes... */ + alarm(600); +#endif + enum test_return ret = testcases[ii].function(); + if (ret == TEST_SKIP) { + fprintf(stdout, "ok # SKIP %d - %s\n", ii + 1, testcases[ii].description); + } else if (ret == TEST_PASS) { + fprintf(stdout, "ok %d - %s\n", ii + 1, testcases[ii].description); + } else { + fprintf(stdout, "not ok %d - %s\n", ii + 1, testcases[ii].description); + exitcode = 1; + } + fflush(stdout); + } + + return exitcode; +} diff --git a/memcached/thread.c b/memcached/thread.c new file mode 100644 index 00000000..e3622497 --- /dev/null +++ b/memcached/thread.c @@ -0,0 +1,743 @@ +/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* + * Thread management for memcached. + */ +#include "memcached.h" +#include +#include +#include +#include +#include +#include +#include + +#ifdef __sun +#include +#endif + +#define ITEMS_PER_ALLOC 64 + +/* An item in the connection queue. 
*/
+/*
+ * A CQ_ITEM carries the values the receiving worker later hands to
+ * conn_new(); `next` chains the item into a CQ or onto cqi_freelist.
+ */
+typedef struct conn_queue_item {
+    int                     sfd;
+    enum conn_states        init_state;
+    int                     event_flags;
+    int                     read_buffer_size;
+    enum network_transport  transport;
+    struct conn_queue_item *next;
+} CQ_ITEM;
+
+/*
+ * A connection queue: singly linked list of CQ_ITEMs, appended at `tail`
+ * by cq_push() and removed from `head` by cq_pop().  `lock` guards both
+ * pointers; `cond` is signalled by cq_push() after each append.
+ */
+typedef struct conn_queue {
+    CQ_ITEM         *head;
+    CQ_ITEM         *tail;
+    pthread_mutex_t  lock;
+    pthread_cond_t   cond;
+} CQ;
+
+/* Lock for cache operations (item_*, assoc_*); initialised in thread_init() */
+pthread_mutex_t cache_lock;
+
+/* Held by accept_new_conns() around do_accept_new_conns() */
+pthread_mutex_t conn_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/* Only needed when refcount_incr()/refcount_decr() have no native atomic */
+#if !(defined(HAVE_GCC_ATOMICS) || defined(__sun))
+pthread_mutex_t atomics_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+/* Lock for global stats (taken by STATS_LOCK / STATS_UNLOCK) */
+static pthread_mutex_t stats_lock;
+
+/* Free list of CQ_ITEM structs, and the mutex that guards it */
+static CQ_ITEM *cqi_freelist;
+static pthread_mutex_t cqi_freelist_lock;
+
+/* Table of item mutexes; item_lock(hv) uses slot (hv & item_lock_mask) */
+static pthread_mutex_t *item_locks;
+/* number of entries in item_locks */
+static uint32_t item_lock_count;
+/* item_lock_count - 1, used as the lookup mask */
+static uint32_t item_lock_mask;
+
+/* Records the event base and pthread id of the thread calling thread_init() */
+static LIBEVENT_DISPATCHER_THREAD dispatcher_thread;
+
+/*
+ * Each libevent instance has a wakeup pipe, which other threads
+ * can use to signal that they've put a new connection on its queue.
+ */
+static LIBEVENT_THREAD *threads;
+
+/*
+ * Number of worker threads that have finished setting themselves up.
+ * Bumped by every worker in worker_libevent() while holding init_lock;
+ * thread_init() waits on init_cond until it reaches nthreads.
+ */
+static int init_count = 0;
+static pthread_mutex_t init_lock;
+static pthread_cond_t init_cond;
+
+
+static void thread_libevent_process(int fd, short which, void *arg);
+
+/*
+ * Increments *refcount and returns the incremented value.  Uses the GCC
+ * builtin or the Solaris atomic when available, otherwise serialises the
+ * update with atomics_mutex.
+ */
+unsigned short refcount_incr(unsigned short *refcount) {
+#if defined(HAVE_GCC_ATOMICS)
+    return __sync_add_and_fetch(refcount, 1);
+#elif defined(__sun)
+    return atomic_inc_ushort_nv(refcount);
+#else
+    unsigned short updated;
+
+    mutex_lock(&atomics_mutex);
+    updated = ++(*refcount);
+    pthread_mutex_unlock(&atomics_mutex);
+    return updated;
+#endif
+}
+
+/*
+ * Decrements *refcount and returns the decremented value; same strategy
+ * selection as refcount_incr().
+ */
+unsigned short refcount_decr(unsigned short *refcount) {
+#if defined(HAVE_GCC_ATOMICS)
+    return __sync_sub_and_fetch(refcount, 1);
+#elif defined(__sun)
+    return atomic_dec_ushort_nv(refcount);
+#else
+    unsigned short updated;
+
+    mutex_lock(&atomics_mutex);
+    updated = --(*refcount);
+    pthread_mutex_unlock(&atomics_mutex);
+    return updated;
+#endif
+}
+
+/* Acquires the item mutex selected by the hash value hv. */
+void item_lock(uint32_t hv) {
+    pthread_mutex_t *slot = &item_locks[hv & item_lock_mask];
+
+    mutex_lock(slot);
+}
+
+/* Releases the item mutex selected by the hash value hv. */
+void item_unlock(uint32_t hv) {
+    pthread_mutex_t *slot = &item_locks[hv & item_lock_mask];
+
+    pthread_mutex_unlock(slot);
+}
+
+/*
+ * Prepares an empty connection queue: no head, no tail, and a freshly
+ * initialised mutex and condition variable.
+ */
+static void cq_init(CQ *cq) {
+    cq->tail = NULL;
+    cq->head = NULL;
+    pthread_mutex_init(&cq->lock, NULL);
+    pthread_cond_init(&cq->cond, NULL);
+}
+
+/*
+ * Detaches and returns the item at the head of the queue without ever
+ * blocking on an empty queue.
+ * Returns NULL when the queue holds nothing.
+ */
+static CQ_ITEM *cq_pop(CQ *cq) {
+    CQ_ITEM *first;
+
+    pthread_mutex_lock(&cq->lock);
+    first = cq->head;
+    if (first != NULL) {
+        cq->head = first->next;
+        /* removing the last element leaves the queue without a tail */
+        if (cq->head == NULL) {
+            cq->tail = NULL;
+        }
+    }
+    pthread_mutex_unlock(&cq->lock);
+
+    return first;
+}
+
+/*
+ * Adds an item to a connection queue.
+ * The item is appended at the tail and cq->cond is signalled while the
+ * queue lock is still held.
+ */
+static void cq_push(CQ *cq, CQ_ITEM *item) {
+    item->next = NULL;
+
+    pthread_mutex_lock(&cq->lock);
+    if (NULL == cq->tail)
+        cq->head = item;
+    else
+        cq->tail->next = item;
+    cq->tail = item;
+    pthread_cond_signal(&cq->cond);
+    pthread_mutex_unlock(&cq->lock);
+}
+
+/*
+ * Returns a fresh connection queue item.
+ *
+ * Takes the head of cqi_freelist when there is one.  Otherwise allocates
+ * ITEMS_PER_ALLOC items in one malloc(), returns the first and pushes the
+ * remainder onto the free list.
+ * Returns NULL if that allocation fails.
+ */
+static CQ_ITEM *cqi_new(void) {
+    CQ_ITEM *item = NULL;
+    pthread_mutex_lock(&cqi_freelist_lock);
+    if (cqi_freelist) {
+        item = cqi_freelist;
+        cqi_freelist = item->next;
+    }
+    pthread_mutex_unlock(&cqi_freelist_lock);
+
+    if (NULL == item) {
+        int i;
+
+        /* Allocate a bunch of items at once to reduce fragmentation */
+        item = malloc(sizeof(CQ_ITEM) * ITEMS_PER_ALLOC);
+        if (NULL == item)
+            return NULL;
+
+        /*
+         * Link together all the new items except the first one
+         * (which we'll return to the caller) for placement on
+         * the freelist.
+         */
+        for (i = 2; i < ITEMS_PER_ALLOC; i++)
+            item[i - 1].next = &item[i];
+
+        /* splice item[1..ITEMS_PER_ALLOC-1] in front of the current list */
+        pthread_mutex_lock(&cqi_freelist_lock);
+        item[ITEMS_PER_ALLOC - 1].next = cqi_freelist;
+        cqi_freelist = &item[1];
+        pthread_mutex_unlock(&cqi_freelist_lock);
+    }
+
+    return item;
+}
+
+
+/*
+ * Frees a connection queue item (adds it to the freelist.)
+ */
+static void cqi_free(CQ_ITEM *item) {
+    pthread_mutex_lock(&cqi_freelist_lock);
+    item->next = cqi_freelist;
+    cqi_freelist = item;
+    pthread_mutex_unlock(&cqi_freelist_lock);
+}
+
+
+/*
+ * Creates a worker thread running func(arg) with default attributes.
+ * The thread handle is not kept.  Prints a diagnostic and exits the
+ * process if the thread cannot be created.
+ */
+static void create_worker(void *(*func)(void *), void *arg) {
+    pthread_t       thread;
+    pthread_attr_t  attr;
+    int             ret;
+
+    pthread_attr_init(&attr);
+
+    ret = pthread_create(&thread, &attr, func, arg);
+
+    /* the attribute object is no longer needed once pthread_create returns */
+    pthread_attr_destroy(&attr);
+
+    if (ret != 0) {
+        fprintf(stderr, "Can't create thread: %s\n",
+                strerror(ret));
+        exit(1);
+    }
+}
+
+/*
+ * Sets whether or not we accept new connections.
+ * Calls do_accept_new_conns() with conn_lock held.
+ */
+void accept_new_conns(const bool do_accept) {
+    pthread_mutex_lock(&conn_lock);
+    do_accept_new_conns(do_accept);
+    pthread_mutex_unlock(&conn_lock);
+}
+/****************************** LIBEVENT THREADS *****************************/
+
+/*
+ * Set up a thread's information.
+ *
+ * Creates the thread's event base, registers a persistent read event on
+ * me->notify_receive_fd that invokes thread_libevent_process(), then
+ * allocates the new-connection queue, the per-thread stats mutex and the
+ * "suffix" cache.  Any failure prints a diagnostic and exits the process.
+ */
+static void setup_thread(LIBEVENT_THREAD *me) {
+    int rc;
+
+    me->base = event_init();
+    if (! me->base) {
+        fprintf(stderr, "Can't allocate event base\n");
+        exit(1);
+    }
+
+    /* Listen for notifications from other threads */
+    event_set(&me->notify_event, me->notify_receive_fd,
+              EV_READ | EV_PERSIST, thread_libevent_process, me);
+    event_base_set(me->base, &me->notify_event);
+
+    if (event_add(&me->notify_event, 0) == -1) {
+        fprintf(stderr, "Can't monitor libevent notify pipe\n");
+        exit(1);
+    }
+
+    me->new_conn_queue = malloc(sizeof(struct conn_queue));
+    if (me->new_conn_queue == NULL) {
+        perror("Failed to allocate memory for connection queue");
+        exit(EXIT_FAILURE);
+    }
+    cq_init(me->new_conn_queue);
+
+    /*
+     * pthread_mutex_init() returns the error number and does not set
+     * errno, so format the returned code instead of calling perror().
+     */
+    rc = pthread_mutex_init(&me->stats.mutex, NULL);
+    if (rc != 0) {
+        fprintf(stderr, "Failed to initialize mutex: %s\n", strerror(rc));
+        exit(EXIT_FAILURE);
+    }
+
+    me->suffix_cache = cache_create("suffix", SUFFIX_SIZE, sizeof(char*),
+                                    NULL, NULL);
+    if (me->suffix_cache == NULL) {
+        fprintf(stderr, "Failed to create suffix cache\n");
+        exit(EXIT_FAILURE);
+    }
+}
+
+
+/*
+ * Worker thread: main event loop
+ *
+ * arg is the thread's LIBEVENT_THREAD.  Reports readiness by incrementing
+ * init_count and signalling init_cond, then runs the event loop on
+ * me->base.  Always returns NULL.
+ */
+static void *worker_libevent(void *arg) {
+    LIBEVENT_THREAD *me = arg;
+
+    /* Any per-thread setup can happen here; thread_init() will block until
+     * all threads have finished initializing.
+     */
+
+    pthread_mutex_lock(&init_lock);
+    init_count++;
+    pthread_cond_signal(&init_cond);
+    pthread_mutex_unlock(&init_lock);
+
+    event_base_loop(me->base, 0);
+    return NULL;
+}
+
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+/*
+ * Processes an incoming "handle a new connection" item. This is called when
+ * input arrives on the libevent wakeup pipe.
+ *
+ * Consumes one byte from the pipe, pops one CQ_ITEM from the thread's
+ * queue and turns it into a connection with conn_new().  If conn_new()
+ * fails, a UDP item is fatal (the process exits); any other item just has
+ * its socket closed.  The CQ_ITEM is always returned to the free list.
+ * `which` is unused; arg is the owning LIBEVENT_THREAD.
+ */
+static void thread_libevent_process(int fd, short which, void *arg) {
+    LIBEVENT_THREAD *me = arg;
+    CQ_ITEM *item;
+    char buf[1];
+
+    if (read(fd, buf, 1) != 1)
+        if (settings.verbose > 0)
+            fprintf(stderr, "Can't read from libevent pipe\n");
+
+    item = cq_pop(me->new_conn_queue);
+
+    if (NULL != item) {
+        conn *c = conn_new(item->sfd, item->init_state, item->event_flags,
+                           item->read_buffer_size, item->transport, me->base);
+        if (c == NULL) {
+            if (IS_UDP(item->transport)) {
+                fprintf(stderr, "Can't listen for events on UDP socket\n");
+                exit(1);
+            } else {
+                if (settings.verbose > 0) {
+                    fprintf(stderr, "Can't listen for events on fd %d\n",
+                        item->sfd);
+                }
+                close(item->sfd);
+            }
+        } else {
+            c->thread = me;
+        }
+        cqi_free(item);
+    }
+}
+
+/* Which thread we assigned a connection to most recently. */
+static int last_thread = -1;
+
+/*
+ * Dispatches a new connection to another thread. This is only ever called
+ * from the main thread, either during initialization (for UDP) or because
+ * of an incoming connection.
+ *
+ * Workers are chosen round-robin.  The connection parameters are copied
+ * into a CQ_ITEM, queued on the chosen worker, and one byte is written to
+ * that worker's notify pipe to wake it.
+ *
+ * If no CQ_ITEM can be allocated the socket is closed and the connection
+ * is dropped; the round-robin position is left untouched.
+ */
+void dispatch_conn_new(int sfd, enum conn_states init_state, int event_flags,
+                       int read_buffer_size, enum network_transport transport) {
+    CQ_ITEM *item = cqi_new();
+    int tid;
+    LIBEVENT_THREAD *thread;
+
+    if (item == NULL) {
+        /* cqi_new() returns NULL when malloc() fails; never dereference it */
+        close(sfd);
+        /* given that malloc failed this may also fail, but let's try */
+        fprintf(stderr, "Failed to allocate memory for connection object\n");
+        return;
+    }
+
+    tid = (last_thread + 1) % settings.num_threads;
+    thread = threads + tid;
+
+    last_thread = tid;
+
+    item->sfd = sfd;
+    item->init_state = init_state;
+    item->event_flags = event_flags;
+    item->read_buffer_size = read_buffer_size;
+    item->transport = transport;
+
+    cq_push(thread->new_conn_queue, item);
+
+    MEMCACHED_CONN_DISPATCH(sfd, thread->thread_id);
+    if (write(thread->notify_send_fd, "", 1) != 1) {
+        perror("Writing to thread notify pipe");
+    }
+}
+
+/*
+ * Returns true if this is the thread that listens for new TCP connections.
+ *
+ * pthread_t is an opaque type, so the ids are compared with
+ * pthread_equal() rather than ==.  The result is 1 or 0.
+ */
+int is_listen_thread() {
+    return pthread_equal(pthread_self(), dispatcher_thread.thread_id) != 0;
+}
+
+/********************************* ITEM ACCESS *******************************/
+
+/*
+ * Allocates a new item.
+ * Straight pass-through to do_item_alloc().
+ */
+item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes) {
+    /* do_item_alloc handles its own locks */
+    return do_item_alloc(key, nkey, flags, exptime, nbytes);
+}
+
+/*
+ * Looks an item up by key via do_item_get(), holding the item lock for
+ * the key's hash while doing so.  Expiry handling is left to do_item_get().
+ */
+item *item_get(const char *key, const size_t nkey) {
+    const uint32_t hv = hash(key, nkey, 0);
+    item *found;
+
+    item_lock(hv);
+    found = do_item_get(key, nkey, hv);
+    item_unlock(hv);
+    return found;
+}
+
+/*
+ * Same locking pattern as item_get(), but calls do_item_touch() with the
+ * supplied exptime.
+ */
+item *item_touch(const char *key, size_t nkey, uint32_t exptime) {
+    const uint32_t hv = hash(key, nkey, 0);
+    item *found;
+
+    item_lock(hv);
+    found = do_item_touch(key, nkey, exptime, hv);
+    item_unlock(hv);
+    return found;
+}
+
+/*
+ * Links an item into the LRU and hashtable (do_item_link() under the
+ * item lock for the item's own key).  Returns do_item_link()'s result.
+ */
+int item_link(item *item) {
+    const uint32_t hv = hash(ITEM_key(item), item->nkey, 0);
+    int linked;
+
+    item_lock(hv);
+    linked = do_item_link(item, hv);
+    item_unlock(hv);
+    return linked;
+}
+
+/*
+ * Drops one reference to an item via do_item_remove(), which adds it to
+ * the freelist if needed.  Runs under the item lock for the item's key.
+ */
+void item_remove(item *item) {
+    const uint32_t hv = hash(ITEM_key(item), item->nkey, 0);
+
+    item_lock(hv);
+    do_item_remove(item);
+    item_unlock(hv);
+}
+
+/*
+ * Replaces one item with another in the hashtable.
+ * Takes no lock itself: the core server does not need this entry point
+ * to be thread-safe.
+ */
+int item_replace(item *old_it, item *new_it, const uint32_t hv) {
+    int replaced = do_item_replace(old_it, new_it, hv);
+
+    return replaced;
+}
+
+/*
+ * Unlinks an item from the LRU and hashtable (do_item_unlink() under the
+ * item lock for the item's key).
+ */
+void item_unlink(item *item) {
+    const uint32_t hv = hash(ITEM_key(item), item->nkey, 0);
+
+    item_lock(hv);
+    do_item_unlink(item, hv);
+    item_unlock(hv);
+}
+
+/*
+ * Moves an item to the back of the LRU queue (do_item_update() under the
+ * item lock for the item's key).
+ */
+void item_update(item *item) {
+    const uint32_t hv = hash(ITEM_key(item), item->nkey, 0);
+
+    item_lock(hv);
+    do_item_update(item);
+    item_unlock(hv);
+}
+
+/*
+ * Does arithmetic on a numeric item value.
+ */ +enum delta_result_type add_delta(conn *c, const char *key, + const size_t nkey, int incr, + const int64_t delta, char *buf, + uint64_t *cas) { + enum delta_result_type ret; + uint32_t hv; + + hv = hash(key, nkey, 0); + item_lock(hv); + ret = do_add_delta(c, key, nkey, incr, delta, buf, cas, hv); + item_unlock(hv); + return ret; +} + +/* + * Stores an item in the cache (high level, obeys set/add/replace semantics) + */ +enum store_item_type store_item(item *item, int comm, conn* c) { + enum store_item_type ret; + uint32_t hv; + + hv = hash(ITEM_key(item), item->nkey, 0); + item_lock(hv); + ret = do_store_item(item, comm, c, hv); + item_unlock(hv); + return ret; +} + +/* + * Flushes expired items after a flush_all call + */ +void item_flush_expired() { + mutex_lock(&cache_lock); + do_item_flush_expired(); + pthread_mutex_unlock(&cache_lock); +} + +/* + * Dumps part of the cache + */ +char *item_cachedump(unsigned int slabs_clsid, unsigned int limit, unsigned int *bytes) { + char *ret; + + mutex_lock(&cache_lock); + ret = do_item_cachedump(slabs_clsid, limit, bytes); + pthread_mutex_unlock(&cache_lock); + return ret; +} + +/* + * Dumps statistics about slab classes + */ +void item_stats(ADD_STAT add_stats, void *c) { + mutex_lock(&cache_lock); + do_item_stats(add_stats, c); + pthread_mutex_unlock(&cache_lock); +} + +/* + * Dumps a list of objects of each size in 32-byte increments + */ +void item_stats_sizes(ADD_STAT add_stats, void *c) { + mutex_lock(&cache_lock); + do_item_stats_sizes(add_stats, c); + pthread_mutex_unlock(&cache_lock); +} + +/******************************* GLOBAL STATS ******************************/ + +void STATS_LOCK() { + pthread_mutex_lock(&stats_lock); +} + +void STATS_UNLOCK() { + pthread_mutex_unlock(&stats_lock); +} + +void threadlocal_stats_reset(void) { + int ii, sid; + for (ii = 0; ii < settings.num_threads; ++ii) { + pthread_mutex_lock(&threads[ii].stats.mutex); + + threads[ii].stats.get_cmds = 0; + threads[ii].stats.get_misses = 
0; + threads[ii].stats.touch_cmds = 0; + threads[ii].stats.touch_misses = 0; + threads[ii].stats.delete_misses = 0; + threads[ii].stats.incr_misses = 0; + threads[ii].stats.decr_misses = 0; + threads[ii].stats.cas_misses = 0; + threads[ii].stats.bytes_read = 0; + threads[ii].stats.bytes_written = 0; + threads[ii].stats.flush_cmds = 0; + threads[ii].stats.conn_yields = 0; + threads[ii].stats.auth_cmds = 0; + threads[ii].stats.auth_errors = 0; + + for(sid = 0; sid < MAX_NUMBER_OF_SLAB_CLASSES; sid++) { + threads[ii].stats.slab_stats[sid].set_cmds = 0; + threads[ii].stats.slab_stats[sid].get_hits = 0; + threads[ii].stats.slab_stats[sid].touch_hits = 0; + threads[ii].stats.slab_stats[sid].delete_hits = 0; + threads[ii].stats.slab_stats[sid].incr_hits = 0; + threads[ii].stats.slab_stats[sid].decr_hits = 0; + threads[ii].stats.slab_stats[sid].cas_hits = 0; + threads[ii].stats.slab_stats[sid].cas_badval = 0; + } + + pthread_mutex_unlock(&threads[ii].stats.mutex); + } +} + +void threadlocal_stats_aggregate(struct thread_stats *stats) { + int ii, sid; + + /* The struct has a mutex, but we can safely set the whole thing + * to zero since it is unused when aggregating. 
*/ + memset(stats, 0, sizeof(*stats)); + + for (ii = 0; ii < settings.num_threads; ++ii) { + pthread_mutex_lock(&threads[ii].stats.mutex); + + stats->get_cmds += threads[ii].stats.get_cmds; + stats->get_misses += threads[ii].stats.get_misses; + stats->touch_cmds += threads[ii].stats.touch_cmds; + stats->touch_misses += threads[ii].stats.touch_misses; + stats->delete_misses += threads[ii].stats.delete_misses; + stats->decr_misses += threads[ii].stats.decr_misses; + stats->incr_misses += threads[ii].stats.incr_misses; + stats->cas_misses += threads[ii].stats.cas_misses; + stats->bytes_read += threads[ii].stats.bytes_read; + stats->bytes_written += threads[ii].stats.bytes_written; + stats->flush_cmds += threads[ii].stats.flush_cmds; + stats->conn_yields += threads[ii].stats.conn_yields; + stats->auth_cmds += threads[ii].stats.auth_cmds; + stats->auth_errors += threads[ii].stats.auth_errors; + + for (sid = 0; sid < MAX_NUMBER_OF_SLAB_CLASSES; sid++) { + stats->slab_stats[sid].set_cmds += + threads[ii].stats.slab_stats[sid].set_cmds; + stats->slab_stats[sid].get_hits += + threads[ii].stats.slab_stats[sid].get_hits; + stats->slab_stats[sid].touch_hits += + threads[ii].stats.slab_stats[sid].touch_hits; + stats->slab_stats[sid].delete_hits += + threads[ii].stats.slab_stats[sid].delete_hits; + stats->slab_stats[sid].decr_hits += + threads[ii].stats.slab_stats[sid].decr_hits; + stats->slab_stats[sid].incr_hits += + threads[ii].stats.slab_stats[sid].incr_hits; + stats->slab_stats[sid].cas_hits += + threads[ii].stats.slab_stats[sid].cas_hits; + stats->slab_stats[sid].cas_badval += + threads[ii].stats.slab_stats[sid].cas_badval; + } + + pthread_mutex_unlock(&threads[ii].stats.mutex); + } +} + +void slab_stats_aggregate(struct thread_stats *stats, struct slab_stats *out) { + int sid; + + out->set_cmds = 0; + out->get_hits = 0; + out->touch_hits = 0; + out->delete_hits = 0; + out->incr_hits = 0; + out->decr_hits = 0; + out->cas_hits = 0; + out->cas_badval = 0; + + for (sid = 0; 
sid < MAX_NUMBER_OF_SLAB_CLASSES; sid++) { + out->set_cmds += stats->slab_stats[sid].set_cmds; + out->get_hits += stats->slab_stats[sid].get_hits; + out->touch_hits += stats->slab_stats[sid].touch_hits; + out->delete_hits += stats->slab_stats[sid].delete_hits; + out->decr_hits += stats->slab_stats[sid].decr_hits; + out->incr_hits += stats->slab_stats[sid].incr_hits; + out->cas_hits += stats->slab_stats[sid].cas_hits; + out->cas_badval += stats->slab_stats[sid].cas_badval; + } +} + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wsign-compare" +#endif +/* + * Initializes the thread subsystem, creating various worker threads. + * + * nthreads Number of worker event handler threads to spawn + * main_base Event base for main thread + */ +void thread_init(int nthreads, struct event_base *main_base) { + int i; + int power; + + pthread_mutex_init(&cache_lock, NULL); + pthread_mutex_init(&stats_lock, NULL); + + pthread_mutex_init(&init_lock, NULL); + pthread_cond_init(&init_cond, NULL); + + pthread_mutex_init(&cqi_freelist_lock, NULL); + cqi_freelist = NULL; + + /* Want a wide lock table, but don't waste memory */ + if (nthreads < 3) { + power = 10; + } else if (nthreads < 4) { + power = 11; + } else if (nthreads < 5) { + power = 12; + } else { + /* 8192 buckets, and central locks don't scale much past 5 threads */ + power = 13; + } + + item_lock_count = ((unsigned long int)1 << (power)); + item_lock_mask = item_lock_count - 1; + + item_locks = calloc(item_lock_count, sizeof(pthread_mutex_t)); + if (! item_locks) { + perror("Can't allocate item locks"); + exit(1); + } + for (i = 0; i < item_lock_count; i++) { + pthread_mutex_init(&item_locks[i], NULL); + } + + threads = calloc(nthreads, sizeof(LIBEVENT_THREAD)); + if (! 
threads) { + perror("Can't allocate thread descriptors"); + exit(1); + } + + dispatcher_thread.base = main_base; + dispatcher_thread.thread_id = pthread_self(); + + for (i = 0; i < nthreads; i++) { + int fds[2]; + if (pipe(fds)) { + perror("Can't create notify pipe"); + exit(1); + } + + threads[i].notify_receive_fd = fds[0]; + threads[i].notify_send_fd = fds[1]; + + setup_thread(&threads[i]); + /* Reserve three fds for the libevent base, and two for the pipe */ + stats.reserved_fds += 5; + } + + /* Create threads after we've done all the libevent setup. */ + for (i = 0; i < nthreads; i++) { + create_worker(worker_libevent, &threads[i]); + } + + /* Wait for all the threads to set themselves up before returning. */ + pthread_mutex_lock(&init_lock); + while (init_count < nthreads) { + pthread_cond_wait(&init_cond, &init_lock); + } + pthread_mutex_unlock(&init_lock); +} + diff --git a/memcached/timedrun.c b/memcached/timedrun.c new file mode 100644 index 00000000..6d7afb33 --- /dev/null +++ b/memcached/timedrun.c @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include +#include + +#include + +static int caught = 0; + +static void caught_signal(int which) +{ + caught = which; +} + +static int wait_for_process(pid_t pid) +{ + int rv = EX_SOFTWARE; + int stats = 0; + int i = 0; + struct sigaction sig_handler; + + sig_handler.sa_handler = caught_signal; + sig_handler.sa_flags = 0; + + sigaction(SIGALRM, &sig_handler, NULL); + sigaction(SIGHUP, &sig_handler, NULL); + sigaction(SIGINT, &sig_handler, NULL); + sigaction(SIGTERM, &sig_handler, NULL); + sigaction(SIGPIPE, &sig_handler, NULL); + + /* Loop forever waiting for the process to quit */ + for (i = 0; ;i++) { + pid_t p = waitpid(pid, &stats, 0); + if (p == pid) { + /* child exited. Let's get out of here */ + rv = WIFEXITED(stats) ? 
+ WEXITSTATUS(stats) : + (0x80 | WTERMSIG(stats)); + break; + } else { + int sig = 0; + switch (i) { + case 0: + /* On the first iteration, pass the signal through */ + sig = caught > 0 ? caught : SIGTERM; + if (caught == SIGALRM) { + fprintf(stderr, "Timeout.. killing the process\n"); + } + break; + case 1: + sig = SIGTERM; + break; + default: + sig = SIGKILL; + break; + } + if (kill(pid, sig) < 0) { + /* Kill failed. Must have lost the process. :/ */ + perror("lost child when trying to kill"); + } + /* Wait up to 5 seconds for the pid */ + alarm(5); + } + } + return rv; +} + +static int spawn_and_wait(char **argv) +{ + int rv = EX_SOFTWARE; + pid_t pid = fork(); + + switch (pid) { + case -1: + perror("fork"); + rv = EX_OSERR; + break; /* NOTREACHED */ + case 0: + execvp(argv[0], argv); + perror("exec"); + rv = EX_SOFTWARE; + break; /* NOTREACHED */ + default: + rv = wait_for_process(pid); + } + return rv; +} + +int main(int argc, char **argv) +{ + int naptime = 0; + assert(argc > 2); + + naptime = atoi(argv[1]); + assert(naptime > 0 && naptime < 1800); + + alarm(naptime); + + return spawn_and_wait(argv+2); +} diff --git a/memcached/trace.h b/memcached/trace.h new file mode 100644 index 00000000..dc792a0a --- /dev/null +++ b/memcached/trace.h @@ -0,0 +1,71 @@ +#ifndef TRACE_H +#define TRACE_H + +#ifdef ENABLE_DTRACE +#include "memcached_dtrace.h" +#else +#define MEMCACHED_ASSOC_DELETE(arg0, arg1, arg2) +#define MEMCACHED_ASSOC_DELETE_ENABLED() (0) +#define MEMCACHED_ASSOC_FIND(arg0, arg1, arg2) +#define MEMCACHED_ASSOC_FIND_ENABLED() (0) +#define MEMCACHED_ASSOC_INSERT(arg0, arg1, arg2) +#define MEMCACHED_ASSOC_INSERT_ENABLED() (0) +#define MEMCACHED_COMMAND_ADD(arg0, arg1, arg2, arg3, arg4) +#define MEMCACHED_COMMAND_ADD_ENABLED() (0) +#define MEMCACHED_COMMAND_APPEND(arg0, arg1, arg2, arg3, arg4) +#define MEMCACHED_COMMAND_APPEND_ENABLED() (0) +#define MEMCACHED_COMMAND_CAS(arg0, arg1, arg2, arg3, arg4) +#define MEMCACHED_COMMAND_CAS_ENABLED() (0) +#define 
MEMCACHED_COMMAND_DECR(arg0, arg1, arg2, arg3) +#define MEMCACHED_COMMAND_DECR_ENABLED() (0) +#define MEMCACHED_COMMAND_DELETE(arg0, arg1, arg2) +#define MEMCACHED_COMMAND_DELETE_ENABLED() (0) +#define MEMCACHED_COMMAND_GET(arg0, arg1, arg2, arg3, arg4) +#define MEMCACHED_COMMAND_GET_ENABLED() (0) +#define MEMCACHED_COMMAND_TOUCH(arg0, arg1, arg2, arg3, arg4) +#define MEMCACHED_COMMAND_TOUCH_ENABLED() (0) +#define MEMCACHED_COMMAND_INCR(arg0, arg1, arg2, arg3) +#define MEMCACHED_COMMAND_INCR_ENABLED() (0) +#define MEMCACHED_COMMAND_PREPEND(arg0, arg1, arg2, arg3, arg4) +#define MEMCACHED_COMMAND_PREPEND_ENABLED() (0) +#define MEMCACHED_COMMAND_REPLACE(arg0, arg1, arg2, arg3, arg4) +#define MEMCACHED_COMMAND_REPLACE_ENABLED() (0) +#define MEMCACHED_COMMAND_SET(arg0, arg1, arg2, arg3, arg4) +#define MEMCACHED_COMMAND_SET_ENABLED() (0) +#define MEMCACHED_CONN_ALLOCATE(arg0) +#define MEMCACHED_CONN_ALLOCATE_ENABLED() (0) +#define MEMCACHED_CONN_CREATE(arg0) +#define MEMCACHED_CONN_CREATE_ENABLED() (0) +#define MEMCACHED_CONN_DESTROY(arg0) +#define MEMCACHED_CONN_DESTROY_ENABLED() (0) +#define MEMCACHED_CONN_DISPATCH(arg0, arg1) +#define MEMCACHED_CONN_DISPATCH_ENABLED() (0) +#define MEMCACHED_CONN_RELEASE(arg0) +#define MEMCACHED_CONN_RELEASE_ENABLED() (0) +#define MEMCACHED_ITEM_LINK(arg0, arg1, arg2) +#define MEMCACHED_ITEM_LINK_ENABLED() (0) +#define MEMCACHED_ITEM_REMOVE(arg0, arg1, arg2) +#define MEMCACHED_ITEM_REMOVE_ENABLED() (0) +#define MEMCACHED_ITEM_REPLACE(arg0, arg1, arg2, arg3, arg4, arg5) +#define MEMCACHED_ITEM_REPLACE_ENABLED() (0) +#define MEMCACHED_ITEM_UNLINK(arg0, arg1, arg2) +#define MEMCACHED_ITEM_UNLINK_ENABLED() (0) +#define MEMCACHED_ITEM_UPDATE(arg0, arg1, arg2) +#define MEMCACHED_ITEM_UPDATE_ENABLED() (0) +#define MEMCACHED_PROCESS_COMMAND_END(arg0, arg1, arg2) +#define MEMCACHED_PROCESS_COMMAND_END_ENABLED() (0) +#define MEMCACHED_PROCESS_COMMAND_START(arg0, arg1, arg2) +#define MEMCACHED_PROCESS_COMMAND_START_ENABLED() (0) +#define 
MEMCACHED_SLABS_ALLOCATE(arg0, arg1, arg2, arg3) +#define MEMCACHED_SLABS_ALLOCATE_ENABLED() (0) +#define MEMCACHED_SLABS_ALLOCATE_FAILED(arg0, arg1) +#define MEMCACHED_SLABS_ALLOCATE_FAILED_ENABLED() (0) +#define MEMCACHED_SLABS_FREE(arg0, arg1, arg2) +#define MEMCACHED_SLABS_FREE_ENABLED() (0) +#define MEMCACHED_SLABS_SLABCLASS_ALLOCATE(arg0) +#define MEMCACHED_SLABS_SLABCLASS_ALLOCATE_ENABLED() (0) +#define MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(arg0) +#define MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED_ENABLED() (0) +#endif + +#endif diff --git a/memcached/util.c b/memcached/util.c new file mode 100644 index 00000000..d92e6212 --- /dev/null +++ b/memcached/util.c @@ -0,0 +1,144 @@ +#include "memcached.h" + +#include +#include +#include +#include +#include +#include +#include + +/* Avoid warnings on solaris, where isspace() is an index into an array, and gcc uses signed chars */ +#define xisspace(c) isspace((unsigned char)c) + +bool safe_strtoull(const char *str, uint64_t *out) { + assert(out != NULL); + errno = 0; + *out = 0; + char *endptr; + unsigned long long ull = strtoull(str, &endptr, 10); + if ((errno == ERANGE) || (str == endptr)) { + return false; + } + + if (xisspace(*endptr) || (*endptr == '\0' && endptr != str)) { + if ((long long) ull < 0) { + /* only check for negative signs in the uncommon case when + * the unsigned number is so big that it's negative as a + * signed number. 
*/
+            if (strchr(str, '-') != NULL) {
+                return false;
+            }
+        }
+        *out = ull;
+        return true;
+    }
+    return false;
+}
+
+bool safe_strtoll(const char *str, int64_t *out) { /* signed 64-bit decimal parse; *out stays 0 on failure */
+    assert(out != NULL);
+    errno = 0; /* strtoll() reports overflow only through errno */
+    *out = 0;
+    char *endptr;
+    long long ll = strtoll(str, &endptr, 10);
+    if ((errno == ERANGE) || (str == endptr)) { /* overflow, or no digits consumed */
+        return false;
+    }
+
+    if (xisspace(*endptr) || (*endptr == '\0' && endptr != str)) { /* number must end at whitespace or NUL */
+        *out = ll;
+        return true;
+    }
+    return false;
+}
+
+bool safe_strtoul(const char *str, uint32_t *out) { /* unsigned 32-bit decimal parse; *out stays 0 on failure */
+    char *endptr = NULL;
+    unsigned long l = 0;
+    assert(out);
+    assert(str);
+    *out = 0;
+    errno = 0;
+
+    l = strtoul(str, &endptr, 10);
+    if ((errno == ERANGE) || (str == endptr) || (l != (unsigned long) (uint32_t) l)) { /* last test: value does not fit in 32 bits (possible where long is wider) */
+        return false;
+    }
+
+    if (xisspace(*endptr) || (*endptr == '\0' && endptr != str)) {
+        if ((long) l < 0) {
+            /* only check for negative signs in the uncommon case when
+             * the unsigned number is so big that it's negative as a
+             * signed number. */
+            if (strchr(str, '-') != NULL) {
+                return false;
+            }
+        }
+        *out = l;
+        return true;
+    }
+
+    return false;
+}
+
+bool safe_strtol(const char *str, int32_t *out) { /* signed 32-bit decimal parse; *out stays 0 on failure */
+    assert(out != NULL);
+    errno = 0;
+    *out = 0;
+    char *endptr;
+    long l = strtol(str, &endptr, 10);
+    if ((errno == ERANGE) || (str == endptr) || (l != (long) (int32_t) l)) { /* last test: value outside int32_t (possible where long is wider) */
+        return false;
+    }
+
+    if (xisspace(*endptr) || (*endptr == '\0' && endptr != str)) {
+        *out = l;
+        return true;
+    }
+    return false;
+}
+
+void vperror(const char *fmt, ...) { /* perror() with a printf-style formatted prefix */
+    int old_errno = errno; /* formatting below may clobber errno */
+    char buf[1024];
+    va_list ap;
+
+    va_start(ap, fmt);
+    if (vsnprintf(buf, sizeof(buf), fmt, ap) == -1) {
+        buf[sizeof(buf) - 1] = '\0'; /* force termination when vsnprintf reports -1 */
+    }
+    va_end(ap);
+
+    errno = old_errno; /* perror() must describe the caller's error */
+
+    perror(buf);
+}
+
+#ifndef HAVE_HTONLL
+static uint64_t mc_swap64(uint64_t in) {
+#ifdef ENDIAN_LITTLE
+    /* Little endian, flip the bytes around until someone makes a faster/better
+     * way to do this.
*/
+    uint64_t rv = 0; /* unsigned: left-shifting a signed value into the sign bit is undefined */
+    int i = 0;
+    for(i = 0; i<8; i++) {
+        rv = (rv << 8) | (in & 0xff); /* lowest byte of in becomes the next-lower byte of rv */
+        in >>= 8;
+    }
+    return rv;
+#else
+    /* big-endian machines don't need byte swapping */
+    return in;
+#endif
+}
+
+uint64_t ntohll(uint64_t val) { /* network (big-endian) to host order */
+    return mc_swap64(val);
+}
+
+uint64_t htonll(uint64_t val) { /* host to network (big-endian) order */
+    return mc_swap64(val);
+}
+#endif
+
diff --git a/memcached/util.h b/memcached/util.h
new file mode 100644
index 00000000..1ec1a525
--- /dev/null
+++ b/memcached/util.h
@@ -0,0 +1,33 @@
+/*
+ * Wrappers around strtoull/strtoll that are safer and easier to
+ * use.  For tests and assumptions, see internal_tests.c.
+ *
+ * str   a NUL-terminated base-10 (decimal) integer string
+ * out   out parameter, written only if conversion succeeded
+ *
+ * returns true if conversion succeeded.
+ */
+bool safe_strtoull(const char *str, uint64_t *out);
+bool safe_strtoll(const char *str, int64_t *out);
+bool safe_strtoul(const char *str, uint32_t *out);
+bool safe_strtol(const char *str, int32_t *out);
+
+#ifndef HAVE_HTONLL
+extern uint64_t htonll(uint64_t);
+extern uint64_t ntohll(uint64_t);
+#endif
+
+#ifdef __GNUC__ /* predefined by GCC-compatible compilers; __GCC is never defined */
+# define __gcc_attribute__ __attribute__
+#else
+# define __gcc_attribute__(x)
+#endif
+
+/**
+ * Vararg variant of perror that makes for more useful error messages
+ * when reporting with parameters.
+ *
+ * @param fmt a printf format
+ */
+void vperror(const char *fmt, ...)
+    __gcc_attribute__ ((format (printf, 1, 2)));
diff --git a/memcached/version.m4 b/memcached/version.m4
new file mode 100644
index 00000000..59142923
--- /dev/null
+++ b/memcached/version.m4
@@ -0,0 +1 @@
+m4_define([VERSION_NUMBER], [1.4.13])
diff --git a/memcached/version.pl b/memcached/version.pl
new file mode 100755
index 00000000..6f08fafc
--- /dev/null
+++ b/memcached/version.pl
@@ -0,0 +1,59 @@
+#!/usr/bin/perl
+# If you think this is stupid/overkill, blame dormando
+
+use warnings;
+use strict;
+
+my $version = `git describe`;
+chomp $version;
+# Test the various versions.
+#my $version = 'foob';
+#my $version = '1.4.2-30-gf966dba';
+#my $version = '1.4.3-rc1';
+#my $version = '1.4.3';
+unless ($version =~ m/^\d+\.\d+\.\d+(?:-.+)?$/) {    # anything else (or a failed git describe) is recorded as UNKNOWN
+    write_file('version.m4', "m4_define([VERSION_NUMBER], [UNKNOWN])\n");
+    exit;
+}
+
+$version =~ s/-/_/g;    # from here on the suffix separator is '_'
+write_file('version.m4', "m4_define([VERSION_NUMBER], [$version])\n");
+my ($VERSION, $FULLVERSION, $RELEASE);
+
+if ($version =~ m/^(\d+\.\d+\.\d+)_rc(\d+)$/) {    # release candidate, e.g. 1.4.3_rc1
+    $VERSION = $1;
+    $FULLVERSION = $version;
+    $RELEASE = '0.1.rc' . $2;
+} elsif ($version =~ m/^(\d+\.\d+\.\d+)_(.+)$/) {    # commits past a tag, e.g. 1.4.2_30_gf966dba
+    $VERSION = $1;
+    $FULLVERSION = $version;
+    $RELEASE = '1.' . $2;
+} elsif ($version =~ m/^(\d+\.\d+\.\d+)$/) {    # exact release tag, e.g. 1.4.3
+    $VERSION = $1;
+    $FULLVERSION = $version;
+    $RELEASE = '1';
+}
+
+my $spec = read_file('memcached.spec.in');
+$spec =~ s/\@VERSION\@/$VERSION/gm;
+$spec =~ s/\@FULLVERSION\@/$FULLVERSION/gm;
+$spec =~ s/\@RELEASE\@/$RELEASE/gm;
+
+write_file('memcached.spec', $spec);
+
+sub write_file {    # write_file($file, $data): replace the contents of $file with $data; dies on any I/O error
+    my $file = shift;
+    my $data = shift;
+    open(my $fh, '>', $file) or die "Can't open $file: $!";
+    print {$fh} $data or die "Can't write $file: $!";
+    close($fh) or die "Can't close $file: $!";    # buffered write errors surface at close
+}
+
+sub read_file {    # read_file($file): return the whole contents of $file as one string; dies if it cannot be opened
+    my $file = shift;
+    local $/ = undef;    # slurp mode, restored when the sub returns
+    open(my $fh, '<', $file) or die "Can't open $file: $!";
+    my $data = <$fh>;
+    close($fh);
+    return $data;
+}
diff --git a/memcached/version.sh b/memcached/version.sh
new file mode 100755
index 00000000..a2860175
--- /dev/null
+++ b/memcached/version.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+if desc=$(git describe) && printf '%s\n' "$desc" | sed 's/-/_/g' > version.num.tmp  # test git's own status, not only sed's
+then
+    mv version.num.tmp version.num
+    echo "m4_define([VERSION_NUMBER], [`tr -d '\n' < version.num`])" \
+        > version.m4
+    sed "s/@VERSION@/`cat version.num`/" < memcached.spec.in > memcached.spec
+else
+    rm -f version.num.tmp  # the temp file may not exist when git describe failed
+fi