1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*
 * Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
 * and are divided into chunks. The chunk sizes start off at the size of the
 * "item" structure plus space for a small key and value. They increase by
 * a multiplier factor from there, up to half the maximum slab size. The last
 * slab size is always 1MB, since that's the maximum item size allowed by the
 * memcached protocol.
 */
10 #include "memcached.h"
12 #include <sys/socket.h>
13 #include <sys/signal.h>
14 #include <sys/resource.h>
16 #include <netinet/in.h>
/* powers-of-N allocation structures */

/* Per-class descriptor: one entry per chunk size the allocator manages. */
typedef struct {
    unsigned int size;      /* sizes of items */
    unsigned int perslab;   /* how many items per slab */

    void *slots;            /* list of item ptrs (freelist head) */
    unsigned int sl_curr;   /* total free items in list */

    unsigned int slabs;     /* how many slabs were allocated for this class */

    void **slab_list;       /* array of slab pointers */
    unsigned int list_size; /* size of prev array */

    unsigned int killing;   /* index+1 of dying slab, or zero if none */
    size_t requested;       /* The number of requested bytes */
} slabclass_t;
42 static slabclass_t slabclass
[MAX_NUMBER_OF_SLAB_CLASSES
];
43 static size_t mem_limit
= 0;
44 static size_t mem_malloced
= 0;
45 static int power_largest
;
47 static void *mem_base
= NULL
;
48 static void *mem_current
= NULL
;
49 static size_t mem_avail
= 0;
52 * Access to the slab allocator is protected by this lock
54 static pthread_mutex_t slabs_lock
= PTHREAD_MUTEX_INITIALIZER
;
55 static pthread_mutex_t slabs_rebalance_lock
= PTHREAD_MUTEX_INITIALIZER
;
/*
 * Forward Declarations
 */
static int do_slabs_newslab(const unsigned int id);
static void *memory_allocate(size_t size);
static void do_slabs_free(void *ptr, const size_t size, unsigned int id);

/* Preallocate as many slab pages as possible (called from slabs_init)
   on start-up, so users don't get confused out-of-memory errors when
   they do have free (in-slab) space, but no space to make new slabs.
   if maxslabs is 18 (POWER_LARGEST - POWER_SMALLEST + 1), then all
   slab types can be made. if max memory is less than 18 MB, only the
   smaller ones will be made. */
static void slabs_preallocate (const unsigned int maxslabs);
73 * Figures out which slab class (chunk size) is required to store an item of
76 * Given object size, return id to use when allocating/freeing memory for object
77 * 0 means error: can't store such a large object
80 unsigned int slabs_clsid(const size_t size
) {
81 int res
= POWER_SMALLEST
;
85 while (size
> slabclass
[res
].size
)
86 if (res
++ == power_largest
) /* won't fit in the biggest slab */
92 * Determines the chunk sizes and initializes the slab class descriptors
95 void slabs_init(const size_t limit
, const double factor
, const bool prealloc
) {
96 int i
= POWER_SMALLEST
- 1;
97 unsigned int size
= sizeof(item
) + settings
.chunk_size
;
102 /* Allocate everything in a big chunk with malloc */
103 mem_base
= malloc(mem_limit
);
104 if (mem_base
!= NULL
) {
105 mem_current
= mem_base
;
106 mem_avail
= mem_limit
;
108 fprintf(stderr
, "Warning: Failed to allocate requested memory in"
109 " one large chunk.\nWill allocate in smaller chunks\n");
113 memset(slabclass
, 0, sizeof(slabclass
));
115 while (++i
< POWER_LARGEST
&& size
<= settings
.item_size_max
/ factor
) {
116 /* Make sure items are always n-byte aligned */
117 if (size
% CHUNK_ALIGN_BYTES
)
118 size
+= CHUNK_ALIGN_BYTES
- (size
% CHUNK_ALIGN_BYTES
);
120 slabclass
[i
].size
= size
;
121 slabclass
[i
].perslab
= settings
.item_size_max
/ slabclass
[i
].size
;
123 if (settings
.verbose
> 1) {
124 fprintf(stderr
, "slab class %3d: chunk size %9u perslab %7u\n",
125 i
, slabclass
[i
].size
, slabclass
[i
].perslab
);
130 slabclass
[power_largest
].size
= settings
.item_size_max
;
131 slabclass
[power_largest
].perslab
= 1;
132 if (settings
.verbose
> 1) {
133 fprintf(stderr
, "slab class %3d: chunk size %9u perslab %7u\n",
134 i
, slabclass
[i
].size
, slabclass
[i
].perslab
);
137 /* for the test suite: faking of how much we've already malloc'd */
139 char *t_initial_malloc
= getenv("T_MEMD_INITIAL_MALLOC");
140 if (t_initial_malloc
) {
141 mem_malloced
= (size_t)atol(t_initial_malloc
);
147 slabs_preallocate(power_largest
);
151 static void slabs_preallocate (const unsigned int maxslabs
) {
153 unsigned int prealloc
= 0;
155 /* pre-allocate a 1MB slab in every size class so people don't get
156 confused by non-intuitive "SERVER_ERROR out of memory"
157 messages. this is the most common question on the mailing
158 list. if you really don't want this, you can rebuild without
159 these three lines. */
161 for (i
= POWER_SMALLEST
; i
<= POWER_LARGEST
; i
++) {
162 if (++prealloc
> maxslabs
)
164 if (do_slabs_newslab(i
) == 0) {
165 fprintf(stderr
, "Error while preallocating slab memory!\n"
166 "If using -L or other prealloc options, max memory must be "
167 "at least %d megabytes.\n", power_largest
);
174 static int grow_slab_list (const unsigned int id
) {
175 slabclass_t
*p
= &slabclass
[id
];
176 if (p
->slabs
== p
->list_size
) {
177 size_t new_size
= (p
->list_size
!= 0) ? p
->list_size
* 2 : 16;
178 void *new_list
= realloc(p
->slab_list
, new_size
* sizeof(void *));
179 if (new_list
== 0) return 0;
180 p
->list_size
= new_size
;
181 p
->slab_list
= new_list
;
186 static void split_slab_page_into_freelist(char *ptr
, const unsigned int id
) {
187 slabclass_t
*p
= &slabclass
[id
];
189 for (x
= 0; x
< p
->perslab
; x
++) {
190 do_slabs_free(ptr
, 0, id
);
195 static int do_slabs_newslab(const unsigned int id
) {
196 slabclass_t
*p
= &slabclass
[id
];
197 int len
= settings
.slab_reassign
? settings
.item_size_max
198 : p
->size
* p
->perslab
;
201 if ((mem_limit
&& mem_malloced
+ len
> mem_limit
&& p
->slabs
> 0) ||
202 (grow_slab_list(id
) == 0) ||
203 ((ptr
= memory_allocate((size_t)len
)) == 0)) {
205 MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id
);
209 memset(ptr
, 0, (size_t)len
);
210 split_slab_page_into_freelist(ptr
, id
);
212 p
->slab_list
[p
->slabs
++] = ptr
;
214 MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id
);
220 static void *do_slabs_alloc(const size_t size
, unsigned int id
) {
225 if (id
< POWER_SMALLEST
|| id
> power_largest
) {
226 MEMCACHED_SLABS_ALLOCATE_FAILED(size
, 0);
231 assert(p
->sl_curr
== 0 || ((item
*)p
->slots
)->slabs_clsid
== 0);
233 /* fail unless we have space at the end of a recently allocated page,
234 we have something on our freelist, or we could allocate a new page */
235 if (! (p
->sl_curr
!= 0 || do_slabs_newslab(id
) != 0)) {
236 /* We don't have more memory available */
238 } else if (p
->sl_curr
!= 0) {
239 /* return off our freelist */
240 it
= (item
*)p
->slots
;
242 if (it
->next
) it
->next
->prev
= 0;
248 p
->requested
+= size
;
249 MEMCACHED_SLABS_ALLOCATE(size
, id
, p
->size
, ret
);
251 MEMCACHED_SLABS_ALLOCATE_FAILED(size
, id
);
257 static void do_slabs_free(void *ptr
, const size_t size
, unsigned int id
) {
261 assert(((item
*)ptr
)->slabs_clsid
== 0);
262 assert(id
>= POWER_SMALLEST
&& id
<= power_largest
);
263 if (id
< POWER_SMALLEST
|| id
> power_largest
)
266 MEMCACHED_SLABS_FREE(size
, id
, ptr
);
270 it
->it_flags
|= ITEM_SLABBED
;
273 if (it
->next
) it
->next
->prev
= it
;
277 p
->requested
-= size
;
/* Compare a non-NUL-terminated buffer (nz, nzlength) against a C string z.
 * Returns 0 when they match exactly (same length, same bytes), -1 otherwise
 * — mirroring a strcmp-style "0 means equal" contract. */
static int nz_strcmp(int nzlength, const char *nz, const char *z) {
    int zlength=strlen(z);
    return (zlength == nzlength) && (strncmp(nz, z, zlength) == 0) ? 0 : -1;
}
286 bool get_stats(const char *stat_type
, int nkey
, ADD_STAT add_stats
, void *c
) {
289 if (add_stats
!= NULL
) {
291 /* prepare general statistics for the engine */
293 APPEND_STAT("bytes", "%llu", (unsigned long long)stats
.curr_bytes
);
294 APPEND_STAT("curr_items", "%u", stats
.curr_items
);
295 APPEND_STAT("total_items", "%u", stats
.total_items
);
296 APPEND_STAT("evictions", "%llu",
297 (unsigned long long)stats
.evictions
);
298 APPEND_STAT("reclaimed", "%llu",
299 (unsigned long long)stats
.reclaimed
);
301 } else if (nz_strcmp(nkey
, stat_type
, "items") == 0) {
302 item_stats(add_stats
, c
);
303 } else if (nz_strcmp(nkey
, stat_type
, "slabs") == 0) {
304 slabs_stats(add_stats
, c
);
305 } else if (nz_strcmp(nkey
, stat_type
, "sizes") == 0) {
306 item_stats_sizes(add_stats
, c
);
318 static void do_slabs_stats(ADD_STAT add_stats
, void *c
) {
320 /* Get the per-thread stats which contain some interesting aggregates */
321 struct thread_stats thread_stats
;
322 threadlocal_stats_aggregate(&thread_stats
);
325 for(i
= POWER_SMALLEST
; i
<= power_largest
; i
++) {
326 slabclass_t
*p
= &slabclass
[i
];
328 uint32_t perslab
, slabs
;
330 perslab
= p
->perslab
;
332 char key_str
[STAT_KEY_LEN
];
333 char val_str
[STAT_VAL_LEN
];
334 int klen
= 0, vlen
= 0;
336 APPEND_NUM_STAT(i
, "chunk_size", "%u", p
->size
);
337 APPEND_NUM_STAT(i
, "chunks_per_page", "%u", perslab
);
338 APPEND_NUM_STAT(i
, "total_pages", "%u", slabs
);
339 APPEND_NUM_STAT(i
, "total_chunks", "%u", slabs
* perslab
);
340 APPEND_NUM_STAT(i
, "used_chunks", "%u",
341 slabs
*perslab
- p
->sl_curr
);
342 APPEND_NUM_STAT(i
, "free_chunks", "%u", p
->sl_curr
);
343 /* Stat is dead, but displaying zero instead of removing it. */
344 APPEND_NUM_STAT(i
, "free_chunks_end", "%u", 0);
345 APPEND_NUM_STAT(i
, "mem_requested", "%llu",
346 (unsigned long long)p
->requested
);
347 APPEND_NUM_STAT(i
, "get_hits", "%llu",
348 (unsigned long long)thread_stats
.slab_stats
[i
].get_hits
);
349 APPEND_NUM_STAT(i
, "cmd_set", "%llu",
350 (unsigned long long)thread_stats
.slab_stats
[i
].set_cmds
);
351 APPEND_NUM_STAT(i
, "delete_hits", "%llu",
352 (unsigned long long)thread_stats
.slab_stats
[i
].delete_hits
);
353 APPEND_NUM_STAT(i
, "incr_hits", "%llu",
354 (unsigned long long)thread_stats
.slab_stats
[i
].incr_hits
);
355 APPEND_NUM_STAT(i
, "decr_hits", "%llu",
356 (unsigned long long)thread_stats
.slab_stats
[i
].decr_hits
);
357 APPEND_NUM_STAT(i
, "cas_hits", "%llu",
358 (unsigned long long)thread_stats
.slab_stats
[i
].cas_hits
);
359 APPEND_NUM_STAT(i
, "cas_badval", "%llu",
360 (unsigned long long)thread_stats
.slab_stats
[i
].cas_badval
);
361 APPEND_NUM_STAT(i
, "touch_hits", "%llu",
362 (unsigned long long)thread_stats
.slab_stats
[i
].touch_hits
);
367 /* add overall slab stats and append terminator */
369 APPEND_STAT("active_slabs", "%d", total
);
370 APPEND_STAT("total_malloced", "%llu", (unsigned long long)mem_malloced
);
371 add_stats(NULL
, 0, NULL
, 0, c
);
374 static void *memory_allocate(size_t size
) {
377 if (mem_base
== NULL
) {
378 /* We are not using a preallocated large memory chunk */
383 if (size
> mem_avail
) {
387 /* mem_current pointer _must_ be aligned!!! */
388 if (size
% CHUNK_ALIGN_BYTES
) {
389 size
+= CHUNK_ALIGN_BYTES
- (size
% CHUNK_ALIGN_BYTES
);
392 mem_current
= ((char*)mem_current
) + size
;
393 if (size
< mem_avail
) {
403 void *slabs_alloc(size_t size
, unsigned int id
) {
406 pthread_mutex_lock(&slabs_lock
);
407 ret
= do_slabs_alloc(size
, id
);
408 pthread_mutex_unlock(&slabs_lock
);
412 void slabs_free(void *ptr
, size_t size
, unsigned int id
) {
413 pthread_mutex_lock(&slabs_lock
);
414 do_slabs_free(ptr
, size
, id
);
415 pthread_mutex_unlock(&slabs_lock
);
418 void slabs_stats(ADD_STAT add_stats
, void *c
) {
419 pthread_mutex_lock(&slabs_lock
);
420 do_slabs_stats(add_stats
, c
);
421 pthread_mutex_unlock(&slabs_lock
);
424 void slabs_adjust_mem_requested(unsigned int id
, size_t old
, size_t ntotal
)
426 pthread_mutex_lock(&slabs_lock
);
428 if (id
< POWER_SMALLEST
|| id
> power_largest
) {
429 fprintf(stderr
, "Internal error! Invalid slab class\n");
434 p
->requested
= p
->requested
- old
+ ntotal
;
435 pthread_mutex_unlock(&slabs_lock
);
/* State shared by the slab maintenance and rebalance threads. */
static pthread_cond_t maintenance_cond = PTHREAD_COND_INITIALIZER;
static pthread_cond_t slab_rebalance_cond = PTHREAD_COND_INITIALIZER;
static volatile int do_run_slab_thread = 1;
static volatile int do_run_slab_rebalance_thread = 1;

/* How many chunks the mover examines per pass while holding the locks. */
#define DEFAULT_SLAB_BULK_CHECK 1
int slab_bulk_check = DEFAULT_SLAB_BULK_CHECK;
446 static int slab_rebalance_start(void) {
450 pthread_mutex_lock(&cache_lock
);
451 pthread_mutex_lock(&slabs_lock
);
453 if (slab_rebal
.s_clsid
< POWER_SMALLEST
||
454 slab_rebal
.s_clsid
> power_largest
||
455 slab_rebal
.d_clsid
< POWER_SMALLEST
||
456 slab_rebal
.d_clsid
> power_largest
||
457 slab_rebal
.s_clsid
== slab_rebal
.d_clsid
)
460 s_cls
= &slabclass
[slab_rebal
.s_clsid
];
462 if (!grow_slab_list(slab_rebal
.d_clsid
)) {
466 if (s_cls
->slabs
< 2)
470 pthread_mutex_unlock(&slabs_lock
);
471 pthread_mutex_unlock(&cache_lock
);
472 return no_go
; /* Should use a wrapper function... */
477 slab_rebal
.slab_start
= s_cls
->slab_list
[s_cls
->killing
- 1];
478 slab_rebal
.slab_end
= (char *)slab_rebal
.slab_start
+
479 (s_cls
->size
* s_cls
->perslab
);
480 slab_rebal
.slab_pos
= slab_rebal
.slab_start
;
483 /* Also tells do_item_get to search for items in this slab */
484 slab_rebalance_signal
= 2;
486 if (settings
.verbose
> 1) {
487 fprintf(stderr
, "Started a slab rebalance\n");
490 pthread_mutex_unlock(&slabs_lock
);
491 pthread_mutex_unlock(&cache_lock
);
494 stats
.slab_reassign_running
= true;
/* Outcome of examining one chunk during a rebalance pass. */
enum move_status {
    MOVE_PASS=0, MOVE_DONE, MOVE_BUSY
};
504 /* refcount == 0 is safe since nobody can incr while cache_lock is held.
505 * refcount != 0 is impossible since flags/etc can be modified in other
506 * threads. instead, note we found a busy one and bail. logic in do_item_get
507 * will prevent busy items from continuing to be busy
509 static int slab_rebalance_move(void) {
514 enum move_status status
= MOVE_PASS
;
516 pthread_mutex_lock(&cache_lock
);
517 pthread_mutex_lock(&slabs_lock
);
519 s_cls
= &slabclass
[slab_rebal
.s_clsid
];
521 for (x
= 0; x
< slab_bulk_check
; x
++) {
522 item
*it
= slab_rebal
.slab_pos
;
524 if (it
->slabs_clsid
!= 255) {
525 refcount
= refcount_incr(&it
->refcount
);
526 if (refcount
== 1) { /* item is unlinked, unused */
527 if (it
->it_flags
& ITEM_SLABBED
) {
528 /* remove from slab freelist */
529 if (s_cls
->slots
== it
) {
530 s_cls
->slots
= it
->next
;
532 if (it
->next
) it
->next
->prev
= it
->prev
;
533 if (it
->prev
) it
->prev
->next
= it
->next
;
539 } else if (refcount
== 2) { /* item is linked but not busy */
540 if ((it
->it_flags
& ITEM_LINKED
) != 0) {
541 do_item_unlink_nolock(it
, hash(ITEM_key(it
), it
->nkey
, 0));
544 /* refcount == 1 + !ITEM_LINKED means the item is being
545 * uploaded to, or was just unlinked but hasn't been freed
546 * yet. Let it bleed off on its own and try again later */
550 if (settings
.verbose
> 2) {
551 fprintf(stderr
, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
552 it
->refcount
, slab_rebal
.s_clsid
, slab_rebal
.d_clsid
);
562 it
->slabs_clsid
= 255;
565 slab_rebal
.busy_items
++;
567 refcount_decr(&it
->refcount
);
573 slab_rebal
.slab_pos
= (char *)slab_rebal
.slab_pos
+ s_cls
->size
;
574 if (slab_rebal
.slab_pos
>= slab_rebal
.slab_end
)
578 if (slab_rebal
.slab_pos
>= slab_rebal
.slab_end
) {
579 /* Some items were busy, start again from the top */
580 if (slab_rebal
.busy_items
) {
581 slab_rebal
.slab_pos
= slab_rebal
.slab_start
;
582 slab_rebal
.busy_items
= 0;
588 pthread_mutex_unlock(&slabs_lock
);
589 pthread_mutex_unlock(&cache_lock
);
594 static void slab_rebalance_finish(void) {
598 pthread_mutex_lock(&cache_lock
);
599 pthread_mutex_lock(&slabs_lock
);
601 s_cls
= &slabclass
[slab_rebal
.s_clsid
];
602 d_cls
= &slabclass
[slab_rebal
.d_clsid
];
604 /* At this point the stolen slab is completely clear */
605 s_cls
->slab_list
[s_cls
->killing
- 1] =
606 s_cls
->slab_list
[s_cls
->slabs
- 1];
610 memset(slab_rebal
.slab_start
, 0, (size_t)settings
.item_size_max
);
612 d_cls
->slab_list
[d_cls
->slabs
++] = slab_rebal
.slab_start
;
613 split_slab_page_into_freelist(slab_rebal
.slab_start
,
617 slab_rebal
.s_clsid
= 0;
618 slab_rebal
.d_clsid
= 0;
619 slab_rebal
.slab_start
= NULL
;
620 slab_rebal
.slab_end
= NULL
;
621 slab_rebal
.slab_pos
= NULL
;
623 slab_rebalance_signal
= 0;
625 pthread_mutex_unlock(&slabs_lock
);
626 pthread_mutex_unlock(&cache_lock
);
629 stats
.slab_reassign_running
= false;
633 if (settings
.verbose
> 1) {
634 fprintf(stderr
, "finished a slab move\n");
638 /* Return 1 means a decision was reached.
639 * Move to its own thread (created/destroyed as needed) once automover is more
642 static int slab_automove_decision(int *src
, int *dst
) {
643 static uint64_t evicted_old
[POWER_LARGEST
];
644 static unsigned int slab_zeroes
[POWER_LARGEST
];
645 static unsigned int slab_winner
= 0;
646 static unsigned int slab_wins
= 0;
647 uint64_t evicted_new
[POWER_LARGEST
];
648 uint64_t evicted_diff
= 0;
649 uint64_t evicted_max
= 0;
650 unsigned int highest_slab
= 0;
651 unsigned int total_pages
[POWER_LARGEST
];
655 static rel_time_t next_run
;
657 /* Run less frequently than the slabmove tester. */
658 if (current_time
>= next_run
) {
659 next_run
= current_time
+ 10;
664 item_stats_evictions(evicted_new
);
665 pthread_mutex_lock(&cache_lock
);
666 for (i
= POWER_SMALLEST
; i
< power_largest
; i
++) {
667 total_pages
[i
] = slabclass
[i
].slabs
;
669 pthread_mutex_unlock(&cache_lock
);
671 /* Find a candidate source; something with zero evicts 3+ times */
672 for (i
= POWER_SMALLEST
; i
< power_largest
; i
++) {
673 evicted_diff
= evicted_new
[i
] - evicted_old
[i
];
674 if (evicted_diff
== 0 && total_pages
[i
] > 2) {
676 if (source
== 0 && slab_zeroes
[i
] >= 3)
680 if (evicted_diff
> evicted_max
) {
681 evicted_max
= evicted_diff
;
685 evicted_old
[i
] = evicted_new
[i
];
688 /* Pick a valid destination */
689 if (slab_winner
!= 0 && slab_winner
== highest_slab
) {
695 slab_winner
= highest_slab
;
698 if (source
&& dest
) {
706 /* Slab rebalancer thread.
707 * Does not use spinlocks since it is not timing sensitive. Burn less CPU and
708 * go to sleep if locks are contended
710 static void *slab_maintenance_thread(void *arg
) {
713 while (do_run_slab_thread
) {
714 if (settings
.slab_automove
== 1) {
715 if (slab_automove_decision(&src
, &dest
) == 1) {
716 /* Blind to the return codes. It will retry on its own */
717 slabs_reassign(src
, dest
);
721 /* Don't wake as often if we're not enabled.
722 * This is lazier than setting up a condition right now. */
729 /* Slab mover thread.
730 * Sits waiting for a condition to jump off and shovel some memory about
732 static void *slab_rebalance_thread(void *arg
) {
735 while (do_run_slab_rebalance_thread
) {
736 if (slab_rebalance_signal
== 1) {
737 if (slab_rebalance_start() < 0) {
738 /* Handle errors with more specifity as required. */
739 slab_rebalance_signal
= 0;
743 } else if (slab_rebalance_signal
&& slab_rebal
.slab_start
!= NULL
) {
744 was_busy
= slab_rebalance_move();
747 if (slab_rebal
.done
) {
748 slab_rebalance_finish();
749 } else if (was_busy
) {
750 /* Stuck waiting for some items to unlock, so slow down a bit
751 * to give them a chance to free up */
755 if (slab_rebalance_signal
== 0) {
756 /* always hold this lock while we're running */
757 pthread_cond_wait(&slab_rebalance_cond
, &slabs_rebalance_lock
);
763 /* Iterate at most once through the slab classes and pick a "random" source.
764 * I like this better than calling rand() since rand() is slow enough that we
765 * can just check all of the classes once instead.
767 static int slabs_reassign_pick_any(int dst
) {
768 static int cur
= POWER_SMALLEST
- 1;
769 int tries
= power_largest
- POWER_SMALLEST
+ 1;
770 for (; tries
> 0; tries
--) {
772 if (cur
> power_largest
)
773 cur
= POWER_SMALLEST
;
776 if (slabclass
[cur
].slabs
> 1) {
783 static enum reassign_result_type
do_slabs_reassign(int src
, int dst
) {
784 if (slab_rebalance_signal
!= 0)
785 return REASSIGN_RUNNING
;
788 return REASSIGN_SRC_DST_SAME
;
790 /* Special indicator to choose ourselves. */
792 src
= slabs_reassign_pick_any(dst
);
793 /* TODO: If we end up back at -1, return a new error type */
796 if (src
< POWER_SMALLEST
|| src
> power_largest
||
797 dst
< POWER_SMALLEST
|| dst
> power_largest
)
798 return REASSIGN_BADCLASS
;
800 if (slabclass
[src
].slabs
< 2)
801 return REASSIGN_NOSPARE
;
803 slab_rebal
.s_clsid
= src
;
804 slab_rebal
.d_clsid
= dst
;
806 slab_rebalance_signal
= 1;
807 pthread_cond_signal(&slab_rebalance_cond
);
812 enum reassign_result_type
slabs_reassign(int src
, int dst
) {
813 enum reassign_result_type ret
;
814 if (pthread_mutex_trylock(&slabs_rebalance_lock
) != 0) {
815 return REASSIGN_RUNNING
;
817 ret
= do_slabs_reassign(src
, dst
);
818 pthread_mutex_unlock(&slabs_rebalance_lock
);
/* Thread handles for the maintenance and rebalance threads. */
static pthread_t maintenance_tid;
static pthread_t rebalance_tid;
825 int start_slab_maintenance_thread(void) {
827 slab_rebalance_signal
= 0;
828 slab_rebal
.slab_start
= NULL
;
829 char *env
= getenv("MEMCACHED_SLAB_BULK_CHECK");
831 slab_bulk_check
= atoi(env
);
832 if (slab_bulk_check
== 0) {
833 slab_bulk_check
= DEFAULT_SLAB_BULK_CHECK
;
837 if (pthread_cond_init(&slab_rebalance_cond
, NULL
) != 0) {
838 fprintf(stderr
, "Can't intiialize rebalance condition\n");
841 pthread_mutex_init(&slabs_rebalance_lock
, NULL
);
843 if ((ret
= pthread_create(&maintenance_tid
, NULL
,
844 slab_maintenance_thread
, NULL
)) != 0) {
845 fprintf(stderr
, "Can't create slab maint thread: %s\n", strerror(ret
));
848 if ((ret
= pthread_create(&rebalance_tid
, NULL
,
849 slab_rebalance_thread
, NULL
)) != 0) {
850 fprintf(stderr
, "Can't create rebal thread: %s\n", strerror(ret
));
856 void stop_slab_maintenance_thread(void) {
857 mutex_lock(&cache_lock
);
858 do_run_slab_thread
= 0;
859 do_run_slab_rebalance_thread
= 0;
860 pthread_cond_signal(&maintenance_cond
);
861 pthread_mutex_unlock(&cache_lock
);
863 /* Wait for the maintenance thread to stop */
864 pthread_join(maintenance_tid
, NULL
);
865 pthread_join(rebalance_tid
, NULL
);