Mercurial > hg > memcached
diff items.c @ 0:30782bb1fc04 MEMCACHED_1_2_3
memcached-1.2.3
author | Maxim Dounin <mdounin@mdounin.ru> |
---|---|
date | Sun, 23 Sep 2007 03:58:34 +0400 |
parents | |
children |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/items.c @@ -0,0 +1,435 @@ +/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* $Id: items.c 551 2007-05-07 21:24:31Z plindner $ */ +#include "memcached.h" +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/signal.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <netinet/in.h> +#include <errno.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> +#include <assert.h> + +/* Forward Declarations */ +static void item_link_q(item *it); +static void item_unlink_q(item *it); + +/* + * We only reposition items in the LRU queue if they haven't been repositioned + * in this many seconds. That saves us from churning on frequently-accessed + * items. + */ +#define ITEM_UPDATE_INTERVAL 60 + +#define LARGEST_ID 255 +static item *heads[LARGEST_ID]; +static item *tails[LARGEST_ID]; +static unsigned int sizes[LARGEST_ID]; + +void item_init(void) { + int i; + for(i = 0; i < LARGEST_ID; i++) { + heads[i] = NULL; + tails[i] = NULL; + sizes[i] = 0; + } +} + +/* Enable this for reference-count debugging. */ +#if 0 +# define DEBUG_REFCNT(it,op) \ + fprintf(stderr, "item %x refcnt(%c) %d %c%c%c\n", \ + it, op, it->refcount, \ + (it->it_flags & ITEM_LINKED) ? 'L' : ' ', \ + (it->it_flags & ITEM_SLABBED) ? 'S' : ' ', \ + (it->it_flags & ITEM_DELETED) ? 'D' : ' ') +#else +# define DEBUG_REFCNT(it,op) while(0) +#endif + +/* + * Generates the variable-sized part of the header for an object. + * + * key - The key + * nkey - The length of the key + * flags - key flags + * nbytes - Number of bytes to hold value and addition CRLF terminator + * suffix - Buffer for the "VALUE" line suffix (flags, size). + * nsuffix - The length of the suffix is stored here. + * + * Returns the total size of the header. + */ +static size_t item_make_header(const uint8_t nkey, const int flags, const int nbytes, + char *suffix, uint8_t *nsuffix) { + /* suffix is defined at 40 chars elsewhere.. */ + *nsuffix = (uint8_t) snprintf(suffix, 40, " %d %d\r\n", flags, nbytes - 2); + return sizeof(item) + nkey + *nsuffix + nbytes; +} + +/*@null@*/ +item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) { + uint8_t nsuffix; + item *it; + char suffix[40]; + size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); + + unsigned int id = slabs_clsid(ntotal); + if (id == 0) + return 0; + + it = slabs_alloc(ntotal); + if (it == 0) { + int tries = 50; + item *search; + + /* If requested to not push old items out of cache when memory runs out, + * we're out of luck at this point... + */ + + if (settings.evict_to_free == 0) return NULL; + + /* + * try to get one off the right LRU + * don't necessariuly unlink the tail because it may be locked: refcount>0 + * search up from tail an item with refcount==0 and unlink it; give up after 50 + * tries + */ + + if (id > LARGEST_ID) return NULL; + if (tails[id] == 0) return NULL; + + for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) { + if (search->refcount == 0) { + if (search->exptime > current_time) { + STATS_LOCK(); + stats.evictions++; + STATS_UNLOCK(); + } + do_item_unlink(search); + break; + } + } + it = slabs_alloc(ntotal); + if (it == 0) return NULL; + } + + assert(it->slabs_clsid == 0); + + it->slabs_clsid = id; + + assert(it != heads[it->slabs_clsid]); + + it->next = it->prev = it->h_next = 0; + it->refcount = 1; /* the caller will have a reference */ + DEBUG_REFCNT(it, '*'); + it->it_flags = 0; + it->nkey = nkey; + it->nbytes = nbytes; + strcpy(ITEM_key(it), key); + it->exptime = exptime; + memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); + it->nsuffix = nsuffix; + return it; +} + +void item_free(item *it) { + size_t ntotal = ITEM_ntotal(it); + assert((it->it_flags & ITEM_LINKED) == 0); + assert(it != heads[it->slabs_clsid]); + assert(it != tails[it->slabs_clsid]); + assert(it->refcount == 0); + + /* so slab size changer can tell later if item is already free or not */ + it->slabs_clsid = 0; + it->it_flags |= ITEM_SLABBED; + DEBUG_REFCNT(it, 'F'); + slabs_free(it, ntotal); +} + +/* + * Returns true if an item will fit in the cache (its size does not exceed + * the maximum for a cache entry.) + */ +bool item_size_ok(const size_t nkey, const int flags, const int nbytes) { + char prefix[40]; + uint8_t nsuffix; + + return slabs_clsid(item_make_header(nkey + 1, flags, nbytes, + prefix, &nsuffix)) != 0; +} + +static void item_link_q(item *it) { /* item is the new head */ + item **head, **tail; + /* always true, warns: assert(it->slabs_clsid <= LARGEST_ID); */ + assert((it->it_flags & ITEM_SLABBED) == 0); + + head = &heads[it->slabs_clsid]; + tail = &tails[it->slabs_clsid]; + assert(it != *head); + assert((*head && *tail) || (*head == 0 && *tail == 0)); + it->prev = 0; + it->next = *head; + if (it->next) it->next->prev = it; + *head = it; + if (*tail == 0) *tail = it; + sizes[it->slabs_clsid]++; + return; +} + +static void item_unlink_q(item *it) { + item **head, **tail; + /* always true, warns: assert(it->slabs_clsid <= LARGEST_ID); */ + head = &heads[it->slabs_clsid]; + tail = &tails[it->slabs_clsid]; + + if (*head == it) { + assert(it->prev == 0); + *head = it->next; + } + if (*tail == it) { + assert(it->next == 0); + *tail = it->prev; + } + assert(it->next != it); + assert(it->prev != it); + + if (it->next) it->next->prev = it->prev; + if (it->prev) it->prev->next = it->next; + sizes[it->slabs_clsid]--; + return; +} + +int do_item_link(item *it) { + assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0); + assert(it->nbytes < 1048576); + it->it_flags |= ITEM_LINKED; + it->time = current_time; + assoc_insert(it); + + STATS_LOCK(); + stats.curr_bytes += ITEM_ntotal(it); + stats.curr_items += 1; + stats.total_items += 1; + STATS_UNLOCK(); + + item_link_q(it); + + return 1; +} + +void do_item_unlink(item *it) { + if ((it->it_flags & ITEM_LINKED) != 0) { + it->it_flags &= ~ITEM_LINKED; + STATS_LOCK(); + stats.curr_bytes -= ITEM_ntotal(it); + stats.curr_items -= 1; + STATS_UNLOCK(); + assoc_delete(ITEM_key(it), it->nkey); + item_unlink_q(it); + if (it->refcount == 0) item_free(it); + } +} + +void do_item_remove(item *it) { + assert((it->it_flags & ITEM_SLABBED) == 0); + if (it->refcount != 0) { + it->refcount--; + DEBUG_REFCNT(it, '-'); + } + assert((it->it_flags & ITEM_DELETED) == 0 || it->refcount != 0); + if (it->refcount == 0 && (it->it_flags & ITEM_LINKED) == 0) { + item_free(it); + } +} + +void do_item_update(item *it) { + if (it->time < current_time - ITEM_UPDATE_INTERVAL) { + assert((it->it_flags & ITEM_SLABBED) == 0); + + if (it->it_flags & ITEM_LINKED) { + item_unlink_q(it); + it->time = current_time; + item_link_q(it); + } + } +} + +int do_item_replace(item *it, item *new_it) { + assert((it->it_flags & ITEM_SLABBED) == 0); + + do_item_unlink(it); + return do_item_link(new_it); +} + +/*@null@*/ +char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes) { + int memlimit = 2097152; /* 2097152: (2 * 1024 * 1024) */ + char *buffer; + unsigned int bufcurr; + item *it; + int len; + int shown = 0; + char temp[512]; + + if (slabs_clsid > LARGEST_ID) return NULL; + it = heads[slabs_clsid]; + + buffer = malloc((size_t)memlimit); + if (buffer == 0) return NULL; + bufcurr = 0; + + while (it != NULL && (limit == 0 || shown < limit)) { + len = snprintf(temp, 512, "ITEM %s [%d b; %lu s]\r\n", ITEM_key(it), it->nbytes - 2, it->time + stats.started); + if (bufcurr + len + 6 > memlimit) /* 6 is END\r\n\0 */ + break; + strcpy(buffer + bufcurr, temp); + bufcurr += len; + shown++; + it = it->next; + } + + memcpy(buffer + bufcurr, "END\r\n", 6); + bufcurr += 5; + + *bytes = bufcurr; + return buffer; +} + +void item_stats(char *buffer, const int buflen) { + int i; + char *bufcurr = buffer; + rel_time_t now = current_time; + + if (buflen < 4096) { + strcpy(buffer, "SERVER_ERROR out of memory"); + return; + } + + for (i = 0; i < LARGEST_ID; i++) { + if (tails[i] != NULL) + bufcurr += snprintf(bufcurr, (size_t)buflen, "STAT items:%d:number %u\r\nSTAT items:%d:age %u\r\n", + i, sizes[i], i, now - tails[i]->time); + } + memcpy(bufcurr, "END", 4); + return; +} + +/* dumps out a list of objects of each size, with granularity of 32 bytes */ +/*@null@*/ +char* item_stats_sizes(int *bytes) { + const int num_buckets = 32768; /* max 1MB object, divided into 32 bytes size buckets */ + unsigned int *histogram = (unsigned int *)malloc((size_t)num_buckets * sizeof(int)); + char *buf = (char *)malloc(2097152 * sizeof(char)); /* 2097152: 2 * 1024 * 1024 */ + int i; + + if (histogram == 0 || buf == 0) { + if (histogram) free(histogram); + if (buf) free(buf); + return NULL; + } + + /* build the histogram */ + memset(histogram, 0, (size_t)num_buckets * sizeof(int)); + for (i = 0; i < LARGEST_ID; i++) { + item *iter = heads[i]; + while (iter) { + int ntotal = ITEM_ntotal(iter); + int bucket = ntotal / 32; + if ((ntotal % 32) != 0) bucket++; + if (bucket < num_buckets) histogram[bucket]++; + iter = iter->next; + } + } + + /* write the buffer */ + *bytes = 0; + for (i = 0; i < num_buckets; i++) { + if (histogram[i] != 0) { + *bytes += sprintf(&buf[*bytes], "%d %u\r\n", i * 32, histogram[i]); + } + } + *bytes += sprintf(&buf[*bytes], "END\r\n"); + free(histogram); + return buf; +} + +/* returns true if a deleted item's delete-locked-time is over, and it + should be removed from the namespace */ +bool item_delete_lock_over (item *it) { + assert(it->it_flags & ITEM_DELETED); + return (current_time >= it->exptime); +} + +/* wrapper around assoc_find which does the lazy expiration/deletion logic */ +item *do_item_get_notedeleted(const char *key, const size_t nkey, bool *delete_locked) { + item *it = assoc_find(key, nkey); + if (delete_locked) *delete_locked = false; + if (it && (it->it_flags & ITEM_DELETED)) { + /* it's flagged as delete-locked. let's see if that condition + is past due, and the 5-second delete_timer just hasn't + gotten to it yet... */ + if (!item_delete_lock_over(it)) { + if (delete_locked) *delete_locked = true; + it = 0; + } + } + if (it != NULL && settings.oldest_live != 0 && settings.oldest_live <= current_time && + it->time <= settings.oldest_live) { + do_item_unlink(it); // MTSAFE - cache_lock held + it = 0; + } + if (it != NULL && it->exptime != 0 && it->exptime <= current_time) { + do_item_unlink(it); // MTSAFE - cache_lock held + it = 0; + } + + if (it != NULL) { + it->refcount++; + DEBUG_REFCNT(it, '+'); + } + return it; +} + +item *item_get(const char *key, const size_t nkey) { + return item_get_notedeleted(key, nkey, 0); +} + +/* returns an item whether or not it's delete-locked or expired. */ +item *do_item_get_nocheck(const char *key, const size_t nkey) { + item *it = assoc_find(key, nkey); + if (it) { + it->refcount++; + DEBUG_REFCNT(it, '+'); + } + return it; +} + +/* expires items that are more recent than the oldest_live setting. */ +void do_item_flush_expired(void) { + int i; + item *iter, *next; + if (settings.oldest_live == 0) + return; + for (i = 0; i < LARGEST_ID; i++) { + /* The LRU is sorted in decreasing time order, and an item's timestamp + * is never newer than its last access time, so we only need to walk + * back until we hit an item older than the oldest_live time. + * The oldest_live checking will auto-expire the remaining items. + */ + for (iter = heads[i]; iter != NULL; iter = next) { + if (iter->time >= settings.oldest_live) { + next = iter->next; + if ((iter->it_flags & ITEM_SLABBED) == 0) { + do_item_unlink(iter); + } + } else { + /* We've hit the first old item. Continue to the next queue. */ + break; + } + } + } +}