0
|
1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
2 /* $Id: items.c 551 2007-05-07 21:24:31Z plindner $ */
|
|
3 #include "memcached.h"
|
|
4 #include <sys/stat.h>
|
|
5 #include <sys/socket.h>
|
|
6 #include <sys/signal.h>
|
|
7 #include <sys/resource.h>
|
|
8 #include <fcntl.h>
|
|
9 #include <netinet/in.h>
|
|
10 #include <errno.h>
|
|
11 #include <stdlib.h>
|
|
12 #include <stdio.h>
|
|
13 #include <string.h>
|
|
14 #include <time.h>
|
|
15 #include <assert.h>
|
|
16
|
|
17 /* Forward Declarations */
|
|
18 static void item_link_q(item *it);
|
|
19 static void item_unlink_q(item *it);
|
|
20
|
|
21 /*
|
|
22 * We only reposition items in the LRU queue if they haven't been repositioned
|
|
23 * in this many seconds. That saves us from churning on frequently-accessed
|
|
24 * items.
|
|
25 */
|
|
26 #define ITEM_UPDATE_INTERVAL 60
|
|
27
|
|
28 #define LARGEST_ID 255
|
|
29 static item *heads[LARGEST_ID];
|
|
30 static item *tails[LARGEST_ID];
|
|
31 static unsigned int sizes[LARGEST_ID];
|
|
32
|
|
33 void item_init(void) {
|
|
34 int i;
|
|
35 for(i = 0; i < LARGEST_ID; i++) {
|
|
36 heads[i] = NULL;
|
|
37 tails[i] = NULL;
|
|
38 sizes[i] = 0;
|
|
39 }
|
|
40 }
|
|
41
|
|
42 /* Enable this for reference-count debugging. */
|
|
43 #if 0
|
|
44 # define DEBUG_REFCNT(it,op) \
|
|
45 fprintf(stderr, "item %x refcnt(%c) %d %c%c%c\n", \
|
|
46 it, op, it->refcount, \
|
|
47 (it->it_flags & ITEM_LINKED) ? 'L' : ' ', \
|
|
48 (it->it_flags & ITEM_SLABBED) ? 'S' : ' ', \
|
|
49 (it->it_flags & ITEM_DELETED) ? 'D' : ' ')
|
|
50 #else
|
|
51 # define DEBUG_REFCNT(it,op) while(0)
|
|
52 #endif
|
|
53
|
|
54 /*
|
|
55 * Generates the variable-sized part of the header for an object.
|
|
56 *
|
|
57 * key - The key
|
|
58 * nkey - The length of the key
|
|
59 * flags - key flags
|
|
60 * nbytes - Number of bytes to hold value and addition CRLF terminator
|
|
61 * suffix - Buffer for the "VALUE" line suffix (flags, size).
|
|
62 * nsuffix - The length of the suffix is stored here.
|
|
63 *
|
|
64 * Returns the total size of the header.
|
|
65 */
|
|
66 static size_t item_make_header(const uint8_t nkey, const int flags, const int nbytes,
|
|
67 char *suffix, uint8_t *nsuffix) {
|
|
68 /* suffix is defined at 40 chars elsewhere.. */
|
|
69 *nsuffix = (uint8_t) snprintf(suffix, 40, " %d %d\r\n", flags, nbytes - 2);
|
|
70 return sizeof(item) + nkey + *nsuffix + nbytes;
|
|
71 }
|
|
72
|
|
73 /*@null@*/
|
|
74 item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
|
|
75 uint8_t nsuffix;
|
|
76 item *it;
|
|
77 char suffix[40];
|
|
78 size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
|
|
79
|
|
80 unsigned int id = slabs_clsid(ntotal);
|
|
81 if (id == 0)
|
|
82 return 0;
|
|
83
|
|
84 it = slabs_alloc(ntotal);
|
|
85 if (it == 0) {
|
|
86 int tries = 50;
|
|
87 item *search;
|
|
88
|
|
89 /* If requested to not push old items out of cache when memory runs out,
|
|
90 * we're out of luck at this point...
|
|
91 */
|
|
92
|
|
93 if (settings.evict_to_free == 0) return NULL;
|
|
94
|
|
95 /*
|
|
96 * try to get one off the right LRU
|
|
97 * don't necessariuly unlink the tail because it may be locked: refcount>0
|
|
98 * search up from tail an item with refcount==0 and unlink it; give up after 50
|
|
99 * tries
|
|
100 */
|
|
101
|
|
102 if (id > LARGEST_ID) return NULL;
|
|
103 if (tails[id] == 0) return NULL;
|
|
104
|
|
105 for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
|
|
106 if (search->refcount == 0) {
|
|
107 if (search->exptime > current_time) {
|
|
108 STATS_LOCK();
|
|
109 stats.evictions++;
|
|
110 STATS_UNLOCK();
|
|
111 }
|
|
112 do_item_unlink(search);
|
|
113 break;
|
|
114 }
|
|
115 }
|
|
116 it = slabs_alloc(ntotal);
|
|
117 if (it == 0) return NULL;
|
|
118 }
|
|
119
|
|
120 assert(it->slabs_clsid == 0);
|
|
121
|
|
122 it->slabs_clsid = id;
|
|
123
|
|
124 assert(it != heads[it->slabs_clsid]);
|
|
125
|
|
126 it->next = it->prev = it->h_next = 0;
|
|
127 it->refcount = 1; /* the caller will have a reference */
|
|
128 DEBUG_REFCNT(it, '*');
|
|
129 it->it_flags = 0;
|
|
130 it->nkey = nkey;
|
|
131 it->nbytes = nbytes;
|
|
132 strcpy(ITEM_key(it), key);
|
|
133 it->exptime = exptime;
|
|
134 memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
|
|
135 it->nsuffix = nsuffix;
|
|
136 return it;
|
|
137 }
|
|
138
|
|
139 void item_free(item *it) {
|
|
140 size_t ntotal = ITEM_ntotal(it);
|
|
141 assert((it->it_flags & ITEM_LINKED) == 0);
|
|
142 assert(it != heads[it->slabs_clsid]);
|
|
143 assert(it != tails[it->slabs_clsid]);
|
|
144 assert(it->refcount == 0);
|
|
145
|
|
146 /* so slab size changer can tell later if item is already free or not */
|
|
147 it->slabs_clsid = 0;
|
|
148 it->it_flags |= ITEM_SLABBED;
|
|
149 DEBUG_REFCNT(it, 'F');
|
|
150 slabs_free(it, ntotal);
|
|
151 }
|
|
152
|
|
153 /*
|
|
154 * Returns true if an item will fit in the cache (its size does not exceed
|
|
155 * the maximum for a cache entry.)
|
|
156 */
|
|
/*
 * Returns true if an item will fit in the cache (its size does not exceed
 * the maximum for a cache entry.)
 */
bool item_size_ok(const size_t nkey, const int flags, const int nbytes) {
    char suffix_buf[40];
    uint8_t suffix_len;
    size_t needed = item_make_header(nkey + 1, flags, nbytes, suffix_buf, &suffix_len);
    /* class id 0 means "no slab class large enough" */
    return slabs_clsid(needed) != 0;
}
|
|
164
|
|
165 static void item_link_q(item *it) { /* item is the new head */
|
|
166 item **head, **tail;
|
|
167 /* always true, warns: assert(it->slabs_clsid <= LARGEST_ID); */
|
|
168 assert((it->it_flags & ITEM_SLABBED) == 0);
|
|
169
|
|
170 head = &heads[it->slabs_clsid];
|
|
171 tail = &tails[it->slabs_clsid];
|
|
172 assert(it != *head);
|
|
173 assert((*head && *tail) || (*head == 0 && *tail == 0));
|
|
174 it->prev = 0;
|
|
175 it->next = *head;
|
|
176 if (it->next) it->next->prev = it;
|
|
177 *head = it;
|
|
178 if (*tail == 0) *tail = it;
|
|
179 sizes[it->slabs_clsid]++;
|
|
180 return;
|
|
181 }
|
|
182
|
|
183 static void item_unlink_q(item *it) {
|
|
184 item **head, **tail;
|
|
185 /* always true, warns: assert(it->slabs_clsid <= LARGEST_ID); */
|
|
186 head = &heads[it->slabs_clsid];
|
|
187 tail = &tails[it->slabs_clsid];
|
|
188
|
|
189 if (*head == it) {
|
|
190 assert(it->prev == 0);
|
|
191 *head = it->next;
|
|
192 }
|
|
193 if (*tail == it) {
|
|
194 assert(it->next == 0);
|
|
195 *tail = it->prev;
|
|
196 }
|
|
197 assert(it->next != it);
|
|
198 assert(it->prev != it);
|
|
199
|
|
200 if (it->next) it->next->prev = it->prev;
|
|
201 if (it->prev) it->prev->next = it->next;
|
|
202 sizes[it->slabs_clsid]--;
|
|
203 return;
|
|
204 }
|
|
205
|
|
206 int do_item_link(item *it) {
|
|
207 assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0);
|
|
208 assert(it->nbytes < 1048576);
|
|
209 it->it_flags |= ITEM_LINKED;
|
|
210 it->time = current_time;
|
|
211 assoc_insert(it);
|
|
212
|
|
213 STATS_LOCK();
|
|
214 stats.curr_bytes += ITEM_ntotal(it);
|
|
215 stats.curr_items += 1;
|
|
216 stats.total_items += 1;
|
|
217 STATS_UNLOCK();
|
|
218
|
|
219 item_link_q(it);
|
|
220
|
|
221 return 1;
|
|
222 }
|
|
223
|
|
224 void do_item_unlink(item *it) {
|
|
225 if ((it->it_flags & ITEM_LINKED) != 0) {
|
|
226 it->it_flags &= ~ITEM_LINKED;
|
|
227 STATS_LOCK();
|
|
228 stats.curr_bytes -= ITEM_ntotal(it);
|
|
229 stats.curr_items -= 1;
|
|
230 STATS_UNLOCK();
|
|
231 assoc_delete(ITEM_key(it), it->nkey);
|
|
232 item_unlink_q(it);
|
|
233 if (it->refcount == 0) item_free(it);
|
|
234 }
|
|
235 }
|
|
236
|
|
237 void do_item_remove(item *it) {
|
|
238 assert((it->it_flags & ITEM_SLABBED) == 0);
|
|
239 if (it->refcount != 0) {
|
|
240 it->refcount--;
|
|
241 DEBUG_REFCNT(it, '-');
|
|
242 }
|
|
243 assert((it->it_flags & ITEM_DELETED) == 0 || it->refcount != 0);
|
|
244 if (it->refcount == 0 && (it->it_flags & ITEM_LINKED) == 0) {
|
|
245 item_free(it);
|
|
246 }
|
|
247 }
|
|
248
|
|
249 void do_item_update(item *it) {
|
|
250 if (it->time < current_time - ITEM_UPDATE_INTERVAL) {
|
|
251 assert((it->it_flags & ITEM_SLABBED) == 0);
|
|
252
|
|
253 if (it->it_flags & ITEM_LINKED) {
|
|
254 item_unlink_q(it);
|
|
255 it->time = current_time;
|
|
256 item_link_q(it);
|
|
257 }
|
|
258 }
|
|
259 }
|
|
260
|
|
/*
 * Replaces `it` with `new_it` in the cache: unlinks the old item from the
 * hash table and LRU, then links the new one in.
 *
 * Returns the result of do_item_link (always 1).
 * NOTE(review): assumes cache_lock is held, as with the other do_* entry points.
 */
int do_item_replace(item *it, item *new_it) {
    assert((it->it_flags & ITEM_SLABBED) == 0);

    do_item_unlink(it);
    return do_item_link(new_it);
}
|
|
267
|
|
268 /*@null@*/
|
|
269 char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes) {
|
|
270 int memlimit = 2097152; /* 2097152: (2 * 1024 * 1024) */
|
|
271 char *buffer;
|
|
272 unsigned int bufcurr;
|
|
273 item *it;
|
|
274 int len;
|
|
275 int shown = 0;
|
|
276 char temp[512];
|
|
277
|
|
278 if (slabs_clsid > LARGEST_ID) return NULL;
|
|
279 it = heads[slabs_clsid];
|
|
280
|
|
281 buffer = malloc((size_t)memlimit);
|
|
282 if (buffer == 0) return NULL;
|
|
283 bufcurr = 0;
|
|
284
|
|
285 while (it != NULL && (limit == 0 || shown < limit)) {
|
|
286 len = snprintf(temp, 512, "ITEM %s [%d b; %lu s]\r\n", ITEM_key(it), it->nbytes - 2, it->time + stats.started);
|
|
287 if (bufcurr + len + 6 > memlimit) /* 6 is END\r\n\0 */
|
|
288 break;
|
|
289 strcpy(buffer + bufcurr, temp);
|
|
290 bufcurr += len;
|
|
291 shown++;
|
|
292 it = it->next;
|
|
293 }
|
|
294
|
|
295 memcpy(buffer + bufcurr, "END\r\n", 6);
|
|
296 bufcurr += 5;
|
|
297
|
|
298 *bytes = bufcurr;
|
|
299 return buffer;
|
|
300 }
|
|
301
|
|
302 void item_stats(char *buffer, const int buflen) {
|
|
303 int i;
|
|
304 char *bufcurr = buffer;
|
|
305 rel_time_t now = current_time;
|
|
306
|
|
307 if (buflen < 4096) {
|
|
308 strcpy(buffer, "SERVER_ERROR out of memory");
|
|
309 return;
|
|
310 }
|
|
311
|
|
312 for (i = 0; i < LARGEST_ID; i++) {
|
|
313 if (tails[i] != NULL)
|
|
314 bufcurr += snprintf(bufcurr, (size_t)buflen, "STAT items:%d:number %u\r\nSTAT items:%d:age %u\r\n",
|
|
315 i, sizes[i], i, now - tails[i]->time);
|
|
316 }
|
|
317 memcpy(bufcurr, "END", 4);
|
|
318 return;
|
|
319 }
|
|
320
|
|
321 /* dumps out a list of objects of each size, with granularity of 32 bytes */
|
|
322 /*@null@*/
|
|
323 char* item_stats_sizes(int *bytes) {
|
|
324 const int num_buckets = 32768; /* max 1MB object, divided into 32 bytes size buckets */
|
|
325 unsigned int *histogram = (unsigned int *)malloc((size_t)num_buckets * sizeof(int));
|
|
326 char *buf = (char *)malloc(2097152 * sizeof(char)); /* 2097152: 2 * 1024 * 1024 */
|
|
327 int i;
|
|
328
|
|
329 if (histogram == 0 || buf == 0) {
|
|
330 if (histogram) free(histogram);
|
|
331 if (buf) free(buf);
|
|
332 return NULL;
|
|
333 }
|
|
334
|
|
335 /* build the histogram */
|
|
336 memset(histogram, 0, (size_t)num_buckets * sizeof(int));
|
|
337 for (i = 0; i < LARGEST_ID; i++) {
|
|
338 item *iter = heads[i];
|
|
339 while (iter) {
|
|
340 int ntotal = ITEM_ntotal(iter);
|
|
341 int bucket = ntotal / 32;
|
|
342 if ((ntotal % 32) != 0) bucket++;
|
|
343 if (bucket < num_buckets) histogram[bucket]++;
|
|
344 iter = iter->next;
|
|
345 }
|
|
346 }
|
|
347
|
|
348 /* write the buffer */
|
|
349 *bytes = 0;
|
|
350 for (i = 0; i < num_buckets; i++) {
|
|
351 if (histogram[i] != 0) {
|
|
352 *bytes += sprintf(&buf[*bytes], "%d %u\r\n", i * 32, histogram[i]);
|
|
353 }
|
|
354 }
|
|
355 *bytes += sprintf(&buf[*bytes], "END\r\n");
|
|
356 free(histogram);
|
|
357 return buf;
|
|
358 }
|
|
359
|
|
360 /* returns true if a deleted item's delete-locked-time is over, and it
|
|
361 should be removed from the namespace */
|
|
362 bool item_delete_lock_over (item *it) {
|
|
363 assert(it->it_flags & ITEM_DELETED);
|
|
364 return (current_time >= it->exptime);
|
|
365 }
|
|
366
|
|
/* wrapper around assoc_find which does the lazy expiration/deletion logic */
/*
 * Fetches an item from the hash table, treating delete-locked, flush-expired,
 * and TTL-expired items as misses (expired items are unlinked lazily here).
 *
 * key           - lookup key
 * nkey          - length of the key
 * delete_locked - out (may be NULL): set true when the item exists but is
 *                 still inside its delete-lock window
 *
 * Returns the item with its refcount incremented (caller must release via
 * do_item_remove), or NULL on miss.
 */
item *do_item_get_notedeleted(const char *key, const size_t nkey, bool *delete_locked) {
    item *it = assoc_find(key, nkey);
    if (delete_locked) *delete_locked = false;
    if (it && (it->it_flags & ITEM_DELETED)) {
        /* it's flagged as delete-locked.  let's see if that condition
           is past due, and the 5-second delete_timer just hasn't
           gotten to it yet... */
        if (!item_delete_lock_over(it)) {
            if (delete_locked) *delete_locked = true;
            it = 0;
        }
    }
    /* lazy flush: item predates the last flush_all deadline */
    if (it != NULL && settings.oldest_live != 0 && settings.oldest_live <= current_time &&
        it->time <= settings.oldest_live) {
        do_item_unlink(it);           // MTSAFE - cache_lock held
        it = 0;
    }
    /* lazy expiry: the item's own TTL has passed */
    if (it != NULL && it->exptime != 0 && it->exptime <= current_time) {
        do_item_unlink(it);           // MTSAFE - cache_lock held
        it = 0;
    }

    if (it != NULL) {
        /* caller now holds a reference */
        it->refcount++;
        DEBUG_REFCNT(it, '+');
    }
    return it;
}
|
|
396
|
|
/*
 * Convenience lookup that does not report the delete-locked state
 * (passes a NULL delete_locked out-parameter).
 */
item *item_get(const char *key, const size_t nkey) {
    return item_get_notedeleted(key, nkey, 0);
}
|
|
400
|
|
401 /* returns an item whether or not it's delete-locked or expired. */
|
|
402 item *do_item_get_nocheck(const char *key, const size_t nkey) {
|
|
403 item *it = assoc_find(key, nkey);
|
|
404 if (it) {
|
|
405 it->refcount++;
|
|
406 DEBUG_REFCNT(it, '+');
|
|
407 }
|
|
408 return it;
|
|
409 }
|
|
410
|
|
411 /* expires items that are more recent than the oldest_live setting. */
|
|
412 void do_item_flush_expired(void) {
|
|
413 int i;
|
|
414 item *iter, *next;
|
|
415 if (settings.oldest_live == 0)
|
|
416 return;
|
|
417 for (i = 0; i < LARGEST_ID; i++) {
|
|
418 /* The LRU is sorted in decreasing time order, and an item's timestamp
|
|
419 * is never newer than its last access time, so we only need to walk
|
|
420 * back until we hit an item older than the oldest_live time.
|
|
421 * The oldest_live checking will auto-expire the remaining items.
|
|
422 */
|
|
423 for (iter = heads[i]; iter != NULL; iter = next) {
|
|
424 if (iter->time >= settings.oldest_live) {
|
|
425 next = iter->next;
|
|
426 if ((iter->it_flags & ITEM_SLABBED) == 0) {
|
|
427 do_item_unlink(iter);
|
|
428 }
|
|
429 } else {
|
|
430 /* We've hit the first old item. Continue to the next queue. */
|
|
431 break;
|
|
432 }
|
|
433 }
|
|
434 }
|
|
435 }
|