Mercurial > hg > memcached
comparison items.c @ 0:30782bb1fc04 MEMCACHED_1_2_3
memcached-1.2.3
author | Maxim Dounin <mdounin@mdounin.ru> |
---|---|
date | Sun, 23 Sep 2007 03:58:34 +0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:30782bb1fc04 |
---|---|
1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ | |
2 /* $Id: items.c 551 2007-05-07 21:24:31Z plindner $ */ | |
3 #include "memcached.h" | |
4 #include <sys/stat.h> | |
5 #include <sys/socket.h> | |
6 #include <sys/signal.h> | |
7 #include <sys/resource.h> | |
8 #include <fcntl.h> | |
9 #include <netinet/in.h> | |
10 #include <errno.h> | |
11 #include <stdlib.h> | |
12 #include <stdio.h> | |
13 #include <string.h> | |
14 #include <time.h> | |
15 #include <assert.h> | |
16 | |
17 /* Forward Declarations */ | |
18 static void item_link_q(item *it); | |
19 static void item_unlink_q(item *it); | |
20 | |
21 /* | |
22 * We only reposition items in the LRU queue if they haven't been repositioned | |
23 * in this many seconds. That saves us from churning on frequently-accessed | |
24 * items. | |
25 */ | |
26 #define ITEM_UPDATE_INTERVAL 60 | |
27 | |
28 #define LARGEST_ID 255 | |
29 static item *heads[LARGEST_ID]; | |
30 static item *tails[LARGEST_ID]; | |
31 static unsigned int sizes[LARGEST_ID]; | |
32 | |
33 void item_init(void) { | |
34 int i; | |
35 for(i = 0; i < LARGEST_ID; i++) { | |
36 heads[i] = NULL; | |
37 tails[i] = NULL; | |
38 sizes[i] = 0; | |
39 } | |
40 } | |
41 | |
/*
 * Reference-count tracing hook.
 *
 * Change the "#if 0" to "#if 1" to log every reference-count change to
 * stderr. Each line shows the item address, the operation character passed
 * by the caller, the current refcount and the LINKED/SLABBED/DELETED flags
 * as 'L'/'S'/'D' (blank when the flag is clear).
 *
 * When disabled the macro expands to an empty loop, so call sites must
 * still terminate it with a semicolon and the arguments are not evaluated.
 */
#if 0
# define DEBUG_REFCNT(it,op) \
                fprintf(stderr, "item %p refcnt(%c) %d %c%c%c\n", \
                        (void *)(it), (op), (it)->refcount, \
                        ((it)->it_flags & ITEM_LINKED) ? 'L' : ' ', \
                        ((it)->it_flags & ITEM_SLABBED) ? 'S' : ' ', \
                        ((it)->it_flags & ITEM_DELETED) ? 'D' : ' ')
#else
# define DEBUG_REFCNT(it,op) while(0)
#endif
53 | |
54 /* | |
55 * Generates the variable-sized part of the header for an object. | |
56 * | |
57 * key - The key | |
58 * nkey - The length of the key | |
59 * flags - key flags | |
60 * nbytes - Number of bytes to hold value and addition CRLF terminator | |
61 * suffix - Buffer for the "VALUE" line suffix (flags, size). | |
62 * nsuffix - The length of the suffix is stored here. | |
63 * | |
64 * Returns the total size of the header. | |
65 */ | |
66 static size_t item_make_header(const uint8_t nkey, const int flags, const int nbytes, | |
67 char *suffix, uint8_t *nsuffix) { | |
68 /* suffix is defined at 40 chars elsewhere.. */ | |
69 *nsuffix = (uint8_t) snprintf(suffix, 40, " %d %d\r\n", flags, nbytes - 2); | |
70 return sizeof(item) + nkey + *nsuffix + nbytes; | |
71 } | |
72 | |
73 /*@null@*/ | |
74 item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) { | |
75 uint8_t nsuffix; | |
76 item *it; | |
77 char suffix[40]; | |
78 size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); | |
79 | |
80 unsigned int id = slabs_clsid(ntotal); | |
81 if (id == 0) | |
82 return 0; | |
83 | |
84 it = slabs_alloc(ntotal); | |
85 if (it == 0) { | |
86 int tries = 50; | |
87 item *search; | |
88 | |
89 /* If requested to not push old items out of cache when memory runs out, | |
90 * we're out of luck at this point... | |
91 */ | |
92 | |
93 if (settings.evict_to_free == 0) return NULL; | |
94 | |
95 /* | |
96 * try to get one off the right LRU | |
97 * don't necessariuly unlink the tail because it may be locked: refcount>0 | |
98 * search up from tail an item with refcount==0 and unlink it; give up after 50 | |
99 * tries | |
100 */ | |
101 | |
102 if (id > LARGEST_ID) return NULL; | |
103 if (tails[id] == 0) return NULL; | |
104 | |
105 for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) { | |
106 if (search->refcount == 0) { | |
107 if (search->exptime > current_time) { | |
108 STATS_LOCK(); | |
109 stats.evictions++; | |
110 STATS_UNLOCK(); | |
111 } | |
112 do_item_unlink(search); | |
113 break; | |
114 } | |
115 } | |
116 it = slabs_alloc(ntotal); | |
117 if (it == 0) return NULL; | |
118 } | |
119 | |
120 assert(it->slabs_clsid == 0); | |
121 | |
122 it->slabs_clsid = id; | |
123 | |
124 assert(it != heads[it->slabs_clsid]); | |
125 | |
126 it->next = it->prev = it->h_next = 0; | |
127 it->refcount = 1; /* the caller will have a reference */ | |
128 DEBUG_REFCNT(it, '*'); | |
129 it->it_flags = 0; | |
130 it->nkey = nkey; | |
131 it->nbytes = nbytes; | |
132 strcpy(ITEM_key(it), key); | |
133 it->exptime = exptime; | |
134 memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); | |
135 it->nsuffix = nsuffix; | |
136 return it; | |
137 } | |
138 | |
139 void item_free(item *it) { | |
140 size_t ntotal = ITEM_ntotal(it); | |
141 assert((it->it_flags & ITEM_LINKED) == 0); | |
142 assert(it != heads[it->slabs_clsid]); | |
143 assert(it != tails[it->slabs_clsid]); | |
144 assert(it->refcount == 0); | |
145 | |
146 /* so slab size changer can tell later if item is already free or not */ | |
147 it->slabs_clsid = 0; | |
148 it->it_flags |= ITEM_SLABBED; | |
149 DEBUG_REFCNT(it, 'F'); | |
150 slabs_free(it, ntotal); | |
151 } | |
152 | |
/*
 * Reports whether an item with the given key length, flags and value size
 * fits in the cache, i.e. whether its total size maps to a non-zero slab
 * class.
 */
bool item_size_ok(const size_t nkey, const int flags, const int nbytes) {
    char suffix[40];
    uint8_t nsuffix;
    const size_t ntotal = item_make_header(nkey + 1, flags, nbytes,
                                           suffix, &nsuffix);

    return slabs_clsid(ntotal) != 0;
}
164 | |
165 static void item_link_q(item *it) { /* item is the new head */ | |
166 item **head, **tail; | |
167 /* always true, warns: assert(it->slabs_clsid <= LARGEST_ID); */ | |
168 assert((it->it_flags & ITEM_SLABBED) == 0); | |
169 | |
170 head = &heads[it->slabs_clsid]; | |
171 tail = &tails[it->slabs_clsid]; | |
172 assert(it != *head); | |
173 assert((*head && *tail) || (*head == 0 && *tail == 0)); | |
174 it->prev = 0; | |
175 it->next = *head; | |
176 if (it->next) it->next->prev = it; | |
177 *head = it; | |
178 if (*tail == 0) *tail = it; | |
179 sizes[it->slabs_clsid]++; | |
180 return; | |
181 } | |
182 | |
183 static void item_unlink_q(item *it) { | |
184 item **head, **tail; | |
185 /* always true, warns: assert(it->slabs_clsid <= LARGEST_ID); */ | |
186 head = &heads[it->slabs_clsid]; | |
187 tail = &tails[it->slabs_clsid]; | |
188 | |
189 if (*head == it) { | |
190 assert(it->prev == 0); | |
191 *head = it->next; | |
192 } | |
193 if (*tail == it) { | |
194 assert(it->next == 0); | |
195 *tail = it->prev; | |
196 } | |
197 assert(it->next != it); | |
198 assert(it->prev != it); | |
199 | |
200 if (it->next) it->next->prev = it->prev; | |
201 if (it->prev) it->prev->next = it->next; | |
202 sizes[it->slabs_clsid]--; | |
203 return; | |
204 } | |
205 | |
206 int do_item_link(item *it) { | |
207 assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0); | |
208 assert(it->nbytes < 1048576); | |
209 it->it_flags |= ITEM_LINKED; | |
210 it->time = current_time; | |
211 assoc_insert(it); | |
212 | |
213 STATS_LOCK(); | |
214 stats.curr_bytes += ITEM_ntotal(it); | |
215 stats.curr_items += 1; | |
216 stats.total_items += 1; | |
217 STATS_UNLOCK(); | |
218 | |
219 item_link_q(it); | |
220 | |
221 return 1; | |
222 } | |
223 | |
224 void do_item_unlink(item *it) { | |
225 if ((it->it_flags & ITEM_LINKED) != 0) { | |
226 it->it_flags &= ~ITEM_LINKED; | |
227 STATS_LOCK(); | |
228 stats.curr_bytes -= ITEM_ntotal(it); | |
229 stats.curr_items -= 1; | |
230 STATS_UNLOCK(); | |
231 assoc_delete(ITEM_key(it), it->nkey); | |
232 item_unlink_q(it); | |
233 if (it->refcount == 0) item_free(it); | |
234 } | |
235 } | |
236 | |
237 void do_item_remove(item *it) { | |
238 assert((it->it_flags & ITEM_SLABBED) == 0); | |
239 if (it->refcount != 0) { | |
240 it->refcount--; | |
241 DEBUG_REFCNT(it, '-'); | |
242 } | |
243 assert((it->it_flags & ITEM_DELETED) == 0 || it->refcount != 0); | |
244 if (it->refcount == 0 && (it->it_flags & ITEM_LINKED) == 0) { | |
245 item_free(it); | |
246 } | |
247 } | |
248 | |
249 void do_item_update(item *it) { | |
250 if (it->time < current_time - ITEM_UPDATE_INTERVAL) { | |
251 assert((it->it_flags & ITEM_SLABBED) == 0); | |
252 | |
253 if (it->it_flags & ITEM_LINKED) { | |
254 item_unlink_q(it); | |
255 it->time = current_time; | |
256 item_link_q(it); | |
257 } | |
258 } | |
259 } | |
260 | |
261 int do_item_replace(item *it, item *new_it) { | |
262 assert((it->it_flags & ITEM_SLABBED) == 0); | |
263 | |
264 do_item_unlink(it); | |
265 return do_item_link(new_it); | |
266 } | |
267 | |
268 /*@null@*/ | |
269 char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes) { | |
270 int memlimit = 2097152; /* 2097152: (2 * 1024 * 1024) */ | |
271 char *buffer; | |
272 unsigned int bufcurr; | |
273 item *it; | |
274 int len; | |
275 int shown = 0; | |
276 char temp[512]; | |
277 | |
278 if (slabs_clsid > LARGEST_ID) return NULL; | |
279 it = heads[slabs_clsid]; | |
280 | |
281 buffer = malloc((size_t)memlimit); | |
282 if (buffer == 0) return NULL; | |
283 bufcurr = 0; | |
284 | |
285 while (it != NULL && (limit == 0 || shown < limit)) { | |
286 len = snprintf(temp, 512, "ITEM %s [%d b; %lu s]\r\n", ITEM_key(it), it->nbytes - 2, it->time + stats.started); | |
287 if (bufcurr + len + 6 > memlimit) /* 6 is END\r\n\0 */ | |
288 break; | |
289 strcpy(buffer + bufcurr, temp); | |
290 bufcurr += len; | |
291 shown++; | |
292 it = it->next; | |
293 } | |
294 | |
295 memcpy(buffer + bufcurr, "END\r\n", 6); | |
296 bufcurr += 5; | |
297 | |
298 *bytes = bufcurr; | |
299 return buffer; | |
300 } | |
301 | |
302 void item_stats(char *buffer, const int buflen) { | |
303 int i; | |
304 char *bufcurr = buffer; | |
305 rel_time_t now = current_time; | |
306 | |
307 if (buflen < 4096) { | |
308 strcpy(buffer, "SERVER_ERROR out of memory"); | |
309 return; | |
310 } | |
311 | |
312 for (i = 0; i < LARGEST_ID; i++) { | |
313 if (tails[i] != NULL) | |
314 bufcurr += snprintf(bufcurr, (size_t)buflen, "STAT items:%d:number %u\r\nSTAT items:%d:age %u\r\n", | |
315 i, sizes[i], i, now - tails[i]->time); | |
316 } | |
317 memcpy(bufcurr, "END", 4); | |
318 return; | |
319 } | |
320 | |
321 /* dumps out a list of objects of each size, with granularity of 32 bytes */ | |
322 /*@null@*/ | |
323 char* item_stats_sizes(int *bytes) { | |
324 const int num_buckets = 32768; /* max 1MB object, divided into 32 bytes size buckets */ | |
325 unsigned int *histogram = (unsigned int *)malloc((size_t)num_buckets * sizeof(int)); | |
326 char *buf = (char *)malloc(2097152 * sizeof(char)); /* 2097152: 2 * 1024 * 1024 */ | |
327 int i; | |
328 | |
329 if (histogram == 0 || buf == 0) { | |
330 if (histogram) free(histogram); | |
331 if (buf) free(buf); | |
332 return NULL; | |
333 } | |
334 | |
335 /* build the histogram */ | |
336 memset(histogram, 0, (size_t)num_buckets * sizeof(int)); | |
337 for (i = 0; i < LARGEST_ID; i++) { | |
338 item *iter = heads[i]; | |
339 while (iter) { | |
340 int ntotal = ITEM_ntotal(iter); | |
341 int bucket = ntotal / 32; | |
342 if ((ntotal % 32) != 0) bucket++; | |
343 if (bucket < num_buckets) histogram[bucket]++; | |
344 iter = iter->next; | |
345 } | |
346 } | |
347 | |
348 /* write the buffer */ | |
349 *bytes = 0; | |
350 for (i = 0; i < num_buckets; i++) { | |
351 if (histogram[i] != 0) { | |
352 *bytes += sprintf(&buf[*bytes], "%d %u\r\n", i * 32, histogram[i]); | |
353 } | |
354 } | |
355 *bytes += sprintf(&buf[*bytes], "END\r\n"); | |
356 free(histogram); | |
357 return buf; | |
358 } | |
359 | |
360 /* returns true if a deleted item's delete-locked-time is over, and it | |
361 should be removed from the namespace */ | |
362 bool item_delete_lock_over (item *it) { | |
363 assert(it->it_flags & ITEM_DELETED); | |
364 return (current_time >= it->exptime); | |
365 } | |
366 | |
367 /* wrapper around assoc_find which does the lazy expiration/deletion logic */ | |
368 item *do_item_get_notedeleted(const char *key, const size_t nkey, bool *delete_locked) { | |
369 item *it = assoc_find(key, nkey); | |
370 if (delete_locked) *delete_locked = false; | |
371 if (it && (it->it_flags & ITEM_DELETED)) { | |
372 /* it's flagged as delete-locked. let's see if that condition | |
373 is past due, and the 5-second delete_timer just hasn't | |
374 gotten to it yet... */ | |
375 if (!item_delete_lock_over(it)) { | |
376 if (delete_locked) *delete_locked = true; | |
377 it = 0; | |
378 } | |
379 } | |
380 if (it != NULL && settings.oldest_live != 0 && settings.oldest_live <= current_time && | |
381 it->time <= settings.oldest_live) { | |
382 do_item_unlink(it); // MTSAFE - cache_lock held | |
383 it = 0; | |
384 } | |
385 if (it != NULL && it->exptime != 0 && it->exptime <= current_time) { | |
386 do_item_unlink(it); // MTSAFE - cache_lock held | |
387 it = 0; | |
388 } | |
389 | |
390 if (it != NULL) { | |
391 it->refcount++; | |
392 DEBUG_REFCNT(it, '+'); | |
393 } | |
394 return it; | |
395 } | |
396 | |
397 item *item_get(const char *key, const size_t nkey) { | |
398 return item_get_notedeleted(key, nkey, 0); | |
399 } | |
400 | |
401 /* returns an item whether or not it's delete-locked or expired. */ | |
402 item *do_item_get_nocheck(const char *key, const size_t nkey) { | |
403 item *it = assoc_find(key, nkey); | |
404 if (it) { | |
405 it->refcount++; | |
406 DEBUG_REFCNT(it, '+'); | |
407 } | |
408 return it; | |
409 } | |
410 | |
411 /* expires items that are more recent than the oldest_live setting. */ | |
412 void do_item_flush_expired(void) { | |
413 int i; | |
414 item *iter, *next; | |
415 if (settings.oldest_live == 0) | |
416 return; | |
417 for (i = 0; i < LARGEST_ID; i++) { | |
418 /* The LRU is sorted in decreasing time order, and an item's timestamp | |
419 * is never newer than its last access time, so we only need to walk | |
420 * back until we hit an item older than the oldest_live time. | |
421 * The oldest_live checking will auto-expire the remaining items. | |
422 */ | |
423 for (iter = heads[i]; iter != NULL; iter = next) { | |
424 if (iter->time >= settings.oldest_live) { | |
425 next = iter->next; | |
426 if ((iter->it_flags & ITEM_SLABBED) == 0) { | |
427 do_item_unlink(iter); | |
428 } | |
429 } else { | |
430 /* We've hit the first old item. Continue to the next queue. */ | |
431 break; | |
432 } | |
433 } | |
434 } | |
435 } |