slabs.c @ 0:30782bb1fc04 (tag MEMCACHED_1_2_3, memcached-1.2.3)
author: Maxim Dounin <mdounin@mdounin.ru>
date:   Sun, 23 Sep 2007 03:58:34 +0400
/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*
 * Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
 * and are divided into chunks. The chunk sizes start off at the size of the
 * "item" structure plus space for a small key and value. They increase by
 * a multiplier factor from there, up to half the maximum slab size. The last
 * slab size is always 1MB, since that's the maximum item size allowed by the
 * memcached protocol.
 *
 * $Id: slabs.c 551 2007-05-07 21:24:31Z plindner $
 */
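/*
 * Added illustration (not part of the original source): each slab
 * class owns whole 1MB pages carved into equal-size chunks, and an
 * incoming item is stored in the smallest class whose chunk size fits
 * it, so the space wasted per item is bounded by the growth factor.
 */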
#include "memcached.h"
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/signal.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>

#define POWER_SMALLEST 1
#define POWER_LARGEST 200
#define POWER_BLOCK 1048576
#define CHUNK_ALIGN_BYTES (sizeof(void *))
#define DONT_PREALLOC_SLABS

/* powers-of-N allocation structures */

typedef struct {
    unsigned int size;      /* sizes of items */
    unsigned int perslab;   /* how many items per slab */

    void **slots;           /* list of item ptrs */
    unsigned int sl_total;  /* size of previous array */
    unsigned int sl_curr;   /* first free slot */

    void *end_page_ptr;         /* pointer to next free item at end of page, or 0 */
    unsigned int end_page_free; /* number of items remaining at end of last alloced page */

    unsigned int slabs;     /* how many slabs were allocated for this class */

    void **slab_list;       /* array of slab pointers */
    unsigned int list_size; /* size of prev array */

    unsigned int killing;   /* index+1 of dying slab, or zero if none */
} slabclass_t;
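/*
 * Added note (not in the original source): slots[0 .. sl_curr-1] hold
 * pointers to free chunks for this class; do_slabs_alloc() pops from
 * the top of that stack and do_slabs_free() pushes onto it, so sl_curr
 * doubles as both the free-chunk count and the next-free index.
 */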

static slabclass_t slabclass[POWER_LARGEST + 1];
static size_t mem_limit = 0;
static size_t mem_malloced = 0;
static int power_largest;

/*
 * Forward Declarations
 */
static int do_slabs_newslab(const unsigned int id);

#ifndef DONT_PREALLOC_SLABS
/* Preallocate as many slab pages as possible (called from slabs_init)
   on start-up, so users don't get confusing out-of-memory errors when
   they do have free (in-slab) space, but no space to make new slabs.
   if maxslabs is POWER_LARGEST - POWER_SMALLEST + 1, then all
   slab classes can be made. if max memory is less than that many
   megabytes, only the smaller ones will be made. */
static void slabs_preallocate (const unsigned int maxslabs);
#endif

/*
 * Figures out which slab class (chunk size) is required to store an item of
 * a given size.
 *
 * Given object size, return id to use when allocating/freeing memory for object
 * 0 means error: can't store such a large object
 */

unsigned int slabs_clsid(const size_t size) {
    int res = POWER_SMALLEST;

    if (size == 0)
        return 0;
    while (size > slabclass[res].size)
        if (res++ == power_largest)     /* won't fit in the biggest slab */
            return 0;
    return res;
}
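/*
 * Illustrative usage sketch (added commentary; the value 100 is
 * hypothetical):
 *
 *     unsigned int id = slabs_clsid(100);
 *     if (id == 0) {
 *         // no class can hold an object this large (or size was 0)
 *     }
 *
 * The id returned is the smallest class whose chunk size is >= the
 * requested size.
 */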

/*
 * Determines the chunk sizes and initializes the slab class descriptors
 * accordingly.
 */
void slabs_init(const size_t limit, const double factor) {
    int i = POWER_SMALLEST - 1;
    unsigned int size = sizeof(item) + settings.chunk_size;

    /* Factor of 2.0 means use the default memcached behavior */
    if (factor == 2.0 && size < 128)
        size = 128;

    mem_limit = limit;
    memset(slabclass, 0, sizeof(slabclass));

    while (++i < POWER_LARGEST && size <= POWER_BLOCK / 2) {
        /* Make sure items are always n-byte aligned */
        if (size % CHUNK_ALIGN_BYTES)
            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);

        slabclass[i].size = size;
        slabclass[i].perslab = POWER_BLOCK / slabclass[i].size;
        size *= factor;
        if (settings.verbose > 1) {
            fprintf(stderr, "slab class %3d: chunk size %6u perslab %5u\n",
                    i, slabclass[i].size, slabclass[i].perslab);
        }
    }

    power_largest = i;
    slabclass[power_largest].size = POWER_BLOCK;
    slabclass[power_largest].perslab = 1;

    /* for the test suite: faking of how much we've already malloc'd */
    {
        char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
        if (t_initial_malloc) {
            mem_malloced = (size_t)atol(t_initial_malloc);
        }
    }

#ifndef DONT_PREALLOC_SLABS
    {
        char *pre_alloc = getenv("T_MEMD_SLABS_ALLOC");

        if (pre_alloc == NULL || atoi(pre_alloc) != 0) {
            slabs_preallocate(power_largest);
        }
    }
#endif
}
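/*
 * Worked example (added commentary; the numbers are hypothetical and
 * assume 8-byte pointers, so CHUNK_ALIGN_BYTES == 8): if
 * sizeof(item) + settings.chunk_size came to 80 and factor were 1.25,
 * the loop would produce
 *
 *     class 1: size  80, perslab 13107   (1048576 / 80)
 *     class 2: size 104, perslab 10082   (80 * 1.25 = 100, aligned up to 104)
 *     class 3: size 136, perslab  7710   (104 * 1.25 = 130, aligned up to 136)
 *     ...
 *
 * and the final class always gets a single 1MB chunk per slab.
 */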

#ifndef DONT_PREALLOC_SLABS
static void slabs_preallocate (const unsigned int maxslabs) {
    int i;
    unsigned int prealloc = 0;

    /* pre-allocate a 1MB slab in every size class so people don't get
       confused by non-intuitive "SERVER_ERROR out of memory"
       messages.  this is the most common question on the mailing
       list.  if you really don't want this, you can build with
       DONT_PREALLOC_SLABS defined (as this file does by default). */

    for (i = POWER_SMALLEST; i <= POWER_LARGEST; i++) {
        if (++prealloc > maxslabs)
            return;
        do_slabs_newslab(i);
    }

}
#endif

static int grow_slab_list (const unsigned int id) {
    slabclass_t *p = &slabclass[id];
    if (p->slabs == p->list_size) {
        size_t new_size = (p->list_size != 0) ? p->list_size * 2 : 16;
        void *new_list = realloc(p->slab_list, new_size * sizeof(void *));
        if (new_list == 0) return 0;
        p->list_size = new_size;
        p->slab_list = new_list;
    }
    return 1;
}
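/*
 * Added note: the slab_list array grows geometrically (16, 32, 64, ...
 * pointers), so appending a slab is amortized O(1) and the array is
 * reallocated only when it is completely full.
 */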

static int do_slabs_newslab(const unsigned int id) {
    slabclass_t *p = &slabclass[id];
#ifdef ALLOW_SLABS_REASSIGN
    int len = POWER_BLOCK;
#else
    int len = p->size * p->perslab;
#endif
    char *ptr;

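    /* Added note: the limit check below deliberately lets every class
       allocate its *first* slab even when mem_limit is exceeded
       (p->slabs > 0 must hold for the refusal to trigger), so no class
       is ever left completely empty. */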
    if (mem_limit && mem_malloced + len > mem_limit && p->slabs > 0)
        return 0;

    if (grow_slab_list(id) == 0) return 0;

    ptr = malloc((size_t)len);
    if (ptr == 0) return 0;

    memset(ptr, 0, (size_t)len);
    p->end_page_ptr = ptr;
    p->end_page_free = p->perslab;

    p->slab_list[p->slabs++] = ptr;
    mem_malloced += len;
    return 1;
}

/*@null@*/
void *do_slabs_alloc(const size_t size) {
    slabclass_t *p;

    unsigned int id = slabs_clsid(size);
    if (id < POWER_SMALLEST || id > power_largest)
        return NULL;

    p = &slabclass[id];
    assert(p->sl_curr == 0 || ((item *)p->slots[p->sl_curr - 1])->slabs_clsid == 0);

#ifdef USE_SYSTEM_MALLOC
    if (mem_limit && mem_malloced + size > mem_limit)
        return 0;
    mem_malloced += size;
    return malloc(size);
#endif

    /* fail unless we have space at the end of a recently allocated page,
       we have something on our freelist, or we could allocate a new page */
    if (! (p->end_page_ptr != 0 || p->sl_curr != 0 || do_slabs_newslab(id) != 0))
        return 0;

    /* return off our freelist, if we have one */
    if (p->sl_curr != 0)
        return p->slots[--p->sl_curr];

    /* if we recently allocated a whole page, return from that */
    if (p->end_page_ptr) {
        void *ptr = p->end_page_ptr;
        if (--p->end_page_free != 0) {
            /* end_page_ptr is a void *, so cast before advancing by one
               chunk; arithmetic on void * is a GCC extension */
            p->end_page_ptr = (char *)p->end_page_ptr + p->size;
        } else {
            p->end_page_ptr = 0;
        }
        return ptr;
    }

    return NULL;  /* shouldn't ever get here */
}
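/*
 * Illustrative caller sketch (added commentary; nkey and nbytes are
 * hypothetical key/value lengths):
 *
 *     size_t ntotal = sizeof(item) + nkey + nbytes;
 *     item *it = (item *)do_slabs_alloc(ntotal);
 *     if (it == NULL) {
 *         // out of memory for this size class
 *     }
 *
 * Allocation prefers the freelist, then the tail of the most recently
 * allocated page, and asks for a brand-new slab only when both are empty.
 */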

void do_slabs_free(void *ptr, const size_t size) {
    unsigned char id = slabs_clsid(size);
    slabclass_t *p;

    assert(((item *)ptr)->slabs_clsid == 0);
    assert(id >= POWER_SMALLEST && id <= power_largest);
    if (id < POWER_SMALLEST || id > power_largest)
        return;

    p = &slabclass[id];

#ifdef USE_SYSTEM_MALLOC
    mem_malloced -= size;
    free(ptr);
    return;
#endif

    if (p->sl_curr == p->sl_total) { /* need more space on the free list */
        int new_size = (p->sl_total != 0) ? p->sl_total * 2 : 16;  /* 16 is arbitrary */
        void **new_slots = realloc(p->slots, new_size * sizeof(void *));
        if (new_slots == 0)
            return;
        p->slots = new_slots;
        p->sl_total = new_size;
    }
    p->slots[p->sl_curr++] = ptr;
    return;
}
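/*
 * Added note: the size passed to do_slabs_free() must map to the same
 * class as the size passed to do_slabs_alloc() for this pointer, since
 * the freelist is chosen purely by slabs_clsid(size); freeing with a
 * size from a different class would put the chunk on the wrong list.
 */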

/*@null@*/
char* do_slabs_stats(int *buflen) {
    int i, total;
    char *buf = (char *)malloc(power_largest * 200 + 100);
    char *bufcurr = buf;

    *buflen = 0;
    if (buf == NULL) return NULL;

    total = 0;
    for(i = POWER_SMALLEST; i <= power_largest; i++) {
        slabclass_t *p = &slabclass[i];
        if (p->slabs != 0) {
            unsigned int perslab, slabs;

            slabs = p->slabs;
            perslab = p->perslab;

            bufcurr += sprintf(bufcurr, "STAT %d:chunk_size %u\r\n", i, p->size);
            bufcurr += sprintf(bufcurr, "STAT %d:chunks_per_page %u\r\n", i, perslab);
            bufcurr += sprintf(bufcurr, "STAT %d:total_pages %u\r\n", i, slabs);
            bufcurr += sprintf(bufcurr, "STAT %d:total_chunks %u\r\n", i, slabs*perslab);
            bufcurr += sprintf(bufcurr, "STAT %d:used_chunks %u\r\n", i, slabs*perslab - p->sl_curr);
            bufcurr += sprintf(bufcurr, "STAT %d:free_chunks %u\r\n", i, p->sl_curr);
            bufcurr += sprintf(bufcurr, "STAT %d:free_chunks_end %u\r\n", i, p->end_page_free);
            total++;
        }
    }
    bufcurr += sprintf(bufcurr, "STAT active_slabs %d\r\nSTAT total_malloced %llu\r\n", total, (unsigned long long)mem_malloced);
    bufcurr += sprintf(bufcurr, "END\r\n");
    *buflen = bufcurr - buf;
    return buf;
}
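/*
 * Example output fragment (added commentary; the values shown are
 * hypothetical):
 *
 *     STAT 1:chunk_size 80
 *     STAT 1:chunks_per_page 13107
 *     STAT 1:total_pages 1
 *     ...
 *     STAT active_slabs 1
 *     STAT total_malloced 1048576
 *     END
 *
 * Each line is terminated with \r\n as required by the memcached text
 * protocol.
 */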

#ifdef ALLOW_SLABS_REASSIGN
/* Blows away all the items in a slab class and moves its slabs to another
   class. This is only used by the "slabs reassign" command, for manual tweaking
   of memory allocation. It's disabled by default since it requires that all
   slabs be the same size (which can waste space for growth factors other
   than 2.0).
   1 = success
   0 = fail
   -1 = tried. busy. send again shortly. */
int do_slabs_reassign(unsigned char srcid, unsigned char dstid) {
    void *slab, *slab_end;
    slabclass_t *p, *dp;
    void *iter;
    bool was_busy = false;

    if (srcid < POWER_SMALLEST || srcid > power_largest ||
        dstid < POWER_SMALLEST || dstid > power_largest)
        return 0;

    p = &slabclass[srcid];
    dp = &slabclass[dstid];

    /* fail if src still populating, or no slab to give up in src */
    if (p->end_page_ptr || ! p->slabs)
        return 0;

    /* fail if dst is still growing or we can't make room to hold its new one */
    if (dp->end_page_ptr || ! grow_slab_list(dstid))
        return 0;

    if (p->killing == 0) p->killing = 1;

    slab = p->slab_list[p->killing - 1];
    slab_end = (char*)slab + POWER_BLOCK;

    /* a cast is not an lvalue, so advance the void * iterator by
       assignment rather than with "(char*)iter += p->size" */
    for (iter = slab; iter < slab_end; iter = (char *)iter + p->size) {
        item *it = (item *)iter;
        if (it->slabs_clsid) {
            if (it->refcount) was_busy = true;
            item_unlink(it);
        }
    }

    /* go through free list and discard items that are no longer part of this slab */
    {
        int fi;
        for (fi = p->sl_curr - 1; fi >= 0; fi--) {
            if (p->slots[fi] >= slab && p->slots[fi] < slab_end) {
                p->sl_curr--;
                if (p->sl_curr > fi) p->slots[fi] = p->slots[p->sl_curr];
            }
        }
    }

    if (was_busy) return -1;

    /* if good, now move it to the dst slab class */
    p->slab_list[p->killing - 1] = p->slab_list[p->slabs - 1];
    p->slabs--;
    p->killing = 0;
    dp->slab_list[dp->slabs++] = slab;
    dp->end_page_ptr = slab;
    dp->end_page_free = dp->perslab;
    /* this isn't too critical, but other parts of the code do asserts to
       make sure this field is always 0.  */
    for (iter = slab; iter < slab_end; iter = (char *)iter + dp->size) {
        ((item *)iter)->slabs_clsid = 0;
    }
    return 1;
}
#endif
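/*
 * Illustrative caller sketch (added commentary; class ids 5 and 6 are
 * hypothetical):
 *
 *     int rc = do_slabs_reassign(5, 6);
 *     if (rc == -1) {
 *         // a referenced item was busy; retry shortly
 *     } else if (rc == 0) {
 *         // invalid ids, or src/dst not in a reassignable state
 *     }
 */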