0
|
1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
2 /*
|
|
3 * Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
|
|
4 * and are divided into chunks. The chunk sizes start off at the size of the
|
|
5 * "item" structure plus space for a small key and value. They increase by
|
|
6 * a multiplier factor from there, up to half the maximum slab size. The last
|
|
7 * slab size is always 1MB, since that's the maximum item size allowed by the
|
|
8 * memcached protocol.
|
|
9 *
|
|
10 * $Id: slabs.c 551 2007-05-07 21:24:31Z plindner $
|
|
11 */
|
|
12 #include "memcached.h"
|
|
13 #include <sys/stat.h>
|
|
14 #include <sys/socket.h>
|
|
15 #include <sys/signal.h>
|
|
16 #include <sys/resource.h>
|
|
17 #include <fcntl.h>
|
|
18 #include <netinet/in.h>
|
|
19 #include <errno.h>
|
|
20 #include <stdlib.h>
|
|
21 #include <stdio.h>
|
|
22 #include <string.h>
|
|
23 #include <assert.h>
|
|
24
|
|
25 #define POWER_SMALLEST 1
|
|
26 #define POWER_LARGEST 200
|
|
27 #define POWER_BLOCK 1048576
|
|
28 #define CHUNK_ALIGN_BYTES (sizeof(void *))
|
|
29 #define DONT_PREALLOC_SLABS
|
|
30
|
|
31 /* powers-of-N allocation structures */
|
|
32
|
|
/*
 * Per-class bookkeeping.  Each class hands out fixed-size chunks carved
 * from 1MB pages ("slabs"); freed chunks are kept on a per-class freelist.
 */
typedef struct {
    unsigned int size;      /* sizes of items */
    unsigned int perslab;   /* how many items per slab */

    void **slots;           /* list of item ptrs */
    unsigned int sl_total;  /* size of previous array */
    unsigned int sl_curr;   /* number of pointers currently on the freelist
                               (slots[0 .. sl_curr-1] are free); doubles as
                               the index of the first unused slot */

    void *end_page_ptr;         /* pointer to next free item at end of page, or 0 */
    unsigned int end_page_free; /* number of items remaining at end of last alloced page */

    unsigned int slabs;     /* how many slabs were allocated for this class */

    void **slab_list;       /* array of slab pointers */
    unsigned int list_size; /* size of prev array */

    unsigned int killing;   /* index+1 of dying slab, or zero if none
                               (used only by the slabs-reassign path) */
} slabclass_t;
|
|
51
|
|
/* one descriptor per class; index 0 is unused (POWER_SMALLEST is 1) */
static slabclass_t slabclass[POWER_LARGEST + 1];
static size_t mem_limit = 0;     /* total memory budget; 0 means unlimited */
static size_t mem_malloced = 0;  /* bytes of slab memory allocated so far */
static int power_largest;        /* highest class id actually in use */

/*
 * Forward Declarations
 */
static int do_slabs_newslab(const unsigned int id);

#ifndef DONT_PREALLOC_SLABS
/* Preallocate as many slab pages as possible (called from slabs_init)
   on start-up, so users don't get confused out-of-memory errors when
   they do have free (in-slab) space, but no space to make new slabs.
   if maxslabs is 200 (POWER_LARGEST - POWER_SMALLEST + 1), then all
   slab types can be made. if max memory is less than that many MB,
   only the smaller ones will be made. */
static void slabs_preallocate (const unsigned int maxslabs);
#endif
|
|
71
|
|
72 /*
|
|
73 * Figures out which slab class (chunk size) is required to store an item of
|
|
74 * a given size.
|
|
75 *
|
|
76 * Given object size, return id to use when allocating/freeing memory for object
|
|
77 * 0 means error: can't store such a large object
|
|
78 */
|
|
79
|
|
80 unsigned int slabs_clsid(const size_t size) {
|
|
81 int res = POWER_SMALLEST;
|
|
82
|
|
83 if (size == 0)
|
|
84 return 0;
|
|
85 while (size > slabclass[res].size)
|
|
86 if (res++ == power_largest) /* won't fit in the biggest slab */
|
|
87 return 0;
|
|
88 return res;
|
|
89 }
|
|
90
|
|
91 /*
|
|
92 * Determines the chunk sizes and initializes the slab class descriptors
|
|
93 * accordingly.
|
|
94 */
|
|
95 void slabs_init(const size_t limit, const double factor) {
|
|
96 int i = POWER_SMALLEST - 1;
|
|
97 unsigned int size = sizeof(item) + settings.chunk_size;
|
|
98
|
|
99 /* Factor of 2.0 means use the default memcached behavior */
|
|
100 if (factor == 2.0 && size < 128)
|
|
101 size = 128;
|
|
102
|
|
103 mem_limit = limit;
|
|
104 memset(slabclass, 0, sizeof(slabclass));
|
|
105
|
|
106 while (++i < POWER_LARGEST && size <= POWER_BLOCK / 2) {
|
|
107 /* Make sure items are always n-byte aligned */
|
|
108 if (size % CHUNK_ALIGN_BYTES)
|
|
109 size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
|
|
110
|
|
111 slabclass[i].size = size;
|
|
112 slabclass[i].perslab = POWER_BLOCK / slabclass[i].size;
|
|
113 size *= factor;
|
|
114 if (settings.verbose > 1) {
|
|
115 fprintf(stderr, "slab class %3d: chunk size %6u perslab %5u\n",
|
|
116 i, slabclass[i].size, slabclass[i].perslab);
|
|
117 }
|
|
118 }
|
|
119
|
|
120 power_largest = i;
|
|
121 slabclass[power_largest].size = POWER_BLOCK;
|
|
122 slabclass[power_largest].perslab = 1;
|
|
123
|
|
124 /* for the test suite: faking of how much we've already malloc'd */
|
|
125 {
|
|
126 char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
|
|
127 if (t_initial_malloc) {
|
|
128 mem_malloced = (size_t)atol(t_initial_malloc);
|
|
129 }
|
|
130
|
|
131 }
|
|
132
|
|
133 #ifndef DONT_PREALLOC_SLABS
|
|
134 {
|
|
135 char *pre_alloc = getenv("T_MEMD_SLABS_ALLOC");
|
|
136
|
|
137 if (pre_alloc == NULL || atoi(pre_alloc) != 0) {
|
|
138 slabs_preallocate(power_largest);
|
|
139 }
|
|
140 }
|
|
141 #endif
|
|
142 }
|
|
143
|
|
144 #ifndef DONT_PREALLOC_SLABS
|
|
145 static void slabs_preallocate (const unsigned int maxslabs) {
|
|
146 int i;
|
|
147 unsigned int prealloc = 0;
|
|
148
|
|
149 /* pre-allocate a 1MB slab in every size class so people don't get
|
|
150 confused by non-intuitive "SERVER_ERROR out of memory"
|
|
151 messages. this is the most common question on the mailing
|
|
152 list. if you really don't want this, you can rebuild without
|
|
153 these three lines. */
|
|
154
|
|
155 for (i = POWER_SMALLEST; i <= POWER_LARGEST; i++) {
|
|
156 if (++prealloc > maxslabs)
|
|
157 return;
|
|
158 do_slabs_newslab(i);
|
|
159 }
|
|
160
|
|
161 }
|
|
162 #endif
|
|
163
|
|
164 static int grow_slab_list (const unsigned int id) {
|
|
165 slabclass_t *p = &slabclass[id];
|
|
166 if (p->slabs == p->list_size) {
|
|
167 size_t new_size = (p->list_size != 0) ? p->list_size * 2 : 16;
|
|
168 void *new_list = realloc(p->slab_list, new_size * sizeof(void *));
|
|
169 if (new_list == 0) return 0;
|
|
170 p->list_size = new_size;
|
|
171 p->slab_list = new_list;
|
|
172 }
|
|
173 return 1;
|
|
174 }
|
|
175
|
|
176 static int do_slabs_newslab(const unsigned int id) {
|
|
177 slabclass_t *p = &slabclass[id];
|
|
178 #ifdef ALLOW_SLABS_REASSIGN
|
|
179 int len = POWER_BLOCK;
|
|
180 #else
|
|
181 int len = p->size * p->perslab;
|
|
182 #endif
|
|
183 char *ptr;
|
|
184
|
|
185 if (mem_limit && mem_malloced + len > mem_limit && p->slabs > 0)
|
|
186 return 0;
|
|
187
|
|
188 if (grow_slab_list(id) == 0) return 0;
|
|
189
|
|
190 ptr = malloc((size_t)len);
|
|
191 if (ptr == 0) return 0;
|
|
192
|
|
193 memset(ptr, 0, (size_t)len);
|
|
194 p->end_page_ptr = ptr;
|
|
195 p->end_page_free = p->perslab;
|
|
196
|
|
197 p->slab_list[p->slabs++] = ptr;
|
|
198 mem_malloced += len;
|
|
199 return 1;
|
|
200 }
|
|
201
|
|
202 /*@null@*/
|
|
203 void *do_slabs_alloc(const size_t size) {
|
|
204 slabclass_t *p;
|
|
205
|
|
206 unsigned int id = slabs_clsid(size);
|
|
207 if (id < POWER_SMALLEST || id > power_largest)
|
|
208 return NULL;
|
|
209
|
|
210 p = &slabclass[id];
|
|
211 assert(p->sl_curr == 0 || ((item *)p->slots[p->sl_curr - 1])->slabs_clsid == 0);
|
|
212
|
|
213 #ifdef USE_SYSTEM_MALLOC
|
|
214 if (mem_limit && mem_malloced + size > mem_limit)
|
|
215 return 0;
|
|
216 mem_malloced += size;
|
|
217 return malloc(size);
|
|
218 #endif
|
|
219
|
|
220 /* fail unless we have space at the end of a recently allocated page,
|
|
221 we have something on our freelist, or we could allocate a new page */
|
|
222 if (! (p->end_page_ptr != 0 || p->sl_curr != 0 || do_slabs_newslab(id) != 0))
|
|
223 return 0;
|
|
224
|
|
225 /* return off our freelist, if we have one */
|
|
226 if (p->sl_curr != 0)
|
|
227 return p->slots[--p->sl_curr];
|
|
228
|
|
229 /* if we recently allocated a whole page, return from that */
|
|
230 if (p->end_page_ptr) {
|
|
231 void *ptr = p->end_page_ptr;
|
|
232 if (--p->end_page_free != 0) {
|
|
233 p->end_page_ptr += p->size;
|
|
234 } else {
|
|
235 p->end_page_ptr = 0;
|
|
236 }
|
|
237 return ptr;
|
|
238 }
|
|
239
|
|
240 return NULL; /* shouldn't ever get here */
|
|
241 }
|
|
242
|
|
243 void do_slabs_free(void *ptr, const size_t size) {
|
|
244 unsigned char id = slabs_clsid(size);
|
|
245 slabclass_t *p;
|
|
246
|
|
247 assert(((item *)ptr)->slabs_clsid == 0);
|
|
248 assert(id >= POWER_SMALLEST && id <= power_largest);
|
|
249 if (id < POWER_SMALLEST || id > power_largest)
|
|
250 return;
|
|
251
|
|
252 p = &slabclass[id];
|
|
253
|
|
254 #ifdef USE_SYSTEM_MALLOC
|
|
255 mem_malloced -= size;
|
|
256 free(ptr);
|
|
257 return;
|
|
258 #endif
|
|
259
|
|
260 if (p->sl_curr == p->sl_total) { /* need more space on the free list */
|
|
261 int new_size = (p->sl_total != 0) ? p->sl_total * 2 : 16; /* 16 is arbitrary */
|
|
262 void **new_slots = realloc(p->slots, new_size * sizeof(void *));
|
|
263 if (new_slots == 0)
|
|
264 return;
|
|
265 p->slots = new_slots;
|
|
266 p->sl_total = new_size;
|
|
267 }
|
|
268 p->slots[p->sl_curr++] = ptr;
|
|
269 return;
|
|
270 }
|
|
271
|
|
272 /*@null@*/
|
|
273 char* do_slabs_stats(int *buflen) {
|
|
274 int i, total;
|
|
275 char *buf = (char *)malloc(power_largest * 200 + 100);
|
|
276 char *bufcurr = buf;
|
|
277
|
|
278 *buflen = 0;
|
|
279 if (buf == NULL) return NULL;
|
|
280
|
|
281 total = 0;
|
|
282 for(i = POWER_SMALLEST; i <= power_largest; i++) {
|
|
283 slabclass_t *p = &slabclass[i];
|
|
284 if (p->slabs != 0) {
|
|
285 unsigned int perslab, slabs;
|
|
286
|
|
287 slabs = p->slabs;
|
|
288 perslab = p->perslab;
|
|
289
|
|
290 bufcurr += sprintf(bufcurr, "STAT %d:chunk_size %u\r\n", i, p->size);
|
|
291 bufcurr += sprintf(bufcurr, "STAT %d:chunks_per_page %u\r\n", i, perslab);
|
|
292 bufcurr += sprintf(bufcurr, "STAT %d:total_pages %u\r\n", i, slabs);
|
|
293 bufcurr += sprintf(bufcurr, "STAT %d:total_chunks %u\r\n", i, slabs*perslab);
|
|
294 bufcurr += sprintf(bufcurr, "STAT %d:used_chunks %u\r\n", i, slabs*perslab - p->sl_curr);
|
|
295 bufcurr += sprintf(bufcurr, "STAT %d:free_chunks %u\r\n", i, p->sl_curr);
|
|
296 bufcurr += sprintf(bufcurr, "STAT %d:free_chunks_end %u\r\n", i, p->end_page_free);
|
|
297 total++;
|
|
298 }
|
|
299 }
|
|
300 bufcurr += sprintf(bufcurr, "STAT active_slabs %d\r\nSTAT total_malloced %llu\r\n", total, (unsigned long long)mem_malloced);
|
|
301 bufcurr += sprintf(bufcurr, "END\r\n");
|
|
302 *buflen = bufcurr - buf;
|
|
303 return buf;
|
|
304 }
|
|
305
|
|
#ifdef ALLOW_SLABS_REASSIGN
/* Blows away all the items in a slab class and moves its slabs to another
   class. This is only used by the "slabs reassign" command, for manual tweaking
   of memory allocation. It's disabled by default since it requires that all
   slabs be the same size (which can waste space for chunk size mantissas of
   other than 2.0).
   1 = success
   0 = fail
   -1 = tried. busy. send again shortly. */
int do_slabs_reassign(unsigned char srcid, unsigned char dstid) {
    void *slab, *slab_end;
    slabclass_t *p, *dp;
    void *iter;
    bool was_busy = false;

    if (srcid < POWER_SMALLEST || srcid > power_largest ||
        dstid < POWER_SMALLEST || dstid > power_largest)
        return 0;

    p = &slabclass[srcid];
    dp = &slabclass[dstid];

    /* fail if src still populating, or no slab to give up in src */
    if (p->end_page_ptr || ! p->slabs)
        return 0;

    /* fail if dst is still growing or we can't make room to hold its new one */
    if (dp->end_page_ptr || ! grow_slab_list(dstid))
        return 0;

    if (p->killing == 0) p->killing = 1;

    slab = p->slab_list[p->killing - 1];
    slab_end = (char *)slab + POWER_BLOCK;

    /* unlink every live item in the doomed slab.  The advance must be
       written as an assignment - the original `(char*)iter += p->size`
       used a cast as an lvalue, which is invalid standard C (a long-gone
       GCC extension). */
    for (iter = slab; iter < slab_end; iter = (char *)iter + p->size) {
        item *it = (item *)iter;
        if (it->slabs_clsid) {
            if (it->refcount) was_busy = true;
            item_unlink(it);
        }
    }

    /* go through free list and discard items that are no longer part of this slab */
    {
        int fi;
        /* explicit cast: sl_curr is unsigned, so `sl_curr - 1` alone would
           be a huge unsigned value when the list is empty */
        for (fi = (int)p->sl_curr - 1; fi >= 0; fi--) {
            if (p->slots[fi] >= slab && p->slots[fi] < slab_end) {
                p->sl_curr--;
                /* fill the hole with the last freelist entry */
                if (p->sl_curr > (unsigned int)fi) p->slots[fi] = p->slots[p->sl_curr];
            }
        }
    }

    /* some item was still referenced; caller should retry shortly */
    if (was_busy) return -1;

    /* if good, now move it to the dst slab class */
    p->slab_list[p->killing - 1] = p->slab_list[p->slabs - 1];
    p->slabs--;
    p->killing = 0;
    dp->slab_list[dp->slabs++] = slab;
    dp->end_page_ptr = slab;
    dp->end_page_free = dp->perslab;
    /* this isn't too critical, but other parts of the code do asserts to
       make sure this field is always 0. */
    for (iter = slab; iter < slab_end; iter = (char *)iter + dp->size) {
        ((item *)iter)->slabs_clsid = 0;
    }
    return 1;
}
#endif
|