comparison assoc.c @ 0:30782bb1fc04 MEMCACHED_1_2_3

memcached-1.2.3
author Maxim Dounin <mdounin@mdounin.ru>
date Sun, 23 Sep 2007 03:58:34 +0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:30782bb1fc04
1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3 * Hash table
4 *
5 * The hash function used here is by Bob Jenkins, 1996:
6 * <http://burtleburtle.net/bob/hash/doobs.html>
7 * "By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net.
8 * You may use this code any way you wish, private, educational,
9 * or commercial. It's free."
10 *
11 * The rest of the file is licensed under the BSD license. See LICENSE.
12 *
13 * $Id: assoc.c 551 2007-05-07 21:24:31Z plindner $
14 */
15
16 #include "memcached.h"
17 #include <sys/stat.h>
18 #include <sys/socket.h>
19 #include <sys/signal.h>
20 #include <sys/resource.h>
21 #include <fcntl.h>
22 #include <netinet/in.h>
23 #include <errno.h>
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <assert.h>
28
29 /*
30 * Since the hash function does bit manipulation, it needs to know
31 * whether it's big or little-endian. ENDIAN_LITTLE and ENDIAN_BIG
32 * are set in the configure script.
33 */
/* Map the configure-time ENDIAN_* defines onto the pair of flags the
 * hash function tests at runtime.  Exactly one of the two is 1 when
 * configure detected the byte order; both are 0 when it could not. */
#if ENDIAN_BIG == 1
#  define HASH_LITTLE_ENDIAN 0
#  define HASH_BIG_ENDIAN 1
#else
#  if ENDIAN_LITTLE == 1
#    define HASH_LITTLE_ENDIAN 1
#    define HASH_BIG_ENDIAN 0
#  else
#    define HASH_LITTLE_ENDIAN 0
#    define HASH_BIG_ENDIAN 0
#  endif
#endif
46
47 #define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k))))
48
49 /*
50 -------------------------------------------------------------------------------
51 mix -- mix 3 32-bit values reversibly.
52
53 This is reversible, so any information in (a,b,c) before mix() is
54 still in (a,b,c) after mix().
55
56 If four pairs of (a,b,c) inputs are run through mix(), or through
57 mix() in reverse, there are at least 32 bits of the output that
58 are sometimes the same for one pair and different for another pair.
59 This was tested for:
60 * pairs that differed by one bit, by two bits, in any combination
61 of top bits of (a,b,c), or in any combination of bottom bits of
62 (a,b,c).
63 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
64 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
65 is commonly produced by subtraction) look like a single 1-bit
66 difference.
67 * the base values were pseudorandom, all zero but one bit set, or
68 all zero plus a counter that starts at zero.
69
70 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
71 satisfy this are
72 4 6 8 16 19 4
73 9 15 3 18 27 15
74 14 9 3 7 17 3
75 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
76 for "differ" defined as + with a one-bit base and a two-bit delta. I
77 used http://burtleburtle.net/bob/hash/avalanche.html to choose
78 the operations, constants, and arrangements of the variables.
79
80 This does not achieve avalanche. There are input bits of (a,b,c)
81 that fail to affect some output bits of (a,b,c), especially of a. The
82 most thoroughly mixed value is c, but it doesn't really even achieve
83 avalanche in c.
84
85 This allows some parallelism. Read-after-writes are good at doubling
86 the number of bits affected, so the goal of mixing pulls in the opposite
87 direction as the goal of parallelism. I did what I could. Rotates
88 seem to cost as much as shifts on every machine I could lay my hands
89 on, and rotates are much kinder to the top and bottom bits, so I used
90 rotates.
91 -------------------------------------------------------------------------------
92 */
/* mix -- reversibly mix three 32-bit values.  The rotation constants
 * (4 6 8 16 19 4) are from Bob Jenkins' lookup3; see the long comment
 * above for how they were chosen. */
#define mix(a,b,c) \
{ \
    a -= c;  a ^= rot(c, 4);  c += b; \
    b -= a;  b ^= rot(a, 6);  a += c; \
    c -= b;  c ^= rot(b, 8);  b += a; \
    a -= c;  a ^= rot(c,16);  c += b; \
    b -= a;  b ^= rot(a,19);  a += c; \
    c -= b;  c ^= rot(b, 4);  b += a; \
}
102
103 /*
104 -------------------------------------------------------------------------------
105 final -- final mixing of 3 32-bit values (a,b,c) into c
106
107 Pairs of (a,b,c) values differing in only a few bits will usually
108 produce values of c that look totally different. This was tested for
109 * pairs that differed by one bit, by two bits, in any combination
110 of top bits of (a,b,c), or in any combination of bottom bits of
111 (a,b,c).
112 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
113 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
114 is commonly produced by subtraction) look like a single 1-bit
115 difference.
116 * the base values were pseudorandom, all zero but one bit set, or
117 all zero plus a counter that starts at zero.
118
119 These constants passed:
120 14 11 25 16 4 14 24
121 12 14 25 16 4 14 24
122 and these came close:
123 4 8 15 26 3 22 24
124 10 8 15 26 3 22 24
125 11 8 15 26 3 22 24
126 -------------------------------------------------------------------------------
127 */
/* final -- final mixing of three 32-bit values (a,b,c) into c.  The
 * rotation constants (14 11 25 16 4 14 24) are from lookup3; see the
 * comment block above. */
#define final(a,b,c) \
{ \
    c ^= b; c -= rot(b,14); \
    a ^= c; a -= rot(c,11); \
    b ^= a; b -= rot(a,25); \
    c ^= b; c -= rot(b,16); \
    a ^= c; a -= rot(c, 4); \
    b ^= a; b -= rot(a,14); \
    c ^= b; c -= rot(b,24); \
}
138
139 #if HASH_LITTLE_ENDIAN == 1
140 uint32_t hash(
141 const void *key, /* the key to hash */
142 size_t length, /* length of the key */
143 const uint32_t initval) /* initval */
144 {
145 uint32_t a,b,c; /* internal state */
146 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
147
148 /* Set up the internal state */
149 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
150
151 u.ptr = key;
152 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
153 const uint32_t *k = key; /* read 32-bit chunks */
154 #ifdef VALGRIND
155 const uint8_t *k8;
156 #endif // ifdef VALGRIND
157
158 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
159 while (length > 12)
160 {
161 a += k[0];
162 b += k[1];
163 c += k[2];
164 mix(a,b,c);
165 length -= 12;
166 k += 3;
167 }
168
169 /*----------------------------- handle the last (probably partial) block */
170 /*
171 * "k[2]&0xffffff" actually reads beyond the end of the string, but
172 * then masks off the part it's not allowed to read. Because the
173 * string is aligned, the masked-off tail is in the same word as the
174 * rest of the string. Every machine with memory protection I've seen
175 * does it on word boundaries, so is OK with this. But VALGRIND will
176 * still catch it and complain. The masking trick does make the hash
177 * noticably faster for short strings (like English words).
178 */
179 #ifndef VALGRIND
180
181 switch(length)
182 {
183 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
184 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
185 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
186 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
187 case 8 : b+=k[1]; a+=k[0]; break;
188 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
189 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
190 case 5 : b+=k[1]&0xff; a+=k[0]; break;
191 case 4 : a+=k[0]; break;
192 case 3 : a+=k[0]&0xffffff; break;
193 case 2 : a+=k[0]&0xffff; break;
194 case 1 : a+=k[0]&0xff; break;
195 case 0 : return c; /* zero length strings require no mixing */
196 }
197
198 #else /* make valgrind happy */
199
200 k8 = (const uint8_t *)k;
201 switch(length)
202 {
203 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
204 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
205 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
206 case 9 : c+=k8[8]; /* fall through */
207 case 8 : b+=k[1]; a+=k[0]; break;
208 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
209 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
210 case 5 : b+=k8[4]; /* fall through */
211 case 4 : a+=k[0]; break;
212 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
213 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
214 case 1 : a+=k8[0]; break;
215 case 0 : return c; /* zero length strings require no mixing */
216 }
217
218 #endif /* !valgrind */
219
220 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
221 const uint16_t *k = key; /* read 16-bit chunks */
222 const uint8_t *k8;
223
224 /*--------------- all but last block: aligned reads and different mixing */
225 while (length > 12)
226 {
227 a += k[0] + (((uint32_t)k[1])<<16);
228 b += k[2] + (((uint32_t)k[3])<<16);
229 c += k[4] + (((uint32_t)k[5])<<16);
230 mix(a,b,c);
231 length -= 12;
232 k += 6;
233 }
234
235 /*----------------------------- handle the last (probably partial) block */
236 k8 = (const uint8_t *)k;
237 switch(length)
238 {
239 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
240 b+=k[2]+(((uint32_t)k[3])<<16);
241 a+=k[0]+(((uint32_t)k[1])<<16);
242 break;
243 case 11: c+=((uint32_t)k8[10])<<16; /* @fallthrough */
244 case 10: c+=k[4]; /* @fallthrough@ */
245 b+=k[2]+(((uint32_t)k[3])<<16);
246 a+=k[0]+(((uint32_t)k[1])<<16);
247 break;
248 case 9 : c+=k8[8]; /* @fallthrough */
249 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
250 a+=k[0]+(((uint32_t)k[1])<<16);
251 break;
252 case 7 : b+=((uint32_t)k8[6])<<16; /* @fallthrough */
253 case 6 : b+=k[2];
254 a+=k[0]+(((uint32_t)k[1])<<16);
255 break;
256 case 5 : b+=k8[4]; /* @fallthrough */
257 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
258 break;
259 case 3 : a+=((uint32_t)k8[2])<<16; /* @fallthrough */
260 case 2 : a+=k[0];
261 break;
262 case 1 : a+=k8[0];
263 break;
264 case 0 : return c; /* zero length strings require no mixing */
265 }
266
267 } else { /* need to read the key one byte at a time */
268 const uint8_t *k = key;
269
270 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
271 while (length > 12)
272 {
273 a += k[0];
274 a += ((uint32_t)k[1])<<8;
275 a += ((uint32_t)k[2])<<16;
276 a += ((uint32_t)k[3])<<24;
277 b += k[4];
278 b += ((uint32_t)k[5])<<8;
279 b += ((uint32_t)k[6])<<16;
280 b += ((uint32_t)k[7])<<24;
281 c += k[8];
282 c += ((uint32_t)k[9])<<8;
283 c += ((uint32_t)k[10])<<16;
284 c += ((uint32_t)k[11])<<24;
285 mix(a,b,c);
286 length -= 12;
287 k += 12;
288 }
289
290 /*-------------------------------- last block: affect all 32 bits of (c) */
291 switch(length) /* all the case statements fall through */
292 {
293 case 12: c+=((uint32_t)k[11])<<24;
294 case 11: c+=((uint32_t)k[10])<<16;
295 case 10: c+=((uint32_t)k[9])<<8;
296 case 9 : c+=k[8];
297 case 8 : b+=((uint32_t)k[7])<<24;
298 case 7 : b+=((uint32_t)k[6])<<16;
299 case 6 : b+=((uint32_t)k[5])<<8;
300 case 5 : b+=k[4];
301 case 4 : a+=((uint32_t)k[3])<<24;
302 case 3 : a+=((uint32_t)k[2])<<16;
303 case 2 : a+=((uint32_t)k[1])<<8;
304 case 1 : a+=k[0];
305 break;
306 case 0 : return c; /* zero length strings require no mixing */
307 }
308 }
309
310 final(a,b,c);
311 return c; /* zero length strings require no mixing */
312 }
313
314 #elif HASH_BIG_ENDIAN == 1
315 /*
316 * hashbig():
317 * This is the same as hashword() on big-endian machines. It is different
318 * from hashlittle() on all machines. hashbig() takes advantage of
319 * big-endian byte ordering.
320 */
321 uint32_t hash( const void *key, size_t length, const uint32_t initval)
322 {
323 uint32_t a,b,c;
324 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
325
326 /* Set up the internal state */
327 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
328
329 u.ptr = key;
330 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
331 const uint32_t *k = key; /* read 32-bit chunks */
332 #ifdef VALGRIND
333 const uint8_t *k8;
334 #endif // ifdef VALGRIND
335
336 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
337 while (length > 12)
338 {
339 a += k[0];
340 b += k[1];
341 c += k[2];
342 mix(a,b,c);
343 length -= 12;
344 k += 3;
345 }
346
347 /*----------------------------- handle the last (probably partial) block */
348 /*
349 * "k[2]<<8" actually reads beyond the end of the string, but
350 * then shifts out the part it's not allowed to read. Because the
351 * string is aligned, the illegal read is in the same word as the
352 * rest of the string. Every machine with memory protection I've seen
353 * does it on word boundaries, so is OK with this. But VALGRIND will
354 * still catch it and complain. The masking trick does make the hash
355 * noticably faster for short strings (like English words).
356 */
357 #ifndef VALGRIND
358
359 switch(length)
360 {
361 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
362 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
363 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
364 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
365 case 8 : b+=k[1]; a+=k[0]; break;
366 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
367 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
368 case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
369 case 4 : a+=k[0]; break;
370 case 3 : a+=k[0]&0xffffff00; break;
371 case 2 : a+=k[0]&0xffff0000; break;
372 case 1 : a+=k[0]&0xff000000; break;
373 case 0 : return c; /* zero length strings require no mixing */
374 }
375
376 #else /* make valgrind happy */
377
378 k8 = (const uint8_t *)k;
379 switch(length) /* all the case statements fall through */
380 {
381 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
382 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
383 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
384 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
385 case 8 : b+=k[1]; a+=k[0]; break;
386 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
387 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
388 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
389 case 4 : a+=k[0]; break;
390 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
391 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
392 case 1 : a+=((uint32_t)k8[0])<<24; break;
393 case 0 : return c;
394 }
395
396 #endif /* !VALGRIND */
397
398 } else { /* need to read the key one byte at a time */
399 const uint8_t *k = key;
400
401 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
402 while (length > 12)
403 {
404 a += ((uint32_t)k[0])<<24;
405 a += ((uint32_t)k[1])<<16;
406 a += ((uint32_t)k[2])<<8;
407 a += ((uint32_t)k[3]);
408 b += ((uint32_t)k[4])<<24;
409 b += ((uint32_t)k[5])<<16;
410 b += ((uint32_t)k[6])<<8;
411 b += ((uint32_t)k[7]);
412 c += ((uint32_t)k[8])<<24;
413 c += ((uint32_t)k[9])<<16;
414 c += ((uint32_t)k[10])<<8;
415 c += ((uint32_t)k[11]);
416 mix(a,b,c);
417 length -= 12;
418 k += 12;
419 }
420
421 /*-------------------------------- last block: affect all 32 bits of (c) */
422 switch(length) /* all the case statements fall through */
423 {
424 case 12: c+=k[11];
425 case 11: c+=((uint32_t)k[10])<<8;
426 case 10: c+=((uint32_t)k[9])<<16;
427 case 9 : c+=((uint32_t)k[8])<<24;
428 case 8 : b+=k[7];
429 case 7 : b+=((uint32_t)k[6])<<8;
430 case 6 : b+=((uint32_t)k[5])<<16;
431 case 5 : b+=((uint32_t)k[4])<<24;
432 case 4 : a+=k[3];
433 case 3 : a+=((uint32_t)k[2])<<8;
434 case 2 : a+=((uint32_t)k[1])<<16;
435 case 1 : a+=((uint32_t)k[0])<<24;
436 break;
437 case 0 : return c;
438 }
439 }
440
441 final(a,b,c);
442 return c;
443 }
444 #else // HASH_XXX_ENDIAN == 1
445 #error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN
446 #endif // hash_XXX_ENDIAN == 1
447
448 typedef unsigned long int ub4; /* unsigned 4-byte quantities */
449 typedef unsigned char ub1; /* unsigned 1-byte quantities */
450
451 /* how many powers of 2's worth of buckets we use */
452 static int hashpower = 16;
453
454 #define hashsize(n) ((ub4)1<<(n))
455 #define hashmask(n) (hashsize(n)-1)
456
457 /* Main hash table. This is where we look except during expansion. */
458 static item** primary_hashtable = 0;
459
460 /*
461 * Previous hash table. During expansion, we look here for keys that haven't
462 * been moved over to the primary yet.
463 */
464 static item** old_hashtable = 0;
465
466 /* Number of items in the hash table. */
467 static int hash_items = 0;
468
469 /* Flag: Are we in the middle of expanding now? */
470 static int expanding = 0;
471
472 /*
473 * During expansion we migrate values with bucket granularity; this is how
474 * far we've gotten so far. Ranges from 0 .. hashsize(hashpower - 1) - 1.
475 */
476 static int expand_bucket = 0;
477
478 void assoc_init(void) {
479 unsigned int hash_size = hashsize(hashpower) * sizeof(void*);
480 primary_hashtable = malloc(hash_size);
481 if (! primary_hashtable) {
482 fprintf(stderr, "Failed to init hashtable.\n");
483 exit(EXIT_FAILURE);
484 }
485 memset(primary_hashtable, 0, hash_size);
486 }
487
488 item *assoc_find(const char *key, const size_t nkey) {
489 uint32_t hv = hash(key, nkey, 0);
490 item *it;
491 int oldbucket;
492
493 if (expanding &&
494 (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
495 {
496 it = old_hashtable[oldbucket];
497 } else {
498 it = primary_hashtable[hv & hashmask(hashpower)];
499 }
500
501 while (it) {
502 if ((nkey == it->nkey) &&
503 (memcmp(key, ITEM_key(it), nkey) == 0)) {
504 return it;
505 }
506 it = it->h_next;
507 }
508 return 0;
509 }
510
511 /* returns the address of the item pointer before the key. if *item == 0,
512 the item wasn't found */
513
514 static item** _hashitem_before (const char *key, const size_t nkey) {
515 uint32_t hv = hash(key, nkey, 0);
516 item **pos;
517 int oldbucket;
518
519 if (expanding &&
520 (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
521 {
522 pos = &old_hashtable[oldbucket];
523 } else {
524 pos = &primary_hashtable[hv & hashmask(hashpower)];
525 }
526
527 while (*pos && ((nkey != (*pos)->nkey) || memcmp(key, ITEM_key(*pos), nkey))) {
528 pos = &(*pos)->h_next;
529 }
530 return pos;
531 }
532
533 /* grows the hashtable to the next power of 2. */
534 static void assoc_expand(void) {
535 old_hashtable = primary_hashtable;
536
537 primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *));
538 if (primary_hashtable) {
539 if (settings.verbose > 1)
540 fprintf(stderr, "Hash table expansion starting\n");
541 hashpower++;
542 expanding = 1;
543 expand_bucket = 0;
544 do_assoc_move_next_bucket();
545 } else {
546 primary_hashtable = old_hashtable;
547 /* Bad news, but we can keep running. */
548 }
549 }
550
551 /* migrates the next bucket to the primary hashtable if we're expanding. */
552 void do_assoc_move_next_bucket(void) {
553 item *it, *next;
554 int bucket;
555
556 if (expanding) {
557 for (it = old_hashtable[expand_bucket]; NULL != it; it = next) {
558 next = it->h_next;
559
560 bucket = hash(ITEM_key(it), it->nkey, 0) & hashmask(hashpower);
561 it->h_next = primary_hashtable[bucket];
562 primary_hashtable[bucket] = it;
563 }
564
565 old_hashtable[expand_bucket] = NULL;
566
567 expand_bucket++;
568 if (expand_bucket == hashsize(hashpower - 1)) {
569 expanding = 0;
570 free(old_hashtable);
571 if (settings.verbose > 1)
572 fprintf(stderr, "Hash table expansion done\n");
573 }
574 }
575 }
576
577 /* Note: this isn't an assoc_update. The key must not already exist to call this */
578 int assoc_insert(item *it) {
579 uint32_t hv;
580 int oldbucket;
581
582 assert(assoc_find(ITEM_key(it), it->nkey) == 0); /* shouldn't have duplicately named things defined */
583
584 hv = hash(ITEM_key(it), it->nkey, 0);
585 if (expanding &&
586 (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
587 {
588 it->h_next = old_hashtable[oldbucket];
589 old_hashtable[oldbucket] = it;
590 } else {
591 it->h_next = primary_hashtable[hv & hashmask(hashpower)];
592 primary_hashtable[hv & hashmask(hashpower)] = it;
593 }
594
595 hash_items++;
596 if (! expanding && hash_items > (hashsize(hashpower) * 3) / 2) {
597 assoc_expand();
598 }
599
600 return 1;
601 }
602
603 void assoc_delete(const char *key, const size_t nkey) {
604 item **before = _hashitem_before(key, nkey);
605
606 if (*before) {
607 item *nxt = (*before)->h_next;
608 (*before)->h_next = 0; /* probably pointless, but whatever. */
609 *before = nxt;
610 hash_items--;
611 return;
612 }
613 /* Note: we never actually get here. the callers don't delete things
614 they can't find. */
615 assert(*before != 0);
616 }