Mercurial > hg > memcached
comparison assoc.c @ 0:30782bb1fc04 MEMCACHED_1_2_3
memcached-1.2.3
author | Maxim Dounin <mdounin@mdounin.ru> |
---|---|
date | Sun, 23 Sep 2007 03:58:34 +0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:30782bb1fc04 |
---|---|
1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ | |
2 /* | |
3 * Hash table | |
4 * | |
5 * The hash function used here is by Bob Jenkins, 1996: | |
6 * <http://burtleburtle.net/bob/hash/doobs.html> | |
7 * "By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. | |
8 * You may use this code any way you wish, private, educational, | |
9 * or commercial. It's free." | |
10 * | |
11 * The rest of the file is licensed under the BSD license. See LICENSE. | |
12 * | |
13 * $Id: assoc.c 551 2007-05-07 21:24:31Z plindner $ | |
14 */ | |
15 | |
16 #include "memcached.h" | |
17 #include <sys/stat.h> | |
18 #include <sys/socket.h> | |
19 #include <sys/signal.h> | |
20 #include <sys/resource.h> | |
21 #include <fcntl.h> | |
22 #include <netinet/in.h> | |
23 #include <errno.h> | |
24 #include <stdlib.h> | |
25 #include <stdio.h> | |
26 #include <string.h> | |
27 #include <assert.h> | |
28 | |
29 /* | |
30 * Since the hash function does bit manipulation, it needs to know | |
31 * whether it's big or little-endian. ENDIAN_LITTLE and ENDIAN_BIG | |
32 * are set in the configure script. | |
33 */ | |
34 #if ENDIAN_BIG == 1 | |
35 # define HASH_LITTLE_ENDIAN 0 | |
36 # define HASH_BIG_ENDIAN 1 | |
37 #else | |
38 # if ENDIAN_LITTLE == 1 | |
39 # define HASH_LITTLE_ENDIAN 1 | |
40 # define HASH_BIG_ENDIAN 0 | |
41 # else | |
42 # define HASH_LITTLE_ENDIAN 0 | |
43 # define HASH_BIG_ENDIAN 0 | |
44 # endif | |
45 #endif | |
46 | |
47 #define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k)))) | |
48 | |
49 /* | |
50 ------------------------------------------------------------------------------- | |
51 mix -- mix 3 32-bit values reversibly. | |
52 | |
53 This is reversible, so any information in (a,b,c) before mix() is | |
54 still in (a,b,c) after mix(). | |
55 | |
56 If four pairs of (a,b,c) inputs are run through mix(), or through | |
57 mix() in reverse, there are at least 32 bits of the output that | |
58 are sometimes the same for one pair and different for another pair. | |
59 This was tested for: | |
60 * pairs that differed by one bit, by two bits, in any combination | |
61 of top bits of (a,b,c), or in any combination of bottom bits of | |
62 (a,b,c). | |
63 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed | |
64 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as | |
65 is commonly produced by subtraction) look like a single 1-bit | |
66 difference. | |
67 * the base values were pseudorandom, all zero but one bit set, or | |
68 all zero plus a counter that starts at zero. | |
69 | |
70 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that | |
71 satisfy this are | |
72 4 6 8 16 19 4 | |
73 9 15 3 18 27 15 | |
74 14 9 3 7 17 3 | |
75 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing | |
76 for "differ" defined as + with a one-bit base and a two-bit delta. I | |
77 used http://burtleburtle.net/bob/hash/avalanche.html to choose | |
78 the operations, constants, and arrangements of the variables. | |
79 | |
80 This does not achieve avalanche. There are input bits of (a,b,c) | |
81 that fail to affect some output bits of (a,b,c), especially of a. The | |
82 most thoroughly mixed value is c, but it doesn't really even achieve | |
83 avalanche in c. | |
84 | |
85 This allows some parallelism. Read-after-writes are good at doubling | |
86 the number of bits affected, so the goal of mixing pulls in the opposite | |
87 direction as the goal of parallelism. I did what I could. Rotates | |
88 seem to cost as much as shifts on every machine I could lay my hands | |
89 on, and rotates are much kinder to the top and bottom bits, so I used | |
90 rotates. | |
91 ------------------------------------------------------------------------------- | |
92 */ | |
93 #define mix(a,b,c) \ | |
94 { \ | |
95 a -= c; a ^= rot(c, 4); c += b; \ | |
96 b -= a; b ^= rot(a, 6); a += c; \ | |
97 c -= b; c ^= rot(b, 8); b += a; \ | |
98 a -= c; a ^= rot(c,16); c += b; \ | |
99 b -= a; b ^= rot(a,19); a += c; \ | |
100 c -= b; c ^= rot(b, 4); b += a; \ | |
101 } | |
102 | |
103 /* | |
104 ------------------------------------------------------------------------------- | |
105 final -- final mixing of 3 32-bit values (a,b,c) into c | |
106 | |
107 Pairs of (a,b,c) values differing in only a few bits will usually | |
108 produce values of c that look totally different. This was tested for | |
109 * pairs that differed by one bit, by two bits, in any combination | |
110 of top bits of (a,b,c), or in any combination of bottom bits of | |
111 (a,b,c). | |
112 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed | |
113 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as | |
114 is commonly produced by subtraction) look like a single 1-bit | |
115 difference. | |
116 * the base values were pseudorandom, all zero but one bit set, or | |
117 all zero plus a counter that starts at zero. | |
118 | |
119 These constants passed: | |
120 14 11 25 16 4 14 24 | |
121 12 14 25 16 4 14 24 | |
122 and these came close: | |
123 4 8 15 26 3 22 24 | |
124 10 8 15 26 3 22 24 | |
125 11 8 15 26 3 22 24 | |
126 ------------------------------------------------------------------------------- | |
127 */ | |
128 #define final(a,b,c) \ | |
129 { \ | |
130 c ^= b; c -= rot(b,14); \ | |
131 a ^= c; a -= rot(c,11); \ | |
132 b ^= a; b -= rot(a,25); \ | |
133 c ^= b; c -= rot(b,16); \ | |
134 a ^= c; a -= rot(c,4); \ | |
135 b ^= a; b -= rot(a,14); \ | |
136 c ^= b; c -= rot(b,24); \ | |
137 } | |
138 | |
139 #if HASH_LITTLE_ENDIAN == 1 | |
140 uint32_t hash( | |
141 const void *key, /* the key to hash */ | |
142 size_t length, /* length of the key */ | |
143 const uint32_t initval) /* initval */ | |
144 { | |
145 uint32_t a,b,c; /* internal state */ | |
146 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ | |
147 | |
148 /* Set up the internal state */ | |
149 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; | |
150 | |
151 u.ptr = key; | |
152 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { | |
153 const uint32_t *k = key; /* read 32-bit chunks */ | |
154 #ifdef VALGRIND | |
155 const uint8_t *k8; | |
156 #endif // ifdef VALGRIND | |
157 | |
158 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ | |
159 while (length > 12) | |
160 { | |
161 a += k[0]; | |
162 b += k[1]; | |
163 c += k[2]; | |
164 mix(a,b,c); | |
165 length -= 12; | |
166 k += 3; | |
167 } | |
168 | |
169 /*----------------------------- handle the last (probably partial) block */ | |
170 /* | |
171 * "k[2]&0xffffff" actually reads beyond the end of the string, but | |
172 * then masks off the part it's not allowed to read. Because the | |
173 * string is aligned, the masked-off tail is in the same word as the | |
174 * rest of the string. Every machine with memory protection I've seen | |
175 * does it on word boundaries, so is OK with this. But VALGRIND will | |
176 * still catch it and complain. The masking trick does make the hash | |
177 * noticably faster for short strings (like English words). | |
178 */ | |
179 #ifndef VALGRIND | |
180 | |
181 switch(length) | |
182 { | |
183 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; | |
184 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; | |
185 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; | |
186 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; | |
187 case 8 : b+=k[1]; a+=k[0]; break; | |
188 case 7 : b+=k[1]&0xffffff; a+=k[0]; break; | |
189 case 6 : b+=k[1]&0xffff; a+=k[0]; break; | |
190 case 5 : b+=k[1]&0xff; a+=k[0]; break; | |
191 case 4 : a+=k[0]; break; | |
192 case 3 : a+=k[0]&0xffffff; break; | |
193 case 2 : a+=k[0]&0xffff; break; | |
194 case 1 : a+=k[0]&0xff; break; | |
195 case 0 : return c; /* zero length strings require no mixing */ | |
196 } | |
197 | |
198 #else /* make valgrind happy */ | |
199 | |
200 k8 = (const uint8_t *)k; | |
201 switch(length) | |
202 { | |
203 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; | |
204 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ | |
205 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ | |
206 case 9 : c+=k8[8]; /* fall through */ | |
207 case 8 : b+=k[1]; a+=k[0]; break; | |
208 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ | |
209 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ | |
210 case 5 : b+=k8[4]; /* fall through */ | |
211 case 4 : a+=k[0]; break; | |
212 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ | |
213 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ | |
214 case 1 : a+=k8[0]; break; | |
215 case 0 : return c; /* zero length strings require no mixing */ | |
216 } | |
217 | |
218 #endif /* !valgrind */ | |
219 | |
220 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { | |
221 const uint16_t *k = key; /* read 16-bit chunks */ | |
222 const uint8_t *k8; | |
223 | |
224 /*--------------- all but last block: aligned reads and different mixing */ | |
225 while (length > 12) | |
226 { | |
227 a += k[0] + (((uint32_t)k[1])<<16); | |
228 b += k[2] + (((uint32_t)k[3])<<16); | |
229 c += k[4] + (((uint32_t)k[5])<<16); | |
230 mix(a,b,c); | |
231 length -= 12; | |
232 k += 6; | |
233 } | |
234 | |
235 /*----------------------------- handle the last (probably partial) block */ | |
236 k8 = (const uint8_t *)k; | |
237 switch(length) | |
238 { | |
239 case 12: c+=k[4]+(((uint32_t)k[5])<<16); | |
240 b+=k[2]+(((uint32_t)k[3])<<16); | |
241 a+=k[0]+(((uint32_t)k[1])<<16); | |
242 break; | |
243 case 11: c+=((uint32_t)k8[10])<<16; /* @fallthrough */ | |
244 case 10: c+=k[4]; /* @fallthrough@ */ | |
245 b+=k[2]+(((uint32_t)k[3])<<16); | |
246 a+=k[0]+(((uint32_t)k[1])<<16); | |
247 break; | |
248 case 9 : c+=k8[8]; /* @fallthrough */ | |
249 case 8 : b+=k[2]+(((uint32_t)k[3])<<16); | |
250 a+=k[0]+(((uint32_t)k[1])<<16); | |
251 break; | |
252 case 7 : b+=((uint32_t)k8[6])<<16; /* @fallthrough */ | |
253 case 6 : b+=k[2]; | |
254 a+=k[0]+(((uint32_t)k[1])<<16); | |
255 break; | |
256 case 5 : b+=k8[4]; /* @fallthrough */ | |
257 case 4 : a+=k[0]+(((uint32_t)k[1])<<16); | |
258 break; | |
259 case 3 : a+=((uint32_t)k8[2])<<16; /* @fallthrough */ | |
260 case 2 : a+=k[0]; | |
261 break; | |
262 case 1 : a+=k8[0]; | |
263 break; | |
264 case 0 : return c; /* zero length strings require no mixing */ | |
265 } | |
266 | |
267 } else { /* need to read the key one byte at a time */ | |
268 const uint8_t *k = key; | |
269 | |
270 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ | |
271 while (length > 12) | |
272 { | |
273 a += k[0]; | |
274 a += ((uint32_t)k[1])<<8; | |
275 a += ((uint32_t)k[2])<<16; | |
276 a += ((uint32_t)k[3])<<24; | |
277 b += k[4]; | |
278 b += ((uint32_t)k[5])<<8; | |
279 b += ((uint32_t)k[6])<<16; | |
280 b += ((uint32_t)k[7])<<24; | |
281 c += k[8]; | |
282 c += ((uint32_t)k[9])<<8; | |
283 c += ((uint32_t)k[10])<<16; | |
284 c += ((uint32_t)k[11])<<24; | |
285 mix(a,b,c); | |
286 length -= 12; | |
287 k += 12; | |
288 } | |
289 | |
290 /*-------------------------------- last block: affect all 32 bits of (c) */ | |
291 switch(length) /* all the case statements fall through */ | |
292 { | |
293 case 12: c+=((uint32_t)k[11])<<24; | |
294 case 11: c+=((uint32_t)k[10])<<16; | |
295 case 10: c+=((uint32_t)k[9])<<8; | |
296 case 9 : c+=k[8]; | |
297 case 8 : b+=((uint32_t)k[7])<<24; | |
298 case 7 : b+=((uint32_t)k[6])<<16; | |
299 case 6 : b+=((uint32_t)k[5])<<8; | |
300 case 5 : b+=k[4]; | |
301 case 4 : a+=((uint32_t)k[3])<<24; | |
302 case 3 : a+=((uint32_t)k[2])<<16; | |
303 case 2 : a+=((uint32_t)k[1])<<8; | |
304 case 1 : a+=k[0]; | |
305 break; | |
306 case 0 : return c; /* zero length strings require no mixing */ | |
307 } | |
308 } | |
309 | |
310 final(a,b,c); | |
311 return c; /* zero length strings require no mixing */ | |
312 } | |
313 | |
314 #elif HASH_BIG_ENDIAN == 1 | |
315 /* | |
316 * hashbig(): | |
317 * This is the same as hashword() on big-endian machines. It is different | |
318 * from hashlittle() on all machines. hashbig() takes advantage of | |
319 * big-endian byte ordering. | |
320 */ | |
321 uint32_t hash( const void *key, size_t length, const uint32_t initval) | |
322 { | |
323 uint32_t a,b,c; | |
324 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ | |
325 | |
326 /* Set up the internal state */ | |
327 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; | |
328 | |
329 u.ptr = key; | |
330 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { | |
331 const uint32_t *k = key; /* read 32-bit chunks */ | |
332 #ifdef VALGRIND | |
333 const uint8_t *k8; | |
334 #endif // ifdef VALGRIND | |
335 | |
336 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ | |
337 while (length > 12) | |
338 { | |
339 a += k[0]; | |
340 b += k[1]; | |
341 c += k[2]; | |
342 mix(a,b,c); | |
343 length -= 12; | |
344 k += 3; | |
345 } | |
346 | |
347 /*----------------------------- handle the last (probably partial) block */ | |
348 /* | |
349 * "k[2]<<8" actually reads beyond the end of the string, but | |
350 * then shifts out the part it's not allowed to read. Because the | |
351 * string is aligned, the illegal read is in the same word as the | |
352 * rest of the string. Every machine with memory protection I've seen | |
353 * does it on word boundaries, so is OK with this. But VALGRIND will | |
354 * still catch it and complain. The masking trick does make the hash | |
355 * noticably faster for short strings (like English words). | |
356 */ | |
357 #ifndef VALGRIND | |
358 | |
359 switch(length) | |
360 { | |
361 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; | |
362 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; | |
363 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; | |
364 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; | |
365 case 8 : b+=k[1]; a+=k[0]; break; | |
366 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; | |
367 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; | |
368 case 5 : b+=k[1]&0xff000000; a+=k[0]; break; | |
369 case 4 : a+=k[0]; break; | |
370 case 3 : a+=k[0]&0xffffff00; break; | |
371 case 2 : a+=k[0]&0xffff0000; break; | |
372 case 1 : a+=k[0]&0xff000000; break; | |
373 case 0 : return c; /* zero length strings require no mixing */ | |
374 } | |
375 | |
376 #else /* make valgrind happy */ | |
377 | |
378 k8 = (const uint8_t *)k; | |
379 switch(length) /* all the case statements fall through */ | |
380 { | |
381 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; | |
382 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ | |
383 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ | |
384 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ | |
385 case 8 : b+=k[1]; a+=k[0]; break; | |
386 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ | |
387 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ | |
388 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ | |
389 case 4 : a+=k[0]; break; | |
390 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ | |
391 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ | |
392 case 1 : a+=((uint32_t)k8[0])<<24; break; | |
393 case 0 : return c; | |
394 } | |
395 | |
396 #endif /* !VALGRIND */ | |
397 | |
398 } else { /* need to read the key one byte at a time */ | |
399 const uint8_t *k = key; | |
400 | |
401 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ | |
402 while (length > 12) | |
403 { | |
404 a += ((uint32_t)k[0])<<24; | |
405 a += ((uint32_t)k[1])<<16; | |
406 a += ((uint32_t)k[2])<<8; | |
407 a += ((uint32_t)k[3]); | |
408 b += ((uint32_t)k[4])<<24; | |
409 b += ((uint32_t)k[5])<<16; | |
410 b += ((uint32_t)k[6])<<8; | |
411 b += ((uint32_t)k[7]); | |
412 c += ((uint32_t)k[8])<<24; | |
413 c += ((uint32_t)k[9])<<16; | |
414 c += ((uint32_t)k[10])<<8; | |
415 c += ((uint32_t)k[11]); | |
416 mix(a,b,c); | |
417 length -= 12; | |
418 k += 12; | |
419 } | |
420 | |
421 /*-------------------------------- last block: affect all 32 bits of (c) */ | |
422 switch(length) /* all the case statements fall through */ | |
423 { | |
424 case 12: c+=k[11]; | |
425 case 11: c+=((uint32_t)k[10])<<8; | |
426 case 10: c+=((uint32_t)k[9])<<16; | |
427 case 9 : c+=((uint32_t)k[8])<<24; | |
428 case 8 : b+=k[7]; | |
429 case 7 : b+=((uint32_t)k[6])<<8; | |
430 case 6 : b+=((uint32_t)k[5])<<16; | |
431 case 5 : b+=((uint32_t)k[4])<<24; | |
432 case 4 : a+=k[3]; | |
433 case 3 : a+=((uint32_t)k[2])<<8; | |
434 case 2 : a+=((uint32_t)k[1])<<16; | |
435 case 1 : a+=((uint32_t)k[0])<<24; | |
436 break; | |
437 case 0 : return c; | |
438 } | |
439 } | |
440 | |
441 final(a,b,c); | |
442 return c; | |
443 } | |
444 #else // HASH_XXX_ENDIAN == 1 | |
445 #error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN | |
446 #endif // hash_XXX_ENDIAN == 1 | |
447 | |
448 typedef unsigned long int ub4; /* unsigned 4-byte quantities */ | |
449 typedef unsigned char ub1; /* unsigned 1-byte quantities */ | |
450 | |
451 /* how many powers of 2's worth of buckets we use */ | |
452 static int hashpower = 16; | |
453 | |
454 #define hashsize(n) ((ub4)1<<(n)) | |
455 #define hashmask(n) (hashsize(n)-1) | |
456 | |
457 /* Main hash table. This is where we look except during expansion. */ | |
458 static item** primary_hashtable = 0; | |
459 | |
460 /* | |
461 * Previous hash table. During expansion, we look here for keys that haven't | |
462 * been moved over to the primary yet. | |
463 */ | |
464 static item** old_hashtable = 0; | |
465 | |
466 /* Number of items in the hash table. */ | |
467 static int hash_items = 0; | |
468 | |
469 /* Flag: Are we in the middle of expanding now? */ | |
470 static int expanding = 0; | |
471 | |
472 /* | |
473 * During expansion we migrate values with bucket granularity; this is how | |
474 * far we've gotten so far. Ranges from 0 .. hashsize(hashpower - 1) - 1. | |
475 */ | |
476 static int expand_bucket = 0; | |
477 | |
478 void assoc_init(void) { | |
479 unsigned int hash_size = hashsize(hashpower) * sizeof(void*); | |
480 primary_hashtable = malloc(hash_size); | |
481 if (! primary_hashtable) { | |
482 fprintf(stderr, "Failed to init hashtable.\n"); | |
483 exit(EXIT_FAILURE); | |
484 } | |
485 memset(primary_hashtable, 0, hash_size); | |
486 } | |
487 | |
488 item *assoc_find(const char *key, const size_t nkey) { | |
489 uint32_t hv = hash(key, nkey, 0); | |
490 item *it; | |
491 int oldbucket; | |
492 | |
493 if (expanding && | |
494 (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket) | |
495 { | |
496 it = old_hashtable[oldbucket]; | |
497 } else { | |
498 it = primary_hashtable[hv & hashmask(hashpower)]; | |
499 } | |
500 | |
501 while (it) { | |
502 if ((nkey == it->nkey) && | |
503 (memcmp(key, ITEM_key(it), nkey) == 0)) { | |
504 return it; | |
505 } | |
506 it = it->h_next; | |
507 } | |
508 return 0; | |
509 } | |
510 | |
511 /* returns the address of the item pointer before the key. if *item == 0, | |
512 the item wasn't found */ | |
513 | |
514 static item** _hashitem_before (const char *key, const size_t nkey) { | |
515 uint32_t hv = hash(key, nkey, 0); | |
516 item **pos; | |
517 int oldbucket; | |
518 | |
519 if (expanding && | |
520 (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket) | |
521 { | |
522 pos = &old_hashtable[oldbucket]; | |
523 } else { | |
524 pos = &primary_hashtable[hv & hashmask(hashpower)]; | |
525 } | |
526 | |
527 while (*pos && ((nkey != (*pos)->nkey) || memcmp(key, ITEM_key(*pos), nkey))) { | |
528 pos = &(*pos)->h_next; | |
529 } | |
530 return pos; | |
531 } | |
532 | |
533 /* grows the hashtable to the next power of 2. */ | |
534 static void assoc_expand(void) { | |
535 old_hashtable = primary_hashtable; | |
536 | |
537 primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *)); | |
538 if (primary_hashtable) { | |
539 if (settings.verbose > 1) | |
540 fprintf(stderr, "Hash table expansion starting\n"); | |
541 hashpower++; | |
542 expanding = 1; | |
543 expand_bucket = 0; | |
544 do_assoc_move_next_bucket(); | |
545 } else { | |
546 primary_hashtable = old_hashtable; | |
547 /* Bad news, but we can keep running. */ | |
548 } | |
549 } | |
550 | |
551 /* migrates the next bucket to the primary hashtable if we're expanding. */ | |
552 void do_assoc_move_next_bucket(void) { | |
553 item *it, *next; | |
554 int bucket; | |
555 | |
556 if (expanding) { | |
557 for (it = old_hashtable[expand_bucket]; NULL != it; it = next) { | |
558 next = it->h_next; | |
559 | |
560 bucket = hash(ITEM_key(it), it->nkey, 0) & hashmask(hashpower); | |
561 it->h_next = primary_hashtable[bucket]; | |
562 primary_hashtable[bucket] = it; | |
563 } | |
564 | |
565 old_hashtable[expand_bucket] = NULL; | |
566 | |
567 expand_bucket++; | |
568 if (expand_bucket == hashsize(hashpower - 1)) { | |
569 expanding = 0; | |
570 free(old_hashtable); | |
571 if (settings.verbose > 1) | |
572 fprintf(stderr, "Hash table expansion done\n"); | |
573 } | |
574 } | |
575 } | |
576 | |
577 /* Note: this isn't an assoc_update. The key must not already exist to call this */ | |
578 int assoc_insert(item *it) { | |
579 uint32_t hv; | |
580 int oldbucket; | |
581 | |
582 assert(assoc_find(ITEM_key(it), it->nkey) == 0); /* shouldn't have duplicately named things defined */ | |
583 | |
584 hv = hash(ITEM_key(it), it->nkey, 0); | |
585 if (expanding && | |
586 (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket) | |
587 { | |
588 it->h_next = old_hashtable[oldbucket]; | |
589 old_hashtable[oldbucket] = it; | |
590 } else { | |
591 it->h_next = primary_hashtable[hv & hashmask(hashpower)]; | |
592 primary_hashtable[hv & hashmask(hashpower)] = it; | |
593 } | |
594 | |
595 hash_items++; | |
596 if (! expanding && hash_items > (hashsize(hashpower) * 3) / 2) { | |
597 assoc_expand(); | |
598 } | |
599 | |
600 return 1; | |
601 } | |
602 | |
603 void assoc_delete(const char *key, const size_t nkey) { | |
604 item **before = _hashitem_before(key, nkey); | |
605 | |
606 if (*before) { | |
607 item *nxt = (*before)->h_next; | |
608 (*before)->h_next = 0; /* probably pointless, but whatever. */ | |
609 *before = nxt; | |
610 hash_items--; | |
611 return; | |
612 } | |
613 /* Note: we never actually get here. the callers don't delete things | |
614 they can't find. */ | |
615 assert(*before != 0); | |
616 } |