/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*
 * Hash table
 *
 * The hash function used here is by Bob Jenkins, 1996:
 *    <http://burtleburtle.net/bob/hash/doobs.html>
 *    "By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.
 *    You may use this code any way you wish, private, educational,
 *    or commercial.  It's free."
 *
 * The rest of the file is licensed under the BSD license.  See LICENSE.
 *
 * $Id: assoc.c 551 2007-05-07 21:24:31Z plindner $
 */

#include "memcached.h"
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/signal.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>

/*
 * Since the hash function does bit manipulation, it needs to know
 * whether it's big or little-endian.  ENDIAN_LITTLE and ENDIAN_BIG
 * are set in the configure script.
 */
#if ENDIAN_BIG == 1
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#else
# if ENDIAN_LITTLE == 1
#  define HASH_LITTLE_ENDIAN 1
#  define HASH_BIG_ENDIAN 0
# else
#  define HASH_LITTLE_ENDIAN 0
#  define HASH_BIG_ENDIAN 0
# endif
#endif

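/* Rotate-style mixing of a 32-bit value: the value shifted left by k,
   XORed with the value shifted right by (32-k). */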
#define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k))))

/*
-------------------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.

This is reversible, so any information in (a,b,c) before mix() is
still in (a,b,c) after mix().

If four pairs of (a,b,c) inputs are run through mix(), or through
mix() in reverse, there are at least 32 bits of the output that
are sometimes the same for one pair and different for another pair.
This was tested for:
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) looks like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
satisfy this are
    4  6  8 16 19  4
    9 15  3 18 27 15
   14  9  3  7 17  3
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
for "differ" defined as + with a one-bit base and a two-bit delta.  I
used http://burtleburtle.net/bob/hash/avalanche.html to choose
the operations, constants, and arrangements of the variables.

This does not achieve avalanche.  There are input bits of (a,b,c)
that fail to affect some output bits of (a,b,c), especially of a.  The
most thoroughly mixed value is c, but it doesn't really even achieve
avalanche in c.

This allows some parallelism.  Read-after-writes are good at doubling
the number of bits affected, so the goal of mixing pulls in the opposite
direction from the goal of parallelism.  I did what I could.  Rotates
seem to cost as much as shifts on every machine I could lay my hands
on, and rotates are much kinder to the top and bottom bits, so I used
rotates.
-------------------------------------------------------------------------------
*/
#define mix(a,b,c) \
{ \
    a -= c;  a ^= rot(c, 4);  c += b; \
    b -= a;  b ^= rot(a, 6);  a += c; \
    c -= b;  c ^= rot(b, 8);  b += a; \
    a -= c;  a ^= rot(c,16);  c += b; \
    b -= a;  b ^= rot(a,19);  a += c; \
    c -= b;  c ^= rot(b, 4);  b += a; \
}

/*
-------------------------------------------------------------------------------
final -- final mixing of 3 32-bit values (a,b,c) into c

Pairs of (a,b,c) values differing in only a few bits will usually
produce values of c that look totally different.  This was tested for
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) looks like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

These constants passed:
  14 11 25 16 4 14 24
  12 14 25 16 4 14 24
and these came close:
   4  8 15 26 3 22 24
  10  8 15 26 3 22 24
  11  8 15 26 3 22 24
-------------------------------------------------------------------------------
*/
#define final(a,b,c) \
{ \
    c ^= b; c -= rot(b,14); \
    a ^= c; a -= rot(c,11); \
    b ^= a; b -= rot(a,25); \
    c ^= b; c -= rot(b,16); \
    a ^= c; a -= rot(c,4);  \
    b ^= a; b -= rot(a,14); \
    c ^= b; c -= rot(b,24); \
}

#if HASH_LITTLE_ENDIAN == 1
uint32_t hash(
  const void *key,        /* the key to hash */
  size_t length,          /* length of the key */
  const uint32_t initval) /* initval */
{
    uint32_t a,b,c;                                          /* internal state */
    union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */

    /* Set up the internal state */
    a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

    u.ptr = key;
    if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
        const uint32_t *k = key;                         /* read 32-bit chunks */
#ifdef VALGRIND
        const uint8_t *k8;
#endif // ifdef VALGRIND

        /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
        while (length > 12)
        {
            a += k[0];
            b += k[1];
            c += k[2];
            mix(a,b,c);
            length -= 12;
            k += 3;
        }

        /*----------------------------- handle the last (probably partial) block */
        /*
         * "k[2]&0xffffff" actually reads beyond the end of the string, but
         * then masks off the part it's not allowed to read.  Because the
         * string is aligned, the masked-off tail is in the same word as the
         * rest of the string.  Every machine with memory protection I've seen
         * does it on word boundaries, so is OK with this.  But VALGRIND will
         * still catch it and complain.  The masking trick does make the hash
         * noticeably faster for short strings (like English words).
         */
#ifndef VALGRIND

        switch(length)
        {
        case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
        case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
        case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
        case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
        case 8 : b+=k[1]; a+=k[0]; break;
        case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
        case 6 : b+=k[1]&0xffff; a+=k[0]; break;
        case 5 : b+=k[1]&0xff; a+=k[0]; break;
        case 4 : a+=k[0]; break;
        case 3 : a+=k[0]&0xffffff; break;
        case 2 : a+=k[0]&0xffff; break;
        case 1 : a+=k[0]&0xff; break;
        case 0 : return c;  /* zero length strings require no mixing */
        }

#else /* make valgrind happy */

        k8 = (const uint8_t *)k;
        switch(length)
        {
        case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
        case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
        case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
        case 9 : c+=k8[8];                   /* fall through */
        case 8 : b+=k[1]; a+=k[0]; break;
        case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
        case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
        case 5 : b+=k8[4];                   /* fall through */
        case 4 : a+=k[0]; break;
        case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
        case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
        case 1 : a+=k8[0]; break;
        case 0 : return c;  /* zero length strings require no mixing */
        }

#endif /* !valgrind */

    } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
        const uint16_t *k = key;                         /* read 16-bit chunks */
        const uint8_t *k8;

        /*--------------- all but last block: aligned reads and different mixing */
        while (length > 12)
        {
            a += k[0] + (((uint32_t)k[1])<<16);
            b += k[2] + (((uint32_t)k[3])<<16);
            c += k[4] + (((uint32_t)k[5])<<16);
            mix(a,b,c);
            length -= 12;
            k += 6;
        }

        /*----------------------------- handle the last (probably partial) block */
        k8 = (const uint8_t *)k;
        switch(length)
        {
        case 12: c+=k[4]+(((uint32_t)k[5])<<16);
                 b+=k[2]+(((uint32_t)k[3])<<16);
                 a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 11: c+=((uint32_t)k8[10])<<16;  /* @fallthrough */
        case 10: c+=k[4];                    /* @fallthrough@ */
                 b+=k[2]+(((uint32_t)k[3])<<16);
                 a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 9 : c+=k8[8];                   /* @fallthrough */
        case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
                 a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 7 : b+=((uint32_t)k8[6])<<16;   /* @fallthrough */
        case 6 : b+=k[2];
                 a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 5 : b+=k8[4];                   /* @fallthrough */
        case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 3 : a+=((uint32_t)k8[2])<<16;   /* @fallthrough */
        case 2 : a+=k[0];
                 break;
        case 1 : a+=k8[0];
                 break;
        case 0 : return c;  /* zero length strings require no mixing */
        }

    } else {                        /* need to read the key one byte at a time */
        const uint8_t *k = key;

        /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
        while (length > 12)
        {
            a += k[0];
            a += ((uint32_t)k[1])<<8;
            a += ((uint32_t)k[2])<<16;
            a += ((uint32_t)k[3])<<24;
            b += k[4];
            b += ((uint32_t)k[5])<<8;
            b += ((uint32_t)k[6])<<16;
            b += ((uint32_t)k[7])<<24;
            c += k[8];
            c += ((uint32_t)k[9])<<8;
            c += ((uint32_t)k[10])<<16;
            c += ((uint32_t)k[11])<<24;
            mix(a,b,c);
            length -= 12;
            k += 12;
        }

        /*-------------------------------- last block: affect all 32 bits of (c) */
        switch(length)                   /* all the case statements fall through */
        {
        case 12: c+=((uint32_t)k[11])<<24;
        case 11: c+=((uint32_t)k[10])<<16;
        case 10: c+=((uint32_t)k[9])<<8;
        case 9 : c+=k[8];
        case 8 : b+=((uint32_t)k[7])<<24;
        case 7 : b+=((uint32_t)k[6])<<16;
        case 6 : b+=((uint32_t)k[5])<<8;
        case 5 : b+=k[4];
        case 4 : a+=((uint32_t)k[3])<<24;
        case 3 : a+=((uint32_t)k[2])<<16;
        case 2 : a+=((uint32_t)k[1])<<8;
        case 1 : a+=k[0];
                 break;
        case 0 : return c;  /* zero length strings require no mixing */
        }
    }

    final(a,b,c);
    return c;
}

#elif HASH_BIG_ENDIAN == 1
/*
 * hashbig():
 * This is the same as hashword() on big-endian machines.  It is different
 * from hashlittle() on all machines.  hashbig() takes advantage of
 * big-endian byte ordering.
 */
uint32_t hash( const void *key, size_t length, const uint32_t initval)
{
    uint32_t a,b,c;
    union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */

    /* Set up the internal state */
    a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

    u.ptr = key;
    if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
        const uint32_t *k = key;                         /* read 32-bit chunks */
#ifdef VALGRIND
        const uint8_t *k8;
#endif // ifdef VALGRIND

        /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
        while (length > 12)
        {
            a += k[0];
            b += k[1];
            c += k[2];
            mix(a,b,c);
            length -= 12;
            k += 3;
        }

        /*----------------------------- handle the last (probably partial) block */
        /*
         * "k[2]<<8" actually reads beyond the end of the string, but
         * then shifts out the part it's not allowed to read.  Because the
         * string is aligned, the illegal read is in the same word as the
         * rest of the string.  Every machine with memory protection I've seen
         * does it on word boundaries, so is OK with this.  But VALGRIND will
         * still catch it and complain.  The masking trick does make the hash
         * noticeably faster for short strings (like English words).
         */
#ifndef VALGRIND

        switch(length)
        {
        case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
        case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
        case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
        case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
        case 8 : b+=k[1]; a+=k[0]; break;
        case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
        case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
        case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
        case 4 : a+=k[0]; break;
        case 3 : a+=k[0]&0xffffff00; break;
        case 2 : a+=k[0]&0xffff0000; break;
        case 1 : a+=k[0]&0xff000000; break;
        case 0 : return c;  /* zero length strings require no mixing */
        }

#else  /* make valgrind happy */

        k8 = (const uint8_t *)k;
        switch(length)                   /* all the case statements fall through */
        {
        case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
        case 11: c+=((uint32_t)k8[10])<<8;   /* fall through */
        case 10: c+=((uint32_t)k8[9])<<16;   /* fall through */
        case 9 : c+=((uint32_t)k8[8])<<24;   /* fall through */
        case 8 : b+=k[1]; a+=k[0]; break;
        case 7 : b+=((uint32_t)k8[6])<<8;    /* fall through */
        case 6 : b+=((uint32_t)k8[5])<<16;   /* fall through */
        case 5 : b+=((uint32_t)k8[4])<<24;   /* fall through */
        case 4 : a+=k[0]; break;
        case 3 : a+=((uint32_t)k8[2])<<8;    /* fall through */
        case 2 : a+=((uint32_t)k8[1])<<16;   /* fall through */
        case 1 : a+=((uint32_t)k8[0])<<24; break;
        case 0 : return c;
        }

#endif /* !VALGRIND */

    } else {                        /* need to read the key one byte at a time */
        const uint8_t *k = key;

        /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
        while (length > 12)
        {
            a += ((uint32_t)k[0])<<24;
            a += ((uint32_t)k[1])<<16;
            a += ((uint32_t)k[2])<<8;
            a += ((uint32_t)k[3]);
            b += ((uint32_t)k[4])<<24;
            b += ((uint32_t)k[5])<<16;
            b += ((uint32_t)k[6])<<8;
            b += ((uint32_t)k[7]);
            c += ((uint32_t)k[8])<<24;
            c += ((uint32_t)k[9])<<16;
            c += ((uint32_t)k[10])<<8;
            c += ((uint32_t)k[11]);
            mix(a,b,c);
            length -= 12;
            k += 12;
        }

        /*-------------------------------- last block: affect all 32 bits of (c) */
        switch(length)                   /* all the case statements fall through */
        {
        case 12: c+=k[11];
        case 11: c+=((uint32_t)k[10])<<8;
        case 10: c+=((uint32_t)k[9])<<16;
        case 9 : c+=((uint32_t)k[8])<<24;
        case 8 : b+=k[7];
        case 7 : b+=((uint32_t)k[6])<<8;
        case 6 : b+=((uint32_t)k[5])<<16;
        case 5 : b+=((uint32_t)k[4])<<24;
        case 4 : a+=k[3];
        case 3 : a+=((uint32_t)k[2])<<8;
        case 2 : a+=((uint32_t)k[1])<<16;
        case 1 : a+=((uint32_t)k[0])<<24;
                 break;
        case 0 : return c;
        }
    }

    final(a,b,c);
    return c;
}
#else // HASH_XXX_ENDIAN == 1
#error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN
#endif // HASH_XXX_ENDIAN == 1

typedef unsigned long int ub4;   /* unsigned 4-byte quantities */
typedef unsigned char ub1;       /* unsigned 1-byte quantities */

/* how many powers of 2's worth of buckets we use */
static int hashpower = 16;

#define hashsize(n) ((ub4)1<<(n))
#define hashmask(n) (hashsize(n)-1)
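/*
 * For example, with the default hashpower of 16 the table has
 * hashsize(16) = 65536 buckets and hashmask(16) = 0xffff, so a bucket is
 * chosen as hash(key, nkey, 0) & hashmask(hashpower).
 */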

/* Main hash table. This is where we look except during expansion. */
static item** primary_hashtable = 0;

/*
 * Previous hash table. During expansion, we look here for keys that haven't
 * been moved over to the primary yet.
 */
static item** old_hashtable = 0;

/* Number of items in the hash table. */
static int hash_items = 0;

/* Flag: Are we in the middle of expanding now? */
static int expanding = 0;

/*
 * During expansion we migrate values with bucket granularity; this is how
 * far we've gotten so far. Ranges from 0 .. hashsize(hashpower - 1) - 1.
 */
static int expand_bucket = 0;
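/*
 * Invariant while expanding: old-table buckets below expand_bucket have
 * already been migrated into primary_hashtable; buckets at or above it are
 * still reached through old_hashtable (see the checks in assoc_find(),
 * _hashitem_before() and assoc_insert() below).
 */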

void assoc_init(void) {
    unsigned int hash_size = hashsize(hashpower) * sizeof(void*);
    primary_hashtable = malloc(hash_size);
    if (! primary_hashtable) {
        fprintf(stderr, "Failed to init hashtable.\n");
        exit(EXIT_FAILURE);
    }
    memset(primary_hashtable, 0, hash_size);
}

item *assoc_find(const char *key, const size_t nkey) {
    uint32_t hv = hash(key, nkey, 0);
    item *it;
    int oldbucket;

    if (expanding &&
        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
    {
        it = old_hashtable[oldbucket];
    } else {
        it = primary_hashtable[hv & hashmask(hashpower)];
    }

    while (it) {
        if ((nkey == it->nkey) &&
            (memcmp(key, ITEM_key(it), nkey) == 0)) {
            return it;
        }
        it = it->h_next;
    }
    return 0;
}

/* returns the address of the item pointer before the key.  if *item == 0,
   the item wasn't found */

static item** _hashitem_before (const char *key, const size_t nkey) {
    uint32_t hv = hash(key, nkey, 0);
    item **pos;
    int oldbucket;

    if (expanding &&
        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
    {
        pos = &old_hashtable[oldbucket];
    } else {
        pos = &primary_hashtable[hv & hashmask(hashpower)];
    }

    while (*pos && ((nkey != (*pos)->nkey) || memcmp(key, ITEM_key(*pos), nkey))) {
        pos = &(*pos)->h_next;
    }
    return pos;
}

/* grows the hashtable to the next power of 2. */
static void assoc_expand(void) {
    old_hashtable = primary_hashtable;

    primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *));
    if (primary_hashtable) {
        if (settings.verbose > 1)
            fprintf(stderr, "Hash table expansion starting\n");
        hashpower++;
        expanding = 1;
        expand_bucket = 0;
        do_assoc_move_next_bucket();
    } else {
        primary_hashtable = old_hashtable;
        /* Bad news, but we can keep running. */
    }
}

/* migrates the next bucket to the primary hashtable if we're expanding. */
void do_assoc_move_next_bucket(void) {
    item *it, *next;
    int bucket;

    if (expanding) {
        for (it = old_hashtable[expand_bucket]; NULL != it; it = next) {
            next = it->h_next;

            bucket = hash(ITEM_key(it), it->nkey, 0) & hashmask(hashpower);
            it->h_next = primary_hashtable[bucket];
            primary_hashtable[bucket] = it;
        }

        old_hashtable[expand_bucket] = NULL;

        expand_bucket++;
        if (expand_bucket == hashsize(hashpower - 1)) {
            expanding = 0;
            free(old_hashtable);
            if (settings.verbose > 1)
                fprintf(stderr, "Hash table expansion done\n");
        }
    }
}

/* Note: this isn't an assoc_update.  The key must not already exist to call this */
int assoc_insert(item *it) {
    uint32_t hv;
    int oldbucket;

    assert(assoc_find(ITEM_key(it), it->nkey) == 0);  /* shouldn't have duplicately named things defined */

    hv = hash(ITEM_key(it), it->nkey, 0);
    if (expanding &&
        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
    {
        it->h_next = old_hashtable[oldbucket];
        old_hashtable[oldbucket] = it;
    } else {
        it->h_next = primary_hashtable[hv & hashmask(hashpower)];
        primary_hashtable[hv & hashmask(hashpower)] = it;
    }

    hash_items++;
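    /* Grow once the load factor passes 1.5 items per bucket, but not while a
       previous expansion is still migrating buckets. */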
    if (! expanding && hash_items > (hashsize(hashpower) * 3) / 2) {
        assoc_expand();
    }

    return 1;
}

void assoc_delete(const char *key, const size_t nkey) {
    item **before = _hashitem_before(key, nkey);

    if (*before) {
        item *nxt = (*before)->h_next;
        (*before)->h_next = 0;   /* probably pointless, but whatever. */
        *before = nxt;
        hash_items--;
        return;
    }
    /* Note:  we never actually get here.  the callers don't delete things
       they can't find. */
    assert(*before != 0);
}
|