# HG changeset patch # User Matt Mackall # Date 1190955558 18000 # Node ID d0c48891dd4a7334548e54cfb33d39e26a7b24d7 # Parent 458acf92b49e4be54f780f4a0512d91502a5aa67 bdiff: switch to lyhash lyhash is a very simple and fast hash function that had the fewest hash collisions on a 3.9M line text corpus and 190k line binary corpus and should have significantly fewer collisions than the current hash function. diff --git a/mercurial/bdiff.c b/mercurial/bdiff.c --- a/mercurial/bdiff.c +++ b/mercurial/bdiff.c @@ -59,14 +59,9 @@ struct hunklist { struct hunk *base, *head; }; -static inline uint32_t rol32(uint32_t word, unsigned int shift) -{ - return (word << shift) | (word >> (32 - shift)); -} - int splitlines(const char *a, int len, struct line **lr) { - int g, h, i; + int h, i; const char *p, *b = a; const char * const plast = a + len - 1; struct line *l; @@ -84,24 +79,17 @@ int splitlines(const char *a, int len, s /* build the line array and calculate hashes */ h = 0; for (p = a; p < a + len; p++) { - /* - * a simple hash from GNU diff, with better collision - * resistance from hashpjw. this slows down common - * case by 10%, but speeds up worst case by 100x. - */ - h = *p + rol32(h, 7); - if ((g = h & 0xf0000000)) { - h ^= g >> 24; - h ^= g; - } + /* Leonid Yuriev's hash */ + h = (h * 1664525) + *p + 1013904223; + if (*p == '\n' || p == plast) { + l->h = h; + h = 0; l->len = p - b + 1; - l->h = h * l->len; l->l = b; l->n = INT_MAX; l++; b = p + 1; - h = 0; } }