Mercurial > hg > mercurial-crew-with-dirclash
comparison mercurial/bdiff.c @ 5366:d0c48891dd4a
bdiff: switch to lyhash
lyhash is a very simple and fast hash function that had the fewest
hash collisions on a 3.9M line text corpus and 190k line binary corpus
and should have significantly fewer collisions than the current hash
function.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Thu, 27 Sep 2007 23:59:18 -0500 |
parents | 458acf92b49e |
children | 82b4ff3abbcd |
comparison
equal
deleted
inserted
replaced
5365:458acf92b49e | 5366:d0c48891dd4a |
---|---|
57 | 57 |
58 struct hunklist { | 58 struct hunklist { |
59 struct hunk *base, *head; | 59 struct hunk *base, *head; |
60 }; | 60 }; |
61 | 61 |
62 static inline uint32_t rol32(uint32_t word, unsigned int shift) | |
63 { | |
64 return (word << shift) | (word >> (32 - shift)); | |
65 } | |
66 | |
67 int splitlines(const char *a, int len, struct line **lr) | 62 int splitlines(const char *a, int len, struct line **lr) |
68 { | 63 { |
69 int g, h, i; | 64 int h, i; |
70 const char *p, *b = a; | 65 const char *p, *b = a; |
71 const char * const plast = a + len - 1; | 66 const char * const plast = a + len - 1; |
72 struct line *l; | 67 struct line *l; |
73 | 68 |
74 /* count the lines */ | 69 /* count the lines */ |
82 return -1; | 77 return -1; |
83 | 78 |
84 /* build the line array and calculate hashes */ | 79 /* build the line array and calculate hashes */ |
85 h = 0; | 80 h = 0; |
86 for (p = a; p < a + len; p++) { | 81 for (p = a; p < a + len; p++) { |
87 /* | 82 /* Leonid Yuriev's hash */ |
88 * a simple hash from GNU diff, with better collision | 83 h = (h * 1664525) + *p + 1013904223; |
89 * resistance from hashpjw. this slows down common | 84 |
90 * case by 10%, but speeds up worst case by 100x. | |
91 */ | |
92 h = *p + rol32(h, 7); | |
93 if ((g = h & 0xf0000000)) { | |
94 h ^= g >> 24; | |
95 h ^= g; | |
96 } | |
97 if (*p == '\n' || p == plast) { | 85 if (*p == '\n' || p == plast) { |
86 l->h = h; | |
87 h = 0; | |
98 l->len = p - b + 1; | 88 l->len = p - b + 1; |
99 l->h = h * l->len; | |
100 l->l = b; | 89 l->l = b; |
101 l->n = INT_MAX; | 90 l->n = INT_MAX; |
102 l++; | 91 l++; |
103 b = p + 1; | 92 b = p + 1; |
104 h = 0; | |
105 } | 93 } |
106 } | 94 } |
107 | 95 |
108 /* set up a sentinel */ | 96 /* set up a sentinel */ |
109 l->h = l->len = 0; | 97 l->h = l->len = 0; |