comparison mercurial/bdiff.c @ 5366:d0c48891dd4a

bdiff: switch to lyhash lyhash is a very simple and fast hash function that had the fewest hash collisions on a 3.9M line text corpus and 190k line binary corpus and should have significantly fewer collisions than the current hash function.
author Matt Mackall <mpm@selenic.com>
date Thu, 27 Sep 2007 23:59:18 -0500
parents 458acf92b49e
children 82b4ff3abbcd
comparison
equal deleted inserted replaced
5365:458acf92b49e 5366:d0c48891dd4a
57 57
58 struct hunklist { 58 struct hunklist {
59 struct hunk *base, *head; 59 struct hunk *base, *head;
60 }; 60 };
61 61
62 static inline uint32_t rol32(uint32_t word, unsigned int shift)
63 {
64 return (word << shift) | (word >> (32 - shift));
65 }
66
67 int splitlines(const char *a, int len, struct line **lr) 62 int splitlines(const char *a, int len, struct line **lr)
68 { 63 {
69 int g, h, i; 64 int h, i;
70 const char *p, *b = a; 65 const char *p, *b = a;
71 const char * const plast = a + len - 1; 66 const char * const plast = a + len - 1;
72 struct line *l; 67 struct line *l;
73 68
74 /* count the lines */ 69 /* count the lines */
82 return -1; 77 return -1;
83 78
84 /* build the line array and calculate hashes */ 79 /* build the line array and calculate hashes */
85 h = 0; 80 h = 0;
86 for (p = a; p < a + len; p++) { 81 for (p = a; p < a + len; p++) {
87 /* 82 /* Leonid Yuriev's hash */
88 * a simple hash from GNU diff, with better collision 83 h = (h * 1664525) + *p + 1013904223;
89 * resistance from hashpjw. this slows down common 84
90 * case by 10%, but speeds up worst case by 100x.
91 */
92 h = *p + rol32(h, 7);
93 if ((g = h & 0xf0000000)) {
94 h ^= g >> 24;
95 h ^= g;
96 }
97 if (*p == '\n' || p == plast) { 85 if (*p == '\n' || p == plast) {
86 l->h = h;
87 h = 0;
98 l->len = p - b + 1; 88 l->len = p - b + 1;
99 l->h = h * l->len;
100 l->l = b; 89 l->l = b;
101 l->n = INT_MAX; 90 l->n = INT_MAX;
102 l++; 91 l++;
103 b = p + 1; 92 b = p + 1;
104 h = 0;
105 } 93 }
106 } 94 }
107 95
108 /* set up a sentinel */ 96 /* set up a sentinel */
109 l->h = l->len = 0; 97 l->h = l->len = 0;