# HG changeset patch # User Matt Mackall # Date 1191366258 18000 # Node ID adce4d30a6eaf9824ce3c46d7b4ec94de410eba6 # Parent d0c48891dd4a7334548e54cfb33d39e26a7b24d7# Parent b98c377b3c163d01346f63851710d183c43076da Merge with crew diff --git a/mercurial/bdiff.c b/mercurial/bdiff.c --- a/mercurial/bdiff.c +++ b/mercurial/bdiff.c @@ -12,6 +12,7 @@ #include #include #include +#include #if defined __hpux || defined __SUNPRO_C || defined _AIX # define inline @@ -58,21 +59,17 @@ struct hunklist { struct hunk *base, *head; }; -static inline uint32_t rol32(uint32_t word, unsigned int shift) -{ - return (word << shift) | (word >> (32 - shift)); -} - int splitlines(const char *a, int len, struct line **lr) { - int g, h, i; + int h, i; const char *p, *b = a; + const char * const plast = a + len - 1; struct line *l; /* count the lines */ i = 1; /* extra line for sentinel */ for (p = a; p < a + len; p++) - if (*p == '\n' || p == a + len - 1) + if (*p == '\n' || p == plast) i++; *lr = l = (struct line *)malloc(sizeof(struct line) * i); @@ -82,24 +79,17 @@ int splitlines(const char *a, int len, s /* build the line array and calculate hashes */ h = 0; for (p = a; p < a + len; p++) { - /* - * a simple hash from GNU diff, with better collision - * resistance from hashpjw. this slows down common - * case by 10%, but speeds up worst case by 100x. - */ - h = *p + rol32(h, 7); - if ((g = h & 0xf0000000)) { - h ^= g >> 24; - h ^= g; - } - if (*p == '\n' || p == a + len - 1) { + /* Leonid Yuriev's hash */ + h = (h * 1664525) + *p + 1013904223; + + if (*p == '\n' || p == plast) { + l->h = h; + h = 0; l->len = p - b + 1; - l->h = h * l->len; l->l = b; - l->n = -1; + l->n = INT_MAX; l++; b = p + 1; - h = 0; } } @@ -117,27 +107,34 @@ int inline cmp(struct line *a, struct li static int equatelines(struct line *a, int an, struct line *b, int bn) { int i, j, buckets = 1, t; + int scale = 32; struct pos *h; /* build a hash table of the next highest power of 2 */ while (buckets < bn + 1) buckets *= 2; - h = (struct pos *)malloc(buckets * sizeof(struct pos)); - buckets = buckets - 1; + /* try to allocate a large hash table to avoid collisions */ + do { + scale /= 2; + h = (struct pos *)malloc(scale * buckets * sizeof(struct pos)); + } while (!h && scale != 1); + if (!h) return 0; + buckets = buckets * scale - 1; + /* clear the hash table */ for (i = 0; i <= buckets; i++) { - h[i].pos = -1; + h[i].pos = INT_MAX; h[i].len = 0; } /* add lines to the hash table chains */ for (i = bn - 1; i >= 0; i--) { /* find the equivalence class */ - for (j = b[i].h & buckets; h[j].pos != -1; + for (j = b[i].h & buckets; h[j].pos != INT_MAX; j = (j + 1) & buckets) if (!cmp(b + i, b + h[j].pos)) break; @@ -155,7 +152,7 @@ static int equatelines(struct line *a, i /* match items in a to their equivalence class in b */ for (i = 0; i < an; i++) { /* find the equivalence class */ - for (j = a[i].h & buckets; h[j].pos != -1; + for (j = a[i].h & buckets; h[j].pos != INT_MAX; j = (j + 1) & buckets) if (!cmp(a + i, b + h[j].pos)) break; @@ -164,7 +161,7 @@ static int equatelines(struct line *a, i if (h[j].len <= t) a[i].n = h[j].pos; /* point to head of match list */ else - a[i].n = -1; /* too popular */ + a[i].n = INT_MAX; /* too popular */ } /* discard hash tables */ @@ -179,11 +176,11 @@ static int longest_match(struct line *a, for (i = a1; i < a2; i++) { /* skip things before the current block */ - for (j = a[i].n; j != -1 && j < b1; j = b[j].n) + for (j = a[i].n; j < b1; j = b[j].n) ; /* loop through all lines match a[i] in b */ - for (; j != -1 && j < b2; j = b[j].n) { + for (; j < b2; j = b[j].n) { /* does this extend an earlier match? */ if (i > a1 && j > b1 && pos[j - 1].pos == i - 1) k = pos[j - 1].len + 1; @@ -216,6 +213,7 @@ static int longest_match(struct line *a, *omi = mi - mb; *omj = mj - mb; + return mk + mb; }