# HG changeset patch
# User Matt Mackall
# Date 1192081616 18000
# Node ID 82b4ff3abbcdfc75c74971512aa23e5f30ed89a5
# Parent 0a43875677b1eec91a08f94fb8a06e4c855b7736
bdiff: tweaks for large files

- adjust the common line threshold to .1%
  this speeds up a delta of 7M lines of source from 10m to 40s
- adjust the scaling of the hash array down a bit
  as it was raising the peak memory usage significantly

diff --git a/mercurial/bdiff.c b/mercurial/bdiff.c
--- a/mercurial/bdiff.c
+++ b/mercurial/bdiff.c
@@ -106,19 +106,19 @@ int inline cmp(struct line *a, struct li
 
 static int equatelines(struct line *a, int an, struct line *b, int bn)
 {
-	int i, j, buckets = 1, t;
-	int scale = 32;
-	struct pos *h;
+	int i, j, buckets = 1, t, scale;
+	struct pos *h = NULL;
 
 	/* build a hash table of the next highest power of 2 */
 	while (buckets < bn + 1)
 		buckets *= 2;
 
 	/* try to allocate a large hash table to avoid collisions */
-	do {
-		scale /= 2;
+	for (scale = 4; scale; scale /= 2) {
 		h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
-	} while (!h && scale != 1);
+		if (h)
+			break;
+	}
 
 	if (!h)
 		return 0;
@@ -147,7 +147,7 @@ static int equatelines(struct line *a, i
 	}
 
 	/* compute popularity threshold */
-	t = (bn >= 200) ? bn / 100 : bn + 1;
+	t = (bn >= 4000) ? bn / 1000 : bn + 1;
 
 	/* match items in a to their equivalence class in b */
 	for (i = 0; i < an; i++) {