comparison mercurial/bdiff.c @ 5452:82b4ff3abbcd

bdiff: tweaks for large files - adjust the common line threshold to .1% this speeds up a delta of 7M lines of source from 10m to 40s - adjust the scaling of the hash array down a bit as it was raising the peak memory usage significantly
author Matt Mackall <mpm@selenic.com>
date Thu, 11 Oct 2007 00:46:56 -0500
parents d0c48891dd4a
children
comparison
equal deleted inserted replaced
5451:0a43875677b1 5452:82b4ff3abbcd
104 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len); 104 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
105 } 105 }
106 106
107 static int equatelines(struct line *a, int an, struct line *b, int bn) 107 static int equatelines(struct line *a, int an, struct line *b, int bn)
108 { 108 {
109 int i, j, buckets = 1, t; 109 int i, j, buckets = 1, t, scale;
110 int scale = 32; 110 struct pos *h = NULL;
111 struct pos *h;
112 111
113 /* build a hash table of the next highest power of 2 */ 112 /* build a hash table of the next highest power of 2 */
114 while (buckets < bn + 1) 113 while (buckets < bn + 1)
115 buckets *= 2; 114 buckets *= 2;
116 115
117 /* try to allocate a large hash table to avoid collisions */ 116 /* try to allocate a large hash table to avoid collisions */
118 do { 117 for (scale = 4; scale; scale /= 2) {
119 scale /= 2;
120 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos)); 118 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
121 } while (!h && scale != 1); 119 if (h)
120 break;
121 }
122 122
123 if (!h) 123 if (!h)
124 return 0; 124 return 0;
125 125
126 buckets = buckets * scale - 1; 126 buckets = buckets * scale - 1;
145 h[j].pos = i; 145 h[j].pos = i;
146 h[j].len++; /* keep track of popularity */ 146 h[j].len++; /* keep track of popularity */
147 } 147 }
148 148
149 /* compute popularity threshold */ 149 /* compute popularity threshold */
150 t = (bn >= 200) ? bn / 100 : bn + 1; 150 t = (bn >= 4000) ? bn / 1000 : bn + 1;
151 151
152 /* match items in a to their equivalence class in b */ 152 /* match items in a to their equivalence class in b */
153 for (i = 0; i < an; i++) { 153 for (i = 0; i < an; i++) {
154 /* find the equivalence class */ 154 /* find the equivalence class */
155 for (j = a[i].h & buckets; h[j].pos != INT_MAX; 155 for (j = a[i].h & buckets; h[j].pos != INT_MAX;