Mercurial > hg > mercurial-crew-with-dirclash
comparison mercurial/bdiff.c @ 433:79c694462294
Add bdiff.blocks / minor performance tweaks
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
Add bdiff.blocks / minor performance tweaks
This refactors bdiff.bdiff so that we can get a list of matching
blocks of line numbers for use by annotate/unidiff.
Minor performance tweaks:
- - add a field for equivalence so we can keep h around a bit longer for cmp
- - mix len into the hash to reduce collisions
- - move an operation into the slow path in longest_match
manifest hash: b1aee590b6291b31069ea8a86b6aa8fb259ac244
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.0 (GNU/Linux)
iD8DBQFCubu2ywK+sNU5EO8RAm4FAJ9r10aJpT7qA96nqGYFHcuy4XcIHgCfeFx5
q0PyTXeZQc7Fw5kwEPcoykI=
=QXSb
-----END PGP SIGNATURE-----
author | mpm@selenic.com |
---|---|
date | Wed, 22 Jun 2005 11:27:50 -0800 |
parents | 9e9f7ab43ce2 |
children | e731d25ddab2 |
comparison
equal
deleted
inserted
replaced
432:3b9e3d3d2810 | 433:79c694462294 |
---|---|
28 #include <netinet/in.h> | 28 #include <netinet/in.h> |
29 #include <sys/types.h> | 29 #include <sys/types.h> |
30 #endif | 30 #endif |
31 | 31 |
32 struct line { | 32 struct line { |
33 int h, len, n; | 33 int h, len, n, e; |
34 const char *l; | 34 const char *l; |
35 }; | 35 }; |
36 | 36 |
37 struct hunk { | 37 struct hunk { |
38 int a1, a2, b1, b2; | 38 int a1, a2, b1, b2; |
67 h = 0; | 67 h = 0; |
68 for (p = a; p < a + len; p++) { | 68 for (p = a; p < a + len; p++) { |
69 h = *p + rol32(h, 7); /* a simple hash from GNU diff */ | 69 h = *p + rol32(h, 7); /* a simple hash from GNU diff */ |
70 if (*p == '\n' || p == a + len - 1) { | 70 if (*p == '\n' || p == a + len - 1) { |
71 l->len = p - b + 1; | 71 l->len = p - b + 1; |
72 l->h = h; | 72 l->h = h * l->len; |
73 l->l = b; | 73 l->l = b; |
74 l->n = -1; | 74 l->n = -1; |
75 l++; | 75 l++; |
76 b = p + 1; | 76 b = p + 1; |
77 h = 0; | 77 h = 0; |
84 return i - 1; | 84 return i - 1; |
85 } | 85 } |
86 | 86 |
87 int inline cmp(struct line *a, struct line *b) | 87 int inline cmp(struct line *a, struct line *b) |
88 { | 88 { |
89 return a->len != b->len || memcmp(a->l, b->l, a->len); | 89 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len); |
90 } | 90 } |
91 | 91 |
92 static int equatelines(struct line *a, int an, struct line *b, int bn) | 92 static int equatelines(struct line *a, int an, struct line *b, int bn) |
93 { | 93 { |
94 int i, j, buckets = 1, t, *h, *l; | 94 int i, j, buckets = 1, t, *h, *l; |
116 if (!cmp(b + i, b + h[j])) | 116 if (!cmp(b + i, b + h[j])) |
117 break; | 117 break; |
118 | 118 |
119 /* add to the head of the equivalence class */ | 119 /* add to the head of the equivalence class */ |
120 b[i].n = h[j]; | 120 b[i].n = h[j]; |
121 b[i].h = j; | 121 b[i].e = j; |
122 h[j] = i; | 122 h[j] = i; |
123 l[j]++; /* keep track of popularity */ | 123 l[j]++; /* keep track of popularity */ |
124 } | 124 } |
125 | 125 |
126 /* compute popularity threshold */ | 126 /* compute popularity threshold */ |
131 /* find the equivalence class */ | 131 /* find the equivalence class */ |
132 for (j = a[i].h & buckets; h[j] != -1; j = (j + 1) & buckets) | 132 for (j = a[i].h & buckets; h[j] != -1; j = (j + 1) & buckets) |
133 if (!cmp(a + i, b + h[j])) | 133 if (!cmp(a + i, b + h[j])) |
134 break; | 134 break; |
135 | 135 |
136 a[i].h = j; /* use equivalence class for quick compare */ | 136 a[i].e = j; /* use equivalence class for quick compare */ |
137 if(l[j] <= t) | 137 if(l[j] <= t) |
138 a[i].n = h[j]; /* point to head of match list */ | 138 a[i].n = h[j]; /* point to head of match list */ |
139 else | 139 else |
140 a[i].n = -1; /* too popular */ | 140 a[i].n = -1; /* too popular */ |
141 } | 141 } |
157 ; | 157 ; |
158 | 158 |
159 /* loop through all lines matching a[i] in b */ | 159 /* loop through all lines matching a[i] in b */ |
160 for (; j != -1 && j < b2; j = b[j].n) { | 160 for (; j != -1 && j < b2; j = b[j].n) { |
161 /* does this extend an earlier match? */ | 161 /* does this extend an earlier match? */ |
162 if (i > a1 && j > b1 && jpos[j - 1] == i) | 162 if (i > a1 && j > b1 && jpos[j - 1] == i - 1) |
163 k = jlen[j - 1] + 1; | 163 k = jlen[j - 1] + 1; |
164 else | 164 else |
165 k = 1; | 165 k = 1; |
166 jpos[j] = i + 1; | 166 jpos[j] = i; |
167 jlen[j] = k; | 167 jlen[j] = k; |
168 | 168 |
169 /* best match so far? */ | 169 /* best match so far? */ |
170 if (k > mk) { | 170 if (k > mk) { |
171 mi = i; | 171 mi = i; |
180 mj = mj - mk + 1; | 180 mj = mj - mk + 1; |
181 } | 181 } |
182 | 182 |
183 /* expand match to include neighboring popular lines */ | 183 /* expand match to include neighboring popular lines */ |
184 while (mi - mb > a1 && mj - mb > b1 && | 184 while (mi - mb > a1 && mj - mb > b1 && |
185 a[mi - mb - 1].h == b[mj - mb - 1].h) | 185 a[mi - mb - 1].e == b[mj - mb - 1].e) |
186 mb++; | 186 mb++; |
187 while (mi + mk < a2 && mj + mk < b2 && | 187 while (mi + mk < a2 && mj + mk < b2 && |
188 a[mi + mk].h == b[mj + mk].h) | 188 a[mi + mk].e == b[mj + mk].e) |
189 mk++; | 189 mk++; |
190 | 190 |
191 *omi = mi - mb; | 191 *omi = mi - mb; |
192 *omj = mj - mb; | 192 *omj = mj - mb; |
193 return mk + mb; | 193 return mk + mb; |
211 l->head->b2 = j + k; | 211 l->head->b2 = j + k; |
212 l->head++; | 212 l->head++; |
213 recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l); | 213 recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l); |
214 } | 214 } |
215 | 215 |
216 static PyObject *bdiff(PyObject *self, PyObject *args) | 216 static struct hunklist diff(struct line *a, int an, struct line *b, int bn) |
217 { | 217 { |
218 PyObject *sa, *sb, *result = NULL; | |
219 struct hunklist l; | 218 struct hunklist l; |
220 struct hunk *h; | 219 int *jpos, *jlen, t; |
221 struct line *al, *bl; | |
222 char encode[12], *rb; | |
223 int an, bn, len = 0, t, la = 0, lb = 0, *jpos, *jlen; | |
224 | |
225 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) | |
226 return NULL; | |
227 | 220 |
228 /* allocate and fill arrays */ | 221 /* allocate and fill arrays */ |
229 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al); | 222 t = equatelines(a, an, b, bn); |
230 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl); | |
231 t = equatelines(al, an, bl, bn); | |
232 jpos = calloc(bn, sizeof(int)); | 223 jpos = calloc(bn, sizeof(int)); |
233 jlen = calloc(bn, sizeof(int)); | 224 jlen = calloc(bn, sizeof(int)); |
234 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2)); | 225 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2)); |
235 if (!al || !bl || !jpos || !jlen || !l.base || !t) | 226 |
236 goto nomem; | 227 if (jpos && jlen && l.base && t) { |
237 | 228 /* generate the matching block list */ |
238 /* generate the matching block list */ | 229 recurse(a, b, jpos, jlen, 0, an, 0, bn, &l); |
239 recurse(al, bl, jpos, jlen, 0, an, 0, bn, &l); | 230 l.head->a1 = an; |
240 l.head->a1 = an; | 231 l.head->b1 = bn; |
241 l.head->b1 = bn; | 232 l.head++; |
242 l.head++; | 233 } |
234 | |
235 free(jpos); | |
236 free(jlen); | |
237 return l; | |
238 } | |
239 | |
240 static PyObject *blocks(PyObject *self, PyObject *args) | |
241 { | |
242 PyObject *sa, *sb, *rl, *m; | |
243 struct line *a, *b; | |
244 struct hunklist l; | |
245 struct hunk *h; | |
246 int an, bn, pos = 0; | |
247 | |
248 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) | |
249 return NULL; | |
250 | |
251 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a); | |
252 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b); | |
253 if (!a || !b) | |
254 goto nomem; | |
255 | |
256 l = diff(a, an, b, bn); | |
257 rl = PyList_New(l.head - l.base); | |
258 if (!l.head || !rl) | |
259 goto nomem; | |
260 | |
261 for(h = l.base; h != l.head; h++) { | |
262 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2); | |
263 PyList_SetItem(rl, pos, m); | |
264 pos++; | |
265 } | |
266 | |
267 nomem: | |
268 free(a); | |
269 free(b); | |
270 free(l.base); | |
271 return rl ? rl : PyErr_NoMemory(); | |
272 } | |
273 | |
274 static PyObject *bdiff(PyObject *self, PyObject *args) | |
275 { | |
276 PyObject *sa, *sb, *result = NULL; | |
277 struct line *al, *bl; | |
278 struct hunklist l; | |
279 struct hunk *h; | |
280 char encode[12], *rb; | |
281 int an, bn, len = 0, la = 0, lb = 0; | |
282 | |
283 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) | |
284 return NULL; | |
285 | |
286 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al); | |
287 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl); | |
288 if (!al || !bl) | |
289 goto nomem; | |
290 | |
291 l = diff(al, an, bl, bn); | |
292 if (!l.head) | |
293 goto nomem; | |
243 | 294 |
244 /* calculate length of output */ | 295 /* calculate length of output */ |
245 for(h = l.base; h != l.head; h++) { | 296 for(h = l.base; h != l.head; h++) { |
246 if (h->a1 != la || h->b1 != lb) | 297 if (h->a1 != la || h->b1 != lb) |
247 len += 12 + bl[h->b1].l - bl[lb].l; | 298 len += 12 + bl[h->b1].l - bl[lb].l; |
272 } | 323 } |
273 | 324 |
274 nomem: | 325 nomem: |
275 free(al); | 326 free(al); |
276 free(bl); | 327 free(bl); |
277 free(jpos); | |
278 free(jlen); | |
279 free(l.base); | 328 free(l.base); |
280 return result ? result : PyErr_NoMemory(); | 329 return result ? result : PyErr_NoMemory(); |
281 } | 330 } |
282 | 331 |
283 static char mdiff_doc[] = "Efficient binary diff."; | 332 static char mdiff_doc[] = "Efficient binary diff."; |
284 | 333 |
285 static PyMethodDef methods[] = { | 334 static PyMethodDef methods[] = { |
286 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, | 335 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, |
336 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, | |
287 {NULL, NULL} | 337 {NULL, NULL} |
288 }; | 338 }; |
289 | 339 |
290 PyMODINIT_FUNC initbdiff(void) | 340 PyMODINIT_FUNC initbdiff(void) |
291 { | 341 { |