changeset 433 | 79c694462294 |
parent 411 | 9e9f7ab43ce2 |
child 435 | e731d25ddab2 |
432:3b9e3d3d2810 | 433:79c694462294 |
---|---|
28 #include <netinet/in.h> |
28 #include <netinet/in.h> |
29 #include <sys/types.h> |
29 #include <sys/types.h> |
30 #endif |
30 #endif |
31 |
31 |
32 struct line { |
32 struct line { |
33 int h, len, n; |
33 int h, len, n, e; |
34 const char *l; |
34 const char *l; |
35 }; |
35 }; |
36 |
36 |
37 struct hunk { |
37 struct hunk { |
38 int a1, a2, b1, b2; |
38 int a1, a2, b1, b2; |
67 h = 0; |
67 h = 0; |
68 for (p = a; p < a + len; p++) { |
68 for (p = a; p < a + len; p++) { |
69 h = *p + rol32(h, 7); /* a simple hash from GNU diff */ |
69 h = *p + rol32(h, 7); /* a simple hash from GNU diff */ |
70 if (*p == '\n' || p == a + len - 1) { |
70 if (*p == '\n' || p == a + len - 1) { |
71 l->len = p - b + 1; |
71 l->len = p - b + 1; |
72 l->h = h; |
72 l->h = h * l->len; |
73 l->l = b; |
73 l->l = b; |
74 l->n = -1; |
74 l->n = -1; |
75 l++; |
75 l++; |
76 b = p + 1; |
76 b = p + 1; |
77 h = 0; |
77 h = 0; |
84 return i - 1; |
84 return i - 1; |
85 } |
85 } |
86 |
86 |
87 int inline cmp(struct line *a, struct line *b) |
87 int inline cmp(struct line *a, struct line *b) |
88 { |
88 { |
89 return a->len != b->len || memcmp(a->l, b->l, a->len); |
89 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len); |
90 } |
90 } |
91 |
91 |
92 static int equatelines(struct line *a, int an, struct line *b, int bn) |
92 static int equatelines(struct line *a, int an, struct line *b, int bn) |
93 { |
93 { |
94 int i, j, buckets = 1, t, *h, *l; |
94 int i, j, buckets = 1, t, *h, *l; |
116 if (!cmp(b + i, b + h[j])) |
116 if (!cmp(b + i, b + h[j])) |
117 break; |
117 break; |
118 |
118 |
119 /* add to the head of the equivalence class */ |
119 /* add to the head of the equivalence class */ |
120 b[i].n = h[j]; |
120 b[i].n = h[j]; |
121 b[i].h = j; |
121 b[i].e = j; |
122 h[j] = i; |
122 h[j] = i; |
123 l[j]++; /* keep track of popularity */ |
123 l[j]++; /* keep track of popularity */ |
124 } |
124 } |
125 |
125 |
126 /* compute popularity threshold */ |
126 /* compute popularity threshold */ |
131 /* find the equivalence class */ |
131 /* find the equivalence class */ |
132 for (j = a[i].h & buckets; h[j] != -1; j = (j + 1) & buckets) |
132 for (j = a[i].h & buckets; h[j] != -1; j = (j + 1) & buckets) |
133 if (!cmp(a + i, b + h[j])) |
133 if (!cmp(a + i, b + h[j])) |
134 break; |
134 break; |
135 |
135 |
136 a[i].h = j; /* use equivalence class for quick compare */ |
136 a[i].e = j; /* use equivalence class for quick compare */ |
137 if(l[j] <= t) |
137 if(l[j] <= t) |
138 a[i].n = h[j]; /* point to head of match list */ |
138 a[i].n = h[j]; /* point to head of match list */ |
139 else |
139 else |
140 a[i].n = -1; /* too popular */ |
140 a[i].n = -1; /* too popular */ |
141 } |
141 } |
157 ; |
157 ; |
158 |
158 |
159 /* loop through all lines match a[i] in b */ |
159 /* loop through all lines match a[i] in b */ |
160 for (; j != -1 && j < b2; j = b[j].n) { |
160 for (; j != -1 && j < b2; j = b[j].n) { |
161 /* does this extend an earlier match? */ |
161 /* does this extend an earlier match? */ |
162 if (i > a1 && j > b1 && jpos[j - 1] == i) |
162 if (i > a1 && j > b1 && jpos[j - 1] == i - 1) |
163 k = jlen[j - 1] + 1; |
163 k = jlen[j - 1] + 1; |
164 else |
164 else |
165 k = 1; |
165 k = 1; |
166 jpos[j] = i + 1; |
166 jpos[j] = i; |
167 jlen[j] = k; |
167 jlen[j] = k; |
168 |
168 |
169 /* best match so far? */ |
169 /* best match so far? */ |
170 if (k > mk) { |
170 if (k > mk) { |
171 mi = i; |
171 mi = i; |
180 mj = mj - mk + 1; |
180 mj = mj - mk + 1; |
181 } |
181 } |
182 |
182 |
183 /* expand match to include neighboring popular lines */ |
183 /* expand match to include neighboring popular lines */ |
184 while (mi - mb > a1 && mj - mb > b1 && |
184 while (mi - mb > a1 && mj - mb > b1 && |
185 a[mi - mb - 1].h == b[mj - mb - 1].h) |
185 a[mi - mb - 1].e == b[mj - mb - 1].e) |
186 mb++; |
186 mb++; |
187 while (mi + mk < a2 && mj + mk < b2 && |
187 while (mi + mk < a2 && mj + mk < b2 && |
188 a[mi + mk].h == b[mj + mk].h) |
188 a[mi + mk].e == b[mj + mk].e) |
189 mk++; |
189 mk++; |
190 |
190 |
191 *omi = mi - mb; |
191 *omi = mi - mb; |
192 *omj = mj - mb; |
192 *omj = mj - mb; |
193 return mk + mb; |
193 return mk + mb; |
211 l->head->b2 = j + k; |
211 l->head->b2 = j + k; |
212 l->head++; |
212 l->head++; |
213 recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l); |
213 recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l); |
214 } |
214 } |
215 |
215 |
216 static PyObject *bdiff(PyObject *self, PyObject *args) |
216 static struct hunklist diff(struct line *a, int an, struct line *b, int bn) |
217 { |
217 { |
218 PyObject *sa, *sb, *result = NULL; |
|
219 struct hunklist l; |
218 struct hunklist l; |
220 struct hunk *h; |
219 int *jpos, *jlen, t; |
221 struct line *al, *bl; |
|
222 char encode[12], *rb; |
|
223 int an, bn, len = 0, t, la = 0, lb = 0, *jpos, *jlen; |
|
224 |
|
225 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) |
|
226 return NULL; |
|
227 |
220 |
228 /* allocate and fill arrays */ |
221 /* allocate and fill arrays */ |
229 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al); |
222 t = equatelines(a, an, b, bn); |
230 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl); |
|
231 t = equatelines(al, an, bl, bn); |
|
232 jpos = calloc(bn, sizeof(int)); |
223 jpos = calloc(bn, sizeof(int)); |
233 jlen = calloc(bn, sizeof(int)); |
224 jlen = calloc(bn, sizeof(int)); |
234 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2)); |
225 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2)); |
235 if (!al || !bl || !jpos || !jlen || !l.base || !t) |
226 |
236 goto nomem; |
227 if (jpos && jlen && l.base && t) { |
237 |
228 /* generate the matching block list */ |
238 /* generate the matching block list */ |
229 recurse(a, b, jpos, jlen, 0, an, 0, bn, &l); |
239 recurse(al, bl, jpos, jlen, 0, an, 0, bn, &l); |
230 l.head->a1 = an; |
240 l.head->a1 = an; |
231 l.head->b1 = bn; |
241 l.head->b1 = bn; |
232 l.head++; |
242 l.head++; |
233 } |
234 |
|
235 free(jpos); |
|
236 free(jlen); |
|
237 return l; |
|
238 } |
|
239 |
|
240 static PyObject *blocks(PyObject *self, PyObject *args) |
|
241 { |
|
242 PyObject *sa, *sb, *rl, *m; |
|
243 struct line *a, *b; |
|
244 struct hunklist l; |
|
245 struct hunk *h; |
|
246 int an, bn, pos = 0; |
|
247 |
|
248 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) |
|
249 return NULL; |
|
250 |
|
251 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a); |
|
252 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b); |
|
253 if (!a || !b) |
|
254 goto nomem; |
|
255 |
|
256 l = diff(a, an, b, bn); |
|
257 rl = PyList_New(l.head - l.base); |
|
258 if (!l.head || !rl) |
|
259 goto nomem; |
|
260 |
|
261 for(h = l.base; h != l.head; h++) { |
|
262 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2); |
|
263 PyList_SetItem(rl, pos, m); |
|
264 pos++; |
|
265 } |
|
266 |
|
267 nomem: |
|
268 free(a); |
|
269 free(b); |
|
270 free(l.base); |
|
271 return rl ? rl : PyErr_NoMemory(); |
|
272 } |
|
273 |
|
274 static PyObject *bdiff(PyObject *self, PyObject *args) |
|
275 { |
|
276 PyObject *sa, *sb, *result = NULL; |
|
277 struct line *al, *bl; |
|
278 struct hunklist l; |
|
279 struct hunk *h; |
|
280 char encode[12], *rb; |
|
281 int an, bn, len = 0, la = 0, lb = 0; |
|
282 |
|
283 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) |
|
284 return NULL; |
|
285 |
|
286 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al); |
|
287 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl); |
|
288 if (!al || !bl) |
|
289 goto nomem; |
|
290 |
|
291 l = diff(al, an, bl, bn); |
|
292 if (!l.head) |
|
293 goto nomem; |
|
243 |
294 |
244 /* calculate length of output */ |
295 /* calculate length of output */ |
245 for(h = l.base; h != l.head; h++) { |
296 for(h = l.base; h != l.head; h++) { |
246 if (h->a1 != la || h->b1 != lb) |
297 if (h->a1 != la || h->b1 != lb) |
247 len += 12 + bl[h->b1].l - bl[lb].l; |
298 len += 12 + bl[h->b1].l - bl[lb].l; |
272 } |
323 } |
273 |
324 |
274 nomem: |
325 nomem: |
275 free(al); |
326 free(al); |
276 free(bl); |
327 free(bl); |
277 free(jpos); |
|
278 free(jlen); |
|
279 free(l.base); |
328 free(l.base); |
280 return result ? result : PyErr_NoMemory(); |
329 return result ? result : PyErr_NoMemory(); |
281 } |
330 } |
282 |
331 |
283 static char mdiff_doc[] = "Efficient binary diff."; |
332 static char mdiff_doc[] = "Efficient binary diff."; |
284 |
333 |
285 static PyMethodDef methods[] = { |
334 static PyMethodDef methods[] = { |
286 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, |
335 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, |
336 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, |
|
287 {NULL, NULL} |
337 {NULL, NULL} |
288 }; |
338 }; |
289 |
339 |
290 PyMODINIT_FUNC initbdiff(void) |
340 PyMODINIT_FUNC initbdiff(void) |
291 { |
341 { |