mercurial/bdiff.c
changeset 433 79c694462294
parent 411 9e9f7ab43ce2
child 435 e731d25ddab2
equal deleted inserted replaced
432:3b9e3d3d2810 433:79c694462294
    28   #include <netinet/in.h>
    28   #include <netinet/in.h>
    29   #include <sys/types.h>
    29   #include <sys/types.h>
    30 #endif
    30 #endif
    31 
    31 
    32 struct line {
    32 struct line {
    33 	int h, len, n;
    33 	int h, len, n, e;
    34 	const char *l;
    34 	const char *l;
    35 };
    35 };
    36 
    36 
    37 struct hunk {
    37 struct hunk {
    38 	int a1, a2, b1, b2;
    38 	int a1, a2, b1, b2;
    67 	h = 0;
    67 	h = 0;
    68 	for (p = a; p < a + len; p++) {
    68 	for (p = a; p < a + len; p++) {
    69 		h = *p + rol32(h, 7); /* a simple hash from GNU diff */
    69 		h = *p + rol32(h, 7); /* a simple hash from GNU diff */
    70 		if (*p == '\n' || p == a + len - 1) {
    70 		if (*p == '\n' || p == a + len - 1) {
    71 			l->len = p - b + 1;
    71 			l->len = p - b + 1;
    72 			l->h = h;
    72 			l->h = h * l->len;
    73 			l->l = b;
    73 			l->l = b;
    74 			l->n = -1;
    74 			l->n = -1;
    75 			l++;
    75 			l++;
    76 			b = p + 1;
    76 			b = p + 1;
    77 			h = 0;
    77 			h = 0;
    84 	return i - 1;
    84 	return i - 1;
    85 }
    85 }
    86 
    86 
    87 int inline cmp(struct line *a, struct line *b)
    87 int inline cmp(struct line *a, struct line *b)
    88 {
    88 {
    89 	return a->len != b->len || memcmp(a->l, b->l, a->len);
    89 	return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
    90 }
    90 }
    91 
    91 
    92 static int equatelines(struct line *a, int an, struct line *b, int bn)
    92 static int equatelines(struct line *a, int an, struct line *b, int bn)
    93 {
    93 {
    94 	int i, j, buckets = 1, t, *h, *l;
    94 	int i, j, buckets = 1, t, *h, *l;
   116 			if (!cmp(b + i, b + h[j]))
   116 			if (!cmp(b + i, b + h[j]))
   117 				break;
   117 				break;
   118 
   118 
   119 		/* add to the head of the equivalence class */
   119 		/* add to the head of the equivalence class */
   120 		b[i].n = h[j];
   120 		b[i].n = h[j];
   121 		b[i].h = j;
   121 		b[i].e = j;
   122 		h[j] = i;
   122 		h[j] = i;
   123 		l[j]++; /* keep track of popularity */
   123 		l[j]++; /* keep track of popularity */
   124 	}
   124 	}
   125 
   125 
   126 	/* compute popularity threshold */
   126 	/* compute popularity threshold */
   131 		/* find the equivalence class */
   131 		/* find the equivalence class */
   132 		for (j = a[i].h & buckets; h[j] != -1; j = (j + 1) & buckets)
   132 		for (j = a[i].h & buckets; h[j] != -1; j = (j + 1) & buckets)
   133 			if (!cmp(a + i, b + h[j]))
   133 			if (!cmp(a + i, b + h[j]))
   134 				break;
   134 				break;
   135 
   135 
   136 		a[i].h = j; /* use equivalence class for quick compare */
   136 		a[i].e = j; /* use equivalence class for quick compare */
   137 		if(l[j] <= t)
   137 		if(l[j] <= t)
   138 			a[i].n = h[j]; /* point to head of match list */
   138 			a[i].n = h[j]; /* point to head of match list */
   139 		else
   139 		else
   140 			a[i].n = -1; /* too popular */
   140 			a[i].n = -1; /* too popular */
   141 	}
   141 	}
   157 			;
   157 			;
   158 
   158 
   159 		/* loop through all lines match a[i] in b */
   159 		/* loop through all lines match a[i] in b */
   160 		for (; j != -1 && j < b2; j = b[j].n) {
   160 		for (; j != -1 && j < b2; j = b[j].n) {
   161 			/* does this extend an earlier match? */
   161 			/* does this extend an earlier match? */
   162 			if (i > a1 && j > b1 && jpos[j - 1] == i)
   162 			if (i > a1 && j > b1 && jpos[j - 1] == i - 1)
   163 				k = jlen[j - 1] + 1;
   163 				k = jlen[j - 1] + 1;
   164 			else
   164 			else
   165 				k = 1;
   165 				k = 1;
   166 			jpos[j] = i + 1;
   166 			jpos[j] = i;
   167 			jlen[j] = k;
   167 			jlen[j] = k;
   168 
   168 
   169 			/* best match so far? */
   169 			/* best match so far? */
   170 			if (k > mk) {
   170 			if (k > mk) {
   171 				mi = i;
   171 				mi = i;
   180 		mj = mj - mk + 1;
   180 		mj = mj - mk + 1;
   181 	}
   181 	}
   182 
   182 
   183 	/* expand match to include neighboring popular lines */
   183 	/* expand match to include neighboring popular lines */
   184 	while (mi - mb > a1 && mj - mb > b1 &&
   184 	while (mi - mb > a1 && mj - mb > b1 &&
   185 	       a[mi - mb - 1].h == b[mj - mb - 1].h)
   185 	       a[mi - mb - 1].e == b[mj - mb - 1].e)
   186 		mb++;
   186 		mb++;
   187 	while (mi + mk < a2 && mj + mk < b2 &&
   187 	while (mi + mk < a2 && mj + mk < b2 &&
   188 	       a[mi + mk].h == b[mj + mk].h)
   188 	       a[mi + mk].e == b[mj + mk].e)
   189 		mk++;
   189 		mk++;
   190 
   190 
   191 	*omi = mi - mb;
   191 	*omi = mi - mb;
   192 	*omj = mj - mb;
   192 	*omj = mj - mb;
   193 	return mk + mb;
   193 	return mk + mb;
   211 	l->head->b2 = j + k;
   211 	l->head->b2 = j + k;
   212 	l->head++;
   212 	l->head++;
   213 	recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l);
   213 	recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l);
   214 }
   214 }
   215 
   215 
   216 static PyObject *bdiff(PyObject *self, PyObject *args)
   216 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
   217 {
   217 {
   218 	PyObject *sa, *sb, *result = NULL;
       
   219 	struct hunklist l;
   218 	struct hunklist l;
   220 	struct hunk *h;
   219 	int *jpos, *jlen, t;
   221 	struct line *al, *bl;
       
   222 	char encode[12], *rb;
       
   223 	int an, bn, len = 0, t, la = 0, lb = 0, *jpos, *jlen;
       
   224 
       
   225 	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
       
   226 		return NULL;
       
   227 
   220 
   228 	/* allocate and fill arrays */
   221 	/* allocate and fill arrays */
   229 	an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
   222 	t = equatelines(a, an, b, bn);
   230 	bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
       
   231 	t = equatelines(al, an, bl, bn);
       
   232 	jpos = calloc(bn, sizeof(int));
   223 	jpos = calloc(bn, sizeof(int));
   233 	jlen = calloc(bn, sizeof(int));
   224 	jlen = calloc(bn, sizeof(int));
   234 	l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2));
   225 	l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2));
   235 	if (!al || !bl || !jpos || !jlen || !l.base || !t)
   226 
   236 		goto nomem;
   227 	if (jpos && jlen && l.base && t) {
   237 
   228 		/* generate the matching block list */
   238 	/* generate the matching block list */
   229 		recurse(a, b, jpos, jlen, 0, an, 0, bn, &l);
   239 	recurse(al, bl, jpos, jlen, 0, an, 0, bn, &l);
   230 		l.head->a1 = an;
   240 	l.head->a1 = an;
   231 		l.head->b1 = bn;
   241 	l.head->b1 = bn;
   232 		l.head++;
   242 	l.head++;
   233 	}
       
   234 
       
   235 	free(jpos);
       
   236 	free(jlen);
       
   237 	return l;
       
   238 }
       
   239 
       
   240 static PyObject *blocks(PyObject *self, PyObject *args)
       
   241 {
       
   242 	PyObject *sa, *sb, *rl, *m;
       
   243 	struct line *a, *b;
       
   244 	struct hunklist l;
       
   245 	struct hunk *h;
       
   246 	int an, bn, pos = 0;
       
   247 
       
   248 	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
       
   249 		return NULL;
       
   250 
       
   251 	an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
       
   252 	bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
       
   253 	if (!a || !b)
       
   254 		goto nomem;
       
   255 
       
   256 	l = diff(a, an, b, bn);
       
   257 	rl = PyList_New(l.head - l.base);
       
   258 	if (!l.head || !rl)
       
   259 		goto nomem;
       
   260 
       
   261 	for(h = l.base; h != l.head; h++) {
       
   262 		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
       
   263 		PyList_SetItem(rl, pos, m);
       
   264 		pos++;
       
   265 	}
       
   266 
       
   267 nomem:
       
   268 	free(a);
       
   269 	free(b);
       
   270 	free(l.base);
       
   271 	return rl ? rl : PyErr_NoMemory();
       
   272 }
       
   273 
       
   274 static PyObject *bdiff(PyObject *self, PyObject *args)
       
   275 {
       
   276 	PyObject *sa, *sb, *result = NULL;
       
   277 	struct line *al, *bl;
       
   278 	struct hunklist l;
       
   279 	struct hunk *h;
       
   280 	char encode[12], *rb;
       
   281 	int an, bn, len = 0, la = 0, lb = 0;
       
   282 
       
   283 	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
       
   284 		return NULL;
       
   285 
       
   286 	an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
       
   287 	bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
       
   288 	if (!al || !bl)
       
   289 		goto nomem;
       
   290 
       
   291 	l = diff(al, an, bl, bn);
       
   292 	if (!l.head)
       
   293 		goto nomem;
   243 
   294 
   244 	/* calculate length of output */
   295 	/* calculate length of output */
   245 	for(h = l.base; h != l.head; h++) {
   296 	for(h = l.base; h != l.head; h++) {
   246 		if (h->a1 != la || h->b1 != lb)
   297 		if (h->a1 != la || h->b1 != lb)
   247 			len += 12 + bl[h->b1].l - bl[lb].l;
   298 			len += 12 + bl[h->b1].l - bl[lb].l;
   272 	}
   323 	}
   273 
   324 
   274 nomem:
   325 nomem:
   275 	free(al);
   326 	free(al);
   276 	free(bl);
   327 	free(bl);
   277 	free(jpos);
       
   278 	free(jlen);
       
   279 	free(l.base);
   328 	free(l.base);
   280 	return result ? result : PyErr_NoMemory();
   329 	return result ? result : PyErr_NoMemory();
   281 }
   330 }
   282 
   331 
   283 static char mdiff_doc[] = "Efficient binary diff.";
   332 static char mdiff_doc[] = "Efficient binary diff.";
   284 
   333 
   285 static PyMethodDef methods[] = {
   334 static PyMethodDef methods[] = {
   286 	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
   335 	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
       
   336 	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
   287 	{NULL, NULL}
   337 	{NULL, NULL}
   288 };
   338 };
   289 
   339 
   290 PyMODINIT_FUNC initbdiff(void)
   340 PyMODINIT_FUNC initbdiff(void)
   291 {
   341 {