mercurial/mpatch.c
changeset 72 4a6ab4d80dc4
child 82 7ed96baa7caa
equal deleted inserted replaced
71:47c9a869adee 72:4a6ab4d80dc4
       
     1 /*
       
     2  mpatch.c - efficient binary patching for Mercurial
       
     3 
       
     4  This implements a patch algorithm that's O(m + nlog n) where m is the
       
     5  size of the output and n is the number of patches.
       
     6 
       
     7  Given a list of binary patches, it unpacks each into a hunk list,
       
     8  then combines the hunk lists with a treewise recursion to form a
       
     9  single hunk list. This hunk list is then applied to the original
       
    10  text.
       
    11 
       
    12  The text (or binary) fragments are copied directly from their source
       
    13  Python objects into a preallocated output string to avoid the
       
    14  allocation of intermediate Python objects. Working memory is about 2x
       
    15  the total number of hunks.
       
    16 
       
    17  Copyright 2005 Matt Mackall <mpm@selenic.com>
       
    18 
       
    19  This software may be used and distributed according to the terms
       
    20  of the GNU General Public License, incorporated herein by reference.
       
    21 */
       
    22 
       
    23 #include <Python.h>
       
    24 #include <stdlib.h>
       
    25 #include <string.h>
       
    26 #include <netinet/in.h>
       
    27 #include <sys/types.h>
       
    28 
       
    29 static char mpatch_doc[] = "Efficient binary patching.";
       
    30 
       
    31 struct frag {
       
    32 	int start, end, len;
       
    33 	char *data;
       
    34 };
       
    35 
       
    36 struct flist {
       
    37 	struct frag *base, *head, *tail;
       
    38 };
       
    39 
       
    40 static struct flist *lalloc(int size)
       
    41 {
       
    42 	struct flist *a;
       
    43 
       
    44 	a = malloc(sizeof(struct flist));
       
    45 	a->head = a->tail = a->base = malloc(sizeof(struct frag) * size);
       
    46 	return a;
       
    47 }
       
    48 
       
    49 static void lfree(struct flist *a)
       
    50 {
       
    51 	free(a->base);
       
    52 	free(a);
       
    53 }
       
    54 
       
    55 static int lsize(struct flist *a)
       
    56 {
       
    57 	return a->tail - a->head;
       
    58 }
       
    59 
       
    60 /* move hunks in source that are less cut to dest, compensating
       
    61    for changes in offset. the last hunk may be split if necessary.
       
    62 */
       
    63 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
       
    64 {
       
    65 	struct frag *d = dest->tail, *s = src->head;
       
    66 	int postend, c, l;
       
    67 
       
    68 	while (s != src->tail) {
       
    69 		if (s->start + offset >= cut)
       
    70 			goto exit; /* we've gone far enough */
       
    71 
       
    72 		postend = offset + s->start + s->len;
       
    73 		if (postend <= cut) {
       
    74 			/* save this hunk */
       
    75 			offset += s->start + s->len - s->end;
       
    76 			*d++ = *s++;
       
    77 		}
       
    78 		else {
       
    79 			/* break up this hunk */
       
    80 			c = cut - offset;
       
    81 			if (s->end < c)
       
    82 				c = s->end;
       
    83 			l = cut - offset - s->start;
       
    84 			if (s->len < l)
       
    85 				l = s->len;
       
    86 
       
    87 			offset += s->start + l - c;
       
    88 
       
    89 			d->start = s->start;
       
    90 			d->end = c;
       
    91 			d->len = l;
       
    92 			d->data = s->data;
       
    93 			d++;
       
    94 			s->start = c;
       
    95 			s->len = s->len - l;
       
    96 			s->data = s->data + l;
       
    97 
       
    98 			goto exit;
       
    99 		}
       
   100 	}
       
   101 
       
   102  exit:
       
   103 	dest->tail = d;
       
   104 	src->head = s;
       
   105 	return offset;
       
   106 }
       
   107 
       
   108 /* like gather, but with no output list */
       
   109 static int discard(struct flist *src, int cut, int offset)
       
   110 {
       
   111 	struct frag *s = src->head;
       
   112 	int postend, c, l;
       
   113 
       
   114 	while (s != src->tail) {
       
   115 		if (s->start + offset >= cut)
       
   116 			goto exit;
       
   117 
       
   118 		postend = offset + s->start + s->len;
       
   119 		if (postend <= cut) {
       
   120 			offset += s->start + s->len - s->end;
       
   121 			s++;
       
   122 		}
       
   123 		else {
       
   124 			c = cut - offset;
       
   125 			if (s->end < c)
       
   126 				c = s->end;
       
   127 			l = cut - offset - s->start;
       
   128 			if (s->len < l)
       
   129 				l = s->len;
       
   130 
       
   131 			offset += s->start + l - c;
       
   132 			s->start = c;
       
   133 			s->len = s->len - l;
       
   134 			s->data = s->data + l;
       
   135 
       
   136 			goto exit;
       
   137 		}
       
   138 	}
       
   139 
       
   140  exit:
       
   141 	src->head = s;
       
   142 	return offset;
       
   143 }
       
   144 
       
   145 /* combine hunk lists a and b, while adjusting b for offset changes in a/
       
   146    this deletes a and b and returns the resultant list. */
       
   147 static struct flist *combine(struct flist *a, struct flist *b)
       
   148 {
       
   149 	struct flist *c;
       
   150 	struct frag *bh = b->head, *ct;
       
   151 	int offset = 0, post;
       
   152 
       
   153 	c = lalloc((lsize(a) + lsize(b)) * 2);
       
   154 
       
   155 	while (bh != b->tail) {
       
   156 		/* save old hunks */
       
   157 		offset = gather(c, a, bh->start, offset);
       
   158 
       
   159 		/* discard replaced hunks */
       
   160 		post = discard(a, bh->end, offset);
       
   161 
       
   162 		/* insert new hunk */
       
   163 		ct = c->tail;
       
   164 		ct->start = bh->start - offset;
       
   165 		ct->end = bh->end - post;
       
   166 		ct->len = bh->len;
       
   167 		ct->data = bh->data;
       
   168 		c->tail++;
       
   169 		bh++;
       
   170 		offset = post;
       
   171 	}
       
   172 
       
   173 	/* hold on to tail from a */
       
   174 	memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
       
   175 	c->tail += lsize(a);
       
   176 	lfree(a);
       
   177 	lfree(b);
       
   178 	return c;
       
   179 }
       
   180 
       
   181 /* decode a binary patch into a hunk list */
       
   182 static struct flist *decode(char *bin, int len)
       
   183 {
       
   184 	struct flist *l;
       
   185 	struct frag *lt;
       
   186 	char *end = bin + len;
       
   187 
       
   188 	/* assume worst case size, we won't have many of these lists */
       
   189 	l = lalloc(len / 12);
       
   190 	lt = l->tail;
       
   191 
       
   192 	while (bin < end) {
       
   193 		lt->start = ntohl(*(uint32_t *)bin);
       
   194 		lt->end = ntohl(*(uint32_t *)(bin + 4));
       
   195 		lt->len = ntohl(*(uint32_t *)(bin + 8));
       
   196 		lt->data = bin + 12;
       
   197 		bin += 12 + lt->len;
       
   198 		lt++;
       
   199 	}
       
   200 
       
   201 	l->tail = lt;
       
   202 	return l;
       
   203 }
       
   204 
       
   205 /* calculate the size of resultant text */
       
   206 static int calcsize(int len, struct flist *l)
       
   207 {
       
   208 	int outlen = 0, last = 0;
       
   209 	struct frag *f = l->head;
       
   210 
       
   211 	while (f != l->tail) {
       
   212 		outlen += f->start - last;
       
   213 		last = f->end;
       
   214 		outlen += f->len;
       
   215 		f++;
       
   216 	}
       
   217 
       
   218 	outlen += len - last;
       
   219 	return outlen;
       
   220 }
       
   221 
       
   222 static void apply(char *buf, char *orig, int len, struct flist *l)
       
   223 {
       
   224 	struct frag *f = l->head;
       
   225 	int last = 0;
       
   226 	char *p = buf;
       
   227 
       
   228 	while (f != l->tail) {
       
   229 		memcpy(p, orig + last, f->start - last);
       
   230 		p += f->start - last;
       
   231 		memcpy(p, f->data, f->len);
       
   232 		last = f->end;
       
   233 		p += f->len;
       
   234 		f++;
       
   235 	}
       
   236 	memcpy(p, orig + last, len - last);
       
   237 }
       
   238 
       
   239 /* recursively generate a patch of all bins between start and end */
       
   240 static struct flist *fold(PyObject *bins, int start, int end)
       
   241 {
       
   242 	int len;
       
   243 
       
   244 	if (start + 1 == end) {
       
   245 		/* trivial case, output a decoded list */
       
   246 		PyObject *tmp = PyList_GetItem(bins, start);
       
   247 		return decode(PyString_AsString(tmp), PyString_Size(tmp));
       
   248 	}
       
   249 
       
   250 	/* divide and conquer, memory management is elsewhere */
       
   251 	len = (end - start) / 2;
       
   252 	return combine(fold(bins, start, start + len),
       
   253 		       fold(bins, start + len, end));
       
   254 }
       
   255 
       
   256 static PyObject *
       
   257 patches(PyObject *self, PyObject *args)
       
   258 {
       
   259 	PyObject *text, *bins, *result;
       
   260 	struct flist *patch;
       
   261 	char *in, *out;
       
   262 	int len, outlen;
       
   263 
       
   264 	if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
       
   265 		return NULL;
       
   266 
       
   267 	len = PyList_Size(bins);
       
   268 	if (!len) {
       
   269 		/* nothing to do */
       
   270 		Py_INCREF(text);
       
   271 		return text;
       
   272 	}
       
   273 
       
   274 	patch = fold(bins, 0, len);
       
   275 	outlen = calcsize(PyString_Size(text), patch);
       
   276 	result = PyString_FromStringAndSize(NULL, outlen);
       
   277 	in = PyString_AsString(text);
       
   278 	out = PyString_AsString(result);
       
   279 	apply(out, in, PyString_Size(text), patch);
       
   280 	lfree(patch);
       
   281 
       
   282 	return result;
       
   283 }
       
   284 
       
   285 static PyMethodDef methods[] = {
       
   286 	{"patches", patches, METH_VARARGS, "apply a series of patches\n"},
       
   287 	{NULL, NULL}
       
   288 };
       
   289 
       
   290 PyMODINIT_FUNC
       
   291 initmpatch(void)
       
   292 {
       
   293 	Py_InitModule3("mpatch", methods, mpatch_doc);
       
   294 }
       
   295