comparison mercurial/mpatch.c @ 72:4a6ab4d80dc4

Add an O(m + nlog n) patching extension
author mpm@selenic.com
date Mon, 16 May 2005 22:08:33 -0800
parents
children 7ed96baa7caa
comparison
equal deleted inserted replaced
71:47c9a869adee 72:4a6ab4d80dc4
1 /*
2 mpatch.c - efficient binary patching for Mercurial
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
6
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
10 text.
11
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
16
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
21 */
22
23 #include <Python.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <netinet/in.h>
27 #include <sys/types.h>
28
29 static char mpatch_doc[] = "Efficient binary patching.";
30
31 struct frag {
32 int start, end, len;
33 char *data;
34 };
35
36 struct flist {
37 struct frag *base, *head, *tail;
38 };
39
40 static struct flist *lalloc(int size)
41 {
42 struct flist *a;
43
44 a = malloc(sizeof(struct flist));
45 a->head = a->tail = a->base = malloc(sizeof(struct frag) * size);
46 return a;
47 }
48
49 static void lfree(struct flist *a)
50 {
51 free(a->base);
52 free(a);
53 }
54
55 static int lsize(struct flist *a)
56 {
57 return a->tail - a->head;
58 }
59
60 /* move hunks in source that are less cut to dest, compensating
61 for changes in offset. the last hunk may be split if necessary.
62 */
63 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
64 {
65 struct frag *d = dest->tail, *s = src->head;
66 int postend, c, l;
67
68 while (s != src->tail) {
69 if (s->start + offset >= cut)
70 goto exit; /* we've gone far enough */
71
72 postend = offset + s->start + s->len;
73 if (postend <= cut) {
74 /* save this hunk */
75 offset += s->start + s->len - s->end;
76 *d++ = *s++;
77 }
78 else {
79 /* break up this hunk */
80 c = cut - offset;
81 if (s->end < c)
82 c = s->end;
83 l = cut - offset - s->start;
84 if (s->len < l)
85 l = s->len;
86
87 offset += s->start + l - c;
88
89 d->start = s->start;
90 d->end = c;
91 d->len = l;
92 d->data = s->data;
93 d++;
94 s->start = c;
95 s->len = s->len - l;
96 s->data = s->data + l;
97
98 goto exit;
99 }
100 }
101
102 exit:
103 dest->tail = d;
104 src->head = s;
105 return offset;
106 }
107
108 /* like gather, but with no output list */
109 static int discard(struct flist *src, int cut, int offset)
110 {
111 struct frag *s = src->head;
112 int postend, c, l;
113
114 while (s != src->tail) {
115 if (s->start + offset >= cut)
116 goto exit;
117
118 postend = offset + s->start + s->len;
119 if (postend <= cut) {
120 offset += s->start + s->len - s->end;
121 s++;
122 }
123 else {
124 c = cut - offset;
125 if (s->end < c)
126 c = s->end;
127 l = cut - offset - s->start;
128 if (s->len < l)
129 l = s->len;
130
131 offset += s->start + l - c;
132 s->start = c;
133 s->len = s->len - l;
134 s->data = s->data + l;
135
136 goto exit;
137 }
138 }
139
140 exit:
141 src->head = s;
142 return offset;
143 }
144
145 /* combine hunk lists a and b, while adjusting b for offset changes in a/
146 this deletes a and b and returns the resultant list. */
147 static struct flist *combine(struct flist *a, struct flist *b)
148 {
149 struct flist *c;
150 struct frag *bh = b->head, *ct;
151 int offset = 0, post;
152
153 c = lalloc((lsize(a) + lsize(b)) * 2);
154
155 while (bh != b->tail) {
156 /* save old hunks */
157 offset = gather(c, a, bh->start, offset);
158
159 /* discard replaced hunks */
160 post = discard(a, bh->end, offset);
161
162 /* insert new hunk */
163 ct = c->tail;
164 ct->start = bh->start - offset;
165 ct->end = bh->end - post;
166 ct->len = bh->len;
167 ct->data = bh->data;
168 c->tail++;
169 bh++;
170 offset = post;
171 }
172
173 /* hold on to tail from a */
174 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
175 c->tail += lsize(a);
176 lfree(a);
177 lfree(b);
178 return c;
179 }
180
181 /* decode a binary patch into a hunk list */
182 static struct flist *decode(char *bin, int len)
183 {
184 struct flist *l;
185 struct frag *lt;
186 char *end = bin + len;
187
188 /* assume worst case size, we won't have many of these lists */
189 l = lalloc(len / 12);
190 lt = l->tail;
191
192 while (bin < end) {
193 lt->start = ntohl(*(uint32_t *)bin);
194 lt->end = ntohl(*(uint32_t *)(bin + 4));
195 lt->len = ntohl(*(uint32_t *)(bin + 8));
196 lt->data = bin + 12;
197 bin += 12 + lt->len;
198 lt++;
199 }
200
201 l->tail = lt;
202 return l;
203 }
204
205 /* calculate the size of resultant text */
206 static int calcsize(int len, struct flist *l)
207 {
208 int outlen = 0, last = 0;
209 struct frag *f = l->head;
210
211 while (f != l->tail) {
212 outlen += f->start - last;
213 last = f->end;
214 outlen += f->len;
215 f++;
216 }
217
218 outlen += len - last;
219 return outlen;
220 }
221
222 static void apply(char *buf, char *orig, int len, struct flist *l)
223 {
224 struct frag *f = l->head;
225 int last = 0;
226 char *p = buf;
227
228 while (f != l->tail) {
229 memcpy(p, orig + last, f->start - last);
230 p += f->start - last;
231 memcpy(p, f->data, f->len);
232 last = f->end;
233 p += f->len;
234 f++;
235 }
236 memcpy(p, orig + last, len - last);
237 }
238
239 /* recursively generate a patch of all bins between start and end */
240 static struct flist *fold(PyObject *bins, int start, int end)
241 {
242 int len;
243
244 if (start + 1 == end) {
245 /* trivial case, output a decoded list */
246 PyObject *tmp = PyList_GetItem(bins, start);
247 return decode(PyString_AsString(tmp), PyString_Size(tmp));
248 }
249
250 /* divide and conquer, memory management is elsewhere */
251 len = (end - start) / 2;
252 return combine(fold(bins, start, start + len),
253 fold(bins, start + len, end));
254 }
255
256 static PyObject *
257 patches(PyObject *self, PyObject *args)
258 {
259 PyObject *text, *bins, *result;
260 struct flist *patch;
261 char *in, *out;
262 int len, outlen;
263
264 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
265 return NULL;
266
267 len = PyList_Size(bins);
268 if (!len) {
269 /* nothing to do */
270 Py_INCREF(text);
271 return text;
272 }
273
274 patch = fold(bins, 0, len);
275 outlen = calcsize(PyString_Size(text), patch);
276 result = PyString_FromStringAndSize(NULL, outlen);
277 in = PyString_AsString(text);
278 out = PyString_AsString(result);
279 apply(out, in, PyString_Size(text), patch);
280 lfree(patch);
281
282 return result;
283 }
284
285 static PyMethodDef methods[] = {
286 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
287 {NULL, NULL}
288 };
289
290 PyMODINIT_FUNC
291 initmpatch(void)
292 {
293 Py_InitModule3("mpatch", methods, mpatch_doc);
294 }
295