comparison contrib/simplemerge @ 4358:465b9ea02868

Import 3-way merge code from bzr merge3.py is imported as contrib/simplemerge test_merge3.py is imported as tests/test-simplemerge.py
author Alexis S. L. Carvalho <alexis@cecm.usp.br>
date Mon, 16 Apr 2007 20:17:39 -0300
parents
children 2e3c54fb79a3
comparison
equal deleted inserted replaced
4357:99c853a1408c 4358:465b9ea02868
1 # Copyright (C) 2004, 2005 Canonical Ltd
2 #
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16
17
18 # mbp: "you know that thing where cvs gives you conflict markers?"
19 # s: "i hate that."
20
21
22 from bzrlib.errors import CantReprocessAndShowBase
23 import bzrlib.patiencediff
24 from bzrlib.textfile import check_text_lines
25
26
27 def intersect(ra, rb):
28 """Given two ranges return the range where they intersect or None.
29
30 >>> intersect((0, 10), (0, 6))
31 (0, 6)
32 >>> intersect((0, 10), (5, 15))
33 (5, 10)
34 >>> intersect((0, 10), (10, 15))
35 >>> intersect((0, 9), (10, 15))
36 >>> intersect((0, 9), (7, 15))
37 (7, 9)
38 """
39 assert ra[0] <= ra[1]
40 assert rb[0] <= rb[1]
41
42 sa = max(ra[0], rb[0])
43 sb = min(ra[1], rb[1])
44 if sa < sb:
45 return sa, sb
46 else:
47 return None
48
49
50 def compare_range(a, astart, aend, b, bstart, bend):
51 """Compare a[astart:aend] == b[bstart:bend], without slicing.
52 """
53 if (aend-astart) != (bend-bstart):
54 return False
55 for ia, ib in zip(xrange(astart, aend), xrange(bstart, bend)):
56 if a[ia] != b[ib]:
57 return False
58 else:
59 return True
60
61
62
63
64 class Merge3(object):
65 """3-way merge of texts.
66
67 Given BASE, OTHER, THIS, tries to produce a combined text
68 incorporating the changes from both BASE->OTHER and BASE->THIS.
69 All three will typically be sequences of lines."""
70 def __init__(self, base, a, b):
71 check_text_lines(base)
72 check_text_lines(a)
73 check_text_lines(b)
74 self.base = base
75 self.a = a
76 self.b = b
77
78
79
80 def merge_lines(self,
81 name_a=None,
82 name_b=None,
83 name_base=None,
84 start_marker='<<<<<<<',
85 mid_marker='=======',
86 end_marker='>>>>>>>',
87 base_marker=None,
88 reprocess=False):
89 """Return merge in cvs-like form.
90 """
91 newline = '\n'
92 if len(self.a) > 0:
93 if self.a[0].endswith('\r\n'):
94 newline = '\r\n'
95 elif self.a[0].endswith('\r'):
96 newline = '\r'
97 if base_marker and reprocess:
98 raise CantReprocessAndShowBase()
99 if name_a:
100 start_marker = start_marker + ' ' + name_a
101 if name_b:
102 end_marker = end_marker + ' ' + name_b
103 if name_base and base_marker:
104 base_marker = base_marker + ' ' + name_base
105 merge_regions = self.merge_regions()
106 if reprocess is True:
107 merge_regions = self.reprocess_merge_regions(merge_regions)
108 for t in merge_regions:
109 what = t[0]
110 if what == 'unchanged':
111 for i in range(t[1], t[2]):
112 yield self.base[i]
113 elif what == 'a' or what == 'same':
114 for i in range(t[1], t[2]):
115 yield self.a[i]
116 elif what == 'b':
117 for i in range(t[1], t[2]):
118 yield self.b[i]
119 elif what == 'conflict':
120 yield start_marker + newline
121 for i in range(t[3], t[4]):
122 yield self.a[i]
123 if base_marker is not None:
124 yield base_marker + newline
125 for i in range(t[1], t[2]):
126 yield self.base[i]
127 yield mid_marker + newline
128 for i in range(t[5], t[6]):
129 yield self.b[i]
130 yield end_marker + newline
131 else:
132 raise ValueError(what)
133
134
135
136
137
138 def merge_annotated(self):
139 """Return merge with conflicts, showing origin of lines.
140
141 Most useful for debugging merge.
142 """
143 for t in self.merge_regions():
144 what = t[0]
145 if what == 'unchanged':
146 for i in range(t[1], t[2]):
147 yield 'u | ' + self.base[i]
148 elif what == 'a' or what == 'same':
149 for i in range(t[1], t[2]):
150 yield what[0] + ' | ' + self.a[i]
151 elif what == 'b':
152 for i in range(t[1], t[2]):
153 yield 'b | ' + self.b[i]
154 elif what == 'conflict':
155 yield '<<<<\n'
156 for i in range(t[3], t[4]):
157 yield 'A | ' + self.a[i]
158 yield '----\n'
159 for i in range(t[5], t[6]):
160 yield 'B | ' + self.b[i]
161 yield '>>>>\n'
162 else:
163 raise ValueError(what)
164
165
166
167
168
169 def merge_groups(self):
170 """Yield sequence of line groups. Each one is a tuple:
171
172 'unchanged', lines
173 Lines unchanged from base
174
175 'a', lines
176 Lines taken from a
177
178 'same', lines
179 Lines taken from a (and equal to b)
180
181 'b', lines
182 Lines taken from b
183
184 'conflict', base_lines, a_lines, b_lines
185 Lines from base were changed to either a or b and conflict.
186 """
187 for t in self.merge_regions():
188 what = t[0]
189 if what == 'unchanged':
190 yield what, self.base[t[1]:t[2]]
191 elif what == 'a' or what == 'same':
192 yield what, self.a[t[1]:t[2]]
193 elif what == 'b':
194 yield what, self.b[t[1]:t[2]]
195 elif what == 'conflict':
196 yield (what,
197 self.base[t[1]:t[2]],
198 self.a[t[3]:t[4]],
199 self.b[t[5]:t[6]])
200 else:
201 raise ValueError(what)
202
203
204 def merge_regions(self):
205 """Return sequences of matching and conflicting regions.
206
207 This returns tuples, where the first value says what kind we
208 have:
209
210 'unchanged', start, end
211 Take a region of base[start:end]
212
213 'same', astart, aend
214 b and a are different from base but give the same result
215
216 'a', start, end
217 Non-clashing insertion from a[start:end]
218
219 Method is as follows:
220
221 The two sequences align only on regions which match the base
222 and both descendents. These are found by doing a two-way diff
223 of each one against the base, and then finding the
224 intersections between those regions. These "sync regions"
225 are by definition unchanged in both and easily dealt with.
226
227 The regions in between can be in any of three cases:
228 conflicted, or changed on only one side.
229 """
230
231 # section a[0:ia] has been disposed of, etc
232 iz = ia = ib = 0
233
234 for zmatch, zend, amatch, aend, bmatch, bend in self.find_sync_regions():
235 #print 'match base [%d:%d]' % (zmatch, zend)
236
237 matchlen = zend - zmatch
238 assert matchlen >= 0
239 assert matchlen == (aend - amatch)
240 assert matchlen == (bend - bmatch)
241
242 len_a = amatch - ia
243 len_b = bmatch - ib
244 len_base = zmatch - iz
245 assert len_a >= 0
246 assert len_b >= 0
247 assert len_base >= 0
248
249 #print 'unmatched a=%d, b=%d' % (len_a, len_b)
250
251 if len_a or len_b:
252 # try to avoid actually slicing the lists
253 equal_a = compare_range(self.a, ia, amatch,
254 self.base, iz, zmatch)
255 equal_b = compare_range(self.b, ib, bmatch,
256 self.base, iz, zmatch)
257 same = compare_range(self.a, ia, amatch,
258 self.b, ib, bmatch)
259
260 if same:
261 yield 'same', ia, amatch
262 elif equal_a and not equal_b:
263 yield 'b', ib, bmatch
264 elif equal_b and not equal_a:
265 yield 'a', ia, amatch
266 elif not equal_a and not equal_b:
267 yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch
268 else:
269 raise AssertionError("can't handle a=b=base but unmatched")
270
271 ia = amatch
272 ib = bmatch
273 iz = zmatch
274
275 # if the same part of the base was deleted on both sides
276 # that's OK, we can just skip it.
277
278
279 if matchlen > 0:
280 assert ia == amatch
281 assert ib == bmatch
282 assert iz == zmatch
283
284 yield 'unchanged', zmatch, zend
285 iz = zend
286 ia = aend
287 ib = bend
288
289
290 def reprocess_merge_regions(self, merge_regions):
291 """Where there are conflict regions, remove the agreed lines.
292
293 Lines where both A and B have made the same changes are
294 eliminated.
295 """
296 for region in merge_regions:
297 if region[0] != "conflict":
298 yield region
299 continue
300 type, iz, zmatch, ia, amatch, ib, bmatch = region
301 a_region = self.a[ia:amatch]
302 b_region = self.b[ib:bmatch]
303 matches = bzrlib.patiencediff.PatienceSequenceMatcher(
304 None, a_region, b_region).get_matching_blocks()
305 next_a = ia
306 next_b = ib
307 for region_ia, region_ib, region_len in matches[:-1]:
308 region_ia += ia
309 region_ib += ib
310 reg = self.mismatch_region(next_a, region_ia, next_b,
311 region_ib)
312 if reg is not None:
313 yield reg
314 yield 'same', region_ia, region_len+region_ia
315 next_a = region_ia + region_len
316 next_b = region_ib + region_len
317 reg = self.mismatch_region(next_a, amatch, next_b, bmatch)
318 if reg is not None:
319 yield reg
320
321
322 @staticmethod
323 def mismatch_region(next_a, region_ia, next_b, region_ib):
324 if next_a < region_ia or next_b < region_ib:
325 return 'conflict', None, None, next_a, region_ia, next_b, region_ib
326
327
328 def find_sync_regions(self):
329 """Return a list of sync regions, where both descendents match the base.
330
331 Generates a list of (base1, base2, a1, a2, b1, b2). There is
332 always a zero-length sync region at the end of all the files.
333 """
334
335 ia = ib = 0
336 amatches = bzrlib.patiencediff.PatienceSequenceMatcher(
337 None, self.base, self.a).get_matching_blocks()
338 bmatches = bzrlib.patiencediff.PatienceSequenceMatcher(
339 None, self.base, self.b).get_matching_blocks()
340 len_a = len(amatches)
341 len_b = len(bmatches)
342
343 sl = []
344
345 while ia < len_a and ib < len_b:
346 abase, amatch, alen = amatches[ia]
347 bbase, bmatch, blen = bmatches[ib]
348
349 # there is an unconflicted block at i; how long does it
350 # extend? until whichever one ends earlier.
351 i = intersect((abase, abase+alen), (bbase, bbase+blen))
352 if i:
353 intbase = i[0]
354 intend = i[1]
355 intlen = intend - intbase
356
357 # found a match of base[i[0], i[1]]; this may be less than
358 # the region that matches in either one
359 assert intlen <= alen
360 assert intlen <= blen
361 assert abase <= intbase
362 assert bbase <= intbase
363
364 asub = amatch + (intbase - abase)
365 bsub = bmatch + (intbase - bbase)
366 aend = asub + intlen
367 bend = bsub + intlen
368
369 assert self.base[intbase:intend] == self.a[asub:aend], \
370 (self.base[intbase:intend], self.a[asub:aend])
371
372 assert self.base[intbase:intend] == self.b[bsub:bend]
373
374 sl.append((intbase, intend,
375 asub, aend,
376 bsub, bend))
377
378 # advance whichever one ends first in the base text
379 if (abase + alen) < (bbase + blen):
380 ia += 1
381 else:
382 ib += 1
383
384 intbase = len(self.base)
385 abase = len(self.a)
386 bbase = len(self.b)
387 sl.append((intbase, intbase, abase, abase, bbase, bbase))
388
389 return sl
390
391
392
393 def find_unconflicted(self):
394 """Return a list of ranges in base that are not conflicted."""
395 am = bzrlib.patiencediff.PatienceSequenceMatcher(
396 None, self.base, self.a).get_matching_blocks()
397 bm = bzrlib.patiencediff.PatienceSequenceMatcher(
398 None, self.base, self.b).get_matching_blocks()
399
400 unc = []
401
402 while am and bm:
403 # there is an unconflicted block at i; how long does it
404 # extend? until whichever one ends earlier.
405 a1 = am[0][0]
406 a2 = a1 + am[0][2]
407 b1 = bm[0][0]
408 b2 = b1 + bm[0][2]
409 i = intersect((a1, a2), (b1, b2))
410 if i:
411 unc.append(i)
412
413 if a2 < b2:
414 del am[0]
415 else:
416 del bm[0]
417
418 return unc
419
420
421 def main(argv):
422 # as for diff3 and meld the syntax is "MINE BASE OTHER"
423 a = file(argv[1], 'rt').readlines()
424 base = file(argv[2], 'rt').readlines()
425 b = file(argv[3], 'rt').readlines()
426
427 m3 = Merge3(base, a, b)
428
429 #for sr in m3.find_sync_regions():
430 # print sr
431
432 # sys.stdout.writelines(m3.merge_lines(name_a=argv[1], name_b=argv[3]))
433 sys.stdout.writelines(m3.merge_annotated())
434
435
436 if __name__ == '__main__':
437 import sys
438 sys.exit(main(sys.argv))