Mercurial > hg > mercurial-crew-with-dirclash
comparison mercurial/mdiff.py @ 184:697f05bfe976
Improved binary diff from Christopher Li
This is more intelligent and efficient because it combines neighboring inserts,
replaces, and deletes. It passes the test of converting the kernel repo, but
doesn't appear to substantially affect compression or performance.
author | mpm@selenic.com |
---|---|
date | Fri, 27 May 2005 19:38:34 -0800 |
parents | e6c621a825f2 |
children | 75840796e8e2 afe895fcc0d0 |
comparison
equal
deleted
inserted
replaced
183:767916673e16 | 184:697f05bfe976 |
---|---|
17 def textdiff(a, b): | 17 def textdiff(a, b): |
18 return diff(a.splitlines(1), b.splitlines(1)) | 18 return diff(a.splitlines(1), b.splitlines(1)) |
19 | 19 |
20 def sortdiff(a, b): | 20 def sortdiff(a, b): |
21 la = lb = 0 | 21 la = lb = 0 |
22 | 22 lena = len(a) |
23 lenb = len(b) | |
23 while 1: | 24 while 1: |
24 if la >= len(a) or lb >= len(b): break | 25 am, bm, = la, lb |
25 if b[lb] < a[la]: | 26 while lb < lenb and la < len and a[la] == b[lb] : |
26 si = lb | |
27 while lb < len(b) and b[lb] < a[la] : lb += 1 | |
28 yield "insert", la, la, si, lb | |
29 elif a[la] < b[lb]: | |
30 si = la | |
31 while la < len(a) and a[la] < b[lb]: la += 1 | |
32 yield "delete", si, la, lb, lb | |
33 else: | |
34 la += 1 | 27 la += 1 |
35 lb += 1 | 28 lb += 1 |
36 | 29 if la>am: yield (am, bm, la-am) |
37 if lb < len(b): | 30 while lb < lenb and b[lb] < a[la]: lb += 1 |
38 yield "insert", la, la, lb, len(b) | 31 if lb>=lenb: break |
39 | 32 while la < lena and b[lb] > a[la]: la += 1 |
40 if la < len(a): | 33 if la>=lena: break |
41 yield "delete", la, len(a), lb, lb | 34 yield (lena, lenb, 0) |
42 | 35 |
43 def diff(a, b, sorted=0): | 36 def diff(a, b, sorted=0): |
37 if not a: | |
38 s = "".join(b) | |
39 return s and (struct.pack(">lll", 0, 0, len(s)) + s) | |
40 | |
44 bin = [] | 41 bin = [] |
45 p = [0] | 42 p = [0] |
46 for i in a: p.append(p[-1] + len(i)) | 43 for i in a: p.append(p[-1] + len(i)) |
47 | 44 |
48 if sorted: | 45 if sorted: |
49 d = sortdiff(a, b) | 46 d = sortdiff(a, b) |
50 else: | 47 else: |
51 d = difflib.SequenceMatcher(None, a, b).get_opcodes() | 48 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks() |
52 | 49 la = 0 |
53 for o, m, n, s, t in d: | 50 lb = 0 |
54 if o == 'equal': continue | 51 for am, bm, size in d: |
55 s = "".join(b[s:t]) | 52 s = "".join(b[lb:bm]) |
56 bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s) | 53 if am > la or s: |
57 | 54 bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s) |
55 la = am + size | |
56 lb = bm + size | |
57 | |
58 return "".join(bin) | 58 return "".join(bin) |
59 | 59 |
60 def patchtext(bin): | 60 def patchtext(bin): |
61 pos = 0 | 61 pos = 0 |
62 t = [] | 62 t = [] |