annotate mercurial/mdiff.py @ 120:bae6f0328f63

Add a function to return the new text from a binary diff
author mpm@selenic.com
date Fri, 20 May 2005 17:42:29 -0800
parents b942bbe4bb84
children 8913e13196e1
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
1 #!/usr/bin/python
71
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
2 import difflib, struct, mmap
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
3
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
4 devzero = file("/dev/zero")
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
5
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
6 def unidiff(a, ad, b, bd, fn):
35
9197c26a414b unidiff: punt on comparing empty files
mpm@selenic.com
parents: 0
diff changeset
7 if not a and not b: return ""
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
8 a = a.splitlines(1)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
9 b = b.splitlines(1)
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
10 l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd))
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
11 return "".join(l)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
12
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
13 def textdiff(a, b):
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
14 return diff(a.splitlines(1), b.splitlines(1))
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
15
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
16 def sortdiff(a, b):
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
17 la = lb = 0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
18
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
19 while 1:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
20 if la >= len(a) or lb >= len(b): break
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
21 if b[lb] < a[la]:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
22 si = lb
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
23 while lb < len(b) and b[lb] < a[la] : lb += 1
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
24 yield "insert", la, la, si, lb
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
25 elif a[la] < b[lb]:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
26 si = la
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
27 while la < len(a) and a[la] < b[lb]: la += 1
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
28 yield "delete", si, la, lb, lb
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
29 else:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
30 la += 1
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
31 lb += 1
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
32
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
33 if lb < len(b):
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
34 yield "insert", la, la, lb, len(b)
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
35
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
36 if la < len(a):
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
37 yield "delete", la, len(a), lb, lb
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
38
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
39 def diff(a, b, sorted=0):
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
40 bin = []
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
41 p = [0]
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
42 for i in a: p.append(p[-1] + len(i))
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
43
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
44 if sorted:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
45 d = sortdiff(a, b)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
46 else:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
47 d = difflib.SequenceMatcher(None, a, b).get_opcodes()
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
48
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
49 for o, m, n, s, t in d:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
50 if o == 'equal': continue
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
51 s = "".join(b[s:t])
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
52 bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
53
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
54 return "".join(bin)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
55
120
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
56 def patchtext(bin):
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
57 pos = 0
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
58 t = []
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
59 while pos < len(bin):
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
60 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
61 pos += 12
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
62 t.append(bin[pos:pos + l])
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
63 pos += l
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
64 return "".join(t)
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
65
71
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
66 # This attempts to apply a series of patches in time proportional to
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
67 # the total size of the patches, rather than patches * len(text). This
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
68 # means rather than shuffling strings around, we shuffle around
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
69 # pointers to fragments with fragment lists.
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
70 #
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
71 # When the fragment lists get too long, we collapse them. To do this
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
72 # efficiently, we do all our operations inside a buffer created by
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
73 # mmap and simply use memmove. This avoids creating a bunch of large
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
74 # temporary string buffers.
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
75
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
76 def patches(a, bins):
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
77 if not bins: return a
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
78
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
79 plens = [len(x) for x in bins]
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
80 pl = sum(plens)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
81 bl = len(a) + pl
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
82 tl = bl + bl + pl # enough for the patches and two working texts
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
83 b1, b2 = 0, bl
75
b942bbe4bb84 Fix a bug in patches() if there's not text and no patch
mpm@selenic.com
parents: 72
diff changeset
84
b942bbe4bb84 Fix a bug in patches() if there's not text and no patch
mpm@selenic.com
parents: 72
diff changeset
85 if not tl: return a
b942bbe4bb84 Fix a bug in patches() if there's not text and no patch
mpm@selenic.com
parents: 72
diff changeset
86
71
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
87 m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
88
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
89 # load our original text
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
90 m.write(a)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
91 frags = [(len(a), b1)]
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
92
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
93 # copy all the patches into our segment so we can memmove from them
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
94 pos = b2 + bl
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
95 m.seek(pos)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
96 for p in bins: m.write(p)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
97
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
98 def pull(dst, src, l): # pull l bytes from src
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
99 while l:
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
100 f = src.pop(0)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
101 if f[0] > l: # do we need to split?
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
102 src.insert(0, (f[0] - l, f[1] + l))
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
103 dst.append((l, f[1]))
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
104 return
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
105 dst.append(f)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
106 l -= f[0]
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
107
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
108 def collect(buf, list):
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
109 start = buf
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
110 for l, p in list:
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
111 m.move(buf, p, l)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
112 buf += l
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
113 return (buf - start, start)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
114
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
115 for plen in plens:
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
116 # if our list gets too long, execute it
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
117 if len(frags) > 128:
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
118 b2, b1 = b1, b2
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
119 frags = [collect(b1, frags)]
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
120
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
121 new = []
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
122 end = pos + plen
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
123 last = 0
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
124 while pos < end:
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
125 p1, p2, l = struct.unpack(">lll", m[pos:pos + 12])
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
126 pull(new, frags, p1 - last) # what didn't change
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
127 pull([], frags, p2 - p1) # what got deleted
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
128 new.append((l, pos + 12)) # what got added
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
129 pos += l + 12
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
130 last = p2
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
131 frags = new + frags # what was left at the end
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
132
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
133 t = collect(b2, frags)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
134
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
135 return m[t[1]:t[1] + t[0]]
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
136
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
137 def patch(a, bin):
72
4a6ab4d80dc4 Add an O(m + nlog n) patching extension
mpm@selenic.com
parents: 71
diff changeset
138 return patches(a, [bin])
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
139
72
4a6ab4d80dc4 Add an O(m + nlog n) patching extension
mpm@selenic.com
parents: 71
diff changeset
140 try:
4a6ab4d80dc4 Add an O(m + nlog n) patching extension
mpm@selenic.com
parents: 71
diff changeset
141 import mpatch
4a6ab4d80dc4 Add an O(m + nlog n) patching extension
mpm@selenic.com
parents: 71
diff changeset
142 patches = mpatch.patches
4a6ab4d80dc4 Add an O(m + nlog n) patching extension
mpm@selenic.com
parents: 71
diff changeset
143 except:
4a6ab4d80dc4 Add an O(m + nlog n) patching extension
mpm@selenic.com
parents: 71
diff changeset
144 pass