Mercurial > hg > mercurial-crew-with-dirclash
comparison contrib/hgdiff @ 1636:7da32bb3d1d3
contrib: add Chris Mason's stand-alone diff tool
This uses Mercurial's diff algorithm to generate unidiffs like the traditional diff tool.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Tue, 24 Jan 2006 14:49:19 +1300 |
parents | |
children | e7e6504c4989 |
comparison
equal
deleted
inserted
replaced
1635:ae61937c61c5 | 1636:7da32bb3d1d3 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import os, sys, struct, stat | |
4 import difflib | |
5 import re | |
6 from optparse import OptionParser | |
7 from mercurial.bdiff import bdiff, blocks | |
8 | |
VERSION="0.2"
usage = "usage: %prog [options] file1 file2"
parser = OptionParser(usage=usage)

# -d: produce the diff with the standard library's difflib.unified_diff
#     instead of the bdiff-based bunidiff generator.
parser.add_option("-d", "--difflib", action="store_true", default=False)
# NOTE(review): --count declares no type, so a value given on the command
# line arrives as a string while the default is the int 1.  Nothing in this
# file reads options.count.
parser.add_option('-x', '--count', default=1)
# -c: number of context lines passed to bunidiff.
parser.add_option('-c', '--context', type="int", default=3)
# -p: passed to bunidiff as showfunc (diff -p style hunk headers).
parser.add_option('-p', '--show-c-function', action="store_true", default=False)
# -w: passed to bunidiff as ignorews (ignore space/tab-only changes).
parser.add_option('-w', '--ignore-all-space', action="store_true",
                  default=False)

(options, args) = parser.parse_args()

# The script indexes args[0] and args[1] unconditionally later on, so a
# single positional argument must be rejected here as well; otherwise it
# surfaces as an IndexError instead of the usage message.
if len(args) < 2:
    parser.print_help()
    sys.exit(1)
25 | |
# somewhat self contained replacement for difflib.unified_diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
# header1 and header2 are the filenames for the diff output
# context is the number of context lines
# showfunc enables diff -p output
# ignorews ignores all whitespace changes in the diff
#
# This is a generator: it yields the lines of the unified diff one at a
# time.  The "---"/"+++" header is only yielded if at least one hunk is
# produced, and only once.
def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False,
             ignorews=False):
    def contextend(l, limit):
        # last line (exclusive) of trailing context, clamped to the file end
        ret = l + context
        if ret > limit:
            ret = limit
        return ret

    def contextstart(l):
        # first line of leading context, clamped to the start of the file
        ret = l - context
        if ret < 0:
            return 0
        return ret

    def yieldhunk(hunk, header):
        # Emit one finished hunk: optional file header, the "@@" line, the
        # accumulated delta lines, then the trailing context from l1.
        if header:
            for x in header:
                yield x
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        # the trailing context (aend - a2 lines) is common to both sides
        blen = b2 - bstart + aend - a2

        func = ""
        if showfunc:
            # walk backwards from the start of the context
            # to find a line starting with an alphanumeric char.
            # astart can equal len(l1) (empty old file, or context=0 with a
            # change appended at EOF), so clamp the starting index to the
            # last existing line to avoid an IndexError.
            for x in xrange(min(astart, len(l1) - 1), -1, -1):
                t = l1[x]
                if funcre.match(t):
                    func = ' ' + t[:40]
                    break

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
                                           bstart + 1, blen, func)
        for x in delta:
            yield x
        for x in xrange(a2, aend):
            yield ' ' + l1[x]

    header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]

    if showfunc:
        # raw string: '\w' in a plain literal is an invalid escape sequence
        funcre = re.compile(r'\w')
    if ignorews:
        wsre = re.compile('[ \t]')

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    # Each entry is indexed below as (astart, aend, bstart, bend) and used to
    # slice l1 and l2, i.e. the positions are line numbers.
    diff = blocks(t1, t2)
    hunk = None
    for i in xrange(len(diff)):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i-1]
        else:
            s = [0, 0, 0, 0]
        delta = []
        s1 = diff[i]
        # the changed region lies between the end of the previous match (s)
        # and the start of the current one (s1)
        a1 = s[1]
        a2 = s1[0]
        b1 = s[3]
        b2 = s1[2]
        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if ignorews:
            # skip regions that are identical once spaces and tabs are removed
            wsold = wsre.sub('', "".join(old))
            wsnew = wsre.sub('', "".join(new))
            if wsold == wsnew:
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + context + 1:
                prev = hunk
                # continue right where the previous hunk's changes ended, so
                # the lines in between become shared context
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk, header):
                    yield x
                # we only want to yield the header if the files differ, and
                # we only want to yield it once.
                header = None
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [ astart, a2, bstart, b2, delta ]

        # leading context, then removed lines, then added lines
        delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
        delta[len(delta):] = [ '-' + x for x in old ]
        delta[len(delta):] = [ '+' + x for x in new ]

    # flush the last pending hunk, if any
    if hunk:
        for x in yieldhunk(hunk, header):
            yield x
146 | |
# simple utility function to put all the
# files from a directory tree into a dict
#
# names is filled in place: each regular file found under top is stored
# under its path relative to top, mapped to its (st_dev, st_ino) pair from
# lstat.  Entries that are not regular files are skipped.
def buildlist(names, top):
    for root, dirs, files in os.walk(top):
        # Path of this directory relative to top.  Slicing root by len(top)
        # breaks when top ends with a separator ("dir/" would turn
        # "dir/sub" into "ub"), so let os.path compute it.
        rel = os.path.relpath(root, top)
        if rel == os.curdir:
            # files directly in top are keyed by their bare name
            rel = ''
        for x in files:
            p = os.path.join(root, x)
            st = os.lstat(p)
            if stat.S_ISREG(st.st_mode):
                names[os.path.join(rel, x)] = (st.st_dev, st.st_ino)
158 | |
def diff_files(file1, file2):
    """Write a unified diff of file1 against file2 to standard output.

    Either argument may be None: a None file1 reports every line of file2
    as added, a None file2 reports every line of file1 as removed.  When
    both are given, the diff comes from difflib.unified_diff if
    options.difflib is set and from bunidiff otherwise.
    """
    def readfile(path):
        # close the handle deterministically instead of leaking it
        with open(path) as fp:
            return fp.read()

    if file1 is None:
        b = readfile(file2).splitlines(True)
        l1 = "--- %s\n" % (file2)
        l2 = "+++ %s\n" % (file2)
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif file2 is None:
        a = readfile(file1).splitlines(True)
        l1 = "--- %s\n" % (file1)
        l2 = "+++ %s\n" % (file1)
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        t1 = readfile(file1)
        t2 = readfile(file2)
        l1 = t1.splitlines(True)
        l2 = t2.splitlines(True)
        if options.difflib:
            l = difflib.unified_diff(l1, l2, file1, file2)
        else:
            l = bunidiff(t1, t2, l1, l2, file1, file2, context=options.context,
                         showfunc=options.show_c_function,
                         ignorews=options.ignore_all_space)
    for x in l:
        # a line without a trailing newline can only be the last line of a
        # file; flag it with the marker line diff/patch use for that case
        if not x.endswith('\n'):
            x += "\n\\ No newline at end of file\n"
        # every x ends with a newline here, so write it out verbatim
        sys.stdout.write(x)
187 | |
file1 = args[0]
file2 = args[1]

if os.path.isfile(file1) and os.path.isfile(file2):
    # two regular files: a single diff
    diff_files(file1, file2)
elif os.path.isdir(file1):
    if not os.path.isdir(file2):
        sys.stderr.write("file types don't match\n")
        sys.exit(1)

    # relative path -> (st_dev, st_ino) for every regular file in each tree
    d1 = {}
    d2 = {}

    buildlist(d1, file1)
    buildlist(d2, file2)
    for x in sorted(d1):
        if x not in d2:
            # only present in the first tree: reported as removed
            f2 = None
        else:
            f2 = os.path.join(file2, x)
            st1 = d1[x]
            # drop it from d2 so that only files unique to the second tree
            # are left for the loop below
            st2 = d2.pop(x)
            # same device and inode: both names refer to the same file
            if st1[0] == st2[0] and st1[1] == st2[1]:
                sys.stderr.write("%s is a hard link\n" % x)
                continue
        x = os.path.join(file1, x)
        diff_files(x, f2)
    # whatever is left only exists in the second tree: reported as added
    for x in sorted(d2):
        f1 = None
        x = os.path.join(file2, x)
        diff_files(f1, x)
else:
    # Missing paths, or a file compared against a directory: report it
    # instead of silently printing nothing and exiting successfully.
    sys.stderr.write("%s and %s must both be files or both be directories\n"
                     % (file1, file2))
    sys.exit(1)
224 |