comparison contrib/hgdiff @ 1636:7da32bb3d1d3

contrib: add Chris Mason's stand-alone diff tool. This uses Mercurial's diff algorithm to generate unidiffs like the traditional diff tool.
author Matt Mackall <mpm@selenic.com>
date Tue, 24 Jan 2006 14:49:19 +1300
parents
children e7e6504c4989
comparison
equal deleted inserted replaced
1635:ae61937c61c5 1636:7da32bb3d1d3
1 #!/usr/bin/env python
2
3 import os, sys, struct, stat
4 import difflib
5 import re
6 from optparse import OptionParser
7 from mercurial.bdiff import bdiff, blocks
8
# Version string of this tool (not read anywhere else in the visible source).
VERSION = "0.2"
usage = "usage: %prog [options] file1 file2"
parser = OptionParser(usage=usage)

# -d: produce the diff with difflib.unified_diff instead of bunidiff
parser.add_option("-d", "--difflib", action="store_true", default=False)
# -x: accepted, but its value is not read anywhere in the visible source
parser.add_option('-x', '--count', default=1)
# -c: number of context lines around each change
parser.add_option('-c', '--context', type="int", default=3)
# -p: append the nearest preceding "function" line to each hunk header
parser.add_option('-p', '--show-c-function', action="store_true", default=False)
# -w: drop changes that differ only in spaces and tabs
parser.add_option('-w', '--ignore-all-space', action="store_true",
                  default=False)

(options, args) = parser.parse_args()

# Both operands are required: the script reads args[0] and args[1] further
# down, so a lone argument has to be rejected here instead of surfacing
# later as an IndexError.
if len(args) < 2:
    parser.print_help()
    sys.exit(1)
25
# somewhat self contained replacement for difflib.unified_diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
# header1 and header2 are the filenames for the diff output
# context is the number of context lines
# showfunc enables diff -p output
# ignorews ignores all whitespace changes in the diff
#
# This is a generator: it yields the lines of the unified diff one at a
# time (the two file header lines first, then each hunk) and yields nothing
# at all when no hunk is produced.
def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False,
             ignorews=False):
    # last line (exclusive) of the trailing context, clamped to the file end
    def contextend(l, limit):
        ret = l + context
        if ret > limit:
            ret = limit
        return ret

    # first line of the leading context, clamped to the start of the file
    def contextstart(l):
        ret = l - context
        if ret < 0:
            return 0
        return ret

    # Emit one finished hunk: the file header (if still pending), the "@@"
    # line, the accumulated delta lines and the trailing context.
    def yieldhunk(hunk, header):
        if header:
            for x in header:
                yield x
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if showfunc:
            # walk backwards from the start of the context
            # to find a line starting with an alphanumeric char.
            # The walk starts no later than the last existing line, so a
            # hunk beginning at end of file cannot index past l1.
            x = min(astart, len(l1) - 1)
            while x >= 0:
                # drop the line terminator: left in place it would split
                # the "@@" header across two output lines
                t = l1[x].rstrip()
                if funcre.match(t):
                    func = ' ' + t[:40]
                    break
                x -= 1

        # Unified format numbers lines from 1, except that an empty range
        # is reported at the line preceding it (e.g. "-0,0" for an empty
        # old file), which is what the 0-based start already is.
        if alen:
            astart += 1
        if blen:
            bstart += 1

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen, bstart, blen, func)
        for x in delta:
            yield x
        for x in l1[a2:aend]:
            yield ' ' + x

    header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]

    if showfunc:
        funcre = re.compile(r'\w')
    if ignorews:
        wsre = re.compile('[ \t]')

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    # Judging from how the entries are indexed here, each one is laid out as
    # (a-start, a-end, b-start, b-end) in line numbers.
    hunk = None
    # The first match is special.
    # we've either found a match starting at line 0 or a match later
    # in the file.  If it starts at line 0, old and new below will both be
    # empty and we'll continue to the next match.
    s = [0, 0, 0, 0]
    for s1 in blocks(t1, t2):
        a1 = s[1]
        a2 = s1[0]
        b1 = s[3]
        b2 = s1[2]
        # remember this match as the "previous" one for the next round
        # before any of the early exits below
        s = s1
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if ignorews:
            wsold = wsre.sub('', "".join(old))
            wsnew = wsre.sub('', "".join(new))
            if wsold == wsnew:
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk, header):
                    yield x
                # we only want to yield the header if the files differ, and
                # we only want to yield it once.
                header = None
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [ astart, a2, bstart, b2, delta ]

        # leading context (or the unchanged lines between joined changes),
        # then the removed lines, then the added lines
        delta.extend([ ' ' + x for x in l1[astart:a1] ])
        delta.extend([ '-' + x for x in old ])
        delta.extend([ '+' + x for x in new ])

    if hunk:
        for x in yieldhunk(hunk, header):
            yield x
146
# simple utility function to put all the
# files from a directory tree into a dict
#
# names is the dict to fill in; it is modified in place.
# top is the directory to walk.
# Each regular file found (symlinks are not followed, os.lstat is used)
# is stored under its path relative to top, mapped to (st_dev, st_ino).
def buildlist(names, top):
    # top with exactly the separator os.walk puts in front of nested
    # entries.  Stripping this prefix, rather than a fixed len(top) + 1
    # characters, keeps the relative names intact when top itself already
    # ends in a separator ("dir/").
    prefix = os.path.join(top, "")
    for root, dirs, files in os.walk(top):
        if root.startswith(prefix):
            rel = root[len(prefix):]
        else:
            # root is top itself, given without a trailing separator
            rel = ""
        for x in files:
            p = os.path.join(root, x)
            st = os.lstat(p)
            if stat.S_ISREG(st.st_mode):
                names[os.path.join(rel, x)] = (st.st_dev, st.st_ino)
158
# read the whole of a file, closing the handle even if the read fails
def _readfile(path):
    fp = open(path)
    try:
        return fp.read()
    finally:
        fp.close()

# Write a unified diff of file1 against file2 to stdout.
#
# file1 is None:  file2 is shown as entirely added
# file2 is None:  file1 is shown as entirely removed
# otherwise:      the two files are compared, with difflib when the
#                 --difflib option is set and with bunidiff otherwise
#                 (the module-level options object supplies the settings)
#
# Nothing is returned.  Errors from opening or reading a file propagate.
def diff_files(file1, file2):
    if file1 is None:
        b = _readfile(file2).splitlines(1)
        l1 = "--- %s\n" % (file2)
        l2 = "+++ %s\n" % (file2)
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif file2 is None:
        a = _readfile(file1).splitlines(1)
        l1 = "--- %s\n" % (file1)
        l2 = "+++ %s\n" % (file1)
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        t1 = _readfile(file1)
        t2 = _readfile(file2)
        l1 = t1.splitlines(1)
        l2 = t2.splitlines(1)
        if options.difflib:
            l = difflib.unified_diff(l1, l2, file1, file2)
        else:
            l = bunidiff(t1, t2, l1, l2, file1, file2, context=options.context,
                         showfunc=options.show_c_function,
                         ignorews=options.ignore_all_space)
    for x in l:
        # only the last line of a file can lack its terminator; mark it
        # the way diff does
        if not x.endswith('\n'):
            x += "\n\\ No newline at end of file\n"
        # every x ends in a newline at this point, so writing it verbatim
        # emits exactly one output line per diff line
        sys.stdout.write(x)
187
# The two operands: either two regular files or two directory trees.
file1 = args[0]
file2 = args[1]

if os.path.isfile(file1) and os.path.isfile(file2):
    diff_files(file1, file2)
elif os.path.isdir(file1):
    if not os.path.isdir(file2):
        sys.stderr.write("file types don't match\n")
        sys.exit(1)

    # relative path -> (st_dev, st_ino) for every regular file in each tree
    d1 = {}
    d2 = {}

    buildlist(d1, file1)
    buildlist(d2, file2)

    # files in the first tree: compared with their counterpart in the
    # second tree, or shown as removed when there is none
    for name in sorted(d1):
        if name not in d2:
            f2 = None
        else:
            f2 = os.path.join(file2, name)
            st1 = d1[name]
            st2 = d2[name]
            # whatever is left in d2 afterwards exists only in the second tree
            del d2[name]
            # same device and inode: both names refer to the same file,
            # so there is nothing to compare
            if st1[0] == st2[0] and st1[1] == st2[1]:
                sys.stderr.write("%s is a hard link\n" % name)
                continue
        diff_files(os.path.join(file1, name), f2)

    # files only present in the second tree are shown as added
    for name in sorted(d2):
        diff_files(None, os.path.join(file2, name))
else:
    # Neither "two regular files" nor "directory first": report the problem
    # instead of exiting silently as if the operands were identical.
    for path in (file1, file2):
        if not os.path.exists(path):
            sys.stderr.write("%s: No such file or directory\n" % path)
            sys.exit(1)
    sys.stderr.write("file types don't match\n")
    sys.exit(1)
224