comparison mercurial/mdiff.py @ 2873:4ec58b157265

Refactor text diff/patch code. Rename commands.dodiff to patch.diff. Rename commands.doexport to patch.export. Move some functions from commands to the new mercurial.cmdutil module. Turn the list of diff options into the mdiff.diffopts class. patch.diff and patch.export now have a clean API for calls from third-party Python code.
author Vadim Gelfer <vadim.gelfer@gmail.com>
date Sat, 12 Aug 2006 16:13:27 -0700
parents 345bac2bc4ec
children 8b02af865990
comparison
equal deleted inserted replaced
2872:5dd6631c8238 2873:4ec58b157265
17 lines.pop() 17 lines.pop()
18 else: 18 else:
19 lines[-1] = lines[-1][:-1] 19 lines[-1] = lines[-1][:-1]
20 return lines 20 return lines
21 21
22 def unidiff(a, ad, b, bd, fn, r=None, text=False, 22 class diffopts(object):
23 showfunc=False, ignorews=False, ignorewsamount=False, 23 '''context is the number of context lines
24 ignoreblanklines=False): 24 text treats all files as text
25 25 showfunc enables diff -p output
26 ignorews ignores all whitespace changes in the diff
27 ignorewsamount ignores changes in the amount of whitespace
28 ignoreblanklines ignores changes whose lines are all blank'''
29
30 defaults = {
31 'context': 3,
32 'text': False,
33 'showfunc': True,
34 'ignorews': False,
35 'ignorewsamount': False,
36 'ignoreblanklines': False,
37 }
38
39 __slots__ = defaults.keys()
40
41 def __init__(self, **opts):
42 for k in self.__slots__:
43 v = opts.get(k)
44 if v is None:
45 v = self.defaults[k]
46 setattr(self, k, v)
47
48 defaultopts = diffopts()
49
50 def unidiff(a, ad, b, bd, fn, r=None, opts=defaultopts):
26 if not a and not b: return "" 51 if not a and not b: return ""
27 epoch = util.datestr((0, 0)) 52 epoch = util.datestr((0, 0))
28 53
29 if not text and (util.binary(a) or util.binary(b)): 54 if not opts.text and (util.binary(a) or util.binary(b)):
30 l = ['Binary file %s has changed\n' % fn] 55 l = ['Binary file %s has changed\n' % fn]
31 elif not a: 56 elif not a:
32 b = splitnewlines(b) 57 b = splitnewlines(b)
33 if a is None: 58 if a is None:
34 l1 = "--- %s\t%s\n" % ("/dev/null", epoch) 59 l1 = "--- %s\t%s\n" % ("/dev/null", epoch)
47 l3 = "@@ -1,%d +0,0 @@\n" % len(a) 72 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
48 l = [l1, l2, l3] + ["-" + e for e in a] 73 l = [l1, l2, l3] + ["-" + e for e in a]
49 else: 74 else:
50 al = splitnewlines(a) 75 al = splitnewlines(a)
51 bl = splitnewlines(b) 76 bl = splitnewlines(b)
52 l = list(bunidiff(a, b, al, bl, "a/" + fn, "b/" + fn, 77 l = list(bunidiff(a, b, al, bl, "a/" + fn, "b/" + fn, opts=opts))
53 showfunc=showfunc, ignorews=ignorews,
54 ignorewsamount=ignorewsamount,
55 ignoreblanklines=ignoreblanklines))
56 if not l: return "" 78 if not l: return ""
57 # difflib uses a space, rather than a tab 79 # difflib uses a space, rather than a tab
58 l[0] = "%s\t%s\n" % (l[0][:-2], ad) 80 l[0] = "%s\t%s\n" % (l[0][:-2], ad)
59 l[1] = "%s\t%s\n" % (l[1][:-2], bd) 81 l[1] = "%s\t%s\n" % (l[1][:-2], bd)
60 82
70 92
71 # somewhat self contained replacement for difflib.unified_diff 93 # somewhat self contained replacement for difflib.unified_diff
72 # t1 and t2 are the text to be diffed 94 # t1 and t2 are the text to be diffed
73 # l1 and l2 are the text broken up into lines 95 # l1 and l2 are the text broken up into lines
74 # header1 and header2 are the filenames for the diff output 96 # header1 and header2 are the filenames for the diff output
75 # context is the number of context lines 97 def bunidiff(t1, t2, l1, l2, header1, header2, opts=defaultopts):
76 # showfunc enables diff -p output
77 # ignorews ignores all whitespace changes in the diff
78 # ignorewsamount ignores changes in the amount of whitespace
79 # ignoreblanklines ignores changes whose lines are all blank
80 def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False,
81 ignorews=False, ignorewsamount=False, ignoreblanklines=False):
82 def contextend(l, len): 98 def contextend(l, len):
83 ret = l + context 99 ret = l + opts.context
84 if ret > len: 100 if ret > len:
85 ret = len 101 ret = len
86 return ret 102 return ret
87 103
88 def contextstart(l): 104 def contextstart(l):
89 ret = l - context 105 ret = l - opts.context
90 if ret < 0: 106 if ret < 0:
91 return 0 107 return 0
92 return ret 108 return ret
93 109
94 def yieldhunk(hunk, header): 110 def yieldhunk(hunk, header):
99 aend = contextend(a2, len(l1)) 115 aend = contextend(a2, len(l1))
100 alen = aend - astart 116 alen = aend - astart
101 blen = b2 - bstart + aend - a2 117 blen = b2 - bstart + aend - a2
102 118
103 func = "" 119 func = ""
104 if showfunc: 120 if opts.showfunc:
105 # walk backwards from the start of the context 121 # walk backwards from the start of the context
106 # to find a line starting with an alphanumeric char. 122 # to find a line starting with an alphanumeric char.
107 for x in xrange(astart, -1, -1): 123 for x in xrange(astart, -1, -1):
108 t = l1[x].rstrip() 124 t = l1[x].rstrip()
109 if funcre.match(t): 125 if funcre.match(t):
117 for x in xrange(a2, aend): 133 for x in xrange(a2, aend):
118 yield ' ' + l1[x] 134 yield ' ' + l1[x]
119 135
120 header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ] 136 header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]
121 137
122 if showfunc: 138 if opts.showfunc:
123 funcre = re.compile('\w') 139 funcre = re.compile('\w')
124 if ignorewsamount: 140 if opts.ignorewsamount:
125 wsamountre = re.compile('[ \t]+') 141 wsamountre = re.compile('[ \t]+')
126 wsappendedre = re.compile(' \n') 142 wsappendedre = re.compile(' \n')
127 if ignoreblanklines: 143 if opts.ignoreblanklines:
128 wsblanklinesre = re.compile('\n') 144 wsblanklinesre = re.compile('\n')
129 if ignorews: 145 if opts.ignorews:
130 wsre = re.compile('[ \t]') 146 wsre = re.compile('[ \t]')
131 147
132 # bdiff.blocks gives us the matching sequences in the files. The loop 148 # bdiff.blocks gives us the matching sequences in the files. The loop
133 # below finds the spaces between those matching sequences and translates 149 # below finds the spaces between those matching sequences and translates
134 # them into diff output. 150 # them into diff output.
157 # bdiff sometimes gives huge matches past eof, this check eats them, 173 # bdiff sometimes gives huge matches past eof, this check eats them,
158 # and deals with the special first match case described above 174 # and deals with the special first match case described above
159 if not old and not new: 175 if not old and not new:
160 continue 176 continue
161 177
162 if ignoreblanklines: 178 if opts.ignoreblanklines:
163 wsold = wsblanklinesre.sub('', "".join(old)) 179 wsold = wsblanklinesre.sub('', "".join(old))
164 wsnew = wsblanklinesre.sub('', "".join(new)) 180 wsnew = wsblanklinesre.sub('', "".join(new))
165 if wsold == wsnew: 181 if wsold == wsnew:
166 continue 182 continue
167 183
168 if ignorewsamount: 184 if opts.ignorewsamount:
169 wsold = wsamountre.sub(' ', "".join(old)) 185 wsold = wsamountre.sub(' ', "".join(old))
170 wsold = wsappendedre.sub('\n', wsold) 186 wsold = wsappendedre.sub('\n', wsold)
171 wsnew = wsamountre.sub(' ', "".join(new)) 187 wsnew = wsamountre.sub(' ', "".join(new))
172 wsnew = wsappendedre.sub('\n', wsnew) 188 wsnew = wsappendedre.sub('\n', wsnew)
173 if wsold == wsnew: 189 if wsold == wsnew:
174 continue 190 continue
175 191
176 if ignorews: 192 if opts.ignorews:
177 wsold = wsre.sub('', "".join(old)) 193 wsold = wsre.sub('', "".join(old))
178 wsnew = wsre.sub('', "".join(new)) 194 wsnew = wsre.sub('', "".join(new))
179 if wsold == wsnew: 195 if wsold == wsnew:
180 continue 196 continue
181 197
182 astart = contextstart(a1) 198 astart = contextstart(a1)
183 bstart = contextstart(b1) 199 bstart = contextstart(b1)
184 prev = None 200 prev = None
185 if hunk: 201 if hunk:
186 # join with the previous hunk if it falls inside the context 202 # join with the previous hunk if it falls inside the context
187 if astart < hunk[1] + context + 1: 203 if astart < hunk[1] + opts.context + 1:
188 prev = hunk 204 prev = hunk
189 astart = hunk[1] 205 astart = hunk[1]
190 bstart = hunk[3] 206 bstart = hunk[3]
191 else: 207 else:
192 for x in yieldhunk(hunk, header): 208 for x in yieldhunk(hunk, header):