contrib/hgdiff
changeset 1636 7da32bb3d1d3
child 1644 e7e6504c4989
equal deleted inserted replaced
1635:ae61937c61c5 1636:7da32bb3d1d3
       
     1 #!/usr/bin/env python
       
     2 
       
     3 import os, sys, struct, stat
       
     4 import difflib
       
     5 import re
       
     6 from optparse import OptionParser
       
     7 from mercurial.bdiff import bdiff, blocks
       
     8 
       
     9 VERSION="0.2"
       
    10 usage = "usage: %prog [options] file1 file2"
       
    11 parser = OptionParser(usage=usage)
       
    12 
       
    13 parser.add_option("-d", "--difflib", action="store_true", default=False)
       
    14 parser.add_option('-x', '--count', default=1)
       
    15 parser.add_option('-c', '--context', type="int", default=3)
       
    16 parser.add_option('-p', '--show-c-function', action="store_true", default=False)
       
    17 parser.add_option('-w', '--ignore-all-space', action="store_true", 
       
    18                   default=False)
       
    19 
       
    20 (options, args) = parser.parse_args()
       
    21 
       
    22 if not args:
       
    23     parser.print_help()
       
    24     sys.exit(1)
       
    25 
       
    26 # somewhat self contained replacement for difflib.unified_diff
       
    27 # t1 and t2 are the text to be diffed
       
    28 # l1 and l2 are the text broken up into lines
       
    29 # header1 and header2 are the filenames for the diff output
       
    30 # context is the number of context lines
       
    31 # showfunc enables diff -p output
       
    32 # ignorews ignores all whitespace changes in the diff
       
    33 def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False, 
       
    34              ignorews=False):
       
    35     def contextend(l, len):
       
    36         ret = l + context
       
    37         if ret > len:
       
    38             ret = len
       
    39         return ret
       
    40 
       
    41     def contextstart(l):
       
    42         ret = l - context
       
    43         if ret < 0:
       
    44             return 0
       
    45         return ret
       
    46 
       
    47     def yieldhunk(hunk, header):
       
    48         if header:
       
    49             for x in header:
       
    50                 yield x
       
    51         (astart, a2, bstart, b2, delta) = hunk
       
    52         aend = contextend(a2, len(l1))
       
    53         alen = aend - astart
       
    54         blen = b2 - bstart + aend - a2
       
    55 
       
    56         func = ""
       
    57         if showfunc:
       
    58             # walk backwards from the start of the context
       
    59             # to find a line starting with an alphanumeric char.
       
    60             for x in xrange(astart, -1, -1):
       
    61                 t = l1[x]
       
    62                 if funcre.match(t):
       
    63                     func = ' ' + t[:40]
       
    64                     break
       
    65             
       
    66         yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen, 
       
    67                                            bstart + 1, blen, func)
       
    68         for x in delta:
       
    69             yield x
       
    70         for x in xrange(a2, aend):
       
    71             yield ' ' + l1[x]
       
    72 
       
    73     header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]
       
    74 
       
    75     if showfunc:
       
    76         funcre = re.compile('\w')
       
    77     if ignorews:
       
    78         wsre = re.compile('[ \t]')
       
    79 
       
    80     # bdiff.blocks gives us the matching sequences in the files.  The loop
       
    81     # below finds the spaces between those matching sequences and translates
       
    82     # them into diff output.
       
    83     #
       
    84     diff = blocks(t1, t2)
       
    85     hunk = None
       
    86     for i in xrange(len(diff)):
       
    87         # The first match is special.
       
    88         # we've either found a match starting at line 0 or a match later
       
    89         # in the file.  If it starts later, old and new below will both be
       
    90         # empty and we'll continue to the next match.
       
    91         if i > 0:
       
    92             s = diff[i-1]
       
    93         else:
       
    94             s = [0, 0, 0, 0]
       
    95         delta = []
       
    96         s1 = diff[i]
       
    97         a1 = s[1]
       
    98         a2 = s1[0]
       
    99         b1 = s[3]
       
   100         b2 = s1[2]
       
   101         old = l1[a1:a2]
       
   102         new = l2[b1:b2]
       
   103 
       
   104         # bdiff sometimes gives huge matches past eof, this check eats them,
       
   105         # and deals with the special first match case described above
       
   106         if not old and not new:
       
   107             continue
       
   108 
       
   109         if ignorews:
       
   110             wsold = wsre.sub('', "".join(old))
       
   111             wsnew = wsre.sub('', "".join(new))
       
   112             if wsold == wsnew:
       
   113                 continue
       
   114 
       
   115         astart = contextstart(a1)
       
   116         bstart = contextstart(b1)
       
   117         prev = None
       
   118         if hunk:
       
   119             # join with the previous hunk if it falls inside the context
       
   120             if astart < hunk[1] + context + 1:
       
   121                 prev = hunk
       
   122                 astart = hunk[1]
       
   123                 bstart = hunk[3]
       
   124             else:
       
   125                 for x in yieldhunk(hunk, header):
       
   126                     yield x
       
   127                 # we only want to yield the header if the files differ, and
       
   128                 # we only want to yield it once.
       
   129                 header = None
       
   130         if prev:
       
   131             # we've joined the previous hunk, record the new ending points.
       
   132             hunk[1] = a2
       
   133             hunk[3] = b2
       
   134             delta = hunk[4]
       
   135         else:
       
   136             # create a new hunk
       
   137             hunk = [ astart, a2, bstart, b2, delta ]
       
   138 
       
   139         delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
       
   140         delta[len(delta):] = [ '-' + x for x in old ]
       
   141         delta[len(delta):] = [ '+' + x for x in new ]
       
   142 
       
   143     if hunk:
       
   144         for x in yieldhunk(hunk, header):
       
   145             yield x
       
   146 
       
   147 # simple utility function to put all the
       
   148 # files from a directory tree into a dict
       
   149 def buildlist(names, top):
       
   150     tlen = len(top)
       
   151     for root, dirs, files in os.walk(top):
       
   152         l = root[tlen + 1:]
       
   153         for x in files:
       
   154             p = os.path.join(root, x)
       
   155             st = os.lstat(p)
       
   156             if stat.S_ISREG(st.st_mode):
       
   157                 names[os.path.join(l, x)] = (st.st_dev, st.st_ino)
       
   158 
       
   159 def diff_files(file1, file2):
       
   160     if file1 == None:
       
   161         b = file(file2).read().splitlines(1)
       
   162         l1 = "--- %s\n" % (file2)
       
   163         l2 = "+++ %s\n" % (file2)
       
   164         l3 = "@@ -0,0 +1,%d @@\n" % len(b)
       
   165         l = [l1, l2, l3] + ["+" + e for e in b]
       
   166     elif file2 == None:
       
   167         a = file(file1).read().splitlines(1)
       
   168         l1 = "--- %s\n" % (file1)
       
   169         l2 = "+++ %s\n" % (file1)
       
   170         l3 = "@@ -1,%d +0,0 @@\n" % len(a)
       
   171         l = [l1, l2, l3] + ["-" + e for e in a]
       
   172     else:
       
   173         t1 = file(file1).read()
       
   174         t2 = file(file2).read()
       
   175         l1 = t1.splitlines(1)
       
   176         l2 = t2.splitlines(1)
       
   177         if options.difflib:
       
   178             l = difflib.unified_diff(l1, l2, file1, file2)
       
   179         else:
       
   180             l = bunidiff(t1, t2, l1, l2, file1, file2, context=options.context,
       
   181                      showfunc=options.show_c_function,
       
   182                      ignorews=options.ignore_all_space)
       
   183     for x in l:
       
   184         if x[-1] != '\n':
       
   185             x += "\n\ No newline at end of file\n"
       
   186         print x,
       
   187 
       
   188 file1 = args[0]
       
   189 file2 = args[1]
       
   190 
       
   191 if os.path.isfile(file1) and os.path.isfile(file2):
       
   192     diff_files(file1, file2)
       
   193 elif os.path.isdir(file1):
       
   194     if not os.path.isdir(file2):
       
   195         sys.stderr.write("file types don't match\n")
       
   196         sys.exit(1)
       
   197 
       
   198     d1 = {}
       
   199     d2 = {}
       
   200 
       
   201     buildlist(d1, file1)
       
   202     buildlist(d2, file2)
       
   203     keys = d1.keys()
       
   204     keys.sort()
       
   205     for x in keys:
       
   206         if x not in d2:
       
   207             f2 = None
       
   208         else:
       
   209             f2 = os.path.join(file2, x)
       
   210             st1 = d1[x]
       
   211             st2 = d2[x]
       
   212             del d2[x]
       
   213             if st1[0] == st2[0] and st1[1] == st2[1]:
       
   214                 sys.stderr.write("%s is a hard link\n" % x)
       
   215                 continue
       
   216         x = os.path.join(file1, x)
       
   217         diff_files(x, f2)
       
   218     keys = d2.keys()
       
   219     keys.sort()
       
   220     for x in keys:
       
   221         f1 = None
       
   222         x = os.path.join(file2, x)
       
   223         diff_files(f1, x)
       
   224