contrib/hgdiff
author Bryan O'Sullivan <bos@serpentine.com>
Tue, 24 Apr 2007 10:53:25 -0700
changeset 4369 d7ad1e42a368
parent 3398 0f308690bda8
child 5115 ea7b982b6c08
permissions -rwxr-xr-x
util._matcher: speed up regexp matching. In 4babaa52badf, Benoit made a change that substantially slows matching when a big .hgignore file is in play, because it calls into the regexp matching engine potentially hundreds of times per file to be matched. I've partly rolled back his change, so that we only call into the matcher once per file, but preserved the ability to report a meaningful error message if there's a syntax error in the regexp.

#!/usr/bin/env python

import os, sys, struct, stat
import difflib
import re
from optparse import OptionParser
from mercurial.bdiff import bdiff, blocks
from mercurial.mdiff import bunidiff, diffopts

VERSION="0.3"
usage = "usage: %prog [options] file1 file2"
parser = OptionParser(usage=usage)

# -d: produce the diff with difflib.unified_diff instead of mercurial's
#     bunidiff.
parser.add_option("-d", "--difflib", action="store_true", default=False)
# type="int" keeps a value given on the command line the same type as the
# integer default; without it optparse hands back a string.
parser.add_option('-x', '--count', type="int", default=1)
# -c / -p / -w are forwarded to mercurial's diffopts as context, showfunc
# and ignorews respectively.
parser.add_option('-c', '--context', type="int", default=3)
parser.add_option('-p', '--show-c-function', action="store_true", default=False)
parser.add_option('-w', '--ignore-all-space', action="store_true",
                  default=False)

(options, args) = parser.parse_args()

# Both positional arguments are mandatory: the script reads args[0] and
# args[1] later on, so a single argument must be rejected here rather than
# failing with an IndexError.
if len(args) < 2:
    parser.print_help()
    sys.exit(1)

def buildlist(names, top):
    """Record every regular file found below ``top`` in the ``names`` dict.

    ``names`` is updated in place; nothing is returned.  Each key is the
    file's path relative to ``top`` and each value is the ``(st_dev, st_ino)``
    pair reported by ``os.lstat`` for that file.  Entries for which
    ``stat.S_ISREG`` is false are skipped.
    """
    prefix = len(top)
    for root, dirs, files in os.walk(top):
        # Strip the ``top`` prefix and then any separators that follow it.
        # Cutting a fixed ``len(top) + 1`` characters would swallow the first
        # character of every sub-directory name whenever ``top`` itself
        # already ends with a separator (e.g. "dir/" or "/").
        rel = root[prefix:].lstrip(os.sep)
        for x in files:
            p = os.path.join(root, x)
            st = os.lstat(p)
            if stat.S_ISREG(st.st_mode):
                names[os.path.join(rel, x)] = (st.st_dev, st.st_ino)

def _readfile(path):
    """Return the whole contents of ``path``, always closing the file."""
    fp = open(path)
    try:
        return fp.read()
    finally:
        fp.close()

def diff_files(file1, file2):
    """Write a unified diff of ``file1`` against ``file2`` to standard output.

    If ``file1`` is None, ``file2`` is shown as entirely added (every line
    prefixed with "+").  If ``file2`` is None, ``file1`` is shown as entirely
    removed (every line prefixed with "-").  Otherwise both files are read
    and compared with ``difflib.unified_diff`` when ``options.difflib`` is
    set, or with ``bunidiff`` configured from ``options.context``,
    ``options.show_c_function`` and ``options.ignore_all_space``.
    """
    if file1 is None:
        b = _readfile(file2).splitlines(1)
        l1 = "--- %s\n" % (file2)
        l2 = "+++ %s\n" % (file2)
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif file2 is None:
        a = _readfile(file1).splitlines(1)
        l1 = "--- %s\n" % (file1)
        l2 = "+++ %s\n" % (file1)
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        t1 = _readfile(file1)
        t2 = _readfile(file2)
        # splitlines(1) keeps the line terminators on each line.
        l1 = t1.splitlines(1)
        l2 = t2.splitlines(1)
        if options.difflib:
            l = difflib.unified_diff(l1, l2, file1, file2)
        else:
            l = bunidiff(t1, t2, l1, l2, file1, file2,
                         diffopts(context=options.context,
                                  showfunc=options.show_c_function,
                                  ignorews=options.ignore_all_space))
    for x in l:
        # A line without a trailing newline can only be the last line of a
        # file; terminate it and add the marker that says so.
        if x[-1] != '\n':
            x += "\n\\ No newline at end of file\n"
        # Every line now ends with its own newline, so emit it verbatim.
        sys.stdout.write(x)

# Driver: compare two files directly, or two directory trees file by file.
file1 = args[0]
file2 = args[1]

if os.path.isfile(file1) and os.path.isfile(file2):
    diff_files(file1, file2)
elif os.path.isdir(file1):
    if not os.path.isdir(file2):
        sys.stderr.write("file types don't match\n")
        sys.exit(1)

    # Map relative path -> (device, inode) for every regular file in each
    # tree.
    d1 = {}
    d2 = {}

    buildlist(d1, file1)
    buildlist(d2, file2)

    # First pass: everything present in the first tree, in sorted order.
    keys = list(d1)
    keys.sort()
    for x in keys:
        if x not in d2:
            # Only in the first tree: shown as removed.
            f2 = None
        else:
            f2 = os.path.join(file2, x)
            # Remove the entry from d2 so that the second pass below only
            # sees files that exist solely in the second tree.
            if d1[x] == d2.pop(x):
                # Same device and inode: both names refer to one file, so
                # there is nothing to diff.
                sys.stderr.write("%s is a hard link\n" % x)
                continue
        x = os.path.join(file1, x)
        diff_files(x, f2)

    # Second pass: whatever is left in d2 exists only in the second tree
    # and is shown as added.
    keys = list(d2)
    keys.sort()
    for x in keys:
        f1 = None
        x = os.path.join(file2, x)
        diff_files(f1, x)
else:
    # Neither "file vs file" nor "directory vs directory".  Report the
    # problem instead of silently exiting with status 0 and no output.
    for p in (file1, file2):
        if not os.path.exists(p):
            sys.stderr.write("%s: no such file or directory\n" % p)
            sys.exit(1)
    sys.stderr.write("file types don't match\n")
    sys.exit(1)