contrib/hg-relink
author Bryan O'Sullivan <bos@serpentine.com>
Tue, 24 Apr 2007 10:53:25 -0700
changeset 4369 d7ad1e42a368
parent 4260 29eb88bd5c8d
permissions -rwxr-xr-x
util._matcher: speed up regexp matching. In 4babaa52badf, Benoit made a change that substantially slows matching when a big .hgignore file is in play, because it calls into the regexp matching engine potentially hundreds of times per file to be matched. I've partly rolled back his change, so that we only call into the matcher once per file, but preserved the ability to report a meaningful error message if there's a syntax error in the regexp.

#!/usr/bin/env python
#
# Copyright (C) 2007 Brendan Cully <brendan@kublai.com>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

import os, sys

class ConfigError(Exception):
    """Raised by Config when the command-line arguments are unusable."""

def usage():
    """Print a two-line synopsis of the command to standard output."""
    synopsis = """relink <source> <destination>
    Recreate hard links between source and destination repositories"""
    # A single parenthesised argument prints identically whether ``print``
    # is a statement or a function.
    print(synopsis)

class Config:
    """Validated command-line configuration.

    Attributes:
        src: absolute path of the source repository (second argument).
        dst: absolute path of the destination repository (third argument).
    """

    def __init__(self, args):
        """Parse an argv-style list: program name, source, destination.

        Raises ConfigError when the list does not have exactly three
        entries, or when either directory has no '.hg' entry inside it.
        """
        if len(args) != 3:
            raise ConfigError("wrong number of arguments")
        source, destination = args[1], args[2]
        self.src = os.path.abspath(source)
        self.dst = os.path.abspath(destination)
        for repo in (self.src, self.dst):
            marker = os.path.join(repo, '.hg')
            if os.path.exists(marker):
                continue
            raise ConfigError("%s: not a mercurial repository" % repo)

def collect(src):
    """Find every '.i' file below src.

    Returns a list of (path, stat) pairs, where path is relative to src
    and stat is the os.stat() result for that file.  src is expected
    not to end with a path separator, since the relative path is obtained
    by slicing off len(src) plus one separator.
    """
    # Number of leading characters to drop from each walked directory:
    # the root itself plus the separator that follows it.
    prefixlen = len(src) + len(os.path.sep)
    found = []
    for root, _subdirs, names in os.walk(src):
        rel = root[prefixlen:]
        for name in names:
            if name.endswith('.i'):
                info = os.stat(os.path.join(root, name))
                found.append((os.path.join(rel, name), info))

    return found

def prune(candidates, dst):
    """Narrow the candidates down to files worth comparing byte-for-byte.

    candidates is a list of (relative path, source stat) pairs as produced
    by collect(); dst is the directory the relative paths are resolved
    against on the destination side.

    Returns a list of (relative path, size) pairs.  An index ('.i') file
    is kept when the destination has a file of the same name and size
    that is not already the same inode.  For every kept index file, the
    matching '.d' file is added as well if it exists in the destination;
    its size is taken from the destination copy and is NOT compared with
    the source here, so the caller must compare contents itself.

    Raises Exception if a source file and its destination counterpart
    live on different devices, since hard links cannot span devices.
    """
    def getdatafile(path):
        # Map an index path 'x.i' to its data file 'x.d' and stat it.
        # Returns (None, None) when there is no such data file.
        if not path.endswith('.i'):
            return None, None
        df = path[:-1] + 'd'
        try:
            st = os.stat(df)
        except OSError:
            return None, None
        return df, st

    def linkfilter(dstpath, st):
        # Decide whether dstpath is a plausible relink target for the
        # source file described by st.
        try:
            ts = os.stat(dstpath)
        except OSError:
            # Destination doesn't have this file?
            return False
        # Inode numbers are only unique within one device, so the device
        # has to be checked before inode equality means anything.
        if st.st_dev != ts.st_dev:
            # No point in continuing
            raise Exception('Source and destination are on different devices')
        if st.st_ino == ts.st_ino:
            # Already hard-linked together.
            return False
        # TODO: compare revlog heads
        return st.st_size == ts.st_size

    targets = []
    for fn, st in candidates:
        tgt = os.path.join(dst, fn)
        if not linkfilter(tgt, st):
            continue
        targets.append((fn, st.st_size))
        df, dfst = getdatafile(tgt)
        if df:
            targets.append((fn[:-1] + 'd', dfst.st_size))

    return targets

def relink(src, dst, files):
    """Hard-link identical files in dst to their counterparts in src.

    files is a list of (relative path, size) pairs as produced by
    prune().  Each pair is resolved against both src and dst; when the
    two files hold exactly the same bytes, the dst file is replaced by a
    hard link to the src file and size is added to the reclaimed total.

    Files that differ are skipped silently.  Files that cannot be opened,
    read or linked are reported on standard output and skipped; they do
    not abort the run.  A summary line is printed at the end.
    """
    def relinkfile(src, dst):
        # Keep the original around as '.bak' until the link has been
        # created, so a failed link can be rolled back.
        bak = dst + '.bak'
        os.rename(dst, bak)
        try:
            os.link(src, dst)
        except OSError:
            os.rename(bak, dst)
            raise
        os.remove(bak)

    CHUNKLEN = 65536

    def samecontent(apath, bpath):
        # True only when both files contain the same bytes AND end at the
        # same offset.  Binary mode avoids any newline translation, and
        # both handles are closed before the caller renames anything.
        afp = open(apath, 'rb')
        try:
            bfp = open(bpath, 'rb')
            try:
                while True:
                    achunk = afp.read(CHUNKLEN)
                    if achunk != bfp.read(CHUNKLEN):
                        # Also covers one file being a prefix of the
                        # other: an empty chunk never equals a full one.
                        return False
                    if not achunk:
                        return True
            finally:
                bfp.close()
        finally:
            afp.close()

    relinked = 0
    savedbytes = 0

    for f, sz in files:
        source = os.path.join(src, f)
        tgt = os.path.join(dst, f)
        try:
            if not samecontent(source, tgt):
                continue
            relinkfile(source, tgt)
        except (IOError, OSError):
            # sys.exc_info() instead of 'except ..., inst' / 'except ... as'
            # so this runs unchanged on every Python version.
            print('%s: %s' % (tgt, str(sys.exc_info()[1])))
            continue
        print('Relinked %s' % f)
        relinked += 1
        savedbytes += sz

    print('Relinked %d files (%d bytes reclaimed)' % (relinked, savedbytes))

# Script entry point: validate the arguments, then run the three phases
# (collect -> prune -> relink) on the '.hg' directories of both repositories.
try:
    cfg = Config(sys.argv)
except ConfigError:
    # Report the problem, show the synopsis and exit with a failure status.
    inst = sys.exc_info()[1]
    print(str(inst))
    usage()
    sys.exit(1)

src = os.path.join(cfg.src, '.hg')
dst = os.path.join(cfg.dst, '.hg')

# Every '.i' file in the source store, with its stat information.
candidates = collect(src)
# Only those with a plausible, not-yet-linked twin in the destination.
targets = prune(candidates, dst)
# Compare contents and hard-link the identical ones.
relink(src, dst, targets)