Mercurial > hg > mercurial-crew-with-dirclash
view contrib/hg-relink @ 5369:7530334bf301
revlog: generate trivial deltas against null revision
To avoid extra memory usage and performance issues with large files,
generate a trivial delta header for deltas against the null revision
rather than calling the usual delta generator.
We append the delta header to meta rather than prepending it to data
to avoid a large allocation and copy.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Wed, 03 Oct 2007 17:17:27 -0500 |
parents | 29eb88bd5c8d |
children |
line wrap: on
line source
#!/usr/bin/env python
#
# Copyright (C) 2007 Brendan Cully <brendan@kublai.com>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

"""Recreate hard links between two Mercurial repositories.

Usage: relink <source> <destination>

Every revlog index ('.i') file found under the source's '.hg' directory is
compared with the file at the same relative path under the destination's
'.hg' directory.  Files (and their companion '.d' data files) whose contents
are byte-for-byte identical are replaced in the destination by a hard link
to the source copy.
"""

import os
import sys


class ConfigError(Exception):
    """Raised when the command-line arguments cannot be used."""


def usage():
    """Print a short usage summary to standard output."""
    print("""relink <source> <destination>
    Recreate hard links between source and destination repositories""")


class Config:
    """Validated command-line configuration.

    Attributes:
        src: absolute path of the source repository root.
        dst: absolute path of the destination repository root.
    """

    def __init__(self, args):
        """Parse an argv-style list: [program, source, destination].

        Raises:
            ConfigError: if the argument count is wrong or either path has
                no '.hg' entry.
        """
        if len(args) != 3:
            raise ConfigError("wrong number of arguments")
        self.src = os.path.abspath(args[1])
        self.dst = os.path.abspath(args[2])
        for d in (self.src, self.dst):
            if not os.path.exists(os.path.join(d, '.hg')):
                raise ConfigError("%s: not a mercurial repository" % d)


def collect(src):
    """Return [(relative path, stat result)] for every '.i' file under src.

    Paths are relative to ``src``; the stat result is that of the file in
    the source tree.
    """
    seplen = len(os.path.sep)
    candidates = []
    for dirpath, dirnames, filenames in os.walk(src):
        # Strip "<src><sep>" so the same path can be re-rooted under dst.
        relpath = dirpath[len(src) + seplen:]
        for filename in filenames:
            if not filename.endswith('.i'):
                continue
            st = os.stat(os.path.join(dirpath, filename))
            candidates.append((os.path.join(relpath, filename), st))
    return candidates


def prune(candidates, dst):
    """Reduce candidates to the files worth comparing byte-for-byte.

    Args:
        candidates: [(relative path, source stat result)] as produced by
            collect().
        dst: directory under which the relative paths are looked up.

    Returns:
        [(relative path, size in bytes)].  An index file is kept when the
        destination copy exists, is not already the same inode, and has the
        same size as the source.  When an index file is kept and the
        destination also has the matching '.d' file, that data file is
        listed too (with the destination file's size).

    Raises:
        Exception: if a destination file lives on a different device from
            its source, since hard links cannot cross devices.
    """
    def getdatafile(path):
        # Map "foo.i" to ("foo.d", stat) if the data file exists.
        if not path.endswith('.i'):
            return None, None
        df = path[:-1] + 'd'
        try:
            st = os.stat(df)
        except OSError:
            return None, None
        return df, st

    def linkfilter(dst, st):
        try:
            ts = os.stat(dst)
        except OSError:
            # Destination doesn't have this file?
            return False
        # Check the device first: inode numbers are only comparable within
        # a single device.
        if st.st_dev != ts.st_dev:
            # No point in continuing
            raise Exception('Source and destination are on different devices')
        if st.st_ino == ts.st_ino:
            # Already hard-linked together.
            return False
        if st.st_size != ts.st_size:
            # TODO: compare revlog heads
            return False
        return st

    targets = []
    for fn, st in candidates:
        tgt = os.path.join(dst, fn)
        ts = linkfilter(tgt, st)
        if not ts:
            continue
        targets.append((fn, ts.st_size))
        df, ts = getdatafile(tgt)
        if df:
            targets.append((fn[:-1] + 'd', ts.st_size))
    return targets


def _samecontent(a, b, chunklen=65536):
    """Return True if files a and b hold exactly the same bytes.

    Both files are read in binary mode, chunklen bytes at a time, and are
    closed before returning.  A file that is merely a prefix of the other
    compares as different.
    """
    with open(a, 'rb') as afp:
        with open(b, 'rb') as bfp:
            while True:
                achunk = afp.read(chunklen)
                bchunk = bfp.read(chunklen)
                if achunk != bchunk:
                    return False
                if not achunk:
                    # Both files hit end-of-file on the same read.
                    return True


def relink(src, dst, files):
    """Hard-link identical files from src into dst and report the result.

    Args:
        src: source directory.
        dst: destination directory.
        files: [(relative path, size in bytes)] as produced by prune().

    For every listed file whose source and destination contents are
    identical, the destination is replaced by a hard link to the source.
    Files that cannot be read or linked are reported on standard output and
    skipped; a summary line is printed at the end.
    """
    def relinkfile(src, dst):
        # Keep the old destination around until the link succeeds so a
        # failure can be rolled back.
        bak = dst + '.bak'
        os.rename(dst, bak)
        try:
            os.link(src, dst)
        except OSError:
            os.rename(bak, dst)
            raise
        os.remove(bak)

    CHUNKLEN = 65536
    relinked = 0
    savedbytes = 0

    for f, sz in files:
        source = os.path.join(src, f)
        tgt = os.path.join(dst, f)
        try:
            if not _samecontent(source, tgt, CHUNKLEN):
                continue
            relinkfile(source, tgt)
        except (IOError, OSError) as inst:
            # E.g. the source has no matching '.d' file, or linking failed.
            print('%s: %s' % (tgt, str(inst)))
            continue
        print('Relinked %s' % f)
        relinked += 1
        savedbytes += sz

    print('Relinked %d files (%d bytes reclaimed)' % (relinked, savedbytes))


def main(argv):
    """Run the relink command; return the process exit status."""
    try:
        cfg = Config(argv)
    except ConfigError as inst:
        print(str(inst))
        usage()
        return 1

    src = os.path.join(cfg.src, '.hg')
    dst = os.path.join(cfg.dst, '.hg')

    candidates = collect(src)
    targets = prune(candidates, dst)
    relink(src, dst, targets)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))