Avoid extra filelog entries.
Right now, there are some situations in which localrepo.filecommit can
create filelog entries even though they're not needed. For example:
- permissions for a file have changed;
- qrefresh can create a filelog entry identical to its parent (see the
added test);
- convert-repo creates extra filelog entries in every merge where the
first parent has added files (for example, changeset ebebe9577a1a of
the kernel repo added extra filelog entries to files in the
arch/blackfin directory, even though the merge should only touch the
drivers/ata directory). This makes "hg log file" in a converted repo
less useful than it could be, since it may mention many merges that
don't actually touch that specific file.
They all come from the same basic problem: localrepo.commit (through
filecommit) creates new filelog entries for all files passed to it
(except for some cases during a merge).
Patch and test case provided by Benoit.
This should fix issue351.
#!/usr/bin/env python
#
# Copyright (C) 2007 Brendan Cully <brendan@kublai.com>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.
import os, sys
class ConfigError(Exception):
    """Signals that the command-line arguments cannot be used.

    Raised by Config when the argument count is wrong or when one of the
    given paths does not contain a '.hg' entry.
    """
def usage():
    """Print the command synopsis and a one-line description to stdout."""
    synopsis = """relink <source> <destination>
Recreate hard links between source and destination repositories"""
    print(synopsis)
class Config:
    """Source and destination repository roots taken from an argv-style list.

    Attributes:
        src: absolute path built from args[1].
        dst: absolute path built from args[2].
    """

    def __init__(self, args):
        """Validate *args* and record the two repository paths.

        args is expected to look like sys.argv: program name followed by
        exactly two paths.  Raises ConfigError if the count is wrong or if
        either path has no '.hg' entry inside it.
        """
        if len(args) != 3:
            raise ConfigError("wrong number of arguments")

        self.src, self.dst = os.path.abspath(args[1]), os.path.abspath(args[2])

        for repo in (self.src, self.dst):
            marker = os.path.join(repo, '.hg')
            if os.path.exists(marker):
                continue
            raise ConfigError("%s: not a mercurial repository" % repo)
def collect(src):
    """Walk *src* and gather every file whose name ends in '.i'.

    Returns a list of (relative_path, stat_result) tuples, where
    relative_path is the file's path with the leading '<src><sep>' removed
    and stat_result is os.stat() of the file.  Files directly inside *src*
    are reported by bare file name.
    """
    # Number of leading characters to drop from each walked directory:
    # the root itself plus the separator that follows it.
    prefixlen = len(src) + len(os.path.sep)
    found = []
    for root, _subdirs, names in os.walk(src):
        reldir = root[prefixlen:]
        for name in names:
            if name.endswith('.i'):
                info = os.stat(os.path.join(root, name))
                found.append((os.path.join(reldir, name), info))
    return found
def prune(candidates, dst):
    """Select the candidate files that are worth trying to relink.

    candidates is a list of (relative_path, source_stat) tuples as produced
    by collect(); dst is the destination directory the relative paths are
    resolved against.

    A candidate is kept when the destination has a file at the same relative
    path, on the same device, that is not already the same inode and has the
    same size as the source.  For every kept '.i' file, the matching '.d'
    file is added as well if the destination has one; note that its size is
    taken from the destination and the source's '.d' file is not examined
    here.

    Returns a list of (relative_path, size) tuples.

    Raises Exception if a source file and its destination counterpart live
    on different devices (hard links cannot cross devices).
    """
    def getdatafile(path):
        # Map 'foo.i' to 'foo.d' and stat it; (None, None) if not applicable
        # or if the data file does not exist.
        if not path.endswith('.i'):
            return None, None
        df = path[:-1] + 'd'
        try:
            st = os.stat(df)
        except OSError:
            return None, None
        return df, st

    def linkfilter(dst, st):
        # Return the source stat if dst is a relink candidate, else False.
        try:
            ts = os.stat(dst)
        except OSError:
            # Destination doesn't have this file?
            return False
        # Inode numbers are only unique within one device, so the device
        # must be compared before an equal inode can mean "already linked".
        if st.st_dev != ts.st_dev:
            # No point in continuing
            raise Exception('Source and destination are on different devices')
        if st.st_ino == ts.st_ino:
            # Already the same file.
            return False
        if st.st_size != ts.st_size:
            # TODO: compare revlog heads
            return False
        return st

    targets = []
    for fn, st in candidates:
        tgt = os.path.join(dst, fn)
        ts = linkfilter(tgt, st)
        if not ts:
            continue
        targets.append((fn, ts.st_size))
        df, dstat = getdatafile(tgt)
        if df:
            targets.append((fn[:-1] + 'd', dstat.st_size))
    return targets
def relink(src, dst, files):
    """Replace identical files under *dst* with hard links to those in *src*.

    files is a list of (relative_path, size) tuples, as produced by prune().
    For each entry the source and destination files are compared byte for
    byte; only when they are fully identical (same content AND same length)
    is the destination replaced by a hard link to the source.

    Progress and per-file errors are printed to stdout, followed by a
    summary line with the number of relinked files and the sum of their
    sizes.  I/O errors on a single file (missing source, failed link, ...)
    are reported and do not stop the run.
    """
    CHUNKLEN = 65536

    def relinkfile(src, dst):
        # Keep the old destination as '<dst>.bak' until the link exists, so
        # a failed link can be rolled back.
        bak = dst + '.bak'
        os.rename(dst, bak)
        try:
            os.link(src, dst)
        except OSError:
            os.rename(bak, dst)
            raise
        os.remove(bak)

    def samecontent(a, b):
        # Binary mode: the files hold raw data and must not be subject to
        # newline translation.  Both handles are closed before returning so
        # the destination is not open when it gets renamed.
        with open(a, 'rb') as sfp:
            with open(b, 'rb') as dfp:
                while True:
                    sin = sfp.read(CHUNKLEN)
                    din = dfp.read(CHUNKLEN)
                    if sin != din:
                        # Covers differing bytes and differing lengths (one
                        # side reaching EOF before the other).
                        return False
                    if not sin:
                        # Both sides hit EOF together.
                        return True

    relinked = 0
    savedbytes = 0
    for f, sz in files:
        source = os.path.join(src, f)
        tgt = os.path.join(dst, f)
        try:
            if not samecontent(source, tgt):
                continue
            relinkfile(source, tgt)
        except (IOError, OSError) as inst:
            print('%s: %s' % (tgt, str(inst)))
            continue
        print('Relinked %s' % f)
        relinked += 1
        savedbytes += sz
    print('Relinked %d files (%d bytes reclaimed)' % (relinked, savedbytes))
# Script body: parse the command line, then collect, prune and relink.
try:
    cfg = Config(sys.argv)
except ConfigError as inst:
    # Bad invocation: show the reason and the synopsis, then exit non-zero.
    print(str(inst))
    usage()
    sys.exit(1)

# All work happens inside each repository's '.hg' directory.
src, dst = os.path.join(cfg.src, '.hg'), os.path.join(cfg.dst, '.hg')

# Index files found in the source, narrowed to those the destination also
# has as same-sized, not-yet-linked files, then relinked where identical.
candidates = collect(src)
targets = prune(candidates, dst)
relink(src, dst, targets)