# HG changeset patch # User Vadim Gelfer # Date 1155964438 25200 # Node ID ff3ea21a981a448598f2e4f58430d1e688bfebb9 # Parent 6e062d9b188fe8f8f4cbd041621d6cb6195dbb28 addremove: add -s/--similarity option progress on issue 295. diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py --- a/mercurial/cmdutil.py +++ b/mercurial/cmdutil.py @@ -8,7 +8,7 @@ from demandload import demandload from node import * from i18n import gettext as _ -demandload(globals(), 'util') +demandload(globals(), 'mdiff util') demandload(globals(), 'os sys') def make_filename(repo, pat, node, @@ -93,19 +93,53 @@ def walk(repo, pats=[], opts={}, node=No for r in results: yield r -def addremove(repo, pats=[], opts={}, wlock=None, dry_run=None): +def findrenames(repo, added=None, removed=None, threshold=0.5): + if added is None or removed is None: + added, removed = repo.status()[1:3] + changes = repo.changelog.read(repo.dirstate.parents()[0]) + mf = repo.manifest.read(changes[0]) + for a in added: + aa = repo.wread(a) + bestscore, bestname = None, None + for r in removed: + rr = repo.file(r).read(mf[r]) + delta = mdiff.textdiff(aa, rr) + if len(delta) < len(aa): + myscore = 1.0 - (float(len(delta)) / len(aa)) + if bestscore is None or myscore > bestscore: + bestscore, bestname = myscore, r + if bestname and bestscore >= threshold: + yield bestname, a, bestscore + +def addremove(repo, pats=[], opts={}, wlock=None, dry_run=None, + similarity=None): if dry_run is None: dry_run = opts.get('dry_run') + if similarity is None: + similarity = float(opts.get('similarity') or 0) add, remove = [], [] + mapping = {} for src, abs, rel, exact in walk(repo, pats, opts): if src == 'f' and repo.dirstate.state(abs) == '?': add.append(abs) + mapping[abs] = rel, exact if repo.ui.verbose or not exact: repo.ui.status(_('adding %s\n') % ((pats and rel) or abs)) if repo.dirstate.state(abs) != 'r' and not os.path.exists(rel): remove.append(abs) + mapping[abs] = rel, exact if repo.ui.verbose or not exact: 
repo.ui.status(_('removing %s\n') % ((pats and rel) or abs)) if not dry_run: repo.add(add, wlock=wlock) repo.remove(remove, wlock=wlock) + if similarity > 0: + for old, new, score in findrenames(repo, add, remove, similarity): + oldrel, oldexact = mapping[old] + newrel, newexact = mapping[new] + if repo.ui.verbose or not oldexact or not newexact: + repo.ui.status(_('recording removal of %s as rename to %s ' + '(%d%% similar)\n') % + (oldrel, newrel, score * 100)) + if not dry_run: + repo.copy(old, new, wlock=wlock) diff --git a/mercurial/commands.py b/mercurial/commands.py --- a/mercurial/commands.py +++ b/mercurial/commands.py @@ -658,8 +658,17 @@ def addremove(ui, repo, *pats, **opts): New files are ignored if they match any of the patterns in .hgignore. As with add, these changes take effect at the next commit. + + Use the -s option to detect renamed files. With a parameter > 0, + this compares every removed file with every added file and records + those similar enough as renames. This option takes a percentage + between 0 (disabled) and 100 (files must be identical) as its + parameter. Detecting renamed files this way can be expensive. """ - return cmdutil.addremove(repo, pats, opts) + sim = float(opts.get('similarity') or 0) + if sim < 0 or sim > 100: + raise util.Abort(_('similarity must be between 0 and 100')) + return cmdutil.addremove(repo, pats, opts, similarity=sim/100.) def annotate(ui, repo, *pats, **opts): """show changeset information per file line @@ -2747,7 +2756,10 @@ table = { (addremove, [('I', 'include', [], _('include names matching the given patterns')), ('X', 'exclude', [], _('exclude names matching the given patterns')), - ('n', 'dry-run', None, _('do not perform actions, just print output'))], + ('n', 'dry-run', None, + _('do not perform actions, just print output')), + ('s', 'similarity', '', + _('guess renamed files by similarity (0<=s<=100)'))], _('hg addremove [OPTION]... 
[FILE]...')), "^annotate": (annotate, diff --git a/tests/test-addremove b/tests/test-addremove --- a/tests/test-addremove +++ b/tests/test-addremove @@ -10,3 +10,17 @@ cd dir/ touch ../foo_2 bar_2 hg -v addremove hg -v commit -m "add 2" -d "1000000 0" + +cd .. +hg init sim +cd sim +echo a > a +echo a >> a +echo a >> a +echo c > c +hg commit -Ama +mv a b +rm c +echo d > d +hg addremove -s 50 +hg commit -mb diff --git a/tests/test-addremove.out b/tests/test-addremove.out --- a/tests/test-addremove.out +++ b/tests/test-addremove.out @@ -6,3 +6,10 @@ adding dir/bar_2 adding foo_2 dir/bar_2 foo_2 +adding a +adding c +adding b +adding d +removing a +removing c +recording removal of a as rename to b (100% similar)