mercurial-crew-with-dirclash: mercurial/cmdutil.py comparison

comparison mercurial/cmdutil.py @ 4135:6cb6cfe43c5d

Avoid some false positives for addremove -s.

The original code uses the similarity score

    1 - len(diff(after, before)) / len(after)

The diff can at most be the size of the 'before' file, so any small 'before' file would be considered very similar. Removing an empty file would cause all files added in the same revision to be considered copies of the removed file.

This changes the metric to

    bytes_overlap(before, after) / len(before + after)

i.e. the actual percentage of bytes shared between the two files.

author	Erling Ellingsen <erlingalf@gmail.com>
date	Sun, 18 Feb 2007 20:39:25 +0100
parents	431f3c1d3a37
children	eb5d4fec1487

comparison

equal deleted inserted replaced

-:9dc64c8414ca
+:6cb6cfe43c5d
 # This software may be used and distributed according to the terms
 # of the GNU General Public License, incorporated herein by reference.
 from node import *
 from i18n import _
-import os, sys, mdiff, util, templater, patch
+import os, sys, mdiff, bdiff, util, templater, patch
 revrangesep = ':'
 def revpair(repo, revs):
 '''return pair of nodes, given list of revisions. second item can
 for src, fn in repo.walk(node=node, files=files, match=matchfn,
 badmatch=badmatch):
 yield src, fn, util.pathto(repo.getcwd(), fn), fn in exact
 def findrenames(repo, added=None, removed=None, threshold=0.5):
+'''find renamed files -- yields (before, after, score) tuples'''
 if added is None or removed is None:
 added, removed = repo.status()[1:3]
 ctx = repo.changectx()
 for a in added:
 aa = repo.wread(a)
-bestscore, bestname = None, None
+bestname, bestscore = None, threshold
 for r in removed:
 rr = ctx.filectx(r).data()
-delta = mdiff.textdiff(aa, rr)
-if len(delta) < len(aa):
+# bdiff.blocks() returns blocks of matching lines
-myscore = 1.0 - (float(len(delta)) / len(aa))
+# count the number of bytes in each
-if bestscore is None or myscore > bestscore:
+equal = 0
-bestscore, bestname = myscore, r
+alines = mdiff.splitnewlines(aa)
-if bestname and bestscore >= threshold:
+matches = bdiff.blocks(aa, rr)
+for x1,x2,y1,y2 in matches:
+for line in alines[x1:x2]:
+equal += len(line)
+myscore = equal*2.0 / (len(aa)+len(rr))
+if myscore >= bestscore:
+bestname, bestscore = r, myscore
+if bestname:
 yield bestname, a, bestscore
 def addremove(repo, pats=[], opts={}, wlock=None, dry_run=None,
 similarity=None):
 if dry_run is None:

Mercurial > hg > mercurial-crew-with-dirclash

comparison mercurial/cmdutil.py @ 4135:6cb6cfe43c5d