changeset 5385:caadfbc439c6

Merge with crew.
author Bryan O'Sullivan <bos@serpentine.com>
date Fri, 05 Oct 2007 12:17:29 -0700
parents d7d395fb7cd5 (diff) e3a0c092b4e2 (current diff)
children 8051549afb32
files
diffstat 30 files changed, 1005 insertions(+), 549 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/convert/__init__.py
+++ b/hgext/convert/__init__.py
@@ -5,14 +5,15 @@
 # This software may be used and distributed according to the terms
 # of the GNU General Public License, incorporated herein by reference.
 
-from common import NoRepo, converter_source, converter_sink
+from common import NoRepo, SKIPREV, converter_source, converter_sink
 from cvs import convert_cvs
 from darcs import darcs_source
 from git import convert_git
 from hg import mercurial_source, mercurial_sink
 from subversion import convert_svn, debugsvnlog
+import filemap
 
-import os, shlex, shutil
+import os, shutil
 from mercurial import hg, ui, util, commands
 from mercurial.i18n import _
 
@@ -40,7 +41,7 @@ def convertsink(ui, path):
     raise util.Abort('%s: unknown repository type' % path)
 
 class converter(object):
-    def __init__(self, ui, source, dest, revmapfile, filemapper, opts):
+    def __init__(self, ui, source, dest, revmapfile, opts):
 
         self.source = source
         self.dest = dest
@@ -51,13 +52,15 @@ class converter(object):
         self.revmapfilefd = None
         self.authors = {}
         self.authorfile = None
-        self.mapfile = filemapper
 
+        self.maporder = []
         self.map = {}
         try:
             origrevmapfile = open(self.revmapfile, 'r')
             for l in origrevmapfile:
                 sv, dv = l[:-1].split()
+                if sv not in self.map:
+                    self.maporder.append(sv)
                 self.map[sv] = dv
             origrevmapfile.close()
         except IOError:
@@ -199,7 +202,15 @@ class converter(object):
         do_copies = hasattr(self.dest, 'copyfile')
         filenames = []
 
-        files, copies = self.source.getchanges(rev)
+        changes = self.source.getchanges(rev)
+        if isinstance(changes, basestring):
+            if changes == SKIPREV:
+                dest = SKIPREV
+            else:
+                dest = self.map[changes]
+            self.mapentry(rev, dest)
+            return
+        files, copies = changes
         parents = [self.map[r] for r in commit.parents]
         if commit.parents:
             prev = commit.parents[0]
@@ -210,35 +221,28 @@ class converter(object):
             pbranch = None
         self.dest.setbranch(commit.branch, pbranch, parents)
         for f, v in files:
-            newf = self.mapfile(f)
-            if not newf:
-                continue
-            filenames.append(newf)
+            filenames.append(f)
             try:
                 data = self.source.getfile(f, v)
             except IOError, inst:
-                self.dest.delfile(newf)
+                self.dest.delfile(f)
             else:
                 e = self.source.getmode(f, v)
-                self.dest.putfile(newf, e, data)
+                self.dest.putfile(f, e, data)
                 if do_copies:
                     if f in copies:
-                        copyf = self.mapfile(copies[f])
-                        if copyf:
-                            # Merely marks that a copy happened.
-                            self.dest.copyfile(copyf, newf)
+                        copyf = copies[f]
+                        # Merely marks that a copy happened.
+                        self.dest.copyfile(copyf, f)
 
-        if not filenames and self.mapfile.active():
-            newnode = parents[0]
-        else:
-            newnode = self.dest.putcommit(filenames, parents, commit)
+        newnode = self.dest.putcommit(filenames, parents, commit)
         self.mapentry(rev, newnode)
 
     def convert(self):
         try:
             self.source.before()
             self.dest.before()
-            self.source.setrevmap(self.map)
+            self.source.setrevmap(self.map, self.maporder)
             self.ui.status("scanning source...\n")
             heads = self.source.getheads()
             parents = self.walktree(heads)
@@ -260,7 +264,7 @@ class converter(object):
             ctags = {}
             for k in tags:
                 v = tags[k]
-                if v in self.map:
+                if self.map.get(v, SKIPREV) != SKIPREV:
                     ctags[k] = self.map[v]
 
             if c and ctags:
@@ -282,92 +286,6 @@ class converter(object):
         if self.revmapfilefd:
             self.revmapfilefd.close()
 
-def rpairs(name):
-    e = len(name)
-    while e != -1:
-        yield name[:e], name[e+1:]
-        e = name.rfind('/', 0, e)
-
-class filemapper(object):
-    '''Map and filter filenames when importing.
-    A name can be mapped to itself, a new name, or None (omit from new
-    repository).'''
-
-    def __init__(self, ui, path=None):
-        self.ui = ui
-        self.include = {}
-        self.exclude = {}
-        self.rename = {}
-        if path:
-            if self.parse(path):
-                raise util.Abort(_('errors in filemap'))
-
-    def parse(self, path):
-        errs = 0
-        def check(name, mapping, listname):
-            if name in mapping:
-                self.ui.warn(_('%s:%d: %r already in %s list\n') %
-                             (lex.infile, lex.lineno, name, listname))
-                return 1
-            return 0
-        lex = shlex.shlex(open(path), path, True)
-        lex.wordchars += '!@#$%^&*()-=+[]{}|;:,./<>?'
-        cmd = lex.get_token()
-        while cmd:
-            if cmd == 'include':
-                name = lex.get_token()
-                errs += check(name, self.exclude, 'exclude')
-                self.include[name] = name
-            elif cmd == 'exclude':
-                name = lex.get_token()
-                errs += check(name, self.include, 'include')
-                errs += check(name, self.rename, 'rename')
-                self.exclude[name] = name
-            elif cmd == 'rename':
-                src = lex.get_token()
-                dest = lex.get_token()
-                errs += check(src, self.exclude, 'exclude')
-                self.rename[src] = dest
-            elif cmd == 'source':
-                errs += self.parse(lex.get_token())
-            else:
-                self.ui.warn(_('%s:%d: unknown directive %r\n') %
-                             (lex.infile, lex.lineno, cmd))
-                errs += 1
-            cmd = lex.get_token()
-        return errs
-
-    def lookup(self, name, mapping):
-        for pre, suf in rpairs(name):
-            try:
-                return mapping[pre], pre, suf
-            except KeyError, err:
-                pass
-        return '', name, ''
-
-    def __call__(self, name):
-        if self.include:
-            inc = self.lookup(name, self.include)[0]
-        else:
-            inc = name
-        if self.exclude:
-            exc = self.lookup(name, self.exclude)[0]
-        else:
-            exc = ''
-        if not inc or exc:
-            return None
-        newpre, pre, suf = self.lookup(name, self.rename)
-        if newpre:
-            if newpre == '.':
-                return suf
-            if suf:
-                return newpre + '/' + suf
-            return newpre
-        return name
-
-    def active(self):
-        return bool(self.include or self.exclude or self.rename)
-
 def convert(ui, src, dest=None, revmapfile=None, **opts):
     """Convert a foreign SCM repository to a Mercurial one.
 
@@ -461,15 +379,18 @@ def convert(ui, src, dest=None, revmapfi
             shutil.rmtree(dest, True)
         raise
 
+    fmap = opts.get('filemap')
+    if fmap:
+        srcc = filemap.filemap_source(ui, srcc, fmap)
+        destc.setfilemapmode(True)
+
     if not revmapfile:
         try:
             revmapfile = destc.revmapfile()
         except:
             revmapfile = os.path.join(destc, "map")
 
-
-    c = converter(ui, srcc, destc, revmapfile, filemapper(ui, opts['filemap']),
-                  opts)
+    c = converter(ui, srcc, destc, revmapfile, opts)
     c.convert()
 
 
--- a/hgext/convert/common.py
+++ b/hgext/convert/common.py
@@ -17,6 +17,8 @@ def decodeargs(s):
 
 class NoRepo(Exception): pass
 
+SKIPREV = 'hg-convert-skipped-revision'
+
 class commit(object):
     def __init__(self, author, date, desc, parents, branch=None, rev=None):
         self.author = author
@@ -44,8 +46,11 @@ class converter_source(object):
     def after(self):
         pass
 
-    def setrevmap(self, revmap):
-        """set the map of already-converted revisions"""
+    def setrevmap(self, revmap, order):
+        """set the map of already-converted revisions
+        
+        order is a list with the keys from revmap in the order they
+        appear in the revision map file."""
         pass
 
     def getheads(self):
@@ -91,6 +96,19 @@ class converter_source(object):
             except:
                 return s.decode(encoding, "replace").encode("utf-8")
 
+    def getchangedfiles(self, rev, i):
+        """Return the files changed by rev compared to parent[i].
+    
+        i is an index selecting one of the parents of rev.  The return
+        value should be the list of files that are different in rev and
+        this parent.
+
+        If rev has no parents, i is None.
+    
+        This function is only needed to support --filemap
+        """
+        raise NotImplementedError()
+
 class converter_sink(object):
     """Conversion sink (target) interface"""
 
@@ -149,3 +167,13 @@ class converter_sink(object):
         pbranch: branch name of parent commit
         parents: destination revisions of parent"""
         pass
+
+    def setfilemapmode(self, active):
+        """Tell the destination that we're using a filemap
+
+        Some converter_sources (svn in particular) can claim that a file
+        was changed in a revision, even if there was no change.  This method
+        tells the destination that we're using a filemap and that it should
+        filter empty revisions.
+        """
+        pass
--- a/hgext/convert/cvs.py
+++ b/hgext/convert/cvs.py
@@ -266,3 +266,9 @@ class convert_cvs(converter_source):
 
     def gettags(self):
         return self.tags
+
+    def getchangedfiles(self, rev, i):
+        files = self.files[rev].keys()
+        files.sort()
+        return files
+
copy from hgext/convert/__init__.py
copy to hgext/convert/filemap.py
--- a/hgext/convert/__init__.py
+++ b/hgext/convert/filemap.py
@@ -1,286 +1,13 @@
-# convert.py Foreign SCM converter
-#
-# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
+# Copyright 2007 Bryan O'Sullivan <bos@serpentine.com>
+# Copyright 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
 #
-# This software may be used and distributed according to the terms
-# of the GNU General Public License, incorporated herein by reference.
-
-from common import NoRepo, converter_source, converter_sink
-from cvs import convert_cvs
-from darcs import darcs_source
-from git import convert_git
-from hg import mercurial_source, mercurial_sink
-from subversion import convert_svn, debugsvnlog
-
-import os, shlex, shutil
-from mercurial import hg, ui, util, commands
-from mercurial.i18n import _
-
-commands.norepo += " convert debugsvnlog"
-
-converters = [convert_cvs, convert_git, convert_svn, mercurial_source,
-              mercurial_sink, darcs_source]
-
-def convertsource(ui, path, **opts):
-    for c in converters:
-        try:
-            return c.getcommit and c(ui, path, **opts)
-        except (AttributeError, NoRepo):
-            pass
-    raise util.Abort('%s: unknown repository type' % path)
-
-def convertsink(ui, path):
-    if not os.path.isdir(path):
-        raise util.Abort("%s: not a directory" % path)
-    for c in converters:
-        try:
-            return c.putcommit and c(ui, path)
-        except (AttributeError, NoRepo):
-            pass
-    raise util.Abort('%s: unknown repository type' % path)
-
-class converter(object):
-    def __init__(self, ui, source, dest, revmapfile, filemapper, opts):
-
-        self.source = source
-        self.dest = dest
-        self.ui = ui
-        self.opts = opts
-        self.commitcache = {}
-        self.revmapfile = revmapfile
-        self.revmapfilefd = None
-        self.authors = {}
-        self.authorfile = None
-        self.mapfile = filemapper
-
-        self.map = {}
-        try:
-            origrevmapfile = open(self.revmapfile, 'r')
-            for l in origrevmapfile:
-                sv, dv = l[:-1].split()
-                self.map[sv] = dv
-            origrevmapfile.close()
-        except IOError:
-            pass
-
-        # Read first the dst author map if any
-        authorfile = self.dest.authorfile()
-        if authorfile and os.path.exists(authorfile):
-            self.readauthormap(authorfile)
-        # Extend/Override with new author map if necessary
-        if opts.get('authors'):
-            self.readauthormap(opts.get('authors'))
-            self.authorfile = self.dest.authorfile()
-
-    def walktree(self, heads):
-        '''Return a mapping that identifies the uncommitted parents of every
-        uncommitted changeset.'''
-        visit = heads
-        known = {}
-        parents = {}
-        while visit:
-            n = visit.pop(0)
-            if n in known or n in self.map: continue
-            known[n] = 1
-            commit = self.cachecommit(n)
-            parents[n] = []
-            for p in commit.parents:
-                parents[n].append(p)
-                visit.append(p)
-
-        return parents
-
-    def toposort(self, parents):
-        '''Return an ordering such that every uncommitted changeset is
-        preceeded by all its uncommitted ancestors.'''
-        visit = parents.keys()
-        seen = {}
-        children = {}
-
-        while visit:
-            n = visit.pop(0)
-            if n in seen: continue
-            seen[n] = 1
-            # Ensure that nodes without parents are present in the 'children'
-            # mapping.
-            children.setdefault(n, [])
-            for p in parents[n]:
-                if not p in self.map:
-                    visit.append(p)
-                children.setdefault(p, []).append(n)
-
-        s = []
-        removed = {}
-        visit = children.keys()
-        while visit:
-            n = visit.pop(0)
-            if n in removed: continue
-            dep = 0
-            if n in parents:
-                for p in parents[n]:
-                    if p in self.map: continue
-                    if p not in removed:
-                        # we're still dependent
-                        visit.append(n)
-                        dep = 1
-                        break
-
-            if not dep:
-                # all n's parents are in the list
-                removed[n] = 1
-                if n not in self.map:
-                    s.append(n)
-                if n in children:
-                    for c in children[n]:
-                        visit.insert(0, c)
+# This software may be used and distributed according to the terms of
+# the GNU General Public License, incorporated herein by reference.
 
-        if self.opts.get('datesort'):
-            depth = {}
-            for n in s:
-                depth[n] = 0
-                pl = [p for p in self.commitcache[n].parents
-                      if p not in self.map]
-                if pl:
-                    depth[n] = max([depth[p] for p in pl]) + 1
-
-            s = [(depth[n], self.commitcache[n].date, n) for n in s]
-            s.sort()
-            s = [e[2] for e in s]
-
-        return s
-
-    def mapentry(self, src, dst):
-        if self.revmapfilefd is None:
-            try:
-                self.revmapfilefd = open(self.revmapfile, "a")
-            except IOError, (errno, strerror):
-                raise util.Abort("Could not open map file %s: %s, %s\n" % (self.revmapfile, errno, strerror))
-        self.map[src] = dst
-        self.revmapfilefd.write("%s %s\n" % (src, dst))
-        self.revmapfilefd.flush()
-
-    def writeauthormap(self):
-        authorfile = self.authorfile
-        if authorfile:
-           self.ui.status('Writing author map file %s\n' % authorfile)
-           ofile = open(authorfile, 'w+')
-           for author in self.authors:
-               ofile.write("%s=%s\n" % (author, self.authors[author]))
-           ofile.close()
-
-    def readauthormap(self, authorfile):
-        afile = open(authorfile, 'r')
-        for line in afile:
-            try:
-                srcauthor = line.split('=')[0].strip()
-                dstauthor = line.split('=')[1].strip()
-                if srcauthor in self.authors and dstauthor != self.authors[srcauthor]:
-                    self.ui.status(
-                        'Overriding mapping for author %s, was %s, will be %s\n'
-                        % (srcauthor, self.authors[srcauthor], dstauthor))
-                else:
-                    self.ui.debug('Mapping author %s to %s\n'
-                                  % (srcauthor, dstauthor))
-                    self.authors[srcauthor] = dstauthor
-            except IndexError:
-                self.ui.warn(
-                    'Ignoring bad line in author file map %s: %s\n'
-                    % (authorfile, line))
-        afile.close()
-
-    def cachecommit(self, rev):
-        commit = self.source.getcommit(rev)
-        commit.author = self.authors.get(commit.author, commit.author)
-        self.commitcache[rev] = commit
-        return commit
-
-    def copy(self, rev):
-        commit = self.commitcache[rev]
-        do_copies = hasattr(self.dest, 'copyfile')
-        filenames = []
-
-        files, copies = self.source.getchanges(rev)
-        parents = [self.map[r] for r in commit.parents]
-        if commit.parents:
-            prev = commit.parents[0]
-            if prev not in self.commitcache:
-                self.cachecommit(prev)
-            pbranch = self.commitcache[prev].branch
-        else:
-            pbranch = None
-        self.dest.setbranch(commit.branch, pbranch, parents)
-        for f, v in files:
-            newf = self.mapfile(f)
-            if not newf:
-                continue
-            filenames.append(newf)
-            try:
-                data = self.source.getfile(f, v)
-            except IOError, inst:
-                self.dest.delfile(newf)
-            else:
-                e = self.source.getmode(f, v)
-                self.dest.putfile(newf, e, data)
-                if do_copies:
-                    if f in copies:
-                        copyf = self.mapfile(copies[f])
-                        if copyf:
-                            # Merely marks that a copy happened.
-                            self.dest.copyfile(copyf, newf)
-
-        if not filenames and self.mapfile.active():
-            newnode = parents[0]
-        else:
-            newnode = self.dest.putcommit(filenames, parents, commit)
-        self.mapentry(rev, newnode)
-
-    def convert(self):
-        try:
-            self.source.before()
-            self.dest.before()
-            self.source.setrevmap(self.map)
-            self.ui.status("scanning source...\n")
-            heads = self.source.getheads()
-            parents = self.walktree(heads)
-            self.ui.status("sorting...\n")
-            t = self.toposort(parents)
-            num = len(t)
-            c = None
-
-            self.ui.status("converting...\n")
-            for c in t:
-                num -= 1
-                desc = self.commitcache[c].desc
-                if "\n" in desc:
-                    desc = desc.splitlines()[0]
-                self.ui.status("%d %s\n" % (num, desc))
-                self.copy(c)
-
-            tags = self.source.gettags()
-            ctags = {}
-            for k in tags:
-                v = tags[k]
-                if v in self.map:
-                    ctags[k] = self.map[v]
-
-            if c and ctags:
-                nrev = self.dest.puttags(ctags)
-                # write another hash correspondence to override the previous
-                # one so we don't end up with extra tag heads
-                if nrev:
-                    self.mapentry(c, nrev)
-
-            self.writeauthormap()
-        finally:
-            self.cleanup()
-
-    def cleanup(self):
-        try:
-            self.dest.after()
-        finally:
-            self.source.after()
-        if self.revmapfilefd:
-            self.revmapfilefd.close()
+import shlex
+from mercurial.i18n import _
+from mercurial import util
+from common import SKIPREV
 
 def rpairs(name):
     e = len(name)
@@ -368,122 +95,236 @@ class filemapper(object):
     def active(self):
         return bool(self.include or self.exclude or self.rename)
 
-def convert(ui, src, dest=None, revmapfile=None, **opts):
-    """Convert a foreign SCM repository to a Mercurial one.
-
-    Accepted source formats:
-    - CVS
-    - Darcs
-    - git
-    - Subversion
-
-    Accepted destination formats:
-    - Mercurial
+# This class does two additional things compared to a regular source:
+#
+# - Filter and rename files.  This is mostly wrapped by the filemapper
+#   class above. We hide the original filename in the revision that is
+#   returned by getchanges to be able to find things later in getfile
+#   and getmode.
+#
+# - Return only revisions that matter for the files we're interested in.
+#   This involves rewriting the parents of the original revision to
+#   create a graph that is restricted to those revisions.
+#
+#   This set of revisions includes not only revisions that directly
+#   touch files we're interested in, but also merges that merge two
+#   or more interesting revisions.
 
-    If no revision is given, all revisions will be converted. Otherwise,
-    convert will only import up to the named revision (given in a format
-    understood by the source).
-
-    If no destination directory name is specified, it defaults to the
-    basename of the source with '-hg' appended.  If the destination
-    repository doesn't exist, it will be created.
-
-    If <revmapfile> isn't given, it will be put in a default location
-    (<dest>/.hg/shamap by default).  The <revmapfile> is a simple text
-    file that maps each source commit ID to the destination ID for
-    that revision, like so:
-    <source ID> <destination ID>
-
-    If the file doesn't exist, it's automatically created.  It's updated
-    on each commit copied, so convert-repo can be interrupted and can
-    be run repeatedly to copy new commits.
+class filemap_source(object):
+    def __init__(self, ui, baseconverter, filemap):
+        self.ui = ui
+        self.base = baseconverter
+        self.filemapper = filemapper(ui, filemap)
+        self.commits = {}
+        # if a revision rev has parent p in the original revision graph, then
+        # rev will have parent self.parentmap[p] in the restricted graph.
+        self.parentmap = {}
+        # self.wantedancestors[rev] is the set of all ancestors of rev that
+        # are in the restricted graph.
+        self.wantedancestors = {}
+        self.convertedorder = None
+        self._rebuilt = False
+        self.origparents = {}
 
-    The [username mapping] file is a simple text file that maps each source
-    commit author to a destination commit author. It is handy for source SCMs
-    that use unix logins to identify authors (eg: CVS). One line per author
-    mapping and the line format is:
-    srcauthor=whatever string you want
-
-    The filemap is a file that allows filtering and remapping of files
-    and directories.  Comment lines start with '#'.  Each line can
-    contain one of the following directives:
-
-      include path/to/file
-
-      exclude path/to/file
+    def setrevmap(self, revmap, order):
+        # rebuild our state to make things restartable
+        #
+        # To avoid calling getcommit for every revision that has already
+        # been converted, we rebuild only the parentmap, delaying the
+        # rebuild of wantedancestors until we need it (i.e. until a
+        # merge).
+        #
+        # We assume the order argument lists the revisions in
+        # topological order, so that we can infer which revisions were
+        # wanted by previous runs.
+        self._rebuilt = not revmap
+        seen = {SKIPREV: SKIPREV}
+        dummyset = util.set()
+        converted = []
+        for rev in order:
+            mapped = revmap[rev]
+            wanted = mapped not in seen
+            if wanted:
+                seen[mapped] = rev
+                self.parentmap[rev] = rev
+            else:
+                self.parentmap[rev] = seen[mapped]
+            self.wantedancestors[rev] = dummyset
+            arg = seen[mapped]
+            if arg == SKIPREV:
+                arg = None
+            converted.append((rev, wanted, arg))
+        self.convertedorder = converted
+        return self.base.setrevmap(revmap, order)
 
-      rename from/file to/file
-    
-    The 'include' directive causes a file, or all files under a
-    directory, to be included in the destination repository.  The
-    'exclude' directive causes files or directories to be omitted.
-    The 'rename' directive renames a file or directory.  To rename
-    from a subdirectory into the root of the repository, use '.' as
-    the path to rename to.
-    """
+    def rebuild(self):
+        if self._rebuilt:
+            return True
+        self._rebuilt = True
+        pmap = self.parentmap.copy()
+        self.parentmap.clear()
+        self.wantedancestors.clear()
+        for rev, wanted, arg in self.convertedorder:
+            parents = self.origparents.get(rev)
+            if parents is None:
+                parents = self.base.getcommit(rev).parents
+            if wanted:
+                self.mark_wanted(rev, parents)
+            else:
+                self.mark_not_wanted(rev, arg)
+
+        assert pmap == self.parentmap
+        return True
+
+    def getheads(self):
+        return self.base.getheads()
 
-    util._encoding = 'UTF-8'
+    def getcommit(self, rev):
+        # We want to save a reference to the commit objects to be able
+        # to rewrite their parents later on.
+        self.commits[rev] = self.base.getcommit(rev)
+        return self.commits[rev]
 
-    if not dest:
-        dest = hg.defaultdest(src) + "-hg"
-        ui.status("assuming destination %s\n" % dest)
+    def wanted(self, rev, i):
+        # Return True if we're directly interested in rev.
+        #
+        # i is an index selecting one of the parents of rev (if rev
+        # has no parents, i is None).  getchangedfiles will give us
+        # the list of files that are different in rev and in the parent
+        # indicated by i.  If we're interested in any of these files,
+        # we're interested in rev.
+        try:
+            files = self.base.getchangedfiles(rev, i)
+        except NotImplementedError:
+            raise util.Abort(_("source repository doesn't support --filemap"))
+        for f in files:
+            if self.filemapper(f):
+                return True
+        return False
+
+    def mark_not_wanted(self, rev, p):
+        # Mark rev as not interesting and update data structures.
 
-    # Try to be smart and initalize things when required
-    created = False
-    if os.path.isdir(dest):
-        if len(os.listdir(dest)) > 0:
-            try:
-                hg.repository(ui, dest)
-                ui.status("destination %s is a Mercurial repository\n" % dest)
-            except hg.RepoError:
-                raise util.Abort(
-                    "destination directory %s is not empty.\n"
-                    "Please specify an empty directory to be initialized\n"
-                    "or an already initialized mercurial repository"
-                    % dest)
-        else:
-            ui.status("initializing destination %s repository\n" % dest)
-            hg.repository(ui, dest, create=True)
-            created = True
-    elif os.path.exists(dest):
-        raise util.Abort("destination %s exists and is not a directory" % dest)
-    else:
-        ui.status("initializing destination %s repository\n" % dest)
-        hg.repository(ui, dest, create=True)
-        created = True
+        if p is None:
+            # A root revision. Use SKIPREV to indicate that it doesn't
+            # map to any revision in the restricted graph.  Put SKIPREV
+            # in the set of wanted ancestors to simplify code elsewhere
+            self.parentmap[rev] = SKIPREV
+            self.wantedancestors[rev] = util.set((SKIPREV,))
+            return
+
+        # Reuse the data from our parent.
+        self.parentmap[rev] = self.parentmap[p]
+        self.wantedancestors[rev] = self.wantedancestors[p]
+
+    def mark_wanted(self, rev, parents):
+        # Mark rev as wanted and update data structures.
+
+        # rev will be in the restricted graph, so children of rev in
+        # the original graph should still have rev as a parent in the
+        # restricted graph.
+        self.parentmap[rev] = rev
+
+        # The set of wanted ancestors of rev is the union of the sets
+        # of wanted ancestors of its parents. Plus rev itself.
+        wrev = util.set()
+        for p in parents:
+            wrev.update(self.wantedancestors[p])
+        wrev.add(rev)
+        self.wantedancestors[rev] = wrev
 
-    destc = convertsink(ui, dest)
+    def getchanges(self, rev):
+        parents = self.commits[rev].parents
+        if len(parents) > 1:
+            self.rebuild()
+
+        # To decide whether we're interested in rev we:
+        #
+        # - calculate what parents rev will have if it turns out we're
+        #   interested in it.  If it's going to have more than 1 parent,
+        #   we're interested in it.
+        #
+        # - otherwise, we'll compare it with the single parent we found.
+        #   If any of the files we're interested in is different in
+        #   the two revisions, we're interested in rev.
+
+        # A parent p is interesting if its mapped version (self.parentmap[p]):
+        # - is not SKIPREV
+        # - is not already in the list of parents (we don't want duplicates)
+        # - is not an ancestor of the mapped versions of the other parents
+        mparents = []
+        wp = None
+        for i, p1 in enumerate(parents):
+            mp1 = self.parentmap[p1]
+            if mp1 == SKIPREV or mp1 in mparents:
+                continue
+            for p2 in parents:
+                if p1 == p2 or mp1 == self.parentmap[p2]:
+                    continue
+                if mp1 in self.wantedancestors[p2]:
+                    break
+            else:
+                mparents.append(mp1)
+                wp = i
 
-    try:
-        srcc = convertsource(ui, src, rev=opts.get('rev'))
-    except Exception:
-        if created:
-            shutil.rmtree(dest, True)
-        raise
+        if wp is None and parents:
+            wp = 0
+
+        self.origparents[rev] = parents
 
-    if not revmapfile:
-        try:
-            revmapfile = destc.revmapfile()
-        except:
-            revmapfile = os.path.join(destc, "map")
+        if len(mparents) < 2 and not self.wanted(rev, wp):
+            # We don't want this revision.
+            # Update our state and tell the convert process to map this
+            # revision to the same revision its parent as mapped to.
+            p = None
+            if parents:
+                p = parents[wp]
+            self.mark_not_wanted(rev, p)
+            self.convertedorder.append((rev, False, p))
+            return self.parentmap[rev]
 
+        # We want this revision.
+        # Rewrite the parents of the commit object
+        self.commits[rev].parents = mparents
+        self.mark_wanted(rev, parents)
+        self.convertedorder.append((rev, True, None))
 
-    c = converter(ui, srcc, destc, revmapfile, filemapper(ui, opts['filemap']),
-                  opts)
-    c.convert()
-
+        # Get the real changes and do the filtering/mapping.
+        # To be able to get the files later on in getfile and getmode,
+        # we hide the original filename in the rev part of the return
+        # value.
+        changes, copies = self.base.getchanges(rev)
+        newnames = {}
+        files = []
+        for f, r in changes:
+            newf = self.filemapper(f)
+            if newf:
+                files.append((newf, (f, r)))
+                newnames[f] = newf
 
-cmdtable = {
-    "convert":
-        (convert,
-         [('A', 'authors', '', 'username mapping filename'),
-          ('', 'filemap', '', 'remap file names using contents of file'),
-          ('r', 'rev', '', 'import up to target revision REV'),
-          ('', 'datesort', None, 'try to sort changesets by date')],
-         'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
-    "debugsvnlog":
-        (debugsvnlog,
-         [],
-         'hg debugsvnlog'),
-}
+        ncopies = {}
+        for c in copies:
+            newc = self.filemapper(c)
+            if newc:
+                newsource = self.filemapper(copies[c])
+                if newsource:
+                    ncopies[newc] = newsource
+
+        return files, ncopies
 
+    def getfile(self, name, rev):
+        realname, realrev = rev
+        return self.base.getfile(realname, realrev)
+
+    def getmode(self, name, rev):
+        realname, realrev = rev
+        return self.base.getmode(realname, realrev)
+
+    def gettags(self):
+        return self.base.gettags()
+
+    def before(self):
+        pass
+
+    def after(self):
+        pass
--- a/hgext/convert/git.py
+++ b/hgext/convert/git.py
@@ -122,3 +122,21 @@ class convert_git(converter_source):
             tags[tag] = node
 
         return tags
+
+    def getchangedfiles(self, version, i):  # i: 0-based parent index or None
+        changes = []
+        if i is None:  # no parent chosen: -m diffs a merge against each parent
+            fh = self.gitcmd("git-diff-tree --root -m -r %s" % version)
+            for l in fh:
+                if "\t" not in l:  # not a "<metadata>\t<file>" line
+                    continue
+                m, f = l[:-1].split("\t")  # drop the newline; f is the file
+                changes.append(f)
+            fh.close()
+        else:
+            fh = self.gitcmd("git-diff-tree --name-only --root -r %s %s^%s --"
+                             % (version, version, i+1))  # ^N counts from 1
+            changes = [f.rstrip('\n') for f in fh]
+            fh.close()
+
+        return changes
--- a/hgext/convert/hg.py
+++ b/hgext/convert/hg.py
@@ -28,6 +28,7 @@ class mercurial_sink(converter_sink):
             raise NoRepo("could not open hg repo %s as sink" % path)
         self.lock = None
         self.wlock = None
+        self.filemapmode = False
 
     def before(self):
         self.wlock = self.repo.wlock()
@@ -96,6 +97,10 @@ class mercurial_sink(converter_sink):
                 pl.append(p)
                 seen[p] = 1
         parents = pl
+        nparents = len(parents)
+        if self.filemapmode and nparents == 1:
+            m1node = self.repo.changelog.read(bin(parents[0]))[0]
+            parent = parents[0]
 
         if len(parents) < 2: parents.append("0" * 40)
         if len(parents) < 2: parents.append("0" * 40)
@@ -117,6 +122,13 @@ class mercurial_sink(converter_sink):
             text = "(octopus merge fixup)\n"
             p2 = hg.hex(self.repo.changelog.tip())
 
+        if self.filemapmode and nparents == 1:
+            man = self.repo.manifest
+            mnode = self.repo.changelog.read(bin(p2))[0]
+            if not man.cmp(m1node, man.revision(mnode)):
+                self.repo.rollback()
+                self.repo.dirstate.clear()
+                return parent
         return p2
 
     def puttags(self, tags):
@@ -153,6 +165,9 @@ class mercurial_sink(converter_sink):
                                 date, tagparent, nullid)
             return hex(self.repo.changelog.tip())
 
+    def setfilemapmode(self, active):
+        self.filemapmode = active  # tested with nparents == 1 in this class
+
 class mercurial_source(converter_source):
     def __init__(self, ui, path, rev=None):
         converter_source.__init__(self, ui, path, rev)
@@ -162,6 +177,7 @@ class mercurial_source(converter_source)
             raise NoRepo("could not open hg repo %s as source" % path)
         self.lastrev = None
         self.lastctx = None
+        self._changescache = None
 
     def changectx(self, rev):
         if self.lastrev != rev:
@@ -187,7 +203,10 @@ class mercurial_source(converter_source)
 
     def getchanges(self, rev):
         ctx = self.changectx(rev)
-        m, a, r = self.repo.status(ctx.parents()[0].node(), ctx.node())[:3]
+        if self._changescache and self._changescache[0] == rev:
+            m, a, r = self._changescache[1]
+        else:
+            m, a, r = self.repo.status(ctx.parents()[0].node(), ctx.node())[:3]
         changes = [(name, rev) for name in m + a + r]
         changes.sort()
         return (changes, self.getcopies(ctx, m + a))
@@ -211,3 +230,14 @@ class mercurial_source(converter_source)
     def gettags(self):
         tags = [t for t in self.repo.tagslist() if t[0] != 'tip']
         return dict([(name, hex(node)) for name, node in tags])
+
+    def getchangedfiles(self, rev, i):
+        ctx = self.changectx(rev)
+        i = i or 0  # a falsy i (e.g. None) selects the first parent
+        changes = self.repo.status(ctx.parents()[i].node(), ctx.node())[:3]
+
+        if i == 0:
+            self._changescache = (rev, changes)  # reused by getchanges(rev)
+
+        return changes[0] + changes[1] + changes[2]  # getchanges' m + a + r
+
--- a/hgext/convert/subversion.py
+++ b/hgext/convert/subversion.py
@@ -146,8 +146,9 @@ class convert_svn(converter_source):
         self.last_changed = self.latest(self.module, latest)
 
         self.head = self.revid(self.last_changed)
+        self._changescache = None
 
-    def setrevmap(self, revmap):
+    def setrevmap(self, revmap, order):
         lastrevs = {}
         for revid in revmap.keys():
             uuid, module, revnum = self.revsplit(revid)
@@ -206,6 +207,9 @@ class convert_svn(converter_source):
         return self.modecache[(file, rev)]
 
     def getchanges(self, rev):
+        if self._changescache and self._changescache[0] == rev:
+            return self._changescache[1]
+        self._changescache = None
         self.modecache = {}
         (paths, parents) = self.paths[rev]
         files, copies = self.expandpaths(rev, paths, parents)
@@ -216,6 +220,11 @@ class convert_svn(converter_source):
         del self.paths[rev]
         return (files, copies)
 
+    def getchangedfiles(self, rev, i):  # i is not used
+        changes = self.getchanges(rev)  # this deletes self.paths[rev] ...
+        self._changescache = (rev, changes)  # ... so keep it for the next call
+        return [f[0] for f in changes[0]]  # changes is (files, copies)
+
     def getcommit(self, rev):
         if rev not in self.commits:
             uuid, module, revnum = self.revsplit(rev)
--- a/mercurial/bdiff.c
+++ b/mercurial/bdiff.c
@@ -12,6 +12,7 @@
 #include <Python.h>
 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>
 
 #if defined __hpux || defined __SUNPRO_C || defined _AIX
 # define inline
@@ -58,21 +59,17 @@ struct hunklist {
 	struct hunk *base, *head;
 };
 
-static inline uint32_t rol32(uint32_t word, unsigned int shift)
-{
-        return (word << shift) | (word >> (32 - shift));
-}
-
 int splitlines(const char *a, int len, struct line **lr)
 {
-	int g, h, i;
+	int h, i;
 	const char *p, *b = a;
+	const char * const plast = a + len - 1;
 	struct line *l;
 
 	/* count the lines */
 	i = 1; /* extra line for sentinel */
 	for (p = a; p < a + len; p++)
-		if (*p == '\n' || p == a + len - 1)
+		if (*p == '\n' || p == plast)
 			i++;
 
 	*lr = l = (struct line *)malloc(sizeof(struct line) * i);
@@ -82,24 +79,17 @@ int splitlines(const char *a, int len, s
 	/* build the line array and calculate hashes */
 	h = 0;
 	for (p = a; p < a + len; p++) {
-		/*
-		 * a simple hash from GNU diff, with better collision
-		 * resistance from hashpjw. this slows down common
-		 * case by 10%, but speeds up worst case by 100x.
-		 */
-		h = *p + rol32(h, 7);
-		if ((g = h & 0xf0000000)) {
-			h ^= g >> 24;
-			h ^= g;
-		}
-		if (*p == '\n' || p == a + len - 1) {
+		/* Leonid Yuriev's hash */
+                h = (h * 1664525) + *p + 1013904223;
+
+		if (*p == '\n' || p == plast) {
+			l->h = h;
+			h = 0;
 			l->len = p - b + 1;
-			l->h = h * l->len;
 			l->l = b;
-			l->n = -1;
+			l->n = INT_MAX;
 			l++;
 			b = p + 1;
-			h = 0;
 		}
 	}
 
@@ -117,27 +107,34 @@ int inline cmp(struct line *a, struct li
 static int equatelines(struct line *a, int an, struct line *b, int bn)
 {
 	int i, j, buckets = 1, t;
+	int scale = 32;
 	struct pos *h;
 
 	/* build a hash table of the next highest power of 2 */
 	while (buckets < bn + 1)
 		buckets *= 2;
 
-	h = (struct pos *)malloc(buckets * sizeof(struct pos));
-	buckets = buckets - 1;
+	/* try to allocate a large hash table to avoid collisions */
+	do {
+		scale /= 2;
+		h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
+	} while (!h && scale != 1);
+
 	if (!h)
 		return 0;
 
+	buckets = buckets * scale - 1;
+
 	/* clear the hash table */
 	for (i = 0; i <= buckets; i++) {
-		h[i].pos = -1;
+		h[i].pos = INT_MAX;
 		h[i].len = 0;
 	}
 
 	/* add lines to the hash table chains */
 	for (i = bn - 1; i >= 0; i--) {
 		/* find the equivalence class */
-		for (j = b[i].h & buckets; h[j].pos != -1;
+		for (j = b[i].h & buckets; h[j].pos != INT_MAX;
 		     j = (j + 1) & buckets)
 			if (!cmp(b + i, b + h[j].pos))
 				break;
@@ -155,7 +152,7 @@ static int equatelines(struct line *a, i
 	/* match items in a to their equivalence class in b */
 	for (i = 0; i < an; i++) {
 		/* find the equivalence class */
-		for (j = a[i].h & buckets; h[j].pos != -1;
+		for (j = a[i].h & buckets; h[j].pos != INT_MAX;
 		     j = (j + 1) & buckets)
 			if (!cmp(a + i, b + h[j].pos))
 				break;
@@ -164,7 +161,7 @@ static int equatelines(struct line *a, i
 		if (h[j].len <= t)
 			a[i].n = h[j].pos; /* point to head of match list */
 		else
-			a[i].n = -1; /* too popular */
+			a[i].n = INT_MAX; /* too popular */
 	}
 
 	/* discard hash tables */
@@ -179,11 +176,11 @@ static int longest_match(struct line *a,
 
 	for (i = a1; i < a2; i++) {
 		/* skip things before the current block */
-		for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
+		for (j = a[i].n; j < b1; j = b[j].n)
 			;
 
 		/* loop through all lines match a[i] in b */
-		for (; j != -1 && j < b2; j = b[j].n) {
+		for (; j < b2; j = b[j].n) {
 			/* does this extend an earlier match? */
 			if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
 				k = pos[j - 1].len + 1;
@@ -216,6 +213,7 @@ static int longest_match(struct line *a,
 
 	*omi = mi - mb;
 	*omj = mj - mb;
+
 	return mk + mb;
 }
 
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -33,10 +33,9 @@ def chunkiter(source):
             break
         yield c
 
-def genchunk(data):
-    """build a changegroup chunk"""
-    header = struct.pack(">l", len(data)+ 4)
-    return "%s%s" % (header, data)
+def chunkheader(length):
+    """build a changegroup chunk header"""
+    return struct.pack(">l", length + 4)  # + 4 counts this 4-byte header
 
 def closechunk():
     return struct.pack(">l", 0)
@@ -86,7 +85,12 @@ def writebundle(cg, filename, bundletype
             empty = True
             for chunk in chunkiter(cg):
                 empty = False
-                fh.write(z.compress(genchunk(chunk)))
+                fh.write(z.compress(chunkheader(len(chunk))))
+                pos = 0
+                while pos < len(chunk):
+                    next = pos + 2**20
+                    fh.write(z.compress(chunk[pos:next]))
+                    pos = next
             fh.write(z.compress(closechunk()))
         fh.write(z.flush())
         cleanup = None
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1720,7 +1720,8 @@ class localrepository(repo.repository):
                 # If any filenodes are left, generate the group for them,
                 # otherwise don't bother.
                 if len(msng_filenode_lst) > 0:
-                    yield changegroup.genchunk(fname)
+                    yield changegroup.chunkheader(len(fname))
+                    yield fname
                     # Sort the filenodes by their revision #
                     msng_filenode_lst.sort(cmp_by_rev_func(filerevlog))
                     # Create a group generator and only pass in a changenode
@@ -1796,7 +1797,8 @@ class localrepository(repo.repository):
                 nodeiter = gennodelst(filerevlog)
                 nodeiter = list(nodeiter)
                 if nodeiter:
-                    yield changegroup.genchunk(fname)
+                    yield changegroup.chunkheader(len(fname))
+                    yield fname
                     lookup = lookuprevlink_func(filerevlog)
                     for chnk in filerevlog.group(nodeiter, lookup):
                         yield chnk
--- a/mercurial/mdiff.py
+++ b/mercurial/mdiff.py
@@ -245,6 +245,9 @@ def patch(a, bin):
 def get_matching_blocks(a, b):
     return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
 
+def trivialdiffheader(length):
+    return struct.pack(">lll", 0, 0, length)  # presumably start, end, new size
+
 patches = mpatch.patches
 patchedsize = mpatch.patchedsize
 textdiff = bdiff.bdiff
--- a/mercurial/merge.py
+++ b/mercurial/merge.py
@@ -188,9 +188,18 @@ def findcopies(repo, m1, m2, ma, limit):
     if not m1 or not m2 or not ma:
         return {}, {}
 
+    repo.ui.debug(_("  searching for copies back to rev %d\n") % limit)
+
     u1 = nonoverlap(m1, m2, ma)
     u2 = nonoverlap(m2, m1, ma)
 
+    if u1:
+        repo.ui.debug(_("  unmatched files in local:\n   %s\n")
+                      % "\n   ".join(u1))
+    if u2:
+        repo.ui.debug(_("  unmatched files in other:\n   %s\n")
+                      % "\n   ".join(u2))
+
     for f in u1:
         checkcopies(ctx(f, m1[f]), m2, ma)
 
@@ -204,9 +213,19 @@ def findcopies(repo, m1, m2, ma, limit):
             fo.remove(f)
             d2[f] = (of, fo)
 
+    if fullcopy:
+        repo.ui.debug(_("  all copies found (* = to merge, ! = divergent):\n"))
+        for f in fullcopy:
+            note = ""
+            if f in copy: note += "*"
+            if f in diverge: note += "!"
+            repo.ui.debug(_("   %s -> %s %s\n") % (f, fullcopy[f], note))
+
     if not fullcopy or not repo.ui.configbool("merge", "followdirs", True):
         return copy, diverge
 
+    repo.ui.debug(_("  checking for directory renames\n"))
+
     # generate a directory move map
     d1, d2 = dirs(m1), dirs(m2)
     invalid = {}
@@ -241,6 +260,9 @@ def findcopies(repo, m1, m2, ma, limit):
     if not dirmove:
         return copy, diverge
 
+    for d in dirmove:
+        repo.ui.debug(_("  dir %s -> %s\n") % (d, dirmove[d]))
+
     # check unaccounted nonoverlapping files against directory moves
     for f in u1 + u2:
         if f not in fullcopy:
@@ -248,6 +270,7 @@ def findcopies(repo, m1, m2, ma, limit):
                 if f.startswith(d):
                     # new file added in a directory that was moved, move it
                     copy[f] = dirmove[d] + f[len(d):]
+                    repo.ui.debug(_("  file %s -> %s\n") % (f, copy[f]))
                     break
 
     return copy, diverge
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1087,10 +1087,16 @@ class revlog(object):
             if infocollect is not None:
                 infocollect(nb)
 
-            d = self.revdiff(a, b)
             p = self.parents(nb)
             meta = nb + p[0] + p[1] + lookup(nb)
-            yield changegroup.genchunk("%s%s" % (meta, d))
+            if a == -1:
+                d = self.revision(nb)
+                meta += mdiff.trivialdiffheader(len(d))
+            else:
+                d = self.revdiff(a, b)
+            yield changegroup.chunkheader(len(meta) + len(d))
+            yield meta
+            yield d
 
         yield changegroup.closechunk()
 
--- a/tests/test-convert-cvs
+++ b/tests/test-convert-cvs
@@ -29,16 +29,30 @@ cd ..
 echo % checkout source directory
 cvs -q checkout src
 
+echo % commit a new revision changing b/c
+cd src
+echo c >> b/c
+cvs -q commit -mci0 . | grep '<--' |\
+    sed -e 's:.*src/\(.*\),v.*:checking in src/\1,v:g'
+cd ..
+
 echo % convert fresh repo
 hg convert src src-hg | sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g'
 cat src-hg/a
 cat src-hg/b/c
 
+echo % convert fresh repo with --filemap
+echo include b/c > filemap
+hg convert --filemap filemap src src-filemap | sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g'
+cat src-hg/b/c
+hg -R src-filemap log --template '#rev# #desc# files: #files#\n'
+
 echo % commit new file revisions
 cd src
 echo a >> a
 echo c >> b/c
-cvs -q commit -mci1 . | sed -e 's:.*src/\(.*\),v:src/\1,v:g'
+cvs -q commit -mci1 . | grep '<--' |\
+    sed -e 's:.*src/\(.*\),v.*:checking in src/\1,v:g'
 cd ..
 
 echo % convert again
@@ -46,5 +60,8 @@ hg convert src src-hg | sed -e 's/connec
 cat src-hg/a
 cat src-hg/b/c
 
+echo % convert again with --filemap
+hg convert --filemap filemap src src-filemap | sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g'
+cat src-hg/b/c
+hg -R src-filemap log --template '#rev# #desc# files: #files#\n'
 
-
--- a/tests/test-convert-cvs.out
+++ b/tests/test-convert-cvs.out
@@ -9,22 +9,40 @@ No conflicts created by this import
 % checkout source directory
 U src/a
 U src/b/c
+% commit a new revision changing b/c
+checking in src/b/c,v
 % convert fresh repo
 initializing destination src-hg repository
 connecting to cvsrepo
 scanning source...
 sorting...
 converting...
-1 Initial revision
-0 import
+2 Initial revision
+1 import
+0 ci0
 updating tags
 a
 c
+c
+% convert fresh repo with --filemap
+initializing destination src-filemap repository
+connecting to cvsrepo
+scanning source...
+sorting...
+converting...
+2 Initial revision
+1 import
+rolling back last transaction
+0 ci0
+updating tags
+c
+c
+2 update tags files: .hgtags
+1 ci0 files: b/c
+0 Initial revision files: b/c
 % commit new file revisions
-src/a,v  <--  a
-new revision: 1.2; previous revision: 1.1
-src/b/c,v  <--  b/c
-new revision: 1.2; previous revision: 1.1
+checking in src/a,v
+checking in src/b/c,v
 % convert again
 destination src-hg is a Mercurial repository
 connecting to cvsrepo
@@ -32,8 +50,22 @@ scanning source...
 sorting...
 converting...
 0 ci1
-updating tags
 a
 a
 c
 c
+c
+% convert again with --filemap
+destination src-filemap is a Mercurial repository
+connecting to cvsrepo
+scanning source...
+sorting...
+converting...
+0 ci1
+c
+c
+c
+3 ci1 files: b/c
+2 update tags files: .hgtags
+1 ci0 files: b/c
+0 Initial revision files: b/c
new file mode 100755
--- /dev/null
+++ b/tests/test-convert-filemap
@@ -0,0 +1,124 @@
+#!/bin/sh
+
+HGMERGE=true; export HGMERGE
+
+echo '[extensions]' >> $HGRCPATH
+echo 'hgext.graphlog =' >> $HGRCPATH
+echo 'hgext.convert =' >> $HGRCPATH
+
+glog()  # hg glog with a fixed rev/desc/files template; extra args pass through
+{
+    hg glog --template '#rev# "#desc#" files: #files#\n' "$@"
+}
+
+hg init source
+cd source
+
+echo foo > foo
+echo baz > baz
+mkdir dir
+echo dir/file >> dir/file
+echo dir/file2 >> dir/file2
+hg ci -d '0 0' -qAm '0: add foo baz dir/'
+
+echo bar > bar
+echo quux > quux
+hg copy foo copied
+hg ci -d '1 0' -qAm '1: add bar quux; copy foo to copied'
+
+echo >> foo
+hg ci -d '2 0' -m '2: change foo'
+
+hg up -qC 1
+echo >> bar
+echo >> quux
+hg ci -d '3 0' -m '3: change bar quux'
+
+hg up -qC 2
+hg merge -qr 3
+echo >> bar
+echo >> baz
+hg ci -d '4 0' -m '4: first merge; change bar baz'
+
+echo >> bar
+echo 1 >> baz
+echo >> quux
+hg ci -d '5 0' -m '5: change bar baz quux'
+
+hg up -qC 4
+echo >> foo
+echo 2 >> baz
+hg ci -d '6 0' -m '6: change foo baz'
+
+hg up -qC 5
+hg merge -qr 6
+echo >> bar
+hg ci -d '7 0' -m '7: second merge; change bar'
+
+echo >> foo
+hg ci -m '8: change foo'
+
+glog
+
+echo '% final file versions in this repo:'
+hg manifest --debug
+hg debugrename copied
+echo
+
+cd ..
+
+splitrepo()  # usage: splitrepo MSG FILES [OPTS]
+{
+    msg="$1"
+    files="$2"
+    opts=$3  # expanded unquoted below, so it may carry several words
+    echo "% $files: $msg"
+    prefix=`echo "$files" | sed -e 's/ /-/g'`  # e.g. "foo quux" -> "foo-quux"
+    fmap="$prefix.fmap"
+    repo="$prefix.repo"
+    for i in $files; do
+	echo "include $i" >> "$fmap"  # appends: same FILES again repeats lines
+    done
+    hg -q convert $opts --filemap "$fmap" --datesort source "$repo"
+    glog -R "$repo"
+    hg -R "$repo" manifest --debug
+}
+
+splitrepo 'skip unwanted merges; use 1st parent in 1st merge, 2nd in 2nd' foo
+
+splitrepo 'merges are not merges anymore' bar
+
+splitrepo '1st merge is not a merge anymore; 2nd still is' baz
+
+splitrepo 'we add additional merges when they are interesting' 'foo quux'
+
+splitrepo 'partial conversion' 'bar quux' '-r 3'
+splitrepo 'complete the partial conversion' 'bar quux'
+
+rm -r foo.repo
+splitrepo 'partial conversion' 'foo' '-r 3'
+splitrepo 'complete the partial conversion' 'foo'
+
+splitrepo 'copied file; source not included in new repo' copied
+hg --cwd copied.repo debugrename copied
+
+splitrepo 'copied file; source included in new repo' 'foo copied'
+hg --cwd foo-copied.repo debugrename copied
+
+cat > renames.fmap <<EOF
+include dir
+exclude dir/file2
+rename dir dir2
+include foo
+include copied
+rename foo foo2
+rename copied copied2
+EOF
+hg -q convert --filemap renames.fmap --datesort source renames.repo
+glog -R renames.repo
+hg -R renames.repo manifest --debug
+hg --cwd renames.repo debugrename copied2
+echo 'copied:'
+hg --cwd source cat copied
+echo 'copied2:'
+hg --cwd renames.repo cat copied2
new file mode 100644
--- /dev/null
+++ b/tests/test-convert-filemap.out
@@ -0,0 +1,154 @@
+@  8 "8: change foo" files: foo
+|
+o    7 "7: second merge; change bar" files: bar baz
+|\
+| o  6 "6: change foo baz" files: baz foo
+| |
+o |  5 "5: change bar baz quux" files: bar baz quux
+|/
+o    4 "4: first merge; change bar baz" files: bar baz
+|\
+| o  3 "3: change bar quux" files: bar quux
+| |
+o |  2 "2: change foo" files: foo
+|/
+o  1 "1: add bar quux; copy foo to copied" files: bar copied quux
+|
+o  0 "0: add foo baz dir/" files: baz dir/file dir/file2 foo
+
+% final file versions in this repo:
+9463f52fe115e377cf2878d4fc548117211063f2 644 bar
+94c1be4dfde2ee8d78db8bbfcf81210813307c3d 644 baz
+6ca237634e1f6bee1b6db94292fb44f092a25842 644 copied
+3e20847584beff41d7cd16136b7331ab3d754be0 644 dir/file
+75e6d3f8328f5f6ace6bf10b98df793416a09dca 644 dir/file2
+9a7b52012991e4873687192c3e17e61ba3e837a3 644 foo
+bc3eca3f47023a3e70ca0d8cc95a22a6827db19d 644 quux
+copied renamed from foo:2ed2a3912a0b24502043eae84ee4b279c18b90dd
+
+% foo: skip unwanted merges; use 1st parent in 1st merge, 2nd in 2nd
+o  3 "8: change foo" files: foo
+|
+o  2 "6: change foo baz" files: foo
+|
+o  1 "2: change foo" files: foo
+|
+o  0 "0: add foo baz dir/" files: foo
+
+9a7b52012991e4873687192c3e17e61ba3e837a3 644 foo
+% bar: merges are not merges anymore
+o  4 "7: second merge; change bar" files: bar
+|
+o  3 "5: change bar baz quux" files: bar
+|
+o  2 "4: first merge; change bar baz" files: bar
+|
+o  1 "3: change bar quux" files: bar
+|
+o  0 "1: add bar quux; copy foo to copied" files: bar
+
+9463f52fe115e377cf2878d4fc548117211063f2 644 bar
+% baz: 1st merge is not a merge anymore; 2nd still is
+o    4 "7: second merge; change bar" files: baz
+|\
+| o  3 "6: change foo baz" files: baz
+| |
+o |  2 "5: change bar baz quux" files: baz
+|/
+o  1 "4: first merge; change bar baz" files: baz
+|
+o  0 "0: add foo baz dir/" files: baz
+
+94c1be4dfde2ee8d78db8bbfcf81210813307c3d 644 baz
+% foo quux: we add additional merges when they are interesting
+o  8 "8: change foo" files: foo
+|
+o    7 "7: second merge; change bar" files:
+|\
+| o  6 "6: change foo baz" files: foo
+| |
+o |  5 "5: change bar baz quux" files: quux
+|/
+o    4 "4: first merge; change bar baz" files:
+|\
+| o  3 "3: change bar quux" files: quux
+| |
+o |  2 "2: change foo" files: foo
+|/
+o  1 "1: add bar quux; copy foo to copied" files: quux
+|
+o  0 "0: add foo baz dir/" files: foo
+
+9a7b52012991e4873687192c3e17e61ba3e837a3 644 foo
+bc3eca3f47023a3e70ca0d8cc95a22a6827db19d 644 quux
+% bar quux: partial conversion
+o  1 "3: change bar quux" files: bar quux
+|
+o  0 "1: add bar quux; copy foo to copied" files: bar quux
+
+b79105bedc55102f394e90a789c9c380117c1b4a 644 bar
+db0421cc6b685a458c8d86c7d5c004f94429ea23 644 quux
+% bar quux: complete the partial conversion
+o  4 "7: second merge; change bar" files: bar
+|
+o  3 "5: change bar baz quux" files: bar quux
+|
+o  2 "4: first merge; change bar baz" files: bar
+|
+o  1 "3: change bar quux" files: bar quux
+|
+o  0 "1: add bar quux; copy foo to copied" files: bar quux
+
+9463f52fe115e377cf2878d4fc548117211063f2 644 bar
+bc3eca3f47023a3e70ca0d8cc95a22a6827db19d 644 quux
+% foo: partial conversion
+o  0 "0: add foo baz dir/" files: foo
+
+2ed2a3912a0b24502043eae84ee4b279c18b90dd 644 foo
+% foo: complete the partial conversion
+o  3 "8: change foo" files: foo
+|
+o  2 "6: change foo baz" files: foo
+|
+o  1 "2: change foo" files: foo
+|
+o  0 "0: add foo baz dir/" files: foo
+
+9a7b52012991e4873687192c3e17e61ba3e837a3 644 foo
+% copied: copied file; source not included in new repo
+o  0 "1: add bar quux; copy foo to copied" files: copied
+
+2ed2a3912a0b24502043eae84ee4b279c18b90dd 644 copied
+copied not renamed
+% foo copied: copied file; source included in new repo
+o  4 "8: change foo" files: foo
+|
+o  3 "6: change foo baz" files: foo
+|
+o  2 "2: change foo" files: foo
+|
+o  1 "1: add bar quux; copy foo to copied" files: copied
+|
+o  0 "0: add foo baz dir/" files: foo
+
+6ca237634e1f6bee1b6db94292fb44f092a25842 644 copied
+9a7b52012991e4873687192c3e17e61ba3e837a3 644 foo
+copied renamed from foo:2ed2a3912a0b24502043eae84ee4b279c18b90dd
+o  4 "8: change foo" files: foo2
+|
+o  3 "6: change foo baz" files: foo2
+|
+o  2 "2: change foo" files: foo2
+|
+o  1 "1: add bar quux; copy foo to copied" files: copied2
+|
+o  0 "0: add foo baz dir/" files: dir2/file foo2
+
+e5e3d520be9be45937d0b06b004fadcd6c221fa2 644 copied2
+3e20847584beff41d7cd16136b7331ab3d754be0 644 dir2/file
+9a7b52012991e4873687192c3e17e61ba3e837a3 644 foo2
+copied2 renamed from foo2:2ed2a3912a0b24502043eae84ee4b279c18b90dd
+copied:
+foo
+copied2:
+foo
--- a/tests/test-convert-git
+++ b/tests/test-convert-git
@@ -4,6 +4,7 @@
 
 echo "[extensions]" >> $HGRCPATH
 echo "convert=" >> $HGRCPATH
+echo 'hgext.graphlog =' >> $HGRCPATH
 
 GIT_AUTHOR_NAME='test'; export GIT_AUTHOR_NAME
 GIT_AUTHOR_EMAIL='test@example.org'; export GIT_AUTHOR_EMAIL
@@ -32,7 +33,7 @@ commit -a -m t1
 
 # Remove the directory, then try to replace it with a file
 # (issue 754)
-git rm -r d
+git rm -f d/b
 commit -m t2
 echo d > d
 git add d
@@ -54,3 +55,78 @@ cd ..
 hg convert --datesort git-repo
 
 hg -R git-repo-hg tip -v
+
+count=10
+mkdir git-repo2
+cd git-repo2
+git init-db >/dev/null 2>/dev/null
+
+echo foo > foo
+git add foo
+commit -a -m 'add foo'
+
+echo >> foo
+commit -a -m 'change foo'
+
+git checkout -b Bar HEAD^ >/dev/null 2>/dev/null
+echo quux >> quux
+git add quux
+commit -a -m 'add quux'
+
+echo bar > bar
+git add bar
+commit -a -m 'add bar'
+
+git checkout -b Baz HEAD^ >/dev/null 2>/dev/null
+echo baz > baz
+git add baz
+commit -a -m 'add baz'
+
+git checkout master >/dev/null 2>/dev/null
+git pull --no-commit . Bar Baz > /dev/null 2>/dev/null
+commit -m 'Octopus merge'
+
+echo bar >> bar
+commit -a -m 'change bar'
+
+git checkout -b Foo HEAD^ >/dev/null 2>/dev/null
+echo >> foo
+commit -a -m 'change foo'
+
+git checkout master >/dev/null 2>/dev/null
+git pull --no-commit -s ours . Foo > /dev/null 2>/dev/null
+commit -m 'Discard change to foo'
+
+cd ..
+
+glog()  # hg glog with a fixed template; only the first line of each desc
+{
+    hg glog --template '#rev# "#desc|firstline#" files: #files#\n' "$@"
+}
+
+splitrepo()  # usage: splitrepo MSG FILES [OPTS]
+{
+    msg="$1"
+    files="$2"
+    opts=$3  # expanded unquoted below, so it may carry several words
+    echo "% $files: $msg"
+    prefix=`echo "$files" | sed -e 's/ /-/g'`  # e.g. "foo baz" -> "foo-baz"
+    fmap="$prefix.fmap"
+    repo="$prefix.repo"
+    for i in $files; do
+	echo "include $i" >> "$fmap"  # appends: same FILES again repeats lines
+    done
+    hg -q convert $opts --filemap "$fmap" --datesort git-repo2 "$repo"
+    glog -R "$repo"
+    hg -R "$repo" manifest --debug
+}
+
+echo '% full conversion'
+hg -q convert --datesort git-repo2 fullrepo
+glog -R fullrepo
+hg -R fullrepo manifest --debug
+
+splitrepo 'octopus merge' 'foo bar baz'
+
+splitrepo 'only some parents of an octopus merge; "discard" a head' 'foo baz quux'
+
--- a/tests/test-convert-git.out
+++ b/tests/test-convert-git.out
@@ -23,3 +23,68 @@ Merge branch other
 committer: test <test@example.org>
 
 
+% full conversion
+o    9 "Discard change to foo" files: foo
+|\
+| o  8 "change foo" files: foo
+| |
+o |  7 "change bar" files: bar
+|/
+o    6 "(octopus merge fixup)" files:
+|\
+| o    5 "Octopus merge" files: baz
+| |\
+o | |  4 "add baz" files: baz
+| | |
++---o  3 "add bar" files: bar
+| |
+o |  2 "add quux" files: quux
+| |
+| o  1 "change foo" files: foo
+|/
+o  0 "add foo" files: foo
+
+245a3b8bc653999c2b22cdabd517ccb47aecafdf 644 bar
+354ae8da6e890359ef49ade27b68bbc361f3ca88 644 baz
+9277c9cc8dd4576fc01a17939b4351e5ada93466 644 foo
+88dfeab657e8cf2cef3dec67b914f49791ae76b1 644 quux
+% foo bar baz: octopus merge
+o    8 "Discard change to foo" files: foo
+|\
+| o  7 "change foo" files: foo
+| |
+o |  6 "change bar" files: bar
+|/
+o    5 "(octopus merge fixup)" files:
+|\
+| o    4 "Octopus merge" files: baz
+| |\
+o | |  3 "add baz" files: baz
+| | |
++---o  2 "add bar" files: bar
+| |
+| o  1 "change foo" files: foo
+|/
+o  0 "add foo" files: foo
+
+245a3b8bc653999c2b22cdabd517ccb47aecafdf 644 bar
+354ae8da6e890359ef49ade27b68bbc361f3ca88 644 baz
+9277c9cc8dd4576fc01a17939b4351e5ada93466 644 foo
+% foo baz quux: only some parents of an octopus merge; "discard" a head
+o  6 "Discard change to foo" files: foo
+|
+o  5 "change foo" files: foo
+|
+o    4 "Octopus merge" files:
+|\
+| o  3 "add baz" files: baz
+| |
+| o  2 "add quux" files: quux
+| |
+o |  1 "change foo" files: foo
+|/
+o  0 "add foo" files: foo
+
+354ae8da6e890359ef49ade27b68bbc361f3ca88 644 baz
+9277c9cc8dd4576fc01a17939b4351e5ada93466 644 foo
+88dfeab657e8cf2cef3dec67b914f49791ae76b1 644 quux
--- a/tests/test-convert-svn
+++ b/tests/test-convert-svn
@@ -51,3 +51,10 @@ cd ..
 echo % test incremental conversion
 hg convert $svnurl
 
+echo % test filemap
+echo 'include b' > filemap
+hg convert --filemap filemap $svnurl fmap
+echo '[extensions]' >> $HGRCPATH
+echo 'hgext.graphlog =' >> $HGRCPATH
+hg glog -R fmap --template '#rev# #desc|firstline# files: #files#\n'
+
--- a/tests/test-convert-svn.out
+++ b/tests/test-convert-svn.out
@@ -30,3 +30,15 @@ scanning source...
 sorting...
 converting...
 0 changeb
+% test filemap
+initializing destination fmap repository
+scanning source...
+sorting...
+converting...
+2 init
+1 changea
+0 changeb
+o  1 changeb files: b
+|
+o  0 changea files: b
+
--- a/tests/test-copy-move-merge.out
+++ b/tests/test-copy-move-merge.out
@@ -2,6 +2,14 @@ 1 files updated, 0 files merged, 2 files
 resolving manifests
  overwrite None partial False
  ancestor 583c7b748052 local fb3948d97f07+ remote 40da226db0f0
+  searching for copies back to rev 1
+  unmatched files in other:
+   b
+   c
+  all copies found (* = to merge, ! = divergent):
+   c -> a *
+   b -> a *
+  checking for directory renames
  a: remote moved to c -> m
  a: remote moved to b -> m
 copying a to b
--- a/tests/test-double-merge.out
+++ b/tests/test-double-merge.out
@@ -1,6 +1,12 @@
 resolving manifests
  overwrite None partial False
  ancestor 310fd17130da local 2092631ce82b+ remote 7731dad1c2b9
+  searching for copies back to rev 1
+  unmatched files in other:
+   bar
+  all copies found (* = to merge, ! = divergent):
+   bar -> foo *
+  checking for directory renames
  foo: versions differ -> m
  foo: remote copied to bar -> m
 copying foo to bar
--- a/tests/test-issue522.out
+++ b/tests/test-issue522.out
@@ -4,6 +4,9 @@ 1 files updated, 0 files merged, 0 files
 resolving manifests
  overwrite None partial False
  ancestor bbd179dfa0a7 local 71766447bdbb+ remote 4d9e78aaceee
+  searching for copies back to rev 1
+  unmatched files in local:
+   bar
  foo: remote is newer -> g
 getting foo
 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
--- a/tests/test-issue672.out
+++ b/tests/test-issue672.out
@@ -4,6 +4,12 @@ 1 files updated, 0 files merged, 1 files
 resolving manifests
  overwrite None partial False
  ancestor 81f4b099af3d local c64f439569a9+ remote 2f8037f47a5c
+  searching for copies back to rev 1
+  unmatched files in other:
+   1a
+  all copies found (* = to merge, ! = divergent):
+   1a -> 1 
+  checking for directory renames
  1: other deleted -> r
  1a: remote created -> g
 removing 1
@@ -15,6 +21,12 @@ 1 files updated, 0 files merged, 1 files
 resolving manifests
  overwrite None partial False
  ancestor c64f439569a9 local ac7575e3c052+ remote 746e9549ea96
+  searching for copies back to rev 1
+  unmatched files in local:
+   1a
+  all copies found (* = to merge, ! = divergent):
+   1a -> 1 *
+  checking for directory renames
  1a: local moved to 1 -> m
 merging 1a and 1
 my 1a@ac7575e3c052+ other 1@746e9549ea96 ancestor 1@81f4b099af3d
@@ -24,6 +36,12 @@ 1 files updated, 0 files merged, 1 files
 resolving manifests
  overwrite None partial False
  ancestor c64f439569a9 local 746e9549ea96+ remote ac7575e3c052
+  searching for copies back to rev 1
+  unmatched files in other:
+   1a
+  all copies found (* = to merge, ! = divergent):
+   1a -> 1 *
+  checking for directory renames
  1: remote moved to 1a -> m
 copying 1 to 1a
 merging 1 and 1a
--- a/tests/test-merge-commit.out
+++ b/tests/test-merge-commit.out
@@ -23,6 +23,7 @@ 0:2665aaee66e9
 resolving manifests
  overwrite None partial False
  ancestor 0a3ab4856510 local 2d2f9a22c82b+ remote 7d3b554bfdf1
+  searching for copies back to rev 1
  bar: versions differ -> m
 merging bar
 my bar@2d2f9a22c82b+ other bar@7d3b554bfdf1 ancestor bar@0a3ab4856510
@@ -68,6 +69,7 @@ 0:2665aaee66e9
 resolving manifests
  overwrite None partial False
  ancestor 0a3ab4856510 local 2d2f9a22c82b+ remote 96ab80c60897
+  searching for copies back to rev 1
  bar: versions differ -> m
 merging bar
 my bar@2d2f9a22c82b+ other bar@96ab80c60897 ancestor bar@0a3ab4856510
--- a/tests/test-merge7.out
+++ b/tests/test-merge7.out
@@ -24,6 +24,7 @@ warning: conflicts during merge.
 resolving manifests
  overwrite None partial False
  ancestor faaea63e63a9 local 451c744aabcc+ remote a070d41e8360
+  searching for copies back to rev 1
  test.txt: versions differ -> m
 merging test.txt
 my test.txt@451c744aabcc+ other test.txt@a070d41e8360 ancestor test.txt@faaea63e63a9
--- a/tests/test-rename-dir-merge.out
+++ b/tests/test-rename-dir-merge.out
@@ -9,6 +9,18 @@ 2 files updated, 0 files merged, 2 files
 resolving manifests
  overwrite None partial False
  ancestor f9b20c0d4c51 local ce36d17b18fb+ remote 55119e611c80
+  searching for copies back to rev 1
+  unmatched files in local:
+   a/c
+  unmatched files in other:
+   b/a
+   b/b
+  all copies found (* = to merge, ! = divergent):
+   b/a -> a/a 
+   b/b -> a/b 
+  checking for directory renames
+  dir a/ -> b/
+  file a/c -> b/c
  a/c: remote renamed directory to b/c -> d
  a/b: other deleted -> r
  a/a: other deleted -> r
@@ -34,6 +46,18 @@ 0 files updated, 0 files merged, 1 files
 resolving manifests
  overwrite None partial False
  ancestor f9b20c0d4c51 local 55119e611c80+ remote ce36d17b18fb
+  searching for copies back to rev 1
+  unmatched files in local:
+   b/a
+   b/b
+  unmatched files in other:
+   a/c
+  all copies found (* = to merge, ! = divergent):
+   b/a -> a/a 
+   b/b -> a/b 
+  checking for directory renames
+  dir a/ -> b/
+  file a/c -> b/c
  None: local renamed directory to b/c -> d
 getting a/c to b/c
 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
--- a/tests/test-rename-merge1.out
+++ b/tests/test-rename-merge1.out
@@ -4,6 +4,17 @@ merge
 resolving manifests
  overwrite None partial False
  ancestor af1939970a1c local f26ec4fc3fa3+ remote 8e765a822af2
+  searching for copies back to rev 1
+  unmatched files in local:
+   c2
+  unmatched files in other:
+   b
+   b2
+  all copies found (* = to merge, ! = divergent):
+   c2 -> a2 
+   b -> a *
+   b2 -> a2 
+  checking for directory renames
  a2: divergent renames -> dr
  a: remote moved to b -> m
  b2: remote created -> g
--- a/tests/test-up-local-change.out
+++ b/tests/test-up-local-change.out
@@ -17,6 +17,9 @@ summary:     1
 resolving manifests
  overwrite False partial False
  ancestor 33aaa84a386b local 33aaa84a386b+ remote 802f095af299
+  searching for copies back to rev 1
+  unmatched files in other:
+   b
  a: versions differ -> m
  b: remote created -> g
 merging a
@@ -50,6 +53,9 @@ summary:     1
 resolving manifests
  overwrite False partial False
  ancestor 33aaa84a386b local 33aaa84a386b+ remote 802f095af299
+  searching for copies back to rev 1
+  unmatched files in other:
+   b
  a: versions differ -> m
  b: remote created -> g
 merging a
@@ -100,6 +106,7 @@ failed
 resolving manifests
  overwrite False partial False
  ancestor 33aaa84a386b local 802f095af299+ remote 030602aee63d
+  searching for copies back to rev 1
  a: versions differ -> m
  b: versions differ -> m
 merging a