changeset 5076:ef338e34a906

convert: look up copies in getchanges instead of getcommit. svn: defer path expansion until getchanges, to reduce latency as well as memory usage when converting incrementally.
author Brendan Cully <brendan@kublai.com>
date Sun, 05 Aug 2007 12:03:27 -0700
parents 514c06098e9c
children 487659a90497
files hgext/convert/__init__.py hgext/convert/common.py hgext/convert/cvs.py hgext/convert/git.py hgext/convert/hg.py hgext/convert/subversion.py
diffstat 6 files changed, 30 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/convert/__init__.py
+++ b/hgext/convert/__init__.py
@@ -193,7 +193,8 @@ class convert(object):
         do_copies = hasattr(self.dest, 'copyfile')
         filenames = []
 
-        for f, v in self.source.getchanges(rev):
+        files, copies = self.source.getchanges(rev)
+        for f, v in files:
             newf = self.mapfile(f)
             if not newf:
                 continue
@@ -206,8 +207,8 @@ class convert(object):
                 e = self.source.getmode(f, v)
                 self.dest.putfile(newf, e, data)
                 if do_copies:
-                    if f in commit.copies:
-                        copyf = self.mapfile(commit.copies[f])
+                    if f in copies:
+                        copyf = self.mapfile(copies[f])
                         if copyf:
                             # Merely marks that a copy happened.
                             self.dest.copyfile(copyf, newf)
--- a/hgext/convert/common.py
+++ b/hgext/convert/common.py
@@ -3,15 +3,13 @@
 class NoRepo(Exception): pass
 
 class commit(object):
-    def __init__(self, author, date, desc, parents, branch=None, rev=None,
-                 copies={}):
+    def __init__(self, author, date, desc, parents, branch=None, rev=None):
         self.author = author
         self.date = date
         self.desc = desc
         self.parents = parents
         self.branch = branch
         self.rev = rev
-        self.copies = copies
 
 class converter_source(object):
     """Conversion source interface"""
@@ -42,10 +40,12 @@ class converter_source(object):
         raise NotImplementedError()
 
     def getchanges(self, version):
-        """Return sorted list of (filename, id) tuples for all files changed in rev.
+        """Returns a tuple of (files, copies)
+        Files is a sorted list of (filename, id) tuples for all files changed
+        in version, where id is the source revision id of the file.
 
-        id just tells us which revision to return in getfile(), e.g. in
-        git it's an object hash."""
+        copies is a dictionary of dest: source
+        """
         raise NotImplementedError()
 
     def getcommit(self, version):
--- a/hgext/convert/cvs.py
+++ b/hgext/convert/cvs.py
@@ -250,7 +250,7 @@ class convert_cvs(converter_source):
         files = self.files[rev]
         cl = files.items()
         cl.sort()
-        return cl
+        return (cl, {})
 
     def getcommit(self, rev):
         return self.changeset[rev]
--- a/hgext/convert/git.py
+++ b/hgext/convert/git.py
@@ -48,7 +48,7 @@ class convert_git(converter_source):
             s = (m[1] == "120000")
             self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
             changes.append((f, h))
-        return changes
+        return (changes, {})
 
     def getcommit(self, version):
         c = self.catfile(version, "commit") # read the commit hash
--- a/hgext/convert/hg.py
+++ b/hgext/convert/hg.py
@@ -151,7 +151,7 @@ class mercurial_source(converter_source)
         m, a, r = self.repo.status(ctx.parents()[0].node(), ctx.node())[:3]
         changes = [(name, rev) for name in m + a + r]
         changes.sort()
-        return changes
+        return (changes, self.getcopies(ctx))
 
     def getcopies(self, ctx):
         added = self.repo.status(ctx.parents()[0].node(), ctx.node())[1]
@@ -168,7 +168,7 @@ class mercurial_source(converter_source)
         parents = [hex(p.node()) for p in ctx.parents() if p.node() != nullid]
         return commit(author=ctx.user(), date=util.datestr(ctx.date()),
                       desc=ctx.description(), parents=parents,
-                      branch=ctx.branch(), copies=self.getcopies(ctx))
+                      branch=ctx.branch())
 
     def gettags(self):
         tags = [t for t in self.repo.tagslist() if t[0] != 'tip']
--- a/hgext/convert/subversion.py
+++ b/hgext/convert/subversion.py
@@ -98,7 +98,7 @@ class convert_svn(converter_source):
             self.module = self.url[len(self.base):]
             self.modulemap = {} # revision, module
             self.commits = {}
-            self.files = {}
+            self.paths = {}
             self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
         except SubversionException, e:
             raise NoRepo("couldn't open SVN repo %s" % self.url)
@@ -173,12 +173,14 @@ class convert_svn(converter_source):
 
     def getchanges(self, rev):
         self.modecache = {}
-        files = self.files[rev]
-        cl = files
-        cl.sort()
+        (paths, parents) = self.paths[rev]
+        files, copies = self.expandpaths(rev, paths, parents)
+        files.sort()
+        files = zip(files, [rev] * len(files))
+
         # caller caches the result, so free it here to release memory
-        del self.files[rev]
-        return cl
+        del self.paths[rev]
+        return (files, copies)
 
     def getcommit(self, rev):
         if rev not in self.commits:
@@ -350,8 +352,14 @@ class convert_svn(converter_source):
         copies = {}
         revnum = self.revnum(rev)
 
+        if revnum in self.modulemap:
+            new_module = self.modulemap[revnum]
+            if new_module != self.module:
+                self.module = new_module
+                self.reparent(self.module)
+
         for path, ent in paths:
-            # self.ui.write("path %s\n" % path)
+            self.ui.write("path %s\n" % path)
             entrypath = get_entry_from_path(path, module=self.module)
             entry = entrypath.decode(self.encoding)
 
@@ -554,12 +562,7 @@ class convert_svn(converter_source):
                     continue
                 paths.append((path, ent))
 
-            entries, copies = self.expandpaths(rev, paths, parents)
-            # a list of (filename, id) where id lets us retrieve the file.
-            # eg in git, id is the object hash. for svn it'll be the
-            self.files[rev] = zip(entries, [rev] * len(entries))
-            if not entries:
-                return
+            self.paths[rev] = (paths, parents)
 
             # Example SVN datetime. Includes microseconds.
             # ISO-8601 conformant
@@ -579,7 +582,6 @@ class convert_svn(converter_source):
                           date=util.datestr(date),
                           desc=log,
                           parents=parents,
-                          copies=copies,
                           branch=branch,
                           rev=rev.encode('utf-8'))