changeset 635:85e2209d401c

Protocol switch from using generators to stream-like objects. This allows the the pull side to precisely control how much data is read so that another encapsulation layer is not needed. An http client gets a response with a finite size. Because ssh clients need to keep the stream open, we must not read more data than is sent in a response. But due to the streaming nature of the changegroup scheme, only the piece that's parsing the data knows how far it's allowed to read. This means the generator scheme isn't fine-grained enough. Instead we need file-like objects with a read(x) method. This switches everything for push/pull over to using file-like objects rather than generators.
author Matt Mackall <mpm@selenic.com>
date Wed, 06 Jul 2005 22:20:12 -0800
parents da5378d39269
children ac0ec421e3a5
files mercurial/commands.py mercurial/hg.py mercurial/hgweb.py
diffstat 3 files changed, 74 insertions(+), 72 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -867,18 +867,21 @@ def serve(ui, repo, **opts):
     """export the repository via HTTP"""
 
     if opts["stdio"]:
+        fin, fout = sys.stdin, sys.stdout
+        sys.stdout = sys.stderr
+
         def getarg():
-            argline = sys.stdin.readline()[:-1]
+            argline = fin.readline()[:-1]
             arg, l = argline.split()
-            val = sys.stdin.read(int(l))
+            val = fin.read(int(l))
             return arg, val
         def respond(v):
-            sys.stdout.write("%d\n" % len(v))
-            sys.stdout.write(v)
-            sys.stdout.flush()
+            fout.write("%d\n" % len(v))
+            fout.write(v)
+            fout.flush()
 
         while 1:
-            cmd = sys.stdin.readline()[:-1]
+            cmd = fin.readline()[:-1]
             if cmd == '':
                 return
             if cmd == "heads":
@@ -903,24 +906,13 @@ def serve(ui, repo, **opts):
                 arg, roots = getarg()
                 nodes = map(hg.bin, roots.split(" "))
 
-                b = []
-                t = 0
-                for chunk in repo.changegroup(nodes):
-                    t += len(chunk)
-                    b.append(chunk)
-                    if t > 4096:
-                        sys.stdout.write(struct.pack(">l", t))
-                        for c in b:
-                            sys.stdout.write(c)
-                        t = 0
-                        b = []
+                cg = repo.changegroup(nodes)
+                while 1:
+                    d = cg.read(4096)
+                    if not d: break
+                    fout.write(d)
 
-                sys.stdout.write(struct.pack(">l", t))
-                for c in b:
-                    sys.stdout.write(c)
-
-                sys.stdout.write(struct.pack(">l", -1))
-                sys.stdout.flush()
+                out.flush()
 
     def openlog(opt, default):
         if opts[opt] and opts[opt] != '-': return open(opts[opt], 'w')
--- a/mercurial/hg.py
+++ b/mercurial/hg.py
@@ -1025,35 +1025,6 @@ class localrepository:
         return remote.addchangegroup(cg)
 
     def changegroup(self, basenodes):
-        nodes = self.newer(basenodes)
-
-        # construct the link map
-        linkmap = {}
-        for n in nodes:
-            linkmap[self.changelog.rev(n)] = n
-
-        # construct a list of all changed files
-        changed = {}
-        for n in nodes:
-            c = self.changelog.read(n)
-            for f in c[3]:
-                changed[f] = 1
-        changed = changed.keys()
-        changed.sort()
-
-        # the changegroup is changesets + manifests + all file revs
-        revs = [ self.changelog.rev(n) for n in nodes ]
-
-        for y in self.changelog.group(linkmap): yield y
-        for y in self.manifest.group(linkmap): yield y
-        for f in changed:
-            yield struct.pack(">l", len(f) + 4) + f
-            g = self.file(f).group(linkmap)
-            for y in g:
-                yield y
-
-    def addchangegroup(self, generator):
-
         class genread:
             def __init__(self, generator):
                 self.g = generator
@@ -1067,6 +1038,40 @@ class localrepository:
                 d, self.buf = self.buf[:l], self.buf[l:]
                 return d
 
+        def gengroup():
+            nodes = self.newer(basenodes)
+
+            # construct the link map
+            linkmap = {}
+            for n in nodes:
+                linkmap[self.changelog.rev(n)] = n
+
+            # construct a list of all changed files
+            changed = {}
+            for n in nodes:
+                c = self.changelog.read(n)
+                for f in c[3]:
+                    changed[f] = 1
+            changed = changed.keys()
+            changed.sort()
+
+            # the changegroup is changesets + manifests + all file revs
+            revs = [ self.changelog.rev(n) for n in nodes ]
+
+            for y in self.changelog.group(linkmap): yield y
+            for y in self.manifest.group(linkmap): yield y
+            for f in changed:
+                yield struct.pack(">l", len(f) + 4) + f
+                g = self.file(f).group(linkmap)
+                for y in g:
+                    yield y
+
+            yield struct.pack(">l", 0)
+
+        return genread(gengroup())
+
+    def addchangegroup(self, source):
+
         def getchunk():
             d = source.read(4)
             if not d: return ""
@@ -1087,10 +1092,9 @@ class localrepository:
         def revmap(x):
             return self.changelog.rev(x)
 
-        if not generator: return
+        if not source: return
         changesets = files = revisions = 0
 
-        source = genread(generator)
         tr = self.transaction()
 
         # pull off the changeset group
@@ -1592,17 +1596,27 @@ class httprepository:
 
     def changegroup(self, nodes):
         n = " ".join(map(hex, nodes))
-        zd = zlib.decompressobj()
         f = self.do_cmd("changegroup", roots=n)
         bytes = 0
-        while 1:
-            d = f.read(4096)
-            bytes += len(d)
-            if not d:
-                yield zd.flush()
-                break
-            yield zd.decompress(d)
-        self.ui.note("%d bytes of data transfered\n" % bytes)
+
+        class zread:
+            def __init__(self, f):
+                self.zd = zlib.decompressobj()
+                self.f = f
+                self.buf = ""
+            def read(self, l):
+                while l > len(self.buf):
+                    r = f.read(4096)
+                    if r:
+                        self.buf += self.zd.decompress(r)
+                    else:
+                        self.buf += self.zd.flush()
+                        break
+                d, self.buf = self.buf[:l], self.buf[l:]
+                return d
+
+        return zread(f)
+
 
 class sshrepository:
     def __init__(self, ui, path):
@@ -1680,14 +1694,7 @@ class sshrepository:
     def changegroup(self, nodes):
         n = " ".join(map(hex, nodes))
         f = self.do_cmd("changegroup", roots=n)
-        bytes = 0
-        while 1:
-            l = struct.unpack(">l", f.read(4))[0]
-            if l == -1: break
-            d = f.read(l)
-            bytes += len(d)
-            yield d
-        self.ui.note("%d bytes of data transfered\n" % bytes)
+        return self.pipei
 
 def repository(ui, path=None, create=0):
     if path:
--- a/mercurial/hgweb.py
+++ b/mercurial/hgweb.py
@@ -687,7 +687,10 @@ class hgweb:
                 nodes = map(bin, args['roots'][0].split(" "))
 
             z = zlib.compressobj()
-            for chunk in self.repo.changegroup(nodes):
+            f = self.repo.changegroup(nodes)
+            while 1:
+                chunk = f.read(4096)
+                if not chunk: break
                 sys.stdout.write(z.compress(chunk))
 
             sys.stdout.write(z.flush())