diff mercurial/mdiff.py @ 71:47c9a869adee

Add mdiff.patches to speed up applying thousands of patches to the manifest
author mpm@selenic.com
date Sat, 14 May 2005 10:27:14 -0800
parents b3e2ddff0159
children 4a6ab4d80dc4
line wrap: on
line diff
--- a/mercurial/mdiff.py
+++ b/mercurial/mdiff.py
@@ -1,6 +1,7 @@
 #!/usr/bin/python
-import difflib, struct
-from cStringIO import StringIO
+import difflib, struct, mmap
+
+devzero = file("/dev/zero")
 
 def unidiff(a, ad, b, bd, fn):
     if not a and not b: return ""
@@ -52,6 +53,74 @@ def diff(a, b, sorted=0):
 
     return "".join(bin)
 
+# This attempts to apply a series of patches in time proportional to
+# the total size of the patches, rather than patches * len(text). This
+# means rather than shuffling strings around, we shuffle around
+# pointers to fragments with fragment lists.
+#
+# When the fragment lists get too long, we collapse them. To do this
+# efficiently, we do all our operations inside a buffer created by
+# mmap and simply use memmove. This avoids creating a bunch of large
+# temporary string buffers.
+
+def patches(a, bins):
+    if not bins: return a
+
+    plens = [len(x) for x in bins]
+    pl = sum(plens)
+    bl = len(a) + pl
+    tl = bl + bl + pl # enough for the patches and two working texts
+    b1, b2 = 0, bl
+    m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE)
+
+    # load our original text
+    m.write(a)
+    frags = [(len(a), b1)]
+
+    # copy all the patches into our segment so we can memmove from them
+    pos = b2 + bl
+    m.seek(pos)
+    for p in bins: m.write(p)
+
+    def pull(dst, src, l): # pull l bytes from src
+        while l:
+            f = src.pop(0)
+            if f[0] > l: # do we need to split?
+                src.insert(0, (f[0] - l, f[1] + l))
+                dst.append((l, f[1]))
+                return
+            dst.append(f)
+            l -= f[0]
+
+    def collect(buf, list):
+        start = buf
+        for l, p in list:
+            m.move(buf, p, l)
+            buf += l
+        return (buf - start, start)
+
+    for plen in plens:
+        # if our list gets too long, execute it
+        if len(frags) > 128:
+            b2, b1 = b1, b2
+            frags = [collect(b1, frags)]
+
+        new = []
+        end = pos + plen
+        last = 0
+        while pos < end:
+            p1, p2, l = struct.unpack(">lll", m[pos:pos + 12])
+            pull(new, frags, p1 - last) # what didn't change
+            pull([], frags, p2 - p1)    # what got deleted
+            new.append((l, pos + 12))        # what got added
+            pos += l + 12
+            last = p2
+        frags = new + frags                    # what was left at the end
+
+    t = collect(b2, frags)
+
+    return m[t[1]:t[1] + t[0]]
+
 def patch(a, bin):
     last = pos = 0
     r = []