1 #!/usr/bin/python |
1 #!/usr/bin/python |
2 import difflib, struct |
2 import difflib, struct, mmap |
3 from cStringIO import StringIO |
3 |
|
4 devzero = file("/dev/zero") |
4 |
5 |
5 def unidiff(a, ad, b, bd, fn): |
6 def unidiff(a, ad, b, bd, fn): |
6 if not a and not b: return "" |
7 if not a and not b: return "" |
7 a = a.splitlines(1) |
8 a = a.splitlines(1) |
8 b = b.splitlines(1) |
9 b = b.splitlines(1) |
50 s = "".join(b[s:t]) |
51 s = "".join(b[s:t]) |
51 bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s) |
52 bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s) |
52 |
53 |
53 return "".join(bin) |
54 return "".join(bin) |
54 |
55 |
|
56 # This attempts to apply a series of patches in time proportional to |
|
57 # the total size of the patches, rather than patches * len(text). This |
|
58 # means rather than shuffling strings around, we shuffle around |
|
59 # pointers to fragments with fragment lists. |
|
60 # |
|
61 # When the fragment lists get too long, we collapse them. To do this |
|
62 # efficiently, we do all our operations inside a buffer created by |
|
63 # mmap and simply use memmove. This avoids creating a bunch of large |
|
64 # temporary string buffers. |
|
65 |
|
66 def patches(a, bins): |
|
67 if not bins: return a |
|
68 |
|
69 plens = [len(x) for x in bins] |
|
70 pl = sum(plens) |
|
71 bl = len(a) + pl |
|
72 tl = bl + bl + pl # enough for the patches and two working texts |
|
73 b1, b2 = 0, bl |
|
74 m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE) |
|
75 |
|
76 # load our original text |
|
77 m.write(a) |
|
78 frags = [(len(a), b1)] |
|
79 |
|
80 # copy all the patches into our segment so we can memmove from them |
|
81 pos = b2 + bl |
|
82 m.seek(pos) |
|
83 for p in bins: m.write(p) |
|
84 |
|
85 def pull(dst, src, l): # pull l bytes from src |
|
86 while l: |
|
87 f = src.pop(0) |
|
88 if f[0] > l: # do we need to split? |
|
89 src.insert(0, (f[0] - l, f[1] + l)) |
|
90 dst.append((l, f[1])) |
|
91 return |
|
92 dst.append(f) |
|
93 l -= f[0] |
|
94 |
|
95 def collect(buf, list): |
|
96 start = buf |
|
97 for l, p in list: |
|
98 m.move(buf, p, l) |
|
99 buf += l |
|
100 return (buf - start, start) |
|
101 |
|
102 for plen in plens: |
|
103 # if our list gets too long, execute it |
|
104 if len(frags) > 128: |
|
105 b2, b1 = b1, b2 |
|
106 frags = [collect(b1, frags)] |
|
107 |
|
108 new = [] |
|
109 end = pos + plen |
|
110 last = 0 |
|
111 while pos < end: |
|
112 p1, p2, l = struct.unpack(">lll", m[pos:pos + 12]) |
|
113 pull(new, frags, p1 - last) # what didn't change |
|
114 pull([], frags, p2 - p1) # what got deleted |
|
115 new.append((l, pos + 12)) # what got added |
|
116 pos += l + 12 |
|
117 last = p2 |
|
118 frags = new + frags # what was left at the end |
|
119 |
|
120 t = collect(b2, frags) |
|
121 |
|
122 return m[t[1]:t[1] + t[0]] |
|
123 |
55 def patch(a, bin): |
124 def patch(a, bin): |
56 last = pos = 0 |
125 last = pos = 0 |
57 r = [] |
126 r = [] |
58 |
127 |
59 c = 0 |
128 c = 0 |