comparison mercurial/revlog.py @ 5451:0a43875677b1

revlog: break up compression of large deltas

Python's zlib apparently makes an internal copy of strings passed to compress(). To avoid this, compress strings 1M at a time, then join them at the end if the result would be smaller than the original. For initial commits of large but compressible files, this cuts peak memory usage nearly in half.
author Matt Mackall <mpm@selenic.com>
date Thu, 11 Oct 2007 00:46:54 -0500
parents c728424d44c6
children 9d77f2b47eb7
comparison
equal deleted inserted replaced
5450:c728424d44c6 5451:0a43875677b1
59 59
60 def compress(text): 60 def compress(text):
61 """ generate a possibly-compressed representation of text """ 61 """ generate a possibly-compressed representation of text """
62 if not text: 62 if not text:
63 return ("", text) 63 return ("", text)
64 if len(text) < 44: 64 l = len(text)
65 if l < 44:
65 if text[0] == '\0': 66 if text[0] == '\0':
66 return ("", text) 67 return ("", text)
67 return ('u', text) 68 return ('u', text)
68 bin = _compress(text) 69 elif l > 1000000:
69 if len(bin) > len(text): 70 # zlib makes an internal copy, thus doubling memory usage for
71 # large files, so lets do this in pieces
72 z = zlib.compressobj()
73 p = []
74 pos = 0
75 while pos < l:
76 pos2 = pos + 2**20
77 p.append(z.compress(text[pos:pos2]))
78 pos = pos2
79 p.append(z.flush())
80 if sum(map(len, p)) < l:
81 bin = "".join(p)
82 else:
83 bin = _compress(text)
84 if len(bin) > l:
70 if text[0] == '\0': 85 if text[0] == '\0':
71 return ("", text) 86 return ("", text)
72 return ('u', text) 87 return ('u', text)
73 return ("", bin) 88 return ("", bin)
74 89