revlog: break up compression of large deltas
Python's zlib apparently makes an internal copy of strings passed to
compress(). To avoid this, compress strings 1M at a time, then join
them at the end if the result would be smaller than the original.
For initial commits of large but compressible files, this cuts peak
memory usage nearly in half.
#!/bin/sh
hg init t
cd t
# we need a repo with some legacy latin-1 changesets
hg unbundle $TESTDIR/legacy-encoding.hg
hg co
python << EOF
f = file('latin-1', 'w'); f.write("latin-1 e' encoded: \xe9"); f.close()
f = file('utf-8', 'w'); f.write("utf-8 e' encoded: \xc3\xa9"); f.close()
f = file('latin-1-tag', 'w'); f.write("\xe9"); f.close()
EOF
echo % should fail with encoding error
echo "plain old ascii" > a
hg st
HGENCODING=ascii hg ci -l latin-1 -d "1000000 0"
echo % these should work
echo "latin-1" > a
HGENCODING=latin-1 hg ci -l latin-1 -d "1000000 0"
echo "utf-8" > a
HGENCODING=utf-8 hg ci -l utf-8 -d "1000000 0"
HGENCODING=latin-1 hg tag -d "1000000 0" `cat latin-1-tag`
HGENCODING=latin-1 hg branch `cat latin-1-tag`
HGENCODING=latin-1 hg ci -d "1000000 0" -m 'latin1 branch'
rm .hg/branch
echo % ascii
hg --encoding ascii log
echo % latin-1
hg --encoding latin-1 log
echo % utf-8
hg --encoding utf-8 log
echo % ascii
HGENCODING=ascii hg tags
echo % latin-1
HGENCODING=latin-1 hg tags
echo % utf-8
HGENCODING=utf-8 hg tags
echo % ascii
HGENCODING=ascii hg branches
echo % latin-1
HGENCODING=latin-1 hg branches
echo % utf-8
HGENCODING=utf-8 hg branches
echo '[ui]' >> .hg/hgrc
echo 'fallbackencoding = koi8-r' >> .hg/hgrc
echo % utf-8
HGENCODING=utf-8 hg log
HGENCODING=dolphin hg log
HGENCODING=ascii hg branch `cat latin-1-tag`
cp latin-1-tag .hg/branch
HGENCODING=latin-1 hg ci -d "1000000 0" -m 'should fail'
exit 0