comparison mercurial/revlog.py @ 1533:3d11f81c9145

Reduce string duplication in compression code. This cuts down on string copies and allows buffers (instead of strings) to be passed into the compression code.
author mason@suse.com
date Fri, 11 Nov 2005 18:20:19 -0800
parents 46a07392cf28
children 7ae0ce7a3dc4
comparison
equal deleted inserted replaced
1532:27077812fffb 1533:3d11f81c9145
29 s.update(text) 29 s.update(text)
30 return s.digest() 30 return s.digest()
31 31
32 def compress(text): 32 def compress(text):
33 """ generate a possibly-compressed representation of text """ 33 """ generate a possibly-compressed representation of text """
34 if not text: return text 34 if not text: return ("", text)
35 if len(text) < 44: 35 if len(text) < 44:
36 if text[0] == '\0': return text 36 if text[0] == '\0': return ("", text)
37 return 'u' + text 37 return ('u', text)
38 bin = zlib.compress(text) 38 bin = zlib.compress(text)
39 if len(bin) > len(text): 39 if len(bin) > len(text):
40 if text[0] == '\0': return text 40 if text[0] == '\0': return ("", text)
41 return 'u' + text 41 return ('u', text)
42 return bin 42 return ("", bin)
43 43
44 def decompress(bin): 44 def decompress(bin):
45 """ decompress the given input """ 45 """ decompress the given input """
46 if not bin: return bin 46 if not bin: return bin
47 t = bin[0] 47 t = bin[0]
541 base = self.base(t) 541 base = self.base(t)
542 start = self.start(base) 542 start = self.start(base)
543 end = self.end(t) 543 end = self.end(t)
544 if not d: 544 if not d:
545 prev = self.revision(self.tip()) 545 prev = self.revision(self.tip())
546 d = self.diff(prev, text) 546 d = self.diff(prev, str(text))
547 data = compress(d) 547 data = compress(d)
548 dist = end - start + len(data) 548 l = len(data[1]) + len(data[0])
549 dist = end - start + l
549 550
550 # full versions are inserted when the needed deltas 551 # full versions are inserted when the needed deltas
551 # become comparable to the uncompressed text 552 # become comparable to the uncompressed text
552 if not n or dist > len(text) * 2: 553 if not n or dist > len(text) * 2:
553 data = compress(text) 554 data = compress(text)
555 l = len(data[1]) + len(data[0])
554 base = n 556 base = n
555 else: 557 else:
556 base = self.base(t) 558 base = self.base(t)
557 559
558 offset = 0 560 offset = 0
559 if t >= 0: 561 if t >= 0:
560 offset = self.end(t) 562 offset = self.end(t)
561 563
562 e = (offset, len(data), base, link, p1, p2, node) 564 e = (offset, l, base, link, p1, p2, node)
563 565
564 self.index.append(e) 566 self.index.append(e)
565 self.nodemap[node] = n 567 self.nodemap[node] = n
566 entry = struct.pack(indexformat, *e) 568 entry = struct.pack(indexformat, *e)
567 569
568 transaction.add(self.datafile, e[0]) 570 transaction.add(self.datafile, e[0])
569 self.opener(self.datafile, "a").write(data) 571 f = self.opener(self.datafile, "a")
572 if data[0]:
573 f.write(data[0])
574 f.write(data[1])
570 transaction.add(self.indexfile, n * len(entry)) 575 transaction.add(self.indexfile, n * len(entry))
571 self.opener(self.indexfile, "a").write(entry) 576 self.opener(self.indexfile, "a").write(entry)
572 577
573 self.cache = (node, n, text) 578 self.cache = (node, n, text)
574 return node 579 return node
799 # version is not the one we have a delta against. We use 804 # version is not the one we have a delta against. We use
800 # the size of the previous full rev as a proxy for the 805 # the size of the previous full rev as a proxy for the
801 # current size. 806 # current size.
802 807
803 if chain == prev: 808 if chain == prev:
804 cdelta = compress(delta) 809 tempd = compress(delta)
810 cdelta = tempd[0] + tempd[1]
805 811
806 if chain != prev or (end - start + len(cdelta)) > measure * 2: 812 if chain != prev or (end - start + len(cdelta)) > measure * 2:
807 # flush our writes here so we can read it in revision 813 # flush our writes here so we can read it in revision
808 dfh.flush() 814 dfh.flush()
809 ifh.flush() 815 ifh.flush()