comparison mercurial/revlog.py @ 1598:14d1f1868bf6

cleanup of revlog.group when repository is local

revlog.group cached every chunk from the revlog; that behaviour was needed to minimize round trips with old-http. We now cache the revlog data ~4 MB at a time.

Server-side memory use when pulling drops to a maximum of 35 MB, whereas more than 160 MB was used without this patch when cloning the Linux kernel repository. Cloning takes slightly longer, mainly because of the integrity check in revlog.revision.

before: 110.25user 20.90system 2:52.00elapsed 76%CPU (0avgtext+0avgdata 0maxresident)k
        0inputs+0outputs (0major+708707minor)pagefaults 0swaps
after:  117.56user 18.86system 2:50.43elapsed 80%CPU (0avgtext+0avgdata 0maxresident)k
        0inputs+0outputs (0major+748366minor)pagefaults 0swaps
author Benoit Boissinot <benoit.boissinot@ens-lyon.org>
date Tue, 27 Dec 2005 13:09:49 -0600
parents 59b3639df0a9
children bfa90d9a3b77
comparing 1597:96b47ef8f740 with 1598:14d1f1868bf6
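
The patch replaces per-chunk reads with a windowed read cache. As a rough sketch of the pattern (ChunkCache and all names below are invented for illustration; this is not Mercurial's API): on a miss, read a window of at least ~4 MB starting at the requested offset, then serve later overlapping reads from memory.

    class ChunkCache(object):
        """Read-through window cache over a file (illustrative only)."""
        WINDOW = 4096 * 1024  # ~4 MB, the window size the patch uses

        def __init__(self, path):
            self.path = path
            self.window = None  # (start offset, cached bytes)

        def read(self, start, length):
            end = start + length
            if self.window is not None:
                wstart, wdata = self.window
                if start >= wstart and end <= wstart + len(wdata):
                    # hit: slice the request out of the cached window
                    return wdata[start - wstart:end - wstart]
            # miss: read a whole window from the requested offset
            f = open(self.path, 'rb')
            try:
                f.seek(start)
                self.window = (start, f.read(max(self.WINDOW, length)))
            finally:
                f.close()
            return self.window[1][:length]

Sequential access, the common case when reconstructing delta chains, then costs one file read per ~4 MB instead of one per chunk.
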
186 """ 186 """
187 self.indexfile = indexfile 187 self.indexfile = indexfile
188 self.datafile = datafile 188 self.datafile = datafile
189 self.opener = opener 189 self.opener = opener
190 self.cache = None 190 self.cache = None
191 self.chunkcache = None
191 192
192 try: 193 try:
193 i = self.opener(self.indexfile).read() 194 i = self.opener(self.indexfile).read()
194 except IOError, inst: 195 except IOError, inst:
195 if inst.errno != errno.ENOENT: 196 if inst.errno != errno.ENOENT:
@@ -471,58 +472,73 @@
 
     def patches(self, t, pl):
         """apply a list of patches to a string"""
         return mdiff.patches(t, pl)
 
+    def chunk(self, rev):
+        start, length = self.start(rev), self.length(rev)
+        end = start + length
+
+        def loadcache():
+            cache_length = max(4096 * 1024, length)  # 4 MB
+            df = self.opener(self.datafile)
+            df.seek(start)
+            self.chunkcache = (start, df.read(cache_length))
+
+        if not self.chunkcache:
+            loadcache()
+
+        cache_start = self.chunkcache[0]
+        cache_end = cache_start + len(self.chunkcache[1])
+        if start >= cache_start and end <= cache_end:
+            # it is cached
+            offset = start - cache_start
+        else:
+            loadcache()
+            offset = 0
+
+        #def checkchunk():
+        #    df = self.opener(self.datafile)
+        #    df.seek(start)
+        #    return df.read(length)
+        #assert self.chunkcache[1][offset:offset + length] == checkchunk()
+        return decompress(self.chunkcache[1][offset:offset + length])
+
     def delta(self, node):
         """return or calculate a delta between a node and its predecessor"""
         r = self.rev(node)
         b = self.base(r)
         if r == b:
             return self.diff(self.revision(self.node(r - 1)),
                              self.revision(node))
         else:
-            f = self.opener(self.datafile)
-            f.seek(self.start(r))
-            data = f.read(self.length(r))
-            return decompress(data)
+            return self.chunk(r)
 
     def revision(self, node):
         """return an uncompressed revision of a given node"""
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]
 
         # look up what we need to read
         text = None
         rev = self.rev(node)
-        start, length, base, link, p1, p2, node = self.index[rev]
-        end = start + length
-        if base != rev: start = self.start(base)
+        base = self.base(rev)
 
         # do we have useful data cached?
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
-            start = self.start(base + 1)
             text = self.cache[2]
-            last = 0
-
-        f = self.opener(self.datafile)
-        f.seek(start)
-        data = f.read(end - start)
-
-        if text is None:
-            last = self.length(base)
-            text = decompress(data[:last])
+        else:
+            text = self.chunk(base)
 
         bins = []
         for r in xrange(base + 1, rev + 1):
-            s = self.length(r)
-            bins.append(decompress(data[last:last + s]))
-            last = last + s
+            bins.append(self.chunk(r))
 
         text = mdiff.patches(text, bins)
 
+        p1, p2 = self.parents(node)
         if node != hash(text, p1, p2):
             raise RevlogError(_("integrity check failed on %s:%d")
                               % (self.datafile, rev))
 
         self.cache = (node, rev, text)
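
After this change, delta() returns the stored chunk directly, and revision() rebuilds a text by decompressing the chain base with chunk(base) and applying each later delta with mdiff.patches. A minimal sketch of that walk, with hypothetical names (reconstruct, chunks, and apply_delta are illustrative stand-ins, not revlog identifiers), including the self.cache shortcut where a previously reconstructed revision inside the chain replaces the base:

    def reconstruct(chunks, base, rev, apply_delta, cache=None):
        # cache is an optional (cached_rev, cached_text) pair, like
        # revlog's self.cache; if it falls inside [base, rev) we can
        # resume the delta chain from there instead of the full text.
        if cache is not None and base <= cache[0] < rev:
            start, text = cache
        else:
            start, text = base, chunks[base]  # full text at the chain base
        for r in range(start + 1, rev + 1):
            text = apply_delta(text, chunks[r])  # one stored delta per rev
        return text
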
@@ -648,113 +664,54 @@
                 gy = y.next()
             else:
                 #print "next x"
                 gx = x.next()
 
-    def group(self, nodelist, lookup, infocollect = None):
+    def group(self, nodelist, lookup, infocollect=None):
         """calculate a delta group
 
         Given a list of changeset revs, return a set of deltas and
         metadata corresponding to nodes. The first delta is
         parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
         have this parent as it has all history before these
         changesets. parent is parent[0]
         """
         revs = [self.rev(n) for n in nodelist]
-        needed = dict.fromkeys(revs, 1)
 
         # if we don't have any revisions touched by these changesets, bail
         if not revs:
             yield struct.pack(">l", 0)
             return
 
         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))
 
-        # for each delta that isn't contiguous in the log, we need to
-        # reconstruct the base, reconstruct the result, and then
-        # calculate the delta. We also need to do this where we've
-        # stored a full version and not a delta
-        for i in xrange(0, len(revs) - 1):
-            a, b = revs[i], revs[i + 1]
-            if a + 1 != b or self.base(b) == b:
-                for j in xrange(self.base(a), a + 1):
-                    needed[j] = 1
-                for j in xrange(self.base(b), b + 1):
-                    needed[j] = 1
-
-        # calculate spans to retrieve from datafile
-        needed = needed.keys()
-        needed.sort()
-        spans = []
-        oo = -1
-        ol = 0
-        for n in needed:
-            if n < 0: continue
-            o = self.start(n)
-            l = self.length(n)
-            if oo + ol == o: # can we merge with the previous?
-                nl = spans[-1][2]
-                nl.append((n, l))
-                ol += l
-                spans[-1] = (oo, ol, nl)
-            else:
-                oo = o
-                ol = l
-                spans.append((oo, ol, [(n, l)]))
-
-        # read spans in, divide up chunks
-        chunks = {}
-        for span in spans:
-            # we reopen the file for each span to make http happy for now
-            f = self.opener(self.datafile)
-            f.seek(span[0])
-            data = f.read(span[1])
-
-            # divide up the span
-            pos = 0
-            for r, l in span[2]:
-                chunks[r] = decompress(data[pos: pos + l])
-                pos += l
-
         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
-            bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
+            bins = [self.chunk(r) for r in xrange(base + 1, rev + 1)]
             return mdiff.patches(text, bins)
 
         # build deltas
-        deltas = []
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
-            n = self.node(b)
+            na = self.node(a)
+            nb = self.node(b)
 
             if infocollect is not None:
-                infocollect(n)
+                infocollect(nb)
 
             # do we need to construct a new delta?
             if a + 1 != b or self.base(b) == b:
-                if a >= 0:
-                    base = self.base(a)
-                    ta = chunks[self.base(a)]
-                    ta = construct(ta, base, a)
-                else:
-                    ta = ""
-
-                base = self.base(b)
-                if a > base:
-                    base = a
-                    tb = ta
-                else:
-                    tb = chunks[self.base(b)]
-                    tb = construct(tb, base, b)
+                ta = self.revision(na)
+                tb = self.revision(nb)
                 d = self.diff(ta, tb)
             else:
-                d = chunks[b]
+                d = self.chunk(b)
 
-            p = self.parents(n)
-            meta = n + p[0] + p[1] + lookup(n)
+            p = self.parents(nb)
+            meta = nb + p[0] + p[1] + lookup(nb)
             l = struct.pack(">l", len(meta) + len(d) + 4)
             yield l
             yield meta
             yield d
 
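
Each entry group() yields is framed as a 4-byte big-endian length (which counts itself), 80 bytes of metadata (node, p1, p2, and the linknode returned by lookup, 20 bytes each), then the delta; a zero-length frame terminates the stream, as the empty-revs branch above shows. A small sketch of that framing, where frame() is a hypothetical helper rather than revlog code:

    import struct

    def frame(node, p1, p2, linknode, delta):
        # mirrors: meta = nb + p[0] + p[1] + lookup(nb)
        meta = node + p1 + p2 + linknode
        # the length prefix counts its own 4 bytes: len(meta) + len(d) + 4
        return struct.pack(">l", 4 + len(meta) + len(delta)) + meta + delta

    # an empty group is just the terminator, cf. yield struct.pack(">l", 0)
    end_of_stream = struct.pack(">l", 0)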