comparison mercurial/revlog.py @ 1677:11d12bd6e1dc

cleanup of revlog.group when the repository is local. revlog.group cached every chunk from the revlog; this behaviour was needed to minimize round trips with old-http. This patch exports whether the repository is local from the repository object down to the revlog, and then uses the old-http workaround only if the repository is non-local. The maximum server-side memory used when pulling drops to less than 30 MB, whereas without the patch more than 160 MB was used when cloning the Linux kernel repository. The time taken by cloning is roughly the same (although some caching could be implemented if needed): before 110.25user 20.90system 2:52.00elapsed 76%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+708707minor)pagefaults 0swaps after 112.85user 22.98system 2:50.66elapsed 79%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+862862minor)pagefaults 0swaps
author Benoit Boissinot <benoit.boissinot@ens-lyon.org>
date Thu, 08 Dec 2005 15:12:02 +0100
parents 59b3639df0a9
children b345cc4c22c0
comparison
equal deleted inserted replaced
1582:63799b01985c 1677:11d12bd6e1dc
175 Both pieces of the revlog are written to in an append-only 175 Both pieces of the revlog are written to in an append-only
176 fashion, which means we never need to rewrite a file to insert or 176 fashion, which means we never need to rewrite a file to insert or
177 remove data, and can use some simple techniques to avoid the need 177 remove data, and can use some simple techniques to avoid the need
178 for locking while reading. 178 for locking while reading.
179 """ 179 """
180 def __init__(self, opener, indexfile, datafile): 180 def __init__(self, opener, indexfile, datafile, local=True):
181 """ 181 """
182 create a revlog object 182 create a revlog object
183 183
184 opener is a function that abstracts the file opening operation 184 opener is a function that abstracts the file opening operation
185 and can be used to implement COW semantics or the like. 185 and can be used to implement COW semantics or the like.
186 """ 186 """
187 self.indexfile = indexfile 187 self.indexfile = indexfile
188 self.datafile = datafile 188 self.datafile = datafile
189 self.opener = opener 189 self.opener = opener
190 self.cache = None 190 self.cache = None
191 self.local = local # XXX only needed because statichttp
191 192
192 try: 193 try:
193 i = self.opener(self.indexfile).read() 194 i = self.opener(self.indexfile).read()
194 except IOError, inst: 195 except IOError, inst:
195 if inst.errno != errno.ENOENT: 196 if inst.errno != errno.ENOENT:
648 gy = y.next() 649 gy = y.next()
649 else: 650 else:
650 #print "next x" 651 #print "next x"
651 gx = x.next() 652 gx = x.next()
652 653
653 def group(self, nodelist, lookup, infocollect = None): 654 def group(self, nodelist, lookup, infocollect=None):
654 """calculate a delta group 655 """calculate a delta group
655 656
656 Given a list of changeset revs, return a set of deltas and 657 Given a list of changeset revs, return a set of deltas and
657 metadata corresponding to nodes. the first delta is 658 metadata corresponding to nodes. the first delta is
658 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to 659 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
659 have this parent as it has all history before these 660 have this parent as it has all history before these
660 changesets. parent is parent[0] 661 changesets. parent is parent[0]
661 """ 662 """
662 revs = [self.rev(n) for n in nodelist] 663 revs = [self.rev(n) for n in nodelist]
663 needed = dict.fromkeys(revs, 1)
664 664
665 # if we don't have any revisions touched by these changesets, bail 665 # if we don't have any revisions touched by these changesets, bail
666 if not revs: 666 if not revs:
667 yield struct.pack(">l", 0) 667 yield struct.pack(">l", 0)
668 return 668 return
669 669
670 # add the parent of the first rev 670 # add the parent of the first rev
671 p = self.parents(self.node(revs[0]))[0] 671 p = self.parents(self.node(revs[0]))[0]
672 revs.insert(0, self.rev(p)) 672 revs.insert(0, self.rev(p))
673 673
674 # for each delta that isn't contiguous in the log, we need to 674 if self.local:
675 # reconstruct the base, reconstruct the result, and then 675 mm = self.opener(self.datafile)
676 # calculate the delta. We also need to do this where we've 676 def chunk(r):
677 # stored a full version and not a delta 677 o = self.start(r)
678 for i in xrange(0, len(revs) - 1): 678 l = self.length(r)
679 a, b = revs[i], revs[i + 1] 679 mm.seek(o)
680 if a + 1 != b or self.base(b) == b: 680 return decompress(mm.read(l))
681 for j in xrange(self.base(a), a + 1): 681 else:
682 needed[j] = 1 682 # XXX: statichttp workaround
683 for j in xrange(self.base(b), b + 1): 683 needed = dict.fromkeys(revs[1:], 1)
684 needed[j] = 1 684 # for each delta that isn't contiguous in the log, we need to
685 685 # reconstruct the base, reconstruct the result, and then
686 # calculate spans to retrieve from datafile 686 # calculate the delta. We also need to do this where we've
687 needed = needed.keys() 687 # stored a full version and not a delta
688 needed.sort() 688 for i in xrange(0, len(revs) - 1):
689 spans = [] 689 a, b = revs[i], revs[i + 1]
690 oo = -1 690 if a + 1 != b or self.base(b) == b:
691 ol = 0 691 for j in xrange(self.base(a), a + 1):
692 for n in needed: 692 needed[j] = 1
693 if n < 0: continue 693 for j in xrange(self.base(b), b + 1):
694 o = self.start(n) 694 needed[j] = 1
695 l = self.length(n) 695
696 if oo + ol == o: # can we merge with the previous? 696 # calculate spans to retrieve from datafile
697 nl = spans[-1][2] 697 needed = needed.keys()
698 nl.append((n, l)) 698 needed.sort()
699 ol += l 699 spans = []
700 spans[-1] = (oo, ol, nl) 700 oo = -1
701 else: 701 ol = 0
702 oo = o 702 for n in needed:
703 ol = l 703 if n < 0: continue
704 spans.append((oo, ol, [(n, l)])) 704 o = self.start(n)
705 705 l = self.length(n)
706 # read spans in, divide up chunks 706 if oo + ol == o: # can we merge with the previous?
707 chunks = {} 707 nl = spans[-1][2]
708 for span in spans: 708 nl.append((n, l))
709 # we reopen the file for each span to make http happy for now 709 ol += l
710 f = self.opener(self.datafile) 710 spans[-1] = (oo, ol, nl)
711 f.seek(span[0]) 711 else:
712 data = f.read(span[1]) 712 oo = o
713 713 ol = l
714 # divide up the span 714 spans.append((oo, ol, [(n, l)]))
715 pos = 0 715
716 for r, l in span[2]: 716 # read spans in, divide up chunks
717 chunks[r] = decompress(data[pos: pos + l]) 717 chunks = {}
718 pos += l 718 for span in spans:
719 # we reopen the file for each span to make http happy for now
720 f = self.opener(self.datafile)
721 f.seek(span[0])
722 data = f.read(span[1])
723
724 # divide up the span
725 pos = 0
726 for r, l in span[2]:
727 chunks[r] = decompress(data[pos: pos + l])
728 pos += l
729 def chunk(r):
730 return chunks[r]
719 731
720 # helper to reconstruct intermediate versions 732 # helper to reconstruct intermediate versions
721 def construct(text, base, rev): 733 def construct(text, base, rev):
722 bins = [chunks[r] for r in xrange(base + 1, rev + 1)] 734 bins = [chunk(r) for r in xrange(base + 1, rev + 1)]
723 return mdiff.patches(text, bins) 735 return mdiff.patches(text, bins)
724 736
725 # build deltas 737 # build deltas
726 deltas = []
727 for d in xrange(0, len(revs) - 1): 738 for d in xrange(0, len(revs) - 1):
728 a, b = revs[d], revs[d + 1] 739 a, b = revs[d], revs[d + 1]
729 n = self.node(b) 740 n = self.node(b)
730 741
731 if infocollect is not None: 742 if infocollect is not None:
733 744
734 # do we need to construct a new delta? 745 # do we need to construct a new delta?
735 if a + 1 != b or self.base(b) == b: 746 if a + 1 != b or self.base(b) == b:
736 if a >= 0: 747 if a >= 0:
737 base = self.base(a) 748 base = self.base(a)
738 ta = chunks[self.base(a)] 749 ta = chunk(self.base(a))
739 ta = construct(ta, base, a) 750 ta = construct(ta, base, a)
740 else: 751 else:
741 ta = "" 752 ta = ""
742 753
743 base = self.base(b) 754 base = self.base(b)
744 if a > base: 755 if a > base:
745 base = a 756 base = a
746 tb = ta 757 tb = ta
747 else: 758 else:
748 tb = chunks[self.base(b)] 759 tb = chunk(self.base(b))
749 tb = construct(tb, base, b) 760 tb = construct(tb, base, b)
750 d = self.diff(ta, tb) 761 d = self.diff(ta, tb)
751 else: 762 else:
752 d = chunks[b] 763 d = chunk(b)
753 764
754 p = self.parents(n) 765 p = self.parents(n)
755 meta = n + p[0] + p[1] + lookup(n) 766 meta = n + p[0] + p[1] + lookup(n)
756 l = struct.pack(">l", len(meta) + len(d) + 4) 767 l = struct.pack(">l", len(meta) + len(d) + 4)
757 yield l 768 yield l