comparison mercurial/revlog.py @ 2073:1e6745f78989

Implement data inlined with the index file This patch allows you to optionally inline data bytes with the revlog index file. It saves considerable space and checkout time by reducing the number of inodes, wasted partial blocks and system calls. To use the inline data add this to your .hgrc [revlog] # inline data only works with revlogng format=1 # inline is the only valid flag right now. flags=inline
author mason@suse.com
date Tue, 04 Apr 2006 16:38:43 -0400
parents 74d3f5336b66
children 343aeefb553b
comparison
equal deleted inserted replaced
2072:74d3f5336b66 2073:1e6745f78989
17 demandload(globals(), "sha struct zlib") 17 demandload(globals(), "sha struct zlib")
18 18
19 # revlog version strings 19 # revlog version strings
20 REVLOGV0 = 0 20 REVLOGV0 = 0
21 REVLOGNG = 1 21 REVLOGNG = 1
22
23 # revlog flags
24 REVLOGNGINLINEDATA = (1 << 16)
25
26 def flagstr(flag):
27 if flag == "inline":
28 return REVLOGNGINLINEDATA
29 raise RevlogError(_("unknown revlog flag %s" % flag))
22 30
23 def hash(text, p1, p2): 31 def hash(text, p1, p2):
24 """generate a hash from the given text and its parent hashes 32 """generate a hash from the given text and its parent hashes
25 33
26 This hash combines both the current file contents and its history 34 This hash combines both the current file contents and its history
232 and st.st_ctime == oldst.st_ctime): 240 and st.st_ctime == oldst.st_ctime):
233 return 241 return
234 self.indexstat = st 242 self.indexstat = st
235 if len(i) > 0: 243 if len(i) > 0:
236 v = struct.unpack(versionformat, i[:4])[0] 244 v = struct.unpack(versionformat, i[:4])[0]
237 if v != 0: 245 flags = v & ~0xFFFF
238 flags = v & ~0xFFFF 246 fmt = v & 0xFFFF
239 fmt = v & 0xFFFF 247 if fmt == 0:
240 if fmt != REVLOGNG or (flags & ~(REVLOGNGINLINEDATA)): 248 if flags:
241 raise RevlogError( 249 raise RevlogError(_("index %s invalid flags %x for format v0" %
242 _("unknown version format %d or flags %x on %s") % 250 (self.indexfile, flags)))
243 (v, flags, self.indexfile)) 251 elif fmt == REVLOGNG:
252 if flags & ~REVLOGNGINLINEDATA:
253 raise RevlogError(_("index %s invalid flags %x for revlogng" %
254 (self.indexfile, flags)))
255 else:
256 raise RevlogError(_("index %s invalid format %d" %
257 (self.indexfile, fmt)))
244 self.version = v 258 self.version = v
245 if v == 0: 259 if v == 0:
246 self.indexformat = indexformatv0 260 self.indexformat = indexformatv0
247 else: 261 else:
248 self.indexformat = indexformatng 262 self.indexformat = indexformatng
249 263
250 if i: 264 if i:
251 if st and st.st_size > 10000: 265 if not self.inlinedata() and st and st.st_size > 10000:
252 # big index, let's parse it on demand 266 # big index, let's parse it on demand
253 parser = lazyparser(i, self, self.indexformat) 267 parser = lazyparser(i, self, self.indexformat)
254 self.index = lazyindex(parser) 268 self.index = lazyindex(parser)
255 self.nodemap = lazymap(parser) 269 self.nodemap = lazymap(parser)
256 else: 270 else:
257 self.parseindex(i) 271 self.parseindex(i)
272 if self.inlinedata():
273 # we've already got the entire data file read in, save it
274 # in the chunk data
275 self.chunkcache = (0, i)
258 if self.version != 0: 276 if self.version != 0:
259 e = list(self.index[0]) 277 e = list(self.index[0])
260 type = self.ngtype(e[0]) 278 type = self.ngtype(e[0])
261 e[0] = self.offset_type(0, type) 279 e[0] = self.offset_type(0, type)
262 self.index[0] = e 280 self.index[0] = e
268 def parseindex(self, data): 286 def parseindex(self, data):
269 s = struct.calcsize(self.indexformat) 287 s = struct.calcsize(self.indexformat)
270 l = len(data) 288 l = len(data)
271 self.index = [] 289 self.index = []
272 self.nodemap = {nullid: -1} 290 self.nodemap = {nullid: -1}
291 inline = self.inlinedata()
273 off = 0 292 off = 0
274 n = 0 293 n = 0
275 while off < l: 294 while off < l:
276 e = struct.unpack(self.indexformat, data[off:off + s]) 295 e = struct.unpack(self.indexformat, data[off:off + s])
277 self.index.append(e) 296 self.index.append(e)
278 self.nodemap[e[-1]] = n 297 self.nodemap[e[-1]] = n
279 n += 1 298 n += 1
280 off += s 299 off += s
300 if inline:
301 off += e[1]
281 302
282 def ngoffset(self, q): 303 def ngoffset(self, q):
283 if q & 0xFFFF: 304 if q & 0xFFFF:
284 raise RevlogError(_('%s: incompatible revision flag %x') % 305 raise RevlogError(_('%s: incompatible revision flag %x') %
285 (self.indexfile, type)) 306 (self.indexfile, type))
295 """loads both the map and the index from the lazy parser""" 316 """loads both the map and the index from the lazy parser"""
296 if isinstance(self.index, lazyindex): 317 if isinstance(self.index, lazyindex):
297 p = self.index.p 318 p = self.index.p
298 p.load() 319 p.load()
299 320
321 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
300 def tip(self): return self.node(len(self.index) - 1) 322 def tip(self): return self.node(len(self.index) - 1)
301 def count(self): return len(self.index) 323 def count(self): return len(self.index)
302 def node(self, rev): 324 def node(self, rev):
303 return (rev < 0) and nullid or self.index[rev][-1] 325 return (rev < 0) and nullid or self.index[rev][-1]
304 def rev(self, node): 326 def rev(self, node):
566 """apply a list of patches to a string""" 588 """apply a list of patches to a string"""
567 return mdiff.patches(t, pl) 589 return mdiff.patches(t, pl)
568 590
569 def chunk(self, rev, df=None, cachelen=4096): 591 def chunk(self, rev, df=None, cachelen=4096):
570 start, length = self.start(rev), self.length(rev) 592 start, length = self.start(rev), self.length(rev)
593 inline = self.inlinedata()
594 if inline:
595 start += (rev + 1) * struct.calcsize(self.indexformat)
571 end = start + length 596 end = start + length
572 def loadcache(df): 597 def loadcache(df):
573 cache_length = max(cachelen, length) # 4k 598 cache_length = max(cachelen, length) # 4k
574 if not df: 599 if not df:
575 df = self.opener(self.datafile) 600 if inline:
601 df = self.opener(self.indexfile)
602 else:
603 df = self.opener(self.datafile)
576 df.seek(start) 604 df.seek(start)
577 self.chunkcache = (start, df.read(cache_length)) 605 self.chunkcache = (start, df.read(cache_length))
578 606
579 if not self.chunkcache: 607 if not self.chunkcache:
580 loadcache(df) 608 loadcache(df)
618 # look up what we need to read 646 # look up what we need to read
619 text = None 647 text = None
620 rev = self.rev(node) 648 rev = self.rev(node)
621 base = self.base(rev) 649 base = self.base(rev)
622 650
623 df = self.opener(self.datafile) 651 if self.inlinedata():
652 # we probably have the whole chunk cached
653 df = None
654 else:
655 df = self.opener(self.datafile)
624 656
625 # do we have useful data cached? 657 # do we have useful data cached?
626 if self.cache and self.cache[1] >= base and self.cache[1] < rev: 658 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
627 base = self.cache[1] 659 base = self.cache[1]
628 text = self.cache[2] 660 text = self.cache[2]
640 raise RevlogError(_("integrity check failed on %s:%d") 672 raise RevlogError(_("integrity check failed on %s:%d")
641 % (self.datafile, rev)) 673 % (self.datafile, rev))
642 674
643 self.cache = (node, rev, text) 675 self.cache = (node, rev, text)
644 return text 676 return text
677
678 def checkinlinesize(self, fp, tr):
679 if not self.inlinedata():
680 return
681 size = fp.tell()
682 if size < 131072:
683 return
684 tr.add(self.datafile, 0)
685 df = self.opener(self.datafile, 'w')
686 calc = struct.calcsize(self.indexformat)
687 for r in xrange(self.count()):
688 start = self.start(r) + (r + 1) * calc
689 length = self.length(r)
690 fp.seek(start)
691 d = fp.read(length)
692 df.write(d)
693 fp.close()
694 df.close()
695 fp = self.opener(self.indexfile, 'w', atomic=True)
696 self.version &= ~(REVLOGNGINLINEDATA)
697 if self.count():
698 x = self.index[0]
699 e = struct.pack(self.indexformat, *x)[4:]
700 l = struct.pack(versionformat, self.version)
701 fp.write(l)
702 fp.write(e)
703
704 for i in xrange(1, self.count()):
705 x = self.index[i]
706 e = struct.pack(self.indexformat, *x)
707 fp.write(e)
708
709 fp.close()
710 self.chunkcache = None
645 711
646 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None): 712 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
647 """add a revision to the log 713 """add a revision to the log
648 714
649 text - the revision data to add 715 text - the revision data to add
696 762
697 self.index.append(e) 763 self.index.append(e)
698 self.nodemap[node] = n 764 self.nodemap[node] = n
699 entry = struct.pack(self.indexformat, *e) 765 entry = struct.pack(self.indexformat, *e)
700 766
701 transaction.add(self.datafile, offset) 767 if not self.inlinedata():
702 transaction.add(self.indexfile, n * len(entry)) 768 transaction.add(self.datafile, offset)
703 f = self.opener(self.datafile, "a") 769 transaction.add(self.indexfile, n * len(entry))
704 if data[0]: 770 f = self.opener(self.datafile, "a")
705 f.write(data[0]) 771 if data[0]:
706 f.write(data[1]) 772 f.write(data[0])
707 f = self.opener(self.indexfile, "a") 773 f.write(data[1])
774 f = self.opener(self.indexfile, "a")
775 else:
776 f = self.opener(self.indexfile, "a+")
777 transaction.add(self.indexfile, f.tell())
708 778
709 if len(self.index) == 1 and self.version != 0: 779 if len(self.index) == 1 and self.version != 0:
710 l = struct.pack(versionformat, self.version) 780 l = struct.pack(versionformat, self.version)
711 f.write(l) 781 f.write(l)
712 entry = entry[4:] 782 entry = entry[4:]
713 783
714 f.write(entry) 784 f.write(entry)
785
786 if self.inlinedata():
787 f.write(data[0])
788 f.write(data[1])
789 self.checkinlinesize(f, transaction)
715 790
716 self.cache = (node, n, text) 791 self.cache = (node, n, text)
717 return node 792 return node
718 793
719 def ancestor(self, a, b): 794 def ancestor(self, a, b):
828 if r: 903 if r:
829 end = self.end(t) 904 end = self.end(t)
830 905
831 ifh = self.opener(self.indexfile, "a+") 906 ifh = self.opener(self.indexfile, "a+")
832 transaction.add(self.indexfile, ifh.tell()) 907 transaction.add(self.indexfile, ifh.tell())
833 transaction.add(self.datafile, end) 908 if self.inlinedata():
834 dfh = self.opener(self.datafile, "a") 909 dfh = None
910 else:
911 transaction.add(self.datafile, end)
912 dfh = self.opener(self.datafile, "a")
835 913
836 # loop through our set of deltas 914 # loop through our set of deltas
837 chain = None 915 chain = None
838 for chunk in revs: 916 for chunk in revs:
839 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80]) 917 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
883 else: 961 else:
884 e = (self.offset_type(end, 0), len(cdelta), -1, base, 962 e = (self.offset_type(end, 0), len(cdelta), -1, base,
885 link, self.rev(p1), self.rev(p2), node) 963 link, self.rev(p1), self.rev(p2), node)
886 self.index.append(e) 964 self.index.append(e)
887 self.nodemap[node] = r 965 self.nodemap[node] = r
888 dfh.write(cdelta) 966 if self.inlinedata():
889 ifh.write(struct.pack(self.indexformat, *e)) 967 ifh.write(struct.pack(self.indexformat, *e))
968 ifh.write(cdelta)
969 self.checkinlinesize(ifh, transaction)
970 if not self.inlinedata():
971 dfh = self.opener(self.datafile, "a")
972 ifh = self.opener(self.indexfile, "a")
973 else:
974 if not dfh:
975 # addrevision switched from inline to conventional
976 # reopen the index
977 dfh = self.opener(self.datafile, "a")
978 ifh = self.opener(self.indexfile, "a")
979 dfh.write(cdelta)
980 ifh.write(struct.pack(self.indexformat, *e))
890 981
891 t, r, chain, prev = r, r + 1, node, node 982 t, r, chain, prev = r, r + 1, node, node
892 base = self.base(t) 983 base = self.base(t)
893 start = self.start(base) 984 start = self.start(base)
894 end = self.end(t) 985 end = self.end(t)
913 if rev >= self.count(): 1004 if rev >= self.count():
914 return 1005 return
915 1006
916 # first truncate the files on disk 1007 # first truncate the files on disk
917 end = self.start(rev) 1008 end = self.start(rev)
918 df = self.opener(self.datafile, "a") 1009 if not self.inlinedata():
919 df.truncate(end) 1010 df = self.opener(self.datafile, "a")
920 end = rev * struct.calcsize(self.indexformat) 1011 df.truncate(end)
1012 end = rev * struct.calcsize(self.indexformat)
1013 else:
1014 end += rev * struct.calcsize(self.indexformat)
921 1015
922 indexf = self.opener(self.indexfile, "a") 1016 indexf = self.opener(self.indexfile, "a")
923 indexf.truncate(end) 1017 indexf.truncate(end)
924 1018
925 # then reset internal state in memory to forget those revisions 1019 # then reset internal state in memory to forget those revisions
950 f.seek(0, 2) 1044 f.seek(0, 2)
951 actual = f.tell() 1045 actual = f.tell()
952 s = struct.calcsize(self.indexformat) 1046 s = struct.calcsize(self.indexformat)
953 i = actual / s 1047 i = actual / s
954 di = actual - (i * s) 1048 di = actual - (i * s)
1049 if self.inlinedata():
1050 databytes = 0
1051 for r in xrange(self.count()):
1052 databytes += self.length(r)
1053 dd = 0
1054 di = actual - self.count() * s - databytes
955 except IOError, inst: 1055 except IOError, inst:
956 if inst.errno != errno.ENOENT: 1056 if inst.errno != errno.ENOENT:
957 raise 1057 raise
958 di = 0 1058 di = 0
959 1059