mercurial/revlog.py
changeset 2073 1e6745f78989
parent 2072 74d3f5336b66
child 2075 343aeefb553b
equal deleted inserted replaced
2072:74d3f5336b66 2073:1e6745f78989
    17 demandload(globals(), "sha struct zlib")
    17 demandload(globals(), "sha struct zlib")
    18 
    18 
    19 # revlog version strings
    19 # revlog version strings
    20 REVLOGV0 = 0
    20 REVLOGV0 = 0
    21 REVLOGNG = 1
    21 REVLOGNG = 1
       
    22 
       
    23 # revlog flags
       
    24 REVLOGNGINLINEDATA = (1 << 16)
       
    25 
       
    26 def flagstr(flag):
       
    27     if flag == "inline":
       
    28         return REVLOGNGINLINEDATA
       
    29     raise RevlogError(_("unknown revlog flag %s" % flag))
    22 
    30 
    23 def hash(text, p1, p2):
    31 def hash(text, p1, p2):
    24     """generate a hash from the given text and its parent hashes
    32     """generate a hash from the given text and its parent hashes
    25 
    33 
    26     This hash combines both the current file contents and its history
    34     This hash combines both the current file contents and its history
   232                     and st.st_ctime == oldst.st_ctime):
   240                     and st.st_ctime == oldst.st_ctime):
   233                     return
   241                     return
   234                 self.indexstat = st
   242                 self.indexstat = st
   235                 if len(i) > 0:
   243                 if len(i) > 0:
   236                     v = struct.unpack(versionformat, i[:4])[0]
   244                     v = struct.unpack(versionformat, i[:4])[0]
   237                     if v != 0:
   245         flags = v & ~0xFFFF
   238                         flags = v & ~0xFFFF
   246         fmt = v & 0xFFFF
   239                         fmt = v & 0xFFFF
   247         if fmt == 0:
   240                         if fmt != REVLOGNG or (flags & ~(REVLOGNGINLINEDATA)):
   248             if flags:
   241                             raise RevlogError(
   249                 raise RevlogError(_("index %s invalid flags %x for format v0" %
   242                              _("unknown version format %d or flags %x on %s") %
   250                                    (self.indexfile, flags)))
   243                              (v, flags, self.indexfile))
   251         elif fmt == REVLOGNG:
       
   252             if flags & ~REVLOGNGINLINEDATA:
       
   253                 raise RevlogError(_("index %s invalid flags %x for revlogng" %
       
   254                                    (self.indexfile, flags)))
       
   255         else:
       
   256             raise RevlogError(_("index %s invalid format %d" %
       
   257                                (self.indexfile, fmt)))
   244         self.version = v
   258         self.version = v
   245         if v == 0:
   259         if v == 0:
   246             self.indexformat = indexformatv0
   260             self.indexformat = indexformatv0
   247         else:
   261         else:
   248             self.indexformat = indexformatng
   262             self.indexformat = indexformatng
   249 
   263 
   250         if i:
   264         if i:
   251             if st and st.st_size > 10000:
   265             if not self.inlinedata() and st and st.st_size > 10000:
   252                 # big index, let's parse it on demand
   266                 # big index, let's parse it on demand
   253                 parser = lazyparser(i, self, self.indexformat)
   267                 parser = lazyparser(i, self, self.indexformat)
   254                 self.index = lazyindex(parser)
   268                 self.index = lazyindex(parser)
   255                 self.nodemap = lazymap(parser)
   269                 self.nodemap = lazymap(parser)
   256             else:
   270             else:
   257                 self.parseindex(i)
   271                 self.parseindex(i)
       
   272             if self.inlinedata():
       
   273                 # we've already got the entire data file read in, save it
       
   274                 # in the chunk data
       
   275                 self.chunkcache = (0, i)
   258             if self.version != 0:
   276             if self.version != 0:
   259                 e = list(self.index[0])
   277                 e = list(self.index[0])
   260                 type = self.ngtype(e[0])
   278                 type = self.ngtype(e[0])
   261                 e[0] = self.offset_type(0, type)
   279                 e[0] = self.offset_type(0, type)
   262                 self.index[0] = e
   280                 self.index[0] = e
   268     def parseindex(self, data):
   286     def parseindex(self, data):
   269         s = struct.calcsize(self.indexformat)
   287         s = struct.calcsize(self.indexformat)
   270         l = len(data)
   288         l = len(data)
   271         self.index = []
   289         self.index = []
   272         self.nodemap =  {nullid: -1}
   290         self.nodemap =  {nullid: -1}
       
   291         inline = self.inlinedata()
   273         off = 0
   292         off = 0
   274         n = 0
   293         n = 0
   275         while off < l:
   294         while off < l:
   276             e = struct.unpack(self.indexformat, data[off:off + s])
   295             e = struct.unpack(self.indexformat, data[off:off + s])
   277             self.index.append(e)
   296             self.index.append(e)
   278             self.nodemap[e[-1]] = n
   297             self.nodemap[e[-1]] = n
   279             n += 1
   298             n += 1
   280             off += s
   299             off += s
       
   300             if inline:
       
   301                 off += e[1]
   281 
   302 
   282     def ngoffset(self, q):
   303     def ngoffset(self, q):
   283         if q & 0xFFFF:
   304         if q & 0xFFFF:
   284             raise RevlogError(_('%s: incompatible revision flag %x') %
   305             raise RevlogError(_('%s: incompatible revision flag %x') %
   285                                (self.indexfile, type))
   306                                (self.indexfile, type))
   295         """loads both the map and the index from the lazy parser"""
   316         """loads both the map and the index from the lazy parser"""
   296         if isinstance(self.index, lazyindex):
   317         if isinstance(self.index, lazyindex):
   297             p = self.index.p
   318             p = self.index.p
   298             p.load()
   319             p.load()
   299 
   320 
       
   321     def inlinedata(self): return self.version & REVLOGNGINLINEDATA
   300     def tip(self): return self.node(len(self.index) - 1)
   322     def tip(self): return self.node(len(self.index) - 1)
   301     def count(self): return len(self.index)
   323     def count(self): return len(self.index)
   302     def node(self, rev):
   324     def node(self, rev):
   303         return (rev < 0) and nullid or self.index[rev][-1]
   325         return (rev < 0) and nullid or self.index[rev][-1]
   304     def rev(self, node):
   326     def rev(self, node):
   566         """apply a list of patches to a string"""
   588         """apply a list of patches to a string"""
   567         return mdiff.patches(t, pl)
   589         return mdiff.patches(t, pl)
   568 
   590 
   569     def chunk(self, rev, df=None, cachelen=4096):
   591     def chunk(self, rev, df=None, cachelen=4096):
   570         start, length = self.start(rev), self.length(rev)
   592         start, length = self.start(rev), self.length(rev)
       
   593         inline = self.inlinedata()
       
   594         if inline:
       
   595             start += (rev + 1) * struct.calcsize(self.indexformat)
   571         end = start + length
   596         end = start + length
   572         def loadcache(df):
   597         def loadcache(df):
   573             cache_length = max(cachelen, length) # 4k
   598             cache_length = max(cachelen, length) # 4k
   574             if not df:
   599             if not df:
   575                 df = self.opener(self.datafile)
   600                 if inline:
       
   601                     df = self.opener(self.indexfile)
       
   602                 else:
       
   603                     df = self.opener(self.datafile)
   576             df.seek(start)
   604             df.seek(start)
   577             self.chunkcache = (start, df.read(cache_length))
   605             self.chunkcache = (start, df.read(cache_length))
   578 
   606 
   579         if not self.chunkcache:
   607         if not self.chunkcache:
   580             loadcache(df)
   608             loadcache(df)
   618         # look up what we need to read
   646         # look up what we need to read
   619         text = None
   647         text = None
   620         rev = self.rev(node)
   648         rev = self.rev(node)
   621         base = self.base(rev)
   649         base = self.base(rev)
   622 
   650 
   623         df = self.opener(self.datafile)
   651         if self.inlinedata():
       
   652             # we probably have the whole chunk cached
       
   653             df = None
       
   654         else:
       
   655             df = self.opener(self.datafile)
   624 
   656 
   625         # do we have useful data cached?
   657         # do we have useful data cached?
   626         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
   658         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
   627             base = self.cache[1]
   659             base = self.cache[1]
   628             text = self.cache[2]
   660             text = self.cache[2]
   640             raise RevlogError(_("integrity check failed on %s:%d")
   672             raise RevlogError(_("integrity check failed on %s:%d")
   641                           % (self.datafile, rev))
   673                           % (self.datafile, rev))
   642 
   674 
   643         self.cache = (node, rev, text)
   675         self.cache = (node, rev, text)
   644         return text
   676         return text
       
   677 
       
   678     def checkinlinesize(self, fp, tr):
       
   679         if not self.inlinedata():
       
   680             return
       
   681         size = fp.tell()
       
   682         if size < 131072:
       
   683             return
       
   684         tr.add(self.datafile, 0)
       
   685         df = self.opener(self.datafile, 'w')
       
   686         calc = struct.calcsize(self.indexformat)
       
   687         for r in xrange(self.count()):
       
   688             start = self.start(r) + (r + 1) * calc
       
   689             length = self.length(r)
       
   690             fp.seek(start)
       
   691             d = fp.read(length)
       
   692             df.write(d)
       
   693         fp.close()
       
   694         df.close()
       
   695         fp = self.opener(self.indexfile, 'w', atomic=True)
       
   696         self.version &= ~(REVLOGNGINLINEDATA)
       
   697         if self.count():
       
   698             x = self.index[0]
       
   699             e = struct.pack(self.indexformat, *x)[4:]
       
   700             l = struct.pack(versionformat, self.version)
       
   701             fp.write(l)
       
   702             fp.write(e)
       
   703 
       
   704         for i in xrange(1, self.count()):
       
   705             x = self.index[i]
       
   706             e = struct.pack(self.indexformat, *x)
       
   707             fp.write(e)
       
   708 
       
   709         fp.close()
       
   710         self.chunkcache = None
   645 
   711 
   646     def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
   712     def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
   647         """add a revision to the log
   713         """add a revision to the log
   648 
   714 
   649         text - the revision data to add
   715         text - the revision data to add
   696 
   762 
   697         self.index.append(e)
   763         self.index.append(e)
   698         self.nodemap[node] = n
   764         self.nodemap[node] = n
   699         entry = struct.pack(self.indexformat, *e)
   765         entry = struct.pack(self.indexformat, *e)
   700 
   766 
   701         transaction.add(self.datafile, offset)
   767         if not self.inlinedata():
   702         transaction.add(self.indexfile, n * len(entry))
   768             transaction.add(self.datafile, offset)
   703         f = self.opener(self.datafile, "a")
   769             transaction.add(self.indexfile, n * len(entry))
   704         if data[0]:
   770             f = self.opener(self.datafile, "a")
   705             f.write(data[0])
   771             if data[0]:
   706         f.write(data[1])
   772                 f.write(data[0])
   707         f = self.opener(self.indexfile, "a")
   773             f.write(data[1])
       
   774             f = self.opener(self.indexfile, "a")
       
   775         else:
       
   776             f = self.opener(self.indexfile, "a+")
       
   777             transaction.add(self.indexfile, f.tell())
   708 
   778 
   709         if len(self.index) == 1 and self.version != 0:
   779         if len(self.index) == 1 and self.version != 0:
   710             l = struct.pack(versionformat, self.version)
   780             l = struct.pack(versionformat, self.version)
   711             f.write(l)
   781             f.write(l)
   712             entry = entry[4:]
   782             entry = entry[4:]
   713 
   783 
   714         f.write(entry)
   784         f.write(entry)
       
   785 
       
   786         if self.inlinedata():
       
   787             f.write(data[0])
       
   788             f.write(data[1])
       
   789             self.checkinlinesize(f, transaction)
   715 
   790 
   716         self.cache = (node, n, text)
   791         self.cache = (node, n, text)
   717         return node
   792         return node
   718 
   793 
   719     def ancestor(self, a, b):
   794     def ancestor(self, a, b):
   828         if r:
   903         if r:
   829             end = self.end(t)
   904             end = self.end(t)
   830 
   905 
   831         ifh = self.opener(self.indexfile, "a+")
   906         ifh = self.opener(self.indexfile, "a+")
   832         transaction.add(self.indexfile, ifh.tell())
   907         transaction.add(self.indexfile, ifh.tell())
   833         transaction.add(self.datafile, end)
   908         if self.inlinedata():
   834         dfh = self.opener(self.datafile, "a")
   909             dfh = None
       
   910         else:
       
   911             transaction.add(self.datafile, end)
       
   912             dfh = self.opener(self.datafile, "a")
   835 
   913 
   836         # loop through our set of deltas
   914         # loop through our set of deltas
   837         chain = None
   915         chain = None
   838         for chunk in revs:
   916         for chunk in revs:
   839             node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
   917             node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
   883                 else:
   961                 else:
   884                     e = (self.offset_type(end, 0), len(cdelta), -1, base,
   962                     e = (self.offset_type(end, 0), len(cdelta), -1, base,
   885                          link, self.rev(p1), self.rev(p2), node)
   963                          link, self.rev(p1), self.rev(p2), node)
   886                 self.index.append(e)
   964                 self.index.append(e)
   887                 self.nodemap[node] = r
   965                 self.nodemap[node] = r
   888                 dfh.write(cdelta)
   966                 if self.inlinedata():
   889                 ifh.write(struct.pack(self.indexformat, *e))
   967                     ifh.write(struct.pack(self.indexformat, *e))
       
   968                     ifh.write(cdelta)
       
   969                     self.checkinlinesize(ifh, transaction)
       
   970                     if not self.inlinedata():
       
   971                         dfh = self.opener(self.datafile, "a")
       
   972                         ifh = self.opener(self.indexfile, "a")
       
   973                 else:
       
   974                     if not dfh:
       
   975                         # addrevision switched from inline to conventional
       
   976                         # reopen the index
       
   977                         dfh = self.opener(self.datafile, "a")
       
   978                         ifh = self.opener(self.indexfile, "a")
       
   979                     dfh.write(cdelta)
       
   980                     ifh.write(struct.pack(self.indexformat, *e))
   890 
   981 
   891             t, r, chain, prev = r, r + 1, node, node
   982             t, r, chain, prev = r, r + 1, node, node
   892             base = self.base(t)
   983             base = self.base(t)
   893             start = self.start(base)
   984             start = self.start(base)
   894             end = self.end(t)
   985             end = self.end(t)
   913             if rev >= self.count():
  1004             if rev >= self.count():
   914                 return
  1005                 return
   915 
  1006 
   916         # first truncate the files on disk
  1007         # first truncate the files on disk
   917         end = self.start(rev)
  1008         end = self.start(rev)
   918         df = self.opener(self.datafile, "a")
  1009         if not self.inlinedata():
   919         df.truncate(end)
  1010             df = self.opener(self.datafile, "a")
   920         end = rev * struct.calcsize(self.indexformat)
  1011             df.truncate(end)
       
  1012             end = rev * struct.calcsize(self.indexformat)
       
  1013         else:
       
  1014             end += rev * struct.calcsize(self.indexformat)
   921 
  1015 
   922         indexf = self.opener(self.indexfile, "a")
  1016         indexf = self.opener(self.indexfile, "a")
   923         indexf.truncate(end)
  1017         indexf.truncate(end)
   924 
  1018 
   925         # then reset internal state in memory to forget those revisions
  1019         # then reset internal state in memory to forget those revisions
   950             f.seek(0, 2)
  1044             f.seek(0, 2)
   951             actual = f.tell()
  1045             actual = f.tell()
   952             s = struct.calcsize(self.indexformat)
  1046             s = struct.calcsize(self.indexformat)
   953             i = actual / s
  1047             i = actual / s
   954             di = actual - (i * s)
  1048             di = actual - (i * s)
       
  1049             if self.inlinedata():
       
  1050                 databytes = 0
       
  1051                 for r in xrange(self.count()):
       
  1052                     databytes += self.length(r)
       
  1053                 dd = 0
       
  1054                 di = actual - self.count() * s - databytes
   955         except IOError, inst:
  1055         except IOError, inst:
   956             if inst.errno != errno.ENOENT:
  1056             if inst.errno != errno.ENOENT:
   957                 raise
  1057                 raise
   958             di = 0
  1058             di = 0
   959 
  1059