mercurial/revlog.py
changeset 1083 30974cf73435
parent 1074 55bf5cfde69e
child 1089 142b5d5ec9cc
comparing 1082:ce96e316278a with 1083:30974cf73435
"""
revlog.py - storage back-end for mercurial

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches

Copyright 2005 Matt Mackall <mpm@selenic.com>

This software may be used and distributed according to the terms
of the GNU General Public License, incorporated herein by reference.
"""

import zlib, struct, sha, binascii, heapq
from mercurial import mdiff

def hex(node): return binascii.hexlify(node)
def bin(node): return binascii.unhexlify(node)
def short(node): return hex(node[:6])

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text: return text
    if len(text) < 44:
        if text[0] == '\0': return text
        return 'u' + text
    bin = zlib.compress(text)
    if len(bin) > len(text):
        if text[0] == '\0': return text
        return 'u' + text
    return bin

def decompress(bin):
    """ decompress the given input """
    if not bin: return bin
    t = bin[0]
    if t == '\0': return bin
    if t == 'x': return zlib.decompress(bin)
    if t == 'u': return bin[1:]
    raise RevlogError("unknown compression type %s" % t)

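# A minimal usage sketch (illustrative, not part of this changeset): round-trip
# a made-up string through compress() and decompress(). Long, compressible text
# comes back zlib-deflated (leading 'x'); short text is stored literally behind
# a 'u' marker, and text starting with '\0' is stored untouched.
def _example_roundtrip():
    text = "a revision body that is long and repetitive enough to deflate " * 4
    stored = compress(text)
    assert stored[0] == 'x' and decompress(stored) == text
    assert compress("short")[0] == 'u'
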
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    l = [p1, p2]
    l.sort()
    s = sha.new(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()

nullid = "\0" * 20
indexformat = ">4l20s20s20s"

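# Illustrative sketch (not in the changeset): the nodeid of a root revision is
# just the hash of its text with two null parents, as hash() above describes.
# The file contents used here are invented.
def _example_root_node():
    text = "hello world\n"
    node = hash(text, nullid, nullid)
    return hex(node)    # 40-character hex form of the 20-byte nodeid
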
class lazyparser:
    """
    this class avoids the need to parse the entirety of large indices

    By default we parse and load 1000 entries at a time.

    If no position is specified, we load the whole index, and replace
    the lazy objects in revlog with the underlying objects for
    efficiency in cases where we look at most of the nodes.
    """
    def __init__(self, data, revlog):
        self.data = data
        self.s = struct.calcsize(indexformat)
        self.l = len(data)/self.s
        self.index = [None] * self.l
        # ...
            self.index[i] = e
            self.map[e[6]] = i
            i += 1

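# Illustrative sketch (not in the changeset): how one fixed-size index record
# decodes under indexformat. The field order matches the tuple unpacked in
# revision() below; `data` would be the raw contents of the .i file handed to
# lazyparser.
def _example_unpack_entry(data, i):
    s = struct.calcsize(indexformat)
    start, length, base, linkrev, p1, p2, nodeid = \
        struct.unpack(indexformat, data[i * s:(i + 1) * s])
    return nodeid, (start, length, base, linkrev, p1, p2)
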
class lazyindex:
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        # ...
        return self.p.index[pos] or self.load(pos)
    def append(self, e):
        self.p.index.append(e)

class lazymap:
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        if self.p.all: return
        n = self.p.data.find(key)
        # ...
        self.p.map[key] = val

class RevlogError(Exception): pass

class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile, datafile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile
        self.opener = opener
        self.cache = None

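    # Illustrative sketch (not in the changeset): the simplest opener one could
    # pass to revlog(): resolve the relative index/data file names inside a
    # store directory. The path below is invented; the real repository layer
    # supplies a richer opener (copy-on-write and so on), as the __init__
    # docstring notes.
    #
    # def _example_opener(base="/tmp/repo/.hg"):
    #     import os
    #     def o(name, mode="r"):
    #         return open(os.path.join(base, name), mode)
    #     return o
    #
    # rl = revlog(_example_opener(), "00changelog.i", "00changelog.d")
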
    # ...
                    reachable[p] = 1
                    visit.append(p)
        return reachable

    def heads(self, stop=None):
        """return the list of all nodes that have no children"""
        p = {}
        h = []
        stoprev = 0
        if stop and stop in self.nodemap:
            stoprev = self.rev(stop)

        for r in range(self.count() - 1, -1, -1):
            n = self.node(r)
            if n not in p:
                h.append(n)
            if n == stop:
                # ...
            for pn in self.parents(n):
                p[pn] = 1
        return h

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, self.count()):
            n = self.node(r)
            for pn in self.parents(n):
                # ...
                elif pn == nullid:
                    continue
        return c

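    # Illustrative sketch (not in the changeset): heads() and children() walk
    # the same parent links in opposite directions, so a head reported above
    # should have no children. `rl` stands for an already-open revlog instance.
    #
    # def _example_check_heads(rl):
    #     for h in rl.heads():
    #         assert rl.children(h) == []
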
    def lookup(self, id):
        """locate a node based on revision number or subset of hex nodeid"""
        try:
            rev = int(id)
            if str(rev) != id: raise ValueError
            if rev < 0: rev = self.count() + rev
            if rev < 0 or rev >= self.count(): raise ValueError
            # ...
            return c[0]

        return None

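    # Illustrative sketch (not in the changeset): lookup() accepts a revision
    # number (negative values count back from tip) or a prefix of the hex
    # nodeid. `rl` stands for an already-open revlog; the prefix is assumed
    # to be unambiguous.
    #
    # def _example_lookup(rl):
    #     tip = rl.lookup("-1")            # last revision, by number
    #     assert rl.lookup(hex(tip)[:8]) == tip
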
    def diff(self, a, b):
        """return a delta between two revisions"""
        return mdiff.textdiff(a, b)

    def patches(self, t, pl):
        """apply a list of patches to a string"""
        return mdiff.patches(t, pl)

    def delta(self, node):
        """return or calculate a delta between a node and its predecessor"""
        r = self.rev(node)
        b = self.base(r)
        if r == b:
            return self.diff(self.revision(self.node(r - 1)),
                             self.revision(node))
        else:
            # ...
            f.seek(self.start(r))
            data = f.read(self.length(r))
        return decompress(data)

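    # Illustrative sketch (not in the changeset): delta() pairs with patches();
    # applying the tip's delta to the previous revision's text reproduces the
    # tip. `rl` stands for an open revlog with at least two revisions.
    #
    # def _example_rebuild_tip(rl):
    #     tip = rl.tip()
    #     prev = rl.node(rl.rev(tip) - 1)
    #     assert rl.patches(rl.revision(prev), [rl.delta(tip)]) == rl.revision(tip)
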
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid: return ""
        if self.cache and self.cache[0] == node: return self.cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        start, length, base, link, p1, p2, node = self.index[rev]
        end = start + length
        if base != rev: start = self.start(base)

        # do we have useful data cached?
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            start = self.start(base + 1)
            text = self.cache[2]
            last = 0
        # ...

        self.cache = (node, rev, text)
        return text

    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        if text is None: text = ""
        if p1 is None: p1 = self.tip()
        if p2 is None: p2 = nullid

        node = hash(text, p1, p2)
        # ...

        self.cache = (node, n, text)
        return node

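    # Illustrative sketch (not in the changeset): appending two revisions. The
    # transaction `tr` would come from the surrounding repository code (it is
    # what addrevision uses for rollback); the texts and linkrevs are invented.
    #
    # def _example_append(rl, tr):
    #     n1 = rl.addrevision("first draft\n", tr, link=0)
    #     n2 = rl.addrevision("second draft\n", tr, link=1)
    #     assert rl.parents(n2)[0] == n1    # p1 defaulted to the previous tip
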
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""
        # calculate the distance of every node from root
        dist = {nullid: 0}
        for i in xrange(self.count()):
            n = self.node(i)
            p1, p2 = self.parents(n)
            # ...
                ly = y.next()
            elif lx > ly:
                lx = x.next()

    def group(self, linkmap):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. parent is parent[0].
        """
        revs = []
        needed = {}

        # find file nodes/revs that match changeset revs
        for i in xrange(0, self.count()):
            # ...
            yield d

        yield struct.pack(">l", 0)

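    # Illustrative sketch (not in the changeset): draining the generator
    # returned by group(). Everything before the final 4-byte zero record
    # (yielded above) is delta data plus per-chunk metadata; `rl` and `linkmap`
    # are assumed to come from the surrounding repository code.
    #
    # def _example_collect_group(rl, linkmap):
    #     parts = list(rl.group(linkmap))
    #     assert parts[-1] == struct.pack(">l", 0)
    #     return "".join(parts)
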
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = nullid

        base = prev = -1