comparison mercurial/revlog.py @ 1083:30974cf73435

Add some docstrings to revlog.py
author mpm@selenic.com
date Sat, 27 Aug 2005 01:43:48 -0700
parents 55bf5cfde69e
children 142b5d5ec9cc
comparison
equal deleted inserted replaced
1082:ce96e316278a 1083:30974cf73435
1 # revlog.py - storage back-end for mercurial 1 """
2 # 2 revlog.py - storage back-end for mercurial
3 # This provides efficient delta storage with O(1) retrieve and append 3
4 # and O(changes) merge between branches 4 This provides efficient delta storage with O(1) retrieve and append
5 # 5 and O(changes) merge between branches
6 # Copyright 2005 Matt Mackall <mpm@selenic.com> 6
7 # 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 # This software may be used and distributed according to the terms 8
9 # of the GNU General Public License, incorporated herein by reference. 9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
11 """
10 12
11 import zlib, struct, sha, binascii, heapq 13 import zlib, struct, sha, binascii, heapq
12 from mercurial import mdiff 14 from mercurial import mdiff
13 15
14 def hex(node): return binascii.hexlify(node) 16 def hex(node): return binascii.hexlify(node)
15 def bin(node): return binascii.unhexlify(node) 17 def bin(node): return binascii.unhexlify(node)
16 def short(node): return hex(node[:6]) 18 def short(node): return hex(node[:6])
17 19
18 def compress(text): 20 def compress(text):
21 """ generate a possibly-compressed representation of text """
19 if not text: return text 22 if not text: return text
20 if len(text) < 44: 23 if len(text) < 44:
21 if text[0] == '\0': return text 24 if text[0] == '\0': return text
22 return 'u' + text 25 return 'u' + text
23 bin = zlib.compress(text) 26 bin = zlib.compress(text)
25 if text[0] == '\0': return text 28 if text[0] == '\0': return text
26 return 'u' + text 29 return 'u' + text
27 return bin 30 return bin
28 31
29 def decompress(bin): 32 def decompress(bin):
33 """ decompress the given input """
30 if not bin: return bin 34 if not bin: return bin
31 t = bin[0] 35 t = bin[0]
32 if t == '\0': return bin 36 if t == '\0': return bin
33 if t == 'x': return zlib.decompress(bin) 37 if t == 'x': return zlib.decompress(bin)
34 if t == 'u': return bin[1:] 38 if t == 'u': return bin[1:]
35 raise RevlogError("unknown compression type %s" % t) 39 raise RevlogError("unknown compression type %s" % t)
36 40
37 def hash(text, p1, p2): 41 def hash(text, p1, p2):
42 """generate a hash from the given text and its parent hashes
43
44 This hash combines both the current file contents and its history
45 in a manner that makes it easy to distinguish nodes with the same
46 content in the revision graph.
47 """
38 l = [p1, p2] 48 l = [p1, p2]
39 l.sort() 49 l.sort()
40 s = sha.new(l[0]) 50 s = sha.new(l[0])
41 s.update(l[1]) 51 s.update(l[1])
42 s.update(text) 52 s.update(text)
44 54
45 nullid = "\0" * 20 55 nullid = "\0" * 20
46 indexformat = ">4l20s20s20s" 56 indexformat = ">4l20s20s20s"
47 57
48 class lazyparser: 58 class lazyparser:
59 """
60 this class avoids the need to parse the entirety of large indices
61
62 By default we parse and load 1000 entries at a time.
63
64 If no position is specified, we load the whole index, and replace
65 the lazy objects in revlog with the underlying objects for
66 efficiency in cases where we look at most of the nodes.
67 """
49 def __init__(self, data, revlog): 68 def __init__(self, data, revlog):
50 self.data = data 69 self.data = data
51 self.s = struct.calcsize(indexformat) 70 self.s = struct.calcsize(indexformat)
52 self.l = len(data)/self.s 71 self.l = len(data)/self.s
53 self.index = [None] * self.l 72 self.index = [None] * self.l
74 self.index[i] = e 93 self.index[i] = e
75 self.map[e[6]] = i 94 self.map[e[6]] = i
76 i += 1 95 i += 1
77 96
78 class lazyindex: 97 class lazyindex:
98 """a lazy version of the index array"""
79 def __init__(self, parser): 99 def __init__(self, parser):
80 self.p = parser 100 self.p = parser
81 def __len__(self): 101 def __len__(self):
82 return len(self.p.index) 102 return len(self.p.index)
83 def load(self, pos): 103 def load(self, pos):
87 return self.p.index[pos] or self.load(pos) 107 return self.p.index[pos] or self.load(pos)
88 def append(self, e): 108 def append(self, e):
89 self.p.index.append(e) 109 self.p.index.append(e)
90 110
91 class lazymap: 111 class lazymap:
112 """a lazy version of the node map"""
92 def __init__(self, parser): 113 def __init__(self, parser):
93 self.p = parser 114 self.p = parser
94 def load(self, key): 115 def load(self, key):
95 if self.p.all: return 116 if self.p.all: return
96 n = self.p.data.find(key) 117 n = self.p.data.find(key)
121 self.p.map[key] = val 142 self.p.map[key] = val
122 143
123 class RevlogError(Exception): pass 144 class RevlogError(Exception): pass
124 145
125 class revlog: 146 class revlog:
147 """
148 the underlying revision storage object
149
150 A revlog consists of two parts, an index and the revision data.
151
152 The index is a file with a fixed record size containing
153 information on each revision, includings its nodeid (hash), the
154 nodeids of its parents, the position and offset of its data within
155 the data file, and the revision it's based on. Finally, each entry
156 contains a linkrev entry that can serve as a pointer to external
157 data.
158
159 The revision data itself is a linear collection of data chunks.
160 Each chunk represents a revision and is usually represented as a
161 delta against the previous chunk. To bound lookup time, runs of
162 deltas are limited to about 2 times the length of the original
163 version data. This makes retrieval of a version proportional to
164 its size, or O(1) relative to the number of revisions.
165
166 Both pieces of the revlog are written to in an append-only
167 fashion, which means we never need to rewrite a file to insert or
168 remove data, and can use some simple techniques to avoid the need
169 for locking while reading.
170 """
126 def __init__(self, opener, indexfile, datafile): 171 def __init__(self, opener, indexfile, datafile):
172 """
173 create a revlog object
174
175 opener is a function that abstracts the file opening operation
176 and can be used to implement COW semantics or the like.
177 """
127 self.indexfile = indexfile 178 self.indexfile = indexfile
128 self.datafile = datafile 179 self.datafile = datafile
129 self.opener = opener 180 self.opener = opener
130 self.cache = None 181 self.cache = None
131 182
191 reachable[p] = 1 242 reachable[p] = 1
192 visit.append(p) 243 visit.append(p)
193 return reachable 244 return reachable
194 245
195 def heads(self, stop=None): 246 def heads(self, stop=None):
247 """return the list of all nodes that have no children"""
196 p = {} 248 p = {}
197 h = [] 249 h = []
198 stoprev = 0 250 stoprev = 0
199 if stop and stop in self.nodemap: 251 if stop and stop in self.nodemap:
200 stoprev = self.rev(stop) 252 stoprev = self.rev(stop)
201 253
202 for r in range(self.count() - 1, -1, -1): 254 for r in range(self.count() - 1, -1, -1):
203 n = self.node(r) 255 n = self.node(r)
204 if n not in p: 256 if n not in p:
205 h.append(n) 257 h.append(n)
206 if n == stop: 258 if n == stop:
210 for pn in self.parents(n): 262 for pn in self.parents(n):
211 p[pn] = 1 263 p[pn] = 1
212 return h 264 return h
213 265
214 def children(self, node): 266 def children(self, node):
267 """find the children of a given node"""
215 c = [] 268 c = []
216 p = self.rev(node) 269 p = self.rev(node)
217 for r in range(p + 1, self.count()): 270 for r in range(p + 1, self.count()):
218 n = self.node(r) 271 n = self.node(r)
219 for pn in self.parents(n): 272 for pn in self.parents(n):
223 elif pn == nullid: 276 elif pn == nullid:
224 continue 277 continue
225 return c 278 return c
226 279
227 def lookup(self, id): 280 def lookup(self, id):
281 """locate a node based on revision number or subset of hex nodeid"""
228 try: 282 try:
229 rev = int(id) 283 rev = int(id)
230 if str(rev) != id: raise ValueError 284 if str(rev) != id: raise ValueError
231 if rev < 0: rev = self.count() + rev 285 if rev < 0: rev = self.count() + rev
232 if rev < 0 or rev >= self.count(): raise ValueError 286 if rev < 0 or rev >= self.count(): raise ValueError
241 return c[0] 295 return c[0]
242 296
243 return None 297 return None
244 298
245 def diff(self, a, b): 299 def diff(self, a, b):
300 """return a delta between two revisions"""
246 return mdiff.textdiff(a, b) 301 return mdiff.textdiff(a, b)
247 302
248 def patches(self, t, pl): 303 def patches(self, t, pl):
304 """apply a list of patches to a string"""
249 return mdiff.patches(t, pl) 305 return mdiff.patches(t, pl)
250 306
251 def delta(self, node): 307 def delta(self, node):
308 """return or calculate a delta between a node and its predecessor"""
252 r = self.rev(node) 309 r = self.rev(node)
253 b = self.base(r) 310 b = self.base(r)
254 if r == b: 311 if r == b:
255 return self.diff(self.revision(self.node(r - 1)), 312 return self.diff(self.revision(self.node(r - 1)),
256 self.revision(node)) 313 self.revision(node))
259 f.seek(self.start(r)) 316 f.seek(self.start(r))
260 data = f.read(self.length(r)) 317 data = f.read(self.length(r))
261 return decompress(data) 318 return decompress(data)
262 319
263 def revision(self, node): 320 def revision(self, node):
321 """return an uncompressed revision of a given"""
264 if node == nullid: return "" 322 if node == nullid: return ""
265 if self.cache and self.cache[0] == node: return self.cache[2] 323 if self.cache and self.cache[0] == node: return self.cache[2]
266 324
325 # look up what we need to read
267 text = None 326 text = None
268 rev = self.rev(node) 327 rev = self.rev(node)
269 start, length, base, link, p1, p2, node = self.index[rev] 328 start, length, base, link, p1, p2, node = self.index[rev]
270 end = start + length 329 end = start + length
271 if base != rev: start = self.start(base) 330 if base != rev: start = self.start(base)
272 331
332 # do we have useful data cached?
273 if self.cache and self.cache[1] >= base and self.cache[1] < rev: 333 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
274 base = self.cache[1] 334 base = self.cache[1]
275 start = self.start(base + 1) 335 start = self.start(base + 1)
276 text = self.cache[2] 336 text = self.cache[2]
277 last = 0 337 last = 0
298 358
299 self.cache = (node, rev, text) 359 self.cache = (node, rev, text)
300 return text 360 return text
301 361
302 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None): 362 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
363 """add a revision to the log
364
365 text - the revision data to add
366 transaction - the transaction object used for rollback
367 link - the linkrev data to add
368 p1, p2 - the parent nodeids of the revision
369 d - an optional precomputed delta
370 """
303 if text is None: text = "" 371 if text is None: text = ""
304 if p1 is None: p1 = self.tip() 372 if p1 is None: p1 = self.tip()
305 if p2 is None: p2 = nullid 373 if p2 is None: p2 = nullid
306 374
307 node = hash(text, p1, p2) 375 node = hash(text, p1, p2)
347 415
348 self.cache = (node, n, text) 416 self.cache = (node, n, text)
349 return node 417 return node
350 418
351 def ancestor(self, a, b): 419 def ancestor(self, a, b):
420 """calculate the least common ancestor of nodes a and b"""
352 # calculate the distance of every node from root 421 # calculate the distance of every node from root
353 dist = {nullid: 0} 422 dist = {nullid: 0}
354 for i in xrange(self.count()): 423 for i in xrange(self.count()):
355 n = self.node(i) 424 n = self.node(i)
356 p1, p2 = self.parents(n) 425 p1, p2 = self.parents(n)
385 ly = y.next() 454 ly = y.next()
386 elif lx > ly: 455 elif lx > ly:
387 lx = x.next() 456 lx = x.next()
388 457
389 def group(self, linkmap): 458 def group(self, linkmap):
390 # given a list of changeset revs, return a set of deltas and 459 """calculate a delta group
391 # metadata corresponding to nodes. the first delta is 460
392 # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to 461 Given a list of changeset revs, return a set of deltas and
393 # have this parent as it has all history before these 462 metadata corresponding to nodes. the first delta is
394 # changesets. parent is parent[0] 463 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
395 464 have this parent as it has all history before these
465 changesets. parent is parent[0]
466 """
396 revs = [] 467 revs = []
397 needed = {} 468 needed = {}
398 469
399 # find file nodes/revs that match changeset revs 470 # find file nodes/revs that match changeset revs
400 for i in xrange(0, self.count()): 471 for i in xrange(0, self.count()):
496 yield d 567 yield d
497 568
498 yield struct.pack(">l", 0) 569 yield struct.pack(">l", 0)
499 570
500 def addgroup(self, revs, linkmapper, transaction, unique=0): 571 def addgroup(self, revs, linkmapper, transaction, unique=0):
501 # given a set of deltas, add them to the revision log. the 572 """
502 # first delta is against its parent, which should be in our 573 add a delta group
503 # log, the rest are against the previous delta. 574
504 575 given a set of deltas, add them to the revision log. the
505 # track the base of the current delta log 576 first delta is against its parent, which should be in our
577 log, the rest are against the previous delta.
578 """
579
580 #track the base of the current delta log
506 r = self.count() 581 r = self.count()
507 t = r - 1 582 t = r - 1
508 node = nullid 583 node = nullid
509 584
510 base = prev = -1 585 base = prev = -1