1 # revlog.py - storage back-end for mercurial |
1 """ |
2 # |
2 revlog.py - storage back-end for mercurial |
3 # This provides efficient delta storage with O(1) retrieve and append |
3 |
4 # and O(changes) merge between branches |
4 This provides efficient delta storage with O(1) retrieve and append |
5 # |
5 and O(changes) merge between branches |
6 # Copyright 2005 Matt Mackall <mpm@selenic.com> |
6 |
7 # |
7 Copyright 2005 Matt Mackall <mpm@selenic.com> |
8 # This software may be used and distributed according to the terms |
8 |
9 # of the GNU General Public License, incorporated herein by reference. |
9 This software may be used and distributed according to the terms |
|
10 of the GNU General Public License, incorporated herein by reference. |
|
11 """ |
10 |
12 |
11 import zlib, struct, sha, binascii, heapq |
13 import zlib, struct, sha, binascii, heapq |
12 from mercurial import mdiff |
14 from mercurial import mdiff |
13 |
15 |
14 def hex(node): return binascii.hexlify(node) |
16 def hex(node): return binascii.hexlify(node) |
15 def bin(node): return binascii.unhexlify(node) |
17 def bin(node): return binascii.unhexlify(node) |
16 def short(node): return hex(node[:6]) |
18 def short(node): return hex(node[:6]) |
17 |
19 |
18 def compress(text): |
20 def compress(text): |
|
21 """ generate a possibly-compressed representation of text """ |
19 if not text: return text |
22 if not text: return text |
20 if len(text) < 44: |
23 if len(text) < 44: |
21 if text[0] == '\0': return text |
24 if text[0] == '\0': return text |
22 return 'u' + text |
25 return 'u' + text |
23 bin = zlib.compress(text) |
26 bin = zlib.compress(text) |
25 if text[0] == '\0': return text |
28 if text[0] == '\0': return text |
26 return 'u' + text |
29 return 'u' + text |
27 return bin |
30 return bin |
28 |
31 |
29 def decompress(bin): |
32 def decompress(bin): |
|
33 """ decompress the given input """ |
30 if not bin: return bin |
34 if not bin: return bin |
31 t = bin[0] |
35 t = bin[0] |
32 if t == '\0': return bin |
36 if t == '\0': return bin |
33 if t == 'x': return zlib.decompress(bin) |
37 if t == 'x': return zlib.decompress(bin) |
34 if t == 'u': return bin[1:] |
38 if t == 'u': return bin[1:] |
35 raise RevlogError("unknown compression type %s" % t) |
39 raise RevlogError("unknown compression type %s" % t) |
36 |
40 |
37 def hash(text, p1, p2): |
41 def hash(text, p1, p2): |
|
42 """generate a hash from the given text and its parent hashes |
|
43 |
|
44 This hash combines both the current file contents and its history |
|
45 in a manner that makes it easy to distinguish nodes with the same |
|
46 content in the revision graph. |
|
47 """ |
38 l = [p1, p2] |
48 l = [p1, p2] |
39 l.sort() |
49 l.sort() |
40 s = sha.new(l[0]) |
50 s = sha.new(l[0]) |
41 s.update(l[1]) |
51 s.update(l[1]) |
42 s.update(text) |
52 s.update(text) |
44 |
54 |
45 nullid = "\0" * 20 |
55 nullid = "\0" * 20 |
46 indexformat = ">4l20s20s20s" |
56 indexformat = ">4l20s20s20s" |
47 |
57 |
48 class lazyparser: |
58 class lazyparser: |
|
59 """ |
|
60 this class avoids the need to parse the entirety of large indices |
|
61 |
|
62 By default we parse and load 1000 entries at a time. |
|
63 |
|
64 If no position is specified, we load the whole index, and replace |
|
65 the lazy objects in revlog with the underlying objects for |
|
66 efficiency in cases where we look at most of the nodes. |
|
67 """ |
49 def __init__(self, data, revlog): |
68 def __init__(self, data, revlog): |
50 self.data = data |
69 self.data = data |
51 self.s = struct.calcsize(indexformat) |
70 self.s = struct.calcsize(indexformat) |
52 self.l = len(data)/self.s |
71 self.l = len(data)/self.s |
53 self.index = [None] * self.l |
72 self.index = [None] * self.l |
87 return self.p.index[pos] or self.load(pos) |
107 return self.p.index[pos] or self.load(pos) |
88 def append(self, e): |
108 def append(self, e): |
89 self.p.index.append(e) |
109 self.p.index.append(e) |
90 |
110 |
91 class lazymap: |
111 class lazymap: |
|
112 """a lazy version of the node map""" |
92 def __init__(self, parser): |
113 def __init__(self, parser): |
93 self.p = parser |
114 self.p = parser |
94 def load(self, key): |
115 def load(self, key): |
95 if self.p.all: return |
116 if self.p.all: return |
96 n = self.p.data.find(key) |
117 n = self.p.data.find(key) |
121 self.p.map[key] = val |
142 self.p.map[key] = val |
122 |
143 |
123 class RevlogError(Exception): pass |
144 class RevlogError(Exception): pass |
124 |
145 |
125 class revlog: |
146 class revlog: |
|
147 """ |
|
148 the underlying revision storage object |
|
149 |
|
150 A revlog consists of two parts, an index and the revision data. |
|
151 |
|
152 The index is a file with a fixed record size containing |
|
153 information on each revision, includings its nodeid (hash), the |
|
154 nodeids of its parents, the position and offset of its data within |
|
155 the data file, and the revision it's based on. Finally, each entry |
|
156 contains a linkrev entry that can serve as a pointer to external |
|
157 data. |
|
158 |
|
159 The revision data itself is a linear collection of data chunks. |
|
160 Each chunk represents a revision and is usually represented as a |
|
161 delta against the previous chunk. To bound lookup time, runs of |
|
162 deltas are limited to about 2 times the length of the original |
|
163 version data. This makes retrieval of a version proportional to |
|
164 its size, or O(1) relative to the number of revisions. |
|
165 |
|
166 Both pieces of the revlog are written to in an append-only |
|
167 fashion, which means we never need to rewrite a file to insert or |
|
168 remove data, and can use some simple techniques to avoid the need |
|
169 for locking while reading. |
|
170 """ |
126 def __init__(self, opener, indexfile, datafile): |
171 def __init__(self, opener, indexfile, datafile): |
|
172 """ |
|
173 create a revlog object |
|
174 |
|
175 opener is a function that abstracts the file opening operation |
|
176 and can be used to implement COW semantics or the like. |
|
177 """ |
127 self.indexfile = indexfile |
178 self.indexfile = indexfile |
128 self.datafile = datafile |
179 self.datafile = datafile |
129 self.opener = opener |
180 self.opener = opener |
130 self.cache = None |
181 self.cache = None |
131 |
182 |
191 reachable[p] = 1 |
242 reachable[p] = 1 |
192 visit.append(p) |
243 visit.append(p) |
193 return reachable |
244 return reachable |
194 |
245 |
195 def heads(self, stop=None): |
246 def heads(self, stop=None): |
|
247 """return the list of all nodes that have no children""" |
196 p = {} |
248 p = {} |
197 h = [] |
249 h = [] |
198 stoprev = 0 |
250 stoprev = 0 |
199 if stop and stop in self.nodemap: |
251 if stop and stop in self.nodemap: |
200 stoprev = self.rev(stop) |
252 stoprev = self.rev(stop) |
201 |
253 |
202 for r in range(self.count() - 1, -1, -1): |
254 for r in range(self.count() - 1, -1, -1): |
203 n = self.node(r) |
255 n = self.node(r) |
204 if n not in p: |
256 if n not in p: |
205 h.append(n) |
257 h.append(n) |
206 if n == stop: |
258 if n == stop: |
210 for pn in self.parents(n): |
262 for pn in self.parents(n): |
211 p[pn] = 1 |
263 p[pn] = 1 |
212 return h |
264 return h |
213 |
265 |
214 def children(self, node): |
266 def children(self, node): |
|
267 """find the children of a given node""" |
215 c = [] |
268 c = [] |
216 p = self.rev(node) |
269 p = self.rev(node) |
217 for r in range(p + 1, self.count()): |
270 for r in range(p + 1, self.count()): |
218 n = self.node(r) |
271 n = self.node(r) |
219 for pn in self.parents(n): |
272 for pn in self.parents(n): |
223 elif pn == nullid: |
276 elif pn == nullid: |
224 continue |
277 continue |
225 return c |
278 return c |
226 |
279 |
227 def lookup(self, id): |
280 def lookup(self, id): |
|
281 """locate a node based on revision number or subset of hex nodeid""" |
228 try: |
282 try: |
229 rev = int(id) |
283 rev = int(id) |
230 if str(rev) != id: raise ValueError |
284 if str(rev) != id: raise ValueError |
231 if rev < 0: rev = self.count() + rev |
285 if rev < 0: rev = self.count() + rev |
232 if rev < 0 or rev >= self.count(): raise ValueError |
286 if rev < 0 or rev >= self.count(): raise ValueError |
241 return c[0] |
295 return c[0] |
242 |
296 |
243 return None |
297 return None |
244 |
298 |
245 def diff(self, a, b): |
299 def diff(self, a, b): |
|
300 """return a delta between two revisions""" |
246 return mdiff.textdiff(a, b) |
301 return mdiff.textdiff(a, b) |
247 |
302 |
248 def patches(self, t, pl): |
303 def patches(self, t, pl): |
|
304 """apply a list of patches to a string""" |
249 return mdiff.patches(t, pl) |
305 return mdiff.patches(t, pl) |
250 |
306 |
251 def delta(self, node): |
307 def delta(self, node): |
|
308 """return or calculate a delta between a node and its predecessor""" |
252 r = self.rev(node) |
309 r = self.rev(node) |
253 b = self.base(r) |
310 b = self.base(r) |
254 if r == b: |
311 if r == b: |
255 return self.diff(self.revision(self.node(r - 1)), |
312 return self.diff(self.revision(self.node(r - 1)), |
256 self.revision(node)) |
313 self.revision(node)) |
259 f.seek(self.start(r)) |
316 f.seek(self.start(r)) |
260 data = f.read(self.length(r)) |
317 data = f.read(self.length(r)) |
261 return decompress(data) |
318 return decompress(data) |
262 |
319 |
263 def revision(self, node): |
320 def revision(self, node): |
|
321 """return an uncompressed revision of a given""" |
264 if node == nullid: return "" |
322 if node == nullid: return "" |
265 if self.cache and self.cache[0] == node: return self.cache[2] |
323 if self.cache and self.cache[0] == node: return self.cache[2] |
266 |
324 |
|
325 # look up what we need to read |
267 text = None |
326 text = None |
268 rev = self.rev(node) |
327 rev = self.rev(node) |
269 start, length, base, link, p1, p2, node = self.index[rev] |
328 start, length, base, link, p1, p2, node = self.index[rev] |
270 end = start + length |
329 end = start + length |
271 if base != rev: start = self.start(base) |
330 if base != rev: start = self.start(base) |
272 |
331 |
|
332 # do we have useful data cached? |
273 if self.cache and self.cache[1] >= base and self.cache[1] < rev: |
333 if self.cache and self.cache[1] >= base and self.cache[1] < rev: |
274 base = self.cache[1] |
334 base = self.cache[1] |
275 start = self.start(base + 1) |
335 start = self.start(base + 1) |
276 text = self.cache[2] |
336 text = self.cache[2] |
277 last = 0 |
337 last = 0 |
298 |
358 |
299 self.cache = (node, rev, text) |
359 self.cache = (node, rev, text) |
300 return text |
360 return text |
301 |
361 |
302 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None): |
362 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None): |
|
363 """add a revision to the log |
|
364 |
|
365 text - the revision data to add |
|
366 transaction - the transaction object used for rollback |
|
367 link - the linkrev data to add |
|
368 p1, p2 - the parent nodeids of the revision |
|
369 d - an optional precomputed delta |
|
370 """ |
303 if text is None: text = "" |
371 if text is None: text = "" |
304 if p1 is None: p1 = self.tip() |
372 if p1 is None: p1 = self.tip() |
305 if p2 is None: p2 = nullid |
373 if p2 is None: p2 = nullid |
306 |
374 |
307 node = hash(text, p1, p2) |
375 node = hash(text, p1, p2) |
347 |
415 |
348 self.cache = (node, n, text) |
416 self.cache = (node, n, text) |
349 return node |
417 return node |
350 |
418 |
351 def ancestor(self, a, b): |
419 def ancestor(self, a, b): |
|
420 """calculate the least common ancestor of nodes a and b""" |
352 # calculate the distance of every node from root |
421 # calculate the distance of every node from root |
353 dist = {nullid: 0} |
422 dist = {nullid: 0} |
354 for i in xrange(self.count()): |
423 for i in xrange(self.count()): |
355 n = self.node(i) |
424 n = self.node(i) |
356 p1, p2 = self.parents(n) |
425 p1, p2 = self.parents(n) |
385 ly = y.next() |
454 ly = y.next() |
386 elif lx > ly: |
455 elif lx > ly: |
387 lx = x.next() |
456 lx = x.next() |
388 |
457 |
389 def group(self, linkmap): |
458 def group(self, linkmap): |
390 # given a list of changeset revs, return a set of deltas and |
459 """calculate a delta group |
391 # metadata corresponding to nodes. the first delta is |
460 |
392 # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to |
461 Given a list of changeset revs, return a set of deltas and |
393 # have this parent as it has all history before these |
462 metadata corresponding to nodes. the first delta is |
394 # changesets. parent is parent[0] |
463 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to |
395 |
464 have this parent as it has all history before these |
|
465 changesets. parent is parent[0] |
|
466 """ |
396 revs = [] |
467 revs = [] |
397 needed = {} |
468 needed = {} |
398 |
469 |
399 # find file nodes/revs that match changeset revs |
470 # find file nodes/revs that match changeset revs |
400 for i in xrange(0, self.count()): |
471 for i in xrange(0, self.count()): |
496 yield d |
567 yield d |
497 |
568 |
498 yield struct.pack(">l", 0) |
569 yield struct.pack(">l", 0) |
499 |
570 |
500 def addgroup(self, revs, linkmapper, transaction, unique=0): |
571 def addgroup(self, revs, linkmapper, transaction, unique=0): |
501 # given a set of deltas, add them to the revision log. the |
572 """ |
502 # first delta is against its parent, which should be in our |
573 add a delta group |
503 # log, the rest are against the previous delta. |
574 |
504 |
575 given a set of deltas, add them to the revision log. the |
505 # track the base of the current delta log |
576 first delta is against its parent, which should be in our |
|
577 log, the rest are against the previous delta. |
|
578 """ |
|
579 |
|
580 #track the base of the current delta log |
506 r = self.count() |
581 r = self.count() |
507 t = r - 1 |
582 t = r - 1 |
508 node = nullid |
583 node = nullid |
509 |
584 |
510 base = prev = -1 |
585 base = prev = -1 |