comparison mercurial/revlog.py @ 0:9117c6561b0b

Add back links from file revisions to changeset revisions. Add simple transaction support. Add hg verify. Improve caching in revlog. Fix a bunch of bugs. Self-hosting now that the metadata is close to finalized.
author mpm@selenic.com
date Tue, 03 May 2005 13:16:10 -0800
parents
children ecf3fd948051
comparison
equal deleted inserted replaced
-1:000000000000 0:9117c6561b0b
1 # revlog.py - storage back-end for mercurial
2 #
3 # This provides efficient delta storage with O(1) retrieve and append
4 # and O(changes) merge between branches
5 #
6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
7 #
8 # This software may be used and distributed according to the terms
9 # of the GNU General Public License, incorporated herein by reference.
10
import binascii
import hashlib
import os
import sha
import struct
import tempfile
import zlib

from mercurial import mdiff
13
def compress(text):
    """Return the zlib-compressed form of text (default compression level)."""
    return zlib.compress(text)
16
def decompress(bin):
    """Inverse of compress(): return the original text for zlib data bin."""
    return zlib.decompress(bin)
19
def hash(text, p1, p2):
    """Return the node id: the sha-1 digest of both parent ids plus text.

    The two parents are sorted first, so the result is independent of
    parent order.  hashlib.sha1 replaces the deprecated (and long since
    removed) sha module; the digests are identical.

    Note: this intentionally shadows the hash() builtin -- the name is
    part of the module's public interface and is kept for callers.
    """
    l = [p1, p2]
    l.sort()
    return hashlib.sha1(l[0] + l[1] + text).digest()
24
# node id of the null revision ("revision -1"): 20 zero bytes, the same
# width as a sha-1 digest
nullid = "\0" * 20

# on-disk index record layout (big-endian): offset, size, base, linkrev
# as four 32-bit ints, then p1, p2, nodeid as 20-byte strings
indexformat = ">4l20s20s20s"
27
class revlog:
    """Storage back-end for mercurial.

    A revlog stores the revisions of one item in two files:

      indexfile -- fixed-size records, one per revision:
                   offset, size, base, linkrev, p1, p2, nodeid
                   (see module-level indexformat)
      datafile  -- concatenation of zlib-compressed fulltexts and
                   deltas against the previous revision

    Full texts are stored at intervals ("base" revisions); a revision
    is rebuilt by decompressing its base and applying the deltas that
    follow it -- O(1) append, O(changes) retrieval.
    """

    def __init__(self, opener, indexfile, datafile):
        # opener(name[, mode]) must return an open file object; it lets
        # the caller decide where the revlog's files actually live
        self.indexfile = indexfile
        self.datafile = datafile
        self.index = []      # unpacked index tuples, indexed by rev number
        self.opener = opener
        self.cache = None    # (node, rev, text) of the last revision read
        # two-way map between node ids and revision numbers; nullid acts
        # as the id of the nonexistent revision -1
        self.nodemap = {-1: nullid, nullid: -1}
        # read the whole index for now, handle on-demand later
        try:
            n = 0
            i = self.opener(self.indexfile).read()
            s = struct.calcsize(indexformat)
            for f in range(0, len(i), s):
                # offset, size, base, linkrev, p1, p2, nodeid
                e = struct.unpack(indexformat, i[f:f + s])
                self.nodemap[e[6]] = n
                self.index.append(e)
                n += 1
        except IOError:
            # no index file yet -- treat as an empty revlog
            pass

    def tip(self):
        """Return the node id of the most recently added revision."""
        return self.node(len(self.index) - 1)

    def count(self):
        """Return the number of revisions."""
        return len(self.index)

    def node(self, rev):
        """Map a revision number to its node id (nullid for rev < 0)."""
        return nullid if rev < 0 else self.index[rev][6]

    def rev(self, node):
        """Map a node id to its revision number (KeyError if unknown)."""
        return self.nodemap[node]

    def linkrev(self, node):
        """Return the changeset revision node is linked to."""
        return self.index[self.nodemap[node]][3]

    def parents(self, node):
        """Return the (p1, p2) parent node ids of node."""
        return self.index[self.nodemap[node]][4:6]

    def start(self, rev):
        """Return the offset of rev's data in the data file."""
        return self.index[rev][0]

    def length(self, rev):
        """Return the compressed length of rev's data."""
        return self.index[rev][1]

    def end(self, rev):
        """Return the offset just past rev's data."""
        return self.start(rev) + self.length(rev)

    def base(self, rev):
        """Return the base (full-text) revision of rev's delta chain."""
        return self.index[rev][2]

    def revisions(self, list):
        """Generate the fulltexts of the given nodes, in order."""
        # this can be optimized to do spans, etc
        # be stupid for now
        for r in list:
            yield self.revision(r)

    def diff(self, a, b):
        """Return a binary delta that transforms text a into text b."""
        return mdiff.textdiff(a, b)

    def patch(self, text, patch):
        """Apply a binary delta to text and return the result."""
        return mdiff.patch(text, patch)

    def revision(self, node):
        """Return the fulltext of the given node.

        Starts from the nearest usable full text (the base revision, or
        the cached text if it lies in the delta chain) and applies each
        delta up to node's revision.  Raises ValueError if the rebuilt
        text does not hash back to node.
        """
        # equality, not identity: "is nullid" only worked by accident
        # when callers happened to pass the very same string object
        if node == nullid:
            return ""
        if self.cache and self.cache[0] == node:
            return self.cache[2]

        text = None
        rev = self.rev(node)
        base = self.base(rev)
        start = self.start(base)
        end = self.end(rev)

        # if the cached revision sits inside our delta chain, start from
        # it and only read/apply the deltas that follow it
        if self.cache and base <= self.cache[1] < rev:
            base = self.cache[1]
            start = self.start(base + 1)
            text = self.cache[2]
        last = 0

        f = self.opener(self.datafile)
        f.seek(start)
        data = f.read(end - start)

        # must be "is None", not "not text": a cached empty revision is a
        # valid starting text, and treating it as missing would decompress
        # the wrong span of data
        if text is None:
            last = self.length(base)
            text = decompress(data[:last])

        for r in range(base + 1, rev + 1):
            s = self.length(r)
            b = decompress(data[last:last + s])
            text = self.patch(text, b)
            last = last + s

        (p1, p2) = self.parents(node)
        if self.node(rev) != hash(text, p1, p2):
            # a real exception instead of the original (uncatchable)
            # string exception
            raise ValueError("Consistency check failed on %s:%d" %
                             (self.datafile, rev))

        self.cache = (node, rev, text)
        return text

    def addrevision(self, text, transaction, link, p1=None, p2=None):
        """Add a revision with the given text and return its node id.

        link is the changeset revision this revision belongs to; p1/p2
        default to the current tip and nullid.  Writes are registered
        with transaction first so a failure can truncate both files
        back to their previous lengths.
        """
        if text is None:
            text = ""
        if p1 is None:
            p1 = self.tip()
        if p2 is None:
            p2 = nullid

        node = hash(text, p1, p2)

        n = self.count()
        t = n - 1

        if n:
            start = self.start(self.base(t))
            end = self.end(t)
            prev = self.revision(self.tip())
            data = compress(self.diff(prev, text))

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        # (when n == 0 the "not n" short-circuit keeps the undefined
        # start/end/data from being evaluated)
        if not n or (end + len(data) - start) > len(text) * 2:
            data = compress(text)
            base = n
        else:
            base = self.base(t)

        offset = 0
        if t >= 0:
            offset = self.end(t)

        e = (offset, len(data), base, link, p1, p2, node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(indexformat, *e)

        transaction.add(self.datafile, e[0])
        self.opener(self.datafile, "a").write(data)
        transaction.add(self.indexfile, n * len(entry))
        self.opener(self.indexfile, "a").write(entry)

        self.cache = (node, n, text)
        return node

    def ancestor(self, a, b):
        """Return the closest common ancestor node of nodes a and b."""
        def expand(e1, e2, a1, a2):
            # e1/e2: frontier of nodes still to expand on each side
            # a1/a2: nodes already seen on each side (dicts used as sets)
            ne = []
            for n in e1:
                (p1, p2) = self.parents(n)
                # a parent already seen from the other side is the answer
                if p1 in a2:
                    return p1
                if p2 in a2:
                    return p2
                if p1 != nullid and p1 not in a1:
                    a1[p1] = 1
                    ne.append(p1)
                if p2 != nullid and p2 not in a1:
                    a1[p2] = 1
                    ne.append(p2)
            # alternate sides, expanding one generation per call
            # NOTE(review): recurses forever if a and b share no ancestor;
            # presumably every revlog has a common root -- confirm
            return expand(e2, ne, a2, a1)
        return expand([a], [b], {a: 1}, {b: 1})

    def mergedag(self, other, transaction, linkseq, accumulate=None):
        """Combine the nodes from other's DAG into ours.

        New revisions get link revisions drawn from the linkseq
        iterator; accumulate, if given, is called with each new text.
        Returns (old tip, new tip) for later conflict resolution.
        """
        old = self.tip()
        i = self.count()
        l = []

        # merge the other revision log into our DAG
        for r in range(other.count()):
            id = other.node(r)
            if id not in self.nodemap:
                (xn, yn) = other.parents(id)
                l.append((id, xn, yn))
                self.nodemap[id] = i
                i += 1

        # merge node data for new nodes
        r = other.revisions([e[0] for e in l])
        for e in l:
            # next() builtin instead of the py2-only .next() method
            t = next(r)
            if accumulate:
                accumulate(t)
            self.addrevision(t, transaction, next(linkseq), e[1], e[2])

        # return the unmerged heads for later resolving
        return (old, self.tip())