# revlog.py - storage back-end for mercurial
#
# This provides efficient delta storage with O(1) retrieve and append
# and O(changes) merge between branches
#
# Copyright 2005 Matt Mackall
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

import zlib, struct, hashlib, binascii, os, tempfile
from mercurial import mdiff


class RevlogError(Exception):
    """Raised when stored revlog data fails an integrity check."""


def compress(text):
    """Return the zlib-compressed form of text (bytes)."""
    return zlib.compress(text)


def decompress(bin):
    """Inverse of compress(): return the original bytes."""
    return zlib.decompress(bin)


def hash(text, p1, p2):
    """Return the 20-byte SHA-1 node id of text plus its parent ids.

    The parents are sorted first so the id does not depend on their
    order.  (The name intentionally shadows the builtin hash(); it is
    part of this module's public interface.)
    """
    l = sorted([p1, p2])
    # the "sha" module was removed in Python 3; hashlib.sha1 is the
    # equivalent
    return hashlib.sha1(l[0] + l[1] + text).digest()


# node id used for the null (empty) revision and for missing parents
nullid = b"\0" * 20
# index entry layout: offset, size, delta-base rev, linkrev, p1, p2, nodeid
indexformat = ">4l20s20s20s"


class revlog:
    """An append-only, delta-compressed log of revisions.

    Data is stored as chains of zlib-compressed binary deltas against
    occasional full snapshots; the index records, per revision, where
    its delta lives in the data file, which revision its delta chain
    is based on, the link revision, the two parent node ids, and the
    revision's own node id.
    """

    def __init__(self, opener, indexfile, datafile):
        """Open (or lazily create) a revlog.

        opener: callable(path[, mode]) returning a file object; all
            file access goes through it.
        indexfile, datafile: names passed to opener for the two files.
        """
        self.indexfile = indexfile
        self.datafile = datafile
        self.index = []
        self.opener = opener
        self.cache = None  # (node, rev, text) of last revision read
        # maps node -> rev, plus rev -1 -> nullid so node(-1) resolves
        self.nodemap = {-1: nullid, nullid: -1}
        # read the whole index for now, handle on-demand later
        try:
            n = 0
            i = self.opener(self.indexfile).read()
            s = struct.calcsize(indexformat)
            for f in range(0, len(i), s):
                # offset, size, base, linkrev, p1, p2, nodeid
                e = struct.unpack(indexformat, i[f:f + s])
                self.nodemap[e[6]] = n
                self.index.append(e)
                n += 1
        except IOError:
            # a missing index file simply means an empty revlog
            pass

    def tip(self):
        """Return the node id of the most recently added revision."""
        return self.node(len(self.index) - 1)

    def count(self):
        """Return the number of revisions stored."""
        return len(self.index)

    def node(self, rev):
        """Map a revision number to its node id (nullid for rev < 0)."""
        return nullid if rev < 0 else self.index[rev][6]

    def rev(self, node):
        """Map a node id to its revision number (KeyError if unknown)."""
        return self.nodemap[node]

    def linkrev(self, node):
        """Return the link revision recorded for node."""
        return self.index[self.nodemap[node]][3]

    def parents(self, node):
        """Return the (p1, p2) parent node ids of node."""
        return self.index[self.nodemap[node]][4:6]

    def start(self, rev):
        """Offset of rev's delta in the data file."""
        return self.index[rev][0]

    def length(self, rev):
        """Compressed length of rev's delta in the data file."""
        return self.index[rev][1]

    def end(self, rev):
        """Offset just past rev's delta in the data file."""
        return self.start(rev) + self.length(rev)

    def base(self, rev):
        """Revision holding the snapshot rev's delta chain starts from."""
        return self.index[rev][2]

    def revisions(self, list):
        """Yield the full text of each node id in list, in order."""
        # this can be optimized to do spans, etc
        # be stupid for now
        for r in list:
            yield self.revision(r)

    def diff(self, a, b):
        """Return a binary delta transforming text a into text b."""
        return mdiff.textdiff(a, b)

    def patch(self, text, patch):
        """Apply a binary delta to text and return the result."""
        return mdiff.patch(text, patch)

    def revision(self, node):
        """Reconstruct and return the full text of the given node.

        Walks the delta chain from the nearest full snapshot (or from
        the cached revision, when it lies on the chain), applying each
        delta in order, then verifies the result against the stored
        node id.

        Raises RevlogError if the reconstructed text does not hash to
        the expected node id.
        """
        # compare by value: "is nullid" relied on identity of equal
        # strings, which is not guaranteed
        if node == nullid:
            return b""
        if self.cache and self.cache[0] == node:
            return self.cache[2]

        text = None
        rev = self.rev(node)
        base = self.base(rev)
        start = self.start(base)
        end = self.end(rev)

        # reuse the cached text when it sits on our delta chain
        if self.cache and base <= self.cache[1] < rev:
            base = self.cache[1]
            start = self.start(base + 1)
            text = self.cache[2]
            last = 0

        f = self.opener(self.datafile)
        f.seek(start)
        data = f.read(end - start)

        if text is None:
            # must test for None: "if not text" would wrongly rebuild
            # from the snapshot (at the wrong data offset) whenever the
            # cached revision's text is the empty string
            last = self.length(base)
            text = decompress(data[:last])

        for r in range(base + 1, rev + 1):
            s = self.length(r)
            b = decompress(data[last:last + s])
            text = self.patch(text, b)
            last = last + s

        (p1, p2) = self.parents(node)
        if self.node(rev) != hash(text, p1, p2):
            # raising a bare string is invalid; use a real exception type
            raise RevlogError("Consistency check failed on %s:%d" %
                              (self.datafile, rev))

        self.cache = (node, rev, text)
        return text

    def addrevision(self, text, transaction, link, p1=None, p2=None):
        """Append a new revision holding text and return its node id.

        link is recorded as the revision's link revision; p1 and p2
        default to the current tip and nullid.  The appends to both
        files are registered with transaction so they can be rolled
        back.
        """
        if text is None:
            text = b""
        if p1 is None:
            p1 = self.tip()
        if p2 is None:
            p2 = nullid

        node = hash(text, p1, p2)

        n = self.count()
        t = n - 1

        if n:
            start = self.start(self.base(t))
            end = self.end(t)
            prev = self.revision(self.tip())
            data = compress(self.diff(prev, text))

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not n or (end + len(data) - start) > len(text) * 2:
            data = compress(text)
            base = n
        else:
            base = self.base(t)

        offset = 0
        if t >= 0:
            offset = self.end(t)

        e = (offset, len(data), base, link, p1, p2, node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(indexformat, *e)

        transaction.add(self.datafile, e[0])
        self.opener(self.datafile, "a").write(data)
        transaction.add(self.indexfile, n * len(entry))
        self.opener(self.indexfile, "a").write(entry)

        self.cache = (node, n, text)
        return node

    def ancestor(self, a, b):
        """Return the first common ancestor node of nodes a and b.

        Expands the two ancestor frontiers breadth-first, alternating
        sides, until one side reaches a node the other has seen.

        NOTE(review): recurses without bound if the two nodes share no
        ancestor — assumes a connected DAG; confirm against callers.
        """
        def expand(e1, e2, a1, a2):
            ne = []
            for n in e1:
                (p1, p2) = self.parents(n)
                if p1 in a2:
                    return p1
                if p2 in a2:
                    return p2
                if p1 != nullid and p1 not in a1:
                    a1[p1] = 1
                    ne.append(p1)
                if p2 != nullid and p2 not in a1:
                    a1[p2] = 1
                    ne.append(p2)
            return expand(e2, ne, a2, a1)
        return expand([a], [b], {a: 1}, {b: 1})

    def mergedag(self, other, transaction, linkseq, accumulate=None):
        """Combine the nodes from other's DAG into ours.

        New nodes are appended in other's revision order, drawing link
        revisions from the linkseq iterator; accumulate, if given, is
        called with each new revision's text.  Returns (old tip,
        new tip) for later merge resolution.
        """
        old = self.tip()
        i = self.count()
        l = []

        # merge the other revision log into our DAG
        for r in range(other.count()):
            nid = other.node(r)
            if nid not in self.nodemap:
                (xn, yn) = other.parents(nid)
                l.append((nid, xn, yn))
                self.nodemap[nid] = i
                i += 1

        # merge node data for new nodes
        r = other.revisions([e[0] for e in l])
        for e in l:
            # iterator .next() methods were removed in Python 3
            t = next(r)
            if accumulate:
                accumulate(t)
            self.addrevision(t, transaction, next(linkseq), e[1], e[2])

        # return the unmerged heads for later resolving
        return (old, self.tip())