merge with mainline.
--- a/mercurial/appendfile.py
+++ b/mercurial/appendfile.py
@@ -42,9 +42,19 @@ class appendfile(object):
# seek and read can be fast.
self.fpsize = os.fstat(fp.fileno()).st_size
- def seek(self, offset):
+ def end(self):
+ self.tmpfp.flush() # make sure the stat is correct
+ return self.fpsize + os.fstat(self.tmpfp.fileno()).st_size
+
+ def seek(self, offset, whence=0):
'''virtual file offset spans real file and temp file.'''
- self.offset = offset
+ if whence == 0:
+ self.offset = offset
+ elif whence == 1:
+ self.offset += offset
+ elif whence == 2:
+ self.offset = self.end() + offset
+
if self.offset < self.fpsize:
self.realfp.seek(self.offset)
else:
@@ -103,8 +113,16 @@ class sharedfile(object):
self.fp = fp
self.offset = 0
- def seek(self, offset):
- self.offset = offset
+ def tell(self):
+ return self.offset
+
+ def seek(self, offset, whence=0):
+ if whence == 0:
+ self.offset = offset
+ elif whence == 1:
+ self.offset += offset
+ elif whence == 2:
+ self.offset = self.fp.end() + offset
def read(self, count=-1):
try:
@@ -143,7 +161,7 @@ class appendopener(object):
'''open file. return same cached appendfile object for every
later call.'''
- assert mode in 'ra'
+ assert mode in 'ra+'
fp = self.fps.get(name)
if fp is None:
fp = appendfile(self.realopener(name, 'a+'))
@@ -162,11 +180,15 @@ class appendopener(object):
# not mixed up together.
class appendchangelog(changelog.changelog, appendopener):
- def __init__(self, opener):
+ def __init__(self, opener, version):
appendopener.__init__(self, opener)
- changelog.changelog.__init__(self, self)
+ changelog.changelog.__init__(self, self, version)
+ def checkinlinesize(self, fp, tr):
+ return
class appendmanifest(manifest.manifest, appendopener):
- def __init__(self, opener):
+ def __init__(self, opener, version):
appendopener.__init__(self, opener)
- manifest.manifest.__init__(self, self)
+ manifest.manifest.__init__(self, self, version)
+ def checkinlinesize(self, fp, tr):
+ return
--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -31,6 +31,7 @@ class bundlerevlog(revlog.revlog):
#
revlog.revlog.__init__(self, opener, indexfile, datafile)
self.bundlefile = bundlefile
+ self.basemap = {}
def chunkpositer():
for chunk in changegroup.chunkiter(bundlefile):
pos = bundlefile.tell()
@@ -58,7 +59,8 @@ class bundlerevlog(revlog.revlog):
if not prev:
prev = p1
# start, size, base is not used, link, p1, p2, delta ref
- e = (start, size, None, link, p1, p2, node, prev)
+ e = (start, size, None, link, p1, p2, node)
+ self.basemap[n] = prev
self.index.append(e)
self.nodemap[node] = n
prev = node
@@ -68,9 +70,9 @@ class bundlerevlog(revlog.revlog):
"""is rev from the bundle"""
if rev < 0:
return False
- return len(self.index[rev]) > 7
- def bundlebase(self, rev): return self.index[rev][7]
- def chunk(self, rev):
+ return rev in self.basemap
+ def bundlebase(self, rev): return self.basemap[rev]
+ def chunk(self, rev, df=None):
# Warning: in case of bundle, the diff is against bundlebase,
# not against rev - 1
# XXX: could use some caching
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -11,8 +11,9 @@ from demandload import demandload
demandload(globals(), "os time util")
class changelog(revlog):
- def __init__(self, opener):
- revlog.__init__(self, opener, "00changelog.i", "00changelog.d")
+ def __init__(self, opener, defversion=0):
+ revlog.__init__(self, opener, "00changelog.i", "00changelog.d",
+ defversion)
def extract(self, text):
if not text:
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -1268,7 +1268,7 @@ def copy(ui, repo, *pats, **opts):
def debugancestor(ui, index, rev1, rev2):
"""find the ancestor revision of two revisions in a given index"""
- r = revlog.revlog(util.opener(os.getcwd(), audit=False), index, "")
+ r = revlog.revlog(util.opener(os.getcwd(), audit=False), index, "", 0)
a = r.ancestor(r.lookup(rev1), r.lookup(rev2))
ui.write("%d:%s\n" % (r.rev(a), hex(a)))
@@ -1372,7 +1372,7 @@ def debugstate(ui, repo):
def debugdata(ui, file_, rev):
"""dump the contents of an data file revision"""
r = revlog.revlog(util.opener(os.getcwd(), audit=False),
- file_[:-2] + ".i", file_)
+ file_[:-2] + ".i", file_, 0)
try:
ui.write(r.revision(r.lookup(rev)))
except KeyError:
@@ -1380,18 +1380,19 @@ def debugdata(ui, file_, rev):
def debugindex(ui, file_):
"""dump the contents of an index file"""
- r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "")
+ r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "", 0)
ui.write(" rev offset length base linkrev" +
" nodeid p1 p2\n")
for i in range(r.count()):
- e = r.index[i]
+ node = r.node(i)
+ pp = r.parents(node)
ui.write("% 6d % 9d % 7d % 6d % 7d %s %s %s\n" % (
- i, e[0], e[1], e[2], e[3],
- short(e[6]), short(e[4]), short(e[5])))
+ i, r.start(i), r.length(i), r.base(i), r.linkrev(node),
+ short(node), short(pp[0]), short(pp[1])))
def debugindexdot(ui, file_):
"""dump an index DAG as a .dot file"""
- r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "")
+ r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "", 0)
ui.write("digraph G {\n")
for i in range(r.count()):
e = r.index[i]
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -11,10 +11,11 @@ from demandload import *
demandload(globals(), "bdiff")
class filelog(revlog):
- def __init__(self, opener, path):
+ def __init__(self, opener, path, defversion=0):
revlog.__init__(self, opener,
os.path.join("data", self.encodedir(path + ".i")),
- os.path.join("data", self.encodedir(path + ".d")))
+ os.path.join("data", self.encodedir(path + ".d")),
+ defversion)
# This avoids a collision between a file named foo and a dir named
# foo.i or foo.d
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -10,8 +10,8 @@ import filelog, manifest, changelog, dir
from node import *
from i18n import gettext as _
from demandload import *
-demandload(globals(), "re lock transaction tempfile stat mdiff errno ui")
demandload(globals(), "appendfile changegroup")
+demandload(globals(), "re lock transaction tempfile stat mdiff errno ui revlog")
class localrepository(object):
def __del__(self):
@@ -35,8 +35,31 @@ class localrepository(object):
self.ui = ui.ui(parentui=parentui)
self.opener = util.opener(self.path)
self.wopener = util.opener(self.root)
- self.manifest = manifest.manifest(self.opener)
- self.changelog = changelog.changelog(self.opener)
+
+ try:
+ self.ui.readconfig(self.join("hgrc"), self.root)
+ except IOError:
+ pass
+
+ v = self.ui.revlogopts
+ self.revlogversion = int(v.get('format', 0))
+ flags = 0
+ for x in v.get('flags', "").split():
+ flags |= revlog.flagstr(x)
+
+ v = self.revlogversion | flags
+ self.manifest = manifest.manifest(self.opener, v)
+ self.changelog = changelog.changelog(self.opener, v)
+
+ # the changelog might not have the inline index flag
+ # on. If the format of the changelog is the same as found in
+ # .hgrc, apply any flags found in the .hgrc as well.
+        # Otherwise, just use the version from the changelog
+ v = self.changelog.version
+ if v == self.revlogversion:
+ v |= flags
+ self.revlogversion = v
+
self.tagscache = None
self.nodetagscache = None
self.encodepats = None
@@ -48,11 +71,6 @@ class localrepository(object):
os.mkdir(self.join("data"))
self.dirstate = dirstate.dirstate(self.opener, self.ui, self.root)
- try:
- self.ui.readconfig(self.join("hgrc"), self.root)
- except IOError:
- pass
-
def hook(self, name, throw=False, **args):
def runhook(name, cmd):
self.ui.note(_("running hook %s: %s\n") % (name, cmd))
@@ -150,6 +168,7 @@ class localrepository(object):
try:
return self.changelog.lookup(key)
except:
+ raise
raise repo.RepoError(_("unknown revision '%s'") % key)
def dev(self):
@@ -167,7 +186,7 @@ class localrepository(object):
def file(self, f):
if f[0] == '/':
f = f[1:]
- return filelog.filelog(self.opener, f)
+ return filelog.filelog(self.opener, f, self.revlogversion)
def getcwd(self):
return self.dirstate.getcwd()
@@ -1394,7 +1413,7 @@ class localrepository(object):
# write changelog and manifest data to temp files so
# concurrent readers will not see inconsistent view
- cl = appendfile.appendchangelog(self.opener)
+ cl = appendfile.appendchangelog(self.opener, self.changelog.version)
oldheads = len(cl.heads())
@@ -1408,7 +1427,7 @@ class localrepository(object):
cnr = cor
changesets = cnr - cor
- mf = appendfile.appendmanifest(self.opener)
+ mf = appendfile.appendmanifest(self.opener, self.manifest.version)
# pull off the manifest group
self.ui.status(_("adding manifests\n"))
@@ -1436,8 +1455,10 @@ class localrepository(object):
cl.writedata()
# make changelog and manifest see real files again
- self.changelog = changelog.changelog(self.opener)
- self.manifest = manifest.manifest(self.opener)
+ self.changelog = changelog.changelog(self.opener, self.changelog.version)
+ self.manifest = manifest.manifest(self.opener, self.manifest.version)
+ self.changelog.checkinlinesize(tr)
+ self.manifest.checkinlinesize(tr)
newheads = len(self.changelog.heads())
heads = ""
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -12,10 +12,11 @@ from demandload import *
demandload(globals(), "bisect array")
class manifest(revlog):
- def __init__(self, opener):
+ def __init__(self, opener, defversion=0):
self.mapcache = None
self.listcache = None
- revlog.__init__(self, opener, "00manifest.i", "00manifest.d")
+ revlog.__init__(self, opener, "00manifest.i", "00manifest.d",
+ defversion)
def read(self, node):
if node == nullid: return {} # don't upset local cache
--- a/mercurial/mdiff.py
+++ b/mercurial/mdiff.py
@@ -192,4 +192,5 @@ def patch(a, bin):
return mpatch.patches(a, [bin])
patches = mpatch.patches
+patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
--- a/mercurial/mpatch.c
+++ b/mercurial/mpatch.c
@@ -354,8 +354,44 @@ cleanup:
return result;
}
+/* calculate size of a patched file directly */
+static PyObject *
+patchedsize(PyObject *self, PyObject *args)
+{
+ long orig, start, end, len, outlen = 0, last = 0;
+ int patchlen;
+ char *bin, *binend;
+ char decode[12]; /* for dealing with alignment issues */
+
+ if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
+ return NULL;
+
+ binend = bin + patchlen;
+
+ while (bin < binend) {
+ memcpy(decode, bin, 12);
+ start = ntohl(*(uint32_t *)decode);
+ end = ntohl(*(uint32_t *)(decode + 4));
+ len = ntohl(*(uint32_t *)(decode + 8));
+ bin += 12 + len;
+ outlen += start - last;
+ last = end;
+ outlen += len;
+ }
+
+ if (bin != binend) {
+ if (!PyErr_Occurred())
+ PyErr_SetString(mpatch_Error, "patch cannot be decoded");
+ return NULL;
+ }
+
+ outlen += orig - last;
+ return Py_BuildValue("l", outlen);
+}
+
static PyMethodDef methods[] = {
{"patches", patches, METH_VARARGS, "apply a series of patches\n"},
+ {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
{NULL, NULL}
};
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -16,6 +16,18 @@ from demandload import demandload
demandload(globals(), "binascii changegroup errno heapq mdiff os")
demandload(globals(), "sha struct zlib")
+# revlog version constants
+REVLOGV0 = 0
+REVLOGNG = 1
+
+# revlog flags
+REVLOGNGINLINEDATA = (1 << 16)
+
+def flagstr(flag):
+ if flag == "inline":
+ return REVLOGNGINLINEDATA
+ raise RevlogError(_("unknown revlog flag %s" % flag))
+
def hash(text, p1, p2):
"""generate a hash from the given text and its parent hashes
@@ -51,49 +63,148 @@ def decompress(bin):
if t == 'u': return bin[1:]
raise RevlogError(_("unknown compression type %r") % t)
-indexformat = ">4l20s20s20s"
+indexformatv0 = ">4l20s20s20s"
+v0shaoffset = 56
+# index ng:
+# 6 bytes offset
+# 2 bytes flags
+# 4 bytes compressed length
+# 4 bytes uncompressed length
+# 4 bytes: base rev
+# 4 bytes link rev
+# 4 bytes parent 1 rev
+# 4 bytes parent 2 rev
+# 32 bytes: nodeid (20 bytes sha + 12 bytes padding)
+indexformatng = ">Qiiiiii20s12x"
+ngshaoffset = 32
+versionformat = ">i"
class lazyparser(object):
"""
this class avoids the need to parse the entirety of large indices
-
- By default we parse and load 1000 entries at a time.
-
- If no position is specified, we load the whole index, and replace
- the lazy objects in revlog with the underlying objects for
- efficiency in cases where we look at most of the nodes.
"""
- def __init__(self, data, revlog):
- self.data = data
+ def __init__(self, dataf, size, indexformat, shaoffset):
+ self.dataf = dataf
+ self.format = indexformat
self.s = struct.calcsize(indexformat)
- self.l = len(data)/self.s
+ self.indexformat = indexformat
+ self.datasize = size
+ self.l = size/self.s
self.index = [None] * self.l
self.map = {nullid: -1}
+ self.allmap = 0
self.all = 0
- self.revlog = revlog
+ self.mapfind_count = 0
+ self.shaoffset = shaoffset
- def trunc(self, pos):
- self.l = pos/self.s
+ def loadmap(self):
+ """
+ during a commit, we need to make sure the rev being added is
+ not a duplicate. This requires loading the entire index,
+ which is fairly slow. loadmap can load up just the node map,
+ which takes much less time.
+ """
+ if self.allmap: return
+ start = 0
+ end = self.datasize
+ self.allmap = 1
+ cur = 0
+ count = 0
+ blocksize = self.s * 256
+ self.dataf.seek(0)
+ while cur < end:
+ data = self.dataf.read(blocksize)
+ off = 0
+ for x in xrange(256):
+ n = data[off + self.shaoffset:off + self.shaoffset + 20]
+ self.map[n] = count
+ count += 1
+ if count >= self.l:
+ break
+ off += self.s
+ cur += blocksize
+
+ def loadblock(self, blockstart, blocksize, data=None):
+ if self.all: return
+ if data is None:
+ self.dataf.seek(blockstart)
+ data = self.dataf.read(blocksize)
+ lend = len(data) / self.s
+ i = blockstart / self.s
+ off = 0
+ for x in xrange(lend):
+ if self.index[i + x] == None:
+ b = data[off : off + self.s]
+ self.index[i + x] = b
+ n = b[self.shaoffset:self.shaoffset + 20]
+ self.map[n] = i + x
+ off += self.s
- def load(self, pos=None):
+ def findnode(self, node):
+ """search backwards through the index file for a specific node"""
+ if self.allmap: return None
+
+ # hg log will cause many many searches for the manifest
+ # nodes. After we get called a few times, just load the whole
+ # thing.
+ if self.mapfind_count > 8:
+ self.loadmap()
+ if node in self.map:
+ return node
+ return None
+ self.mapfind_count += 1
+ last = self.l - 1
+ while self.index[last] != None:
+ if last == 0:
+ self.all = 1
+ self.allmap = 1
+ return None
+ last -= 1
+ end = (last + 1) * self.s
+ blocksize = self.s * 256
+ while end >= 0:
+ start = max(end - blocksize, 0)
+ self.dataf.seek(start)
+ data = self.dataf.read(end - start)
+ findend = end - start
+ while True:
+                # we're searching backwards, so we have to make sure
+ # we don't find a changeset where this node is a parent
+ off = data.rfind(node, 0, findend)
+ findend = off
+ if off >= 0:
+ i = off / self.s
+ off = i * self.s
+ n = data[off + self.shaoffset:off + self.shaoffset + 20]
+ if n == node:
+ self.map[n] = i + start / self.s
+ return node
+ else:
+ break
+ end -= blocksize
+ return None
+
+ def loadindex(self, i=None, end=None):
if self.all: return
- if pos is not None:
- block = pos / 1000
- i = block * 1000
- end = min(self.l, i + 1000)
+ all = False
+ if i == None:
+ blockstart = 0
+ blocksize = (512 / self.s) * self.s
+ end = self.datasize
+ all = True
else:
- self.all = 1
- i = 0
- end = self.l
- self.revlog.index = self.index
- self.revlog.nodemap = self.map
-
- while i < end:
- d = self.data[i * self.s: (i + 1) * self.s]
- e = struct.unpack(indexformat, d)
- self.index[i] = e
- self.map[e[6]] = i
- i += 1
+ if end:
+ blockstart = i * self.s
+ end = end * self.s
+ blocksize = end - blockstart
+ else:
+ blockstart = (i & ~(32)) * self.s
+ blocksize = self.s * 64
+ end = blockstart + blocksize
+ while blockstart < end:
+ self.loadblock(blockstart, blocksize)
+ blockstart += blocksize
+ if all: self.all = True
class lazyindex(object):
"""a lazy version of the index array"""
@@ -104,39 +215,43 @@ class lazyindex(object):
def load(self, pos):
if pos < 0:
pos += len(self.p.index)
- self.p.load(pos)
+ self.p.loadindex(pos)
return self.p.index[pos]
def __getitem__(self, pos):
- return self.p.index[pos] or self.load(pos)
+ ret = self.p.index[pos] or self.load(pos)
+ if isinstance(ret, str):
+ ret = struct.unpack(self.p.indexformat, ret)
+ return ret
+ def __setitem__(self, pos, item):
+ self.p.index[pos] = item
def __delitem__(self, pos):
del self.p.index[pos]
def append(self, e):
self.p.index.append(e)
- def trunc(self, pos):
- self.p.trunc(pos)
class lazymap(object):
"""a lazy version of the node map"""
def __init__(self, parser):
self.p = parser
def load(self, key):
- if self.p.all: return
- n = self.p.data.find(key)
- if n < 0:
+ n = self.p.findnode(key)
+ if n == None:
raise KeyError(key)
- pos = n / self.p.s
- self.p.load(pos)
def __contains__(self, key):
- self.p.load()
+ if key in self.p.map:
+ return True
+ self.p.loadmap()
return key in self.p.map
def __iter__(self):
yield nullid
for i in xrange(self.p.l):
- try:
- yield self.p.index[i][6]
- except:
- self.p.load(i)
- yield self.p.index[i][6]
+ ret = self.p.index[i]
+ if not ret:
+ self.p.loadindex(i)
+ ret = self.p.index[i]
+ if isinstance(ret, str):
+ ret = struct.unpack(self.p.indexformat, ret)
+ yield ret[-1]
def __getitem__(self, key):
try:
return self.p.map[key]
@@ -178,7 +293,7 @@ class revlog(object):
remove data, and can use some simple techniques to avoid the need
for locking while reading.
"""
- def __init__(self, opener, indexfile, datafile):
+ def __init__(self, opener, indexfile, datafile, defversion=0):
"""
create a revlog object
@@ -192,11 +307,15 @@ class revlog(object):
self.indexstat = None
self.cache = None
self.chunkcache = None
+ self.defversion = defversion
self.load()
def load(self):
+ v = self.defversion
try:
f = self.opener(self.indexfile)
+ i = f.read(4)
+ f.seek(0)
except IOError, inst:
if inst.errno != errno.ENOENT:
raise
@@ -213,56 +332,164 @@ class revlog(object):
and st.st_mtime == oldst.st_mtime
and st.st_ctime == oldst.st_ctime):
return
- self.indexstat = st
- i = f.read()
+ self.indexstat = st
+ if len(i) > 0:
+ v = struct.unpack(versionformat, i)[0]
+ flags = v & ~0xFFFF
+ fmt = v & 0xFFFF
+ if fmt == 0:
+ if flags:
+ raise RevlogError(_("index %s invalid flags %x for format v0" %
+ (self.indexfile, flags)))
+ elif fmt == REVLOGNG:
+ if flags & ~REVLOGNGINLINEDATA:
+ raise RevlogError(_("index %s invalid flags %x for revlogng" %
+ (self.indexfile, flags)))
+ else:
+ raise RevlogError(_("index %s invalid format %d" %
+ (self.indexfile, fmt)))
+ self.version = v
+ if v == 0:
+ self.indexformat = indexformatv0
+ shaoffset = v0shaoffset
+ else:
+ self.indexformat = indexformatng
+ shaoffset = ngshaoffset
- if i and i[:4] != "\0\0\0\0":
- raise RevlogError(_("incompatible revlog signature on %s") %
- self.indexfile)
-
- if len(i) > 10000:
- # big index, let's parse it on demand
- parser = lazyparser(i, self)
- self.index = lazyindex(parser)
- self.nodemap = lazymap(parser)
+ if i:
+ if not self.inlinedata() and st and st.st_size > 10000:
+ # big index, let's parse it on demand
+ parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
+ self.index = lazyindex(parser)
+ self.nodemap = lazymap(parser)
+ else:
+ i = f.read()
+ self.parseindex(i)
+ if self.inlinedata():
+ # we've already got the entire data file read in, save it
+ # in the chunk data
+ self.chunkcache = (0, i)
+ if self.version != 0:
+ e = list(self.index[0])
+ type = self.ngtype(e[0])
+ e[0] = self.offset_type(0, type)
+ self.index[0] = e
else:
- s = struct.calcsize(indexformat)
- l = len(i) / s
- self.index = [None] * l
- m = [None] * l
+ self.nodemap = { nullid: -1}
+ self.index = []
+
+
+ def parseindex(self, data):
+ s = struct.calcsize(self.indexformat)
+ l = len(data)
+ self.index = []
+ self.nodemap = {nullid: -1}
+ inline = self.inlinedata()
+ off = 0
+ n = 0
+ while off < l:
+ e = struct.unpack(self.indexformat, data[off:off + s])
+ self.index.append(e)
+ self.nodemap[e[-1]] = n
+ n += 1
+ off += s
+ if inline:
+ off += e[1]
- n = 0
- for f in xrange(0, l * s, s):
- # offset, size, base, linkrev, p1, p2, nodeid
- e = struct.unpack(indexformat, i[f:f + s])
- m[n] = (e[6], n)
- self.index[n] = e
- n += 1
+ def ngoffset(self, q):
+ if q & 0xFFFF:
+ raise RevlogError(_('%s: incompatible revision flag %x') %
+ (self.indexfile, type))
+ return long(q >> 16)
+
+ def ngtype(self, q):
+ return int(q & 0xFFFF)
+
+ def offset_type(self, offset, type):
+ return long(long(offset) << 16 | type)
- self.nodemap = dict(m)
- self.nodemap[nullid] = -1
+ def loadindex(self, start, end):
+ """load a block of indexes all at once from the lazy parser"""
+ if isinstance(self.index, lazyindex):
+ self.index.p.loadindex(start, end)
+ def loadindexmap(self):
+ """loads both the map and the index from the lazy parser"""
+ if isinstance(self.index, lazyindex):
+ p = self.index.p
+ p.loadindex()
+ self.nodemap = p.map
+
+ def loadmap(self):
+ """loads the map from the lazy parser"""
+ if isinstance(self.nodemap, lazymap):
+ self.nodemap.p.loadmap()
+ self.nodemap = self.nodemap.p.map
+
+ def inlinedata(self): return self.version & REVLOGNGINLINEDATA
def tip(self): return self.node(len(self.index) - 1)
def count(self): return len(self.index)
- def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
+ def node(self, rev):
+ return (rev < 0) and nullid or self.index[rev][-1]
def rev(self, node):
try:
return self.nodemap[node]
except KeyError:
raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
- def linkrev(self, node): return self.index[self.rev(node)][3]
+ def linkrev(self, node): return self.index[self.rev(node)][-4]
def parents(self, node):
if node == nullid: return (nullid, nullid)
- return self.index[self.rev(node)][4:6]
+ r = self.rev(node)
+ d = self.index[r][-3:-1]
+ if self.version == 0:
+ return d
+ return [ self.node(x) for x in d ]
+ def start(self, rev):
+ if rev < 0:
+ return -1
+ if self.version != 0:
+ return self.ngoffset(self.index[rev][0])
+ return self.index[rev][0]
+
+ def end(self, rev): return self.start(rev) + self.length(rev)
+
+ def size(self, rev):
+ """return the length of the uncompressed text for a given revision"""
+ l = -1
+ if self.version != 0:
+ l = self.index[rev][2]
+ if l >= 0:
+ return l
- def start(self, rev): return (rev < 0) and -1 or self.index[rev][0]
+ t = self.revision(self.node(rev))
+ return len(t)
+
+ # alternate implementation, The advantage to this code is it
+ # will be faster for a single revision. But, the results are not
+ # cached, so finding the size of every revision will be slower.
+ """
+ if self.cache and self.cache[1] == rev:
+ return len(self.cache[2])
+
+ base = self.base(rev)
+ if self.cache and self.cache[1] >= base and self.cache[1] < rev:
+ base = self.cache[1]
+ text = self.cache[2]
+ else:
+ text = self.revision(self.node(base))
+
+ l = len(text)
+ for x in xrange(base + 1, rev + 1):
+ l = mdiff.patchedsize(l, self.chunk(x))
+ return l
+ """
+
def length(self, rev):
if rev < 0:
return 0
else:
return self.index[rev][1]
- def end(self, rev): return self.start(rev) + self.length(rev)
- def base(self, rev): return (rev < 0) and rev or self.index[rev][2]
+ def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
def reachable(self, rev, stop=None):
reachable = {}
@@ -501,18 +728,24 @@ class revlog(object):
"""apply a list of patches to a string"""
return mdiff.patches(t, pl)
- def chunk(self, rev):
+ def chunk(self, rev, df=None, cachelen=4096):
start, length = self.start(rev), self.length(rev)
+ inline = self.inlinedata()
+ if inline:
+ start += (rev + 1) * struct.calcsize(self.indexformat)
end = start + length
-
- def loadcache():
- cache_length = max(4096 * 1024, length) # 4Mo
- df = self.opener(self.datafile)
+ def loadcache(df):
+ cache_length = max(cachelen, length) # 4k
+ if not df:
+ if inline:
+ df = self.opener(self.indexfile)
+ else:
+ df = self.opener(self.datafile)
df.seek(start)
self.chunkcache = (start, df.read(cache_length))
if not self.chunkcache:
- loadcache()
+ loadcache(df)
cache_start = self.chunkcache[0]
cache_end = cache_start + len(self.chunkcache[1])
@@ -520,7 +753,7 @@ class revlog(object):
# it is cached
offset = start - cache_start
else:
- loadcache()
+ loadcache(df)
offset = 0
#def checkchunk():
@@ -555,16 +788,24 @@ class revlog(object):
rev = self.rev(node)
base = self.base(rev)
+ if self.inlinedata():
+ # we probably have the whole chunk cached
+ df = None
+ else:
+ df = self.opener(self.datafile)
+
# do we have useful data cached?
if self.cache and self.cache[1] >= base and self.cache[1] < rev:
base = self.cache[1]
text = self.cache[2]
+ self.loadindex(base, rev + 1)
else:
- text = self.chunk(base)
+ self.loadindex(base, rev + 1)
+ text = self.chunk(base, df=df)
bins = []
for r in xrange(base + 1, rev + 1):
- bins.append(self.chunk(r))
+ bins.append(self.chunk(r, df=df))
text = self.patches(text, bins)
@@ -576,6 +817,55 @@ class revlog(object):
self.cache = (node, rev, text)
return text
+ def checkinlinesize(self, tr, fp=None):
+ if not self.inlinedata():
+ return
+ if not fp:
+ fp = self.opener(self.indexfile, 'r')
+ fp.seek(0, 2)
+ size = fp.tell()
+ if size < 131072:
+ return
+ trinfo = tr.find(self.indexfile)
+ if trinfo == None:
+ raise RevlogError(_("%s not found in the transaction" %
+ self.indexfile))
+
+ trindex = trinfo[2]
+ dataoff = self.start(trindex)
+
+ tr.add(self.datafile, dataoff)
+ df = self.opener(self.datafile, 'w')
+ calc = struct.calcsize(self.indexformat)
+ for r in xrange(self.count()):
+ start = self.start(r) + (r + 1) * calc
+ length = self.length(r)
+ fp.seek(start)
+ d = fp.read(length)
+ df.write(d)
+ fp.close()
+ df.close()
+ fp = self.opener(self.indexfile, 'w', atomictemp=True)
+ self.version &= ~(REVLOGNGINLINEDATA)
+ if self.count():
+ x = self.index[0]
+ e = struct.pack(self.indexformat, *x)[4:]
+ l = struct.pack(versionformat, self.version)
+ fp.write(l)
+ fp.write(e)
+
+ for i in xrange(1, self.count()):
+ x = self.index[i]
+ e = struct.pack(self.indexformat, *x)
+ fp.write(e)
+
+ # if we don't call rename, the temp file will never replace the
+ # real index
+ fp.rename()
+
+ tr.replace(self.indexfile, trindex * calc)
+ self.chunkcache = None
+
def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
"""add a revision to the log
@@ -621,25 +911,64 @@ class revlog(object):
if t >= 0:
offset = self.end(t)
- e = (offset, l, base, link, p1, p2, node)
+ if self.version == 0:
+ e = (offset, l, base, link, p1, p2, node)
+ else:
+ e = (self.offset_type(offset, 0), l, len(text),
+ base, link, self.rev(p1), self.rev(p2), node)
self.index.append(e)
self.nodemap[node] = n
- entry = struct.pack(indexformat, *e)
+ entry = struct.pack(self.indexformat, *e)
- transaction.add(self.datafile, e[0])
- f = self.opener(self.datafile, "a")
- if data[0]:
+ if not self.inlinedata():
+ transaction.add(self.datafile, offset)
+ transaction.add(self.indexfile, n * len(entry))
+ f = self.opener(self.datafile, "a")
+ if data[0]:
+ f.write(data[0])
+ f.write(data[1])
+ f = self.opener(self.indexfile, "a")
+ else:
+ f = self.opener(self.indexfile, "a+")
+ f.seek(0, 2)
+ transaction.add(self.indexfile, f.tell(), self.count() - 1)
+
+ if len(self.index) == 1 and self.version != 0:
+ l = struct.pack(versionformat, self.version)
+ f.write(l)
+ entry = entry[4:]
+
+ f.write(entry)
+
+ if self.inlinedata():
f.write(data[0])
- f.write(data[1])
- transaction.add(self.indexfile, n * len(entry))
- self.opener(self.indexfile, "a").write(entry)
+ f.write(data[1])
+ self.checkinlinesize(transaction, f)
self.cache = (node, n, text)
return node
def ancestor(self, a, b):
"""calculate the least common ancestor of nodes a and b"""
+
+ # start with some short cuts for the linear cases
+ if a == b:
+ return a
+ ra = self.rev(a)
+ rb = self.rev(b)
+ if ra < rb:
+ last = b
+ first = a
+ else:
+ last = a
+ first = b
+
+ # reachable won't include stop in the list, so we have to use a parent
+ reachable = self.reachable(last, stop=self.parents(first)[0])
+ if first in reachable:
+ return first
+
# calculate the distance of every node from root
dist = {nullid: 0}
for i in xrange(self.count()):
@@ -746,18 +1075,18 @@ class revlog(object):
node = None
base = prev = -1
- start = end = measure = 0
+ start = end = textlen = 0
if r:
- base = self.base(t)
- start = self.start(base)
end = self.end(t)
- measure = self.length(base)
- prev = self.tip()
- transaction.add(self.datafile, end)
- transaction.add(self.indexfile, r * struct.calcsize(indexformat))
- dfh = self.opener(self.datafile, "a")
- ifh = self.opener(self.indexfile, "a")
+ ifh = self.opener(self.indexfile, "a+")
+ ifh.seek(0, 2)
+ transaction.add(self.indexfile, ifh.tell(), self.count())
+ if self.inlinedata():
+ dfh = None
+ else:
+ transaction.add(self.datafile, end)
+ dfh = self.opener(self.datafile, "a")
# loop through our set of deltas
chain = None
@@ -791,31 +1120,48 @@ class revlog(object):
if chain == prev:
tempd = compress(delta)
cdelta = tempd[0] + tempd[1]
+ textlen = mdiff.patchedsize(textlen, delta)
- if chain != prev or (end - start + len(cdelta)) > measure * 2:
+ if chain != prev or (end - start + len(cdelta)) > textlen * 2:
# flush our writes here so we can read it in revision
- dfh.flush()
+ if dfh:
+ dfh.flush()
ifh.flush()
text = self.revision(chain)
text = self.patches(text, [delta])
chk = self.addrevision(text, transaction, link, p1, p2)
if chk != node:
raise RevlogError(_("consistency error adding group"))
- measure = len(text)
+ textlen = len(text)
else:
- e = (end, len(cdelta), base, link, p1, p2, node)
+ if self.version == 0:
+ e = (end, len(cdelta), base, link, p1, p2, node)
+ else:
+ e = (self.offset_type(end, 0), len(cdelta), textlen, base,
+ link, self.rev(p1), self.rev(p2), node)
self.index.append(e)
self.nodemap[node] = r
- dfh.write(cdelta)
- ifh.write(struct.pack(indexformat, *e))
+ if self.inlinedata():
+ ifh.write(struct.pack(self.indexformat, *e))
+ ifh.write(cdelta)
+ self.checkinlinesize(transaction, ifh)
+ if not self.inlinedata():
+ dfh = self.opener(self.datafile, "a")
+ ifh = self.opener(self.indexfile, "a")
+ else:
+ if not dfh:
+ # addrevision switched from inline to conventional
+ # reopen the index
+ dfh = self.opener(self.datafile, "a")
+ ifh = self.opener(self.indexfile, "a")
+ dfh.write(cdelta)
+ ifh.write(struct.pack(self.indexformat, *e))
t, r, chain, prev = r, r + 1, node, node
base = self.base(t)
start = self.start(base)
end = self.end(t)
- dfh.close()
- ifh.close()
if node is None:
raise RevlogError(_("group to be added is empty"))
return node
@@ -824,32 +1170,37 @@ class revlog(object):
if self.count() == 0 or rev >= self.count():
return
+ if isinstance(self.index, lazyindex):
+ self.loadindexmap()
+
# When stripping away a revision, we need to make sure it
# does not actually belong to an older changeset.
# The minlink parameter defines the oldest revision
# we're allowed to strip away.
- while minlink > self.index[rev][3]:
+ while minlink > self.index[rev][-4]:
rev += 1
if rev >= self.count():
return
# first truncate the files on disk
end = self.start(rev)
- self.opener(self.datafile, "a").truncate(end)
- end = rev * struct.calcsize(indexformat)
- self.opener(self.indexfile, "a").truncate(end)
+ if not self.inlinedata():
+ df = self.opener(self.datafile, "a")
+ df.truncate(end)
+ end = rev * struct.calcsize(self.indexformat)
+ else:
+ end += rev * struct.calcsize(self.indexformat)
+
+ indexf = self.opener(self.indexfile, "a")
+ indexf.truncate(end)
# then reset internal state in memory to forget those revisions
self.cache = None
self.chunkcache = None
- for p in self.index[rev:]:
- del self.nodemap[p[6]]
- del self.index[rev:]
+ for x in xrange(rev, self.count()):
+ del self.nodemap[self.node(x)]
- # truncating the lazyindex also truncates the lazymap.
- if isinstance(self.index, lazyindex):
- self.index.trunc(end)
-
+ del self.index[rev:]
def checksize(self):
expected = 0
@@ -870,9 +1221,15 @@ class revlog(object):
f = self.opener(self.indexfile)
f.seek(0, 2)
actual = f.tell()
- s = struct.calcsize(indexformat)
+ s = struct.calcsize(self.indexformat)
i = actual / s
di = actual - (i * s)
+ if self.inlinedata():
+ databytes = 0
+ for r in xrange(self.count()):
+ databytes += self.length(r)
+ dd = 0
+ di = actual - self.count() * s - databytes
except IOError, inst:
if inst.errno != errno.ENOENT:
raise
--- a/mercurial/statichttprepo.py
+++ b/mercurial/statichttprepo.py
@@ -32,6 +32,7 @@ class statichttprepository(localrepo.loc
def __init__(self, ui, path):
self.path = (path + "/.hg")
self.ui = ui
+ self.revlogversion = 0
self.opener = opener(self.path)
self.manifest = manifest.manifest(self.opener)
self.changelog = changelog.changelog(self.opener)
--- a/mercurial/transaction.py
+++ b/mercurial/transaction.py
@@ -39,14 +39,27 @@ class transaction(object):
try: os.unlink(self.journal)
except: pass
- def add(self, file, offset):
+ def add(self, file, offset, data=None):
if file in self.map: return
- self.entries.append((file, offset))
- self.map[file] = 1
+ self.entries.append((file, offset, data))
+ self.map[file] = len(self.entries) - 1
# add enough data to the journal to do the truncate
self.file.write("%s\0%d\n" % (file, offset))
self.file.flush()
+ def find(self, file):
+ if file in self.map:
+ return self.entries[self.map[file]]
+ return None
+
+ def replace(self, file, offset, data=None):
+ if file not in self.map:
+ raise KeyError(file)
+ index = self.map[file]
+ self.entries[index] = (file, offset, data)
+ self.file.write("%s\0%d\n" % (file, offset))
+ self.file.flush()
+
def nest(self):
self.count += 1
return self
@@ -71,7 +84,7 @@ class transaction(object):
self.report(_("transaction abort!\n"))
- for f, o in self.entries:
+ for f, o, ignore in self.entries:
try:
self.opener(f, "a").truncate(o)
except:
@@ -82,8 +95,12 @@ class transaction(object):
self.report(_("rollback completed\n"))
def rollback(opener, file):
+ files = {}
for l in open(file).readlines():
f, o = l.split('\0')
+ files[f] = o
+ for f in files:
+ o = files[f]
opener(f, "a").truncate(int(o))
os.unlink(file)
--- a/mercurial/ui.py
+++ b/mercurial/ui.py
@@ -29,6 +29,7 @@ class ui(object):
self.diffcache = None
self.header = []
self.prev_header = []
+ self.revlogopts = self.configrevlog()
else:
# parentui may point to an ui object which is already a child
self.parentui = parentui.parentui or parentui
@@ -134,6 +135,12 @@ class ui(object):
result.append(path)
return result
+ def configrevlog(self):
+ ret = {}
+ for x in self.configitems("revlog"):
+ k = x[0].lower()
+ ret[k] = x[1]
+ return ret
def diffopts(self):
if self.diffcache:
return self.diffcache
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -446,20 +446,33 @@ def opener(base, audit=True):
os.chmod(temp, st.st_mode)
return temp
- class atomicfile(file):
- """the file will only be copied on close"""
- def __init__(self, name, mode, atomic=False):
+ class atomictempfile(file):
+ """the file will only be copied when rename is called"""
+ def __init__(self, name, mode):
self.__name = name
self.temp = mktempcopy(name)
file.__init__(self, self.temp, mode)
- def close(self):
+ def rename(self):
if not self.closed:
file.close(self)
rename(self.temp, self.__name)
def __del__(self):
- self.close()
+ if not self.closed:
+ try:
+ os.unlink(self.temp)
+ except: pass
+ file.close(self)
- def o(path, mode="r", text=False, atomic=False):
+ class atomicfile(atomictempfile):
+ """the file will only be copied on close"""
+ def __init__(self, name, mode):
+ atomictempfile.__init__(self, name, mode)
+ def close(self):
+ self.rename()
+ def __del__(self):
+ self.rename()
+
+ def o(path, mode="r", text=False, atomic=False, atomictemp=False):
if audit_p:
audit_path(path)
f = os.path.join(p, path)
@@ -477,6 +490,8 @@ def opener(base, audit=True):
else:
if atomic:
return atomicfile(f, mode)
+ elif atomictemp:
+ return atomictempfile(f, mode)
if nlink > 1:
rename(mktempcopy(f), f)
return file(f, mode)