mercurial/streamclone.py
author Alexis S. L. Carvalho <alexis@cecm.usp.br>
Thu, 23 Aug 2007 01:48:29 -0300
changeset 5210 90d9ec0dc69d
parent 4959 97b734fb9c6f
child 5396 5105b119edd2
permissions -rw-r--r--
merge: forcefully mark files that we get from the second parent as dirty After a hg merge, we want to include in the commit all the files that we got from the second parent, so that we have the correct file-level history. To make them visible to hg commit, we try to mark them as dirty. Unfortunately, right now we can't really mark them as dirty[1] - the best we can do is to mark them as needing a full comparison of their contents, but they will still be considered clean if they happen to be identical to the version in the first parent. This changeset extends the dirstate format in a compatible way, so that we can mark a file as dirty: Right now we use a negative file size to indicate we don't have valid stat data for this entry. In practice, this size is always -1. This patch uses -2 to indicate that the entry is dirty. Older versions of hg won't choke on this dirstate, but they may happily mark the file as clean after a full comparison, destroying all of our hard work. The patch adds a dirstate.normallookup method with the semantics of the current normaldirty, and changes normaldirty to forcefully mark the entry as dirty. This should fix issue522. [1] - well, we could put them in state 'm', but that state has a different meaning.

# streamclone.py - streaming clone server support for mercurial
#
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

from i18n import _
import os, stat, util, lock

# if server supports streaming clone, it advertises "stream"
# capability with value that is version+flags of repo it is serving.
# client only streams if it can read that repo format.

def walkrepo(root):
    '''generate (name, size) pairs for the revlog files under root.

    only regular files whose names end in .d or .i are reported.
    names are relative to root; sizes come from lstat.

    files below root/data come first, visited recursively with each
    directory listed in sorted order.  files directly inside root
    follow, in reverse sorted order.'''

    # number of leading characters (root plus one separator) to cut
    # from a full path to make it relative to root
    prefixlen = len(root) + len(os.sep)

    def scan(dirpath, descend):
        names = os.listdir(dirpath)
        names.sort()
        for name in names:
            full = os.path.join(dirpath, name)
            info = os.lstat(full)
            if stat.S_ISDIR(info.st_mode):
                # subdirectories are only entered on recursive scans
                if descend:
                    for item in scan(full, True):
                        yield item
                continue
            # names shorter than two characters can never match a suffix
            if stat.S_ISREG(info.st_mode) and name[-2:] in ('.d', '.i'):
                yield full[prefixlen:], info.st_size

    # write file data first
    for item in scan(os.path.join(root, 'data'), True):
        yield item

    # write manifest before changelog: reverse sorted order puts
    # 00manifest ahead of 00changelog
    toplevel = list(scan(root, False))
    toplevel.sort()
    toplevel.reverse()
    for item in toplevel:
        yield item

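# stream file format is simple.
#
# server first writes out a status line: '0' means the stream
# follows, '1' means streaming is not enabled on the server, '2'
# means the repository could not be locked.  nothing follows a
# non-zero status.
#
# on success, server writes out a line that says how many files and
# how many total bytes.  separator is ascii space, counts are
# decimal strings.
#
# then for each file:
#
#   server writes out a line that says file name and how many bytes
#   in file.  separator is ascii nul, byte count is a decimal string.
#
#   server writes out raw file data.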

def stream_out(repo, fileobj, untrusted=False):
    '''stream out all metadata files in repository.
    writes to file-like object, must support write() and optional flush().'''

    if not repo.ui.configbool('server', 'uncompressed', untrusted=untrusted):
        fileobj.write('1\n')
        return

    # get consistent snapshot of repo. lock during scan so lock not
    # needed while we stream, and commits can happen.
    lock = None
    try:
        try:
            repolock = repo.lock()
        except (lock.LockHeld, lock.LockUnavailable), inst:
            repo.ui.warn('locking the repository failed: %s\n' % (inst,))
            fileobj.write('2\n')
            return

        fileobj.write('0\n')
        repo.ui.debug('scanning\n')
        entries = []
        total_bytes = 0
        for name, size in walkrepo(repo.spath):
            name = repo.decodefn(util.pconvert(name))
            entries.append((name, size))
            total_bytes += size
    finally:
        del repolock

    repo.ui.debug('%d files, %d bytes to transfer\n' %
                  (len(entries), total_bytes))
    fileobj.write('%d %d\n' % (len(entries), total_bytes))
    for name, size in entries:
        repo.ui.debug('sending %s (%d bytes)\n' % (name, size))
        fileobj.write('%s\0%d\n' % (name, size))
        for chunk in util.filechunkiter(repo.sopener(name), limit=size):
            fileobj.write(chunk)
    flush = getattr(fileobj, 'flush', None)
    if flush: flush()