comparison mercurial/streamclone.py @ 2612:ffb895f16925

add support for streaming clone. The existing clone code uses pull to get changes from the remote repo; it is very slow and uses lots of memory and CPU. The new clone code has the server write file data straight to the client, and the client write file data straight to disk. Memory and CPU use are very low, and the clone is much faster over a LAN. The new client can still clone with pull, and can still clone from older servers. The new server can still serve older clients.
author Vadim Gelfer <vadim.gelfer@gmail.com>
date Fri, 14 Jul 2006 11:17:22 -0700
parents
children 5a5852a417b1
# streamclone.py - streaming clone server support for mercurial
#
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

from demandload import demandload
from i18n import gettext as _
demandload(globals(), "os stat util")

# if server supports streaming clone, it advertises "stream"
# capability with value that is version+flags of repo it is serving.
# client only streams if it can read that repo format.

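As an aside (not part of this file), a client-side check of that capability might look like the sketch below; the capability string layout and the names can_stream, remote_capabilities and local_revlog_version are assumptions made for illustration.

# hypothetical client-side check: only stream if the advertised repo
# format matches what this client can read.  illustrative names only.
def can_stream(remote_capabilities, local_revlog_version):
    for cap in remote_capabilities:
        if cap.startswith('stream='):
            try:
                remote_version = int(cap.split('=', 1)[1])
            except ValueError:
                return False
            return remote_version == local_revlog_version
    return False
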
def walkrepo(root):
    '''iterate over metadata files in repository.
    walk in natural (sorted) order.
    yields 2-tuples: name of .d or .i file, size of file.'''

    strip_count = len(root) + len(os.sep)
    def walk(path, recurse):
        ents = os.listdir(path)
        ents.sort()
        for e in ents:
            pe = os.path.join(path, e)
            st = os.lstat(pe)
            if stat.S_ISDIR(st.st_mode):
                if recurse:
                    for x in walk(pe, True):
                        yield x
            else:
                if not stat.S_ISREG(st.st_mode) or len(e) < 2:
                    continue
                sfx = e[-2:]
                if sfx in ('.d', '.i'):
                    yield pe[strip_count:], st.st_size

    # write file data first
    for x in walk(os.path.join(root, 'data'), True):
        yield x

    # write manifest before changelog
    meta = list(walk(root, False))
    meta.sort(reverse=True)
    for x in meta:
        yield x

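For illustration only (not part of this changeset), walkrepo can be exercised directly to see what a streaming clone would send; the repository path below is a placeholder.

# illustrative only: list the .d/.i files a streaming clone would send.
# '/path/to/repo/.hg' is a placeholder, not something from this patch.
if __name__ == '__main__':
    count = 0
    total = 0
    for fname, fsize in walkrepo('/path/to/repo/.hg'):
        print '%s (%d bytes)' % (fname, fsize)
        count += 1
        total += fsize
    print '%d files, %d bytes' % (count, total)
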
# stream file format is simple.
#
# server writes out line that says how many files, how many total
# bytes.  separator is ascii space, byte counts are strings.
#
# then for each file:
#
# server writes out line that says file name, how many bytes in
# file.  separator is ascii nul, byte count is string.
#
# server writes out raw file data.

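A client-side reader for this format is not part of this changeset, but a minimal sketch might look like the following; the function name stream_in, the writefile callback, and the error handling are all assumptions for the example.

# hypothetical client-side counterpart: read the header line, then for
# each file read its name\0size line and exactly <size> bytes of data.
def stream_in(fileobj, writefile):
    '''writefile(name, chunks) is a caller-supplied callback; it and
    this whole function are illustrative, not part of this patch.'''
    total_files, total_bytes = map(int, fileobj.readline().split(' ', 1))
    for i in xrange(total_files):
        name, size = fileobj.readline().split('\0', 1)
        size = int(size)
        remaining = size
        chunks = []
        while remaining > 0:
            chunk = fileobj.read(min(65536, remaining))
            if not chunk:
                raise IOError('premature end of stream for %s' % name)
            chunks.append(chunk)
            remaining -= len(chunk)
        writefile(name, chunks)
    return total_files, total_bytes
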
def stream_out(repo, fileobj):
    '''stream out all metadata files in repository.
    writes to file-like object, must support write() and optional flush().'''
    # get consistent snapshot of repo. lock during scan so lock not
    # needed while we stream, and commits can happen.
    lock = repo.lock()
    repo.ui.debug('scanning\n')
    entries = []
    total_bytes = 0
    for name, size in walkrepo(repo.path):
        entries.append((name, size))
        total_bytes += size
    lock.release()

    repo.ui.debug('%d files, %d bytes to transfer\n' %
                  (len(entries), total_bytes))
    fileobj.write('%d %d\n' % (len(entries), total_bytes))
    for name, size in entries:
        repo.ui.debug('sending %s (%d bytes)\n' % (name, size))
        fileobj.write('%s\0%d\n' % (name, size))
        for chunk in util.filechunkiter(repo.opener(name), limit=size):
            fileobj.write(chunk)
    flush = getattr(fileobj, 'flush', None)
    if flush: flush()
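
Finally, a sketch of how the server side might be driven by hand, dumping a repository's stream into an ordinary file; the hg.repository()/ui.ui() calls reflect the Mercurial API of this era and, like the paths, should be read as assumptions rather than part of this patch.

# illustrative only: write a repository's stream to a local file.
from mercurial import hg, ui, streamclone

repo = hg.repository(ui.ui(), '/path/to/repo')   # placeholder path
out = open('/tmp/repo.stream', 'wb')
try:
    streamclone.stream_out(repo, out)
finally:
    out.close()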