|
1 # appendfile.py - special classes to make repo updates atomic |
|
2 # |
|
3 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> |
|
4 # |
|
5 # This software may be used and distributed according to the terms |
|
6 # of the GNU General Public License, incorporated herein by reference. |
|
7 |
|
8 from demandload import * |
|
9 demandload(globals(), "cStringIO changelog manifest os tempfile") |
|
10 |
|
11 # writes to metadata files are ordered. reads: changelog, manifest, |
|
12 # normal files. writes: normal files, manifest, changelog. |
|
13 |
|
# manifest contains pointers to offsets in normal files. changelog
# contains pointers to offsets in manifest. if a reader reads the old
# changelog while the manifest or normal files are being written, it
# has no pointers into the new parts of those files (which may not be
# consistent yet), so it will not read them.
|
19 |
|
# localrepo.addchangegroup thinks it writes changelog first, then
# manifest, then normal files (this is the order in which they are
# available, and the order needed for computing linkrev fields), but
# it uses appendfile to hide updates from readers. data is not written
# to the manifest or changelog until all normal files are updated.
# the manifest is then written first, followed by the changelog.
|
26 |
|
27 # with this write ordering, readers cannot see inconsistent view of |
|
28 # repo during update. |
|
29 |
|
class appendfile(object):
    '''implement enough of file protocol to append to revlog file.

    appended data is written to temp file. reads and seeks span real
    file and temp file: virtual offsets below fpsize address the real
    file, offsets at or above fpsize address the temp file. readers of
    the real file cannot see appended data until writedata called.'''

    def __init__(self, fp):
        '''wrap fp, an open real file that provides fileno, seek, read,
        write and close. a new temp file is created to hold appended
        data.'''
        fd, self.tmpname = tempfile.mkstemp()
        self.tmpfp = os.fdopen(fd, 'ab+')
        self.realfp = fp
        # a file opened for append may start out positioned at its end
        # (this is platform dependent). rewind it so the real file
        # position agrees with the virtual offset of 0 set below.
        fp.seek(0)
        self.offset = 0
        # real file is not written by anyone else. cache its size so
        # seek and read can be fast.
        self.fpsize = os.fstat(fp.fileno()).st_size

    def seek(self, offset):
        '''virtual file offset spans real file and temp file.'''
        self.offset = offset
        if self.offset < self.fpsize:
            self.realfp.seek(self.offset)
        else:
            self.tmpfp.seek(self.offset - self.fpsize)

    def read(self, count=-1):
        '''read up to count bytes at the virtual offset (everything up
        to the end of the temp file if count is negative) and advance
        the offset by the amount read. only trick here is reads that
        span real file and temp file.'''
        pieces = []
        if self.offset < self.fpsize:
            s = self.realfp.read(count)
            pieces.append(s)
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            # always position the temp file explicitly. its position
            # is stale whenever an earlier read stopped exactly at the
            # end of the real file, or after a write moved it to the
            # end of the temp file.
            self.tmpfp.seek(self.offset - self.fpsize)
            s = self.tmpfp.read(count)
            pieces.append(s)
            self.offset += len(s)
        if not pieces:
            # count == 0 at or past the end of the real file.
            return ''
        if len(pieces) == 1:
            return pieces[0]
        return pieces[0] + pieces[1]

    def write(self, s):
        '''append to temp file.'''
        # explicit seek to the end: a write that directly follows a
        # read on the same stream needs an intervening seek.
        self.tmpfp.seek(0, 2)
        self.tmpfp.write(s)
        # all writes are appends, so offset must go to end of file.
        self.offset = self.fpsize + self.tmpfp.tell()

    def writedata(self):
        '''copy data from temp file to real file. both files are
        closed afterwards, so the object cannot be used for further
        reads or writes.'''
        self.tmpfp.seek(0)
        s = self.tmpfp.read()
        self.tmpfp.close()
        self.realfp.seek(0, 2)
        # small race here. we write all new data in one call, but
        # reader can see partial update due to python or os. file
        # locking no help: slow, not portable, not reliable over nfs.
        # only safe thing is write to temp file every time and rename,
        # but performance bad when manifest or changelog gets big.
        self.realfp.write(s)
        self.realfp.close()

    def __del__(self):
        '''close and delete temp file even if exception raised.'''
        # attributes are missing if __init__ failed part way through.
        tmpfp = getattr(self, 'tmpfp', None)
        if tmpfp is not None:
            try:
                # an open file cannot be unlinked on every platform.
                tmpfp.close()
            except (IOError, OSError):
                pass
        tmpname = getattr(self, 'tmpname', None)
        if tmpname is not None:
            try:
                os.unlink(tmpname)
            except (OSError, AttributeError):
                # already gone, or module globals torn down at exit.
                pass
|
95 |
|
class sharedfile(object):
    '''per-user view of one shared appendfile.

    every sharedfile keeps a private offset. before a read the shared
    appendfile is moved to that offset if it is somewhere else, and
    after each read or write the private offset is refreshed from the
    appendfile, so several sharedfiles can use one appendfile safely.'''

    def __init__(self, fp):
        self.offset = 0
        self.fp = fp

    def _resync(self):
        '''take over the current offset of the shared appendfile.'''
        self.offset = self.fp.offset

    def seek(self, offset):
        '''remember offset. the appendfile itself is only moved by the
        next read.'''
        self.offset = offset

    def read(self, count=-1):
        '''read from the appendfile at the private offset.'''
        try:
            if self.fp.offset != self.offset:
                self.fp.seek(self.offset)
            data = self.fp.read(count)
            return data
        finally:
            # runs on failure too, so the private offset never drifts.
            self._resync()

    def write(self, s):
        '''pass s to the appendfile and return its result.'''
        try:
            result = self.fp.write(s)
            return result
        finally:
            self._resync()

    def close(self):
        '''do nothing. revlog wants this.'''
        pass

    def flush(self):
        '''do nothing. revlog wants this.'''
        pass

    def writedata(self):
        '''ask the appendfile to write its data out.'''
        self.fp.writedata()
|
132 |
|
class appendopener(object):
    '''special opener for files that only read or append.

    wraps a real opener. each file name is opened once through the
    real opener and wrapped in an appendfile, which is cached and
    shared by every later open of that name.'''

    def __init__(self, opener):
        '''opener is called as opener(name, 'a+') to open real files.'''
        self.realopener = opener
        # key: file name, value: appendfile object
        self.fps = {}

    def __call__(self, name, mode='r'):
        '''open file. return a new sharedfile around the appendfile
        cached for name, creating the appendfile on first use.'''
        # only read and append are supported. compare against whole
        # mode strings: a substring test would also let '' and 'ra'
        # through.
        assert mode in ('r', 'a')
        fp = self.fps.get(name)
        if fp is None:
            fp = appendfile(self.realopener(name, 'a+'))
            self.fps[name] = fp
        return sharedfile(fp)

    def writedata(self):
        '''copy data from temp files to real files.'''
        # write .d file before .i file. sorting by name gives that
        # order.
        for name in sorted(self.fps):
            self.fps[name].writedata()
|
159 |
|
160 # files for changelog and manifest are in different appendopeners, so |
|
161 # not mixed up together. |
|
162 |
|
class appendchangelog(changelog.changelog, appendopener):
    '''changelog that is its own appendopener, so its file access goes
    through appendfile objects.'''

    def __init__(self, opener):
        # set up the opener half first: the changelog initializer is
        # handed this object as its opener.
        appendopener.__init__(self, opener)
        changelog.changelog.__init__(self, self)
|
167 |
|
class appendmanifest(manifest.manifest, appendopener):
    '''manifest that is its own appendopener, so its file access goes
    through appendfile objects.'''

    def __init__(self, opener):
        # set up the opener half first: the manifest initializer is
        # handed this object as its opener.
        appendopener.__init__(self, opener)
        manifest.manifest.__init__(self, self)