comparison mercurial/appendfile.py @ 2089:cb99c711c59f

make appendfile simpler so it does not break with revlogng on windows. it used to cache open files. this made revlogng break, because revlogng wants to rename files when splitting .i into .i/.d, but open files cannot be renamed or unlinked on windows. the new code is a bit slower, but safe on both linux and windows. the proper fix for too many opens/closes of changelog/manifest belongs in a different place; that fix can get the 10% speed improvement back.
author Vadim Gelfer <vadim.gelfer@gmail.com>
date Wed, 19 Apr 2006 08:33:46 -0700
parents 856f0ba200bc
children c62112815801
comparison
equal deleted inserted replaced
2088:f16435b45780 2089:cb99c711c59f
4 # 4 #
5 # This software may be used and distributed according to the terms 5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference. 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 from demandload import * 8 from demandload import *
9 demandload(globals(), "cStringIO changelog manifest os tempfile") 9 demandload(globals(), "cStringIO changelog errno manifest os tempfile")
10 10
11 # writes to metadata files are ordered. reads: changelog, manifest, 11 # writes to metadata files are ordered. reads: changelog, manifest,
12 # normal files. writes: normal files, manifest, changelog. 12 # normal files. writes: normal files, manifest, changelog.
13 13
14 # manifest contains pointers to offsets in normal files. changelog 14 # manifest contains pointers to offsets in normal files. changelog
31 '''implement enough of file protocol to append to revlog file. 31 '''implement enough of file protocol to append to revlog file.
32 appended data is written to temp file. reads and seeks span real 32 appended data is written to temp file. reads and seeks span real
33 file and temp file. readers cannot see appended data until 33 file and temp file. readers cannot see appended data until
34 writedata called.''' 34 writedata called.'''
35 35
36 def __init__(self, fp): 36 def __init__(self, fp, tmpname):
37 fd, self.tmpname = tempfile.mkstemp() 37 if tmpname:
38 self.tmpfp = os.fdopen(fd, 'ab+') 38 self.tmpname = tmpname
39 self.tmpfp = open(self.tmpname, 'ab+')
40 else:
41 fd, self.tmpname = tempfile.mkstemp()
42 self.tmpfp = os.fdopen(fd, 'ab+')
39 self.realfp = fp 43 self.realfp = fp
40 self.offset = fp.tell() 44 self.offset = fp.tell()
41 # real file is not written by anyone else. cache its size so 45 # real file is not written by anyone else. cache its size so
42 # seek and read can be fast. 46 # seek and read can be fast.
43 self.fpsize = os.fstat(fp.fileno()).st_size 47 self.realsize = os.fstat(fp.fileno()).st_size
44 48
45 def end(self): 49 def end(self):
46 self.tmpfp.flush() # make sure the stat is correct 50 self.tmpfp.flush() # make sure the stat is correct
47 return self.fpsize + os.fstat(self.tmpfp.fileno()).st_size 51 return self.realsize + os.fstat(self.tmpfp.fileno()).st_size
52
53 def tell(self):
54 return self.offset
55
56 def flush(self):
57 self.tmpfp.flush()
58
59 def close(self):
60 self.realfp.close()
61 self.tmpfp.close()
48 62
49 def seek(self, offset, whence=0): 63 def seek(self, offset, whence=0):
50 '''virtual file offset spans real file and temp file.''' 64 '''virtual file offset spans real file and temp file.'''
51 if whence == 0: 65 if whence == 0:
52 self.offset = offset 66 self.offset = offset
53 elif whence == 1: 67 elif whence == 1:
54 self.offset += offset 68 self.offset += offset
55 elif whence == 2: 69 elif whence == 2:
56 self.offset = self.end() + offset 70 self.offset = self.end() + offset
57 71
58 if self.offset < self.fpsize: 72 if self.offset < self.realsize:
59 self.realfp.seek(self.offset) 73 self.realfp.seek(self.offset)
60 else: 74 else:
61 self.tmpfp.seek(self.offset - self.fpsize) 75 self.tmpfp.seek(self.offset - self.realsize)
62 76
63 def read(self, count=-1): 77 def read(self, count=-1):
64 '''only trick here is reads that span real file and temp file.''' 78 '''only trick here is reads that span real file and temp file.'''
65 fp = cStringIO.StringIO() 79 fp = cStringIO.StringIO()
66 old_offset = self.offset 80 old_offset = self.offset
67 if self.offset < self.fpsize: 81 if self.offset < self.realsize:
68 s = self.realfp.read(count) 82 s = self.realfp.read(count)
69 fp.write(s) 83 fp.write(s)
70 self.offset += len(s) 84 self.offset += len(s)
71 if count > 0: 85 if count > 0:
72 count -= len(s) 86 count -= len(s)
73 if count != 0: 87 if count != 0:
74 if old_offset != self.offset: 88 if old_offset != self.offset:
75 self.tmpfp.seek(self.offset - self.fpsize) 89 self.tmpfp.seek(self.offset - self.realsize)
76 s = self.tmpfp.read(count) 90 s = self.tmpfp.read(count)
77 fp.write(s) 91 fp.write(s)
78 self.offset += len(s) 92 self.offset += len(s)
79 return fp.getvalue() 93 return fp.getvalue()
80 94
81 def write(self, s): 95 def write(self, s):
82 '''append to temp file.''' 96 '''append to temp file.'''
83 self.tmpfp.seek(0, 2) 97 self.tmpfp.seek(0, 2)
84 self.tmpfp.write(s) 98 self.tmpfp.write(s)
85 # all writes are appends, so offset must go to end of file. 99 # all writes are appends, so offset must go to end of file.
86 self.offset = self.fpsize + self.tmpfp.tell() 100 self.offset = self.realsize + self.tmpfp.tell()
87
88 def writedata(self):
89 '''copy data from temp file to real file.'''
90 self.tmpfp.seek(0)
91 s = self.tmpfp.read()
92 self.tmpfp.close()
93 self.realfp.seek(0, 2)
94 # small race here. we write all new data in one call, but
95 # reader can see partial update due to python or os. file
96 # locking no help: slow, not portable, not reliable over nfs.
97 # only safe thing is write to temp file every time and rename,
98 # but performance bad when manifest or changelog gets big.
99 self.realfp.write(s)
100 self.realfp.close()
101
102 def __del__(self):
103 '''delete temp file even if exception raised.'''
104 try: os.unlink(self.tmpname)
105 except: pass
106
107 class sharedfile(object):
108 '''let file objects share a single appendfile safely. each
109 sharedfile has own offset, syncs up with appendfile offset before
110 read and after read and write.'''
111
112 def __init__(self, fp):
113 self.fp = fp
114 self.offset = 0
115
116 def tell(self):
117 return self.offset
118
119 def seek(self, offset, whence=0):
120 if whence == 0:
121 self.offset = offset
122 elif whence == 1:
123 self.offset += offset
124 elif whence == 2:
125 self.offset = self.fp.end() + offset
126
127 def read(self, count=-1):
128 try:
129 if self.offset != self.fp.offset:
130 self.fp.seek(self.offset)
131 return self.fp.read(count)
132 finally:
133 self.offset = self.fp.offset
134
135 def write(self, s):
136 try:
137 return self.fp.write(s)
138 finally:
139 self.offset = self.fp.offset
140
141 def close(self):
142 # revlog wants this.
143 pass
144
145 def flush(self):
146 # revlog wants this.
147 pass
148
149 def writedata(self):
150 self.fp.writedata()
151 101
152 class appendopener(object): 102 class appendopener(object):
153 '''special opener for files that only read or append.''' 103 '''special opener for files that only read or append.'''
154 104
155 def __init__(self, opener): 105 def __init__(self, opener):
156 self.realopener = opener 106 self.realopener = opener
157 # key: file name, value: appendfile object 107 # key: file name, value: appendfile name
158 self.fps = {} 108 self.tmpnames = {}
159 109
160 def __call__(self, name, mode='r'): 110 def __call__(self, name, mode='r'):
161 '''open file. return same cached appendfile object for every 111 '''open file.'''
162 later call.'''
163 112
164 assert mode in 'ra+' 113 assert mode in 'ra+'
165 fp = self.fps.get(name) 114 try:
166 if fp is None: 115 realfp = self.realopener(name, 'r')
167 fp = appendfile(self.realopener(name, 'a+')) 116 except IOError, err:
168 self.fps[name] = fp 117 if err.errno != errno.ENOENT: raise
169 return sharedfile(fp) 118 realfp = self.realopener(name, 'w+')
119 tmpname = self.tmpnames.get(name)
120 fp = appendfile(realfp, tmpname)
121 if tmpname is None:
122 self.tmpnames[name] = fp.tmpname
123 return fp
170 124
171 def writedata(self): 125 def writedata(self):
172 '''copy data from temp files to real files.''' 126 '''copy data from temp files to real files.'''
173 # write .d file before .i file. 127 # write .d file before .i file.
174 fps = self.fps.items() 128 tmpnames = self.tmpnames.items()
175 fps.sort() 129 tmpnames.sort()
176 for name, fp in fps: 130 for name, tmpname in tmpnames:
177 fp.writedata() 131 fp = open(tmpname, 'rb')
132 s = fp.read()
133 fp.close()
134 fp = self.realopener(name, 'a')
135 fp.write(s)
136 fp.close()
137
138 def __del__(self):
139 for tmpname in self.tmpnames.itervalues():
140 os.unlink(tmpname)
178 141
179 # files for changelog and manifest are in different appendopeners, so 142 # files for changelog and manifest are in different appendopeners, so
180 # not mixed up together. 143 # not mixed up together.
181 144
182 class appendchangelog(changelog.changelog, appendopener): 145 class appendchangelog(changelog.changelog, appendopener):