Mercurial > hg > mercurial-crew-with-dirclash
comparison mercurial/byterange.py @ 0:9117c6561b0b
Add back links from file revisions to changeset revisions
Add simple transaction support
Add hg verify
Improve caching in revlog
Fix a bunch of bugs
Self-hosting now that the metadata is close to finalized
author | mpm@selenic.com |
---|---|
date | Tue, 03 May 2005 13:16:10 -0800 |
parents | |
children | 7f5ce4bbdd7b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9117c6561b0b |
---|---|
1 # This library is free software; you can redistribute it and/or | |
2 # modify it under the terms of the GNU Lesser General Public | |
3 # License as published by the Free Software Foundation; either | |
4 # version 2.1 of the License, or (at your option) any later version. | |
5 # | |
6 # This library is distributed in the hope that it will be useful, | |
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
9 # Lesser General Public License for more details. | |
10 # | |
11 # You should have received a copy of the GNU Lesser General Public | |
12 # License along with this library; if not, write to the | |
13 # Free Software Foundation, Inc., | |
14 # 59 Temple Place, Suite 330, | |
15 # Boston, MA 02111-1307 USA | |
16 | |
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber | |
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | |
19 | |
20 # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $ | |
21 | |
22 import os | |
23 import stat | |
24 import urllib | |
25 import urllib2 | |
26 import rfc822 | |
27 | |
28 try: | |
29 from cStringIO import StringIO | |
30 except ImportError, msg: | |
31 from StringIO import StringIO | |
32 | |
class RangeError(IOError):
    """Raised when a requested byte range cannot be satisfied."""
36 | |
class HTTPRangeHandler(urllib2.BaseHandler):
    """Handler that enables HTTP Range headers.

    The Range header is an HTTP feature to begin with, so all this
    class does is tell urllib2 that the "206 Partial Content" response
    from the HTTP server is what we expected.

    Example:
        import urllib2
        import byterange

        range_handler = byterange.HTTPRangeHandler()
        opener = urllib2.build_opener(range_handler)

        # install it
        urllib2.install_opener(opener)

        # create Request and set Range header
        req = urllib2.Request('http://www.python.org/')
        req.add_header('Range', 'bytes=30-50')
        f = urllib2.urlopen(req)
    """

    def http_error_206(self, req, fp, code, msg, hdrs):
        """Accept a 206 Partial Content response as a normal result.

        Wraps fp and hdrs in a urllib.addinfourl for the request's full
        URL, records code and msg on it, and returns it.
        """
        r = urllib.addinfourl(fp, hdrs, req.get_full_url())
        r.code = code
        r.msg = msg
        return r

    def http_error_416(self, req, fp, code, msg, hdrs):
        """Turn HTTP's Range Not Satisfiable error into a RangeError."""
        raise RangeError('Requested Range Not Satisfiable')
71 | |
class RangeableFileObject:
    """File object wrapper to enable raw range handling.

    This was implemented primarily for handling range specifications
    for file:// urls. This object effectively makes a file object look
    like it consists only of a range of bytes in the stream.

    Examples:
        # expose 10 bytes, starting at byte position 20, from
        # /etc/passwd.
        >>> fo = RangeableFileObject(open('/etc/passwd', 'r'), (20,30))
        # seek seeks within the range (to position 23 in this case)
        >>> fo.seek(3)
        # tell tells where you are _within the range_ (position 3 in
        # this case)
        >>> fo.tell()
        # read EOFs if an attempt is made to read past the last
        # byte in the range. the following will return only 7 bytes.
        >>> fo.read(30)
    """

    def __init__(self, fo, rangetup):
        """Create a RangeableFileObject.

        fo       -- a file like object. only the read() method need be
                    supported but supporting an optimized seek() is
                    preferable.
        rangetup -- a (firstbyte,lastbyte) tuple specifying the range
                    to work over. lastbyte is exclusive; '' means
                    "until the end of the stream".
        The file object provided is assumed to be at byte offset 0.
        """
        self.fo = fo
        normalized = range_tuple_normalize(rangetup)
        if normalized is None:
            # range_tuple_normalize() collapses "the entire file" to
            # None; unpacking that directly would raise TypeError.
            normalized = (0, '')
        (self.firstbyte, self.lastbyte) = normalized
        self.realpos = 0
        self._do_seek(self.firstbyte)

    def __getattr__(self, name):
        """Delegate any attribute not found on this object to self.fo.

        This effectively allows us to wrap at the instance level; it
        includes methods.
        """
        if name == 'fo':
            # self.fo is not set yet; looking it up again below would
            # re-enter __getattr__ and recurse without end.
            raise AttributeError(name)
        if hasattr(self.fo, name):
            return getattr(self.fo, name)
        raise AttributeError(name)

    def tell(self):
        """Return the position within the range.

        This is different from fo.tell in that position 0 is the
        first byte position of the range tuple. For example, if
        this object was created with a range tuple of (500,899),
        tell() will return 0 when at byte position 500 of the file.
        """
        return (self.realpos - self.firstbyte)

    def seek(self, offset, whence=0):
        """Seek within the byte range.

        Positioning is identical to that described under tell().
        whence 0 is relative to the start of the range, whence 1 is
        relative to the current position; whence 2 raises IOError.
        Only forward movement is supported (see _do_seek).
        """
        assert whence in (0, 1, 2)
        if whence == 0:   # absolute seek
            realoffset = self.firstbyte + offset
        elif whence == 1: # relative seek
            realoffset = self.realpos + offset
        elif whence == 2: # absolute from end of file
            # XXX: are we raising the right Error here?
            raise IOError('seek from end of file not supported.')

        # do not allow seek past lastbyte in range; '' means unbounded,
        # while 0 is a real (empty-range) bound and must be honoured
        if self.lastbyte != '' and realoffset >= self.lastbyte:
            realoffset = self.lastbyte

        self._do_seek(realoffset - self.realpos)

    def read(self, size=-1):
        """Read within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.read(size)
        self.realpos += len(rslt)
        return rslt

    def readline(self, size=-1):
        """Read lines within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.readline(size)
        self.realpos += len(rslt)
        return rslt

    def _calc_read_size(self, size):
        """Return the amount of data to read, clipped to the range.

        A negative size means "everything that is left in the range".
        When the range has no upper bound, size is returned untouched.
        """
        if self.lastbyte != '':
            remaining = self.lastbyte - self.realpos
            if size < 0 or size >= remaining:
                size = remaining
        return size

    def _do_seek(self, offset):
        """Seek based on whether wrapped object supports seek().

        offset is relative to the current position (self.realpos) and
        must not be negative.
        """
        assert offset >= 0
        if not hasattr(self.fo, 'seek'):
            self._poor_mans_seek(offset)
        else:
            self.fo.seek(self.realpos + offset)
        self.realpos += offset

    def _poor_mans_seek(self, offset):
        """Seek by calling the wrapped file object's read() method.

        This is used for file like objects that do not have native
        seek support. The wrapped object's read() method is called
        to manually seek to the desired position.
        offset -- read this number of bytes from the wrapped
                  file object.
        raise RangeError if we encounter EOF before reaching the
        specified offset.
        """
        pos = 0
        bufsize = 1024
        while pos < offset:
            if (pos + bufsize) > offset:
                bufsize = offset - pos
            buf = self.fo.read(bufsize)
            if len(buf) != bufsize:
                raise RangeError('Requested Range Not Satisfiable')
            pos += bufsize
203 | |
class FileRangeHandler(urllib2.FileHandler):
    """FileHandler subclass that adds Range support.

    This class handles Range headers exactly like an HTTP
    server would.
    """
    def open_local_file(self, req):
        """Open the local file named by req, honouring a Range header.

        Returns a urllib.addinfourl whose headers carry Content-Type,
        Content-Length and Last-modified. When the request has a Range
        header, the file object is wrapped in a RangeableFileObject and
        Content-Length is the length of the range.

        Raises urllib2.URLError if the URL names a host that is not the
        local one, and RangeError if the range does not fit the file.
        """
        import mimetypes
        import mimetools
        host = req.get_host()
        selector = req.get_selector()
        localfile = urllib.url2pathname(selector)
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        modified = rfc822.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(selector)[0]
        if host:
            host, port = urllib.splitport(host)
            if port or socket.gethostbyname(host) not in self.get_names():
                # URLError is not imported by name in this module
                raise urllib2.URLError('file not on local host')
        fo = open(localfile, 'rb')
        try:
            brange = req.headers.get('Range', None)
            brange = range_header_to_tuple(brange)
            # () means the header did not match the range pattern
            assert brange != ()
            if brange:
                (fb, lb) = brange
                if lb == '':
                    lb = size
                if fb < 0 or fb > size or lb > size:
                    raise RangeError('Requested Range Not Satisfiable')
                size = (lb - fb)
                fo = RangeableFileObject(fo, (fb, lb))
        except:
            # do not leak the open file when the range is rejected
            fo.close()
            raise
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        return urllib.addinfourl(fo, headers, 'file:' + selector)
238 | |
239 | |
240 # FTP Range Support | |
241 # Unfortunately, a large amount of base FTP code had to be copied | |
242 # from urllib and urllib2 in order to insert the FTP REST command. | |
243 # Code modifications for range support have been commented as | |
244 # follows: | |
245 # -- range support modifications start/end here | |
246 | |
247 from urllib import splitport, splituser, splitpasswd, splitattr, \ | |
248 unquote, addclosehook, addinfourl | |
249 import ftplib | |
250 import socket | |
251 import sys | |
252 import ftplib | |
253 import mimetypes | |
254 import mimetools | |
255 | |
256 class FTPRangeHandler(urllib2.FTPHandler): | |
257 def ftp_open(self, req): | |
258 host = req.get_host() | |
259 if not host: | |
260 raise IOError, ('ftp error', 'no host given') | |
261 host, port = splitport(host) | |
262 if port is None: | |
263 port = ftplib.FTP_PORT | |
264 | |
265 # username/password handling | |
266 user, host = splituser(host) | |
267 if user: | |
268 user, passwd = splitpasswd(user) | |
269 else: | |
270 passwd = None | |
271 host = unquote(host) | |
272 user = unquote(user or '') | |
273 passwd = unquote(passwd or '') | |
274 | |
275 try: | |
276 host = socket.gethostbyname(host) | |
277 except socket.error, msg: | |
278 raise URLError(msg) | |
279 path, attrs = splitattr(req.get_selector()) | |
280 dirs = path.split('/') | |
281 dirs = map(unquote, dirs) | |
282 dirs, file = dirs[:-1], dirs[-1] | |
283 if dirs and not dirs[0]: | |
284 dirs = dirs[1:] | |
285 try: | |
286 fw = self.connect_ftp(user, passwd, host, port, dirs) | |
287 type = file and 'I' or 'D' | |
288 for attr in attrs: | |
289 attr, value = splitattr(attr) | |
290 if attr.lower() == 'type' and \ | |
291 value in ('a', 'A', 'i', 'I', 'd', 'D'): | |
292 type = value.upper() | |
293 | |
294 # -- range support modifications start here | |
295 rest = None | |
296 range_tup = range_header_to_tuple(req.headers.get('Range',None)) | |
297 assert range_tup != () | |
298 if range_tup: | |
299 (fb,lb) = range_tup | |
300 if fb > 0: rest = fb | |
301 # -- range support modifications end here | |
302 | |
303 fp, retrlen = fw.retrfile(file, type, rest) | |
304 | |
305 # -- range support modifications start here | |
306 if range_tup: | |
307 (fb,lb) = range_tup | |
308 if lb == '': | |
309 if retrlen is None or retrlen == 0: | |
310 raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.') | |
311 lb = retrlen | |
312 retrlen = lb - fb | |
313 if retrlen < 0: | |
314 # beginning of range is larger than file | |
315 raise RangeError('Requested Range Not Satisfiable') | |
316 else: | |
317 retrlen = lb - fb | |
318 fp = RangeableFileObject(fp, (0,retrlen)) | |
319 # -- range support modifications end here | |
320 | |
321 headers = "" | |
322 mtype = mimetypes.guess_type(req.get_full_url())[0] | |
323 if mtype: | |
324 headers += "Content-Type: %s\n" % mtype | |
325 if retrlen is not None and retrlen >= 0: | |
326 headers += "Content-Length: %d\n" % retrlen | |
327 sf = StringIO(headers) | |
328 headers = mimetools.Message(sf) | |
329 return addinfourl(fp, headers, req.get_full_url()) | |
330 except ftplib.all_errors, msg: | |
331 raise IOError, ('ftp error', msg), sys.exc_info()[2] | |
332 | |
333 def connect_ftp(self, user, passwd, host, port, dirs): | |
334 fw = ftpwrapper(user, passwd, host, port, dirs) | |
335 return fw | |
336 | |
337 class ftpwrapper(urllib.ftpwrapper): | |
338 # range support note: | |
339 # this ftpwrapper code is copied directly from | |
340 # urllib. The only enhancement is to add the rest | |
341 # argument and pass it on to ftp.ntransfercmd | |
342 def retrfile(self, file, type, rest=None): | |
343 self.endtransfer() | |
344 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 | |
345 else: cmd = 'TYPE ' + type; isdir = 0 | |
346 try: | |
347 self.ftp.voidcmd(cmd) | |
348 except ftplib.all_errors: | |
349 self.init() | |
350 self.ftp.voidcmd(cmd) | |
351 conn = None | |
352 if file and not isdir: | |
353 # Use nlst to see if the file exists at all | |
354 try: | |
355 self.ftp.nlst(file) | |
356 except ftplib.error_perm, reason: | |
357 raise IOError, ('ftp error', reason), sys.exc_info()[2] | |
358 # Restore the transfer mode! | |
359 self.ftp.voidcmd(cmd) | |
360 # Try to retrieve as a file | |
361 try: | |
362 cmd = 'RETR ' + file | |
363 conn = self.ftp.ntransfercmd(cmd, rest) | |
364 except ftplib.error_perm, reason: | |
365 if str(reason)[:3] == '501': | |
366 # workaround for REST not supported error | |
367 fp, retrlen = self.retrfile(file, type) | |
368 fp = RangeableFileObject(fp, (rest,'')) | |
369 return (fp, retrlen) | |
370 elif str(reason)[:3] != '550': | |
371 raise IOError, ('ftp error', reason), sys.exc_info()[2] | |
372 if not conn: | |
373 # Set transfer mode to ASCII! | |
374 self.ftp.voidcmd('TYPE A') | |
375 # Try a directory listing | |
376 if file: cmd = 'LIST ' + file | |
377 else: cmd = 'LIST' | |
378 conn = self.ftp.ntransfercmd(cmd) | |
379 self.busy = 1 | |
380 # Pass back both a suitably decorated object and a retrieval length | |
381 return (addclosehook(conn[0].makefile('rb'), | |
382 self.endtransfer), conn[1]) | |
383 | |
384 | |
385 #################################################################### | |
386 # Range Tuple Functions | |
387 # XXX: These range tuple functions might go better in a class. | |
388 | |
_rangere = None
def range_header_to_tuple(range_header):
    """Get a (firstbyte,lastbyte) tuple from a Range header value.

    Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
    function pulls the firstbyte and lastbyte values and returns
    a (firstbyte,lastbyte) tuple. The header's last byte is inclusive
    while the tuple's is exclusive, so "bytes=0-0" yields (0,1). If
    lastbyte is not specified in the header value, it is returned as
    an empty string in the tuple.

    Return None if range_header is None, or if the header covers the
    entire file ("bytes=0-").
    Return () if range_header does not conform to the range spec
    pattern.
    """
    global _rangere
    if range_header is None:
        return None
    if _rangere is None:
        # compiled lazily on first use and cached at module level
        import re
        _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
    match = _rangere.match(range_header)
    if not match:
        return ()
    tup = range_tuple_normalize(match.group(1, 2))
    # Compare against '' rather than testing truthiness: a last byte
    # of 0 is a real value and still needs the inclusive -> exclusive
    # adjustment.
    if tup and tup[1] != '':
        tup = (tup[0], tup[1] + 1)
    return tup
416 | |
def range_tuple_to_header(range_tup):
    """Build a Range header value from a range tuple.

    The result has the form "bytes=<firstbyte>-<lastbyte>". None is
    returned when no header is needed, i.e. when range_tup is None or
    normalizes to the whole file.
    """
    if range_tup is None:
        return None
    normalized = range_tuple_normalize(range_tup)
    if not normalized:
        return None
    first, last = normalized
    if last:
        # tuple end is exclusive, header end is inclusive
        last = last - 1
    return 'bytes=%s-%s' % (first, last)
428 | |
def range_tuple_normalize(range_tup):
    """Normalize a (first_byte,last_byte) range tuple.

    Return a tuple whose first element is guaranteed to be an int
    and whose second element will be '' (meaning: the last byte) or
    an int. Finally, return None if the normalized tuple == (0,'')
    as that is equivalent to retrieving the entire file.

    range_tup may be None (returned as None), may omit the second
    element, and may hold None, '' or numeric strings in either slot.

    Raise RangeError if the last byte is smaller than the first byte.
    """
    if range_tup is None:
        return None
    # handle first byte
    fb = range_tup[0]
    if fb in (None, ''):
        fb = 0
    else:
        fb = int(fb)
    # handle last byte
    try:
        lb = range_tup[1]
    except IndexError:
        lb = ''
    else:
        if lb is None:
            lb = ''
        elif lb != '':
            lb = int(lb)
    # check if range is over the entire file
    if (fb, lb) == (0, ''):
        return None
    # check that the range is valid; an open-ended range ('') is always
    # valid and must not be compared against an int
    if lb != '' and lb < fb:
        raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
    return (fb, lb)
452 |