Mercurial > hg > mercurial-crew-with-dirclash
comparison mercurial/byterange.py @ 575:7f5ce4bbdd7b
More whitespace cleanups
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
More whitespace cleanups
manifest hash: 6721a7b11295e0127386b080fd7a7b516ebced74
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.0 (GNU/Linux)
iD8DBQFCxwm/ywK+sNU5EO8RAnl+AKCg/7ZCW7zIoG0nefksZzgLzgNmFACgnAFz
ZkW0LyZcU/hkFgXoGwo7ktk=
=AW+0
-----END PGP SIGNATURE-----
author | mpm@selenic.com |
---|---|
date | Sat, 02 Jul 2005 13:40:15 -0800 |
parents | 9117c6561b0b |
children | 31a9aa890016 |
comparison
equal
deleted
inserted
replaced
574:af7e21f3b2d9 | 575:7f5ce4bbdd7b |
---|---|
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
9 # Lesser General Public License for more details. | 9 # Lesser General Public License for more details. |
10 # | 10 # |
11 # You should have received a copy of the GNU Lesser General Public | 11 # You should have received a copy of the GNU Lesser General Public |
12 # License along with this library; if not, write to the | 12 # License along with this library; if not, write to the |
13 # Free Software Foundation, Inc., | 13 # Free Software Foundation, Inc., |
14 # 59 Temple Place, Suite 330, | 14 # 59 Temple Place, Suite 330, |
15 # Boston, MA 02111-1307 USA | 15 # Boston, MA 02111-1307 USA |
16 | 16 |
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber | 17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber |
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | 18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko |
19 | 19 |
23 import stat | 23 import stat |
24 import urllib | 24 import urllib |
25 import urllib2 | 25 import urllib2 |
26 import rfc822 | 26 import rfc822 |
27 | 27 |
28 try: | 28 try: |
29 from cStringIO import StringIO | 29 from cStringIO import StringIO |
30 except ImportError, msg: | 30 except ImportError, msg: |
31 from StringIO import StringIO | 31 from StringIO import StringIO |
32 | 32 |
33 class RangeError(IOError): | 33 class RangeError(IOError): |
34 """Error raised when an unsatisfiable range is requested.""" | 34 """Error raised when an unsatisfiable range is requested.""" |
35 pass | 35 pass |
36 | 36 |
37 class HTTPRangeHandler(urllib2.BaseHandler): | 37 class HTTPRangeHandler(urllib2.BaseHandler): |
38 """Handler that enables HTTP Range headers. | 38 """Handler that enables HTTP Range headers. |
39 | 39 |
40 This was extremely simple. The Range header is a HTTP feature to | 40 This was extremely simple. The Range header is a HTTP feature to |
41 begin with so all this class does is tell urllib2 that the | 41 begin with so all this class does is tell urllib2 that the |
42 "206 Partial Content" response from the HTTP server is what we | 42 "206 Partial Content" response from the HTTP server is what we |
43 expected. | 43 expected. |
44 | 44 |
45 Example: | 45 Example: |
46 import urllib2 | 46 import urllib2 |
47 import byterange | 47 import byterange |
48 | 48 |
49 range_handler = byterange.HTTPRangeHandler() | 49 range_handler = byterange.HTTPRangeHandler() |
50 opener = urllib2.build_opener(range_handler) | 50 opener = urllib2.build_opener(range_handler) |
51 | 51 |
52 # install it | 52 # install it |
53 urllib2.install_opener(opener) | 53 urllib2.install_opener(opener) |
54 | 54 |
55 # create Request and set Range header | 55 # create Request and set Range header |
56 req = urllib2.Request('http://www.python.org/') | 56 req = urllib2.Request('http://www.python.org/') |
57 req.headers['Range'] = 'bytes=30-50' | 57 req.headers['Range'] = 'bytes=30-50' |
58 f = urllib2.urlopen(req) | 58 f = urllib2.urlopen(req) |
59 """ | 59 """ |
60 | 60 |
61 def http_error_206(self, req, fp, code, msg, hdrs): | 61 def http_error_206(self, req, fp, code, msg, hdrs): |
62 # 206 Partial Content Response | 62 # 206 Partial Content Response |
63 r = urllib.addinfourl(fp, hdrs, req.get_full_url()) | 63 r = urllib.addinfourl(fp, hdrs, req.get_full_url()) |
64 r.code = code | 64 r.code = code |
65 r.msg = msg | 65 r.msg = msg |
66 return r | 66 return r |
67 | 67 |
68 def http_error_416(self, req, fp, code, msg, hdrs): | 68 def http_error_416(self, req, fp, code, msg, hdrs): |
69 # HTTP's Range Not Satisfiable error | 69 # HTTP's Range Not Satisfiable error |
70 raise RangeError('Requested Range Not Satisfiable') | 70 raise RangeError('Requested Range Not Satisfiable') |
71 | 71 |
72 class RangeableFileObject: | 72 class RangeableFileObject: |
73 """File object wrapper to enable raw range handling. | 73 """File object wrapper to enable raw range handling. |
74 This was implemented primarily for handling range | 74 This was implemented primarily for handling range |
75 specifications for file:// urls. This object effectively makes | 75 specifications for file:// urls. This object effectively makes |
76 a file object look like it consists only of a range of bytes in | 76 a file object look like it consists only of a range of bytes in |
77 the stream. | 77 the stream. |
78 | 78 |
79 Examples: | 79 Examples: |
80 # expose 10 bytes, starting at byte position 20, from | 80 # expose 10 bytes, starting at byte position 20, from |
81 # /etc/passwd. | 81 # /etc/passwd. |
82 >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30)) | 82 >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30)) |
83 # seek seeks within the range (to position 23 in this case) | 83 # seek seeks within the range (to position 23 in this case) |
84 >>> fo.seek(3) | 84 >>> fo.seek(3) |
85 # tell tells where you're at _within the range_ (position 3 in | 85 # tell tells where you're at _within the range_ (position 3 in |
87 >>> fo.tell() | 87 >>> fo.tell() |
88 # read EOFs if an attempt is made to read past the last | 88 # read EOFs if an attempt is made to read past the last |
89 # byte in the range. the following will return only 7 bytes. | 89 # byte in the range. the following will return only 7 bytes. |
90 >>> fo.read(30) | 90 >>> fo.read(30) |
91 """ | 91 """ |
92 | 92 |
93 def __init__(self, fo, rangetup): | 93 def __init__(self, fo, rangetup): |
94 """Create a RangeableFileObject. | 94 """Create a RangeableFileObject. |
95 fo -- a file like object. only the read() method need be | 95 fo -- a file like object. only the read() method need be |
96 supported but supporting an optimized seek() is | 96 supported but supporting an optimized seek() is |
97 preferable. | 97 preferable. |
98 rangetup -- a (firstbyte,lastbyte) tuple specifying the range | 98 rangetup -- a (firstbyte,lastbyte) tuple specifying the range |
99 to work over. | 99 to work over. |
100 The file object provided is assumed to be at byte offset 0. | 100 The file object provided is assumed to be at byte offset 0. |
101 """ | 101 """ |
102 self.fo = fo | 102 self.fo = fo |
103 (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup) | 103 (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup) |
104 self.realpos = 0 | 104 self.realpos = 0 |
105 self._do_seek(self.firstbyte) | 105 self._do_seek(self.firstbyte) |
106 | 106 |
107 def __getattr__(self, name): | 107 def __getattr__(self, name): |
108 """This effectively allows us to wrap at the instance level. | 108 """This effectively allows us to wrap at the instance level. |
109 Any attribute not found in _this_ object will be searched for | 109 Any attribute not found in _this_ object will be searched for |
110 in self.fo. This includes methods.""" | 110 in self.fo. This includes methods.""" |
111 if hasattr(self.fo, name): | 111 if hasattr(self.fo, name): |
112 return getattr(self.fo, name) | 112 return getattr(self.fo, name) |
113 raise AttributeError, name | 113 raise AttributeError, name |
114 | 114 |
115 def tell(self): | 115 def tell(self): |
116 """Return the position within the range. | 116 """Return the position within the range. |
117 This is different from fo.seek in that position 0 is the | 117 This is different from fo.seek in that position 0 is the |
118 first byte position of the range tuple. For example, if | 118 first byte position of the range tuple. For example, if |
119 this object was created with a range tuple of (500,899), | 119 this object was created with a range tuple of (500,899), |
120 tell() will return 0 when at byte position 500 of the file. | 120 tell() will return 0 when at byte position 500 of the file. |
121 """ | 121 """ |
122 return (self.realpos - self.firstbyte) | 122 return (self.realpos - self.firstbyte) |
123 | 123 |
124 def seek(self,offset,whence=0): | 124 def seek(self,offset,whence=0): |
125 """Seek within the byte range. | 125 """Seek within the byte range. |
126 Positioning is identical to that described under tell(). | 126 Positioning is identical to that described under tell(). |
127 """ | 127 """ |
128 assert whence in (0, 1, 2) | 128 assert whence in (0, 1, 2) |
131 elif whence == 1: # relative seek | 131 elif whence == 1: # relative seek |
132 realoffset = self.realpos + offset | 132 realoffset = self.realpos + offset |
133 elif whence == 2: # absolute from end of file | 133 elif whence == 2: # absolute from end of file |
134 # XXX: are we raising the right Error here? | 134 # XXX: are we raising the right Error here? |
135 raise IOError('seek from end of file not supported.') | 135 raise IOError('seek from end of file not supported.') |
136 | 136 |
137 # do not allow seek past lastbyte in range | 137 # do not allow seek past lastbyte in range |
138 if self.lastbyte and (realoffset >= self.lastbyte): | 138 if self.lastbyte and (realoffset >= self.lastbyte): |
139 realoffset = self.lastbyte | 139 realoffset = self.lastbyte |
140 | 140 |
141 self._do_seek(realoffset - self.realpos) | 141 self._do_seek(realoffset - self.realpos) |
142 | 142 |
143 def read(self, size=-1): | 143 def read(self, size=-1): |
144 """Read within the range. | 144 """Read within the range. |
145 This method will limit the size read based on the range. | 145 This method will limit the size read based on the range. |
146 """ | 146 """ |
147 size = self._calc_read_size(size) | 147 size = self._calc_read_size(size) |
148 rslt = self.fo.read(size) | 148 rslt = self.fo.read(size) |
149 self.realpos += len(rslt) | 149 self.realpos += len(rslt) |
150 return rslt | 150 return rslt |
151 | 151 |
152 def readline(self, size=-1): | 152 def readline(self, size=-1): |
153 """Read lines within the range. | 153 """Read lines within the range. |
154 This method will limit the size read based on the range. | 154 This method will limit the size read based on the range. |
155 """ | 155 """ |
156 size = self._calc_read_size(size) | 156 size = self._calc_read_size(size) |
157 rslt = self.fo.readline(size) | 157 rslt = self.fo.readline(size) |
158 self.realpos += len(rslt) | 158 self.realpos += len(rslt) |
159 return rslt | 159 return rslt |
160 | 160 |
161 def _calc_read_size(self, size): | 161 def _calc_read_size(self, size): |
162 """Handles calculating the amount of data to read based on | 162 """Handles calculating the amount of data to read based on |
163 the range. | 163 the range. |
164 """ | 164 """ |
165 if self.lastbyte: | 165 if self.lastbyte: |
167 if ((self.realpos + size) >= self.lastbyte): | 167 if ((self.realpos + size) >= self.lastbyte): |
168 size = (self.lastbyte - self.realpos) | 168 size = (self.lastbyte - self.realpos) |
169 else: | 169 else: |
170 size = (self.lastbyte - self.realpos) | 170 size = (self.lastbyte - self.realpos) |
171 return size | 171 return size |
172 | 172 |
173 def _do_seek(self,offset): | 173 def _do_seek(self,offset): |
174 """Seek based on whether wrapped object supports seek(). | 174 """Seek based on whether wrapped object supports seek(). |
175 offset is relative to the current position (self.realpos). | 175 offset is relative to the current position (self.realpos). |
176 """ | 176 """ |
177 assert offset >= 0 | 177 assert offset >= 0 |
178 if not hasattr(self.fo, 'seek'): | 178 if not hasattr(self.fo, 'seek'): |
179 self._poor_mans_seek(offset) | 179 self._poor_mans_seek(offset) |
180 else: | 180 else: |
181 self.fo.seek(self.realpos + offset) | 181 self.fo.seek(self.realpos + offset) |
182 self.realpos+= offset | 182 self.realpos+= offset |
183 | 183 |
184 def _poor_mans_seek(self,offset): | 184 def _poor_mans_seek(self,offset): |
185 """Seek by calling the wrapped file objects read() method. | 185 """Seek by calling the wrapped file objects read() method. |
186 This is used for file like objects that do not have native | 186 This is used for file like objects that do not have native |
187 seek support. The wrapped objects read() method is called | 187 seek support. The wrapped objects read() method is called |
188 to manually seek to the desired position. | 188 to manually seek to the desired position. |
189 offset -- read this number of bytes from the wrapped | 189 offset -- read this number of bytes from the wrapped |
190 file object. | 190 file object. |
191 raise RangeError if we encounter EOF before reaching the | 191 raise RangeError if we encounter EOF before reaching the |
192 specified offset. | 192 specified offset. |
193 """ | 193 """ |
194 pos = 0 | 194 pos = 0 |
195 bufsize = 1024 | 195 bufsize = 1024 |
196 while pos < offset: | 196 while pos < offset: |
235 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % | 235 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % |
236 (mtype or 'text/plain', size, modified))) | 236 (mtype or 'text/plain', size, modified))) |
237 return urllib.addinfourl(fo, headers, 'file:'+file) | 237 return urllib.addinfourl(fo, headers, 'file:'+file) |
238 | 238 |
239 | 239 |
240 # FTP Range Support | 240 # FTP Range Support |
241 # Unfortunately, a large amount of base FTP code had to be copied | 241 # Unfortunately, a large amount of base FTP code had to be copied |
242 # from urllib and urllib2 in order to insert the FTP REST command. | 242 # from urllib and urllib2 in order to insert the FTP REST command. |
243 # Code modifications for range support have been commented as | 243 # Code modifications for range support have been commented as |
244 # follows: | 244 # follows: |
245 # -- range support modifications start/end here | 245 # -- range support modifications start/end here |
246 | 246 |
247 from urllib import splitport, splituser, splitpasswd, splitattr, \ | 247 from urllib import splitport, splituser, splitpasswd, splitattr, \ |
248 unquote, addclosehook, addinfourl | 248 unquote, addclosehook, addinfourl |
269 else: | 269 else: |
270 passwd = None | 270 passwd = None |
271 host = unquote(host) | 271 host = unquote(host) |
272 user = unquote(user or '') | 272 user = unquote(user or '') |
273 passwd = unquote(passwd or '') | 273 passwd = unquote(passwd or '') |
274 | 274 |
275 try: | 275 try: |
276 host = socket.gethostbyname(host) | 276 host = socket.gethostbyname(host) |
277 except socket.error, msg: | 277 except socket.error, msg: |
278 raise URLError(msg) | 278 raise URLError(msg) |
279 path, attrs = splitattr(req.get_selector()) | 279 path, attrs = splitattr(req.get_selector()) |
288 for attr in attrs: | 288 for attr in attrs: |
289 attr, value = splitattr(attr) | 289 attr, value = splitattr(attr) |
290 if attr.lower() == 'type' and \ | 290 if attr.lower() == 'type' and \ |
291 value in ('a', 'A', 'i', 'I', 'd', 'D'): | 291 value in ('a', 'A', 'i', 'I', 'd', 'D'): |
292 type = value.upper() | 292 type = value.upper() |
293 | 293 |
294 # -- range support modifications start here | 294 # -- range support modifications start here |
295 rest = None | 295 rest = None |
296 range_tup = range_header_to_tuple(req.headers.get('Range',None)) | 296 range_tup = range_header_to_tuple(req.headers.get('Range',None)) |
297 assert range_tup != () | 297 assert range_tup != () |
298 if range_tup: | 298 if range_tup: |
299 (fb,lb) = range_tup | 299 (fb,lb) = range_tup |
300 if fb > 0: rest = fb | 300 if fb > 0: rest = fb |
301 # -- range support modifications end here | 301 # -- range support modifications end here |
302 | 302 |
303 fp, retrlen = fw.retrfile(file, type, rest) | 303 fp, retrlen = fw.retrfile(file, type, rest) |
304 | 304 |
305 # -- range support modifications start here | 305 # -- range support modifications start here |
306 if range_tup: | 306 if range_tup: |
307 (fb,lb) = range_tup | 307 (fb,lb) = range_tup |
308 if lb == '': | 308 if lb == '': |
309 if retrlen is None or retrlen == 0: | 309 if retrlen is None or retrlen == 0: |
310 raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.') | 310 raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.') |
311 lb = retrlen | 311 lb = retrlen |
312 retrlen = lb - fb | 312 retrlen = lb - fb |
313 if retrlen < 0: | 313 if retrlen < 0: |
315 raise RangeError('Requested Range Not Satisfiable') | 315 raise RangeError('Requested Range Not Satisfiable') |
316 else: | 316 else: |
317 retrlen = lb - fb | 317 retrlen = lb - fb |
318 fp = RangeableFileObject(fp, (0,retrlen)) | 318 fp = RangeableFileObject(fp, (0,retrlen)) |
319 # -- range support modifications end here | 319 # -- range support modifications end here |
320 | 320 |
321 headers = "" | 321 headers = "" |
322 mtype = mimetypes.guess_type(req.get_full_url())[0] | 322 mtype = mimetypes.guess_type(req.get_full_url())[0] |
323 if mtype: | 323 if mtype: |
324 headers += "Content-Type: %s\n" % mtype | 324 headers += "Content-Type: %s\n" % mtype |
325 if retrlen is not None and retrlen >= 0: | 325 if retrlen is not None and retrlen >= 0: |
387 # XXX: These range tuple functions might go better in a class. | 387 # XXX: These range tuple functions might go better in a class. |
388 | 388 |
389 _rangere = None | 389 _rangere = None |
390 def range_header_to_tuple(range_header): | 390 def range_header_to_tuple(range_header): |
391 """Get a (firstbyte,lastbyte) tuple from a Range header value. | 391 """Get a (firstbyte,lastbyte) tuple from a Range header value. |
392 | 392 |
393 Range headers have the form "bytes=<firstbyte>-<lastbyte>". This | 393 Range headers have the form "bytes=<firstbyte>-<lastbyte>". This |
394 function pulls the firstbyte and lastbyte values and returns | 394 function pulls the firstbyte and lastbyte values and returns |
395 a (firstbyte,lastbyte) tuple. If lastbyte is not specified in | 395 a (firstbyte,lastbyte) tuple. If lastbyte is not specified in |
396 the header value, it is returned as an empty string in the | 396 the header value, it is returned as an empty string in the |
397 tuple. | 397 tuple. |
398 | 398 |
399 Return None if range_header is None | 399 Return None if range_header is None |
400 Return () if range_header does not conform to the range spec | 400 Return () if range_header does not conform to the range spec |
401 pattern. | 401 pattern. |
402 | 402 |
403 """ | 403 """ |
404 global _rangere | 404 global _rangere |
405 if range_header is None: return None | 405 if range_header is None: return None |
406 if _rangere is None: | 406 if _rangere is None: |
407 import re | 407 import re |
408 _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)') | 408 _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)') |
409 match = _rangere.match(range_header) | 409 match = _rangere.match(range_header) |
410 if match: | 410 if match: |
411 tup = range_tuple_normalize(match.group(1,2)) | 411 tup = range_tuple_normalize(match.group(1,2)) |
412 if tup and tup[1]: | 412 if tup and tup[1]: |
413 tup = (tup[0],tup[1]+1) | 413 tup = (tup[0],tup[1]+1) |
414 return tup | 414 return tup |
415 return () | 415 return () |
416 | 416 |
417 def range_tuple_to_header(range_tup): | 417 def range_tuple_to_header(range_tup): |
420 if no range is needed. | 420 if no range is needed. |
421 """ | 421 """ |
422 if range_tup is None: return None | 422 if range_tup is None: return None |
423 range_tup = range_tuple_normalize(range_tup) | 423 range_tup = range_tuple_normalize(range_tup) |
424 if range_tup: | 424 if range_tup: |
425 if range_tup[1]: | 425 if range_tup[1]: |
426 range_tup = (range_tup[0],range_tup[1] - 1) | 426 range_tup = (range_tup[0],range_tup[1] - 1) |
427 return 'bytes=%s-%s' % range_tup | 427 return 'bytes=%s-%s' % range_tup |
428 | 428 |
429 def range_tuple_normalize(range_tup): | 429 def range_tuple_normalize(range_tup): |
430 """Normalize a (first_byte,last_byte) range tuple. | 430 """Normalize a (first_byte,last_byte) range tuple. |
431 Return a tuple whose first element is guaranteed to be an int | 431 Return a tuple whose first element is guaranteed to be an int |
432 and whose second element will be '' (meaning: the last byte) or | 432 and whose second element will be '' (meaning: the last byte) or |
433 an int. Finally, return None if the normalized tuple == (0,'') | 433 an int. Finally, return None if the normalized tuple == (0,'') |
434 as that is equivalent to retrieving the entire file. | 434 as that is equivalent to retrieving the entire file. |
435 """ | 435 """ |
436 if range_tup is None: return None | 436 if range_tup is None: return None |
437 # handle first byte | 437 # handle first byte |
439 if fb in (None,''): fb = 0 | 439 if fb in (None,''): fb = 0 |
440 else: fb = int(fb) | 440 else: fb = int(fb) |
441 # handle last byte | 441 # handle last byte |
442 try: lb = range_tup[1] | 442 try: lb = range_tup[1] |
443 except IndexError: lb = '' | 443 except IndexError: lb = '' |
444 else: | 444 else: |
445 if lb is None: lb = '' | 445 if lb is None: lb = '' |
446 elif lb != '': lb = int(lb) | 446 elif lb != '': lb = int(lb) |
447 # check if range is over the entire file | 447 # check if range is over the entire file |
448 if (fb,lb) == (0,''): return None | 448 if (fb,lb) == (0,''): return None |
449 # check that the range is valid | 449 # check that the range is valid |
450 if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb)) | 450 if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb)) |
451 return (fb,lb) | 451 return (fb,lb) |
452 |