comparison mercurial/byterange.py @ 575:7f5ce4bbdd7b

More whitespace cleanups -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 More whitespace cleanups manifest hash: 6721a7b11295e0127386b080fd7a7b516ebced74 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.0 (GNU/Linux) iD8DBQFCxwm/ywK+sNU5EO8RAnl+AKCg/7ZCW7zIoG0nefksZzgLzgNmFACgnAFz ZkW0LyZcU/hkFgXoGwo7ktk= =AW+0 -----END PGP SIGNATURE-----
author mpm@selenic.com
date Sat, 02 Jul 2005 13:40:15 -0800
parents 9117c6561b0b
children 31a9aa890016
comparison
equal deleted inserted replaced
574:af7e21f3b2d9 575:7f5ce4bbdd7b
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details. 9 # Lesser General Public License for more details.
10 # 10 #
11 # You should have received a copy of the GNU Lesser General Public 11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, write to the 12 # License along with this library; if not, write to the
13 # Free Software Foundation, Inc., 13 # Free Software Foundation, Inc.,
14 # 59 Temple Place, Suite 330, 14 # 59 Temple Place, Suite 330,
15 # Boston, MA 02111-1307 USA 15 # Boston, MA 02111-1307 USA
16 16
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber 17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko 18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19 19
23 import stat 23 import stat
24 import urllib 24 import urllib
25 import urllib2 25 import urllib2
26 import rfc822 26 import rfc822
27 27
28 try: 28 try:
29 from cStringIO import StringIO 29 from cStringIO import StringIO
30 except ImportError, msg: 30 except ImportError, msg:
31 from StringIO import StringIO 31 from StringIO import StringIO
32 32
33 class RangeError(IOError): 33 class RangeError(IOError):
34 """Error raised when an unsatisfiable range is requested.""" 34 """Error raised when an unsatisfiable range is requested."""
35 pass 35 pass
36 36
37 class HTTPRangeHandler(urllib2.BaseHandler): 37 class HTTPRangeHandler(urllib2.BaseHandler):
38 """Handler that enables HTTP Range headers. 38 """Handler that enables HTTP Range headers.
39 39
40 This was extremely simple. The Range header is a HTTP feature to 40 This was extremely simple. The Range header is a HTTP feature to
41 begin with so all this class does is tell urllib2 that the 41 begin with so all this class does is tell urllib2 that the
42 "206 Partial Content" response from the HTTP server is what we 42 "206 Partial Content" response from the HTTP server is what we
43 expected. 43 expected.
44 44
45 Example: 45 Example:
46 import urllib2 46 import urllib2
47 import byterange 47 import byterange
48 48
49 range_handler = byterange.HTTPRangeHandler() 49 range_handler = byterange.HTTPRangeHandler()
50 opener = urllib2.build_opener(range_handler) 50 opener = urllib2.build_opener(range_handler)
51 51
52 # install it 52 # install it
53 urllib2.install_opener(opener) 53 urllib2.install_opener(opener)
54 54
55 # create Request and set Range header 55 # create Request and set Range header
56 req = urllib2.Request('http://www.python.org/') 56 req = urllib2.Request('http://www.python.org/')
57 req.headers['Range'] = 'bytes=30-50' 57 req.headers['Range'] = 'bytes=30-50'
58 f = urllib2.urlopen(req) 58 f = urllib2.urlopen(req)
59 """ 59 """
60 60
61 def http_error_206(self, req, fp, code, msg, hdrs): 61 def http_error_206(self, req, fp, code, msg, hdrs):
62 # 206 Partial Content Response 62 # 206 Partial Content Response
63 r = urllib.addinfourl(fp, hdrs, req.get_full_url()) 63 r = urllib.addinfourl(fp, hdrs, req.get_full_url())
64 r.code = code 64 r.code = code
65 r.msg = msg 65 r.msg = msg
66 return r 66 return r
67 67
68 def http_error_416(self, req, fp, code, msg, hdrs): 68 def http_error_416(self, req, fp, code, msg, hdrs):
69 # HTTP's Range Not Satisfiable error 69 # HTTP's Range Not Satisfiable error
70 raise RangeError('Requested Range Not Satisfiable') 70 raise RangeError('Requested Range Not Satisfiable')
71 71
72 class RangeableFileObject: 72 class RangeableFileObject:
73 """File object wrapper to enable raw range handling. 73 """File object wrapper to enable raw range handling.
74 This was implemented primarily for handling range 74 This was implemented primarily for handling range
75 specifications for file:// urls. This object effectively makes 75 specifications for file:// urls. This object effectively makes
76 a file object look like it consists only of a range of bytes in 76 a file object look like it consists only of a range of bytes in
77 the stream. 77 the stream.
78 78
79 Examples: 79 Examples:
80 # expose 10 bytes, starting at byte position 20, from 80 # expose 10 bytes, starting at byte position 20, from
81 # /etc/passwd. 81 # /etc/passwd.
82 >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30)) 82 >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
83 # seek seeks within the range (to position 23 in this case) 83 # seek seeks within the range (to position 23 in this case)
84 >>> fo.seek(3) 84 >>> fo.seek(3)
85 # tell tells where you're at _within the range_ (position 3 in 85 # tell tells where you're at _within the range_ (position 3 in
87 >>> fo.tell() 87 >>> fo.tell()
88 # read EOFs if an attempt is made to read past the last 88 # read EOFs if an attempt is made to read past the last
89 # byte in the range. the following will return only 7 bytes. 89 # byte in the range. the following will return only 7 bytes.
90 >>> fo.read(30) 90 >>> fo.read(30)
91 """ 91 """
92 92
93 def __init__(self, fo, rangetup): 93 def __init__(self, fo, rangetup):
94 """Create a RangeableFileObject. 94 """Create a RangeableFileObject.
95 fo -- a file like object. only the read() method need be 95 fo -- a file like object. only the read() method need be
96 supported but supporting an optimized seek() is 96 supported but supporting an optimized seek() is
97 preferable. 97 preferable.
98 rangetup -- a (firstbyte,lastbyte) tuple specifying the range 98 rangetup -- a (firstbyte,lastbyte) tuple specifying the range
99 to work over. 99 to work over.
100 The file object provided is assumed to be at byte offset 0. 100 The file object provided is assumed to be at byte offset 0.
101 """ 101 """
102 self.fo = fo 102 self.fo = fo
103 (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup) 103 (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup)
104 self.realpos = 0 104 self.realpos = 0
105 self._do_seek(self.firstbyte) 105 self._do_seek(self.firstbyte)
106 106
107 def __getattr__(self, name): 107 def __getattr__(self, name):
108 """This effectively allows us to wrap at the instance level. 108 """This effectively allows us to wrap at the instance level.
109 Any attribute not found in _this_ object will be searched for 109 Any attribute not found in _this_ object will be searched for
110 in self.fo. This includes methods.""" 110 in self.fo. This includes methods."""
111 if hasattr(self.fo, name): 111 if hasattr(self.fo, name):
112 return getattr(self.fo, name) 112 return getattr(self.fo, name)
113 raise AttributeError, name 113 raise AttributeError, name
114 114
115 def tell(self): 115 def tell(self):
116 """Return the position within the range. 116 """Return the position within the range.
117 This is different from fo.seek in that position 0 is the 117 This is different from fo.seek in that position 0 is the
118 first byte position of the range tuple. For example, if 118 first byte position of the range tuple. For example, if
119 this object was created with a range tuple of (500,899), 119 this object was created with a range tuple of (500,899),
120 tell() will return 0 when at byte position 500 of the file. 120 tell() will return 0 when at byte position 500 of the file.
121 """ 121 """
122 return (self.realpos - self.firstbyte) 122 return (self.realpos - self.firstbyte)
123 123
124 def seek(self,offset,whence=0): 124 def seek(self,offset,whence=0):
125 """Seek within the byte range. 125 """Seek within the byte range.
126 Positioning is identical to that described under tell(). 126 Positioning is identical to that described under tell().
127 """ 127 """
128 assert whence in (0, 1, 2) 128 assert whence in (0, 1, 2)
131 elif whence == 1: # relative seek 131 elif whence == 1: # relative seek
132 realoffset = self.realpos + offset 132 realoffset = self.realpos + offset
133 elif whence == 2: # absolute from end of file 133 elif whence == 2: # absolute from end of file
134 # XXX: are we raising the right Error here? 134 # XXX: are we raising the right Error here?
135 raise IOError('seek from end of file not supported.') 135 raise IOError('seek from end of file not supported.')
136 136
137 # do not allow seek past lastbyte in range 137 # do not allow seek past lastbyte in range
138 if self.lastbyte and (realoffset >= self.lastbyte): 138 if self.lastbyte and (realoffset >= self.lastbyte):
139 realoffset = self.lastbyte 139 realoffset = self.lastbyte
140 140
141 self._do_seek(realoffset - self.realpos) 141 self._do_seek(realoffset - self.realpos)
142 142
143 def read(self, size=-1): 143 def read(self, size=-1):
144 """Read within the range. 144 """Read within the range.
145 This method will limit the size read based on the range. 145 This method will limit the size read based on the range.
146 """ 146 """
147 size = self._calc_read_size(size) 147 size = self._calc_read_size(size)
148 rslt = self.fo.read(size) 148 rslt = self.fo.read(size)
149 self.realpos += len(rslt) 149 self.realpos += len(rslt)
150 return rslt 150 return rslt
151 151
152 def readline(self, size=-1): 152 def readline(self, size=-1):
153 """Read lines within the range. 153 """Read lines within the range.
154 This method will limit the size read based on the range. 154 This method will limit the size read based on the range.
155 """ 155 """
156 size = self._calc_read_size(size) 156 size = self._calc_read_size(size)
157 rslt = self.fo.readline(size) 157 rslt = self.fo.readline(size)
158 self.realpos += len(rslt) 158 self.realpos += len(rslt)
159 return rslt 159 return rslt
160 160
161 def _calc_read_size(self, size): 161 def _calc_read_size(self, size):
162 """Handles calculating the amount of data to read based on 162 """Handles calculating the amount of data to read based on
163 the range. 163 the range.
164 """ 164 """
165 if self.lastbyte: 165 if self.lastbyte:
167 if ((self.realpos + size) >= self.lastbyte): 167 if ((self.realpos + size) >= self.lastbyte):
168 size = (self.lastbyte - self.realpos) 168 size = (self.lastbyte - self.realpos)
169 else: 169 else:
170 size = (self.lastbyte - self.realpos) 170 size = (self.lastbyte - self.realpos)
171 return size 171 return size
172 172
173 def _do_seek(self,offset): 173 def _do_seek(self,offset):
174 """Seek based on whether wrapped object supports seek(). 174 """Seek based on whether wrapped object supports seek().
175 offset is relative to the current position (self.realpos). 175 offset is relative to the current position (self.realpos).
176 """ 176 """
177 assert offset >= 0 177 assert offset >= 0
178 if not hasattr(self.fo, 'seek'): 178 if not hasattr(self.fo, 'seek'):
179 self._poor_mans_seek(offset) 179 self._poor_mans_seek(offset)
180 else: 180 else:
181 self.fo.seek(self.realpos + offset) 181 self.fo.seek(self.realpos + offset)
182 self.realpos+= offset 182 self.realpos+= offset
183 183
184 def _poor_mans_seek(self,offset): 184 def _poor_mans_seek(self,offset):
185 """Seek by calling the wrapped file objects read() method. 185 """Seek by calling the wrapped file objects read() method.
186 This is used for file like objects that do not have native 186 This is used for file like objects that do not have native
187 seek support. The wrapped objects read() method is called 187 seek support. The wrapped objects read() method is called
188 to manually seek to the desired position. 188 to manually seek to the desired position.
189 offset -- read this number of bytes from the wrapped 189 offset -- read this number of bytes from the wrapped
190 file object. 190 file object.
191 raise RangeError if we encounter EOF before reaching the 191 raise RangeError if we encounter EOF before reaching the
192 specified offset. 192 specified offset.
193 """ 193 """
194 pos = 0 194 pos = 0
195 bufsize = 1024 195 bufsize = 1024
196 while pos < offset: 196 while pos < offset:
235 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % 235 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
236 (mtype or 'text/plain', size, modified))) 236 (mtype or 'text/plain', size, modified)))
237 return urllib.addinfourl(fo, headers, 'file:'+file) 237 return urllib.addinfourl(fo, headers, 'file:'+file)
238 238
239 239
240 # FTP Range Support 240 # FTP Range Support
241 # Unfortunately, a large amount of base FTP code had to be copied 241 # Unfortunately, a large amount of base FTP code had to be copied
242 # from urllib and urllib2 in order to insert the FTP REST command. 242 # from urllib and urllib2 in order to insert the FTP REST command.
243 # Code modifications for range support have been commented as 243 # Code modifications for range support have been commented as
244 # follows: 244 # follows:
245 # -- range support modifications start/end here 245 # -- range support modifications start/end here
246 246
247 from urllib import splitport, splituser, splitpasswd, splitattr, \ 247 from urllib import splitport, splituser, splitpasswd, splitattr, \
248 unquote, addclosehook, addinfourl 248 unquote, addclosehook, addinfourl
269 else: 269 else:
270 passwd = None 270 passwd = None
271 host = unquote(host) 271 host = unquote(host)
272 user = unquote(user or '') 272 user = unquote(user or '')
273 passwd = unquote(passwd or '') 273 passwd = unquote(passwd or '')
274 274
275 try: 275 try:
276 host = socket.gethostbyname(host) 276 host = socket.gethostbyname(host)
277 except socket.error, msg: 277 except socket.error, msg:
278 raise URLError(msg) 278 raise URLError(msg)
279 path, attrs = splitattr(req.get_selector()) 279 path, attrs = splitattr(req.get_selector())
288 for attr in attrs: 288 for attr in attrs:
289 attr, value = splitattr(attr) 289 attr, value = splitattr(attr)
290 if attr.lower() == 'type' and \ 290 if attr.lower() == 'type' and \
291 value in ('a', 'A', 'i', 'I', 'd', 'D'): 291 value in ('a', 'A', 'i', 'I', 'd', 'D'):
292 type = value.upper() 292 type = value.upper()
293 293
294 # -- range support modifications start here 294 # -- range support modifications start here
295 rest = None 295 rest = None
296 range_tup = range_header_to_tuple(req.headers.get('Range',None)) 296 range_tup = range_header_to_tuple(req.headers.get('Range',None))
297 assert range_tup != () 297 assert range_tup != ()
298 if range_tup: 298 if range_tup:
299 (fb,lb) = range_tup 299 (fb,lb) = range_tup
300 if fb > 0: rest = fb 300 if fb > 0: rest = fb
301 # -- range support modifications end here 301 # -- range support modifications end here
302 302
303 fp, retrlen = fw.retrfile(file, type, rest) 303 fp, retrlen = fw.retrfile(file, type, rest)
304 304
305 # -- range support modifications start here 305 # -- range support modifications start here
306 if range_tup: 306 if range_tup:
307 (fb,lb) = range_tup 307 (fb,lb) = range_tup
308 if lb == '': 308 if lb == '':
309 if retrlen is None or retrlen == 0: 309 if retrlen is None or retrlen == 0:
310 raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.') 310 raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
311 lb = retrlen 311 lb = retrlen
312 retrlen = lb - fb 312 retrlen = lb - fb
313 if retrlen < 0: 313 if retrlen < 0:
315 raise RangeError('Requested Range Not Satisfiable') 315 raise RangeError('Requested Range Not Satisfiable')
316 else: 316 else:
317 retrlen = lb - fb 317 retrlen = lb - fb
318 fp = RangeableFileObject(fp, (0,retrlen)) 318 fp = RangeableFileObject(fp, (0,retrlen))
319 # -- range support modifications end here 319 # -- range support modifications end here
320 320
321 headers = "" 321 headers = ""
322 mtype = mimetypes.guess_type(req.get_full_url())[0] 322 mtype = mimetypes.guess_type(req.get_full_url())[0]
323 if mtype: 323 if mtype:
324 headers += "Content-Type: %s\n" % mtype 324 headers += "Content-Type: %s\n" % mtype
325 if retrlen is not None and retrlen >= 0: 325 if retrlen is not None and retrlen >= 0:
387 # XXX: These range tuple functions might go better in a class. 387 # XXX: These range tuple functions might go better in a class.
388 388
389 _rangere = None 389 _rangere = None
390 def range_header_to_tuple(range_header): 390 def range_header_to_tuple(range_header):
391 """Get a (firstbyte,lastbyte) tuple from a Range header value. 391 """Get a (firstbyte,lastbyte) tuple from a Range header value.
392 392
393 Range headers have the form "bytes=<firstbyte>-<lastbyte>". This 393 Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
394 function pulls the firstbyte and lastbyte values and returns 394 function pulls the firstbyte and lastbyte values and returns
395 a (firstbyte,lastbyte) tuple. If lastbyte is not specified in 395 a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
396 the header value, it is returned as an empty string in the 396 the header value, it is returned as an empty string in the
397 tuple. 397 tuple.
398 398
399 Return None if range_header is None 399 Return None if range_header is None
400 Return () if range_header does not conform to the range spec 400 Return () if range_header does not conform to the range spec
401 pattern. 401 pattern.
402 402
403 """ 403 """
404 global _rangere 404 global _rangere
405 if range_header is None: return None 405 if range_header is None: return None
406 if _rangere is None: 406 if _rangere is None:
407 import re 407 import re
408 _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)') 408 _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
409 match = _rangere.match(range_header) 409 match = _rangere.match(range_header)
410 if match: 410 if match:
411 tup = range_tuple_normalize(match.group(1,2)) 411 tup = range_tuple_normalize(match.group(1,2))
412 if tup and tup[1]: 412 if tup and tup[1]:
413 tup = (tup[0],tup[1]+1) 413 tup = (tup[0],tup[1]+1)
414 return tup 414 return tup
415 return () 415 return ()
416 416
417 def range_tuple_to_header(range_tup): 417 def range_tuple_to_header(range_tup):
420 if no range is needed. 420 if no range is needed.
421 """ 421 """
422 if range_tup is None: return None 422 if range_tup is None: return None
423 range_tup = range_tuple_normalize(range_tup) 423 range_tup = range_tuple_normalize(range_tup)
424 if range_tup: 424 if range_tup:
425 if range_tup[1]: 425 if range_tup[1]:
426 range_tup = (range_tup[0],range_tup[1] - 1) 426 range_tup = (range_tup[0],range_tup[1] - 1)
427 return 'bytes=%s-%s' % range_tup 427 return 'bytes=%s-%s' % range_tup
428 428
429 def range_tuple_normalize(range_tup): 429 def range_tuple_normalize(range_tup):
430 """Normalize a (first_byte,last_byte) range tuple. 430 """Normalize a (first_byte,last_byte) range tuple.
431 Return a tuple whose first element is guaranteed to be an int 431 Return a tuple whose first element is guaranteed to be an int
432 and whose second element will be '' (meaning: the last byte) or 432 and whose second element will be '' (meaning: the last byte) or
433 an int. Finally, return None if the normalized tuple == (0,'') 433 an int. Finally, return None if the normalized tuple == (0,'')
434 as that is equivalent to retrieving the entire file. 434 as that is equivalent to retrieving the entire file.
435 """ 435 """
436 if range_tup is None: return None 436 if range_tup is None: return None
437 # handle first byte 437 # handle first byte
439 if fb in (None,''): fb = 0 439 if fb in (None,''): fb = 0
440 else: fb = int(fb) 440 else: fb = int(fb)
441 # handle last byte 441 # handle last byte
442 try: lb = range_tup[1] 442 try: lb = range_tup[1]
443 except IndexError: lb = '' 443 except IndexError: lb = ''
444 else: 444 else:
445 if lb is None: lb = '' 445 if lb is None: lb = ''
446 elif lb != '': lb = int(lb) 446 elif lb != '': lb = int(lb)
447 # check if range is over the entire file 447 # check if range is over the entire file
448 if (fb,lb) == (0,''): return None 448 if (fb,lb) == (0,''): return None
449 # check that the range is valid 449 # check that the range is valid
450 if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb)) 450 if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb))
451 return (fb,lb) 451 return (fb,lb)
452