comparison mercurial/byterange.py @ 0:9117c6561b0b

Add back links from file revisions to changeset revisions Add simple transaction support Add hg verify Improve caching in revlog Fix a bunch of bugs Self-hosting now that the metadata is close to finalized
author mpm@selenic.com
date Tue, 03 May 2005 13:16:10 -0800
parents
children 7f5ce4bbdd7b
comparison
equal deleted inserted replaced
-1:000000000000 0:9117c6561b0b
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
5 #
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
10 #
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, write to the
13 # Free Software Foundation, Inc.,
14 # 59 Temple Place, Suite 330,
15 # Boston, MA 02111-1307 USA
16
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
20 # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
21
22 import os
23 import stat
24 import urllib
25 import urllib2
26 import rfc822
27
28 try:
29 from cStringIO import StringIO
30 except ImportError, msg:
31 from StringIO import StringIO
32
class RangeError(IOError):
    """Signal that a requested byte range cannot be satisfied.

    Derives from IOError, so handlers written for IOError also catch it.
    """
36
class HTTPRangeHandler(urllib2.BaseHandler):
    """urllib2 handler that lets HTTP Range requests succeed.

    Range is an HTTP feature to begin with, so all this class does is
    tell urllib2 that a "206 Partial Content" response from the server
    is what we expected, and report a 416 response as a RangeError.

    Example:
        import urllib2
        import byterange

        range_handler = byterange.HTTPRangeHandler()
        opener = urllib2.build_opener(range_handler)

        # install it
        urllib2.install_opener(opener)

        # create Request and set Range header
        req = urllib2.Request('http://www.python.org/')
        req.headers['Range'] = 'bytes=30-50'
        f = urllib2.urlopen(req)
    """

    def http_error_206(self, req, fp, code, msg, hdrs):
        """Return a 206 Partial Content reply as an ordinary response.

        The reply body (fp) and headers (hdrs) are wrapped in an
        addinfourl object carrying the status code and message.
        """
        response = urllib.addinfourl(fp, hdrs, req.get_full_url())
        response.code, response.msg = code, msg
        return response

    def http_error_416(self, req, fp, code, msg, hdrs):
        """Report HTTP's Range Not Satisfiable error as a RangeError."""
        raise RangeError('Requested Range Not Satisfiable')
71
class RangeableFileObject:
    """File object wrapper to enable raw range handling.

    This was implemented primarily for handling range specifications
    for file:// urls.  This object effectively makes a file object look
    like it consists only of a range of bytes in the stream.

    Examples:
        # expose 10 bytes, starting at byte position 20, from
        # /etc/passwd.
        >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
        # seek seeks within the range (to position 23 in this case)
        >>> fo.seek(3)
        # tell tells where you are _within the range_ (position 3 in
        # this case)
        >>> fo.tell()
        # read EOFs if an attempt is made to read past the last
        # byte in the range. the following will return only 7 bytes.
        >>> fo.read(30)
    """

    def __init__(self, fo, rangetup):
        """Create a RangeableFileObject.

        fo       -- a file like object. only the read() method need be
                    supported but supporting an optimized seek() is
                    preferable.
        rangetup -- a (firstbyte,lastbyte) tuple specifying the range
                    to work over.  A range covering the whole file
                    (None, or anything normalizing to (0,'')) is
                    accepted and leaves the stream unrestricted.
        The file object provided is assumed to be at byte offset 0.
        """
        self.fo = fo
        normalized = range_tuple_normalize(rangetup)
        if normalized is None:
            # range_tuple_normalize() collapses "the entire file" to
            # None, which cannot be unpacked; use the equivalent
            # open-ended range starting at byte 0 instead.
            normalized = (0, '')
        (self.firstbyte, self.lastbyte) = normalized
        self.realpos = 0
        self._do_seek(self.firstbyte)

    def __getattr__(self, name):
        """This effectively allows us to wrap at the instance level.

        Any attribute not found in _this_ object will be searched for
        in self.fo.  This includes methods.
        """
        if name == 'fo':
            # We only get here for 'fo' when it has not been assigned
            # yet; delegating to self.fo would recurse without end.
            raise AttributeError(name)
        if hasattr(self.fo, name):
            return getattr(self.fo, name)
        raise AttributeError(name)

    def tell(self):
        """Return the position within the range.

        This is different from fo.seek in that position 0 is the
        first byte position of the range tuple. For example, if
        this object was created with a range tuple of (500,899),
        tell() will return 0 when at byte position 500 of the file.
        """
        return (self.realpos - self.firstbyte)

    def seek(self, offset, whence=0):
        """Seek within the byte range.

        Positioning is identical to that described under tell().
        Targets outside the range are clamped to its first/last byte.
        Seeking from the end (whence == 2) raises IOError, as does a
        backward seek when the wrapped object has no seek() method.
        """
        assert whence in (0, 1, 2)
        if whence == 0:   # absolute seek
            realoffset = self.firstbyte + offset
        elif whence == 1: # relative seek
            realoffset = self.realpos + offset
        elif whence == 2: # absolute from end of file
            # XXX: are we raising the right Error here?
            raise IOError('seek from end of file not supported.')

        # do not allow seek past lastbyte in range
        if self.lastbyte and (realoffset >= self.lastbyte):
            realoffset = self.lastbyte
        # ... nor before the first byte of the range
        if realoffset < self.firstbyte:
            realoffset = self.firstbyte

        self._do_seek(realoffset - self.realpos)

    def read(self, size=-1):
        """Read within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.read(size)
        self.realpos += len(rslt)
        return rslt

    def readline(self, size=-1):
        """Read lines within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.readline(size)
        self.realpos += len(rslt)
        return rslt

    def _calc_read_size(self, size):
        """Handles calculating the amount of data to read based on
        the range.

        With no upper bound (lastbyte is '' or 0) size is returned
        unchanged; otherwise it is capped at the bytes left in the
        range, and a negative size means "everything that is left".
        """
        if self.lastbyte:
            remaining = self.lastbyte - self.realpos
            if size > -1:
                if (self.realpos + size) >= self.lastbyte:
                    size = remaining
            else:
                size = remaining
        return size

    def _do_seek(self, offset):
        """Seek based on whether wrapped object supports seek().

        offset is relative to the current position (self.realpos).
        A negative offset is only possible when the wrapped object
        has a seek() method; otherwise IOError is raised.
        """
        if not hasattr(self.fo, 'seek'):
            if offset < 0:
                # Data already consumed from a read-only stream cannot
                # be revisited; fail loudly instead of corrupting
                # realpos.
                raise IOError('backward seek not supported by wrapped '
                              'file object.')
            self._poor_mans_seek(offset)
        else:
            self.fo.seek(self.realpos + offset)
        self.realpos += offset

    def _poor_mans_seek(self, offset):
        """Seek by calling the wrapped file objects read() method.

        This is used for file like objects that do not have native
        seek support. The wrapped objects read() method is called
        to manually seek to the desired position.
        offset -- read this number of bytes from the wrapped
                  file object.
        raise RangeError if we encounter EOF before reaching the
        specified offset.
        """
        pos = 0
        bufsize = 1024
        while pos < offset:
            if (pos + bufsize) > offset:
                # final chunk: read only what is still missing
                bufsize = offset - pos
            buf = self.fo.read(bufsize)
            if len(buf) != bufsize:
                raise RangeError('Requested Range Not Satisfiable')
            pos += bufsize
203
class FileRangeHandler(urllib2.FileHandler):
    """FileHandler subclass that adds Range support.

    A Range header on a file:// request is honoured by wrapping the
    opened file in a RangeableFileObject.
    """
    def open_local_file(self, req):
        """Open the local file named by req, honouring any Range header.

        Returns an addinfourl object whose headers carry Content-Type,
        Content-Length (the length of the selected range when a Range
        header is present) and Last-modified.

        Raises:
          urllib2.URLError -- the URL names a port or a host that is
                              not one of this machine's names.
          RangeError       -- the requested range lies outside the file.
          OSError/IOError  -- the file cannot be stat'ed or opened.
        """
        import mimetypes
        import mimetools
        host = req.get_host()
        selector = req.get_selector()
        localfile = urllib.url2pathname(selector)
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        modified = rfc822.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(selector)[0]
        if host:
            host, port = urllib.splitport(host)
            if port or socket.gethostbyname(host) not in self.get_names():
                # URLError lives in urllib2; it is not imported into
                # this module's namespace on its own.
                raise urllib2.URLError('file not on local host')

        # Validate the range before opening the file so that a bad
        # range cannot leave an open file handle behind.
        brange = range_header_to_tuple(req.headers.get('Range', None))
        assert brange != ()
        if brange:
            (fb, lb) = brange
            if lb == '':
                lb = size
            if fb < 0 or fb > size or lb > size:
                raise RangeError('Requested Range Not Satisfiable')
            size = (lb - fb)

        fo = open(localfile, 'rb')
        if brange:
            try:
                fo = RangeableFileObject(fo, (fb, lb))
            except:
                # do not leak the handle if positioning fails
                fo.close()
                raise
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        return urllib.addinfourl(fo, headers, 'file:'+selector)
238
239
240 # FTP Range Support
241 # Unfortunately, a large amount of base FTP code had to be copied
242 # from urllib and urllib2 in order to insert the FTP REST command.
243 # Code modifications for range support have been commented as
244 # follows:
245 # -- range support modifications start/end here
246
247 from urllib import splitport, splituser, splitpasswd, splitattr, \
248 unquote, addclosehook, addinfourl
249 import ftplib
250 import socket
251 import sys
252 import ftplib
253 import mimetypes
254 import mimetools
255
256 class FTPRangeHandler(urllib2.FTPHandler):
257 def ftp_open(self, req):
258 host = req.get_host()
259 if not host:
260 raise IOError, ('ftp error', 'no host given')
261 host, port = splitport(host)
262 if port is None:
263 port = ftplib.FTP_PORT
264
265 # username/password handling
266 user, host = splituser(host)
267 if user:
268 user, passwd = splitpasswd(user)
269 else:
270 passwd = None
271 host = unquote(host)
272 user = unquote(user or '')
273 passwd = unquote(passwd or '')
274
275 try:
276 host = socket.gethostbyname(host)
277 except socket.error, msg:
278 raise URLError(msg)
279 path, attrs = splitattr(req.get_selector())
280 dirs = path.split('/')
281 dirs = map(unquote, dirs)
282 dirs, file = dirs[:-1], dirs[-1]
283 if dirs and not dirs[0]:
284 dirs = dirs[1:]
285 try:
286 fw = self.connect_ftp(user, passwd, host, port, dirs)
287 type = file and 'I' or 'D'
288 for attr in attrs:
289 attr, value = splitattr(attr)
290 if attr.lower() == 'type' and \
291 value in ('a', 'A', 'i', 'I', 'd', 'D'):
292 type = value.upper()
293
294 # -- range support modifications start here
295 rest = None
296 range_tup = range_header_to_tuple(req.headers.get('Range',None))
297 assert range_tup != ()
298 if range_tup:
299 (fb,lb) = range_tup
300 if fb > 0: rest = fb
301 # -- range support modifications end here
302
303 fp, retrlen = fw.retrfile(file, type, rest)
304
305 # -- range support modifications start here
306 if range_tup:
307 (fb,lb) = range_tup
308 if lb == '':
309 if retrlen is None or retrlen == 0:
310 raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
311 lb = retrlen
312 retrlen = lb - fb
313 if retrlen < 0:
314 # beginning of range is larger than file
315 raise RangeError('Requested Range Not Satisfiable')
316 else:
317 retrlen = lb - fb
318 fp = RangeableFileObject(fp, (0,retrlen))
319 # -- range support modifications end here
320
321 headers = ""
322 mtype = mimetypes.guess_type(req.get_full_url())[0]
323 if mtype:
324 headers += "Content-Type: %s\n" % mtype
325 if retrlen is not None and retrlen >= 0:
326 headers += "Content-Length: %d\n" % retrlen
327 sf = StringIO(headers)
328 headers = mimetools.Message(sf)
329 return addinfourl(fp, headers, req.get_full_url())
330 except ftplib.all_errors, msg:
331 raise IOError, ('ftp error', msg), sys.exc_info()[2]
332
333 def connect_ftp(self, user, passwd, host, port, dirs):
334 fw = ftpwrapper(user, passwd, host, port, dirs)
335 return fw
336
337 class ftpwrapper(urllib.ftpwrapper):
338 # range support note:
339 # this ftpwrapper code is copied directly from
340 # urllib. The only enhancement is to add the rest
341 # argument and pass it on to ftp.ntransfercmd
342 def retrfile(self, file, type, rest=None):
343 self.endtransfer()
344 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
345 else: cmd = 'TYPE ' + type; isdir = 0
346 try:
347 self.ftp.voidcmd(cmd)
348 except ftplib.all_errors:
349 self.init()
350 self.ftp.voidcmd(cmd)
351 conn = None
352 if file and not isdir:
353 # Use nlst to see if the file exists at all
354 try:
355 self.ftp.nlst(file)
356 except ftplib.error_perm, reason:
357 raise IOError, ('ftp error', reason), sys.exc_info()[2]
358 # Restore the transfer mode!
359 self.ftp.voidcmd(cmd)
360 # Try to retrieve as a file
361 try:
362 cmd = 'RETR ' + file
363 conn = self.ftp.ntransfercmd(cmd, rest)
364 except ftplib.error_perm, reason:
365 if str(reason)[:3] == '501':
366 # workaround for REST not supported error
367 fp, retrlen = self.retrfile(file, type)
368 fp = RangeableFileObject(fp, (rest,''))
369 return (fp, retrlen)
370 elif str(reason)[:3] != '550':
371 raise IOError, ('ftp error', reason), sys.exc_info()[2]
372 if not conn:
373 # Set transfer mode to ASCII!
374 self.ftp.voidcmd('TYPE A')
375 # Try a directory listing
376 if file: cmd = 'LIST ' + file
377 else: cmd = 'LIST'
378 conn = self.ftp.ntransfercmd(cmd)
379 self.busy = 1
380 # Pass back both a suitably decorated object and a retrieval length
381 return (addclosehook(conn[0].makefile('rb'),
382 self.endtransfer), conn[1])
383
384
385 ####################################################################
386 # Range Tuple Functions
387 # XXX: These range tuple functions might go better in a class.
388
# Compiled lazily on first use by range_header_to_tuple().
_rangere = None
def range_header_to_tuple(range_header):
    """Get a (firstbyte,lastbyte) tuple from a Range header value.

    Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
    function pulls the firstbyte and lastbyte values and returns
    a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
    the header value, it is returned as an empty string in the
    tuple.  When lastbyte is given, the returned value is one past
    the header's (inclusive) last byte, so "bytes=0-0" yields (0,1).

    Return None if range_header is None, or if the header asks for
    the entire file ("bytes=0-").
    Return () if range_header does not conform to the range spec
    pattern.
    Raise RangeError if the last byte precedes the first byte.
    """
    global _rangere
    if range_header is None:
        return None
    if _rangere is None:
        import re
        _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
    match = _rangere.match(range_header)
    if match:
        tup = range_tuple_normalize(match.group(1,2))
        # Compare against '' rather than testing truthiness: a last
        # byte of 0 is a real bound and must be converted as well.
        if tup and tup[1] != '':
            tup = (tup[0],tup[1]+1)
        return tup
    return ()
406 if _rangere is None:
407 import re
408 _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
409 match = _rangere.match(range_header)
410 if match:
411 tup = range_tuple_normalize(match.group(1,2))
412 if tup and tup[1]:
413 tup = (tup[0],tup[1]+1)
414 return tup
415 return ()
416
def range_tuple_to_header(range_tup):
    """Build a Range header value from a range tuple.

    The result has the form "bytes=<firstbyte>-<lastbyte>".  None is
    returned when no header is needed: range_tup is None or it
    normalizes to the whole file.
    """
    if range_tup is None:
        return None
    normalized = range_tuple_normalize(range_tup)
    if not normalized:
        return None
    first, last = normalized
    if last:
        # the header's last byte is one less than the tuple's upper bound
        last = last - 1
    return 'bytes=%s-%s' % (first, last)
428
def range_tuple_normalize(range_tup):
    """Normalize a (first_byte,last_byte) range tuple.

    Return a tuple whose first element is guaranteed to be an int
    and whose second element will be '' (meaning: the last byte) or
    an int. Finally, return None if the normalized tuple == (0,'')
    as that is equivalent to retrieving the entire file.

    A first byte of None or '' counts as 0; a missing or None last
    byte counts as ''.  None is returned unchanged.
    Raise RangeError if an explicit last byte is smaller than the
    first byte.
    """
    if range_tup is None:
        return None
    # handle first byte
    fb = range_tup[0]
    if fb in (None, ''):
        fb = 0
    else:
        fb = int(fb)
    # handle last byte
    try:
        lb = range_tup[1]
    except IndexError:
        lb = ''
    else:
        if lb is None:
            lb = ''
        elif lb != '':
            lb = int(lb)
    # check if range is over the entire file
    if (fb, lb) == (0, ''):
        return None
    # check that the range is valid; an open-ended range ('') has no
    # upper bound to compare, and comparing '' with an int is not
    # meaningful
    if lb != '' and lb < fb:
        raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
    return (fb, lb)
452