mercurial/keepalive.py
changeset 2600 c4325f0a9b91
parent 2444 5eb02f9ed804
child 4026 8520a773a141
equal deleted inserted replaced
2599:e4b5e48052c6 2600:c4325f0a9b91
     7 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
     7 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
     8 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     8 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     9 #   Lesser General Public License for more details.
     9 #   Lesser General Public License for more details.
    10 #
    10 #
    11 #   You should have received a copy of the GNU Lesser General Public
    11 #   You should have received a copy of the GNU Lesser General Public
    12 #   License along with this library; if not, write to the 
    12 #   License along with this library; if not, write to the
    13 #      Free Software Foundation, Inc., 
    13 #      Free Software Foundation, Inc.,
    14 #      59 Temple Place, Suite 330, 
    14 #      59 Temple Place, Suite 330,
    15 #      Boston, MA  02111-1307  USA
    15 #      Boston, MA  02111-1307  USA
    16 
    16 
    17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
    17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
    18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
    18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
    19 
    19 
    22 >>> import urllib2
    22 >>> import urllib2
    23 >>> from keepalive import HTTPHandler
    23 >>> from keepalive import HTTPHandler
    24 >>> keepalive_handler = HTTPHandler()
    24 >>> keepalive_handler = HTTPHandler()
    25 >>> opener = urllib2.build_opener(keepalive_handler)
    25 >>> opener = urllib2.build_opener(keepalive_handler)
    26 >>> urllib2.install_opener(opener)
    26 >>> urllib2.install_opener(opener)
    27 >>> 
    27 >>>
    28 >>> fo = urllib2.urlopen('http://www.python.org')
    28 >>> fo = urllib2.urlopen('http://www.python.org')
    29 
    29 
    30 If a connection to a given host is requested, and all of the existing
    30 If a connection to a given host is requested, and all of the existing
    31 connections are still in use, another connection will be opened.  If
    31 connections are still in use, another connection will be opened.  If
    32 the handler tries to use an existing connection but it fails in some
    32 the handler tries to use an existing connection but it fails in some
   109 DEBUG = None
   109 DEBUG = None
   110 
   110 
   111 import sys
   111 import sys
   112 if sys.version_info < (2, 4): HANDLE_ERRORS = 1
   112 if sys.version_info < (2, 4): HANDLE_ERRORS = 1
   113 else: HANDLE_ERRORS = 0
   113 else: HANDLE_ERRORS = 0
   114     
   114 
   115 class ConnectionManager:
   115 class ConnectionManager:
   116     """
   116     """
   117     The connection manager must be able to:
   117     The connection manager must be able to:
   118       * keep track of all existing
   118       * keep track of all existing
   119       """
   119       """
   149             self._lock.release()
   149             self._lock.release()
   150 
   150 
   151     def set_ready(self, connection, ready):
   151     def set_ready(self, connection, ready):
   152         try: self._readymap[connection] = ready
   152         try: self._readymap[connection] = ready
   153         except KeyError: pass
   153         except KeyError: pass
   154         
   154 
   155     def get_ready_conn(self, host):
   155     def get_ready_conn(self, host):
   156         conn = None
   156         conn = None
   157         self._lock.acquire()
   157         self._lock.acquire()
   158         try:
   158         try:
   159             if self._hostmap.has_key(host):
   159             if self._hostmap.has_key(host):
   173             return dict(self._hostmap)
   173             return dict(self._hostmap)
   174 
   174 
   175 class HTTPHandler(urllib2.HTTPHandler):
   175 class HTTPHandler(urllib2.HTTPHandler):
   176     def __init__(self):
   176     def __init__(self):
   177         self._cm = ConnectionManager()
   177         self._cm = ConnectionManager()
   178         
   178 
   179     #### Connection Management
   179     #### Connection Management
   180     def open_connections(self):
   180     def open_connections(self):
   181         """return a list of connected hosts and the number of connections
   181         """return a list of connected hosts and the number of connections
   182         to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
   182         to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
   183         return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
   183         return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
   187         host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
   187         host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
   188         no error occurs if there is no connection to that host."""
   188         no error occurs if there is no connection to that host."""
   189         for h in self._cm.get_all(host):
   189         for h in self._cm.get_all(host):
   190             self._cm.remove(h)
   190             self._cm.remove(h)
   191             h.close()
   191             h.close()
   192         
   192 
   193     def close_all(self):
   193     def close_all(self):
   194         """close all open connections"""
   194         """close all open connections"""
   195         for host, conns in self._cm.get_all().items():
   195         for host, conns in self._cm.get_all().items():
   196             for h in conns:
   196             for h in conns:
   197                 self._cm.remove(h)
   197                 self._cm.remove(h)
   198                 h.close()
   198                 h.close()
   199         
   199 
   200     def _request_closed(self, request, host, connection):
   200     def _request_closed(self, request, host, connection):
   201         """tells us that this request is now closed and the
   201         """tells us that this request is now closed and the
   202         connection is ready for another request"""
   202         connection is ready for another request"""
   203         self._cm.set_ready(connection, 1)
   203         self._cm.set_ready(connection, 1)
   204 
   204 
   205     def _remove_connection(self, host, connection, close=0):
   205     def _remove_connection(self, host, connection, close=0):
   206         if close: connection.close()
   206         if close: connection.close()
   207         self._cm.remove(connection)
   207         self._cm.remove(connection)
   208         
   208 
   209     #### Transaction Execution
   209     #### Transaction Execution
   210     def http_open(self, req):
   210     def http_open(self, req):
   211         return self.do_open(HTTPConnection, req)
   211         return self.do_open(HTTPConnection, req)
   212 
   212 
   213     def do_open(self, http_class, req):
   213     def do_open(self, http_class, req):
   237                 self._cm.add(host, h, 0)
   237                 self._cm.add(host, h, 0)
   238                 self._start_transaction(h, req)
   238                 self._start_transaction(h, req)
   239                 r = h.getresponse()
   239                 r = h.getresponse()
   240         except (socket.error, httplib.HTTPException), err:
   240         except (socket.error, httplib.HTTPException), err:
   241             raise urllib2.URLError(err)
   241             raise urllib2.URLError(err)
   242             
   242 
   243         # if not a persistent connection, don't try to reuse it
   243         # if not a persistent connection, don't try to reuse it
   244         if r.will_close: self._cm.remove(h)
   244         if r.will_close: self._cm.remove(h)
   245 
   245 
   246         if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
   246         if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
   247         r._handler = self
   247         r._handler = self
   249         r._url = req.get_full_url()
   249         r._url = req.get_full_url()
   250         r._connection = h
   250         r._connection = h
   251         r.code = r.status
   251         r.code = r.status
   252         r.headers = r.msg
   252         r.headers = r.msg
   253         r.msg = r.reason
   253         r.msg = r.reason
   254         
   254 
   255         if r.status == 200 or not HANDLE_ERRORS:
   255         if r.status == 200 or not HANDLE_ERRORS:
   256             return r
   256             return r
   257         else:
   257         else:
   258             return self.parent.error('http', req, r,
   258             return self.parent.error('http', req, r,
   259                                      r.status, r.msg, r.headers)
   259                                      r.status, r.msg, r.headers)
   285             if DEBUG: DEBUG.error("unexpected exception - closing " + \
   285             if DEBUG: DEBUG.error("unexpected exception - closing " + \
   286                                   "connection to %s (%d)", host, id(h))
   286                                   "connection to %s (%d)", host, id(h))
   287             self._cm.remove(h)
   287             self._cm.remove(h)
   288             h.close()
   288             h.close()
   289             raise
   289             raise
   290                     
   290 
   291         if r is None or r.version == 9:
   291         if r is None or r.version == 9:
   292             # httplib falls back to assuming HTTP 0.9 if it gets a
   292             # httplib falls back to assuming HTTP 0.9 if it gets a
   293             # bad header back.  This is most likely to happen if
   293             # bad header back.  This is most likely to happen if
   294             # the socket has been closed by the server since we
   294             # the socket has been closed by the server since we
   295             # last used the connection.
   295             # last used the connection.
   340 
   340 
   341     # the read method wraps the original to accommodate buffering,
   341     # the read method wraps the original to accommodate buffering,
   342     # although read() never adds to the buffer.
   342     # although read() never adds to the buffer.
   343     # Both readline and readlines have been stolen with almost no
   343     # Both readline and readlines have been stolen with almost no
   344     # modification from socket.py
   344     # modification from socket.py
   345     
   345 
   346 
   346 
   347     def __init__(self, sock, debuglevel=0, strict=0, method=None):
   347     def __init__(self, sock, debuglevel=0, strict=0, method=None):
   348         if method: # the httplib in python 2.3 uses the method arg
   348         if method: # the httplib in python 2.3 uses the method arg
   349             httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
   349             httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
   350         else: # 2.2 doesn't
   350         else: # 2.2 doesn't
   369                                               self._connection)
   369                                               self._connection)
   370 
   370 
   371     def close_connection(self):
   371     def close_connection(self):
   372         self._handler._remove_connection(self._host, self._connection, close=1)
   372         self._handler._remove_connection(self._host, self._connection, close=1)
   373         self.close()
   373         self.close()
   374         
   374 
   375     def info(self):
   375     def info(self):
   376         return self.headers
   376         return self.headers
   377 
   377 
   378     def geturl(self):
   378     def geturl(self):
   379         return self._url
   379         return self._url
   423 
   423 
   424 
   424 
   425 class HTTPConnection(httplib.HTTPConnection):
   425 class HTTPConnection(httplib.HTTPConnection):
   426     # use the modified response class
   426     # use the modified response class
   427     response_class = HTTPResponse
   427     response_class = HTTPResponse
   428     
   428 
   429 #########################################################################
   429 #########################################################################
   430 #####   TEST FUNCTIONS
   430 #####   TEST FUNCTIONS
   431 #########################################################################
   431 #########################################################################
   432 
   432 
   433 def error_handler(url):
   433 def error_handler(url):
   457     keepalive_handler.close_all()
   457     keepalive_handler.close_all()
   458 
   458 
   459 def continuity(url):
   459 def continuity(url):
   460     import md5
   460     import md5
   461     format = '%25s: %s'
   461     format = '%25s: %s'
   462     
   462 
   463     # first fetch the file with the normal http handler
   463     # first fetch the file with the normal http handler
   464     opener = urllib2.build_opener()
   464     opener = urllib2.build_opener()
   465     urllib2.install_opener(opener)
   465     urllib2.install_opener(opener)
   466     fo = urllib2.urlopen(url)
   466     fo = urllib2.urlopen(url)
   467     foo = fo.read()
   467     foo = fo.read()
   504     opener = urllib2.build_opener(HTTPHandler())
   504     opener = urllib2.build_opener(HTTPHandler())
   505     urllib2.install_opener(opener)
   505     urllib2.install_opener(opener)
   506     t2 = fetch(N, url)
   506     t2 = fetch(N, url)
   507     print '  TIME: %.3f s' % t2
   507     print '  TIME: %.3f s' % t2
   508     print '  improvement factor: %.2f' % (t1/t2, )
   508     print '  improvement factor: %.2f' % (t1/t2, )
   509     
   509 
   510 def fetch(N, url, delay=0):
   510 def fetch(N, url, delay=0):
   511     import time
   511     import time
   512     lens = []
   512     lens = []
   513     starttime = time.time()
   513     starttime = time.time()
   514     for i in range(N):
   514     for i in range(N):
   536     DEBUG = FakeLogger()
   536     DEBUG = FakeLogger()
   537     print "  fetching the file to establish a connection"
   537     print "  fetching the file to establish a connection"
   538     fo = urllib2.urlopen(url)
   538     fo = urllib2.urlopen(url)
   539     data1 = fo.read()
   539     data1 = fo.read()
   540     fo.close()
   540     fo.close()
   541  
   541 
   542     i = 20
   542     i = 20
   543     print "  waiting %i seconds for the server to close the connection" % i
   543     print "  waiting %i seconds for the server to close the connection" % i
   544     while i > 0:
   544     while i > 0:
   545         sys.stdout.write('\r  %2i' % i)
   545         sys.stdout.write('\r  %2i' % i)
   546         sys.stdout.flush()
   546         sys.stdout.flush()
   558     else:
   558     else:
   559         print '  ERROR: DATA DIFFER'
   559         print '  ERROR: DATA DIFFER'
   560 
   560 
   561     DEBUG = dbbackup
   561     DEBUG = dbbackup
   562 
   562 
   563     
   563 
   564 def test(url, N=10):
   564 def test(url, N=10):
   565     print "checking error hander (do this on a non-200)"
   565     print "checking error hander (do this on a non-200)"
   566     try: error_handler(url)
   566     try: error_handler(url)
   567     except IOError, e:
   567     except IOError, e:
   568         print "exiting - exception will prevent further tests"
   568         print "exiting - exception will prevent further tests"
   574     print "performing speed comparison"
   574     print "performing speed comparison"
   575     comp(N, url)
   575     comp(N, url)
   576     print
   576     print
   577     print "performing dropped-connection check"
   577     print "performing dropped-connection check"
   578     test_timeout(url)
   578     test_timeout(url)
   579     
   579 
   580 if __name__ == '__main__':
   580 if __name__ == '__main__':
   581     import time
   581     import time
   582     import sys
   582     import sys
   583     try:
   583     try:
   584         N = int(sys.argv[1])
   584         N = int(sys.argv[1])