from __future__ import absolute_import
"""
M2Crypto enhancement to Python's urllib2 for handling
'https' URLs.
Code from urllib2 is Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007
Python Software Foundation; All Rights Reserved
Summary of changes:
- Use a ProxyHTTPSConnection if the request is going through a proxy.
- Add the SSL context to the https connection when performing https_open.
- Add the M2Crypto HTTPSHandler when building a default opener.
"""
import socket
from M2Crypto import SSL, httpslib
from urllib.parse import urldefrag, urlparse as url_parse
from urllib.response import addinfourl
from typing import Optional # noqa
from urllib.request import * # noqa other modules want to import
from urllib.error import * # noqa other modules want to import
# Choose the base class for the socket file wrapper: prefer the private
# ``socket._fileobject`` when this interpreter provides it, otherwise fall
# back to ``socket.SocketIO``.
if hasattr(socket, '_fileobject'):
    mother_class = socket._fileobject
else:
    mother_class = socket.SocketIO
class _closing_fileobject(mother_class): # noqa
    """Socket file object meant to propagate ``self.close()`` to the socket.

    Python 2.5 provides this as ``socket._fileobject(sock, close=True)``.

    NOTE(review): this class defines no methods of its own, so every
    behaviour is inherited unchanged from ``mother_class``; confirm that the
    base class already closes the underlying socket as described above.
    """
# for python 3
try:
    AbstractHTTPHandler
except NameError:
    # ``AbstractHTTPHandler`` is not exported by ``urllib.request.__all__``,
    # so the star import at the top of this module does not bind it; fetch
    # it explicitly instead.
    import urllib.request
    AbstractHTTPHandler = urllib.request.AbstractHTTPHandler
class HTTPSHandler(AbstractHTTPHandler):
    """urllib handler that opens ``https`` URLs through M2Crypto.

    The handler stores an ``SSL.Context`` in ``self.ctx`` and passes it to
    every ``httpslib`` connection created by :meth:`https_open`.
    """

    def __init__(self, ssl_context=None):
        # type: (Optional[SSL.Context]) -> None
        """Create the handler.

        :param ssl_context: the ``SSL.Context`` used for all connections
            opened by this handler.  When ``None``, a new default
            ``SSL.Context()`` is created.
        """
        AbstractHTTPHandler.__init__(self)

        if ssl_context is not None:
            assert isinstance(ssl_context, SSL.Context), ssl_context
            self.ctx = ssl_context
        else:
            self.ctx = SSL.Context()

    # Copied from urllib2, so we can set the ssl context.
    def https_open(self, req):
        # type: (Request) -> addinfourl
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:

            - info(): return the header object taken from the response's
              ``msg`` attribute
            - geturl(): return the original request URL
            - code: HTTP status code

        :param req: the request to send.
        :return: the wrapped response.
        :raises URLError: if the request has no host, or if a
            ``socket.error`` occurs while sending the request or reading
            the response.
        """
        # https://docs.python.org/3.3/library/urllib.request.html#urllib.request.Request.get_host
        try:  # up to python-3.2
            host = req.get_host()
        except AttributeError:  # from python-3.3
            host = req.host
        if not host:
            raise URLError('no host given')

        # Our change: Check to see if we're using a proxy.
        # Then create an appropriate ssl-aware connection.
        full_url = req.get_full_url()
        target_host = url_parse(full_url)[1]

        if target_host != host:
            # The request's host differs from the host in the URL, i.e. the
            # request is being routed through a proxy: send the complete
            # URL (without fragment) over a proxy-aware connection.
            request_uri = urldefrag(full_url)[0]
            h = httpslib.ProxyHTTPSConnection(host=host, ssl_context=self.ctx)
        else:
            try:  # up to python-3.2
                request_uri = req.get_selector()
            except AttributeError:  # from python-3.3
                request_uri = req.selector
            h = httpslib.HTTPSConnection(host=host, ssl_context=self.ctx)
        # End our change
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        try:
            try:
                h.request(req.get_method(), request_uri, req.data, headers)
                r = h.getresponse()
            except socket.error as err:  # XXX what error?
                raise URLError(err) from err
        except BaseException:
            # Do not leak the connection (and its socket) when the exchange
            # fails; urllib's own do_open() closes it in the same way.
            h.close()
            raise

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.
        # The attributes below make the response look enough like a socket
        # for socket.SocketIO to accept it.
        r.recv = r.read
        r._decref_socketios = lambda: None
        r.ssl = h.sock.ssl
        r._timeout = -1.0
        r.recv_into = r.readinto
        fp = socket.SocketIO(r, 'rb')

        resp = addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp

    https_request = AbstractHTTPHandler.do_request_
# Copied from urllib2 with modifications for ssl
def build_opener(ssl_context=None, *handlers):
    # type: (Optional[SSL.Context], *object) -> OpenerDirector
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    :param ssl_context: passed to the ``HTTPSHandler`` that this function
        adds to the opener.
    :param handlers: handler classes (instantiated here with no
        arguments) or handler instances to add after the defaults.
    :return: the configured ``OpenerDirector``.
    """
    def isclass(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    def replaces(check, klass):
        # True when a user-supplied handler (class or instance) stands in
        # for the default handler class ``klass``.
        if isclass(check):
            return issubclass(check, klass)
        return isinstance(check, klass)

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]

    # A set, not a list: several user handlers may replace the same default
    # class, and recording it twice used to make the second removal from
    # ``default_classes`` raise ValueError.
    skip = {klass for klass in default_classes
            if any(replaces(check, klass) for check in handlers)}

    for klass in default_classes:
        if klass not in skip:
            opener.add_handler(klass())

    # Add the HTTPS handler with ssl_context
    # NOTE(review): HTTPSHandler is never part of ``default_classes``, so it
    # can never be in ``skip`` and this handler is always added, even when
    # the caller supplies an HTTPSHandler subclass.  Left unchanged on
    # purpose: skipping it could silently drop ``ssl_context``.
    if HTTPSHandler not in skip:
        opener.add_handler(HTTPSHandler(ssl_context))

    for h in handlers:
        if isclass(h):
            h = h()
        opener.add_handler(h)
    return opener