OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # |
| 3 # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software |
| 4 # Foundation; All Rights Reserved |
| 5 |
| 6 """A HTTPSConnection/Handler with additional proxy and cert validation features. |
| 7 |
| 8 In particular, monkey patches in Python r74203 to provide support for CONNECT |
| 9 proxies and adds SSL cert validation if the ssl module is present. |
| 10 """ |
| 11 |
| 12 __author__ = "{frew,nick.johnson}@google.com (Fred Wulff and Nick Johnson)" |
| 13 |
| 14 import base64 |
| 15 import httplib |
| 16 import logging |
| 17 import re |
| 18 import socket |
| 19 import urllib2 |
| 20 |
| 21 from urllib import splittype |
| 22 from urllib import splituser |
| 23 from urllib import splitpasswd |
| 24 |
class InvalidCertificateException(httplib.HTTPException):
    """Raised when a host presents an SSL certificate that fails validation.

    Attributes:
      host: The hostname the connection was made to.
      cert: The SSL certificate (as a dictionary) the host returned.
      reason: A short description of why the certificate was rejected.
    """

    def __init__(self, host, cert, reason):
        """Constructor.

        Args:
          host: The hostname the connection was made to.
          cert: The SSL certificate (as a dictionary) the host returned.
          reason: A short description of why the certificate was rejected.
        """
        # Forward the arguments to the base class so that ``args`` is
        # populated. Copying and unpickling rebuild an exception by calling
        # its class with ``args``; with an empty ``args`` that call fails
        # because this constructor has three required parameters.
        httplib.HTTPException.__init__(self, host, cert, reason)
        self.host = host
        self.cert = cert
        self.reason = reason

    def __str__(self):
        """Return a user-facing message naming the host, reason and certificate."""
        return ('Host %s returned an invalid certificate (%s): %s\n'
                'To learn more, see '
                'http://code.google.com/appengine/kb/general.html#rpcssl' %
                (self.host, self.reason, self.cert))
| 45 |
def can_validate_certs():
    """Report whether the ssl module can be imported.

    Returns:
      bool: True when ``import ssl`` succeeds, False when it raises
      ImportError.
    """
    try:
        import ssl
    except ImportError:
        return False
    return True
| 53 |
def _create_fancy_connection(tunnel_host=None, key_file=None,
                             cert_file=None, ca_certs=None):
    """Build an HTTPSConnection subclass preset with proxy and SSL settings.

    The HTTPS handler creates the connection instance in the middle of
    do_open, so there is no way to hand these settings to the constructor.
    Instead they are captured by a class created on every call.

    Args:
      tunnel_host: Host (optionally "host:port") to reach through a CONNECT
        proxy, or None for a direct connection.
      key_file: Private key file passed to the SSL wrapper, or None.
      cert_file: Certificate file passed to the SSL wrapper, or None.
      ca_certs: CA certificates file. When set and the ssl module is
        available, a server certificate is required and its hostname is
        validated.

    Returns:
      The PresetProxyHTTPSConnection class (not an instance).
    """

    class PresetProxyHTTPSConnection(httplib.HTTPSConnection):
        """An HTTPS connection that uses a proxy defined by the enclosing scope."""

        def __init__(self, *args, **kwargs):
            httplib.HTTPSConnection.__init__(self, *args, **kwargs)

            self._tunnel_host = tunnel_host
            if tunnel_host:
                logging.debug("Creating preset proxy https conn: %s",
                              tunnel_host)

            self.key_file = key_file
            self.cert_file = cert_file
            self.ca_certs = ca_certs
            try:
                import ssl
            except ImportError:
                # Without the ssl module connect() uses the legacy socket.ssl
                # path, which never reads cert_reqs, so leave it unset.
                pass
            else:
                if self.ca_certs:
                    self.cert_reqs = ssl.CERT_REQUIRED
                else:
                    self.cert_reqs = ssl.CERT_NONE

        def _tunnel(self):
            """Ask the proxy to open a CONNECT tunnel to the tunnel host.

            Raises:
              socket.error: The proxy answered with a status other than 200.
            """
            self._set_hostport(self._tunnel_host, None)
            logging.info("Connecting through tunnel to: %s:%d",
                         self.host, self.port)
            self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self.host, self.port))
            response = self.response_class(self.sock, strict=self.strict,
                                           method=self._method)
            (_, code, message) = response._read_status()

            if code != 200:
                self.close()
                raise socket.error("Tunnel connection failed: %d %s" % (
                    code, message.strip()))

            # Discard the rest of the proxy's response headers. An empty
            # string means the proxy closed the connection; stop there too,
            # otherwise this loop would never terminate.
            while True:
                line = response.fp.readline()
                if line in ("\r\n", "\n", ""):
                    break

        def _get_valid_hosts_for_cert(self, cert):
            """Returns a list of valid host globs for an SSL certificate.

            Args:
              cert: A dictionary representing an SSL certificate.
            Returns:
              list: The DNS entries of subjectAltName when that key is
              present, otherwise the commonName fields of the subject.
            """
            if 'subjectAltName' in cert:
                return [entry[1] for entry in cert['subjectAltName']
                        if entry[0].lower() == 'dns']
            # Return a list of commonName fields.
            return [rdn[0][1] for rdn in cert.get('subject', ())
                    if rdn[0][0].lower() == 'commonname']

        def _validate_certificate_hostname(self, cert, hostname):
            """Validates that a given hostname is valid for an SSL certificate.

            Args:
              cert: A dictionary representing an SSL certificate.
              hostname: The hostname to test.
            Returns:
              bool: Whether or not the hostname is valid for this certificate.
            """
            for host in self._get_valid_hosts_for_cert(cert):
                # Convert the glob-style hostname expression (eg,
                # '*.google.com') into a regular expression. Escape everything
                # first so that only '*' is special; any other metacharacter
                # in a certificate name must match literally.
                host_re = re.escape(host).replace('\\*', '[^.]*')
                # \A and \Z anchor to the true ends of the string; '$' would
                # also accept a hostname followed by a newline.
                if re.search(r'\A%s\Z' % (host_re,), hostname, re.I):
                    return True
            return False

        def connect(self):
            """Connect (through the tunnel if configured) and start SSL.

            Raises:
              InvalidCertificateException: A certificate was required and it
                is not valid for the host connected to.
            """
            # TODO(frew): When we drop support for <2.6 (in the far distant
            # future), change this to socket.create_connection.
            self.sock = _create_connection((self.host, self.port))

            if self._tunnel_host:
                self._tunnel()

            # ssl and FakeSocket got deprecated. Try for the new hotness of
            # wrap_ssl, with fallback. Only the import is guarded, so an
            # ImportError raised during the handshake cannot silently
            # downgrade the connection to the unvalidated legacy path.
            try:
                import ssl
            except ImportError:
                ssl = None

            if ssl is None:
                legacy_ssl = socket.ssl(self.sock,
                                        keyfile=self.key_file,
                                        certfile=self.cert_file)
                self.sock = httplib.FakeSocket(self.sock, legacy_ssl)
                return

            self.sock = ssl.wrap_socket(self.sock,
                                        keyfile=self.key_file,
                                        certfile=self.cert_file,
                                        ca_certs=self.ca_certs,
                                        cert_reqs=self.cert_reqs)

            if self.cert_reqs & ssl.CERT_REQUIRED:
                cert = self.sock.getpeercert()
                # This used to read self.host.split(':', 0)[0], but a maxsplit
                # of 0 never splits, so the result was always self.host.
                # NOTE(review): assumes httplib has already separated any
                # port from self.host -- confirm.
                hostname = self.host
                if not self._validate_certificate_hostname(cert, hostname):
                    # Do not leave an open socket to an unvalidated peer on
                    # this object, where a later request could reuse it.
                    self.close()
                    raise InvalidCertificateException(hostname, cert,
                                                      'hostname mismatch')

    return PresetProxyHTTPSConnection
| 166 |
| 167 |
| 168 # Here to end of _create_connection copied wholesale from Python 2.6's socket.py |
# Unique sentinel used as the default value of the ``timeout`` parameter of
# _create_connection. It is compared by identity, so any explicitly supplied
# timeout (including None) is applied to the socket while an omitted one is not.
_GLOBAL_DEFAULT_TIMEOUT = object()
| 170 |
| 171 |
| 172 def _create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT): |
| 173 """Connect to *address* and return the socket object. |
| 174 |
| 175 Convenience function. Connect to *address* (a 2-tuple ``(host, |
| 176 port)``) and return the socket object. Passing the optional |
| 177 *timeout* parameter will set the timeout on the socket instance |
| 178 before attempting to connect. If no *timeout* is supplied, the |
| 179 global default timeout setting returned by :func:`getdefaulttimeout` |
| 180 is used. |
| 181 """ |
| 182 |
| 183 msg = "getaddrinfo returns an empty list" |
| 184 host, port = address |
| 185 for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): |
| 186 af, socktype, proto, canonname, sa = res |
| 187 sock = None |
| 188 try: |
| 189 sock = socket.socket(af, socktype, proto) |
| 190 if timeout is not _GLOBAL_DEFAULT_TIMEOUT: |
| 191 sock.settimeout(timeout) |
| 192 sock.connect(sa) |
| 193 return sock |
| 194 |
| 195 except socket.error, msg: |
| 196 if sock is not None: |
| 197 sock.close() |
| 198 |
| 199 raise socket.error, msg |
| 200 |
| 201 |
class FancyRequest(urllib2.Request):
    """A urllib2 request that can be sent through a CONNECT proxy.

    Besides the usual request state it carries the tunnel target and the SSL
    file settings that FancyHTTPSHandler reads when opening the connection.
    """

    def __init__(self, *args, **kwargs):
        """Accepts the same arguments as urllib2.Request."""
        urllib2.Request.__init__(self, *args, **kwargs)
        self._tunnel_host = None
        self._key_file = None
        self._cert_file = None
        self._ca_certs = None

    def set_proxy(self, host, type):
        """Route the request through a proxy, tunnelling if it is https.

        Args:
          host: The proxy's host (and optional port).
          type: The proxy's scheme.
        """
        scheme = self.get_type()
        tunnelling = scheme == "https" and not self._tunnel_host
        if tunnelling:
            # Remember the real destination before the proxy replaces it.
            self._tunnel_host = self.get_host()

        urllib2.Request.set_proxy(self, host, type)

        if tunnelling:
            # The base class just set self.type to the proxy's type. Put the
            # request's own type back so it is still handled as https and
            # tunnelled.
            self.type = scheme

    def set_ssl_info(self, key_file=None, cert_file=None, ca_certs=None):
        """Record the SSL files to use for this request.

        Args:
          key_file: Private key file, or None.
          cert_file: Certificate file, or None.
          ca_certs: CA certificates file, or None.
        """
        self._key_file, self._cert_file, self._ca_certs = (
            key_file, cert_file, ca_certs)
| 229 |
| 230 |
class FancyProxyHandler(urllib2.ProxyHandler):
    """A ProxyHandler that works with CONNECT-enabled proxies."""

    # Adapted from _parse_proxy in /usr/lib/python2.5/urllib2.py
    def _parse_proxy(self, proxy):
        """Split a proxy URL or bare authority into its components.

        A URL must have an authority (host:port) component, which per
        RFC 3986 means two slashes after the scheme. Anything after the
        authority is ignored. The userinfo part, when present, is assumed to
        be username:password.

        Examples (input -> result):
          'proxy.example.com:3128'
              -> (None, None, None, 'proxy.example.com:3128')
          'joe:password@proxy.example.com'
              -> (None, 'joe', 'password', 'proxy.example.com')
          'http://joe:password@proxy.example.com:3128'
              -> ('http', 'joe', 'password', 'proxy.example.com:3128')
          'ftp://joe:password@proxy.example.com/rubbish:3128'
              -> ('ftp', 'joe', 'password', 'proxy.example.com')

        Args:
          proxy: A proxy URL or an authority string.

        Returns:
          A tuple (scheme, user, password, host/port). The first three items
          may be None.

        Raises:
          ValueError: A URL was given without an authority, for example
            'file:/ftp.example.com/'.
        """
        scheme, remainder = splittype(proxy)
        if remainder.startswith("/"):
            # URL form.
            if not remainder.startswith("//"):
                raise ValueError("proxy URL with no authority: %r" % proxy)
            # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
            # and 3.3.), path is empty or starts with '/'
            path_start = remainder.find("/", 2)
            if path_start == -1:
                authority = remainder[2:]
            else:
                authority = remainder[2:path_start]
        else:
            # Bare authority: whatever splittype took for a scheme was not one.
            scheme = None
            authority = proxy

        userinfo, hostport = splituser(authority)
        if userinfo is None:
            user = password = None
        else:
            user, password = splitpasswd(userinfo)
        return scheme, user, password, hostport

    def proxy_open(self, req, proxy, type):
        """Point the request at the proxy; https requests are not opened here.

        Args:
          req: The request being opened.
          proxy: The proxy URL or authority configured for this scheme.
          type: The scheme this handler method was registered for.

        Returns:
          None for an https request, otherwise whatever
          urllib2.ProxyHandler.proxy_open returns.
        """
        # This block is copied from Python2.6 urllib2. It is idempotent, so
        # the superclass method call below executes as normal if invoked.
        request_scheme = req.get_type()
        proxy_scheme, user, password, hostport = self._parse_proxy(proxy)
        if proxy_scheme is None:
            proxy_scheme = request_scheme
        if user and password:
            credentials = "%s:%s" % (urllib2.unquote(user),
                                     urllib2.unquote(password))
            encoded = base64.b64encode(credentials).strip()
            # Later calls overwrite earlier calls for the same header
            req.add_header("Proxy-authorization", "Basic " + encoded)
        req.set_proxy(urllib2.unquote(hostport), proxy_scheme)

        # This condition is the change relative to the stock handler: an
        # https request stops here instead of being passed to the superclass.
        if request_scheme == "https":
            return None

        return urllib2.ProxyHandler.proxy_open(self, req, proxy, type)
| 327 |
| 328 |
| 329 class FancyHTTPSHandler(urllib2.HTTPSHandler): |
| 330 """An HTTPSHandler that works with CONNECT-enabled proxies.""" |
| 331 |
| 332 def do_open(self, http_class, req): |
| 333 # Intentionally very specific so as to opt for false negatives |
| 334 # rather than false positives. |
| 335 try: |
| 336 return urllib2.HTTPSHandler.do_open( |
| 337 self, |
| 338 _create_fancy_connection(req._tunnel_host, |
| 339 req._key_file, |
| 340 req._cert_file, |
| 341 req._ca_certs), |
| 342 req) |
| 343 except urllib2.URLError, url_error: |
| 344 try: |
| 345 import ssl |
| 346 if (type(url_error.reason) == ssl.SSLError and |
| 347 url_error.reason.args[0] == 1): |
| 348 # Display the reason to the user. Need to use args for python2.5 |
| 349 # compat. |
| 350 raise InvalidCertificateException(req.host, '', |
| 351 url_error.reason.args[1]) |
| 352 except ImportError: |
| 353 pass |
| 354 |
| 355 raise url_error |
| 356 |
| 357 |
| 358 # We have to implement this so that we persist the tunneling behavior |
| 359 # through redirects. |
| 360 class FancyRedirectHandler(urllib2.HTTPRedirectHandler): |
| 361 """A redirect handler that persists CONNECT-enabled proxy information.""" |
| 362 |
| 363 def redirect_request(self, req, *args, **kwargs): |
| 364 new_req = urllib2.HTTPRedirectHandler.redirect_request( |
| 365 self, req, *args, **kwargs) |
| 366 # Same thing as in our set_proxy implementation, but in this case |
| 367 # we"ve only got a Request to work with, so it was this or copy |
| 368 # everything over piecemeal. |
| 369 # |
| 370 # Note that we do not persist tunneling behavior from an http request |
| 371 # to an https request, because an http request does not set _tunnel_host. |
| 372 # |
| 373 # Also note that in Python < 2.6, you will get an error in |
| 374 # FancyHTTPSHandler.do_open() on an https urllib2.Request that uses an http |
| 375 # proxy, since the proxy type will be set to http instead of https. |
| 376 # (FancyRequest, and urllib2.Request in Python >= 2.6 set the proxy type to |
| 377 # https.) Such an urllib2.Request could result from this redirect |
| 378 # if you are redirecting from an http request (since an an http request |
| 379 # does not have _tunnel_host set, and thus you will not set the proxy |
| 380 # in the code below), and if you have defined a proxy for https in, say, |
| 381 # FancyProxyHandler, and that proxy has type http. |
| 382 if hasattr(req, "_tunnel_host") and isinstance(new_req, urllib2.Request): |
| 383 if new_req.get_type() == "https": |
| 384 if req._tunnel_host: |
| 385 # req is proxied, so copy the proxy info. |
| 386 new_req._tunnel_host = new_req.get_host() |
| 387 new_req.set_proxy(req.host, "https") |
| 388 else: |
| 389 # req is not proxied, so just make sure _tunnel_host is defined. |
| 390 new_req._tunnel_host = None |
| 391 new_req.type = "https" |
| 392 if hasattr(req, "_key_file") and isinstance(new_req, urllib2.Request): |
| 393 # Copy the auxiliary data in case this or any further redirect is https |
| 394 new_req._key_file = req._key_file |
| 395 new_req._cert_file = req._cert_file |
| 396 new_req._ca_certs = req._ca_certs |
| 397 |
| 398 return new_req |
OLD | NEW |