OLD | NEW |
(Empty) | |
| 1 # urllib3/util.py |
| 2 # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) |
| 3 # |
| 4 # This module is part of urllib3 and is released under |
| 5 # the MIT License: http://www.opensource.org/licenses/mit-license.php |
| 6 |
| 7 |
| 8 from base64 import b64encode |
| 9 from collections import namedtuple |
| 10 from socket import error as SocketError |
| 11 from hashlib import md5, sha1 |
| 12 from binascii import hexlify, unhexlify |
| 13 |
| 14 try: |
| 15 from select import poll, POLLIN |
| 16 except ImportError: # `poll` doesn't exist on OSX and other platforms |
| 17 poll = False |
| 18 try: |
| 19 from select import select |
| 20 except ImportError: # `select` doesn't exist on AppEngine. |
| 21 select = False |
| 22 |
| 23 try: # Test for SSL features |
| 24 SSLContext = None |
| 25 HAS_SNI = False |
| 26 |
| 27 import ssl |
| 28 from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 |
| 29 from ssl import SSLContext # Modern SSL? |
| 30 from ssl import HAS_SNI # Has SNI? |
| 31 except ImportError: |
| 32 pass |
| 33 |
| 34 |
| 35 from .packages import six |
| 36 from .exceptions import LocationParseError, SSLError |
| 37 |
| 38 |
class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query',
                             'fragment'])):
    """
    Datastructure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`.

    Every field defaults to ``None``, so a partial URL can be built from
    keyword arguments alone.
    """
    # ``__slots__`` (not ``slots``) is what suppresses the per-instance
    # ``__dict__``, keeping instances as lightweight as a plain tuple.
    __slots__ = ()

    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
                query=None, fragment=None):
        return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
                                       query, fragment)

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string."""
        # An empty or missing path is sent as the root path.
        uri = self.path or '/'

        # ``is not None`` so that an empty query still produces the '?'.
        if self.query is not None:
            uri += '?' + self.query

        return uri
| 63 |
| 64 |
def split_first(s, delims):
    """
    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string.

    Delimiters may be longer than one character; the whole matched delimiter
    is removed from the result. When several delimiters match at the same
    position, the one that comes first in ``delims`` wins.

    Example: ::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)
        >>> split_first('http://host/path', ['://'])
        ('http', 'host/path', '://')

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    min_idx = None
    min_delim = None
    for d in delims:
        idx = s.find(d)
        if idx < 0:
            continue

        if min_idx is None or idx < min_idx:
            min_idx = idx
            min_delim = d

    if min_idx is None:
        return s, '', None

    # Skip the full delimiter, not just one character, so multi-character
    # delimiters do not leak into the second part.
    return s[:min_idx], s[min_idx + len(min_delim):], min_delim
| 96 |
| 97 |
def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.

    Partly backwards-compatible with :mod:`urlparse`.

    :raises LocationParseError:
        If the text following the port separator is not made up of digits
        only (an empty port, as in ``host:``, fails as well).

    Example: ::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """

    # While this code has overlap with stdlib's urlparse, it is much
    # simplified for our needs and less annoying.
    # Additionally, this implementation does silly things to be optimal
    # on CPython.

    scheme = None
    auth = None
    host = None
    port = None
    path = None
    fragment = None
    query = None

    # Scheme
    if '://' in url:
        scheme, url = url.split('://', 1)

    # Find the earliest Authority Terminator
    # (http://tools.ietf.org/html/rfc3986#section-3.2)
    url, path_, delim = split_first(url, ['/', '?', '#'])

    if delim:
        # Reassemble the path
        path = delim + path_

    # Auth
    if '@' in url:
        # Split on the LAST '@': a host can never contain one, whereas
        # credentials (typically the password) may carry it unescaped.
        auth, url = url.rsplit('@', 1)

    # IPv6
    if url and url[0] == '[':
        host, url = url[1:].split(']', 1)

    # Port
    if ':' in url:
        _host, port = url.split(':', 1)

        # Keep a host already extracted from an IPv6 literal.
        if not host:
            host = _host

        if not port.isdigit():
            raise LocationParseError("Failed to parse: %s" % url)

        port = int(port)

    elif not host and url:
        host = url

    # Nothing after the authority: no fragment or query to extract.
    if not path:
        return Url(scheme, auth, host, port, path, query, fragment)

    # Fragment
    if '#' in path:
        path, fragment = path.split('#', 1)

    # Query
    if '?' in path:
        path, query = path.split('?', 1)

    return Url(scheme, auth, host, port, path, query, fragment)
| 175 |
| 176 |
def get_host(url):
    """
    Deprecated. Use :func:`.parse_url` instead.

    Returns a ``(scheme, hostname, port)`` tuple for ``url``; the scheme falls
    back to ``'http'`` when the parsed scheme is empty or missing.
    """
    parsed = parse_url(url)
    scheme = parsed.scheme or 'http'
    return scheme, parsed.hostname, parsed.port
| 183 |
| 184 |
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                 basic_auth=None):
    """
    Shortcuts for generating request headers.

    :param keep_alive:
        If ``True``, adds 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, tuple, or string.
        ``True`` translates to 'gzip,deflate'.
        List or tuple will get joined by comma.
        String will be used as provided.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header.

    :returns:
        A dict with lower-case header names; only the headers whose
        argument was truthy are present.

    Example: ::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    headers = {}
    if accept_encoding:
        if isinstance(accept_encoding, str):
            pass
        elif isinstance(accept_encoding, (list, tuple)):
            # Tuples are treated like lists instead of being silently
            # replaced by the default encodings.
            accept_encoding = ','.join(accept_encoding)
        else:
            accept_encoding = 'gzip,deflate'
        headers['accept-encoding'] = accept_encoding

    if user_agent:
        headers['user-agent'] = user_agent

    if keep_alive:
        headers['connection'] = 'keep-alive'

    if basic_auth:
        headers['authorization'] = 'Basic ' + \
            b64encode(six.b(basic_auth)).decode('utf-8')

    return headers
| 235 |
| 236 |
def is_connection_dropped(conn):  # Platform-specific
    """
    Returns True if the connection is dropped and should be closed.

    The check asks the OS, without blocking, whether the connection's socket
    is readable: an idle connection that is readable either has unexpected
    buffered data or has been closed by the peer.

    :param conn:
        :class:`httplib.HTTPConnection` object.

    :returns:
        Always a ``bool``.

    Note: For platforms like AppEngine, this will always return ``False`` to
    let the platform handle connection recycling transparently for us.
    """
    sock = getattr(conn, 'sock', False)
    if not sock:  # Platform-specific: AppEngine
        return False

    if not poll:
        if not select:  # Platform-specific: AppEngine
            return False

        try:
            # select() hands back the list of readable sockets; collapse it
            # to the boolean the callers are promised.
            return bool(select([sock], [], [], 0.0)[0])
        except SocketError:
            return True

    # This version is better on platforms that support it.
    p = poll()
    p.register(sock, POLLIN)
    for fno, _ in p.poll(0.0):
        if fno == sock.fileno():
            # Either data is buffered (bad), or the connection is dropped.
            return True

    # No readable event for our socket: explicit False rather than an
    # implicit None.
    return False
| 267 |
| 268 |
def resolve_cert_reqs(candidate):
    """
    Resolves the argument to a numeric constant, which can be passed to
    the wrap_socket function/method from the ssl module.

    ``None`` resolves to :data:`ssl.CERT_NONE`.
    A string is taken to be the name of a constant in the :mod:`ssl` module,
    or its abbreviation (so you can specify `REQUIRED` instead of
    `CERT_REQUIRED`).
    Anything else is assumed to already be the numeric constant and is
    returned unchanged.
    """
    if candidate is None:
        return CERT_NONE

    if not isinstance(candidate, str):
        return candidate

    # Try the name as given first, then fall back to the prefixed form.
    resolved = getattr(ssl, candidate, None)
    if resolved is not None:
        return resolved
    return getattr(ssl, 'CERT_' + candidate)
| 290 |
| 291 |
def resolve_ssl_version(candidate):
    """
    Resolves the argument to an :mod:`ssl` protocol constant.

    ``None`` resolves to :data:`ssl.PROTOCOL_SSLv23`.
    A string is looked up in the :mod:`ssl` module, first as given and then
    with the ``PROTOCOL_`` prefix (so `SSLv23` works like `PROTOCOL_SSLv23`).
    Anything else is returned unchanged.
    """
    if candidate is None:
        return PROTOCOL_SSLv23

    if not isinstance(candidate, str):
        return candidate

    # Try the name as given first, then fall back to the prefixed form.
    resolved = getattr(ssl, candidate, None)
    if resolved is not None:
        return resolved
    return getattr(ssl, 'PROTOCOL_' + candidate)
| 306 |
| 307 |
def assert_fingerprint(cert, fingerprint):
    """
    Checks if given fingerprint matches the supplied certificate.

    The hash function is chosen from the fingerprint length: MD5 (16 bytes),
    SHA-1 (20 bytes) or SHA-256 (32 bytes).

    :param cert:
        Certificate as bytes object.
    :param fingerprint:
        Fingerprint as string of hexdigits, can be interspersed by colons.

    :raises SSLError:
        If the fingerprint length matches none of the supported digests, or
        if the certificate's digest differs from the fingerprint.
    """
    # Imported here because the module header only pulls in md5 and sha1.
    from hashlib import sha256

    # Maps the length of a digest to a possible hash function producing
    # this digest.
    hashfunc_map = {
        16: md5,
        20: sha1,
        32: sha256,
    }

    fingerprint = fingerprint.replace(':', '').lower()

    # Two hex digits per digest byte; a remainder means an odd digit count.
    digest_length, rest = divmod(len(fingerprint), 2)

    if rest or digest_length not in hashfunc_map:
        raise SSLError('Fingerprint is of invalid length.')

    # We need encode() here for py32; works on py2 and p33.
    fingerprint_bytes = unhexlify(fingerprint.encode())

    hashfunc = hashfunc_map[digest_length]

    cert_digest = hashfunc(cert).digest()

    if cert_digest != fingerprint_bytes:
        # hexlify() returns bytes; decode so the message shows plain hex
        # instead of a b'...' repr on Python 3.
        raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
                       .format(hexlify(fingerprint_bytes).decode('ascii'),
                               hexlify(cert_digest).decode('ascii')))
| 343 |
| 344 |
if SSLContext is not None:  # Python 3.2+
    def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                        ca_certs=None, server_hostname=None,
                        ssl_version=None):
        """
        Wrap ``sock`` using an :class:`ssl.SSLContext`.

        All arguments except `server_hostname` have the same meaning as for
        :func:`ssl.wrap_socket`

        :param server_hostname:
            Hostname of the expected certificate

        :raises SSLError:
            If loading ``ca_certs`` fails for any reason.
        """
        ctx = SSLContext(ssl_version)
        ctx.verify_mode = cert_reqs

        if ca_certs:
            try:
                ctx.load_verify_locations(ca_certs)
            # Py32 raises IOError
            # Py33 raises FileNotFoundError
            except Exception as err:  # Reraise as SSLError
                raise SSLError(err)

        if certfile:
            # FIXME: This block needs a test.
            ctx.load_cert_chain(certfile, keyfile)

        if not HAS_SNI:
            return ctx.wrap_socket(sock)

        # Platform-specific: OpenSSL with enabled SNI
        return ctx.wrap_socket(sock, server_hostname=server_hostname)

else:  # Python 3.1 and earlier
    def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                        ca_certs=None, server_hostname=None,
                        ssl_version=None):
        """
        Wrap ``sock`` with the module-level ``wrap_socket``.

        ``server_hostname`` is accepted for signature compatibility but is
        not forwarded.
        """
        options = dict(keyfile=keyfile, certfile=certfile,
                       ca_certs=ca_certs, cert_reqs=cert_reqs,
                       ssl_version=ssl_version)
        return wrap_socket(sock, **options)
OLD | NEW |