# -*- coding: utf-8 -*-

"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
that are also useful for external consumption.

"""

import cgi
import codecs
import collections
import os
import platform
import re
import sys
from netrc import netrc, NetrcParseError

from . import __version__
from . import certs
from .compat import parse_http_list as _parse_list_header
from .compat import quote, urlparse, bytes, str, OrderedDict, urlunparse
from .cookies import RequestsCookieJar, cookiejar_from_dict
from .structures import CaseInsensitiveDict

_hush_pyflakes = (RequestsCookieJar,)

NETRC_FILES = ('.netrc', '_netrc')

DEFAULT_CA_BUNDLE_PATH = certs.where()


def dict_to_sequence(d):
    """Returns a key/value sequence from a dict-like object."""

    if hasattr(d, 'items'):
        d = d.items()

    return d


def super_len(o):
    if hasattr(o, '__len__'):
        return len(o)
    if hasattr(o, 'len'):
        return o.len
    if hasattr(o, 'fileno'):
        return os.fstat(o.fileno()).st_size


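# A minimal illustration in the module's own doctest style: strings have
# __len__, StringIO-like objects may expose ``.len``, and open files fall
# through to fstat on their file descriptor.
#
#   >>> super_len('hello')
#   5

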
def get_netrc_auth(url):
    """Returns the Requests tuple auth for a given url from netrc."""

    try:
        locations = (os.path.expanduser('~/{0}'.format(f)) for f in NETRC_FILES)
        netrc_path = None

        for loc in locations:
            if os.path.exists(loc) and not netrc_path:
                netrc_path = loc

        # Abort early if there isn't one.
        if netrc_path is None:
            return netrc_path

        ri = urlparse(url)

        # Strip port numbers from netloc
        host = ri.netloc.split(':')[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading
            # the file, we'll just skip netrc auth.
            pass

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass


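# For illustration only: given a ~/.netrc containing a (hypothetical) entry
#
#     machine example.com
#     login alice
#     password s3cret
#
# the lookup resolves the host of the URL against that entry:
#
#   >>> get_netrc_auth('http://example.com/resource')
#   ('alice', 's3cret')

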
def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if name and name[0] != '<' and name[-1] != '>':
        return os.path.basename(name)


def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return an OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        ValueError: need more than 1 value to unpack
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples.
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, collections.Mapping):
        value = value.items()

    return list(value)


# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings. A quoted-string could
    contain a comma. A non-quoted string could have quotes in the
    middle. Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well. IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes. Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly. See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value


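# A small doctest-style sketch: surrounding quotes are stripped and the
# browser-style backslash escapes are undone.
#
#   >>> unquote_header_value('"a \\"quoted\\" token"')
#   'a "quoted" token'

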
def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    """

    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Inserts cookies from a key/value dictionary into a CookieJar and
    returns the updated jar.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    """

    cj2 = cookiejar_from_dict(cookie_dict)
    cj.update(cj2)
    return cj


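# Round-trip sketch using the module's own helpers (cookiejar_from_dict is
# imported above from .cookies):
#
#   >>> cj = cookiejar_from_dict({'session': 'abc123'})
#   >>> dict_from_cookiejar(cj)
#   {'session': 'abc123'}

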
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)

    return charset_re.findall(content)


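# For example, a charset declared in an HTML meta tag is matched by the
# regular expression above:
#
#   >>> get_encodings_from_content('<meta charset="utf-8">')
#   ['utf-8']

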
def get_encoding_from_headers(headers):
    """Returns the encoding from given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = cgi.parse_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'


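# Two representative cases: an explicit charset parameter wins, and a bare
# text/* content type falls back to ISO-8859-1 (the RFC 2616 default):
#
#   >>> get_encoding_from_headers({'content-type': 'text/html; charset=UTF-8'})
#   'UTF-8'
#   >>> get_encoding_from_headers({'content-type': 'text/plain'})
#   'ISO-8859-1'

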
def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b'', final=True)
    if rv:
        yield rv


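# Illustrative sketch with a minimal stand-in for a Response (the
# hypothetical FakeResponse below only carries the ``encoding`` attribute
# this function reads); a multi-byte character split across chunks is
# reassembled by the incremental decoder (Python 3 repr shown):
#
#   >>> class FakeResponse(object):
#   ...     encoding = 'utf-8'
#   >>> list(stream_decode_response_unicode([b'\xc3', b'\xa9'], FakeResponse()))
#   ['é']

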
def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length


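# For instance, slicing a six-character string into pairs:
#
#   >>> list(iter_slices('abcdef', 2))
#   ['ab', 'cd', 'ef']

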
def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tried:

    1. charset from content-type

    2. every encoding from ``<meta ... charset=XXX>``

    3. fall back and replace all unicode characters

    """

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                # isalnum() admits non-hex letters (e.g. '%zz'); leave such
                # sequences untouched instead of crashing.
                parts[i] = '%' + parts[i]
                continue
            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            parts[i] = '%' + parts[i]
    return ''.join(parts)


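# Escapes for unreserved characters are decoded while everything else is
# left alone: %4A is 'J' (unreserved), %20 is a space and stays encoded.
#
#   >>> unquote_unreserved('foo%4Abar%20baz')
#   'fooJbar%20baz'

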
def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.
    """
    # Unquote only the unreserved characters
    # Then quote only illegal characters (do not quote reserved, unreserved,
    # or '%')
    return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~")


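# The unquote/quote cycle makes quoting idempotent: a raw space gets
# encoded, and an already-encoded URI passes through unchanged.
#
#   >>> requote_uri('http://example.com/a b')
#   'http://example.com/a%20b'
#   >>> requote_uri('http://example.com/a%20b')
#   'http://example.com/a%20b'

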
def get_environ_proxies(url):
    """Return a dict of environment proxies."""

    proxy_keys = [
        'all',
        'http',
        'https',
        'ftp',
        'socks'
    ]

    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy = get_proxy('no_proxy')

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the netloc, both with and without the port.
        no_proxy = no_proxy.split(',')
        netloc = urlparse(url).netloc

        for host in no_proxy:
            if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
                # The URL does match something in no_proxy, so we don't want
                # to apply the proxies on this URL.
                return {}

    # If we get here, we either didn't have no_proxy set or we're not going
    # anywhere that no_proxy applies to.
    proxies = [(key, get_proxy(key + '_proxy')) for key in proxy_keys]
    return dict([(key, val) for (key, val) in proxies if val])


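# A hedged sketch of the environment lookup; it assumes no other *_proxy
# or no_proxy variables are set in the environment:
#
#   >>> os.environ['HTTP_PROXY'] = 'http://10.0.0.1:3128'
#   >>> get_environ_proxies('http://example.com/')
#   {'http': 'http://10.0.0.1:3128'}

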
def default_user_agent():
    """Return a string representing the default user agent."""
    _implementation = platform.python_implementation()

    if _implementation == 'CPython':
        _implementation_version = platform.python_version()
    elif _implementation == 'PyPy':
        _implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major,
                                                sys.pypy_version_info.minor,
                                                sys.pypy_version_info.micro)
        if sys.pypy_version_info.releaselevel != 'final':
            _implementation_version = ''.join([
                _implementation_version, sys.pypy_version_info.releaselevel])
    elif _implementation == 'Jython':
        _implementation_version = platform.python_version()  # Complete Guess
    elif _implementation == 'IronPython':
        _implementation_version = platform.python_version()  # Complete Guess
    else:
        _implementation_version = 'Unknown'

    try:
        p_system = platform.system()
        p_release = platform.release()
    except IOError:
        p_system = 'Unknown'
        p_release = 'Unknown'

    return " ".join(['python-requests/%s' % __version__,
                     '%s/%s' % (_implementation, _implementation_version),
                     '%s/%s' % (p_system, p_release)])


def default_headers():
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate', 'compress')),
        'Accept': '*/*'
    })


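# The defaults are easy to inspect; for example, the wildcard Accept
# header (CaseInsensitiveDict lookups ignore key case, so 'accept' works
# just as well):
#
#   >>> default_headers()['Accept']
#   '*/*'

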
def parse_header_links(value):
    """Return a list of parsed link headers.

    i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    """

    links = []

    replace_chars = " '\""

    for val in value.split(","):
        try:
            url, params = val.split(";", 1)
        except ValueError:
            url, params = val, ''

        link = {}

        link["url"] = url.strip("<> '\"")

        for param in params.split(";"):
            try:
                key, value = param.split("=")
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links


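# A single RFC 5988 style link value parses into its url plus parameters
# (insertion-ordered dict repr shown):
#
#   >>> parse_header_links('<http://example.com/page2>; rel="next"')
#   [{'url': 'http://example.com/page2', 'rel': 'next'}]

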
# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'  # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'  # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:  # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None


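# Since JSON opens with ASCII, the null-byte pattern in the first four
# bytes identifies the UTF flavour without decoding anything:
#
#   >>> guess_json_utf(b'{"k": "v"}')
#   'utf-8'
#   >>> guess_json_utf('{"k": "v"}'.encode('utf-16-le'))
#   'utf-16-le'

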
def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument."""
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))


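# A bare host gains the scheme, while an existing scheme is preserved:
#
#   >>> prepend_scheme_if_needed('example.com/pub', 'http')
#   'http://example.com/pub'
#   >>> prepend_scheme_if_needed('https://example.com/pub', 'http')
#   'https://example.com/pub'

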
def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple
    of username, password."""
    if url:
        parsed = urlparse(url)
        return (parsed.username, parsed.password)
    else:
        return ('', '')
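

# Credentials embedded in the URL come back as a (username, password)
# tuple; URLs without auth components yield (None, None) from urlparse:
#
#   >>> get_auth_from_url('http://user:secret@example.com/')
#   ('user', 'secret')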