OLD | NEW |
(Empty) | |
| 1 """HTTP library functions. |
| 2 |
| 3 This module contains functions for building an HTTP application |
| 4 framework: any one, not just one whose name starts with "Ch". ;) If you |
| 5 reference any modules from some popular framework inside *this* module, |
| 6 FuManChu will personally hang you up by your thumbs and submit you |
| 7 to a public caning. |
| 8 """ |
| 9 |
| 10 from binascii import b2a_base64 |
| 11 from cherrypy._cpcompat import BaseHTTPRequestHandler, HTTPDate, ntob, ntou, rev
ersed, sorted |
| 12 from cherrypy._cpcompat import basestring, bytestr, iteritems, nativestr, unicod
estr, unquote_qs |
# Status code -> (reason phrase, long message).  Work on a copy so that the
# table attached to BaseHTTPRequestHandler itself is left untouched.
response_codes = BaseHTTPRequestHandler.responses.copy()

# Replace the entries for 500 and 503.
# From http://www.cherrypy.org/ticket/361
response_codes.update({
    500: ('Internal Server Error',
          'The server encountered an unexpected condition '
          'which prevented it from fulfilling the request.'),
    503: ('Service Unavailable',
          'The server is currently unable to handle the '
          'request due to a temporary overloading or '
          'maintenance of the server.'),
})
| 23 |
| 24 import re |
| 25 import urllib |
| 26 |
| 27 |
| 28 |
def urljoin(*atoms):
    """Join the non-empty path atoms with "/" and return the resulting URL.

    Any run of slashes in the joined text is collapsed to a single "/",
    so a SCRIPT_NAME and a PATH_INFO join back into the original URL even
    if either of them is blank.  An empty result is returned as "/".
    """
    joined = "/".join(filter(None, atoms))
    # A single replace() pass only shortens long runs of slashes, so keep
    # going until no doubled slash is left.
    while joined.find("//") != -1:
        joined = joined.replace("//", "/")
    if not joined:
        # Special-case the final url of "".
        return "/"
    return joined
| 40 |
def urljoin_bytes(*atoms):
    """Join the non-empty byte-string path atoms into a single URL.

    Behaves like urljoin, except that the separator and the fallback
    result are produced with ntob("/"): runs of slashes are collapsed
    and an empty result is returned as ntob("/").
    """
    slash = ntob("/")
    doubled = ntob("//")
    joined = slash.join(filter(None, atoms))
    # Collapse repeated slashes until none remain.
    while doubled in joined:
        joined = joined.replace(doubled, slash)
    if not joined:
        # Special-case the final url of "".
        return slash
    return joined
| 52 |
def protocol_from_http(protocol_str):
    """Return a (major, minor) tuple of ints from an 'HTTP/x.y' string.

    The first five characters (the 'HTTP/' prefix) are skipped without
    being checked.  Both version numbers may have more than one digit,
    so 'HTTP/1.10' yields (1, 10).  Anything following the version
    number is ignored.

    Raises ValueError if no 'x.y' version number follows the prefix.
    """
    version = re.match(r"([0-9]+)\.([0-9]+)", protocol_str[5:])
    if version is None:
        raise ValueError("Illegal HTTP protocol string: %r" % (protocol_str,))
    return int(version.group(1)), int(version.group(2))
| 56 |
def get_ranges(headervalue, content_length):
    """Return a list of (start, stop) indices from a Range header, or None.

    Each (start, stop) tuple will be composed of two ints, which are suitable
    for use in a slicing operation. That is, the header "Range: bytes=3-6",
    if applied against a Python string, is requesting resource[3:7]. This
    function will return the list [(3, 7)].

    Every returned tuple lies within [0, content_length]: a last-byte-pos
    past the end of the entity is clamped to the final byte, and a suffix
    range longer than the entity selects the whole entity.

    None is returned when headervalue is empty or when any range spec is
    syntactically invalid; the request should then be handled as if no
    Range header had been sent.

    If this function returns an empty list, you should return HTTP 416.
    """

    if not headervalue:
        return None

    try:
        bytesunit, byteranges = headervalue.split("=", 1)
    except ValueError:
        # No "=" at all, so this is not a ranges specifier.
        return None

    result = []
    for brange in byteranges.split(","):
        bounds = [x.strip() for x in brange.split("-", 1)]
        if len(bounds) != 2:
            # No "-" in this spec.
            return None
        first, last = bounds

        # From rfc 2616 sec 14.16:
        # "If the server ignores a byte-range-spec because it
        # is syntactically invalid, the server SHOULD treat
        # the request as if the invalid Range header field
        # did not exist. (Normally, this means return a 200
        # response containing the full entity)."
        try:
            if first:
                start = int(first)
                if last:
                    stop = int(last)
                else:
                    stop = content_length - 1
            elif last:
                suffix_length = int(last)
            else:
                # Neither a first-byte-pos nor a suffix-length.
                return None
        except ValueError:
            return None

        if first:
            if start >= content_length:
                # From rfc 2616 sec 14.16:
                # "If the server receives a request (other than one
                # including an If-Range request-header field) with an
                # unsatisfiable Range request-header field (that is,
                # all of whose byte-range-spec values have a first-byte-pos
                # value greater than the current length of the selected
                # resource), it SHOULD return a response code of 416
                # (Requested range not satisfiable)."
                continue
            if stop < start:
                # Syntactically invalid; see rfc quote above.
                return None
            # A last-byte-pos at or past the end of the entity means
            # "up to the final byte".
            stop = min(stop, content_length - 1)
            result.append((start, stop + 1))
        else:
            if suffix_length < 0:
                return None
            # Suffix range (last N bytes).  If the entity is shorter than
            # N, the whole entity is selected rather than letting the
            # start index go negative.
            start = max(content_length - suffix_length, 0)
            result.append((start, content_length))

    return result
| 106 |
| 107 |
class HeaderElement(object):
    """An element (with parameters) from an HTTP header's element list.

    value
        The element's initial token (the text before the first ";").

    params
        A dict mapping parameter names to parameter values.
    """

    def __init__(self, value, params=None):
        self.value = value
        if params is None:
            # Give every instance its own dict.
            params = {}
        self.params = params

    def __cmp__(self, other):
        # Python 2 comparison protocol; relies on the cmp builtin.
        return cmp(self.value, other.value)

    def __lt__(self, other):
        return self.value < other.value

    def __str__(self):
        """Return the element in 'value;key=val;key=val' form."""
        p = [";%s=%s" % (k, v) for k, v in self.params.items()]
        return "%s%s" % (self.value, "".join(p))

    def __bytes__(self):
        """Return str(self) converted with ntob."""
        return ntob(self.__str__())

    def __unicode__(self):
        """Return str(self) converted with ntou."""
        return ntou(self.__str__())

    def parse(elementstr):
        """Transform 'token;key=val' to ('token', {'key': 'val'}).

        A parameter without "=val" is stored with an empty string as its
        value.  A parameter with neither a name nor a value (a bare "=")
        is skipped.
        """
        # Split the element into a value and parameters. The 'value' may
        # be of the form, "token=token", but we don't split that here.
        atoms = [x.strip() for x in elementstr.split(";") if x.strip()]
        if not atoms:
            initial_value = ''
        else:
            initial_value = atoms.pop(0)
        params = {}
        for atom in atoms:
            pieces = [x.strip() for x in atom.split("=", 1) if x.strip()]
            if not pieces:
                # The atom was nothing but "=": there is no key to store.
                continue
            key = pieces[0]
            if len(pieces) > 1:
                val = pieces[1]
            else:
                val = ""
            params[key] = val
        return initial_value, params
    parse = staticmethod(parse)

    def from_str(cls, elementstr):
        """Construct an instance from a string of the form 'token;key=val'."""
        ival, params = cls.parse(elementstr)
        return cls(ival, params)
    from_str = classmethod(from_str)
| 159 |
| 160 |
# Matches the start of a "q" parameter, e.g. ";q=" or "; q =".
q_separator = re.compile(r'; *q *=')


class AcceptElement(HeaderElement):
    """An element (with parameters) from an Accept* header's element list.

    AcceptElement objects are comparable and therefore sortable.  They
    are ordered by qvalue first (an element with a lower qvalue compares
    as "less than" one with a higher qvalue); elements with equal qvalues
    are ordered by their string form.
    """

    def from_str(cls, elementstr):
        """Construct an instance from a string such as 'text/html;q=0.8'."""
        # The first "q" parameter (if any) separates the initial
        # media-range parameter(s) (if any) from the accept-params.
        pieces = q_separator.split(elementstr, 1)
        if len(pieces) > 1:
            # The qvalue for an Accept header can have extensions. The other
            # headers cannot, but it's easier to parse them as if they did.
            q_element = HeaderElement.from_str(pieces[1].strip())
        else:
            q_element = None

        media_type, params = cls.parse(pieces[0].strip())
        if q_element is not None:
            params["q"] = q_element
        return cls(media_type, params)
    from_str = classmethod(from_str)

    def qvalue(self):
        # The "q" param is either absent (treated as "1"), a plain string,
        # or a HeaderElement whose value holds the number.
        q = self.params.get("q", "1")
        if isinstance(q, HeaderElement):
            return float(q.value)
        return float(q)
    qvalue = property(qvalue, doc="The qvalue, or priority, of this value.")

    def __cmp__(self, other):
        # Python 2 comparison protocol; relies on the cmp builtin.
        by_q = cmp(self.qvalue, other.qvalue)
        if by_q != 0:
            return by_q
        return cmp(str(self), str(other))

    def __lt__(self, other):
        mine, theirs = self.qvalue, other.qvalue
        if mine != theirs:
            return mine < theirs
        # Equal priority: fall back to comparing the string forms.
        return str(self) < str(other)
| 208 |
| 209 |
def header_elements(fieldname, fieldvalue):
    """Parse a comma-separated header string into a sorted element list.

    Elements of a header whose name starts with "Accept", or is exactly
    'TE', are parsed as AcceptElement objects; all others are parsed as
    HeaderElement objects.  The list is sorted in ascending order and
    then reversed.  An empty or missing fieldvalue gives an empty list.
    """
    if not fieldvalue:
        return []

    if fieldname.startswith("Accept") or fieldname == 'TE':
        element_class = AcceptElement
    else:
        element_class = HeaderElement

    elements = [element_class.from_str(piece)
                for piece in fieldvalue.split(",")]
    # Sort ascending, then flip the list end for end.
    elements.sort()
    elements.reverse()
    return elements
| 224 |
def decode_TEXT(value):
    r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> "f\xfcr").

    Encoded words are decoded with the charset they declare.  Text that
    is not part of an encoded word is passed through; where the email
    package hands such text back as bytes (Python 3, when the value also
    contains encoded words) it is decoded as ISO-8859-1 so that it can
    be joined with the rest.
    """
    try:
        from email.header import decode_header
    except ImportError:
        # Older spelling of the module name.
        from email.Header import decode_header

    pieces = []
    for atom, charset in decode_header(value):
        if charset is not None:
            atom = atom.decode(charset)
        elif not isinstance(atom, str) and isinstance(atom, bytes):
            # Undecoded bytes cannot be concatenated with native strings
            # on Python 3.  On Python 2 a byte string is a str, so this
            # branch is not taken and the atom is left as it is.
            atom = atom.decode("ISO-8859-1")
        pieces.append(atom)
    return "".join(pieces)
| 239 |
def valid_status(status):
    """Return legal HTTP status Code, Reason-phrase and Message.

    The status arg must be an int, or a str that begins with an int.
    A false status (None, 0, "") is treated as 200.

    If status is an int, or a str and no reason-phrase is supplied,
    a default reason-phrase will be provided from response_codes.  Codes
    missing from response_codes get an empty reason-phrase and message.

    Raises ValueError if the code is not numeric or lies outside 100-599.
    """

    if not status:
        status = 200

    # Everything after the first space, if there is one, is the reason.
    pieces = str(status).split(" ", 1)
    code = pieces[0]
    if len(pieces) > 1:
        reason = pieces[1].strip()
    else:
        # No reason supplied.
        reason = None

    try:
        code = int(code)
    except ValueError:
        raise ValueError("Illegal response status from server "
                         "(%s is non-numeric)." % repr(code))

    if not 100 <= code <= 599:
        raise ValueError("Illegal response status from server "
                         "(%s is out of range)." % repr(code))

    # An unknown code is not illegal; it just has no default texts.
    default_reason, message = response_codes.get(code, ("", ""))

    if reason is None:
        reason = default_reason

    return code, reason, message
| 282 |
| 283 |
| 284 # NOTE: the parse_qs functions that follow are modified version of those |
| 285 # in the python3.0 source - we need to pass through an encoding to the unquote |
| 286 # method, but the default parse_qs function doesn't allow us to. These do. |
| 287 |
def _parse_qs(qs, keep_blank_values=0, strict_parsing=0, encoding='utf-8'):
    """Parse a query given as a string argument.

    Arguments:

    qs: URL-encoded query string to be parsed.  Fields may be separated
        by "&" or by ";".

    keep_blank_values: flag indicating whether blank values in
        URL encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors.  If
        false (the default), errors are silently ignored.  If true,
        errors raise a ValueError exception.

    encoding: passed to unquote_qs for every name and value.

    Returns a dict.  A name that occurs once maps to its value; a name
    that occurs several times maps to a list of its values.
    """
    parsed = {}
    for chunk in qs.split('&'):
        for field in chunk.split(';'):
            if not (field or strict_parsing):
                continue
            parts = field.split('=', 1)
            if len(parts) == 1:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                # Handle case of a control-name with no equal sign
                if not keep_blank_values:
                    continue
                parts.append('')
            raw_name, raw_value = parts
            if not (raw_value or keep_blank_values):
                continue
            name = unquote_qs(raw_name, encoding)
            value = unquote_qs(raw_value, encoding)
            if name not in parsed:
                parsed[name] = value
            elif isinstance(parsed[name], list):
                parsed[name].append(value)
            else:
                # Second occurrence: promote the single value to a list.
                parsed[name] = [parsed[name], value]
    return parsed
| 331 |
| 332 |
# Server-side image map coordinates: "x,y" and nothing else.  match()
# anchors the start of the string; the "$" anchors the end, so a query
# string that merely begins with digits and a comma is not mistaken for
# image map coordinates.
image_map_pattern = re.compile(r"[0-9]+,[0-9]+$")

def parse_query_string(query_string, keep_blank_values=True, encoding='utf-8'):
    """Build a params dictionary from a query_string.

    A query_string made up solely of two comma-separated integers is
    treated as server-side image map coordinates and returned as
    {'x': int, 'y': int}.  Anything else is handed to _parse_qs together
    with keep_blank_values and encoding.

    Duplicate key/value pairs in the provided query_string will be
    returned as {'key': [val1, val2, ...]}. Single key/values will
    be returned as strings: {'key': 'value'}.
    """
    if image_map_pattern.match(query_string):
        # Server-side image map. Map the coords to 'x' and 'y'
        # (like CGI::Request does).
        x, y = query_string.split(",")
        pm = {'x': int(x), 'y': int(y)}
    else:
        pm = _parse_qs(query_string, keep_blank_values, encoding=encoding)
    return pm
| 350 |
| 351 |
class CaseInsensitiveDict(dict):
    """A case-insensitive dict subclass.

    Each key is changed on entry to str(key).title().  This applies to
    keys given to the constructor and to update() as well as to keys set
    one at a time.
    """

    # Marks "no default was passed" in pop(), so that None stays usable
    # as a real default value.
    _missing = object()

    def __init__(self, *args, **kwargs):
        # Route initial contents through update() so that their keys are
        # normalised just like keys set later.
        dict.__init__(self)
        self.update(*args, **kwargs)

    def __getitem__(self, key):
        return dict.__getitem__(self, str(key).title())

    def __setitem__(self, key, value):
        dict.__setitem__(self, str(key).title(), value)

    def __delitem__(self, key):
        dict.__delitem__(self, str(key).title())

    def __contains__(self, key):
        return dict.__contains__(self, str(key).title())

    def get(self, key, default=None):
        return dict.get(self, str(key).title(), default)

    if hasattr({}, 'has_key'):
        # Only on interpreters whose dict still has has_key.
        def has_key(self, key):
            return dict.has_key(self, str(key).title())

    def update(self, E=None, **F):
        """Add items from E and F, normalising every key.

        E may be a mapping (anything with a keys() method) or an iterable
        of (key, value) pairs; F holds extra items given as keywords.
        """
        if E is not None:
            if hasattr(E, "keys"):
                for k in E.keys():
                    self[k] = E[k]
            else:
                for k, v in E:
                    self[k] = v
        for k in F:
            self[k] = F[k]

    def fromkeys(cls, seq, value=None):
        """Return a new instance with the keys from seq, all set to value."""
        newdict = cls()
        for k in seq:
            newdict[k] = value
        return newdict
    fromkeys = classmethod(fromkeys)

    def setdefault(self, key, x=None):
        """Return self[key], first setting it to x if it is missing."""
        key = str(key).title()
        try:
            return self[key]
        except KeyError:
            self[key] = x
            return x

    def pop(self, key, default=_missing):
        """Remove key and return its value.

        If key is missing, default is returned when one was given;
        otherwise KeyError is raised, as dict.pop does.
        """
        key = str(key).title()
        if default is CaseInsensitiveDict._missing:
            return dict.pop(self, key)
        return dict.pop(self, key, default)
| 398 |
| 399 |
| 400 # TEXT = <any OCTET except CTLs, but including LWS> |
| 401 # |
| 402 # A CRLF is allowed in the definition of TEXT only as part of a header |
| 403 # field continuation. It is expected that the folding LWS will be |
| 404 # replaced with a single SP before interpretation of the TEXT value." |
# Arguments for the two-argument translate() call used when emitting
# headers: a translation table plus the characters to delete
# (codes 0-31 and 127).
if nativestr != bytestr:
    # No table is passed (None); only the delete set is supplied, as bytes.
    header_translate_table = None
    header_translate_deletechars = bytes(list(range(32)) + [127])
else:
    # A table of all 256 characters in order, which maps each character
    # to itself.
    header_translate_table = ''.join(map(chr, xrange(256)))
    header_translate_deletechars = ''.join(map(chr, xrange(32))) + chr(127)
| 411 |
| 412 |
class HeaderMap(CaseInsensitiveDict):
    """A dict subclass for HTTP request and response headers.

    Each key is changed on entry to str(key).title(). This allows headers
    to be case-insensitive and avoid duplicates.

    Values are header values (decoded according to :rfc:`2047` if necessary).
    """

    # HTTP protocol version as a (major, minor) tuple; encode() only falls
    # back to RFC 2047 encoding when this is (1, 1).
    protocol = (1, 1)

    # Encodings tried, in order, by encode().
    encodings = ["ISO-8859-1"]

    # Someday, when http-bis is done, this will probably get dropped
    # since few servers, clients, or intermediaries do it. But until then,
    # we're going to obey the spec as is.
    # "Words of *TEXT MAY contain characters from character sets other than
    # ISO-8859-1 only when encoded according to the rules of RFC 2047."
    use_rfc_2047 = True

    def elements(self, key):
        """Return a sorted list of HeaderElements for the given header."""
        name = str(key).title()
        return header_elements(name, self.get(name))

    def values(self, key):
        """Return a sorted list of HeaderElement.value for the given header."""
        return [element.value for element in self.elements(key)]

    def output(self):
        """Transform self into a list of (name, value) tuples."""
        pairs = []
        for name, value in self.items():
            if isinstance(name, unicodestr):
                name = self.encode(name)

            # Anything that is not already a string is stringified first.
            if not isinstance(value, basestring):
                value = str(value)

            if isinstance(value, unicodestr):
                value = self.encode(value)

            # See header_translate_* constants above.
            # Replace only if you really know what you're doing.
            name = name.translate(header_translate_table,
                                  header_translate_deletechars)
            value = value.translate(header_translate_table,
                                    header_translate_deletechars)

            pairs.append((name, value))
        return pairs

    def encode(self, v):
        """Return the given header name or value, encoded for HTTP output.

        Each encoding in self.encodings is tried in turn.  If none of
        them can represent v, it is encoded as an RFC 2047 base64 word,
        provided that the protocol is (1, 1) and use_rfc_2047 is true;
        otherwise ValueError is raised.
        """
        for charset in self.encodings:
            try:
                encoded = v.encode(charset)
            except UnicodeEncodeError:
                pass
            else:
                return encoded

        if not (self.protocol == (1, 1) and self.use_rfc_2047):
            raise ValueError("Could not encode header part %r using "
                             "any of the encodings %r." %
                             (v, self.encodings))

        # Encode RFC-2047 TEXT
        # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?=").
        # We do our own here instead of using the email module
        # because we never want to fold lines--folding has
        # been deprecated by the HTTP working group.
        payload = b2a_base64(v.encode('utf-8')).strip(ntob('\n'))
        return ntob('=?utf-8?b?') + payload + ntob('?=')
| 483 |
| 484 |
class Host(object):
    """An internet address.

    name
        Should be the client's host name. If not available (because no DNS
        lookup is performed), the IP address should be used instead.

    """

    # Class-level defaults; __init__ sets all three on each instance.
    ip = "0.0.0.0"
    port = 80
    name = "unknown.tld"

    def __init__(self, ip, port, name=None):
        self.ip = ip
        self.port = port
        if name is not None:
            self.name = name
        else:
            # No host name given: use the IP address as the name.
            self.name = ip

    def __repr__(self):
        fields = (self.ip, self.port, self.name)
        return "httputil.Host(%r, %r, %r)" % fields
OLD | NEW |