| Index: third_party/cherrypy/_cpreqbody.py
|
| ===================================================================
|
| --- third_party/cherrypy/_cpreqbody.py (revision 0)
|
| +++ third_party/cherrypy/_cpreqbody.py (revision 0)
|
| @@ -0,0 +1,965 @@
|
| +"""Request body processing for CherryPy.
|
| +
|
| +.. versionadded:: 3.2
|
| +
|
| +Application authors have complete control over the parsing of HTTP request
|
| +entities. In short, :attr:`cherrypy.request.body<cherrypy._cprequest.Request.body>`
|
| +is now always set to an instance of :class:`RequestBody<cherrypy._cpreqbody.RequestBody>`,
|
| +and *that* class is a subclass of :class:`Entity<cherrypy._cpreqbody.Entity>`.
|
| +
|
| +When an HTTP request includes an entity body, it is often desirable to
|
| +provide that information to applications in a form other than the raw bytes.
|
| +Different content types demand different approaches. Examples:
|
| +
|
| + * For a GIF file, we want the raw bytes in a stream.
|
| + * An HTML form is better parsed into its component fields, and each text field
|
| + decoded from bytes to unicode.
|
| + * A JSON body should be deserialized into a Python dict or list.
|
| +
|
| +When the request contains a Content-Type header, the media type is used as a
|
| +key to look up a value in the
|
| +:attr:`request.body.processors<cherrypy._cpreqbody.Entity.processors>` dict.
|
| +If the full media
|
| +type is not found, then the major type is tried; for example, if no processor
|
| +is found for the 'image/jpeg' type, then we look for a processor for the 'image'
|
| +types altogether. If neither the full type nor the major type has a matching
|
| +processor, then a default processor is used
|
| +(:func:`default_proc<cherrypy._cpreqbody.Entity.default_proc>`). For most
|
| +types, this means no processing is done, and the body is left unread as a
|
| +raw byte stream. Processors are configurable in an 'on_start_resource' hook.
|
| +
|
| +Some processors, especially those for the 'text' types, attempt to decode bytes
|
| +to unicode. If the Content-Type request header includes a 'charset' parameter,
|
| +this is used to decode the entity. Otherwise, one or more default charsets may
|
| +be attempted, although this decision is up to each processor. If a processor
|
| +successfully decodes an Entity or Part, it should set the
|
| +:attr:`charset<cherrypy._cpreqbody.Entity.charset>` attribute
|
| +on the Entity or Part to the name of the successful charset, so that
|
| +applications can easily re-encode or transcode the value if they wish.
|
| +
|
| +If the Content-Type of the request entity is of major type 'multipart', then
|
| +the above parsing process, and possibly a decoding process, is performed for
|
| +each part.
|
| +
|
| +For both the full entity and multipart parts, a Content-Disposition header may
|
| +be used to fill :attr:`name<cherrypy._cpreqbody.Entity.name>` and
|
| +:attr:`filename<cherrypy._cpreqbody.Entity.filename>` attributes on the
|
| +request.body or the Part.
|
| +
|
| +.. _custombodyprocessors:
|
| +
|
| +Custom Processors
|
| +=================
|
| +
|
| +You can add your own processors for any specific or major MIME type. Simply add
|
| +it to the :attr:`processors<cherrypy._cprequest.Entity.processors>` dict in a
|
| +hook/tool that runs at ``on_start_resource`` or ``before_request_body``.
|
| +Here's the built-in JSON tool for an example::
|
| +
|
| + def json_in(force=True, debug=False):
|
| + request = cherrypy.serving.request
|
| + def json_processor(entity):
|
| + \"""Read application/json data into request.json.\"""
|
| + if not entity.headers.get("Content-Length", ""):
|
| + raise cherrypy.HTTPError(411)
|
| +
|
| + body = entity.fp.read()
|
| + try:
|
| + request.json = json_decode(body)
|
| + except ValueError:
|
| + raise cherrypy.HTTPError(400, 'Invalid JSON document')
|
| + if force:
|
| + request.body.processors.clear()
|
| + request.body.default_proc = cherrypy.HTTPError(
|
| + 415, 'Expected an application/json content type')
|
| + request.body.processors['application/json'] = json_processor
|
| +
|
| +We begin by defining a new ``json_processor`` function to stick in the ``processors``
|
| +dictionary. All processor functions take a single argument, the ``Entity`` instance
|
| +they are to process. It will be called whenever a request is received (for those
|
| +URI's where the tool is turned on) which has a ``Content-Type`` of
|
| +"application/json".
|
| +
|
| +First, it checks for a valid ``Content-Length`` (raising 411 if not valid), then
|
| +reads the remaining bytes on the socket. The ``fp`` object knows its own length, so
|
| +it won't hang waiting for data that never arrives. It will return when all data
|
| +has been read. Then, we decode those bytes using Python's built-in ``json`` module,
|
| +and stick the decoded result onto ``request.json`` . If it cannot be decoded, we
|
| +raise 400.
|
| +
|
| +If the "force" argument is True (the default), the ``Tool`` clears the ``processors``
|
| +dict so that request entities of other ``Content-Types`` aren't parsed at all. Since
|
| +there's no entry for those invalid MIME types, the ``default_proc`` method of ``cherrypy.request.body``
|
| +is called. But this does nothing by default (usually to provide the page handler an opportunity to handle it.)
|
| +But in our case, we want to raise 415, so we replace ``request.body.default_proc``
|
| +with the error (``HTTPError`` instances, when called, raise themselves).
|
| +
|
| +If we were defining a custom processor, we can do so without making a ``Tool``. Just add the config entry::
|
| +
|
| + request.body.processors = {'application/json': json_processor}
|
| +
|
| +Note that you can only replace the ``processors`` dict wholesale this way, not update the existing one.
|
| +"""
|
| +
|
| +try:
|
| + from io import DEFAULT_BUFFER_SIZE
|
| +except ImportError:
|
| + DEFAULT_BUFFER_SIZE = 8192
|
| +import re
|
| +import sys
|
| +import tempfile
|
| +try:
|
| + from urllib import unquote_plus
|
| +except ImportError:
|
| + def unquote_plus(bs):
|
| + """Bytes version of urllib.parse.unquote_plus."""
|
| + bs = bs.replace(ntob('+'), ntob(' '))
|
| + atoms = bs.split(ntob('%'))
|
| + for i in range(1, len(atoms)):
|
| + item = atoms[i]
|
| + try:
|
| + pct = int(item[:2], 16)
|
| + atoms[i] = bytes([pct]) + item[2:]
|
| + except ValueError:
|
| + pass
|
| + return ntob('').join(atoms)
|
| +
|
| +import cherrypy
|
| +from cherrypy._cpcompat import basestring, ntob, ntou
|
| +from cherrypy.lib import httputil
|
| +
|
| +
|
| +# -------------------------------- Processors -------------------------------- #
|
| +
|
| +def process_urlencoded(entity):
|
| + """Read application/x-www-form-urlencoded data into entity.params."""
|
| + qs = entity.fp.read()
|
| + for charset in entity.attempt_charsets:
|
| + try:
|
| + params = {}
|
| + for aparam in qs.split(ntob('&')):
|
| + for pair in aparam.split(ntob(';')):
|
| + if not pair:
|
| + continue
|
| +
|
| + atoms = pair.split(ntob('='), 1)
|
| + if len(atoms) == 1:
|
| + atoms.append(ntob(''))
|
| +
|
| + key = unquote_plus(atoms[0]).decode(charset)
|
| + value = unquote_plus(atoms[1]).decode(charset)
|
| +
|
| + if key in params:
|
| + if not isinstance(params[key], list):
|
| + params[key] = [params[key]]
|
| + params[key].append(value)
|
| + else:
|
| + params[key] = value
|
| + except UnicodeDecodeError:
|
| + pass
|
| + else:
|
| + entity.charset = charset
|
| + break
|
| + else:
|
| + raise cherrypy.HTTPError(
|
| + 400, "The request entity could not be decoded. The following "
|
| + "charsets were attempted: %s" % repr(entity.attempt_charsets))
|
| +
|
| + # Now that all values have been successfully parsed and decoded,
|
| + # apply them to the entity.params dict.
|
| + for key, value in params.items():
|
| + if key in entity.params:
|
| + if not isinstance(entity.params[key], list):
|
| + entity.params[key] = [entity.params[key]]
|
| + entity.params[key].append(value)
|
| + else:
|
| + entity.params[key] = value
|
| +
|
| +
|
| +def process_multipart(entity):
|
| + """Read all multipart parts into entity.parts."""
|
| + ib = ""
|
| + if 'boundary' in entity.content_type.params:
|
| + # http://tools.ietf.org/html/rfc2046#section-5.1.1
|
| + # "The grammar for parameters on the Content-type field is such that it
|
| + # is often necessary to enclose the boundary parameter values in quotes
|
| + # on the Content-type line"
|
| + ib = entity.content_type.params['boundary'].strip('"')
|
| +
|
| + if not re.match("^[ -~]{0,200}[!-~]$", ib):
|
| + raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
|
| +
|
| + ib = ('--' + ib).encode('ascii')
|
| +
|
| + # Find the first marker
|
| + while True:
|
| + b = entity.readline()
|
| + if not b:
|
| + return
|
| +
|
| + b = b.strip()
|
| + if b == ib:
|
| + break
|
| +
|
| + # Read all parts
|
| + while True:
|
| + part = entity.part_class.from_fp(entity.fp, ib)
|
| + entity.parts.append(part)
|
| + part.process()
|
| + if part.fp.done:
|
| + break
|
| +
|
| +def process_multipart_form_data(entity):
|
| + """Read all multipart/form-data parts into entity.parts or entity.params."""
|
| + process_multipart(entity)
|
| +
|
| + kept_parts = []
|
| + for part in entity.parts:
|
| + if part.name is None:
|
| + kept_parts.append(part)
|
| + else:
|
| + if part.filename is None:
|
| + # It's a regular field
|
| + value = part.fullvalue()
|
| + else:
|
| + # It's a file upload. Retain the whole part so consumer code
|
| + # has access to its .file and .filename attributes.
|
| + value = part
|
| +
|
| + if part.name in entity.params:
|
| + if not isinstance(entity.params[part.name], list):
|
| + entity.params[part.name] = [entity.params[part.name]]
|
| + entity.params[part.name].append(value)
|
| + else:
|
| + entity.params[part.name] = value
|
| +
|
| + entity.parts = kept_parts
|
| +
|
| +def _old_process_multipart(entity):
|
| + """The behavior of 3.2 and lower. Deprecated and will be changed in 3.3."""
|
| + process_multipart(entity)
|
| +
|
| + params = entity.params
|
| +
|
| + for part in entity.parts:
|
| + if part.name is None:
|
| + key = ntou('parts')
|
| + else:
|
| + key = part.name
|
| +
|
| + if part.filename is None:
|
| + # It's a regular field
|
| + value = part.fullvalue()
|
| + else:
|
| + # It's a file upload. Retain the whole part so consumer code
|
| + # has access to its .file and .filename attributes.
|
| + value = part
|
| +
|
| + if key in params:
|
| + if not isinstance(params[key], list):
|
| + params[key] = [params[key]]
|
| + params[key].append(value)
|
| + else:
|
| + params[key] = value
|
| +
|
| +
|
| +
|
| +# --------------------------------- Entities --------------------------------- #
|
| +
|
| +
|
| +class Entity(object):
|
| + """An HTTP request body, or MIME multipart body.
|
| +
|
| + This class collects information about the HTTP request entity. When a
|
| + given entity is of MIME type "multipart", each part is parsed into its own
|
| + Entity instance, and the set of parts stored in
|
| + :attr:`entity.parts<cherrypy._cpreqbody.Entity.parts>`.
|
| +
|
| + Between the ``before_request_body`` and ``before_handler`` tools, CherryPy
|
| + tries to process the request body (if any) by calling
|
| + :func:`request.body.process<cherrypy._cpreqbody.RequestBody.process`.
|
| + This uses the ``content_type`` of the Entity to look up a suitable processor
|
| + in :attr:`Entity.processors<cherrypy._cpreqbody.Entity.processors>`, a dict.
|
| + If a matching processor cannot be found for the complete Content-Type,
|
| + it tries again using the major type. For example, if a request with an
|
| + entity of type "image/jpeg" arrives, but no processor can be found for
|
| + that complete type, then one is sought for the major type "image". If a
|
| + processor is still not found, then the
|
| + :func:`default_proc<cherrypy._cpreqbody.Entity.default_proc>` method of the
|
| + Entity is called (which does nothing by default; you can override this too).
|
| +
|
| + CherryPy includes processors for the "application/x-www-form-urlencoded"
|
| + type, the "multipart/form-data" type, and the "multipart" major type.
|
| + CherryPy 3.2 processes these types almost exactly as older versions.
|
| + Parts are passed as arguments to the page handler using their
|
| + ``Content-Disposition.name`` if given, otherwise in a generic "parts"
|
| + argument. Each such part is either a string, or the
|
| + :class:`Part<cherrypy._cpreqbody.Part>` itself if it's a file. (In this
|
| + case it will have ``file`` and ``filename`` attributes, or possibly a
|
| + ``value`` attribute). Each Part is itself a subclass of
|
| + Entity, and has its own ``process`` method and ``processors`` dict.
|
| +
|
| + There is a separate processor for the "multipart" major type which is more
|
| + flexible, and simply stores all multipart parts in
|
| + :attr:`request.body.parts<cherrypy._cpreqbody.Entity.parts>`. You can
|
| + enable it with::
|
| +
|
| + cherrypy.request.body.processors['multipart'] = _cpreqbody.process_multipart
|
| +
|
| + in an ``on_start_resource`` tool.
|
| + """
|
| +
|
| + # http://tools.ietf.org/html/rfc2046#section-4.1.2:
|
| + # "The default character set, which must be assumed in the
|
| + # absence of a charset parameter, is US-ASCII."
|
| + # However, many browsers send data in utf-8 with no charset.
|
| + attempt_charsets = ['utf-8']
|
| + """A list of strings, each of which should be a known encoding.
|
| +
|
| + When the Content-Type of the request body warrants it, each of the given
|
| + encodings will be tried in order. The first one to successfully decode the
|
| + entity without raising an error is stored as
|
| + :attr:`entity.charset<cherrypy._cpreqbody.Entity.charset>`. This defaults
|
| + to ``['utf-8']`` (plus 'ISO-8859-1' for "text/\*" types, as required by
|
| + `HTTP/1.1 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1>`_),
|
| + but ``['us-ascii', 'utf-8']`` for multipart parts.
|
| + """
|
| +
|
| + charset = None
|
| + """The successful decoding; see "attempt_charsets" above."""
|
| +
|
| + content_type = None
|
| + """The value of the Content-Type request header.
|
| +
|
| + If the Entity is part of a multipart payload, this will be the Content-Type
|
| + given in the MIME headers for this part.
|
| + """
|
| +
|
| + default_content_type = 'application/x-www-form-urlencoded'
|
| + """This defines a default ``Content-Type`` to use if no Content-Type header
|
| + is given. The empty string is used for RequestBody, which results in the
|
| + request body not being read or parsed at all. This is by design; a missing
|
| + ``Content-Type`` header in the HTTP request entity is an error at best,
|
| + and a security hole at worst. For multipart parts, however, the MIME spec
|
| + declares that a part with no Content-Type defaults to "text/plain"
|
| + (see :class:`Part<cherrypy._cpreqbody.Part>`).
|
| + """
|
| +
|
| + filename = None
|
| + """The ``Content-Disposition.filename`` header, if available."""
|
| +
|
| + fp = None
|
| + """The readable socket file object."""
|
| +
|
| + headers = None
|
| + """A dict of request/multipart header names and values.
|
| +
|
| + This is a copy of the ``request.headers`` for the ``request.body``;
|
| + for multipart parts, it is the set of headers for that part.
|
| + """
|
| +
|
| + length = None
|
| + """The value of the ``Content-Length`` header, if provided."""
|
| +
|
| + name = None
|
| + """The "name" parameter of the ``Content-Disposition`` header, if any."""
|
| +
|
| + params = None
|
| + """
|
| + If the request Content-Type is 'application/x-www-form-urlencoded' or
|
| + multipart, this will be a dict of the params pulled from the entity
|
| + body; that is, it will be the portion of request.params that come
|
| + from the message body (sometimes called "POST params", although they
|
| + can be sent with various HTTP method verbs). This value is set between
|
| + the 'before_request_body' and 'before_handler' hooks (assuming that
|
| + process_request_body is True)."""
|
| +
|
| + processors = {'application/x-www-form-urlencoded': process_urlencoded,
|
| + 'multipart/form-data': process_multipart_form_data,
|
| + 'multipart': process_multipart,
|
| + }
|
| + """A dict of Content-Type names to processor methods."""
|
| +
|
| + parts = None
|
| + """A list of Part instances if ``Content-Type`` is of major type "multipart"."""
|
| +
|
| + part_class = None
|
| + """The class used for multipart parts.
|
| +
|
| + You can replace this with custom subclasses to alter the processing of
|
| + multipart parts.
|
| + """
|
| +
|
| + def __init__(self, fp, headers, params=None, parts=None):
|
| + # Make an instance-specific copy of the class processors
|
| + # so Tools, etc. can replace them per-request.
|
| + self.processors = self.processors.copy()
|
| +
|
| + self.fp = fp
|
| + self.headers = headers
|
| +
|
| + if params is None:
|
| + params = {}
|
| + self.params = params
|
| +
|
| + if parts is None:
|
| + parts = []
|
| + self.parts = parts
|
| +
|
| + # Content-Type
|
| + self.content_type = headers.elements('Content-Type')
|
| + if self.content_type:
|
| + self.content_type = self.content_type[0]
|
| + else:
|
| + self.content_type = httputil.HeaderElement.from_str(
|
| + self.default_content_type)
|
| +
|
| + # Copy the class 'attempt_charsets', prepending any Content-Type charset
|
| + dec = self.content_type.params.get("charset", None)
|
| + if dec:
|
| + self.attempt_charsets = [dec] + [c for c in self.attempt_charsets
|
| + if c != dec]
|
| + else:
|
| + self.attempt_charsets = self.attempt_charsets[:]
|
| +
|
| + # Length
|
| + self.length = None
|
| + clen = headers.get('Content-Length', None)
|
| + # If Transfer-Encoding is 'chunked', ignore any Content-Length.
|
| + if clen is not None and 'chunked' not in headers.get('Transfer-Encoding', ''):
|
| + try:
|
| + self.length = int(clen)
|
| + except ValueError:
|
| + pass
|
| +
|
| + # Content-Disposition
|
| + self.name = None
|
| + self.filename = None
|
| + disp = headers.elements('Content-Disposition')
|
| + if disp:
|
| + disp = disp[0]
|
| + if 'name' in disp.params:
|
| + self.name = disp.params['name']
|
| + if self.name.startswith('"') and self.name.endswith('"'):
|
| + self.name = self.name[1:-1]
|
| + if 'filename' in disp.params:
|
| + self.filename = disp.params['filename']
|
| + if self.filename.startswith('"') and self.filename.endswith('"'):
|
| + self.filename = self.filename[1:-1]
|
| +
|
| + # The 'type' attribute is deprecated in 3.2; remove it in 3.3.
|
| + type = property(lambda self: self.content_type,
|
| + doc="""A deprecated alias for :attr:`content_type<cherrypy._cpreqbody.Entity.content_type>`.""")
|
| +
|
| + def read(self, size=None, fp_out=None):
|
| + return self.fp.read(size, fp_out)
|
| +
|
| + def readline(self, size=None):
|
| + return self.fp.readline(size)
|
| +
|
| + def readlines(self, sizehint=None):
|
| + return self.fp.readlines(sizehint)
|
| +
|
| + def __iter__(self):
|
| + return self
|
| +
|
| + def __next__(self):
|
| + line = self.readline()
|
| + if not line:
|
| + raise StopIteration
|
| + return line
|
| +
|
| + def next(self):
|
| + return self.__next__()
|
| +
|
| + def read_into_file(self, fp_out=None):
|
| + """Read the request body into fp_out (or make_file() if None). Return fp_out."""
|
| + if fp_out is None:
|
| + fp_out = self.make_file()
|
| + self.read(fp_out=fp_out)
|
| + return fp_out
|
| +
|
| + def make_file(self):
|
| + """Return a file-like object into which the request body will be read.
|
| +
|
| + By default, this will return a TemporaryFile. Override as needed.
|
| + See also :attr:`cherrypy._cpreqbody.Part.maxrambytes`."""
|
| + return tempfile.TemporaryFile()
|
| +
|
| + def fullvalue(self):
|
| + """Return this entity as a string, whether stored in a file or not."""
|
| + if self.file:
|
| + # It was stored in a tempfile. Read it.
|
| + self.file.seek(0)
|
| + value = self.file.read()
|
| + self.file.seek(0)
|
| + else:
|
| + value = self.value
|
| + return value
|
| +
|
| + def process(self):
|
| + """Execute the best-match processor for the given media type."""
|
| + proc = None
|
| + ct = self.content_type.value
|
| + try:
|
| + proc = self.processors[ct]
|
| + except KeyError:
|
| + toptype = ct.split('/', 1)[0]
|
| + try:
|
| + proc = self.processors[toptype]
|
| + except KeyError:
|
| + pass
|
| + if proc is None:
|
| + self.default_proc()
|
| + else:
|
| + proc(self)
|
| +
|
| + def default_proc(self):
|
| + """Called if a more-specific processor is not found for the ``Content-Type``."""
|
| + # Leave the fp alone for someone else to read. This works fine
|
| + # for request.body, but the Part subclasses need to override this
|
| + # so they can move on to the next part.
|
| + pass
|
| +
|
| +
|
| +class Part(Entity):
|
| + """A MIME part entity, part of a multipart entity."""
|
| +
|
| + # "The default character set, which must be assumed in the absence of a
|
| + # charset parameter, is US-ASCII."
|
| + attempt_charsets = ['us-ascii', 'utf-8']
|
| + """A list of strings, each of which should be a known encoding.
|
| +
|
| + When the Content-Type of the request body warrants it, each of the given
|
| + encodings will be tried in order. The first one to successfully decode the
|
| + entity without raising an error is stored as
|
| + :attr:`entity.charset<cherrypy._cpreqbody.Entity.charset>`. This defaults
|
| + to ``['utf-8']`` (plus 'ISO-8859-1' for "text/\*" types, as required by
|
| + `HTTP/1.1 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1>`_),
|
| + but ``['us-ascii', 'utf-8']`` for multipart parts.
|
| + """
|
| +
|
| + boundary = None
|
| + """The MIME multipart boundary."""
|
| +
|
| + default_content_type = 'text/plain'
|
| + """This defines a default ``Content-Type`` to use if no Content-Type header
|
| + is given. The empty string is used for RequestBody, which results in the
|
| + request body not being read or parsed at all. This is by design; a missing
|
| + ``Content-Type`` header in the HTTP request entity is an error at best,
|
| + and a security hole at worst. For multipart parts, however (this class),
|
| + the MIME spec declares that a part with no Content-Type defaults to
|
| + "text/plain".
|
| + """
|
| +
|
| + # This is the default in stdlib cgi. We may want to increase it.
|
| + maxrambytes = 1000
|
| + """The threshold of bytes after which point the ``Part`` will store its data
|
| + in a file (generated by :func:`make_file<cherrypy._cprequest.Entity.make_file>`)
|
| + instead of a string. Defaults to 1000, just like the :mod:`cgi` module in
|
| + Python's standard library.
|
| + """
|
| +
|
| + def __init__(self, fp, headers, boundary):
|
| + Entity.__init__(self, fp, headers)
|
| + self.boundary = boundary
|
| + self.file = None
|
| + self.value = None
|
| +
|
| + def from_fp(cls, fp, boundary):
|
| + headers = cls.read_headers(fp)
|
| + return cls(fp, headers, boundary)
|
| + from_fp = classmethod(from_fp)
|
| +
|
| + def read_headers(cls, fp):
|
| + headers = httputil.HeaderMap()
|
| + while True:
|
| + line = fp.readline()
|
| + if not line:
|
| + # No more data--illegal end of headers
|
| + raise EOFError("Illegal end of headers.")
|
| +
|
| + if line == ntob('\r\n'):
|
| + # Normal end of headers
|
| + break
|
| + if not line.endswith(ntob('\r\n')):
|
| + raise ValueError("MIME requires CRLF terminators: %r" % line)
|
| +
|
| + if line[0] in ntob(' \t'):
|
| + # It's a continuation line.
|
| + v = line.strip().decode('ISO-8859-1')
|
| + else:
|
| + k, v = line.split(ntob(":"), 1)
|
| + k = k.strip().decode('ISO-8859-1')
|
| + v = v.strip().decode('ISO-8859-1')
|
| +
|
| + existing = headers.get(k)
|
| + if existing:
|
| + v = ", ".join((existing, v))
|
| + headers[k] = v
|
| +
|
| + return headers
|
| + read_headers = classmethod(read_headers)
|
| +
|
| + def read_lines_to_boundary(self, fp_out=None):
|
| + """Read bytes from self.fp and return or write them to a file.
|
| +
|
| + If the 'fp_out' argument is None (the default), all bytes read are
|
| + returned in a single byte string.
|
| +
|
| + If the 'fp_out' argument is not None, it must be a file-like object that
|
| + supports the 'write' method; all bytes read will be written to the fp,
|
| + and that fp is returned.
|
| + """
|
| + endmarker = self.boundary + ntob("--")
|
| + delim = ntob("")
|
| + prev_lf = True
|
| + lines = []
|
| + seen = 0
|
| + while True:
|
| + line = self.fp.readline(1<<16)
|
| + if not line:
|
| + raise EOFError("Illegal end of multipart body.")
|
| + if line.startswith(ntob("--")) and prev_lf:
|
| + strippedline = line.strip()
|
| + if strippedline == self.boundary:
|
| + break
|
| + if strippedline == endmarker:
|
| + self.fp.finish()
|
| + break
|
| +
|
| + line = delim + line
|
| +
|
| + if line.endswith(ntob("\r\n")):
|
| + delim = ntob("\r\n")
|
| + line = line[:-2]
|
| + prev_lf = True
|
| + elif line.endswith(ntob("\n")):
|
| + delim = ntob("\n")
|
| + line = line[:-1]
|
| + prev_lf = True
|
| + else:
|
| + delim = ntob("")
|
| + prev_lf = False
|
| +
|
| + if fp_out is None:
|
| + lines.append(line)
|
| + seen += len(line)
|
| + if seen > self.maxrambytes:
|
| + fp_out = self.make_file()
|
| + for line in lines:
|
| + fp_out.write(line)
|
| + else:
|
| + fp_out.write(line)
|
| +
|
| + if fp_out is None:
|
| + result = ntob('').join(lines)
|
| + for charset in self.attempt_charsets:
|
| + try:
|
| + result = result.decode(charset)
|
| + except UnicodeDecodeError:
|
| + pass
|
| + else:
|
| + self.charset = charset
|
| + return result
|
| + else:
|
| + raise cherrypy.HTTPError(
|
| + 400, "The request entity could not be decoded. The following "
|
| + "charsets were attempted: %s" % repr(self.attempt_charsets))
|
| + else:
|
| + fp_out.seek(0)
|
| + return fp_out
|
| +
|
| + def default_proc(self):
|
| + """Called if a more-specific processor is not found for the ``Content-Type``."""
|
| + if self.filename:
|
| + # Always read into a file if a .filename was given.
|
| + self.file = self.read_into_file()
|
| + else:
|
| + result = self.read_lines_to_boundary()
|
| + if isinstance(result, basestring):
|
| + self.value = result
|
| + else:
|
| + self.file = result
|
| +
|
| + def read_into_file(self, fp_out=None):
|
| + """Read the request body into fp_out (or make_file() if None). Return fp_out."""
|
| + if fp_out is None:
|
| + fp_out = self.make_file()
|
| + self.read_lines_to_boundary(fp_out=fp_out)
|
| + return fp_out
|
| +
|
| +Entity.part_class = Part
|
| +
|
| +try:
|
| + inf = float('inf')
|
| +except ValueError:
|
| + # Python 2.4 and lower
|
| + class Infinity(object):
|
| + def __cmp__(self, other):
|
| + return 1
|
| + def __sub__(self, other):
|
| + return self
|
| + inf = Infinity()
|
| +
|
| +
|
| +comma_separated_headers = ['Accept', 'Accept-Charset', 'Accept-Encoding',
|
| + 'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', 'Connection',
|
| + 'Content-Encoding', 'Content-Language', 'Expect', 'If-Match',
|
| + 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'Te', 'Trailer',
|
| + 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', 'Www-Authenticate']
|
| +
|
| +
|
| +class SizedReader:
|
| +
|
| + def __init__(self, fp, length, maxbytes, bufsize=DEFAULT_BUFFER_SIZE, has_trailers=False):
|
| + # Wrap our fp in a buffer so peek() works
|
| + self.fp = fp
|
| + self.length = length
|
| + self.maxbytes = maxbytes
|
| + self.buffer = ntob('')
|
| + self.bufsize = bufsize
|
| + self.bytes_read = 0
|
| + self.done = False
|
| + self.has_trailers = has_trailers
|
| +
|
| + def read(self, size=None, fp_out=None):
|
| + """Read bytes from the request body and return or write them to a file.
|
| +
|
| + A number of bytes less than or equal to the 'size' argument are read
|
| + off the socket. The actual number of bytes read are tracked in
|
| + self.bytes_read. The number may be smaller than 'size' when 1) the
|
| + client sends fewer bytes, 2) the 'Content-Length' request header
|
| + specifies fewer bytes than requested, or 3) the number of bytes read
|
| + exceeds self.maxbytes (in which case, 413 is raised).
|
| +
|
| + If the 'fp_out' argument is None (the default), all bytes read are
|
| + returned in a single byte string.
|
| +
|
| + If the 'fp_out' argument is not None, it must be a file-like object that
|
| + supports the 'write' method; all bytes read will be written to the fp,
|
| + and None is returned.
|
| + """
|
| +
|
| + if self.length is None:
|
| + if size is None:
|
| + remaining = inf
|
| + else:
|
| + remaining = size
|
| + else:
|
| + remaining = self.length - self.bytes_read
|
| + if size and size < remaining:
|
| + remaining = size
|
| + if remaining == 0:
|
| + self.finish()
|
| + if fp_out is None:
|
| + return ntob('')
|
| + else:
|
| + return None
|
| +
|
| + chunks = []
|
| +
|
| + # Read bytes from the buffer.
|
| + if self.buffer:
|
| + if remaining is inf:
|
| + data = self.buffer
|
| + self.buffer = ntob('')
|
| + else:
|
| + data = self.buffer[:remaining]
|
| + self.buffer = self.buffer[remaining:]
|
| + datalen = len(data)
|
| + remaining -= datalen
|
| +
|
| + # Check lengths.
|
| + self.bytes_read += datalen
|
| + if self.maxbytes and self.bytes_read > self.maxbytes:
|
| + raise cherrypy.HTTPError(413)
|
| +
|
| + # Store the data.
|
| + if fp_out is None:
|
| + chunks.append(data)
|
| + else:
|
| + fp_out.write(data)
|
| +
|
| + # Read bytes from the socket.
|
| + while remaining > 0:
|
| + chunksize = min(remaining, self.bufsize)
|
| + try:
|
| + data = self.fp.read(chunksize)
|
| + except Exception:
|
| + e = sys.exc_info()[1]
|
| + if e.__class__.__name__ == 'MaxSizeExceeded':
|
| + # Post data is too big
|
| + raise cherrypy.HTTPError(
|
| + 413, "Maximum request length: %r" % e.args[1])
|
| + else:
|
| + raise
|
| + if not data:
|
| + self.finish()
|
| + break
|
| + datalen = len(data)
|
| + remaining -= datalen
|
| +
|
| + # Check lengths.
|
| + self.bytes_read += datalen
|
| + if self.maxbytes and self.bytes_read > self.maxbytes:
|
| + raise cherrypy.HTTPError(413)
|
| +
|
| + # Store the data.
|
| + if fp_out is None:
|
| + chunks.append(data)
|
| + else:
|
| + fp_out.write(data)
|
| +
|
| + if fp_out is None:
|
| + return ntob('').join(chunks)
|
| +
|
| + def readline(self, size=None):
|
| + """Read a line from the request body and return it."""
|
| + chunks = []
|
| + while size is None or size > 0:
|
| + chunksize = self.bufsize
|
| + if size is not None and size < self.bufsize:
|
| + chunksize = size
|
| + data = self.read(chunksize)
|
| + if not data:
|
| + break
|
| + pos = data.find(ntob('\n')) + 1
|
| + if pos:
|
| + chunks.append(data[:pos])
|
| + remainder = data[pos:]
|
| + self.buffer += remainder
|
| + self.bytes_read -= len(remainder)
|
| + break
|
| + else:
|
| + chunks.append(data)
|
| + return ntob('').join(chunks)
|
| +
|
| + def readlines(self, sizehint=None):
|
| + """Read lines from the request body and return them."""
|
| + if self.length is not None:
|
| + if sizehint is None:
|
| + sizehint = self.length - self.bytes_read
|
| + else:
|
| + sizehint = min(sizehint, self.length - self.bytes_read)
|
| +
|
| + lines = []
|
| + seen = 0
|
| + while True:
|
| + line = self.readline()
|
| + if not line:
|
| + break
|
| + lines.append(line)
|
| + seen += len(line)
|
| + if seen >= sizehint:
|
| + break
|
| + return lines
|
| +
|
| + def finish(self):
|
| + self.done = True
|
| + if self.has_trailers and hasattr(self.fp, 'read_trailer_lines'):
|
| + self.trailers = {}
|
| +
|
| + try:
|
| + for line in self.fp.read_trailer_lines():
|
| + if line[0] in ntob(' \t'):
|
| + # It's a continuation line.
|
| + v = line.strip()
|
| + else:
|
| + try:
|
| + k, v = line.split(ntob(":"), 1)
|
| + except ValueError:
|
| + raise ValueError("Illegal header line.")
|
| + k = k.strip().title()
|
| + v = v.strip()
|
| +
|
| + if k in comma_separated_headers:
|
| + existing = self.trailers.get(envname)
|
| + if existing:
|
| + v = ntob(", ").join((existing, v))
|
| + self.trailers[k] = v
|
| + except Exception:
|
| + e = sys.exc_info()[1]
|
| + if e.__class__.__name__ == 'MaxSizeExceeded':
|
| + # Post data is too big
|
| + raise cherrypy.HTTPError(
|
| + 413, "Maximum request length: %r" % e.args[1])
|
| + else:
|
| + raise
|
| +
|
| +
|
| +class RequestBody(Entity):
|
| + """The entity of the HTTP request."""
|
| +
|
| + bufsize = 8 * 1024
|
| + """The buffer size used when reading the socket."""
|
| +
|
| + # Don't parse the request body at all if the client didn't provide
|
| + # a Content-Type header. See http://www.cherrypy.org/ticket/790
|
| + default_content_type = ''
|
| + """This defines a default ``Content-Type`` to use if no Content-Type header
|
| + is given. The empty string is used for RequestBody, which results in the
|
| + request body not being read or parsed at all. This is by design; a missing
|
| + ``Content-Type`` header in the HTTP request entity is an error at best,
|
| + and a security hole at worst. For multipart parts, however, the MIME spec
|
| + declares that a part with no Content-Type defaults to "text/plain"
|
| + (see :class:`Part<cherrypy._cpreqbody.Part>`).
|
| + """
|
| +
|
| + maxbytes = None
|
| + """Raise ``MaxSizeExceeded`` if more bytes than this are read from the socket."""
|
| +
|
| + def __init__(self, fp, headers, params=None, request_params=None):
|
| + Entity.__init__(self, fp, headers, params)
|
| +
|
| + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
|
| + # When no explicit charset parameter is provided by the
|
| + # sender, media subtypes of the "text" type are defined
|
| + # to have a default charset value of "ISO-8859-1" when
|
| + # received via HTTP.
|
| + if self.content_type.value.startswith('text/'):
|
| + for c in ('ISO-8859-1', 'iso-8859-1', 'Latin-1', 'latin-1'):
|
| + if c in self.attempt_charsets:
|
| + break
|
| + else:
|
| + self.attempt_charsets.append('ISO-8859-1')
|
| +
|
| + # Temporary fix while deprecating passing .parts as .params.
|
| + self.processors['multipart'] = _old_process_multipart
|
| +
|
| + if request_params is None:
|
| + request_params = {}
|
| + self.request_params = request_params
|
| +
|
| + def process(self):
|
| + """Process the request entity based on its Content-Type."""
|
| + # "The presence of a message-body in a request is signaled by the
|
| + # inclusion of a Content-Length or Transfer-Encoding header field in
|
| + # the request's message-headers."
|
| + # It is possible to send a POST request with no body, for example;
|
| + # however, app developers are responsible in that case to set
|
| + # cherrypy.request.process_body to False so this method isn't called.
|
| + h = cherrypy.serving.request.headers
|
| + if 'Content-Length' not in h and 'Transfer-Encoding' not in h:
|
| + raise cherrypy.HTTPError(411)
|
| +
|
| + self.fp = SizedReader(self.fp, self.length,
|
| + self.maxbytes, bufsize=self.bufsize,
|
| + has_trailers='Trailer' in h)
|
| + super(RequestBody, self).process()
|
| +
|
| + # Body params should also be a part of the request_params
|
| + # add them in here.
|
| + request_params = self.request_params
|
| + for key, value in self.params.items():
|
| + # Python 2 only: keyword arguments must be byte strings (type 'str').
|
| + if sys.version_info < (3, 0):
|
| + if isinstance(key, unicode):
|
| + key = key.encode('ISO-8859-1')
|
| +
|
| + if key in request_params:
|
| + if not isinstance(request_params[key], list):
|
| + request_params[key] = [request_params[key]]
|
| + request_params[key].append(value)
|
| + else:
|
| + request_params[key] = value
|
|
|
| Property changes on: third_party/cherrypy/_cpreqbody.py
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|