third_party/cherrypy/lib/encoding.py - Issue 9368042: Add CherryPy to third_party.

Unified Diff: third_party/cherrypy/lib/encoding.py

Issue 9368042: Add CherryPy to third_party. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/build/

Patch Set: '' Created 8 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/cherrypy/lib/encoding.py

===================================================================

--- third_party/cherrypy/lib/encoding.py (revision 0)

+++ third_party/cherrypy/lib/encoding.py (revision 0)

@@ -0,0 +1,388 @@

+import struct

+import time

+import cherrypy

+from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr

+from cherrypy.lib import file_generator

+from cherrypy.lib import set_vary_header

+def decode(encoding=None, default_encoding='utf-8'):

+ """Replace or extend the list of charsets used to decode a request entity.

+ Either argument may be a single string or a list of strings.

+ encoding

+ If not None, restricts the set of charsets attempted while decoding

+ a request entity to the given set (even if a different charset is given in

+ the Content-Type request header).

+ default_encoding

+ Only in effect if the 'encoding' argument is not given.

+ If given, the set of charsets attempted while decoding a request entity is

+ *extended* with the given value(s).

+ """

+ body = cherrypy.request.body

+ if encoding is not None:

+ if not isinstance(encoding, list):

+ encoding = [encoding]

+ body.attempt_charsets = encoding

+ elif default_encoding:

+ if not isinstance(default_encoding, list):

+ default_encoding = [default_encoding]

+ body.attempt_charsets = body.attempt_charsets + default_encoding

+class ResponseEncoder:

+ default_encoding = 'utf-8'

+ failmsg = "Response body could not be encoded with %r."

+ encoding = None

+ errors = 'strict'

+ text_only = True

+ add_charset = True

+ debug = False

+ def __init__(self, **kwargs):

+ for k, v in kwargs.items():

+ setattr(self, k, v)

+ self.attempted_charsets = set()

+ request = cherrypy.serving.request

+ if request.handler is not None:

+ # Replace request.handler with self

+ if self.debug:

+ cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')

+ self.oldhandler = request.handler

+ request.handler = self

+ def encode_stream(self, encoding):

+ """Encode a streaming response body.

+ Use a generator wrapper, and just pray it works as the stream is

+ being written out.

+ """

+ if encoding in self.attempted_charsets:

+ return False

+ self.attempted_charsets.add(encoding)

+ def encoder(body):

+ for chunk in body:

+ if isinstance(chunk, unicodestr):

+ chunk = chunk.encode(encoding, self.errors)

+ yield chunk

+ self.body = encoder(self.body)

+ return True

+ def encode_string(self, encoding):

+ """Encode a buffered response body."""

+ if encoding in self.attempted_charsets:

+ return False

+ self.attempted_charsets.add(encoding)

+ try:

+ body = []

+ for chunk in self.body:

+ if isinstance(chunk, unicodestr):

+ chunk = chunk.encode(encoding, self.errors)

+ body.append(chunk)

+ self.body = body

+ except (LookupError, UnicodeError):

+ return False

+ else:

+ return True

+ def find_acceptable_charset(self):

+ request = cherrypy.serving.request

+ response = cherrypy.serving.response

+ if self.debug:

+ cherrypy.log('response.stream %r' % response.stream, 'TOOLS.ENCODE')

+ if response.stream:

+ encoder = self.encode_stream

+ else:

+ encoder = self.encode_string

+ if "Content-Length" in response.headers:

+ # Delete Content-Length header so finalize() recalcs it.

+ # Encoded strings may be of different lengths from their

+ # unicode equivalents, and even from each other. For example:

+ # >>> t = u"\u7007\u3040"

+ # >>> len(t)

+ # 2

+ # >>> len(t.encode("UTF-8"))

+ # 6

+ # >>> len(t.encode("utf7"))

+ # 8

+ del response.headers["Content-Length"]

+ # Parse the Accept-Charset request header, and try to provide one

+ # of the requested charsets (in order of user preference).

+ encs = request.headers.elements('Accept-Charset')

+ charsets = [enc.value.lower() for enc in encs]

+ if self.debug:

+ cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')

+ if self.encoding is not None:

+ # If specified, force this encoding to be used, or fail.

+ encoding = self.encoding.lower()

+ if self.debug:

+ cherrypy.log('Specified encoding %r' % encoding, 'TOOLS.ENCODE')

+ if (not charsets) or "*" in charsets or encoding in charsets:

+ if self.debug:

+ cherrypy.log('Attempting encoding %r' % encoding, 'TOOLS.ENCODE')

+ if encoder(encoding):

+ return encoding

+ else:

+ if not encs:

+ if self.debug:

+ cherrypy.log('Attempting default encoding %r' %

+ self.default_encoding, 'TOOLS.ENCODE')

+ # Any character-set is acceptable.

+ if encoder(self.default_encoding):

+ return self.default_encoding

+ else:

+ raise cherrypy.HTTPError(500, self.failmsg % self.default_encoding)

+ else:

+ for element in encs:

+ if element.qvalue > 0:

+ if element.value == "*":

+ # Matches any charset. Try our default.

+ if self.debug:

+ cherrypy.log('Attempting default encoding due '

+ 'to %r' % element, 'TOOLS.ENCODE')

+ if encoder(self.default_encoding):

+ return self.default_encoding

+ else:

+ encoding = element.value

+ if self.debug:

+ cherrypy.log('Attempting encoding %s (qvalue >'

+ '0)' % element, 'TOOLS.ENCODE')

+ if encoder(encoding):

+ return encoding

+ if "*" not in charsets:

+ # If no "*" is present in an Accept-Charset field, then all

+ # character sets not explicitly mentioned get a quality

+ # value of 0, except for ISO-8859-1, which gets a quality

+ # value of 1 if not explicitly mentioned.

+ iso = 'iso-8859-1'

+ if iso not in charsets:

+ if self.debug:

+ cherrypy.log('Attempting ISO-8859-1 encoding',

+ 'TOOLS.ENCODE')

+ if encoder(iso):

+ return iso

+ # No suitable encoding found.

+ ac = request.headers.get('Accept-Charset')

+ if ac is None:

+ msg = "Your client did not send an Accept-Charset header."

+ else:

+ msg = "Your client sent this Accept-Charset header: %s." % ac

+ msg += " We tried these charsets: %s." % ", ".join(self.attempted_charsets)

+ raise cherrypy.HTTPError(406, msg)

+ def __call__(self, *args, **kwargs):

+ response = cherrypy.serving.response

+ self.body = self.oldhandler(*args, **kwargs)

+ if isinstance(self.body, basestring):

+ # strings get wrapped in a list because iterating over a single

+ # item list is much faster than iterating over every character

+ # in a long string.

+ if self.body:

+ self.body = [self.body]

+ else:

+ # [''] doesn't evaluate to False, so replace it with [].

+ self.body = []

+ elif hasattr(self.body, 'read'):

+ self.body = file_generator(self.body)

+ elif self.body is None:

+ self.body = []

+ ct = response.headers.elements("Content-Type")

+ if self.debug:

+ cherrypy.log('Content-Type: %r' % [str(h) for h in ct], 'TOOLS.ENCODE')

+ if ct:

+ ct = ct[0]

+ if self.text_only:

+ if ct.value.lower().startswith("text/"):

+ if self.debug:

+ cherrypy.log('Content-Type %s starts with "text/"' % ct,

+ 'TOOLS.ENCODE')

+ do_find = True

+ else:

+ if self.debug:

+ cherrypy.log('Not finding because Content-Type %s does '

+ 'not start with "text/"' % ct,

+ 'TOOLS.ENCODE')

+ do_find = False

+ else:

+ if self.debug:

+ cherrypy.log('Finding because not text_only', 'TOOLS.ENCODE')

+ do_find = True

+ if do_find:

+ # Set "charset=..." param on response Content-Type header

+ ct.params['charset'] = self.find_acceptable_charset()

+ if self.add_charset:

+ if self.debug:

+ cherrypy.log('Setting Content-Type %s' % ct,

+ 'TOOLS.ENCODE')

+ response.headers["Content-Type"] = str(ct)

+ return self.body

+# GZIP

+def compress(body, compress_level):

+ """Compress 'body' at the given compress_level."""

+ import zlib

+ # See http://www.gzip.org/zlib/rfc-gzip.html

+ yield ntob('\x1f\x8b') # ID1 and ID2: gzip marker

+ yield ntob('\x08') # CM: compression method

+ yield ntob('\x00') # FLG: none set

+ # MTIME: 4 bytes

+ yield struct.pack("<L", int(time.time()) & int('FFFFFFFF', 16))

+ yield ntob('\x02') # XFL: max compression, slowest algo

+ yield ntob('\xff') # OS: unknown

+ crc = zlib.crc32(ntob(""))

+ size = 0

+ zobj = zlib.compressobj(compress_level,

+ zlib.DEFLATED, -zlib.MAX_WBITS,

+ zlib.DEF_MEM_LEVEL, 0)

+ for line in body:

+ size += len(line)

+ crc = zlib.crc32(line, crc)

+ yield zobj.compress(line)

+ yield zobj.flush()

+ # CRC32: 4 bytes

+ yield struct.pack("<L", crc & int('FFFFFFFF', 16))

+ # ISIZE: 4 bytes

+ yield struct.pack("<L", size & int('FFFFFFFF', 16))

+def decompress(body):

+ import gzip

+ zbuf = BytesIO()

+ zbuf.write(body)

+ zbuf.seek(0)

+ zfile = gzip.GzipFile(mode='rb', fileobj=zbuf)

+ data = zfile.read()

+ zfile.close()

+ return data

+def gzip(compress_level=5, mime_types=['text/html', 'text/plain'], debug=False):

+ """Try to gzip the response body if Content-Type in mime_types.

+ cherrypy.response.headers['Content-Type'] must be set to one of the

+ values in the mime_types arg before calling this function.

+ The provided list of mime-types must be of one of the following form:

+ * type/subtype

+ * type/*

+ * type/*+subtype

+ No compression is performed if any of the following hold:

+ * The client sends no Accept-Encoding request header

+ * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header

+ * No 'gzip' or 'x-gzip' with a qvalue > 0 is present

+ * The 'identity' value is given with a qvalue > 0.

+ """

+ request = cherrypy.serving.request

+ response = cherrypy.serving.response

+ set_vary_header(response, "Accept-Encoding")

+ if not response.body:

+ # Response body is empty (might be a 304 for instance)

+ if debug:

+ cherrypy.log('No response body', context='TOOLS.GZIP')

+ return

+ # If returning cached content (which should already have been gzipped),

+ # don't re-zip.

+ if getattr(request, "cached", False):

+ if debug:

+ cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')

+ return

+ acceptable = request.headers.elements('Accept-Encoding')

+ if not acceptable:

+ # If no Accept-Encoding field is present in a request,

+ # the server MAY assume that the client will accept any

+ # content coding. In this case, if "identity" is one of

+ # the available content-codings, then the server SHOULD use

+ # the "identity" content-coding, unless it has additional

+ # information that a different content-coding is meaningful

+ # to the client.

+ if debug:

+ cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')

+ return

+ ct = response.headers.get('Content-Type', '').split(';')[0]

+ for coding in acceptable:

+ if coding.value == 'identity' and coding.qvalue != 0:

+ if debug:

+ cherrypy.log('Non-zero identity qvalue: %s' % coding,

+ context='TOOLS.GZIP')

+ return

+ if coding.value in ('gzip', 'x-gzip'):

+ if coding.qvalue == 0:

+ if debug:

+ cherrypy.log('Zero gzip qvalue: %s' % coding,

+ context='TOOLS.GZIP')

+ return

+ if ct not in mime_types:

+ # If the list of provided mime-types contains tokens

+ # such as 'text/*' or 'application/*+xml',

+ # we go through them and find the most appropriate one

+ # based on the given content-type.

+ # The pattern matching is only caring about the most

+ # common cases, as stated above, and doesn't support

+ # for extra parameters.

+ found = False

+ if '/' in ct:

+ ct_media_type, ct_sub_type = ct.split('/')

+ for mime_type in mime_types:

+ if '/' in mime_type:

+ media_type, sub_type = mime_type.split('/')

+ if ct_media_type == media_type:

+ if sub_type == '*':

+ found = True

+ break

+ elif '+' in sub_type and '+' in ct_sub_type:

+ ct_left, ct_right = ct_sub_type.split('+')

+ left, right = sub_type.split('+')

+ if left == '*' and ct_right == right:

+ found = True

+ break

+ if not found:

+ if debug:

+ cherrypy.log('Content-Type %s not in mime_types %r' %

+ (ct, mime_types), context='TOOLS.GZIP')

+ return

+ if debug:

+ cherrypy.log('Gzipping', context='TOOLS.GZIP')

+ # Return a generator that compresses the page

+ response.headers['Content-Encoding'] = 'gzip'

+ response.body = compress(response.body, compress_level)

+ if "Content-Length" in response.headers:

+ # Delete Content-Length header so finalize() recalcs it.

+ del response.headers["Content-Length"]

+ return

+ if debug:

+ cherrypy.log('No acceptable encoding found.', context='GZIP')

+ cherrypy.HTTPError(406, "identity, gzip").set_response()

Property changes on: third_party/cherrypy/lib/encoding.py

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « third_party/cherrypy/lib/cptools.py ('k') | third_party/cherrypy/lib/gctools.py » ('j') | no next file with comments »