Index: third_party/cherrypy/lib/encoding.py |
=================================================================== |
--- third_party/cherrypy/lib/encoding.py (revision 0) |
+++ third_party/cherrypy/lib/encoding.py (revision 0) |
@@ -0,0 +1,388 @@ |
+import struct |
+import time |
+ |
+import cherrypy |
+from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr |
+from cherrypy.lib import file_generator |
+from cherrypy.lib import set_vary_header |
+ |
+ |
def decode(encoding=None, default_encoding='utf-8'):
    """Replace or extend the list of charsets used to decode a request entity.

    Either argument may be a single string or a sequence (list, tuple, ...)
    of strings.

    encoding
        If not None, restricts the set of charsets attempted while decoding
        a request entity to the given set (even if a different charset is
        given in the Content-Type request header).

    default_encoding
        Only in effect if the 'encoding' argument is not given.
        If given, the set of charsets attempted while decoding a request
        entity is *extended* with the given value(s).

    """
    def as_charset_list(value):
        # A lone string is a single charset; any other iterable is taken as
        # a collection of charsets. Copying into a fresh list keeps the
        # caller's object from being aliased onto the request body.
        if isinstance(value, basestring):
            return [value]
        return list(value)

    body = cherrypy.request.body
    if encoding is not None:
        # Explicit encoding(s): replace whatever charsets were configured.
        body.attempt_charsets = as_charset_list(encoding)
    elif default_encoding:
        # No explicit encoding: append the fallback charset(s).
        body.attempt_charsets = (body.attempt_charsets +
                                 as_charset_list(default_encoding))
+ |
+ |
class ResponseEncoder:
    """Request-handler wrapper that encodes unicode response bodies.

    On construction the instance installs itself as
    ``cherrypy.serving.request.handler`` (keeping the previous handler in
    ``self.oldhandler``). When called, it runs the previous handler,
    normalizes the returned body to an iterable of chunks and, depending on
    the response Content-Type, encodes unicode chunks with a charset chosen
    from the request's Accept-Charset header.
    """

    # Charset used when the client expresses no preference (or sends "*").
    default_encoding = 'utf-8'
    # Message for the 500 raised when the default encoding cannot be applied.
    failmsg = "Response body could not be encoded with %r."
    # If not None, the only charset that will be attempted.
    encoding = None
    # Error handling scheme passed to unicode.encode().
    errors = 'strict'
    # If True, only responses whose Content-Type starts with "text/" are encoded.
    text_only = True
    # If True, the response Content-Type header is rewritten with the charset.
    add_charset = True
    # If True, log progress under the 'TOOLS.ENCODE' context.
    debug = False

    def __init__(self, **kwargs):
        """Apply keyword overrides as attributes and hook the request handler."""
        for k, v in kwargs.items():
            setattr(self, k, v)

        # Charsets already tried for this response; each is tried at most once.
        self.attempted_charsets = set()
        request = cherrypy.serving.request
        if request.handler is not None:
            # Replace request.handler with self
            if self.debug:
                cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')
            self.oldhandler = request.handler
            request.handler = self

    def encode_stream(self, encoding):
        """Encode a streaming response body.

        Use a generator wrapper, and just pray it works as the stream is
        being written out.

        Returns False if 'encoding' was already attempted, True otherwise.
        Encoding errors, if any, surface only when the wrapped body is
        iterated.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        def encoder(body):
            for chunk in body:
                if isinstance(chunk, unicodestr):
                    chunk = chunk.encode(encoding, self.errors)
                yield chunk
        self.body = encoder(self.body)
        return True

    def encode_string(self, encoding):
        """Encode a buffered response body.

        Returns True and replaces self.body with a list of encoded chunks on
        success. Returns False if 'encoding' was already attempted, is
        unknown, or cannot represent the body; in that case the original
        chunks are kept so that another charset can be attempted.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        try:
            if not isinstance(self.body, (list, tuple)):
                # A one-shot iterable (e.g. a generator) would be partly
                # consumed by a failed attempt, leaving the next attempt
                # with a truncated body. Buffer it once up front.
                self.body = list(self.body)

            body = []
            for chunk in self.body:
                if isinstance(chunk, unicodestr):
                    chunk = chunk.encode(encoding, self.errors)
                body.append(chunk)
        except (LookupError, UnicodeError):
            return False
        else:
            self.body = body
            return True

    def find_acceptable_charset(self):
        """Encode self.body with the first workable charset and return its name.

        Candidates come from self.encoding (if set) or from the request's
        Accept-Charset header. Raises HTTPError 500 if the client sent no
        Accept-Charset and the default encoding fails, and HTTPError 406 if
        no candidate charset could be applied.
        """
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        if self.debug:
            cherrypy.log('response.stream %r' % response.stream, 'TOOLS.ENCODE')
        if response.stream:
            encoder = self.encode_stream
        else:
            encoder = self.encode_string
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                # Encoded strings may be of different lengths from their
                # unicode equivalents, and even from each other. For example:
                # >>> t = u"\u7007\u3040"
                # >>> len(t)
                # 2
                # >>> len(t.encode("UTF-8"))
                # 6
                # >>> len(t.encode("utf7"))
                # 8
                del response.headers["Content-Length"]

        # Parse the Accept-Charset request header, and try to provide one
        # of the requested charsets (in order of user preference).
        encs = request.headers.elements('Accept-Charset')
        charsets = [enc.value.lower() for enc in encs]
        if self.debug:
            cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')

        if self.encoding is not None:
            # If specified, force this encoding to be used, or fail.
            encoding = self.encoding.lower()
            if self.debug:
                cherrypy.log('Specified encoding %r' % encoding, 'TOOLS.ENCODE')
            if (not charsets) or "*" in charsets or encoding in charsets:
                if self.debug:
                    cherrypy.log('Attempting encoding %r' % encoding, 'TOOLS.ENCODE')
                if encoder(encoding):
                    return encoding
        else:
            if not encs:
                if self.debug:
                    cherrypy.log('Attempting default encoding %r' %
                                 self.default_encoding, 'TOOLS.ENCODE')
                # Any character-set is acceptable.
                if encoder(self.default_encoding):
                    return self.default_encoding
                else:
                    raise cherrypy.HTTPError(500, self.failmsg % self.default_encoding)
            else:
                for element in encs:
                    if element.qvalue > 0:
                        if element.value == "*":
                            # Matches any charset. Try our default.
                            if self.debug:
                                cherrypy.log('Attempting default encoding due '
                                             'to %r' % element, 'TOOLS.ENCODE')
                            if encoder(self.default_encoding):
                                return self.default_encoding
                        else:
                            encoding = element.value
                            if self.debug:
                                cherrypy.log('Attempting encoding %s (qvalue >'
                                             '0)' % element, 'TOOLS.ENCODE')
                            if encoder(encoding):
                                return encoding

                if "*" not in charsets:
                    # If no "*" is present in an Accept-Charset field, then all
                    # character sets not explicitly mentioned get a quality
                    # value of 0, except for ISO-8859-1, which gets a quality
                    # value of 1 if not explicitly mentioned.
                    iso = 'iso-8859-1'
                    if iso not in charsets:
                        if self.debug:
                            cherrypy.log('Attempting ISO-8859-1 encoding',
                                         'TOOLS.ENCODE')
                        if encoder(iso):
                            return iso

        # No suitable encoding found.
        ac = request.headers.get('Accept-Charset')
        if ac is None:
            msg = "Your client did not send an Accept-Charset header."
        else:
            msg = "Your client sent this Accept-Charset header: %s." % ac
        # Sorted so the message is deterministic (attempted_charsets is a set).
        msg += " We tried these charsets: %s." % ", ".join(
            sorted(self.attempted_charsets))
        raise cherrypy.HTTPError(406, msg)

    def __call__(self, *args, **kwargs):
        """Run the wrapped handler and return its (possibly encoded) body."""
        response = cherrypy.serving.response
        self.body = self.oldhandler(*args, **kwargs)

        if isinstance(self.body, basestring):
            # strings get wrapped in a list because iterating over a single
            # item list is much faster than iterating over every character
            # in a long string.
            if self.body:
                self.body = [self.body]
            else:
                # [''] doesn't evaluate to False, so replace it with [].
                self.body = []
        elif hasattr(self.body, 'read'):
            # File-like object: iterate over it in chunks.
            self.body = file_generator(self.body)
        elif self.body is None:
            self.body = []

        ct = response.headers.elements("Content-Type")
        if self.debug:
            cherrypy.log('Content-Type: %r' % [str(h) for h in ct], 'TOOLS.ENCODE')
        if ct:
            # Only the first Content-Type element is considered.
            ct = ct[0]
            if self.text_only:
                if ct.value.lower().startswith("text/"):
                    if self.debug:
                        cherrypy.log('Content-Type %s starts with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = True
                else:
                    if self.debug:
                        cherrypy.log('Not finding because Content-Type %s does '
                                     'not start with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = False
            else:
                if self.debug:
                    cherrypy.log('Finding because not text_only', 'TOOLS.ENCODE')
                do_find = True

            if do_find:
                # Set "charset=..." param on response Content-Type header
                ct.params['charset'] = self.find_acceptable_charset()
                if self.add_charset:
                    if self.debug:
                        cherrypy.log('Setting Content-Type %s' % ct,
                                     'TOOLS.ENCODE')
                    response.headers["Content-Type"] = str(ct)

        return self.body
+ |
+# GZIP |
+ |
def compress(body, compress_level):
    """Compress 'body' at the given compress_level.

    'body' is an iterable of byte strings. This is a generator yielding the
    pieces of a single-member gzip stream: the 10-byte header, the raw
    deflate output for each input chunk, and finally the CRC32 and ISIZE
    trailer fields.
    """
    import zlib

    mask = 0xFFFFFFFF

    # XFL advertises the deflate effort actually used (RFC 1952):
    # 2 = maximum compression, 4 = fastest algorithm, 0 = unspecified.
    if compress_level == 9:
        xfl = 2
    elif compress_level == 1:
        xfl = 4
    else:
        xfl = 0

    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield struct.pack(
        "<BBBBLBB",
        0x1f, 0x8b,                  # ID1 and ID2: gzip marker
        0x08,                        # CM: compression method (deflate)
        0x00,                        # FLG: none set
        int(time.time()) & mask,     # MTIME: 4 bytes
        xfl,                         # XFL: see above
        0xff)                        # OS: unknown

    crc = 0    # CRC32 of the empty string
    size = 0
    # Negative wbits: emit a raw deflate stream, since the gzip header and
    # trailer are written by hand here.
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes
    yield struct.pack("<L", crc & mask)
    # ISIZE: 4 bytes
    yield struct.pack("<L", size & mask)
+ |
def decompress(body):
    """Return the decompressed contents of the gzip byte string 'body'.

    Any error raised by the gzip module for malformed input propagates to
    the caller; the GzipFile is closed either way.
    """
    import gzip

    zfile = gzip.GzipFile(mode='rb', fileobj=BytesIO(body))
    try:
        return zfile.read()
    finally:
        zfile.close()
+ |
+ |
def _gzip_mime_type_matches(ct, mime_types):
    """Return True if content type 'ct' is listed in, or matched by a
    'type/*' or 'type/*+suffix' pattern in, 'mime_types'."""
    if ct in mime_types:
        return True
    if '/' not in ct:
        return False

    # The pattern matching only cares about the most common cases and
    # doesn't support extra parameters.
    ct_media_type, ct_sub_type = ct.split('/', 1)
    for mime_type in mime_types:
        if '/' not in mime_type:
            continue
        media_type, sub_type = mime_type.split('/', 1)
        if ct_media_type != media_type:
            continue
        if sub_type == '*':
            return True
        if '+' in sub_type and '+' in ct_sub_type:
            # Compare the part after the last '+' (e.g. 'xml' in
            # 'application/atom+xml' against 'application/*+xml').
            ct_right = ct_sub_type.rsplit('+', 1)[1]
            left, right = sub_type.rsplit('+', 1)
            if left == '*' and ct_right == right:
                return True
    return False


def gzip(compress_level=5, mime_types=['text/html', 'text/plain'], debug=False):
    """Try to gzip the response body if Content-Type in mime_types.

    cherrypy.response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function.

    The provided list of mime-types must be of one of the following form:
        * type/subtype
        * type/*
        * type/*+subtype

    No compression is performed if any of the following hold:
        * The client sends no Accept-Encoding request header
        * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
        * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
        * The 'identity' value is given with a qvalue > 0.

    If the Accept-Encoding header is present but yields neither outcome
    (no usable gzip coding and no identity coding with a qvalue > 0), the
    response is turned into a 406.

    """
    # NOTE: the list default for mime_types is shared between calls; it is
    # only read here, never mutated.
    request = cherrypy.serving.request
    response = cherrypy.serving.response

    set_vary_header(response, "Accept-Encoding")

    if not response.body:
        # Response body is empty (might be a 304 for instance)
        if debug:
            cherrypy.log('No response body', context='TOOLS.GZIP')
        return

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(request, "cached", False):
        if debug:
            cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')
        return

    acceptable = request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        if debug:
            cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')
        return

    # Media type only: drop parameters and any whitespace before the ';'.
    ct = response.headers.get('Content-Type', '').split(';')[0].strip()
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            if debug:
                cherrypy.log('Non-zero identity qvalue: %s' % coding,
                             context='TOOLS.GZIP')
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                if debug:
                    cherrypy.log('Zero gzip qvalue: %s' % coding,
                                 context='TOOLS.GZIP')
                return

            # The provided mime-types may contain tokens such as 'text/*'
            # or 'application/*+xml'; the helper handles those as well as
            # exact matches.
            if not _gzip_mime_type_matches(ct, mime_types):
                if debug:
                    cherrypy.log('Content-Type %s not in mime_types %r' %
                                 (ct, mime_types), context='TOOLS.GZIP')
                return

            if debug:
                cherrypy.log('Gzipping', context='TOOLS.GZIP')
            # Return a generator that compresses the page
            response.headers['Content-Encoding'] = 'gzip'
            response.body = compress(response.body, compress_level)
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers["Content-Length"]

            return

    if debug:
        cherrypy.log('No acceptable encoding found.', context='TOOLS.GZIP')
    cherrypy.HTTPError(406, "identity, gzip").set_response()
+ |
Property changes on: third_party/cherrypy/lib/encoding.py |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |