third_party/cherrypy/lib/encoding.py - Issue 9368042: Add CherryPy to third_party.

Side by Side Diff: third_party/cherrypy/lib/encoding.py

Issue 9368042: Add CherryPy to third_party. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/build/

Patch Set: '' Created 8 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 import struct

	2 import time

	3

	4 import cherrypy

	5 from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr

	6 from cherrypy.lib import file_generator

	7 from cherrypy.lib import set_vary_header

	8

	9

	10 def decode(encoding=None, default_encoding='utf-8'):

	11 """Replace or extend the list of charsets used to decode a request entity.

	12

	13 Either argument may be a single string or a list of strings.

	14

	15 encoding

	16 If not None, restricts the set of charsets attempted while decoding

	17 a request entity to the given set (even if a different charset is given in

	18 the Content-Type request header).

	19

	20 default_encoding

	21 Only in effect if the 'encoding' argument is not given.

	22 If given, the set of charsets attempted while decoding a request entity is

	23 extended with the given value(s).

	24

	25 """

	26 body = cherrypy.request.body

	27 if encoding is not None:

	28 if not isinstance(encoding, list):

	29 encoding = [encoding]

	30 body.attempt_charsets = encoding

	31 elif default_encoding:

	32 if not isinstance(default_encoding, list):

	33 default_encoding = [default_encoding]

	34 body.attempt_charsets = body.attempt_charsets + default_encoding

	35

	36

	37 class ResponseEncoder:

	38

	39 default_encoding = 'utf-8'

	40 failmsg = "Response body could not be encoded with %r."

	41 encoding = None

	42 errors = 'strict'

	43 text_only = True

	44 add_charset = True

	45 debug = False

	46

	47 def __init__(self, **kwargs):

	48 for k, v in kwargs.items():

	49 setattr(self, k, v)

	50

	51 self.attempted_charsets = set()

	52 request = cherrypy.serving.request

	53 if request.handler is not None:

	54 # Replace request.handler with self

	55 if self.debug:

	56 cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')

	57 self.oldhandler = request.handler

	58 request.handler = self

	59

	60 def encode_stream(self, encoding):

	61 """Encode a streaming response body.

	62

	63 Use a generator wrapper, and just pray it works as the stream is

	64 being written out.

	65 """

	66 if encoding in self.attempted_charsets:

	67 return False

	68 self.attempted_charsets.add(encoding)

	69

	70 def encoder(body):

	71 for chunk in body:

	72 if isinstance(chunk, unicodestr):

	73 chunk = chunk.encode(encoding, self.errors)

	74 yield chunk

	75 self.body = encoder(self.body)

	76 return True

	77

	78 def encode_string(self, encoding):

	79 """Encode a buffered response body."""

	80 if encoding in self.attempted_charsets:

	81 return False

	82 self.attempted_charsets.add(encoding)

	83

	84 try:

	85 body = []

	86 for chunk in self.body:

	87 if isinstance(chunk, unicodestr):

	88 chunk = chunk.encode(encoding, self.errors)

	89 body.append(chunk)

	90 self.body = body

	91 except (LookupError, UnicodeError):

	92 return False

	93 else:

	94 return True

	95

	96 def find_acceptable_charset(self):

	97 request = cherrypy.serving.request

	98 response = cherrypy.serving.response

	99

	100 if self.debug:

	101 cherrypy.log('response.stream %r' % response.stream, 'TOOLS.ENCODE')

	102 if response.stream:

	103 encoder = self.encode_stream

	104 else:

	105 encoder = self.encode_string

	106 if "Content-Length" in response.headers:

	107 # Delete Content-Length header so finalize() recalcs it.

	108 # Encoded strings may be of different lengths from their

	109 # unicode equivalents, and even from each other. For example:

	110 # >>> t = u"\u7007\u3040"

	111 # >>> len(t)

	112 # 2

	113 # >>> len(t.encode("UTF-8"))

	114 # 6

	115 # >>> len(t.encode("utf7"))

	116 # 8

	117 del response.headers["Content-Length"]

	118

	119 # Parse the Accept-Charset request header, and try to provide one

	120 # of the requested charsets (in order of user preference).

	121 encs = request.headers.elements('Accept-Charset')

	122 charsets = [enc.value.lower() for enc in encs]

	123 if self.debug:

	124 cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')

	125

	126 if self.encoding is not None:

	127 # If specified, force this encoding to be used, or fail.

	128 encoding = self.encoding.lower()

	129 if self.debug:

	130 cherrypy.log('Specified encoding %r' % encoding, 'TOOLS.ENCODE')

	131 if (not charsets) or "*" in charsets or encoding in charsets:

	132 if self.debug:

	133 cherrypy.log('Attempting encoding %r' % encoding, 'TOOLS.ENC ODE')

	134 if encoder(encoding):

	135 return encoding

	136 else:

	137 if not encs:

	138 if self.debug:

	139 cherrypy.log('Attempting default encoding %r' %

	140 self.default_encoding, 'TOOLS.ENCODE')

	141 # Any character-set is acceptable.

	142 if encoder(self.default_encoding):

	143 return self.default_encoding

	144 else:

	145 raise cherrypy.HTTPError(500, self.failmsg % self.default_en coding)

	146 else:

	147 for element in encs:

	148 if element.qvalue > 0:

	149 if element.value == "*":

	150 # Matches any charset. Try our default.

	151 if self.debug:

	152 cherrypy.log('Attempting default encoding due '

	153 'to %r' % element, 'TOOLS.ENCODE')

	154 if encoder(self.default_encoding):

	155 return self.default_encoding

	156 else:

	157 encoding = element.value

	158 if self.debug:

	159 cherrypy.log('Attempting encoding %s (qvalue >'

	160 '0)' % element, 'TOOLS.ENCODE')

	161 if encoder(encoding):

	162 return encoding

	163

	164 if "*" not in charsets:

	165 # If no "*" is present in an Accept-Charset field, then all

	166 # character sets not explicitly mentioned get a quality

	167 # value of 0, except for ISO-8859-1, which gets a quality

	168 # value of 1 if not explicitly mentioned.

	169 iso = 'iso-8859-1'

	170 if iso not in charsets:

	171 if self.debug:

	172 cherrypy.log('Attempting ISO-8859-1 encoding',

	173 'TOOLS.ENCODE')

	174 if encoder(iso):

	175 return iso

	176

	177 # No suitable encoding found.

	178 ac = request.headers.get('Accept-Charset')

	179 if ac is None:

	180 msg = "Your client did not send an Accept-Charset header."

	181 else:

	182 msg = "Your client sent this Accept-Charset header: %s." % ac

	183 msg += " We tried these charsets: %s." % ", ".join(self.attempted_charse ts)

	184 raise cherrypy.HTTPError(406, msg)

	185

	186 def __call__(self, args, *kwargs):

	187 response = cherrypy.serving.response

	188 self.body = self.oldhandler(args, *kwargs)

	189

	190 if isinstance(self.body, basestring):

	191 # strings get wrapped in a list because iterating over a single

	192 # item list is much faster than iterating over every character

	193 # in a long string.

	194 if self.body:

	195 self.body = [self.body]

	196 else:

	197 # [''] doesn't evaluate to False, so replace it with [].

	198 self.body = []

	199 elif hasattr(self.body, 'read'):

	200 self.body = file_generator(self.body)

	201 elif self.body is None:

	202 self.body = []

	203

	204 ct = response.headers.elements("Content-Type")

	205 if self.debug:

	206 cherrypy.log('Content-Type: %r' % [str(h) for h in ct], 'TOOLS.ENCOD E')

	207 if ct:

	208 ct = ct[0]

	209 if self.text_only:

	210 if ct.value.lower().startswith("text/"):

	211 if self.debug:

	212 cherrypy.log('Content-Type %s starts with "text/"' % ct,

	213 'TOOLS.ENCODE')

	214 do_find = True

	215 else:

	216 if self.debug:

	217 cherrypy.log('Not finding because Content-Type %s does '

	218 'not start with "text/"' % ct,

	219 'TOOLS.ENCODE')

	220 do_find = False

	221 else:

	222 if self.debug:

	223 cherrypy.log('Finding because not text_only', 'TOOLS.ENCODE' )

	224 do_find = True

	225

	226 if do_find:

	227 # Set "charset=..." param on response Content-Type header

	228 ct.params['charset'] = self.find_acceptable_charset()

	229 if self.add_charset:

	230 if self.debug:

	231 cherrypy.log('Setting Content-Type %s' % ct,

	232 'TOOLS.ENCODE')

	233 response.headers["Content-Type"] = str(ct)

	234

	235 return self.body

	236

	237 # GZIP

	238

	239 def compress(body, compress_level):

	240 """Compress 'body' at the given compress_level."""

	241 import zlib

	242

	243 # See http://www.gzip.org/zlib/rfc-gzip.html

	244 yield ntob('\x1f\x8b') # ID1 and ID2: gzip marker

	245 yield ntob('\x08') # CM: compression method

	246 yield ntob('\x00') # FLG: none set

	247 # MTIME: 4 bytes

	248 yield struct.pack("<L", int(time.time()) & int('FFFFFFFF', 16))

	249 yield ntob('\x02') # XFL: max compression, slowest algo

	250 yield ntob('\xff') # OS: unknown

	251

	252 crc = zlib.crc32(ntob(""))

	253 size = 0

	254 zobj = zlib.compressobj(compress_level,

	255 zlib.DEFLATED, -zlib.MAX_WBITS,

	256 zlib.DEF_MEM_LEVEL, 0)

	257 for line in body:

	258 size += len(line)

	259 crc = zlib.crc32(line, crc)

	260 yield zobj.compress(line)

	261 yield zobj.flush()

	262

	263 # CRC32: 4 bytes

	264 yield struct.pack("<L", crc & int('FFFFFFFF', 16))

	265 # ISIZE: 4 bytes

	266 yield struct.pack("<L", size & int('FFFFFFFF', 16))

	267

	268 def decompress(body):

	269 import gzip

	270

	271 zbuf = BytesIO()

	272 zbuf.write(body)

	273 zbuf.seek(0)

	274 zfile = gzip.GzipFile(mode='rb', fileobj=zbuf)

	275 data = zfile.read()

	276 zfile.close()

	277 return data

	278

	279

	280 def gzip(compress_level=5, mime_types=['text/html', 'text/plain'], debug=False):

	281 """Try to gzip the response body if Content-Type in mime_types.

	282

	283 cherrypy.response.headers['Content-Type'] must be set to one of the

	284 values in the mime_types arg before calling this function.

	285

	286 The provided list of mime-types must be of one of the following form:

	287 * type/subtype

	288 * type/*

	289 * type/*+subtype

	290

	291 No compression is performed if any of the following hold:

	292 * The client sends no Accept-Encoding request header

	293 * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header

	294 * No 'gzip' or 'x-gzip' with a qvalue > 0 is present

	295 * The 'identity' value is given with a qvalue > 0.

	296

	297 """

	298 request = cherrypy.serving.request

	299 response = cherrypy.serving.response

	300

	301 set_vary_header(response, "Accept-Encoding")

	302

	303 if not response.body:

	304 # Response body is empty (might be a 304 for instance)

	305 if debug:

	306 cherrypy.log('No response body', context='TOOLS.GZIP')

	307 return

	308

	309 # If returning cached content (which should already have been gzipped),

	310 # don't re-zip.

	311 if getattr(request, "cached", False):

	312 if debug:

	313 cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')

	314 return

	315

	316 acceptable = request.headers.elements('Accept-Encoding')

	317 if not acceptable:

	318 # If no Accept-Encoding field is present in a request,

	319 # the server MAY assume that the client will accept any

	320 # content coding. In this case, if "identity" is one of

	321 # the available content-codings, then the server SHOULD use

	322 # the "identity" content-coding, unless it has additional

	323 # information that a different content-coding is meaningful

	324 # to the client.

	325 if debug:

	326 cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')

	327 return

	328

	329 ct = response.headers.get('Content-Type', '').split(';')[0]

	330 for coding in acceptable:

	331 if coding.value == 'identity' and coding.qvalue != 0:

	332 if debug:

	333 cherrypy.log('Non-zero identity qvalue: %s' % coding,

	334 context='TOOLS.GZIP')

	335 return

	336 if coding.value in ('gzip', 'x-gzip'):

	337 if coding.qvalue == 0:

	338 if debug:

	339 cherrypy.log('Zero gzip qvalue: %s' % coding,

	340 context='TOOLS.GZIP')

	341 return

	342

	343 if ct not in mime_types:

	344 # If the list of provided mime-types contains tokens

	345 # such as 'text/' or 'application/+xml',

	346 # we go through them and find the most appropriate one

	347 # based on the given content-type.

	348 # The pattern matching is only caring about the most

	349 # common cases, as stated above, and doesn't support

	350 # for extra parameters.

	351 found = False

	352 if '/' in ct:

	353 ct_media_type, ct_sub_type = ct.split('/')

	354 for mime_type in mime_types:

	355 if '/' in mime_type:

	356 media_type, sub_type = mime_type.split('/')

	357 if ct_media_type == media_type:

	358 if sub_type == '*':

	359 found = True

	360 break

	361 elif '+' in sub_type and '+' in ct_sub_type:

	362 ct_left, ct_right = ct_sub_type.split('+')

	363 left, right = sub_type.split('+')

	364 if left == '*' and ct_right == right:

	365 found = True

	366 break

	367

	368 if not found:

	369 if debug:

	370 cherrypy.log('Content-Type %s not in mime_types %r' %

	371 (ct, mime_types), context='TOOLS.GZIP')

	372 return

	373

	374 if debug:

	375 cherrypy.log('Gzipping', context='TOOLS.GZIP')

	376 # Return a generator that compresses the page

	377 response.headers['Content-Encoding'] = 'gzip'

	378 response.body = compress(response.body, compress_level)

	379 if "Content-Length" in response.headers:

	380 # Delete Content-Length header so finalize() recalcs it.

	381 del response.headers["Content-Length"]

	382

	383 return

	384

	385 if debug:

	386 cherrypy.log('No acceptable encoding found.', context='GZIP')

	387 cherrypy.HTTPError(406, "identity, gzip").set_response()

	388

OLD	NEW

« no previous file with comments | « third_party/cherrypy/lib/cptools.py ('k') | third_party/cherrypy/lib/gctools.py » ('j') | no next file with comments »