OLD | NEW |
(Empty) | |
| 1 import struct |
| 2 import time |
| 3 |
| 4 import cherrypy |
| 5 from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr |
| 6 from cherrypy.lib import file_generator |
| 7 from cherrypy.lib import set_vary_header |
| 8 |
| 9 |
def decode(encoding=None, default_encoding='utf-8'):
    """Configure which charsets are tried when decoding the request entity.

    Both arguments accept either one charset name or a list of names.

    encoding
        When not None, the request body's ``attempt_charsets`` is replaced
        by exactly these charsets, so nothing else is attempted (even if a
        different charset is given in the Content-Type request header).

    default_encoding
        Consulted only when 'encoding' is None. When given, these charsets
        are appended to the request body's current ``attempt_charsets``.

    """
    def as_list(value):
        # A bare charset name becomes a one-element list; lists pass through.
        if isinstance(value, list):
            return value
        return [value]

    entity = cherrypy.request.body
    if encoding is not None:
        entity.attempt_charsets = as_list(encoding)
        return

    if default_encoding:
        # Build a new list instead of extending in place, so the list object
        # previously bound to attempt_charsets is left untouched.
        extra = as_list(default_encoding)
        entity.attempt_charsets = entity.attempt_charsets + extra
| 35 |
| 36 |
class ResponseEncoder:
    """Request-handler wrapper that encodes unicode response chunks.

    On construction the instance installs itself as
    ``cherrypy.serving.request.handler`` and keeps the previous handler as
    ``oldhandler``. When called, it runs the previous handler, normalizes
    the returned value into an iterable body and, depending on the response
    Content-Type, picks a charset acceptable to the client and encodes the
    body with it.
    """

    # Charset tried when the client sends no Accept-Charset or sends "*".
    default_encoding = 'utf-8'
    # Message template for the 500 raised when default_encoding fails.
    failmsg = "Response body could not be encoded with %r."
    # If not None, the only charset that will be attempted.
    encoding = None
    # Error-handling scheme passed to each chunk's encode() call.
    errors = 'strict'
    # If True, only look for a charset when Content-Type starts with "text/".
    text_only = True
    # If True, write the chosen charset back into the Content-Type header.
    add_charset = True
    # If True, log each decision under the 'TOOLS.ENCODE' context.
    debug = False

    def __init__(self, **kwargs):
        """Apply keyword overrides and take over the current request handler.

        Each keyword argument is stored as an instance attribute of the same
        name. If the current request has a handler, it is saved as
        ``self.oldhandler`` and replaced by this instance.
        """
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.attempted_charsets = set()
        request = cherrypy.serving.request
        if request.handler is not None:
            # Replace request.handler with self
            if self.debug:
                cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')
            self.oldhandler = request.handler
            request.handler = self

    def encode_stream(self, encoding):
        """Encode a streaming response body.

        Use a generator wrapper, and just pray it works as the stream is
        being written out.

        Returns False if 'encoding' was already attempted, True otherwise.
        Because encoding happens lazily, an unknown or unsuitable charset
        only surfaces while the wrapped body is being iterated.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        def encoder(body):
            for chunk in body:
                if isinstance(chunk, unicodestr):
                    chunk = chunk.encode(encoding, self.errors)
                yield chunk
        self.body = encoder(self.body)
        return True

    def encode_string(self, encoding):
        """Encode a buffered response body.

        Returns True and replaces ``self.body`` with a list of encoded
        chunks on success. Returns False if 'encoding' was already
        attempted, or if a LookupError/UnicodeError occurs; in the latter
        case ``self.body`` still holds every original chunk.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        try:
            # The body may be a one-shot iterator (a generator returned by
            # the handler, or file_generator). Materialize it before
            # encoding so that a charset failing part-way through does not
            # swallow the chunks already consumed; the next charset attempt
            # must see the complete body.
            if not isinstance(self.body, list):
                self.body = list(self.body)

            body = []
            for chunk in self.body:
                if isinstance(chunk, unicodestr):
                    chunk = chunk.encode(encoding, self.errors)
                body.append(chunk)
            self.body = body
        except (LookupError, UnicodeError):
            return False
        else:
            return True

    def find_acceptable_charset(self):
        """Encode the body with a charset the client accepts; return its name.

        Uses ``encode_stream`` when ``response.stream`` is set and
        ``encode_string`` otherwise. If ``self.encoding`` is set, only that
        charset is attempted, and only when the Accept-Charset header allows
        it. Otherwise the Accept-Charset elements are tried in the order
        given by ``request.headers.elements``, followed by ISO-8859-1 when
        neither it nor "*" was listed.

        Raises cherrypy.HTTPError(500) if there is no Accept-Charset header
        and the default encoding fails, and cherrypy.HTTPError(406) if no
        attempted charset works.
        """
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        if self.debug:
            cherrypy.log('response.stream %r' % response.stream,
                         'TOOLS.ENCODE')
        if response.stream:
            encoder = self.encode_stream
        else:
            encoder = self.encode_string
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                # Encoded strings may be of different lengths from their
                # unicode equivalents, and even from each other. For example:
                # >>> t = u"\u7007\u3040"
                # >>> len(t)
                # 2
                # >>> len(t.encode("UTF-8"))
                # 6
                # >>> len(t.encode("utf7"))
                # 8
                del response.headers["Content-Length"]

        # Parse the Accept-Charset request header, and try to provide one
        # of the requested charsets (in order of user preference).
        encs = request.headers.elements('Accept-Charset')
        charsets = [enc.value.lower() for enc in encs]
        if self.debug:
            cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')

        if self.encoding is not None:
            # If specified, force this encoding to be used, or fail.
            encoding = self.encoding.lower()
            if self.debug:
                cherrypy.log('Specified encoding %r' % encoding,
                             'TOOLS.ENCODE')
            if (not charsets) or "*" in charsets or encoding in charsets:
                if self.debug:
                    cherrypy.log('Attempting encoding %r' % encoding,
                                 'TOOLS.ENCODE')
                if encoder(encoding):
                    return encoding
        else:
            if not encs:
                if self.debug:
                    cherrypy.log('Attempting default encoding %r' %
                                 self.default_encoding, 'TOOLS.ENCODE')
                # Any character-set is acceptable.
                if encoder(self.default_encoding):
                    return self.default_encoding
                else:
                    raise cherrypy.HTTPError(
                        500, self.failmsg % self.default_encoding)
            else:
                for element in encs:
                    if element.qvalue > 0:
                        if element.value == "*":
                            # Matches any charset. Try our default.
                            if self.debug:
                                cherrypy.log('Attempting default encoding due '
                                             'to %r' % element, 'TOOLS.ENCODE')
                            if encoder(self.default_encoding):
                                return self.default_encoding
                        else:
                            encoding = element.value
                            if self.debug:
                                cherrypy.log('Attempting encoding %s (qvalue >'
                                             '0)' % element, 'TOOLS.ENCODE')
                            if encoder(encoding):
                                return encoding

                if "*" not in charsets:
                    # If no "*" is present in an Accept-Charset field, then all
                    # character sets not explicitly mentioned get a quality
                    # value of 0, except for ISO-8859-1, which gets a quality
                    # value of 1 if not explicitly mentioned.
                    iso = 'iso-8859-1'
                    if iso not in charsets:
                        if self.debug:
                            cherrypy.log('Attempting ISO-8859-1 encoding',
                                         'TOOLS.ENCODE')
                        if encoder(iso):
                            return iso

        # No suitable encoding found.
        ac = request.headers.get('Accept-Charset')
        if ac is None:
            msg = "Your client did not send an Accept-Charset header."
        else:
            msg = "Your client sent this Accept-Charset header: %s." % ac
        msg += (" We tried these charsets: %s." %
                ", ".join(self.attempted_charsets))
        raise cherrypy.HTTPError(406, msg)

    def __call__(self, *args, **kwargs):
        """Run the wrapped handler and return its (possibly encoded) body.

        The handler's return value is normalized first: a string becomes a
        one-item list (or an empty list when the string is empty), an object
        with a ``read`` attribute is wrapped in ``file_generator``, and None
        becomes an empty list. If the response has a Content-Type header and
        either ``text_only`` is False or the type starts with "text/", a
        charset is chosen via ``find_acceptable_charset`` and, when
        ``add_charset`` is True, written back into the Content-Type header.
        """
        response = cherrypy.serving.response
        self.body = self.oldhandler(*args, **kwargs)

        if isinstance(self.body, basestring):
            # strings get wrapped in a list because iterating over a single
            # item list is much faster than iterating over every character
            # in a long string.
            if self.body:
                self.body = [self.body]
            else:
                # [''] doesn't evaluate to False, so replace it with [].
                self.body = []
        elif hasattr(self.body, 'read'):
            self.body = file_generator(self.body)
        elif self.body is None:
            self.body = []

        ct = response.headers.elements("Content-Type")
        if self.debug:
            cherrypy.log('Content-Type: %r' % [str(h) for h in ct],
                         'TOOLS.ENCODE')
        if ct:
            # Only the first Content-Type element is considered.
            ct = ct[0]
            if self.text_only:
                if ct.value.lower().startswith("text/"):
                    if self.debug:
                        cherrypy.log('Content-Type %s starts with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = True
                else:
                    if self.debug:
                        cherrypy.log('Not finding because Content-Type %s does '
                                     'not start with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = False
            else:
                if self.debug:
                    cherrypy.log('Finding because not text_only',
                                 'TOOLS.ENCODE')
                do_find = True

            if do_find:
                # Set "charset=..." param on response Content-Type header
                ct.params['charset'] = self.find_acceptable_charset()
                if self.add_charset:
                    if self.debug:
                        cherrypy.log('Setting Content-Type %s' % ct,
                                     'TOOLS.ENCODE')
                    response.headers["Content-Type"] = str(ct)

        return self.body
| 236 |
| 237 # GZIP |
| 238 |
def compress(body, compress_level):
    """Compress 'body' at the given compress_level.

    'body' is an iterable of byte strings. This is a generator: it yields
    a 10-byte gzip header, then the deflate output for each input chunk
    (individual pieces may be empty while zlib buffers), the flushed
    remainder, and finally the 8-byte CRC32/ISIZE trailer. Concatenating
    everything yielded gives a complete gzip stream.
    """
    import zlib

    # RFC 1952, section 2.3.1: for the deflate method, XFL is 2 when the
    # compressor used maximum compression and 4 when it used its fastest
    # algorithm. Anything in between leaves the flags unset, so the header
    # does not claim a level that was not actually used.
    if compress_level == zlib.Z_BEST_COMPRESSION:
        xfl = 0x02
    elif compress_level == zlib.Z_BEST_SPEED:
        xfl = 0x04
    else:
        xfl = 0x00

    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield struct.pack(
        "<BBBBLBB",
        0x1f, 0x8b,                     # ID1 and ID2: gzip marker
        0x08,                           # CM: compression method
        0x00,                           # FLG: none set
        int(time.time()) & 0xFFFFFFFF,  # MTIME: 4 bytes
        xfl,                            # XFL: see above
        0xff                            # OS: unknown
    )

    crc = 0  # CRC32 of no data
    size = 0
    # Negative wbits: raw deflate data, since the gzip header and trailer
    # are written by hand here.
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes, then ISIZE: 4 bytes (both truncated to 32 bits).
    yield struct.pack("<LL", crc & 0xFFFFFFFF, size & 0xFFFFFFFF)
| 267 |
def decompress(body):
    """Return the decompressed contents of the gzip-encoded bytes 'body'.

    Whatever exception the gzip module raises for malformed input is
    propagated to the caller.
    """
    import gzip

    zbuf = BytesIO()
    zbuf.write(body)
    zbuf.seek(0)
    zfile = gzip.GzipFile(mode='rb', fileobj=zbuf)
    try:
        return zfile.read()
    finally:
        # Close the reader even when read() fails on corrupt data.
        zfile.close()
| 278 |
| 279 |
def _mime_type_matches(ct, mime_types):
    """Return True if content type 'ct' is covered by 'mime_types'.

    An entry matches when it equals 'ct' exactly, when it is 'type/*' with
    the same type, or when it is 'type/*+suffix' and the subtype of 'ct'
    ends in the same '+suffix'. Parameters are not supported.
    """
    if ct in mime_types:
        return True
    if '/' not in ct:
        return False

    # maxsplit keeps the unpacking safe for values with extra separators.
    ct_media_type, ct_sub_type = ct.split('/', 1)
    for mime_type in mime_types:
        if '/' not in mime_type:
            continue
        media_type, sub_type = mime_type.split('/', 1)
        if ct_media_type != media_type:
            continue
        if sub_type == '*':
            return True
        if '+' in sub_type and '+' in ct_sub_type:
            left, right = sub_type.split('+', 1)
            # The suffix is whatever follows the last '+'.
            ct_right = ct_sub_type.split('+')[-1]
            if left == '*' and ct_right == right:
                return True
    return False


def gzip(compress_level=5, mime_types=['text/html', 'text/plain'], debug=False):
    """Try to gzip the response body if Content-Type in mime_types.

    cherrypy.response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function.

    Each entry in mime_types must take one of the following forms:
        * type/subtype
        * type/*
        * type/*+subtype

    The mime_types default list is only read, never modified.

    The Accept-Encoding elements are examined in the order returned by
    request.headers.elements(). No compression is performed if any of the
    following hold:
        * The response body is empty, or request.cached is set
        * The client sends no Accept-Encoding request header
        * An 'identity' element with a non-zero qvalue is reached before
          any 'gzip' or 'x-gzip' element
        * The first 'gzip' or 'x-gzip' element reached has a qvalue of 0
        * The response Content-Type is not covered by mime_types

    If the loop finishes without reaching any of those elements, a
    406 response is set via cherrypy.HTTPError(...).set_response().

    """
    request = cherrypy.serving.request
    response = cherrypy.serving.response

    set_vary_header(response, "Accept-Encoding")

    if not response.body:
        # Response body is empty (might be a 304 for instance)
        if debug:
            cherrypy.log('No response body', context='TOOLS.GZIP')
        return

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(request, "cached", False):
        if debug:
            cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')
        return

    acceptable = request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        if debug:
            cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')
        return

    # Drop any parameters, and the whitespace that may precede the ';'.
    ct = response.headers.get('Content-Type', '').split(';')[0].strip()
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            if debug:
                cherrypy.log('Non-zero identity qvalue: %s' % coding,
                             context='TOOLS.GZIP')
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                if debug:
                    cherrypy.log('Zero gzip qvalue: %s' % coding,
                                 context='TOOLS.GZIP')
                return

            # mime_types may contain tokens such as 'text/*' or
            # 'application/*+xml'; the helper handles those patterns.
            if not _mime_type_matches(ct, mime_types):
                if debug:
                    cherrypy.log('Content-Type %s not in mime_types %r' %
                                 (ct, mime_types), context='TOOLS.GZIP')
                return

            if debug:
                cherrypy.log('Gzipping', context='TOOLS.GZIP')
            # Return a generator that compresses the page
            response.headers['Content-Encoding'] = 'gzip'
            response.body = compress(response.body, compress_level)
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers["Content-Length"]

            return

    if debug:
        cherrypy.log('No acceptable encoding found.', context='TOOLS.GZIP')
    cherrypy.HTTPError(406, "identity, gzip").set_response()
| 388 |
OLD | NEW |