OLD | NEW |
(Empty) | |
| 1 """ |
| 2 CherryPy implements a simple caching system as a pluggable Tool. This tool tries |
| 3 to be an (in-process) HTTP/1.1-compliant cache. It's not quite there yet, but |
| 4 it's probably good enough for most sites. |
| 5 |
| 6 In general, GET responses are cached (along with selecting headers) and, if |
| 7 another request arrives for the same resource, the caching Tool will return 304 |
| 8 Not Modified if possible, or serve the cached response otherwise. It also sets |
| 9 request.cached to True if serving a cached representation, and sets |
| 10 request.cacheable to False (so it doesn't get cached again). |
| 11 |
| 12 If POST, PUT, or DELETE requests are made for a cached resource, they invalidate |
| 13 (delete) any cached response. |
| 14 |
| 15 Usage |
| 16 ===== |
| 17 |
| 18 Configuration file example:: |
| 19 |
| 20 [/] |
| 21 tools.caching.on = True |
| 22 tools.caching.delay = 3600 |
| 23 |
| 24 You may use a class other than the default |
| 25 :class:`MemoryCache<cherrypy.lib.caching.MemoryCache>` by supplying the config |
| 26 entry ``cache_class``; supply the full dotted name of the replacement class |
| 27 as the config value. It must implement the basic methods ``get``, ``put``, |
| 28 ``delete``, and ``clear``. |
| 29 |
| 30 You may set any attribute, including overriding methods, on the cache |
| 31 instance by providing them in config. The above sets the |
| 32 :attr:`delay<cherrypy.lib.caching.MemoryCache.delay>` attribute, for example. |
| 33 """ |
| 34 |
| 35 import datetime |
| 36 import sys |
| 37 import threading |
| 38 import time |
| 39 |
| 40 import cherrypy |
| 41 from cherrypy.lib import cptools, httputil |
| 42 from cherrypy._cpcompat import copyitems, ntob, set_daemon, sorted |
| 43 |
| 44 |
class Cache(object):
    """Base class for Cache implementations.

    Subclasses are expected to override every method below; each base
    implementation simply raises ``NotImplementedError``.
    """

    def get(self):
        """Return the current variant if in the cache, else None.

        Raises NotImplementedError unless overridden.
        """
        # NotImplemented (without "Error") is a comparison sentinel, not an
        # exception class; raising it produces a confusing TypeError.
        raise NotImplementedError

    def put(self, obj, size):
        """Store the current variant in the cache.

        ``obj`` is the object to store and ``size`` its size.
        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError

    def delete(self):
        """Remove ALL cached variants of the current resource.

        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError

    def clear(self):
        """Reset the cache to its initial, empty state.

        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError
| 63 |
| 64 |
| 65 |
| 66 # ------------------------------- Memory Cache ------------------------------- # |
| 67 |
| 68 |
class AntiStampedeCache(dict):
    """A storage system for cached items which reduces stampede collisions.

    A slot may hold either a real cached value or a ``threading.Event``
    sentinel (carrying a ``result`` attribute) which signals that some
    thread is currently computing the value for that key.
    """

    # threading.Event is a factory function on older Pythons (the concrete
    # class being the private threading._Event) and the class itself on
    # newer ones, where threading._Event does not exist. Resolve the type
    # usable with isinstance() once, so both flavours work.
    _event_type = getattr(threading, '_Event', threading.Event)

    def _place_sentinel(self, key):
        """Store a fresh, unset Event (with ``result = None``) under ``key``."""
        e = threading.Event()
        e.result = None
        # Bypass our own __setitem__, which would treat this as a real value
        # and wake up any threads waiting on a previous sentinel.
        dict.__setitem__(self, key, e)

    def wait(self, key, timeout=5, debug=False):
        """Return the cached value for the given key, or None.

        If timeout is not None, and the value is already
        being calculated by another thread, wait until the given timeout has
        elapsed. If the value is available before the timeout expires, it is
        returned. If not, None is returned, and a sentinel placed in the cache
        to signal other threads to wait.

        If timeout is None, no waiting is performed nor sentinels used
        when another thread is already calculating the value.

        If there is no entry at all for the key, a sentinel is placed in the
        cache and None is returned.
        """
        value = self.get(key)
        if isinstance(value, self._event_type):
            if timeout is None:
                # Ignore the other thread and recalc it ourselves.
                if debug:
                    cherrypy.log('No timeout', 'TOOLS.CACHING')
                return None

            # Wait until it's done or times out.
            if debug:
                cherrypy.log('Waiting up to %s seconds' % timeout,
                             'TOOLS.CACHING')
            value.wait(timeout)
            if value.result is not None:
                # The other thread finished its calculation. Use it.
                if debug:
                    cherrypy.log('Result!', 'TOOLS.CACHING')
                return value.result
            # Timed out. Stick an Event in the slot so other threads wait
            # on this one to finish calculating the value.
            if debug:
                cherrypy.log('Timed out', 'TOOLS.CACHING')
            self._place_sentinel(key)

            return None
        elif value is None:
            # Stick an Event in the slot so other threads wait
            # on this one to finish calculating the value.
            # NOTE(review): the message below is reused from the timeout
            # branch although nothing timed out here; kept as-is so log
            # output does not change.
            if debug:
                cherrypy.log('Timed out', 'TOOLS.CACHING')
            self._place_sentinel(key)
        return value

    def __setitem__(self, key, value):
        """Set the cached value for the given key.

        If the slot currently holds a sentinel Event, its ``result`` is set
        to ``value`` and the Event is signalled.
        """
        existing = self.get(key)
        dict.__setitem__(self, key, value)
        if isinstance(existing, self._event_type):
            # Set Event.result so other threads waiting on it have
            # immediate access without needing to poll the cache again.
            existing.result = value
            existing.set()
| 128 |
| 129 |
class MemoryCache(Cache):
    """An in-memory cache for varying response content.

    Each key in self.store is a URI, and each value is an AntiStampedeCache.
    The response for any given URI may vary based on the values of
    "selecting request headers"; that is, those named in the Vary
    response header. We assume the list of header names to be constant
    for each URI throughout the lifetime of the application, and store
    that list in ``self.store[uri].selecting_headers``.

    The items contained in ``self.store[uri]`` have keys which are sorted
    tuples of the request's values for the selecting headers, and values
    which are the actual responses.
    """

    maxobjects = 1000
    """The maximum number of cached objects; defaults to 1000."""

    maxobj_size = 100000
    """The maximum size of each cached object in bytes; defaults to 100 KB."""

    maxsize = 10000000
    """The maximum size of the entire cache in bytes; defaults to 10 MB."""

    delay = 600
    """Seconds until the cached content expires; defaults to 600 (10 minutes)."""

    antistampede_timeout = 5
    """Seconds to wait for other threads to release a cache lock."""

    expire_freq = 0.1
    """Seconds to sleep between cache expiration sweeps."""

    debug = False

    def __init__(self):
        """Create an empty cache and start its expiration thread."""
        self.clear()

        # Run self.expire_cache in a separate daemon thread.
        t = threading.Thread(target=self.expire_cache, name='expire_cache')
        self.expiration_thread = t
        set_daemon(t, True)
        t.start()

    def clear(self):
        """Reset the cache to its initial, empty state."""
        self.store = {}
        self.expirations = {}
        self.tot_puts = 0
        self.tot_gets = 0
        self.tot_hist = 0
        self.tot_expires = 0
        self.tot_non_modified = 0
        self.cursize = 0

    def expire_cache(self):
        """Continuously examine cached objects, expiring stale ones.

        This function is designed to be run in its own daemon thread,
        referenced at ``self.expiration_thread``.
        """
        # It's possible that "time" will be set to None
        # arbitrarily, so we check "while time" to avoid exceptions.
        # See tickets #99 and #180 for more information.
        while time:
            now = time.time()
            # Must make a copy of expirations so it doesn't change size
            # during iteration
            for expiration_time, objects in copyitems(self.expirations):
                if expiration_time <= now:
                    for obj_size, uri, variant_key in objects:
                        try:
                            del self.store[uri][tuple(variant_key)]
                            self.tot_expires += 1
                            self.cursize -= obj_size
                        except KeyError:
                            # the key may have been deleted elsewhere
                            pass
                    del self.expirations[expiration_time]
            time.sleep(self.expire_freq)

    def _variant_key(self, uricache):
        """Return the key under which the current request's variant lives.

        The key is the sorted tuple of the current request's values for
        each header named in ``uricache.selecting_headers`` (missing
        headers count as the empty string).
        """
        request = cherrypy.serving.request
        header_values = [request.headers.get(h, '')
                         for h in uricache.selecting_headers]
        return tuple(sorted(header_values))

    def get(self):
        """Return the current variant if in the cache, else None."""
        request = cherrypy.serving.request
        self.tot_gets += 1

        uri = cherrypy.url(qs=request.query_string)
        uricache = self.store.get(uri)
        if uricache is None:
            return None

        variant = uricache.wait(key=self._variant_key(uricache),
                                timeout=self.antistampede_timeout,
                                debug=self.debug)
        if variant is not None:
            self.tot_hist += 1
        return variant

    def put(self, variant, size):
        """Store the current variant in the cache.

        The variant is only stored while the number of cached URIs is below
        ``maxobjects``, ``size`` is below ``maxobj_size`` and the resulting
        total stays below ``maxsize``; otherwise it is silently skipped.
        """
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        uri = cherrypy.url(qs=request.query_string)
        uricache = self.store.get(uri)
        if uricache is None:
            uricache = AntiStampedeCache()
            uricache.selecting_headers = [
                e.value for e in response.headers.elements('Vary')]
            self.store[uri] = uricache

        if len(self.store) < self.maxobjects:
            total_size = self.cursize + size

            # checks if there's space for the object
            if (size < self.maxobj_size and total_size < self.maxsize):
                variant_key = self._variant_key(uricache)

                # add to the expirations list. The bucket must record the
                # exact key used in uricache (header *values*), not the
                # header names, or expire_cache can never find the entry.
                expiration_time = response.time + self.delay
                bucket = self.expirations.setdefault(expiration_time, [])
                bucket.append((size, uri, variant_key))

                # add to the cache
                uricache[variant_key] = variant
                self.tot_puts += 1
                self.cursize = total_size

    def delete(self):
        """Remove ALL cached variants of the current resource."""
        uri = cherrypy.url(qs=cherrypy.serving.request.query_string)
        self.store.pop(uri, None)
| 264 |
| 265 |
def get(invalid_methods=("POST", "PUT", "DELETE"), debug=False, **kwargs):
    """Try to serve the current request from the process-wide cache.

    On first use a cache object is created (``cache_class`` keyword,
    defaulting to MemoryCache) and stored at ``cherrypy._cache``; the
    remaining keyword arguments and ``debug`` are set on it as attributes.

    If request.method is in ``invalid_methods``:
        * invalidates (deletes) any cached response for this resource
        * sets request.cached = False
        * sets request.cacheable = False
        * returns False

    else if the request carries ``Pragma: no-cache``, or a cached copy
    exists but is bypassed (``Cache-Control: no-cache``, or older than
    the allowed max age):
        * sets request.cached = False
        * sets request.cacheable = True
        * returns False

    else if a cached copy exists:
        * sets request.cached = True
        * sets request.cacheable = False
        * sets response.headers to a copy of the cached values, plus Age
        * calls cptools.validate_since(); any exception it raises
          propagates (a 304 HTTPRedirect is counted first)
        * sets response.status and response.body to the cached values
        * returns True

    otherwise:
        * sets request.cached = False
        * sets request.cacheable = True
        * returns False

    A malformed ``Cache-Control: max-age`` directive raises HTTPError(400).
    """
    request = cherrypy.serving.request
    response = cherrypy.serving.response

    def bypass(cacheable):
        # Record that nothing is served from cache for this request.
        request.cached = False
        request.cacheable = cacheable
        return False

    if not hasattr(cherrypy, "_cache"):
        # Build the single process-wide cache and configure it from the
        # keyword arguments that remain once cache_class is removed.
        cache_factory = kwargs.pop("cache_class", MemoryCache)
        cherrypy._cache = cache_factory()
        for attr_name, attr_value in kwargs.items():
            setattr(cherrypy._cache, attr_name, attr_value)
        cherrypy._cache.debug = debug

    cache = cherrypy._cache

    # POST, PUT, DELETE should invalidate (delete) the cached copy.
    # See http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.10.
    if request.method in invalid_methods:
        if debug:
            cherrypy.log('request.method %r in invalid_methods %r' %
                         (request.method, invalid_methods), 'TOOLS.CACHING')
        cache.delete()
        return bypass(False)

    pragmas = [e.value for e in request.headers.elements('Pragma')]
    if 'no-cache' in pragmas:
        return bypass(True)

    cache_data = cache.get()
    request.cached = bool(cache_data)
    request.cacheable = not request.cached
    if not request.cached:
        if debug:
            cherrypy.log('request is not cached', 'TOOLS.CACHING')
        return request.cached

    # A cached copy exists; honour the client's Cache-Control directives.
    max_age = cache.delay
    directives = [e.value for e in request.headers.elements('Cache-Control')]
    for text in directives:
        pieces = text.split('=', 1)
        directive = pieces[0]
        arguments = pieces[1:]
        if directive == 'max-age':
            if len(arguments) != 1 or not arguments[0].isdigit():
                raise cherrypy.HTTPError(400, "Invalid Cache-Control header")
            max_age = int(arguments[0])
            break
        elif directive == 'no-cache':
            if debug:
                cherrypy.log('Ignoring cache due to Cache-Control: no-cache',
                             'TOOLS.CACHING')
            return bypass(True)

    if debug:
        cherrypy.log('Reading response from cache', 'TOOLS.CACHING')
    status, cached_headers, body, create_time = cache_data
    age = int(response.time - create_time)
    if age > max_age:
        if debug:
            cherrypy.log('Ignoring cache due to age > %d' % max_age,
                         'TOOLS.CACHING')
        return bypass(True)

    # Copy the response headers. See http://www.cherrypy.org/ticket/721.
    fresh_headers = httputil.HeaderMap()
    response.headers = fresh_headers
    for name in cached_headers:
        dict.__setitem__(fresh_headers, name,
                         dict.__getitem__(cached_headers, name))

    # Add the required Age header
    response.headers["Age"] = str(age)

    try:
        # validate_since depends on a Last-Modified header; it was stored
        # with the cached copy and has just been restored above.
        cptools.validate_since()
    except cherrypy.HTTPRedirect:
        redirect = sys.exc_info()[1]
        if redirect.status == 304:
            cache.tot_non_modified += 1
        raise

    # serve it & get out from the request
    response.status = status
    response.body = body
    return request.cached
| 377 |
| 378 |
def tee_output():
    """Wrap response.body so its chunks are also saved to the cache.

    Internal; used by CachingTool by attaching to request.hooks.

    Nothing is wrapped when the request's Cache-Control contains
    'no-store'. Otherwise response.body is replaced by a generator which
    passes every chunk through unchanged and, once the body is exhausted,
    stores (status, headers, body, time) in ``cherrypy._cache`` -- unless
    the response carries 'Pragma: no-cache' or 'Cache-Control: no-store'.
    """
    request = cherrypy.serving.request
    if 'no-store' in request.headers.values('Cache-Control'):
        return

    response = cherrypy.serving.response

    def tee(body):
        """Yield each chunk of body, collecting them for the cache."""
        # Evaluated lazily, on first iteration, so headers set by the
        # handler are visible here.
        skip_cache = ('no-cache' in response.headers.values('Pragma') or
                      'no-store' in response.headers.values('Cache-Control'))
        if skip_cache:
            for chunk in body:
                yield chunk
            return

        collected = []
        for chunk in body:
            collected.append(chunk)
            yield chunk

        # save the cache data
        payload = ntob('').join(collected)
        entry = (response.status, response.headers or {},
                 payload, response.time)
        cherrypy._cache.put(entry, len(payload))

    response.body = tee(response.body)
| 407 |
| 408 |
def expires(secs=0, force=False, debug=False):
    """Tool for influencing cache mechanisms using the 'Expires' header.

    secs
        Must be either an int or a datetime.timedelta, and indicates the
        number of seconds between response.time and when the response should
        expire. The 'Expires' header will be set to response.time + secs.
        If secs is zero, the 'Expires' header is set to a fixed date in the
        past, and the following "cache prevention" headers are also set:

        * Pragma: no-cache
        * Cache-Control: no-cache, must-revalidate (only when the request
          protocol is at least HTTP/1.1)

    force
        If True, the headers above are always set, overwriting any
        existing values.

        If False, the response is first checked for these headers:

        * Etag
        * Last-Modified
        * Age
        * Expires

        If none of them is present, nothing is changed. If at least one is
        present, each of Pragma, Cache-Control and Expires is set only when
        it is not already in the response headers.

    debug
        If True, log whether the response was treated as cacheable.

    """
    response = cherrypy.serving.response
    headers = response.headers

    if not force:
        # Header names that indicate that the response can be cached.
        present = [name
                   for name in ('Etag', 'Last-Modified', 'Age', 'Expires')
                   if name in headers]
        if not present:
            if debug:
                cherrypy.log('request is not cacheable', 'TOOLS.EXPIRES')
            return

    if debug:
        cherrypy.log('request is cacheable', 'TOOLS.EXPIRES')

    def assign(name, value):
        # Existing values are only overwritten when force is set.
        if force or name not in headers:
            headers[name] = value

    if isinstance(secs, datetime.timedelta):
        secs = secs.seconds + (secs.days * 86400)

    if secs == 0:
        assign("Pragma", "no-cache")
        if cherrypy.serving.request.protocol >= (1, 1):
            assign("Cache-Control", "no-cache, must-revalidate")
        # Set an explicit Expires date in the past.
        expiry = httputil.HTTPDate(1169942400.0)
    else:
        expiry = httputil.HTTPDate(response.time + secs)
    assign("Expires", expiry)
OLD | NEW |