Side by Side Diff: app.py

Issue 10448057: Add refresh support to the console page. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/chromium-build
Patch Set: use reload instead of refresh Created 8 years, 6 months ago
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from __future__ import with_statement 5 from __future__ import with_statement
6 6
7 import datetime 7 import datetime
8 import json
8 import logging 9 import logging
9 import os 10 import os
10 import random 11 import random
11 import re 12 import re
12 import string 13 import string
13 import urllib 14 import urllib
14 15
15 from google.appengine.api import files, memcache, urlfetch 16 from google.appengine.api import files, memcache, urlfetch
16 from google.appengine.api.app_identity import get_application_id 17 from google.appengine.api.app_identity import get_application_id
17 from google.appengine.ext import blobstore, db, deferred 18 from google.appengine.ext import blobstore, db, deferred
(...skipping 12 matching lines...)
30 APP_NAME = get_application_id() 31 APP_NAME = get_application_id()
31 32
32 # Deadline for fetching URLs (in seconds). 33 # Deadline for fetching URLs (in seconds).
33 URLFETCH_DEADLINE = 60*5 # 5 mins 34 URLFETCH_DEADLINE = 60*5 # 5 mins
34 35
35 36
36 # Perform initial bootstrap for this module. 37 # Perform initial bootstrap for this module.
37 console_template = '' 38 console_template = ''
38 def bootstrap(): 39 def bootstrap():
39 global console_template 40 global console_template
40 with open('templates/console.html', 'r') as fh: 41 with open('templates/merger.html', 'r') as fh:
41 console_template = fh.read() 42 console_template = fh.read()
42 43
43 44
44 # Assumes localpath is already unquoted. 45 def get_pagedata_from_cache(localpath):
45 def get_and_cache_page(localpath): 46 memcache_data = memcache.get(localpath)
46 # E1101: 29,12:get_and_cache_page: Module 'google.appengine.api.memcache' has 47 if not memcache_data:
47 # no 'get' member 48 return None
48 # pylint: disable=E1101 49 logging.debug('content for %s found in memcache' % localpath)
49 content = memcache.get(localpath) 50 return json.loads(memcache_data)
50 if content is not None:
51 logging.debug('content for %s found in memcache' % localpath)
52 return content
53 51
52
53 def put_pagedata_into_cache(localpath, page_data):
54 memcache_data = json.dumps(page_data)
55 if not memcache.set(key=localpath, value=memcache_data, time=2*60):
56 logging.error('put_pagedata_into_cache(\'%s\'): memcache.set() failed' % (
57 localpath))
58
59
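An editorial sketch, not part of the patch: the JSON round-trip that get_pagedata_from_cache and put_pagedata_into_cache perform, with a plain dict standing in for App Engine memcache and the two-minute expiry elided.

import json

fake_memcache = {}  # stand-in for google.appengine.api.memcache

def put_pagedata_into_cache_sketch(localpath, page_data):
    # Serialize the whole page_data dict to JSON, as the real helper does.
    fake_memcache[localpath] = json.dumps(page_data)

def get_pagedata_from_cache_sketch(localpath):
    memcache_data = fake_memcache.get(localpath)
    if not memcache_data:
        return None
    return json.loads(memcache_data)

put_pagedata_into_cache_sketch('chromium/console', {'title': 'BuildBot: Chromium'})
print(get_pagedata_from_cache_sketch('chromium/console')['title'])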
60 def get_and_cache_pagedata(localpath):
61 """Return a page_data dict, optionally caching and looking up a blob.
M-A Ruel 2012/05/29 20:03:49 Returns
cmp 2012/05/29 20:20:28 Done.
62
63 get_and_cache_pagedata takes a localpath which is used to fetch data
64 from the cache. If the data is present and there's no content blob,
65 then we have all of the data we need to return a page view to the user
66 and we return early.
67
68 Otherwise, we need to fetch the page object and set up the page data
69 for the page view. If the page has a blob associated with it, then we
70 mark the page data as having a blob and cache it as-is without the blob.
71 If there's no blob, we associate the content with the page data and
72 cache that. This is so the next time get_and_cache_pagedata is called
73 for either case, we'll get the same behavior (a page-lookup for blobful
74 content and a page cache hit for blobless content).
75
76 Here we assume localpath is already unquoted.
77 """
78 page_data = get_pagedata_from_cache(localpath)
79 if page_data and not page_data.get('content_blob'):
80 return page_data
54 page = Page.all().filter('localpath =', localpath).get() 81 page = Page.all().filter('localpath =', localpath).get()
55 if not page: 82 if not page:
56 logging.error('get_and_cache_page(\'%s\'): no matching localpath in ' 83 logging.error('get_and_cache_pagedata(\'%s\'): no matching localpath in '
57 'datastore' % localpath) 84 'datastore' % localpath)
58 return None 85 return {'content': None}
59 if page.content_blob is not None: 86 page_data = {
87 'body_class': page.body_class,
88 'offsite_base': page.offsite_base,
89 'title': page.title,
90 }
91 if page.content_blob:
60 # Get the blob. 92 # Get the blob.
93 logging.debug('content for %s found in blobstore' % localpath)
61 blob_reader = blobstore.BlobReader(page.content_blob) 94 blob_reader = blobstore.BlobReader(page.content_blob)
62 content = blob_reader.read().decode('utf-8', 'replace') 95 page_data['content_blob'] = True
63 logging.debug('content for %s found in blobstore' % localpath) 96 put_pagedata_into_cache(localpath, page_data)
97 page_data['content'] = blob_reader.read().decode('utf-8', 'replace')
64 else: 98 else:
65 logging.debug('content for %s found in datastore' % localpath) 99 logging.debug('content for %s found in datastore' % localpath)
66 content = page.content 100 page_data['content'] = page.content
67 # E1101: 39,11:get_and_cache_page: Module 'google.appengine.api.memcache' 101 put_pagedata_into_cache(localpath, page_data)
68 # has no 'set' member 102 return page_data
69 # pylint: disable=E1101
70 if not memcache.set(key=localpath, value=content, time=2*60):
71 logging.error('get_and_cache_page(\'%s\'): memcache.set() failed' %
72 localpath)
73 return content
74 103
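For reference, an editorial sketch of the two shapes of page_data that get_and_cache_pagedata can return; the values are illustrative, borrowed from the console code later in this file.

# Small pages: content is stored inline and cached alongside the metadata.
blobless_page_data = {
    'body_class': 'interface',
    'offsite_base': 'http://build.chromium.org/p/chromium',
    'title': 'BuildBot: Chromium',
    'content': '<table class="ConsoleData">...</table>',
}

# Oversized pages: only the metadata plus a content_blob marker is cached;
# the content itself is re-read from blobstore on every lookup.
blobful_page_data = {
    'body_class': 'interface',
    'offsite_base': 'http://build.chromium.org/p/chromium',
    'title': 'BuildBot: Chromium',
    'content_blob': True,
    'content': '...decoded blobstore content...',
}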
75 104
76 class ConsoleData(object): 105 class ConsoleData(object):
77 def __init__(self): 106 def __init__(self):
78 self.row_orderedkeys = [] 107 self.row_orderedkeys = []
79 self.row_data = {} 108 self.row_data = {}
80 109
81 # Retain order of observed masters. 110 # Retain order of observed masters.
82 self.masters = [] 111 self.masters = []
83 112
(...skipping 80 matching lines...)
164 def Finish(self): 193 def Finish(self):
165 self.row_orderedkeys = sorted(self.row_orderedkeys, key=int, reverse=True) 194 self.row_orderedkeys = sorted(self.row_orderedkeys, key=int, reverse=True)
166 # TODO(cmp): Look for row/master/categories that are unset. If they are 195 # TODO(cmp): Look for row/master/categories that are unset. If they are
167 # at the latest revisions, leave them unset. If they are at 196 # at the latest revisions, leave them unset. If they are at
168 # the earliest revisions, set them to ''. 197 # the earliest revisions, set them to ''.
169 198
170 199
171 # W0613:169,39:console_merger: Unused argument 'remoteurl' 200 # W0613:169,39:console_merger: Unused argument 'remoteurl'
172 # W0613:169,19:console_merger: Unused argument 'unquoted_localpath' 201 # W0613:169,19:console_merger: Unused argument 'unquoted_localpath'
173 # pylint: disable=W0613 202 # pylint: disable=W0613
174 def console_merger(unquoted_localpath, remote_url, content=None): 203 def console_merger(unquoted_localpath, remote_url, page_data=None):
175 if content is None: 204 page_data = page_data or {}
176 return None
177 205
178 masters = [ 206 masters = [
179 'chromium.main', 207 'chromium.main',
180 'chromium.chromiumos', 208 'chromium.chromiumos',
181 'chromium.chrome', 209 'chromium.chrome',
182 'chromium.memory', 210 'chromium.memory',
183 ] 211 ]
184 mergedconsole = ConsoleData() 212 mergedconsole = ConsoleData()
185 merged_page = None 213 merged_page = None
186 merged_tag = None 214 merged_tag = None
187 fetch_timestamp = datetime.datetime.now() 215 fetch_timestamp = datetime.datetime.now()
188 for master in masters: 216 for master in masters:
189 master_content = get_and_cache_page('%s/console' % master) 217 page_data = get_and_cache_pagedata('%s/console' % master)
218 master_content = page_data['content']
190 if master_content is None: 219 if master_content is None:
191 continue 220 continue
192 master_content = master_content.encode('ascii', 'replace') 221 master_content = master_content.encode('ascii', 'replace')
193 this_page = BeautifulSoup(master_content) 222 this_page = BeautifulSoup(master_content)
194 this_tag = this_page.find('table', {'class': 'ConsoleData'}) 223 this_tag = this_page.find('table', {'class': 'ConsoleData'})
195 # The first console is special, we reuse all of the console page. 224 # The first console is special, we reuse all of the console page.
196 if not merged_page: 225 if not merged_page:
197 merged_page = this_page 226 merged_page = this_page
198 merged_tag = this_tag 227 merged_tag = this_tag
199 mergedconsole.SawMaster(master) 228 mergedconsole.SawMaster(master)
(...skipping 74 matching lines...)
274 merged_content = re.sub( 303 merged_content = re.sub(
275 r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content) 304 r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content)
276 merged_content = re.sub( 305 merged_content = re.sub(
277 r'\<iframe\>\</iframe\>', 306 r'\<iframe\>\</iframe\>',
278 '<iframe \' + attributes + \' src="\' + url + \'"></iframe>', 307 '<iframe \' + attributes + \' src="\' + url + \'"></iframe>',
279 merged_content) 308 merged_content)
280 309
281 # Update the merged console page. 310 # Update the merged console page.
282 merged_page = get_or_create_page('chromium/console', None, maxage=30) 311 merged_page = get_or_create_page('chromium/console', None, maxage=30)
283 logging.debug('console_merger: saving merged console') 312 logging.debug('console_merger: saving merged console')
284 save_page(merged_page, 'chromium/console', merged_content, 313 page_data['title'] = 'BuildBot: Chromium'
285 fetch_timestamp) 314 page_data['offsite_base'] = 'http://build.chromium.org/p/chromium'
286 return merged_content 315 page_data['body_class'] = 'interface'
316 page_data['content'] = merged_content
317 save_page(merged_page, 'chromium/console', fetch_timestamp, page_data)
318 return
287 319
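An editorial sketch of the per-master loop at the top of console_merger: fetch each master's cached console page and locate its ConsoleData table before merging rows (the merging itself is in the lines elided above). BeautifulSoup 4 syntax is used here to keep the example self-contained; the app imports an older BeautifulSoup and also ascii-encodes the content first.

from bs4 import BeautifulSoup

def gather_console_tables(masters, get_pagedata):
    # get_pagedata is a stand-in for get_and_cache_pagedata.
    tables = []
    for master in masters:
        content = get_pagedata('%s/console' % master).get('content')
        if content is None:
            continue
        page = BeautifulSoup(content, 'html.parser')
        tables.append(page.find('table', {'class': 'ConsoleData'}))
    return tables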
288 320
289 # W0613:284,20:console_handler: Unused argument 'unquoted_localpath' 321 def console_handler(_unquoted_localpath, remoteurl, page_data=None):
290 # pylint: disable=W0613 322 page_data = page_data or {}
291 def console_handler(unquoted_localpath, remoteurl, content=None): 323 content = page_data.get('content')
292 if content is None: 324 if not content:
293 return None 325 return page_data
294 # TODO(cmp): Fix the LKGR link.
295 326
296 # Decode content from utf-8 to unicode, replacing bad characters. 327 # Decode content from utf-8 to unicode, replacing bad characters.
297 content = content.decode('utf-8', 'replace') 328 content = content.decode('utf-8', 'replace')
298 329
299 # Scrub in sheriff file content to console. 330 # Scrub in sheriff file content to console.
300 sheriff_files = [ 331 sheriff_files = [
301 'sheriff', 332 'sheriff',
302 'sheriff_android', 333 'sheriff_android',
303 'sheriff_cr_cros_gardeners', 334 'sheriff_cr_cros_gardeners',
304 'sheriff_cros_mtv', 335 'sheriff_cros_mtv',
305 'sheriff_cros_nonmtv', 336 'sheriff_cros_nonmtv',
306 'sheriff_gpu', 337 'sheriff_gpu',
307 'sheriff_memory', 338 'sheriff_memory',
308 'sheriff_nacl', 339 'sheriff_nacl',
309 'sheriff_perf', 340 'sheriff_perf',
310 'sheriff_webkit', 341 'sheriff_webkit',
311 ] 342 ]
312 for sheriff_file in sheriff_files: 343 for sheriff_file in sheriff_files:
313 sheriff_content = get_and_cache_page('chromium/%s.js' % sheriff_file) 344 sheriff_page_data = get_and_cache_pagedata('chromium/%s.js' % sheriff_file)
345 sheriff_content = sheriff_page_data['content']
314 console_re = (r'<script src=\'http://chromium-build.appspot.com/' 346 console_re = (r'<script src=\'http://chromium-build.appspot.com/'
315 'p/chromium/%s.js\'></script>') 347 'p/chromium/%s.js\'></script>')
316 content = re.sub(console_re % sheriff_file, 348 content = re.sub(console_re % sheriff_file,
317 '<script>%s</script>' % sheriff_content, content) 349 '<script>%s</script>' % sheriff_content, content)
318 350
319 # Replace showBuildBox with direct links. 351 # Replace showBuildBox with direct links.
320 content = re.sub(r'<a href=\'#\' onclick=\'showBuildBox\(\"./(.+)\", event\);' 352 content = re.sub(r'<a href=\'#\' onclick=\'showBuildBox\(\"./(.+)\", event\);'
321 ' return false;\'', 353 ' return false;\'',
322 r"<a href='\1'", content) 354 r"<a href='\1'", content)
323 355
(...skipping 43 matching lines...)
367 content = string.replace(content, 399 content = string.replace(content,
368 "'/json/builders/Linux%20x64/builds/-1?as_text=1';", 400 "'/json/builders/Linux%20x64/builds/-1?as_text=1';",
369 "'/json/builders/Linux%20x64/builds/-1/as_text=1.json';") 401 "'/json/builders/Linux%20x64/builds/-1/as_text=1.json';")
370 402
371 # Fix up a reference to http chromium-build in BarUrl(). 403 # Fix up a reference to http chromium-build in BarUrl().
372 content = string.replace(content, 404 content = string.replace(content,
373 "return 'http://chromium-build.appspot.com/p/'", 405 "return 'http://chromium-build.appspot.com/p/'",
374 "return 'https://chromium-build.appspot.com/p/'") 406 "return 'https://chromium-build.appspot.com/p/'")
375 407
376 # Encode content from unicode to utf-8. 408 # Encode content from unicode to utf-8.
377 content = content.encode('utf-8') 409 page_data['content'] = content.encode('utf-8')
378 return content 410
411 # Last tweaks to HTML, plus extracting metadata about the page itself.
412 page_data['offsite_base'] = remoteurl + '/../'
413
414 # Extract the title from the page.
415 md = re.search(
416 r'^.*<title>([^\<]+)</title>',
417 page_data['content'],
418 re.MULTILINE|re.DOTALL)
419 if not md:
420 raise Exception('failed to locate title in page')
421 page_data['title'] = md.group(1)
422
423 # Remove the leading text up to the end of the opening body tag. While
424 # there, extract the body_class from the page.
425 md = re.search(
426 r'^.*<body class="(\w+)\">(.*)$',
427 page_data['content'],
428 re.MULTILINE|re.DOTALL)
429 if not md:
430 raise Exception('failed to locate leading text up to body tag')
431 page_data['body_class'] = md.group(1)
432 page_data['content'] = md.group(2)
433
434 # Remove the leading div and hr tags.
435 md = re.search(
436 r'^.*?<hr/>(.*)$',
437 page_data['content'],
438 re.MULTILINE|re.DOTALL)
439 if not md:
440 raise Exception('failed to locate leading div and hr tags')
441 page_data['content'] = md.group(1)
442
443 # Strip the trailing body and html tags.
444 md = re.search(
445 r'^(.*)</body>.*$',
446 page_data['content'],
447 re.MULTILINE|re.DOTALL)
448 if not md:
449 raise Exception('failed to locate trailing body and html tags')
450 page_data['content'] = md.group(1)
451
452 return page_data
379 453
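An editorial sketch of the metadata extraction console_handler now performs, applied to a made-up HTML fragment; the real handler also strips the leading div/hr and the trailing body/html tags with the same re.MULTILINE|re.DOTALL pattern style.

import re

html = ('<html><title>BuildBot: Chromium</title>'
        '<body class="interface"><div><hr/>console rows...</body></html>')
title = re.search(r'^.*<title>([^\<]+)</title>', html,
                  re.MULTILINE | re.DOTALL).group(1)
md = re.search(r'^.*<body class="(\w+)\">(.*)$', html,
               re.MULTILINE | re.DOTALL)
body_class, content = md.group(1), md.group(2)
print(title)       # BuildBot: Chromium
print(body_class)  # interface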
380 454
381 def one_box_handler(unquoted_localpath, remoteurl, content=None): 455 def one_box_handler(unquoted_localpath, remoteurl, page_data=None):
456 page_data = page_data or {}
457 content = page_data.get('content')
382 if content is None: 458 if content is None:
383 return None 459 return page_data
384 # Get the site name from the local path. 460 # Get the site name from the local path.
385 md = re.match('^([^\/]+)/.*$', unquoted_localpath) 461 md = re.match('^([^\/]+)/.*$', unquoted_localpath)
386 if not md: 462 if not md:
387 logging.error('one_box_handler(\'%s\', \'%s\', \'%s\'): cannot get site ' 463 logging.error('one_box_handler(\'%s\', \'%s\', \'%s\'): cannot get site '
388 'from local path' % (unquoted_localpath, remoteurl, content)) 464 'from local path' % (
389 return content 465 unquoted_localpath, remoteurl, page_data))
466 return page_data
390 site = md.group(1) 467 site = md.group(1)
391 new_waterfall_url = 'http://build.chromium.org/p/%s/waterfall' % site 468 new_waterfall_url = 'http://build.chromium.org/p/%s/waterfall' % site
392 content = re.sub(r'waterfall', new_waterfall_url, content) 469 page_data['content'] = re.sub(
393 return content 470 r'waterfall',
471 new_waterfall_url,
472 page_data['content'])
473 return page_data
394 474
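An editorial sketch of one_box_handler's waterfall-link rewrite, run on a made-up localpath and snippet; the real handler edits page_data['content'] in place and returns the dict.

import re

unquoted_localpath = 'chromium/one_box'   # hypothetical localpath
site = re.match(r'^([^/]+)/.*$', unquoted_localpath).group(1)
new_waterfall_url = 'http://build.chromium.org/p/%s/waterfall' % site
snippet = '<a href="waterfall">waterfall view</a>'
print(re.sub(r'waterfall', new_waterfall_url, snippet))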
395 475
396 476
397 # List of URLs to fetch. 477 # List of URLs to fetch.
398 URLS = [ 478 URLS = [
399 # Console URLs. 479 # Console URLs.
400 { 480 {
401 'remoteurl': 'http://build.chromium.org/p/chromium/console', 481 'remoteurl': 'http://build.chromium.org/p/chromium/console',
402 'localpath': 'chromium.main/console', 482 'localpath': 'chromium.main/console',
403 'postfetch': console_handler, 483 'postfetch': console_handler,
(...skipping 233 matching lines...)
637 return urlfetch.fetch(url, deadline=URLFETCH_DEADLINE, *args, **kwargs) 717 return urlfetch.fetch(url, deadline=URLFETCH_DEADLINE, *args, **kwargs)
638 except urlfetch.DownloadError: 718 except urlfetch.DownloadError:
639 logging.warn('urlfetch failed: %s' % url, exc_info=1) 719 logging.warn('urlfetch failed: %s' % url, exc_info=1)
640 return None 720 return None
641 721
642 722
643 class Page(db.Model): 723 class Page(db.Model):
644 fetch_timestamp = db.DateTimeProperty(required=True) 724 fetch_timestamp = db.DateTimeProperty(required=True)
645 localpath = db.StringProperty(required=True) 725 localpath = db.StringProperty(required=True)
646 content = db.TextProperty() 726 content = db.TextProperty()
727 title = db.StringProperty()
728 offsite_base = db.StringProperty()
729 body_class = db.StringProperty()
647 remoteurl = db.TextProperty() 730 remoteurl = db.TextProperty()
648 # Data updated separately, after creation. 731 # Data updated separately, after creation.
649 content_blob = blobstore.BlobReferenceProperty() 732 content_blob = blobstore.BlobReferenceProperty()
650 733
651 734
652 def write_blob(data, mime_type): 735 def write_blob(data, mime_type):
653 """Saves a Unicode string as a new blob, returns the blob's key.""" 736 """Saves a Unicode string as a new blob, returns the blob's key."""
654 file_name = files.blobstore.create(mime_type=mime_type) 737 file_name = files.blobstore.create(mime_type=mime_type)
655 data = data.encode('utf-8') 738 data = data.encode('utf-8')
656 with files.open(file_name, 'a') as blob_file: 739 with files.open(file_name, 'a') as blob_file:
657 blob_file.write(data) 740 blob_file.write(data)
658 files.finalize(file_name) 741 files.finalize(file_name)
659 return files.blobstore.get_blob_key(file_name) 742 return files.blobstore.get_blob_key(file_name)
660 743
661 744
662 def save_page(page, localpath, content, fetch_timestamp): 745 def save_page(page, localpath, fetch_timestamp, page_data):
746 body_class = page_data.get('body_class', '')
747 content = page_data.get('content')
748 offsite_base = page_data.get('offsite_base', '')
749 title = page_data.get('title', '')
750
663 content_blob_key = None 751 content_blob_key = None
664 try: 752 try:
665 content = content.decode('utf-8', 'replace') 753 content = content.decode('utf-8', 'replace')
666 except UnicodeEncodeError: 754 except UnicodeEncodeError:
667 logging.debug('save_page: content was already in unicode') 755 logging.debug('save_page: content was already in unicode')
668 logging.debug('save_page: content size is %d' % len(content)) 756 logging.debug('save_page: content size is %d' % len(content))
669 if len(content.encode('utf-8')) >= 1024*1024: 757 if len(content.encode('utf-8')) >= 1024*1024:
670 logging.debug('save_page: saving to blob') 758 logging.debug('save_page: saving to blob')
671 content_blob_key = write_blob(content, path_to_mime_type(localpath)) 759 content_blob_key = write_blob(content, path_to_mime_type(localpath))
672 content = None 760 content = None
673 def tx_page(page_key): 761 def tx_page(page_key):
674 page = Page.get(page_key) 762 page = Page.get(page_key)
675 # E1103:225,7:fetch_page.tx_page: Instance of 'list' has no 763 # E1103:225,7:fetch_page.tx_page: Instance of 'list' has no
676 # 'fetch_timestamp' member (but some types could not be inferred) 764 # 'fetch_timestamp' member (but some types could not be inferred)
677 # pylint: disable=E1103 765 # pylint: disable=E1103
678 if page.fetch_timestamp > fetch_timestamp: 766 if page.fetch_timestamp > fetch_timestamp:
679 return 767 return
680 page.content = content 768 page.content = content
681 page.content_blob = content_blob_key 769 page.content_blob = content_blob_key
682 page.fetch_timestamp = fetch_timestamp 770 page.fetch_timestamp = fetch_timestamp
771 # title, offsite_base, body_class can all be empty strings for some
772 # content. Where that's true, they're not used for displaying a console-
773 # like resource, and the content alone is returned to the web user.
774 page.title = title
775 page.offsite_base = offsite_base
776 page.body_class = body_class
683 # E1103:231,4:fetch_page.tx_page: Instance of 'list' has no 'put' member 777 # E1103:231,4:fetch_page.tx_page: Instance of 'list' has no 'put' member
684 # (but some types could not be inferred) 778 # (but some types could not be inferred)
685 # pylint: disable=E1103 779 # pylint: disable=E1103
686 page.put() 780 page.put()
687 db.run_in_transaction(tx_page, page.key()) 781 db.run_in_transaction(tx_page, page.key())
688 # E1101:232,11:fetch_page.tx_page: Module 'google.appengine.api.memcache' 782 page_data = {
M-A Ruel 2012/05/29 20:03:49 I agree the function shouldn't be named as_dict()
cmp 2012/05/29 20:20:28 My preference given we're both on the fence is to
689 # has no 'set' member 783 'body_class': body_class,
690 # pylint: disable=E1101 784 'content': content,
691 if page.content_blob is None: 785 'offsite_base': offsite_base,
692 if memcache.set(key=localpath, value=page.content, time=60): 786 'title': title,
693 logging.debug('tx_page(page key="%s"): memcache.set() succeeded' % 787 }
694 page.key()) 788 if content_blob_key:
695 else: 789 page_data['content_blob'] = True
696 logging.error('tx_page(page key="%s"): memcache.set() failed' % 790 put_pagedata_into_cache(localpath, page_data)
697 page.key())
698 791
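An editorial sketch of the size check that decides, inside save_page, whether content stays inline on the Page entity or is written to blobstore; the 1 MB threshold mirrors the code above, and write_blob_stub is a hypothetical stand-in for write_blob.

def choose_storage(content, write_blob_stub):
    # Content at or above 1 MB when utf-8 encoded goes to blobstore and the
    # inline copy is dropped; smaller pages are stored inline on the entity.
    if len(content.encode('utf-8')) >= 1024 * 1024:
        return None, write_blob_stub(content)
    return content, None

print(choose_storage(u'x' * 10, lambda c: 'blob-key'))
print(choose_storage(u'x' * (1024 * 1024), lambda c: 'blob-key'))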
699 792
700 def get_or_create_page(localpath, remoteurl, maxage): 793 def get_or_create_page(localpath, remoteurl, maxage):
701 return Page.get_or_insert( 794 return Page.get_or_insert(
702 key_name=localpath, 795 key_name=localpath,
703 localpath=localpath, 796 localpath=localpath,
704 remoteurl=remoteurl, 797 remoteurl=remoteurl,
705 maxage=maxage, 798 maxage=maxage,
706 fetch_timestamp=datetime.datetime.now() - datetime.timedelta(hours=24), 799 fetch_timestamp=datetime.datetime.now() - datetime.timedelta(hours=24),
707 content=None, 800 content=None,
708 content_blob=None) 801 content_blob=None)
709 802
710 803
711 def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None): 804 def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None,
805 fetch_url=nonfatal_fetch_url):
712 """Fetches data about a set of pages.""" 806 """Fetches data about a set of pages."""
713 unquoted_localpath = urllib.unquote(localpath) 807 unquoted_localpath = urllib.unquote(localpath)
714 logging.debug('fetch_page("%s", "%s", "%s")' % ( 808 logging.debug('fetch_page("%s", "%s", "%s")' % (
715 unquoted_localpath, remoteurl, maxage)) 809 unquoted_localpath, remoteurl, maxage))
716 page = get_or_create_page(unquoted_localpath, remoteurl, maxage) 810 page = get_or_create_page(unquoted_localpath, remoteurl, maxage)
717 811
718 # Check if our copy of the page is younger than maxage. If it is, we'll 812 # Check if our copy of the page is younger than maxage. If it is, we'll
719 # skip the fetch. 813 # skip the fetch.
720 oldest_acceptable_timestamp = datetime.datetime.now() - datetime.timedelta( 814 oldest_acceptable_timestamp = datetime.datetime.now() - datetime.timedelta(
721 seconds=maxage) 815 seconds=maxage)
722 if (page.fetch_timestamp and 816 if (page.fetch_timestamp and
723 page.fetch_timestamp > oldest_acceptable_timestamp): 817 page.fetch_timestamp > oldest_acceptable_timestamp):
724 logging.debug('fetch_page: too recent, skipping') 818 logging.debug('fetch_page: too recent, skipping')
725 return 819 return
726 820
727 # Perform the actual page fetch. 821 # Perform the actual page fetch.
728 fetch_timestamp = datetime.datetime.now() 822 fetch_timestamp = datetime.datetime.now()
729 response = nonfatal_fetch_url(remoteurl) 823 response = fetch_url(remoteurl)
730 if not response: 824 if not response:
731 logging.warning('fetch_page: got empty response') 825 logging.warning('fetch_page: got empty response')
732 return 826 return
733 if response.status_code != 200: 827 if response.status_code != 200:
734 logging.warning('fetch_page: got non-empty response but code ' 828 logging.warning('fetch_page: got non-empty response but code '
735 '%d' % response.status_code) 829 '%d' % response.status_code)
736 return 830 return
737 831
738 # We have actual content. If there's one or more handlers, call them. 832 # We have actual content. If there's one or more handlers, call them.
739 content = response.content 833 page_data = {}
834 page_data['content'] = response.content
740 if postfetch: 835 if postfetch:
741 if not isinstance(postfetch, list): 836 if not isinstance(postfetch, list):
742 postfetch = [postfetch] 837 postfetch = [postfetch]
743 for handler in postfetch: 838 for handler in postfetch:
744 logging.debug('fetch_page: calling postfetch handler ' 839 logging.debug('fetch_page: calling postfetch handler '
745 '%s' % handler.__name__) 840 '%s' % handler.__name__)
746 content = handler(unquoted_localpath, remoteurl, content) 841 page_data = handler(unquoted_localpath, remoteurl, page_data)
747 842
748 # Save the returned content into the DB and caching layers. 843 # Save the returned content into the DB and caching layers.
749 logging.debug('fetch_page: saving page') 844 logging.debug('fetch_page: saving page')
750 save_page(page, unquoted_localpath, content, fetch_timestamp) 845 save_page(page, unquoted_localpath, fetch_timestamp, page_data)
751 if postsave: 846 if postsave:
752 if not isinstance(postsave, list): 847 if not isinstance(postsave, list):
753 postsave = [postsave] 848 postsave = [postsave]
754 for handler in postsave: 849 for handler in postsave:
755 logging.debug('fetch_page: calling postsave handler ' 850 logging.debug('fetch_page: calling postsave handler '
756 '%s' % handler.__name__) 851 '%s' % handler.__name__)
757 handler(unquoted_localpath, remoteurl, content) 852 handler(unquoted_localpath, remoteurl, page_data)
758 853
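An editorial sketch of how fetch_page threads a page_data dict through its postfetch handler chain; the two tiny handlers are invented for the example, while the real chains use console_handler, console_merger, one_box_handler and friends.

def add_title(localpath, remoteurl, page_data=None):
    page_data = page_data or {}
    page_data['title'] = 'BuildBot: Chromium'
    return page_data

def add_body_class(localpath, remoteurl, page_data=None):
    page_data = page_data or {}
    page_data['body_class'] = 'interface'
    return page_data

page_data = {'content': '<html>...</html>'}
for handler in [add_title, add_body_class]:
    page_data = handler('chromium.main/console',
                        'http://build.chromium.org/p/chromium/console',
                        page_data)
print(sorted(page_data))  # ['body_class', 'content', 'title']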
759 854
760 EXT_TO_MIME = { 855 EXT_TO_MIME = {
761 '.css': 'text/css', 856 '.css': 'text/css',
762 '.js': 'text/javascript', 857 '.js': 'text/javascript',
763 '.json': 'application/json', 858 '.json': 'application/json',
764 '.html': 'text/html', 859 '.html': 'text/html',
765 } 860 }
766 861
767 862
768 def path_to_mime_type(path): 863 def path_to_mime_type(path):
769 return EXT_TO_MIME.get(os.path.splitext(path)[1], 'text/html') 864 return EXT_TO_MIME.get(os.path.splitext(path)[1], 'text/html')
770 865
771 866
772 def fetch_pages(): 867 def fetch_pages():
773 """Starts a background fetch operation for pages that need it.""" 868 """Starts a background fetch operation for pages that need it."""
774 logging.debug('fetch_pages()') 869 logging.debug('fetch_pages()')
775 for url in URLS: 870 for url in URLS:
776 deferred.defer(fetch_page, **url) 871 deferred.defer(fetch_page, **url)
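Finally, an editorial sketch of how each URLS entry expands into fetch_page keyword arguments when fetch_pages defers it; maxage here is an illustrative value, and in the app the call goes through deferred.defer (App Engine task queues) rather than running inline.

url = {
    'remoteurl': 'http://build.chromium.org/p/chromium/console',
    'localpath': 'chromium.main/console',
    'postfetch': console_handler,  # defined earlier in app.py
    'maxage': 30,                  # seconds; illustrative
}
# deferred.defer(fetch_page, **url) queues the equivalent of:
#   fetch_page(remoteurl='http://build.chromium.org/p/chromium/console',
#              localpath='chromium.main/console',
#              postfetch=console_handler, maxage=30)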