Chromium Code Reviews

Unified Diff: app.py

Issue 10448057: Add refresh support to the console page. (Closed)
Base URL: svn://svn.chromium.org/chrome/trunk/tools/chromium-build
Patch Set: Created 8 years, 6 months ago
 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 from __future__ import with_statement

 import datetime
+import json
 import logging
 import os
 import random
 import re
 import string
 import urllib

 from google.appengine.api import files, memcache, urlfetch
 from google.appengine.api.app_identity import get_application_id
 from google.appengine.ext import blobstore, db, deferred
(...skipping 12 matching lines...)
 APP_NAME = get_application_id()

 # Deadline for fetching URLs (in seconds).
 URLFETCH_DEADLINE = 60*5  # 5 mins


 # Perform initial bootstrap for this module.
 console_template = ''
 def bootstrap():
   global console_template
-  with open('templates/console.html', 'r') as fh:
+  with open('templates/merger.html', 'r') as fh:
     console_template = fh.read()

+def get_pagedata_from_cache(localpath):
+  memcache_data = memcache.get(localpath)
+  if not memcache_data:
+    return None
+  logging.debug('content for %s found in memcache' % localpath)
+  return json.loads(memcache_data)
+
+
+def put_pagedata_into_cache(localpath, page_data):
+  memcache_data = json.dumps(page_data)
+  if not memcache.set(key=localpath, value=memcache_data, time=2*60):
+    logging.error('put_pagedata_into_cache(\'%s\'): memcache.set() failed' % (
+        localpath))
+  return True
M-A Ruel 2012/05/29 18:46:33: why return something at all?
cmp 2012/05/29 19:38:03: Done.
+
+
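A note on the two new helpers above: they route every cache read and write through JSON, so the unit of caching becomes a structured page_data dict rather than a raw content string. A minimal sketch of the same round-trip pattern, using a plain dict to stand in for App Engine's memcache (the real helpers also set a two-minute expiry via memcache.set(..., time=2*60)):

    import json

    _FAKE_MEMCACHE = {}  # stand-in for google.appengine.api.memcache

    def put_pagedata(localpath, page_data):
      # memcache values must be strings, so serialize the whole dict.
      _FAKE_MEMCACHE[localpath] = json.dumps(page_data)

    def get_pagedata(localpath):
      raw = _FAKE_MEMCACHE.get(localpath)
      if raw is None:
        return None  # cache miss; the caller falls back to the datastore
      return json.loads(raw)

    put_pagedata('chromium/console', {'title': 'BuildBot', 'content': '...'})
    assert get_pagedata('chromium/console')['title'] == 'BuildBot'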
 # Assumes localpath is already unquoted.
M-A Ruel 2012/05/29 18:46:33: This should go in a docstring to explain what the [...]
cmp 2012/05/29 19:38:03: Done.
-def get_and_cache_page(localpath):
-  # E1101: 29,12:get_and_cache_page: Module 'google.appengine.api.memcache' has
-  # no 'get' member
-  # pylint: disable=E1101
-  content = memcache.get(localpath)
-  if content is not None:
-    logging.debug('content for %s found in memcache' % localpath)
-    return content
-
+def get_and_cache_pagedata(localpath):
+  page_data = get_pagedata_from_cache(localpath)
+  if page_data and not page_data.get('content_blob'):
+    return page_data
   page = Page.all().filter('localpath =', localpath).get()
   if not page:
-    logging.error('get_and_cache_page(\'%s\'): no matching localpath in '
+    logging.error('get_and_cache_pagedata(\'%s\'): no matching localpath in '
                   'datastore' % localpath)
-    return None
-  if page.content_blob is not None:
+    return {'content': None}
+  page_data = {
+      'body_class': page.body_class,
+      'offsite_base': page.offsite_base,
+      'title': page.title,
+  }
+  if page.content_blob:
     # Get the blob.
+    logging.debug('content for %s found in blobstore' % localpath)
     blob_reader = blobstore.BlobReader(page.content_blob)
-    content = blob_reader.read().decode('utf-8', 'replace')
-    logging.debug('content for %s found in blobstore' % localpath)
+    page_data['content_blob'] = True
+    put_pagedata_into_cache(localpath, page_data)
+    page_data['content'] = blob_reader.read().decode('utf-8', 'replace')
   else:
     logging.debug('content for %s found in datastore' % localpath)
-    content = page.content
+    page_data['content'] = page.content
+    put_pagedata_into_cache(localpath, page_data)
-  # E1101: 39,11:get_and_cache_page: Module 'google.appengine.api.memcache'
-  # has no 'set' member
-  # pylint: disable=E1101
-  if not memcache.set(key=localpath, value=content, time=2*60):
-    logging.error('get_and_cache_page(\'%s\'): memcache.set() failed' %
-                  localpath)
-  return content
+  return page_data

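The content_blob flag in the rewritten function is what keeps oversized pages out of memcache: for blob-backed pages only the metadata is cached, and a cache hit that carries content_blob is deliberately treated as a miss so the body is re-read rather than trusted from the cache. A sketch of that dispatch, with read_body as a hypothetical stand-in for the Page/BlobReader lookup:

    def get_pagedata(localpath, cache, read_body):
      # read_body(localpath) is a hypothetical stand-in for the
      # datastore/blobstore read in get_and_cache_pagedata.
      page_data = cache.get(localpath)
      if page_data and not page_data.get('content_blob'):
        return page_data  # small page: the full content was cached
      # Blob-backed or uncached page: any cached entry holds metadata
      # only, so fetch the body instead of returning the cache hit.
      page_data = page_data or {}
      page_data['content'] = read_body(localpath)
      return page_data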
 class ConsoleData(object):
   def __init__(self):
     self.row_orderedkeys = []
     self.row_data = {}

     # Retain order of observed masters.
     self.masters = []

(...skipping 80 matching lines...)
   def Finish(self):
     self.row_orderedkeys = sorted(self.row_orderedkeys, key=int, reverse=True)
     # TODO(cmp): Look for row/master/categories that are unset. If they are
     #            at the latest revisions, leave them unset. If they are at
     #            the earliest revisions, set them to ''.


 # W0613:169,39:console_merger: Unused argument 'remoteurl'
 # W0613:169,19:console_merger: Unused argument 'unquoted_localpath'
 # pylint: disable=W0613
-def console_merger(unquoted_localpath, remote_url, content=None):
-  if content is None:
-    return None
+def console_merger(unquoted_localpath, remote_url, page_data=None):
+  page_data = page_data or {}

   masters = [
       'chromium.main',
       'chromium.chromiumos',
       'chromium.chrome',
       'chromium.memory',
   ]
   mergedconsole = ConsoleData()
   merged_page = None
   merged_tag = None
   fetch_timestamp = datetime.datetime.now()
   for master in masters:
-    master_content = get_and_cache_page('%s/console' % master)
+    page_data = get_and_cache_pagedata('%s/console' % master)
+    master_content = page_data['content']
     if master_content is None:
       continue
     master_content = master_content.encode('ascii', 'replace')
     this_page = BeautifulSoup(master_content)
     this_tag = this_page.find('table', {'class': 'ConsoleData'})
     # The first console is special, we reuse all of the console page.
     if not merged_page:
       merged_page = this_page
       merged_tag = this_tag
     mergedconsole.SawMaster(master)
(...skipping 74 matching lines...)
   merged_content = re.sub(
       r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content)
   merged_content = re.sub(
       r'\<iframe\>\</iframe\>',
       '<iframe \' + attributes + \' src="\' + url + \'"></iframe>',
       merged_content)

   # Update the merged console page.
   merged_page = get_or_create_page('chromium/console', None, maxage=30)
   logging.debug('console_merger: saving merged console')
-  save_page(merged_page, 'chromium/console', merged_content,
-            fetch_timestamp)
-  return merged_content
+  page_data['title'] = 'BuildBot: Chromium'
+  page_data['offsite_base'] = 'http://build.chromium.org/p/chromium'
+  page_data['body_class'] = 'interface'
+  page_data['content'] = merged_content
+  save_page(merged_page, 'chromium/console', fetch_timestamp, page_data)
+  return


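The 74 skipped lines walk each master's ConsoleData table and splice its rows into the first page's table. A rough sense of that merge from a toy example (hand-written markup, assuming the BeautifulSoup 3 import this file already relies on; the real code also regroups rows by revision through ConsoleData):

    from BeautifulSoup import BeautifulSoup  # BS3-era API, as used here

    first = BeautifulSoup(
        '<table class="ConsoleData"><tr><td>r101 ok</td></tr></table>')
    other = BeautifulSoup(
        '<table class="ConsoleData"><tr><td>r102 fail</td></tr></table>')
    merged_tag = first.find('table', {'class': 'ConsoleData'})
    for row in other.find('table', {'class': 'ConsoleData'}).findAll('tr'):
      merged_tag.append(row)
    # str(first) now contains both rows.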
 # W0613:284,20:console_handler: Unused argument 'unquoted_localpath'
 # pylint: disable=W0613
-def console_handler(unquoted_localpath, remoteurl, content=None):
+def console_handler(unquoted_localpath, remoteurl, page_data=None):
M-A Ruel 2012/05/29 18:46:33: Use a leading underscore to silence the warning, e[...]
cmp 2012/05/29 19:38:03: Done.
-  if content is None:
-    return None
-  # TODO(cmp): Fix the LKGR link.
+  page_data = page_data or {}
+  content = page_data.get('content')
+  if not content:
+    return page_data

   # Decode content from utf-8 to unicode, replacing bad characters.
   content = content.decode('utf-8', 'replace')

   # Scrub in sheriff file content to console.
   sheriff_files = [
       'sheriff',
       'sheriff_android',
       'sheriff_cr_cros_gardeners',
       'sheriff_cros_mtv',
       'sheriff_cros_nonmtv',
       'sheriff_gpu',
       'sheriff_memory',
       'sheriff_nacl',
       'sheriff_perf',
       'sheriff_webkit',
   ]
   for sheriff_file in sheriff_files:
-    sheriff_content = get_and_cache_page('chromium/%s.js' % sheriff_file)
+    sheriff_page_data = get_and_cache_pagedata('chromium/%s.js' % sheriff_file)
+    sheriff_content = sheriff_page_data['content']
     console_re = (r'<script src=\'http://chromium-build.appspot.com/'
                   'p/chromium/%s.js\'></script>')
     content = re.sub(console_re % sheriff_file,
                      '<script>%s</script>' % sheriff_content, content)

   # Replace showBuildBox with direct links.
   content = re.sub(r'<a href=\'#\' onclick=\'showBuildBox\(\"./(.+)\", event\);'
                    ' return false;\'',
                    r"<a href='\1'", content)

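Applied to a hand-written sample of the BuildBot markup it targets (illustrative, not captured from a real console page), the showBuildBox substitution above behaves like this:

    import re

    sample = ("<a href='#' onclick='showBuildBox(\"./builders/Linux/builds/12\", "
              "event); return false;'>12</a>")
    rewritten = re.sub(r'<a href=\'#\' onclick=\'showBuildBox\(\"./(.+)\", event\);'
                       r' return false;\'',
                       r"<a href='\1'", sample)
    # rewritten == "<a href='builders/Linux/builds/12'>12</a>"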
(...skipping 43 matching lines...)
   content = string.replace(content,
       "'/json/builders/Linux%20x64/builds/-1?as_text=1';",
       "'/json/builders/Linux%20x64/builds/-1/as_text=1.json';")

   # Fix up a reference to http chromium-build in BarUrl().
   content = string.replace(content,
       "return 'http://chromium-build.appspot.com/p/'",
       "return 'https://chromium-build.appspot.com/p/'")

   # Encode content from unicode to utf-8.
-  content = content.encode('utf-8')
-  return content
+  page_data['content'] = content.encode('utf-8')
+
+  # Last tweaks to HTML, plus extracting metadata about the page itself.
+  page_data['offsite_base'] = remoteurl + '/../'
+
+  # Extract the title from the page.
+  md = re.search(
+      r'^.*<title>([^\<]+)</title>',
+      page_data['content'],
+      re.MULTILINE|re.DOTALL)
+  if md:
+    page_data['title'] = md.group(1)
+
+  # Remove the leading text up to the end of the opening body tag. While
+  # there, extract the body_class from the page.
+  md = re.search(
+      r'^.*<body class="(\w+)\">(.*)$',
+      page_data['content'],
+      re.MULTILINE|re.DOTALL)
+  if md:
M-A Ruel 2012/05/29 18:46:33: Are these expected to fail in the normal case? Bec[...]
cmp 2012/05/29 19:38:03: Done.
+    page_data['body_class'] = md.group(1)
+    page_data['content'] = md.group(2)
+
+  # Remove the leading div and hr tags.
+  md = re.search(
+      r'^.*?<hr/>(.*)$',
+      page_data['content'],
+      re.MULTILINE|re.DOTALL)
+  if md:
+    page_data['content'] = md.group(1)
+
+  # Strip the trailing body and html tags.
+  md = re.search(
+      r'^(.*)</body>.*$',
+      page_data['content'],
+      re.MULTILINE|re.DOTALL)
+  if md:
+    page_data['content'] = md.group(1)
+
+  return page_data


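The metadata block added at the end of console_handler reduces a fetched console to its inner body plus the fields the merged-console template can re-wrap it with. On a toy document (hand-made HTML; real pages are far larger) the extraction behaves like this:

    import re

    html = ('<html><head><title>Chromium Console</title></head>'
            '<body class="interface"><div>header</div><hr/>ROWS</body></html>')
    page_data = {'content': html}

    md = re.search(r'^.*<title>([^\<]+)</title>', page_data['content'],
                   re.MULTILINE | re.DOTALL)
    if md:
      page_data['title'] = md.group(1)       # 'Chromium Console'

    md = re.search(r'^.*<body class="(\w+)\">(.*)$', page_data['content'],
                   re.MULTILINE | re.DOTALL)
    if md:
      page_data['body_class'] = md.group(1)  # 'interface'
      page_data['content'] = md.group(2)     # everything after the <body> tag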
-def one_box_handler(unquoted_localpath, remoteurl, content=None):
+def one_box_handler(unquoted_localpath, remoteurl, page_data=None):
+  page_data = page_data or {}
+  content = page_data.get('content')
   if content is None:
-    return None
+    return page_data
   # Get the site name from the local path.
   md = re.match('^([^\/]+)/.*$', unquoted_localpath)
   if not md:
     logging.error('one_box_handler(\'%s\', \'%s\', \'%s\'): cannot get site '
-                  'from local path' % (unquoted_localpath, remoteurl, content))
-    return content
+                  'from local path' % (
+                      unquoted_localpath, remoteurl, page_data))
+    return page_data
   site = md.group(1)
   new_waterfall_url = 'http://build.chromium.org/p/%s/waterfall' % site
-  content = re.sub(r'waterfall', new_waterfall_url, content)
-  return content
+  page_data['content'] = re.sub(
+      r'waterfall',
+      new_waterfall_url,
+      page_data['content'])
+  return page_data



 # List of URLs to fetch.
 URLS = [
   # Console URLs.
   {
     'remoteurl': 'http://build.chromium.org/p/chromium/console',
     'localpath': 'chromium.main/console',
     'postfetch': console_handler,
(...skipping 233 matching lines...)
     return urlfetch.fetch(url, deadline=URLFETCH_DEADLINE, *args, **kwargs)
   except urlfetch.DownloadError:
     logging.warn('urlfetch failed: %s' % url, exc_info=1)
     return None


 class Page(db.Model):
   fetch_timestamp = db.DateTimeProperty(required=True)
   localpath = db.StringProperty(required=True)
   content = db.TextProperty()
+  title = db.StringProperty()
+  offsite_base = db.StringProperty()
+  body_class = db.StringProperty()
   remoteurl = db.TextProperty()
   # Data updated separately, after creation.
   content_blob = blobstore.BlobReferenceProperty()


 def write_blob(data, mime_type):
   """Saves a Unicode string as a new blob, returns the blob's key."""
   file_name = files.blobstore.create(mime_type=mime_type)
   data = data.encode('utf-8')
   with files.open(file_name, 'a') as blob_file:
     blob_file.write(data)
   files.finalize(file_name)
   return files.blobstore.get_blob_key(file_name)


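write_blob is the overflow path used by save_page below: content at or above 1 MB, the datastore's per-entity size ceiling, is written through the App Engine Files API (the standard way to create blobs programmatically in this era, since deprecated). A usage sketch of the round trip; these calls only work inside the GAE runtime, and the values are hypothetical:

    # write_blob and path_to_mime_type are defined in this file.
    key = write_blob(u'<html>very large page</html>', 'text/html')
    body = blobstore.BlobReader(key).read().decode('utf-8', 'replace')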
-def save_page(page, localpath, content, fetch_timestamp):
+def save_page(page, localpath, fetch_timestamp, page_data):
+  body_class = page_data.get('body_class', '')
+  content = page_data.get('content')
+  offsite_base = page_data.get('offsite_base', '')
+  title = page_data.get('title', '')
+
   content_blob_key = None
   try:
     content = content.decode('utf-8', 'replace')
   except UnicodeEncodeError:
     logging.debug('save_page: content was already in unicode')
   logging.debug('save_page: content size is %d' % len(content))
   if len(content.encode('utf-8')) >= 1024*1024:
     logging.debug('save_page: saving to blob')
     content_blob_key = write_blob(content, path_to_mime_type(localpath))
     content = None
   def tx_page(page_key):
     page = Page.get(page_key)
     # E1103:225,7:fetch_page.tx_page: Instance of 'list' has no
     # 'fetch_timestamp' member (but some types could not be inferred)
     # pylint: disable=E1103
     if page.fetch_timestamp > fetch_timestamp:
       return
     page.content = content
     page.content_blob = content_blob_key
     page.fetch_timestamp = fetch_timestamp
+    # title, offsite_base, body_class can all be empty strings for some
+    # content. Where that's true, they're not used for displaying a console-
+    # like resource, and the content alone is returned to the web user.
+    page.title = title
+    page.offsite_base = offsite_base
+    page.body_class = body_class
     # E1103:231,4:fetch_page.tx_page: Instance of 'list' has no 'put' member
     # (but some types could not be inferred)
     # pylint: disable=E1103
     page.put()
   db.run_in_transaction(tx_page, page.key())
-  # E1101:232,11:fetch_page.tx_page: Module 'google.appengine.api.memcache'
+  page_data = {
M-A Ruel 2012/05/29 18:46:33: you could make a page.as_dict() member function, t[...]
cmp 2012/05/29 19:38:03: page_data is not really a Page-as-dict. It's more[...]
-  # has no 'set' member
-  # pylint: disable=E1101
-  if page.content_blob is None:
-    if memcache.set(key=localpath, value=page.content, time=60):
-      logging.debug('tx_page(page key="%s"): memcache.set() succeeded' %
-                    page.key())
-    else:
-      logging.error('tx_page(page key="%s"): memcache.set() failed' %
-                    page.key())
+      'body_class': body_class,
+      'content': content,
+      'offsite_base': offsite_base,
+      'title': title,
+  }
+  if content_blob_key:
+    page_data['content_blob'] = True
+  put_pagedata_into_cache(localpath, page_data)


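Two details of the rewritten save_page are worth spelling out: the timestamp guard inside tx_page makes concurrent fetches safe (the entity is re-read inside the transaction and a stale write is silently dropped), and the cache is now populated after the transaction commits, in the same page_data shape the cache readers expect. A compressed view of that ordering, not a drop-in replacement:

    def save(page, localpath, fetch_timestamp, page_data):
      def tx(key):
        ent = Page.get(key)                  # re-read inside the transaction
        if ent.fetch_timestamp > fetch_timestamp:
          return                             # a newer fetch already won
        ent.fetch_timestamp = fetch_timestamp
        ent.put()
      db.run_in_transaction(tx, page.key())  # may retry tx on contention
      put_pagedata_into_cache(localpath, page_data)  # cache after commit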
 def get_or_create_page(localpath, remoteurl, maxage):
   return Page.get_or_insert(
       key_name=localpath,
       localpath=localpath,
       remoteurl=remoteurl,
       maxage=maxage,
       fetch_timestamp=datetime.datetime.now() - datetime.timedelta(hours=24),
       content=None,
       content_blob=None)


-def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None):
+def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None,
+               fetch_url=nonfatal_fetch_url):
   """Fetches data about a set of pages."""
   unquoted_localpath = urllib.unquote(localpath)
   logging.debug('fetch_page("%s", "%s", "%s")' % (
       unquoted_localpath, remoteurl, maxage))
   page = get_or_create_page(unquoted_localpath, remoteurl, maxage)

   # Check if our copy of the page is younger than maxage. If it is, we'll
   # skip the fetch.
   oldest_acceptable_timestamp = datetime.datetime.now() - datetime.timedelta(
       seconds=maxage)
   if (page.fetch_timestamp and
       page.fetch_timestamp > oldest_acceptable_timestamp):
     logging.debug('fetch_page: too recent, skipping')
     return

   # Perform the actual page fetch.
   fetch_timestamp = datetime.datetime.now()
-  response = nonfatal_fetch_url(remoteurl)
+  response = fetch_url(remoteurl)
   if not response:
     logging.warning('fetch_page: got empty response')
     return
   if response.status_code != 200:
     logging.warning('fetch_page: got non-empty response but code '
                     '%d' % response.status_code)
     return

   # We have actual content. If there's one or more handlers, call them.
-  content = response.content
+  page_data = {}
+  page_data['content'] = response.content
   if postfetch:
     if not isinstance(postfetch, list):
       postfetch = [postfetch]
     for handler in postfetch:
       logging.debug('fetch_page: calling postfetch handler '
                     '%s' % handler.__name__)
-      content = handler(unquoted_localpath, remoteurl, content)
+      page_data = handler(unquoted_localpath, remoteurl, page_data)

   # Save the returned content into the DB and caching layers.
   logging.debug('fetch_page: saving page')
-  save_page(page, unquoted_localpath, content, fetch_timestamp)
+  save_page(page, unquoted_localpath, fetch_timestamp, page_data)
   if postsave:
     if not isinstance(postsave, list):
       postsave = [postsave]
     for handler in postsave:
       logging.debug('fetch_page: calling postsave handler '
                     '%s' % handler.__name__)
-      handler(unquoted_localpath, remoteurl, content)
+      handler(unquoted_localpath, remoteurl, page_data)


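With this change every postfetch handler shares one contract, handler(unquoted_localpath, remoteurl, page_data) -> page_data, so fetch_page can thread the same dict through a list of them. A self-contained sketch of that chaining (hypothetical handlers, not from the patch):

    def add_title(localpath, remoteurl, page_data):
      page_data['title'] = 'BuildBot: %s' % localpath
      return page_data

    def upgrade_links(localpath, remoteurl, page_data):
      page_data['content'] = page_data['content'].replace('http://', 'https://')
      return page_data

    page_data = {'content': "<a href='http://build.chromium.org/'>builds</a>"}
    for handler in [add_title, upgrade_links]:
      page_data = handler('chromium/console', 'http://build.chromium.org',
                          page_data)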
 EXT_TO_MIME = {
   '.css': 'text/css',
   '.js': 'text/javascript',
   '.json': 'application/json',
   '.html': 'text/html',
 }


 def path_to_mime_type(path):
   return EXT_TO_MIME.get(os.path.splitext(path)[1], 'text/html')


 def fetch_pages():
   """Starts a background fetch operation for pages that need it."""
   logging.debug('fetch_pages()')
   for url in URLS:
     deferred.defer(fetch_page, **url)
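fetch_pages fans the URLS table out to the task queue: deferred.defer(fetch_page, **url) serializes one task per page, so each entry's keys must match fetch_page's keyword arguments. The visible console entry above would expand roughly like this (maxage=30 is an assumed illustrative value; the actual entries are in the elided part of URLS):

    deferred.defer(fetch_page,
                   remoteurl='http://build.chromium.org/p/chromium/console',
                   localpath='chromium.main/console',
                   postfetch=console_handler,
                   maxage=30)  # seconds; assumed value for illustration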