OLD | NEW |
---|---|
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 from __future__ import with_statement | 5 from __future__ import with_statement |
6 | 6 |
7 import datetime | 7 import datetime |
8 import json | |
8 import logging | 9 import logging |
9 import os | 10 import os |
10 import random | 11 import random |
11 import re | 12 import re |
12 import string | 13 import string |
13 import urllib | 14 import urllib |
14 | 15 |
15 from google.appengine.api import files, memcache, urlfetch | 16 from google.appengine.api import files, memcache, urlfetch |
16 from google.appengine.api.app_identity import get_application_id | 17 from google.appengine.api.app_identity import get_application_id |
17 from google.appengine.ext import blobstore, db, deferred | 18 from google.appengine.ext import blobstore, db, deferred |
(...skipping 12 matching lines...) | |
30 APP_NAME = get_application_id() | 31 APP_NAME = get_application_id() |
31 | 32 |
32 # Deadline for fetching URLs (in seconds). | 33 # Deadline for fetching URLs (in seconds). |
33 URLFETCH_DEADLINE = 60*5 # 5 mins | 34 URLFETCH_DEADLINE = 60*5 # 5 mins |
34 | 35 |
35 | 36 |
36 # Perform initial bootstrap for this module. | 37 # Perform initial bootstrap for this module. |
37 console_template = '' | 38 console_template = '' |
38 def bootstrap(): | 39 def bootstrap(): |
39 global console_template | 40 global console_template |
40 with open('templates/console.html', 'r') as fh: | 41 with open('templates/merger.html', 'r') as fh: |
41 console_template = fh.read() | 42 console_template = fh.read() |
42 | 43 |
43 | 44 |
44 # Assumes localpath is already unquoted. | 45 def get_pagedata_from_cache(localpath): |
45 def get_and_cache_page(localpath): | 46 memcache_data = memcache.get(localpath) |
46 # E1101: 29,12:get_and_cache_page: Module 'google.appengine.api.memcache' has | 47 if not memcache_data: |
47 # no 'get' member | 48 return None |
48 # pylint: disable=E1101 | 49 logging.debug('content for %s found in memcache' % localpath) |
49 content = memcache.get(localpath) | 50 return json.loads(memcache_data) |
50 if content is not None: | |
51 logging.debug('content for %s found in memcache' % localpath) | |
52 return content | |
53 | 51 |
52 | |
53 def put_pagedata_into_cache(localpath, page_data): | |
54 memcache_data = json.dumps(page_data) | |
55 if not memcache.set(key=localpath, value=memcache_data, time=2*60): | |
56 logging.error('put_pagedata_into_cache(\'%s\'): memcache.set() failed' % ( | |
57 localpath)) | |
58 | |
59 | |
60 def get_and_cache_pagedata(localpath): | |
61 """Return a page_data dict, optionally caching and looking up a blob. | |
M-A Ruel 2012/05/29 20:03:49: Returns
cmp 2012/05/29 20:20:28: Done.
| |
62 | |
63 get_and_cache_pagedata takes a localpath which is used to fetch data | |
64 from the cache. If the data is present and there's no content blob, | |
65 then we have all of the data we need to return a page view to the user | |
66 and we return early. | |
67 | |
68 Otherwise, we need to fetch the page object and set up the page data | |
69 for the page view. If the page has a blob associated with it, then we | |
70 mark the page data as having a blob and cache it as-is without the blob. | |
71 If there's no blob, we associate the content with the page data and | |
72 cache that. This is so the next time get_and_cache_pagedata is called | |
73 for either case, we'll get the same behavior (a page-lookup for blobful | |
74 content and a page cache hit for blobless content). | |
75 | |
76 Here we assume localpath is already unquoted. | |
77 """ | |
78 page_data = get_pagedata_from_cache(localpath) | |
79 if page_data and not page_data.get('content_blob'): | |
80 return page_data | |
54 page = Page.all().filter('localpath =', localpath).get() | 81 page = Page.all().filter('localpath =', localpath).get() |
55 if not page: | 82 if not page: |
56 logging.error('get_and_cache_page(\'%s\'): no matching localpath in ' | 83 logging.error('get_and_cache_pagedata(\'%s\'): no matching localpath in ' |
57 'datastore' % localpath) | 84 'datastore' % localpath) |
58 return None | 85 return {'content': None} |
59 if page.content_blob is not None: | 86 page_data = { |
87 'body_class': page.body_class, | |
88 'offsite_base': page.offsite_base, | |
89 'title': page.title, | |
90 } | |
91 if page.content_blob: | |
60 # Get the blob. | 92 # Get the blob. |
93 logging.debug('content for %s found in blobstore' % localpath) | |
61 blob_reader = blobstore.BlobReader(page.content_blob) | 94 blob_reader = blobstore.BlobReader(page.content_blob) |
62 content = blob_reader.read().decode('utf-8', 'replace') | 95 page_data['content_blob'] = True |
63 logging.debug('content for %s found in blobstore' % localpath) | 96 put_pagedata_into_cache(localpath, page_data) |
97 page_data['content'] = blob_reader.read().decode('utf-8', 'replace') | |
64 else: | 98 else: |
65 logging.debug('content for %s found in datastore' % localpath) | 99 logging.debug('content for %s found in datastore' % localpath) |
66 content = page.content | 100 page_data['content'] = page.content |
67 # E1101: 39,11:get_and_cache_page: Module 'google.appengine.api.memcache' | 101 put_pagedata_into_cache(localpath, page_data) |
68 # has no 'set' member | 102 return page_data |
69 # pylint: disable=E1101 | |
70 if not memcache.set(key=localpath, value=content, time=2*60): | |
71 logging.error('get_and_cache_page(\'%s\'): memcache.set() failed' % | |
72 localpath) | |
73 return content | |
74 | 103 |
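For readers following the new cache contract, a minimal standalone sketch of the lookup flow above (a plain dict stands in for memcache, and the `page` dict is a hypothetical stand-in for the datastore entity):

```python
cache = {}  # stands in for memcache; the real code stores JSON strings

def lookup(localpath, page):
    data = cache.get(localpath)
    if data and not data.get('content_blob'):
        return data                       # blobless content: pure cache hit
    data = {'title': page['title']}
    if page.get('blob'):
        data['content_blob'] = True       # cache the marker, never the blob
        cache[localpath] = dict(data)
        data['content'] = page['blob']    # blobful content: re-read each call
    else:
        data['content'] = page['content']
        cache[localpath] = dict(data)     # blobless content: cached with body
    return data
```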
75 | 104 |
76 class ConsoleData(object): | 105 class ConsoleData(object): |
77 def __init__(self): | 106 def __init__(self): |
78 self.row_orderedkeys = [] | 107 self.row_orderedkeys = [] |
79 self.row_data = {} | 108 self.row_data = {} |
80 | 109 |
81 # Retain order of observed masters. | 110 # Retain order of observed masters. |
82 self.masters = [] | 111 self.masters = [] |
83 | 112 |
(...skipping 80 matching lines...) | |
164 def Finish(self): | 193 def Finish(self): |
165 self.row_orderedkeys = sorted(self.row_orderedkeys, key=int, reverse=True) | 194 self.row_orderedkeys = sorted(self.row_orderedkeys, key=int, reverse=True) |
166 # TODO(cmp): Look for row/master/categories that are unset. If they are | 195 # TODO(cmp): Look for row/master/categories that are unset. If they are |
167 # at the latest revisions, leave them unset. If they are at | 196 # at the latest revisions, leave them unset. If they are at |
168 # the earliest revisions, set them to ''. | 197 # the earliest revisions, set them to ''. |
169 | 198 |
170 | 199 |
171 # W0613:169,39:console_merger: Unused argument 'remoteurl' | 200 # W0613:169,39:console_merger: Unused argument 'remoteurl' |
172 # W0613:169,19:console_merger: Unused argument 'unquoted_localpath' | 201 # W0613:169,19:console_merger: Unused argument 'unquoted_localpath' |
173 # pylint: disable=W0613 | 202 # pylint: disable=W0613 |
174 def console_merger(unquoted_localpath, remote_url, content=None): | 203 def console_merger(unquoted_localpath, remote_url, page_data=None): |
175 if content is None: | 204 page_data = page_data or {} |
176 return None | |
177 | 205 |
178 masters = [ | 206 masters = [ |
179 'chromium.main', | 207 'chromium.main', |
180 'chromium.chromiumos', | 208 'chromium.chromiumos', |
181 'chromium.chrome', | 209 'chromium.chrome', |
182 'chromium.memory', | 210 'chromium.memory', |
183 ] | 211 ] |
184 mergedconsole = ConsoleData() | 212 mergedconsole = ConsoleData() |
185 merged_page = None | 213 merged_page = None |
186 merged_tag = None | 214 merged_tag = None |
187 fetch_timestamp = datetime.datetime.now() | 215 fetch_timestamp = datetime.datetime.now() |
188 for master in masters: | 216 for master in masters: |
189 master_content = get_and_cache_page('%s/console' % master) | 217 page_data = get_and_cache_pagedata('%s/console' % master) |
218 master_content = page_data['content'] | |
190 if master_content is None: | 219 if master_content is None: |
191 continue | 220 continue |
192 master_content = master_content.encode('ascii', 'replace') | 221 master_content = master_content.encode('ascii', 'replace') |
193 this_page = BeautifulSoup(master_content) | 222 this_page = BeautifulSoup(master_content) |
194 this_tag = this_page.find('table', {'class': 'ConsoleData'}) | 223 this_tag = this_page.find('table', {'class': 'ConsoleData'}) |
195 # The first console is special, we reuse all of the console page. | 224 # The first console is special, we reuse all of the console page. |
196 if not merged_page: | 225 if not merged_page: |
197 merged_page = this_page | 226 merged_page = this_page |
198 merged_tag = this_tag | 227 merged_tag = this_tag |
199 mergedconsole.SawMaster(master) | 228 mergedconsole.SawMaster(master) |
(...skipping 74 matching lines...) | |
274 merged_content = re.sub( | 303 merged_content = re.sub( |
275 r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content) | 304 r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content) |
276 merged_content = re.sub( | 305 merged_content = re.sub( |
277 r'\<iframe\>\</iframe\>', | 306 r'\<iframe\>\</iframe\>', |
278 '<iframe \' + attributes + \' src="\' + url + \'"></iframe>', | 307 '<iframe \' + attributes + \' src="\' + url + \'"></iframe>', |
279 merged_content) | 308 merged_content) |
280 | 309 |
281 # Update the merged console page. | 310 # Update the merged console page. |
282 merged_page = get_or_create_page('chromium/console', None, maxage=30) | 311 merged_page = get_or_create_page('chromium/console', None, maxage=30) |
283 logging.debug('console_merger: saving merged console') | 312 logging.debug('console_merger: saving merged console') |
284 save_page(merged_page, 'chromium/console', merged_content, | 313 page_data['title'] = 'BuildBot: Chromium' |
285 fetch_timestamp) | 314 page_data['offsite_base'] = 'http://build.chromium.org/p/chromium' |
286 return merged_content | 315 page_data['body_class'] = 'interface' |
316 page_data['content'] = merged_content | |
317 save_page(merged_page, 'chromium/console', fetch_timestamp, page_data) | |
318 return | |
287 | 319 |
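As context for the merging loop above, a minimal BeautifulSoup 3 sketch of the table-extraction step (the HTML snippet is a made-up stand-in for a real console page):

```python
from BeautifulSoup import BeautifulSoup  # BeautifulSoup 3, as used here

html = ("<html><body><table class='ConsoleData'>"
        "<tr><td>r1000</td></tr></table></body></html>")
page = BeautifulSoup(html)
tag = page.find('table', {'class': 'ConsoleData'})
print tag.findAll('tr')  # the rows console_merger folds together per master
```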
288 | 320 |
289 # W0613:284,20:console_handler: Unused argument 'unquoted_localpath' | 321 def console_handler(_unquoted_localpath, remoteurl, page_data=None): |
290 # pylint: disable=W0613 | 322 page_data = page_data or {} |
291 def console_handler(unquoted_localpath, remoteurl, content=None): | 323 content = page_data.get('content') |
292 if content is None: | 324 if not content: |
293 return None | 325 return page_data |
294 # TODO(cmp): Fix the LKGR link. | |
295 | 326 |
296 # Decode content from utf-8 to unicode, replacing bad characters. | 327 # Decode content from utf-8 to unicode, replacing bad characters. |
297 content = content.decode('utf-8', 'replace') | 328 content = content.decode('utf-8', 'replace') |
298 | 329 |
299 # Scrub in sheriff file content to console. | 330 # Scrub in sheriff file content to console. |
300 sheriff_files = [ | 331 sheriff_files = [ |
301 'sheriff', | 332 'sheriff', |
302 'sheriff_android', | 333 'sheriff_android', |
303 'sheriff_cr_cros_gardeners', | 334 'sheriff_cr_cros_gardeners', |
304 'sheriff_cros_mtv', | 335 'sheriff_cros_mtv', |
305 'sheriff_cros_nonmtv', | 336 'sheriff_cros_nonmtv', |
306 'sheriff_gpu', | 337 'sheriff_gpu', |
307 'sheriff_memory', | 338 'sheriff_memory', |
308 'sheriff_nacl', | 339 'sheriff_nacl', |
309 'sheriff_perf', | 340 'sheriff_perf', |
310 'sheriff_webkit', | 341 'sheriff_webkit', |
311 ] | 342 ] |
312 for sheriff_file in sheriff_files: | 343 for sheriff_file in sheriff_files: |
313 sheriff_content = get_and_cache_page('chromium/%s.js' % sheriff_file) | 344 sheriff_page_data = get_and_cache_pagedata('chromium/%s.js' % sheriff_file) |
345 sheriff_content = sheriff_page_data['content'] | |
314 console_re = (r'<script src=\'http://chromium-build.appspot.com/' | 346 console_re = (r'<script src=\'http://chromium-build.appspot.com/' |
315 'p/chromium/%s.js\'></script>') | 347 'p/chromium/%s.js\'></script>') |
316 content = re.sub(console_re % sheriff_file, | 348 content = re.sub(console_re % sheriff_file, |
317 '<script>%s</script>' % sheriff_content, content) | 349 '<script>%s</script>' % sheriff_content, content) |
318 | 350 |
319 # Replace showBuildBox with direct links. | 351 # Replace showBuildBox with direct links. |
320 content = re.sub(r'<a href=\'#\' onclick=\'showBuildBox\(\"./(.+)\", event\);' | 352 content = re.sub(r'<a href=\'#\' onclick=\'showBuildBox\(\"./(.+)\", event\);' |
321 ' return false;\'', | 353 ' return false;\'', |
322 r"<a href='\1'", content) | 354 r"<a href='\1'", content) |
323 | 355 |
(...skipping 43 matching lines...) | |
367 content = string.replace(content, | 399 content = string.replace(content, |
368 "'/json/builders/Linux%20x64/builds/-1?as_text=1';", | 400 "'/json/builders/Linux%20x64/builds/-1?as_text=1';", |
369 "'/json/builders/Linux%20x64/builds/-1/as_text=1.json';") | 401 "'/json/builders/Linux%20x64/builds/-1/as_text=1.json';") |
370 | 402 |
371 # Fix up a reference to http chromium-build in BarUrl(). | 403 # Fix up a reference to http chromium-build in BarUrl(). |
372 content = string.replace(content, | 404 content = string.replace(content, |
373 "return 'http://chromium-build.appspot.com/p/'", | 405 "return 'http://chromium-build.appspot.com/p/'", |
374 "return 'https://chromium-build.appspot.com/p/'") | 406 "return 'https://chromium-build.appspot.com/p/'") |
375 | 407 |
376 # Encode content from unicode to utf-8. | 408 # Encode content from unicode to utf-8. |
377 content = content.encode('utf-8') | 409 page_data['content'] = content.encode('utf-8') |
378 return content | 410 |
411 # Last tweaks to HTML, plus extracting metadata about the page itself. | |
412 page_data['offsite_base'] = remoteurl + '/../' | |
413 | |
414 # Extract the title from the page. | |
415 md = re.search( | |
416 r'^.*<title>([^\<]+)</title>', | |
417 page_data['content'], | |
418 re.MULTILINE|re.DOTALL) | |
419 if not md: | |
420 raise Exception('failed to locate title in page') | |
421 page_data['title'] = md.group(1) | |
422 | |
423 # Remove the leading text up to the end of the opening body tag. While | |
424 # there, extract the body_class from the page. | |
425 md = re.search( | |
426 r'^.*<body class="(\w+)\">(.*)$', | |
427 page_data['content'], | |
428 re.MULTILINE|re.DOTALL) | |
429 if not md: | |
430 raise Exception('failed to locate leading text up to body tag') | |
431 page_data['body_class'] = md.group(1) | |
432 page_data['content'] = md.group(2) | |
433 | |
434 # Remove the leading div and hr tags. | |
435 md = re.search( | |
436 r'^.*?<hr/>(.*)$', | |
437 page_data['content'], | |
438 re.MULTILINE|re.DOTALL) | |
439 if not md: | |
440 raise Exception('failed to locate leading div and hr tags') | |
441 page_data['content'] = md.group(1) | |
442 | |
443 # Strip the trailing body and html tags. | |
444 md = re.search( | |
445 r'^(.*)</body>.*$', | |
446 page_data['content'], | |
447 re.MULTILINE|re.DOTALL) | |
448 if not md: | |
449 raise Exception('failed to locate trailing body and html tags') | |
450 page_data['content'] = md.group(1) | |
451 | |
452 return page_data | |
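The metadata-extraction regexes above can be exercised in isolation; a small sketch against a made-up page, using the patterns exactly as written:

```python
import re

html = ('<html><head><title>BuildBot: Chromium</title></head>'
        '<body class="interface"><div></div><hr/>console rows</body></html>')
md = re.search(r'^.*<title>([^\<]+)</title>', html, re.MULTILINE | re.DOTALL)
print md.group(1)  # BuildBot: Chromium
md = re.search(r'^.*<body class="(\w+)\">(.*)$', html,
               re.MULTILINE | re.DOTALL)
print md.group(1)  # interface
```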
379 | 453 |
380 | 454 |
381 def one_box_handler(unquoted_localpath, remoteurl, content=None): | 455 def one_box_handler(unquoted_localpath, remoteurl, page_data=None): |
456 page_data = page_data or {} | |
457 content = page_data.get('content') | |
382 if content is None: | 458 if content is None: |
383 return None | 459 return page_data |
384 # Get the site name from the local path. | 460 # Get the site name from the local path. |
385 md = re.match('^([^\/]+)/.*$', unquoted_localpath) | 461 md = re.match('^([^\/]+)/.*$', unquoted_localpath) |
386 if not md: | 462 if not md: |
387 logging.error('one_box_handler(\'%s\', \'%s\', \'%s\'): cannot get site ' | 463 logging.error('one_box_handler(\'%s\', \'%s\', \'%s\'): cannot get site ' |
388 'from local path' % (unquoted_localpath, remoteurl, content)) | 464 'from local path' % ( |
389 return content | 465 unquoted_localpath, remoteurl, page_data)) |
466 return page_data | |
390 site = md.group(1) | 467 site = md.group(1) |
391 new_waterfall_url = 'http://build.chromium.org/p/%s/waterfall' % site | 468 new_waterfall_url = 'http://build.chromium.org/p/%s/waterfall' % site |
392 content = re.sub(r'waterfall', new_waterfall_url, content) | 469 page_data['content'] = re.sub( |
393 return content | 470 r'waterfall', |
471 new_waterfall_url, | |
472 page_data['content']) | |
473 return page_data | |
394 | 474 |
395 | 475 |
396 | 476 |
397 # List of URLs to fetch. | 477 # List of URLs to fetch. |
398 URLS = [ | 478 URLS = [ |
399 # Console URLs. | 479 # Console URLs. |
400 { | 480 { |
401 'remoteurl': 'http://build.chromium.org/p/chromium/console', | 481 'remoteurl': 'http://build.chromium.org/p/chromium/console', |
402 'localpath': 'chromium.main/console', | 482 'localpath': 'chromium.main/console', |
403 'postfetch': console_handler, | 483 'postfetch': console_handler, |
(...skipping 233 matching lines...) | |
637 return urlfetch.fetch(url, deadline=URLFETCH_DEADLINE, *args, **kwargs) | 717 return urlfetch.fetch(url, deadline=URLFETCH_DEADLINE, *args, **kwargs) |
638 except urlfetch.DownloadError: | 718 except urlfetch.DownloadError: |
639 logging.warn('urlfetch failed: %s' % url, exc_info=1) | 719 logging.warn('urlfetch failed: %s' % url, exc_info=1) |
640 return None | 720 return None |
641 | 721 |
642 | 722 |
643 class Page(db.Model): | 723 class Page(db.Model): |
644 fetch_timestamp = db.DateTimeProperty(required=True) | 724 fetch_timestamp = db.DateTimeProperty(required=True) |
645 localpath = db.StringProperty(required=True) | 725 localpath = db.StringProperty(required=True) |
646 content = db.TextProperty() | 726 content = db.TextProperty() |
727 title = db.StringProperty() | |
728 offsite_base = db.StringProperty() | |
729 body_class = db.StringProperty() | |
647 remoteurl = db.TextProperty() | 730 remoteurl = db.TextProperty() |
648 # Data updated separately, after creation. | 731 # Data updated separately, after creation. |
649 content_blob = blobstore.BlobReferenceProperty() | 732 content_blob = blobstore.BlobReferenceProperty() |
650 | 733 |
651 | 734 |
652 def write_blob(data, mime_type): | 735 def write_blob(data, mime_type): |
653 """Saves a Unicode string as a new blob, returns the blob's key.""" | 736 """Saves a Unicode string as a new blob, returns the blob's key.""" |
654 file_name = files.blobstore.create(mime_type=mime_type) | 737 file_name = files.blobstore.create(mime_type=mime_type) |
655 data = data.encode('utf-8') | 738 data = data.encode('utf-8') |
656 with files.open(file_name, 'a') as blob_file: | 739 with files.open(file_name, 'a') as blob_file: |
657 blob_file.write(data) | 740 blob_file.write(data) |
658 files.finalize(file_name) | 741 files.finalize(file_name) |
659 return files.blobstore.get_blob_key(file_name) | 742 return files.blobstore.get_blob_key(file_name) |
660 | 743 |
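A hypothetical use of write_blob inside the App Engine runtime, mirroring the size check save_page performs below (the 1 MB threshold tracks the datastore entity limit this code works around):

```python
content = u'<html>' + u'x' * (1024 * 1024) + u'</html>'  # made-up big page
if len(content.encode('utf-8')) >= 1024*1024:
    # Too big for a datastore TextProperty; keep only the blob key.
    content_blob_key = write_blob(content,
                                  path_to_mime_type('chromium/console'))
    content = None
```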
661 | 744 |
662 def save_page(page, localpath, content, fetch_timestamp): | 745 def save_page(page, localpath, fetch_timestamp, page_data): |
746 body_class = page_data.get('body_class', '') | |
747 content = page_data.get('content') | |
748 offsite_base = page_data.get('offsite_base', '') | |
749 title = page_data.get('title', '') | |
750 | |
663 content_blob_key = None | 751 content_blob_key = None |
664 try: | 752 try: |
665 content = content.decode('utf-8', 'replace') | 753 content = content.decode('utf-8', 'replace') |
666 except UnicodeEncodeError: | 754 except UnicodeEncodeError: |
667 logging.debug('save_page: content was already in unicode') | 755 logging.debug('save_page: content was already in unicode') |
668 logging.debug('save_page: content size is %d' % len(content)) | 756 logging.debug('save_page: content size is %d' % len(content)) |
669 if len(content.encode('utf-8')) >= 1024*1024: | 757 if len(content.encode('utf-8')) >= 1024*1024: |
670 logging.debug('save_page: saving to blob') | 758 logging.debug('save_page: saving to blob') |
671 content_blob_key = write_blob(content, path_to_mime_type(localpath)) | 759 content_blob_key = write_blob(content, path_to_mime_type(localpath)) |
672 content = None | 760 content = None |
673 def tx_page(page_key): | 761 def tx_page(page_key): |
674 page = Page.get(page_key) | 762 page = Page.get(page_key) |
675 # E1103:225,7:fetch_page.tx_page: Instance of 'list' has no | 763 # E1103:225,7:fetch_page.tx_page: Instance of 'list' has no |
676 # 'fetch_timestamp' member (but some types could not be inferred) | 764 # 'fetch_timestamp' member (but some types could not be inferred) |
677 # pylint: disable=E1103 | 765 # pylint: disable=E1103 |
678 if page.fetch_timestamp > fetch_timestamp: | 766 if page.fetch_timestamp > fetch_timestamp: |
679 return | 767 return |
680 page.content = content | 768 page.content = content |
681 page.content_blob = content_blob_key | 769 page.content_blob = content_blob_key |
682 page.fetch_timestamp = fetch_timestamp | 770 page.fetch_timestamp = fetch_timestamp |
771 # title, offsite_base, body_class can all be empty strings for some | |
772 # content. Where that's true, they're not used for displaying a console- | |
773 # like resource, and the content alone is returned to the web user. | |
774 page.title = title | |
775 page.offsite_base = offsite_base | |
776 page.body_class = body_class | |
683 # E1103:231,4:fetch_page.tx_page: Instance of 'list' has no 'put' member | 777 # E1103:231,4:fetch_page.tx_page: Instance of 'list' has no 'put' member |
684 # (but some types could not be inferred) | 778 # (but some types could not be inferred) |
685 # pylint: disable=E1103 | 779 # pylint: disable=E1103 |
686 page.put() | 780 page.put() |
687 db.run_in_transaction(tx_page, page.key()) | 781 db.run_in_transaction(tx_page, page.key()) |
688 # E1101:232,11:fetch_page.tx_page: Module 'google.appengine.api.memcache' | 782 page_data = { |
M-A Ruel 2012/05/29 20:03:49: I agree the function shouldn't be named as_dict()
cmp 2012/05/29 20:20:28: My preference given we're both on the fence is to
| |
689 # has no 'set' member | 783 'body_class': body_class, |
690 # pylint: disable=E1101 | 784 'content': content, |
691 if page.content_blob is None: | 785 'offsite_base': offsite_base, |
692 if memcache.set(key=localpath, value=page.content, time=60): | 786 'title': title, |
693 logging.debug('tx_page(page key="%s"): memcache.set() succeeded' % | 787 } |
694 page.key()) | 788 if content_blob_key: |
695 else: | 789 page_data['content_blob'] = True |
696 logging.error('tx_page(page key="%s"): memcache.set() failed' % | 790 put_pagedata_into_cache(localpath, page_data) |
697 page.key()) | |
698 | 791 |
699 | 792 |
700 def get_or_create_page(localpath, remoteurl, maxage): | 793 def get_or_create_page(localpath, remoteurl, maxage): |
701 return Page.get_or_insert( | 794 return Page.get_or_insert( |
702 key_name=localpath, | 795 key_name=localpath, |
703 localpath=localpath, | 796 localpath=localpath, |
704 remoteurl=remoteurl, | 797 remoteurl=remoteurl, |
705 maxage=maxage, | 798 maxage=maxage, |
706 fetch_timestamp=datetime.datetime.now() - datetime.timedelta(hours=24), | 799 fetch_timestamp=datetime.datetime.now() - datetime.timedelta(hours=24), |
707 content=None, | 800 content=None, |
708 content_blob=None) | 801 content_blob=None) |
709 | 802 |
710 | 803 |
711 def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None): | 804 def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None, |
805 fetch_url=nonfatal_fetch_url): | |
712 """Fetches data about a set of pages.""" | 806 """Fetches data about a set of pages.""" |
713 unquoted_localpath = urllib.unquote(localpath) | 807 unquoted_localpath = urllib.unquote(localpath) |
714 logging.debug('fetch_page("%s", "%s", "%s")' % ( | 808 logging.debug('fetch_page("%s", "%s", "%s")' % ( |
715 unquoted_localpath, remoteurl, maxage)) | 809 unquoted_localpath, remoteurl, maxage)) |
716 page = get_or_create_page(unquoted_localpath, remoteurl, maxage) | 810 page = get_or_create_page(unquoted_localpath, remoteurl, maxage) |
717 | 811 |
718 # Check if our copy of the page is younger than maxage. If it is, we'll | 812 # Check if our copy of the page is younger than maxage. If it is, we'll |
719 # skip the fetch. | 813 # skip the fetch. |
720 oldest_acceptable_timestamp = datetime.datetime.now() - datetime.timedelta( | 814 oldest_acceptable_timestamp = datetime.datetime.now() - datetime.timedelta( |
721 seconds=maxage) | 815 seconds=maxage) |
722 if (page.fetch_timestamp and | 816 if (page.fetch_timestamp and |
723 page.fetch_timestamp > oldest_acceptable_timestamp): | 817 page.fetch_timestamp > oldest_acceptable_timestamp): |
724 logging.debug('fetch_page: too recent, skipping') | 818 logging.debug('fetch_page: too recent, skipping') |
725 return | 819 return |
726 | 820 |
727 # Perform the actual page fetch. | 821 # Perform the actual page fetch. |
728 fetch_timestamp = datetime.datetime.now() | 822 fetch_timestamp = datetime.datetime.now() |
729 response = nonfatal_fetch_url(remoteurl) | 823 response = fetch_url(remoteurl) |
730 if not response: | 824 if not response: |
731 logging.warning('fetch_page: got empty response') | 825 logging.warning('fetch_page: got empty response') |
732 return | 826 return |
733 if response.status_code != 200: | 827 if response.status_code != 200: |
734 logging.warning('fetch_page: got non-empty response but code ' | 828 logging.warning('fetch_page: got non-empty response but code ' |
735 '%d' % response.status_code) | 829 '%d' % response.status_code) |
736 return | 830 return |
737 | 831 |
738 # We have actual content. If there's one or more handlers, call them. | 832 # We have actual content. If there's one or more handlers, call them. |
739 content = response.content | 833 page_data = {} |
834 page_data['content'] = response.content | |
740 if postfetch: | 835 if postfetch: |
741 if not isinstance(postfetch, list): | 836 if not isinstance(postfetch, list): |
742 postfetch = [postfetch] | 837 postfetch = [postfetch] |
743 for handler in postfetch: | 838 for handler in postfetch: |
744 logging.debug('fetch_page: calling postfetch handler ' | 839 logging.debug('fetch_page: calling postfetch handler ' |
745 '%s' % handler.__name__) | 840 '%s' % handler.__name__) |
746 content = handler(unquoted_localpath, remoteurl, content) | 841 page_data = handler(unquoted_localpath, remoteurl, page_data) |
747 | 842 |
748 # Save the returned content into the DB and caching layers. | 843 # Save the returned content into the DB and caching layers. |
749 logging.debug('fetch_page: saving page') | 844 logging.debug('fetch_page: saving page') |
750 save_page(page, unquoted_localpath, content, fetch_timestamp) | 845 save_page(page, unquoted_localpath, fetch_timestamp, page_data) |
751 if postsave: | 846 if postsave: |
752 if not isinstance(postsave, list): | 847 if not isinstance(postsave, list): |
753 postsave = [postsave] | 848 postsave = [postsave] |
754 for handler in postsave: | 849 for handler in postsave: |
755 logging.debug('fetch_page: calling postsave handler ' | 850 logging.debug('fetch_page: calling postsave handler ' |
756 '%s' % handler.__name__) | 851 '%s' % handler.__name__) |
757 handler(unquoted_localpath, remoteurl, content) | 852 handler(unquoted_localpath, remoteurl, page_data) |
758 | 853 |
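Under the new contract, every postfetch/postsave handler accepts and returns a page_data dict rather than a bare content string. A hypothetical handler for illustration (strip_comments is not part of this change):

```python
import re

def strip_comments(unquoted_localpath, remoteurl, page_data=None):
    """Hypothetical postfetch handler: drop HTML comments from content."""
    page_data = page_data or {}
    content = page_data.get('content')
    if content is None:
        return page_data
    page_data['content'] = re.sub(r'(?s)<!--.*?-->', '', content)
    return page_data
```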
759 | 854 |
760 EXT_TO_MIME = { | 855 EXT_TO_MIME = { |
761 '.css': 'text/css', | 856 '.css': 'text/css', |
762 '.js': 'text/javascript', | 857 '.js': 'text/javascript', |
763 '.json': 'application/json', | 858 '.json': 'application/json', |
764 '.html': 'text/html', | 859 '.html': 'text/html', |
765 } | 860 } |
766 | 861 |
767 | 862 |
768 def path_to_mime_type(path): | 863 def path_to_mime_type(path): |
769 return EXT_TO_MIME.get(os.path.splitext(path)[1], 'text/html') | 864 return EXT_TO_MIME.get(os.path.splitext(path)[1], 'text/html') |
770 | 865 |
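path_to_mime_type keys off the file extension and falls back to text/html, e.g.:

```python
print path_to_mime_type('chromium/sheriff.js')    # 'text/javascript'
print path_to_mime_type('chromium.main/console')  # no extension: 'text/html'
```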
771 | 866 |
772 def fetch_pages(): | 867 def fetch_pages(): |
773 """Starts a background fetch operation for pages that need it.""" | 868 """Starts a background fetch operation for pages that need it.""" |
774 logging.debug('fetch_pages()') | 869 logging.debug('fetch_pages()') |
775 for url in URLS: | 870 for url in URLS: |
776 deferred.defer(fetch_page, **url) | 871 deferred.defer(fetch_page, **url) |
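Since each URLS entry is splatted into fetch_page, the first deferral above is equivalent to the call below (maxage values fall in the elided portion of the diff, so a placeholder is used):

```python
deferred.defer(
    fetch_page,
    remoteurl='http://build.chromium.org/p/chromium/console',
    localpath='chromium.main/console',
    postfetch=console_handler,
    maxage=0)  # placeholder; the real value sits in the elided URLS lines
```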