OLD | NEW |
---|---|
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 from __future__ import with_statement | 5 from __future__ import with_statement |
6 | 6 |
7 import datetime | 7 import datetime |
8 import json | |
8 import logging | 9 import logging |
9 import os | 10 import os |
10 import random | 11 import random |
11 import re | 12 import re |
12 import string | 13 import string |
13 import urllib | 14 import urllib |
14 | 15 |
15 from google.appengine.api import files, memcache, urlfetch | 16 from google.appengine.api import files, memcache, urlfetch |
16 from google.appengine.api.app_identity import get_application_id | 17 from google.appengine.api.app_identity import get_application_id |
17 from google.appengine.ext import blobstore, db, deferred | 18 from google.appengine.ext import blobstore, db, deferred |
(...skipping 12 matching lines...) |
30 APP_NAME = get_application_id() | 31 APP_NAME = get_application_id() |
31 | 32 |
32 # Deadline for fetching URLs (in seconds). | 33 # Deadline for fetching URLs (in seconds). |
33 URLFETCH_DEADLINE = 60*5 # 5 mins | 34 URLFETCH_DEADLINE = 60*5 # 5 mins |
34 | 35 |
35 | 36 |
36 # Perform initial bootstrap for this module. | 37 # Perform initial bootstrap for this module. |
37 console_template = '' | 38 console_template = '' |
38 def bootstrap(): | 39 def bootstrap(): |
39 global console_template | 40 global console_template |
40 with open('templates/console.html', 'r') as fh: | 41 with open('templates/merger.html', 'r') as fh: |
41 console_template = fh.read() | 42 console_template = fh.read() |
42 | 43 |
43 | 44 |
45 def get_pagedata_from_cache(localpath): | |
46 memcache_data = memcache.get(localpath) | |
47 if not memcache_data: | |
48 return None | |
49 logging.debug('content for %s found in memcache' % localpath) | |
50 return json.loads(memcache_data) | |
51 | |
52 | |
53 def put_pagedata_into_cache(localpath, page_data): | |
54 memcache_data = json.dumps(page_data) | |
55 if not memcache.set(key=localpath, value=memcache_data, time=2*60): | |
56 logging.error('put_pagedata_into_cache(\'%s\'): memcache.set() failed' % ( | |
57 localpath)) | |
58 return True | |
M-A Ruel 2012/05/29 18:46:33: why return something at all?
cmp 2012/05/29 19:38:03: Done.
| |
59 | |
60 | |
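The two helpers above set the CL's core caching convention: the whole page_data dict is JSON-serialized into a single memcache entry with a two-minute TTL, so content and metadata travel together. A minimal standalone sketch of that pattern, with a plain dict standing in for App Engine's memcache (the stand-in ignores the TTL):

```python
import json
import logging

_cache = {}  # stand-in for google.appengine.api.memcache

def put_pagedata_into_cache(localpath, page_data):
    # One entry carries content plus metadata (title, body_class, ...).
    _cache[localpath] = json.dumps(page_data)

def get_pagedata_from_cache(localpath):
    memcache_data = _cache.get(localpath)
    if not memcache_data:
        return None
    logging.debug('content for %s found in cache', localpath)
    return json.loads(memcache_data)

put_pagedata_into_cache('chromium/console', {'title': 'BuildBot: Chromium'})
print(get_pagedata_from_cache('chromium/console')['title'])  # BuildBot: Chromium
```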
44 # Assumes localpath is already unquoted. | 61 # Assumes localpath is already unquoted. |
M-A Ruel 2012/05/29 18:46:33: This should go in a docstring to explain what the
cmp 2012/05/29 19:38:03: Done.
| |
45 def get_and_cache_page(localpath): | 62 def get_and_cache_pagedata(localpath): |
46 # E1101: 29,12:get_and_cache_page: Module 'google.appengine.api.memcache' has | 63 page_data = get_pagedata_from_cache(localpath) |
47 # no 'get' member | 64 if page_data and not page_data.get('content_blob'): |
48 # pylint: disable=E1101 | 65 return page_data |
49 content = memcache.get(localpath) | |
50 if content is not None: | |
51 logging.debug('content for %s found in memcache' % localpath) | |
52 return content | |
53 | |
54 page = Page.all().filter('localpath =', localpath).get() | 66 page = Page.all().filter('localpath =', localpath).get() |
55 if not page: | 67 if not page: |
56 logging.error('get_and_cache_page(\'%s\'): no matching localpath in ' | 68 logging.error('get_and_cache_pagedata(\'%s\'): no matching localpath in ' |
57 'datastore' % localpath) | 69 'datastore' % localpath) |
58 return None | 70 return {'content': None} |
59 if page.content_blob is not None: | 71 page_data = { |
72 'body_class': page.body_class, | |
73 'offsite_base': page.offsite_base, | |
74 'title': page.title, | |
75 } | |
76 if page.content_blob: | |
60 # Get the blob. | 77 # Get the blob. |
78 logging.debug('content for %s found in blobstore' % localpath) | |
61 blob_reader = blobstore.BlobReader(page.content_blob) | 79 blob_reader = blobstore.BlobReader(page.content_blob) |
62 content = blob_reader.read().decode('utf-8', 'replace') | 80 page_data['content_blob'] = True |
63 logging.debug('content for %s found in blobstore' % localpath) | 81 put_pagedata_into_cache(localpath, page_data) |
82 page_data['content'] = blob_reader.read().decode('utf-8', 'replace') | |
64 else: | 83 else: |
65 logging.debug('content for %s found in datastore' % localpath) | 84 logging.debug('content for %s found in datastore' % localpath) |
66 content = page.content | 85 page_data['content'] = page.content |
67 # E1101: 39,11:get_and_cache_page: Module 'google.appengine.api.memcache' | 86 put_pagedata_into_cache(localpath, page_data) |
68 # has no 'set' member | 87 return page_data |
69 # pylint: disable=E1101 | |
70 if not memcache.set(key=localpath, value=content, time=2*60): | |
71 logging.error('get_and_cache_page(\'%s\'): memcache.set() failed' % | |
72 localpath) | |
73 return content | |
74 | 88 |
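Note the asymmetry in the rewritten get_and_cache_pagedata: when the content lives in a blob, only the metadata plus a content_blob flag go into the cache, and the body is re-read from blobstore on every request (a cached entry carrying the flag deliberately falls through to the datastore/blobstore path). That keeps bodies over the roughly 1 MB memcache value limit out of the cache entirely. A dict-backed sketch of the lookup order (helper name hypothetical):

```python
def get_and_cache_pagedata_sketch(localpath, cache, pages, blobs):
    page_data = cache.get(localpath)
    if page_data and not page_data.get('content_blob'):
        return page_data                    # small page: served from cache
    page = pages.get(localpath)
    if page is None:
        return {'content': None}            # same sentinel as the CL
    page_data = {'title': page.get('title')}
    if page.get('content_blob'):
        cache[localpath] = dict(page_data, content_blob=True)  # metadata only
        page_data['content'] = blobs[page['content_blob']]     # body each time
    else:
        page_data['content'] = page.get('content')
        cache[localpath] = page_data
    return page_data

pages = {'chromium/console': {'title': 'BuildBot', 'content': '<table/>'}}
print(get_and_cache_pagedata_sketch('chromium/console', {}, pages, {}))
```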
75 | 89 |
76 class ConsoleData(object): | 90 class ConsoleData(object): |
77 def __init__(self): | 91 def __init__(self): |
78 self.row_orderedkeys = [] | 92 self.row_orderedkeys = [] |
79 self.row_data = {} | 93 self.row_data = {} |
80 | 94 |
81 # Retain order of observed masters. | 95 # Retain order of observed masters. |
82 self.masters = [] | 96 self.masters = [] |
83 | 97 |
(...skipping 80 matching lines...) |
164 def Finish(self): | 178 def Finish(self): |
165 self.row_orderedkeys = sorted(self.row_orderedkeys, key=int, reverse=True) | 179 self.row_orderedkeys = sorted(self.row_orderedkeys, key=int, reverse=True) |
166 # TODO(cmp): Look for row/master/categories that are unset. If they are | 180 # TODO(cmp): Look for row/master/categories that are unset. If they are |
167 # at the latest revisions, leave them unset. If they are at | 181 # at the latest revisions, leave them unset. If they are at |
168 # the earliest revisions, set them to ''. | 182 # the earliest revisions, set them to ''. |
169 | 183 |
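Finish() sorts the accumulated row keys numerically (they are revision numbers, to judge by key=int); a plain string sort would misplace shorter revisions:

```python
row_orderedkeys = ['98', '100', '99']
print(sorted(row_orderedkeys, key=int, reverse=True))  # ['100', '99', '98']
print(sorted(row_orderedkeys, reverse=True))           # ['99', '98', '100']
```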
170 | 184 |
171 # W0613:169,39:console_merger: Unused argument 'remoteurl' | 185 # W0613:169,39:console_merger: Unused argument 'remoteurl' |
172 # W0613:169,19:console_merger: Unused argument 'unquoted_localpath' | 186 # W0613:169,19:console_merger: Unused argument 'unquoted_localpath' |
173 # pylint: disable=W0613 | 187 # pylint: disable=W0613 |
174 def console_merger(unquoted_localpath, remote_url, content=None): | 188 def console_merger(unquoted_localpath, remote_url, page_data=None): |
175 if content is None: | 189 page_data = page_data or {} |
176 return None | |
177 | 190 |
178 masters = [ | 191 masters = [ |
179 'chromium.main', | 192 'chromium.main', |
180 'chromium.chromiumos', | 193 'chromium.chromiumos', |
181 'chromium.chrome', | 194 'chromium.chrome', |
182 'chromium.memory', | 195 'chromium.memory', |
183 ] | 196 ] |
184 mergedconsole = ConsoleData() | 197 mergedconsole = ConsoleData() |
185 merged_page = None | 198 merged_page = None |
186 merged_tag = None | 199 merged_tag = None |
187 fetch_timestamp = datetime.datetime.now() | 200 fetch_timestamp = datetime.datetime.now() |
188 for master in masters: | 201 for master in masters: |
189 master_content = get_and_cache_page('%s/console' % master) | 202 page_data = get_and_cache_pagedata('%s/console' % master) |
203 master_content = page_data['content'] | |
190 if master_content is None: | 204 if master_content is None: |
191 continue | 205 continue |
192 master_content = master_content.encode('ascii', 'replace') | 206 master_content = master_content.encode('ascii', 'replace') |
193 this_page = BeautifulSoup(master_content) | 207 this_page = BeautifulSoup(master_content) |
194 this_tag = this_page.find('table', {'class': 'ConsoleData'}) | 208 this_tag = this_page.find('table', {'class': 'ConsoleData'}) |
195 # The first console is special; we reuse all of the console page. | 209 # The first console is special; we reuse all of the console page. |
196 if not merged_page: | 210 if not merged_page: |
197 merged_page = this_page | 211 merged_page = this_page |
198 merged_tag = this_tag | 212 merged_tag = this_tag |
199 mergedconsole.SawMaster(master) | 213 mergedconsole.SawMaster(master) |
(...skipping 74 matching lines...) |
274 merged_content = re.sub( | 288 merged_content = re.sub( |
275 r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content) | 289 r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content) |
276 merged_content = re.sub( | 290 merged_content = re.sub( |
277 r'\<iframe\>\</iframe\>', | 291 r'\<iframe\>\</iframe\>', |
278 '<iframe \' + attributes + \' src="\' + url + \'"></iframe>', | 292 '<iframe \' + attributes + \' src="\' + url + \'"></iframe>', |
279 merged_content) | 293 merged_content) |
280 | 294 |
281 # Update the merged console page. | 295 # Update the merged console page. |
282 merged_page = get_or_create_page('chromium/console', None, maxage=30) | 296 merged_page = get_or_create_page('chromium/console', None, maxage=30) |
283 logging.debug('console_merger: saving merged console') | 297 logging.debug('console_merger: saving merged console') |
284 save_page(merged_page, 'chromium/console', merged_content, | 298 page_data['title'] = 'BuildBot: Chromium' |
285 fetch_timestamp) | 299 page_data['offsite_base'] = 'http://build.chromium.org/p/chromium' |
286 return merged_content | 300 page_data['body_class'] = 'interface' |
301 page_data['content'] = merged_content | |
302 save_page(merged_page, 'chromium/console', fetch_timestamp, page_data) | |
303 return | |
287 | 304 |
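console_merger relies on BeautifulSoup to locate each master's ConsoleData table and splice its rows into the first console page it saw. A tiny sketch of the locate step, assuming the 2012-era BeautifulSoup 3 this app uses (under today's bs4 the import would come from bs4 instead):

```python
from BeautifulSoup import BeautifulSoup  # BeautifulSoup 3 import style

html = ("<html><body><table class='ConsoleData'>"
        "<tr><td>r12345</td></tr></table></body></html>")
page = BeautifulSoup(html)
tag = page.find('table', {'class': 'ConsoleData'})
print(tag.find('td').string)  # r12345
```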
288 | 305 |
289 # W0613:284,20:console_handler: Unused argument 'unquoted_localpath' | 306 # W0613:284,20:console_handler: Unused argument 'unquoted_localpath' |
290 # pylint: disable=W0613 | 307 # pylint: disable=W0613 |
291 def console_handler(unquoted_localpath, remoteurl, content=None): | 308 def console_handler(unquoted_localpath, remoteurl, page_data=None): |
M-A Ruel 2012/05/29 18:46:33: Use a leading underscore to silence the warning, e
cmp 2012/05/29 19:38:03: Done.
| |
292 if content is None: | 309 page_data = page_data or {} |
293 return None | 310 content = page_data.get('content') |
294 # TODO(cmp): Fix the LKGR link. | 311 if not content: |
312 return page_data | |
295 | 313 |
296 # Decode content from utf-8 to unicode, replacing bad characters. | 314 # Decode content from utf-8 to unicode, replacing bad characters. |
297 content = content.decode('utf-8', 'replace') | 315 content = content.decode('utf-8', 'replace') |
298 | 316 |
299 # Scrub in sheriff file content to console. | 317 # Scrub in sheriff file content to console. |
300 sheriff_files = [ | 318 sheriff_files = [ |
301 'sheriff', | 319 'sheriff', |
302 'sheriff_android', | 320 'sheriff_android', |
303 'sheriff_cr_cros_gardeners', | 321 'sheriff_cr_cros_gardeners', |
304 'sheriff_cros_mtv', | 322 'sheriff_cros_mtv', |
305 'sheriff_cros_nonmtv', | 323 'sheriff_cros_nonmtv', |
306 'sheriff_gpu', | 324 'sheriff_gpu', |
307 'sheriff_memory', | 325 'sheriff_memory', |
308 'sheriff_nacl', | 326 'sheriff_nacl', |
309 'sheriff_perf', | 327 'sheriff_perf', |
310 'sheriff_webkit', | 328 'sheriff_webkit', |
311 ] | 329 ] |
312 for sheriff_file in sheriff_files: | 330 for sheriff_file in sheriff_files: |
313 sheriff_content = get_and_cache_page('chromium/%s.js' % sheriff_file) | 331 sheriff_page_data = get_and_cache_pagedata('chromium/%s.js' % sheriff_file) |
332 sheriff_content = sheriff_page_data['content'] | |
314 console_re = (r'<script src=\'http://chromium-build.appspot.com/' | 333 console_re = (r'<script src=\'http://chromium-build.appspot.com/' |
315 'p/chromium/%s.js\'></script>') | 334 'p/chromium/%s.js\'></script>') |
316 content = re.sub(console_re % sheriff_file, | 335 content = re.sub(console_re % sheriff_file, |
317 '<script>%s</script>' % sheriff_content, content) | 336 '<script>%s</script>' % sheriff_content, content) |
318 | 337 |
319 # Replace showBuildBox with direct links. | 338 # Replace showBuildBox with direct links. |
320 content = re.sub(r'<a href=\'#\' onclick=\'showBuildBox\(\"./(.+)\", event\);' | 339 content = re.sub(r'<a href=\'#\' onclick=\'showBuildBox\(\"./(.+)\", event\);' |
321 ' return false;\'', | 340 ' return false;\'', |
322 r"<a href='\1'", content) | 341 r"<a href='\1'", content) |
323 | 342 |
(...skipping 43 matching lines...) |
367 content = string.replace(content, | 386 content = string.replace(content, |
368 "'/json/builders/Linux%20x64/builds/-1?as_text=1';", | 387 "'/json/builders/Linux%20x64/builds/-1?as_text=1';", |
369 "'/json/builders/Linux%20x64/builds/-1/as_text=1.json';") | 388 "'/json/builders/Linux%20x64/builds/-1/as_text=1.json';") |
370 | 389 |
371 # Fix up a reference to http chromium-build in BarUrl(). | 390 # Fix up a reference to http chromium-build in BarUrl(). |
372 content = string.replace(content, | 391 content = string.replace(content, |
373 "return 'http://chromium-build.appspot.com/p/'", | 392 "return 'http://chromium-build.appspot.com/p/'", |
374 "return 'https://chromium-build.appspot.com/p/'") | 393 "return 'https://chromium-build.appspot.com/p/'") |
375 | 394 |
376 # Encode content from unicode to utf-8. | 395 # Encode content from unicode to utf-8. |
377 content = content.encode('utf-8') | 396 page_data['content'] = content.encode('utf-8') |
378 return content | 397 |
398 # Last tweaks to HTML, plus extracting metadata about the page itself. | |
399 page_data['offsite_base'] = remoteurl + '/../' | |
400 | |
401 # Extract the title from the page. | |
402 md = re.search( | |
403 r'^.*<title>([^\<]+)</title>', | |
404 page_data['content'], | |
405 re.MULTILINE|re.DOTALL) | |
406 if md: | |
407 page_data['title'] = md.group(1) | |
408 | |
409 # Remove the leading text up to the end of the opening body tag. While | |
410 # there, extract the body_class from the page. | |
411 md = re.search( | |
412 r'^.*<body class="(\w+)\">(.*)$', | |
413 page_data['content'], | |
414 re.MULTILINE|re.DOTALL) | |
415 if md: | |
M-A Ruel 2012/05/29 18:46:33: Are these expected to fail in the normal case? Bec
cmp 2012/05/29 19:38:03: Done.
| |
416 page_data['body_class'] = md.group(1) | |
417 page_data['content'] = md.group(2) | |
418 | |
419 # Remove the leading div and hr tags. | |
420 md = re.search( | |
421 r'^.*?<hr/>(.*)$', | |
422 page_data['content'], | |
423 re.MULTILINE|re.DOTALL) | |
424 if md: | |
425 page_data['content'] = md.group(1) | |
426 | |
427 # Strip the trailing body and html tags. | |
428 md = re.search( | |
429 r'^(.*)</body>.*$', | |
430 page_data['content'], | |
431 re.MULTILINE|re.DOTALL) | |
432 if md: | |
433 page_data['content'] = md.group(1) | |
434 | |
435 return page_data | |
379 | 436 |
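The new metadata-extraction half of console_handler is a chain of anchored DOTALL regexes: pull the title, pull the body class while discarding everything before the body tag, then trim the leading hr and the trailing body/html tags. The first two steps, run standalone on a toy page with the regexes copied from the CL:

```python
import re

page_data = {'content': ('<html><head><title>BuildBot: Chromium</title></head>'
                         '<body class="interface"><hr/>console rows</body></html>')}

md = re.search(r'^.*<title>([^\<]+)</title>', page_data['content'],
               re.MULTILINE | re.DOTALL)
if md:
    page_data['title'] = md.group(1)

md = re.search(r'^.*<body class="(\w+)\">(.*)$', page_data['content'],
               re.MULTILINE | re.DOTALL)
if md:
    page_data['body_class'] = md.group(1)
    page_data['content'] = md.group(2)

print('%s / %s' % (page_data['title'], page_data['body_class']))
# BuildBot: Chromium / interface
```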
380 | 437 |
381 def one_box_handler(unquoted_localpath, remoteurl, content=None): | 438 def one_box_handler(unquoted_localpath, remoteurl, page_data=None): |
439 page_data = page_data or {} | |
440 content = page_data.get('content') | |
382 if content is None: | 441 if content is None: |
383 return None | 442 return page_data |
384 # Get the site name from the local path. | 443 # Get the site name from the local path. |
385 md = re.match('^([^\/]+)/.*$', unquoted_localpath) | 444 md = re.match('^([^\/]+)/.*$', unquoted_localpath) |
386 if not md: | 445 if not md: |
387 logging.error('one_box_handler(\'%s\', \'%s\', \'%s\'): cannot get site ' | 446 logging.error('one_box_handler(\'%s\', \'%s\', \'%s\'): cannot get site ' |
388 'from local path' % (unquoted_localpath, remoteurl, content)) | 447 'from local path' % ( |
389 return content | 448 unquoted_localpath, remoteurl, page_data)) |
449 return page_data | |
390 site = md.group(1) | 450 site = md.group(1) |
391 new_waterfall_url = 'http://build.chromium.org/p/%s/waterfall' % site | 451 new_waterfall_url = 'http://build.chromium.org/p/%s/waterfall' % site |
392 content = re.sub(r'waterfall', new_waterfall_url, content) | 452 page_data['content'] = re.sub( |
393 return content | 453 r'waterfall', |
454 new_waterfall_url, | |
455 page_data['content']) | |
456 return page_data | |
394 | 457 |
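The site extraction in one_box_handler is simply everything before the first slash of the unquoted local path:

```python
import re

# Path chosen for illustration; real paths come from the URLS table.
md = re.match(r'^([^\/]+)/.*$', 'chromium.main/one_box_per_builder')
print(md.group(1))  # chromium.main
```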
395 | 458 |
396 | 459 |
397 # List of URLs to fetch. | 460 # List of URLs to fetch. |
398 URLS = [ | 461 URLS = [ |
399 # Console URLs. | 462 # Console URLs. |
400 { | 463 { |
401 'remoteurl': 'http://build.chromium.org/p/chromium/console', | 464 'remoteurl': 'http://build.chromium.org/p/chromium/console', |
402 'localpath': 'chromium.main/console', | 465 'localpath': 'chromium.main/console', |
403 'postfetch': console_handler, | 466 'postfetch': console_handler, |
(...skipping 233 matching lines...) |
637 return urlfetch.fetch(url, deadline=URLFETCH_DEADLINE, *args, **kwargs) | 700 return urlfetch.fetch(url, deadline=URLFETCH_DEADLINE, *args, **kwargs) |
638 except urlfetch.DownloadError: | 701 except urlfetch.DownloadError: |
639 logging.warn('urlfetch failed: %s' % url, exc_info=1) | 702 logging.warn('urlfetch failed: %s' % url, exc_info=1) |
640 return None | 703 return None |
641 | 704 |
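nonfatal_fetch_url turns a transient DownloadError into a logged warning plus a None return, so one flaky master cannot kill the whole deferred task; fetch_page's "if not response" check is the matching caller-side half. For readers outside GAE, the same shape with the Python 2 stdlib (a hand-rolled analogue, not the CL's code):

```python
import logging
import urllib2  # Python 2 stdlib, matching this codebase

def nonfatal_fetch(url, deadline=300):
    try:
        return urllib2.urlopen(url, timeout=deadline).read()
    except urllib2.URLError:
        logging.warn('fetch failed: %s' % url, exc_info=1)
        return None
```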
642 | 705 |
643 class Page(db.Model): | 706 class Page(db.Model): |
644 fetch_timestamp = db.DateTimeProperty(required=True) | 707 fetch_timestamp = db.DateTimeProperty(required=True) |
645 localpath = db.StringProperty(required=True) | 708 localpath = db.StringProperty(required=True) |
646 content = db.TextProperty() | 709 content = db.TextProperty() |
710 title = db.StringProperty() | |
711 offsite_base = db.StringProperty() | |
712 body_class = db.StringProperty() | |
647 remoteurl = db.TextProperty() | 713 remoteurl = db.TextProperty() |
648 # Data updated separately, after creation. | 714 # Data updated separately, after creation. |
649 content_blob = blobstore.BlobReferenceProperty() | 715 content_blob = blobstore.BlobReferenceProperty() |
650 | 716 |
651 | 717 |
652 def write_blob(data, mime_type): | 718 def write_blob(data, mime_type): |
653 """Saves a Unicode string as a new blob, returns the blob's key.""" | 719 """Saves a Unicode string as a new blob, returns the blob's key.""" |
654 file_name = files.blobstore.create(mime_type=mime_type) | 720 file_name = files.blobstore.create(mime_type=mime_type) |
655 data = data.encode('utf-8') | 721 data = data.encode('utf-8') |
656 with files.open(file_name, 'a') as blob_file: | 722 with files.open(file_name, 'a') as blob_file: |
657 blob_file.write(data) | 723 blob_file.write(data) |
658 files.finalize(file_name) | 724 files.finalize(file_name) |
659 return files.blobstore.get_blob_key(file_name) | 725 return files.blobstore.get_blob_key(file_name) |
660 | 726 |
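write_blob is the standard Files API dance of its day: create a blobstore file, append, finalize, then resolve the file name to a blob key. The read side is blobstore.BlobReader, as in get_and_cache_pagedata. A sketch of the round trip; this runs only inside App Engine, and the Files API was later deprecated:

```python
from google.appengine.ext import blobstore

def read_blob(blob_key):
    # Mirrors the read path in get_and_cache_pagedata.
    return blobstore.BlobReader(blob_key).read().decode('utf-8', 'replace')

# blob_key = write_blob(u'<html>console rows</html>', 'text/html')
# assert read_blob(blob_key) == u'<html>console rows</html>'
```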
661 | 727 |
662 def save_page(page, localpath, content, fetch_timestamp): | 728 def save_page(page, localpath, fetch_timestamp, page_data): |
729 body_class = page_data.get('body_class', '') | |
730 content = page_data.get('content') | |
731 offsite_base = page_data.get('offsite_base', '') | |
732 title = page_data.get('title', '') | |
733 | |
663 content_blob_key = None | 734 content_blob_key = None |
664 try: | 735 try: |
665 content = content.decode('utf-8', 'replace') | 736 content = content.decode('utf-8', 'replace') |
666 except UnicodeEncodeError: | 737 except UnicodeEncodeError: |
667 logging.debug('save_page: content was already in unicode') | 738 logging.debug('save_page: content was already in unicode') |
668 logging.debug('save_page: content size is %d' % len(content)) | 739 logging.debug('save_page: content size is %d' % len(content)) |
669 if len(content.encode('utf-8')) >= 1024*1024: | 740 if len(content.encode('utf-8')) >= 1024*1024: |
670 logging.debug('save_page: saving to blob') | 741 logging.debug('save_page: saving to blob') |
671 content_blob_key = write_blob(content, path_to_mime_type(localpath)) | 742 content_blob_key = write_blob(content, path_to_mime_type(localpath)) |
672 content = None | 743 content = None |
673 def tx_page(page_key): | 744 def tx_page(page_key): |
674 page = Page.get(page_key) | 745 page = Page.get(page_key) |
675 # E1103:225,7:fetch_page.tx_page: Instance of 'list' has no | 746 # E1103:225,7:fetch_page.tx_page: Instance of 'list' has no |
676 # 'fetch_timestamp' member (but some types could not be inferred) | 747 # 'fetch_timestamp' member (but some types could not be inferred) |
677 # pylint: disable=E1103 | 748 # pylint: disable=E1103 |
678 if page.fetch_timestamp > fetch_timestamp: | 749 if page.fetch_timestamp > fetch_timestamp: |
679 return | 750 return |
680 page.content = content | 751 page.content = content |
681 page.content_blob = content_blob_key | 752 page.content_blob = content_blob_key |
682 page.fetch_timestamp = fetch_timestamp | 753 page.fetch_timestamp = fetch_timestamp |
754 # title, offsite_base, body_class can all be empty strings for some | |
755 # content. Where that's true, they're not used for displaying a console- | |
756 # like resource, and the content alone is returned to the web user. | |
757 page.title = title | |
758 page.offsite_base = offsite_base | |
759 page.body_class = body_class | |
683 # E1103:231,4:fetch_page.tx_page: Instance of 'list' has no 'put' member | 760 # E1103:231,4:fetch_page.tx_page: Instance of 'list' has no 'put' member |
684 # (but some types could not be inferred) | 761 # (but some types could not be inferred) |
685 # pylint: disable=E1103 | 762 # pylint: disable=E1103 |
686 page.put() | 763 page.put() |
687 db.run_in_transaction(tx_page, page.key()) | 764 db.run_in_transaction(tx_page, page.key()) |
688 # E1101:232,11:fetch_page.tx_page: Module 'google.appengine.api.memcache' | 765 page_data = { |
M-A Ruel 2012/05/29 18:46:33: you could make a page.as_dict() member function, t
cmp 2012/05/29 19:38:03: page_data is not really a Page-as-dict. It's more
| |
689 # has no 'set' member | 766 'body_class': body_class, |
690 # pylint: disable=E1101 | 767 'content': content, |
691 if page.content_blob is None: | 768 'offsite_base': offsite_base, |
692 if memcache.set(key=localpath, value=page.content, time=60): | 769 'title': title, |
693 logging.debug('tx_page(page key="%s"): memcache.set() succeeded' % | 770 } |
694 page.key()) | 771 if content_blob_key: |
695 else: | 772 page_data['content_blob'] = True |
696 logging.error('tx_page(page key="%s"): memcache.set() failed' % | 773 put_pagedata_into_cache(localpath, page_data) |
697 page.key()) | |
698 | 774 |
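The transaction in save_page exists mainly for its timestamp guard: deferred tasks can complete out of order, and an older fetch must never clobber a newer page. The guard in isolation, with a dict standing in for the Page entity:

```python
import datetime

def tx_page_sketch(page, content, fetch_timestamp):
    if page['fetch_timestamp'] > fetch_timestamp:
        return False                  # newer data already stored; drop write
    page['content'] = content
    page['fetch_timestamp'] = fetch_timestamp
    return True

page = {'content': 'new',
        'fetch_timestamp': datetime.datetime(2012, 5, 29, 12, 0)}
stale_ts = datetime.datetime(2012, 5, 29, 11, 0)
print(tx_page_sketch(page, 'stale', stale_ts))  # False: stale write rejected
```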
699 | 775 |
700 def get_or_create_page(localpath, remoteurl, maxage): | 776 def get_or_create_page(localpath, remoteurl, maxage): |
701 return Page.get_or_insert( | 777 return Page.get_or_insert( |
702 key_name=localpath, | 778 key_name=localpath, |
703 localpath=localpath, | 779 localpath=localpath, |
704 remoteurl=remoteurl, | 780 remoteurl=remoteurl, |
705 maxage=maxage, | 781 maxage=maxage, |
706 fetch_timestamp=datetime.datetime.now() - datetime.timedelta(hours=24), | 782 fetch_timestamp=datetime.datetime.now() - datetime.timedelta(hours=24), |
707 content=None, | 783 content=None, |
708 content_blob=None) | 784 content_blob=None) |
709 | 785 |
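get_or_create_page backdates fetch_timestamp by 24 hours on first insert, which guarantees a brand-new Page always fails fetch_page's too-recent check and gets fetched immediately:

```python
import datetime

maxage = 30  # seconds, as used for chromium/console
fetch_timestamp = datetime.datetime.now() - datetime.timedelta(hours=24)
oldest_acceptable = datetime.datetime.now() - datetime.timedelta(seconds=maxage)
print(fetch_timestamp > oldest_acceptable)  # False -> not too recent -> fetch
```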
710 | 786 |
711 def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None): | 787 def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None, |
788 fetch_url=nonfatal_fetch_url): | |
712 """Fetches data about a set of pages.""" | 789 """Fetches data about a set of pages.""" |
713 unquoted_localpath = urllib.unquote(localpath) | 790 unquoted_localpath = urllib.unquote(localpath) |
714 logging.debug('fetch_page("%s", "%s", "%s")' % ( | 791 logging.debug('fetch_page("%s", "%s", "%s")' % ( |
715 unquoted_localpath, remoteurl, maxage)) | 792 unquoted_localpath, remoteurl, maxage)) |
716 page = get_or_create_page(unquoted_localpath, remoteurl, maxage) | 793 page = get_or_create_page(unquoted_localpath, remoteurl, maxage) |
717 | 794 |
718 # Check if our copy of the page is younger than maxage. If it is, we'll | 795 # Check if our copy of the page is younger than maxage. If it is, we'll |
719 # skip the fetch. | 796 # skip the fetch. |
720 oldest_acceptable_timestamp = datetime.datetime.now() - datetime.timedelta( | 797 oldest_acceptable_timestamp = datetime.datetime.now() - datetime.timedelta( |
721 seconds=maxage) | 798 seconds=maxage) |
722 if (page.fetch_timestamp and | 799 if (page.fetch_timestamp and |
723 page.fetch_timestamp > oldest_acceptable_timestamp): | 800 page.fetch_timestamp > oldest_acceptable_timestamp): |
724 logging.debug('fetch_page: too recent, skipping') | 801 logging.debug('fetch_page: too recent, skipping') |
725 return | 802 return |
726 | 803 |
727 # Perform the actual page fetch. | 804 # Perform the actual page fetch. |
728 fetch_timestamp = datetime.datetime.now() | 805 fetch_timestamp = datetime.datetime.now() |
729 response = nonfatal_fetch_url(remoteurl) | 806 response = fetch_url(remoteurl) |
730 if not response: | 807 if not response: |
731 logging.warning('fetch_page: got empty response') | 808 logging.warning('fetch_page: got empty response') |
732 return | 809 return |
733 if response.status_code != 200: | 810 if response.status_code != 200: |
734 logging.warning('fetch_page: got non-empty response but code ' | 811 logging.warning('fetch_page: got non-empty response but code ' |
735 '%d' % response.status_code) | 812 '%d' % response.status_code) |
736 return | 813 return |
737 | 814 |
738 # We have actual content. If there's one or more handlers, call them. | 815 # We have actual content. If there's one or more handlers, call them. |
739 content = response.content | 816 page_data = {} |
817 page_data['content'] = response.content | |
740 if postfetch: | 818 if postfetch: |
741 if not isinstance(postfetch, list): | 819 if not isinstance(postfetch, list): |
742 postfetch = [postfetch] | 820 postfetch = [postfetch] |
743 for handler in postfetch: | 821 for handler in postfetch: |
744 logging.debug('fetch_page: calling postfetch handler ' | 822 logging.debug('fetch_page: calling postfetch handler ' |
745 '%s' % handler.__name__) | 823 '%s' % handler.__name__) |
746 content = handler(unquoted_localpath, remoteurl, content) | 824 page_data = handler(unquoted_localpath, remoteurl, page_data) |
747 | 825 |
748 # Save the returned content into the DB and caching layers. | 826 # Save the returned content into the DB and caching layers. |
749 logging.debug('fetch_page: saving page') | 827 logging.debug('fetch_page: saving page') |
750 save_page(page, unquoted_localpath, content, fetch_timestamp) | 828 save_page(page, unquoted_localpath, fetch_timestamp, page_data) |
751 if postsave: | 829 if postsave: |
752 if not isinstance(postsave, list): | 830 if not isinstance(postsave, list): |
753 postsave = [postsave] | 831 postsave = [postsave] |
754 for handler in postsave: | 832 for handler in postsave: |
755 logging.debug('fetch_page: calling postsave handler ' | 833 logging.debug('fetch_page: calling postsave handler ' |
756 '%s' % handler.__name__) | 834 '%s' % handler.__name__) |
757 handler(unquoted_localpath, remoteurl, content) | 835 handler(unquoted_localpath, remoteurl, page_data) |
758 | 836 |
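With this CL the page_data dict becomes the contract for postfetch and postsave handlers: each takes (unquoted_localpath, remoteurl, page_data) and returns the dict that feeds the next handler. A toy chain showing the convention (handler names hypothetical):

```python
def set_title(localpath, remoteurl, page_data):
    page_data['title'] = 'BuildBot: Chromium'
    return page_data

def strip_content(localpath, remoteurl, page_data):
    page_data['content'] = page_data['content'].strip()
    return page_data

page_data = {'content': '  <table>rows</table>  '}
for handler in [set_title, strip_content]:
    page_data = handler('chromium/console', 'http://example.com', page_data)
print('%s: %r' % (page_data['title'], page_data['content']))
```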
759 | 837 |
760 EXT_TO_MIME = { | 838 EXT_TO_MIME = { |
761 '.css': 'text/css', | 839 '.css': 'text/css', |
762 '.js': 'text/javascript', | 840 '.js': 'text/javascript', |
763 '.json': 'application/json', | 841 '.json': 'application/json', |
764 '.html': 'text/html', | 842 '.html': 'text/html', |
765 } | 843 } |
766 | 844 |
767 | 845 |
768 def path_to_mime_type(path): | 846 def path_to_mime_type(path): |
769 return EXT_TO_MIME.get(os.path.splitext(path)[1], 'text/html') | 847 return EXT_TO_MIME.get(os.path.splitext(path)[1], 'text/html') |
770 | 848 |
771 | 849 |
772 def fetch_pages(): | 850 def fetch_pages(): |
773 """Starts a background fetch operation for pages that need it.""" | 851 """Starts a background fetch operation for pages that need it.""" |
774 logging.debug('fetch_pages()') | 852 logging.debug('fetch_pages()') |
775 for url in URLS: | 853 for url in URLS: |
776 deferred.defer(fetch_page, **url) | 854 deferred.defer(fetch_page, **url) |
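fetch_pages fans each URLS entry out as its own task-queue task; deferred.defer(fetch_page, **url) expands the dict into fetch_page's keyword arguments. The expansion, run locally without a task queue (maxage value illustrative):

```python
def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None):
    print('would fetch %s -> %s (maxage=%ds)' % (remoteurl, localpath, maxage))

URLS = [{
    'remoteurl': 'http://build.chromium.org/p/chromium/console',
    'localpath': 'chromium.main/console',
    'maxage': 30,
}]
for url in URLS:
    fetch_page(**url)  # deferred.defer(fetch_page, **url) queues this instead
```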