OLD | NEW |
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 from __future__ import with_statement | 5 from __future__ import with_statement |
6 | 6 |
7 import datetime | 7 import datetime |
8 import json | 8 import json |
9 import logging | 9 import logging |
10 import os | 10 import os |
11 import random | 11 import random |
12 import re | 12 import re |
13 import string | 13 import string |
14 import urllib | 14 import urllib |
15 | 15 |
16 from google.appengine.api import files, memcache, urlfetch | 16 from google.appengine.api import files, memcache, urlfetch |
17 from google.appengine.api.app_identity import get_application_id | 17 from google.appengine.api.app_identity import get_application_id |
18 from google.appengine.ext import blobstore, db, deferred | 18 from google.appengine.ext import blobstore, db, deferred |
19 # F0401: 16,0: Unable to import 'webapp2_extras' | 19 # F0401: 16,0: Unable to import 'webapp2_extras' |
20 # W0611: 16,0: Unused import jinja2 | 20 # W0611: 16,0: Unused import jinja2 |
21 # pylint: disable=F0401, W0611 | 21 # pylint: disable=F0401, W0611 |
22 from webapp2_extras import jinja2 | 22 from webapp2_extras import jinja2 |
23 # F0401:22,0: Unable to import 'jinja2' | 23 # F0401:22,0: Unable to import 'jinja2' |
24 # pylint: disable=F0401 | 24 # pylint: disable=F0401 |
25 from jinja2 import Environment, FileSystemLoader | 25 from jinja2 import Environment, FileSystemLoader |
26 | 26 |
27 from third_party.BeautifulSoup.BeautifulSoup import BeautifulSoup | 27 from third_party.BeautifulSoup.BeautifulSoup import BeautifulSoup, Tag |
28 | 28 |
29 | 29 |
30 # Current application name. | 30 # Current application name. |
31 APP_NAME = get_application_id() | 31 APP_NAME = get_application_id() |
32 | 32 |
33 # Deadline for fetching URLs (in seconds). | 33 # Deadline for fetching URLs (in seconds). |
34 URLFETCH_DEADLINE = 60*5 # 5 mins | 34 URLFETCH_DEADLINE = 60*5 # 5 mins |
35 | 35 |
36 # Default masters to merge together. | 36 # Default masters to merge together. |
37 DEFAULT_MASTERS_TO_MERGE = [ | 37 DEFAULT_MASTERS_TO_MERGE = [ |
38 'chromium.main', | 38 'chromium.main', |
39 'chromium.win', | 39 'chromium.win', |
40 'chromium.mac', | 40 'chromium.mac', |
41 'chromium.linux', | 41 'chromium.linux', |
42 'chromium.chromiumos', | 42 'chromium.chromiumos', |
43 'chromium.chrome', | 43 'chromium.chrome', |
44 'chromium.memory', | 44 'chromium.memory', |
45 ] | 45 ] |
46 | 46 |
47 | 47 |
48 # Perform initial bootstrap for this module. | 48 # Perform initial bootstrap for this module. |
49 console_template = '' | 49 console_template = '' |
50 def bootstrap(): | 50 def bootstrap(): |
51 global console_template | 51 global console_template |
52 with open('templates/merger.html', 'r') as fh: | 52 with open('templates/merger.html', 'r') as fh: |
53 console_template = fh.read() | 53 console_template = fh.read() |
54 | 54 |
55 | 55 |
56 def get_pagedata_from_cache(localpath): | 56 ########## |
57 memcache_data = memcache.get(localpath) | 57 # Page class definition and related functions. |
58 if not memcache_data: | 58 ########## |
59 return None | 59 class Page(db.Model): |
60 logging.debug('content for %s found in memcache' % localpath) | 60 fetch_timestamp = db.DateTimeProperty(required=True) |
61 return json.loads(memcache_data) | 61 localpath = db.StringProperty(required=True) |
| 62 content = db.TextProperty() |
| 63 title = db.StringProperty() |
| 64 offsite_base = db.StringProperty() |
| 65 body_class = db.StringProperty() |
| 66 remoteurl = db.TextProperty() |
| 67 # Data updated separately, after creation. |
| 68 content_blob = blobstore.BlobReferenceProperty() |
62 | 69 |
63 | 70 |
64 def put_pagedata_into_cache(localpath, page_data): | 71 def get_or_create_page(localpath, remoteurl, maxage): |
65 memcache_data = json.dumps(page_data) | 72 return Page.get_or_insert( |
66 if not memcache.set(key=localpath, value=memcache_data, time=2*60): | 73 key_name=localpath, |
67 logging.error('put_pagedata_into_cache(\'%s\'): memcache.set() failed' % ( | 74 localpath=localpath, |
68 localpath)) | 75 remoteurl=remoteurl, |
| 76 maxage=maxage, |
| 77 fetch_timestamp=datetime.datetime.now() - datetime.timedelta(hours=24), |
| 78 content=None, |
| 79 content_blob=None) |
69 | 80 |
70 | 81 |
71 def get_and_cache_pagedata(localpath): | 82 def get_and_cache_pagedata(localpath): |
72 """Returns a page_data dict, optionally caching and looking up a blob. | 83 """Returns a page_data dict, optionally caching and looking up a blob. |
73 | 84 |
74 get_and_cache_pagedata takes a localpath which is used to fetch data | 85 get_and_cache_pagedata takes a localpath which is used to fetch data |
75 from the cache. If the data is present and there's no content blob, | 86 from the cache. If the data is present and there's no content blob, |
76 then we have all of the data we need to return a page view to the user | 87 then we have all of the data we need to return a page view to the user |
77 and we return early. | 88 and we return early. |
78 | 89 |
79 Otherwise, we need to fetch the page object and set up the page data | 90 Otherwise, we need to fetch the page object and set up the page data |
80 for the page view. If the page has a blob associated with it, then we | 91 for the page view. If the page has a blob associated with it, then we |
81 mark the page data as having a blob and cache it as-is without the blob. | 92 mark the page data as having a blob and cache it as-is without the blob. |
82 If there's no blob, we associate the content with the page data and | 93 If there's no blob, we associate the content with the page data and |
83 cache that. This is so the next time get_and_cache_pagedata is called | 94 cache that. This is so the next time get_and_cache_pagedata is called |
84 for either case, we'll get the same behavior (a page-lookup for blobful | 95 for either case, we'll get the same behavior (a page-lookup for blobful |
85 content and a page cache hit for blobless content). | 96 content and a page cache hit for blobless content). |
86 | 97 |
87 Here we assume localpath is already unquoted. | 98 Here we assume localpath is already unquoted. |
88 """ | 99 """ |
89 page_data = get_pagedata_from_cache(localpath) | 100 page_data = get_data_from_cache(localpath) |
90 if page_data and not page_data.get('content_blob'): | 101 if page_data and not page_data.get('content_blob'): |
91 return page_data | 102 return page_data |
92 page = Page.all().filter('localpath =', localpath).get() | 103 page = Page.all().filter('localpath =', localpath).get() |
93 if not page: | 104 if not page: |
94 logging.error('get_and_cache_pagedata(\'%s\'): no matching localpath in ' | 105 logging.error('get_and_cache_pagedata(\'%s\'): no matching localpath in ' |
95 'datastore' % localpath) | 106 'datastore' % localpath) |
96 return {'content': None} | 107 return {'content': None} |
97 page_data = { | 108 page_data = { |
98 'body_class': page.body_class, | 109 'body_class': page.body_class, |
99 'offsite_base': page.offsite_base, | 110 'offsite_base': page.offsite_base, |
100 'title': page.title, | 111 'title': page.title, |
101 } | 112 } |
102 if page.content_blob: | 113 if page.content_blob: |
103 # Get the blob. | 114 # Get the blob. |
104 logging.debug('content for %s found in blobstore' % localpath) | 115 logging.debug('content for %s found in blobstore' % localpath) |
105 blob_reader = blobstore.BlobReader(page.content_blob) | 116 blob_reader = blobstore.BlobReader(page.content_blob) |
106 page_data['content_blob'] = True | 117 page_data['content_blob'] = True |
107 put_pagedata_into_cache(localpath, page_data) | 118 put_data_into_cache(localpath, page_data) |
108 page_data['content'] = blob_reader.read().decode('utf-8', 'replace') | 119 page_data['content'] = blob_reader.read().decode('utf-8', 'replace') |
109 else: | 120 else: |
110 logging.debug('content for %s found in datastore' % localpath) | 121 logging.debug('content for %s found in datastore' % localpath) |
111 page_data['content'] = page.content | 122 page_data['content'] = page.content |
112 put_pagedata_into_cache(localpath, page_data) | 123 put_data_into_cache(localpath, page_data) |
113 return page_data | 124 return page_data |
114 | 125 |
115 | 126 |
| 127 def save_page(page, localpath, fetch_timestamp, page_data): |
| 128 body_class = page_data.get('body_class', '') |
| 129 content = page_data.get('content') |
| 130 offsite_base = page_data.get('offsite_base', '') |
| 131 title = page_data.get('title', '') |
| 132 |
| 133 content_blob_key = None |
| 134 try: |
| 135 content = content.decode('utf-8', 'replace') |
| 136 except UnicodeEncodeError: |
| 137 logging.debug('save_page: content was already in unicode') |
| 138 logging.debug('save_page: content size is %d' % len(content)) |
| 139 if len(content.encode('utf-8')) >= 10**6: |
| 140 logging.debug('save_page: saving to blob') |
| 141 content_blob_key = write_blob(content, path_to_mime_type(localpath)) |
| 142 content = None |
| 143 def tx_page(page_key): |
| 144 page = Page.get(page_key) |
| 145 # E1103:225,7:fetch_page.tx_page: Instance of 'list' has no |
| 146 # 'fetch_timestamp' member (but some types could not be inferred) |
| 147 # pylint: disable=E1103 |
| 148 if page.fetch_timestamp > fetch_timestamp: |
| 149 return |
| 150 page.content = content |
| 151 page.content_blob = content_blob_key |
| 152 page.fetch_timestamp = fetch_timestamp |
| 153 # title, offsite_base, body_class can all be empty strings for some |
| 154 # content. Where that's true, they're not used for displaying a console- |
| 155 # like resource, and the content alone is returned to the web user. |
| 156 page.title = title |
| 157 page.offsite_base = offsite_base |
| 158 page.body_class = body_class |
| 159 # E1103:231,4:fetch_page.tx_page: Instance of 'list' has no 'put' member |
| 160 # (but some types could not be inferred) |
| 161 # pylint: disable=E1103 |
| 162 page.put() |
| 163 db.run_in_transaction(tx_page, page.key()) |
| 164 page_data = { |
| 165 'body_class': body_class, |
| 166 'content': content, |
| 167 'offsite_base': offsite_base, |
| 168 'title': title, |
| 169 } |
| 170 if content_blob_key: |
| 171 page_data['content_blob'] = True |
| 172 put_data_into_cache(localpath, page_data) |
| 173 logging.info('Saved and cached page with localpath %s' % localpath) |
| 174 |
| 175 |
| 176 ########## |
| 177 # Row class definition and related functions. |
| 178 ########## |
| 179 class Row(db.Model): |
| 180 fetch_timestamp = db.DateTimeProperty(required=True) |
| 181 rev_number = db.StringProperty(required=True) |
| 182 localpath = db.StringProperty(required=True) |
| 183 revision = db.TextProperty() |
| 184 name = db.TextProperty() |
| 185 status = db.TextProperty() |
| 186 comment = db.TextProperty() |
| 187 details = db.TextProperty() |
| 188 |
| 189 |
| 190 def get_or_create_row(localpath, revision): |
| 191 return Row.get_or_insert( |
| 192 key_name=localpath, |
| 193 rev_number=revision, |
| 194 localpath=localpath, |
| 195 fetch_timestamp=datetime.datetime.now()) |
| 196 |
| 197 |
| 198 def get_and_cache_rowdata(localpath): |
| 199 """Returns a row_data dict. |
| 200 |
| 201 get_and_cache_rowdata takes a localpath which is used to fetch data from the |
| 202 cache. If the data is present, then we have all of the data we need and we |
| 203 return early. |
| 204 |
| 205 Otherwise, we need to fetch the row object and set up the row data. |
| 206 |
| 207 Here we assume localpath is already unquoted. |
| 208 """ |
| 209 row_data = get_data_from_cache(localpath) |
| 210 if row_data: |
| 211 return row_data |
| 212 row = Row.all().filter('localpath =', localpath).get() |
| 213 if not row: |
| 214 logging.error('get_and_cache_rowdata(\'%s\'): no matching localpath in ' |
| 215 'datastore' % localpath) |
| 216 return {} |
| 217 row_data = {} |
| 218 row_data['rev'] = row.revision |
| 219 row_data['name'] = row.name |
| 220 row_data['status'] = row.status |
| 221 row_data['comment'] = row.comment |
| 222 row_data['details'] = row.details |
| 223 row_data['rev_number'] = row.rev_number |
| 224 logging.debug('content for %s found in datastore' % localpath) |
| 225 put_data_into_cache(localpath, row_data) |
| 226 return row_data |
| 227 |
| 228 |
| 229 def save_row(row_data, localpath, timestamp): |
| 230 rev_number = row_data['rev_number'] |
| 231 row = get_or_create_row(localpath, rev_number) |
| 232 row_key = row.key() |
| 233 def tx_row(row_key): |
| 234 row = Row.get(row_key) |
| 235 # E1103:959,7:save_row.tx_row: Instance of 'list' has no |
| 236 # 'fetch_timestamp' member (but some types could not be inferred) |
| 237 # pylint: disable=E1103 |
| 238 # if row.fetch_timestamp > timestamp: |
| 239 # return |
| 240 row.fetch_timestamp = timestamp |
| 241 row.revision = row_data['rev'] |
| 242 row.name = row_data['name'] |
| 243 row.status = row_data['status'] |
| 244 row.comment = row_data['comment'] |
| 245 row.details = row_data['details'] |
| 246 # E1103:967,4:save_row.tx_row: Instance of 'list' has no 'put' member |
| 247 # (but some types could not be inferred) |
| 248 # pylint: disable=E1103 |
| 249 row.put() |
| 250 db.run_in_transaction(tx_row, row_key) |
| 251 prev_rev = memcache.get(key='latest_rev') |
| 252 if prev_rev is None or int(rev_number) > int(prev_rev): |
| 253 memcache.set(key='latest_rev', value=rev_number) |
| 254 put_data_into_cache(localpath, row_data) |
| 255 logging.info('Saved and cached row with localpath %s' % localpath) |
| 256 |
| 257 |
| 258 ########## |
| 259 # ConsoleData class definition and related functions. |
| 260 ########## |
116 class ConsoleData(object): | 261 class ConsoleData(object): |
117 def __init__(self): | 262 def __init__(self): |
118 self.row_orderedkeys = [] | 263 self.row_orderedkeys = [] |
119 self.row_data = {} | 264 self.row_data = {} |
120 | 265 |
121 # Retain order of observed masters. | 266 # Retain order of observed masters. |
122 self.masters = [] | 267 self.masters = [] |
123 | 268 |
124 # Map(k,v): k=Master, v=List of categories | 269 # Map(k,v): k=Master, v=List of categories |
125 self.category_order = {} | 270 self.category_order = {} |
(...skipping 45 matching lines...)
171 def SetDetail(self, detail): | 316 def SetDetail(self, detail): |
172 self.last_row['detail'] = detail | 317 self.last_row['detail'] = detail |
173 | 318 |
174 def AddCategory(self, category, builder_status): | 319 def AddCategory(self, category, builder_status): |
175 self.category_order[self.lastMasterSeen].append(category) | 320 self.category_order[self.lastMasterSeen].append(category) |
176 # Map(k,v): k=Master/category, v=Dict of category data (last build status) | 321 # Map(k,v): k=Master/category, v=Dict of category data (last build status) |
177 self.category_data[self.lastMasterSeen].setdefault(category, {}) | 322 self.category_data[self.lastMasterSeen].setdefault(category, {}) |
178 self.category_data[self.lastMasterSeen][category] = builder_status | 323 self.category_data[self.lastMasterSeen][category] = builder_status |
179 self.category_count += 1 | 324 self.category_count += 1 |
180 | 325 |
| 326 def AddRow(self, row): |
| 327 revision = row['rev_number'] |
| 328 self.SawRevision(revision) |
| 329 revlink = BeautifulSoup(row['rev']).td.a['href'] |
| 330 self.SetLink(revlink) |
| 331 name = BeautifulSoup(row['name']).td.contents |
| 332 self.SetName(self.ContentsToHtml(name)) |
| 333 status = BeautifulSoup(row['status']).findAll('table') |
| 334 for i, stat in enumerate(status): |
| 335 self.SetStatus(self.category_order[self.lastMasterSeen][i], |
| 336 unicode(stat)) |
| 337 comment = BeautifulSoup(row['comment']).td.contents |
| 338 self.SetComment(self.ContentsToHtml(comment)) |
| 339 if row['details']: |
| 340 details = BeautifulSoup(row['details']).td.contents |
| 341 self.SetDetail(self.ContentsToHtml(details)) |
| 342 |
181 def ParseRow(self, row): | 343 def ParseRow(self, row): |
182 cells = row.findAll('td', recursive=False) | 344 cells = row.findAll('td', recursive=False) |
183 # Figure out which row this is. | 345 # Figure out which row this is. |
184 for attrname, attrvalue in cells[0].attrs: | 346 for attrname, attrvalue in cells[0].attrs: |
185 if attrname != 'class': | 347 if attrname != 'class': |
186 continue | 348 continue |
187 attrvalue = re.sub(r'^(\S+).*', r'\1', attrvalue) | 349 attrvalue = re.sub(r'^(\S+).*', r'\1', attrvalue) |
188 if attrvalue == 'DevRev': | 350 if attrvalue == 'DevRev': |
189 revision = cells[0] | 351 revision = cells[0] |
190 self.SawRevision(revision=revision.findAll('a')[0].contents[0]) | 352 self.SawRevision(revision=revision.findAll('a')[0].contents[0]) |
(...skipping 10 matching lines...)
201 if attrvalue == 'DevDetails': | 363 if attrvalue == 'DevDetails': |
202 self.SetDetail(detail=self.ContentsToHtml(cells[0].contents)) | 364 self.SetDetail(detail=self.ContentsToHtml(cells[0].contents)) |
203 | 365 |
204 def Finish(self): | 366 def Finish(self): |
205 self.row_orderedkeys = sorted(self.row_orderedkeys, key=int, reverse=True) | 367 self.row_orderedkeys = sorted(self.row_orderedkeys, key=int, reverse=True) |
206 # TODO(cmp): Look for row/master/categories that are unset. If they are | 368 # TODO(cmp): Look for row/master/categories that are unset. If they are |
207 # at the latest revisions, leave them unset. If they are at | 369 # at the latest revisions, leave them unset. If they are at |
208 # the earliest revisions, set them to ''. | 370 # the earliest revisions, set them to ''. |
209 | 371 |
210 | 372 |
211 # W0613:169,39:console_merger: Unused argument 'remoteurl' | 373 ########## |
212 # W0613:169,19:console_merger: Unused argument 'unquoted_localpath' | 374 # Heavy-lifting functions that do most of the console processing. |
213 # pylint: disable=W0613 | 375 # AKA postfetch and postsave functions/handlers. |
214 def console_merger(unquoted_localpath, remote_url, page_data=None, | 376 ########## |
215 masters_to_merge=None): | 377 def console_merger(localpath, remoteurl, page_data, |
216 page_data = page_data or {} | 378 masters_to_merge=None, num_rows_to_merge=25): |
217 | |
218 masters_to_merge = masters_to_merge or DEFAULT_MASTERS_TO_MERGE | 379 masters_to_merge = masters_to_merge or DEFAULT_MASTERS_TO_MERGE |
219 mergedconsole = ConsoleData() | 380 mergedconsole = ConsoleData() |
220 merged_page = None | 381 surroundings = get_and_cache_pagedata('surroundings') |
221 merged_tag = None | 382 merged_page = BeautifulSoup(surroundings['content']) |
| 383 merged_tag = merged_page.find('table', 'ConsoleData') |
| 384 latest_rev = int(memcache.get(key='latest_rev') or 0) |
| 385 if not latest_rev: |
| 386 logging.error('console_merger(\'%s\', \'%s\', \'%s\'): cannot get latest ' |
| 387 'revision number.' % ( |
| 388 localpath, remoteurl, page_data)) |
| 389 return |
222 fetch_timestamp = datetime.datetime.now() | 390 fetch_timestamp = datetime.datetime.now() |
223 for master in masters_to_merge: | 391 for master in masters_to_merge: |
224 page_data = get_and_cache_pagedata('%s/console' % master) | 392 # Fetch the summary one-box-per-builder for the master. |
225 master_content = page_data['content'] | 393 # If we can't get it, something is wrong; skip this master entirely. |
226 if master_content is None: | 394 master_summary = get_and_cache_pagedata('%s/console/summary' % master) |
| 395 if not master_summary['content']: |
227 continue | 396 continue |
228 master_content = master_content.encode('ascii', 'replace') | |
229 this_page = BeautifulSoup(master_content) | |
230 this_tag = this_page.find('table', {'class': 'ConsoleData'}) | |
231 # The first console is special, we reuse all of the console page. | |
232 if not merged_page: | |
233 merged_page = this_page | |
234 merged_tag = this_tag | |
235 mergedconsole.SawMaster(master) | 397 mergedconsole.SawMaster(master) |
| 398 # Get the categories for this master. If the master doesn't have any |
| 399 # categories, just use the default empty-string category. |
| 400 category_list = [] |
| 401 master_categories = get_and_cache_pagedata('%s/console/categories' % master) |
| 402 if not master_categories['content']: |
| 403 category_list.append('') |
| 404 else: |
| 405 category_row = BeautifulSoup(master_categories['content']) |
| 406 category_list = map(lambda x: x.text, |
| 407 category_row.findAll('td', 'DevStatus')) |
| 408 # Get the corresponding summary box(es). |
| 409 summary_row = BeautifulSoup(master_summary['content']) |
| 410 summary_list = summary_row.findAll('table') |
| 411 for category, summary in zip(category_list, summary_list): |
| 412 mergedconsole.AddCategory(category, summary) |
236 | 413 |
237 # Parse each of the rows. | 414 # Fetch all of the rows that we need. |
238 CATEGORY_ROW = 0 | 415 rows_fetched = 0 |
239 trs = this_tag.findAll('tr', recursive=False) | 416 current_rev = latest_rev |
240 | 417 while rows_fetched < num_rows_to_merge and current_rev >= 0: |
241 # Get the list of categories in |master|. | 418 row_data = get_and_cache_rowdata('%s/console/%s' % (master, current_rev)) |
242 category_tds = trs[CATEGORY_ROW].findAll('td', recursive=False)[2:] | 419 if not row_data: |
243 third_cell = category_tds[0] | 420 current_rev -= 1 |
244 third_cell_class = third_cell.attrs[0][1] | 421 continue |
245 categories = [] | 422 mergedconsole.AddRow(row_data) |
246 if third_cell_class.startswith('DevStatus '): | 423 current_rev -= 1 |
247 BUILDER_STATUS_ROW = 2 | 424 rows_fetched += 1 |
248 FIRST_CL_ROW = 3 | |
249 for index, category_td in enumerate(category_tds): | |
250 categories.append(category_td.contents[0].strip()) | |
251 else: | |
252 # There's no categories + spacing row, the first row will be the builder | |
253 # status row. | |
254 categories.append('') | |
255 BUILDER_STATUS_ROW = 0 | |
256 FIRST_CL_ROW = 1 | |
257 | |
258 # For each category in |master|, add the category plus its |builder_status|. | |
259 builder_tds = trs[BUILDER_STATUS_ROW].findAll('td', recursive=False)[2:] | |
260 for index, category in enumerate(categories): | |
261 builder_status = builder_tds[index].findAll('table', recursive=False)[0] | |
262 mergedconsole.AddCategory(category=category, | |
263 builder_status=builder_status) | |
264 | |
265 # For each of the remaining rows, add them to the console data. | |
266 for console_index in range(FIRST_CL_ROW, len(trs)): | |
267 console_row = trs[console_index] | |
268 mergedconsole.ParseRow(console_row) | |
269 # Add GC memory profiling. | |
270 # import gc | |
271 # gc.set_debug(gc.DEBUG_LEAK) | |
272 # logging.debug(gc.garbage) | |
273 # del gc.garbage[:] | |
274 mergedconsole.Finish() | |
275 | 425 |
276 # Convert the merged content into console content. | 426 # Convert the merged content into console content. |
| 427 mergedconsole.Finish() |
277 template_environment = Environment() | 428 template_environment = Environment() |
278 template_environment.loader = FileSystemLoader('.') | 429 template_environment.loader = FileSystemLoader('.') |
279 def notstarted(builder_status): | 430 def notstarted(builder_status): |
280 """Convert a BeautifulSoup Tag from builder status to a notstarted line.""" | 431 """Convert a BeautifulSoup Tag from builder status to a notstarted line.""" |
281 builder_status = re.sub(r'DevSlaveBox', 'DevStatusBox', str(builder_status)) | 432 builder_status = re.sub(r'DevSlaveBox', 'DevStatusBox', str(builder_status)) |
282 builder_status = re.sub(r'class=\'([^\']*)\' target=', | 433 builder_status = re.sub(r'class=\'([^\']*)\' target=', |
283 'class=\'DevStatusBox notstarted\' target=', | 434 'class=\'DevStatusBox notstarted\' target=', |
284 builder_status) | 435 builder_status) |
285 builder_status = re.sub(r'class="([^"]*)" target=', | 436 builder_status = re.sub(r'class="([^"]*)" target=', |
286 'class="DevStatusBox notstarted" target=', | 437 'class="DevStatusBox notstarted" target=', |
(...skipping 22 matching lines...)
309 r'\'\<div\>\'', r"'<div ' + attributes + '>'", merged_content) | 460 r'\'\<div\>\'', r"'<div ' + attributes + '>'", merged_content) |
310 merged_content = re.sub( | 461 merged_content = re.sub( |
311 r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content) | 462 r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content) |
312 merged_content = re.sub( | 463 merged_content = re.sub( |
313 r'\<iframe\>\</iframe\>', | 464 r'\<iframe\>\</iframe\>', |
314 '<iframe \' + attributes + \' src="\' + url + \'"></iframe>', | 465 '<iframe \' + attributes + \' src="\' + url + \'"></iframe>', |
315 merged_content) | 466 merged_content) |
316 | 467 |
317 # Update the merged console page. | 468 # Update the merged console page. |
318 merged_page = get_or_create_page('chromium/console', None, maxage=30) | 469 merged_page = get_or_create_page('chromium/console', None, maxage=30) |
319 logging.debug('console_merger: saving merged console') | 470 logging.info('console_merger: saving merged console') |
| 471 page_data = get_and_cache_pagedata('chromium/console') |
320 page_data['title'] = 'BuildBot: Chromium' | 472 page_data['title'] = 'BuildBot: Chromium' |
321 page_data['offsite_base'] = 'http://build.chromium.org/p/chromium' | 473 page_data['offsite_base'] = 'http://build.chromium.org/p/chromium' |
322 page_data['body_class'] = 'interface' | 474 page_data['body_class'] = 'interface' |
323 page_data['content'] = merged_content | 475 page_data['content'] = merged_content |
324 save_page(merged_page, 'chromium/console', fetch_timestamp, page_data) | 476 save_page(merged_page, 'chromium/console', fetch_timestamp, page_data) |
325 return | 477 return |
326 | 478 |
327 | 479 |
328 def console_handler(_unquoted_localpath, remoteurl, page_data=None): | 480 def console_handler(unquoted_localpath, remoteurl, page_data=None): |
329 page_data = page_data or {} | 481 page_data = page_data or {} |
330 content = page_data.get('content') | 482 content = page_data.get('content') |
331 if not content: | 483 if not content: |
| 484 logging.error('console_handler(\'%s\', \'%s\', \'%s\'): page has no ' |
| 485 'content' % ( |
| 486 unquoted_localpath, remoteurl, page_data)) |
332 return page_data | 487 return page_data |
333 | 488 |
334 # Decode content from utf-8 to unicode, replacing bad characters. | 489 # Decode content from utf-8 to unicode, replacing bad characters. |
335 content = content.decode('utf-8', 'replace') | 490 content = content.decode('utf-8', 'replace') |
336 | 491 |
337 # Scrub in sheriff file content to console. | 492 # Scrub in sheriff file content to console. |
338 sheriff_files = [ | 493 sheriff_files = [ |
339 'sheriff', | 494 'sheriff', |
340 'sheriff_android', | 495 'sheriff_android', |
341 'sheriff_cr_cros_gardeners', | 496 'sheriff_cr_cros_gardeners', |
(...skipping 111 matching lines...)
453 md = re.search( | 608 md = re.search( |
454 r'^(.*)</body>.*$', | 609 r'^(.*)</body>.*$', |
455 page_data['content'], | 610 page_data['content'], |
456 re.MULTILINE|re.DOTALL) | 611 re.MULTILINE|re.DOTALL) |
457 if not md: | 612 if not md: |
458 raise Exception('failed to locate trailing body and html tags') | 613 raise Exception('failed to locate trailing body and html tags') |
459 page_data['content'] = md.group(1) | 614 page_data['content'] = md.group(1) |
460 | 615 |
461 return page_data | 616 return page_data |
462 | 617 |
463 | 618 # W0613:600,28:parse_master: Unused argument 'remoteurl' |
464 def get_or_create_row(localpath, revision): | 619 # pylint: disable=W0613 |
465 return Row.get_or_insert( | |
466 key_name=revision + ' '+ localpath, | |
467 rev_number=revision, | |
468 localpath=localpath, | |
469 fetch_timestamp=datetime.datetime.now()) | |
470 | |
471 | |
472 def save_row(row_data, localpath, timestamp): | |
473 rev_number = row_data['rev_number'] | |
474 row = get_or_create_row(localpath, rev_number) | |
475 row_key = row.key() | |
476 def tx_row(row_key): | |
477 row = Row.get(row_key) | |
478 # E1103:959,7:save_row.tx_row: Instance of 'list' has no | |
479 # 'fetch_timestamp' member (but some types could not be inferred) | |
480 # pylint: disable=E1103 | |
481 # if row.fetch_timestamp > timestamp: | |
482 # return | |
483 row.fetch_timestamp = timestamp | |
484 row.revision = row_data['rev'] | |
485 row.name = row_data['name'] | |
486 row.status = row_data['status'] | |
487 row.comment = row_data['comment'] | |
488 row.details = row_data['details'] | |
489 # E1103:967,4:save_row.tx_row: Instance of 'list' has no 'put' member | |
490 # (but some types could not be inferred) | |
491 # pylint: disable=E1103 | |
492 row.put() | |
493 db.run_in_transaction(tx_row, row_key) | |
494 memcache_data = json.dumps(row_data) | |
495 # A row should never be large enough to hit the blobstore, so we | |
496 # explicitly don't handle rows larger than 10^6 bytes. | |
497 if not memcache.set(key=str(row_key), value=memcache_data, time=2*60): | |
498 logging.error('save_row(\'%s\'): memcache.set() failed' % (row_key)) | |
499 | |
500 | |
501 def parse_master(localpath, remoteurl, page_data=None): | 620 def parse_master(localpath, remoteurl, page_data=None): |
502 """Part of the new pipeline to store individual rows rather than | 621 """Part of the new pipeline to store individual rows rather than |
503 whole pages of html. Parses the master data into a set of rows, | 622 whole pages of html. Parses the master data into a set of rows, |
504 and writes them out to the datastore in an easily retrievable format. | 623 and writes them out to the datastore in an easily retrievable format. |
505 | 624 |
506 Returns the same page_data as it was passed, so as to not interrupt | 625 Does not modify the page_data dict. |
507 the current pipeline. This may change when we switch over. | |
508 """ | 626 """ |
509 ts = datetime.datetime.now() | 627 ts = datetime.datetime.now() |
510 page_data = page_data or {} | 628 page_data = page_data or {} |
511 content = page_data.get('content') | 629 content = page_data.get('content') |
512 if not content: | 630 if not content: |
513 return page_data | 631 return page_data |
514 content = content.decode('utf-8', 'replace') | 632 content = content.decode('utf-8', 'replace') |
515 | 633 |
516 # Split page into surroundings (announce, legend, footer) and data (rows). | 634 # Split page into surroundings (announce, legend, footer) and data (rows). |
517 surroundings = BeautifulSoup(content) | 635 surroundings = BeautifulSoup(content) |
518 data = surroundings.find('table', 'ConsoleData') | 636 data = surroundings.find('table', 'ConsoleData') |
519 data.extract() | 637 new_data = Tag(surroundings, 'table', [('class', 'ConsoleData'), |
| 638 ('width', '96%')]) |
| 639 data.replaceWith(new_data) |
520 | 640 |
521 surroundings_page = get_or_create_page(localpath + '/surroundings', | 641 surroundings_page = get_or_create_page('surroundings', |
522 None, maxage=30) | 642 None, maxage=30) |
523 surroundings_data = {} | 643 surroundings_data = {} |
524 surroundings_data['title'] = 'Surroundings for ' + localpath | 644 surroundings_data['title'] = 'Surroundings' |
525 surroundings_data['content'] = unicode(surroundings) | 645 surroundings_data['content'] = unicode(surroundings) |
526 save_page(surroundings_page, localpath + '/surroundings', ts, | 646 save_page(surroundings_page, 'surroundings', ts, |
527 surroundings_data) | 647 surroundings_data) |
528 | 648 |
529 rows = data.tbody.findAll('tr', recursive=False) | 649 rows = data.findAll('tr', recursive=False) |
530 # The first table row can be special: the list of categories. | 650 # The first table row can be special: the list of categories. |
531 categories = None | 651 categories = None |
532 # If the first row contains a DevStatus cell... | 652 # If the first row contains a DevStatus cell... |
533 if rows[0].find('td', 'DevStatus') != None: | 653 if rows[0].find('td', 'DevStatus') != None: |
534 # ...extract it into the categories... | 654 # ...extract it into the categories... |
535 categories = rows[0] | 655 categories = rows[0] |
536 # ...and get rid of the next (spacer) row too. | 656 # ...and get rid of the next (spacer) row too. |
537 rows = rows[2:] | 657 rows = rows[2:] |
538 | 658 |
539 if categories: | 659 if categories: |
(...skipping 20 matching lines...)
560 # or a spacer row (in which case we finalize the row and save it). | 680 # or a spacer row (in which case we finalize the row and save it). |
561 for row in rows: | 681 for row in rows: |
562 if row.find('td', 'DevComment'): | 682 if row.find('td', 'DevComment'): |
563 curr_row['comment'] = unicode(row) | 683 curr_row['comment'] = unicode(row) |
564 elif row.find('td', 'DevDetails'): | 684 elif row.find('td', 'DevDetails'): |
565 curr_row['details'] = unicode(row) | 685 curr_row['details'] = unicode(row) |
566 elif row.find('td', 'DevStatus'): | 686 elif row.find('td', 'DevStatus'): |
567 curr_row['rev'] = unicode(row.find('td', 'DevRev')) | 687 curr_row['rev'] = unicode(row.find('td', 'DevRev')) |
568 curr_row['rev_number'] = unicode(row.find('td', 'DevRev').a.string) | 688 curr_row['rev_number'] = unicode(row.find('td', 'DevRev').a.string) |
569 curr_row['name'] = unicode(row.find('td', 'DevName')) | 689 curr_row['name'] = unicode(row.find('td', 'DevName')) |
570 curr_row['status'] = unicode(row.findAll('td', 'DevStatus')) | 690 curr_row['status'] = unicode(row.findAll('table')) |
571 else: | 691 else: |
572 if 'details' not in curr_row: | 692 if 'details' not in curr_row: |
573 curr_row['details'] = '' | 693 curr_row['details'] = '' |
574 save_row(curr_row, localpath, ts) | 694 save_row(curr_row, localpath + '/' + curr_row['rev_number'], ts) |
575 curr_row = {} | 695 curr_row = {} |
576 | 696 |
577 return page_data | 697 return page_data |
578 | 698 |
579 | 699 |
580 def one_box_handler(unquoted_localpath, remoteurl, page_data=None): | 700 def one_box_handler(unquoted_localpath, remoteurl, page_data=None): |
581 page_data = page_data or {} | 701 page_data = page_data or {} |
582 content = page_data.get('content') | 702 content = page_data.get('content') |
583 if content is None: | 703 if content is None: |
584 return page_data | 704 return page_data |
585 # Get the site name from the local path. | 705 # Get the site name from the local path. |
586 md = re.match('^([^\/]+)/.*$', unquoted_localpath) | 706 md = re.match('^([^\/]+)/.*$', unquoted_localpath) |
587 if not md: | 707 if not md: |
588 logging.error('one_box_handler(\'%s\', \'%s\', \'%s\'): cannot get site ' | 708 logging.error('one_box_handler(\'%s\', \'%s\', \'%s\'): cannot get site ' |
589 'from local path' % ( | 709 'from local path' % ( |
590 unquoted_localpath, remoteurl, page_data)) | 710 unquoted_localpath, remoteurl, page_data)) |
591 return page_data | 711 return page_data |
592 site = md.group(1) | 712 site = md.group(1) |
593 new_waterfall_url = 'http://build.chromium.org/p/%s/waterfall' % site | 713 new_waterfall_url = 'http://build.chromium.org/p/%s/waterfall' % site |
594 page_data['content'] = re.sub( | 714 page_data['content'] = re.sub( |
595 r'waterfall', | 715 r'waterfall', |
596 new_waterfall_url, | 716 new_waterfall_url, |
597 page_data['content']) | 717 page_data['content']) |
598 return page_data | 718 return page_data |
599 | 719 |
600 | 720 |
| 721 ########## |
| 722 # Utility functions for blobstore and memcache. |
| 723 ########## |
| 724 def get_data_from_cache(localpath): |
| 725 memcache_data = memcache.get(localpath) |
| 726 if not memcache_data: |
| 727 return None |
| 728 logging.debug('content for %s found in memcache' % localpath) |
| 729 return json.loads(memcache_data) |
| 730 |
| 731 |
| 732 def put_data_into_cache(localpath, data): |
| 733 memcache_data = json.dumps(data) |
| 734 if not memcache.set(key=localpath, value=memcache_data, time=2*60): |
| 735 logging.error('put_data_into_cache(\'%s\'): memcache.set() failed' % ( |
| 736 localpath)) |
| 737 |
| 738 |
| 739 def write_blob(data, mime_type): |
| 740 """Saves a Unicode string as a new blob, returns the blob's key.""" |
| 741 file_name = files.blobstore.create(mime_type=mime_type) |
| 742 data = data.encode('utf-8') |
| 743 with files.open(file_name, 'a') as blob_file: |
| 744 blob_file.write(data) |
| 745 files.finalize(file_name) |
| 746 return files.blobstore.get_blob_key(file_name) |
| 747 |
| 748 |
| 749 def path_to_mime_type(path): |
| 750 return EXT_TO_MIME.get(os.path.splitext(path)[1], 'text/html') |
| 751 |
| 752 |
| 753 EXT_TO_MIME = { |
| 754 '.css': 'text/css', |
| 755 '.js': 'text/javascript', |
| 756 '.json': 'application/json', |
| 757 '.html': 'text/html', |
| 758 } |
| 759 |
| 760 |
| 761 ########## |
| 762 # Functions for actually fetching original pages. |
| 763 ########## |
| 764 def fetch_pages(): |
| 765 """Starts a background fetch operation for pages that need it.""" |
| 766 logging.debug('fetch_pages()') |
| 767 for url in URLS: |
| 768 deferred.defer(fetch_page, **url) |
| 769 |
| 770 |
| 771 def nonfatal_fetch_url(url, *args, **kwargs): |
| 772 # Temporary workaround to disable AppEngine global cache of these pages. |
| 773 if '?' in url: |
| 774 url += '&' + str(random.random()) |
| 775 else: |
| 776 url += '?' + str(random.random()) |
| 777 |
| 778 try: |
| 779 return urlfetch.fetch(url, deadline=URLFETCH_DEADLINE, *args, **kwargs) |
| 780 except urlfetch.DownloadError: |
| 781 logging.warn('urlfetch failed: %s' % url, exc_info=1) |
| 782 return None |
| 783 |
| 784 |
| 785 def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None, |
| 786 fetch_url=nonfatal_fetch_url): |
| 787 """Fetches data about a set of pages.""" |
| 788 if type(localpath) != type(''): |
| 789 logging.error('fetch_page: localpath is %r, expected a string' % ( |
| 790 repr(localpath))) |
| 791 return |
| 792 unquoted_localpath = urllib.unquote(localpath) |
| 793 logging.debug('fetch_page("%s", "%s", "%s")' % ( |
| 794 unquoted_localpath, remoteurl, maxage)) |
| 795 page = get_or_create_page(unquoted_localpath, remoteurl, maxage) |
| 796 |
| 797 # Check if our copy of the page is younger than maxage. If it is, we'll |
| 798 # skip the fetch. |
| 799 oldest_acceptable_timestamp = datetime.datetime.now() - datetime.timedelta( |
| 800 seconds=maxage) |
| 801 if (page.fetch_timestamp and |
| 802 page.fetch_timestamp > oldest_acceptable_timestamp): |
| 803 logging.debug('fetch_page: too recent, skipping') |
| 804 return |
| 805 |
| 806 # Perform the actual page fetch. |
| 807 fetch_timestamp = datetime.datetime.now() |
| 808 response = fetch_url(remoteurl) |
| 809 if not response: |
| 810 logging.warning('fetch_page: got empty response') |
| 811 return |
| 812 if response.status_code != 200: |
| 813 logging.warning('fetch_page: got non-empty response but code ' |
| 814 '%d' % response.status_code) |
| 815 return |
| 816 |
| 817 # We have actual content. If there's one or more handlers, call them. |
| 818 page_data = {} |
| 819 page_data['content'] = response.content |
| 820 if postfetch: |
| 821 if not isinstance(postfetch, list): |
| 822 postfetch = [postfetch] |
| 823 for handler in postfetch: |
| 824 logging.debug('fetch_page: calling postfetch handler ' |
| 825 '%s' % handler.__name__) |
| 826 page_data = handler(unquoted_localpath, remoteurl, page_data) |
| 827 |
| 828 # Save the returned content into the DB and caching layers. |
| 829 logging.debug('fetch_page: saving page') |
| 830 save_page(page, unquoted_localpath, fetch_timestamp, page_data) |
| 831 if postsave: |
| 832 if not isinstance(postsave, list): |
| 833 postsave = [postsave] |
| 834 for handler in postsave: |
| 835 logging.debug('fetch_page: calling postsave handler ' |
| 836 '%s' % handler.__name__) |
| 837 handler(unquoted_localpath, remoteurl, page_data) |
| 838 |
601 | 839 |
602 # List of URLs to fetch. | 840 # List of URLs to fetch. |
603 URLS = [ | 841 URLS = [ |
604 # Console URLs. | 842 # Console URLs. |
605 { | 843 { |
606 'remoteurl': 'http://build.chromium.org/p/chromium.chrome/console', | 844 'remoteurl': 'http://build.chromium.org/p/chromium.chrome/console', |
607 'localpath': 'chromium.chrome/console', | 845 'localpath': 'chromium.chrome/console', |
608 'postfetch': [console_handler, parse_master], | 846 'postfetch': [console_handler, parse_master], |
609 'postsave': console_merger, | 847 'postsave': console_merger, |
610 'maxage': 30, # 30 secs | 848 'maxage': 30, # 30 secs |
(...skipping 303 matching lines...)
914 # LKGR JSON. | 1152 # LKGR JSON. |
915 { | 1153 { |
916 'remoteurl': | 1154 'remoteurl': |
917 ('http://build.chromium.org/p/chromium.lkgr/json/builders/Linux%20x64/' | 1155 ('http://build.chromium.org/p/chromium.lkgr/json/builders/Linux%20x64/' |
918 'builds/-1?as_text=1'), | 1156 'builds/-1?as_text=1'), |
919 'localpath': | 1157 'localpath': |
920 'chromium.lkgr/json/builders/Linux%20x64/builds/-1/as_text=1.json', | 1158 'chromium.lkgr/json/builders/Linux%20x64/builds/-1/as_text=1.json', |
921 'maxage': 2*60, # 2 mins | 1159 'maxage': 2*60, # 2 mins |
922 }, | 1160 }, |
923 ] | 1161 ] |
924 | |
925 | |
926 def nonfatal_fetch_url(url, *args, **kwargs): | |
927 # Temporary workaround to disable AppEngine global cache of these pages. | |
928 if '?' in url: | |
929 url += '&' + str(random.random()) | |
930 else: | |
931 url += '?' + str(random.random()) | |
932 | |
933 try: | |
934 return urlfetch.fetch(url, deadline=URLFETCH_DEADLINE, *args, **kwargs) | |
935 except urlfetch.DownloadError: | |
936 logging.warn('urlfetch failed: %s' % url, exc_info=1) | |
937 return None | |
938 | |
939 | |
940 class Row(db.Model): | |
941 fetch_timestamp = db.DateTimeProperty(required=True) | |
942 rev_number = db.StringProperty(required=True) | |
943 localpath = db.StringProperty(required=True) | |
944 revision = db.TextProperty() | |
945 name = db.TextProperty() | |
946 status = db.TextProperty() | |
947 comment = db.TextProperty() | |
948 details = db.TextProperty() | |
949 | |
950 | |
951 class Page(db.Model): | |
952 fetch_timestamp = db.DateTimeProperty(required=True) | |
953 localpath = db.StringProperty(required=True) | |
954 content = db.TextProperty() | |
955 title = db.StringProperty() | |
956 offsite_base = db.StringProperty() | |
957 body_class = db.StringProperty() | |
958 remoteurl = db.TextProperty() | |
959 # Data updated separately, after creation. | |
960 content_blob = blobstore.BlobReferenceProperty() | |
961 | |
962 | |
963 def write_blob(data, mime_type): | |
964 """Saves a Unicode string as a new blob, returns the blob's key.""" | |
965 file_name = files.blobstore.create(mime_type=mime_type) | |
966 data = data.encode('utf-8') | |
967 with files.open(file_name, 'a') as blob_file: | |
968 blob_file.write(data) | |
969 files.finalize(file_name) | |
970 return files.blobstore.get_blob_key(file_name) | |
971 | |
972 | |
973 def save_page(page, localpath, fetch_timestamp, page_data): | |
974 body_class = page_data.get('body_class', '') | |
975 content = page_data.get('content') | |
976 offsite_base = page_data.get('offsite_base', '') | |
977 title = page_data.get('title', '') | |
978 | |
979 content_blob_key = None | |
980 try: | |
981 content = content.decode('utf-8', 'replace') | |
982 except UnicodeEncodeError: | |
983 logging.debug('save_page: content was already in unicode') | |
984 logging.debug('save_page: content size is %d' % len(content)) | |
985 if len(content.encode('utf-8')) >= 10**6: | |
986 logging.debug('save_page: saving to blob') | |
987 content_blob_key = write_blob(content, path_to_mime_type(localpath)) | |
988 content = None | |
989 def tx_page(page_key): | |
990 page = Page.get(page_key) | |
991 # E1103:225,7:fetch_page.tx_page: Instance of 'list' has no | |
992 # 'fetch_timestamp' member (but some types could not be inferred) | |
993 # pylint: disable=E1103 | |
994 if page.fetch_timestamp > fetch_timestamp: | |
995 return | |
996 page.content = content | |
997 page.content_blob = content_blob_key | |
998 page.fetch_timestamp = fetch_timestamp | |
999 # title, offsite_base, body_class can all be empty strings for some | |
1000 # content. Where that's true, they're not used for displaying a console- | |
1001 # like resource, and the content alone is returned to the web user. | |
1002 page.title = title | |
1003 page.offsite_base = offsite_base | |
1004 page.body_class = body_class | |
1005 # E1103:231,4:fetch_page.tx_page: Instance of 'list' has no 'put' member | |
1006 # (but some types could not be inferred) | |
1007 # pylint: disable=E1103 | |
1008 page.put() | |
1009 db.run_in_transaction(tx_page, page.key()) | |
1010 page_data = { | |
1011 'body_class': body_class, | |
1012 'content': content, | |
1013 'offsite_base': offsite_base, | |
1014 'title': title, | |
1015 } | |
1016 if content_blob_key: | |
1017 page_data['content_blob'] = True | |
1018 put_pagedata_into_cache(localpath, page_data) | |
1019 | |
1020 | |
1021 def get_or_create_page(localpath, remoteurl, maxage): | |
1022 return Page.get_or_insert( | |
1023 key_name=localpath, | |
1024 localpath=localpath, | |
1025 remoteurl=remoteurl, | |
1026 maxage=maxage, | |
1027 fetch_timestamp=datetime.datetime.now() - datetime.timedelta(hours=24), | |
1028 content=None, | |
1029 content_blob=None) | |
1030 | |
1031 | |
1032 def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None, | |
1033 fetch_url=nonfatal_fetch_url): | |
1034 """Fetches data about a set of pages.""" | |
1035 if type(localpath) != type(''): | |
1036 logging.error('fetch_page: localpath is %r, expected a string' % ( | |
1037 repr(localpath))) | |
1038 return | |
1039 unquoted_localpath = urllib.unquote(localpath) | |
1040 logging.debug('fetch_page("%s", "%s", "%s")' % ( | |
1041 unquoted_localpath, remoteurl, maxage)) | |
1042 page = get_or_create_page(unquoted_localpath, remoteurl, maxage) | |
1043 | |
1044 # Check if our copy of the page is younger than maxage. If it is, we'll | |
1045 # skip the fetch. | |
1046 oldest_acceptable_timestamp = datetime.datetime.now() - datetime.timedelta( | |
1047 seconds=maxage) | |
1048 if (page.fetch_timestamp and | |
1049 page.fetch_timestamp > oldest_acceptable_timestamp): | |
1050 logging.debug('fetch_page: too recent, skipping') | |
1051 return | |
1052 | |
1053 # Perform the actual page fetch. | |
1054 fetch_timestamp = datetime.datetime.now() | |
1055 response = fetch_url(remoteurl) | |
1056 if not response: | |
1057 logging.warning('fetch_page: got empty response') | |
1058 return | |
1059 if response.status_code != 200: | |
1060 logging.warning('fetch_page: got non-empty response but code ' | |
1061 '%d' % response.status_code) | |
1062 return | |
1063 | |
1064 # We have actual content. If there's one or more handlers, call them. | |
1065 page_data = {} | |
1066 page_data['content'] = response.content | |
1067 if postfetch: | |
1068 if not isinstance(postfetch, list): | |
1069 postfetch = [postfetch] | |
1070 for handler in postfetch: | |
1071 logging.debug('fetch_page: calling postfetch handler ' | |
1072 '%s' % handler.__name__) | |
1073 page_data = handler(unquoted_localpath, remoteurl, page_data) | |
1074 | |
1075 # Save the returned content into the DB and caching layers. | |
1076 logging.debug('fetch_page: saving page') | |
1077 save_page(page, unquoted_localpath, fetch_timestamp, page_data) | |
1078 if postsave: | |
1079 if not isinstance(postsave, list): | |
1080 postsave = [postsave] | |
1081 for handler in postsave: | |
1082 logging.debug('fetch_page: calling postsave handler ' | |
1083 '%s' % handler.__name__) | |
1084 handler(unquoted_localpath, remoteurl, page_data) | |
1085 | |
1086 | |
1087 EXT_TO_MIME = { | |
1088 '.css': 'text/css', | |
1089 '.js': 'text/javascript', | |
1090 '.json': 'application/json', | |
1091 '.html': 'text/html', | |
1092 } | |
1093 | |
1094 | |
1095 def path_to_mime_type(path): | |
1096 return EXT_TO_MIME.get(os.path.splitext(path)[1], 'text/html') | |
1097 | |
1098 | |
1099 def fetch_pages(): | |
1100 """Starts a background fetch operation for pages that need it.""" | |
1101 logging.debug('fetch_pages()') | |
1102 for url in URLS: | |
1103 deferred.defer(fetch_page, **url) | |
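Similarly, the fetch pipeline is driven entirely by the URLS table: fetch_pages(), presumably triggered by a cron or task handler outside this file, defers one fetch_page() task per entry, and each dict's keys map directly onto fetch_page()'s keyword arguments (postfetch handlers transform the raw response, postsave handlers such as console_merger run after the page is saved and cached). A hedged sketch of wiring up one more console source; the 'chromium.example' master is hypothetical:

# Hypothetical extra console source; each key mirrors the existing URLS
# entries and feeds fetch_page(localpath, remoteurl, maxage, ...).
URLS.append({
    'remoteurl': 'http://build.chromium.org/p/chromium.example/console',
    'localpath': 'chromium.example/console',
    'postfetch': [console_handler, parse_master],  # scrub page, split into rows
    'postsave': console_merger,                    # rebuild chromium/console
    'maxage': 30,  # seconds before the cached page is considered stale
})

# Schedule one deferred fetch_page() task per URLS entry.
fetch_pages()

For the new master to actually appear in the merged console, it would also need to be listed in DEFAULT_MASTERS_TO_MERGE (or passed explicitly via masters_to_merge).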