Chromium Code Reviews

Side by Side Diff: app.py

Issue 11535002: chromium-build app now renders console from stored rows. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/chromium-build
Patch Set: Add test_parse_master_utf8 (created 7 years, 11 months ago)
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from __future__ import with_statement 5 from __future__ import with_statement
6 6
7 import datetime 7 import datetime
8 import json 8 import json
9 import logging 9 import logging
10 import os 10 import os
11 import random 11 import random
12 import re 12 import re
13 import string 13 import string
14 import urllib 14 import urllib
15 15
16 from google.appengine.api import files, memcache, urlfetch 16 from google.appengine.api import files, memcache, urlfetch
17 from google.appengine.api.app_identity import get_application_id 17 from google.appengine.api.app_identity import get_application_id
18 from google.appengine.ext import blobstore, db, deferred 18 from google.appengine.ext import blobstore, db, deferred
19 # F0401: 16,0: Unable to import 'webapp2_extras' 19 # F0401: 16,0: Unable to import 'webapp2_extras'
20 # W0611: 16,0: Unused import jinja2 20 # W0611: 16,0: Unused import jinja2
21 # pylint: disable=F0401, W0611 21 # pylint: disable=F0401, W0611
22 from webapp2_extras import jinja2 22 from webapp2_extras import jinja2
23 # F0401:22,0: Unable to import 'jinja2' 23 # F0401:22,0: Unable to import 'jinja2'
24 # pylint: disable=F0401 24 # pylint: disable=F0401
25 from jinja2 import Environment, FileSystemLoader 25 from jinja2 import Environment, FileSystemLoader
26 26
27 from third_party.BeautifulSoup.BeautifulSoup import BeautifulSoup 27 from third_party.BeautifulSoup.BeautifulSoup import BeautifulSoup, Tag
28 28
29 29
30 # Current application name. 30 # Current application name.
31 APP_NAME = get_application_id() 31 APP_NAME = get_application_id()
32 32
33 # Deadline for fetching URLs (in seconds). 33 # Deadline for fetching URLs (in seconds).
34 URLFETCH_DEADLINE = 60*5 # 5 mins 34 URLFETCH_DEADLINE = 60*5 # 5 mins
35 35
36 # Default masters to merge together. 36 # Default masters to merge together.
37 DEFAULT_MASTERS_TO_MERGE = [ 37 DEFAULT_MASTERS_TO_MERGE = [
38 'chromium.main', 38 'chromium.main',
39 'chromium.win', 39 'chromium.win',
40 'chromium.mac', 40 'chromium.mac',
41 'chromium.linux', 41 'chromium.linux',
42 'chromium.chromiumos', 42 'chromium.chromiumos',
43 'chromium.chrome', 43 'chromium.chrome',
44 'chromium.memory', 44 'chromium.memory',
45 ] 45 ]
46 46
47 47
48 # Perform initial bootstrap for this module. 48 # Perform initial bootstrap for this module.
49 console_template = '' 49 console_template = ''
50 def bootstrap(): 50 def bootstrap():
51 global console_template 51 global console_template
52 with open('templates/merger.html', 'r') as fh: 52 with open('templates/merger.html', 'r') as fh:
53 console_template = fh.read() 53 console_template = fh.read()
54 54
55 55
56 def get_pagedata_from_cache(localpath): 56 ##########
57 memcache_data = memcache.get(localpath) 57 # Page class definition and related functions.
58 if not memcache_data: 58 ##########
59 return None 59 class Page(db.Model):
60 logging.debug('content for %s found in memcache' % localpath) 60 fetch_timestamp = db.DateTimeProperty(required=True)
61 return json.loads(memcache_data) 61 localpath = db.StringProperty(required=True)
62 content = db.TextProperty()
63 title = db.StringProperty()
64 offsite_base = db.StringProperty()
65 body_class = db.StringProperty()
66 remoteurl = db.TextProperty()
67 # Data updated separately, after creation.
68 content_blob = blobstore.BlobReferenceProperty()
62 69
63 70
64 def put_pagedata_into_cache(localpath, page_data): 71 def get_or_create_page(localpath, remoteurl, maxage):
65 memcache_data = json.dumps(page_data) 72 return Page.get_or_insert(
66 if not memcache.set(key=localpath, value=memcache_data, time=2*60): 73 key_name=localpath,
67 logging.error('put_pagedata_into_cache(\'%s\'): memcache.set() failed' % ( 74 localpath=localpath,
68 localpath)) 75 remoteurl=remoteurl,
76 maxage=maxage,
77 fetch_timestamp=datetime.datetime.now() - datetime.timedelta(hours=24),
78 content=None,
79 content_blob=None)
69 80
70 81
71 def get_and_cache_pagedata(localpath): 82 def get_and_cache_pagedata(localpath):
72 """Returns a page_data dict, optionally caching and looking up a blob. 83 """Returns a page_data dict, optionally caching and looking up a blob.
73 84
74 get_and_cache_pagedata takes a localpath which is used to fetch data 85 get_and_cache_pagedata takes a localpath which is used to fetch data
75 from the cache. If the data is present and there's no content blob, 86 from the cache. If the data is present and there's no content blob,
76 then we have all of the data we need to return a page view to the user 87 then we have all of the data we need to return a page view to the user
77 and we return early. 88 and we return early.
78 89
79 Otherwise, we need to fetch the page object and set up the page data 90 Otherwise, we need to fetch the page object and set up the page data
80 for the page view. If the page has a blob associated with it, then we 91 for the page view. If the page has a blob associated with it, then we
81 mark the page data as having a blob and cache it as-is without the blob. 92 mark the page data as having a blob and cache it as-is without the blob.
82 If there's no blob, we associate the content with the page data and 93 If there's no blob, we associate the content with the page data and
83 cache that. This is so the next time get_and_cache_pagedata is called 94 cache that. This is so the next time get_and_cache_pagedata is called
84 for either case, we'll get the same behavior (a page-lookup for blobful 95 for either case, we'll get the same behavior (a page-lookup for blobful
85 content and a page cache hit for blobless content). 96 content and a page cache hit for blobless content).
86 97
87 Here we assume localpath is already unquoted. 98 Here we assume localpath is already unquoted.
88 """ 99 """
89 page_data = get_pagedata_from_cache(localpath) 100 page_data = get_data_from_cache(localpath)
90 if page_data and not page_data.get('content_blob'): 101 if page_data and not page_data.get('content_blob'):
91 return page_data 102 return page_data
92 page = Page.all().filter('localpath =', localpath).get() 103 page = Page.all().filter('localpath =', localpath).get()
93 if not page: 104 if not page:
94 logging.error('get_and_cache_pagedata(\'%s\'): no matching localpath in ' 105 logging.error('get_and_cache_pagedata(\'%s\'): no matching localpath in '
95 'datastore' % localpath) 106 'datastore' % localpath)
96 return {'content': None} 107 return {'content': None}
97 page_data = { 108 page_data = {
98 'body_class': page.body_class, 109 'body_class': page.body_class,
99 'offsite_base': page.offsite_base, 110 'offsite_base': page.offsite_base,
100 'title': page.title, 111 'title': page.title,
101 } 112 }
102 if page.content_blob: 113 if page.content_blob:
103 # Get the blob. 114 # Get the blob.
104 logging.debug('content for %s found in blobstore' % localpath) 115 logging.debug('content for %s found in blobstore' % localpath)
105 blob_reader = blobstore.BlobReader(page.content_blob) 116 blob_reader = blobstore.BlobReader(page.content_blob)
106 page_data['content_blob'] = True 117 page_data['content_blob'] = True
107 put_pagedata_into_cache(localpath, page_data) 118 put_data_into_cache(localpath, page_data)
108 page_data['content'] = blob_reader.read().decode('utf-8', 'replace') 119 page_data['content'] = blob_reader.read().decode('utf-8', 'replace')
109 else: 120 else:
110 logging.debug('content for %s found in datastore' % localpath) 121 logging.debug('content for %s found in datastore' % localpath)
111 page_data['content'] = page.content 122 page_data['content'] = page.content
112 put_pagedata_into_cache(localpath, page_data) 123 put_data_into_cache(localpath, page_data)
113 return page_data 124 return page_data
114 125
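Note: a minimal sketch of consuming the page_data dict returned by get_and_cache_pagedata above (the localpath value is illustrative):

    page_data = get_and_cache_pagedata('chromium.linux/console')
    if page_data.get('content') is None:
        logging.warning('no cached console content yet')  # unknown localpath
    else:
        title = page_data['title']   # may be '' for raw, non-console content
        html = page_data['content']  # unicode; blob-backed when >= 10**6 bytes
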
115 126
127 def save_page(page, localpath, fetch_timestamp, page_data):
128 body_class = page_data.get('body_class', '')
129 content = page_data.get('content')
130 offsite_base = page_data.get('offsite_base', '')
131 title = page_data.get('title', '')
132
133 content_blob_key = None
134 try:
135 content = content.decode('utf-8', 'replace')
136 except UnicodeEncodeError:
137 logging.debug('save_page: content was already in unicode')
138 logging.debug('save_page: content size is %d' % len(content))
139 if len(content.encode('utf-8')) >= 10**6:
140 logging.debug('save_page: saving to blob')
141 content_blob_key = write_blob(content, path_to_mime_type(localpath))
142 content = None
143 def tx_page(page_key):
144 page = Page.get(page_key)
145 # E1103:225,7:fetch_page.tx_page: Instance of 'list' has no
146 # 'fetch_timestamp' member (but some types could not be inferred)
147 # pylint: disable=E1103
148 if page.fetch_timestamp > fetch_timestamp:
149 return
150 page.content = content
151 page.content_blob = content_blob_key
152 page.fetch_timestamp = fetch_timestamp
153 # title, offsite_base, body_class can all be empty strings for some
154 # content. Where that's true, they're not used for displaying a console-
155 # like resource, and the content alone is returned to the web user.
156 page.title = title
157 page.offsite_base = offsite_base
158 page.body_class = body_class
159 # E1103:231,4:fetch_page.tx_page: Instance of 'list' has no 'put' member
160 # (but some types could not be inferred)
161 # pylint: disable=E1103
162 page.put()
163 db.run_in_transaction(tx_page, page.key())
164 page_data = {
165 'body_class': body_class,
166 'content': content,
167 'offsite_base': offsite_base,
168 'title': title,
169 }
170 if content_blob_key:
171 page_data['content_blob'] = True
172 put_data_into_cache(localpath, page_data)
173 logging.info('Saved and cached page with localpath %s' % localpath)
174
175
176 ##########
177 # Row class definition and related functions.
178 ##########
179 class Row(db.Model):
180 fetch_timestamp = db.DateTimeProperty(required=True)
181 rev_number = db.StringProperty(required=True)
182 localpath = db.StringProperty(required=True)
183 revision = db.TextProperty()
184 name = db.TextProperty()
185 status = db.TextProperty()
186 comment = db.TextProperty()
187 details = db.TextProperty()
188
189
190 def get_or_create_row(localpath, revision):
191 return Row.get_or_insert(
192 key_name=localpath,
193 rev_number=revision,
194 localpath=localpath,
195 fetch_timestamp=datetime.datetime.now())
196
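Note: in the new pipeline a Row's key_name is its localpath, and (per parse_master below) the localpath itself embeds the revision number. A sketch of the key scheme, with illustrative values:

    localpath = 'chromium.linux/console/123456'  # '<master>/console/<rev_number>'
    row = get_or_create_row(localpath, '123456')
    assert row.key().name() == localpath
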
197
198 def get_and_cache_rowdata(localpath):
199 """Returns a row_data dict.
200
201 get_and_cache_rowdata takes a localpath which is used to fetch data from the
202 cache. If the data is present, then we have all of the data we need and we
203 return early.
204
205 Otherwise, we need to fetch the row object and set up the row data.
206
207 Here we assume localpath is already unquoted.
208 """
209 row_data = get_data_from_cache(localpath)
210 if row_data:
211 return row_data
212 row = Row.all().filter('localpath =', localpath).get()
213 if not row:
214 logging.error('get_and_cache_rowdata(\'%s\'): no matching localpath in '
215 'datastore' % localpath)
216 return {}
217 row_data = {}
218 row_data['rev'] = row.revision
219 row_data['name'] = row.name
220 row_data['status'] = row.status
221 row_data['comment'] = row.comment
222 row_data['details'] = row.details
223 row_data['rev_number'] = row.rev_number
224 logging.debug('content for %s found in datastore' % localpath)
225 put_data_into_cache(localpath, row_data)
226 return row_data
227
228
229 def save_row(row_data, localpath, timestamp):
230 rev_number = row_data['rev_number']
231 row = get_or_create_row(localpath, rev_number)
232 row_key = row.key()
233 def tx_row(row_key):
234 row = Row.get(row_key)
235 # E1103:959,7:save_row.tx_row: Instance of 'list' has no
236 # 'fetch_timestamp' member (but some types could not be inferred)
237 # pylint: disable=E1103
238 # if row.fetch_timestamp > timestamp:
239 # return
240 row.fetch_timestamp = timestamp
241 row.revision = row_data['rev']
242 row.name = row_data['name']
243 row.status = row_data['status']
244 row.comment = row_data['comment']
245 row.details = row_data['details']
246 # E1103:967,4:save_row.tx_row: Instance of 'list' has no 'put' member
247 # (but some types could not be inferred)
248 # pylint: disable=E1103
249 row.put()
250 db.run_in_transaction(tx_row, row_key)
251 prev_rev = memcache.get(key='latest_rev')
252 if (rev_number > prev_rev):
253 memcache.set(key='latest_rev', value=rev_number)
254 put_data_into_cache(localpath, row_data)
255 logging.info('Saved and cached row with localpath %s' % localpath)
256
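Note: the 'latest_rev' memcache entry written above is the bookmark console_merger uses to pick its starting revision. A sketch of the read side; the int() round-trip assumes numeric, SVN-style revisions (save_row itself compares the values as strings):

    latest = memcache.get(key='latest_rev')
    if latest is not None:
        latest_rev = int(latest)  # rev_number values are stored as strings
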
257
258 ##########
259 # ConsoleData class definition and related functions.
260 ##########
116 class ConsoleData(object): 261 class ConsoleData(object):
117 def __init__(self): 262 def __init__(self):
118 self.row_orderedkeys = [] 263 self.row_orderedkeys = []
119 self.row_data = {} 264 self.row_data = {}
120 265
121 # Retain order of observed masters. 266 # Retain order of observed masters.
122 self.masters = [] 267 self.masters = []
123 268
124 # Map(k,v): k=Master, v=List of categories 269 # Map(k,v): k=Master, v=List of categories
125 self.category_order = {} 270 self.category_order = {}
(...skipping 45 matching lines...)
171 def SetDetail(self, detail): 316 def SetDetail(self, detail):
172 self.last_row['detail'] = detail 317 self.last_row['detail'] = detail
173 318
174 def AddCategory(self, category, builder_status): 319 def AddCategory(self, category, builder_status):
175 self.category_order[self.lastMasterSeen].append(category) 320 self.category_order[self.lastMasterSeen].append(category)
176 # Map(k,v): k=Master/category, v=Dict of category data (last build status) 321 # Map(k,v): k=Master/category, v=Dict of category data (last build status)
177 self.category_data[self.lastMasterSeen].setdefault(category, {}) 322 self.category_data[self.lastMasterSeen].setdefault(category, {})
178 self.category_data[self.lastMasterSeen][category] = builder_status 323 self.category_data[self.lastMasterSeen][category] = builder_status
179 self.category_count += 1 324 self.category_count += 1
180 325
326 def AddRow(self, row):
327 revision = row['rev_number']
328 self.SawRevision(revision)
329 revlink = BeautifulSoup(row['rev']).td.a['href']
330 self.SetLink(revlink)
331 name = BeautifulSoup(row['name']).td.contents
332 self.SetName(self.ContentsToHtml(name))
333 status = BeautifulSoup(row['status']).findAll('table')
334 for i, stat in enumerate(status):
335 self.SetStatus(self.category_order[self.lastMasterSeen][i],
336 unicode(stat))
337 comment = BeautifulSoup(row['comment']).td.contents
338 self.SetComment(self.ContentsToHtml(comment))
339 if row['details']:
340 details = BeautifulSoup(row['details']).td.contents
341 self.SetDetail(self.ContentsToHtml(details))
342
181 def ParseRow(self, row): 343 def ParseRow(self, row):
182 cells = row.findAll('td', recursive=False) 344 cells = row.findAll('td', recursive=False)
183 # Figure out which row this is. 345 # Figure out which row this is.
184 for attrname, attrvalue in cells[0].attrs: 346 for attrname, attrvalue in cells[0].attrs:
185 if attrname != 'class': 347 if attrname != 'class':
186 continue 348 continue
187 attrvalue = re.sub(r'^(\S+).*', r'\1', attrvalue) 349 attrvalue = re.sub(r'^(\S+).*', r'\1', attrvalue)
188 if attrvalue == 'DevRev': 350 if attrvalue == 'DevRev':
189 revision = cells[0] 351 revision = cells[0]
190 self.SawRevision(revision=revision.findAll('a')[0].contents[0]) 352 self.SawRevision(revision=revision.findAll('a')[0].contents[0])
(...skipping 10 matching lines...)
201 if attrvalue == 'DevDetails': 363 if attrvalue == 'DevDetails':
202 self.SetDetail(detail=self.ContentsToHtml(cells[0].contents)) 364 self.SetDetail(detail=self.ContentsToHtml(cells[0].contents))
203 365
204 def Finish(self): 366 def Finish(self):
205 self.row_orderedkeys = sorted(self.row_orderedkeys, key=int, reverse=True) 367 self.row_orderedkeys = sorted(self.row_orderedkeys, key=int, reverse=True)
206 # TODO(cmp): Look for row/master/categories that are unset. If they are 368 # TODO(cmp): Look for row/master/categories that are unset. If they are
207 # at the latest revisions, leave them unset. If they are at 369 # at the latest revisions, leave them unset. If they are at
208 # the earliest revisions, set them to ''. 370 # the earliest revisions, set them to ''.
209 371
210 372
211 # W0613:169,39:console_merger: Unused argument 'remoteurl' 373 ##########
212 # W0613:169,19:console_merger: Unused argument 'unquoted_localpath' 374 # Heavy-lifting functions that do most of the console processing.
213 # pylint: disable=W0613 375 # AKA postfetch and postsave functions/handlers.
214 def console_merger(unquoted_localpath, remote_url, page_data=None, 376 ##########
215 masters_to_merge=None): 377 def console_merger(localpath, remoteurl, page_data,
216 page_data = page_data or {} 378 masters_to_merge=None, num_rows_to_merge=25):
217
218 masters_to_merge = masters_to_merge or DEFAULT_MASTERS_TO_MERGE 379 masters_to_merge = masters_to_merge or DEFAULT_MASTERS_TO_MERGE
219 mergedconsole = ConsoleData() 380 mergedconsole = ConsoleData()
220 merged_page = None 381 surroundings = get_and_cache_pagedata('surroundings')
221 merged_tag = None 382 merged_page = BeautifulSoup(surroundings['content'])
383 merged_tag = merged_page.find('table', 'ConsoleData')
384 latest_rev = int(memcache.get(key='latest_rev'))
385 if not latest_rev:
386 logging.error('console_merger(\'%s\', \'%s\', \'%s\'): cannot get latest '
387 'revision number.' % (
388 localpath, remoteurl, page_data))
389 return
222 fetch_timestamp = datetime.datetime.now() 390 fetch_timestamp = datetime.datetime.now()
223 for master in masters_to_merge: 391 for master in masters_to_merge:
224 page_data = get_and_cache_pagedata('%s/console' % master) 392 # Fetch the summary one-box-per-builder for the master.
225 master_content = page_data['content'] 393 # If we don't get it, something is wrong; skip the master entirely.
226 if master_content is None: 394 master_summary = get_and_cache_pagedata('%s/console/summary' % master)
395 if not master_summary['content']:
227 continue 396 continue
228 master_content = master_content.encode('ascii', 'replace')
229 this_page = BeautifulSoup(master_content)
230 this_tag = this_page.find('table', {'class': 'ConsoleData'})
231 # The first console is special, we reuse all of the console page.
232 if not merged_page:
233 merged_page = this_page
234 merged_tag = this_tag
235 mergedconsole.SawMaster(master) 397 mergedconsole.SawMaster(master)
398 # Get the categories for this master. If the master doesn't have any
399 # categories, just use the default empty-string category.
400 category_list = []
401 master_categories = get_and_cache_pagedata('%s/console/categories' % master)
402 if not master_categories['content']:
403 category_list.append('')
404 else:
405 category_row = BeautifulSoup(master_categories['content'])
406 category_list = map(lambda x: x.text,
407 category_row.findAll('td', 'DevStatus'))
408 # Get the corresponding summary box(es).
409 summary_row = BeautifulSoup(master_summary['content'])
410 summary_list = summary_row.findAll('table')
411 for category, summary in zip(category_list, summary_list):
412 mergedconsole.AddCategory(category, summary)
236 413
237 # Parse each of the rows. 414 # Fetch all of the rows that we need.
238 CATEGORY_ROW = 0 415 rows_fetched = 0
239 trs = this_tag.findAll('tr', recursive=False) 416 current_rev = latest_rev
240 417 while rows_fetched < num_rows_to_merge and current_rev >= 0:
241 # Get the list of categories in |master|. 418 row_data = get_and_cache_rowdata('%s/console/%s' % (master, current_rev))
242 category_tds = trs[CATEGORY_ROW].findAll('td', recursive=False)[2:] 419 if not row_data:
243 third_cell = category_tds[0] 420 current_rev -= 1
244 third_cell_class = third_cell.attrs[0][1] 421 continue
245 categories = [] 422 mergedconsole.AddRow(row_data)
246 if third_cell_class.startswith('DevStatus '): 423 current_rev -= 1
247 BUILDER_STATUS_ROW = 2 424 rows_fetched += 1
248 FIRST_CL_ROW = 3
249 for index, category_td in enumerate(category_tds):
250 categories.append(category_td.contents[0].strip())
251 else:
252 # There's no categories + spacing row, the first row will be the builder
253 # status row.
254 categories.append('')
255 BUILDER_STATUS_ROW = 0
256 FIRST_CL_ROW = 1
257
258 # For each category in |master|, add the category plus its |builder_status|.
259 builder_tds = trs[BUILDER_STATUS_ROW].findAll('td', recursive=False)[2:]
260 for index, category in enumerate(categories):
261 builder_status = builder_tds[index].findAll('table', recursive=False)[0]
262 mergedconsole.AddCategory(category=category,
263 builder_status=builder_status)
264
265 # For each of the remaining rows, add them to the console data.
266 for console_index in range(FIRST_CL_ROW, len(trs)):
267 console_row = trs[console_index]
268 mergedconsole.ParseRow(console_row)
269 # Add GC memory profiling.
270 # import gc
271 # gc.set_debug(gc.DEBUG_LEAK)
272 # logging.debug(gc.garbage)
273 # del gc.garbage[:]
274 mergedconsole.Finish()
275 425
276 # Convert the merged content into console content. 426 # Convert the merged content into console content.
427 mergedconsole.Finish()
277 template_environment = Environment() 428 template_environment = Environment()
278 template_environment.loader = FileSystemLoader('.') 429 template_environment.loader = FileSystemLoader('.')
279 def notstarted(builder_status): 430 def notstarted(builder_status):
280 """Convert a BeautifulSoup Tag from builder status to a notstarted line.""" 431 """Convert a BeautifulSoup Tag from builder status to a notstarted line."""
281 builder_status = re.sub(r'DevSlaveBox', 'DevStatusBox', str(builder_status)) 432 builder_status = re.sub(r'DevSlaveBox', 'DevStatusBox', str(builder_status))
282 builder_status = re.sub(r'class=\'([^\']*)\' target=', 433 builder_status = re.sub(r'class=\'([^\']*)\' target=',
283 'class=\'DevStatusBox notstarted\' target=', 434 'class=\'DevStatusBox notstarted\' target=',
284 builder_status) 435 builder_status)
285 builder_status = re.sub(r'class="([^"]*)" target=', 436 builder_status = re.sub(r'class="([^"]*)" target=',
286 'class="DevStatusBox notstarted" target=', 437 'class="DevStatusBox notstarted" target=',
(...skipping 22 matching lines...)
309 r'\'\<div\>\'', r"'<div ' + attributes + '>'", merged_content) 460 r'\'\<div\>\'', r"'<div ' + attributes + '>'", merged_content)
310 merged_content = re.sub( 461 merged_content = re.sub(
311 r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content) 462 r'\'\<td\>\'', r"'<td ' + attributes + '>'", merged_content)
312 merged_content = re.sub( 463 merged_content = re.sub(
313 r'\<iframe\>\</iframe\>', 464 r'\<iframe\>\</iframe\>',
314 '<iframe \' + attributes + \' src="\' + url + \'"></iframe>', 465 '<iframe \' + attributes + \' src="\' + url + \'"></iframe>',
315 merged_content) 466 merged_content)
316 467
317 # Update the merged console page. 468 # Update the merged console page.
318 merged_page = get_or_create_page('chromium/console', None, maxage=30) 469 merged_page = get_or_create_page('chromium/console', None, maxage=30)
319 logging.debug('console_merger: saving merged console') 470 logging.info('console_merger: saving merged console')
471 page_data = get_and_cache_pagedata('chromium/console')
320 page_data['title'] = 'BuildBot: Chromium' 472 page_data['title'] = 'BuildBot: Chromium'
321 page_data['offsite_base'] = 'http://build.chromium.org/p/chromium' 473 page_data['offsite_base'] = 'http://build.chromium.org/p/chromium'
322 page_data['body_class'] = 'interface' 474 page_data['body_class'] = 'interface'
323 page_data['content'] = merged_content 475 page_data['content'] = merged_content
324 save_page(merged_page, 'chromium/console', fetch_timestamp, page_data) 476 save_page(merged_page, 'chromium/console', fetch_timestamp, page_data)
325 return 477 return
326 478
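Note: console_merger reads pre-parsed pieces out of the cache/datastore rather than scraping whole console pages. A sketch of the localpaths it expects (paths illustrative; the entries are written by parse_master below):

    surroundings = get_and_cache_pagedata('surroundings')
    summary = get_and_cache_pagedata('chromium.linux/console/summary')
    categories = get_and_cache_pagedata('chromium.linux/console/categories')
    row_data = get_and_cache_rowdata('chromium.linux/console/123456')
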
327 479
328 def console_handler(_unquoted_localpath, remoteurl, page_data=None): 480 def console_handler(unquoted_localpath, remoteurl, page_data=None):
329 page_data = page_data or {} 481 page_data = page_data or {}
330 content = page_data.get('content') 482 content = page_data.get('content')
331 if not content: 483 if not content:
484 logging.error('console_handler(\'%s\', \'%s\', \'%s\'): page has no '
485 'content' % (
486 unquoted_localpath, remoteurl, page_data))
332 return page_data 487 return page_data
333 488
334 # Decode content from utf-8 to unicode, replacing bad characters. 489 # Decode content from utf-8 to unicode, replacing bad characters.
335 content = content.decode('utf-8', 'replace') 490 content = content.decode('utf-8', 'replace')
336 491
337 # Scrub in sheriff file content to console. 492 # Scrub in sheriff file content to console.
338 sheriff_files = [ 493 sheriff_files = [
339 'sheriff', 494 'sheriff',
340 'sheriff_android', 495 'sheriff_android',
341 'sheriff_cr_cros_gardeners', 496 'sheriff_cr_cros_gardeners',
(...skipping 111 matching lines...)
453 md = re.search( 608 md = re.search(
454 r'^(.*)</body>.*$', 609 r'^(.*)</body>.*$',
455 page_data['content'], 610 page_data['content'],
456 re.MULTILINE|re.DOTALL) 611 re.MULTILINE|re.DOTALL)
457 if not md: 612 if not md:
458 raise Exception('failed to locate trailing body and html tags') 613 raise Exception('failed to locate trailing body and html tags')
459 page_data['content'] = md.group(1) 614 page_data['content'] = md.group(1)
460 615
461 return page_data 616 return page_data
462 617
463 618 # W0613:600,28:parse_master: Unused argument 'remoteurl'
464 def get_or_create_row(localpath, revision): 619 # pylint: disable=W0613
465 return Row.get_or_insert(
466 key_name=revision + ' '+ localpath,
467 rev_number=revision,
468 localpath=localpath,
469 fetch_timestamp=datetime.datetime.now())
470
471
472 def save_row(row_data, localpath, timestamp):
473 rev_number = row_data['rev_number']
474 row = get_or_create_row(localpath, rev_number)
475 row_key = row.key()
476 def tx_row(row_key):
477 row = Row.get(row_key)
478 # E1103:959,7:save_row.tx_row: Instance of 'list' has no
479 # 'fetch_timestamp' member (but some types could not be inferred)
480 # pylint: disable=E1103
481 # if row.fetch_timestamp > timestamp:
482 # return
483 row.fetch_timestamp = timestamp
484 row.revision = row_data['rev']
485 row.name = row_data['name']
486 row.status = row_data['status']
487 row.comment = row_data['comment']
488 row.details = row_data['details']
489 # E1103:967,4:save_row.tx_row: Instance of 'list' has no 'put' member
490 # (but some types could not be inferred)
491 # pylint: disable=E1103
492 row.put()
493 db.run_in_transaction(tx_row, row_key)
494 memcache_data = json.dumps(row_data)
495 # A row should never be large enough to hit the blobstore, so we
496 # explicitly don't handle rows larger than 10^6 bytes.
497 if not memcache.set(key=str(row_key), value=memcache_data, time=2*60):
498 logging.error('save_row(\'%s\'): memcache.set() failed' % (row_key))
499
500
501 def parse_master(localpath, remoteurl, page_data=None): 620 def parse_master(localpath, remoteurl, page_data=None):
502 """Part of the new pipeline to store individual rows rather than 621 """Part of the new pipeline to store individual rows rather than
503 whole pages of html. Parses the master data into a set of rows, 622 whole pages of html. Parses the master data into a set of rows,
504 and writes them out to the datastore in an easily retrievable format. 623 and writes them out to the datastore in an easily retrievable format.
505 624
506 Returns the same page_data as it was passed, so as to not interrupt 625 Doesn't modify page_data dict.
507 the current pipeline. This may change when we switch over.
508 """ 626 """
509 ts = datetime.datetime.now() 627 ts = datetime.datetime.now()
510 page_data = page_data or {} 628 page_data = page_data or {}
511 content = page_data.get('content') 629 content = page_data.get('content')
512 if not content: 630 if not content:
513 return page_data 631 return page_data
514 content = content.decode('utf-8', 'replace') 632 content = content.decode('utf-8', 'replace')
515 633
516 # Split page into surroundings (announce, legend, footer) and data (rows). 634 # Split page into surroundings (announce, legend, footer) and data (rows).
517 surroundings = BeautifulSoup(content) 635 surroundings = BeautifulSoup(content)
518 data = surroundings.find('table', 'ConsoleData') 636 data = surroundings.find('table', 'ConsoleData')
519 data.extract() 637 new_data = Tag(surroundings, 'table', [('class', 'ConsoleData'),
638 ('width', '96%')])
639 data.replaceWith(new_data)
520 640
521 surroundings_page = get_or_create_page(localpath + '/surroundings', 641 surroundings_page = get_or_create_page('surroundings',
522 None, maxage=30) 642 None, maxage=30)
523 surroundings_data = {} 643 surroundings_data = {}
524 surroundings_data['title'] = 'Surroundings for ' + localpath 644 surroundings_data['title'] = 'Surroundings'
525 surroundings_data['content'] = unicode(surroundings) 645 surroundings_data['content'] = unicode(surroundings)
526 save_page(surroundings_page, localpath + '/surroundings', ts, 646 save_page(surroundings_page, 'surroundings', ts,
527 surroundings_data) 647 surroundings_data)
528 648
529 rows = data.tbody.findAll('tr', recursive=False) 649 rows = data.findAll('tr', recursive=False)
530 # The first table row can be special: the list of categories. 650 # The first table row can be special: the list of categories.
531 categories = None 651 categories = None
532 # If the first row contains a DevStatus cell... 652 # If the first row contains a DevStatus cell...
533 if rows[0].find('td', 'DevStatus') != None: 653 if rows[0].find('td', 'DevStatus') != None:
534 # ...extract it into the categories... 654 # ...extract it into the categories...
535 categories = rows[0] 655 categories = rows[0]
536 # ...and get rid of the next (spacer) row too. 656 # ...and get rid of the next (spacer) row too.
537 rows = rows[2:] 657 rows = rows[2:]
538 658
539 if categories: 659 if categories:
(...skipping 20 matching lines...)
560 # or a spacer row (in which case we finalize the row and save it). 680 # or a spacer row (in which case we finalize the row and save it).
561 for row in rows: 681 for row in rows:
562 if row.find('td', 'DevComment'): 682 if row.find('td', 'DevComment'):
563 curr_row['comment'] = unicode(row) 683 curr_row['comment'] = unicode(row)
564 elif row.find('td', 'DevDetails'): 684 elif row.find('td', 'DevDetails'):
565 curr_row['details'] = unicode(row) 685 curr_row['details'] = unicode(row)
566 elif row.find('td', 'DevStatus'): 686 elif row.find('td', 'DevStatus'):
567 curr_row['rev'] = unicode(row.find('td', 'DevRev')) 687 curr_row['rev'] = unicode(row.find('td', 'DevRev'))
568 curr_row['rev_number'] = unicode(row.find('td', 'DevRev').a.string) 688 curr_row['rev_number'] = unicode(row.find('td', 'DevRev').a.string)
569 curr_row['name'] = unicode(row.find('td', 'DevName')) 689 curr_row['name'] = unicode(row.find('td', 'DevName'))
570 curr_row['status'] = unicode(row.findAll('td', 'DevStatus')) 690 curr_row['status'] = unicode(row.findAll('table'))
571 else: 691 else:
572 if 'details' not in curr_row: 692 if 'details' not in curr_row:
573 curr_row['details'] = '' 693 curr_row['details'] = ''
574 save_row(curr_row, localpath, ts) 694 save_row(curr_row, localpath + '/' + curr_row['rev_number'], ts)
575 curr_row = {} 695 curr_row = {}
576 696
577 return page_data 697 return page_data
578 698
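Note: each row dict that parse_master saves has a fixed shape; a sketch with illustrative values:

    curr_row = {
        'rev': u'<td class="DevRev">...</td>',    # revision cell markup
        'rev_number': u'123456',                  # revision number as a string
        'name': u'<td class="DevName">...</td>',  # committer cell markup
        'status': u'[<table>...</table>]',        # unicode() of the status tables
        'comment': u'<tr>...</tr>',               # commit message row
        'details': u'',                           # '' when the CL has no details
    }
    save_row(curr_row, 'chromium.linux/console/123456', datetime.datetime.now())
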
579 699
580 def one_box_handler(unquoted_localpath, remoteurl, page_data=None): 700 def one_box_handler(unquoted_localpath, remoteurl, page_data=None):
581 page_data = page_data or {} 701 page_data = page_data or {}
582 content = page_data.get('content') 702 content = page_data.get('content')
583 if content is None: 703 if content is None:
584 return page_data 704 return page_data
585 # Get the site name from the local path. 705 # Get the site name from the local path.
586 md = re.match('^([^\/]+)/.*$', unquoted_localpath) 706 md = re.match('^([^\/]+)/.*$', unquoted_localpath)
587 if not md: 707 if not md:
588 logging.error('one_box_handler(\'%s\', \'%s\', \'%s\'): cannot get site ' 708 logging.error('one_box_handler(\'%s\', \'%s\', \'%s\'): cannot get site '
589 'from local path' % ( 709 'from local path' % (
590 unquoted_localpath, remoteurl, page_data)) 710 unquoted_localpath, remoteurl, page_data))
591 return page_data 711 return page_data
592 site = md.group(1) 712 site = md.group(1)
593 new_waterfall_url = 'http://build.chromium.org/p/%s/waterfall' % site 713 new_waterfall_url = 'http://build.chromium.org/p/%s/waterfall' % site
594 page_data['content'] = re.sub( 714 page_data['content'] = re.sub(
595 r'waterfall', 715 r'waterfall',
596 new_waterfall_url, 716 new_waterfall_url,
597 page_data['content']) 717 page_data['content'])
598 return page_data 718 return page_data
599 719
600 720
721 ##########
722 # Utility functions for blobstore and memcache.
723 ##########
724 def get_data_from_cache(localpath):
725 memcache_data = memcache.get(localpath)
726 if not memcache_data:
727 return None
728 logging.debug('content for %s found in memcache' % localpath)
729 return json.loads(memcache_data)
730
731
732 def put_data_into_cache(localpath, data):
733 memcache_data = json.dumps(data)
734 if not memcache.set(key=localpath, value=memcache_data, time=2*60):
735 logging.error('put_data_into_cache(\'%s\'): memcache.set() failed' % (
736 localpath))
737
738
739 def write_blob(data, mime_type):
740 """Saves a Unicode string as a new blob, returns the blob's key."""
741 file_name = files.blobstore.create(mime_type=mime_type)
742 data = data.encode('utf-8')
743 with files.open(file_name, 'a') as blob_file:
744 blob_file.write(data)
745 files.finalize(file_name)
746 return files.blobstore.get_blob_key(file_name)
747
748
749 def path_to_mime_type(path):
750 return EXT_TO_MIME.get(os.path.splitext(path)[1], 'text/html')
751
752
753 EXT_TO_MIME = {
754 '.css': 'text/css',
755 '.js': 'text/javascript',
756 '.json': 'application/json',
757 '.html': 'text/html',
758 }
759
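Note: a round-trip sketch for the blob helpers above (content value illustrative):

    key = write_blob(u'<html>large console page</html>',
                     path_to_mime_type('chromium/console'))  # -> 'text/html'
    content = blobstore.BlobReader(key).read().decode('utf-8', 'replace')
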
760
761 ##########
762 # Functions for actually fetching original pages.
763 ##########
764 def fetch_pages():
765 """Starts a background fetch operation for pages that need it."""
766 logging.debug('fetch_pages()')
767 for url in URLS:
768 deferred.defer(fetch_page, **url)
769
770
771 def nonfatal_fetch_url(url, *args, **kwargs):
772 # Temporary workaround to disable AppEngine global cache of these pages.
773 if '?' in url:
774 url += '&' + str(random.random())
775 else:
776 url += '?' + str(random.random())
777
778 try:
779 return urlfetch.fetch(url, deadline=URLFETCH_DEADLINE, *args, **kwargs)
780 except urlfetch.DownloadError:
781 logging.warn('urlfetch failed: %s' % url, exc_info=1)
782 return None
783
784
785 def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None,
786 fetch_url=nonfatal_fetch_url):
787 """Fetches data about a set of pages."""
788 if type(localpath) != type(''):
789 logging.error('fetch_page: localpath is %r, expected a string' % (
790 repr(localpath)))
791 return
792 unquoted_localpath = urllib.unquote(localpath)
793 logging.debug('fetch_page("%s", "%s", "%s")' % (
794 unquoted_localpath, remoteurl, maxage))
795 page = get_or_create_page(unquoted_localpath, remoteurl, maxage)
796
797 # Check if our copy of the page is younger than maxage. If it is, we'll
798 # skip the fetch.
799 oldest_acceptable_timestamp = datetime.datetime.now() - datetime.timedelta(
800 seconds=maxage)
801 if (page.fetch_timestamp and
802 page.fetch_timestamp > oldest_acceptable_timestamp):
803 logging.debug('fetch_page: too recent, skipping')
804 return
805
806 # Perform the actual page fetch.
807 fetch_timestamp = datetime.datetime.now()
808 response = fetch_url(remoteurl)
809 if not response:
810 logging.warning('fetch_page: got empty response')
811 return
812 if response.status_code != 200:
813 logging.warning('fetch_page: got non-empty response but code '
814 '%d' % response.status_code)
815 return
816
817 # We have actual content. If there's one or more handlers, call them.
818 page_data = {}
819 page_data['content'] = response.content
820 if postfetch:
821 if not isinstance(postfetch, list):
822 postfetch = [postfetch]
823 for handler in postfetch:
824 logging.debug('fetch_page: calling postfetch handler '
825 '%s' % handler.__name__)
826 page_data = handler(unquoted_localpath, remoteurl, page_data)
827
828 # Save the returned content into the DB and caching layers.
829 logging.debug('fetch_page: saving page')
830 save_page(page, unquoted_localpath, fetch_timestamp, page_data)
831 if postsave:
832 if not isinstance(postsave, list):
833 postsave = [postsave]
834 for handler in postsave:
835 logging.debug('fetch_page: calling postsave handler '
836 '%s' % handler.__name__)
837 handler(unquoted_localpath, remoteurl, page_data)
838
601 839
602 # List of URLs to fetch. 840 # List of URLs to fetch.
603 URLS = [ 841 URLS = [
604 # Console URLs. 842 # Console URLs.
605 { 843 {
606 'remoteurl': 'http://build.chromium.org/p/chromium.chrome/console', 844 'remoteurl': 'http://build.chromium.org/p/chromium.chrome/console',
607 'localpath': 'chromium.chrome/console', 845 'localpath': 'chromium.chrome/console',
608 'postfetch': [console_handler, parse_master], 846 'postfetch': [console_handler, parse_master],
609 'postsave': console_merger, 847 'postsave': console_merger,
610 'maxage': 30, # 30 secs 848 'maxage': 30, # 30 secs
(...skipping 303 matching lines...)
914 # LKGR JSON. 1152 # LKGR JSON.
915 { 1153 {
916 'remoteurl': 1154 'remoteurl':
917 ('http://build.chromium.org/p/chromium.lkgr/json/builders/Linux%20x64/' 1155 ('http://build.chromium.org/p/chromium.lkgr/json/builders/Linux%20x64/'
918 'builds/-1?as_text=1'), 1156 'builds/-1?as_text=1'),
919 'localpath': 1157 'localpath':
920 'chromium.lkgr/json/builders/Linux%20x64/builds/-1/as_text=1.json', 1158 'chromium.lkgr/json/builders/Linux%20x64/builds/-1/as_text=1.json',
921 'maxage': 2*60, # 2 mins 1159 'maxage': 2*60, # 2 mins
922 }, 1160 },
923 ] 1161 ]
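Note: fetch_pages() splats each URLS entry into fetch_page via deferred.defer(fetch_page, **url). A sketch of one hypothetical entry (the chromium.example master does not exist):

    URLS.append({
        'remoteurl': 'http://build.chromium.org/p/chromium.example/console',
        'localpath': 'chromium.example/console',
        'postfetch': [console_handler, parse_master],
        'postsave': console_merger,
        'maxage': 30,  # 30 secs
    })
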
924
925
926 def nonfatal_fetch_url(url, *args, **kwargs):
927 # Temporary workaround to disable AppEngine global cache of these pages.
928 if '?' in url:
929 url += '&' + str(random.random())
930 else:
931 url += '?' + str(random.random())
932
933 try:
934 return urlfetch.fetch(url, deadline=URLFETCH_DEADLINE, *args, **kwargs)
935 except urlfetch.DownloadError:
936 logging.warn('urlfetch failed: %s' % url, exc_info=1)
937 return None
938
939
940 class Row(db.Model):
941 fetch_timestamp = db.DateTimeProperty(required=True)
942 rev_number = db.StringProperty(required=True)
943 localpath = db.StringProperty(required=True)
944 revision = db.TextProperty()
945 name = db.TextProperty()
946 status = db.TextProperty()
947 comment = db.TextProperty()
948 details = db.TextProperty()
949
950
951 class Page(db.Model):
952 fetch_timestamp = db.DateTimeProperty(required=True)
953 localpath = db.StringProperty(required=True)
954 content = db.TextProperty()
955 title = db.StringProperty()
956 offsite_base = db.StringProperty()
957 body_class = db.StringProperty()
958 remoteurl = db.TextProperty()
959 # Data updated separately, after creation.
960 content_blob = blobstore.BlobReferenceProperty()
961
962
963 def write_blob(data, mime_type):
964 """Saves a Unicode string as a new blob, returns the blob's key."""
965 file_name = files.blobstore.create(mime_type=mime_type)
966 data = data.encode('utf-8')
967 with files.open(file_name, 'a') as blob_file:
968 blob_file.write(data)
969 files.finalize(file_name)
970 return files.blobstore.get_blob_key(file_name)
971
972
973 def save_page(page, localpath, fetch_timestamp, page_data):
974 body_class = page_data.get('body_class', '')
975 content = page_data.get('content')
976 offsite_base = page_data.get('offsite_base', '')
977 title = page_data.get('title', '')
978
979 content_blob_key = None
980 try:
981 content = content.decode('utf-8', 'replace')
982 except UnicodeEncodeError:
983 logging.debug('save_page: content was already in unicode')
984 logging.debug('save_page: content size is %d' % len(content))
985 if len(content.encode('utf-8')) >= 10**6:
986 logging.debug('save_page: saving to blob')
987 content_blob_key = write_blob(content, path_to_mime_type(localpath))
988 content = None
989 def tx_page(page_key):
990 page = Page.get(page_key)
991 # E1103:225,7:fetch_page.tx_page: Instance of 'list' has no
992 # 'fetch_timestamp' member (but some types could not be inferred)
993 # pylint: disable=E1103
994 if page.fetch_timestamp > fetch_timestamp:
995 return
996 page.content = content
997 page.content_blob = content_blob_key
998 page.fetch_timestamp = fetch_timestamp
999 # title, offsite_base, body_class can all be empty strings for some
1000 # content. Where that's true, they're not used for displaying a console-
1001 # like resource, and the content alone is returned to the web user.
1002 page.title = title
1003 page.offsite_base = offsite_base
1004 page.body_class = body_class
1005 # E1103:231,4:fetch_page.tx_page: Instance of 'list' has no 'put' member
1006 # (but some types could not be inferred)
1007 # pylint: disable=E1103
1008 page.put()
1009 db.run_in_transaction(tx_page, page.key())
1010 page_data = {
1011 'body_class': body_class,
1012 'content': content,
1013 'offsite_base': offsite_base,
1014 'title': title,
1015 }
1016 if content_blob_key:
1017 page_data['content_blob'] = True
1018 put_pagedata_into_cache(localpath, page_data)
1019
1020
1021 def get_or_create_page(localpath, remoteurl, maxage):
1022 return Page.get_or_insert(
1023 key_name=localpath,
1024 localpath=localpath,
1025 remoteurl=remoteurl,
1026 maxage=maxage,
1027 fetch_timestamp=datetime.datetime.now() - datetime.timedelta(hours=24),
1028 content=None,
1029 content_blob=None)
1030
1031
1032 def fetch_page(localpath, remoteurl, maxage, postfetch=None, postsave=None,
1033 fetch_url=nonfatal_fetch_url):
1034 """Fetches data about a set of pages."""
1035 if type(localpath) != type(''):
1036 logging.error('fetch_page: localpath is %r, expected a string' % (
1037 repr(localpath)))
1038 return
1039 unquoted_localpath = urllib.unquote(localpath)
1040 logging.debug('fetch_page("%s", "%s", "%s")' % (
1041 unquoted_localpath, remoteurl, maxage))
1042 page = get_or_create_page(unquoted_localpath, remoteurl, maxage)
1043
1044 # Check if our copy of the page is younger than maxage. If it is, we'll
1045 # skip the fetch.
1046 oldest_acceptable_timestamp = datetime.datetime.now() - datetime.timedelta(
1047 seconds=maxage)
1048 if (page.fetch_timestamp and
1049 page.fetch_timestamp > oldest_acceptable_timestamp):
1050 logging.debug('fetch_page: too recent, skipping')
1051 return
1052
1053 # Perform the actual page fetch.
1054 fetch_timestamp = datetime.datetime.now()
1055 response = fetch_url(remoteurl)
1056 if not response:
1057 logging.warning('fetch_page: got empty response')
1058 return
1059 if response.status_code != 200:
1060 logging.warning('fetch_page: got non-empty response but code '
1061 '%d' % response.status_code)
1062 return
1063
1064 # We have actual content. If there's one or more handlers, call them.
1065 page_data = {}
1066 page_data['content'] = response.content
1067 if postfetch:
1068 if not isinstance(postfetch, list):
1069 postfetch = [postfetch]
1070 for handler in postfetch:
1071 logging.debug('fetch_page: calling postfetch handler '
1072 '%s' % handler.__name__)
1073 page_data = handler(unquoted_localpath, remoteurl, page_data)
1074
1075 # Save the returned content into the DB and caching layers.
1076 logging.debug('fetch_page: saving page')
1077 save_page(page, unquoted_localpath, fetch_timestamp, page_data)
1078 if postsave:
1079 if not isinstance(postsave, list):
1080 postsave = [postsave]
1081 for handler in postsave:
1082 logging.debug('fetch_page: calling postsave handler '
1083 '%s' % handler.__name__)
1084 handler(unquoted_localpath, remoteurl, page_data)
1085
1086
1087 EXT_TO_MIME = {
1088 '.css': 'text/css',
1089 '.js': 'text/javascript',
1090 '.json': 'application/json',
1091 '.html': 'text/html',
1092 }
1093
1094
1095 def path_to_mime_type(path):
1096 return EXT_TO_MIME.get(os.path.splitext(path)[1], 'text/html')
1097
1098
1099 def fetch_pages():
1100 """Starts a background fetch operation for pages that need it."""
1101 logging.debug('fetch_pages()')
1102 for url in URLS:
1103 deferred.defer(fetch_page, **url)