Chromium Code Reviews

Side by Side Diff: infra/tools/cq_stats/cq_stats.py

Issue 1125653004: Check in cq_stats.py (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: use utc Created 5 years, 7 months ago
1 #!/usr/bin/python
2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5 """Generate stats of CQ usage."""
6
7 import argparse
8 import calendar
9 import collections
10 import datetime
11 import dateutil.parser
12 import dateutil.tz
13 from xml.etree import ElementTree
14 import infra.libs.logs
15 import json
16 import logging
17 from multiprocessing.pool import ThreadPool
18 import numbers
19 import numpy
20 import re
21 import subprocess
22 import sys
23 import time
24 import urllib
25 import urllib2
26 import urlparse
27
28
29 STATS_URL = 'http://chromium-cq-status.appspot.com'
30 # Expects the project name to be substituted in via %.
31 TREE_STATUS_URL = 'http://%s-status.appspot.com'
32 PROJECTS = {
33 'chromium': {
34 'tree-status': TREE_STATUS_URL % 'chromium',
35 'type': 'git',
36 'repo': 'https://chromium.googlesource.com/chromium/src',
37 },
38 'blink': {
39 'tree-status': TREE_STATUS_URL % 'blink',
40 'type': 'svn',
41 'repo': 'svn://svn.chromium.org/blink/trunk/',
42 },
43 }
44 # Map of interval names to their length in minutes.
45 INTERVALS = {
46 'week': 60 * 24 * 7,
47 'day': 60 * 24,
48 'hour': 60,
49 '15min': 15,
50 }
51 VALID_REASONS = collections.OrderedDict([
52 ('manual-cancel', {
53 'item': 'stopped manually',
54 'message': 'stopped manually (CQ box was unchecked)',
55 }),
56 ('missing-lgtm', {
57 'item': 'missed LGTM',
58 'message': 'are missing LGTM',
59 }),
60 ('not-lgtm', {
61 'item': 'NOT LGTMs',
62 'message': 'have been disapproved (NOT LGTM)',
63 }),
64 ('failed-patch', {
65 'item': 'failures',
66 'message': 'failed to apply patch',
67 }),
68 ('invalid-delimiter', {
69 'item': 'errors',
70 'message': 'have incorrect CQ_EXTRA_TRYBOTS flag',
71 }),
72 ('failed-presubmit-bot', {
73 'item': 'failures',
74 'message': 'failed presubmit bot (often due to missing OWNERS LGTM)',
75 }),
76 ])
77 FLAKY_REASONS = collections.OrderedDict([
78 ('failed-commit', {
79 'item': 'failures',
80 'message': 'failed to commit',
81 }),
82 ('failed-jobs', {
83 'item': 'failures',
84 'message': 'failed jobs (excluding presubmit)',
85 }),
86 ('failed-to-trigger', {
87 'item': 'failures',
88 'message': 'failed to trigger jobs',
89 }),
90 ('failed-presubmit-check', {
91 'item': 'failures',
92 'message': 'failed presubmit check',
93 }),
94 ])
95 KNOWN_REASONS = collections.OrderedDict()
96 KNOWN_REASONS.update(VALID_REASONS)
97 KNOWN_REASONS.update(FLAKY_REASONS)
98
99 REASONS = collections.OrderedDict()
100 REASONS.update(KNOWN_REASONS)
101 REASONS['failed-unknown'] = {
102 'item': 'failures',
103 'message': 'failed for any other reason',
104 }
105
106
107 def parse_args():
108 parser = argparse.ArgumentParser(description=sys.modules['__main__'].__doc__)
109 parser.add_argument(
110 '--project',
111 required=True,
112 choices=PROJECTS.keys(),
113 help='Collect stats about this project.')
114 parser.add_argument(
115 '--bot', type=str, dest='bots',
116 action='append',
117 default=['blink-deps-roller@chromium.org',
118 'chrome-admin@google.com',
119 'chrome-release@google.com',
120 'chromeos-commit-bot@chromium.org',
121 'skia-deps-roller@chromium.org',
122 ],
123 help=('Add an author to be treated as a bot. '
124 'Repeat to add several bots. Default: %(default)s.'))
125 parser.add_argument(
126 '--seq', action='store_true',
127 help='Run everything sequentially for debugging.')
128 parser.add_argument(
129 '--thread-pool', type=int, default=200,
130 help='Fetch data using this many parallel threads. Default=%(default)s.')
131 parser.add_argument(
132 '--list-rejections', action='store_true',
133 help='List rejected CLs and reasons for rejection.')
134 parser.add_argument(
135 '--list-false-rejections', action='store_true',
136 help='List CLs that were committed in more than one attempt.')
137 parser.add_argument(
138 '--use-logs', action='store_true',
139 default=True,
140 help=('On by default. '
141 'Fetch the detailed logs and recompute the stats in this script, '
142 'instead of fetching precomputed stats. '
143 'Slower, but more accurate, and helps validate the cached stats.'))
144 parser.add_argument(
145 '--use-cache',
146 dest='use_logs',
147 action='store_false',
148 help=('Fetch the cached stats from the app. Opposite of --use-logs.'))
149 parser.add_argument(
150 '--date',
151 help='Start date of stats, YYYY-MM-DD[ HH[:MM]]. Default: one --range ago.')
152 parser.add_argument('--range',
153 choices=INTERVALS.keys(),
154 default='week',
155 help='Time range to print stats for.')
156 infra.libs.logs.add_argparse_options(parser, default_level=logging.ERROR)
157
158 args = parser.parse_args()
159
160 if args.date:
161 args.date = date_from_string(args.date)
162 else:
163 args.date = (datetime.datetime.now() -
164 datetime.timedelta(minutes=INTERVALS[args.range]))
165
166 return args
167
168
169 def date_from_string(iso_str):
170 try:
171 return dateutil.parser.parse(iso_str)
172 except ValueError:
173 pass
174 raise ValueError('Unrecognized date/time format: %s' % iso_str)
175
176
177 def date_from_timestamp(timestamp):
178 return datetime.datetime.utcfromtimestamp(float(timestamp))
179
180
181 def date_from_git(date_str):
182 """If date_str is not valid or None, return None."""
183 if not date_str:
184 return None
185 date = None
186 try:
187 date = datetime.datetime.strptime(
188 date_str, '%a %b %d %H:%M:%S %Y')
189 except ValueError:
190 pass
191 return date
192
193
194 def local_date_to_timestamp(date):
195 return time.mktime(date.timetuple())
196
197
198 def local_to_utc(local_time):
199 timestamp = local_date_to_timestamp(local_time)
200 utc_time = datetime.datetime.utcfromtimestamp(timestamp)
201 return utc_time
202
203
204 def fetch_json(url):
205 result = {'error': '404'}
206 delays = [0.0, 0.5, 1.0, 2.0]
207 for retry in range(len(delays)):
208 if retry:
209 time.sleep(delays[retry])
210 try:
211 result = json.loads(urllib2.urlopen(url).read())
212 return result
213 except (IOError, ValueError) as e:
214 logging.warning('Failed to fetch (attempt %d) %s: %s', retry + 1, url, e)
215 logging.error('Permanently failed to fetch %s: %s', url, e)
216 return result
217
218
219 def fetch_tree_status(project, end_date, start_date=None, limit=1000):
220 """Fetch all tree events in the given interval.
221
222 Args:
223 project (str): e.g. 'chromium' or 'blink'.
224 end_date (datetime): end of the interval, in local time.
225 start_date (datetime): optional start of the interval, in local time.
226 limit (int): max. number of events.
227
228 Returns:
229 List of events {'open': bool, 'date': datetime} sorted by date.
230 """
231 end_timestamp = int(time.mktime(end_date.timetuple()))
232 params = {
233 'format': 'json',
234 'limit': limit,
235 # Confusingly, chromium-status app defines the range as
236 # endTime <= t < startTime.
237 'startTime': end_timestamp,
238 }
239 if start_date:
240 params['endTime'] = int(time.mktime(start_date.timetuple()))
241 query = 'allstatus?' + urllib.urlencode(params)
242 url = urlparse.urljoin(PROJECTS[project]['tree-status'], query)
243 logging.debug('Fetching %s', url)
244 status = fetch_json(url)
245 # Bug in AE app: it may return events outside of time range.
246 def entry(event):
247 date_utc = date_from_string(event['date'])
248 date_local = date_from_timestamp(calendar.timegm(date_utc.utctimetuple()))
249 return {'date': date_local,
250 'open': event['general_state'] == 'open'}
251 def in_range(entry):
252 logging.debug('in_range(%r)', entry)
253 if entry['date'] >= end_date:
254 return False
255 if not start_date:
256 return True
257 return start_date <= entry['date']
258
259 if not status or type(status) is not list:
260 status = []
261 status = [entry(event) for event in status]
262 status = sorted([e for e in status if in_range(e)], key=lambda e: e['date'])
263 return status
264
265
266 def fetch_git_page(repo_url, cursor=None, page_size=2000):
267 """Fetch one page worth of logs from gitiles."""
268 params = {
269 'pretty': 'full',
270 'format': 'JSON',
271 'n': page_size,
272 }
273 if cursor:
274 params.update({'s': cursor})
275 url = '%s/%s?%s' % (repo_url, '/+log/master', urllib.urlencode(params))
276 logging.debug('fetch_git_page: url = %s', url)
277 try:
278 # Strip off the anti-XSS string from the response.
279 response = urllib2.urlopen(url)
280 lines = [l.rstrip() for l in response if l.rstrip() != ")]}'"]
281 raw_data = ''.join(lines)
282 page = json.loads(raw_data)
283 except (IOError, ValueError) as e:
284 page = {}
285 logging.error('Failed to fetch a page: %s', e)
286 return page
287
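
# Illustrative sketch only (not part of the patch under review): the shape of
# the gitiles '+log?format=JSON' page that fetch_git_page() returns and that
# fetch_git_logs() below consumes. The keys mirror what the code reads; the
# values are made up, and the committer 'time' format matches date_from_git().
_EXAMPLE_GIT_LOG_PAGE = {
    'log': [{
        'commit': 'deadbeefdeadbeefdeadbeefdeadbeefdeadbeef',
        'author': {'email': 'someone@chromium.org'},
        'committer': {'email': 'commit-bot@chromium.org',
                      'time': 'Mon Apr 27 12:34:56 2015'},
    }],
    'next': 'cafebabe',  # paging cursor; absent on the last page
}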
288
289 def fetch_git_logs(repo, from_date, to_date):
290 """Fetch all logs from Gitiles for the given date range.
291
292 Gitiles does not natively support time ranges, so we just fetch
293 everything until the range is covered. Assume that logs are ordered
294 in reverse chronological order.
295 """
296 cursor = ''
297 commit_date = to_date
298 data = []
299 while cursor is not None:
300 page = fetch_git_page(repo, cursor)
301 logs = page.get('log', [])
302 cursor = page.get('next')
303 for log in logs:
304 committer = log.get('committer', {})
305 commit_date = date_from_git(committer.get('time'))
306 if not commit_date:
307 continue
308 if commit_date > to_date:
309 continue
310 if commit_date < from_date:
311 break
312 data.append({
313 'author': log.get('author', {}).get('email'),
314 'date': commit_date,
315 'commit-bot': bool('commit-bot' in committer.get('email', '')),
316 'revision': log.get('commit'),
317 })
318
319 if commit_date < from_date:
320 break
321 return data
322
323
324 def fetch_svn_logs(repo, from_date, to_date):
325 from_date = local_to_utc(from_date)
326 to_date = local_to_utc(to_date)
327 range_str = (
328 '{%s +0000}:{%s +0000}' % (from_date, to_date))
329 out = subprocess.check_output(
330 ['svn', 'log', '--with-all-revprops', '--xml', repo, '-r', range_str])
331 data = []
332 for logentry in ElementTree.XML(out).findall('logentry'):
333 date_str = logentry.find('date').text
334 date = date_from_string(date_str)
335 entry = {
336 'author': logentry.find('author').text,
337 'date': date,
338 'revprops': {},
339 'commit-bot': False,
340 }
341 revprops = logentry.find('revprops')
342 if revprops is not None:
343 for revprop in revprops.findall('property'):
344 entry['revprops'][revprop.attrib['name']] = revprop.text
345 if revprop.attrib['name'] == 'commit-bot':
346 entry['commit-bot'] = True
347 data.append(entry)
348 return data
349
350
351 def fetch_stats(args, begin_date=None, stats_range=None):
352 if not begin_date:
353 begin_date = args.date
354 if not stats_range:
355 stats_range = args.range
356 if begin_date:
357 timestamp = (int(local_date_to_timestamp(begin_date)) +
358 INTERVALS[stats_range] * 60)
359 else:
360 timestamp = int(time.time())
361
362 params = {
363 'project': args.project,
364 'interval_minutes': INTERVALS[stats_range],
365 'end': timestamp,
366 'count': 2, # Fetch requested and previous set, for comparison.
367 }
368
369 query = 'stats/query?' + urllib.urlencode(params)
370 url = urlparse.urljoin(STATS_URL, query)
371 logging.debug('Fetching %s', url)
372 return fetch_json(url)
373
374
375 # "Dangerous default value []": pylint: disable=W0102
376 def fetch_cq_logs(start_date=None, end_date=None, filters=[]):
377 begin_time = None
378 end_time = None
379 if start_date:
380 begin_time = int(time.mktime(start_date.timetuple()))
381 if end_date:
382 end_time = int(time.mktime(end_date.timetuple()))
383 results = []
384 cursor = None
385 while True:
386 params = {}
387 if begin_time:
388 params['begin'] = begin_time
389 if end_time:
390 params['end'] = end_time
391 if cursor:
392 params['cursor'] = cursor
393 query = 'query/%s?%s' % ('/'.join(filters), urllib.urlencode(params))
394 url = urlparse.urljoin(STATS_URL, query)
395 logging.debug('Fetching %s', url)
396 data = fetch_json(url)
397 results.extend(data.get('results', []))
398 logging.info('fetch_cq_logs: Got %d results', len(results))
399 logging.debug(' %s', '\n '.join(['%s %s' % (
400 patch_url((r.get('fields', {}).get('issue', 0),
401 r.get('fields', {}).get('patchset', 0))),
402 r.get('fields', {}).get('action', '')) for r in results]))
403 cursor = data.get('cursor', None)
404 if not data.get('more', False) or not cursor:
405 break
406
407 return results
408
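
# Illustrative sketch only (not part of the patch under review): the shape of
# one chromium-cq-status 'query/...' page that fetch_cq_logs() walks through
# via the 'cursor'/'more' fields. The keys mirror what this file reads in
# derive_stats() and derive_patch_stats(); the values are made up.
_EXAMPLE_CQ_QUERY_PAGE = {
    'results': [{
        'timestamp': 1430000000.0,
        'fields': {
            'project': 'chromium',
            'issue': 123456789,
            'patchset': 1,
            'action': 'patch_stop',
            'message': 'Try jobs failed: ...',
            # 'reason' appears on 'patch_failed' events and may be a JSON string.
        },
    }],
    'cursor': 'opaque-cursor-token',
    'more': False,
}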
409
410 def default_stats():
411 """Generate all the required stats fields with default values."""
412 stats = {
413 'begin': datetime.datetime.now(),
414 'end': datetime.datetime(1, 1, 1),
415 'issue-count': 0,
416 'patchset-count': 0,
417 'attempt-count': 0,
418 'patch_stats': {},
419 'patchset-false-reject-count': 0, # Deprecated stats?
420 'attempt-reject-count': 0, # Num. of rejected attempts
421 'attempt-false-reject-count': 0, # Num. of falsely rejected attempts
422 'false-rejections': [], # patches with falsely rejected attempts
423 'rejections': [], # patches with rejected attempts
424 'rejected-patches': set(), # Patches that never committed
425 'patchset-commit-count': 0,
426 'patchset-total-commit-queue-durations': derive_list_stats([0]),
427 'patchset-durations': derive_list_stats([0]),
428 'patchset-committed-durations': derive_list_stats([0]),
429 'patchset-attempts': derive_list_stats([0]),
430 'patchset-committed-attempts': derive_list_stats([0]),
431 'jobs': {},
432 'tree': {'open': 0.0, 'total': 0.0},
433 'usage': {},
434 }
435 for reason in REASONS:
436 stats[reason] = []
437 return stats
438
439
440 def organize_stats(stats, latest_init=None, previous_init=None):
441 """Changes cached lists of stats into dictionaries.
442
443 Args:
444 stats (dict): set of stats as returned by chromium-cq-status.
445
446 Returns:
447 result (dict): mapping ('latest' | 'previous') -> stat.name
448 -> <stats json>. If latest/previous_init are given, add to those
449 stats rather than compute them from scratch.
450 """
451 if 'results' not in stats:
452 return None
453 result = {
454 'latest' : latest_init if latest_init else default_stats(),
455 'previous': previous_init if previous_init else default_stats(),
456 }
457 for dataset, name in zip(stats['results'], ['latest', 'previous']):
458 result[name]['begin'] = min(
459 date_from_timestamp(dataset['begin']),
460 result[name].get('begin', datetime.datetime.now()))
461 result[name]['end'] = max(date_from_timestamp(dataset['end']),
462 result[name]['end'])
463 re_trybot_pass_count = re.compile('^trybot-(.+)-pass-count$')
464 re_trybot_false_reject_count = re.compile(
465 '^trybot-(.+)-false-reject-count$')
466 assert 'jobs' in result[name] and isinstance(result[name]['jobs'], dict)
467 for data in dataset['stats']:
468 if data['type'] == 'count':
469 result[name][data['name']] = data['count']
470 match_pass = re_trybot_pass_count.match(data['name'])
471 match_false_reject = re_trybot_false_reject_count.match(data['name'])
472 if match_pass:
473 job_name = match_pass.group(1)
474 result[name]['jobs'].setdefault(job_name, {
475 'pass-count': 0,
476 'false-reject-count': 0,
477 })
478 result[name]['jobs'][job_name]['pass-count'] += data['count']
479 logging.debug('Job %s passed %d times. Jobs: %r',
480 job_name, data['count'], result[name]['jobs'])
481 if match_false_reject:
482 job_name = match_false_reject.group(1)
483 result[name]['jobs'].setdefault(job_name, {
484 'pass-count': 0,
485 'false-reject-count': 0,
486 })
487 result[name]['jobs'][job_name]['false-reject-count'] += data['count']
488 logging.debug('Job %s flakily failed %d times',
489 job_name, data['count'])
490 else:
491 assert data['type'] == 'list'
492 result[name][data['name']] = {
493 '10': data['percentile_10'],
494 '25': data['percentile_25'],
495 '50': data['percentile_50'],
496 '75': data['percentile_75'],
497 '90': data['percentile_90'],
498 '95': data['percentile_95'],
499 '99': data['percentile_99'],
500 'min': data['min'],
501 'max': data['max'],
502 'mean': data['mean'],
503 'size': data['sample_size'],
504 }
505 return result
506
507
508 def derive_list_stats(series):
509 if not series:
510 series = [0]
511 return {
512 '10': numpy.percentile(series, 10),
513 '25': numpy.percentile(series, 25),
514 '50': numpy.percentile(series, 50),
515 '75': numpy.percentile(series, 75),
516 '90': numpy.percentile(series, 90),
517 '95': numpy.percentile(series, 95),
518 '99': numpy.percentile(series, 99),
519 'min': min(series),
520 'max': max(series),
521 'mean': numpy.mean(series),
522 'size': len(series),
523 }
524
525
526 def sort_by_count(elements):
527 return sorted(elements, key=lambda p: p['count'], reverse=True)
528
529
530 def stats_by_count_entry(patch_stats, name, patch, reasons):
531 entry = {
532 'count': patch_stats[name],
533 'patch_id': patch,
534 'failed-jobs-details': patch_stats['failed-jobs-details']
535 }
536 for n in reasons:
537 if n in patch_stats:
538 entry[n] = patch_stats[n]
539 assert type(entry[n]) is int, 'Bad type in %s[%s]: %r\nEntry=%r' % (
540 patch, n, entry[n], entry)
541 return entry
542
543
544 # "Dangerous default value []": pylint: disable=W0102
545 def stats_by_count(patch_stats, name, reasons=[]):
546 return sort_by_count([
547 stats_by_count_entry(patch_stats[p], name, p, reasons)
548 for p in patch_stats if patch_stats[p].get(name)])
549
550
551 def _derive_stats_from_patch_stats(stats):
552 patch_stats = stats['patch_stats']
553 stats['attempt-count'] = sum(
554 patch_stats[p]['attempts'] for p in patch_stats)
555 stats['patchset-false-reject-count'] = sum(
556 patch_stats[p]['false-rejections'] for p in patch_stats)
557 stats['attempt-reject-count'] = sum(
558 patch_stats[p]['rejections'] for p in patch_stats)
559 stats['rejected-patches'] = set(
560 p for p in patch_stats if not patch_stats[p]['committed'])
561 stats['false-rejections'] = stats_by_count(
562 patch_stats, 'false-rejections', REASONS)
563 stats['rejections'] = stats_by_count(patch_stats, 'rejections', REASONS)
564 for r in REASONS:
565 stats[r] = stats_by_count(patch_stats, r, set(REASONS) - set([r]))
566
567 stats['patchset-commit-count'] = len([
568 p for p in patch_stats if patch_stats[p]['committed']])
569 stats['patchset-total-commit-queue-durations'] = derive_list_stats([
570 patch_stats[p]['patchset-duration-wallclock'] for p in patch_stats])
571 stats['patchset-durations'] = derive_list_stats([
572 patch_stats[p]['patchset-duration'] for p in patch_stats])
573 stats['patchset-committed-durations'] = derive_list_stats([
574 patch_stats[p]['patchset-duration'] for p in patch_stats
575 if patch_stats[p]['committed']])
576 stats['patchset-attempts'] = derive_list_stats([
577 patch_stats[p]['attempts'] for p in patch_stats])
578 stats['patchset-committed-attempts'] = derive_list_stats([
579 patch_stats[p]['attempts'] for p in patch_stats
580 if patch_stats[p]['committed']])
581
582
583 def derive_stats(args, begin_date, init_stats=None):
584 """Process raw CQ updates log and derive stats.
585
586 Fetches raw CQ events and returns the same format as organize_stats().
587 If ``init_stats`` are given, preserve the jobs stats and replace the
588 other stats.
589 """
590 stats = init_stats or default_stats()
591 filters = ['project=%s' % args.project, 'action=patch_stop']
592 end_date = begin_date + datetime.timedelta(minutes=INTERVALS[args.range])
593 results = fetch_cq_logs(begin_date, end_date, filters=filters)
594 if not results:
595 return stats
596
597 stats['begin'] = date_from_timestamp(results[-1]['timestamp'])
598 stats['end'] = date_from_timestamp(results[0]['timestamp'])
599 # Collect the set of issues and the set of (issue, patchset) pairs tried by CQ.
600 patches, issues = set(), set()
601 for reason in results:
602 issues.add(reason['fields']['issue'])
603 patches.add((reason['fields']['issue'], reason['fields']['patchset']))
604 stats['issue-count'] = len(issues)
605 stats['patchset-count'] = len(patches)
606 patch_stats = {}
607 # Fetch and process each patchset log
608 def get_patch_stats(patch_id):
609 return derive_patch_stats(end_date, patch_id)
610
611 if args.seq or not args.thread_pool:
612 iterable = map(get_patch_stats, patches)
613 else:
614 pool = ThreadPool(min(args.thread_pool, len(patches)))
615 iterable = pool.imap_unordered(get_patch_stats, patches)
616 for patch_id, pstats in iterable:
617 patch_stats[patch_id] = pstats
618
619 stats['patch_stats'] = patch_stats
620 _derive_stats_from_patch_stats(stats)
621 return stats
622
623
624 def patch_url(patch_id):
625 return '%s/patch-status/%s/%s' % ((STATS_URL,) + patch_id)
626
627
628 def parse_json(obj, return_type=None):
629 """Attempt to interpret a string as JSON.
630
631 If return_type is given, guarantee that type; otherwise pass the value through.
632 """
633 result = obj
634 if type(obj) in [str, unicode]:
635 try:
636 result = json.loads(obj)
637 except ValueError as e:
638 logging.error('Could not decode json in "%s": %s', obj, e)
639 # If the type is wrong, return an empty object of the correct type.
640 # In most cases, casting to the required type will not work anyway
641 # (e.g. list to dict).
642 if return_type and type(result) is not return_type:
643 result = return_type()
644 return result
645
646
647 def parse_failing_tryjobs(message):
648 """Parse the message to extract failing try jobs."""
649 builders = []
650 msg_lines = message.splitlines()
651 for line in msg_lines[1:]:
652 words = line.split(None, 1)
653 if not words:
654 continue
655 builder = words[0]
656 builders.append(builder)
657 return builders
658
659
660 def derive_patch_stats(end_date, patch_id):
661 """``patch_id`` is a tuple (issue, patchset)."""
662 results = fetch_cq_logs(end_date=end_date, filters=[
663 'issue=%s' % patch_id[0], 'patchset=%s' % patch_id[1]])
664 # The results should already be ordered, but sort again just to be sure.
665 results = sorted(results, key=lambda r: r['timestamp'], reverse=True)
666 logging.debug('derive_patch_stats(%r): fetched %d entries.',
667 patch_id, len(results))
668 # Group by attempts
669 attempts = []
670
671 def new_attempt():
672 attempt_empty = {
673 'id': 0,
674 'begin': 0.0,
675 'end': 0.0,
676 'duration': 0.0,
677 'actions': [],
678 'committed': False,
679 'reason': {},
680 }
681 for reason in REASONS:
682 attempt_empty[reason] = False
683 return attempt_empty
684
685 def add_attempt(attempt, counter):
686 """Create a new attempt from accumulated actions."""
687 assert attempt['actions']
688 attempt['id'] = counter
689 attempt['duration'] = attempt['end'] - attempt['begin']
690 known_reasons = [r for r in KNOWN_REASONS if attempt[r]]
691 if not attempt['committed'] and not known_reasons:
692 attempt['failed-unknown'] = True
693 logging.debug(
694 'add_attempt: #%d (%s)',
695 attempt['id'],
696 ', '.join([r for r in REASONS if attempt[r]]))
697 attempts.append(attempt)
698
699 # An attempt is a set of actions between patch_start and patch_stop
700 # actions. Repeated patch_start / patch_stop actions are ignored.
701 attempt = new_attempt()
702 failing_builders = {}
703 state = 'stop'
704 attempt_counter = 0
705 for result in reversed(results):
706 action = result['fields'].get('action')
707 if state == 'stop':
708 if action == 'patch_start':
709 state = 'start'
710 attempt['begin'] = result['timestamp']
711 if state != 'start':
712 continue
713 attempt['actions'].append(result)
714 if action == 'patch_stop':
715 attempt['end'] = result['timestamp']
716 message = result['fields'].get('message', '')
717 if 'CQ bit was unchecked on CL' in message:
718 attempt['manual-cancel'] = True
719 if 'No LGTM' in message:
720 attempt['missing-lgtm'] = True
721 if 'A disapproval has been posted' in message:
722 attempt['not-lgtm'] = True
723 if 'Transient error: Invalid delimiter' in message:
724 attempt['invalid-delimiter'] = True
725 if 'Failed to commit' in message:
726 attempt['failed-commit'] = True
727 if ('Failed to apply patch' in message or
728 'Failed to apply the patch' in message):
729 attempt['failed-patch'] = True
730 if 'Presubmit check' in message:
731 attempt['failed-presubmit-check'] = True
732 if 'Try jobs failed' in message:
733 if 'presubmit' in message:
734 attempt['failed-presubmit-bot'] = True
735 else:
736 attempt['failed-jobs'] = message
737 builders = parse_failing_tryjobs(message)
738 for b in builders:
739 failing_builders.setdefault(b, 0)
740 failing_builders[b] += 1
741 if 'Exceeded time limit waiting for builds to trigger' in message:
742 attempt['failed-to-trigger'] = True
743 attempt_counter += 1
744 add_attempt(attempt, attempt_counter)
745 attempt = new_attempt()
746 state = 'stop'
747 continue
748 if action == 'patch_committed':
749 attempt['committed'] = True
750 # TODO(sergeyberezin): enable this after this action is stable in CQ.
751 if action == 'patch_failed':
752 attempt['reason'] = parse_json(
753 result['fields'].get('reason', {}), return_type=dict)
754 logging.info('Attempt reason: %r', attempt['reason'])
755 if attempt['reason'].get('fail_type') == 'reviewer_lgtm':
756 attempt['missing-lgtm'] = True
757 if attempt['reason'].get('fail_type') == 'commit':
758 attempt['failed-commit'] = True
759 if attempt['reason'].get('fail_type') == 'simple try job':
760 failed_jobs = parse_json(attempt['reason'].get(
761 'fail_details', [('unknown_master', 'unknown_bot')]))
762 # Remove the presubmit bot - it is accounted for separately.
763 failed_jobs = [j for j in failed_jobs if 'presubmit' not in j[1]]
764 attempt['failed-jobs'] = failed_jobs
765
766 stats = {}
767 committed_set = set(a['id'] for a in attempts if a['committed'])
768 stats['committed'] = len(committed_set)
769 stats['attempts'] = len(attempts)
770 stats['rejections'] = stats['attempts'] - stats['committed']
771
772 logging.info('derive_patch_stats: %s has %d attempts, committed=%d',
773 patch_url(patch_id), len(attempts), stats['committed'])
774
775 valid_reasons_set = set()
776 for reason in VALID_REASONS:
777 s = set(a['id'] for a in attempts if a[reason])
778 stats[reason] = len(s)
779 valid_reasons_set.update(s)
780 for reason in set(REASONS) - set(VALID_REASONS):
781 stats[reason] = len(set(a['id'] for a in attempts if a[reason]))
782
783 # Populate failed builders.
784 stats['failed-jobs-details'] = failing_builders
785
786 stats['false-rejections'] = 0
787 if stats['committed']:
788 stats['false-rejections'] = len(
789 set(a['id'] for a in attempts) - committed_set - valid_reasons_set)
790 # Sum of attempt duration.
791 stats['patchset-duration'] = sum(a['duration'] for a in attempts)
792 if attempts:
793 stats['patchset-duration-wallclock'] = (
794 attempts[-1]['end'] - attempts[0]['begin'])
795 else:
796 stats['patchset-duration-wallclock'] = 0.0
797 return patch_id, stats
798
799
800 def derive_tree_stats(project, start_date, end_date):
801 """Given a list of tree status events, derive tree closure stats."""
802 # Fetch one more event right before the range, so we know the
803 # initial tree status.
804 status = (fetch_tree_status(project, end_date=start_date, limit=1) +
805 fetch_tree_status(project, end_date, start_date))
806 stats = {'open': 0.0, 'total': (end_date - start_date).total_seconds()}
807 if not status:
808 return stats
809
810 logging.debug('Tree status:\n%s', '\n'.join([' %r' % e for e in status]))
811
812 is_open = status[0]['open']
813 curr_date = start_date
814 for event in status[1:]:
815 delta = event['date'] - curr_date
816 if is_open and not event['open']:
817 stats['open'] += delta.total_seconds()
818 logging.debug('Tree was open from %s to %s for %s (total of %s)',
819 curr_date, event['date'],
820 delta, datetime.timedelta(seconds=stats['open']))
821 if not is_open:
822 curr_date = event['date']
823 is_open = event['open']
824
825 # Account for the remaining time after the last event.
826 if is_open:
827 delta = end_date - curr_date
828 stats['open'] += delta.total_seconds()
829 logging.debug('Tree was open from %s to %s for %s (total of %s)',
830 curr_date, end_date,
831 delta, datetime.timedelta(seconds=stats['open']))
832 return stats
833
834
835 def derive_log_stats(log_data, bots):
836 # Calculate stats.
837 cq_commits = [v for v in log_data if v['commit-bot']]
838 users = {}
839 for commit in cq_commits:
840 users[commit['author']] = users.get(commit['author'], 0) + 1
841 committers = {}
842 manual_committers = {}
843 bot_committers = {}
844 bot_manual_committers = {}
845 for commit in log_data:
846 committers[commit['author']] = committers.get(commit['author'], 0) + 1
847 if not commit['commit-bot']:
848 manual_committers[commit['author']] = manual_committers.get(
849 commit['author'], 0) + 1
850 if commit['author'] in bots:
851 bot_committers[commit['author']] = bot_committers.get(
852 commit['author'], 0) + 1
853 if not commit['commit-bot']:
854 bot_manual_committers[commit['author']] = bot_manual_committers.get(
855 commit['author'], 0) + 1
856
857 stats = {}
858 stats['cq_commits'] = len(cq_commits)
859 stats['total_commits'] = len(log_data)
860 stats['users'] = len(users)
861 stats['committers'] = len(committers)
862 stats['manual_committers'] = len(manual_committers)
863 stats['manual_commits'] = sum(x for x in manual_committers.itervalues())
864 stats['bot_committers'] = len(bot_committers)
865 stats['bot_commits'] = sum(x for x in bot_committers.itervalues())
866 stats['bot_manual_commits'] = sum(
867 x for x in bot_manual_committers.itervalues())
868 stats['manual_only_committers'] = {
869 a: c for a, c in committers.iteritems()
870 if c == manual_committers.get(a, 0)}
871 return stats
872
873 def derive_git_stats(project, start_date, end_date, bots):
874 log_data = fetch_git_logs(PROJECTS[project]['repo'], start_date, end_date)
875 return derive_log_stats(log_data, bots)
876
877
878 def derive_svn_stats(project, start_date, end_date, bots):
879 log_data = fetch_svn_logs(PROJECTS[project]['repo'], start_date, end_date)
880 return derive_log_stats(log_data, bots)
881
882
883 def percentage_tuple(data, total):
884 num_data = data if isinstance(data, numbers.Number) else len(data)
885 num_total = total if isinstance(total, numbers.Number) else len(total)
886 percent = 100. * num_data / num_total if num_total else 0.
887 return num_data, num_total, percent
888
889
890 def percentage(data, total):
891 return percentage_tuple(data, total)[2]
892
893
894 def round_timedelta(seconds):
895 # We never care about the milliseconds when printing timedeltas:
896 return datetime.timedelta(seconds=round(seconds))
897
898
899 def output(fmt='', *args):
900 """An equivalent of print to mock out in testing."""
901 print fmt % args
902
903
904 def print_attempt_counts(stats, name, message, item_name='',
905 details=False, committed=None, indent=0,
906 print_zero=False):
907 """Print a summary of a ``name`` slice of attempts.
908
909 |committed|: None=print all, True=only committed patches, False=only
910 rejected patches.
911
912 |print_zero|: print stats even if no attempts match."""
913 if not item_name:
914 item_name = message
915 patches = [
916 p for p in stats[name]
917 if committed is None or
918 bool(stats['patch_stats'][p['patch_id']]['committed']) is committed]
919 count = sum(p['count'] for p in patches)
920 if not print_zero and not count:
921 return
922
923 failing_builders = {}
924 for p in patches:
925 for b, cnt in p['failed-jobs-details'].iteritems():
926 failing_builders.setdefault(b, {})
927 failing_builders[b][p['patch_id']] = cnt
928
929 indent_str = ''.join(' ' for _ in range(indent))
930 if message.startswith('failed jobs'):
931 output(
932 '%s%4d attempt%s (%.1f%% of %d attempts) %s: %d in %d%s patches',
933 indent_str, count, ' ' if count == 1 else 's',
934 percentage(count, stats['attempt-count']),
935 stats['attempt-count'],
936 message,
937 sum(sum(d.values()) for d in failing_builders.values()),
938 len(patches),
939 {True: ' committed', False: ' uncommitted'}.get(committed, ''))
940 else:
941 output(
942 '%s%4d attempt%s (%.1f%% of %d attempts) %s in %d%s patches',
943 indent_str, count, ' ' if count == 1 else 's',
944 percentage(count, stats['attempt-count']),
945 stats['attempt-count'],
946 message,
947 len(patches),
948 {True: ' committed', False: ' uncommitted'}.get(committed, ''))
949 if details:
950 lines = []
951 for p in patches:
952 line = ' %d %s %s' % (
953 p['count'], item_name, patch_url(p['patch_id']))
954 causes = ['%d %s' % (p['failed-jobs-details'][k], k)
955 for k in p['failed-jobs-details']]
956 line += ' (%s)' % ', '.join(causes)
957 lines.append(line)
958 output('\n'.join(lines))
959 output()
960
961
962 def print_duration(name, args, latest, previous, print_name=None):
963 if not print_name:
964 print_name = name.capitalize()
965 cq_only = round_timedelta(latest['patchset-durations'][name])
966 wallclock = round_timedelta(
967 latest['patchset-total-commit-queue-durations'][name])
968 prev_cq_only = 'unknown'
969 prev_cq_only_s = 'unknown'
970 prev_wallclock = 'unknown'
971 prev_wallclock_s = 'unknown'
972 if previous:
973 prev_cq_only = round_timedelta(previous['patchset-durations'][name])
974 prev_cq_only_s = int(prev_cq_only.total_seconds() / 60)
975 prev_wallclock = round_timedelta(
976 previous['patchset-total-commit-queue-durations'][name])
977 prev_wallclock_s = int(prev_wallclock.total_seconds() / 60)
978 output('\n%s duration in CQ trying a patch:', print_name)
979 output(
980 ' wallclock: %8s (%3d min). Prev. %s: %8s (%3s min).',
981 wallclock, round(wallclock.total_seconds() / 60.0), args.range,
982 prev_wallclock, prev_wallclock_s)
983 output(
984 ' sum of attempts: %8s (%3d min). Prev. %s: %8s (%3s min).',
985 cq_only, round(cq_only.total_seconds() / 60.0), args.range,
986 prev_cq_only, prev_cq_only_s)
987
988
989 def print_usage(args, latest, previous):
990 if not latest['usage']:
991 return
992 output()
993 output(
994 'CQ users: %6d out of %6d total committers %6.2f%%',
995 latest['usage']['users'], latest['usage']['committers'],
996 percentage(latest['usage']['users'], latest['usage']['committers']))
997 fmt_str = (
998 ' Committed %6d out of %6d commits %6.2f%%. '
999 'Prev. %s: %6.2f%%.')
1000 data = percentage_tuple(latest['usage']['cq_commits'],
1001 latest['usage']['total_commits'])
1002 data += (args.range, percentage(previous['usage']['cq_commits'],
1003 previous['usage']['total_commits']))
1004 if latest['usage']['bot_manual_commits']:
1005 fmt_str += ' (%6.2f%% by humans)'
1006 data += (percentage(latest['usage']['cq_commits'],
1007 latest['usage']['total_commits'] -
1008 latest['usage']['bot_manual_commits']),)
1009 output(fmt_str, *data)
1010
1011 output()
1012 output('Bots: %6d out of %6d total committers %6.2f%%',
1013 *percentage_tuple(latest['usage']['bot_committers'],
1014 latest['usage']['committers']))
1015 output(' Committed by CQ %6d out of %6d commits %6.2f%%',
1016 *percentage_tuple(latest['usage']['bot_commits'],
1017 latest['usage']['total_commits']))
1018 output(' Committed directly %6d out of %6d commits %6.2f%%',
1019 *percentage_tuple(latest['usage']['bot_manual_commits'],
1020 latest['usage']['total_commits']))
1021 output()
1022 output('Manual committers: %6d out of all %6d users %6.2f%%',
1023 *percentage_tuple(latest['usage']['manual_committers'],
1024 latest['usage']['committers']))
1025 output(' Committed %6d out of %6d commits %6.2f%%',
1026 *percentage_tuple(latest['usage']['manual_commits'],
1027 latest['usage']['total_commits']))
1028
1029
1030 def print_tree_status(args, latest, previous):
1031 output()
1032 output(
1033 'Total time tree open: %.1f hours of %.1f hours (%.2f%%). '
1034 'Prev. %s: %.2f%%.',
1035 latest['tree']['open'] / 3600.0,
1036 latest['tree']['total'] / 3600.0,
1037 percentage(latest['tree']['open'], latest['tree']['total']),
1038 args.range,
1039 percentage(previous['tree']['open'], previous['tree']['total']))
1040
1041
1042 def print_stats(args, stats):
1043 latest = stats.get('latest')
1044 previous = stats.get('previous')
1045 if not latest:
1046 output('No stats to display.')
1047 return
1048 if not previous:
1049 output('No previous %s stats loaded.', args.range)
1050 return
1051 output('Statistics for project %s', args.project)
1052 if latest['begin'] > latest['end']:
1053 output(' No stats since %s', args.date)
1054 return
1055
1056 output('from %s till %s (local time).',
1057 latest['begin'], latest['end'])
1058
1059 print_usage(args, latest, previous)
1060 print_tree_status(args, latest, previous)
1061
1062 output()
1063 output(
1064 '%4d issues (%d patches) were tried by CQ, '
1065 'resulting in %d attempts.',
1066 latest['issue-count'], latest['patchset-count'], latest['attempt-count'])
1067 output(
1068 '%4d patches (%.1f%% of tried patches, %.1f%% of attempts) '
1069 'were committed by CQ,',
1070 latest['patchset-commit-count'],
1071 percentage(latest['patchset-commit-count'], latest['patchset-count']),
1072 percentage(latest['patchset-commit-count'], latest['attempt-count']))
1073
1074
1075 output()
1076 output('Rejections:')
1077 print_attempt_counts(latest, 'rejections', 'were unsuccessful',
1078 item_name='failures',
1079 committed=False)
1080 output(' This includes:')
1081 for reason in REASONS:
1082 print_attempt_counts(latest, reason, REASONS[reason]['message'], indent=2,
1083 details=args.list_rejections,
1084 item_name=REASONS[reason]['item'], committed=False)
1085
1086 # TODO(sergeyberezin): add gave up count (committed manually after trying CQ).
1087 # TODO(sergeyberezin): add count of NOTRY=true (if possible).
1088
1089 output()
1090 output('False Rejections:')
1091 if args.use_logs:
1092 print_attempt_counts(latest, 'false-rejections', 'were false rejections',
1093 item_name='flakes', committed=True)
1094 else:
1095 output(
1096 ' %4d attempts (%.1f%% of %d attempts) were false rejections',
1097 latest['attempt-false-reject-count'],
1098 percentage(latest['attempt-false-reject-count'],
1099 latest['attempt-count']),
1100 latest['attempt-count'])
1101
1102 output(' False rejections include:')
1103 for reason in FLAKY_REASONS.keys() + ['failed-unknown']:
1104 print_attempt_counts(latest, reason, REASONS[reason]['message'], indent=2,
1105 item_name=REASONS[reason]['item'], committed=True,
1106 details=args.list_false_rejections)
1107
1108 output(' Other rejections in committed patches for valid reasons:')
1109 for reason in VALID_REASONS.keys():
1110 print_attempt_counts(latest, reason, REASONS[reason]['message'], indent=2,
1111 item_name=REASONS[reason]['item'], committed=True,
1112 details=args.list_false_rejections)
1113
1114 print_duration('mean', args, latest, previous)
1115 print_duration('50', args, latest, previous, 'Median')
1116
1117 output()
1118 output('Patches which eventually land percentiles:')
1119 for p in ['10', '25', '50', '75', '90', '95', '99']:
1120 output('%s: %4.1f hrs, %2d attempts',
1121 p, latest['patchset-committed-durations'][p] / 3600.0,
1122 latest['patchset-committed-attempts'][p])
1123
1124 # TODO(sergeyberezin): add total try jobs / by CQ / unknown. Get it from CBE.
1125 # TODO(sergeyberezin): recompute bot flakiness from CBE. CQ does not
1126 # have enough info.
1127 output()
1128 output('Top flaky builders (which fail and succeed in the same patch):')
1129
1130 logging.debug('Found %d jobs', len(latest['jobs'].keys()))
1131
1132 def flakiness(stats, job):
1133 passes = stats['jobs'][job]['pass-count']
1134 failures = stats['jobs'][job]['false-reject-count']
1135 return percentage(failures, passes + failures)
1136
1137 jobs = sorted(latest['jobs'].iterkeys(), key=lambda j: flakiness(latest, j),
1138 reverse=True)
1139 output('%-40s %-15s %-15s %-15s',
1140 'Builder Name', 'Succeeded', 'Flaky Failures', 'Flakiness (%)')
1141 for job in jobs:
1142 passes = latest['jobs'][job]['pass-count']
1143 failures = latest['jobs'][job]['false-reject-count']
1144 output('%-40s %-15s %-15s %-15s',
1145 job, '%5d' % passes, '%5d' % failures,
1146 '%6.2f%%' % flakiness(latest, job))
1147
1148
1149 def acquire_stats(args):
1150 stats = {}
1151 logging.info('Acquiring stats for project %s for a %s of %s using %s',
1152 args.project, args.range, args.date,
1153 'logs' if args.use_logs else 'cache')
1154 end_date = args.date + datetime.timedelta(minutes=INTERVALS[args.range])
1155 prev_date = args.date - datetime.timedelta(minutes=INTERVALS[args.range])
1156 if args.use_logs:
1157 init_stats = {'latest': default_stats(), 'previous': default_stats()}
1158 assert args.date
1159 # For weekly stats, collect job flakiness from daily cached stats.
1160 if args.range == 'week':
1161 for day in range(7):
1162 d = args.date + datetime.timedelta(minutes=INTERVALS['day']*day)
1163 raw_stats = fetch_stats(args, d, 'day')
1164 init_stats = organize_stats(raw_stats, latest_init=init_stats['latest'])
1165 elif args.range == 'day':
1166 for hour in range(24):
1167 d = args.date + datetime.timedelta(minutes=INTERVALS['hour']*hour)
1168 raw_stats = fetch_stats(args, d, 'hour')
1169 init_stats = organize_stats(raw_stats, latest_init=init_stats['latest'])
1170 else:
1171 init_stats = organize_stats(fetch_stats(args))
1172 stats['latest'] = derive_stats(
1173 args, args.date, init_stats=init_stats['latest'])
1174 stats['previous'] = derive_stats(args, prev_date)
1175 else:
1176 stats = organize_stats(fetch_stats(args))
1177
1178 stats['latest']['tree'] = derive_tree_stats(args.project, args.date, end_date)
1179 stats['previous']['tree'] = derive_tree_stats(
1180 args.project, prev_date, args.date)
1181
1182 if PROJECTS[args.project]['type'] == 'git':
1183 stats['latest']['usage'] = derive_git_stats(
1184 args.project, args.date, end_date, args.bots)
1185 stats['previous']['usage'] = derive_git_stats(
1186 args.project, prev_date, args.date, args.bots)
1187 else:
1188 stats['latest']['usage'] = derive_svn_stats(
1189 args.project, args.date, end_date, args.bots)
1190 stats['previous']['usage'] = derive_svn_stats(
1191 args.project, prev_date, args.date, args.bots)
1192
1193 return stats
1194
1195 def main():
1196 args = parse_args()
1197 logger = logging.getLogger()
1198 # TODO(sergeyberezin): how do I derive local timezone string?
1199 # Need to be able to pass dateutil.tz.tzlocal() directly.
1200 infra.libs.logs.process_argparse_options(args, logger)
1201 stats = acquire_stats(args)
1202 print_stats(args, stats)
1203
1204
1205 if __name__ == '__main__':
1206 sys.exit(main())
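
For reference, a minimal usage sketch (not part of the patch): it assumes the module is importable as infra.tools.cq_stats.cq_stats from an infra.git checkout and hand-builds the argparse.Namespace that parse_args() would normally produce; the field values below are examples only.

import argparse
import datetime

from infra.tools.cq_stats import cq_stats  # assumed import path

args = argparse.Namespace(
    project='chromium', range='day', bots=[], seq=True, thread_pool=1,
    use_logs=False, list_rejections=False, list_false_rejections=False,
    date=None)
args.date = (datetime.datetime.now() -
             datetime.timedelta(minutes=cq_stats.INTERVALS[args.range]))
stats = cq_stats.acquire_stats(args)
cq_stats.print_stats(args, stats)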