Chromium Code Reviews

Side by Side Diff: infra/tools/cq_stats/cq_stats.py

Issue 1125653004: Check in cq_stats.py (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: use utc Created 5 years, 7 months ago
1 #!/usr/bin/python
2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5 """Generate stats of CQ usage."""
6
7 import argparse
8 import calendar
9 import collections
10 import datetime
11 import dateutil.parser
12 import dateutil.tz
13 from xml.etree import ElementTree
14 import infra.libs.logs
15 import json
16 import logging
17 from multiprocessing.pool import ThreadPool
18 import numbers
19 import numpy
20 import re
21 import subprocess
22 import sys
23 import time
24 import urllib
25 import urllib2
26 import urlparse
27
28
29 STATS_URL = 'http://chromium-cq-status.appspot.com'
30 # Expects the project name to be substituted in via %.
31 TREE_STATUS_URL = 'http://%s-status.appspot.com'
32 PROJECTS = {
33 'chromium': {
34 'tree-status': TREE_STATUS_URL % 'chromium',
35 'type': 'git',
36 'repo': 'https://chromium.googlesource.com/chromium/src',
37 },
38 'blink': {
39 'tree-status': TREE_STATUS_URL % 'blink',
40 'type': 'svn',
41 'repo': 'svn://svn.chromium.org/blink/trunk/',
42 },
43 }
44 # Map of interval names to their length in minutes.
45 INTERVALS = {
46 'week': 60 * 24 * 7,
47 'day': 60 * 24,
48 'hour': 60,
49 '15min': 15,
50 }
51 VALID_REASONS = collections.OrderedDict([
52 ('manual-cancel', {
53 'item': 'stopped manually',
54 'message': 'stopped manually (CQ box was unchecked)',
55 }),
56 ('missing-lgtm', {
57 'item': 'missed LGTM',
58 'message': 'are missing LGTM',
59 }),
60 ('not-lgtm', {
61 'item': 'NOT LGTMs',
62 'message': 'have been disapproved (NOT LGTM)',
63 }),
64 ('failed-patch', {
65 'item': 'failures',
66 'message': 'failed to apply patch',
67 }),
68 ('invalid-delimiter', {
69 'item': 'errors',
70 'message': 'have incorrect CQ_EXTRA_TRYBOTS flag',
71 }),
72 ('failed-presubmit-bot', {
73 'item': 'failures',
74 'message': 'failed presubmit bot (often due to missing OWNERS LGTM)',
75 }),
76 ])
77 FLAKY_REASONS = collections.OrderedDict([
78 ('failed-commit', {
79 'item': 'failures',
80 'message': 'failed to commit',
81 }),
82 ('failed-jobs', {
83 'item': 'failures',
84 'message': 'failed jobs (excluding presubmit)',
85 }),
86 ('failed-to-trigger', {
87 'item': 'failures',
88 'message': 'failed to trigger jobs',
89 }),
90 ('failed-presubmit-check', {
91 'item': 'failures',
92 'message': 'failed presubmit check',
93 }),
94 ])
95 KNOWN_REASONS = collections.OrderedDict()
96 KNOWN_REASONS.update(VALID_REASONS)
97 KNOWN_REASONS.update(FLAKY_REASONS)
98
99 REASONS = collections.OrderedDict()
100 REASONS.update(KNOWN_REASONS)
101 REASONS['failed-unknown'] = {
102 'item': 'failures',
103 'message': 'failed for any other reason',
104 }
105
106
107 def parse_args():
108 parser = argparse.ArgumentParser(description=sys.modules['__main__'].__doc__)
109 parser.add_argument(
110 '--project',
111 required=True,
112 choices=PROJECTS.keys(),
113 help='Collect stats about this project.')
114 parser.add_argument(
115 '--bot', type=str, dest='bots',
116 action='append',
117 default=['blink-deps-roller@chromium.org',
118 'chrome-admin@google.com',
119 'chrome-release@google.com',
120 'chromeos-commit-bot@chromium.org',
121 'skia-deps-roller@chromium.org',
122 ],
123 help=('Add an author to be treated as a bot. '
124 'Repeat to add several bots. Default: %(default)s.'))
125 parser.add_argument(
126 '--seq', action='store_true',
127 help='Run everything sequentially for debugging.')
128 parser.add_argument(
129 '--thread-pool', type=int, default=200,
130 help='Fetch data using this many parallel threads. Default=%(default)s.')
131 parser.add_argument(
132 '--list-rejections', action='store_true',
133 help='List rejected CLs and reasons for rejection.')
134 parser.add_argument(
135 '--list-false-rejections', action='store_true',
136 help='List CLs that were committed in more than one attempt.')
137 parser.add_argument(
138 '--use-logs', action='store_true',
139 default=True,
140 help=('On by default. '
141 'Fetch the detailed logs and recompute the stats in this script, '
142 'instead of fetching precomputed stats. '
143 'Slower, but more accurate, and helps validate the cached stats.'))
144 parser.add_argument(
145 '--use-cache',
146 dest='use_logs',
147 action='store_false',
148 help=('Fetch the cached stats from the app. Opposite of --use-logs.'))
149 parser.add_argument(
150 '--date',
151 help='Start date of stats, YYYY-MM-DD[ HH[:MM]]. Default: one --range ago.')
152 parser.add_argument('--range',
153 choices=INTERVALS.keys(),
154 default='week',
155 help='Time range to print stats for.')
156 infra.libs.logs.add_argparse_options(parser, default_level=logging.ERROR)
157
158 args = parser.parse_args()
159
160 if args.date:
161 args.date = date_from_string(args.date)
162 else:
163 args.date = (datetime.datetime.now() -
164 datetime.timedelta(minutes=INTERVALS[args.range]))
165
166 return args
167
168
169 def date_from_string(iso_str):
170 try:
171 return dateutil.parser.parse(iso_str)
172 except ValueError:
173 pass
174 raise ValueError('Unrecognized date/time format: %s' % iso_str)
175
176
177 def date_from_timestamp(timestamp):
178 return datetime.datetime.utcfromtimestamp(float(timestamp))
179
180
181 def date_from_git(date_str):
182 """If date_str is not valid or None, return None."""
183 if not date_str:
184 return None
185 date = None
186 try:
187 date = datetime.datetime.strptime(
188 date_str, '%a %b %d %H:%M:%S %Y')
189 except ValueError:
190 pass
191 return date
192
193
194 def local_date_to_timestamp(date):
195 return time.mktime(date.timetuple())
196
197
198 def local_to_utc(local_time):
199 timestamp = local_date_to_timestamp(local_time)
200 utc_time = datetime.datetime.utcfromtimestamp(timestamp)
201 return utc_time
202
203
204 def fetch_json(url):
205 result = {'error': '404'}
206 delays = [0.0, 0.5, 1.0, 2.0]
207 for retry in range(len(delays)):
208 if retry:
209 time.sleep(delays[retry])
210 try:
211 result = json.loads(urllib2.urlopen(url).read())
212 return result
213 except (IOError, ValueError) as e:
214 logging.warning('Failed to fetch (attempt %d) %s: %s', retry + 1, url, e)
215 logging.error('Permanently failed to fetch %s: %s', url, e)
216 return result
217
218
219 def fetch_tree_status(project, end_date, start_date=None, limit=1000):
220 """Fetch all tree events in the given interval.
221
222 Args:
223 project (str): e.g. 'chromium' or 'blink'.
224 end_date (datetime): end of the interval, in local time.
225 start_date (datetime): optional start of the interval, in local time.
226 limit (int): max. number of events.
227
228 Returns:
229 List of events {'open': bool, 'date': datetime} sorted by date.
230 """
231 end_timestamp = int(time.mktime(end_date.timetuple()))
232 params = {
233 'format': 'json',
234 'limit': limit,
235 # Confusingly, chromium-status app defines the range as
236 # endTime <= t < startTime.
237 'startTime': end_timestamp,
238 }
239 if start_date:
240 params['endTime'] = int(time.mktime(start_date.timetuple()))
241 query = 'allstatus?' + urllib.urlencode(params)
242 url = urlparse.urljoin(PROJECTS[project]['tree-status'], query)
243 logging.debug('Fetching %s', url)
244 status = fetch_json(url)
245 # Bug in AE app: it may return events outside of time range.
246 def entry(event):
247 date_utc = date_from_string(event['date'])
248 date_local = date_from_timestamp(calendar.timegm(date_utc.utctimetuple()))
249 return {'date': date_local,
250 'open': event['general_state'] == 'open'}
251 def in_range(entry):
252 logging.debug('in_range(%r)', entry)
253 if entry['date'] >= end_date:
254 return False
255 if not start_date:
256 return True
257 return start_date <= entry['date']
258
259 if not status or type(status) is not list:
260 status = []
261 status = [entry(event) for event in status]
262 status = sorted([e for e in status if in_range(e)], key=lambda e: e['date'])
263 return status
264
265
266 def fetch_git_page(repo_url, cursor=None, page_size=2000):
267 """Fetch one page worth of logs from gitiles."""
268 params = {
269 'pretty': 'full',
270 'format': 'JSON',
271 'n': page_size,
272 }
273 if cursor:
274 params.update({'s': cursor})
275 url = '%s/%s?%s' % (repo_url, '/+log/master', urllib.urlencode(params))
276 logging.debug('fetch_git_page: url = %s', url)
277 try:
278 # Strip off the anti-XSS string from the response.
279 response = urllib2.urlopen(url)
280 lines = [l.rstrip() for l in response if l.rstrip() != ")]}'"]
281 raw_data = ''.join(lines)
282 page = json.loads(raw_data)
283 except (IOError, ValueError) as e:
284 page = {}
285 logging.error('Failed to fetch a page: %s', e)
286 return page
287
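
# Illustrative sketch only (not part of the patch under review): the shape of
# the gitiles '+log?format=JSON' page that fetch_git_page() returns and that
# fetch_git_logs() below consumes. The keys mirror what the code reads; the
# values are made up, and the committer 'time' format matches date_from_git().
_EXAMPLE_GIT_LOG_PAGE = {
    'log': [{
        'commit': 'deadbeefdeadbeefdeadbeefdeadbeefdeadbeef',
        'author': {'email': 'someone@chromium.org'},
        'committer': {'email': 'commit-bot@chromium.org',
                      'time': 'Mon Apr 27 12:34:56 2015'},
    }],
    'next': 'cafebabe',  # paging cursor; absent on the last page
}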
288
289 def fetch_git_logs(repo, from_date, to_date):
290 """Fetch all logs from Gitiles for the given date range.
291
292 Gitiles does not natively support time ranges, so we just fetch
293 everything until the range is covered. Assume that logs are ordered
294 in reverse chronological order.
295 """
296 cursor = ''
297 commit_date = to_date
298 data = []
299 while cursor is not None:
300 page = fetch_git_page(repo, cursor)
301 logs = page.get('log', [])
302 cursor = page.get('next')
303 for log in logs:
304 committer = log.get('committer', {})
305 commit_date = date_from_git(committer.get('time'))
306 if not commit_date:
307 continue
308 if commit_date > to_date:
309 continue
310 if commit_date < from_date:
311 break
312 data.append({
313 'author': log.get('author', {}).get('email'),
314 'date': commit_date,
315 'commit-bot': bool('commit-bot' in committer.get('email', '')),
316 'revision': log.get('commit'),
317 })
318
319 if commit_date < from_date:
320 break
321 return data
322
323
324 def fetch_svn_logs(repo, from_date, to_date):
325 from_date = local_to_utc(from_date)
326 to_date = local_to_utc(to_date)
327 range_str = (
328 '{%s +0000}:{%s +0000}' % (from_date, to_date))
329 out = subprocess.check_output(
330 ['svn', 'log', '--with-all-revprops', '--xml', repo, '-r', range_str])
331 data = []
332 for logentry in ElementTree.XML(out).findall('logentry'):
333 date_str = logentry.find('date').text
334 date = date_from_string(date_str)
335 entry = {
336 'author': logentry.find('author').text,
337 'date': date,
338 'revprops': {},
339 'commit-bot': False,
340 }
341 revprops = logentry.find('revprops')
342 if revprops is not None:
343 for revprop in revprops.findall('property'):
344 entry['revprops'][revprop.attrib['name']] = revprop.text
345 if revprop.attrib['name'] == 'commit-bot':
346 entry['commit-bot'] = True
347 data.append(entry)
348 return data
349
350
351 def fetch_stats(args, begin_date=None, stats_range=None):
352 if not begin_date:
353 begin_date = args.date
354 if not stats_range:
355 stats_range = args.range
356 if begin_date:
357 timestamp = (int(local_date_to_timestamp(begin_date)) +
358 INTERVALS[stats_range] * 60)
359 else:
360 timestamp = int(time.time())
361
362 params = {
363 'project': args.project,
364 'interval_minutes': INTERVALS[stats_range],
365 'end': timestamp,
366 'count': 2, # Fetch requested and previous set, for comparison.
367 }
368
369 query = 'stats/query?' + urllib.urlencode(params)
370 url = urlparse.urljoin(STATS_URL, query)
371 logging.debug('Fetching %s', url)
372 return fetch_json(url)
373
374
375 # "Dangerous default value []": pylint: disable=W0102
376 def fetch_cq_logs(start_date=None, end_date=None, filters=[]):
377 begin_time = None
378 end_time = None
379 if start_date:
380 begin_time = int(time.mktime(start_date.timetuple()))
381 if end_date:
382 end_time = int(time.mktime(end_date.timetuple()))
383 results = []
384 cursor = None
385 while True:
386 params = {}
387 if begin_time:
388 params['begin'] = begin_time
389 if end_time:
390 params['end'] = end_time
391 if cursor:
392 params['cursor'] = cursor
393 query = 'query/%s?%s' % ('/'.join(filters), urllib.urlencode(params))
394 url = urlparse.urljoin(STATS_URL, query)
395 logging.debug('Fetching %s', url)
396 data = fetch_json(url)
397 results.extend(data.get('results', []))
398 logging.info('fetch_cq_logs: Got %d results', len(results))
399 logging.debug(' %s', '\n '.join(['%s %s' % (
400 patch_url((r.get('fields', {}).get('issue', 0),
401 r.get('fields', {}).get('patchset', 0))),
402 r.get('fields', {}).get('action', '')) for r in results]))
403 cursor = data.get('cursor', None)
404 if not data.get('more', False) or not cursor:
405 break
406
407 return results
408
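
# Illustrative sketch only (not part of the patch under review): the shape of
# one chromium-cq-status 'query/...' page that fetch_cq_logs() walks through
# via the 'cursor'/'more' fields. The keys mirror what this file reads in
# derive_stats() and derive_patch_stats(); the values are made up.
_EXAMPLE_CQ_QUERY_PAGE = {
    'results': [{
        'timestamp': 1430000000.0,
        'fields': {
            'project': 'chromium',
            'issue': 123456789,
            'patchset': 1,
            'action': 'patch_stop',
            'message': 'Try jobs failed: ...',
            # 'reason' appears on 'patch_failed' events and may be a JSON string.
        },
    }],
    'cursor': 'opaque-cursor-token',
    'more': False,
}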
409
410 def default_stats():
411 """Generate all the required stats fields with default values."""
412 stats = {
413 'begin': datetime.datetime.now(),
414 'end': datetime.datetime(1, 1, 1),
415 'issue-count': 0,
416 'patchset-count': 0,
417 'attempt-count': 0,
418 'patch_stats': {},
419 'patchset-false-reject-count': 0, # Deprecated stats?
420 'attempt-reject-count': 0, # Num. of rejected attempts
421 'attempt-false-reject-count': 0, # Num. of falsely rejected attempts
422 'false-rejections': [], # patches with falsely rejected attempts
423 'rejections': [], # patches with rejected attempts
424 'rejected-patches': set(), # Patches that never committed
425 'patchset-commit-count': 0,
426 'patchset-total-commit-queue-durations': derive_list_stats([0]),
427 'patchset-durations': derive_list_stats([0]),
428 'patchset-committed-durations': derive_list_stats([0]),
429 'patchset-attempts': derive_list_stats([0]),
430 'patchset-committed-attempts': derive_list_stats([0]),
431 'jobs': {},
432 'tree': {'open': 0.0, 'total': 0.0},
433 'usage': {},
434 }
435 for reason in REASONS:
436 stats[reason] = []
437 return stats
438
439
440 def organize_stats(stats, latest_init=None, previous_init=None):
441 """Changes cached lists of stats into dictionaries.
442
443 Args:
444 stats (dict): set of stats as returned by chromium-cq-status.
445
446 Returns:
447 result (dict): mapping ('latest' | 'previous') -> stat.name
448 -> <stats json>. If latest/previous_init are given, add to those
449 stats rather than compute them from scratch.
450 """
451 if 'results' not in stats:
452 return None
453 result = {
454 'latest' : latest_init if latest_init else default_stats(),
455 'previous': previous_init if previous_init else default_stats(),
456 }
457 for dataset, name in zip(stats['results'], ['latest', 'previous']):
458 result[name]['begin'] = min(
459 date_from_timestamp(dataset['begin']),
460 result[name].get('begin', datetime.datetime.now()))
461 result[name]['end'] = max(date_from_timestamp(dataset['end']),
462 result[name]['end'])
463 re_trybot_pass_count = re.compile('^trybot-(.+)-pass-count$')
464 re_trybot_false_reject_count = re.compile(
465 '^trybot-(.+)-false-reject-count$')
466 assert 'jobs' in result[name] and isinstance(result[name]['jobs'], dict)
467 for data in dataset['stats']:
468 if data['type'] == 'count':
469 result[name][data['name']] = data['count']
470 match_pass = re_trybot_pass_count.match(data['name'])
471 match_false_reject = re_trybot_false_reject_count.match(data['name'])
472 if match_pass:
473 job_name = match_pass.group(1)
474 result[name]['jobs'].setdefault(job_name, {
475 'pass-count': 0,
476 'false-reject-count': 0,
477 })
478 result[name]['jobs'][job_name]['pass-count'] += data['count']
479 logging.debug('Job %s passed %d times. Jobs: %r',
480 job_name, data['count'], result[name]['jobs'])
481 if match_false_reject:
482 job_name = match_false_reject.group(1)
483 result[name]['jobs'].setdefault(job_name, {
484 'pass-count': 0,
485 'false-reject-count': 0,
486 })
487 result[name]['jobs'][job_name]['false-reject-count'] += data['count']
488 logging.debug('Job %s flakily failed %d times',
489 job_name, data['count'])
490 else:
491 assert data['type'] == 'list'
492 result[name][data['name']] = {
493 '10': data['percentile_10'],
494 '25': data['percentile_25'],
495 '50': data['percentile_50'],
496 '75': data['percentile_75'],
497 '90': data['percentile_90'],
498 '95': data['percentile_95'],
499 '99': data['percentile_99'],
500 'min': data['min'],
501 'max': data['max'],
502 'mean': data['mean'],
503 'size': data['sample_size'],
504 }
505 return result
506
507
508 def derive_list_stats(series):
509 if not series:
510 series = [0]
511 return {
512 '10': numpy.percentile(series, 10),
513 '25': numpy.percentile(series, 25),
514 '50': numpy.percentile(series, 50),
515 '75': numpy.percentile(series, 75),
516 '90': numpy.percentile(series, 90),
517 '95': numpy.percentile(series, 95),
518 '99': numpy.percentile(series, 99),
519 'min': min(series),
520 'max': max(series),
521 'mean': numpy.mean(series),
522 'size': len(series),
523 }
524
525
526 def sort_by_count(elements):
527 return sorted(elements, key=lambda p: p['count'], reverse=True)
528
529
530 def stats_by_count_entry(patch_stats, name, patch, reasons):
531 entry = {
532 'count': patch_stats[name],
533 'patch_id': patch,
534 'failed-jobs-details': patch_stats['failed-jobs-details']
535 }
536 for n in reasons:
537 if n in patch_stats:
538 entry[n] = patch_stats[n]
539 assert type(entry[n]) is int, 'Bad type in %s[%s]: %r\nEntry=%r' % (
540 patch, n, entry[n], entry)
541 return entry
542
543
544 # "Dangerous default value []": pylint: disable=W0102
545 def stats_by_count(patch_stats, name, reasons=[]):
546 return sort_by_count([
547 stats_by_count_entry(patch_stats[p], name, p, reasons)
548 for p in patch_stats if patch_stats[p].get(name)])
549
550
551 def _derive_stats_from_patch_stats(stats):
552 patch_stats = stats['patch_stats']
553 stats['attempt-count'] = sum(
554 patch_stats[p]['attempts'] for p in patch_stats)
555 stats['patchset-false-reject-count'] = sum(
556 patch_stats[p]['false-rejections'] for p in patch_stats)
557 stats['attempt-reject-count'] = sum(
558 patch_stats[p]['rejections'] for p in patch_stats)
559 stats['rejected-patches'] = set(
560 p for p in patch_stats if not patch_stats[p]['committed'])
561 stats['false-rejections'] = stats_by_count(
562 patch_stats, 'false-rejections', REASONS)
563 stats['rejections'] = stats_by_count(patch_stats, 'rejections', REASONS)
564 for r in REASONS:
565 stats[r] = stats_by_count(patch_stats, r, set(REASONS) - set([r]))
566
567 stats['patchset-commit-count'] = len([
568 p for p in patch_stats if patch_stats[p]['committed']])
569 stats['patchset-total-commit-queue-durations'] = derive_list_stats([
570 patch_stats[p]['patchset-duration-wallclock'] for p in patch_stats])
571 stats['patchset-durations'] = derive_list_stats([
572 patch_stats[p]['patchset-duration'] for p in patch_stats])
573 stats['patchset-committed-durations'] = derive_list_stats([
574 patch_stats[p]['patchset-duration'] for p in patch_stats
575 if patch_stats[p]['committed']])
576 stats['patchset-attempts'] = derive_list_stats([
577 patch_stats[p]['attempts'] for p in patch_stats])
578 stats['patchset-committed-attempts'] = derive_list_stats([
579 patch_stats[p]['attempts'] for p in patch_stats
580 if patch_stats[p]['committed']])
581
582
583 def derive_stats(args, begin_date, init_stats=None):
584 """Process raw CQ updates log and derive stats.
585
586 Fetches raw CQ events and returns the same format as organize_stats().
587 If ``init_stats`` are given, preserve the jobs stats and replace the
588 other stats.
589 """
590 stats = init_stats or default_stats()
591 filters = ['project=%s' % args.project, 'action=patch_stop']
592 end_date = begin_date + datetime.timedelta(minutes=INTERVALS[args.range])
593 results = fetch_cq_logs(begin_date, end_date, filters=filters)
594 if not results:
595 return stats
596
597 stats['begin'] = date_from_timestamp(results[-1]['timestamp'])
598 stats['end'] = date_from_timestamp(results[0]['timestamp'])
599 # Collect the set of issues and the set of (issue, patchset) pairs tried by CQ.
600 patches, issues = set(), set()
601 for reason in results:
602 issues.add(reason['fields']['issue'])
603 patches.add((reason['fields']['issue'], reason['fields']['patchset']))
604 stats['issue-count'] = len(issues)
605 stats['patchset-count'] = len(patches)
606 patch_stats = {}
607 # Fetch and process each patchset log
608 def get_patch_stats(patch_id):
609 return derive_patch_stats(end_date, patch_id)
610
611 if args.seq or not args.thread_pool:
612 iterable = map(get_patch_stats, patches)
613 else:
614 pool = ThreadPool(min(args.thread_pool, len(patches)))
615 iterable = pool.imap_unordered(get_patch_stats, patches)
616 for patch_id, pstats in iterable:
617 patch_stats[patch_id] = pstats
618
619 stats['patch_stats'] = patch_stats
620 _derive_stats_from_patch_stats(stats)
621 return stats
622
623
624 def patch_url(patch_id):
625 return '%s/patch-status/%s/%s' % ((STATS_URL,) + patch_id)
626
627
628 def parse_json(obj, return_type=None):
629 """Attempt to interpret a string as JSON.
630
631 If return_type is given, guarantee that type; otherwise pass the value through.
632 """
633 result = obj
634 if type(obj) in [str, unicode]:
635 try:
636 result = json.loads(obj)
637 except ValueError as e:
638 logging.error('Could not decode json in "%s": %s', obj, e)
639 # If the type is wrong, return an empty object of the correct type.
640 # In most cases, casting to the required type will not work anyway
641 # (e.g. list to dict).
642 if return_type and type(result) is not return_type:
643 result = return_type()
644 return result
645
646
647 def parse_failing_tryjobs(message):
648 """Parse the message to extract failing try jobs."""
649 builders = []
650 msg_lines = message.splitlines()
651 for line in msg_lines[1:]:
652 words = line.split(None, 1)
653 if not words:
654 continue
655 builder = words[0]
656 builders.append(builder)
657 return builders
658
659
660 def derive_patch_stats(end_date, patch_id):
661 """``patch_id`` is a tuple (issue, patchset)."""
662 results = fetch_cq_logs(end_date=end_date, filters=[
663 'issue=%s' % patch_id[0], 'patchset=%s' % patch_id[1]])
664 # The results should already be ordered, but sort again just to be sure.
665 results = sorted(results, key=lambda r: r['timestamp'], reverse=True)
666 logging.debug('derive_patch_stats(%r): fetched %d entries.',
667 patch_id, len(results))
668 # Group by attempts
669 attempts = []
670
671 def new_attempt():
672 attempt_empty = {
673 'id': 0,
674 'begin': 0.0,
675 'end': 0.0,
676 'duration': 0.0,
677 'actions': [],
678 'committed': False,
679 'reason': {},
680 }
681 for reason in REASONS:
682 attempt_empty[reason] = False
683 return attempt_empty
684
685 def add_attempt(attempt, counter):
686 """Create a new attempt from accumulated actions."""
687 assert attempt['actions']
688 attempt['id'] = counter
689 attempt['duration'] = attempt['end'] - attempt['begin']
690 known_reasons = [r for r in KNOWN_REASONS if attempt[r]]
691 if not attempt['committed'] and not known_reasons:
692 attempt['failed-unknown'] = True
693 logging.debug(
694 'add_attempt: #%d (%s)',
695 attempt['id'],
696 ', '.join([r for r in REASONS if attempt[r]]))
697 attempts.append(attempt)
698
699 # An attempt is a set of actions between patch_start and patch_stop
700 # actions. Repeated patch_start / patch_stop actions are ignored.
701 attempt = new_attempt()
702 failing_builders = {}
703 state = 'stop'
704 attempt_counter = 0
705 for result in reversed(results):
706 action = result['fields'].get('action')
707 if state == 'stop':
708 if action == 'patch_start':
709 state = 'start'
710 attempt['begin'] = result['timestamp']
711 if state != 'start':
712 continue
713 attempt['actions'].append(result)
714 if action == 'patch_stop':
715 attempt['end'] = result['timestamp']
716 message = result['fields'].get('message', '')
717 if 'CQ bit was unchecked on CL' in message:
718 attempt['manual-cancel'] = True
719 if 'No LGTM' in message:
720 attempt['missing-lgtm'] = True
721 if 'A disapproval has been posted' in message:
722 attempt['not-lgtm'] = True
723 if 'Transient error: Invalid delimiter' in message:
724 attempt['invalid-delimiter'] = True
725 if 'Failed to commit' in message:
726 attempt['failed-commit'] = True
727 if ('Failed to apply patch' in message or
728 'Failed to apply the patch' in message):
729 attempt['failed-patch'] = True
730 if 'Presubmit check' in message:
731 attempt['failed-presubmit-check'] = True
732 if 'Try jobs failed' in message:
733 if 'presubmit' in message:
734 attempt['failed-presubmit-bot'] = True
735 else:
736 attempt['failed-jobs'] = message
737 builders = parse_failing_tryjobs(message)
738 for b in builders:
739 failing_builders.setdefault(b, 0)
740 failing_builders[b] += 1
741 if 'Exceeded time limit waiting for builds to trigger' in message:
742 attempt['failed-to-trigger'] = True
743 attempt_counter += 1
744 add_attempt(attempt, attempt_counter)
745 attempt = new_attempt()
746 state = 'stop'
747 continue
748 if action == 'patch_committed':
749 attempt['committed'] = True
750 # TODO(sergeyberezin): enable this after this action is stable in CQ.
751 if action == 'patch_failed':
752 attempt['reason'] = parse_json(
753 result['fields'].get('reason', {}), return_type=dict)
754 logging.info('Attempt reason: %r', attempt['reason'])
755 if attempt['reason'].get('fail_type') == 'reviewer_lgtm':
756 attempt['missing-lgtm'] = True
757 if attempt['reason'].get('fail_type') == 'commit':
758 attempt['failed-commit'] = True
759 if attempt['reason'].get('fail_type') == 'simple try job':
760 failed_jobs = parse_json(attempt['reason'].get(
761 'fail_details', [('unknown_master', 'unknown_bot')]))
762 # Remove the presubmit bot - it is accounted for separately.
763 failed_jobs = [j for j in failed_jobs if 'presubmit' not in j[1]]
764 attempt['failed-jobs'] = failed_jobs
765
766 stats = {}
767 committed_set = set(a['id'] for a in attempts if a['committed'])
768 stats['committed'] = len(committed_set)
769 stats['attempts'] = len(attempts)
770 stats['rejections'] = stats['attempts'] - stats['committed']
771
772 logging.info('derive_patch_stats: %s has %d attempts, committed=%d',
773 patch_url(patch_id), len(attempts), stats['committed'])
774
775 valid_reasons_set = set()
776 for reason in VALID_REASONS:
777 s = set(a['id'] for a in attempts if a[reason])
778 stats[reason] = len(s)
779 valid_reasons_set.update(s)
780 for reason in set(REASONS) - set(VALID_REASONS):
781 stats[reason] = len(set(a['id'] for a in attempts if a[reason]))
782
783 # Populate failed builders.
784 stats['failed-jobs-details'] = failing_builders
785
786 stats['false-rejections'] = 0
787 if stats['committed']:
788 stats['false-rejections'] = len(
789 set(a['id'] for a in attempts) - committed_set - valid_reasons_set)
790 # Sum of attempt duration.
791 stats['patchset-duration'] = sum(a['duration'] for a in attempts)
792 if attempts:
793 stats['patchset-duration-wallclock'] = (
794 attempts[-1]['end'] - attempts[0]['begin'])
795 else:
796 stats['patchset-duration-wallclock'] = 0.0
797 return patch_id, stats
798
799
800 def derive_tree_stats(project, start_date, end_date):
801 """Given a list of tree status events, derive tree closure stats."""
802 # Fetch one more event right before the range, so we know the
803 # initial tree status.
804 status = (fetch_tree_status(project, end_date=start_date, limit=1) +
805 fetch_tree_status(project, end_date, start_date))
806 stats = {'open': 0.0, 'total': (end_date - start_date).total_seconds()}
807 if not status:
808 return stats
809
810 logging.debug('Tree status:\n%s', '\n'.join([' %r' % e for e in status]))
811
812 is_open = status[0]['open']
813 curr_date = start_date
814 for event in status[1:]:
815 delta = event['date'] - curr_date
816 if is_open and not event['open']:
817 stats['open'] += delta.total_seconds()
818 logging.debug('Tree was open from %s to %s for %s (total of %s)',
819 curr_date, event['date'],
820 delta, datetime.timedelta(seconds=stats['open']))
821 if not is_open:
822 curr_date = event['date']
823 is_open = event['open']
824
825 # Account for the remaining time after the last event.
826 if is_open:
827 delta = end_date - curr_date
828 stats['open'] += delta.total_seconds()
829 logging.debug('Tree was open from %s to %s for %s (total of %s)',
830 curr_date, end_date,
831 delta, datetime.timedelta(seconds=stats['open']))
832 return stats
833
834
835 def derive_log_stats(log_data, bots):
836 # Calculate stats.
837 cq_commits = [v for v in log_data if v['commit-bot']]
838 users = {}
839 for commit in cq_commits:
840 users[commit['author']] = users.get(commit['author'], 0) + 1
841 committers = {}
842 manual_committers = {}
843 bot_committers = {}
844 bot_manual_committers = {}
845 for commit in log_data:
846 committers[commit['author']] = committers.get(commit['author'], 0) + 1
847 if not commit['commit-bot']:
848 manual_committers[commit['author']] = manual_committers.get(
849 commit['author'], 0) + 1
850 if commit['author'] in bots:
851 bot_committers[commit['author']] = bot_committers.get(
852 commit['author'], 0) + 1
853 if not commit['commit-bot']:
854 bot_manual_committers[commit['author']] = bot_manual_committers.get(
855 commit['author'], 0) + 1
856
857 stats = {}
858 stats['cq_commits'] = len(cq_commits)
859 stats['total_commits'] = len(log_data)
860 stats['users'] = len(users)
861 stats['committers'] = len(committers)
862 stats['manual_committers'] = len(manual_committers)
863 stats['manual_commits'] = sum(x for x in manual_committers.itervalues())
864 stats['bot_committers'] = len(bot_committers)
865 stats['bot_commits'] = sum(x for x in bot_committers.itervalues())
866 stats['bot_manual_commits'] = sum(
867 x for x in bot_manual_committers.itervalues())
868 stats['manual_only_committers'] = {
869 a: c for a, c in committers.iteritems()
870 if c == manual_committers.get(a, 0)}
871 return stats
872
873 def derive_git_stats(project, start_date, end_date, bots):
874 log_data = fetch_git_logs(PROJECTS[project]['repo'], start_date, end_date)
875 return derive_log_stats(log_data, bots)
876
877
878 def derive_svn_stats(project, start_date, end_date, bots):
879 log_data = fetch_svn_logs(PROJECTS[project]['repo'], start_date, end_date)
880 return derive_log_stats(log_data, bots)
881
882
883 def percentage_tuple(data, total):
884 num_data = data if isinstance(data, numbers.Number) else len(data)
885 num_total = total if isinstance(total, numbers.Number) else len(total)
886 percent = 100. * num_data / num_total if num_total else 0.
887 return num_data, num_total, percent
888
889
890 def percentage(data, total):
891 return percentage_tuple(data, total)[2]
892
893
894 def round_timedelta(seconds):
895 # We never care about the milliseconds when printing timedeltas:
896 return datetime.timedelta(seconds=round(seconds))
897
898
899 def output(fmt='', *args):
900 """An equivalent of print to mock out in testing."""
901 print fmt % args
902
903
904 def print_attempt_counts(stats, name, message, item_name='',
905 details=False, committed=None, indent=0,
906 print_zero=False):
907 """Print a summary of a ``name`` slice of attempts.
908
909 |committed|: None=print all, True=only committed patches, False=only
910 rejected patches.
911
912 |print_zero|: print stats even if no attempts match."""
913 if not item_name:
914 item_name = message
915 patches = [
916 p for p in stats[name]
917 if committed is None or
918 bool(stats['patch_stats'][p['patch_id']]['committed']) is committed]
919 count = sum(p['count'] for p in patches)
920 if not print_zero and not count:
921 return
922
923 failing_builders = {}
924 for p in patches:
925 for b, cnt in p['failed-jobs-details'].iteritems():
926 failing_builders.setdefault(b, {})
927 failing_builders[b][p['patch_id']] = cnt
928
929 indent_str = ''.join(' ' for _ in range(indent))
930 if message.startswith('failed jobs'):
931 output(
932 '%s%4d attempt%s (%.1f%% of %d attempts) %s: %d in %d%s patches',
933 indent_str, count, ' ' if count == 1 else 's',
934 percentage(count, stats['attempt-count']),
935 stats['attempt-count'],
936 message,
937 sum(sum(d.values()) for d in failing_builders.values()),
938 len(patches),
939 {True: ' committed', False: ' uncommitted'}.get(committed, ''))
940 else:
941 output(
942 '%s%4d attempt%s (%.1f%% of %d attempts) %s in %d%s patches',
943 indent_str, count, ' ' if count == 1 else 's',
944 percentage(count, stats['attempt-count']),
945 stats['attempt-count'],
946 message,
947 len(patches),
948 {True: ' committed', False: ' uncommitted'}.get(committed, ''))
949 if details:
950 lines = []
951 for p in patches:
952 line = ' %d %s %s' % (
953 p['count'], item_name, patch_url(p['patch_id']))
954 causes = ['%d %s' % (p['failed-jobs-details'][k], k)
955 for k in p['failed-jobs-details']]
956 line += ' (%s)' % ', '.join(causes)
957 lines.append(line)
958 output('\n'.join(lines))
959 output()
960
961
962 def print_duration(name, args, latest, previous, print_name=None):
963 if not print_name:
964 print_name = name.capitalize()
965 cq_only = round_timedelta(latest['patchset-durations'][name])
966 wallclock = round_timedelta(
967 latest['patchset-total-commit-queue-durations'][name])
968 prev_cq_only = 'unknown'
969 prev_cq_only_s = 'unknown'
970 prev_wallclock = 'unknown'
971 prev_wallclock_s = 'unknown'
972 if previous:
973 prev_cq_only = round_timedelta(previous['patchset-durations'][name])
974 prev_cq_only_s = int(prev_cq_only.total_seconds() / 60)
975 prev_wallclock = round_timedelta(
976 previous['patchset-total-commit-queue-durations'][name])
977 prev_wallclock_s = int(prev_wallclock.total_seconds() / 60)
978 output('\n%s duration in CQ trying a patch:', print_name)
979 output(
980 ' wallclock: %8s (%3d min). Prev. %s: %8s (%3s min).',
981 wallclock, round(wallclock.total_seconds() / 60.0), args.range,
982 prev_wallclock, prev_wallclock_s)
983 output(
984 ' sum of attempts: %8s (%3d min). Prev. %s: %8s (%3s min).',
985 cq_only, round(cq_only.total_seconds() / 60.0), args.range,
986 prev_cq_only, prev_cq_only_s)
987
988
989 def print_usage(args, latest, previous):
990 if not latest['usage']:
991 return
992 output()
993 output(
994 'CQ users: %6d out of %6d total committers %6.2f%%',
995 latest['usage']['users'], latest['usage']['committers'],
996 percentage(latest['usage']['users'], latest['usage']['committers']))
997 fmt_str = (
998 ' Committed %6d out of %6d commits %6.2f%%. '
999 'Prev. %s: %6.2f%%.')
1000 data = percentage_tuple(latest['usage']['cq_commits'],
1001 latest['usage']['total_commits'])
1002 data += (args.range, percentage(previous['usage']['cq_commits'],
1003 previous['usage']['total_commits']))
1004 if latest['usage']['bot_manual_commits']:
1005 fmt_str += ' (%6.2f%% by humans)'
1006 data += (percentage(latest['usage']['cq_commits'],
1007 latest['usage']['total_commits'] -
1008 latest['usage']['bot_manual_commits']),)
1009 output(fmt_str, *data)
1010
1011 output()
1012 output('Bots: %6d out of %6d total committers %6.2f%%',
1013 *percentage_tuple(latest['usage']['bot_committers'],
1014 latest['usage']['committers']))
1015 output(' Committed by CQ %6d out of %6d commits %6.2f%%',
1016 *percentage_tuple(latest['usage']['bot_commits'],
1017 latest['usage']['total_commits']))
1018 output(' Committed directly %6d out of %6d commits %6.2f%%',
1019 *percentage_tuple(latest['usage']['bot_manual_commits'],
1020 latest['usage']['total_commits']))
1021 output()
1022 output('Manual committers: %6d out of all %6d users %6.2f%%',
1023 *percentage_tuple(latest['usage']['manual_committers'],
1024 latest['usage']['committers']))
1025 output(' Committed %6d out of %6d commits %6.2f%%',
1026 *percentage_tuple(latest['usage']['manual_commits'],
1027 latest['usage']['total_commits']))
1028
1029
1030 def print_tree_status(args, latest, previous):
1031 output()
1032 output(
1033 'Total time tree open: %.1f hours of %.1f hours (%.2f%%). '
1034 'Prev. %s: %.2f%%.',
1035 latest['tree']['open'] / 3600.0,
1036 latest['tree']['total'] / 3600.0,
1037 percentage(latest['tree']['open'], latest['tree']['total']),
1038 args.range,
1039 percentage(previous['tree']['open'], previous['tree']['total']))
1040
1041
1042 def print_stats(args, stats):
1043 latest = stats.get('latest')
1044 previous = stats.get('previous')
1045 if not latest:
1046 output('No stats to display.')
1047 return
1048 if not previous:
1049 output('No previous %s stats loaded.', args.range)
1050 return
1051 output('Statistics for project %s', args.project)
1052 if latest['begin'] > latest['end']:
1053 output(' No stats since %s', args.date)
1054 return
1055
1056 output('from %s till %s (local time).',
1057 latest['begin'], latest['end'])
1058
1059 print_usage(args, latest, previous)
1060 print_tree_status(args, latest, previous)
1061
1062 output()
1063 output(
1064 '%4d issues (%d patches) were tried by CQ, '
1065 'resulting in %d attempts.',
1066 latest['issue-count'], latest['patchset-count'], latest['attempt-count'])
1067 output(
1068 '%4d patches (%.1f%% of tried patches, %.1f%% of attempts) '
1069 'were committed by CQ,',
1070 latest['patchset-commit-count'],
1071 percentage(latest['patchset-commit-count'], latest['patchset-count']),
1072 percentage(latest['patchset-commit-count'], latest['attempt-count']))
1073
1074
1075 output()
1076 output('Rejections:')
1077 print_attempt_counts(latest, 'rejections', 'were unsuccessful',
1078 item_name='failures',
1079 committed=False)
1080 output(' This includes:')
1081 for reason in REASONS:
1082 print_attempt_counts(latest, reason, REASONS[reason]['message'], indent=2,
1083 details=args.list_rejections,
1084 item_name=REASONS[reason]['item'], committed=False)
1085
1086 # TODO(sergeyberezin): add gave up count (committed manually after trying CQ).
1087 # TODO(sergeyberezin): add count of NOTRY=true (if possible).
1088
1089 output()
1090 output('False Rejections:')
1091 if args.use_logs:
1092 print_attempt_counts(latest, 'false-rejections', 'were false rejections',
1093 item_name='flakes', committed=True)
1094 else:
1095 output(
1096 ' %4d attempts (%.1f%% of %d attempts) were false rejections',
1097 latest['attempt-false-reject-count'],
1098 percentage(latest['attempt-false-reject-count'],
1099 latest['attempt-count']),
1100 latest['attempt-count'])
1101
1102 output(' False rejections include:')
1103 for reason in FLAKY_REASONS.keys() + ['failed-unknown']:
1104 print_attempt_counts(latest, reason, REASONS[reason]['message'], indent=2,
1105 item_name=REASONS[reason]['item'], committed=True,
1106 details=args.list_false_rejections)
1107
1108 output(' Other rejections in committed patches for valid reasons:')
1109 for reason in VALID_REASONS.keys():
1110 print_attempt_counts(latest, reason, REASONS[reason]['message'], indent=2,
1111 item_name=REASONS[reason]['item'], committed=True,
1112 details=args.list_false_rejections)
1113
1114 print_duration('mean', args, latest, previous)
1115 print_duration('50', args, latest, previous, 'Median')
1116
1117 output()
1118 output('Patches which eventually land percentiles:')
1119 for p in ['10', '25', '50', '75', '90', '95', '99']:
1120 output('%s: %4.1f hrs, %2d attempts',
1121 p, latest['patchset-committed-durations'][p] / 3600.0,
1122 latest['patchset-committed-attempts'][p])
1123
1124 # TODO(sergeyberezin): add total try jobs / by CQ / unknown. Get it from CBE.
1125 # TODO(sergeyberezin): recompute bot flakiness from CBE. CQ does not
1126 # have enough info.
1127 output()
1128 output('Top flaky builders (which fail and succeed in the same patch):')
1129
1130 logging.debug('Found %d jobs', len(latest['jobs'].keys()))
1131
1132 def flakiness(stats, job):
1133 passes = stats['jobs'][job]['pass-count']
1134 failures = stats['jobs'][job]['false-reject-count']
1135 return percentage(failures, passes + failures)
1136
1137 jobs = sorted(latest['jobs'].iterkeys(), key=lambda j: flakiness(latest, j),
1138 reverse=True)
1139 output('%-40s %-15s %-15s %-15s',
1140 'Builder Name', 'Succeeded', 'Flaky Failures', 'Flakiness (%)')
1141 for job in jobs:
1142 passes = latest['jobs'][job]['pass-count']
1143 failures = latest['jobs'][job]['false-reject-count']
1144 output('%-40s %-15s %-15s %-15s',
1145 job, '%5d' % passes, '%5d' % failures,
1146 '%6.2f%%' % flakiness(latest, job))
1147
1148
1149 def acquire_stats(args):
1150 stats = {}
1151 logging.info('Acquiring stats for project %s for a %s of %s using %s',
1152 args.project, args.range, args.date,
1153 'logs' if args.use_logs else 'cache')
1154 end_date = args.date + datetime.timedelta(minutes=INTERVALS[args.range])
1155 prev_date = args.date - datetime.timedelta(minutes=INTERVALS[args.range])
1156 if args.use_logs:
1157 init_stats = {'latest': default_stats(), 'previous': default_stats()}
1158 assert args.date
1159 # For weekly stats, collect job flakiness from daily cached stats.
1160 if args.range == 'week':
1161 for day in range(7):
1162 d = args.date + datetime.timedelta(minutes=INTERVALS['day']*day)
1163 raw_stats = fetch_stats(args, d, 'day')
1164 init_stats = organize_stats(raw_stats, latest_init=init_stats['latest'])
1165 elif args.range == 'day':
1166 for hour in range(24):
1167 d = args.date + datetime.timedelta(minutes=INTERVALS['hour']*hour)
1168 raw_stats = fetch_stats(args, d, 'hour')
1169 init_stats = organize_stats(raw_stats, latest_init=init_stats['latest'])
1170 else:
1171 init_stats = organize_stats(fetch_stats(args))
1172 stats['latest'] = derive_stats(
1173 args, args.date, init_stats=init_stats['latest'])
1174 stats['previous'] = derive_stats(args, prev_date)
1175 else:
1176 stats = organize_stats(fetch_stats(args))
1177
1178 stats['latest']['tree'] = derive_tree_stats(args.project, args.date, end_date)
1179 stats['previous']['tree'] = derive_tree_stats(
1180 args.project, prev_date, args.date)
1181
1182 if PROJECTS[args.project]['type'] == 'git':
1183 stats['latest']['usage'] = derive_git_stats(
1184 args.project, args.date, end_date, args.bots)
1185 stats['previous']['usage'] = derive_git_stats(
1186 args.project, prev_date, args.date, args.bots)
1187 else:
1188 stats['latest']['usage'] = derive_svn_stats(
1189 args.project, args.date, end_date, args.bots)
1190 stats['previous']['usage'] = derive_svn_stats(
1191 args.project, prev_date, args.date, args.bots)
1192
1193 return stats
1194
1195 def main():
1196 args = parse_args()
1197 logger = logging.getLogger()
1198 # TODO(sergeyberezin): how do I derive local timezone string?
1199 # Need to be able to pass dateutil.tz.tzlocal() directly.
1200 infra.libs.logs.process_argparse_options(args, logger)
1201 stats = acquire_stats(args)
1202 print_stats(args, stats)
1203
1204
1205 if __name__ == '__main__':
1206 sys.exit(main())
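
For reference, a minimal usage sketch (not part of the patch): it assumes the module is importable as infra.tools.cq_stats.cq_stats from an infra.git checkout and hand-builds the argparse.Namespace that parse_args() would normally produce; the field values below are examples only.

import argparse
import datetime

from infra.tools.cq_stats import cq_stats  # assumed import path

args = argparse.Namespace(
    project='chromium', range='day', bots=[], seq=True, thread_pool=1,
    use_logs=False, list_rejections=False, list_false_rejections=False,
    date=None)
args.date = (datetime.datetime.now() -
             datetime.timedelta(minutes=cq_stats.INTERVALS[args.range]))
stats = cq_stats.acquire_stats(args)
cq_stats.print_stats(args, stats)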