OLD | NEW |
(Empty) | |
| 1 """Helper functions to parse result file produced by Chrome Trace Processor. We |
| 2 usually have to have a more specialized version of this file when we modify a |
| 3 tbmv2 metric for an analysis. |
| 4 """ |
| 5 |
| 6 # TODO(dproy): Make this file pylint compatible. |
| 7 # pylint: skip-file |
| 8 |
| 9 import csv |
| 10 import json |
| 11 import sys |
| 12 |
# Quick and dirty debug logging.
debug_mode = False


def log_debug(*args, **kwargs):
  """Prints debug output when debug_mode is enabled; otherwise a no-op.

  Each positional argument is printed on its own line. (The original loop
  printed the whole ``args`` tuple once per argument — a bug.) For keyword
  arguments only the values are printed, matching the original behavior.
  """
  if not debug_mode:
    return
  for arg in args:
    print(arg)
  for v in kwargs.values():
    print(v)
def load_json_results_from_file(filename):
  """Loads newline-delimited JSON records from |filename|.

  Lines that fail to parse are skipped (and reported via log_debug) instead
  of aborting the whole load, because cluster telemetry result files
  routinely contain malformed lines.

  Returns:
    A list of the successfully parsed JSON values, in file order.
  """
  results = []
  with open(filename) as f:
    for line in f:
      try:
        results.append(json.loads(line))
      except ValueError:
        # json.loads raises ValueError (JSONDecodeError subclasses it) on
        # malformed input. A bare except here also swallowed
        # KeyboardInterrupt and real bugs, so only catch parse errors.
        log_debug("----------------------")
        log_debug("Could not parse json: ")
        log_debug(line)
        log_debug("----------------------")
  print("Loaded " + filename)
  return results
| 35 |
def get_unique_histogram_value(histogram):
  """If histogram has a unique value, returns that value. Otherwise returns a
  string of the format "Not Unique. {count: <number of values>,
  sampleValues: <a representative sample of values>}". If no value is found,
  returns an empty string.

  The decision to return a string instead of raising an exception in these
  failure cases is intentional. The json results produced by cluster telemetry
  / chrome trace processor pipeline often has all kinds of errors, and we don't
  want to choke on them, but we also want to be aware of their presence so we
  can fix the errors if possible.
  """
  if 'running' in histogram:
    # 'running' stats layout (tbmv2): [count, max, meanlogs, mean, min, ...].
    running_stats = histogram['running']
    running_max = running_stats[1]
    running_min = running_stats[4]
    if running_min == running_max:
      return running_min
    # Literal braces must be doubled for str.format, and the keyword must
    # match the replacement field. The original passed sampleValues= for a
    # field named {samples} with un-escaped braces, so .format raised
    # whenever the histogram value was not unique.
    return "Not Unique. {{count: {count}, sampleValues: {samples}}}".format(
        count=running_stats[0], samples=histogram.get('sampleValues', []))
  return ''
| 58 |
def parse_results_json_list(result_json_list):
  """Converts raw result jsons into a list of trace_data dicts.

  Each trace_data dict describes a single trace and has the shape:
  {
    telemetry_info: <metadata about the run>
    metrics: <dict of all histograms gathered from the run>
  }
  """
  parsed = []
  for record in result_json_list:
    metrics = {}
    trace_data = {'metrics': metrics}
    for hist in record['pairs']['histograms']:
      if hist.get('type', '') == 'TelemetryInfo':
        trace_data['telemetry_info'] = hist
      # Note: not elif — a named TelemetryInfo histogram is also recorded
      # as a metric, same as the original behavior.
      if 'name' in hist:
        metrics[hist['name']] = get_unique_histogram_value(hist)
    parsed.append(trace_data)
  return parsed
| 81 |
def get_csv_dicts(trace_data_list):
  """Flattens every trace_data dict into a single-level dict suitable for
  writing out as one csv row.
  """
  flat_rows = []
  for trace_data in trace_data_list:
    info = trace_data['telemetry_info']
    row = {
        'site': info['storyDisplayName'],
        'cache_temperature': info['storyGroupingKeys']['cache_temperature'],
    }
    # Metric values are merged last, matching the original update() order.
    row.update(trace_data['metrics'])
    flat_rows.append(row)
  return flat_rows
| 97 |
| 98 |
def write_csv(trace_data_list, output_filename):
  """Writes one csv row per trace_data dict to |output_filename|.

  Columns are the union of all keys seen across rows, since not every
  histogram set contains every metric; DictWriter fills missing values
  with its default restval ('').
  """
  csv_dicts = get_csv_dicts(trace_data_list)

  # Not all histograms contain all metrics so we need to gather all the
  # possible fieldnames first.
  fieldnames = set()
  for d in csv_dicts:
    fieldnames.update(d.keys())

  with open(output_filename, 'w') as f:
    # Sort the fieldnames so the column order is deterministic; iterating
    # the set directly produced a different column order on every run.
    writer = csv.DictWriter(f, sorted(fieldnames), extrasaction='ignore')
    writer.writeheader()
    writer.writerows(csv_dicts)
| 112 |
def main():
  """Parses CTP results named on the command line and writes them as csv."""
  # TODO(dproy): It may eventually make sense to use a real argument parser.
  if len(sys.argv) < 2:
    print("Usage: {0} <ctp-results> [output-filename]".format(sys.argv[0]))
    print("<ctp-results> is the results file produced by chrome trace "
          "processor.")
    print("[output-filename] is the produced csv file. Defaults to out.csv.")
    # Bug fix: without exiting here the script fell through and crashed
    # with an IndexError on sys.argv[1].
    sys.exit(1)

  input_filename = sys.argv[1]
  if len(sys.argv) > 2:
    output_filename = sys.argv[2]
  else:
    output_filename = "out.csv"

  result_json_list = load_json_results_from_file(input_filename)
  trace_data_list = parse_results_json_list(result_json_list)
  write_csv(trace_data_list, output_filename)

  print("Wrote csv output to " + output_filename)
  print("Total results processed: {0}".format(len(result_json_list)))
| 132 |
# Script entry point: run main() only when executed directly, so the module
# can be imported (e.g. by a more specialized analysis script) without side
# effects.
if __name__ == '__main__':
  main()
OLD | NEW |