Index: experimental/chrome_speed_metrics/scripts/parse_ctp_results.py |
diff --git a/experimental/chrome_speed_metrics/scripts/parse_ctp_results.py b/experimental/chrome_speed_metrics/scripts/parse_ctp_results.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..68c81661ac4ff7094af0c6d748defa3a61263c52 |
--- /dev/null |
+++ b/experimental/chrome_speed_metrics/scripts/parse_ctp_results.py |
@@ -0,0 +1,134 @@ |
+"""Helper functions to parse result files produced by Chrome Trace Processor.
+We usually need a more specialized version of this file when we modify a
+tbmv2 metric for an analysis.
+"""
+ |
+# TODO(dproy): Make this file pylint compatible. |
+# pylint: skip-file |
+ |
+import csv |
+import json |
+import sys |
+ |
# Quick and dirty debug logging.
debug_mode = False


def log_debug(*args, **kwargs):
  """Prints each argument on its own line when debug_mode is enabled.

  Does nothing when debug_mode is False, so call sites can stay
  unconditional.
  """
  if debug_mode:
    for arg in args:
      # Print each argument individually (the original printed the whole
      # `args` tuple once per argument).
      print(arg)
    for key, value in kwargs.items():
      # Include the key so keyword arguments are identifiable in the output
      # (the original printed only the value).
      print("{}: {}".format(key, value))
+ |
def load_json_results_from_file(filename):
  """Reads a file containing one JSON document per line.

  Lines that fail to parse are logged (when debug logging is on) and
  skipped rather than raised: CTP result files routinely contain some
  malformed lines and we still want the rest of the data.

  Args:
    filename: Path of the results file to read.

  Returns:
    A list with one parsed JSON value per successfully parsed line.
  """
  results = []
  with open(filename) as f:
    for line in f:
      try:
        results.append(json.loads(line))
      # json raises ValueError (JSONDecodeError subclasses it) on bad
      # input; the original bare except also swallowed KeyboardInterrupt
      # and SystemExit.
      except ValueError:
        log_debug("----------------------")
        log_debug("Could not parse json: ")
        log_debug(line)
        log_debug("----------------------")
  print("Loaded " + filename)
  return results
+ |
def get_unique_histogram_value(histogram):
  """If histogram has a unique value, returns that value. Otherwise returns a
  string of the format "Not Unique. {count: <number of values>,
  sampleValues: <a representative sample of values>}". If no value is found,
  returns an empty string.

  The decision to return a string instead of raising an exception in these
  failure cases is intentional. The json results produced by cluster telemetry
  / chrome trace processor pipeline often has all kinds of errors, and we don't
  want to choke on them, but we also want to be aware of their presence so we
  can fix the errors if possible.
  """
  if 'running' in histogram:
    # Indices into the serialized running statistics: [0] is the sample
    # count, [1] the max, [4] the min -- presumably the TBMv2
    # RunningStatistics layout; verify against the histogram-set format if
    # values look wrong.
    running_stats = histogram['running']
    running_max = running_stats[1]
    running_min = running_stats[4]
    if running_min == running_max:
      # min == max implies every sample had the same value.
      return running_min
    # Literal braces must be doubled in str.format, and keyword names must
    # match the placeholders. The original used single braces and passed
    # sampleValues= for a {samples} placeholder, so this branch raised at
    # runtime instead of returning the diagnostic string.
    return "Not Unique. {{count: {count}, sampleValues: {samples}}}".format(
        count=running_stats[0], samples=histogram.get('sampleValues', []))
  return ''
+ |
def parse_results_json_list(result_json_list):
  """Converts parsed CTP result objects into a list of trace_data dicts.

  A trace_data dict describes a single trace and has the shape:
  {
    telemetry_info: <metadata about the run>
    metrics: <dict of all histograms gathered from the run>
  }
  """
  trace_data_list = []
  for entry in result_json_list:
    metrics = {}
    trace_data = {'metrics': metrics}
    for histogram in entry['pairs']['histograms']:
      # The TelemetryInfo pseudo-histogram carries run metadata rather
      # than a metric value.
      if histogram.get('type', '') == 'TelemetryInfo':
        trace_data['telemetry_info'] = histogram
      if 'name' in histogram:
        metrics[histogram['name']] = get_unique_histogram_value(histogram)
    trace_data_list.append(trace_data)
  return trace_data_list
+ |
def get_csv_dicts(trace_data_list):
  """Flattens each trace_data dict into a row dict for csv.DictWriter.

  Each row carries the story name, its cache_temperature grouping key,
  and every gathered metric value.
  """
  rows = []
  for trace_data in trace_data_list:
    info = trace_data['telemetry_info']
    row = {
        'site': info['storyDisplayName'],
        'cache_temperature': info['storyGroupingKeys']['cache_temperature'],
    }
    row.update(trace_data['metrics'])
    rows.append(row)
  return rows
+ |
+ |
def write_csv(trace_data_list, output_filename):
  """Writes the flattened trace data to |output_filename| as csv.

  Args:
    trace_data_list: List of trace_data dicts (see parse_results_json_list).
    output_filename: Path of the csv file to create or overwrite.
  """
  csv_dicts = get_csv_dicts(trace_data_list)

  # Not all histograms contain all metrics so we need to gather all the
  # possible fieldnames first.
  fieldnames = set()
  for d in csv_dicts:
    fieldnames = fieldnames.union(d.keys())

  with open(output_filename, 'w') as f:
    # Sort the column names: set iteration order is arbitrary, so the
    # original produced a different column order on every run.
    writer = csv.DictWriter(f, sorted(fieldnames), extrasaction='ignore')
    writer.writeheader()
    writer.writerows(csv_dicts)
+ |
def main():
  """Entry point: parses a CTP results file and writes a csv summary."""
  # TODO(dproy): It may eventually make sense to use a real argument parser.
  if len(sys.argv) < 2:
    print("Usage: {0} <ctp-results> [output-filename]".format(sys.argv[0]))
    print("<ctp-results> is the results file produced by chrome trace processor.")
    print("[output-filename] is the produced csv file. Defaults to out.csv.")
    # The original fell through after printing usage and crashed with an
    # IndexError on sys.argv[1]; exit instead.
    sys.exit(1)

  input_filename = sys.argv[1]
  if len(sys.argv) > 2:
    output_filename = sys.argv[2]
  else:
    output_filename = "out.csv"

  result_json_list = load_json_results_from_file(input_filename)
  trace_data_list = parse_results_json_list(result_json_list)
  write_csv(trace_data_list, output_filename)

  print("Wrote csv output to " + output_filename)
  print("Total results processed: {}".format(len(result_json_list)))


if __name__ == '__main__':
  main()