OLD | NEW |
(Empty) | |
| 1 """Helper functions to parse result file produced by Chrome Trace Processor. We |
| 2 usually have to have a more specialized version of this file when we modify a |
| 3 tbmv2 metric for an analysis. |
| 4 """ |
| 5 |
| 6 # TODO(dproy): Make this file pylint compatible. |
| 7 # pylint: skip-file |
| 8 |
| 9 import csv |
| 10 import json |
| 11 import sys |
| 12 |
# Quick and dirty debug logging.
debug_mode = False


def log_debug(*args, **kwargs):
  """Prints debug output when debug_mode is enabled; otherwise a no-op.

  Each positional argument is printed on its own line. (The original loop
  printed the whole ``args`` tuple once per argument — a bug.) For keyword
  arguments only the values are printed, matching the original behavior.
  """
  if not debug_mode:
    return
  for arg in args:
    print(arg)
  for v in kwargs.values():
    print(v)
def load_json_results_from_file(filename):
  """Loads newline-delimited JSON records from |filename|.

  Lines that fail to parse are skipped (and reported via log_debug) instead
  of aborting the whole load, because cluster telemetry result files
  routinely contain malformed lines.

  Returns:
    A list of the successfully parsed JSON values, in file order.
  """
  results = []
  with open(filename) as f:
    for line in f:
      try:
        results.append(json.loads(line))
      except ValueError:
        # json.loads raises ValueError (JSONDecodeError subclasses it) on
        # malformed input. A bare except here also swallowed
        # KeyboardInterrupt and real bugs, so only catch parse errors.
        log_debug("----------------------")
        log_debug("Could not parse json: ")
        log_debug(line)
        log_debug("----------------------")
  print("Loaded " + filename)
  return results
| 35 |
def get_unique_histogram_value(histogram):
  """If histogram has a unique value, returns that value. Otherwise returns a
  string of the format "Not Unique. {count: <number of values>,
  sampleValues: <a representative sample of values>}". If no value is found,
  returns an empty string.

  The decision to return a string instead of raising an exception in these
  failure cases is intentional. The json results produced by cluster telemetry
  / chrome trace processor pipeline often has all kinds of errors, and we don't
  want to choke on them, but we also want to be aware of their presence so we
  can fix the errors if possible.
  """
  if 'running' in histogram:
    # 'running' stats layout (tbmv2): [count, max, meanlogs, mean, min, ...].
    running_stats = histogram['running']
    running_max = running_stats[1]
    running_min = running_stats[4]
    if running_min == running_max:
      return running_min
    # Literal braces must be doubled for str.format, and the keyword must
    # match the replacement field. The original passed sampleValues= for a
    # field named {samples} with un-escaped braces, so .format raised
    # whenever the histogram value was not unique.
    return "Not Unique. {{count: {count}, sampleValues: {samples}}}".format(
        count=running_stats[0], samples=histogram.get('sampleValues', []))
  return ''
| 58 |
def parse_results_json_list(result_json_list):
  """Converts raw result jsons into a list of trace_data dicts.

  Each trace_data dict describes a single trace and has the shape:
  {
    telemetry_info: <metadata about the run>
    metrics: <dict of all histograms gathered from the run>
  }
  """
  parsed = []
  for record in result_json_list:
    metrics = {}
    trace_data = {'metrics': metrics}
    for hist in record['pairs']['histograms']:
      if hist.get('type', '') == 'TelemetryInfo':
        trace_data['telemetry_info'] = hist
      # Note: not elif — a named TelemetryInfo histogram is also recorded
      # as a metric, same as the original behavior.
      if 'name' in hist:
        metrics[hist['name']] = get_unique_histogram_value(hist)
    parsed.append(trace_data)
  return parsed
| 81 |
def get_csv_dicts(trace_data_list):
  """Flattens every trace_data dict into a single-level dict suitable for
  writing out as one csv row.
  """
  flat_rows = []
  for trace_data in trace_data_list:
    info = trace_data['telemetry_info']
    row = {
        'site': info['storyDisplayName'],
        'cache_temperature': info['storyGroupingKeys']['cache_temperature'],
    }
    # Metric values are merged last, matching the original update() order.
    row.update(trace_data['metrics'])
    flat_rows.append(row)
  return flat_rows
| 97 |
| 98 |
def write_csv(trace_data_list, output_filename):
  """Writes one csv row per trace_data dict to |output_filename|.

  Columns are the union of all keys seen across rows, since not every
  histogram set contains every metric; DictWriter fills missing values
  with its default restval ('').
  """
  csv_dicts = get_csv_dicts(trace_data_list)

  # Not all histograms contain all metrics so we need to gather all the
  # possible fieldnames first.
  fieldnames = set()
  for d in csv_dicts:
    fieldnames.update(d.keys())

  with open(output_filename, 'w') as f:
    # Sort the fieldnames so the column order is deterministic; iterating
    # the set directly produced a different column order on every run.
    writer = csv.DictWriter(f, sorted(fieldnames), extrasaction='ignore')
    writer.writeheader()
    writer.writerows(csv_dicts)
| 112 |
def main():
  """Parses CTP results named on the command line and writes them as csv."""
  # TODO(dproy): It may eventually make sense to use a real argument parser.
  if len(sys.argv) < 2:
    print("Usage: {0} <ctp-results> [output-filename]".format(sys.argv[0]))
    print("<ctp-results> is the results file produced by chrome trace "
          "processor.")
    print("[output-filename] is the produced csv file. Defaults to out.csv.")
    # Bug fix: without exiting here the script fell through and crashed
    # with an IndexError on sys.argv[1].
    sys.exit(1)

  input_filename = sys.argv[1]
  if len(sys.argv) > 2:
    output_filename = sys.argv[2]
  else:
    output_filename = "out.csv"

  result_json_list = load_json_results_from_file(input_filename)
  trace_data_list = parse_results_json_list(result_json_list)
  write_csv(trace_data_list, output_filename)

  print("Wrote csv output to " + output_filename)
  print("Total results processed: {0}".format(len(result_json_list)))
| 132 |
# Script entry point: run main() only when executed directly, so the module
# can be imported (e.g. by a more specialized analysis script) without side
# effects.
if __name__ == '__main__':
  main()
OLD | NEW |