Chromium Code Reviews

Side by Side Diff: experimental/chrome_speed_metrics/scripts/parse_ctp_results.py

Issue 3015613002: Add data processing scripts for chrome-speed-metrics
Patch Set: Created 3 years, 3 months ago
1 """Helper functions to parse result file produced by Chrome Trace Processor. We
2 usually have to have a more specialized version of this file when we modify a
3 tbmv2 metric for an analysis.
4 """

# TODO(dproy): Make this file pylint compatible.
# pylint: skip-file

import csv
import json
import sys

# Quick and dirty debug logging.
debug_mode = False
def log_debug(*args, **kwargs):
  if debug_mode:
    for arg in args:
      print arg
    for v in kwargs.values():
      print v

def load_json_results_from_file(filename):
  results = []
  with open(filename) as f:
    for line in f:
      try:
        results.append(json.loads(line))
      except ValueError:
        log_debug("----------------------")
        log_debug("Could not parse json: ")
        log_debug(line)
        log_debug("----------------------")
  print "Loaded " + filename
  return results

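# Note: this loader assumes the CTP results file is line-delimited JSON, one
# result object per line, each carrying its histograms under
# result['pairs']['histograms'] (see parse_results_json_list below). An
# illustrative, purely hypothetical line:
#   {"pairs": {"histograms": [{"name": "timeToFirstPaint",
#                              "running": [1, 12.3, 2.5, 12.3, 12.3, 12.3, 0]}]}}
# Lines that fail to parse are skipped and only reported via log_debug.
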
def get_unique_histogram_value(histogram):
  """If histogram has a unique value, returns that value. Otherwise returns a
  string of the format "Not Unique. {count: <number of values>,
  sampleValues: <a representative sample of values>}". If no value is found,
  returns an empty string.

  The decision to return a string instead of raising an exception in these
  failure cases is intentional. The json results produced by the cluster
  telemetry / chrome trace processor pipeline often have all kinds of errors,
  and we don't want to choke on them, but we also want to be aware of their
  presence so we can fix the errors if possible.
  """
  if 'running' in histogram:
    running_stats = histogram['running']
    running_max = running_stats[1]
    running_min = running_stats[4]
    if running_min == running_max:
      return running_min
    else:
      return "Not Unique. {{count: {count}, sampleValues: {samples}}}".format(
          count=running_stats[0], samples=histogram.get('sampleValues', []))
  return ''

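# The indexing above assumes the catapult HistogramSet serialization of running
# statistics as a seven-element list, roughly
#   [count, max, meanlogs, mean, min, sum, variance],
# so running_stats[0] is the sample count, running_stats[1] the max and
# running_stats[4] the min; a histogram has a unique value exactly when its min
# equals its max. If that serialization ever changes, these indices need to be
# updated.
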
def parse_results_json_list(result_json_list):
  """
  Produces a list of trace_data dicts. A trace_data dict contains information
  about a single trace. Its format is
  {
    telemetry_info: <metadata about the run>
    metrics: <dict of all histograms gathered from the run>
  }
  """
  results = []
  for result_json in result_json_list:
    trace_data = {}
    metrics_dict = {}
    trace_data['metrics'] = metrics_dict
    histograms = result_json['pairs']['histograms']
    for histogram in histograms:
      if histogram.get('type', '') == 'TelemetryInfo':
        trace_data['telemetry_info'] = histogram
      if 'name' in histogram:
        metrics_dict[histogram['name']] = get_unique_histogram_value(histogram)
    results.append(trace_data)
  return results

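# Illustrative trace_data entry produced above (hypothetical values), assuming
# the usual TelemetryInfo fields:
#   {
#     'telemetry_info': {'type': 'TelemetryInfo',
#                        'storyDisplayName': 'http://example.com',
#                        'storyGroupingKeys': {'cache_temperature': 'cold'}},
#     'metrics': {'timeToFirstContentfulPaint': 123.4,
#                 'timeToOnload': 'Not Unique. {count: 3, sampleValues: [1, 2, 3]}'}
#   }
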
def get_csv_dicts(trace_data_list):
  """
  Converts a list of trace_data dicts into a list of csv_dicts, flat
  dictionaries that will be written out to the csv file.
  """
  csv_dicts = []
  for trace_data in trace_data_list:
    csv_dict = {}
    csv_dict['site'] = trace_data['telemetry_info']['storyDisplayName']
    csv_dict['cache_temperature'] = (trace_data['telemetry_info']
                                     ['storyGroupingKeys']
                                     ['cache_temperature'])
    csv_dict.update(trace_data['metrics'])
    csv_dicts.append(csv_dict)
  return csv_dicts

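# The hypothetical trace_data above would flatten into a csv_dict such as:
#   {'site': 'http://example.com',
#    'cache_temperature': 'cold',
#    'timeToFirstContentfulPaint': 123.4,
#    'timeToOnload': 'Not Unique. {count: 3, sampleValues: [1, 2, 3]}'}
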

def write_csv(trace_data_list, output_filename):
  csv_dicts = get_csv_dicts(trace_data_list)

  # Not all histograms contain all metrics so we need to gather all the
  # possible fieldnames first.
  fieldnames = set()
  for d in csv_dicts:
    fieldnames = fieldnames.union(d.keys())

  with open(output_filename, 'w') as f:
    writer = csv.DictWriter(f, list(fieldnames), extrasaction='ignore')
    writer.writeheader()
    writer.writerows(csv_dicts)

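# Because fieldnames is a set, the csv column order is not stable across runs;
# passing sorted(fieldnames) to csv.DictWriter would give a deterministic
# column order if downstream consumers care about it.
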
def main():
  # TODO(dproy): It may eventually make sense to use a real argument parser.
  if len(sys.argv) < 2:
    print "Usage: {0} <ctp-results> [output-filename]".format(sys.argv[0])
    print "<ctp-results> is the results file produced by chrome trace processor."
    print "[output-filename] is the produced csv file. Defaults to out.csv."
    sys.exit(1)

  input_filename = sys.argv[1]
  if len(sys.argv) > 2:
    output_filename = sys.argv[2]
  else:
    output_filename = "out.csv"

  result_json_list = load_json_results_from_file(input_filename)
  trace_data_list = parse_results_json_list(result_json_list)
  write_csv(trace_data_list, output_filename)

  print "Wrote csv output to " + output_filename
  print "Total results processed:", len(result_json_list)

if __name__ == '__main__':
  main()
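
# Example invocation (hypothetical file names):
#   python parse_ctp_results.py ctp_results.json parsed_results.csv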