Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(411)

Side by Side Diff: tools/metrics/histograms/find_unmapped_histograms.py

Issue 13245008: Open-source histograms.xml, starting with Autofill histograms. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Scans the Chromium source for histograms that are absent from histograms.xml.
6
7 This is a heuristic scan, so a clean run of this script does not guarantee that
8 all histograms in the Chromium source are properly mapped. Notably, field
9 trials are entirely ignored by this script.
10
11 """
12
13 import commands
14 import extract_histograms
15 import logging
16 import optparse
17 import os
18 import re
19 import sys
20
21
# Matches a C string literal that is directly followed (ignoring whitespace)
# by the opening quote of another literal.  Group 1 keeps the first literal
# without its closing quote, so substituting r'\1' splices the two together.
ADJACENT_C_STRING_REGEX = re.compile(r"""
    ("      # Opening quotation mark
    [^"]*)  # Literal string contents
    "       # Closing quotation mark
    \s*     # Any number of spaces
    "       # Another opening quotation mark
    """, re.VERBOSE)
# Matches an expression that is nothing but a (possibly namespace-qualified)
# C++ constant such as 'kFoo', 'ns::kFoo' or 'outer::inner::kFoo'.
CONSTANT_REGEX = re.compile(r"""
    (\w*::)*  # Optional namespace(s), e.g. 'outer::inner::'
    k[A-Z]    # Match a constant identifier: 'k' followed by an uppercase letter
    \w*       # Match the rest of the constant identifier
    $         # Make sure there's only the identifier, nothing else
    """, re.VERBOSE)
# Matches an invocation of any UMA_HISTOGRAM* macro and captures the text of
# its first argument (everything up to the first comma).
HISTOGRAM_REGEX = re.compile(r"""
    UMA_HISTOGRAM  # Match the shared prefix for standard UMA histogram macros
    \w*            # Match the rest of the macro name, e.g. '_ENUMERATION'
    \(             # Match the opening parenthesis for the macro
    \s*            # Match any whitespace -- especially, any newlines
    ([^,]*)        # Capture the first parameter to the macro
    ,              # Match the comma that delineates the first parameter
    """, re.VERBOSE)
43
44
class DirectoryNotFoundException(Exception):
  """Raised when the requested root directory cannot be located.

  Being a locally defined type, it also lets callers distinguish this script's
  own failures from standard exceptions.

  Attributes:
    msg: Human-readable description of the failure; also the str() value.
  """
  def __init__(self, msg):
    # Forward the message to Exception so that |args|, repr() and pickling
    # carry it as well, instead of leaving |args| empty.
    super(DirectoryNotFoundException, self).__init__(msg)
    self.msg = msg

  def __str__(self):
    return self.msg
52
53
def changeWorkingDirectory(target_directory):
  """Changes the working directory to the enclosing |target_directory|.

  The current working directory's path is searched for |target_directory|, and
  the process then changes into the outermost ancestor (or the current
  directory itself) that ends with it.  Only whole path components are
  accepted, so looking for 'src' is not fooled by a parent directory such as
  'srcdev'.

  Args:
    target_directory: Name (or trailing path fragment) of the directory to
        change into, e.g. 'src'.

  Returns:
    None

  Raises:
    DirectoryNotFoundException if the target directory cannot be found.
  """
  working_directory = os.getcwd()
  separators = tuple(sep for sep in (os.sep, os.altsep) if sep)

  search_from = 0
  while True:
    pos = working_directory.find(target_directory, search_from)
    if pos < 0:
      raise DirectoryNotFoundException('Could not find root directory "' +
                                       target_directory + '". ' +
                                       'Please run this script within your ' +
                                       'Chromium checkout.')

    end = pos + len(target_directory)
    # A plain substring hit may sit in the middle of a longer directory name;
    # require a path separator (or the end of the path) on both sides.
    starts_component = (pos == 0 or
                        working_directory[pos - 1] in separators or
                        target_directory.startswith(separators))
    ends_component = (end == len(working_directory) or
                      working_directory[end] in separators or
                      target_directory.endswith(separators))
    if starts_component and ends_component:
      os.chdir(working_directory[:end])
      return

    search_from = pos + 1
73
74
def collapseAdjacentCStrings(string):
  """Merges neighbouring C string literals into one literal.

  C concatenates adjacent literals, which is commonly used to wrap a long
  string over several source lines; this undoes that wrapping.

  Args:
    string: The text to recombine, e.g. '"Foo"\n "bar"'

  Returns:
    The text with every adjacent pair joined, e.g. '"Foobar"' for an input of
    '"Foo"\n "bar"'
  """
  # Join one pair per pass and rescan from the start: a match swallows the
  # opening quote of the following literal, so a single global pass would skip
  # over chains of three or more literals.
  replacements = 1
  while replacements:
    string, replacements = ADJACENT_C_STRING_REGEX.subn(r'\1', string, count=1)
  return string
93
94
def logNonLiteralHistogram(filename, histogram):
  """Warns about a histogram name that is not a plain string literal.

  Known-acceptable forms are silently skipped.

  Args:
    filename: The file in which the histogram was found, e.g.
        'chrome/browser/memory_details.cc'
    histogram: The source expression producing the histogram name, e.g.
        '"FakeHistogram" + variant'

  Returns:
    None
  """
  # Macro definitions are usually continued across lines with backslashes.
  inside_macro = '\\' in histogram
  # Field trials are unique within a session, so are effectively constants.
  is_field_trial = histogram.startswith('base::FieldTrial::MakeName')

  # The last test skips names that were hoisted into C++ constants.
  if inside_macro or is_field_trial or CONSTANT_REGEX.match(histogram):
    return

  # TODO(isherman): This is still a little noisy... needs further filtering to
  # reduce the noise.
  logging.warning('%s contains non-literal histogram name <%s>', filename,
                  histogram)
127
128
def readChromiumHistograms():
  """Searches the Chromium source for all histogram names.

  Also prints warnings for any invocations of the UMA_HISTOGRAM_* macros with
  names that might vary during a single run of the app.

  Must be run from within the checkout, since files are located with git and
  opened relative to the current working directory.

  Returns:
    A set containing any found literal histogram names, without their
    surrounding quotation marks.
  """
  logging.info('Scanning Chromium source for histograms...')

  # Use git grep to find all invocations of the UMA_HISTOGRAM_* macros.
  # Examples:
  #   'path/to/foo.cc:420:  UMA_HISTOGRAM_COUNTS_100("FooGroup.FooName",'
  #   'path/to/bar.cc:632:  UMA_HISTOGRAM_ENUMERATION('
  locations = commands.getoutput('git gs UMA_HISTOGRAM').split('\n')
  filenames = set(location.split(':')[0] for location in locations)
  # Empty output still splits into one empty "line"; never try to open ''.
  filenames.discard('')

  histograms = set()
  # Sorted so that warnings come out in a stable order from run to run.
  for filename in sorted(filenames):
    with open(filename, 'r') as f:
      contents = f.read()

    for histogram in set(HISTOGRAM_REGEX.findall(contents)):
      # Whitespace between the name and the comma is not part of the name.
      histogram = collapseAdjacentCStrings(histogram).strip()

      # A literal name starts and ends with a quotation mark and has no other
      # quotation marks in between.  The length check also covers an empty
      # first argument, which would otherwise fail on indexing.
      is_quoted = (len(histogram) >= 2 and
                   histogram[0] == '"' and histogram[-1] == '"')
      histogram_stripped = histogram.strip('"')
      if not is_quoted or '"' in histogram_stripped:
        logNonLiteralHistogram(filename, histogram)
        continue

      histograms.add(histogram_stripped)

  return histograms
171
172
def readXmlHistograms(histograms_file_location):
  """Collects the histogram names declared in histograms.xml.

  Args:
    histograms_file_location: Path of the XML file to read, as handed to
        extract_histograms.ExtractHistograms.

  Returns:
    A set containing the parsed histogram names.
  """
  logging.info('Reading histograms from %s...', histograms_file_location)
  parsed = extract_histograms.ExtractHistograms(histograms_file_location)
  names = extract_histograms.ExtractNames(parsed)
  return set(names)
182
183
def main():
  """Reports histograms used in the Chromium source but absent from the XML.

  Parses the command line, changes into the root directory, gathers the
  histogram names from both the source tree and the XML file, and logs every
  name that appears only in the source.  Exits with status 1 on unexpected
  positional arguments or when the root directory cannot be found.
  """
  # Parse command line options.  The help strings use optparse's '%default'
  # placeholder so the advertised defaults can never drift from the real ones.
  parser = optparse.OptionParser()
  parser.add_option(
      '--root-directory', dest='root_directory', default='src',
      help='scan within DIRECTORY for histograms '
           '[optional, defaults to "%default"]',
      metavar='DIRECTORY')
  parser.add_option(
      '--histograms-file', dest='histograms_file_location',
      default='tools/metrics/histograms/histograms.xml',
      help='read histogram definitions from FILE (relative to '
           '--root-directory) [optional, defaults to "%default"]',
      metavar='FILE')

  (options, args) = parser.parse_args()
  if args:
    parser.print_help()
    sys.exit(1)

  logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

  try:
    changeWorkingDirectory(options.root_directory)
  except DirectoryNotFoundException as e:
    logging.error(e)
    sys.exit(1)

  # Both readers run after the chdir, so relative paths resolve against the
  # root directory.
  chromium_histograms = readChromiumHistograms()
  xml_histograms = readXmlHistograms(options.histograms_file_location)

  unmapped_histograms = sorted(chromium_histograms - xml_histograms)
  if unmapped_histograms:
    logging.info('')
    logging.info('')
    logging.info('Histograms in Chromium but not in %s:',
                 options.histograms_file_location)
    logging.info('-------------------------------------------------')
    for histogram in unmapped_histograms:
      logging.info(' %s', histogram)
  else:
    logging.info('Success! No unmapped histograms found.')


if __name__ == '__main__':
  main()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698