Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(245)

Unified Diff: tools/metrics/histograms/find_unmapped_histograms.py

Issue 13245008: Open-source histograms.xml, starting with Autofill histograms. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/metrics/histograms/find_unmapped_histograms.py
diff --git a/tools/metrics/histograms/find_unmapped_histograms.py b/tools/metrics/histograms/find_unmapped_histograms.py
new file mode 100644
index 0000000000000000000000000000000000000000..a72eac447ee5483de221eac4ad2a7355da7ef345
--- /dev/null
+++ b/tools/metrics/histograms/find_unmapped_histograms.py
@@ -0,0 +1,227 @@
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Scans the Chromium source for histograms that are absent from histograms.xml.
+
+This is a heuristic scan, so a clean run of this script does not guarantee that
+all histograms in the Chromium source are properly mapped. Notably, field
+trials are entirely ignored by this script.
+
+"""
+
+import commands
+import extract_histograms
+import logging
+import optparse
+import os
+import re
+import sys
+
+
+# Matches the seam between two adjacent C string literals. Group 1 keeps the
+# opening quotation mark and the contents of the first literal, so replacing a
+# match with r'\1' removes the closing quotation mark, the whitespace and the
+# opening quotation mark of the literal that follows.
+ADJACENT_C_STRING_REGEX = re.compile(r"""
+ (" # Opening quotation mark
+ [^"]*) # Literal string contents
+ " # Closing quotation mark
+ \s* # Any number of spaces
+ " # Another opening quotation mark
+ """, re.VERBOSE)
+# Matches an expression that consists solely of a 'k'-prefixed constant name,
+# optionally qualified by a single 'name::' prefix. It is applied with match(),
+# so the pattern is anchored at the start as well as by the trailing '$'.
+CONSTANT_REGEX = re.compile(r"""
+ (\w*::)? # Optional namespace
+ k[A-Z] # Match a constant identifier: 'k' followed by an uppercase letter
+ \w* # Match the rest of the constant identifier
+ $ # Make sure there's only the identifier, nothing else
+ """, re.VERBOSE)
+# Matches the start of a UMA_HISTOGRAM* macro invocation. Group 1 captures
+# everything between the opening parenthesis (after any whitespace) and the
+# first comma; since [^,] also matches newlines, the capture may span lines.
+HISTOGRAM_REGEX = re.compile(r"""
+ UMA_HISTOGRAM # Match the shared prefix for standard UMA histogram macros
+ \w* # Match the rest of the macro name, e.g. '_ENUMERATION'
+ \( # Match the opening parenthesis for the macro
+ \s* # Match any whitespace -- especially, any newlines
+ ([^,]*) # Capture the first parameter to the macro
+ , # Match the comma that delineates the first parameter
+ """, re.VERBOSE)
+
+
+class DirectoryNotFoundException(Exception):
+  """Raised when the target directory is not part of the working path.
+
+  Having a dedicated exception type lets callers tell this script's own
+  failure apart from standard exceptions.
+
+  Attributes:
+    msg: Human-readable description of the failure; also returned by str().
+  """
+
+  def __init__(self, msg):
+    # Hand the message to Exception as well, so that |args|, repr() and
+    # pickling behave the same way as for built-in exceptions.
+    super(DirectoryNotFoundException, self).__init__(msg)
+    self.msg = msg
+
+  def __str__(self):
+    return self.msg
+
+
+def changeWorkingDirectory(target_directory):
+  """Changes the working directory to the given |target_directory|.
+
+  The directory is located by searching the path of the current working
+  directory for |target_directory|, so the script has to be started from
+  somewhere at or below it. Only whole path components are matched: 'src' is
+  found in '/a/src/tools' but not in '/a/src_old/tools'. When several
+  components match, the one closest to the filesystem root is used.
+
+  Args:
+    target_directory: Name (or partial path, e.g. 'chromium/src') of the
+        directory to switch to.
+
+  Returns:
+    None
+
+  Raises:
+    DirectoryNotFoundException if the target directory cannot be found.
+  """
+  working_directory = os.getcwd()
+
+  end = -1
+  search_from = 0
+  # An empty target can never name a directory, so skip the search entirely.
+  while target_directory:
+    pos = working_directory.find(target_directory, search_from)
+    if pos < 0:
+      break
+
+    # Accept the hit only if it begins and ends on a path separator (or at
+    # either end of the path); otherwise it is just part of a longer name.
+    candidate_end = pos + len(target_directory)
+    starts_component = (pos == 0 or
+                        target_directory.startswith(os.sep) or
+                        working_directory[pos - 1] == os.sep)
+    ends_component = (candidate_end == len(working_directory) or
+                      target_directory.endswith(os.sep) or
+                      working_directory[candidate_end] == os.sep)
+    if starts_component and ends_component:
+      end = candidate_end
+      break
+    search_from = pos + 1
+
+  if end < 0:
+    raise DirectoryNotFoundException(
+        'Could not find root directory "%s". '
+        'Please run this script within your Chromium checkout.' %
+        target_directory)
+
+  os.chdir(working_directory[:end])
+
+
+def collapseAdjacentCStrings(string):
+  """Merges neighbouring C string literals into one literal.
+
+  Handy for names that were broken over several source lines to stay within
+  the 80-column limit.
+
+  Args:
+    string: The text to recombine, e.g. '"Foo"\n "bar"'
+
+  Returns:
+    The text with adjacent literals joined, e.g. "Foobar" for an input of
+    '"Foo"\n "bar"'
+  """
+  merged = string
+  while True:
+    # Join a single pair per pass and repeat until no seam is left, since a
+    # chain of three or more literals needs more than one pass.
+    merged, hits = ADJACENT_C_STRING_REGEX.subn(r'\1', merged, count=1)
+    if not hits:
+      return merged
+
+
+def logNonLiteralHistogram(filename, histogram):
+  """Warns that a histogram name in the Chromium source is not a literal.
+
+  Expressions that are known to be acceptable are skipped silently.
+
+  Args:
+    filename: The file in which the histogram was found, e.g.
+        'chrome/browser/memory_details.cc'
+    histogram: The expression that evaluates to the histogram's name, e.g.
+        '"FakeHistogram" + variant'
+
+  Returns:
+    None
+  """
+  is_known_exception = (
+      # Histogram macro definitions: they typically contain backslashes so
+      # that they can be formatted across lines.
+      '\\' in histogram or
+      # Field trials are unique within a session, so effectively constants.
+      histogram.startswith('base::FieldTrial::MakeName') or
+      # Names that have been pulled out into C++ constants.
+      CONSTANT_REGEX.match(histogram))
+  if is_known_exception:
+    return
+
+  # TODO(isherman): This is still a little noisy... needs further filtering to
+  # reduce the noise.
+  logging.warning('%s contains non-literal histogram name <%s>', filename,
+                  histogram)
+
+
+def readChromiumHistograms():
+  """Searches the Chromium source for all histogram names.
+
+  The files to scan are found with git, relative to the current working
+  directory, so this must be called from inside a git checkout.
+
+  Also logs warnings for any invocations of the UMA_HISTOGRAM_* macros with
+  names that might vary during a single run of the app.
+
+  Returns:
+    A set containing any found literal histogram names.
+  """
+  logging.info('Scanning Chromium source for histograms...')
+
+  # Ask git for the files that mention the UMA_HISTOGRAM_* macros. Plain
+  # 'git grep' is used instead of a personal alias so that the script works
+  # with any git configuration; -l prints one matching path per line.
+  output = commands.getoutput('git grep -l UMA_HISTOGRAM')
+  # Skip blank lines: empty output would otherwise yield the filename ''.
+  filenames = set(line for line in output.split('\n') if line)
+
+  histograms = set()
+  for filename in filenames:
+    with open(filename, 'r') as f:
+      contents = f.read()
+
+    for expression in set(HISTOGRAM_REGEX.findall(contents)):
+      # Drop any whitespace captured between the argument and the comma.
+      expression = collapseAdjacentCStrings(expression).strip()
+
+      # A literal name is exactly one quoted string: it begins and ends with
+      # a quotation mark and has none in between. The length check keeps an
+      # empty or one-character expression from passing by accident.
+      name = expression[1:-1]
+      is_literal = (len(expression) >= 2 and
+                    expression.startswith('"') and
+                    expression.endswith('"') and
+                    '"' not in name)
+      if not is_literal:
+        logNonLiteralHistogram(filename, expression)
+        continue
+
+      histograms.add(name)
+
+  return histograms
+
+
+def readXmlHistograms(histograms_file_location):
+  """Collects the names of the histograms described in histograms.xml.
+
+  Args:
+    histograms_file_location: Location of the XML file; handed unchanged to
+        extract_histograms.ExtractHistograms().
+
+  Returns:
+    A set containing the parsed histogram names.
+  """
+  logging.info('Reading histograms from %s...', histograms_file_location)
+  parsed = extract_histograms.ExtractHistograms(histograms_file_location)
+  names = extract_histograms.ExtractNames(parsed)
+  return set(names)
+
+
+def main():
+  """Reports histograms used in the Chromium source but absent from the XML.
+
+  Parses the command line, switches to the requested root directory, then
+  compares the histogram names found in the source with those read from the
+  histograms file and logs the difference.
+
+  Exits with status 1 if positional arguments are given or if the root
+  directory cannot be found.
+  """
+  # Parse command line options
+  parser = optparse.OptionParser()
+  parser.add_option(
+    '--root-directory', dest='root_directory', default='src',
+    help='scan within DIRECTORY for histograms [optional, defaults to "src/"]',
+    metavar='DIRECTORY')
+  # optparse expands %default to the option's real default value, which keeps
+  # the help text from drifting away from the value actually used.
+  parser.add_option(
+    '--histograms-file', dest='histograms_file_location',
+    default='tools/metrics/histograms/histograms.xml',
+    help='read histogram definitions from FILE (relative to --root-directory) '
+         '[optional, defaults to "%default"]',
+    metavar='FILE')
+
+  (options, args) = parser.parse_args()
+  if args:
+    parser.print_help()
+    sys.exit(1)
+
+  logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
+
+  try:
+    changeWorkingDirectory(options.root_directory)
+  except DirectoryNotFoundException as e:
+    logging.error(e)
+    sys.exit(1)
+  chromium_histograms = readChromiumHistograms()
+  xml_histograms = readXmlHistograms(options.histograms_file_location)
+
+  unmapped_histograms = sorted(chromium_histograms - xml_histograms)
+  if unmapped_histograms:
+    logging.info('')
+    logging.info('')
+    logging.info('Histograms in Chromium but not in %s:',
+                 options.histograms_file_location)
+    logging.info('-------------------------------------------------')
+    for histogram in unmapped_histograms:
+      logging.info(' %s', histogram)
+  else:
+    logging.info('Success! No unmapped histograms found.')
+
+
+# Run the scan only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+ main()

Powered by Google App Engine
This is Rietveld 408576698