Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(84)

Unified Diff: tools/metrics/histograms/pretty_print.py

Issue 13245008: Open-source histograms.xml, starting with Autofill histograms. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/metrics/histograms/pretty_print.py
diff --git a/tools/metrics/histograms/pretty_print.py b/tools/metrics/histograms/pretty_print.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2988f1f731d9d06c47582bb78f0c6c41a15f1cf
--- /dev/null
+++ b/tools/metrics/histograms/pretty_print.py
@@ -0,0 +1,336 @@
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text
+at 80 chars, enforcing standard attribute ordering, and standardizing
+indentation.
+
+This is quite a bit more complicated than just calling tree.toprettyxml();
+we need additional customization, like special attribute ordering in tags
+and wrapping text nodes, so we implement our own full custom XML pretty-printer.
+"""
+
+from __future__ import with_statement
+
+import diffutil
+import json
+import logging
+import shutil
+import sys
+import textwrap
+import xml.dom.minidom
+
+
# Maximum width, in columns, of any line in the pretty-printed output.
WRAP_COLUMN = 80

# Preferred attribute order for each tag. Attributes named here are emitted
# first, in exactly the order given.
# { tag_name: [attribute_name, ...] }
# Review note (Ilya Sherman, 2013/03/29 08:22:53), attached to the 'histogram'
# entry: Dropped "dummy" and TODO here.
ATTRIBUTE_ORDER = {
  'enum': ['name', 'type'],
  'histogram': ['name', 'enum', 'units'],
  'int': ['value', 'label'],
  'fieldtrial': ['name', 'separator', 'ordering'],
  'group': ['name', 'label'],
  'affected-histogram': ['name'],
}

# Top-level tags whose children stay at the parent's indent level.
TAGS_THAT_DONT_INDENT = [
  'histogram-configuration',
  'histograms',
  'fieldtrials',
  'enums',
]

# Vertical spacing shared by the top-level container tags and by the entries
# they contain, respectively.
# (newlines_after_open, newlines_before_close, newlines_after_close)
_CONTAINER_TAG_SPACING = (2, 1, 1)
_ENTRY_TAG_SPACING = (1, 1, 1)

# Extra vertical spacing rules for special tag names.
# {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)}
# Review note (Ilya Sherman, 2013/03/29 08:22:53), attached to the 'enums'
# entry: Changed from (2, 2, 1) to (2, 1, 1)
TAGS_THAT_HAVE_EXTRA_NEWLINE = {
  'histogram-configuration': _CONTAINER_TAG_SPACING,
  'histograms': _CONTAINER_TAG_SPACING,
  'fieldtrials': _CONTAINER_TAG_SPACING,
  'enums': _CONTAINER_TAG_SPACING,
  'histogram': _ENTRY_TAG_SPACING,
  'enum': _ENTRY_TAG_SPACING,
  'fieldtrial': _ENTRY_TAG_SPACING,
}

# Tags that may be collapsed onto a single line when their content fits.
TAGS_THAT_ALLOW_SINGLE_LINE = [
  'summary',
  'int',
]


def _LowercaseName(n):
  """Sort key: the node's 'name' attribute, lowercased."""
  return n.attributes['name'].value.lower()


def _IntegerValue(n):
  """Sort key: the node's 'value' attribute, parsed as an integer."""
  return int(n.attributes['value'].value)


# Which children get alphabetized. The key is the parent tag name; the value
# pairs the tag name of the children to sort with the function that maps each
# such child to its sort key.
ALPHABETIZATION_RULES = {
  'histograms': ('histogram', _LowercaseName),
  'enums': ('enum', _LowercaseName),
  'enum': ('int', _IntegerValue),
  'fieldtrials': ('fieldtrial', _LowercaseName),
  'fieldtrial': ('affected-histogram', _LowercaseName),
}
+
+
def LastLineLength(s):
  """Returns the length of the last line in s.

  Args:
    s: A string that may span several lines (newline-separated).

  Returns:
    The number of characters following the final newline in s, or the length
    of the whole string when s contains no newline.
  """
  # rpartition() yields everything after the last '\n'; when there is no
  # newline at all, that tail is the entire string.
  _, _, last_line = s.rpartition('\n')
  return len(last_line)
+
+
def XmlEscape(s):
  """XML-escapes the given string.

  Args:
    s: The raw string to escape.

  Returns:
    A copy of s in which the magic characters & < " > are replaced by
    &amp; &lt; &quot; &gt; respectively.
  """
  # '&' has to be handled first; otherwise the ampersands introduced by the
  # other entities would themselves get escaped.
  replacements = (
      ('&', '&amp;'),
      ('<', '&lt;'),
      ('"', '&quot;'),
      ('>', '&gt;'),
  )
  for magic_char, entity in replacements:
    s = s.replace(magic_char, entity)
  return s
+
+
def _PrettyPrintTextNode(node, indent):
  """Wraps the text of a text node to WRAP_COLUMN at the given indent.

  Args:
    node: The minidom text node to pretty-print.
    indent: The number of spaces to put in front of every output line.

  Returns:
    The escaped text, split into paragraphs at blank lines; each paragraph is
    re-wrapped and paragraphs are separated by one blank line.
  """
  wrapper = textwrap.TextWrapper()
  wrapper.initial_indent = ' ' * indent
  wrapper.subsequent_indent = ' ' * indent
  # Never split a word: neither at hyphens nor when it is wider than a line.
  wrapper.break_on_hyphens = False
  wrapper.break_long_words = False
  wrapper.width = WRAP_COLUMN

  # Drop surrounding newlines and any indent common to all lines.
  text = textwrap.dedent(XmlEscape(node.data).strip('\n'))

  # Split the text into paragraphs at blank line boundaries. A blank line only
  # starts a new paragraph once the current one has content.
  paragraphs = [[]]
  for line in text.split('\n'):
    if not line.strip() and paragraphs[-1]:
      paragraphs.append([])
    else:
      paragraphs[-1].append(line)
  # Remove the trailing empty paragraph left behind by a final blank line.
  if not paragraphs[-1]:
    paragraphs.pop()

  return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])


def _OrderedAttributeNames(node):
  """Returns the node's attribute names in the order they should be printed.

  For tags listed in ATTRIBUTE_ORDER, the attributes named there come first
  (in that order), followed by any other attributes; each of those others is
  reported through logging.error. For all other tags the names are returned
  in the order minidom provides them.

  Args:
    node: The minidom element node whose attributes are being printed.

  Returns:
    A list of attribute names.
  """
  attributes = list(node.attributes.keys())
  if not attributes or node.tagName not in ATTRIBUTE_ORDER:
    return attributes

  preferred_order = ATTRIBUTE_ORDER[node.tagName]
  recognized = [a for a in preferred_order if a in attributes]
  unrecognized = [a for a in attributes if a not in preferred_order]
  for a in unrecognized:
    logging.error('Unrecognized attribute %s in tag %s', a, node.tagName)
  return recognized + unrecognized


def _AppendAttributes(s, node, indent, closing_chars):
  """Appends the node's attributes to a partially written open tag.

  Args:
    s: The output so far, ending with '<tagName' at the current indent.
    node: The minidom element node whose attributes are being printed.
    indent: The indent level of the tag itself.
    closing_chars: The number of columns to reserve for the '>' or '/>' that
        will terminate the tag.

  Returns:
    s extended with every attribute, wrapped so that lines stay within
    WRAP_COLUMN wherever word boundaries allow.
  """
  for a in _OrderedAttributeNames(node):
    value = XmlEscape(node.attributes[a].value)
    # Replace sequences of whitespace with single spaces.
    words = value.split()
    a_str = ' %s="%s"' % (a, ' '.join(words))
    # Start a new line if the attribute will make this line too long.
    if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
      s += '\n' + ' ' * (indent + 3)
    # Output everything up to the first quote.
    s += ' %s="' % (a)
    value_indent_level = LastLineLength(s)
    # Output one word at a time, splitting to the next line where necessary.
    column = value_indent_level
    for i, word in enumerate(words):
      # This is slightly too conservative since not every word will be
      # followed by the closing characters...
      if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN):
        s = s.rstrip()  # remove any trailing whitespace
        s += '\n' + ' ' * value_indent_level
        column = value_indent_level
      s += word + ' '
      column += len(word) + 1
    s = s.rstrip()  # remove the space that follows the last word
    s += '"'
  return s.rstrip()


def _PrettyPrintElementNode(node, indent):
  """Pretty-prints an element node, its attributes and all of its children.

  Args:
    node: The minidom element node to pretty-print.
    indent: The current indent level.

  Returns:
    The pretty-printed element, including the trailing newlines configured
    for its tag in TAGS_THAT_HAVE_EXTRA_NEWLINE.
  """
  newlines_after_open, newlines_before_close, newlines_after_close = (
      TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0)))

  # Space to allow for the '>' (tag with children) or '/>' (empty tag).
  closing_chars = 1 if node.childNodes else 2

  # Open the tag and pretty-print the attributes.
  s = ' ' * indent + '<' + node.tagName
  s = _AppendAttributes(s, node, indent, closing_chars)

  if not node.childNodes:
    s += '/>'
  else:
    s += '>'
    # Calculate the new indent level for child nodes.
    new_indent = indent
    if node.tagName not in TAGS_THAT_DONT_INDENT:
      new_indent += 2

    # Recursively pretty-print the child nodes, dropping the ones that
    # produce nothing but whitespace.
    child_nodes = [PrettyPrintNode(n, indent=new_indent)
                   for n in node.childNodes]
    child_nodes = [c for c in child_nodes if c.strip()]

    # Determine whether we can fit the entire node on a single line.
    close_tag = '</%s>' % node.tagName
    space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
    if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and
        len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left):
      s += child_nodes[0].strip()
    else:
      s += '\n' * newlines_after_open + '\n'.join(child_nodes)
      s += '\n' * newlines_before_close + ' ' * indent
    s += close_tag

  s += '\n' * newlines_after_close
  return s


def PrettyPrintNode(node, indent=0):
  """Pretty-prints the given XML node at the given indent level.

  Args:
    node: The minidom node to pretty-print.
    indent: The current indent level.

  Returns:
    The pretty-printed string (including embedded newlines). Node types other
    than document, text, element and comment are logged as errors and
    produce an empty string.
  """
  node_type = node.nodeType

  # Handle the top-level document node; its children restart at indent 0.
  if node_type == xml.dom.minidom.Node.DOCUMENT_NODE:
    return '\n'.join([PrettyPrintNode(n) for n in node.childNodes])

  if node_type == xml.dom.minidom.Node.TEXT_NODE:
    return _PrettyPrintTextNode(node, indent)

  if node_type == xml.dom.minidom.Node.ELEMENT_NODE:
    return _PrettyPrintElementNode(node, indent)

  # Comments are emitted verbatim, without re-indenting.
  if node_type == xml.dom.minidom.Node.COMMENT_NODE:
    return '<!--%s-->\n' % node.data

  # Ignore other node types. This could be a processing instruction (<? ... ?>)
  # or cdata section (<![CDATA[...]]>), neither of which are legal in the
  # histograms XML at present.
  logging.error('Ignoring unrecognized node data: %s', node.toxml())
  return ''
+
+
def unsafeAppendChild(parent, child):
  """Appends child to parent without detaching it from its previous parent.

  The child is NOT removed from the childNodes list of whatever node held it
  before, so the caller must discard that previous parent; otherwise the DOM
  stops being a tree. Skipping the removal provides a significant speedup,
  because each insertion no longer traverses the old parent's entire list of
  children (an O(n^2) cost over n insertions).

  Args:
    parent: The minidom node that receives the child.
    child: The minidom node to append.
  """
  # With no recorded parent, appendChild() has nothing to detach the child
  # from.
  child.parentNode = None
  parent.appendChild(child)
  # Make sure the child points at its new parent.
  child.parentNode = parent
+
+
def TransformByAlphabetizing(node, rules=None):
  """Transform the given XML by alphabetizing specific node types.

  The children of a matching element are reordered in place: children that
  are not subject to sorting come first, in their original relative order,
  followed by the sorted children, each of which is transformed recursively.
  Elements without a rule, and non-element nodes, are left as they are and
  only their children are visited.

  Because the reordering happens on the node itself, it remains visible to
  callers that hold a reference to an ancestor (e.g. the document) and ignore
  the return value of the recursive calls.

  Args:
    node: The minidom node to transform.
    rules: Optional dict of { parent_tag: (child_tag, key_function) } that
        selects which children to sort and how. Defaults to
        ALPHABETIZATION_RULES.

  Returns:
    The same minidom node that was passed in, with children appropriately
    alphabetized. The original minidom tree is modified directly.
  """
  if rules is None:
    rules = ALPHABETIZATION_RULES

  is_element = node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE
  if not is_element or node.tagName not in rules:
    # Nothing to sort at this level; just recurse into the children.
    for c in node.childNodes:
      TransformByAlphabetizing(c, rules)
    return node

  subtag, key_function = rules[node.tagName]

  # Partition the children into those to be alphabetized and the rest.
  subnodes = []
  other_nodes = []
  for c in node.childNodes:
    if (c.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and
        c.tagName == subtag):
      subnodes.append(c)
    else:
      other_nodes.append(c)

  # Sort before touching the tree, so that a failing key function leaves the
  # node unmodified.
  subnodes.sort(key=key_function)

  # Detach every child. Removing from the front keeps each removal cheap,
  # since the child being removed is always found at the head of the list.
  while node.firstChild:
    node.removeChild(node.firstChild)

  # Re-add the children in their new order, transforming the sorted ones
  # recursively.
  for c in other_nodes:
    node.appendChild(c)
  for c in subnodes:
    node.appendChild(TransformByAlphabetizing(c, rules))
  return node
+
+
def PrettyPrint(raw_xml):
  """Pretty-print the given XML.

  The input is parsed with minidom, passed through TransformByAlphabetizing
  and then rendered with PrettyPrintNode.

  Args:
    raw_xml: The contents of the histograms XML file, as a string.

  Returns:
    The pretty-printed version.
  """
  return PrettyPrintNode(
      TransformByAlphabetizing(xml.dom.minidom.parseString(raw_xml)))
+
+
def main():
  """Checks, and optionally rewrites, histograms.xml in the current directory.

  Exits with status 1 if the file contains CR characters, and with status 0
  if it is already pretty-printed. When '--presubmit' is among the
  command-line arguments, a badly formatted file makes the script exit with
  status 1 without modifying anything. Otherwise the user is shown the diff
  and, upon acceptance, the original is moved to
  histograms.before.pretty-print.xml and the prettified text is written to
  histograms.xml.
  """
  logging.basicConfig(level=logging.INFO)

  is_presubmit = '--presubmit' in sys.argv

  logging.info('Loading histograms.xml...')
  with open('histograms.xml', 'rb') as xml_file:
    original_xml = xml_file.read()

  # Check there are no CR ('\r') characters in the file.
  if '\r' in original_xml:
    logging.info(
        'DOS-style line endings (CR characters) detected - '
        'these are not allowed. Please run dos2unix histograms.xml')
    sys.exit(1)

  logging.info('Pretty-printing...')
  pretty_xml = PrettyPrint(original_xml)

  if original_xml == pretty_xml:
    logging.info('histograms.xml is correctly pretty-printed.')
    sys.exit(0)

  # In presubmit mode we only report; the file is never rewritten.
  if is_presubmit:
    logging.info(
        'histograms.xml is not formatted correctly; '
        'run pretty_print.py to fix.')
    sys.exit(1)

  accepted = diffutil.PromptUserToAcceptDiff(
      original_xml, pretty_xml, 'Is the prettified version acceptable?')
  if not accepted:
    logging.error('Aborting')
    return

  # Keep the original around under a backup name before overwriting it.
  logging.info('Creating backup file histograms.before.pretty-print.xml')
  shutil.move('histograms.xml', 'histograms.before.pretty-print.xml')

  logging.info('Writing new histograms.xml file')
  with open('histograms.xml', 'wb') as xml_file:
    xml_file.write(pretty_xml)


if __name__ == '__main__':
  main()

Powered by Google App Engine
This is Rietveld 408576698