tools/metrics/histograms/pretty_print.py - Issue 13245008: Open-source histograms.xml, starting with Autofill histograms.

Side by Side Diff: tools/metrics/histograms/pretty_print.py

Issue 13245008: Open-source histograms.xml, starting with Autofill histograms. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« tools/metrics/histograms/extract_histograms.py ('K') | « tools/metrics/histograms/histograms.xml ('k') | tools/metrics/histograms/update_extension_functions.py » ('j') | tools/metrics/histograms/validate_format.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 # Copyright 2013 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text

	6 at 80 chars, enforcing standard attribute ordering, and standardizing

	7 indentation.

	8

	9 This is quite a bit more complicated than just calling tree.toprettyxml();

	10 we need additional customization, like special attribute ordering in tags

	11 and wrapping text nodes, so we implement our own full custom XML pretty-printer.

	12 """

	13

	14 from __future__ import with_statement

	15

	16 import diffutil

	17 import json

	18 import logging

	19 import shutil

	20 import sys

	21 import textwrap

	22 import xml.dom.minidom

	23

	24

	25 WRAP_COLUMN = 80

	26

	27 # Desired order for tag attributes; attributes listed here will appear first,

	28 # and in the same order as in these lists.

	29 # { tag_name: [attribute_name, ...] }

	30 ATTRIBUTE_ORDER = {

	31 'enum': ['name', 'type'],

	32 'histogram': ['name', 'enum', 'units'],
	Ilya Sherman 2013/03/29 08:22:53 Dropped "dummy" and TODO here. Dropped "dummy" and TODO here.
	33 'int': ['value', 'label'],

	34 'fieldtrial': ['name', 'separator', 'ordering'],

	35 'group': ['name', 'label'],

	36 'affected-histogram': ['name'],

	37 }

	38

	39 # Tag names for top-level nodes whose children we don't want to indent.

	40 TAGS_THAT_DONT_INDENT = [

	41 'histogram-configuration',

	42 'histograms',

	43 'fieldtrials',

	44 'enums'

	45 ]

	46

	47 # Extra vertical spacing rules for special tag names.

	48 # {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)}

	49 TAGS_THAT_HAVE_EXTRA_NEWLINE = {

	50 'histogram-configuration': (2, 1, 1),

	51 'histograms': (2, 1, 1),

	52 'fieldtrials': (2, 1, 1),

	53 'enums': (2, 1, 1),
	Ilya Sherman 2013/03/29 08:22:53 Changed from (2, 2, 1) to (2, 1, 1) Changed from (2, 2, 1) to (2, 1, 1)
	54 'histogram': (1, 1, 1),

	55 'enum': (1, 1, 1),

	56 'fieldtrial': (1, 1, 1),

	57 }

	58

	59 # Tags that we allow to be squished into a single line for brevity.

	60 TAGS_THAT_ALLOW_SINGLE_LINE = [

	61 'summary',

	62 'int',

	63 ]

	64

	65 # Tags whose children we want to alphabetize. The key is the parent tag name,

	66 # and the value is a pair of the tag name of the children we want to sort,

	67 # and a key function that maps each child node to the desired sort key.

	68 ALPHABETIZATION_RULES = {

	69 'histograms': ('histogram', lambda n: n.attributes['name'].value.lower()),

	70 'enums': ('enum', lambda n: n.attributes['name'].value.lower()),

	71 'enum': ('int', lambda n: int(n.attributes['value'].value)),

	72 'fieldtrials': ('fieldtrial', lambda n: n.attributes['name'].value.lower()),

	73 'fieldtrial': ('affected-histogram',

	74 lambda n: n.attributes['name'].value.lower()),

	75 }

	76

	77

	78 def LastLineLength(s):

	79 """Returns the length of the last line in s.

	80

	81 Args:

	82 s: A multi-line string, including newlines.

	83

	84 Returns:

	85 The length of the last line in s, in characters.

	86 """

	87 if s.rfind('\n') == -1: return len(s)

	88 return len(s) - s.rfind('\n') - len('\n')

	89

	90

	91 def XmlEscape(s):

	92 """XML-escapes the given string, replacing magic characters (&<>") with their

	93 escaped equivalents."""

	94 s = s.replace("&", "&").replace("<", "<")

	95 s = s.replace("\"", """).replace(">", ">")

	96 return s

	97

	98

	99 def PrettyPrintNode(node, indent=0):

	100 """Pretty-prints the given XML node at the given indent level.

	101

	102 Args:

	103 node: The minidom node to pretty-print.

	104 indent: The current indent level.

	105

	106 Returns:

	107 The pretty-printed string (including embedded newlines).

	108 """

	109 # Handle the top-level document node.

	110 if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE:

	111 return '\n'.join([PrettyPrintNode(n) for n in node.childNodes])

	112

	113 # Handle text nodes.

	114 if node.nodeType == xml.dom.minidom.Node.TEXT_NODE:

	115 # Wrap each paragraph in the text to fit in the 80 column limit.

	116 wrapper = textwrap.TextWrapper()

	117 wrapper.initial_indent = ' ' * indent

	118 wrapper.subsequent_indent = ' ' * indent

	119 wrapper.break_on_hyphens = False

	120 wrapper.break_long_words = False

	121 wrapper.width = WRAP_COLUMN

	122 text = XmlEscape(node.data)

	123 # Remove any common indent.

	124 text = textwrap.dedent(text.strip('\n'))

	125 lines = text.split('\n')

	126 # Split the text into paragraphs at blank line boundaries.

	127 paragraphs = [[]]

	128 for l in lines:

	129 if len(l.strip()) == 0 and len(paragraphs[-1]) > 0:

	130 paragraphs.append([])

	131 else:

	132 paragraphs[-1].append(l)

	133 # Remove trailing empty paragraph if present.

	134 if len(paragraphs) > 0 and len(paragraphs[-1]) == 0:

	135 paragraphs = paragraphs[:-1]

	136 # Wrap each paragraph and separate with two newlines.

	137 return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])

	138

	139 # Handle element nodes.

	140 if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:

	141 newlines_after_open, newlines_before_close, newlines_after_close = (

	142 TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0)))

	143 # Open the tag.

	144 s = ' ' * indent + '<' + node.tagName

	145

	146 # Calculate how much space to allow for the '>' or '/>'.

	147 closing_chars = 1

	148 if not node.childNodes:

	149 closing_chars = 2

	150

	151 # Pretty-print the attributes.

	152 attributes = node.attributes.keys()

	153 if len(attributes) > 0:

	154 # Reorder the attributes.

	155 if node.tagName in ATTRIBUTE_ORDER:

	156 recognized_attributes = (

	157 [a for a in ATTRIBUTE_ORDER[node.tagName] if a in attributes])

	158 unrecognized_attributes = (

	159 [a for a in attributes if not a in ATTRIBUTE_ORDER[node.tagName]])

	160 for a in unrecognized_attributes:

	161 logging.error(

	162 'Unrecognized attribute %s in tag %s' % (a, node.tagName))

	163 attributes = recognized_attributes + unrecognized_attributes

	164 for a in attributes:

	165 value = XmlEscape(node.attributes[a].value)

	166 # Replace sequences of whitespace with single spaces.

	167 words = value.split()

	168 a_str = ' %s="%s"' % (a, ' '.join(words))

	169 # Start a new line if the attribute will make this line too long.

	170 if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:

	171 s += '\n' + ' ' * (indent + 3)

	172 # Output everything up to the first quote.

	173 s += ' %s="' % (a)

	174 value_indent_level = LastLineLength(s)

	175 # Output one word at a time, splitting to the next line where necessary.

	176 column = value_indent_level

	177 for i, word in enumerate(words):

	178 # This is slightly too conservative since not every word will be

	179 # followed by the closing characters...

	180 if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN):

	181 s = s.rstrip() # remove any trailing whitespace

	182 s += '\n' + ' ' * value_indent_level

	183 column = value_indent_level

	184 s += word + ' '

	185 column += len(word) + 1

	186 s = s.rstrip() # remove any trailing whitespace

	187 s += '"'

	188 s = s.rstrip() # remove any trailing whitespace

	189

	190 # Pretty-print the child nodes.

	191 if node.childNodes:

	192 s += '>'

	193 # Calculate the new indent level for child nodes.

	194 new_indent = indent

	195 if node.tagName not in TAGS_THAT_DONT_INDENT:

	196 new_indent += 2

	197 child_nodes = node.childNodes

	198

	199 # Recursively pretty-print the child nodes.

	200 child_nodes = [PrettyPrintNode(n, indent=new_indent) for n in child_nodes]

	201 child_nodes = [c for c in child_nodes if len(c.strip()) > 0]

	202

	203 # Determine whether we can fit the entire node on a single line.

	204 close_tag = '</%s>' % node.tagName

	205 space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)

	206 if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and

	207 len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left):

	208 s += child_nodes[0].strip()

	209 else:

	210 s += '\n' * newlines_after_open + '\n'.join(child_nodes)

	211 s += '\n' * newlines_before_close + ' ' * indent

	212 s += close_tag

	213 else:

	214 s += '/>'

	215 s += '\n' * newlines_after_close

	216 return s

	217

	218 # Handle comment nodes.

	219 if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE:

	220 return '<!--%s-->\n' % node.data

	221

	222 # Ignore other node types. This could be a processing instruction (<? ... ?>)

	223 # or cdata section (<![CDATA[...]]!>), neither of which are legal in the

	224 # histograms XML at present.

	225 logging.error('Ignoring unrecognized node data: %s' % node.toxml())

	226 return ''

	227

	228

	229 def unsafeAppendChild(parent, child):

	230 """Append child to parent's list of children, ignoring the possibility that it

	231 is already in another node's childNodes list. Requires that the previous

	232 parent of child is discarded (to avoid non-tree DOM graphs).

	233 This can provide a significant speedup as O(n^2) operations are removed (in

	234 particular, each child insertion avoids the need to traverse the old parent's

	235 entire list of children)."""

	236 child.parentNode = None

	237 parent.appendChild(child)

	238 child.parentNode = parent

	239

	240

	241 def TransformByAlphabetizing(node):

	242 """Transform the given XML by alphabetizing specific node types according to

	243 the rules in ALPHABETIZATION_RULES.

	244

	245 Args:

	246 node: The minidom node to transform.

	247

	248 Returns:

	249 The minidom node, with children appropriately alphabetized. Note that the

	250 transformation is done in-place, i.e. the original minidom tree is modified

	251 directly.

	252 """

	253 if node.nodeType != xml.dom.minidom.Node.ELEMENT_NODE:

	254 for c in node.childNodes: TransformByAlphabetizing(c)

	255 return node

	256

	257 # Element node with a tag name that we alphabetize the children of?

	258 if node.tagName in ALPHABETIZATION_RULES:

	259 subtag, key_function = ALPHABETIZATION_RULES[node.tagName]

	260 # Remove the subnodes to be alphabetized.

	261 clone = node.cloneNode(False)

	262 subnodes = []

	263 for c in node.childNodes:

	264 if (c.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and

	265 c.tagName == subtag):

	266 subnodes.append(c)

	267 continue

	268 unsafeAppendChild(clone, c)

	269 # Sort the subnodes.

	270 subnodes.sort(key=key_function)

	271 # Readd the subnodes, transforming each recursively.

	272 for c in subnodes:

	273 unsafeAppendChild(clone, TransformByAlphabetizing(c))

	274 node = clone

	275 return node

	276

	277 # Recursively handle other element nodes and other node types.

	278 for c in node.childNodes: TransformByAlphabetizing(c)

	279 return node

	280

	281

	282 def PrettyPrint(raw_xml):

	283 """Pretty-print the given XML.

	284

	285 Args:

	286 xml: The contents of the histograms XML file, as a string.

	287

	288 Returns:

	289 The pretty-printed version.

	290 """

	291 tree = xml.dom.minidom.parseString(raw_xml)

	292 tree = TransformByAlphabetizing(tree)

	293 return PrettyPrintNode(tree)

	294

	295

	296 def main():

	297 logging.basicConfig(level=logging.INFO)

	298

	299 presubmit = ('--presubmit' in sys.argv)

	300

	301 logging.info('Loading histograms.xml...')

	302 with open('histograms.xml', 'rb') as f:

	303 xml = f.read()

	304

	305 # Check there are no CR ('\r') characters in the file.

	306 if '\r' in xml:

	307 logging.info('DOS-style line endings (CR characters) detected - these are '

	308 'not allowed. Please run dos2unix histograms.xml')

	309 sys.exit(1)

	310

	311 logging.info('Pretty-printing...')

	312 pretty = PrettyPrint(xml)

	313

	314 if xml == pretty:

	315 logging.info('histograms.xml is correctly pretty-printed.')

	316 sys.exit(0)

	317 if presubmit:

	318 logging.info('histograms.xml is not formatted correctly; run '

	319 'pretty_print.py to fix.')

	320 sys.exit(1)

	321 if not diffutil.PromptUserToAcceptDiff(

	322 xml, pretty,

	323 'Is the prettified version acceptable?'):

	324 logging.error('Aborting')

	325 return

	326

	327 logging.info('Creating backup file histograms.before.pretty-print.xml')

	328 shutil.move('histograms.xml', 'histograms.before.pretty-print.xml')

	329

	330 logging.info('Writing new histograms.xml file')

	331 with open('histograms.xml', 'wb') as f:

	332 f.write(pretty)

	333

	334

	335 if __name__ == '__main__':

	336 main()

OLD	NEW