Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright 2013 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text | |
| 6 at 80 chars, enforcing standard attribute ordering, and standardizing | |
| 7 indentation. | |
| 8 | |
| 9 This is quite a bit more complicated than just calling tree.toprettyxml(); | |
| 10 we need additional customization, like special attribute ordering in tags | |
| 11 and wrapping text nodes, so we implement our own full custom XML pretty-printer. | |
| 12 """ | |
| 13 | |
| 14 from __future__ import with_statement | |
| 15 | |
| 16 import diffutil | |
| 17 import json | |
| 18 import logging | |
| 19 import shutil | |
| 20 import sys | |
| 21 import textwrap | |
| 22 import xml.dom.minidom | |
| 23 | |
| 24 | |
# Maximum width, in columns, of any line in the pretty-printed output.
WRAP_COLUMN = 80

# Preferred ordering of attributes within each tag. Attributes named here are
# emitted first, in exactly the order given.
# { tag_name: [attribute_name, ...] }
ATTRIBUTE_ORDER = {
  'enum': ['name', 'type'],
  # Review note (Ilya Sherman, 2013/03/29 08:22:53): Dropped "dummy" and TODO
  # here.
  'histogram': ['name', 'enum', 'units'],
  'int': ['value', 'label'],
  'fieldtrial': ['name', 'separator', 'ordering'],
  'group': ['name', 'label'],
  'affected-histogram': ['name'],
}

# Top-level tags whose children are printed at the parent's indent level
# instead of being indented further.
TAGS_THAT_DONT_INDENT = [
  'histogram-configuration',
  'histograms',
  'fieldtrials',
  'enums'
]

# Vertical spacing overrides for particular tags.
# {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)}
TAGS_THAT_HAVE_EXTRA_NEWLINE = {
  'histogram-configuration': (2, 1, 1),
  'histograms': (2, 1, 1),
  'fieldtrials': (2, 1, 1),
  # Review note (Ilya Sherman, 2013/03/29 08:22:53): Changed from (2, 2, 1)
  # to (2, 1, 1).
  'enums': (2, 1, 1),
  'histogram': (1, 1, 1),
  'enum': (1, 1, 1),
  'fieldtrial': (1, 1, 1),
}

# Tags that may be collapsed onto a single line when they fit.
TAGS_THAT_ALLOW_SINGLE_LINE = [
  'summary',
  'int',
]


def _LowercaseNameKey(n):
  """Sort key: the node's 'name' attribute, lowercased."""
  return n.attributes['name'].value.lower()


def _IntegerValueKey(n):
  """Sort key: the node's 'value' attribute, parsed as an integer."""
  return int(n.attributes['value'].value)


# Which children get alphabetized. The key is the parent tag name; the value
# pairs the tag name of the children to sort with a function mapping each such
# child node to its sort key.
ALPHABETIZATION_RULES = {
  'histograms': ('histogram', _LowercaseNameKey),
  'enums': ('enum', _LowercaseNameKey),
  'enum': ('int', _IntegerValueKey),
  'fieldtrials': ('fieldtrial', _LowercaseNameKey),
  'fieldtrial': ('affected-histogram', _LowercaseNameKey),
}
| 76 | |
| 77 | |
def LastLineLength(s):
  """Measures the final line of a (possibly multi-line) string.

  Args:
    s: A string that may contain newline characters.

  Returns:
    The number of characters after the last newline in s, or the length of
    the whole string when s contains no newline.
  """
  # rpartition() yields the whole string as the tail when there is no newline.
  _, _, last_line = s.rpartition('\n')
  return len(last_line)
| 89 | |
| 90 | |
def XmlEscape(s):
  """XML-escapes the given string.

  Replaces the magic characters (& < > ") with their entity references.

  Args:
    s: The raw string to escape.

  Returns:
    A copy of s that is safe to embed in XML text or in a double-quoted
    attribute value.
  """
  # The ampersand has to be handled first; otherwise the '&' introduced by the
  # other entity references would itself be escaped a second time.
  s = s.replace('&', '&amp;')
  s = s.replace('<', '&lt;')
  s = s.replace('>', '&gt;')
  s = s.replace('"', '&quot;')
  return s
| 97 | |
| 98 | |
def PrettyPrintNode(node, indent=0):
  """Renders a minidom node, and everything beneath it, as formatted XML text.

  Args:
    node: The minidom node to render.
    indent: Number of spaces of indentation to apply to this node.

  Returns:
    The rendered text, which may contain embedded newlines. Node types that
    are not handled are logged and rendered as the empty string.
  """
  node_types = xml.dom.minidom.Node
  kind = node.nodeType

  # Document node: render every top-level child (at indent 0) and join them.
  if kind == node_types.DOCUMENT_NODE:
    return '\n'.join([PrettyPrintNode(child) for child in node.childNodes])

  # Text node: re-wrap each paragraph so it fits within WRAP_COLUMN.
  if kind == node_types.TEXT_NODE:
    margin = ' ' * indent
    wrapper = textwrap.TextWrapper()
    wrapper.width = WRAP_COLUMN
    wrapper.initial_indent = margin
    wrapper.subsequent_indent = margin
    wrapper.break_long_words = False
    wrapper.break_on_hyphens = False
    # Escape first, then drop surrounding newlines and any shared indentation.
    text = textwrap.dedent(XmlEscape(node.data).strip('\n'))
    # Group lines into paragraphs; a blank line closes the current paragraph,
    # but only once that paragraph has some content.
    paragraphs = [[]]
    for line in text.split('\n'):
      current = paragraphs[-1]
      if current and not line.strip():
        paragraphs.append([])
      else:
        current.append(line)
    # A trailing blank line leaves an empty paragraph behind; discard it.
    if not paragraphs[-1]:
      paragraphs.pop()
    wrapped = [wrapper.fill('\n'.join(p)) for p in paragraphs]
    return '\n\n'.join(wrapped)

  # Element node: opening tag with attributes, then children, then close tag.
  if kind == node_types.ELEMENT_NODE:
    tag = node.tagName
    newlines_after_open, newlines_before_close, newlines_after_close = (
        TAGS_THAT_HAVE_EXTRA_NEWLINE.get(tag, (1, 1, 0)))
    s = ' ' * indent + '<' + tag

    # Room to reserve at the end of the line: '>' when there are children,
    # '/>' for a self-closing tag.
    closing_chars = 1 if node.childNodes else 2

    attributes = node.attributes.keys()
    if len(attributes) > 0:
      # Known attributes go first in their configured order; anything else
      # follows and is reported.
      if tag in ATTRIBUTE_ORDER:
        preferred = ATTRIBUTE_ORDER[tag]
        known = [a for a in preferred if a in attributes]
        unknown = [a for a in attributes if a not in preferred]
        for a in unknown:
          logging.error(
              'Unrecognized attribute %s in tag %s' % (a, tag))
        attributes = known + unknown
      for a in attributes:
        # Splitting on whitespace also collapses runs of whitespace.
        words = XmlEscape(node.attributes[a].value).split()
        a_str = ' %s="%s"' % (a, ' '.join(words))
        # Move to a continuation line if the whole attribute would overflow.
        if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
          s += '\n' + ' ' * (indent + 3)
        # Emit everything up to and including the opening quote.
        s += ' %s="' % (a)
        value_indent_level = LastLineLength(s)
        column = value_indent_level
        # Emit the value word by word, wrapping under the opening quote. The
        # check is slightly conservative: it reserves room for the closing
        # characters after every word, not just the last one.
        for i, word in enumerate(words):
          if i > 0 and column + len(word) + 1 + closing_chars > WRAP_COLUMN:
            s = s.rstrip() + '\n' + ' ' * value_indent_level
            column = value_indent_level
          s += word + ' '
          column += len(word) + 1
        # Drop the space that follows the last word, then close the quote.
        s = s.rstrip() + '"'
      s = s.rstrip()

    if node.childNodes:
      s += '>'
      # Children of the non-indenting container tags stay at this level.
      child_indent = indent
      if tag not in TAGS_THAT_DONT_INDENT:
        child_indent += 2

      # Render the children, discarding any that come out blank.
      rendered = [PrettyPrintNode(n, indent=child_indent)
                  for n in node.childNodes]
      rendered = [r for r in rendered if len(r.strip()) > 0]

      close_tag = '</%s>' % tag
      space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
      fits_on_one_line = (tag in TAGS_THAT_ALLOW_SINGLE_LINE and
                          len(rendered) == 1 and
                          len(rendered[0].strip()) <= space_left)
      if fits_on_one_line:
        s += rendered[0].strip()
      else:
        s += '\n' * newlines_after_open + '\n'.join(rendered)
        s += '\n' * newlines_before_close + ' ' * indent
      s += close_tag
    else:
      s += '/>'
    s += '\n' * newlines_after_close
    return s

  # Comment node: emitted verbatim, without indentation.
  if kind == node_types.COMMENT_NODE:
    return '<!--%s-->\n' % node.data

  # Anything else, e.g. a processing instruction (<? ... ?>) or a CDATA
  # section (<![CDATA[...]]>), is not legal in the histograms XML at present,
  # so it is reported and dropped.
  logging.error('Ignoring unrecognized node data: %s' % node.toxml())
  return ''
| 227 | |
| 228 | |
def unsafeAppendChild(parent, child):
  """Appends child to parent without detaching it from its previous parent.

  The child is not removed from the childNodes list of whatever node held it
  before, so the caller must discard that previous parent; otherwise the DOM
  stops being a tree. Skipping the detach step gives a significant speedup by
  removing O(n^2) work: each insertion no longer has to traverse the old
  parent's entire list of children.

  Args:
    parent: The minidom node that receives the child.
    child: The minidom node to append.
  """
  # Forget the old parent so that appendChild() has nothing to detach from.
  child.parentNode = None
  parent.appendChild(child)
  # Record the new parent explicitly.
  child.parentNode = parent
| 239 | |
| 240 | |
def TransformByAlphabetizing(node, rules=None):
  """Alphabetizes the children of specific tags, in place.

  For every element whose tag name has an entry in the rules, the children
  with the configured sub-tag are sorted by the configured key function. All
  other children of that element (text, comments, other tags) keep their
  relative order and are placed before the sorted children.

  The reordering is applied to the node itself. Sorting into a detached clone
  would be lost, because recursive callers ignore the return value.

  Args:
    node: The minidom node to transform.
    rules: Optional mapping of {parent_tag: (child_tag, key_function)}.
        Defaults to ALPHABETIZATION_RULES.

  Returns:
    The same minidom node that was passed in, with children appropriately
    alphabetized throughout its subtree.
  """
  if rules is None:
    rules = ALPHABETIZATION_RULES

  element_type = xml.dom.minidom.Node.ELEMENT_NODE

  # Documents, elements without a rule, and all other node types: just recurse.
  if node.nodeType != element_type or node.tagName not in rules:
    for c in node.childNodes:
      TransformByAlphabetizing(c, rules)
    return node

  subtag, key_function = rules[node.tagName]

  # Split the children into the ones to sort and the ones to leave alone.
  to_sort = []
  others = []
  for c in node.childNodes:
    if c.nodeType == element_type and c.tagName == subtag:
      to_sort.append(c)
    else:
      others.append(c)
  to_sort.sort(key=key_function)

  # Detach every child, then re-attach them in the new order. Removing from
  # the front means the list search always succeeds on the first element.
  while node.firstChild:
    node.removeChild(node.firstChild)
  for c in others:
    node.appendChild(c)
  # Re-add the sorted children, transforming each one recursively.
  for c in to_sort:
    node.appendChild(TransformByAlphabetizing(c, rules))
  return node
| 280 | |
| 281 | |
def PrettyPrint(raw_xml):
  """Pretty-prints the given XML.

  Args:
    raw_xml: The contents of the histograms XML file, as a string.

  Returns:
    The pretty-printed version of raw_xml.
  """
  # Parse, alphabetize the configured tags, then render the resulting tree.
  document = xml.dom.minidom.parseString(raw_xml)
  return PrettyPrintNode(TransformByAlphabetizing(document))
| 294 | |
| 295 | |
def main():
  """Pretty-prints histograms.xml in the current directory.

  With --presubmit on the command line the file is only checked: the process
  exits with status 1 if it is not already correctly formatted. Otherwise the
  user is shown a diff and, on acceptance, the original is moved to
  histograms.before.pretty-print.xml and the formatted text is written to
  histograms.xml.
  """
  logging.basicConfig(level=logging.INFO)

  presubmit = '--presubmit' in sys.argv

  logging.info('Loading histograms.xml...')
  with open('histograms.xml', 'rb') as f:
    original = f.read()

  # Carriage returns are not allowed anywhere in the file.
  if '\r' in original:
    logging.info('DOS-style line endings (CR characters) detected - these are '
                 'not allowed. Please run dos2unix histograms.xml')
    sys.exit(1)

  logging.info('Pretty-printing...')
  pretty = PrettyPrint(original)

  # Nothing to do when the file is already in canonical form.
  if original == pretty:
    logging.info('histograms.xml is correctly pretty-printed.')
    sys.exit(0)

  # In presubmit mode a formatting difference is a failure, not a prompt.
  if presubmit:
    logging.info('histograms.xml is not formatted correctly; run '
                 'pretty_print.py to fix.')
    sys.exit(1)

  accepted = diffutil.PromptUserToAcceptDiff(
      original, pretty,
      'Is the prettified version acceptable?')
  if not accepted:
    logging.error('Aborting')
    return

  # Keep the previous contents as a backup before overwriting.
  logging.info('Creating backup file histograms.before.pretty-print.xml')
  shutil.move('histograms.xml', 'histograms.before.pretty-print.xml')

  logging.info('Writing new histograms.xml file')
  with open('histograms.xml', 'wb') as f:
    f.write(pretty)


if __name__ == '__main__':
  main()
| OLD | NEW |