Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(130)

Side by Side Diff: tools/metrics/histograms/pretty_print.py

Issue 13245008: Open-source histograms.xml, starting with Autofill histograms. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text
6 at 80 chars, enforcing standard attribute ordering, and standardizing
7 indentation.
8
9 This is quite a bit more complicated than just calling tree.toprettyxml();
10 we need additional customization, like special attribute ordering in tags
11 and wrapping text nodes, so we implement our own full custom XML pretty-printer.
12 """
13
14 from __future__ import with_statement
15
16 import diffutil
17 import json
18 import logging
19 import shutil
20 import sys
21 import textwrap
22 import xml.dom.minidom
23
24
# Maximum width, in characters, of any pretty-printed line.
WRAP_COLUMN = 80

# Preferred ordering of attributes within a tag. Attributes named here are
# emitted first, in exactly the order given; any others follow.
# { tag_name: [attribute_name, ...] }
ATTRIBUTE_ORDER = {
  'enum': ['name', 'type'],
  # Review note (Ilya Sherman, 2013/03/29 08:22:53): Dropped "dummy" and TODO
  # here.
  'histogram': ['name', 'enum', 'units'],
  'int': ['value', 'label'],
  'fieldtrial': ['name', 'separator', 'ordering'],
  'group': ['name', 'label'],
  'affected-histogram': ['name'],
}

# Top-level tags whose children stay at the parent's indent level.
TAGS_THAT_DONT_INDENT = [
  'histogram-configuration',
  'histograms',
  'fieldtrials',
  'enums'
]

# Vertical spacing overrides for particular tags.
# {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)}
TAGS_THAT_HAVE_EXTRA_NEWLINE = {
  'histogram-configuration': (2, 1, 1),
  'histograms': (2, 1, 1),
  'fieldtrials': (2, 1, 1),
  # Review note (Ilya Sherman, 2013/03/29 08:22:53): Changed from (2, 2, 1) to
  # (2, 1, 1).
  'enums': (2, 1, 1),
  'histogram': (1, 1, 1),
  'enum': (1, 1, 1),
  'fieldtrial': (1, 1, 1),
}

# Tags that may be collapsed onto one line when their content is short enough.
TAGS_THAT_ALLOW_SINGLE_LINE = [
  'summary',
  'int',
]


def _LowercaseNameKey(n):
  """Sort key: the node's 'name' attribute, lowercased."""
  return n.attributes['name'].value.lower()


def _IntegerValueKey(n):
  """Sort key: the node's 'value' attribute, parsed as an integer."""
  return int(n.attributes['value'].value)


# Which children get alphabetized, keyed by the parent tag name. Each value is
# a pair: the tag name of the children to sort, and the key function mapping a
# child node to its sort key.
ALPHABETIZATION_RULES = {
  'histograms': ('histogram', _LowercaseNameKey),
  'enums': ('enum', _LowercaseNameKey),
  'enum': ('int', _IntegerValueKey),
  'fieldtrials': ('fieldtrial', _LowercaseNameKey),
  'fieldtrial': ('affected-histogram', _LowercaseNameKey),
}
76
77
def LastLineLength(s):
  """Measures the final line of a (possibly multi-line) string.

  Args:
    s: A string, which may contain embedded newlines.

  Returns:
    The number of characters following the last newline in s, or the length
    of s itself when it contains no newline.
  """
  # rfind() yields -1 when there is no newline, so adding one makes the last
  # line start at offset 0 in that case and just past the newline otherwise.
  last_line_start = s.rfind('\n') + 1
  return len(s) - last_line_start
89
90
def XmlEscape(s):
  """Returns s with the XML magic characters (&<>") replaced by entities.

  Args:
    s: The string to escape.

  Returns:
    The escaped string.
  """
  # The ampersand must be handled first so that the ampersands introduced by
  # the later replacements are not escaped a second time.
  replacements = (
      ('&', '&amp;'),
      ('<', '&lt;'),
      ('"', '&quot;'),
      ('>', '&gt;'),
  )
  for magic_char, entity in replacements:
    s = s.replace(magic_char, entity)
  return s
97
98
def _PrettyPrintTextNode(node, indent):
  """Wraps the text of a text node to WRAP_COLUMN at the given indent.

  Args:
    node: The minidom text node to pretty-print.
    indent: The number of spaces to indent every output line by.

  Returns:
    The escaped, re-wrapped text, with paragraphs separated by a single blank
    line. Returns an empty string for whitespace-only text.
  """
  # Wrap each paragraph in the text to fit in the 80 column limit.
  wrapper = textwrap.TextWrapper()
  wrapper.initial_indent = ' ' * indent
  wrapper.subsequent_indent = ' ' * indent
  wrapper.break_on_hyphens = False
  wrapper.break_long_words = False
  wrapper.width = WRAP_COLUMN
  text = XmlEscape(node.data)
  # Remove any common indent.
  text = textwrap.dedent(text.strip('\n'))

  # Split the text into paragraphs at blank line boundaries. A blank line only
  # ever ends the current paragraph; it is never stored inside one. Storing it
  # (as happens when several blank lines appear in a row) would hand fill() a
  # leading newline, which it turns into a stray leading space, or would leave
  # a trailing empty paragraph that adds blank lines to the output.
  paragraphs = [[]]
  for line in text.split('\n'):
    if line.strip():
      paragraphs[-1].append(line)
    elif paragraphs[-1]:
      paragraphs.append([])
  # Remove trailing empty paragraph if present.
  if not paragraphs[-1]:
    paragraphs.pop()
  # Wrap each paragraph and separate with two newlines.
  return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])


def _OrderedAttributeNames(node):
  """Returns the element's attribute names in their output order.

  Attributes listed in ATTRIBUTE_ORDER for the tag come first, in that order;
  any other attributes follow and are reported through logging.error. Tags
  without an ATTRIBUTE_ORDER entry keep the order minidom reports.

  Args:
    node: The minidom element node.

  Returns:
    A list of attribute names.
  """
  attributes = list(node.attributes.keys())
  if node.tagName not in ATTRIBUTE_ORDER:
    return attributes
  preferred_order = ATTRIBUTE_ORDER[node.tagName]
  recognized_attributes = [a for a in preferred_order if a in attributes]
  unrecognized_attributes = [a for a in attributes if a not in preferred_order]
  for a in unrecognized_attributes:
    logging.error('Unrecognized attribute %s in tag %s', a, node.tagName)
  return recognized_attributes + unrecognized_attributes


def _PrettyPrintElement(node, indent):
  """Pretty-prints an element node, its attributes and its children.

  Args:
    node: The minidom element node to pretty-print.
    indent: The current indent level, in spaces.

  Returns:
    The pretty-printed element (including embedded newlines).
  """
  newlines_after_open, newlines_before_close, newlines_after_close = (
      TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0)))
  # Open the tag.
  s = ' ' * indent + '<' + node.tagName

  # Calculate how much space to allow for the '>' or '/>'.
  closing_chars = 1
  if not node.childNodes:
    closing_chars = 2

  # Pretty-print the attributes.
  for a in _OrderedAttributeNames(node):
    value = XmlEscape(node.attributes[a].value)
    # Replace sequences of whitespace with single spaces.
    words = value.split()
    a_str = ' %s="%s"' % (a, ' '.join(words))
    # Start a new line if the attribute will make this line too long.
    if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
      s += '\n' + ' ' * (indent + 3)
    # Output everything up to the first quote.
    s += ' %s="' % (a)
    value_indent_level = LastLineLength(s)
    # Output one word at a time, splitting to the next line where necessary.
    column = value_indent_level
    for i, word in enumerate(words):
      # This is slightly too conservative since not every word will be
      # followed by the closing characters...
      if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN):
        s = s.rstrip()  # remove any trailing whitespace
        s += '\n' + ' ' * value_indent_level
        column = value_indent_level
      s += word + ' '
      column += len(word) + 1
    s = s.rstrip()  # remove any trailing whitespace
    s += '"'
  s = s.rstrip()  # remove any trailing whitespace

  # Pretty-print the child nodes.
  if node.childNodes:
    s += '>'
    # Calculate the new indent level for child nodes.
    new_indent = indent
    if node.tagName not in TAGS_THAT_DONT_INDENT:
      new_indent += 2

    # Recursively pretty-print the child nodes, dropping the ones that come
    # out empty (e.g. whitespace-only text between elements).
    child_nodes = [PrettyPrintNode(n, indent=new_indent)
                   for n in node.childNodes]
    child_nodes = [c for c in child_nodes if c.strip()]

    # Determine whether we can fit the entire node on a single line.
    close_tag = '</%s>' % node.tagName
    space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
    if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and
        len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left):
      s += child_nodes[0].strip()
    else:
      s += '\n' * newlines_after_open + '\n'.join(child_nodes)
      s += '\n' * newlines_before_close + ' ' * indent
    s += close_tag
  else:
    s += '/>'
  s += '\n' * newlines_after_close
  return s


def PrettyPrintNode(node, indent=0):
  """Pretty-prints the given XML node at the given indent level.

  Args:
    node: The minidom node to pretty-print.
    indent: The current indent level.

  Returns:
    The pretty-printed string (including embedded newlines). Node types other
    than document, text, element and comment are reported through
    logging.error and produce an empty string.
  """
  node_type = node.nodeType

  # Handle the top-level document node.
  if node_type == xml.dom.minidom.Node.DOCUMENT_NODE:
    return '\n'.join([PrettyPrintNode(n) for n in node.childNodes])

  # Handle text nodes.
  if node_type == xml.dom.minidom.Node.TEXT_NODE:
    return _PrettyPrintTextNode(node, indent)

  # Handle element nodes.
  if node_type == xml.dom.minidom.Node.ELEMENT_NODE:
    return _PrettyPrintElement(node, indent)

  # Handle comment nodes. Comments are emitted verbatim, without indentation.
  if node_type == xml.dom.minidom.Node.COMMENT_NODE:
    return '<!--%s-->\n' % node.data

  # Ignore other node types. This could be a processing instruction (<? ... ?>)
  # or cdata section (<![CDATA[...]]>), neither of which are legal in the
  # histograms XML at present.
  logging.error('Ignoring unrecognized node data: %s', node.toxml())
  return ''
227
228
def unsafeAppendChild(parent, child):
  """Appends child to parent without detaching it from its previous parent.

  The child is added to parent's list of children while ignoring the
  possibility that it is still listed in another node's childNodes. The caller
  must therefore discard the child's previous parent, otherwise the DOM stops
  being a tree.

  This exists purely for speed: skipping the detach step avoids traversing the
  old parent's entire list of children on every insertion, which removes an
  O(n^2) cost when many children are moved.

  Args:
    parent: The minidom node that receives the child.
    child: The minidom node to append.
  """
  # Clearing the parent link first makes the child look unattached, so that
  # appendChild() has nothing to remove it from.
  child.parentNode = None
  # appendChild() returns the node it appended; re-assert its parent link.
  parent.appendChild(child).parentNode = parent
239
240
def TransformByAlphabetizing(node, rules=None):
  """Transform the given XML by alphabetizing specific node types according to
  the rules in ALPHABETIZATION_RULES.

  For every element whose tag name has a rule, the children matching the
  rule's child tag are sorted by the rule's key function and placed after all
  of the element's other children, which keep their relative order. The
  transformation is applied at every depth of the tree.

  Args:
    node: The minidom node to transform.
    rules: Optional replacement for ALPHABETIZATION_RULES, in the same format:
        { parent_tag_name: (child_tag_name, key_function) }.

  Returns:
    The same minidom node that was passed in, with children appropriately
    alphabetized. The transformation is done in-place, i.e. the original
    minidom tree is modified directly.
  """
  if rules is None:
    rules = ALPHABETIZATION_RULES

  # Element node with a tag name that we alphabetize the children of?
  if (node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and
      node.tagName in rules):
    subtag, key_function = rules[node.tagName]
    # Separate the subnodes to be alphabetized from everything else.
    other_nodes = []
    subnodes = []
    for c in node.childNodes:
      if (c.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and
          c.tagName == subtag):
        subnodes.append(c)
      else:
        other_nodes.append(c)
    # Sort the subnodes.
    subnodes.sort(key=key_function)
    reordered = other_nodes + subnodes

    # Rewrite this node's own child list. Building a sorted copy of the node
    # instead would only help if every caller swapped the copy into the tree,
    # so reordering in place is what makes nested rules take effect. It also
    # avoids a removeChild() scan per child.
    node.childNodes[:] = reordered
    # Keep the sibling links consistent with the new order.
    previous = None
    for c in reordered:
      c.previousSibling = previous
      if previous is not None:
        previous.nextSibling = c
      previous = c
    if previous is not None:
      previous.nextSibling = None

  # Recursively handle the children of every node type.
  for c in node.childNodes:
    TransformByAlphabetizing(c, rules)
  return node
280
281
def PrettyPrint(raw_xml):
  """Pretty-print the given XML.

  The input is parsed with minidom, alphabetized with
  TransformByAlphabetizing and then rendered with PrettyPrintNode.

  Args:
    raw_xml: The contents of the histograms XML file, as a string.

  Returns:
    The pretty-printed version.
  """
  return PrettyPrintNode(
      TransformByAlphabetizing(xml.dom.minidom.parseString(raw_xml)))
294
295
def main():
  """Pretty-prints histograms.xml in the current working directory.

  With --presubmit on the command line the file is only checked, never
  modified. Otherwise the user is shown the proposed changes and, if they
  accept, the original is kept as histograms.before.pretty-print.xml and
  histograms.xml is rewritten.

  Exits with status 0 when the file is already pretty-printed, and with
  status 1 when it contains CR characters or, in presubmit mode, when it is
  not correctly formatted. Returns without writing when the user rejects the
  diff.
  """
  logging.basicConfig(level=logging.INFO)

  presubmit = ('--presubmit' in sys.argv)

  logging.info('Loading histograms.xml...')
  with open('histograms.xml', 'rb') as f:
    raw_xml = f.read()
  # The file is read as bytes, while the pretty-printer produces text. Decode
  # once so that the checks below compare text with text; the pretty-printed
  # output carries no XML declaration, so UTF-8 is the encoding used on disk.
  original = raw_xml.decode('utf-8')

  # Check there are no CR ('\r') characters in the file.
  if '\r' in original:
    logging.info('DOS-style line endings (CR characters) detected - these are '
                 'not allowed. Please run dos2unix histograms.xml')
    sys.exit(1)

  logging.info('Pretty-printing...')
  pretty = PrettyPrint(raw_xml)

  if original == pretty:
    logging.info('histograms.xml is correctly pretty-printed.')
    sys.exit(0)
  if presubmit:
    logging.info('histograms.xml is not formatted correctly; run '
                 'pretty_print.py to fix.')
    sys.exit(1)
  if not diffutil.PromptUserToAcceptDiff(
      original, pretty,
      'Is the prettified version acceptable?'):
    logging.error('Aborting')
    return

  logging.info('Creating backup file histograms.before.pretty-print.xml')
  shutil.move('histograms.xml', 'histograms.before.pretty-print.xml')

  logging.info('Writing new histograms.xml file')
  with open('histograms.xml', 'wb') as f:
    # The file is opened in binary mode, so encode the text explicitly.
    f.write(pretty.encode('utf-8'))


if __name__ == '__main__':
  main()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698