tools/metrics/histograms/extract_histograms.py - Issue 13245008: Open-source histograms.xml, starting with Autofill histograms.

Side by Side Diff: tools/metrics/histograms/extract_histograms.py

Issue 13245008: Open-source histograms.xml, starting with Autofill histograms. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # Copyright 2013 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 """Extract histogram names from the description XML file.

	6

	7 For more information on the format of the XML file, which is self-documenting,

	8 see histograms.xml; however, here is a simple example to get you started. The

	9 XML below will generate the following five histograms:

	10

	11 HistogramTime

	12 HistogramEnum

	13 HistogramEnum_Chrome

	14 HistogramEnum_IE

	15 HistogramEnum_Firefox

	16

	17 <histogram-configuration>

	18

	19 <histograms>

	20

	21 <histogram name="HistogramTime" units="milliseconds">

	22 <summary>A brief description.</summary>

	23 <details>This is a more thorough description of this histogram.</details>

	24 </histogram>

	25

	26 <histogram name="HistogramEnum" enum="MyEnumType">

	27 <summary>This histogram sports an enum value type.</summary>

	28 </histogram>

	29

	30 </histograms>

	31

	32 <enums>

	33

	34 <enum name="MyEnumType" type="int">

	35 <summary>This is an example enum type, where the values mean little.</summary>

	36 <int value="1" label="FIRST_VALUE">This is the first value.</int>

	37 <int value="2" label="SECOND_VALUE">This is the second value.</int>

	38 </enum>

	39

	40 </enums>

	41

	42 <fieldtrials>

	43

	44 <fieldtrial name="BrowserType">

	45 <group name="Chrome"/>

	46 <group name="IE"/>

	47 <group name="Firefox"/>

	48 <affected-histogram name="HistogramEnum"/>

	49 </fieldtrial>

	50

	51 </fieldtrials>

	52

	53 </histogram-configuration>

	54

	55 """

	56

	57 import copy

	58 import logging

	59 import xml.dom.minidom

	60

	61

	62 MAX_FIELDTRIAL_DEPENDENCY_DEPTH = 5

	63

	64

	class Error(Exception):
	    """Raised by this module when the histogram XML cannot be processed."""

	67

	68

	def JoinChildNodes(tag):
	    """Returns the serialized XML of all children of a node as one string.

	    Args:
	      tag: The minidom node whose child nodes should be serialized.

	    Returns:
	      The concatenation of each child's toxml() output, with leading and
	      trailing whitespace stripped. Nested markup (e.g. HTML tags inside a
	      <summary>) is preserved verbatim.
	    """
	    return ''.join(child.toxml() for child in tag.childNodes).strip()

	71

	72

	def NormalizeAttributeValue(s):
	    """Collapses every run of whitespace in an attribute value to one space.

	    Attribute values in the XML file may be wrapped over multiple lines; this
	    also drops any leading and trailing whitespace.

	    Args:
	      s: The string to normalize, e.g. ' \n a b c\n d '

	    Returns:
	      The normalized string, e.g. 'a b c d'
	    """
	    # str.split() with no arguments splits on whitespace runs and discards
	    # empty leading/trailing fields, so re-joining yields the normal form.
	    return ' '.join(s.split())

	84

	85

	def NormalizeAllAttributeValues(node):
	    """Recursively normalizes all tag attribute values in the given tree.

	    The tree is modified in place: every attribute of every element node is
	    rewritten with the result of NormalizeAttributeValue().

	    Args:
	      node: The minidom node to be normalized.

	    Returns:
	      The same minidom node that was passed in, after normalization.
	    """
	    # Only element nodes carry attributes; other node kinds (the document
	    # itself, text, comments) are just traversed.
	    if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
	        for attribute in node.attributes.values():
	            attribute.value = NormalizeAttributeValue(attribute.value)

	    for child in node.childNodes:
	        NormalizeAllAttributeValues(child)
	    return node

	102

	103

	def _ExpandHistogramNameWithFieldTrial(group_name, histogram_name, fieldtrial):
	    """Creates a new histogram name based on the field trial group.

	    Args:
	      group_name: The name of the field trial group. May be empty.
	      histogram_name: The name of the histogram. May be of the form
	        Group.BaseName or BaseName.
	      fieldtrial: The <fieldtrial> minidom element. Its optional 'separator'
	        attribute (default '_') is placed between the histogram name and the
	        group name; its optional 'ordering' attribute ('prefix' or 'suffix',
	        default 'suffix') selects where the group name goes.

	    Returns:
	      A string with the expanded histogram name. If group_name is empty the
	      histogram name is returned unchanged.

	    Raises:
	      Error: If 'ordering' is neither 'prefix' nor 'suffix', or if a prefix
	        expansion is requested for a histogram name without a dot.
	    """
	    if fieldtrial.hasAttribute('separator'):
	        separator = fieldtrial.getAttribute('separator')
	    else:
	        separator = '_'

	    if fieldtrial.hasAttribute('ordering'):
	        ordering = fieldtrial.getAttribute('ordering')
	    else:
	        ordering = 'suffix'
	    # The attributes are validated even when group_name is empty, so a bad
	    # 'ordering' is always reported.
	    if ordering not in ('prefix', 'suffix'):
	        message = ('ordering needs to be prefix or suffix, value is %s' %
	                   ordering)
	        logging.error(message)
	        raise Error(message)

	    if not group_name:
	        return histogram_name

	    if ordering == 'suffix':
	        return histogram_name + separator + group_name

	    # For prefixes, the group_name is inserted between the "cluster" and the
	    # "remainder", e.g. Foo.BarHist expanded with gamma becomes
	    # Foo.gamma_BarHist. Only the first dot splits the name.
	    cluster, dot, remainder = histogram_name.partition('.')
	    if not dot:
	        message = (
	            'Prefix Field Trial expansions require histogram names which '
	            'include a dot separator. Histogram name is %s, and Field Trial '
	            'is %s' % (histogram_name, fieldtrial.getAttribute('name')))
	        logging.error(message)
	        raise Error(message)

	    return cluster + dot + group_name + separator + remainder

	152

	153

	def ExtractHistograms(filename):
	    """Computes the histogram names and descriptions from the XML file.

	    Args:
	      filename: The path to the histograms XML file.

	    Returns:
	      A dict mapping each histogram name (including names expanded from field
	      trials) to a description dict. A description always has a 'summary' key
	      ('TBD' when the XML has none) and may have 'obsolete', 'units',
	      'details', 'enum', 'fieldtrial_groups', 'fieldtrial_names' and
	      'fieldtrial_labels' keys.

	    Raises:
	      Error: If the file is not well-formatted. Every problem found is logged
	        before the exception is raised.
	    """
	    # Slurp in histograms.xml
	    with open(filename, 'r') as f:
	        raw_xml = f.read()

	    # Parse the XML into a tree
	    tree = xml.dom.minidom.parseString(raw_xml)
	    NormalizeAllAttributeValues(tree)

	    # All three passes always run so that every problem in the file is
	    # logged in a single invocation.
	    enums, enum_errors = _ExtractEnumsFromXmlTree(tree)
	    histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
	    fieldtrial_errors = _UpdateHistogramsWithFieldTrialInformation(
	        tree, histograms)

	    if enum_errors or histogram_errors or fieldtrial_errors:
	        logging.error('Error parsing %s', filename)
	        # NOTE(review, Ilya Sherman, 2013/03/29): This was sys.exit(1) in the
	        # internal repo.
	        raise Error('Error parsing %s' % filename)

	    return histograms


	def _ExtractEnumsFromXmlTree(tree):
	    """Loads every <enum> definition from the parsed XML tree.

	    Args:
	      tree: The parsed (and attribute-normalized) minidom document.

	    Returns:
	      A tuple (enums, have_errors). enums maps each enum name to a dict with
	      'name', 'values' ({int_value: {'label': ..., 'summary': ...}}) and,
	      when present in the XML, 'summary'. have_errors is True if any problem
	      was logged.
	    """
	    enums = {}
	    have_errors = False
	    last_name = None
	    for enum in tree.getElementsByTagName('enum'):
	        if enum.getAttribute('type') != 'int':
	            logging.error('Unknown enum type %s', enum.getAttribute('type'))
	            have_errors = True
	            continue

	        name = enum.getAttribute('name')
	        if last_name is not None and name.lower() < last_name.lower():
	            logging.error('Enums %s and %s are not in alphabetical order',
	                          last_name, name)
	            have_errors = True
	        last_name = name

	        if name in enums:
	            logging.error('Duplicate enum %s', name)
	            have_errors = True
	            continue

	        values = {}
	        enum_dict = {'name': name, 'values': values}
	        last_int_value = None
	        for int_tag in enum.getElementsByTagName('int'):
	            raw_value = int_tag.getAttribute('value')
	            try:
	                int_value = int(raw_value)
	            except ValueError:
	                # Report malformed values like any other formatting problem
	                # instead of letting a bare ValueError escape.
	                logging.error("Enum %s has non-integer value '%s'",
	                              name, raw_value)
	                have_errors = True
	                continue
	            if last_int_value is not None and int_value < last_int_value:
	                logging.error(
	                    'Enum %s int values %d and %d are not in numerical order',
	                    name, last_int_value, int_value)
	                have_errors = True
	            last_int_value = int_value
	            if int_value in values:
	                logging.error('Duplicate enum value %d for enum %s',
	                              int_value, name)
	                have_errors = True
	                continue
	            values[int_value] = {
	                'label': int_tag.getAttribute('label'),
	                'summary': JoinChildNodes(int_tag),
	            }

	        summary_nodes = enum.getElementsByTagName('summary')
	        if summary_nodes:
	            enum_dict['summary'] = JoinChildNodes(summary_nodes[0])

	        enums[name] = enum_dict

	    return enums, have_errors


	def _ExtractHistogramsFromXmlTree(tree, enums):
	    """Loads every <histogram> definition from the parsed XML tree.

	    The descriptions can include HTML tags.

	    Args:
	      tree: The parsed (and attribute-normalized) minidom document.
	      enums: The enum dict produced by _ExtractEnumsFromXmlTree().

	    Returns:
	      A tuple (histograms, have_errors). histograms maps each histogram name
	      to its description dict; have_errors is True if any problem was logged.
	    """
	    histograms = {}
	    have_errors = False
	    last_name = None
	    for histogram in tree.getElementsByTagName('histogram'):
	        name = histogram.getAttribute('name')
	        if last_name is not None and name.lower() < last_name.lower():
	            logging.error('Histograms %s and %s are not in alphabetical order',
	                          last_name, name)
	            have_errors = True
	        last_name = name
	        if name in histograms:
	            logging.error('Duplicate histogram definition %s', name)
	            have_errors = True
	            continue
	        description = {}
	        histograms[name] = description

	        # Find <summary> tag.
	        summary_nodes = histogram.getElementsByTagName('summary')
	        if summary_nodes:
	            description['summary'] = JoinChildNodes(summary_nodes[0])
	        else:
	            description['summary'] = 'TBD'

	        # Find <obsolete> tag.
	        obsolete_nodes = histogram.getElementsByTagName('obsolete')
	        if obsolete_nodes:
	            description['obsolete'] = JoinChildNodes(obsolete_nodes[0])

	        # Handle units.
	        if histogram.hasAttribute('units'):
	            description['units'] = histogram.getAttribute('units')

	        # Find <details> tag.
	        details_nodes = histogram.getElementsByTagName('details')
	        if details_nodes:
	            description['details'] = JoinChildNodes(details_nodes[0])

	        # Handle enum types.
	        if histogram.hasAttribute('enum'):
	            enum_name = histogram.getAttribute('enum')
	            if enum_name not in enums:
	                logging.error('Unknown enum %s in histogram %s',
	                              enum_name, name)
	                have_errors = True
	            else:
	                description['enum'] = enums[enum_name]

	    return histograms, have_errors


	def _UpdateHistogramsWithFieldTrialInformation(tree, histograms):
	    """Adds the field-trial expansions of the affected histograms.

	    For every group of every <fieldtrial>, each affected histogram is copied
	    under its expanded name and annotated with the group, trial name and
	    group label.

	    Args:
	      tree: The parsed (and attribute-normalized) minidom document.
	      histograms: The histogram dict to update. Modified in place.

	    Returns:
	      True if any problem was logged, False otherwise.
	    """
	    have_errors = False

	    # Check that the field trials are listed in alphabetical order.
	    last_name = None
	    for fieldtrial in tree.getElementsByTagName('fieldtrial'):
	        name = fieldtrial.getAttribute('name')
	        if last_name is not None and name.lower() < last_name.lower():
	            logging.error(
	                'Field trials %s and %s are not in alphabetical order',
	                last_name, name)
	            have_errors = True
	        last_name = name

	    # Field trials can depend on other field trials, so we need to be
	    # careful. Field trials whose dependencies have not yet been processed
	    # get relegated to the back of a queue to be processed later.
	    reprocess_queue = []

	    def GenerateFieldTrials():
	        for f in tree.getElementsByTagName('fieldtrial'):
	            yield 0, f
	        # The queue may grow while it is being iterated; entries appended by
	        # the loop below are picked up here as well.
	        for r, f in reprocess_queue:
	            yield r, f

	    for reprocess_count, fieldtrial in GenerateFieldTrials():
	        name = fieldtrial.getAttribute('name')
	        affected_histograms = fieldtrial.getElementsByTagName(
	            'affected-histogram')

	        # Check dependencies first: every base histogram must already exist.
	        missing_dependency = None
	        for affected_histogram in affected_histograms:
	            histogram_name = affected_histogram.getAttribute('name')
	            if histogram_name not in histograms:
	                missing_dependency = histogram_name
	                break
	        if missing_dependency is not None:
	            if reprocess_count < MAX_FIELDTRIAL_DEPENDENCY_DEPTH:
	                reprocess_queue.append((reprocess_count + 1, fieldtrial))
	            else:
	                logging.error('Field trial %s is missing its dependency %s',
	                              name, missing_dependency)
	                have_errors = True
	            continue

	        groups = fieldtrial.getElementsByTagName('group')
	        group_labels = {}
	        for group in groups:
	            group_labels[group.getAttribute('name')] = (
	                group.getAttribute('label'))

	        last_histogram_name = None
	        for affected_histogram in affected_histograms:
	            histogram_name = affected_histogram.getAttribute('name')
	            if (last_histogram_name is not None
	                    and histogram_name.lower() < last_histogram_name.lower()):
	                logging.error(
	                    'Affected histograms %s and %s of field trial %s are not '
	                    'in alphabetical order',
	                    last_histogram_name, histogram_name, name)
	                have_errors = True
	            last_histogram_name = histogram_name

	            # <with-group> children restrict the expansion to those groups;
	            # otherwise all groups of the field trial apply.
	            with_groups = affected_histogram.getElementsByTagName('with-group')
	            histogram_groups = with_groups if with_groups else groups

	            for group in histogram_groups:
	                group_name = group.getAttribute('name')
	                try:
	                    new_histogram_name = _ExpandHistogramNameWithFieldTrial(
	                        group_name, histogram_name, fieldtrial)
	                except Error:
	                    have_errors = True
	                    continue

	                if new_histogram_name != histogram_name:
	                    histograms[new_histogram_name] = copy.deepcopy(
	                        histograms[histogram_name])

	                expanded = histograms[new_histogram_name]
	                expanded.setdefault('fieldtrial_groups', []).append(group_name)
	                expanded.setdefault('fieldtrial_names', []).append(name)
	                expanded.setdefault('fieldtrial_labels', []).append(
	                    group_labels.get(group_name, ''))

	    return have_errors

	365

	366

	def ExtractNames(histograms):
	    """Returns the histogram names in sorted order.

	    Args:
	      histograms: A dict keyed by histogram name, such as the one returned by
	        ExtractHistograms().

	    Returns:
	      A new sorted list of the dict's keys.
	    """
	    return sorted(histograms)

OLD	NEW

« no previous file with comments | « tools/metrics/histograms/diffutil.py ('k') | tools/metrics/histograms/find_unmapped_histograms.py » ('j') | tools/metrics/histograms/pretty_print.py » ('J')