Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(558)

Side by Side Diff: scripts/slave/process_log_utils.py

Issue 548773004: Rename process_log_utils and related things. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master
Patch Set: Renamed remaining occurrences of "results_tracker". Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « scripts/slave/performance_log_processor.py ('k') | scripts/slave/runtest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """This module contains PerformanceLogProcessor and subclasses.
6
7 Several performance tests have complicated log output, this module is intended
8 to help buildsteps parse these logs and identify if tests had anomalies.
9
10 The classes in this file all have the same method ProcessLine, just like
11 GTestLogParser in //tools/build/scripts/common/gtest_utils.py. They also
12 construct a set of files which are used for graphing.
13
14 Note: This module is doomed to be deprecated in the future, as Telemetry
15 results will be passed more directly to the new performance dashboard.
16 """
17
18 import collections
19 import json
20 import logging
21 import os
22 import re
23
24 from common import chromium_utils
25 import config
26
# Status codes that can be returned by the evaluateCommand method.
# From buildbot.status.builder.
# See: http://docs.buildbot.net/current/developer/results.html
# The numeric values (0-5) must match buildbot's, since they are returned
# to the master to color build steps.
SUCCESS, WARNINGS, FAILURE, SKIPPED, EXCEPTION, RETRY = range(6)
31
32
33 def _FormatFloat(number):
34 """Formats float with two decimal points."""
35 if number:
36 return '%.2f' % number
37 else:
38 return '0.00'
39
40
41 def _FormatPercentage(ratio):
42 """Formats a number as a string with a percentage (e.g. 0.5 => "50%")."""
43 return '%s%%' % _FormatFloat(100 * ratio)
44
45
46 def _Divide(x, y):
47 """Divides with float division, or returns infinity if denominator is 0."""
48 if y == 0:
49 return float('inf')
50 return float(x) / y
51
52
53 def _FormatHumanReadable(number):
54 """Formats a float into three significant figures, using metric suffixes.
55
56 Only m, k, and M prefixes (for 1/1000, 1000, and 1,000,000) are used.
57 Examples:
58 0.0387 => 38.7m
59 1.1234 => 1.12
60 10866 => 10.8k
61 682851200 => 683M
62 """
63 metric_prefixes = {-3: 'm', 0: '', 3: 'k', 6: 'M'}
64 scientific = '%.2e' % float(number) # 6.83e+005
65 e_idx = scientific.find('e') # 4, or 5 if negative
66 digits = float(scientific[:e_idx]) # 6.83
67 exponent = int(scientific[e_idx + 1:]) # int('+005') = 5
68 while exponent % 3:
69 digits *= 10
70 exponent -= 1
71 while exponent > 6:
72 digits *= 10
73 exponent -= 1
74 while exponent < -3:
75 digits /= 10
76 exponent += 1
77 if digits >= 100:
78 # Don't append a meaningless '.0' to an integer number.
79 digits = int(digits)
80 # Exponent is now divisible by 3, between -3 and 6 inclusive: (-3, 0, 3, 6).
81 return '%s%s' % (digits, metric_prefixes[exponent])
82
83
84 def _JoinWithSpacesAndNewLine(words):
85 """Joins a list of words together with spaces."""
86 return ' '.join(str(w) for w in words) + '\n'
87
88
class PerformanceLogProcessor(object):
  """Parent class for performance log parsers.

  The only essential public method that subclasses must define is the method
  ProcessLine, which takes one line of a test output log and uses it
  to change the internal state of the PerformanceLogProcessor object,
  so that methods such as PerformanceLogs return the right thing.
  """

  # The file perf_expectations.json holds performance expectations.
  # For more info, see: http://goo.gl/BhYvDa
  PERF_EXPECTATIONS_PATH = 'src/tools/perf_expectations/'

  def __init__(self, revision=None, factory_properties=None,
               build_properties=None, webkit_revision='undefined'):
    """Initializes the log processor.

    Args:
      revision: Revision number; this currently could also be a git number.
          It is sent to the perf dashboard to be used as the x-value.
      factory_properties: Factory properties dict.
      build_properties: Build properties dict.
      webkit_revision: Blink revision number.

    Raises:
      ValueError: If no revision is provided.
    """
    if factory_properties is None:
      factory_properties = {}

    # Performance regression/speedup alerts.
    # Set to True once expectations have been successfully loaded by
    # LoadPerformanceExpectations; checked by evaluateCommand.
    self._read_expectations = False

    self._perf_id = factory_properties.get('perf_id')
    self._perf_name = factory_properties.get('perf_name')
    self._perf_filename = factory_properties.get('perf_filename')
    self._test_name = factory_properties.get('test_name')

    # Expectations state. _perf_data maps graph -> trace -> expectation
    # entry; _perf_test_keys and _perf_ref_keys map full key strings to
    # lists of entries (see LoadPerformanceExpectationsData).
    self._perf_data = {}
    self._perf_test_keys = {}
    self._perf_ref_keys = {}
    # Human-readable descriptions of regressions/improvements, filled in
    # by ComparePerformance.
    self._perf_regress = []
    self._perf_improve = []

    # A dict mapping output file names to lists of lines in a file.
    self._output = {}

    # Whether or not the processing has been finalized (i.e. whether
    # self._FinalizeProcessing has been called.)
    self._finalized = False

    # The text summary will be built by other methods as we go.
    # This is a list of strings with messages about the processing.
    self._text_summary = []

    # Enable expectations if the local configuration supports it.
    self._expectations = (factory_properties.get('expectations')
                          and self._perf_id and self._perf_name)
    if self._expectations and not self._perf_filename:
      self._perf_filename = os.path.join(self.PERF_EXPECTATIONS_PATH,
                                         'perf_expectations.json')

    if revision:
      self._revision = revision
    else:
      raise ValueError('Must provide a revision to PerformanceLogProcessor.')

    self._webkit_revision = webkit_revision

    if not build_properties:
      build_properties = {}
    self._git_revision = build_properties.get('git_revision', 'undefined')
    self._version = build_properties.get('version', 'undefined')
    self._channel = build_properties.get('channel', 'undefined')
    self._webrtc_revision = build_properties.get('got_webrtc_revision',
                                                 'undefined')

    self._v8_revision = 'undefined'
    if factory_properties.get('show_v8_revision'):
      self._v8_revision = build_properties.get('got_v8_revision', 'undefined')

    # Percentile points used when summarizing histogram data.
    self._percentiles = [.1, .25, .5, .75, .90, .95, .99]

  def PerformanceLogs(self):
    """Returns the dict mapping output file names to lists of lines.

    Finalizes processing (at most once) before returning.
    """
    if not self._finalized:
      self._FinalizeProcessing()
      self._finalized = True
    return self._output

  def PerformanceSummary(self):
    """Returns a list of strings about performance changes and other info."""
    if not self._finalized:
      self._FinalizeProcessing()
      self._finalized = True
    return self.PerformanceChanges() + self._text_summary

  def _FinalizeProcessing(self):
    """Hook for subclasses to do final operations before output is returned."""
    # This method is to be defined by inheriting classes.
    pass

  def AppendLog(self, filename, data):
    """Appends some data to an output file."""
    self._output[filename] = self._output.get(filename, []) + data

  def PrependLog(self, filename, data):
    """Prepends some data to an output file."""
    self._output[filename] = data + self._output.get(filename, [])

  def FailedTests(self):  # pylint: disable=R0201
    # Base implementation reports no failures; subclasses may override.
    return []

  def SuppressionHashes(self):  # pylint: disable=R0201
    # Base implementation reports no suppression hashes.
    return []

  def ParsingErrors(self):  # pylint: disable=R0201
    # Base implementation reports no parsing errors.
    return []

  def LoadPerformanceExpectationsData(self, all_perf_data):
    """Load the expectations data.

    All keys in perf_expectations have 4 components:
      slave/test/graph/trace

    LoadPerformanceExpectationsData finds all keys that match the initial
    portion of the string ("slave/test") and adds the graph and result
    portions to the expected performance structure.

    Args:
      all_perf_data: Dict of expectation entries keyed by the 4-part key.
    """

    for perf_key in all_perf_data.keys():
      # tools/perf_expectations/tests/perf_expectations_unittest.py should have
      # a matching regular expression.
      m = re.search(r'^' + self._perf_name + '/' + self._test_name +
                    '/([\w\.-]+)/([\w\.-]+)$', perf_key)
      if not m:
        continue

      perf_data = all_perf_data[perf_key]
      graph = m.group(1)
      trace = m.group(2)

      # By default, all perf data is type=relative.
      perf_data.setdefault('type', 'relative')

      # By default, relative perf data is compare against the fqtn+'_ref'.
      if perf_data['type'] == 'relative' and 'ref' not in perf_data:
        perf_data['ref'] = '%s/%s/%s/%s_ref' % (
            self._perf_name, self._test_name, graph, trace)

      # For each test key, we add a reference in _perf_test_keys to perf_data.
      self._perf_test_keys.setdefault(perf_key, [])
      self._perf_test_keys[perf_key].append(perf_data)

      # For each ref key, we add a reference in _perf_ref_keys to perf_data.
      if 'ref' in perf_data:
        self._perf_ref_keys.setdefault(perf_data['ref'], [])
        self._perf_ref_keys[perf_data['ref']].append(perf_data)

      self._perf_data.setdefault(graph, {})
      self._perf_data[graph][trace] = perf_data

  def LoadPerformanceExpectations(self):
    """Reads the expectations JSON file and loads matching entries.

    I/O and parse errors are logged and swallowed (best-effort); on a
    successful read, _read_expectations is set to True.
    """
    if not self._expectations:
      # self._expectations is false when a given factory doesn't enable
      # expectations, or doesn't have both perf_id and perf_name set.
      return
    try:
      perf_file = open(self._perf_filename, 'r')
    except IOError, e:
      logging.error('I/O Error reading expectations %s(%s): %s' %
                    (self._perf_filename, e.errno, e.strerror))
      return

    perf_data = {}
    if perf_file:
      try:
        perf_data = json.load(perf_file)
      except ValueError:
        # Re-read from the start so the raw content can be logged.
        perf_file.seek(0)
        logging.error('Error parsing expectations %s: \'%s\'' %
                      (self._perf_filename, perf_file.read().strip()))
      perf_file.close()

    # Find this perf/test entry
    if perf_data and perf_data.has_key('load') and perf_data['load']:
      self.LoadPerformanceExpectationsData(perf_data)
    else:
      logging.error('not loading perf expectations: perf_data is disabled')
    self._read_expectations = True

  def TrackActualPerformance(self, graph=None, trace=None, value=None,
                             stddev=None):
    """Set actual performance data when we come across useful values.

    trace will be of the form "RESULTTYPE" or "RESULTTYPE_ref".
    A trace with _ref in its name refers to a reference build.

    Common result types for page cyclers: t, vm_rss_f_r, IO_b_b, etc.
    A test's result types vary between test types. Currently, a test
    only needs to output the appropriate text format to embed a new
    result type.
    """

    fqtn = '%s/%s/%s/%s' % (self._perf_name, self._test_name, graph, trace)
    if fqtn in self._perf_test_keys:
      for perf_data in self._perf_test_keys[fqtn]:
        perf_data['actual_test'] = value
        perf_data['actual_var'] = stddev

        # Absolute entries compare the test value directly; relative
        # entries need both the test and the reference value first.
        if perf_data['type'] == 'absolute' and 'actual_test' in perf_data:
          perf_data['actual_delta'] = perf_data['actual_test']

        elif perf_data['type'] == 'relative':
          if 'actual_test' in perf_data and 'actual_ref' in perf_data:
            perf_data['actual_delta'] = (
                perf_data['actual_test'] - perf_data['actual_ref'])

    if fqtn in self._perf_ref_keys:
      for perf_data in self._perf_ref_keys[fqtn]:
        perf_data['actual_ref'] = value

        # The test value may have arrived before the reference value, so
        # recompute the delta whenever both are present.
        if 'actual_test' in perf_data and 'actual_ref' in perf_data:
          perf_data['actual_delta'] = (
              perf_data['actual_test'] - perf_data['actual_ref'])

  def PerformanceChangesAsText(self):
    """Returns a list of strings which describe performance changes."""

    text = []

    if self._expectations and not self._read_expectations:
      text.append('MISS_EXPECTATIONS')

    if self._perf_regress:
      text.append('PERF_REGRESS: ' + ', '.join(self._perf_regress))

    if self._perf_improve:
      text.append('PERF_IMPROVE: ' + ', '.join(self._perf_improve))

    return text

  def ComparePerformance(self, graph, trace):
    """Populates internal data about improvements and regressions.

    Appends to _perf_regress / _perf_improve if the actual delta falls
    outside the [improve, regress] band for the given graph/trace.
    """
    # Skip graphs and traces we don't expect values for.
    if not graph in self._perf_data or not trace in self._perf_data[graph]:
      return

    perfdata = self._perf_data[graph][trace]
    graph_result = graph + '/' + trace

    # Skip result types that didn't calculate a delta.
    if not 'actual_delta' in perfdata:
      return

    # Skip result types that don't have regress/improve values.
    if 'regress' not in perfdata or 'improve' not in perfdata:
      return

    # Set the high and low performance tests.
    # The actual delta needs to be within this range to keep the perf test
    # green. If the results fall above or below this range, the test will go
    # red (signaling a regression) or orange (signaling a speedup).
    actual = perfdata['actual_delta']
    regress = perfdata['regress']
    improve = perfdata['improve']
    # Direction is given explicitly by 'better', or inferred from the
    # relative order of the regress/improve thresholds.
    if (('better' in perfdata and perfdata['better'] == 'lower') or
        ('better' not in perfdata and regress > improve)):
      # The "lower is better" case. (ie. time results)
      if actual < improve:
        ratio = 1 - _Divide(actual, improve)
        self._perf_improve.append('%s (%s)' % (graph_result,
                                               _FormatPercentage(ratio)))
      elif actual > regress:
        ratio = _Divide(actual, regress) - 1
        self._perf_regress.append('%s (%s)' % (graph_result,
                                               _FormatPercentage(ratio)))
    else:
      # The "higher is better" case. (ie. score results)
      if actual > improve:
        ratio = _Divide(actual, improve) - 1
        self._perf_improve.append('%s (%s)' % (graph_result,
                                               _FormatPercentage(ratio)))
      elif actual < regress:
        ratio = 1 - _Divide(actual, regress)
        self._perf_regress.append('%s (%s)' % (graph_result,
                                               _FormatPercentage(ratio)))

  def PerformanceChanges(self):
    """Compares actual and expected results.

    Returns:
      A list of strings indicating improvements or regressions.
    """
    # Compare actual and expected results.
    for graph in self._perf_data:
      for trace in self._perf_data[graph]:
        self.ComparePerformance(graph, trace)

    return self.PerformanceChangesAsText()

  # Unused argument cmd.
  # pylint: disable=W0613
  def evaluateCommand(self, cmd):
    """Returns a status code indicating success, failure, etc.

    See: http://docs.buildbot.net/current/developer/cls-buildsteps.html

    Args:
      cmd: A command object. Not used here.

    Returns:
      A status code (One of SUCCESS, WARNINGS, FAILURE, etc.)
    """
    if self._expectations and not self._read_expectations:
      return WARNINGS

    # make sure regression and improvement logs are calculated
    self.PerformanceSummary()

    if self._perf_regress:
      return FAILURE

    if self._perf_improve:
      return WARNINGS

    # There was no change in performance, report success.
    return SUCCESS

  def ProcessLine(self, line):
    """Process one line of a log file."""
    # This method must be overridden by subclass
    pass
418
419
class GraphingLogProcessor(PerformanceLogProcessor):
  """Parent class for any log processor expecting standard data to be graphed.

  The log will be parsed looking for any lines of the forms:
    <*>RESULT <graph_name>: <trace_name>= <value> <units>
  or
    <*>RESULT <graph_name>: <trace_name>= [<value>,value,value,...] <units>
  or
    <*>RESULT <graph_name>: <trace_name>= {<mean>, <std deviation>} <units>

  For example,
    *RESULT vm_final_browser: OneTab= 8488 kb
    RESULT startup: ref= [167.00,148.00,146.00,142.00] ms
    RESULT TabCapturePerformance_foo: Capture= {30.7, 1.45} ms

  The leading * is optional; it indicates that the data from that line should
  be considered "important", which may mean for example that it's graphed by
  default.

  If multiple values are given in [], their mean and (sample) standard
  deviation will be written; if only one value is given, that will be written.
  A trailing comma is permitted in the list of values.

  NOTE: All lines except for RESULT lines are ignored, including the Avg and
  Stddev lines output by Telemetry!

  Any of the <fields> except <value> may be empty, in which case the
  not-terribly-useful defaults will be used. The <graph_name> and <trace_name>
  should not contain any spaces, colons (:) nor equals-signs (=). Furthermore,
  the <trace_name> will be used on the waterfall display, so it should be kept
  short. If the trace_name ends with '_ref', it will be interpreted as a
  reference value, and shown alongside the corresponding main value on the
  waterfall.

  Semantic note: The terms graph and chart are used interchangeably here.
  """

  # The file into which the GraphingLogProcessor will save a list of graph
  # names for use by the JS doing the plotting.
  GRAPH_LIST = config.Master.perf_graph_list

  # Matches RESULT lines; VALUE may be a bare number, a [list], or a
  # {mean, stddev} pair, distinguished later by its leading character.
  RESULTS_REGEX = re.compile(r'(?P<IMPORTANT>\*)?RESULT '
                             '(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
                             '(?P<VALUE>[\{\[]?[-\d\., ]+[\}\]]?)('
                             ' ?(?P<UNITS>.+))?')
  # Matches HISTOGRAM lines, whose value is a JSON-encoded histogram.
  HISTOGRAM_REGEX = re.compile(r'(?P<IMPORTANT>\*)?HISTOGRAM '
                               '(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
                               '(?P<VALUE_JSON>{.*})(?P<UNITS>.+)?')

  class Trace(object):
    """Encapsulates data for one trace. Here, this means one point."""

    def __init__(self):
      # Whether the point was marked important (leading '*' in the log).
      self.important = False
      self.value = 0.0
      self.stddev = 0.0

    def __str__(self):
      result = _FormatHumanReadable(self.value)
      if self.stddev:
        result += '+/-%s' % _FormatHumanReadable(self.stddev)
      return result

  class Graph(object):
    """Encapsulates a set of points that should appear on the same graph."""

    def __init__(self):
      self.units = None
      # Maps trace name to Trace object.
      self.traces = {}

    def IsImportant(self):
      """A graph is considered important if any of its traces is important."""
      for trace in self.traces.itervalues():
        if trace.important:
          return True
      return False

    def BuildTracesDict(self):
      """Returns a dictionary mapping trace names to [value, stddev]."""
      traces_dict = {}
      for name, trace in self.traces.items():
        traces_dict[name] = [str(trace.value), str(trace.stddev)]
      return traces_dict

  def __init__(self, *args, **kwargs):
    """Initiates this log processor."""
    PerformanceLogProcessor.__init__(self, *args, **kwargs)

    # A dict of Graph objects, by name.
    self._graphs = {}

    # Load performance expectations for this test.
    self.LoadPerformanceExpectations()

  def ProcessLine(self, line):
    """Processes one result line, and updates the state accordingly."""
    results_match = self.RESULTS_REGEX.search(line)
    histogram_match = self.HISTOGRAM_REGEX.search(line)
    if results_match:
      self._ProcessResultLine(results_match)
    elif histogram_match:
      self._ProcessHistogramLine(histogram_match)

  def _ProcessResultLine(self, line_match):
    """Processes a line that matches the standard RESULT line format.

    Args:
      line_match: A MatchObject as returned by re.search.
    """
    match_dict = line_match.groupdict()
    graph_name = match_dict['GRAPH'].strip()
    trace_name = match_dict['TRACE'].strip()

    graph = self._graphs.get(graph_name, self.Graph())
    graph.units = match_dict['UNITS'] or ''
    trace = graph.traces.get(trace_name, self.Trace())
    # trace.value temporarily holds the raw matched string here; it is
    # converted to a float (or mean of a list) below.
    trace.value = match_dict['VALUE']
    trace.important = match_dict['IMPORTANT'] or False

    # Compute the mean and standard deviation for a multiple-valued item,
    # or the numerical value of a single-valued item.
    if trace.value.startswith('['):
      try:
        value_list = [float(x) for x in trace.value.strip('[],').split(',')]
      except ValueError:
        # Report, but ignore, corrupted data lines. (Lines that are so badly
        # broken that they don't even match the RESULTS_REGEX won't be
        # detected.)
        logging.warning("Bad test output: '%s'" % trace.value.strip())
        return
      trace.value, trace.stddev, filedata = self._CalculateStatistics(
          value_list, trace_name)
      assert filedata is not None
      for filename in filedata:
        self.PrependLog(filename, filedata[filename])
    elif trace.value.startswith('{'):
      stripped = trace.value.strip('{},')
      try:
        trace.value, trace.stddev = [float(x) for x in stripped.split(',')]
      except ValueError:
        logging.warning("Bad test output: '%s'" % trace.value.strip())
        return
    else:
      try:
        trace.value = float(trace.value)
      except ValueError:
        logging.warning("Bad test output: '%s'" % trace.value.strip())
        return

    graph.traces[trace_name] = trace
    self._graphs[graph_name] = graph

    # Store values in actual performance.
    self.TrackActualPerformance(graph=graph_name, trace=trace_name,
                                value=trace.value, stddev=trace.stddev)

  def _ProcessHistogramLine(self, line_match):
    """Processes a line that matches the HISTOGRAM line format.

    Args:
      line_match: A MatchObject as returned by re.search.
    """
    match_dict = line_match.groupdict()
    graph_name = match_dict['GRAPH'].strip()
    trace_name = match_dict['TRACE'].strip()
    units = (match_dict['UNITS'] or '').strip()
    histogram_json = match_dict['VALUE_JSON']
    important = match_dict['IMPORTANT'] or False
    try:
      histogram_data = json.loads(histogram_json)
    except ValueError:
      # Report, but ignore, corrupted data lines. (Lines that are so badly
      # broken that they don't even match the HISTOGRAM_REGEX won't be
      # detected.)
      logging.warning("Bad test output: '%s'" % histogram_json.strip())
      return

    # Compute percentile data, create a graph for all percentile values.
    percentiles = self._CalculatePercentiles(histogram_data, trace_name)
    for i in percentiles:
      percentile_graph_name = graph_name + "_" + str(i['percentile'])
      graph = self._graphs.get(percentile_graph_name, self.Graph())
      graph.units = units
      trace = graph.traces.get(trace_name, self.Trace())
      trace.value = i['value']
      trace.important = important
      graph.traces[trace_name] = trace
      self._graphs[percentile_graph_name] = graph
      self.TrackActualPerformance(graph=percentile_graph_name,
                                  trace=trace_name,
                                  value=i['value'])

    # Compute geometric mean and standard deviation.
    graph = self._graphs.get(graph_name, self.Graph())
    graph.units = units
    trace = graph.traces.get(trace_name, self.Trace())
    trace.value, trace.stddev = self._CalculateHistogramStatistics(
        histogram_data, trace_name)
    trace.important = important
    graph.traces[trace_name] = trace
    self._graphs[graph_name] = graph
    self.TrackActualPerformance(graph=graph_name, trace=trace_name,
                                value=trace.value, stddev=trace.stddev)

  # _CalculateStatistics needs to be a member function.
  # pylint: disable=R0201
  # Unused argument value_list.
  # pylint: disable=W0613
  def _CalculateStatistics(self, value_list, trace_name):
    """Returns a tuple with some statistics based on the given value list.

    This method may be overridden by subclasses wanting a different standard
    deviation calcuation (or some other sort of error value entirely).

    Args:
      value_list: the list of values to use in the calculation
      trace_name: the trace that produced the data (not used in the base
          implementation, but subclasses may use it)

    Returns:
      A 3-tuple - mean, standard deviation, and a dict which is either
      empty or contains information about some file contents.
    """
    mean, stddev = chromium_utils.MeanAndStandardDeviation(value_list)
    return mean, stddev, {}

  def _CalculatePercentiles(self, histogram, trace_name):
    """Returns a list of percentile values from a histogram.

    This method may be overridden by subclasses.

    Args:
      histogram: histogram data (relevant keys: "buckets", and for each bucket,
          "min", "max" and "count").
      trace_name: the trace that produced the data (not used in the base
          implementation, but subclasses may use it)

    Returns:
      A list of dicts, each of which has the keys "percentile" and "value".
    """
    return chromium_utils.HistogramPercentiles(histogram, self._percentiles)

  def _CalculateHistogramStatistics(self, histogram, trace_name):
    """Returns the geometric mean and standard deviation for a histogram.

    This method may be overridden by subclasses.

    Args:
      histogram: histogram data (relevant keys: "buckets", and for each bucket,
          "min", "max" and "count").
      trace_name: the trace that produced the data (not used in the base
          implementation, but subclasses may use it)

    Returns:
      A 2-tuple of (geometric mean, standard deviation).
    """
    geom_mean, stddev = chromium_utils.GeomMeanAndStdDevFromHistogram(histogram)
    return geom_mean, stddev

  def _BuildSummaryJson(self, graph):
    """Returns JSON with the data in the given graph plus revision information.

    Args:
      graph: A GraphingLogProcessor.Graph object.

    Returns:
      The format output here is the "-summary.dat line" format; that is, it
      is a JSON encoding of a dictionary that has the key "traces"
    """
    assert self._revision, 'revision must always be present'

    # OrderedDict keeps the key order stable in the emitted JSON.
    graph_dict = collections.OrderedDict([
        ('traces', graph.BuildTracesDict()),
        ('rev', str(self._revision)),
        ('git_revision', str(self._git_revision)),
        ('webkit_rev', str(self._webkit_revision)),
        ('webrtc_rev', str(self._webrtc_revision)),
        ('v8_rev', str(self._v8_revision)),
        ('ver', str(self._version)),
        ('chan', str(self._channel)),
        ('units', str(graph.units)),
    ])

    # Include a sorted list of important trace names if there are any.
    important = [t for t in graph.traces.keys() if graph.traces[t].important]
    if important:
      graph_dict['important'] = sorted(important)

    return json.dumps(graph_dict)

  def _FinalizeProcessing(self):
    # Called once (via PerformanceLogs/PerformanceSummary) before output
    # is returned.
    self._CreateSummaryOutput()
    self._GenerateGraphInfo()

  def _CreateSummaryOutput(self):
    """Writes the summary data file and collect the waterfall display text.

    The summary file contains JSON-encoded data.

    The waterfall contains lines for each important trace, in the form
      tracename: value< (refvalue)>
    """

    for graph_name, graph in self._graphs.iteritems():
      # Write a line in the applicable summary file for each graph.
      # New lines are prepended so the most recent entry comes first.
      filename = ('%s-summary.dat' % graph_name)
      data = [self._BuildSummaryJson(graph) + '\n']
      self._output[filename] = data + self._output.get(filename, [])

      # Add a line to the waterfall for each important trace.
      for trace_name, trace in graph.traces.iteritems():
        if trace_name.endswith('_ref'):
          continue
        if trace.important:
          display = '%s: %s' % (trace_name, _FormatHumanReadable(trace.value))
          if graph.traces.get(trace_name + '_ref'):
            display += ' (%s)' % _FormatHumanReadable(
                graph.traces[trace_name + '_ref'].value)
          self._text_summary.append(display)

    self._text_summary.sort()

  def _GenerateGraphInfo(self):
    """Outputs a list of graphs viewed this session, for use by the plotter.

    These will be collated and sorted on the master side.
    """
    graphs = {}
    for name, graph in self._graphs.iteritems():
      graphs[name] = {'name': name,
                      'important': graph.IsImportant(),
                      'units': graph.units}
    # json.dumps emits no newlines here, so this stores a one-line list.
    self._output[self.GRAPH_LIST] = json.dumps(graphs).split('\n')

  def GetGraphs(self):
    """Returns a list of graph names."""
    return self._graphs.keys()

  def GetTraces(self, graph):
    """Returns a dict of traces associated with the given graph.

    Returns:
      A dict mapping names of traces to two-element lists of value, stddev.
    """
    return self._graphs[graph].BuildTracesDict()

  def GetUnits(self, graph):
    """Returns the units associated with the given graph."""
    return str(self._graphs[graph].units)
766
767
class GraphingPageCyclerLogProcessor(GraphingLogProcessor):
  """Handles additional processing for page-cycler timing data.

  A page-cycler log contains a "Pages: [...]" line naming the page list,
  followed by standard RESULT lines whose value lists hold one entry per
  page per iteration, page-major within each iteration.
  """

  # Fallback page list, used until a "Pages:" line is seen in the log.
  _page_list = ['(unknown)']
  PAGES_REGEXP = re.compile(r'^Pages: \[(?P<LIST>.*)\]')

  def ProcessLine(self, line):
    """Also looks for the Pages: line to find the page count."""
    # super() should be used instead of GetParentClass().
    # pylint: disable=W0212
    line_match = self.PAGES_REGEXP.search(line)
    if line_match:
      self._page_list = line_match.groupdict()['LIST'].strip().split(',')
      if not self._page_list:
        # Defensive only: str.split(',') always yields at least one element.
        self._page_list = ['(unknown)']
    else:
      chromium_utils.GetParentClass(self).ProcessLine(self, line)

  def _CalculateStatistics(self, value_list, trace_name):
    """Handles statistics generation and recording for page-cycler data.

    Sums the timings over all pages for each iteration and returns a tuple
    (mean, standard deviation) of those sums. Also saves a data file
    <revision>_<tracename>.dat holding a line of times for each URL loaded,
    for use by humans when debugging a regression.

    Args:
      value_list: Flat list of page-load times, page-major per iteration
          (page_count * iteration_count entries).
      trace_name: The trace that produced the data.

    Returns:
      A 3-tuple: (mean, standard deviation, dict of filename to file lines).
    """
    # If the name of the trace is one of the pages in the page list then we
    # are dealing with the results for that page only, not the overall
    # results. So calculate the statistics like a normal
    # GraphingLogProcessor, not the GraphingPageCyclerLogProcessor.
    if trace_name in self._page_list:
      return super(GraphingPageCyclerLogProcessor, self)._CalculateStatistics(
          value_list, trace_name)

    sums = []
    page_times = {}
    page_count = len(self._page_list)

    # Floor division is explicit and behaves identically under Python 2
    # and 3 ('/' would yield a float under Python 3); any trailing partial
    # iteration is ignored.
    iteration_count = len(value_list) // page_count
    for iteration in range(iteration_count):
      start = page_count * iteration
      end = start + page_count
      iteration_times = value_list[start:end]
      sums.append(sum(iteration_times))
      for page_index in range(page_count):
        page = self._page_list[page_index]
        page_times.setdefault(page, []).append(iteration_times[page_index])
    pagedata = self._SavePageData(page_times, trace_name)
    val, stddev = chromium_utils.FilteredMeanAndStandardDeviation(sums)
    return val, stddev, pagedata

  def _SavePageData(self, page_times, trace_name):
    """Saves a file holding the timing data for each page loaded.

    Args:
      page_times: a dict mapping a page URL to a list of its times
      trace_name: the trace that produced this set of times

    Returns:
      A dict with one entry, mapping filename to file contents.
    """
    file_data = []
    for page in self._page_list:
      times = page_times[page]
      mean, stddev = chromium_utils.FilteredMeanAndStandardDeviation(times)
      file_data.append('%s (%s+/-%s): %s' % (page,
                                             _FormatFloat(mean),
                                             _FormatFloat(stddev),
                                             _JoinWithSpacesAndNewLine(times)))

    filename = '%s_%s.dat' % (self._revision, trace_name)
    return {filename: file_data}
OLDNEW
« no previous file with comments | « scripts/slave/performance_log_processor.py ('k') | scripts/slave/runtest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698