OLD | NEW |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 3 # Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
4 # for details. All rights reserved. Use of this source code is governed by a | 4 # for details. All rights reserved. Use of this source code is governed by a |
5 # BSD-style license that can be found in the LICENSE file. | 5 # BSD-style license that can be found in the LICENSE file. |
6 | 6 |
7 import datetime | 7 import datetime |
8 import math | 8 import math |
9 try: | |
10 from matplotlib.font_manager import FontProperties | |
11 import matplotlib.pyplot as plt | |
12 except ImportError: | |
13 pass # Only needed if we want to make graphs. | |
14 import optparse | 9 import optparse |
15 import os | 10 import os |
16 from os.path import dirname, abspath | 11 from os.path import dirname, abspath |
12 import pickle | |
17 import platform | 13 import platform |
18 import re | 14 import re |
19 import shutil | 15 import shutil |
20 import stat | 16 import stat |
21 import subprocess | 17 import subprocess |
22 import sys | 18 import sys |
23 import time | 19 import time |
24 | 20 |
25 TOOLS_PATH = os.path.join(dirname(dirname(dirname(abspath(__file__))))) | 21 TOOLS_PATH = os.path.join(dirname(dirname(dirname(abspath(__file__))))) |
26 DART_INSTALL_LOCATION = abspath(os.path.join(dirname(abspath(__file__)), | 22 DART_INSTALL_LOCATION = abspath(os.path.join(dirname(abspath(__file__)), |
(...skipping 33 matching lines...) | |
60 if append: | 56 if append: |
61 mode = 'a' | 57 mode = 'a' |
62 out = open(outfile, mode) | 58 out = open(outfile, mode) |
63 if append: | 59 if append: |
64 # Annoying Windows "feature" -- append doesn't actually append unless | 60 # Annoying Windows "feature" -- append doesn't actually append unless |
65 # you explicitly go to the end of the file. | 61 # you explicitly go to the end of the file. |
66 # http://mail.python.org/pipermail/python-list/2009-October/1221859.html | 62 # http://mail.python.org/pipermail/python-list/2009-October/1221859.html |
67 out.seek(0, os.SEEK_END) | 63 out.seek(0, os.SEEK_END) |
68 p = subprocess.Popen(cmd_list, stdout = out, stderr=subprocess.PIPE, | 64 p = subprocess.Popen(cmd_list, stdout = out, stderr=subprocess.PIPE, |
69 stdin=subprocess.PIPE, shell=self.has_shell) | 65 stdin=subprocess.PIPE, shell=self.has_shell) |
70 output, stderr = p.communicate(std_in); | 66 output, stderr = p.communicate(std_in) |
71 if output: | 67 if output: |
72 print output | 68 print output |
73 if stderr: | 69 if stderr: |
74 print stderr | 70 print stderr |
75 return output | 71 return output, stderr |
76 | 72 |
77 def time_cmd(self, cmd): | 73 def time_cmd(self, cmd): |
78 """Determine the amount of (real) time it takes to execute a given | 74 """Determine the amount of (real) time it takes to execute a given |
79 command.""" | 75 command.""" |
80 start = time.time() | 76 start = time.time() |
81 self.run_cmd(cmd) | 77 self.run_cmd(cmd) |
82 return time.time() - start | 78 return time.time() - start |
83 | 79 |
84 @staticmethod | 80 def sync_and_build(self, suites, revision_num=''): |
85 def get_build_targets(suites): | |
86 """Loop through a set of tests that we want to run and find the build | |
87 targets that are necessary. | |
88 | |
89 Args: | |
90 suites: The test suites that we wish to run.""" | |
91 build_targets = set() | |
92 for test in suites: | |
93 if test.build_targets is not None: | |
94 for target in test.build_targets: | |
95 build_targets.add(target) | |
96 return build_targets | |
97 | |
98 def sync_and_build(self, suites): | |
99 """Make sure we have the latest version of of the repo, and build it. We | 81 """Make sure we have the latest version of of the repo, and build it. We |
100 begin and end standing in DART_INSTALL_LOCATION. | 82 begin and end standing in DART_INSTALL_LOCATION. |
101 | 83 |
102 Args: | 84 Args: |
103 suites: The set of suites that we wish to build. | 85 suites: The set of suites that we wish to build. |
104 | 86 |
105 Returns: | 87 Returns: |
106 err_code = 1 if there was a problem building.""" | 88 err_code = 1 if there was a problem building.""" |
107 os.chdir(DART_INSTALL_LOCATION) | 89 os.chdir(DART_INSTALL_LOCATION) |
90 | |
91 if revision_num == '': | |
92 self.run_cmd(['gclient', 'sync']) | |
93 else: | |
94 self.run_cmd(['gclient', 'sync', '-r', revision_num, '-t']) | |
108 | 95 |
109 self.run_cmd(['gclient', 'sync']) | 96 if revision_num == '': |
110 | 97 revision_num = search_for_revision(['svn', 'info']) |
111 # On Windows, the output directory is marked as "Read Only," which causes an | 98 if revision_num == -1: |
112 # error to be thrown when we use shutil.rmtree. This helper function changes | 99 revision_num = search_for_revision(['git', 'svn', 'info']) |
113 # the permissions so we can still delete the directory. | 100 _, stderr = self.run_cmd(['python', os.path.join(DART_INSTALL_LOCATION, |
114 def on_rm_error(func, path, exc_info): | 101 'tools', 'get_archive.py'), 'sdk', '-r', revision_num]) |
115 if os.path.exists(path): | 102 if 'InvalidUriError' in stderr: |
116 os.chmod(path, stat.S_IWRITE) | 103 return 1 |
117 os.unlink(path) | |
118 # TODO(efortuna): building the sdk locally is a band-aid until all build | |
119 # platform SDKs are hosted in Google storage. Pull from https://sandbox. | |
120 # google.com/storage/?arg=dart-dump-render-tree/sdk/#dart-dump-render-tree | |
121 # %2Fsdk eventually. | |
122 # TODO(efortuna): Currently always building ia32 architecture because we | |
123 # don't have test statistics for what's passing on x64. Eliminate arch | |
124 # specification when we have tests running on x64, too. | |
125 shutil.rmtree(os.path.join(os.getcwd(), | |
126 utils.GetBuildRoot(utils.GuessOS(), 'release', 'ia32')), | |
127 onerror=on_rm_error) | |
128 | |
129 for target in TestRunner.get_build_targets(suites): | |
130 lines = self.run_cmd([os.path.join('.', 'tools', 'build.py'), '-m', | |
131 'release', '--arch=ia32', target]) | |
132 | |
133 for line in lines: | |
134 if 'BUILD FAILED' in lines: | |
135 # Someone checked in a broken build! Stop trying to make it work | |
136 # and wait to try again. | |
137 print 'Broken Build' | |
138 return 1 | |
139 return 0 | 104 return 0 |
140 | 105 |
141 def ensure_output_directory(self, dir_name): | 106 def ensure_output_directory(self, dir_name): |
142 """Test that the listed directory name exists, and if not, create one for | 107 """Test that the listed directory name exists, and if not, create one for |
143 our output to be placed. | 108 our output to be placed. |
144 | 109 |
145 Args: | 110 Args: |
146 dir_name: the directory we will create if it does not exist.""" | 111 dir_name: the directory we will create if it does not exist.""" |
147 dir_path = os.path.join(DART_INSTALL_LOCATION, 'tools', | 112 dir_path = os.path.join(DART_INSTALL_LOCATION, 'tools', |
148 'testing', 'perf_testing', dir_name) | 113 'testing', 'perf_testing', dir_name) |
149 if not os.path.exists(dir_path): | 114 if not os.path.exists(dir_path): |
150 os.makedirs(dir_path) | 115 os.makedirs(dir_path) |
151 print 'Creating output directory ', dir_path | 116 print 'Creating output directory ', dir_path |
152 | 117 |
153 def has_new_code(self): | 118 def has_new_code(self): |
154 """Tests if there are any newer versions of files on the server.""" | 119 """Tests if there are any newer versions of files on the server.""" |
155 os.chdir(DART_INSTALL_LOCATION) | 120 os.chdir(DART_INSTALL_LOCATION) |
156 # Pass 'p' in if we have a new certificate for the svn server, we want to | 121 # Pass 'p' in if we have a new certificate for the svn server, we want to |
157 # (p)ermanently accept it. | 122 # (p)ermanently accept it. |
158 results = self.run_cmd(['svn', 'st', '-u'], std_in='p\r\n') | 123 results, _ = self.run_cmd(['svn', 'st', '-u'], std_in='p\r\n') |
159 for line in results: | 124 for line in results: |
160 if '*' in line: | 125 if '*' in line: |
161 return True | 126 return True |
162 return False | 127 return False |
163 | 128 |
164 def get_os_directory(self): | 129 def get_os_directory(self): |
165 """Specifies the name of the directory for the testing build of dart, which | 130 """Specifies the name of the directory for the testing build of dart, which |
166 has yet a different naming convention from utils.getBuildRoot(...).""" | 131 has yet a different naming convention from utils.getBuildRoot(...).""" |
167 if platform.system() == 'Windows': | 132 if platform.system() == 'Windows': |
168 return 'windows' | 133 return 'windows' |
(...skipping 35 matching lines...) | |
204 print ('Error: Invalid suite %s not in ' % name) + \ | 169 print ('Error: Invalid suite %s not in ' % name) + \ |
205 '%s' % ','.join(TestBuilder.available_suite_names()) | 170 '%s' % ','.join(TestBuilder.available_suite_names()) |
206 sys.exit(1) | 171 sys.exit(1) |
207 self.suite_names = suites | 172 self.suite_names = suites |
208 self.no_build = args.no_build | 173 self.no_build = args.no_build |
209 self.no_upload = args.no_upload | 174 self.no_upload = args.no_upload |
210 self.no_test = args.no_test | 175 self.no_test = args.no_test |
211 self.verbose = args.verbose | 176 self.verbose = args.verbose |
212 return args.continuous | 177 return args.continuous |
213 | 178 |
214 def run_test_sequence(self): | 179 def run_test_sequence(self, revision_num='', num_reruns=1): |
215 """Run the set of commands to (possibly) build, run, and graph the results | 180 """Run the set of commands to (possibly) build, run, and post the results |
216 of our tests. | 181 of our tests. Returns 0 on a successful run, 1 if we fail to post results or |
182 the run failed, -1 if the build is broken. | |
217 """ | 183 """ |
218 suites = [] | 184 suites = [] |
185 success = True | |
219 for name in self.suite_names: | 186 for name in self.suite_names: |
220 suites += [TestBuilder.make_test(name, self)] | 187 for run in range(num_reruns): |
188 suites += [TestBuilder.make_test(name, self)] | |
221 | 189 |
222 if not self.no_build and self.sync_and_build(suites) == 1: | 190 if not self.no_build and self.sync_and_build(suites, revision_num) == 1: |
223 return # The build is broken. | 191 return -1 # The build is broken. |
224 | 192 |
225 for test in suites: | 193 for test in suites: |
226 test.run() | 194 success = success and test.run() |
195 if success: | |
196 return 0 | |
197 else: | |
198 return 1 | |
227 | 199 |
228 | 200 |
229 class Test(object): | 201 class Test(object): |
230 """The base class to provide shared code for different tests we will run and | 202 """The base class to provide shared code for different tests we will run and |
231 graph. At a high level, each test has three visitors (the tester, the | 203 post. At a high level, each test has three visitors (the tester and the |
232 file_processor and the grapher) that perform operations on the test object.""" | 204 file_processor) that perform operations on the test object.""" |
233 | 205 |
234 def __init__(self, result_folder_name, platform_list, variants, | 206 def __init__(self, result_folder_name, platform_list, variants, |
235 values_list, test_runner, tester, file_processor, grapher, | 207 values_list, test_runner, tester, file_processor, |
236 extra_metrics=['Geo-Mean'], build_targets=['create_sdk']): | 208 extra_metrics=['Geo-Mean']): |
237 """Args: | 209 """Args: |
238 result_folder_name: The name of the folder where a tracefile of | 210 result_folder_name: The name of the folder where a tracefile of |
239 performance results will be stored. | 211 performance results will be stored. |
240 platform_list: A list containing the platform(s) that our data has been | 212 platform_list: A list containing the platform(s) that our data has been |
241 run on. (command line, firefox, chrome, etc) | 213 run on. (command line, firefox, chrome, etc) |
242 variants: A list specifying whether we hold data about Frog | 214 variants: A list specifying whether we hold data about Frog |
243 generated code, plain JS code, or a combination of both, or | 215 generated code, plain JS code, or a combination of both, or |
244 Dart depending on the test. | 216 Dart depending on the test. |
245 values_list: A list containing the type of data we will be graphing | 217 values_list: A list containing the type of data we will be graphing |
246 (benchmarks, percentage passing, etc). | 218 (benchmarks, percentage passing, etc). |
247 test_runner: Reference to the parent test runner object that notifies a | 219 test_runner: Reference to the parent test runner object that notifies a |
248 test when to run. | 220 test when to run. |
249 tester: The visitor that actually performs the test running mechanics. | 221 tester: The visitor that actually performs the test running mechanics. |
250 file_processor: The visitor that processes files in the format | 222 file_processor: The visitor that processes files in the format |
251 appropriate for this test. | 223 appropriate for this test. |
252 grapher: The visitor that generates graphs given our test result data. | |
253 extra_metrics: A list of any additional measurements we wish to keep | 224 extra_metrics: A list of any additional measurements we wish to keep |
254 track of (such as the geometric mean of a set, the sum, etc). | 225 track of (such as the geometric mean of a set, the sum, etc).""" |
255 build_targets: The targets necessary to build to run these tests | |
256 (default target is create_sdk).""" | |
257 self.result_folder_name = result_folder_name | 226 self.result_folder_name = result_folder_name |
258 # cur_time is used as a timestamp of when this performance test was run. | 227 # cur_time is used as a timestamp of when this performance test was run. |
259 self.cur_time = str(time.mktime(datetime.datetime.now().timetuple())) | 228 self.cur_time = str(time.mktime(datetime.datetime.now().timetuple())) |
260 self.values_list = values_list | 229 self.values_list = values_list |
261 self.platform_list = platform_list | 230 self.platform_list = platform_list |
262 self.test_runner = test_runner | 231 self.test_runner = test_runner |
263 self.tester = tester | 232 self.tester = tester |
264 self.file_processor = file_processor | 233 self.file_processor = file_processor |
265 self.build_targets = build_targets | |
266 self.revision_dict = dict() | 234 self.revision_dict = dict() |
267 self.values_dict = dict() | 235 self.values_dict = dict() |
268 self.grapher = grapher | |
269 self.extra_metrics = extra_metrics | 236 self.extra_metrics = extra_metrics |
270 # Initialize our values store. | 237 # Initialize our values store. |
271 for platform in platform_list: | 238 for platform in platform_list: |
272 self.revision_dict[platform] = dict() | 239 self.revision_dict[platform] = dict() |
273 self.values_dict[platform] = dict() | 240 self.values_dict[platform] = dict() |
274 for f in variants: | 241 for f in variants: |
275 self.revision_dict[platform][f] = dict() | 242 self.revision_dict[platform][f] = dict() |
276 self.values_dict[platform][f] = dict() | 243 self.values_dict[platform][f] = dict() |
277 for val in values_list: | 244 for val in values_list: |
278 self.revision_dict[platform][f][val] = [] | 245 self.revision_dict[platform][f][val] = [] |
279 self.values_dict[platform][f][val] = [] | 246 self.values_dict[platform][f][val] = [] |
280 for extra_metric in extra_metrics: | 247 for extra_metric in extra_metrics: |
281 self.revision_dict[platform][f][extra_metric] = [] | 248 self.revision_dict[platform][f][extra_metric] = [] |
282 self.values_dict[platform][f][extra_metric] = [] | 249 self.values_dict[platform][f][extra_metric] = [] |
283 | 250 |
284 def is_valid_combination(self, platform, variant): | 251 def is_valid_combination(self, platform, variant): |
285 """Check whether data should be captured for this platform/variant | 252 """Check whether data should be captured for this platform/variant |
286 combination. | 253 combination. |
287 """ | 254 """ |
288 return True | 255 return True |
289 | 256 |
290 def run(self): | 257 def run(self): |
291 """Run the benchmarks/tests from the command line and plot the | 258 """Run the benchmarks/tests from the command line and plot the |
292 results. | 259 results. |
293 """ | 260 """ |
294 for visitor in [self.tester, self.file_processor, self.grapher]: | 261 for visitor in [self.tester, self.file_processor]: |
295 visitor.prepare() | 262 visitor.prepare() |
296 | 263 |
297 os.chdir(DART_INSTALL_LOCATION) | 264 os.chdir(DART_INSTALL_LOCATION) |
298 self.test_runner.ensure_output_directory(self.result_folder_name) | 265 self.test_runner.ensure_output_directory(self.result_folder_name) |
299 self.test_runner.ensure_output_directory(os.path.join( | 266 self.test_runner.ensure_output_directory(os.path.join( |
300 'old', self.result_folder_name)) | 267 'old', self.result_folder_name)) |
301 if not self.test_runner.no_test: | 268 if not self.test_runner.no_test: |
302 self.tester.run_tests() | 269 self.tester.run_tests() |
303 | 270 |
304 os.chdir(os.path.join('tools', 'testing', 'perf_testing')) | 271 os.chdir(os.path.join('tools', 'testing', 'perf_testing')) |
305 | 272 |
306 for afile in os.listdir(os.path.join('old', self.result_folder_name)): | 273 for afile in os.listdir(os.path.join('old', self.result_folder_name)): |
307 if not afile.startswith('.'): | 274 if not afile.startswith('.'): |
308 self.file_processor.process_file(afile, False) | 275 self.file_processor.process_file(afile, False) |
309 | 276 |
310 files = os.listdir(self.result_folder_name) | 277 files = os.listdir(self.result_folder_name) |
278 post_success = True | |
311 for afile in files: | 279 for afile in files: |
312 if not afile.startswith('.'): | 280 if not afile.startswith('.'): |
313 should_move_file = self.file_processor.process_file(afile, True) | 281 should_move_file = self.file_processor.process_file(afile, True) |
314 if should_move_file: | 282 if should_move_file: |
315 shutil.move(os.path.join(self.result_folder_name, afile), | 283 shutil.move(os.path.join(self.result_folder_name, afile), |
316 os.path.join('old', self.result_folder_name, afile)) | 284 os.path.join('old', self.result_folder_name, afile)) |
285 else: | |
286 post_success = False | |
317 | 287 |
318 if 'plt' in globals(): | 288 return post_success |
319 # Only run Matplotlib if it is installed. | |
320 self.grapher.plot_results('%s.png' % self.result_folder_name) | |
321 | 289 |
322 | 290 |
323 class Tester(object): | 291 class Tester(object): |
324 """The base level visitor class that runs tests. It contains convenience | 292 """The base level visitor class that runs tests. It contains convenience |
325 methods that many Tester objects use. Any class that would like to be a | 293 methods that many Tester objects use. Any class that would like to be a |
326 TesterVisitor must implement the run_tests() method.""" | 294 TesterVisitor must implement the run_tests() method.""" |
327 | 295 |
328 def __init__(self, test): | 296 def __init__(self, test): |
329 self.test = test | 297 self.test = test |
330 | 298 |
331 def prepare(self): | 299 def prepare(self): |
332 """Perform any initial setup required before the test is run.""" | 300 """Perform any initial setup required before the test is run.""" |
333 pass | 301 pass |
334 | 302 |
335 def add_svn_revision_to_trace(self, outfile, browser = None): | 303 def add_svn_revision_to_trace(self, outfile, browser = None): |
336 """Add the svn version number to the provided tracefile.""" | 304 """Add the svn version number to the provided tracefile.""" |
337 def search_for_revision(svn_info_command): | |
338 p = subprocess.Popen(svn_info_command, stdout = subprocess.PIPE, | |
339 stderr = subprocess.STDOUT, shell = | |
340 self.test.test_runner.has_shell) | |
341 output, _ = p.communicate() | |
342 for line in output.split('\n'): | |
343 if 'Revision' in line: | |
344 self.test.test_runner.run_cmd(['echo', line.strip()], outfile) | |
345 return True | |
346 return False | |
347 | |
348 def get_dartium_revision(): | 305 def get_dartium_revision(): |
349 version_file_name = os.path.join(DART_INSTALL_LOCATION, 'client', 'tests', | 306 version_file_name = os.path.join(DART_INSTALL_LOCATION, 'client', 'tests', |
350 'dartium', 'LAST_VERSION') | 307 'dartium', 'LAST_VERSION') |
351 version_file = open(version_file_name, 'r') | 308 version_file = open(version_file_name, 'r') |
352 version = version_file.read().split('.')[-2] | 309 version = version_file.read().split('.')[-2] |
353 version_file.close() | 310 version_file.close() |
354 return version | 311 return version |
355 | 312 |
356 if browser and browser == 'dartium': | 313 if browser and browser == 'dartium': |
357 revision = get_dartium_revision() | 314 revision = get_dartium_revision() |
358 self.test.test_runner.run_cmd(['echo', 'Revision: ' + revision], outfile) | 315 self.test.test_runner.run_cmd(['echo', 'Revision: ' + revision], outfile) |
359 elif not search_for_revision(['svn', 'info']): | 316 else: |
360 if not search_for_revision(['git', 'svn', 'info']): | 317 revision = search_for_revision(['svn', 'info']) |
361 self.test.test_runner.run_cmd(['echo', 'Revision: unknown'], outfile) | 318 if revision == -1: |
319 revision = search_for_revision(['git', 'svn', 'info']) | |
320 self.test.test_runner.run_cmd(['echo', 'Revision: ' + revision], outfile) | |
362 | 321 |
363 | 322 |
364 class Processor(object): | 323 class Processor(object): |
365 """The base level vistor class that processes tests. It contains convenience | 324 """The base level vistor class that processes tests. It contains convenience |
366 methods that many File Processor objects use. Any class that would like to be | 325 methods that many File Processor objects use. Any class that would like to be |
367 a ProcessorVisitor must implement the process_file() method.""" | 326 a ProcessorVisitor must implement the process_file() method.""" |
368 | 327 |
369 SCORE = 'Score' | 328 SCORE = 'Score' |
370 COMPILE_TIME = 'CompileTime' | 329 COMPILE_TIME = 'CompileTime' |
371 CODE_SIZE = 'CodeSize' | 330 CODE_SIZE = 'CodeSize' |
(...skipping 30 matching lines...) | |
402 dartium). | 361 dartium). |
403 | 362 |
404 Returns: True if the post was successful.""" | 363 Returns: True if the post was successful.""" |
405 return post_results.report_results(benchmark_name, score, platform, variant, | 364 return post_results.report_results(benchmark_name, score, platform, variant, |
406 revision_number, metric) | 365 revision_number, metric) |
407 | 366 |
408 def calculate_geometric_mean(self, platform, variant, svn_revision): | 367 def calculate_geometric_mean(self, platform, variant, svn_revision): |
409 """Calculate the aggregate geometric mean for JS and frog benchmark sets, | 368 """Calculate the aggregate geometric mean for JS and frog benchmark sets, |
410 given two benchmark dictionaries.""" | 369 given two benchmark dictionaries.""" |
411 geo_mean = 0 | 370 geo_mean = 0 |
412 # TODO(vsm): Suppress graphing this combination altogether. For | |
413 # now, we feed a geomean of 0. | |
414 if self.test.is_valid_combination(platform, variant): | 371 if self.test.is_valid_combination(platform, variant): |
415 for benchmark in self.test.values_list: | 372 for benchmark in self.test.values_list: |
416 geo_mean += math.log( | 373 geo_mean += math.log( |
417 self.test.values_dict[platform][variant][benchmark][ | 374 self.test.values_dict[platform][variant][benchmark][ |
418 len(self.test.values_dict[platform][variant][benchmark]) - 1]) | 375 len(self.test.values_dict[platform][variant][benchmark]) - 1]) |
419 | 376 |
420 self.test.values_dict[platform][variant]['Geo-Mean'] += \ | 377 self.test.values_dict[platform][variant]['Geo-Mean'] += \ |
421 [math.pow(math.e, geo_mean / len(self.test.values_list))] | 378 [math.pow(math.e, geo_mean / len(self.test.values_list))] |
422 self.test.revision_dict[platform][variant]['Geo-Mean'] += [svn_revision] | 379 self.test.revision_dict[platform][variant]['Geo-Mean'] += [svn_revision] |
423 | |
424 | |
425 class Grapher(object): | |
426 """The base level visitor class that generates graphs for data. It contains | |
427 convenience methods that many Grapher objects use. Any class that would like | |
428 to be a GrapherVisitor must implement the plot_results() method.""" | |
429 | 380 |
430 graph_out_dir = 'graphs' | 381 def get_score_type(self, benchmark_name): |
382 """Determine the type of score for posting -- default is 'Score' (aka | |
383 Runtime), other options are CompileTime and CodeSize.""" | |
384 return self.SCORE | |
431 | 385 |
432 def __init__(self, test): | |
433 self.color_index = 0 | |
434 self.test = test | |
435 | |
436 def prepare(self): | |
437 """Perform any initial setup required before the test is run.""" | |
438 if 'plt' in globals(): | |
439 plt.cla() # cla = clear current axes | |
440 else: | |
441 print 'Unable to import Matplotlib and therefore unable to generate ' + \ | |
442 'graphs. Please install it for this version of Python.' | |
443 self.test.test_runner.ensure_output_directory(Grapher.graph_out_dir) | |
444 | |
445 def style_and_save_perf_plot(self, chart_title, y_axis_label, size_x, size_y, | |
446 legend_loc, filename, platform_list, variants, | |
447 values_list, should_clear_axes=True): | |
448 """Sets style preferences for chart boilerplate that is consistent across | |
449 all charts, and saves the chart as a png. | |
450 Args: | |
451 size_x: the size of the printed chart, in inches, in the horizontal | |
452 direction | |
453 size_y: the size of the printed chart, in inches in the vertical direction | |
454 legend_loc: the location of the legend in on the chart. See suitable | |
455 arguments for the loc argument in matplotlib | |
456 filename: the filename that we want to save the resulting chart as | |
457 platform_list: a list containing the platform(s) that our data has been | |
458 run on. (command line, firefox, chrome, etc) | |
459 values_list: a list containing the type of data we will be graphing | |
460 (performance, percentage passing, etc) | |
461 should_clear_axes: True if we want to create a fresh graph, instead of | |
462 plotting additional lines on the current graph.""" | |
463 if should_clear_axes: | |
464 plt.cla() # cla = clear current axes | |
465 for platform in platform_list: | |
466 for f in variants: | |
467 for val in values_list: | |
468 plt.plot(self.test.revision_dict[platform][f][val], | |
469 self.test.values_dict[platform][f][val], | |
470 color=self.get_color(), label='%s-%s-%s' % (platform, f, val)) | |
471 | |
472 plt.xlabel('Revision Number') | |
473 plt.ylabel(y_axis_label) | |
474 plt.title(chart_title) | |
475 fontP = FontProperties() | |
476 fontP.set_size('small') | |
477 plt.legend(loc=legend_loc, prop = fontP) | |
478 | |
479 fig = plt.gcf() | |
480 fig.set_size_inches(size_x, size_y) | |
481 fig.savefig(os.path.join(Grapher.graph_out_dir, filename)) | |
482 | |
483 def get_color(self): | |
484 # Just a bunch of distinct colors for a potentially large number of values | |
485 # we wish to graph. | |
486 colors = [ | |
487 'blue', 'green', 'red', 'cyan', 'magenta', 'black', '#3366CC', | |
488 '#DC3912', '#FF9900', '#109618', '#990099', '#0099C6', '#DD4477', | |
489 '#66AA00', '#B82E2E', '#316395', '#994499', '#22AA99', '#AAAA11', | |
490 '#6633CC', '#E67300', '#8B0707', '#651067', '#329262', '#5574A6', | |
491 '#3B3EAC', '#B77322', '#16D620', '#B91383', '#F4359E', '#9C5935', | |
492 '#A9C413', '#2A778D', '#668D1C', '#BEA413', '#0C5922', '#743411', | |
493 '#45AFE2', '#FF3300', '#FFCC00', '#14C21D', '#DF51FD', '#15CBFF', | |
494 '#FF97D2', '#97FB00', '#DB6651', '#518BC6', '#BD6CBD', '#35D7C2', | |
495 '#E9E91F', '#9877DD', '#FF8F20', '#D20B0B', '#B61DBA', '#40BD7E', | |
496 '#6AA7C4', '#6D70CD', '#DA9136', '#2DEA36', '#E81EA6', '#F558AE', | |
497 '#C07145', '#D7EE53', '#3EA7C6', '#97D129', '#E9CA1D', '#149638', | |
498 '#C5571D'] | |
499 color = colors[self.color_index] | |
500 self.color_index = (self.color_index + 1) % len(colors) | |
501 return color | |
502 | 386 |
503 class RuntimePerformanceTest(Test): | 387 class RuntimePerformanceTest(Test): |
504 """Super class for all runtime performance testing.""" | 388 """Super class for all runtime performance testing.""" |
505 | 389 |
506 def __init__(self, result_folder_name, platform_list, platform_type, | 390 def __init__(self, result_folder_name, platform_list, platform_type, |
507 versions, benchmarks, test_runner, tester, file_processor, | 391 versions, benchmarks, test_runner, tester, file_processor): |
508 build_targets=['create_sdk']): | |
509 """Args: | 392 """Args: |
510 result_folder_name: The name of the folder where a tracefile of | 393 result_folder_name: The name of the folder where a tracefile of |
511 performance results will be stored. | 394 performance results will be stored. |
512 platform_list: A list containing the platform(s) that our data has been | 395 platform_list: A list containing the platform(s) that our data has been |
513 run on. (command line, firefox, chrome, etc) | 396 run on. (command line, firefox, chrome, etc) |
514 variants: A list specifying whether we hold data about Frog | 397 variants: A list specifying whether we hold data about Frog |
515 generated code, plain JS code, or a combination of both, or | 398 generated code, plain JS code, or a combination of both, or |
516 Dart depending on the test. | 399 Dart depending on the test. |
517 values_list: A list containing the type of data we will be graphing | 400 values_list: A list containing the type of data we will be graphing |
518 (benchmarks, percentage passing, etc). | 401 (benchmarks, percentage passing, etc). |
519 test_runner: Reference to the parent test runner object that notifies a | 402 test_runner: Reference to the parent test runner object that notifies a |
520 test when to run. | 403 test when to run. |
521 tester: The visitor that actually performs the test running mechanics. | 404 tester: The visitor that actually performs the test running mechanics. |
522 file_processor: The visitor that processes files in the format | 405 file_processor: The visitor that processes files in the format |
523 appropriate for this test. | 406 appropriate for this test. |
524 grapher: The visitor that generates graphs given our test result data. | |
525 extra_metrics: A list of any additional measurements we wish to keep | 407 extra_metrics: A list of any additional measurements we wish to keep |
526 track of (such as the geometric mean of a set, the sum, etc). | 408 track of (such as the geometric mean of a set, the sum, etc).""" |
527 build_targets: The targets necessary to build to run these tests | |
528 (default target is create_sdk).""" | |
529 super(RuntimePerformanceTest, self).__init__(result_folder_name, | 409 super(RuntimePerformanceTest, self).__init__(result_folder_name, |
530 platform_list, versions, benchmarks, test_runner, tester, | 410 platform_list, versions, benchmarks, test_runner, tester, |
531 file_processor, RuntimePerfGrapher(self), | 411 file_processor) |
532 build_targets=build_targets) | |
533 self.platform_list = platform_list | 412 self.platform_list = platform_list |
534 self.platform_type = platform_type | 413 self.platform_type = platform_type |
535 self.versions = versions | 414 self.versions = versions |
536 self.benchmarks = benchmarks | 415 self.benchmarks = benchmarks |
537 | 416 |
538 class RuntimePerfGrapher(Grapher): | |
539 def plot_all_perf(self, png_filename): | |
540 """Create a plot that shows the performance changes of individual | |
541 benchmarks run by JS and generated by frog, over svn history.""" | |
542 for benchmark in self.test.benchmarks: | |
543 self.style_and_save_perf_plot( | |
544 'Performance of %s over time on the %s on %s' % (benchmark, | |
545 self.test.platform_type, utils.GuessOS()), | |
546 'Speed (bigger = better)', 16, 14, 'lower left', | |
547 benchmark + png_filename, self.test.platform_list, | |
548 self.test.versions, [benchmark]) | |
549 | |
550 def plot_avg_perf(self, png_filename, platforms=None, versions=None): | |
551 """Generate a plot that shows the performance changes of the geomentric | |
552 mean of JS and frog benchmark performance over svn history.""" | |
553 if platforms == None: | |
554 platforms = self.test.platform_list | |
555 if versions == None: | |
556 versions = self.test.versions | |
557 (title, y_axis, size_x, size_y, loc, filename) = \ | |
558 ('Geometric Mean of benchmark %s performance on %s ' % | |
559 (self.test.platform_type, utils.GuessOS()), 'Speed (bigger = better)', | |
560 16, 5, 'lower left', 'avg'+png_filename) | |
561 clear_axis = True | |
562 for platform in platforms: | |
563 for version in versions: | |
564 if self.test.is_valid_combination(platform, version): | |
565 for metric in self.test.extra_metrics: | |
566 self.style_and_save_perf_plot(title, y_axis, size_x, size_y, loc, | |
567 filename, [platform], [version], | |
568 [metric], clear_axis) | |
569 clear_axis = False | |
570 | |
571 def plot_results(self, png_filename): | |
572 self.plot_all_perf(png_filename) | |
573 self.plot_avg_perf('2' + png_filename) | |
574 | 417 |
575 class BrowserTester(Tester): | 418 class BrowserTester(Tester): |
576 @staticmethod | 419 @staticmethod |
577 def get_browsers(add_dartium=True): | 420 def get_browsers(add_dartium=True): |
578 browsers = ['ff', 'chrome'] | 421 browsers = ['ff', 'chrome'] |
579 if add_dartium: | 422 if add_dartium: |
580 browsers += ['dartium'] | 423 browsers += ['dartium'] |
581 has_shell = False | 424 has_shell = False |
582 if platform.system() == 'Darwin': | 425 if platform.system() == 'Darwin': |
583 browsers += ['safari'] | 426 browsers += ['safari'] |
584 if platform.system() == 'Windows': | 427 if platform.system() == 'Windows': |
585 browsers += ['ie'] | 428 browsers += ['ie'] |
586 has_shell = True | 429 has_shell = True |
587 if 'dartium' in browsers: | 430 if 'dartium' in browsers: |
588 # Fetch it if necessary. | 431 # Fetch it if necessary. |
589 get_dartium = ['python', | 432 get_dartium = ['python', |
590 os.path.join(DART_INSTALL_LOCATION, 'tools', 'get_drt.py'), | 433 os.path.join(DART_INSTALL_LOCATION, 'tools', |
591 '--dartium'] | 434 'get_archive.py'), 'dartium'] |
592 # TODO(vsm): It's inconvenient that run_cmd isn't in scope here. | 435 # TODO(vsm): It's inconvenient that run_cmd isn't in scope here. |
593 # Perhaps there is a better place to put that or this. | 436 # Perhaps there is a better place to put that or this. |
594 subprocess.call(get_dartium, stdout=sys.stdout, stderr=sys.stderr, | 437 subprocess.call(get_dartium, stdout=sys.stdout, stderr=sys.stderr, |
595 shell=has_shell) | 438 shell=has_shell) |
596 return browsers | 439 return browsers |
597 | 440 |
598 | 441 |
599 class CommonBrowserTest(RuntimePerformanceTest): | 442 class CommonBrowserTest(RuntimePerformanceTest): |
600 """Runs this basic performance tests (Benchpress, some V8 benchmarks) in the | 443 """Runs this basic performance tests (Benchpress, some V8 benchmarks) in the |
601 browser.""" | 444 browser.""" |
(...skipping 15 matching lines...) | |
617 @staticmethod | 460 @staticmethod |
618 def get_standalone_benchmarks(): | 461 def get_standalone_benchmarks(): |
619 return ['Mandelbrot', 'DeltaBlue', 'Richards', 'NBody', 'BinaryTrees', | 462 return ['Mandelbrot', 'DeltaBlue', 'Richards', 'NBody', 'BinaryTrees', |
620 'Fannkuch', 'Meteor', 'BubbleSort', 'Fibonacci', 'Loop', 'Permute', | 463 'Fannkuch', 'Meteor', 'BubbleSort', 'Fibonacci', 'Loop', 'Permute', |
621 'Queens', 'QuickSort', 'Recurse', 'Sieve', 'Sum', 'Tak', 'Takl', 'Towers', | 464 'Queens', 'QuickSort', 'Recurse', 'Sieve', 'Sum', 'Tak', 'Takl', 'Towers', |
622 'TreeSort'] | 465 'TreeSort'] |
623 | 466 |
624 class CommonBrowserTester(BrowserTester): | 467 class CommonBrowserTester(BrowserTester): |
625 def run_tests(self): | 468 def run_tests(self): |
626 """Run a performance test in the browser.""" | 469 """Run a performance test in the browser.""" |
627 os.chdir('frog') | 470 self.test.test_runner.run_cmd([ |
628 self.test.test_runner.run_cmd(['python', os.path.join('benchmarks', | 471 'python', os.path.join('internal', 'browserBenchmarks', |
629 'make_web_benchmarks.py')]) | 472 'make_web_benchmarks.py')]) |
630 os.chdir('..') | |
631 | 473 |
632 for browser in self.test.platform_list: | 474 for browser in self.test.platform_list: |
633 for version in self.test.versions: | 475 for version in self.test.versions: |
634 if not self.test.is_valid_combination(browser, version): | 476 if not self.test.is_valid_combination(browser, version): |
635 continue | 477 continue |
636 self.test.trace_file = os.path.join( | 478 self.test.trace_file = os.path.join( |
637 'tools', 'testing', 'perf_testing', self.test.result_folder_name, | 479 'tools', 'testing', 'perf_testing', self.test.result_folder_name, |
638 'perf-%s-%s-%s' % (self.test.cur_time, browser, version)) | 480 'perf-%s-%s-%s' % (self.test.cur_time, browser, version)) |
639 self.add_svn_revision_to_trace(self.test.trace_file, browser) | 481 self.add_svn_revision_to_trace(self.test.trace_file, browser) |
640 file_path = os.path.join( | 482 file_path = os.path.join( |
641 os.getcwd(), 'internal', 'browserBenchmarks', | 483 os.getcwd(), 'internal', 'browserBenchmarks', |
642 'benchmark_page_%s.html' % version) | 484 'benchmark_page_%s.html' % version) |
643 self.test.test_runner.run_cmd( | 485 self.test.test_runner.run_cmd( |
644 ['python', os.path.join('tools', 'testing', 'run_selenium.py'), | 486 ['python', os.path.join('tools', 'testing', 'run_selenium.py'), |
645 '--out', file_path, '--browser', browser, | 487 '--out', file_path, '--browser', browser, |
646 '--timeout', '600', '--mode', 'perf'], self.test.trace_file, | 488 '--timeout', '600', '--mode', 'perf'], self.test.trace_file, |
647 append=True) | 489 append=True) |
648 | 490 |
649 class CommonBrowserFileProcessor(Processor): | 491 class CommonBrowserFileProcessor(Processor): |
492 | |
650 def process_file(self, afile, should_post_file): | 493 def process_file(self, afile, should_post_file): |
651 """Comb through the html to find the performance results. | 494 """Comb through the html to find the performance results. |
652 Returns: True if we successfully posted our data to storage and/or we can | 495 Returns: True if we successfully posted our data to storage and/or we can |
653 delete the trace file.""" | 496 delete the trace file.""" |
654 os.chdir(os.path.join(DART_INSTALL_LOCATION, 'tools', | 497 os.chdir(os.path.join(DART_INSTALL_LOCATION, 'tools', |
655 'testing', 'perf_testing')) | 498 'testing', 'perf_testing')) |
656 parts = afile.split('-') | 499 parts = afile.split('-') |
657 browser = parts[2] | 500 browser = parts[2] |
658 version = parts[3] | 501 version = parts[3] |
659 f = self.open_trace_file(afile, should_post_file) | 502 f = self.open_trace_file(afile, should_post_file) |
(...skipping 27 matching lines...) | |
687 break | 530 break |
688 name = name_and_score[0].strip() | 531 name = name_and_score[0].strip() |
689 score = name_and_score[1].strip() | 532 score = name_and_score[1].strip() |
690 if version == 'js' or version == 'v8': | 533 if version == 'js' or version == 'v8': |
691 version = 'js' | 534 version = 'js' |
692 bench_dict = self.test.values_dict[browser][version] | 535 bench_dict = self.test.values_dict[browser][version] |
693 bench_dict[name] += [float(score)] | 536 bench_dict[name] += [float(score)] |
694 self.test.revision_dict[browser][version][name] += [revision_num] | 537 self.test.revision_dict[browser][version][name] += [revision_num] |
695 if not self.test.test_runner.no_upload and should_post_file: | 538 if not self.test.test_runner.no_upload and should_post_file: |
696 upload_success = upload_success and self.report_results( | 539 upload_success = upload_success and self.report_results( |
697 name, score, browser, version, revision_num, self.SCORE) | 540 name, score, browser, version, revision_num, |
541 self.get_score_type(name)) | |
698 else: | 542 else: |
699 upload_success = False | 543 upload_success = False |
700 | 544 |
701 f.close() | 545 f.close() |
702 self.calculate_geometric_mean(browser, version, revision_num) | 546 self.calculate_geometric_mean(browser, version, revision_num) |
703 return upload_success | 547 return upload_success |
704 | 548 |
705 | 549 |
706 class DromaeoTester(Tester): | 550 class DromaeoTester(Tester): |
707 DROMAEO_BENCHMARKS = { | 551 DROMAEO_BENCHMARKS = { |
(...skipping 63 matching lines...) | |
771 @staticmethod | 615 @staticmethod |
772 def get_dromaeo_versions(): | 616 def get_dromaeo_versions(): |
773 return ['js', 'frog_dom', 'frog_html', 'dart2js_dom', 'dart2js_html'] | 617 return ['js', 'frog_dom', 'frog_html', 'dart2js_dom', 'dart2js_html'] |
774 | 618 |
775 | 619 |
776 class DromaeoTest(RuntimePerformanceTest): | 620 class DromaeoTest(RuntimePerformanceTest): |
777 """Runs Dromaeo tests, in the browser.""" | 621 """Runs Dromaeo tests, in the browser.""" |
778 def __init__(self, test_runner): | 622 def __init__(self, test_runner): |
779 super(DromaeoTest, self).__init__( | 623 super(DromaeoTest, self).__init__( |
780 self.name(), | 624 self.name(), |
781 filter(lambda x: x != 'ie', BrowserTester.get_browsers()), | 625 BrowserTester.get_browsers(), |
782 'browser', | 626 'browser', |
783 DromaeoTester.get_dromaeo_versions(), | 627 DromaeoTester.get_dromaeo_versions(), |
784 DromaeoTester.get_dromaeo_benchmarks(), test_runner, | 628 DromaeoTester.get_dromaeo_benchmarks(), test_runner, |
785 self.DromaeoPerfTester(self), | 629 self.DromaeoPerfTester(self), |
786 self.DromaeoFileProcessor(self)) | 630 self.DromaeoFileProcessor(self)) |
787 # TODO(vsm): These tester/grapher/processor classes should be | |
788 # cleaner to override. | |
789 self.grapher = self.DromaeoPerfGrapher(self) | |
790 | 631 |
791 @staticmethod | 632 @staticmethod |
792 def name(): | 633 def name(): |
793 return 'dromaeo' | 634 return 'dromaeo' |
794 | 635 |
795 def is_valid_combination(self, browser, version): | 636 def is_valid_combination(self, browser, version): |
796 # TODO(vsm): This avoids a bug in 32-bit Chrome (dartium) | 637 # TODO(vsm): This avoids a bug in 32-bit Chrome (dartium) |
797 # running JS dromaeo. | 638 # running JS dromaeo. |
798 if browser == 'dartium' and version == 'js': | 639 if browser == 'dartium' and version == 'js': |
799 return False | 640 return False |
800 # dart:dom has been removed from Dartium. | 641 # dart:dom has been removed from Dartium. |
801 if browser == 'dartium' and 'dom' in version: | 642 if browser == 'dartium' and 'dom' in version: |
802 return False | 643 return False |
803 if browser == 'ff': | |
804 # TODO(vsm): We are waiting on a fix from Issue 3152 from dart2js. | |
805 return False | |
806 return True | 644 return True |
807 | 645 |
808 class DromaeoPerfGrapher(RuntimePerfGrapher): | |
809 def plot_results(self, png_filename): | |
810 self.plot_all_perf(png_filename) | |
811 self.plot_avg_perf('2' + png_filename) | |
812 self.plot_avg_perf('3' + png_filename, ['chrome', 'dartium'], | |
813 ['js', 'frog_dom', 'frog_html']) | |
814 self.plot_avg_perf('4' + png_filename, ['chrome'], | |
815 ['js', 'frog_dom', 'dart2js_dom']) | |
816 self.plot_avg_perf('5' + png_filename, ['chrome'], | |
817 ['js', 'dart2js_dom', 'dart2js_html']) | |
818 self.plot_avg_perf('6' + png_filename, ['chrome'], | |
819 ['js', 'frog_dom', 'frog_html', 'dart2js_dom', | |
820 'dart2js_html']) | |
821 | 646 |
822 class DromaeoPerfTester(DromaeoTester): | 647 class DromaeoPerfTester(DromaeoTester): |
823 def move_chrome_driver_if_needed(self, browser): | 648 def move_chrome_driver_if_needed(self, browser): |
824 """Move the appropriate version of ChromeDriver onto the path. | 649 """Move the appropriate version of ChromeDriver onto the path. |
825 TODO(efortuna): This is a total hack because the latest version of Chrome | 650 TODO(efortuna): This is a total hack because the latest version of Chrome |
826 (Dartium builds) requires a different version of ChromeDriver, that is | 651 (Dartium builds) requires a different version of ChromeDriver, that is |
827 incompatible with the release or beta Chrome and vice versa. Remove these | 652 incompatible with the release or beta Chrome and vice versa. Remove these |
828 shenanigans once we're back to both versions of Chrome using the same | 653 shenanigans once we're back to both versions of Chrome using the same |
829 version of ChromeDriver. IMPORTANT NOTE: This assumes your chromedriver is | 654 version of ChromeDriver. IMPORTANT NOTE: This assumes your chromedriver is |
830 in the default location (inside depot_tools). | 655 in the default location (inside depot_tools). |
831 """ | 656 """ |
832 current_dir = os.getcwd() | 657 current_dir = os.getcwd() |
833 os.chdir(DART_INSTALL_LOCATION) | 658 os.chdir(DART_INSTALL_LOCATION) |
834 self.test.test_runner.run_cmd(['python', os.path.join( | 659 self.test.test_runner.run_cmd(['python', os.path.join( |
835 'tools', 'get_drt.py'), '--chromedriver']) | 660 'tools', 'get_archive.py'), 'chromedriver']) |
836 path = os.environ['PATH'].split(os.pathsep) | 661 path = os.environ['PATH'].split(os.pathsep) |
837 orig_chromedriver_path = os.path.join('tools', 'testing', | 662 orig_chromedriver_path = os.path.join('tools', 'testing', |
838 'orig-chromedriver') | 663 'orig-chromedriver') |
839 dartium_chromedriver_path = os.path.join('tools', 'testing', | 664 dartium_chromedriver_path = os.path.join('tools', 'testing', |
840 'dartium-chromedriver') | 665 'dartium-chromedriver') |
841 extension = '' | 666 extension = '' |
842 if platform.system() == 'Windows': | 667 if platform.system() == 'Windows': |
843 extension = '.exe' | 668 extension = '.exe' |
844 | 669 |
845 def move_chromedriver(depot_tools, copy_to_depot_tools_dir=True, | 670 def move_chromedriver(depot_tools, copy_to_depot_tools_dir=True, |
(...skipping 15 matching lines...) | |
861 shutil.copyfile(from_dir, to_dir) | 686 shutil.copyfile(from_dir, to_dir) |
862 | 687 |
863 for loc in path: | 688 for loc in path: |
864 if 'depot_tools' in loc: | 689 if 'depot_tools' in loc: |
865 if browser == 'chrome': | 690 if browser == 'chrome': |
866 if os.path.exists(orig_chromedriver_path): | 691 if os.path.exists(orig_chromedriver_path): |
867 move_chromedriver(loc) | 692 move_chromedriver(loc) |
868 elif browser == 'dartium': | 693 elif browser == 'dartium': |
869 if not os.path.exists(dartium_chromedriver_path): | 694 if not os.path.exists(dartium_chromedriver_path): |
870 self.test.test_runner.run_cmd(['python', | 695 self.test.test_runner.run_cmd(['python', |
871 os.path.join('tools', 'get_drt.py'), '--chromedriver']) | 696 os.path.join('tools', 'get_archive.py'), 'chromedriver']) |
872 # Move original chromedriver for storage. | 697 # Move original chromedriver for storage. |
873 if not os.path.exists(orig_chromedriver_path): | 698 if not os.path.exists(orig_chromedriver_path): |
874 move_chromedriver(loc, copy_to_depot_tools_dir=False) | 699 move_chromedriver(loc, copy_to_depot_tools_dir=False) |
875 # Copy Dartium chromedriver into depot_tools | 700 # Copy Dartium chromedriver into depot_tools |
876 move_chromedriver(loc, from_path=os.path.join( | 701 move_chromedriver(loc, from_path=os.path.join( |
877 dartium_chromedriver_path, 'chromedriver')) | 702 dartium_chromedriver_path, 'chromedriver')) |
878 os.chdir(current_dir) | 703 os.chdir(current_dir) |
879 | 704 |
880 def run_tests(self): | 705 def run_tests(self): |
881 """Run dromaeo in the browser.""" | 706 """Run dromaeo in the browser.""" |
(...skipping 69 matching lines...) | |
951 if results: | 776 if results: |
952 for result in results: | 777 for result in results: |
953 r = re.match(result_pattern, result) | 778 r = re.match(result_pattern, result) |
954 name = DromaeoTester.legalize_filename(r.group(1).strip(':')) | 779 name = DromaeoTester.legalize_filename(r.group(1).strip(':')) |
955 score = float(r.group(2)) | 780 score = float(r.group(2)) |
956 bench_dict[name] += [float(score)] | 781 bench_dict[name] += [float(score)] |
957 self.test.revision_dict[browser][version][name] += \ | 782 self.test.revision_dict[browser][version][name] += \ |
958 [revision_num] | 783 [revision_num] |
959 if not self.test.test_runner.no_upload and should_post_file: | 784 if not self.test.test_runner.no_upload and should_post_file: |
960 upload_success = upload_success and self.report_results( | 785 upload_success = upload_success and self.report_results( |
961 name, score, browser, version, revision_num, self.SCORE) | 786 name, score, browser, version, revision_num, |
787 self.get_score_type(name)) | |
962 else: | 788 else: |
963 upload_success = False | 789 upload_success = False |
964 | 790 |
965 f.close() | 791 f.close() |
966 self.calculate_geometric_mean(browser, version, revision_num) | 792 self.calculate_geometric_mean(browser, version, revision_num) |
967 return upload_success | 793 return upload_success |
968 | 794 |
969 | 795 |
970 class DromaeoSizeTest(Test): | 796 class DromaeoSizeTest(Test): |
971 """Run tests to determine the compiled file output size of Dromaeo.""" | 797 """Run tests to determine the compiled file output size of Dromaeo.""" |
972 def __init__(self, test_runner): | 798 def __init__(self, test_runner): |
973 super(DromaeoSizeTest, self).__init__( | 799 super(DromaeoSizeTest, self).__init__( |
974 self.name(), | 800 self.name(), |
975 ['commandline'], ['dart', 'frog_dom', 'frog_html', | 801 ['commandline'], ['dart', 'frog_dom', 'frog_html', |
976 'frog_htmlidiomatic'], | 802 'frog_htmlidiomatic'], |
977 DromaeoTester.DROMAEO_BENCHMARKS.keys(), test_runner, | 803 DromaeoTester.DROMAEO_BENCHMARKS.keys(), test_runner, |
978 self.DromaeoSizeTester(self), | 804 self.DromaeoSizeTester(self), |
979 self.DromaeoSizeProcessor(self), | 805 self.DromaeoSizeProcessor(self), extra_metrics=['sum']) |
980 self.DromaeoSizeGrapher(self), extra_metrics=['sum']) | |
981 | 806 |
982 @staticmethod | 807 @staticmethod |
983 def name(): | 808 def name(): |
984 return 'dromaeo-size' | 809 return 'dromaeo-size' |
985 | 810 |
986 | 811 |
987 class DromaeoSizeTester(DromaeoTester): | 812 class DromaeoSizeTester(DromaeoTester): |
988 def run_tests(self): | 813 def run_tests(self): |
989 # Build tests. | 814 # Build tests. |
990 dromaeo_path = os.path.join('samples', 'third_party', 'dromaeo') | 815 dromaeo_path = os.path.join('samples', 'third_party', 'dromaeo') |
(...skipping 87 matching lines...) | |
1078 if num.find('.') == -1: | 903 if num.find('.') == -1: |
1079 num = int(num) | 904 num = int(num) |
1080 else: | 905 else: |
1081 num = float(num) | 906 num = float(num) |
1082 self.test.values_dict['commandline'][variant][metric] += [num] | 907 self.test.values_dict['commandline'][variant][metric] += [num] |
1083 self.test.revision_dict['commandline'][variant][metric] += \ | 908 self.test.revision_dict['commandline'][variant][metric] += \ |
1084 [revision_num] | 909 [revision_num] |
1085 if not self.test.test_runner.no_upload and should_post_file: | 910 if not self.test.test_runner.no_upload and should_post_file: |
1086 upload_success = upload_success and self.report_results( | 911 upload_success = upload_success and self.report_results( |
1087 metric, num, 'commandline', variant, revision_num, | 912 metric, num, 'commandline', variant, revision_num, |
1088 self.CODE_SIZE) | 913 self.get_score_type(metric)) |
1089 else: | 914 else: |
1090 upload_success = False | 915 upload_success = False |
1091 | 916 |
1092 f.close() | 917 f.close() |
1093 return upload_success | 918 return upload_success |
919 | |
920 def get_score_type(self, metric): | |
921 return self.CODE_SIZE | |
1094 | 922 |
1095 class DromaeoSizeGrapher(Grapher): | |
1096 def plot_results(self, png_filename): | |
1097 self.style_and_save_perf_plot( | |
1098 'Compiled Dromaeo Sizes', | |
1099 'Size (in bytes)', 10, 10, 'lower left', png_filename, | |
1100 ['commandline'], | |
1101 ['dart', 'frog_dom', 'frog_html', 'frog_htmlidiomatic'], | |
1102 DromaeoTester.DROMAEO_BENCHMARKS.keys()) | |
1103 | |
1104 self.style_and_save_perf_plot( | |
1105 'Compiled Dromaeo Sizes', | |
1106 'Size (in bytes)', 10, 10, 'lower left', '2' + png_filename, | |
1107 ['commandline'], | |
1108 ['dart', 'frog_dom', 'frog_html', 'frog_htmlidiomatic'], | |
1109 [self.test.extra_metrics[0]]) | |
1110 | 923 |
1111 class CompileTimeAndSizeTest(Test): | 924 class CompileTimeAndSizeTest(Test): |
1112 """Run tests to determine how long frogc takes to compile, and the compiled | 925 """Run tests to determine how long frogc takes to compile, and the compiled |
1113 file output size of some benchmarking files. | 926 file output size of some benchmarking files. |
1114 Note: This test is now 'deprecated' since frog is no longer in the sdk. We | 927 Note: This test is now 'deprecated' since frog is no longer in the sdk. We |
1115 just return the last numbers found for frog.""" | 928 just return the last numbers found for frog.""" |
1116 def __init__(self, test_runner): | 929 def __init__(self, test_runner): |
1117 """Reference to the test_runner object that notifies us when to begin | 930 """Reference to the test_runner object that notifies us when to begin |
1118 testing.""" | 931 testing.""" |
1119 super(CompileTimeAndSizeTest, self).__init__( | 932 super(CompileTimeAndSizeTest, self).__init__( |
1120 self.name(), ['commandline'], ['frog'], ['swarm', 'total'], | 933 self.name(), ['commandline'], ['frog'], ['swarm', 'total'], |
1121 test_runner, self.CompileTester(self), | 934 test_runner, self.CompileTester(self), |
1122 self.CompileProcessor(self), self.CompileGrapher(self)) | 935 self.CompileProcessor(self)) |
1123 self.dart_compiler = os.path.join( | 936 self.dart_compiler = os.path.join( |
1124 DART_INSTALL_LOCATION, utils.GetBuildRoot(utils.GuessOS(), | 937 DART_INSTALL_LOCATION, utils.GetBuildRoot(utils.GuessOS(), |
1125 'release', 'ia32'), 'dart-sdk', 'bin', 'frogc') | 938 'release', 'ia32'), 'dart-sdk', 'bin', 'frogc') |
1126 _suffix = '' | 939 _suffix = '' |
1127 if platform.system() == 'Windows': | 940 if platform.system() == 'Windows': |
1128 _suffix = '.exe' | 941 _suffix = '.exe' |
1129 self.dart_vm = os.path.join( | 942 self.dart_vm = os.path.join( |
1130 DART_INSTALL_LOCATION, utils.GetBuildRoot(utils.GuessOS(), | 943 DART_INSTALL_LOCATION, utils.GetBuildRoot(utils.GuessOS(), |
1131 'release', 'ia32'), 'dart-sdk', 'bin','dart' + _suffix) | 944 'release', 'ia32'), 'dart-sdk', 'bin','dart' + _suffix) |
1132 self.failure_threshold = {'swarm' : 100, 'total' : 100} | 945 self.failure_threshold = {'swarm' : 100, 'total' : 100} |
(...skipping 57 matching lines...) | |
1190 for metric in self.test.values_list: | 1003 for metric in self.test.values_list: |
1191 if metric in line: | 1004 if metric in line: |
1192 num = tokens[0] | 1005 num = tokens[0] |
1193 if num.find('.') == -1: | 1006 if num.find('.') == -1: |
1194 num = int(num) | 1007 num = int(num) |
1195 else: | 1008 else: |
1196 num = float(num) | 1009 num = float(num) |
1197 self.test.values_dict['commandline']['frog'][metric] += [num] | 1010 self.test.values_dict['commandline']['frog'][metric] += [num] |
1198 self.test.revision_dict['commandline']['frog'][metric] += \ | 1011 self.test.revision_dict['commandline']['frog'][metric] += \ |
1199 [revision_num] | 1012 [revision_num] |
1200 score_type = self.CODE_SIZE | 1013 score_type = self.get_score_type(metric) |
1201 if 'Compiling' in metric or 'Bootstrapping' in metric: | |
1202 score_type = self.COMPILE_TIME | |
1203 if not self.test.test_runner.no_upload and should_post_file: | 1014 if not self.test.test_runner.no_upload and should_post_file: |
1204 if num < self.test.failure_threshold[metric]: | 1015 if num < self.test.failure_threshold[metric]: |
1205 num = 0 | 1016 num = 0 |
1206 upload_success = upload_success and self.report_results( | 1017 upload_success = upload_success and self.report_results( |
1207 metric, num, 'commandline', 'frog', revision_num, | 1018 metric, num, 'commandline', 'frog', revision_num, |
1208 score_type) | 1019 score_type) |
1209 else: | 1020 else: |
1210 upload_success = False | 1021 upload_success = False |
1211 if revision_num != 0: | 1022 if revision_num != 0: |
1212 for metric in self.test.values_list: | 1023 for metric in self.test.values_list: |
1213 try: | 1024 try: |
1214 self.test.revision_dict['commandline']['frog'][metric].pop() | 1025 self.test.revision_dict['commandline']['frog'][metric].pop() |
1215 self.test.revision_dict['commandline']['frog'][metric] += \ | 1026 self.test.revision_dict['commandline']['frog'][metric] += \ |
1216 [revision_num] | 1027 [revision_num] |
1217 # Fill in 0 if compilation failed. | 1028 # Fill in 0 if compilation failed. |
1218 if self.test.values_dict['commandline']['frog'][metric][-1] < \ | 1029 if self.test.values_dict['commandline']['frog'][metric][-1] < \ |
1219 self.test.failure_threshold[metric]: | 1030 self.test.failure_threshold[metric]: |
1220 self.test.values_dict['commandline']['frog'][metric] += [0] | 1031 self.test.values_dict['commandline']['frog'][metric] += [0] |
1221 self.test.revision_dict['commandline']['frog'][metric] += \ | 1032 self.test.revision_dict['commandline']['frog'][metric] += \ |
1222 [revision_num] | 1033 [revision_num] |
1223 except IndexError: | 1034 except IndexError: |
1224 # We tried to pop from an empty list. This happens if the first | 1035 # We tried to pop from an empty list. This happens if the first |
1225 # trace file we encounter is incomplete. | 1036 # trace file we encounter is incomplete. |
1226 pass | 1037 pass |
1227 | 1038 |
1228 f.close() | 1039 f.close() |
1229 return upload_success | 1040 return upload_success |
1230 | 1041 |
1231 class CompileGrapher(Grapher): | 1042 def get_score_type(self, metric): |
1232 | 1043 if 'Compiling' in metric or 'Bootstrapping' in metric: |
1233 def plot_results(self, png_filename): | 1044 return self.COMPILE_TIME |
1234 self.style_and_save_perf_plot( | 1045 return self.CODE_SIZE |
1235 'Compiled frog sizes', 'Size (in bytes)', 10, 10, 'lower left', | |
1236 png_filename, ['commandline'], ['frog'], ['swarm', 'total']) | |
1237 | 1046 |
1238 | 1046 |
1239 class TestBuilder(object): | 1047 class TestBuilder(object): |
1240 """Construct the desired test object.""" | 1048 """Construct the desired test object.""" |
1241 available_suites = dict((suite.name(), suite) for suite in [ | 1049 available_suites = dict((suite.name(), suite) for suite in [ |
1242 CompileTimeAndSizeTest, CommonBrowserTest, DromaeoTest, DromaeoSizeTest]) | 1050 CompileTimeAndSizeTest, CommonBrowserTest, DromaeoTest, DromaeoSizeTest]) |
1243 | 1051 |
1244 @staticmethod | 1052 @staticmethod |
1245 def make_test(test_name, test_runner): | 1053 def make_test(test_name, test_runner): |
1246 return TestBuilder.available_suites[test_name](test_runner) | 1054 return TestBuilder.available_suites[test_name](test_runner) |
1247 | 1055 |
1248 @staticmethod | 1056 @staticmethod |
1249 def available_suite_names(): | 1057 def available_suite_names(): |
1250 return TestBuilder.available_suites.keys() | 1058 return TestBuilder.available_suites.keys() |
1251 | 1059 |
1060 def search_for_revision(svn_info_command): | |
1061 p = subprocess.Popen(svn_info_command, stdout = subprocess.PIPE, | |
1062 stderr = subprocess.STDOUT, | |
1063 shell = (platform.system() == 'Windows')) | |
1064 output, _ = p.communicate() | |
1065 for line in output.split('\n'): | |
1066 if 'Revision' in line: | |
1067 return line.split()[1] | |
1068 return -1 | |
1069 | |
1070 def update_set_of_done_cls(revision_num=None): | |
1071 """Update the set of CLs that do not need additional performance runs. | |
1072 Args: | |
1073 revision_num: an additional number to be added to the 'done set' | |
1074 """ | |
1075 filename = os.path.join(dirname(abspath(__file__)), 'cached_results.txt') | |
1076 if not os.path.exists(filename): | |
1077 f = open(filename, 'w') | |
1078 results = set() | |
1079 pickle.dump(results, f) | |
1080 f.close() | |
1081 f = open(filename) | |
1082 result_set = pickle.load(f) | |
1083 if revision_num: | |
1084 f.seek(0) | |
1085 result_set.add(revision_num) | |
1086 pickle.dump(result_set, f) | |
1087 f.close() | |
1088 return result_set | |
1252 | 1089 |
1253 def main(): | 1090 def main(): |
1254 runner = TestRunner() | 1091 runner = TestRunner() |
1255 continuous = runner.parse_args() | 1092 continuous = runner.parse_args() |
1256 if continuous: | 1093 if continuous: |
1257 while True: | 1094 while True: |
1095 results_set = update_set_of_done_cls() | |
1258 if runner.has_new_code(): | 1096 if runner.has_new_code(): |
1259 runner.run_test_sequence() | 1097 runner.run_test_sequence() |
1260 else: | 1098 else: |
1099 # Try to get up to 10 runs of each CL, starting with the most recent CL | |
1100 # that does not yet have 10 runs. But only perform a set of extra runs | |
1101 # at most 10 at a time (get all the extra runs for one CL) before | |
1102 # checking to see if new code has been checked in. | |
1103 has_run_extra = False | |
1104 revision_num = int(search_for_revision(['svn', 'info'])) | |
1105 if revision_num == -1: | |
1106 revision_num = int(search_for_revision(['git', 'svn', 'info'])) | |
1107 | |
1108 # No need to track the performance before revision 3000. That's way in | |
1109 # the past. | |
1110 while revision_num > 3000 and not has_run_extra: | |
ricow1, 2012/08/06 11:10:03: This will take us back in time really really slow,
Emily Fortuna, 2012/08/06 17:39:34: Right, so when this was originally designed, I tho
1111 if revision_num not in results_set: | |
1112 a_test = TestBuilder.make_test(runner.suite_names[0], runner) | |
1113 benchmark_name = a_test.values_list[0] | |
1114 platform_name = a_test.platform_list[0] | |
1115 variant = a_test.values_dict[platform_name].keys()[0] | |
1116 number_of_results = post_results.get_num_results(benchmark_name, | |
1117 platform_name, variant, revision_num, | |
1118 a_test.file_processor.get_score_type(benchmark_name)) | |
1119 if number_of_results < 10 and number_of_results >= 0: | |
1120 run = runner.run_test_sequence(revision_num=str(revision_num), | |
1121 num_reruns=(10-number_of_results)) | |
1122 if run == 0: | |
1123 has_run_extra = True | |
1124 results_set = update_set_of_done_cls(revision_num) | |
1125 elif run == -1: | |
1126 # This revision is a broken build. Don't try to run it again. | |
1127 results_set = update_set_of_done_cls(revision_num) | |
1128 revision_num -= 1 | |
1129 # No more extra back-runs to do (for now). Wait for new code. | |
1261 time.sleep(200) | 1130 time.sleep(200) |
1262 else: | 1131 else: |
1263 runner.run_test_sequence() | 1132 runner.run_test_sequence() |
1264 | 1133 |
1265 if __name__ == '__main__': | 1134 if __name__ == '__main__': |
1266 main() | 1135 main() |
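
The heart of this change is the back-fill scheduling added to main(): when there is no new code to test, the runner walks backwards through revision history and tops up older CLs until each has ten posted results, doing at most one CL's worth of extra runs before it sleeps and re-checks for new commits. That loop is easier to read outside the diff. Below is a minimal sketch of the scheduling, where backfill_one_revision, get_run_count, and run_tests_at are illustrative stand-ins for the patch's inline loop, post_results.get_num_results, and TestRunner.run_test_sequence; the ten-run target and the revision-3000 floor are taken from the new code.

TARGET_RUNS = 10                    # from the patch: "up to 10 runs of each CL"
OLDEST_INTERESTING_REVISION = 3000  # from the patch: ignore anything older

def backfill_one_revision(head_revision, done_revisions, get_run_count,
                          run_tests_at):
    """Walk backwards from head_revision and top up the first CL that still
    needs results. Returns the revision that was run, or None if every
    revision down to the floor is already covered."""
    revision = head_revision
    while revision > OLDEST_INTERESTING_REVISION:
        if revision not in done_revisions:
            runs_so_far = get_run_count(revision)
            if 0 <= runs_so_far < TARGET_RUNS:
                result = run_tests_at(revision, TARGET_RUNS - runs_so_far)
                # A successful run (0) or a broken build (-1) means we are
                # finished with this CL; a posting failure (1) leaves it
                # eligible for a later retry.
                if result in (0, -1):
                    done_revisions.add(revision)
                if result == 0:
                    return revision
        revision -= 1
    return None

Revisions that either complete successfully or turn out to be broken builds are added to the pickled cached_results.txt set via update_set_of_done_cls, so later idle passes skip them and the walk back through history keeps advancing one CL at a time.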