OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # |
| 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can be |
| 5 # found in the LICENSE file. |
| 6 |
| 7 """Helper script to shard build bot steps and save results to disk. |
| 8 |
| 9 Our buildbot infrastructure requires each slave to run steps serially. |
| 10 This is sub-optimal for Android, where these steps can run independently on
| 11 multiple connected devices. |
| 12 |
| 13 The buildbots will run this script multiple times per cycle: |
| 14 - First, without params: all steps will be executed in parallel using all |
| 15 connected devices. Step results will be pickled to disk (each step has a unique |
| 16 name). |
| 17 The buildbot will treat this step as a regular step, and will not process any |
| 18 graph data. |
| 19 |
| 20 - Then, with -p STEP_NAME: at this stage, we'll simply print the file with the |
| 21 step results previously saved. The buildbot will then process the graph data |
| 22 accordingly. |
| 23 |
| 24 The JSON config is a file containing a dictionary in the format:
| 25 { |
| 26 'step_name_foo': 'script_to_execute foo', |
| 27 'step_name_bar': 'script_to_execute bar' |
| 28 } |
| 29 |
| 30 Note that script_to_execute must take at least the following
| 31 options:
| 32 --device: the serial number to be passed to all adb commands. |
| 33 --keep_test_server_ports: indicates it's being run as a shard, and shouldn't |
| 34 reset test server port allocation. |
| 35 """ |
| 36 |
| 37 |
| 38 import datetime |
| 39 import json |
| 40 import logging |
| 41 import multiprocessing |
| 42 import optparse |
| 43 import pexpect |
| 44 import pickle |
| 45 import os |
| 46 import signal |
| 47 import shutil |
| 48 import sys |
| 49 |
| 50 from pylib import android_commands |
| 51 from pylib import cmd_helper |
| 52 from pylib import constants |
| 53 from pylib import ports |
| 54 |
| 55 |
| 56 _OUTPUT_DIR = os.path.join(constants.CHROME_DIR, 'out', 'step_results') |
| 57 |
| 58 |
| 59 def _SaveResult(result): |
| 60   with open(os.path.join(_OUTPUT_DIR, result['name']), 'w') as f:
| 61 f.write(pickle.dumps(result)) |
| 62 |
| 63 |
| 64 def _RunStepsPerDevice(steps): |
| 65 results = [] |
| 66 for step in steps: |
| 67 start_time = datetime.datetime.now() |
| 68     print 'Starting %s: %s at %s on %s' % (step['name'], step['cmd'],
| 69                                            start_time, step['device'])
| 70 output, exit_code = pexpect.run( |
| 71 step['cmd'], cwd=os.path.abspath(constants.CHROME_DIR), |
| 72 withexitstatus=True, logfile=sys.stdout, timeout=1800, |
| 73 env=os.environ) |
| 74 end_time = datetime.datetime.now() |
| 75     print 'Finished %s: %s at %s on %s' % (step['name'], step['cmd'],
| 76                                            end_time, step['device'])
| 77 result = {'name': step['name'], |
| 78 'output': output, |
| 79 'exit_code': exit_code or 0, |
| 80 'total_time': (end_time - start_time).seconds, |
| 81 'device': step['device']} |
| 82 _SaveResult(result) |
| 83 results += [result] |
| 84 return results |
| 85 |
| 86 |
| 87 def _RunShardedSteps(steps, devices): |
| 88 assert steps |
| 89 assert devices, 'No devices connected?' |
| 90 if os.path.exists(_OUTPUT_DIR): |
| 91 assert '/step_results' in _OUTPUT_DIR |
| 92 shutil.rmtree(_OUTPUT_DIR) |
| 93 if not os.path.exists(_OUTPUT_DIR): |
| 94 os.makedirs(_OUTPUT_DIR) |
| 95 step_names = sorted(steps.keys()) |
| 96 all_params = [] |
| 97 num_devices = len(devices) |
| 98 shard_size = (len(steps) + num_devices - 1) / num_devices |
| 99 for i, device in enumerate(devices): |
| 100 steps_per_device = [] |
| 101     for s in step_names[i * shard_size:(i + 1) * shard_size]:
| 102 steps_per_device += [{'name': s, |
| 103 'device': device, |
| 104 'cmd': steps[s] + ' --device ' + device + |
| 105 ' --keep_test_server_ports'}] |
| 106 all_params += [steps_per_device] |
| 107 print 'Start sharding (note: output is not synchronized...)' |
| 108 print '*' * 80 |
| 109 start_time = datetime.datetime.now() |
| 110 pool = multiprocessing.Pool(processes=num_devices) |
| 111 async_results = pool.map_async(_RunStepsPerDevice, all_params) |
| 112 results_per_device = async_results.get(999999) |
| 113 end_time = datetime.datetime.now() |
| 114 print '*' * 80 |
| 115 print 'Finished sharding.' |
| 116 print 'Summary' |
| 117 total_time = 0 |
| 118 for results in results_per_device: |
| 119 for result in results: |
| 120 print('%s : exit_code=%d in %d secs at %s' % |
| 121 (result['name'], result['exit_code'], result['total_time'], |
| 122 result['device'])) |
| 123 total_time += result['total_time'] |
| 124 print 'Step time: %d secs' % ((end_time - start_time).seconds) |
| 125 print 'Bots time: %d secs' % total_time |
| 126 # No exit_code for the sharding step: the individual _PrintResults step |
| 127 # will return the corresponding exit_code. |
| 128 return 0 |
| 129 |
| 130 |
| 131 def _PrintStepOutput(step_name): |
| 132 file_name = os.path.join(_OUTPUT_DIR, step_name) |
| 133 if not os.path.exists(file_name): |
| 134     print 'File not found:', file_name
| 135 return 1 |
| 136   with open(file_name, 'r') as f:
| 137 result = pickle.loads(f.read()) |
| 138 print result['output'] |
| 139 return result['exit_code'] |
| 140 |
| 141 |
| 142 def _KillPendingServers(): |
| 143 for retry in range(5): |
| 144 for server in ['lighttpd', 'web-page-replay']: |
| 145 pids = cmd_helper.GetCmdOutput(['pgrep', '-f', server]) |
| 146 pids = [pid.strip() for pid in pids.split('\n') if pid.strip()] |
| 147 for pid in pids: |
| 148 try: |
| 149 logging.warning('Killing %s %s', server, pid) |
| 150 os.kill(int(pid), signal.SIGQUIT) |
| 151 except Exception as e: |
| 152 logging.warning('Failed killing %s %s %s', server, pid, e) |
| 153 |
| 154 |
| 155 def main(argv): |
| 156 parser = optparse.OptionParser() |
| 157 parser.add_option('-s', '--steps', |
| 158 help='A JSON file containing all the steps to be ' |
| 159 'sharded.') |
| 160 parser.add_option('-p', '--print_results', |
| 161 help='Only prints the results for the previously ' |
| 162 'executed step, do not run it again.') |
| 163   options, _ = parser.parse_args(argv)
| 164 if options.print_results: |
| 165 return _PrintStepOutput(options.print_results) |
| 166 |
| 167 # At this point, we should kill everything that may have been left over from |
| 168 # previous runs. |
| 169 _KillPendingServers() |
| 170 |
| 171 # Reset the test port allocation. It's important to do it before starting |
| 172 # to dispatch any step. |
| 173 if not ports.ResetTestServerPortAllocation(): |
| 174 raise Exception('Failed to reset test server port.') |
| 175 |
| 176 # Sort the devices so that we'll try to always run a step in the same device. |
| 177 devices = sorted(android_commands.GetAttachedDevices()) |
| 178 if not devices: |
| 179 print 'You must attach a device' |
| 180 return 1 |
| 181 |
| 182   with open(options.steps, 'r') as f:
| 183 steps = json.load(f) |
| 184 return _RunShardedSteps(steps, devices) |
| 185 |
| 186 |
| 187 if __name__ == '__main__': |
| 188 sys.exit(main(sys.argv)) |
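
For reference, a minimal sketch of the two-phase invocation described in the docstring. The sharder file name (bb_sharder.py), the steps file name, the step names and the commands are all hypothetical; only the -s and -p options come from the script above.

import json
import subprocess

SHARDER = 'bb_sharder.py'          # hypothetical name for the script above
STEPS_FILE = 'sharded_steps.json'  # hypothetical steps config

steps = {
    'step_name_foo': 'script_to_execute foo',
    'step_name_bar': 'script_to_execute bar',
}
with open(STEPS_FILE, 'w') as f:
  json.dump(steps, f)

# Phase 1: run every step, sharded across all attached devices; each result is
# pickled to out/step_results/<step_name> under the Chrome root.
subprocess.call(['python', SHARDER, '-s', STEPS_FILE])

# Phase 2: replay each step's saved output so the buildbot can process the
# graph data and pick up the step's real exit code.
for name in sorted(steps):
  exit_code = subprocess.call(['python', SHARDER, '-p', name])
  print '%s finished with exit code %d' % (name, exit_code)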
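
And a minimal sketch of the contract a script_to_execute has to honour: per the docstring it must accept --device and --keep_test_server_ports. The script body and everything else here is made up for illustration.

#!/usr/bin/env python
# Hypothetical step script; only the two options below come from the sharder's
# contract, the rest is illustrative.
import optparse
import sys


def main(argv):
  parser = optparse.OptionParser()
  parser.add_option('--device',
                    help='Serial number passed to all adb commands.')
  parser.add_option('--keep_test_server_ports', action='store_true',
                    help='Run as a shard; do not reset test server port '
                         'allocation.')
  options, _ = parser.parse_args(argv)
  if not options.device:
    parser.error('--device is required when run as a shard.')
  # A real step would drive adb/tests here; this sketch only reports back.
  print 'Running step on device %s' % options.device
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv))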