Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(42)

Side by Side Diff: build/android/bb_run_sharded_steps.py

Issue 11308344: Telemetry: shard tests on android. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Comments / port allocation Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/telemetry/telemetry/adb_commands.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 #
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
6
7 """Helper script to shard build bot steps and save results to disk.
8
9 Our buildbot infrastructure requires each slave to run steps serially.
10 This is sub-optimal for android, where these steps can run independently on
11 multiple connected devices.
12
13 The buildbots will run this script multiple times per cycle:
14 - First, without params: all steps will be executed in parallel using all
15 connected devices. Step results will be pickled to disk (each step has a unique
16 name).
17 The buildbot will treat this step as a regular step, and will not process any
18 graph data.
19
20 - Then, with -p STEP_NAME: at this stage, we'll simply print the file with the
21 step results previously saved. The buildbot will then process the graph data
22 accordingly.
23
24 The JSON config is a file containing a dictionary in the format:
25 {
26 'step_name_foo': 'script_to_execute foo',
27 'step_name_bar': 'script_to_execute bar'
28 }
29
30 Note that script_to_execute must accept at least the following
31 options:
32 --device: the serial number to be passed to all adb commands.
33 --keep_test_server_ports: indicates it's being run as a shard, and shouldn't
34 reset test server port allocation.
35 """
36
37
38 import datetime
39 import json
40 import logging
41 import multiprocessing
42 import optparse
43 import pexpect
44 import pickle
45 import os
46 import signal
47 import shutil
48 import sys
49
50 from pylib import android_commands
51 from pylib import cmd_helper
52 from pylib import constants
53 from pylib import ports
54
55
# Directory holding one pickled result file per step, named after the step.
# It is wiped and recreated at the start of every sharded run.
56 _OUTPUT_DIR = os.path.join(constants.CHROME_DIR, 'out', 'step_results')
57
58
def _SaveResult(result):
  """Writes a pickled step result into _OUTPUT_DIR.

  Args:
    result: dict describing a finished step; its 'name' entry is used as the
        name of the file the pickle is written to.
  """
  result_path = os.path.join(_OUTPUT_DIR, result['name'])
  with open(result_path, 'w') as result_file:
    pickle.dump(result, result_file)
62
63
def _RunStepsPerDevice(steps):
  """Runs the given steps one after another and saves each result to disk.

  Args:
    steps: list of dicts, each with the keys 'name', 'cmd' (the command line
        to execute) and 'device'.

  Returns:
    A list with one result dict per step, holding 'name', 'output',
    'exit_code', 'total_time' (in seconds) and 'device'.
  """
  results = []
  for step in steps:
    start_time = datetime.datetime.now()
    print('Starting %s: %s %s at %s' % (step['name'], step['cmd'],
                                        start_time, step['device']))
    output, exit_code = pexpect.run(
        step['cmd'], cwd=os.path.abspath(constants.CHROME_DIR),
        withexitstatus=True, logfile=sys.stdout, timeout=1800,
        env=os.environ)
    end_time = datetime.datetime.now()
    print('Finished %s: %s %s at %s' % (step['name'], step['cmd'],
                                        end_time, step['device']))
    if exit_code is None:
      # pexpect leaves the exit status unset when the command did not exit on
      # its own, i.e. it was terminated by a signal (presumably also the case
      # once the timeout above expires). Report that as a failure instead of
      # silently turning it into a successful exit code 0.
      exit_code = 1
    result = {'name': step['name'],
              'output': output,
              'exit_code': exit_code,
              'total_time': (end_time - start_time).seconds,
              'device': step['device']}
    # Persist right away so a later "-p STEP_NAME" run can print this step
    # even if another step in this shard fails afterwards.
    _SaveResult(result)
    results.append(result)
  return results
85
86
def _RunShardedSteps(steps, devices):
  """Splits the steps across the devices and runs all shards in parallel.

  The output directory is wiped first, then one worker process per device
  executes its share of the steps through _RunStepsPerDevice. A summary is
  printed once every worker has finished.

  Args:
    steps: dict mapping a step name to the command line that runs it.
    devices: list of device serial numbers; must not be empty.

  Returns:
    Always 0. The exit code of an individual step is reported later, when the
    script is invoked again with --print_results for that step.
  """
  assert steps
  assert devices, 'No devices connected?'
  if os.path.exists(_OUTPUT_DIR):
    # Sanity check before recursively deleting a directory.
    assert '/step_results' in _OUTPUT_DIR
    shutil.rmtree(_OUTPUT_DIR)
  if not os.path.exists(_OUTPUT_DIR):
    os.makedirs(_OUTPUT_DIR)
  # Shard over the sorted names (not the dict's arbitrary key order) so that
  # a given step lands on the same position in the device list on every run.
  step_names = sorted(steps.keys())
  num_devices = len(devices)
  # Ceiling division: every step is assigned even when the number of steps is
  # not a multiple of the number of devices.
  shard_size = (len(step_names) + num_devices - 1) // num_devices
  all_params = []
  for i, device in enumerate(devices):
    steps_per_device = []
    for name in step_names[i * shard_size:(i + 1) * shard_size]:
      steps_per_device.append({
          'name': name,
          'device': device,
          'cmd': steps[name] + ' --device ' + device +
                 ' --keep_test_server_ports',
      })
    all_params.append(steps_per_device)
  print('Start sharding (note: output is not synchronized...)')
  print('*' * 80)
  start_time = datetime.datetime.now()
  pool = multiprocessing.Pool(processes=num_devices)
  async_results = pool.map_async(_RunStepsPerDevice, all_params)
  # NOTE(review): the huge timeout looks like the usual workaround to keep the
  # wait interruptible from the keyboard -- confirm before changing it.
  results_per_device = async_results.get(999999)
  end_time = datetime.datetime.now()
  print('*' * 80)
  print('Finished sharding.')
  print('Summary')
  total_time = 0
  for results in results_per_device:
    for result in results:
      print('%s : exit_code=%d in %d secs at %s' %
            (result['name'], result['exit_code'], result['total_time'],
             result['device']))
      total_time += result['total_time']
  # "Step time" is the wall-clock duration of the whole sharded run, while
  # "Bots time" is the sum of the durations of the individual steps.
  print('Step time: %d secs' % ((end_time - start_time).seconds))
  print('Bots time: %d secs' % total_time)
  # No exit_code for the sharding step: the individual _PrintResults step
  # will return the corresponding exit_code.
  return 0
129
130
def _PrintStepOutput(step_name):
  """Prints the output that was saved for a previously executed step.

  Args:
    step_name: name of the step, which is also the name of its result file
        inside _OUTPUT_DIR.

  Returns:
    The exit code stored with the step result, or 1 when no result file
    exists for the step.
  """
  result_path = os.path.join(_OUTPUT_DIR, step_name)
  if os.path.exists(result_path):
    with open(result_path, 'r') as result_file:
      saved = pickle.load(result_file)
      print(saved['output'])
      return saved['exit_code']
  print(' '.join(['File not found ', result_path]))
  return 1
140
141
def _KillPendingServers():
  """Sends SIGQUIT to every lighttpd / web-page-replay process pgrep finds.

  The whole sweep is repeated five times. A failure to kill a process is
  logged as a warning and otherwise ignored.
  """
  server_names = ['lighttpd', 'web-page-replay']
  for _ in range(5):
    for server in server_names:
      pgrep_output = cmd_helper.GetCmdOutput(['pgrep', '-f', server])
      # One pid per line; blank lines are dropped.
      stripped_lines = [line.strip() for line in pgrep_output.split('\n')]
      for pid in stripped_lines:
        if not pid:
          continue
        try:
          logging.warning('Killing %s %s', server, pid)
          os.kill(int(pid), signal.SIGQUIT)
        except Exception as e:
          logging.warning('Failed killing %s %s %s', server, pid, e)
153
154
def main(argv):
  """Runs all steps sharded across the devices, or prints one saved result.

  Args:
    argv: the full command line, as in sys.argv.

  Returns:
    The process exit code: with --print_results, whatever _PrintStepOutput
    returns for that step; otherwise 1 when no device is attached, else the
    value returned by _RunShardedSteps.

  Raises:
    SystemExit: (through parser.error) when neither --print_results nor
        --steps is given.
    Exception: when the test server port allocation cannot be reset.
  """
  parser = optparse.OptionParser()
  parser.add_option('-s', '--steps',
                    help='A JSON file containing all the steps to be '
                         'sharded.')
  parser.add_option('-p', '--print_results',
                    help='Only prints the results for the previously '
                         'executed step, do not run it again.')
  options, _ = parser.parse_args(argv)
  if options.print_results:
    return _PrintStepOutput(options.print_results)

  # Validate the arguments before touching any server or port state; without
  # this check a missing --steps only surfaced as a TypeError from open().
  if not options.steps:
    parser.error('--steps is required unless --print_results is given.')

  # At this point, we should kill everything that may have been left over from
  # previous runs.
  _KillPendingServers()

  # Reset the test port allocation. It's important to do it before starting
  # to dispatch any step.
  if not ports.ResetTestServerPortAllocation():
    raise Exception('Failed to reset test server port.')

  # Sort the devices so that we'll try to always run a step in the same device.
  devices = sorted(android_commands.GetAttachedDevices())
  if not devices:
    print('You must attach a device')
    return 1

  with open(options.steps, 'r') as f:
    steps = json.load(f)
  return _RunShardedSteps(steps, devices)
185
186
# Script entry point: main() receives the full command line and its return
# value becomes the process exit status.
187 if __name__ == '__main__':
188 sys.exit(main(sys.argv))
OLDNEW
« no previous file with comments | « no previous file | tools/telemetry/telemetry/adb_commands.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698