Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(402)

Unified Diff: tools/swarming_load_test_bot.py

Issue 25530003: Rename load_test to isolateserver_load_test, create swarming_load_test. (Closed) Base URL: https://chromium.googlesource.com/a/chromium/tools/swarm_client@2_exception
Patch Set: This CL will get in one way or another Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | tools/swarming_load_test_client.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/swarming_load_test_bot.py
diff --git a/tools/swarming_load_test_bot.py b/tools/swarming_load_test_bot.py
new file mode 100755
index 0000000000000000000000000000000000000000..12edae13ba16d69d731e8f7306deb8b2d5403e9d
--- /dev/null
+++ b/tools/swarming_load_test_bot.py
@@ -0,0 +1,326 @@
+#!/usr/bin/env python
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Triggers a ton of fake jobs to test the Swarming server's handling under
+high load.
+
+Generates a histogram with the latencies to process the tasks and the number
+of retries.
+"""
+
+import hashlib
+import json
+import logging
+import optparse
+import os
+import Queue
+import socket
+import StringIO
+import sys
+import threading
+import time
+import zipfile
+
+ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+sys.path.insert(0, ROOT_DIR)
+
+from third_party import colorama
+
+from utils import graph
+from utils import net
+from utils import threading_utils
+
+# Line too long (NN/80)
+# pylint: disable=C0301
+
+
+TASK_OUTPUT = 'This task ran with great success'
+
+
def print_results(results, columns, buckets):
  """Prints aggregated bot statistics to stdout.

  Items in |results| that are floats are treated as task latencies in
  seconds and shown as a histogram; every other item is an event name
  whose occurrences are counted and shown separately.
  """
  delays = []
  failures = []
  for item in results:
    (delays if isinstance(item, float) else failures).append(item)

  print('%sDELAYS%s:' % (colorama.Fore.RED, colorama.Fore.RESET))
  graph.print_histogram(
      graph.generate_histogram(delays, buckets), columns, ' %.3f')
  print('')
  print('Total items : %d' % len(results))
  # Guard against an empty delay list to avoid a division by zero.
  average = sum(delays) / len(delays) if delays else 0
  print('Average delay: %s' % graph.to_units(average))
  print('')

  if failures:
    print('%sEVENTS%s:' % (colorama.Fore.RED, colorama.Fore.RESET))
    counts = {}
    for event in failures:
      counts[event] = counts.get(event, 0) + 1
    graph.print_histogram(counts, columns, ' %s')
    print('')
+
+
def calculate_version(url):
  """Fetches the swarm_bot code zip at |url| and returns its SHA-1 digest.

  The digest covers the same file set the Swarming server hashes to compute
  the bot version, so the fake bot can pass the server's version check.
  """
  # Cannot use url_open() since zipfile requires .seek().
  blob = net.url_read(url)
  archive = zipfile.ZipFile(StringIO.StringIO(blob))
  # See
  # https://code.google.com/p/swarming/source/browse/src/common/version.py?repo=swarming-server
  tracked = (
      'slave_machine.py',
      'swarm_bot/local_test_runner.py',
      'common/__init__.py',
      'common/swarm_constants.py',
      'common/version.py',
      'common/test_request_message.py',
      'common/url_helper.py',
  )
  digest = hashlib.sha1()
  for name in tracked:
    digest.update(archive.read(name))
  return digest.hexdigest()
+
+
class FakeSwarmBot(object):
  """A fake swarm_bot implementation claiming to run on a Commodore 64.

  It polls the server for a job, pretends to process it for |duration|
  seconds (sending keep-alive pings as requested by the manifest), then
  posts a canned result. All work happens on a daemon thread started by
  the constructor.
  """
  def __init__(
      self, swarming_url, swarm_bot_hash, index, progress, duration,
      events, kill_event):
    self._lock = threading.Lock()
    self._swarming = swarming_url
    self._index = index
    self._progress = progress
    self._duration = duration
    self._events = events
    self._kill_event = kill_event
    # Use an impossible hostname.
    self._machine_id = '%s-%d' % (socket.getfqdn().lower(), index)

    # See
    # https://code.google.com/p/swarming/source/browse/src/swarm_bot/slave_machine.py?repo=swarming-server
    # and
    # https://chromium.googlesource.com/chromium/tools/build.git/ \
    # +/master/scripts/tools/swarm_bootstrap/swarm_bootstrap.py
    # for more details.
    self._attributes = {
      'dimensions': {
        # Use improbable values to reduce the chance of interferring with real
        # slaves.
        'bits': '36',
        'machine': os.uname()[4] + '-experimental',
        'os': ['Comodore64'],
      },
      'id': self._machine_id,
      'try_count': 0,
      'tag': self._machine_id,
      'version': swarm_bot_hash,
    }

    self._thread = threading.Thread(target=self._run, name='bot%d' % index)
    self._thread.daemon = True
    self._thread.start()

  def join(self):
    """Blocks until the bot's thread has exited."""
    self._thread.join()

  def is_alive(self):
    """Returns True while the bot's thread is still running."""
    return self._thread.is_alive()

  def _run(self):
    """Poll/process loop; runs on the bot's own thread until killed.

    Pushes floats (task latencies in seconds) and event name strings into
    self._events for later aggregation by print_results().
    """
    try:
      self._progress.update_item('%d alive' % self._index, bots=1)
      while True:
        if self._kill_event.get():
          return
        data = {'attributes': json.dumps(self._attributes)}
        request = net.url_open(self._swarming + '/poll_for_test', data=data)
        if request is None:
          self._events.put('poll_for_test_empty')
          continue
        start = time.time()
        try:
          manifest = json.load(request)
        except ValueError:
          self._progress.update_item('Failed to poll')
          self._events.put('poll_for_test_invalid')
          continue

        commands = [c['function'] for c in manifest.get('commands', [])]
        if not commands:
          # Nothing to run.
          self._events.put('sleep')
          time.sleep(manifest['come_back'])
          continue

        if commands == ['UpdateSlave']:
          # Calculate the proper SHA-1 and loop again.
          # This could happen if the Swarming server is upgraded while this
          # script runs.
          self._attributes['version'] = calculate_version(
              manifest['commands'][0]['args'])
          self._events.put('update_slave')
          continue

        if commands != ['StoreFiles', 'RunCommands']:
          self._progress.update_item(
              'Unexpected RPC call %s\n%s' % (commands, manifest))
          self._events.put('unknown_rpc')
          break

        # The normal way Swarming works is that it 'stores' a test_run.swarm
        # file and then defer control to swarm_bot/local_test_runner.py.
        store_cmd = manifest['commands'][0]
        assert len(store_cmd['args']) == 1, store_cmd['args']
        filepath, filename, test_run_content = store_cmd['args'][0]
        assert filepath == ''
        assert filename == 'test_run.swarm'
        assert 'local_test_runner.py' in manifest['commands'][1]['args'][0], (
            manifest['commands'][1])
        result_url = manifest['result_url']
        test_run = json.loads(test_run_content)
        assert result_url == test_run['result_url']
        ping_url = test_run['ping_url']
        ping_delay = test_run['ping_delay']
        self._progress.update_item('%d processing' % self._index, processing=1)

        # Fake activity and send pings as requested.
        while True:
          # Time left before the fake task completes. The previous code
          # computed elapsed-minus-duration, which clamps to 0 until the
          # deadline has already passed, so the loop broke out immediately
          # without consuming any time or sending a single ping.
          remaining = max(0, self._duration - (time.time() - start))
          if remaining > ping_delay:
            result = net.url_read(ping_url)
            assert result == 'OK'
            remaining = max(0, self._duration - (time.time() - start))
          if not remaining:
            break
          # Sleep at most ping_delay at a time so keep-alive pings keep
          # flowing for long-running fake tasks.
          time.sleep(min(remaining, ping_delay))

        data = {
          'c': test_run['configuration']['config_name'],
          'n': test_run['test_run_name'],
          'o': False,
          'result_output': TASK_OUTPUT,
          's': True,
          'x': '0',
        }
        result = net.url_read(manifest['result_url'], data=data)
        self._progress.update_item(
            '%d processed' % self._index, processing=-1, processed=1)
        if not result:
          self._events.put('result_url_fail')
        else:
          assert result == 'Successfully update the runner results.', result
          self._events.put(time.time() - start)
    finally:
      try:
        # Unregister itself. Otherwise the server will have tons of fake slaves
        # that the admin will have to remove manually.
        response = net.url_open(
            self._swarming + '/delete_machine_stats',
            data=[('r', self._machine_id)])
        if not response:
          self._events.put('failed_unregister')
        else:
          response.read()
      finally:
        self._progress.update_item('%d quit' % self._index, bots=-1)
+
+
def main():
  """Spawns a herd of fake swarm bots against a Swarming server.

  Runs until interrupted with Ctrl-C (or until every bot has died), then
  prints latency/event histograms and optionally dumps the raw events to a
  json file.

  Returns:
    Process exit code, 0 on success.
  """
  colorama.init()
  parser = optparse.OptionParser(description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-S', '--swarming',
      metavar='URL', default='',
      help='Swarming server to use')

  group = optparse.OptionGroup(parser, 'Load generated')
  group.add_option(
      '--slaves', type='int', default=300, metavar='N',
      help='Number of swarm bot slaves, default: %default')
  group.add_option(
      '-c', '--consume', type='float', default=60., metavar='N',
      help='Duration (s) for consuming a request, default: %default')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Display options')
  group.add_option(
      '--columns', type='int', default=graph.get_console_width(), metavar='N',
      help='For histogram display, default:%default')
  group.add_option(
      '--buckets', type='int', default=20, metavar='N',
      help='Number of buckets for histogram display, default:%default')
  parser.add_option_group(group)

  parser.add_option(
      '--dump', metavar='FOO.JSON', help='Dumps to json file')
  parser.add_option(
      '-v', '--verbose', action='store_true', help='Enables logging')

  options, args = parser.parse_args()
  logging.basicConfig(level=logging.INFO if options.verbose else logging.FATAL)
  if args:
    parser.error('Unsupported args: %s' % args)
  options.swarming = options.swarming.rstrip('/')
  if not options.swarming:
    parser.error('--swarming is required.')
  if options.consume <= 0:
    parser.error('Needs --consume > 0. 0.01 is a valid value.')

  print(
      'Running %d slaves, each task lasting %.1fs' % (
        options.slaves, options.consume))
  print('Ctrl-C to exit.')
  print('[processing/processed/bots]')
  columns = [('processing', 0), ('processed', 0), ('bots', 0)]
  progress = threading_utils.Progress(columns)
  events = Queue.Queue()
  start = time.time()
  kill_event = threading_utils.Bit()
  swarm_bot_hash = calculate_version(options.swarming + '/get_slave_code')
  slaves = [
    FakeSwarmBot(
        options.swarming, swarm_bot_hash, i, progress, options.consume,
        events, kill_event)
    for i in range(options.slaves)
  ]
  try:
    # Wait for all the slaves to come alive.
    while not all(s.is_alive() for s in slaves):
      time.sleep(0.01)
    progress.update_item('Ready to run')
    while slaves:
      progress.print_update()
      time.sleep(0.01)
      # The slaves could be told to die.
      slaves = [s for s in slaves if s.is_alive()]
  except KeyboardInterrupt:
    kill_event.set()

  progress.update_item('Waiting for slaves to quit.', raw=True)
  progress.update_item('')
  while slaves:
    progress.print_update()
    slaves = [s for s in slaves if s.is_alive()]
  # At this point, progress is not used anymore.
  print('')
  print('Ran for %.1fs.' % (time.time() - start))
  print('')
  # Queue.queue is a collections.deque; snapshot it into a list because
  # json.dump() below raises TypeError on a deque.
  results = list(events.queue)
  print_results(results, options.columns, options.buckets)
  if options.dump:
    with open(options.dump, 'w') as f:
      json.dump(results, f, separators=(',', ':'))
  return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
« no previous file with comments | « no previous file | tools/swarming_load_test_client.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698