Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(438)

Unified Diff: infra/tools/master_manager/__main__.py

Issue 1108523002: Add buildbot state machine and master_manager tool. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@daemon_bot_lib_merge
Patch Set: Address iannucci's final comments. Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: infra/tools/master_manager/__main__.py
diff --git a/infra/tools/master_manager/__main__.py b/infra/tools/master_manager/__main__.py
new file mode 100755
index 0000000000000000000000000000000000000000..e67a6c3e1e0b1d8a73eaf017485b24ec5e354091
--- /dev/null
+++ b/infra/tools/master_manager/__main__.py
@@ -0,0 +1,131 @@
+#!/usr/bin/python
+# Copyright 2015 Google Inc. All Rights Reserved.
+# pylint: disable=F0401
+
+"""Start, restart and shut down masters as needed."""
+
+import argparse
+import json
+import logging
+import os
+import subprocess
+import sys
+
+from functools import partial
+
+from infra.libs import logs
+from infra.libs.buildbot import master
+from infra.libs.service_utils import daemon
+from infra.libs.service_utils import outer_loop
+from infra.services.master_lifecycle import buildbot_state
+
+
+def parse_args(): # pragma: no cover
+ parser = argparse.ArgumentParser(
+ description='Manage the state of a buildbot master. NOTE: Does nothing '
+ 'unless --prod is specified')
+ parser.add_argument('directory', nargs='?',
+ help='location of the master to manage')
+ parser.add_argument('--desired-state-file', default='desired_state.json',
+ help='location of the state file to look up information')
+ parser.add_argument('--list-all-states', action='store_true',
+ help='list all states with their actions and exit')
+ parser.add_argument('--enable-gclient-sync', action='store_true',
+ help='perform a gclient sync before every master start')
+ parser.add_argument('--emergency-file',
+ default='.stop_master_lifecycle',
+ help='filename of the emergency stop file. if this file is found in the '
+ 'master directory, exit immediately')
+ parser.add_argument('--prod', action='store_true',
+ help='actually run commands instead of printing them.')
+ parser.add_argument('--loop', action='store_true',
+ help='repeatedly run the state machine. will not terminate unless killed')
+ parser.add_argument('--loop-sleep-secs', type=int, default=5,
+ help='how many seconds to wait between loop runs. default %(default)s')
+ parser.add_argument('--connection-timeout', type=int, default=30,
+ help='how many seconds to wait for a master http request before timing '
+ 'out.')
+ outer_loop.add_argparse_options(parser)
+ logs.add_argparse_options(parser)
+
+ args = parser.parse_args()
+ logs.process_argparse_options(args)
+
+ if not args.list_all_states and not args.directory:
+ parser.error('A master directory must be specified.')
+ return args
+
+
+def run_state_machine_pass(
+ logger, matchlist, abs_master_directory, emergency_file, desired_state_file,
+ enable_gclient_sync, prod, connection_timeout): # pragma: no cover
+ if os.path.exists(os.path.join(abs_master_directory, emergency_file)):
+ logger.error('%s detected in %s, aborting!',
+ emergency_file, abs_master_directory)
+ return 1
+
+ desired_state_file = os.path.abspath(desired_state_file)
+ master_directory = os.path.basename(abs_master_directory)
+ evidence = buildbot_state.collect_evidence(
+ abs_master_directory, connection_timeout=connection_timeout)
+ with open(desired_state_file) as f:
+ evidence['desired_buildbot_state'] = json.load(f).get(master_directory)
+ if not evidence['desired_buildbot_state']:
+ raise KeyError('Couldn\'t get evidence for master %s from %s.' %
+ (master_directory, desired_state_file))
+
+ state, action_name, action_items = matchlist.execution_list(evidence)
+ execution_list = list(
+ master.convert_action_items_to_cli(
+ action_items, abs_master_directory,
+ enable_gclient=enable_gclient_sync))
+ logger.info('current state: %s', state)
+ logger.info('performing action: %s', action_name)
+
+ if execution_list:
+ if prod:
+ logger.info('production run, executing:')
+ else:
+ logger.info('dry run, not executing:')
+ for cmd in execution_list:
+ logger.info('* %s (in %s)', cmd['cmd'], cmd['cwd'])
+ if prod:
+ try:
+ with daemon.flock(cmd['lockfile']):
+ subprocess.check_call([str(x) for x in cmd['cmd']], cwd=cmd['cwd'])
+ except daemon.LockAlreadyLocked:
+ logger.warn(' lock on %s could not be acquired, no action taken.',
+ cmd['lockfile'])
+ else:
+ logger.info('no action to be taken.')
+ return 0
+
+
+def main(): # pragma: no cover
+ args = parse_args()
+ matchlist = buildbot_state.construct_pattern_matcher()
+ logger = logging.getLogger(__name__)
+
+ if args.list_all_states:
+ matchlist.print_all_states()
+ return 0
+
+ abs_master_directory = os.path.abspath(args.directory)
+
+ state_machine = partial(run_state_machine_pass, logger,
+ matchlist, abs_master_directory, args.emergency_file,
+ args.desired_state_file, args.enable_gclient_sync, args.prod,
+ args.connection_timeout)
+
+ if args.loop:
+ loop_opts = outer_loop.process_argparse_options(args)
+ outer_loop.loop(
+ state_machine, lambda: args.loop_sleep_secs, **loop_opts)
+ else:
+ return state_machine()
+
+ return 0
+
+
+# Run main() when executed as a script; its return value is the exit status.
+if __name__ == '__main__': # pragma: no cover
+ sys.exit(main())
« infra/services/master_lifecycle/buildbot_state.py ('K') | « infra/tools/master_manager/__init__.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698