build/android/pylib/base/shard.py - Issue 18770008: [Android] Redesigns the sharder to allow replicated vs distributed tests

Unified Diff: build/android/pylib/base/shard.py

Issue 18770008: [Android] Redesigns the sharder to allow replicated vs distributed tests (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Re-adds -f short form to gtest_filter switch Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: build/android/pylib/base/shard.py

diff --git a/build/android/pylib/base/shard.py b/build/android/pylib/base/shard.py

deleted file mode 100644

index af02eed1508bf318623c282e4d9e860d522ee608..0000000000000000000000000000000000000000

--- a/build/android/pylib/base/shard.py

+++ /dev/null

@@ -1,302 +0,0 @@

-# Use of this source code is governed by a BSD-style license that can be

-# found in the LICENSE file.

-"""Implements test sharding logic."""

-import logging

-import threading

-from pylib import android_commands

-from pylib import constants

-from pylib.utils import reraiser_thread

-from pylib.utils import watchdog_timer

-import base_test_result

-DEFAULT_TIMEOUT = 7 * 60 # seven minutes

-class _ThreadSafeCounter(object):

- """A threadsafe counter."""

- def __init__(self):

- self._lock = threading.Lock()

- self._value = 0

- def GetAndIncrement(self):

- """Get the current value and increment it atomically.

- Returns:

- The value before incrementing.

- """

- with self._lock:

- pre_increment = self._value

- self._value += 1

- return pre_increment

-class _Test(object):

- """Holds a test with additional metadata."""

- def __init__(self, test, tries=0):

- """Initializes the _Test object.

- Args:

- test: the test.

- tries: number of tries so far.

- """

- self.test = test

- self.tries = tries

-class _TestCollection(object):

- """A threadsafe collection of tests.

- Args:

- tests: list of tests to put in the collection.

- """

- def __init__(self, tests=[]):

- self._lock = threading.Lock()

- self._tests = []

- self._tests_in_progress = 0

- # Used to signal that an item is avaliable or all items have been handled.

- self._item_avaliable_or_all_done = threading.Event()

- for t in tests:

- self.add(t)

- def _pop(self):

- """Pop a test from the collection.

- Waits until a test is avaliable or all tests have been handled.

- Returns:

- A test or None if all tests have been handled.

- """

- while True:

- # Wait for a test to be avaliable or all tests to have been handled.

- self._item_avaliable_or_all_done.wait()

- with self._lock:

- # Check which of the two conditions triggered the signal.

- if self._tests_in_progress == 0:

- return None

- try:

- return self._tests.pop(0)

- except IndexError:

- # Another thread beat us to the avaliable test, wait again.

- self._item_avaliable_or_all_done.clear()

- def add(self, test):

- """Add an test to the collection.

- Args:

- test: A test to add.

- """

- with self._lock:

- self._tests.append(test)

- self._item_avaliable_or_all_done.set()

- self._tests_in_progress += 1

- def test_completed(self):

- """Indicate that a test has been fully handled."""

- with self._lock:

- self._tests_in_progress -= 1

- if self._tests_in_progress == 0:

- # All tests have been handled, signal all waiting threads.

- self._item_avaliable_or_all_done.set()

- def __iter__(self):

- """Iterate through tests in the collection until all have been handled."""

- while True:

- r = self._pop()

- if r is None:

- break

- yield r

-def _RunTestsFromQueue(runner, test_collection, out_results, watcher,

- num_retries):

- """Runs tests from the test_collection until empty using the given runner.

- Adds TestRunResults objects to the out_results list and may add tests to the

- out_retry list.

- Args:

- runner: A TestRunner object used to run the tests.

- test_collection: A _TestCollection from which to get _Test objects to run.

- out_results: A list to add TestRunResults to.

- watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout.

- num_retries: Number of retries for a test.

- """

- for test in test_collection:

- watcher.Reset()

- try:

- if not android_commands.IsDeviceAttached(runner.device):

- # Device is unresponsive, stop handling tests on this device.

- msg = 'Device %s is unresponsive.' % runner.device

- logging.warning(msg)

- raise android_commands.errors.DeviceUnresponsiveError(msg)

- result, retry = runner.RunTest(test.test)

- test.tries += 1

- if retry and test.tries <= num_retries:

- # Retry non-passing results, only record passing results.

- pass_results = base_test_result.TestRunResults()

- pass_results.AddResults(result.GetPass())

- out_results.append(pass_results)

- logging.warning('Will retry test, try #%s.' % test.tries)

- test_collection.add(_Test(test=retry, tries=test.tries))

- else:

- # All tests passed or retry limit reached. Either way, record results.

- out_results.append(result)

- except:

- # An unhandleable exception, ensure tests get run by another device and

- # reraise this exception on the main thread.

- test_collection.add(test)

- raise

- finally:

- # Retries count as separate tasks so always mark the popped test as done.

- test_collection.test_completed()

-def _SetUp(runner_factory, device, out_runners, threadsafe_counter):

- """Creates a test runner for each device and calls SetUp() in parallel.

- Note: if a device is unresponsive the corresponding TestRunner will not be

- added to out_runners.

- Args:

- runner_factory: callable that takes a device and index and returns a

- TestRunner object.

- device: the device serial number to set up.

- out_runners: list to add the successfully set up TestRunner object.

- threadsafe_counter: a _ThreadSafeCounter object used to get shard indices.

- """

- try:

- index = threadsafe_counter.GetAndIncrement()

- logging.warning('Creating shard %s for device %s.', index, device)

- runner = runner_factory(device, index)

- runner.SetUp()

- out_runners.append(runner)

- except android_commands.errors.DeviceUnresponsiveError as e:

- logging.warning('Failed to create shard for %s: [%s]', device, e)

-def _RunAllTests(runners, tests, num_retries, timeout=None):

- """Run all tests using the given TestRunners.

- Args:

- runners: a list of TestRunner objects.

- tests: a list of Tests to run using the given TestRunners.

- num_retries: number of retries for a test.

- timeout: watchdog timeout in seconds, defaults to the default timeout.

- Returns:

- A tuple of (TestRunResults object, exit code)

- """

- logging.warning('Running %s tests with %s test runners.' %

- (len(tests), len(runners)))

- tests_collection = _TestCollection([_Test(t) for t in tests])

- results = []

- exit_code = 0

- watcher = watchdog_timer.WatchdogTimer(timeout)

- workers = reraiser_thread.ReraiserThreadGroup(

- [reraiser_thread.ReraiserThread(

- _RunTestsFromQueue,

- [r, tests_collection, results, watcher, num_retries],

- name=r.device[-4:])

- for r in runners])

- run_results = base_test_result.TestRunResults()

- workers.StartAll()

- # Catch DeviceUnresponsiveErrors and set a warning exit code

- try:

- workers.JoinAll(watcher)

- except android_commands.errors.DeviceUnresponsiveError as e:

- logging.error(e)

- exit_code = constants.WARNING_EXIT_CODE

- for r in results:

- run_results.AddTestRunResults(r)

- if not run_results.DidRunPass():

- exit_code = constants.ERROR_EXIT_CODE

- return (run_results, exit_code)

-def _CreateRunners(runner_factory, devices, timeout=None):

- """Creates a test runner for each device and calls SetUp() in parallel.

- Note: if a device is unresponsive the corresponding TestRunner will not be

- included in the returned list.

- Args:

- runner_factory: callable that takes a device and index and returns a

- TestRunner object.

- devices: list of device serial numbers as strings.

- timeout: watchdog timeout in seconds, defaults to the default timeout.

- Returns:

- A list of TestRunner objects.

- """

- logging.warning('Creating %s test runners.' % len(devices))

- runners = []

- counter = _ThreadSafeCounter()

- threads = reraiser_thread.ReraiserThreadGroup(

- [reraiser_thread.ReraiserThread(_SetUp,

- [runner_factory, d, runners, counter],

- name=d[-4:])

- for d in devices])

- threads.StartAll()

- threads.JoinAll(watchdog_timer.WatchdogTimer(timeout))

- return runners

-def _TearDownRunners(runners, timeout=None):

- """Calls TearDown() for each test runner in parallel.

- Args:

- runners: a list of TestRunner objects.

- timeout: watchdog timeout in seconds, defaults to the default timeout.

- """

- threads = reraiser_thread.ReraiserThreadGroup(

- [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:])

- for r in runners])

- threads.StartAll()

- threads.JoinAll(watchdog_timer.WatchdogTimer(timeout))

-def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug',

- test_timeout=DEFAULT_TIMEOUT,

- setup_timeout=DEFAULT_TIMEOUT,

- num_retries=2):

- """Run all tests on attached devices, retrying tests that don't pass.

- Args:

- runner_factory: callable that takes a device and index and returns a

- TestRunner object.

- devices: list of attached device serial numbers as strings.

- tests: list of tests to run.

- build_type: either 'Debug' or 'Release'.

- test_timeout: watchdog timeout in seconds for running tests, defaults to the

- default timeout.

- setup_timeout: watchdog timeout in seconds for creating and cleaning up

- test runners, defaults to the default timeout.

- num_retries: number of retries for a test.

- Returns:

- A tuple of (base_test_result.TestRunResults object, exit code).

- """

- if not tests:

- logging.error('No tests to run.')

- return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE)

- logging.info('Will run %d tests: %s', len(tests), str(tests))

- runners = _CreateRunners(runner_factory, devices, setup_timeout)

- try:

- return _RunAllTests(runners, tests, num_retries, test_timeout)

- finally:

- try:

- _TearDownRunners(runners, setup_timeout)

- except android_commands.errors.DeviceUnresponsiveError as e:

- logging.warning('Device unresponsive during TearDown: [%s]', e)

« no previous file with comments | « build/android/pylib/base/base_test_result.py ('k') | build/android/pylib/base/shard_unittest.py » ('j') | no next file with comments »