Chromium Code Reviews| Index: build/android/pylib/base_test_sharder.py |
| diff --git a/build/android/pylib/base_test_sharder.py b/build/android/pylib/base_test_sharder.py |
| index 5306769155050ba032364650daeecaa1581e609b..b8d03c8300dd170c6a9b46b17c1af16504feacf5 100644 |
| --- a/build/android/pylib/base_test_sharder.py |
| +++ b/build/android/pylib/base_test_sharder.py |
| @@ -7,6 +7,7 @@ import android_commands |
| import logging |
| import multiprocessing |
| +from android_commands import errors |
| from test_result import TestResults |
| @@ -43,7 +44,9 @@ class BaseTestSharder(object): |
| def __init__(self, attached_devices): |
| self.attached_devices = attached_devices |
| - self.retries = 1 |
| + # Worst case scenario: a device will drop offline per run, so we need |
| + # to retry until we're out of devices. |
| + self.retries = len(self.attached_devices) |
| self.tests = [] |
| def CreateShardedTestRunner(self, device, index): |
| @@ -83,12 +86,20 @@ class BaseTestSharder(object): |
| logging.warning('Try %d of %d', retry + 1, self.retries) |
| self.SetupSharding(self.tests) |
| test_runners = [] |
| - for index, device in enumerate(self.attached_devices): |
| - logging.warning('*' * 80) |
| - logging.warning('Creating shard %d for %s', index, device) |
| - logging.warning('*' * 80) |
| - test_runner = self.CreateShardedTestRunner(device, index) |
| - test_runners += [test_runner] |
| + |
| + # Try to create N shards, retrying on failure. |
| + try: |
| + for index, device in enumerate(self.attached_devices): |
| + logging.warning('*' * 80) |
| + logging.warning('Creating shard %d for %s', index, device) |
| + logging.warning('*' * 80) |
| + test_runner = self.CreateShardedTestRunner(device, index) |
| + test_runners += [test_runner] |
| + except errors.DeviceUnresponsiveError as e: |
| + logging.critical('****Failed to create a shard: [%s]', e) |
| + self.attached_devices.remove(device) |
| + continue |
| + |
| logging.warning('Starting...') |
| pool = multiprocessing.Pool(len(self.attached_devices), |
| SetTestsContainer, |
| @@ -96,8 +107,12 @@ class BaseTestSharder(object): |
| # map can't handle KeyboardInterrupt exception. It's a python bug. |
| # So use map_async instead. |
| async_results = pool.map_async(_ShardedTestRunnable, test_runners) |
| - results_lists = async_results.get(999999) |
| - |
| + try: |
| + results_lists = async_results.get(999999) |
| + except errors.DeviceUnresponsiveError as e: |
| + logging.critical('****Failed to run test: [%s]', e) |
| + self.attached_devices = android_commands.GetAttachedDevices() |
| + continue |
|
yongsheng
2012/11/01 02:22:49
If one device raises an exception, this might bloc
bulach
2012/11/01 11:41:06
hmm, sorry, I'm not sure what do you mean by "bloc
yongsheng
2012/11/02 00:55:11
you're right. That's what i mean: one exception is
|
| test_results = TestResults.FromTestResults(results_lists) |
| # Re-check the attached devices for some devices may |
| # become offline |
| @@ -119,5 +134,9 @@ class BaseTestSharder(object): |
| self.tests += [t.name] |
| if not self.tests: |
| break |
| + else: |
| + # We ran out of retries, possibly out of healthy devices. |
| + # There's no recovery at this point. |
| + raise Exception('Unrecoverable error while retrying test runs.') |
| self.OnTestsCompleted(test_runners, final_results) |
| return final_results |