Index: build/android/pylib/base_test_sharder.py |
diff --git a/build/android/pylib/base_test_sharder.py b/build/android/pylib/base_test_sharder.py |
index 5306769155050ba032364650daeecaa1581e609b..b8d03c8300dd170c6a9b46b17c1af16504feacf5 100644 |
--- a/build/android/pylib/base_test_sharder.py |
+++ b/build/android/pylib/base_test_sharder.py |
@@ -7,6 +7,7 @@ import android_commands |
import logging |
import multiprocessing |
+from android_commands import errors |
from test_result import TestResults |
@@ -43,7 +44,9 @@ class BaseTestSharder(object): |
def __init__(self, attached_devices): |
self.attached_devices = attached_devices |
- self.retries = 1 |
+ # Worst case scenario: a device will drop offline per run, so we need |
+ # to retry until we're out of devices. |
+ self.retries = len(self.attached_devices) |
self.tests = [] |
def CreateShardedTestRunner(self, device, index): |
@@ -83,12 +86,20 @@ class BaseTestSharder(object): |
logging.warning('Try %d of %d', retry + 1, self.retries) |
self.SetupSharding(self.tests) |
test_runners = [] |
- for index, device in enumerate(self.attached_devices): |
- logging.warning('*' * 80) |
- logging.warning('Creating shard %d for %s', index, device) |
- logging.warning('*' * 80) |
- test_runner = self.CreateShardedTestRunner(device, index) |
- test_runners += [test_runner] |
+ |
+ # Try to create N shards, retrying on failure. |
+ try: |
+ for index, device in enumerate(self.attached_devices): |
+ logging.warning('*' * 80) |
+ logging.warning('Creating shard %d for %s', index, device) |
+ logging.warning('*' * 80) |
+ test_runner = self.CreateShardedTestRunner(device, index) |
+ test_runners += [test_runner] |
+ except errors.DeviceUnresponsiveError as e: |
+ logging.critical('****Failed to create a shard: [%s]', e) |
+ self.attached_devices.remove(device) |
+ continue |
+ |
logging.warning('Starting...') |
pool = multiprocessing.Pool(len(self.attached_devices), |
SetTestsContainer, |
@@ -96,8 +107,12 @@ class BaseTestSharder(object): |
# map can't handle KeyboardInterrupt exception. It's a python bug. |
# So use map_async instead. |
async_results = pool.map_async(_ShardedTestRunnable, test_runners) |
- results_lists = async_results.get(999999) |
- |
+ try: |
+ results_lists = async_results.get(999999) |
+ except errors.DeviceUnresponsiveError as e: |
+ logging.critical('****Failed to run test: [%s]', e) |
+ self.attached_devices = android_commands.GetAttachedDevices() |
+ continue |
yongsheng
2012/11/01 02:22:49
If one device raises an exception, this might bloc
bulach
2012/11/01 11:41:06
hmm, sorry, I'm not sure what do you mean by "bloc
yongsheng
2012/11/02 00:55:11
you're right. That's what i mean: one exception is
|
test_results = TestResults.FromTestResults(results_lists) |
# Re-check the attached devices for some devices may |
# become offline |
@@ -119,5 +134,9 @@ class BaseTestSharder(object): |
self.tests += [t.name] |
if not self.tests: |
break |
+ else: |
+ # We ran out of retries, possibly out of healthy devices. |
+ # There's no recovery at this point. |
+ raise Exception('Unrecoverable error while retrying test runs.') |
self.OnTestsCompleted(test_runners, final_results) |
return final_results |