Commit 8cf53d67, authored by jbudorick, committed by Commit bot

[Android] Log once per minute while waiting for timeout_retry thread completion.

This should prevent all commands run via timeout_retry.Run from triggering the
20-minute buildbot timeout. In particular, this should prevent long-running
gtest suites, e.g. the webrtc perf tests, from timing out.

BUG=

Review URL: https://codereview.chromium.org/1370133004

Cr-Commit-Position: refs/heads/master@{#351112}
parent 1e5de1b0
......@@ -7,6 +7,7 @@ Function/method decorators that provide timeout and retry logic.
"""
import functools
import itertools
import sys
import threading
......@@ -33,12 +34,13 @@ def _TimeoutRetryWrapper(f, timeout_func, retries_func, pass_values=False):
The wrapped function.
"""
@functools.wraps(f)
def TimeoutRetryWrapper(*args, **kwargs):
def timeout_retry_wrapper(*args, **kwargs):
timeout = timeout_func(*args, **kwargs)
retries = retries_func(*args, **kwargs)
if pass_values:
kwargs['timeout'] = timeout
kwargs['retries'] = retries
@functools.wraps(f)
def impl():
return f(*args, **kwargs)
try:
......@@ -46,14 +48,17 @@ def _TimeoutRetryWrapper(f, timeout_func, retries_func, pass_values=False):
timeout_retry.TimeoutRetryThread):
return impl()
else:
return timeout_retry.Run(impl, timeout, retries)
desc = '%s(%s)' % (f.__name__, ', '.join(itertools.chain(
(str(a) for a in args),
('%s=%s' % (k, str(v)) for k, v in kwargs.iteritems()))))
return timeout_retry.Run(impl, timeout, retries, desc=desc)
except reraiser_thread.TimeoutError as e:
raise device_errors.CommandTimeoutError(str(e)), None, (
sys.exc_info()[2])
except cmd_helper.TimeoutError as e:
raise device_errors.CommandTimeoutError(str(e)), None, (
sys.exc_info()[2])
return TimeoutRetryWrapper
return timeout_retry_wrapper
def WithTimeoutAndRetries(f):
......
......@@ -8,6 +8,7 @@
import logging
import sys
import threading
import time
import traceback
from devil.utils import watchdog_timer
......@@ -104,19 +105,23 @@ class ReraiserThreadGroup(object):
for thread in self._threads:
thread.start()
def _JoinAll(self, watcher=None):
def _JoinAll(self, watcher=None, timeout=None):
"""Join all threads without stack dumps.
Reraises exceptions raised by the child threads and supports breaking
immediately on exceptions raised on the main thread.
Args:
watcher: Watchdog object providing timeout, by default waits forever.
watcher: Watchdog object providing the thread timeout. If none is
provided, the thread will never be timed out.
timeout: An optional number of seconds to wait before timing out the join
operation. This will not time out the threads.
"""
if watcher is None:
watcher = watchdog_timer.WatchdogTimer(None)
alive_threads = self._threads[:]
while alive_threads:
end_time = (time.time() + timeout) if timeout else None
while alive_threads and (end_time is None or end_time > time.time()):
for thread in alive_threads[:]:
if watcher.IsTimedOut():
raise TimeoutError('Timed out waiting for %d of %d threads.' %
......@@ -129,7 +134,15 @@ class ReraiserThreadGroup(object):
for thread in self._threads:
thread.ReraiseIfException()
def JoinAll(self, watcher=None):
def IsAlive(self):
"""Check whether any of the threads are still alive.
Returns:
Whether any of the threads are still alive.
"""
return any(t.isAlive() for t in self._threads)
def JoinAll(self, watcher=None, timeout=None):
"""Join all threads.
Reraises exceptions raised by the child threads and supports breaking
......@@ -137,10 +150,13 @@ class ReraiserThreadGroup(object):
stacks will be logged on watchdog timeout.
Args:
watcher: Watchdog object providing timeout, by default waits forever.
watcher: Watchdog object providing the thread timeout. If none is
provided, the thread will never be timed out.
timeout: An optional number of seconds to wait before timing out the join
operation. This will not time out the threads.
"""
try:
self._JoinAll(watcher)
self._JoinAll(watcher, timeout)
except TimeoutError:
logging.critical('Timed out. Dumping threads.')
for thread in (t for t in self._threads if t.isAlive()):
......
......@@ -85,7 +85,7 @@ def CurrentTimeoutThread():
def WaitFor(condition, wait_period=5, max_tries=None):
"""Wait for a condition to become true.
Repeadly call the function condition(), with no arguments, until it returns
Repeatedly call the function condition(), with no arguments, until it returns
a true value.
If called within a TimeoutRetryThread, it cooperates nicely with it.
......@@ -126,7 +126,7 @@ def WaitFor(condition, wait_period=5, max_tries=None):
return None
def Run(func, timeout, retries, args=None, kwargs=None):
def Run(func, timeout, retries, args=None, kwargs=None, desc=None):
"""Runs the passed function in a separate thread with timeouts and retries.
Args:
......@@ -135,6 +135,8 @@ def Run(func, timeout, retries, args=None, kwargs=None):
retries: the number of retries.
args: list of positional args to pass to |func|.
kwargs: dictionary of keyword args to pass to |func|.
desc: An optional description of |func| used in logging. If omitted,
|func.__name__| will be used.
Returns:
The return value of func(*args, **kwargs).
......@@ -160,8 +162,12 @@ def Run(func, timeout, retries, args=None, kwargs=None):
try:
thread_group = reraiser_thread.ReraiserThreadGroup([child_thread])
thread_group.StartAll()
thread_group.JoinAll(child_thread.GetWatcher())
return ret[0]
while True:
thread_group.JoinAll(watcher=child_thread.GetWatcher(), timeout=60)
if thread_group.IsAlive():
logging.info('Still working on %s', desc if desc else func.__name__)
else:
return ret[0]
except:
child_thread.LogTimeoutException()
if num_try > retries:
......
......@@ -2,6 +2,7 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import functools
import logging
from devil.android import device_errors
......@@ -18,6 +19,7 @@ def handle_shard_failures(f):
f: the function being decorated. The function must take at least one
argument, and that argument must be the device.
"""
@functools.wraps(f)
def wrapper(dev, *args, **kwargs):
try:
return f(dev, *args, **kwargs)
......@@ -29,7 +31,6 @@ def handle_shard_failures(f):
logging.exception('Shard died: %s(%s)', f.__name__, str(dev))
return None
wrapper.__name__ = f.__name__
return wrapper
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment