Commit 9d2ac622 authored by Olivier Li's avatar Olivier Li Committed by Commit Bot

Add monitoring loop to HangWatcher

Bug: 1034046
Change-Id: Id471b00549b34887039dcddb3d4c22a6d7f0a9fc
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2033428
Commit-Queue: Oliver Li <olivierli@chromium.org>
Reviewed-by: default avatarFrançois Doray <fdoray@chromium.org>
Cr-Commit-Position: refs/heads/master@{#738388}
parent 4b299a40
......@@ -11,6 +11,7 @@
#include "base/callback_helpers.h"
#include "base/no_destructor.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/thread_checker.h"
#include "base/threading/thread_restrictions.h"
#include "base/time/time.h"
......@@ -21,6 +22,16 @@ namespace {
HangWatcher* g_instance = nullptr;
}
constexpr const char* kThreadName = "HangWatcher";
// The time that the HangWatcher thread will sleep for between calls to
// Monitor(). Increasing or decreasing this does not modify the type of hangs
// that can be detected. It instead increases the probability that a call to
// Monitor() will happen at the right time to catch a hang. This has to be
// balanced with power/cpu use concerns as busy looping would catch amost all
// hangs but present unacceptable overhead.
const base::TimeDelta kMonitoringPeriod = base::TimeDelta::FromSeconds(10);
HangWatchScope::HangWatchScope(TimeDelta timeout) {
internal::HangWatchState* current_hang_watch_state =
internal::HangWatchState::GetHangWatchStateForCurrentThread()->Get();
......@@ -61,15 +72,59 @@ HangWatchScope::~HangWatchScope() {
}
HangWatcher::HangWatcher(RepeatingClosure on_hang_closure)
: on_hang_closure_(std::move(on_hang_closure)) {
: monitor_period_(kMonitoringPeriod),
monitor_event_(WaitableEvent::ResetPolicy::AUTOMATIC,
WaitableEvent::InitialState::NOT_SIGNALED),
on_hang_closure_(std::move(on_hang_closure)),
thread_(this, kThreadName) {
// |thread_checker_| should not be bound to the constructing thread.
DETACH_FROM_THREAD(thread_checker_);
DCHECK(!g_instance);
g_instance = this;
Start();
}
HangWatcher::~HangWatcher() {
DCHECK_EQ(g_instance, this);
DCHECK(watch_states_.empty());
g_instance = nullptr;
Stop();
}
void HangWatcher::Start() {
thread_.Start();
}
void HangWatcher::Stop() {
keep_monitoring_.store(false);
monitor_event_.Signal();
thread_.Join();
}
bool HangWatcher::IsWatchListEmpty() {
AutoLock auto_lock(watch_state_lock_);
return watch_states_.empty();
}
void HangWatcher::Run() {
// Monitor() should only run on |thread_|. Bind |thread_checker_| here to make
// sure of that.
DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
while (keep_monitoring_) {
// If there is nothing to watch sleep until there is.
if (IsWatchListEmpty()) {
monitor_event_.Wait();
} else {
Monitor();
}
if (keep_monitoring_) {
// Sleep until next scheduled monitoring.
monitor_event_.TimedWait(monitor_period_);
}
}
}
// static
......@@ -83,11 +138,18 @@ ScopedClosureRunner HangWatcher::RegisterThread() {
watch_states_.push_back(
internal::HangWatchState::CreateHangWatchStateForCurrentThread());
// Now that there is a thread to monitor we wake the HangWatcher thread.
if (watch_states_.size() == 1) {
monitor_event_.Signal();
}
return ScopedClosureRunner(BindOnce(&HangWatcher::UnregisterThread,
Unretained(HangWatcher::GetInstance())));
}
void HangWatcher::Monitor() {
DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
bool must_invoke_hang_closure = false;
{
AutoLock auto_lock(watch_state_lock_);
......@@ -104,6 +166,23 @@ void HangWatcher::Monitor() {
// to prevent lock reentrancy.
on_hang_closure_.Run();
}
if (after_monitor_closure_for_testing_) {
after_monitor_closure_for_testing_.Run();
}
}
void HangWatcher::SetAfterMonitorClosureForTesting(
base::RepeatingClosure closure) {
after_monitor_closure_for_testing_ = std::move(closure);
}
void HangWatcher::SetMonitoringPeriodForTesting(base::TimeDelta period) {
monitor_period_ = period;
}
void HangWatcher::SignalMonitorEventForTesting() {
monitor_event_.Signal();
}
void HangWatcher::UnregisterThread() {
......@@ -129,11 +208,20 @@ namespace internal {
// |deadline_| starts at Max() to avoid validation problems
// when setting the first legitimate value.
HangWatchState::HangWatchState() : deadline_(TimeTicks::Max()) {}
HangWatchState::HangWatchState() : deadline_(TimeTicks::Max()) {
// There should not exist a state object for this thread already.
DCHECK(!GetHangWatchStateForCurrentThread()->Get());
// Bind the new instance to this thread.
GetHangWatchStateForCurrentThread()->Set(this);
}
HangWatchState::~HangWatchState() {
DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
DCHECK_EQ(GetHangWatchStateForCurrentThread()->Get(), this);
GetHangWatchStateForCurrentThread()->Set(nullptr);
#if DCHECK_IS_ON()
// Destroying the HangWatchState should not be done if there are live
// HangWatchScopes.
......@@ -144,18 +232,13 @@ HangWatchState::~HangWatchState() {
// static
std::unique_ptr<HangWatchState>
HangWatchState::CreateHangWatchStateForCurrentThread() {
// There should not exist a state object for this thread already.
DCHECK(!GetHangWatchStateForCurrentThread()->Get());
// Allocate a watch state object for this thread.
std::unique_ptr<HangWatchState> hang_state =
std::make_unique<HangWatchState>();
// Bind the new instance to this thread.
GetHangWatchStateForCurrentThread()->Set(hang_state.get());
// Setting the thread local worked.
DCHECK(GetHangWatchStateForCurrentThread()->Get());
DCHECK_EQ(GetHangWatchStateForCurrentThread()->Get(), hang_state.get());
// Transfer ownership to caller.
return hang_state;
......
......@@ -12,6 +12,7 @@
#include "base/atomicops.h"
#include "base/callback.h"
#include "base/callback_helpers.h"
#include "base/threading/simple_thread.h"
#include "base/threading/thread_checker.h"
#include "base/threading/thread_local.h"
#include "base/time/time.h"
......@@ -65,10 +66,10 @@ class BASE_EXPORT HangWatchScope {
};
// Monitors registered threads for hangs by inspecting their associated
// HangWatchStates for deadline overruns. Only one instance of HangWatcher can
// exist at a time within a single process. This instance must outlive all
// monitored threads.
class BASE_EXPORT HangWatcher {
// HangWatchStates for deadline overruns. This happens at a regular interval on
// a separate thread. Only one instance of HangWatcher can exist at a time
// within a single process. This instance must outlive all monitored threads.
class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate {
public:
// The first invocation of the constructor will set the global instance
// accessible through GetInstance(). This means that only one instance can
......@@ -76,7 +77,7 @@ class BASE_EXPORT HangWatcher {
explicit HangWatcher(RepeatingClosure on_hang_closure);
// Clears the global instance for the class.
~HangWatcher();
~HangWatcher() override;
HangWatcher(const HangWatcher&) = delete;
HangWatcher& operator=(const HangWatcher&) = delete;
......@@ -87,21 +88,65 @@ class BASE_EXPORT HangWatcher {
// Sets up the calling thread to be monitored for threads. Returns a
// ScopedClosureRunner that unregisters the thread. This closure has to be
// called from the registered thread before it's joined.
ScopedClosureRunner RegisterThread() WARN_UNUSED_RESULT;
ScopedClosureRunner RegisterThread()
LOCKS_EXCLUDED(watch_state_lock_) WARN_UNUSED_RESULT;
// Inspects the state of all registered threads to check if they are hung.
void Monitor();
// Choose a closure to be run at the end of each call to Monitor(). Use only
// for testing.
void SetAfterMonitorClosureForTesting(base::RepeatingClosure closure);
// Set a monitoring period other than the default. Use only for
// testing.
void SetMonitoringPeriodForTesting(base::TimeDelta period);
// Force the monitoring loop to resume and evaluate whether to continue.
// This can trigger a call to Monitor() or not depending on why the
// HangWatcher thread is sleeping. Use only for testing.
void SignalMonitorEventForTesting();
private:
THREAD_CHECKER(thread_checker_);
// Inspects the state of all registered threads to check if they are hung and
// invokes the appropriate closure if so.
void Monitor();
// Call Run() on the HangWatcher thread.
void Start();
// Stop all monitoring and join the HangWatcher thread.
void Stop();
// Run the loop that periodically monitors the registered thread at a
// set time interval.
void Run() override;
base::TimeDelta monitor_period_;
// Indicates whether Run() should return after the next monitoring.
std::atomic<bool> keep_monitoring_{true};
// Use to make the HangWatcher thread wake or sleep to schedule the
// appropriate monitoring frequency.
WaitableEvent monitor_event_;
bool IsWatchListEmpty() LOCKS_EXCLUDED(watch_state_lock_);
// Stops hang watching on the calling thread by removing the entry from the
// watch list.
void UnregisterThread();
void UnregisterThread() LOCKS_EXCLUDED(watch_state_lock_);
const RepeatingClosure on_hang_closure_;
Lock watch_state_lock_;
std::vector<std::unique_ptr<internal::HangWatchState>> watch_states_
GUARDED_BY(watch_state_lock_);
base::DelegateSimpleThread thread_;
base::RepeatingClosure after_monitor_closure_for_testing_;
FRIEND_TEST_ALL_PREFIXES(HangWatcherTest, NestedScopes);
};
// Classes here are exposed in the header only for testing. They are not
......
......@@ -10,6 +10,7 @@
#include "base/callback_helpers.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/test/bind_test_util.h"
#include "base/test/task_environment.h"
#include "base/threading/platform_thread.h"
#include "base/time/time.h"
......@@ -64,7 +65,16 @@ class HangWatcherTest : public testing::Test {
: hang_watcher_(std::make_unique<HangWatcher>(
base::BindRepeating(&WaitableEvent::Signal,
base::Unretained(&hang_event_)))),
thread_(&unblock_thread_) {}
thread_(&unblock_thread_) {
hang_watcher_->SetAfterMonitorClosureForTesting(base::BindRepeating(
&WaitableEvent::Signal, base::Unretained(&monitor_event_)));
}
void SetUp() override {
// We're not testing the monitoring loop behavior in this test so we want to
// trigger monitoring manually.
hang_watcher_->SetMonitoringPeriodForTesting(base::TimeDelta::Max());
}
void StartBlockedThread() {
// Thread has not run yet.
......@@ -75,11 +85,20 @@ class HangWatcherTest : public testing::Test {
ASSERT_TRUE(PlatformThread::Create(0, &thread_, &handle));
thread_.WaitUntilScopeEntered();
// Thread registration triggered a call to HangWatcher::Monitor() which
// signaled |monitor_event_|. Reset it so it's ready for waiting later on.
monitor_event_.Reset();
}
void MonitorHangsAndJoinThread() {
// Try to detect a hang if any.
HangWatcher::GetInstance()->Monitor();
// HangWatcher::Monitor() should not be set which would mean a call to
// HangWatcher::Monitor() happened and was unacounted for.
ASSERT_FALSE(monitor_event_.IsSignaled());
// Triger a monitoring on HangWatcher thread and verify results.
hang_watcher_->SignalMonitorEventForTesting();
monitor_event_.Wait();
unblock_thread_.Signal();
......@@ -91,25 +110,39 @@ class HangWatcherTest : public testing::Test {
}
protected:
// Used to wait for monitoring. Will be signaled by the HangWatcher thread and
// so needs to outlive it.
WaitableEvent monitor_event_;
// Signaled from the HangWatcher thread when a hang is detected. Needs to
// outlive the HangWatcher thread.
WaitableEvent hang_event_;
std::unique_ptr<HangWatcher> hang_watcher_;
// Used exclusively for MOCK_TIME. No tasks will be run on the environment.
test::TaskEnvironment task_environment_{
test::TaskEnvironment::TimeSource::MOCK_TIME};
PlatformThreadHandle handle;
// Used to unblock the monitored thread. Signaled from the test main thread.
WaitableEvent unblock_thread_;
PlatformThreadHandle handle;
BlockingThread thread_;
// Signaled when a hang is detected.
WaitableEvent hang_event_;
};
} // namespace
TEST_F(HangWatcherTest, NoScopes) {
HangWatcher::GetInstance()->Monitor();
TEST_F(HangWatcherTest, NoRegisteredThreads) {
ASSERT_FALSE(monitor_event_.IsSignaled());
// Signal to advance the Run() loop.
base::HangWatcher::GetInstance()->SignalMonitorEventForTesting();
// Monitoring should just not happen when there are no registered threads.
// Wait a while to make sure it does not.
ASSERT_FALSE(monitor_event_.TimedWait(base::TimeDelta::FromSeconds(1)));
ASSERT_FALSE(hang_event_.IsSignaled());
}
......@@ -171,4 +204,64 @@ TEST_F(HangWatcherTest, NoHang) {
ASSERT_FALSE(hang_event_.IsSignaled());
}
// |HangWatcher| relies on |WaitableEvent::TimedWait| to schedule monitoring
// which cannot be tested using MockTime. Some tests will have to actually wait
// in real time before observing results but the TimeDeltas used are chosen to
// minimize flakiness as much as possible.
class HangWatcherRealTimeTest : public testing::Test {
public:
HangWatcherRealTimeTest()
: hang_watcher_(std::make_unique<HangWatcher>(
base::BindRepeating(&WaitableEvent::Signal,
base::Unretained(&hang_event_)))) {}
protected:
std::unique_ptr<HangWatcher> hang_watcher_;
// Signaled when a hang is detected.
WaitableEvent hang_event_;
std::atomic<int> monitor_count_{0};
base::ScopedClosureRunner unregister_thread_closure_;
};
TEST_F(HangWatcherRealTimeTest, PeriodicCallsCount) {
// These values are chosen to execute fast enough while running the unit tests
// but be large enough to buffer against clock precision problems.
const base::TimeDelta kMonitoringPeriod(
base::TimeDelta::FromMilliseconds(100));
const base::TimeDelta kExecutionTime = kMonitoringPeriod * 5;
// HangWatcher::Monitor() will run once right away on thread registration.
// We want to make sure it runs at least once more from being scheduled.
constexpr int kMinimumMonitorCount = 2;
// Some amount of extra monitoring can happen but it has to be of the right
// order of magnitude. Otherwise it could indicate a problem like some code
// signaling the Thread to wake up excessivelly.
const int kMaximumMonitorCount = 2 * (kExecutionTime / kMonitoringPeriod);
auto increment_monitor_count = [this]() { ++monitor_count_; };
hang_watcher_->SetMonitoringPeriodForTesting(kMonitoringPeriod);
hang_watcher_->SetAfterMonitorClosureForTesting(
base::BindLambdaForTesting(increment_monitor_count));
hang_event_.TimedWait(kExecutionTime);
// No thread ever registered so no monitoring took place at all.
ASSERT_EQ(monitor_count_.load(), 0);
unregister_thread_closure_ = hang_watcher_->RegisterThread();
hang_event_.TimedWait(kExecutionTime);
ASSERT_GE(monitor_count_.load(), kMinimumMonitorCount);
ASSERT_LE(monitor_count_.load(), kMaximumMonitorCount);
// No monitored scope means no possible hangs.
ASSERT_FALSE(hang_event_.IsSignaled());
}
} // namespace base
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment