Commit 64ffc321 authored by Olivier Li's avatar Olivier Li Committed by Commit Bot

Allow logging of HangWatcher histograms without crash dumping.

This is done by defining logging levels instead of just activating
complete hang watching per thread type. Hang watching a thread type can
be :

1) Deactivated.
2) Logging metrics to UMA only.
3) DumpWithoutCrashing on finding a hang.

Bug: 1135528
Change-Id: I98d04c24447129cd28e2d7d04ce26743e922fef3
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2529192
Commit-Queue: Oliver Li <olivierli@chromium.org>
Reviewed-by: default avatarFrançois Doray <fdoray@chromium.org>
Cr-Commit-Position: refs/heads/master@{#828506}
parent 571bc134
...@@ -31,24 +31,33 @@ ...@@ -31,24 +31,33 @@
namespace base { namespace base {
// static // static
constexpr base::Feature kEnableHangWatcher{"EnableHangWatcher", const base::Feature HangWatcher::kEnableHangWatcher{
base::FEATURE_DISABLED_BY_DEFAULT}; "EnableHangWatcher", base::FEATURE_DISABLED_BY_DEFAULT};
constexpr base::FeatureParam<bool> kHangWatchIOThread{
&kEnableHangWatcher, "hang_watch_io_thread", false}; constexpr base::FeatureParam<int> kIOThreadLogLevel{
constexpr base::FeatureParam<bool> kHangWatchUIThread{ &HangWatcher::kEnableHangWatcher, "io_thread_log_level", 0};
&kEnableHangWatcher, "hang_watch_ui_thread", false}; constexpr base::FeatureParam<int> kUIThreadLogLevel{
constexpr base::FeatureParam<bool> kHangWatchThreadPool{ &HangWatcher::kEnableHangWatcher, "ui_thread_log_level", 0};
&kEnableHangWatcher, "hang_watch_threadpool", false}; constexpr base::FeatureParam<int> kThreadPoolLogLevel{
&HangWatcher::kEnableHangWatcher, "threadpool_log_level", 0};
constexpr base::TimeDelta HangWatchScopeEnabled::kDefaultHangWatchTime = constexpr base::TimeDelta HangWatchScopeEnabled::kDefaultHangWatchTime =
base::TimeDelta::FromSeconds(10); base::TimeDelta::FromSeconds(10);
namespace { namespace {
// Defines how much logging happens when the HangWatcher monitors the threads.
// Logging levels are set per thread type through Finch. It's important that
// the order of the enum members stay the and that their numerical
// values be in increasing order. The implementation of
// ThreadTypeLoggingLevelGreaterOrEqual() depends on it.
enum class LoggingLevel { kNone = 0, kUmaOnly = 1, kUmaAndCrash = 2 };
HangWatcher* g_instance = nullptr; HangWatcher* g_instance = nullptr;
std::atomic<bool> g_use_hang_watcher{false}; std::atomic<bool> g_use_hang_watcher{false};
std::atomic<bool> g_hang_watch_workers{false}; std::atomic<LoggingLevel> g_threadpool_log_level{LoggingLevel::kNone};
std::atomic<bool> g_hang_watch_io_thread{false}; std::atomic<LoggingLevel> g_io_thread_log_level{LoggingLevel::kNone};
std::atomic<bool> g_hang_watch_ui_thread{false}; std::atomic<LoggingLevel> g_ui_thread_log_level{LoggingLevel::kNone};
// Emits the hung thread count histogram. |count| is the number of threads // Emits the hung thread count histogram. |count| is the number of threads
// of type |thread_type| that were hung or became hung during the last // of type |thread_type| that were hung or became hung during the last
...@@ -78,6 +87,23 @@ void LogHungThreadCountHistogram(HangWatcher::ThreadType thread_type, ...@@ -78,6 +87,23 @@ void LogHungThreadCountHistogram(HangWatcher::ThreadType thread_type,
break; break;
} }
} }
// Returns true if |thread_type| was configured through Finch to have a logging
// level that is equal to or exceeds |logging_level|.
bool ThreadTypeLoggingLevelGreaterOrEqual(HangWatcher::ThreadType thread_type,
LoggingLevel logging_level) {
switch (thread_type) {
case HangWatcher::ThreadType::kIOThread:
return g_io_thread_log_level.load(std::memory_order_relaxed) >=
logging_level;
case HangWatcher::ThreadType::kUIThread:
return g_ui_thread_log_level.load(std::memory_order_relaxed) >=
logging_level;
case HangWatcher::ThreadType::kThreadPoolThread:
return g_threadpool_log_level.load(std::memory_order_relaxed) >=
logging_level;
}
}
} }
constexpr const char* kThreadName = "HangWatcher"; constexpr const char* kThreadName = "HangWatcher";
...@@ -224,9 +250,9 @@ HangWatchScopeDisabled::~HangWatchScopeDisabled() { ...@@ -224,9 +250,9 @@ HangWatchScopeDisabled::~HangWatchScopeDisabled() {
// static // static
void HangWatcher::InitializeOnMainThread() { void HangWatcher::InitializeOnMainThread() {
DCHECK(!g_use_hang_watcher); DCHECK(!g_use_hang_watcher);
DCHECK(!g_hang_watch_workers); DCHECK(g_io_thread_log_level == LoggingLevel::kNone);
DCHECK(!g_hang_watch_io_thread); DCHECK(g_ui_thread_log_level == LoggingLevel::kNone);
DCHECK(!g_hang_watch_ui_thread); DCHECK(g_threadpool_log_level == LoggingLevel::kNone);
g_use_hang_watcher.store(base::FeatureList::IsEnabled(kEnableHangWatcher), g_use_hang_watcher.store(base::FeatureList::IsEnabled(kEnableHangWatcher),
std::memory_order_relaxed); std::memory_order_relaxed);
...@@ -234,15 +260,25 @@ void HangWatcher::InitializeOnMainThread() { ...@@ -234,15 +260,25 @@ void HangWatcher::InitializeOnMainThread() {
// If hang watching is disabled as a whole there is no need to read the // If hang watching is disabled as a whole there is no need to read the
// params. // params.
if (g_use_hang_watcher.load(std::memory_order_relaxed)) { if (g_use_hang_watcher.load(std::memory_order_relaxed)) {
g_hang_watch_workers.store(kHangWatchThreadPool.Get(), g_threadpool_log_level.store(
std::memory_order_relaxed); static_cast<LoggingLevel>(kThreadPoolLogLevel.Get()),
g_hang_watch_io_thread.store(kHangWatchIOThread.Get(), std::memory_order_relaxed);
std::memory_order_relaxed); g_io_thread_log_level.store(
g_hang_watch_ui_thread.store(kHangWatchUIThread.Get(), static_cast<LoggingLevel>(kIOThreadLogLevel.Get()),
std::memory_order_relaxed); std::memory_order_relaxed);
g_ui_thread_log_level.store(
static_cast<LoggingLevel>(kUIThreadLogLevel.Get()),
std::memory_order_relaxed);
} }
} }
void HangWatcher::UnitializeOnMainThreadForTesting() {
g_use_hang_watcher.store(false, std::memory_order_relaxed);
g_threadpool_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
g_io_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
g_ui_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
}
// static // static
bool HangWatcher::IsEnabled() { bool HangWatcher::IsEnabled() {
return g_use_hang_watcher.load(std::memory_order_relaxed); return g_use_hang_watcher.load(std::memory_order_relaxed);
...@@ -250,15 +286,18 @@ bool HangWatcher::IsEnabled() { ...@@ -250,15 +286,18 @@ bool HangWatcher::IsEnabled() {
// static // static
bool HangWatcher::IsThreadPoolHangWatchingEnabled() { bool HangWatcher::IsThreadPoolHangWatchingEnabled() {
return g_hang_watch_workers.load(std::memory_order_relaxed); return g_threadpool_log_level.load(std::memory_order_relaxed) !=
LoggingLevel::kNone;
} }
bool HangWatcher::IsIOThreadHangWatchingEnabled() { bool HangWatcher::IsIOThreadHangWatchingEnabled() {
return g_hang_watch_io_thread.load(std::memory_order_relaxed); return g_io_thread_log_level.load(std::memory_order_relaxed) !=
LoggingLevel::kNone;
} }
bool HangWatcher::IsUIThreadHangWatchingEnabled() { bool HangWatcher::IsUIThreadHangWatchingEnabled() {
return g_hang_watch_ui_thread.load(std::memory_order_relaxed); return g_ui_thread_log_level.load(std::memory_order_relaxed) !=
LoggingLevel::kNone;
} }
HangWatcher::HangWatcher() HangWatcher::HangWatcher()
...@@ -465,6 +504,10 @@ HangWatcher::WatchStateSnapShot::WatchStateSnapShot( ...@@ -465,6 +504,10 @@ HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
constexpr int kInvalidHangCount = -1; constexpr int kInvalidHangCount = -1;
hung_counts_per_thread_type.fill(kInvalidHangCount); hung_counts_per_thread_type.fill(kInvalidHangCount);
// Will be true if any of the hung threads has a logging level high enough,
// as defined through finch params, to warant dumping a crash.
bool any_hung_thread_has_dumping_enabled = false;
// Copy hung thread information. // Copy hung thread information.
for (const auto& watch_state : watch_states) { for (const auto& watch_state : watch_states) {
uint64_t flags; uint64_t flags;
...@@ -494,6 +537,11 @@ HangWatcher::WatchStateSnapShot::WatchStateSnapShot( ...@@ -494,6 +537,11 @@ HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
if (deadline <= now) { if (deadline <= now) {
++hung_counts_per_thread_type[hang_count_index]; ++hung_counts_per_thread_type[hang_count_index];
if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
LoggingLevel::kUmaAndCrash)) {
any_hung_thread_has_dumping_enabled = true;
}
// Attempt to mark the thread as needing to stay within its current // Attempt to mark the thread as needing to stay within its current
// HangWatchScopeEnabled until capture is complete. // HangWatchScopeEnabled until capture is complete.
bool thread_marked = watch_state->SetShouldBlockOnHang(flags, deadline); bool thread_marked = watch_state->SetShouldBlockOnHang(flags, deadline);
...@@ -516,12 +564,16 @@ HangWatcher::WatchStateSnapShot::WatchStateSnapShot( ...@@ -516,12 +564,16 @@ HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
// of the type were found. // of the type were found.
for (size_t i = 0; i < kHangCountArraySize; ++i) { for (size_t i = 0; i < kHangCountArraySize; ++i) {
const int hang_count = hung_counts_per_thread_type[i]; const int hang_count = hung_counts_per_thread_type[i];
if (hang_count != kInvalidHangCount) const HangWatcher::ThreadType thread_type =
LogHungThreadCountHistogram(static_cast<HangWatcher::ThreadType>(i), static_cast<HangWatcher::ThreadType>(i);
hang_count); if (hang_count != kInvalidHangCount &&
ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
LoggingLevel::kUmaOnly))
LogHungThreadCountHistogram(thread_type, hang_count);
} }
// Two cases can invalidate this snapshot and prevent the capture of the hang. // Three cases can invalidate this snapshot and prevent the capture of the
// hang.
// //
// 1. Some threads could not be marked for blocking so this snapshot isn't // 1. Some threads could not be marked for blocking so this snapshot isn't
// actionable since marked threads could be hung because of unmarked ones. // actionable since marked threads could be hung because of unmarked ones.
...@@ -531,7 +583,12 @@ HangWatcher::WatchStateSnapShot::WatchStateSnapShot( ...@@ -531,7 +583,12 @@ HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
// 2. Any of the threads have a deadline before |deadline_ignore_threshold|. // 2. Any of the threads have a deadline before |deadline_ignore_threshold|.
// If any thread is ignored it reduces the confidence in the whole state and // If any thread is ignored it reduces the confidence in the whole state and
// it's better to avoid capturing misleading data. // it's better to avoid capturing misleading data.
if (!all_threads_marked || found_deadline_before_ignore_threshold) { //
// 3. The hung threads found were all of types that are not configured through
// Finch to trigger a crash dump.
//
if (!all_threads_marked || found_deadline_before_ignore_threshold ||
!any_hung_thread_has_dumping_enabled) {
hung_watch_state_copies_.clear(); hung_watch_state_copies_.clear();
return; return;
} }
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "base/callback_helpers.h" #include "base/callback_helpers.h"
#include "base/compiler_specific.h" #include "base/compiler_specific.h"
#include "base/debug/crash_logging.h" #include "base/debug/crash_logging.h"
#include "base/feature_list.h"
#include "base/memory/memory_pressure_listener.h" #include "base/memory/memory_pressure_listener.h"
#include "base/synchronization/lock.h" #include "base/synchronization/lock.h"
#include "base/thread_annotations.h" #include "base/thread_annotations.h"
...@@ -136,6 +137,10 @@ class BASE_EXPORT HangWatchScopeDisabled { ...@@ -136,6 +137,10 @@ class BASE_EXPORT HangWatchScopeDisabled {
// within a single process. This instance must outlive all monitored threads. // within a single process. This instance must outlive all monitored threads.
class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate { class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate {
public: public:
// Determines if the HangWatcher is activated. When false the HangWatcher
// thread never started.
static const base::Feature kEnableHangWatcher;
// Describes the type of a thread for logging purposes. // Describes the type of a thread for logging purposes.
enum class ThreadType { enum class ThreadType {
kIOThread = 0, kIOThread = 0,
...@@ -162,6 +167,11 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate { ...@@ -162,6 +167,11 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate {
// startup while single-threaded. // startup while single-threaded.
static void InitializeOnMainThread(); static void InitializeOnMainThread();
// Returns the values that were set through InitializeOnMainThread() to their
// default value. Used for testing since in prod initialization should happen
// only once.
static void UnitializeOnMainThreadForTesting();
// Thread safe functions to verify if hang watching is activated. If called // Thread safe functions to verify if hang watching is activated. If called
// before InitializeOnMainThread returns the default value which is false. // before InitializeOnMainThread returns the default value which is false.
static bool IsEnabled(); static bool IsEnabled();
...@@ -255,8 +265,13 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate { ...@@ -255,8 +265,13 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate {
// Construct the snapshot from provided data. |snapshot_time| can be // Construct the snapshot from provided data. |snapshot_time| can be
// different than now() to be coherent with other operations recently done // different than now() to be coherent with other operations recently done
// on |watch_states|. If any deadline in |watch_states| is before // on |watch_states|. The snapshot can be empty for a number of reasons:
// |deadline_ignore_threshold|, the snapshot is empty. // 1. If any deadline in |watch_states| is before
// |deadline_ignore_threshold|.
// 2. If some of the hung threads could not be marked as blocking on
// capture.
// 3. If none of the hung threads are of a type configured to trigger a
// crash dump.
WatchStateSnapShot(const HangWatchStates& watch_states, WatchStateSnapShot(const HangWatchStates& watch_states,
base::TimeTicks deadline_ignore_threshold); base::TimeTicks deadline_ignore_threshold);
WatchStateSnapShot(const WatchStateSnapShot& other); WatchStateSnapShot(const WatchStateSnapShot& other);
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "base/synchronization/waitable_event.h" #include "base/synchronization/waitable_event.h"
#include "base/test/bind.h" #include "base/test/bind.h"
#include "base/test/metrics/histogram_tester.h" #include "base/test/metrics/histogram_tester.h"
#include "base/test/scoped_feature_list.h"
#include "base/test/simple_test_tick_clock.h" #include "base/test/simple_test_tick_clock.h"
#include "base/test/task_environment.h" #include "base/test/task_environment.h"
#include "base/test/test_timeouts.h" #include "base/test/test_timeouts.h"
...@@ -34,6 +35,12 @@ using testing::IsEmpty; ...@@ -34,6 +35,12 @@ using testing::IsEmpty;
namespace base { namespace base {
namespace { namespace {
// Use with a FeatureList to activate crash dumping for threads marked as
// threadpool threads.
const std::vector<base::test::ScopedFeatureList::FeatureAndParams>
kFeatureAndParams{{base::HangWatcher::kEnableHangWatcher,
{{"threadpool_log_level", "2"}}}};
// Use this value to mark things very far off in the future. Adding this // Use this value to mark things very far off in the future. Adding this
// to TimeTicks::Now() gives a point that will never be reached during the // to TimeTicks::Now() gives a point that will never be reached during the
// normal execution of a test. // normal execution of a test.
...@@ -104,6 +111,9 @@ class HangWatcherTest : public testing::Test { ...@@ -104,6 +111,9 @@ class HangWatcherTest : public testing::Test {
const base::TimeDelta kHangTime = kTimeout + base::TimeDelta::FromSeconds(1); const base::TimeDelta kHangTime = kTimeout + base::TimeDelta::FromSeconds(1);
HangWatcherTest() { HangWatcherTest() {
feature_list_.InitWithFeaturesAndParameters(kFeatureAndParams, {});
hang_watcher_.InitializeOnMainThread();
hang_watcher_.SetAfterMonitorClosureForTesting(base::BindRepeating( hang_watcher_.SetAfterMonitorClosureForTesting(base::BindRepeating(
&WaitableEvent::Signal, base::Unretained(&monitor_event_))); &WaitableEvent::Signal, base::Unretained(&monitor_event_)));
...@@ -118,6 +128,8 @@ class HangWatcherTest : public testing::Test { ...@@ -118,6 +128,8 @@ class HangWatcherTest : public testing::Test {
hang_watcher_.Start(); hang_watcher_.Start();
} }
void TearDown() override { hang_watcher_.UnitializeOnMainThreadForTesting(); }
HangWatcherTest(const HangWatcherTest& other) = delete; HangWatcherTest(const HangWatcherTest& other) = delete;
HangWatcherTest& operator=(const HangWatcherTest& other) = delete; HangWatcherTest& operator=(const HangWatcherTest& other) = delete;
...@@ -130,6 +142,8 @@ class HangWatcherTest : public testing::Test { ...@@ -130,6 +142,8 @@ class HangWatcherTest : public testing::Test {
// outlive the HangWatcher thread. // outlive the HangWatcher thread.
WaitableEvent hang_event_; WaitableEvent hang_event_;
base::test::ScopedFeatureList feature_list_;
HangWatcher hang_watcher_; HangWatcher hang_watcher_;
// Used exclusively for MOCK_TIME. No tasks will be run on the environment. // Used exclusively for MOCK_TIME. No tasks will be run on the environment.
...@@ -896,6 +910,9 @@ namespace { ...@@ -896,6 +910,9 @@ namespace {
class HangWatchScopeEnabledBlockingTest : public testing::Test { class HangWatchScopeEnabledBlockingTest : public testing::Test {
public: public:
HangWatchScopeEnabledBlockingTest() { HangWatchScopeEnabledBlockingTest() {
feature_list_.InitWithFeaturesAndParameters(kFeatureAndParams, {});
hang_watcher_.InitializeOnMainThread();
hang_watcher_.SetOnHangClosureForTesting(base::BindLambdaForTesting([&] { hang_watcher_.SetOnHangClosureForTesting(base::BindLambdaForTesting([&] {
capture_started_.Signal(); capture_started_.Signal();
// Simulate capturing that takes a long time. // Simulate capturing that takes a long time.
...@@ -922,6 +939,8 @@ class HangWatchScopeEnabledBlockingTest : public testing::Test { ...@@ -922,6 +939,8 @@ class HangWatchScopeEnabledBlockingTest : public testing::Test {
base::HangWatcher::ThreadType::kThreadPoolThread); base::HangWatcher::ThreadType::kThreadPoolThread);
} }
void TearDown() override { hang_watcher_.UnitializeOnMainThreadForTesting(); }
HangWatchScopeEnabledBlockingTest( HangWatchScopeEnabledBlockingTest(
const HangWatchScopeEnabledBlockingTest& other) = delete; const HangWatchScopeEnabledBlockingTest& other) = delete;
HangWatchScopeEnabledBlockingTest& operator=( HangWatchScopeEnabledBlockingTest& operator=(
...@@ -960,6 +979,7 @@ class HangWatchScopeEnabledBlockingTest : public testing::Test { ...@@ -960,6 +979,7 @@ class HangWatchScopeEnabledBlockingTest : public testing::Test {
base::WaitableEvent continue_capture_; base::WaitableEvent continue_capture_;
bool completed_capture_{false}; bool completed_capture_{false};
base::test::ScopedFeatureList feature_list_;
HangWatcher hang_watcher_; HangWatcher hang_watcher_;
base::ScopedClosureRunner unregister_thread_closure_; base::ScopedClosureRunner unregister_thread_closure_;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment