Commit af1a3a8c authored by Olivier Li's avatar Olivier Li Committed by Commit Bot

Simplify logic to skip capture in HangWatcher.

Previously reasons to skip capturing were checked in both Monitor()
and WatchStateSnapshot() which is redundant. This also needs to change
to be able to record hang metrics even when we don't want to grab a crash
dump.


Bug: 1135528
Change-Id: Ie50c57146a9f79d968956ec02f4e24a41c252c80
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2506352Reviewed-by: default avatarFrançois Doray <fdoray@chromium.org>
Commit-Queue: François Doray <fdoray@chromium.org>
Cr-Commit-Position: refs/heads/master@{#822788}
parent 7538717f
......@@ -419,10 +419,11 @@ base::TimeTicks HangWatcher::WatchStateSnapShot::GetHighestDeadline() const {
HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
const HangWatchStates& watch_states,
base::TimeTicks snapshot_time,
base::TimeTicks deadline_ignore_threshold)
: snapshot_time_(snapshot_time) {
base::TimeTicks deadline_ignore_threshold) {
const base::TimeTicks now = base::TimeTicks::Now();
bool all_threads_marked = true;
bool found_deadline_before_ignore_threshold = false;
// Copy hung thread information.
for (const auto& watch_state : watch_states) {
uint64_t flags;
......@@ -430,12 +431,18 @@ HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
std::tie(flags, deadline) = watch_state->GetFlagsAndDeadline();
if (deadline <= deadline_ignore_threshold) {
hung_watch_state_copies_.clear();
return;
found_deadline_before_ignore_threshold = true;
}
if (internal::HangWatchDeadline::IsFlagSet(
internal::HangWatchDeadline::Flag::
kIgnoreCurrentHangWatchScopeEnabled,
flags)) {
continue;
}
// Only copy hung threads.
if (deadline <= snapshot_time) {
if (deadline <= now) {
// Attempt to mark the thread as needing to stay within its current
// HangWatchScopeEnabled until capture is complete.
bool thread_marked = watch_state->SetShouldBlockOnHang(flags, deadline);
......@@ -454,11 +461,17 @@ HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
}
}
// If some threads could not be marked for blocking then this snapshot is not
// Two cases can invalidate this snapshot and prevent the capture of the hang.
//
// 1. Some threads could not be marked for blocking so this snapshot isn't
// actionable since marked threads could be hung because of unmarked ones.
// If only the marked threads were captured the information would be
// incomplete.
if (!all_threads_marked) {
//
// 2. Any of the threads have a deadline before |deadline_ignore_threshold|.
// If any thread is ignored it reduces the confidence in the whole state and
// it's better to avoid capturing misleading data.
if (!all_threads_marked || found_deadline_before_ignore_threshold) {
hung_watch_state_copies_.clear();
return;
}
......@@ -507,8 +520,7 @@ bool HangWatcher::WatchStateSnapShot::IsActionable() const {
HangWatcher::WatchStateSnapShot HangWatcher::GrabWatchStateSnapshotForTesting()
const {
WatchStateSnapShot snapshot(watch_states_, base::TimeTicks::Now(),
deadline_ignore_threshold_);
WatchStateSnapShot snapshot(watch_states_, deadline_ignore_threshold_);
return snapshot;
}
......@@ -521,41 +533,19 @@ void HangWatcher::Monitor() {
if (watch_states_.empty())
return;
const base::TimeTicks now = base::TimeTicks::Now();
// See if any thread hung. We're holding |watch_state_lock_| so threads
// can't register or unregister but their deadline still can change
// atomically. This is fine. Detecting a hang is generally best effort and
// if a thread resumes from hang in the time it takes to move on to
// capturing then its ID will be absent from the crash keys.
bool any_thread_hung = ranges::any_of(
watch_states_,
[this, now](const std::unique_ptr<internal::HangWatchState>& state) {
uint64_t flags;
base::TimeTicks deadline;
std::tie(flags, deadline) = state->GetFlagsAndDeadline();
return !internal::HangWatchDeadline::IsFlagSet(
internal::HangWatchDeadline::Flag::
kIgnoreCurrentHangWatchScopeEnabled,
flags) &&
deadline > deadline_ignore_threshold_ && deadline < now;
});
WatchStateSnapShot watch_state_snapshot(watch_states_,
deadline_ignore_threshold_);
// If at least a thread is hung we need to capture.
if (any_thread_hung)
CaptureHang(now);
if (watch_state_snapshot.IsActionable()) {
DoDumpWithoutCrashing(watch_state_snapshot);
}
}
void HangWatcher::CaptureHang(base::TimeTicks capture_time) {
void HangWatcher::DoDumpWithoutCrashing(
const WatchStateSnapShot& watch_state_snapshot) {
capture_in_progress_.store(true, std::memory_order_relaxed);
base::AutoLock scope_lock(capture_lock_);
WatchStateSnapShot watch_state_snapshot(watch_states_, capture_time,
deadline_ignore_threshold_);
if (!watch_state_snapshot.IsActionable()) {
return;
}
#if not defined(OS_NACL)
const std::string list_of_hung_thread_ids =
watch_state_snapshot.PrepareHungThreadListCrashKey();
......
......@@ -258,7 +258,6 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate {
// on |watch_states|. If any deadline in |watch_states| is before
// |deadline_ignore_threshold|, the snapshot is empty.
WatchStateSnapShot(const HangWatchStates& watch_states,
base::TimeTicks snapshot_time,
base::TimeTicks deadline_ignore_threshold);
WatchStateSnapShot(const WatchStateSnapShot& other);
~WatchStateSnapShot();
......@@ -277,7 +276,6 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate {
bool IsActionable() const;
private:
base::TimeTicks snapshot_time_;
std::vector<WatchStateCopy> hung_watch_state_copies_;
};
......@@ -292,8 +290,9 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate {
// invokes the appropriate closure if so.
void Monitor() LOCKS_EXCLUDED(watch_state_lock_);
// Record the hang and perform the necessary housekeeping before and after.
void CaptureHang(base::TimeTicks capture_time)
// Record the hang crash dump and perform the necessary housekeeping before
// and after.
void DoDumpWithoutCrashing(const WatchStateSnapShot& watch_state_snapshot)
EXCLUSIVE_LOCKS_REQUIRED(watch_state_lock_) LOCKS_EXCLUDED(capture_lock_);
// Stop all monitoring and join the HangWatcher thread.
......
......@@ -167,16 +167,14 @@ class HangWatcherBlockingThreadTest : public HangWatcherTest {
monitor_event_.Reset();
}
void MonitorHangsAndJoinThread() {
void MonitorHangs() {
// HangWatcher::Monitor() should not be set which would mean a call to
// HangWatcher::Monitor() happened and was unacounted for.
ASSERT_FALSE(monitor_event_.IsSignaled());
// ASSERT_FALSE(monitor_event_.IsSignaled());
// Triger a monitoring on HangWatcher thread and verify results.
hang_watcher_.SignalMonitorEventForTesting();
monitor_event_.Wait();
JoinThread();
}
// Used to unblock the monitored thread. Signaled from the test main thread.
......@@ -454,15 +452,43 @@ TEST_F(HangWatcherBlockingThreadTest, Hang) {
// Simulate hang.
task_environment_.FastForwardBy(kHangTime);
MonitorHangsAndJoinThread();
// First monitoring catches and records the hang.
MonitorHangs();
ASSERT_TRUE(hang_event_.IsSignaled());
JoinThread();
}
TEST_F(HangWatcherBlockingThreadTest, HangAlreadyRecorded) {
StartBlockedThread();
// Simulate hang.
task_environment_.FastForwardBy(kHangTime);
// First monitoring catches and records the hang.
MonitorHangs();
ASSERT_TRUE(hang_event_.IsSignaled());
// Reset to attempt capture again.
hang_event_.Reset();
monitor_event_.Reset();
// Second monitoring does not record because a hang that was already recorded
// is still live.
MonitorHangs();
ASSERT_FALSE(hang_event_.IsSignaled());
JoinThread();
}
TEST_F(HangWatcherBlockingThreadTest, NoHang) {
StartBlockedThread();
MonitorHangsAndJoinThread();
// No hang to catch so nothing is recorded.
MonitorHangs();
ASSERT_FALSE(hang_event_.IsSignaled());
JoinThread();
}
namespace {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment