Commit b6273fad authored by Maggie Chen, committed by Commit Bot

Enable GPU watchdog V2 by default

The current watchdog V2 has the same timeout value as V1. All experiment
code in V2 is removed.

Bug: 949839
Change-Id: I5f85d2ee538833234e328bbe2b98c2891e305a22
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2083755
Reviewed-by: Ilya Sherman <isherman@chromium.org>
Reviewed-by: Zhenyao Mo <zmo@chromium.org>
Commit-Queue: Maggie Chen <magchen@chromium.org>
Cr-Commit-Position: refs/heads/master@{#747453}
parent 1a5557f0
......@@ -64,7 +64,7 @@ const base::Feature kGpuUseDisplayThreadPriority{
// Gpu watchdog V2 to simplify the logic and reduce GPU hangs
const base::Feature kGpuWatchdogV2{"GpuWatchdogV2",
base::FEATURE_DISABLED_BY_DEFAULT};
base::FEATURE_ENABLED_BY_DEFAULT};
// Use a different set of watchdog timeouts on V1
const base::Feature kGpuWatchdogV1NewTimeout{"GpuWatchdogV1NewTimeout",
......
......@@ -20,9 +20,6 @@ namespace {
constexpr auto kGpuWatchdogTimeoutForTesting =
base::TimeDelta::FromMilliseconds(1000);
constexpr base::TimeDelta kMaxWaitTimeForTesting =
base::TimeDelta::FromMilliseconds(4000);
// This task will run for duration_ms milliseconds.
void SimpleTask(base::TimeDelta duration) {
base::PlatformThread::Sleep(duration);
......@@ -74,7 +71,6 @@ void GpuWatchdogTest::SetUp() {
watchdog_thread_ = gpu::GpuWatchdogThreadImplV2::Create(
/*start_backgrounded*/ false,
/*timeout*/ kGpuWatchdogTimeoutForTesting,
/*max_wait_time*/ kMaxWaitTimeForTesting,
/*test_mode*/ true);
}
......@@ -146,10 +142,10 @@ TEST_F(GpuWatchdogTest, GpuInitializationHang) {
SimpleTask(kGpuWatchdogTimeoutForTesting * kInitFactor +
kGpuWatchdogTimeoutForTesting *
kMaxCountOfMoreGpuThreadTimeAllowed +
kMaxWaitTimeForTesting + base::TimeDelta::FromMilliseconds(3000));
base::TimeDelta::FromMilliseconds(3000));
#else
SimpleTask(kGpuWatchdogTimeoutForTesting * kInitFactor +
kMaxWaitTimeForTesting + base::TimeDelta::FromMilliseconds(3000));
base::TimeDelta::FromMilliseconds(3000));
#endif
// Gpu hangs. OnInitComplete() is not called
......@@ -203,13 +199,13 @@ TEST_F(GpuWatchdogTest, GpuRunningATaskHang) {
base::BindOnce(&SimpleTask, kGpuWatchdogTimeoutForTesting * 2 +
kGpuWatchdogTimeoutForTesting *
kMaxCountOfMoreGpuThreadTimeAllowed +
kMaxWaitTimeForTesting +
base::TimeDelta::FromMilliseconds(4000)));
#else
task_environment_.GetMainThreadTaskRunner()->PostTask(
FROM_HERE,
base::BindOnce(&SimpleTask, kGpuWatchdogTimeoutForTesting * 2 +
kMaxWaitTimeForTesting +
base::TimeDelta::FromMilliseconds(4000)));
#endif
......@@ -262,7 +258,7 @@ TEST_F(GpuWatchdogTest, GpuSwitchingToForegroundHang) {
/*duration*/ kGpuWatchdogTimeoutForTesting * 2 +
kGpuWatchdogTimeoutForTesting *
kMaxCountOfMoreGpuThreadTimeAllowed +
kMaxWaitTimeForTesting +
base::TimeDelta::FromMilliseconds(4200),
/*time_to_switch_to_foreground*/
base::TimeDelta::FromMilliseconds(200)));
......@@ -272,7 +268,7 @@ TEST_F(GpuWatchdogTest, GpuSwitchingToForegroundHang) {
base::BindOnce(&GpuWatchdogTest::LongTaskFromBackgroundToForeground,
base::Unretained(this),
/*duration*/ kGpuWatchdogTimeoutForTesting * 2 +
kMaxWaitTimeForTesting +
base::TimeDelta::FromMilliseconds(4200),
/*time_to_switch_to_foreground*/
base::TimeDelta::FromMilliseconds(200)));
......@@ -308,10 +304,10 @@ TEST_F(GpuWatchdogTest, GpuInitializationPause) {
SimpleTask(kGpuWatchdogTimeoutForTesting * kInitFactor +
kGpuWatchdogTimeoutForTesting *
kMaxCountOfMoreGpuThreadTimeAllowed +
kMaxWaitTimeForTesting + base::TimeDelta::FromMilliseconds(4000));
base::TimeDelta::FromMilliseconds(4000));
#else
SimpleTask(kGpuWatchdogTimeoutForTesting * kInitFactor +
kMaxWaitTimeForTesting + base::TimeDelta::FromMilliseconds(4000));
base::TimeDelta::FromMilliseconds(4000));
#endif
// A GPU hang should be detected.
......@@ -354,7 +350,7 @@ TEST_F(GpuWatchdogPowerTest, GpuOnResumeHang) {
/*duration*/ kGpuWatchdogTimeoutForTesting * kRestartFactor +
kGpuWatchdogTimeoutForTesting *
kMaxCountOfMoreGpuThreadTimeAllowed +
kMaxWaitTimeForTesting + base::TimeDelta::FromMilliseconds(4200),
base::TimeDelta::FromMilliseconds(4200),
/*time_to_power_resume*/
base::TimeDelta::FromMilliseconds(200)));
#else
......@@ -363,7 +359,7 @@ TEST_F(GpuWatchdogPowerTest, GpuOnResumeHang) {
base::BindOnce(
&GpuWatchdogPowerTest::LongTaskOnResume, base::Unretained(this),
/*duration*/ kGpuWatchdogTimeoutForTesting * kRestartFactor +
kMaxWaitTimeForTesting + base::TimeDelta::FromMilliseconds(4200),
base::TimeDelta::FromMilliseconds(4200),
/*time_to_power_resume*/
base::TimeDelta::FromMilliseconds(200)));
#endif
......
......@@ -26,11 +26,9 @@
namespace gpu {
GpuWatchdogThreadImplV2::GpuWatchdogThreadImplV2(base::TimeDelta timeout,
base::TimeDelta max_wait_time,
bool is_test_mode)
: watchdog_timeout_(timeout),
in_gpu_initialization_(true),
max_wait_time_(max_wait_time),
is_test_mode_(is_test_mode),
watched_gpu_task_runner_(base::ThreadTaskRunnerHandle::Get()) {
base::MessageLoopCurrent::Get()->AddTaskObserver(this);
......@@ -83,10 +81,9 @@ GpuWatchdogThreadImplV2::~GpuWatchdogThreadImplV2() {
std::unique_ptr<GpuWatchdogThreadImplV2> GpuWatchdogThreadImplV2::Create(
bool start_backgrounded,
base::TimeDelta timeout,
base::TimeDelta max_wait_time,
bool is_test_mode) {
auto watchdog_thread = base::WrapUnique(
new GpuWatchdogThreadImplV2(timeout, max_wait_time, is_test_mode));
auto watchdog_thread =
base::WrapUnique(new GpuWatchdogThreadImplV2(timeout, is_test_mode));
base::Thread::Options options;
options.timer_slack = base::TIMER_SLACK_MAXIMUM;
watchdog_thread->StartWithOptions(options);
......@@ -98,7 +95,7 @@ std::unique_ptr<GpuWatchdogThreadImplV2> GpuWatchdogThreadImplV2::Create(
// static
std::unique_ptr<GpuWatchdogThreadImplV2> GpuWatchdogThreadImplV2::Create(
bool start_backgrounded) {
return Create(start_backgrounded, kGpuWatchdogTimeout, kMaxWaitTime, false);
return Create(start_backgrounded, kGpuWatchdogTimeout, false);
}
// Do not add power observer during watchdog init, PowerMonitor might not be up
......@@ -386,8 +383,6 @@ void GpuWatchdogThreadImplV2::OnWatchdogTimeout() {
DCHECK(!in_power_suspension_);
DCHECK(!is_paused_);
base::TimeTicks on_watchdog_timeout_start = base::TimeTicks::Now();
// If this metric is added too early (eg. watchdog creation time), it cannot
// be persistent. The histogram data will be lost after crash or browser exit.
// Delay the recording of kGpuWatchdogStart until the firs
......@@ -430,34 +425,11 @@ void GpuWatchdogThreadImplV2::OnWatchdogTimeout() {
return;
}
// An experiment for all platforms: Wait for max_wait_time_ and see if GPU
// will respond.
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kTimeoutWait);
if (GpuRespondsAfterWaiting(on_watchdog_timeout_start)) {
last_on_watchdog_timeout_timeticks_ = base::TimeTicks::Now();
last_arm_disarm_counter_ =
base::subtle::NoBarrier_Load(&arm_disarm_counter_);
task_runner()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThreadImplV2::OnWatchdogTimeout, weak_ptr_),
watchdog_timeout_);
return;
}
// Still armed without any progress. GPU possibly hangs.
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kKill);
DeliberatelyTerminateToRecoverFromHang();
}
bool GpuWatchdogThreadImplV2::GpuIsAlive() {
base::subtle::Atomic32 arm_disarm_counter =
base::subtle::NoBarrier_Load(&arm_disarm_counter_);
bool gpu_makes_progress = arm_disarm_counter != last_arm_disarm_counter_;
return (gpu_makes_progress);
}
bool GpuWatchdogThreadImplV2::WatchedThreadNeedsMoreTime(
bool no_gpu_hang_detected) {
#if defined(OS_WIN)
......@@ -480,10 +452,6 @@ bool GpuWatchdogThreadImplV2::WatchedThreadNeedsMoreTime(
count_of_more_gpu_thread_time_allowed_);
}
// For metrics only - The extra time was given in timeouts.
time_in_extra_timeouts_ =
count_of_more_gpu_thread_time_allowed_ * watchdog_timeout_;
// Calculate how many thread ticks the watched thread spent doing the work.
base::ThreadTicks now = GetWatchedThreadTime();
base::TimeDelta thread_time_elapsed =
......@@ -546,31 +514,6 @@ base::ThreadTicks GpuWatchdogThreadImplV2::GetWatchedThreadTime() {
}
#endif
// This is an experiment on all platforms to see whether the GPU will respond
// after waiting longer.
//
// |on_watchdog_timeout_start| is the reference time from which the elapsed
// wait is measured. Returns true as soon as GpuIsAlive() reports progress, and
// false once the elapsed time reaches max_wait_time_ without any progress.
// Note that this blocks the calling thread in one-second sleeps.
bool GpuWatchdogThreadImplV2::GpuRespondsAfterWaiting(
base::TimeTicks on_watchdog_timeout_start) {
// Stays zero until the first sleep completes, so the loop body runs at least
// once whenever max_wait_time_ is positive.
base::TimeDelta duration;
// Sample index passed to NumOfUsersWaitHistogram(); incremented on every call.
int count = 0;
// Record the initial sample before any waiting has happened.
NumOfUsersWaitHistogram(count++, false /*gpu_is_active*/);
while (duration < max_wait_time_) {
// Sleep for 1 second each time and check whether the GPU makes progress.
base::PlatformThread::Sleep(base::TimeDelta::FromSeconds(1));
duration = base::TimeTicks::Now() - on_watchdog_timeout_start;
NumOfUsersWaitHistogram(count++, false /*gpu_is_active*/);
if (GpuIsAlive()) {
// Progress was observed: record the event, how long it took, and one more
// sample flagged as active before reporting success.
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kProgressAfterWait);
GpuWatchdogWaitTimeHistogram(duration);
NumOfUsersWaitHistogram(count++, true /*gpu_is_active*/);
return true;
}
}
// The full wait elapsed without the GPU making progress.
return false;
}
void GpuWatchdogThreadImplV2::DeliberatelyTerminateToRecoverFromHang() {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
// If this is for gpu testing, do not terminate the gpu process.
......@@ -620,7 +563,7 @@ void GpuWatchdogThreadImplV2::DeliberatelyTerminateToRecoverFromHang() {
WithinOneMinFromPowerResumed() ? "1" : "0");
// Deliberately crash the process to create a crash dump.
*((volatile int*)0) = 0xdeadface;
*static_cast<volatile int*>(nullptr) = 0x1337;
}
void GpuWatchdogThreadImplV2::GpuWatchdogHistogram(
......@@ -707,58 +650,6 @@ void GpuWatchdogThreadImplV2::NumOfUsersWaitingWithExtraThreadTimeHistogram(
}
#endif
// Records |wait_time| -- the time spent waiting in OnWatchdogTimeout() for the
// GPU main thread to make progress -- to GPU.WatchdogThread.WaitTime, and to
// every context-specific sub-histogram that currently applies (Init,
// PowerResume, Foregrounded). When none of those applies, the sample goes to
// the Normal sub-histogram instead.
void GpuWatchdogThreadImplV2::GpuWatchdogWaitTimeHistogram(
    base::TimeDelta wait_time) {
#if defined(OS_WIN)
  // Add the time the GPU thread was given for full thread time.
  wait_time += time_in_extra_timeouts_;
#endif

  // The maximum recording time is 150 seconds because Windows needs to add the
  // time spent before reaching here (max 60 sec).
  constexpr base::TimeDelta kMin = base::TimeDelta::FromSeconds(1);
  constexpr base::TimeDelta kMax = base::TimeDelta::FromSeconds(150);
  constexpr int kBuckets = 50;

  // Every histogram below shares the same sample and bucket layout.
  const auto record = [&](const char* histogram_name) {
    base::UmaHistogramCustomTimes(histogram_name, wait_time, kMin, kMax,
                                  kBuckets);
  };

  record("GPU.WatchdogThread.WaitTime");

  bool has_specific_context = false;
  if (in_gpu_initialization_) {
    record("GPU.WatchdogThread.WaitTime.Init");
    has_specific_context = true;
  }
  if (WithinOneMinFromPowerResumed()) {
    record("GPU.WatchdogThread.WaitTime.PowerResume");
    has_specific_context = true;
  }
  if (WithinOneMinFromForegrounded()) {
    record("GPU.WatchdogThread.WaitTime.Foregrounded");
    has_specific_context = true;
  }

  if (has_specific_context)
    return;

  record("GPU.WatchdogThread.WaitTime.Normal");
}
// Records |count| into an exact-linear histogram. |gpu_is_active| selects the
// histogram: "ProgressAfterWait" when true, "NumOfUsers" when false.
void GpuWatchdogThreadImplV2::NumOfUsersWaitHistogram(int count,
                                                      bool gpu_is_active) {
  constexpr int kHistogramMax = 60;  // == kMaxWaitTime.InSeconds();
  const char* const histogram_name =
      gpu_is_active ? "GPU.WatchdogThread.WaitTime.ProgressAfterWait"
                    : "GPU.WatchdogThread.WaitTime.NumOfUsers";
  base::UmaHistogramExactLinear(histogram_name, count, kHistogramMax);
}
bool GpuWatchdogThreadImplV2::WithinOneMinFromPowerResumed() {
size_t count = base::TimeDelta::FromSeconds(60) / watchdog_timeout_;
return power_resumed_event_ && num_of_timeout_after_power_resume_ <= count;
......
......@@ -14,7 +14,6 @@ namespace gpu {
// OnGPUWatchdogTimeout for at most 4 times before the gpu thread is killed.
constexpr int kMaxCountOfMoreGpuThreadTimeAllowed = 4;
#endif
constexpr base::TimeDelta kMaxWaitTime = base::TimeDelta::FromSeconds(60);
class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
: public GpuWatchdogThread,
......@@ -26,7 +25,6 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
static std::unique_ptr<GpuWatchdogThreadImplV2> Create(
bool start_backgrounded,
base::TimeDelta timeout,
base::TimeDelta max_wait_time,
bool test_mode);
~GpuWatchdogThreadImplV2() override;
......@@ -67,7 +65,6 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
};
GpuWatchdogThreadImplV2(base::TimeDelta timeout,
base::TimeDelta max_wait_time,
bool test_mode);
void OnAddPowerObserver();
void RestartWatchdogTimeoutTask(PauseResumeSource source_of_request);
......@@ -78,12 +75,10 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
void InProgress();
bool IsArmed();
void OnWatchdogTimeout();
bool GpuIsAlive();
bool WatchedThreadNeedsMoreTime(bool no_gpu_hang_detected);
#if defined(OS_WIN)
base::ThreadTicks GetWatchedThreadTime();
#endif
bool GpuRespondsAfterWaiting(base::TimeTicks on_watchdog_timeout_start);
// Do not change the function name. It is used for [GPU HANG] crash reports.
void DeliberatelyTerminateToRecoverFromHang();
......@@ -99,14 +94,6 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
void NumOfUsersWaitingWithExtraThreadTimeHistogram(int count);
#endif
// The wait time in OnWatchdogTimeout() for the GPU main thread to make a
// progress.
void GpuWatchdogWaitTimeHistogram(base::TimeDelta wait_time);
// The number of users per second stay in Chrome after entering the 60-second
// wait time.
void NumOfUsersWaitHistogram(int count, bool gpu_is_active);
// Used for metrics. It's 1 minute after the event.
bool WithinOneMinFromPowerResumed();
bool WithinOneMinFromForegrounded();
......@@ -161,9 +148,6 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
// After GPU hang detected, how many times has the GPU thread been allowed to
// continue due to not enough thread time.
int count_of_more_gpu_thread_time_allowed_ = 0;
// The accumulated timeout time the GPU main thread was given.
base::TimeDelta time_in_extra_timeouts_;
#endif
#if defined(USE_X11)
......@@ -203,7 +187,6 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
size_t num_of_timeout_after_foregrounded_ = 0;
bool foregrounded_event_ = false;
bool power_resumed_event_ = false;
base::TimeDelta max_wait_time_;
// For gpu testing only.
const bool is_test_mode_;
......
......@@ -2388,25 +2388,6 @@
]
}
],
"GpuWatchdogV2": [
{
"platforms": [
"android",
"chromeos",
"linux",
"mac",
"windows"
],
"experiments": [
{
"name": "Enabled",
"enable_features": [
"GpuWatchdogV2"
]
}
]
}
],
"GwpAsanAndroid": [
{
"platforms": [
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment