Enable GPU watchdog V2 by default

The current watchdog V2 has the same timeout value as V1. All experiment code in V2 is removed. Bug: 949839 Change-Id: I5f85d2ee538833234e328bbe2b98c2891e305a22 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2083755 Reviewed-by: Ilya Sherman <isherman@chromium.org> Reviewed-by: Zhenyao Mo <zmo@chromium.org> Commit-Queue: Maggie Chen <magchen@chromium.org> Cr-Commit-Position: refs/heads/master@{#747453}

Enable GPU watchdog V2 by default
The current watchdog V2 has the same timeout value as V1. All experiment code in V2 is removed. Bug: 949839 Change-Id: I5f85d2ee538833234e328bbe2b98c2891e305a22 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2083755 Reviewed-by: Ilya Sherman <isherman@chromium.org> Reviewed-by: Zhenyao Mo <zmo@chromium.org> Commit-Queue: Maggie Chen <magchen@chromium.org> Cr-Commit-Position: refs/heads/master@{#747453}
b6273fad · Maggie Chen · Commit Bot · 1a5557f0 · b6273fad · b6273fad
Commit b6273fad authored Mar 05, 2020 by Maggie Chen Committed by Commit Bot Mar 05, 2020
5 changed files
--- a/gpu/config/gpu_finch_features.cc
+++ b/gpu/config/gpu_finch_features.cc
@@ -64,7 +64,7 @@ const base::Feature kGpuUseDisplayThreadPriority{

 // Gpu watchdog V2 to simplify the logic and reduce GPU hangs
 const base::Feature kGpuWatchdogV2{"GpuWatchdogV2",
-                                   base::FEATURE_DISABLED_BY_DEFAULT};
+                                   base::FEATURE_ENABLED_BY_DEFAULT};

 // Use a different set of watchdog timeouts on V1
 const base::Feature kGpuWatchdogV1NewTimeout{"GpuWatchdogV1NewTimeout",

--- a/gpu/ipc/service/gpu_watchdog_thread_unittest.cc
+++ b/gpu/ipc/service/gpu_watchdog_thread_unittest.cc
@@ -20,9 +20,6 @@ namespace {
 constexpr auto kGpuWatchdogTimeoutForTesting =
    base::TimeDelta::FromMilliseconds(1000);

-constexpr base::TimeDelta kMaxWaitTimeForTesting =
-    base::TimeDelta::FromMilliseconds(4000);
-
 // This task will run for duration_ms milliseconds.
 void SimpleTask(base::TimeDelta duration) {
  base::PlatformThread::Sleep(duration);
@@ -74,7 +71,6 @@ void GpuWatchdogTest::SetUp() {
  watchdog_thread_ = gpu::GpuWatchdogThreadImplV2::Create(
      /*start_backgrounded*/ false,
      /*timeout*/ kGpuWatchdogTimeoutForTesting,
-      /*max_wait_time*/ kMaxWaitTimeForTesting,
      /*test_mode*/ true);
 }

@@ -146,10 +142,10 @@ TEST_F(GpuWatchdogTest, GpuInitializationHang) {
  SimpleTask(kGpuWatchdogTimeoutForTesting * kInitFactor +
             kGpuWatchdogTimeoutForTesting *
                 kMaxCountOfMoreGpuThreadTimeAllowed +
-             kMaxWaitTimeForTesting + base::TimeDelta::FromMilliseconds(3000));
+             base::TimeDelta::FromMilliseconds(3000));
 #else
  SimpleTask(kGpuWatchdogTimeoutForTesting * kInitFactor +
-             kMaxWaitTimeForTesting + base::TimeDelta::FromMilliseconds(3000));
+             base::TimeDelta::FromMilliseconds(3000));
 #endif

  // Gpu hangs. OnInitComplete() is not called
@@ -203,13 +199,13 @@ TEST_F(GpuWatchdogTest, GpuRunningATaskHang) {
      base::BindOnce(&SimpleTask, kGpuWatchdogTimeoutForTesting * 2 +
                                      kGpuWatchdogTimeoutForTesting *
                                          kMaxCountOfMoreGpuThreadTimeAllowed +
-                                      kMaxWaitTimeForTesting +
+
                                      base::TimeDelta::FromMilliseconds(4000)));
 #else
  task_environment_.GetMainThreadTaskRunner()->PostTask(
      FROM_HERE,
      base::BindOnce(&SimpleTask, kGpuWatchdogTimeoutForTesting * 2 +
-                                      kMaxWaitTimeForTesting +
+
                                      base::TimeDelta::FromMilliseconds(4000)));
 #endif

@@ -262,7 +258,7 @@ TEST_F(GpuWatchdogTest, GpuSwitchingToForegroundHang) {
                     /*duration*/ kGpuWatchdogTimeoutForTesting * 2 +
                         kGpuWatchdogTimeoutForTesting *
                             kMaxCountOfMoreGpuThreadTimeAllowed +
-                         kMaxWaitTimeForTesting +
+
                         base::TimeDelta::FromMilliseconds(4200),
                     /*time_to_switch_to_foreground*/
                     base::TimeDelta::FromMilliseconds(200)));
@@ -272,7 +268,7 @@ TEST_F(GpuWatchdogTest, GpuSwitchingToForegroundHang) {
      base::BindOnce(&GpuWatchdogTest::LongTaskFromBackgroundToForeground,
                     base::Unretained(this),
                     /*duration*/ kGpuWatchdogTimeoutForTesting * 2 +
-                         kMaxWaitTimeForTesting +
+
                         base::TimeDelta::FromMilliseconds(4200),
                     /*time_to_switch_to_foreground*/
                     base::TimeDelta::FromMilliseconds(200)));
@@ -308,10 +304,10 @@ TEST_F(GpuWatchdogTest, GpuInitializationPause) {
  SimpleTask(kGpuWatchdogTimeoutForTesting * kInitFactor +
             kGpuWatchdogTimeoutForTesting *
                 kMaxCountOfMoreGpuThreadTimeAllowed +
-             kMaxWaitTimeForTesting + base::TimeDelta::FromMilliseconds(4000));
+             base::TimeDelta::FromMilliseconds(4000));
 #else
  SimpleTask(kGpuWatchdogTimeoutForTesting * kInitFactor +
-             kMaxWaitTimeForTesting + base::TimeDelta::FromMilliseconds(4000));
+             base::TimeDelta::FromMilliseconds(4000));
 #endif

  // A GPU hang should be detected.
@@ -354,7 +350,7 @@ TEST_F(GpuWatchdogPowerTest, GpuOnResumeHang) {
          /*duration*/ kGpuWatchdogTimeoutForTesting * kRestartFactor +
              kGpuWatchdogTimeoutForTesting *
                  kMaxCountOfMoreGpuThreadTimeAllowed +
-              kMaxWaitTimeForTesting + base::TimeDelta::FromMilliseconds(4200),
+              base::TimeDelta::FromMilliseconds(4200),
          /*time_to_power_resume*/
          base::TimeDelta::FromMilliseconds(200)));
 #else
@@ -363,7 +359,7 @@ TEST_F(GpuWatchdogPowerTest, GpuOnResumeHang) {
      base::BindOnce(
          &GpuWatchdogPowerTest::LongTaskOnResume, base::Unretained(this),
          /*duration*/ kGpuWatchdogTimeoutForTesting * kRestartFactor +
-              kMaxWaitTimeForTesting + base::TimeDelta::FromMilliseconds(4200),
+              base::TimeDelta::FromMilliseconds(4200),
          /*time_to_power_resume*/
          base::TimeDelta::FromMilliseconds(200)));
 #endif

--- a/gpu/ipc/service/gpu_watchdog_thread_v2.cc
+++ b/gpu/ipc/service/gpu_watchdog_thread_v2.cc
@@ -26,11 +26,9 @@
 namespace gpu {

 GpuWatchdogThreadImplV2::GpuWatchdogThreadImplV2(base::TimeDelta timeout,
-                                                 base::TimeDelta max_wait_time,
                                                 bool is_test_mode)
    : watchdog_timeout_(timeout),
      in_gpu_initialization_(true),
-      max_wait_time_(max_wait_time),
      is_test_mode_(is_test_mode),
      watched_gpu_task_runner_(base::ThreadTaskRunnerHandle::Get()) {
  base::MessageLoopCurrent::Get()->AddTaskObserver(this);
@@ -83,10 +81,9 @@ GpuWatchdogThreadImplV2::~GpuWatchdogThreadImplV2() {
 std::unique_ptr<GpuWatchdogThreadImplV2> GpuWatchdogThreadImplV2::Create(
    bool start_backgrounded,
    base::TimeDelta timeout,
-    base::TimeDelta max_wait_time,
    bool is_test_mode) {
-  auto watchdog_thread = base::WrapUnique(
-      new GpuWatchdogThreadImplV2(timeout, max_wait_time, is_test_mode));
+  auto watchdog_thread =
+      base::WrapUnique(new GpuWatchdogThreadImplV2(timeout, is_test_mode));
  base::Thread::Options options;
  options.timer_slack = base::TIMER_SLACK_MAXIMUM;
  watchdog_thread->StartWithOptions(options);
@@ -98,7 +95,7 @@ std::unique_ptr<GpuWatchdogThreadImplV2> GpuWatchdogThreadImplV2::Create(
 // static
 std::unique_ptr<GpuWatchdogThreadImplV2> GpuWatchdogThreadImplV2::Create(
    bool start_backgrounded) {
-  return Create(start_backgrounded, kGpuWatchdogTimeout, kMaxWaitTime, false);
+  return Create(start_backgrounded, kGpuWatchdogTimeout, false);
 }

 // Do not add power observer during watchdog init, PowerMonitor might not be up
@@ -386,8 +383,6 @@ void GpuWatchdogThreadImplV2::OnWatchdogTimeout() {
  DCHECK(!in_power_suspension_);
  DCHECK(!is_paused_);

-  base::TimeTicks on_watchdog_timeout_start = base::TimeTicks::Now();
-
  // If this metric is added too early (eg. watchdog creation time), it cannot
  // be persistent. The histogram data will be lost after crash or browser exit.
  // Delay the recording of kGpuWatchdogStart until the firs
@@ -430,34 +425,11 @@ void GpuWatchdogThreadImplV2::OnWatchdogTimeout() {
    return;
  }

-  // An experiment for all platforms: Wait for max_wait_time_ and see if GPU
-  // will response.
-  GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kTimeoutWait);
-  if (GpuRespondsAfterWaiting(on_watchdog_timeout_start)) {
-    last_on_watchdog_timeout_timeticks_ = base::TimeTicks::Now();
-    last_arm_disarm_counter_ =
-        base::subtle::NoBarrier_Load(&arm_disarm_counter_);
-
-    task_runner()->PostDelayedTask(
-        FROM_HERE,
-        base::BindOnce(&GpuWatchdogThreadImplV2::OnWatchdogTimeout, weak_ptr_),
-        watchdog_timeout_);
-    return;
-  }
-
  // Still armed without any progress. GPU possibly hangs.
  GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kKill);
  DeliberatelyTerminateToRecoverFromHang();
 }

-bool GpuWatchdogThreadImplV2::GpuIsAlive() {
-  base::subtle::Atomic32 arm_disarm_counter =
-      base::subtle::NoBarrier_Load(&arm_disarm_counter_);
-  bool gpu_makes_progress = arm_disarm_counter != last_arm_disarm_counter_;
-
-  return (gpu_makes_progress);
-}
-
 bool GpuWatchdogThreadImplV2::WatchedThreadNeedsMoreTime(
    bool no_gpu_hang_detected) {
 #if defined(OS_WIN)
@@ -480,10 +452,6 @@ bool GpuWatchdogThreadImplV2::WatchedThreadNeedsMoreTime(
        count_of_more_gpu_thread_time_allowed_);
  }

-  // For metrics only - The extra time was give in timeouts.
-  time_in_extra_timeouts_ =
-      count_of_more_gpu_thread_time_allowed_ * watchdog_timeout_;
-
  // Calculate how many thread ticks the watched thread spent doing the work.
  base::ThreadTicks now = GetWatchedThreadTime();
  base::TimeDelta thread_time_elapsed =
@@ -546,31 +514,6 @@ base::ThreadTicks GpuWatchdogThreadImplV2::GetWatchedThreadTime() {
 }
 #endif

-// This is an experiment on all platforms to see whether GPU will response
-// after waiting longer.
-bool GpuWatchdogThreadImplV2::GpuRespondsAfterWaiting(
-    base::TimeTicks on_watchdog_timeout_start) {
-  base::TimeDelta duration;
-  int count = 0;
-  NumOfUsersWaitHistogram(count++, false /*gpu_is_active*/);
-
-  while (duration < max_wait_time_) {
-    // Sleep for 1 seconds each time and check if the GPU makes a progress.
-    base::PlatformThread::Sleep(base::TimeDelta::FromSeconds(1));
-    duration = base::TimeTicks::Now() - on_watchdog_timeout_start;
-    NumOfUsersWaitHistogram(count++, false /*gpu_is_active*/);
-
-    if (GpuIsAlive()) {
-      GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kProgressAfterWait);
-      GpuWatchdogWaitTimeHistogram(duration);
-      NumOfUsersWaitHistogram(count++, true /*gpu_is_active*/);
-      return true;
-    }
-  }
-
-  return false;
-}
-
 void GpuWatchdogThreadImplV2::DeliberatelyTerminateToRecoverFromHang() {
  DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
  // If this is for gpu testing, do not terminate the gpu process.
@@ -620,7 +563,7 @@ void GpuWatchdogThreadImplV2::DeliberatelyTerminateToRecoverFromHang() {
      WithinOneMinFromPowerResumed() ? "1" : "0");

  // Deliberately crash the process to create a crash dump.
-  *((volatile int*)0) = 0xdeadface;
+  *static_cast<volatile int*>(nullptr) = 0x1337;
 }

 void GpuWatchdogThreadImplV2::GpuWatchdogHistogram(
@@ -707,58 +650,6 @@ void GpuWatchdogThreadImplV2::NumOfUsersWaitingWithExtraThreadTimeHistogram(
 }
 #endif

-void GpuWatchdogThreadImplV2::GpuWatchdogWaitTimeHistogram(
-    base::TimeDelta wait_time) {
-#if defined(OS_WIN)
-  // Add the time the GPU thread was given for full thread time.
-  wait_time += time_in_extra_timeouts_;
-#endif
-
-  // Record the wait time in OnWatchdogTimeout() for the GPU main thread to
-  // make a progress. The maximum recodrding time is 150 seconds because
-  // Windows need to add the time spent before reaching here (max 60 sec).
-  constexpr base::TimeDelta kMin = base::TimeDelta::FromSeconds(1);
-  constexpr base::TimeDelta kMax = base::TimeDelta::FromSeconds(150);
-  constexpr int kBuckets = 50;
-  bool recorded = false;
-
-  base::UmaHistogramCustomTimes("GPU.WatchdogThread.WaitTime", wait_time, kMin,
-                                kMax, kBuckets);
-
-  if (in_gpu_initialization_) {
-    base::UmaHistogramCustomTimes("GPU.WatchdogThread.WaitTime.Init", wait_time,
-                                  kMin, kMax, kBuckets);
-    recorded = true;
-  }
-
-  if (WithinOneMinFromPowerResumed()) {
-    base::UmaHistogramCustomTimes("GPU.WatchdogThread.WaitTime.PowerResume",
-                                  wait_time, kMin, kMax, kBuckets);
-    recorded = true;
-  }
-
-  if (WithinOneMinFromForegrounded()) {
-    base::UmaHistogramCustomTimes("GPU.WatchdogThread.WaitTime.Foregrounded",
-                                  wait_time, kMin, kMax, kBuckets);
-    recorded = true;
-  }
-
-  if (!recorded) {
-    base::UmaHistogramCustomTimes("GPU.WatchdogThread.WaitTime.Normal",
-                                  wait_time, kMin, kMax, kBuckets);
-  }
-}
-
-void GpuWatchdogThreadImplV2::NumOfUsersWaitHistogram(int count,
-                                                      bool gpu_is_active) {
-  constexpr int kMax = 60;  // == kMaxWaitTime.InSeconds();
-
-  base::UmaHistogramExactLinear(
-      gpu_is_active ? "GPU.WatchdogThread.WaitTime.ProgressAfterWait"
-                    : "GPU.WatchdogThread.WaitTime.NumOfUsers",
-      count, kMax);
-}
-
 bool GpuWatchdogThreadImplV2::WithinOneMinFromPowerResumed() {
  size_t count = base::TimeDelta::FromSeconds(60) / watchdog_timeout_;
  return power_resumed_event_ && num_of_timeout_after_power_resume_ <= count;

--- a/gpu/ipc/service/gpu_watchdog_thread_v2.h
+++ b/gpu/ipc/service/gpu_watchdog_thread_v2.h
@@ -14,7 +14,6 @@ namespace gpu {
 // OnGPUWatchdogTimeout for at most 4 times before the gpu thread is killed.
 constexpr int kMaxCountOfMoreGpuThreadTimeAllowed = 4;
 #endif
-constexpr base::TimeDelta kMaxWaitTime = base::TimeDelta::FromSeconds(60);

 class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
    : public GpuWatchdogThread,
@@ -26,7 +25,6 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
  static std::unique_ptr<GpuWatchdogThreadImplV2> Create(
      bool start_backgrounded,
      base::TimeDelta timeout,
-      base::TimeDelta max_wait_time,
      bool test_mode);

  ~GpuWatchdogThreadImplV2() override;
@@ -67,7 +65,6 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
  };

  GpuWatchdogThreadImplV2(base::TimeDelta timeout,
-                          base::TimeDelta max_wait_time,
                          bool test_mode);
  void OnAddPowerObserver();
  void RestartWatchdogTimeoutTask(PauseResumeSource source_of_request);
@@ -78,12 +75,10 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
  void InProgress();
  bool IsArmed();
  void OnWatchdogTimeout();
-  bool GpuIsAlive();
  bool WatchedThreadNeedsMoreTime(bool no_gpu_hang_detected);
 #if defined(OS_WIN)
  base::ThreadTicks GetWatchedThreadTime();
 #endif
-  bool GpuRespondsAfterWaiting(base::TimeTicks on_watchdog_timeout_start);

  // Do not change the function name. It is used for [GPU HANG] carsh reports.
  void DeliberatelyTerminateToRecoverFromHang();
@@ -99,14 +94,6 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
  void NumOfUsersWaitingWithExtraThreadTimeHistogram(int count);
 #endif

-  // The wait time in OnWatchdogTimeout() for the GPU main thread to make a
-  // progress.
-  void GpuWatchdogWaitTimeHistogram(base::TimeDelta wait_time);
-
-  // The number of users per second stay in Chrome after entering the 60-second
-  // wait time.
-  void NumOfUsersWaitHistogram(int count, bool gpu_is_active);
-
  // Used for metrics. It's 1 minute after the event.
  bool WithinOneMinFromPowerResumed();
  bool WithinOneMinFromForegrounded();
@@ -161,9 +148,6 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
  // After GPU hang detected, how many times has the GPU thread been allowed to
  // continue due to not enough thread time.
  int count_of_more_gpu_thread_time_allowed_ = 0;
-
-  // The accumulated timeout time the GPU main thread was given.
-  base::TimeDelta time_in_extra_timeouts_;
 #endif

 #if defined(USE_X11)
@@ -203,7 +187,6 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
  size_t num_of_timeout_after_foregrounded_ = 0;
  bool foregrounded_event_ = false;
  bool power_resumed_event_ = false;
-  base::TimeDelta max_wait_time_;

  // For gpu testing only.
  const bool is_test_mode_;

--- a/testing/variations/fieldtrial_testing_config.json
+++ b/testing/variations/fieldtrial_testing_config.json
@@ -2388,25 +2388,6 @@
            ]
        }
    ],
-    "GpuWatchdogV2": [
-        {
-            "platforms": [
-                "android",
-                "chromeos",
-                "linux",
-                "mac",
-                "windows"
-            ],
-            "experiments": [
-                {
-                    "name": "Enabled",
-                    "enable_features": [
-                        "GpuWatchdogV2"
-                    ]
-                }
-            ]
-        }
-    ],
    "GwpAsanAndroid": [
        {
            "platforms": [