Commit 05430111, authored by Maggie Chen, committed by Commit Bot

Watchdog V2 - Add the logic in arm, in progress, disarm and timeout functions.

A delayed task OnWatchdogTimeout() is posted during watchdog thread init. It will
continue posting the next delayed timeout task when no GPU hang is detected.

An atomic counter is incremented by one in Arm() or Disarm(), and by two in
ReportProgress(). When the OnWatchdogTimeout() task runs, it checks whether the
counter has increased since the last timeout (i.e. still running) or whether the
counter is an even number (i.e. disarmed). If neither holds, the watchdog thread
crashes deliberately in order to recover from the GPU hang.




Bug:949839

Change-Id: Ie0e98c9e7621068a5143bf9c3a70cc8a6267955a
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1674786
Commit-Queue: Maggie Chen <magchen@chromium.org>
Reviewed-by: Antoine Labour <piman@chromium.org>
Cr-Commit-Position: refs/heads/master@{#672789}
parent 686d5f50
......@@ -419,6 +419,11 @@ bool GpuInit::InitializeAndStartSandbox(base::CommandLine* command_line,
UMA_HISTOGRAM_BOOLEAN("GPU.Sandbox.InitializedSuccessfully",
gpu_info_.sandboxed);
// Notify the gpu watchdog that gpu init has completed so that the watchdog
// can be disarmed.
if (watchdog_thread_)
watchdog_thread_->OnInitComplete();
init_successful_ = true;
#if defined(USE_OZONE)
ui::OzonePlatform::GetInstance()->AfterSandboxEntry();
......
......@@ -44,10 +44,14 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThread : public base::Thread,
virtual void OnBackgrounded() = 0;
virtual void OnForegrounded() = 0;
// The watchdog starts armed to catch startup hangs, and needs to be disarmed
// once init is complete, before executing tasks.
virtual void OnInitComplete() = 0;
protected:
GpuWatchdogThread();
// Do not change this name. This is used for [GPU HANG] carsh reports
// Do not change this name. This is used for [GPU HANG] crash reports.
virtual void DeliberatelyTerminateToRecoverFromHang() = 0;
private:
......@@ -62,14 +66,15 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV1
static std::unique_ptr<GpuWatchdogThreadImplV1> Create(
bool start_backgrounded);
// Implements GpuWatchdogThread.
void AddPowerObserver() override;
void OnBackgrounded() override;
void OnForegrounded() override;
void OnInitComplete() override {}
// gl::ProgressReporter implementation:
void ReportProgress() override;
void OnBackgrounded() override;
void OnForegrounded() override;
protected:
void Init() override;
void CleanUp() override;
......
......@@ -4,32 +4,33 @@
#include "gpu/ipc/service/gpu_watchdog_thread_v2.h"
#include "base/atomicops.h"
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/message_loop/message_loop_current.h"
#include "base/power_monitor/power_monitor.h"
#include "base/threading/thread_task_runner_handle.h"
#include "base/time/time.h"
#include "build/build_config.h"
namespace gpu {
namespace {
#if defined(CYGPROFILE_INSTRUMENTATION)
const int kGpuTimeoutInMs = 30000;
const int kGpuTimeoutInSec = 30;
#elif defined(OS_WIN) || defined(OS_MACOSX)
const int kGpuTimeoutInMs = 15000;
const int kGpuTimeoutInSec = 15;
#else
const int kGpuTimeoutInMs = 10000;
const int kGpuTimeoutInSec = 10;
#endif
} // namespace
GpuWatchdogThreadImplV2::GpuWatchdogThreadImplV2()
: timeout_(base::TimeDelta::FromMilliseconds(kGpuTimeoutInMs)),
: watchdog_timeout_(base::TimeDelta::FromSeconds(kGpuTimeoutInSec)),
watched_task_runner_(base::ThreadTaskRunnerHandle::Get()),
weak_factory_(this) {
Disarm();
base::MessageLoopCurrent::Get()->AddTaskObserver(this);
Arm();
}
GpuWatchdogThreadImplV2::~GpuWatchdogThreadImplV2() {
......@@ -43,8 +44,6 @@ GpuWatchdogThreadImplV2::~GpuWatchdogThreadImplV2() {
// static
std::unique_ptr<GpuWatchdogThreadImplV2> GpuWatchdogThreadImplV2::Create(
bool start_backgrounded) {
NOTREACHED(); // Not ready yet
auto watchdog_thread = base::WrapUnique(new GpuWatchdogThreadImplV2);
base::Thread::Options options;
options.timer_slack = base::TIMER_SLACK_MAXIMUM;
......@@ -65,9 +64,17 @@ void GpuWatchdogThreadImplV2::OnBackgrounded() {}
void GpuWatchdogThreadImplV2::OnForegrounded() {}
void GpuWatchdogThreadImplV2::ReportProgress() {}
// gl::ProgressReporter implementation. Forwards every progress report to
// InProgress(), which advances |arm_disarm_counter_| by two so that the next
// OnWatchdogTimeout() check observes a changed counter.
void GpuWatchdogThreadImplV2::ReportProgress() {
InProgress();
}
void GpuWatchdogThreadImplV2::Init() {}
// base::Thread override. Schedules the first OnWatchdogTimeout() check via
// task_runner(), to run after |watchdog_timeout_|. OnWatchdogTimeout() then
// re-posts itself for as long as no hang is detected.
void GpuWatchdogThreadImplV2::Init() {
task_runner()->PostDelayedTask(
FROM_HERE,
// Bound through a weak pointer obtained from |weak_factory_|, which
// CleanUp() invalidates.
base::BindOnce(&GpuWatchdogThreadImplV2::OnWatchdogTimeout,
weak_factory_.GetWeakPtr()),
watchdog_timeout_);
}
void GpuWatchdogThreadImplV2::CleanUp() {
weak_factory_.InvalidateWeakPtrs();
......@@ -83,9 +90,54 @@ void GpuWatchdogThreadImplV2::DidProcessTask(
Disarm();
}
void GpuWatchdogThreadImplV2::Arm() {}
// Called from the gpu thread when gpu init has completed. Disarms the
// watchdog by calling Disarm(), which increments |arm_disarm_counter_| by one
// and expects it to be even afterwards.
void GpuWatchdogThreadImplV2::OnInitComplete() {
Disarm();
}
// Arms the watchdog by incrementing |arm_disarm_counter_| by one. An odd
// counter value means "armed"; OnWatchdogTimeout() uses the parity to tell
// the two states apart.
void GpuWatchdogThreadImplV2::Arm() {
base::subtle::NoBarrier_AtomicIncrement(&arm_disarm_counter_, 1);
// Arm/Disarm are always called in sequence. Now it's an odd number.
DCHECK(base::subtle::NoBarrier_Load(&arm_disarm_counter_) & 1);
}
// Disarms the watchdog by incrementing |arm_disarm_counter_| by one. An even
// counter value means "disarmed", in which case OnWatchdogTimeout() simply
// schedules the next check.
void GpuWatchdogThreadImplV2::Disarm() {
base::subtle::NoBarrier_AtomicIncrement(&arm_disarm_counter_, 1);
// Arm/Disarm are always called in sequence. Now it's an even number.
DCHECK(base::subtle::NoBarrier_Load(&arm_disarm_counter_) % 2 == 0);
}
// Records progress while the watchdog is armed. Adding two changes the
// counter value, so OnWatchdogTimeout() sees a difference from its previous
// sample, while the parity of the counter is left unchanged.
void GpuWatchdogThreadImplV2::InProgress() {
// This is equivalent to Disarm() + Arm()
base::subtle::NoBarrier_AtomicIncrement(&arm_disarm_counter_, 2);
void GpuWatchdogThreadImplV2::Disarm() {}
// Now it's an odd number.
DCHECK(base::subtle::NoBarrier_Load(&arm_disarm_counter_) & 1);
}
// Periodic hang check. Samples |arm_disarm_counter_| and compares it with the
// value recorded by the previous check:
//   - even counter (disarmed), or counter changed since the last check:
//     schedule the next check after |watchdog_timeout_| and return;
//   - otherwise (still armed and unchanged for a whole timeout period):
//     call DeliberatelyTerminateToRecoverFromHang().
void GpuWatchdogThreadImplV2::OnWatchdogTimeout() {
base::subtle::Atomic32 arm_disarm_counter =
base::subtle::NoBarrier_Load(&arm_disarm_counter_);
// disarmed is true if it's an even number
bool disarmed = arm_disarm_counter % 2 == 0;
// Any difference from the previous sample counts as progress.
bool gpu_makes_progress = arm_disarm_counter != last_arm_disarm_counter_;
// Remember this sample so the next check can compare against it. This is
// updated before the decision below, on every path.
last_arm_disarm_counter_ = arm_disarm_counter;
if (disarmed || gpu_makes_progress) {
// No hang detected: re-post this check, bound through a weak pointer.
task_runner()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThreadImplV2::OnWatchdogTimeout,
weak_factory_.GetWeakPtr()),
watchdog_timeout_);
return;
}
// Still armed without any progress. GPU possibly hangs.
// No further check is posted on this path, so if the call below returns,
// this function does not schedule another timeout task.
DeliberatelyTerminateToRecoverFromHang();
}
void GpuWatchdogThreadImplV2::OnSuspend() {}
......@@ -95,6 +147,11 @@ void GpuWatchdogThreadImplV2::DeliberatelyTerminateToRecoverFromHang() {
// Store variables so they're available in crash dumps to help determine the
// cause of any hang.
#if defined(OS_WIN)
if (IsDebuggerPresent())
return;
#endif
// Deliberately crash the process to create a crash dump.
*((volatile int*)0) = 0xdeadface;
}
......
......@@ -17,16 +17,17 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
bool start_backgrounded);
~GpuWatchdogThreadImplV2() override;
// Implements GpuWatchdogThread
// Implements GpuWatchdogThread.
void AddPowerObserver() override;
void OnBackgrounded() override;
void OnForegrounded() override;
void OnInitComplete() override;
// Implements gl::ProgressReporter
// Implements gl::ProgressReporter.
void ReportProgress() override;
protected:
// Implements base::Thread
// Implements base::Thread.
void Init() override;
void CleanUp() override;
......@@ -34,8 +35,10 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
GpuWatchdogThreadImplV2();
void Arm();
void Disarm();
void InProgress();
void OnWatchdogTimeout();
// Implements base::PowerObserver
// Implements base::PowerObserver.
void OnSuspend() override;
void OnResume() override;
......@@ -43,10 +46,19 @@ class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
void WillProcessTask(const base::PendingTask& pending_task) override;
void DidProcessTask(const base::PendingTask& pending_task) override;
// Implements GpuWatchdogThread
// Implements GpuWatchdogThread.
void DeliberatelyTerminateToRecoverFromHang() override;
base::TimeDelta timeout_;
// This counter is only written on the gpu thread, and read on the watchdog
// thread.
base::subtle::Atomic32 arm_disarm_counter_ = 0;
// The counter number read in the last OnWatchdogTimeout() on the watchdog
// thread.
int32_t last_arm_disarm_counter_ = 0;
// Timeout on the watchdog thread to check if gpu hangs
base::TimeDelta watchdog_timeout_;
scoped_refptr<base::SingleThreadTaskRunner> watched_task_runner_;
base::WeakPtrFactory<GpuWatchdogThreadImplV2> weak_factory_;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment